SNAP Library, Developer Reference  2012-10-15 15:06:59
SNAP, a general purpose network analysis and graph mining library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
xmath.h
Go to the documentation of this file.
00001 
00002 // Mathematics-Utilities
00003 class TMath{
00004 public:
00005   static double E;
00006   static double Pi;
00007   static double LogOf2;
00008 
00009   static double Inv(const double& x){IAssert(x!=0.0); return (1.0/x);}
00010   static double Sqr(const double& x){return x*x;}
00011   static double Sqrt(const double& x){IAssert(!(x<0.0)); return sqrt(x);}
00012   static double Log(const double& Val){return log(Val);}
00013   static double Log2(const double& Val){return log(Val)/LogOf2;}
00014   static double Round(const double& Val){
00015     return (Val>0)?floor(Val+0.5):ceil(Val-0.5);}
00016   static double Round(const double & Val, int Decs){
00017     const double pwr=pow(10.0, Decs); return Round(Val * pwr) / pwr;}
00018   static int Fac(const int& Val){
00019     if (Val<=1){return 1;} else {return Val*Fac(Val-1);}}
00020   static int Choose(const int& N, const int& K){ // binomial coefficient
00021     return Fac(N)/(Fac(K)*Fac(N-K)); }
00022   static uint Pow2(const int& pow){return uint(1u<<pow);}
00023   static double Power(const double& Base, const double& Exponent){
00024     return exp(log(Base)*Exponent);}
00025 
00026   template <typename T>
00027   static int Sign(const T& Val){return Val<0?-1:(Val>0?1:0);}
00028 
00029   template <class T>
00030   static const T& Mx(const T& LVal, const T& RVal) {
00031     return LVal > RVal ? LVal : RVal;}
00032 
00033   template <class T>
00034   static const T& Mn(const T& LVal, const T& RVal){
00035     return LVal < RVal ? LVal : RVal;}
00036 
00037   template <class T>
00038   static const T& Mx(const T& Val1, const T& Val2, const T& Val3) {
00039     if (Val1 > Val2) {
00040       if (Val1 > Val3) return Val1;
00041       else return Val3;
00042     } else {
00043       if (Val2 > Val3) return Val2;
00044       else return Val3;
00045     }
00046   }
00047 
00048   template <class T>
00049   static const T& Mn(const T& Val1, const T& Val2, const T& Val3) {
00050     if(Val1 < Val2) {
00051       if (Val1 < Val3) return Val1;
00052       else return Val3;
00053     } else {
00054       if (Val2 < Val3) return Val2;
00055       else return Val3;
00056     }
00057   }
00058 
00059   template <class T>
00060   static const T& Median(const T& Val1, const T& Val2, const T& Val3) {
00061     if (Val1 < Val2) {
00062       if (Val2 < Val3) return Val2;
00063       else if (Val3 < Val1) return Val1;
00064       else return Val3;
00065     } else {
00066       if (Val1 < Val3) return Val1;
00067       else if (Val3 < Val2)  return Val2;
00068       else return Val3;
00069     }
00070   }
00071 
00072   template <class T>
00073   static const T& InRange(const T& Val, const T& Mn, const T& Mx) {
00074     IAssert(Mn <= Mx); return Val < Mn ? Mn : (Val > Mx ? Mx : Val);}
00075 
00076   template <class T>
00077   static bool IsInRange(const T& Val, const T& Mn, const T& Mx) {
00078     IAssert(Mn <= Mx); return Val >= Mn && Val <= Mx;}
00079 
00080   template <class T>
00081   static bool IsInEps(const T& Val, const T& Eps) {
00082     return Val >= -Eps && Val <= Eps;}
00083 };
00084 
00086 // Special-Functions
00087 class TSpecFunc{
00088 public:
00089   static void GammaPSeries/*gser*/(
00090    double& gamser, const double& a, const double& x, double& gln);
00091   static void GammaQContFrac/*gcf*/(
00092    double& gammcf, const double& a, const double& x, double& gln);
00093   static double GammaQ/*gammq*/(const double& a, const double& x);
00094   static double LnGamma/*gammln*/(const double& xx);
00095   static double BetaCf/*betacf*/(
00096    const double& a, const double& b, const double& x);
00097   static double BetaI(const double& a, const double& b, const double& x);
00098 
00099   static void LinearFit( // Y = A + B*X
00100    const TVec<TFltPr>& XY, double& A, double& B,
00101    double& SigA, double& SigB, double& Chi2, double& R2);
00102   static void PowerFit( // Y = A * X^B
00103    const TVec<TFltPr>& XY, double& A, double& B,
00104    double& SigA, double& SigB, double& Chi2, double& R2);
00105   static void LogFit( // Y = A + B*log(X)
00106    const TVec<TFltPr>& XY, double& A, double& B,
00107    double& SigA, double& SigB, double& Chi2, double& R2);
00108   static void ExpFit( // Y = A * exp(B*X)
00109    const TVec<TFltPr>& XY, double& A, double& B,
00110    double& SigA, double& SigB, double& Chi2, double& R2);
00111 public:
00112   static double LnComb(const int& n, const int& k);
00113 public:
00114   static double Entropy(const TIntV& ValV);
00115   static double Entropy(const TFltV& ValV);
00116   static void EntropyFracDim(const TIntV& ValV, TFltV& EntropyV);
00117   static void EntropyFracDim(const TFltV& ValV, TFltV& EntropyV);
00118 public:
00119   static double EntropyBias(const double& B); // solves for p: B = p*log2(p)+(1-p)log2(1-p)
00120   //MLE of the power-law coefficient
00121   static double GetPowerCoef(const TFltV& XValV, double MinX=-1.0); // values (sampled from the distribution)
00122   static double GetPowerCoef(const TFltPrV& XValCntV, double MinX=-1.0); // (value, count) pairs
00123 };
00124 
00126 // Statistical-Moments
00127 ClassTPV(TMom, PMom, TMomV)//{
00128 private:
00129   TBool DefP;
00130   TFltPrV ValWgtV;
00131   TFlt SumW, ValSumW;
00132   TInt Vals;
00133   TBool UsableP;
00134   TFlt UnusableVal;
00135   TFlt Mn, Mx;
00136   TFlt Mean, Vari, SDev, SErr;
00137   TFlt Median, Quart1, Quart3;
00138   TFltV DecileV; // 0=min 1=1.decile, ..., 9=9.decile, 10=max
00139   TFltV PercentileV; // 0=min 1=1.percentile, ..., 9=9.percentile, 10=max
00140 public:
00141   TMom():
00142     DefP(false), ValWgtV(),
00143     SumW(), ValSumW(), Vals(),
00144     UsableP(false), UnusableVal(-1),
00145     Mn(), Mx(),
00146     Mean(), Vari(), SDev(), SErr(),
00147     Median(), Quart1(), Quart3(),
00148     DecileV(), PercentileV(){}
00149   TMom(const TMom& Mom):
00150     DefP(Mom.DefP), ValWgtV(Mom.ValWgtV),
00151     SumW(Mom.SumW), ValSumW(Mom.ValSumW), Vals(Mom.Vals),
00152     UsableP(Mom.UsableP), UnusableVal(Mom.UnusableVal),
00153     Mn(Mom.Mn), Mx(Mom.Mx),
00154     Mean(Mom.Mean), Vari(Mom.Vari), SDev(Mom.SDev), SErr(Mom.SErr),
00155     Median(Mom.Median), Quart1(Mom.Quart1), Quart3(Mom.Quart3),
00156     DecileV(Mom.DecileV), PercentileV(Mom.PercentileV){}
00157   static PMom New(){return PMom(new TMom());}
00158   static void NewV(TMomV& MomV, const int& Moms){
00159     MomV.Gen(Moms); for (int MomN=0; MomN<Moms; MomN++){MomV[MomN]=New();}}
00160   static void NewVV(TVVec<PMom>& MomVV, const int& XMoms, const int& YMoms){
00161     MomVV.Gen(XMoms, YMoms);
00162     for (int XMomN=0; XMomN<XMoms; XMomN++){
00163       for (int YMomN=0; YMomN<YMoms; YMomN++){
00164         MomVV.At(XMomN, YMomN)=New();}}}
00165   TMom(const TFltV& _ValV);
00166   static PMom New(const TFltV& ValV){
00167     return PMom(new TMom(ValV));}
00168   TMom(TSIn& SIn):
00169     DefP(SIn),
00170     ValWgtV(SIn),
00171     SumW(SIn), ValSumW(SIn), Vals(SIn),
00172     UsableP(SIn), UnusableVal(SIn),
00173     Mn(SIn), Mx(SIn),
00174     Mean(SIn), Vari(SIn), SDev(SIn), SErr(SIn),
00175     Median(SIn), Quart1(SIn), Quart3(SIn),
00176     DecileV(SIn), PercentileV(SIn){}
00177   static PMom Load(TSIn& SIn){return new TMom(SIn);}
00178   void Save(TSOut& SOut) const {
00179     DefP.Save(SOut);
00180     ValWgtV.Save(SOut);
00181     SumW.Save(SOut); ValSumW.Save(SOut); Vals.Save(SOut);
00182     UsableP.Save(SOut); UnusableVal.Save(SOut);
00183     Mn.Save(SOut); Mx.Save(SOut);
00184     Mean.Save(SOut); Vari.Save(SOut); SDev.Save(SOut); SErr.Save(SOut);
00185     Median.Save(SOut); Quart1.Save(SOut); Quart3.Save(SOut);
00186     DecileV.Save(SOut); PercentileV.Save(SOut);}
00187 
00188   TMom& operator=(const TMom& Mom){
00189     Assert(!DefP); DefP=Mom.DefP;
00190     ValWgtV=Mom.ValWgtV;
00191     SumW=Mom.SumW; ValSumW=Mom.ValSumW; Vals=Mom.Vals;
00192     UsableP=Mom.UsableP; UnusableVal=Mom.UnusableVal;
00193     Mn=Mom.Mn; Mx=Mom.Mx;
00194     Mean=Mom.Mean; Vari=Mom.Vari; SDev=Mom.SDev; SErr=Mom.SErr;
00195     Median=Mom.Median; Quart1=Mom.Quart1; Quart3=Mom.Quart3;
00196     DecileV=Mom.DecileV; PercentileV=Mom.PercentileV;
00197     return *this;}
00198   bool operator==(const TMom& Mom) const {
00199     return Vals==Mom.Vals;}
00200   bool operator<(const TMom& Mom) const {
00201     return Vals<Mom.Vals;}
00202 
00203   // define
00204   void Def();
00205   static void DefV(TMomV& MomV){
00206     for (int MomN=0; MomN<MomV.Len(); MomN++){MomV[MomN]->Def();}}
00207   static void DefVV(TVVec<PMom>& MomVV){
00208     for (int XMomN=0; XMomN<MomVV.GetXDim(); XMomN++){
00209       for (int YMomN=0; YMomN<MomVV.GetYDim(); YMomN++){
00210         MomVV.At(XMomN, YMomN)->Def();}}}
00211   bool IsDef() const {return DefP;}
00212 
00213   // values
00214   void Add(const TFlt& Val, const TFlt& Wgt=1){Assert(!DefP);
00215     ValWgtV.Add(TFltPr(Val, Wgt)); SumW+=Wgt; ValSumW+=Wgt*Val; Vals++;}
00216   double GetWgt() const {return SumW;}
00217   int GetVals() const {return Vals;}
00218   TFlt GetVal(const int& ValN) const {IAssert(!IsDef()); return ValWgtV[ValN].Val1;}
00219   //const TFltV& GetValV() const {IAssert(!IsDef()); return ValV;} //J:
00220 
00221   // usability
00222   bool IsUsable() const {Assert(DefP); return UsableP;}
00223   static bool IsUsableV(const TMomV& MomV){
00224     for (int MomN=0; MomN<MomV.Len(); MomN++){
00225       if (!MomV[MomN]->IsUsable()){return false;}}
00226     return true;}
00227   static bool IsUsableVV(const TVVec<PMom>& MomVV){
00228     for (int XMomN=0; XMomN<MomVV.GetXDim(); XMomN++){
00229       for (int YMomN=0; YMomN<MomVV.GetYDim(); YMomN++){
00230         if (!MomVV.At(XMomN, YMomN)->IsUsable()){return false;}}}
00231     return true;}
00232 
00233   // moments
00234   double GetMn() const {Assert(DefP&&UsableP); return Mn;}
00235   double GetMx() const {Assert(DefP&&UsableP); return Mx;}
00236   double GetExtent() const {Assert(DefP&&UsableP); return Mx-Mn;}
00237   double GetMean() const {Assert(DefP&&UsableP); return Mean;}
00238   double GetVari() const {Assert(DefP&&UsableP); return Vari;}
00239   double GetSDev() const {Assert(DefP&&UsableP); return SDev;}
00240   double GetSErr() const {Assert(DefP&&UsableP); return SErr;}
00241   double GetMedian() const {Assert(DefP&&UsableP); return Median;}
00242   double GetQuart1() const {Assert(DefP&&UsableP); return Quart1;}
00243   double GetQuart3() const {Assert(DefP&&UsableP); return Quart3;}
00244   double GetDecile(const int& DecileN) const {
00245     Assert(DefP&&UsableP); return DecileV[DecileN];}
00246   double GetPercentile(const int& PercentileN) const {
00247     Assert(DefP&&UsableP); return PercentileV[PercentileN];}
00248   double GetByNm(const TStr& MomNm) const;
00249   TStr GetStrByNm(const TStr& MomNm, char* FmtStr=NULL) const;
00250 
00251   // strings
00252   TStr GetStr(const char& SepCh=' ', const char& DelimCh=':',
00253    const bool& DecileP=true, const bool& PercentileP=true, const TStr& FmtStr="%g") const;
00254   static TStr GetNmVStr(const TStr& VarPfx,
00255    const char& SepCh='\t', const bool& DecileP=true, const bool& PercentileP=true);
00256   TStr GetValVStr(const char& SepCh='\t', const bool& DecileP=true, const bool& PercentileP=true) const;
00257 };
00258 typedef TVVec<PMom> TMomVV;
00259 typedef THash<TInt, PMom> TIntMomH;
00260 typedef THash<TInt, TMomV> TIntMomVH;
00261 typedef THash<TInt, TMomVV> TIntMomVVH;
00262 
00264 // Correlation
00265 ClassTP(TCorr, PCorr)//{
00266 private:
00267   int ValVLen;
00268   double CorrCf;
00269   double CorrCfPrb;
00270   double FisherZ;
00271 public:
00272   TCorr(){}
00273   TCorr(const TFltV& ValV1, const TFltV& ValV2);
00274   static PCorr New(const TFltV& ValV1, const TFltV& ValV2){
00275     return PCorr(new TCorr(ValV1, ValV2));}
00276   TCorr(TSIn&){Fail;}
00277   static PCorr Load(TSIn& SIn){return new TCorr(SIn);}
00278   void Save(TSOut&){Fail;}
00279 
00280   TCorr& operator=(const TCorr&){Fail; return *this;}
00281 
00282   double GetCorrCf() const {return CorrCf;}
00283   double GetCorrCfPrb() const {return CorrCfPrb;}
00284 
00285   TStr GetStr() const;
00286 };
00287 
00289 // Statistical Tests
00290 class TStatTest {
00291 private:
00292   static void AveVar(const TFltV& ValV, double& Ave, double& Var);
00293   static double KsProb(const double& Alam);
00294 public:
00295   static void ChiSquareOne(
00296    const TFltV& ObservedBinV, const TFltV& ExpectedBinV,
00297    double& ChiSquareVal, double& SignificancePrb);
00298   static void ChiSquareTwo(
00299    const TFltV& ObservedBin1V, const TFltV& ObservedBin2V,
00300    double& ChiSquareVal, double& SignificancePrb);
00301 
00302   static void TTest(
00303    const TFltV& ValV1, const TFltV& ValV2, double& TTestVal, double& TTestPrb);
00304 
00305   // Kolmogorov-Smirnov (are two distributions the same)
00306   static void KsTest(const TFltV& ValV1, const TFltV& ValV2, double& DStat, double& PVal);
00307   static void KsTest(const TFltPrV& ValCntV1, const TFltPrV& ValCntV2, double& DStat, double& PVal);
00308 };
00309 
00311 // Combinations
00312 ClassTP(TComb, PComb)//{
00313 public:
00314   int Items;
00315   int Order;
00316   int CombN;
00317   TIntV ItemV;
00318 public:
00319   TComb(): Items(-1), Order(-1), CombN(-1), ItemV(){}
00320   TComb(const int& _Items, const int& _Order):
00321     Items(_Items), Order(_Order), CombN(0), ItemV(){
00322     IAssert((Order>0)&&(Order<=Items));}
00323   static PComb New(const int& Items, const int& Order){
00324     return PComb(new TComb(Items, Order));}
00325   ~TComb(){}
00326   TComb(TSIn&){Fail;}
00327   static PComb Load(TSIn& SIn){return new TComb(SIn);}
00328   void Save(TSOut&){Fail;}
00329 
00330   TComb& operator=(const TComb&){Fail; return *this;}
00331 
00332   bool GetNext();
00333   TIntV& GetItemV(){return ItemV;}
00334   int GetCombN() const {return CombN;}
00335   int GetCombs() const;
00336   void Wr();
00337 };
00338 
00340 // Linear-Regression
00341 ClassTP(TLinReg, PLinReg)//{
00342 public:
00343   TFltVV XVV;
00344   TFltV YV;
00345   TFltV SigV;
00346   int Recs, Vars;
00347   TFltVV CovarVV; // 1 based
00348   TFltV CfV; // 1 based
00349   double ChiSq;
00350   void GetXV(const int RecN, TFltV& VarV) const {
00351     VarV.Gen(Vars+1);
00352     for (int VarN=0; VarN<Vars; VarN++){VarV[VarN+1]=XVV.At(RecN-1, VarN);}
00353   }
00354   double GetY(const int RecN) const {return YV[RecN-1];}
00355   double GetSig(const int RecN) const {return SigV[RecN-1];}
00356   void NR_covsrt(TFltVV& CovarVV, const int& Vars, const TIntV& ia, const int& mfit);
00357   void NR_gaussj(TFltVV& a, const int& n, TFltVV& b, const int& m);
00358   void NR_lfit();
00359 public:
00360   TLinReg(){}
00361   static PLinReg New(
00362    const TFltVV& XVV, const TFltV& YV, const TFltV& SigV=TFltV());
00363   ~TLinReg(){}
00364   TLinReg(TSIn&){Fail;}
00365   static PLinReg Load(TSIn& SIn){return new TLinReg(SIn);}
00366   void Save(TSOut&){Fail;}
00367 
00368   TLinReg& operator=(const TLinReg&){Fail; return *this;}
00369 
00370   int GetRecs() const {return Recs;}
00371   int GetVars() const {return Vars;}
00372 
00373   double GetCf(const int& VarN) const {return CfV[VarN+1];}
00374   double GetCfUncer(const int& VarN) const {
00375     return sqrt(double(CovarVV.At(VarN+1, VarN+1)));}
00376   double GetCovar(const int& VarN1, const int& VarN2) const {
00377     return CovarVV.At(VarN1+1, VarN2+1);}
00378 
00379   double GetChiSq() const {return ChiSq;}
00380 
00381   static double LinInterp(const double& x1, const double& y1,
00382    const double& x2, const double& y2, const double& AtX) _CMPWARN{
00383     if (x1 == x2) return (y1+y2)/2.0;
00384     const double k = (y2 - y1) / (x2 - x1);
00385     return k*(AtX - x1) + y1;
00386   }
00387 
00388   void Wr() const;
00389 };
00390 
00392 // Singular-Value-Decomposition
00393 ClassTP(TSvd, PSvd)//{
00394 public:
00395   TFltVV XVV;
00396   TFltV YV;
00397   TFltV SigV;
00398   int Recs, Vars;
00399   TFltVV CovarVV; // 1 based
00400   TFltV CfV; // 1 based
00401   double ChiSq;
00402   void GetXV(const int RecN, TFltV& VarV) const {
00403     VarV.Gen(Vars+1);
00404     for (int VarN=0; VarN<Vars; VarN++){VarV[VarN+1]=XVV.At(RecN-1, VarN);}
00405   }
00406   double GetY(const int RecN) const {return YV[RecN-1];}
00407   double GetSig(const int RecN) const {return SigV[RecN-1];}
00408   static double NR_SIGN(double a, double b){return b >= 0.0 ? fabs(a) : -fabs(a);}
00409   static double NR_FMAX(double maxarg1, double maxarg2){
00410     return maxarg1 > maxarg2 ? maxarg1 : maxarg2;}
00411   static int NR_IMIN(int iminarg1, int iminarg2){
00412     return iminarg1 < iminarg2 ? iminarg1 : iminarg2;}
00413   static double NR_pythag(double a, double b);
00414   static void NR_svdcmp(TFltVV& a, int m, int n, TFltV& w, TFltVV& v);
00415   void NR_svbksb(
00416    TFltVV& u, TFltV& w, TFltVV& v, int m, int n, TFltV& b, TFltV& x);
00417   void NR_svdvar(TFltVV& v, int ma, TFltV& w, TFltVV& cvm);
00418   void NR_svdfit();
00419 public:
00420   TSvd(){}
00421   static PSvd New(
00422    const TFltVV& XVV, const TFltV& YV, const TFltV& SigV=TFltV());
00423   ~TSvd(){}
00424   TSvd(TSIn&){Fail;}
00425   static PSvd Load(TSIn& SIn){return new TSvd(SIn);}
00426   void Save(TSOut&){Fail;}
00427 
00428   TSvd& operator=(const TSvd&){Fail; return *this;}
00429 
00430   int GetRecs() const {return Recs;}
00431   int GetVars() const {return Vars;}
00432 
00433   double GetCf(const int& VarN) const {return CfV[VarN+1];}
00434   double GetCfUncer(const int& VarN) const {
00435     return sqrt(double(CovarVV.At(VarN+1, VarN+1)));}
00436   double GetCovar(const int& VarN1, const int& VarN2) const {
00437     return CovarVV.At(VarN1+1, VarN2+1);}
00438 
00439   double GetChiSq() const {return ChiSq;}
00440 
00441   void GetCfV(TFltV& _CfV);
00442   void GetCfUncerV(TFltV& CfUncerV);
00443 
00444   static void Svd(const TFltVV& InMtx, TFltVV& LSingV, TFltV& SingValV, TFltVV& RSingV);
00445   static void Svd1Based(const TFltVV& InMtx1, TFltVV& LSingV, TFltV& SingValV, TFltVV& RSingV);
00446 
00447   void Wr() const;
00448 };
00449 
00451 // Histogram
00452 class THist {
00453 private:
00454         TFlt MnVal;
00455         TFlt MxVal;
00456     TIntV BucketV;
00457         TFlt BucketSize;
00458     TInt Vals;
00459 public:
00460     THist() { }
00461     THist(const double& _MnVal, const double& _MxVal, const int& Buckets):
00462       MnVal(_MnVal), MxVal(_MxVal), BucketV(Buckets),
00463           BucketSize(1.01 * double(MxVal - MnVal) / double(Buckets)) { }
00464 
00465     void Add(const double& Val, const bool& OnlyInP);
00466 
00467         int GetVals() const { return Vals; }
00468         int GetBuckets() const { return BucketV.Len(); }
00469         double GetBucketMn(const int& BucketN) const { return MnVal + BucketN * BucketSize; }
00470         double GetBucketMx(const int& BucketN) const { return MnVal + (BucketN + 1) * BucketSize; }
00471         int GetBucketVal(const int& BucketN) const { return BucketV[BucketN]; }
00472         double GetBucketValPerc(const int& BucketN) const { 
00473                 return (Vals > 0) ? (double(BucketV[BucketN]) / double(Vals)) : 0.0; }
00474 
00475     void SaveStat(const TStr& ValNm, TSOut& FOut) const;
00476     void SaveTxt(const TStr& ValNm, const TStr& FNm) const {
00477         TFOut FOut(FNm); SaveStat(ValNm, FOut); }
00478 };