SNAP Library 2.2, User Reference  2014-03-11 19:15:55
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
agmfast.h
Go to the documentation of this file.
00001 #ifndef snap_agmfast_h
00002 #define snap_agmfast_h
00003 #include "Snap.h"
00004 
00007 class TAGMFast { 
00008 private:
00009   PUNGraph G; //graph to fit
00010   TVec<TIntFltH> F; // membership for each user (Size: Nodes * Coms)
00011   TRnd Rnd; // random number generator
00012   TIntV NIDV; // original node ID vector
00013   TFlt RegCoef; //Regularization coefficient when we fit for P_c +: L1, -: L2
00014   TFltV SumFV; // sum_u F_uc for each community c. Needed for efficient calculation
00015   TBool NodesOk; // Node ID is from 0 ~ N-1
00016   TInt NumComs; // number of communities
00017 public:
00018   TVec<TIntSet> HOVIDSV; //NID pairs to hold out for cross validation
00019   TFlt MinVal; // minimum value of F (0)
00020   TFlt MaxVal; // maximum value of F (for numerical reason)
00021   TFlt NegWgt; // weight of negative example (a pair of nodes without an edge)
00022   TFlt PNoCom; // base probability \varepsilon (edge probability between a pair of nodes sharing no community
00023   TBool DoParallel; // whether to use parallelism for computation
00024 
00025   TAGMFast(const PUNGraph& GraphPt, const int& InitComs, const int RndSeed = 0): Rnd(RndSeed), RegCoef(0), 
00026     NodesOk(true), MinVal(0.0), MaxVal(1000.0), NegWgt(1.0) { SetGraph(GraphPt); RandomInit(InitComs); }
00027   void SetGraph(const PUNGraph& GraphPt);
00028   void SetRegCoef(const double _RegCoef) { RegCoef = _RegCoef; }
00029   double GetRegCoef() { return RegCoef; }
00030   void RandomInit(const int InitComs);
00031   void NeighborComInit(const int InitComs);
00032   void SetCmtyVV(const TVec<TIntV>& CmtyVV);
00033   double Likelihood(const bool DoParallel = false);
00034   double LikelihoodForRow(const int UID);
00035   double LikelihoodForRow(const int UID, const TIntFltH& FU);
00036   int MLENewton(const double& Thres, const int& MaxIter, const TStr PlotNm = TStr());
00037   void GradientForRow(const int UID, TIntFltH& GradU, const TIntSet& CIDSet);
00038   double GradientForOneVar(const TFltV& AlphaKV, const int UID, const int CID, const double& Val);
00039   double HessianForOneVar(const TFltV& AlphaKV, const int UID, const int CID, const double& Val);
00040   double LikelihoodForOneVar(const TFltV& AlphaKV, const int UID, const int CID, const double& Val);
00041   void GetCmtyVV(TVec<TIntV>& CmtyVV);
00042   void GetCmtyVV(TVec<TIntV>& CmtyVV, const double Thres, const int MinSz = 3);
00043   int FindComsByCV(TIntV& ComsV, const double HOFrac = 0.2, const int NumThreads = 20, const TStr PlotLFNm = TStr(), const double StepAlpha = 0.3, const double StepBeta = 0.1);
00044   int FindComsByCV(const int NumThreads, const int MaxComs, const int MinComs, const int DivComs, const TStr OutFNm, const double StepAlpha = 0.3, const double StepBeta = 0.3);
00045   double LikelihoodHoldOut(const bool DoParallel = false);
00046   double GetStepSizeByLineSearch(const int UID, const TIntFltH& DeltaV, const TIntFltH& GradV, const double& Alpha, const double& Beta, const int MaxIter = 10);
00047   int MLEGradAscent(const double& Thres, const int& MaxIter, const TStr PlotNm, const double StepAlpha = 0.3, const double StepBeta = 0.1);
00048   int MLEGradAscentParallel(const double& Thres, const int& MaxIter, const int ChunkNum, const int ChunkSize, const TStr PlotNm, const double StepAlpha = 0.3, const double StepBeta = 0.1);
00049   int MLEGradAscentParallel(const double& Thres, const int& MaxIter, const int ChunkNum, const TStr PlotNm = TStr(), const double StepAlpha = 0.3, const double StepBeta = 0.1) {
00050     int ChunkSize = G->GetNodes() / 10 / ChunkNum;
00051     if (ChunkSize == 0) { ChunkSize = 1; }
00052     return MLEGradAscentParallel(Thres, MaxIter, ChunkNum, ChunkSize, PlotNm, StepAlpha, StepBeta);
00053   }
00054   //double FindOptimalThres(const TVec<TIntV>& TrueCmtyVV, TVec<TIntV>& CmtyVV);
00055   void Save(TSOut& SOut);
00056   void Load(TSIn& SIn, const int& RndSeed = 0);
00057   double inline GetCom(const int& NID, const int& CID) {
00058     if (F[NID].IsKey(CID)) {
00059       return F[NID].GetDat(CID);
00060     } else {
00061       return 0.0;
00062     }
00063   }
00064   void inline AddCom(const int& NID, const int& CID, const double& Val) {
00065     if (F[NID].IsKey(CID)) {
00066       SumFV[CID] -= F[NID].GetDat(CID);
00067     }
00068     F[NID].AddDat(CID) = Val;
00069     SumFV[CID] += Val;
00070   }
00071 
00072   void inline DelCom(const int& NID, const int& CID) {
00073     if (F[NID].IsKey(CID)) {
00074       SumFV[CID] -= F[NID].GetDat(CID);
00075       F[NID].DelKey(CID);
00076     }
00077   }
00078   double inline DotProduct(const TIntFltH& UV, const TIntFltH& VV) {
00079     double DP = 0;
00080     if (UV.Len() > VV.Len()) {
00081       for (TIntFltH::TIter HI = UV.BegI(); HI < UV.EndI(); HI++) {
00082         if (VV.IsKey(HI.GetKey())) { 
00083           DP += VV.GetDat(HI.GetKey()) * HI.GetDat(); 
00084         }
00085       }
00086     } else {
00087       for (TIntFltH::TIter HI = VV.BegI(); HI < VV.EndI(); HI++) {
00088         if (UV.IsKey(HI.GetKey())) { 
00089           DP += UV.GetDat(HI.GetKey()) * HI.GetDat(); 
00090         }
00091       }
00092     }
00093     return DP;
00094   }
00095   double inline DotProduct(const int& UID, const int& VID) {
00096     return DotProduct(F[UID], F[VID]);
00097   }
00098   double inline Prediction(const TIntFltH& FU, const TIntFltH& FV) {
00099     double DP = log (1.0 / (1.0 - PNoCom)) + DotProduct(FU, FV);
00100     IAssertR(DP > 0.0, TStr::Fmt("DP: %f", DP));
00101     return exp(- DP);
00102   }
00103   double inline Prediction(const int& UID, const int& VID) {
00104     return Prediction(F[UID], F[VID]);
00105   }
00106   double inline Sum(const TIntFltH& UV) {
00107     double N = 0.0;
00108     for (TIntFltH::TIter HI = UV.BegI(); HI < UV.EndI(); HI++) {
00109       N += HI.GetDat();
00110     }
00111     return N;
00112   }
00113   double inline Norm2(const TIntFltH& UV) {
00114     double N = 0.0;
00115     for (TIntFltH::TIter HI = UV.BegI(); HI < UV.EndI(); HI++) {
00116       N += HI.GetDat() * HI.GetDat();
00117     }
00118     return N;
00119   }
00120 };
00121 
00122 
00125 
00126 class TAGMFastUtil {
00127 public:
00128   //static double GetConductance(const PUNGraph& Graph, const TIntSet& CmtyS, const int Edges);
00129   //static double GetConductance(const PNGraph& Graph, const TIntSet& CmtyS, const int Edges);
00130 template<class PGraph>
00131 static double GetConductance(const PGraph& Graph, const TIntSet& CmtyS, const int Edges) {
00132   const bool GraphType = HasGraphFlag(typename PGraph::TObj, gfDirected);
00133   int Edges2;
00134   if (GraphType) { Edges2 = Edges >= 0 ? Edges : Graph->GetEdges(); }
00135   else { Edges2 = Edges >= 0 ? 2 * Edges : Graph->GetEdges(); }
00136   int Vol = 0,  Cut = 0; 
00137   double Phi = 0.0;
00138   for (int i = 0; i < CmtyS.Len(); i++) {
00139     if (! Graph->IsNode(CmtyS[i])) { continue; }
00140     typename PGraph::TObj::TNodeI  NI = Graph->GetNI(CmtyS[i]);
00141     for (int e = 0; e < NI.GetOutDeg(); e++) {
00142       if (! CmtyS.IsKey(NI.GetOutNId(e))) { Cut += 1; }
00143     }
00144     Vol += NI.GetOutDeg();
00145   }
00146   // get conductance
00147   if (Vol != Edges2) {
00148     if (2 * Vol > Edges2) { Phi = Cut / double (Edges2 - Vol); }
00149     else if (Vol == 0) { Phi = 0.0; }
00150     else { Phi = Cut / double(Vol); }
00151   } else {
00152     if (Vol == Edges2) { Phi = 1.0; }
00153   }
00154   return Phi;
00155 }
00156 
00157 
00158 template<class PGraph>
00159   static void GenHoldOutPairs(const PGraph& G, TVec<TIntSet>& HoldOutSet, double HOFrac, TRnd& Rnd)  {
00160     TIntPrV EdgeV(G->GetEdges(), 0);
00161     for (typename PGraph::TObj::TEdgeI EI = G->BegEI(); EI < G->EndEI(); EI++) {
00162       EdgeV.Add(TIntPr(EI.GetSrcNId(), EI.GetDstNId()));
00163     }
00164     EdgeV.Shuffle(Rnd);
00165 
00166     const bool GraphType = HasGraphFlag(typename PGraph::TObj, gfDirected);
00167     HoldOutSet.Gen(G->GetNodes());
00168     int HOTotal = int(HOFrac * G->GetNodes() * (G->GetNodes() - 1) / 2.0);
00169     if (GraphType) { HOTotal *= 2;}
00170     int HOCnt = 0;
00171     int HOEdges = (int) TMath::Round(HOFrac * G->GetEdges());
00172     printf("holding out %d edges...\n", HOEdges);
00173     for (int he = 0; he < (int) HOEdges; he++) {
00174       HoldOutSet[EdgeV[he].Val1].AddKey(EdgeV[he].Val2);
00175       if (! GraphType) { HoldOutSet[EdgeV[he].Val2].AddKey(EdgeV[he].Val1); }
00176       HOCnt++;
00177     }
00178     printf("%d Edges hold out\n", HOCnt);
00179     while(HOCnt++ < HOTotal) {
00180       int SrcNID = Rnd.GetUniDevInt(G->GetNodes());
00181       int DstNID = Rnd.GetUniDevInt(G->GetNodes());
00182       if (SrcNID == DstNID) { continue; }
00183       HoldOutSet[SrcNID].AddKey(DstNID);
00184       if (! GraphType) { HoldOutSet[DstNID].AddKey(SrcNID); }
00185     }
00186   }
00187 
00188 template<class PGraph>
00189   static void GetNbhCom(const PGraph& Graph, const int NID, TIntSet& NBCmtyS) {
00190     typename PGraph::TObj::TNodeI NI = Graph->GetNI(NID);
00191     NBCmtyS.Gen(NI.GetDeg());
00192     NBCmtyS.AddKey(NID);
00193     for (int e = 0; e < NI.GetDeg(); e++) {
00194       NBCmtyS.AddKey(NI.GetNbrNId(e));
00195     }
00196   }
00197 template<class PGraph>
00198   static void GetNIdPhiV(const PGraph& G, TFltIntPrV& NIdPhiV) {
00199     NIdPhiV.Gen(G->GetNodes(), 0);
00200     const int Edges = G->GetEdges();
00201     TExeTm RunTm;
00202     //compute conductance of neighborhood community
00203     for (typename PGraph::TObj::TNodeI NI = G->BegNI(); NI < G->EndNI(); NI++) {
00204       TIntSet NBCmty(NI.GetDeg() + 1);
00205       double Phi;
00206       if (NI.GetDeg() < 5) { //do not include nodes with too few degree
00207         Phi = 1.0; 
00208       } else {
00209         TAGMFastUtil::GetNbhCom<PGraph>(G, NI.GetId(), NBCmty);
00210         Phi = TAGMFastUtil::GetConductance(G, NBCmty, Edges);
00211       }
00212       NIdPhiV.Add(TFltIntPr(Phi, NI.GetId()));
00213     }
00214     printf("conductance computation completed [%s]\n", RunTm.GetTmStr());
00215     fflush(stdout);
00216   }
00217 };
00218 
00219 #endif