SNAP Library 2.0, Developer Reference  2013-05-13 16:33:57
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
agm.h
Go to the documentation of this file.
00001 #ifndef snap_agm_h
00002 #define snap_agm_h
00003 #include "Snap.h"
00004 
00007 class TAGM {
00008 public:
00009   static void RndConnectInsideCommunity(PUNGraph& Graph, const TIntV& CmtyV, const double& Prob, TRnd& Rnd);
00010   static PUNGraph GenAGM(const TIntV& NIdV , THash<TInt,TIntV >& CmtyVH, const TStr& AGMNm, const double& PiCoef, const double& ProbBase, TRnd& Rnd=TInt::Rnd);
00011   static PUNGraph GenAGM(TVec<TIntV >& CmtyVV, const double& DensityCoef, const double& ScaleCoef, TRnd& Rnd=TInt::Rnd);
00012   static PUNGraph GenAGM(TVec<TIntV>& CmtyVV, const double& DensityCoef, const int TargetEdges, TRnd& Rnd);
00013   static PUNGraph GenAGM(TVec<TIntV>& CmtyVV, const TFltV& CProbV, TRnd& Rnd, const double PNoCom = -1.0);
00014 };
00015 
00018 class TAGMUtil {
00019 public:
00020   static void GenPLSeq(TIntV& SzSeq,const int& SeqLen, const double& Alpha, TRnd& Rnd, const int& Min, const int& Max);
00021   static void ConnectCmtyVV(TVec<TIntV>& CmtyVV, const TIntPrV& CIDSzPrV, const TIntPrV& NIDMemPrV, TRnd& Rnd);
00022   static void GenCmtyVVFromPL(TVec<TIntV>& CmtyVV, const PUNGraph& Graph, const int& Nodes, const int& Coms, const double& ComSzAlpha, const double& MemAlpha, const int& MinSz, const int& MaxSz, const int& MinK, const int& MaxK, TRnd& Rnd);
00023   static void GenCmtyVVFromPL(TVec<TIntV>& CmtyVV, const TIntV& NIDV, const int& Nodes, const int& Coms, const double& ComSzAlpha, const double& MemAlpha, const int& MinSz, const int& MaxSz, const int& MinK, const int& MaxK, TRnd& Rnd);
00024   static void GenCmtyVVFromPL(TVec<TIntV>& CmtyVV, const int& Nodes, const int& Coms, const double& ComSzAlpha, const double& MemAlpha, const int& MinSz, const int& MaxSz, const int& MinK, const int& MaxK, TRnd& Rnd);
00025   static void RndConnectInsideCommunity(PUNGraph& Graph, const TIntV& CmtyV, const double& Prob, TRnd& Rnd);
00026   static PUNGraph GenAGM(TVec<TIntV >& CmtyVV, const double& DensityCoef, const double& ScaleCoef, TRnd& Rnd=TInt::Rnd);
00027   static PUNGraph GenAGM(TVec<TIntV>& CmtyVV, const double& DensityCoef, const int TargetEdges, TRnd& Rnd);
00028   static PUNGraph GenAGM(TVec<TIntV>& CmtyVV, const TFltV& CProbV, TRnd& Rnd, const double PNoCom = -1.0);
00029   static void GetNodeMembership(THash<TInt,TIntSet >& NIDComVH, const TVec<TIntV>& CmtyVV);
00030   static void GetNodeMembership(THash<TInt,TIntSet >& NIDComVH, const TVec<TIntV>& CmtyVV, const TIntV& NIDV);
00031   static void GetNodeMembership(TIntH& NIDComVH, const THash<TInt,TIntV >& CmtyVH);
00032   static void GetNodeMembership(THash<TInt,TIntSet >& NIDComVH, const TVec<TIntSet>& CmtyVV);
00033   static void GetNodeMembership(THash<TInt,TIntSet >& NIDComVH, const THash<TInt,TIntV >& CmtyVH);
00034   static void GetNodeMembership(THash<TInt,TIntV >& NIDComVH, const THash<TInt,TIntV >& CmtyVH);
00035   static void GetNodeMembership(THash<TInt,TIntV >& NIDComVH, const TVec<TIntV >& CmtyVV);
00036   static void GetNodeMembership(TIntH& NIDComVH, const TVec<TIntV >& CmtyVV);
00037   static void LoadCmtyVV(const TStr& InFNm, TVec<TIntV>& CmtyVV);
00038   static void DumpCmtyVV(const TStr& OutFNm, const TVec<TIntV>& CmtyVV);
00039   static void DumpCmtyVV(const TStr OutFNm, TVec<TIntV>& CmtyVV, TIntStrH& NIDNmH);
00040   static int TotalMemberships(const TVec<TIntV>& CmtyVV);
00041   static void RewireCmtyVV(const TVec<TIntV>& CmtyVVIn, TVec<TIntV>& CmtyVVOut, TRnd& Rnd);
00042   static void RewireCmtyNID(THash<TInt,TIntV >& CmtyVH, TRnd& Rnd);
00043   static int Intersection(const TIntV& C1, const TIntV& C2);
00044   static void GetIntersection(const THashSet<TInt>& A, const THashSet<TInt>& B, THashSet<TInt>& C);
00045   static int Intersection(const THashSet<TInt>& A, const THashSet<TInt>& B);
00046   static double GetConductance(const PUNGraph& Graph, const TIntSet& CmtyS, const int Edges);
00047   static void GetNbhCom(const PUNGraph& Graph, const int NID, TIntSet& NBCmtyS);
00048   static void SaveGephi(const TStr& OutFNm, const PUNGraph& G, const TVec<TIntV >& CmtyVVAtr, const double MaxSz, const double MinSz) {
00049     THash<TInt, TStr> TmpH;
00050     SaveGephi(OutFNm, G, CmtyVVAtr, MaxSz, MinSz, TmpH);
00051   }
00052   static void SaveGephi(const TStr& OutFNm, const PUNGraph& G, const TVec<TIntV >& CmtyVVAtr, const double MaxSz, const double MinSz, const THash<TInt, TStr>& NIDNameH) { 
00053     THash<TInt, TIntTr> TmpH; 
00054     SaveGephi(OutFNm, G, CmtyVVAtr, MaxSz, MinSz, NIDNameH, TmpH);
00055   }
00056   static void SaveGephi(const TStr& OutFNm, const PUNGraph& G, const TVec<TIntV >& CmtyVVAtr, const double MaxSz, const double MinSz, const THash<TInt, TStr>& NIDNameH, const THash<TInt, TIntTr >& NIDColorH);
00057   static void SaveBipartiteGephi(const TStr& OutFNm, const TIntV& NIDV, const TVec<TIntV>& CmtyVV, const double MaxSz, const double MinSz, const TIntStrH& NIDNameH, const THash<TInt, TIntTr >& NIDColorH, const THash<TInt, TIntTr >& CIDColorH);
00058   static int FindComsByAGM(const PUNGraph& Graph, const int InitComs, const int MaxIter, const int RndSeed, const double RegGap, const double PNoCom = 0.0, const TStr PltFPrx = TStr());
00059   template <class PGraph>
00060   static PGraph LoadEdgeListStr(const TStr& InFNm, TIntStrH& NIDNameH, const int& SrcColId = 0, const int& DstColId = 1, const TSsFmt SsFmt = ssfTabSep) {
00061     TSsParser Ss(InFNm, SsFmt);
00062     PGraph Graph = PGraph::TObj::New();
00063     TStrHash<TInt> StrSet(Mega(1), true);
00064     while (Ss.Next()) {
00065       const int SrcNId = StrSet.AddKey(Ss[SrcColId]);
00066       const int DstNId = StrSet.AddKey(Ss[DstColId]);
00067       if (! Graph->IsNode(SrcNId)) { Graph->AddNode(SrcNId); }
00068       if (! Graph->IsNode(DstNId)) { Graph->AddNode(DstNId); }
00069       Graph->AddEdge(SrcNId, DstNId);
00070     }
00071     NIDNameH.Gen(StrSet.Len());
00072     for (int s = 0; s < StrSet.Len(); s++) { NIDNameH.AddDat(s, StrSet.GetKey(s)); }
00073     IAssert(NIDNameH.Len() == Graph->GetNodes());
00074 
00075     Graph->Defrag();
00076     return Graph;
00077   }
00078 
00079   template<class PGraph>
00080   static void GVizComGraph(const PGraph& Graph,const TVec<TIntV >& CmtyVV, const TStr& OutFNm, const TStr& Desc = TStr()){
00081     TStrV Colors = TStrV::GetV("red","blue","green","pink","cyan");
00082     TStrV Shapes = TStrV::GetV("ellipse","triangle","square","pentagon","hexagon");
00083     THash<TInt,TIntV> NIDComVH;
00084     GetNodeMembership(NIDComVH,CmtyVV);
00085 
00086     const TStr Ext = OutFNm.GetFExt();
00087     const TStr GraphFNm = OutFNm.GetSubStr(0, OutFNm.Len() - Ext.Len()) + "dot";
00088     const bool IsDir = HasGraphFlag(typename PGraph::TObj, gfDirected);
00089     FILE *F = fopen(GraphFNm.CStr(), "wt");
00090     if (! Desc.Empty()) fprintf(F, "/*****\n%s\n*****/\n\n", Desc.CStr());
00091     if (IsDir) { fprintf(F, "digraph G {\n"); } else { fprintf(F, "graph G {\n"); }
00092     fprintf(F, "  graph [splines=false overlap=false]\n"); //size=\"12,10\" ratio=fill
00093     fprintf(F, "  node  [width=0.3, height=0.3]\n");
00094     //nodes
00095     for (typename PGraph::TObj::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
00096       int NID = NI.GetId();
00097       TIntV& CIDV = NIDComVH.GetDat(NID);
00098       IAssert(CIDV.Len()>0);
00099       TStr ShapeNm = Shapes[(CIDV.Len()-1) % Shapes.Len()];
00100       TStr ColorNm = Colors[CIDV[0] % Colors.Len()];
00101       TStr NodeComLabel = TStr::Fmt("%d(",NID);
00102       for(int i=0;i<CIDV.Len();i++) {
00103         TStr TmpStr = TStr::Fmt("%d",int(CIDV[i]));
00104         NodeComLabel += TmpStr;
00105         if(i<CIDV.Len()-1){NodeComLabel+=",";}
00106       }
00107       NodeComLabel += ")";
00108       fprintf(F, "  %d [style=filled, shape=\"%s\" fillcolor=\"%s\" label=\"%s\"];\n", NI.GetId(), ShapeNm.CStr(),ColorNm.CStr(), NodeComLabel.CStr()); 
00109     }
00110   
00111     // edges
00112     for (typename PGraph::TObj::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
00113       if (NI.GetOutDeg()==0 && NI.GetInDeg()==0  ) { 
00114         fprintf(F, "%d;\n", NI.GetId()); }
00115       else {
00116         for (int e = 0; e < NI.GetOutDeg(); e++) {
00117           if (! IsDir && NI.GetId() > NI.GetOutNId(e)) { continue; }
00118           fprintf(F, "  %d %s %d;\n", NI.GetId(), IsDir?"->":"--", NI.GetOutNId(e)); 
00119         }
00120       }
00121     }
00122     if (! Desc.Empty()) {
00123       fprintf(F, "  label = \"\\n%s\\n\";", Desc.CStr());
00124       fprintf(F, "  fontsize=24;\n");
00125     }
00126     fprintf(F, "}\n");
00127     fclose(F);
00128     TSnap::TSnapDetail::GVizDoLayout(GraphFNm, OutFNm, gvlNeato);
00129   }
00130 };
00131 
00132 //Light version of logistic regression (originally in Snap)-- Jaewon Yang, 04/07/12
00133 // forward
00134 class TLogRegPredict;
00135 typedef TPt<TLogRegPredict> PLogRegPredict;
00136 
00138 // Logistic Regression using gradient descent
00139 // X: N * M matrix where N = number of examples and M = number of features.
00140 //J: JAEWON THIS HAS NOTHING TO DO WITH AGM. THIS NEEDS TO BE MOVED SOMEWHERE ELSE (glib/xmath.h)
00141 //J: BUT I THINK THERE IS ALREADY A LOGISTIC REGRESSION CLASS THERE!!
00142 class TLogRegFit {
00143 private:
00144   TVec<TFltV> X;
00145   TFltV Y;
00146   TFltV Theta;
00147   int M; // number of features
00148 public:
00149   TLogRegFit() {}
00150   ~TLogRegFit() {}
00151   // returns model for logistic regression
00152   PLogRegPredict CalcLogRegGradient(const TVec<TFltV>& XPt, const TFltV& yPt, const TStr& PlotNm = TStr(), const double& ChangeEps = 0.01, const int& MaxStep = 200, const bool InterceptPt = false);
00153   PLogRegPredict CalcLogRegNewton(const TVec<TFltV>& XPt, const TFltV& yPt, const TStr& PlotNm = TStr(), const double& ChangeEps = 0.01, const int& MaxStep = 200, const bool InterceptPt = false);
00154   int MLEGradient(const double& ChangeEps, const int& MaxStep, const TStr PlotNm);
00155   int MLENewton(const double& ChangeEps, const int& MaxStep, const TStr PlotNm);
00156   double GetStepSizeByLineSearch(const TFltV& DeltaV, const TFltV& GradV, const double& Alpha, const double& Beta);
00157   double Likelihood(const TFltV& NewTheta);
00158   double Likelihood() { return Likelihood(Theta); }
00159   void Gradient(TFltV& GradV);
00160   void Hessian(TFltVV& HVV);
00161   void GetNewtonStep(TFltVV& HVV, const TFltV& GradV, TFltV& DeltaLV);
00162 };
00163 
00164 
00166 // Logistic-Regression-Model
00167 //J: JAEWON THIS HAS NOTHING TO DO WITH AGM. THIS NEEDS TO BE MOVED SOMEWHERE ELSE (glib/xmath.h)
00168 //J: BUT I THINK THERE IS ALREADY A LOGISTIC REGRESSION CLASS THERE!!
00169 class TLogRegPredict {
00170 private: 
00171     TCRef CRef;
00172 private:
00173     TFltV Theta;
00174 public:
00175     TLogRegPredict(const TFltV& _bb): Theta(_bb) { };
00176     //static PLogRegMd New(PSparseTrainSet Set) { return TLogReg::CalcLogReg(Set); }; //TODO
00177 
00178     TLogRegPredict(TSIn& SIn) { Theta.Load(SIn); };
00179     static PLogRegPredict Load(TSIn& SIn) { return new TLogRegPredict(SIn); };
00180     void Save(TSOut& SOut) const { Theta.Save(SOut); };
00181 
00182     UndefDefaultCopyAssign(TLogRegPredict);
00183 public:    
00184     // classifies vector, returns probability that AttrV is positive
00185   static void GetCfy(const TVec<TFltV>& X, TFltV& OutV, const TFltV& NewTheta);
00186   static double GetCfy(const TFltV& AttrV, const TFltV& NewTheta);
00187   double GetCfy(const TFltV& AttrV) { return GetCfy(AttrV, Theta); }
00188   void GetTheta(TFltV& _Theta) { _Theta = Theta; }
00189   void GetCfy(const TVec<TFltV>& X, TFltV& OutV) { GetCfy(X, OutV, Theta); }
00190   void PrintTheta() { for (int t = 0; t < Theta.Len(); t++) { printf("Theta[%d] = %f\n", t, Theta[t].Val); } }
00191   friend class TPt<TLogRegPredict>;
00192 };
00193 
00194 #endif