SNAP Library 2.2, Developer Reference  2014-03-11 19:15:55
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
agm.h
Go to the documentation of this file.
00001 #ifndef snap_agm_h
00002 #define snap_agm_h
00003 #include "Snap.h"
00004 
00007 class TAGM {
00008 public:
00009   static void RndConnectInsideCommunity(PUNGraph& Graph, const TIntV& CmtyV, const double& Prob, TRnd& Rnd);
00010   static PUNGraph GenAGM(const TIntV& NIdV , THash<TInt,TIntV >& CmtyVH, const TStr& AGMNm, const double& PiCoef, const double& ProbBase, TRnd& Rnd=TInt::Rnd);
00011   static PUNGraph GenAGM(TVec<TIntV >& CmtyVV, const double& DensityCoef, const double& ScaleCoef, TRnd& Rnd=TInt::Rnd);
00012   static PUNGraph GenAGM(TVec<TIntV>& CmtyVV, const double& DensityCoef, const int TargetEdges, TRnd& Rnd);
00013   static PUNGraph GenAGM(TVec<TIntV>& CmtyVV, const TFltV& CProbV, TRnd& Rnd, const double PNoCom = -1.0);
00014 };
00015 
00018 class TAGMUtil {
00019 public:
00020   static void GenPLSeq(TIntV& SzSeq,const int& SeqLen, const double& Alpha, TRnd& Rnd, const int& Min, const int& Max);
00021   static void ConnectCmtyVV(TVec<TIntV>& CmtyVV, const TIntPrV& CIDSzPrV, const TIntPrV& NIDMemPrV, TRnd& Rnd);
00022   static void GenCmtyVVFromPL(TVec<TIntV>& CmtyVV, const PUNGraph& Graph, const int& Nodes, const int& Coms, const double& ComSzAlpha, const double& MemAlpha, const int& MinSz, const int& MaxSz, const int& MinK, const int& MaxK, TRnd& Rnd);
00023   static void GenCmtyVVFromPL(TVec<TIntV>& CmtyVV, const TIntV& NIDV, const int& Nodes, const int& Coms, const double& ComSzAlpha, const double& MemAlpha, const int& MinSz, const int& MaxSz, const int& MinK, const int& MaxK, TRnd& Rnd);
00024   static void GenCmtyVVFromPL(TVec<TIntV>& CmtyVV, const int& Nodes, const int& Coms, const double& ComSzAlpha, const double& MemAlpha, const int& MinSz, const int& MaxSz, const int& MinK, const int& MaxK, TRnd& Rnd);
00025   static void RndConnectInsideCommunity(PUNGraph& Graph, const TIntV& CmtyV, const double& Prob, TRnd& Rnd);
00026   static PUNGraph GenAGM(TVec<TIntV >& CmtyVV, const double& DensityCoef, const double& ScaleCoef, TRnd& Rnd=TInt::Rnd);
00027   static PUNGraph GenAGM(TVec<TIntV>& CmtyVV, const double& DensityCoef, const int TargetEdges, TRnd& Rnd);
00028   static PUNGraph GenAGM(TVec<TIntV>& CmtyVV, const TFltV& CProbV, TRnd& Rnd, const double PNoCom = -1.0);
00029   static void GetNodeMembership(THash<TInt,TIntSet >& NIDComVH, const TVec<TIntV>& CmtyVV);
00030   static void GetNodeMembership(THash<TInt,TIntSet >& NIDComVH, const TVec<TIntV>& CmtyVV, const TIntV& NIDV);
00031   static void GetNodeMembership(TIntH& NIDComVH, const THash<TInt,TIntV >& CmtyVH);
00032   static void GetNodeMembership(THash<TInt,TIntSet >& NIDComVH, const TVec<TIntSet>& CmtyVV);
00033   static void GetNodeMembership(THash<TInt,TIntSet >& NIDComVH, const THash<TInt,TIntV >& CmtyVH);
00034   static void GetNodeMembership(THash<TInt,TIntV >& NIDComVH, const THash<TInt,TIntV >& CmtyVH);
00035   static void GetNodeMembership(THash<TInt,TIntV >& NIDComVH, const TVec<TIntV >& CmtyVV);
00036   static void GetNodeMembership(TIntH& NIDComVH, const TVec<TIntV >& CmtyVV);
00037   static void LoadCmtyVV(const TStr& InFNm, TVec<TIntV>& CmtyVV);
00038   static void LoadCmtyVV(const TStr& InFNm, TVec<TIntV>& CmtyVV, TStrHash<TInt>& StrToNIdH, const int BeginCol, const int MinSz = 3, const TSsFmt Sep = ssfTabSep);
00039   static void DumpCmtyVV(const TStr& OutFNm, const TVec<TIntV>& CmtyVV);
00040   static void DumpCmtyVV(const TStr OutFNm, TVec<TIntV>& CmtyVV, TIntStrH& NIDNmH);
00041   static int TotalMemberships(const TVec<TIntV>& CmtyVV);
00042   static void RewireCmtyVV(const TVec<TIntV>& CmtyVVIn, TVec<TIntV>& CmtyVVOut, TRnd& Rnd);
00043   static void RewireCmtyNID(THash<TInt,TIntV >& CmtyVH, TRnd& Rnd);
00044   static int Intersection(const TIntV& C1, const TIntV& C2);
00045   static void GetIntersection(const THashSet<TInt>& A, const THashSet<TInt>& B, THashSet<TInt>& C);
00046   static int Intersection(const THashSet<TInt>& A, const THashSet<TInt>& B);
00047   static double GetConductance(const PUNGraph& Graph, const TIntSet& CmtyS, const int Edges);
00048   static void GetNbhCom(const PUNGraph& Graph, const int NID, TIntSet& NBCmtyS);
00049   static void SaveGephi(const TStr& OutFNm, const PUNGraph& G, const TVec<TIntV >& CmtyVVAtr, const double MaxSz, const double MinSz) {
00050     THash<TInt, TStr> TmpH;
00051     SaveGephi(OutFNm, G, CmtyVVAtr, MaxSz, MinSz, TmpH);
00052   }
00053   static void SaveGephi(const TStr& OutFNm, const PUNGraph& G, const TVec<TIntV >& CmtyVVAtr, const double MaxSz, const double MinSz, const THash<TInt, TStr>& NIDNameH) { 
00054     THash<TInt, TIntTr> TmpH; 
00055     SaveGephi(OutFNm, G, CmtyVVAtr, MaxSz, MinSz, NIDNameH, TmpH);
00056   }
00057   static void SaveGephi(const TStr& OutFNm, const PUNGraph& G, const TVec<TIntV >& CmtyVVAtr, const double MaxSz, const double MinSz, const THash<TInt, TStr>& NIDNameH, const THash<TInt, TIntTr >& NIDColorH);
00058   static void SaveBipartiteGephi(const TStr& OutFNm, const TIntV& NIDV, const TVec<TIntV>& CmtyVV, const double MaxSz, const double MinSz, const TIntStrH& NIDNameH, const THash<TInt, TIntTr >& NIDColorH, const THash<TInt, TIntTr >& CIDColorH);
00059   static int FindComsByAGM(const PUNGraph& Graph, const int InitComs, const int MaxIter, const int RndSeed, const double RegGap, const double PNoCom = 0.0, const TStr PltFPrx = TStr());
00060   template <class PGraph>
00061   static PGraph LoadEdgeListStr(const TStr& InFNm, TIntStrH& NIDNameH, const int& SrcColId = 0, const int& DstColId = 1, const TSsFmt SsFmt = ssfTabSep) {
00062     TSsParser Ss(InFNm, SsFmt);
00063     PGraph Graph = PGraph::TObj::New();
00064     TStrHash<TInt> StrSet(Mega(1), true);
00065     while (Ss.Next()) {
00066       const int SrcNId = StrSet.AddKey(Ss[SrcColId]);
00067       const int DstNId = StrSet.AddKey(Ss[DstColId]);
00068       if (! Graph->IsNode(SrcNId)) { Graph->AddNode(SrcNId); }
00069       if (! Graph->IsNode(DstNId)) { Graph->AddNode(DstNId); }
00070       Graph->AddEdge(SrcNId, DstNId);
00071     }
00072     NIDNameH.Gen(StrSet.Len());
00073     for (int s = 0; s < StrSet.Len(); s++) { NIDNameH.AddDat(s, StrSet.GetKey(s)); }
00074     IAssert(NIDNameH.Len() == Graph->GetNodes());
00075 
00076     Graph->Defrag();
00077     return Graph;
00078   }
00079 
00080   template<class PGraph>
00081   static void GVizComGraph(const PGraph& Graph,const TVec<TIntV >& CmtyVV, const TStr& OutFNm, const TStr& Desc = TStr()){
00082     TStrV Colors = TStrV::GetV("red","blue","green","pink","cyan");
00083     TStrV Shapes = TStrV::GetV("ellipse","triangle","square","pentagon","hexagon");
00084     THash<TInt,TIntV> NIDComVH;
00085     GetNodeMembership(NIDComVH,CmtyVV);
00086 
00087     const TStr Ext = OutFNm.GetFExt();
00088     const TStr GraphFNm = OutFNm.GetSubStr(0, OutFNm.Len() - Ext.Len()) + "dot";
00089     const bool IsDir = HasGraphFlag(typename PGraph::TObj, gfDirected);
00090     FILE *F = fopen(GraphFNm.CStr(), "wt");
00091     if (! Desc.Empty()) fprintf(F, "/*****\n%s\n*****/\n\n", Desc.CStr());
00092     if (IsDir) { fprintf(F, "digraph G {\n"); } else { fprintf(F, "graph G {\n"); }
00093     fprintf(F, "  graph [splines=false overlap=false]\n"); //size=\"12,10\" ratio=fill
00094     fprintf(F, "  node  [width=0.3, height=0.3]\n");
00095     //nodes
00096     for (typename PGraph::TObj::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
00097       int NID = NI.GetId();
00098       TIntV& CIDV = NIDComVH.GetDat(NID);
00099       IAssert(CIDV.Len()>0);
00100       TStr ShapeNm = Shapes[(CIDV.Len()-1) % Shapes.Len()];
00101       TStr ColorNm = Colors[CIDV[0] % Colors.Len()];
00102       TStr NodeComLabel = TStr::Fmt("%d(",NID);
00103       for(int i=0;i<CIDV.Len();i++) {
00104         TStr TmpStr = TStr::Fmt("%d",int(CIDV[i]));
00105         NodeComLabel += TmpStr;
00106         if(i<CIDV.Len()-1){NodeComLabel+=",";}
00107       }
00108       NodeComLabel += ")";
00109       fprintf(F, "  %d [style=filled, shape=\"%s\" fillcolor=\"%s\" label=\"%s\"];\n", NI.GetId(), ShapeNm.CStr(),ColorNm.CStr(), NodeComLabel.CStr()); 
00110     }
00111   
00112     // edges
00113     for (typename PGraph::TObj::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
00114       if (NI.GetOutDeg()==0 && NI.GetInDeg()==0  ) { 
00115         fprintf(F, "%d;\n", NI.GetId()); }
00116       else {
00117         for (int e = 0; e < NI.GetOutDeg(); e++) {
00118           if (! IsDir && NI.GetId() > NI.GetOutNId(e)) { continue; }
00119           fprintf(F, "  %d %s %d;\n", NI.GetId(), IsDir?"->":"--", NI.GetOutNId(e)); 
00120         }
00121       }
00122     }
00123     if (! Desc.Empty()) {
00124       fprintf(F, "  label = \"\\n%s\\n\";", Desc.CStr());
00125       fprintf(F, "  fontsize=24;\n");
00126     }
00127     fprintf(F, "}\n");
00128     fclose(F);
00129     TSnap::TSnapDetail::GVizDoLayout(GraphFNm, OutFNm, gvlNeato);
00130   }
00131 };
00132 
00133 //Light version of logistic regression (originally in Snap)-- Jaewon Yang, 04/07/12
00134 // forward
00135 class TLogRegPredict;
00136 typedef TPt<TLogRegPredict> PLogRegPredict;
00137 
00139 // Logistic Regression using gradient descent
00140 // X: N * M matrix where N = number of examples and M = number of features.
00141 //J: JAEWON THIS HAS NOTHING TO DO WITH AGM. THIS NEEDS TO BE MOVED SOMEWHERE ELSE (glib/xmath.h)
00142 //J: BUT I THINK THERE IS ALREADY A LOGISTIC REGRESSION CLASS THERE!!
00143 class TLogRegFit {
00144 private:
00145   TVec<TFltV> X;
00146   TFltV Y;
00147   TFltV Theta;
00148   int M; // number of features
00149 public:
00150   TLogRegFit() {}
00151   ~TLogRegFit() {}
00152   // returns model for logistic regression
00153   PLogRegPredict CalcLogRegGradient(const TVec<TFltV>& XPt, const TFltV& yPt, const TStr& PlotNm = TStr(), const double& ChangeEps = 0.01, const int& MaxStep = 200, const bool InterceptPt = false);
00154   PLogRegPredict CalcLogRegNewton(const TVec<TFltV>& XPt, const TFltV& yPt, const TStr& PlotNm = TStr(), const double& ChangeEps = 0.01, const int& MaxStep = 200, const bool InterceptPt = false);
00155   int MLEGradient(const double& ChangeEps, const int& MaxStep, const TStr PlotNm);
00156   int MLENewton(const double& ChangeEps, const int& MaxStep, const TStr PlotNm);
00157   double GetStepSizeByLineSearch(const TFltV& DeltaV, const TFltV& GradV, const double& Alpha, const double& Beta);
00158   double Likelihood(const TFltV& NewTheta);
00159   double Likelihood() { return Likelihood(Theta); }
00160   void Gradient(TFltV& GradV);
00161   void Hessian(TFltVV& HVV);
00162   void GetNewtonStep(TFltVV& HVV, const TFltV& GradV, TFltV& DeltaLV);
00163 };
00164 
00165 
00167 // Logistic-Regression-Model
00168 //J: JAEWON THIS HAS NOTHING TO DO WITH AGM. THIS NEEDS TO BE MOVED SOMEWHERE ELSE (glib/xmath.h)
00169 //J: BUT I THINK THERE IS ALREADY A LOGISTIC REGRESSION CLASS THERE!!
00170 class TLogRegPredict {
00171 private: 
00172     TCRef CRef;
00173 private:
00174     TFltV Theta;
00175 public:
00176     TLogRegPredict(const TFltV& _bb): Theta(_bb) { };
00177     //static PLogRegMd New(PSparseTrainSet Set) { return TLogReg::CalcLogReg(Set); }; //TODO
00178 
00179     TLogRegPredict(TSIn& SIn) { Theta.Load(SIn); };
00180     static PLogRegPredict Load(TSIn& SIn) { return new TLogRegPredict(SIn); };
00181     void Save(TSOut& SOut) const { Theta.Save(SOut); };
00182 
00183     UndefDefaultCopyAssign(TLogRegPredict);
00184 public:    
00185     // classifies vector, returns probability that AttrV is positive
00186   static void GetCfy(const TVec<TFltV>& X, TFltV& OutV, const TFltV& NewTheta);
00187   static double GetCfy(const TFltV& AttrV, const TFltV& NewTheta);
00188   double GetCfy(const TFltV& AttrV) { return GetCfy(AttrV, Theta); }
00189   void GetTheta(TFltV& _Theta) { _Theta = Theta; }
00190   void GetCfy(const TVec<TFltV>& X, TFltV& OutV) { GetCfy(X, OutV, Theta); }
00191   void PrintTheta() { for (int t = 0; t < Theta.Len(); t++) { printf("Theta[%d] = %f\n", t, Theta[t].Val); } }
00192   friend class TPt<TLogRegPredict>;
00193 };
00194 
00195 #endif