SNAP Library, User Reference  2012-10-15 15:06:59
SNAP, a general purpose network analysis and graph mining library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
ncp.h
Go to the documentation of this file.
00001 #ifndef snap_network_community_profile_h
00002 #define snap_network_community_profile_h
00003 
00004 #include "Snap.h"
00005 
00006 class TLocClust;
00007 class TLocClustStat;
00008 class TNcpGraphsBase;
00009 
00012 
00024 class TLocClust {
00025 public:
00026   static bool Verbose;
00027 private:
00028   PUNGraph Graph;
00029   int Nodes, Edges2;       // Nodes, 2*edges in Graph
00030   TIntFltH ProbH, ResH;
00031   TIntQ NodeQ;
00032   double Alpha;            // PageRank jump probability (smaller Alpha diffuses the mass farther away)
00033   int SeedNId;             // Seed node
00034   // volume, cut size, node ids, conductances
00035   TIntV NIdV, VolV, CutV;  // Vol=2*edges_inside+cut (vol = sum of the degrees)
00036   TFltV PhiV;              // Conductance
00037   int BestCutIdx;          // Index K to vectors where the conductance of the bounding cut (PhiV[K]) achieves its minimum
00038 private:
00040   const TIntV& GetNIdV() const { return NIdV; }
00042   const TIntV& GetVolV() const { return VolV; } 
00044   const TIntV& GetCutV() const { return CutV; } 
00046   const TFltV& GetPhiV() const { return PhiV; } 
00047 public:
00048   TLocClust(const PUNGraph& GraphPt, const double& AlphaVal) :
00049     Graph(GraphPt), Nodes(GraphPt->GetNodes()), Edges2(2*GraphPt->GetEdges()), Alpha(AlphaVal) { }
00051   int Len() const { return GetRndWalkSup(); }
00053   int GetRndWalkSup() const { return VolV.Len(); }
00054 
00056   int GetNId(const int& NodeN) const { return NIdV[NodeN]; }
00058 
00060   int GetVol(const int& Nodes) const { return VolV[Nodes]; }
00062 
00064   int GetCut(const int& Nodes) const { return CutV[Nodes]; }
00066 
00068   double GetPhi(const int& ValId) const { return PhiV[ValId]; }
00069 
00071 
00073   int BestCut() const { return BestCutIdx; }
00075   int BestCutNodes() const { return BestCutIdx+1; }
00077   int GetCutEdges() const { return GetCut(BestCut()); }
00079   int GetCutVol() const { return GetVol(BestCut()); }
00081   double GetCutPhi() const { return GetPhi(BestCut()); }
00082 
00084 
00086   int ApproxPageRank(const int& SeedNode, const double& Eps);
00088   void SupportSweep();
00090 
00093   void FindBestCut(const int& SeedNode, const int& ClustSz, const double& MinSizeFrac=0.2);
00094 
00096   void PlotVolDistr(const TStr& OutFNm, TStr Desc=TStr()) const;
00098   void PlotCutDistr(const TStr& OutFNm, TStr Desc=TStr()) const;
00100   void PlotPhiDistr(const TStr& OutFNm, TStr Desc=TStr()) const;
00102   void SavePajek(const TStr& OutFNm) const;
00103 
00105   static void DrawWhiskers(const PUNGraph& Graph, TStr FNmPref, const int& PlotN);
00107   static void GetCutStat(const PUNGraph& Graph, const TIntV& NIdV, int& Vol, int& Cut, double& Phi, int GraphEdges=-1);
00109   static void GetCutStat(const PUNGraph& Graph, const TIntSet& NIdSet, int& Vol, int& Cut, double& Phi, int GraphEdges=-1);
00112 
00117   static void PlotNCP(const PUNGraph& Graph, const TStr& FNm, const TStr Desc="", const bool& BagOfWhiskers=true, const bool& RewireNet=false,
00118     const int& KMin=10, const int& KMax=Mega(100), const int& Coverage=10, const bool& SaveTxtStat=false, const bool& PlotBoltzman=false);
00119 
00120   friend class TLocClustStat;
00121 };
00122 
00125 class TLocClustStat {
00126 public:
00127   static double BinFactor;
00128   static int TakeValAt;
00129 public:
00130   class TNodeSweep {
00131   public:
00132     TInt SeedNId;
00133     TIntV SweepV;  // nodes inside the cut: cut of size k has node ids CutV[0...k-1]
00134     TFltV PhiV;    // conductance at the cut
00135   public:
00136     TNodeSweep() {}
00137     TNodeSweep(const int& SeedNode, const TIntV& SweepNIdV, const TFltV& PhiNIdV) :
00138       SeedNId(SeedNode), SweepV(SweepNIdV), PhiV(PhiNIdV) { IAssert(SweepV.Len()==PhiV.Len()); }
00139     TNodeSweep(const TNodeSweep& NS) : SeedNId(NS.SeedNId), SweepV(NS.SweepV), PhiV(NS.PhiV) { }
00140     int Len() const { return SweepV.Len(); }
00141     int GetSeed() const { return SeedNId; }
00142     int NId(const int i) const { return SweepV[i]; }
00143     double Phi(const int i) const { return PhiV[i]; }
00144     bool operator < (const TNodeSweep& CS) const { return Len() < CS.Len(); }
00145   };
00146 
00147   class TCutInfo {
00148   public:
00149     TInt Nodes, Edges, CutSz; // nodes inside, edges inside, edges cut
00150     TIntV CutNIdV;            // node ids inside the cluster
00151   public:
00152     TCutInfo() : Nodes(0), Edges(0), CutSz(0), CutNIdV() { }
00153     TCutInfo(const int& ClustNodes, const int& EdgesInside, const int& CutSize) : Nodes(ClustNodes), Edges(EdgesInside), CutSz(CutSize) { }
00154     TCutInfo(const int& ClustNodes, const int& EdgesInside, const int& CutSize, const TIntV& NIdV) :
00155       Nodes(ClustNodes), Edges(EdgesInside), CutSz(CutSize), CutNIdV(NIdV) { }
00156     TCutInfo(const PUNGraph& G, const TIntV& ClustNIdV, bool TakeNIdV=false) : Nodes(ClustNIdV.Len()) {
00157       TSnap::GetEdgesInOut(G, ClustNIdV, Edges.Val, CutSz.Val); if(TakeNIdV){CutNIdV=ClustNIdV;} }
00158     TCutInfo(const TCutInfo& CS) : Nodes(CS.Nodes), Edges(CS.Edges), CutSz(CS.CutSz), CutNIdV(CS.CutNIdV) { }
00159     int GetNodes() const { return Nodes; }
00160     int GetEdges() const { return Edges; }
00161     int GetVol() const { return 2*Edges+CutSz; }
00162     int GetCutSz() const { return CutSz; }
00163     // community score measures (lower is better)
00164     double GetPhi() const { return double(CutSz)/double(2*Edges+CutSz); }                                     // conductance
00165     double GetExpansion() const { return Nodes<2 ? 1.0 : double(CutSz)/double(Nodes); }                       // expansion
00166     double GetIntDens() const { return 1.0 - ((Nodes<2) ? 0 : 2.0*double(Edges)/double(Nodes*(Nodes-1))); }   // internal density
00167     double GetCutRatio(const int& GNodes) const { return double(CutSz)/double(Nodes*(GNodes-Nodes)); }        // cut ratio (external density)
00168     double GetNormCut(const int& GEdges) const { return GetPhi() + double(CutSz)/double(2*GEdges-GetVol()); } // normalized cut
00169     double GetFracDegOut(const PUNGraph& Graph, double& MxFrac, double& AvgFrac, double& MedianFrac, double& Pct9Frac, double& Flake) const; // fraction of node's edges pointing outside the cluster
00170     double GetModular(const int& GEdges) const { return (2.0*Edges - GetExpEdgesIn(GEdges)) / (2.0*GEdges); } // modularity
00171     double GetModRat(const int& GEdges) const { return (2.0*Edges) / GetExpEdgesIn(GEdges); }                 // modularity ratio
00172     double GetExpEdgesIn(const int& GEdges) const { return TMath::Sqr(2.0*Edges+CutSz)/(2.0*GEdges); }        // expected edges inside (sum of degrees on nodes inside)^2/(2*E)
00173     bool operator < (const TCutInfo& CS) const { return GetPhi() < CS.GetPhi(); }
00174   };
00175 
00176 private:
00177   // parameters
00178   TFlt Alpha, SizeFrac, KFac;
00179   TInt KMin, KMax, Coverage;
00180   PUNGraph Graph; // set at ::Run()
00181 //private:
00182 public:
00183   TVec<TNodeSweep> SweepsV;       // node ids and conductances for each run of local clustering
00184   THash<TInt, TFltV> SizePhiH;    // all conductances at cluster size K
00185   THash<TInt, TCutInfo> BestCutH; // best cut (min conductance) at size K (edges inside, edges cut)
00186   TFltPrV BagOfWhiskerV;          // (size, conductance) for bag of whiskers
00187   TFltPr BestWhisk;               // best whisker (whisker with largest volume), (size, conductance)
00188   TCutInfo BestCut;               // best over-all cut
00189   TIntSet SizeBucketSet;          // for exponential bucketing (only indexes BestCutH at positions in BucketSet have CutNIdV filled)
00190 public:
00191   TLocClustStat(const double& _Alpha, const int& _KMin, const int& _KMax, const double& _KFac, const int& _Coverage, const double& _SizeFrac);
00192   void Save(TSOut& SOut) const;
00193   void Clr();
00194   void SetGraph(const PUNGraph& GraphPt) { Graph=GraphPt; }
00195   void Run(const PUNGraph& Graph, const bool& SaveAllSweeps=false, const bool& SaveAllCond=false, const bool& SaveBestNodesAtK=false);
00196   void AddBagOfWhiskers();
00197   void AddCut(const TIntV& NIdV);
00198   void AddCut(const int& ClustSz, const double& Phi);
00199   void AddToBestCutH(const PUNGraph& Graph, const TIntV& NIdV, const bool& AddBestCond=true);
00200 
00201   double FindBestCut(const int& Nodes, const TIntSet& TabuNIdSet, int& BestCutId) const;
00202   const TCutInfo& FindBestCut(const int& Nodes=-1) const;
00203   double FindBestCut(const int& Nodes, TIntV& ClustNIdV) const;
00204   int FindBestCut(const int& Nodes, int& Vol, int& Cut, double& Phi) const;
00205 
00206   const TCutInfo& GetBestCut() const { return BestCut; } // overall best cut
00207   int GetCuts() const { return BestCutH.Len(); }
00208   const TCutInfo& GetKNodeCut(const int& Nodes) const { return BestCutH.GetDat(Nodes); }
00209   const TCutInfo& GetCutN(const int& N) const { return BestCutH[N]; }
00210 
00211   int BestWhiskNodes() const { return int(BestWhisk.Val1.Val); }
00212   int BestWhiskEdges() const { return (int)TMath::Round(1.0/BestWhisk.Val2.Val)/2-1; }
00213   double BestWhiskPhi() const { return BestWhisk.Val2; }
00214   TFltPr GetBestWhisk() const { return BestWhisk; }
00215   const TFltPrV& GetBagOfWhiskersV() const { return BagOfWhiskerV; }
00216   void GetCurveStat(TFltPrV& MeanV, TFltPrV& MedV, TFltPrV& Dec1V, TFltPrV& MinV, TFltPrV& MaxV) const;
00217   void GetBoltzmanCurveStat(const TFltV& TempV, TVec<TFltPrV>& NcpV) const;
00218 
00219   TStr ParamStr() const;
00220   void PlotNCP(const TStr& OutFNm, TStr Desc=TStr()) const; // lower-envelope of conductance
00221   void PlotNCPModul(const TStr& OutFNm, TStr Desc=TStr()) const; // NCP but with modularity on Y-axis
00222   void PlotNcpTop10(const TStr& OutFNm, TStr Desc, const int& TakeMinN) const;
00223   void PlotPhiInOut(const TStr& OutFNm, TStr Desc=TStr()) const; // conductance on the boundary / counductance inside
00224   void PlotBestClustDens(TStr OutFNm, TStr Desc=TStr()) const; // plot edges inside, cut size, conductance
00225   void PlotNCPScatter(const TStr& OutFNm, TStr Desc=TStr()) const; // all different conducances of all sizes (not just min)
00226   void PlotPhiDistr(const int& CmtySz, const TStr& OutFNm, TStr Desc=TStr()) const; // histogram of conductances for a fixed CmtySz
00227   void PlotBoltzmanCurve(const TStr& OutFNm, TStr Desc=TStr()) const;
00228 
00229   void ImposeNCP(const TLocClustStat& LcStat2, TStr OutFNm, TStr Desc, TStr Desc1, TStr Desc2) const;
00230   void ImposeNCP(const TLocClustStat& LcStat2, const TLocClustStat& LcStat3, TStr OutFNm, TStr Desc, TStr Desc1, TStr Desc2, TStr Desc3) const;
00231   void SaveTxtInfo(const TStr& OutFNmPref, const TStr& Desc, const bool& SetMaxAt1) const;
00232 
00233   static void BagOfWhiskers(const PUNGraph& Graph, TFltPrV& SizePhiV, TFltPr& BestWhisk);
00234   static void BagOfWhiskers2(const PUNGraph& Graph, TFltPrV& SizePhiV);
00235   static void MakeExpBins(const TFltPrV& ValV, TFltPrV& NewV);
00236   static void MakeExpBins(const TFltV& ValV, TFltV& NewV);
00237 };
00238 
00241 class TNcpGraphsBase {
00242 private:
00243   TStrV GNmV;
00244   TFltV ParamValV ;
00245   TIntPrV GSizeV;
00246   TFltPrV WhiskerV, RewWhiskerV; // (size, conductance)
00247   TVec<TFltPrV> NcpV;
00248   TVec<TFltPrV> RewNcpV;
00249   TVec<TFltPrV> WhiskNcpV;
00250 public:
00251   TNcpGraphsBase(const TStr& FNmWc);
00252   TNcpGraphsBase(TSIn& SIn);
00253   void Save(TSOut& SOut) const;
00254   void Impose(const TStr& OutFNm, const int& TopN, const bool& Smooth);
00255   double GetXAtMinY(const TFltPrV& Ncp, const int& NNodes);
00256   TFltPr GetXYAtMinY(const TFltPrV& Ncp, const int& NNodes);
00257   void PlotNcpMin(const TStr& OutFNm, const bool& VsGraphN=false);
00258   void SaveTxtNcpMin(const TStr& OutFNm, const bool& VsGraphN=false);
00259   void PlotRewNcpMin(const TStr& OutFNm, const bool& VsGraphN=false);
00260   void PlotBestWhisker(const TStr& OutFNm, const bool& VsGraphN=false);
00261   void PlotRewBestWhisker(const TStr& OutFNm, const bool& VsGraphN=false);
00262   void PlotAvgNcp(const TStr& OutFNm, const TVec<TFltPrV>& NcpVec, const int& MinSz, const double& MaxMinY);
00263   void SaveTxt(const TStr& OutFNm);
00264   static void PlotDataset(const TStr& InFNmWc, const TStr& OutFNm, const bool& ImposeNcp=false, const bool& VsGraphN=false);
00265 };
00266 
00267 #endif