SNAP Library 2.1, Developer Reference  2013-09-25 10:47:25
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
ncp.h
Go to the documentation of this file.
00001 #ifndef snap_network_community_profile_h
00002 #define snap_network_community_profile_h
00003 
00004 #include "Snap.h"
00005 
00006 class TLocClust;
00007 class TLocClustStat;
00008 class TNcpGraphsBase;
00009 
00010 //#///////////////////////////////////////////////
00012 
00024 class TLocClust {
00025 public:
00026   static bool Verbose;
00027 private:
00028   PUNGraph Graph;
00029   int Nodes, Edges2;       // Nodes, 2*edges in Graph
00030   TIntFltH ProbH, ResH;
00031   TIntQ NodeQ;
00032   double Alpha;            // PageRank jump probability (smaller Alpha diffuses the mass farther away)
00033   int SeedNId;             // Seed node
00034   // volume, cut size, node ids, conductances
00035   TIntV NIdV, VolV, CutV;  // Vol=2*edges_inside+cut (vol = sum of the degrees)
00036   TFltV PhiV;              // Conductance
00037   int BestCutIdx;          // Index K to vectors where the conductance of the bounding cut (PhiV[K]) achieves its minimum
00038 public:
00039   TLocClust(const PUNGraph& GraphPt, const double& AlphaVal) :
00040     Graph(GraphPt), Nodes(GraphPt->GetNodes()), Edges2(2*GraphPt->GetEdges()), Alpha(AlphaVal) { }
00042   int Len() const { return GetRndWalkSup(); }
00044   int GetRndWalkSup() const { return VolV.Len(); }
00045 
00047   int GetNId(const int& NodeN) const { return NIdV[NodeN]; }
00049 
00051   int GetVol(const int& Nodes) const { return VolV[Nodes]; }
00053 
00055   int GetCut(const int& Nodes) const { return CutV[Nodes]; }
00057 
00059   double GetPhi(const int& ValId) const { return PhiV[ValId]; }
00060 
00062   const TIntV& GetNIdV() const { return NIdV; }
00064   const TIntV& GetVolV() const { return VolV; } 
00066   const TIntV& GetCutV() const { return CutV; } 
00068   const TFltV& GetPhiV() const { return PhiV; } 
00069 
00071 
00073   int BestCut() const { return BestCutIdx; }
00075   int BestCutNodes() const { return BestCutIdx+1; }
00077   int GetCutEdges() const { return GetCut(BestCut()); }
00079   int GetCutVol() const { return GetVol(BestCut()); }
00081   double GetCutPhi() const { return GetPhi(BestCut()); }
00082 
00084 
00086   int ApproxPageRank(const int& SeedNode, const double& Eps);
00088   void SupportSweep();
00090 
00093   void FindBestCut(const int& SeedNode, const int& ClustSz, const double& MinSizeFrac=0.2);
00094 
00096   void PlotVolDistr(const TStr& OutFNm, TStr Desc=TStr()) const;
00098   void PlotCutDistr(const TStr& OutFNm, TStr Desc=TStr()) const;
00100   void PlotPhiDistr(const TStr& OutFNm, TStr Desc=TStr()) const;
00102   void SavePajek(const TStr& OutFNm) const;
00103 
00105   static void DrawWhiskers(const PUNGraph& Graph, TStr FNmPref, const int& PlotN);
00107   static void GetCutStat(const PUNGraph& Graph, const TIntV& NIdV, int& Vol, int& Cut, double& Phi, int GraphEdges=-1);
00109   static void GetCutStat(const PUNGraph& Graph, const TIntSet& NIdSet, int& Vol, int& Cut, double& Phi, int GraphEdges=-1);
00112 
00117   static void PlotNCP(const PUNGraph& Graph, const TStr& FNm, const TStr Desc="", const bool& BagOfWhiskers=true, const bool& RewireNet=false,
00118     const int& KMin=10, const int& KMax=Mega(100), const int& Coverage=10, const bool& SaveTxtStat=false, const bool& PlotBoltzman=false);
00119 
00120   friend class TLocClustStat;
00121 };
00122 
00123 //#///////////////////////////////////////////////
00125 class TLocClustStat {
00126 public:
00127   static double BinFactor;
00128   static int TakeValAt;
00129 public:
00130   class TNodeSweep {
00131   public:
00132     TInt SeedNId;
00133     TIntV SweepV;  // nodes inside the cut: cut of size k has node ids CutV[0...k-1]
00134     TFltV PhiV;    // conductance at the cut
00135   public:
00136     TNodeSweep() {}
00137     TNodeSweep(const int& SeedNode, const TIntV& SweepNIdV, const TFltV& PhiNIdV) :
00138       SeedNId(SeedNode), SweepV(SweepNIdV), PhiV(PhiNIdV) { IAssert(SweepV.Len()==PhiV.Len()); }
00139     TNodeSweep(const TNodeSweep& NS) : SeedNId(NS.SeedNId), SweepV(NS.SweepV), PhiV(NS.PhiV) { }
00140     int Len() const { return SweepV.Len(); }
00141     int GetSeed() const { return SeedNId; }
00142     int NId(const int i) const { return SweepV[i]; }
00143     double Phi(const int i) const { return PhiV[i]; }
00144     bool operator < (const TNodeSweep& CS) const { return Len() < CS.Len(); }
00145   };
00146 
00147   class TCutInfo {
00148   public:
00149     TInt Nodes, Edges, CutSz; // nodes inside, edges inside, edges cut
00150     TIntV CutNIdV;            // node ids inside the cluster
00151   public:
00152     TCutInfo() : Nodes(0), Edges(0), CutSz(0), CutNIdV() { }
00153     TCutInfo(const int& ClustNodes, const int& EdgesInside, const int& CutSize) : Nodes(ClustNodes), Edges(EdgesInside), CutSz(CutSize) { }
00154     TCutInfo(const int& ClustNodes, const int& EdgesInside, const int& CutSize, const TIntV& NIdV) :
00155       Nodes(ClustNodes), Edges(EdgesInside), CutSz(CutSize), CutNIdV(NIdV) { }
00156     TCutInfo(const PUNGraph& G, const TIntV& ClustNIdV, bool TakeNIdV=false) : Nodes(ClustNIdV.Len()) {
00157       TSnap::GetEdgesInOut(G, ClustNIdV, Edges.Val, CutSz.Val); if(TakeNIdV){CutNIdV=ClustNIdV;} }
00158     TCutInfo(const TCutInfo& CS) : Nodes(CS.Nodes), Edges(CS.Edges), CutSz(CS.CutSz), CutNIdV(CS.CutNIdV) { }
00159     int GetNodes() const { return Nodes; }
00160     int GetEdges() const { return Edges; }
00161     int GetVol() const { return 2*Edges+CutSz; }
00162     int GetCutSz() const { return CutSz; }
00163     // community score measures (lower is better)
00164     double GetPhi() const { return double(CutSz)/double(2*Edges+CutSz); }                                     // conductance
00165     double GetExpansion() const { return Nodes<2 ? 1.0 : double(CutSz)/double(Nodes); }                       // expansion
00166     double GetIntDens() const { return 1.0 - ((Nodes<2) ? 0 : 2.0*double(Edges)/double(Nodes*(Nodes-1))); }   // internal density
00167     double GetCutRatio(const int& GNodes) const { return double(CutSz)/double(Nodes*(GNodes-Nodes)); }        // cut ratio (external density)
00168     double GetNormCut(const int& GEdges) const { return GetPhi() + double(CutSz)/double(2*GEdges-GetVol()); } // normalized cut
00169     double GetFracDegOut(const PUNGraph& Graph, double& MxFrac, double& AvgFrac, double& MedianFrac, double& Pct9Frac, double& Flake) const; // fraction of node's edges pointing outside the cluster
00170     double GetModular(const int& GEdges) const { return (2.0*Edges - GetExpEdgesIn(GEdges)) / (2.0*GEdges); } // modularity
00171     double GetModRat(const int& GEdges) const { return (2.0*Edges) / GetExpEdgesIn(GEdges); }                 // modularity ratio
00172     double GetExpEdgesIn(const int& GEdges) const { return TMath::Sqr(2.0*Edges+CutSz)/(2.0*GEdges); }        // expected edges inside (sum of degrees on nodes inside)^2/(2*E)
00173     bool operator < (const TCutInfo& CS) const { return GetPhi() < CS.GetPhi(); }
00174   };
00175 
00176 private:
00177   // parameters
00178   TFlt Alpha, SizeFrac, KFac;
00179   TInt KMin, KMax, Coverage;
00180   PUNGraph Graph; // set at ::Run()
00181 //private:
00182 public:
00183   TVec<TNodeSweep> SweepsV;       // node ids and conductances for each run of local clustering
00184   THash<TInt, TFltV> SizePhiH;    // all conductances at cluster size K
00185   THash<TInt, TCutInfo> BestCutH; // best cut (min conductance) at size K (edges inside, edges cut)
00186   TFltPrV BagOfWhiskerV;          // (size, conductance) for bag of whiskers
00187   TFltPr BestWhisk;               // best whisker (whisker with largest volume), (size, conductance)
00188   TCutInfo BestCut;               // best over-all cut
00189   TIntSet SizeBucketSet;          // for exponential bucketing (only indexes BestCutH at positions in BucketSet have CutNIdV filled)
00190 public:
00191   TLocClustStat(const double& _Alpha, const int& _KMin, const int& _KMax, const double& _KFac, const int& _Coverage, const double& _SizeFrac);
00192   void Save(TSOut& SOut) const;
00193   void Clr();
00194   void SetGraph(const PUNGraph& GraphPt) { Graph=GraphPt; }
00195   void Run(const PUNGraph& Graph, const bool& SaveAllSweeps=false, const bool& SaveAllCond=false, const bool& SaveBestNodesAtK=false);
00196   void AddBagOfWhiskers();
00197   void AddCut(const TIntV& NIdV);
00198   void AddCut(const int& ClustSz, const double& Phi);
00199   void AddToBestCutH(const PUNGraph& Graph, const TIntV& NIdV, const bool& AddBestCond=true);
00200 
00201   double FindBestCut(const int& Nodes, const TIntSet& TabuNIdSet, int& BestCutId) const;
00202   const TCutInfo& FindBestCut(const int& Nodes=-1) const;
00203   double FindBestCut(const int& Nodes, TIntV& ClustNIdV) const;
00204   int FindBestCut(const int& Nodes, int& Vol, int& Cut, double& Phi) const;
00205 
00206   const TCutInfo& GetBestCut() const { return BestCut; } // overall best cut
00207   int GetCuts() const { return BestCutH.Len(); }
00208   const TCutInfo& GetKNodeCut(const int& Nodes) const { return BestCutH.GetDat(Nodes); }
00209   const TCutInfo& GetCutN(const int& N) const { return BestCutH[N]; }
00210 
00211   int BestWhiskNodes() const { return int(BestWhisk.Val1.Val); }
00212   int BestWhiskEdges() const { return (int)TMath::Round(1.0/BestWhisk.Val2.Val)/2-1; }
00213   double BestWhiskPhi() const { return BestWhisk.Val2; }
00214   TFltPr GetBestWhisk() const { return BestWhisk; }
00215   const TFltPrV& GetBagOfWhiskersV() const { return BagOfWhiskerV; }
00216   void GetCurveStat(TFltPrV& MeanV, TFltPrV& MedV, TFltPrV& Dec1V, TFltPrV& MinV, TFltPrV& MaxV) const;
00217   void GetBoltzmanCurveStat(const TFltV& TempV, TVec<TFltPrV>& NcpV) const;
00218 
00219   TStr ParamStr() const;
00220   void PlotNCP(const TStr& OutFNm, TStr Desc=TStr()) const; // lower-envelope of conductance
00221   void PlotNCPModul(const TStr& OutFNm, TStr Desc=TStr()) const; // NCP but with modularity on Y-axis
00222   void PlotNcpTop10(const TStr& OutFNm, TStr Desc, const int& TakeMinN) const;
00223   void PlotPhiInOut(const TStr& OutFNm, TStr Desc=TStr()) const; // conductance on the boundary / counductance inside
00224   void PlotBestClustDens(TStr OutFNm, TStr Desc=TStr()) const; // plot edges inside, cut size, conductance
00225   void PlotNCPScatter(const TStr& OutFNm, TStr Desc=TStr()) const; // all different conducances of all sizes (not just min)
00226   void PlotPhiDistr(const int& CmtySz, const TStr& OutFNm, TStr Desc=TStr()) const; // histogram of conductances for a fixed CmtySz
00227   void PlotBoltzmanCurve(const TStr& OutFNm, TStr Desc=TStr()) const;
00228 
00229   void ImposeNCP(const TLocClustStat& LcStat2, TStr OutFNm, TStr Desc, TStr Desc1, TStr Desc2) const;
00230   void ImposeNCP(const TLocClustStat& LcStat2, const TLocClustStat& LcStat3, TStr OutFNm, TStr Desc, TStr Desc1, TStr Desc2, TStr Desc3) const;
00231   void SaveTxtInfo(const TStr& OutFNmPref, const TStr& Desc, const bool& SetMaxAt1) const;
00232 
00233   static void BagOfWhiskers(const PUNGraph& Graph, TFltPrV& SizePhiV, TFltPr& BestWhisk);
00234   static void BagOfWhiskers2(const PUNGraph& Graph, TFltPrV& SizePhiV);
00235   static void MakeExpBins(const TFltPrV& ValV, TFltPrV& NewV);
00236   static void MakeExpBins(const TFltV& ValV, TFltV& NewV);
00237 };
00238 
00239 //#///////////////////////////////////////////////
00241 class TNcpGraphsBase {
00242 private:
00243   TStrV GNmV;
00244   TFltV ParamValV ;
00245   TIntPrV GSizeV;
00246   TFltPrV WhiskerV, RewWhiskerV; // (size, conductance)
00247   TVec<TFltPrV> NcpV;
00248   TVec<TFltPrV> RewNcpV;
00249   TVec<TFltPrV> WhiskNcpV;
00250 public:
00251   TNcpGraphsBase(const TStr& FNmWc);
00252   TNcpGraphsBase(TSIn& SIn);
00253   void Save(TSOut& SOut) const;
00254   void Impose(const TStr& OutFNm, const int& TopN, const bool& Smooth);
00255   double GetXAtMinY(const TFltPrV& Ncp, const int& NNodes);
00256   TFltPr GetXYAtMinY(const TFltPrV& Ncp, const int& NNodes);
00257   void PlotNcpMin(const TStr& OutFNm, const bool& VsGraphN=false);
00258   void SaveTxtNcpMin(const TStr& OutFNm, const bool& VsGraphN=false);
00259   void PlotRewNcpMin(const TStr& OutFNm, const bool& VsGraphN=false);
00260   void PlotBestWhisker(const TStr& OutFNm, const bool& VsGraphN=false);
00261   void PlotRewBestWhisker(const TStr& OutFNm, const bool& VsGraphN=false);
00262   void PlotAvgNcp(const TStr& OutFNm, const TVec<TFltPrV>& NcpVec, const int& MinSz, const double& MaxMinY);
00263   void SaveTxt(const TStr& OutFNm);
00264   static void PlotDataset(const TStr& InFNmWc, const TStr& OutFNm, const bool& ImposeNcp=false, const bool& VsGraphN=false);
00265 };
00266 
00267 #endif