SNAP Library 3.0, User Reference  2016-07-20 17:56:49
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
statplot.h
Go to the documentation of this file.
1 namespace TSnap {
2 
4 // Plot graph properties
5 
9 template <class PGraph> void PlotInDegDistr(const PGraph& Graph, const TStr& FNmPref, TStr DescStr=TStr(), const bool& PlotCCdf=false, const bool& PowerFit=false);
13 template <class PGraph> void PlotOutDegDistr(const PGraph& Graph, const TStr& FNmPref, TStr DescStr=TStr(), const bool& PlotCCdf=false, const bool& PowerFit=false);
15 template <class PGraph> void PlotWccDistr(const PGraph& Graph, const TStr& FNmPref, TStr DescStr=TStr());
17 template <class PGraph> void PlotSccDistr(const PGraph& Graph, const TStr& FNmPref, TStr DescStr=TStr());
19 template <class PGraph> void PlotClustCf(const PGraph& Graph, const TStr& FNmPref, TStr DescStr=TStr());
22 template <class PGraph> void PlotHops(const PGraph& Graph, const TStr& FNmPref, TStr DescStr=TStr(), const bool& IsDir=false, const int& NApprox=32);
24 template <class PGraph> void PlotShortPathDistr(const PGraph& Graph, const TStr& FNmPref, TStr DescStr=TStr(), int TestNodes=TInt::Mx);
26 template <class PGraph> void PlotKCoreNodes(const PGraph& Graph, const TStr& FNmPref, TStr DescStr=TStr());
28 template <class PGraph> void PlotKCoreEdges(const PGraph& Graph, const TStr& FNmPref, TStr DescStr=TStr());
29 
31 void PlotEigValRank(const PUNGraph& Graph, const int& EigVals, const TStr& FNmPref, TStr DescStr=TStr());
33 void PlotEigValDistr(const PUNGraph& Graph, const int& EigVals, const TStr& FNmPref, TStr DescStr=TStr());
36 void PlotInvParticipRat(const PUNGraph& Graph, const int& MaxEigVecs, const int& TimeLimit, const TStr& FNmPref, TStr DescStr=TStr());
38 void PlotSngValRank(const PNGraph& Graph, const int& SngVals, const TStr& FNmPref, TStr DescStr=TStr());
40 void PlotSngValDistr(const PNGraph& Graph, const int& SngVals, const TStr& FNmPref, TStr DescStr=TStr());
42 void PlotSngVec(const PNGraph& Graph, const TStr& FNmPref, TStr DescStr=TStr());
43 
45 // Implementation
46 template <class PGraph>
47 void PlotInDegDistr(const PGraph& Graph, const TStr& FNmPref, TStr DescStr, const bool& PlotCCdf, const bool& PowerFit) {
48  TIntPrV DegCntV;
49  TSnap::GetInDegCnt(Graph, DegCntV);
50  const double AvgDeg = 2*Graph->GetEdges()/double(Graph->GetNodes());
51  int AboveAvg=0, Above2Avg=0;
52  for (int i = 0; i < DegCntV.Len(); i++) {
53  if (DegCntV[i].Val1 > AvgDeg) { AboveAvg += DegCntV[i].Val2; }
54  if (DegCntV[i].Val1 > 2*AvgDeg) { Above2Avg += DegCntV[i].Val2; }
55  }
56  if (PlotCCdf) {
57  DegCntV = TGUtil::GetCCdf(DegCntV); }
58  if (DescStr.Empty()) { DescStr = FNmPref; }
59  TGnuPlot::PlotValV(DegCntV, (PlotCCdf?"inDegC.":"inDeg.")+FNmPref,
60  TStr::Fmt("%s. G(%d, %d). %d (%.4f) nodes with in-deg > avg deg (%.1f), %d (%.4f) with >2*avg.deg", DescStr.CStr(),
61  Graph->GetNodes(), Graph->GetEdges(), AboveAvg, AboveAvg/double(Graph->GetNodes()), AvgDeg, Above2Avg, Above2Avg/double(Graph->GetNodes())),
62  "In-degree", PlotCCdf?"Count (CCDF)":"Count", gpsLog10XY, PowerFit, gpwLinesPoints);
63 }
64 
65 template <class PGraph>
66 void PlotOutDegDistr(const PGraph& Graph, const TStr& FNmPref, TStr DescStr, const bool& PlotCCdf, const bool& PowerFit) {
67  TIntPrV DegCntV;
68  TSnap::GetOutDegCnt(Graph, DegCntV);
69  const double AvgDeg = 2*Graph->GetEdges()/double(Graph->GetNodes());
70  int AboveAvg=0, Above2Avg=0;
71  for (int i = 0; i < DegCntV.Len(); i++) {
72  if (DegCntV[i].Val1 > AvgDeg) { AboveAvg += DegCntV[i].Val2; }
73  if (DegCntV[i].Val1 > 2*AvgDeg) { Above2Avg += DegCntV[i].Val2; }
74  }
75  if (PlotCCdf) {
76  DegCntV = TGUtil::GetCCdf(DegCntV); }
77  if (DescStr.Empty()) { DescStr = FNmPref; }
78  TGnuPlot::PlotValV(DegCntV, (PlotCCdf?"outDegC.":"outDeg.")+FNmPref,
79  TStr::Fmt("%s. G(%d, %d). %d (%.4f) nodes with out-deg > avg deg (%.1f), %d (%.4f) with >2*avg.deg", DescStr.CStr(),
80  Graph->GetNodes(), Graph->GetEdges(), AboveAvg, AboveAvg/double(Graph->GetNodes()), AvgDeg, Above2Avg, Above2Avg/double(Graph->GetNodes())),
81  "Out-degree", PlotCCdf?"Count (CCDF)":"Count", gpsLog10XY, PowerFit, gpwLinesPoints);
82 }
83 
84 template <class PGraph>
85 void PlotWccDistr(const PGraph& Graph, const TStr& FNmPref, TStr DescStr) {
86  TIntPrV WccSzCnt;
87  TSnap::GetWccSzCnt(Graph, WccSzCnt);
88  if (DescStr.Empty()) { DescStr = FNmPref; }
89  TGnuPlot GnuPlot("wcc."+FNmPref, TStr::Fmt("%s. G(%d, %d). Largest component has %f nodes",
90  DescStr.CStr(), Graph->GetNodes(), Graph->GetEdges(), WccSzCnt.Last().Val1/double(Graph->GetNodes())));
91  GnuPlot.AddPlot(WccSzCnt, gpwLinesPoints, "", "pt 6");
92  GnuPlot.SetXYLabel("Size of weakly connected component", "Number of components");
93  GnuPlot.SetScale(gpsLog10XY);
94  GnuPlot.SavePng();
95 }
96 
97 template <class PGraph>
98 void PlotSccDistr(const PGraph& Graph, const TStr& FNmPref, TStr DescStr) {
99  TIntPrV SccSzCnt;
100  TSnap::GetSccSzCnt(Graph, SccSzCnt);
101  if (DescStr.Empty()) { DescStr = FNmPref; }
102  TGnuPlot GnuPlot("scc."+FNmPref, TStr::Fmt("%s. G(%d, %d). Largest component has %f nodes",
103  DescStr.CStr(), Graph->GetNodes(), Graph->GetEdges(), SccSzCnt.Last().Val1/double(Graph->GetNodes())));
104  GnuPlot.AddPlot(SccSzCnt, gpwLinesPoints, "", "pt 6");
105  GnuPlot.SetXYLabel("Size of strongly connected component", "Number of components");
106  GnuPlot.SetScale(gpsLog10XY);
107  GnuPlot.SavePng();
108 }
109 
110 template <class PGraph>
111 void PlotClustCf(const PGraph& Graph, const TStr& FNmPref, TStr DescStr) {
112  TFltPrV DegToCCfV;
113  int64 ClosedTriads, OpenTriads;
114  const double CCF = GetClustCf(Graph, DegToCCfV, ClosedTriads, OpenTriads);
115  if (DescStr.Empty()) { DescStr = FNmPref; }
116  TGnuPlot GnuPlot("ccf."+FNmPref,
117  TStr::Fmt("%s. G(%d, %d). Average clustering: %.4f OpenTriads: %d (%.4f) ClosedTriads: %d (%.4f)", DescStr.CStr(), Graph->GetNodes(), Graph->GetEdges(),
118  CCF, OpenTriads, OpenTriads/double(OpenTriads+ClosedTriads), ClosedTriads, ClosedTriads/double(OpenTriads+ClosedTriads)));
119  GnuPlot.AddPlot(DegToCCfV, gpwLinesPoints, "", "pt 6");
120  GnuPlot.SetXYLabel("Node degree", "Average clustering coefficient");
121  GnuPlot.SetScale(gpsLog10XY);
122  GnuPlot.SavePng();
123 }
124 
125 template <class PGraph>
126 void PlotHops(const PGraph& Graph, const TStr& FNmPref, TStr DescStr, const bool& IsDir, const int& NApprox) {
127  TIntFltKdV DistNbrsV;
128  TSnap::GetAnf(Graph, DistNbrsV, -1, IsDir, NApprox);
129  const double EffDiam = TSnap::TSnapDetail::CalcEffDiam(DistNbrsV, 0.9);
130  if (DescStr.Empty()) { DescStr = FNmPref; }
131  TGnuPlot GnuPlot("hop."+FNmPref, TStr::Fmt("%s. Hop plot. EffDiam: %g, G(%d, %d)",
132  DescStr.CStr(), EffDiam, Graph->GetNodes(), Graph->GetEdges()));
133  GnuPlot.SetXYLabel("Number of hops", "Number of pairs of nodes");
134  GnuPlot.SetScale(gpsLog10Y);
135  GnuPlot.AddPlot(DistNbrsV, gpwLinesPoints, "", "pt 6");
136  GnuPlot.SavePng();
137 }
138 
139 template <class PGraph>
140 void PlotShortPathDistr(const PGraph& Graph, const TStr& FNmPref, TStr DescStr, int TestNodes) {
141  TIntH DistToCntH;
142  TBreathFS<PGraph> BFS(Graph);
143  // shotest paths
144  TIntV NodeIdV;
145  Graph->GetNIdV(NodeIdV); NodeIdV.Shuffle(TInt::Rnd);
146  for (int tries = 0; tries < TMath::Mn(TestNodes, Graph->GetNodes()); tries++) {
147  const int NId = NodeIdV[tries];
148  BFS.DoBfs(NId, true, false, -1, TInt::Mx);
149  for (int i = 0; i < BFS.NIdDistH.Len(); i++) {
150  DistToCntH.AddDat(BFS.NIdDistH[i]) += 1; }
151  }
152  DistToCntH.SortByKey(true);
153  TFltPrV DistNbrsPdfV;
154  for (int i = 0; i < DistToCntH.Len(); i++) {
155  DistNbrsPdfV.Add(TFltPr(DistToCntH.GetKey(i)(), DistToCntH[i]()));
156  }
157  const double EffDiam = TSnap::TSnapDetail::CalcEffDiamPdf(DistNbrsPdfV, 0.9);
158  const double AvgDiam = TSnap::TSnapDetail::CalcAvgDiamPdf(DistNbrsPdfV);
159  const int FullDiam = (int) DistNbrsPdfV.Last().Val1;
160  if (DescStr.Empty()) { DescStr = FNmPref; }
161  TGnuPlot::PlotValV(DistNbrsPdfV, "diam."+FNmPref,
162  TStr::Fmt("%s. G(%d, %d). Diam: avg:%.2f eff:%.2f max:%d", DescStr.CStr(), Graph->GetNodes(), Graph->GetEdges(),
163  AvgDiam, EffDiam, FullDiam), "Number of hops", "Number of shortest paths", gpsLog10Y, false, gpwLinesPoints);
164 }
165 
166 template <class PGraph>
167 void PlotKCoreNodes(const PGraph& Graph, const TStr& FNmPref, TStr DescStr) {
168  TIntPrV CoreNodesV;
169  TSnap::GetKCoreNodes(Graph, CoreNodesV);
170  if (DescStr.Empty()) { DescStr = FNmPref; }
171  TGnuPlot::PlotValV(CoreNodesV, "coreNodes."+FNmPref, TStr::Fmt("%s. G(%d, %d).", DescStr.CStr(), Graph->GetNodes(), Graph->GetEdges()), "k-Core", "Number of nodes in the k-Core", gpsLog10Y, false, gpwLinesPoints);
172 }
173 
174 template <class PGraph>
175 void PlotKCoreEdges(const PGraph& Graph, const TStr& FNmPref, TStr DescStr) {
176  TIntPrV CoreEdgesV;
177  TSnap::GetKCoreEdges(Graph, CoreEdgesV);
178  if (DescStr.Empty()) { DescStr = FNmPref; }
179  TGnuPlot::PlotValV(CoreEdgesV, "coreEdges."+FNmPref, TStr::Fmt("%s. G(%d, %d).", DescStr.CStr(), Graph->GetNodes(), Graph->GetEdges()), "k-Core", "Number of edges in the k-Core", gpsLog10Y, false, gpwLinesPoints);
180 }
181 
182 //TIntPrV CoreNV, CoreEV;
183 //TSnap::GetKCoreNodes(UG, CoreNV);
184 //TSnap::GetKCoreEdges(UG, CoreEV);
185 //TGnuPlot::PlotValV(CoreNV, "kcoreN.fullUndir", "kCore nodes size", "k-Core", "Number of nodes", gpsLog10Y);
186 //TGnuPlot::PlotValV(CoreEV, "kcoreE.fullUndir", "kCore edges size", "k-Core", "Number of edges", gpsLog10Y);
187 
188 
189 } // namespace TSnap
void PlotSccDistr(const PGraph &Graph, const TStr &FNmPref, TStr DescStr=TStr())
Plots the distribution of sizes of strongly connected components of a Graph.
Definition: statplot.h:98
static const T & Mn(const T &LVal, const T &RVal)
Definition: xmath.h:36
int GetKCoreNodes(const PGraph &Graph, TIntPrV &CoreIdSzV)
Returns the number of nodes in each core of order K (where K=0, 1, ...)
Definition: kcore.h:114
void PlotKCoreNodes(const PGraph &Graph, const TStr &FNmPref, TStr DescStr=TStr())
Plots the k-Core node-size distribution: Core k vs. number of nodes in k-core.
Definition: statplot.h:167
void GetOutDegCnt(const PGraph &Graph, TIntPrV &DegToCntV)
Returns an out-degree histogram: a set of pairs (out-degree, number of nodes of such out-degree) ...
Definition: alg.h:201
void SavePng(const int &SizeX=1000, const int &SizeY=800, const TStr &Comment=TStr())
Definition: gnuplot.h:120
static const int Mx
Definition: dt.h:1049
void PlotOutDegDistr(const PGraph &Graph, const TStr &FNmPref, TStr DescStr=TStr(), const bool &PlotCCdf=false, const bool &PowerFit=false)
Definition: statplot.h:66
int DoBfs(const int &StartNode, const bool &FollowOut, const bool &FollowIn, const int &TargetNId=-1, const int &MxDist=TInt::Mx)
Performs BFS from node id StartNode for at maps MxDist steps by only following in-links (parameter Fo...
Definition: bfsdfs.h:119
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:547
void GetAnf(const PGraph &Graph, const int &SrcNId, TIntFltKdV &DistNbrsV, const int &MxDist, const bool &IsDir, const int &NApprox=32)
Definition: anf.h:205
void SetXYLabel(const TStr &XLabel, const TStr &YLabel)
Definition: gnuplot.h:73
void PlotSngValRank(const PNGraph &Graph, const int &SngVals, const TStr &FNmPref, TStr DescStr)
Plots the rank distribution of singular values of the Graph adjacency matrix. Plots first SngVals val...
Definition: statplot.cpp:49
void PlotEigValRank(const PUNGraph &Graph, const int &EigVals, const TStr &FNmPref, TStr DescStr)
Plots the eigen-value rank distribution of the Graph adjacency matrix. Plots first EigVals eigenvalue...
Definition: statplot.cpp:5
void PlotEigValDistr(const PUNGraph &Graph, const int &EigVals, const TStr &FNmPref, TStr DescStr)
Plots the distribution of components of the leading eigen-vector of the Graph adjacency matrix...
Definition: statplot.cpp:14
void PlotInvParticipRat(const PUNGraph &Graph, const int &MaxEigVecs, const int &TimeLimit, const TStr &FNmPref, TStr DescStr)
Definition: statplot.cpp:39
static TRnd Rnd
Definition: dt.h:1053
void GetSccSzCnt(const PGraph &Graph, TIntPrV &SccSzCnt)
Returns a distribution of strongly connected component sizes.
Definition: cncom.h:420
void GetInDegCnt(const PGraph &Graph, TIntPrV &DegToCntV)
Returns an in-degree histogram: a set of pairs (in-degree, number of nodes of such in-degree) ...
Definition: alg.h:179
void PlotWccDistr(const PGraph &Graph, const TStr &FNmPref, TStr DescStr=TStr())
Plots the distribution of sizes of weakly connected components of a Graph.
Definition: statplot.h:85
double CalcEffDiamPdf(const TIntFltKdV &DistNbrsPdfV, const double &Percentile)
Helper function for computing a given Percentile of a (unnormalized) probability distribution functio...
Definition: anf.cpp:29
int GetKCoreEdges(const PGraph &Graph, TIntPrV &CoreIdSzV)
Returns the number of edges in each core of order K (where K=0, 1, ...)
Definition: kcore.h:126
const TVal & Last() const
Returns a reference to the last element of the vector.
Definition: ds.h:551
void PlotSngValDistr(const PNGraph &Graph, const int &SngVals, const TStr &FNmPref, TStr DescStr)
Plots the rank distribution of singular values of the Graph adjacency matrix. Plots first SngVals val...
Definition: statplot.cpp:58
TPair< TFlt, TFlt > TFltPr
Definition: ds.h:99
void PlotKCoreEdges(const PGraph &Graph, const TStr &FNmPref, TStr DescStr=TStr())
Plots the k-Core edge-size distribution: Core k vs. number of edges in k-core.
Definition: statplot.h:175
void PlotHops(const PGraph &Graph, const TStr &FNmPref, TStr DescStr=TStr(), const bool &IsDir=false, const int &NApprox=32)
Definition: statplot.h:126
void PlotSngVec(const PNGraph &Graph, const TStr &FNmPref, TStr DescStr)
Plots the distribution of the values of the leading left singular vector of the Graph adjacency matri...
Definition: statplot.cpp:81
void SortByKey(const bool &Asc=true)
Definition: hash.h:249
static void GetCCdf(const TIntPrV &PdfV, TIntPrV &CCdfV)
Definition: util.cpp:33
double CalcAvgDiamPdf(const TIntFltKdV &DistNbrsPdfV)
Helper function for computing the mean of a (unnormalized) probability distribution function...
Definition: anf.cpp:41
double CalcEffDiam(const TIntFltKdV &DistNbrsCdfV, const double &Percentile)
Helper function for computing a given Percentile of a (unnormalized) cumulative distribution function...
Definition: anf.cpp:7
long long int64
Definition: bd.h:27
void SetScale(const TGpScaleTy &GpScaleTy)
Definition: gnuplot.h:78
Definition: dt.h:412
bool Empty() const
Definition: dt.h:488
static TStr Fmt(const char *FmtStr,...)
Definition: dt.cpp:1599
void PlotShortPathDistr(const PGraph &Graph, const TStr &FNmPref, TStr DescStr=TStr(), int TestNodes=TInt::Mx)
Plots the distribution of the shortest path lengths of a Graph. Implementation is based on BFS...
Definition: statplot.h:140
Definition: hash.h:88
void Shuffle(TRnd &Rnd)
Randomly shuffles the elements of the vector.
Definition: ds.h:1271
void PlotClustCf(const PGraph &Graph, const TStr &FNmPref, TStr DescStr=TStr())
Plots the distribution of clustering coefficient of a Graph.
Definition: statplot.h:111
double GetClustCf(const PGraph &Graph, int SampleNodes=-1)
Computes the average clustering coefficient as defined in Watts and Strogatz, Collective dynamics of ...
Definition: triad.h:117
int AddPlot(const TIntV &YValV, const TGpSeriesTy &SeriesTy=gpwLinesPoints, const TStr &Label=TStr(), const TStr &Style=TStr())
Definition: gnuplot.cpp:186
char * CStr()
Definition: dt.h:476
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:574
TIntH NIdDistH
Definition: bfsdfs.h:79
int Len() const
Definition: hash.h:186
TDat & AddDat(const TKey &Key)
Definition: hash.h:196
void GetWccSzCnt(const PGraph &Graph, TIntPrV &WccSzCnt)
Returns a distribution of weakly connected component sizes.
Definition: cncom.h:337
static void PlotValV(const TVec< TVal1 > &ValV, const TStr &OutFNmPref, const TStr &Desc="", const TStr &XLabel="", const TStr &YLabel="", const TGpScaleTy &ScaleTy=gpsAuto, const bool &PowerFit=false, const TGpSeriesTy &SeriesTy=gpwLinesPoints)
Definition: gnuplot.h:398
void PlotInDegDistr(const PGraph &Graph, const TStr &FNmPref, TStr DescStr=TStr(), const bool &PlotCCdf=false, const bool &PowerFit=false)
Definition: statplot.h:47
const TKey & GetKey(const int &KeyId) const
Definition: hash.h:210
Vector is a sequence TVal objects representing an array that can change in size.
Definition: ds.h:429