SNAP Library 6.0, Developer Reference  2020-12-09 16:24:20
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
ncp.cpp
Go to the documentation of this file.
1  #include "stdafx.h"
2 #include "ncp.h"
3 
5 // Local Spectral Clustering
6 
7 bool TLocClust::Verbose = true;
8 
9 int TLocClust::ApproxPageRank(const int& SeedNode, const double& Eps) {
10  for (int i = 0; i < ProbH.Len(); i++) { ProbH[i]=0.0; }
11  for (int i = 0; i < ResH.Len(); i++) { ResH[i]=0.0; }
12  ProbH.Clr(false, -1, false);
13  ResH.Clr(false, -1, false);
14  ResH.AddDat(SeedNode, 1.0);
15  int iter = 0;
16  double OldRes = 0.0;
17  NodeQ.Clr(false);
18  NodeQ.Push(SeedNode);
19  TExeTm ExeTm;
20  while (! NodeQ.Empty()) {
21  const int NId = NodeQ.Top(); NodeQ.Pop();
22  const TUNGraph::TNodeI& Node = Graph->GetNI(NId);
23  const int NIdDeg = Node.GetOutDeg();
24  const double PushVal = ResH.GetDat(NId) - 0.5*Eps*NIdDeg;
25  const double PutVal = (1.0-Alpha) * PushVal / double(NIdDeg);
26  ProbH.AddDat(NId) += Alpha*PushVal;
27  ResH.AddDat(NId) = 0.5 * Eps * NIdDeg;
28  for (int e = 0; e < NIdDeg; e++) {
29  const int DstNId = Node.GetOutNId(e);
30  const int DstDeg = Graph->GetNI(DstNId).GetOutDeg();
31  double& ResVal = ResH.AddDat(DstNId).Val;
32  OldRes = ResVal;
33  ResVal += PutVal;
34  if (ResVal >= Eps*DstDeg && OldRes < Eps*DstDeg) {
35  NodeQ.Push(DstNId); }
36  }
37  iter++;
38  if (iter % Mega(1) == 0) {
39  printf(" %d[%s]", NodeQ.Len(), ExeTm.GetStr());
40  if (iter/1000 > Graph->GetNodes() || ExeTm.GetSecs() > 4*3600) { // more than 2 hours
41  printf("Too many iterations! Stop to save time.\n");
42  return iter; }
43  }
44  }
45  // check that the residuals are sufficiently small
46  /*for (int i =0; i < ProbH.Len(); i++) {
47  const int Deg = Graph->GetNI(ResH.GetKey(i)).GetOutDeg();
48  IAssert(ResH[i] < Eps*Deg); } //*/
49  return iter;
50 }
51 
53  TExeTm ExeTm;
54  VolV.Clr(false); CutV.Clr(false); PhiV.Clr(false);
55  if (ProbH.Empty()) { return; }
56  const int TopNId = ProbH.GetKey(0);
57  const int TopNIdDeg = Graph->GetNI(TopNId).GetOutDeg();
58  int Vol = TopNIdDeg, Cut = TopNIdDeg;
59  double Phi = Cut/double(Vol);
60  VolV.Add(Vol); CutV.Add(Cut); PhiV.Add(1.0);
61  for (int i = 1; i < ProbH.Len(); i++) {
62  const int NId = ProbH.GetKey(i);
63  const TUNGraph::TNodeI& Node = Graph->GetNI(NId);
64  const int OutDeg = Node.GetOutDeg();
65  int CutSz = OutDeg; // edges outside
66  for (int e = 0; e < OutDeg; e++) {
67  const int Rank = ProbH.GetKeyId(Node.GetOutNId(e));
68  if ( Rank > -1 && Rank < i) { CutSz -= 2; }
69  }
70  Vol += OutDeg; Cut += CutSz;
71  if (Vol < Edges2) {
72  if (2*Vol > Edges2) { Phi = Cut / double(Edges2-Vol); }
73  else { Phi = Cut / double(Vol); }
74  } else {
75  Phi = 1.0;
76  }
77  IAssert((Phi+1e-6) >= double(1.0)/double(i*(i+1)+1)); // conductance is worse than the best possible
78  VolV.Add(Vol); CutV.Add(Cut); PhiV.Add(Phi);
79  }}
80 
81 void TLocClust::FindBestCut(const int& SeedNode, const int& ClustSz, const double& MinSizeFrac) {
82  double MaxPhi = TFlt::Mx;
83  BestCutIdx = -1;
84  SeedNId = SeedNode;
85  // calculate pagerank and cut sets
86  ApproxPageRank(SeedNId, 1.0/double(ClustSz));
87  for (int i = 0; i < ProbH.Len(); i++) {
88  ProbH[i] /= Graph->GetNI(ProbH.GetKey(i)).GetOutDeg(); }
89  ProbH.SortByDat(false);
90  SupportSweep();
91  // find best cut
92  NIdV.Clr(false);
93  for (int i = 0; i < PhiV.Len(); i++) {
94  const double Phi = PhiV[i];
95  NIdV.Add(ProbH.GetKey(i));
96  if (Phi < MaxPhi) { MaxPhi = Phi; BestCutIdx = i; }
97  }
98 }
99 
100 void TLocClust::PlotVolDistr(const TStr& OutFNm, TStr Desc) const {
101  TFltPrV RankValV(VolV.Len(), 0);
102  for (int i = 0; i < VolV.Len(); i++) {
103  RankValV.Add(TFltPr(i+1, VolV[i].Val)); }
104  if (Desc.Empty()) { Desc = OutFNm; }
105  TFltPrV NewV; TLocClustStat::MakeExpBins(RankValV, NewV);
106  TGnuPlot GP(OutFNm+"-VOL", TStr::Fmt("VOLUME. Seed node %d.", SeedNId, Desc.CStr()));
107  GP.AddPlot(NewV, gpwLinesPoints, ""); //, "pointtype 6 pointsize 1.5"
108  GP.SetXYLabel("Rank", "Volume");
109  //GP.SetScale(gpsLog10X);
110  GP.SavePng();
111 }
112 
113 void TLocClust::PlotCutDistr(const TStr& OutFNm, TStr Desc) const {
114  TFltPrV RankValV(CutV.Len(), 0);
115  for (int i = 0; i < CutV.Len(); i++) {
116  RankValV.Add(TFltPr(i+1, CutV[i].Val)); }
117  if (Desc.Empty()) { Desc = OutFNm; }
118  TFltPrV NewV; TLocClustStat::MakeExpBins(RankValV, NewV);
119  TGnuPlot GP(OutFNm+"-CUT", TStr::Fmt("CUT SIZE. Seed node %d.", SeedNId, Desc.CStr()));
120  GP.AddPlot(NewV, gpwLinesPoints, ""); //, "pointtype 6 pointsize 1.5"
121  GP.SetXYLabel("Rank", "Cut size");
122  //GP.SetScale(gpsLog10X);
123  GP.SavePng();
124 }
125 
126 void TLocClust::PlotPhiDistr(const TStr& OutFNm, TStr Desc) const {
127  TFltPrV RankValV(PhiV.Len(), 0);
128  for (int i = 0; i < PhiV.Len(); i++) {
129  RankValV.Add(TFltPr(i+1, PhiV[i])); }
130  if (Desc.Empty()) { Desc = OutFNm; }
131  TFltPrV NewV; TLocClustStat::MakeExpBins(RankValV, NewV);
132  TGnuPlot GP(OutFNm+"-PHI", TStr::Fmt("CONDUCTANCE (Cut size / Volume). Seed node %d.", SeedNId, Desc.CStr()));
133  GP.AddPlot(NewV, gpwLinesPoints, ""); //, "pointtype 6 pointsize 1.5"
134  GP.SetXYLabel("Rank", "Conductance (Cut size / Volume)");
135  //GP.SetScale(gpsLog10X);
136  GP.SavePng();
137 }
138 
139 void TLocClust::SavePajek(const TStr& OutFNm) const {
140  FILE *F = fopen(TStr::Fmt("%s.net", OutFNm.CStr()).CStr(), "wt");
141  TIntH NIdToIdH(Graph->GetNodes(), true);
142  TIntH ClustSet(BestCut()+1);
143  const int BucketSz = BestCutNodes()/ 4;
144  TStrV ClrV = TStrV::GetV("Black", "Gray80", "Gray60", "Gray40", "Gray20", "RedViolet");
145  for (int a = 0; a < BestCutNodes(); a++) {
146  ClustSet.AddDat(NIdV[a], (a+1)/BucketSz);
147  }
148  fprintf(F, "*Vertices %d\n", Graph->GetNodes());
149  int i = 0;
150  for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
151  const int NId = NI.GetId();
152  if (NId == SeedNId) {
153  fprintf(F, "%d \"%d\" diamond x_fact 2 y_fact 2 ic Green fos 10\n", i+1, NId); }
154  else if (ClustSet.IsKey(NId)) {
155  //fprintf(F, "%d \"%d\" box x_fact 1 y_fact 1 ic Red fos 10\n", i+1, NId); }
156  fprintf(F, "%d \"%d\" box x_fact 2 y_fact 2 ic %s fos 10\n", i+1, NId, ClrV[ClustSet.GetDat(NId)].CStr()); }
157  else {
158  fprintf(F, "%d \"%d\" ellipse x_fact 2 y_fact 2 ic Blue fos 10\n", i+1, NId); }
159  NIdToIdH.AddDat(NId, i+1);
160  i++;
161  }
162  fprintf(F, "*Arcs %d\n", Graph->GetEdges()); // arcs are directed, edges are undirected
163  for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
164  const int NId = NIdToIdH.GetDat(NI.GetId());
165  for (int e = 0; e < NI.GetOutDeg(); e++) {
166  fprintf(F, "%d %d %g c Tan\n", NId, NIdToIdH.GetDat(NI.GetOutNId(e)).Val, 1.0);
167  }
168  }
169  fclose(F);
170 }
171 
172 void TLocClust::DrawWhiskers(const PUNGraph& Graph, TStr FNmPref, const int& PlotN=10) {
173  TCnComV CnComV; TSnap::Get1CnCom(Graph, CnComV);
174  CnComV.Sort(false);
175  // plot size distribution
176  { TIntH SzCntH;
177  for (int i = 0; i < CnComV.Len(); i++) { SzCntH.AddDat(CnComV[i].Len()) += 1; }
178  TGnuPlot::PlotValCntH(SzCntH, "whiskSz."+FNmPref, TStr::Fmt("%s. G(%d, %d)", FNmPref.CStr(), Graph->GetNodes(),
179  Graph->GetEdges()), "Whisker size (Maximal component connected with a bridge edge)", "Count", gpsLog10XY, false); }
180  // draw them
181  int BrNodeId = -1;
182  for (int c = 0; c < TMath::Mn(CnComV.Len(), PlotN); c++) {
183  const PUNGraph BrClust = TSnap::GetSubGraph(Graph, CnComV[c].NIdV);
184  for (TUNGraph::TNodeI NI = BrClust->BegNI(); NI < BrClust->EndNI(); NI++) {
185  if (NI.GetOutDeg() != Graph->GetNI(NI.GetId()).GetOutDeg()) { BrNodeId=NI.GetId(); break; } }
186  TIntStrH ClrH; ClrH.AddDat(BrNodeId, "red");
187  TSnap::DrawGViz(BrClust, gvlNeato, TStr::Fmt("whisk-%s-%02d.gif", FNmPref.CStr(), c+1),
188  TStr::Fmt("Bridge node id: %d, n=%d, e=%d", BrNodeId, BrClust->GetNodes(), BrClust->GetEdges()), false, ClrH);
189  }
190 }
191 
192 void TLocClust::GetCutStat(const PUNGraph& Graph, const TIntV& NIdV, int& Vol, int& Cut, double& Phi, int GraphEdges) {
193  TIntSet NIdSet(NIdV.Len());
194  for (int i = 0; i < NIdV.Len(); i++) { NIdSet.AddKey(NIdV[i]); }
195  GetCutStat(Graph, NIdSet, Vol, Cut, Phi, GraphEdges);
196 }
197 
198 void TLocClust::GetCutStat(const PUNGraph& Graph, const TIntSet& NIdSet, int& Vol, int& Cut, double& Phi, int GraphEdges) {
199  const int Edges2 = GraphEdges>=0 ? 2*GraphEdges : Graph->GetEdges();
200  Vol=0; Cut=0; Phi=0.0;
201  for (int i = 0; i < NIdSet.Len(); i++) {
202  if (! Graph->IsNode(NIdSet[i])) { continue; }
203  TUNGraph::TNodeI NI = Graph->GetNI(NIdSet[i]);
204  for (int e = 0; e < NI.GetOutDeg(); e++) {
205  if (! NIdSet.IsKey(NI.GetOutNId(e))) { Cut += 1; }
206  }
207  Vol += NI.GetOutDeg();
208  }
209  // get conductance
210  if (Vol != Edges2) {
211  if (2*Vol > Edges2) { Phi = Cut / double(Edges2-Vol); }
212  else if (Vol == 0) { Phi = 0.0; }
213  else { Phi = Cut / double(Vol); }
214  } else {
215  if (Vol == Edges2) { Phi = 1.0; }
216  }
217 }
218 
219 void TLocClust::PlotNCP(const PUNGraph& Graph, const TStr& FNm, const TStr Desc, const bool& BagOfWhiskers, const bool& RewireNet, const int& KMin, const int& KMax, const int& Coverage, const bool& SaveTxtStat, const bool& PlotBoltzman) {
220  const double Alpha = 0.001, KFac = 1.5, SizeFrac = 0.001;
221  //const int KMin = 2, KMax = Mega(100), Coverage = 10;
222  TLocClustStat ClusStat1(Alpha, KMin, KMax, KFac, Coverage, SizeFrac);
223  ClusStat1.Run(Graph, false, PlotBoltzman, SaveTxtStat);
224  if (BagOfWhiskers) { ClusStat1.AddBagOfWhiskers(); }
225  TLocClustStat ClusStat2(Alpha, KMin, KMax, KFac, Coverage, SizeFrac);
226  ClusStat1.ImposeNCP(ClusStat2, FNm, Desc, "ORIGINAL", "REWIRED"); // plot before rewiring
227  if (SaveTxtStat) { // for every piece size store modularity
228  ClusStat1.SaveTxtInfo(FNm, Desc, false);
229  }
230  if (PlotBoltzman) {
231  ClusStat1.PlotBoltzmanCurve(FNm, Desc);
232  }
233  if (RewireNet) {
234  ClusStat2.Run(TSnap::GenRewire(Graph, 50, TInt::Rnd), false, false, false);
235  if (BagOfWhiskers) { ClusStat2.AddBagOfWhiskers(); }
236  ClusStat1.ImposeNCP(ClusStat2, FNm, Desc, "ORIGINAL", "REWIRED"); // if rewire, plot again
237  }
238 }
239 
241 // Local Clustering Statistics
242 
243 double TLocClustStat::BinFactor = 1.01;
244 
245 double TLocClustStat::TCutInfo::GetFracDegOut(const PUNGraph& Graph, double& MxFrac, double& AvgFrac, double& MedianFrac, double& Pct9Frac, double& Flake) const {
246  if (CutNIdV.Empty()) {
247  IAssert(Nodes<100 || ! CutNIdV.Empty());
248  MxFrac=1; AvgFrac=1; MedianFrac=1; Pct9Frac=1; Flake=1;
249  return 1;
250  }
251  TMom FracDegMom;
252  TIntSet InNIdSet(CutNIdV.Len());
253  int NHalfIn=0;
254  for (int i = 0; i < CutNIdV.Len(); i++) {
255  InNIdSet.AddKey(CutNIdV[i]); }
256  for (int n = 0; n < CutNIdV.Len(); n++) {
257  const TUNGraph::TNodeI NI = Graph->GetNI(CutNIdV[n]);
258  int EdgesOut = 0;
259  for (int i = 0; i < NI.GetDeg(); i++) {
260  if (! InNIdSet.IsKey(NI.GetNbrNId(i))) { EdgesOut++; }
261  }
262  const double FracOut = EdgesOut/double(NI.GetDeg());
263  if (FracOut <= 0.5) { NHalfIn++; }
264  FracDegMom.Add(FracOut);
265  }
266  FracDegMom.Def();
267  MxFrac = FracDegMom.GetMx();
268  AvgFrac = FracDegMom.GetMean();
269  MedianFrac = FracDegMom.GetMedian();
270  Pct9Frac = FracDegMom.GetDecile(9);
271  Flake = 1.0 - double(NHalfIn)/double(CutNIdV.Len());
272  return MxFrac;
273 }
274 
275 TLocClustStat::TLocClustStat(const double& _Alpha, const int& _KMin, const int& _KMax, const double& _KFac,
276  const int& _Coverage, const double& _SizeFrac) : Alpha(_Alpha), SizeFrac(_SizeFrac), KFac(_KFac),
277  KMin(_KMin), KMax(_KMax), Coverage(_Coverage) {
278 }
279 
280 void TLocClustStat::Save(TSOut& SOut) const {
281  // parameters
282  Alpha.Save(SOut);
283  SizeFrac.Save(SOut);
284  KFac.Save(SOut);
285  KMin.Save(SOut);
286  KMax.Save(SOut);
287  Coverage.Save(SOut);
288  // cluster stat
289  //SweepsV.Save(SOut);
290  //SizePhiH.Save(SOut);
291 }
292 
294  SweepsV.Clr(false); // node ids and conductances for each run of local clustering
295  SizePhiH.Clr(false); // conductances at cluster size K
296  BestCutH.Clr(false); // best cut (min conductance) at size K
297  BagOfWhiskerV.Clr(false); // (Size, Conductance) of bag of whiskers clusters
298 }
299 
300 void TLocClustStat::Run(const PUNGraph& _Graph, const bool& SaveAllSweeps, const bool& SaveAllCond, const bool& SaveBestNodesAtK) {
301  Graph = TSnap::GetMxWcc(_Graph);
302  const int Nodes = Graph->GetNodes();
303  const int Edges = Graph->GetEdges();
304  printf("\nLocal clustering: Graph(%d, %d)\n", Nodes, Edges);
305  printf(" Alpha: %g\n", Alpha());
306  printf(" K: %d -- %g -- %dm\n", KMin(), KFac(), int(KMax/Mega(1)));
307  printf(" Coverage: %d\n", Coverage());
308  printf(" SizeFrac: %g\n\n", SizeFrac());
309  TExeTm TotTm;
310  Clr();
311  TLocClust Clust(Graph, Alpha);
312  BestCut.CutNIdV.Clr(false);
313  BestCut.CutSz=-1; BestCut.Edges=-1;
314  double BestPhi = TFlt::Mx;
315  int prevK=-1;
316  bool NextDone=false;
317  if (SaveBestNodesAtK) { // fill buckets (only store nodes in clusters for sizes in SizeBucketSet)
318  SizeBucketSet.Clr();
319  double PrevBPos = 1, BPos = 1;
320  while (BPos <= Mega(100)) {
321  PrevBPos = (uint) floor(BPos);
322  BPos *= BinFactor;
323  if (floor(BPos) == PrevBPos) { BPos = PrevBPos + 1; }
324  SizeBucketSet.AddKey(int(floor(BPos) - 1));
325  }
326  }
327  for (int K = KMin, cnt=1; K < KMax; K = int(KFac * double(K))+1, cnt++) {
328  if (K == prevK) { K++; } prevK = K;
329  const int Runs = 2 + int(Coverage * floor(double(Graph->GetEdges()) / double(K)));
330  printf("%d] K: %d, %d runs\n", cnt+1, K, Runs);
331  if (NextDone) { break; } // done
332  if (K+1 > 2*Graph->GetEdges()) { K = Graph->GetEdges(); NextDone=true; }
333  //if (K+1 > Graph->GetEdges()) { K = Graph->GetEdges(); NextDone=true; }
334  TExeTm ExeTm, IterTm;
335  double MeanSz=0.0, MeanVol=0.0, Count=0.0;
336  for (int run = 0; run < Runs; run++) {
337  const int SeedNId = Graph->GetRndNId(); IterTm.Tick();
338  Clust.FindBestCut(SeedNId, K, SizeFrac);
339  const int Sz = Clust.BestCutNodes();
340  const int Vol = Clust.GetCutVol();
341  const double Phi = TMath::Round(Clust.GetCutPhi(), 4);
342  if (Sz == 0 || Vol == 0 || Phi == 0) { continue; }
343  MeanSz+=Sz; MeanVol+=Vol; Count+= 1;
344  if (SaveAllSweeps) { // save the full cut set and conductances for all trials
345  SweepsV.Add(TNodeSweep(SeedNId, Clust.GetNIdV(), Clust.GetPhiV())); }
346  int SAtBestPhi=-1;
347  for (int s = 0; s < Clust.Len(); s++) {
348  const int size = s+1;
349  const int cut = Clust.GetCut(s);
350  const int edges = (Clust.GetVol(s)-cut)/2;
351  const double phi = Clust.GetPhi(s);
352  if (( Clust.GetPhi(s) != double(cut)/double(2*edges+cut))) { continue; } // more than half of the edges
353  IAssert((Clust.GetVol(s) - cut) % 2 == 0);
354  IAssert(phi == double(cut)/double(2*edges+cut));
355  IAssert(phi >= 1.0/double((1+s)*s+1));
357  // TCutInfo CutInfo(size, edges, cut); Clust.GetNIdV().GetSubValV(0, size-1, CutInfo.CutNIdV);
358  //double MxFrac=0, AvgFrac=0, MedianFrac=0, Pct9Frac=0, Flake=0;
359  //CutInfo.GetFracDegOut(Graph, MxFrac, AvgFrac, MedianFrac, Pct9Frac, Flake);
360  //const double phi = MxFrac;
361  if (BestPhi >= phi) {
362  BestPhi = phi;
363  BestCut = TCutInfo(size, edges, cut);
364  SAtBestPhi = s;
365  }
367  //bool TAKE=false; if (! BestCutH.IsKey(size)) { TAKE=true; }
368  //else { BestCutH.GetDat(size).GetFracDegOut(Graph, MxFrac, AvgFrac, MedianFrac, Pct9Frac, Flake); if (MxFrac >= phi) { TAKE = true; } }
369  // if (TAKE) {
370  if (! BestCutH.IsKey(size) || BestCutH.GetDat(size).GetPhi() >= phi) { //new best cut (size, edges inside and nodes)
371  BestCutH.AddDat(size, TCutInfo(size, edges, cut)); // for every size store best cut (NIds inside the cut)
372  if (SaveBestNodesAtK) { // store node ids in best community for each size k
373  if (! SizeBucketSet.Empty() && ! SizeBucketSet.IsKey(size)) { continue; } // only save best clusters at SizeBucketSet
374  Clust.GetNIdV().GetSubValV(0, size-1, BestCutH.GetDat(size).CutNIdV); }
375  }
376  if (SaveAllCond) { // for every size store all conductances
377  SizePhiH.AddDat(size).Add(phi); }
378  }
379  if (SAtBestPhi != -1) { // take nodes in best cluster
380  const int size = SAtBestPhi+1;
381  Clust.GetNIdV().GetSubValV(0, size-1, BestCut.CutNIdV);
382  }
383  if (TLocClust::Verbose) {
384  printf(".");
385  if (run % 50 == 0) {
386  printf("\r %d / %d \r", run, Runs); }
387  }
388  }
389  if (TLocClust::Verbose) {
390  printf("\r %d / %d: %s \n", Runs, Runs, ExeTm.GetStr());
391  }
392  MeanSz/=Count; MeanVol/=Count;
393  printf(" Graph(%d, %d) ", Nodes, Edges);
394  printf(" mean: sz: %.2f vol: %.2f [%s] %s\n", MeanSz, MeanVol, ExeTm.GetStr(), TExeTm::GetCurTm());
395  }
397  for (int k = 0; k < SizePhiH.Len(); k++) {
398  SizePhiH[k].Sort(); }
399  SweepsV.Sort();
400  BestCutH.SortByKey();
401  printf("DONE. Graph(%d, %d): Total run time: %s\n\n", Nodes, Edges, TotTm.GetStr());
402 }
403 
405  BagOfWhiskers(Graph, BagOfWhiskerV, BestWhisk); // slow but exact
406  //BagOfWhiskers2(Graph, BagOfWhiskerV);
407 }
408 
409 // add a cut on NIdV nodes
410 void TLocClustStat::AddCut(const TIntV& NIdV) {
411  int Vol, Cut; double Phi;
412  TLocClust::GetCutStat(Graph, NIdV, Vol, Cut, Phi, Graph->GetEdges());
413  SizePhiH.AddDat(NIdV.Len()).Add(Phi);
414 }
415 
416 // add a cut of particular conductance
417 void TLocClustStat::AddCut(const int& ClustSz, const double& Phi) {
418  SizePhiH.AddDat(ClustSz).Add(Phi);
419 }
420 
421 // add the cut on particular set of nodes (calculate conductance and modularity)
422 void TLocClustStat::AddToBestCutH(const PUNGraph& Graph, const TIntV& NIdV, const bool& AddBestCond) {
423  const int K = NIdV.Len();
424  TCutInfo CutInfo(Graph, NIdV, true);
425  printf("new: %d\t%d\t%d\t%f\t%f\n", CutInfo.GetNodes(), CutInfo.GetEdges(),
426  CutInfo.GetCutSz(), CutInfo.GetPhi(), CutInfo.GetModular(Graph->GetEdges()));
427  if (! BestCutH.IsKey(K)) { BestCutH.AddDat(K, CutInfo); return; }
428  TCutInfo& CurCut = BestCutH.GetDat(K);
429  // keep the cluster with best conductance
430  if (AddBestCond && CurCut.GetPhi() > CutInfo.GetPhi()) { CurCut=CutInfo; }
431  // keep the cluster with best modularity
432  //const int E = Graph->GetEdges();
433  //if (! AddBestCond && CurCut.GetModular(E) < CutInfo.GetModular(E)) { CurCut=CutInfo; }
434 }
435 
436 const TLocClustStat::TCutInfo& TLocClustStat::FindBestCut(const int& Nodes) const {
437  double bestPhi = 1;
438  int CutId = -1;
439  if (Nodes > 0) {
440  IAssert(BestCutH.IsKey(Nodes));
441  return BestCutH.GetDat(Nodes);
442  } else {
443  for (int n = 0; n < BestCutH.Len(); n++) {
444  if (BestCutH[n].GetPhi() <= bestPhi) {
445  bestPhi = BestCutH[n].GetPhi(); CutId = n; }
446  }
447  IAssert(CutId != -1);
448  IAssert(! BestCutH[CutId].CutNIdV.Empty());
449  return BestCutH[CutId];
450  }
451 }
452 
453 // find the cut with the lowest Phi of particular size (if Nodes=-1 find absolute best cut)
454 double TLocClustStat::FindBestCut(const int& Nodes, TIntV& ClustNIdV) const {
455  const TCutInfo& Cut = FindBestCut(Nodes);
456  ClustNIdV = Cut.CutNIdV;
457  return Cut.GetPhi();
458 }
459 
460 int TLocClustStat::FindBestCut(const int& Nodes, int& Vol, int& Cut, double& Phi) const {
461  const TCutInfo& CutInfo = FindBestCut(Nodes);
462  Vol = CutInfo.GetVol();
463  Cut = CutInfo.GetCutSz();
464  Phi = CutInfo.GetPhi();
465  return CutInfo.GetNodes();
466 }
467 
468 // find best phi where the cut has N nodes, and the nodes in the cluster are not in the TabuSet
469 double TLocClustStat::FindBestCut(const int& Nodes, const TIntSet& TabuNIdSet, int& BestCutId) const {
470  double bestPhi = 1;
471  BestCutId = -1;
472  bool Tabu;
473  IAssert(! SweepsV.Empty());
474  for (int c = 0; c < SweepsV.Len(); c++) {
475  const TNodeSweep& Sweep = SweepsV[c];
476  if (Sweep.Len() < Nodes) { continue; }
477  if (Sweep.Phi(Nodes-1) > bestPhi) { continue; }
478  Tabu = false;
479  for (int i = 0; i < Nodes; i++) {
480  if (TabuNIdSet.IsKey(Sweep.NId(i))) { Tabu=true; break; }
481  }
482  if (Tabu) { continue; }
483  bestPhi = Sweep.Phi(Nodes-1);
484  BestCutId = c;
485  }
486  return bestPhi;
487 }
488 
489 // get statistics over all conductances at all cluster sizes
490 void TLocClustStat::GetCurveStat(TFltPrV& MeanV, TFltPrV& MedV, TFltPrV& Dec1V, TFltPrV& MinV, TFltPrV& MaxV) const {
491  TFltPrV BucketV;
492  MeanV.Clr(false); MedV.Clr(false); Dec1V.Clr(false); MinV.Clr(false); MaxV.Clr(false);
493  if (! SizePhiH.Empty()) { // stores conductances of all little clusters
494  const THash<TInt, TFltV>& KvH = SizePhiH;
495  for (int i = 0; i < KvH.Len(); i++) {
496  const double X = KvH.GetKey(i).Val; IAssert(X >= 1.0);
497  const TFltV& YVec = KvH[i];
498  TMom Mom;
499  for (int j = 0; j < YVec.Len(); j++) { Mom.Add(YVec[j]); }
500  Mom.Def();
501  /*IAssert(Mom.GetMean()>=0 && Mom.GetMean()<=1);
502  IAssert(Mom.GetMedian()>=0 && Mom.GetMedian()<=1);
503  IAssert(Mom.GetDecile(1)>=0 && Mom.GetDecile(1)<=1);
504  IAssert(Mom.GetMn()>=0 && Mom.GetMn()<=1);
505  IAssert(Mom.GetMx()>=0 && Mom.GetMx()<=1);*/
506  MeanV.Add(TFltPr(X, Mom.GetMean()));
507  MedV.Add(TFltPr(X, Mom.GetMedian()));
508  Dec1V.Add(TFltPr(X, Mom.GetDecile(1)));
509  MinV.Add(TFltPr(X, Mom.GetMn()));
510  MaxV.Add(TFltPr(X, Mom.GetMx()));
511  }
512  MeanV.Sort(); MedV.Sort(); Dec1V.Sort(); MinV.Sort(); MaxV.Sort();
513  // create log buckets (makes plots smaller and smoother)
514  TLocClustStat::MakeExpBins(MeanV, BucketV); MeanV.Swap(BucketV);
515  TLocClustStat::MakeExpBins(MedV, BucketV); MedV.Swap(BucketV);
516  TLocClustStat::MakeExpBins(Dec1V, BucketV); Dec1V.Swap(BucketV);
517  TLocClustStat::MakeExpBins(MinV, BucketV); MinV.Swap(BucketV);
518  TLocClustStat::MakeExpBins(MaxV, BucketV); MaxV.Swap(BucketV);
519  } else {
520  //const int GEdges = Graph->GetEdges();
521  for (int i = 0; i < BestCutH.Len(); i++) {
522  MinV.Add(TFltPr(BestCutH.GetKey(i).Val, BestCutH[i].GetPhi()));
523  }
524  MinV.Sort();
525  TLocClustStat::MakeExpBins(MinV, BucketV); MinV.Swap(BucketV);
526  }
527 }
528 
529 void TLocClustStat::GetBoltzmanCurveStat(const TFltV& TempV, TVec<TFltPrV>& NcpV) const {
530  IAssert(! SizePhiH.Empty()); // stores conductances of all little clusters
531  NcpV.Gen(TempV.Len());
532  TFltPrV BucketV;
533  for (int t = 0; t < TempV.Len(); t++) {
534  const double T = TempV[t];
535  for (int i = 0; i < SizePhiH.Len(); i++) {
536  const double X = SizePhiH.GetKey(i).Val; IAssert(X >= 1.0);
537  const TFltV& PhiV = SizePhiH[i];
538  double V = 0.0, SumW = 0.0;
539  for (int j = 0; j < PhiV.Len(); j++) {
540  V += PhiV[j] * exp(-PhiV[j]/T);
541  SumW += exp(-PhiV[j]/T);
542  }
543  //V /= PhiV.Len();
544  V /= SumW;
545  NcpV[t].Add(TFltPr(X, V));
546  }
547  TLocClustStat::MakeExpBins(NcpV[t], BucketV); NcpV[t].Swap(BucketV);
548  // normalize to start at (1,1)
549  //for (int i = 0; i < NcpV[t].Len(); i++) {
550  // NcpV[t][i].Val2 /= NcpV[t][0].Val2;
551  //}
552  }
553 }
554 
556  if (Graph.Empty()) {
557  return TStr::Fmt("A=%g, K=%d-%g-%s, Cvr=%d, SzFrc=%g", Alpha(), KMin(), KFac(), TInt::GetMegaStr(KMax()).CStr(), Coverage(), SizeFrac()); }
558  else {
559  return TStr::Fmt("A=%g, K=%d-%g-%s, Cvr=%d, SzFrc=%g G(%d, %d)", Alpha(), KMin(), KFac(), TInt::GetMegaStr(KMax()).CStr(), Coverage(), SizeFrac(),
560  Graph->GetNodes(), Graph->GetEdges());
561  }
562 }
563 
564 void TLocClustStat::PlotNCP(const TStr& OutFNm, TStr Desc) const {
565  if (Desc.Empty()) { Desc = OutFNm; }
566  TFltPrV MeanV, MedV, Dec1V, MinV, MaxV;
567  GetCurveStat(MeanV, MedV, Dec1V, MinV, MaxV);
568  TGnuPlot GP("ncp."+OutFNm, TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr()));
569  if (! MaxV.Empty()) { GP.AddPlot((MaxV), gpwLines, "MAX"); }
570  if (! MedV.Empty()) { GP.AddPlot((MedV), gpwLines, "MEDIAN"); }
571  if (! MeanV.Empty()) { GP.AddPlot((MeanV), gpwLines, "MEAN"); }
572  if (! Dec1V.Empty()) { GP.AddPlot((Dec1V), gpwLines, "1-st DECILE"); }
573  if (! MinV.Empty()) {
574  GP.AddPlot((MinV), gpwLines, "MIN");
575  //GP.AddPlot(MinV, gpwLines, "smooth MIN", "lw 2 smooth bezier");
576  }
577  if (! BagOfWhiskerV.Empty()) {
578  GP.AddPlot(BagOfWhiskerV, gpwLines, "Whiskers", "lw 1");
579  TFltPrV BestWhiskV; BestWhiskV.Add(TFltPr(BestWhisk));
580  GP.AddPlot(BestWhiskV, gpwPoints, "Best whisker", "pt 5 ps 2");
581  }
582  GP.SetScale(gpsLog10XY);
583  GP.SetXYLabel("k (number of nodes in the cluster)", "{/Symbol \\F} (conductance)");
584  GP.SetXRange(1, Graph->GetNodes()/2);
585  GP.SavePng();
586 }
587 
588 // NCP but with modularity on Y-axis
589 void TLocClustStat::PlotNCPModul(const TStr& OutFNm, TStr Desc) const {
590  if (Desc.Empty()) { Desc = OutFNm; }
591  TFltPrV MinV, BucketV;
592  const int GEdges = Graph->GetEdges();
593  for (int i = 0; i < BestCutH.Len(); i++) {
594  MinV.Add(TFltPr(BestCutH.GetKey(i).Val, BestCutH[i].GetModular(GEdges))); }
595  MinV.Sort();
596  TLocClustStat::MakeExpBins(MinV, BucketV); MinV.Swap(BucketV);
597  TGnuPlot GP("ncpMod."+OutFNm, TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr()));
598  if (! MinV.Empty()) {
599  GP.AddPlot((MinV), gpwLines, "MIN"); }
600  GP.SetScale(gpsLog10XY);
601  GP.SetXYLabel("k (number of nodes in the cluster)", "Q (modularity)");
602  GP.SetXRange(1, Graph->GetNodes()/2);
603  GP.SavePng();
604 }
605 
606 // plot top 10 surface/volume curves (disjont clusters of particular size)
607 void TLocClustStat::PlotNcpTop10(const TStr& OutFNm, TStr Desc, const int& TakeMinN) const {
608  if (Desc.Empty()) { Desc = OutFNm; }
609  const double BinFactor = 1.05;
610  double BinPos=1;
611  int PrevBPos=1, CurBPos=1, CutId;
612  bool AddSome;
613  TVec<TFltPrV> Curves(TakeMinN);
614  while (true) {
615  PrevBPos = CurBPos;
616  BinPos *= BinFactor; // do exponential binning
617  CurBPos = (int) floor(BinPos);
618  if (CurBPos == PrevBPos) { CurBPos=PrevBPos+1; BinPos=CurBPos; }
619  const int Nodes = CurBPos;
620  TIntSet TabuNIdSet(Graph->GetNodes());
621  AddSome = false;
622  for (int t = 0; t < TakeMinN; t++) {
623  const double Phi = FindBestCut(Nodes, TabuNIdSet, CutId);
624  if (CutId == -1) { break; }
625  Curves[t].Add(TFltPr(Nodes, Phi));
626  for (int n = 0; n < Nodes; n++) {
627  TabuNIdSet.AddKey(SweepsV[CutId].NId(n)); }
628  AddSome = true;
629  }
630  if (! AddSome) { break; }
631  }
632  TGnuPlot GP("ncpTop."+OutFNm, TStr::Fmt("%s. Top disjoint clusters. Take:%d, %s", Desc.CStr(), TakeMinN, ParamStr().CStr()));
633  for (int i = 0; i < Curves.Len(); i++) {
634  GP.AddPlot(Curves[i], gpwLines, TStr::Fmt("MIN %d", i+1), "lw 1"); }
635  //if (! BagOfWhiskerV.Empty()) { GP.AddPlot(BagOfWhiskerV, gpwLines, " Whiskers", "lw 2"); }
636  GP.SetScale(gpsLog10XY);
637  GP.SetXYLabel("k (number of nodes in the cluster)", "{/Symbol \\F} (conductance)");
638  GP.SetXRange(1, Graph->GetNodes()/2);
639  GP.SavePng();
640 }
641 
642 // plot conductance on the boundary / counductance inside vs. cluster size
643 void TLocClustStat::PlotPhiInOut(const TStr& OutFNm, TStr Desc) const {
644  IAssert(! BestCutH.Empty() && ! Graph.Empty());
645  TFltPrV PhiInV, PhiBoundV, PhiRatV;
646  FILE *F = fopen(TStr::Fmt("phiInOut.%s-all.tab", OutFNm.CStr()).CStr(), "wt");
647  fprintf(F, "#Nodes\tEdges\tVol\tCut\tPhi\tInNodes\tInEdges\tInVol\tInCut\tInPhi\n");
648  TLocClustStat ClustStat2(Alpha, 1, KMax, KFac, Coverage, SizeFrac);
649  const double IdxFact = 1.05;
650  int curIdx=1, prevIdx=1;
651  while (curIdx <= BestCutH.Len()) {
652  const TCutInfo& CutInfo = BestCutH[curIdx-1];
653  if (CutInfo.GetNodes() > 1) {
654  PUNGraph ClustG = TSnap::GetSubGraph(Graph, CutInfo.CutNIdV);
655  ClustStat2.Run(ClustG);
656  const TCutInfo& InCut = ClustStat2.FindBestCut(-1);
657  PhiInV.Add(TFltPr(CutInfo.GetNodes(), InCut.GetPhi()));
658  PhiBoundV.Add(TFltPr(CutInfo.GetNodes(), CutInfo.GetPhi()));
659  PhiRatV.Add(TFltPr(CutInfo.GetNodes(), InCut.GetPhi()/CutInfo.GetPhi()));
660  fprintf(F, "%d\t%d\t%d\t%d\t%f\t%d\t%d\t%d\t%d\t%f\n", CutInfo.GetNodes(), CutInfo.GetEdges(), CutInfo.GetVol(),
661  CutInfo.GetCutSz(), CutInfo.GetPhi(), InCut.GetNodes(), InCut.GetEdges(), InCut.GetVol(), InCut.GetCutSz(), InCut.GetPhi());
662  fflush(F);
663  }
664  prevIdx = curIdx;
665  curIdx = (int) TMath::Round(double(curIdx)*IdxFact);
666  if (prevIdx == curIdx) { curIdx++; }
667  }
668  fclose(F);
669  { TGnuPlot GP("phiInOut."+OutFNm, TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr()));
670  GP.AddPlot(PhiBoundV, gpwLines, "CUT conductance", "lw 1");
671  GP.AddPlot(PhiInV, gpwLines, "INSIDE conductance", "lw 1");
672  //GP.AddPlot(PhiRatV, gpwLines, "RATIO (inside/boundary)", "lw 1");
673  GP.SetXRange(1, Graph->GetNodes()/2); GP.SetScale(gpsLog10XY);
674  GP.SetXYLabel("k (number of nodes in the cluster)", "{/Symbol \\F} (conductance)");
675  //GP.AddCmd("set logscale xyy2 10");
676  //GP.AddCmd("set y2label \"Conductance ratio (inside/boundary -- higher better)\"");
677  GP.SavePng(); }
678  //system(TStr(TStr("replace_all.py phiInOut.")+OutFNm+".plt \"title \\\"RATIO (inside/boundary)\" \"axis x1y2 title \\\"RATIO (inside/boundary)\"").CStr());
679  //GP.RunGnuPlot();
680  { TGnuPlot GP("phiInOutRat."+OutFNm, TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr()));
681  GP.AddPlot(PhiRatV, gpwLines, "RATIO (Inside / Boundary)", "lw 1");
682  GP.SetXRange(1, Graph->GetNodes()/2); GP.SetScale(gpsLog10XY);
683  GP.SetXYLabel("Nodes", "Conductance ratio (inside/boundary) -- higher better");
684  GP.SavePng(); }
685 }
686 
687 // Plot edges inside, edges cut and conductance.
688 // run: replace_all.py cutEdges.*.plt "title \"Conductance" "axis x1y2 title \"Conductance"
689 void TLocClustStat::PlotBestClustDens(TStr OutFNm, TStr Desc) const {
690  if (Desc.Empty()) { Desc = OutFNm; }
691  const int len = BestCutH.Len();
692  TFltPrV CutV(len, 0), EdgesV(len, 0), PhiV(len,0);
693  for (int i = 0; i < BestCutH.Len(); i++) {
694  const double size = BestCutH.GetKey(i).Val;
695  CutV.Add(TFltPr(size, BestCutH[i].GetCutSz()));
696  EdgesV.Add(TFltPr(size, BestCutH[i].GetEdges()));
697  PhiV.Add(TFltPr(size, BestCutH[i].GetPhi()));
698  }
699  TGnuPlot GP("cutEdges."+OutFNm, TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr()));
700  TFltPrV NewV; TLocClustStat::MakeExpBins(EdgesV, NewV);
701  GP.AddPlot(NewV, gpwLines, "Edges inside");
702  TLocClustStat::MakeExpBins(CutV, NewV);
703  GP.AddPlot(NewV, gpwLines, "Cut edges");
704  TLocClustStat::MakeExpBins(PhiV, NewV);
705  GP.AddPlot(NewV, gpwLines, "Conductance");
706  GP.SetXYLabel("Cluster size", "Edges"); GP.SetScale(gpsLog10XY);
707  GP.AddCmd("set logscale xyy2 10");
708  GP.AddCmd("set y2label \"Conductance\"");
709  GP.SavePng();
710  system(TStr(TStr("replace_all.py cutEdges.")+OutFNm+".plt \"title \\\"Conductance\" \"axis x1y2 title \\\"Conductance\"").CStr());
711  GP.RunGnuPlot();
712 }
713 
714 // all different conducances of all sizes (not just lower envelope)
715 void TLocClustStat::PlotNCPScatter(const TStr& OutFNm, TStr Desc) const {
716  if (Desc.Empty()) { Desc = OutFNm; }
717  THashSet<TFltPr> PhiSzH;
718  IAssertR(! SizePhiH.Empty(), "Set SaveAllCond=true in TLocClustStat::Run()");
719  for (int k = 0; k < SizePhiH.Len(); k++) {
720  const int K = SizePhiH.GetKey(k);
721  const TFltV& PhiV = SizePhiH[k];
722  for (int p = 0; p < PhiV.Len(); p++) {
723  PhiSzH.AddKey(TFltPr(K, TMath::Round(PhiV[p], 3))); }
724  }
725  TFltPrV PhiSzV; PhiSzH.GetKeyV(PhiSzV);
726  TGnuPlot GP("ncpScatter."+OutFNm, TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr()));
727  GP.AddPlot(PhiSzV, gpwPoints, "", "pt 5 ps 0.2");
728  GP.SetScale(gpsLog10XY);
729  GP.SetXYLabel("k (number of nodes in the cluster)", "{/Symbol \\F} (conductance)");
730  GP.SetXRange(1, Graph->GetNodes()/2);
731  GP.SavePng();
732 }
733 
734 // histogram of conductances for a fixed CmtySz
735 void TLocClustStat::PlotPhiDistr(const int& CmtySz, const TStr& OutFNm, TStr Desc) const {
736  IAssert(! SizePhiH.Empty());
737  const TFltV& PhiV = SizePhiH.GetDat(CmtySz);
738  THash<TFlt, TInt> PhiCntH;
739  for (int i = 0; i < PhiV.Len(); i++) {
740  const double Buck = TMath::Round(PhiV[i], 3);
741  PhiCntH.AddDat(Buck) += 1;
742  }
743  TGnuPlot::PlotValCntH(PhiCntH, TStr::Fmt("phiDistr%03d.%s", CmtySz, OutFNm.CStr()),
744  TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr()), "{/Symbol \\F} (conductance)", "Count");
745 }
746 
747 void TLocClustStat::PlotBoltzmanCurve(const TStr& OutFNm, TStr Desc) const {
748  TFltPrV MeanV1, MedV1, Dec1V1, MinV1, MaxV1;
749  GetCurveStat(MeanV1, MedV1, Dec1V1, MinV1, MaxV1);
750  TVec<TFltPrV> NcpV;
751  //const TFltV TempV = TFltV::GetV(0.05, 0.01, 0.1, 10, 100, 1000);
752  const TFltV TempV = TFltV::GetV(0.001, 0.005, 0.01, 0.02, 0.05, 0.1, 0.5, 1);
753  GetBoltzmanCurveStat(TempV, NcpV);
754  TGnuPlot GP("ncp."+OutFNm+"-B", TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr()));
755  GP.AddPlot(MinV1, gpwLines, TStr::Fmt("%s MIN (%d, %d)", Desc.CStr(), Graph->GetNodes(), Graph->GetEdges()), "lw 1");
756  GP.AddPlot(MeanV1, gpwLines, "Avg", "lw 1");
757  GP.AddPlot(MedV1, gpwLines, "Median", "lw 1");
758  GP.AddPlot(Dec1V1, gpwLines, "Decile-1", "lw 1");
759  for (int t = 0; t < TempV.Len(); t++) {
760  GP.AddPlot(NcpV[t], gpwLines, TStr::Fmt("Temp %g", TempV[t]()), "lw 1");
761  }
762  GP.SetScale(gpsLog10XY);
763  GP.SetXYLabel("k (number of nodes in the cluster)", "{/Symbol \\F} (conductance)");
764  GP.SavePng();
765  //
766  TFltPrV SzNClustV;
767  int kCnt=1;
768  for (int i = 0; i < SizePhiH.Len(); i++) {
769  const int K = SizePhiH.GetKey(i);
770  const TFltV& PhiV = SizePhiH[i];
771  SzNClustV.Add(TFltPr(K, PhiV.Len()));
772  if (K>2 && (pow(10.0, (int)log10((double)K))==K || (K >=10 && K<=100 && K%10==0) || (K >=100 && K<=1000 && K%100==0))) {
773  THash<TFlt, TFlt> CntH;
774  for (int p = 0; p < PhiV.Len(); p++) {
775  CntH.AddDat(TMath::Round(log10(PhiV[p].Val),1)) += 1;
776  }
777  TGnuPlot::PlotValCntH(CntH, TStr::Fmt("ncp.%s-c%02d", OutFNm.CStr(), kCnt++), TStr::Fmt("%s. K=%d, NPieces=%d, %s",
778  Desc.CStr(), K, PhiV.Len(), ParamStr().CStr()), "log_10 {/Symbol \\F} (conductance)",
779  TStr::Fmt("Number of pieces of such conductance, K=%d, NPieces=%d)", K, PhiV.Len()));
780  }
781  }
782  TGnuPlot::PlotValV(SzNClustV, "ncp."+OutFNm+"-ClSz", TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr()),
783  "k (cluster size)", "c(k) (number of extracted clusters)", gpsLog);
784 }
785 
786 void TLocClustStat::ImposeNCP(const TLocClustStat& LcStat2, TStr OutFNm, TStr Desc, TStr Desc1, TStr Desc2) const {
787  if (Desc.Empty()) { Desc = OutFNm; }
788  TFltPrV MeanV1, MedV1, Dec1V1, MinV1, MaxV1;
789  TFltPrV MeanV2, MedV2, Dec1V2, MinV2, MaxV2;
790  GetCurveStat(MeanV1, MedV1, Dec1V1, MinV1, MaxV1);
791  LcStat2.GetCurveStat(MeanV2, MedV2, Dec1V2, MinV2, MaxV2);
792  // plot
793  TGnuPlot GP("ncp."+OutFNm, TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr()));
794  if (! MinV1.Empty()) { GP.AddPlot(MinV1, gpwLines, TStr::Fmt("%s MIN (%d, %d)", Desc1.CStr(), Graph->GetNodes(), Graph->GetEdges()), "lw 1"); }
795  if (! MinV2.Empty()) { GP.AddPlot(MinV2, gpwLines, TStr::Fmt("%s MIN (%d, %d)", Desc2.CStr(), LcStat2.Graph->GetNodes(), LcStat2.Graph->GetEdges()), "lw 1"); }
796  if (! BagOfWhiskerV.Empty()) {
797  GP.AddPlot(BagOfWhiskerV, gpwLines, Desc1+" whiskers", "lw 1");
798  TFltPrV BestWhiskV; BestWhiskV.Add(BestWhisk);
799  GP.AddPlot(BestWhiskV, gpwPoints, Desc1+" Best whisker", "pt 5 ps 2"); }
800  if (! LcStat2.BagOfWhiskerV.Empty()) {
801  GP.AddPlot(LcStat2.BagOfWhiskerV, gpwLines, Desc2+" whiskers", "lw 1");
802  TFltPrV BestWhiskV; BestWhiskV.Add(LcStat2.BestWhisk);
803  GP.AddPlot(BestWhiskV, gpwPoints, Desc2+" Best whisker", "pt 5 ps 2"); }
804  GP.SetScale(gpsLog10XY);
805  GP.SetXYLabel("k (number of nodes in the cluster)", "{/Symbol \\F} (conductance)");
806  //GP.SetXRange(1, Graph->GetNodes()/2);
807  GP.SavePng();
808 }
809 
810 void TLocClustStat::ImposeNCP(const TLocClustStat& LcStat2, const TLocClustStat& LcStat3, TStr OutFNm, TStr Desc, TStr Desc1, TStr Desc2, TStr Desc3) const {
811  if (Desc.Empty()) { Desc = OutFNm; }
812  TFltPrV MeanV1, MedV1, Dec1V1, MinV1, MaxV1;
813  TFltPrV MeanV2, MedV2, Dec1V2, MinV2, MaxV2;
814  TFltPrV MeanV3, MedV3, Dec1V3, MinV3, MaxV3;
815  GetCurveStat(MeanV1, MedV1, Dec1V1, MinV1, MaxV1);
816  LcStat2.GetCurveStat(MeanV2, MedV2, Dec1V2, MinV2, MaxV2);
817  LcStat3.GetCurveStat(MeanV3, MedV3, Dec1V3, MinV3, MaxV3);
818  // plot
819  TGnuPlot GP("phiTR."+OutFNm, TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr()));
820  if (! MinV1.Empty()) { GP.AddPlot(MinV1, gpwLines, Desc1+" MIN", "lw 1"); }
821  if (! MinV2.Empty()) { GP.AddPlot(MinV2, gpwLines, Desc2+" MIN", "lw 1"); }
822  if (! MinV3.Empty()) { GP.AddPlot(MinV3, gpwLines, Desc3+" MIN", "lw 1"); }
823  if (! BagOfWhiskerV.Empty()) {
824  GP.AddPlot(BagOfWhiskerV, gpwLines, Desc1+" whiskers", "lw 1");
825  TFltPrV BestWhiskV; BestWhiskV.Add(BestWhisk);
826  GP.AddPlot(BestWhiskV, gpwPoints, Desc1+" Best whisker", "pt 5 ps 2"); }
827  if (! LcStat2.BagOfWhiskerV.Empty()) {
828  GP.AddPlot(LcStat2.BagOfWhiskerV, gpwLines, Desc2+" whiskers", "lw 1");
829  TFltPrV BestWhiskV; BestWhiskV.Add(LcStat2.BestWhisk);
830  GP.AddPlot(BestWhiskV, gpwPoints, Desc2+" Best whisker", "pt 5 ps 2"); }
831  if (! LcStat3.BagOfWhiskerV.Empty()) {
832  GP.AddPlot(LcStat3.BagOfWhiskerV, gpwLines, Desc3+" whiskers", "lw 1");
833  TFltPrV BestWhiskV; BestWhiskV.Add(LcStat3.BestWhisk);
834  GP.AddPlot(BestWhiskV, gpwPoints, Desc3+" Best whisker", "pt 5 ps 2"); }
835  GP.SetScale(gpsLog10XY);
836  GP.SetXYLabel("k (number of nodes in the cluster)", "{/Symbol \\F} (conductance)");
837  GP.SetXRange(1, Graph->GetNodes()/2);
838  GP.SavePng();
839 }
840 
841 void TLocClustStat::SaveTxtInfo(const TStr& OutFNmPref, const TStr& Desc, const bool& SetMaxAt1) const {
842  printf("Save text info...");
843  TExeTm ExeTm;
844  const int GNodes = Graph->GetNodes();
845  const int GEdges = Graph->GetEdges();
846  TVec<TFltV> ColV(17);
847  double MxFrac=0, AvgFrac=0, MedianFrac=0, Pct9Frac=0, Flake=0;
848  // double PrevBPos = 1, BPos = 1;
849  for (int i = 0; i < SizeBucketSet.Len(); i++) {
850  if ( !BestCutH.IsKey(SizeBucketSet[i])) { continue; }
852  C.GetFracDegOut(Graph, MxFrac, AvgFrac, MedianFrac, Pct9Frac, Flake);
853  ColV[0].Add(C.Nodes()); ColV[1].Add(C.Edges());
854  ColV[2].Add(C.CutSz()); ColV[3].Add(C.GetPhi());
855  ColV[4].Add(C.GetExpansion()); ColV[5].Add(C.GetIntDens());
856  ColV[6].Add(C.GetCutRatio(GNodes)); ColV[7].Add(C.GetNormCut(GEdges));
857  ColV[8].Add(MxFrac); ColV[9].Add(AvgFrac);
858  ColV[10].Add(MedianFrac); ColV[11].Add(Pct9Frac); ColV[12].Add(Flake);
859  ColV[13].Add(double(2.0*C.Edges)); ColV[14].Add(C.GetExpEdgesIn(GEdges));
860  ColV[15].Add(C.GetModular(GEdges)); ColV[16].Add(C.GetModRat(GEdges));
861  printf(".");
862  }
863  // normalize so that maximum value is at 1
864  if (SetMaxAt1) {
865  for (int c = 1; c < ColV.Len(); c++) {
866  double MaxVal=1e-10;
867  for (int r = 0; r < ColV[c].Len(); r++) { MaxVal = TMath::Mx(MaxVal, ColV[c][r]()); }
868  for (int r = 0; r < ColV[c].Len(); r++) { ColV[c][r] /= MaxVal; }
869  }
870  }
871  // save
872  const TStr DatFNm = TStr::Fmt("ncp.%s.INFO.tab", OutFNmPref.CStr());
873  FILE *F = fopen(DatFNm.CStr(), "wt");
874  fprintf(F, "# %s %s\n", Desc.CStr(), ParamStr().CStr());
875  fprintf(F, "#N_inside\tE_inside\tE_across\tConductance\tExpansion\tIntDensity\tCutRatio\tNormCut\tMx_FracDegOut\tAvg_FDO\tMedian_FDO\t90Pct_FDO\tFlake_FDO\tVolume\tExpVolume\tModularity\tModRatio\n");
876  for (int r = 0; r < ColV[0].Len(); r++) {
877  fprintf(F, "%g", ColV[0][r]());
878  for (int c = 1; c < ColV.Len(); c++) {
879  fprintf(F, "\t%g", ColV[c][r]()); }
880  fprintf(F, "\n");
881  }
882  fclose(F);
883  printf("[%s]\n", ExeTm.GetStr());
884  TGnuPlot GP(TStr::Fmt("ncp.%s.All", OutFNmPref.CStr()), TStr::Fmt("%s %s",
885  Desc.CStr(), ParamStr().CStr()));
886  GP.AddPlot(DatFNm, 1, 4, gpwLines, "Conductance", "lw 2");
887  GP.AddPlot(DatFNm, 1, 5, gpwPoints, "Expansion", "pt 3");
888  GP.AddPlot(DatFNm, 1, 6, gpwPoints, "Internal Density", "pt 5 ps 0.8");
889  GP.AddPlot(DatFNm, 1, 7, gpwPoints, "Cut Ratio", "pt 6");
890  GP.AddPlot(DatFNm, 1, 8, gpwPoints, "Normalized Cut", "pt 7");
891  GP.AddPlot(DatFNm, 1, 9, gpwPoints, "Maximum FDO", "pt 9");
892  GP.AddPlot(DatFNm, 1, 10, gpwPoints, "Avg FDO", "pt 11");
893  GP.AddPlot(DatFNm, 1, 13, gpwPoints, "Flake FDO", "pt 13");
894  GP.SetScale(gpsLog10XY);
895  GP.SetXYLabel("k (number of nodes in the cluster)", "Normalized community score (lower is better)");
896  GP.SavePng();
897 }
898 
899 // conductances if clusters are composed of disjoint pieces that can be separated
900 // from the graph by a single edge
901 void TLocClustStat::BagOfWhiskers(const PUNGraph& Graph, TFltPrV& SizePhiV, TFltPr& MaxWhisk) {
902  TCnComV Cn1ComV;
903  TSnap::Get1CnCom(Graph, Cn1ComV);
904  TIntPrV SzVolV;
905  int MxSize=0;
906  if (Cn1ComV.Empty()) { printf("** No bridges\n"); SizePhiV.Clr(); return; }
907  // Graph->SaveEdgeList("g-vol.txt"); TGraphViz::Plot(Graph, gvlNeato, "g-vol.gif"); Fail; } IAssert(vol <= sz*(sz-1));
908  printf(" 1-connected components: %d\n", Cn1ComV.Len());
909  MaxWhisk = TFltPr(1,1);
910  for (int c = 0; c < Cn1ComV.Len(); c++) {
911  const TIntV& NIdV = Cn1ComV[c].NIdV;
912  const int sz = NIdV.Len();
913  if (sz < 2) { continue; }
914  int vol = 0; // volume is the size of degrees
915  for (int n = 0; n < sz; n++) {
916  vol += Graph->GetNI(NIdV[n]).GetOutDeg(); }
917  SzVolV.Add(TIntPr(sz, vol));
918  MxSize += sz;
919  if (1.0/double(vol) < MaxWhisk.Val2) { MaxWhisk=TFltPr(NIdV.Len(), 1.0/double(vol)); }
920  }
921  SzVolV.Sort(false);
922  // compose clusters
923  THash<TInt, TIntSet> ItemSetH(MxSize, true);
924  THash<TInt, TInt> VolH(MxSize, true);
925  THash<TInt, TFlt> CostH(MxSize, true);
926  ItemSetH.AddKey(0); VolH.AddKey(0);
927  TExeTm ExeTm;
928  // exact up to size 1000
929  for (int size = 2; size <= TMath::Mn(MxSize, 1000); size++) {
930  for (int item = 0; item <SzVolV.Len(); item++) {
931  const int smallSz = size-SzVolV[item].Val1;
932  if (ItemSetH.IsKey(smallSz)) {
933  const TIntSet& SmallSet = ItemSetH.GetDat(smallSz);
934  if (SmallSet.IsKey(item)) { continue; }
935  const int SmallVol = VolH.GetDat(smallSz);
936  // combine smaller nad new solution
937  const double curCost = CostH.IsKey(size) ? double(CostH.GetDat(size)) : double(10e10);
938  const double newCost = double(SmallSet.Len()+1) / double(SmallVol+SzVolV[item].Val2);
939  if (curCost < newCost) { continue; }
940  VolH.AddDat(size, SmallVol+SzVolV[item].Val2);
941  ItemSetH.AddDat(size, SmallSet);
942  ItemSetH.GetDat(size).AddKey(item);
943  CostH.AddDat(size, newCost);
944  }
945  }
946  if (VolH.IsKey(size) && size%100==0) {
947  printf("\rSize: %d/%d: vol: %d, items: %d/%d [%s]", size, MxSize, VolH.GetDat(size).Val,
948  ItemSetH.GetDat(size).Len(), SzVolV.Len(), ExeTm.GetStr());
949  }
950  }
951  // add sizes larger than 1000
952  printf("\nAdding sizes > 1000 nodes...");
953  int partSz=0; double partVol=0.0;
954  for (int i = 0; i < SzVolV.Len(); i++) {
955  partSz += SzVolV[i].Val1();
956  partVol += SzVolV[i].Val2();
957  if (partSz < 1000) { continue; }
958  const double curCost = CostH.IsKey(partSz) ? double(CostH.GetDat(partSz)) : double(10e10);
959  const double partPhi = double(i+1)/partVol;
960  if (partPhi < curCost) {
961  CostH.AddDat(partSz, partPhi);
962  }
963  }
964  VolH.SortByKey();
965  CostH.SortByKey();
966  SizePhiV.Gen(CostH.Len(), 0);
967  SizePhiV.Add(TFltPr(1, 1));
968  for (int s = 0; s < CostH.Len(); s++) {
969  const int size = CostH.GetKey(s);
970  const double cond = CostH[s]; //double(ItemSetH.GetDat(size).Len())/double(VolH[s]);
971  SizePhiV.Add(TFltPr(size, cond));
972  }
973  printf("done\n");
974 }
975 
976 // faster greedy version
977 void TLocClustStat::BagOfWhiskers2(const PUNGraph& Graph, TFltPrV& SizePhiV) {
978  TCnComV Cn1ComV;
979  TSnap::Get1CnCom(Graph, Cn1ComV);
980  TIntPrV SzVolV;
981  int MxSize=0, TotVol=0;
982  if (Cn1ComV.Empty()) { printf("** No bridges\n"); SizePhiV.Clr(); return; }
983  printf(" 1-connected components: %d\n", Cn1ComV.Len());
984  for (int c = 0; c < Cn1ComV.Len(); c++) {
985  const TIntV& NIdV = Cn1ComV[c].NIdV;
986  const int sz = NIdV.Len();
987  if (sz < 2) { continue; }
988  int vol = 0; // volume is the size of degrees
989  for (int n = 0; n < sz; n++) {
990  vol += Graph->GetNI(NIdV[n]).GetOutDeg(); }
991  SzVolV.Add(TIntPr(sz, vol));
992  MxSize += sz; TotVol += vol;
993  }
994  printf(" Total size: %d\t Total vol: %d\n", MxSize, TotVol);
995  SzVolV.Sort(false);
996  // compose clusters
997  THash<TFlt, TFlt> SizePhiH(MxSize, true);
998  for (int i = 0; i < SzVolV.Len(); i++) {
999  const int Sz = SzVolV[i].Val1();
1000  const double Phi = 1.0/double(SzVolV[i].Val2());
1001  if ((! SizePhiH.IsKey(Sz)) || SizePhiH.GetDat(Sz) > Phi) {
1002  SizePhiH.AddDat(Sz, Phi); }
1003  }
1004  double partSz=0.0, partVol=0.0;
1005  for (int i = 0; i < SzVolV.Len(); i++) {
1006  partSz += SzVolV[i].Val1();
1007  partVol += SzVolV[i].Val2();
1008  const double partPhi = double(i+1)/partVol;
1009  if ((! SizePhiH.IsKey(partSz)) || partPhi < SizePhiH.GetDat(partSz)) {
1010  SizePhiH.AddDat(partSz, partPhi); }
1011  }
1012  SizePhiV.Gen(SizePhiH.Len()+1, 0);
1013  SizePhiV.Add(TFltPr(1, 1));
1014  SizePhiH.SortByKey();
1015  for (int s = 0; s < SizePhiH.Len(); s++) {
1016  SizePhiV.Add(TFltPr(SizePhiH.GetKey(s), SizePhiH[s]));
1017  }
1018 }
1019 
1020 void TLocClustStat::MakeExpBins(const TFltPrV& ValV, TFltPrV& NewV) {
1021  if (ValV.Empty()) { NewV.Clr(false); return; }
1022  NewV.Gen(1000, 0);
1023  double PrevBPos = 1, BPos = 1;
1024  int i = 0;
1025  while (BPos <= ValV.Last().Val1) {
1026  //if (TakeValAt == 1) {
1027  // while (i < ValV.Len() && ValV[i].Val1 <= BPos) { i++; }
1028  // NewV.Add(ValV[i-1]); }
1029  //else if (TakeValAt == 2) {
1030  int MinI=-1; double MinCnt=TFlt::Mx;
1031  while (i < ValV.Len() && ValV[i].Val1 <= BPos) {
1032  if (ValV[i].Val2 < MinCnt) { MinCnt=ValV[i].Val2; MinI=i; } i++; }
1033  if (MinI>=0 && MinI<ValV.Len()) {
1034  NewV.Add(ValV[MinI]); }
1035  PrevBPos = (uint) floor(BPos);
1036  BPos *= BinFactor;
1037  if (floor(BPos) == PrevBPos) { BPos = PrevBPos + 1; }
1038  }
1039  NewV.Add(ValV.Last());
1040 }
1041 
1042 void TLocClustStat::MakeExpBins(const TFltV& ValV, TFltV& NewV) {
1043  if (ValV.Empty()) { NewV.Clr(false); return; }
1044  NewV.Gen(1000, 0);
1045  double PrevBPos = 1, BPos = 1;
1046  int i = 1;
1047  NewV.Add(ValV[0]);
1048  while (BPos <= ValV.Len()) {
1049  int MinI=-1; double MinCnt=TFlt::Mx;
1050  while (i < ValV.Len() && i <= BPos) {
1051  if (ValV[i] < MinCnt) { MinCnt=ValV[i]; MinI=i; } i++; }
1052  if (MinI>=0 && MinI<ValV.Len()) {
1053  NewV.Add(ValV[MinI]); }
1054  PrevBPos = (uint) floor(BPos);
1055  BPos *= BinFactor;
1056  if (floor(BPos) == PrevBPos) { BPos = PrevBPos + 1; }
1057  }
1058  NewV.Add(ValV.Last());
1059 }
1060 
1061 
1063 // Local clustering for a set of graphs (loads ncp-*.tab files)
1065  TStr FNm;
1066  for (TFFile FFile(FNmWc); FFile.Next(FNm); ) {
1067  TSsParser Ss(FNm, ssfTabSep, true, false);
1068  int TrueNcpId=-1, WhiskId=-1, RewBestWhiskId=-1, RewId=-1, BestWhiskId=-1;
1069  while (Ss.Next()) {
1070  for (int f = 0; f < Ss.GetFlds(); f++) {
1071  // load ForestFire parameter (fwd burn prob)
1072  if (strstr(Ss[f], "FWD:")) {
1073  TStr S(Ss[f]); const int x = S.SearchStr("FWD:");
1074  ParamValV.Add(S.GetSubStr(x+4, S.SearchCh(' ', x+1)-1).GetFlt());
1075  }
1076  // extract column names
1077  if (strstr(Ss[f], "ORIGINAL MIN")!=NULL) {
1078  GNmV.Add(TStr::Fmt("%s %s", FNm.GetSubStr(FNm.SearchCh('.')+1, FNm.SearchChBack('.')-1).CStr(), strchr(Ss[f], '(')));
1079  int Nodes=0,Edges=0; sscanf(strchr(Ss[f], '(')+1, "%d,%d)", &Nodes, &Edges);
1080  GSizeV.Add(TIntPr(Nodes, Edges));
1081  printf("%s: %d %d\n", GNmV.Last().CStr(), Nodes, Edges);
1082  TrueNcpId=f;
1083  }
1084  if (strstr(Ss[f], "ORIGINAL whisker")!=NULL || strstr(Ss[f], "TRUE whisker")!=NULL) { WhiskId=f; }
1085  if (strstr(Ss[f], "ORIGINAL Best whisker")!=NULL || strstr(Ss[f], "TRUE Best whisker")!=NULL) { BestWhiskId=f; }
1086  if (strstr(Ss[f], "REWIRED MIN")!=NULL || strstr(Ss[f], "RAND MIN")!=NULL) { RewId=f; }
1087  if (strstr(Ss[f], "REWIRED Best whisker")!=NULL || strstr(Ss[f], "RAND Best whisker")!=NULL) { RewBestWhiskId=f; }
1088  }
1089  if (TrueNcpId!=-1 || WhiskId!=-1) { break; }
1090  }
1091  if (TrueNcpId < 0) { printf("%s\n", FNm.GetFMid().CStr()); break; }
1092  if (BestWhiskId < 0) { WhiskerV.Add(TFltPr(1,1)); }
1093  if (RewBestWhiskId < 0) { RewWhiskerV.Add(TFltPr(1,1)); }
1094  NcpV.Add(); WhiskNcpV.Add(); RewNcpV.Add();
1095  TFltPrV& Ncp = NcpV.Last();
1096  TFltPrV& WhiskNcp = WhiskNcpV.Last();
1097  TFltPrV& RewNcp = RewNcpV.Last();
1098  bool Once=false, Once2=false;
1099  while (Ss.Next()) {
1100  if (TrueNcpId < Ss.GetFlds()&& Ss.IsFlt(TrueNcpId)) { Ncp.Add(TFltPr(Ss.GetFlt(TrueNcpId-1), Ss.GetFlt(TrueNcpId))); }
1101  if (WhiskId>=0 && WhiskId < Ss.GetFlds() && Ss.IsFlt(WhiskId)) { WhiskNcp.Add(TFltPr(Ss.GetFlt(WhiskId-1), Ss.GetFlt(WhiskId))); }
1102  if (RewId >=0 && RewId < Ss.GetFlds()&& Ss.IsFlt(RewId)) { RewNcp.Add(TFltPr(Ss.GetFlt(RewId-1), Ss.GetFlt(RewId))); }
1103  if (BestWhiskId>=0 && BestWhiskId < Ss.GetFlds() && ! Once) { Once=true;
1104  int W2=BestWhiskId-1; while (W2 > 0 && Ss.GetFlt(W2)!=(double)int(Ss.GetFlt(W2))) { W2--; }
1105  WhiskerV.Add(TFltPr(Ss.GetFlt(W2), Ss.GetFlt(BestWhiskId))); }
1106  if (RewBestWhiskId>=0 && RewBestWhiskId < Ss.GetFlds() && ! Once2) { Once2=true;
1107  int W2=RewBestWhiskId-1; while (W2 > 0 && Ss.GetFlt(W2)!=(double)int(Ss.GetFlt(W2))) { W2--; }
1108  RewWhiskerV.Add(TFltPr(Ss.GetFlt(W2), Ss.GetFlt(RewBestWhiskId))); }
1109  }
1110  printf(" ncp:%d whisk:%d rew:%d\n", NcpV.Last().Len(), WhiskNcpV.Last().Len(), RewNcpV.Last().Len());
1111  }
1112  IAssert(NcpV.Len() == GSizeV.Len());
1113 }
1114 TNcpGraphsBase::TNcpGraphsBase(TSIn& SIn) : GNmV(SIn), GSizeV(SIn), WhiskerV(SIn),
1115  RewWhiskerV(SIn),NcpV(SIn), RewNcpV(SIn),WhiskNcpV(SIn) {
1116 }
1117 
1118 void TNcpGraphsBase::Save(TSOut& SOut) const {
1119  GNmV.Save(SOut); GSizeV.Save(SOut);
1120  WhiskerV.Save(SOut); RewWhiskerV.Save(SOut); NcpV.Save(SOut);
1121  RewNcpV.Save(SOut); WhiskNcpV.Save(SOut);
1122 }
1123 
1124 void TNcpGraphsBase::Impose(const TStr& OutFNm, const int& TopN, const bool& Smooth) {
1125  TGnuPlot GP(OutFNm);
1126  for (int i = 0; i < TMath::Mn(NcpV.Len(), TopN); i++) {
1127  GP.AddPlot(NcpV[i], gpwLines, GNmV[i], Smooth?"smooth csplines":"");
1128  }
1129  GP.SetScale(gpsLog10XY);
1130  GP.SavePng();
1131 }
1132 
1133 double TNcpGraphsBase::GetXAtMinY(const TFltPrV& Ncp, const int& NNodes) {
1134  double MinX1=1, MinY1=1;
1135  for (int k = 0; k < Ncp.Len(); k++) {
1136  if (Ncp[k].Val2<MinY1) { MinX1=Ncp[k].Val1; MinY1=Ncp[k].Val2; } }
1137  return MinX1<1 ? 1 : MinX1;
1138  //if (NNodes < 1000) return MinX1;
1139  // smooth
1140  /*const int WndSize = 50;
1141  double MinX=1, MinY=1;
1142  TFltPrV Ncp2V;
1143  for (int k = 0; k < Ncp.Len(); k++) {
1144  int WndSz = k > WndSize ? WndSize : k;
1145  double SmoothVal=0.0, SmoothCnt=0;
1146  for (int i = -WndSz; i <= WndSz; i++) {
1147  if (k+i > -1 && k+i < Ncp.Len()) { SmoothCnt+=pow(1.1, -abs(i));
1148  SmoothVal+=pow(1.1, -abs(i)) * Ncp[k+i].Val2; }
1149  }
1150  SmoothVal = SmoothVal/SmoothCnt;
1151  Ncp2V.Add(TFltPr(Ncp[k].Val1, SmoothVal));
1152  if (SmoothVal<MinY) { MinX=Ncp[k].Val1; MinY=SmoothVal; }
1153  }
1154  static int cnt = 0;
1155  if (Ncp2V.Len() > 10 && cnt < 10) {
1156  TGnuPlot GP(TStr::Fmt("test-%03d", ++cnt));
1157  GP.SetScale(gpsLog10XY);
1158  GP.AddPlot(Ncp, gpwLines, "true");
1159  GP.AddPlot(Ncp2V, gpwLines, "smooth");
1160  GP.SavePng();
1161  }
1162  if (MinX < 1) { return 1; } else if (MinX > 1000) { return 1000; }
1163  return MinX;*/
1164 }
1165 
1166 TFltPr TNcpGraphsBase::GetXYAtMinY(const TFltPrV& Ncp, const int& NNodes) {
1167  double MinX1=1, MinY1=1;
1168  for (int k = 0; k < Ncp.Len(); k++) {
1169  if (Ncp[k].Val2<MinY1) { MinX1=Ncp[k].Val1; MinY1=Ncp[k].Val2; } }
1170  return TFltPr(MinX1<1?1:MinX1, MinY1);
1171 }
1172 
1173 void TNcpGraphsBase::PlotNcpMin(const TStr& OutFNm, const bool& VsGraphN) {
1174  TFltPrV GSzMinK, GSzMinY;
1175  for (int i = 0; i < NcpV.Len(); i++) {
1176  const TFltPr XYAtMinY = GetXYAtMinY(NcpV[i], GSizeV[i].Val1);
1177  const double X = VsGraphN ? (!ParamValV.Empty()?ParamValV[i]():i+1) : GSizeV[i].Val1();
1178  GSzMinK.Add(TFltPr(X, XYAtMinY.Val1));
1179  GSzMinY.Add(TFltPr(X, XYAtMinY.Val2));
1180  }
1181  GSzMinK.Sort(); GSzMinY.Sort();
1182  const TStr XLabel = VsGraphN ? (!ParamValV.Empty()?"parameter value":"network number") : "network size";
1183  TGnuPlot::PlotValV(GSzMinK, TStr("bestK-")+OutFNm, "Network", XLabel, "size of best cluster", VsGraphN?gpsLog10Y:gpsLog, false, gpwLinesPoints);
1184  TGnuPlot::PlotValV(GSzMinY, TStr("condAtBestK-")+OutFNm, "Network", XLabel, "conductance of best cluster", VsGraphN?gpsLog10Y:gpsLog, false, gpwLinesPoints);
1185 }
1186 
1187 void TNcpGraphsBase::SaveTxtNcpMin(const TStr& OutFNm, const bool& VsGraphN) {
1189  for (int i = 0; i < NcpV.Len(); i++) {
1190  const TFltPr XYAtMinY = GetXYAtMinY(NcpV[i], GSizeV[i].Val1);
1191  const double X = VsGraphN ? (!ParamValV.Empty()?ParamValV[i]():i+1) : GSizeV[i].Val1();
1192  GSzMinK.Add(TQuad<TInt, TInt, TFlt, TStr>((int)X, (int)XYAtMinY.Val1(), XYAtMinY.Val2, GNmV[i]));
1193  }
1194  GSzMinK.Sort();
1195  FILE *F = fopen(TStr::Fmt("bestK-%s.txt", OutFNm.CStr()).CStr(), "wt");
1196  fprintf(F, "#nodes\tbestK\tcondAtBestK\tgraph name\n");
1197  for (int i = 0; i < GSzMinK.Len(); i++) {
1198  fprintf(F, "%d\t%d\t%f\t%s\n", GSzMinK[i].Val1(), GSzMinK[i].Val2(), GSzMinK[i].Val3(), GSzMinK[i].Val4.CStr());
1199  }
1200  fclose(F);
1201 }
1202 
1203 void TNcpGraphsBase::PlotRewNcpMin(const TStr& OutFNm, const bool& VsGraphN) {
1204  TFltPrV GSzMinK, GSzMinY;
1205  for (int i = 0; i < NcpV.Len(); i++) {
1206  const TFltPr XYAtMinY = GetXYAtMinY(RewNcpV[i], GSizeV[i].Val1);
1207  const double X = VsGraphN ? (!ParamValV.Empty()?ParamValV[i]():i+1) : GSizeV[i].Val1();
1208  GSzMinK.Add(TFltPr(X, XYAtMinY.Val1));
1209  GSzMinY.Add(TFltPr(X, XYAtMinY.Val2));
1210  }
1211  GSzMinK.Sort(); GSzMinY.Sort();
1212  const TStr XLabel = VsGraphN ? (!ParamValV.Empty()?"parameter value":"network number") : "network size";
1213  TGnuPlot::PlotValV(GSzMinK, TStr("bestR-")+OutFNm, "Rewired network", XLabel, "size of best cluster", VsGraphN?gpsLog10Y:gpsLog, false, gpwLinesPoints);
1214  TGnuPlot::PlotValV(GSzMinY, TStr("condAtBestR-")+OutFNm, "Rewired network", XLabel, "conductance of best cluster", VsGraphN?gpsLog10Y:gpsLog, false, gpwLinesPoints);
1215 }
1216 
1217 void TNcpGraphsBase::PlotBestWhisker(const TStr& OutFNm, const bool& VsGraphN) {
1218  TFltPrV GSzMinK, GSzMinY;
1219  for (int i = 0; i < GSizeV.Len(); i++) {
1220  if (WhiskerV[i].Val1()>0) {
1221  const double X = VsGraphN ? (!ParamValV.Empty()?ParamValV[i]():i+1) : GSizeV[i].Val1();
1222  GSzMinK.Add(TFltPr(X, WhiskerV[i].Val1()));
1223  GSzMinY.Add(TFltPr(X, WhiskerV[i].Val2()));
1224  }
1225  }
1226  GSzMinK.Sort(); GSzMinY.Sort();
1227  const TStr XLabel = VsGraphN ? (!ParamValV.Empty()?"parameter value":"network number") : "network size";
1228  TGnuPlot::PlotValV(GSzMinK, TStr("bestW-")+OutFNm, "Network", XLabel, "size of best whisker", VsGraphN?gpsLog10Y:gpsLog, false, gpwLinesPoints);
1229  TGnuPlot::PlotValV(GSzMinY, TStr("condAtBestW-")+OutFNm, "Network", XLabel, "conductance of best whisker", VsGraphN?gpsLog10Y:gpsLog, false, gpwLinesPoints);
1230 }
1231 
1232 void TNcpGraphsBase::PlotRewBestWhisker(const TStr& OutFNm, const bool& VsGraphN) {
1233  TFltPrV GSzMinK, GSzMinY;
1234  for (int i = 0; i < GSizeV.Len(); i++) {
1235  if (WhiskerV[i].Val1()>0) {
1236  const double X = VsGraphN ? (!ParamValV.Empty()?ParamValV[i]():i+1) : GSizeV[i].Val1();
1237  GSzMinK.Add(TFltPr(X, RewWhiskerV[i].Val1()));
1238  GSzMinY.Add(TFltPr(X, RewWhiskerV[i].Val2()));
1239  }
1240  }
1241  GSzMinK.Sort(); GSzMinY.Sort();
1242  const TStr XLabel = VsGraphN ? (!ParamValV.Empty()?"parameter value":"network number") : "network size";
1243  TGnuPlot::PlotValV(GSzMinK, TStr("bestWR-")+OutFNm, "Rewired network", XLabel, "size of best rewired whisker", VsGraphN?gpsLog10Y:gpsLog, false, gpwLinesPoints);
1244  TGnuPlot::PlotValV(GSzMinY, TStr("condAtBestWR-")+OutFNm, "Rewired network", XLabel, "conductance of best rewired whisker", VsGraphN?gpsLog10Y:gpsLog, false, gpwLinesPoints);
1245 }
1246 
1247 void TNcpGraphsBase::PlotAvgNcp(const TStr& OutFNm, const TVec<TFltPrV>& NcpVec, const int& MinSz, const double& MaxMinY) {
1248  THash<TFlt, TMom> MomH;
1249  int Cnt=0;
1250  for (int i = 0; i < NcpVec.Len(); i++) {
1251  if (GSizeV[i].Val1 < MinSz) { continue; }
1252  const TFltPrV& Ncp = NcpVec[i];
1253  double MinX=1, MinY=1;
1254  for (int k = 0; k < Ncp.Len(); k++){
1255  if (Ncp[k].Val2<MinY) { MinX=Ncp[k].Val1; MinY=Ncp[k].Val2; } }
1256  if (MinY > MaxMinY) { continue; } Cnt++;
1257  //const double Coef = (1-0.0001)/(1.0-MinY);
1258  for (int k = 0; k < Ncp.Len(); k++){
1259  //MomH.AddDat(TMath::Round(exp(TMath::Round(log(Ncp[k].Val1()), 2)),2)).Add(0.0001+(Ncp[k].Val2-MinY)*Coef);
1260  MomH.AddDat(TMath::Round(exp(TMath::Round(log(Ncp[k].Val1()), 1)),0)).Add(Ncp[k].Val2);
1261  }
1262  }
1263  TGnuPlot::PlotValMomH(MomH, OutFNm, "", "size of the cluster, k", "phi(k)", gpsLog, gpwLines, true, true,true,true);
1264  printf(" minSz: %d, miny %g\t%d\n", MinSz, MaxMinY, Cnt);
1265 }
1266 
1267 void TNcpGraphsBase::SaveTxt(const TStr& OutFNm) {
1268  FILE *F=fopen(OutFNm.CStr(), "wt");
1269  fprintf(F, "#Nodes\tEdges\tBestK\tPhi(BestK)\tMaxWhiskN\tPhi(MaxWhisk)\tGraph\n");
1270  for (int i = 0; i < NcpV.Len(); i++) {
1271  const TFltPrV& Ncp = NcpV[i];
1272  double MinX=1, MinY=1;
1273  for (int k = 0; k < Ncp.Len(); k++){
1274  if (Ncp[k].Val2<MinY) { MinX=Ncp[k].Val1; MinY=Ncp[k].Val2; } }
1275  fprintf(F, "%d\t%d\t%d\t%f\t%d\t%f\t%s\n", GSizeV[i].Val1(), GSizeV[i].Val2(),
1276  int(MinX), MinY, int(WhiskerV[i].Val1), WhiskerV[i].Val2(), GNmV[i].CStr());
1277  }
1278  fclose(F);
1279 }
1280 
1281 void TNcpGraphsBase::PlotDataset(const TStr& InFNmWc, const TStr& OutFNm, const bool& ImposeNcp, const bool& VsGraphN) {
1282  TNcpGraphsBase NcpBs(InFNmWc);
1283  //NcpBs.Save(TFOut(OutFNm+".NcpBase"));
1284  //TNcpGraphsBase NcpBs(TFIn(OutFNm+".NcpBase"));
1285  if (ImposeNcp) {
1286  NcpBs.Impose(OutFNm+"5R", 5, false); NcpBs.Impose(OutFNm+"5S", 5, true);
1287  NcpBs.Impose(OutFNm+"R", 10, false); NcpBs.Impose(OutFNm+"S", 10, true);
1288  }
1289  NcpBs.PlotNcpMin(OutFNm, VsGraphN);
1290  //NcpBs.SaveTxtNcpMin(OutFNm, VsGraphN);
1291  NcpBs.PlotRewNcpMin(OutFNm, VsGraphN);
1292  NcpBs.PlotBestWhisker(OutFNm, VsGraphN);
1293  NcpBs.PlotRewBestWhisker(OutFNm, VsGraphN);
1294 
1295  //NcpBs.PlotAvgNcp(OutFNm+"AvgNcp", NcpBs.NcpV, 1, 1);
1296  //NcpBs.PlotAvgNcp(OutFNm+"AvgRewNcp", NcpBs.RewNcpV, 1, 1);
1297  /*NcpBs.PlotAvgNcp(OutFNm+"AvgNcp2", NcpBs.NcpV, 100, 0.1);
1298  NcpBs.PlotAvgNcp(OutFNm+"AvgNcp3", NcpBs.NcpV, 100, 0.01);
1299  NcpBs.PlotAvgNcp(OutFNm+"AvgNcp4", NcpBs.NcpV, 100, 0.001);
1300  NcpBs.PlotAvgNcp(OutFNm+"AvgNcp5", NcpBs.NcpV, 100, 0.0001);
1301  NcpBs.PlotAvgNcp(OutFNm+"RewNcp1", NcpBs.RewNcpV, 1000, 1);
1302  NcpBs.PlotAvgNcp(OutFNm+"RewNcp2", NcpBs.RewNcpV, 100, 0.1);
1303  NcpBs.PlotAvgNcp(OutFNm+"RewNcp3", NcpBs.RewNcpV, 100, 0.01);
1304  NcpBs.PlotAvgNcp(OutFNm+"RewNcp4", NcpBs.RewNcpV, 100, 0.001);
1305  NcpBs.PlotAvgNcp(OutFNm+"RewNcp5", NcpBs.RewNcpV, 100, 0.0001);
1306  NcpBs.PlotAvgNcp(OutFNm+"WhiskNcp1", NcpBs.WhiskNcpV, 1000, 1);
1307  NcpBs.PlotAvgNcp(OutFNm+"WhiskNcp2", NcpBs.WhiskNcpV, 100, 0.1);
1308  NcpBs.PlotAvgNcp(OutFNm+"WhiskNcp3", NcpBs.WhiskNcpV, 100, 0.01);
1309  NcpBs.PlotAvgNcp(OutFNm+"WhiskNcp4", NcpBs.WhiskNcpV, 100, 0.001);
1310  NcpBs.PlotAvgNcp(OutFNm+"WhiskNcp5", NcpBs.WhiskNcpV, 100, 0.0001);
1311  NcpBs.PlotAvgNcp(OutFNm+"WhiskNcp6", NcpBs.WhiskNcpV, 100, 0.00004);
1312  NcpBs.PlotAvgNcp(OutFNm+"WhiskNcp7", NcpBs.WhiskNcpV, 100, 0.00005);*/
1313  //NcpBs.SaveTxt(OutFNm+"bestK.txt");
1314 }
double GetModRat(const int &GEdges) const
Definition: ncp.h:171
void Clr(const bool &DoDel=true, const int &NoDelLim=-1)
Definition: shash.h:1243
void DrawGViz(const PGraph &Graph, const TGVizLayout &Layout, const TStr &PltFNm, const TStr &Desc=TStr(), const bool &NodeLabels=false, const TIntStrH &NIdColorH=TIntStrH())
Draws a given Graph using a selected GraphViz Layout engine with nodes colored.
Definition: gviz.h:34
#define IAssert(Cond)
Definition: bd.h:262
static const T & Mn(const T &LVal, const T &RVal)
Definition: xmath.h:36
TFlt SizeFrac
Definition: ncp.h:178
TFltPrV RewWhiskerV
Definition: ncp.h:246
TPair< TInt, TInt > TIntPr
Definition: ds.h:83
void SavePajek(const TStr &OutFNm) const
Saves the network in the Pajek format so it can be visualized. Red node represents the seed and color...
Definition: ncp.cpp:139
int SearchCh(const char &Ch, const int &BChN=0) const
Definition: dt.cpp:1043
TVec< TNodeSweep > SweepsV
Definition: ncp.h:183
static void PlotDataset(const TStr &InFNmWc, const TStr &OutFNm, const bool &ImposeNcp=false, const bool &VsGraphN=false)
Definition: ncp.cpp:1281
void PlotAvgNcp(const TStr &OutFNm, const TVec< TFltPrV > &NcpVec, const int &MinSz, const double &MaxMinY)
Definition: ncp.cpp:1247
static void BagOfWhiskers2(const PUNGraph &Graph, TFltPrV &SizePhiV)
Definition: ncp.cpp:977
int BestCutIdx
Definition: ncp.h:37
#define IAssertR(Cond, Reason)
Definition: bd.h:265
TIntFltH ResH
Definition: ncp.h:30
static char * GetCurTm()
Definition: tm.h:374
static void DrawWhiskers(const PUNGraph &Graph, TStr FNmPref, const int &PlotN)
Draws the 'whiskers' of the graph. Whiskers are small sub-graphs that are attached to the rest of the...
Definition: ncp.cpp:172
Definition: xfl.h:30
TStr GetFMid() const
Definition: dt.cpp:1403
int GetVol(const int &Nodes) const
Returns the volume of the set of first NodeN nodes in the sweep vector.
Definition: ncp.h:51
Definition: tm.h:355
static TStr GetMegaStr(const int &Val)
Definition: dt.h:1226
void SetXRange(const double &Min, const double &Max)
Definition: gnuplot.h:79
double GetMedian() const
Definition: xmath.h:244
void SupportSweep()
After the function ApproxPageRank() has been run the SupportSweep() computes the volume, cut size, node ids, conductance vectors.
Definition: ncp.cpp:52
TInt KMax
Definition: ncp.h:179
TVec< TFltPrV > NcpV
Definition: ncp.h:247
void Sort(const bool &CmpKey, const bool &Asc)
Definition: hash.h:570
bool Empty() const
Definition: ds.h:2646
TFltV PhiV
Definition: ncp.h:36
static void GetCutStat(const PUNGraph &Graph, const TIntV &NIdV, int &Vol, int &Cut, double &Phi, int GraphEdges=-1)
For a given Graph and a set of nodes NIdV the function returns the Volume, CutSize and the Conductanc...
Definition: ncp.cpp:192
int GetCut(const int &Nodes) const
Returns the size of the cut of the first Nodes nodes in the sweep vector.
Definition: ncp.h:55
PUNGraph GenRewire(const PUNGraph &OrigGraph, const int &NSwitch, TRnd &Rnd)
Rewire a random undirected graph. Keeps node degrees the same, but randomly rewires the edges...
Definition: ggen.cpp:168
const TIntV & GetNIdV() const
Vector of node IDs sorted in the random walk order.
Definition: ncp.h:62
void SavePng(const int &SizeX=1000, const int &SizeY=800, const TStr &Comment=TStr())
Definition: gnuplot.h:120
void Run(const PUNGraph &Graph, const bool &SaveAllSweeps=false, const bool &SaveAllCond=false, const bool &SaveBestNodesAtK=false)
Definition: ncp.cpp:300
PGraph GetMxWcc(const PGraph &Graph)
Returns a graph representing the largest weakly connected component on an input Graph.
Definition: cncom.h:452
static const T & Mx(const T &LVal, const T &RVal)
Definition: xmath.h:32
int Val
Definition: dt.h:1139
int SearchChBack(const char &Ch, int BChN=-1) const
Definition: dt.cpp:1053
TVec< TFltPrV > WhiskNcpV
Definition: ncp.h:249
void Save(TSOut &SOut) const
Definition: dt.h:1153
TFlt KFac
Definition: ncp.h:178
int ApproxPageRank(const int &SeedNode, const double &Eps)
Computes Approximate PageRank from the seed node SeedNId and with tolerance Eps.
Definition: ncp.cpp:9
unsigned int uint
Definition: bd.h:11
double GetPhi() const
Definition: ncp.h:164
TInt Coverage
Definition: ncp.h:179
TStr ParamStr() const
Definition: ncp.cpp:555
bool Empty() const
Definition: bd.h:501
TFltPr GetXYAtMinY(const TFltPrV &Ncp, const int &NNodes)
Definition: ncp.cpp:1166
void PlotNcpMin(const TStr &OutFNm, const bool &VsGraphN=false)
Definition: ncp.cpp:1173
void PlotPhiDistr(const int &CmtySz, const TStr &OutFNm, TStr Desc=TStr()) const
Definition: ncp.cpp:735
void FindBestCut(const int &SeedNode, const int &ClustSz, const double &MinSizeFrac=0.2)
Finds minimum conductance cut in the graph around the seed node.
Definition: ncp.cpp:81
double Val
Definition: dt.h:1388
int GetEdges() const
Returns the number of edges in the graph.
Definition: graph.cpp:82
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:575
void PlotPhiDistr(const TStr &OutFNm, TStr Desc=TStr()) const
Plots the cluster conductance vs. cluster size K (cluster is composed of nodes NIdV[1...K]).
Definition: ncp.cpp:126
PUNGraph Graph
Definition: ncp.h:180
Node iterator. Only forward iteration (operator++) is supported.
Definition: graph.h:68
double GetExpEdgesIn(const int &GEdges) const
Definition: ncp.h:172
void PlotCutDistr(const TStr &OutFNm, TStr Desc=TStr()) const
Plots the cluster cut size vs. cluster size K (cluster is composed of nodes NIdV[1...K]).
Definition: ncp.cpp:113
void GetBoltzmanCurveStat(const TFltV &TempV, TVec< TFltPrV > &NcpV) const
Definition: ncp.cpp:529
static bool Verbose
Definition: ncp.h:26
bool Empty() const
Definition: hash.h:227
const TCutInfo & GetKNodeCut(const int &Nodes) const
Definition: ncp.h:208
int Len() const
Returns the support of the approximate random walk, the number of nodes with non-zero PageRank score...
Definition: ncp.h:42
void GetKeyV(TVec< TKey > &KeyV) const
Definition: shash.h:1347
Definition: ss.h:72
void PlotNCP(const TStr &OutFNm, TStr Desc=TStr()) const
Definition: ncp.cpp:564
TInt KMin
Definition: ncp.h:179
TStr GetSubStr(const int &BChN, const int &EChN) const
Definition: dt.cpp:811
void SetXYLabel(const TStr &XLabel, const TStr &YLabel)
Definition: gnuplot.h:73
int GetVol() const
Definition: ncp.h:161
static double BinFactor
Definition: ncp.h:127
Definition: xmath.h:129
double GetNormCut(const int &GEdges) const
Definition: ncp.h:168
const TDat & GetDat(const TKey &Key) const
Definition: hash.h:262
int GetFlds() const
Returns the number of fields in the current line.
Definition: ss.h:116
static TRnd Rnd
Definition: dt.h:1146
static const double Mx
Definition: dt.h:1391
bool IsKey(const TKey &Key) const
Definition: shash.h:1148
int GetNodes() const
Returns the number of nodes in the graph.
Definition: graph.h:192
double GetCutPhi() const
Conductance of the 'best' (minimum conductance) cut.
Definition: ncp.h:81
void PlotRewBestWhisker(const TStr &OutFNm, const bool &VsGraphN=false)
Definition: ncp.cpp:1232
Definition: gviz.h:3
TCutInfo BestCut
Definition: ncp.h:188
int GetCutSz() const
Definition: ncp.h:162
Definition: fl.h:58
void Save(TSOut &SOut) const
Definition: ds.h:954
int GetRndNId(TRnd &Rnd=TInt::Rnd)
Returns an ID of a random node in the graph.
Definition: graph.h:289
bool Empty() const
Tests whether the vector is empty.
Definition: ds.h:570
void PlotNCPModul(const TStr &OutFNm, TStr Desc=TStr()) const
Definition: ncp.cpp:589
double GetExpansion() const
Definition: ncp.h:165
static void PlotValMomH(const THash< TVal1, TMom > &ValMomH, const TStr &OutFNmPref, const TStr &Desc="", const TStr &XLabel="", const TStr &YLabel="", const TGpScaleTy &ScaleTy=gpsAuto, const TGpSeriesTy &SeriesTy=gpwLinesPoints, bool PlotAvg=true, bool PlotMed=true, bool PlotMin=false, bool PlotMax=false, bool PlotSDev=false, bool PlotStdErr=true, bool PlotScatter=false)
Definition: gnuplot.h:491
void Swap(TVec< TVal, TSizeTy > &Vec)
Swaps the contents of the vector with Vec.
Definition: ds.h:1101
int GetDeg() const
Returns degree of the current node.
Definition: graph.h:90
bool Empty() const
Definition: shash.h:1120
Local-Spectral-Clustering statistics of a given Graph.
Definition: ncp.h:125
void Pop()
Definition: ds.h:2650
TFltPrV WhiskerV
Definition: ncp.h:246
int GetOutDeg() const
Returns out-degree of the current node (returns same as value GetDeg() since the graph is undirected)...
Definition: graph.h:94
static void BagOfWhiskers(const PUNGraph &Graph, TFltPrV &SizePhiV, TFltPr &BestWhisk)
Definition: ncp.cpp:901
void Clr(const bool &DoDel=true, const TSizeTy &NoDelLim=-1)
Clears the contents of the vector.
Definition: ds.h:1022
void AddCmd(const TStr &Cmd)
Definition: gnuplot.h:81
double GetMx() const
Definition: xmath.h:238
void AddToBestCutH(const PUNGraph &Graph, const TIntV &NIdV, const bool &AddBestCond=true)
Definition: ncp.cpp:422
void Add(const TFlt &Val, const TFlt &Wgt=1)
Definition: xmath.h:217
Local Spectral Clustering algorithm.
Definition: ncp.h:24
void Sort(const bool &Asc=true)
Sorts the elements of the vector.
Definition: ds.h:1318
Definition: gnuplot.h:7
void SaveTxtNcpMin(const TStr &OutFNm, const bool &VsGraphN=false)
Definition: ncp.cpp:1187
TStrV GNmV
Definition: ncp.h:243
void PlotBestWhisker(const TStr &OutFNm, const bool &VsGraphN=false)
Definition: ncp.cpp:1217
double GetCutRatio(const int &GNodes) const
Definition: ncp.h:167
int Edges2
Definition: ncp.h:29
void Save(TSOut &SOut) const
Definition: ncp.cpp:1118
int SearchStr(const TStr &Str, const int &BChN=0) const
Definition: dt.cpp:1065
const TFltV & GetPhiV() const
Conducatance of the cut separating first K-nodes in the node-id vector (GetNIdV()) from the rest of t...
Definition: ncp.h:68
PUNGraph GetSubGraph(const PUNGraph &Graph, const TIntV &NIdV, const bool &RenumberNodes)
Returns an induced subgraph of an undirected graph Graph with NIdV nodes with an optional node renumb...
Definition: subgraph.cpp:7
void PlotRewNcpMin(const TStr &OutFNm, const bool &VsGraphN=false)
Definition: ncp.cpp:1203
TVec< TFltPrV > RewNcpV
Definition: ncp.h:248
double GetIntDens() const
Definition: ncp.h:166
double FindBestCut(const int &Nodes, const TIntSet &TabuNIdSet, int &BestCutId) const
Definition: ncp.cpp:469
static double Round(const double &Val)
Definition: xmath.h:16
TNodeI GetNI(const int &NId) const
Returns an iterator referring to the node of ID NId in the graph.
Definition: graph.h:241
#define Mega(n)
Definition: gbase.h:4
TIntV VolV
Definition: ncp.h:35
int Len() const
Definition: ds.h:2647
int AddKey(const TKey &Key)
Definition: shash.h:1254
const TVal & Last() const
Returns a reference to the last element of the vector.
Definition: ds.h:579
Tab separated.
Definition: ss.h:6
int GetNodes() const
Definition: ncp.h:159
bool GetFlt(const int &FldN, double &Val) const
If the field FldN is a float its value is returned in Val and the function returns true...
Definition: ss.cpp:485
static void MakeExpBins(const TFltPrV &ValV, TFltPrV &NewV)
Definition: ncp.cpp:1020
void Tick()
Definition: tm.h:364
void Clr()
Definition: ncp.cpp:293
const TVal & Top() const
Definition: ds.h:2648
TPair< TFlt, TFlt > TFltPr
Definition: ds.h:99
Definition: fl.h:128
double Phi(const int i) const
Definition: ncp.h:143
double GetPhi(const int &ValId) const
Returns the conductance of the cut separating the first Nodes nodes in the sweep vector from the rest...
Definition: ncp.h:59
int GetCutVol() const
Volume of the 'best' (minimum conductance) cut.
Definition: ncp.h:79
THash< TInt, TFltV > SizePhiH
Definition: ncp.h:184
void PlotNCPScatter(const TStr &OutFNm, TStr Desc=TStr()) const
Definition: ncp.cpp:715
void PlotPhiInOut(const TStr &OutFNm, TStr Desc=TStr()) const
Definition: ncp.cpp:643
double Alpha
Definition: ncp.h:32
void GetCurveStat(TFltPrV &MeanV, TFltPrV &MedV, TFltPrV &Dec1V, TFltPrV &MinV, TFltPrV &MaxV) const
Definition: ncp.cpp:490
double GetFracDegOut(const PUNGraph &Graph, double &MxFrac, double &AvgFrac, double &MedianFrac, double &Pct9Frac, double &Flake) const
Definition: ncp.cpp:245
void SortByKey(const bool &Asc=true)
Definition: hash.h:291
THash< TInt, TCutInfo > BestCutH
Definition: ncp.h:185
void SaveTxt(const TStr &OutFNm)
Definition: ncp.cpp:1267
double GetModular(const int &GEdges) const
Definition: ncp.h:170
int GetKeyId(const TKey &Key) const
Definition: hash.h:466
void Get1CnCom(const PUNGraph &Graph, TCnComV &Cn1ComV)
Returns 1-components: maximal connected components of that can be disconnected from the Graph by remo...
Definition: cncom.cpp:98
TIntPrV GSizeV
Definition: ncp.h:245
int GetEdges() const
Definition: ncp.h:160
TIntV CutV
Definition: ncp.h:35
void ImposeNCP(const TLocClustStat &LcStat2, TStr OutFNm, TStr Desc, TStr Desc1, TStr Desc2) const
Definition: ncp.cpp:786
int Len() const
Definition: shash.h:1121
int AddKey(const TKey &Key)
Definition: hash.h:373
TFlt Alpha
Definition: ncp.h:178
int GetOutNId(const int &NodeN) const
Returns ID of NodeN-th out-node (the node the current node points to).
Definition: graph.h:106
void PlotNcpTop10(const TStr &OutFNm, TStr Desc, const int &TakeMinN) const
Definition: ncp.cpp:607
void SetScale(const TGpScaleTy &GpScaleTy)
Definition: gnuplot.h:78
TFltPr BestWhisk
Definition: ncp.h:187
double GetMean() const
Definition: xmath.h:240
TIntV NIdV
Definition: ncp.h:35
Definition: dt.h:412
Definition: ds.h:219
bool Empty() const
Definition: dt.h:491
static TStr Fmt(const char *FmtStr,...)
Definition: dt.cpp:1599
void PlotBestClustDens(TStr OutFNm, TStr Desc=TStr()) const
Definition: ncp.cpp:689
PUNGraph Graph
Definition: ncp.h:28
TIntQ NodeQ
Definition: ncp.h:31
double GetSecs() const
Definition: tm.h:366
TNodeI EndNI() const
Returns an iterator referring to the past-the-end node in the graph.
Definition: graph.h:239
TNcpGraphsBase(const TStr &FNmWc)
Definition: ncp.cpp:1064
TVal1 Val1
Definition: ds.h:34
int AddPlot(const TIntV &YValV, const TGpSeriesTy &SeriesTy=gpwLinesPoints, const TStr &Label=TStr(), const TStr &Style=TStr())
Definition: gnuplot.cpp:186
static void PlotValCntH(const THash< TKey, TVal, THashFunc > &ValCntH, const TStr &OutFNmPref, const TStr &Desc="", const TStr &XLabel="", const TStr &YLabel="", const TGpScaleTy &ScaleTy=gpsAuto, const bool &PowerFit=false, const TGpSeriesTy &SeriesTy=gpwLinesPoints, const bool &PlotCCDF=false, const bool &ExpBucket=false)
Definition: gnuplot.h:434
int GetNbrNId(const int &NodeN) const
Returns ID of NodeN-th neighboring node.
Definition: graph.h:111
TVal2 Val2
Definition: ds.h:35
void Clr(const bool &DoDel=true, const int &NoDelLim=-1, const bool &ResetDat=true)
Definition: hash.h:361
int BestCut() const
Index K of the cut of the minimum conductance around the seed node.
Definition: ncp.h:73
bool IsNode(const int &NId) const
Tests whether ID NId is a node.
Definition: graph.h:235
bool Next()
Loads next line from the input file.
Definition: ss.cpp:412
static void Sweep(const TSparseColMatrix &W, const TFltV &fvec, TFltV &conds, TIntV &order)
void Push(const TVal &Val)
Definition: ds.h:2653
double GetDecile(const int &DecileN) const
Definition: xmath.h:248
TIntSet SizeBucketSet
Definition: ncp.h:189
void Gen(const TSizeTy &_Vals)
Constructs a vector (an array) of _Vals elements.
Definition: ds.h:523
TFltPrV BagOfWhiskerV
Definition: ncp.h:186
bool Next(TStr &FNm)
int SeedNId
Definition: ncp.h:33
void Clr(const bool &DoDel=true)
Definition: ds.h:2635
void PlotBoltzmanCurve(const TStr &OutFNm, TStr Desc=TStr()) const
Definition: ncp.cpp:747
static TVec< TStr, int > GetV(const TStr &Val1)
Returns a vector on element Val1.
Definition: ds.h:848
void PlotVolDistr(const TStr &OutFNm, TStr Desc=TStr()) const
Plots the cluster volume vs. cluster size K (cluster is composed of nodes NIdV[1...K]).
Definition: ncp.cpp:100
void AddCut(const TIntV &NIdV)
Definition: ncp.cpp:410
const char * GetStr() const
Definition: tm.h:368
char * CStr()
Definition: dt.h:479
bool IsKey(const TKey &Key) const
Definition: hash.h:258
void Save(TSOut &SOut) const
Definition: ncp.cpp:280
void AddBagOfWhiskers()
Definition: ncp.cpp:404
void RunGnuPlot() const
Definition: gnuplot.cpp:873
TFltV ParamValV
Definition: ncp.h:244
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:602
TNodeI BegNI() const
Returns an iterator referring to the first node in the graph.
Definition: graph.h:237
int Len() const
Definition: hash.h:228
TDat & AddDat(const TKey &Key)
Definition: hash.h:238
void SaveTxtInfo(const TStr &OutFNmPref, const TStr &Desc, const bool &SetMaxAt1) const
Definition: ncp.cpp:841
int BestCutNodes() const
Number of nodes inside the 'best' (minimum conductance) cut.
Definition: ncp.h:75
static void PlotValV(const TVec< TVal1 > &ValV, const TStr &OutFNmPref, const TStr &Desc="", const TStr &XLabel="", const TStr &YLabel="", const TGpScaleTy &ScaleTy=gpsAuto, const bool &PowerFit=false, const TGpSeriesTy &SeriesTy=gpwLinesPoints)
Definition: gnuplot.h:398
double GetMn() const
Definition: xmath.h:237
int Len() const
Definition: ncp.h:140
double GetXAtMinY(const TFltPrV &Ncp, const int &NNodes)
Definition: ncp.cpp:1133
void Def()
Definition: xmath.cpp:339
void Impose(const TStr &OutFNm, const int &TopN, const bool &Smooth)
Definition: ncp.cpp:1124
static void PlotNCP(const PUNGraph &Graph, const TStr &FNm, const TStr Desc="", const bool &BagOfWhiskers=true, const bool &RewireNet=false, const int &KMin=10, const int &KMax=Mega(100), const int &Coverage=10, const bool &SaveTxtStat=false, const bool &PlotBoltzman=false)
Definition: ncp.cpp:219
void Save(TSOut &SOut) const
Definition: dt.h:1402
Local-Spectral-Clustering for a set of graphs (loads ncp-*.tab files)
Definition: ncp.h:241
const TKey & GetKey(const int &KeyId) const
Definition: hash.h:252
int NId(const int i) const
Definition: ncp.h:142
bool IsFlt(const int &FldN) const
Checks whether fields FldN is a float.
Definition: ss.h:148
void GetSubValV(const TSizeTy &BValN, const TSizeTy &EValN, TVec< TVal, TSizeTy > &ValV) const
Fills ValV with elements at positions BValN...EValN.
Definition: ds.h:1170
TLocClustStat(const double &_Alpha, const int &_KMin, const int &_KMax, const double &_KFac, const int &_Coverage, const double &_SizeFrac)
Definition: ncp.cpp:275
TIntFltH ProbH
Definition: ncp.h:30
void SortByDat(const bool &Asc=true)
Definition: hash.h:292