SNAP Library 2.0, Developer Reference  2013-05-13 16:33:57
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
ncp.cpp
Go to the documentation of this file.
00001  #include "stdafx.h"
00002 #include "ncp.h"
00003 
00005 // Local Spectral Clustering
00006 
00007 bool TLocClust::Verbose = true;
00008 
00009 int TLocClust::ApproxPageRank(const int& SeedNode, const double& Eps) {
00010   for (int i = 0; i < ProbH.Len(); i++) { ProbH[i]=0.0; }
00011   for (int i = 0; i < ResH.Len(); i++) { ResH[i]=0.0; }
00012   ProbH.Clr(false, -1, false);
00013   ResH.Clr(false, -1, false);
00014   ResH.AddDat(SeedNode, 1.0);
00015   int iter = 0;
00016   double OldRes = 0.0;
00017   NodeQ.Clr(false);
00018   NodeQ.Push(SeedNode);
00019   TExeTm ExeTm;
00020   while (! NodeQ.Empty()) {
00021     const int NId = NodeQ.Top(); NodeQ.Pop();
00022     const TUNGraph::TNodeI& Node = Graph->GetNI(NId);
00023     const int NIdDeg = Node.GetOutDeg();
00024     const double PushVal = ResH.GetDat(NId) - 0.5*Eps*NIdDeg;
00025     const double PutVal = (1.0-Alpha) * PushVal / double(NIdDeg);
00026     ProbH.AddDat(NId) += Alpha*PushVal;
00027     ResH.AddDat(NId) = 0.5 * Eps * NIdDeg;
00028     for (int e = 0; e < NIdDeg; e++) {
00029       const int DstNId = Node.GetOutNId(e);
00030       const int DstDeg = Graph->GetNI(DstNId).GetOutDeg();
00031       double& ResVal = ResH.AddDat(DstNId).Val;
00032       OldRes = ResVal;
00033       ResVal += PutVal;
00034       if (ResVal >= Eps*DstDeg && OldRes < Eps*DstDeg) {
00035         NodeQ.Push(DstNId); }
00036     }
00037     iter++;
00038     if (iter % Mega(1) == 0) { 
00039       printf(" %d[%s]", NodeQ.Len(), ExeTm.GetStr());
00040       if (iter/1000 > Graph->GetNodes() || ExeTm.GetSecs() > 4*3600) { // more than 2 hours
00041         printf("Too many iterations! Stop to save time.\n");
00042         return iter; }
00043     }
00044   }
00045   // check that the residuals are sufficiently small
00046   /*for (int i =0; i < ProbH.Len(); i++) {
00047     const int Deg = Graph->GetNI(ResH.GetKey(i)).GetOutDeg();
00048     IAssert(ResH[i] < Eps*Deg); } //*/
00049   return iter;
00050 }
00051 
00052 void TLocClust::SupportSweep() {
00053   TExeTm ExeTm;
00054   VolV.Clr(false);  CutV.Clr(false);  PhiV.Clr(false);
00055   if (ProbH.Empty()) { return; }
00056   const int TopNId = ProbH.GetKey(0);
00057   const int TopNIdDeg = Graph->GetNI(TopNId).GetOutDeg();
00058   int Vol = TopNIdDeg, Cut = TopNIdDeg;
00059   double Phi = Cut/double(Vol);
00060   VolV.Add(Vol);  CutV.Add(Cut);  PhiV.Add(1.0);
00061   for (int i = 1; i < ProbH.Len(); i++) {
00062     const int NId = ProbH.GetKey(i);
00063     const TUNGraph::TNodeI& Node = Graph->GetNI(NId);
00064     const int OutDeg = Node.GetOutDeg();
00065     int CutSz = OutDeg; // edges outside
00066     for (int e = 0; e < OutDeg; e++) {
00067       const int Rank = ProbH.GetKeyId(Node.GetOutNId(e));
00068       if ( Rank > -1 && Rank < i) { CutSz -= 2;  }
00069     }
00070     Vol += OutDeg;  Cut += CutSz;
00071     if (Vol < Edges2) {
00072       if (2*Vol > Edges2) { Phi = Cut / double(Edges2-Vol); }
00073       else { Phi = Cut / double(Vol); }
00074     } else {
00075       Phi = 1.0;
00076     }
00077     IAssert((Phi+1e-6) >= double(1.0)/double(i*(i+1)+1)); // conductance is worse than the best possible
00078     VolV.Add(Vol);  CutV.Add(Cut);  PhiV.Add(Phi);
00079   }}
00080 
00081 void TLocClust::FindBestCut(const int& SeedNode, const int& ClustSz, const double& MinSizeFrac) {
00082   double MaxPhi = TFlt::Mx;
00083   BestCutIdx = -1;
00084   SeedNId = SeedNode;
00085   // calculate pagerank and cut sets
00086   ApproxPageRank(SeedNId, 1.0/double(ClustSz));
00087   for (int i = 0; i < ProbH.Len(); i++) {
00088     ProbH[i] /= Graph->GetNI(ProbH.GetKey(i)).GetOutDeg(); }
00089   ProbH.SortByDat(false);
00090   SupportSweep();
00091   // find best cut
00092   NIdV.Clr(false);
00093   for (int i = 0; i < PhiV.Len(); i++) {
00094     const double Phi = PhiV[i];
00095     NIdV.Add(ProbH.GetKey(i));
00096     if (Phi < MaxPhi) { MaxPhi = Phi;  BestCutIdx = i; }
00097   }
00098 }
00099 
00100 void TLocClust::PlotVolDistr(const TStr& OutFNm, TStr Desc) const {
00101   TFltPrV RankValV(VolV.Len(), 0);
00102   for (int i = 0; i < VolV.Len(); i++) {
00103     RankValV.Add(TFltPr(i+1, VolV[i].Val)); }
00104   if (Desc.Empty()) { Desc = OutFNm; }
00105   TFltPrV NewV;  TLocClustStat::MakeExpBins(RankValV, NewV);
00106   TGnuPlot GP(OutFNm+"-VOL", TStr::Fmt("VOLUME. Seed node %d.", SeedNId, Desc.CStr()));
00107   GP.AddPlot(NewV, gpwLinesPoints, ""); //, "pointtype 6 pointsize 1.5"
00108   GP.SetXYLabel("Rank", "Volume");
00109   //GP.SetScale(gpsLog10X);
00110   GP.SavePng();
00111 }
00112 
00113 void TLocClust::PlotCutDistr(const TStr& OutFNm, TStr Desc) const {
00114   TFltPrV RankValV(CutV.Len(), 0);
00115   for (int i = 0; i < CutV.Len(); i++) {
00116     RankValV.Add(TFltPr(i+1, CutV[i].Val)); }
00117   if (Desc.Empty()) { Desc = OutFNm; }
00118   TFltPrV NewV;  TLocClustStat::MakeExpBins(RankValV, NewV);
00119   TGnuPlot GP(OutFNm+"-CUT", TStr::Fmt("CUT SIZE. Seed node %d.", SeedNId, Desc.CStr()));
00120   GP.AddPlot(NewV, gpwLinesPoints, ""); //, "pointtype 6 pointsize 1.5"
00121   GP.SetXYLabel("Rank", "Cut size");
00122   //GP.SetScale(gpsLog10X);
00123   GP.SavePng();
00124 }
00125 
00126 void TLocClust::PlotPhiDistr(const TStr& OutFNm, TStr Desc) const {
00127   TFltPrV RankValV(PhiV.Len(), 0);
00128   for (int i = 0; i < PhiV.Len(); i++) {
00129     RankValV.Add(TFltPr(i+1, PhiV[i])); }
00130   if (Desc.Empty()) { Desc = OutFNm; }
00131   TFltPrV NewV;  TLocClustStat::MakeExpBins(RankValV, NewV);
00132   TGnuPlot GP(OutFNm+"-PHI", TStr::Fmt("CONDUCTANCE (Cut size / Volume). Seed node %d.", SeedNId, Desc.CStr()));
00133   GP.AddPlot(NewV, gpwLinesPoints, ""); //, "pointtype 6 pointsize 1.5"
00134   GP.SetXYLabel("Rank", "Conductance (Cut size / Volume)");
00135   //GP.SetScale(gpsLog10X);
00136   GP.SavePng();
00137 }
00138 
00139 void TLocClust::SavePajek(const TStr& OutFNm) const {
00140   FILE *F = fopen(TStr::Fmt("%s.net", OutFNm.CStr()).CStr(), "wt");
00141   TIntH NIdToIdH(Graph->GetNodes(), true);
00142   TIntH ClustSet(BestCut()+1);
00143   const int BucketSz = BestCutNodes()/ 4;
00144   TStrV ClrV = TStrV::GetV("Black", "Gray80", "Gray60", "Gray40", "Gray20", "RedViolet");
00145   for (int a = 0; a < BestCutNodes(); a++) {
00146     ClustSet.AddDat(NIdV[a], (a+1)/BucketSz);
00147   }
00148   fprintf(F, "*Vertices %d\n", Graph->GetNodes());
00149   int i = 0;
00150   for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
00151     const int NId = NI.GetId();
00152     if (NId == SeedNId) {
00153       fprintf(F, "%d  \"%d\" diamond x_fact 2 y_fact 2 ic Green fos 10\n", i+1, NId); }
00154     else if (ClustSet.IsKey(NId)) {
00155       //fprintf(F, "%d  \"%d\" box x_fact 1 y_fact 1 ic Red fos 10\n", i+1, NId); }
00156       fprintf(F, "%d  \"%d\" box x_fact 2 y_fact 2 ic %s fos 10\n", i+1, NId, ClrV[ClustSet.GetDat(NId)].CStr()); }
00157     else {
00158       fprintf(F, "%d  \"%d\" ellipse x_fact 2 y_fact 2 ic Blue fos 10\n", i+1, NId); }
00159     NIdToIdH.AddDat(NId, i+1);
00160     i++;
00161   }
00162   fprintf(F, "*Arcs %d\n", Graph->GetEdges()); // arcs are directed, edges are undirected
00163   for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
00164     const int NId = NIdToIdH.GetDat(NI.GetId());
00165     for (int e = 0; e < NI.GetOutDeg(); e++) {
00166       fprintf(F, "%d %d %g c Tan\n", NId, NIdToIdH.GetDat(NI.GetOutNId(e)).Val, 1.0);
00167     }
00168   }
00169   fclose(F);
00170 }
00171 
00172 void TLocClust::DrawWhiskers(const PUNGraph& Graph, TStr FNmPref, const int& PlotN=10) {
00173   TCnComV CnComV;  TSnap::Get1CnCom(Graph, CnComV);
00174   CnComV.Sort(false);
00175   // plot size distribution
00176   { TIntH SzCntH;
00177   for (int i = 0; i < CnComV.Len(); i++) { SzCntH.AddDat(CnComV[i].Len()) += 1; }
00178   TGnuPlot::PlotValCntH(SzCntH, "whiskSz."+FNmPref, TStr::Fmt("%s. G(%d, %d)", FNmPref.CStr(), Graph->GetNodes(),
00179     Graph->GetEdges()), "Whisker size (Maximal component connected with a bridge edge)", "Count", gpsLog10XY, false); }
00180   // draw them
00181   int BrNodeId = -1;
00182   for (int c = 0; c < TMath::Mn(CnComV.Len(), PlotN); c++) {
00183     const PUNGraph BrClust = TSnap::GetSubGraph(Graph, CnComV[c].NIdV);
00184     for (TUNGraph::TNodeI NI = BrClust->BegNI(); NI < BrClust->EndNI(); NI++) {
00185       if (NI.GetOutDeg() != Graph->GetNI(NI.GetId()).GetOutDeg()) { BrNodeId=NI.GetId(); break; } }
00186     TIntStrH ClrH;  ClrH.AddDat(BrNodeId, "red");
00187     TSnap::DrawGViz(BrClust, gvlNeato, TStr::Fmt("whisk-%s-%02d.gif", FNmPref.CStr(), c+1),
00188       TStr::Fmt("Bridge node id: %d, n=%d, e=%d", BrNodeId, BrClust->GetNodes(), BrClust->GetEdges()), false, ClrH);
00189   }
00190 }
00191 
00192 void TLocClust::GetCutStat(const PUNGraph& Graph, const TIntV& NIdV, int& Vol, int& Cut, double& Phi, int GraphEdges) {
00193   TIntSet NIdSet(NIdV.Len());
00194   for (int i = 0; i < NIdV.Len(); i++) { NIdSet.AddKey(NIdV[i]); }
00195   GetCutStat(Graph, NIdSet, Vol, Cut, Phi, GraphEdges);
00196 }
00197 
00198 void TLocClust::GetCutStat(const PUNGraph& Graph, const TIntSet& NIdSet, int& Vol, int& Cut, double& Phi, int GraphEdges) {
00199   const int Edges2 = GraphEdges>=0 ? 2*GraphEdges : Graph->GetEdges();
00200   Vol=0;  Cut=0; Phi=0.0;
00201   for (int i = 0; i < NIdSet.Len(); i++) {
00202     if (! Graph->IsNode(NIdSet[i])) { continue; }
00203     TUNGraph::TNodeI NI = Graph->GetNI(NIdSet[i]);
00204     for (int e = 0; e < NI.GetOutDeg(); e++) {
00205       if (! NIdSet.IsKey(NI.GetOutNId(e))) { Cut += 1; }
00206     }
00207     Vol += NI.GetOutDeg();
00208   }
00209   // get conductance
00210   if (Vol != Edges2) {
00211     if (2*Vol > Edges2) { Phi = Cut / double(Edges2-Vol); }
00212     else if (Vol == 0) { Phi = 0.0; }
00213     else { Phi = Cut / double(Vol); }
00214   } else {
00215     if (Vol == Edges2) { Phi = 1.0; }
00216   }
00217 }
00218 
00219 void TLocClust::PlotNCP(const PUNGraph& Graph, const TStr& FNm, const TStr Desc, const bool& BagOfWhiskers, const bool& RewireNet, const int& KMin, const int& KMax, const int& Coverage, const bool& SaveTxtStat, const bool& PlotBoltzman) {
00220   const double Alpha = 0.001, KFac = 1.5, SizeFrac = 0.001;
00221   //const int KMin = 2, KMax = Mega(100), Coverage = 10;
00222   TLocClustStat ClusStat1(Alpha, KMin, KMax, KFac, Coverage, SizeFrac);
00223   ClusStat1.Run(Graph, false, PlotBoltzman, SaveTxtStat);
00224   if (BagOfWhiskers) { ClusStat1.AddBagOfWhiskers(); }
00225   TLocClustStat ClusStat2(Alpha, KMin, KMax, KFac, Coverage, SizeFrac);
00226   ClusStat1.ImposeNCP(ClusStat2, FNm, Desc, "ORIGINAL", "REWIRED"); // plot before rewiring
00227   if (SaveTxtStat) { // for every piece size store modularity
00228     ClusStat1.SaveTxtInfo(FNm, Desc, false);
00229   }
00230   if (PlotBoltzman) {
00231     ClusStat1.PlotBoltzmanCurve(FNm, Desc);
00232   }
00233   if (RewireNet) {
00234     ClusStat2.Run(TSnap::GenRewire(Graph, 50, TInt::Rnd), false, false, false);
00235     if (BagOfWhiskers) { ClusStat2.AddBagOfWhiskers(); }
00236     ClusStat1.ImposeNCP(ClusStat2, FNm, Desc, "ORIGINAL", "REWIRED"); // if rewire, plot again
00237   }
00238 }
00239 
00241 // Local Clustering Statistics
00242 
00243 double TLocClustStat::BinFactor = 1.01;
00244 
00245 double TLocClustStat::TCutInfo::GetFracDegOut(const PUNGraph& Graph, double& MxFrac, double& AvgFrac, double& MedianFrac, double& Pct9Frac, double& Flake) const {
00246   if (CutNIdV.Empty()) {
00247     IAssert(Nodes<100 || ! CutNIdV.Empty());
00248     MxFrac=1; AvgFrac=1; MedianFrac=1; Pct9Frac=1; Flake=1; 
00249     return 1;
00250   }
00251   TMom FracDegMom;
00252   TIntSet InNIdSet(CutNIdV.Len());
00253   int NHalfIn=0;
00254   for (int i = 0; i < CutNIdV.Len(); i++) { 
00255     InNIdSet.AddKey(CutNIdV[i]); }
00256   for (int n = 0; n < CutNIdV.Len(); n++) {
00257     const TUNGraph::TNodeI NI = Graph->GetNI(CutNIdV[n]);
00258     int EdgesOut = 0;
00259     for (int i = 0; i < NI.GetDeg(); i++) {
00260       if (! InNIdSet.IsKey(NI.GetNbrNId(i))) { EdgesOut++; }
00261     }
00262     const double FracOut = EdgesOut/double(NI.GetDeg());
00263     if (FracOut <= 0.5) { NHalfIn++; }
00264     FracDegMom.Add(FracOut);
00265   }
00266   FracDegMom.Def();
00267   MxFrac = FracDegMom.GetMx();
00268   AvgFrac = FracDegMom.GetMean();
00269   MedianFrac = FracDegMom.GetMedian();
00270   Pct9Frac = FracDegMom.GetDecile(9);
00271   Flake = 1.0 - double(NHalfIn)/double(CutNIdV.Len());
00272   return MxFrac;
00273 }
00274 
00275 TLocClustStat::TLocClustStat(const double& _Alpha, const int& _KMin, const int& _KMax, const double& _KFac,
00276   const int& _Coverage, const double& _SizeFrac) : Alpha(_Alpha), SizeFrac(_SizeFrac), KFac(_KFac),
00277   KMin(_KMin), KMax(_KMax), Coverage(_Coverage) {
00278 }
00279 
00280 void TLocClustStat::Save(TSOut& SOut) const {
00281   // parameters
00282   Alpha.Save(SOut);
00283   SizeFrac.Save(SOut);
00284   KFac.Save(SOut);
00285   KMin.Save(SOut);
00286   KMax.Save(SOut);
00287   Coverage.Save(SOut);
00288   // cluster stat
00289   //SweepsV.Save(SOut);
00290   //SizePhiH.Save(SOut);
00291 }
00292 
00293 void TLocClustStat::Clr() {
00294   SweepsV.Clr(false);      // node ids and conductances for each run of local clustering
00295   SizePhiH.Clr(false);     // conductances at cluster size K
00296   BestCutH.Clr(false);     // best cut (min conductance) at size K
00297   BagOfWhiskerV.Clr(false); // (Size, Conductance) of bag of whiskers clusters
00298 }
00299 
00300 void TLocClustStat::Run(const PUNGraph& _Graph, const bool& SaveAllSweeps, const bool& SaveAllCond, const bool& SaveBestNodesAtK) {
00301   Graph = TSnap::GetMxWcc(_Graph);
00302   const int Nodes = Graph->GetNodes();
00303   const int Edges = Graph->GetEdges();
00304   printf("\nLocal clustering: Graph(%d, %d)\n", Nodes, Edges);
00305   printf("  Alpha:    %g\n", Alpha());
00306   printf("  K: %d -- %g -- %dm\n", KMin(), KFac(), int(KMax/Mega(1)));
00307   printf("  Coverage: %d\n", Coverage());
00308   printf("  SizeFrac: %g\n\n", SizeFrac());
00309   TExeTm TotTm;
00310   Clr();
00311   TLocClust Clust(Graph, Alpha);
00312   BestCut.CutNIdV.Clr(false); 
00313   BestCut.CutSz=-1;  BestCut.Edges=-1;
00314   double BestPhi = TFlt::Mx;
00315   int prevK=-1;
00316   bool NextDone=false;
00317   if (SaveBestNodesAtK) { // fill buckets (only store nodes in clusters for sizes in SizeBucketSet)
00318     SizeBucketSet.Clr();
00319     double PrevBPos = 1, BPos = 1;
00320     while (BPos <= Mega(100)) {
00321       PrevBPos = (uint) floor(BPos);
00322       BPos *= BinFactor;
00323       if (floor(BPos) == PrevBPos) { BPos = PrevBPos + 1; }
00324       SizeBucketSet.AddKey(int(floor(BPos) - 1));
00325     }
00326   }
00327   for (int K = KMin, cnt=1; K < KMax; K = int(KFac * double(K))+1, cnt++) {
00328     if (K == prevK) { K++; } prevK = K;
00329     const int Runs = 2 + int(Coverage  * floor(double(Graph->GetEdges()) / double(K)));
00330     printf("%d] K: %d, %d runs\n", cnt+1, K, Runs);
00331     if (NextDone) { break; } // done
00332     if (K+1 > 2*Graph->GetEdges()) { K = Graph->GetEdges(); NextDone=true; }
00333     //if (K+1 > Graph->GetEdges()) { K = Graph->GetEdges(); NextDone=true; }
00334     TExeTm ExeTm, IterTm;
00335     double MeanSz=0.0, MeanVol=0.0, Count=0.0;
00336     for (int run = 0; run < Runs; run++) {
00337       const int SeedNId = Graph->GetRndNId();  IterTm.Tick();
00338       Clust.FindBestCut(SeedNId, K, SizeFrac);
00339       const int Sz = Clust.BestCutNodes();
00340       const int Vol = Clust.GetCutVol();
00341       const double Phi = TMath::Round(Clust.GetCutPhi(), 4);
00342       if (Sz == 0 || Vol == 0 || Phi == 0) { continue; }
00343       MeanSz+=Sz;  MeanVol+=Vol;  Count+= 1;
00344       if (SaveAllSweeps) { // save the full cut set and conductances for all trials
00345         SweepsV.Add(TNodeSweep(SeedNId, Clust.GetNIdV(), Clust.GetPhiV())); }
00346       int SAtBestPhi=-1;
00347       for (int s = 0; s < Clust.Len(); s++) {
00348         const int size = s+1;
00349         const int cut = Clust.GetCut(s);
00350         const int edges = (Clust.GetVol(s)-cut)/2;
00351         const double phi = Clust.GetPhi(s);
00352         if (( Clust.GetPhi(s) != double(cut)/double(2*edges+cut))) { continue; } // more than half of the edges
00353         IAssert((Clust.GetVol(s) - cut) % 2 == 0);
00354         IAssert(phi == double(cut)/double(2*edges+cut));
00355         IAssert(phi >= 1.0/double((1+s)*s+1));
00357         // TCutInfo CutInfo(size, edges, cut); Clust.GetNIdV().GetSubValV(0, size-1, CutInfo.CutNIdV);
00358         //double MxFrac=0, AvgFrac=0, MedianFrac=0, Pct9Frac=0, Flake=0;
00359         //CutInfo.GetFracDegOut(Graph, MxFrac, AvgFrac, MedianFrac, Pct9Frac, Flake);
00360         //const double phi = MxFrac;
00361         if (BestPhi >= phi) {
00362           BestPhi = phi;
00363           BestCut = TCutInfo(size, edges, cut);
00364           SAtBestPhi = s;
00365         }
00367         //bool TAKE=false;  if (! BestCutH.IsKey(size)) { TAKE=true; }
00368         //else { BestCutH.GetDat(size).GetFracDegOut(Graph, MxFrac, AvgFrac, MedianFrac, Pct9Frac, Flake);  if (MxFrac >= phi) { TAKE = true; } }
00369         // if (TAKE) {
00370         if (! BestCutH.IsKey(size) || BestCutH.GetDat(size).GetPhi() >= phi) { //new best cut (size, edges inside and nodes)
00371           BestCutH.AddDat(size, TCutInfo(size, edges, cut));  // for every size store best cut (NIds inside the cut)
00372           if (SaveBestNodesAtK) { // store node ids in best community for each size k
00373             if (! SizeBucketSet.Empty() && ! SizeBucketSet.IsKey(size)) { continue; } // only save best clusters at SizeBucketSet
00374             Clust.GetNIdV().GetSubValV(0, size-1, BestCutH.GetDat(size).CutNIdV); }
00375         }
00376         if (SaveAllCond) { // for every size store all conductances
00377           SizePhiH.AddDat(size).Add(phi); }
00378       }
00379       if (SAtBestPhi != -1) { // take nodes in best cluster
00380         const int size = SAtBestPhi+1;
00381         Clust.GetNIdV().GetSubValV(0, size-1, BestCut.CutNIdV); 
00382       }
00383       if (TLocClust::Verbose) {
00384         printf(".");
00385         if (run % 50 == 0) {
00386           printf("\r                                                   %d / %d \r", run, Runs); }
00387       }
00388     }
00389     if (TLocClust::Verbose) {
00390       printf("\r  %d / %d: %s                                                   \n", Runs, Runs, ExeTm.GetStr()); 
00391     }
00392     MeanSz/=Count;  MeanVol/=Count;
00393     printf("  Graph(%d, %d)  ", Nodes, Edges);
00394     printf("       mean:  sz: %.2f  vol: %.2f [%s] %s\n", MeanSz, MeanVol, ExeTm.GetStr(), TExeTm::GetCurTm());
00395   }
00396   SizePhiH.SortByKey();
00397   for (int k = 0; k < SizePhiH.Len(); k++) { 
00398     SizePhiH[k].Sort(); }
00399   SweepsV.Sort();
00400   BestCutH.SortByKey();
00401   printf("DONE. Graph(%d, %d): Total run time: %s\n\n", Nodes, Edges, TotTm.GetStr());
00402 }
00403 
00404 void TLocClustStat::AddBagOfWhiskers() {
00405   BagOfWhiskers(Graph, BagOfWhiskerV, BestWhisk); // slow but exact
00406   //BagOfWhiskers2(Graph, BagOfWhiskerV);
00407 }
00408 
00409 // add a cut on NIdV nodes
00410 void TLocClustStat::AddCut(const TIntV& NIdV) {
00411   int Vol, Cut; double Phi;
00412   TLocClust::GetCutStat(Graph, NIdV, Vol, Cut, Phi, Graph->GetEdges());
00413   SizePhiH.AddDat(NIdV.Len()).Add(Phi);
00414 }
00415 
00416 // add a cut of particular conductance
00417 void TLocClustStat::AddCut(const int& ClustSz, const double& Phi) {
00418   SizePhiH.AddDat(ClustSz).Add(Phi);
00419 }
00420 
00421 // add the cut on particular set of nodes (calculate conductance and modularity)
00422 void TLocClustStat::AddToBestCutH(const PUNGraph& Graph, const TIntV& NIdV, const bool& AddBestCond) {
00423   const int K = NIdV.Len();
00424   TCutInfo CutInfo(Graph, NIdV, true);
00425   printf("new: %d\t%d\t%d\t%f\t%f\n", CutInfo.GetNodes(), CutInfo.GetEdges(), 
00426     CutInfo.GetCutSz(), CutInfo.GetPhi(), CutInfo.GetModular(Graph->GetEdges()));
00427   if (! BestCutH.IsKey(K)) { BestCutH.AddDat(K, CutInfo);  return; }
00428   TCutInfo& CurCut = BestCutH.GetDat(K);
00429   // keep the cluster with best conductance
00430   if (AddBestCond && CurCut.GetPhi() > CutInfo.GetPhi()) { CurCut=CutInfo; }
00431   // keep the cluster with best modularity
00432   //const int E = Graph->GetEdges();  
00433   //if (! AddBestCond && CurCut.GetModular(E) < CutInfo.GetModular(E)) { CurCut=CutInfo; }
00434 }
00435 
00436 const TLocClustStat::TCutInfo& TLocClustStat::FindBestCut(const int& Nodes) const {
00437   double bestPhi = 1;
00438   int CutId = -1;
00439   if (Nodes > 0) {
00440     IAssert(BestCutH.IsKey(Nodes));
00441     return BestCutH.GetDat(Nodes);
00442   } else {
00443     for (int n = 0; n < BestCutH.Len(); n++) {
00444       if (BestCutH[n].GetPhi() <= bestPhi) {
00445         bestPhi = BestCutH[n].GetPhi();  CutId = n; }
00446     }
00447     IAssert(CutId != -1);
00448     IAssert(! BestCutH[CutId].CutNIdV.Empty());
00449     return BestCutH[CutId];
00450   }
00451 }
00452 
00453 // find the cut with the lowest Phi of particular size (if Nodes=-1 find absolute best cut)
00454 double TLocClustStat::FindBestCut(const int& Nodes, TIntV& ClustNIdV) const {
00455   const TCutInfo& Cut = FindBestCut(Nodes);
00456   ClustNIdV = Cut.CutNIdV;
00457   return Cut.GetPhi();
00458 }
00459 
00460 int TLocClustStat::FindBestCut(const int& Nodes, int& Vol, int& Cut, double& Phi) const {
00461   const TCutInfo& CutInfo = FindBestCut(Nodes);
00462   Vol = CutInfo.GetVol();
00463   Cut = CutInfo.GetCutSz();
00464   Phi = CutInfo.GetPhi();
00465   return CutInfo.GetNodes();
00466 }
00467 
00468 // find best phi where the cut has N nodes, and the nodes in the cluster are not in the TabuSet
00469 double TLocClustStat::FindBestCut(const int& Nodes, const TIntSet& TabuNIdSet, int& BestCutId) const {
00470   double bestPhi = 1;
00471   BestCutId = -1;
00472   bool Tabu;
00473   IAssert(! SweepsV.Empty());
00474   for (int c = 0; c < SweepsV.Len(); c++) {
00475     const TNodeSweep& Sweep = SweepsV[c];
00476     if (Sweep.Len() < Nodes) { continue; }
00477     if (Sweep.Phi(Nodes-1) > bestPhi) { continue; }
00478     Tabu = false;
00479     for (int i = 0; i < Nodes; i++) {
00480       if (TabuNIdSet.IsKey(Sweep.NId(i))) { Tabu=true; break; }
00481     }
00482     if (Tabu) { continue; }
00483     bestPhi = Sweep.Phi(Nodes-1);
00484     BestCutId = c;
00485   }
00486   return bestPhi;
00487 }
00488 
00489 // get statistics over all conductances at all cluster sizes
00490 void TLocClustStat::GetCurveStat(TFltPrV& MeanV, TFltPrV& MedV, TFltPrV& Dec1V, TFltPrV& MinV, TFltPrV& MaxV) const {
00491   TFltPrV BucketV;
00492   MeanV.Clr(false); MedV.Clr(false); Dec1V.Clr(false); MinV.Clr(false); MaxV.Clr(false);
00493   if (! SizePhiH.Empty()) { // stores conductances of all little clusters
00494     const THash<TInt, TFltV>& KvH = SizePhiH; 
00495     for (int i = 0; i < KvH.Len(); i++) {
00496       const double X = KvH.GetKey(i).Val;  IAssert(X >= 1.0);
00497       const TFltV& YVec = KvH[i];
00498       TMom Mom;
00499       for (int j = 0; j < YVec.Len(); j++) { Mom.Add(YVec[j]); }
00500       Mom.Def();
00501       /*IAssert(Mom.GetMean()>=0 && Mom.GetMean()<=1);
00502       IAssert(Mom.GetMedian()>=0 && Mom.GetMedian()<=1);
00503       IAssert(Mom.GetDecile(1)>=0 && Mom.GetDecile(1)<=1);
00504       IAssert(Mom.GetMn()>=0 && Mom.GetMn()<=1);
00505       IAssert(Mom.GetMx()>=0 && Mom.GetMx()<=1);*/
00506       MeanV.Add(TFltPr(X, Mom.GetMean()));
00507       MedV.Add(TFltPr(X, Mom.GetMedian()));
00508       Dec1V.Add(TFltPr(X, Mom.GetDecile(1)));
00509       MinV.Add(TFltPr(X, Mom.GetMn()));
00510       MaxV.Add(TFltPr(X, Mom.GetMx()));
00511     }
00512     MeanV.Sort(); MedV.Sort();  Dec1V.Sort();  MinV.Sort();  MaxV.Sort();
00513     // create log buckets (makes plots smaller and smoother)
00514     TLocClustStat::MakeExpBins(MeanV, BucketV);  MeanV.Swap(BucketV);
00515     TLocClustStat::MakeExpBins(MedV, BucketV);  MedV.Swap(BucketV);
00516     TLocClustStat::MakeExpBins(Dec1V, BucketV);  Dec1V.Swap(BucketV);
00517     TLocClustStat::MakeExpBins(MinV, BucketV);  MinV.Swap(BucketV);
00518     TLocClustStat::MakeExpBins(MaxV, BucketV);  MaxV.Swap(BucketV);
00519   } else {
00520     //const int GEdges = Graph->GetEdges();
00521     for (int i = 0; i < BestCutH.Len(); i++) {
00522       MinV.Add(TFltPr(BestCutH.GetKey(i).Val, BestCutH[i].GetPhi()));
00523     }
00524     MinV.Sort();
00525     TLocClustStat::MakeExpBins(MinV, BucketV);  MinV.Swap(BucketV);
00526   }
00527 }
00528 
00529 void TLocClustStat::GetBoltzmanCurveStat(const TFltV& TempV, TVec<TFltPrV>& NcpV) const {
00530   IAssert(! SizePhiH.Empty()); // stores conductances of all little clusters
00531   NcpV.Gen(TempV.Len());
00532   TFltPrV BucketV;
00533   for (int t = 0; t < TempV.Len(); t++) {
00534     const double T = TempV[t];
00535     for (int i = 0; i < SizePhiH.Len(); i++) {
00536       const double X = SizePhiH.GetKey(i).Val;  IAssert(X >= 1.0);
00537       const TFltV& PhiV = SizePhiH[i];
00538       double V = 0.0, SumW = 0.0;
00539       for (int j = 0; j < PhiV.Len(); j++) { 
00540         V += PhiV[j] * exp(-PhiV[j]/T); 
00541         SumW += exp(-PhiV[j]/T); 
00542       }
00543       //V /= PhiV.Len();
00544       V /= SumW;
00545       NcpV[t].Add(TFltPr(X, V));
00546     }
00547     TLocClustStat::MakeExpBins(NcpV[t], BucketV);  NcpV[t].Swap(BucketV);
00548     // normalize to start at (1,1)
00549     //for (int i = 0; i < NcpV[t].Len(); i++) {
00550     //  NcpV[t][i].Val2 /= NcpV[t][0].Val2;
00551     //}
00552   }
00553 }
00554 
00555 TStr TLocClustStat::ParamStr() const {
00556   if (Graph.Empty()) {
00557     return TStr::Fmt("A=%g, K=%d-%g-%s, Cvr=%d, SzFrc=%g", Alpha(), KMin(), KFac(), TInt::GetMegaStr(KMax()).CStr(), Coverage(), SizeFrac()); }
00558   else {
00559     return TStr::Fmt("A=%g, K=%d-%g-%s, Cvr=%d, SzFrc=%g G(%d, %d)", Alpha(), KMin(), KFac(), TInt::GetMegaStr(KMax()).CStr(), Coverage(), SizeFrac(),
00560       Graph->GetNodes(), Graph->GetEdges());
00561   }
00562 }
00563 
00564 void TLocClustStat::PlotNCP(const TStr& OutFNm, TStr Desc) const {
00565   if (Desc.Empty()) { Desc = OutFNm; }
00566   TFltPrV MeanV, MedV, Dec1V, MinV, MaxV;
00567   GetCurveStat(MeanV, MedV, Dec1V, MinV, MaxV);
00568   TGnuPlot GP("ncp."+OutFNm, TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr()));
00569   if (! MaxV.Empty()) { GP.AddPlot((MaxV), gpwLines, "MAX"); }
00570   if (! MedV.Empty()) { GP.AddPlot((MedV), gpwLines, "MEDIAN"); }
00571   if (! MeanV.Empty()) { GP.AddPlot((MeanV), gpwLines, "MEAN"); }
00572   if (! Dec1V.Empty()) { GP.AddPlot((Dec1V), gpwLines, "1-st DECILE"); }
00573   if (! MinV.Empty()) { 
00574     GP.AddPlot((MinV), gpwLines, "MIN"); 
00575     //GP.AddPlot(MinV, gpwLines, "smooth MIN", "lw 2 smooth bezier");
00576   }
00577   if (! BagOfWhiskerV.Empty()) {
00578     GP.AddPlot(BagOfWhiskerV, gpwLines, "Whiskers", "lw 1"); 
00579     TFltPrV BestWhiskV;  BestWhiskV.Add(TFltPr(BestWhisk));
00580     GP.AddPlot(BestWhiskV, gpwPoints, "Best whisker", "pt 5 ps 2");
00581   }
00582   GP.SetScale(gpsLog10XY);
00583   GP.SetXYLabel("k (number of nodes in the cluster)", "{/Symbol \\F} (conductance)");
00584   GP.SetXRange(1, Graph->GetNodes()/2);
00585   GP.SavePng();
00586 }
00587 
00588 // NCP but with modularity on Y-axis
00589 void TLocClustStat::PlotNCPModul(const TStr& OutFNm, TStr Desc) const {
00590   if (Desc.Empty()) { Desc = OutFNm; }
00591   TFltPrV MinV, BucketV;
00592   const int GEdges = Graph->GetEdges();
00593   for (int i = 0; i < BestCutH.Len(); i++) {
00594     MinV.Add(TFltPr(BestCutH.GetKey(i).Val, BestCutH[i].GetModular(GEdges))); }
00595   MinV.Sort();
00596   TLocClustStat::MakeExpBins(MinV, BucketV);  MinV.Swap(BucketV);
00597   TGnuPlot GP("ncpMod."+OutFNm, TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr()));
00598   if (! MinV.Empty()) { 
00599     GP.AddPlot((MinV), gpwLines, "MIN"); }
00600   GP.SetScale(gpsLog10XY);
00601   GP.SetXYLabel("k (number of nodes in the cluster)", "Q (modularity)");
00602   GP.SetXRange(1, Graph->GetNodes()/2);
00603   GP.SavePng();
00604 }
00605 
00606 // plot top 10 surface/volume curves (disjont clusters of particular size)
00607 void TLocClustStat::PlotNcpTop10(const TStr& OutFNm, TStr Desc, const int& TakeMinN) const {
00608   if (Desc.Empty()) { Desc = OutFNm; }
00609   const double BinFactor = 1.05;
00610   double BinPos=1;
00611   int PrevBPos=1, CurBPos=1, CutId;
00612   bool AddSome;
00613   TVec<TFltPrV> Curves(TakeMinN);
00614   while (true) {
00615     PrevBPos = CurBPos;
00616     BinPos *= BinFactor; // do exponential binning
00617     CurBPos = (int) floor(BinPos);
00618     if (CurBPos == PrevBPos) { CurBPos=PrevBPos+1;  BinPos=CurBPos; }
00619     const int Nodes = CurBPos;
00620     TIntSet TabuNIdSet(Graph->GetNodes());
00621     AddSome = false;
00622     for (int t = 0; t < TakeMinN; t++) {
00623       const double Phi = FindBestCut(Nodes, TabuNIdSet, CutId);
00624       if (CutId == -1) { break; }
00625       Curves[t].Add(TFltPr(Nodes, Phi));
00626       for (int n = 0; n < Nodes; n++) {
00627         TabuNIdSet.AddKey(SweepsV[CutId].NId(n)); }
00628       AddSome = true;
00629     }
00630     if (! AddSome) { break; }
00631   }
00632   TGnuPlot GP("ncpTop."+OutFNm, TStr::Fmt("%s. Top disjoint clusters. Take:%d, %s", Desc.CStr(), TakeMinN, ParamStr().CStr()));
00633   for (int i = 0; i < Curves.Len(); i++) {
00634     GP.AddPlot(Curves[i], gpwLines, TStr::Fmt("MIN %d", i+1), "lw 1"); }
00635   //if (! BagOfWhiskerV.Empty()) { GP.AddPlot(BagOfWhiskerV, gpwLines, " Whiskers", "lw 2"); }
00636   GP.SetScale(gpsLog10XY);
00637   GP.SetXYLabel("k (number of nodes in the cluster)", "{/Symbol \\F} (conductance)");
00638   GP.SetXRange(1, Graph->GetNodes()/2);
00639   GP.SavePng();
00640 }
00641 
00642 // plot conductance on the boundary / counductance inside vs. cluster size
00643 void TLocClustStat::PlotPhiInOut(const TStr& OutFNm, TStr Desc) const {
00644   IAssert(! BestCutH.Empty() && ! Graph.Empty());
00645   TFltPrV PhiInV, PhiBoundV, PhiRatV;
00646   FILE *F = fopen(TStr::Fmt("phiInOut.%s-all.tab", OutFNm.CStr()).CStr(), "wt");
00647   fprintf(F, "#Nodes\tEdges\tVol\tCut\tPhi\tInNodes\tInEdges\tInVol\tInCut\tInPhi\n");
00648   TLocClustStat ClustStat2(Alpha, 1, KMax, KFac, Coverage, SizeFrac);
00649   const double IdxFact = 1.05;
00650   int curIdx=1, prevIdx=1;
00651   while (curIdx <= BestCutH.Len()) {
00652     const TCutInfo& CutInfo = BestCutH[curIdx-1];
00653     if (CutInfo.GetNodes() > 1) {
00654       PUNGraph ClustG = TSnap::GetSubGraph(Graph, CutInfo.CutNIdV);
00655       ClustStat2.Run(ClustG);
00656       const TCutInfo& InCut = ClustStat2.FindBestCut(-1);
00657       PhiInV.Add(TFltPr(CutInfo.GetNodes(), InCut.GetPhi()));
00658       PhiBoundV.Add(TFltPr(CutInfo.GetNodes(), CutInfo.GetPhi()));
00659       PhiRatV.Add(TFltPr(CutInfo.GetNodes(), InCut.GetPhi()/CutInfo.GetPhi()));
00660       fprintf(F, "%d\t%d\t%d\t%d\t%f\t%d\t%d\t%d\t%d\t%f\n", CutInfo.GetNodes(), CutInfo.GetEdges(), CutInfo.GetVol(),
00661         CutInfo.GetCutSz(), CutInfo.GetPhi(),  InCut.GetNodes(), InCut.GetEdges(), InCut.GetVol(), InCut.GetCutSz(), InCut.GetPhi());
00662       fflush(F);
00663     }
00664     prevIdx = curIdx;
00665     curIdx = (int) TMath::Round(double(curIdx)*IdxFact);
00666     if (prevIdx == curIdx) { curIdx++; }
00667   }
00668   fclose(F);
00669   { TGnuPlot GP("phiInOut."+OutFNm, TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr()));
00670   GP.AddPlot(PhiBoundV, gpwLines, "CUT conductance", "lw 1");
00671   GP.AddPlot(PhiInV, gpwLines, "INSIDE conductance", "lw 1");
00672   //GP.AddPlot(PhiRatV, gpwLines, "RATIO (inside/boundary)", "lw 1");
00673   GP.SetXRange(1, Graph->GetNodes()/2);  GP.SetScale(gpsLog10XY);
00674   GP.SetXYLabel("k (number of nodes in the cluster)", "{/Symbol \\F} (conductance)");
00675   //GP.AddCmd("set logscale xyy2 10");
00676   //GP.AddCmd("set y2label \"Conductance ratio (inside/boundary -- higher better)\"");
00677   GP.SavePng(); }
00678   //system(TStr(TStr("replace_all.py phiInOut.")+OutFNm+".plt \"title \\\"RATIO (inside/boundary)\" \"axis x1y2 title \\\"RATIO (inside/boundary)\"").CStr());
00679   //GP.RunGnuPlot();
00680   { TGnuPlot GP("phiInOutRat."+OutFNm, TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr()));
00681   GP.AddPlot(PhiRatV, gpwLines, "RATIO (Inside / Boundary)", "lw 1");
00682   GP.SetXRange(1, Graph->GetNodes()/2);  GP.SetScale(gpsLog10XY);
00683   GP.SetXYLabel("Nodes", "Conductance ratio (inside/boundary) -- higher better");
00684   GP.SavePng(); }
00685 }
00686 
00687 // Plot edges inside, edges cut and conductance.
00688 // run: replace_all.py cutEdges.*.plt "title \"Conductance" "axis x1y2 title \"Conductance"
00689 void TLocClustStat::PlotBestClustDens(TStr OutFNm, TStr Desc) const {
00690   if (Desc.Empty()) { Desc = OutFNm; }
00691   const int len = BestCutH.Len();
00692   TFltPrV CutV(len, 0), EdgesV(len, 0), PhiV(len,0);
00693   for (int i = 0; i < BestCutH.Len(); i++) {
00694     const double size = BestCutH.GetKey(i).Val;
00695     CutV.Add(TFltPr(size, BestCutH[i].GetCutSz()));
00696     EdgesV.Add(TFltPr(size, BestCutH[i].GetEdges()));
00697     PhiV.Add(TFltPr(size, BestCutH[i].GetPhi()));
00698   }
00699   TGnuPlot GP("cutEdges."+OutFNm, TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr()));
00700   TFltPrV NewV;  TLocClustStat::MakeExpBins(EdgesV, NewV);
00701   GP.AddPlot(NewV, gpwLines, "Edges inside");
00702   TLocClustStat::MakeExpBins(CutV, NewV);
00703   GP.AddPlot(NewV, gpwLines, "Cut edges");
00704   TLocClustStat::MakeExpBins(PhiV, NewV);
00705   GP.AddPlot(NewV, gpwLines, "Conductance");
00706   GP.SetXYLabel("Cluster size", "Edges"); GP.SetScale(gpsLog10XY);
00707   GP.AddCmd("set logscale xyy2 10");
00708   GP.AddCmd("set y2label \"Conductance\"");
00709   GP.SavePng();
00710   system(TStr(TStr("replace_all.py cutEdges.")+OutFNm+".plt \"title \\\"Conductance\" \"axis x1y2 title \\\"Conductance\"").CStr());
00711   GP.RunGnuPlot();
00712 }
00713 
00714 // all different conducances of all sizes (not just lower envelope)
00715 void TLocClustStat::PlotNCPScatter(const TStr& OutFNm, TStr Desc) const {
00716   if (Desc.Empty()) { Desc = OutFNm; }
00717   THashSet<TFltPr> PhiSzH;
00718   IAssertR(! SizePhiH.Empty(), "Set SaveAllCond=true in TLocClustStat::Run()");
00719   for (int k = 0; k < SizePhiH.Len(); k++) {
00720     const int K = SizePhiH.GetKey(k);
00721     const TFltV& PhiV = SizePhiH[k];
00722     for (int p = 0; p < PhiV.Len(); p++) {
00723       PhiSzH.AddKey(TFltPr(K, TMath::Round(PhiV[p], 3))); }
00724   }
00725   TFltPrV PhiSzV;  PhiSzH.GetKeyV(PhiSzV);
00726   TGnuPlot GP("ncpScatter."+OutFNm, TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr()));
00727   GP.AddPlot(PhiSzV, gpwPoints, "", "pt 5 ps 0.2");
00728   GP.SetScale(gpsLog10XY);
00729   GP.SetXYLabel("k (number of nodes in the cluster)", "{/Symbol \\F} (conductance)");
00730   GP.SetXRange(1, Graph->GetNodes()/2);
00731   GP.SavePng();
00732 }
00733 
00734 // histogram of conductances for a fixed CmtySz
00735 void TLocClustStat::PlotPhiDistr(const int& CmtySz, const TStr& OutFNm, TStr Desc) const {
00736   IAssert(! SizePhiH.Empty());
00737   const TFltV& PhiV = SizePhiH.GetDat(CmtySz);
00738   THash<TFlt, TInt> PhiCntH;
00739   for (int i = 0; i < PhiV.Len(); i++) {
00740     const double Buck =  TMath::Round(PhiV[i], 3);
00741     PhiCntH.AddDat(Buck) += 1;
00742   }
00743   TGnuPlot::PlotValCntH(PhiCntH, TStr::Fmt("phiDistr%03d.%s", CmtySz, OutFNm.CStr()), 
00744     TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr()), "{/Symbol \\F} (conductance)",  "Count");
00745 }
00746 
00747 void TLocClustStat::PlotBoltzmanCurve(const TStr& OutFNm, TStr Desc) const {
00748   TFltPrV MeanV1, MedV1, Dec1V1, MinV1, MaxV1;
00749   GetCurveStat(MeanV1, MedV1, Dec1V1, MinV1, MaxV1);
00750   TVec<TFltPrV> NcpV;
00751   //const TFltV TempV = TFltV::GetV(0.05, 0.01, 0.1, 10, 100, 1000);
00752   const TFltV TempV = TFltV::GetV(0.001, 0.005, 0.01, 0.02, 0.05, 0.1, 0.5, 1);
00753   GetBoltzmanCurveStat(TempV, NcpV);
00754   TGnuPlot GP("ncp."+OutFNm+"-B", TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr()));  
00755   GP.AddPlot(MinV1, gpwLines, TStr::Fmt("%s MIN (%d, %d)", Desc.CStr(), Graph->GetNodes(), Graph->GetEdges()), "lw 1");
00756   GP.AddPlot(MeanV1, gpwLines, "Avg", "lw 1");
00757   GP.AddPlot(MedV1, gpwLines, "Median", "lw 1");
00758   GP.AddPlot(Dec1V1, gpwLines, "Decile-1", "lw 1");
00759   for (int t = 0; t < TempV.Len(); t++) {
00760     GP.AddPlot(NcpV[t], gpwLines, TStr::Fmt("Temp %g", TempV[t]()), "lw 1");
00761   }
00762   GP.SetScale(gpsLog10XY);
00763   GP.SetXYLabel("k (number of nodes in the cluster)", "{/Symbol \\F} (conductance)");
00764   GP.SavePng();
00765   //
00766   TFltPrV SzNClustV;
00767   int kCnt=1;
00768   for (int i = 0; i < SizePhiH.Len(); i++) {
00769     const int K = SizePhiH.GetKey(i);
00770     const TFltV& PhiV = SizePhiH[i];
00771     SzNClustV.Add(TFltPr(K, PhiV.Len()));
00772     if (K>2 && (pow(10.0, (int)log10((double)K))==K || (K >=10 && K<=100 && K%10==0) || (K >=100 && K<=1000 && K%100==0))) {
00773       THash<TFlt, TFlt> CntH;
00774       for (int p = 0; p < PhiV.Len(); p++) {
00775         CntH.AddDat(TMath::Round(log10(PhiV[p].Val),1)) += 1;
00776       }
00777       TGnuPlot::PlotValCntH(CntH, TStr::Fmt("ncp.%s-c%02d", OutFNm.CStr(), kCnt++), TStr::Fmt("%s. K=%d, NPieces=%d, %s", 
00778         Desc.CStr(), K, PhiV.Len(), ParamStr().CStr()), "log_10 {/Symbol \\F} (conductance)", 
00779         TStr::Fmt("Number of pieces of such conductance, K=%d, NPieces=%d)", K, PhiV.Len()));
00780     }
00781   }
00782   TGnuPlot::PlotValV(SzNClustV, "ncp."+OutFNm+"-ClSz", TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr()),
00783     "k (cluster size)", "c(k) (number of extracted clusters)", gpsLog);
00784 }
00785 
00786 void TLocClustStat::ImposeNCP(const TLocClustStat& LcStat2, TStr OutFNm, TStr Desc, TStr Desc1, TStr Desc2) const {
00787   if (Desc.Empty()) { Desc = OutFNm; }
00788   TFltPrV MeanV1, MedV1, Dec1V1, MinV1, MaxV1;
00789   TFltPrV MeanV2, MedV2, Dec1V2, MinV2, MaxV2;
00790   GetCurveStat(MeanV1, MedV1, Dec1V1, MinV1, MaxV1);
00791   LcStat2.GetCurveStat(MeanV2, MedV2, Dec1V2, MinV2, MaxV2);
00792   // plot
00793   TGnuPlot GP("ncp."+OutFNm, TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr()));
00794   if (! MinV1.Empty()) { GP.AddPlot(MinV1, gpwLines, TStr::Fmt("%s MIN (%d, %d)", Desc1.CStr(), Graph->GetNodes(), Graph->GetEdges()), "lw 1"); }
00795   if (! MinV2.Empty()) { GP.AddPlot(MinV2, gpwLines, TStr::Fmt("%s MIN (%d, %d)", Desc2.CStr(), LcStat2.Graph->GetNodes(), LcStat2.Graph->GetEdges()), "lw 1"); }
00796   if (! BagOfWhiskerV.Empty()) { 
00797     GP.AddPlot(BagOfWhiskerV, gpwLines, Desc1+" whiskers", "lw 1"); 
00798     TFltPrV BestWhiskV;  BestWhiskV.Add(BestWhisk);
00799     GP.AddPlot(BestWhiskV, gpwPoints, Desc1+" Best whisker", "pt 5 ps 2"); }
00800   if (! LcStat2.BagOfWhiskerV.Empty()) {
00801     GP.AddPlot(LcStat2.BagOfWhiskerV, gpwLines, Desc2+" whiskers", "lw 1"); 
00802     TFltPrV BestWhiskV;  BestWhiskV.Add(LcStat2.BestWhisk);
00803     GP.AddPlot(BestWhiskV, gpwPoints, Desc2+" Best whisker", "pt 5 ps 2"); }
00804   GP.SetScale(gpsLog10XY);
00805   GP.SetXYLabel("k (number of nodes in the cluster)", "{/Symbol \\F} (conductance)");
00806   //GP.SetXRange(1, Graph->GetNodes()/2);
00807   GP.SavePng();
00808 }
00809 
00810 void TLocClustStat::ImposeNCP(const TLocClustStat& LcStat2, const TLocClustStat& LcStat3, TStr OutFNm, TStr Desc, TStr Desc1, TStr Desc2, TStr Desc3) const {
00811   if (Desc.Empty()) { Desc = OutFNm; }
00812   TFltPrV MeanV1, MedV1, Dec1V1, MinV1, MaxV1;
00813   TFltPrV MeanV2, MedV2, Dec1V2, MinV2, MaxV2;
00814   TFltPrV MeanV3, MedV3, Dec1V3, MinV3, MaxV3;
00815   GetCurveStat(MeanV1, MedV1, Dec1V1, MinV1, MaxV1);
00816   LcStat2.GetCurveStat(MeanV2, MedV2, Dec1V2, MinV2, MaxV2);
00817   LcStat3.GetCurveStat(MeanV3, MedV3, Dec1V3, MinV3, MaxV3);
00818   // plot
00819   TGnuPlot GP("phiTR."+OutFNm, TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr()));
00820   if (! MinV1.Empty()) { GP.AddPlot(MinV1, gpwLines, Desc1+" MIN", "lw 1"); }
00821   if (! MinV2.Empty()) { GP.AddPlot(MinV2, gpwLines, Desc2+" MIN", "lw 1"); }
00822   if (! MinV3.Empty()) { GP.AddPlot(MinV3, gpwLines, Desc3+" MIN", "lw 1"); }
00823   if (! BagOfWhiskerV.Empty()) { 
00824     GP.AddPlot(BagOfWhiskerV, gpwLines, Desc1+" whiskers", "lw 1"); 
00825     TFltPrV BestWhiskV;  BestWhiskV.Add(BestWhisk);
00826     GP.AddPlot(BestWhiskV, gpwPoints, Desc1+" Best whisker", "pt 5 ps 2"); }
00827   if (! LcStat2.BagOfWhiskerV.Empty()) {
00828     GP.AddPlot(LcStat2.BagOfWhiskerV, gpwLines, Desc2+" whiskers", "lw 1"); 
00829     TFltPrV BestWhiskV;  BestWhiskV.Add(LcStat2.BestWhisk);
00830     GP.AddPlot(BestWhiskV, gpwPoints, Desc2+" Best whisker", "pt 5 ps 2"); }
00831   if (! LcStat3.BagOfWhiskerV.Empty()) { 
00832     GP.AddPlot(LcStat3.BagOfWhiskerV, gpwLines, Desc3+" whiskers", "lw 1"); 
00833     TFltPrV BestWhiskV;  BestWhiskV.Add(LcStat3.BestWhisk);
00834     GP.AddPlot(BestWhiskV, gpwPoints, Desc3+" Best whisker", "pt 5 ps 2"); }
00835   GP.SetScale(gpsLog10XY);
00836   GP.SetXYLabel("k (number of nodes in the cluster)", "{/Symbol \\F} (conductance)");
00837   GP.SetXRange(1, Graph->GetNodes()/2);
00838   GP.SavePng();
00839 }
00840 
00841 void TLocClustStat::SaveTxtInfo(const TStr& OutFNmPref, const TStr& Desc, const bool& SetMaxAt1) const {
00842   printf("Save text info...");
00843   TExeTm ExeTm;
00844   const int GNodes = Graph->GetNodes();
00845   const int GEdges = Graph->GetEdges();
00846   TVec<TFltV> ColV(17);
00847   double MxFrac=0, AvgFrac=0, MedianFrac=0, Pct9Frac=0, Flake=0;
00848   // double PrevBPos = 1, BPos = 1;
00849   for (int i = 0; i < SizeBucketSet.Len(); i++) {
00850     if ( !BestCutH.IsKey(SizeBucketSet[i])) { continue; }
00851     const TLocClustStat::TCutInfo& C = GetKNodeCut(SizeBucketSet[i]);
00852     C.GetFracDegOut(Graph, MxFrac, AvgFrac, MedianFrac, Pct9Frac, Flake);
00853     ColV[0].Add(C.Nodes());  ColV[1].Add(C.Edges()); 
00854     ColV[2].Add(C.CutSz());  ColV[3].Add(C.GetPhi());
00855     ColV[4].Add(C.GetExpansion());       ColV[5].Add(C.GetIntDens()); 
00856     ColV[6].Add(C.GetCutRatio(GNodes));  ColV[7].Add(C.GetNormCut(GEdges));
00857     ColV[8].Add(MxFrac);       ColV[9].Add(AvgFrac); 
00858     ColV[10].Add(MedianFrac);  ColV[11].Add(Pct9Frac);  ColV[12].Add(Flake);
00859     ColV[13].Add(double(2.0*C.Edges));  ColV[14].Add(C.GetExpEdgesIn(GEdges));
00860     ColV[15].Add(C.GetModular(GEdges)); ColV[16].Add(C.GetModRat(GEdges));
00861     printf(".");
00862   }
00863   // normalize so that maximum value is at 1
00864   if (SetMaxAt1) {
00865     for (int c = 1; c < ColV.Len(); c++) {
00866       double MaxVal=1e-10;
00867       for (int r = 0; r < ColV[c].Len(); r++) { MaxVal = TMath::Mx(MaxVal, ColV[c][r]()); }
00868       for (int r = 0; r < ColV[c].Len(); r++) { ColV[c][r] /= MaxVal; }
00869     }
00870   }
00871   // save
00872   const TStr DatFNm = TStr::Fmt("ncp.%s.INFO.tab", OutFNmPref.CStr());
00873   FILE *F = fopen(DatFNm.CStr(), "wt");
00874   fprintf(F, "# %s %s\n", Desc.CStr(), ParamStr().CStr());
00875   fprintf(F, "#N_inside\tE_inside\tE_across\tConductance\tExpansion\tIntDensity\tCutRatio\tNormCut\tMx_FracDegOut\tAvg_FDO\tMedian_FDO\t90Pct_FDO\tFlake_FDO\tVolume\tExpVolume\tModularity\tModRatio\n");
00876   for (int r = 0; r < ColV[0].Len(); r++) {
00877     fprintf(F, "%g", ColV[0][r]());
00878     for (int c = 1; c < ColV.Len(); c++) {
00879       fprintf(F, "\t%g", ColV[c][r]()); }
00880     fprintf(F, "\n");
00881   }
00882   fclose(F);
00883   printf("[%s]\n", ExeTm.GetStr());
00884   TGnuPlot GP(TStr::Fmt("ncp.%s.All", OutFNmPref.CStr()), TStr::Fmt("%s %s", 
00885     Desc.CStr(), ParamStr().CStr()));
00886   GP.AddPlot(DatFNm, 1, 4, gpwLines, "Conductance", "lw 2");
00887   GP.AddPlot(DatFNm, 1, 5, gpwPoints, "Expansion", "pt 3");
00888   GP.AddPlot(DatFNm, 1, 6, gpwPoints, "Internal Density", "pt 5 ps 0.8");
00889   GP.AddPlot(DatFNm, 1, 7, gpwPoints, "Cut Ratio", "pt 6");
00890   GP.AddPlot(DatFNm, 1, 8, gpwPoints, "Normalized Cut", "pt 7");
00891   GP.AddPlot(DatFNm, 1, 9, gpwPoints, "Maximum FDO", "pt 9");
00892   GP.AddPlot(DatFNm, 1, 10, gpwPoints, "Avg FDO", "pt 11");
00893   GP.AddPlot(DatFNm, 1, 13, gpwPoints, "Flake FDO", "pt 13");
00894   GP.SetScale(gpsLog10XY);
00895   GP.SetXYLabel("k (number of nodes in the cluster)", "Normalized community score (lower is better)");
00896   GP.SavePng();
00897 }
00898 
00899 // conductances if clusters are composed of disjoint pieces that can be separated
00900 // from the graph by a single edge
00901 void TLocClustStat::BagOfWhiskers(const PUNGraph& Graph, TFltPrV& SizePhiV, TFltPr& MaxWhisk) {
00902   TCnComV Cn1ComV;
00903   TSnap::Get1CnCom(Graph, Cn1ComV);
00904   TIntPrV SzVolV;
00905   int MxSize=0;
00906   if (Cn1ComV.Empty()) { printf("** No bridges\n"); SizePhiV.Clr();  return; }
00907   //  Graph->SaveEdgeList("g-vol.txt");  TGraphViz::Plot(Graph, gvlNeato, "g-vol.gif");  Fail; } IAssert(vol <= sz*(sz-1));
00908   printf("  1-connected components: %d\n", Cn1ComV.Len());
00909   MaxWhisk = TFltPr(1,1);
00910   for (int c = 0; c < Cn1ComV.Len(); c++) {
00911     const TIntV& NIdV = Cn1ComV[c].NIdV;
00912     const int sz = NIdV.Len();
00913     if (sz < 2) { continue; }
00914     int vol = 0; // volume is the size of degrees
00915     for (int n = 0; n < sz; n++) {
00916       vol += Graph->GetNI(NIdV[n]).GetOutDeg(); }
00917     SzVolV.Add(TIntPr(sz, vol));
00918     MxSize += sz;
00919     if (1.0/double(vol) < MaxWhisk.Val2) { MaxWhisk=TFltPr(NIdV.Len(), 1.0/double(vol)); }
00920   }
00921   SzVolV.Sort(false);
00922   // compose clusters
00923   THash<TInt, TIntSet> ItemSetH(MxSize, true);
00924   THash<TInt, TInt> VolH(MxSize, true);
00925   THash<TInt, TFlt> CostH(MxSize, true);
00926   ItemSetH.AddKey(0);  VolH.AddKey(0);
00927   TExeTm ExeTm;
00928   // exact up to size 1000
00929   for (int size = 2; size <= TMath::Mn(MxSize, 1000); size++) {
00930     for (int item = 0; item <SzVolV.Len(); item++) {
00931       const int smallSz = size-SzVolV[item].Val1;
00932       if (ItemSetH.IsKey(smallSz)) {
00933         const TIntSet& SmallSet = ItemSetH.GetDat(smallSz);
00934         if (SmallSet.IsKey(item)) { continue; }
00935         const int SmallVol = VolH.GetDat(smallSz);
00936         // combine smaller nad new solution
00937         const double curCost = CostH.IsKey(size) ? double(CostH.GetDat(size)) : double(10e10);
00938         const double newCost = double(SmallSet.Len()+1) / double(SmallVol+SzVolV[item].Val2);
00939         if (curCost < newCost) { continue; }
00940         VolH.AddDat(size, SmallVol+SzVolV[item].Val2);
00941         ItemSetH.AddDat(size, SmallSet);
00942         ItemSetH.GetDat(size).AddKey(item);
00943         CostH.AddDat(size, newCost);
00944       }
00945     }
00946     if (VolH.IsKey(size) && size%100==0) {
00947       printf("\rSize: %d/%d: vol: %d,  items: %d/%d [%s]", size, MxSize, VolH.GetDat(size).Val,
00948         ItemSetH.GetDat(size).Len(), SzVolV.Len(), ExeTm.GetStr());
00949     }
00950   }
00951   // add sizes larger than 1000
00952   printf("\nAdding sizes > 1000 nodes...");
00953   int partSz=0; double partVol=0.0;
00954   for (int i = 0; i < SzVolV.Len(); i++) {
00955     partSz += SzVolV[i].Val1();
00956     partVol += SzVolV[i].Val2();
00957     if (partSz < 1000) { continue; }
00958     const double curCost = CostH.IsKey(partSz) ? double(CostH.GetDat(partSz)) : double(10e10);
00959     const double partPhi = double(i+1)/partVol;
00960     if (partPhi < curCost) {
00961       CostH.AddDat(partSz, partPhi);
00962     }
00963   }
00964   VolH.SortByKey();
00965   CostH.SortByKey();
00966   SizePhiV.Gen(CostH.Len(), 0);
00967   SizePhiV.Add(TFltPr(1, 1));
00968   for (int s = 0; s < CostH.Len(); s++) {
00969     const int size = CostH.GetKey(s);
00970     const double cond = CostH[s]; //double(ItemSetH.GetDat(size).Len())/double(VolH[s]);
00971     SizePhiV.Add(TFltPr(size, cond));
00972   }
00973   printf("done\n");
00974 }
00975 
00976 // faster greedy version
00977 void TLocClustStat::BagOfWhiskers2(const PUNGraph& Graph, TFltPrV& SizePhiV) {
00978   TCnComV Cn1ComV;
00979   TSnap::Get1CnCom(Graph, Cn1ComV);
00980   TIntPrV SzVolV;
00981   int MxSize=0, TotVol=0;
00982   if (Cn1ComV.Empty()) { printf("** No bridges\n");  SizePhiV.Clr();  return; }
00983   printf("  1-connected components: %d\n", Cn1ComV.Len());
00984   for (int c = 0; c < Cn1ComV.Len(); c++) {
00985     const TIntV& NIdV = Cn1ComV[c].NIdV;
00986     const int sz = NIdV.Len();
00987     if (sz < 2) { continue; }
00988     int vol = 0; // volume is the size of degrees
00989     for (int n = 0; n < sz; n++) {
00990       vol += Graph->GetNI(NIdV[n]).GetOutDeg(); }
00991     SzVolV.Add(TIntPr(sz, vol));
00992     MxSize += sz;  TotVol += vol;
00993   }
00994   printf("  Total size: %d\t Total vol: %d\n", MxSize, TotVol);
00995   SzVolV.Sort(false);
00996   // compose clusters
00997   THash<TFlt, TFlt> SizePhiH(MxSize, true);
00998   for (int i = 0; i < SzVolV.Len(); i++) {
00999     const int Sz = SzVolV[i].Val1();
01000     const double Phi = 1.0/double(SzVolV[i].Val2());
01001     if ((! SizePhiH.IsKey(Sz)) || SizePhiH.GetDat(Sz) > Phi) {
01002       SizePhiH.AddDat(Sz, Phi);  }
01003   }
01004   double partSz=0.0, partVol=0.0;
01005   for (int i = 0; i < SzVolV.Len(); i++) {
01006     partSz += SzVolV[i].Val1();
01007     partVol += SzVolV[i].Val2();
01008     const double partPhi = double(i+1)/partVol;
01009     if ((! SizePhiH.IsKey(partSz)) || partPhi < SizePhiH.GetDat(partSz)) {
01010       SizePhiH.AddDat(partSz, partPhi); }
01011   }
01012   SizePhiV.Gen(SizePhiH.Len()+1, 0);
01013   SizePhiV.Add(TFltPr(1, 1));
01014   SizePhiH.SortByKey();
01015   for (int s = 0; s < SizePhiH.Len(); s++) {
01016     SizePhiV.Add(TFltPr(SizePhiH.GetKey(s), SizePhiH[s]));
01017   }
01018 }
01019 
01020 void TLocClustStat::MakeExpBins(const TFltPrV& ValV, TFltPrV& NewV) {
01021   if (ValV.Empty()) { NewV.Clr(false); return; }
01022   NewV.Gen(1000, 0);
01023   double PrevBPos = 1, BPos = 1;
01024   int i = 0;
01025   while (BPos <= ValV.Last().Val1) {
01026     //if (TakeValAt == 1) {
01027     //  while (i < ValV.Len() && ValV[i].Val1 <= BPos) { i++; }
01028     //  NewV.Add(ValV[i-1]); }
01029     //else if (TakeValAt == 2) {
01030     int MinI=-1;  double MinCnt=TFlt::Mx;
01031     while (i < ValV.Len() && ValV[i].Val1 <= BPos) {
01032       if (ValV[i].Val2 < MinCnt) { MinCnt=ValV[i].Val2; MinI=i; } i++; }
01033     if (MinI>=0 && MinI<ValV.Len()) {
01034       NewV.Add(ValV[MinI]); }
01035     PrevBPos = (uint) floor(BPos);
01036     BPos *= BinFactor;
01037     if (floor(BPos) == PrevBPos) { BPos = PrevBPos + 1; }
01038   }
01039   NewV.Add(ValV.Last());
01040 }
01041 
01042 void TLocClustStat::MakeExpBins(const TFltV& ValV, TFltV& NewV) {
01043   if (ValV.Empty()) { NewV.Clr(false); return; }
01044   NewV.Gen(1000, 0);
01045   double PrevBPos = 1, BPos = 1;
01046   int i = 1;
01047   NewV.Add(ValV[0]);
01048   while (BPos <= ValV.Len()) {
01049     int MinI=-1;  double MinCnt=TFlt::Mx;
01050     while (i < ValV.Len() && i <= BPos) {
01051       if (ValV[i] < MinCnt) { MinCnt=ValV[i]; MinI=i; } i++; }
01052     if (MinI>=0 && MinI<ValV.Len()) {
01053       NewV.Add(ValV[MinI]); }
01054     PrevBPos = (uint) floor(BPos);
01055     BPos *= BinFactor;
01056     if (floor(BPos) == PrevBPos) { BPos = PrevBPos + 1; }
01057   }
01058   NewV.Add(ValV.Last());
01059 }
01060 
01061 
01063 // Local clustering for a set of graphs (loads ncp-*.tab files)
01064 TNcpGraphsBase::TNcpGraphsBase(const TStr& FNmWc) {
01065   TStr FNm;
01066   for (TFFile FFile(FNmWc); FFile.Next(FNm); ) {
01067     TSsParser Ss(FNm, ssfTabSep, true, false);
01068     int TrueNcpId=-1, WhiskId=-1, RewBestWhiskId=-1, RewId=-1, BestWhiskId=-1;
01069     while (Ss.Next()) {
01070       for (int f = 0; f < Ss.GetFlds(); f++) {
01071         // load ForestFire parameter (fwd burn prob)
01072         if (strstr(Ss[f], "FWD:")) { 
01073           TStr S(Ss[f]); const int x = S.SearchStr("FWD:");
01074           ParamValV.Add(S.GetSubStr(x+4, S.SearchCh(' ', x+1)-1).GetFlt());
01075         }
01076         // extract column names
01077         if (strstr(Ss[f], "ORIGINAL MIN")!=NULL) { 
01078           GNmV.Add(TStr::Fmt("%s %s", FNm.GetSubStr(FNm.SearchCh('.')+1, FNm.SearchChBack('.')-1).CStr(), strchr(Ss[f], '('))); 
01079           int Nodes=0,Edges=0; sscanf(strchr(Ss[f], '(')+1, "%d,%d)", &Nodes, &Edges);
01080           GSizeV.Add(TIntPr(Nodes, Edges));
01081           printf("%s: %d %d\n", GNmV.Last().CStr(), Nodes, Edges);
01082           TrueNcpId=f;
01083         }
01084         if (strstr(Ss[f], "ORIGINAL whisker")!=NULL || strstr(Ss[f], "TRUE whisker")!=NULL) { WhiskId=f; } 
01085         if (strstr(Ss[f], "ORIGINAL Best whisker")!=NULL || strstr(Ss[f], "TRUE Best whisker")!=NULL) { BestWhiskId=f; }
01086         if (strstr(Ss[f], "REWIRED MIN")!=NULL || strstr(Ss[f], "RAND MIN")!=NULL) { RewId=f; } 
01087         if (strstr(Ss[f], "REWIRED Best whisker")!=NULL || strstr(Ss[f], "RAND Best whisker")!=NULL) { RewBestWhiskId=f; }
01088       }
01089       if (TrueNcpId!=-1 || WhiskId!=-1) { break; }
01090     }
01091     if (TrueNcpId < 0) { printf("%s\n", FNm.GetFMid().CStr()); break; }
01092     if (BestWhiskId < 0) { WhiskerV.Add(TFltPr(1,1)); }
01093     if (RewBestWhiskId < 0) { RewWhiskerV.Add(TFltPr(1,1)); }
01094     NcpV.Add(); WhiskNcpV.Add(); RewNcpV.Add();
01095     TFltPrV& Ncp = NcpV.Last();
01096     TFltPrV& WhiskNcp = WhiskNcpV.Last();
01097     TFltPrV& RewNcp = RewNcpV.Last();
01098     bool Once=false, Once2=false;
01099     while (Ss.Next()) { 
01100       if (TrueNcpId < Ss.GetFlds()&& Ss.IsFlt(TrueNcpId)) { Ncp.Add(TFltPr(Ss.GetFlt(TrueNcpId-1), Ss.GetFlt(TrueNcpId))); }
01101       if (WhiskId>=0 && WhiskId < Ss.GetFlds() && Ss.IsFlt(WhiskId)) { WhiskNcp.Add(TFltPr(Ss.GetFlt(WhiskId-1), Ss.GetFlt(WhiskId))); }
01102       if (RewId >=0 && RewId < Ss.GetFlds()&& Ss.IsFlt(RewId)) { RewNcp.Add(TFltPr(Ss.GetFlt(RewId-1), Ss.GetFlt(RewId))); }
01103       if (BestWhiskId>=0 && BestWhiskId < Ss.GetFlds() && ! Once) {  Once=true;
01104         int W2=BestWhiskId-1;  while (W2 > 0 && Ss.GetFlt(W2)!=(double)int(Ss.GetFlt(W2))) { W2--; }
01105         WhiskerV.Add(TFltPr(Ss.GetFlt(W2), Ss.GetFlt(BestWhiskId))); }
01106       if (RewBestWhiskId>=0 && RewBestWhiskId < Ss.GetFlds() && ! Once2) {  Once2=true;
01107         int W2=RewBestWhiskId-1;  while (W2 > 0 && Ss.GetFlt(W2)!=(double)int(Ss.GetFlt(W2))) { W2--; }
01108         RewWhiskerV.Add(TFltPr(Ss.GetFlt(W2), Ss.GetFlt(RewBestWhiskId))); }
01109     }
01110     printf("  ncp:%d  whisk:%d  rew:%d\n", NcpV.Last().Len(), WhiskNcpV.Last().Len(), RewNcpV.Last().Len());
01111   }
01112   IAssert(NcpV.Len() == GSizeV.Len());
01113 }
01114 TNcpGraphsBase::TNcpGraphsBase(TSIn& SIn) : GNmV(SIn), GSizeV(SIn), WhiskerV(SIn), 
01115   RewWhiskerV(SIn),NcpV(SIn), RewNcpV(SIn),WhiskNcpV(SIn) { 
01116 }
01117 
01118 void TNcpGraphsBase::Save(TSOut& SOut) const { 
01119   GNmV.Save(SOut); GSizeV.Save(SOut); 
01120   WhiskerV.Save(SOut);  RewWhiskerV.Save(SOut); NcpV.Save(SOut); 
01121   RewNcpV.Save(SOut); WhiskNcpV.Save(SOut); 
01122 }
01123 
01124 void TNcpGraphsBase::Impose(const TStr& OutFNm, const int& TopN, const bool& Smooth) { 
01125   TGnuPlot GP(OutFNm);
01126   for (int i = 0; i < TMath::Mn(NcpV.Len(), TopN); i++) {
01127     GP.AddPlot(NcpV[i], gpwLines, GNmV[i], Smooth?"smooth csplines":"");
01128   }
01129   GP.SetScale(gpsLog10XY);
01130   GP.SavePng();
01131 }
01132 
01133 double TNcpGraphsBase::GetXAtMinY(const TFltPrV& Ncp, const int& NNodes) {
01134   double MinX1=1, MinY1=1;
01135   for (int k = 0; k < Ncp.Len(); k++) {
01136     if (Ncp[k].Val2<MinY1) { MinX1=Ncp[k].Val1; MinY1=Ncp[k].Val2; } }
01137   return MinX1<1 ? 1 : MinX1;
01138   //if (NNodes < 1000) return MinX1;
01139   // smooth
01140   /*const int WndSize = 50;
01141   double MinX=1, MinY=1;
01142   TFltPrV Ncp2V;
01143   for (int k = 0; k < Ncp.Len(); k++) {
01144     int WndSz = k > WndSize ? WndSize : k;
01145     double SmoothVal=0.0, SmoothCnt=0;
01146     for (int i = -WndSz; i <= WndSz; i++) {
01147       if (k+i > -1 && k+i < Ncp.Len()) { SmoothCnt+=pow(1.1, -abs(i));
01148         SmoothVal+=pow(1.1, -abs(i)) * Ncp[k+i].Val2; }
01149     }
01150     SmoothVal = SmoothVal/SmoothCnt;
01151     Ncp2V.Add(TFltPr(Ncp[k].Val1, SmoothVal));
01152     if (SmoothVal<MinY) { MinX=Ncp[k].Val1; MinY=SmoothVal; }
01153   }
01154   static int cnt = 0;
01155   if (Ncp2V.Len() > 10 && cnt < 10) { 
01156     TGnuPlot GP(TStr::Fmt("test-%03d", ++cnt));
01157     GP.SetScale(gpsLog10XY);
01158     GP.AddPlot(Ncp, gpwLines, "true");
01159     GP.AddPlot(Ncp2V, gpwLines, "smooth");
01160     GP.SavePng();
01161   }
01162   if (MinX < 1) { return 1; } else if (MinX > 1000) { return 1000; }
01163   return MinX;*/
01164 }
01165 
01166 TFltPr TNcpGraphsBase::GetXYAtMinY(const TFltPrV& Ncp, const int& NNodes) {
01167   double MinX1=1, MinY1=1;
01168   for (int k = 0; k < Ncp.Len(); k++) {
01169     if (Ncp[k].Val2<MinY1) { MinX1=Ncp[k].Val1; MinY1=Ncp[k].Val2; } }
01170   return TFltPr(MinX1<1?1:MinX1, MinY1);
01171 }
01172 
01173 void TNcpGraphsBase::PlotNcpMin(const TStr& OutFNm, const bool& VsGraphN) {
01174   TFltPrV GSzMinK, GSzMinY;
01175   for (int i = 0; i < NcpV.Len(); i++) {
01176     const TFltPr XYAtMinY = GetXYAtMinY(NcpV[i], GSizeV[i].Val1);
01177     const double X = VsGraphN ? (!ParamValV.Empty()?ParamValV[i]():i+1) : GSizeV[i].Val1();
01178     GSzMinK.Add(TFltPr(X, XYAtMinY.Val1));
01179     GSzMinY.Add(TFltPr(X, XYAtMinY.Val2));
01180   }
01181   GSzMinK.Sort();  GSzMinY.Sort();
01182   const TStr XLabel = VsGraphN ? (!ParamValV.Empty()?"parameter value":"network number") : "network size";
01183   TGnuPlot::PlotValV(GSzMinK, TStr("bestK-")+OutFNm, "Network", XLabel, "size of best cluster", VsGraphN?gpsLog10Y:gpsLog, false, gpwLinesPoints);
01184   TGnuPlot::PlotValV(GSzMinY, TStr("condAtBestK-")+OutFNm, "Network", XLabel, "conductance of best cluster", VsGraphN?gpsLog10Y:gpsLog, false, gpwLinesPoints);
01185 }
01186 
01187 void TNcpGraphsBase::SaveTxtNcpMin(const TStr& OutFNm, const bool& VsGraphN) {
01188   TVec<TQuad<TInt, TInt, TFlt, TStr> > GSzMinK;
01189   for (int i = 0; i < NcpV.Len(); i++) {
01190     const TFltPr XYAtMinY = GetXYAtMinY(NcpV[i], GSizeV[i].Val1);
01191     const double X = VsGraphN ? (!ParamValV.Empty()?ParamValV[i]():i+1) : GSizeV[i].Val1();
01192     GSzMinK.Add(TQuad<TInt, TInt, TFlt, TStr>((int)X, (int)XYAtMinY.Val1(), XYAtMinY.Val2, GNmV[i]));
01193   }
01194   GSzMinK.Sort();
01195   FILE *F = fopen(TStr::Fmt("bestK-%s.txt", OutFNm.CStr()).CStr(), "wt");
01196   fprintf(F, "#nodes\tbestK\tcondAtBestK\tgraph name\n");
01197   for (int i = 0; i < GSzMinK.Len(); i++) {
01198     fprintf(F, "%d\t%d\t%f\t%s\n", GSzMinK[i].Val1(), GSzMinK[i].Val2(), GSzMinK[i].Val3(), GSzMinK[i].Val4.CStr());
01199   }
01200   fclose(F);
01201 }
01202 
01203 void TNcpGraphsBase::PlotRewNcpMin(const TStr& OutFNm, const bool& VsGraphN) {
01204   TFltPrV GSzMinK, GSzMinY;
01205   for (int i = 0; i < NcpV.Len(); i++) {
01206     const TFltPr XYAtMinY = GetXYAtMinY(RewNcpV[i], GSizeV[i].Val1);
01207     const double X = VsGraphN ? (!ParamValV.Empty()?ParamValV[i]():i+1) : GSizeV[i].Val1();
01208     GSzMinK.Add(TFltPr(X, XYAtMinY.Val1));
01209     GSzMinY.Add(TFltPr(X, XYAtMinY.Val2));
01210   }
01211   GSzMinK.Sort();  GSzMinY.Sort();
01212   const TStr XLabel = VsGraphN ? (!ParamValV.Empty()?"parameter value":"network number") : "network size";
01213   TGnuPlot::PlotValV(GSzMinK, TStr("bestR-")+OutFNm, "Rewired network", XLabel, "size of best cluster", VsGraphN?gpsLog10Y:gpsLog, false, gpwLinesPoints);
01214   TGnuPlot::PlotValV(GSzMinY, TStr("condAtBestR-")+OutFNm, "Rewired network", XLabel, "conductance of best cluster", VsGraphN?gpsLog10Y:gpsLog, false, gpwLinesPoints);
01215 }
01216 
01217 void TNcpGraphsBase::PlotBestWhisker(const TStr& OutFNm, const bool& VsGraphN) {
01218   TFltPrV GSzMinK, GSzMinY;
01219   for (int i = 0; i < GSizeV.Len(); i++) {
01220     if (WhiskerV[i].Val1()>0) {
01221       const double X = VsGraphN ? (!ParamValV.Empty()?ParamValV[i]():i+1) : GSizeV[i].Val1();
01222       GSzMinK.Add(TFltPr(X, WhiskerV[i].Val1()));
01223       GSzMinY.Add(TFltPr(X, WhiskerV[i].Val2()));
01224     }
01225   }
01226   GSzMinK.Sort();  GSzMinY.Sort();
01227   const TStr XLabel = VsGraphN ? (!ParamValV.Empty()?"parameter value":"network number") : "network size";
01228   TGnuPlot::PlotValV(GSzMinK, TStr("bestW-")+OutFNm, "Network", XLabel, "size of best whisker", VsGraphN?gpsLog10Y:gpsLog, false, gpwLinesPoints);
01229   TGnuPlot::PlotValV(GSzMinY, TStr("condAtBestW-")+OutFNm, "Network", XLabel, "conductance of best whisker", VsGraphN?gpsLog10Y:gpsLog, false, gpwLinesPoints);
01230 }
01231 
01232 void TNcpGraphsBase::PlotRewBestWhisker(const TStr& OutFNm, const bool& VsGraphN) {
01233   TFltPrV GSzMinK, GSzMinY;
01234   for (int i = 0; i < GSizeV.Len(); i++) {
01235     if (WhiskerV[i].Val1()>0) {
01236       const double X = VsGraphN ? (!ParamValV.Empty()?ParamValV[i]():i+1) : GSizeV[i].Val1();
01237       GSzMinK.Add(TFltPr(X, RewWhiskerV[i].Val1()));
01238       GSzMinY.Add(TFltPr(X, RewWhiskerV[i].Val2()));
01239     }
01240   }
01241   GSzMinK.Sort();  GSzMinY.Sort();
01242   const TStr XLabel = VsGraphN ? (!ParamValV.Empty()?"parameter value":"network number") : "network size";
01243   TGnuPlot::PlotValV(GSzMinK, TStr("bestWR-")+OutFNm, "Rewired network", XLabel, "size of best rewired whisker", VsGraphN?gpsLog10Y:gpsLog, false, gpwLinesPoints);
01244   TGnuPlot::PlotValV(GSzMinY, TStr("condAtBestWR-")+OutFNm, "Rewired network", XLabel, "conductance of best rewired whisker", VsGraphN?gpsLog10Y:gpsLog, false, gpwLinesPoints);
01245 }
01246 
01247 void TNcpGraphsBase::PlotAvgNcp(const TStr& OutFNm, const TVec<TFltPrV>& NcpVec, const int& MinSz, const double& MaxMinY) {
01248   THash<TFlt, TMom> MomH;
01249   int Cnt=0;
01250   for (int i = 0; i < NcpVec.Len(); i++) {
01251     if (GSizeV[i].Val1 < MinSz) { continue; }
01252     const TFltPrV& Ncp = NcpVec[i];
01253     double MinX=1, MinY=1;
01254     for (int k = 0; k < Ncp.Len(); k++){
01255       if (Ncp[k].Val2<MinY) { MinX=Ncp[k].Val1; MinY=Ncp[k].Val2; } }
01256     if (MinY > MaxMinY) { continue; }  Cnt++;
01257     //const double Coef = (1-0.0001)/(1.0-MinY);
01258     for (int k = 0; k < Ncp.Len(); k++){
01259       //MomH.AddDat(TMath::Round(exp(TMath::Round(log(Ncp[k].Val1()), 2)),2)).Add(0.0001+(Ncp[k].Val2-MinY)*Coef);
01260       MomH.AddDat(TMath::Round(exp(TMath::Round(log(Ncp[k].Val1()), 1)),0)).Add(Ncp[k].Val2);
01261     }
01262   }
01263   TGnuPlot::PlotValMomH(MomH, OutFNm, "", "size of the cluster, k", "phi(k)", gpsLog, gpwLines, true, true,true,true);
01264   printf("  minSz: %d, miny %g\t%d\n", MinSz, MaxMinY, Cnt);
01265 }
01266 
01267 void TNcpGraphsBase::SaveTxt(const TStr& OutFNm) {
01268   FILE *F=fopen(OutFNm.CStr(), "wt");
01269   fprintf(F, "#Nodes\tEdges\tBestK\tPhi(BestK)\tMaxWhiskN\tPhi(MaxWhisk)\tGraph\n");
01270   for (int i = 0; i < NcpV.Len(); i++) {
01271     const TFltPrV& Ncp = NcpV[i];
01272     double MinX=1, MinY=1;
01273     for (int k = 0; k < Ncp.Len(); k++){
01274       if (Ncp[k].Val2<MinY) { MinX=Ncp[k].Val1; MinY=Ncp[k].Val2; } }
01275     fprintf(F, "%d\t%d\t%d\t%f\t%d\t%f\t%s\n", GSizeV[i].Val1(), GSizeV[i].Val2(), 
01276       int(MinX), MinY, int(WhiskerV[i].Val1), WhiskerV[i].Val2(), GNmV[i].CStr());
01277   }
01278   fclose(F);
01279 }
01280 
01281 void TNcpGraphsBase::PlotDataset(const TStr& InFNmWc, const TStr& OutFNm, const bool& ImposeNcp, const bool& VsGraphN) {
01282   TNcpGraphsBase NcpBs(InFNmWc);  
01283   //NcpBs.Save(TFOut(OutFNm+".NcpBase"));
01284   //TNcpGraphsBase NcpBs(TFIn(OutFNm+".NcpBase"));
01285   if (ImposeNcp) {
01286     NcpBs.Impose(OutFNm+"5R", 5, false);  NcpBs.Impose(OutFNm+"5S", 5, true); 
01287     NcpBs.Impose(OutFNm+"R", 10, false);  NcpBs.Impose(OutFNm+"S", 10, true); 
01288   }
01289   NcpBs.PlotNcpMin(OutFNm, VsGraphN);
01290   //NcpBs.SaveTxtNcpMin(OutFNm, VsGraphN);
01291   NcpBs.PlotRewNcpMin(OutFNm, VsGraphN);
01292   NcpBs.PlotBestWhisker(OutFNm, VsGraphN);
01293   NcpBs.PlotRewBestWhisker(OutFNm, VsGraphN);
01294   
01295   //NcpBs.PlotAvgNcp(OutFNm+"AvgNcp", NcpBs.NcpV, 1, 1);
01296   //NcpBs.PlotAvgNcp(OutFNm+"AvgRewNcp", NcpBs.RewNcpV, 1, 1);
01297   /*NcpBs.PlotAvgNcp(OutFNm+"AvgNcp2", NcpBs.NcpV, 100, 0.1);
01298   NcpBs.PlotAvgNcp(OutFNm+"AvgNcp3", NcpBs.NcpV, 100, 0.01);
01299   NcpBs.PlotAvgNcp(OutFNm+"AvgNcp4", NcpBs.NcpV, 100, 0.001);
01300   NcpBs.PlotAvgNcp(OutFNm+"AvgNcp5", NcpBs.NcpV, 100, 0.0001);
01301   NcpBs.PlotAvgNcp(OutFNm+"RewNcp1", NcpBs.RewNcpV, 1000, 1);
01302   NcpBs.PlotAvgNcp(OutFNm+"RewNcp2", NcpBs.RewNcpV, 100, 0.1);
01303   NcpBs.PlotAvgNcp(OutFNm+"RewNcp3", NcpBs.RewNcpV, 100, 0.01);
01304   NcpBs.PlotAvgNcp(OutFNm+"RewNcp4", NcpBs.RewNcpV, 100, 0.001);
01305   NcpBs.PlotAvgNcp(OutFNm+"RewNcp5", NcpBs.RewNcpV, 100, 0.0001);
01306   NcpBs.PlotAvgNcp(OutFNm+"WhiskNcp1", NcpBs.WhiskNcpV, 1000, 1);
01307   NcpBs.PlotAvgNcp(OutFNm+"WhiskNcp2", NcpBs.WhiskNcpV, 100, 0.1);
01308   NcpBs.PlotAvgNcp(OutFNm+"WhiskNcp3", NcpBs.WhiskNcpV, 100, 0.01);
01309   NcpBs.PlotAvgNcp(OutFNm+"WhiskNcp4", NcpBs.WhiskNcpV, 100, 0.001);
01310   NcpBs.PlotAvgNcp(OutFNm+"WhiskNcp5", NcpBs.WhiskNcpV, 100, 0.0001);
01311   NcpBs.PlotAvgNcp(OutFNm+"WhiskNcp6", NcpBs.WhiskNcpV, 100, 0.00004);
01312   NcpBs.PlotAvgNcp(OutFNm+"WhiskNcp7", NcpBs.WhiskNcpV, 100, 0.00005);*/
01313   //NcpBs.SaveTxt(OutFNm+"bestK.txt");
01314 }