SNAP Library, Developer Reference  2012-10-02 12:56:23
SNAP, a general purpose network analysis and graph mining library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
ncp.cpp
Go to the documentation of this file.
00001  #include "stdafx.h"
00002 #include "ncp.h"
00003 
00005 // Local Spectral Clustering
00006 
00007 bool TLocClust::Verbose = true;
00008 
00009 int TLocClust::ApproxPageRank(const int& SeedNode, const double& Eps) {
00010   for (int i = 0; i < ProbH.Len(); i++) { ProbH[i]=0.0; }
00011   for (int i = 0; i < ResH.Len(); i++) { ResH[i]=0.0; }
00012   ProbH.Clr(false, -1, false);
00013   ResH.Clr(false, -1, false);
00014   ResH.AddDat(SeedNode, 1.0);
00015   int iter = 0;
00016   double OldRes = 0.0;
00017   NodeQ.Clr(false);
00018   NodeQ.Push(SeedNode);
00019   TExeTm ExeTm;
00020   while (! NodeQ.Empty()) {
00021     const int NId = NodeQ.Top(); NodeQ.Pop();
00022     const TUNGraph::TNodeI& Node = Graph->GetNI(NId);
00023     const int NIdDeg = Node.GetOutDeg();
00024     const double PushVal = ResH.GetDat(NId) - 0.5*Eps*NIdDeg;
00025     const double PutVal = (1.0-Alpha) * PushVal / double(NIdDeg);
00026     ProbH.AddDat(NId) += Alpha*PushVal;
00027     ResH.AddDat(NId) = 0.5 * Eps * NIdDeg;
00028     for (int e = 0; e < NIdDeg; e++) {
00029       const int DstNId = Node.GetOutNId(e);
00030       const int DstDeg = Graph->GetNI(DstNId).GetOutDeg();
00031       double& ResVal = ResH.AddDat(DstNId).Val;
00032       OldRes = ResVal;
00033       ResVal += PutVal;
00034       if (ResVal >= Eps*DstDeg && OldRes < Eps*DstDeg) {
00035         NodeQ.Push(DstNId); }
00036     }
00037     iter++;
00038     if (iter % Mega(1) == 0) { 
00039       printf(" %d[%s]", NodeQ.Len(), ExeTm.GetStr());
00040       if (iter/1000 > Graph->GetNodes() || ExeTm.GetSecs() > 4*3600) { // more than 2 hours
00041         printf("Too many iterations! Stop to save time.\n");
00042         return iter; }
00043     }
00044   }
00045   // check that the residuals are sufficiently small
00046   /*for (int i =0; i < ProbH.Len(); i++) {
00047     const int Deg = Graph->GetNI(ResH.GetKey(i)).GetOutDeg();
00048     IAssert(ResH[i] < Eps*Deg); } //*/
00049   return iter;
00050 }
00051 
00052 void TLocClust::SupportSweep() {
00053   TExeTm ExeTm;
00054   VolV.Clr(false);  CutV.Clr(false);  PhiV.Clr(false);
00055   if (ProbH.Empty()) { return; }
00056   const int TopNId = ProbH.GetKey(0);
00057   const int TopNIdDeg = Graph->GetNI(TopNId).GetOutDeg();
00058   int Vol = TopNIdDeg, Cut = TopNIdDeg;
00059   double Phi = Cut/double(Vol);
00060   VolV.Add(Vol);  CutV.Add(Cut);  PhiV.Add(1.0);
00061   for (int i = 1; i < ProbH.Len(); i++) {
00062     const int NId = ProbH.GetKey(i);
00063     const TUNGraph::TNodeI& Node = Graph->GetNI(NId);
00064     const int OutDeg = Node.GetOutDeg();
00065     int CutSz = OutDeg; // edges outside
00066     for (int e = 0; e < OutDeg; e++) {
00067       const int Rank = ProbH.GetKeyId(Node.GetOutNId(e));
00068       if ( Rank > -1 && Rank < i) { CutSz -= 2;  }
00069     }
00070     Vol += OutDeg;  Cut += CutSz;
00071     if (Vol < Edges2) {
00072       if (2*Vol > Edges2) { Phi = Cut / double(Edges2-Vol); }
00073       else { Phi = Cut / double(Vol); }
00074     } else {
00075       Phi = 1.0;
00076     }
00077     IAssert((Phi+1e-6) >= double(1.0)/double(i*(i+1)+1)); // conductance is worse than the best possible
00078     VolV.Add(Vol);  CutV.Add(Cut);  PhiV.Add(Phi);
00079   }}
00080 
00081 void TLocClust::FindBestCut(const int& SeedNode, const int& ClustSz, const double& MinSizeFrac) {
00082   double MaxPhi = TFlt::Mx;
00083   BestCutIdx = -1;
00084   SeedNId = SeedNode;
00085   // calculate pagerank and cut sets
00086   ApproxPageRank(SeedNId, 1.0/double(ClustSz));
00087   for (int i = 0; i < ProbH.Len(); i++) {
00088     ProbH[i] /= Graph->GetNI(ProbH.GetKey(i)).GetOutDeg(); }
00089   ProbH.SortByDat(false);
00090   SupportSweep();
00091   // find best cut
00092   NIdV.Clr(false);
00093   for (int i = 0; i < PhiV.Len(); i++) {
00094     const double Phi = PhiV[i];
00095     NIdV.Add(ProbH.GetKey(i));
00096     if (Phi < MaxPhi) { MaxPhi = Phi;  BestCutIdx = i; }
00097   }
00098 }
00099 
00100 void TLocClust::PlotVolDistr(const TStr& OutFNm, TStr Desc) const {
00101   TFltPrV RankValV(VolV.Len(), 0);
00102   for (int i = 0; i < VolV.Len(); i++) {
00103     RankValV.Add(TFltPr(i+1, VolV[i].Val)); }
00104   if (Desc.Empty()) { Desc = OutFNm; }
00105   TFltPrV NewV;  TLocClustStat::MakeExpBins(RankValV, NewV);
00106   TGnuPlot GP(OutFNm+"-VOL", TStr::Fmt("VOLUME. Seed node %d.", SeedNId, Desc.CStr()));
00107   GP.AddPlot(NewV, gpwLinesPoints, ""); //, "pointtype 6 pointsize 1.5"
00108   GP.SetXYLabel("Rank", "Volume");
00109   //GP.SetScale(gpsLog10X);
00110   GP.SavePng();
00111 }
00112 
00113 void TLocClust::PlotCutDistr(const TStr& OutFNm, TStr Desc) const {
00114   TFltPrV RankValV(CutV.Len(), 0);
00115   for (int i = 0; i < CutV.Len(); i++) {
00116     RankValV.Add(TFltPr(i+1, CutV[i].Val)); }
00117   if (Desc.Empty()) { Desc = OutFNm; }
00118   TFltPrV NewV;  TLocClustStat::MakeExpBins(RankValV, NewV);
00119   TGnuPlot GP(OutFNm+"-CUT", TStr::Fmt("CUT SIZE. Seed node %d.", SeedNId, Desc.CStr()));
00120   GP.AddPlot(NewV, gpwLinesPoints, ""); //, "pointtype 6 pointsize 1.5"
00121   GP.SetXYLabel("Rank", "Cut size");
00122   //GP.SetScale(gpsLog10X);
00123   GP.SavePng();
00124 }
00125 
00126 void TLocClust::PlotPhiDistr(const TStr& OutFNm, TStr Desc) const {
00127   TFltPrV RankValV(PhiV.Len(), 0);
00128   for (int i = 0; i < PhiV.Len(); i++) {
00129     RankValV.Add(TFltPr(i+1, PhiV[i])); }
00130   if (Desc.Empty()) { Desc = OutFNm; }
00131   TFltPrV NewV;  TLocClustStat::MakeExpBins(RankValV, NewV);
00132   TGnuPlot GP(OutFNm+"-PHI", TStr::Fmt("CONDUCTANCE (Cut size / Volume). Seed node %d.", SeedNId, Desc.CStr()));
00133   GP.AddPlot(NewV, gpwLinesPoints, ""); //, "pointtype 6 pointsize 1.5"
00134   GP.SetXYLabel("Rank", "Conductance (Cut size / Volume)");
00135   //GP.SetScale(gpsLog10X);
00136   GP.SavePng();
00137 }
00138 
00139 void TLocClust::SavePajek(const TStr& OutFNm) const {
00140   FILE *F = fopen(TStr::Fmt("%s.net", OutFNm.CStr()).CStr(), "wt");
00141   TIntH NIdToIdH(Graph->GetNodes(), true);
00142   TIntH ClustSet(BestCut()+1);
00143   const int BucketSz = BestCutNodes()/ 4;
00144   TStrV ClrV = TStrV::GetV("Black", "Gray80", "Gray60", "Gray40", "Gray20", "RedViolet");
00145   for (int a = 0; a < BestCutNodes(); a++) {
00146     ClustSet.AddDat(NIdV[a], (a+1)/BucketSz);
00147   }
00148   fprintf(F, "*Vertices %d\n", Graph->GetNodes());
00149   int i = 0;
00150   for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
00151     const int NId = NI.GetId();
00152     if (NId == SeedNId) {
00153       fprintf(F, "%d  \"%d\" diamond x_fact 2 y_fact 2 ic Green fos 10\n", i+1, NId); }
00154     else if (ClustSet.IsKey(NId)) {
00155       //fprintf(F, "%d  \"%d\" box x_fact 1 y_fact 1 ic Red fos 10\n", i+1, NId); }
00156       fprintf(F, "%d  \"%d\" box x_fact 2 y_fact 2 ic %s fos 10\n", i+1, NId, ClrV[ClustSet.GetDat(NId)].CStr()); }
00157     else {
00158       fprintf(F, "%d  \"%d\" ellipse x_fact 2 y_fact 2 ic Blue fos 10\n", i+1, NId); }
00159     NIdToIdH.AddDat(NId, i+1);
00160     i++;
00161   }
00162   fprintf(F, "*Arcs %d\n", Graph->GetEdges()); // arcs are directed, edges are undirected
00163   for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
00164     const int NId = NIdToIdH.GetDat(NI.GetId());
00165     for (int e = 0; e < NI.GetOutDeg(); e++) {
00166       fprintf(F, "%d %d %g c Tan\n", NId, NIdToIdH.GetDat(NI.GetOutNId(e)).Val, 1.0);
00167     }
00168   }
00169   fclose(F);
00170 }
00171 
00172 void TLocClust::DrawWhiskers(const PUNGraph& Graph, TStr FNmPref, const int& PlotN=10) {
00173   TCnComV CnComV;  TSnap::Get1CnCom(Graph, CnComV);
00174   CnComV.Sort(false);
00175   // plot size distribution
00176   { TIntH SzCntH;
00177   for (int i = 0; i < CnComV.Len(); i++) { SzCntH.AddDat(CnComV[i].Len()) += 1; }
00178   TGnuPlot::PlotValCntH(SzCntH, "whiskSz."+FNmPref, TStr::Fmt("%s. G(%d, %d)", FNmPref.CStr(), Graph->GetNodes(),
00179     Graph->GetEdges()), "Whisker size (Maximal component connected with a bridge edge)", "Count", gpsLog10XY, false); }
00180   // draw them
00181   int BrNodeId = -1;
00182   for (int c = 0; c < TMath::Mn(CnComV.Len(), PlotN); c++) {
00183     const PUNGraph BrClust = TSnap::GetSubGraph(Graph, CnComV[c].NIdV);
00184     for (TUNGraph::TNodeI NI = BrClust->BegNI(); NI < BrClust->EndNI(); NI++) {
00185       if (NI.GetOutDeg() != Graph->GetNI(NI.GetId()).GetOutDeg()) { BrNodeId=NI.GetId(); break; } }
00186     TIntStrH ClrH;  ClrH.AddDat(BrNodeId, "red");
00187     TSnap::DrawGViz(BrClust, gvlNeato, TStr::Fmt("whisk-%s-%02d.gif", FNmPref.CStr(), c+1),
00188       TStr::Fmt("Bridge node id: %d, n=%d, e=%d", BrNodeId, BrClust->GetNodes(), BrClust->GetEdges()), false, ClrH);
00189   }
00190 }
00191 
00192 void TLocClust::GetCutStat(const PUNGraph& Graph, const TIntV& NIdV, int& Vol, int& Cut, double& Phi, int GraphEdges) {
00193   TIntSet NIdSet(NIdV.Len());
00194   for (int i = 0; i < NIdV.Len(); i++) { NIdSet.AddKey(NIdV[i]); }
00195   GetCutStat(Graph, NIdSet, Vol, Cut, Phi, GraphEdges);
00196 }
00197 
00198 void TLocClust::GetCutStat(const PUNGraph& Graph, const TIntSet& NIdSet, int& Vol, int& Cut, double& Phi, int GraphEdges) {
00199   const int Edges2 = GraphEdges>=0 ? 2*GraphEdges : Graph->GetEdges();
00200   Vol=0;  Cut=0; Phi=0.0;
00201   for (int i = 0; i < NIdSet.Len(); i++) {
00202     if (! Graph->IsNode(NIdSet[i])) { continue; }
00203     TUNGraph::TNodeI NI = Graph->GetNI(NIdSet[i]);
00204     for (int e = 0; e < NI.GetOutDeg(); e++) {
00205       if (! NIdSet.IsKey(NI.GetOutNId(e))) { Cut += 1; }
00206     }
00207     Vol += NI.GetOutDeg();
00208   }
00209   // get conductance
00210   if (Vol != Edges2) {
00211     if (2*Vol > Edges2) { Phi = Cut / double(Edges2-Vol); }
00212     else if (Vol == 0) { Phi = 0.0; }
00213     else { Phi = Cut / double(Vol); }
00214   } else {
00215     if (Vol == Edges2) { Phi = 1.0; }
00216   }
00217 }
00218 
00219 void TLocClust::PlotNCP(const PUNGraph& Graph, const TStr& FNm, const TStr Desc, const bool& BagOfWhiskers, const bool& RewireNet, const int& KMin, const int& KMax, const int& Coverage, const bool& SaveTxtStat, const bool& PlotBoltzman) {
00220   const double Alpha = 0.001, KFac = 1.5, SizeFrac = 0.001;
00221   //const int KMin = 2, KMax = Mega(100), Coverage = 10;
00222   TLocClustStat ClusStat1(Alpha, KMin, KMax, KFac, Coverage, SizeFrac);
00223   ClusStat1.Run(Graph, false, PlotBoltzman, SaveTxtStat);
00224   if (BagOfWhiskers) { ClusStat1.AddBagOfWhiskers(); }
00225   TLocClustStat ClusStat2(Alpha, KMin, KMax, KFac, Coverage, SizeFrac);
00226   ClusStat1.ImposeNCP(ClusStat2, FNm, Desc, "ORIGINAL", "REWIRED"); // plot before rewiring
00227   if (SaveTxtStat) { // for every piece size store modularity
00228     ClusStat1.SaveTxtInfo(FNm, Desc, false);
00229   }
00230   if (PlotBoltzman) {
00231     ClusStat1.PlotBoltzmanCurve(FNm, Desc);
00232   }
00233   if (RewireNet) {
00234     ClusStat2.Run(TSnap::GenRewire(Graph, 50, TInt::Rnd), false, false, false);
00235     if (BagOfWhiskers) { ClusStat2.AddBagOfWhiskers(); }
00236     ClusStat1.ImposeNCP(ClusStat2, FNm, Desc, "ORIGINAL", "REWIRED"); // if rewire, plot again
00237   }
00238 }
00239 
00241 // Local Clustering Statistics
00242 
00243 double TLocClustStat::BinFactor = 1.01;
00244 
00245 double TLocClustStat::TCutInfo::GetFracDegOut(const PUNGraph& Graph, double& MxFrac, double& AvgFrac, double& MedianFrac, double& Pct9Frac, double& Flake) const {
00246   if (CutNIdV.Empty()) {
00247     IAssert(Nodes<100 || ! CutNIdV.Empty());
00248     MxFrac=1; AvgFrac=1; MedianFrac=1; Pct9Frac=1; Flake=1; 
00249     return 1;
00250   }
00251   TMom FracDegMom;
00252   TIntSet InNIdSet(CutNIdV.Len());
00253   int NHalfIn=0;
00254   for (int i = 0; i < CutNIdV.Len(); i++) { 
00255     InNIdSet.AddKey(CutNIdV[i]); }
00256   for (int n = 0; n < CutNIdV.Len(); n++) {
00257     const TUNGraph::TNodeI NI = Graph->GetNI(CutNIdV[n]);
00258     int EdgesOut = 0;
00259     for (int i = 0; i < NI.GetDeg(); i++) {
00260       if (! InNIdSet.IsKey(NI.GetNbrNId(i))) { EdgesOut++; }
00261     }
00262     const double FracOut = EdgesOut/double(NI.GetDeg());
00263     if (FracOut <= 0.5) { NHalfIn++; }
00264     FracDegMom.Add(FracOut);
00265   }
00266   FracDegMom.Def();
00267   MxFrac = FracDegMom.GetMx();
00268   AvgFrac = FracDegMom.GetMean();
00269   MedianFrac = FracDegMom.GetMedian();
00270   Pct9Frac = FracDegMom.GetDecile(9);
00271   Flake = 1.0 - double(NHalfIn)/double(CutNIdV.Len());
00272   return MxFrac;
00273 }
00274 
00275 TLocClustStat::TLocClustStat(const double& _Alpha, const int& _KMin, const int& _KMax, const double& _KFac,
00276   const int& _Coverage, const double& _SizeFrac) : Alpha(_Alpha), SizeFrac(_SizeFrac), KFac(_KFac),
00277   KMin(_KMin), KMax(_KMax), Coverage(_Coverage) {
00278 }
00279 
00280 void TLocClustStat::Save(TSOut& SOut) const {
00281   // parameters
00282   Alpha.Save(SOut);
00283   SizeFrac.Save(SOut);
00284   KFac.Save(SOut);
00285   KMin.Save(SOut);
00286   KMax.Save(SOut);
00287   Coverage.Save(SOut);
00288   // cluster stat
00289   //SweepsV.Save(SOut);
00290   //SizePhiH.Save(SOut);
00291 }
00292 
00293 void TLocClustStat::Clr() {
00294   SweepsV.Clr(false);      // node ids and conductances for each run of local clustering
00295   SizePhiH.Clr(false);     // conductances at cluster size K
00296   BestCutH.Clr(false);     // best cut (min conductance) at size K
00297   BagOfWhiskerV.Clr(false); // (Size, Conductance) of bag of whiskers clusters
00298 }
00299 
00300 void TLocClustStat::Run(const PUNGraph& _Graph, const bool& SaveAllSweeps, const bool& SaveAllCond, const bool& SaveBestNodesAtK) {
00301   Graph = TSnap::GetMxWcc(_Graph);
00302   const int Nodes = Graph->GetNodes();
00303   const int Edges = Graph->GetEdges();
00304   printf("\nLocal clustering: Graph(%d, %d)\n", Nodes, Edges);
00305   printf("  Alpha:    %g\n", Alpha());
00306   printf("  K: %d -- %g -- %dm\n", KMin(), KFac(), int(KMax/Mega(1)));
00307   printf("  Coverage: %d\n", Coverage());
00308   printf("  SizeFrac: %g\n\n", SizeFrac());
00309   TExeTm TotTm;
00310   Clr();
00311   TLocClust Clust(Graph, Alpha);
00312   BestCut.CutNIdV.Clr(false); 
00313   BestCut.CutSz=-1;  BestCut.Edges=-1;
00314   double BestPhi = TFlt::Mx;
00315   int prevK=-1;
00316   bool NextDone=false;
00317   if (SaveBestNodesAtK) { // fill buckets (only store nodes in clusters for sizes in SizeBucketSet)
00318     SizeBucketSet.Clr();
00319     double PrevBPos = 1, BPos = 1;
00320     while (BPos <= Mega(100)) {
00321       PrevBPos = (uint) floor(BPos);
00322       BPos *= BinFactor;
00323       if (floor(BPos) == PrevBPos) { BPos = PrevBPos + 1; }
00324       SizeBucketSet.AddKey(int(floor(BPos) - 1));
00325     }
00326   }
00327   for (int K = KMin, cnt=1; K < KMax; K = int(KFac * double(K))+1, cnt++) {
00328     if (K == prevK) { K++; } prevK = K;
00329     const int Runs = 2 + int(Coverage  * floor(double(Graph->GetEdges()) / double(K)));
00330     printf("%d] K: %d, %d runs\n", cnt+1, K, Runs);
00331     if (NextDone) { break; } // done
00332     if (K+1 > 2*Graph->GetEdges()) { K = Graph->GetEdges(); NextDone=true; }
00333     //if (K+1 > Graph->GetEdges()) { K = Graph->GetEdges(); NextDone=true; }
00334     TExeTm ExeTm, IterTm;
00335     double MeanSz=0.0, MeanVol=0.0, Count=0.0;
00336     for (int run = 0; run < Runs; run++) {
00337       const int SeedNId = Graph->GetRndNId();  IterTm.Tick();
00338       Clust.FindBestCut(SeedNId, K, SizeFrac);
00339       const int Sz = Clust.BestCutNodes();
00340       const int Vol = Clust.GetCutVol();
00341       const double Phi = TMath::Round(Clust.GetCutPhi(), 4);
00342       if (Sz == 0 || Vol == 0 || Phi == 0) { continue; }
00343       MeanSz+=Sz;  MeanVol+=Vol;  Count+= 1;
00344       if (SaveAllSweeps) { // save the full cut set and conductances for all trials
00345         SweepsV.Add(TNodeSweep(SeedNId, Clust.GetNIdV(), Clust.GetPhiV())); }
00346       int SAtBestPhi=-1;
00347       for (int s = 0; s < Clust.Len(); s++) {
00348         const int size = s+1;
00349         const int cut = Clust.GetCut(s);
00350         const int edges = (Clust.GetVol(s)-cut)/2;
00351         const double phi = Clust.GetPhi(s);
00352         if (( Clust.GetPhi(s) != double(cut)/double(2*edges+cut))) { continue; } // more than half of the edges
00353         IAssert((Clust.GetVol(s) - cut) % 2 == 0);
00354         IAssert(phi == double(cut)/double(2*edges+cut));
00355         IAssert(phi >= 1.0/double((1+s)*s+1));
00357         // TCutInfo CutInfo(size, edges, cut); Clust.GetNIdV().GetSubValV(0, size-1, CutInfo.CutNIdV);
00358         //double MxFrac=0, AvgFrac=0, MedianFrac=0, Pct9Frac=0, Flake=0;
00359         //CutInfo.GetFracDegOut(Graph, MxFrac, AvgFrac, MedianFrac, Pct9Frac, Flake);
00360         //const double phi = MxFrac;
00361         if (BestPhi >= phi) {
00362           BestPhi = phi;
00363           BestCut = TCutInfo(size, edges, cut);
00364           SAtBestPhi = s;
00365         }
00367         //bool TAKE=false;  if (! BestCutH.IsKey(size)) { TAKE=true; }
00368         //else { BestCutH.GetDat(size).GetFracDegOut(Graph, MxFrac, AvgFrac, MedianFrac, Pct9Frac, Flake);  if (MxFrac >= phi) { TAKE = true; } }
00369         // if (TAKE) {
00370         if (! BestCutH.IsKey(size) || BestCutH.GetDat(size).GetPhi() >= phi) { //new best cut (size, edges inside and nodes)
00371           BestCutH.AddDat(size, TCutInfo(size, edges, cut));  // for every size store best cut (NIds inside the cut)
00372           if (SaveBestNodesAtK) { // store node ids in best community for each size k
00373             if (! SizeBucketSet.Empty() && ! SizeBucketSet.IsKey(size)) { continue; } // only save best clusters at SizeBucketSet
00374             Clust.GetNIdV().GetSubValV(0, size-1, BestCutH.GetDat(size).CutNIdV); }
00375         }
00376         if (SaveAllCond) { // for every size store all conductances
00377           SizePhiH.AddDat(size).Add(phi); }
00378       }
00379       if (SAtBestPhi != -1) { // take nodes in best cluster
00380         const int size = SAtBestPhi+1;
00381         Clust.GetNIdV().GetSubValV(0, size-1, BestCut.CutNIdV); 
00382       }
00383       if (TLocClust::Verbose) {
00384         printf(".");
00385         if (run % 50 == 0) {
00386           printf("\r                                                   %d / %d \r", run, Runs); }
00387       }
00388     }
00389     if (TLocClust::Verbose) {
00390       printf("\r  %d / %d: %s                                                   \n", Runs, Runs, ExeTm.GetStr()); 
00391     }
00392     MeanSz/=Count;  MeanVol/=Count;
00393     printf("  Graph(%d, %d)  ", Nodes, Edges);
00394     printf("       mean:  sz: %.2f  vol: %.2f [%s] %s\n", MeanSz, MeanVol, ExeTm.GetStr(), TExeTm::GetCurTm());
00395   }
00396   SizePhiH.SortByKey();
00397   for (int k = 0; k < SizePhiH.Len(); k++) { 
00398     SizePhiH[k].Sort(); }
00399   SweepsV.Sort();
00400   BestCutH.SortByKey();
00401   printf("DONE. Graph(%d, %d): Total run time: %s\n\n", Nodes, Edges, TotTm.GetStr());
00402 }
00403 
00404 void TLocClustStat::AddBagOfWhiskers() {
00405   BagOfWhiskers(Graph, BagOfWhiskerV, BestWhisk); // slow but exact
00406   //BagOfWhiskers2(Graph, BagOfWhiskerV);
00407 }
00408 
00409 // add a cut on NIdV nodes
00410 void TLocClustStat::AddCut(const TIntV& NIdV) {
00411   int Vol, Cut; double Phi;
00412   TLocClust::GetCutStat(Graph, NIdV, Vol, Cut, Phi, Graph->GetEdges());
00413   SizePhiH.AddDat(NIdV.Len()).Add(Phi);
00414 }
00415 
00416 // add a cut of particular conductance
00417 void TLocClustStat::AddCut(const int& ClustSz, const double& Phi) {
00418   SizePhiH.AddDat(ClustSz).Add(Phi);
00419 }
00420 
00421 // add the cut on particular set of nodes (calculate conductance and modularity)
00422 void TLocClustStat::AddToBestCutH(const PUNGraph& Graph, const TIntV& NIdV, const bool& AddBestCond) {
00423   const int K = NIdV.Len();
00424   TCutInfo CutInfo(Graph, NIdV, true);
00425   printf("new: %d\t%d\t%d\t%f\t%f\n", CutInfo.GetNodes(), CutInfo.GetEdges(), 
00426     CutInfo.GetCutSz(), CutInfo.GetPhi(), CutInfo.GetModular(Graph->GetEdges()));
00427   if (! BestCutH.IsKey(K)) { BestCutH.AddDat(K, CutInfo);  return; }
00428   TCutInfo& CurCut = BestCutH.GetDat(K);
00429   // keep the cluster with best conductance
00430   if (AddBestCond && CurCut.GetPhi() > CutInfo.GetPhi()) { CurCut=CutInfo; }
00431   // keep the cluster with best modularity
00432   //const int E = Graph->GetEdges();  
00433   //if (! AddBestCond && CurCut.GetModular(E) < CutInfo.GetModular(E)) { CurCut=CutInfo; }
00434 }
00435 
00436 const TLocClustStat::TCutInfo& TLocClustStat::FindBestCut(const int& Nodes) const {
00437   double bestPhi = 1;
00438   int CutId = -1;
00439   if (Nodes > 0) {
00440     IAssert(BestCutH.IsKey(Nodes));
00441     return BestCutH.GetDat(Nodes);
00442   } else {
00443     for (int n = 0; n < BestCutH.Len(); n++) {
00444       if (BestCutH[n].GetPhi() <= bestPhi) {
00445         bestPhi = BestCutH[n].GetPhi();  CutId = n; }
00446     }
00447     IAssert(CutId != -1);
00448     IAssert(! BestCutH[CutId].CutNIdV.Empty());
00449     return BestCutH[CutId];
00450   }
00451 }
00452 
00453 // find the cut with the lowest Phi of particular size (if Nodes=-1 find absolute best cut)
00454 double TLocClustStat::FindBestCut(const int& Nodes, TIntV& ClustNIdV) const {
00455   const TCutInfo& Cut = FindBestCut(Nodes);
00456   ClustNIdV = Cut.CutNIdV;
00457   return Cut.GetPhi();
00458 }
00459 
00460 int TLocClustStat::FindBestCut(const int& Nodes, int& Vol, int& Cut, double& Phi) const {
00461   const TCutInfo& CutInfo = FindBestCut(Nodes);
00462   Vol = CutInfo.GetVol();
00463   Cut = CutInfo.GetCutSz();
00464   Phi = CutInfo.GetPhi();
00465   return CutInfo.GetNodes();
00466 }
00467 
00468 // find best phi where the cut has N nodes, and the nodes in the cluster are not in the TabuSet
00469 double TLocClustStat::FindBestCut(const int& Nodes, const TIntSet& TabuNIdSet, int& BestCutId) const {
00470   double bestPhi = 1;
00471   BestCutId = -1;
00472   bool Tabu;
00473   IAssert(! SweepsV.Empty());
00474   for (int c = 0; c < SweepsV.Len(); c++) {
00475     const TNodeSweep& Sweep = SweepsV[c];
00476     if (Sweep.Len() < Nodes) { continue; }
00477     if (Sweep.Phi(Nodes-1) > bestPhi) { continue; }
00478     Tabu = false;
00479     for (int i = 0; i < Nodes; i++) {
00480       if (TabuNIdSet.IsKey(Sweep.NId(i))) { Tabu=true; break; }
00481     }
00482     if (Tabu) { continue; }
00483     bestPhi = Sweep.Phi(Nodes-1);
00484     BestCutId = c;
00485   }
00486   return bestPhi;
00487 }
00488 
00489 // get statistics over all conductances at all cluster sizes
00490 void TLocClustStat::GetCurveStat(TFltPrV& MeanV, TFltPrV& MedV, TFltPrV& Dec1V, TFltPrV& MinV, TFltPrV& MaxV) const {
00491   TFltPrV BucketV;
00492   MeanV.Clr(false); MedV.Clr(false); Dec1V.Clr(false); MinV.Clr(false); MaxV.Clr(false);
00493   if (! SizePhiH.Empty()) { // stores conductances of all little clusters
00494     const THash<TInt, TFltV>& KvH = SizePhiH; 
00495     for (int i = 0; i < KvH.Len(); i++) {
00496       const double X = KvH.GetKey(i).Val;  IAssert(X >= 1.0);
00497       const TFltV& YVec = KvH[i];
00498       TMom Mom;
00499       for (int j = 0; j < YVec.Len(); j++) { Mom.Add(YVec[j]); }
00500       Mom.Def();
00501       /*IAssert(Mom.GetMean()>=0 && Mom.GetMean()<=1);
00502       IAssert(Mom.GetMedian()>=0 && Mom.GetMedian()<=1);
00503       IAssert(Mom.GetDecile(1)>=0 && Mom.GetDecile(1)<=1);
00504       IAssert(Mom.GetMn()>=0 && Mom.GetMn()<=1);
00505       IAssert(Mom.GetMx()>=0 && Mom.GetMx()<=1);*/
00506       MeanV.Add(TFltPr(X, Mom.GetMean()));
00507       MedV.Add(TFltPr(X, Mom.GetMedian()));
00508       Dec1V.Add(TFltPr(X, Mom.GetDecile(1)));
00509       MinV.Add(TFltPr(X, Mom.GetMn()));
00510       MaxV.Add(TFltPr(X, Mom.GetMx()));
00511     }
00512     MeanV.Sort(); MedV.Sort();  Dec1V.Sort();  MinV.Sort();  MaxV.Sort();
00513     // create log buckets (makes plots smaller and smoother)
00514     TLocClustStat::MakeExpBins(MeanV, BucketV);  MeanV.Swap(BucketV);
00515     TLocClustStat::MakeExpBins(MedV, BucketV);  MedV.Swap(BucketV);
00516     TLocClustStat::MakeExpBins(Dec1V, BucketV);  Dec1V.Swap(BucketV);
00517     TLocClustStat::MakeExpBins(MinV, BucketV);  MinV.Swap(BucketV);
00518     TLocClustStat::MakeExpBins(MaxV, BucketV);  MaxV.Swap(BucketV);
00519   } else {
00520     //const int GEdges = Graph->GetEdges();
00521     for (int i = 0; i < BestCutH.Len(); i++) {
00522       MinV.Add(TFltPr(BestCutH.GetKey(i).Val, BestCutH[i].GetPhi()));
00523     }
00524     MinV.Sort();
00525     TLocClustStat::MakeExpBins(MinV, BucketV);  MinV.Swap(BucketV);
00526   }
00527 }
00528 
00529 void TLocClustStat::GetBoltzmanCurveStat(const TFltV& TempV, TVec<TFltPrV>& NcpV) const {
00530   IAssert(! SizePhiH.Empty()); // stores conductances of all little clusters
00531   NcpV.Gen(TempV.Len());
00532   TFltPrV BucketV;
00533   for (int t = 0; t < TempV.Len(); t++) {
00534     const double T = TempV[t];
00535     for (int i = 0; i < SizePhiH.Len(); i++) {
00536       const double X = SizePhiH.GetKey(i).Val;  IAssert(X >= 1.0);
00537       const TFltV& PhiV = SizePhiH[i];
00538       double V = 0.0, SumW = 0.0;
00539       for (int j = 0; j < PhiV.Len(); j++) { 
00540         V += PhiV[j] * exp(-PhiV[j]/T); 
00541         SumW += exp(-PhiV[j]/T); 
00542       }
00543       //V /= PhiV.Len();
00544       V /= SumW;
00545       NcpV[t].Add(TFltPr(X, V));
00546     }
00547     TLocClustStat::MakeExpBins(NcpV[t], BucketV);  NcpV[t].Swap(BucketV);
00548     // normalize to start at (1,1)
00549     //for (int i = 0; i < NcpV[t].Len(); i++) {
00550     //  NcpV[t][i].Val2 /= NcpV[t][0].Val2;
00551     //}
00552   }
00553 }
00554 
00555 TStr TLocClustStat::ParamStr() const {
00556   if (Graph.Empty()) {
00557     return TStr::Fmt("A=%g, K=%d-%g-%s, Cvr=%d, SzFrc=%g", Alpha(), KMin(), KFac(), TInt::GetMegaStr(KMax()).CStr(), Coverage(), SizeFrac()); }
00558   else {
00559     return TStr::Fmt("A=%g, K=%d-%g-%s, Cvr=%d, SzFrc=%g G(%d, %d)", Alpha(), KMin(), KFac(), TInt::GetMegaStr(KMax()).CStr(), Coverage(), SizeFrac(),
00560       Graph->GetNodes(), Graph->GetEdges());
00561   }
00562 }
00563 
00564 void TLocClustStat::PlotNCP(const TStr& OutFNm, TStr Desc) const {
00565   if (Desc.Empty()) { Desc = OutFNm; }
00566   TFltPrV MeanV, MedV, Dec1V, MinV, MaxV;
00567   GetCurveStat(MeanV, MedV, Dec1V, MinV, MaxV);
00568   TGnuPlot GP("ncp."+OutFNm, TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr()));
00569   if (! MaxV.Empty()) { GP.AddPlot((MaxV), gpwLines, "MAX"); }
00570   if (! MedV.Empty()) { GP.AddPlot((MedV), gpwLines, "MEDIAN"); }
00571   if (! MeanV.Empty()) { GP.AddPlot((MeanV), gpwLines, "MEAN"); }
00572   if (! Dec1V.Empty()) { GP.AddPlot((Dec1V), gpwLines, "1-st DECILE"); }
00573   if (! MinV.Empty()) { 
00574     GP.AddPlot((MinV), gpwLines, "MIN"); 
00575     //GP.AddPlot(MinV, gpwLines, "smooth MIN", "lw 2 smooth bezier");
00576   }
00577   if (! BagOfWhiskerV.Empty()) {
00578     GP.AddPlot(BagOfWhiskerV, gpwLines, "Whiskers", "lw 1"); 
00579     TFltPrV BestWhiskV;  BestWhiskV.Add(TFltPr(BestWhisk));
00580     GP.AddPlot(BestWhiskV, gpwPoints, "Best whisker", "pt 5 ps 2");
00581   }
00582   GP.SetScale(gpsLog10XY);
00583   GP.SetXYLabel("k (number of nodes in the cluster)", "{/Symbol \\F} (conductance)");
00584   GP.SetXRange(1, Graph->GetNodes()/2);
00585   GP.SavePng();
00586 }
00587 
00588 // NCP but with modularity on Y-axis
00589 void TLocClustStat::PlotNCPModul(const TStr& OutFNm, TStr Desc) const {
00590   if (Desc.Empty()) { Desc = OutFNm; }
00591   TFltPrV MinV, BucketV;
00592   const int GEdges = Graph->GetEdges();
00593   for (int i = 0; i < BestCutH.Len(); i++) {
00594     MinV.Add(TFltPr(BestCutH.GetKey(i).Val, BestCutH[i].GetModular(GEdges))); }
00595   MinV.Sort();
00596   TLocClustStat::MakeExpBins(MinV, BucketV);  MinV.Swap(BucketV);
00597   TGnuPlot GP("ncpMod."+OutFNm, TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr()));
00598   if (! MinV.Empty()) { 
00599     GP.AddPlot((MinV), gpwLines, "MIN"); }
00600   GP.SetScale(gpsLog10XY);
00601   GP.SetXYLabel("k (number of nodes in the cluster)", "Q (modularity)");
00602   GP.SetXRange(1, Graph->GetNodes()/2);
00603   GP.SavePng();
00604 }
00605 
00606 // plot top 10 surface/volume curves (disjont clusters of particular size)
00607 void TLocClustStat::PlotNcpTop10(const TStr& OutFNm, TStr Desc, const int& TakeMinN) const {
00608   if (Desc.Empty()) { Desc = OutFNm; }
00609   const double BinFactor = 1.05;
00610   double BinPos=1;
00611   int PrevBPos=1, CurBPos=1, CutId;
00612   bool AddSome;
00613   TVec<TFltPrV> Curves(TakeMinN);
00614   while (true) {
00615     PrevBPos = CurBPos;
00616     BinPos *= BinFactor; // do exponential binning
00617     CurBPos = (int) floor(BinPos);
00618     if (CurBPos == PrevBPos) { CurBPos=PrevBPos+1;  BinPos=CurBPos; }
00619     const int Nodes = CurBPos;
00620     TIntSet TabuNIdSet(Graph->GetNodes());
00621     AddSome = false;
00622     for (int t = 0; t < TakeMinN; t++) {
00623       const double Phi = FindBestCut(Nodes, TabuNIdSet, CutId);
00624       if (CutId == -1) { break; }
00625       Curves[t].Add(TFltPr(Nodes, Phi));
00626       for (int n = 0; n < Nodes; n++) {
00627         TabuNIdSet.AddKey(SweepsV[CutId].NId(n)); }
00628       AddSome = true;
00629     }
00630     if (! AddSome) { break; }
00631   }
00632   TGnuPlot GP("ncpTop."+OutFNm, TStr::Fmt("%s. Top disjoint clusters. Take:%d, %s", Desc.CStr(), TakeMinN, ParamStr().CStr()));
00633   for (int i = 0; i < Curves.Len(); i++) {
00634     GP.AddPlot(Curves[i], gpwLines, TStr::Fmt("MIN %d", i+1), "lw 1"); }
00635   //if (! BagOfWhiskerV.Empty()) { GP.AddPlot(BagOfWhiskerV, gpwLines, " Whiskers", "lw 2"); }
00636   GP.SetScale(gpsLog10XY);
00637   GP.SetXYLabel("k (number of nodes in the cluster)", "{/Symbol \\F} (conductance)");
00638   GP.SetXRange(1, Graph->GetNodes()/2);
00639   GP.SavePng();
00640 }
00641 
00642 // plot conductance on the boundary / counductance inside vs. cluster size
00643 void TLocClustStat::PlotPhiInOut(const TStr& OutFNm, TStr Desc) const {
00644   IAssert(! BestCutH.Empty() && ! Graph.Empty());
00645   TFltPrV PhiInV, PhiBoundV, PhiRatV;
00646   FILE *F = fopen(TStr::Fmt("phiInOut.%s-all.tab", OutFNm.CStr()).CStr(), "wt");
00647   fprintf(F, "#Nodes\tEdges\tVol\tCut\tPhi\tInNodes\tInEdges\tInVol\tInCut\tInPhi\n");
00648   TLocClustStat ClustStat2(Alpha, 1, KMax, KFac, Coverage, SizeFrac);
00649   const double IdxFact = 1.05;
00650   int curIdx=1, prevIdx=1;
00651   while (curIdx <= BestCutH.Len()) {
00652     const TCutInfo& CutInfo = BestCutH[curIdx-1];
00653     if (CutInfo.GetNodes() > 1) {
00654       PUNGraph ClustG = TSnap::GetSubGraph(Graph, CutInfo.CutNIdV);
00655       ClustStat2.Run(ClustG);
00656       const TCutInfo& InCut = ClustStat2.FindBestCut(-1);
00657       PhiInV.Add(TFltPr(CutInfo.GetNodes(), InCut.GetPhi()));
00658       PhiBoundV.Add(TFltPr(CutInfo.GetNodes(), CutInfo.GetPhi()));
00659       PhiRatV.Add(TFltPr(CutInfo.GetNodes(), InCut.GetPhi()/CutInfo.GetPhi()));
00660       fprintf(F, "%d\t%d\t%d\t%d\t%f\t%d\t%d\t%d\t%d\t%f\n", CutInfo.GetNodes(), CutInfo.GetEdges(), CutInfo.GetVol(),
00661         CutInfo.GetCutSz(), CutInfo.GetPhi(),  InCut.GetNodes(), InCut.GetEdges(), InCut.GetVol(), InCut.GetCutSz(), InCut.GetPhi());
00662       fflush(F);
00663     }
00664     prevIdx = curIdx;
00665     curIdx = (int) TMath::Round(double(curIdx)*IdxFact);
00666     if (prevIdx == curIdx) { curIdx++; }
00667   }
00668   fclose(F);
00669   { TGnuPlot GP("phiInOut."+OutFNm, TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr()));
00670   GP.AddPlot(PhiBoundV, gpwLines, "CUT conductance", "lw 1");
00671   GP.AddPlot(PhiInV, gpwLines, "INSIDE conductance", "lw 1");
00672   //GP.AddPlot(PhiRatV, gpwLines, "RATIO (inside/boundary)", "lw 1");
00673   GP.SetXRange(1, Graph->GetNodes()/2);  GP.SetScale(gpsLog10XY);
00674   GP.SetXYLabel("k (number of nodes in the cluster)", "{/Symbol \\F} (conductance)");
00675   //GP.AddCmd("set logscale xyy2 10");
00676   //GP.AddCmd("set y2label \"Conductance ratio (inside/boundary -- higher better)\"");
00677   GP.SavePng(); }
00678   //system(TStr(TStr("replace_all.py phiInOut.")+OutFNm+".plt \"title \\\"RATIO (inside/boundary)\" \"axis x1y2 title \\\"RATIO (inside/boundary)\"").CStr());
00679   //GP.RunGnuPlot();
00680   { TGnuPlot GP("phiInOutRat."+OutFNm, TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr()));
00681   GP.AddPlot(PhiRatV, gpwLines, "RATIO (Inside / Boundary)", "lw 1");
00682   GP.SetXRange(1, Graph->GetNodes()/2);  GP.SetScale(gpsLog10XY);
00683   GP.SetXYLabel("Nodes", "Conductance ratio (inside/boundary) -- higher better");
00684   GP.SavePng(); }
00685 }
00686 
00687 // Plot edges inside, edges cut and conductance.
00688 // run: replace_all.py cutEdges.*.plt "title \"Conductance" "axis x1y2 title \"Conductance"
00689 void TLocClustStat::PlotBestClustDens(TStr OutFNm, TStr Desc) const {
00690   if (Desc.Empty()) { Desc = OutFNm; }
00691   const int len = BestCutH.Len();
00692   TFltPrV CutV(len, 0), EdgesV(len, 0), PhiV(len,0);
00693   for (int i = 0; i < BestCutH.Len(); i++) {
00694     const double size = BestCutH.GetKey(i).Val;
00695     CutV.Add(TFltPr(size, BestCutH[i].GetCutSz()));
00696     EdgesV.Add(TFltPr(size, BestCutH[i].GetEdges()));
00697     PhiV.Add(TFltPr(size, BestCutH[i].GetPhi()));
00698   }
00699   TGnuPlot GP("cutEdges."+OutFNm, TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr()));
00700   TFltPrV NewV;  TLocClustStat::MakeExpBins(EdgesV, NewV);
00701   GP.AddPlot(NewV, gpwLines, "Edges inside");
00702   TLocClustStat::MakeExpBins(CutV, NewV);
00703   GP.AddPlot(NewV, gpwLines, "Cut edges");
00704   TLocClustStat::MakeExpBins(PhiV, NewV);
00705   GP.AddPlot(NewV, gpwLines, "Conductance");
00706   GP.SetXYLabel("Cluster size", "Edges"); GP.SetScale(gpsLog10XY);
00707   GP.AddCmd("set logscale xyy2 10");
00708   GP.AddCmd("set y2label \"Conductance\"");
00709   GP.SavePng();
00710   system(TStr(TStr("replace_all.py cutEdges.")+OutFNm+".plt \"title \\\"Conductance\" \"axis x1y2 title \\\"Conductance\"").CStr());
00711   GP.RunGnuPlot();
00712 }
00713 
00714 // all different conducances of all sizes (not just lower envelope)
00715 void TLocClustStat::PlotNCPScatter(const TStr& OutFNm, TStr Desc) const {
00716   if (Desc.Empty()) { Desc = OutFNm; }
00717   THashSet<TFltPr> PhiSzH;
00718   IAssertR(! SizePhiH.Empty(), "Set SaveAllCond=true in TLocClustStat::Run()");
00719   for (int k = 0; k < SizePhiH.Len(); k++) {
00720     const int K = SizePhiH.GetKey(k);
00721     const TFltV& PhiV = SizePhiH[k];
00722     for (int p = 0; p < PhiV.Len(); p++) {
00723       PhiSzH.AddKey(TFltPr(K, TMath::Round(PhiV[p], 3))); }
00724   }
00725   TFltPrV PhiSzV;  PhiSzH.GetKeyV(PhiSzV);
00726   TGnuPlot GP("ncpScatter."+OutFNm, TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr()));
00727   GP.AddPlot(PhiSzV, gpwPoints, "", "pt 5 ps 0.2");
00728   GP.SetScale(gpsLog10XY);
00729   GP.SetXYLabel("k (number of nodes in the cluster)", "{/Symbol \\F} (conductance)");
00730   GP.SetXRange(1, Graph->GetNodes()/2);
00731   GP.SavePng();
00732 }
00733 
00734 // histogram of conductances for a fixed CmtySz
00735 void TLocClustStat::PlotPhiDistr(const int& CmtySz, const TStr& OutFNm, TStr Desc) const {
00736   IAssert(! SizePhiH.Empty());
00737   const TFltV& PhiV = SizePhiH.GetDat(CmtySz);
00738   THash<TFlt, TInt> PhiCntH;
00739   for (int i = 0; i < PhiV.Len(); i++) {
00740     const double Buck =  TMath::Round(PhiV[i], 3);
00741     PhiCntH.AddDat(Buck) += 1;
00742   }
00743   TGnuPlot::PlotValCntH(PhiCntH, TStr::Fmt("phiDistr%03d.%s", CmtySz, OutFNm.CStr()), 
00744     TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr()), "{/Symbol \\F} (conductance)",  "Count");
00745 }
00746 
00747 void TLocClustStat::PlotBoltzmanCurve(const TStr& OutFNm, TStr Desc) const {
00748   TFltPrV MeanV1, MedV1, Dec1V1, MinV1, MaxV1;
00749   GetCurveStat(MeanV1, MedV1, Dec1V1, MinV1, MaxV1);
00750   TVec<TFltPrV> NcpV;
00751   //const TFltV TempV = TFltV::GetV(0.05, 0.01, 0.1, 10, 100, 1000);
00752   const TFltV TempV = TFltV::GetV(0.001, 0.005, 0.01, 0.02, 0.05, 0.1, 0.5, 1);
00753   GetBoltzmanCurveStat(TempV, NcpV);
00754   TGnuPlot GP("ncp."+OutFNm+"-B", TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr()));  
00755   GP.AddPlot(MinV1, gpwLines, TStr::Fmt("%s MIN (%d, %d)", Desc.CStr(), Graph->GetNodes(), Graph->GetEdges()), "lw 1");
00756   GP.AddPlot(MeanV1, gpwLines, "Avg", "lw 1");
00757   GP.AddPlot(MedV1, gpwLines, "Median", "lw 1");
00758   GP.AddPlot(Dec1V1, gpwLines, "Decile-1", "lw 1");
00759   for (int t = 0; t < TempV.Len(); t++) {
00760     GP.AddPlot(NcpV[t], gpwLines, TStr::Fmt("Temp %g", TempV[t]()), "lw 1");
00761   }
00762   GP.SetScale(gpsLog10XY);
00763   GP.SetXYLabel("k (number of nodes in the cluster)", "{/Symbol \\F} (conductance)");
00764   GP.SavePng();
00765   //
00766   TFltPrV SzNClustV;
00767   int kCnt=1;
00768   for (int i = 0; i < SizePhiH.Len(); i++) {
00769     const int K = SizePhiH.GetKey(i);
00770     const TFltV& PhiV = SizePhiH[i];
00771     SzNClustV.Add(TFltPr(K, PhiV.Len()));
00772     if (K>2 && (pow(10.0, (int)log10((double)K))==K || (K >=10 && K<=100 && K%10==0) || (K >=100 && K<=1000 && K%100==0))) {
00773       THash<TFlt, TFlt> CntH;
00774       for (int p = 0; p < PhiV.Len(); p++) {
00775         CntH.AddDat(TMath::Round(log10(PhiV[p].Val),1)) += 1;
00776       }
00777       TGnuPlot::PlotValCntH(CntH, TStr::Fmt("ncp.%s-c%02d", OutFNm.CStr(), kCnt++), TStr::Fmt("%s. K=%d, NPieces=%d, %s", 
00778         Desc.CStr(), K, PhiV.Len(), ParamStr().CStr()), "log_10 {/Symbol \\F} (conductance)", 
00779         TStr::Fmt("Number of pieces of such conductance, K=%d, NPieces=%d)", K, PhiV.Len()));
00780     }
00781   }
00782   TGnuPlot::PlotValV(SzNClustV, "ncp."+OutFNm+"-ClSz", TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr()),
00783     "k (cluster size)", "c(k) (number of extracted clusters)", gpsLog);
00784 }
00785 
00786 void TLocClustStat::ImposeNCP(const TLocClustStat& LcStat2, TStr OutFNm, TStr Desc, TStr Desc1, TStr Desc2) const {
00787   if (Desc.Empty()) { Desc = OutFNm; }
00788   TFltPrV MeanV1, MedV1, Dec1V1, MinV1, MaxV1;
00789   TFltPrV MeanV2, MedV2, Dec1V2, MinV2, MaxV2;
00790   GetCurveStat(MeanV1, MedV1, Dec1V1, MinV1, MaxV1);
00791   LcStat2.GetCurveStat(MeanV2, MedV2, Dec1V2, MinV2, MaxV2);
00792   // plot
00793   TGnuPlot GP("ncp."+OutFNm, TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr()));
00794   if (! MinV1.Empty()) { GP.AddPlot(MinV1, gpwLines, TStr::Fmt("%s MIN (%d, %d)", Desc1.CStr(), Graph->GetNodes(), Graph->GetEdges()), "lw 1"); }
00795   if (! MinV2.Empty()) { GP.AddPlot(MinV2, gpwLines, TStr::Fmt("%s MIN (%d, %d)", Desc2.CStr(), LcStat2.Graph->GetNodes(), LcStat2.Graph->GetEdges()), "lw 1"); }
00796   if (! BagOfWhiskerV.Empty()) { 
00797     GP.AddPlot(BagOfWhiskerV, gpwLines, Desc1+" whiskers", "lw 1"); 
00798     TFltPrV BestWhiskV;  BestWhiskV.Add(BestWhisk);
00799     GP.AddPlot(BestWhiskV, gpwPoints, Desc1+" Best whisker", "pt 5 ps 2"); }
00800   if (! LcStat2.BagOfWhiskerV.Empty()) {
00801     GP.AddPlot(LcStat2.BagOfWhiskerV, gpwLines, Desc2+" whiskers", "lw 1"); 
00802     TFltPrV BestWhiskV;  BestWhiskV.Add(LcStat2.BestWhisk);
00803     GP.AddPlot(BestWhiskV, gpwPoints, Desc2+" Best whisker", "pt 5 ps 2"); }
00804   GP.SetScale(gpsLog10XY);
00805   GP.SetXYLabel("k (number of nodes in the cluster)", "{/Symbol \\F} (conductance)");
00806   //GP.SetXRange(1, Graph->GetNodes()/2);
00807   GP.SavePng();
00808 }
00809 
00810 void TLocClustStat::ImposeNCP(const TLocClustStat& LcStat2, const TLocClustStat& LcStat3, TStr OutFNm, TStr Desc, TStr Desc1, TStr Desc2, TStr Desc3) const {
00811   if (Desc.Empty()) { Desc = OutFNm; }
00812   TFltPrV MeanV1, MedV1, Dec1V1, MinV1, MaxV1;
00813   TFltPrV MeanV2, MedV2, Dec1V2, MinV2, MaxV2;
00814   TFltPrV MeanV3, MedV3, Dec1V3, MinV3, MaxV3;
00815   GetCurveStat(MeanV1, MedV1, Dec1V1, MinV1, MaxV1);
00816   LcStat2.GetCurveStat(MeanV2, MedV2, Dec1V2, MinV2, MaxV2);
00817   LcStat3.GetCurveStat(MeanV3, MedV3, Dec1V3, MinV3, MaxV3);
00818   // plot
00819   TGnuPlot GP("phiTR."+OutFNm, TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr()));
00820   if (! MinV1.Empty()) { GP.AddPlot(MinV1, gpwLines, Desc1+" MIN", "lw 1"); }
00821   if (! MinV2.Empty()) { GP.AddPlot(MinV2, gpwLines, Desc2+" MIN", "lw 1"); }
00822   if (! MinV3.Empty()) { GP.AddPlot(MinV3, gpwLines, Desc3+" MIN", "lw 1"); }
00823   if (! BagOfWhiskerV.Empty()) { 
00824     GP.AddPlot(BagOfWhiskerV, gpwLines, Desc1+" whiskers", "lw 1"); 
00825     TFltPrV BestWhiskV;  BestWhiskV.Add(BestWhisk);
00826     GP.AddPlot(BestWhiskV, gpwPoints, Desc1+" Best whisker", "pt 5 ps 2"); }
00827   if (! LcStat2.BagOfWhiskerV.Empty()) {
00828     GP.AddPlot(LcStat2.BagOfWhiskerV, gpwLines, Desc2+" whiskers", "lw 1"); 
00829     TFltPrV BestWhiskV;  BestWhiskV.Add(LcStat2.BestWhisk);
00830     GP.AddPlot(BestWhiskV, gpwPoints, Desc2+" Best whisker", "pt 5 ps 2"); }
00831   if (! LcStat3.BagOfWhiskerV.Empty()) { 
00832     GP.AddPlot(LcStat3.BagOfWhiskerV, gpwLines, Desc3+" whiskers", "lw 1"); 
00833     TFltPrV BestWhiskV;  BestWhiskV.Add(LcStat3.BestWhisk);
00834     GP.AddPlot(BestWhiskV, gpwPoints, Desc3+" Best whisker", "pt 5 ps 2"); }
00835   GP.SetScale(gpsLog10XY);
00836   GP.SetXYLabel("k (number of nodes in the cluster)", "{/Symbol \\F} (conductance)");
00837   GP.SetXRange(1, Graph->GetNodes()/2);
00838   GP.SavePng();
00839 }
00840 
00841 void TLocClustStat::SaveTxtInfo(const TStr& OutFNmPref, const TStr& Desc, const bool& SetMaxAt1) const {
00842   printf("Save text info...");
00843   TExeTm ExeTm;
00844   const int GNodes = Graph->GetNodes();
00845   const int GEdges = Graph->GetEdges();
00846   TVec<TFltV> ColV(17);
00847   double MxFrac=0, AvgFrac=0, MedianFrac=0, Pct9Frac=0, Flake=0;
00848   // double PrevBPos = 1, BPos = 1;
00849   for (int i = 0; i < SizeBucketSet.Len(); i++) {
00850     if ( !BestCutH.IsKey(SizeBucketSet[i])) { continue; }
00851     const TLocClustStat::TCutInfo& C = GetKNodeCut(SizeBucketSet[i]);
00852     C.GetFracDegOut(Graph, MxFrac, AvgFrac, MedianFrac, Pct9Frac, Flake);
00853     ColV[0].Add(C.Nodes());  ColV[1].Add(C.Edges()); 
00854     ColV[2].Add(C.CutSz());  ColV[3].Add(C.GetPhi());
00855     ColV[4].Add(C.GetExpansion());       ColV[5].Add(C.GetIntDens()); 
00856     ColV[6].Add(C.GetCutRatio(GNodes));  ColV[7].Add(C.GetNormCut(GEdges));
00857     ColV[8].Add(MxFrac);       ColV[9].Add(AvgFrac); 
00858     ColV[10].Add(MedianFrac);  ColV[11].Add(Pct9Frac);  ColV[12].Add(Flake);
00859     ColV[13].Add(double(2.0*C.Edges));  ColV[14].Add(C.GetExpEdgesIn(GEdges));
00860     ColV[15].Add(C.GetModular(GEdges)); ColV[16].Add(C.GetModRat(GEdges));
00861     printf(".");
00862   }
00863   // normalize so that maximum value is at 1
00864   if (SetMaxAt1) {
00865     for (int c = 1; c < ColV.Len(); c++) {
00866       double MaxVal=1e-10;
00867       for (int r = 0; r < ColV[c].Len(); r++) { MaxVal = TMath::Mx(MaxVal, ColV[c][r]()); }
00868       for (int r = 0; r < ColV[c].Len(); r++) { ColV[c][r] /= MaxVal; }
00869     }
00870   }
00871   // save
00872   const TStr DatFNm = TStr::Fmt("ncp.%s.INFO.tab", OutFNmPref.CStr());
00873   FILE *F = fopen(DatFNm.CStr(), "wt");
00874   fprintf(F, "# %s %s\n", Desc.CStr(), ParamStr().CStr());
00875   fprintf(F, "#N_inside\tE_inside\tE_across\tConductance\tExpansion\tIntDensity\tCutRatio\tNormCut\tMx_FracDegOut\tAvg_FDO\tMedian_FDO\t90Pct_FDO\tFlake_FDO\tVolume\tExpVolume\tModularity\tModRatio\n");
00876   for (int r = 0; r < ColV[0].Len(); r++) {
00877     fprintf(F, "%g", ColV[0][r]());
00878     for (int c = 1; c < ColV.Len(); c++) {
00879       fprintf(F, "\t%g", ColV[c][r]()); }
00880     fprintf(F, "\n");
00881   }
00882   fclose(F);
00883   printf("[%s]\n", ExeTm.GetStr());
00884   TGnuPlot GP(TStr::Fmt("ncp.%s.All", OutFNmPref.CStr()), TStr::Fmt("%s %s", 
00885     Desc.CStr(), ParamStr().CStr()));
00886   GP.AddPlot(DatFNm, 1, 4, gpwLines, "Conductance", "lw 2");
00887   GP.AddPlot(DatFNm, 1, 5, gpwPoints, "Expansion", "pt 3");
00888   GP.AddPlot(DatFNm, 1, 6, gpwPoints, "Internal Density", "pt 5 ps 0.8");
00889   GP.AddPlot(DatFNm, 1, 7, gpwPoints, "Cut Ratio", "pt 6");
00890   GP.AddPlot(DatFNm, 1, 8, gpwPoints, "Normalized Cut", "pt 7");
00891   GP.AddPlot(DatFNm, 1, 9, gpwPoints, "Maximum FDO", "pt 9");
00892   GP.AddPlot(DatFNm, 1, 10, gpwPoints, "Avg FDO", "pt 11");
00893   GP.AddPlot(DatFNm, 1, 13, gpwPoints, "Flake FDO", "pt 13");
00894   GP.SetScale(gpsLog10XY);
00895   GP.SetXYLabel("k (number of nodes in the cluster)", "Normalized community score (lower is better)");
00896   GP.SavePng();
00897 }
00898 
00899 // conductances if clusters are composed of disjoint pieces that can be separated
00900 // from the graph by a single edge
00901 void TLocClustStat::BagOfWhiskers(const PUNGraph& Graph, TFltPrV& SizePhiV, TFltPr& MaxWhisk) {
00902   TCnComV Cn1ComV;
00903   TSnap::Get1CnCom(Graph, Cn1ComV);
00904   TIntPrV SzVolV;
00905   int MxSize=0;
00906   if (Cn1ComV.Empty()) { printf("** No bridges\n"); SizePhiV.Clr();  return; }
00907   //  Graph->SaveEdgeList("g-vol.txt");  TGraphViz::Plot(Graph, gvlNeato, "g-vol.gif");  Fail; } IAssert(vol <= sz*(sz-1));
00908   printf("  1-connected components: %d\n", Cn1ComV.Len());
00909   MaxWhisk = TFltPr(1,1);
00910   for (int c = 0; c < Cn1ComV.Len(); c++) {
00911     const TIntV& NIdV = Cn1ComV[c].NIdV;
00912     const int sz = NIdV.Len();
00913     if (sz < 2) { continue; }
00914     int vol = 0; // volume is the size of degrees
00915     for (int n = 0; n < sz; n++) {
00916       vol += Graph->GetNI(NIdV[n]).GetOutDeg(); }
00917     SzVolV.Add(TIntPr(sz, vol));
00918     MxSize += sz;
00919     if (1.0/double(vol) < MaxWhisk.Val2) { MaxWhisk=TFltPr(NIdV.Len(), 1.0/double(vol)); }
00920   }
00921   SzVolV.Sort(false);
00922   // compose clusters
00923   THash<TInt, TIntSet> ItemSetH(MxSize, true);
00924   THash<TInt, TInt> VolH(MxSize, true);
00925   THash<TInt, TFlt> CostH(MxSize, true);
00926   ItemSetH.AddKey(0);  VolH.AddKey(0);
00927   TExeTm ExeTm;
00928   // exact up to size 1000
00929   for (int size = 2; size <= TMath::Mn(MxSize, 1000); size++) {
00930     for (int item = 0; item <SzVolV.Len(); item++) {
00931       const int smallSz = size-SzVolV[item].Val1;
00932       if (ItemSetH.IsKey(smallSz)) {
00933         const TIntSet& SmallSet = ItemSetH.GetDat(smallSz);
00934         if (SmallSet.IsKey(item)) { continue; }
00935         const int SmallVol = VolH.GetDat(smallSz);
00936         // combine smaller nad new solution
00937         const double curCost = CostH.IsKey(size) ? double(CostH.GetDat(size)) : double(10e10);
00938         const double newCost = double(SmallSet.Len()+1) / double(SmallVol+SzVolV[item].Val2);
00939         if (curCost < newCost) { continue; }
00940         VolH.AddDat(size, SmallVol+SzVolV[item].Val2);
00941         ItemSetH.AddDat(size, SmallSet);
00942         ItemSetH.GetDat(size).AddKey(item);
00943         CostH.AddDat(size, newCost);
00944       }
00945     }
00946     if (VolH.IsKey(size) && size%100==0) {
00947       printf("\rSize: %d/%d: vol: %d,  items: %d/%d [%s]", size, MxSize, VolH.GetDat(size).Val,
00948         ItemSetH.GetDat(size).Len(), SzVolV.Len(), ExeTm.GetStr());
00949     }
00950   }
00951   // add sizes larger than 1000
00952   printf("\nAdding sizes > 1000 nodes...");
00953   int partSz=0; double partVol=0.0;
00954   for (int i = 0; i < SzVolV.Len(); i++) {
00955     partSz += SzVolV[i].Val1();
00956     partVol += SzVolV[i].Val2();
00957     if (partSz < 1000) { continue; }
00958     const double curCost = CostH.IsKey(partSz) ? double(CostH.GetDat(partSz)) : double(10e10);
00959     const double partPhi = double(i+1)/partVol;
00960     if (partPhi < curCost) {
00961       CostH.AddDat(partSz, partPhi);
00962     }
00963   }
00964   VolH.SortByKey();
00965   CostH.SortByKey();
00966   SizePhiV.Gen(CostH.Len(), 0);
00967   SizePhiV.Add(TFltPr(1, 1));
00968   for (int s = 0; s < CostH.Len(); s++) {
00969     const int size = CostH.GetKey(s);
00970     const double cond = CostH[s]; //double(ItemSetH.GetDat(size).Len())/double(VolH[s]);
00971     SizePhiV.Add(TFltPr(size, cond));
00972   }
00973   printf("done\n");
00974 }
00975 
00976 // faster greedy version
00977 void TLocClustStat::BagOfWhiskers2(const PUNGraph& Graph, TFltPrV& SizePhiV) {
00978   TCnComV Cn1ComV;
00979   TSnap::Get1CnCom(Graph, Cn1ComV);
00980   TIntPrV SzVolV;
00981   int MxSize=0, TotVol=0;
00982   if (Cn1ComV.Empty()) { printf("** No bridges\n");  SizePhiV.Clr();  return; }
00983   printf("  1-connected components: %d\n", Cn1ComV.Len());
00984   for (int c = 0; c < Cn1ComV.Len(); c++) {
00985     const TIntV& NIdV = Cn1ComV[c].NIdV;
00986     const int sz = NIdV.Len();
00987     if (sz < 2) { continue; }
00988     int vol = 0; // volume is the size of degrees
00989     for (int n = 0; n < sz; n++) {
00990       vol += Graph->GetNI(NIdV[n]).GetOutDeg(); }
00991     SzVolV.Add(TIntPr(sz, vol));
00992     MxSize += sz;  TotVol += vol;
00993   }
00994   printf("  Total size: %d\t Total vol: %d\n", MxSize, TotVol);
00995   SzVolV.Sort(false);
00996   // compose clusters
00997   THash<TFlt, TFlt> SizePhiH(MxSize, true);
00998   for (int i = 0; i < SzVolV.Len(); i++) {
00999     const int Sz = SzVolV[i].Val1();
01000     const double Phi = 1.0/double(SzVolV[i].Val2());
01001     if ((! SizePhiH.IsKey(Sz)) || SizePhiH.GetDat(Sz) > Phi) {
01002       SizePhiH.AddDat(Sz, Phi);  }
01003   }
01004   double partSz=0.0, partVol=0.0;
01005   for (int i = 0; i < SzVolV.Len(); i++) {
01006     partSz += SzVolV[i].Val1();
01007     partVol += SzVolV[i].Val2();
01008     const double partPhi = double(i+1)/partVol;
01009     if ((! SizePhiH.IsKey(partSz)) || partPhi < SizePhiH.GetDat(partSz)) {
01010       SizePhiH.AddDat(partSz, partPhi); }
01011   }
01012   SizePhiV.Gen(SizePhiH.Len()+1, 0);
01013   SizePhiV.Add(TFltPr(1, 1));
01014   SizePhiH.SortByKey();
01015   for (int s = 0; s < SizePhiH.Len(); s++) {
01016     SizePhiV.Add(TFltPr(SizePhiH.GetKey(s), SizePhiH[s]));
01017   }
01018 }
01019 
01020 void TLocClustStat::MakeExpBins(const TFltPrV& ValV, TFltPrV& NewV) {
01021   if (ValV.Empty()) { NewV.Clr(false); return; }
01022   NewV.Gen(1000, 0);
01023   double PrevBPos = 1, BPos = 1;
01024   int i = 0;
01025   while (BPos <= ValV.Last().Val1) {
01026     //if (TakeValAt == 1) {
01027     //  while (i < ValV.Len() && ValV[i].Val1 <= BPos) { i++; }
01028     //  NewV.Add(ValV[i-1]); }
01029     //else if (TakeValAt == 2) {
01030     int MinI=-1;  double MinCnt=TFlt::Mx;
01031     while (i < ValV.Len() && ValV[i].Val1 <= BPos) {
01032       if (ValV[i].Val2 < MinCnt) { MinCnt=ValV[i].Val2; MinI=i; } i++; }
01033     if (MinI>=0 && MinI<ValV.Len()) {
01034       NewV.Add(ValV[MinI]); }
01035     PrevBPos = (uint) floor(BPos);
01036     BPos *= BinFactor;
01037     if (floor(BPos) == PrevBPos) { BPos = PrevBPos + 1; }
01038   }
01039   NewV.Add(ValV.Last());
01040 }
01041 
01042 void TLocClustStat::MakeExpBins(const TFltV& ValV, TFltV& NewV) {
01043   if (ValV.Empty()) { NewV.Clr(false); return; }
01044   NewV.Gen(1000, 0);
01045   double PrevBPos = 1, BPos = 1;
01046   int i = 1;
01047   NewV.Add(ValV[0]);
01048   while (BPos <= ValV.Len()) {
01049     int MinI=-1;  double MinCnt=TFlt::Mx;
01050     while (i < ValV.Len() && i <= BPos) {
01051       if (ValV[i] < MinCnt) { MinCnt=ValV[i]; MinI=i; } i++; }
01052     if (MinI>=0 && MinI<ValV.Len()) {
01053       NewV.Add(ValV[MinI]); }
01054     PrevBPos = (uint) floor(BPos);
01055     BPos *= BinFactor;
01056     if (floor(BPos) == PrevBPos) { BPos = PrevBPos + 1; }
01057   }
01058   NewV.Add(ValV.Last());
01059 }
01060 
01061 
01063 // Local clustering for a set of graphs (loads ncp-*.tab files)
01064 TNcpGraphsBase::TNcpGraphsBase(const TStr& FNmWc) {
01065   TStr FNm;
01066   for (TFFile FFile(FNmWc); FFile.Next(FNm); ) {
01067     TSsParser Ss(FNm, ssfTabSep, true, false);
01068     int TrueNcpId=-1, WhiskId=-1, RewBestWhiskId=-1, RewId=-1, BestWhiskId=-1;
01069     while (Ss.Next()) {
01070       for (int f = 0; f < Ss.GetFlds(); f++) {
01071         // load ForestFire parameter (fwd burn prob)
01072         if (strstr(Ss[f], "FWD:")) { 
01073           TStr S(Ss[f]); const int x = S.SearchStr("FWD:");
01074           ParamValV.Add(S.GetSubStr(x+4, S.SearchCh(' ', x+1)-1).GetFlt());
01075         }
01076         // extract column names
01077         if (strstr(Ss[f], "ORIGINAL MIN")!=NULL) { 
01078           GNmV.Add(TStr::Fmt("%s %s", FNm.GetSubStr(FNm.SearchCh('.')+1, FNm.SearchChBack('.')-1).CStr(), strchr(Ss[f], '('))); 
01079           int Nodes=0,Edges=0; sscanf(strchr(Ss[f], '(')+1, "%d,%d)", &Nodes, &Edges);
01080           GSizeV.Add(TIntPr(Nodes, Edges));
01081           printf("%s: %d %d\n", GNmV.Last().CStr(), Nodes, Edges);
01082           TrueNcpId=f;
01083         }
01084         if (strstr(Ss[f], "ORIGINAL whisker")!=NULL || strstr(Ss[f], "TRUE whisker")!=NULL) { WhiskId=f; } 
01085         if (strstr(Ss[f], "ORIGINAL Best whisker")!=NULL || strstr(Ss[f], "TRUE Best whisker")!=NULL) { BestWhiskId=f; }
01086         if (strstr(Ss[f], "REWIRED MIN")!=NULL || strstr(Ss[f], "RAND MIN")!=NULL) { RewId=f; } 
01087         if (strstr(Ss[f], "REWIRED Best whisker")!=NULL || strstr(Ss[f], "RAND Best whisker")!=NULL) { RewBestWhiskId=f; }
01088       }
01089       if (TrueNcpId!=-1 || WhiskId!=-1) { break; }
01090     }
01091     if (TrueNcpId < 0) { printf("%s\n", FNm.GetFMid().CStr()); break; }
01092     if (BestWhiskId < 0) { WhiskerV.Add(TFltPr(1,1)); }
01093     if (RewBestWhiskId < 0) { RewWhiskerV.Add(TFltPr(1,1)); }
01094     NcpV.Add(); WhiskNcpV.Add(); RewNcpV.Add();
01095     TFltPrV& Ncp = NcpV.Last();
01096     TFltPrV& WhiskNcp = WhiskNcpV.Last();
01097     TFltPrV& RewNcp = RewNcpV.Last();
01098     bool Once=false, Once2=false;
01099     while (Ss.Next()) { 
01100       if (TrueNcpId < Ss.GetFlds()&& Ss.IsFlt(TrueNcpId)) { Ncp.Add(TFltPr(Ss.GetFlt(TrueNcpId-1), Ss.GetFlt(TrueNcpId))); }
01101       if (WhiskId>=0 && WhiskId < Ss.GetFlds() && Ss.IsFlt(WhiskId)) { WhiskNcp.Add(TFltPr(Ss.GetFlt(WhiskId-1), Ss.GetFlt(WhiskId))); }
01102       if (RewId >=0 && RewId < Ss.GetFlds()&& Ss.IsFlt(RewId)) { RewNcp.Add(TFltPr(Ss.GetFlt(RewId-1), Ss.GetFlt(RewId))); }
01103       if (BestWhiskId>=0 && BestWhiskId < Ss.GetFlds() && ! Once) {  Once=true;
01104         int W2=BestWhiskId-1;  while (W2 > 0 && Ss.GetFlt(W2)!=(double)int(Ss.GetFlt(W2))) { W2--; }
01105         WhiskerV.Add(TFltPr(Ss.GetFlt(W2), Ss.GetFlt(BestWhiskId))); }
01106       if (RewBestWhiskId>=0 && RewBestWhiskId < Ss.GetFlds() && ! Once2) {  Once2=true;
01107         int W2=RewBestWhiskId-1;  while (W2 > 0 && Ss.GetFlt(W2)!=(double)int(Ss.GetFlt(W2))) { W2--; }
01108         RewWhiskerV.Add(TFltPr(Ss.GetFlt(W2), Ss.GetFlt(RewBestWhiskId))); }
01109     }
01110     printf("  ncp:%d  whisk:%d  rew:%d\n", NcpV.Last().Len(), WhiskNcpV.Last().Len(), RewNcpV.Last().Len());
01111   }
01112   IAssert(NcpV.Len() == GSizeV.Len());
01113 }
01114 TNcpGraphsBase::TNcpGraphsBase(TSIn& SIn) : GNmV(SIn), GSizeV(SIn), WhiskerV(SIn), 
01115   RewWhiskerV(SIn),NcpV(SIn), RewNcpV(SIn),WhiskNcpV(SIn) { 
01116 }
01117 
01118 void TNcpGraphsBase::Save(TSOut& SOut) const { 
01119   GNmV.Save(SOut); GSizeV.Save(SOut); 
01120   WhiskerV.Save(SOut);  RewWhiskerV.Save(SOut); NcpV.Save(SOut); 
01121   RewNcpV.Save(SOut); WhiskNcpV.Save(SOut); 
01122 }
01123 
01124 void TNcpGraphsBase::Impose(const TStr& OutFNm, const int& TopN, const bool& Smooth) { 
01125   TGnuPlot GP(OutFNm);
01126   for (int i = 0; i < TMath::Mn(NcpV.Len(), TopN); i++) {
01127     GP.AddPlot(NcpV[i], gpwLines, GNmV[i], Smooth?"smooth csplines":"");
01128   }
01129   GP.SetScale(gpsLog10XY);
01130   GP.SavePng();
01131 }
01132 
01133 double TNcpGraphsBase::GetXAtMinY(const TFltPrV& Ncp, const int& NNodes) {
01134   double MinX1=1, MinY1=1;
01135   for (int k = 0; k < Ncp.Len(); k++) {
01136     if (Ncp[k].Val2<MinY1) { MinX1=Ncp[k].Val1; MinY1=Ncp[k].Val2; } }
01137   return MinX1<1 ? 1 : MinX1;
01138   //if (NNodes < 1000) return MinX1;
01139   // smooth
01140   /*const int WndSize = 50;
01141   double MinX=1, MinY=1;
01142   TFltPrV Ncp2V;
01143   for (int k = 0; k < Ncp.Len(); k++) {
01144     int WndSz = k > WndSize ? WndSize : k;
01145     double SmoothVal=0.0, SmoothCnt=0;
01146     for (int i = -WndSz; i <= WndSz; i++) {
01147       if (k+i > -1 && k+i < Ncp.Len()) { SmoothCnt+=pow(1.1, -abs(i));
01148         SmoothVal+=pow(1.1, -abs(i)) * Ncp[k+i].Val2; }
01149     }
01150     SmoothVal = SmoothVal/SmoothCnt;
01151     Ncp2V.Add(TFltPr(Ncp[k].Val1, SmoothVal));
01152     if (SmoothVal<MinY) { MinX=Ncp[k].Val1; MinY=SmoothVal; }
01153   }
01154   static int cnt = 0;
01155   if (Ncp2V.Len() > 10 && cnt < 10) { 
01156     TGnuPlot GP(TStr::Fmt("test-%03d", ++cnt));
01157     GP.SetScale(gpsLog10XY);
01158     GP.AddPlot(Ncp, gpwLines, "true");
01159     GP.AddPlot(Ncp2V, gpwLines, "smooth");
01160     GP.SavePng();
01161   }
01162   if (MinX < 1) { return 1; } else if (MinX > 1000) { return 1000; }
01163   return MinX;*/
01164 }
01165 
01166 TFltPr TNcpGraphsBase::GetXYAtMinY(const TFltPrV& Ncp, const int& NNodes) {
01167   double MinX1=1, MinY1=1;
01168   for (int k = 0; k < Ncp.Len(); k++) {
01169     if (Ncp[k].Val2<MinY1) { MinX1=Ncp[k].Val1; MinY1=Ncp[k].Val2; } }
01170   return TFltPr(MinX1<1?1:MinX1, MinY1);
01171 }
01172 
01173 void TNcpGraphsBase::PlotNcpMin(const TStr& OutFNm, const bool& VsGraphN) {
01174   TFltPrV GSzMinK, GSzMinY;
01175   for (int i = 0; i < NcpV.Len(); i++) {
01176     const TFltPr XYAtMinY = GetXYAtMinY(NcpV[i], GSizeV[i].Val1);
01177     const double X = VsGraphN ? (!ParamValV.Empty()?ParamValV[i]():i+1) : GSizeV[i].Val1();
01178     GSzMinK.Add(TFltPr(X, XYAtMinY.Val1));
01179     GSzMinY.Add(TFltPr(X, XYAtMinY.Val2));
01180   }
01181   GSzMinK.Sort();  GSzMinY.Sort();
01182   const TStr XLabel = VsGraphN ? (!ParamValV.Empty()?"parameter value":"network number") : "network size";
01183   TGnuPlot::PlotValV(GSzMinK, TStr("bestK-")+OutFNm, "Network", XLabel, "size of best cluster", VsGraphN?gpsLog10Y:gpsLog, false, gpwLinesPoints);
01184   TGnuPlot::PlotValV(GSzMinY, TStr("condAtBestK-")+OutFNm, "Network", XLabel, "conductance of best cluster", VsGraphN?gpsLog10Y:gpsLog, false, gpwLinesPoints);
01185 }
01186 
01187 void TNcpGraphsBase::SaveTxtNcpMin(const TStr& OutFNm, const bool& VsGraphN) {
01188   TVec<TQuad<TInt, TInt, TFlt, TStr> > GSzMinK;
01189   for (int i = 0; i < NcpV.Len(); i++) {
01190     const TFltPr XYAtMinY = GetXYAtMinY(NcpV[i], GSizeV[i].Val1);
01191     const double X = VsGraphN ? (!ParamValV.Empty()?ParamValV[i]():i+1) : GSizeV[i].Val1();
01192     GSzMinK.Add(TQuad<TInt, TInt, TFlt, TStr>((int)X, (int)XYAtMinY.Val1(), XYAtMinY.Val2, GNmV[i]));
01193   }
01194   GSzMinK.Sort();
01195   FILE *F = fopen(TStr::Fmt("bestK-%s.txt", OutFNm.CStr()).CStr(), "wt");
01196   fprintf(F, "#nodes\tbestK\tcondAtBestK\tgraph name\n");
01197   for (int i = 0; i < GSzMinK.Len(); i++) {
01198     fprintf(F, "%d\t%d\t%f\t%s\n", GSzMinK[i].Val1(), GSzMinK[i].Val2(), GSzMinK[i].Val3(), GSzMinK[i].Val4.CStr());
01199   }
01200   fclose(F);
01201 }
01202 
01203 void TNcpGraphsBase::PlotRewNcpMin(const TStr& OutFNm, const bool& VsGraphN) {
01204   TFltPrV GSzMinK, GSzMinY;
01205   for (int i = 0; i < NcpV.Len(); i++) {
01206     const TFltPr XYAtMinY = GetXYAtMinY(RewNcpV[i], GSizeV[i].Val1);
01207     const double X = VsGraphN ? (!ParamValV.Empty()?ParamValV[i]():i+1) : GSizeV[i].Val1();
01208     GSzMinK.Add(TFltPr(X, XYAtMinY.Val1));
01209     GSzMinY.Add(TFltPr(X, XYAtMinY.Val2));
01210   }
01211   GSzMinK.Sort();  GSzMinY.Sort();
01212   const TStr XLabel = VsGraphN ? (!ParamValV.Empty()?"parameter value":"network number") : "network size";
01213   TGnuPlot::PlotValV(GSzMinK, TStr("bestR-")+OutFNm, "Rewired network", XLabel, "size of best cluster", VsGraphN?gpsLog10Y:gpsLog, false, gpwLinesPoints);
01214   TGnuPlot::PlotValV(GSzMinY, TStr("condAtBestR-")+OutFNm, "Rewired network", XLabel, "conductance of best cluster", VsGraphN?gpsLog10Y:gpsLog, false, gpwLinesPoints);
01215 }
01216 
01217 void TNcpGraphsBase::PlotBestWhisker(const TStr& OutFNm, const bool& VsGraphN) {
01218   TFltPrV GSzMinK, GSzMinY;
01219   for (int i = 0; i < GSizeV.Len(); i++) {
01220     if (WhiskerV[i].Val1()>0) {
01221       const double X = VsGraphN ? (!ParamValV.Empty()?ParamValV[i]():i+1) : GSizeV[i].Val1();
01222       GSzMinK.Add(TFltPr(X, WhiskerV[i].Val1()));
01223       GSzMinY.Add(TFltPr(X, WhiskerV[i].Val2()));
01224     }
01225   }
01226   GSzMinK.Sort();  GSzMinY.Sort();
01227   const TStr XLabel = VsGraphN ? (!ParamValV.Empty()?"parameter value":"network number") : "network size";
01228   TGnuPlot::PlotValV(GSzMinK, TStr("bestW-")+OutFNm, "Network", XLabel, "size of best whisker", VsGraphN?gpsLog10Y:gpsLog, false, gpwLinesPoints);
01229   TGnuPlot::PlotValV(GSzMinY, TStr("condAtBestW-")+OutFNm, "Network", XLabel, "conductance of best whisker", VsGraphN?gpsLog10Y:gpsLog, false, gpwLinesPoints);
01230 }
01231 
01232 void TNcpGraphsBase::PlotRewBestWhisker(const TStr& OutFNm, const bool& VsGraphN) {
01233   TFltPrV GSzMinK, GSzMinY;
01234   for (int i = 0; i < GSizeV.Len(); i++) {
01235     if (WhiskerV[i].Val1()>0) {
01236       const double X = VsGraphN ? (!ParamValV.Empty()?ParamValV[i]():i+1) : GSizeV[i].Val1();
01237       GSzMinK.Add(TFltPr(X, RewWhiskerV[i].Val1()));
01238       GSzMinY.Add(TFltPr(X, RewWhiskerV[i].Val2()));
01239     }
01240   }
01241   GSzMinK.Sort();  GSzMinY.Sort();
01242   const TStr XLabel = VsGraphN ? (!ParamValV.Empty()?"parameter value":"network number") : "network size";
01243   TGnuPlot::PlotValV(GSzMinK, TStr("bestWR-")+OutFNm, "Rewired network", XLabel, "size of best rewired whisker", VsGraphN?gpsLog10Y:gpsLog, false, gpwLinesPoints);
01244   TGnuPlot::PlotValV(GSzMinY, TStr("condAtBestWR-")+OutFNm, "Rewired network", XLabel, "conductance of best rewired whisker", VsGraphN?gpsLog10Y:gpsLog, false, gpwLinesPoints);
01245 }
01246 
01247 void TNcpGraphsBase::PlotAvgNcp(const TStr& OutFNm, const TVec<TFltPrV>& NcpVec, const int& MinSz, const double& MaxMinY) {
01248   THash<TFlt, TMom> MomH;
01249   int Cnt=0;
01250   for (int i = 0; i < NcpVec.Len(); i++) {
01251     if (GSizeV[i].Val1 < MinSz) { continue; }
01252     const TFltPrV& Ncp = NcpVec[i];
01253     double MinX=1, MinY=1;
01254     for (int k = 0; k < Ncp.Len(); k++){
01255       if (Ncp[k].Val2<MinY) { MinX=Ncp[k].Val1; MinY=Ncp[k].Val2; } }
01256     if (MinY > MaxMinY) { continue; }  Cnt++;
01257     //const double Coef = (1-0.0001)/(1.0-MinY);
01258     for (int k = 0; k < Ncp.Len(); k++){
01259       //MomH.AddDat(TMath::Round(exp(TMath::Round(log(Ncp[k].Val1()), 2)),2)).Add(0.0001+(Ncp[k].Val2-MinY)*Coef);
01260       MomH.AddDat(TMath::Round(exp(TMath::Round(log(Ncp[k].Val1()), 1)),0)).Add(Ncp[k].Val2);
01261     }
01262   }
01263   TGnuPlot::PlotValMomH(MomH, OutFNm, "", "size of the cluster, k", "phi(k)", gpsLog, gpwLines, true, true,true,true);
01264   printf("  minSz: %d, miny %g\t%d\n", MinSz, MaxMinY, Cnt);
01265 }
01266 
01267 void TNcpGraphsBase::SaveTxt(const TStr& OutFNm) {
01268   FILE *F=fopen(OutFNm.CStr(), "wt");
01269   fprintf(F, "#Nodes\tEdges\tBestK\tPhi(BestK)\tMaxWhiskN\tPhi(MaxWhisk)\tGraph\n");
01270   for (int i = 0; i < NcpV.Len(); i++) {
01271     const TFltPrV& Ncp = NcpV[i];
01272     double MinX=1, MinY=1;
01273     for (int k = 0; k < Ncp.Len(); k++){
01274       if (Ncp[k].Val2<MinY) { MinX=Ncp[k].Val1; MinY=Ncp[k].Val2; } }
01275     fprintf(F, "%d\t%d\t%d\t%f\t%d\t%f\t%s\n", GSizeV[i].Val1(), GSizeV[i].Val2(), 
01276       int(MinX), MinY, int(WhiskerV[i].Val1), WhiskerV[i].Val2(), GNmV[i].CStr());
01277   }
01278   fclose(F);
01279 }
01280 
01281 void TNcpGraphsBase::PlotDataset(const TStr& InFNmWc, const TStr& OutFNm, const bool& ImposeNcp, const bool& VsGraphN) {
01282   TNcpGraphsBase NcpBs(InFNmWc);  
01283   //NcpBs.Save(TFOut(OutFNm+".NcpBase"));
01284   //TNcpGraphsBase NcpBs(TFIn(OutFNm+".NcpBase"));
01285   if (ImposeNcp) {
01286     NcpBs.Impose(OutFNm+"5R", 5, false);  NcpBs.Impose(OutFNm+"5S", 5, true); 
01287     NcpBs.Impose(OutFNm+"R", 10, false);  NcpBs.Impose(OutFNm+"S", 10, true); 
01288   }
01289   NcpBs.PlotNcpMin(OutFNm, VsGraphN);
01290   //NcpBs.SaveTxtNcpMin(OutFNm, VsGraphN);
01291   NcpBs.PlotRewNcpMin(OutFNm, VsGraphN);
01292   NcpBs.PlotBestWhisker(OutFNm, VsGraphN);
01293   NcpBs.PlotRewBestWhisker(OutFNm, VsGraphN);
01294   
01295   //NcpBs.PlotAvgNcp(OutFNm+"AvgNcp", NcpBs.NcpV, 1, 1);
01296   //NcpBs.PlotAvgNcp(OutFNm+"AvgRewNcp", NcpBs.RewNcpV, 1, 1);
01297   /*NcpBs.PlotAvgNcp(OutFNm+"AvgNcp2", NcpBs.NcpV, 100, 0.1);
01298   NcpBs.PlotAvgNcp(OutFNm+"AvgNcp3", NcpBs.NcpV, 100, 0.01);
01299   NcpBs.PlotAvgNcp(OutFNm+"AvgNcp4", NcpBs.NcpV, 100, 0.001);
01300   NcpBs.PlotAvgNcp(OutFNm+"AvgNcp5", NcpBs.NcpV, 100, 0.0001);
01301   NcpBs.PlotAvgNcp(OutFNm+"RewNcp1", NcpBs.RewNcpV, 1000, 1);
01302   NcpBs.PlotAvgNcp(OutFNm+"RewNcp2", NcpBs.RewNcpV, 100, 0.1);
01303   NcpBs.PlotAvgNcp(OutFNm+"RewNcp3", NcpBs.RewNcpV, 100, 0.01);
01304   NcpBs.PlotAvgNcp(OutFNm+"RewNcp4", NcpBs.RewNcpV, 100, 0.001);
01305   NcpBs.PlotAvgNcp(OutFNm+"RewNcp5", NcpBs.RewNcpV, 100, 0.0001);
01306   NcpBs.PlotAvgNcp(OutFNm+"WhiskNcp1", NcpBs.WhiskNcpV, 1000, 1);
01307   NcpBs.PlotAvgNcp(OutFNm+"WhiskNcp2", NcpBs.WhiskNcpV, 100, 0.1);
01308   NcpBs.PlotAvgNcp(OutFNm+"WhiskNcp3", NcpBs.WhiskNcpV, 100, 0.01);
01309   NcpBs.PlotAvgNcp(OutFNm+"WhiskNcp4", NcpBs.WhiskNcpV, 100, 0.001);
01310   NcpBs.PlotAvgNcp(OutFNm+"WhiskNcp5", NcpBs.WhiskNcpV, 100, 0.0001);
01311   NcpBs.PlotAvgNcp(OutFNm+"WhiskNcp6", NcpBs.WhiskNcpV, 100, 0.00004);
01312   NcpBs.PlotAvgNcp(OutFNm+"WhiskNcp7", NcpBs.WhiskNcpV, 100, 0.00005);*/
01313   //NcpBs.SaveTxt(OutFNm+"bestK.txt");
01314 }