SNAP Library, User Reference  2012-10-15 15:06:59
SNAP, a general purpose network analysis and graph mining library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
timenet.cpp
Go to the documentation of this file.
00001 
00002 // Time Network
00003 TTimeNet& TTimeNet::operator = (const TTimeNet& TimeNet) {
00004   if (this != &TimeNet) {
00005     TNet::operator=(TimeNet);
00006   }
00007   return *this;
00008 }
00009 
00010 PTimeNet TTimeNet::GetSubGraph(const TIntV& NIdV) const {
00011   PTimeNet NewNetPt = TTimeNet::New();
00012   TTimeNet& NewNet = *NewNetPt;
00013   NewNet.Reserve(NIdV.Len(), -1);
00014   int node, edge;
00015   TNodeI NI;
00016   for (node = 0; node < NIdV.Len(); node++) {
00017     NewNet.AddNode(NIdV[node], GetNDat(NIdV[node])); // also copy the node data
00018   }
00019   for (node = 0; node < NIdV.Len(); node++) {
00020     NI = GetNI(NIdV[node]);
00021     const int SrcNId = NI.GetId();
00022     for (edge = 0; edge < NI.GetOutDeg(); edge++) {
00023       const int OutNId = NI.GetOutNId(edge);
00024       if (NewNet.IsNode(OutNId)) {
00025         NewNet.AddEdge(SrcNId, OutNId); }
00026     }
00027   }
00028   NewNet.Defrag();
00029   return NewNetPt;
00030 }
00031 
00032 PTimeNENet TTimeNet::GetTimeNENet() const {
00033   TIntV NIdV;  GetNIdByTm(NIdV);
00034   PTimeNENet OutNet = TTimeNENet::New(GetNodes(), GetEdges());
00035   for (int i = 0; i < NIdV.Len(); i++) {
00036     const int Src = NIdV[i];
00037     const TTimeNet::TNodeI NI = GetNI(Src);
00038     const TSecTm SrcTm = NI.GetDat();
00039     if (! OutNet->IsNode(Src)) { OutNet->AddNode(Src, SrcTm); }
00040     for (int e = 0; e < NI.GetOutDeg(); e++) {
00041       if (! OutNet->IsNode(NI.GetOutNId(e))) { OutNet->AddNode(NI.GetOutNId(e), SrcTm); }
00042       OutNet->AddEdge(Src, NI.GetOutNId(e), -1, SrcTm);
00043     }
00044   }
00045   return OutNet;
00046 }
00047 
00048 void TTimeNet::GetNIdByTm(TIntV& NIdV) const {
00049   TVec<TKeyDat<TSecTm, TInt> > TmToNIdV(GetNodes(), 0);
00050   for (TNodeI NodeI = BegNI(); NodeI < EndNI(); NodeI++) {
00051     TmToNIdV.Add(TKeyDat<TSecTm, TInt>(NodeI.GetDat(), NodeI.GetId())); }
00052   TmToNIdV.Sort();
00053   NIdV.Gen(GetNodes(), 0);
00054   for (int i = 0; i < TmToNIdV.Len(); i++) {
00055     NIdV.Add(TmToNIdV[i].Dat); }
00056 }
00057 
00058 // put nodes into buckets by TmUnit
00059 void TTimeNet::GetTmBuckets(const TTmUnit& TmUnit, TTmBucketV& TmBucketV) const {
00060   THash<TInt, TIntV> TmIdToNIdVH;
00061   for (TNodeI NodeI = BegNI(); NodeI < EndNI(); NodeI++) {
00062     const int TmId = NodeI().Round(TmUnit);
00063     if (! TmIdToNIdVH.IsKey(TmId)) TmIdToNIdVH.AddKey(TmId);
00064     TmIdToNIdVH.GetDat(TmId).Add(NodeI.GetId());
00065   }
00066   TVec<TPair<TInt, TIntV> > TmIdNIdVV;
00067   TmIdToNIdVH.GetKeyDatPrV(TmIdNIdVV);
00068   TmIdNIdVV.Sort();
00069   TmBucketV.Gen(TmIdNIdVV.Len());
00070   for (int i = 0; i < TmIdNIdVV.Len(); i++) {
00071     TTmBucket& Bucket = TmBucketV[i];
00072     Bucket.BegTm = TmIdNIdVV[i].Val1;
00073     Bucket.NIdV = TmIdNIdVV[i].Val2;
00074   }
00075 }
00076 
00077 void TTimeNet::GetNodeBuckets(const int NodesPerBucket, TTimeNet::TTmBucketV& TmBucketV) const {
00078   TIntV NIdV;
00079   GetNIdByTm(NIdV);
00080   TmBucketV.Gen(NIdV.Len() / NodesPerBucket + 1, 0);
00081   for (int i = 0; i < NIdV.Len(); i++) {
00082     const int b = i/NodesPerBucket;
00083     if (TmBucketV.Len() <= b) { TmBucketV.Add(TTimeNet::TTmBucket(TSecTm(b))); }
00084     TmBucketV[b].NIdV.Add(NIdV[i]);
00085   }
00086 }
00087 
00088 PGStatVec TTimeNet::TimeGrowth(const TTmUnit& TmUnit, const TFSet& TakeStat, const TSecTm& StartTm) const {
00089   PGStatVec GrowthStat = new TGStatVec(TmUnit, TakeStat);
00090   TTmBucketV TmBucketV;
00091   GetTmBuckets(TmUnit, TmBucketV);
00092   TIntV NodeIdV;
00093   TExeTm ExeTm;
00094   for (int t = 0; t < TmBucketV.Len(); t++) {
00095     NodeIdV.AddV(TmBucketV[t].NIdV); // nodes up to time T
00096     printf("\n=== %d/%d] %s (%d nodes)\n", t+1, TmBucketV.Len(),
00097       TmBucketV[t].BegTm.GetStr().CStr(), NodeIdV.Len());  ExeTm.Tick();
00098     if (TmBucketV[t].BegTm < StartTm) continue;
00099     //PNGraph PreGraph = GetSubGraph(NodeIdV, true); // renumber nodes
00100     PNGraph PreGraph = TSnap::ConvertSubGraph<PNGraph>(PTimeNet((TTimeNet*)this), NodeIdV); // don't renumber nodes
00101     GrowthStat->Add(PreGraph, TmBucketV[t].BegTm);
00102   }
00103   return GrowthStat;
00104 }
00105 
00106 void TTimeNet::PlotEffDiam(const TStr& FNmPref, const TStr& Desc, const TTmUnit& TmUnit,
00107                            const TSecTm& StartTm, const int& NDiamRuns, const bool& OnlyWcc, const bool& AlsoRewire) const {
00108   const TStr WccStr = OnlyWcc ? "WCC " : TStr::GetNullStr();
00109   TTmBucketV TmBucketV;
00110   GetTmBuckets(TmUnit, TmBucketV);
00111   TIntV NodeIdV;
00112   TExeTm ExeTm, Run1Tm;
00113   TFltTrV TmDiamV, NdsDiamV;
00114   TFltTrV RwTmDiamV, RwNdsDiamV;
00115   for (int t = 0; t < TmBucketV.Len(); t++) {
00116     NodeIdV.AddV(TmBucketV[t].NIdV); // nodes up to time T
00117     printf("\n*** %d/%d] at %s (%d nodes)\n", t+1, TmBucketV.Len(),
00118       TmBucketV[t].BegTm.GetStr(TmUnit).CStr(), NodeIdV.Len());  ExeTm.Tick();
00119     if (TmBucketV[t].BegTm < StartTm) continue;
00120     //PUNGraph PreGraph = GetSubUNGraph(NodeIdV, true);
00121     PUNGraph PreGraph = TSnap::ConvertSubGraph<PUNGraph>(PTimeNet((TTimeNet*)this), NodeIdV);
00122     { TMom Mom;
00123     for (int r = 0; r < NDiamRuns; r++) {
00124       printf("%d...", r+1);  Run1Tm.Tick();
00125       const double EffDiam = TSnap::GetAnfEffDiam(OnlyWcc ? TSnap::GetMxWcc(PreGraph) : PreGraph);
00126       Mom.Add(EffDiam);  printf("[%s]\r", Run1Tm.GetTmStr());
00127     }
00128     Mom.Def();
00129     TmDiamV.Add(TFltTr((int)TmBucketV[t].BegTm.GetInUnits(TmUnit), Mom.GetMean(), Mom.GetSDev()));
00130     NdsDiamV.Add(TFltTr(PreGraph->GetNodes(), Mom.GetMean(), Mom.GetSDev()));
00131     NdsDiamV.Sort();
00132     printf("  [%s]          \n", ExeTm.GetTmStr()); }
00133     if (AlsoRewire) {
00134       //PUNGraph RwGraph = TGGen::GenRndDegSeqS(PreGraph, 50, TInt::Rnd); // edge switching model
00135       TIntV DegSeqV(PreGraph->GetNodes(), 0);
00136       for (TUNGraph::TNodeI NI = PreGraph->BegNI(); NI < PreGraph->EndNI(); NI++) { DegSeqV.Add(NI.GetDeg()); }
00137       PUNGraph RwGraph = TSnap::GenConfModel(DegSeqV, TInt::Rnd);
00138       printf("Configuration model: (%d, %d) --> (%d, %d)\n", PreGraph->GetNodes(), PreGraph->GetEdges(), RwGraph->GetNodes(), RwGraph->GetEdges());
00139       TMom Mom;
00140       for (int r = 0; r < NDiamRuns; r++) {
00141         printf("  diam run %d...", r+1);  Run1Tm.Tick();
00142         const double EffDiam = TSnap::GetAnfEffDiam(OnlyWcc ? TSnap::GetMxWcc(PreGraph):PreGraph);
00143         Mom.Add(EffDiam);  printf(" current run [%s]\n", Run1Tm.GetTmStr());
00144       }
00145       Mom.Def();
00146       RwTmDiamV.Add(TFltTr((int)TmBucketV[t].BegTm.GetInUnits(TmUnit), Mom.GetMean(), Mom.GetSDev()));
00147       RwNdsDiamV.Add(TFltTr(PreGraph->GetNodes(), Mom.GetMean(), Mom.GetSDev()));
00148       RwNdsDiamV.Sort();
00149       printf("done with diameter. Total time [%s] \n", ExeTm.GetTmStr());
00150     }
00151     // plot
00152     { TGnuPlot GnuPlot("diamEff-T."+FNmPref, TStr::Fmt("%s. G(%d, %d)", Desc.CStr(), GetNodes(), GetEdges()));
00153     GnuPlot.SetXYLabel(TStr::Fmt("TIME [%s]", TTmInfo::GetTmUnitStr(TmUnit).CStr()), WccStr+"Effective Diameter");
00154     GnuPlot.AddErrBar(TmDiamV, "True", "");
00155     if (! RwTmDiamV.Empty()) { GnuPlot.AddErrBar(RwTmDiamV, "Rewired", "");}
00156     GnuPlot.SavePng(); }
00157     { TGnuPlot GnuPlot("diamEff-N."+FNmPref, TStr::Fmt("%s. G(%d, %d)", Desc.CStr(), GetNodes(), GetEdges()));
00158     GnuPlot.SetXYLabel("NODES", WccStr+"Effective Diameter");
00159     GnuPlot.AddErrBar(NdsDiamV, "True", "");
00160     if (! RwNdsDiamV.Empty()) { GnuPlot.AddErrBar(RwNdsDiamV, "Rewired", "");}
00161     GnuPlot.SavePng(); }
00162   }
00163 }
00164 
00165 // pretend we only have link data starting in PostTmDiam
00166 // compare the diameter of the nodes after PostTmDiam with diameter
00167 // of the nodes after PostTmDiam but *also* using nodes and edges
00168 // from before PostTmDiam
00169 void TTimeNet::PlotMissingPast(const TStr& FNmPref, const TStr& Desc, const TTmUnit& TmUnit,
00170                                const TSecTm& DelPreTmEdges, const TSecTm& PostTmDiam) const {
00171   printf("\nGrowth over time: degree distribution, Growth Power Law, Diameter.\n  %s group by %s.\n",
00172     FNmPref.CStr(), TTmInfo::GetTmUnitStr(TmUnit).CStr());
00173   printf("  Delete out-edges of pre time %s nodes.\n  Take subgraph of post year %s subgraph.\n\n",
00174     DelPreTmEdges.GetStr().CStr(), PostTmDiam.GetStr().CStr());
00175   const int NDiamRuns = 10;
00176   const int NSamples = 100;
00177   //PUNGraph FullGraph = GetUNGraph();
00178   PUNGraph FullGraph = TSnap::ConvertGraph<PUNGraph>(PTimeNet((TTimeNet*)this));
00179   // delete past
00180   if (DelPreTmEdges.IsDef()) {
00181     int NDelNodes = 0, NDelEdges = 0;
00182     printf("Deleting pre-%s node out-links\n", DelPreTmEdges.GetStr().CStr());
00183     for (TNodeI NodeI = BegNI(); NodeI < EndNI(); NodeI++) {
00184       if (NodeI() < DelPreTmEdges) {
00185         const int NodeId = NodeI.GetId();
00186         for (int edge = 0; edge < NodeI.GetOutDeg(); edge++) {
00187           FullGraph->DelEdge(NodeId, NodeI.GetOutNId(edge)); }
00188         NDelEdges += NodeI.GetOutDeg();  NDelNodes++;
00189       }
00190     }
00191     printf("  Deleted %d nodes out-edges (%d edges total).\n", NDelNodes, NDelEdges);
00192     FullGraph->Defrag(true);
00193   }
00194   PGStatVec GrowthStat = TGStatVec::New(TmUnit);
00195   TFltV PreDiamSDev, PreEffDiamSDev, WccDiamSDev, WccEffDiamSDev;
00196   TIntV NodeIdV;
00197   TExeTm ExeTm;
00198   TTmBucketV TmBucketV;
00199   GetTmBuckets(TmUnit, TmBucketV);
00200   for (int t = 0; t < TmBucketV.Len(); t++) {
00201     printf("\nGraph: %s (%d / %d) [%s]\n", TmBucketV[t].BegTm.GetTmStr().CStr(),
00202       t+1, TmBucketV.Len(), TExeTm::GetCurTm());
00203     // up-to-year subgraph
00204     NodeIdV.AddV(TmBucketV[t].NIdV); // nodes up to time T
00205     if (TmBucketV[t].BegTm < PostTmDiam) { continue; }
00206     const PUNGraph PreGraph = TSnap::GetSubGraph(FullGraph, NodeIdV, true);
00207     const PUNGraph WccGraph = TSnap::GetMxWcc(PreGraph);
00208     TIntV PostYearNIdV, WccPostYearNIdV;
00209     for (TUNGraph::TNodeI NI = PreGraph->BegNI(); NI < PreGraph->EndNI(); NI++) {
00210       if (GetNDat(NI.GetId()) >= PostTmDiam) {
00211         PostYearNIdV.Add(NI.GetId());
00212         if (WccGraph->IsNode(NI.GetId())) { WccPostYearNIdV.Add(NI.GetId()); }
00213       }
00214     }
00215     TMom PreDiamMom, PreEffDiamMom, WccDiamMom, WccEffDiamMom;
00216     // diameter of PostYearDiam subgraph using whole graph (all edges)
00217     int FullDiam; double EffDiam;
00218     for (int r = 0; r < NDiamRuns; r++) {
00219       if (! PostYearNIdV.Empty()) {
00220         TSnap::GetBfsEffDiam(PreGraph, NSamples, PostYearNIdV, false, EffDiam, FullDiam);
00221         PreDiamMom.Add(FullDiam);  PreEffDiamMom.Add(EffDiam);
00222       }
00223       if (! WccPostYearNIdV.Empty()) {
00224         TSnap::GetBfsEffDiam(WccGraph, NSamples, WccPostYearNIdV, false, EffDiam, FullDiam);
00225         WccDiamMom.Add(FullDiam);  WccEffDiamMom.Add(EffDiam);
00226       }
00227       printf("  diam: %d  [%s]  \r", r+1, ExeTm.GetTmStr());  ExeTm.Tick();
00228     }
00229     PreDiamMom.Def();  PreEffDiamMom.Def();
00230     WccDiamMom.Def();  WccEffDiamMom.Def();
00231     // save stat
00232     PGStat GraphStatPt = GrowthStat->Add(TmBucketV[t].BegTm);
00233     TGStat& GS = *GraphStatPt;
00234     GS.TakeBasicStat(PreGraph, false);
00235     GS.TakeBasicStat(WccGraph, true);
00236     GS.SetVal(gsvFullDiam, PreDiamMom.GetMean()); // mean
00237     GS.SetVal(gsvEffDiam, PreEffDiamMom.GetMean());
00238     GS.SetVal(gsvFullWccDiam, WccDiamMom.GetMean());
00239     GS.SetVal(gsvEffWccDiam, WccEffDiamMom.GetMean());
00240     GS.SetVal(gsvFullDiamDev, PreDiamMom.GetSDev()); // variance
00241     GS.SetVal(gsvEffDiamDev, PreEffDiamMom.GetSDev());
00242     GS.SetVal(gsvFullWccDiamDev, WccDiamMom.GetSDev());
00243     GS.SetVal(gsvEffWccDiamDev, WccEffDiamMom.GetSDev());
00244     { TFOut FOut("growth."+FNmPref+".gStatVec");  GrowthStat->Save(FOut); }
00245     GrowthStat->SaveTxt(FNmPref, TStr::Fmt("%s. MISSING PAST DIAMETER\nDelPreEdges\t%s\nPostYearDiam\t%s\n",
00246       Desc.CStr(), DelPreTmEdges.GetStr().CStr(), PostTmDiam.GetStr().CStr()));
00247   }
00248   // diameter plots
00249   //GrowthStat->PlotDiam(FNmPref, Desc + TStr::Fmt(" MISSING PAST. DelPre:%d PostYear:%d.",
00250   //  DelPreEdges, PostYearDiam));*/
00251 }
00252 
00253 void TTimeNet::PlotCCfOverTm(const TStr& FNmPref, TStr Desc, const TTmUnit& TmUnit, const int& NodesBucket) const {
00254   if (Desc.Empty()) { Desc = FNmPref; }
00255   TTimeNet::TTmBucketV TmBucketV;
00256   TStr XLbl;
00257   if (TmUnit == tmuNodes) {
00258     XLbl = "Number of nodes (time)";
00259     IAssert(NodesBucket > 0);
00260     GetNodeBuckets(NodesBucket, TmBucketV); }
00261   else {
00262     XLbl = TStr::Fmt("Time (%s)", TTmInfo::GetTmUnitStr(TmUnit).CStr());
00263     GetTmBuckets(TmUnit, TmBucketV);
00264   }
00265   TIntV NodeIdV;
00266   TFltPrV DegToCCfV, CcfV, OpClV, OpV;
00267   TVec<TTuple<TFlt, 4> > OpenClsV;
00268   TTuple<TFlt, 4> Tuple;
00269   TExeTm ExeTm;
00270   int XVal = 0;
00271   printf("Clustering coefficient over time:\n  %d edges, %d edges per bucket, %d buckets \n", GetEdges(), 100000, TmBucketV.Len());
00272   PUNGraph UNGraph = TSnap::ConvertGraph<PUNGraph>(PTimeNet((TTimeNet*)this));
00273   for (int t = 0; t < TmBucketV.Len(); t++) {
00274     printf("\r  %d/%d: ", t+1, TmBucketV.Len());
00275     NodeIdV.AddV(TmBucketV[t].NIdV); // edges up to time T
00276     int Open=0, Close=0;
00277     const PUNGraph Graph = TSnap::GetSubGraph(UNGraph, NodeIdV);
00278     const double CCf = TSnap::GetClustCf(Graph, DegToCCfV, Open, Close);
00279     if (TmUnit == tmuNodes) { XVal = Graph->GetNodes(); }
00280     else { XVal = TmBucketV[t].BegTm.GetInUnits(TmUnit); }
00281     CcfV.Add(TFltPr(XVal, CCf));
00282     OpClV.Add(TFltPr(XVal, Open+Close==0?0:Close/(Open+Close)));
00283     OpV.Add(TFltPr(XVal, Open==0?0:Close/Open));
00284     Tuple[0]=Graph->GetNodes();
00285     Tuple[1]=Graph->GetEdges();
00286     Tuple[2]=Close;  Tuple[3]=Open;
00287     OpenClsV.Add(Tuple);
00288     printf(" %s", ExeTm.GetStr());
00289     TGnuPlot::PlotValV(DegToCCfV, TStr::Fmt("ccfAt%02dtm.%s", t+1, FNmPref.CStr()),
00290       TStr::Fmt("%s. At time %d. Clustering Coefficient. G(%d,%d)", Desc.CStr(), t+1, Graph->GetNodes(), Graph->GetEdges()),
00291       "Degree", "Clustering coefficient", gpsLog10XY, false);
00292   }
00293   TGnuPlot::PlotValV(CcfV, "ccfOverTm."+FNmPref, Desc+". Average Clustering Coefficient", XLbl, "Average clustering coefficient", gpsAuto, false);
00294   TGnuPlot::PlotValV(OpClV, "ClsOpnTr1."+FNmPref, Desc+". Close/(Open+Closed) triads", XLbl, "Close / (Open+Closed) triads", gpsAuto, false);
00295   TGnuPlot::PlotValV(OpV, "ClsOpnTr2."+FNmPref, Desc+". Close/Open triads", XLbl, "Close / Open triads", gpsAuto, false);
00296   TGnuPlot::SaveTs(OpenClsV, "ClsOpnTr."+FNmPref+".tab", TStr::Fmt("#%s\n#Nodes\tEdges\tClosed\tOpenTriads", Desc.CStr()));
00297   printf("\n");
00298 }
00299 
00300 void TTimeNet::PlotMedianDegOverTm(const TStr& FNmPref, const TTmUnit& TmUnit, const int& NodesPerBucket) const {
00301   TTimeNet::TTmBucketV TmBucketV;
00302   TStr XLbl;
00303   if (TmUnit == tmuNodes) {
00304     XLbl = "Number of nodes (time)";  IAssert(NodesPerBucket > 0);
00305     GetNodeBuckets(NodesPerBucket, TmBucketV); }
00306   else {
00307     XLbl = TStr::Fmt("Time (%s)", TTmInfo::GetTmUnitStr(TmUnit).CStr());
00308     GetTmBuckets(TmUnit, TmBucketV); }
00309   printf("\n\n%s\nMedian degree over time:\n  %d edges, %d edges per bucket, %d buckets \n", FNmPref.CStr(), GetEdges(), NodesPerBucket, TmBucketV.Len());
00310   TFltPrV MedDegV, MedInDegV, MedOutDegV;
00311   TIntV NodeIdV;
00312   int XVal;
00313   PUNGraph UNGraph = TSnap::ConvertGraph<PUNGraph>(PTimeNet((TTimeNet*)this));
00314   PNGraph NGraph = TSnap::ConvertGraph<PNGraph>(PTimeNet((TTimeNet*)this));
00315   FILE  *F = fopen(("gStat-"+FNmPref+".tab").CStr(), "wt");
00316   fprintf(F, "UndirNodes\tUndirEdges\tUndirNonZNodes\tMedianDeg\tMeanDeg\tDirNodes\tDirEdges\tDirNonzNodes\tMedInDeg\tMedOutDeg\tMeanInDeg\tMeanOutDeg\n");
00317   for (int t = 0; t < TmBucketV.Len(); t++) {
00318     printf("\r  %d/%d: ", t+1, TmBucketV.Len());
00319     NodeIdV.AddV(TmBucketV[t].NIdV); // edges up to time T
00320     if (TmUnit == tmuNodes) { XVal = NodeIdV.Len(); }
00321     else { XVal = TmBucketV[t].BegTm.GetInUnits(TmUnit); }
00322     // un graph
00323     { const PUNGraph Graph = TSnap::GetSubGraph(UNGraph, NodeIdV);  TMom Mom;
00324     for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) { if (NI.GetOutDeg()>0) { Mom.Add(NI.GetOutDeg());} }
00325     Mom.Def();  MedDegV.Add(TFltPr(XVal, Mom.GetMedian()));
00326     fprintf(F, "%d\t%d\t%d\t%f\t%f", Graph->GetNodes(), Graph->GetEdges(), TSnap::CntNonZNodes(Graph), (float)Mom.GetMedian(), (float)Mom.GetMean()); }
00327     // directed graph
00328     { const PNGraph Graph = TSnap::GetSubGraph<PNGraph>(NGraph, NodeIdV); TMom MomOut, MomIn;
00329     for (TNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
00330       if (NI.GetOutDeg()>0) { MomOut.Add(NI.GetOutDeg()); }
00331       if (NI.GetInDeg()>0) { MomIn.Add(NI.GetInDeg()); } }
00332     MomOut.Def();  MedOutDegV.Add(TFltPr(XVal, MomOut.GetMedian()));
00333     MomIn.Def();  MedInDegV.Add(TFltPr(XVal, MomIn.GetMedian()));
00334     fprintf(F, "\t%d\t%d\t%d\t%f\t%f\t%f\t%f\n", Graph->GetNodes(), Graph->GetEdges(), (int)TSnap::CntNonZNodes(Graph), (float)MomIn.GetMedian(), (float)MomOut.GetMedian(), (float)MomIn.GetMean(), (float)MomOut.GetMean()); }
00335   }
00336   fclose(F);
00337   TGnuPlot::PlotValV(MedDegV, "medDeg."+FNmPref, FNmPref+" Median degree", TTmInfo::GetTmUnitStr(TmUnit), "Median degree");
00338   TGnuPlot::PlotValV(MedOutDegV, "medOutDeg."+FNmPref, FNmPref+" Median OUT degree", TTmInfo::GetTmUnitStr(TmUnit), "Median OUT degree");
00339   TGnuPlot::PlotValV(MedInDegV, "medInDeg."+FNmPref, FNmPref+" Median IN degree", TTmInfo::GetTmUnitStr(TmUnit), "Median IN degree");
00340 }
00341 
00342 // load Arxiv affiliation network (papers to authors bipartite graph)
00343 // <set1_id>  <time>  <set2_id1>  <set2_id2> ... (all ids have to be unique)
00344 PTimeNet TTimeNet::LoadBipartite(const TStr& InFNm) {
00345   PTimeNet TimeNetPt = TTimeNet::New();
00346   TTimeNet& TimeNet = *TimeNetPt;
00347   PSs Ss = TSs::LoadTxt(ssfTabSep, InFNm.CStr());
00348   TIntH Set1IdH; // paper ids
00349   TStrV StrTimeV;
00350   for (int y = 0; y < Ss->GetYLen(); y++) {
00351     if (Ss->At(0, y)[0] == '#') continue; // skip comments
00352     if (Ss->GetXLen(y) < 3) continue;     // there must be at least one author
00353     const int& SrcId = Ss->At(0, y).GetInt();
00354     IAssert(! Set1IdH.IsKey(SrcId));
00355     IAssert(! TimeNet.IsNode(SrcId));
00356     Set1IdH.AddKey(SrcId);
00357     Ss->At(1, y).SplitOnAllCh('-', StrTimeV);
00358     const int Year = StrTimeV[0].GetInt();
00359     const int Month = StrTimeV[1].GetInt();
00360     const int Day = StrTimeV[2].GetInt();
00361     const TSecTm NodeTm(Year, Month, Day);
00362     TimeNet.AddNode(SrcId, NodeTm);
00363     for (int dst = 2; dst < Ss->GetXLen(y); dst++) {
00364       const int DstId = Ss->At(dst, y).GetInt();
00365       IAssert(! Set1IdH.IsKey(DstId));
00366       if (! TimeNet.IsNode(DstId)) { TimeNet.AddNode(DstId, NodeTm); }
00367       else { TimeNet.GetNDat(DstId) = TMath::Mn(NodeTm, TimeNet.GetNDat(DstId)); }
00368       if (! TimeNet.IsEdge(SrcId, DstId)) { TimeNet.AddEdge(SrcId, DstId); }
00369     }
00370   }
00371   TimeNet.Defrag();
00372   printf("Bipartate graph: nodes: %d  edges: %d\n", TimeNet.GetNodes(), TimeNet.GetEdges());
00373   printf("  Bipartate sets: %d nodes --> %d nodes\n", TSnap::CntInDegNodes(TimeNetPt, 0),
00374     TSnap::CntOutDegNodes(TimeNetPt, 0));
00375   return TimeNetPt;
00376 }
00377 
00378 // load Arxiv paper citation network
00379 //   PaperFNm: <paper> <time>
00380 //   CiteFNm:  <source> <destination>
00381 PTimeNet TTimeNet::LoadArxiv(const TStr& PaperFNm, const TStr& CiteFNm) {
00382   TExeTm ExeTm;
00383   PTimeNet TimeNetPt = TTimeNet::New();
00384   TTimeNet& TimeNet = *TimeNetPt;
00385   printf("Arxiv citation graph (paper publication year)...\n");
00386   // load time data (file hep-ph-slacdates)
00387   char Line [1024];
00388   FILE *PprF = fopen(PaperFNm.CStr(), "rt");
00389   TStr StrId, StrTime;
00390   TStrV StrV, StrTimeV;
00391   int N = 0, DuplicateNode = 0;
00392   while (! feof(PprF)) {
00393     Line[0] = 0;
00394     fgets(Line, 1024, PprF);
00395     if (strlen(Line) == 0 || Line[0] == '#') continue;
00396     Line[strlen(Line)-1] = 0; // delete trailing '\n'
00397     TStr(Line).SplitOnWs(StrV);  IAssert(StrV.Len() == 2);
00398     StrId = StrV[0];  StrTime = StrV[1];  IAssert(!StrId.Empty() && !StrTime.Empty());
00399     StrTime.SplitOnAllCh('-', StrTimeV);  IAssert(StrTimeV.Len() == 3);
00400     const int NodeId = StrId.GetInt();
00401     if (! TimeNet.IsNode(NodeId)) {
00402       const int Year = StrTimeV[0].GetInt();
00403       const int Month = StrTimeV[1].GetInt();
00404       const int Day = StrTimeV[2].GetInt();
00405       TimeNet.AddNode(NodeId, TSecTm(Year, Month, Day));
00406     } else { DuplicateNode++; }
00407     if (++N % 10000 == 0) printf("\r  %dk", N/1000);
00408   }
00409   printf("\r  %d nodes read. %d duplicate nodes. %s\n", N, DuplicateNode, ExeTm.GetTmStr());
00410   fclose(PprF);
00411   // load citations (file hep-ph-citations)
00412   int NewSrcIds=0, NewDstIds=0, DupLinks=0, NewCits=0;
00413   FILE *CiteF = fopen(CiteFNm.CStr(), "rt");
00414   N = 0;  ExeTm.Tick();
00415   printf("Loading Arxiv citations...\n");
00416   TIntPrV EdgeV;
00417   THash<TInt, TSecTm> NIdToTimeH;
00418   while (! feof(CiteF)) {
00419     Line[0] = 0;
00420     fgets(Line, 1024, CiteF);
00421     if (strlen(Line) == 0 || Line[0] == '#') continue;
00422     Line[strlen(Line)-1] = 0; // delete trailing '\n'
00423     TStr(Line).SplitOnWs(StrV);  IAssert(StrV.Len() == 2);
00424     const int SrcNId = StrV[0].GetInt();
00425     const int DstNId = StrV[1].GetInt();
00426     EdgeV.Add(TIntPr(SrcNId, DstNId));
00427     // infer time of destination node -- earliest paper that cites the node (paper)
00428     if (! TimeNet.IsNode(DstNId) && TimeNet.IsNode(SrcNId)) {
00429       const TSecTm& SrcTm = TimeNet.GetNDat(SrcNId);
00430       if (! NIdToTimeH.IsKey(DstNId)) {
00431         NIdToTimeH.AddDat(DstNId, SrcTm);
00432         NewDstIds++;
00433       }
00434       else if (NIdToTimeH.GetDat(DstNId) < SrcTm) {
00435         NIdToTimeH.GetDat(DstNId) = SrcTm; }
00436     }
00437     if (++N % 10000 == 0) printf("\r  %dk", N/1000);
00438   }
00439   fclose(CiteF);
00440   // add infeered time nodes (nodes which are cited by papers with known time)
00441   for (int i = 0; i < NIdToTimeH.Len(); i++) {
00442     TimeNet.AddNode(NIdToTimeH.GetKey(i), NIdToTimeH[i]);
00443   }
00444   // add links
00445   for (int i = 0; i < EdgeV.Len(); i++) {
00446     const int SrcNId = EdgeV[i].Val1;
00447     const int DstNId = EdgeV[i].Val2;
00448     if (TimeNet.IsNode(SrcNId) && TimeNet.IsNode(DstNId)) {
00449       if (! TimeNet.IsEdge(SrcNId, DstNId)) { TimeNet.AddEdge(SrcNId, DstNId); }
00450       else { DupLinks++; }
00451     } else {
00452       if (! TimeNet.IsNode(SrcNId)) {
00453         NewSrcIds++;
00454         if (! TimeNet.IsNode(DstNId)) { NewCits++; }
00455       }
00456     }
00457   }
00458   printf("\r  %d citations read. %s\n", N, ExeTm.GetTmStr());
00459   printf("Graph: nodes: %d    edges: %d\n", TimeNet.GetNodes(), TimeNet.GetEdges());
00460   printf("Removing 0-degree nodes: %d nodes\n", TSnap::CntDegNodes(TimeNetPt, 0));
00461   TIntV RmNIdV;
00462   for (TTimeNet::TNodeI ni = TimeNet.BegNI(); ni < TimeNet.EndNI(); ni++) {
00463     if (ni.GetDeg() == 0) { RmNIdV.Add(ni.GetId()); }
00464   }
00465   for (int i = 0; i < RmNIdV.Len(); i++) {
00466     TimeNet.DelNode(RmNIdV[i]);
00467   }
00468   TimeNet.Defrag(true);
00469   printf("\nFinal graph: nodes: %d    edges: %d\n", TimeNet.GetNodes(), TimeNet.GetEdges());
00470   printf("  Duplicate citations                    : %d\n", DupLinks);
00471   printf("  Nodes without time which are cited     : %d (add them to graph, use time of the earliest source node)\n", NewDstIds);
00472   printf("  Citations between unknown time nodes   : %d\n", NewCits);
00473   printf("  Nodes without time which make citations: %d (do not add them into the graph)\n", NewSrcIds);
00474   return TimeNetPt;
00475 }
00476 
00477 // Patent citation network. Time units are seconds even though they mean years
00478 PTimeNet TTimeNet::LoadPatents(const TStr& PatentFNm, const TStr& CiteFNm) {
00479   int N = 0;
00480   TExeTm ExeTm;
00481   PTimeNet TimeNetPt = TTimeNet::New();
00482   TTimeNet& TimeNet = *TimeNetPt;
00483   TimeNet.Reserve(4000000, 160000000);
00484   printf("parsing patent data (patent grant year)...\n");
00485   // load time data (file pat63_99.txt)
00486   const int& PatIdCol = 0;
00487   const int& GYearCol = 1;
00488   TStrV ColV;
00489   char Line [1024];
00490   FILE *PatF = fopen(PatentFNm.CStr(), "rt");
00491   fgets(Line, 1024, PatF); // skip 1st line
00492   while (! feof(PatF)) {
00493     Line[0] = 0;
00494     fgets(Line, 1024, PatF);
00495     if (strlen(Line) == 0) break;
00496     TStr(Line).SplitOnAllCh(',', ColV, false);
00497     IAssert(ColV.Len() == 23);
00498     const int PatentId = ColV[PatIdCol].GetInt();
00499     const int GrantYear = ColV[GYearCol].GetInt();
00500     IAssert(! TimeNet.IsNode(PatentId));
00501     TimeNet.AddNode(PatentId, TSecTm(GrantYear)); // pretend year is a second
00502     if (++N % 100000 == 0) printf("\r  %dk", N/1000);
00503   }
00504   printf("\r  %d patents read. %s\n", N, ExeTm.GetTmStr());
00505   fclose(PatF);
00506   // load citations (file cite75_99.txt)
00507   printf("\nLoading patent citations...\n");
00508   int NewSrcIds=0, NewDstIds=0, DupLinks=0, NewCits=0;
00509   N = 0;  ExeTm.Tick();
00510   TStr SrcId, DstId;
00511   FILE *CiteF = fopen(CiteFNm.CStr(), "rt");
00512   fgets(Line, 1024, CiteF); // skip 1st line
00513   while (! feof(CiteF)) {
00514     Line[0] = 0;
00515     fgets(Line, 1024, CiteF);
00516     if (strlen(Line) == 0) break;
00517     Line[strlen(Line)-1] = 0; // delete trailing '\n'
00518     TStr(Line).SplitOnCh(SrcId, ',', DstId);
00519     const int SrcNId = SrcId.GetInt();
00520     const int DstNId = DstId.GetInt();
00521     if (! TimeNet.IsNode(SrcNId) && ! TimeNet.IsNode(DstNId)) {
00522       //TimeNet.AddNode(SrcNId, TSecTm(1, 1, 1));  NewSrcIds++;
00523       //TimeNet.AddNode(DstNId, TSecTm(1, 1, 1));  NewDstIds++;
00524       NewCits++;
00525       continue;
00526     }
00527     else if (TimeNet.IsNode(SrcNId) && ! TimeNet.IsNode(DstNId)) {
00528       TimeNet.AddNode(DstNId, TimeNet.GetNDat(SrcNId));  NewDstIds++;
00529     }
00530     else if (! TimeNet.IsNode(SrcNId) && TimeNet.IsNode(DstNId)) {
00531       TimeNet.AddNode(SrcNId, TimeNet.GetNDat(DstNId));  NewSrcIds++;
00532     }
00533     if (! TimeNet.IsEdge(SrcNId, DstNId)) {
00534       TimeNet.AddEdge(SrcNId, DstNId);
00535     } else { DupLinks++; }
00536     if (++N % 100000 == 0) printf("\r  %dk", N/1000);
00537   }
00538   fclose(CiteF);
00539   printf("\r  %d citations read. %s\n\n", N, ExeTm.GetTmStr());
00540   printf("Graph: nodes: %d    edges: %d\n", TimeNet.GetNodes(), TimeNet.GetEdges());
00541   printf("Removing 0-degree nodes: %d nodes\n", TSnap::CntDegNodes(TimeNetPt, 0));
00542   TIntV RmNIdV;
00543   for (TTimeNet::TNodeI ni = TimeNet.BegNI(); ni < TimeNet.EndNI(); ni++) {
00544     if (ni.GetDeg() == 0) { RmNIdV.Add(ni.GetId()); }
00545   }
00546   for (int i = 0; i < RmNIdV.Len(); i++) {
00547     TimeNet.DelNode(RmNIdV[i]);
00548   }
00549   TimeNet.Defrag(true);
00550   printf("\nFinal graph: nodes: %d    edges: %d\n", TimeNet.GetNodes(), TimeNet.GetEdges());
00551   printf("  Duplicate citations                    : %d\n", DupLinks);
00552   printf("  Citations between unknown time nodes   : %d\n", NewCits);
00553   printf("  Nodes without time which make citations: %d\n", NewSrcIds);
00554   printf("  Nodes without time which are cited     : %d\n", NewDstIds);
00555   return TimeNetPt;
00556 }
00557 
00558 // Amazon ShareTheLove: nodes are people, we add an edge when there is a first
00559 // recommendation between 2 people (we count each edge only once (regardless
00560 // of how many recommendations there were between 2 persons)
00561 PTimeNet TTimeNet::LoadAmazon(const TStr& StlFNm) {
00562   PTimeNet TimeNetPt = TTimeNet::New();
00563   TTimeNet& TimeNet = *TimeNetPt;
00564   TimeNet.Reserve(3953993, -1);
00565   printf("Amazon Share-the-Love...\n");
00566   char line [2024], MonthStr[4];
00567   int NLines=0;
00568   TStrV ColV;
00569   FILE *F = fopen(StlFNm.CStr(), "rt");
00570   while (! feof(F)) {
00571     memset(line, 0, 2024);
00572     fgets(line, 2024, F);
00573     if (strlen(line) == 0) break;
00574     TStr(line).SplitOnAllCh(',', ColV);
00575     const int SrcNId = ColV[0].GetInt();
00576     const int DstNId = ColV[1].GetInt();
00577     // time data
00578     TStr TmStr = ColV[2]; // time-format: 29JAN02:21:55:23
00579     int Year = TmStr.GetSubStr(5, 6).GetInt();
00580     if (Year < 10) { Year += 2000; } else { Year += 1900; }
00581     MonthStr[0]=toupper(TmStr[2]);  MonthStr[1]=tolower(TmStr[3]);
00582     MonthStr[2]=tolower(TmStr[4]);  MonthStr[3]=0;
00583     const int Month = TTmInfo::GetMonthN(MonthStr, lUs);
00584     const int Day = TmStr.GetSubStr(0, 1).GetInt();
00585     const int Hour = TmStr.GetSubStr(8, 9).GetInt();
00586     const int Min = TmStr.GetSubStr(11, 12).GetInt();
00587     const int Sec = TmStr.GetSubStr(14, 15).GetInt();
00588     // add nodes and links
00589     if (! TimeNet.IsNode(SrcNId)) { TimeNet.AddNode(SrcNId, TSecTm(Year, Month, Day, Hour, Min, Sec)); }
00590     if (! TimeNet.IsNode(DstNId)) { TimeNet.AddNode(DstNId, TSecTm(Year, Month, Day, Hour, Min, Sec)); }
00591     if (! TimeNet.IsEdge(SrcNId, DstNId)) { TimeNet.AddEdge(SrcNId, DstNId); }
00592     if (++NLines % 100000 == 0) printf("\r  %dk", NLines/1000);
00593   }
00594   fclose(F);
00595   printf("\r  %d lines read\n", NLines);
00596   printf("Graph: nodes: %d  edges: %d\n", TimeNet.GetNodes(), TimeNet.GetEdges());
00597   TimeNet.Defrag(true);
00598   return TimeNetPt;
00599 }
00600 
00602 // Time Node-Edge Network
00603 TTimeNENet& TTimeNENet::operator = (const TTimeNENet& TimeNet) {
00604   if (this != &TimeNet) {
00605     TNet::operator=(TimeNet);
00606   }
00607   return *this;
00608 }
00609 
00610 PTimeNet TTimeNENet::GetTimeNet() const {
00611   PTimeNet NewNet = TTimeNet::New();
00612   NewNet->Reserve(GetNodes(), -1);
00613   for (TNodeI NI = BegNI(); NI < EndNI(); NI++) {
00614     NewNet->AddNode(NI.GetId(), NI.GetDat());
00615   }
00616   for (TEdgeI EI = BegEI(); EI < EndEI(); EI++) {
00617     const int src = EI.GetSrcNId();
00618     const int dst = EI.GetDstNId();
00619     if (! NewNet->IsEdge(src, dst)) {
00620       NewNet->AddEdge(src, dst); }
00621   }
00622   NewNet->Defrag();
00623   return NewNet;
00624 }
00625 
00626 // only take earliest edge between the nodes
00627 PTimeNENet TTimeNENet::Get1stEdgeNet() const {
00628   PTimeNENet Net = TTimeNENet::New();
00629   Net->Reserve(GetNodes(), -1);
00630   for (TNodeI NI = BegNI(); NI < EndNI(); NI++) {
00631     Net->AddNode(NI.GetId(), NI.GetDat()); }
00632   TIntV EIdV;  GetEIdByTm(EIdV);
00633   TIntPrSet EdgeSet(GetEdges());
00634   for (int edge = 0; edge < EIdV.Len(); edge++) {
00635     const TEdgeI EI = GetEI(EIdV[edge]);
00636     const int Src = EI.GetSrcNId();
00637     const int Dst = EI.GetDstNId();
00638     if (Src==Dst || EdgeSet.IsKey(TIntPr(TMath::Mn(Src, Dst), TMath::Mx(Src, Dst)))) { continue; } // take only 1st edge
00639     EdgeSet.AddKey(TIntPr(TMath::Mn(Src, Dst), TMath::Mx(Src, Dst)));
00640     Net->AddEdge(EI);
00641   }
00642   return Net;
00643 }
00644 
00645 PTimeNENet TTimeNENet::GetSubGraph(const TIntV& NIdV) const {
00646   PTimeNENet NewNetPt = TTimeNENet::New();
00647   TTimeNENet& NewNet = *NewNetPt;
00648   NewNet.Reserve(NIdV.Len(), -1);
00649   int node, edge;
00650   TNodeI NI;
00651   for (node = 0; node < NIdV.Len(); node++) {
00652     NewNet.AddNode(NIdV[node], GetNDat(NIdV[node]));
00653   }
00654   for (node = 0; node < NIdV.Len(); node++) {
00655     NI = GetNI(NIdV[node]);
00656     for (edge = 0; edge < NI.GetOutDeg(); edge++) {
00657       const TEdgeI EI = GetEI(NI.GetOutEId(edge));
00658       if (NewNet.IsNode(EI.GetDstNId())) {
00659         NewNet.AddEdge(EI); }
00660     }
00661   }
00662   NewNet.Defrag();
00663   return NewNetPt;
00664 }
00665 
00666 PTimeNENet TTimeNENet::GetESubGraph(const TIntV& EIdV) const {
00667   PTimeNENet NewNetPt = TTimeNENet::New();
00668   TTimeNENet& NewNet = *NewNetPt;
00669   NewNet.Reserve(-1, EIdV.Len());
00670   for (int edge = 0; edge < EIdV.Len(); edge++) {
00671     const TEdgeI Edge = GetEI(EIdV[edge]);
00672     if (! NewNet.IsNode(Edge.GetSrcNId()))
00673       NewNet.AddNode(GetNI(Edge.GetSrcNId()));
00674     if (! NewNet.IsNode(Edge.GetDstNId()))
00675       NewNet.AddNode(GetNI(Edge.GetDstNId()));
00676     NewNet.AddEdge(Edge);
00677   }
00678   NewNet.Defrag();
00679   return NewNetPt;
00680 }
00681 
00682 // assumes that the edges of the network are sorted in time
00683 PTimeNENet TTimeNENet::GetGraphUpToTm(const TSecTm& MaxEdgeTm) const {
00684   PTimeNENet NewNetPt = TTimeNENet::New();
00685   TTimeNENet& NewNet = *NewNetPt;
00686   TSecTm PrevTm;
00687   for (TEdgeI EI = BegEI(); EI < EndEI(); EI++) {
00688     if (EI() > MaxEdgeTm) { break; }
00689     if (! NewNet.IsNode(EI.GetSrcNId()))
00690       NewNet.AddNode(GetNI(EI.GetSrcNId()));
00691     if (! NewNet.IsNode(EI.GetDstNId()))
00692       NewNet.AddNode(GetNI(EI.GetDstNId()));
00693     NewNet.AddEdge(EI);
00694     IAssert(! PrevTm.IsDef() || PrevTm <= EI()); // edge times must be sorted
00695     PrevTm = EI();
00696   }
00697   NewNet.Defrag();
00698   return NewNetPt;
00699 }
00700 
00701 void TTimeNENet::SortNodeEdgeTimes() {
00702   NodeH.SortByDat(true);
00703   EdgeH.SortByDat(true);
00704 }
00705 
00706 // node time must be smaller than times of incoming or outgoing edges
00707 void TTimeNENet::UpdateNodeTimes() {
00708   int Cnt = 0;
00709   for (TNodeI NI = BegNI(); NI < EndNI(); NI++) {
00710     TSecTm& NodeTm = NI();
00711     for (int edge = 0; edge < NI.GetOutDeg(); edge++) {
00712       const TSecTm& EdgeTm = GetEDat(NI.GetOutEId(edge));
00713       if (! NodeTm.IsDef() || EdgeTm < NodeTm) { NodeTm = EdgeTm; Cnt++; }
00714     }
00715     for (int edge = 0; edge < NI.GetInDeg(); edge++) {
00716       const TSecTm& EdgeTm = GetEDat(NI.GetInEId(edge));
00717       if (! NodeTm.IsDef() || EdgeTm < NodeTm) { NodeTm = EdgeTm; Cnt++; }
00718     }
00719   }
00720   printf("Update node times: %d/%d updates\n", Cnt, GetNodes());
00721 }
00722 
00723 void TTimeNENet::SetNodeTmToFirstEdgeTm() {
00724   int Cnt = 0;
00725   for (TNodeI NI = BegNI(); NI < EndNI(); NI++) {
00726     if (NI.GetDeg() == 0) { continue; }
00727     TSecTm NodeTm;
00728     for (int edge = 0; edge < NI.GetOutDeg(); edge++) {
00729       const TSecTm& EdgeTm = GetEDat(NI.GetOutEId(edge));  IAssert(EdgeTm.IsDef());
00730       if (! NodeTm.IsDef() || EdgeTm < NodeTm) { NodeTm = EdgeTm; Cnt++; }
00731     }
00732     for (int edge = 0; edge < NI.GetInDeg(); edge++) {
00733       const TSecTm& EdgeTm = GetEDat(NI.GetInEId(edge));  IAssert(EdgeTm.IsDef());
00734       if (! NodeTm.IsDef() || EdgeTm < NodeTm) { NodeTm = EdgeTm; Cnt++; }
00735     }
00736     GetNDat(NI.GetId()) = NodeTm;
00737   }
00738   printf("Node times set: %d/%d updates\n", Cnt, GetNodes());
00739 }
00740 
00741 // shuffle edge arrivals
00742 void TTimeNENet::SetRndEdgeTimes(const int& MinTmEdge) {
00743   printf("Shuffling last %d (%d%%) edge arrival times..\n", GetEdges()-MinTmEdge, int(100.0*(GetEdges()-MinTmEdge)/double(GetEdges())));
00744   TIntV RndEIdV;  GetEIdByTm(RndEIdV);
00745   TIntV TrueEIdV = RndEIdV;
00746   TSecTmV TrueTmV;
00747   const int SwapLen = RndEIdV.Len()-MinTmEdge;
00748   for (int R = 0; R < 10; R++) {
00749     for (int i = MinTmEdge; i < RndEIdV.Len(); i++) {
00750       RndEIdV.Swap(TInt::Rnd.GetUniDevInt(SwapLen)+MinTmEdge, TInt::Rnd.GetUniDevInt(SwapLen)+MinTmEdge); }
00751   }
00752   for (int e = 0; e < TrueEIdV.Len(); e++) {
00753     TrueTmV.Add(GetEDat(TrueEIdV[e])); }
00754   for (int e = 0; e < RndEIdV.Len(); e++) {
00755     GetEDat(RndEIdV[e]) = TrueTmV[e]; }
00756   UpdateNodeTimes();
00757 }
00758 
00759 void TTimeNENet::DumpTimeStat() const {
00760   TSecTm MnNodeTm, MxNodeTm;
00761   TSecTm MnEdgeTm, MxEdgeTm;
00762   for (TNodeI NI = BegNI(); NI < EndNI(); NI++) {
00763     const TSecTm NodeTm = NI();
00764     if (! MnNodeTm.IsDef() || MnNodeTm>NodeTm) { MnNodeTm = NodeTm; }
00765     if (! MxNodeTm.IsDef() || MxNodeTm<NodeTm) { MxNodeTm = NodeTm; }
00766   }
00767   printf("Node times:\n  %s\n  %s\n", MnNodeTm.GetStr().CStr(), MxNodeTm.GetStr().CStr());
00768   for (TEdgeI EI= BegEI(); EI < EndEI(); EI++) {
00769     const TSecTm EdgeTm = EI();
00770     if (! MnEdgeTm.IsDef() || MnEdgeTm>EdgeTm) { MnEdgeTm = EdgeTm; }
00771     if (! MxEdgeTm.IsDef() || MxEdgeTm<EdgeTm) { MxEdgeTm = EdgeTm; }
00772   }
00773   printf("Edge times:\n  %s\n  %s\n", MnEdgeTm.GetStr().CStr(), MxEdgeTm.GetStr().CStr());
00774 }
00775 
00776 void TTimeNENet::GetNIdByTm(TIntV& NIdV) const {
00777   TVec<TKeyDat<TSecTm, TInt> > TmToNIdV(GetNodes(), 0);
00778   for (TNodeI NodeI = BegNI(); NodeI < EndNI(); NodeI++) {
00779     TmToNIdV.Add(TKeyDat<TSecTm, TInt>(NodeI.GetDat(), NodeI.GetId())); }
00780   TmToNIdV.Sort();
00781   NIdV.Gen(GetNodes(), 0);
00782   for (int i = 0; i < TmToNIdV.Len(); i++) {
00783     NIdV.Add(TmToNIdV[i].Dat); }
00784 }
00785 
00786 void TTimeNENet::GetEIdByTm(TIntV& EIdV) const {
00787   TVec<TKeyDat<TSecTm, TInt> > TmToEIdV(GetEdges(), 0);
00788   for (TEdgeI EI= BegEI(); EI < EndEI(); EI++) {
00789     TmToEIdV.Add(TKeyDat<TSecTm, TInt>(EI.GetDat(), EI.GetId())); }
00790   TmToEIdV.Sort();
00791   EIdV.Gen(GetEdges(), 0);
00792   for (int i = 0; i < TmToEIdV.Len(); i++) {
00793     EIdV.Add(TmToEIdV[i].Dat); }
00794 }
00795 
00796 void TTimeNENet::GetTmBuckets(const TTmUnit& TmUnit, TTimeNet::TTmBucketV& TmBucketV) const {
00797   THash<TInt, TIntV> TmIdToNIdVH;
00798   TIntV NIdV;  GetNIdByTm(NIdV);
00799   for (int n = 0; n < NIdV.Len(); n++) {
00800     const int TmId = GetNDat(NIdV[n]).Round(TmUnit).GetAbsSecs();
00801     if (! TmIdToNIdVH.IsKey(TmId)) { TmIdToNIdVH.AddKey(TmId); }
00802     TmIdToNIdVH.GetDat(TmId).Add(NIdV[n]);
00803   }
00804   TVec<TPair<TInt, TIntV> > TmIdNIdVV;
00805   TmIdToNIdVH.GetKeyDatPrV(TmIdNIdVV);
00806   TmIdNIdVV.Sort();
00807   TmBucketV.Gen(TmIdNIdVV.Len());
00808   for (int i = 0; i < TmIdNIdVV.Len(); i++) {
00809     TTimeNet::TTmBucket& Bucket = TmBucketV[i];
00810     Bucket.BegTm = TmIdNIdVV[i].Val1;
00811     Bucket.NIdV = TmIdNIdVV[i].Val2;
00812   }
00813 }
00814 
00815 void TTimeNENet::GetEdgeTmBuckets(const TTmUnit& TmUnit, TTimeNet::TTmBucketV& TmBucketV) const {
00816   THash<TInt, TIntV> TmIdToEIdVH;
00817   TIntV EIdV;  GetEIdByTm(EIdV);
00818   for (int e = 0; e < EIdV.Len(); e++) {
00819     const int TmId = GetEDat(EIdV[e]).Round(TmUnit).GetAbsSecs();
00820     if (! TmIdToEIdVH.IsKey(TmId)) { TmIdToEIdVH.AddKey(TmId); }
00821     TmIdToEIdVH.GetDat(TmId).Add(EIdV[e]);
00822   }
00823   TVec<TPair<TInt, TIntV> > TmIdEIdVV;
00824   TmIdToEIdVH.GetKeyDatPrV(TmIdEIdVV);
00825   TmIdEIdVV.Sort();
00826   TmBucketV.Gen(TmIdEIdVV.Len());
00827   for (int i = 0; i < TmIdEIdVV.Len(); i++) {
00828     TTimeNet::TTmBucket& Bucket = TmBucketV[i];
00829     Bucket.BegTm = TmIdEIdVV[i].Val1;
00830     Bucket.NIdV = TmIdEIdVV[i].Val2;
00831   }
00832 }
00833 
00834 void TTimeNENet::GetNodeBuckets(const int NodesPerBucket, TTimeNet::TTmBucketV& TmBucketV) const {
00835   TIntV NIdV;  GetNIdByTm(NIdV);
00836   TmBucketV.Gen(NIdV.Len() / NodesPerBucket + 1, 0);
00837   for (int i = 0; i < NIdV.Len(); i++) {
00838     const int b = i/NodesPerBucket;
00839     if (TmBucketV.Len() <= b) { TmBucketV.Add(TTimeNet::TTmBucket(TSecTm(b))); }
00840     TmBucketV[b].NIdV.Add(NIdV[i]);
00841   }
00842 }
00843 
00844 void TTimeNENet::GetEdgeBuckets(const int EdgesPerBucket, TTimeNet::TTmBucketV& TmBucketV) const {
00845   TIntV EIdV;  GetEIdByTm(EIdV);
00846   TmBucketV.Gen(EIdV.Len()/EdgesPerBucket + 1, 0);
00847   for (int i = 0; i < EIdV.Len(); i++) {
00848     const int b = i/EdgesPerBucket;
00849     if (TmBucketV.Len() <= b) { TmBucketV.Add(TTimeNet::TTmBucket(TSecTm(b))); }
00850     TmBucketV[b].NIdV.Add(EIdV[i]);
00851   }
00852 }
00853 
00854 // get edges that close triangles over time
00855 int TTimeNENet::GetTriadEdges(TIntV& TriadEIdV) const {
00856   PUNGraph Graph = TUNGraph::New(GetNodes(), GetEdges());
00857   TIntV EIdV;  GetEIdByTm(EIdV);
00858   TriadEIdV.Clr();
00859   TExeTm ExeTm;
00860   for (int edge = 0; edge < EIdV.Len(); edge++) {
00861     const TEdgeI EI = GetEI(EIdV[edge]);
00862     const int Src = EI.GetSrcNId();
00863     const int Dst = EI.GetDstNId();
00864     if (Src==Dst || Graph->IsEdge(Src, Dst)) { continue; } // take only 1st edge
00865     if (! Graph->IsNode(Src)) { Graph->AddNode(Src); }
00866     if (! Graph->IsNode(Dst)) { Graph->AddNode(Dst); }
00867     if (TSnap::GetCmnNbrs(Graph, Src, Dst) > 0) { TriadEIdV.Add(EIdV[edge]); }
00868     Graph->AddEdge(Src, Dst);
00869     if (edge % 10000 == 0) {
00870       printf("\redges %dk / %dk: triangle edges: %dk [total %s]", edge/1000, EIdV.Len()/1000,
00871         TriadEIdV.Len()/1000, ExeTm.GetStr()); }
00872   }
00873   return Graph->GetEdges();
00874 }
00875 
00876 PGStatVec TTimeNENet::TimeGrowth(const TTmUnit& TimeStep, const TFSet& TakeStat, const TSecTm& StartTm) const {
00877   TExeTm ExeTm;
00878   PGStatVec GStatVec = TGStatVec::New(TimeStep, TakeStat);
00879   TTimeNet::TTmBucketV TmBucketV;
00880   GetEdgeTmBuckets(TimeStep, TmBucketV);
00881   const PNEGraph FullGraph = TSnap::ConvertGraph<PNEGraph>(PTimeNENet((TTimeNENet*)this));
00882   TIntV EdgeIdV;
00883   for (int t = 0; t < TmBucketV.Len(); t++) {
00884     EdgeIdV.AddV(TmBucketV[t].NIdV); // edges up to time T
00885     printf("\n***%d/%d: %s (%d edges) ", t+1, TmBucketV.Len(), TmBucketV[t].BegTm.GetStr().CStr(), EdgeIdV.Len());  ExeTm.Tick();
00886     if (TmBucketV[t].BegTm < StartTm) { continue; }
00887     const PNEGraph PreGraph = TSnap::GetESubGraph(FullGraph, EdgeIdV);
00888     GStatVec->Add(PreGraph, TmBucketV[t].BegTm);
00889     printf("  [%s]\n", ExeTm.GetTmStr());
00890     //{ TFOut FOut("LinkedIn.GStatVec");  GStatVec->Save(FOut); }
00891   }
00892   return GStatVec;
00893 }
00894 
00895 // take network from last TakeNTmUnits and measure properties
00896 PGStatVec TTimeNENet::TimeGrowth(const TStr& FNmPref, const TStr& Desc, const TFSet& TakeStat, const int& NDiamRuns,
00897                             const TTmUnit& TmUnit, const int& TakeNTmUnits, const bool& LinkBWays) const {
00898   TGStat::NDiamRuns = NDiamRuns;
00899   PGStatVec GrowthStat = TGStatVec::New(TmUnit, TakeStat);
00900   TTimeNet::TTmBucketV TmBucketV;
00901   GetEdgeTmBuckets(TmUnit, TmBucketV);
00902   TIntV EdgeIdV;
00903   TExeTm ExeTm;
00904   for (int t = 0; t < TmBucketV.Len(); t++) {
00905     // take graph over last TakeNTmUnits time units
00906     if (TakeNTmUnits == -1) {
00907       EdgeIdV.AddV(TmBucketV[t].NIdV); }
00908     else {
00909       if (t < TakeNTmUnits) { continue; }
00910       EdgeIdV.Clr(false);
00911       for (int i = t-TakeNTmUnits; i < t; i++) { EdgeIdV.AddV(TmBucketV[i].NIdV); }
00912     }
00913     printf("*** %s (%d edges)\n", TmBucketV[t].BegTm.GetStr().CStr(), EdgeIdV.Len());  ExeTm.Tick();
00914     PNEGraph PreGraph = TSnap::ConvertESubGraph<PNEGraph>(PTimeNENet((TTimeNENet*)this), EdgeIdV);
00915     if (LinkBWays) {
00916       TIntV KeepEIdV; // keep only nodes that have in- and out-links (send and receive email)
00917       for (TNEGraph::TEdgeI EI = PreGraph->BegEI(); EI < PreGraph->EndEI(); EI++) {
00918         if (PreGraph->IsEdge(EI.GetDstNId(), EI.GetSrcNId(), true)) { KeepEIdV.Add(EI.GetId()); }
00919       }
00920       PreGraph = TSnap::GetESubGraph(PreGraph, KeepEIdV);
00921     }
00922     // take statistics
00923     GrowthStat->Add(PreGraph, TmBucketV[t].BegTm);
00924     { TFOut FOut(TStr::Fmt("growth.%s.gStatVec", FNmPref.CStr()));
00925     GrowthStat->Save(FOut); }
00926     GrowthStat->SaveTxt(FNmPref, Desc);
00927     printf("  [%s]\n", ExeTm.GetTmStr());
00928   }
00929   return GrowthStat;
00930 }
00931 
00932 void TTimeNENet::PlotEffDiam(const TStr& FNmPref, const TStr& Desc, const TTmUnit& TmUnit,
00933                              const TSecTm& StartTm, const int& NDiamRuns, const bool& OnlyWcc) const {
00934   TTimeNet::TTmBucketV TmBucketV;
00935   GetEdgeTmBuckets(TmUnit, TmBucketV);
00936   PNEGraph FullGraph = TSnap::ConvertGraph<PNEGraph>(PTimeNENet((TTimeNENet*)this));
00937   TIntV EdgeIdV;
00938   TExeTm ExeTm, Run1Tm;
00939   TFltTrV TmDiamV, NdsDiamV;
00940   for (int t = 0; t < TmBucketV.Len(); t++) {
00941     EdgeIdV.AddV(TmBucketV[t].NIdV); // edges up to time T
00942     printf("\n*** %s (%d edges)\n", TmBucketV[t].BegTm.GetStr(TmUnit).CStr(), EdgeIdV.Len());  ExeTm.Tick();
00943     if (TmBucketV[t].BegTm < StartTm) continue;
00944     PNGraph PreGraph = TSnap::ConvertESubGraph<PNGraph>(FullGraph, EdgeIdV);
00945     TMom Mom;
00946     double EffDiam = 0.0;
00947     for (int r = 0; r < NDiamRuns; r++) {
00948       printf("%d...", r+1);  Run1Tm.Tick();
00949       if (OnlyWcc) { EffDiam = TSnap::GetAnfEffDiam(TSnap::GetMxWcc(PreGraph)); }
00950       else { EffDiam = TSnap::GetAnfEffDiam(PreGraph); }
00951       Mom.Add(EffDiam);
00952       printf("[%s]\r", Run1Tm.GetTmStr());
00953     }
00954     Mom.Def();
00955     TmDiamV.Add(TFltTr(TmBucketV[t].BegTm.Round(TmUnit).GetAbsSecs(), Mom.GetMean(), Mom.GetSDev()));
00956     NdsDiamV.Add(TFltTr(PreGraph->GetNodes(), Mom.GetMean(), Mom.GetSDev()));
00957     NdsDiamV.Sort();
00958     printf("  [%s]          \n", ExeTm.GetTmStr());
00959     const TStr WccStr = OnlyWcc ? "WCC " : TStr::GetNullStr();
00960     { TGnuPlot GnuPlot("diamEff1."+FNmPref, TStr::Fmt("%s. G(%d, %d). %d RUNS.", Desc.CStr(), GetNodes(), GetEdges(), NDiamRuns));
00961     GnuPlot.SetXYLabel(TStr::Fmt("TIME [%s]", TTmInfo::GetTmUnitStr(TmUnit).CStr()), "AVERAGE "+WccStr+"Effective Diameter");
00962     GnuPlot.AddErrBar(TmDiamV, "", "");
00963     GnuPlot.SavePng(); }
00964     { TGnuPlot GnuPlot("diamEff2."+FNmPref, TStr::Fmt("%s. G(%d, %d). %d RUNS.", Desc.CStr(), GetNodes(), GetEdges(), NDiamRuns));
00965     GnuPlot.SetXYLabel("NODES", "AVERAGE "+WccStr+"Effective Diameter");
00966     GnuPlot.AddErrBar(NdsDiamV, "", "");
00967     GnuPlot.SavePng(); }
00968   }
00969 }
00970 
00971 // pretend we only have link data starting in PostTmDiam
00972 // compare the diameter of the nodes after PostTmDiam with diameter
00973 // of the nodes after PostTmDiam but *also* using nodes and edges
00974 // from before PostTmDiam
00975 void TTimeNENet::PlotMissingPast(const TStr& FNmPref, const TStr& Desc, const TTmUnit& TmUnit,
00976                                  const TSecTm& DelPreTmEdges, const TSecTm& PostTmDiam, const bool& LinkBWays) {
00977   printf("\nGrowth over time: degree distribution, Growth Power Law, Diameter.\n  %s group by %s.\n",
00978     FNmPref.CStr(), TTmInfo::GetTmUnitStr(TmUnit).CStr());
00979   printf("  Delete out-edges of pre time %s nodes.\n  Take subgraph of post year %s subgraph.\n\n",
00980     DelPreTmEdges.GetStr().CStr(), PostTmDiam.GetStr().CStr());
00981   // run diameter
00982   /*const int NSamples = 100;
00983   const int NDiamRuns = 10;
00984   // delete past
00985   if (DelPreEdges != -1) {
00986     TIntV EdgeIdV;
00987     printf("Deleting pre-year %d edges\n", DelPreEdges);
00988     for (TEdgeI EI = BegEI(); EI < EndEI(); EI++) {
00989       if (EI().GetYear() < DelPreEdges) { EdgeIdV.Add(EI.GetId()); } }
00990     for (int e = 0; e < EdgeIdV.Len(); e++) { DelEdge(EdgeIdV[e]); }
00991     printf("  Deleted %d edges (out of %d edges total).\n", EdgeIdV.Len(), GetEdges());
00992   }
00993   PNEGraph FullGraph = GetNEGraph();
00994   TSnap::DelZeroDegNodes(FullGraph);
00995 
00996   PGrowthStat GrowthStat = TGrowthStat::New(TmUnit, TGraphStat::AllStat());
00997   TFltV PreDiamSDev, PreEffDiamSDev, WccDiamSDev, WccEffDiamSDev;
00998   TIntV EdgeIdV;
00999   TExeTm ExeTm;
01000   TTimeNet::TTmBucketV EdgeTmBucketV;
01001   GetEdgeTmBuckets(TmUnit, EdgeTmBucketV);
01002   for (int t = 0; t < EdgeTmBucketV.Len(); t++) {
01003     printf("\nGraph: %s (%d / %d) [%s]\n", EdgeTmBucketV[t].BegTm.GetTmStr().CStr(),
01004       t+1, EdgeTmBucketV.Len(), TExeTm::GetCurTm());
01005     // up-to-year subgraph
01006     EdgeIdV.AddV(EdgeTmBucketV[t].NIdV); // nodes up to time T
01007     if (EdgeTmBucketV[t].BegTm.GetYear() < PostYearDiam) continue;
01008 
01009     const PNEGraph PreNEGraph = EdgeBothWays ? FullGraph->GetEdgeSubGraph(EdgeIdV) : FullGraph;
01010     PNGraph PreGraph = PreNEGraph->GetBWayGraph();
01011     PNGraph WccGraph = TSnap::GetMxWcc(PreGraph);
01012 
01013     // find nodes that are not in missing past
01014     TIntV PostYearNIdV, WccPostYearNIdV;
01015     THash<TIntPr, TInt> PostYearEdgeH;
01016     for (TNEGraph::TEdgeI EI = PreNEGraph->BegEI(); EI < PreNEGraph->EndEI(); EI++) {
01017       if (GetEDat(EI.GetId()).GetYear() >= PostYearDiam) {
01018         PostYearEdgeH.AddKey(TIntPr(EI.GetSrcNId(), EI.GetDstNId())); }
01019     }
01020     TIntH PostYearNIdH;
01021     for (int i = 0; i < PostYearEdgeH.Len(); i++) {
01022       if ((! EdgeBothWays) || (EdgeBothWays && PostYearEdgeH.IsKey(TIntPr(PostYearEdgeH.GetKey(i).Val2, PostYearEdgeH.GetKey(i).Val1)))) { //reverse edge exists
01023         PostYearNIdH.AddKey(PostYearEdgeH.GetKey(i).Val1);
01024         PostYearNIdH.AddKey(PostYearEdgeH.GetKey(i).Val2);
01025       }
01026     }
01027     PostYearNIdH.GetKeyV(PostYearNIdV);
01028     for (int i = 0; i < PostYearNIdV.Len(); i++) {
01029       if (WccGraph->IsNode(PostYearNIdV[i])) {
01030         WccPostYearNIdV.Add(PostYearNIdV[i]); }
01031     }
01032 
01033     // diameter of PostYearDiam subgraph using whole graph (all edges)
01034     TMom PreDiamMom, PreEffDiamMom, WccDiamMom, WccEffDiamMom;
01035     for (int r = 0; r < NDiamRuns; r++) {
01036       if (! PostYearNIdV.Empty()) {
01037         PreDiamMom.Add(-1); //PreDiamMom.Add(TSnap::GetBfsFullDiam(PreGraph, NSamples, PostYearNIdV, false));
01038         PreEffDiamMom.Add(TSnap::GetBfsEffDiam(PreGraph, NSamples, PostYearNIdV, false));
01039       }
01040       if (! WccPostYearNIdV.Empty()) {
01041         //WccDiamMom.Add(TSnap::GetBfsFullDiam(WccGraph, NSamples, WccPostYearNIdV, false));
01042         //WccEffDiamMom.Add(TSnap::GetBfsEffDiam(WccGraph, NSamples, WccPostYearNIdV, false));
01043         WccDiamMom.Add(-1);  WccEffDiamMom.Add(-1);
01044       }
01045       printf("  diam: %d  [%s]  %.2f\r", r+1, ExeTm.GetTmStr(), PreEffDiamMom.GetValV().Last().Val);  ExeTm.Tick();
01046     }
01047     PreDiamMom.Def();  PreEffDiamMom.Def();
01048     WccDiamMom.Def();  WccEffDiamMom.Def();
01049     // save stat
01050     PGraphStat GraphStatPt = GrowthStat->Add(EdgeTmBucketV[t].BegTm);
01051     TGraphStat& GraphStat = *GraphStatPt;
01052     GraphStat.Nodes = PreGraph->GetNodes();
01053     GraphStat.NonZNodes = PreGraph->GetNodes() - TSnap::CntDegNodes<PNGraph>(PreGraph, 0);
01054     GraphStat.Srcs =  GraphStat.Nodes - TSnap::CntOutDegNodes<PNGraph>(PreGraph, 0);
01055     GraphStat.Edges = PreGraph->GetEdges();
01056     GraphStat.WccNodes= WccGraph->GetNodes();
01057     GraphStat.WccEdges = WccGraph->GetEdges();
01058     GraphStat.FullDiam = PreDiamMom.GetMean(); // mean
01059     GraphStat.EffDiam = PreEffDiamMom.GetMean();
01060     GraphStat.FullWccDiam = WccDiamMom.GetMean();
01061     GraphStat.EffWccDiam = WccEffDiamMom.GetMean();
01062     GraphStat.FullDiamDev = PreDiamMom.GetSDev(); // variance
01063     GraphStat.EffDiamDev = PreEffDiamMom.GetSDev();
01064     GraphStat.FullWccDiamDev = WccDiamMom.GetSDev();
01065     GraphStat.EffWccDiamDev = WccEffDiamMom.GetSDev();
01066     { TFOut FOut("growth."+FNmPref+".bin");
01067     GrowthStat->Save(FOut); }
01068     const TStr BigDesc = TStr::Fmt("%s. MISSING PAST DIAMETER\nDelPreEdges\t%d\nPostYearDiam\t%d\n",
01069       Desc.CStr(), DelPreEdges, PostYearDiam);
01070     GrowthStat->SaveTxt(FNmPref, BigDesc);
01071   }
01072   // diameter plots
01073   GrowthStat->PlotDiam(FNmPref, Desc + TStr::Fmt(" MISSING PAST. DelPre:%d PostYear:%d.",
01074     DelPreEdges, PostYearDiam));
01075     */
01076 }
01077 
01078 // plot time difference of the node ages when an edge arrives
01079 /*void TTimeNENet::PlotEdgeNodeTimeDiff(const TStr& FNmPref, TStr Desc) const {
01080   if (Desc.Empty()) { Desc = FNmPref; }
01081   printf("Edge node time differences:\n");
01082   THash<TInt, TInt> NodeAgeH, Node1AgeH; // (time_dst - time_src), only 1st edge
01083   THash<TInt, TInt> SrcTmH, DstTmH; // time_edge - time_src/time_dst)
01084   PUNGraph Graph = TUNGraph::New(GetNodes(), -1);
01085   TIntV EIdV;  GetEIdByTm(EIdV);
01086   TIntH DegCntH(10, true);
01087   TExeTm ExeTm;
01088   for (int edge = 0; edge < EIdV.Len(); edge++) {
01089     const TEdgeI EI = GetEI(EIdV[edge]);
01090     const int Src = EI.GetSrcNId();
01091     const int Dst = EI.GetDstNId();
01092     if (Src==Dst || Graph->IsEdge(Src, Dst)) { continue; } // take only 1st edge
01093     if (! Graph->IsNode(Src)) { Graph->AddNode(Src); }
01094     if (! Graph->IsNode(Dst)) { Graph->AddNode(Dst); }
01095     const int SrcDeg = Graph->GetNode(Src).GetOutDeg();
01096     const int DstDeg = Graph->GetNode(Dst).GetInDeg();
01097     const int EdgeTm = EI.GetDat().GetAbsSecs();
01098     const int SrcTm = EI.GetSrcNDat().GetAbsSecs();
01099     const int DstTm = EI.GetDstNDat().GetAbsSecs();
01100     NodeAgeH.AddDat((DstTm-SrcTm)/3600) += 1;
01101     if (SrcDeg == 0 || DstDeg == 0) {
01102       Node1AgeH.AddDat((DstTm-SrcTm)/3600) += 1; }
01103     SrcTmH.AddDat((EdgeTm-SrcTm)/3600) += 1;
01104     DstTmH.AddDat((EdgeTm-DstTm)/3600) += 1;
01105     // add edge
01106     Graph->AddEdge(Src, Dst);
01107     if (edge % 1000 == 0) {
01108       printf("\r%dk / %dk [total %s]", edge/1000, EIdV.Len()/1000, ExeTm.GetStr()); }
01109   }
01110   TGnuPlot::PlotValCntH(NodeAgeH, "tmDstSrc."+FNmPref, TStr::Fmt("%s, Node time difference.", Desc.CStr()),
01111     "Time(destination) - Time(source) [hours]", "Count");
01112   TGnuPlot::PlotValCntH(Node1AgeH, "tmDstSrc1."+FNmPref, TStr::Fmt("%s. 1-ST EDGE of one of the nodes.", Desc.CStr()),
01113     "Time(destination) - Time(source) [hours]", "Count");
01114   TGnuPlot::PlotValCntH(SrcTmH, "tmEdgeSrc."+FNmPref, TStr::Fmt("%s. Edge time - source node time.", Desc.CStr()),
01115     "Edge time - source node time [hours]", "Count");
01116   TGnuPlot::PlotValCntH(DstTmH, "tmEdgeDst."+FNmPref, TStr::Fmt("%s. Edge time - destination node time.", Desc.CStr()),
01117     "Edge time - destination node time [hours]", "Count");
01118   printf("\n");
01119 }
01120 
01121 // plot the distribution whether the edge to node that closes the triad was
01122 // chosen random or preferentially
01123 void TTimeNENet::PlotCloseTriad(const TStr& FNmPref, TStr Desc, const bool& OnlyBackEdges) const {
01124   if (Desc.Empty()) { Desc = FNmPref; }
01125   printf("Random vs. preferential triad closing: %s\n", OnlyBackEdges?"OnlyBackEdges":"");
01126   THash<TInt, TInt> NodeAgeH, Node1AgeH; // (time_dst - time_src), only 1st edge
01127   THash<TInt, TInt> SrcTmH, DstTmH; // time_edge - time_src/time_dst)
01128   PUNGraph Graph = TUNGraph::New(GetNodes(), -1);
01129   TIntV EIdV;  GetEIdByTm(EIdV);
01130   TIntV CmnV, SrcNbrV, DstNbrV;
01131   TExeTm ExeTm;
01132   TFltV RndRdnV, PrefPrefV, PrefRndV, RndPrefV;
01133   TFltFltH RndRndH, PrefPrefH, PrefRndH, RndPrefH;
01134   int TriadEdges = 0;
01135   FILE *F = fopen(TStr::Fmt("%s-prob.tab", FNmPref.CStr()).CStr(), "wt");
01136   fprintf(F, "#Probability of picking a node that closes the triangle (pick preferentially vs. uniformly at random)\n");
01137   fprintf(F, "#i\tEId\tRndRnd\tRndPref\tPrefRnd\tPrefPref\n");
01138   for (int edge = 0; edge < EIdV.Len(); edge++) {
01139     const TEdgeI EI = GetEI(EIdV[edge]);
01140     const int Src = EI.GetSrcNId();
01141     const int Dst = EI.GetDstNId();
01142     if (Src==Dst || Graph->IsEdge(Src, Dst)) { continue; } // take only 1st edge
01143     if (! Graph->IsNode(Src)) { Graph->AddNode(Src); }
01144     if (! Graph->IsNode(Dst)) { Graph->AddNode(Dst); }
01145     if (OnlyBackEdges && EI.GetSrcNDat().GetAbsSecs()-EI.GetDstNDat().GetAbsSecs() < 0) {
01146       Graph->AddEdge(Src, Dst);  continue;
01147     }
01148     // find common neighbors
01149     const TUNGraph::TNodeI SrcNI = Graph->GetNI(Src);
01150     const TUNGraph::TNodeI DstNI = Graph->GetNI(Dst);
01151     SrcNbrV.Clr(false);  DstNbrV.Clr(false);
01152     for (int i = 0; i < SrcNI.GetOutDeg(); i++) { SrcNbrV.Add(SrcNI.GetOutNId(i)); }
01153     for (int i = 0; i < DstNI.GetOutDeg(); i++) { DstNbrV.Add(DstNI.GetOutNId(i)); }
01154     SrcNbrV.Sort();  DstNbrV.Sort();
01155     SrcNbrV.Intrs(DstNbrV, CmnV);
01156     if (! CmnV.Empty()) {
01157       // triad completion
01158       const int SrcDeg = Graph->GetNode(Src).GetOutDeg();
01159       const int DstDeg = Graph->GetNode(Dst).GetInDeg();
01160       double RndRnd=0, RndPref=0, PrefRnd=0, PrefPref=0;
01161       int AllNbrDegSum=0;//, NbrDegSum = 0;
01162       //for (int i = 0; i < CmnV.Len(); i++) {
01163       //  NbrDegSum += Graph->GetNode(CmnV[i]).GetOutDeg(); }
01164       for (int i = 0; i < SrcNI.GetOutDeg(); i++) {
01165         AllNbrDegSum += Graph->GetNode(SrcNI.GetOutNId(i)).GetOutDeg(); }
01166       // uniform-uniform node selection = 1/|cmn| sum_{i\in cmn} 1/(d_i-1)
01167       // preferential-uniform
01168       for (int i = 0; i < CmnV.Len(); i++) {
01169         const int Deg = Graph->GetNode(CmnV[i]).GetOutDeg();
01170         RndRnd += (1.0/double(SrcDeg)) * (1.0/double(Deg-1));
01171         PrefRnd +=  (Deg/double(AllNbrDegSum)) * (1.0/double(Deg-1));
01172       }
01173       // uniform-preferential selection = 1/|cmn| sum_{i\in cmn} d_t / sum_{i--j} d_j
01174       // preferential-preferential
01175       for (int i = 0; i < CmnV.Len(); i++) {
01176         const TUNGraph::TNode N = Graph->GetNode(CmnV[i]);
01177         const int Deg = N.GetOutDeg();
01178         int DegSum = 0;
01179         for (int j = 0; j < N.GetOutDeg(); j++) {
01180           if (N.GetOutNId(j) != Src) {
01181             DegSum += Graph->GetNode(N.GetOutNId(j)).GetOutDeg(); }
01182         }
01183         RndPref += 1.0/double(SrcDeg) * DstDeg/double(DegSum);
01184         PrefPref += (Deg/double(AllNbrDegSum)) * (DstDeg/double(DegSum));
01185       }
01186       IAssert(0.0<RndRnd && RndRnd<=1.0);   IAssert(0.0<RndPref && RndPref<=1.0);
01187       IAssert(0.0<PrefRnd && PrefRnd<=1.0);  IAssert(0.0<PrefPref && PrefPref<=1.0);
01188       RndRndH.AddDat(TMath::Round(RndRnd, 2)) += 1.0;
01189       RndPrefH.AddDat(TMath::Round(RndPref, 2)) += 1.0;
01190       PrefRndH.AddDat(TMath::Round(PrefRnd, 2)) += 1.0;
01191       PrefPrefH.AddDat(TMath::Round(PrefPref, 2)) += 1.0;
01192       fprintf(F, "%d\t%d\t%g\t%g\t%g\t%g\n", edge, EI.GetId(), RndRnd, RndPref, PrefRnd, PrefPref);
01193       TriadEdges++;
01194     }
01195     Graph->AddEdge(Src, Dst);
01196     if (edge % 1000 == 0) {
01197       printf("\r%dk / %dk triad edges %dk [total %s]", edge/1000, EIdV.Len()/1000, TriadEdges/1000, ExeTm.GetStr()); }
01198   }
01199   fclose(F);
01200   printf("\nRandom vs. preferential triad closing\n");
01201   printf("All edges:            %d\n", Graph->GetEdges());
01202   printf("Triad closing edges:  %d\n", TriadEdges);
01203   // plot distribution
01204   TFltPrV RndRndPrV;   RndRndH.GetKeyDatPrV(RndRndPrV);     RndRndPrV.Sort();
01205   TFltPrV RndPrefPrV;  RndPrefH.GetKeyDatPrV(RndPrefPrV);   RndPrefPrV.Sort();
01206   TFltPrV PrefRndPrV;  PrefRndH.GetKeyDatPrV(PrefRndPrV);   PrefRndPrV.Sort();
01207   TFltPrV PrefPrefPrV; PrefPrefH.GetKeyDatPrV(PrefPrefPrV); PrefPrefPrV.Sort();
01208   { TGnuPlot GP(TStr::Fmt("probPdf.%s", FNmPref.CStr()), TStr::Fmt("%s. %d/%d edges. CDF. Uniform vs. Preferential triangle closure.", Desc.CStr(), TriadEdges,Graph->GetEdges()));
01209   GP.AddPlot(RndRndPrV, gpwLinesPoints, "Random--Random");
01210   GP.AddPlot(RndPrefPrV, gpwLinesPoints, "Random--Preferential");
01211   GP.AddPlot(PrefRndPrV, gpwLinesPoints, "Preferential--Random");
01212   GP.AddPlot(PrefPrefPrV, gpwLinesPoints, "Preferential--Preferential");
01213   GP.SetXYLabel("Probability", "Count");
01214   GP.SavePng(); }
01215   { TGnuPlot GP(TStr::Fmt("probCdf.%s", FNmPref.CStr()), TStr::Fmt("%s. %d/%d edges. CDF. Uniform vs. Preferential triangle closure.", Desc.CStr(), TriadEdges,Graph->GetEdges()));
01216   GP.AddPlot(TGUtil::GetCdf(RndRndPrV), gpwLinesPoints, "Random--Random");
01217   GP.AddPlot(TGUtil::GetCdf(RndPrefPrV), gpwLinesPoints, "Random--Preferential");
01218   GP.AddPlot(TGUtil::GetCdf(PrefRndPrV), gpwLinesPoints, "Preferential--Random");
01219   GP.AddPlot(TGUtil::GetCdf(PrefPrefPrV), gpwLinesPoints, "Preferential--Preferential");
01220   GP.SetXYLabel("Cumulative probability", "Count");
01221   GP.SavePng(); }
01222 }*/
01223 
01224 PTimeNENet TTimeNENet::GetGnmRndNet(const int& Nodes, const int& Edges) {
01225   printf("Generating G_nm(%d, %d)\n", Nodes, Edges);
01226   int Src, Dst;
01227   PTimeNENet Net = TTimeNENet::New();
01228   Net->Reserve(Nodes, Edges);
01229   for (int e = 0; e < Edges; e++) {
01230     Src = TInt::Rnd.GetUniDevInt(Nodes);
01231     Dst = TInt::Rnd.GetUniDevInt(Nodes);
01232     while (Dst == Src || Net->IsEdge(Src, Dst)) {
01233       Dst = TInt::Rnd.GetUniDevInt(Nodes); }
01234     if (! Net->IsNode(Src)) { Net->AddNode(Src, TSecTm(e)); }
01235     if (! Net->IsNode(Dst)) { Net->AddNode(Dst, TSecTm(e)); }
01236     Net->AddEdge(Src, Dst, -1, TSecTm(e));
01237   }
01238   return Net;
01239 }
01240 
01241 // ACL, model B: Aiello, Chung, Lu: Random evolution in massive graphs
01242 PTimeNENet TTimeNENet::GetPrefAttach(const int& Nodes, const int& Edges, const double& GammaIn, const double& GammaOut) {
01243   const double Alpha = Nodes/double(Edges);
01244   printf("Generating PA(%d, %d), with slope in:%.1f, out: %.1f\n", Nodes, Edges,
01245     2+GammaIn/(Alpha/(1-Alpha)), 2+GammaOut/(Alpha/(1-Alpha)));
01246   // init
01247   int nodes=0, edges=0, time=0, iter=0;
01248   TIntV OutW(Edges, 0), InW(Edges, 0);
01249   PTimeNENet Net = TTimeNENet::New();
01250   Net->Reserve(Nodes, Edges);
01251   // 1st node
01252   Net->AddNode(0, TSecTm(time++));  nodes++;
01253   OutW.Add(0);  InW.Add(0);
01254   while (edges < Edges) {
01255     int Src=-1, Dst=-1;  iter++;
01256     if (TInt::Rnd.GetUniDev() < Alpha) {
01257       if (nodes < Nodes) {
01258         IAssert(Net->AddNode(nodes, TSecTm(time++)));
01259         nodes++; }
01260     } else {
01261       if (TInt::Rnd.GetUniDev() < nodes*GammaIn/double(edges+nodes*GammaIn)) {
01262         Src = TInt::Rnd.GetUniDevInt(nodes); }
01263       else { Src = OutW[TInt::Rnd.GetUniDevInt(OutW.Len())]; }
01264       if (TInt::Rnd.GetUniDev() < nodes*GammaOut/double(edges+nodes*GammaOut)) {
01265         Dst = TInt::Rnd.GetUniDevInt(nodes); }
01266       else { Dst = InW[TInt::Rnd.GetUniDevInt(InW.Len())]; }
01267     }
01268     if (Src == Dst || Net->IsEdge(Src, Dst)) {
01269       continue;
01270     }
01271     //printf("%d/%d %d %d\n", edges, iter, Src, Dst);
01272     if (! Net->IsNode(Src)) { Net->AddNode(Src, TSecTm(time++)); nodes++; }
01273     if (! Net->IsNode(Dst)) { Net->AddNode(Dst, TSecTm(time++)); nodes++; }
01274     Net->AddEdge(Src, Dst, -1, TSecTm(time++));
01275     OutW.Add(Src); InW.Add(Dst); edges++;
01276   }
01277   for (int node = 0; node < Nodes; node++) {
01278     if (! Net->IsNode(node)) {
01279       Net->AddNode(node, TSecTm(time++)); }
01280   }
01281   return Net;
01282 }
01283 
01284 PTimeNENet TTimeNENet::GetPrefAttach(const int& Nodes, const int& OutDeg) {
01285   printf("Generating PA, nodes:%d, out-deg:%d\n", Nodes, OutDeg);
01286   // init
01287   int time=0;
01288   PTimeNENet Net = TTimeNENet::New();
01289   Net->Reserve(Nodes, OutDeg*Nodes);
01290   Net->AddNode(0, TSecTm(++time));  Net->AddNode(1, TSecTm(++time));
01291   Net->AddEdge(0, 1, -1, TSecTm(++time));
01292   TIntV NIdV;  NIdV.Add(0);  NIdV.Add(1);
01293   TIntSet NodeSet;
01294   for (int node = 2; node <= Nodes; node++) {
01295     NodeSet.Clr(false);
01296     while (NodeSet.Len() < OutDeg && NodeSet.Len() < node) {
01297       NodeSet.AddKey(NIdV[TInt::Rnd.GetUniDevInt(NIdV.Len())]);
01298     }
01299     const int N = Net->AddNode(node, TSecTm(++time));
01300     for (int i = 0; i < NodeSet.Len(); i++) {
01301       Net->AddEdge(node, NodeSet[i], -1, TSecTm(++time));
01302       NIdV.Add(N);  NIdV.Add(NodeSet[i]);
01303     }
01304   }
01305   return Net;
01306 }
01307 
01308 void TTimeNENet::SaveEdgeTm(const TStr& EdgeFNm, const bool& RenumberNId, const bool& RelativeTm) const {
01309   TIntV EIdV;  GetEIdByTm(EIdV);
01310   const int BegTm = RelativeTm ? GetEDat(EIdV[0]).GetAbsSecs() : 0;
01311   TIntSet NIdMap;
01312   if (RenumberNId) { NIdMap.Gen(GetNodes()); }
01313   FILE *F = fopen(EdgeFNm.CStr(), "wt");
01314   //fprintf(F, "#Nodes\t%d\n#Edges\t%d\n", GetNodes(), GetEdges());
01315   //fprintf(F, "#<src>\t<dst>\t<time>\n");
01316   for (int e =0; e < EIdV.Len(); e++) {
01317     const TEdgeI EI = GetEI(EIdV[e]);
01318     if (RenumberNId) {
01319       const int src = EI.GetSrcNId();
01320       const int dst = EI.GetDstNId();
01321       NIdMap.AddKey(src);  NIdMap.AddKey(dst);
01322       fprintf(F, "%d\t%d\t%d\n", NIdMap.GetKeyId(src), NIdMap.GetKeyId(dst), EI().GetAbsSecs()-BegTm);
01323     }else {
01324       fprintf(F, "%d\t%d\t%d\n", EI.GetSrcNId(), EI.GetDstNId(), EI().GetAbsSecs()-BegTm); }
01325   }
01326   fclose(F);
01327 }
01328 
01329 PTimeNENet TTimeNENet::GetSmallNet() {
01330   PTimeNENet Net = TTimeNENet::New();
01331   for (int i = 1; i <= 6; i++) {
01332     Net->AddNode(i, TSecTm(0)); }
01333   int tm = 1;
01334   Net->AddEdge(1, 2, -1, TSecTm(tm++));
01335   Net->AddEdge(3, 4, -1, TSecTm(tm++));
01336   Net->AddEdge(3, 1, -1, TSecTm(tm++));
01337   Net->AddEdge(5, 6, -1, TSecTm(tm++));
01338   Net->AddEdge(6, 4, -1, TSecTm(tm++));
01339   Net->AddEdge(5, 3, -1, TSecTm(tm++));
01340   Net->AddEdge(5, 4, -1, TSecTm(tm++));
01341   Net->AddEdge(5, 2, -1, TSecTm(tm++));
01342   return Net;
01343 }
01344 
01345 PTimeNENet TTimeNENet::LoadFlickr(const TStr& NodeFNm, const TStr& EdgeFNm) {
01346   const int BegOfTm = 1047369600; // Tue Mar 11 01:00:00 2003 (begining of Flickr)
01347   PTimeNENet Net = TTimeNENet::New();
01348   printf("Adding nodes...");
01349   { TSsParser Ss(NodeFNm, ssfWhiteSep);
01350   while (Ss.Next()) {
01351     const int NId = Ss.GetInt(0);
01352     const int Tm = Ss.GetInt(1)+BegOfTm;
01353     if (TSecTm(Tm) < TSecTm(2002, 1, 1)) {
01354       printf("  skip node %g (time %d)\n", (double) Ss.GetLineNo(), Ss.GetInt(1)); continue; }
01355     Net->AddNode(NId, TSecTm(Tm));
01356   } }
01357   printf(" %d nodes\n", Net->GetNodes());
01358   printf("Adding edges...");
01359   int SkipCnt=0;
01360   //TIntH SkipDiffCnt;
01361   { TSsParser Ss(EdgeFNm, ssfWhiteSep);
01362   while (Ss.Next()) {
01363     const int NId1 = Ss.GetInt(0);
01364     const int NId2 = Ss.GetInt(1);
01365     const TSecTm Tm = TSecTm(Ss.GetInt(2)+BegOfTm);
01366     if (! Net->IsNode(NId1) || ! Net->IsNode(NId2)) { printf("not node\n"); continue; }
01367     if (Tm < TSecTm(2002, 1, 1)) { SkipCnt++;
01368       printf("  skip edge %g (time %s)\n", (double) Ss.GetLineNo(), Tm.GetStr().CStr()); continue; }
01369     if (Tm+600 < Net->GetNDat(NId1)) {
01370       printf("  1:skip edge %g (time %s < %s)\n", (double) Ss.GetLineNo(), Tm.GetStr().CStr(), Net->GetNDat(NId1).GetStr().CStr());
01371       //SkipDiffCnt.AddDat(-Tm.GetAbsSecs()+Net->GetNDat(NId1).GetAbsSecs()) += 1;
01372       SkipCnt++;  continue; }
01373     if (Tm+600 < Net->GetNDat(NId2)) { SkipCnt++;
01374       printf("  2:skip edge %g (time %s < %s)\n", (double) Ss.GetLineNo(), Tm.GetStr().CStr(), Net->GetNDat(NId2).GetStr().CStr());
01375       //SkipDiffCnt.AddDat(-Tm.GetAbsSecs()+Net->GetNDat(NId2).GetAbsSecs()) += 1;
01376       SkipCnt++;  continue; }
01377     Net->AddEdge(NId1, NId2, -1, TSecTm(Tm));
01378   } }
01379   //TGnuPlot::PlotValCntH(SkipDiffCnt, "flickr-edgeNodeDiff", "", "seconds", "count");
01380   printf("  %d edges\n", Net->GetEdges());
01381   printf("  %d edges skipped (edge time < node time)\n", SkipCnt);
01382   Net->UpdateNodeTimes();
01383   return Net;
01384 }
01385 
01386 // load network where fields are separated by Separator and each line contains (*Fld are column indexes)
01387 // .. <source> .. <destination> .. <time>
01388 PTimeNENet TTimeNENet::LoadEdgeTm(const TStr& EdgeFNm, const int& SrcFld, const int& DstFld, const int& TimeFld, const TSsFmt& Separator) {
01389   printf("Loading %s\n", EdgeFNm.CStr());
01390   PTimeNENet Net = TTimeNENet::New();
01391   TStrHash<TInt> StrToId(Mega(1), true); // node id to string
01392   int LineCnt=0;
01393   TExeTm ExeTm;
01394   TSsParser Ss(EdgeFNm, Separator);
01395   TSecTm MinTm=TSecTm::GetCurTm(), MaxTm=TSecTm(100);
01396   while (Ss.Next()) {
01397     if (Ss.IsCmt()) { continue; }
01398     IAssert(Ss.Len() > TimeFld);
01399     const char* Node1 = Ss.GetFld(SrcFld);
01400     const char* Node2 = Ss.GetFld(DstFld);
01401     const char* TmStr = Ss.GetFld(TimeFld);
01402     if (strcmp(TmStr,"NULL")==0) { continue; }
01403     //const TSecTm Tm = TSecTm::GetDtTmFromYmdHmsStr(TmStr);
01404     const TSecTm Tm(atoi(TmStr));
01405     const int NId1 = StrToId.AddKey(Node1);
01406     const int NId2 = StrToId.AddKey(Node2);
01407     if (! Net->IsNode(NId1)) { Net->AddNode(NId1, TSecTm()); }
01408     if (! Net->IsNode(NId2)) { Net->AddNode(NId2, TSecTm()); }
01409     MinTm=TMath::Mn(MinTm, Tm);
01410     MaxTm=TMath::Mx(MaxTm, Tm);
01411     Net->AddEdge(NId1, NId2, -1, Tm);
01412     if (++LineCnt % 1000 == 0) {
01413       printf("\r  %dk lines processed: %d %d [%s]", LineCnt/1000, Net->GetNodes(), Net->GetEdges(), ExeTm.GetStr()); }
01414   }
01415   printf("\r  %d lines processed: %d %d [%s]\n", LineCnt, Net->GetNodes(), Net->GetEdges(), ExeTm.GetStr());
01416   printf("  Data range %s -- %s\n", MinTm.GetStr().CStr(), MaxTm.GetStr().CStr());
01417   //TSnap::PrintInfo(Net, "", "", false);
01418   Net->UpdateNodeTimes();
01419   return Net;
01420 }