SNAP Library 2.0, Developer Reference  2013-05-13 16:33:57
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
timenet.cpp
Go to the documentation of this file.
00001 
00002 // Time Network
00003 TTimeNet& TTimeNet::operator = (const TTimeNet& TimeNet) {
00004   if (this != &TimeNet) {
00005     TNet::operator=(TimeNet);
00006   }
00007   return *this;
00008 }
00009 
00010 PTimeNet TTimeNet::GetSubGraph(const TIntV& NIdV) const {
00011   PTimeNet NewNetPt = TTimeNet::New();
00012   TTimeNet& NewNet = *NewNetPt;
00013   NewNet.Reserve(NIdV.Len(), -1);
00014   int node, edge;
00015   TNodeI NI;
00016   for (node = 0; node < NIdV.Len(); node++) {
00017     NewNet.AddNode(NIdV[node], GetNDat(NIdV[node])); // also copy the node data
00018   }
00019   for (node = 0; node < NIdV.Len(); node++) {
00020     NI = GetNI(NIdV[node]);
00021     const int SrcNId = NI.GetId();
00022     for (edge = 0; edge < NI.GetOutDeg(); edge++) {
00023       const int OutNId = NI.GetOutNId(edge);
00024       if (NewNet.IsNode(OutNId)) {
00025         NewNet.AddEdge(SrcNId, OutNId); }
00026     }
00027   }
00028   NewNet.Defrag();
00029   return NewNetPt;
00030 }
00031 
00032 PTimeNENet TTimeNet::GetTimeNENet() const {
00033   TIntV NIdV;  GetNIdByTm(NIdV);
00034   PTimeNENet OutNet = TTimeNENet::New(GetNodes(), GetEdges());
00035   for (int i = 0; i < NIdV.Len(); i++) {
00036     const int Src = NIdV[i];
00037     const TTimeNet::TNodeI NI = GetNI(Src);
00038     const TSecTm SrcTm = NI.GetDat();
00039     if (! OutNet->IsNode(Src)) { OutNet->AddNode(Src, SrcTm); }
00040     for (int e = 0; e < NI.GetOutDeg(); e++) {
00041       if (! OutNet->IsNode(NI.GetOutNId(e))) { OutNet->AddNode(NI.GetOutNId(e), SrcTm); }
00042       OutNet->AddEdge(Src, NI.GetOutNId(e), -1, SrcTm);
00043     }
00044   }
00045   return OutNet;
00046 }
00047 
00048 void TTimeNet::GetNIdByTm(TIntV& NIdV) const {
00049   TVec<TKeyDat<TSecTm, TInt> > TmToNIdV(GetNodes(), 0);
00050   for (TNodeI NodeI = BegNI(); NodeI < EndNI(); NodeI++) {
00051     TmToNIdV.Add(TKeyDat<TSecTm, TInt>(NodeI.GetDat(), NodeI.GetId())); }
00052   TmToNIdV.Sort();
00053   NIdV.Gen(GetNodes(), 0);
00054   for (int i = 0; i < TmToNIdV.Len(); i++) {
00055     NIdV.Add(TmToNIdV[i].Dat); }
00056 }
00057 
00058 // put nodes into buckets by TmUnit
00059 void TTimeNet::GetTmBuckets(const TTmUnit& TmUnit, TTmBucketV& TmBucketV) const {
00060   THash<TInt, TIntV> TmIdToNIdVH;
00061   for (TNodeI NodeI = BegNI(); NodeI < EndNI(); NodeI++) {
00062     const int TmId = NodeI().Round(TmUnit);
00063     if (! TmIdToNIdVH.IsKey(TmId)) TmIdToNIdVH.AddKey(TmId);
00064     TmIdToNIdVH.GetDat(TmId).Add(NodeI.GetId());
00065   }
00066   TVec<TPair<TInt, TIntV> > TmIdNIdVV;
00067   TmIdToNIdVH.GetKeyDatPrV(TmIdNIdVV);
00068   TmIdNIdVV.Sort();
00069   TmBucketV.Gen(TmIdNIdVV.Len());
00070   for (int i = 0; i < TmIdNIdVV.Len(); i++) {
00071     TTmBucket& Bucket = TmBucketV[i];
00072     Bucket.BegTm = TmIdNIdVV[i].Val1;
00073     Bucket.NIdV = TmIdNIdVV[i].Val2;
00074   }
00075 }
00076 
00077 void TTimeNet::GetNodeBuckets(const int NodesPerBucket, TTimeNet::TTmBucketV& TmBucketV) const {
00078   TIntV NIdV;
00079   GetNIdByTm(NIdV);
00080   TmBucketV.Gen(NIdV.Len() / NodesPerBucket + 1, 0);
00081   for (int i = 0; i < NIdV.Len(); i++) {
00082     const int b = i/NodesPerBucket;
00083     if (TmBucketV.Len() <= b) { TmBucketV.Add(TTimeNet::TTmBucket(TSecTm(b))); }
00084     TmBucketV[b].NIdV.Add(NIdV[i]);
00085   }
00086 }
00087 
00088 PGStatVec TTimeNet::TimeGrowth(const TTmUnit& TmUnit, const TFSet& TakeStat, const TSecTm& StartTm) const {
00089   PGStatVec GrowthStat = new TGStatVec(TmUnit, TakeStat);
00090   TTmBucketV TmBucketV;
00091   GetTmBuckets(TmUnit, TmBucketV);
00092   TIntV NodeIdV;
00093   TExeTm ExeTm;
00094   for (int t = 0; t < TmBucketV.Len(); t++) {
00095     NodeIdV.AddV(TmBucketV[t].NIdV); // nodes up to time T
00096     printf("\n=== %d/%d] %s (%d nodes)\n", t+1, TmBucketV.Len(),
00097       TmBucketV[t].BegTm.GetStr().CStr(), NodeIdV.Len());  ExeTm.Tick();
00098     if (TmBucketV[t].BegTm < StartTm) continue;
00099     //PNGraph PreGraph = GetSubGraph(NodeIdV, true); // renumber nodes
00100     PNGraph PreGraph = TSnap::ConvertSubGraph<PNGraph>(PTimeNet((TTimeNet*)this), NodeIdV); // don't renumber nodes
00101     GrowthStat->Add(PreGraph, TmBucketV[t].BegTm);
00102   }
00103   return GrowthStat;
00104 }
00105 
00106 void TTimeNet::PlotEffDiam(const TStr& FNmPref, const TStr& Desc, const TTmUnit& TmUnit,
00107                            const TSecTm& StartTm, const int& NDiamRuns, const bool& OnlyWcc, const bool& AlsoRewire) const {
00108   const TStr WccStr = OnlyWcc ? "WCC " : TStr::GetNullStr();
00109   TTmBucketV TmBucketV;
00110   GetTmBuckets(TmUnit, TmBucketV);
00111   TIntV NodeIdV;
00112   TExeTm ExeTm, Run1Tm;
00113   TFltTrV TmDiamV, NdsDiamV;
00114   TFltTrV RwTmDiamV, RwNdsDiamV;
00115   for (int t = 0; t < TmBucketV.Len(); t++) {
00116     NodeIdV.AddV(TmBucketV[t].NIdV); // nodes up to time T
00117     printf("\n*** %d/%d] at %s (%d nodes)\n", t+1, TmBucketV.Len(),
00118       TmBucketV[t].BegTm.GetStr(TmUnit).CStr(), NodeIdV.Len());  ExeTm.Tick();
00119     if (TmBucketV[t].BegTm < StartTm) continue;
00120     //PUNGraph PreGraph = GetSubUNGraph(NodeIdV, true);
00121     PUNGraph PreGraph = TSnap::ConvertSubGraph<PUNGraph>(PTimeNet((TTimeNet*)this), NodeIdV);
00122     { TMom Mom;
00123     for (int r = 0; r < NDiamRuns; r++) {
00124       printf("%d...", r+1);  Run1Tm.Tick();
00125       const double EffDiam = TSnap::GetAnfEffDiam(OnlyWcc ? TSnap::GetMxWcc(PreGraph) : PreGraph);
00126       Mom.Add(EffDiam);  printf("[%s]\r", Run1Tm.GetTmStr());
00127     }
00128     Mom.Def();
00129     TmDiamV.Add(TFltTr((int)TmBucketV[t].BegTm.GetInUnits(TmUnit), Mom.GetMean(), Mom.GetSDev()));
00130     NdsDiamV.Add(TFltTr(PreGraph->GetNodes(), Mom.GetMean(), Mom.GetSDev()));
00131     NdsDiamV.Sort();
00132     printf("  [%s]          \n", ExeTm.GetTmStr()); }
00133     if (AlsoRewire) {
00134       //PUNGraph RwGraph = TGGen::GenRndDegSeqS(PreGraph, 50, TInt::Rnd); // edge switching model
00135       TIntV DegSeqV(PreGraph->GetNodes(), 0);
00136       for (TUNGraph::TNodeI NI = PreGraph->BegNI(); NI < PreGraph->EndNI(); NI++) { DegSeqV.Add(NI.GetDeg()); }
00137       PUNGraph RwGraph = TSnap::GenConfModel(DegSeqV, TInt::Rnd);
00138       printf("Configuration model: (%d, %d) --> (%d, %d)\n", PreGraph->GetNodes(), PreGraph->GetEdges(), RwGraph->GetNodes(), RwGraph->GetEdges());
00139       TMom Mom;
00140       for (int r = 0; r < NDiamRuns; r++) {
00141         printf("  diam run %d...", r+1);  Run1Tm.Tick();
00142         const double EffDiam = TSnap::GetAnfEffDiam(OnlyWcc ? TSnap::GetMxWcc(PreGraph):PreGraph);
00143         Mom.Add(EffDiam);  printf(" current run [%s]\n", Run1Tm.GetTmStr());
00144       }
00145       Mom.Def();
00146       RwTmDiamV.Add(TFltTr((int)TmBucketV[t].BegTm.GetInUnits(TmUnit), Mom.GetMean(), Mom.GetSDev()));
00147       RwNdsDiamV.Add(TFltTr(PreGraph->GetNodes(), Mom.GetMean(), Mom.GetSDev()));
00148       RwNdsDiamV.Sort();
00149       printf("done with diameter. Total time [%s] \n", ExeTm.GetTmStr());
00150     }
00151     // plot
00152     { TGnuPlot GnuPlot("diamEff-T."+FNmPref, TStr::Fmt("%s. G(%d, %d)", Desc.CStr(), GetNodes(), GetEdges()));
00153     GnuPlot.SetXYLabel(TStr::Fmt("TIME [%s]", TTmInfo::GetTmUnitStr(TmUnit).CStr()), WccStr+"Effective Diameter");
00154     GnuPlot.AddErrBar(TmDiamV, "True", "");
00155     if (! RwTmDiamV.Empty()) { GnuPlot.AddErrBar(RwTmDiamV, "Rewired", "");}
00156     GnuPlot.SavePng(); }
00157     { TGnuPlot GnuPlot("diamEff-N."+FNmPref, TStr::Fmt("%s. G(%d, %d)", Desc.CStr(), GetNodes(), GetEdges()));
00158     GnuPlot.SetXYLabel("NODES", WccStr+"Effective Diameter");
00159     GnuPlot.AddErrBar(NdsDiamV, "True", "");
00160     if (! RwNdsDiamV.Empty()) { GnuPlot.AddErrBar(RwNdsDiamV, "Rewired", "");}
00161     GnuPlot.SavePng(); }
00162   }
00163 }
00164 
00165 // pretend we only have link data starting in PostTmDiam
00166 // compare the diameter of the nodes after PostTmDiam with diameter
00167 // of the nodes after PostTmDiam but *also* using nodes and edges
00168 // from before PostTmDiam
00169 void TTimeNet::PlotMissingPast(const TStr& FNmPref, const TStr& Desc, const TTmUnit& TmUnit,
00170                                const TSecTm& DelPreTmEdges, const TSecTm& PostTmDiam) const {
00171   printf("\nGrowth over time: degree distribution, Growth Power Law, Diameter.\n  %s group by %s.\n",
00172     FNmPref.CStr(), TTmInfo::GetTmUnitStr(TmUnit).CStr());
00173   printf("  Delete out-edges of pre time %s nodes.\n  Take subgraph of post year %s subgraph.\n\n",
00174     DelPreTmEdges.GetStr().CStr(), PostTmDiam.GetStr().CStr());
00175   const int NDiamRuns = 10;
00176   const int NSamples = 100;
00177   //PUNGraph FullGraph = GetUNGraph();
00178   PUNGraph FullGraph = TSnap::ConvertGraph<PUNGraph>(PTimeNet((TTimeNet*)this));
00179   // delete past
00180   if (DelPreTmEdges.IsDef()) {
00181     int NDelNodes = 0, NDelEdges = 0;
00182     printf("Deleting pre-%s node out-links\n", DelPreTmEdges.GetStr().CStr());
00183     for (TNodeI NodeI = BegNI(); NodeI < EndNI(); NodeI++) {
00184       if (NodeI() < DelPreTmEdges) {
00185         const int NodeId = NodeI.GetId();
00186         for (int edge = 0; edge < NodeI.GetOutDeg(); edge++) {
00187           FullGraph->DelEdge(NodeId, NodeI.GetOutNId(edge)); }
00188         NDelEdges += NodeI.GetOutDeg();  NDelNodes++;
00189       }
00190     }
00191     printf("  Deleted %d nodes out-edges (%d edges total).\n", NDelNodes, NDelEdges);
00192     FullGraph->Defrag(true);
00193   }
00194   PGStatVec GrowthStat = TGStatVec::New(TmUnit);
00195   TFltV PreDiamSDev, PreEffDiamSDev, WccDiamSDev, WccEffDiamSDev;
00196   TIntV NodeIdV;
00197   TExeTm ExeTm;
00198   TTmBucketV TmBucketV;
00199   GetTmBuckets(TmUnit, TmBucketV);
00200   for (int t = 0; t < TmBucketV.Len(); t++) {
00201     printf("\nGraph: %s (%d / %d) [%s]\n", TmBucketV[t].BegTm.GetTmStr().CStr(),
00202       t+1, TmBucketV.Len(), TExeTm::GetCurTm());
00203     // up-to-year subgraph
00204     NodeIdV.AddV(TmBucketV[t].NIdV); // nodes up to time T
00205     if (TmBucketV[t].BegTm < PostTmDiam) { continue; }
00206     const PUNGraph PreGraph = TSnap::GetSubGraph(FullGraph, NodeIdV, true);
00207     const PUNGraph WccGraph = TSnap::GetMxWcc(PreGraph);
00208     TIntV PostYearNIdV, WccPostYearNIdV;
00209     for (TUNGraph::TNodeI NI = PreGraph->BegNI(); NI < PreGraph->EndNI(); NI++) {
00210       if (GetNDat(NI.GetId()) >= PostTmDiam) {
00211         PostYearNIdV.Add(NI.GetId());
00212         if (WccGraph->IsNode(NI.GetId())) { WccPostYearNIdV.Add(NI.GetId()); }
00213       }
00214     }
00215     TMom PreDiamMom, PreEffDiamMom, WccDiamMom, WccEffDiamMom;
00216     // diameter of PostYearDiam subgraph using whole graph (all edges)
00217     int FullDiam; double EffDiam;
00218     for (int r = 0; r < NDiamRuns; r++) {
00219       if (! PostYearNIdV.Empty()) {
00220         TSnap::GetBfsEffDiam(PreGraph, NSamples, PostYearNIdV, false, EffDiam, FullDiam);
00221         PreDiamMom.Add(FullDiam);  PreEffDiamMom.Add(EffDiam);
00222       }
00223       if (! WccPostYearNIdV.Empty()) {
00224         TSnap::GetBfsEffDiam(WccGraph, NSamples, WccPostYearNIdV, false, EffDiam, FullDiam);
00225         WccDiamMom.Add(FullDiam);  WccEffDiamMom.Add(EffDiam);
00226       }
00227       printf("  diam: %d  [%s]  \r", r+1, ExeTm.GetTmStr());  ExeTm.Tick();
00228     }
00229     PreDiamMom.Def();  PreEffDiamMom.Def();
00230     WccDiamMom.Def();  WccEffDiamMom.Def();
00231     // save stat
00232     PGStat GraphStatPt = GrowthStat->Add(TmBucketV[t].BegTm);
00233     TGStat& GS = *GraphStatPt;
00234     GS.TakeBasicStat(PreGraph, false);
00235     GS.TakeBasicStat(WccGraph, true);
00236     GS.SetVal(gsvFullDiam, PreDiamMom.GetMean()); // mean
00237     GS.SetVal(gsvEffDiam, PreEffDiamMom.GetMean());
00238     GS.SetVal(gsvFullWccDiam, WccDiamMom.GetMean());
00239     GS.SetVal(gsvEffWccDiam, WccEffDiamMom.GetMean());
00240     GS.SetVal(gsvFullDiamDev, PreDiamMom.GetSDev()); // variance
00241     GS.SetVal(gsvEffDiamDev, PreEffDiamMom.GetSDev());
00242     GS.SetVal(gsvFullWccDiamDev, WccDiamMom.GetSDev());
00243     GS.SetVal(gsvEffWccDiamDev, WccEffDiamMom.GetSDev());
00244     { TFOut FOut("growth."+FNmPref+".gStatVec");  GrowthStat->Save(FOut); }
00245     GrowthStat->SaveTxt(FNmPref, TStr::Fmt("%s. MISSING PAST DIAMETER\nDelPreEdges\t%s\nPostYearDiam\t%s\n",
00246       Desc.CStr(), DelPreTmEdges.GetStr().CStr(), PostTmDiam.GetStr().CStr()));
00247   }
00248   // diameter plots
00249   //GrowthStat->PlotDiam(FNmPref, Desc + TStr::Fmt(" MISSING PAST. DelPre:%d PostYear:%d.",
00250   //  DelPreEdges, PostYearDiam));*/
00251 }
00252 
00253 void TTimeNet::PlotCCfOverTm(const TStr& FNmPref, TStr Desc, const TTmUnit& TmUnit, const int& NodesBucket) const {
00254   if (Desc.Empty()) { Desc = FNmPref; }
00255   TTimeNet::TTmBucketV TmBucketV;
00256   TStr XLbl;
00257   if (TmUnit == tmuNodes) {
00258     XLbl = "Number of nodes (time)";
00259     IAssert(NodesBucket > 0);
00260     GetNodeBuckets(NodesBucket, TmBucketV); }
00261   else {
00262     XLbl = TStr::Fmt("Time (%s)", TTmInfo::GetTmUnitStr(TmUnit).CStr());
00263     GetTmBuckets(TmUnit, TmBucketV);
00264   }
00265   TIntV NodeIdV;
00266   TFltPrV DegToCCfV, CcfV, OpClV, OpV;
00267   TVec<TTuple<TFlt, 4> > OpenClsV;
00268   TTuple<TFlt, 4> Tuple;
00269   TExeTm ExeTm;
00270   int XVal = 0;
00271   printf("Clustering coefficient over time:\n  %d edges, %d edges per bucket, %d buckets \n", GetEdges(), 100000, TmBucketV.Len());
00272   PUNGraph UNGraph = TSnap::ConvertGraph<PUNGraph>(PTimeNet((TTimeNet*)this));
00273   for (int t = 0; t < TmBucketV.Len(); t++) {
00274     printf("\r  %d/%d: ", t+1, TmBucketV.Len());
00275     NodeIdV.AddV(TmBucketV[t].NIdV); // edges up to time T
00276     int64 Open=0, Close=0;
00277     const PUNGraph Graph = TSnap::GetSubGraph(UNGraph, NodeIdV);
00278     const double CCf = TSnap::GetClustCf(Graph, DegToCCfV, Open, Close);
00279     if (TmUnit == tmuNodes) { XVal = Graph->GetNodes(); }
00280     else { XVal = TmBucketV[t].BegTm.GetInUnits(TmUnit); }
00281     CcfV.Add(TFltPr(XVal, CCf));
00282     OpClV.Add(TFltPr(XVal, Open+Close==0?0:Close/(Open+Close)));
00283     OpV.Add(TFltPr(XVal, Open==0?0:Close/Open));
00284     Tuple[0]=Graph->GetNodes();
00285     Tuple[1]=Graph->GetEdges();
00286     Tuple[2]=Close;  Tuple[3]=Open;
00287     OpenClsV.Add(Tuple);
00288     printf(" %s", ExeTm.GetStr());
00289     TGnuPlot::PlotValV(DegToCCfV, TStr::Fmt("ccfAt%02dtm.%s", t+1, FNmPref.CStr()),
00290       TStr::Fmt("%s. At time %d. Clustering Coefficient. G(%d,%d)", Desc.CStr(), t+1, Graph->GetNodes(), Graph->GetEdges()),
00291       "Degree", "Clustering coefficient", gpsLog10XY, false);
00292   }
00293   TGnuPlot::PlotValV(CcfV, "ccfOverTm."+FNmPref, Desc+". Average Clustering Coefficient", XLbl, "Average clustering coefficient", gpsAuto, false);
00294   TGnuPlot::PlotValV(OpClV, "ClsOpnTr1."+FNmPref, Desc+". Close/(Open+Closed) triads", XLbl, "Close / (Open+Closed) triads", gpsAuto, false);
00295   TGnuPlot::PlotValV(OpV, "ClsOpnTr2."+FNmPref, Desc+". Close/Open triads", XLbl, "Close / Open triads", gpsAuto, false);
00296   TGnuPlot::SaveTs(OpenClsV, "ClsOpnTr."+FNmPref+".tab", TStr::Fmt("#%s\n#Nodes\tEdges\tClosed\tOpenTriads", Desc.CStr()));
00297   printf("\n");
00298 }
00299 
00300 void TTimeNet::PlotMedianDegOverTm(const TStr& FNmPref, const TTmUnit& TmUnit, const int& NodesPerBucket) const {
00301   TTimeNet::TTmBucketV TmBucketV;
00302   TStr XLbl;
00303   if (TmUnit == tmuNodes) {
00304     XLbl = "Number of nodes (time)";  IAssert(NodesPerBucket > 0);
00305     GetNodeBuckets(NodesPerBucket, TmBucketV); }
00306   else {
00307     XLbl = TStr::Fmt("Time (%s)", TTmInfo::GetTmUnitStr(TmUnit).CStr());
00308     GetTmBuckets(TmUnit, TmBucketV); }
00309   printf("\n\n%s\nMedian degree over time:\n  %d edges, %d edges per bucket, %d buckets \n", FNmPref.CStr(), GetEdges(), NodesPerBucket, TmBucketV.Len());
00310   TFltPrV MedDegV, MedInDegV, MedOutDegV;
00311   TIntV NodeIdV;
00312   int XVal;
00313   PUNGraph UNGraph = TSnap::ConvertGraph<PUNGraph>(PTimeNet((TTimeNet*)this));
00314   PNGraph NGraph = TSnap::ConvertGraph<PNGraph>(PTimeNet((TTimeNet*)this));
00315   FILE  *F = fopen(("gStat-"+FNmPref+".tab").CStr(), "wt");
00316   fprintf(F, "UndirNodes\tUndirEdges\tUndirNonZNodes\tMedianDeg\tMeanDeg\tDirNodes\tDirEdges\tDirNonzNodes\tMedInDeg\tMedOutDeg\tMeanInDeg\tMeanOutDeg\n");
00317   for (int t = 0; t < TmBucketV.Len(); t++) {
00318     printf("\r  %d/%d: ", t+1, TmBucketV.Len());
00319     NodeIdV.AddV(TmBucketV[t].NIdV); // edges up to time T
00320     if (TmUnit == tmuNodes) { XVal = NodeIdV.Len(); }
00321     else { XVal = TmBucketV[t].BegTm.GetInUnits(TmUnit); }
00322     // un graph
00323     { const PUNGraph Graph = TSnap::GetSubGraph(UNGraph, NodeIdV);  TMom Mom;
00324     for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) { if (NI.GetOutDeg()>0) { Mom.Add(NI.GetOutDeg());} }
00325     Mom.Def();  MedDegV.Add(TFltPr(XVal, Mom.GetMedian()));
00326     fprintf(F, "%d\t%d\t%d\t%f\t%f", Graph->GetNodes(), Graph->GetEdges(), TSnap::CntNonZNodes(Graph), (float)Mom.GetMedian(), (float)Mom.GetMean()); }
00327     // directed graph
00328     { const PNGraph Graph = TSnap::GetSubGraph<PNGraph>(NGraph, NodeIdV); TMom MomOut, MomIn;
00329     for (TNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
00330       if (NI.GetOutDeg()>0) { MomOut.Add(NI.GetOutDeg()); }
00331       if (NI.GetInDeg()>0) { MomIn.Add(NI.GetInDeg()); } }
00332     MomOut.Def();  MedOutDegV.Add(TFltPr(XVal, MomOut.GetMedian()));
00333     MomIn.Def();  MedInDegV.Add(TFltPr(XVal, MomIn.GetMedian()));
00334     fprintf(F, "\t%d\t%d\t%d\t%f\t%f\t%f\t%f\n", Graph->GetNodes(), Graph->GetEdges(), (int)TSnap::CntNonZNodes(Graph), (float)MomIn.GetMedian(), (float)MomOut.GetMedian(), (float)MomIn.GetMean(), (float)MomOut.GetMean()); }
00335   }
00336   fclose(F);
00337   TGnuPlot::PlotValV(MedDegV, "medDeg."+FNmPref, FNmPref+" Median degree", TTmInfo::GetTmUnitStr(TmUnit), "Median degree");
00338   TGnuPlot::PlotValV(MedOutDegV, "medOutDeg."+FNmPref, FNmPref+" Median OUT degree", TTmInfo::GetTmUnitStr(TmUnit), "Median OUT degree");
00339   TGnuPlot::PlotValV(MedInDegV, "medInDeg."+FNmPref, FNmPref+" Median IN degree", TTmInfo::GetTmUnitStr(TmUnit), "Median IN degree");
00340 }
00341 
00342 // load Arxiv affiliation network (papers to authors bipartite graph)
00343 // <set1_id>  <time>  <set2_id1>  <set2_id2> ... (all ids have to be unique)
00344 PTimeNet TTimeNet::LoadBipartite(const TStr& InFNm) {
00345   PTimeNet TimeNetPt = TTimeNet::New();
00346   TTimeNet& TimeNet = *TimeNetPt;
00347   PSs Ss = TSs::LoadTxt(ssfTabSep, InFNm.CStr());
00348   TIntH Set1IdH; // paper ids
00349   TStrV StrTimeV;
00350   for (int y = 0; y < Ss->GetYLen(); y++) {
00351     if (Ss->At(0, y)[0] == '#') continue; // skip comments
00352     if (Ss->GetXLen(y) < 3) continue;     // there must be at least one author
00353     const int& SrcId = Ss->At(0, y).GetInt();
00354     IAssert(! Set1IdH.IsKey(SrcId));
00355     IAssert(! TimeNet.IsNode(SrcId));
00356     Set1IdH.AddKey(SrcId);
00357     Ss->At(1, y).SplitOnAllCh('-', StrTimeV);
00358     const int Year = StrTimeV[0].GetInt();
00359     const int Month = StrTimeV[1].GetInt();
00360     const int Day = StrTimeV[2].GetInt();
00361     const TSecTm NodeTm(Year, Month, Day);
00362     TimeNet.AddNode(SrcId, NodeTm);
00363     for (int dst = 2; dst < Ss->GetXLen(y); dst++) {
00364       const int DstId = Ss->At(dst, y).GetInt();
00365       IAssert(! Set1IdH.IsKey(DstId));
00366       if (! TimeNet.IsNode(DstId)) { TimeNet.AddNode(DstId, NodeTm); }
00367       else { TimeNet.GetNDat(DstId) = TMath::Mn(NodeTm, TimeNet.GetNDat(DstId)); }
00368       if (! TimeNet.IsEdge(SrcId, DstId)) { TimeNet.AddEdge(SrcId, DstId); }
00369     }
00370   }
00371   TimeNet.Defrag();
00372   printf("Bipartate graph: nodes: %d  edges: %d\n", TimeNet.GetNodes(), TimeNet.GetEdges());
00373   printf("  Bipartate sets: %d nodes --> %d nodes\n", TSnap::CntInDegNodes(TimeNetPt, 0),
00374     TSnap::CntOutDegNodes(TimeNetPt, 0));
00375   return TimeNetPt;
00376 }
00377 
00378 // load Arxiv paper citation network
00379 //   PaperFNm: <paper> <time>
00380 //   CiteFNm:  <source> <destination>
00381 PTimeNet TTimeNet::LoadArxiv(const TStr& PaperFNm, const TStr& CiteFNm) {
00382   TExeTm ExeTm;
00383   PTimeNet TimeNetPt = TTimeNet::New();
00384   TTimeNet& TimeNet = *TimeNetPt;
00385   printf("Arxiv citation graph (paper publication year)...\n");
00386   // load time data (file hep-ph-slacdates)
00387   char Line [1024];
00388   FILE *PprF = fopen(PaperFNm.CStr(), "rt");
00389   TStr StrId, StrTime;
00390   TStrV StrV, StrTimeV;
00391   int N = 0, DuplicateNode = 0;
00392   while (! feof(PprF)) {
00393     Line[0] = 0;
00394     fgets(Line, 1024, PprF);
00395     if (strlen(Line) == 0 || Line[0] == '#') continue;
00396     Line[strlen(Line)-1] = 0; // delete trailing '\n'
00397     TStr(Line).SplitOnWs(StrV);  IAssert(StrV.Len() == 2);
00398     StrId = StrV[0];  StrTime = StrV[1];  IAssert(!StrId.Empty() && !StrTime.Empty());
00399     StrTime.SplitOnAllCh('-', StrTimeV);  IAssert(StrTimeV.Len() == 3);
00400     const int NodeId = StrId.GetInt();
00401     if (! TimeNet.IsNode(NodeId)) {
00402       const int Year = StrTimeV[0].GetInt();
00403       const int Month = StrTimeV[1].GetInt();
00404       const int Day = StrTimeV[2].GetInt();
00405       TimeNet.AddNode(NodeId, TSecTm(Year, Month, Day));
00406     } else { DuplicateNode++; }
00407     if (++N % 10000 == 0) printf("\r  %dk", N/1000);
00408   }
00409   printf("\r  %d nodes read. %d duplicate nodes. %s\n", N, DuplicateNode, ExeTm.GetTmStr());
00410   fclose(PprF);
00411   // load citations (file hep-ph-citations)
00412   int NewSrcIds=0, NewDstIds=0, DupLinks=0, NewCits=0;
00413   FILE *CiteF = fopen(CiteFNm.CStr(), "rt");
00414   N = 0;  ExeTm.Tick();
00415   printf("Loading Arxiv citations...\n");
00416   TIntPrV EdgeV;
00417   THash<TInt, TSecTm> NIdToTimeH;
00418   while (! feof(CiteF)) {
00419     Line[0] = 0;
00420     fgets(Line, 1024, CiteF);
00421     if (strlen(Line) == 0 || Line[0] == '#') continue;
00422     Line[strlen(Line)-1] = 0; // delete trailing '\n'
00423     TStr(Line).SplitOnWs(StrV);  IAssert(StrV.Len() == 2);
00424     const int SrcNId = StrV[0].GetInt();
00425     const int DstNId = StrV[1].GetInt();
00426     EdgeV.Add(TIntPr(SrcNId, DstNId));
00427     // infer time of destination node -- earliest paper that cites the node (paper)
00428     if (! TimeNet.IsNode(DstNId) && TimeNet.IsNode(SrcNId)) {
00429       const TSecTm& SrcTm = TimeNet.GetNDat(SrcNId);
00430       if (! NIdToTimeH.IsKey(DstNId)) {
00431         NIdToTimeH.AddDat(DstNId, SrcTm);
00432         NewDstIds++;
00433       }
00434       else if (NIdToTimeH.GetDat(DstNId) < SrcTm) {
00435         NIdToTimeH.GetDat(DstNId) = SrcTm; }
00436     }
00437     if (++N % 10000 == 0) printf("\r  %dk", N/1000);
00438   }
00439   fclose(CiteF);
00440   // add infeered time nodes (nodes which are cited by papers with known time)
00441   for (int i = 0; i < NIdToTimeH.Len(); i++) {
00442     TimeNet.AddNode(NIdToTimeH.GetKey(i), NIdToTimeH[i]);
00443   }
00444   // add links
00445   for (int i = 0; i < EdgeV.Len(); i++) {
00446     const int SrcNId = EdgeV[i].Val1;
00447     const int DstNId = EdgeV[i].Val2;
00448     if (TimeNet.IsNode(SrcNId) && TimeNet.IsNode(DstNId)) {
00449       if (! TimeNet.IsEdge(SrcNId, DstNId)) { TimeNet.AddEdge(SrcNId, DstNId); }
00450       else { DupLinks++; }
00451     } else {
00452       if (! TimeNet.IsNode(SrcNId)) {
00453         NewSrcIds++;
00454         if (! TimeNet.IsNode(DstNId)) { NewCits++; }
00455       }
00456     }
00457   }
00458   printf("\r  %d citations read. %s\n", N, ExeTm.GetTmStr());
00459   printf("Graph: nodes: %d    edges: %d\n", TimeNet.GetNodes(), TimeNet.GetEdges());
00460   printf("Removing 0-degree nodes: %d nodes\n", TSnap::CntDegNodes(TimeNetPt, 0));
00461   TIntV RmNIdV;
00462   for (TTimeNet::TNodeI ni = TimeNet.BegNI(); ni < TimeNet.EndNI(); ni++) {
00463     if (ni.GetDeg() == 0) { RmNIdV.Add(ni.GetId()); }
00464   }
00465   for (int i = 0; i < RmNIdV.Len(); i++) {
00466     TimeNet.DelNode(RmNIdV[i]);
00467   }
00468   TimeNet.Defrag(true);
00469   printf("\nFinal graph: nodes: %d    edges: %d\n", TimeNet.GetNodes(), TimeNet.GetEdges());
00470   printf("  Duplicate citations                    : %d\n", DupLinks);
00471   printf("  Nodes without time which are cited     : %d (add them to graph, use time of the earliest source node)\n", NewDstIds);
00472   printf("  Citations between unknown time nodes   : %d\n", NewCits);
00473   printf("  Nodes without time which make citations: %d (do not add them into the graph)\n", NewSrcIds);
00474   return TimeNetPt;
00475 }
00476 
00477 // Patent citation network. Time units are seconds even though they mean years
00478 PTimeNet TTimeNet::LoadPatents(const TStr& PatentFNm, const TStr& CiteFNm) {
00479   int N = 0;
00480   TExeTm ExeTm;
00481   PTimeNet TimeNetPt = TTimeNet::New();
00482   TTimeNet& TimeNet = *TimeNetPt;
00483   TimeNet.Reserve(4000000, 160000000);
00484   printf("parsing patent data (patent grant year)...\n");
00485   // load time data (file pat63_99.txt)
00486   const int& PatIdCol = 0;
00487   const int& GYearCol = 1;
00488   TStrV ColV;
00489   char Line [1024];
00490   FILE *PatF = fopen(PatentFNm.CStr(), "rt");
00491   fgets(Line, 1024, PatF); // skip 1st line
00492   while (! feof(PatF)) {
00493     Line[0] = 0;
00494     fgets(Line, 1024, PatF);
00495     if (strlen(Line) == 0) break;
00496     TStr(Line).SplitOnAllCh(',', ColV, false);
00497     IAssert(ColV.Len() == 23);
00498     const int PatentId = ColV[PatIdCol].GetInt();
00499     const int GrantYear = ColV[GYearCol].GetInt();
00500     IAssert(! TimeNet.IsNode(PatentId));
00501     TimeNet.AddNode(PatentId, TSecTm(GrantYear)); // pretend year is a second
00502     if (++N % 100000 == 0) printf("\r  %dk", N/1000);
00503   }
00504   printf("\r  %d patents read. %s\n", N, ExeTm.GetTmStr());
00505   fclose(PatF);
00506   // load citations (file cite75_99.txt)
00507   printf("\nLoading patent citations...\n");
00508   int NewSrcIds=0, NewDstIds=0, DupLinks=0, NewCits=0;
00509   N = 0;  ExeTm.Tick();
00510   TStr SrcId, DstId;
00511   FILE *CiteF = fopen(CiteFNm.CStr(), "rt");
00512   fgets(Line, 1024, CiteF); // skip 1st line
00513   while (! feof(CiteF)) {
00514     Line[0] = 0;
00515     fgets(Line, 1024, CiteF);
00516     if (strlen(Line) == 0) break;
00517     Line[strlen(Line)-1] = 0; // delete trailing '\n'
00518     TStr(Line).SplitOnCh(SrcId, ',', DstId);
00519     const int SrcNId = SrcId.GetInt();
00520     const int DstNId = DstId.GetInt();
00521     if (! TimeNet.IsNode(SrcNId) && ! TimeNet.IsNode(DstNId)) {
00522       //TimeNet.AddNode(SrcNId, TSecTm(1, 1, 1));  NewSrcIds++;
00523       //TimeNet.AddNode(DstNId, TSecTm(1, 1, 1));  NewDstIds++;
00524       NewCits++;
00525       continue;
00526     }
00527     else if (TimeNet.IsNode(SrcNId) && ! TimeNet.IsNode(DstNId)) {
00528       TimeNet.AddNode(DstNId, TimeNet.GetNDat(SrcNId));  NewDstIds++;
00529     }
00530     else if (! TimeNet.IsNode(SrcNId) && TimeNet.IsNode(DstNId)) {
00531       TimeNet.AddNode(SrcNId, TimeNet.GetNDat(DstNId));  NewSrcIds++;
00532     }
00533     if (! TimeNet.IsEdge(SrcNId, DstNId)) {
00534       TimeNet.AddEdge(SrcNId, DstNId);
00535     } else { DupLinks++; }
00536     if (++N % 100000 == 0) printf("\r  %dk", N/1000);
00537   }
00538   fclose(CiteF);
00539   printf("\r  %d citations read. %s\n\n", N, ExeTm.GetTmStr());
00540   printf("Graph: nodes: %d    edges: %d\n", TimeNet.GetNodes(), TimeNet.GetEdges());
00541   printf("Removing 0-degree nodes: %d nodes\n", TSnap::CntDegNodes(TimeNetPt, 0));
00542   TIntV RmNIdV;
00543   for (TTimeNet::TNodeI ni = TimeNet.BegNI(); ni < TimeNet.EndNI(); ni++) {
00544     if (ni.GetDeg() == 0) { RmNIdV.Add(ni.GetId()); }
00545   }
00546   for (int i = 0; i < RmNIdV.Len(); i++) {
00547     TimeNet.DelNode(RmNIdV[i]);
00548   }
00549   TimeNet.Defrag(true);
00550   printf("\nFinal graph: nodes: %d    edges: %d\n", TimeNet.GetNodes(), TimeNet.GetEdges());
00551   printf("  Duplicate citations                    : %d\n", DupLinks);
00552   printf("  Citations between unknown time nodes   : %d\n", NewCits);
00553   printf("  Nodes without time which make citations: %d\n", NewSrcIds);
00554   printf("  Nodes without time which are cited     : %d\n", NewDstIds);
00555   return TimeNetPt;
00556 }
00557 
00558 // Amazon ShareTheLove: nodes are people, we add an edge when there is a first
00559 // recommendation between 2 people (we count each edge only once (regardless
00560 // of how many recommendations there were between 2 persons)
00561 PTimeNet TTimeNet::LoadAmazon(const TStr& StlFNm) {
00562   PTimeNet TimeNetPt = TTimeNet::New();
00563   TTimeNet& TimeNet = *TimeNetPt;
00564   TimeNet.Reserve(3953993, -1);
00565   printf("Amazon Share-the-Love...\n");
00566   char line [2024], MonthStr[4];
00567   int NLines=0;
00568   TStrV ColV;
00569   FILE *F = fopen(StlFNm.CStr(), "rt");
00570   while (! feof(F)) {
00571     memset(line, 0, 2024);
00572     fgets(line, 2024, F);
00573     if (strlen(line) == 0) break;
00574     TStr(line).SplitOnAllCh(',', ColV);
00575     const int SrcNId = ColV[0].GetInt();
00576     const int DstNId = ColV[1].GetInt();
00577     // time data
00578     TStr TmStr = ColV[2]; // time-format: 29JAN02:21:55:23
00579     int Year = TmStr.GetSubStr(5, 6).GetInt();
00580     if (Year < 10) { Year += 2000; } else { Year += 1900; }
00581     MonthStr[0]=toupper(TmStr[2]);  MonthStr[1]=tolower(TmStr[3]);
00582     MonthStr[2]=tolower(TmStr[4]);  MonthStr[3]=0;
00583     const int Month = TTmInfo::GetMonthN(MonthStr, lUs);
00584     const int Day = TmStr.GetSubStr(0, 1).GetInt();
00585     const int Hour = TmStr.GetSubStr(8, 9).GetInt();
00586     const int Min = TmStr.GetSubStr(11, 12).GetInt();
00587     const int Sec = TmStr.GetSubStr(14, 15).GetInt();
00588     // add nodes and links
00589     if (! TimeNet.IsNode(SrcNId)) { TimeNet.AddNode(SrcNId, TSecTm(Year, Month, Day, Hour, Min, Sec)); }
00590     if (! TimeNet.IsNode(DstNId)) { TimeNet.AddNode(DstNId, TSecTm(Year, Month, Day, Hour, Min, Sec)); }
00591     if (! TimeNet.IsEdge(SrcNId, DstNId)) { TimeNet.AddEdge(SrcNId, DstNId); }
00592     if (++NLines % 100000 == 0) printf("\r  %dk", NLines/1000);
00593   }
00594   fclose(F);
00595   printf("\r  %d lines read\n", NLines);
00596   printf("Graph: nodes: %d  edges: %d\n", TimeNet.GetNodes(), TimeNet.GetEdges());
00597   TimeNet.Defrag(true);
00598   return TimeNetPt;
00599 }
00600 
00602 // Time Node-Edge Network
00603 TTimeNENet& TTimeNENet::operator = (const TTimeNENet& TimeNet) {
00604   if (this != &TimeNet) {
00605     TNet::operator=(TimeNet);
00606   }
00607   return *this;
00608 }
00609 
00610 PTimeNet TTimeNENet::GetTimeNet() const {
00611   PTimeNet NewNet = TTimeNet::New();
00612   NewNet->Reserve(GetNodes(), -1);
00613   for (TNodeI NI = BegNI(); NI < EndNI(); NI++) {
00614     NewNet->AddNode(NI.GetId(), NI.GetDat());
00615   }
00616   for (TEdgeI EI = BegEI(); EI < EndEI(); EI++) {
00617     const int src = EI.GetSrcNId();
00618     const int dst = EI.GetDstNId();
00619     if (! NewNet->IsEdge(src, dst)) {
00620       NewNet->AddEdge(src, dst); }
00621   }
00622   NewNet->Defrag();
00623   return NewNet;
00624 }
00625 
00626 // only take earliest edge between the nodes
00627 PTimeNENet TTimeNENet::Get1stEdgeNet() const {
00628   PTimeNENet Net = TTimeNENet::New();
00629   Net->Reserve(GetNodes(), -1);
00630   for (TNodeI NI = BegNI(); NI < EndNI(); NI++) {
00631     Net->AddNode(NI.GetId(), NI.GetDat()); }
00632   TIntV EIdV;  GetEIdByTm(EIdV);
00633   TIntPrSet EdgeSet(GetEdges());
00634   for (int edge = 0; edge < EIdV.Len(); edge++) {
00635     const TEdgeI EI = GetEI(EIdV[edge]);
00636     const int Src = EI.GetSrcNId();
00637     const int Dst = EI.GetDstNId();
00638     if (Src==Dst || EdgeSet.IsKey(TIntPr(TMath::Mn(Src, Dst), TMath::Mx(Src, Dst)))) { continue; } // take only 1st edge
00639     EdgeSet.AddKey(TIntPr(TMath::Mn(Src, Dst), TMath::Mx(Src, Dst)));
00640     Net->AddEdge(EI);
00641   }
00642   return Net;
00643 }
00644 
00645 PTimeNENet TTimeNENet::GetSubGraph(const TIntV& NIdV) const {
00646   PTimeNENet NewNetPt = TTimeNENet::New();
00647   TTimeNENet& NewNet = *NewNetPt;
00648   NewNet.Reserve(NIdV.Len(), -1);
00649   int node, edge;
00650   TNodeI NI;
00651   for (node = 0; node < NIdV.Len(); node++) {
00652     NewNet.AddNode(NIdV[node], GetNDat(NIdV[node]));
00653   }
00654   for (node = 0; node < NIdV.Len(); node++) {
00655     NI = GetNI(NIdV[node]);
00656     for (edge = 0; edge < NI.GetOutDeg(); edge++) {
00657       const TEdgeI EI = GetEI(NI.GetOutEId(edge));
00658       if (NewNet.IsNode(EI.GetDstNId())) {
00659         NewNet.AddEdge(EI); }
00660     }
00661   }
00662   NewNet.Defrag();
00663   return NewNetPt;
00664 }
00665 
00666 PTimeNENet TTimeNENet::GetESubGraph(const TIntV& EIdV) const {
00667   PTimeNENet NewNetPt = TTimeNENet::New();
00668   TTimeNENet& NewNet = *NewNetPt;
00669   NewNet.Reserve(-1, EIdV.Len());
00670   for (int edge = 0; edge < EIdV.Len(); edge++) {
00671     const TEdgeI Edge = GetEI(EIdV[edge]);
00672     if (! NewNet.IsNode(Edge.GetSrcNId()))
00673       NewNet.AddNode(GetNI(Edge.GetSrcNId()));
00674     if (! NewNet.IsNode(Edge.GetDstNId()))
00675       NewNet.AddNode(GetNI(Edge.GetDstNId()));
00676     NewNet.AddEdge(Edge);
00677   }
00678   NewNet.Defrag();
00679   return NewNetPt;
00680 }
00681 
00682 // assumes that the edges of the network are sorted in time
00683 PTimeNENet TTimeNENet::GetGraphUpToTm(const TSecTm& MaxEdgeTm) const {
00684   PTimeNENet NewNetPt = TTimeNENet::New();
00685   TTimeNENet& NewNet = *NewNetPt;
00686   TSecTm PrevTm;
00687   for (TEdgeI EI = BegEI(); EI < EndEI(); EI++) {
00688     if (EI() > MaxEdgeTm) { break; }
00689     if (! NewNet.IsNode(EI.GetSrcNId()))
00690       NewNet.AddNode(GetNI(EI.GetSrcNId()));
00691     if (! NewNet.IsNode(EI.GetDstNId()))
00692       NewNet.AddNode(GetNI(EI.GetDstNId()));
00693     NewNet.AddEdge(EI);
00694     IAssert(! PrevTm.IsDef() || PrevTm <= EI()); // edge times must be sorted
00695     PrevTm = EI();
00696   }
00697   NewNet.Defrag();
00698   return NewNetPt;
00699 }
00700 
00701 void TTimeNENet::SortNodeEdgeTimes() {
00702   NodeH.SortByDat(true);
00703   EdgeH.SortByDat(true);
00704 }
00705 
00706 // node time must be smaller than times of incoming or outgoing edges
00707 void TTimeNENet::UpdateNodeTimes() {
00708   int Cnt = 0;
00709   for (TNodeI NI = BegNI(); NI < EndNI(); NI++) {
00710     TSecTm& NodeTm = NI();
00711     for (int edge = 0; edge < NI.GetOutDeg(); edge++) {
00712       const TSecTm& EdgeTm = GetEDat(NI.GetOutEId(edge));
00713       if (! NodeTm.IsDef() || EdgeTm < NodeTm) { NodeTm = EdgeTm; Cnt++; }
00714     }
00715     for (int edge = 0; edge < NI.GetInDeg(); edge++) {
00716       const TSecTm& EdgeTm = GetEDat(NI.GetInEId(edge));
00717       if (! NodeTm.IsDef() || EdgeTm < NodeTm) { NodeTm = EdgeTm; Cnt++; }
00718     }
00719   }
00720   printf("Update node times: %d/%d updates\n", Cnt, GetNodes());
00721 }
00722 
00723 void TTimeNENet::SetNodeTmToFirstEdgeTm() {
00724   int Cnt = 0;
00725   for (TNodeI NI = BegNI(); NI < EndNI(); NI++) {
00726     if (NI.GetDeg() == 0) { continue; }
00727     TSecTm NodeTm;
00728     for (int edge = 0; edge < NI.GetOutDeg(); edge++) {
00729       const TSecTm& EdgeTm = GetEDat(NI.GetOutEId(edge));  IAssert(EdgeTm.IsDef());
00730       if (! NodeTm.IsDef() || EdgeTm < NodeTm) { NodeTm = EdgeTm; Cnt++; }
00731     }
00732     for (int edge = 0; edge < NI.GetInDeg(); edge++) {
00733       const TSecTm& EdgeTm = GetEDat(NI.GetInEId(edge));  IAssert(EdgeTm.IsDef());
00734       if (! NodeTm.IsDef() || EdgeTm < NodeTm) { NodeTm = EdgeTm; Cnt++; }
00735     }
00736     GetNDat(NI.GetId()) = NodeTm;
00737   }
00738   printf("Node times set: %d/%d updates\n", Cnt, GetNodes());
00739 }
00740 
00741 // shuffle edge arrivals
00742 void TTimeNENet::SetRndEdgeTimes(const int& MinTmEdge) {
00743   printf("Shuffling last %d (%d%%) edge arrival times..\n", GetEdges()-MinTmEdge, int(100.0*(GetEdges()-MinTmEdge)/double(GetEdges())));
00744   TIntV RndEIdV;  GetEIdByTm(RndEIdV);
00745   TIntV TrueEIdV = RndEIdV;
00746   TSecTmV TrueTmV;
00747   const int SwapLen = RndEIdV.Len()-MinTmEdge;
00748   for (int R = 0; R < 10; R++) {
00749     for (int i = MinTmEdge; i < RndEIdV.Len(); i++) {
00750       RndEIdV.Swap(TInt::Rnd.GetUniDevInt(SwapLen)+MinTmEdge, TInt::Rnd.GetUniDevInt(SwapLen)+MinTmEdge); }
00751   }
00752   for (int e = 0; e < TrueEIdV.Len(); e++) {
00753     TrueTmV.Add(GetEDat(TrueEIdV[e])); }
00754   for (int e = 0; e < RndEIdV.Len(); e++) {
00755     GetEDat(RndEIdV[e]) = TrueTmV[e]; }
00756   UpdateNodeTimes();
00757 }
00758 
00759 void TTimeNENet::DumpTimeStat() const {
00760   TSecTm MnNodeTm, MxNodeTm;
00761   TSecTm MnEdgeTm, MxEdgeTm;
00762   for (TNodeI NI = BegNI(); NI < EndNI(); NI++) {
00763     const TSecTm NodeTm = NI();
00764     if (! MnNodeTm.IsDef() || MnNodeTm>NodeTm) { MnNodeTm = NodeTm; }
00765     if (! MxNodeTm.IsDef() || MxNodeTm<NodeTm) { MxNodeTm = NodeTm; }
00766   }
00767   printf("Node times:\n  %s\n  %s\n", MnNodeTm.GetStr().CStr(), MxNodeTm.GetStr().CStr());
00768   for (TEdgeI EI= BegEI(); EI < EndEI(); EI++) {
00769     const TSecTm EdgeTm = EI();
00770     if (! MnEdgeTm.IsDef() || MnEdgeTm>EdgeTm) { MnEdgeTm = EdgeTm; }
00771     if (! MxEdgeTm.IsDef() || MxEdgeTm<EdgeTm) { MxEdgeTm = EdgeTm; }
00772   }
00773   printf("Edge times:\n  %s\n  %s\n", MnEdgeTm.GetStr().CStr(), MxEdgeTm.GetStr().CStr());
00774 }
00775 
00776 void TTimeNENet::GetNIdByTm(TIntV& NIdV) const {
00777   TVec<TKeyDat<TSecTm, TInt> > TmToNIdV(GetNodes(), 0);
00778   for (TNodeI NodeI = BegNI(); NodeI < EndNI(); NodeI++) {
00779     TmToNIdV.Add(TKeyDat<TSecTm, TInt>(NodeI.GetDat(), NodeI.GetId())); }
00780   TmToNIdV.Sort();
00781   NIdV.Gen(GetNodes(), 0);
00782   for (int i = 0; i < TmToNIdV.Len(); i++) {
00783     NIdV.Add(TmToNIdV[i].Dat); }
00784 }
00785 
00786 void TTimeNENet::GetEIdByTm(TIntV& EIdV) const {
00787   TVec<TKeyDat<TSecTm, TInt> > TmToEIdV(GetEdges(), 0);
00788   for (TEdgeI EI= BegEI(); EI < EndEI(); EI++) {
00789     TmToEIdV.Add(TKeyDat<TSecTm, TInt>(EI.GetDat(), EI.GetId())); }
00790   TmToEIdV.Sort();
00791   EIdV.Gen(GetEdges(), 0);
00792   for (int i = 0; i < TmToEIdV.Len(); i++) {
00793     EIdV.Add(TmToEIdV[i].Dat); }
00794 }
00795 
00796 void TTimeNENet::GetTmBuckets(const TTmUnit& TmUnit, TTimeNet::TTmBucketV& TmBucketV) const {
00797   THash<TInt, TIntV> TmIdToNIdVH;
00798   TIntV NIdV;  GetNIdByTm(NIdV);
00799   for (int n = 0; n < NIdV.Len(); n++) {
00800     const int TmId = GetNDat(NIdV[n]).Round(TmUnit).GetAbsSecs();
00801     if (! TmIdToNIdVH.IsKey(TmId)) { TmIdToNIdVH.AddKey(TmId); }
00802     TmIdToNIdVH.GetDat(TmId).Add(NIdV[n]);
00803   }
00804   TVec<TPair<TInt, TIntV> > TmIdNIdVV;
00805   TmIdToNIdVH.GetKeyDatPrV(TmIdNIdVV);
00806   TmIdNIdVV.Sort();
00807   TmBucketV.Gen(TmIdNIdVV.Len());
00808   for (int i = 0; i < TmIdNIdVV.Len(); i++) {
00809     TTimeNet::TTmBucket& Bucket = TmBucketV[i];
00810     Bucket.BegTm = TmIdNIdVV[i].Val1;
00811     Bucket.NIdV = TmIdNIdVV[i].Val2;
00812   }
00813 }
00814 
00815 void TTimeNENet::GetEdgeTmBuckets(const TTmUnit& TmUnit, TTimeNet::TTmBucketV& TmBucketV) const {
00816   THash<TInt, TIntV> TmIdToEIdVH;
00817   TIntV EIdV;  GetEIdByTm(EIdV);
00818   for (int e = 0; e < EIdV.Len(); e++) {
00819     const int TmId = GetEDat(EIdV[e]).Round(TmUnit).GetAbsSecs();
00820     if (! TmIdToEIdVH.IsKey(TmId)) { TmIdToEIdVH.AddKey(TmId); }
00821     TmIdToEIdVH.GetDat(TmId).Add(EIdV[e]);
00822   }
00823   TVec<TPair<TInt, TIntV> > TmIdEIdVV;
00824   TmIdToEIdVH.GetKeyDatPrV(TmIdEIdVV);
00825   TmIdEIdVV.Sort();
00826   TmBucketV.Gen(TmIdEIdVV.Len());
00827   for (int i = 0; i < TmIdEIdVV.Len(); i++) {
00828     TTimeNet::TTmBucket& Bucket = TmBucketV[i];
00829     Bucket.BegTm = TmIdEIdVV[i].Val1;
00830     Bucket.NIdV = TmIdEIdVV[i].Val2;
00831   }
00832 }
00833 
00834 void TTimeNENet::GetNodeBuckets(const int NodesPerBucket, TTimeNet::TTmBucketV& TmBucketV) const {
00835   TIntV NIdV;  GetNIdByTm(NIdV);
00836   TmBucketV.Gen(NIdV.Len() / NodesPerBucket + 1, 0);
00837   for (int i = 0; i < NIdV.Len(); i++) {
00838     const int b = i/NodesPerBucket;
00839     if (TmBucketV.Len() <= b) { TmBucketV.Add(TTimeNet::TTmBucket(TSecTm(b))); }
00840     TmBucketV[b].NIdV.Add(NIdV[i]);
00841   }
00842 }
00843 
00844 void TTimeNENet::GetEdgeBuckets(const int EdgesPerBucket, TTimeNet::TTmBucketV& TmBucketV) const {
00845   TIntV EIdV;  GetEIdByTm(EIdV);
00846   TmBucketV.Gen(EIdV.Len()/EdgesPerBucket + 1, 0);
00847   for (int i = 0; i < EIdV.Len(); i++) {
00848     const int b = i/EdgesPerBucket;
00849     if (TmBucketV.Len() <= b) { TmBucketV.Add(TTimeNet::TTmBucket(TSecTm(b))); }
00850     TmBucketV[b].NIdV.Add(EIdV[i]);
00851   }
00852 }
00853 
00854 // get edges that close triangles over time
00855 int TTimeNENet::GetTriadEdges(TIntV& TriadEIdV) const {
00856   PUNGraph Graph = TUNGraph::New(GetNodes(), GetEdges());
00857   TIntV EIdV;  GetEIdByTm(EIdV);
00858   TriadEIdV.Clr();
00859   TExeTm ExeTm;
00860   for (int edge = 0; edge < EIdV.Len(); edge++) {
00861     const TEdgeI EI = GetEI(EIdV[edge]);
00862     const int Src = EI.GetSrcNId();
00863     const int Dst = EI.GetDstNId();
00864     if (Src==Dst || Graph->IsEdge(Src, Dst)) { continue; } // take only 1st edge
00865     if (! Graph->IsNode(Src)) { Graph->AddNode(Src); }
00866     if (! Graph->IsNode(Dst)) { Graph->AddNode(Dst); }
00867     if (TSnap::GetCmnNbrs(Graph, Src, Dst) > 0) { TriadEIdV.Add(EIdV[edge]); }
00868     Graph->AddEdge(Src, Dst);
00869     if (edge % 10000 == 0) {
00870       printf("\redges %dk / %dk: triangle edges: %dk [total %s]", edge/1000, EIdV.Len()/1000,
00871         TriadEIdV.Len()/1000, ExeTm.GetStr()); }
00872   }
00873   return Graph->GetEdges();
00874 }
00875 
00876 PGStatVec TTimeNENet::TimeGrowth(const TTmUnit& TimeStep, const TFSet& TakeStat, const TSecTm& StartTm) const {
00877   TExeTm ExeTm;
00878   PGStatVec GStatVec = TGStatVec::New(TimeStep, TakeStat);
00879   TTimeNet::TTmBucketV TmBucketV;
00880   GetEdgeTmBuckets(TimeStep, TmBucketV);
00881   const PNEGraph FullGraph = TSnap::ConvertGraph<PNEGraph>(PTimeNENet((TTimeNENet*)this));
00882   TIntV EdgeIdV;
00883   for (int t = 0; t < TmBucketV.Len(); t++) {
00884     EdgeIdV.AddV(TmBucketV[t].NIdV); // edges up to time T
00885     printf("\n***%d/%d: %s (%d edges) ", t+1, TmBucketV.Len(), TmBucketV[t].BegTm.GetStr().CStr(), EdgeIdV.Len());  ExeTm.Tick();
00886     if (TmBucketV[t].BegTm < StartTm) { continue; }
00887     const PNEGraph PreGraph = TSnap::GetESubGraph(FullGraph, EdgeIdV);
00888     GStatVec->Add(PreGraph, TmBucketV[t].BegTm);
00889     printf("  [%s]\n", ExeTm.GetTmStr());
00890     //{ TFOut FOut("LinkedIn.GStatVec");  GStatVec->Save(FOut); }
00891   }
00892   return GStatVec;
00893 }
00894 
00895 // take network from last TakeNTmUnits and measure properties
00896 PGStatVec TTimeNENet::TimeGrowth(const TStr& FNmPref, const TStr& Desc, const TFSet& TakeStat, const int& NDiamRuns,
00897                             const TTmUnit& TmUnit, const int& TakeNTmUnits, const bool& LinkBWays) const {
00898   TGStat::NDiamRuns = NDiamRuns;
00899   PGStatVec GrowthStat = TGStatVec::New(TmUnit, TakeStat);
00900   TTimeNet::TTmBucketV TmBucketV;
00901   GetEdgeTmBuckets(TmUnit, TmBucketV);
00902   TIntV EdgeIdV;
00903   TExeTm ExeTm;
00904   for (int t = 0; t < TmBucketV.Len(); t++) {
00905     // take graph over last TakeNTmUnits time units
00906     if (TakeNTmUnits == -1) {
00907       EdgeIdV.AddV(TmBucketV[t].NIdV); }
00908     else {
00909       if (t < TakeNTmUnits) { continue; }
00910       EdgeIdV.Clr(false);
00911       for (int i = t-TakeNTmUnits; i < t; i++) { EdgeIdV.AddV(TmBucketV[i].NIdV); }
00912     }
00913     printf("*** %s (%d edges)\n", TmBucketV[t].BegTm.GetStr().CStr(), EdgeIdV.Len());  ExeTm.Tick();
00914     PNEGraph PreGraph = TSnap::ConvertESubGraph<PNEGraph>(PTimeNENet((TTimeNENet*)this), EdgeIdV);
00915     if (LinkBWays) {
00916       TIntV KeepEIdV; // keep only nodes that have in- and out-links (send and receive email)
00917       for (TNEGraph::TEdgeI EI = PreGraph->BegEI(); EI < PreGraph->EndEI(); EI++) {
00918         if (PreGraph->IsEdge(EI.GetDstNId(), EI.GetSrcNId(), true)) { KeepEIdV.Add(EI.GetId()); }
00919       }
00920       PreGraph = TSnap::GetESubGraph(PreGraph, KeepEIdV);
00921     }
00922     // take statistics
00923     GrowthStat->Add(PreGraph, TmBucketV[t].BegTm);
00924     { TFOut FOut(TStr::Fmt("growth.%s.gStatVec", FNmPref.CStr()));
00925     GrowthStat->Save(FOut); }
00926     GrowthStat->SaveTxt(FNmPref, Desc);
00927     printf("  [%s]\n", ExeTm.GetTmStr());
00928   }
00929   return GrowthStat;
00930 }
00931 
00932 void TTimeNENet::PlotEffDiam(const TStr& FNmPref, const TStr& Desc, const TTmUnit& TmUnit,
00933                              const TSecTm& StartTm, const int& NDiamRuns, const bool& OnlyWcc) const {
00934   TTimeNet::TTmBucketV TmBucketV;
00935   GetEdgeTmBuckets(TmUnit, TmBucketV);
00936   PNEGraph FullGraph = TSnap::ConvertGraph<PNEGraph>(PTimeNENet((TTimeNENet*)this));
00937   TIntV EdgeIdV;
00938   TExeTm ExeTm, Run1Tm;
00939   TFltTrV TmDiamV, NdsDiamV;
00940   for (int t = 0; t < TmBucketV.Len(); t++) {
00941     EdgeIdV.AddV(TmBucketV[t].NIdV); // edges up to time T
00942     printf("\n*** %s (%d edges)\n", TmBucketV[t].BegTm.GetStr(TmUnit).CStr(), EdgeIdV.Len());  ExeTm.Tick();
00943     if (TmBucketV[t].BegTm < StartTm) continue;
00944     PNGraph PreGraph = TSnap::ConvertESubGraph<PNGraph>(FullGraph, EdgeIdV);
00945     TMom Mom;
00946     double EffDiam = 0.0;
00947     for (int r = 0; r < NDiamRuns; r++) {
00948       printf("%d...", r+1);  Run1Tm.Tick();
00949       if (OnlyWcc) { EffDiam = TSnap::GetAnfEffDiam(TSnap::GetMxWcc(PreGraph)); }
00950       else { EffDiam = TSnap::GetAnfEffDiam(PreGraph); }
00951       Mom.Add(EffDiam);
00952       printf("[%s]\r", Run1Tm.GetTmStr());
00953     }
00954     Mom.Def();
00955     TmDiamV.Add(TFltTr(TmBucketV[t].BegTm.Round(TmUnit).GetAbsSecs(), Mom.GetMean(), Mom.GetSDev()));
00956     NdsDiamV.Add(TFltTr(PreGraph->GetNodes(), Mom.GetMean(), Mom.GetSDev()));
00957     NdsDiamV.Sort();
00958     printf("  [%s]          \n", ExeTm.GetTmStr());
00959     const TStr WccStr = OnlyWcc ? "WCC " : TStr::GetNullStr();
00960     { TGnuPlot GnuPlot("diamEff1."+FNmPref, TStr::Fmt("%s. G(%d, %d). %d RUNS.", Desc.CStr(), GetNodes(), GetEdges(), NDiamRuns));
00961     GnuPlot.SetXYLabel(TStr::Fmt("TIME [%s]", TTmInfo::GetTmUnitStr(TmUnit).CStr()), "AVERAGE "+WccStr+"Effective Diameter");
00962     GnuPlot.AddErrBar(TmDiamV, "", "");
00963     GnuPlot.SavePng(); }
00964     { TGnuPlot GnuPlot("diamEff2."+FNmPref, TStr::Fmt("%s. G(%d, %d). %d RUNS.", Desc.CStr(), GetNodes(), GetEdges(), NDiamRuns));
00965     GnuPlot.SetXYLabel("NODES", "AVERAGE "+WccStr+"Effective Diameter");
00966     GnuPlot.AddErrBar(NdsDiamV, "", "");
00967     GnuPlot.SavePng(); }
00968   }
00969 }
00970 
00971 // pretend we only have link data starting in PostTmDiam
00972 // compare the diameter of the nodes after PostTmDiam with diameter
00973 // of the nodes after PostTmDiam but *also* using nodes and edges
00974 // from before PostTmDiam
00975 void TTimeNENet::PlotMissingPast(const TStr& FNmPref, const TStr& Desc, const TTmUnit& TmUnit,
00976                                  const TSecTm& DelPreTmEdges, const TSecTm& PostTmDiam, const bool& LinkBWays) {
00977   printf("\nGrowth over time: degree distribution, Growth Power Law, Diameter.\n  %s group by %s.\n",
00978     FNmPref.CStr(), TTmInfo::GetTmUnitStr(TmUnit).CStr());
00979   printf("  Delete out-edges of pre time %s nodes.\n  Take subgraph of post year %s subgraph.\n\n",
00980     DelPreTmEdges.GetStr().CStr(), PostTmDiam.GetStr().CStr());
00981   // run diameter
00982   /*const int NSamples = 100;
00983   const int NDiamRuns = 10;
00984   // delete past
00985   if (DelPreEdges != -1) {
00986     TIntV EdgeIdV;
00987     printf("Deleting pre-year %d edges\n", DelPreEdges);
00988     for (TEdgeI EI = BegEI(); EI < EndEI(); EI++) {
00989       if (EI().GetYear() < DelPreEdges) { EdgeIdV.Add(EI.GetId()); } }
00990     for (int e = 0; e < EdgeIdV.Len(); e++) { DelEdge(EdgeIdV[e]); }
00991     printf("  Deleted %d edges (out of %d edges total).\n", EdgeIdV.Len(), GetEdges());
00992   }
00993   PNEGraph FullGraph = GetNEGraph();
00994   TSnap::DelZeroDegNodes(FullGraph);
00995 
00996   PGrowthStat GrowthStat = TGrowthStat::New(TmUnit, TGraphStat::AllStat());
00997   TFltV PreDiamSDev, PreEffDiamSDev, WccDiamSDev, WccEffDiamSDev;
00998   TIntV EdgeIdV;
00999   TExeTm ExeTm;
01000   TTimeNet::TTmBucketV EdgeTmBucketV;
01001   GetEdgeTmBuckets(TmUnit, EdgeTmBucketV);
01002   for (int t = 0; t < EdgeTmBucketV.Len(); t++) {
01003     printf("\nGraph: %s (%d / %d) [%s]\n", EdgeTmBucketV[t].BegTm.GetTmStr().CStr(),
01004       t+1, EdgeTmBucketV.Len(), TExeTm::GetCurTm());
01005     // up-to-year subgraph
01006     EdgeIdV.AddV(EdgeTmBucketV[t].NIdV); // nodes up to time T
01007     if (EdgeTmBucketV[t].BegTm.GetYear() < PostYearDiam) continue;
01008 
01009     const PNEGraph PreNEGraph = EdgeBothWays ? FullGraph->GetEdgeSubGraph(EdgeIdV) : FullGraph;
01010     PNGraph PreGraph = PreNEGraph->GetBWayGraph();
01011     PNGraph WccGraph = TSnap::GetMxWcc(PreGraph);
01012 
01013     // find nodes that are not in missing past
01014     TIntV PostYearNIdV, WccPostYearNIdV;
01015     THash<TIntPr, TInt> PostYearEdgeH;
01016     for (TNEGraph::TEdgeI EI = PreNEGraph->BegEI(); EI < PreNEGraph->EndEI(); EI++) {
01017       if (GetEDat(EI.GetId()).GetYear() >= PostYearDiam) {
01018         PostYearEdgeH.AddKey(TIntPr(EI.GetSrcNId(), EI.GetDstNId())); }
01019     }
01020     TIntH PostYearNIdH;
01021     for (int i = 0; i < PostYearEdgeH.Len(); i++) {
01022       if ((! EdgeBothWays) || (EdgeBothWays && PostYearEdgeH.IsKey(TIntPr(PostYearEdgeH.GetKey(i).Val2, PostYearEdgeH.GetKey(i).Val1)))) { //reverse edge exists
01023         PostYearNIdH.AddKey(PostYearEdgeH.GetKey(i).Val1);
01024         PostYearNIdH.AddKey(PostYearEdgeH.GetKey(i).Val2);
01025       }
01026     }
01027     PostYearNIdH.GetKeyV(PostYearNIdV);
01028     for (int i = 0; i < PostYearNIdV.Len(); i++) {
01029       if (WccGraph->IsNode(PostYearNIdV[i])) {
01030         WccPostYearNIdV.Add(PostYearNIdV[i]); }
01031     }
01032 
01033     // diameter of PostYearDiam subgraph using whole graph (all edges)
01034     TMom PreDiamMom, PreEffDiamMom, WccDiamMom, WccEffDiamMom;
01035     for (int r = 0; r < NDiamRuns; r++) {
01036       if (! PostYearNIdV.Empty()) {
01037         PreDiamMom.Add(-1); //PreDiamMom.Add(TSnap::GetBfsFullDiam(PreGraph, NSamples, PostYearNIdV, false));
01038         PreEffDiamMom.Add(TSnap::GetBfsEffDiam(PreGraph, NSamples, PostYearNIdV, false));
01039       }
01040       if (! WccPostYearNIdV.Empty()) {
01041         //WccDiamMom.Add(TSnap::GetBfsFullDiam(WccGraph, NSamples, WccPostYearNIdV, false));
01042         //WccEffDiamMom.Add(TSnap::GetBfsEffDiam(WccGraph, NSamples, WccPostYearNIdV, false));
01043         WccDiamMom.Add(-1);  WccEffDiamMom.Add(-1);
01044       }
01045       printf("  diam: %d  [%s]  %.2f\r", r+1, ExeTm.GetTmStr(), PreEffDiamMom.GetValV().Last().Val);  ExeTm.Tick();
01046     }
01047     PreDiamMom.Def();  PreEffDiamMom.Def();
01048     WccDiamMom.Def();  WccEffDiamMom.Def();
01049     // save stat
01050     PGraphStat GraphStatPt = GrowthStat->Add(EdgeTmBucketV[t].BegTm);
01051     TGraphStat& GraphStat = *GraphStatPt;
01052     GraphStat.Nodes = PreGraph->GetNodes();
01053     GraphStat.NonZNodes = PreGraph->GetNodes() - TSnap::CntDegNodes<PNGraph>(PreGraph, 0);
01054     GraphStat.Srcs =  GraphStat.Nodes - TSnap::CntOutDegNodes<PNGraph>(PreGraph, 0);
01055     GraphStat.Edges = PreGraph->GetEdges();
01056     GraphStat.WccNodes= WccGraph->GetNodes();
01057     GraphStat.WccEdges = WccGraph->GetEdges();
01058     GraphStat.FullDiam = PreDiamMom.GetMean(); // mean
01059     GraphStat.EffDiam = PreEffDiamMom.GetMean();
01060     GraphStat.FullWccDiam = WccDiamMom.GetMean();
01061     GraphStat.EffWccDiam = WccEffDiamMom.GetMean();
01062     GraphStat.FullDiamDev = PreDiamMom.GetSDev(); // variance
01063     GraphStat.EffDiamDev = PreEffDiamMom.GetSDev();
01064     GraphStat.FullWccDiamDev = WccDiamMom.GetSDev();
01065     GraphStat.EffWccDiamDev = WccEffDiamMom.GetSDev();
01066     { TFOut FOut("growth."+FNmPref+".bin");
01067     GrowthStat->Save(FOut); }
01068     const TStr BigDesc = TStr::Fmt("%s. MISSING PAST DIAMETER\nDelPreEdges\t%d\nPostYearDiam\t%d\n",
01069       Desc.CStr(), DelPreEdges, PostYearDiam);
01070     GrowthStat->SaveTxt(FNmPref, BigDesc);
01071   }
01072   // diameter plots
01073   GrowthStat->PlotDiam(FNmPref, Desc + TStr::Fmt(" MISSING PAST. DelPre:%d PostYear:%d.",
01074     DelPreEdges, PostYearDiam));
01075     */
01076 }
01077 
01078 // plot time difference of the node ages when an edge arrives
01079 /*void TTimeNENet::PlotEdgeNodeTimeDiff(const TStr& FNmPref, TStr Desc) const {
01080   if (Desc.Empty()) { Desc = FNmPref; }
01081   printf("Edge node time differences:\n");
01082   THash<TInt, TInt> NodeAgeH, Node1AgeH; // (time_dst - time_src), only 1st edge
01083   THash<TInt, TInt> SrcTmH, DstTmH; // time_edge - time_src/time_dst)
01084   PUNGraph Graph = TUNGraph::New(GetNodes(), -1);
01085   TIntV EIdV;  GetEIdByTm(EIdV);
01086   TIntH DegCntH(10, true);
01087   TExeTm ExeTm;
01088   for (int edge = 0; edge < EIdV.Len(); edge++) {
01089     const TEdgeI EI = GetEI(EIdV[edge]);
01090     const int Src = EI.GetSrcNId();
01091     const int Dst = EI.GetDstNId();
01092     if (Src==Dst || Graph->IsEdge(Src, Dst)) { continue; } // take only 1st edge
01093     if (! Graph->IsNode(Src)) { Graph->AddNode(Src); }
01094     if (! Graph->IsNode(Dst)) { Graph->AddNode(Dst); }
01095     const int SrcDeg = Graph->GetNode(Src).GetOutDeg();
01096     const int DstDeg = Graph->GetNode(Dst).GetInDeg();
01097     const int EdgeTm = EI.GetDat().GetAbsSecs();
01098     const int SrcTm = EI.GetSrcNDat().GetAbsSecs();
01099     const int DstTm = EI.GetDstNDat().GetAbsSecs();
01100     NodeAgeH.AddDat((DstTm-SrcTm)/3600) += 1;
01101     if (SrcDeg == 0 || DstDeg == 0) {
01102       Node1AgeH.AddDat((DstTm-SrcTm)/3600) += 1; }
01103     SrcTmH.AddDat((EdgeTm-SrcTm)/3600) += 1;
01104     DstTmH.AddDat((EdgeTm-DstTm)/3600) += 1;
01105     // add edge
01106     Graph->AddEdge(Src, Dst);
01107     if (edge % 1000 == 0) {
01108       printf("\r%dk / %dk [total %s]", edge/1000, EIdV.Len()/1000, ExeTm.GetStr()); }
01109   }
01110   TGnuPlot::PlotValCntH(NodeAgeH, "tmDstSrc."+FNmPref, TStr::Fmt("%s, Node time difference.", Desc.CStr()),
01111     "Time(destination) - Time(source) [hours]", "Count");
01112   TGnuPlot::PlotValCntH(Node1AgeH, "tmDstSrc1."+FNmPref, TStr::Fmt("%s. 1-ST EDGE of one of the nodes.", Desc.CStr()),
01113     "Time(destination) - Time(source) [hours]", "Count");
01114   TGnuPlot::PlotValCntH(SrcTmH, "tmEdgeSrc."+FNmPref, TStr::Fmt("%s. Edge time - source node time.", Desc.CStr()),
01115     "Edge time - source node time [hours]", "Count");
01116   TGnuPlot::PlotValCntH(DstTmH, "tmEdgeDst."+FNmPref, TStr::Fmt("%s. Edge time - destination node time.", Desc.CStr()),
01117     "Edge time - destination node time [hours]", "Count");
01118   printf("\n");
01119 }
01120 
01121 // plot the distribution whether the edge to node that closes the triad was
01122 // chosen random or preferentially
01123 void TTimeNENet::PlotCloseTriad(const TStr& FNmPref, TStr Desc, const bool& OnlyBackEdges) const {
01124   if (Desc.Empty()) { Desc = FNmPref; }
01125   printf("Random vs. preferential triad closing: %s\n", OnlyBackEdges?"OnlyBackEdges":"");
01126   THash<TInt, TInt> NodeAgeH, Node1AgeH; // (time_dst - time_src), only 1st edge
01127   THash<TInt, TInt> SrcTmH, DstTmH; // time_edge - time_src/time_dst)
01128   PUNGraph Graph = TUNGraph::New(GetNodes(), -1);
01129   TIntV EIdV;  GetEIdByTm(EIdV);
01130   TIntV CmnV, SrcNbrV, DstNbrV;
01131   TExeTm ExeTm;
01132   TFltV RndRdnV, PrefPrefV, PrefRndV, RndPrefV;
01133   TFltFltH RndRndH, PrefPrefH, PrefRndH, RndPrefH;
01134   int TriadEdges = 0;
01135   FILE *F = fopen(TStr::Fmt("%s-prob.tab", FNmPref.CStr()).CStr(), "wt");
01136   fprintf(F, "#Probability of picking a node that closes the triangle (pick preferentially vs. uniformly at random)\n");
01137   fprintf(F, "#i\tEId\tRndRnd\tRndPref\tPrefRnd\tPrefPref\n");
01138   for (int edge = 0; edge < EIdV.Len(); edge++) {
01139     const TEdgeI EI = GetEI(EIdV[edge]);
01140     const int Src = EI.GetSrcNId();
01141     const int Dst = EI.GetDstNId();
01142     if (Src==Dst || Graph->IsEdge(Src, Dst)) { continue; } // take only 1st edge
01143     if (! Graph->IsNode(Src)) { Graph->AddNode(Src); }
01144     if (! Graph->IsNode(Dst)) { Graph->AddNode(Dst); }
01145     if (OnlyBackEdges && EI.GetSrcNDat().GetAbsSecs()-EI.GetDstNDat().GetAbsSecs() < 0) {
01146       Graph->AddEdge(Src, Dst);  continue;
01147     }
01148     // find common neighbors
01149     const TUNGraph::TNodeI SrcNI = Graph->GetNI(Src);
01150     const TUNGraph::TNodeI DstNI = Graph->GetNI(Dst);
01151     SrcNbrV.Clr(false);  DstNbrV.Clr(false);
01152     for (int i = 0; i < SrcNI.GetOutDeg(); i++) { SrcNbrV.Add(SrcNI.GetOutNId(i)); }
01153     for (int i = 0; i < DstNI.GetOutDeg(); i++) { DstNbrV.Add(DstNI.GetOutNId(i)); }
01154     SrcNbrV.Sort();  DstNbrV.Sort();
01155     SrcNbrV.Intrs(DstNbrV, CmnV);
01156     if (! CmnV.Empty()) {
01157       // triad completion
01158       const int SrcDeg = Graph->GetNode(Src).GetOutDeg();
01159       const int DstDeg = Graph->GetNode(Dst).GetInDeg();
01160       double RndRnd=0, RndPref=0, PrefRnd=0, PrefPref=0;
01161       int AllNbrDegSum=0;//, NbrDegSum = 0;
01162       //for (int i = 0; i < CmnV.Len(); i++) {
01163       //  NbrDegSum += Graph->GetNode(CmnV[i]).GetOutDeg(); }
01164       for (int i = 0; i < SrcNI.GetOutDeg(); i++) {
01165         AllNbrDegSum += Graph->GetNode(SrcNI.GetOutNId(i)).GetOutDeg(); }
01166       // uniform-uniform node selection = 1/|cmn| sum_{i\in cmn} 1/(d_i-1)
01167       // preferential-uniform
01168       for (int i = 0; i < CmnV.Len(); i++) {
01169         const int Deg = Graph->GetNode(CmnV[i]).GetOutDeg();
01170         RndRnd += (1.0/double(SrcDeg)) * (1.0/double(Deg-1));
01171         PrefRnd +=  (Deg/double(AllNbrDegSum)) * (1.0/double(Deg-1));
01172       }
01173       // uniform-preferential selection = 1/|cmn| sum_{i\in cmn} d_t / sum_{i--j} d_j
01174       // preferential-preferential
01175       for (int i = 0; i < CmnV.Len(); i++) {
01176         const TUNGraph::TNode N = Graph->GetNode(CmnV[i]);
01177         const int Deg = N.GetOutDeg();
01178         int DegSum = 0;
01179         for (int j = 0; j < N.GetOutDeg(); j++) {
01180           if (N.GetOutNId(j) != Src) {
01181             DegSum += Graph->GetNode(N.GetOutNId(j)).GetOutDeg(); }
01182         }
01183         RndPref += 1.0/double(SrcDeg) * DstDeg/double(DegSum);
01184         PrefPref += (Deg/double(AllNbrDegSum)) * (DstDeg/double(DegSum));
01185       }
01186       IAssert(0.0<RndRnd && RndRnd<=1.0);   IAssert(0.0<RndPref && RndPref<=1.0);
01187       IAssert(0.0<PrefRnd && PrefRnd<=1.0);  IAssert(0.0<PrefPref && PrefPref<=1.0);
01188       RndRndH.AddDat(TMath::Round(RndRnd, 2)) += 1.0;
01189       RndPrefH.AddDat(TMath::Round(RndPref, 2)) += 1.0;
01190       PrefRndH.AddDat(TMath::Round(PrefRnd, 2)) += 1.0;
01191       PrefPrefH.AddDat(TMath::Round(PrefPref, 2)) += 1.0;
01192       fprintf(F, "%d\t%d\t%g\t%g\t%g\t%g\n", edge, EI.GetId(), RndRnd, RndPref, PrefRnd, PrefPref);
01193       TriadEdges++;
01194     }
01195     Graph->AddEdge(Src, Dst);
01196     if (edge % 1000 == 0) {
01197       printf("\r%dk / %dk triad edges %dk [total %s]", edge/1000, EIdV.Len()/1000, TriadEdges/1000, ExeTm.GetStr()); }
01198   }
01199   fclose(F);
01200   printf("\nRandom vs. preferential triad closing\n");
01201   printf("All edges:            %d\n", Graph->GetEdges());
01202   printf("Triad closing edges:  %d\n", TriadEdges);
01203   // plot distribution
01204   TFltPrV RndRndPrV;   RndRndH.GetKeyDatPrV(RndRndPrV);     RndRndPrV.Sort();
01205   TFltPrV RndPrefPrV;  RndPrefH.GetKeyDatPrV(RndPrefPrV);   RndPrefPrV.Sort();
01206   TFltPrV PrefRndPrV;  PrefRndH.GetKeyDatPrV(PrefRndPrV);   PrefRndPrV.Sort();
01207   TFltPrV PrefPrefPrV; PrefPrefH.GetKeyDatPrV(PrefPrefPrV); PrefPrefPrV.Sort();
01208   { TGnuPlot GP(TStr::Fmt("probPdf.%s", FNmPref.CStr()), TStr::Fmt("%s. %d/%d edges. CDF. Uniform vs. Preferential triangle closure.", Desc.CStr(), TriadEdges,Graph->GetEdges()));
01209   GP.AddPlot(RndRndPrV, gpwLinesPoints, "Random--Random");
01210   GP.AddPlot(RndPrefPrV, gpwLinesPoints, "Random--Preferential");
01211   GP.AddPlot(PrefRndPrV, gpwLinesPoints, "Preferential--Random");
01212   GP.AddPlot(PrefPrefPrV, gpwLinesPoints, "Preferential--Preferential");
01213   GP.SetXYLabel("Probability", "Count");
01214   GP.SavePng(); }
01215   { TGnuPlot GP(TStr::Fmt("probCdf.%s", FNmPref.CStr()), TStr::Fmt("%s. %d/%d edges. CDF. Uniform vs. Preferential triangle closure.", Desc.CStr(), TriadEdges,Graph->GetEdges()));
01216   GP.AddPlot(TGUtil::GetCdf(RndRndPrV), gpwLinesPoints, "Random--Random");
01217   GP.AddPlot(TGUtil::GetCdf(RndPrefPrV), gpwLinesPoints, "Random--Preferential");
01218   GP.AddPlot(TGUtil::GetCdf(PrefRndPrV), gpwLinesPoints, "Preferential--Random");
01219   GP.AddPlot(TGUtil::GetCdf(PrefPrefPrV), gpwLinesPoints, "Preferential--Preferential");
01220   GP.SetXYLabel("Cumulative probability", "Count");
01221   GP.SavePng(); }
01222 }*/
01223 
01224 PTimeNENet TTimeNENet::GetGnmRndNet(const int& Nodes, const int& Edges) {
01225   printf("Generating G_nm(%d, %d)\n", Nodes, Edges);
01226   int Src, Dst;
01227   PTimeNENet Net = TTimeNENet::New();
01228   Net->Reserve(Nodes, Edges);
01229   for (int e = 0; e < Edges; e++) {
01230     Src = TInt::Rnd.GetUniDevInt(Nodes);
01231     Dst = TInt::Rnd.GetUniDevInt(Nodes);
01232     while (Dst == Src || Net->IsEdge(Src, Dst)) {
01233       Dst = TInt::Rnd.GetUniDevInt(Nodes); }
01234     if (! Net->IsNode(Src)) { Net->AddNode(Src, TSecTm(e)); }
01235     if (! Net->IsNode(Dst)) { Net->AddNode(Dst, TSecTm(e)); }
01236     Net->AddEdge(Src, Dst, -1, TSecTm(e));
01237   }
01238   return Net;
01239 }
01240 
01241 // ACL, model B: Aiello, Chung, Lu: Random evolution in massive graphs
01242 PTimeNENet TTimeNENet::GetPrefAttach(const int& Nodes, const int& Edges, const double& GammaIn, const double& GammaOut) {
01243   const double Alpha = Nodes/double(Edges);
01244   printf("Generating PA(%d, %d), with slope in:%.1f, out: %.1f\n", Nodes, Edges,
01245     2+GammaIn/(Alpha/(1-Alpha)), 2+GammaOut/(Alpha/(1-Alpha)));
01246   // init
01247   int nodes=0, edges=0, time=0, iter=0;
01248   TIntV OutW(Edges, 0), InW(Edges, 0);
01249   PTimeNENet Net = TTimeNENet::New();
01250   Net->Reserve(Nodes, Edges);
01251   // 1st node
01252   Net->AddNode(0, TSecTm(time++));  nodes++;
01253   OutW.Add(0);  InW.Add(0);
01254   while (edges < Edges) {
01255     int Src=-1, Dst=-1;  iter++;
01256     if (TInt::Rnd.GetUniDev() < Alpha) {
01257       if (nodes < Nodes) {
01258         IAssert(Net->AddNode(nodes, TSecTm(time++)));
01259         nodes++; }
01260     } else {
01261       if (TInt::Rnd.GetUniDev() < nodes*GammaIn/double(edges+nodes*GammaIn)) {
01262         Src = TInt::Rnd.GetUniDevInt(nodes); }
01263       else { Src = OutW[TInt::Rnd.GetUniDevInt(OutW.Len())]; }
01264       if (TInt::Rnd.GetUniDev() < nodes*GammaOut/double(edges+nodes*GammaOut)) {
01265         Dst = TInt::Rnd.GetUniDevInt(nodes); }
01266       else { Dst = InW[TInt::Rnd.GetUniDevInt(InW.Len())]; }
01267     }
01268     if (Src == Dst || Net->IsEdge(Src, Dst)) {
01269       continue;
01270     }
01271     //printf("%d/%d %d %d\n", edges, iter, Src, Dst);
01272     if (! Net->IsNode(Src)) { Net->AddNode(Src, TSecTm(time++)); nodes++; }
01273     if (! Net->IsNode(Dst)) { Net->AddNode(Dst, TSecTm(time++)); nodes++; }
01274     Net->AddEdge(Src, Dst, -1, TSecTm(time++));
01275     OutW.Add(Src); InW.Add(Dst); edges++;
01276   }
01277   for (int node = 0; node < Nodes; node++) {
01278     if (! Net->IsNode(node)) {
01279       Net->AddNode(node, TSecTm(time++)); }
01280   }
01281   return Net;
01282 }
01283 
01284 PTimeNENet TTimeNENet::GetPrefAttach(const int& Nodes, const int& OutDeg) {
01285   printf("Generating PA, nodes:%d, out-deg:%d\n", Nodes, OutDeg);
01286   // init
01287   int time=0;
01288   PTimeNENet Net = TTimeNENet::New();
01289   Net->Reserve(Nodes, OutDeg*Nodes);
01290   Net->AddNode(0, TSecTm(++time));  Net->AddNode(1, TSecTm(++time));
01291   Net->AddEdge(0, 1, -1, TSecTm(++time));
01292   TIntV NIdV;  NIdV.Add(0);  NIdV.Add(1);
01293   TIntSet NodeSet;
01294   for (int node = 2; node <= Nodes; node++) {
01295     NodeSet.Clr(false);
01296     while (NodeSet.Len() < OutDeg && NodeSet.Len() < node) {
01297       NodeSet.AddKey(NIdV[TInt::Rnd.GetUniDevInt(NIdV.Len())]);
01298     }
01299     const int N = Net->AddNode(node, TSecTm(++time));
01300     for (int i = 0; i < NodeSet.Len(); i++) {
01301       Net->AddEdge(node, NodeSet[i], -1, TSecTm(++time));
01302       NIdV.Add(N);  NIdV.Add(NodeSet[i]);
01303     }
01304   }
01305   return Net;
01306 }
01307 
01308 void TTimeNENet::SaveEdgeTm(const TStr& EdgeFNm, const bool& RenumberNId, const bool& RelativeTm) const {
01309   TIntV EIdV;  GetEIdByTm(EIdV);
01310   const int BegTm = RelativeTm ? GetEDat(EIdV[0]).GetAbsSecs() : 0;
01311   TIntSet NIdMap;
01312   if (RenumberNId) { NIdMap.Gen(GetNodes()); }
01313   FILE *F = fopen(EdgeFNm.CStr(), "wt");
01314   //fprintf(F, "#Nodes\t%d\n#Edges\t%d\n", GetNodes(), GetEdges());
01315   //fprintf(F, "#<src>\t<dst>\t<time>\n");
01316   for (int e =0; e < EIdV.Len(); e++) {
01317     const TEdgeI EI = GetEI(EIdV[e]);
01318     if (RenumberNId) {
01319       const int src = EI.GetSrcNId();
01320       const int dst = EI.GetDstNId();
01321       NIdMap.AddKey(src);  NIdMap.AddKey(dst);
01322       fprintf(F, "%d\t%d\t%d\n", NIdMap.GetKeyId(src), NIdMap.GetKeyId(dst), EI().GetAbsSecs()-BegTm);
01323     }else {
01324       fprintf(F, "%d\t%d\t%d\n", EI.GetSrcNId(), EI.GetDstNId(), EI().GetAbsSecs()-BegTm); }
01325   }
01326   fclose(F);
01327 }
01328 
01329 PTimeNENet TTimeNENet::GetSmallNet() {
01330   PTimeNENet Net = TTimeNENet::New();
01331   for (int i = 1; i <= 6; i++) {
01332     Net->AddNode(i, TSecTm(0)); }
01333   int tm = 1;
01334   Net->AddEdge(1, 2, -1, TSecTm(tm++));
01335   Net->AddEdge(3, 4, -1, TSecTm(tm++));
01336   Net->AddEdge(3, 1, -1, TSecTm(tm++));
01337   Net->AddEdge(5, 6, -1, TSecTm(tm++));
01338   Net->AddEdge(6, 4, -1, TSecTm(tm++));
01339   Net->AddEdge(5, 3, -1, TSecTm(tm++));
01340   Net->AddEdge(5, 4, -1, TSecTm(tm++));
01341   Net->AddEdge(5, 2, -1, TSecTm(tm++));
01342   return Net;
01343 }
01344 
01345 PTimeNENet TTimeNENet::LoadFlickr(const TStr& NodeFNm, const TStr& EdgeFNm) {
01346   const int BegOfTm = 1047369600; // Tue Mar 11 01:00:00 2003 (begining of Flickr)
01347   PTimeNENet Net = TTimeNENet::New();
01348   printf("Adding nodes...");
01349   { TSsParser Ss(NodeFNm, ssfWhiteSep);
01350   while (Ss.Next()) {
01351     const int NId = Ss.GetInt(0);
01352     const int Tm = Ss.GetInt(1)+BegOfTm;
01353     if (TSecTm(Tm) < TSecTm(2002, 1, 1)) {
01354       printf("  skip node %g (time %d)\n", (double) Ss.GetLineNo(), Ss.GetInt(1)); continue; }
01355     Net->AddNode(NId, TSecTm(Tm));
01356   } }
01357   printf(" %d nodes\n", Net->GetNodes());
01358   printf("Adding edges...");
01359   int SkipCnt=0;
01360   //TIntH SkipDiffCnt;
01361   { TSsParser Ss(EdgeFNm, ssfWhiteSep);
01362   while (Ss.Next()) {
01363     const int NId1 = Ss.GetInt(0);
01364     const int NId2 = Ss.GetInt(1);
01365     const TSecTm Tm = TSecTm(Ss.GetInt(2)+BegOfTm);
01366     if (! Net->IsNode(NId1) || ! Net->IsNode(NId2)) { printf("not node\n"); continue; }
01367     if (Tm < TSecTm(2002, 1, 1)) { SkipCnt++;
01368       printf("  skip edge %g (time %s)\n", (double) Ss.GetLineNo(), Tm.GetStr().CStr()); continue; }
01369     if (Tm+600 < Net->GetNDat(NId1)) {
01370       printf("  1:skip edge %g (time %s < %s)\n", (double) Ss.GetLineNo(), Tm.GetStr().CStr(), Net->GetNDat(NId1).GetStr().CStr());
01371       //SkipDiffCnt.AddDat(-Tm.GetAbsSecs()+Net->GetNDat(NId1).GetAbsSecs()) += 1;
01372       SkipCnt++;  continue; }
01373     if (Tm+600 < Net->GetNDat(NId2)) { SkipCnt++;
01374       printf("  2:skip edge %g (time %s < %s)\n", (double) Ss.GetLineNo(), Tm.GetStr().CStr(), Net->GetNDat(NId2).GetStr().CStr());
01375       //SkipDiffCnt.AddDat(-Tm.GetAbsSecs()+Net->GetNDat(NId2).GetAbsSecs()) += 1;
01376       SkipCnt++;  continue; }
01377     Net->AddEdge(NId1, NId2, -1, TSecTm(Tm));
01378   } }
01379   //TGnuPlot::PlotValCntH(SkipDiffCnt, "flickr-edgeNodeDiff", "", "seconds", "count");
01380   printf("  %d edges\n", Net->GetEdges());
01381   printf("  %d edges skipped (edge time < node time)\n", SkipCnt);
01382   Net->UpdateNodeTimes();
01383   return Net;
01384 }
01385 
01386 // load network where fields are separated by Separator and each line contains (*Fld are column indexes)
01387 // .. <source> .. <destination> .. <time>
01388 PTimeNENet TTimeNENet::LoadEdgeTm(const TStr& EdgeFNm, const int& SrcFld, const int& DstFld, const int& TimeFld, const TSsFmt& Separator) {
01389   printf("Loading %s\n", EdgeFNm.CStr());
01390   PTimeNENet Net = TTimeNENet::New();
01391   TStrHash<TInt> StrToId(Mega(1), true); // node id to string
01392   int LineCnt=0;
01393   TExeTm ExeTm;
01394   TSsParser Ss(EdgeFNm, Separator);
01395   TSecTm MinTm=TSecTm::GetCurTm(), MaxTm=TSecTm(100);
01396   while (Ss.Next()) {
01397     if (Ss.IsCmt()) { continue; }
01398     IAssert(Ss.Len() > TimeFld);
01399     const char* Node1 = Ss.GetFld(SrcFld);
01400     const char* Node2 = Ss.GetFld(DstFld);
01401     const char* TmStr = Ss.GetFld(TimeFld);
01402     if (strcmp(TmStr,"NULL")==0) { continue; }
01403     //const TSecTm Tm = TSecTm::GetDtTmFromYmdHmsStr(TmStr);
01404     const TSecTm Tm(atoi(TmStr));
01405     const int NId1 = StrToId.AddKey(Node1);
01406     const int NId2 = StrToId.AddKey(Node2);
01407     if (! Net->IsNode(NId1)) { Net->AddNode(NId1, TSecTm()); }
01408     if (! Net->IsNode(NId2)) { Net->AddNode(NId2, TSecTm()); }
01409     MinTm=TMath::Mn(MinTm, Tm);
01410     MaxTm=TMath::Mx(MaxTm, Tm);
01411     Net->AddEdge(NId1, NId2, -1, Tm);
01412     if (++LineCnt % 1000 == 0) {
01413       printf("\r  %dk lines processed: %d %d [%s]", LineCnt/1000, Net->GetNodes(), Net->GetEdges(), ExeTm.GetStr()); }
01414   }
01415   printf("\r  %d lines processed: %d %d [%s]\n", LineCnt, Net->GetNodes(), Net->GetEdges(), ExeTm.GetStr());
01416   printf("  Data range %s -- %s\n", MinTm.GetStr().CStr(), MaxTm.GetStr().CStr());
01417   //TSnap::PrintInfo(Net, "", "", false);
01418   Net->UpdateNodeTimes();
01419   return Net;
01420 }