SNAP Library 6.0, Developer Reference  2020-12-09 16:24:20
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
timenet.cpp
Go to the documentation of this file.
1 // Time Network
4  if (this != &TimeNet) {
5  TNet::operator=(TimeNet);
6  }
7  return *this;
8 }
9 
10 PTimeNet TTimeNet::GetSubGraph(const TIntV& NIdV) const {
11  PTimeNet NewNetPt = TTimeNet::New();
12  TTimeNet& NewNet = *NewNetPt;
13  NewNet.Reserve(NIdV.Len(), -1);
14  int node, edge;
15  TNodeI NI;
16  for (node = 0; node < NIdV.Len(); node++) {
17  NewNet.AddNode(NIdV[node], GetNDat(NIdV[node])); // also copy the node data
18  }
19  for (node = 0; node < NIdV.Len(); node++) {
20  NI = GetNI(NIdV[node]);
21  const int SrcNId = NI.GetId();
22  for (edge = 0; edge < NI.GetOutDeg(); edge++) {
23  const int OutNId = NI.GetOutNId(edge);
24  if (NewNet.IsNode(OutNId)) {
25  NewNet.AddEdge(SrcNId, OutNId); }
26  }
27  }
28  NewNet.Defrag();
29  return NewNetPt;
30 }
31 
33  TIntV NIdV; GetNIdByTm(NIdV);
35  for (int i = 0; i < NIdV.Len(); i++) {
36  const int Src = NIdV[i];
37  const TTimeNet::TNodeI NI = GetNI(Src);
38  const TSecTm SrcTm = NI.GetDat();
39  if (! OutNet->IsNode(Src)) { OutNet->AddNode(Src, SrcTm); }
40  for (int e = 0; e < NI.GetOutDeg(); e++) {
41  if (! OutNet->IsNode(NI.GetOutNId(e))) { OutNet->AddNode(NI.GetOutNId(e), SrcTm); }
42  OutNet->AddEdge(Src, NI.GetOutNId(e), -1, SrcTm);
43  }
44  }
45  return OutNet;
46 }
47 
48 void TTimeNet::GetNIdByTm(TIntV& NIdV) const {
49  TVec<TKeyDat<TSecTm, TInt> > TmToNIdV(GetNodes(), 0);
50  for (TNodeI NodeI = BegNI(); NodeI < EndNI(); NodeI++) {
51  TmToNIdV.Add(TKeyDat<TSecTm, TInt>(NodeI.GetDat(), NodeI.GetId())); }
52  TmToNIdV.Sort();
53  NIdV.Gen(GetNodes(), 0);
54  for (int i = 0; i < TmToNIdV.Len(); i++) {
55  NIdV.Add(TmToNIdV[i].Dat); }
56 }
57 
58 // put nodes into buckets by TmUnit
59 void TTimeNet::GetTmBuckets(const TTmUnit& TmUnit, TTmBucketV& TmBucketV) const {
60  THash<TInt, TIntV> TmIdToNIdVH;
61  for (TNodeI NodeI = BegNI(); NodeI < EndNI(); NodeI++) {
62  const int TmId = NodeI().Round(TmUnit);
63  if (! TmIdToNIdVH.IsKey(TmId)) TmIdToNIdVH.AddKey(TmId);
64  TmIdToNIdVH.GetDat(TmId).Add(NodeI.GetId());
65  }
66  TVec<TPair<TInt, TIntV> > TmIdNIdVV;
67  TmIdToNIdVH.GetKeyDatPrV(TmIdNIdVV);
68  TmIdNIdVV.Sort();
69  TmBucketV.Gen(TmIdNIdVV.Len());
70  for (int i = 0; i < TmIdNIdVV.Len(); i++) {
71  TTmBucket& Bucket = TmBucketV[i];
72  Bucket.BegTm = TmIdNIdVV[i].Val1;
73  Bucket.NIdV = TmIdNIdVV[i].Val2;
74  }
75 }
76 
77 void TTimeNet::GetNodeBuckets(const int NodesPerBucket, TTimeNet::TTmBucketV& TmBucketV) const {
78  TIntV NIdV;
79  GetNIdByTm(NIdV);
80  TmBucketV.Gen(NIdV.Len() / NodesPerBucket + 1, 0);
81  for (int i = 0; i < NIdV.Len(); i++) {
82  const int b = i/NodesPerBucket;
83  if (TmBucketV.Len() <= b) { TmBucketV.Add(TTimeNet::TTmBucket(TSecTm(b))); }
84  TmBucketV[b].NIdV.Add(NIdV[i]);
85  }
86 }
87 
88 PGStatVec TTimeNet::TimeGrowth(const TTmUnit& TmUnit, const TFSet& TakeStat, const TSecTm& StartTm) const {
89  PGStatVec GrowthStat = new TGStatVec(TmUnit, TakeStat);
90  TTmBucketV TmBucketV;
91  GetTmBuckets(TmUnit, TmBucketV);
92  TIntV NodeIdV;
93  TExeTm ExeTm;
94  for (int t = 0; t < TmBucketV.Len(); t++) {
95  NodeIdV.AddV(TmBucketV[t].NIdV); // nodes up to time T
96  printf("\n=== %d/%d] %s (%d nodes)\n", t+1, TmBucketV.Len(),
97  TmBucketV[t].BegTm.GetStr().CStr(), NodeIdV.Len()); ExeTm.Tick();
98  if (TmBucketV[t].BegTm < StartTm) continue;
99  //PNGraph PreGraph = GetSubGraph(NodeIdV, true); // renumber nodes
100  PNGraph PreGraph = TSnap::ConvertSubGraph<PNGraph>(PTimeNet((TTimeNet*)this), NodeIdV); // don't renumber nodes
101  GrowthStat->Add(PreGraph, TmBucketV[t].BegTm);
102  }
103  return GrowthStat;
104 }
105 
106 void TTimeNet::PlotEffDiam(const TStr& FNmPref, const TStr& Desc, const TTmUnit& TmUnit,
107  const TSecTm& StartTm, const int& NDiamRuns, const bool& OnlyWcc, const bool& AlsoRewire) const {
108  const TStr WccStr = OnlyWcc ? "WCC " : TStr::GetNullStr();
109  TTmBucketV TmBucketV;
110  GetTmBuckets(TmUnit, TmBucketV);
111  TIntV NodeIdV;
112  TExeTm ExeTm, Run1Tm;
113  TFltTrV TmDiamV, NdsDiamV;
114  TFltTrV RwTmDiamV, RwNdsDiamV;
115  for (int t = 0; t < TmBucketV.Len(); t++) {
116  NodeIdV.AddV(TmBucketV[t].NIdV); // nodes up to time T
117  printf("\n*** %d/%d] at %s (%d nodes)\n", t+1, TmBucketV.Len(),
118  TmBucketV[t].BegTm.GetStr(TmUnit).CStr(), NodeIdV.Len()); ExeTm.Tick();
119  if (TmBucketV[t].BegTm < StartTm) continue;
120  //PUNGraph PreGraph = GetSubUNGraph(NodeIdV, true);
121  PUNGraph PreGraph = TSnap::ConvertSubGraph<PUNGraph>(PTimeNet((TTimeNet*)this), NodeIdV);
122  { TMom Mom;
123  for (int r = 0; r < NDiamRuns; r++) {
124  printf("%d...", r+1); Run1Tm.Tick();
125  const double EffDiam = TSnap::GetAnfEffDiam(OnlyWcc ? TSnap::GetMxWcc(PreGraph) : PreGraph);
126  Mom.Add(EffDiam); printf("[%s]\r", Run1Tm.GetTmStr());
127  }
128  Mom.Def();
129  TmDiamV.Add(TFltTr((int)TmBucketV[t].BegTm.GetInUnits(TmUnit), Mom.GetMean(), Mom.GetSDev()));
130  NdsDiamV.Add(TFltTr(PreGraph->GetNodes(), Mom.GetMean(), Mom.GetSDev()));
131  NdsDiamV.Sort();
132  printf(" [%s] \n", ExeTm.GetTmStr()); }
133  if (AlsoRewire) {
134  //PUNGraph RwGraph = TGGen::GenRndDegSeqS(PreGraph, 50, TInt::Rnd); // edge switching model
135  TIntV DegSeqV(PreGraph->GetNodes(), 0);
136  for (TUNGraph::TNodeI NI = PreGraph->BegNI(); NI < PreGraph->EndNI(); NI++) { DegSeqV.Add(NI.GetDeg()); }
137  PUNGraph RwGraph = TSnap::GenConfModel(DegSeqV, TInt::Rnd);
138  printf("Configuration model: (%d, %d) --> (%d, %d)\n", PreGraph->GetNodes(), PreGraph->GetEdges(), RwGraph->GetNodes(), RwGraph->GetEdges());
139  TMom Mom;
140  for (int r = 0; r < NDiamRuns; r++) {
141  printf(" diam run %d...", r+1); Run1Tm.Tick();
142  const double EffDiam = TSnap::GetAnfEffDiam(OnlyWcc ? TSnap::GetMxWcc(PreGraph):PreGraph);
143  Mom.Add(EffDiam); printf(" current run [%s]\n", Run1Tm.GetTmStr());
144  }
145  Mom.Def();
146  RwTmDiamV.Add(TFltTr((int)TmBucketV[t].BegTm.GetInUnits(TmUnit), Mom.GetMean(), Mom.GetSDev()));
147  RwNdsDiamV.Add(TFltTr(PreGraph->GetNodes(), Mom.GetMean(), Mom.GetSDev()));
148  RwNdsDiamV.Sort();
149  printf("done with diameter. Total time [%s] \n", ExeTm.GetTmStr());
150  }
151  // plot
152  { TGnuPlot GnuPlot("diamEff-T."+FNmPref, TStr::Fmt("%s. G(%d, %d)", Desc.CStr(), GetNodes(), GetEdges()));
153  GnuPlot.SetXYLabel(TStr::Fmt("TIME [%s]", TTmInfo::GetTmUnitStr(TmUnit).CStr()), WccStr+"Effective Diameter");
154  GnuPlot.AddErrBar(TmDiamV, "True", "");
155  if (! RwTmDiamV.Empty()) { GnuPlot.AddErrBar(RwTmDiamV, "Rewired", "");}
156  GnuPlot.SavePng(); }
157  { TGnuPlot GnuPlot("diamEff-N."+FNmPref, TStr::Fmt("%s. G(%d, %d)", Desc.CStr(), GetNodes(), GetEdges()));
158  GnuPlot.SetXYLabel("NODES", WccStr+"Effective Diameter");
159  GnuPlot.AddErrBar(NdsDiamV, "True", "");
160  if (! RwNdsDiamV.Empty()) { GnuPlot.AddErrBar(RwNdsDiamV, "Rewired", "");}
161  GnuPlot.SavePng(); }
162  }
163 }
164 
165 // pretend we only have link data starting in PostTmDiam
166 // compare the diameter of the nodes after PostTmDiam with diameter
167 // of the nodes after PostTmDiam but *also* using nodes and edges
168 // from before PostTmDiam
169 void TTimeNet::PlotMissingPast(const TStr& FNmPref, const TStr& Desc, const TTmUnit& TmUnit,
170  const TSecTm& DelPreTmEdges, const TSecTm& PostTmDiam) const {
171  printf("\nGrowth over time: degree distribution, Growth Power Law, Diameter.\n %s group by %s.\n",
172  FNmPref.CStr(), TTmInfo::GetTmUnitStr(TmUnit).CStr());
173  printf(" Delete out-edges of pre time %s nodes.\n Take subgraph of post year %s subgraph.\n\n",
174  DelPreTmEdges.GetStr().CStr(), PostTmDiam.GetStr().CStr());
175  const int NDiamRuns = 10;
176  const int NSamples = 100;
177  //PUNGraph FullGraph = GetUNGraph();
178  PUNGraph FullGraph = TSnap::ConvertGraph<PUNGraph>(PTimeNet((TTimeNet*)this));
179  // delete past
180  if (DelPreTmEdges.IsDef()) {
181  int NDelNodes = 0, NDelEdges = 0;
182  printf("Deleting pre-%s node out-links\n", DelPreTmEdges.GetStr().CStr());
183  for (TNodeI NodeI = BegNI(); NodeI < EndNI(); NodeI++) {
184  if (NodeI() < DelPreTmEdges) {
185  const int NodeId = NodeI.GetId();
186  for (int edge = 0; edge < NodeI.GetOutDeg(); edge++) {
187  FullGraph->DelEdge(NodeId, NodeI.GetOutNId(edge)); }
188  NDelEdges += NodeI.GetOutDeg(); NDelNodes++;
189  }
190  }
191  printf(" Deleted %d nodes out-edges (%d edges total).\n", NDelNodes, NDelEdges);
192  FullGraph->Defrag(true);
193  }
194  PGStatVec GrowthStat = TGStatVec::New(TmUnit);
195  TFltV PreDiamSDev, PreEffDiamSDev, WccDiamSDev, WccEffDiamSDev;
196  TIntV NodeIdV;
197  TExeTm ExeTm;
198  TTmBucketV TmBucketV;
199  GetTmBuckets(TmUnit, TmBucketV);
200  for (int t = 0; t < TmBucketV.Len(); t++) {
201  printf("\nGraph: %s (%d / %d) [%s]\n", TmBucketV[t].BegTm.GetTmStr().CStr(),
202  t+1, TmBucketV.Len(), TExeTm::GetCurTm());
203  // up-to-year subgraph
204  NodeIdV.AddV(TmBucketV[t].NIdV); // nodes up to time T
205  if (TmBucketV[t].BegTm < PostTmDiam) { continue; }
206  const PUNGraph PreGraph = TSnap::GetSubGraph(FullGraph, NodeIdV, true);
207  const PUNGraph WccGraph = TSnap::GetMxWcc(PreGraph);
208  TIntV PostYearNIdV, WccPostYearNIdV;
209  for (TUNGraph::TNodeI NI = PreGraph->BegNI(); NI < PreGraph->EndNI(); NI++) {
210  if (GetNDat(NI.GetId()) >= PostTmDiam) {
211  PostYearNIdV.Add(NI.GetId());
212  if (WccGraph->IsNode(NI.GetId())) { WccPostYearNIdV.Add(NI.GetId()); }
213  }
214  }
215  TMom PreDiamMom, PreEffDiamMom, WccDiamMom, WccEffDiamMom;
216  // diameter of PostYearDiam subgraph using whole graph (all edges)
217  int FullDiam; double EffDiam;
218  for (int r = 0; r < NDiamRuns; r++) {
219  if (! PostYearNIdV.Empty()) {
220  TSnap::GetBfsEffDiam(PreGraph, NSamples, PostYearNIdV, false, EffDiam, FullDiam);
221  PreDiamMom.Add(FullDiam); PreEffDiamMom.Add(EffDiam);
222  }
223  if (! WccPostYearNIdV.Empty()) {
224  TSnap::GetBfsEffDiam(WccGraph, NSamples, WccPostYearNIdV, false, EffDiam, FullDiam);
225  WccDiamMom.Add(FullDiam); WccEffDiamMom.Add(EffDiam);
226  }
227  printf(" diam: %d [%s] \r", r+1, ExeTm.GetTmStr()); ExeTm.Tick();
228  }
229  PreDiamMom.Def(); PreEffDiamMom.Def();
230  WccDiamMom.Def(); WccEffDiamMom.Def();
231  // save stat
232  PGStat GraphStatPt = GrowthStat->Add(TmBucketV[t].BegTm);
233  TGStat& GS = *GraphStatPt;
234  GS.TakeBasicStat(PreGraph, false);
235  GS.TakeBasicStat(WccGraph, true);
236  GS.SetVal(gsvFullDiam, PreDiamMom.GetMean()); // mean
237  GS.SetVal(gsvEffDiam, PreEffDiamMom.GetMean());
238  GS.SetVal(gsvFullWccDiam, WccDiamMom.GetMean());
239  GS.SetVal(gsvEffWccDiam, WccEffDiamMom.GetMean());
240  GS.SetVal(gsvFullDiamDev, PreDiamMom.GetSDev()); // variance
241  GS.SetVal(gsvEffDiamDev, PreEffDiamMom.GetSDev());
242  GS.SetVal(gsvFullWccDiamDev, WccDiamMom.GetSDev());
243  GS.SetVal(gsvEffWccDiamDev, WccEffDiamMom.GetSDev());
244  { TFOut FOut("growth."+FNmPref+".gStatVec"); GrowthStat->Save(FOut); }
245  GrowthStat->SaveTxt(FNmPref, TStr::Fmt("%s. MISSING PAST DIAMETER\nDelPreEdges\t%s\nPostYearDiam\t%s\n",
246  Desc.CStr(), DelPreTmEdges.GetStr().CStr(), PostTmDiam.GetStr().CStr()));
247  }
248  // diameter plots
249  //GrowthStat->PlotDiam(FNmPref, Desc + TStr::Fmt(" MISSING PAST. DelPre:%d PostYear:%d.",
250  // DelPreEdges, PostYearDiam));*/
251 }
252 
253 void TTimeNet::PlotCCfOverTm(const TStr& FNmPref, TStr Desc, const TTmUnit& TmUnit, const int& NodesBucket) const {
254  if (Desc.Empty()) { Desc = FNmPref; }
255  TTimeNet::TTmBucketV TmBucketV;
256  TStr XLbl;
257  if (TmUnit == tmuNodes) {
258  XLbl = "Number of nodes (time)";
259  IAssert(NodesBucket > 0);
260  GetNodeBuckets(NodesBucket, TmBucketV); }
261  else {
262  XLbl = TStr::Fmt("Time (%s)", TTmInfo::GetTmUnitStr(TmUnit).CStr());
263  GetTmBuckets(TmUnit, TmBucketV);
264  }
265  TIntV NodeIdV;
266  TFltPrV DegToCCfV, CcfV, OpClV, OpV;
267  TVec<TTuple<TFlt, 4> > OpenClsV;
268  TTuple<TFlt, 4> Tuple;
269  TExeTm ExeTm;
270  int XVal = 0;
271  printf("Clustering coefficient over time:\n %d edges, %d edges per bucket, %d buckets \n", GetEdges(), 100000, TmBucketV.Len());
272  PUNGraph UNGraph = TSnap::ConvertGraph<PUNGraph>(PTimeNet((TTimeNet*)this));
273  for (int t = 0; t < TmBucketV.Len(); t++) {
274  printf("\r %d/%d: ", t+1, TmBucketV.Len());
275  NodeIdV.AddV(TmBucketV[t].NIdV); // edges up to time T
276  int64 Open=0, Close=0;
277  const PUNGraph Graph = TSnap::GetSubGraph(UNGraph, NodeIdV);
278  const double CCf = TSnap::GetClustCf(Graph, DegToCCfV, Open, Close);
279  if (TmUnit == tmuNodes) { XVal = Graph->GetNodes(); }
280  else { XVal = TmBucketV[t].BegTm.GetInUnits(TmUnit); }
281  CcfV.Add(TFltPr(XVal, CCf));
282  double FltOpen = static_cast<double>(Open);
283  double FltClose = static_cast<double>(Close);
284  OpClV.Add(TFltPr(XVal, (Open+Close==0 ? 0.0 : FltClose/(FltOpen+FltClose))));
285  OpV.Add(TFltPr(XVal, (Open==0 ? 0.0 : FltClose/FltOpen)));
286  Tuple[0]=Graph->GetNodes();
287  Tuple[1]=Graph->GetEdges();
288  Tuple[2]=FltClose; Tuple[3]=FltOpen;
289  OpenClsV.Add(Tuple);
290  printf(" %s", ExeTm.GetStr());
291  TGnuPlot::PlotValV(DegToCCfV, TStr::Fmt("ccfAt%02dtm.%s", t+1, FNmPref.CStr()),
292  TStr::Fmt("%s. At time %d. Clustering Coefficient. G(%d,%d)", Desc.CStr(), t+1, Graph->GetNodes(), Graph->GetEdges()),
293  "Degree", "Clustering coefficient", gpsLog10XY, false);
294  }
295  TGnuPlot::PlotValV(CcfV, "ccfOverTm."+FNmPref, Desc+". Average Clustering Coefficient", XLbl, "Average clustering coefficient", gpsAuto, false);
296  TGnuPlot::PlotValV(OpClV, "ClsOpnTr1."+FNmPref, Desc+". Close/(Open+Closed) triads", XLbl, "Close / (Open+Closed) triads", gpsAuto, false);
297  TGnuPlot::PlotValV(OpV, "ClsOpnTr2."+FNmPref, Desc+". Close/Open triads", XLbl, "Close / Open triads", gpsAuto, false);
298  TGnuPlot::SaveTs(OpenClsV, "ClsOpnTr."+FNmPref+".tab", TStr::Fmt("#%s\n#Nodes\tEdges\tClosed\tOpenTriads", Desc.CStr()));
299  printf("\n");
300 }
301 
302 void TTimeNet::PlotMedianDegOverTm(const TStr& FNmPref, const TTmUnit& TmUnit, const int& NodesPerBucket) const {
303  TTimeNet::TTmBucketV TmBucketV;
304  TStr XLbl;
305  if (TmUnit == tmuNodes) {
306  XLbl = "Number of nodes (time)"; IAssert(NodesPerBucket > 0);
307  GetNodeBuckets(NodesPerBucket, TmBucketV); }
308  else {
309  XLbl = TStr::Fmt("Time (%s)", TTmInfo::GetTmUnitStr(TmUnit).CStr());
310  GetTmBuckets(TmUnit, TmBucketV); }
311  printf("\n\n%s\nMedian degree over time:\n %d edges, %d edges per bucket, %d buckets \n", FNmPref.CStr(), GetEdges(), NodesPerBucket, TmBucketV.Len());
312  TFltPrV MedDegV, MedInDegV, MedOutDegV;
313  TIntV NodeIdV;
314  int XVal;
315  PUNGraph UNGraph = TSnap::ConvertGraph<PUNGraph>(PTimeNet((TTimeNet*)this));
316  PNGraph NGraph = TSnap::ConvertGraph<PNGraph>(PTimeNet((TTimeNet*)this));
317  FILE *F = fopen(("gStat-"+FNmPref+".tab").CStr(), "wt");
318  fprintf(F, "UndirNodes\tUndirEdges\tUndirNonZNodes\tMedianDeg\tMeanDeg\tDirNodes\tDirEdges\tDirNonzNodes\tMedInDeg\tMedOutDeg\tMeanInDeg\tMeanOutDeg\n");
319  for (int t = 0; t < TmBucketV.Len(); t++) {
320  printf("\r %d/%d: ", t+1, TmBucketV.Len());
321  NodeIdV.AddV(TmBucketV[t].NIdV); // edges up to time T
322  if (TmUnit == tmuNodes) { XVal = NodeIdV.Len(); }
323  else { XVal = TmBucketV[t].BegTm.GetInUnits(TmUnit); }
324  // un graph
325  { const PUNGraph Graph = TSnap::GetSubGraph(UNGraph, NodeIdV); TMom Mom;
326  for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) { if (NI.GetOutDeg()>0) { Mom.Add(NI.GetOutDeg());} }
327  Mom.Def(); MedDegV.Add(TFltPr(XVal, Mom.GetMedian()));
328  fprintf(F, "%d\t%d\t%d\t%f\t%f", Graph->GetNodes(), Graph->GetEdges(), TSnap::CntNonZNodes(Graph), (float)Mom.GetMedian(), (float)Mom.GetMean()); }
329  // directed graph
330  { const PNGraph Graph = TSnap::GetSubGraph<PNGraph>(NGraph, NodeIdV); TMom MomOut, MomIn;
331  for (TNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
332  if (NI.GetOutDeg()>0) { MomOut.Add(NI.GetOutDeg()); }
333  if (NI.GetInDeg()>0) { MomIn.Add(NI.GetInDeg()); } }
334  MomOut.Def(); MedOutDegV.Add(TFltPr(XVal, MomOut.GetMedian()));
335  MomIn.Def(); MedInDegV.Add(TFltPr(XVal, MomIn.GetMedian()));
336  fprintf(F, "\t%d\t%d\t%d\t%f\t%f\t%f\t%f\n", Graph->GetNodes(), Graph->GetEdges(), (int)TSnap::CntNonZNodes(Graph), (float)MomIn.GetMedian(), (float)MomOut.GetMedian(), (float)MomIn.GetMean(), (float)MomOut.GetMean()); }
337  }
338  fclose(F);
339  TGnuPlot::PlotValV(MedDegV, "medDeg."+FNmPref, FNmPref+" Median degree", TTmInfo::GetTmUnitStr(TmUnit), "Median degree");
340  TGnuPlot::PlotValV(MedOutDegV, "medOutDeg."+FNmPref, FNmPref+" Median OUT degree", TTmInfo::GetTmUnitStr(TmUnit), "Median OUT degree");
341  TGnuPlot::PlotValV(MedInDegV, "medInDeg."+FNmPref, FNmPref+" Median IN degree", TTmInfo::GetTmUnitStr(TmUnit), "Median IN degree");
342 }
343 
344 // load Arxiv affiliation network (papers to authors bipartite graph)
345 // <set1_id> <time> <set2_id1> <set2_id2> ... (all ids have to be unique)
347  PTimeNet TimeNetPt = TTimeNet::New();
348  TTimeNet& TimeNet = *TimeNetPt;
349  PSs Ss = TSs::LoadTxt(ssfTabSep, InFNm.CStr());
350  TIntH Set1IdH; // paper ids
351  TStrV StrTimeV;
352  for (int y = 0; y < Ss->GetYLen(); y++) {
353  if (Ss->At(0, y)[0] == '#') continue; // skip comments
354  if (Ss->GetXLen(y) < 3) continue; // there must be at least one author
355  const int& SrcId = Ss->At(0, y).GetInt();
356  IAssert(! Set1IdH.IsKey(SrcId));
357  IAssert(! TimeNet.IsNode(SrcId));
358  Set1IdH.AddKey(SrcId);
359  Ss->At(1, y).SplitOnAllCh('-', StrTimeV);
360  const int Year = StrTimeV[0].GetInt();
361  const int Month = StrTimeV[1].GetInt();
362  const int Day = StrTimeV[2].GetInt();
363  const TSecTm NodeTm(Year, Month, Day);
364  TimeNet.AddNode(SrcId, NodeTm);
365  for (int dst = 2; dst < Ss->GetXLen(y); dst++) {
366  const int DstId = Ss->At(dst, y).GetInt();
367  IAssert(! Set1IdH.IsKey(DstId));
368  if (! TimeNet.IsNode(DstId)) { TimeNet.AddNode(DstId, NodeTm); }
369  else { TimeNet.GetNDat(DstId) = TMath::Mn(NodeTm, TimeNet.GetNDat(DstId)); }
370  if (! TimeNet.IsEdge(SrcId, DstId)) { TimeNet.AddEdge(SrcId, DstId); }
371  }
372  }
373  TimeNet.Defrag();
374  printf("Bipartate graph: nodes: %d edges: %d\n", TimeNet.GetNodes(), TimeNet.GetEdges());
375  printf(" Bipartate sets: %d nodes --> %d nodes\n", TSnap::CntInDegNodes(TimeNetPt, 0),
376  TSnap::CntOutDegNodes(TimeNetPt, 0));
377  return TimeNetPt;
378 }
379 
380 // load Arxiv paper citation network
381 // PaperFNm: <paper> <time>
382 // CiteFNm: <source> <destination>
383 PTimeNet TTimeNet::LoadArxiv(const TStr& PaperFNm, const TStr& CiteFNm) {
384  TExeTm ExeTm;
385  PTimeNet TimeNetPt = TTimeNet::New();
386  TTimeNet& TimeNet = *TimeNetPt;
387  printf("Arxiv citation graph (paper publication year)...\n");
388  // load time data (file hep-ph-slacdates)
389  char Line [1024];
390  FILE *PprF = fopen(PaperFNm.CStr(), "rt");
391  TStr StrId, StrTime;
392  TStrV StrV, StrTimeV;
393  int N = 0, DuplicateNode = 0;
394  while (! feof(PprF)) {
395  Line[0] = 0;
396  fgets(Line, 1024, PprF);
397  if (strlen(Line) == 0 || Line[0] == '#') continue;
398  Line[strlen(Line)-1] = 0; // delete trailing '\n'
399  TStr(Line).SplitOnWs(StrV); IAssert(StrV.Len() == 2);
400  StrId = StrV[0]; StrTime = StrV[1]; IAssert(!StrId.Empty() && !StrTime.Empty());
401  StrTime.SplitOnAllCh('-', StrTimeV); IAssert(StrTimeV.Len() == 3);
402  const int NodeId = StrId.GetInt();
403  if (! TimeNet.IsNode(NodeId)) {
404  const int Year = StrTimeV[0].GetInt();
405  const int Month = StrTimeV[1].GetInt();
406  const int Day = StrTimeV[2].GetInt();
407  TimeNet.AddNode(NodeId, TSecTm(Year, Month, Day));
408  } else { DuplicateNode++; }
409  if (++N % 10000 == 0) printf("\r %dk", N/1000);
410  }
411  printf("\r %d nodes read. %d duplicate nodes. %s\n", N, DuplicateNode, ExeTm.GetTmStr());
412  fclose(PprF);
413  // load citations (file hep-ph-citations)
414  int NewSrcIds=0, NewDstIds=0, DupLinks=0, NewCits=0;
415  FILE *CiteF = fopen(CiteFNm.CStr(), "rt");
416  N = 0; ExeTm.Tick();
417  printf("Loading Arxiv citations...\n");
418  TIntPrV EdgeV;
419  THash<TInt, TSecTm> NIdToTimeH;
420  while (! feof(CiteF)) {
421  Line[0] = 0;
422  fgets(Line, 1024, CiteF);
423  if (strlen(Line) == 0 || Line[0] == '#') continue;
424  Line[strlen(Line)-1] = 0; // delete trailing '\n'
425  TStr(Line).SplitOnWs(StrV); IAssert(StrV.Len() == 2);
426  const int SrcNId = StrV[0].GetInt();
427  const int DstNId = StrV[1].GetInt();
428  EdgeV.Add(TIntPr(SrcNId, DstNId));
429  // infer time of destination node -- earliest paper that cites the node (paper)
430  if (! TimeNet.IsNode(DstNId) && TimeNet.IsNode(SrcNId)) {
431  const TSecTm& SrcTm = TimeNet.GetNDat(SrcNId);
432  if (! NIdToTimeH.IsKey(DstNId)) {
433  NIdToTimeH.AddDat(DstNId, SrcTm);
434  NewDstIds++;
435  }
436  else if (NIdToTimeH.GetDat(DstNId) < SrcTm) {
437  NIdToTimeH.GetDat(DstNId) = SrcTm; }
438  }
439  if (++N % 10000 == 0) printf("\r %dk", N/1000);
440  }
441  fclose(CiteF);
442  // add infeered time nodes (nodes which are cited by papers with known time)
443  for (int i = 0; i < NIdToTimeH.Len(); i++) {
444  TimeNet.AddNode(NIdToTimeH.GetKey(i), NIdToTimeH[i]);
445  }
446  // add links
447  for (int i = 0; i < EdgeV.Len(); i++) {
448  const int SrcNId = EdgeV[i].Val1;
449  const int DstNId = EdgeV[i].Val2;
450  if (TimeNet.IsNode(SrcNId) && TimeNet.IsNode(DstNId)) {
451  if (! TimeNet.IsEdge(SrcNId, DstNId)) { TimeNet.AddEdge(SrcNId, DstNId); }
452  else { DupLinks++; }
453  } else {
454  if (! TimeNet.IsNode(SrcNId)) {
455  NewSrcIds++;
456  if (! TimeNet.IsNode(DstNId)) { NewCits++; }
457  }
458  }
459  }
460  printf("\r %d citations read. %s\n", N, ExeTm.GetTmStr());
461  printf("Graph: nodes: %d edges: %d\n", TimeNet.GetNodes(), TimeNet.GetEdges());
462  printf("Removing 0-degree nodes: %d nodes\n", TSnap::CntDegNodes(TimeNetPt, 0));
463  TIntV RmNIdV;
464  for (TTimeNet::TNodeI ni = TimeNet.BegNI(); ni < TimeNet.EndNI(); ni++) {
465  if (ni.GetDeg() == 0) { RmNIdV.Add(ni.GetId()); }
466  }
467  for (int i = 0; i < RmNIdV.Len(); i++) {
468  TimeNet.DelNode(RmNIdV[i]);
469  }
470  TimeNet.Defrag(true);
471  printf("\nFinal graph: nodes: %d edges: %d\n", TimeNet.GetNodes(), TimeNet.GetEdges());
472  printf(" Duplicate citations : %d\n", DupLinks);
473  printf(" Nodes without time which are cited : %d (add them to graph, use time of the earliest source node)\n", NewDstIds);
474  printf(" Citations between unknown time nodes : %d\n", NewCits);
475  printf(" Nodes without time which make citations: %d (do not add them into the graph)\n", NewSrcIds);
476  return TimeNetPt;
477 }
478 
479 // Patent citation network. Time units are seconds even though they mean years
480 PTimeNet TTimeNet::LoadPatents(const TStr& PatentFNm, const TStr& CiteFNm) {
481  int N = 0;
482  TExeTm ExeTm;
483  PTimeNet TimeNetPt = TTimeNet::New();
484  TTimeNet& TimeNet = *TimeNetPt;
485  TimeNet.Reserve(4000000, 160000000);
486  printf("parsing patent data (patent grant year)...\n");
487  // load time data (file pat63_99.txt)
488  const int& PatIdCol = 0;
489  const int& GYearCol = 1;
490  TStrV ColV;
491  char Line [1024];
492  FILE *PatF = fopen(PatentFNm.CStr(), "rt");
493  fgets(Line, 1024, PatF); // skip 1st line
494  while (! feof(PatF)) {
495  Line[0] = 0;
496  fgets(Line, 1024, PatF);
497  if (strlen(Line) == 0) break;
498  TStr(Line).SplitOnAllCh(',', ColV, false);
499  IAssert(ColV.Len() == 23);
500  const int PatentId = ColV[PatIdCol].GetInt();
501  const int GrantYear = ColV[GYearCol].GetInt();
502  IAssert(! TimeNet.IsNode(PatentId));
503  TimeNet.AddNode(PatentId, TSecTm(GrantYear)); // pretend year is a second
504  if (++N % 100000 == 0) printf("\r %dk", N/1000);
505  }
506  printf("\r %d patents read. %s\n", N, ExeTm.GetTmStr());
507  fclose(PatF);
508  // load citations (file cite75_99.txt)
509  printf("\nLoading patent citations...\n");
510  int NewSrcIds=0, NewDstIds=0, DupLinks=0, NewCits=0;
511  N = 0; ExeTm.Tick();
512  TStr SrcId, DstId;
513  FILE *CiteF = fopen(CiteFNm.CStr(), "rt");
514  fgets(Line, 1024, CiteF); // skip 1st line
515  while (! feof(CiteF)) {
516  Line[0] = 0;
517  fgets(Line, 1024, CiteF);
518  if (strlen(Line) == 0) break;
519  Line[strlen(Line)-1] = 0; // delete trailing '\n'
520  TStr(Line).SplitOnCh(SrcId, ',', DstId);
521  const int SrcNId = SrcId.GetInt();
522  const int DstNId = DstId.GetInt();
523  if (! TimeNet.IsNode(SrcNId) && ! TimeNet.IsNode(DstNId)) {
524  //TimeNet.AddNode(SrcNId, TSecTm(1, 1, 1)); NewSrcIds++;
525  //TimeNet.AddNode(DstNId, TSecTm(1, 1, 1)); NewDstIds++;
526  NewCits++;
527  continue;
528  }
529  else if (TimeNet.IsNode(SrcNId) && ! TimeNet.IsNode(DstNId)) {
530  TimeNet.AddNode(DstNId, TimeNet.GetNDat(SrcNId)); NewDstIds++;
531  }
532  else if (! TimeNet.IsNode(SrcNId) && TimeNet.IsNode(DstNId)) {
533  TimeNet.AddNode(SrcNId, TimeNet.GetNDat(DstNId)); NewSrcIds++;
534  }
535  if (! TimeNet.IsEdge(SrcNId, DstNId)) {
536  TimeNet.AddEdge(SrcNId, DstNId);
537  } else { DupLinks++; }
538  if (++N % 100000 == 0) printf("\r %dk", N/1000);
539  }
540  fclose(CiteF);
541  printf("\r %d citations read. %s\n\n", N, ExeTm.GetTmStr());
542  printf("Graph: nodes: %d edges: %d\n", TimeNet.GetNodes(), TimeNet.GetEdges());
543  printf("Removing 0-degree nodes: %d nodes\n", TSnap::CntDegNodes(TimeNetPt, 0));
544  TIntV RmNIdV;
545  for (TTimeNet::TNodeI ni = TimeNet.BegNI(); ni < TimeNet.EndNI(); ni++) {
546  if (ni.GetDeg() == 0) { RmNIdV.Add(ni.GetId()); }
547  }
548  for (int i = 0; i < RmNIdV.Len(); i++) {
549  TimeNet.DelNode(RmNIdV[i]);
550  }
551  TimeNet.Defrag(true);
552  printf("\nFinal graph: nodes: %d edges: %d\n", TimeNet.GetNodes(), TimeNet.GetEdges());
553  printf(" Duplicate citations : %d\n", DupLinks);
554  printf(" Citations between unknown time nodes : %d\n", NewCits);
555  printf(" Nodes without time which make citations: %d\n", NewSrcIds);
556  printf(" Nodes without time which are cited : %d\n", NewDstIds);
557  return TimeNetPt;
558 }
559 
560 // Amazon ShareTheLove: nodes are people, we add an edge when there is a first
561 // recommendation between 2 people (we count each edge only once (regardless
562 // of how many recommendations there were between 2 persons)
564  PTimeNet TimeNetPt = TTimeNet::New();
565  TTimeNet& TimeNet = *TimeNetPt;
566  TimeNet.Reserve(3953993, -1);
567  printf("Amazon Share-the-Love...\n");
568  char line [2024], MonthStr[4];
569  int NLines=0;
570  TStrV ColV;
571  FILE *F = fopen(StlFNm.CStr(), "rt");
572  while (! feof(F)) {
573  memset(line, 0, 2024);
574  fgets(line, 2024, F);
575  if (strlen(line) == 0) break;
576  TStr(line).SplitOnAllCh(',', ColV);
577  const int SrcNId = ColV[0].GetInt();
578  const int DstNId = ColV[1].GetInt();
579  // time data
580  TStr TmStr = ColV[2]; // time-format: 29JAN02:21:55:23
581  int Year = TmStr.GetSubStr(5, 6).GetInt();
582  if (Year < 10) { Year += 2000; } else { Year += 1900; }
583  MonthStr[0]=toupper(TmStr[2]); MonthStr[1]=tolower(TmStr[3]);
584  MonthStr[2]=tolower(TmStr[4]); MonthStr[3]=0;
585  const int Month = TTmInfo::GetMonthN(MonthStr, lUs);
586  const int Day = TmStr.GetSubStr(0, 1).GetInt();
587  const int Hour = TmStr.GetSubStr(8, 9).GetInt();
588  const int Min = TmStr.GetSubStr(11, 12).GetInt();
589  const int Sec = TmStr.GetSubStr(14, 15).GetInt();
590  // add nodes and links
591  if (! TimeNet.IsNode(SrcNId)) { TimeNet.AddNode(SrcNId, TSecTm(Year, Month, Day, Hour, Min, Sec)); }
592  if (! TimeNet.IsNode(DstNId)) { TimeNet.AddNode(DstNId, TSecTm(Year, Month, Day, Hour, Min, Sec)); }
593  if (! TimeNet.IsEdge(SrcNId, DstNId)) { TimeNet.AddEdge(SrcNId, DstNId); }
594  if (++NLines % 100000 == 0) printf("\r %dk", NLines/1000);
595  }
596  fclose(F);
597  printf("\r %d lines read\n", NLines);
598  printf("Graph: nodes: %d edges: %d\n", TimeNet.GetNodes(), TimeNet.GetEdges());
599  TimeNet.Defrag(true);
600  return TimeNetPt;
601 }
602 
604 // Time Node-Edge Network
606  if (this != &TimeNet) {
607  TNet::operator=(TimeNet);
608  }
609  return *this;
610 }
611 
613  PTimeNet NewNet = TTimeNet::New();
614  NewNet->Reserve(GetNodes(), -1);
615  for (TNodeI NI = BegNI(); NI < EndNI(); NI++) {
616  NewNet->AddNode(NI.GetId(), NI.GetDat());
617  }
618  for (TEdgeI EI = BegEI(); EI < EndEI(); EI++) {
619  const int src = EI.GetSrcNId();
620  const int dst = EI.GetDstNId();
621  if (! NewNet->IsEdge(src, dst)) {
622  NewNet->AddEdge(src, dst); }
623  }
624  NewNet->Defrag();
625  return NewNet;
626 }
627 
628 // only take earliest edge between the nodes
630  PTimeNENet Net = TTimeNENet::New();
631  Net->Reserve(GetNodes(), -1);
632  for (TNodeI NI = BegNI(); NI < EndNI(); NI++) {
633  Net->AddNode(NI.GetId(), NI.GetDat()); }
634  TIntV EIdV; GetEIdByTm(EIdV);
635  TIntPrSet EdgeSet(GetEdges());
636  for (int edge = 0; edge < EIdV.Len(); edge++) {
637  const TEdgeI EI = GetEI(EIdV[edge]);
638  const int Src = EI.GetSrcNId();
639  const int Dst = EI.GetDstNId();
640  if (Src==Dst || EdgeSet.IsKey(TIntPr(TMath::Mn(Src, Dst), TMath::Mx(Src, Dst)))) { continue; } // take only 1st edge
641  EdgeSet.AddKey(TIntPr(TMath::Mn(Src, Dst), TMath::Mx(Src, Dst)));
642  Net->AddEdge(EI);
643  }
644  return Net;
645 }
646 
648  PTimeNENet NewNetPt = TTimeNENet::New();
649  TTimeNENet& NewNet = *NewNetPt;
650  NewNet.Reserve(NIdV.Len(), -1);
651  int node, edge;
652  TNodeI NI;
653  for (node = 0; node < NIdV.Len(); node++) {
654  NewNet.AddNode(NIdV[node], GetNDat(NIdV[node]));
655  }
656  for (node = 0; node < NIdV.Len(); node++) {
657  NI = GetNI(NIdV[node]);
658  for (edge = 0; edge < NI.GetOutDeg(); edge++) {
659  const TEdgeI EI = GetEI(NI.GetOutEId(edge));
660  if (NewNet.IsNode(EI.GetDstNId())) {
661  NewNet.AddEdge(EI); }
662  }
663  }
664  NewNet.Defrag();
665  return NewNetPt;
666 }
667 
669  PTimeNENet NewNetPt = TTimeNENet::New();
670  TTimeNENet& NewNet = *NewNetPt;
671  NewNet.Reserve(-1, EIdV.Len());
672  for (int edge = 0; edge < EIdV.Len(); edge++) {
673  const TEdgeI Edge = GetEI(EIdV[edge]);
674  if (! NewNet.IsNode(Edge.GetSrcNId()))
675  NewNet.AddNode(GetNI(Edge.GetSrcNId()));
676  if (! NewNet.IsNode(Edge.GetDstNId()))
677  NewNet.AddNode(GetNI(Edge.GetDstNId()));
678  NewNet.AddEdge(Edge);
679  }
680  NewNet.Defrag();
681  return NewNetPt;
682 }
683 
684 // assumes that the edges of the network are sorted in time
686  PTimeNENet NewNetPt = TTimeNENet::New();
687  TTimeNENet& NewNet = *NewNetPt;
688  TSecTm PrevTm;
689  for (TEdgeI EI = BegEI(); EI < EndEI(); EI++) {
690  if (EI() > MaxEdgeTm) { break; }
691  if (! NewNet.IsNode(EI.GetSrcNId()))
692  NewNet.AddNode(GetNI(EI.GetSrcNId()));
693  if (! NewNet.IsNode(EI.GetDstNId()))
694  NewNet.AddNode(GetNI(EI.GetDstNId()));
695  NewNet.AddEdge(EI);
696  IAssert(! PrevTm.IsDef() || PrevTm <= EI()); // edge times must be sorted
697  PrevTm = EI();
698  }
699  NewNet.Defrag();
700  return NewNetPt;
701 }
702 
704  NodeH.SortByDat(true);
705  EdgeH.SortByDat(true);
706 }
707 
708 // node time must be smaller than times of incoming or outgoing edges
710  int Cnt = 0;
711  for (TNodeI NI = BegNI(); NI < EndNI(); NI++) {
712  TSecTm& NodeTm = NI();
713  for (int edge = 0; edge < NI.GetOutDeg(); edge++) {
714  const TSecTm& EdgeTm = GetEDat(NI.GetOutEId(edge));
715  if (! NodeTm.IsDef() || EdgeTm < NodeTm) { NodeTm = EdgeTm; Cnt++; }
716  }
717  for (int edge = 0; edge < NI.GetInDeg(); edge++) {
718  const TSecTm& EdgeTm = GetEDat(NI.GetInEId(edge));
719  if (! NodeTm.IsDef() || EdgeTm < NodeTm) { NodeTm = EdgeTm; Cnt++; }
720  }
721  }
722  printf("Update node times: %d/%d updates\n", Cnt, GetNodes());
723 }
724 
726  int Cnt = 0;
727  for (TNodeI NI = BegNI(); NI < EndNI(); NI++) {
728  if (NI.GetDeg() == 0) { continue; }
729  TSecTm NodeTm;
730  for (int edge = 0; edge < NI.GetOutDeg(); edge++) {
731  const TSecTm& EdgeTm = GetEDat(NI.GetOutEId(edge)); IAssert(EdgeTm.IsDef());
732  if (! NodeTm.IsDef() || EdgeTm < NodeTm) { NodeTm = EdgeTm; Cnt++; }
733  }
734  for (int edge = 0; edge < NI.GetInDeg(); edge++) {
735  const TSecTm& EdgeTm = GetEDat(NI.GetInEId(edge)); IAssert(EdgeTm.IsDef());
736  if (! NodeTm.IsDef() || EdgeTm < NodeTm) { NodeTm = EdgeTm; Cnt++; }
737  }
738  GetNDat(NI.GetId()) = NodeTm;
739  }
740  printf("Node times set: %d/%d updates\n", Cnt, GetNodes());
741 }
742 
743 // shuffle edge arrivals
744 void TTimeNENet::SetRndEdgeTimes(const int& MinTmEdge) {
745  printf("Shuffling last %d (%d%%) edge arrival times..\n", GetEdges()-MinTmEdge, int(100.0*(GetEdges()-MinTmEdge)/double(GetEdges())));
746  TIntV RndEIdV; GetEIdByTm(RndEIdV);
747  TIntV TrueEIdV = RndEIdV;
748  TSecTmV TrueTmV;
749  const int SwapLen = RndEIdV.Len()-MinTmEdge;
750  for (int R = 0; R < 10; R++) {
751  for (int i = MinTmEdge; i < RndEIdV.Len(); i++) {
752  RndEIdV.Swap(TInt::Rnd.GetUniDevInt(SwapLen)+MinTmEdge, TInt::Rnd.GetUniDevInt(SwapLen)+MinTmEdge); }
753  }
754  for (int e = 0; e < TrueEIdV.Len(); e++) {
755  TrueTmV.Add(GetEDat(TrueEIdV[e])); }
756  for (int e = 0; e < RndEIdV.Len(); e++) {
757  GetEDat(RndEIdV[e]) = TrueTmV[e]; }
758  UpdateNodeTimes();
759 }
760 
762  TSecTm MnNodeTm, MxNodeTm;
763  TSecTm MnEdgeTm, MxEdgeTm;
764  for (TNodeI NI = BegNI(); NI < EndNI(); NI++) {
765  const TSecTm NodeTm = NI();
766  if (! MnNodeTm.IsDef() || MnNodeTm>NodeTm) { MnNodeTm = NodeTm; }
767  if (! MxNodeTm.IsDef() || MxNodeTm<NodeTm) { MxNodeTm = NodeTm; }
768  }
769  printf("Node times:\n %s\n %s\n", MnNodeTm.GetStr().CStr(), MxNodeTm.GetStr().CStr());
770  for (TEdgeI EI= BegEI(); EI < EndEI(); EI++) {
771  const TSecTm EdgeTm = EI();
772  if (! MnEdgeTm.IsDef() || MnEdgeTm>EdgeTm) { MnEdgeTm = EdgeTm; }
773  if (! MxEdgeTm.IsDef() || MxEdgeTm<EdgeTm) { MxEdgeTm = EdgeTm; }
774  }
775  printf("Edge times:\n %s\n %s\n", MnEdgeTm.GetStr().CStr(), MxEdgeTm.GetStr().CStr());
776 }
777 
778 void TTimeNENet::GetNIdByTm(TIntV& NIdV) const {
779  TVec<TKeyDat<TSecTm, TInt> > TmToNIdV(GetNodes(), 0);
780  for (TNodeI NodeI = BegNI(); NodeI < EndNI(); NodeI++) {
781  TmToNIdV.Add(TKeyDat<TSecTm, TInt>(NodeI.GetDat(), NodeI.GetId())); }
782  TmToNIdV.Sort();
783  NIdV.Gen(GetNodes(), 0);
784  for (int i = 0; i < TmToNIdV.Len(); i++) {
785  NIdV.Add(TmToNIdV[i].Dat); }
786 }
787 
788 void TTimeNENet::GetEIdByTm(TIntV& EIdV) const {
789  TVec<TKeyDat<TSecTm, TInt> > TmToEIdV(GetEdges(), 0);
790  for (TEdgeI EI= BegEI(); EI < EndEI(); EI++) {
791  TmToEIdV.Add(TKeyDat<TSecTm, TInt>(EI.GetDat(), EI.GetId())); }
792  TmToEIdV.Sort();
793  EIdV.Gen(GetEdges(), 0);
794  for (int i = 0; i < TmToEIdV.Len(); i++) {
795  EIdV.Add(TmToEIdV[i].Dat); }
796 }
797 
798 void TTimeNENet::GetTmBuckets(const TTmUnit& TmUnit, TTimeNet::TTmBucketV& TmBucketV) const {
799  THash<TInt, TIntV> TmIdToNIdVH;
800  TIntV NIdV; GetNIdByTm(NIdV);
801  for (int n = 0; n < NIdV.Len(); n++) {
802  const int TmId = GetNDat(NIdV[n]).Round(TmUnit).GetAbsSecs();
803  if (! TmIdToNIdVH.IsKey(TmId)) { TmIdToNIdVH.AddKey(TmId); }
804  TmIdToNIdVH.GetDat(TmId).Add(NIdV[n]);
805  }
806  TVec<TPair<TInt, TIntV> > TmIdNIdVV;
807  TmIdToNIdVH.GetKeyDatPrV(TmIdNIdVV);
808  TmIdNIdVV.Sort();
809  TmBucketV.Gen(TmIdNIdVV.Len());
810  for (int i = 0; i < TmIdNIdVV.Len(); i++) {
811  TTimeNet::TTmBucket& Bucket = TmBucketV[i];
812  Bucket.BegTm = TmIdNIdVV[i].Val1;
813  Bucket.NIdV = TmIdNIdVV[i].Val2;
814  }
815 }
816 
817 void TTimeNENet::GetEdgeTmBuckets(const TTmUnit& TmUnit, TTimeNet::TTmBucketV& TmBucketV) const {
818  THash<TInt, TIntV> TmIdToEIdVH;
819  TIntV EIdV; GetEIdByTm(EIdV);
820  for (int e = 0; e < EIdV.Len(); e++) {
821  const int TmId = GetEDat(EIdV[e]).Round(TmUnit).GetAbsSecs();
822  if (! TmIdToEIdVH.IsKey(TmId)) { TmIdToEIdVH.AddKey(TmId); }
823  TmIdToEIdVH.GetDat(TmId).Add(EIdV[e]);
824  }
825  TVec<TPair<TInt, TIntV> > TmIdEIdVV;
826  TmIdToEIdVH.GetKeyDatPrV(TmIdEIdVV);
827  TmIdEIdVV.Sort();
828  TmBucketV.Gen(TmIdEIdVV.Len());
829  for (int i = 0; i < TmIdEIdVV.Len(); i++) {
830  TTimeNet::TTmBucket& Bucket = TmBucketV[i];
831  Bucket.BegTm = TmIdEIdVV[i].Val1;
832  Bucket.NIdV = TmIdEIdVV[i].Val2;
833  }
834 }
835 
836 void TTimeNENet::GetNodeBuckets(const int NodesPerBucket, TTimeNet::TTmBucketV& TmBucketV) const {
837  TIntV NIdV; GetNIdByTm(NIdV);
838  TmBucketV.Gen(NIdV.Len() / NodesPerBucket + 1, 0);
839  for (int i = 0; i < NIdV.Len(); i++) {
840  const int b = i/NodesPerBucket;
841  if (TmBucketV.Len() <= b) { TmBucketV.Add(TTimeNet::TTmBucket(TSecTm(b))); }
842  TmBucketV[b].NIdV.Add(NIdV[i]);
843  }
844 }
845 
846 void TTimeNENet::GetEdgeBuckets(const int EdgesPerBucket, TTimeNet::TTmBucketV& TmBucketV) const {
847  TIntV EIdV; GetEIdByTm(EIdV);
848  TmBucketV.Gen(EIdV.Len()/EdgesPerBucket + 1, 0);
849  for (int i = 0; i < EIdV.Len(); i++) {
850  const int b = i/EdgesPerBucket;
851  if (TmBucketV.Len() <= b) { TmBucketV.Add(TTimeNet::TTmBucket(TSecTm(b))); }
852  TmBucketV[b].NIdV.Add(EIdV[i]);
853  }
854 }
855 
856 // get edges that close triangles over time
857 int TTimeNENet::GetTriadEdges(TIntV& TriadEIdV) const {
858  PUNGraph Graph = TUNGraph::New(GetNodes(), GetEdges());
859  TIntV EIdV; GetEIdByTm(EIdV);
860  TriadEIdV.Clr();
861  TExeTm ExeTm;
862  for (int edge = 0; edge < EIdV.Len(); edge++) {
863  const TEdgeI EI = GetEI(EIdV[edge]);
864  const int Src = EI.GetSrcNId();
865  const int Dst = EI.GetDstNId();
866  if (Src==Dst || Graph->IsEdge(Src, Dst)) { continue; } // take only 1st edge
867  if (! Graph->IsNode(Src)) { Graph->AddNode(Src); }
868  if (! Graph->IsNode(Dst)) { Graph->AddNode(Dst); }
869  if (TSnap::GetCmnNbrs(Graph, Src, Dst) > 0) { TriadEIdV.Add(EIdV[edge]); }
870  Graph->AddEdge(Src, Dst);
871  if (edge % 10000 == 0) {
872  printf("\redges %dk / %dk: triangle edges: %dk [total %s]", edge/1000, EIdV.Len()/1000,
873  TriadEIdV.Len()/1000, ExeTm.GetStr()); }
874  }
875  return Graph->GetEdges();
876 }
877 
878 PGStatVec TTimeNENet::TimeGrowth(const TTmUnit& TimeStep, const TFSet& TakeStat, const TSecTm& StartTm) const {
879  TExeTm ExeTm;
880  PGStatVec GStatVec = TGStatVec::New(TimeStep, TakeStat);
881  TTimeNet::TTmBucketV TmBucketV;
882  GetEdgeTmBuckets(TimeStep, TmBucketV);
883  const PNEGraph FullGraph = TSnap::ConvertGraph<PNEGraph>(PTimeNENet((TTimeNENet*)this));
884  TIntV EdgeIdV;
885  for (int t = 0; t < TmBucketV.Len(); t++) {
886  EdgeIdV.AddV(TmBucketV[t].NIdV); // edges up to time T
887  printf("\n***%d/%d: %s (%d edges) ", t+1, TmBucketV.Len(), TmBucketV[t].BegTm.GetStr().CStr(), EdgeIdV.Len()); ExeTm.Tick();
888  if (TmBucketV[t].BegTm < StartTm) { continue; }
889  const PNEGraph PreGraph = TSnap::GetESubGraph(FullGraph, EdgeIdV);
890  GStatVec->Add(PreGraph, TmBucketV[t].BegTm);
891  printf(" [%s]\n", ExeTm.GetTmStr());
892  //{ TFOut FOut("LinkedIn.GStatVec"); GStatVec->Save(FOut); }
893  }
894  return GStatVec;
895 }
896 
897 // take network from last TakeNTmUnits and measure properties
898 PGStatVec TTimeNENet::TimeGrowth(const TStr& FNmPref, const TStr& Desc, const TFSet& TakeStat, const int& NDiamRuns,
899  const TTmUnit& TmUnit, const int& TakeNTmUnits, const bool& LinkBWays) const {
900  TGStat::NDiamRuns = NDiamRuns;
901  PGStatVec GrowthStat = TGStatVec::New(TmUnit, TakeStat);
902  TTimeNet::TTmBucketV TmBucketV;
903  GetEdgeTmBuckets(TmUnit, TmBucketV);
904  TIntV EdgeIdV;
905  TExeTm ExeTm;
906  for (int t = 0; t < TmBucketV.Len(); t++) {
907  // take graph over last TakeNTmUnits time units
908  if (TakeNTmUnits == -1) {
909  EdgeIdV.AddV(TmBucketV[t].NIdV); }
910  else {
911  if (t < TakeNTmUnits) { continue; }
912  EdgeIdV.Clr(false);
913  for (int i = t-TakeNTmUnits; i < t; i++) { EdgeIdV.AddV(TmBucketV[i].NIdV); }
914  }
915  printf("*** %s (%d edges)\n", TmBucketV[t].BegTm.GetStr().CStr(), EdgeIdV.Len()); ExeTm.Tick();
916  PNEGraph PreGraph = TSnap::ConvertESubGraph<PNEGraph>(PTimeNENet((TTimeNENet*)this), EdgeIdV);
917  if (LinkBWays) {
918  TIntV KeepEIdV; // keep only nodes that have in- and out-links (send and receive email)
919  for (TNEGraph::TEdgeI EI = PreGraph->BegEI(); EI < PreGraph->EndEI(); EI++) {
920  if (PreGraph->IsEdge(EI.GetDstNId(), EI.GetSrcNId(), true)) { KeepEIdV.Add(EI.GetId()); }
921  }
922  PreGraph = TSnap::GetESubGraph(PreGraph, KeepEIdV);
923  }
924  // take statistics
925  GrowthStat->Add(PreGraph, TmBucketV[t].BegTm);
926  { TFOut FOut(TStr::Fmt("growth.%s.gStatVec", FNmPref.CStr()));
927  GrowthStat->Save(FOut); }
928  GrowthStat->SaveTxt(FNmPref, Desc);
929  printf(" [%s]\n", ExeTm.GetTmStr());
930  }
931  return GrowthStat;
932 }
933 
934 void TTimeNENet::PlotEffDiam(const TStr& FNmPref, const TStr& Desc, const TTmUnit& TmUnit,
935  const TSecTm& StartTm, const int& NDiamRuns, const bool& OnlyWcc) const {
936  TTimeNet::TTmBucketV TmBucketV;
937  GetEdgeTmBuckets(TmUnit, TmBucketV);
938  PNEGraph FullGraph = TSnap::ConvertGraph<PNEGraph>(PTimeNENet((TTimeNENet*)this));
939  TIntV EdgeIdV;
940  TExeTm ExeTm, Run1Tm;
941  TFltTrV TmDiamV, NdsDiamV;
942  for (int t = 0; t < TmBucketV.Len(); t++) {
943  EdgeIdV.AddV(TmBucketV[t].NIdV); // edges up to time T
944  printf("\n*** %s (%d edges)\n", TmBucketV[t].BegTm.GetStr(TmUnit).CStr(), EdgeIdV.Len()); ExeTm.Tick();
945  if (TmBucketV[t].BegTm < StartTm) continue;
946  PNGraph PreGraph = TSnap::ConvertESubGraph<PNGraph>(FullGraph, EdgeIdV);
947  TMom Mom;
948  double EffDiam = 0.0;
949  for (int r = 0; r < NDiamRuns; r++) {
950  printf("%d...", r+1); Run1Tm.Tick();
951  if (OnlyWcc) { EffDiam = TSnap::GetAnfEffDiam(TSnap::GetMxWcc(PreGraph)); }
952  else { EffDiam = TSnap::GetAnfEffDiam(PreGraph); }
953  Mom.Add(EffDiam);
954  printf("[%s]\r", Run1Tm.GetTmStr());
955  }
956  Mom.Def();
957  TmDiamV.Add(TFltTr(TmBucketV[t].BegTm.Round(TmUnit).GetAbsSecs(), Mom.GetMean(), Mom.GetSDev()));
958  NdsDiamV.Add(TFltTr(PreGraph->GetNodes(), Mom.GetMean(), Mom.GetSDev()));
959  NdsDiamV.Sort();
960  printf(" [%s] \n", ExeTm.GetTmStr());
961  const TStr WccStr = OnlyWcc ? "WCC " : TStr::GetNullStr();
962  { TGnuPlot GnuPlot("diamEff1."+FNmPref, TStr::Fmt("%s. G(%d, %d). %d RUNS.", Desc.CStr(), GetNodes(), GetEdges(), NDiamRuns));
963  GnuPlot.SetXYLabel(TStr::Fmt("TIME [%s]", TTmInfo::GetTmUnitStr(TmUnit).CStr()), "AVERAGE "+WccStr+"Effective Diameter");
964  GnuPlot.AddErrBar(TmDiamV, "", "");
965  GnuPlot.SavePng(); }
966  { TGnuPlot GnuPlot("diamEff2."+FNmPref, TStr::Fmt("%s. G(%d, %d). %d RUNS.", Desc.CStr(), GetNodes(), GetEdges(), NDiamRuns));
967  GnuPlot.SetXYLabel("NODES", "AVERAGE "+WccStr+"Effective Diameter");
968  GnuPlot.AddErrBar(NdsDiamV, "", "");
969  GnuPlot.SavePng(); }
970  }
971 }
972 
973 // pretend we only have link data starting in PostTmDiam
974 // compare the diameter of the nodes after PostTmDiam with diameter
975 // of the nodes after PostTmDiam but *also* using nodes and edges
976 // from before PostTmDiam
977 void TTimeNENet::PlotMissingPast(const TStr& FNmPref, const TStr& Desc, const TTmUnit& TmUnit,
978  const TSecTm& DelPreTmEdges, const TSecTm& PostTmDiam, const bool& LinkBWays) {
979  printf("\nGrowth over time: degree distribution, Growth Power Law, Diameter.\n %s group by %s.\n",
980  FNmPref.CStr(), TTmInfo::GetTmUnitStr(TmUnit).CStr());
981  printf(" Delete out-edges of pre time %s nodes.\n Take subgraph of post year %s subgraph.\n\n",
982  DelPreTmEdges.GetStr().CStr(), PostTmDiam.GetStr().CStr());
983  // run diameter
984  /*const int NSamples = 100;
985  const int NDiamRuns = 10;
986  // delete past
987  if (DelPreEdges != -1) {
988  TIntV EdgeIdV;
989  printf("Deleting pre-year %d edges\n", DelPreEdges);
990  for (TEdgeI EI = BegEI(); EI < EndEI(); EI++) {
991  if (EI().GetYear() < DelPreEdges) { EdgeIdV.Add(EI.GetId()); } }
992  for (int e = 0; e < EdgeIdV.Len(); e++) { DelEdge(EdgeIdV[e]); }
993  printf(" Deleted %d edges (out of %d edges total).\n", EdgeIdV.Len(), GetEdges());
994  }
995  PNEGraph FullGraph = GetNEGraph();
996  TSnap::DelZeroDegNodes(FullGraph);
997 
998  PGrowthStat GrowthStat = TGrowthStat::New(TmUnit, TGraphStat::AllStat());
999  TFltV PreDiamSDev, PreEffDiamSDev, WccDiamSDev, WccEffDiamSDev;
1000  TIntV EdgeIdV;
1001  TExeTm ExeTm;
1002  TTimeNet::TTmBucketV EdgeTmBucketV;
1003  GetEdgeTmBuckets(TmUnit, EdgeTmBucketV);
1004  for (int t = 0; t < EdgeTmBucketV.Len(); t++) {
1005  printf("\nGraph: %s (%d / %d) [%s]\n", EdgeTmBucketV[t].BegTm.GetTmStr().CStr(),
1006  t+1, EdgeTmBucketV.Len(), TExeTm::GetCurTm());
1007  // up-to-year subgraph
1008  EdgeIdV.AddV(EdgeTmBucketV[t].NIdV); // nodes up to time T
1009  if (EdgeTmBucketV[t].BegTm.GetYear() < PostYearDiam) continue;
1010 
1011  const PNEGraph PreNEGraph = EdgeBothWays ? FullGraph->GetEdgeSubGraph(EdgeIdV) : FullGraph;
1012  PNGraph PreGraph = PreNEGraph->GetBWayGraph();
1013  PNGraph WccGraph = TSnap::GetMxWcc(PreGraph);
1014 
1015  // find nodes that are not in missing past
1016  TIntV PostYearNIdV, WccPostYearNIdV;
1017  THash<TIntPr, TInt> PostYearEdgeH;
1018  for (TNEGraph::TEdgeI EI = PreNEGraph->BegEI(); EI < PreNEGraph->EndEI(); EI++) {
1019  if (GetEDat(EI.GetId()).GetYear() >= PostYearDiam) {
1020  PostYearEdgeH.AddKey(TIntPr(EI.GetSrcNId(), EI.GetDstNId())); }
1021  }
1022  TIntH PostYearNIdH;
1023  for (int i = 0; i < PostYearEdgeH.Len(); i++) {
1024  if ((! EdgeBothWays) || (EdgeBothWays && PostYearEdgeH.IsKey(TIntPr(PostYearEdgeH.GetKey(i).Val2, PostYearEdgeH.GetKey(i).Val1)))) { //reverse edge exists
1025  PostYearNIdH.AddKey(PostYearEdgeH.GetKey(i).Val1);
1026  PostYearNIdH.AddKey(PostYearEdgeH.GetKey(i).Val2);
1027  }
1028  }
1029  PostYearNIdH.GetKeyV(PostYearNIdV);
1030  for (int i = 0; i < PostYearNIdV.Len(); i++) {
1031  if (WccGraph->IsNode(PostYearNIdV[i])) {
1032  WccPostYearNIdV.Add(PostYearNIdV[i]); }
1033  }
1034 
1035  // diameter of PostYearDiam subgraph using whole graph (all edges)
1036  TMom PreDiamMom, PreEffDiamMom, WccDiamMom, WccEffDiamMom;
1037  for (int r = 0; r < NDiamRuns; r++) {
1038  if (! PostYearNIdV.Empty()) {
1039  PreDiamMom.Add(-1); //PreDiamMom.Add(TSnap::GetBfsFullDiam(PreGraph, NSamples, PostYearNIdV, false));
1040  PreEffDiamMom.Add(TSnap::GetBfsEffDiam(PreGraph, NSamples, PostYearNIdV, false));
1041  }
1042  if (! WccPostYearNIdV.Empty()) {
1043  //WccDiamMom.Add(TSnap::GetBfsFullDiam(WccGraph, NSamples, WccPostYearNIdV, false));
1044  //WccEffDiamMom.Add(TSnap::GetBfsEffDiam(WccGraph, NSamples, WccPostYearNIdV, false));
1045  WccDiamMom.Add(-1); WccEffDiamMom.Add(-1);
1046  }
1047  printf(" diam: %d [%s] %.2f\r", r+1, ExeTm.GetTmStr(), PreEffDiamMom.GetValV().Last().Val); ExeTm.Tick();
1048  }
1049  PreDiamMom.Def(); PreEffDiamMom.Def();
1050  WccDiamMom.Def(); WccEffDiamMom.Def();
1051  // save stat
1052  PGraphStat GraphStatPt = GrowthStat->Add(EdgeTmBucketV[t].BegTm);
1053  TGraphStat& GraphStat = *GraphStatPt;
1054  GraphStat.Nodes = PreGraph->GetNodes();
1055  GraphStat.NonZNodes = PreGraph->GetNodes() - TSnap::CntDegNodes<PNGraph>(PreGraph, 0);
1056  GraphStat.Srcs = GraphStat.Nodes - TSnap::CntOutDegNodes<PNGraph>(PreGraph, 0);
1057  GraphStat.Edges = PreGraph->GetEdges();
1058  GraphStat.WccNodes= WccGraph->GetNodes();
1059  GraphStat.WccEdges = WccGraph->GetEdges();
1060  GraphStat.FullDiam = PreDiamMom.GetMean(); // mean
1061  GraphStat.EffDiam = PreEffDiamMom.GetMean();
1062  GraphStat.FullWccDiam = WccDiamMom.GetMean();
1063  GraphStat.EffWccDiam = WccEffDiamMom.GetMean();
1064  GraphStat.FullDiamDev = PreDiamMom.GetSDev(); // variance
1065  GraphStat.EffDiamDev = PreEffDiamMom.GetSDev();
1066  GraphStat.FullWccDiamDev = WccDiamMom.GetSDev();
1067  GraphStat.EffWccDiamDev = WccEffDiamMom.GetSDev();
1068  { TFOut FOut("growth."+FNmPref+".bin");
1069  GrowthStat->Save(FOut); }
1070  const TStr BigDesc = TStr::Fmt("%s. MISSING PAST DIAMETER\nDelPreEdges\t%d\nPostYearDiam\t%d\n",
1071  Desc.CStr(), DelPreEdges, PostYearDiam);
1072  GrowthStat->SaveTxt(FNmPref, BigDesc);
1073  }
1074  // diameter plots
1075  GrowthStat->PlotDiam(FNmPref, Desc + TStr::Fmt(" MISSING PAST. DelPre:%d PostYear:%d.",
1076  DelPreEdges, PostYearDiam));
1077  */
1078 }
1079 
1080 // plot time difference of the node ages when an edge arrives
1081 /*void TTimeNENet::PlotEdgeNodeTimeDiff(const TStr& FNmPref, TStr Desc) const {
1082  if (Desc.Empty()) { Desc = FNmPref; }
1083  printf("Edge node time differences:\n");
1084  THash<TInt, TInt> NodeAgeH, Node1AgeH; // (time_dst - time_src), only 1st edge
1085  THash<TInt, TInt> SrcTmH, DstTmH; // time_edge - time_src/time_dst)
1086  PUNGraph Graph = TUNGraph::New(GetNodes(), -1);
1087  TIntV EIdV; GetEIdByTm(EIdV);
1088  TIntH DegCntH(10, true);
1089  TExeTm ExeTm;
1090  for (int edge = 0; edge < EIdV.Len(); edge++) {
1091  const TEdgeI EI = GetEI(EIdV[edge]);
1092  const int Src = EI.GetSrcNId();
1093  const int Dst = EI.GetDstNId();
1094  if (Src==Dst || Graph->IsEdge(Src, Dst)) { continue; } // take only 1st edge
1095  if (! Graph->IsNode(Src)) { Graph->AddNode(Src); }
1096  if (! Graph->IsNode(Dst)) { Graph->AddNode(Dst); }
1097  const int SrcDeg = Graph->GetNode(Src).GetOutDeg();
1098  const int DstDeg = Graph->GetNode(Dst).GetInDeg();
1099  const int EdgeTm = EI.GetDat().GetAbsSecs();
1100  const int SrcTm = EI.GetSrcNDat().GetAbsSecs();
1101  const int DstTm = EI.GetDstNDat().GetAbsSecs();
1102  NodeAgeH.AddDat((DstTm-SrcTm)/3600) += 1;
1103  if (SrcDeg == 0 || DstDeg == 0) {
1104  Node1AgeH.AddDat((DstTm-SrcTm)/3600) += 1; }
1105  SrcTmH.AddDat((EdgeTm-SrcTm)/3600) += 1;
1106  DstTmH.AddDat((EdgeTm-DstTm)/3600) += 1;
1107  // add edge
1108  Graph->AddEdge(Src, Dst);
1109  if (edge % 1000 == 0) {
1110  printf("\r%dk / %dk [total %s]", edge/1000, EIdV.Len()/1000, ExeTm.GetStr()); }
1111  }
1112  TGnuPlot::PlotValCntH(NodeAgeH, "tmDstSrc."+FNmPref, TStr::Fmt("%s, Node time difference.", Desc.CStr()),
1113  "Time(destination) - Time(source) [hours]", "Count");
1114  TGnuPlot::PlotValCntH(Node1AgeH, "tmDstSrc1."+FNmPref, TStr::Fmt("%s. 1-ST EDGE of one of the nodes.", Desc.CStr()),
1115  "Time(destination) - Time(source) [hours]", "Count");
1116  TGnuPlot::PlotValCntH(SrcTmH, "tmEdgeSrc."+FNmPref, TStr::Fmt("%s. Edge time - source node time.", Desc.CStr()),
1117  "Edge time - source node time [hours]", "Count");
1118  TGnuPlot::PlotValCntH(DstTmH, "tmEdgeDst."+FNmPref, TStr::Fmt("%s. Edge time - destination node time.", Desc.CStr()),
1119  "Edge time - destination node time [hours]", "Count");
1120  printf("\n");
1121 }
1122 
1123 // plot the distribution whether the edge to node that closes the triad was
1124 // chosen random or preferentially
1125 void TTimeNENet::PlotCloseTriad(const TStr& FNmPref, TStr Desc, const bool& OnlyBackEdges) const {
1126  if (Desc.Empty()) { Desc = FNmPref; }
1127  printf("Random vs. preferential triad closing: %s\n", OnlyBackEdges?"OnlyBackEdges":"");
1128  THash<TInt, TInt> NodeAgeH, Node1AgeH; // (time_dst - time_src), only 1st edge
1129  THash<TInt, TInt> SrcTmH, DstTmH; // time_edge - time_src/time_dst)
1130  PUNGraph Graph = TUNGraph::New(GetNodes(), -1);
1131  TIntV EIdV; GetEIdByTm(EIdV);
1132  TIntV CmnV, SrcNbrV, DstNbrV;
1133  TExeTm ExeTm;
1134  TFltV RndRdnV, PrefPrefV, PrefRndV, RndPrefV;
1135  TFltFltH RndRndH, PrefPrefH, PrefRndH, RndPrefH;
1136  int TriadEdges = 0;
1137  FILE *F = fopen(TStr::Fmt("%s-prob.tab", FNmPref.CStr()).CStr(), "wt");
1138  fprintf(F, "#Probability of picking a node that closes the triangle (pick preferentially vs. uniformly at random)\n");
1139  fprintf(F, "#i\tEId\tRndRnd\tRndPref\tPrefRnd\tPrefPref\n");
1140  for (int edge = 0; edge < EIdV.Len(); edge++) {
1141  const TEdgeI EI = GetEI(EIdV[edge]);
1142  const int Src = EI.GetSrcNId();
1143  const int Dst = EI.GetDstNId();
1144  if (Src==Dst || Graph->IsEdge(Src, Dst)) { continue; } // take only 1st edge
1145  if (! Graph->IsNode(Src)) { Graph->AddNode(Src); }
1146  if (! Graph->IsNode(Dst)) { Graph->AddNode(Dst); }
1147  if (OnlyBackEdges && EI.GetSrcNDat().GetAbsSecs()-EI.GetDstNDat().GetAbsSecs() < 0) {
1148  Graph->AddEdge(Src, Dst); continue;
1149  }
1150  // find common neighbors
1151  const TUNGraph::TNodeI SrcNI = Graph->GetNI(Src);
1152  const TUNGraph::TNodeI DstNI = Graph->GetNI(Dst);
1153  SrcNbrV.Clr(false); DstNbrV.Clr(false);
1154  for (int i = 0; i < SrcNI.GetOutDeg(); i++) { SrcNbrV.Add(SrcNI.GetOutNId(i)); }
1155  for (int i = 0; i < DstNI.GetOutDeg(); i++) { DstNbrV.Add(DstNI.GetOutNId(i)); }
1156  SrcNbrV.Sort(); DstNbrV.Sort();
1157  SrcNbrV.Intrs(DstNbrV, CmnV);
1158  if (! CmnV.Empty()) {
1159  // triad completion
1160  const int SrcDeg = Graph->GetNode(Src).GetOutDeg();
1161  const int DstDeg = Graph->GetNode(Dst).GetInDeg();
1162  double RndRnd=0, RndPref=0, PrefRnd=0, PrefPref=0;
1163  int AllNbrDegSum=0;//, NbrDegSum = 0;
1164  //for (int i = 0; i < CmnV.Len(); i++) {
1165  // NbrDegSum += Graph->GetNode(CmnV[i]).GetOutDeg(); }
1166  for (int i = 0; i < SrcNI.GetOutDeg(); i++) {
1167  AllNbrDegSum += Graph->GetNode(SrcNI.GetOutNId(i)).GetOutDeg(); }
1168  // uniform-uniform node selection = 1/|cmn| sum_{i\in cmn} 1/(d_i-1)
1169  // preferential-uniform
1170  for (int i = 0; i < CmnV.Len(); i++) {
1171  const int Deg = Graph->GetNode(CmnV[i]).GetOutDeg();
1172  RndRnd += (1.0/double(SrcDeg)) * (1.0/double(Deg-1));
1173  PrefRnd += (Deg/double(AllNbrDegSum)) * (1.0/double(Deg-1));
1174  }
1175  // uniform-preferential selection = 1/|cmn| sum_{i\in cmn} d_t / sum_{i--j} d_j
1176  // preferential-preferential
1177  for (int i = 0; i < CmnV.Len(); i++) {
1178  const TUNGraph::TNode N = Graph->GetNode(CmnV[i]);
1179  const int Deg = N.GetOutDeg();
1180  int DegSum = 0;
1181  for (int j = 0; j < N.GetOutDeg(); j++) {
1182  if (N.GetOutNId(j) != Src) {
1183  DegSum += Graph->GetNode(N.GetOutNId(j)).GetOutDeg(); }
1184  }
1185  RndPref += 1.0/double(SrcDeg) * DstDeg/double(DegSum);
1186  PrefPref += (Deg/double(AllNbrDegSum)) * (DstDeg/double(DegSum));
1187  }
1188  IAssert(0.0<RndRnd && RndRnd<=1.0); IAssert(0.0<RndPref && RndPref<=1.0);
1189  IAssert(0.0<PrefRnd && PrefRnd<=1.0); IAssert(0.0<PrefPref && PrefPref<=1.0);
1190  RndRndH.AddDat(TMath::Round(RndRnd, 2)) += 1.0;
1191  RndPrefH.AddDat(TMath::Round(RndPref, 2)) += 1.0;
1192  PrefRndH.AddDat(TMath::Round(PrefRnd, 2)) += 1.0;
1193  PrefPrefH.AddDat(TMath::Round(PrefPref, 2)) += 1.0;
1194  fprintf(F, "%d\t%d\t%g\t%g\t%g\t%g\n", edge, EI.GetId(), RndRnd, RndPref, PrefRnd, PrefPref);
1195  TriadEdges++;
1196  }
1197  Graph->AddEdge(Src, Dst);
1198  if (edge % 1000 == 0) {
1199  printf("\r%dk / %dk triad edges %dk [total %s]", edge/1000, EIdV.Len()/1000, TriadEdges/1000, ExeTm.GetStr()); }
1200  }
1201  fclose(F);
1202  printf("\nRandom vs. preferential triad closing\n");
1203  printf("All edges: %d\n", Graph->GetEdges());
1204  printf("Triad closing edges: %d\n", TriadEdges);
1205  // plot distribution
1206  TFltPrV RndRndPrV; RndRndH.GetKeyDatPrV(RndRndPrV); RndRndPrV.Sort();
1207  TFltPrV RndPrefPrV; RndPrefH.GetKeyDatPrV(RndPrefPrV); RndPrefPrV.Sort();
1208  TFltPrV PrefRndPrV; PrefRndH.GetKeyDatPrV(PrefRndPrV); PrefRndPrV.Sort();
1209  TFltPrV PrefPrefPrV; PrefPrefH.GetKeyDatPrV(PrefPrefPrV); PrefPrefPrV.Sort();
1210  { TGnuPlot GP(TStr::Fmt("probPdf.%s", FNmPref.CStr()), TStr::Fmt("%s. %d/%d edges. CDF. Uniform vs. Preferential triangle closure.", Desc.CStr(), TriadEdges,Graph->GetEdges()));
1211  GP.AddPlot(RndRndPrV, gpwLinesPoints, "Random--Random");
1212  GP.AddPlot(RndPrefPrV, gpwLinesPoints, "Random--Preferential");
1213  GP.AddPlot(PrefRndPrV, gpwLinesPoints, "Preferential--Random");
1214  GP.AddPlot(PrefPrefPrV, gpwLinesPoints, "Preferential--Preferential");
1215  GP.SetXYLabel("Probability", "Count");
1216  GP.SavePng(); }
1217  { TGnuPlot GP(TStr::Fmt("probCdf.%s", FNmPref.CStr()), TStr::Fmt("%s. %d/%d edges. CDF. Uniform vs. Preferential triangle closure.", Desc.CStr(), TriadEdges,Graph->GetEdges()));
1218  GP.AddPlot(TGUtil::GetCdf(RndRndPrV), gpwLinesPoints, "Random--Random");
1219  GP.AddPlot(TGUtil::GetCdf(RndPrefPrV), gpwLinesPoints, "Random--Preferential");
1220  GP.AddPlot(TGUtil::GetCdf(PrefRndPrV), gpwLinesPoints, "Preferential--Random");
1221  GP.AddPlot(TGUtil::GetCdf(PrefPrefPrV), gpwLinesPoints, "Preferential--Preferential");
1222  GP.SetXYLabel("Cumulative probability", "Count");
1223  GP.SavePng(); }
1224 }*/
1225 
1226 PTimeNENet TTimeNENet::GetGnmRndNet(const int& Nodes, const int& Edges) {
1227  printf("Generating G_nm(%d, %d)\n", Nodes, Edges);
1228  int Src, Dst;
1229  PTimeNENet Net = TTimeNENet::New();
1230  Net->Reserve(Nodes, Edges);
1231  for (int e = 0; e < Edges; e++) {
1232  Src = TInt::Rnd.GetUniDevInt(Nodes);
1233  Dst = TInt::Rnd.GetUniDevInt(Nodes);
1234  while (Dst == Src || Net->IsEdge(Src, Dst)) {
1235  Dst = TInt::Rnd.GetUniDevInt(Nodes); }
1236  if (! Net->IsNode(Src)) { Net->AddNode(Src, TSecTm(e)); }
1237  if (! Net->IsNode(Dst)) { Net->AddNode(Dst, TSecTm(e)); }
1238  Net->AddEdge(Src, Dst, -1, TSecTm(e));
1239  }
1240  return Net;
1241 }
1242 
1243 // ACL, model B: Aiello, Chung, Lu: Random evolution in massive graphs
1244 PTimeNENet TTimeNENet::GetPrefAttach(const int& Nodes, const int& Edges, const double& GammaIn, const double& GammaOut) {
1245  const double Alpha = Nodes/double(Edges);
1246  printf("Generating PA(%d, %d), with slope in:%.1f, out: %.1f\n", Nodes, Edges,
1247  2+GammaIn/(Alpha/(1-Alpha)), 2+GammaOut/(Alpha/(1-Alpha)));
1248  // init
1249  int nodes=0, edges=0, time=0, iter=0;
1250  TIntV OutW(Edges, 0), InW(Edges, 0);
1251  PTimeNENet Net = TTimeNENet::New();
1252  Net->Reserve(Nodes, Edges);
1253  // 1st node
1254  Net->AddNode(0, TSecTm(time++)); nodes++;
1255  OutW.Add(0); InW.Add(0);
1256  while (edges < Edges) {
1257  int Src=-1, Dst=-1; iter++;
1258  if (TInt::Rnd.GetUniDev() < Alpha) {
1259  if (nodes < Nodes) {
1260  IAssert(Net->AddNode(nodes, TSecTm(time++)));
1261  nodes++; }
1262  } else {
1263  if (TInt::Rnd.GetUniDev() < nodes*GammaIn/double(edges+nodes*GammaIn)) {
1264  Src = TInt::Rnd.GetUniDevInt(nodes); }
1265  else { Src = OutW[TInt::Rnd.GetUniDevInt(OutW.Len())]; }
1266  if (TInt::Rnd.GetUniDev() < nodes*GammaOut/double(edges+nodes*GammaOut)) {
1267  Dst = TInt::Rnd.GetUniDevInt(nodes); }
1268  else { Dst = InW[TInt::Rnd.GetUniDevInt(InW.Len())]; }
1269  }
1270  if (Src == Dst || Net->IsEdge(Src, Dst)) {
1271  continue;
1272  }
1273  //printf("%d/%d %d %d\n", edges, iter, Src, Dst);
1274  if (! Net->IsNode(Src)) { Net->AddNode(Src, TSecTm(time++)); nodes++; }
1275  if (! Net->IsNode(Dst)) { Net->AddNode(Dst, TSecTm(time++)); nodes++; }
1276  Net->AddEdge(Src, Dst, -1, TSecTm(time++));
1277  OutW.Add(Src); InW.Add(Dst); edges++;
1278  }
1279  for (int node = 0; node < Nodes; node++) {
1280  if (! Net->IsNode(node)) {
1281  Net->AddNode(node, TSecTm(time++)); }
1282  }
1283  return Net;
1284 }
1285 
1286 PTimeNENet TTimeNENet::GetPrefAttach(const int& Nodes, const int& OutDeg) {
1287  printf("Generating PA, nodes:%d, out-deg:%d\n", Nodes, OutDeg);
1288  // init
1289  int time=0;
1290  PTimeNENet Net = TTimeNENet::New();
1291  Net->Reserve(Nodes, OutDeg*Nodes);
1292  Net->AddNode(0, TSecTm(++time)); Net->AddNode(1, TSecTm(++time));
1293  Net->AddEdge(0, 1, -1, TSecTm(++time));
1294  TIntV NIdV; NIdV.Add(0); NIdV.Add(1);
1295  TIntSet NodeSet;
1296  for (int node = 2; node <= Nodes; node++) {
1297  NodeSet.Clr(false);
1298  while (NodeSet.Len() < OutDeg && NodeSet.Len() < node) {
1299  NodeSet.AddKey(NIdV[TInt::Rnd.GetUniDevInt(NIdV.Len())]);
1300  }
1301  const int N = Net->AddNode(node, TSecTm(++time));
1302  for (int i = 0; i < NodeSet.Len(); i++) {
1303  Net->AddEdge(node, NodeSet[i], -1, TSecTm(++time));
1304  NIdV.Add(N); NIdV.Add(NodeSet[i]);
1305  }
1306  }
1307  return Net;
1308 }
1309 
1310 void TTimeNENet::SaveEdgeTm(const TStr& EdgeFNm, const bool& RenumberNId, const bool& RelativeTm) const {
1311  TIntV EIdV; GetEIdByTm(EIdV);
1312  const int BegTm = RelativeTm ? GetEDat(EIdV[0]).GetAbsSecs() : 0;
1313  TIntSet NIdMap;
1314  if (RenumberNId) { NIdMap.Gen(GetNodes()); }
1315  FILE *F = fopen(EdgeFNm.CStr(), "wt");
1316  //fprintf(F, "#Nodes\t%d\n#Edges\t%d\n", GetNodes(), GetEdges());
1317  //fprintf(F, "#<src>\t<dst>\t<time>\n");
1318  for (int e =0; e < EIdV.Len(); e++) {
1319  const TEdgeI EI = GetEI(EIdV[e]);
1320  if (RenumberNId) {
1321  const int src = EI.GetSrcNId();
1322  const int dst = EI.GetDstNId();
1323  NIdMap.AddKey(src); NIdMap.AddKey(dst);
1324  fprintf(F, "%d\t%d\t%d\n", NIdMap.GetKeyId(src), NIdMap.GetKeyId(dst), EI().GetAbsSecs()-BegTm);
1325  }else {
1326  fprintf(F, "%d\t%d\t%d\n", EI.GetSrcNId(), EI.GetDstNId(), EI().GetAbsSecs()-BegTm); }
1327  }
1328  fclose(F);
1329 }
1330 
1332  PTimeNENet Net = TTimeNENet::New();
1333  for (int i = 1; i <= 6; i++) {
1334  Net->AddNode(i, TSecTm(0)); }
1335  int tm = 1;
1336  Net->AddEdge(1, 2, -1, TSecTm(tm++));
1337  Net->AddEdge(3, 4, -1, TSecTm(tm++));
1338  Net->AddEdge(3, 1, -1, TSecTm(tm++));
1339  Net->AddEdge(5, 6, -1, TSecTm(tm++));
1340  Net->AddEdge(6, 4, -1, TSecTm(tm++));
1341  Net->AddEdge(5, 3, -1, TSecTm(tm++));
1342  Net->AddEdge(5, 4, -1, TSecTm(tm++));
1343  Net->AddEdge(5, 2, -1, TSecTm(tm++));
1344  return Net;
1345 }
1346 
1347 PTimeNENet TTimeNENet::LoadFlickr(const TStr& NodeFNm, const TStr& EdgeFNm) {
1348  const int BegOfTm = 1047369600; // Tue Mar 11 01:00:00 2003 (begining of Flickr)
1349  PTimeNENet Net = TTimeNENet::New();
1350  printf("Adding nodes...");
1351  { TSsParser Ss(NodeFNm, ssfWhiteSep);
1352  while (Ss.Next()) {
1353  const int NId = Ss.GetInt(0);
1354  const int Tm = Ss.GetInt(1)+BegOfTm;
1355  if (TSecTm(Tm) < TSecTm(2002, 1, 1)) {
1356  printf(" skip node %g (time %d)\n", (double) Ss.GetLineNo(), Ss.GetInt(1)); continue; }
1357  Net->AddNode(NId, TSecTm(Tm));
1358  } }
1359  printf(" %d nodes\n", Net->GetNodes());
1360  printf("Adding edges...");
1361  int SkipCnt=0;
1362  //TIntH SkipDiffCnt;
1363  { TSsParser Ss(EdgeFNm, ssfWhiteSep);
1364  while (Ss.Next()) {
1365  const int NId1 = Ss.GetInt(0);
1366  const int NId2 = Ss.GetInt(1);
1367  const TSecTm Tm = TSecTm(Ss.GetInt(2)+BegOfTm);
1368  if (! Net->IsNode(NId1) || ! Net->IsNode(NId2)) { printf("not node\n"); continue; }
1369  if (Tm < TSecTm(2002, 1, 1)) { SkipCnt++;
1370  printf(" skip edge %g (time %s)\n", (double) Ss.GetLineNo(), Tm.GetStr().CStr()); continue; }
1371  if (Tm+600 < Net->GetNDat(NId1)) {
1372  printf(" 1:skip edge %g (time %s < %s)\n", (double) Ss.GetLineNo(), Tm.GetStr().CStr(), Net->GetNDat(NId1).GetStr().CStr());
1373  //SkipDiffCnt.AddDat(-Tm.GetAbsSecs()+Net->GetNDat(NId1).GetAbsSecs()) += 1;
1374  SkipCnt++; continue; }
1375  if (Tm+600 < Net->GetNDat(NId2)) { SkipCnt++;
1376  printf(" 2:skip edge %g (time %s < %s)\n", (double) Ss.GetLineNo(), Tm.GetStr().CStr(), Net->GetNDat(NId2).GetStr().CStr());
1377  //SkipDiffCnt.AddDat(-Tm.GetAbsSecs()+Net->GetNDat(NId2).GetAbsSecs()) += 1;
1378  SkipCnt++; continue; }
1379  Net->AddEdge(NId1, NId2, -1, TSecTm(Tm));
1380  } }
1381  //TGnuPlot::PlotValCntH(SkipDiffCnt, "flickr-edgeNodeDiff", "", "seconds", "count");
1382  printf(" %d edges\n", Net->GetEdges());
1383  printf(" %d edges skipped (edge time < node time)\n", SkipCnt);
1384  Net->UpdateNodeTimes();
1385  return Net;
1386 }
1387 
1388 // load network where fields are separated by Separator and each line contains (*Fld are column indexes)
1389 // .. <source> .. <destination> .. <time>
1390 PTimeNENet TTimeNENet::LoadEdgeTm(const TStr& EdgeFNm, const int& SrcFld, const int& DstFld, const int& TimeFld, const TSsFmt& Separator) {
1391  printf("Loading %s\n", EdgeFNm.CStr());
1392  PTimeNENet Net = TTimeNENet::New();
1393  TStrHash<TInt> StrToId(Mega(1), true); // node id to string
1394  int LineCnt=0;
1395  TExeTm ExeTm;
1396  TSsParser Ss(EdgeFNm, Separator);
1397  TSecTm MinTm=TSecTm::GetCurTm(), MaxTm=TSecTm(100);
1398  while (Ss.Next()) {
1399  if (Ss.IsCmt()) { continue; }
1400  IAssert(Ss.Len() > TimeFld);
1401  const char* Node1 = Ss.GetFld(SrcFld);
1402  const char* Node2 = Ss.GetFld(DstFld);
1403  const char* TmStr = Ss.GetFld(TimeFld);
1404  if (strcmp(TmStr,"NULL")==0) { continue; }
1405  //const TSecTm Tm = TSecTm::GetDtTmFromYmdHmsStr(TmStr);
1406  const TSecTm Tm(atoi(TmStr));
1407  const int NId1 = StrToId.AddKey(Node1);
1408  const int NId2 = StrToId.AddKey(Node2);
1409  if (! Net->IsNode(NId1)) { Net->AddNode(NId1, TSecTm()); }
1410  if (! Net->IsNode(NId2)) { Net->AddNode(NId2, TSecTm()); }
1411  MinTm=TMath::Mn(MinTm, Tm);
1412  MaxTm=TMath::Mx(MaxTm, Tm);
1413  Net->AddEdge(NId1, NId2, -1, Tm);
1414  if (++LineCnt % 1000 == 0) {
1415  printf("\r %dk lines processed: %d %d [%s]", LineCnt/1000, Net->GetNodes(), Net->GetEdges(), ExeTm.GetStr()); }
1416  }
1417  printf("\r %d lines processed: %d %d [%s]\n", LineCnt, Net->GetNodes(), Net->GetEdges(), ExeTm.GetStr());
1418  printf(" Data range %s -- %s\n", MinTm.GetStr().CStr(), MaxTm.GetStr().CStr());
1419  //TSnap::PrintInfo(Net, "", "", false);
1420  Net->UpdateNodeTimes();
1421  return Net;
1422 }
Definition: ds.h:346
void Clr(const bool &DoDel=true, const int &NoDelLim=-1)
Definition: shash.h:1243
PTimeNENet GetTimeNENet() const
Definition: timenet.cpp:32
#define IAssert(Cond)
Definition: bd.h:262
int GetInt() const
Definition: dt.h:581
static const T & Mn(const T &LVal, const T &RVal)
Definition: xmath.h:36
TPair< TInt, TInt > TIntPr
Definition: ds.h:83
void PlotEffDiam(const TStr &FNmPref, const TStr &Desc, const TTmUnit &GroupBy, const TSecTm &StartTm, const int &NDiamRuns=10, const bool &OnlyWcc=false) const
Definition: timenet.cpp:934
static char * GetCurTm()
Definition: tm.h:374
static PTimeNet LoadArxiv(const TStr &PaperFNm, const TStr &CiteFNm)
Definition: timenet.cpp:383
TNodeI BegNI() const
Returns an iterator referring to the first node in the graph.
Definition: graph.h:548
TEdgeI BegEI() const
Returns an iterator referring to the first edge in the network.
Definition: network.h:1389
Definition: tm.h:355
double GetMedian() const
Definition: xmath.h:244
static PSs LoadTxt(const TSsFmt &SsFmt, const TStr &FNm, const PNotify &Notify=NULL, const bool &IsExcelEoln=true, const int &MxY=-1, const TIntV &AllowedColNV=TIntV(), const bool &IsQStr=true)
Definition: ss.cpp:100
void Reserve(const int &Nodes, const int &Edges)
Reserves memory for a network of Nodes nodes and Edges edges.
Definition: network.h:1423
Definition: ds.h:272
static PTimeNENet GetSmallNet()
Definition: timenet.cpp:1331
static PTimeNet New()
Definition: timenet.h:39
bool IsDef() const
Definition: tm.h:123
double GetBfsEffDiam(const PGraph &Graph, const int &NTestNodes, const bool &IsDir=false)
Returns the (approximation of the) Effective Diameter (90-th percentile of the distribution of shorte...
Definition: bfsdfs.h:424
void SavePng(const int &SizeX=1000, const int &SizeY=800, const TStr &Comment=TStr())
Definition: gnuplot.h:120
PGraph GetMxWcc(const PGraph &Graph)
Returns a graph representing the largest weakly connected component on an input Graph.
Definition: cncom.h:452
static const T & Mx(const T &LVal, const T &RVal)
Definition: xmath.h:32
TNodeI BegNI() const
Returns an iterator referring to the first node in the network.
Definition: network.h:1338
PTimeNENet Get1stEdgeNet() const
Definition: timenet.cpp:629
int GetKeyId(const TKey &Key) const
Definition: shash.h:1328
void PlotMissingPast(const TStr &FNmPref, const TStr &Desc, const TTmUnit &GroupBy, const TSecTm &DelPreTmEdges, const TSecTm &PostTmDiam) const
Definition: timenet.cpp:169
void GetNIdByTm(TIntV &NIdV) const
Definition: timenet.cpp:778
void PlotMissingPast(const TStr &FNmPref, const TStr &Desc, const TTmUnit &TmUnit, const TSecTm &DelPreTmEdges, const TSecTm &PostTmDiam, const bool &LinkBWays)
Definition: timenet.cpp:977
int GetEdges() const
Returns the number of edges in the graph.
Definition: graph.cpp:313
void GetEIdByTm(TIntV &EIdV) const
Definition: timenet.cpp:788
int AddNode(int NId=-1)
Adds a node of ID NId to the graph.
Definition: graph.cpp:8
Definition: fl.h:319
Definition: bits.h:119
int GetEdges() const
Returns the number of edges in the graph.
Definition: graph.cpp:82
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:575
void PlotCCfOverTm(const TStr &FNmPref, TStr Desc, const TTmUnit &TmUnit, const int &NodesBucket=-1) const
Definition: timenet.cpp:253
Node iterator. Only forward iteration (operator++) is supported.
Definition: graph.h:68
bool IsNode(const int &NId) const
Tests whether ID NId is a node.
Definition: network.h:1336
int GetNodes() const
Returns the number of nodes in the network.
Definition: network.h:1307
double GetAnfEffDiam(const PGraph &Graph, const bool &IsDir, const double &Percentile, const int &NApprox)
Definition: anf.h:216
Definition: ss.h:72
void Gen(const int &ExpectVals)
Definition: shash.h:1115
TNodeI EndNI() const
Returns an iterator referring to the past-the-end node in the network.
Definition: network.h:221
int GetEdges() const
Returns the number of edges in the network.
PGStatVec TimeGrowth(const TTmUnit &TimeStep, const TFSet &TakeStat, const TSecTm &StartTm=TSecTm(1)) const
Definition: timenet.cpp:878
int GetNodes() const
Returns the number of nodes in the graph.
Definition: graph.h:503
TStr GetSubStr(const int &BChN, const int &EChN) const
Definition: dt.cpp:811
void SetXYLabel(const TStr &XLabel, const TStr &YLabel)
Definition: gnuplot.h:73
void GetEdgeBuckets(const int EdgesPerBucket, TTimeNet::TTmBucketV &TmBucketV) const
Definition: timenet.cpp:846
void GetTmBuckets(const TTmUnit &GroupBy, TTmBucketV &TmBucketV) const
Definition: timenet.cpp:59
int AddNode(int NId=-1)
Adds a node of ID NId to the network.
Definition: network.h:1483
static PTimeNet LoadPatents(const TStr &PatentFNm, const TStr &CiteFNm)
Definition: timenet.cpp:480
bool GetInt(const int &FldN, int &Val) const
If the field FldN is an integer its value is returned in Val and the function returns true...
Definition: ss.cpp:447
Definition: xmath.h:129
TSecTm & GetNDat(const int &NId)
Returns node data for the node of ID NId in the network.
Definition: network.h:1346
Edge iterator. Only forward iteration (operator++) is supported.
Definition: graph.h:766
TSecTm & GetNDat(const int &NId)
Returns node data for the node of ID NId in the network.
Definition: network.h:229
const TDat & GetDat(const TKey &Key) const
Definition: hash.h:262
void Defrag(const bool &OnlyNodeLinks=false)
Defragments the network.
Definition: network.h:1650
int GetNodes() const
Returns the number of nodes in the network.
Definition: network.h:189
Definition: gnuplot.h:7
TEdgeI GetEI(const int &EId) const
Not supported/implemented!
Definition: network.h:1393
static TRnd Rnd
Definition: dt.h:1146
TNodeI BegNI() const
Returns an iterator referring to the first node in the network.
Definition: network.h:219
double GetSDev() const
Definition: xmath.h:242
bool IsKey(const TKey &Key) const
Definition: shash.h:1148
PTimeNENet GetGraphUpToTm(const TSecTm &MaxEdgeTm) const
Definition: timenet.cpp:685
int GetNodes() const
Returns the number of nodes in the graph.
Definition: graph.h:192
const char * GetFld(const int &FldN) const
Returns the contents of the field at index FldN.
Definition: ss.h:129
bool IsNode(const int &NId) const
Tests whether ID NId is a node.
Definition: network.h:217
TSsFmt
Spread-Sheet Separator Format.
Definition: ss.h:5
TNodeI GetNI(const int &NId) const
Returns an iterator referring to the node of ID NId in the network.
Definition: network.h:223
int AddEdge(const int &SrcNId, const int &DstNId, int EId=-1)
Adds an edge between node IDs SrcNId and DstNId to the graph.
Definition: network.h:1557
bool Empty() const
Tests whether the vector is empty.
Definition: ds.h:570
void GetEdgeTmBuckets(const TTmUnit &GroupBy, TTimeNet::TTmBucketV &TmBucketV) const
Definition: timenet.cpp:817
static PTimeNENet LoadEdgeTm(const TStr &EdgeFNm, const int &SrcFld=0, const int &DstFld=1, const int &TimeFld=2, const TSsFmt &Separator=ssfTabSep)
Definition: timenet.cpp:1390
Definition: bd.h:63
void SaveEdgeTm(const TStr &EdgeFNm, const bool &RenumberNId=false, const bool &RelativeTm=false) const
Definition: timenet.cpp:1310
Graph Statistics Sequence.
Definition: gstat.h:155
void SplitOnCh(TStr &LStr, const char &SplitCh, TStr &RStr) const
Definition: dt.cpp:901
void Swap(TVec< TVal, TSizeTy > &Vec)
Swaps the contents of the vector with Vec.
Definition: ds.h:1101
static void SaveTs(const TIntKdV &KdV, const TStr &FNm, const TStr &HeadLn=TStr())
Definition: gnuplot.cpp:717
const char * GetTmStr() const
Definition: tm.h:370
virtual void DelNode(const int &NId)
Deletes node of ID NId from the network.
Definition: network.h:347
void PlotEffDiam(const TStr &FNmPref, const TStr &Desc, const TTmUnit &GroupBy, const TSecTm &StartTm, const int &NDiamRuns=10, const bool &OnlyWcc=false, const bool &AlsoRewire=false) const
Definition: timenet.cpp:106
void GetNodeBuckets(const int NodesPerBucket, TTimeNet::TTmBucketV &TmBucketV) const
Definition: timenet.cpp:836
void Clr(const bool &DoDel=true, const TSizeTy &NoDelLim=-1)
Clears the contents of the vector.
Definition: ds.h:1022
static PTimeNENet GetPrefAttach(const int &Nodes, const int &Edges, const double &GammaIn, const double &GammaOut)
Definition: timenet.cpp:1244
void Add(const TFlt &Val, const TFlt &Wgt=1)
Definition: xmath.h:217
void Sort(const bool &Asc=true)
Sorts the elements of the vector.
Definition: ds.h:1318
Statistics of a Graph Snapshot.
Definition: gstat.h:36
int AddEdge(const int &SrcNId, const int &DstNId)
Adds an edge from node SrcNId to node DstNId to the network.
Definition: network.h:381
static PTimeNENet New()
Definition: timenet.h:86
void SetNodeTmToFirstEdgeTm()
Definition: timenet.cpp:725
int CntNonZNodes(const PGraph &Graph)
Returns the number of nodes with degree greater than 0.
Definition: alg.h:114
PUNGraph GetSubGraph(const PUNGraph &Graph, const TIntV &NIdV, const bool &RenumberNodes)
Returns an induced subgraph of an undirected graph Graph with NIdV nodes with an optional node renumb...
Definition: subgraph.cpp:7
Whitespace (space or tab) separated.
Definition: ss.h:11
uint GetAbsSecs() const
Definition: tm.h:150
#define Mega(n)
Definition: gbase.h:4
THash< TInt, TNode > NodeH
Definition: network.h:1259
static PUNGraph New()
Static constructor that returns a pointer to the graph. Call: PUNGraph Graph = TUNGraph::New().
Definition: graph.h:172
int AddKey(const TKey &Key)
Definition: shash.h:1254
void DumpTimeStat() const
Definition: timenet.cpp:761
Tab separated.
Definition: ss.h:6
void SetRndEdgeTimes(const int &MinTmEdge=0)
Definition: timenet.cpp:744
void DelEdge(const int &SrcNId, const int &DstNId)
Deletes an edge between node IDs SrcNId and DstNId from the graph.
Definition: graph.cpp:124
int Len() const
Returns the number of fields in the current line.
Definition: ss.h:114
TTriple< TFlt, TFlt, TFlt > TFltTr
Definition: ds.h:181
Definition: tm.h:14
PUNGraph GenConfModel(const TIntV &DegSeqV, TRnd &Rnd)
Generates a random undirect graph with a given degree sequence.
Definition: ggen.cpp:122
TNodeEdgeNet & operator=(const TNodeEdgeNet &Net)
Definition: network.h:1303
static TStr GetNullStr()
Definition: dt.cpp:1626
void Tick()
Definition: tm.h:364
PTimeNENet GetSubGraph(const TIntV &NIdV) const
Definition: timenet.cpp:647
THash< TInt, TEdge > EdgeH
Definition: network.h:1260
TPair< TFlt, TFlt > TFltPr
Definition: ds.h:99
void PlotMedianDegOverTm(const TStr &FNmPref, const TTmUnit &TmUnit, const int &NodesPerBucket=-1) const
Definition: timenet.cpp:302
void SetVal(const TGStatVal &StatVal, const double &Val)
Definition: gstat.cpp:88
static PTimeNet LoadBipartite(const TStr &InFNm)
Definition: timenet.cpp:346
int CntInDegNodes(const PGraph &Graph, const int &NodeInDeg)
Returns the number of nodes with in-degree NodeInDeg.
Definition: alg.h:87
static int NDiamRuns
Definition: gstat.h:38
TSecTm & GetEDat(const int &EId)
Returns edge data for the edge with ID EId.
Definition: network.h:1399
int AddEdge(const int &SrcNId, const int &DstNId)
Adds an edge between node IDs SrcNId and DstNId to the graph.
Definition: graph.cpp:92
int AddKey(const char *Key)
Definition: hash.h:968
bool IsEdge(const int &SrcNId, const int &DstNId, const bool &IsDir=true) const
Tests whether an edge from node IDs SrcNId to DstNId exists in the network.
Definition: network.h:401
static PTimeNENet LoadFlickr(const TStr &NodeFNm, const TStr &EdgeFNm)
Definition: timenet.cpp:1347
Definition: hash.h:781
PTimeNENet GetESubGraph(const TIntV &EIdV) const
Definition: timenet.cpp:668
int AddNode(int NId=-1)
Adds a node of ID NId to the network.
Definition: network.h:311
int Len() const
Definition: shash.h:1121
Definition: tm.h:81
void GetNodeBuckets(const int NodesPerBucket, TTimeNet::TTmBucketV &TmBucketV) const
Definition: timenet.cpp:77
Definition: ds.h:32
static PTimeNENet GetGnmRndNet(const int &Nodes, const int &Edges)
Definition: timenet.cpp:1226
int AddKey(const TKey &Key)
Definition: hash.h:373
TNodeI EndNI() const
Returns an iterator referring to the past-the-end node in the graph.
Definition: graph.h:550
TTimeNENet & operator=(const TTimeNENet &TimeNet)
Definition: timenet.cpp:605
TNodeI GetNI(const int &NId) const
Returns an iterator referring to the node of ID NId in the network.
Definition: network.h:1342
void GetNIdByTm(TIntV &NIdV) const
Definition: timenet.cpp:48
static TStr GetTmUnitStr(const TTmUnit &TmUnit)
Definition: tm.cpp:108
long long int64
Definition: bd.h:27
double GetMean() const
Definition: xmath.h:240
void Save(TSOut &SOut) const
Definition: xmlser.h:16
Definition: dt.h:412
TStr GetStr(const TLoc &Loc=lUs) const
Definition: tm.cpp:457
bool Empty() const
Definition: dt.h:491
TNodeNet & operator=(const TNodeNet &NodeNet)
Definition: network.h:185
static TStr Fmt(const char *FmtStr,...)
Definition: dt.cpp:1599
uint64 GetLineNo() const
Returns the line number of the current line.
Definition: ss.h:118
int GetTriadEdges(TIntV &TriadEIdV) const
Definition: timenet.cpp:857
void SplitOnAllCh(const char &SplitCh, TStrV &StrV, const bool &SkipEmpty=true) const
Definition: dt.cpp:926
int GetEdges() const
Returns the number of edges in the network.
Definition: network.h:1354
static int GetMonthN(const TStr &MonthNm, const TLoc &Loc=lUs)
Definition: tm.cpp:39
void UpdateNodeTimes()
Definition: timenet.cpp:709
Node iterator. Only forward iteration (operator++) is supported.
Definition: graph.h:383
void Defrag(const bool &OnlyNodeLinks=false)
Defragments the graph.
Definition: graph.cpp:160
void GetKeyDatPrV(TVec< TPair< TKey, TDat > > &KeyDatPrV) const
Definition: hash.h:500
void GetTmBuckets(const TTmUnit &GroupBy, TTimeNet::TTmBucketV &TmBucketV) const
Definition: timenet.cpp:798
TNodeI EndNI() const
Returns an iterator referring to the past-the-end node in the graph.
Definition: graph.h:239
static TSecTm GetCurTm()
Definition: tm.cpp:697
double GetClustCf(const PGraph &Graph, int SampleNodes=-1)
Computes the average clustering coefficient as defined in Watts and Strogatz, Collective dynamics of ...
Definition: triad.h:137
bool IsNode(const int &NId) const
Tests whether ID NId is a node.
Definition: graph.h:235
bool Next()
Loads next line from the input file.
Definition: ss.cpp:412
Definition: bd.h:196
void SortNodeEdgeTimes()
Definition: timenet.cpp:703
void Defrag(const bool &OnlyNodeLinks=false)
Defragments the network.
Definition: network.h:423
void Gen(const TSizeTy &_Vals)
Constructs a vector (an array) of _Vals elements.
Definition: ds.h:523
TPt< TTimeNet > PTimeNet
Definition: timenet.h:8
int GetUniDevInt(const int &Range=0)
Definition: dt.cpp:39
PTimeNet GetTimeNet() const
Definition: timenet.cpp:612
PGraph GetESubGraph(const PGraph &Graph, const TIntV &EIdV)
Returns a subgraph of graph Graph with EIdV edges.
Definition: subgraph.h:243
void SplitOnWs(TStrV &StrV) const
Definition: dt.cpp:972
TTimeNet & operator=(const TTimeNet &TimeNet)
Definition: timenet.cpp:3
void Reserve(const int &Nodes, const int &Edges)
Reserves memory for a network of Nodes nodes and Edges edges.
Definition: network.h:278
int AddErrBar(const TFltTrV &XYDValV, const TStr &Label=TStr())
Definition: gnuplot.cpp:265
const char * GetStr() const
Definition: tm.h:368
char * CStr()
Definition: dt.h:479
TTmUnit
Definition: tm.h:11
bool IsEdge(const int &SrcNId, const int &DstNId) const
Tests whether an edge between node IDs SrcNId and DstNId exists in the graph.
Definition: graph.cpp:137
int CntDegNodes(const PGraph &Graph, const int &NodeDeg)
Returns the number of nodes with degree NodeDeg.
Definition: alg.h:105
bool IsKey(const TKey &Key) const
Definition: hash.h:258
static PGStatVec New(const TTmUnit &_TmUnit=tmu1Sec)
Definition: gstat.cpp:426
static PTimeNet LoadAmazon(const TStr &StlFNm)
Definition: timenet.cpp:563
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:602
PTimeNet GetSubGraph(const TIntV &NIdV) const
Definition: timenet.cpp:10
TNodeI BegNI() const
Returns an iterator referring to the first node in the graph.
Definition: graph.h:237
int Len() const
Definition: hash.h:228
TDat & AddDat(const TKey &Key)
Definition: hash.h:238
TEdgeI EndEI() const
Returns an iterator referring to the past-the-end edge in the network.
Definition: network.h:1391
PGStatVec TimeGrowth(const TTmUnit &TmUnit, const TFSet &TakeStat, const TSecTm &StartTm) const
Definition: timenet.cpp:88
static void PlotValV(const TVec< TVal1 > &ValV, const TStr &OutFNmPref, const TStr &Desc="", const TStr &XLabel="", const TStr &YLabel="", const TGpScaleTy &ScaleTy=gpsAuto, const bool &PowerFit=false, const TGpSeriesTy &SeriesTy=gpwLinesPoints)
Definition: gnuplot.h:398
TNodeI EndNI() const
Returns an iterator referring to the past-the-end node in the network.
Definition: network.h:1340
int GetCmnNbrs(const PGraph &Graph, const int &NId1, const int &NId2)
Returns a number of shared neighbors between a pair of nodes NId1 and NId2.
Definition: triad.h:708
void Def()
Definition: xmath.cpp:339
const TKey & GetKey(const int &KeyId) const
Definition: hash.h:252
bool IsCmt() const
Checks whether the current line is a comment (starts with '#').
Definition: ss.h:120
int CntOutDegNodes(const PGraph &Graph, const int &NodeOutDeg)
Returns the number of nodes with out-degree NodeOutDeg.
Definition: alg.h:96
TSecTm Round(const TTmUnit &TmUnit) const
Definition: tm.cpp:625
TPt< TTimeNENet > PTimeNENet
Definition: timenet.h:11
TSizeTy AddV(const TVec< TVal, TSizeTy > &ValV)
Adds the elements of the vector ValV to the to end of the vector.
Definition: ds.h:1110
void TakeBasicStat(const PGraph &Graph, const bool &IsMxWcc=false)
Definition: gstat.h:257
void SortByDat(const bool &Asc=true)
Definition: hash.h:292