SNAP Library 3.0, User Reference  2016-07-20 17:56:49
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
cmty.cpp
Go to the documentation of this file.
1 // Community detection algorithms
3 namespace TSnap {
4 
5 
6 namespace TSnapDetail {
7 
8 // GIRVAN-NEWMAN algorithm
9 // 1. The betweenness of all existing edges in the network is calculated first.
10 // 2. The edge with the highest betweenness is removed.
11 // 3. The betweenness of all edges affected by the removal is recalculated.
12 // 4. Steps 2 and 3 are repeated until no edges remain.
13 // Girvan M. and Newman M. E. J., Community structure in social and biological networks, Proc. Natl. Acad. Sci. USA 99, 7821-7826 (2002)
14 // Keep removing edges from Graph until one of the connected components of Graph splits into two.
15 void CmtyGirvanNewmanStep(PUNGraph& Graph, TIntV& Cmty1, TIntV& Cmty2) {
16  TIntPrFltH BtwEH;
17  TBreathFS<PUNGraph> BFS(Graph);
18  Cmty1.Clr(false); Cmty2.Clr(false);
19  while (true) {
20  TSnap::GetBetweennessCentr(Graph, BtwEH);
21  BtwEH.SortByDat(false);
22  if (BtwEH.Empty()) { return; }
23  const int NId1 = BtwEH.GetKey(0).Val1;
24  const int NId2 = BtwEH.GetKey(0).Val2;
25  Graph->DelEdge(NId1, NId2);
26  BFS.DoBfs(NId1, true, false, NId2, TInt::Mx);
27  if (BFS.GetHops(NId1, NId2) == -1) { // two components
28  TSnap::GetNodeWcc(Graph, NId1, Cmty1);
29  TSnap::GetNodeWcc(Graph, NId2, Cmty2);
30  return;
31  }
32  }
33 }
34 
35 // Connected components of a graph define clusters
36 // OutDegH and OrigEdges stores node degrees and number of edges in the original graph
37 double _GirvanNewmanGetModularity(const PUNGraph& G, const TIntH& OutDegH, const int& OrigEdges, TCnComV& CnComV) {
38  TSnap::GetWccs(G, CnComV); // get communities
39  double Mod = 0;
40  for (int c = 0; c < CnComV.Len(); c++) {
41  const TIntV& NIdV = CnComV[c]();
42  double EIn = 0, EEIn = 0;
43  for (int i = 0; i < NIdV.Len(); i++) {
44  TUNGraph::TNodeI NI = G->GetNI(NIdV[i]);
45  EIn += NI.GetOutDeg();
46  EEIn += OutDegH.GetDat(NIdV[i]);
47  }
48  Mod += (EIn-EEIn*EEIn / (2.0*OrigEdges));
49  }
50  if (Mod == 0) { return 0; }
51  else { return Mod / (2.0*OrigEdges); }
52 }
53 
54 void MapEquationNew2Modules(PUNGraph& Graph, TIntH& Module, TIntFltH& Qi, int a, int b) {
55  float InModule = 0.0, OutModule = 0.0, Val;
56  int Mds[2] = { a, b };
57  for (int i = 0; i<2; i++) {
58  InModule = 0.0, OutModule = 0.0;
59  if (Qi.IsKey(Mds[i])) {
60  int CentralModule = Mds[i];
61 
62  //printf("central module: %i\n ",CentralModule);
63 
64  TIntV newM;
65  for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
66  if (Module.GetDat(NI.GetId()) == CentralModule)
67  newM.Add(NI.GetId());
68  }
69 
70  for (int j = 0; j<newM.Len(); j++) {
71  //int len1 = newM.Len();
72 
73  //int c = CentralModule;
74  for (int k = 0; k<Graph->GetNI(newM[j]).GetDeg(); k++) {
75  //int len = Graph->GetNI(newM[j]).GetDeg();
76 
77  int ids = Graph->GetNI(newM[j]).GetId();
78  int idd = Graph->GetNI(newM[j]).GetNbrNId(k);
79  int ms = Module.GetDat(ids);
80  int md = Module.GetDat(idd);
81  //int c = CentralModule;
82 
83  if (ms == md) {
84  InModule += 1.0;
85  //printf("IN: \t%i - %i; moduls: %i - %i\n", ids, idd, ms, md);
86  } else {
87  OutModule += 1.0;
88  //printf("OUT: \t%i - %i; moduls: %i - %i\n", ids, idd, ms, md);
89  }
90  }
91  }
92  if (InModule >1) InModule = InModule / 2;
93 
94  //printf("\n");
95 
96  Val = 0.0;
97  if (InModule + OutModule > 0) {
98  Val = OutModule / (InModule + OutModule);
99  }
100  //int control = Mds[i];
101  Qi.AddDat(Mds[i], Val);
102  } else {
103  //int control = Mds[i];
104  Qi.AddDat(Mds[i], 0.0);
105  }
106  }
107 }
108 
109 double Equation(TIntFltH& PAlpha, double& SumPAlphaLogPAlpha, TIntFltH& Qi){
110  double SumPAlpha = 1.0, SumQi = 0.0, SumQiLogQi = 0.0;
111  double SumQiSumPAlphaLogQiSumPAlpha = 0.0, logqi = 0.0, qi = 0.0;
112  for (int i = 0; i<Qi.Len(); i++) {
113  SumQi += Qi[i];
114  qi = Qi[i];
115  if (qi != 0) {
116  logqi = log(qi);
117  } else {
118  logqi = 0;
119  }
120  SumQiLogQi += Qi[i] * logqi;
121  SumQiSumPAlphaLogQiSumPAlpha += (Qi[i] + SumPAlpha)*log(Qi[i] + SumPAlpha);
122  }
123  return (SumQi*log(SumQi) - 2 * SumQiLogQi - SumPAlphaLogPAlpha +
124  SumQiSumPAlphaLogQiSumPAlpha);
125 }
126 
127 bool edgeIntersect(PNGraph& graph, TIntV& a, TIntV& b) {
128  for (int i = 0; i<a.Len(); i++) {
129  for (int j = 0; j<b.Len(); j++) {
130  if (graph->IsEdge(a[i], b[j]))
131  return true;
132  }
133  }
134 
135  return false;
136 }
137 
139  int count = 0;
140  for (int i = 0; i<a.Len(); i++) {
141  for (int j = 0; j<b.Len(); j++) {
142  if (a[i] == b[j])
143  count++;
144  }
145  }
146 
147  return count;
148 }
149 
150 bool inComp(PNGraph& g1, PNGraph& Graph, TIntH& inCompCount, int id, int neigh) {
151  bool out = true;
152 
153  int inCompN = 0;
154  int inComp = 0;
155 
156  if (g1->IsNode(id) && g1->IsNode(neigh)) {
157  int deg = g1->GetNI(id).GetDeg();
158  int neighDeg = g1->GetNI(neigh).GetDeg();
159 
160 
161  if (inCompCount.IsKey(id)) {
162  inComp = inCompCount.GetDat(id);
163  }
164  if (inCompCount.IsKey(neigh)) {
165  inCompN = inCompCount.GetDat(neigh);
166  }
167 
168  if (inCompN < neighDeg && inComp < deg && (!g1->IsNode(neigh) || Graph->GetNI(neigh).GetDeg() - neighDeg == 0)) {
169  inCompCount.AddDat(neigh, ++inCompN);
170  inCompCount.AddDat(id, ++inComp);
171  out = true;
172  } else {
173  out = false;
174  }
175  }
176  return out;
177 }
178 
180  for (int i = 0; i < a.Len(); i++) {
181  bool diff = false;
182  for (int j = 0; j < b.Len(); j++) {
183  if (a[i] == a[j]) {
184  diff = true;
185  break;
186  }
187  }
188  if (!diff) {
189  b.Add(a[i]);
190  break;
191  }
192  }
193 }
194 
195 bool chekIfCrossing(TIntV& a, TIntH& t, int f, int l, int TP) {
196  bool after = false;
197  bool before = false;
198  for (int i = 0; i < a.Len(); i++) {
199  if (t.GetDat(a[i]) < TP)
200  before = true;
201  if (t.GetDat(a[i]) > TP)
202  after = true;
203  }
204 
205  if (TP == f)
206  before = true;
207 
208  if (TP == l)
209  after = true;
210 
211  return (after && before);
212 }
213 
214 double InfomapOnlineIncrement(PUNGraph& Graph, int n1, int n2, TIntFltH& PAlpha, double& SumPAlphaLogPAlpha, TIntFltH& Qi, TIntH& Module, int& Br) {
215  // NOW NEW stuff add another additional iteration:
216 
217  bool n1new = false;
218  bool n2new = false;
219 
220  // add edge
221  if (!Graph->IsNode(n1)){
222  Graph->AddNode(n1);
223  n1new = true;
224  }
225 
226  if (!Graph->IsNode(n2)) {
227  Graph->AddNode(n2);
228  n2new = true;
229  }
230 
231  Graph->AddEdge(n1, n2);
232 
233  int e = Graph->GetEdges();
234 
235  // get previous alpha for 27
236  double oldAlphaN1 = 0.0;
237  double oldAlphaN2 = 0.0;
238 
239  if (!n1new)
240  oldAlphaN1 = PAlpha.GetDat(n1);
241 
242  if (!n2new)
243  oldAlphaN2 = PAlpha.GetDat(n2);
244 
245  // update alpha for 27
246  TUNGraph::TNodeI node = Graph->GetNI(n1);
247  int nodeDeg = node.GetDeg();
248  float d = ((float)nodeDeg / (float)(2 * e));
249  PAlpha.AddDat(n1, d);
250 
251  //update alphasum
252  SumPAlphaLogPAlpha = SumPAlphaLogPAlpha - oldAlphaN1 + d*log(d);
253 
254  if (n1new) {
255  Module.AddDat(n1, Br);
256  Qi.AddDat(Br, 1.0);
257  Br++;
258  }
259 
260  // update alpha for 28
261  node = Graph->GetNI(n2);
262  nodeDeg = node.GetDeg();
263  d = ((float)nodeDeg / (float)(2 * e));
264  PAlpha.AddDat(n2, d);
265 
266  //update alphasum
267  SumPAlphaLogPAlpha = SumPAlphaLogPAlpha - oldAlphaN2 + d*log(d);
268 
269  //add module
270  if (n2new) {
271  Module.AddDat(n2, Br);
272  Qi.AddDat(Br, 1.0);
273  Br++;
274  }
275 
276  // Start
277 
278  double MinCodeLength = TSnapDetail::Equation(PAlpha, SumPAlphaLogPAlpha, Qi);
279  double PrevIterationCodeLength = 0.0;
280 
281  do {
282  PrevIterationCodeLength = MinCodeLength;
283  int id[2] = { n1, n2 };
284  for (int k = 0; k<2; k++) {
285  for (int i = 0; i<Graph->GetNI(id[k]).GetDeg(); i++) {
286 
287  int OldModule = Module.GetDat(id[k]);
288  int NewModule = Module.GetDat(Graph->GetNI(id[k]).GetNbrNId(i));
289 
290  Module.AddDat(id[k], NewModule);
291 
292  TSnapDetail::MapEquationNew2Modules(Graph, Module, Qi, OldModule, NewModule);
293  double NewCodeLength = TSnapDetail::Equation(PAlpha, SumPAlphaLogPAlpha, Qi);
294  if (NewCodeLength<MinCodeLength) {
295  MinCodeLength = NewCodeLength;
296  OldModule = NewModule;
297  }
298  else {
299  Module.AddDat(id[k], OldModule);
300  }
301  }
302  }
303  } while (MinCodeLength<PrevIterationCodeLength);
304 
305  return MinCodeLength;
306 }
307 
308 } // namespace TSnapDetail
309 
310 // Maximum modularity clustering by Girvan-Newman algorithm (slow)
311 // Girvan M. and Newman M. E. J., Community structure in social and biological networks, Proc. Natl. Acad. Sci. USA 99, 7821-7826 (2002)
312 double CommunityGirvanNewman(PUNGraph& Graph, TCnComV& CmtyV) {
313  TIntH OutDegH;
314  const int NEdges = Graph->GetEdges();
315  for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
316  OutDegH.AddDat(NI.GetId(), NI.GetOutDeg());
317  }
318  double BestQ = -1; // modularity
319  TCnComV CurCmtyV;
320  CmtyV.Clr();
321  TIntV Cmty1, Cmty2;
322  while (true) {
323  TSnapDetail::CmtyGirvanNewmanStep(Graph, Cmty1, Cmty2);
324  const double Q = TSnapDetail::_GirvanNewmanGetModularity(Graph, OutDegH, NEdges, CurCmtyV);
325  //printf("current modularity: %f\n", Q);
326  if (Q > BestQ) {
327  BestQ = Q;
328  CmtyV.Swap(CurCmtyV);
329  }
330  if (Cmty1.Len() == 0 || Cmty2.Len() == 0) { break; }
331  }
332  return BestQ;
333 }
334 
335 // Rosvall-Bergstrom community detection algorithm based on information theoretic approach.
336 // See: Rosvall M., Bergstrom C. T., Maps of random walks on complex networks reveal community structure, Proc. Natl. Acad. Sci. USA 105, 1118-1123 (2008)
337 double Infomap(PUNGraph& Graph, TCnComV& CmtyV){
338 
339  TIntFltH PAlpha; // probability of visiting node alpha
340  TIntH Module; // module of each node
341  TIntFltH Qi; // probability of leaving each module
342 
343  double SumPAlphaLogPAlpha = 0.0;
344  int Br = 0;
345  const int e = Graph->GetEdges();
346 
347  // initial values
348  for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
349  int nodeId = NI.GetId();
350  int nodeDeg = NI.GetDeg();
351  float d = ((float)nodeDeg / (float)(2 * e));
352  PAlpha.AddDat(nodeId, d);
353  SumPAlphaLogPAlpha += d*log(d);
354  Module.AddDat(nodeId, Br);
355  Qi.AddDat(Br, 1.0);
356  Br += 1;
357  }
358 
359  double MinCodeLength = TSnapDetail::Equation(PAlpha, SumPAlphaLogPAlpha, Qi);
360  double NewCodeLength, PrevIterationCodeLength = 0.0;
361  int OldModule, NewModule;
362 
363  TIntV nodes;
364  for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++)
365  nodes.Add(NI.GetId());
366 
367  do {
368  PrevIterationCodeLength = MinCodeLength;
369  TRnd rnd;
370  rnd.Randomize();
371  nodes.Shuffle(rnd);
372  for (int ndcounter = 0; ndcounter<nodes.Len(); ndcounter++) {
373  MinCodeLength = TSnapDetail::Equation(PAlpha, SumPAlphaLogPAlpha, Qi);
374  int nodeId = nodes[ndcounter];
375  TUNGraph::TNodeI NI = Graph->GetNI(nodeId);
376  for (int i = 0; i<NI.GetDeg(); i++) {
377 
378  OldModule = Module.GetDat(nodeId);
379  NewModule = Module.GetDat(NI.GetNbrNId(i));
380 
381  if (OldModule != NewModule){
382 
383  Module.AddDat(nodeId, NewModule);
384 
385  TSnapDetail::MapEquationNew2Modules(Graph, Module, Qi, OldModule, NewModule);
386  NewCodeLength = TSnapDetail::Equation(PAlpha, SumPAlphaLogPAlpha, Qi);
387  if (NewCodeLength<MinCodeLength) {
388  MinCodeLength = NewCodeLength;
389  OldModule = NewModule;
390  }
391  else {
392  Module.AddDat(nodeId, OldModule);
393  }
394  }
395  }
396  }
397  } while (MinCodeLength<PrevIterationCodeLength);
398 
399  Module.SortByDat(true);
400 
401  int Mod = -1;
402  for (int i = 0; i<Module.Len(); i++) {
403  if (Module[i]>Mod){
404  Mod = Module[i];
405  TCnCom t;
406  for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++){
407  if (Module.GetDat(NI.GetId()) == Mod)
408  t.Add(NI.GetId());
409  }
410  CmtyV.Add(t);
411  }
412  }
413 
414  return MinCodeLength;
415 }
416 
417 double InfomapOnline(PUNGraph& Graph, int n1, int n2, TIntFltH& PAlpha, double& SumPAlphaLogPAlpha, TIntFltH& Qi, TIntH& Module, int& Br, TCnComV& CmtyV) {
418 
419  double MinCodeLength = TSnapDetail::InfomapOnlineIncrement(Graph, n1, n2, PAlpha, SumPAlphaLogPAlpha, Qi, Module, Br);
420 
421  Module.SortByDat(true);
422 
423  int Mod = -1;
424  for (int i = 0; i<Module.Len(); i++) {
425  if (Module[i]>Mod){
426  Mod = Module[i];
427  TCnCom t;
428  for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++){
429  if (Module.GetDat(NI.GetId()) == Mod)
430  t.Add(NI.GetId());
431  }
432  CmtyV.Add(t);
433  }
434  }
435 
436  return MinCodeLength;
437 }
438 
439 void CmtyEvolutionFileBatchV(TStr InFNm, TIntIntVH& sizesContV, TIntIntVH& cContV, TIntIntVH& edges, double alpha, double beta, int CmtyAlg) {
440  TIntIntHH sizesCont;
441  TIntIntHH cCont;
442  CmtyEvolutionFileBatch(InFNm, sizesCont, cCont, edges, alpha, beta, CmtyAlg);
443 
444  TIntV uniqueId;
445  for (int i = 0; i < cCont.Len(); i++){
446  for (THashKeyDatI<TInt, TInt> it = cCont[i].BegI(); !it.IsEnd(); it++){
447  if (!uniqueId.IsIn(it.GetKey()))
448  uniqueId.Add(it.GetKey());
449  }
450  }
451 
452  for (int j = 0; j<uniqueId.Len(); j++)
453  {
454  TIntV cV;
455  for (int i = 0; i<cCont.Len(); i++)
456  {
457  if (cCont[i].IsKey(uniqueId[j]))
458  cV.Add(cCont[i].GetDat(uniqueId[j]));
459  else
460  cV.Add(-1);
461  }
462  cContV.AddDat(uniqueId[j], cV);
463  }
464 
465  TIntV uniqueC;
466  for (int i = 0; i < sizesCont.Len(); i++){
467  for (THashKeyDatI<TInt, TInt> it = sizesCont[i].BegI(); !it.IsEnd(); it++){
468  if (!uniqueC.IsIn(it.GetKey()))
469  uniqueC.Add(it.GetKey());
470  }
471  }
472 
473  for (int j = 0; j<uniqueC.Len(); j++)
474  {
475  TIntV cV;
476  for (int i = 0; i<sizesCont.Len(); i++)
477  {
478  if (sizesCont[i].IsKey(uniqueC[j]))
479  cV.Add(sizesCont[i].GetDat(uniqueC[j]));
480  else
481  cV.Add(0);
482  }
483  sizesContV.AddDat(uniqueC[j], cV);
484  }
485 
486 }
487 
488 void CmtyEvolutionFileBatch(TStr InFNm, TIntIntHH& sizesCont, TIntIntHH& cCont, TIntIntVH& edges, double alpha, double beta, int CmtyAlg) {
489 
490 
491  // reading folder with networks and calculating core/periphery
492  int br = 0;
493  TIntIntH prev;
494  TIntH prev_sizes;
495 
496  TSsParser Ss(InFNm, ssfWhiteSep, true, false, true);
497  Ss.Next();
498  //int internal_year_counter = 0;
499  // variable for delimiter between networks
500  TStr Marker;
501  // defining variables for node ids and starting year
502  int SrcNId, DstNId; // , t = 1970;
503 
504  // temporal container for edges
505  TIntIntVH edges_;
506 
507  while (!Ss.Eof()) {
508 
509  //printf("%i\n", t);
510  Marker = Ss.GetLnStr();
511  // get the year from the network seperator
512  //t = Marker.GetSubStr(1, 4).GetInt();
513 
514  if (Marker.GetCh(0) == '#'){
515 
516  Ss.Next();
517  PUNGraph Graph = PUNGraph::TObj::New();
518  do{
519  if (!Ss.GetInt(0, SrcNId) || !Ss.GetInt(1, DstNId)) {
520  if (!Ss.Eof()){
521  Ss.Next();
522  if (!Ss.Eof())
523  Marker = Ss.GetLnStr();
524  }
525  continue;
526  }
527  if (!Graph->IsNode(SrcNId)) { Graph->AddNode(SrcNId); }
528  if (!Graph->IsNode(DstNId)) { Graph->AddNode(DstNId); }
529  Graph->AddEdge(SrcNId, DstNId);
530  Ss.Next();
531  if (!Ss.Eof())
532  Marker = Ss.GetLnStr();
533  } while (Marker.GetCh(0) != '#' && !Ss.Eof());
534 
535 
536  if (Graph->GetNodes()>0) {
537  // WORK
538 
539  TSnap::DelSelfEdges(Graph);
540  TCnComV CmtyV;
541  //double Q = 0.0;
542  TStr CmtyAlgStr;
543  if (CmtyAlg == 1) {
544  CmtyAlgStr = "Girvan-Newman";
545  //Q = TSnap::CommunityGirvanNewman(Graph, CmtyV);
546  }
547  else if (CmtyAlg == 2) {
548  CmtyAlgStr = "Clauset-Newman-Moore";
549  //Q = TSnap::CommunityCNM(Graph, CmtyV);
550  }
551  else if (CmtyAlg == 3) {
552  CmtyAlgStr = "Infomap";
553  //Q = TSnap::Infomap(Graph, CmtyV);
554  }
555  else { Fail; }
556 
557  TIntIntHH distCont;
558 
559  if (br == 0) {
560  prev.Clr();
561  //int size = 0;
562  for (int c = 0; c < CmtyV.Len(); c++) {
563  for (int i = 0; i < CmtyV[c].Len(); i++){
564  prev.AddDat(CmtyV[c][i].Val, c);
565  }
566  //int s = CmtyV[c].Len();
567  prev_sizes.AddDat(c, CmtyV[c].Len());
568  }
569  }
570  else {
571 
572  // containers for statistics
573 
574  //TIntFltHH stat1;
575  //TIntIntHH stat2;
576  TIntH dist;
577  TIntH map;
578 
579  int first_new_c_id = -1;
580 
581  // getting first free id for a new community
582  for (THashKeyDatI<TInt, TInt> it = prev_sizes.BegI(); !it.IsEnd(); it++)
583  if (it.GetKey() > first_new_c_id)
584  first_new_c_id = it.GetKey();
585  if (CmtyV.Len() - 1>first_new_c_id)
586  first_new_c_id = CmtyV.Len() - 1;
587  first_new_c_id++;
588 
589  for (int c = 0; c < CmtyV.Len(); c++) {
590 
591  TIntV stat;
592  TIntFltH statH1;
593  TIntFltH statH2;
594 
595  // initialize distributions to 0
596  for (THashKeyDatI<TInt, TInt> it = prev_sizes.BegI(); !it.IsEnd(); it++)
597  dist.AddDat(it.GetKey(), 0);
598  //for new nodes
599  dist.AddDat(-1, 0);
600 
601  for (int i = 0; i < CmtyV[c].Len(); i++) {
602  int id = CmtyV[c][i].Val;
603  int prev_comm = -1;
604  if (prev.IsKey(id))
605  prev_comm = prev.GetDat(CmtyV[c][i].Val);
606  stat.Add(prev_comm);
607  int pre_val = dist.GetDat(prev_comm);
608  dist.AddDat(prev_comm, pre_val + 1);
609  }
610 
611  double sumstat2 = 0;
612  for (THashKeyDatI<TInt, TInt> it = dist.BegI(); !it.IsEnd(); it++) {
613 
614  int k = it.GetKey();
615  int d = it.GetDat();
616  if (d > 0){
617  if (prev_sizes.IsKey(it.GetKey())){
618 
619  double stat1_ = (double)d / (double)prev_sizes.GetDat(k);
620  statH1.AddDat(k, stat1_);
621  }
622  double stat2_ = (double)d / (double)CmtyV[c].Len();
623  statH2.AddDat(k, stat2_);
624  sumstat2 += stat2_;
625 
626  TIntV edge;
627  edge.Add(k);
628  edge.Add(c);
629  edge.Add(d);
630  edge.Add(br - 1);
631  edge.Add(br);
632  edges_.AddDat(edges_.Len() + 1, edge);
633  }
634 
635  // adding edges between two communities in two neighbouring time points;
636 
637 
638  if (sumstat2 > 0.98) break;
639  }
640 
641  int n_of_c_greater_than_half = 0;
642  int id_of_c_greater_than_half = -1;
643  TIntV ids_of_c_greater_than_half;
644 
645  for (THashKeyDatI<TInt, TFlt> it = statH1.BegI(); !it.IsEnd(); it++){
646  if (it.GetDat()>alpha){
647  id_of_c_greater_than_half = it.GetKey();
648  ids_of_c_greater_than_half.Add(it.GetKey());
649  n_of_c_greater_than_half++;
650  }
651  }
652 
653  // if this community is build of majority of one previous community and the other parts of the community are fractions of other communities smaller than half, the new community gets its label
654  if (n_of_c_greater_than_half == 1){
655  map.AddDat(c, id_of_c_greater_than_half);
656  }
657  else{
658  int h2part_id = -2;
659  for (int i = 0; i<ids_of_c_greater_than_half.Len(); i++){
660  double H2 = statH2.GetDat(ids_of_c_greater_than_half[i]);
661  if (H2>beta){
662  h2part_id = ids_of_c_greater_than_half[i];
663  }
664  }
665  if (h2part_id != -2)
666  map.AddDat(c, h2part_id);
667  else{
668  map.AddDat(c, first_new_c_id);
669  first_new_c_id++;
670  }
671  }
672 
673  distCont.AddDat(c, dist);
674 
675  //stat1.AddDat(c,statH1);
676  //stat2.AddDat(c,statH2);
677 
678  }
679 
680 
681  prev.Clr();
682  prev_sizes.Clr();
683  for (int c = 0; c < CmtyV.Len(); c++){
684  for (int i = 0; i < CmtyV[c].Len(); i++){
685  prev.AddDat(CmtyV[c][i].Val, map.GetDat(c));
686  }
687  //int s = CmtyV[c].Len();
688  prev_sizes.AddDat(map.GetDat(c), CmtyV[c].Len());
689  }
690 
691  // filing the edges container - the key thing is the map(c)
692  for (THashKeyDatI<TInt, TIntV> it = edges_.BegI(); !it.IsEnd(); it++){
693  TIntV edgesV;
694  int a = it.GetDat()[0];
695  int b = it.GetDat()[1];
696  int v = it.GetDat()[2];
697  int d = it.GetDat()[3];
698  int e = it.GetDat()[4];
699  edgesV.Add(map.GetDat(b));
700  edgesV.Add(a);
701  edgesV.Add(v);
702  edgesV.Add(d);
703  edgesV.Add(e);
704  if (a != -1)
705  edges.AddDat(edges.Len(), edgesV);
706  }
707  edges_.Clr();
708 
709 
710  }
711 
712  sizesCont.AddDat(br, prev_sizes);
713  cCont.AddDat(br, prev);
714  br++;
715  // WORK - END
716  }
717  }
718  else Ss.Next();
719  }
720 
721 }
722 
723 void CmtyEvolutionJson(TStr& Json, TIntIntVH& sizesContV, TIntIntVH& cContV, TIntIntVH& edges){
725  // This function creates a JSON string with communities and edges for community evolution visualization using D3.js
727 
728  // writing json label for edges
729  Json.InsStr(Json.Len(), "{\n\"edges\":[\n");
730 
731  TInt br = 0;
732  // iterating hash of vector of edges and writing into string
733  for (THashKeyDatI<TInt, TIntV> it = edges.BegI(); !it.IsEnd(); it++)
734  {
735  // first node
736  TInt n1 = it.GetDat()[1];
737  // second node
738  TInt n2 = it.GetDat()[0];
739  // edge weight
740  TInt w = it.GetDat()[2];
741  // start time point
742  TInt t0 = it.GetDat()[3];
743  // end time point
744  TInt t1 = it.GetDat()[4];
745 
746  if (br>0)
747  Json.InsStr(Json.Len(), ",");
748 
749  // writing to string
750  Json.InsStr(Json.Len(), "{\"n1\":"); Json.InsStr(Json.Len(), n1.GetStr());
751  Json.InsStr(Json.Len(), ", \"n2\":"); Json.InsStr(Json.Len(), n2.GetStr());
752  Json.InsStr(Json.Len(), ", \"w\":"); Json.InsStr(Json.Len(), w.GetStr());
753  Json.InsStr(Json.Len(), ", \"t0\":"); Json.InsStr(Json.Len(), t0.GetStr());
754  Json.InsStr(Json.Len(), ", \"t1\":"); Json.InsStr(Json.Len(), t1.GetStr());
755  Json.InsStr(Json.Len(), " }\n");
756  br++;
757  }
758 
759  // json label for communities
760  Json.InsStr(Json.Len(), "],\n\"communities\":[\n");
761 
762  br = 0;
763  // printing communities into json file
764  for (int i = 0; i < sizesContV[0].Len(); i++)
765  {
766  for (THashKeyDatI<TInt, TIntV> it = sizesContV.BegI(); !it.IsEnd(); it++)
767  {
768  // id of community
769  TInt id = it.GetKey();
770  // community size
771  TInt size = it.GetDat()[i];
772  // time
773  TInt j = i;
774 
775  // if the community has size greater than 0, output it to json string
776  if (size > 0) {
777  if (br>0)
778  Json.InsStr(Json.Len(), ",");
779 
780  TInt size = it.GetDat()[i];
781  Json.InsStr(Json.Len(), "{\"id\":"); Json.InsStr(Json.Len(), id.GetStr());
782  Json.InsStr(Json.Len(), ", \"size\":"); Json.InsStr(Json.Len(), size.GetStr());
783  Json.InsStr(Json.Len(), ", \"t\":"); Json.InsStr(Json.Len(), j.GetStr());
784  Json.InsStr(Json.Len(), " }\n");
785 
786  br++;
787  }
788  }
789  }
790 
791  // printing communities into json file - alternative ordering
792  /*
793  for (THashKeyDatI<TInt, TIntV> it = sizesContV.BegI(); !it.IsEnd(); it++)
794  {
795  TInt id = it.GetKey();
796  int len = it.GetDat().Len();
797  for (int i=0; i < it.GetDat().Len(); i++)
798  {
799  TInt size = it.GetDat()[i];
800  TInt j = i;
801  if (size > 0) {
802 
803  if(br>0)
804  Json.InsStr(Json.Len(),",");
805 
806  TInt size = it.GetDat()[i];
807 
808  Json.InsStr(Json.Len(),"{\"id\":"); Json.InsStr(Json.Len(),id.GetStr());
809  Json.InsStr(Json.Len(),", \"size\":"); Json.InsStr(Json.Len(),size.GetStr());
810  Json.InsStr(Json.Len(),", \"t\":"); Json.InsStr(Json.Len(),j.GetStr());
811  Json.InsStr(Json.Len()," }\n");
812 
813  br++;
814 
815  }
816 
817  }
818  }
819  */
820 
821  Json.InsStr(Json.Len(), "]\n}");
822 
823 }
824 
825 TStr CmtyTest(TStr InFNm, int CmtyAlg){
826 
827  TIntIntVH sizesContV;
828  TIntIntVH cContV;
829  TIntIntVH edges;
830  double alpha = 0.5;
831  double beta = 0.75;
832  CmtyEvolutionFileBatchV(InFNm, sizesContV, cContV, edges, alpha, beta, CmtyAlg);
833  TStr out;
834  //int a = sizesContV.Len();
835  //int b = cContV.Len();
836  //int c = edges.Len();
837  CmtyEvolutionJson(out, sizesContV, cContV, edges);
838 
839  return out;
840 }
841 
842 void ReebSimplify(PNGraph& Graph, TIntH& t, int e, PNGraph& gFinal, TIntH& tFinal, bool collapse) {
843  TIntIntVH components;
844  TIntIntVH ct;
845 
846  int newId = 0; //get first new free id;
847 
848  // gett first and last t
849  int first = 429496729;
850  int last = -1;
851 
852  // smarter way of determining focus time points
853  TIntV timePoints;
854 
855  // get first and last time point
856  for (THashKeyDatI<TInt, TInt> it = t.BegI(); !it.IsEnd(); it++) {
857  if (it.GetDat()<first)
858  first = it.GetDat();
859  if (it.GetDat()>last)
860  last = it.GetDat();
861  }
862 
863  // adding focus timepoints
864  // this can be put in the previous (first, last time point detection) iteration if breaking borders is not an issue
865  for (THashKeyDatI<TInt, TInt> it = t.BegI(); !it.IsEnd(); it++) {
866  if (it.GetDat() - (e / 2) >= first)
867  timePoints.Add(it.GetDat() - (e / 2) /*- 0.1*/);
868  timePoints.Add(it.GetDat());
869  if (it.GetDat() + (e / 2) <= last)
870  timePoints.Add(it.GetDat() + (e / 2) /*+ 0.1*/);
871  }
872 
873 
874  //iterate each time point
875  for (int i = 0; i<timePoints.Len(); i++) {
876 
877  int focusTimePoint = timePoints[i];
878 
879  TIntV fnodes; // all the nodes int the focus in that step
880 
881  // getting nodes in focus -- in epsilon
882  for (THashKeyDatI<TInt, TInt> it = t.BegI(); !it.IsEnd(); it++) {
883  if ((it.GetDat() <= focusTimePoint + (e / 2)) && (it.GetDat() >= focusTimePoint - (e / 2)))
884  fnodes.Add(it.GetKey());
885  }
886 
887  // create graph from nodes in focus
888  PNGraph g1 = TNGraph::New();
889  for (int i = 0; i<fnodes.Len(); i++) {
890  if (!g1->IsNode(fnodes[i]))
891  g1->AddNode(fnodes[i]);
892  // lower star
893  for (int j = 0; j<Graph->GetNI(fnodes[i]).GetInDeg(); j++) {
894  int NeighId = Graph->GetNI(fnodes[i]).GetInNId(j);
895  if (t.GetDat(NeighId)<focusTimePoint - (e / 2)) {
896 
897  }
898  else {
899  if (!g1->IsNode(NeighId))
900  g1->AddNode(NeighId);
901  g1->AddEdge(NeighId, fnodes[i]);
902  }
903  }
904  // upper star
905  for (int j = 0; j<Graph->GetNI(fnodes[i]).GetOutDeg(); j++) {
906  int NeighId = Graph->GetNI(fnodes[i]).GetOutNId(j);
907  if (t.GetDat(NeighId)>focusTimePoint + (e / 2)) {
908 
909  }
910  else {
911  if (!g1->IsNode(NeighId))
912  g1->AddNode(NeighId);
913  g1->AddEdge(fnodes[i], NeighId);
914  }
915  }
916  }
917 
918  // getting results from commponents detection and recording elements of components and timestamps of components
919  TCnComV CnComV;
920  GetWccs(g1, CnComV);
921  TIntV communitiesAtT;
922  for (int cc = 0; cc < CnComV.Len(); cc++) {
923  components.AddDat(newId, CnComV[cc].NIdV);
924  communitiesAtT.Add(newId);
925  newId++;
926  }
927  if (CnComV.Len() > 0)
928  ct.AddDat(focusTimePoint, communitiesAtT);
929  } // end iterate each node
930 
931  // connecting neighbouring components
933  THashKeyDatI<TInt, TIntV> prelast = ct.EndI()--;
934  prelast--;
935  while (it < prelast) {
936  TIntV cms0;
937  TIntV cms1;
938  int focusTimePoint;
939  int focusTimePoint1;
940  focusTimePoint = it.GetKey();
941  cms0 = it.GetDat();
942  it++;
943  focusTimePoint1 = it.GetKey();
944  cms1 = it.GetDat();
945  if (cms0.Len()>0 && cms1.Len() > 0) {
946  for (int i = 0; i < cms0.Len(); i++) {
947  for (int j = 0; j < cms1.Len(); j++) {
948  TIntV ids0 = components.GetDat(cms0[i]);
949  TIntV ids1 = components.GetDat(cms1[j]);
950  if (ids0.IntrsLen(ids1) > 0 || TSnapDetail::edgeIntersect(Graph, ids0, ids1)) {
951  if (!gFinal->IsNode(cms0[i])) {
952  gFinal->AddNode(cms0[i]);
953  tFinal.AddDat(cms0[i], focusTimePoint);
954  }
955  if (!gFinal->IsNode(cms1[j])) {
956  gFinal->AddNode(cms1[j]);
957  tFinal.AddDat(cms1[j], focusTimePoint1);
958  }
959  gFinal->AddEdge(cms0[i], cms1[j]);
960  }
961  }
962  }
963  }
964  }// end connecting components
965 
966  // collapsing chains
967  if (collapse) {
968  for (TNGraph::TNodeI NI = gFinal->BegNI(); NI < gFinal->EndNI(); NI++) {
969  if (NI.GetInDeg() == 1 && NI.GetOutDeg() == 1)
970  if (gFinal->GetNI(NI.GetInNId(0)).GetOutDeg() == 1 && gFinal->GetNI(NI.GetOutNId(0)).GetInDeg() == 1)
971  {
972  gFinal->AddEdge(NI.GetInNId(0), NI.GetOutNId(0));
973  gFinal->DelEdge(NI.GetInNId(0), NI.GetId());
974  tFinal.DelKey(NI.GetId());
975  gFinal->DelNode(NI.GetId());
976  }
977  }
978  }// end collapsing
979 
980 }
981 
982 void ReebRefine(PNGraph& Graph, TIntH& t, int e, PNGraph& gFinal, TIntH& tFinal, bool collapse) {
983  TIntIntVH components;
984  TIntIntVH ct;
985 
986  int newId = 0; //get first new free id;
987 
988  // gett first and last t
989  int first = 429496729;
990  int last = -1;
991 
992  // smarter way of determining focus time points
993  TIntV timePoints;
994 
995  // get first and last time point
996  for (THashKeyDatI<TInt, TInt> it = t.BegI(); !it.IsEnd(); it++) {
997  if (it.GetDat() < first)
998  first = it.GetDat();
999  if (it.GetDat() > last)
1000  last = it.GetDat();
1001  }
1002 
1003  // adding focus timepoints
1004  // this can be put in the previous (first, last time point detection) iteration if breaking borders is not an issue
1005  for (THashKeyDatI<TInt, TInt> it = t.BegI(); !it.IsEnd(); it++) {
1006  if (it.GetDat() - (e / 2) >= first)
1007  timePoints.Add(it.GetDat() - (e / 2) /*- 0.1*/);
1008  timePoints.Add(it.GetDat());
1009  if (it.GetDat() + (e / 2) <= last)
1010  timePoints.Add(it.GetDat() + (e / 2) /*+ 0.1*/);
1011  }
1012 
1013  TIntV timePointsUnique;
1014  int prevtp = -1;
1015  //get unique time points
1016  for (int i = 0; i < timePoints.Len(); i++){
1017  if (timePoints[i] > prevtp)
1018  timePointsUnique.Add(timePoints[i]);
1019  prevtp = timePoints[i];
1020  }
1021 
1022  timePoints.Clr();
1023  timePoints = timePointsUnique;
1024 
1025  //iterate each time point
1026  for (int i = 0; i < timePoints.Len(); i++) {
1027 
1028  int focusTimePoint = timePoints[i];
1029 
1030  TIntV fnodes; // all the nodes int the focus in that step
1031 
1032  // getting nodes in focus -- in epsilon
1033  for (THashKeyDatI<TInt, TInt> it = t.BegI(); !it.IsEnd(); it++) {
1034  if ((it.GetDat() <= focusTimePoint + (e / 2)) && (it.GetDat() >= focusTimePoint - (e / 2)))
1035  fnodes.Add(it.GetKey());
1036  }
1037 
1038  // create graph from nodes in focus
1039  PNGraph g1 = TNGraph::New();
1040  for (int i = 0; i < fnodes.Len(); i++) {
1041  if (!g1->IsNode(fnodes[i]))
1042  g1->AddNode(fnodes[i]);
1043  // lower star
1044  for (int j = 0; j < Graph->GetNI(fnodes[i]).GetInDeg(); j++) {
1045  int NeighId = Graph->GetNI(fnodes[i]).GetInNId(j);
1046  if (t.GetDat(NeighId) < focusTimePoint - (e / 2)) {
1047 
1048  }
1049  else {
1050  if (!g1->IsNode(NeighId))
1051  g1->AddNode(NeighId);
1052  g1->AddEdge(NeighId, fnodes[i]);
1053  }
1054  }
1055  // upper star
1056  for (int j = 0; j < Graph->GetNI(fnodes[i]).GetOutDeg(); j++) {
1057  int NeighId = Graph->GetNI(fnodes[i]).GetOutNId(j);
1058  if (t.GetDat(NeighId) > focusTimePoint + (e / 2)) {
1059 
1060  }
1061  else {
1062  if (!g1->IsNode(NeighId))
1063  g1->AddNode(NeighId);
1064  g1->AddEdge(fnodes[i], NeighId);
1065  }
1066  }
1067  }
1068 
1069  // getting results from commponents detection and recording elements of components and timestamps of components
1070  TIntH inCompCount;
1071  TIntIntVH comps;
1072  int compBr = 0;
1073  TIntH nn_nodes;
1074 
1075  int FTP = focusTimePoint;
1076  TIntH TEdges;
1077 
1078  for (TNGraph::TNodeI NI = g1->BegNI(); NI < g1->EndNI(); NI++) {
1079 
1080 
1081  int FTPNode = NI.GetId();
1082  TNGraph::TNodeI GNI = Graph->GetNI(FTPNode);
1083  int FI, FO, RI, RO, I, O;
1084 
1085  RI = NI.GetInDeg();
1086  RO = NI.GetOutDeg();
1087 
1088  FI = Graph->GetNI(FTPNode).GetInDeg() - RI;
1089  FO = Graph->GetNI(FTPNode).GetOutDeg() - RO;
1090 
1091  if (focusTimePoint + (e / 2) == t.GetDat(NI.GetId())) { // if its on the right edge only in degree is observed
1092  RO = FO = 0;
1093  }
1094  if (focusTimePoint - (e / 2) == t.GetDat(NI.GetId())) { // if its on the left edge only out degree is observed
1095  RI = FI = 0;
1096  }
1097 
1098  I = RI + FI;
1099  O = RO + FO;
1100 
1101  // counting edges imidiately after time point
1102  int temp = 0;
1103  if (TEdges.IsKey(FTP))
1104  temp = TEdges.GetDat(FTP);
1105  TEdges.AddDat(FTP, O + temp);
1106 
1107  // FIND ELEMENTS
1108 
1109  // n - n,
1110  if (I > 1 && O > 1) {
1111  // number of nodes is in our out degree
1112  int nn = I;
1113  if (O > I)
1114  nn = O;
1115 
1116  TIntV nds;
1117  nds.Add(FTPNode);
1118  for (int i = 0; i < I; i++) {
1119  nds.Add(GNI.GetInNId(i));
1120  }
1121 
1122  for (int i = 0; i < O; i++) {
1123  nds.Add(GNI.GetOutNId(i));
1124  }
1125 
1126  for (int j = 0; j < nn; j++) {
1127  nn_nodes.AddDat(compBr);
1128  comps.AddDat(compBr, nds);
1129  compBr++;
1130  }
1131  }
1132 
1133  // 1 - n
1134  else if (I == 1 && O > 1) {
1135  for (int i = 0; i < O; i++) {
1136  TIntV nds;
1137  nds.Add(FTPNode);
1138  nds.Add(GNI.GetInNId(0));
1139  nds.Add(GNI.GetOutNId(i));
1140  comps.AddDat(compBr, nds);
1141  compBr++;
1142  }
1143  }
1144 
1145  // n - 1
1146  else if (I > 1 && O == 1) {
1147  for (int i = 0; i < I; i++) {
1148  TIntV nds;
1149  nds.Add(FTPNode);
1150  nds.Add(GNI.GetOutNId(0));
1151  nds.Add(GNI.GetInNId(i));
1152  comps.AddDat(compBr, nds);
1153  compBr++;
1154  }
1155  }
1156 
1157  // 0 - n
1158  else if (I == 0 && O > 1) {
1159  for (int i = 0; i < O; i++) {
1160  TIntV nds;
1161  nds.Add(FTPNode);
1162  nds.Add(GNI.GetOutNId(i));
1163  comps.AddDat(compBr, nds);
1164  compBr++;
1165  }
1166  }
1167 
1168  // n - 0
1169  else if (I > 1 && O == 0) {
1170  for (int i = 0; i < I; i++) {
1171  TIntV nds;
1172  nds.Add(FTPNode);
1173  nds.Add(GNI.GetInNId(i));
1174  comps.AddDat(compBr, nds);
1175  compBr++;
1176  }
1177  }
1178 
1179  // 1 - 1
1180  else if (I == 1 && O == 1) {
1181  TIntV nds;
1182  nds.Add(FTPNode);
1183  nds.Add(GNI.GetOutNId(0));
1184  nds.Add(GNI.GetInNId(0));
1185  comps.AddDat(compBr, nds);
1186  compBr++;
1187  }
1188 
1189  // 0 - 1
1190  else if (I == 0 && O == 1) {
1191  TIntV nds;
1192  nds.Add(FTPNode);
1193  nds.Add(GNI.GetOutNId(0));
1194  comps.AddDat(compBr, nds);
1195  compBr++;
1196  }
1197 
1198  // 1 - 0
1199  else if (I == 1 && O == 0) {
1200  TIntV nds;
1201  nds.Add(FTPNode);
1202  nds.Add(GNI.GetInNId(0));
1203  comps.AddDat(compBr, nds);
1204  compBr++;
1205  }
1206 
1207 
1208 
1209  } // end iterate each node
1210 
1211  // connecting inside of epsilon
1212 
1213  TIntIntVH elements;
1214  TIntH banned;
1215  for (int cc0 = 0; cc0 < comps.Len(); cc0++) {
1216  for (int cc1 = cc0; cc1 < comps.Len(); cc1++) {
1217  int smaller = comps[cc0].Len();
1218  int smaller_id = cc0;
1219  if (cc0 != cc1) {
1220  if (comps[cc1].Len() < smaller) {
1221  smaller = comps[cc1].Len();
1222  smaller_id = cc1;
1223  }
1224  int vi = TSnapDetail::vectorIntersect(comps[cc0], comps[cc1]);
1225  if (vi == smaller && !nn_nodes.IsKey(smaller_id)){
1226  banned.AddDat(smaller_id);
1227  }
1228  /*else if (smaller > 2 && vi == smaller - 1 && !nn_nodes.IsKey(smaller_id)) {
1229  TSnapDetail::transitiveTransform(comps[cc0], comps[cc1]);
1230  banned.AddDat(cc0);
1231  }*/
1232  }
1233  }
1234  }
1235 
1236  // add transitivity connection
1237 
1238  /*
1239  int max_out_tp = -1;
1240  int max_out = -1;
1241  for (THashKeyDatI<TInt, TInt> it = TEdges.BegI(); !it.IsEnd(); it++) {
1242  if (it.GetDat() > max_out) {
1243  max_out = it.GetDat();
1244  max_out_tp = it.GetKey();
1245  }
1246  }
1247  */
1248  for (int cc0 = 0; cc0 < comps.Len(); cc0++) {
1249  if (!banned.IsKey(cc0) /*&& TSnapDetail::chekIfCrossing(comps[cc0], t, first, last, max_out_tp)*/)
1250  elements.AddDat(cc0, comps[cc0]);
1251  }
1252 
1253 
1254  TIntV communitiesAtT;
1255  for (int cc = 0; cc < elements.Len(); cc++) {
1256  components.AddDat(newId, elements[cc]);
1257  communitiesAtT.Add(newId);
1258  newId++;
1259  }
1260  if (elements.Len() > 0)
1261  ct.AddDat(focusTimePoint, communitiesAtT);
1262 
1263  } // FOR
1264 
1265  // connecting neighbouring components
1266  THashKeyDatI<TInt, TIntV> it = ct.BegI();
1267  THashKeyDatI<TInt, TIntV> prelast = ct.EndI()--;
1268  prelast--;
1269  while (it < prelast) {
1270  TIntV cms0;
1271  TIntV cms1;
1272  int focusTimePoint;
1273  int focusTimePoint1;
1274  focusTimePoint = it.GetKey();
1275  cms0 = it.GetDat();
1276  it++;
1277  focusTimePoint1 = it.GetKey();
1278  cms1 = it.GetDat();
1279  if (cms0.Len() > 0 && cms1.Len() > 0) {
1280  for (int i = 0; i < cms0.Len(); i++) {
1281  for (int j = 0; j < cms1.Len(); j++) {
1282  TIntV ids0 = components.GetDat(cms0[i]);
1283  TIntV ids1 = components.GetDat(cms1[j]);
1284  int smaller = ids0.Len();
1285  if (ids1.Len() < smaller)
1286  smaller = ids1.Len();
1287 
1288  if (TSnapDetail::vectorIntersect(ids0, ids1) == smaller || (smaller > 2 && TSnapDetail::vectorIntersect(ids0, ids1) == (smaller -1 ))) {
1289  if (!gFinal->IsNode(cms0[i])) {
1290  gFinal->AddNode(cms0[i]);
1291  tFinal.AddDat(cms0[i], focusTimePoint);
1292  }
1293  if (!gFinal->IsNode(cms1[j])) {
1294  gFinal->AddNode(cms1[j]);
1295  tFinal.AddDat(cms1[j], focusTimePoint1);
1296  }
1297  gFinal->AddEdge(cms0[i], cms1[j]);
1298  }
1299  }
1300  }
1301  }
1302  }// end connecting components
1303 
1304  // collapsing chains
1305  if (collapse) {
1306  for (TNGraph::TNodeI NI = gFinal->BegNI(); NI < gFinal->EndNI(); NI++) {
1307  if (NI.GetInDeg() == 1 && NI.GetOutDeg() == 1)
1308  if (gFinal->GetNI(NI.GetInNId(0)).GetOutDeg() == 1 && gFinal->GetNI(NI.GetOutNId(0)).GetInDeg() == 1)
1309  {
1310  gFinal->AddEdge(NI.GetInNId(0), NI.GetOutNId(0));
1311  gFinal->DelEdge(NI.GetInNId(0), NI.GetId());
1312  tFinal.DelKey(NI.GetId());
1313  gFinal->DelNode(NI.GetId());
1314  }
1315  }
1316  }// end collapsing
1317 
1318 }
1319 
1320 namespace TSnapDetail {
1325 private:
1326  struct TCmtyDat {
1327  double DegFrac;
1329  int MxQId;
1330  TCmtyDat() : MxQId(-1) { }
1331  TCmtyDat(const double& NodeDegFrac, const int& OutDeg) :
1332  DegFrac(NodeDegFrac), NIdQH(OutDeg), MxQId(-1) { }
1333  void AddQ(const int& NId, const double& Q) {
1334  NIdQH.AddDat(NId, Q);
1335  if (MxQId == -1 || NIdQH[MxQId]<Q) { MxQId = NIdQH.GetKeyId(NId); }
1336  }
1337  void UpdateMaxQ() {
1338  MxQId = -1;
1339  for (int i = -1; NIdQH.FNextKeyId(i);) {
1340  if (MxQId == -1 || NIdQH[MxQId]< NIdQH[i]) { MxQId = i; }
1341  }
1342  }
1343  void DelLink(const int& K) {
1344  const int NId = GetMxQNId();
1345  NIdQH.DelKey(K); if (NId == K) { UpdateMaxQ(); }
1346  }
1347  int GetMxQNId() const { return NIdQH.GetKey(MxQId); }
1348  double GetMxQ() const { return NIdQH[MxQId]; }
1349  };
1350 private:
1354  double Q;
1355 public:
1356  TCNMQMatrix(const PUNGraph& Graph) : CmtyQH(Graph->GetNodes()),
1357  MxQHeap(Graph->GetNodes()), CmtyIdUF(Graph->GetNodes()) {
1358  Init(Graph);
1359  }
1360  void Init(const PUNGraph& Graph) {
1361  const double M = 0.5 / Graph->GetEdges(); // 1/2m
1362  Q = 0.0;
1363  for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
1364  CmtyIdUF.Add(NI.GetId());
1365  const int OutDeg = NI.GetOutDeg();
1366  if (OutDeg == 0) { continue; }
1367  TCmtyDat& Dat = CmtyQH.AddDat(NI.GetId(), TCmtyDat(M * OutDeg, OutDeg));
1368  for (int e = 0; e < NI.GetOutDeg(); e++) {
1369  const int DstNId = NI.GetOutNId(e);
1370  const double DstMod = 2 * M * (1.0 - OutDeg * Graph->GetNI(DstNId).GetOutDeg() * M);
1371  Dat.AddQ(DstNId, DstMod);
1372  }
1373  Q += -1.0*TMath::Sqr(OutDeg*M);
1374  if (NI.GetId() < Dat.GetMxQNId()) {
1375  MxQHeap.Add(TFltIntIntTr(Dat.GetMxQ(), NI.GetId(), Dat.GetMxQNId()));
1376  }
1377  }
1378  MxQHeap.MakeHeap();
1379  }
1381  while (true) {
1382  if (MxQHeap.Empty()) { break; }
1383  const TFltIntIntTr TopQ = MxQHeap.PopHeap();
1384  if (!CmtyQH.IsKey(TopQ.Val2) || !CmtyQH.IsKey(TopQ.Val3)) { continue; }
1385  if (TopQ.Val1 != CmtyQH.GetDat(TopQ.Val2).GetMxQ() && TopQ.Val1 != CmtyQH.GetDat(TopQ.Val3).GetMxQ()) { continue; }
1386  return TopQ;
1387  }
1388  return TFltIntIntTr(-1, -1, -1);
1389  }
1390 
1391  bool MergeBestQ() {
1392  const TFltIntIntTr TopQ = FindMxQEdge();
1393  if (TopQ.Val1 <= 0.0) { return false; }
1394  // joint communities
1395  const int I = TopQ.Val3;
1396  const int J = TopQ.Val2;
1397  CmtyIdUF.Union(I, J); // join
1398  Q += TopQ.Val1;
1399  TCmtyDat& DatJ = CmtyQH.GetDat(J);
1400  { TCmtyDat& DatI = CmtyQH.GetDat(I);
1401  DatI.DelLink(J); DatJ.DelLink(I);
1402  for (int i = -1; DatJ.NIdQH.FNextKeyId(i); ) {
1403  const int K = DatJ.NIdQH.GetKey(i);
1404  TCmtyDat& DatK = CmtyQH.GetDat(K);
1405  double NewQ = DatJ.NIdQH[i];
1406  if (DatI.NIdQH.IsKey(K)) { NewQ = NewQ + DatI.NIdQH.GetDat(K); DatK.DelLink(I); } // K connected to I and J
1407  else { NewQ = NewQ - 2 * DatI.DegFrac*DatK.DegFrac; } // K connected to J not I
1408  DatJ.AddQ(K, NewQ);
1409  DatK.AddQ(J, NewQ);
1410  MxQHeap.PushHeap(TFltIntIntTr(NewQ, TMath::Mn(J, K), TMath::Mx(J, K)));
1411  }
1412  for (int i = -1; DatI.NIdQH.FNextKeyId(i); ) {
1413  const int K = DatI.NIdQH.GetKey(i);
1414  if (!DatJ.NIdQH.IsKey(K)) { // K connected to I not J
1415  TCmtyDat& DatK = CmtyQH.GetDat(K);
1416  const double NewQ = DatI.NIdQH[i] - 2 * DatJ.DegFrac*DatK.DegFrac;
1417  DatJ.AddQ(K, NewQ);
1418  DatK.DelLink(I);
1419  DatK.AddQ(J, NewQ);
1420  MxQHeap.PushHeap(TFltIntIntTr(NewQ, TMath::Mn(J, K), TMath::Mx(J, K)));
1421  }
1422  }
1423  DatJ.DegFrac += DatI.DegFrac; }
1424  if (DatJ.NIdQH.Empty()) { CmtyQH.DelKey(J); } // isolated community (done)
1425  CmtyQH.DelKey(I);
1426  return true;
1427  }
1428  static double CmtyCMN(const PUNGraph& Graph, TCnComV& CmtyV) {
1429  TCNMQMatrix QMatrix(Graph);
1430  // maximize modularity
1431  while (QMatrix.MergeBestQ()) {}
1432  // reconstruct communities
1433  THash<TInt, TIntV> IdCmtyH;
1434  for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
1435  IdCmtyH.AddDat(QMatrix.CmtyIdUF.Find(NI.GetId())).Add(NI.GetId());
1436  }
1437  CmtyV.Gen(IdCmtyH.Len());
1438  for (int j = 0; j < IdCmtyH.Len(); j++) {
1439  CmtyV[j].NIdV.Swap(IdCmtyH[j]);
1440  }
1441  return QMatrix.Q;
1442  }
1443 };
1444 
1445 } // namespace TSnapDetail
1446 
1447 double CommunityCNM(const PUNGraph& Graph, TCnComV& CmtyV) {
1448  return TSnapDetail::TCNMQMatrix::CmtyCMN(Graph, CmtyV);
1449 }
1450 
1451 }; //namespace TSnap
double CommunityGirvanNewman(PUNGraph &Graph, TCnComV &CmtyV)
Definition: cmty.cpp:312
static const T & Mn(const T &LVal, const T &RVal)
Definition: xmath.h:36
void Randomize()
Definition: dt.h:60
TStr GetStr() const
Definition: dt.h:1107
int Add(const int &Key)
Adds an element Key to the structure.
Definition: gbase.h:241
int Len() const
Definition: dt.h:487
TNodeI BegNI() const
Returns an iterator referring to the first node in the graph.
Definition: graph.h:479
void Add(const int &NodeId)
Definition: cncom.h:104
int GetHops(const int &SrcNId, const int &DstNId) const
Definition: bfsdfs.h:285
void Union(const int &Key1, const int &Key2)
Merges sets with elements Key1 and Key2.
Definition: gbase.cpp:40
TFltIntIntTr FindMxQEdge()
Definition: cmty.cpp:1380
Definition: dt.h:11
Definition: ds.h:129
static const T & Mx(const T &LVal, const T &RVal)
Definition: xmath.h:32
static PNGraph New()
Static constructor that returns a pointer to the graph. Call: PNGraph Graph = TNGraph::New().
Definition: graph.h:425
TNodeI GetNI(const int &NId) const
Returns an iterator referring to the node of ID NId in the graph.
Definition: graph.h:483
bool IsIn(const TVal &Val) const
Checks whether element Val is a member of the vector.
Definition: ds.h:797
void GetNodeWcc(const PGraph &Graph, const int &NId, TIntV &CnCom)
Returns (via output parameter CnCom) all nodes that are in the same connected component as node NId...
Definition: cncom.h:277
static const int Mx
Definition: dt.h:1049
TCNMQMatrix(const PUNGraph &Graph)
Definition: cmty.cpp:1356
#define Fail
Definition: bd.h:238
int DoBfs(const int &StartNode, const bool &FollowOut, const bool &FollowIn, const int &TargetNId=-1, const int &MxDist=TInt::Mx)
Performs BFS from node id StartNode for at maps MxDist steps by only following in-links (parameter Fo...
Definition: bfsdfs.h:119
TIter BegI() const
Definition: hash.h:171
int AddNode(int NId=-1)
Adds a node of ID NId to the graph.
Definition: graph.cpp:8
int GetEdges() const
Returns the number of edges in the graph.
Definition: graph.cpp:82
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:547
double InfomapOnline(PUNGraph &Graph, int n1, int n2, TIntFltH &PAlpha, double &SumPAlphaLogPAlpha, TIntFltH &Qi, TIntH &Module, int &Br, TCnComV &CmtyV)
Definition: cmty.cpp:417
void ReebSimplify(PNGraph &Graph, TIntH &t, int e, PNGraph &gFinal, TIntH &tFinal, bool collapse)
Definition: cmty.cpp:842
Node iterator. Only forward iteration (operator++) is supported.
Definition: graph.h:64
bool Empty() const
Definition: hash.h:185
Definition: ss.h:72
int AddNode(int NId=-1)
Adds a node of ID NId to the graph.
Definition: graph.cpp:236
void GetBetweennessCentr(const PGraph &Graph, TIntFltH &NIdBtwH, const bool &IsDir=false, const double &NodeFrac=1.0)
Definition: centr.h:450
TVal1 Val1
Definition: ds.h:131
bool GetInt(const int &FldN, int &Val) const
If the field FldN is an integer its value is returned in Val and the function returns true...
Definition: ss.cpp:447
TChA GetLnStr() const
Returns the current line.
Definition: ss.h:124
const TDat & GetDat(const TKey &Key) const
Definition: hash.h:220
static double Sqr(const double &x)
Definition: xmath.h:12
TIter EndI() const
Definition: hash.h:176
const TKey & GetKey() const
Definition: hash.h:71
void DelNode(const int &NId)
Deletes node of ID NId from the graph.
Definition: graph.cpp:294
void Swap(TVec< TVal, TSizeTy > &Vec)
Swaps the contents of the vector with Vec.
Definition: ds.h:1047
int GetDeg() const
Returns degree of the current node.
Definition: graph.h:86
void DelKey(const TKey &Key)
Definition: hash.h:362
TVal2 Val2
Definition: ds.h:132
bool Eof() const
Checks for end of file.
Definition: ss.h:122
int GetOutDeg() const
Returns out-degree of the current node (returns same as value GetDeg() since the graph is undirected)...
Definition: graph.h:90
void Clr(const bool &DoDel=true, const TSizeTy &NoDelLim=-1)
Clears the contents of the vector.
Definition: ds.h:971
int Find(const int &Key)
Returns the set that contains element Key.
Definition: gbase.cpp:23
const TDat & GetDat() const
Definition: hash.h:72
Definition: cncom.h:88
bool IsEnd() const
Tests whether the iterator is pointing to the past-end element.
Definition: hash.h:69
int AddEdge(const int &SrcNId, const int &DstNId)
Adds an edge from node IDs SrcNId to node DstNId to the graph.
Definition: graph.cpp:321
void CmtyEvolutionFileBatch(TStr InFNm, TIntIntHH &sizesCont, TIntIntHH &cCont, TIntIntVH &edges, double alpha, double beta, int CmtyAlg)
Definition: cmty.cpp:488
void DelEdge(const int &SrcNId, const int &DstNId, const bool &IsDir=true)
Deletes an edge from node IDs SrcNId to DstNId from the graph.
Definition: graph.cpp:345
bool IsEdge(const int &SrcNId, const int &DstNId, const bool &IsDir=true) const
Tests whether an edge from node IDs SrcNId to DstNId exists in the graph.
Definition: graph.cpp:363
const TVal & GetDat(const TVal &Val) const
Returns reference to the first occurrence of element Val.
Definition: ds.h:807
bool inComp(PNGraph &g1, PNGraph &Graph, TIntH &inCompCount, int id, int neigh)
Definition: cmty.cpp:150
Simple heap data structure.
Definition: gbase.h:256
Whitespace (space or tab) separated.
Definition: ss.h:11
TNodeI GetNI(const int &NId) const
Returns an iterator referring to the node of ID NId in the graph.
Definition: graph.h:213
bool FNextKeyId(int &KeyId) const
Definition: hash.h:436
bool IsNode(const int &NId) const
Tests whether ID NId is a node.
Definition: graph.h:477
bool chekIfCrossing(TIntV &a, TIntH &t, int f, int l, int TP)
Definition: cmty.cpp:195
static PUNGraph New()
Static constructor that returns a pointer to the graph. Call: PUNGraph Graph = TUNGraph::New().
Definition: graph.h:155
void CmtyEvolutionFileBatchV(TStr InFNm, TIntIntVH &sizesContV, TIntIntVH &cContV, TIntIntVH &edges, double alpha, double beta, int CmtyAlg)
Definition: cmty.cpp:439
TStr CmtyTest(TStr InFNm, int CmtyAlg)
Definition: cmty.cpp:825
void DelEdge(const int &SrcNId, const int &DstNId)
Deletes an edge between node IDs SrcNId and DstNId from the graph.
Definition: graph.cpp:124
TSizeTy IntrsLen(const TVec< TVal, TSizeTy > &ValV) const
Returns the size of the intersection of vectors this and ValV. Assumes the vectors are sorted! ...
Definition: ds.h:1414
char GetCh(const int &ChN) const
Definition: dt.h:483
int GetDeg() const
Returns degree of the current node, the sum of in-degree and out-degree.
Definition: graph.h:358
THeap< TFltIntIntTr > MxQHeap
Definition: cmty.cpp:1352
int AddEdge(const int &SrcNId, const int &DstNId)
Adds an edge between node IDs SrcNId and DstNId to the graph.
Definition: graph.cpp:92
Definition: dt.h:1044
int GetKeyId(const TKey &Key) const
Definition: hash.h:424
void CmtyEvolutionJson(TStr &Json, TIntIntVH &sizesContV, TIntIntVH &cContV, TIntIntVH &edges)
Definition: cmty.cpp:723
void MapEquationNew2Modules(PUNGraph &Graph, TIntH &Module, TIntFltH &Qi, int a, int b)
Definition: cmty.cpp:54
TNodeI EndNI() const
Returns an iterator referring to the past-the-end node in the graph.
Definition: graph.h:481
double InfomapOnlineIncrement(PUNGraph &Graph, int n1, int n2, TIntFltH &PAlpha, double &SumPAlphaLogPAlpha, TIntFltH &Qi, TIntH &Module, int &Br)
Definition: cmty.cpp:214
void AddQ(const int &NId, const double &Q)
Definition: cmty.cpp:1333
int GetOutDeg() const
Returns out-degree of the current node.
Definition: graph.h:362
void transitiveTransform(TIntV &a, TIntV &b)
Definition: cmty.cpp:179
Definition: dt.h:412
double CommunityCNM(const PUNGraph &Graph, TCnComV &CmtyV)
Definition: cmty.cpp:1447
void ReebRefine(PNGraph &Graph, TIntH &t, int e, PNGraph &gFinal, TIntH &tFinal, bool collapse)
Definition: cmty.cpp:982
void Shuffle(TRnd &Rnd)
Randomly shuffles the elements of the vector.
Definition: ds.h:1271
Node iterator. Only forward iteration (operator++) is supported.
Definition: graph.h:339
void Init(const PUNGraph &Graph)
Definition: cmty.cpp:1360
TNodeI EndNI() const
Returns an iterator referring to the past-the-end node in the graph.
Definition: graph.h:211
int vectorIntersect(TIntV &a, TIntV &b)
Definition: cmty.cpp:138
TVal1 Val1
Definition: ds.h:34
int GetNbrNId(const int &NodeN) const
Returns ID of NodeN-th neighboring node.
Definition: graph.h:107
TVal2 Val2
Definition: ds.h:35
void Clr(const bool &DoDel=true, const int &NoDelLim=-1, const bool &ResetDat=true)
Definition: hash.h:319
bool IsNode(const int &NId) const
Tests whether ID NId is a node.
Definition: graph.h:207
bool Next()
Loads next line from the input file.
Definition: ss.cpp:412
bool edgeIntersect(PNGraph &graph, TIntV &a, TIntV &b)
Definition: cmty.cpp:127
void Gen(const TSizeTy &_Vals)
Constructs a vector (an array) of _Vals elements.
Definition: ds.h:495
int GetInDeg() const
Returns in-degree of the current node.
Definition: graph.h:360
int GetId() const
Returns ID of the current node.
Definition: graph.h:84
bool IsKey(const TKey &Key) const
Definition: hash.h:216
int GetInNId(const int &NodeN) const
Returns ID of NodeN-th in-node (the node pointing to the current node).
Definition: graph.h:368
TCmtyDat(const double &NodeDegFrac, const int &OutDeg)
Definition: cmty.cpp:1331
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:574
TNodeI BegNI() const
Returns an iterator referring to the first node in the graph.
Definition: graph.h:209
void DelSelfEdges(const PGraph &Graph)
Removes all the self-edges from the graph.
Definition: alg.h:419
double Equation(TIntFltH &PAlpha, double &SumPAlphaLogPAlpha, TIntFltH &Qi)
Definition: cmty.cpp:109
int Len() const
Definition: hash.h:186
void InsStr(const int &BChN, const TStr &Str)
Definition: dt.cpp:825
double Infomap(PUNGraph &Graph, TCnComV &CmtyV)
Definition: cmty.cpp:337
TDat & AddDat(const TKey &Key)
Definition: hash.h:196
static double CmtyCMN(const PUNGraph &Graph, TCnComV &CmtyV)
Definition: cmty.cpp:1428
void GetWccs(const PGraph &Graph, TCnComV &CnComV)
Returns all weakly connected components in a Graph.
Definition: cncom.h:376
Union Find class (Disjoint-set data structure).
Definition: gbase.h:217
int GetOutNId(const int &NodeN) const
Returns ID of NodeN-th out-node (the node the current node points to).
Definition: graph.h:372
const TKey & GetKey(const int &KeyId) const
Definition: hash.h:210
TTriple< TFlt, TInt, TInt > TFltIntIntTr
Definition: ds.h:181
TVal3 Val3
Definition: ds.h:133
double _GirvanNewmanGetModularity(const PUNGraph &G, const TIntH &OutDegH, const int &OrigEdges, TCnComV &CnComV)
Definition: cmty.cpp:37
Vector is a sequence TVal objects representing an array that can change in size.
Definition: ds.h:429
THash< TInt, TCmtyDat > CmtyQH
Definition: cmty.cpp:1351
void CmtyGirvanNewmanStep(PUNGraph &Graph, TIntV &Cmty1, TIntV &Cmty2)
A single step of Girvan-Newman clustering procedure.
Definition: cmty.cpp:15
void SortByDat(const bool &Asc=true)
Definition: hash.h:250