SNAP Library 6.0, Developer Reference  2020-12-09 16:24:20
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
agm.cpp
Go to the documentation of this file.
1 #include "stdafx.h"
2 #include "Snap.h"
3 #include "agm.h"
4 #include "agmfit.h"
5 
7 // AGM graph generation.
8 
10 void TAGM::RndConnectInsideCommunity(PUNGraph& Graph, const TIntV& CmtyV, const double& Prob, TRnd& Rnd){
11  int CNodes = CmtyV.Len(), CEdges;
12  if (CNodes < 20) {
13  CEdges = (int) Rnd.GetBinomialDev(Prob, CNodes * (CNodes-1) / 2);
14  } else {
15  CEdges = (int) (Prob * CNodes * (CNodes - 1) / 2);
16  }
17  THashSet<TIntPr> NewEdgeSet(CEdges);
18  for (int edge = 0; edge < CEdges; ) {
19  int SrcNId = CmtyV[Rnd.GetUniDevInt(CNodes)];
20  int DstNId = CmtyV[Rnd.GetUniDevInt(CNodes)];
21  if (SrcNId > DstNId) { Swap(SrcNId,DstNId); }
22  if (SrcNId != DstNId && ! NewEdgeSet.IsKey(TIntPr(SrcNId, DstNId))) { // is new edge
23  NewEdgeSet.AddKey(TIntPr(SrcNId, DstNId));
24  Graph->AddEdge(SrcNId, DstNId);
25  edge++;
26  }
27  }
28 }
29 
30 
31 PUNGraph TAGM::GenAGM(TVec<TIntV>& CmtyVV, const double& DensityCoef, const int TargetEdges, TRnd& Rnd){
32  PUNGraph TryG = TAGM::GenAGM(CmtyVV, DensityCoef, 1.0, Rnd);
33  const double ScaleCoef = (double) TargetEdges / (double) TryG->GetEdges();
34  return TAGM::GenAGM(CmtyVV, DensityCoef, ScaleCoef, Rnd);
35 }
36 
37 PUNGraph TAGM::GenAGM(TVec<TIntV>& CmtyVV, const double& DensityCoef, const double& ScaleCoef, TRnd& Rnd){
38  TFltV CProbV;
39  double Prob;
40  for (int i = 0; i < CmtyVV.Len(); i++) {
41  Prob = ScaleCoef*pow( double( CmtyVV[i].Len()), - DensityCoef);
42  if (Prob > 1.0) { Prob = 1; }
43  CProbV.Add(Prob);
44  }
45  return TAGM::GenAGM(CmtyVV, CProbV, Rnd);
46 }
47 
49 PUNGraph TAGM::GenAGM(TVec<TIntV>& CmtyVV, const TFltV& CProbV, TRnd& Rnd, const double PNoCom){
50  PUNGraph G = TUNGraph::New(100 * CmtyVV.Len(), -1);
51  printf("AGM begins\n");
52  for (int i = 0; i < CmtyVV.Len(); i++) {
53  TIntV& CmtyV = CmtyVV[i];
54  for (int u = 0; u < CmtyV.Len(); u++) {
55  if ( G->IsNode(CmtyV[u])) { continue; }
56  G->AddNode(CmtyV[u]);
57  }
58  double Prob = CProbV[i];
59  RndConnectInsideCommunity(G, CmtyV, Prob, Rnd);
60  }
61  if (PNoCom > 0.0) { //if we want to connect nodes that do not share any community
62  TIntSet NIDS;
63  for (int c = 0; c < CmtyVV.Len(); c++) {
64  for (int u = 0; u < CmtyVV[c].Len(); u++) {
65  NIDS.AddKey(CmtyVV[c][u]);
66  }
67  }
68  TIntV NIDV;
69  NIDS.GetKeyV(NIDV);
70  RndConnectInsideCommunity(G,NIDV,PNoCom,Rnd);
71  }
72  printf("AGM completed (%d nodes %d edges)\n",G->GetNodes(),G->GetEdges());
73  G->Defrag();
74  return G;
75 }
76 
79 
81 void TAGMUtil::GenPLSeq(TIntV& SzSeq, const int& SeqLen, const double& Alpha, TRnd& Rnd, const int& Min, const int& Max) {
82  SzSeq.Gen(SeqLen, 0);
83  while (SzSeq.Len() < SeqLen) {
84  int Sz = (int) TMath::Round(Rnd.GetPowerDev(Alpha));
85  if (Sz >= Min && Sz <= Max) {
86  SzSeq.Add(Sz);
87  }
88  }
89 }
90 
92 void TAGMUtil::GenCmtyVVFromPL(TVec<TIntV>& CmtyVV, const int& Nodes, const int& Coms, const double& ComSzAlpha, const double& MemAlpha, const int& MinSz, const int& MaxSz, const int& MinK, const int& MaxK, TRnd& Rnd){
93  TIntV NIDV(Nodes, 0);
94  for (int i = 0; i < Nodes; i++) {
95  NIDV.Add(i);
96  }
97  GenCmtyVVFromPL(CmtyVV, NIDV, Nodes, Coms, ComSzAlpha, MemAlpha, MinSz, MaxSz, MinK, MaxK, Rnd);
98 }
99 
101 void TAGMUtil::GenCmtyVVFromPL(TVec<TIntV>& CmtyVV, const PUNGraph& Graph, const int& Nodes, const int& Coms, const double& ComSzAlpha, const double& MemAlpha, const int& MinSz, const int& MaxSz, const int& MinK, const int& MaxK, TRnd& Rnd){
102  if (Coms == 0 || Nodes == 0) {
103  CmtyVV.Clr();
104  return;
105  }
106  TIntV NIDV;
107  Graph->GetNIdV(NIDV);
108  GenCmtyVVFromPL(CmtyVV, NIDV, Nodes, Coms, ComSzAlpha, MemAlpha, MinSz, MaxSz, MinK, MaxK, Rnd);
109 }
110 
112 void TAGMUtil::GenCmtyVVFromPL(TVec<TIntV>& CmtyVV, const TIntV& NIDV, const int& Nodes, const int& Coms, const double& ComSzAlpha, const double& MemAlpha, const int& MinSz, const int& MaxSz, const int& MinK, const int& MaxK, TRnd& Rnd){
113  if (Coms == 0 || Nodes == 0) {
114  CmtyVV.Clr();
115  return;
116  }
117  TIntV ComSzSeq, MemSeq;
118  TAGMUtil::GenPLSeq(ComSzSeq,Coms,ComSzAlpha,Rnd,MinSz,MaxSz);
119  TAGMUtil::GenPLSeq(MemSeq,Nodes,MemAlpha,Rnd,MinK,MaxK);
120  TIntPrV CIDSzPrV, NIDMemPrV;
121  for (int i = 0; i < ComSzSeq.Len(); i++) {
122  CIDSzPrV.Add(TIntPr(i, ComSzSeq[i]));
123  }
124  for (int i = 0; i < MemSeq.Len(); i++) {
125  NIDMemPrV.Add(TIntPr(NIDV[i], MemSeq[i]));
126  }
127  TAGMUtil::ConnectCmtyVV(CmtyVV, CIDSzPrV, NIDMemPrV, Rnd);
128 }
129 
131 void TAGMUtil::ConnectCmtyVV(TVec<TIntV>& CmtyVV, const TIntPrV& CIDSzPrV, const TIntPrV& NIDMemPrV, TRnd& Rnd) {
132  const int Nodes = NIDMemPrV.Len(), Coms = CIDSzPrV.Len();
133  TIntV NDegV,CDegV;
134  TIntPrSet CNIDSet;
135  TIntSet HitNodes(Nodes);
136  THash<TInt,TIntV> CmtyVH;
137  for (int i = 0;i < CIDSzPrV.Len(); i++) {
138  for (int j = 0; j < CIDSzPrV[i].Val2; j++) {
139  CDegV.Add(CIDSzPrV[i].Val1);
140  }
141  }
142  for (int i = 0; i < NIDMemPrV.Len(); i++) {
143  for (int j = 0; j < NIDMemPrV[i].Val2; j++) {
144  NDegV.Add(NIDMemPrV[i].Val1);
145  }
146  }
147  while (CDegV.Len() < (int) (1.2 * Nodes)) {
148  CDegV.Add(CIDSzPrV[Rnd.GetUniDevInt(Coms)].Val1);
149  }
150  while (NDegV.Len() < CDegV.Len()) {
151  NDegV.Add(NIDMemPrV[Rnd.GetUniDevInt(Nodes)].Val1);
152  }
153  printf("Total Mem: %d, Total Sz: %d\n",NDegV.Len(), CDegV.Len());
154  int c=0;
155  while (c++ < 15 && CDegV.Len() > 1) {
156  for (int i = 0; i < CDegV.Len(); i++) {
157  int u = Rnd.GetUniDevInt(CDegV.Len());
158  int v = Rnd.GetUniDevInt(NDegV.Len());
159  if (CNIDSet.IsKey(TIntPr(CDegV[u], NDegV[v]))) { continue; }
160  CNIDSet.AddKey(TIntPr(CDegV[u], NDegV[v]));
161  HitNodes.AddKey(NDegV[v]);
162  if (u == CDegV.Len() - 1) { CDegV.DelLast(); }
163  else {
164  CDegV[u] = CDegV.Last();
165  CDegV.DelLast();
166  }
167  if (v == NDegV.Len() - 1) { NDegV.DelLast(); }
168  else {
169  NDegV[v] = NDegV.Last();
170  NDegV.DelLast();
171  }
172  }
173  }
174  //make sure that every node belongs to at least one community
175  for (int i = 0; i < Nodes; i++) {
176  int NID = NIDMemPrV[i].Val1;
177  if (! HitNodes.IsKey(NID)) {
178  CNIDSet.AddKey(TIntPr(CIDSzPrV[Rnd.GetUniDevInt(Coms)].Val1, NID));
179  HitNodes.AddKey(NID);
180  }
181  }
182  IAssert(HitNodes.Len() == Nodes);
183  for (int i = 0; i < CNIDSet.Len(); i++) {
184  TIntPr CNIDPr = CNIDSet[i];
185  CmtyVH.AddDat(CNIDPr.Val1);
186  CmtyVH.GetDat(CNIDPr.Val1).Add(CNIDPr.Val2);
187  }
188  CmtyVH.GetDatV(CmtyVV);
189 }
190 
192 void TAGMUtil::RewireCmtyVV(const TVec<TIntV>& CmtyVVIn, TVec<TIntV>& CmtyVVOut, TRnd& Rnd){
193  THash<TInt,TIntV> CmtyVH;
194  for (int i = 0; i < CmtyVVIn.Len(); i++) {
195  CmtyVH.AddDat(i, CmtyVVIn[i]);
196  }
197  TAGMUtil::RewireCmtyNID(CmtyVH, Rnd);
198  CmtyVH.GetDatV(CmtyVVOut);
199 }
200 
203  THash<TInt,TIntV > NewCmtyVH(CmtyVH.Len());
204  TIntV NDegV;
205  TIntV CDegV;
206  for (int i = 0; i < CmtyVH.Len(); i++) {
207  int CID = CmtyVH.GetKey(i);
208  for (int j = 0; j < CmtyVH[i].Len(); j++) {
209  int NID = CmtyVH[i][j];
210  NDegV.Add(NID);
211  CDegV.Add(CID);
212  }
213  }
214  TIntPrSet CNIDSet(CDegV.Len());
215  int c=0;
216  while (c++ < 15 && CDegV.Len() > 1){
217  for (int i = 0; i < CDegV.Len(); i++) {
218  int u = Rnd.GetUniDevInt(CDegV.Len());
219  int v = Rnd.GetUniDevInt(NDegV.Len());
220  if (CNIDSet.IsKey(TIntPr(CDegV[u], NDegV[v]))) { continue; }
221  CNIDSet.AddKey(TIntPr(CDegV[u], NDegV[v]));
222  if (u == CDegV.Len() - 1) {
223  CDegV.DelLast();
224  } else {
225  CDegV[u] = CDegV.Last();
226  CDegV.DelLast();
227  }
228  if ( v == NDegV.Len() - 1) {
229  NDegV.DelLast();
230  } else{
231  NDegV[v] = NDegV.Last();
232  NDegV.DelLast();
233  }
234  }
235  }
236  for (int i = 0; i < CNIDSet.Len(); i++) {
237  TIntPr CNIDPr = CNIDSet[i];
238  IAssert(CmtyVH.IsKey(CNIDPr.Val1));
239  NewCmtyVH.AddDat(CNIDPr.Val1);
240  NewCmtyVH.GetDat(CNIDPr.Val1).Add(CNIDPr.Val2);
241  }
242  CmtyVH = NewCmtyVH;
243 }
244 
246 void TAGMUtil::LoadCmtyVV(const TStr& InFNm, TVec<TIntV>& CmtyVV) {
247  CmtyVV.Gen(Kilo(100), 0);
248  TSsParser Ss(InFNm, ssfWhiteSep);
249  while (Ss.Next()) {
250  if(Ss.GetFlds() > 0) {
251  TIntV CmtyV;
252  for (int i = 0; i < Ss.GetFlds(); i++) {
253  if (Ss.IsInt(i)) {
254  CmtyV.Add(Ss.GetInt(i));
255  }
256  }
257  CmtyVV.Add(CmtyV);
258  }
259  }
260  CmtyVV.Pack();
261  printf("community loading completed (%d communities)\n",CmtyVV.Len());
262 
263 }
264 
266 void TAGMUtil::LoadCmtyVV(const TStr& InFNm, TVec<TIntV>& CmtyVV, TStrHash<TInt>& StrToNIdH, const int BeginCol, const int MinSz, const TSsFmt Sep) {
267  CmtyVV.Gen(Kilo(100), 0);
268  TSsParser Ss(InFNm, Sep);
269  while (Ss.Next()) {
270  if(Ss.GetFlds() > BeginCol) {
271  TIntV CmtyV;
272  for (int i = BeginCol; i < Ss.GetFlds(); i++) {
273  if (StrToNIdH.IsKey(Ss.GetFld(i))) {
274  CmtyV.Add(StrToNIdH.GetKeyId(Ss.GetFld(i)));
275  }
276  }
277  if (CmtyV.Len() < MinSz) { continue; }
278  CmtyVV.Add(CmtyV);
279  }
280  }
281  CmtyVV.Pack();
282  printf("community loading completed (%d communities)\n",CmtyVV.Len());
283 }
284 
286 void TAGMUtil::DumpCmtyVV(const TStr& OutFNm, const TVec<TIntV>& CmtyVV) {
287  FILE* F = fopen(OutFNm.CStr(),"wt");
288  for (int i = 0; i < CmtyVV.Len(); i++) {
289  for (int j = 0; j < CmtyVV[i].Len(); j++) {
290  fprintf(F,"%d\t", (int) CmtyVV[i][j]);
291  }
292  fprintf(F,"\n");
293  }
294  fclose(F);
295 }
296 
298 void TAGMUtil::DumpCmtyVV(const TStr OutFNm, TVec<TIntV>& CmtyVV, TIntStrH& NIDNmH) {
299  FILE* F = fopen(OutFNm.CStr(), "wt");
300  for (int c = 0; c < CmtyVV.Len(); c++) {
301  for (int u = 0; u < CmtyVV[c].Len(); u++) {
302  if (NIDNmH.IsKey(CmtyVV[c][u])){
303  fprintf(F, "%s\t", NIDNmH.GetDat(CmtyVV[c][u]).CStr());
304  }
305  else {
306  fprintf(F, "%d\t", (int) CmtyVV[c][u]);
307  }
308  }
309  fprintf(F, "\n");
310  }
311  fclose(F);
312 }
313 
316  int M = 0;
317  for (int i = 0; i < CmtyVV.Len(); i++) {
318  M += CmtyVV[i].Len();
319  }
320  return M;
321 }
322 
324 void TAGMUtil::GetNodeMembership(TIntH& NIDComVH, const THash<TInt,TIntV >& CmtyVH) {
325  NIDComVH.Clr();
326  for (THash<TInt,TIntV>::TIter HI = CmtyVH.BegI(); HI < CmtyVH.EndI(); HI++){
327  for (int j = 0;j < HI.GetDat().Len(); j++) {
328  int NID = HI.GetDat()[j];
329  NIDComVH.AddDat(NID)++;
330  }
331  }
332 }
333 
336  NIDComVH.Clr();
337  for (int i = 0; i < CmtyVV.Len(); i++){
338  int CID = i;
339  for (int j = 0; j < CmtyVV[i].Len(); j++) {
340  int NID = CmtyVV[i][j];
341  NIDComVH.AddDat(NID).AddKey(CID);
342  }
343  }
344 }
345 
347 void TAGMUtil::GetNodeMembership(THash<TInt,TIntSet >& NIDComVH, const TVec<TIntV>& CmtyVV, const TIntV& NIDV) {
348  NIDComVH.Clr();
349  for (int u = 0; u < NIDV.Len(); u++) {
350  NIDComVH.AddDat(NIDV[u]);
351  }
352  for (int i = 0; i < CmtyVV.Len(); i++){
353  int CID = i;
354  for (int j = 0; j < CmtyVV[i].Len(); j++) {
355  int NID = CmtyVV[i][j];
356  NIDComVH.AddDat(NID).AddKey(CID);
357  }
358  }
359 }
360 
361 
363  for (int i = 0; i < CmtyVV.Len(); i++){
364  int CID = i;
365  for (TIntSet::TIter SI = CmtyVV[i].BegI(); SI < CmtyVV[i].EndI(); SI++) {
366  int NID = SI.GetKey();
367  NIDComVH.AddDat(NID).AddKey(CID);
368  }
369  }
370 }
372  for (THash<TInt,TIntV>::TIter HI = CmtyVH.BegI(); HI < CmtyVH.EndI(); HI++){
373  int CID = HI.GetKey();
374  for (int j = 0; j < HI.GetDat().Len(); j++) {
375  int NID = HI.GetDat()[j];
376  NIDComVH.AddDat(NID).AddKey(CID);
377  }
378  }
379 }
380 
382  for (int i = 0; i < CmtyVH.Len(); i++){
383  int CID = CmtyVH.GetKey(i);
384  for (int j = 0; j < CmtyVH[i].Len(); j++) {
385  int NID = CmtyVH[i][j];
386  NIDComVH.AddDat(NID).Add(CID);
387  }
388  }
389 }
390 
392  THash<TInt,TIntV> CmtyVH;
393  for (int i = 0; i < CmtyVV.Len(); i++) {
394  CmtyVH.AddDat(i, CmtyVV[i]);
395  }
396  GetNodeMembership(NIDComVH, CmtyVH);
397 }
398 
399 int TAGMUtil::Intersection(const TIntV& C1, const TIntV& C2) {
400  TIntSet S1(C1), S2(C2);
401  return TAGMUtil::Intersection(S1, S2);
402 }
403 
405  C.Gen(A.Len());
406  if (A.Len() < B.Len()) {
407  for (THashSetKeyI<TInt> it = A.BegI(); it < A.EndI(); it++)
408  if (B.IsKey(it.GetKey())) C.AddKey(it.GetKey());
409  } else {
410  for (THashSetKeyI<TInt> it = B.BegI(); it < B.EndI(); it++)
411  if (A.IsKey(it.GetKey())) C.AddKey(it.GetKey());
412  }
413 }
414 
416  int n = 0;
417  if (A.Len() < B.Len()) {
418  for (THashSetKeyI<TInt> it = A.BegI(); it < A.EndI(); it++)
419  if (B.IsKey(it.GetKey())) n++;
420  } else {
421  for (THashSetKeyI<TInt> it = B.BegI(); it < B.EndI(); it++)
422  if (A.IsKey(it.GetKey())) n++;
423  }
424  return n;
425 }
426 
428 void TAGMUtil::SaveGephi(const TStr& OutFNm, const PUNGraph& G, const TVec<TIntV>& CmtyVVAtr, const double MaxSz, const double MinSz, const TIntStrH& NIDNameH, const THash<TInt, TIntTr>& NIDColorH ) {
429  THash<TInt,TIntV> NIDComVHAtr;
430  TAGMUtil::GetNodeMembership(NIDComVHAtr, CmtyVVAtr);
431 
432  FILE* F = fopen(OutFNm.CStr(), "wt");
433  fprintf(F, "<?xml version='1.0' encoding='UTF-8'?>\n");
434  fprintf(F, "<gexf xmlns='http://www.gexf.net/1.2draft' xmlns:viz='http://www.gexf.net/1.1draft/viz' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' xsi:schemaLocation='http://www.gexf.net/1.2draft http://www.gexf.net/1.2draft/gexf.xsd' version='1.2'>\n");
435  fprintf(F, "\t<graph mode='static' defaultedgetype='undirected'>\n");
436  if (CmtyVVAtr.Len() > 0) {
437  fprintf(F, "\t<attributes class='node'>\n");
438  for (int c = 0; c < CmtyVVAtr.Len(); c++) {
439  fprintf(F, "\t\t<attribute id='%d' title='c%d' type='boolean'>", c, c);
440  fprintf(F, "\t\t<default>false</default>\n");
441  fprintf(F, "\t\t</attribute>\n");
442  }
443  fprintf(F, "\t</attributes>\n");
444  }
445  fprintf(F, "\t\t<nodes>\n");
446  for (TUNGraph::TNodeI NI = G->BegNI(); NI < G->EndNI(); NI++) {
447  int NID = NI.GetId();
448  TStr Label = NIDNameH.IsKey(NID)? NIDNameH.GetDat(NID): "";
449  TIntTr Color = NIDColorH.IsKey(NID)? NIDColorH.GetDat(NID) : TIntTr(120, 120, 120);
450 
451  double Size = MinSz;
452  double SizeStep = (MaxSz - MinSz) / (double) CmtyVVAtr.Len();
453  if (NIDComVHAtr.IsKey(NID)) {
454  Size = MinSz + SizeStep * (double) NIDComVHAtr.GetDat(NID).Len();
455  }
456  double Alpha = 1.0;
457  fprintf(F, "\t\t\t<node id='%d' label='%s'>\n", NID, Label.CStr());
458  fprintf(F, "\t\t\t\t<viz:color r='%d' g='%d' b='%d' a='%.1f'/>\n", Color.Val1.Val, Color.Val2.Val, Color.Val3.Val, Alpha);
459  fprintf(F, "\t\t\t\t<viz:size value='%.3f'/>\n", Size);
460  //specify attributes
461  if (NIDComVHAtr.IsKey(NID)) {
462  fprintf(F, "\t\t\t\t<attvalues>\n");
463  for (int c = 0; c < NIDComVHAtr.GetDat(NID).Len(); c++) {
464  int CID = NIDComVHAtr.GetDat(NID)[c];
465  fprintf(F, "\t\t\t\t\t<attvalue for='%d' value='true'/>\n", CID);
466  }
467  fprintf(F, "\t\t\t\t</attvalues>\n");
468  }
469 
470  fprintf(F, "\t\t\t</node>\n");
471  }
472  fprintf(F, "\t\t</nodes>\n");
473  //plot edges
474  int EID = 0;
475  fprintf(F, "\t\t<edges>\n");
476  for (TUNGraph::TEdgeI EI = G->BegEI(); EI < G->EndEI(); EI++) {
477  fprintf(F, "\t\t\t<edge id='%d' source='%d' target='%d'/>\n", EID++, EI.GetSrcNId(), EI.GetDstNId());
478  }
479  fprintf(F, "\t\t</edges>\n");
480  fprintf(F, "\t</graph>\n");
481  fprintf(F, "</gexf>\n");
482  fclose(F);
483 }
484 
486 void TAGMUtil::SaveBipartiteGephi(const TStr& OutFNm, const TIntV& NIDV, const TVec<TIntV>& CmtyVV, const double MaxSz, const double MinSz, const TIntStrH& NIDNameH, const THash<TInt, TIntTr>& NIDColorH, const THash<TInt, TIntTr>& CIDColorH ) {
488  if (CmtyVV.Len() == 0) { return; }
489  double NXMin = 0.1, YMin = 0.1, NXMax = 250.00, YMax = 30.0;
490  double CXMin = 0.3 * NXMax, CXMax = 0.7 * NXMax;
491  double CStep = (CXMax - CXMin) / (double) CmtyVV.Len(), NStep = (NXMax - NXMin) / (double) NIDV.Len();
492  THash<TInt,TIntV> NIDComVH;
493  TAGMUtil::GetNodeMembership(NIDComVH, CmtyVV);
494 
495  FILE* F = fopen(OutFNm.CStr(), "wt");
496  fprintf(F, "<?xml version='1.0' encoding='UTF-8'?>\n");
497  fprintf(F, "<gexf xmlns='http://www.gexf.net/1.2draft' xmlns:viz='http://www.gexf.net/1.1draft/viz' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' xsi:schemaLocation='http://www.gexf.net/1.2draft http://www.gexf.net/1.2draft/gexf.xsd' version='1.2'>\n");
498  fprintf(F, "\t<graph mode='static' defaultedgetype='directed'>\n");
499  fprintf(F, "\t\t<nodes>\n");
500  for (int c = 0; c < CmtyVV.Len(); c++) {
501  int CID = c;
502  double XPos = c * CStep + CXMin;
503  TIntTr Color = CIDColorH.IsKey(CID)? CIDColorH.GetDat(CID) : TIntTr(120, 120, 120);
504  fprintf(F, "\t\t\t<node id='C%d' label='C%d'>\n", CID, CID);
505  fprintf(F, "\t\t\t\t<viz:color r='%d' g='%d' b='%d'/>\n", Color.Val1.Val, Color.Val2.Val, Color.Val3.Val);
506  fprintf(F, "\t\t\t\t<viz:size value='%.3f'/>\n", MaxSz);
507  fprintf(F, "\t\t\t\t<viz:shape value='square'/>\n");
508  fprintf(F, "\t\t\t\t<viz:position x='%f' y='%f' z='0.0'/>\n", XPos, YMax);
509  fprintf(F, "\t\t\t</node>\n");
510  }
511 
512  for (int u = 0;u < NIDV.Len(); u++) {
513  int NID = NIDV[u];
514  TStr Label = NIDNameH.IsKey(NID)? NIDNameH.GetDat(NID): "";
515  double Size = MinSz;
516  double XPos = NXMin + u * NStep;
517  TIntTr Color = NIDColorH.IsKey(NID)? NIDColorH.GetDat(NID) : TIntTr(120, 120, 120);
518  double Alpha = 1.0;
519  fprintf(F, "\t\t\t<node id='%d' label='%s'>\n", NID, Label.CStr());
520  fprintf(F, "\t\t\t\t<viz:color r='%d' g='%d' b='%d' a='%.1f'/>\n", Color.Val1.Val, Color.Val2.Val, Color.Val3.Val, Alpha);
521  fprintf(F, "\t\t\t\t<viz:size value='%.3f'/>\n", Size);
522  fprintf(F, "\t\t\t\t<viz:shape value='square'/>\n");
523  fprintf(F, "\t\t\t\t<viz:position x='%f' y='%f' z='0.0'/>\n", XPos, YMin);
524  fprintf(F, "\t\t\t</node>\n");
525  }
526  fprintf(F, "\t\t</nodes>\n");
527  fprintf(F, "\t\t<edges>\n");
528  int EID = 0;
529  for (int u = 0;u < NIDV.Len(); u++) {
530  int NID = NIDV[u];
531  if (NIDComVH.IsKey(NID)) {
532  for (int c = 0; c < NIDComVH.GetDat(NID).Len(); c++) {
533  int CID = NIDComVH.GetDat(NID)[c];
534  fprintf(F, "\t\t\t<edge id='%d' source='C%d' target='%d'/>\n", EID++, CID, NID);
535  }
536  }
537  }
538  fprintf(F, "\t\t</edges>\n");
539  fprintf(F, "\t</graph>\n");
540  fprintf(F, "</gexf>\n");
541 }
542 
544 int TAGMUtil::FindComsByAGM(const PUNGraph& Graph, const int InitComs, const int MaxIter, const int RndSeed, const double RegGap, const double PNoCom, const TStr PltFPrx) {
545  TRnd Rnd(RndSeed);
546  int LambdaIter = 100;
547  if (Graph->GetNodes() < 200) { LambdaIter = 1; }
548  if (Graph->GetNodes() < 200 && Graph->GetEdges() > 2000) { LambdaIter = 100; }
549 
550  //Find coms with large C
551  TAGMFit AGMFitM(Graph, InitComs, RndSeed);
552  if (PNoCom > 0.0) { AGMFitM.SetPNoCom(PNoCom); }
553  AGMFitM.RunMCMC(MaxIter, LambdaIter, "");
554 
555  int TE = Graph->GetEdges();
556  TFltV RegV;
557  RegV.Add(0.3 * TE);
558  for (int r = 0; r < 25; r++) {
559  RegV.Add(RegV.Last() * RegGap);
560  }
561  TFltPrV RegComsV, RegLV, RegBICV;
562  TFltV LV, BICV;
563  //record likelihood and number of communities with nonzero P_c
564  for (int r = 0; r < RegV.Len(); r++) {
565  double RegCoef = RegV[r];
566  AGMFitM.SetRegCoef(RegCoef);
567  AGMFitM.MLEGradAscentGivenCAG(0.01, 1000);
568  AGMFitM.SetRegCoef(0.0);
569 
570  TVec<TIntV> EstCmtyVV;
571  AGMFitM.GetCmtyVV(EstCmtyVV, 0.99);
572  int NumLowQ = EstCmtyVV.Len();
573  RegComsV.Add(TFltPr(RegCoef, (double) NumLowQ));
574 
575  if (EstCmtyVV.Len() > 0) {
576  TAGMFit AFTemp(Graph, EstCmtyVV, Rnd);
577  AFTemp.MLEGradAscentGivenCAG(0.001, 1000);
578  double CurL = AFTemp.Likelihood();
579  LV.Add(CurL);
580  BICV.Add(-2.0 * CurL + (double) EstCmtyVV.Len() * log((double) Graph->GetNodes() * (Graph->GetNodes() - 1) / 2.0));
581  }
582  else {
583  break;
584  }
585  }
586  // if likelihood does not exist or does not change at all, report the smallest number of communities or 2
587  if (LV.Len() == 0) { return 2; }
588  else if (LV[0] == LV.Last()) { return (int) TMath::Mx<TFlt>(2.0, RegComsV[LV.Len() - 1].Val2); }
589 
590 
591  //normalize likelihood and BIC to 0~100
592  int MaxL = 100;
593  {
594  TFltV& ValueV = LV;
595  TFltPrV& RegValueV = RegLV;
596  double MinValue = TFlt::Mx, MaxValue = TFlt::Mn;
597  for (int l = 0; l < ValueV.Len(); l++) {
598  if (ValueV[l] < MinValue) { MinValue = ValueV[l]; }
599  if (ValueV[l] > MaxValue) { MaxValue = ValueV[l]; }
600  }
601  while (ValueV.Len() < RegV.Len()) { ValueV.Add(MinValue); }
602  double RangeVal = MaxValue - MinValue;
603  for (int l = 0; l < ValueV.Len(); l++) {
604  RegValueV.Add(TFltPr(RegV[l], double(MaxL) * (ValueV[l] - MinValue) / RangeVal));
605  }
606 
607  }
608  {
609  TFltV& ValueV = BICV;
610  TFltPrV& RegValueV = RegBICV;
611  double MinValue = TFlt::Mx, MaxValue = TFlt::Mn;
612  for (int l = 0; l < ValueV.Len(); l++) {
613  if (ValueV[l] < MinValue) { MinValue = ValueV[l]; }
614  if (ValueV[l] > MaxValue) { MaxValue = ValueV[l]; }
615  }
616  while (ValueV.Len() < RegV.Len()) { ValueV.Add(MaxValue); }
617  double RangeVal = MaxValue - MinValue;
618  for (int l = 0; l < ValueV.Len(); l++) {
619  RegValueV.Add(TFltPr(RegV[l], double(MaxL) * (ValueV[l] - MinValue) / RangeVal));
620  }
621  }
622 
623  //fit logistic regression to normalized likelihood.
624  TVec<TFltV> XV(RegLV.Len());
625  TFltV YV (RegLV.Len());
626  for (int l = 0; l < RegLV.Len(); l++) {
627  XV[l] = TFltV::GetV(log(RegLV[l].Val1));
628  YV[l] = RegLV[l].Val2 / (double) MaxL;
629  }
630  TFltPrV LRVScaled, LRV;
631  TLogRegFit LRFit;
632  PLogRegPredict LRMd = LRFit.CalcLogRegNewton(XV, YV, PltFPrx);
633  for (int l = 0; l < RegLV.Len(); l++) {
634  LRV.Add(TFltPr(RegV[l], LRMd->GetCfy(XV[l])));
635  LRVScaled.Add(TFltPr(RegV[l], double(MaxL) * LRV.Last().Val2));
636  }
637 
638  //estimate # communities from fitted logistic regression
639  int NumComs = 0, IdxRegDrop = 0;
640  double LRThres = 1.1, RegDrop; // 1 / (1 + exp(1.1)) = 0.25
641  double LeftReg = 0.0, RightReg = 0.0;
642  TFltV Theta;
643  LRMd->GetTheta(Theta);
644  RegDrop = (- Theta[1] - LRThres) / Theta[0];
645  if (RegDrop <= XV[0][0]) { NumComs = (int) RegComsV[0].Val2; }
646  else if (RegDrop >= XV.Last()[0]) { NumComs = (int) RegComsV.Last().Val2; }
647  else { //interpolate for RegDrop
648  for (int i = 0; i < XV.Len(); i++) {
649  if (XV[i][0] > RegDrop) { IdxRegDrop = i; break; }
650  }
651 
652  if (IdxRegDrop == 0) {
653  printf("Error!! RegDrop:%f, Theta[0]:%f, Theta[1]:%f\n", RegDrop, Theta[0].Val, Theta[1].Val);
654  for (int l = 0; l < RegLV.Len(); l++) {
655  printf("X[%d]:%f, Y[%d]:%f\n", l, XV[l][0].Val, l, YV[l].Val);
656  }
657  }
658  IAssert(IdxRegDrop > 0);
659  LeftReg = RegDrop - XV[IdxRegDrop - 1][0];
660  RightReg = XV[IdxRegDrop][0] - RegDrop;
661  NumComs = (int) TMath::Round( (RightReg * RegComsV[IdxRegDrop - 1].Val2 + LeftReg * RegComsV[IdxRegDrop].Val2) / (LeftReg + RightReg));
662 
663  }
664  //printf("Interpolation coeff: %f, %f, index at drop:%d (%f), Left-Right Vals: %f, %f\n", LeftReg, RightReg, IdxRegDrop, RegDrop, RegComsV[IdxRegDrop - 1].Val2, RegComsV[IdxRegDrop].Val2);
665  printf("Num Coms:%d\n", NumComs);
666  if (NumComs < 2) { NumComs = 2; }
667 
668  if (PltFPrx.Len() > 0) {
669  TStr PlotTitle = TStr::Fmt("N:%d, E:%d ", Graph->GetNodes(), TE);
670  TGnuPlot GPC(PltFPrx + ".l");
671  GPC.AddPlot(RegComsV, gpwLinesPoints, "C");
672  GPC.AddPlot(RegLV, gpwLinesPoints, "likelihood");
673  GPC.AddPlot(RegBICV, gpwLinesPoints, "BIC");
674  GPC.AddPlot(LRVScaled, gpwLinesPoints, "Sigmoid (scaled)");
675  GPC.SetScale(gpsLog10X);
676  GPC.SetTitle(PlotTitle);
677  GPC.SavePng(PltFPrx + ".l.png");
678  }
679 
680  return NumComs;
681 }
682 
683 double TAGMUtil::GetConductance(const PUNGraph& Graph, const TIntSet& CmtyS, const int Edges) {
684  const int Edges2 = Edges >= 0 ? 2*Edges : Graph->GetEdges();
685  int Vol = 0, Cut = 0;
686  double Phi = 0.0;
687  for (int i = 0; i < CmtyS.Len(); i++) {
688  if (! Graph->IsNode(CmtyS[i])) { continue; }
689  TUNGraph::TNodeI NI = Graph->GetNI(CmtyS[i]);
690  for (int e = 0; e < NI.GetOutDeg(); e++) {
691  if (! CmtyS.IsKey(NI.GetOutNId(e))) { Cut += 1; }
692  }
693  Vol += NI.GetOutDeg();
694  }
695  // get conductance
696  if (Vol != Edges2) {
697  if (2 * Vol > Edges2) { Phi = Cut / double (Edges2 - Vol); }
698  else if (Vol == 0) { Phi = 0.0; }
699  else { Phi = Cut / double(Vol); }
700  } else {
701  if (Vol == Edges2) { Phi = 1.0; }
702  }
703  return Phi;
704 }
705 
706 void TAGMUtil::GetNbhCom(const PUNGraph& Graph, const int NID, TIntSet& NBCmtyS) {
707  TUNGraph::TNodeI NI = Graph->GetNI(NID);
708  NBCmtyS.Gen(NI.GetDeg());
709  NBCmtyS.AddKey(NID);
710  for (int e = 0; e < NI.GetDeg(); e++) {
711  NBCmtyS.AddKey(NI.GetNbrNId(e));
712  }
713 }
714 
716 // Logistic regression by gradient ascent
717 
718 void TLogRegFit::GetNewtonStep(TFltVV& HVV, const TFltV& GradV, TFltV& DeltaLV){
719  bool HSingular = false;
720  for (int i = 0; i < HVV.GetXDim(); i++) {
721  if (HVV(i,i) == 0.0) {
722  HVV(i,i) = 0.001;
723  HSingular = true;
724  }
725  DeltaLV[i] = GradV[i] / HVV(i, i);
726  }
727  if (! HSingular) {
728  if (HVV(0, 0) < 0) { // if Hessian is negative definite, convert it to positive definite
729  for (int r = 0; r < Theta.Len(); r++) {
730  for (int c = 0; c < Theta.Len(); c++) {
731  HVV(r, c) = - HVV(r, c);
732  }
733  }
734  TNumericalStuff::SolveSymetricSystem(HVV, GradV, DeltaLV);
735  }
736  else {
737  TNumericalStuff::SolveSymetricSystem(HVV, GradV, DeltaLV);
738  for (int i = 0; i < DeltaLV.Len(); i++) {
739  DeltaLV[i] = - DeltaLV[i];
740  }
741  }
742 
743  }
744 }
745 
747  HVV.Gen(Theta.Len(), Theta.Len());
748  TFltV OutV;
750  for (int i = 0; i < X.Len(); i++) {
751  for (int r = 0; r < Theta.Len(); r++) {
752  HVV.At(r, r) += - (X[i][r] * OutV[i] * (1 - OutV[i]) * X[i][r]);
753  for (int c = r + 1; c < Theta.Len(); c++) {
754  HVV.At(r, c) += - (X[i][r] * OutV[i] * (1 - OutV[i]) * X[i][c]);
755  HVV.At(c, r) += - (X[i][r] * OutV[i] * (1 - OutV[i]) * X[i][c]);
756  }
757  }
758  }
759  /*
760  printf("\n");
761  for (int r = 0; r < Theta.Len(); r++) {
762  for (int c = 0; c < Theta.Len(); c++) {
763  printf("%f\t", HVV.At(r, c).Val);
764  }
765  printf("\n");
766  }
767  */
768 }
769 
770 int TLogRegFit::MLENewton(const double& ChangeEps, const int& MaxStep, const TStr PlotNm) {
771  TExeTm ExeTm;
772  TFltV GradV(Theta.Len()), DeltaLV(Theta.Len());
773  TFltVV HVV(Theta.Len(), Theta.Len());
774  int iter = 0;
775  double MinVal = -1e10, MaxVal = 1e10;
776  for(iter = 0; iter < MaxStep; iter++) {
777  Gradient(GradV);
778  Hessian(HVV);
779  GetNewtonStep(HVV, GradV, DeltaLV);
780  double Increment = TLinAlg::DotProduct(GradV, DeltaLV);
781  if (Increment <= ChangeEps) {break;}
782  double LearnRate = GetStepSizeByLineSearch(DeltaLV, GradV, 0.15, 0.5);//InitLearnRate/double(0.01*(double)iter + 1);
783  for(int i = 0; i < Theta.Len(); i++) {
784  double Change = LearnRate * DeltaLV[i];
785  Theta[i] += Change;
786  if(Theta[i] < MinVal) { Theta[i] = MinVal;}
787  if(Theta[i] > MaxVal) { Theta[i] = MaxVal;}
788  }
789  }
790  if (! PlotNm.Empty()) {
791  printf("MLE with Newton method completed with %d iterations(%s)\n",iter,ExeTm.GetTmStr());
792  }
793 
794  return iter;
795 }
796 
797 int TLogRegFit::MLEGradient(const double& ChangeEps, const int& MaxStep, const TStr PlotNm) {
798  TExeTm ExeTm;
799  TFltV GradV(Theta.Len());
800  int iter = 0;
801  TIntFltPrV IterLV, IterGradNormV;
802  double MinVal = -1e10, MaxVal = 1e10;
803  double GradCutOff = 100000;
804  for(iter = 0; iter < MaxStep; iter++) {
805  Gradient(GradV); //if gradient is going out of the boundary, cut off
806  for(int i = 0; i < Theta.Len(); i++) {
807  if (GradV[i] < -GradCutOff) { GradV[i] = -GradCutOff; }
808  if (GradV[i] > GradCutOff) { GradV[i] = GradCutOff; }
809  if (Theta[i] <= MinVal && GradV[i] < 0) { GradV[i] = 0.0; }
810  if (Theta[i] >= MaxVal && GradV[i] > 0) { GradV[i] = 0.0; }
811  }
812  double Alpha = 0.15, Beta = 0.9;
813  //double LearnRate = 0.1 / (0.1 * iter + 1); //GetStepSizeByLineSearch(GradV, GradV, Alpha, Beta);
814  double LearnRate = GetStepSizeByLineSearch(GradV, GradV, Alpha, Beta);
815  if (TLinAlg::Norm(GradV) < ChangeEps) { break; }
816  for(int i = 0; i < Theta.Len(); i++) {
817  double Change = LearnRate * GradV[i];
818  Theta[i] += Change;
819  if(Theta[i] < MinVal) { Theta[i] = MinVal;}
820  if(Theta[i] > MaxVal) { Theta[i] = MaxVal;}
821  }
822  if (! PlotNm.Empty()) {
823  double L = Likelihood();
824  IterLV.Add(TIntFltPr(iter, L));
825  IterGradNormV.Add(TIntFltPr(iter, TLinAlg::Norm(GradV)));
826  }
827 
828  }
829  if (! PlotNm.Empty()) {
830  TGnuPlot::PlotValV(IterLV, PlotNm + ".likelihood_Q");
831  TGnuPlot::PlotValV(IterGradNormV, PlotNm + ".gradnorm_Q");
832  printf("MLE for Lambda completed with %d iterations(%s)\n",iter,ExeTm.GetTmStr());
833  }
834  return iter;
835 }
836 
837 double TLogRegFit::GetStepSizeByLineSearch(const TFltV& DeltaV, const TFltV& GradV, const double& Alpha, const double& Beta) {
838  double StepSize = 1.0;
839  double InitLikelihood = Likelihood();
840  IAssert(Theta.Len() == DeltaV.Len());
841  TFltV NewThetaV(Theta.Len());
842  double MinVal = -1e10, MaxVal = 1e10;
843  for(int iter = 0; ; iter++) {
844  for (int i = 0; i < Theta.Len(); i++){
845  NewThetaV[i] = Theta[i] + StepSize * DeltaV[i];
846  if (NewThetaV[i] < MinVal) { NewThetaV[i] = MinVal; }
847  if (NewThetaV[i] > MaxVal) { NewThetaV[i] = MaxVal; }
848  }
849  if (Likelihood(NewThetaV) < InitLikelihood + Alpha * StepSize * TLinAlg::DotProduct(GradV, DeltaV)) {
850  StepSize *= Beta;
851  } else {
852  break;
853  }
854  }
855  return StepSize;
856 }
857 
858 double TLogRegFit::Likelihood(const TFltV& NewTheta) {
859  TFltV OutV;
860  TLogRegPredict::GetCfy(X, OutV, NewTheta);
861  double L = 0;
862  for (int r = 0; r < OutV.Len(); r++) {
863  L += Y[r] * log(OutV[r]);
864  L += (1 - Y[r]) * log(1 - OutV[r]);
865  }
866  return L;
867 }
868 
870  TFltV OutV;
872  GradV.Gen(M);
873  for (int r = 0; r < X.Len(); r++) {
874  //printf("Y[%d] = %f, Out[%d] = %f\n", r, Y[r].Val, r, OutV[r].Val);
875  for (int m = 0; m < M; m++) {
876  GradV[m] += (Y[r] - OutV[r]) * X[r][m];
877  }
878  }
879  //for (int m = 0; m < M; m++) { printf("Theta[%d] = %f, GradV[%d] = %f\n", m, Theta[m].Val, m, GradV[m].Val); }
880 }
881 
882 PLogRegPredict TLogRegFit::CalcLogRegNewton(const TVec<TFltV>& XPt, const TFltV& yPt, const TStr& PlotNm, const double& ChangeEps, const int& MaxStep, const bool Intercept) {
883 
884  X = XPt;
885  Y = yPt;
886  IAssert(X.Len() == Y.Len());
887  if (Intercept == false) { // if intercept is not included, add it
888  for (int r = 0; r < X.Len(); r++) { X[r].Add(1); }
889  }
890  M = X[0].Len();
891  for (int r = 0; r < X.Len(); r++) { IAssert(X[r].Len() == M); }
892  for (int r = 0; r < Y.Len(); r++) {
893  if (Y[r] >= 0.99999) { Y[r] = 0.99999; }
894  if (Y[r] <= 0.00001) { Y[r] = 0.00001; }
895  }
896  Theta.Gen(M);
897  MLENewton(ChangeEps, MaxStep, PlotNm);
898  return new TLogRegPredict(Theta);
899 };
900 
901 PLogRegPredict TLogRegFit::CalcLogRegGradient(const TVec<TFltV>& XPt, const TFltV& yPt, const TStr& PlotNm, const double& ChangeEps, const int& MaxStep, const bool Intercept) {
902  X = XPt;
903  Y = yPt;
904  IAssert(X.Len() == Y.Len());
905  if (Intercept == false) { // if intercept is not included, add it
906  for (int r = 0; r < X.Len(); r++) { X[r].Add(1); }
907  }
908  M = X[0].Len();
909  for (int r = 0; r < X.Len(); r++) { IAssert(X[r].Len() == M); }
910  for (int r = 0; r < Y.Len(); r++) {
911  if (Y[r] >= 0.99999) { Y[r] = 0.99999; }
912  if (Y[r] <= 0.00001) { Y[r] = 0.00001; }
913  }
914  Theta.Gen(M);
915  MLEGradient(ChangeEps, MaxStep, PlotNm);
916  return new TLogRegPredict(Theta);
917 };
918 
920 // Logistic-Regression-Model
921 
922 double TLogRegPredict::GetCfy(const TFltV& AttrV, const TFltV& NewTheta) {
923  int len = AttrV.Len();
924  double res = 0;
925  if (len < NewTheta.Len()) { res = NewTheta.Last(); } //if feature vector is shorter, add an intercept
926  for (int i = 0; i < len; i++) {
927  if (i < NewTheta.Len()) { res += AttrV[i] * NewTheta[i]; }
928  }
929  double mu = 1 / (1 + exp(-res));
930  return mu;
931 }
932 
933 void TLogRegPredict::GetCfy(const TVec<TFltV>& X, TFltV& OutV, const TFltV& NewTheta) {
934  OutV.Gen(X.Len());
935  for (int r = 0; r < X.Len(); r++) {
936  OutV[r] = GetCfy(X[r], NewTheta);
937  }
938 }
TVec< TFltV > X
Definition: agm.h:169
#define IAssert(Cond)
Definition: bd.h:262
Edge iterator. Only forward iteration (operator++) is supported.
Definition: graph.h:121
TPair< TInt, TInt > TIntPr
Definition: ds.h:83
static void GetNbhCom(const PUNGraph &Graph, const int NID, TIntSet &NBCmtyS)
Definition: agm.cpp:706
TIter EndI() const
Returns an iterator referring to the past-the-end element in the vector.
Definition: ds.h:595
void GetNewtonStep(TFltVV &HVV, const TFltV &GradV, TFltV &DeltaLV)
Definition: agm.cpp:718
static double Norm(const TFltV &x)
Definition: linalg.cpp:324
int Len() const
Definition: dt.h:490
static int TotalMemberships(const TVec< TIntV > &CmtyVV)
total number of memberships (== sum of the sizes of communities)
Definition: agm.cpp:315
void GetDatV(TVec< TDat > &DatV) const
Definition: hash.h:492
TEdgeI EndEI() const
Returns an iterator referring to the past-the-end edge in the graph.
Definition: graph.h:280
Definition: tm.h:355
static void RewireCmtyVV(const TVec< TIntV > &CmtyVVIn, TVec< TIntV > &CmtyVVOut, TRnd &Rnd)
rewire bipartite community affiliation graphs
Definition: agm.cpp:192
#define Kilo(n)
Definition: gbase.h:3
Definition: dt.h:11
static void GetNodeMembership(THash< TInt, TIntSet > &NIDComVH, const TVec< TIntV > &CmtyVV)
get hash table of
Definition: agm.cpp:335
Definition: ds.h:130
static void GetCfy(const TVec< TFltV > &X, TFltV &OutV, const TFltV &NewTheta)
Definition: agm.cpp:933
void SetPNoCom(const double &Epsilon)
Set Epsilon (the probability that two nodes sharing no communities connect) to the default value...
Definition: agmfit.h:57
TIter BegI() const
Definition: shash.h:1105
void GetNIdV(TIntV &NIdV) const
Gets a vector IDs of all nodes in the graph.
Definition: graph.cpp:153
TIter BegI() const
Definition: hash.h:213
int AddNode(int NId=-1)
Adds a node of ID NId to the graph.
Definition: graph.cpp:8
double Likelihood()
COMMENT.
Definition: agmfit.cpp:104
static void GenPLSeq(TIntV &SzSeq, const int &SeqLen, const double &Alpha, TRnd &Rnd, const int &Min, const int &Max)
AGMUtil:: Utilities for AGM.
Definition: agm.cpp:81
int GetEdges() const
Returns the number of edges in the graph.
Definition: graph.cpp:82
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:575
Node iterator. Only forward iteration (operator++) is supported.
Definition: graph.h:68
static int Intersection(const TIntV &C1, const TIntV &C2)
Definition: agm.cpp:399
void GetKeyV(TVec< TKey > &KeyV) const
Definition: shash.h:1347
Definition: ss.h:72
void Gen(const int &ExpectVals)
Definition: shash.h:1115
double Likelihood()
Definition: agm.h:183
TVal1 Val1
Definition: ds.h:132
bool GetInt(const int &FldN, int &Val) const
If the field FldN is an integer its value is returned in Val and the function returns true...
Definition: ss.cpp:447
const TDat & GetDat(const TKey &Key) const
Definition: hash.h:262
static void GetIntersection(const THashSet< TInt > &A, const THashSet< TInt > &B, THashSet< TInt > &C)
Definition: agm.cpp:404
TIter EndI() const
Definition: hash.h:218
Fitting the Affilialiton Graph Model (AGM).
Definition: agmfit.h:8
static double GetConductance(const PUNGraph &Graph, const TIntSet &CmtyS, const int Edges)
Definition: agm.cpp:683
int GetFlds() const
Returns the number of fields in the current line.
Definition: ss.h:116
static const double Mx
Definition: dt.h:1391
bool IsKey(const TKey &Key) const
Definition: shash.h:1148
int GetNodes() const
Returns the number of nodes in the graph.
Definition: graph.h:192
const char * GetFld(const int &FldN) const
Returns the contents of the field at index FldN.
Definition: ss.h:129
TFltV Y
Definition: agm.h:170
TSsFmt
Spread-Sheet Separator Format.
Definition: ss.h:5
TPair< TInt, TFlt > TIntFltPr
Definition: ds.h:87
int GetDeg() const
Returns degree of the current node.
Definition: graph.h:90
const char * GetTmStr() const
Definition: tm.h:370
static void SaveGephi(const TStr &OutFNm, const PUNGraph &G, const TVec< TIntV > &CmtyVVAtr, const double MaxSz, const double MinSz)
Definition: agm.h:55
static void ConnectCmtyVV(TVec< TIntV > &CmtyVV, const TIntPrV &CIDSzPrV, const TIntPrV &NIDMemPrV, TRnd &Rnd)
Generate bipartite community affiliation from given power law coefficients for membership distributio...
Definition: agm.cpp:131
TVal2 Val2
Definition: ds.h:133
int GetOutDeg() const
Returns out-degree of the current node (returns same as value GetDeg() since the graph is undirected)...
Definition: graph.h:94
void Clr(const bool &DoDel=true, const TSizeTy &NoDelLim=-1)
Clears the contents of the vector.
Definition: ds.h:1022
void GetCmtyVV(TVec< TIntV > &CmtyVV, const double QMax=2.0)
Get communities whose p_c is higher than 1 - QMax.
Definition: agmfit.cpp:554
int MLEGradAscentGivenCAG(const double &Thres=0.001, const int &MaxIter=10000, const TStr PlotNm=TStr())
Gradient descent for p_c while keeping the community affiliation graph (CAG) fixed.
Definition: agmfit.cpp:130
bool IsKey(const char *Key) const
Definition: hash.h:897
TFltV Theta
Definition: agm.h:171
void Hessian(TFltVV &HVV)
Definition: agm.cpp:746
PLogRegPredict CalcLogRegNewton(const TVec< TFltV > &XPt, const TFltV &yPt, const TStr &PlotNm=TStr(), const double &ChangeEps=0.01, const int &MaxStep=200, const bool InterceptPt=false)
Definition: agm.cpp:882
static void GenCmtyVVFromPL(TVec< TIntV > &CmtyVV, const PUNGraph &Graph, const int &Nodes, const int &Coms, const double &ComSzAlpha, const double &MemAlpha, const int &MinSz, const int &MaxSz, const int &MinK, const int &MaxK, TRnd &Rnd)
Generate bipartite community affiliation from given power law coefficients for membership distributio...
Definition: agm.cpp:101
static double Round(const double &Val)
Definition: xmath.h:16
Whitespace (space or tab) separated.
Definition: ss.h:11
TNodeI GetNI(const int &NId) const
Returns an iterator referring to the node of ID NId in the graph.
Definition: graph.h:241
int M
Definition: agm.h:172
void SetRegCoef(const double Val)
Definition: agmfit.h:45
TEdgeI BegEI() const
Returns an iterator referring to the first edge in the graph.
Definition: graph.h:278
static PUNGraph New()
Static constructor that returns a pointer to the graph. Call: PUNGraph Graph = TUNGraph::New().
Definition: graph.h:172
int AddKey(const TKey &Key)
Definition: shash.h:1254
const TVal & Last() const
Returns a reference to the last element of the vector.
Definition: ds.h:579
static void LoadCmtyVV(const TStr &InFNm, TVec< TIntV > &CmtyVV)
load bipartite community affiliation graph from text file (each row contains the member node IDs for ...
Definition: agm.cpp:246
static void SolveSymetricSystem(TFltVV &A, const TFltV &b, TFltV &x)
Definition: linalg.cpp:837
TPair< TFlt, TFlt > TFltPr
Definition: ds.h:99
int MLEGradient(const double &ChangeEps, const int &MaxStep, const TStr PlotNm)
Definition: agm.cpp:797
int AddEdge(const int &SrcNId, const int &DstNId)
Adds an edge between node IDs SrcNId and DstNId to the graph.
Definition: graph.cpp:92
Definition: hash.h:781
static void SaveBipartiteGephi(const TStr &OutFNm, const TIntV &NIDV, const TVec< TIntV > &CmtyVV, const double MaxSz, const double MinSz, const TIntStrH &NIDNameH, const THash< TInt, TIntTr > &NIDColorH, const THash< TInt, TIntTr > &CIDColorH)
save bipartite community affiliation into gexf file
Definition: agm.cpp:486
TIter EndI() const
Definition: shash.h:1112
int Len() const
Definition: shash.h:1121
Definition: ds.h:32
double GetBinomialDev(const double &Prb, const int &Trials)
Definition: dt.cpp:154
int GetOutNId(const int &NodeN) const
Returns ID of NodeN-th out-node (the node the current node points to).
Definition: graph.h:106
void RunMCMC(const int &MaxIter, const int &EvalLambdaIter, const TStr &PlotFPrx=TStr())
Main procedure for fitting the AGM to a given graph using MCMC.
Definition: agmfit.cpp:366
Definition: dt.h:412
static int FindComsByAGM(const PUNGraph &Graph, const int InitComs, const int MaxIter, const int RndSeed, const double RegGap, const double PNoCom=0.0, const TStr PltFPrx=TStr())
estimate number of communities using AGM
Definition: agm.cpp:544
bool Empty() const
Definition: dt.h:491
static TStr Fmt(const char *FmtStr,...)
Definition: dt.cpp:1599
void Pack()
Reduces vector capacity (frees memory) to match its size.
Definition: ds.h:1057
TSizeTy GetXDim() const
Definition: ds.h:2250
double GetStepSizeByLineSearch(const TFltV &DeltaV, const TFltV &GradV, const double &Alpha, const double &Beta)
Definition: agm.cpp:837
void Gradient(TFltV &GradV)
Definition: agm.cpp:869
void Defrag(const bool &OnlyNodeLinks=false)
Defragments the graph.
Definition: graph.cpp:160
TNodeI EndNI() const
Returns an iterator referring to the past-the-end node in the graph.
Definition: graph.h:239
static void DumpCmtyVV(const TStr &OutFNm, const TVec< TIntV > &CmtyVV)
dump bipartite community affiliation into a text file
Definition: agm.cpp:286
TVal1 Val1
Definition: ds.h:34
int GetNbrNId(const int &NodeN) const
Returns ID of NodeN-th neighboring node.
Definition: graph.h:111
TVal2 Val2
Definition: ds.h:35
void Clr(const bool &DoDel=true, const int &NoDelLim=-1, const bool &ResetDat=true)
Definition: hash.h:361
PLogRegPredict CalcLogRegGradient(const TVec< TFltV > &XPt, const TFltV &yPt, const TStr &PlotNm=TStr(), const double &ChangeEps=0.01, const int &MaxStep=200, const bool InterceptPt=false)
Definition: agm.cpp:901
int MLENewton(const double &ChangeEps, const int &MaxStep, const TStr PlotNm)
Definition: agm.cpp:770
bool IsNode(const int &NId) const
Tests whether ID NId is a node.
Definition: graph.h:235
bool Next()
Loads next line from the input file.
Definition: ss.cpp:412
static double DotProduct(const TFltV &x, const TFltV &y)
Definition: linalg.cpp:165
static void RndConnectInsideCommunity(PUNGraph &Graph, const TIntV &CmtyV, const double &Prob, TRnd &Rnd)
Connect members of a given community by Erdos-Renyi.
Definition: agm.cpp:10
TTriple< TInt, TInt, TInt > TIntTr
Definition: ds.h:171
void Gen(const TSizeTy &_Vals)
Constructs a vector (an array) of _Vals elements.
Definition: ds.h:523
int GetUniDevInt(const int &Range=0)
Definition: dt.cpp:39
static TVec< TFlt, int > GetV(const TFlt &Val1)
Returns a vector on element Val1.
Definition: ds.h:848
char * CStr()
Definition: dt.h:479
bool IsKey(const TKey &Key) const
Definition: hash.h:258
bool IsInt(const int &FldN) const
Checks whether fields FldN is an integer.
Definition: ss.h:143
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:602
TNodeI BegNI() const
Returns an iterator referring to the first node in the graph.
Definition: graph.h:237
void DelLast()
Removes the last element of the vector.
Definition: ds.h:665
int Len() const
Definition: hash.h:228
static PUNGraph GenAGM(TVec< TIntV > &CmtyVV, const double &DensityCoef, const double &ScaleCoef, TRnd &Rnd=TInt::Rnd)
Definition: agm.cpp:37
TDat & AddDat(const TKey &Key)
Definition: hash.h:238
static void PlotValV(const TVec< TVal1 > &ValV, const TStr &OutFNmPref, const TStr &Desc="", const TStr &XLabel="", const TStr &YLabel="", const TGpScaleTy &ScaleTy=gpsAuto, const bool &PowerFit=false, const TGpSeriesTy &SeriesTy=gpwLinesPoints)
Definition: gnuplot.h:398
void Gen(const TSizeTy &_XDim, const TSizeTy &_YDim)
Definition: ds.h:2247
const TKey & GetKey(const int &KeyId) const
Definition: hash.h:252
TVal3 Val3
Definition: ds.h:134
double GetPowerDev(const double &AlphaSlope)
Definition: dt.h:47
int GetKeyId(const char *Key) const
Definition: hash.h:994
static void RewireCmtyNID(THash< TInt, TIntV > &CmtyVH, TRnd &Rnd)
rewire bipartite community affiliation graphs
Definition: agm.cpp:202
void Swap(TRec &Rec1, TRec &Rec2)
Definition: bd.h:568
static const double Mn
Definition: dt.h:1390
const TVal & At(const TSizeTy &X, const TSizeTy &Y) const
Definition: ds.h:2256