SNAP Library 4.0, Developer Reference  2017-07-27 13:18:06
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
agm.cpp
Go to the documentation of this file.
1 #include "stdafx.h"
2 #include "Snap.h"
3 #include "agm.h"
4 #include "agmfit.h"
5 
7 // AGM graph generation.
8 
10 void TAGM::RndConnectInsideCommunity(PUNGraph& Graph, const TIntV& CmtyV, const double& Prob, TRnd& Rnd){
11  int CNodes = CmtyV.Len(), CEdges;
12  if (CNodes < 20) {
13  CEdges = (int) Rnd.GetBinomialDev(Prob, CNodes * (CNodes-1) / 2);
14  } else {
15  CEdges = (int) (Prob * CNodes * (CNodes - 1) / 2);
16  }
17  THashSet<TIntPr> NewEdgeSet(CEdges);
18  for (int edge = 0; edge < CEdges; ) {
19  int SrcNId = CmtyV[Rnd.GetUniDevInt(CNodes)];
20  int DstNId = CmtyV[Rnd.GetUniDevInt(CNodes)];
21  if (SrcNId > DstNId) { Swap(SrcNId,DstNId); }
22  if (SrcNId != DstNId && ! NewEdgeSet.IsKey(TIntPr(SrcNId, DstNId))) { // is new edge
23  NewEdgeSet.AddKey(TIntPr(SrcNId, DstNId));
24  Graph->AddEdge(SrcNId, DstNId);
25  edge++;
26  }
27  }
28 }
29 
30 
31 PUNGraph TAGM::GenAGM(TVec<TIntV>& CmtyVV, const double& DensityCoef, const int TargetEdges, TRnd& Rnd){
32  PUNGraph TryG = TAGM::GenAGM(CmtyVV, DensityCoef, 1.0, Rnd);
33  const double ScaleCoef = (double) TargetEdges / (double) TryG->GetEdges();
34  return TAGM::GenAGM(CmtyVV, DensityCoef, ScaleCoef, Rnd);
35 }
36 
37 PUNGraph TAGM::GenAGM(TVec<TIntV>& CmtyVV, const double& DensityCoef, const double& ScaleCoef, TRnd& Rnd){
38  TFltV CProbV;
39  double Prob;
40  for (int i = 0; i < CmtyVV.Len(); i++) {
41  Prob = ScaleCoef*pow( double( CmtyVV[i].Len()), - DensityCoef);
42  if (Prob > 1.0) { Prob = 1; }
43  CProbV.Add(Prob);
44  }
45  return TAGM::GenAGM(CmtyVV, CProbV, Rnd);
46 }
47 
49 PUNGraph TAGM::GenAGM(TVec<TIntV>& CmtyVV, const TFltV& CProbV, TRnd& Rnd, const double PNoCom){
50  PUNGraph G = TUNGraph::New(100 * CmtyVV.Len(), -1);
51  printf("AGM begins\n");
52  for (int i = 0; i < CmtyVV.Len(); i++) {
53  TIntV& CmtyV = CmtyVV[i];
54  for (int u = 0; u < CmtyV.Len(); u++) {
55  if ( G->IsNode(CmtyV[u])) { continue; }
56  G->AddNode(CmtyV[u]);
57  }
58  double Prob = CProbV[i];
59  RndConnectInsideCommunity(G, CmtyV, Prob, Rnd);
60  }
61  if (PNoCom > 0.0) { //if we want to connect nodes that do not share any community
62  TIntSet NIDS;
63  for (int c = 0; c < CmtyVV.Len(); c++) {
64  for (int u = 0; u < CmtyVV[c].Len(); u++) {
65  NIDS.AddKey(CmtyVV[c][u]);
66  }
67  }
68  TIntV NIDV;
69  NIDS.GetKeyV(NIDV);
70  RndConnectInsideCommunity(G,NIDV,PNoCom,Rnd);
71  }
72  printf("AGM completed (%d nodes %d edges)\n",G->GetNodes(),G->GetEdges());
73  G->Defrag();
74  return G;
75 }
76 
79 
81 void TAGMUtil::GenPLSeq(TIntV& SzSeq, const int& SeqLen, const double& Alpha, TRnd& Rnd, const int& Min, const int& Max) {
82  SzSeq.Gen(SeqLen, 0);
83  while (SzSeq.Len() < SeqLen) {
84  int Sz = (int) TMath::Round(Rnd.GetPowerDev(Alpha));
85  if (Sz >= Min && Sz <= Max) {
86  SzSeq.Add(Sz);
87  }
88  }
89 }
90 
92 void TAGMUtil::GenCmtyVVFromPL(TVec<TIntV>& CmtyVV, const int& Nodes, const int& Coms, const double& ComSzAlpha, const double& MemAlpha, const int& MinSz, const int& MaxSz, const int& MinK, const int& MaxK, TRnd& Rnd){
93  TIntV NIDV(Nodes, 0);
94  for (int i = 0; i < Nodes; i++) {
95  NIDV.Add(i);
96  }
97  GenCmtyVVFromPL(CmtyVV, NIDV, Nodes, Coms, ComSzAlpha, MemAlpha, MinSz, MaxSz, MinK, MaxK, Rnd);
98 }
99 
101 void TAGMUtil::GenCmtyVVFromPL(TVec<TIntV>& CmtyVV, const PUNGraph& Graph, const int& Nodes, const int& Coms, const double& ComSzAlpha, const double& MemAlpha, const int& MinSz, const int& MaxSz, const int& MinK, const int& MaxK, TRnd& Rnd){
102  if (Coms == 0 || Nodes == 0) {
103  CmtyVV.Clr();
104  return;
105  }
106  TIntV NIDV;
107  Graph->GetNIdV(NIDV);
108  GenCmtyVVFromPL(CmtyVV, NIDV, Nodes, Coms, ComSzAlpha, MemAlpha, MinSz, MaxSz, MinK, MaxK, Rnd);
109 }
110 
112 void TAGMUtil::GenCmtyVVFromPL(TVec<TIntV>& CmtyVV, const TIntV& NIDV, const int& Nodes, const int& Coms, const double& ComSzAlpha, const double& MemAlpha, const int& MinSz, const int& MaxSz, const int& MinK, const int& MaxK, TRnd& Rnd){
113  if (Coms == 0 || Nodes == 0) {
114  CmtyVV.Clr();
115  return;
116  }
117  TIntV ComSzSeq, MemSeq;
118  TAGMUtil::GenPLSeq(ComSzSeq,Coms,ComSzAlpha,Rnd,MinSz,MaxSz);
119  TAGMUtil::GenPLSeq(MemSeq,Nodes,MemAlpha,Rnd,MinK,MaxK);
120  TIntPrV CIDSzPrV, NIDMemPrV;
121  for (int i = 0; i < ComSzSeq.Len(); i++) {
122  CIDSzPrV.Add(TIntPr(i, ComSzSeq[i]));
123  }
124  for (int i = 0; i < MemSeq.Len(); i++) {
125  NIDMemPrV.Add(TIntPr(NIDV[i], MemSeq[i]));
126  }
127  TAGMUtil::ConnectCmtyVV(CmtyVV, CIDSzPrV, NIDMemPrV, Rnd);
128 }
129 
131 void TAGMUtil::ConnectCmtyVV(TVec<TIntV>& CmtyVV, const TIntPrV& CIDSzPrV, const TIntPrV& NIDMemPrV, TRnd& Rnd) {
132  const int Nodes = NIDMemPrV.Len(), Coms = CIDSzPrV.Len();
133  TIntV NDegV,CDegV;
134  TIntPrSet CNIDSet;
135  TIntSet HitNodes(Nodes);
136  THash<TInt,TIntV> CmtyVH;
137  for (int i = 0;i < CIDSzPrV.Len(); i++) {
138  for (int j = 0; j < CIDSzPrV[i].Val2; j++) {
139  CDegV.Add(CIDSzPrV[i].Val1);
140  }
141  }
142  for (int i = 0; i < NIDMemPrV.Len(); i++) {
143  for (int j = 0; j < NIDMemPrV[i].Val2; j++) {
144  NDegV.Add(NIDMemPrV[i].Val1);
145  }
146  }
147  while (CDegV.Len() < (int) (1.2 * Nodes)) {
148  CDegV.Add(CIDSzPrV[Rnd.GetUniDevInt(Coms)].Val1);
149  }
150  while (NDegV.Len() < CDegV.Len()) {
151  NDegV.Add(NIDMemPrV[Rnd.GetUniDevInt(Nodes)].Val1);
152  }
153  printf("Total Mem: %d, Total Sz: %d\n",NDegV.Len(), CDegV.Len());
154  int c=0;
155  while (c++ < 15 && CDegV.Len() > 1) {
156  for (int i = 0; i < CDegV.Len(); i++) {
157  int u = Rnd.GetUniDevInt(CDegV.Len());
158  int v = Rnd.GetUniDevInt(NDegV.Len());
159  if (CNIDSet.IsKey(TIntPr(CDegV[u], NDegV[v]))) { continue; }
160  CNIDSet.AddKey(TIntPr(CDegV[u], NDegV[v]));
161  HitNodes.AddKey(NDegV[v]);
162  if (u == CDegV.Len() - 1) { CDegV.DelLast(); }
163  else {
164  CDegV[u] = CDegV.Last();
165  CDegV.DelLast();
166  }
167  if (v == NDegV.Len() - 1) { NDegV.DelLast(); }
168  else {
169  NDegV[v] = NDegV.Last();
170  NDegV.DelLast();
171  }
172  }
173  }
174  //make sure that every node belongs to at least one community
175  for (int i = 0; i < Nodes; i++) {
176  int NID = NIDMemPrV[i].Val1;
177  if (! HitNodes.IsKey(NID)) {
178  CNIDSet.AddKey(TIntPr(CIDSzPrV[Rnd.GetUniDevInt(Coms)].Val1, NID));
179  HitNodes.AddKey(NID);
180  }
181  }
182  IAssert(HitNodes.Len() == Nodes);
183  for (int i = 0; i < CNIDSet.Len(); i++) {
184  TIntPr CNIDPr = CNIDSet[i];
185  CmtyVH.AddDat(CNIDPr.Val1);
186  CmtyVH.GetDat(CNIDPr.Val1).Add(CNIDPr.Val2);
187  }
188  CmtyVH.GetDatV(CmtyVV);
189 }
190 
192 void TAGMUtil::RewireCmtyVV(const TVec<TIntV>& CmtyVVIn, TVec<TIntV>& CmtyVVOut, TRnd& Rnd){
193  THash<TInt,TIntV> CmtyVH;
194  for (int i = 0; i < CmtyVVIn.Len(); i++) {
195  CmtyVH.AddDat(i, CmtyVVIn[i]);
196  }
197  TAGMUtil::RewireCmtyNID(CmtyVH, Rnd);
198  CmtyVH.GetDatV(CmtyVVOut);
199 }
200 
203  THash<TInt,TIntV > NewCmtyVH(CmtyVH.Len());
204  TIntV NDegV;
205  TIntV CDegV;
206  for (int i = 0; i < CmtyVH.Len(); i++) {
207  int CID = CmtyVH.GetKey(i);
208  for (int j = 0; j < CmtyVH[i].Len(); j++) {
209  int NID = CmtyVH[i][j];
210  NDegV.Add(NID);
211  CDegV.Add(CID);
212  }
213  }
214  TIntPrSet CNIDSet(CDegV.Len());
215  int c=0;
216  while (c++ < 15 && CDegV.Len() > 1){
217  for (int i = 0; i < CDegV.Len(); i++) {
218  int u = Rnd.GetUniDevInt(CDegV.Len());
219  int v = Rnd.GetUniDevInt(NDegV.Len());
220  if (CNIDSet.IsKey(TIntPr(CDegV[u], NDegV[v]))) { continue; }
221  CNIDSet.AddKey(TIntPr(CDegV[u], NDegV[v]));
222  if (u == CDegV.Len() - 1) {
223  CDegV.DelLast();
224  } else {
225  CDegV[u] = CDegV.Last();
226  CDegV.DelLast();
227  }
228  if ( v == NDegV.Len() - 1) {
229  NDegV.DelLast();
230  } else{
231  NDegV[v] = NDegV.Last();
232  NDegV.DelLast();
233  }
234  }
235  }
236  for (int i = 0; i < CNIDSet.Len(); i++) {
237  TIntPr CNIDPr = CNIDSet[i];
238  IAssert(CmtyVH.IsKey(CNIDPr.Val1));
239  NewCmtyVH.AddDat(CNIDPr.Val1);
240  NewCmtyVH.GetDat(CNIDPr.Val1).Add(CNIDPr.Val2);
241  }
242  CmtyVH = NewCmtyVH;
243 }
244 
246 void TAGMUtil::LoadCmtyVV(const TStr& InFNm, TVec<TIntV>& CmtyVV) {
247  CmtyVV.Gen(Kilo(100), 0);
248  TSsParser Ss(InFNm, ssfWhiteSep);
249  while (Ss.Next()) {
250  if(Ss.GetFlds() > 0) {
251  TIntV CmtyV;
252  for (int i = 0; i < Ss.GetFlds(); i++) {
253  if (Ss.IsInt(i)) {
254  CmtyV.Add(Ss.GetInt(i));
255  }
256  }
257  CmtyVV.Add(CmtyV);
258  }
259  }
260  CmtyVV.Pack();
261  printf("community loading completed (%d communities)\n",CmtyVV.Len());
262 
263 }
264 
266 void TAGMUtil::LoadCmtyVV(const TStr& InFNm, TVec<TIntV>& CmtyVV, TStrHash<TInt>& StrToNIdH, const int BeginCol, const int MinSz, const TSsFmt Sep) {
267  CmtyVV.Gen(Kilo(100), 0);
268  TSsParser Ss(InFNm, Sep);
269  while (Ss.Next()) {
270  if(Ss.GetFlds() > BeginCol) {
271  TIntV CmtyV;
272  for (int i = BeginCol; i < Ss.GetFlds(); i++) {
273  if (StrToNIdH.IsKey(Ss.GetFld(i))) {
274  CmtyV.Add(StrToNIdH.GetKeyId(Ss.GetFld(i)));
275  }
276  }
277  if (CmtyV.Len() < MinSz) { continue; }
278  CmtyVV.Add(CmtyV);
279  }
280  }
281  CmtyVV.Pack();
282  printf("community loading completed (%d communities)\n",CmtyVV.Len());
283 }
284 
286 void TAGMUtil::DumpCmtyVV(const TStr& OutFNm, const TVec<TIntV>& CmtyVV) {
287  FILE* F = fopen(OutFNm.CStr(),"wt");
288  for (int i = 0; i < CmtyVV.Len(); i++) {
289  for (int j = 0; j < CmtyVV[i].Len(); j++) {
290  fprintf(F,"%d\t", (int) CmtyVV[i][j]);
291  }
292  fprintf(F,"\n");
293  }
294  fclose(F);
295 }
296 
298 void TAGMUtil::DumpCmtyVV(const TStr OutFNm, TVec<TIntV>& CmtyVV, TIntStrH& NIDNmH) {
299  FILE* F = fopen(OutFNm.CStr(), "wt");
300  for (int c = 0; c < CmtyVV.Len(); c++) {
301  for (int u = 0; u < CmtyVV[c].Len(); u++) {
302  if (NIDNmH.IsKey(CmtyVV[c][u])){
303  fprintf(F, "%s\t", NIDNmH.GetDat(CmtyVV[c][u]).CStr());
304  }
305  else {
306  fprintf(F, "%d\t", (int) CmtyVV[c][u]);
307  }
308  }
309  fprintf(F, "\n");
310  }
311  fclose(F);
312 }
313 
316  int M = 0;
317  for (int i = 0; i < CmtyVV.Len(); i++) {
318  M += CmtyVV[i].Len();
319  }
320  return M;
321 }
322 
324 void TAGMUtil::GetNodeMembership(TIntH& NIDComVH, const THash<TInt,TIntV >& CmtyVH) {
325  NIDComVH.Clr();
326  for (THash<TInt,TIntV>::TIter HI = CmtyVH.BegI(); HI < CmtyVH.EndI(); HI++){
327  for (int j = 0;j < HI.GetDat().Len(); j++) {
328  int NID = HI.GetDat()[j];
329  NIDComVH.AddDat(NID)++;
330  }
331  }
332 }
333 
336  NIDComVH.Clr();
337  for (int i = 0; i < CmtyVV.Len(); i++){
338  int CID = i;
339  for (int j = 0; j < CmtyVV[i].Len(); j++) {
340  int NID = CmtyVV[i][j];
341  NIDComVH.AddDat(NID).AddKey(CID);
342  }
343  }
344 }
345 
347 void TAGMUtil::GetNodeMembership(THash<TInt,TIntSet >& NIDComVH, const TVec<TIntV>& CmtyVV, const TIntV& NIDV) {
348  NIDComVH.Clr();
349  for (int u = 0; u < NIDV.Len(); u++) {
350  NIDComVH.AddDat(NIDV[u]);
351  }
352  for (int i = 0; i < CmtyVV.Len(); i++){
353  int CID = i;
354  for (int j = 0; j < CmtyVV[i].Len(); j++) {
355  int NID = CmtyVV[i][j];
356  NIDComVH.AddDat(NID).AddKey(CID);
357  }
358  }
359 }
360 
361 
363  for (int i = 0; i < CmtyVV.Len(); i++){
364  int CID = i;
365  for (TIntSet::TIter SI = CmtyVV[i].BegI(); SI < CmtyVV[i].EndI(); SI++) {
366  int NID = SI.GetKey();
367  NIDComVH.AddDat(NID).AddKey(CID);
368  }
369  }
370 }
372  for (THash<TInt,TIntV>::TIter HI = CmtyVH.BegI(); HI < CmtyVH.EndI(); HI++){
373  int CID = HI.GetKey();
374  for (int j = 0; j < HI.GetDat().Len(); j++) {
375  int NID = HI.GetDat()[j];
376  NIDComVH.AddDat(NID).AddKey(CID);
377  }
378  }
379 }
380 
382  for (int i = 0; i < CmtyVH.Len(); i++){
383  int CID = CmtyVH.GetKey(i);
384  for (int j = 0; j < CmtyVH[i].Len(); j++) {
385  int NID = CmtyVH[i][j];
386  NIDComVH.AddDat(NID).Add(CID);
387  }
388  }
389 }
390 
392  THash<TInt,TIntV> CmtyVH;
393  for (int i = 0; i < CmtyVV.Len(); i++) {
394  CmtyVH.AddDat(i, CmtyVV[i]);
395  }
396  GetNodeMembership(NIDComVH, CmtyVH);
397 }
398 
399 int TAGMUtil::Intersection(const TIntV& C1, const TIntV& C2) {
400  TIntSet S1(C1), S2(C2);
401  return TAGMUtil::Intersection(S1, S2);
402 }
403 
405  C.Gen(A.Len());
406  if (A.Len() < B.Len()) {
407  for (THashSetKeyI<TInt> it = A.BegI(); it < A.EndI(); it++)
408  if (B.IsKey(it.GetKey())) C.AddKey(it.GetKey());
409  } else {
410  for (THashSetKeyI<TInt> it = B.BegI(); it < B.EndI(); it++)
411  if (A.IsKey(it.GetKey())) C.AddKey(it.GetKey());
412  }
413 }
414 
416  int n = 0;
417  if (A.Len() < B.Len()) {
418  for (THashSetKeyI<TInt> it = A.BegI(); it < A.EndI(); it++)
419  if (B.IsKey(it.GetKey())) n++;
420  } else {
421  for (THashSetKeyI<TInt> it = B.BegI(); it < B.EndI(); it++)
422  if (A.IsKey(it.GetKey())) n++;
423  }
424  return n;
425 }
426 
428 void TAGMUtil::SaveGephi(const TStr& OutFNm, const PUNGraph& G, const TVec<TIntV>& CmtyVVAtr, const double MaxSz, const double MinSz, const TIntStrH& NIDNameH, const THash<TInt, TIntTr>& NIDColorH ) {
429  THash<TInt,TIntV> NIDComVHAtr;
430  TAGMUtil::GetNodeMembership(NIDComVHAtr, CmtyVVAtr);
431 
432  FILE* F = fopen(OutFNm.CStr(), "wt");
433  fprintf(F, "<?xml version='1.0' encoding='UTF-8'?>\n");
434  fprintf(F, "<gexf xmlns='http://www.gexf.net/1.2draft' xmlns:viz='http://www.gexf.net/1.1draft/viz' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' xsi:schemaLocation='http://www.gexf.net/1.2draft http://www.gexf.net/1.2draft/gexf.xsd' version='1.2'>\n");
435  fprintf(F, "\t<graph mode='static' defaultedgetype='undirected'>\n");
436  if (CmtyVVAtr.Len() > 0) {
437  fprintf(F, "\t<attributes class='node'>\n");
438  for (int c = 0; c < CmtyVVAtr.Len(); c++) {
439  fprintf(F, "\t\t<attribute id='%d' title='c%d' type='boolean'>", c, c);
440  fprintf(F, "\t\t<default>false</default>\n");
441  fprintf(F, "\t\t</attribute>\n");
442  }
443  fprintf(F, "\t</attributes>\n");
444  }
445  fprintf(F, "\t\t<nodes>\n");
446  for (TUNGraph::TNodeI NI = G->BegNI(); NI < G->EndNI(); NI++) {
447  int NID = NI.GetId();
448  TStr Label = NIDNameH.IsKey(NID)? NIDNameH.GetDat(NID): "";
449  TIntTr Color = NIDColorH.IsKey(NID)? NIDColorH.GetDat(NID) : TIntTr(120, 120, 120);
450 
451  double Size = MinSz;
452  double SizeStep = (MaxSz - MinSz) / (double) CmtyVVAtr.Len();
453  if (NIDComVHAtr.IsKey(NID)) {
454  Size = MinSz + SizeStep * (double) NIDComVHAtr.GetDat(NID).Len();
455  }
456  double Alpha = 1.0;
457  fprintf(F, "\t\t\t<node id='%d' label='%s'>\n", NID, Label.CStr());
458  fprintf(F, "\t\t\t\t<viz:color r='%d' g='%d' b='%d' a='%.1f'/>\n", Color.Val1.Val, Color.Val2.Val, Color.Val3.Val, Alpha);
459  fprintf(F, "\t\t\t\t<viz:size value='%.3f'/>\n", Size);
460  //specify attributes
461  if (NIDComVHAtr.IsKey(NID)) {
462  fprintf(F, "\t\t\t\t<attvalues>\n");
463  for (int c = 0; c < NIDComVHAtr.GetDat(NID).Len(); c++) {
464  int CID = NIDComVHAtr.GetDat(NID)[c];
465  fprintf(F, "\t\t\t\t\t<attvalue for='%d' value='true'/>\n", CID);
466  }
467  fprintf(F, "\t\t\t\t</attvalues>\n");
468  }
469 
470  fprintf(F, "\t\t\t</node>\n");
471  }
472  fprintf(F, "\t\t</nodes>\n");
473  //plot edges
474  int EID = 0;
475  fprintf(F, "\t\t<edges>\n");
476  for (TUNGraph::TEdgeI EI = G->BegEI(); EI < G->EndEI(); EI++) {
477  fprintf(F, "\t\t\t<edge id='%d' source='%d' target='%d'/>\n", EID++, EI.GetSrcNId(), EI.GetDstNId());
478  }
479  fprintf(F, "\t\t</edges>\n");
480  fprintf(F, "\t</graph>\n");
481  fprintf(F, "</gexf>\n");
482  fclose(F);
483 }
484 
486 void TAGMUtil::SaveBipartiteGephi(const TStr& OutFNm, const TIntV& NIDV, const TVec<TIntV>& CmtyVV, const double MaxSz, const double MinSz, const TIntStrH& NIDNameH, const THash<TInt, TIntTr>& NIDColorH, const THash<TInt, TIntTr>& CIDColorH ) {
488  if (CmtyVV.Len() == 0) { return; }
489  double NXMin = 0.1, YMin = 0.1, NXMax = 250.00, YMax = 30.0;
490  double CXMin = 0.3 * NXMax, CXMax = 0.7 * NXMax;
491  double CStep = (CXMax - CXMin) / (double) CmtyVV.Len(), NStep = (NXMax - NXMin) / (double) NIDV.Len();
492  THash<TInt,TIntV> NIDComVH;
493  TAGMUtil::GetNodeMembership(NIDComVH, CmtyVV);
494 
495  FILE* F = fopen(OutFNm.CStr(), "wt");
496  fprintf(F, "<?xml version='1.0' encoding='UTF-8'?>\n");
497  fprintf(F, "<gexf xmlns='http://www.gexf.net/1.2draft' xmlns:viz='http://www.gexf.net/1.1draft/viz' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' xsi:schemaLocation='http://www.gexf.net/1.2draft http://www.gexf.net/1.2draft/gexf.xsd' version='1.2'>\n");
498  fprintf(F, "\t<graph mode='static' defaultedgetype='directed'>\n");
499  fprintf(F, "\t\t<nodes>\n");
500  for (int c = 0; c < CmtyVV.Len(); c++) {
501  int CID = c;
502  double XPos = c * CStep + CXMin;
503  TIntTr Color = CIDColorH.IsKey(CID)? CIDColorH.GetDat(CID) : TIntTr(120, 120, 120);
504  fprintf(F, "\t\t\t<node id='C%d' label='C%d'>\n", CID, CID);
505  fprintf(F, "\t\t\t\t<viz:color r='%d' g='%d' b='%d'/>\n", Color.Val1.Val, Color.Val2.Val, Color.Val3.Val);
506  fprintf(F, "\t\t\t\t<viz:size value='%.3f'/>\n", MaxSz);
507  fprintf(F, "\t\t\t\t<viz:shape value='square'/>\n");
508  fprintf(F, "\t\t\t\t<viz:position x='%f' y='%f' z='0.0'/>\n", XPos, YMax);
509  fprintf(F, "\t\t\t</node>\n");
510  }
511 
512  for (int u = 0;u < NIDV.Len(); u++) {
513  int NID = NIDV[u];
514  TStr Label = NIDNameH.IsKey(NID)? NIDNameH.GetDat(NID): "";
515  double Size = MinSz;
516  double XPos = NXMin + u * NStep;
517  TIntTr Color = NIDColorH.IsKey(NID)? NIDColorH.GetDat(NID) : TIntTr(120, 120, 120);
518  double Alpha = 1.0;
519  fprintf(F, "\t\t\t<node id='%d' label='%s'>\n", NID, Label.CStr());
520  fprintf(F, "\t\t\t\t<viz:color r='%d' g='%d' b='%d' a='%.1f'/>\n", Color.Val1.Val, Color.Val2.Val, Color.Val3.Val, Alpha);
521  fprintf(F, "\t\t\t\t<viz:size value='%.3f'/>\n", Size);
522  fprintf(F, "\t\t\t\t<viz:shape value='square'/>\n");
523  fprintf(F, "\t\t\t\t<viz:position x='%f' y='%f' z='0.0'/>\n", XPos, YMin);
524  fprintf(F, "\t\t\t</node>\n");
525  }
526  fprintf(F, "\t\t</nodes>\n");
527  fprintf(F, "\t\t<edges>\n");
528  int EID = 0;
529  for (int u = 0;u < NIDV.Len(); u++) {
530  int NID = NIDV[u];
531  if (NIDComVH.IsKey(NID)) {
532  for (int c = 0; c < NIDComVH.GetDat(NID).Len(); c++) {
533  int CID = NIDComVH.GetDat(NID)[c];
534  fprintf(F, "\t\t\t<edge id='%d' source='C%d' target='%d'/>\n", EID++, CID, NID);
535  }
536  }
537  }
538  fprintf(F, "\t\t</edges>\n");
539  fprintf(F, "\t</graph>\n");
540  fprintf(F, "</gexf>\n");
541 }
542 
544 int TAGMUtil::FindComsByAGM(const PUNGraph& Graph, const int InitComs, const int MaxIter, const int RndSeed, const double RegGap, const double PNoCom, const TStr PltFPrx) {
545  TRnd Rnd(RndSeed);
546  int LambdaIter = 100;
547  if (Graph->GetNodes() < 200) { LambdaIter = 1; }
548  if (Graph->GetNodes() < 200 && Graph->GetEdges() > 2000) { LambdaIter = 100; }
549 
550  //Find coms with large C
551  TAGMFit AGMFitM(Graph, InitComs, RndSeed);
552  if (PNoCom > 0.0) { AGMFitM.SetPNoCom(PNoCom); }
553  AGMFitM.RunMCMC(MaxIter, LambdaIter, "");
554 
555  int TE = Graph->GetEdges();
556  TFltV RegV;
557  RegV.Add(0.3 * TE);
558  for (int r = 0; r < 25; r++) {
559  RegV.Add(RegV.Last() * RegGap);
560  }
561  TFltPrV RegComsV, RegLV, RegBICV;
562  TFltV LV, BICV;
563  //record likelihood and number of communities with nonzero P_c
564  for (int r = 0; r < RegV.Len(); r++) {
565  double RegCoef = RegV[r];
566  AGMFitM.SetRegCoef(RegCoef);
567  AGMFitM.MLEGradAscentGivenCAG(0.01, 1000);
568  AGMFitM.SetRegCoef(0.0);
569 
570  TVec<TIntV> EstCmtyVV;
571  AGMFitM.GetCmtyVV(EstCmtyVV, 0.99);
572  int NumLowQ = EstCmtyVV.Len();
573  RegComsV.Add(TFltPr(RegCoef, (double) NumLowQ));
574 
575  if (EstCmtyVV.Len() > 0) {
576  TAGMFit AFTemp(Graph, EstCmtyVV, Rnd);
577  AFTemp.MLEGradAscentGivenCAG(0.001, 1000);
578  double CurL = AFTemp.Likelihood();
579  LV.Add(CurL);
580  BICV.Add(-2.0 * CurL + (double) EstCmtyVV.Len() * log((double) Graph->GetNodes() * (Graph->GetNodes() - 1) / 2.0));
581  }
582  else {
583  break;
584  }
585  }
586  // if likelihood does not exist or does not change at all, report the smallest number of communities or 2
587  if (LV.Len() == 0) { return 2; }
588  else if (LV[0] == LV.Last()) { return (int) TMath::Mx<TFlt>(2.0, RegComsV[LV.Len() - 1].Val2); }
589 
590 
591  //normalize likelihood and BIC to 0~100
592  int MaxL = 100;
593  {
594  TFltV& ValueV = LV;
595  TFltPrV& RegValueV = RegLV;
596  double MinValue = TFlt::Mx, MaxValue = TFlt::Mn;
597  for (int l = 0; l < ValueV.Len(); l++) {
598  if (ValueV[l] < MinValue) { MinValue = ValueV[l]; }
599  if (ValueV[l] > MaxValue) { MaxValue = ValueV[l]; }
600  }
601  while (ValueV.Len() < RegV.Len()) { ValueV.Add(MinValue); }
602  double RangeVal = MaxValue - MinValue;
603  for (int l = 0; l < ValueV.Len(); l++) {
604  RegValueV.Add(TFltPr(RegV[l], double(MaxL) * (ValueV[l] - MinValue) / RangeVal));
605  }
606 
607  }
608  {
609  TFltV& ValueV = BICV;
610  TFltPrV& RegValueV = RegBICV;
611  double MinValue = TFlt::Mx, MaxValue = TFlt::Mn;
612  for (int l = 0; l < ValueV.Len(); l++) {
613  if (ValueV[l] < MinValue) { MinValue = ValueV[l]; }
614  if (ValueV[l] > MaxValue) { MaxValue = ValueV[l]; }
615  }
616  while (ValueV.Len() < RegV.Len()) { ValueV.Add(MaxValue); }
617  double RangeVal = MaxValue - MinValue;
618  for (int l = 0; l < ValueV.Len(); l++) {
619  RegValueV.Add(TFltPr(RegV[l], double(MaxL) * (ValueV[l] - MinValue) / RangeVal));
620  }
621  }
622 
623  //fit logistic regression to normalized likelihood.
624  TVec<TFltV> XV(RegLV.Len());
625  TFltV YV (RegLV.Len());
626  for (int l = 0; l < RegLV.Len(); l++) {
627  XV[l] = TFltV::GetV(log(RegLV[l].Val1));
628  YV[l] = RegLV[l].Val2 / (double) MaxL;
629  }
630  TFltPrV LRVScaled, LRV;
631  TLogRegFit LRFit;
632  PLogRegPredict LRMd = LRFit.CalcLogRegNewton(XV, YV, PltFPrx);
633  for (int l = 0; l < RegLV.Len(); l++) {
634  LRV.Add(TFltPr(RegV[l], LRMd->GetCfy(XV[l])));
635  LRVScaled.Add(TFltPr(RegV[l], double(MaxL) * LRV.Last().Val2));
636  }
637 
638  //estimate # communities from fitted logistic regression
639  int NumComs = 0, IdxRegDrop = 0;
640  double LRThres = 1.1, RegDrop; // 1 / (1 + exp(1.1)) = 0.25
641  double LeftReg = 0.0, RightReg = 0.0;
642  TFltV Theta;
643  LRMd->GetTheta(Theta);
644  RegDrop = (- Theta[1] - LRThres) / Theta[0];
645  if (RegDrop <= XV[0][0]) { NumComs = (int) RegComsV[0].Val2; }
646  else if (RegDrop >= XV.Last()[0]) { NumComs = (int) RegComsV.Last().Val2; }
647  else { //interpolate for RegDrop
648  for (int i = 0; i < XV.Len(); i++) {
649  if (XV[i][0] > RegDrop) { IdxRegDrop = i; break; }
650  }
651 
652  if (IdxRegDrop == 0) {
653  printf("Error!! RegDrop:%f, Theta[0]:%f, Theta[1]:%f\n", RegDrop, Theta[0].Val, Theta[1].Val);
654  for (int l = 0; l < RegLV.Len(); l++) {
655  printf("X[%d]:%f, Y[%d]:%f\n", l, XV[l][0].Val, l, YV[l].Val);
656  }
657  }
658  IAssert(IdxRegDrop > 0);
659  LeftReg = RegDrop - XV[IdxRegDrop - 1][0];
660  RightReg = XV[IdxRegDrop][0] - RegDrop;
661  NumComs = (int) TMath::Round( (RightReg * RegComsV[IdxRegDrop - 1].Val2 + LeftReg * RegComsV[IdxRegDrop].Val2) / (LeftReg + RightReg));
662 
663  }
664  //printf("Interpolation coeff: %f, %f, index at drop:%d (%f), Left-Right Vals: %f, %f\n", LeftReg, RightReg, IdxRegDrop, RegDrop, RegComsV[IdxRegDrop - 1].Val2, RegComsV[IdxRegDrop].Val2);
665  printf("Num Coms:%d\n", NumComs);
666  if (NumComs < 2) { NumComs = 2; }
667 
668  if (PltFPrx.Len() > 0) {
669  TStr PlotTitle = TStr::Fmt("N:%d, E:%d ", Graph->GetNodes(), TE);
670  TGnuPlot GPC(PltFPrx + ".l");
671  GPC.AddPlot(RegComsV, gpwLinesPoints, "C");
672  GPC.AddPlot(RegLV, gpwLinesPoints, "likelihood");
673  GPC.AddPlot(RegBICV, gpwLinesPoints, "BIC");
674  GPC.AddPlot(LRVScaled, gpwLinesPoints, "Sigmoid (scaled)");
675  GPC.SetScale(gpsLog10X);
676  GPC.SetTitle(PlotTitle);
677  GPC.SavePng(PltFPrx + ".l.png");
678  }
679 
680  return NumComs;
681 }
682 
683 double TAGMUtil::GetConductance(const PUNGraph& Graph, const TIntSet& CmtyS, const int Edges) {
684  const int Edges2 = Edges >= 0 ? 2*Edges : Graph->GetEdges();
685  int Vol = 0, Cut = 0;
686  double Phi = 0.0;
687  for (int i = 0; i < CmtyS.Len(); i++) {
688  if (! Graph->IsNode(CmtyS[i])) { continue; }
689  TUNGraph::TNodeI NI = Graph->GetNI(CmtyS[i]);
690  for (int e = 0; e < NI.GetOutDeg(); e++) {
691  if (! CmtyS.IsKey(NI.GetOutNId(e))) { Cut += 1; }
692  }
693  Vol += NI.GetOutDeg();
694  }
695  // get conductance
696  if (Vol != Edges2) {
697  if (2 * Vol > Edges2) { Phi = Cut / double (Edges2 - Vol); }
698  else if (Vol == 0) { Phi = 0.0; }
699  else { Phi = Cut / double(Vol); }
700  } else {
701  if (Vol == Edges2) { Phi = 1.0; }
702  }
703  return Phi;
704 }
705 
706 void TAGMUtil::GetNbhCom(const PUNGraph& Graph, const int NID, TIntSet& NBCmtyS) {
707  TUNGraph::TNodeI NI = Graph->GetNI(NID);
708  NBCmtyS.Gen(NI.GetDeg());
709  NBCmtyS.AddKey(NID);
710  for (int e = 0; e < NI.GetDeg(); e++) {
711  NBCmtyS.AddKey(NI.GetNbrNId(e));
712  }
713 }
714 
716 // Logistic regression by gradient ascent
717 
718 void TLogRegFit::GetNewtonStep(TFltVV& HVV, const TFltV& GradV, TFltV& DeltaLV){
719  bool HSingular = false;
720  for (int i = 0; i < HVV.GetXDim(); i++) {
721  if (HVV(i,i) == 0.0) {
722  HVV(i,i) = 0.001;
723  HSingular = true;
724  }
725  DeltaLV[i] = GradV[i] / HVV(i, i);
726  }
727  if (! HSingular) {
728  if (HVV(0, 0) < 0) { // if Hessian is negative definite, convert it to positive definite
729  for (int r = 0; r < Theta.Len(); r++) {
730  for (int c = 0; c < Theta.Len(); c++) {
731  HVV(r, c) = - HVV(r, c);
732  }
733  }
734  TNumericalStuff::SolveSymetricSystem(HVV, GradV, DeltaLV);
735  }
736  else {
737  TNumericalStuff::SolveSymetricSystem(HVV, GradV, DeltaLV);
738  for (int i = 0; i < DeltaLV.Len(); i++) {
739  DeltaLV[i] = - DeltaLV[i];
740  }
741  }
742 
743  }
744 }
745 
747  HVV.Gen(Theta.Len(), Theta.Len());
748  TFltV OutV;
750  for (int i = 0; i < X.Len(); i++) {
751  for (int r = 0; r < Theta.Len(); r++) {
752  HVV.At(r, r) += - (X[i][r] * OutV[i] * (1 - OutV[i]) * X[i][r]);
753  for (int c = r + 1; c < Theta.Len(); c++) {
754  HVV.At(r, c) += - (X[i][r] * OutV[i] * (1 - OutV[i]) * X[i][c]);
755  HVV.At(c, r) += - (X[i][r] * OutV[i] * (1 - OutV[i]) * X[i][c]);
756  }
757  }
758  }
759  /*
760  printf("\n");
761  for (int r = 0; r < Theta.Len(); r++) {
762  for (int c = 0; c < Theta.Len(); c++) {
763  printf("%f\t", HVV.At(r, c).Val);
764  }
765  printf("\n");
766  }
767  */
768 }
769 
770 int TLogRegFit::MLENewton(const double& ChangeEps, const int& MaxStep, const TStr PlotNm) {
771  TExeTm ExeTm;
772  TFltV GradV(Theta.Len()), DeltaLV(Theta.Len());
773  TFltVV HVV(Theta.Len(), Theta.Len());
774  int iter = 0;
775  double MinVal = -1e10, MaxVal = 1e10;
776  for(iter = 0; iter < MaxStep; iter++) {
777  Gradient(GradV);
778  Hessian(HVV);
779  GetNewtonStep(HVV, GradV, DeltaLV);
780  double Increment = TLinAlg::DotProduct(GradV, DeltaLV);
781  if (Increment <= ChangeEps) {break;}
782  double LearnRate = GetStepSizeByLineSearch(DeltaLV, GradV, 0.15, 0.5);//InitLearnRate/double(0.01*(double)iter + 1);
783  for(int i = 0; i < Theta.Len(); i++) {
784  double Change = LearnRate * DeltaLV[i];
785  Theta[i] += Change;
786  if(Theta[i] < MinVal) { Theta[i] = MinVal;}
787  if(Theta[i] > MaxVal) { Theta[i] = MaxVal;}
788  }
789  }
790  if (! PlotNm.Empty()) {
791  printf("MLE with Newton method completed with %d iterations(%s)\n",iter,ExeTm.GetTmStr());
792  }
793 
794  return iter;
795 }
796 
797 int TLogRegFit::MLEGradient(const double& ChangeEps, const int& MaxStep, const TStr PlotNm) {
798  TExeTm ExeTm;
799  TFltV GradV(Theta.Len());
800  int iter = 0;
801  TIntFltPrV IterLV, IterGradNormV;
802  double MinVal = -1e10, MaxVal = 1e10;
803  double GradCutOff = 100000;
804  for(iter = 0; iter < MaxStep; iter++) {
805  Gradient(GradV); //if gradient is going out of the boundary, cut off
806  for(int i = 0; i < Theta.Len(); i++) {
807  if (GradV[i] < -GradCutOff) { GradV[i] = -GradCutOff; }
808  if (GradV[i] > GradCutOff) { GradV[i] = GradCutOff; }
809  if (Theta[i] <= MinVal && GradV[i] < 0) { GradV[i] = 0.0; }
810  if (Theta[i] >= MaxVal && GradV[i] > 0) { GradV[i] = 0.0; }
811  }
812  double Alpha = 0.15, Beta = 0.9;
813  //double LearnRate = 0.1 / (0.1 * iter + 1); //GetStepSizeByLineSearch(GradV, GradV, Alpha, Beta);
814  double LearnRate = GetStepSizeByLineSearch(GradV, GradV, Alpha, Beta);
815  if (TLinAlg::Norm(GradV) < ChangeEps) { break; }
816  for(int i = 0; i < Theta.Len(); i++) {
817  double Change = LearnRate * GradV[i];
818  Theta[i] += Change;
819  if(Theta[i] < MinVal) { Theta[i] = MinVal;}
820  if(Theta[i] > MaxVal) { Theta[i] = MaxVal;}
821  }
822  if (! PlotNm.Empty()) {
823  double L = Likelihood();
824  IterLV.Add(TIntFltPr(iter, L));
825  IterGradNormV.Add(TIntFltPr(iter, TLinAlg::Norm(GradV)));
826  }
827 
828  }
829  if (! PlotNm.Empty()) {
830  TGnuPlot::PlotValV(IterLV, PlotNm + ".likelihood_Q");
831  TGnuPlot::PlotValV(IterGradNormV, PlotNm + ".gradnorm_Q");
832  printf("MLE for Lambda completed with %d iterations(%s)\n",iter,ExeTm.GetTmStr());
833  }
834  return iter;
835 }
836 
837 double TLogRegFit::GetStepSizeByLineSearch(const TFltV& DeltaV, const TFltV& GradV, const double& Alpha, const double& Beta) {
838  double StepSize = 1.0;
839  double InitLikelihood = Likelihood();
840  IAssert(Theta.Len() == DeltaV.Len());
841  TFltV NewThetaV(Theta.Len());
842  double MinVal = -1e10, MaxVal = 1e10;
843  for(int iter = 0; ; iter++) {
844  for (int i = 0; i < Theta.Len(); i++){
845  NewThetaV[i] = Theta[i] + StepSize * DeltaV[i];
846  if (NewThetaV[i] < MinVal) { NewThetaV[i] = MinVal; }
847  if (NewThetaV[i] > MaxVal) { NewThetaV[i] = MaxVal; }
848  }
849  if (Likelihood(NewThetaV) < InitLikelihood + Alpha * StepSize * TLinAlg::DotProduct(GradV, DeltaV)) {
850  StepSize *= Beta;
851  } else {
852  break;
853  }
854  }
855  return StepSize;
856 }
857 
858 double TLogRegFit::Likelihood(const TFltV& NewTheta) {
859  TFltV OutV;
860  TLogRegPredict::GetCfy(X, OutV, NewTheta);
861  double L = 0;
862  for (int r = 0; r < OutV.Len(); r++) {
863  L += Y[r] * log(OutV[r]);
864  L += (1 - Y[r]) * log(1 - OutV[r]);
865  }
866  return L;
867 }
868 
870  TFltV OutV;
872  GradV.Gen(M);
873  for (int r = 0; r < X.Len(); r++) {
874  //printf("Y[%d] = %f, Out[%d] = %f\n", r, Y[r].Val, r, OutV[r].Val);
875  for (int m = 0; m < M; m++) {
876  GradV[m] += (Y[r] - OutV[r]) * X[r][m];
877  }
878  }
879  //for (int m = 0; m < M; m++) { printf("Theta[%d] = %f, GradV[%d] = %f\n", m, Theta[m].Val, m, GradV[m].Val); }
880 }
881 
882 PLogRegPredict TLogRegFit::CalcLogRegNewton(const TVec<TFltV>& XPt, const TFltV& yPt, const TStr& PlotNm, const double& ChangeEps, const int& MaxStep, const bool Intercept) {
883 
884  X = XPt;
885  Y = yPt;
886  IAssert(X.Len() == Y.Len());
887  if (Intercept == false) { // if intercept is not included, add it
888  for (int r = 0; r < X.Len(); r++) { X[r].Add(1); }
889  }
890  M = X[0].Len();
891  for (int r = 0; r < X.Len(); r++) { IAssert(X[r].Len() == M); }
892  for (int r = 0; r < Y.Len(); r++) {
893  if (Y[r] >= 0.99999) { Y[r] = 0.99999; }
894  if (Y[r] <= 0.00001) { Y[r] = 0.00001; }
895  }
896  Theta.Gen(M);
897  MLENewton(ChangeEps, MaxStep, PlotNm);
898  return new TLogRegPredict(Theta);
899 };
900 
901 PLogRegPredict TLogRegFit::CalcLogRegGradient(const TVec<TFltV>& XPt, const TFltV& yPt, const TStr& PlotNm, const double& ChangeEps, const int& MaxStep, const bool Intercept) {
902  X = XPt;
903  Y = yPt;
904  IAssert(X.Len() == Y.Len());
905  if (Intercept == false) { // if intercept is not included, add it
906  for (int r = 0; r < X.Len(); r++) { X[r].Add(1); }
907  }
908  M = X[0].Len();
909  for (int r = 0; r < X.Len(); r++) { IAssert(X[r].Len() == M); }
910  for (int r = 0; r < Y.Len(); r++) {
911  if (Y[r] >= 0.99999) { Y[r] = 0.99999; }
912  if (Y[r] <= 0.00001) { Y[r] = 0.00001; }
913  }
914  Theta.Gen(M);
915  MLEGradient(ChangeEps, MaxStep, PlotNm);
916  return new TLogRegPredict(Theta);
917 };
918 
920 // Logistic-Regression-Model
921 
922 double TLogRegPredict::GetCfy(const TFltV& AttrV, const TFltV& NewTheta) {
923  int len = AttrV.Len();
924  double res = 0;
925  if (len < NewTheta.Len()) { res = NewTheta.Last(); } //if feature vector is shorter, add an intercept
926  for (int i = 0; i < len; i++) {
927  if (i < NewTheta.Len()) { res += AttrV[i] * NewTheta[i]; }
928  }
929  double mu = 1 / (1 + exp(-res));
930  return mu;
931 }
932 
933 void TLogRegPredict::GetCfy(const TVec<TFltV>& X, TFltV& OutV, const TFltV& NewTheta) {
934  OutV.Gen(X.Len());
935  for (int r = 0; r < X.Len(); r++) {
936  OutV[r] = GetCfy(X[r], NewTheta);
937  }
938 }
TVec< TFltV > X
Definition: agm.h:169
#define IAssert(Cond)
Definition: bd.h:262
Edge iterator. Only forward iteration (operator++) is supported.
Definition: graph.h:121
TPair< TInt, TInt > TIntPr
Definition: ds.h:83
static void GetNbhCom(const PUNGraph &Graph, const int NID, TIntSet &NBCmtyS)
Definition: agm.cpp:706
TIter EndI() const
Returns an iterator referring to the past-the-end element in the vector.
Definition: ds.h:595
void GetNewtonStep(TFltVV &HVV, const TFltV &GradV, TFltV &DeltaLV)
Definition: agm.cpp:718
static double Norm(const TFltV &x)
Definition: linalg.cpp:324
int Len() const
Definition: dt.h:487
static int TotalMemberships(const TVec< TIntV > &CmtyVV)
total number of memberships (== sum of the sizes of communities)
Definition: agm.cpp:315
void GetDatV(TVec< TDat > &DatV) const
Definition: hash.h:492
TEdgeI EndEI() const
Returns an iterator referring to the past-the-end edge in the graph.
Definition: graph.h:276
Definition: tm.h:355
static void RewireCmtyVV(const TVec< TIntV > &CmtyVVIn, TVec< TIntV > &CmtyVVOut, TRnd &Rnd)
rewire bipartite community affiliation graphs
Definition: agm.cpp:192
#define Kilo(n)
Definition: gbase.h:3
Definition: dt.h:11
static void GetNodeMembership(THash< TInt, TIntSet > &NIDComVH, const TVec< TIntV > &CmtyVV)
get hash table of
Definition: agm.cpp:335
Definition: ds.h:130
static void GetCfy(const TVec< TFltV > &X, TFltV &OutV, const TFltV &NewTheta)
Definition: agm.cpp:933
void SetPNoCom(const double &Epsilon)
Set Epsilon (the probability that two nodes sharing no communities connect) to the default value...
Definition: agmfit.h:57
TIter BegI() const
Definition: shash.h:1105
void GetNIdV(TIntV &NIdV) const
Gets a vector IDs of all nodes in the graph.
Definition: graph.cpp:153
TIter BegI() const
Definition: hash.h:213
int AddNode(int NId=-1)
Adds a node of ID NId to the graph.
Definition: graph.cpp:8
double Likelihood()
COMMENT.
Definition: agmfit.cpp:104
static void GenPLSeq(TIntV &SzSeq, const int &SeqLen, const double &Alpha, TRnd &Rnd, const int &Min, const int &Max)
AGMUtil:: Utilities for AGM.
Definition: agm.cpp:81
int GetEdges() const
Returns the number of edges in the graph.
Definition: graph.cpp:82
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:575
Node iterator. Only forward iteration (operator++) is supported.
Definition: graph.h:68
static int Intersection(const TIntV &C1, const TIntV &C2)
Definition: agm.cpp:399
void GetKeyV(TVec< TKey > &KeyV) const
Definition: shash.h:1347
Definition: ss.h:72
void Gen(const int &ExpectVals)
Definition: shash.h:1115
double Likelihood()
Definition: agm.h:183
TVal1 Val1
Definition: ds.h:132
bool GetInt(const int &FldN, int &Val) const
If the field FldN is an integer its value is returned in Val and the function returns true...
Definition: ss.cpp:447
const TDat & GetDat(const TKey &Key) const
Definition: hash.h:262
static void GetIntersection(const THashSet< TInt > &A, const THashSet< TInt > &B, THashSet< TInt > &C)
Definition: agm.cpp:404
TIter EndI() const
Definition: hash.h:218
Fitting the Affilialiton Graph Model (AGM).
Definition: agmfit.h:8
static double GetConductance(const PUNGraph &Graph, const TIntSet &CmtyS, const int Edges)
Definition: agm.cpp:683
int GetFlds() const
Returns the number of fields in the current line.
Definition: ss.h:116
static const double Mx
Definition: dt.h:1388
bool IsKey(const TKey &Key) const
Definition: shash.h:1148
int GetNodes() const
Returns the number of nodes in the graph.
Definition: graph.h:192
const char * GetFld(const int &FldN) const
Returns the contents of the field at index FldN.
Definition: ss.h:129
TFltV Y
Definition: agm.h:170
TSsFmt
Spread-Sheet Separator Format.
Definition: ss.h:5
TPair< TInt, TFlt > TIntFltPr
Definition: ds.h:87
int GetDeg() const
Returns degree of the current node.
Definition: graph.h:90
const char * GetTmStr() const
Definition: tm.h:370
static void SaveGephi(const TStr &OutFNm, const PUNGraph &G, const TVec< TIntV > &CmtyVVAtr, const double MaxSz, const double MinSz)
Definition: agm.h:55
static void ConnectCmtyVV(TVec< TIntV > &CmtyVV, const TIntPrV &CIDSzPrV, const TIntPrV &NIDMemPrV, TRnd &Rnd)
Generate bipartite community affiliation from given power law coefficients for membership distributio...
Definition: agm.cpp:131
TVal2 Val2
Definition: ds.h:133
int GetOutDeg() const
Returns out-degree of the current node (returns same as value GetDeg() since the graph is undirected)...
Definition: graph.h:94
void Clr(const bool &DoDel=true, const TSizeTy &NoDelLim=-1)
Clears the contents of the vector.
Definition: ds.h:1022
void GetCmtyVV(TVec< TIntV > &CmtyVV, const double QMax=2.0)
Get communities whose p_c is higher than 1 - QMax.
Definition: agmfit.cpp:554
int MLEGradAscentGivenCAG(const double &Thres=0.001, const int &MaxIter=10000, const TStr PlotNm=TStr())
Gradient descent for p_c while keeping the community affiliation graph (CAG) fixed.
Definition: agmfit.cpp:130
bool IsKey(const char *Key) const
Definition: hash.h:897
TFltV Theta
Definition: agm.h:171
void Hessian(TFltVV &HVV)
Definition: agm.cpp:746
PLogRegPredict CalcLogRegNewton(const TVec< TFltV > &XPt, const TFltV &yPt, const TStr &PlotNm=TStr(), const double &ChangeEps=0.01, const int &MaxStep=200, const bool InterceptPt=false)
Definition: agm.cpp:882
static void GenCmtyVVFromPL(TVec< TIntV > &CmtyVV, const PUNGraph &Graph, const int &Nodes, const int &Coms, const double &ComSzAlpha, const double &MemAlpha, const int &MinSz, const int &MaxSz, const int &MinK, const int &MaxK, TRnd &Rnd)
Generate bipartite community affiliation from given power law coefficients for membership distributio...
Definition: agm.cpp:101
static double Round(const double &Val)
Definition: xmath.h:16
Whitespace (space or tab) separated.
Definition: ss.h:11
TNodeI GetNI(const int &NId) const
Returns an iterator referring to the node of ID NId in the graph.
Definition: graph.h:241
int M
Definition: agm.h:172
void SetRegCoef(const double Val)
Definition: agmfit.h:45
TEdgeI BegEI() const
Returns an iterator referring to the first edge in the graph.
Definition: graph.h:274
static PUNGraph New()
Static constructor that returns a pointer to the graph. Call: PUNGraph Graph = TUNGraph::New().
Definition: graph.h:172
int AddKey(const TKey &Key)
Definition: shash.h:1254
const TVal & Last() const
Returns a reference to the last element of the vector.
Definition: ds.h:579
static void LoadCmtyVV(const TStr &InFNm, TVec< TIntV > &CmtyVV)
load bipartite community affiliation graph from text file (each row contains the member node IDs for ...
Definition: agm.cpp:246
static void SolveSymetricSystem(TFltVV &A, const TFltV &b, TFltV &x)
Definition: linalg.cpp:837
TPair< TFlt, TFlt > TFltPr
Definition: ds.h:99
int MLEGradient(const double &ChangeEps, const int &MaxStep, const TStr PlotNm)
Definition: agm.cpp:797
int AddEdge(const int &SrcNId, const int &DstNId)
Adds an edge between node IDs SrcNId and DstNId to the graph.
Definition: graph.cpp:92
Definition: hash.h:781
static void SaveBipartiteGephi(const TStr &OutFNm, const TIntV &NIDV, const TVec< TIntV > &CmtyVV, const double MaxSz, const double MinSz, const TIntStrH &NIDNameH, const THash< TInt, TIntTr > &NIDColorH, const THash< TInt, TIntTr > &CIDColorH)
save bipartite community affiliation into gexf file
Definition: agm.cpp:486
TIter EndI() const
Definition: shash.h:1112
int Len() const
Definition: shash.h:1121
Definition: ds.h:32
double GetBinomialDev(const double &Prb, const int &Trials)
Definition: dt.cpp:154
int GetOutNId(const int &NodeN) const
Returns ID of NodeN-th out-node (the node the current node points to).
Definition: graph.h:106
void RunMCMC(const int &MaxIter, const int &EvalLambdaIter, const TStr &PlotFPrx=TStr())
Main procedure for fitting the AGM to a given graph using MCMC.
Definition: agmfit.cpp:366
Definition: dt.h:412
static int FindComsByAGM(const PUNGraph &Graph, const int InitComs, const int MaxIter, const int RndSeed, const double RegGap, const double PNoCom=0.0, const TStr PltFPrx=TStr())
estimate number of communities using AGM
Definition: agm.cpp:544
bool Empty() const
Definition: dt.h:488
static TStr Fmt(const char *FmtStr,...)
Definition: dt.cpp:1599
void Pack()
Reduces vector capacity (frees memory) to match its size.
Definition: ds.h:1057
TSizeTy GetXDim() const
Definition: ds.h:2249
double GetStepSizeByLineSearch(const TFltV &DeltaV, const TFltV &GradV, const double &Alpha, const double &Beta)
Definition: agm.cpp:837
void Gradient(TFltV &GradV)
Definition: agm.cpp:869
void Defrag(const bool &OnlyNodeLinks=false)
Defragments the graph.
Definition: graph.cpp:160
TNodeI EndNI() const
Returns an iterator referring to the past-the-end node in the graph.
Definition: graph.h:239
static void DumpCmtyVV(const TStr &OutFNm, const TVec< TIntV > &CmtyVV)
dump bipartite community affiliation into a text file
Definition: agm.cpp:286
TVal1 Val1
Definition: ds.h:34
int GetNbrNId(const int &NodeN) const
Returns ID of NodeN-th neighboring node.
Definition: graph.h:111
TVal2 Val2
Definition: ds.h:35
void Clr(const bool &DoDel=true, const int &NoDelLim=-1, const bool &ResetDat=true)
Definition: hash.h:361
PLogRegPredict CalcLogRegGradient(const TVec< TFltV > &XPt, const TFltV &yPt, const TStr &PlotNm=TStr(), const double &ChangeEps=0.01, const int &MaxStep=200, const bool InterceptPt=false)
Definition: agm.cpp:901
int MLENewton(const double &ChangeEps, const int &MaxStep, const TStr PlotNm)
Definition: agm.cpp:770
bool IsNode(const int &NId) const
Tests whether ID NId is a node.
Definition: graph.h:235
bool Next()
Loads next line from the input file.
Definition: ss.cpp:412
static double DotProduct(const TFltV &x, const TFltV &y)
Definition: linalg.cpp:165
static void RndConnectInsideCommunity(PUNGraph &Graph, const TIntV &CmtyV, const double &Prob, TRnd &Rnd)
Connect members of a given community by Erdos-Renyi.
Definition: agm.cpp:10
TTriple< TInt, TInt, TInt > TIntTr
Definition: ds.h:171
void Gen(const TSizeTy &_Vals)
Constructs a vector (an array) of _Vals elements.
Definition: ds.h:523
int GetUniDevInt(const int &Range=0)
Definition: dt.cpp:39
static TVec< TFlt, TSizeTy > GetV(const TFlt &Val1)
Returns a vector on element Val1.
Definition: ds.h:848
char * CStr()
Definition: dt.h:476
bool IsKey(const TKey &Key) const
Definition: hash.h:258
bool IsInt(const int &FldN) const
Checks whether fields FldN is an integer.
Definition: ss.h:143
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:602
TNodeI BegNI() const
Returns an iterator referring to the first node in the graph.
Definition: graph.h:237
void DelLast()
Removes the last element of the vector.
Definition: ds.h:665
int Len() const
Definition: hash.h:228
static PUNGraph GenAGM(TVec< TIntV > &CmtyVV, const double &DensityCoef, const double &ScaleCoef, TRnd &Rnd=TInt::Rnd)
Definition: agm.cpp:37
TDat & AddDat(const TKey &Key)
Definition: hash.h:238
static void PlotValV(const TVec< TVal1 > &ValV, const TStr &OutFNmPref, const TStr &Desc="", const TStr &XLabel="", const TStr &YLabel="", const TGpScaleTy &ScaleTy=gpsAuto, const bool &PowerFit=false, const TGpSeriesTy &SeriesTy=gpwLinesPoints)
Definition: gnuplot.h:398
void Gen(const TSizeTy &_XDim, const TSizeTy &_YDim)
Definition: ds.h:2246
const TKey & GetKey(const int &KeyId) const
Definition: hash.h:252
TVal3 Val3
Definition: ds.h:134
double GetPowerDev(const double &AlphaSlope)
Definition: dt.h:47
int GetKeyId(const char *Key) const
Definition: hash.h:994
static void RewireCmtyNID(THash< TInt, TIntV > &CmtyVH, TRnd &Rnd)
rewire bipartite community affiliation graphs
Definition: agm.cpp:202
void Swap(TRec &Rec1, TRec &Rec2)
Definition: bd.h:568
static const double Mn
Definition: dt.h:1387
const TVal & At(const TSizeTy &X, const TSizeTy &Y) const
Definition: ds.h:2255