SNAP Library 6.0, Developer Reference  2020-12-09 16:24:20
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
agmdirected.cpp
Go to the documentation of this file.
1 
2 #include "stdafx.h"
3 #include "agmfast.h"
4 #include "agmdirected.h"
5 #include "Snap.h"
6 #include "agm.h"
7 
8 void TCoda::Save(TSOut& SOut) {
9  G->Save(SOut);
10  F.Save(SOut);
11  H.Save(SOut);
12  NIDV.Save(SOut);
13  RegCoef.Save(SOut);
14  SumFV.Save(SOut);
15  SumHV.Save(SOut);
16  NodesOk.Save(SOut);
17  MinVal.Save(SOut);
18  MaxVal.Save(SOut);
19  NegWgt.Save(SOut);
20  NumComs.Save(SOut);
21  HOVIDSV.Save(SOut);
22  PNoCom.Save(SOut);
23 }
24 
25 void TCoda::Load(TSIn& SIn, const int& RndSeed) {
26  G->Load(SIn);
27  F.Load(SIn);
28  H.Load(SIn);
29  NIDV.Load(SIn);
30  RegCoef.Load(SIn);
31  SumFV.Load(SIn);
32  SumHV.Load(SIn);
33  NodesOk.Load(SIn);
34  MinVal.Load(SIn);
35  MaxVal.Load(SIn);
36  NegWgt.Load(SIn);
37  NumComs.Load(SIn);
38  HOVIDSV.Load(SIn);
39  PNoCom.Load(SIn);
40  Rnd.PutSeed(RndSeed);
41 }
42 
43 void TCoda::RandomInit(const int InitComs) {
44  F.Gen(G->GetNodes());
45  H.Gen(G->GetNodes());
46  SumFV.Gen(InitComs);
47  SumHV.Gen(InitComs);
48  NumComs = InitComs;
49  for (int u = 0; u < F.Len(); u++) {
50  //assign to just one community
51  int Mem = G->GetNI(u).GetOutDeg();
52  if (Mem > 10) { Mem = 10; }
53  for (int c = 0; c < Mem; c++) {
54  int CID = Rnd.GetUniDevInt(InitComs);
55  AddComOut(u, CID, Rnd.GetUniDev());
56  }
57  }
58  for (int u = 0; u < H.Len(); u++) {
59  //assign to just one community
60  int Mem = G->GetNI(u).GetInDeg();
61  if (Mem > 10) { Mem = 10; }
62  for (int c = 0; c < Mem; c++) {
63  int CID = Rnd.GetUniDevInt(InitComs);
64  AddComIn(u, CID, Rnd.GetUniDev());
65  }
66  }
67  //assign a member to zero-member community (if any)
68  for (int c = 0; c < SumFV.Len(); c++) {
69  if (SumFV[c] == 0.0) {
70  int UID = Rnd.GetUniDevInt(G->GetNodes());
71  AddComOut(UID, c, Rnd.GetUniDev());
72  }
73  }
74  //assign a member to zero-member community (if any)
75  for (int c = 0; c < SumHV.Len(); c++) {
76  if (SumHV[c] == 0.0) {
77  int UID = Rnd.GetUniDevInt(G->GetNodes());
78  AddComIn(UID, c, Rnd.GetUniDev());
79  }
80  }
81 }
82 
83 void TCoda::NeighborComInit(const int InitComs) {
84  //initialize with best neighborhood communities (Gleich et.al. KDD'12)
85  TExeTm RunTm;
86  TFltIntPrV NIdPhiV(F.Len(), 0);
87  TAGMFastUtil::GetNIdPhiV<PNGraph>(G, NIdPhiV);
88  NeighborComInit(NIdPhiV, InitComs);
89 }
90 
91 void TCoda::NeighborComInit(TFltIntPrV& NIdPhiV, const int InitComs) {
92  NIdPhiV.Sort(true);
93  F.Gen(G->GetNodes());
94  H.Gen(G->GetNodes());
95  SumFV.Gen(InitComs);
96  SumHV.Gen(InitComs);
97  NumComs = InitComs;
98  //TIntFltH NCPhiH(F.Len());
99  TIntSet InvalidNIDS(F.Len());
100  TIntV ChosenNIDV(InitComs, 0); //FOR DEBUG
101  //choose nodes with local minimum in conductance
102  int CurCID = 0;
103  for (int ui = 0; ui < NIdPhiV.Len(); ui++) {
104  int UID = NIdPhiV[ui].Val2;
105  fflush(stdout);
106  if (InvalidNIDS.IsKey(UID)) { continue; }
107  ChosenNIDV.Add(UID); //FOR DEBUG
108  //add the node and its neighbors to the current community
109  TNGraph::TNodeI NI = G->GetNI(UID);
110  if (NI.GetOutDeg() > 0) { AddComOut(UID, CurCID, 1.0); }
111  if (NI.GetInDeg() > 0) { AddComIn(UID, CurCID, 1.0); }
112  fflush(stdout);
113  //add neighbors depending on whether they have incoming / outgoing edges from the center node (NI)
114  for (int e = 0; e < NI.GetDeg(); e++) {
115  int VID = NI.GetNbrNId(e);
116  TNGraph::TNodeI VI = G->GetNI(VID);
117  if (VI.GetOutDeg() > 0) { AddComOut(VID, CurCID, 1.0); }
118  if (VI.GetInDeg() > 0) { AddComIn(VID, CurCID, 1.0); }
119  }
120  //exclude its neighbors from the next considerations
121  for (int e = 0; e < NI.GetDeg(); e++) {
122  InvalidNIDS.AddKey(NI.GetNbrNId(e));
123  }
124  CurCID++;
125  fflush(stdout);
126  if (CurCID >= NumComs) { break; }
127  }
128  if (NumComs > CurCID) {
129  printf("%d communities needed to fill randomly\n", NumComs - CurCID);
130  }
131  //assign a member to zero-member community (if any)
132  for (int c = 0; c < SumFV.Len(); c++) {
133  if (SumFV[c] == 0.0) {
134  int ComSz = 10;
135  for (int u = 0; u < ComSz; u++) {
136  int UID = Rnd.GetUniDevInt(G->GetNodes());
137  AddComOut(UID, c, Rnd.GetUniDev());
138  }
139  }
140  }
141  //assign a member to zero-member community (if any)
142  for (int c = 0; c < SumHV.Len(); c++) {
143  if (SumHV[c] == 0.0) {
144  int ComSz = 10;
145  for (int u = 0; u < ComSz; u++) {
146  int UID = Rnd.GetUniDevInt(G->GetNodes());
147  AddComIn(UID, c, Rnd.GetUniDev());
148  }
149  }
150  }
151 }
152 
154  ScoreV.Gen(G->GetNodes() * G->GetNodes(), 0);
155  TIntV NIDV;
156  G->GetNIdV(NIDV);
157  TIntSet Cuv;
158  for (int u = 0; u < NIDV.Len(); u++) {
159  int UID = NIDV[u];
160  for (int v = 0; v < NIDV.Len(); v++) {
161  int VID = NIDV[v];
162  if (UID == VID) { continue; }
163  if (! G->IsEdge(UID, VID)) {
164  double Val = 1.0 - Prediction(UID, VID);
165  ScoreV.Add(TFltIntIntTr(Val, UID, VID));
166  }
167  }
168  }
169 }
170 
171 void TCoda::SetCmtyVV(const TVec<TIntV>& CmtyVVOut, const TVec<TIntV>& CmtyVVIn) {
172  IAssert(CmtyVVOut.Len() == CmtyVVIn.Len());
173  F.Gen(G->GetNodes());
174  H.Gen(G->GetNodes());
175  SumFV.Gen(CmtyVVOut.Len());
176  SumHV.Gen(CmtyVVIn.Len());
177  NumComs = CmtyVVOut.Len();
178  TIntH NIDIdxH(NIDV.Len());
179  if (! NodesOk) {
180  for (int u = 0; u < NIDV.Len(); u++) {
181  NIDIdxH.AddDat(NIDV[u], u);
182  }
183  }
184  for (int c = 0; c < CmtyVVOut.Len(); c++) {
185  for (int u = 0; u < CmtyVVOut[c].Len(); u++) {
186  int UID = CmtyVVOut[c][u];
187  if (! NodesOk) { UID = NIDIdxH.GetDat(UID); }
188  if (G->IsNode(UID)) {
189  AddComOut(UID, c, 1.0);
190  }
191  }
192  }
193  for (int c = 0; c < CmtyVVIn.Len(); c++) {
194  for (int u = 0; u < CmtyVVIn[c].Len(); u++) {
195  int UID = CmtyVVIn[c][u];
196  if (! NodesOk) { UID = NIDIdxH.GetDat(UID); }
197  if (G->IsNode(UID)) {
198  AddComIn(UID, c, 1.0);
199  }
200  }
201  }
202 }
203 
204 void TCoda::SetGraph(const PNGraph& GraphPt) {
205  G = GraphPt;
206  HOVIDSV.Gen(G->GetNodes());
207  NodesOk = true;
208  GraphPt->GetNIdV(NIDV);
209  // check that nodes IDs are {0,1,..,Nodes-1}
210  for (int nid = 0; nid < GraphPt->GetNodes(); nid++) {
211  if (! GraphPt->IsNode(nid)) {
212  NodesOk = false;
213  break;
214  }
215  }
216  if (! NodesOk) {
217  printf("rearrage nodes\n");
218  G = TSnap::GetSubGraph(GraphPt, NIDV, true);
219  for (int nid = 0; nid < G->GetNodes(); nid++) {
220  IAssert(G->IsNode(nid));
221  }
222  }
224 
225  PNoCom = 1.0 / (double) G->GetNodes();
226  DoParallel = false;
227  if (1.0 / PNoCom > sqrt(TFlt::Mx)) { PNoCom = 0.99 / sqrt(TFlt::Mx); } // to prevent overflow
228  NegWgt = 1.0;
229 }
230 
231 double TCoda::Likelihood(const bool _DoParallel) {
232  TExeTm ExeTm;
233  double L = 0.0;
234  if (_DoParallel) {
235  #pragma omp parallel for
236  for (int u = 0; u < F.Len(); u++) {
237  double LU = LikelihoodForNode(true, u);
238  #pragma omp atomic
239  L += LU;
240  }
241  }
242  else {
243  for (int u = 0; u < F.Len(); u++) {
244  double LU = LikelihoodForNode(true, u);
245  L += LU;
246  }
247  }
248  return L;
249 }
250 
251 double TCoda::LikelihoodForNode(const bool IsRow, const int UID) {
252  if (IsRow) {
253  return LikelihoodForNode(IsRow, UID, F[UID]);
254  } else {
255  return LikelihoodForNode(IsRow, UID, H[UID]);
256  }
257 }
258 
259 double TCoda::LikelihoodForNode(const bool IsRow, const int UID, const TIntFltH& FU) {
260  double L = 0.0;
261  TFltV HOSumHV; //adjust for Hv of v hold out
262  if (HOVIDSV[UID].Len() > 0) {
263  HOSumHV.Gen(NumComs);
264  for (int e = 0; e < HOVIDSV[UID].Len(); e++) {
265  for (int c = 0; c < SumHV.Len(); c++) {
266  HOSumHV[c] += GetCom(! IsRow, HOVIDSV[UID][e], c);
267  }
268  }
269  }
270  TNGraph::TNodeI NI = G->GetNI(UID);
271  const int Deg = IsRow ? NI.GetOutDeg(): NI.GetInDeg();
272  for (int e = 0; e < Deg; e++) {
273  const int v = IsRow ? NI.GetOutNId(e): NI.GetInNId(e);
274  if (v == UID) { continue; }
275  if (HOVIDSV[UID].IsKey(v)) { continue; }
276  if (IsRow) {
277  L += log (1.0 - Prediction(FU, H[v])) + NegWgt * DotProduct(FU, H[v]);
278  } else {
279  L += log (1.0 - Prediction(F[v], FU)) + NegWgt * DotProduct(F[v], FU);
280  }
281  }
282  for (TIntFltH::TIter HI = FU.BegI(); HI < FU.EndI(); HI++) {
283  double HOSum = HOVIDSV[UID].Len() > 0? HOSumHV[HI.GetKey()].Val: 0.0;//subtract Hold out pairs only if hold out pairs exist
284  L -= NegWgt * (GetSumVal(! IsRow, HI.GetKey()) - HOSum - GetCom(! IsRow, UID, HI.GetKey())) * HI.GetDat();
285  }
286  //add regularization
287  if (RegCoef > 0.0) { //L1
288  L -= RegCoef * Sum(FU);
289  }
290  if (RegCoef < 0.0) { //L2
291  L += RegCoef * Norm2(FU);
292  }
293  return L;
294 }
295 
296 
297 /*
298 double TCoda::LikelihoodForRow(const int UID) {
299  return LikelihoodForRow(UID, F[UID]);
300 }
301 
302 
303 double TCoda::LikelihoodForRow(const int UID, const TIntFltH& FU) {
304  double L = 0.0;
305  TFltV HOSumHV; //adjust for Hv of v hold out
306  if (HOVIDSV[UID].Len() > 0) {
307  HOSumHV.Gen(SumFV.Len());
308 
309  for (int e = 0; e < HOVIDSV[UID].Len(); e++) {
310  for (int c = 0; c < SumHV.Len(); c++) {
311  HOSumHV[c] += GetComIn(HOVIDSV[UID][e], c);
312  }
313  }
314  }
315  TNGraph::TNodeI NI = G->GetNI(UID);
316  for (int e = 0; e < NI.GetOutDeg(); e++) {
317  int v = NI.GetOutNId(e);
318  if (v == UID) { continue; }
319  if (HOVIDSV[UID].IsKey(v)) { continue; }
320  double LU = log (1.0 - Prediction(FU, H[v])) + NegWgt * DotProduct(FU, H[v]);
321  L += LU;
322  }
323  for (TIntFltH::TIter HI = FU.BegI(); HI < FU.EndI(); HI++) {
324  double HOSum = HOVIDSV[UID].Len() > 0? HOSumHV[HI.GetKey()].Val: 0.0;//subtract Hold out pairs only if hold out pairs exist
325  double LU = NegWgt * (SumHV[HI.GetKey()] - HOSum - GetComIn(UID, HI.GetKey())) * HI.GetDat();
326  L -= LU;
327  }
328  //add regularization
329  if (RegCoef > 0.0) { //L1
330  L -= RegCoef * Sum(FU);
331  }
332  if (RegCoef < 0.0) { //L2
333  L += RegCoef * Norm2(FU);
334  }
335 
336  return L;
337 }
338 
339 double TCoda::LikelihoodForCol(const int VID) {
340  return LikelihoodForCol(VID, H[VID]);
341 }
342 
343 
344 double TCoda::LikelihoodForCol(const int VID, const TIntFltH& HV) {
345  double L = 0.0;
346  TFltV HOSumFV; //adjust for Fv of v hold out
347  if (HOVIDSV[VID].Len() > 0) {
348  HOSumFV.Gen(SumFV.Len());
349  for (int e = 0; e < HOVIDSV[VID].Len(); e++) {
350  for (int c = 0; c < SumFV.Len(); c++) {
351  HOSumFV[c] += GetComOut(HOVIDSV[VID][e], c);
352  }
353  }
354  }
355  TNGraph::TNodeI NI = G->GetNI(VID);
356  for (int e = 0; e < NI.GetInDeg(); e++) {
357  int v = NI.GetInNId(e);
358  if (v == VID) { continue; }
359  if (HOVIDSV[VID].IsKey(v)) { continue; }
360  L += log (1.0 - Prediction(F[v], HV)) + NegWgt * DotProduct(F[v], HV);
361  }
362  for (TIntFltH::TIter HI = HV.BegI(); HI < HV.EndI(); HI++) {
363  double HOSum = HOVIDSV[VID].Len() > 0? HOSumFV[HI.GetKey()].Val: 0.0;//subtract Hold out pairs only if hold out pairs exist
364  L -= NegWgt * (SumFV[HI.GetKey()] - HOSum - GetComOut(VID, HI.GetKey())) * HI.GetDat();
365  }
366  //add regularization
367  if (RegCoef > 0.0) { //L1
368  L -= RegCoef * Sum(HV);
369  }
370  if (RegCoef < 0.0) { //L2
371  L += RegCoef * Norm2(HV);
372  }
373 
374  return L;
375 }
376 */
377 void TCoda::GradientForNode(const bool IsRow, const int UID, TIntFltH& GradU, const TIntSet& CIDSet) {
378  GradU.Gen(CIDSet.Len());
379 
380  TFltV HOSumHV; //adjust for Hv of v hold out
381  if (HOVIDSV[UID].Len() > 0) {
382  HOSumHV.Gen(NumComs);
383  for (int e = 0; e < HOVIDSV[UID].Len(); e++) {
384  for (int c = 0; c < SumHV.Len(); c++) {
385  HOSumHV[c] += GetCom(! IsRow, HOVIDSV[UID][e], c);
386  }
387  }
388  }
389 
390  TNGraph::TNodeI NI = G->GetNI(UID);
391  int Deg = IsRow ? NI.GetOutDeg(): NI.GetInDeg();
392  TFltV PredV(Deg), GradV(CIDSet.Len());
393  TIntV CIDV(CIDSet.Len());
394  for (int e = 0; e < Deg; e++) {
395  int VID = IsRow? NI.GetOutNId(e): NI.GetInNId(e);
396  if (VID == UID) { continue; }
397  if (HOVIDSV[UID].IsKey(VID)) { continue; }
398  PredV[e] = IsRow? Prediction(UID, VID): Prediction(VID, UID);
399  }
400 
401  for (int c = 0; c < CIDSet.Len(); c++) {
402  int CID = CIDSet.GetKey(c);
403  double Val = 0.0;
404  for (int e = 0; e < Deg; e++) {
405  int VID = IsRow? NI.GetOutNId(e): NI.GetInNId(e);
406  if (VID == UID) { continue; }
407  if (HOVIDSV[UID].IsKey(VID)) { continue; }
408  Val += PredV[e] * GetCom(! IsRow, VID, CID) / (1.0 - PredV[e]) + NegWgt * GetCom(! IsRow, VID, CID);
409  }
410  double HOSum = HOVIDSV[UID].Len() > 0? HOSumHV[CID].Val: 0.0;//subtract Hold out pairs only if hold out pairs exist
411  Val -= NegWgt * (GetSumVal(! IsRow, CID) - HOSum - GetCom(! IsRow, UID, CID));
412  CIDV[c] = CID;
413  GradV[c] = Val;
414  }
415  //add regularization
416  if (RegCoef > 0.0) { //L1
417  for (int c = 0; c < GradV.Len(); c++) {
418  GradV[c] -= RegCoef;
419  }
420  }
421  if (RegCoef < 0.0) { //L2
422  for (int c = 0; c < GradV.Len(); c++) {
423  GradV[c] += 2 * RegCoef * GetCom(IsRow, UID, CIDV[c]);
424  }
425  }
426  for (int c = 0; c < GradV.Len(); c++) {
427  if (GetCom(IsRow, UID, CIDV[c]) == 0.0 && GradV[c] < 0.0) { continue; }
428  if (fabs(GradV[c]) < 0.0001) { continue; }
429  GradU.AddDat(CIDV[c], GradV[c]);
430  }
431  for (int c = 0; c < GradU.Len(); c++) {
432  if (GradU[c] >= 10) { GradU[c] = 10; }
433  if (GradU[c] <= -10) { GradU[c] = -10; }
434  IAssert(GradU[c] >= -10);
435  }
436 }
437 
438 /*
439 void TCoda::GradientForRow(const int UID, TIntFltH& GradU, const TIntSet& CIDSet) {
440  GradU.Gen(CIDSet.Len());
441 
442  TFltV HOSumHV; //adjust for Hv of v hold out
443  if (HOVIDSV[UID].Len() > 0) {
444  HOSumHV.Gen(SumHV.Len());
445 
446  for (int e = 0; e < HOVIDSV[UID].Len(); e++) {
447  for (int c = 0; c < SumHV.Len(); c++) {
448  HOSumHV[c] += GetComIn(HOVIDSV[UID][e], c);
449  }
450  }
451  }
452 
453  TNGraph::TNodeI NI = G->GetNI(UID);
454  int Deg = NI.GetOutDeg();
455  TFltV PredV(Deg), GradV(CIDSet.Len());
456  TIntV CIDV(CIDSet.Len());
457  for (int e = 0; e < Deg; e++) {
458  if (NI.GetOutNId(e) == UID) { continue; }
459  if (HOVIDSV[UID].IsKey(NI.GetOutNId(e))) { continue; }
460  PredV[e] = Prediction(UID, NI.GetOutNId(e));
461  }
462 
463  for (int c = 0; c < CIDSet.Len(); c++) {
464  int CID = CIDSet.GetKey(c);
465  double Val = 0.0;
466  for (int e = 0; e < Deg; e++) {
467  int VID = NI.GetNbrNId(e);
468  if (VID == UID) { continue; }
469  if (HOVIDSV[UID].IsKey(VID)) { continue; }
470  Val += PredV[e] * GetComIn(VID, CID) / (1.0 - PredV[e]) + NegWgt * GetComIn(VID, CID);
471  }
472  double HOSum = HOVIDSV[UID].Len() > 0? HOSumHV[CID].Val: 0.0;//subtract Hold out pairs only if hold out pairs exist
473  Val -= NegWgt * (SumHV[CID] - HOSum - GetComIn(UID, CID));
474  CIDV[c] = CID;
475  GradV[c] = Val;
476  }
477  //add regularization
478  if (RegCoef > 0.0) { //L1
479  for (int c = 0; c < GradV.Len(); c++) {
480  GradV[c] -= RegCoef;
481  }
482  }
483  if (RegCoef < 0.0) { //L2
484  for (int c = 0; c < GradV.Len(); c++) {
485  GradV[c] += 2 * RegCoef * GetComOut(UID, CIDV[c]);
486  }
487  }
488  for (int c = 0; c < GradV.Len(); c++) {
489  if (GetComOut(UID, CIDV[c]) == 0.0 && GradV[c] < 0.0) { continue; }
490  if (fabs(GradV[c]) < 0.0001) { continue; }
491  GradU.AddDat(CIDV[c], GradV[c]);
492  }
493  for (int c = 0; c < GradU.Len(); c++) {
494  if (GradU[c] >= 10) { GradU[c] = 10; }
495  if (GradU[c] <= -10) { GradU[c] = -10; }
496  IAssert(GradU[c] >= -10);
497  }
498 }
499 */
500 
501 void TCoda::GetCmtyVV(const bool IsOut, TVec<TIntV>& CmtyVV) {
502  GetCmtyVV(IsOut, CmtyVV, sqrt(1.0 / G->GetNodes()), 3);
503 }
504 
506 void TCoda::GetCommunity(TIntV& CmtyVIn, TIntV& CmtyVOut, const int CID, const double Thres) {
507  TIntFltH NIDFucH(F.Len() / 10), NIDHucH(F.Len() / 10);
508  for (int u = 0; u < NIDV.Len(); u++) {
509  int NID = u;
510  if (! NodesOk) { NID = NIDV[u]; }
511  if (GetCom(true, u, CID) >= Thres) { NIDFucH.AddDat(NID, GetCom(true, u, CID)); }
512  if (GetCom(false, u, CID) >= Thres) { NIDHucH.AddDat(NID, GetCom(false, u, CID)); }
513  }
514  NIDFucH.SortByDat(false);
515  NIDHucH.SortByDat(false);
516  NIDFucH.GetKeyV(CmtyVOut);
517  NIDHucH.GetKeyV(CmtyVIn);
518 }
519 
520 void TCoda::GetTopCIDs(TIntV& CIdV, const int TopK, const int IsAverage, const int MinSz) {
521  TIntFltH CIdFHH;
522  for (int c = 0; c < GetNumComs(); c++) {
523  if (IsAverage == 1) {
524  TIntV CmtyVIn, CmtyVOut;
525  GetCommunity(CmtyVIn, CmtyVOut, c);
526  if (CmtyVIn.Len() == 0 || CmtyVOut.Len() == 0) { continue; }
527  if (CmtyVIn.Len() < MinSz || CmtyVOut.Len() < MinSz) { continue; }
528  CIdFHH.AddDat(c, GetSumVal(true, c) * GetSumVal(false, c) / (double) CmtyVIn.Len() / (double) CmtyVOut.Len());
529  } else {
530  CIdFHH.AddDat(c, GetSumVal(true, c) * GetSumVal(false, c));
531  }
532  }
533  CIdFHH.SortByDat(false);
534  CIdFHH.GetKeyV(CIdV);
535  if (TopK < CIdFHH.Len()) { CIdV.Trunc(TopK); }
536 }
537 
539 void TCoda::GetCmtyVV(const bool IsOut, TVec<TIntV>& CmtyVV, const double Thres, const int MinSz) {
540  CmtyVV.Gen(NumComs, 0);
541  TIntFltH CIDSumFH(NumComs);
542  for (int c = 0; c < NumComs; c++) {
543  CIDSumFH.AddDat(c, GetSumVal(IsOut, c));
544  }
545  CIDSumFH.SortByDat(false);
546  for (int c = 0; c < NumComs; c++) {
547  int CID = CIDSumFH.GetKey(c);
548  TIntFltH NIDFucH, NIDHucH, NIDInOutH;
549  TIntV CmtyV;
550  GetNIDValH(NIDInOutH, NIDFucH, NIDHucH, CID, Thres);
551  if (IsOut) {
552  NIDFucH.GetKeyV(CmtyV);
553  } else {
554  NIDHucH.GetKeyV(CmtyV);
555  }
556  if (CmtyV.Len() >= MinSz) { CmtyVV.Add(CmtyV); }
557  }
558  if ( NumComs != CmtyVV.Len()) {
559  printf("Community vector generated. %d communities are ommitted\n", NumComs.Val - CmtyVV.Len());
560  }
561 }
562 
563 void TCoda::GetCmtyVVUnSorted(const bool IsOut, TVec<TIntV>& CmtyVV, const double Thres, const int MinSz) {
564  CmtyVV.Gen(NumComs, 0);
565  for (int c = 0; c < NumComs; c++) {
566  TIntV CmtyV((int) (GetSumVal(IsOut, c) * 10), 0);
567  for (int u = 0; u < G->GetNodes(); u++) {
568  if (GetCom(IsOut, u, c) > Thres) { CmtyV.Add(NIDV[u]); }
569  }
570  if (CmtyV.Len() >= MinSz) { CmtyVV.Add(CmtyV); }
571  }
572  if ( NumComs != CmtyVV.Len()) {
573  printf("Community vector generated. %d communities are ommitted\n", NumComs.Val - CmtyVV.Len());
574  }
575 }
576 
578  PNGraph NewG = TNGraph::New(G->GetNodes(), -1);
579  for (TNGraph::TNodeI NI = G->BegNI(); NI < G->EndNI(); NI++) {
580  //add node
581  int NIdx = NI.GetId();
582  int NID = NIDV[NIdx];
583  if (! NewG->IsNode(NID)) { NewG->AddNode(NID); }
584  //add edge
585  for (int e = 0; e < NI.GetOutDeg(); e++) {
586  int OutNID = NIDV[NI.GetOutNId(e)];
587  if (! NewG->IsNode(OutNID)) { NewG->AddNode(OutNID); }
588  NewG->AddEdge(NID, OutNID);
589  }
590  }
591  IAssert(G->GetNodes() == NewG->GetNodes());
592  IAssert(G->GetEdges() == NewG->GetEdges());
593  return NewG;
594 }
595 
596 void TCoda::GetNIDValH(TIntFltH& NIdValInOutH, TIntFltH& NIdValOutH, TIntFltH& NIdValInH, const int CID, const double Thres) {
597  NIdValOutH.Gen((int) GetSumVal(true, CID) + 1);
598  NIdValInH.Gen((int) GetSumVal(false, CID) + 1);
599  NIdValInOutH.Gen((int) GetSumVal(false, CID) + 1);
600  if (GetSumVal(true, CID) < Thres && GetSumVal(false, CID) < Thres) { return; }
601  for (int u = 0; u < NIDV.Len(); u++) {
602  if (GetCom(true, u, CID) >= Thres && GetCom(false, u, CID) >= Thres) {
603  NIdValInOutH.AddDat(NIDV[u], GetCom(true, u, CID) + GetCom(false, u, CID));
604  }
605  if (GetCom(true, u, CID) >= Thres) {
606  NIdValOutH.AddDat(NIDV[u], GetCom(true, u, CID));
607  }
608  if (GetCom(false, u, CID) >= Thres) {
609  NIdValInH.AddDat(NIDV[u], GetCom(false, u, CID));
610  }
611  }
612  NIdValInH.SortByDat(false);
613  NIdValOutH.SortByDat(false);
614  NIdValInOutH.SortByDat(false);
615 }
616 
617 void TCoda::DumpMemberships(const TStr& OutFNm, const TStrHash<TInt>& NodeNameH, const double Thres) {
618  if (NodeNameH.Len() > 0) { IAssert(NodeNameH.Len() == G->GetNodes()); }
619  FILE* FId = fopen(OutFNm.CStr(), "wt");
620  TIntFltH CIDSumFH(NumComs);
621  for (int c = 0; c < NumComs; c++) {
622  CIDSumFH.AddDat(c, GetSumVal(true, c) * GetSumVal(false, c));
623  }
624  CIDSumFH.SortByDat(false);
625  for (int c = 0; c < NumComs; c++) {
626  int CID = CIDSumFH.GetKey(c);
627  TIntFltH NIDOutFH, NIDInFH, NIDInOutFH;
628  GetNIDValH(NIDInOutFH, NIDOutFH, NIDInFH, CID, Thres);
629  if (NIDOutFH.Len() == 0 || NIDInFH.Len() == 0) { continue; }
630 
631  /*
632  if (GetSumVal(true, CID) < Thres && GetSumVal(false, CID) < Thres) { continue; }
633  for (int u = 0; u < NIDV.Len(); u++) {
634  if (GetCom(true, u, CID) >= Thres && GetCom(false, u, CID) >= Thres) {
635  NIDInOutFH.AddDat(u, GetCom(true, u, CID) + GetCom(false, u, CID));
636  } else if (GetCom(true, u, CID) >= Thres) {
637  NIDOutFH.AddDat(u, GetCom(true, u, CID));
638  } else if (GetCom(false, u, CID) >= Thres) {
639  NIDInFH.AddDat(u, GetCom(false, u, CID));
640  }
641  }
642  NIDInOutFH.SortByDat(false);
643  NIDInFH.SortByDat(false);
644  NIDOutFH.SortByDat(false);
645  */
646  fprintf(FId, "%d\t%d\t%d\t%f\t%f\t%f\t", NIDInOutFH.Len(), NIDInFH.Len() - NIDInOutFH.Len(), NIDOutFH.Len() - NIDInOutFH.Len(), CIDSumFH.GetDat(CID).Val, GetSumVal(false, CID).Val, GetSumVal(true, CID).Val);
647  fprintf(FId, "InOut:\t");
648  for (int u = 0; u < NIDInOutFH.Len(); u++) {
649  int NIdx = NIDInOutFH.GetKey(u);
650  fprintf(FId, "%s (%f)\t", NodeNameH.GetKey(NIdx), NIDInOutFH[u].Val);
651  }
652  fprintf(FId, "In:\t");
653  for (int u = 0; u < NIDInFH.Len(); u++) {
654  int NIdx = NIDInFH.GetKey(u);
655  fprintf(FId, "%s (%f)\t", NodeNameH.GetKey(NIdx), NIDInFH[u].Val);
656  }
657  fprintf(FId, "Out:\t");
658  for (int u = 0; u < NIDOutFH.Len(); u++) {
659  int NIdx = NIDOutFH.GetKey(u);
660  fprintf(FId, "%s (%f)\t", NodeNameH.GetKey(NIdx), NIDOutFH[u].Val);
661  }
662  fprintf(FId, "\n");
663  }
664  fclose(FId);
665 }
666 
667 
668 void TCoda::DumpMemberships(const TStr& OutFNm, const double Thres) {
669  TStrHash<TInt> NodeNameH(G->GetNodes(), false);
670  for (int u = 0; u < NIDV.Len(); u++) { NodeNameH.AddKey(TStr::Fmt("%d", NIDV[u].Val)); }
671  DumpMemberships(OutFNm, NodeNameH, Thres);
672 }
673 
674 void TCoda::GetCmtyS(TIntSet& CmtySOut, TIntSet& CmtySIn, const int CID, const double Thres) {
675  CmtySOut.Gen(G->GetNodes() / 10);
676  CmtySIn.Gen(G->GetNodes() / 10);
677  for (int u = 0; u < NIDV.Len(); u++) {
678  if (GetCom(true, u, CID) > Thres) {
679  //CmtySOut.AddKey(
680  }
681  }
682 }
683 
684 /*
685 void TCoda::GetCmtyVVIn(TVec<TIntV>& CmtyVV) {
686  GetCmtyVVIn(CmtyVV, sqrt(1.0 / G->GetNodes()), 3);
687 }
688 
690 void TCoda::GetCmtyVVIn(TVec<TIntV>& CmtyVV, const double Thres, const int MinSz) {
691  CmtyVV.Gen(NumComs, 0);
692  TIntFltH CIDSumHH(NumComs);
693  for (int c = 0; c < SumHV.Len(); c++) {
694  CIDSumHH.AddDat(c, SumHV[c]);
695  }
696  CIDSumHH.SortByDat(false);
697  for (int c = 0; c < NumComs; c++) {
698  int CID = CIDSumHH.GetKey(c);
699  TIntFltH NIDHucH(H.Len() / 10);
700  TIntV CmtyV;
701  IAssert(SumHV[CID] == CIDSumHH.GetDat(CID));
702  if (SumHV[CID] < Thres) { continue; }
703  for (int u = 0; u < H.Len(); u++) {
704  int NID = u;
705  if (! NodesOk) { NID = NIDV[u]; }
706  if (GetComIn(u, CID) >= Thres) { NIDHucH.AddDat(NID, GetComIn(u, CID)); }
707  }
708  NIDHucH.SortByDat(false);
709  NIDHucH.GetKeyV(CmtyV);
710  if (CmtyV.Len() >= MinSz) { CmtyVV.Add(CmtyV); }
711  }
712  if ( NumComs != CmtyVV.Len()) {
713  printf("Community vector generated. %d communities are ommitted\n", NumComs.Val - CmtyVV.Len());
714  }
715 }
716 */
717 void TCoda::GetCmtyVV(TVec<TIntV>& CmtyVVOut, TVec<TIntV>& CmtyVVIn, const int MinSz) {
718  GetCmtyVV(false, CmtyVVIn, sqrt(1.0 / G->GetNodes()), MinSz);
719  GetCmtyVV(true, CmtyVVOut, sqrt(1.0 / G->GetNodes()), MinSz);
720 }
721 
722 void TCoda::GetCmtyVVUnSorted(TVec<TIntV>& CmtyVVOut, TVec<TIntV>& CmtyVVIn) {
723  GetCmtyVVUnSorted(false, CmtyVVIn, sqrt(1.0 / G->GetNodes()));
724  GetCmtyVVUnSorted(true, CmtyVVOut, sqrt(1.0 / G->GetNodes()));
725 }
726 
727 void TCoda::GetCmtyVV(TVec<TIntV>& CmtyVVOut, TVec<TIntV>& CmtyVVIn, const double ThresOut, const double ThresIn, const int MinSz) {
728  GetCmtyVV(false, CmtyVVIn, ThresIn, MinSz);
729  GetCmtyVV(true, CmtyVVOut, ThresOut, MinSz);
730 }
731 
733 int TCoda::FindComsByCV(const int NumThreads, const int MaxComs, const int MinComs, const int DivComs, const TStr OutFNm, const int EdgesForCV, const double StepAlpha, const double StepBeta) {
734  double ComsGap = exp(TMath::Log((double) MaxComs / (double) MinComs) / (double) DivComs);
735  TIntV ComsV;
736  ComsV.Add(MinComs);
737  while (ComsV.Len() < DivComs) {
738  int NewComs = int(ComsV.Last() * ComsGap);
739  if (NewComs == ComsV.Last().Val) { NewComs++; }
740  ComsV.Add(NewComs);
741  }
742  if (ComsV.Last() < MaxComs) { ComsV.Add(MaxComs); }
743  return FindComsByCV(ComsV, 0.1, NumThreads, OutFNm, EdgesForCV, StepAlpha, StepBeta);
744 }
745 
746 int TCoda::FindComsByCV(TIntV& ComsV, const double HOFrac, const int NumThreads, const TStr PlotLFNm, const int EdgesForCV, const double StepAlpha, const double StepBeta) {
747  if (ComsV.Len() == 0) {
748  int MaxComs = G->GetNodes() / 5;
749  ComsV.Add(2);
750  while(ComsV.Last() < MaxComs) { ComsV.Add(ComsV.Last() * 2); }
751  }
752  int MaxIterCV = 3;
753 
754  TVec<TVec<TIntSet> > HoldOutSets(MaxIterCV);
755  TFltIntPrV NIdPhiV;
756  TAGMFastUtil::GetNIdPhiV<PNGraph>(G, NIdPhiV);
757 
758  if (G->GetEdges() > EdgesForCV) { //if edges are many enough, use CV
759  printf("generating hold out set\n");
760  TIntV NIdV1, NIdV2;
761  G->GetNIdV(NIdV1);
762  G->GetNIdV(NIdV2);
763  for (int IterCV = 0; IterCV < MaxIterCV; IterCV++) {
764  // generate holdout sets
765  TAGMFastUtil::GenHoldOutPairs(G, HoldOutSets[IterCV], HOFrac, Rnd);
766  /*
767  HoldOutSets[IterCV].Gen(G->GetNodes());
768  const int HOTotal = int(HOFrac * G->GetNodes() * (G->GetNodes() - 1) / 2.0);
769  int HOCnt = 0;
770  int HOEdges = (int) TMath::Round(HOFrac * G->GetEdges());
771  printf("holding out %d edges...\n", HOEdges);
772  for (int he = 0; he < (int) HOEdges; he++) {
773  HoldOutSets[IterCV][EdgeV[he].Val1].AddKey(EdgeV[he].Val2);
774  HoldOutSets[IterCV][EdgeV[he].Val2].AddKey(EdgeV[he].Val1);
775  HOCnt++;
776  }
777  printf("%d Edges hold out\n", HOCnt);
778  while(HOCnt++ < HOTotal) {
779  int SrcNID = Rnd.GetUniDevInt(G->GetNodes());
780  int DstNID = Rnd.GetUniDevInt(G->GetNodes());
781  HoldOutSets[IterCV][SrcNID].AddKey(DstNID);
782  HoldOutSets[IterCV][DstNID].AddKey(SrcNID);
783  }
784  */
785  }
786 
787  printf("hold out set generated\n");
788  }
789 
790  TFltV HOLV(ComsV.Len());
791  TIntFltPrV ComsLV;
792  for (int c = 0; c < ComsV.Len(); c++) {
793  const int Coms = ComsV[c];
794  printf("Try number of Coms:%d\n", Coms);
795 
796  if (G->GetEdges() > EdgesForCV) { //if edges are many enough, use CV
797  for (int IterCV = 0; IterCV < MaxIterCV; IterCV++) {
798  HOVIDSV = HoldOutSets[IterCV];
799  NeighborComInit(NIdPhiV, Coms);
800  printf("Initialized\n");
801 
802  if (NumThreads == 1) {
803  printf("MLE without parallelization begins\n");
804  MLEGradAscent(0.05, 10 * G->GetNodes(), "", StepAlpha, StepBeta);
805  } else {
806  printf("MLE with parallelization begins\n");
807  MLEGradAscentParallel(0.05, 100, NumThreads, "", StepAlpha, StepBeta);
808  }
809  double HOL = LikelihoodHoldOut();
810  HOL = HOL < 0? HOL: TFlt::Mn;
811  HOLV[c] += HOL;
812  }
813  }
814  else {
815  HOVIDSV.Gen(G->GetNodes());
816  MLEGradAscent(0.0001, 100 * G->GetNodes(), "");
817  double BIC = 2 * Likelihood() - (double) G->GetNodes() * Coms * 2.0 * log ( (double) G->GetNodes());
818  HOLV[c] = BIC;
819  }
820  }
821  int EstComs = 2;
822  double MaxL = TFlt::Mn;
823  printf("\n");
824  for (int c = 0; c < ComsV.Len(); c++) {
825  ComsLV.Add(TIntFltPr(ComsV[c].Val, HOLV[c].Val));
826  printf("%d(%f)\t", ComsV[c].Val, HOLV[c].Val);
827  if (MaxL < HOLV[c]) {
828  MaxL = HOLV[c];
829  EstComs = ComsV[c];
830  }
831  }
832  printf("\n");
833  RandomInit(EstComs);
834  HOVIDSV.Gen(G->GetNodes());
835  if (! PlotLFNm.Empty()) {
836  TGnuPlot::PlotValV(ComsLV, PlotLFNm, "hold-out likelihood", "communities", "likelihood");
837  }
838  return EstComs;
839 }
840 
841 double TCoda::LikelihoodHoldOut(const bool DoParallel) {
842  double L = 0.0;
843  for (int u = 0; u < HOVIDSV.Len(); u++) {
844  for (int e = 0; e < HOVIDSV[u].Len(); e++) {
845  int VID = HOVIDSV[u][e];
846  if (VID == u) { continue; }
847  double Pred = Prediction(u, VID);
848  if (G->IsEdge(u, VID)) {
849  L += log(1.0 - Pred);
850  }
851  else {
852  L += NegWgt * log(Pred);
853  }
854  }
855  }
856  return L;
857 }
858 
859 double TCoda::GetStepSizeByLineSearch(const bool IsRow, const int UID, const TIntFltH& DeltaV, const TIntFltH& GradV, const double& Alpha, const double& Beta, const int MaxIter) {
860  double StepSize = 1.0;
861  double InitLikelihood = LikelihoodForNode(IsRow, UID);
862  TIntFltH NewVarV(DeltaV.Len());
863  for(int iter = 0; iter < MaxIter; iter++) {
864  for (int i = 0; i < DeltaV.Len(); i++){
865  int CID = DeltaV.GetKey(i);
866  double NewVal;
867  NewVal = GetCom(IsRow, UID, CID) + StepSize * DeltaV.GetDat(CID);
868  if (NewVal < MinVal) { NewVal = MinVal; }
869  if (NewVal > MaxVal) { NewVal = MaxVal; }
870  NewVarV.AddDat(CID, NewVal);
871  }
872  if (LikelihoodForNode(IsRow, UID, NewVarV) < InitLikelihood + Alpha * StepSize * DotProduct(GradV, DeltaV)) {
873  StepSize *= Beta;
874  } else {
875  break;
876  }
877  if (iter == MaxIter - 1) {
878  StepSize = 0.0;
879  break;
880  }
881  }
882  return StepSize;
883 }
884 
885 int TCoda::MLEGradAscent(const double& Thres, const int& MaxIter, const TStr PlotNm, const double StepAlpha, const double StepBeta) {
886  time_t InitTime = time(NULL);
887  TExeTm ExeTm, CheckTm;
888  int iter = 0, PrevIter = 0;
889  TIntFltPrV IterLV;
890  TNGraph::TNodeI UI;
891  double PrevL = TFlt::Mn, CurL = 0.0;
892  TIntV NIdxV(F.Len(), 0);
893  for (int i = 0; i < F.Len(); i++) { NIdxV.Add(i); }
894  IAssert(NIdxV.Len() == F.Len());
895  TIntFltH GradV;
896  while(iter < MaxIter) {
897  NIdxV.Shuffle(Rnd);
898  for (int ui = 0; ui < F.Len(); ui++, iter++) {
899  const bool IsRow = (ui % 2 == 0);
900  int u = NIdxV[ui]; //
901  //find set of candidate c (we only need to consider c to which a neighbor of u belongs to)
902  UI = G->GetNI(u);
903  const int Deg = IsRow? UI.GetOutDeg(): UI.GetInDeg();
904  TIntSet CIDSet(5 * Deg);
905  for (int e = 0; e < Deg; e++) {
906  int VID = IsRow? UI.GetOutNId(e): UI.GetInNId(e);
907  if (HOVIDSV[u].IsKey(VID)) { continue; }
908  TIntFltH NbhCIDH = IsRow? H[VID]: F[VID];
909  for (TIntFltH::TIter CI = NbhCIDH.BegI(); CI < NbhCIDH.EndI(); CI++) {
910  CIDSet.AddKey(CI.GetKey());
911  //printf("CI.GetKey:%d\n", CI.GetKey());
912  IAssert(CI.GetKey() <= NumComs);
913  }
914  }
915  TIntFltH& CurMem = IsRow? F[u]: H[u];
916  for (TIntFltH::TIter CI = CurMem.BegI(); CI < CurMem.EndI(); CI++) { //remove the community membership which U does not share with its neighbors
917  if (! CIDSet.IsKey(CI.GetKey())) {
918  DelCom(IsRow, u, CI.GetKey());
919  }
920  }
921  if (CIDSet.Empty()) { continue; }
922  GradientForNode(IsRow, u, GradV, CIDSet);
923  if (Norm2(GradV) < 1e-4) { continue; }
924  double LearnRate = GetStepSizeByLineSearch(IsRow, u, GradV, GradV, StepAlpha, StepBeta);
925  if (LearnRate == 0.0) { continue; }
926  for (int ci = 0; ci < GradV.Len(); ci++) {
927  int CID = GradV.GetKey(ci);
928  double Change = LearnRate * GradV.GetDat(CID);
929  double NewFuc = GetCom(IsRow, u, CID) + Change;
930  if (NewFuc <= 0.0) {
931  DelCom(IsRow, u, CID);
932  } else {
933  AddCom(IsRow, u, CID, NewFuc);
934  }
935  }
936  if (! PlotNm.Empty() && (iter + 1) % G->GetNodes() == 0) {
937  IterLV.Add(TIntFltPr(iter, Likelihood(false)));
938  }
939  }
940  printf("\r%d iterations (%f) [%lu sec]", iter, CurL, time(NULL) - InitTime);
941  fflush(stdout);
942  if (iter - PrevIter >= 2 * G->GetNodes() && iter > 10000) {
943  PrevIter = iter;
944  CurL = Likelihood();
945  if (PrevL > TFlt::Mn && ! PlotNm.Empty()) {
946  printf("\r%d iterations, Likelihood: %f, Diff: %f", iter, CurL, CurL - PrevL);
947  }
948  fflush(stdout);
949  if (CurL - PrevL <= Thres * fabs(PrevL)) { break; }
950  else { PrevL = CurL; }
951  }
952 
953  }
954  printf("\n");
955  printf("MLE for Lambda completed with %d iterations(%s)\n", iter, ExeTm.GetTmStr());
956  if (! PlotNm.Empty()) {
957  TGnuPlot::PlotValV(IterLV, PlotNm + ".likelihood_Q");
958  }
959  return iter;
960 }
961 
962 int TCoda::MLEGradAscentParallel(const double& Thres, const int& MaxIter, const int ChunkNum, const int ChunkSize, const TStr PlotNm, const double StepAlpha, const double StepBeta) {
963  //parallel
964  time_t InitTime = time(NULL);
965  //uint64 StartTm = TSecTm::GetCurTm().GetAbsSecs();
966  TExeTm ExeTm, CheckTm;
967  double PrevL = Likelihood(true);
968  TIntFltPrV IterLV;
969  int PrevIter = 0;
970  int iter = 0;
971  TIntV NIdxV(F.Len(), 0);
972  for (int i = 0; i < F.Len(); i++) { NIdxV.Add(i); }
973  TIntV NIDOPTV(F.Len()); //check if a node needs optimization or not 1: does not require optimization
974  NIDOPTV.PutAll(0);
975  TVec<TIntFltH> NewF(ChunkNum * ChunkSize);
976  TIntV NewNIDV(ChunkNum * ChunkSize);
977  TBoolV IsRowV(ChunkNum * ChunkSize);
978  for (iter = 0; iter < MaxIter; iter++) {
979  NIdxV.Clr(false);
980  for (int i = 0; i < F.Len(); i++) {
981  //if (NIDOPTV[i] == 0) { NIdxV.Add(i); }
982  NIdxV.Add(i);
983  }
984  IAssert (NIdxV.Len() <= F.Len());
985  NIdxV.Shuffle(Rnd);
986  // compute gradient for chunk of nodes
987 #pragma omp parallel for schedule(static, 1)
988  for (int TIdx = 0; TIdx < ChunkNum; TIdx++) {
989  TIntFltH GradV;
990  for (int ui = TIdx * ChunkSize; ui < (TIdx + 1) * ChunkSize; ui++) {
991  const bool IsRow = (ui % 2 == 0);
992  NewNIDV[ui] = -1;
993  if (ui > NIdxV.Len()) { continue; }
994  const int u = NIdxV[ui]; //
995  //find set of candidate c (we only need to consider c to which a neighbor of u belongs to)
996  TNGraph::TNodeI UI = G->GetNI(u);
997  const int Deg = IsRow? UI.GetOutDeg(): UI.GetInDeg();
998  TIntSet CIDSet(5 * Deg);
999  TIntFltH CurFU = IsRow? F[u]: H[u];
1000  for (int e = 0; e < Deg; e++) {
1001  int VID = IsRow? UI.GetOutNId(e): UI.GetInNId(e);
1002  if (HOVIDSV[u].IsKey(VID)) { continue; }
1003  TIntFltH& NbhCIDH = IsRow? H[VID]: F[VID];
1004  for (TIntFltH::TIter CI = NbhCIDH.BegI(); CI < NbhCIDH.EndI(); CI++) {
1005  CIDSet.AddKey(CI.GetKey());
1006  }
1007  }
1008  if (CIDSet.Empty()) {
1009  CurFU.Clr();
1010  }
1011  else {
1012  for (TIntFltH::TIter CI = CurFU.BegI(); CI < CurFU.EndI(); CI++) { //remove the community membership which U does not share with its neighbors
1013  if (! CIDSet.IsKey(CI.GetKey())) {
1014  CurFU.DelIfKey(CI.GetKey());
1015  }
1016  }
1017  GradientForNode(IsRow, u, GradV, CIDSet);
1018  if (Norm2(GradV) < 1e-4) { NIDOPTV[u] = 1; continue; }
1019  double LearnRate = GetStepSizeByLineSearch(IsRow, u, GradV, GradV, StepAlpha, StepBeta);
1020  if (LearnRate == 0.0) { NewNIDV[ui] = -2; continue; }
1021  for (int ci = 0; ci < GradV.Len(); ci++) {
1022  int CID = GradV.GetKey(ci);
1023  double Change = LearnRate * GradV.GetDat(CID);
1024  double NewFuc = CurFU.IsKey(CID)? CurFU.GetDat(CID) + Change : Change;
1025  if (NewFuc <= 0.0) {
1026  CurFU.DelIfKey(CID);
1027  } else {
1028  CurFU.AddDat(CID) = NewFuc;
1029  }
1030  }
1031  CurFU.Defrag();
1032  }
1033  //store changes
1034  NewF[ui] = CurFU;
1035  NewNIDV[ui] = u;
1036  IsRowV[ui] = IsRow;
1037  }
1038  }
1039  int NumNoChangeGrad = 0;
1040  int NumNoChangeStepSize = 0;
1041  for (int ui = 0; ui < NewNIDV.Len(); ui++) {
1042  int NewNID = NewNIDV[ui];
1043  if (NewNID == -1) { NumNoChangeGrad++; continue; }
1044  if (NewNID == -2) { NumNoChangeStepSize++; continue; }
1045  if (IsRowV[ui]) {
1046  for (TIntFltH::TIter CI = F[NewNID].BegI(); CI < F[NewNID].EndI(); CI++) {
1047  SumFV[CI.GetKey()] -= CI.GetDat();
1048  }
1049  } else {
1050  for (TIntFltH::TIter CI = H[NewNID].BegI(); CI < H[NewNID].EndI(); CI++) {
1051  SumHV[CI.GetKey()] -= CI.GetDat();
1052  }
1053  }
1054  }
1055 #pragma omp parallel for
1056  for (int ui = 0; ui < NewNIDV.Len(); ui++) {
1057  int NewNID = NewNIDV[ui];
1058  if (NewNID < 0) { continue; }
1059  if (IsRowV[ui]) {
1060  F[NewNID] = NewF[ui];
1061  } else {
1062  H[NewNID] = NewF[ui];
1063  }
1064  }
1065  for (int ui = 0; ui < NewNIDV.Len(); ui++) {
1066  int NewNID = NewNIDV[ui];
1067  if (NewNID < 0) { continue; }
1068  if (IsRowV[ui]) {
1069  for (TIntFltH::TIter CI = F[NewNID].BegI(); CI < F[NewNID].EndI(); CI++) {
1070  SumFV[CI.GetKey()] += CI.GetDat();
1071  }
1072  } else {
1073  for (TIntFltH::TIter CI = H[NewNID].BegI(); CI < H[NewNID].EndI(); CI++) {
1074  SumHV[CI.GetKey()] += CI.GetDat();
1075  }
1076  }
1077  }
1078  // update the nodes who are optimal
1079  for (int ui = 0; ui < NewNIDV.Len(); ui++) {
1080  int NewNID = NewNIDV[ui];
1081  if (NewNID < 0) { continue; }
1082  TNGraph::TNodeI UI = G->GetNI(NewNID);
1083  NIDOPTV[NewNID] = 0;
1084  for (int e = 0; e < UI.GetDeg(); e++) {
1085  NIDOPTV[UI.GetNbrNId(e)] = 0;
1086  }
1087  }
1088  int OPTCnt = 0;
1089  for (int i = 0; i < NIDOPTV.Len(); i++) { if (NIDOPTV[i] == 1) { OPTCnt++; } }
1090  /*
1091  if (! PlotNm.Empty()) {
1092  printf("\r%d iterations [%s] %lu secs", iter * ChunkSize * ChunkNum, ExeTm.GetTmStr(), TSecTm::GetCurTm().GetAbsSecs() - StartTm);
1093  if (PrevL > TFlt::Mn) { printf(" (%f) %d g %d s %d OPT", PrevL, NumNoChangeGrad, NumNoChangeStepSize, OPTCnt); }
1094  fflush(stdout);
1095  }
1096  */
1097  if ((iter - PrevIter) * ChunkSize * ChunkNum >= G->GetNodes()) {
1098  PrevIter = iter;
1099  double CurL = Likelihood(true);
1100  IterLV.Add(TIntFltPr(iter * ChunkSize * ChunkNum, CurL));
1101  printf("\r%d iterations, Likelihood: %f, Diff: %f [%lu secs]", iter, CurL, CurL - PrevL, time(NULL) - InitTime);
1102  fflush(stdout);
1103  if (CurL - PrevL <= Thres * fabs(PrevL)) {
1104  break;
1105  }
1106  else {
1107  PrevL = CurL;
1108  }
1109  }
1110  }
1111  if (! PlotNm.Empty()) {
1112  printf("\nMLE completed with %d iterations(%lu secs)\n", iter, time(NULL) - InitTime);
1113  TGnuPlot::PlotValV(IterLV, PlotNm + ".likelihood_Q");
1114  } else {
1115  printf("\rMLE completed with %d iterations(%lu secs)\n", iter, time(NULL) - InitTime);
1116  fflush(stdout);
1117  }
1118  return iter;
1119 }
#define IAssert(Cond)
Definition: bd.h:262
int GetNbrNId(const int &NodeN) const
Returns ID of NodeN-th neighboring node.
Definition: graph.h:420
PNGraph GetGraphRawNID()
void GetNIdV(TIntV &NIdV) const
Gets a vector IDs of all nodes in the graph.
Definition: graph.cpp:376
TNodeI BegNI() const
Returns an iterator referring to the first node in the graph.
Definition: graph.h:548
double LikelihoodForNode(const bool IsRow, const int UID)
void AddComIn(const int &NID, const int &CID, const double &Val)
Definition: agmdirected.h:128
TFlt NegWgt
Definition: agmdirected.h:23
Definition: tm.h:355
int Val
Definition: dt.h:1139
static PNGraph New()
Static constructor that returns a pointer to the graph. Call: PNGraph Graph = TNGraph::New().
Definition: graph.h:481
TFlt MinVal
Definition: agmdirected.h:21
TNodeI GetNI(const int &NId) const
Returns an iterator referring to the node of ID NId in the graph.
Definition: graph.h:552
void SetGraph(const PNGraph &GraphPt)
void Save(TSOut &SOut) const
Definition: dt.h:1153
void GetNIDValH(TIntFltH &NIdValInOutH, TIntFltH &NIdValOutH, TIntFltH &NIdValInH, const int CID, const double Thres)
double Likelihood(const bool DoParallel=false)
void NeighborComInit(const int InitComs)
Definition: agmdirected.cpp:83
TVec< TIntFltH > H
Definition: agmdirected.h:11
TIter BegI() const
Definition: hash.h:213
int GetEdges() const
Returns the number of edges in the graph.
Definition: graph.cpp:313
void Load(TSIn &SIn)
Definition: dt.h:994
double Val
Definition: dt.h:1388
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:575
int Len() const
Definition: hash.h:842
void Gen(const int &ExpectVals)
Definition: shash.h:1115
void SetCmtyVV(const TVec< TIntV > &CmtyVVOut, const TVec< TIntV > &CmtyVVIn)
int GetNodes() const
Returns the number of nodes in the graph.
Definition: graph.h:503
TFlt & GetSumVal(const bool IsOut, const int CID)
Definition: agmdirected.h:86
void Save(TSOut &SOut) const
Definition: dt.h:995
int AddNode(int NId=-1)
Adds a node of ID NId to the graph.
Definition: graph.cpp:236
void GetCmtyS(TIntSet &CmtySOut, TIntSet &CmtySIn, const int CID, const double Thres)
int FindComsByCV(TIntV &ComsV, const double HOFrac=0.2, const int NumThreads=20, const TStr PlotLFNm=TStr(), const int EdgesForCV=100, const double StepAlpha=0.3, const double StepBeta=0.1)
const TDat & GetDat(const TKey &Key) const
Definition: hash.h:262
TIter EndI() const
Definition: hash.h:218
void GetTopCIDs(TIntV &CIdV, const int TopK, const int IsAverage=1, const int MinSz=1)
void Load(TSIn &SIn)
Definition: ds.h:946
TBool DoParallel
Definition: agmdirected.h:25
TFlt RegCoef
Definition: agmdirected.h:14
static const double Mx
Definition: dt.h:1391
bool IsKey(const TKey &Key) const
Definition: shash.h:1148
const TKey & GetKey(const int &KeyId) const
Definition: shash.h:1141
void Defrag()
Definition: hash.h:555
double DotProduct(const TIntFltH &UV, const TIntFltH &VV)
Definition: agmdirected.h:154
Definition: fl.h:58
void Save(TSOut &SOut) const
Definition: ds.h:954
void GetCmtyVVUnSorted(const bool IsOut, TVec< TIntV > &CmtyVV, const double Thres, const int MinSz=3)
int MLEGradAscentParallel(const double &Thres, const int &MaxIter, const int ChunkNum, const int ChunkSize, const TStr PlotNm, const double StepAlpha=0.3, const double StepBeta=0.1)
TPair< TInt, TFlt > TIntFltPr
Definition: ds.h:87
const char * GetTmStr() const
Definition: tm.h:370
bool Empty() const
Definition: shash.h:1120
TIntV NIDV
Definition: agmdirected.h:13
double GetStepSizeByLineSearch(const bool IsRow, const int UID, const TIntFltH &DeltaV, const TIntFltH &GradV, const double &Alpha, const double &Beta, const int MaxIter=10)
void AddComOut(const int &NID, const int &CID, const double &Val)
Definition: agmdirected.h:121
void GetCmtyVV(TVec< TIntV > &CmtyVVOut, TVec< TIntV > &CmtyVVIn, const int MinSz=3)
int GetNumComs()
Definition: agmdirected.h:36
TVec< TIntSet > HOVIDSV
Definition: agmdirected.h:19
double LikelihoodHoldOut(const bool DoParallel=false)
void Sort(const bool &Asc=true)
Sorts the elements of the vector.
Definition: ds.h:1318
void Gen(const int &ExpectVals)
Definition: hash.h:222
TFltV SumFV
Definition: agmdirected.h:15
int AddEdge(const int &SrcNId, const int &DstNId)
Adds an edge from node SrcNId to node DstNId to the graph.
Definition: graph.cpp:321
void PutAll(const TVal &Val)
Sets all elements of the vector to value Val.
Definition: ds.h:1229
static PNGraph Load(TSIn &SIn)
Static constructor that loads the graph from a stream SIn and returns a pointer to it...
Definition: graph.h:487
void Save(TSOut &SOut)
Definition: agmdirected.cpp:8
bool IsEdge(const int &SrcNId, const int &DstNId, const bool &IsDir=true) const
Tests whether an edge from node IDs SrcNId to DstNId exists in the graph.
Definition: graph.cpp:363
const TVal & GetDat(const TVal &Val) const
Returns reference to the first occurrence of element Val.
Definition: ds.h:838
PUNGraph GetSubGraph(const PUNGraph &Graph, const TIntV &NIdV, const bool &RenumberNodes)
Returns an induced subgraph of an undirected graph Graph with NIdV nodes with an optional node renumb...
Definition: subgraph.cpp:7
const char * GetKey(const int &KeyId) const
Definition: hash.h:893
int MLEGradAscent(const double &Thres, const int &MaxIter, const TStr PlotNm, const double StepAlpha=0.3, const double StepBeta=0.1)
void Load(TSIn &SIn)
Definition: dt.h:1152
TVec< TIntFltH > F
Definition: agmdirected.h:10
bool IsNode(const int &NId) const
Tests whether ID NId is a node.
Definition: graph.h:546
int AddKey(const TKey &Key)
Definition: shash.h:1254
const TVal & Last() const
Returns a reference to the last element of the vector.
Definition: ds.h:579
void Load(TSIn &SIn, const int &RndSeed=0)
Definition: agmdirected.cpp:25
double Prediction(const TIntFltH &FU, const TIntFltH &HV)
Definition: agmdirected.h:174
TFlt PNoCom
Definition: agmdirected.h:24
PNGraph G
Definition: agmdirected.h:9
int GetDeg() const
Returns degree of the current node, the sum of in-degree and out-degree.
Definition: graph.h:402
Definition: fl.h:128
TInt NumComs
Definition: agmdirected.h:18
TBool NodesOk
Definition: agmdirected.h:17
int AddKey(const char *Key)
Definition: hash.h:968
void DelCom(const bool IsOut, const int &NID, const int &CID)
Definition: agmdirected.h:135
void Save(TSOut &SOut) const
Saves the graph to a (binary) stream SOut.
Definition: graph.h:479
Definition: hash.h:781
void GetNonEdgePairScores(TFltIntIntTrV &ScoreV)
void RandomInit(const int InitComs)
Definition: agmdirected.cpp:43
int Len() const
Definition: shash.h:1121
double Norm2(const TIntFltH &UV)
Definition: agmdirected.h:189
void PutSeed(const int &_Seed)
Definition: dt.cpp:18
TNodeI EndNI() const
Returns an iterator referring to the past-the-end node in the graph.
Definition: graph.h:550
void GetKeyV(TVec< TKey > &KeyV) const
Definition: hash.h:484
int GetOutDeg() const
Returns out-degree of the current node.
Definition: graph.h:406
Definition: dt.h:412
bool Empty() const
Definition: dt.h:491
static TStr Fmt(const char *FmtStr,...)
Definition: dt.cpp:1599
TFltV SumHV
Definition: agmdirected.h:16
Node iterator. Only forward iteration (operator++) is supported.
Definition: graph.h:383
void Load(TSIn &SIn)
Definition: dt.h:1405
void DumpMemberships(const TStr &OutFNm, const TStrHash< TInt > &NodeNameH)
Definition: agmdirected.h:45
double GetUniDev()
Definition: dt.h:30
bool DelIfKey(const TKey &Key)
Definition: hash.h:243
void Clr(const bool &DoDel=true, const int &NoDelLim=-1, const bool &ResetDat=true)
Definition: hash.h:361
static void GenHoldOutPairs(const PGraph &G, TVec< TIntSet > &HoldOutSet, double HOFrac, TRnd &Rnd)
Definition: agmfast.h:159
void AddCom(const bool IsOut, const int &NID, const int &CID, const double &Val)
Definition: agmdirected.h:114
void Gen(const TSizeTy &_Vals)
Constructs a vector (an array) of _Vals elements.
Definition: ds.h:523
static double Log(const double &Val)
Definition: xmath.h:14
int GetUniDevInt(const int &Range=0)
Definition: dt.cpp:39
void GetCommunity(TIntV &CmtyVIn, TIntV &CmtyVOut, const int CID)
Definition: agmdirected.h:50
int GetInDeg() const
Returns in-degree of the current node.
Definition: graph.h:404
TRnd Rnd
Definition: agmdirected.h:12
char * CStr()
Definition: dt.h:479
bool IsKey(const TKey &Key) const
Definition: hash.h:258
int GetInNId(const int &NodeN) const
Returns ID of NodeN-th in-node (the node pointing to the current node).
Definition: graph.h:412
TFlt MaxVal
Definition: agmdirected.h:22
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:602
void DelSelfEdges(const PGraph &Graph)
Removes all the self-edges from the graph.
Definition: alg.h:419
void Trunc(const TSizeTy &_Vals=-1)
Truncates the vector's length and capacity to _Vals elements.
Definition: ds.h:1033
double Sum(const TIntFltH &UV)
Definition: agmdirected.h:182
double GetCom(const bool IsOut, const int &NID, const int &CID)
Definition: agmdirected.h:93
int Len() const
Definition: hash.h:228
TDat & AddDat(const TKey &Key)
Definition: hash.h:238
static void PlotValV(const TVec< TVal1 > &ValV, const TStr &OutFNmPref, const TStr &Desc="", const TStr &XLabel="", const TStr &YLabel="", const TGpScaleTy &ScaleTy=gpsAuto, const bool &PowerFit=false, const TGpSeriesTy &SeriesTy=gpwLinesPoints)
Definition: gnuplot.h:398
void GradientForNode(const bool IsRow, const int UID, TIntFltH &GradU, const TIntSet &CIDSet)
int GetOutNId(const int &NodeN) const
Returns ID of NodeN-th out-node (the node the current node points to).
Definition: graph.h:416
void Save(TSOut &SOut) const
Definition: dt.h:1402
const TKey & GetKey(const int &KeyId) const
Definition: hash.h:252
TTriple< TFlt, TInt, TInt > TFltIntIntTr
Definition: ds.h:182
static const double Mn
Definition: dt.h:1390
Vector is a sequence TVal objects representing an array that can change in size.
Definition: ds.h:430
void SortByDat(const bool &Asc=true)
Definition: hash.h:292