SNAP Library 3.0, User Reference  2016-07-20 17:56:49
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
agmdirected.cpp
Go to the documentation of this file.
1 
2 #include "stdafx.h"
3 #include "agmfast.h"
4 #include "agmdirected.h"
5 #include "Snap.h"
6 #include "agm.h"
7 
8 void TCoda::Save(TSOut& SOut) {
9  G->Save(SOut);
10  F.Save(SOut);
11  H.Save(SOut);
12  NIDV.Save(SOut);
13  RegCoef.Save(SOut);
14  SumFV.Save(SOut);
15  SumHV.Save(SOut);
16  NodesOk.Save(SOut);
17  MinVal.Save(SOut);
18  MaxVal.Save(SOut);
19  NegWgt.Save(SOut);
20  NumComs.Save(SOut);
21  HOVIDSV.Save(SOut);
22  PNoCom.Save(SOut);
23 }
24 
25 void TCoda::Load(TSIn& SIn, const int& RndSeed) {
26  G->Load(SIn);
27  F.Load(SIn);
28  H.Load(SIn);
29  NIDV.Load(SIn);
30  RegCoef.Load(SIn);
31  SumFV.Load(SIn);
32  SumHV.Load(SIn);
33  NodesOk.Load(SIn);
34  MinVal.Load(SIn);
35  MaxVal.Load(SIn);
36  NegWgt.Load(SIn);
37  NumComs.Load(SIn);
38  HOVIDSV.Load(SIn);
39  PNoCom.Load(SIn);
40  Rnd.PutSeed(RndSeed);
41 }
42 
43 void TCoda::RandomInit(const int InitComs) {
44  F.Gen(G->GetNodes());
45  H.Gen(G->GetNodes());
46  SumFV.Gen(InitComs);
47  SumHV.Gen(InitComs);
48  NumComs = InitComs;
49  for (int u = 0; u < F.Len(); u++) {
50  //assign to just one community
51  int Mem = G->GetNI(u).GetOutDeg();
52  if (Mem > 10) { Mem = 10; }
53  for (int c = 0; c < Mem; c++) {
54  int CID = Rnd.GetUniDevInt(InitComs);
55  AddComOut(u, CID, Rnd.GetUniDev());
56  }
57  }
58  for (int u = 0; u < H.Len(); u++) {
59  //assign to just one community
60  int Mem = G->GetNI(u).GetInDeg();
61  if (Mem > 10) { Mem = 10; }
62  for (int c = 0; c < Mem; c++) {
63  int CID = Rnd.GetUniDevInt(InitComs);
64  AddComIn(u, CID, Rnd.GetUniDev());
65  }
66  }
67  //assign a member to zero-member community (if any)
68  for (int c = 0; c < SumFV.Len(); c++) {
69  if (SumFV[c] == 0.0) {
70  int UID = Rnd.GetUniDevInt(G->GetNodes());
71  AddComOut(UID, c, Rnd.GetUniDev());
72  }
73  }
74  //assign a member to zero-member community (if any)
75  for (int c = 0; c < SumHV.Len(); c++) {
76  if (SumHV[c] == 0.0) {
77  int UID = Rnd.GetUniDevInt(G->GetNodes());
78  AddComIn(UID, c, Rnd.GetUniDev());
79  }
80  }
81 }
82 
83 void TCoda::NeighborComInit(const int InitComs) {
84  //initialize with best neighborhood communities (Gleich et.al. KDD'12)
85  TExeTm RunTm;
86  TFltIntPrV NIdPhiV(F.Len(), 0);
87  TAGMFastUtil::GetNIdPhiV<PNGraph>(G, NIdPhiV);
88  NeighborComInit(NIdPhiV, InitComs);
89 }
90 
91 void TCoda::NeighborComInit(TFltIntPrV& NIdPhiV, const int InitComs) {
92  NIdPhiV.Sort(true);
93  F.Gen(G->GetNodes());
94  H.Gen(G->GetNodes());
95  SumFV.Gen(InitComs);
96  SumHV.Gen(InitComs);
97  NumComs = InitComs;
98  //TIntFltH NCPhiH(F.Len());
99  TIntSet InvalidNIDS(F.Len());
100  TIntV ChosenNIDV(InitComs, 0); //FOR DEBUG
101  //choose nodes with local minimum in conductance
102  int CurCID = 0;
103  for (int ui = 0; ui < NIdPhiV.Len(); ui++) {
104  int UID = NIdPhiV[ui].Val2;
105  fflush(stdout);
106  if (InvalidNIDS.IsKey(UID)) { continue; }
107  ChosenNIDV.Add(UID); //FOR DEBUG
108  //add the node and its neighbors to the current community
109  TNGraph::TNodeI NI = G->GetNI(UID);
110  if (NI.GetOutDeg() > 0) { AddComOut(UID, CurCID, 1.0); }
111  if (NI.GetInDeg() > 0) { AddComIn(UID, CurCID, 1.0); }
112  fflush(stdout);
113  //add neighbors depending on whether they have incoming / outgoing edges from the center node (NI)
114  for (int e = 0; e < NI.GetDeg(); e++) {
115  int VID = NI.GetNbrNId(e);
116  TNGraph::TNodeI VI = G->GetNI(VID);
117  if (VI.GetOutDeg() > 0) { AddComOut(VID, CurCID, 1.0); }
118  if (VI.GetInDeg() > 0) { AddComIn(VID, CurCID, 1.0); }
119  }
120  //exclude its neighbors from the next considerations
121  for (int e = 0; e < NI.GetDeg(); e++) {
122  InvalidNIDS.AddKey(NI.GetNbrNId(e));
123  }
124  CurCID++;
125  fflush(stdout);
126  if (CurCID >= NumComs) { break; }
127  }
128  if (NumComs > CurCID) {
129  printf("%d communities needed to fill randomly\n", NumComs - CurCID);
130  }
131  //assign a member to zero-member community (if any)
132  for (int c = 0; c < SumFV.Len(); c++) {
133  if (SumFV[c] == 0.0) {
134  int ComSz = 10;
135  for (int u = 0; u < ComSz; u++) {
136  int UID = Rnd.GetUniDevInt(G->GetNodes());
137  AddComOut(UID, c, Rnd.GetUniDev());
138  }
139  }
140  }
141  //assign a member to zero-member community (if any)
142  for (int c = 0; c < SumHV.Len(); c++) {
143  if (SumHV[c] == 0.0) {
144  int ComSz = 10;
145  for (int u = 0; u < ComSz; u++) {
146  int UID = Rnd.GetUniDevInt(G->GetNodes());
147  AddComIn(UID, c, Rnd.GetUniDev());
148  }
149  }
150  }
151 }
152 
154  ScoreV.Gen(G->GetNodes() * G->GetNodes(), 0);
155  TIntV NIDV;
156  G->GetNIdV(NIDV);
157  TIntSet Cuv;
158  for (int u = 0; u < NIDV.Len(); u++) {
159  int UID = NIDV[u];
160  for (int v = 0; v < NIDV.Len(); v++) {
161  int VID = NIDV[v];
162  if (UID == VID) { continue; }
163  if (! G->IsEdge(UID, VID)) {
164  double Val = 1.0 - Prediction(UID, VID);
165  ScoreV.Add(TFltIntIntTr(Val, UID, VID));
166  }
167  }
168  }
169 }
170 
171 void TCoda::SetCmtyVV(const TVec<TIntV>& CmtyVVOut, const TVec<TIntV>& CmtyVVIn) {
172  IAssert(CmtyVVOut.Len() == CmtyVVIn.Len());
173  F.Gen(G->GetNodes());
174  H.Gen(G->GetNodes());
175  SumFV.Gen(CmtyVVOut.Len());
176  SumHV.Gen(CmtyVVIn.Len());
177  NumComs = CmtyVVOut.Len();
178  TIntH NIDIdxH(NIDV.Len());
179  if (! NodesOk) {
180  for (int u = 0; u < NIDV.Len(); u++) {
181  NIDIdxH.AddDat(NIDV[u], u);
182  }
183  }
184  for (int c = 0; c < CmtyVVOut.Len(); c++) {
185  for (int u = 0; u < CmtyVVOut[c].Len(); u++) {
186  int UID = CmtyVVOut[c][u];
187  if (! NodesOk) { UID = NIDIdxH.GetDat(UID); }
188  if (G->IsNode(UID)) {
189  AddComOut(UID, c, 1.0);
190  }
191  }
192  }
193  for (int c = 0; c < CmtyVVIn.Len(); c++) {
194  for (int u = 0; u < CmtyVVIn[c].Len(); u++) {
195  int UID = CmtyVVIn[c][u];
196  if (! NodesOk) { UID = NIDIdxH.GetDat(UID); }
197  if (G->IsNode(UID)) {
198  AddComIn(UID, c, 1.0);
199  }
200  }
201  }
202 }
203 
204 void TCoda::SetGraph(const PNGraph& GraphPt) {
205  G = GraphPt;
206  HOVIDSV.Gen(G->GetNodes());
207  NodesOk = true;
208  GraphPt->GetNIdV(NIDV);
209  // check that nodes IDs are {0,1,..,Nodes-1}
210  for (int nid = 0; nid < GraphPt->GetNodes(); nid++) {
211  if (! GraphPt->IsNode(nid)) {
212  NodesOk = false;
213  break;
214  }
215  }
216  if (! NodesOk) {
217  printf("rearrage nodes\n");
218  G = TSnap::GetSubGraph(GraphPt, NIDV, true);
219  for (int nid = 0; nid < G->GetNodes(); nid++) {
220  IAssert(G->IsNode(nid));
221  }
222  }
224 
225  PNoCom = 1.0 / (double) G->GetNodes();
226  DoParallel = false;
227  if (1.0 / PNoCom > sqrt(TFlt::Mx)) { PNoCom = 0.99 / sqrt(TFlt::Mx); } // to prevent overflow
228  NegWgt = 1.0;
229 }
230 
231 double TCoda::Likelihood(const bool _DoParallel) {
232  TExeTm ExeTm;
233  double L = 0.0;
234  if (_DoParallel) {
235  #pragma omp parallel for
236  for (int u = 0; u < F.Len(); u++) {
237  double LU = LikelihoodForNode(true, u);
238  #pragma omp atomic
239  L += LU;
240  }
241  }
242  else {
243  for (int u = 0; u < F.Len(); u++) {
244  double LU = LikelihoodForNode(true, u);
245  L += LU;
246  }
247  }
248  return L;
249 }
250 
251 double TCoda::LikelihoodForNode(const bool IsRow, const int UID) {
252  if (IsRow) {
253  return LikelihoodForNode(IsRow, UID, F[UID]);
254  } else {
255  return LikelihoodForNode(IsRow, UID, H[UID]);
256  }
257 }
258 
259 double TCoda::LikelihoodForNode(const bool IsRow, const int UID, const TIntFltH& FU) {
260  double L = 0.0;
261  TFltV HOSumHV; //adjust for Hv of v hold out
262  if (HOVIDSV[UID].Len() > 0) {
263  HOSumHV.Gen(NumComs);
264  for (int e = 0; e < HOVIDSV[UID].Len(); e++) {
265  for (int c = 0; c < SumHV.Len(); c++) {
266  HOSumHV[c] += GetCom(! IsRow, HOVIDSV[UID][e], c);
267  }
268  }
269  }
270  TNGraph::TNodeI NI = G->GetNI(UID);
271  const int Deg = IsRow ? NI.GetOutDeg(): NI.GetInDeg();
272  for (int e = 0; e < Deg; e++) {
273  const int v = IsRow ? NI.GetOutNId(e): NI.GetInNId(e);
274  if (v == UID) { continue; }
275  if (HOVIDSV[UID].IsKey(v)) { continue; }
276  if (IsRow) {
277  L += log (1.0 - Prediction(FU, H[v])) + NegWgt * DotProduct(FU, H[v]);
278  } else {
279  L += log (1.0 - Prediction(F[v], FU)) + NegWgt * DotProduct(F[v], FU);
280  }
281  }
282  for (TIntFltH::TIter HI = FU.BegI(); HI < FU.EndI(); HI++) {
283  double HOSum = HOVIDSV[UID].Len() > 0? HOSumHV[HI.GetKey()].Val: 0.0;//subtract Hold out pairs only if hold out pairs exist
284  L -= NegWgt * (GetSumVal(! IsRow, HI.GetKey()) - HOSum - GetCom(! IsRow, UID, HI.GetKey())) * HI.GetDat();
285  }
286  //add regularization
287  if (RegCoef > 0.0) { //L1
288  L -= RegCoef * Sum(FU);
289  }
290  if (RegCoef < 0.0) { //L2
291  L += RegCoef * Norm2(FU);
292  }
293  return L;
294 }
295 
296 
297 /*
298 double TCoda::LikelihoodForRow(const int UID) {
299  return LikelihoodForRow(UID, F[UID]);
300 }
301 
302 
303 double TCoda::LikelihoodForRow(const int UID, const TIntFltH& FU) {
304  double L = 0.0;
305  TFltV HOSumHV; //adjust for Hv of v hold out
306  if (HOVIDSV[UID].Len() > 0) {
307  HOSumHV.Gen(SumFV.Len());
308 
309  for (int e = 0; e < HOVIDSV[UID].Len(); e++) {
310  for (int c = 0; c < SumHV.Len(); c++) {
311  HOSumHV[c] += GetComIn(HOVIDSV[UID][e], c);
312  }
313  }
314  }
315  TNGraph::TNodeI NI = G->GetNI(UID);
316  for (int e = 0; e < NI.GetOutDeg(); e++) {
317  int v = NI.GetOutNId(e);
318  if (v == UID) { continue; }
319  if (HOVIDSV[UID].IsKey(v)) { continue; }
320  double LU = log (1.0 - Prediction(FU, H[v])) + NegWgt * DotProduct(FU, H[v]);
321  L += LU;
322  }
323  for (TIntFltH::TIter HI = FU.BegI(); HI < FU.EndI(); HI++) {
324  double HOSum = HOVIDSV[UID].Len() > 0? HOSumHV[HI.GetKey()].Val: 0.0;//subtract Hold out pairs only if hold out pairs exist
325  double LU = NegWgt * (SumHV[HI.GetKey()] - HOSum - GetComIn(UID, HI.GetKey())) * HI.GetDat();
326  L -= LU;
327  }
328  //add regularization
329  if (RegCoef > 0.0) { //L1
330  L -= RegCoef * Sum(FU);
331  }
332  if (RegCoef < 0.0) { //L2
333  L += RegCoef * Norm2(FU);
334  }
335 
336  return L;
337 }
338 
339 double TCoda::LikelihoodForCol(const int VID) {
340  return LikelihoodForCol(VID, H[VID]);
341 }
342 
343 
344 double TCoda::LikelihoodForCol(const int VID, const TIntFltH& HV) {
345  double L = 0.0;
346  TFltV HOSumFV; //adjust for Fv of v hold out
347  if (HOVIDSV[VID].Len() > 0) {
348  HOSumFV.Gen(SumFV.Len());
349  for (int e = 0; e < HOVIDSV[VID].Len(); e++) {
350  for (int c = 0; c < SumFV.Len(); c++) {
351  HOSumFV[c] += GetComOut(HOVIDSV[VID][e], c);
352  }
353  }
354  }
355  TNGraph::TNodeI NI = G->GetNI(VID);
356  for (int e = 0; e < NI.GetInDeg(); e++) {
357  int v = NI.GetInNId(e);
358  if (v == VID) { continue; }
359  if (HOVIDSV[VID].IsKey(v)) { continue; }
360  L += log (1.0 - Prediction(F[v], HV)) + NegWgt * DotProduct(F[v], HV);
361  }
362  for (TIntFltH::TIter HI = HV.BegI(); HI < HV.EndI(); HI++) {
363  double HOSum = HOVIDSV[VID].Len() > 0? HOSumFV[HI.GetKey()].Val: 0.0;//subtract Hold out pairs only if hold out pairs exist
364  L -= NegWgt * (SumFV[HI.GetKey()] - HOSum - GetComOut(VID, HI.GetKey())) * HI.GetDat();
365  }
366  //add regularization
367  if (RegCoef > 0.0) { //L1
368  L -= RegCoef * Sum(HV);
369  }
370  if (RegCoef < 0.0) { //L2
371  L += RegCoef * Norm2(HV);
372  }
373 
374  return L;
375 }
376 */
377 void TCoda::GradientForNode(const bool IsRow, const int UID, TIntFltH& GradU, const TIntSet& CIDSet) {
378  GradU.Gen(CIDSet.Len());
379 
380  TFltV HOSumHV; //adjust for Hv of v hold out
381  if (HOVIDSV[UID].Len() > 0) {
382  HOSumHV.Gen(NumComs);
383  for (int e = 0; e < HOVIDSV[UID].Len(); e++) {
384  for (int c = 0; c < SumHV.Len(); c++) {
385  HOSumHV[c] += GetCom(! IsRow, HOVIDSV[UID][e], c);
386  }
387  }
388  }
389 
390  TNGraph::TNodeI NI = G->GetNI(UID);
391  int Deg = IsRow ? NI.GetOutDeg(): NI.GetInDeg();
392  TFltV PredV(Deg), GradV(CIDSet.Len());
393  TIntV CIDV(CIDSet.Len());
394  for (int e = 0; e < Deg; e++) {
395  int VID = IsRow? NI.GetOutNId(e): NI.GetInNId(e);
396  if (VID == UID) { continue; }
397  if (HOVIDSV[UID].IsKey(VID)) { continue; }
398  PredV[e] = IsRow? Prediction(UID, VID): Prediction(VID, UID);
399  }
400 
401  for (int c = 0; c < CIDSet.Len(); c++) {
402  int CID = CIDSet.GetKey(c);
403  double Val = 0.0;
404  for (int e = 0; e < Deg; e++) {
405  int VID = IsRow? NI.GetOutNId(e): NI.GetInNId(e);
406  if (VID == UID) { continue; }
407  if (HOVIDSV[UID].IsKey(VID)) { continue; }
408  Val += PredV[e] * GetCom(! IsRow, VID, CID) / (1.0 - PredV[e]) + NegWgt * GetCom(! IsRow, VID, CID);
409  }
410  double HOSum = HOVIDSV[UID].Len() > 0? HOSumHV[CID].Val: 0.0;//subtract Hold out pairs only if hold out pairs exist
411  Val -= NegWgt * (GetSumVal(! IsRow, CID) - HOSum - GetCom(! IsRow, UID, CID));
412  CIDV[c] = CID;
413  GradV[c] = Val;
414  }
415  //add regularization
416  if (RegCoef > 0.0) { //L1
417  for (int c = 0; c < GradV.Len(); c++) {
418  GradV[c] -= RegCoef;
419  }
420  }
421  if (RegCoef < 0.0) { //L2
422  for (int c = 0; c < GradV.Len(); c++) {
423  GradV[c] += 2 * RegCoef * GetCom(IsRow, UID, CIDV[c]);
424  }
425  }
426  for (int c = 0; c < GradV.Len(); c++) {
427  if (GetCom(IsRow, UID, CIDV[c]) == 0.0 && GradV[c] < 0.0) { continue; }
428  if (fabs(GradV[c]) < 0.0001) { continue; }
429  GradU.AddDat(CIDV[c], GradV[c]);
430  }
431  for (int c = 0; c < GradU.Len(); c++) {
432  if (GradU[c] >= 10) { GradU[c] = 10; }
433  if (GradU[c] <= -10) { GradU[c] = -10; }
434  IAssert(GradU[c] >= -10);
435  }
436 }
437 
438 /*
439 void TCoda::GradientForRow(const int UID, TIntFltH& GradU, const TIntSet& CIDSet) {
440  GradU.Gen(CIDSet.Len());
441 
442  TFltV HOSumHV; //adjust for Hv of v hold out
443  if (HOVIDSV[UID].Len() > 0) {
444  HOSumHV.Gen(SumHV.Len());
445 
446  for (int e = 0; e < HOVIDSV[UID].Len(); e++) {
447  for (int c = 0; c < SumHV.Len(); c++) {
448  HOSumHV[c] += GetComIn(HOVIDSV[UID][e], c);
449  }
450  }
451  }
452 
453  TNGraph::TNodeI NI = G->GetNI(UID);
454  int Deg = NI.GetOutDeg();
455  TFltV PredV(Deg), GradV(CIDSet.Len());
456  TIntV CIDV(CIDSet.Len());
457  for (int e = 0; e < Deg; e++) {
458  if (NI.GetOutNId(e) == UID) { continue; }
459  if (HOVIDSV[UID].IsKey(NI.GetOutNId(e))) { continue; }
460  PredV[e] = Prediction(UID, NI.GetOutNId(e));
461  }
462 
463  for (int c = 0; c < CIDSet.Len(); c++) {
464  int CID = CIDSet.GetKey(c);
465  double Val = 0.0;
466  for (int e = 0; e < Deg; e++) {
467  int VID = NI.GetNbrNId(e);
468  if (VID == UID) { continue; }
469  if (HOVIDSV[UID].IsKey(VID)) { continue; }
470  Val += PredV[e] * GetComIn(VID, CID) / (1.0 - PredV[e]) + NegWgt * GetComIn(VID, CID);
471  }
472  double HOSum = HOVIDSV[UID].Len() > 0? HOSumHV[CID].Val: 0.0;//subtract Hold out pairs only if hold out pairs exist
473  Val -= NegWgt * (SumHV[CID] - HOSum - GetComIn(UID, CID));
474  CIDV[c] = CID;
475  GradV[c] = Val;
476  }
477  //add regularization
478  if (RegCoef > 0.0) { //L1
479  for (int c = 0; c < GradV.Len(); c++) {
480  GradV[c] -= RegCoef;
481  }
482  }
483  if (RegCoef < 0.0) { //L2
484  for (int c = 0; c < GradV.Len(); c++) {
485  GradV[c] += 2 * RegCoef * GetComOut(UID, CIDV[c]);
486  }
487  }
488  for (int c = 0; c < GradV.Len(); c++) {
489  if (GetComOut(UID, CIDV[c]) == 0.0 && GradV[c] < 0.0) { continue; }
490  if (fabs(GradV[c]) < 0.0001) { continue; }
491  GradU.AddDat(CIDV[c], GradV[c]);
492  }
493  for (int c = 0; c < GradU.Len(); c++) {
494  if (GradU[c] >= 10) { GradU[c] = 10; }
495  if (GradU[c] <= -10) { GradU[c] = -10; }
496  IAssert(GradU[c] >= -10);
497  }
498 }
499 */
500 
501 void TCoda::GetCmtyVV(const bool IsOut, TVec<TIntV>& CmtyVV) {
502  GetCmtyVV(IsOut, CmtyVV, sqrt(1.0 / G->GetNodes()), 3);
503 }
504 
506 void TCoda::GetCommunity(TIntV& CmtyVIn, TIntV& CmtyVOut, const int CID, const double Thres) {
507  TIntFltH NIDFucH(F.Len() / 10), NIDHucH(F.Len() / 10);
508  for (int u = 0; u < NIDV.Len(); u++) {
509  int NID = u;
510  if (! NodesOk) { NID = NIDV[u]; }
511  if (GetCom(true, u, CID) >= Thres) { NIDFucH.AddDat(NID, GetCom(true, u, CID)); }
512  if (GetCom(false, u, CID) >= Thres) { NIDHucH.AddDat(NID, GetCom(false, u, CID)); }
513  }
514  NIDFucH.SortByDat(false);
515  NIDHucH.SortByDat(false);
516  NIDFucH.GetKeyV(CmtyVOut);
517  NIDHucH.GetKeyV(CmtyVIn);
518 }
519 
520 void TCoda::GetTopCIDs(TIntV& CIdV, const int TopK, const int IsAverage, const int MinSz) {
521  TIntFltH CIdFHH;
522  for (int c = 0; c < GetNumComs(); c++) {
523  if (IsAverage == 1) {
524  TIntV CmtyVIn, CmtyVOut;
525  GetCommunity(CmtyVIn, CmtyVOut, c);
526  if (CmtyVIn.Len() == 0 || CmtyVOut.Len() == 0) { continue; }
527  if (CmtyVIn.Len() < MinSz || CmtyVOut.Len() < MinSz) { continue; }
528  CIdFHH.AddDat(c, GetSumVal(true, c) * GetSumVal(false, c) / (double) CmtyVIn.Len() / (double) CmtyVOut.Len());
529  } else {
530  CIdFHH.AddDat(c, GetSumVal(true, c) * GetSumVal(false, c));
531  }
532  }
533  CIdFHH.SortByDat(false);
534  CIdFHH.GetKeyV(CIdV);
535  if (TopK < CIdFHH.Len()) { CIdV.Trunc(TopK); }
536 }
537 
539 void TCoda::GetCmtyVV(const bool IsOut, TVec<TIntV>& CmtyVV, const double Thres, const int MinSz) {
540  CmtyVV.Gen(NumComs, 0);
541  TIntFltH CIDSumFH(NumComs);
542  for (int c = 0; c < NumComs; c++) {
543  CIDSumFH.AddDat(c, GetSumVal(IsOut, c));
544  }
545  CIDSumFH.SortByDat(false);
546  for (int c = 0; c < NumComs; c++) {
547  int CID = CIDSumFH.GetKey(c);
548  TIntFltH NIDFucH, NIDHucH, NIDInOutH;
549  TIntV CmtyV;
550  GetNIDValH(NIDInOutH, NIDFucH, NIDHucH, CID, Thres);
551  if (IsOut) {
552  NIDFucH.GetKeyV(CmtyV);
553  } else {
554  NIDHucH.GetKeyV(CmtyV);
555  }
556  if (CmtyV.Len() >= MinSz) { CmtyVV.Add(CmtyV); }
557  }
558  if ( NumComs != CmtyVV.Len()) {
559  printf("Community vector generated. %d communities are ommitted\n", NumComs.Val - CmtyVV.Len());
560  }
561 }
562 
563 void TCoda::GetCmtyVVUnSorted(const bool IsOut, TVec<TIntV>& CmtyVV, const double Thres, const int MinSz) {
564  CmtyVV.Gen(NumComs, 0);
565  for (int c = 0; c < NumComs; c++) {
566  TIntV CmtyV((int) (GetSumVal(IsOut, c) * 10), 0);
567  for (int u = 0; u < G->GetNodes(); u++) {
568  if (GetCom(IsOut, u, c) > Thres) { CmtyV.Add(NIDV[u]); }
569  }
570  if (CmtyV.Len() >= MinSz) { CmtyVV.Add(CmtyV); }
571  }
572  if ( NumComs != CmtyVV.Len()) {
573  printf("Community vector generated. %d communities are ommitted\n", NumComs.Val - CmtyVV.Len());
574  }
575 }
576 
578  PNGraph NewG = TNGraph::New(G->GetNodes(), -1);
579  for (TNGraph::TNodeI NI = G->BegNI(); NI < G->EndNI(); NI++) {
580  //add node
581  int NIdx = NI.GetId();
582  int NID = NIDV[NIdx];
583  if (! NewG->IsNode(NID)) { NewG->AddNode(NID); }
584  //add edge
585  for (int e = 0; e < NI.GetOutDeg(); e++) {
586  int OutNID = NIDV[NI.GetOutNId(e)];
587  if (! NewG->IsNode(OutNID)) { NewG->AddNode(OutNID); }
588  NewG->AddEdge(NID, OutNID);
589  }
590  }
591  IAssert(G->GetNodes() == NewG->GetNodes());
592  IAssert(G->GetEdges() == NewG->GetEdges());
593  return NewG;
594 }
595 
596 void TCoda::GetNIDValH(TIntFltH& NIdValInOutH, TIntFltH& NIdValOutH, TIntFltH& NIdValInH, const int CID, const double Thres) {
597  NIdValOutH.Gen((int) GetSumVal(true, CID) + 1);
598  NIdValInH.Gen((int) GetSumVal(false, CID) + 1);
599  NIdValInOutH.Gen((int) GetSumVal(false, CID) + 1);
600  if (GetSumVal(true, CID) < Thres && GetSumVal(false, CID) < Thres) { return; }
601  for (int u = 0; u < NIDV.Len(); u++) {
602  if (GetCom(true, u, CID) >= Thres && GetCom(false, u, CID) >= Thres) {
603  NIdValInOutH.AddDat(NIDV[u], GetCom(true, u, CID) + GetCom(false, u, CID));
604  }
605  if (GetCom(true, u, CID) >= Thres) {
606  NIdValOutH.AddDat(NIDV[u], GetCom(true, u, CID));
607  }
608  if (GetCom(false, u, CID) >= Thres) {
609  NIdValInH.AddDat(NIDV[u], GetCom(false, u, CID));
610  }
611  }
612  NIdValInH.SortByDat(false);
613  NIdValOutH.SortByDat(false);
614  NIdValInOutH.SortByDat(false);
615 }
616 
617 void TCoda::DumpMemberships(const TStr& OutFNm, const TStrHash<TInt>& NodeNameH, const double Thres) {
618  if (NodeNameH.Len() > 0) { IAssert(NodeNameH.Len() == G->GetNodes()); }
619  FILE* FId = fopen(OutFNm.CStr(), "wt");
620  TIntFltH CIDSumFH(NumComs);
621  for (int c = 0; c < NumComs; c++) {
622  CIDSumFH.AddDat(c, GetSumVal(true, c) * GetSumVal(false, c));
623  }
624  CIDSumFH.SortByDat(false);
625  for (int c = 0; c < NumComs; c++) {
626  int CID = CIDSumFH.GetKey(c);
627  TIntFltH NIDOutFH, NIDInFH, NIDInOutFH;
628  GetNIDValH(NIDInOutFH, NIDOutFH, NIDInFH, CID, Thres);
629  if (NIDOutFH.Len() == 0 || NIDInFH.Len() == 0) { continue; }
630 
631  /*
632  if (GetSumVal(true, CID) < Thres && GetSumVal(false, CID) < Thres) { continue; }
633  for (int u = 0; u < NIDV.Len(); u++) {
634  if (GetCom(true, u, CID) >= Thres && GetCom(false, u, CID) >= Thres) {
635  NIDInOutFH.AddDat(u, GetCom(true, u, CID) + GetCom(false, u, CID));
636  } else if (GetCom(true, u, CID) >= Thres) {
637  NIDOutFH.AddDat(u, GetCom(true, u, CID));
638  } else if (GetCom(false, u, CID) >= Thres) {
639  NIDInFH.AddDat(u, GetCom(false, u, CID));
640  }
641  }
642  NIDInOutFH.SortByDat(false);
643  NIDInFH.SortByDat(false);
644  NIDOutFH.SortByDat(false);
645  */
646  fprintf(FId, "%d\t%d\t%d\t%f\t%f\t%f\t", NIDInOutFH.Len(), NIDInFH.Len() - NIDInOutFH.Len(), NIDOutFH.Len() - NIDInOutFH.Len(), CIDSumFH.GetDat(CID).Val, GetSumVal(false, CID).Val, GetSumVal(true, CID).Val);
647  fprintf(FId, "InOut:\t");
648  for (int u = 0; u < NIDInOutFH.Len(); u++) {
649  int NIdx = NIDInOutFH.GetKey(u);
650  fprintf(FId, "%s (%f)\t", NodeNameH.GetKey(NIdx), NIDInOutFH[u].Val);
651  }
652  fprintf(FId, "In:\t");
653  for (int u = 0; u < NIDInFH.Len(); u++) {
654  int NIdx = NIDInFH.GetKey(u);
655  fprintf(FId, "%s (%f)\t", NodeNameH.GetKey(NIdx), NIDInFH[u].Val);
656  }
657  fprintf(FId, "Out:\t");
658  for (int u = 0; u < NIDOutFH.Len(); u++) {
659  int NIdx = NIDOutFH.GetKey(u);
660  fprintf(FId, "%s (%f)\t", NodeNameH.GetKey(NIdx), NIDOutFH[u].Val);
661  }
662  fprintf(FId, "\n");
663  }
664  fclose(FId);
665 }
666 
667 
668 void TCoda::DumpMemberships(const TStr& OutFNm, const double Thres) {
669  TStrHash<TInt> NodeNameH(G->GetNodes(), false);
670  for (int u = 0; u < NIDV.Len(); u++) { NodeNameH.AddKey(TStr::Fmt("%d", NIDV[u].Val)); }
671  DumpMemberships(OutFNm, NodeNameH, Thres);
672 }
673 
674 void TCoda::GetCmtyS(TIntSet& CmtySOut, TIntSet& CmtySIn, const int CID, const double Thres) {
675  CmtySOut.Gen(G->GetNodes() / 10);
676  CmtySIn.Gen(G->GetNodes() / 10);
677  for (int u = 0; u < NIDV.Len(); u++) {
678  if (GetCom(true, u, CID) > Thres) {
679  //CmtySOut.AddKey(
680  }
681  }
682 }
683 
684 /*
685 void TCoda::GetCmtyVVIn(TVec<TIntV>& CmtyVV) {
686  GetCmtyVVIn(CmtyVV, sqrt(1.0 / G->GetNodes()), 3);
687 }
688 
690 void TCoda::GetCmtyVVIn(TVec<TIntV>& CmtyVV, const double Thres, const int MinSz) {
691  CmtyVV.Gen(NumComs, 0);
692  TIntFltH CIDSumHH(NumComs);
693  for (int c = 0; c < SumHV.Len(); c++) {
694  CIDSumHH.AddDat(c, SumHV[c]);
695  }
696  CIDSumHH.SortByDat(false);
697  for (int c = 0; c < NumComs; c++) {
698  int CID = CIDSumHH.GetKey(c);
699  TIntFltH NIDHucH(H.Len() / 10);
700  TIntV CmtyV;
701  IAssert(SumHV[CID] == CIDSumHH.GetDat(CID));
702  if (SumHV[CID] < Thres) { continue; }
703  for (int u = 0; u < H.Len(); u++) {
704  int NID = u;
705  if (! NodesOk) { NID = NIDV[u]; }
706  if (GetComIn(u, CID) >= Thres) { NIDHucH.AddDat(NID, GetComIn(u, CID)); }
707  }
708  NIDHucH.SortByDat(false);
709  NIDHucH.GetKeyV(CmtyV);
710  if (CmtyV.Len() >= MinSz) { CmtyVV.Add(CmtyV); }
711  }
712  if ( NumComs != CmtyVV.Len()) {
713  printf("Community vector generated. %d communities are ommitted\n", NumComs.Val - CmtyVV.Len());
714  }
715 }
716 */
717 void TCoda::GetCmtyVV(TVec<TIntV>& CmtyVVOut, TVec<TIntV>& CmtyVVIn, const int MinSz) {
718  GetCmtyVV(false, CmtyVVIn, sqrt(1.0 / G->GetNodes()), MinSz);
719  GetCmtyVV(true, CmtyVVOut, sqrt(1.0 / G->GetNodes()), MinSz);
720 }
721 
722 void TCoda::GetCmtyVVUnSorted(TVec<TIntV>& CmtyVVOut, TVec<TIntV>& CmtyVVIn) {
723  GetCmtyVVUnSorted(false, CmtyVVIn, sqrt(1.0 / G->GetNodes()));
724  GetCmtyVVUnSorted(true, CmtyVVOut, sqrt(1.0 / G->GetNodes()));
725 }
726 
727 void TCoda::GetCmtyVV(TVec<TIntV>& CmtyVVOut, TVec<TIntV>& CmtyVVIn, const double ThresOut, const double ThresIn, const int MinSz) {
728  GetCmtyVV(false, CmtyVVIn, ThresIn, MinSz);
729  GetCmtyVV(true, CmtyVVOut, ThresOut, MinSz);
730 }
731 
733 int TCoda::FindComsByCV(const int NumThreads, const int MaxComs, const int MinComs, const int DivComs, const TStr OutFNm, const int EdgesForCV, const double StepAlpha, const double StepBeta) {
734  double ComsGap = exp(TMath::Log((double) MaxComs / (double) MinComs) / (double) DivComs);
735  TIntV ComsV;
736  ComsV.Add(MinComs);
737  while (ComsV.Len() < DivComs) {
738  int NewComs = int(ComsV.Last() * ComsGap);
739  if (NewComs == ComsV.Last().Val) { NewComs++; }
740  ComsV.Add(NewComs);
741  }
742  if (ComsV.Last() < MaxComs) { ComsV.Add(MaxComs); }
743  return FindComsByCV(ComsV, 0.1, NumThreads, OutFNm, EdgesForCV, StepAlpha, StepBeta);
744 }
745 
746 int TCoda::FindComsByCV(TIntV& ComsV, const double HOFrac, const int NumThreads, const TStr PlotLFNm, const int EdgesForCV, const double StepAlpha, const double StepBeta) {
747  if (ComsV.Len() == 0) {
748  int MaxComs = G->GetNodes() / 5;
749  ComsV.Add(2);
750  while(ComsV.Last() < MaxComs) { ComsV.Add(ComsV.Last() * 2); }
751  }
752  int MaxIterCV = 3;
753 
754  TVec<TVec<TIntSet> > HoldOutSets(MaxIterCV);
755  TFltIntPrV NIdPhiV;
756  TAGMFastUtil::GetNIdPhiV<PNGraph>(G, NIdPhiV);
757 
758  if (G->GetEdges() > EdgesForCV) { //if edges are many enough, use CV
759  printf("generating hold out set\n");
760  TIntV NIdV1, NIdV2;
761  G->GetNIdV(NIdV1);
762  G->GetNIdV(NIdV2);
763  for (int IterCV = 0; IterCV < MaxIterCV; IterCV++) {
764  // generate holdout sets
765  TAGMFastUtil::GenHoldOutPairs(G, HoldOutSets[IterCV], HOFrac, Rnd);
766  /*
767  HoldOutSets[IterCV].Gen(G->GetNodes());
768  const int HOTotal = int(HOFrac * G->GetNodes() * (G->GetNodes() - 1) / 2.0);
769  int HOCnt = 0;
770  int HOEdges = (int) TMath::Round(HOFrac * G->GetEdges());
771  printf("holding out %d edges...\n", HOEdges);
772  for (int he = 0; he < (int) HOEdges; he++) {
773  HoldOutSets[IterCV][EdgeV[he].Val1].AddKey(EdgeV[he].Val2);
774  HoldOutSets[IterCV][EdgeV[he].Val2].AddKey(EdgeV[he].Val1);
775  HOCnt++;
776  }
777  printf("%d Edges hold out\n", HOCnt);
778  while(HOCnt++ < HOTotal) {
779  int SrcNID = Rnd.GetUniDevInt(G->GetNodes());
780  int DstNID = Rnd.GetUniDevInt(G->GetNodes());
781  HoldOutSets[IterCV][SrcNID].AddKey(DstNID);
782  HoldOutSets[IterCV][DstNID].AddKey(SrcNID);
783  }
784  */
785  }
786 
787  printf("hold out set generated\n");
788  }
789 
790  TFltV HOLV(ComsV.Len());
791  TIntFltPrV ComsLV;
792  for (int c = 0; c < ComsV.Len(); c++) {
793  const int Coms = ComsV[c];
794  printf("Try number of Coms:%d\n", Coms);
795 
796  if (G->GetEdges() > EdgesForCV) { //if edges are many enough, use CV
797  for (int IterCV = 0; IterCV < MaxIterCV; IterCV++) {
798  HOVIDSV = HoldOutSets[IterCV];
799  NeighborComInit(NIdPhiV, Coms);
800  printf("Initialized\n");
801 
802  if (NumThreads == 1) {
803  printf("MLE without parallelization begins\n");
804  MLEGradAscent(0.05, 10 * G->GetNodes(), "", StepAlpha, StepBeta);
805  } else {
806  printf("MLE with parallelization begins\n");
807  MLEGradAscentParallel(0.05, 100, NumThreads, "", StepAlpha, StepBeta);
808  }
809  double HOL = LikelihoodHoldOut();
810  HOL = HOL < 0? HOL: TFlt::Mn;
811  HOLV[c] += HOL;
812  }
813  }
814  else {
815  HOVIDSV.Gen(G->GetNodes());
816  MLEGradAscent(0.0001, 100 * G->GetNodes(), "");
817  double BIC = 2 * Likelihood() - (double) G->GetNodes() * Coms * 2.0 * log ( (double) G->GetNodes());
818  HOLV[c] = BIC;
819  }
820  }
821  int EstComs = 2;
822  double MaxL = TFlt::Mn;
823  printf("\n");
824  for (int c = 0; c < ComsV.Len(); c++) {
825  ComsLV.Add(TIntFltPr(ComsV[c].Val, HOLV[c].Val));
826  printf("%d(%f)\t", ComsV[c].Val, HOLV[c].Val);
827  if (MaxL < HOLV[c]) {
828  MaxL = HOLV[c];
829  EstComs = ComsV[c];
830  }
831  }
832  printf("\n");
833  RandomInit(EstComs);
834  HOVIDSV.Gen(G->GetNodes());
835  if (! PlotLFNm.Empty()) {
836  TGnuPlot::PlotValV(ComsLV, PlotLFNm, "hold-out likelihood", "communities", "likelihood");
837  }
838  return EstComs;
839 }
840 
841 double TCoda::LikelihoodHoldOut(const bool DoParallel) {
842  double L = 0.0;
843  for (int u = 0; u < HOVIDSV.Len(); u++) {
844  for (int e = 0; e < HOVIDSV[u].Len(); e++) {
845  int VID = HOVIDSV[u][e];
846  if (VID == u) { continue; }
847  double Pred = Prediction(u, VID);
848  if (G->IsEdge(u, VID)) {
849  L += log(1.0 - Pred);
850  }
851  else {
852  L += NegWgt * log(Pred);
853  }
854  }
855  }
856  return L;
857 }
858 
859 double TCoda::GetStepSizeByLineSearch(const bool IsRow, const int UID, const TIntFltH& DeltaV, const TIntFltH& GradV, const double& Alpha, const double& Beta, const int MaxIter) {
860  double StepSize = 1.0;
861  double InitLikelihood = LikelihoodForNode(IsRow, UID);
862  TIntFltH NewVarV(DeltaV.Len());
863  for(int iter = 0; iter < MaxIter; iter++) {
864  for (int i = 0; i < DeltaV.Len(); i++){
865  int CID = DeltaV.GetKey(i);
866  double NewVal;
867  NewVal = GetCom(IsRow, UID, CID) + StepSize * DeltaV.GetDat(CID);
868  if (NewVal < MinVal) { NewVal = MinVal; }
869  if (NewVal > MaxVal) { NewVal = MaxVal; }
870  NewVarV.AddDat(CID, NewVal);
871  }
872  if (LikelihoodForNode(IsRow, UID, NewVarV) < InitLikelihood + Alpha * StepSize * DotProduct(GradV, DeltaV)) {
873  StepSize *= Beta;
874  } else {
875  break;
876  }
877  if (iter == MaxIter - 1) {
878  StepSize = 0.0;
879  break;
880  }
881  }
882  return StepSize;
883 }
884 
885 int TCoda::MLEGradAscent(const double& Thres, const int& MaxIter, const TStr PlotNm, const double StepAlpha, const double StepBeta) {
886  time_t InitTime = time(NULL);
887  TExeTm ExeTm, CheckTm;
888  int iter = 0, PrevIter = 0;
889  TIntFltPrV IterLV;
890  TNGraph::TNodeI UI;
891  double PrevL = TFlt::Mn, CurL = 0.0;
892  TIntV NIdxV(F.Len(), 0);
893  for (int i = 0; i < F.Len(); i++) { NIdxV.Add(i); }
894  IAssert(NIdxV.Len() == F.Len());
895  TIntFltH GradV;
896  while(iter < MaxIter) {
897  NIdxV.Shuffle(Rnd);
898  for (int ui = 0; ui < F.Len(); ui++, iter++) {
899  const bool IsRow = (ui % 2 == 0);
900  int u = NIdxV[ui]; //
901  //find set of candidate c (we only need to consider c to which a neighbor of u belongs to)
902  UI = G->GetNI(u);
903  const int Deg = IsRow? UI.GetOutDeg(): UI.GetInDeg();
904  TIntSet CIDSet(5 * Deg);
905  for (int e = 0; e < Deg; e++) {
906  int VID = IsRow? UI.GetOutNId(e): UI.GetInNId(e);
907  if (HOVIDSV[u].IsKey(VID)) { continue; }
908  TIntFltH NbhCIDH = IsRow? H[VID]: F[VID];
909  for (TIntFltH::TIter CI = NbhCIDH.BegI(); CI < NbhCIDH.EndI(); CI++) {
910  CIDSet.AddKey(CI.GetKey());
911  //printf("CI.GetKey:%d\n", CI.GetKey());
912  IAssert(CI.GetKey() <= NumComs);
913  }
914  }
915  TIntFltH& CurMem = IsRow? F[u]: H[u];
916  for (TIntFltH::TIter CI = CurMem.BegI(); CI < CurMem.EndI(); CI++) { //remove the community membership which U does not share with its neighbors
917  if (! CIDSet.IsKey(CI.GetKey())) {
918  DelCom(IsRow, u, CI.GetKey());
919  }
920  }
921  if (CIDSet.Empty()) { continue; }
922  GradientForNode(IsRow, u, GradV, CIDSet);
923  if (Norm2(GradV) < 1e-4) { continue; }
924  double LearnRate = GetStepSizeByLineSearch(IsRow, u, GradV, GradV, StepAlpha, StepBeta);
925  if (LearnRate == 0.0) { continue; }
926  for (int ci = 0; ci < GradV.Len(); ci++) {
927  int CID = GradV.GetKey(ci);
928  double Change = LearnRate * GradV.GetDat(CID);
929  double NewFuc = GetCom(IsRow, u, CID) + Change;
930  if (NewFuc <= 0.0) {
931  DelCom(IsRow, u, CID);
932  } else {
933  AddCom(IsRow, u, CID, NewFuc);
934  }
935  }
936  if (! PlotNm.Empty() && (iter + 1) % G->GetNodes() == 0) {
937  IterLV.Add(TIntFltPr(iter, Likelihood(false)));
938  }
939  }
940  printf("\r%d iterations (%f) [%lu sec]", iter, CurL, time(NULL) - InitTime);
941  fflush(stdout);
942  if (iter - PrevIter >= 2 * G->GetNodes() && iter > 10000) {
943  PrevIter = iter;
944  CurL = Likelihood();
945  if (PrevL > TFlt::Mn && ! PlotNm.Empty()) {
946  printf("\r%d iterations, Likelihood: %f, Diff: %f", iter, CurL, CurL - PrevL);
947  }
948  fflush(stdout);
949  if (CurL - PrevL <= Thres * fabs(PrevL)) { break; }
950  else { PrevL = CurL; }
951  }
952 
953  }
954  printf("\n");
955  printf("MLE for Lambda completed with %d iterations(%s)\n", iter, ExeTm.GetTmStr());
956  if (! PlotNm.Empty()) {
957  TGnuPlot::PlotValV(IterLV, PlotNm + ".likelihood_Q");
958  }
959  return iter;
960 }
961 
962 int TCoda::MLEGradAscentParallel(const double& Thres, const int& MaxIter, const int ChunkNum, const int ChunkSize, const TStr PlotNm, const double StepAlpha, const double StepBeta) {
963  //parallel
964  time_t InitTime = time(NULL);
965  //uint64 StartTm = TSecTm::GetCurTm().GetAbsSecs();
966  TExeTm ExeTm, CheckTm;
967  double PrevL = Likelihood(true);
968  TIntFltPrV IterLV;
969  int PrevIter = 0;
970  int iter = 0;
971  TIntV NIdxV(F.Len(), 0);
972  for (int i = 0; i < F.Len(); i++) { NIdxV.Add(i); }
973  TIntV NIDOPTV(F.Len()); //check if a node needs optimization or not 1: does not require optimization
974  NIDOPTV.PutAll(0);
975  TVec<TIntFltH> NewF(ChunkNum * ChunkSize);
976  TIntV NewNIDV(ChunkNum * ChunkSize);
977  TBoolV IsRowV(ChunkNum * ChunkSize);
978  for (iter = 0; iter < MaxIter; iter++) {
979  NIdxV.Clr(false);
980  for (int i = 0; i < F.Len(); i++) {
981  //if (NIDOPTV[i] == 0) { NIdxV.Add(i); }
982  NIdxV.Add(i);
983  }
984  IAssert (NIdxV.Len() <= F.Len());
985  NIdxV.Shuffle(Rnd);
986  // compute gradient for chunk of nodes
987 #pragma omp parallel for schedule(static, 1)
988  for (int TIdx = 0; TIdx < ChunkNum; TIdx++) {
989  TIntFltH GradV;
990  for (int ui = TIdx * ChunkSize; ui < (TIdx + 1) * ChunkSize; ui++) {
991  const bool IsRow = (ui % 2 == 0);
992  NewNIDV[ui] = -1;
993  if (ui > NIdxV.Len()) { continue; }
994  const int u = NIdxV[ui]; //
995  //find set of candidate c (we only need to consider c to which a neighbor of u belongs to)
996  TNGraph::TNodeI UI = G->GetNI(u);
997  const int Deg = IsRow? UI.GetOutDeg(): UI.GetInDeg();
998  TIntSet CIDSet(5 * Deg);
999  TIntFltH CurFU = IsRow? F[u]: H[u];
1000  for (int e = 0; e < Deg; e++) {
1001  int VID = IsRow? UI.GetOutNId(e): UI.GetInNId(e);
1002  if (HOVIDSV[u].IsKey(VID)) { continue; }
1003  TIntFltH& NbhCIDH = IsRow? H[VID]: F[VID];
1004  for (TIntFltH::TIter CI = NbhCIDH.BegI(); CI < NbhCIDH.EndI(); CI++) {
1005  CIDSet.AddKey(CI.GetKey());
1006  }
1007  }
1008  if (CIDSet.Empty()) {
1009  CurFU.Clr();
1010  }
1011  else {
1012  for (TIntFltH::TIter CI = CurFU.BegI(); CI < CurFU.EndI(); CI++) { //remove the community membership which U does not share with its neighbors
1013  if (! CIDSet.IsKey(CI.GetKey())) {
1014  CurFU.DelIfKey(CI.GetKey());
1015  }
1016  }
1017  GradientForNode(IsRow, u, GradV, CIDSet);
1018  if (Norm2(GradV) < 1e-4) { NIDOPTV[u] = 1; continue; }
1019  double LearnRate = GetStepSizeByLineSearch(IsRow, u, GradV, GradV, StepAlpha, StepBeta);
1020  if (LearnRate == 0.0) { NewNIDV[ui] = -2; continue; }
1021  for (int ci = 0; ci < GradV.Len(); ci++) {
1022  int CID = GradV.GetKey(ci);
1023  double Change = LearnRate * GradV.GetDat(CID);
1024  double NewFuc = CurFU.IsKey(CID)? CurFU.GetDat(CID) + Change : Change;
1025  if (NewFuc <= 0.0) {
1026  CurFU.DelIfKey(CID);
1027  } else {
1028  CurFU.AddDat(CID) = NewFuc;
1029  }
1030  }
1031  CurFU.Defrag();
1032  }
1033  //store changes
1034  NewF[ui] = CurFU;
1035  NewNIDV[ui] = u;
1036  IsRowV[ui] = IsRow;
1037  }
1038  }
1039  int NumNoChangeGrad = 0;
1040  int NumNoChangeStepSize = 0;
1041  for (int ui = 0; ui < NewNIDV.Len(); ui++) {
1042  int NewNID = NewNIDV[ui];
1043  if (NewNID == -1) { NumNoChangeGrad++; continue; }
1044  if (NewNID == -2) { NumNoChangeStepSize++; continue; }
1045  if (IsRowV[ui]) {
1046  for (TIntFltH::TIter CI = F[NewNID].BegI(); CI < F[NewNID].EndI(); CI++) {
1047  SumFV[CI.GetKey()] -= CI.GetDat();
1048  }
1049  } else {
1050  for (TIntFltH::TIter CI = H[NewNID].BegI(); CI < H[NewNID].EndI(); CI++) {
1051  SumHV[CI.GetKey()] -= CI.GetDat();
1052  }
1053  }
1054  }
1055 #pragma omp parallel for
1056  for (int ui = 0; ui < NewNIDV.Len(); ui++) {
1057  int NewNID = NewNIDV[ui];
1058  if (NewNID < 0) { continue; }
1059  if (IsRowV[ui]) {
1060  F[NewNID] = NewF[ui];
1061  } else {
1062  H[NewNID] = NewF[ui];
1063  }
1064  }
1065  for (int ui = 0; ui < NewNIDV.Len(); ui++) {
1066  int NewNID = NewNIDV[ui];
1067  if (NewNID < 0) { continue; }
1068  if (IsRowV[ui]) {
1069  for (TIntFltH::TIter CI = F[NewNID].BegI(); CI < F[NewNID].EndI(); CI++) {
1070  SumFV[CI.GetKey()] += CI.GetDat();
1071  }
1072  } else {
1073  for (TIntFltH::TIter CI = H[NewNID].BegI(); CI < H[NewNID].EndI(); CI++) {
1074  SumHV[CI.GetKey()] += CI.GetDat();
1075  }
1076  }
1077  }
1078  // update the nodes who are optimal
1079  for (int ui = 0; ui < NewNIDV.Len(); ui++) {
1080  int NewNID = NewNIDV[ui];
1081  if (NewNID < 0) { continue; }
1082  TNGraph::TNodeI UI = G->GetNI(NewNID);
1083  NIDOPTV[NewNID] = 0;
1084  for (int e = 0; e < UI.GetDeg(); e++) {
1085  NIDOPTV[UI.GetNbrNId(e)] = 0;
1086  }
1087  }
1088  int OPTCnt = 0;
1089  for (int i = 0; i < NIDOPTV.Len(); i++) { if (NIDOPTV[i] == 1) { OPTCnt++; } }
1090  /*
1091  if (! PlotNm.Empty()) {
1092  printf("\r%d iterations [%s] %lu secs", iter * ChunkSize * ChunkNum, ExeTm.GetTmStr(), TSecTm::GetCurTm().GetAbsSecs() - StartTm);
1093  if (PrevL > TFlt::Mn) { printf(" (%f) %d g %d s %d OPT", PrevL, NumNoChangeGrad, NumNoChangeStepSize, OPTCnt); }
1094  fflush(stdout);
1095  }
1096  */
1097  if ((iter - PrevIter) * ChunkSize * ChunkNum >= G->GetNodes()) {
1098  PrevIter = iter;
1099  double CurL = Likelihood(true);
1100  IterLV.Add(TIntFltPr(iter * ChunkSize * ChunkNum, CurL));
1101  printf("\r%d iterations, Likelihood: %f, Diff: %f [%lu secs]", iter, CurL, CurL - PrevL, time(NULL) - InitTime);
1102  fflush(stdout);
1103  if (CurL - PrevL <= Thres * fabs(PrevL)) {
1104  break;
1105  }
1106  else {
1107  PrevL = CurL;
1108  }
1109  }
1110  }
1111  if (! PlotNm.Empty()) {
1112  printf("\nMLE completed with %d iterations(%lu secs)\n", iter, time(NULL) - InitTime);
1113  TGnuPlot::PlotValV(IterLV, PlotNm + ".likelihood_Q");
1114  } else {
1115  printf("\rMLE completed with %d iterations(%lu secs)\n", iter, time(NULL) - InitTime);
1116  fflush(stdout);
1117  }
1118  return iter;
1119 }
#define IAssert(Cond)
Definition: bd.h:262
int GetNbrNId(const int &NodeN) const
Returns ID of NodeN-th neighboring node.
Definition: graph.h:376
PNGraph GetGraphRawNID()
void GetNIdV(TIntV &NIdV) const
Gets a vector IDs of all nodes in the graph.
Definition: graph.cpp:376
TNodeI BegNI() const
Returns an iterator referring to the first node in the graph.
Definition: graph.h:479
double LikelihoodForNode(const bool IsRow, const int UID)
void AddComIn(const int &NID, const int &CID, const double &Val)
Definition: agmdirected.h:128
TFlt NegWgt
Definition: agmdirected.h:23
Definition: tm.h:355
int Val
Definition: dt.h:1046
static PNGraph New()
Static constructor that returns a pointer to the graph. Call: PNGraph Graph = TNGraph::New().
Definition: graph.h:425
TFlt MinVal
Definition: agmdirected.h:21
TNodeI GetNI(const int &NId) const
Returns an iterator referring to the node of ID NId in the graph.
Definition: graph.h:483
void SetGraph(const PNGraph &GraphPt)
void Save(TSOut &SOut) const
Definition: dt.h:1060
void GetNIDValH(TIntFltH &NIdValInOutH, TIntFltH &NIdValOutH, TIntFltH &NIdValInH, const int CID, const double Thres)
double Likelihood(const bool DoParallel=false)
void NeighborComInit(const int InitComs)
Definition: agmdirected.cpp:83
TVec< TIntFltH > H
Definition: agmdirected.h:11
TIter BegI() const
Definition: hash.h:171
int GetEdges() const
Returns the number of edges in the graph.
Definition: graph.cpp:313
void Load(TSIn &SIn)
Definition: dt.h:901
double Val
Definition: dt.h:1295
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:547
int Len() const
Definition: hash.h:770
void Gen(const int &ExpectVals)
Definition: shash.h:1115
void SetCmtyVV(const TVec< TIntV > &CmtyVVOut, const TVec< TIntV > &CmtyVVIn)
int GetNodes() const
Returns the number of nodes in the graph.
Definition: graph.h:438
TFlt & GetSumVal(const bool IsOut, const int CID)
Definition: agmdirected.h:86
void Save(TSOut &SOut) const
Definition: dt.h:902
int AddNode(int NId=-1)
Adds a node of ID NId to the graph.
Definition: graph.cpp:236
void GetCmtyS(TIntSet &CmtySOut, TIntSet &CmtySIn, const int CID, const double Thres)
int FindComsByCV(TIntV &ComsV, const double HOFrac=0.2, const int NumThreads=20, const TStr PlotLFNm=TStr(), const int EdgesForCV=100, const double StepAlpha=0.3, const double StepBeta=0.1)
const TDat & GetDat(const TKey &Key) const
Definition: hash.h:220
TIter EndI() const
Definition: hash.h:176
void GetTopCIDs(TIntV &CIdV, const int TopK, const int IsAverage=1, const int MinSz=1)
void Load(TSIn &SIn)
Definition: ds.h:895
TBool DoParallel
Definition: agmdirected.h:25
TFlt RegCoef
Definition: agmdirected.h:14
static const double Mx
Definition: dt.h:1298
bool IsKey(const TKey &Key) const
Definition: shash.h:1148
const TKey & GetKey(const int &KeyId) const
Definition: shash.h:1141
void Defrag()
Definition: hash.h:513
double DotProduct(const TIntFltH &UV, const TIntFltH &VV)
Definition: agmdirected.h:154
Definition: fl.h:58
void Save(TSOut &SOut) const
Definition: ds.h:903
void GetCmtyVVUnSorted(const bool IsOut, TVec< TIntV > &CmtyVV, const double Thres, const int MinSz=3)
int MLEGradAscentParallel(const double &Thres, const int &MaxIter, const int ChunkNum, const int ChunkSize, const TStr PlotNm, const double StepAlpha=0.3, const double StepBeta=0.1)
TPair< TInt, TFlt > TIntFltPr
Definition: ds.h:87
const char * GetTmStr() const
Definition: tm.h:370
bool Empty() const
Definition: shash.h:1120
TIntV NIDV
Definition: agmdirected.h:13
double GetStepSizeByLineSearch(const bool IsRow, const int UID, const TIntFltH &DeltaV, const TIntFltH &GradV, const double &Alpha, const double &Beta, const int MaxIter=10)
void AddComOut(const int &NID, const int &CID, const double &Val)
Definition: agmdirected.h:121
void GetCmtyVV(TVec< TIntV > &CmtyVVOut, TVec< TIntV > &CmtyVVIn, const int MinSz=3)
int GetNumComs()
Definition: agmdirected.h:36
TVec< TIntSet > HOVIDSV
Definition: agmdirected.h:19
double LikelihoodHoldOut(const bool DoParallel=false)
void Sort(const bool &Asc=true)
Sorts the elements of the vector.
Definition: ds.h:1254
void Gen(const int &ExpectVals)
Definition: hash.h:180
TFltV SumFV
Definition: agmdirected.h:15
int AddEdge(const int &SrcNId, const int &DstNId)
Adds an edge from node IDs SrcNId to node DstNId to the graph.
Definition: graph.cpp:321
void PutAll(const TVal &Val)
Sets all elements of the vector to value Val.
Definition: ds.h:1166
static PNGraph Load(TSIn &SIn)
Static constructor that loads the graph from a stream SIn and returns a pointer to it...
Definition: graph.h:431
void Save(TSOut &SOut)
Definition: agmdirected.cpp:8
bool IsEdge(const int &SrcNId, const int &DstNId, const bool &IsDir=true) const
Tests whether an edge from node IDs SrcNId to DstNId exists in the graph.
Definition: graph.cpp:363
const TVal & GetDat(const TVal &Val) const
Returns reference to the first occurrence of element Val.
Definition: ds.h:807
PUNGraph GetSubGraph(const PUNGraph &Graph, const TIntV &NIdV, const bool &RenumberNodes)
Returns an induced subgraph of an undirected graph Graph with NIdV nodes with an optional node renumb...
Definition: subgraph.cpp:7
const char * GetKey(const int &KeyId) const
Definition: hash.h:821
int MLEGradAscent(const double &Thres, const int &MaxIter, const TStr PlotNm, const double StepAlpha=0.3, const double StepBeta=0.1)
void Load(TSIn &SIn)
Definition: dt.h:1059
TVec< TIntFltH > F
Definition: agmdirected.h:10
bool IsNode(const int &NId) const
Tests whether ID NId is a node.
Definition: graph.h:477
int AddKey(const TKey &Key)
Definition: shash.h:1254
const TVal & Last() const
Returns a reference to the last element of the vector.
Definition: ds.h:551
void Load(TSIn &SIn, const int &RndSeed=0)
Definition: agmdirected.cpp:25
double Prediction(const TIntFltH &FU, const TIntFltH &HV)
Definition: agmdirected.h:174
TFlt PNoCom
Definition: agmdirected.h:24
PNGraph G
Definition: agmdirected.h:9
int GetDeg() const
Returns degree of the current node, the sum of in-degree and out-degree.
Definition: graph.h:358
Definition: fl.h:128
TInt NumComs
Definition: agmdirected.h:18
TBool NodesOk
Definition: agmdirected.h:17
int AddKey(const char *Key)
Definition: hash.h:896
void DelCom(const bool IsOut, const int &NID, const int &CID)
Definition: agmdirected.h:135
void Save(TSOut &SOut) const
Saves the graph to a (binary) stream SOut.
Definition: graph.h:423
Definition: hash.h:729
void GetNonEdgePairScores(TFltIntIntTrV &ScoreV)
void RandomInit(const int InitComs)
Definition: agmdirected.cpp:43
int Len() const
Definition: shash.h:1121
double Norm2(const TIntFltH &UV)
Definition: agmdirected.h:189
void PutSeed(const int &_Seed)
Definition: dt.cpp:18
TNodeI EndNI() const
Returns an iterator referring to the past-the-end node in the graph.
Definition: graph.h:481
void GetKeyV(TVec< TKey > &KeyV) const
Definition: hash.h:442
int GetOutDeg() const
Returns out-degree of the current node.
Definition: graph.h:362
Definition: dt.h:412
bool Empty() const
Definition: dt.h:488
static TStr Fmt(const char *FmtStr,...)
Definition: dt.cpp:1599
TFltV SumHV
Definition: agmdirected.h:16
Node iterator. Only forward iteration (operator++) is supported.
Definition: graph.h:339
void Load(TSIn &SIn)
Definition: dt.h:1312
void DumpMemberships(const TStr &OutFNm, const TStrHash< TInt > &NodeNameH)
Definition: agmdirected.h:45
double GetUniDev()
Definition: dt.h:30
bool DelIfKey(const TKey &Key)
Definition: hash.h:201
void Clr(const bool &DoDel=true, const int &NoDelLim=-1, const bool &ResetDat=true)
Definition: hash.h:319
static void GenHoldOutPairs(const PGraph &G, TVec< TIntSet > &HoldOutSet, double HOFrac, TRnd &Rnd)
Definition: agmfast.h:159
void AddCom(const bool IsOut, const int &NID, const int &CID, const double &Val)
Definition: agmdirected.h:114
void Gen(const TSizeTy &_Vals)
Constructs a vector (an array) of _Vals elements.
Definition: ds.h:495
static double Log(const double &Val)
Definition: xmath.h:14
int GetUniDevInt(const int &Range=0)
Definition: dt.cpp:39
void GetCommunity(TIntV &CmtyVIn, TIntV &CmtyVOut, const int CID)
Definition: agmdirected.h:50
int GetInDeg() const
Returns in-degree of the current node.
Definition: graph.h:360
TRnd Rnd
Definition: agmdirected.h:12
char * CStr()
Definition: dt.h:476
bool IsKey(const TKey &Key) const
Definition: hash.h:216
int GetInNId(const int &NodeN) const
Returns ID of NodeN-th in-node (the node pointing to the current node).
Definition: graph.h:368
TFlt MaxVal
Definition: agmdirected.h:22
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:574
void DelSelfEdges(const PGraph &Graph)
Removes all the self-edges from the graph.
Definition: alg.h:419
void Trunc(const TSizeTy &_Vals=-1)
Truncates the vector's length and capacity to _Vals elements.
Definition: ds.h:982
double Sum(const TIntFltH &UV)
Definition: agmdirected.h:182
double GetCom(const bool IsOut, const int &NID, const int &CID)
Definition: agmdirected.h:93
int Len() const
Definition: hash.h:186
TDat & AddDat(const TKey &Key)
Definition: hash.h:196
static void PlotValV(const TVec< TVal1 > &ValV, const TStr &OutFNmPref, const TStr &Desc="", const TStr &XLabel="", const TStr &YLabel="", const TGpScaleTy &ScaleTy=gpsAuto, const bool &PowerFit=false, const TGpSeriesTy &SeriesTy=gpwLinesPoints)
Definition: gnuplot.h:398
void GradientForNode(const bool IsRow, const int UID, TIntFltH &GradU, const TIntSet &CIDSet)
int GetOutNId(const int &NodeN) const
Returns ID of NodeN-th out-node (the node the current node points to).
Definition: graph.h:372
void Save(TSOut &SOut) const
Definition: dt.h:1309
const TKey & GetKey(const int &KeyId) const
Definition: hash.h:210
TTriple< TFlt, TInt, TInt > TFltIntIntTr
Definition: ds.h:181
static const double Mn
Definition: dt.h:1297
Vector is a sequence TVal objects representing an array that can change in size.
Definition: ds.h:429
void SortByDat(const bool &Asc=true)
Definition: hash.h:250