SNAP Library 6.0, User Reference  2020-12-09 16:24:20
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
agmfast.h
Go to the documentation of this file.
1 #ifndef snap_agmfast_h
2 #define snap_agmfast_h
3 #include "Snap.h"
4 
7 class TAGMFast {
8 private:
9  PUNGraph G; //graph to fit
10  TVec<TIntFltH> F; // membership for each user (Size: Nodes * Coms)
11  TRnd Rnd; // random number generator
12  TIntV NIDV; // original node ID vector
13  TFlt RegCoef; //Regularization coefficient when we fit for P_c +: L1, -: L2
14  TFltV SumFV; // sum_u F_uc for each community c. Needed for efficient calculation
15  TBool NodesOk; // Node ID is from 0 ~ N-1
16  TInt NumComs; // number of communities
17 public:
18  TVec<TIntSet> HOVIDSV; //NID pairs to hold out for cross validation
19  TFlt MinVal; // minimum value of F (0)
20  TFlt MaxVal; // maximum value of F (for numerical reason)
21  TFlt NegWgt; // weight of negative example (a pair of nodes without an edge)
22  TFlt PNoCom; // base probability \varepsilon (edge probability between a pair of nodes sharing no community
23  TBool DoParallel; // whether to use parallelism for computation
24 
25  TAGMFast(const PUNGraph& GraphPt, const int& InitComs, const int RndSeed = 0): Rnd(RndSeed), RegCoef(0),
26  NodesOk(true), MinVal(0.0), MaxVal(1000.0), NegWgt(1.0) { SetGraph(GraphPt); RandomInit(InitComs); }
27  void SetGraph(const PUNGraph& GraphPt);
28  void SetRegCoef(const double _RegCoef) { RegCoef = _RegCoef; }
29  double GetRegCoef() { return RegCoef; }
30  void RandomInit(const int InitComs);
31  void NeighborComInit(const int InitComs);
32  void SetCmtyVV(const TVec<TIntV>& CmtyVV);
33  double Likelihood(const bool DoParallel = false);
34  double LikelihoodForRow(const int UID);
35  double LikelihoodForRow(const int UID, const TIntFltH& FU);
36  int MLENewton(const double& Thres, const int& MaxIter, const TStr& PlotNm = TStr());
37  void GradientForRow(const int UID, TIntFltH& GradU, const TIntSet& CIDSet);
38  double GradientForOneVar(const TFltV& AlphaKV, const int UID, const int CID, const double& Val);
39  double HessianForOneVar(const TFltV& AlphaKV, const int UID, const int CID, const double& Val);
40  double LikelihoodForOneVar(const TFltV& AlphaKV, const int UID, const int CID, const double& Val);
41  void GetCmtyVV(TVec<TIntV>& CmtyVV);
42  void GetCmtyVV(TVec<TIntV>& CmtyVV, const double Thres, const int MinSz = 3);
43  int FindComsByCV(TIntV& ComsV, const double HOFrac = 0.2, const int NumThreads = 20, const TStr& PlotLFNm = TStr(), const double StepAlpha = 0.3, const double StepBeta = 0.1);
44  int FindComsByCV(const int NumThreads, const int MaxComs, const int MinComs, const int DivComs, const TStr& OutFNm, const double StepAlpha = 0.3, const double StepBeta = 0.3);
45  double LikelihoodHoldOut(const bool DoParallel = false);
46  double GetStepSizeByLineSearch(const int UID, const TIntFltH& DeltaV, const TIntFltH& GradV, const double& Alpha, const double& Beta, const int MaxIter = 10);
47  int MLEGradAscent(const double& Thres, const int& MaxIter, const TStr& PlotNm, const double StepAlpha = 0.3, const double StepBeta = 0.1);
48  int MLEGradAscentParallel(const double& Thres, const int& MaxIter, const int ChunkNum, const int ChunkSize, const TStr& PlotNm, const double StepAlpha = 0.3, const double StepBeta = 0.1);
49  int MLEGradAscentParallel(const double& Thres, const int& MaxIter, const int ChunkNum, const TStr& PlotNm = TStr(), const double StepAlpha = 0.3, const double StepBeta = 0.1) {
50  int ChunkSize = G->GetNodes() / 10 / ChunkNum;
51  if (ChunkSize == 0) { ChunkSize = 1; }
52  return MLEGradAscentParallel(Thres, MaxIter, ChunkNum, ChunkSize, PlotNm, StepAlpha, StepBeta);
53  }
54  //double FindOptimalThres(const TVec<TIntV>& TrueCmtyVV, TVec<TIntV>& CmtyVV);
55  void Save(TSOut& SOut);
56  void Load(TSIn& SIn, const int& RndSeed = 0);
57  double inline GetCom(const int& NID, const int& CID) {
58  if (F[NID].IsKey(CID)) {
59  return F[NID].GetDat(CID);
60  } else {
61  return 0.0;
62  }
63  }
64  void inline AddCom(const int& NID, const int& CID, const double& Val) {
65  if (F[NID].IsKey(CID)) {
66  SumFV[CID] -= F[NID].GetDat(CID);
67  }
68  F[NID].AddDat(CID) = Val;
69  SumFV[CID] += Val;
70  }
71 
72  void inline DelCom(const int& NID, const int& CID) {
73  if (F[NID].IsKey(CID)) {
74  SumFV[CID] -= F[NID].GetDat(CID);
75  F[NID].DelKey(CID);
76  }
77  }
78  double inline DotProduct(const TIntFltH& UV, const TIntFltH& VV) {
79  double DP = 0;
80  if (UV.Len() > VV.Len()) {
81  for (TIntFltH::TIter HI = UV.BegI(); HI < UV.EndI(); HI++) {
82  if (VV.IsKey(HI.GetKey())) {
83  DP += VV.GetDat(HI.GetKey()) * HI.GetDat();
84  }
85  }
86  } else {
87  for (TIntFltH::TIter HI = VV.BegI(); HI < VV.EndI(); HI++) {
88  if (UV.IsKey(HI.GetKey())) {
89  DP += UV.GetDat(HI.GetKey()) * HI.GetDat();
90  }
91  }
92  }
93  return DP;
94  }
95  double inline DotProduct(const int& UID, const int& VID) {
96  return DotProduct(F[UID], F[VID]);
97  }
98  double inline Prediction(const TIntFltH& FU, const TIntFltH& FV) {
99  double DP = log (1.0 / (1.0 - PNoCom)) + DotProduct(FU, FV);
100  IAssertR(DP > 0.0, TStr::Fmt("DP: %f", DP));
101  return exp(- DP);
102  }
103  double inline Prediction(const int& UID, const int& VID) {
104  return Prediction(F[UID], F[VID]);
105  }
106  double inline Sum(const TIntFltH& UV) {
107  double N = 0.0;
108  for (TIntFltH::TIter HI = UV.BegI(); HI < UV.EndI(); HI++) {
109  N += HI.GetDat();
110  }
111  return N;
112  }
113  double inline Norm2(const TIntFltH& UV) {
114  double N = 0.0;
115  for (TIntFltH::TIter HI = UV.BegI(); HI < UV.EndI(); HI++) {
116  N += HI.GetDat() * HI.GetDat();
117  }
118  return N;
119  }
120 };
121 
122 
125 
127 public:
128  //static double GetConductance(const PUNGraph& Graph, const TIntSet& CmtyS, const int Edges);
129  //static double GetConductance(const PNGraph& Graph, const TIntSet& CmtyS, const int Edges);
130 template<class PGraph>
131 static double GetConductance(const PGraph& Graph, const TIntSet& CmtyS, const int Edges) {
132  const bool GraphType = HasGraphFlag(typename PGraph::TObj, gfDirected);
133  int Edges2;
134  if (GraphType) { Edges2 = Edges >= 0 ? Edges : Graph->GetEdges(); }
135  else { Edges2 = Edges >= 0 ? 2 * Edges : Graph->GetEdges(); }
136  int Vol = 0, Cut = 0;
137  double Phi = 0.0;
138  for (int i = 0; i < CmtyS.Len(); i++) {
139  if (! Graph->IsNode(CmtyS[i])) { continue; }
140  typename PGraph::TObj::TNodeI NI = Graph->GetNI(CmtyS[i]);
141  for (int e = 0; e < NI.GetOutDeg(); e++) {
142  if (! CmtyS.IsKey(NI.GetOutNId(e))) { Cut += 1; }
143  }
144  Vol += NI.GetOutDeg();
145  }
146  // get conductance
147  if (Vol != Edges2) {
148  if (2 * Vol > Edges2) { Phi = Cut / double (Edges2 - Vol); }
149  else if (Vol == 0) { Phi = 0.0; }
150  else { Phi = Cut / double(Vol); }
151  } else {
152  if (Vol == Edges2) { Phi = 1.0; }
153  }
154  return Phi;
155 }
156 
157 
158 template<class PGraph>
159  static void GenHoldOutPairs(const PGraph& G, TVec<TIntSet>& HoldOutSet, double HOFrac, TRnd& Rnd) {
160  TIntPrV EdgeV(G->GetEdges(), 0);
161  for (typename PGraph::TObj::TEdgeI EI = G->BegEI(); EI < G->EndEI(); EI++) {
162  EdgeV.Add(TIntPr(EI.GetSrcNId(), EI.GetDstNId()));
163  }
164  EdgeV.Shuffle(Rnd);
165 
166  const bool GraphType = HasGraphFlag(typename PGraph::TObj, gfDirected);
167  HoldOutSet.Gen(G->GetNodes());
168  int HOTotal = int(HOFrac * G->GetNodes() * (G->GetNodes() - 1) / 2.0);
169  if (GraphType) { HOTotal *= 2;}
170  int HOCnt = 0;
171  int HOEdges = (int) TMath::Round(HOFrac * G->GetEdges());
172  printf("holding out %d edges...\n", HOEdges);
173  for (int he = 0; he < (int) HOEdges; he++) {
174  HoldOutSet[EdgeV[he].Val1].AddKey(EdgeV[he].Val2);
175  if (! GraphType) { HoldOutSet[EdgeV[he].Val2].AddKey(EdgeV[he].Val1); }
176  HOCnt++;
177  }
178  printf("%d Edges hold out\n", HOCnt);
179  while(HOCnt++ < HOTotal) {
180  int SrcNID = Rnd.GetUniDevInt(G->GetNodes());
181  int DstNID = Rnd.GetUniDevInt(G->GetNodes());
182  if (SrcNID == DstNID) { continue; }
183  HoldOutSet[SrcNID].AddKey(DstNID);
184  if (! GraphType) { HoldOutSet[DstNID].AddKey(SrcNID); }
185  }
186  }
187 
188 template<class PGraph>
189  static void GetNbhCom(const PGraph& Graph, const int NID, TIntSet& NBCmtyS) {
190  typename PGraph::TObj::TNodeI NI = Graph->GetNI(NID);
191  NBCmtyS.Gen(NI.GetDeg());
192  NBCmtyS.AddKey(NID);
193  for (int e = 0; e < NI.GetDeg(); e++) {
194  NBCmtyS.AddKey(NI.GetNbrNId(e));
195  }
196  }
197 template<class PGraph>
198  static void GetNIdPhiV(const PGraph& G, TFltIntPrV& NIdPhiV) {
199  NIdPhiV.Gen(G->GetNodes(), 0);
200  const int Edges = G->GetEdges();
201  TExeTm RunTm;
202  //compute conductance of neighborhood community
203  for (typename PGraph::TObj::TNodeI NI = G->BegNI(); NI < G->EndNI(); NI++) {
204  TIntSet NBCmty(NI.GetDeg() + 1);
205  double Phi;
206  if (NI.GetDeg() < 5) { //do not include nodes with too few degree
207  Phi = 1.0;
208  } else {
209  TAGMFastUtil::GetNbhCom<PGraph>(G, NI.GetId(), NBCmty);
210  Phi = TAGMFastUtil::GetConductance(G, NBCmty, Edges);
211  }
212  NIdPhiV.Add(TFltIntPr(Phi, NI.GetId()));
213  }
214  printf("conductance computation completed [%s]\n", RunTm.GetTmStr());
215  fflush(stdout);
216  }
217 };
218 
219 #endif
double Norm2(const TIntFltH &UV)
Definition: agmfast.h:113
TPair< TInt, TInt > TIntPr
Definition: ds.h:83
void SetCmtyVV(const TVec< TIntV > &CmtyVV)
Definition: agmfast.cpp:125
PUNGraph G
Definition: agmfast.h:9
TFlt MinVal
Definition: agmfast.h:19
int MLENewton(const double &Thres, const int &MaxIter, const TStr &PlotNm=TStr())
Newton method: DEPRECATED.
Definition: agmfast.cpp:416
TFlt NegWgt
Definition: agmfast.h:21
double Prediction(const int &UID, const int &VID)
Definition: agmfast.h:103
TBool DoParallel
Definition: agmfast.h:23
#define IAssertR(Cond, Reason)
Definition: bd.h:265
TPair< TFlt, TInt > TFltIntPr
Definition: ds.h:97
int MLEGradAscentParallel(const double &Thres, const int &MaxIter, const int ChunkNum, const int ChunkSize, const TStr &PlotNm, const double StepAlpha=0.3, const double StepBeta=0.1)
Definition: agmfast.cpp:761
Definition: tm.h:355
double LikelihoodHoldOut(const bool DoParallel=false)
Definition: agmfast.cpp:647
double Prediction(const TIntFltH &FU, const TIntFltH &FV)
Definition: agmfast.h:98
void NeighborComInit(const int InitComs)
Definition: agmfast.cpp:60
void Save(TSOut &SOut)
Definition: agmfast.cpp:7
Definition: dt.h:11
static double GetConductance(const PGraph &Graph, const TIntSet &CmtyS, const int Edges)
Definition: agmfast.h:131
double GradientForOneVar(const TFltV &AlphaKV, const int UID, const int CID, const double &Val)
Definition: agmfast.cpp:339
void DelCom(const int &NID, const int &CID)
Definition: agmfast.h:72
static void GetNbhCom(const PGraph &Graph, const int NID, TIntSet &NBCmtyS)
Definition: agmfast.h:189
int FindComsByCV(TIntV &ComsV, const double HOFrac=0.2, const int NumThreads=20, const TStr &PlotLFNm=TStr(), const double StepAlpha=0.3, const double StepBeta=0.1)
Definition: agmfast.cpp:554
TIter BegI() const
Definition: hash.h:213
void SetRegCoef(const double _RegCoef)
Definition: agmfast.h:28
void Gen(const int &ExpectVals)
Definition: shash.h:1115
const TDat & GetDat(const TKey &Key) const
Definition: hash.h:262
TIter EndI() const
Definition: hash.h:218
bool IsKey(const TKey &Key) const
Definition: shash.h:1148
int MLEGradAscent(const double &Thres, const int &MaxIter, const TStr &PlotNm, const double StepAlpha=0.3, const double StepBeta=0.1)
Definition: agmfast.cpp:690
double GetRegCoef()
Definition: agmfast.h:29
int GetNodes() const
Returns the number of nodes in the graph.
Definition: graph.h:192
Definition: dt.h:1386
double GetCom(const int &NID, const int &CID)
Definition: agmfast.h:57
double LikelihoodForOneVar(const TFltV &AlphaKV, const int UID, const int CID, const double &Val)
Definition: agmfast.cpp:364
Definition: fl.h:58
TBool NodesOk
Definition: agmfast.h:15
double DotProduct(const int &UID, const int &VID)
Definition: agmfast.h:95
TFlt PNoCom
Definition: agmfast.h:22
const char * GetTmStr() const
Definition: tm.h:370
#define HasGraphFlag(TGraph, Flag)
For quick testing of the properties of the graph/network object (see TGraphFlag). ...
Definition: gbase.h:41
TVec< TIntFltH > F
Definition: agmfast.h:10
static void GetNIdPhiV(const PGraph &G, TFltIntPrV &NIdPhiV)
Definition: agmfast.h:198
double GetStepSizeByLineSearch(const int UID, const TIntFltH &DeltaV, const TIntFltH &GradV, const double &Alpha, const double &Beta, const int MaxIter=10)
Definition: agmfast.cpp:665
const TVal & GetDat(const TVal &Val) const
Returns reference to the first occurrence of element Val.
Definition: ds.h:838
static double Round(const double &Val)
Definition: xmath.h:16
TInt NumComs
Definition: agmfast.h:16
void GetCmtyVV(TVec< TIntV > &CmtyVV)
Definition: agmfast.cpp:508
int AddKey(const TKey &Key)
Definition: shash.h:1254
int MLEGradAscentParallel(const double &Thres, const int &MaxIter, const int ChunkNum, const TStr &PlotNm=TStr(), const double StepAlpha=0.3, const double StepBeta=0.1)
Definition: agmfast.h:49
Definition: fl.h:128
void SetGraph(const PUNGraph &GraphPt)
Definition: agmfast.cpp:146
Utility functions for BigCLAM, Coda.
Definition: agmfast.h:126
Definition: dt.h:1137
directed graph (TNGraph, TNEGraph), else graph is undirected TUNGraph
Definition: gbase.h:13
void Load(TSIn &SIn, const int &RndSeed=0)
Definition: agmfast.cpp:22
double LikelihoodForRow(const int UID)
Definition: agmfast.cpp:193
TFltV SumFV
Definition: agmfast.h:14
int Len() const
Definition: shash.h:1121
double HessianForOneVar(const TFltV &AlphaKV, const int UID, const int CID, const double &Val)
Definition: agmfast.cpp:394
Community detection with AGM. Sparse AGM-fast with coordinate ascent.
Definition: agmfast.h:7
TVec< TIntSet > HOVIDSV
Definition: agmfast.h:18
TFlt RegCoef
Definition: agmfast.h:13
Definition: dt.h:412
static TStr Fmt(const char *FmtStr,...)
Definition: dt.cpp:1599
TIntV NIDV
Definition: agmfast.h:12
TRnd Rnd
Definition: agmfast.h:11
TAGMFast(const PUNGraph &GraphPt, const int &InitComs, const int RndSeed=0)
Definition: agmfast.h:25
double Likelihood(const bool DoParallel=false)
Definition: agmfast.cpp:173
double Sum(const TIntFltH &UV)
Definition: agmfast.h:106
static void GenHoldOutPairs(const PGraph &G, TVec< TIntSet > &HoldOutSet, double HOFrac, TRnd &Rnd)
Definition: agmfast.h:159
TFlt MaxVal
Definition: agmfast.h:20
void Gen(const TSizeTy &_Vals)
Constructs a vector (an array) of _Vals elements.
Definition: ds.h:523
void AddCom(const int &NID, const int &CID, const double &Val)
Definition: agmfast.h:64
int GetUniDevInt(const int &Range=0)
Definition: dt.cpp:39
void RandomInit(const int InitComs)
Definition: agmfast.cpp:38
bool IsKey(const TKey &Key) const
Definition: hash.h:258
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:602
Definition: dt.h:974
double DotProduct(const TIntFltH &UV, const TIntFltH &VV)
Definition: agmfast.h:78
int Len() const
Definition: hash.h:228
void GradientForRow(const int UID, TIntFltH &GradU, const TIntSet &CIDSet)
Definition: agmfast.cpp:250
Vector is a sequence TVal objects representing an array that can change in size.
Definition: ds.h:430