SNAP Library 4.0, Developer Reference  2017-07-27 13:18:06
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
agmfast.h
Go to the documentation of this file.
1 #ifndef snap_agmfast_h
2 #define snap_agmfast_h
3 #include "Snap.h"
4 
7 class TAGMFast {
8 private:
9  PUNGraph G; //graph to fit
10  TVec<TIntFltH> F; // membership for each user (Size: Nodes * Coms)
11  TRnd Rnd; // random number generator
12  TIntV NIDV; // original node ID vector
13  TFlt RegCoef; //Regularization coefficient when we fit for P_c +: L1, -: L2
14  TFltV SumFV; // sum_u F_uc for each community c. Needed for efficient calculation
15  TBool NodesOk; // Node ID is from 0 ~ N-1
16  TInt NumComs; // number of communities
17 public:
18  TVec<TIntSet> HOVIDSV; //NID pairs to hold out for cross validation
19  TFlt MinVal; // minimum value of F (0)
20  TFlt MaxVal; // maximum value of F (for numerical reason)
21  TFlt NegWgt; // weight of negative example (a pair of nodes without an edge)
22  TFlt PNoCom; // base probability \varepsilon (edge probability between a pair of nodes sharing no community
23  TBool DoParallel; // whether to use parallelism for computation
24 
25  TAGMFast(const PUNGraph& GraphPt, const int& InitComs, const int RndSeed = 0): Rnd(RndSeed), RegCoef(0),
26  NodesOk(true), MinVal(0.0), MaxVal(1000.0), NegWgt(1.0) { SetGraph(GraphPt); RandomInit(InitComs); }
27  void SetGraph(const PUNGraph& GraphPt);
28  void SetRegCoef(const double _RegCoef) { RegCoef = _RegCoef; }
29  double GetRegCoef() { return RegCoef; }
30  void RandomInit(const int InitComs);
31  void NeighborComInit(const int InitComs);
32  void SetCmtyVV(const TVec<TIntV>& CmtyVV);
33  double Likelihood(const bool DoParallel = false);
34  double LikelihoodForRow(const int UID);
35  double LikelihoodForRow(const int UID, const TIntFltH& FU);
36  int MLENewton(const double& Thres, const int& MaxIter, const TStr& PlotNm = TStr());
37  void GradientForRow(const int UID, TIntFltH& GradU, const TIntSet& CIDSet);
38  double GradientForOneVar(const TFltV& AlphaKV, const int UID, const int CID, const double& Val);
39  double HessianForOneVar(const TFltV& AlphaKV, const int UID, const int CID, const double& Val);
40  double LikelihoodForOneVar(const TFltV& AlphaKV, const int UID, const int CID, const double& Val);
41  void GetCmtyVV(TVec<TIntV>& CmtyVV);
42  void GetCmtyVV(TVec<TIntV>& CmtyVV, const double Thres, const int MinSz = 3);
43  int FindComsByCV(TIntV& ComsV, const double HOFrac = 0.2, const int NumThreads = 20, const TStr& PlotLFNm = TStr(), const double StepAlpha = 0.3, const double StepBeta = 0.1);
44  int FindComsByCV(const int NumThreads, const int MaxComs, const int MinComs, const int DivComs, const TStr& OutFNm, const double StepAlpha = 0.3, const double StepBeta = 0.3);
45  double LikelihoodHoldOut(const bool DoParallel = false);
46  double GetStepSizeByLineSearch(const int UID, const TIntFltH& DeltaV, const TIntFltH& GradV, const double& Alpha, const double& Beta, const int MaxIter = 10);
47  int MLEGradAscent(const double& Thres, const int& MaxIter, const TStr& PlotNm, const double StepAlpha = 0.3, const double StepBeta = 0.1);
48  int MLEGradAscentParallel(const double& Thres, const int& MaxIter, const int ChunkNum, const int ChunkSize, const TStr& PlotNm, const double StepAlpha = 0.3, const double StepBeta = 0.1);
49  int MLEGradAscentParallel(const double& Thres, const int& MaxIter, const int ChunkNum, const TStr& PlotNm = TStr(), const double StepAlpha = 0.3, const double StepBeta = 0.1) {
50  int ChunkSize = G->GetNodes() / 10 / ChunkNum;
51  if (ChunkSize == 0) { ChunkSize = 1; }
52  return MLEGradAscentParallel(Thres, MaxIter, ChunkNum, ChunkSize, PlotNm, StepAlpha, StepBeta);
53  }
54  //double FindOptimalThres(const TVec<TIntV>& TrueCmtyVV, TVec<TIntV>& CmtyVV);
55  void Save(TSOut& SOut);
56  void Load(TSIn& SIn, const int& RndSeed = 0);
57  double inline GetCom(const int& NID, const int& CID) {
58  if (F[NID].IsKey(CID)) {
59  return F[NID].GetDat(CID);
60  } else {
61  return 0.0;
62  }
63  }
64  void inline AddCom(const int& NID, const int& CID, const double& Val) {
65  if (F[NID].IsKey(CID)) {
66  SumFV[CID] -= F[NID].GetDat(CID);
67  }
68  F[NID].AddDat(CID) = Val;
69  SumFV[CID] += Val;
70  }
71 
72  void inline DelCom(const int& NID, const int& CID) {
73  if (F[NID].IsKey(CID)) {
74  SumFV[CID] -= F[NID].GetDat(CID);
75  F[NID].DelKey(CID);
76  }
77  }
78  double inline DotProduct(const TIntFltH& UV, const TIntFltH& VV) {
79  double DP = 0;
80  if (UV.Len() > VV.Len()) {
81  for (TIntFltH::TIter HI = UV.BegI(); HI < UV.EndI(); HI++) {
82  if (VV.IsKey(HI.GetKey())) {
83  DP += VV.GetDat(HI.GetKey()) * HI.GetDat();
84  }
85  }
86  } else {
87  for (TIntFltH::TIter HI = VV.BegI(); HI < VV.EndI(); HI++) {
88  if (UV.IsKey(HI.GetKey())) {
89  DP += UV.GetDat(HI.GetKey()) * HI.GetDat();
90  }
91  }
92  }
93  return DP;
94  }
95  double inline DotProduct(const int& UID, const int& VID) {
96  return DotProduct(F[UID], F[VID]);
97  }
98  double inline Prediction(const TIntFltH& FU, const TIntFltH& FV) {
99  double DP = log (1.0 / (1.0 - PNoCom)) + DotProduct(FU, FV);
100  IAssertR(DP > 0.0, TStr::Fmt("DP: %f", DP));
101  return exp(- DP);
102  }
103  double inline Prediction(const int& UID, const int& VID) {
104  return Prediction(F[UID], F[VID]);
105  }
106  double inline Sum(const TIntFltH& UV) {
107  double N = 0.0;
108  for (TIntFltH::TIter HI = UV.BegI(); HI < UV.EndI(); HI++) {
109  N += HI.GetDat();
110  }
111  return N;
112  }
113  double inline Norm2(const TIntFltH& UV) {
114  double N = 0.0;
115  for (TIntFltH::TIter HI = UV.BegI(); HI < UV.EndI(); HI++) {
116  N += HI.GetDat() * HI.GetDat();
117  }
118  return N;
119  }
120 };
121 
122 
125 
127 public:
128  //static double GetConductance(const PUNGraph& Graph, const TIntSet& CmtyS, const int Edges);
129  //static double GetConductance(const PNGraph& Graph, const TIntSet& CmtyS, const int Edges);
130 template<class PGraph>
131 static double GetConductance(const PGraph& Graph, const TIntSet& CmtyS, const int Edges) {
132  const bool GraphType = HasGraphFlag(typename PGraph::TObj, gfDirected);
133  int Edges2;
134  if (GraphType) { Edges2 = Edges >= 0 ? Edges : Graph->GetEdges(); }
135  else { Edges2 = Edges >= 0 ? 2 * Edges : Graph->GetEdges(); }
136  int Vol = 0, Cut = 0;
137  double Phi = 0.0;
138  for (int i = 0; i < CmtyS.Len(); i++) {
139  if (! Graph->IsNode(CmtyS[i])) { continue; }
140  typename PGraph::TObj::TNodeI NI = Graph->GetNI(CmtyS[i]);
141  for (int e = 0; e < NI.GetOutDeg(); e++) {
142  if (! CmtyS.IsKey(NI.GetOutNId(e))) { Cut += 1; }
143  }
144  Vol += NI.GetOutDeg();
145  }
146  // get conductance
147  if (Vol != Edges2) {
148  if (2 * Vol > Edges2) { Phi = Cut / double (Edges2 - Vol); }
149  else if (Vol == 0) { Phi = 0.0; }
150  else { Phi = Cut / double(Vol); }
151  } else {
152  if (Vol == Edges2) { Phi = 1.0; }
153  }
154  return Phi;
155 }
156 
157 
158 template<class PGraph>
159  static void GenHoldOutPairs(const PGraph& G, TVec<TIntSet>& HoldOutSet, double HOFrac, TRnd& Rnd) {
160  TIntPrV EdgeV(G->GetEdges(), 0);
161  for (typename PGraph::TObj::TEdgeI EI = G->BegEI(); EI < G->EndEI(); EI++) {
162  EdgeV.Add(TIntPr(EI.GetSrcNId(), EI.GetDstNId()));
163  }
164  EdgeV.Shuffle(Rnd);
165 
166  const bool GraphType = HasGraphFlag(typename PGraph::TObj, gfDirected);
167  HoldOutSet.Gen(G->GetNodes());
168  int HOTotal = int(HOFrac * G->GetNodes() * (G->GetNodes() - 1) / 2.0);
169  if (GraphType) { HOTotal *= 2;}
170  int HOCnt = 0;
171  int HOEdges = (int) TMath::Round(HOFrac * G->GetEdges());
172  printf("holding out %d edges...\n", HOEdges);
173  for (int he = 0; he < (int) HOEdges; he++) {
174  HoldOutSet[EdgeV[he].Val1].AddKey(EdgeV[he].Val2);
175  if (! GraphType) { HoldOutSet[EdgeV[he].Val2].AddKey(EdgeV[he].Val1); }
176  HOCnt++;
177  }
178  printf("%d Edges hold out\n", HOCnt);
179  while(HOCnt++ < HOTotal) {
180  int SrcNID = Rnd.GetUniDevInt(G->GetNodes());
181  int DstNID = Rnd.GetUniDevInt(G->GetNodes());
182  if (SrcNID == DstNID) { continue; }
183  HoldOutSet[SrcNID].AddKey(DstNID);
184  if (! GraphType) { HoldOutSet[DstNID].AddKey(SrcNID); }
185  }
186  }
187 
188 template<class PGraph>
189  static void GetNbhCom(const PGraph& Graph, const int NID, TIntSet& NBCmtyS) {
190  typename PGraph::TObj::TNodeI NI = Graph->GetNI(NID);
191  NBCmtyS.Gen(NI.GetDeg());
192  NBCmtyS.AddKey(NID);
193  for (int e = 0; e < NI.GetDeg(); e++) {
194  NBCmtyS.AddKey(NI.GetNbrNId(e));
195  }
196  }
197 template<class PGraph>
198  static void GetNIdPhiV(const PGraph& G, TFltIntPrV& NIdPhiV) {
199  NIdPhiV.Gen(G->GetNodes(), 0);
200  const int Edges = G->GetEdges();
201  TExeTm RunTm;
202  //compute conductance of neighborhood community
203  for (typename PGraph::TObj::TNodeI NI = G->BegNI(); NI < G->EndNI(); NI++) {
204  TIntSet NBCmty(NI.GetDeg() + 1);
205  double Phi;
206  if (NI.GetDeg() < 5) { //do not include nodes with too few degree
207  Phi = 1.0;
208  } else {
209  TAGMFastUtil::GetNbhCom<PGraph>(G, NI.GetId(), NBCmty);
210  Phi = TAGMFastUtil::GetConductance(G, NBCmty, Edges);
211  }
212  NIdPhiV.Add(TFltIntPr(Phi, NI.GetId()));
213  }
214  printf("conductance computation completed [%s]\n", RunTm.GetTmStr());
215  fflush(stdout);
216  }
217 };
218 
219 #endif
double Norm2(const TIntFltH &UV)
Definition: agmfast.h:113
TPair< TInt, TInt > TIntPr
Definition: ds.h:83
void SetCmtyVV(const TVec< TIntV > &CmtyVV)
Definition: agmfast.cpp:125
PUNGraph G
Definition: agmfast.h:9
TFlt MinVal
Definition: agmfast.h:19
int MLENewton(const double &Thres, const int &MaxIter, const TStr &PlotNm=TStr())
Newton method: DEPRECATED.
Definition: agmfast.cpp:416
TFlt NegWgt
Definition: agmfast.h:21
double Prediction(const int &UID, const int &VID)
Definition: agmfast.h:103
TBool DoParallel
Definition: agmfast.h:23
#define IAssertR(Cond, Reason)
Definition: bd.h:265
TPair< TFlt, TInt > TFltIntPr
Definition: ds.h:97
int MLEGradAscentParallel(const double &Thres, const int &MaxIter, const int ChunkNum, const int ChunkSize, const TStr &PlotNm, const double StepAlpha=0.3, const double StepBeta=0.1)
Definition: agmfast.cpp:761
Definition: tm.h:355
double LikelihoodHoldOut(const bool DoParallel=false)
Definition: agmfast.cpp:647
double Prediction(const TIntFltH &FU, const TIntFltH &FV)
Definition: agmfast.h:98
void NeighborComInit(const int InitComs)
Definition: agmfast.cpp:60
void Save(TSOut &SOut)
Definition: agmfast.cpp:7
Definition: dt.h:11
static double GetConductance(const PGraph &Graph, const TIntSet &CmtyS, const int Edges)
Definition: agmfast.h:131
double GradientForOneVar(const TFltV &AlphaKV, const int UID, const int CID, const double &Val)
Definition: agmfast.cpp:339
void DelCom(const int &NID, const int &CID)
Definition: agmfast.h:72
static void GetNbhCom(const PGraph &Graph, const int NID, TIntSet &NBCmtyS)
Definition: agmfast.h:189
int FindComsByCV(TIntV &ComsV, const double HOFrac=0.2, const int NumThreads=20, const TStr &PlotLFNm=TStr(), const double StepAlpha=0.3, const double StepBeta=0.1)
Definition: agmfast.cpp:554
TIter BegI() const
Definition: hash.h:213
void SetRegCoef(const double _RegCoef)
Definition: agmfast.h:28
void Gen(const int &ExpectVals)
Definition: shash.h:1115
const TDat & GetDat(const TKey &Key) const
Definition: hash.h:262
TIter EndI() const
Definition: hash.h:218
bool IsKey(const TKey &Key) const
Definition: shash.h:1148
int MLEGradAscent(const double &Thres, const int &MaxIter, const TStr &PlotNm, const double StepAlpha=0.3, const double StepBeta=0.1)
Definition: agmfast.cpp:690
double GetRegCoef()
Definition: agmfast.h:29
int GetNodes() const
Returns the number of nodes in the graph.
Definition: graph.h:192
Definition: dt.h:1383
double GetCom(const int &NID, const int &CID)
Definition: agmfast.h:57
double LikelihoodForOneVar(const TFltV &AlphaKV, const int UID, const int CID, const double &Val)
Definition: agmfast.cpp:364
Definition: fl.h:58
TBool NodesOk
Definition: agmfast.h:15
double DotProduct(const int &UID, const int &VID)
Definition: agmfast.h:95
TFlt PNoCom
Definition: agmfast.h:22
const char * GetTmStr() const
Definition: tm.h:370
#define HasGraphFlag(TGraph, Flag)
For quick testing of the properties of the graph/network object (see TGraphFlag). ...
Definition: gbase.h:41
TVec< TIntFltH > F
Definition: agmfast.h:10
static void GetNIdPhiV(const PGraph &G, TFltIntPrV &NIdPhiV)
Definition: agmfast.h:198
double GetStepSizeByLineSearch(const int UID, const TIntFltH &DeltaV, const TIntFltH &GradV, const double &Alpha, const double &Beta, const int MaxIter=10)
Definition: agmfast.cpp:665
static double Round(const double &Val)
Definition: xmath.h:16
TInt NumComs
Definition: agmfast.h:16
void GetCmtyVV(TVec< TIntV > &CmtyVV)
Definition: agmfast.cpp:508
int AddKey(const TKey &Key)
Definition: shash.h:1254
int MLEGradAscentParallel(const double &Thres, const int &MaxIter, const int ChunkNum, const TStr &PlotNm=TStr(), const double StepAlpha=0.3, const double StepBeta=0.1)
Definition: agmfast.h:49
Definition: fl.h:128
void SetGraph(const PUNGraph &GraphPt)
Definition: agmfast.cpp:146
Utility functions for BigCLAM, Coda.
Definition: agmfast.h:126
Definition: dt.h:1134
directed graph (TNGraph, TNEGraph), else graph is undirected TUNGraph
Definition: gbase.h:13
void Load(TSIn &SIn, const int &RndSeed=0)
Definition: agmfast.cpp:22
double LikelihoodForRow(const int UID)
Definition: agmfast.cpp:193
TFltV SumFV
Definition: agmfast.h:14
int Len() const
Definition: shash.h:1121
double HessianForOneVar(const TFltV &AlphaKV, const int UID, const int CID, const double &Val)
Definition: agmfast.cpp:394
Community detection with AGM. Sparse AGM-fast with coordinate ascent.
Definition: agmfast.h:7
TVec< TIntSet > HOVIDSV
Definition: agmfast.h:18
TFlt RegCoef
Definition: agmfast.h:13
Definition: dt.h:412
static TStr Fmt(const char *FmtStr,...)
Definition: dt.cpp:1599
TIntV NIDV
Definition: agmfast.h:12
TRnd Rnd
Definition: agmfast.h:11
TAGMFast(const PUNGraph &GraphPt, const int &InitComs, const int RndSeed=0)
Definition: agmfast.h:25
double Likelihood(const bool DoParallel=false)
Definition: agmfast.cpp:173
double Sum(const TIntFltH &UV)
Definition: agmfast.h:106
static void GenHoldOutPairs(const PGraph &G, TVec< TIntSet > &HoldOutSet, double HOFrac, TRnd &Rnd)
Definition: agmfast.h:159
TFlt MaxVal
Definition: agmfast.h:20
void Gen(const TSizeTy &_Vals)
Constructs a vector (an array) of _Vals elements.
Definition: ds.h:523
void AddCom(const int &NID, const int &CID, const double &Val)
Definition: agmfast.h:64
int GetUniDevInt(const int &Range=0)
Definition: dt.cpp:39
void RandomInit(const int InitComs)
Definition: agmfast.cpp:38
bool IsKey(const TKey &Key) const
Definition: hash.h:258
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:602
Definition: dt.h:971
double DotProduct(const TIntFltH &UV, const TIntFltH &VV)
Definition: agmfast.h:78
int Len() const
Definition: hash.h:228
void GradientForRow(const int UID, TIntFltH &GradU, const TIntSet &CIDSet)
Definition: agmfast.cpp:250
Vector is a sequence TVal objects representing an array that can change in size.
Definition: ds.h:430