SNAP Library 2.3, User Reference  2014-06-16 11:58:46
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
cmty.cpp
Go to the documentation of this file.
1 // Community detection algorithms
3 namespace TSnap {
4 
5 
6 namespace TSnapDetail {
7 // GIRVAN-NEWMAN algorithm
8 // 1. The betweenness of all existing edges in the network is calculated first.
9 // 2. The edge with the highest betweenness is removed.
10 // 3. The betweenness of all edges affected by the removal is recalculated.
11 // 4. Steps 2 and 3 are repeated until no edges remain.
12 // Girvan M. and Newman M. E. J., Community structure in social and biological networks, Proc. Natl. Acad. Sci. USA 99, 7821-7826 (2002)
13 // Keep removing edges from Graph until one of the connected components of Graph splits into two.
14 void CmtyGirvanNewmanStep(PUNGraph& Graph, TIntV& Cmty1, TIntV& Cmty2) {
15  TIntPrFltH BtwEH;
16  TBreathFS<PUNGraph> BFS(Graph);
17  Cmty1.Clr(false); Cmty2.Clr(false);
18  while (true) {
19  TSnap::GetBetweennessCentr(Graph, BtwEH);
20  BtwEH.SortByDat(false);
21  if (BtwEH.Empty()) { return; }
22  const int NId1 = BtwEH.GetKey(0).Val1;
23  const int NId2 = BtwEH.GetKey(0).Val2;
24  Graph->DelEdge(NId1, NId2);
25  BFS.DoBfs(NId1, true, false, NId2, TInt::Mx);
26  if (BFS.GetHops(NId1, NId2) == -1) { // two components
27  TSnap::GetNodeWcc(Graph, NId1, Cmty1);
28  TSnap::GetNodeWcc(Graph, NId2, Cmty2);
29  return;
30  }
31  }
32 }
33 
34 // Connected components of a graph define clusters
35 // OutDegH and OrigEdges stores node degrees and number of edges in the original graph
36 double _GirvanNewmanGetModularity(const PUNGraph& G, const TIntH& OutDegH, const int& OrigEdges, TCnComV& CnComV) {
37  TSnap::GetWccs(G, CnComV); // get communities
38  double Mod = 0;
39  for (int c = 0; c < CnComV.Len(); c++) {
40  const TIntV& NIdV = CnComV[c]();
41  double EIn=0, EEIn=0;
42  for (int i = 0; i < NIdV.Len(); i++) {
43  TUNGraph::TNodeI NI = G->GetNI(NIdV[i]);
44  EIn += NI.GetOutDeg();
45  EEIn += OutDegH.GetDat(NIdV[i]);
46  }
47  Mod += (EIn-EEIn*EEIn/(2.0*OrigEdges));
48  }
49  if (Mod == 0) { return 0; }
50  else { return Mod/(2.0*OrigEdges); }
51 }
52 
53 TIntFltH MapEquationNew2Modules(PUNGraph& Graph, TIntH& Module, TIntFltH& Qi, int a, int b){
54  TIntFltH Qi1;
55  Qi1 = Qi;
56  float InModule=0.0, OutModule=0.0, Val;
57  int Mds[2] = {a,b};
58  for (int i=0; i<2; i++) {
59  InModule=0.0, OutModule=0.0;
60  if (Qi1.IsKey(Mds[i])){
61  int CentralModule = Mds[i];
62  for (TUNGraph::TEdgeI EI = Graph->BegEI(); EI < Graph->EndEI(); EI++) {
63  if (Module.GetDat(EI.GetSrcNId()) == Module.GetDat(EI.GetDstNId()) && Module.GetDat(EI.GetDstNId()) == CentralModule) {
64  InModule += 1.0;
65  } else if ((Module.GetDat(EI.GetSrcNId()) == CentralModule && Module.GetDat(EI.GetDstNId()) != CentralModule) || (Module.GetDat(EI.GetSrcNId()) != CentralModule && Module.GetDat(EI.GetDstNId()) == CentralModule)) {
66  OutModule +=1.0;
67  }
68  }
69  Val = 0.0;
70  if (InModule+OutModule > 0) {
71  Val = OutModule/(InModule+OutModule);
72  }
73  Qi1.DelKey(Mds[i]);
74  Qi1.AddDat(Mds[i],Val);
75  } else {
76  Qi1.DelKey(Mds[i]);
77  Qi1.AddDat(Mds[i],0.0);
78  }
79  }
80 
81  return Qi1;
82 }
83 
84 float Equation(PUNGraph& Graph, TIntFltH& PAlpha,float& SumPAlphaLogPAlpha, TIntFltH& Qi){
85  float SumPAlpha = 1.0, SumQi = 0.0, SumQiLogQi=0.0, SumQiSumPAlphaLogQiSumPAlpha = 0.0;
86  for (int i=0; i<Qi.Len(); i++) {
87  SumQi += Qi[i];
88  SumQiLogQi += Qi[i]*log(Qi[i]);
89  SumQiSumPAlphaLogQiSumPAlpha += (Qi[i]+SumPAlpha)*log(Qi[i]+SumPAlpha);
90  }
91  return (SumQi*log(SumQi)-2*SumQiLogQi-SumPAlphaLogPAlpha+SumQiSumPAlphaLogQiSumPAlpha);
92 }
93 
94 } // namespace TSnapDetail
95 
96 // Maximum modularity clustering by Girvan-Newman algorithm (slow)
97 // Girvan M. and Newman M. E. J., Community structure in social and biological networks, Proc. Natl. Acad. Sci. USA 99, 7821-7826 (2002)
98 double CommunityGirvanNewman(PUNGraph& Graph, TCnComV& CmtyV) {
99  TIntH OutDegH;
100  const int NEdges = Graph->GetEdges();
101  for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
102  OutDegH.AddDat(NI.GetId(), NI.GetOutDeg());
103  }
104  double BestQ = -1; // modularity
105  TCnComV CurCmtyV;
106  CmtyV.Clr();
107  TIntV Cmty1, Cmty2;
108  while (true) {
109  TSnapDetail::CmtyGirvanNewmanStep(Graph, Cmty1, Cmty2);
110  const double Q = TSnapDetail::_GirvanNewmanGetModularity(Graph, OutDegH, NEdges, CurCmtyV);
111  //printf("current modularity: %f\n", Q);
112  if (Q > BestQ) {
113  BestQ = Q;
114  CmtyV.Swap(CurCmtyV);
115  }
116  if (Cmty1.Len()==0 || Cmty2.Len() == 0) { break; }
117  }
118  return BestQ;
119 }
120 
121 // Rosvall-Bergstrom community detection algorithm based on information theoretic approach.
122 // See: Rosvall M., Bergstrom C. T., Maps of random walks on complex networks reveal community structure, Proc. Natl. Acad. Sci. USA 105, 1118-1123 (2008)
123 double Infomap(PUNGraph& Graph, TCnComV& CmtyV){
124  TIntH DegH;
125  TIntFltH PAlpha; // probability of visiting node alpha
126  TIntH Module; // module of each node
127  TIntFltH Qi; // probability of leaving each module
128  float SumPAlphaLogPAlpha = 0.0;
129  int Br = 0;
130  const int e = Graph->GetEdges();
131 
132  // initial values
133  for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
134  DegH.AddDat(NI.GetId(), NI.GetDeg());
135  float d = ((float)NI.GetDeg()/(float)(2*e));
136  PAlpha.AddDat(NI.GetId(), d);
137  SumPAlphaLogPAlpha += d*log(d);
138  Module.AddDat(NI.GetId(),Br);
139  Qi.AddDat(Module[Br],1.0);
140  Br+=1;
141  }
142 
143  float MinCodeLength = TSnapDetail::Equation(Graph,PAlpha,SumPAlphaLogPAlpha,Qi);
144  float NewCodeLength, PrevIterationCodeLength = 0.0;
145  int OldModule, NewModule;
146 
147  do {
148  PrevIterationCodeLength = MinCodeLength;
149  for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
150  MinCodeLength = TSnapDetail::Equation(Graph, PAlpha, SumPAlphaLogPAlpha, Qi);
151  for (int i=0; i<DegH.GetDat(NI.GetId()); i++) {
152  OldModule = Module.GetDat(NI.GetId());
153  NewModule = Module.GetDat(NI.GetNbrNId(i));
154  if (OldModule!=NewModule){
155  Module.DelKey(NI.GetId());
156  Module.AddDat(NI.GetId(),NewModule);
157  Qi = TSnapDetail::MapEquationNew2Modules(Graph,Module,Qi,OldModule, NewModule);
158  NewCodeLength = TSnapDetail::Equation(Graph,PAlpha,SumPAlphaLogPAlpha, Qi);
159  if (NewCodeLength<MinCodeLength) {
160  MinCodeLength = NewCodeLength;
161  OldModule = NewModule;
162  } else {
163  Module.DelKey(NI.GetId());
164  Module.AddDat(NI.GetId(),OldModule);
165  }
166  }
167  }
168  }
169  } while (MinCodeLength<PrevIterationCodeLength);
170 
171  Module.SortByDat(true);
172  int Mod=-1;
173  for (int i=0; i<Module.Len(); i++) {
174  if (Module[i]>Mod){
175  Mod = Module[i];
176  TCnCom t;
177  for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++){
178  if (Module.GetDat(NI.GetId())==Mod)
179  t.Add(NI.GetId());
180  }
181  CmtyV.Add(t);
182  }
183  }
184 
185  return MinCodeLength;
186 }
187 
188 namespace TSnapDetail {
192 class TCNMQMatrix {
193 private:
194  struct TCmtyDat {
195  double DegFrac;
197  int MxQId;
198  TCmtyDat() : MxQId(-1) { }
199  TCmtyDat(const double& NodeDegFrac, const int& OutDeg) :
200  DegFrac(NodeDegFrac), NIdQH(OutDeg), MxQId(-1) { }
201  void AddQ(const int& NId, const double& Q) { NIdQH.AddDat(NId, Q);
202  if (MxQId==-1 || NIdQH[MxQId]<Q) { MxQId=NIdQH.GetKeyId(NId); } }
203  void UpdateMaxQ() { MxQId=-1;
204  for (int i = -1; NIdQH.FNextKeyId(i); ) {
205  if (MxQId==-1 || NIdQH[MxQId]< NIdQH[i]) { MxQId=i; } } }
206  void DelLink(const int& K) { const int NId=GetMxQNId();
207  NIdQH.DelKey(K); if (NId==K) { UpdateMaxQ(); } }
208  int GetMxQNId() const { return NIdQH.GetKey(MxQId); }
209  double GetMxQ() const { return NIdQH[MxQId]; }
210  };
211 private:
215  double Q;
216 public:
217  TCNMQMatrix(const PUNGraph& Graph) : CmtyQH(Graph->GetNodes()),
218  MxQHeap(Graph->GetNodes()), CmtyIdUF(Graph->GetNodes()) { Init(Graph); }
219  void Init(const PUNGraph& Graph) {
220  const double M = 0.5/Graph->GetEdges(); // 1/2m
221  Q = 0.0;
222  for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
223  CmtyIdUF.Add(NI.GetId());
224  const int OutDeg = NI.GetOutDeg();
225  if (OutDeg == 0) { continue; }
226  TCmtyDat& Dat = CmtyQH.AddDat(NI.GetId(), TCmtyDat(M * OutDeg, OutDeg));
227  for (int e = 0; e < NI.GetOutDeg(); e++) {
228  const int DstNId = NI.GetOutNId(e);
229  const double DstMod = 2 * M * (1.0 - OutDeg * Graph->GetNI(DstNId).GetOutDeg() * M);
230  Dat.AddQ(DstNId, DstMod);
231  }
232  Q += -1.0*TMath::Sqr(OutDeg*M);
233  if (NI.GetId() < Dat.GetMxQNId()) {
234  MxQHeap.Add(TFltIntIntTr(Dat.GetMxQ(), NI.GetId(), Dat.GetMxQNId())); }
235  }
236  MxQHeap.MakeHeap();
237  }
239  while (true) {
240  if (MxQHeap.Empty()) { break; }
241  const TFltIntIntTr TopQ = MxQHeap.PopHeap();
242  if (! CmtyQH.IsKey(TopQ.Val2) || ! CmtyQH.IsKey(TopQ.Val3)) { continue; }
243  if (TopQ.Val1!=CmtyQH.GetDat(TopQ.Val2).GetMxQ() && TopQ.Val1!=CmtyQH.GetDat(TopQ.Val3).GetMxQ()) { continue; }
244  return TopQ;
245  }
246  return TFltIntIntTr(-1, -1, -1);
247  }
248  bool MergeBestQ() {
249  const TFltIntIntTr TopQ = FindMxQEdge();
250  if (TopQ.Val1 <= 0.0) { return false; }
251  // joint communities
252  const int I = TopQ.Val3;
253  const int J = TopQ.Val2;
254  CmtyIdUF.Union(I, J); // join
255  Q += TopQ.Val1;
256  TCmtyDat& DatJ = CmtyQH.GetDat(J);
257  { TCmtyDat& DatI = CmtyQH.GetDat(I);
258  DatI.DelLink(J); DatJ.DelLink(I);
259  for (int i = -1; DatJ.NIdQH.FNextKeyId(i); ) {
260  const int K = DatJ.NIdQH.GetKey(i);
261  TCmtyDat& DatK = CmtyQH.GetDat(K);
262  double NewQ = DatJ.NIdQH[i];
263  if (DatI.NIdQH.IsKey(K)) { NewQ = NewQ+DatI.NIdQH.GetDat(K); DatK.DelLink(I); } // K connected to I and J
264  else { NewQ = NewQ-2*DatI.DegFrac*DatK.DegFrac; } // K connected to J not I
265  DatJ.AddQ(K, NewQ);
266  DatK.AddQ(J, NewQ);
267  MxQHeap.PushHeap(TFltIntIntTr(NewQ, TMath::Mn(J,K), TMath::Mx(J,K)));
268  }
269  for (int i = -1; DatI.NIdQH.FNextKeyId(i); ) {
270  const int K = DatI.NIdQH.GetKey(i);
271  if (! DatJ.NIdQH.IsKey(K)) { // K connected to I not J
272  TCmtyDat& DatK = CmtyQH.GetDat(K);
273  const double NewQ = DatI.NIdQH[i]-2*DatJ.DegFrac*DatK.DegFrac;
274  DatJ.AddQ(K, NewQ);
275  DatK.DelLink(I);
276  DatK.AddQ(J, NewQ);
277  MxQHeap.PushHeap(TFltIntIntTr(NewQ, TMath::Mn(J,K), TMath::Mx(J,K)));
278  }
279  }
280  DatJ.DegFrac += DatI.DegFrac; }
281  if (DatJ.NIdQH.Empty()) { CmtyQH.DelKey(J); } // isolated community (done)
282  CmtyQH.DelKey(I);
283  return true;
284  }
285  static double CmtyCMN(const PUNGraph& Graph, TCnComV& CmtyV) {
286  TCNMQMatrix QMatrix(Graph);
287  // maximize modularity
288  while (QMatrix.MergeBestQ()) { }
289  // reconstruct communities
290  THash<TInt, TIntV> IdCmtyH;
291  for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
292  IdCmtyH.AddDat(QMatrix.CmtyIdUF.Find(NI.GetId())).Add(NI.GetId());
293  }
294  CmtyV.Gen(IdCmtyH.Len());
295  for (int j = 0; j < IdCmtyH.Len(); j++) {
296  CmtyV[j].NIdV.Swap(IdCmtyH[j]);
297  }
298  return QMatrix.Q;
299  }
300 };
301 
302 } // namespace TSnapDetail
303 
304 double CommunityCNM(const PUNGraph& Graph, TCnComV& CmtyV) {
305  return TSnapDetail::TCNMQMatrix::CmtyCMN(Graph, CmtyV);
306 }
307 
308 }; //namespace TSnap
double CommunityGirvanNewman(PUNGraph &Graph, TCnComV &CmtyV)
Definition: cmty.cpp:98
static const T & Mn(const T &LVal, const T &RVal)
Definition: xmath.h:36
Edge iterator. Only forward iteration (operator++) is supported.
Definition: graph.h:111
int Add(const int &Key)
Adds an element Key to the structure.
Definition: gbase.h:238
void Add(const int &NodeId)
Definition: cncom.h:104
int GetHops(const int &SrcNId, const int &DstNId) const
Definition: bfsdfs.h:148
void Union(const int &Key1, const int &Key2)
Merges sets with elements Key1 and Key2.
Definition: gbase.cpp:40
TFltIntIntTr FindMxQEdge()
Definition: cmty.cpp:238
Definition: ds.h:129
static const T & Mx(const T &LVal, const T &RVal)
Definition: xmath.h:32
void GetNodeWcc(const PGraph &Graph, const int &NId, TIntV &CnCom)
Returns (via output parameter CnCom) all nodes that are in the same connected component as node NId...
Definition: cncom.h:277
static const int Mx
Definition: dt.h:1046
TCNMQMatrix(const PUNGraph &Graph)
Definition: cmty.cpp:217
int DoBfs(const int &StartNode, const bool &FollowOut, const bool &FollowIn, const int &TargetNId=-1, const int &MxDist=TInt::Mx)
Performs BFS from node id StartNode for at maps MxDist steps by only following in-links (parameter Fo...
Definition: bfsdfs.h:108
int GetEdges() const
Returns the number of edges in the graph.
Definition: graph.cpp:74
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:535
void GetBetweennessCentr(const PUNGraph &Graph, const TIntV &BtwNIdV, TIntFltH &NodeBtwH, const bool &DoNodeCent, TIntPrFltH &EdgeBtwH, const bool &DoEdgeCent)
Definition: centr.cpp:28
Node iterator. Only forward iteration (operator++) is supported.
Definition: graph.h:63
bool Empty() const
Definition: hash.h:185
TVal1 Val1
Definition: ds.h:131
const TDat & GetDat(const TKey &Key) const
Definition: hash.h:220
static double Sqr(const double &x)
Definition: xmath.h:12
void Swap(TVec< TVal, TSizeTy > &Vec)
Swaps the contents of the vector with Vec.
Definition: ds.h:1011
void DelKey(const TKey &Key)
Definition: hash.h:358
TVal2 Val2
Definition: ds.h:132
int GetOutDeg() const
Returns out-degree of the current node (returns same as value GetDeg() since the graph is undirected)...
Definition: graph.h:86
void Clr(const bool &DoDel=true, const TSizeTy &NoDelLim=-1)
Clears the contents of the vector.
Definition: ds.h:953
int Find(const int &Key)
Returns the set that contains element Key.
Definition: gbase.cpp:23
Definition: cncom.h:88
Simple heap data structure.
Definition: gbase.h:253
TNodeI GetNI(const int &NId) const
Returns an iterator referring to the node of ID NId in the graph.
Definition: graph.h:205
bool FNextKeyId(int &KeyId) const
Definition: hash.h:432
THeap< TFltIntIntTr > MxQHeap
Definition: cmty.cpp:213
int GetKeyId(const TKey &Key) const
Definition: hash.h:420
float Equation(PUNGraph &Graph, TIntFltH &PAlpha, float &SumPAlphaLogPAlpha, TIntFltH &Qi)
Definition: cmty.cpp:84
void AddQ(const int &NId, const double &Q)
Definition: cmty.cpp:201
double CommunityCNM(const PUNGraph &Graph, TCnComV &CmtyV)
Definition: cmty.cpp:304
void Init(const PUNGraph &Graph)
Definition: cmty.cpp:219
TNodeI EndNI() const
Returns an iterator referring to the past-the-end node in the graph.
Definition: graph.h:203
TVal1 Val1
Definition: ds.h:34
TVal2 Val2
Definition: ds.h:35
Definition: bd.h:196
void Gen(const TSizeTy &_Vals)
Constructs a vector (an array) of _Vals elements.
Definition: ds.h:486
bool IsKey(const TKey &Key) const
Definition: hash.h:216
TCmtyDat(const double &NodeDegFrac, const int &OutDeg)
Definition: cmty.cpp:199
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:559
TNodeI BegNI() const
Returns an iterator referring to the first node in the graph.
Definition: graph.h:201
int Len() const
Definition: hash.h:186
double Infomap(PUNGraph &Graph, TCnComV &CmtyV)
Definition: cmty.cpp:123
TDat & AddDat(const TKey &Key)
Definition: hash.h:196
static double CmtyCMN(const PUNGraph &Graph, TCnComV &CmtyV)
Definition: cmty.cpp:285
void GetWccs(const PGraph &Graph, TCnComV &CnComV)
Returns all weakly connected components in a Graph.
Definition: cncom.h:376
Union Find class (Disjoint-set data structure).
Definition: gbase.h:214
const TKey & GetKey(const int &KeyId) const
Definition: hash.h:210
TTriple< TFlt, TInt, TInt > TFltIntIntTr
Definition: ds.h:176
TVal3 Val3
Definition: ds.h:133
double _GirvanNewmanGetModularity(const PUNGraph &G, const TIntH &OutDegH, const int &OrigEdges, TCnComV &CnComV)
Definition: cmty.cpp:36
THash< TInt, TCmtyDat > CmtyQH
Definition: cmty.cpp:212
void CmtyGirvanNewmanStep(PUNGraph &Graph, TIntV &Cmty1, TIntV &Cmty2)
A single step of Girvan-Newman clustering procedure.
Definition: cmty.cpp:14
TIntFltH MapEquationNew2Modules(PUNGraph &Graph, TIntH &Module, TIntFltH &Qi, int a, int b)
Definition: cmty.cpp:53
void SortByDat(const bool &Asc=true)
Definition: hash.h:246