SNAP Library 6.0, Developer Reference  2020-12-09 16:24:20
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
cliques.cpp
Go to the documentation of this file.
1 #include "stdafx.h"
2 #include "cliques.h"
3 
5 // TCommunity implementation
7  for (THashSet<TInt>::TIter it=A.BegI(); it<A.EndI(); it++) {
8  const int nId = it.GetKey();
9  if (!B.IsKey(nId)) Complement.AddKey(nId);
10  }
11 }
12 
14  if (A.Len() < B.Len()) {
15  for (THashSetKeyI<TInt> it=A.BegI(); it<A.EndI(); it++)
16  if (B.IsKey(it.GetKey())) C.AddKey(it.GetKey());
17  } else {
18  for (THashSetKeyI<TInt> it=B.BegI(); it<B.EndI(); it++)
19  if (A.IsKey(it.GetKey())) C.AddKey(it.GetKey());
20  }
21 }
22 
24  int n = 0;
25  if (A.Len() < B.Len()) {
26  for (THashSetKeyI<TInt> it=A.BegI(); it<A.EndI(); it++)
27  if (B.IsKey(it.GetKey())) n++;
28  } else {
29  for (THashSetKeyI<TInt> it=B.BegI(); it<B.EndI(); it++)
30  if (A.IsKey(it.GetKey())) n++;
31  }
32  return n;
33 }
34 
35 void TCliqueOverlap::GetNbrs(int NId, THashSet<TInt>& Nbrs) const{
36  TUNGraph::TNodeI node = m_G->GetNI(NId);
37  int deg = node.GetDeg();
38  for (int i=0; i<deg; i++) Nbrs.AddKey(node.GetNbrNId(i));
39 }
40 
42  int id = -1;
43  int maxDeg = -1;
44  //
45  for (THashSetKeyI<TInt> it=Set.BegI(); it<Set.EndI(); it++) {
46  int nId = it.GetKey();
47  int deg = m_G->GetNI(nId).GetDeg();
48  if (maxDeg < deg) { maxDeg=deg; id=nId; }
49  }
50  return id;
51 }
52 
54  int id = -1;
55  int maxIntersection = -1;
56  //
57  for (THashSetKeyI<TInt> it=SUBG.BegI(); it<SUBG.EndI(); it++) {
58  int nId = it.GetKey();
59  TUNGraph::TNodeI nIt = m_G->GetNI(nId);
60  int deg = nIt.GetDeg();
61  //
62  int curIntersection = 0;
63  for (int i=0; i<deg; i++) {
64  int nbrId = nIt.GetNbrNId(i);
65  if (CAND.IsKey(nbrId)) curIntersection++;
66  }
67  //
68  if (maxIntersection < curIntersection) { maxIntersection=curIntersection; id=nId; }
69  }
70  return id;
71 }
72 
74  if (SUBG.Len()==0) { if (m_Q.Len() >= m_minMaxCliqueSize) { m_Q.Pack(); m_maxCliques->Add(m_Q); } return; }
75  if (CAND.Len()==0) return;
76  //Get u that maximaze CAND intersection with neighbours of vertex u
77  int u = MaxNbrsInCANDNodeId(SUBG, CAND);
78  //Get neighbours of node u
79  THashSet<TInt> nbrsU;
80  GetNbrs(u, nbrsU);
81  //Get relative complement of nbrsU in CAND
82  THashSet<TInt> EXT;
83  GetRelativeComplement(CAND, nbrsU, EXT);
84  while(EXT.Len() != 0) {
85  int q = GetNodeIdWithMaxDeg(EXT);
86  //
87  m_Q.Add(q);
88  //
89  THashSet<TInt> nbrsQ;
90  GetNbrs(q, nbrsQ);
91  //
92  THashSet<TInt> SUBGq;
93  GetIntersection(SUBG, nbrsQ, SUBGq);
94  //
95  THashSet<TInt> CANDq;
96  GetIntersection(CAND, nbrsQ, CANDq);
97  //
98  Expand(SUBGq, CANDq);
99  //
100  CAND.DelKey(q);
101  m_Q.DelLast();
102  //
103  EXT.Clr();
104  GetRelativeComplement(CAND, nbrsU, EXT);
105  }
106 }
107 
108 void TCliqueOverlap::GetMaximalCliques(const PUNGraph& G, int MinMaxCliqueSize, TVec<TIntV>& MaxCliques) {
109  if (G->GetNodes() == 0) return;
110  //
111  m_G = G;
112  m_minMaxCliqueSize = MinMaxCliqueSize;
113  m_maxCliques =& MaxCliques;
114  m_Q.Clr();
115  //
116  THashSet<TInt> SUBG;
117  THashSet<TInt> CAND;
118  for (TUNGraph::TNodeI NI=m_G->BegNI(); NI<m_G->EndNI(); NI++) {
119  TInt nId = NI.GetId();
120  SUBG.AddKey(nId);
121  CAND.AddKey(nId);
122  }
123  //
124  Expand(SUBG, CAND);
125 }
126 
127 void TCliqueOverlap::CalculateOverlapMtx(const TVec<TIntV>& MaxCliques, int MinNodeOverlap, TVec<TIntV>& OverlapMtx) {
128  OverlapMtx.Clr();
129  //
130  int n = MaxCliques.Len();
131  //Convert max cliques to HashSets
132  TVec<THashSet<TInt> > cliques;
133  for (int i=0; i<n; i++) {
134  const int len = MaxCliques[i].Len();
135  cliques.Add();
136  if (len < MinNodeOverlap) { continue; }
137  THashSet<TInt>& set = cliques.Last(); set.Gen(len);
138  for (int j=0; j<len; j++) { set.AddKey(MaxCliques[i][j]); }
139  }
140  //Init clique clique overlap matrix
141  n = cliques.Len();
142  OverlapMtx.Gen(n);
143  for (int i=0; i<n; i++) OverlapMtx[i].Gen(n);
144  //Calculate clique clique overlap matrix
145  for (int i=0; i<n; i++) {
146  OverlapMtx[i][i] = cliques[i].Len();
147  for (int j=i+1; j<n; j++) {
148  OverlapMtx[i][j] = Intersection(cliques[i], cliques[j]); }
149  }
150 }
151 
152 PUNGraph TCliqueOverlap::CalculateOverlapMtx(const TVec<TIntV>& MaxCliques, int MinNodeOverlap) {
153  const int n = MaxCliques.Len();
154  //Convert max cliques to HashSets
155  TVec<THashSet<TInt> > cliques;
156  for (int i=0; i<n; i++) {
157  const int len = MaxCliques[i].Len();
158  cliques.Add();
159  if (len < MinNodeOverlap) { continue; }
160  THashSet<TInt>& set = cliques.Last(); set.Gen(len);
161  for (int j=0; j<len; j++) { set.AddKey(MaxCliques[i][j]); }
162  }
163  //Init clique clique overlap matrix
164  PUNGraph OverlapMtx = TUNGraph::New();
165  for (int i=0; i < n; i++) {
166  OverlapMtx->AddNode(i); }
167  //Calculate clique clique overlap matrix
168  for (int i=0; i<n; i++) {
169  for (int j=i+1; j<n; j++) {
170  if (Intersection(cliques[i], cliques[j]) >= MinNodeOverlap) {
171  OverlapMtx->AddEdge(i,j); }
172  }
173  }
174  return OverlapMtx;
175 }
176 
177 void TCliqueOverlap::GetOverlapCliques(const TVec<TIntV>& OverlapMtx, int MinNodeOverlap, TVec<TIntV>& CliqueIdVV) {
178  int n = OverlapMtx.Len();
179  for (int i=0; i<n; i++) {
180  bool isCommunity = false;
181  for (int j=i+1; j<n; j++) {
182  if (OverlapMtx[i][j] >= MinNodeOverlap) {
183  if (! isCommunity) {
184  TIntV v; v.Add(i);
185  CliqueIdVV.Add(v);
186  isCommunity=true;
187  }
188  CliqueIdVV.Last().Add(j);
189  }
190  }
191  }
192 }
193 
194 void TCliqueOverlap::GetOverlapCliques(const TVec<TIntV>& OverlapMtx, const TVec<TIntV>& MaxCliques, double MinOverlapFrac, TVec<TIntV>& CliqueIdVV) {
195  int n = OverlapMtx.Len();
196  for(int i=0; i<n; i++){
197  bool isCommunity = false;
198  int size1 = MaxCliques[i].Len();
199  for(int j=i+1; j<n; j++){
200  int size2 = MaxCliques[j].Len();
201  double ratio = OverlapMtx[i][j];
202  if(size1 < size2) ratio /= size1;
203  else ratio /= size2;
204  if(ratio >= MinOverlapFrac){
205  if(!isCommunity){
206  TIntV v; v.Add(i);
207  CliqueIdVV.Add(v);
208  isCommunity=true;
209  }
210  CliqueIdVV.Last().Add(j);
211  }
212  }
213  }
214 }
215 
217 void TCliqueOverlap::GetMaxCliques(const PUNGraph& G, int MinMaxCliqueSize, TVec<TIntV>& MaxCliques) {
218  TCliqueOverlap CO;
219  MaxCliques.Clr(false);
220  CO.GetMaximalCliques(G, MinMaxCliqueSize, MaxCliques);
221 }
222 
224 void TCliqueOverlap::GetCPMCommunities(const PUNGraph& G, int MinMaxCliqueSize, TVec<TIntV>& NIdCmtyVV) {
225  printf("Clique Percolation Method\n");
226  TExeTm ExeTm;
227  TVec<TIntV> MaxCliques;
228  TCliqueOverlap::GetMaxCliques(G, MinMaxCliqueSize, MaxCliques);
229  // op RS 2012/05/15, commented out next line, a parameter is missing,
230  // creating a warning on OS X
231  // printf("...%d cliques found\n");
232  // get clique overlap matrix (graph)
233  PUNGraph OverlapGraph = TCliqueOverlap::CalculateOverlapMtx(MaxCliques, MinMaxCliqueSize-1);
234  printf("...overlap matrix (%d, %d)\n", G->GetNodes(), G->GetEdges());
235  // connected components are communities
236  TCnComV CnComV;
237  TSnap::GetWccs(OverlapGraph, CnComV);
238  NIdCmtyVV.Clr(false);
239  TIntSet CmtySet;
240  for (int c = 0; c < CnComV.Len(); c++) {
241  CmtySet.Clr(false);
242  for (int i = 0; i <CnComV[c].Len(); i++) {
243  const TIntV& CliqueNIdV = MaxCliques[CnComV[c][i]];
244  CmtySet.AddKeyV(CliqueNIdV);
245  }
246  NIdCmtyVV.Add();
247  CmtySet.GetKeyV(NIdCmtyVV.Last());
248  NIdCmtyVV.Last().Sort();
249  }
250  printf("done [%s].\n", ExeTm.GetStr());
251 }
void Clr(const bool &DoDel=true, const int &NoDelLim=-1)
Definition: shash.h:1243
static void GetRelativeComplement(const THashSet< TInt > &A, const THashSet< TInt > &B, THashSet< TInt > &Complement)
Definition: cliques.cpp:6
void GetNbrs(int NId, THashSet< TInt > &Nbrs) const
Definition: cliques.cpp:35
Definition: tm.h:355
void AddKeyV(const TVec< TKey > &KeyV)
Definition: shash.h:1284
static void GetCPMCommunities(const PUNGraph &G, int MinMaxCliqueSize, TVec< TIntV > &Communities)
Clique Percolation method communities.
Definition: cliques.cpp:224
TIter BegI() const
Definition: shash.h:1105
int AddNode(int NId=-1)
Adds a node of ID NId to the graph.
Definition: graph.cpp:8
int GetEdges() const
Returns the number of edges in the graph.
Definition: graph.cpp:82
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:575
Node iterator. Only forward iteration (operator++) is supported.
Definition: graph.h:68
void GetKeyV(TVec< TKey > &KeyV) const
Definition: shash.h:1347
void Gen(const int &ExpectVals)
Definition: shash.h:1115
static void GetMaxCliques(const PUNGraph &G, int MinMaxCliqueSize, TVec< TIntV > &MaxCliques)
Enumerate maximal cliques of the network on more than MinMaxCliqueSize nodes.
Definition: cliques.cpp:217
bool IsKey(const TKey &Key) const
Definition: shash.h:1148
int GetNodes() const
Returns the number of nodes in the graph.
Definition: graph.h:192
int GetDeg() const
Returns degree of the current node.
Definition: graph.h:90
void Clr(const bool &DoDel=true, const TSizeTy &NoDelLim=-1)
Clears the contents of the vector.
Definition: ds.h:1022
static void GetOverlapCliques(const TVec< TIntV > &OverlapMtx, int MinNodeOverlap, TVec< TIntV > &CliqueIdVV)
Definition: cliques.cpp:177
void Sort(const bool &Asc=true)
Sorts the elements of the vector.
Definition: ds.h:1318
void DelKey(const TKey &Key)
Definition: shash.h:1289
int MaxNbrsInCANDNodeId(const THashSet< TInt > &SUBG, const THashSet< TInt > &CAND) const
Definition: cliques.cpp:53
static void GetIntersection(const THashSet< TInt > &A, const THashSet< TInt > &B, THashSet< TInt > &C)
Definition: cliques.cpp:13
TNodeI GetNI(const int &NId) const
Returns an iterator referring to the node of ID NId in the graph.
Definition: graph.h:241
static PUNGraph New()
Static constructor that returns a pointer to the graph. Call: PUNGraph Graph = TUNGraph::New().
Definition: graph.h:172
int AddKey(const TKey &Key)
Definition: shash.h:1254
const TVal & Last() const
Returns a reference to the last element of the vector.
Definition: ds.h:579
void Expand(const THashSet< TInt > &SUBG, THashSet< TInt > &CAND)
Definition: cliques.cpp:73
int AddEdge(const int &SrcNId, const int &DstNId)
Adds an edge between node IDs SrcNId and DstNId to the graph.
Definition: graph.cpp:92
Definition: dt.h:1137
int m_minMaxCliqueSize
Definition: cliques.h:13
TIter EndI() const
Definition: shash.h:1112
int Len() const
Definition: shash.h:1121
TVec< TIntV > * m_maxCliques
Definition: cliques.h:12
PUNGraph m_G
Definition: cliques.h:10
void Pack()
Reduces vector capacity (frees memory) to match its size.
Definition: ds.h:1057
TNodeI EndNI() const
Returns an iterator referring to the past-the-end node in the graph.
Definition: graph.h:239
int GetNbrNId(const int &NodeN) const
Returns ID of NodeN-th neighboring node.
Definition: graph.h:111
void Gen(const TSizeTy &_Vals)
Constructs a vector (an array) of _Vals elements.
Definition: ds.h:523
const char * GetStr() const
Definition: tm.h:368
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:602
TNodeI BegNI() const
Returns an iterator referring to the first node in the graph.
Definition: graph.h:237
void DelLast()
Removes the last element of the vector.
Definition: ds.h:665
static int Intersection(const THashSet< TInt > &A, const THashSet< TInt > &B)
Definition: cliques.cpp:23
static void CalculateOverlapMtx(const TVec< TIntV > &MaxCliques, int MinNodeOverlap, TVec< TIntV > &OverlapMtx)
Definition: cliques.cpp:127
TIntV m_Q
Definition: cliques.h:11
void GetWccs(const PGraph &Graph, TCnComV &CnComV)
Returns all weakly connected components in a Graph.
Definition: cncom.h:376
void GetMaximalCliques(const PUNGraph &G, int MinMaxCliqueSize, TVec< TIntV > &MaxCliques)
Definition: cliques.cpp:108
int GetNodeIdWithMaxDeg(const THashSet< TInt > &Set) const
Definition: cliques.cpp:41