SNAP Library, Developer Reference  2012-10-15 15:06:59
SNAP, a general purpose network analysis and graph mining library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
cliques.cpp
Go to the documentation of this file.
00001 #include "stdafx.h"
00002 #include "cliques.h"
00003 
00005 // TCommunity implementation
00006 void TCliqueOverlap::GetRelativeComplement(const THashSet<TInt>& A, const THashSet<TInt>& B, THashSet<TInt>& Complement) {
00007   for (THashSet<TInt>::TIter it=A.BegI(); it<A.EndI(); it++) {
00008                 const int nId = it.GetKey();
00009                 if (!B.IsKey(nId)) Complement.AddKey(nId);
00010         }
00011 }
00012 
00013 void TCliqueOverlap::GetIntersection(const THashSet<TInt>& A, const THashSet<TInt>& B, THashSet<TInt>& C) {
00014         if (A.Len() < B.Len()) {
00015                 for (THashSetKeyI<TInt> it=A.BegI(); it<A.EndI(); it++) 
00016                         if (B.IsKey(it.GetKey())) C.AddKey(it.GetKey());
00017         } else {
00018                 for (THashSetKeyI<TInt> it=B.BegI(); it<B.EndI(); it++) 
00019                         if (A.IsKey(it.GetKey())) C.AddKey(it.GetKey());
00020         }
00021 }
00022 
00023 int TCliqueOverlap::Intersection(const THashSet<TInt>& A, const THashSet<TInt>& B) {
00024         int n = 0;
00025         if (A.Len() < B.Len()) {
00026                 for (THashSetKeyI<TInt> it=A.BegI(); it<A.EndI(); it++) 
00027                         if (B.IsKey(it.GetKey())) n++;
00028         } else {
00029                 for (THashSetKeyI<TInt> it=B.BegI(); it<B.EndI(); it++) 
00030                         if (A.IsKey(it.GetKey())) n++;
00031         }
00032         return n;
00033 }
00034 
00035 void TCliqueOverlap::GetNbrs(int NId, THashSet<TInt>& Nbrs) const{
00036         TUNGraph::TNodeI node = m_G->GetNI(NId);
00037         int deg = node.GetDeg();
00038         for (int i=0; i<deg; i++) Nbrs.AddKey(node.GetNbrNId(i));
00039 }
00040 
00041 int TCliqueOverlap::GetNodeIdWithMaxDeg(const THashSet<TInt>& Set) const{
00042         int id = -1;
00043         int maxDeg = -1;
00044         //
00045         for (THashSetKeyI<TInt> it=Set.BegI(); it<Set.EndI(); it++) {
00046                 int nId = it.GetKey();
00047                 int deg = m_G->GetNI(nId).GetDeg();
00048                 if (maxDeg < deg) { maxDeg=deg; id=nId; }
00049         }
00050         return id;
00051 }
00052 
00053 int TCliqueOverlap::MaxNbrsInCANDNodeId(const THashSet<TInt>& SUBG, const THashSet<TInt>& CAND) const{
00054         int id = -1;
00055         int maxIntersection = -1;
00056         //
00057         for (THashSetKeyI<TInt> it=SUBG.BegI(); it<SUBG.EndI(); it++) {
00058                 int nId = it.GetKey();
00059                 TUNGraph::TNodeI nIt = m_G->GetNI(nId);
00060                 int deg = nIt.GetDeg();
00061                 //
00062                 int curIntersection = 0;
00063                 for (int i=0; i<deg; i++) {
00064                         int nbrId = nIt.GetNbrNId(i);
00065                         if (CAND.IsKey(nbrId)) curIntersection++;
00066                 }
00067                 //
00068                 if (maxIntersection < curIntersection) { maxIntersection=curIntersection; id=nId; }
00069         }
00070         return id;
00071 }
00072 
00073 void TCliqueOverlap::Expand(const THashSet<TInt>& SUBG, THashSet<TInt>& CAND) {
00074         if (SUBG.Len()==0) { if (m_Q.Len() >= m_minMaxCliqueSize) { m_Q.Pack(); m_maxCliques->Add(m_Q); } return; }
00075         if (CAND.Len()==0) return;
00076         //Get u that maximaze CAND intersection with neighbours of vertex u
00077         int u = MaxNbrsInCANDNodeId(SUBG, CAND);
00078         //Get neighbours of node u
00079         THashSet<TInt> nbrsU;
00080         GetNbrs(u, nbrsU);
00081         //Get relative complement of nbrsU in CAND
00082         THashSet<TInt> EXT;
00083         GetRelativeComplement(CAND, nbrsU, EXT);
00084         while(EXT.Len() != 0) {
00085                 int q = GetNodeIdWithMaxDeg(EXT);
00086                 //
00087                 m_Q.Add(q);
00088                 //
00089                 THashSet<TInt> nbrsQ;
00090                 GetNbrs(q, nbrsQ);
00091                 //
00092                 THashSet<TInt> SUBGq;
00093                 GetIntersection(SUBG, nbrsQ, SUBGq);
00094                 //
00095                 THashSet<TInt> CANDq;
00096                 GetIntersection(CAND, nbrsQ, CANDq);
00097                 //
00098                 Expand(SUBGq, CANDq);
00099                 //
00100                 CAND.DelKey(q);
00101                 m_Q.DelLast();
00102                 //
00103                 EXT.Clr();
00104                 GetRelativeComplement(CAND, nbrsU, EXT);
00105         }
00106 }
00107 
00108 void TCliqueOverlap::GetMaximalCliques(const PUNGraph& G, int MinMaxCliqueSize, TVec<TIntV>& MaxCliques) {
00109         if (G->GetNodes() == 0) return;
00110         //
00111         m_G = G;
00112         m_minMaxCliqueSize = MinMaxCliqueSize;
00113         m_maxCliques =& MaxCliques;
00114         m_Q.Clr();
00115         //
00116         THashSet<TInt> SUBG;
00117         THashSet<TInt> CAND;
00118         for (TUNGraph::TNodeI NI=m_G->BegNI(); NI<m_G->EndNI(); NI++) {
00119                 TInt nId = NI.GetId();
00120                 SUBG.AddKey(nId);
00121                 CAND.AddKey(nId);
00122         }
00123         //
00124         Expand(SUBG, CAND);
00125 }
00126 
00127 void TCliqueOverlap::CalculateOverlapMtx(const TVec<TIntV>& MaxCliques, int MinNodeOverlap, TVec<TIntV>& OverlapMtx) {
00128         OverlapMtx.Clr();
00129         //
00130         int n = MaxCliques.Len();
00131         //Convert max cliques to HashSets
00132                 TVec<THashSet<TInt> > cliques;
00133         for (int i=0; i<n; i++) {
00134                 const int len = MaxCliques[i].Len();
00135     cliques.Add();
00136     if (len < MinNodeOverlap) { continue; }
00137                 THashSet<TInt>& set = cliques.Last();  set.Gen(len);
00138     for (int j=0; j<len; j++) { set.AddKey(MaxCliques[i][j]); }
00139         }
00140         //Init clique clique overlap matrix
00141         n = cliques.Len();
00142         OverlapMtx.Gen(n);
00143         for (int i=0; i<n; i++) OverlapMtx[i].Gen(n);
00144         //Calculate clique clique overlap matrix
00145   for (int i=0; i<n; i++) {
00146     OverlapMtx[i][i] = cliques[i].Len();
00147     for (int j=i+1; j<n; j++) {
00148       OverlapMtx[i][j] = Intersection(cliques[i], cliques[j]); }
00149   }
00150 }
00151 
00152 PUNGraph TCliqueOverlap::CalculateOverlapMtx(const TVec<TIntV>& MaxCliques, int MinNodeOverlap) {
00153         const int n = MaxCliques.Len();
00154   //Convert max cliques to HashSets
00155         TVec<THashSet<TInt> > cliques;
00156         for (int i=0; i<n; i++) {
00157                 const int len = MaxCliques[i].Len();
00158     cliques.Add();
00159     if (len < MinNodeOverlap) { continue; }
00160                 THashSet<TInt>& set = cliques.Last();  set.Gen(len);
00161     for (int j=0; j<len; j++) { set.AddKey(MaxCliques[i][j]); }
00162         }
00163         //Init clique clique overlap matrix
00164         PUNGraph OverlapMtx = TUNGraph::New();
00165   for (int i=0; i < n; i++) {
00166     OverlapMtx->AddNode(i); }
00167         //Calculate clique clique overlap matrix
00168   for (int i=0; i<n; i++) {
00169     for (int j=i+1; j<n; j++) {
00170       if (Intersection(cliques[i], cliques[j]) >= MinNodeOverlap) {
00171         OverlapMtx->AddEdge(i,j); }
00172     }
00173   }
00174   return OverlapMtx;
00175 }
00176 
00177 void TCliqueOverlap::GetOverlapCliques(const TVec<TIntV>& OverlapMtx, int MinNodeOverlap, TVec<TIntV>& CliqueIdVV) {
00178         int n = OverlapMtx.Len();
00179         for (int i=0; i<n; i++) {
00180                 bool isCommunity = false;
00181                 for (int j=i+1; j<n; j++) {
00182                         if (OverlapMtx[i][j] >= MinNodeOverlap) {
00183                                 if (! isCommunity) {
00184                                         TIntV v; v.Add(i);
00185                                         CliqueIdVV.Add(v);
00186                                         isCommunity=true;
00187                                 }
00188                                 CliqueIdVV.Last().Add(j);
00189                         }
00190                 }
00191         }
00192 }
00193 
00194 void TCliqueOverlap::GetOverlapCliques(const TVec<TIntV>& OverlapMtx, const TVec<TIntV>& MaxCliques, double MinOverlapFrac, TVec<TIntV>& CliqueIdVV) {
00195         int n = OverlapMtx.Len();
00196         for(int i=0; i<n; i++){
00197                 bool isCommunity = false;
00198                 int size1 = MaxCliques[i].Len();
00199                 for(int j=i+1; j<n; j++){
00200                         int size2 = MaxCliques[j].Len();
00201                         double ratio = OverlapMtx[i][j];
00202                         if(size1 < size2) ratio /= size1;
00203                         else ratio /= size2;
00204                         if(ratio >= MinOverlapFrac){
00205                                 if(!isCommunity){
00206                                         TIntV v; v.Add(i);
00207                                         CliqueIdVV.Add(v);
00208                                         isCommunity=true;
00209                                 }
00210                                 CliqueIdVV.Last().Add(j);
00211                         }
00212                 }
00213         }
00214 }
00215 
00217 void TCliqueOverlap::GetMaxCliques(const PUNGraph& G, int MinMaxCliqueSize, TVec<TIntV>& MaxCliques) {
00218   TCliqueOverlap CO;
00219   MaxCliques.Clr(false);
00220   CO.GetMaximalCliques(G, MinMaxCliqueSize, MaxCliques);
00221 }
00222 
00224 void TCliqueOverlap::GetCPMCommunities(const PUNGraph& G, int MinMaxCliqueSize, TVec<TIntV>& NIdCmtyVV) {
00225   printf("Clique Percolation Method\n");
00226   TExeTm ExeTm;
00227   TVec<TIntV> MaxCliques;
00228   TCliqueOverlap::GetMaxCliques(G, MinMaxCliqueSize, MaxCliques);
00229   // op RS 2012/05/15, commented out next line, a parameter is missing,
00230   //   creating a warning on OS X
00231   // printf("...%d cliques found\n");
00232   // get clique overlap matrix (graph)
00233   PUNGraph OverlapGraph = TCliqueOverlap::CalculateOverlapMtx(MaxCliques, MinMaxCliqueSize-1);
00234   printf("...overlap matrix (%d, %d)\n", G->GetNodes(), G->GetEdges());
00235   // connected components are communities
00236   TCnComV CnComV;
00237   TSnap::GetWccs(OverlapGraph, CnComV);
00238   NIdCmtyVV.Clr(false);
00239   TIntSet CmtySet;
00240   for (int c = 0; c < CnComV.Len(); c++) {
00241     CmtySet.Clr(false);
00242     for (int i = 0; i <CnComV[c].Len(); i++) {
00243       const TIntV& CliqueNIdV = MaxCliques[CnComV[c][i]];
00244       CmtySet.AddKeyV(CliqueNIdV);
00245     }
00246     NIdCmtyVV.Add();
00247     CmtySet.GetKeyV(NIdCmtyVV.Last());
00248     NIdCmtyVV.Last().Sort();
00249   }
00250   printf("done [%s].\n", ExeTm.GetStr());
00251 }