SNAP Library, Developer Reference  2012-10-02 12:56:23
SNAP, a general purpose network analysis and graph mining library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
cmty.cpp
Go to the documentation of this file.
00001 
00002 // Community detection algorithms
00003 namespace TSnap {
00004 
00005 
00006 namespace TSnapDetail {
00007 // GIRVAN-NEWMAN algorithm
00008 //      1. The betweenness of all existing edges in the network is calculated first.
00009 //      2. The edge with the highest betweenness is removed.
00010 //      3. The betweenness of all edges affected by the removal is recalculated.
00011 //      4. Steps 2 and 3 are repeated until no edges remain.
00012 //  Girvan M. and Newman M. E. J., Community structure in social and biological networks, Proc. Natl. Acad. Sci. USA 99, 7821-7826 (2002)
00013 // Keep removing edges from Graph until one of the connected components of Graph splits into two.
00014 void CmtyGirvanNewmanStep(PUNGraph& Graph, TIntV& Cmty1, TIntV& Cmty2) {
00015   TIntPrFltH BtwEH;
00016   TBreathFS<PUNGraph> BFS(Graph);
00017   Cmty1.Clr(false);  Cmty2.Clr(false);
00018   while (true) {
00019     TSnap::GetBetweennessCentr(Graph, BtwEH);
00020     BtwEH.SortByDat(false);
00021     if (BtwEH.Empty()) { return; }
00022     const int NId1 = BtwEH.GetKey(0).Val1;
00023     const int NId2 = BtwEH.GetKey(0).Val2;
00024     Graph->DelEdge(NId1, NId2);
00025     BFS.DoBfs(NId1, true, false, NId2, TInt::Mx);
00026     if (BFS.GetHops(NId1, NId2) == -1) { // two components
00027       TSnap::GetNodeWcc(Graph, NId1, Cmty1);
00028       TSnap::GetNodeWcc(Graph, NId2, Cmty2);
00029       return;
00030     }
00031   }
00032 }
00033 
00034 // Connected components of a graph define clusters
00035 // OutDegH and OrigEdges stores node degrees and number of edges in the original graph
00036 double _GirvanNewmanGetModularity(const PUNGraph& G, const TIntH& OutDegH, const int& OrigEdges, TCnComV& CnComV) {
00037   TSnap::GetWccs(G, CnComV); // get communities
00038   double Mod = 0;
00039   for (int c = 0; c < CnComV.Len(); c++) {
00040     const TIntV& NIdV = CnComV[c]();
00041     double EIn=0, EEIn=0;
00042     for (int i = 0; i < NIdV.Len(); i++) {
00043       TUNGraph::TNodeI NI = G->GetNI(NIdV[i]);
00044       EIn += NI.GetOutDeg();
00045       EEIn += OutDegH.GetDat(NIdV[i]);
00046     }
00047     Mod += (EIn-EEIn*EEIn/(2.0*OrigEdges));
00048   }
00049   if (Mod == 0) { return 0; }
00050   else { return Mod/(2.0*OrigEdges); }
00051 }
00052 
00053 } // namespace TSnapDetail
00054 
00055 // Maximum modularity clustering by Girvan-Newman algorithm (slow)
00056 //  Girvan M. and Newman M. E. J., Community structure in social and biological networks, Proc. Natl. Acad. Sci. USA 99, 7821-7826 (2002)
00057 double CommunityGirvanNewman(PUNGraph& Graph, TCnComV& CmtyV) {
00058   TIntH OutDegH;
00059   const int NEdges = Graph->GetEdges();
00060   for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
00061     OutDegH.AddDat(NI.GetId(), NI.GetOutDeg());
00062   }
00063   double BestQ = -1; // modularity
00064   TCnComV CurCmtyV;
00065   CmtyV.Clr();
00066   TIntV Cmty1, Cmty2;
00067   while (true) {
00068     TSnapDetail::CmtyGirvanNewmanStep(Graph, Cmty1, Cmty2);
00069     const double Q = TSnapDetail::_GirvanNewmanGetModularity(Graph, OutDegH, NEdges, CurCmtyV);
00070     //printf("current modularity: %f\n", Q);
00071     if (Q > BestQ) {
00072       BestQ = Q; 
00073       CmtyV.Swap(CurCmtyV);
00074     }
00075     if (Cmty1.Len()==0 || Cmty2.Len() == 0) { break; }
00076   }
00077   return BestQ;
00078 }
00079 
00080 namespace TSnapDetail {
00084 class TCNMQMatrix {
00085 private:
00086   struct TCmtyDat {
00087     double DegFrac;
00088     TIntFltH NIdQH;
00089     int MxQId;
00090     TCmtyDat() : MxQId(-1) { }
00091     TCmtyDat(const double& NodeDegFrac, const int& OutDeg) : 
00092       DegFrac(NodeDegFrac), NIdQH(OutDeg), MxQId(-1) { }
00093     void AddQ(const int& NId, const double& Q) { NIdQH.AddDat(NId, Q);
00094       if (MxQId==-1 || NIdQH[MxQId]<Q) { MxQId=NIdQH.GetKeyId(NId); } }
00095     void UpdateMaxQ() { MxQId=-1; 
00096       for (int i = -1; NIdQH.FNextKeyId(i); ) { 
00097         if (MxQId==-1 || NIdQH[MxQId]< NIdQH[i]) { MxQId=i; } } }
00098     void DelLink(const int& K) { const int NId=GetMxQNId(); 
00099       NIdQH.DelKey(K); if (NId==K) { UpdateMaxQ(); }  }
00100     int GetMxQNId() const { return NIdQH.GetKey(MxQId); }
00101     double GetMxQ() const { return NIdQH[MxQId]; }
00102   };
00103 private:
00104   THash<TInt, TCmtyDat> CmtyQH;
00105   TVec<TFltIntIntTr> MxQHeap;
00106   TUnionFind CmtyIdUF;
00107   double Q;
00108 public:
00109   TCNMQMatrix(const PUNGraph& Graph) : CmtyQH(Graph->GetNodes()), 
00110     MxQHeap(Graph->GetNodes(), 0), CmtyIdUF(Graph->GetNodes()) { Init(Graph); }
00111   void Init(const PUNGraph& Graph) {
00112     const double M = 0.5/Graph->GetEdges(); // 1/2m
00113     Q = 0.0;
00114     for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
00115       CmtyIdUF.Add(NI.GetId());
00116       const int OutDeg = NI.GetOutDeg();
00117       if (OutDeg == 0) { continue; }
00118       TCmtyDat& Dat = CmtyQH.AddDat(NI.GetId(), TCmtyDat(M * OutDeg, OutDeg));
00119       for (int e = 0; e < NI.GetOutDeg(); e++) {
00120         const int DstNId = NI.GetOutNId(e);
00121         const double DstMod = 2 * M * (1.0 - OutDeg * Graph->GetNI(DstNId).GetOutDeg() * M);
00122         Dat.AddQ(DstNId, DstMod);
00123       }
00124       Q += -1.0*TMath::Sqr(OutDeg*M);
00125       if (NI.GetId() < Dat.GetMxQNId()) {
00126         MxQHeap.Add(TFltIntIntTr(Dat.GetMxQ(), NI.GetId(), Dat.GetMxQNId())); }
00127     }
00128     MxQHeap.MakeHeap();
00129   }
00130   TFltIntIntTr FindMxQEdge() {
00131     while (true) {
00132       if (MxQHeap.Empty()) { break; }
00133       const TFltIntIntTr TopQ = MxQHeap.PopHeap();
00134       if (! CmtyQH.IsKey(TopQ.Val2) || ! CmtyQH.IsKey(TopQ.Val3)) { continue; }
00135       if (TopQ.Val1!=CmtyQH.GetDat(TopQ.Val2).GetMxQ() && TopQ.Val1!=CmtyQH.GetDat(TopQ.Val3).GetMxQ()) { continue; }
00136       return TopQ;
00137     }
00138     return TFltIntIntTr(-1, -1, -1);
00139   }
00140   bool MergeBestQ() {
00141     const TFltIntIntTr TopQ = FindMxQEdge();
00142     if (TopQ.Val1 <= 0.0) { return false; }
00143     // joint communities
00144     const int I = TopQ.Val3;
00145     const int J = TopQ.Val2;
00146     CmtyIdUF.Union(I, J); // join
00147     Q += TopQ.Val1;
00148     TCmtyDat& DatJ = CmtyQH.GetDat(J);
00149     { TCmtyDat& DatI = CmtyQH.GetDat(I);
00150     DatI.DelLink(J);  DatJ.DelLink(I);
00151     for (int i = -1; DatJ.NIdQH.FNextKeyId(i); ) {
00152       const int K = DatJ.NIdQH.GetKey(i);
00153       TCmtyDat& DatK = CmtyQH.GetDat(K);
00154       double NewQ = DatJ.NIdQH[i];
00155       if (DatI.NIdQH.IsKey(K)) { NewQ = NewQ+DatI.NIdQH.GetDat(K);  DatK.DelLink(I); }     // K connected to I and J
00156       else { NewQ = NewQ-2*DatI.DegFrac*DatK.DegFrac; }  // K connected to J not I
00157       DatJ.AddQ(K, NewQ);
00158       DatK.AddQ(J, NewQ);
00159       MxQHeap.PushHeap(TFltIntIntTr(NewQ, TMath::Mn(J,K), TMath::Mx(J,K)));
00160     }
00161     for (int i = -1; DatI.NIdQH.FNextKeyId(i); ) {
00162       const int K = DatI.NIdQH.GetKey(i);
00163       if (! DatJ.NIdQH.IsKey(K)) { // K connected to I not J
00164         TCmtyDat& DatK = CmtyQH.GetDat(K);
00165         const double NewQ = DatI.NIdQH[i]-2*DatJ.DegFrac*DatK.DegFrac; 
00166         DatJ.AddQ(K, NewQ);
00167         DatK.DelLink(I);
00168         DatK.AddQ(J, NewQ);
00169         MxQHeap.PushHeap(TFltIntIntTr(NewQ, TMath::Mn(J,K), TMath::Mx(J,K)));
00170       }
00171     } 
00172     DatJ.DegFrac += DatI.DegFrac; }
00173     if (DatJ.NIdQH.Empty()) { CmtyQH.DelKey(J); } // isolated community (done)
00174     CmtyQH.DelKey(I);
00175     return true;
00176   }
00177   static double CmtyCMN(const PUNGraph& Graph, TCnComV& CmtyV) {
00178     TCNMQMatrix QMatrix(Graph);
00179     // maximize modularity
00180     while (QMatrix.MergeBestQ()) { }
00181     // reconstruct communities
00182     THash<TInt, TIntV> IdCmtyH;
00183     for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
00184       IdCmtyH.AddDat(QMatrix.CmtyIdUF.Find(NI.GetId())).Add(NI.GetId()); 
00185     }
00186     CmtyV.Gen(IdCmtyH.Len());
00187     for (int j = 0; j < IdCmtyH.Len(); j++) {
00188       CmtyV[j].NIdV.Swap(IdCmtyH[j]);
00189     }
00190     return QMatrix.Q;
00191   }
00192 };
00193 
00194 } // namespace TSnapDetail
00195 
00196 double CommunityCNM(const PUNGraph& Graph, TCnComV& CmtyV) {
00197   return TSnapDetail::TCNMQMatrix::CmtyCMN(Graph, CmtyV);
00198 }
00199 
00200 }; //namespace TSnap