SNAP Library, Developer Reference  2012-10-02 12:56:23
SNAP, a general purpose network analysis and graph mining library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
ghash.cpp
Go to the documentation of this file.
00001 
00002 // Graph Hash Table Key
00003 const int TGraphKey::RoundTo = 4; // round to 4 decimal places
00004 
00005 TGraphKey::TGraphKey(const TSFltV& GraphSigV) : Nodes(-1), EdgeV(), SigV(), VariantId(0) {
00006   SigV.Gen(GraphSigV.Len());
00007   for (int i = 0; i < GraphSigV.Len(); i++) {
00008     SigV[i] = TFlt(TMath::Round(GraphSigV[i], RoundTo));
00009   }
00010 }
00011 
00012 TGraphKey::TGraphKey(const TIntV& GraphSigV) : Nodes(-1), EdgeV(), SigV(), VariantId(0) {
00013   SigV.Gen(GraphSigV.Len());
00014   for (int i = 0; i < GraphSigV.Len(); i++) {
00015     SigV[i] = TFlt(GraphSigV[i]());
00016   }
00017 }
00018 
00019 TGraphKey::TGraphKey(const TFltV& GraphSigV) : Nodes(-1), EdgeV(), SigV(), VariantId(0) {
00020   SigV.Gen(GraphSigV.Len());
00021   for (int i = 0; i < GraphSigV.Len(); i++) {
00022     SigV[i] = TFlt(TMath::Round(GraphSigV[i], RoundTo));
00023   }
00024 }
00025 
00026 TGraphKey::TGraphKey(const TGraphKey& GraphKey) : Nodes(GraphKey.Nodes),
00027   EdgeV(GraphKey.EdgeV), SigV(GraphKey.SigV), VariantId(GraphKey.VariantId) {
00028 }
00029 
00030 TGraphKey::TGraphKey(TSIn& SIn) : Nodes(SIn), EdgeV(SIn), SigV(SIn), VariantId(SIn) { }
00031 
00032 void TGraphKey::Save(TSOut& SOut) const {
00033   Nodes.Save(SOut);  EdgeV.Save(SOut);
00034   SigV.Save(SOut);  VariantId.Save(SOut);
00035 }
00036 
00037 TGraphKey& TGraphKey::operator = (const TGraphKey& GraphKey) {
00038   if (this != &GraphKey) {
00039     Nodes = GraphKey.Nodes;
00040     EdgeV = GraphKey.EdgeV;
00041     SigV = GraphKey.SigV;
00042     VariantId = GraphKey.VariantId;
00043   }
00044   return *this;
00045 }
00046 
00047 PNGraph TGraphKey::GetNGraph() const {
00048   PNGraph G = TNGraph::New();
00049   for (int i = 0; i < GetNodes(); i++) G->AddNode(i);
00050   for (int e = 0; e < GetEdges(); e++) {
00051     G->AddEdge(EdgeV[e].Val1, EdgeV[e].Val2);
00052   }
00053   G->Defrag();
00054   return G;
00055 }
00056 
00057 // renumbers nodes
00058 void TGraphKey::TakeGraph(const PNGraph& Graph) {
00059   TIntH NodeIdH;
00060   for (TNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
00061     NodeIdH.AddKey(NI.GetId()); }
00062   Nodes = Graph->GetNodes();
00063   EdgeV.Gen(Nodes, 0);
00064   for (TNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
00065     const int NewNId = NodeIdH.GetKeyId(NI.GetId());
00066     for (int i = 0; i < NI.GetOutDeg(); i++) {
00067       EdgeV.Add(TIntPr(NewNId, NodeIdH.GetKeyId(NI.GetOutNId(i))));
00068     }
00069   }
00070   EdgeV.Sort(true);
00071   EdgeV.Pack();
00072 }
00073 
00074 void TGraphKey::TakeGraph(const PNGraph& Graph, TIntPrV& NodeMap) {
00075   TIntSet NodeIdH;
00076   int n = 0;
00077   NodeMap.Gen(Graph->GetNodes(), 0);
00078   for (TNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++, n++) {
00079     NodeIdH.AddKey(NI.GetId());
00080     NodeMap.Add(TIntPr(NI.GetId(), n));
00081   }
00082   Nodes = Graph->GetNodes();
00083   EdgeV.Gen(Nodes, 0);
00084   for (TNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
00085     const int NewNId = NodeIdH.GetKeyId(NI.GetId());
00086     for (int i = 0; i < NI.GetOutDeg(); i++) {
00087       EdgeV.Add(TIntPr(NewNId, NodeIdH.GetKeyId(NI.GetOutNId(i))));
00088     }
00089   }
00090   EdgeV.Sort(true);
00091   EdgeV.Pack();
00092 }
00093 
00094 void TGraphKey::TakeSig(const PNGraph& Graph, const int& MnSvdGraph, const int& MxSvdGraph) {
00095   const int Edges = Graph->GetEdges();
00096   Nodes = Graph->GetNodes();
00097   VariantId = 0;
00098   SigV.Gen(2+Nodes, 0);
00099   // degree sequence
00100   TIntPrV DegV(Nodes, 0);
00101   for (TNGraph::TNodeI NodeI = Graph->BegNI(); NodeI < Graph->EndNI(); NodeI++) {
00102     DegV.Add(TIntPr(NodeI.GetInDeg(), NodeI.GetOutDeg()));
00103   }
00104   DegV.Sort(false);
00105   SigV.Add(TFlt(Nodes));
00106   SigV.Add(TFlt(Edges));
00107   for (int i = 0; i < DegV.Len(); i++) {
00108     SigV.Add(DegV[i].Val1());
00109     SigV.Add(DegV[i].Val2());
00110   }
00111   // singular values signature
00112   //   it turns out that it is cheaper to do brute force isomorphism
00113   //   checking than to calculate SVD and then check isomorphism
00114   if (Nodes >= MnSvdGraph && Nodes < MxSvdGraph) {
00115     // perform full SVD
00116     TFltVV AdjMtx(Nodes+1, Nodes+1);
00117     TFltV SngValV;
00118     TFltVV LSingV, RSingV;
00119     TIntH NodeIdH;
00120     // create adjecency matrix
00121     for (TNGraph::TNodeI NodeI = Graph->BegNI(); NodeI < Graph->EndNI(); NodeI++) {
00122       NodeIdH.AddKey(NodeI.GetId());
00123     }
00124     for (TNGraph::TNodeI NodeI = Graph->BegNI(); NodeI < Graph->EndNI(); NodeI++) {
00125       const int NodeId = NodeIdH.GetKeyId(NodeI.GetId()) + 1;
00126       for (int e = 0; e < NodeI.GetOutDeg(); e++) {
00127         const int DstNId = NodeIdH.GetKeyId(NodeI.GetOutNId(e)) + 1;  // no self edges
00128         if (NodeId != DstNId) AdjMtx.At(NodeId, DstNId) = 1;
00129       }
00130     }
00131     try { // can fail to converge but results seem to be good
00132       TSvd::Svd(AdjMtx, LSingV, SngValV, RSingV);
00133     } catch(...) {
00134       printf("\n***No SVD convergence: G(%d, %d): SngValV.Len():%d\n", Nodes(), Graph->GetEdges(), SngValV.Len());
00135     }
00136     // round singular values
00137     SngValV.Sort(false);
00138     for (int i = 0; i < SngValV.Len(); i++) {
00139       SigV.Add(TMath::Round(SngValV[i], RoundTo));
00140     }
00141   }
00142   //printf("SIG:\n");  for (int i = 0; i < SigV.Len(); i++) { printf("\t%f\n", SigV[i]); }
00143   SigV.Pack();
00144 }
00145 
00146 void TGraphKey::SaveTxt(FILE *F) const {
00147   fprintf(F, "nodes: %d.  edges: %d\n", GetNodes(), GetEdges());
00148   for (int i = 0; i < EdgeV.Len(); i++) {
00149     fprintf(F,"  %d\t%d\n", EdgeV[i].Val1(), EdgeV[i].Val2());
00150   }
00151 }
00152 
00153 void TGraphKey::SaveGViz(const TStr& OutFNm, const TStr& Desc, const TStr& NodeAttrs, const int& Size) const {
00154   FILE *F = fopen(OutFNm.CStr(), "wt");
00155   fprintf(F, "/*****\n");
00156   fprintf(F, "  Graph (%d, %d)\n", GetNodes(), GetEdges());
00157   //if (! Desc.Empty()) fprintf(F, "  %s\n", Desc.CStr());
00158   fprintf(F, "*****/\n\n");
00159   fprintf(F, "digraph G {\n");
00160   if (Size != -1) fprintf(F, "  size=\"%d,%d\";\n", Size, Size);
00161   fprintf(F, "  graph [splines=true overlap=false]\n"); //size=\"12,10\" ratio=fill
00162   if (NodeAttrs.Empty()) fprintf(F, "  node  [shape=ellipse, width=0.3, height=0.3]\n");
00163   else fprintf(F, "  node  [shape=ellipse, %s]\n", NodeAttrs.CStr());
00164   if (! EdgeV.Empty()) {
00165     for (int e = 0; e < EdgeV.Len(); e++) {
00166       fprintf(F, "  %d -> %d;\n", EdgeV[e].Val1(), EdgeV[e].Val2()); }
00167   } else {
00168     for (int n = 0; n < Nodes; n++) { fprintf(F, "  %d;\n", n); }
00169   }
00170   if (! Desc.Empty()) {
00171     fprintf(F, "  label = \"\\n%s\\n\";", Desc.CStr());
00172     fprintf(F, "  fontsize=24;\n");
00173   }
00174   fprintf(F, "}\n");
00175   fclose(F);
00176 }
00177 
00178 // width=0.3, height=0.3, label=\"\", style=filled, color=black
00179 void TGraphKey::DrawGViz(const TStr& OutFNm, const TStr& Desc, const TStr& NodeAttrs, const int& Size) const {
00180   const TStr DotFNm = OutFNm.GetFMid()+".dot";
00181   SaveGViz(DotFNm, Desc, NodeAttrs, Size);
00182   TSnap::TSnapDetail::GVizDoLayout(DotFNm, OutFNm, gvlDot);
00183 }
00184 
00185 bool TGraphKey::IsIsomorph(const TGraphKey& Key1, const TGraphKey& Key2, const TIntV& NodeIdMap) {
00186   const TIntPrV& EdgeV1 = Key1.EdgeV;
00187   const TIntPrV& EdgeV2 = Key2.EdgeV;
00188   if (Key1.Nodes != Key2.Nodes || EdgeV1.Len() != EdgeV2.Len()) return false;
00189   for (int e1 = 0; e1 < EdgeV1.Len(); e1++) {
00190     const TIntPr Edge2(NodeIdMap[EdgeV1[e1].Val1], NodeIdMap[EdgeV1[e1].Val2]);
00191     if (EdgeV2.SearchBin(Edge2) == -1) return false;
00192   }
00193   return true;
00194 }
00195 
00196 bool TGraphKey::IsIsomorph(const TGraphKey& Key1, const TGraphKey& Key2, const TVec<TIntV>& NodeIdMapV) {
00197   int IsoPermId;
00198   return IsIsomorph(Key1, Key2, NodeIdMapV, IsoPermId);
00199 }
00200 
00201 bool TGraphKey::IsIsomorph(const TGraphKey& Key1, const TGraphKey& Key2, const TVec<TIntV>& NodeIdMapV, int& IsoPermId) {
00202   const TIntPrV& EdgeV1 = Key1.EdgeV;
00203   const TIntPrV& EdgeV2 = Key2.EdgeV;
00204   //for (int i = 0; i < EdgeV1.Len(); i++) printf("\t%d - %d\n", EdgeV1[i].Val1, EdgeV1[i].Val2);  printf("\n");
00205   //for (int i = 0; i < EdgeV2.Len(); i++) printf("\t%d - %d\n", EdgeV2[i].Val1, EdgeV2[i].Val2);
00206   if (Key1.Nodes != Key2.Nodes || EdgeV1.Len() != EdgeV2.Len()) return false;
00207   const int Nodes = NodeIdMapV[0].Len();
00208   // fast adjecency matrix
00209   TIntV AdjMtx2(Nodes*Nodes);
00210   for (int i = 0; i < EdgeV2.Len(); i++) {
00211     AdjMtx2[EdgeV2[i].Val1*Nodes + EdgeV2[i].Val2] = 1;
00212   }
00213   for (int perm = 0; perm < NodeIdMapV.Len(); perm++) {
00214     const TIntV& NodeIdMap = NodeIdMapV[perm];
00215     bool IsIso = true;
00216     for (int e1 = 0; e1 < EdgeV1.Len(); e1++) {
00217       const int NId1 = NodeIdMap[EdgeV1[e1].Val1];
00218       const int NId2 = NodeIdMap[EdgeV1[e1].Val2];
00219       if (AdjMtx2[NId1*Nodes + NId2] != 1) {
00220         IsIso = false;  break; }
00221     }
00222     if (IsIso) {
00223       IsoPermId = perm;
00224       return true; }
00225   }
00226   IsoPermId = -1;
00227   return false;
00228 }
00229 
00231 // Simple Edge Graph
00232 bool TSimpleGraph::Join(const TSimpleGraph& G1, const TSimpleGraph& G2) {
00233   const int Edges1 = G1.GetEdges();
00234   const int Edges2 = G2.GetEdges();
00235   const TIntPrV& EdgeV1 = G1.EdgeV;
00236   const TIntPrV& EdgeV2 = G2.EdgeV;
00237   const int MxEdges = Edges1+1;
00238   if (GetEdges() != MxEdges) EdgeV.Gen(MxEdges);
00239   IAssert(Edges1 == Edges2);
00240 
00241   int e=0, g1=0, g2=0;
00242   while (g1 < Edges1 && g2 < Edges2) {
00243     if (e == MxEdges) return false;
00244     if (abs(g1 - g2) > 1) return false;
00245     if (EdgeV1[g1] == EdgeV2[g2]) { e++;  g1++;  g2++; }
00246     else if (EdgeV1[g1] < EdgeV2[g2]) { e++;  g1++; }
00247     else { e++;  g2++; }
00248   }
00249 
00250   e=0; g1=0; g2=0;
00251   while (g1 < Edges1 && g2 < Edges2) {
00252     if (EdgeV1[g1] == EdgeV2[g2]) {
00253       EdgeV[e] = EdgeV1[g1];  e++;  g1++;  g2++; }
00254     else if (EdgeV1[g1] < EdgeV2[g2]) {
00255       EdgeV[e] = EdgeV1[g1];  e++;  g1++; }
00256     else {
00257       EdgeV[e] = EdgeV2[g2];  e++;  g2++; }
00258   }
00259   if (g1 == Edges1 && g2 == Edges2 && e == MxEdges) return true;
00260   if (e+1 == MxEdges) {
00261     if (g1+1 == Edges1 && g2 == Edges2) {
00262       EdgeV[e] = EdgeV1.Last();
00263       return true;
00264     }
00265     if (g1 == Edges1 && g2+1 == Edges2) {
00266       EdgeV[e] = EdgeV2.Last();
00267       return true;
00268     }
00269   }
00270   return false;
00271 }
00272 
00273 void TSimpleGraph::Dump(const TStr& Desc) const {
00274   if (! Desc.Empty()) printf("%s. Edges: %d\n", Desc.CStr(), EdgeV.Len());
00275   else printf("Edges: %d\n", EdgeV.Len());
00276   for (int i = 0; i < EdgeV.Len(); i++) {
00277     printf("\t%d\t%d\n", EdgeV[i].Val1(), EdgeV[i].Val2());
00278   }
00279 }
00280 
00281 void TSubGraphsEnum::Gen2Graphs() {
00282   // singe edge sub-graphs
00283   SgV.Gen(NGraph->GetEdges(), 0);
00284   TSimpleGraph SimpleG;
00285   TIntPrV& EdgeV = SimpleG.GetEdgeV();
00286   EdgeV.Gen(1);
00287   for (TNGraph::TNodeI NI = NGraph->BegNI(); NI < NGraph->EndNI(); NI++) {
00288     for (int e = 0; e < NI.GetOutDeg(); e++) {
00289       EdgeV[0] = TIntPr(NI.GetId(), NI.GetOutNId(e));
00290       SgV.Add(SimpleG);
00291     }
00292   }
00293   SgV.Sort();
00294   // two edge sub-graphs
00295   EdgeV.Gen(2);
00296   for (int g1 = 0; g1 < SgV.Len()-1; g1++) {
00297     const TIntPr& E1 = SgV[g1].GetEdgeV()[0];
00298     for (int g2 = g1+1; g2 < SgV.Len(); g2++) {
00299       const TIntPr& E2 = SgV[g2].GetEdgeV()[0];
00300       if (E1.Val2 == E2.Val1 || E1.Val1 == E2.Val2 || E1.Val1 == E2.Val1 || E1.Val2 == E2.Val2) {
00301         EdgeV[0] = TMath::Mn(E1, E2);
00302         EdgeV[1] = TMath::Mx(E1, E2);
00303         SimpleG.Dump();
00304         NextSgV.Add(SimpleG);
00305       }
00306     }
00307   }
00308   SgV.MoveFrom(NextSgV);
00309 }
00310 
00311 void TSubGraphsEnum::EnumSubGraphs(const int& MaxEdges) {
00312   TExeTm ExeTm;
00313   Gen2Graphs();
00314   printf("  %2d edge graphs:  %d\t[%s]\n", 2, SgV.Len(), ExeTm.GetTmStr());  ExeTm.Tick();
00315   //for (int i = 0; i < SgV.Len(); i++) { SgV[i].Dump(TStr::Fmt("  %d", i+1)); }
00316   //printf("**************************************************************\n");
00317   TSimpleGraph SimpleG;
00318   TIntPrV& EdgeV = SimpleG.GetEdgeV();
00319   // multiple edge sub-graphs
00320   for (int edges = 3; edges <= MaxEdges; edges++) {
00321     EdgeV.Clr();
00322     printf("  %2d edge graphs:", edges);
00323     for (int g1 = 0; g1 < SgV.Len()-1; g1++) {
00324       for (int g2 = g1+1; g2 < SgV.Len(); g2++) {
00325         if (SimpleG.Join(SgV[g1], SgV[g2])) { NextSgV.Add(SimpleG); }
00326       }
00327     }
00328     printf("  candidates: %8d [%s]", NextSgV.Len(), ExeTm.GetTmStr());  ExeTm.Tick();
00329     NextSgV.Sort();
00330     SgV.Gen(NextSgV.Len(), 0);
00331     SgV.Add(NextSgV[0]);
00332     for (int i = 1; i < NextSgV.Len(); i++) {
00333       if (SgV.Last() != NextSgV[i]) {
00334         SgV.Add(NextSgV[i]);
00335       }
00336     }
00337     NextSgV.Clr(false);
00338     printf("  total: %8d [%s]\n", SgV.Len(), ExeTm.GetTmStr());  ExeTm.Tick();
00339     //for (int i = 0; i < SgV.Len(); i++) { SgV[i].Dump(TStr::Fmt("  %d", i+1)); }
00340     //printf("**************************************************************\n");
00341   }
00342 }
00343 
00344 void TSubGraphsEnum::RecurBfs(const int& MxDepth) {
00345   TExeTm ExeTm;
00346   SgV.Clr(true);
00347   for (TNGraph::TNodeI NI = NGraph->BegNI(); NI < NGraph->EndNI(); NI++) {
00348     TSimpleGraph SimpleG;
00349     RecurBfs(NI.GetId(), MxDepth, SimpleG);
00350     //NGraph->DelNode(NI.GetId());
00351     printf(".");
00352   }
00353   printf("\ncandidates: %d\n", SgV.Len());
00354   SgV.Sort();
00355   int Cnt = 1;
00356   for (int i = 1; i < SgV.Len(); i++) {
00357     if (SgV[i-1] != SgV[i]) Cnt++;
00358   }
00359   printf("distinct:   %d\t[%s]\n", Cnt, ExeTm.GetTmStr());
00360 }
00361 
00362 void TSubGraphsEnum::RecurBfs(const int& NId, const int& Depth, TSimpleGraph& PrevG) {
00363   if (Depth == 0) {
00364     TIntPrV& EdgeV = PrevG();
00365     EdgeV.Sort();
00366     for (int i = 1; i < EdgeV.Len(); i++) {
00367       if (EdgeV[i-1] == EdgeV[i]) { return; }
00368     }
00369     SgV.Add(PrevG);
00370     return;
00371   }
00372   const TNGraph::TNodeI NI = NGraph ->GetNI(NId);
00373   for (int e = 0; e < NI.GetOutDeg(); e++) {
00374     TSimpleGraph CurG = PrevG;
00375     CurG.AddEdge(NI.GetId(), NI.GetOutNId(e));
00376     RecurBfs(NI.GetOutNId(e), Depth-1, CurG);
00377   }
00378   for (int e = 0; e < NI.GetInDeg(); e++) {
00379     TSimpleGraph CurG = PrevG;
00380     CurG.AddEdge(NI.GetInNId(e), NI.GetId());
00381     RecurBfs(NI.GetInNId(e), Depth-1, CurG);
00382   }
00383 }
00384 
00385 void TSubGraphsEnum::RecurBfs1(const int& MxDepth) {
00386   TExeTm ExeTm;
00387   SgV.Clr(true);
00388   for (TNGraph::TNodeI NI = NGraph->BegNI(); NI < NGraph->EndNI(); NI++) {
00389     TSimpleGraph SimpleG;
00390     RecurBfs1(NI.GetId(), MxDepth);
00391     //NGraph->DelNode(NI.GetId());
00392     printf(".");
00393   }
00394   printf("candidates: %d\n", SgV.Len());
00395   SgV.Sort();
00396   int Cnt = 1;
00397   for (int i = 1; i < SgV.Len(); i++) {
00398     if (SgV[i-1]!=SgV[i]) {
00399       //SgV[i].Dump();
00400       Cnt++;
00401     }
00402   }
00403   printf("distinct:   %d\t[%s]\n", Cnt, ExeTm.GetTmStr());
00404 }
00405 
00406 void TSubGraphsEnum::RecurBfs1(const int& NId, const int& Depth) {
00407   if (Depth == 0) {
00408     TIntPrV EdgeV;
00409     EdgeH.GetKeyV(EdgeV);
00410     EdgeV.Sort();
00411     SgV.Add(EdgeV);
00412     return;
00413   }
00414   const TNGraph::TNodeI NI = NGraph ->GetNI(NId);
00415   for (int e = 0; e < NI.GetOutDeg(); e++) {
00416     const TIntPr Edge(NId, NI.GetOutNId(e));
00417     if (! EdgeH.IsKey(Edge)) {
00418       EdgeH.AddKey(Edge);
00419       RecurBfs1(NI.GetOutNId(e), Depth-1);
00420       EdgeH.DelKey(Edge);
00421     }
00422   }
00423   for (int e = 0; e < NI.GetInDeg(); e++) {
00424     const TIntPr Edge(NI.GetInNId(e), NId);
00425     if (! EdgeH.IsKey(Edge)) {
00426       EdgeH.AddKey(Edge);
00427       RecurBfs1(NI.GetInNId(e), Depth-1);
00428       EdgeH.DelKey(Edge);
00429     }
00430   }
00431 }