SNAP Library, User Reference  2012-10-15 15:06:59
SNAP, a general purpose network analysis and graph mining library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
ghash.cpp
Go to the documentation of this file.
00001 
00002 // Graph Hash Table Key
00003 const int TGraphKey::RoundTo = 4; // round to 4 decimal places
00004 
00005 TGraphKey::TGraphKey(const TSFltV& GraphSigV) : Nodes(-1), EdgeV(), SigV(), VariantId(0) {
00006   SigV.Gen(GraphSigV.Len());
00007   for (int i = 0; i < GraphSigV.Len(); i++) {
00008     SigV[i] = TFlt(TMath::Round(GraphSigV[i], RoundTo));
00009   }
00010 }
00011 
00012 TGraphKey::TGraphKey(const TIntV& GraphSigV) : Nodes(-1), EdgeV(), SigV(), VariantId(0) {
00013   SigV.Gen(GraphSigV.Len());
00014   for (int i = 0; i < GraphSigV.Len(); i++) {
00015     SigV[i] = TFlt(GraphSigV[i]());
00016   }
00017 }
00018 
00019 TGraphKey::TGraphKey(const TFltV& GraphSigV) : Nodes(-1), EdgeV(), SigV(), VariantId(0) {
00020   SigV.Gen(GraphSigV.Len());
00021   for (int i = 0; i < GraphSigV.Len(); i++) {
00022     SigV[i] = TFlt(TMath::Round(GraphSigV[i], RoundTo));
00023   }
00024 }
00025 
00026 TGraphKey::TGraphKey(const TGraphKey& GraphKey) : Nodes(GraphKey.Nodes),
00027   EdgeV(GraphKey.EdgeV), SigV(GraphKey.SigV), VariantId(GraphKey.VariantId) {
00028 }
00029 
00030 TGraphKey::TGraphKey(TSIn& SIn) : Nodes(SIn), EdgeV(SIn), SigV(SIn), VariantId(SIn) { }
00031 
00032 void TGraphKey::Save(TSOut& SOut) const {
00033   Nodes.Save(SOut);  EdgeV.Save(SOut);
00034   SigV.Save(SOut);  VariantId.Save(SOut);
00035 }
00036 
00037 TGraphKey& TGraphKey::operator = (const TGraphKey& GraphKey) {
00038   if (this != &GraphKey) {
00039     Nodes = GraphKey.Nodes;
00040     EdgeV = GraphKey.EdgeV;
00041     SigV = GraphKey.SigV;
00042     VariantId = GraphKey.VariantId;
00043   }
00044   return *this;
00045 }
00046 
00047 PNGraph TGraphKey::GetNGraph() const {
00048   PNGraph G = TNGraph::New();
00049   for (int i = 0; i < GetNodes(); i++) G->AddNode(i);
00050   for (int e = 0; e < GetEdges(); e++) {
00051     G->AddEdge(EdgeV[e].Val1, EdgeV[e].Val2);
00052   }
00053   G->Defrag();
00054   return G;
00055 }
00056 
00057 // renumbers nodes
00058 void TGraphKey::TakeGraph(const PNGraph& Graph) {
00059   TIntH NodeIdH;
00060   for (TNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
00061     NodeIdH.AddKey(NI.GetId()); }
00062   Nodes = Graph->GetNodes();
00063   EdgeV.Gen(Nodes, 0);
00064   for (TNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
00065     const int NewNId = NodeIdH.GetKeyId(NI.GetId());
00066     for (int i = 0; i < NI.GetOutDeg(); i++) {
00067       EdgeV.Add(TIntPr(NewNId, NodeIdH.GetKeyId(NI.GetOutNId(i))));
00068     }
00069   }
00070   EdgeV.Sort(true);
00071   EdgeV.Pack();
00072 }
00073 
00074 void TGraphKey::TakeGraph(const PNGraph& Graph, TIntPrV& NodeMap) {
00075   TIntSet NodeIdH;
00076   int n = 0;
00077   NodeMap.Gen(Graph->GetNodes(), 0);
00078   for (TNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++, n++) {
00079     NodeIdH.AddKey(NI.GetId());
00080     NodeMap.Add(TIntPr(NI.GetId(), n));
00081   }
00082   Nodes = Graph->GetNodes();
00083   EdgeV.Gen(Nodes, 0);
00084   for (TNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
00085     const int NewNId = NodeIdH.GetKeyId(NI.GetId());
00086     for (int i = 0; i < NI.GetOutDeg(); i++) {
00087       EdgeV.Add(TIntPr(NewNId, NodeIdH.GetKeyId(NI.GetOutNId(i))));
00088     }
00089   }
00090   EdgeV.Sort(true);
00091   EdgeV.Pack();
00092 }
00093 
00094 void TGraphKey::TakeSig(const PNGraph& Graph, const int& MnSvdGraph, const int& MxSvdGraph) {
00095   const int Edges = Graph->GetEdges();
00096   Nodes = Graph->GetNodes();
00097   VariantId = 0;
00098   SigV.Gen(2+Nodes, 0);
00099   // degree sequence
00100   TIntPrV DegV(Nodes, 0);
00101   for (TNGraph::TNodeI NodeI = Graph->BegNI(); NodeI < Graph->EndNI(); NodeI++) {
00102     DegV.Add(TIntPr(NodeI.GetInDeg(), NodeI.GetOutDeg()));
00103   }
00104   DegV.Sort(false);
00105   // compose the signature: nodes, edges, sorted in- and out- degree sequence
00106   SigV.Add(TFlt(Nodes));
00107   SigV.Add(TFlt(Edges));
00108   for (int i = 0; i < DegV.Len(); i++) {
00109     SigV.Add(DegV[i].Val1());
00110     SigV.Add(DegV[i].Val2());
00111   }
00112   // singular values signature
00113   //   it turns out that it is cheaper to do brute force isomorphism
00114   //   checking than to calculate SVD and then check isomorphism
00115   if (Nodes >= MnSvdGraph && Nodes < MxSvdGraph) {
00116     // perform full SVD
00117     TFltVV AdjMtx(Nodes+1, Nodes+1);
00118     TFltV SngValV;
00119     TFltVV LSingV, RSingV;
00120     TIntH NodeIdH;
00121     // create adjecency matrix
00122     for (TNGraph::TNodeI NodeI = Graph->BegNI(); NodeI < Graph->EndNI(); NodeI++) {
00123       NodeIdH.AddKey(NodeI.GetId());
00124     }
00125     for (TNGraph::TNodeI NodeI = Graph->BegNI(); NodeI < Graph->EndNI(); NodeI++) {
00126       const int NodeId = NodeIdH.GetKeyId(NodeI.GetId()) + 1;
00127       for (int e = 0; e < NodeI.GetOutDeg(); e++) {
00128         const int DstNId = NodeIdH.GetKeyId(NodeI.GetOutNId(e)) + 1;  // no self edges
00129         if (NodeId != DstNId) AdjMtx.At(NodeId, DstNId) = 1;
00130       }
00131     }
00132     try { // can fail to converge but results seem to be good
00133       TSvd::Svd(AdjMtx, LSingV, SngValV, RSingV);
00134     } catch(...) {
00135       printf("\n***No SVD convergence: G(%d, %d): SngValV.Len():%d\n", Nodes(), Graph->GetEdges(), SngValV.Len());
00136     }
00137     // round singular values
00138     SngValV.Sort(false);
00139     for (int i = 0; i < SngValV.Len(); i++) {
00140       SigV.Add(TMath::Round(SngValV[i], RoundTo));
00141     }
00142   }
00143   //printf("SIG:\n");  for (int i = 0; i < SigV.Len(); i++) { printf("\t%f\n", SigV[i]); }
00144   SigV.Pack();
00145 }
00146 
00147 void TGraphKey::SaveTxt(FILE *F) const {
00148   fprintf(F, "#GraphKey. Nodes: %d.  Edges: %d\n", GetNodes(), GetEdges());
00149   for (int i = 0; i < EdgeV.Len(); i++) {
00150     fprintf(F,"  %d\t%d\n", EdgeV[i].Val1(), EdgeV[i].Val2());
00151   }
00152 }
00153 
00154 void TGraphKey::SaveGViz(const TStr& OutFNm, const TStr& Desc, const TStr& NodeAttrs, const int& Size) const {
00155   FILE *F = fopen(OutFNm.CStr(), "wt");
00156   fprintf(F, "/*****\n");
00157   fprintf(F, "  Graph (%d, %d)\n", GetNodes(), GetEdges());
00158   //if (! Desc.Empty()) fprintf(F, "  %s\n", Desc.CStr());
00159   fprintf(F, "*****/\n\n");
00160   fprintf(F, "digraph G {\n");
00161   if (Size != -1) fprintf(F, "  size=\"%d,%d\";\n", Size, Size);
00162   fprintf(F, "  graph [splines=true overlap=false]\n"); //size=\"12,10\" ratio=fill
00163   if (NodeAttrs.Empty()) fprintf(F, "  node  [shape=ellipse, width=0.3, height=0.3]\n");
00164   else fprintf(F, "  node  [shape=ellipse, %s]\n", NodeAttrs.CStr());
00165   if (! EdgeV.Empty()) {
00166     for (int e = 0; e < EdgeV.Len(); e++) {
00167       fprintf(F, "  %d -> %d;\n", EdgeV[e].Val1(), EdgeV[e].Val2()); }
00168   } else {
00169     for (int n = 0; n < Nodes; n++) { fprintf(F, "  %d;\n", n); }
00170   }
00171   if (! Desc.Empty()) {
00172     fprintf(F, "  label = \"\\n%s\\n\";", Desc.CStr());
00173     fprintf(F, "  fontsize=24;\n");
00174   }
00175   fprintf(F, "}\n");
00176   fclose(F);
00177 }
00178 
00179 // width=0.3, height=0.3, label=\"\", style=filled, color=black
00180 void TGraphKey::DrawGViz(const TStr& OutFNm, const TStr& Desc, const TStr& NodeAttrs, const int& Size) const {
00181   const TStr DotFNm = OutFNm.GetFMid()+".dot";
00182   SaveGViz(DotFNm, Desc, NodeAttrs, Size);
00183   TSnap::TSnapDetail::GVizDoLayout(DotFNm, OutFNm, gvlDot);
00184 }
00185 
00186 bool TGraphKey::IsIsomorph(const TGraphKey& Key1, const TGraphKey& Key2, const TIntV& NodeIdMap) {
00187   const TIntPrV& EdgeV1 = Key1.EdgeV;
00188   const TIntPrV& EdgeV2 = Key2.EdgeV;
00189   if (Key1.Nodes != Key2.Nodes || EdgeV1.Len() != EdgeV2.Len()) { return false; }
00190   for (int e1 = 0; e1 < EdgeV1.Len(); e1++) {
00191     const TIntPr Edge2(NodeIdMap[EdgeV1[e1].Val1], NodeIdMap[EdgeV1[e1].Val2]);
00192     if (EdgeV2.SearchBin(Edge2) == -1) return false;
00193   }
00194   return true;
00195 }
00196 
00197 bool TGraphKey::IsIsomorph(const TGraphKey& Key1, const TGraphKey& Key2, const TVec<TIntV>& NodeIdMapV) {
00198   int IsoPermId;
00199   return IsIsomorph(Key1, Key2, NodeIdMapV, IsoPermId);
00200 }
00201 
00202 bool TGraphKey::IsIsomorph(const TGraphKey& Key1, const TGraphKey& Key2, const TVec<TIntV>& NodeIdMapV, int& IsoPermId) {
00203   const TIntPrV& EdgeV1 = Key1.EdgeV;
00204   const TIntPrV& EdgeV2 = Key2.EdgeV;
00205   //for (int i = 0; i < EdgeV1.Len(); i++) printf("\t%d - %d\n", EdgeV1[i].Val1, EdgeV1[i].Val2);  printf("\n");
00206   //for (int i = 0; i < EdgeV2.Len(); i++) printf("\t%d - %d\n", EdgeV2[i].Val1, EdgeV2[i].Val2);
00207   if (Key1.Nodes != Key2.Nodes || EdgeV1.Len() != EdgeV2.Len()) return false;
00208   const int Nodes = NodeIdMapV[0].Len();
00209   // fast adjecency matrix
00210   TIntV AdjMtx2(Nodes*Nodes);
00211   for (int i = 0; i < EdgeV2.Len(); i++) {
00212     AdjMtx2[EdgeV2[i].Val1*Nodes + EdgeV2[i].Val2] = 1;
00213   }
00214   for (int perm = 0; perm < NodeIdMapV.Len(); perm++) {
00215     const TIntV& NodeIdMap = NodeIdMapV[perm];
00216     bool IsIso = true;
00217     for (int e1 = 0; e1 < EdgeV1.Len(); e1++) {
00218       const int NId1 = NodeIdMap[EdgeV1[e1].Val1];
00219       const int NId2 = NodeIdMap[EdgeV1[e1].Val2];
00220       if (AdjMtx2[NId1*Nodes + NId2] != 1) {
00221         IsIso = false;  break; }
00222     }
00223     if (IsIso) {
00224       IsoPermId = perm;
00225       return true; }
00226   }
00227   IsoPermId = -1;
00228   return false;
00229 }
00230 
00232 // Simple Edge Graph
00233 bool TSimpleGraph::Join(const TSimpleGraph& G1, const TSimpleGraph& G2) {
00234   const int Edges1 = G1.GetEdges();
00235   const int Edges2 = G2.GetEdges();
00236   const TIntPrV& EdgeV1 = G1.EdgeV;
00237   const TIntPrV& EdgeV2 = G2.EdgeV;
00238   const int MxEdges = Edges1+1;
00239   if (GetEdges() != MxEdges) EdgeV.Gen(MxEdges);
00240   IAssert(Edges1 == Edges2);
00241 
00242   int e=0, g1=0, g2=0;
00243   while (g1 < Edges1 && g2 < Edges2) {
00244     if (e == MxEdges) return false;
00245     if (abs(g1 - g2) > 1) return false;
00246     if (EdgeV1[g1] == EdgeV2[g2]) { e++;  g1++;  g2++; }
00247     else if (EdgeV1[g1] < EdgeV2[g2]) { e++;  g1++; }
00248     else { e++;  g2++; }
00249   }
00250 
00251   e=0; g1=0; g2=0;
00252   while (g1 < Edges1 && g2 < Edges2) {
00253     if (EdgeV1[g1] == EdgeV2[g2]) {
00254       EdgeV[e] = EdgeV1[g1];  e++;  g1++;  g2++; }
00255     else if (EdgeV1[g1] < EdgeV2[g2]) {
00256       EdgeV[e] = EdgeV1[g1];  e++;  g1++; }
00257     else {
00258       EdgeV[e] = EdgeV2[g2];  e++;  g2++; }
00259   }
00260   if (g1 == Edges1 && g2 == Edges2 && e == MxEdges) return true;
00261   if (e+1 == MxEdges) {
00262     if (g1+1 == Edges1 && g2 == Edges2) {
00263       EdgeV[e] = EdgeV1.Last();
00264       return true;
00265     }
00266     if (g1 == Edges1 && g2+1 == Edges2) {
00267       EdgeV[e] = EdgeV2.Last();
00268       return true;
00269     }
00270   }
00271   return false;
00272 }
00273 
00274 void TSimpleGraph::Dump(const TStr& Desc) const {
00275   if (! Desc.Empty()) printf("%s. Edges: %d\n", Desc.CStr(), EdgeV.Len());
00276   else printf("Edges: %d\n", EdgeV.Len());
00277   for (int i = 0; i < EdgeV.Len(); i++) {
00278     printf("\t%d\t%d\n", EdgeV[i].Val1(), EdgeV[i].Val2());
00279   }
00280 }
00281 
00282 void TSubGraphsEnum::Gen2Graphs() {
00283   // singe edge sub-graphs
00284   SgV.Gen(NGraph->GetEdges(), 0);
00285   TSimpleGraph SimpleG;
00286   TIntPrV& EdgeV = SimpleG.GetEdgeV();
00287   EdgeV.Gen(1);
00288   for (TNGraph::TNodeI NI = NGraph->BegNI(); NI < NGraph->EndNI(); NI++) {
00289     for (int e = 0; e < NI.GetOutDeg(); e++) {
00290       EdgeV[0] = TIntPr(NI.GetId(), NI.GetOutNId(e));
00291       SgV.Add(SimpleG);
00292     }
00293   }
00294   SgV.Sort();
00295   // two edge sub-graphs
00296   EdgeV.Gen(2);
00297   for (int g1 = 0; g1 < SgV.Len()-1; g1++) {
00298     const TIntPr& E1 = SgV[g1].GetEdgeV()[0];
00299     for (int g2 = g1+1; g2 < SgV.Len(); g2++) {
00300       const TIntPr& E2 = SgV[g2].GetEdgeV()[0];
00301       if (E1.Val2 == E2.Val1 || E1.Val1 == E2.Val2 || E1.Val1 == E2.Val1 || E1.Val2 == E2.Val2) {
00302         EdgeV[0] = TMath::Mn(E1, E2);
00303         EdgeV[1] = TMath::Mx(E1, E2);
00304         SimpleG.Dump();
00305         NextSgV.Add(SimpleG);
00306       }
00307     }
00308   }
00309   SgV.MoveFrom(NextSgV);
00310 }
00311 
00312 void TSubGraphsEnum::EnumSubGraphs(const int& MaxEdges) {
00313   TExeTm ExeTm;
00314   Gen2Graphs();
00315   printf("  %2d edge graphs:  %d\t[%s]\n", 2, SgV.Len(), ExeTm.GetTmStr());  ExeTm.Tick();
00316   //for (int i = 0; i < SgV.Len(); i++) { SgV[i].Dump(TStr::Fmt("  %d", i+1)); }
00317   //printf("**************************************************************\n");
00318   TSimpleGraph SimpleG;
00319   TIntPrV& EdgeV = SimpleG.GetEdgeV();
00320   // multiple edge sub-graphs
00321   for (int edges = 3; edges <= MaxEdges; edges++) {
00322     EdgeV.Clr();
00323     printf("  %2d edge graphs:", edges);
00324     for (int g1 = 0; g1 < SgV.Len()-1; g1++) {
00325       for (int g2 = g1+1; g2 < SgV.Len(); g2++) {
00326         if (SimpleG.Join(SgV[g1], SgV[g2])) { NextSgV.Add(SimpleG); }
00327       }
00328     }
00329     printf("  candidates: %8d [%s]", NextSgV.Len(), ExeTm.GetTmStr());  ExeTm.Tick();
00330     NextSgV.Sort();
00331     SgV.Gen(NextSgV.Len(), 0);
00332     SgV.Add(NextSgV[0]);
00333     for (int i = 1; i < NextSgV.Len(); i++) {
00334       if (SgV.Last() != NextSgV[i]) {
00335         SgV.Add(NextSgV[i]);
00336       }
00337     }
00338     NextSgV.Clr(false);
00339     printf("  total: %8d [%s]\n", SgV.Len(), ExeTm.GetTmStr());  ExeTm.Tick();
00340     //for (int i = 0; i < SgV.Len(); i++) { SgV[i].Dump(TStr::Fmt("  %d", i+1)); }
00341     //printf("**************************************************************\n");
00342   }
00343 }
00344 
00345 void TSubGraphsEnum::RecurBfs(const int& MxDepth) {
00346   TExeTm ExeTm;
00347   SgV.Clr(true);
00348   for (TNGraph::TNodeI NI = NGraph->BegNI(); NI < NGraph->EndNI(); NI++) {
00349     TSimpleGraph SimpleG;
00350     RecurBfs(NI.GetId(), MxDepth, SimpleG);
00351     //NGraph->DelNode(NI.GetId());
00352     printf(".");
00353   }
00354   printf("\ncandidates: %d\n", SgV.Len());
00355   SgV.Sort();
00356   int Cnt = 1;
00357   for (int i = 1; i < SgV.Len(); i++) {
00358     if (SgV[i-1] != SgV[i]) Cnt++;
00359   }
00360   printf("distinct:   %d\t[%s]\n", Cnt, ExeTm.GetTmStr());
00361 }
00362 
00363 void TSubGraphsEnum::RecurBfs(const int& NId, const int& Depth, TSimpleGraph& PrevG) {
00364   if (Depth == 0) {
00365     TIntPrV& EdgeV = PrevG();
00366     EdgeV.Sort();
00367     for (int i = 1; i < EdgeV.Len(); i++) {
00368       if (EdgeV[i-1] == EdgeV[i]) { return; }
00369     }
00370     SgV.Add(PrevG);
00371     return;
00372   }
00373   const TNGraph::TNodeI NI = NGraph ->GetNI(NId);
00374   for (int e = 0; e < NI.GetOutDeg(); e++) {
00375     TSimpleGraph CurG = PrevG;
00376     CurG.AddEdge(NI.GetId(), NI.GetOutNId(e));
00377     RecurBfs(NI.GetOutNId(e), Depth-1, CurG);
00378   }
00379   for (int e = 0; e < NI.GetInDeg(); e++) {
00380     TSimpleGraph CurG = PrevG;
00381     CurG.AddEdge(NI.GetInNId(e), NI.GetId());
00382     RecurBfs(NI.GetInNId(e), Depth-1, CurG);
00383   }
00384 }
00385 
00386 void TSubGraphsEnum::RecurBfs1(const int& MxDepth) {
00387   TExeTm ExeTm;
00388   SgV.Clr(true);
00389   for (TNGraph::TNodeI NI = NGraph->BegNI(); NI < NGraph->EndNI(); NI++) {
00390     TSimpleGraph SimpleG;
00391     RecurBfs1(NI.GetId(), MxDepth);
00392     //NGraph->DelNode(NI.GetId());
00393     printf(".");
00394   }
00395   printf("candidates: %d\n", SgV.Len());
00396   SgV.Sort();
00397   int Cnt = 1;
00398   for (int i = 1; i < SgV.Len(); i++) {
00399     if (SgV[i-1]!=SgV[i]) {
00400       //SgV[i].Dump();
00401       Cnt++;
00402     }
00403   }
00404   printf("distinct:   %d\t[%s]\n", Cnt, ExeTm.GetTmStr());
00405 }
00406 
00407 void TSubGraphsEnum::RecurBfs1(const int& NId, const int& Depth) {
00408   if (Depth == 0) {
00409     TIntPrV EdgeV;
00410     EdgeH.GetKeyV(EdgeV);
00411     EdgeV.Sort();
00412     SgV.Add(EdgeV);
00413     return;
00414   }
00415   const TNGraph::TNodeI NI = NGraph ->GetNI(NId);
00416   for (int e = 0; e < NI.GetOutDeg(); e++) {
00417     const TIntPr Edge(NId, NI.GetOutNId(e));
00418     if (! EdgeH.IsKey(Edge)) {
00419       EdgeH.AddKey(Edge);
00420       RecurBfs1(NI.GetOutNId(e), Depth-1);
00421       EdgeH.DelKey(Edge);
00422     }
00423   }
00424   for (int e = 0; e < NI.GetInDeg(); e++) {
00425     const TIntPr Edge(NI.GetInNId(e), NId);
00426     if (! EdgeH.IsKey(Edge)) {
00427       EdgeH.AddKey(Edge);
00428       RecurBfs1(NI.GetInNId(e), Depth-1);
00429       EdgeH.DelKey(Edge);
00430     }
00431   }
00432 }