SNAP Library, User Reference  2012-10-02 12:56:23
SNAP, a general purpose network analysis and graph mining library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
ghash.h
Go to the documentation of this file.
00001 
00002 
00003 class TGraphKey {
00004 public:
00005   static const int RoundTo;
00006 private:
00007 public:
00008   TInt Nodes;
00009   TIntPrV EdgeV;  // renumbers the graph (node ids 0..nodes-1)
00010   TFltV SigV;     // signature (for hashing)
00011   TInt VariantId; // if graphs have same signature but are different
00012 public:
00013   TGraphKey() : Nodes(-1), EdgeV(), SigV(), VariantId(0) { }
00014   TGraphKey(const TSFltV& GraphSigV);
00015   TGraphKey(const TIntV& GraphSigV);
00016   TGraphKey(const TFltV& GraphSigV);
00017   TGraphKey(const TGraphKey& GraphKey);
00018   TGraphKey(TSIn& SIn);
00019   void Save(TSOut& SOut) const;
00020   TGraphKey& operator = (const TGraphKey& GraphKey);
00021   bool operator == (const TGraphKey& GraphKey) const { return SigV==GraphKey.SigV && VariantId==GraphKey.VariantId; }
00022 
00023   int GetPrimHashCd() const { return abs(SigV.GetPrimHashCd() ^ VariantId); }
00024   int GetSecHashCd() const { return abs(SigV.GetSecHashCd() ^ VariantId<<8); }
00025 
00026   int GetNodes() const { return Nodes; }
00027   int GetEdges() const { return EdgeV.Len(); }
00028   int GetSigLen() const { return SigV.Len(); }
00029   int GetVariant() const { return VariantId; }
00030   void SetVariant(const int& Variant) { VariantId = Variant; }
00031   void SetEdgeV(const TIntPrV& EdgeIdV) { EdgeV = EdgeIdV; }
00032 
00033   PNGraph GetNGraph() const;
00034   void TakeGraph(const PNGraph& Graph); // renumbers the nodes
00035   void TakeGraph(const PNGraph& Graph, TIntPrV& NodeMap); // renumbers the nodes
00036   void TakeSig(const PNGraph& Graph, const int& MnSvdGraph, const int& MxSvdGraph); // create graph signature
00037 
00038   void SaveTxt(FILE *F) const;
00039   void SaveGViz(const TStr& OutFNm, const TStr& Desc = TStr(), const TStr& NodeAttrs="", const int& Size=-1) const;
00040   void DrawGViz(const TStr& OutFNm, const TStr& Desc = TStr(), const TStr& NodeAttrs="", const int& Size=-1) const;
00041   static bool IsIsomorph(const TGraphKey& Key1, const TGraphKey& Key2, const TIntV& NodeIdMap);
00042   static bool IsIsomorph(const TGraphKey& Key1, const TGraphKey& Key2, const TVec<TIntV>& NodeIdMapV);
00043   static bool IsIsomorph(const TGraphKey& Key1, const TGraphKey& Key2, const TVec<TIntV>& NodeIdMapV, int& IsoPermId);
00044 };
00045 
00049 template <class TDat>
00050 class TGHash {
00051 public:
00052   typedef typename THash<TGraphKey, TDat>::TIter TIter;
00053 private:
00054   TInt MxIsoCheck;   // brute force graph isomorphism check
00055   TInt MxSvdGraph;   // SVD isomorphism check
00056   THash<TInt, TVec<TIntV> > GSzToPermH; // node permutations up to MxIsoCkeck nodes
00057   TBool HashOnlyTrees; // hashing only trees (exact isomorphism test)
00058   THash<TGraphKey, TDat> GraphH;
00059 private:
00060   void InitPermutations();
00061   int IsGetKeyId(const PNGraph& Graph) const;
00062   int IsGetKeyId(const PNGraph& Graph, TGraphKey& GKey) const;
00063   int IsGetKeyId(const PNGraph& Graph, TGraphKey& GKey, TIntPrV& NodeMap) const;
00064 public:
00065   TGHash(const bool& HashTrees, const int& MaxIsoCheck=8, const int& MaxSvdGraph=500);
00066   TGHash(TSIn& SIn);
00067   void Save(TSOut& SOut) const;
00068 
00069   const TDat& operator [] (const int& KeyId) const { return GraphH[KeyId]; }
00070   TDat& operator [] (const int& KeyId) { return GraphH[KeyId]; }
00071   const TDat& operator () (const TGraphKey& Key) const { return GraphH.GetDat(Key); }
00072   TDat& operator () (const TGraphKey& Key) { return GraphH.GetDat(Key); }
00073   TIter BegI() const { return GraphH.BegI(); }
00074   TIter EndI() const { return GraphH.EndI(); }
00075   TIter GetI(const int& KeyId) const  { return GraphH.GetI(KeyId); }
00076 
00077   bool HashTrees() const { return HashOnlyTrees; }
00078 
00079   void Gen(const int& Ports) { GraphH.Gen(Ports); }
00080   void Clr(const bool& DoDel=true, const int& NoDelLim=-1) { GraphH.Clr(DoDel, NoDelLim); }
00081   bool Empty() const { return GraphH.Empty(); }
00082   int Len() const {  return GraphH.Len(); }
00083   int GetPorts() const { return GraphH.GetPorts(); }
00084   bool IsAutoSize() const { return GraphH.IsAutoSize(); }
00085   int GetMxKeyIds() const { return GraphH.GetMxKeyIds(); }
00086   bool IsKeyIdEqKeyN() const { return GraphH.IsKeyIdEqKeyN(); }
00087 
00088   int AddKey(const PNGraph& Graph);
00089   TDat& AddDat(const PNGraph& Graph) { return GraphH[AddKey(Graph)]; }
00090   TDat& AddDat(const PNGraph& Graph, const TDat& Dat) { return GraphH[AddKey(Graph)] = Dat; }
00091 
00092   bool IsKey(const PNGraph& Graph) const { int k=IsGetKeyId(Graph); return k!=-1; }
00093   int GetKeyId(const PNGraph& Graph) const { int k=IsGetKeyId(Graph); IAssert(k!=-1); return k; }
00094   const TDat& GetDat(const PNGraph& Graph) const { return GraphH[GetKeyId(Graph)]; }
00095   TDat& GetDat(const PNGraph& Graph) { return GraphH[GetKeyId(Graph)]; }
00096 
00097   const TGraphKey& GetKey(const int& KeyId) const { return GraphH.GetKey(KeyId); }
00098   int GetKeyId(const TGraphKey& Key) const { return GraphH.GetKeyId(Key); }
00099   bool IsKey(const TGraphKey& Key) const { return GraphH.IsKey(Key); }
00100   bool IsKey(const TGraphKey& Key, int& KeyId) const { return GraphH.IsKey(Key, KeyId); }
00101   bool IsKeyId(const int& KeyId) const { return GraphH.IsKeyId(KeyId); }
00102   const TDat& GetDat(const TGraphKey& Key) const { return GraphH.GetDat(Key); }
00103   TDat& GetDat(const TGraphKey& Key) { return GraphH.GetDat(Key); }
00104   const TDat& GetDatId(const int& KeyId) const { return GraphH[KeyId]; }
00105   TDat& GetDatId(const int& KeyId) { return GraphH[KeyId]; }
00106 
00107   void GetKeyDat(const int& KeyId, TGraphKey& Key, TDat& Dat) const { GraphH.GetKeyDat(KeyId, Key, Dat); }
00108   bool IsKeyGetDat(const TGraphKey& Key, TDat& Dat) const { return GraphH.IsKeyGetDat(Key, Dat); }
00109 
00110   bool GetNodeMap(const PNGraph& Graph, TIntPrV& NodeMapV) const;
00111   bool GetNodeMap(const PNGraph& Graph, TIntPrV& NodeMapV, int& KeyId) const;
00112 
00113   int FFirstKeyId() const { return 0-1; }
00114   bool FNextKeyId(int& KeyId) const { return GraphH.FNextKeyId(KeyId); }
00115   void GetKeyV(TVec<TGraphKey>& KeyV) const { GraphH.GetKeyV(KeyV); }
00116   void GetDatV(TVec<TDat>& DatV) const { GraphH.GetDatV(DatV); }
00117   void GetKeyIdByDat(TIntV& KeyIdV, const bool& Asc = true) const; // order keyIds by data
00118   void GetKeyIdByGSz(TIntV& KeyIdV, const bool& Asc = true) const; // order keyIds by graph size
00119   void GetKeyDatPrV(TVec<TPair<TGraphKey, TDat> >& KeyDatPrV) const { GraphH.GetKeyDatPrV(KeyDatPrV); }
00120   void GetDatKeyPrV(TVec<TPair<TDat, TGraphKey> >& DatKeyPrV) const { GraphH.GetDatKeyPrV(DatKeyPrV); }
00121 
00122   void Defrag() { GraphH.Defrag(); }
00123   void Pack() { GraphH.Pack(); }
00124 
00125   void DrawGViz(const int& KeyId, const TStr& OutFNmPref, const TStr& OutputType = "gif", TStr Desc="") const;
00126   void DrawGViz(const TIntV& KeyIdV, const TStr& OutFNmPref, const TStr& OutputType = "gif") const;
00127   void SaveTxt(const TStr& OutFNm, const TStr& Desc, const TStr& DatColNm, const bool& SortByKeyVal=true) const;
00128   void SaveDetailTxt(const TStr& OutFNm, const TStr& Desc, const TStr& DatColNm) const;
00129 };
00130 
00131 template <class TDat>
00132 void TGHash<TDat>::InitPermutations() {
00133   GSzToPermH.Clr();
00134   for (int nodes = 2; nodes <= MxIsoCheck; nodes++) {
00135     TVec<TIntV> NodePermutationV;
00136     TIntV NodeIdV(nodes, 0);
00137     for (int i = 0; i < nodes; i++)  NodeIdV.Add(i);
00138     NodeIdV.Pack();
00139     NodePermutationV.Add(NodeIdV);
00140     while (NodeIdV.NextPerm()) {
00141       NodePermutationV.Add(NodeIdV);
00142     }
00143     NodePermutationV.Pack();
00144     GSzToPermH.AddDat(nodes, NodePermutationV);
00145   }
00146 }
00147 
00148 template <class TDat>
00149 TGHash<TDat>::TGHash(const bool& HashTrees, const int& MaxIsoCheck, const int& MaxSvdGraph) :
00150  MxIsoCheck(MaxIsoCheck), MxSvdGraph(MaxSvdGraph), GSzToPermH(), HashOnlyTrees(HashTrees), GraphH() {
00151   if (! HashTrees) {
00152     InitPermutations();
00153   }
00154 }
00155 
00156 template <class TDat>
00157 TGHash<TDat>::TGHash(TSIn& SIn) : MxIsoCheck(SIn), MxSvdGraph(SIn), GSzToPermH(), HashOnlyTrees(SIn), GraphH(SIn) {
00158   if (! HashOnlyTrees) {
00159     InitPermutations();
00160   }
00161 }
00162 
00163 template <class TDat>
00164 void TGHash<TDat>::Save(TSOut& SOut) const {
00165   MxIsoCheck.Save(SOut);
00166   MxSvdGraph.Save(SOut);
00167   HashOnlyTrees.Save(SOut);
00168   GraphH.Save(SOut);
00169 }
00170 
00171 template <class TDat>
00172 int TGHash<TDat>::AddKey(const PNGraph& Graph) {
00173   if (HashOnlyTrees) {
00174     int RootNId;  IAssert(TSnap::IsTree(Graph, RootNId));
00175     TIntV TreeSig;  TSnap::GetTreeSig(Graph, RootNId, TreeSig);
00176     TGraphKey GKey(TreeSig);
00177     const int KeyId = GraphH.GetKeyId(GKey);
00178     if (KeyId == -1) {
00179       GKey.TakeGraph(Graph);
00180       return GraphH.AddKey(GKey);
00181     }
00182     return KeyId;
00183   } else {
00184     TGraphKey GKey;
00185     GKey.TakeSig(Graph, MxIsoCheck+1, MxSvdGraph); // get signature
00186     const int Nodes = GKey.GetNodes();
00187     if (Nodes > 2 && Nodes <= MxIsoCheck) {
00188       GKey.TakeGraph(Graph);
00189       // check all variants with same signature
00190       for (int variant = 1; ; variant++) {
00191         GKey.SetVariant(variant);
00192         int KeyId = GraphH.GetKeyId(GKey);
00193         if (KeyId == -1) {
00194           KeyId = GraphH.AddKey(GKey);
00195           //printf("  new variant: %d (%d)\n", KeyId, variant);
00196           return KeyId;
00197 
00198         }
00199         if (TGraphKey::IsIsomorph(GKey, GraphH.GetKey(KeyId), GSzToPermH.GetDat(Nodes))) {
00200           //printf("  isomorphic to: %d\n", KeyId);
00201           return KeyId;
00202         } // found isomorphic graph
00203       }
00204     } else {
00205       const int KeyId = GraphH.GetKeyId(GKey);
00206       if (KeyId == -1) {
00207         GKey.TakeGraph(Graph);
00208         return GraphH.AddKey(GKey);
00209       }
00210       return KeyId;
00211     }
00212   }
00213   Fail;
00214   return -1;
00215 }
00216 
00217 template <class TDat>
00218 int TGHash<TDat>::IsGetKeyId(const PNGraph& Graph) const {
00219   TGraphKey GKey;
00220   return IsGetKeyId(Graph, GKey);
00221 }
00222 
00223 template <class TDat>
00224 int TGHash<TDat>::IsGetKeyId(const PNGraph& Graph, TGraphKey& GKey) const {
00225   if (HashOnlyTrees) {
00226     int RootNId;  IAssert(TSnap::IsTree(Graph, RootNId));
00227     TIntV TreeSig;  TSnap::GetTreeSig(Graph, RootNId, TreeSig);
00228     GKey = TGraphKey(TreeSig);
00229     const int KeyId = GraphH.GetKeyId(GKey);
00230     //IAssert(KeyId != -1);
00231     return KeyId;
00232   } else {
00233     GKey.TakeSig(Graph, MxIsoCheck+1, MxSvdGraph);
00234     const int Nodes = GKey.GetNodes();
00235     if (Nodes > 2 && Nodes <= MxIsoCheck) {
00236       GKey.TakeGraph(Graph);
00237       for (int variant = 1; ; variant++) {
00238         GKey.SetVariant(variant);
00239         int KeyId = GraphH.GetKeyId(GKey);
00240         //IAssert(KeyId != -1);
00241         if (TGraphKey::IsIsomorph(GKey, GraphH.GetKey(KeyId), GSzToPermH.GetDat(Nodes))) { return KeyId; }
00242       }
00243       return -1;
00244     } else {
00245       const int KeyId = GraphH.GetKeyId(GKey);
00246       //IAssert(KeyId != -1);
00247       return KeyId;
00248     }
00249   }
00250   Fail;
00251   return -1;
00252 }
00253 
00254 template <class TDat>
00255 bool TGHash<TDat>::GetNodeMap(const PNGraph& Graph, TIntPrV& NodeMapV) const {
00256   int KeyId;
00257   return GetNodeMap(Graph, NodeMapV, KeyId);
00258 }
00259 
00260 template <class TDat>
00261 bool TGHash<TDat>::GetNodeMap(const PNGraph& Graph, TIntPrV& NodeMapV, int& KeyId) const {
00262   NodeMapV.Clr(false);
00263   if (HashOnlyTrees) {
00264     int RootNId;  IAssert(TSnap::IsTree(Graph, RootNId));
00265     TIntV TreeSig;  TSnap::GetTreeSig(Graph, RootNId, TreeSig, NodeMapV);
00266     TGraphKey GKey(TreeSig);
00267     KeyId = GraphH.GetKeyId(GKey);
00268     return KeyId != -1;
00269   } else {
00270     const int Nodes = Graph->GetNodes();
00271     int IsoPermId = -1;
00272     NodeMapV.Clr(false);
00273     if (Nodes == 0) { return true; }
00274     else if (Nodes == 1) {
00275       NodeMapV.Add(TIntPr(Graph->BegNI().GetId(), 0));  return true; }
00276     else if (Nodes <= MxIsoCheck) {
00277       TGraphKey GKey;
00278       GKey.TakeSig(Graph, MxIsoCheck+1, MxSvdGraph);
00279       GKey.TakeGraph(Graph, NodeMapV);
00280       for (int variant = 1; ; variant++) {
00281         GKey.SetVariant(variant);
00282         KeyId = GraphH.GetKeyId(GKey);
00283         if (KeyId == -1) return false;
00284         if (TGraphKey::IsIsomorph(GKey, GraphH.GetKey(KeyId), GSzToPermH.GetDat(Nodes), IsoPermId)) {
00285           const TIntV& K1K2Perm = GSzToPermH.GetDat(Nodes)[IsoPermId];
00286           // map from graph to key1 to key2
00287           for  (int i = 0; i < NodeMapV.Len(); i++) {
00288             NodeMapV[i].Val2 = K1K2Perm[NodeMapV[i].Val2];
00289           }
00290           return true;
00291         }
00292       }
00293       return false;
00294     } else {
00295       return false; // graph too big to find the mapping
00296     }
00297   }
00298   Fail;  return false;
00299 }
00300 
00301 template <class TDat>
00302 void TGHash<TDat>::GetKeyIdByDat(TIntV& KeyIdV, const bool& Asc) const {
00303   TVec<TQuad<TDat, TInt,TInt, TInt> > DatKeyIdV(Len(), 0); // <TDat,Nodes,Edges,KeyId>
00304   for (int i = FFirstKeyId(); FNextKeyId(i); ) {
00305     DatKeyIdV.Add(TQuad<TDat, TInt,TInt, TInt>(GetDatId(i), GetKey(i).GetNodes(), GetKey(i).GetEdges(), i));
00306   }
00307   DatKeyIdV.Sort(Asc);
00308   KeyIdV.Gen(Len(), 0);
00309   for (int i = 0; i < Len(); i++) {
00310     KeyIdV.Add(DatKeyIdV[i].Val4);
00311   }
00312 }
00313 
00314 template <class TDat>
00315 void TGHash<TDat>::GetKeyIdByGSz(TIntV& KeyIdV, const bool& Asc) const {
00316   TVec<TQuad<TInt,TInt, TDat, TInt> > DatKeyIdV(Len(), 0); // <Nodes,Edges,TDat,KeyId>
00317   for (int i = FFirstKeyId(); FNextKeyId(i); ) {
00318     DatKeyIdV.Add(TQuad< TInt,TInt, TDat, TInt>(GetKey(i).GetNodes(), GetKey(i).GetEdges(), GetDatId(i), i));
00319   }
00320   DatKeyIdV.Sort(Asc);
00321   KeyIdV.Gen(Len(), 0);
00322   for (int i = 0; i < Len(); i++) {
00323     KeyIdV.Add(DatKeyIdV[i].Val4);
00324   }
00325 }
00326 
00327 template <class TDat>
00328 void TGHash<TDat>::DrawGViz(const int& KeyId, const TStr& OutFNmPref, const TStr& OutputType, TStr Desc) const {
00329   IAssert(OutputType == "ps" || OutputType == "gif" || OutputType == "png");
00330   const TGraphKey& GKey = GetKey(KeyId);
00331   const TStr Desc1 = TStr::Fmt("%s (%d, %d)", Desc.CStr(), GKey.GetNodes(), GKey.GetEdges());
00332   GKey.SaveGViz(OutFNmPref+".dot", Desc1);
00333   TSnap::TSnapDetail::GVizDoLayout(OutFNmPref+".dot", OutFNmPref+"."+OutputType, gvlDot);
00334 }
00335 
00336 template <class TDat>
00337 void TGHash<TDat>::DrawGViz(const TIntV& KeyIdV, const TStr& OutFNmPref, const TStr& OutputType) const {
00338   IAssert(OutputType == "ps" || OutputType == "gif" || OutputType == "png");
00339   TExeTm ExeTm;
00340   printf("Plotting %d graphs\n", KeyIdV.Len());
00341   for (int i = 0; i < KeyIdV.Len(); i++) {
00342     const TStr FNm = TStr::Fmt("%s.%03d.key%d.", OutFNmPref.CStr(), i+1, KeyIdV[i]());
00343     const TStr Desc = TStr::Fmt("KeyId:%d", KeyIdV[i]());
00344     const TGraphKey& GKey = GetKey(KeyIdV[i]);
00345     printf("\r  %d  g(%d, %d)    ", i, GKey.GetNodes(), GKey.GetEdges());
00346     GKey.SaveGViz(FNm+"dot", Desc);
00347     TSnap::TSnapDetail::GVizDoLayout(FNm+"dot", FNm+OutputType, gvlDot);
00348   }
00349   printf("done [%s].\n", ExeTm.GetTmStr());
00350 }
00351 
00352 template <class TDat>
00353 void TGHash<TDat>::SaveTxt(const TStr& OutFNm, const TStr& Desc, const TStr& DatColNm, const bool& SortByKeyVal) const {
00354   TIntV KeyIdV;
00355   if (SortByKeyVal) GetKeyIdByDat(KeyIdV, false);
00356   else GetKeyIdByGSz(KeyIdV, true);
00357   FILE *F = fopen(OutFNm.CStr(), "wt");
00358   fprintf(F, "Graph-Hash-Table");
00359   fprintf(F, "%s\n", Desc.CStr());
00360   fprintf(F, "%d graphs\n", KeyIdV.Len());
00361   fprintf(F, "Rank\tKeyId\tNodes\tEdges\t%s\n", DatColNm.CStr());
00362   for (int i = 0; i < KeyIdV.Len(); i++) {
00363     const TGraphKey& Key = GetKey(KeyIdV[i]);
00364     fprintf(F, "%d\t%d\t%d\t%d\t%s\n", i+1, KeyIdV[i](), Key.GetNodes(), Key.GetEdges(),
00365       GetDatId(KeyIdV[i]).GetStr().CStr());
00366   }
00367   fclose(F);
00368 }
00369 
00370 template <class TDat>
00371 void TGHash<TDat>::SaveDetailTxt(const TStr& OutFNm, const TStr& Desc, const TStr& DatColNm) const {
00372   TIntV KeyIdV;  GetKeyIdByDat(KeyIdV, false);
00373   FILE *F = fopen(OutFNm.CStr(), "wt");
00374   fprintf(F, "Graph-Hash-Table\n");
00375   fprintf(F, "%s\n", Desc.CStr());
00376   fprintf(F, "%d graphs", KeyIdV.Len());
00377   for (int i = 0; i < KeyIdV.Len(); i++) {
00378     fprintf(F, "\n\n[%5d]\tRank: %d\n", KeyIdV[i](), i+1);
00379     fprintf(F, "Dat:  %s\n", GetDat(KeyIdV[i]).GetStr().CStr());
00380     GetDatId(KeyIdV[i]).SaveTxt(F);
00381   }
00382   fclose(F);
00383 }
00384 
00387 class TSimpleGraph {
00388 private:
00389   TIntPrV EdgeV;
00390 public:
00391   TSimpleGraph() { }
00392   TSimpleGraph(const TIntPrV& GEdgeV) : EdgeV(GEdgeV) { }
00393   bool operator == (const TSimpleGraph& Graph) const { return EdgeV == Graph.EdgeV; }
00394   bool operator < (const TSimpleGraph& Graph) const { return EdgeV < Graph.EdgeV; }
00395 
00396   int GetEdges() const { return EdgeV.Len(); }
00397   void AddEdge(const int& SrcNId, const int& DstNId) { EdgeV.Add(TIntPr(SrcNId, DstNId)); }
00398   bool Join(const TSimpleGraph& G1, const TSimpleGraph& G2);
00399   TIntPrV& GetEdgeV() { return EdgeV; }
00400   TIntPrV& operator () () { return EdgeV; }
00401 
00402   void Dump(const TStr& Desc = TStr()) const;
00403 };
00404 typedef TVec<TSimpleGraph> TSimpleGraphV;
00405 
00408 class TSubGraphsEnum {
00409 private:
00410   TSimpleGraphV SgV, NextSgV;
00411   THash<TIntPr, TIntH> EdgeH;
00412   PNGraph NGraph;
00413 public:
00414   TSubGraphsEnum(PNGraph Graph) : NGraph(Graph) { }
00415 
00416   void Gen2Graphs();
00417   void EnumSubGraphs(const int& MaxEdges);
00418   void RecurBfs(const int& MxDepth);
00419   void RecurBfs(const int& NId, const int& Depth, TSimpleGraph& PrevG);
00420   void RecurBfs1(const int& MxDepth);
00421   void RecurBfs1(const int& NId, const int& Depth);
00422   //void RecurBfs(const int& NId, const int& Depth, const THash<TIntPr, TInt>& EdgeH);
00423 };
00424 
00425