SNAP Library, Developer Reference  2012-10-15 15:06:59
SNAP, a general purpose network analysis and graph mining library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
gio.h
Go to the documentation of this file.
00001 // TODO ROK, Jure included basic documentation, finalize reference doc
00002 
00004 // Loading and saving graphs from/to various file formats.
00005 namespace TSnap {
00006 
00008 template <class PGraph> PGraph LoadEdgeList(const TStr& InFNm, const int& SrcColId=0, const int& DstColId=1);
00010 template <class PGraph> PGraph LoadEdgeList(const TStr& InFNm, const int& SrcColId, const int& DstColId, const char& Separator);
00012 template <class PGraph> PGraph LoadEdgeListStr(const TStr& InFNm, const int& SrcColId=0, const int& DstColId=1);
00014 template <class PGraph> PGraph LoadEdgeListStr(const TStr& InFNm, const int& SrcColId, const int& DstColId, TStrHash<TInt>& StrToNIdH);
00016 template <class PGraph> PGraph LoadConnList(const TStr& InFNm);
00018 template <class PGraph> PGraph LoadConnListStr(const TStr& InFNm, TStrHash<TInt>& StrToNIdH);
00019 
00023 template <class PGraph> PGraph LoadPajek(const TStr& InFNm);
00025 PNGraph LoadDyNet(const TStr& FNm);
00027 TVec<PNGraph> LoadDyNetGraphV(const TStr& FNm);
00028 
00029 //TODO: Sparse/Dense adjacency matrix which values we threshold at Thresh to obtain an adjacency matrix.
00030 //template <class PGraph> PGraph LoadAdjMtx(const TStr& FNm, const int Thresh);
00031 //TODO: Load from a GML file format (http://en.wikipedia.org/wiki/Graph_Modelling_Language)
00032 //template <class PGraph> PGraph LoadGml(const TStr& FNm, const int Thresh);
00033 
00034 
00036 template <class PGraph> void SaveEdgeList(const PGraph& Graph, const TStr& OutFNm, const TStr& Desc=TStr());
00038 template <class PGraph> void SavePajek(const PGraph& Graph, const TStr& OutFNm);
00040 template <class PGraph> void SavePajek(const PGraph& Graph, const TStr& OutFNm, const TIntStrH& NIdColorH);
00042 template <class PGraph> void SavePajek(const PGraph& Graph, const TStr& OutFNm, const TIntStrH& NIdColorH, const TIntStrH& NIdLabelH);
00044 template <class PGraph> void SavePajek(const PGraph& Graph, const TStr& OutFNm, const TIntStrH& NIdColorH, const TIntStrH& NIdLabelH, const TIntStrH& EIdColorH);
00046 template <class PGraph> void SaveMatlabSparseMtx(const PGraph& Graph, const TStr& OutFNm);
00049 template<class PGraph> void SaveGViz(const PGraph& Graph, const TStr& OutFNm, const TStr& Desc=TStr(), const bool& NodeLabels=false, const TIntStrH& NIdColorH=TIntStrH());
00052 template<class PGraph> void SaveGViz(const PGraph& Graph, const TStr& OutFNm, const TStr& Desc, const TIntStrH& NIdLabelH);
00053 
00054 //TODO:  Save to a GML file format (http://en.wikipedia.org/wiki/Graph_Modelling_Language)
00055 //template <class PGraph> SaveGml(const PGraph& Graph, const TStr& OutFNm, const TStr& Desc);
00056 
00058 // Implementation
00059 
00064 template <class PGraph>
00065 PGraph LoadEdgeList(const TStr& InFNm, const int& SrcColId, const int& DstColId) {
00066   TSsParser Ss(InFNm, ssfWhiteSep, true, true, true);
00067   PGraph Graph = PGraph::TObj::New();
00068   int SrcNId, DstNId;
00069   while (Ss.Next()) {
00070     if (! Ss.GetInt(SrcColId, SrcNId) || ! Ss.GetInt(DstColId, DstNId)) { continue; }
00071     if (! Graph->IsNode(SrcNId)) { Graph->AddNode(SrcNId); }
00072     if (! Graph->IsNode(DstNId)) { Graph->AddNode(DstNId); }
00073     Graph->AddEdge(SrcNId, DstNId);
00074   }
00075   Graph->Defrag();
00076   return Graph;
00077 }
00078 
00083 template <class PGraph>
00084 PGraph LoadEdgeList(const TStr& InFNm, const int& SrcColId, const int& DstColId, const char& Separator) {
00085   TSsParser Ss(InFNm, Separator);
00086   PGraph Graph = PGraph::TObj::New();
00087   int SrcNId, DstNId;
00088   while (Ss.Next()) {
00089     if (! Ss.GetInt(SrcColId, SrcNId) || ! Ss.GetInt(DstColId, DstNId)) { continue; }
00090     if (! Graph->IsNode(SrcNId)) { Graph->AddNode(SrcNId); }
00091     if (! Graph->IsNode(DstNId)) { Graph->AddNode(DstNId); }
00092     Graph->AddEdge(SrcNId, DstNId);
00093   }
00094   Graph->Defrag();
00095   return Graph;
00096 }
00097 
00102 template <class PGraph>
00103 PGraph LoadEdgeListStr(const TStr& InFNm, const int& SrcColId, const int& DstColId) {
00104   TSsParser Ss(InFNm, ssfWhiteSep);
00105   PGraph Graph = PGraph::TObj::New();
00106   TStrHash<TInt> StrToNIdH(Mega(1), true); // hash-table mapping strings to integer node ids
00107   while (Ss.Next()) {
00108     const int SrcNId = StrToNIdH.AddKey(Ss[SrcColId]);
00109     const int DstNId = StrToNIdH.AddKey(Ss[DstColId]);
00110     if (! Graph->IsNode(SrcNId)) { Graph->AddNode(SrcNId); }
00111     if (! Graph->IsNode(DstNId)) { Graph->AddNode(DstNId); }
00112     Graph->AddEdge(SrcNId, DstNId);
00113   }
00114   Graph->Defrag();
00115   return Graph;
00116 }
00117 
00123 template <class PGraph>
00124 PGraph LoadEdgeListStr(const TStr& InFNm, const int& SrcColId, const int& DstColId, TStrHash<TInt>& StrToNIdH) {
00125   TSsParser Ss(InFNm, ssfWhiteSep);
00126   PGraph Graph = PGraph::TObj::New();
00127   while (Ss.Next()) {
00128     const int SrcNId = StrToNIdH.AddKey(Ss[SrcColId]);
00129     const int DstNId = StrToNIdH.AddKey(Ss[DstColId]);
00130     if (! Graph->IsNode(SrcNId)) { Graph->AddNode(SrcNId); }
00131     if (! Graph->IsNode(DstNId)) { Graph->AddNode(DstNId); }
00132     Graph->AddEdge(SrcNId, DstNId);
00133   }
00134   Graph->Defrag();
00135   return Graph;
00136 }
00137 
00141 template <class PGraph>
00142 PGraph LoadConnList(const TStr& InFNm) {
00143   TSsParser Ss(InFNm, ssfWhiteSep, true, true, true);
00144   PGraph Graph = PGraph::TObj::New();
00145   while (Ss.Next()) {
00146     if (! Ss.IsInt(0)) { continue; }
00147     const int SrcNId = Ss.GetInt(0);
00148     if (! Graph->IsNode(SrcNId)) { Graph->AddNode(SrcNId); }
00149     for (int dst = 1; dst < Ss.Len(); dst++) {
00150       const int DstNId = Ss.GetInt(dst);
00151       if (! Graph->IsNode(DstNId)) { Graph->AddNode(DstNId); }
00152       Graph->AddEdge(SrcNId, DstNId);
00153     }
00154   }
00155   Graph->Defrag();
00156   return Graph;
00157 }
00158 
00163 template <class PGraph> 
00164 PGraph LoadConnListStr(const TStr& InFNm, TStrHash<TInt>& StrToNIdH) {
00165   TSsParser Ss(InFNm, ssfWhiteSep, true, true, true);
00166   PGraph Graph = PGraph::TObj::New();
00167   while (Ss.Next()) {
00168     const int SrcNId = StrToNIdH.AddDatId(Ss[0]);
00169     if (! Graph->IsNode(SrcNId)) { Graph->AddNode(SrcNId); }
00170     for (int dst = 1; dst < Ss.Len(); dst++) {
00171       const int DstNId = StrToNIdH.AddDatId(Ss[dst]);
00172       if (! Graph->IsNode(DstNId)) { Graph->AddNode(DstNId); }
00173       Graph->AddEdge(SrcNId, DstNId);
00174     }
00175   }
00176   Graph->Defrag();
00177   return Graph;
00178 }
00179 
00180 template <class PGraph>
00181 PGraph LoadPajek(const TStr& InFNm) {
00182   PGraph Graph = PGraph::TObj::New();
00183   TSsParser Ss(InFNm, ssfSpaceSep, true, true, true);
00184   while ((Ss.Len()==0 || strstr(Ss[0], "*vertices") == NULL) && ! Ss.Eof()) {
00185     Ss.Next();  Ss.ToLc(); }
00186   // nodes
00187   bool EdgeList = true;
00188   EAssert(strstr(Ss[0], "*vertices") != NULL);
00189   while (Ss.Next()) {
00190     Ss.ToLc();
00191     if (Ss.Len()>0 && Ss[0][0] == '%') { continue; } // comment
00192     if (strstr(Ss[0], "*arcslist")!=NULL || strstr(Ss[0],"*edgeslist")!=NULL) { EdgeList=false; break; } 
00193     if (strstr(Ss[0], "*arcs")!=NULL || strstr(Ss[0],"*edges")!=NULL) { break; } // arcs are directed, edges are undirected
00194     Graph->AddNode(Ss.GetInt(0));
00195   }
00196   // edges
00197   while (Ss.Next()) {
00198     if (Ss.Len()>0 && Ss[0][0] == '%') { continue; } // comment
00199     if (Ss.Len()>0 && Ss[0][0] == '*') { break; }
00200     if (EdgeList) {
00201       // <source> <destination> <weight>
00202       if (Ss.Len() == 3 && Ss.IsInt(0) && Ss.IsInt(1)) {
00203         Graph->AddEdge(Ss.GetInt(0), Ss.GetInt(1)); }
00204     } else {
00205       // <source> <destination1> <destination2> <destination3> ...
00206       const int SrcNId = Ss.GetInt(0);
00207       for (int i = 1; i < Ss.Len(); i++) {
00208         Graph->AddEdge(SrcNId, Ss.GetInt(i)); }
00209     }
00210   }
00211   return Graph;
00212 }
00213 
00214 template <class PGraph>
00215 void SaveEdgeList(const PGraph& Graph, const TStr& OutFNm, const TStr& Desc) {
00216   FILE *F = fopen(OutFNm.CStr(), "wt");
00217   if (HasGraphFlag(typename PGraph::TObj, gfDirected)) { fprintf(F, "# Directed graph: %s \n", OutFNm.CStr()); } 
00218   else { fprintf(F, "# Undirected graph (each unordered pair of nodes is saved once): %s\n", OutFNm.CStr()); }
00219   if (! Desc.Empty()) { fprintf(F, "# %s\n", Desc.CStr()); }
00220   fprintf(F, "# Nodes: %d Edges: %d\n", Graph->GetNodes(), Graph->GetEdges());
00221   if (HasGraphFlag(typename PGraph::TObj, gfDirected)) { fprintf(F, "# FromNodeId\tToNodeId\n"); }
00222   else { fprintf(F, "# NodeId\tNodeId\n"); }
00223   for (typename PGraph::TObj::TEdgeI ei = Graph->BegEI(); ei < Graph->EndEI(); ei++) {
00224     fprintf(F, "%d\t%d\n", ei.GetSrcNId(), ei.GetDstNId());
00225   }
00226   fclose(F);
00227 }
00228 
00229 template <class PGraph>
00230 void SavePajek(const PGraph& Graph, const TStr& OutFNm) {
00231   TIntH NIdToIdH(Graph->GetNodes(), true);
00232   FILE *F = fopen(OutFNm.CStr(), "wt");
00233   fprintf(F, "*Vertices %d\n", Graph->GetNodes());
00234   int i = 0;
00235   for (typename PGraph::TObj::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++, i++) {
00236     fprintf(F, "%d  \"%d\" ic Red fos 10\n", i+1, NI.GetId()); // ic: internal color, fos: font size
00237     NIdToIdH.AddDat(NI.GetId(), i+1);
00238   }
00239   if (HasGraphFlag(typename PGraph::TObj, gfDirected)) {
00240     fprintf(F, "*Arcs %d\n", Graph->GetEdges()); } // arcs are directed, edges are undirected
00241   else {
00242     fprintf(F, "*Edges %d\n", Graph->GetEdges());
00243   }
00244   for (typename PGraph::TObj::TEdgeI EI = Graph->BegEI(); EI < Graph->EndEI(); EI++) {
00245     const int SrcNId = NIdToIdH.GetDat(EI.GetSrcNId());
00246     const int DstNId = NIdToIdH.GetDat(EI.GetDstNId());
00247     fprintf(F, "%d %d %d c Black\n", SrcNId, DstNId, 1); // width=1
00248   }
00249   fclose(F);
00250 }
00251 
00254 template <class PGraph>
00255 void SavePajek(const PGraph& Graph, const TStr& OutFNm, const TIntStrH& NIdColorH) {
00256   TIntH NIdToIdH(Graph->GetNodes(), true);
00257   FILE *F = fopen(OutFNm.CStr(), "wt");
00258   fprintf(F, "*Vertices %d\n", Graph->GetNodes());
00259   int i = 0;
00260   for (typename PGraph::TObj::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++, i++) {
00261     fprintf(F, "%d  \"%d\" ic %s fos 10\n", i+1, NI.GetId(),
00262       NIdColorH.IsKey(NI.GetId()) ? NIdColorH.GetDat(NI.GetId()).CStr() : "Red");
00263     NIdToIdH.AddDat(NI.GetId(), i+1);
00264   }
00265   if (HasGraphFlag(typename PGraph::TObj, gfDirected)) {
00266     fprintf(F, "*Arcs %d\n", Graph->GetEdges()); } // arcs are directed, edges are undirected
00267   else {
00268     fprintf(F, "*Edges %d\n", Graph->GetEdges());
00269   }
00270   for (typename PGraph::TObj::TEdgeI EI = Graph->BegEI(); EI < Graph->EndEI(); EI++) {
00271     const int SrcNId = NIdToIdH.GetDat(EI.GetSrcNId());
00272     const int DstNId = NIdToIdH.GetDat(EI.GetDstNId());
00273     fprintf(F, "%d %d %d c Black\n", SrcNId, DstNId, 1);
00274   }
00275   fclose(F);
00276 }
00277 
00281 template <class PGraph>
00282 void SavePajek(const PGraph& Graph, const TStr& OutFNm, const TIntStrH& NIdColorH, const TIntStrH& NIdLabelH) {
00283   TIntH NIdToIdH(Graph->GetNodes(), true);
00284   FILE *F = fopen(OutFNm.CStr(), "wt");
00285   fprintf(F, "*Vertices %d\n", Graph->GetNodes());
00286   int i = 0;
00287   for (typename PGraph::TObj::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++, i++) {
00288     fprintf(F, "%d  \"%s\" ic %s fos 10\n", i+1,
00289       NIdLabelH.IsKey(NI.GetId()) ? NIdLabelH.GetDat(NI.GetId()).CStr() : TStr::Fmt("%d", NI.GetId()).CStr(),
00290       NIdColorH.IsKey(NI.GetId()) ? NIdColorH.GetDat(NI.GetId()).CStr() : "Red");
00291     NIdToIdH.AddDat(NI.GetId(), i+1);
00292   }
00293   if (HasGraphFlag(typename PGraph::TObj, gfDirected)) {
00294     fprintf(F, "*Arcs %d\n", Graph->GetEdges()); } // arcs are directed, edges are undirected
00295   else {
00296     fprintf(F, "*Edges %d\n", Graph->GetEdges());
00297   }
00298   for (typename PGraph::TObj::TEdgeI EI = Graph->BegEI(); EI < Graph->EndEI(); EI++) {
00299     const int SrcNId = NIdToIdH.GetDat(EI.GetSrcNId());
00300     const int DstNId = NIdToIdH.GetDat(EI.GetDstNId());
00301     fprintf(F, "%d %d %d c Black\n", SrcNId, DstNId, 1);
00302   }
00303   fclose(F);
00304 }
00305 
00310 template <class PGraph>
00311 void SavePajek(const PGraph& Graph, const TStr& OutFNm, const TIntStrH& NIdColorH, const TIntStrH& NIdLabelH, const TIntStrH& EIdColorH) {
00312   CAssert(HasGraphFlag(typename PGraph::TObj, gfMultiGraph)); // network needs to have edge ids
00313   TIntH NIdToIdH(Graph->GetNodes(), true);
00314   FILE *F = fopen(OutFNm.CStr(), "wt");
00315   fprintf(F, "*Vertices %d\n", Graph->GetNodes());
00316   int i = 0;
00317   for (typename PGraph::TObj::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++, i++) {
00318     fprintf(F, "%d  \"%s\" ic %s fos 10\n", i+1,
00319       NIdLabelH.IsKey(NI.GetId()) ? NIdLabelH.GetDat(NI.GetId()).CStr() : TStr::Fmt("%d", NI.GetId()).CStr(),
00320       NIdColorH.IsKey(NI.GetId()) ? NIdColorH.GetDat(NI.GetId()).CStr() : "Red");
00321     NIdToIdH.AddDat(NI.GetId(), i+1);
00322   }
00323   if (HasGraphFlag(typename PGraph::TObj, gfDirected)) {
00324     fprintf(F, "*Arcs %d\n", Graph->GetEdges()); } // arcs are directed, edges are undirected
00325   else {
00326     fprintf(F, "*Edges %d\n", Graph->GetEdges());
00327   }
00328   for (typename PGraph::TObj::TEdgeI EI = Graph->BegEI(); EI < Graph->EndEI(); EI++) {
00329     const int SrcNId = NIdToIdH.GetDat(EI.GetSrcNId());
00330     const int DstNId = NIdToIdH.GetDat(EI.GetDstNId());
00331     fprintf(F, "%d %d 1 c %s\n", SrcNId, DstNId, 1,
00332       EIdColorH.IsKey(EI.GetId()) ? EIdColorH.GetDat(EI.GetId()).CStr() : "Black");
00333   }
00334   fclose(F);
00335 }
00336 
00338 template <class PGraph>
00339 void SaveMatlabSparseMtx(const PGraph& Graph, const TStr& OutFNm) {
00340   FILE *F = fopen(OutFNm.CStr(), "wt");
00341   TIntSet NIdSet(Graph->GetNodes()); // so that
00342   for (typename PGraph::TObj::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
00343     NIdSet.AddKey(NI.GetId());
00344   }
00345   for (typename PGraph::TObj::TEdgeI EI = Graph->BegEI(); EI < Graph->EndEI(); EI++) {
00346     const int Src = NIdSet.GetKeyId(EI.GetSrcNId())+1;
00347     const int Dst = NIdSet.GetKeyId(EI.GetDstNId())+1;
00348     fprintf(F, "%d\t%d\t1\n", Src, Dst);
00349     if (! HasGraphFlag(typename PGraph::TObj, gfDirected) && Src!=Dst) {
00350       fprintf(F, "%d\t%d\t1\n", Dst, Src);
00351     }
00352   }
00353   fclose(F);
00354 }
00355 
00356 template<class PGraph>
00357 void SaveGViz(const PGraph& Graph, const TStr& OutFNm, const TStr& Desc, const bool& NodeLabels, const TIntStrH& NIdColorH) {
00358   const bool IsDir = HasGraphFlag(typename PGraph::TObj, gfDirected);
00359   FILE *F = fopen(OutFNm.CStr(), "wt");
00360   if (! Desc.Empty()) fprintf(F, "/*****\n%s\n*****/\n\n", Desc.CStr());
00361   if (IsDir) { fprintf(F, "digraph G {\n"); } else { fprintf(F, "graph G {\n"); }
00362   fprintf(F, "  graph [splines=false overlap=false]\n"); //size=\"12,10\" ratio=fill
00363   // node  [width=0.3, height=0.3, label=\"\", style=filled, color=black]
00364   // node  [shape=box, width=0.3, height=0.3, label=\"\", style=filled, fillcolor=red]
00365   fprintf(F, "  node  [shape=ellipse, width=0.3, height=0.3%s]\n", NodeLabels?"":", label=\"\"");
00366   // node colors
00367   //for (int i = 0; i < NIdColorH.Len(); i++) {
00368   for (typename PGraph::TObj::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
00369     if (NIdColorH.IsKey(NI.GetId())) {
00370       fprintf(F, "  %d [style=filled, fillcolor=\"%s\"];\n", NI.GetId(), NIdColorH.GetDat(NI.GetId()).CStr()); }
00371     else {
00372       fprintf(F, "  %d ;\n", NI.GetId());
00373     }
00374   }
00375   // edges
00376   for (typename PGraph::TObj::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
00377     if (NI.GetOutDeg()==0 && NI.GetInDeg()==0 && !NIdColorH.IsKey(NI.GetId())) {
00378       fprintf(F, "%d;\n", NI.GetId()); }
00379     else {
00380       for (int e = 0; e < NI.GetOutDeg(); e++) {
00381         if (! IsDir && NI.GetId() > NI.GetOutNId(e)) { continue; }
00382         fprintf(F, "  %d %s %d;\n", NI.GetId(), IsDir?"->":"--", NI.GetOutNId(e));
00383       }
00384     }
00385   }
00386   if (! Desc.Empty()) {
00387     fprintf(F, "  label = \"\\n%s\\n\";", Desc.CStr());
00388     fprintf(F, "  fontsize=24;\n");
00389   }
00390   fprintf(F, "}\n");
00391   fclose(F);
00392 }
00393 
00394 template<class PGraph>
00395 void SaveGViz(const PGraph& Graph, const TStr& OutFNm, const TStr& Desc, const TIntStrH& NIdLabelH) {
00396   const bool IsDir = Graph->HasFlag(gfDirected);
00397   FILE *F = fopen(OutFNm.CStr(), "wt");
00398   if (! Desc.Empty()) fprintf(F, "/*****\n%s\n*****/\n\n", Desc.CStr());
00399   if (IsDir) { fprintf(F, "digraph G {\n"); } else { fprintf(F, "graph G {\n"); }
00400   fprintf(F, "  graph [splines=true overlap=false]\n"); //size=\"12,10\" ratio=fill
00401   fprintf(F, "  node  [shape=ellipse, width=0.3, height=0.3]\n");
00402   // node colors
00403   //for (int i = 0; i < NodeLabelH.Len(); i++) {
00404   for (typename PGraph::TObj::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
00405     fprintf(F, "  %d [label=\"%s\"];\n", NI.GetId(), NIdLabelH.GetDat(NI.GetId()).CStr());
00406 }
00407   // edges
00408   for (typename PGraph::TObj::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
00409     if (NI.GetOutDeg()==0 && NI.GetInDeg()==0 && ! NIdLabelH.IsKey(NI.GetId())) {
00410       fprintf(F, "%d;\n", NI.GetId()); }
00411     else {
00412       for (int e = 0; e < NI.GetOutDeg(); e++) {
00413         if (! IsDir && NI.GetId() > NI.GetOutNId(e)) { continue; }
00414         fprintf(F, "  %d %s %d;\n", NI.GetId(), IsDir?"->":"--", NI.GetOutNId(e));
00415       }
00416     }
00417   }
00418   if (! Desc.Empty()) {
00419     fprintf(F, "  label = \"\\n%s\\n\";", Desc.CStr());
00420     fprintf(F, "  fontsize=24;\n");
00421   }
00422   fprintf(F, "}\n");
00423   fclose(F);
00424 }
00425 
00426 } // namespace TSnap