SNAP Library , Developer Reference  2013-01-07 14:03:36
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
bfsdfs.h
Go to the documentation of this file.
00001 namespace TSnap {
00002 
00004 // BFS and DFS
00008 template <class PGraph> PNGraph GetBfsTree(const PGraph& Graph, const int& StartNId, const bool& FollowOut, const bool& FollowIn);
00010 template <class PGraph> int GetSubTreeSz(const PGraph& Graph, const int& StartNId, const bool& FollowOut, const bool& FollowIn, int& TreeSz, int& TreeDepth);
00013 template <class PGraph> int GetNodesAtHop(const PGraph& Graph, const int& StartNId, const int& Hop, TIntV& NIdV, const bool& IsDir=false);
00016 template <class PGraph> int GetNodesAtHops(const PGraph& Graph, const int& StartNId, TIntPrV& HopCntV, const bool& IsDir=false);
00017 
00019 // Shortest paths
00022 template <class PGraph> int GetShortPath(const PGraph& Graph, const int& SrcNId, const int& DstNId, const bool& IsDir=false);
00027 template <class PGraph> int GetShortPath(const PGraph& Graph, const int& SrcNId, TIntH& NIdToDistH, const bool& IsDir=false, const int& MaxDist=TInt::Mx);
00028 
00030 // Diameter
00031 
00034 template <class PGraph> int GetBfsFullDiam(const PGraph& Graph, const int& NTestNodes, const bool& IsDir=false);
00037 template <class PGraph> double GetBfsEffDiam(const PGraph& Graph, const int& NTestNodes, const bool& IsDir=false);
00040 template <class PGraph> double GetBfsEffDiam(const PGraph& Graph, const int& NTestNodes, const bool& IsDir, double& EffDiam, int& FullDiam);
00043 template <class PGraph> double GetBfsEffDiam(const PGraph& Graph, const int& NTestNodes, const bool& IsDir, double& EffDiam, int& FullDiam, double& AvgDiam);
00046 template <class PGraph> double GetBfsEffDiam(const PGraph& Graph, const int& NTestNodes, const TIntV& SubGraphNIdV, const bool& IsDir, double& EffDiam, int& FullDiam);
00047 
00048 // TODO: Implement in the future
00049 //template <class PGraph> int GetRangeDist(const PGraph& Graph, const int& SrcNId, const int& DstNId, const bool& IsDir=false);
00050 //template <class PGraph> int GetShortPath(const PGraph& Graph, const int& SrcNId, TIntH& NIdToDistH, const bool& IsDir=false, const int& MaxDist=1000);
00051 //template <class PGraph> int GetShortPath(const PGraph& Graph, const int& SrcNId, const TIntSet& TargetSet, const bool& IsDir, TIntV& PathNIdV);
00052 //template <class PGraph> int GetShortPath(TIntH& NIdPrnH, TCcQueue<int>& NIdQ, const PGraph& Graph, const int& SrcNId, const TIntSet& TargetSet, const bool& IsDir, TIntV& PathNIdV);
00053 //template <class PGraph> int GetMxShortDist(const PGraph& Graph, const int& SrcNId, const bool& IsDir=false);
00054 //template <class PGraph> int GetMxShortDist(const PGraph& Graph, const int& SrcNId, const bool& IsDir, int& MxDistNId);
00055 //template <class PGraph> int GetMxShortDist(const PGraph& Graph, const int& SrcNId, const bool& IsDir, int& MxDistNId, TCcQueue<int>& NIdQ, TCcQueue<int>& DistQ, TIntSet& VisitedH);
00056 //template <class PGraph> int GetMxGreedyDist(const PGraph& Graph, const int& SrcNId, const bool& IsDir=false);
00057 //template <class PGraph> int GetMxGreedyDist(const PGraph& Graph, const int& SrcNId, const bool& IsDir, TCcQueue<int>& NIdQ, TCcQueue<int>& DistQ, TIntSet& VisitedH);
00058 //template <class PGraph> PNGraph GetShortPathsSubGraph(const PGraph& Graph, const TIntV& SubGraphNIdV);
00059 //template <class PGraph> PGraph GetWccPathsSubGraph(const PGraph& Graph, const TIntV& NIdV);
00060 //template <class PGraph> void GetSubTreeSz(const PGraph& Graph, const int& StartNId, const bool& FollowOutEdges, int& TreeSz, int& TreeDepth);
00061 
00062 } // namespace TSnap
00063 
00067 template<class PGraph>
00068 class TBreathFS {
00069 public:
00070   PGraph Graph;
00071   TSnapQueue<int> Queue;
00072   TInt StartNId;
00073   TIntH NIdDistH;
00074 public:
00075   TBreathFS(const PGraph& GraphPt, const bool& InitBigQ=true) :
00076     Graph(GraphPt), Queue(InitBigQ?Graph->GetNodes():1024), NIdDistH(InitBigQ?Graph->GetNodes():1024) { }
00078   void SetGraph(const PGraph& GraphPt);
00080   int DoBfs(const int& StartNode, const bool& FollowOut, const bool& FollowIn, const int& TargetNId=-1, const int& MxDist=TInt::Mx);
00082   int GetNVisited() const { return NIdDistH.Len(); }
00084   void GetVisitedNIdV(TIntV& NIdV) const { NIdDistH.GetKeyV(NIdV); }
00087   int GetHops(const int& SrcNId, const int& DstNId) const;
00090   int GetRndPath(const int& SrcNId, const int& DstNId, TIntV& PathNIdV) const;
00091 };
00092 
00093 template<class PGraph>
00094 void TBreathFS<PGraph>::SetGraph(const PGraph& GraphPt) {
00095   Graph=GraphPt;
00096   const int N=GraphPt->GetNodes();
00097   if (Queue.Reserved() < N) { Queue.Gen(N); }
00098   if (NIdDistH.GetReservedKeyIds() < N) { NIdDistH.Gen(N); }
00099 }
00100 
00101 template<class PGraph>
00102 int TBreathFS<PGraph>::DoBfs(const int& StartNode, const bool& FollowOut, const bool& FollowIn, const int& TargetNId, const int& MxDist) {
00103   StartNId = StartNode;
00104   IAssert(Graph->IsNode(StartNId));
00105   NIdDistH.Clr(false);  NIdDistH.AddDat(StartNId, 0);
00106   Queue.Clr(false);  Queue.Push(StartNId);
00107   int v, MaxDist = 0;
00108   while (! Queue.Empty()) {
00109     const int NId = Queue.Top();  Queue.Pop();
00110     const int Dist = NIdDistH.GetDat(NId);
00111     if (Dist == MxDist) { break; } // max distance limit reached
00112     const typename PGraph::TObj::TNodeI NodeI = Graph->GetNI(NId);
00113     if (FollowOut) { // out-links
00114       for (v = 0; v < NodeI.GetOutDeg(); v++) {  // out-links
00115         const int DstNId = NodeI.GetOutNId(v);
00116         if (! NIdDistH.IsKey(DstNId)) {
00117           NIdDistH.AddDat(DstNId, Dist+1);
00118           MaxDist = TMath::Mx(MaxDist, Dist+1);
00119           if (DstNId == TargetNId) { return MaxDist; }
00120           Queue.Push(DstNId);
00121         }
00122       }
00123     }
00124     if (FollowIn) { // in-links
00125       for (v = 0; v < NodeI.GetInDeg(); v++) {
00126         const int DstNId = NodeI.GetInNId(v);
00127         if (! NIdDistH.IsKey(DstNId)) {
00128           NIdDistH.AddDat(DstNId, Dist+1);
00129           MaxDist = TMath::Mx(MaxDist, Dist+1);
00130           if (DstNId == TargetNId) { return MaxDist; }
00131           Queue.Push(DstNId);
00132         }
00133       }
00134     }
00135   }
00136   return MaxDist;
00137 }
00138 
00139 template<class PGraph>
00140 int TBreathFS<PGraph>::GetHops(const int& SrcNId, const int& DstNId) const {
00141   TInt Dist;
00142   if (SrcNId!=StartNId) { return -1; }
00143   if (! NIdDistH.IsKeyGetDat(DstNId, Dist)) { return -1; }
00144   return Dist.Val;
00145 }
00146 
00147 template<class PGraph>
00148 int TBreathFS<PGraph>::GetRndPath(const int& SrcNId, const int& DstNId, TIntV& PathNIdV) const {
00149   PathNIdV.Clr(false);
00150   if (SrcNId!=StartNId || ! NIdDistH.IsKey(DstNId)) { return -1; }
00151   PathNIdV.Add(DstNId);
00152   TIntV CloserNIdV;
00153   int CurNId = DstNId;
00154   TInt CurDist, NextDist;
00155   while (CurNId != SrcNId) {
00156     typename PGraph::TObj::TNodeI NI = Graph->GetNI(CurNId);
00157     IAssert(NIdDistH.IsKeyGetDat(CurNId, CurDist));
00158     CloserNIdV.Clr(false);
00159     for (int e = 0; e < NI.GetDeg(); e++) {
00160       const int Next = NI.GetNbrNId(e);
00161       IAssert(NIdDistH.IsKeyGetDat(Next, NextDist));
00162       if (NextDist == CurDist-1) { CloserNIdV.Add(Next); }
00163     }
00164     IAssert(! CloserNIdV.Empty());
00165     CurNId = CloserNIdV[TInt::Rnd.GetUniDevInt(CloserNIdV.Len())];
00166     PathNIdV.Add(CurNId);
00167   }
00168   PathNIdV.Reverse();
00169   return PathNIdV.Len()-1;
00170 }
00171 
00173 // Implementation
00174 namespace TSnap {
00175 
00176 template <class PGraph>
00177 PNGraph GetBfsTree(const PGraph& Graph, const int& StartNId, const bool& FollowOut, const bool& FollowIn) {
00178   TBreathFS<PGraph> BFS(Graph, false);
00179   BFS.DoBfs(StartNId, FollowOut, FollowIn, -1, TInt::Mx);
00180   PNGraph Tree = TNGraph::New();
00181   BFS.NIdDistH.SortByDat();
00182   for (int i = 0; i < BFS.NIdDistH.Len(); i++) {
00183     const int NId = BFS.NIdDistH.GetKey(i);
00184     const int Dist = BFS.NIdDistH[i];
00185     typename PGraph::TObj::TNodeI NI = Graph->GetNI(NId);
00186     if (!Tree->IsNode(NId)) {
00187       Tree->AddNode(NId);
00188     }
00189     if (FollowOut) {
00190       for (int e = 0; e < NI.GetInDeg(); e++) {
00191         const int Prev = NI.GetInNId(e);
00192         if (Tree->IsNode(Prev) && BFS.NIdDistH.GetDat(Prev)==Dist-1) {
00193           Tree->AddEdge(Prev, NId); }
00194       }
00195     }
00196     if (FollowIn) {
00197       for (int e = 0; e < NI.GetOutDeg(); e++) {
00198         const int Prev = NI.GetOutNId(e);
00199         if (Tree->IsNode(Prev) && BFS.NIdDistH.GetDat(Prev)==Dist-1) {
00200           Tree->AddEdge(Prev, NId); }
00201       }
00202     }
00203   }
00204   return Tree;
00205 }
00206 
00207 template <class PGraph>
00208 int GetSubTreeSz(const PGraph& Graph, const int& StartNId, const bool& FollowOut, const bool& FollowIn, int& TreeSz, int& TreeDepth) {
00209   TBreathFS<PGraph> BFS(Graph);
00210   BFS.DoBfs(StartNId, FollowOut, FollowIn, -1, TInt::Mx);
00211   TreeSz = BFS.NIdDistH.Len();
00212   TreeDepth = 0;
00213   for (int i = 0; i < BFS.NIdDistH.Len(); i++) {
00214     TreeDepth = TMath::Mx(TreeDepth, BFS.NIdDistH[i].Val);
00215   }
00216   return TreeSz;
00217 }
00218 
00219 template <class PGraph>
00220 int GetNodesAtHop(const PGraph& Graph, const int& StartNId, const int& Hop, TIntV& NIdV, const bool& IsDir) {
00221   TBreathFS<PGraph> BFS(Graph);
00222   BFS.DoBfs(StartNId, true, !IsDir, -1, Hop);
00223   NIdV.Clr(false);
00224   for (int i = 0; i < BFS.NIdDistH.Len(); i++) {
00225     if (BFS.NIdDistH[i] == Hop) {
00226       NIdV.Add(BFS.NIdDistH.GetKey(i)); }
00227   }
00228   return NIdV.Len();
00229 }
00230 
00231 template <class PGraph>
00232 int GetNodesAtHops(const PGraph& Graph, const int& StartNId, TIntPrV& HopCntV, const bool& IsDir) {
00233   TBreathFS<PGraph> BFS(Graph);
00234   BFS.DoBfs(StartNId, true, !IsDir, -1, TInt::Mx);
00235   TIntH HopCntH;
00236   for (int i = 0; i < BFS.NIdDistH.Len(); i++) {
00237     HopCntH.AddDat(BFS.NIdDistH[i]) += 1;
00238   }
00239   HopCntH.GetKeyDatPrV(HopCntV);
00240   HopCntV.Sort();
00241   return HopCntV.Len();
00242 }
00243 
00244 template <class PGraph>
00245 int GetShortPath(const PGraph& Graph, const int& SrcNId, TIntH& NIdToDistH, const bool& IsDir, const int& MaxDist) {
00246   TBreathFS<PGraph> BFS(Graph);
00247   BFS.DoBfs(SrcNId, true, ! IsDir, -1, MaxDist);
00248   NIdToDistH.Clr();
00249   NIdToDistH.Swap(BFS.NIdDistH);
00250   return NIdToDistH[NIdToDistH.Len()-1];
00251 }
00252 
00253 template <class PGraph> 
00254 int GetShortPath(const PGraph& Graph, const int& SrcNId, const int& DstNId, const bool& IsDir) {
00255   TBreathFS<PGraph> BFS(Graph);
00256   BFS.DoBfs(SrcNId, true, ! IsDir, DstNId, TInt::Mx);
00257   return BFS.GetHops(SrcNId, DstNId);
00258 }
00259 
00260 template <class PGraph>
00261 int GetBfsFullDiam(const PGraph& Graph, const int& NTestNodes, const bool& IsDir) {
00262   int FullDiam;
00263   double EffDiam;
00264   GetBfsEffDiam(Graph, NTestNodes, IsDir, EffDiam, FullDiam);
00265   return FullDiam;
00266 }
00267 
00268 template <class PGraph>
00269 double GetBfsEffDiam(const PGraph& Graph, const int& NTestNodes, const bool& IsDir) {
00270   int FullDiam;
00271   double EffDiam;
00272   GetBfsEffDiam(Graph, NTestNodes, IsDir, EffDiam, FullDiam);
00273   return EffDiam;
00274 }
00275 
00276 template <class PGraph>
00277 double GetBfsEffDiam(const PGraph& Graph, const int& NTestNodes, const bool& IsDir, double& EffDiam, int& FullDiam) {
00278   double AvgDiam;
00279   EffDiam = -1;  FullDiam = -1;
00280   return GetBfsEffDiam(Graph, NTestNodes, IsDir, EffDiam, FullDiam, AvgDiam);
00281 }
00282 
00283 template <class PGraph>
00284 double GetBfsEffDiam(const PGraph& Graph, const int& NTestNodes, const bool& IsDir, double& EffDiam, int& FullDiam, double& AvgDiam) {
00285   EffDiam = -1;  FullDiam = -1;  AvgDiam = -1;
00286   TIntFltH DistToCntH;
00287   TBreathFS<PGraph> BFS(Graph);
00288   // shotest paths
00289   TIntV NodeIdV;
00290   Graph->GetNIdV(NodeIdV);  NodeIdV.Shuffle(TInt::Rnd);
00291   for (int tries = 0; tries < TMath::Mn(NTestNodes, Graph->GetNodes()); tries++) {
00292     const int NId = NodeIdV[tries];
00293     BFS.DoBfs(NId, true, ! IsDir, -1, TInt::Mx);
00294     for (int i = 0; i < BFS.NIdDistH.Len(); i++) {
00295       DistToCntH.AddDat(BFS.NIdDistH[i]) += 1; }
00296   }
00297   TIntFltKdV DistNbrsPdfV;
00298   double SumPathL=0, PathCnt=0;
00299   for (int i = 0; i < DistToCntH.Len(); i++) {
00300     DistNbrsPdfV.Add(TIntFltKd(DistToCntH.GetKey(i), DistToCntH[i]));
00301     SumPathL += DistToCntH.GetKey(i) * DistToCntH[i];
00302     PathCnt += DistToCntH[i];
00303   }
00304   DistNbrsPdfV.Sort();
00305   EffDiam = TSnap::TSnapDetail::CalcEffDiamPdf(DistNbrsPdfV, 0.9); // effective diameter (90-th percentile)
00306   FullDiam = DistNbrsPdfV.Last().Key;                // approximate full diameter (max shortest path length over the sampled nodes)
00307   AvgDiam = SumPathL/PathCnt;                        // average shortest path length
00308   return EffDiam;
00309 }
00310 
00311 template <class PGraph>
00312 double GetBfsEffDiam(const PGraph& Graph, const int& NTestNodes, const TIntV& SubGraphNIdV, const bool& IsDir, double& EffDiam, int& FullDiam) {
00313   EffDiam = -1;
00314   FullDiam = -1;
00315   TIntFltH DistToCntH;
00316   TBreathFS<PGraph> BFS(Graph);
00317   // shotest paths
00318   TIntV NodeIdV(SubGraphNIdV);  NodeIdV.Shuffle(TInt::Rnd);
00319   TInt Dist;
00320   for (int tries = 0; tries < TMath::Mn(NTestNodes, Graph->GetNodes()); tries++) {
00321     const int NId = NodeIdV[tries];
00322     BFS.DoBfs(NId, true, ! IsDir, -1, TInt::Mx);
00323     for (int i = 0; i < SubGraphNIdV.Len(); i++) {
00324       if (BFS.NIdDistH.IsKeyGetDat(SubGraphNIdV[i], Dist)) {
00325         DistToCntH.AddDat(Dist) += 1; }
00326     }
00327   }
00328   TIntFltKdV DistNbrsPdfV;
00329   for (int i = 0; i < DistToCntH.Len(); i++) {
00330     DistNbrsPdfV.Add(TIntFltKd(DistToCntH.GetKey(i), DistToCntH[i]));
00331   }
00332   DistNbrsPdfV.Sort();
00333   EffDiam = TSnap::TSnapDetail::CalcEffDiamPdf(DistNbrsPdfV, 0.9);  // effective diameter (90-th percentile)
00334   FullDiam = DistNbrsPdfV.Last().Key;                 // approximate full diameter (max shortest path length over the sampled nodes)
00335   return EffDiam;                                     // average shortest path length
00336 }
00337 
00338 } // namespace TSnap