SNAP Library, Developer Reference  2012-10-15 15:06:59
SNAP, a general purpose network analysis and graph mining library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
bfsdfs.h
Go to the documentation of this file.
00001 namespace TSnap {
00002 
00004 // BFS and DFS
00008 template <class PGraph> PNGraph GetBfsTree(const PGraph& Graph, const int& StartNId, const bool& FollowOut, const bool& FollowIn);
00010 template <class PGraph> int GetSubTreeSz(const PGraph& Graph, const int& StartNId, const bool& FollowOut, const bool& FollowIn, int& TreeSz, int& TreeDepth);
00013 template <class PGraph> int GetNodesAtHop(const PGraph& Graph, const int& StartNId, const int& Hop, TIntV& NIdV, const bool& IsDir=false);
00016 template <class PGraph> int GetNodesAtHops(const PGraph& Graph, const int& StartNId, TIntPrV& HopCntV, const bool& IsDir=false);
00017 
00019 // Shortest paths
00022 template <class PGraph> int GetShortPath(const PGraph& Graph, const int& SrcNId, const int& DstNId, const bool& IsDir=false);
00027 template <class PGraph> int GetShortPath(const PGraph& Graph, const int& SrcNId, TIntH& NIdToDistH, const bool& IsDir=false, const int& MaxDist=TInt::Mx);
00028 
00030 // Diameter
00031 
00034 template <class PGraph> int GetBfsFullDiam(const PGraph& Graph, const int& NTestNodes, const bool& IsDir=false);
00037 template <class PGraph> double GetBfsEffDiam(const PGraph& Graph, const int& NTestNodes, const bool& IsDir=false);
00040 template <class PGraph> double GetBfsEffDiam(const PGraph& Graph, const int& NTestNodes, const bool& IsDir, double& EffDiam, int& FullDiam);
00043 template <class PGraph> double GetBfsEffDiam(const PGraph& Graph, const int& NTestNodes, const bool& IsDir, double& EffDiam, int& FullDiam, double& AvgDiam);
00046 template <class PGraph> double GetBfsEffDiam(const PGraph& Graph, const int& NTestNodes, const TIntV& SubGraphNIdV, const bool& IsDir, double& EffDiam, int& FullDiam);
00047 
00048 // TODO: Implement in the future
00049 //template <class PGraph> int GetRangeDist(const PGraph& Graph, const int& SrcNId, const int& DstNId, const bool& IsDir=false);
00050 //template <class PGraph> int GetShortPath(const PGraph& Graph, const int& SrcNId, TIntH& NIdToDistH, const bool& IsDir=false, const int& MaxDist=1000);
00051 //template <class PGraph> int GetShortPath(const PGraph& Graph, const int& SrcNId, const TIntSet& TargetSet, const bool& IsDir, TIntV& PathNIdV);
00052 //template <class PGraph> int GetShortPath(TIntH& NIdPrnH, TCcQueue<int>& NIdQ, const PGraph& Graph, const int& SrcNId, const TIntSet& TargetSet, const bool& IsDir, TIntV& PathNIdV);
00053 //template <class PGraph> int GetMxShortDist(const PGraph& Graph, const int& SrcNId, const bool& IsDir=false);
00054 //template <class PGraph> int GetMxShortDist(const PGraph& Graph, const int& SrcNId, const bool& IsDir, int& MxDistNId);
00055 //template <class PGraph> int GetMxShortDist(const PGraph& Graph, const int& SrcNId, const bool& IsDir, int& MxDistNId, TCcQueue<int>& NIdQ, TCcQueue<int>& DistQ, TIntSet& VisitedH);
00056 //template <class PGraph> int GetMxGreedyDist(const PGraph& Graph, const int& SrcNId, const bool& IsDir=false);
00057 //template <class PGraph> int GetMxGreedyDist(const PGraph& Graph, const int& SrcNId, const bool& IsDir, TCcQueue<int>& NIdQ, TCcQueue<int>& DistQ, TIntSet& VisitedH);
00058 //template <class PGraph> PNGraph GetShortPathsSubGraph(const PGraph& Graph, const TIntV& SubGraphNIdV);
00059 //template <class PGraph> PGraph GetWccPathsSubGraph(const PGraph& Graph, const TIntV& NIdV);
00060 //template <class PGraph> void GetSubTreeSz(const PGraph& Graph, const int& StartNId, const bool& FollowOutEdges, int& TreeSz, int& TreeDepth);
00061 
00062 } // namespace TSnap
00063 
00067 template<class PGraph>
00068 class TBreathFS {
00069 public:
00070   PGraph Graph;
00071   TSnapQueue<int> Queue;
00072   TInt StartNId;
00073   TIntH NIdDistH;
00074 public:
00075   TBreathFS(const PGraph& GraphPt, const bool& InitBigQ=true) :
00076     Graph(GraphPt), Queue(InitBigQ?Graph->GetNodes():1024), NIdDistH(InitBigQ?Graph->GetNodes():1024) { }
00078   void SetGraph(const PGraph& GraphPt);
00080   int DoBfs(const int& StartNode, const bool& FollowOut, const bool& FollowIn, const int& TargetNId=-1, const int& MxDist=TInt::Mx);
00082   int GetNVisited() const { return NIdDistH.Len(); }
00084   void GetVisitedNIdV(TIntV& NIdV) const { NIdDistH.GetKeyV(NIdV); }
00087   int GetHops(const int& SrcNId, const int& DstNId) const;
00090   int GetRndPath(const int& SrcNId, const int& DstNId, TIntV& PathNIdV) const;
00091 };
00092 
00093 template<class PGraph>
00094 void TBreathFS<PGraph>::SetGraph(const PGraph& GraphPt) {
00095   Graph=GraphPt;
00096   const int N=GraphPt->GetNodes();
00097   if (Queue.Reserved() < N) { Queue.Gen(N); }
00098   if (NIdDistH.GetReservedKeyIds() < N) { NIdDistH.Gen(N); }
00099 }
00100 
00101 template<class PGraph>
00102 int TBreathFS<PGraph>::DoBfs(const int& StartNode, const bool& FollowOut, const bool& FollowIn, const int& TargetNId, const int& MxDist) {
00103   StartNId = StartNode;
00104   IAssert(Graph->IsNode(StartNId));
00105   NIdDistH.Clr(false);  NIdDistH.AddDat(StartNId, 0);
00106   Queue.Clr(false);  Queue.Push(StartNId);
00107   int v, MaxDist = 0;
00108   while (! Queue.Empty()) {
00109     const int NId = Queue.Top();  Queue.Pop();
00110     const int Dist = NIdDistH.GetDat(NId);
00111     if (Dist == MxDist) { break; } // max distance limit reached
00112     const typename PGraph::TObj::TNodeI NodeI = Graph->GetNI(NId);
00113     if (FollowOut) { // out-links
00114       for (v = 0; v < NodeI.GetOutDeg(); v++) {  // out-links
00115         const int DstNId = NodeI.GetOutNId(v);
00116         if (! NIdDistH.IsKey(DstNId)) {
00117           NIdDistH.AddDat(DstNId, Dist+1);
00118           MaxDist = TMath::Mx(MaxDist, Dist+1);
00119           if (DstNId == TargetNId) { return MaxDist; }
00120           Queue.Push(DstNId);
00121         }
00122       }
00123     }
00124     if (FollowIn) { // in-links
00125       for (v = 0; v < NodeI.GetInDeg(); v++) {
00126         const int DstNId = NodeI.GetInNId(v);
00127         if (! NIdDistH.IsKey(DstNId)) {
00128           NIdDistH.AddDat(DstNId, Dist+1);
00129           MaxDist = TMath::Mx(MaxDist, Dist+1);
00130           if (DstNId == TargetNId) { return MaxDist; }
00131           Queue.Push(DstNId);
00132         }
00133       }
00134     }
00135   }
00136   return MaxDist;
00137 }
00138 
00139 template<class PGraph>
00140 int TBreathFS<PGraph>::GetHops(const int& SrcNId, const int& DstNId) const {
00141   TInt Dist;
00142   if (SrcNId!=StartNId) { return -1; }
00143   if (! NIdDistH.IsKeyGetDat(DstNId, Dist)) { return -1; }
00144   return Dist.Val;
00145 }
00146 
00147 template<class PGraph>
00148 int TBreathFS<PGraph>::GetRndPath(const int& SrcNId, const int& DstNId, TIntV& PathNIdV) const {
00149   PathNIdV.Clr(false);
00150   if (SrcNId!=StartNId || ! NIdDistH.IsKey(DstNId)) { return -1; }
00151   PathNIdV.Add(DstNId);
00152   TIntV CloserNIdV;
00153   int CurNId = DstNId;
00154   TInt CurDist, NextDist;
00155   while (CurNId != SrcNId) {
00156     typename PGraph::TObj::TNodeI NI = Graph->GetNI(CurNId);
00157     IAssert(NIdDistH.IsKeyGetDat(CurNId, CurDist));
00158     CloserNIdV.Clr(false);
00159     for (int e = 0; e < NI.GetDeg(); e++) {
00160       const int Next = NI.GetNbrNId(e);
00161       IAssert(NIdDistH.IsKeyGetDat(Next, NextDist));
00162       if (NextDist == CurDist-1) { CloserNIdV.Add(Next); }
00163     }
00164     IAssert(! CloserNIdV.Empty());
00165     CurNId = CloserNIdV[TInt::Rnd.GetUniDevInt(CloserNIdV.Len())];
00166     PathNIdV.Add(CurNId);
00167   }
00168   PathNIdV.Reverse();
00169   return PathNIdV.Len()-1;
00170 }
00171 
00173 // Implementation
00174 namespace TSnap {
00175 
00176 template <class PGraph>
00177 PNGraph GetBfsTree(const PGraph& Graph, const int& StartNId, const bool& FollowOut, const bool& FollowIn) {
00178   TBreathFS<PGraph> BFS(Graph, false);
00179   BFS.DoBfs(StartNId, FollowOut, FollowIn, -1, TInt::Mx);
00180   PNGraph Tree = TNGraph::New();
00181   BFS.NIdDistH.SortByDat();
00182   for (int i = 0; i < BFS.NIdDistH.Len(); i++) {
00183     const int NId = BFS.NIdDistH.GetKey(i);
00184     const int Dist = BFS.NIdDistH[i];
00185     typename PGraph::TObj::TNodeI NI = Graph->GetNI(NId);
00186     Tree->AddNode(NId);
00187     if (FollowOut) {
00188       for (int e = 0; e < NI.GetInDeg(); e++) {
00189         const int Prev = NI.GetInNId(e);
00190         if (Tree->IsNode(Prev) && BFS.NIdDistH.GetDat(Prev)==Dist-1) {
00191           Tree->AddEdge(Prev, NId); }
00192       }
00193     }
00194     if (FollowIn) {
00195       for (int e = 0; e < NI.GetOutDeg(); e++) {
00196         const int Prev = NI.GetOutNId(e);
00197         if (Tree->IsNode(Prev) && BFS.NIdDistH.GetDat(Prev)==Dist-1) {
00198           Tree->AddEdge(Prev, NId); }
00199       }
00200     }
00201   }
00202   return Tree;
00203 }
00204 
00205 template <class PGraph>
00206 int GetSubTreeSz(const PGraph& Graph, const int& StartNId, const bool& FollowOut, const bool& FollowIn, int& TreeSz, int& TreeDepth) {
00207   TBreathFS<PGraph> BFS(Graph);
00208   BFS.DoBfs(StartNId, FollowOut, FollowIn, -1, TInt::Mx);
00209   TreeSz = BFS.NIdDistH.Len();
00210   TreeDepth = 0;
00211   for (int i = 0; i < BFS.NIdDistH.Len(); i++) {
00212     TreeDepth = TMath::Mx(TreeDepth, BFS.NIdDistH[i].Val);
00213   }
00214   return TreeSz;
00215 }
00216 
00217 template <class PGraph>
00218 int GetNodesAtHop(const PGraph& Graph, const int& StartNId, const int& Hop, TIntV& NIdV, const bool& IsDir) {
00219   TBreathFS<PGraph> BFS(Graph);
00220   BFS.DoBfs(StartNId, true, !IsDir, -1, Hop);
00221   NIdV.Clr(false);
00222   for (int i = 0; i < BFS.NIdDistH.Len(); i++) {
00223     if (BFS.NIdDistH[i] == Hop) {
00224       NIdV.Add(BFS.NIdDistH.GetKey(i)); }
00225   }
00226   return NIdV.Len();
00227 }
00228 
00229 template <class PGraph>
00230 int GetNodesAtHops(const PGraph& Graph, const int& StartNId, TIntPrV& HopCntV, const bool& IsDir) {
00231   TBreathFS<PGraph> BFS(Graph);
00232   BFS.DoBfs(StartNId, true, !IsDir, -1, TInt::Mx);
00233   TIntH HopCntH;
00234   for (int i = 0; i < BFS.NIdDistH.Len(); i++) {
00235     HopCntH.AddDat(BFS.NIdDistH[i]) += 1;
00236   }
00237   HopCntH.GetKeyDatPrV(HopCntV);
00238   HopCntV.Sort();
00239   return HopCntV.Len();
00240 }
00241 
00242 template <class PGraph>
00243 int GetShortPath(const PGraph& Graph, const int& SrcNId, TIntH& NIdToDistH, const bool& IsDir, const int& MaxDist) {
00244   TBreathFS<PGraph> BFS(Graph);
00245   BFS.DoBfs(SrcNId, true, ! IsDir, -1, MaxDist);
00246   NIdToDistH.Clr();
00247   NIdToDistH.Swap(BFS.NIdDistH);
00248   return NIdToDistH[NIdToDistH.Len()-1];
00249 }
00250 
00251 template <class PGraph> 
00252 int GetShortPath(const PGraph& Graph, const int& SrcNId, const int& DstNId, const bool& IsDir) {
00253   TBreathFS<PGraph> BFS(Graph);
00254   BFS.DoBfs(SrcNId, true, ! IsDir, DstNId, TInt::Mx);
00255   return BFS.GetHops(SrcNId, DstNId);
00256 }
00257 
00258 template <class PGraph>
00259 int GetBfsFullDiam(const PGraph& Graph, const int& NTestNodes, const bool& IsDir) {
00260   int FullDiam;
00261   double EffDiam;
00262   GetBfsEffDiam(Graph, NTestNodes, IsDir, EffDiam, FullDiam);
00263   return FullDiam;
00264 }
00265 
00266 template <class PGraph>
00267 double GetBfsEffDiam(const PGraph& Graph, const int& NTestNodes, const bool& IsDir) {
00268   int FullDiam;
00269   double EffDiam;
00270   GetBfsEffDiam(Graph, NTestNodes, IsDir, EffDiam, FullDiam);
00271   return EffDiam;
00272 }
00273 
00274 template <class PGraph>
00275 double GetBfsEffDiam(const PGraph& Graph, const int& NTestNodes, const bool& IsDir, double& EffDiam, int& FullDiam) {
00276   double AvgDiam;
00277   EffDiam = -1;  FullDiam = -1;
00278   return GetBfsEffDiam(Graph, NTestNodes, IsDir, EffDiam, FullDiam, AvgDiam);
00279 }
00280 
00281 template <class PGraph>
00282 double GetBfsEffDiam(const PGraph& Graph, const int& NTestNodes, const bool& IsDir, double& EffDiam, int& FullDiam, double& AvgDiam) {
00283   EffDiam = -1;  FullDiam = -1;  AvgDiam = -1;
00284   TIntFltH DistToCntH;
00285   TBreathFS<PGraph> BFS(Graph);
00286   // shotest paths
00287   TIntV NodeIdV;
00288   Graph->GetNIdV(NodeIdV);  NodeIdV.Shuffle(TInt::Rnd);
00289   for (int tries = 0; tries < TMath::Mn(NTestNodes, Graph->GetNodes()); tries++) {
00290     const int NId = NodeIdV[tries];
00291     BFS.DoBfs(NId, true, ! IsDir, -1, TInt::Mx);
00292     for (int i = 0; i < BFS.NIdDistH.Len(); i++) {
00293       DistToCntH.AddDat(BFS.NIdDistH[i]) += 1; }
00294   }
00295   TIntFltKdV DistNbrsPdfV;
00296   double SumPathL=0, PathCnt=0;
00297   for (int i = 0; i < DistToCntH.Len(); i++) {
00298     DistNbrsPdfV.Add(TIntFltKd(DistToCntH.GetKey(i), DistToCntH[i]));
00299     SumPathL += DistToCntH.GetKey(i) * DistToCntH[i];
00300     PathCnt += DistToCntH[i];
00301   }
00302   DistNbrsPdfV.Sort();
00303   EffDiam = TSnap::TSnapDetail::CalcEffDiamPdf(DistNbrsPdfV, 0.9); // effective diameter (90-th percentile)
00304   FullDiam = DistNbrsPdfV.Last().Key;                // approximate full diameter (max shortest path length over the sampled nodes)
00305   AvgDiam = SumPathL/PathCnt;                        // average shortest path length
00306   return EffDiam;
00307 }
00308 
00309 template <class PGraph>
00310 double GetBfsEffDiam(const PGraph& Graph, const int& NTestNodes, const TIntV& SubGraphNIdV, const bool& IsDir, double& EffDiam, int& FullDiam) {
00311   EffDiam = -1;
00312   FullDiam = -1;
00313   TIntFltH DistToCntH;
00314   TBreathFS<PGraph> BFS(Graph);
00315   // shotest paths
00316   TIntV NodeIdV(SubGraphNIdV);  NodeIdV.Shuffle(TInt::Rnd);
00317   TInt Dist;
00318   for (int tries = 0; tries < TMath::Mn(NTestNodes, Graph->GetNodes()); tries++) {
00319     const int NId = NodeIdV[tries];
00320     BFS.DoBfs(NId, true, ! IsDir, -1, TInt::Mx);
00321     for (int i = 0; i < SubGraphNIdV.Len(); i++) {
00322       if (BFS.NIdDistH.IsKeyGetDat(SubGraphNIdV[i], Dist)) {
00323         DistToCntH.AddDat(Dist) += 1; }
00324     }
00325   }
00326   TIntFltKdV DistNbrsPdfV;
00327   for (int i = 0; i < DistToCntH.Len(); i++) {
00328     DistNbrsPdfV.Add(TIntFltKd(DistToCntH.GetKey(i), DistToCntH[i]));
00329   }
00330   DistNbrsPdfV.Sort();
00331   EffDiam = TSnap::TSnapDetail::CalcEffDiamPdf(DistNbrsPdfV, 0.9);  // effective diameter (90-th percentile)
00332   FullDiam = DistNbrsPdfV.Last().Key;                 // approximate full diameter (max shortest path length over the sampled nodes)
00333   return EffDiam;                                     // average shortest path length
00334 }
00335 
00336 } // namespace TSnap