SNAP Library, User Reference  2012-10-02 12:56:23
SNAP, a general purpose network analysis and graph mining library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
bfsdfs.h
Go to the documentation of this file.
00001 namespace TSnap {
00002 
00004 // BFS and DFS
00008 template <class PGraph> PNGraph GetBfsTree(const PGraph& Graph, const int& StartNId, const bool& FollowOut, const bool& FollowIn);
00010 template <class PGraph> int GetSubTreeSz(const PGraph& Graph, const int& StartNId, const bool& FollowOut, const bool& FollowIn, int& TreeSz, int& TreeDepth);
00013 template <class PGraph> int GetNodesAtHop(const PGraph& Graph, const int& StartNId, const int& Hop, TIntV& NIdV, const bool& IsDir=false);
00016 template <class PGraph> int GetNodesAtHops(const PGraph& Graph, const int& StartNId, TIntPrV& HopCntV, const bool& IsDir=false);
00017 
00019 // Shortest paths
00022 template <class PGraph> int GetShortPath(const PGraph& Graph, const int& SrcNId, const int& DstNId, const bool& IsDir=false);
00027 template <class PGraph> int GetShortPath(const PGraph& Graph, const int& SrcNId, TIntH& NIdToDistH, const bool& IsDir=false, const int& MaxDist=TInt::Mx);
00028 
00030 // Diameter
00031 
00034 template <class PGraph> int GetBfsFullDiam(const PGraph& Graph, const int& NTestNodes, const bool& IsDir=false);
00037 template <class PGraph> double GetBfsEffDiam(const PGraph& Graph, const int& NTestNodes, const bool& IsDir=false);
00040 template <class PGraph> double GetBfsEffDiam(const PGraph& Graph, const int& NTestNodes, const bool& IsDir, double& EffDiam, int& FullDiam);
00043 template <class PGraph> double GetBfsEffDiam(const PGraph& Graph, const int& NTestNodes, const bool& IsDir, double& EffDiam, int& FullDiam, double& AvgDiam);
00046 template <class PGraph> double GetBfsEffDiam(const PGraph& Graph, const int& NTestNodes, const TIntV& SubGraphNIdV, const bool& IsDir, double& EffDiam, int& FullDiam);
00047 
00048 // TODO: Implement in the future
00049 //template <class PGraph> int GetRangeDist(const PGraph& Graph, const int& SrcNId, const int& DstNId, const bool& IsDir=false);
00050 //template <class PGraph> int GetShortPath(const PGraph& Graph, const int& SrcNId, TIntH& NIdToDistH, const bool& IsDir=false, const int& MaxDist=1000);
00051 //template <class PGraph> int GetShortPath(const PGraph& Graph, const int& SrcNId, const TIntSet& TargetSet, const bool& IsDir, TIntV& PathNIdV);
00052 //template <class PGraph> int GetShortPath(TIntH& NIdPrnH, TCcQueue<int>& NIdQ, const PGraph& Graph, const int& SrcNId, const TIntSet& TargetSet, const bool& IsDir, TIntV& PathNIdV);
00053 //template <class PGraph> int GetMxShortDist(const PGraph& Graph, const int& SrcNId, const bool& IsDir=false);
00054 //template <class PGraph> int GetMxShortDist(const PGraph& Graph, const int& SrcNId, const bool& IsDir, int& MxDistNId);
00055 //template <class PGraph> int GetMxShortDist(const PGraph& Graph, const int& SrcNId, const bool& IsDir, int& MxDistNId, TCcQueue<int>& NIdQ, TCcQueue<int>& DistQ, TIntSet& VisitedH);
00056 //template <class PGraph> int GetMxGreedyDist(const PGraph& Graph, const int& SrcNId, const bool& IsDir=false);
00057 //template <class PGraph> int GetMxGreedyDist(const PGraph& Graph, const int& SrcNId, const bool& IsDir, TCcQueue<int>& NIdQ, TCcQueue<int>& DistQ, TIntSet& VisitedH);
00058 //template <class PGraph> PNGraph GetShortPathsSubGraph(const PGraph& Graph, const TIntV& SubGraphNIdV);
00059 //template <class PGraph> PGraph GetWccPathsSubGraph(const PGraph& Graph, const TIntV& NIdV);
00060 //template <class PGraph> void GetSubTreeSz(const PGraph& Graph, const int& StartNId, const bool& FollowOutEdges, int& TreeSz, int& TreeDepth);
00061 
00062 } // namespace TSnap
00063 
00067 template<class PGraph>
00068 class TBreathFS {
00069 public:
00070   PGraph Graph;
00071   TSnapQueue<int> Queue;
00072   TInt StartNId;
00073   TIntH NIdDistH;
00074 public:
00075   TBreathFS(const PGraph& GraphPt, const bool& InitBigQ=true) :
00076     Graph(GraphPt), Queue(InitBigQ?Graph->GetNodes():1024), NIdDistH(InitBigQ?Graph->GetNodes():1024) { }
00078   void SetGraph(const PGraph& GraphPt);
00080   int DoBfs(const int& StartNode, const bool& FollowOut, const bool& FollowIn, const int& TargetNId=-1, const int& MxDist=TInt::Mx);
00082   int GetNVisited() const { return NIdDistH.Len(); }
00084   void GetVisitedNIdV(TIntV& NIdV) const { NIdDistH.GetKeyV(NIdV); }
00087   int GetHops(const int& SrcNId, const int& DstNId) const;
00090   int GetRndPath(const int& SrcNId, const int& DstNId, TIntV& PathNIdV) const;
00091 };
00092 
00093 template<class PGraph>
00094 void TBreathFS<PGraph>::SetGraph(const PGraph& GraphPt) {
00095   Graph=GraphPt;
00096   const int N=GraphPt->GetNodes();
00097   if (Queue.Reserved() < N) { Queue.Gen(N); }
00098   if (NIdDistH.GetReservedKeyIds() < N) { NIdDistH.Gen(N); }
00099 }
00100 
00101 template<class PGraph>
00102 int TBreathFS<PGraph>::DoBfs(const int& StartNode, const bool& FollowOut, const bool& FollowIn, const int& TargetNId, const int& MxDist) {
00103   StartNId = StartNode;
00104   IAssert(Graph->IsNode(StartNId));
00105   NIdDistH.Clr(false);  NIdDistH.AddDat(StartNId, 0);
00106   Queue.Clr(false);  Queue.Push(StartNId);
00107   int v, MaxDist = 0;
00108   while (! Queue.Empty()) {
00109     const int NId = Queue.Top();  Queue.Pop();
00110     const int Dist = NIdDistH.GetDat(NId);
00111     if (Dist == MxDist) { break; } // max distance limit reached
00112     const typename PGraph::TObj::TNodeI NodeI = Graph->GetNI(NId);
00113     if (FollowOut) { // out-links
00114       for (v = 0; v < NodeI.GetOutDeg(); v++) {  // out-links
00115         const int DstNId = NodeI.GetOutNId(v);
00116         if (! NIdDistH.IsKey(DstNId)) {
00117           NIdDistH.AddDat(DstNId, Dist+1);
00118           MaxDist = TMath::Mx(MaxDist, Dist+1);
00119           if (DstNId == TargetNId) { return MaxDist; }
00120           Queue.Push(DstNId);
00121         }
00122       }
00123     }
00124     if (FollowIn) { // in-links
00125       for (v = 0; v < NodeI.GetInDeg(); v++) {
00126         const int DstNId = NodeI.GetInNId(v);
00127         if (! NIdDistH.IsKey(DstNId)) {
00128           NIdDistH.AddDat(DstNId, Dist+1);
00129           MaxDist = TMath::Mx(MaxDist, Dist+1);
00130           if (DstNId == TargetNId) { return MaxDist; }
00131           Queue.Push(DstNId);
00132         }
00133       }
00134     }
00135   }
00136   return MaxDist;
00137 }
00138 
00139 template<class PGraph>
00140 int TBreathFS<PGraph>::GetHops(const int& SrcNId, const int& DstNId) const {
00141   TInt Dist;
00142   if (SrcNId!=StartNId) { return -1; }
00143   if (! NIdDistH.IsKeyGetDat(DstNId, Dist)) { return -1; }
00144   return Dist.Val;
00145 }
00146 
00147 template<class PGraph>
00148 int TBreathFS<PGraph>::GetRndPath(const int& SrcNId, const int& DstNId, TIntV& PathNIdV) const {
00149   PathNIdV.Clr(false);
00150   if (SrcNId!=StartNId || ! NIdDistH.IsKey(DstNId)) { return -1; }
00151   PathNIdV.Add(DstNId);
00152   TIntV CloserNIdV;
00153   int CurNId = DstNId;
00154   TInt CurDist, NextDist;
00155   while (CurNId != SrcNId) {
00156     typename PGraph::TObj::TNodeI NI = Graph->GetNI(CurNId);
00157     IAssert(NIdDistH.IsKeyGetDat(CurNId, CurDist));
00158     CloserNIdV.Clr(false);
00159     for (int e = 0; e < NI.GetDeg(); e++) {
00160       const int Next = NI.GetNbrNId(e);
00161       IAssert(NIdDistH.IsKeyGetDat(Next, NextDist));
00162       if (NextDist == CurDist-1) { CloserNIdV.Add(Next); }
00163     }
00164     IAssert(! CloserNIdV.Empty());
00165     CurNId = CloserNIdV[TInt::Rnd.GetUniDevInt(CloserNIdV.Len())];
00166     PathNIdV.Add(CurNId);
00167   }
00168   PathNIdV.Reverse();
00169   return PathNIdV.Len()-1;
00170 }
00171 
00173 // Implementation
00174 namespace TSnap {
00175 
00176 template <class PGraph>
00177 PNGraph GetBfsTree(const PGraph& Graph, const int& StartNId, const bool& FollowOut, const bool& FollowIn) {
00178   TBreathFS<PGraph> BFS(Graph, false);
00179   BFS.DoBfs(StartNId, FollowOut, FollowIn, -1, TInt::Mx);
00180   PNGraph Tree = TNGraph::New();
00181   BFS.NIdDistH.SortByDat();
00182   Tree->AddNode(StartNId);
00183   for (int i = 0; i < BFS.NIdDistH.Len(); i++) {
00184     const int NId = BFS.NIdDistH.GetKey(i);
00185     const int Dist = BFS.NIdDistH[i];
00186     typename PGraph::TObj::TNodeI NI = Graph->GetNI(NId);
00187     Tree->AddNode(NId);
00188     if (FollowOut) {
00189       for (int e = 0; e < NI.GetInDeg(); e++) {
00190         const int Prev = NI.GetInNId(e);
00191         if (Tree->IsNode(Prev) && BFS.NIdDistH.GetDat(Prev)==Dist-1) {
00192           Tree->AddEdge(Prev, NId); }
00193       }
00194     }
00195     if (FollowIn) {
00196       for (int e = 0; e < NI.GetOutDeg(); e++) {
00197         const int Prev = NI.GetOutNId(e);
00198         if (Tree->IsNode(Prev) && BFS.NIdDistH.GetDat(Prev)==Dist-1) {
00199           Tree->AddEdge(Prev, NId); }
00200       }
00201     }
00202   }
00203   return Tree;
00204 }
00205 
00206 template <class PGraph>
00207 int GetSubTreeSz(const PGraph& Graph, const int& StartNId, const bool& FollowOut, const bool& FollowIn, int& TreeSz, int& TreeDepth) {
00208   TBreathFS<PGraph> BFS(Graph);
00209   BFS.DoBfs(StartNId, FollowOut, FollowIn, -1, TInt::Mx);
00210   TreeSz = BFS.NIdDistH.Len();
00211   TreeDepth = 0;
00212   for (int i = 0; i < BFS.NIdDistH.Len(); i++) {
00213     TreeDepth = TMath::Mx(TreeDepth, BFS.NIdDistH[i].Val);
00214   }
00215   return TreeSz;
00216 }
00217 
00218 template <class PGraph>
00219 int GetNodesAtHop(const PGraph& Graph, const int& StartNId, const int& Hop, TIntV& NIdV, const bool& IsDir) {
00220   TBreathFS<PGraph> BFS(Graph);
00221   BFS.DoBfs(StartNId, true, !IsDir, -1, Hop);
00222   NIdV.Clr(false);
00223   for (int i = 0; i < BFS.NIdDistH.Len(); i++) {
00224     if (BFS.NIdDistH[i] == Hop) {
00225       NIdV.Add(BFS.NIdDistH.GetKey(i)); }
00226   }
00227   return NIdV.Len();
00228 }
00229 
00230 template <class PGraph>
00231 int GetNodesAtHops(const PGraph& Graph, const int& StartNId, TIntPrV& HopCntV, const bool& IsDir) {
00232   TBreathFS<PGraph> BFS(Graph);
00233   BFS.DoBfs(StartNId, true, !IsDir, -1, TInt::Mx);
00234   TIntH HopCntH;
00235   for (int i = 0; i < BFS.NIdDistH.Len(); i++) {
00236     HopCntH.AddDat(BFS.NIdDistH[i]) += 1;
00237   }
00238   HopCntH.GetKeyDatPrV(HopCntV);
00239   HopCntV.Sort();
00240   return HopCntV.Len();
00241 }
00242 
00243 template <class PGraph>
00244 int GetShortPath(const PGraph& Graph, const int& SrcNId, TIntH& NIdToDistH, const bool& IsDir, const int& MaxDist) {
00245   TBreathFS<PGraph> BFS(Graph);
00246   BFS.DoBfs(SrcNId, true, ! IsDir, -1, MaxDist);
00247   NIdToDistH.Clr();
00248   NIdToDistH.Swap(BFS.NIdDistH);
00249   return NIdToDistH[NIdToDistH.Len()-1];
00250 }
00251 
00252 template <class PGraph> 
00253 int GetShortPath(const PGraph& Graph, const int& SrcNId, const int& DstNId, const bool& IsDir) {
00254   TBreathFS<PGraph> BFS(Graph);
00255   BFS.DoBfs(SrcNId, true, ! IsDir, DstNId, TInt::Mx);
00256   return BFS.GetHops(SrcNId, DstNId);
00257 }
00258 
00259 template <class PGraph>
00260 int GetBfsFullDiam(const PGraph& Graph, const int& NTestNodes, const bool& IsDir) {
00261   int FullDiam;
00262   double EffDiam;
00263   GetBfsEffDiam(Graph, NTestNodes, IsDir, EffDiam, FullDiam);
00264   return FullDiam;
00265 }
00266 
00267 template <class PGraph>
00268 double GetBfsEffDiam(const PGraph& Graph, const int& NTestNodes, const bool& IsDir) {
00269   int FullDiam;
00270   double EffDiam;
00271   GetBfsEffDiam(Graph, NTestNodes, IsDir, EffDiam, FullDiam);
00272   return EffDiam;
00273 }
00274 
00275 template <class PGraph>
00276 double GetBfsEffDiam(const PGraph& Graph, const int& NTestNodes, const bool& IsDir, double& EffDiam, int& FullDiam) {
00277   double AvgDiam;
00278   EffDiam = -1;  FullDiam = -1;
00279   return GetBfsEffDiam(Graph, NTestNodes, IsDir, EffDiam, FullDiam, AvgDiam);
00280 }
00281 
00282 template <class PGraph>
00283 double GetBfsEffDiam(const PGraph& Graph, const int& NTestNodes, const bool& IsDir, double& EffDiam, int& FullDiam, double& AvgDiam) {
00284   EffDiam = -1;  FullDiam = -1;  AvgDiam = -1;
00285   TIntFltH DistToCntH;
00286   TBreathFS<PGraph> BFS(Graph);
00287   // shotest paths
00288   TIntV NodeIdV;
00289   Graph->GetNIdV(NodeIdV);  NodeIdV.Shuffle(TInt::Rnd);
00290   for (int tries = 0; tries < TMath::Mn(NTestNodes, Graph->GetNodes()); tries++) {
00291     const int NId = NodeIdV[tries];
00292     BFS.DoBfs(NId, true, ! IsDir, -1, TInt::Mx);
00293     for (int i = 0; i < BFS.NIdDistH.Len(); i++) {
00294       DistToCntH.AddDat(BFS.NIdDistH[i]) += 1; }
00295   }
00296   TIntFltKdV DistNbrsPdfV;
00297   double SumPathL=0, PathCnt=0;
00298   for (int i = 0; i < DistToCntH.Len(); i++) {
00299     DistNbrsPdfV.Add(TIntFltKd(DistToCntH.GetKey(i), DistToCntH[i]));
00300     SumPathL += DistToCntH.GetKey(i) * DistToCntH[i];
00301     PathCnt += DistToCntH[i];
00302   }
00303   DistNbrsPdfV.Sort();
00304   EffDiam = TSnap::TSnapDetail::CalcEffDiamPdf(DistNbrsPdfV, 0.9); // effective diameter (90-th percentile)
00305   FullDiam = DistNbrsPdfV.Last().Key;                // approximate full diameter (max shortest path length over the sampled nodes)
00306   AvgDiam = SumPathL/PathCnt;                        // average shortest path length
00307   return EffDiam;
00308 }
00309 
00310 template <class PGraph>
00311 double GetBfsEffDiam(const PGraph& Graph, const int& NTestNodes, const TIntV& SubGraphNIdV, const bool& IsDir, double& EffDiam, int& FullDiam) {
00312   EffDiam = -1;
00313   FullDiam = -1;
00314   TIntFltH DistToCntH;
00315   TBreathFS<PGraph> BFS(Graph);
00316   // shotest paths
00317   TIntV NodeIdV(SubGraphNIdV);  NodeIdV.Shuffle(TInt::Rnd);
00318   TInt Dist;
00319   for (int tries = 0; tries < TMath::Mn(NTestNodes, Graph->GetNodes()); tries++) {
00320     const int NId = NodeIdV[tries];
00321     BFS.DoBfs(NId, true, ! IsDir, -1, TInt::Mx);
00322     for (int i = 0; i < SubGraphNIdV.Len(); i++) {
00323       if (BFS.NIdDistH.IsKeyGetDat(SubGraphNIdV[i], Dist)) {
00324         DistToCntH.AddDat(Dist) += 1; }
00325     }
00326   }
00327   TIntFltKdV DistNbrsPdfV;
00328   for (int i = 0; i < DistToCntH.Len(); i++) {
00329     DistNbrsPdfV.Add(TIntFltKd(DistToCntH.GetKey(i), DistToCntH[i]));
00330   }
00331   DistNbrsPdfV.Sort();
00332   EffDiam = TSnap::TSnapDetail::CalcEffDiamPdf(DistNbrsPdfV, 0.9);  // effective diameter (90-th percentile)
00333   FullDiam = DistNbrsPdfV.Last().Key;                 // approximate full diameter (max shortest path length over the sampled nodes)
00334   return EffDiam;                                     // average shortest path length
00335 }
00336 
00337 } // namespace TSnap