SNAP Library, User Reference  2012-10-02 12:56:23
SNAP, a general purpose network analysis and graph mining library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
anf.h
Go to the documentation of this file.
00001 
00002 // Approximate Neighborhood Function.
00003 namespace TSnap {
00010 template <class PGraph> void GetAnf(const PGraph& Graph, const int& SrcNId, TIntFltKdV& DistNbrsV, const int& MxDist, const bool& IsDir, const int& NApprox=32); 
00017 template <class PGraph> void GetAnf(const PGraph& Graph, TIntFltKdV& DistNbrsV, const int& MxDist, const bool& IsDir, const int& NApprox=32);
00020 template <class PGraph> double GetAnfEffDiam(const PGraph& Graph, const bool& IsDir, const double& Percentile, const int& NApprox);
00023 template <class PGraph> double GetAnfEffDiam(const PGraph& Graph, const int NRuns=1, int NApprox=-1);
00024 } // namespace TSnap
00025 
00032 template <class PGraph>
00033 class TGraphAnf {
00034 private:
00035   typedef TVec<uint64> TAnfBitV;
00036   THash<TInt, uint64> NIdToBitPosH;  // NId to byte(!) offset in BitV
00037   TInt NApprox;                      // maintain N parallel approximations (multiple of 8)
00038   TInt NBits, MoreBits, ApproxBytes; // NBits=logNodes+MoreBits;  MoreBits: additional R bits;  ApproxBytes: Approx/8;
00039   PGraph Graph;
00040   TRnd Rnd;
00041 private:
00042   UndefDefaultCopyAssign(TGraphAnf);
00043 public:
00044   TGraphAnf(const PGraph& GraphPt, const int& Approx=32, const int& moreBits=5, const int& RndSeed=0) :
00045     NApprox(Approx), MoreBits(moreBits), Graph(GraphPt), Rnd(RndSeed) { IAssert(NApprox%8==0);  IAssert(sizeof(uint)==4); }
00046   uint64 GetNIdOffset(const int& NId) const { return NIdToBitPosH.GetDat(NId); }
00047   void InitAnfBits(TAnfBitV& BitV);
00048   void Union(TAnfBitV& BitV1, const uint64& NId1Offset, TAnfBitV& BitV2, const uint64& NId2Offset) const;
00049   double AvgLstZero(const TAnfBitV& BitV, const uint64& NIdOffset) const;
00050   double GetCount(const TAnfBitV& BitV, const uint64& NIdOffset) const {
00051     return pow(2.0, AvgLstZero(BitV, NIdOffset)) / 0.77351; }
00057   void GetNodeAnf(const int& SrcNId, TIntFltKdV& DistNbrsV, const int& MxDist, const bool& IsDir);
00063   void GetGraphAnf(TIntFltKdV& DistNbrsV, const int& MxDist, const bool& IsDir);
00064 };
00065 
00066 template <class PGraph>
00067 void TGraphAnf<PGraph>::InitAnfBits(TAnfBitV& BitV) {
00068   const int NNodes = Graph->GetNodes();
00069   const int LogNodes = int(ceil(TMath::Log2(NNodes)));
00070   ApproxBytes = NApprox / 8;
00071   NBits = LogNodes + MoreBits; // bits per node
00072   const int BytesPerNd = ApproxBytes * NBits; // total bytes per node
00073   BitV.Gen((NNodes * BytesPerNd)/sizeof(uint)+1);  IAssert(BitV.BegI() != NULL);
00074   BitV.PutAll(0);
00075   int SetBit = 0;
00076   uint64 NodeOff = 0;
00077   uchar* BitVPt = (uchar *) BitV.BegI();
00078   // for each node: 1st bits of all approximations are at BitV[Offset+0], 2nd bits at BitV[Offset+NApprox/32], ...
00079   for (typename PGraph::TObj::TNodeI NI = Graph->BegNI(); NI != Graph->EndNI(); NI++) {
00080     NIdToBitPosH.AddDat(NI.GetId(), NodeOff);
00081     // init vertex bits
00082     for (int approx = 0; approx < NApprox; approx++) {
00083       const int RndNum = Rnd.GetUniDevInt();
00084       for (SetBit = 0; (RndNum & (1<<SetBit))==0 && SetBit < NBits; SetBit++) { }
00085       if (SetBit >= NBits) SetBit = NBits-1;
00086       const int BitPos = ApproxBytes * SetBit + approx / 8;
00087       *(BitVPt + NodeOff + BitPos) |= (1<<(approx % 8)); // magically works better than code below (see anf.c)
00088     }
00089     NodeOff += BytesPerNd;
00090   }
00091 }
00092 
00093 template <class PGraph>
00094 void TGraphAnf<PGraph>::Union(TAnfBitV& BitV1, const uint64& NId1Offset, TAnfBitV& BitV2, const uint64& NId2Offset) const {
00095   uchar* DstI = (uchar *) BitV1.BegI() + NId1Offset;
00096   uchar* SrcI = (uchar *) BitV2.BegI() + NId2Offset;
00097   for (int b=0; b < ApproxBytes*NBits; b++, DstI++, SrcI++) { *DstI |= *SrcI; }
00098 }
00099 
00100 // Average least zero bit position (least significant zero)
00101 template <class PGraph>
00102 double TGraphAnf<PGraph>::AvgLstZero(const TAnfBitV& BitV, const uint64& NIdOffset) const { //average least zero bit position (least significant zero)
00103   int approx, bit, AvgBitPos = 0;
00104   uchar* BitVPt = (uchar *) BitV.BegI();
00105   for (approx = 0; approx < NApprox; approx++) {
00106     for (bit = 0; bit < NBits; bit++) {
00107       if ((*(BitVPt+NIdOffset + ApproxBytes*bit + approx/8) & (1<<(approx%8))) == 0) break; } // found zero
00108     if (bit > NBits) bit = NBits;
00109     AvgBitPos += bit;
00110   }
00111   return AvgBitPos/double(NApprox) ;
00112 }
00113 
00114 template <class PGraph>
00115 void TGraphAnf<PGraph>::GetNodeAnf(const int& SrcNId, TIntFltKdV& DistNbrsV, const int& MxDist, const bool& IsDir) {
00116   const int NNodes = Graph->GetNodes();
00117   TAnfBitV CurBitsV, LastBitsV;
00118   InitAnfBits(CurBitsV);          IAssert(CurBitsV.BegI() != NULL);
00119   LastBitsV.Gen(CurBitsV.Len());  IAssert(LastBitsV.BegI() != NULL);
00120   DistNbrsV.Clr();
00121   DistNbrsV.Add(TIntFltKd(1, Graph->GetNI(SrcNId).GetOutDeg()));
00122   for (int dist = 1; dist < (MxDist==-1 ? TInt::Mx : MxDist); dist++) {
00123     memcpy(LastBitsV.BegI(), CurBitsV.BegI(), sizeof(uint)*CurBitsV.Len()); //LastBitsV = CurBitsV;
00124     for (typename PGraph::TObj::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
00125       const uint64 NIdOffset = GetNIdOffset(NI.GetId());
00126       for (int e = 0; e < NI.GetOutDeg(); e++) {
00127         const uint64 NId2Offset = GetNIdOffset(NI.GetOutNId(e));
00128         Union(CurBitsV, NIdOffset,  LastBitsV, NId2Offset);
00129       }
00130       if (! IsDir) {
00131         for (int e = 0; e < NI.GetInDeg(); e++) {
00132           const uint64 NId2Offset = GetNIdOffset(NI.GetInNId(e));
00133           Union(CurBitsV, NIdOffset,  LastBitsV, NId2Offset);
00134         }
00135       }
00136     }
00137     DistNbrsV.Add(TIntFltKd(dist, GetCount(CurBitsV, GetNIdOffset(SrcNId))));
00138     if (DistNbrsV.Len() > 1 && DistNbrsV.Last().Dat < 1.001*DistNbrsV[DistNbrsV.Len()-2].Dat) break; // 0.1%  change
00139   }
00140 }
00141 
00142 template <class PGraph>
00143 void TGraphAnf<PGraph>::GetGraphAnf(TIntFltKdV& DistNbrsV, const int& MxDist, const bool& IsDir) {
00144   TAnfBitV CurBitsV, LastBitsV;
00145   InitAnfBits(CurBitsV);          IAssert(CurBitsV.BegI() != NULL);
00146   LastBitsV.Gen(CurBitsV.Len());  IAssert(LastBitsV.BegI() != NULL);
00147   int v, e;
00148   double NPairs = 0.0;
00149   DistNbrsV.Clr();
00150   DistNbrsV.Add(TIntFltKd(0, Graph->GetNodes()));
00151   DistNbrsV.Add(TIntFltKd(1, Graph->GetEdges()));
00152   //TExeTm ExeTm;
00153   for (int dist = 2; dist < (MxDist==-1 ? TInt::Mx : MxDist); dist++) {
00154     //printf("ANF dist %d...", dist);  ExeTm.Tick();
00155     memcpy(LastBitsV.BegI(), CurBitsV.BegI(), sizeof(uint)*CurBitsV.Len()); //LastBitsV = CurBitsV;
00156     for (typename PGraph::TObj::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
00157       const uint64 NIdOffset = GetNIdOffset(NI.GetId());
00158       for (e = 0; e < NI.GetOutDeg(); e++) {
00159         const uint64 NId2Offset = GetNIdOffset(NI.GetOutNId(e));
00160         Union(CurBitsV, NIdOffset,  LastBitsV, NId2Offset);
00161       }
00162       if (! IsDir) {
00163         for (e = 0; e < NI.GetInDeg(); e++) {
00164           const uint64 NId2Offset = GetNIdOffset(NI.GetInNId(e));
00165           Union(CurBitsV, NIdOffset,  LastBitsV, NId2Offset);
00166         }
00167       }
00168     }
00169     NPairs = 0.0;
00170     for (v = NIdToBitPosH.FFirstKeyId(); NIdToBitPosH.FNextKeyId(v); ) {
00171       NPairs += GetCount(CurBitsV, NIdToBitPosH[v]);
00172     }
00173     DistNbrsV.Add(TIntFltKd(dist, NPairs));
00174     //printf("pairs: %g  %s\n", NPairs, ExeTm.GetTmStr());
00175     if (NPairs == 0) { break; }
00176     if (DistNbrsV.Len() > 1 && NPairs < 1.001*DistNbrsV.LastLast().Dat) { break; } // 0.1%  change
00177     //TGnuPlot::SaveTs(DistNbrsV, "hops.tab", "HOPS, REACHABLE PAIRS");
00178   }
00179 }
00181 // Approximate Neighborhood Function
00182 namespace TSnap {
00183 
00184 namespace TSnapDetail {
00186 double CalcEffDiam(const TIntFltKdV& DistNbrsCdfV, const double& Percentile=0.9);
00188 double CalcEffDiam(const TFltPrV& DistNbrsCdfV, const double& Percentile=0.9);
00190 double CalcEffDiamPdf(const TIntFltKdV& DistNbrsPdfV, const double& Percentile=0.9);
00192 double CalcEffDiamPdf(const TFltPrV& DistNbrsPdfV, const double& Percentile=0.9);
00194 double CalcAvgDiamPdf(const TIntFltKdV& DistNbrsPdfV);
00196 double CalcAvgDiamPdf(const TFltPrV& DistNbrsPdfV);
00197 } // TSnapDetail
00198 
00199 template <class PGraph>
00200 void GetAnf(const PGraph& Graph, const int& SrcNId, TIntFltKdV& DistNbrsV, const int& MxDist, const bool& IsDir, const int& NApprox) {
00201   TGraphAnf<PGraph> Anf(Graph, NApprox, 5, 0);
00202   Anf.GetNodeAnf(SrcNId, DistNbrsV, MxDist, IsDir);
00203 }
00204 
00205 template <class PGraph>
00206 void GetAnf(const PGraph& Graph, TIntFltKdV& DistNbrsV, const int& MxDist, const bool& IsDir, const int& NApprox) {
00207   TGraphAnf<PGraph> Anf(Graph, NApprox, 5, 0);
00208   Anf.GetGraphAnf(DistNbrsV, MxDist, IsDir);
00209 }
00210 
00211 template <class PGraph>
00212 double GetAnfEffDiam(const PGraph& Graph, const bool& IsDir, const double& Percentile, const int& NApprox) {
00213   TIntFltKdV DistNbrsV;
00214   TGraphAnf<PGraph> Anf(Graph, NApprox, 5, 0);
00215   Anf.GetGraphAnf(DistNbrsV, -1, IsDir);
00216   return TSnap::TSnapDetail::CalcEffDiam(DistNbrsV, Percentile);
00217 }
00218 
00219 template<class PGraph>
00220 double GetAnfEffDiam(const PGraph& Graph, const int NRuns, int NApprox) {
00221   //return TSnap::GetEffDiam(Graph, IsDir, 0.9, 32);
00222   TMom Mom;
00223   if (NApprox == -1) {
00224     if (Graph->GetNodes() < 100000) { NApprox = 64; }
00225     else if (Graph->GetNodes() < 1000000) { NApprox = 32; }
00226     else { NApprox = 16; }
00227   }
00228   const bool IsDir = false;
00229   for (int r = 0; r < NRuns; r++) {
00230     Mom.Add(TSnap::GetAnfEffDiam(Graph, IsDir, 0.9, NApprox));
00231   }
00232   Mom.Def();
00233   return Mom.GetMean();
00234 }
00235 
00236 template <class PGraph> void TestAnf() {
00237   PGraph Graph = PGraph::TObj::New();
00238   //Graph:
00239   //  0    2 ----> 3
00240   //       ^       |
00241   //       |       |
00242   //       |       ^
00243   //  1    5 <---- 4
00244   for (int v = 0; v < 6; v++) { Graph->AddNode(v); }
00245   Graph->AddEdge(2, 3);
00246   Graph->AddEdge(3, 4);
00247   Graph->AddEdge(4, 5);
00248   Graph->AddEdge(5, 2);
00249   TFltV AnfV;
00250   for (int t = 0; t < 10; t++) {
00251     TGraphAnf<PGraph> Anf(Graph, 128, 5, t+1);
00252     TIntFltKdV DistToNbrsV;
00253     Anf.GetGraphAnf(DistToNbrsV, 5, true);
00254     printf("\n--seed: %d---------------------\n", t+1);
00255     for (int i = 0; i < DistToNbrsV.Len(); i++) {
00256       printf("dist: %d\t hops:%f\n", DistToNbrsV[i].Key(), DistToNbrsV[i].Dat());
00257     }
00258     AnfV.Add(DistToNbrsV.Last().Dat);
00259   }
00260   TMom Mom(AnfV);
00261   printf("-----------\nAvgAnf: %f  StDev:  %f\n", Mom.GetMean(), Mom.GetSDev());//*/
00262   // const int NApprox = 32;
00263   /*printf("\nANF vs. SAMPLE diam test (10 runs of ANF, NApprox=%d):\n", NApprox);
00264   //Graph = TGGen<PGraph>::GenGrid(20, 20);
00265   Graph = TGAlg::GetMxWcc(TGGen<PGraph>::GenRnd(1000, 10000));
00266   TFltV FullAnf, EffAnf;
00267   for (int tryn = 0; tryn < 10; tryn++) {
00268     FullAnf.Add(GetEffDiam(Graph, false, 1.0, NApprox));
00269     EffAnf.Add(GetEffDiam(Graph, false, 0.9, NApprox));
00270   }
00271   TMom FullMom(FullAnf);
00272   TMom AnfMom(EffAnf);
00273   printf("  Sample FullDiam:      %d\n", TGAlg::GetBfsFullDiam(Graph, 100, false));
00274   printf("  Anf    FullDiam:      %f  [%f]\n", FullMom.GetMean(), FullMom.GetSDev());
00275   printf("  Sample EffDiam [90%%]: %f\n", TGAlg::GetBfsEffDiam(Graph, 100, false));
00276   printf("  Anf    EffDiam [90%%]: %f  [%f]\n", AnfMom.GetMean(), AnfMom.GetSDev());
00277   // epinions
00278   printf("\nEpinions graph:\n");
00279   { typedef PNGraph PGraph;
00280   PGraph G = TGGen<PGraph>::GenEpinions();
00281   TIntFltKdV DistToPairsV;
00282   GetAnf(G, DistToPairsV, 50, true);
00283   for(int i = 0; i < DistToPairsV.Len(); i++) {
00284     printf("\t%d\t%f\n", DistToPairsV[i].Key, DistToPairsV[i].Dat); }
00285   printf("\nUndir\n");
00286   TAnf<PGraph>::GetAnf(G, DistToPairsV, 50, false);
00287   for(int j = 0; j < DistToPairsV.Len(); j++) {
00288     printf("\t%d\t%f\n", DistToPairsV[j].Key, DistToPairsV[j].Dat); }
00289   }//*/
00290 }
00291 
00292 } // namespace TSnap