SNAP Library 3.0, Developer Reference  2016-07-20 17:56:49
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
anf.h
Go to the documentation of this file.
1 // Approximate Neighborhood Function.
3 namespace TSnap {
10 template <class PGraph> void GetAnf(const PGraph& Graph, const int& SrcNId, TIntFltKdV& DistNbrsV, const int& MxDist, const bool& IsDir, const int& NApprox=32);
17 template <class PGraph> void GetAnf(const PGraph& Graph, TIntFltKdV& DistNbrsV, const int& MxDist, const bool& IsDir, const int& NApprox=32);
20 template <class PGraph> double GetAnfEffDiam(const PGraph& Graph, const bool& IsDir, const double& Percentile, const int& NApprox);
23 template <class PGraph> double GetAnfEffDiam(const PGraph& Graph, const int NRuns=1, int NApprox=-1);
24 } // namespace TSnap
25 
32 template <class PGraph>
33 class TGraphAnf {
34 private:
36  THash<TInt, uint64> NIdToBitPosH; // NId to byte(!) offset in BitV
37  TInt NApprox; // maintain N parallel approximations (multiple of 8)
38  TInt NBits, MoreBits, ApproxBytes; // NBits=logNodes+MoreBits; MoreBits: additional R bits; ApproxBytes: Approx/8;
39  PGraph Graph;
41 private:
43 public:
44  TGraphAnf(const PGraph& GraphPt, const int& Approx=32, const int& moreBits=5, const int& RndSeed=0) :
45  NApprox(Approx), MoreBits(moreBits), Graph(GraphPt), Rnd(RndSeed) { IAssert(NApprox%8==0); IAssert(sizeof(uint)==4); }
46  uint64 GetNIdOffset(const int& NId) const { return NIdToBitPosH.GetDat(NId); }
47  void InitAnfBits(TAnfBitV& BitV);
48  void Union(TAnfBitV& BitV1, const uint64& NId1Offset, TAnfBitV& BitV2, const uint64& NId2Offset) const;
49  double AvgLstZero(const TAnfBitV& BitV, const uint64& NIdOffset) const;
50  double GetCount(const TAnfBitV& BitV, const uint64& NIdOffset) const {
51  return pow(2.0, AvgLstZero(BitV, NIdOffset)) / 0.77351; }
57  void GetNodeAnf(const int& SrcNId, TIntFltKdV& DistNbrsV, const int& MxDist, const bool& IsDir);
63  void GetGraphAnf(TIntFltKdV& DistNbrsV, const int& MxDist, const bool& IsDir);
64 };
65 
66 template <class PGraph>
68  const int NNodes = Graph->GetNodes();
69  const int LogNodes = int(ceil(TMath::Log2(NNodes)));
70  ApproxBytes = NApprox / 8;
71  NBits = LogNodes + MoreBits; // bits per node
72  const int BytesPerNd = ApproxBytes * NBits; // total bytes per node
73  int64 VSize = ((static_cast<int64>(NNodes) * static_cast<int64>(BytesPerNd))/sizeof(uint)) + 1;
74  IAssertR(VSize <= TInt::Mx,
75  TStr::Fmt("Your graph is too large for Approximate Neighborhood Function, %s is larger than %d",
76  TUInt64::GetStr(VSize).CStr(),TInt::Mx));
77  printf("size %d\n", static_cast<int>(VSize));
78  BitV.Gen((const int)VSize); IAssert(BitV.BegI() != NULL);
79  BitV.PutAll(0);
80  int SetBit = 0;
81  uint64 NodeOff = 0;
82  uchar* BitVPt = (uchar *) BitV.BegI();
83  // for each node: 1st bits of all approximations are at BitV[Offset+0], 2nd bits at BitV[Offset+NApprox/32], ...
84  for (typename PGraph::TObj::TNodeI NI = Graph->BegNI(); NI != Graph->EndNI(); NI++) {
85  NIdToBitPosH.AddDat(NI.GetId(), NodeOff);
86  // init vertex bits
87  for (int approx = 0; approx < NApprox; approx++) {
88  const int RndNum = Rnd.GetUniDevInt();
89  for (SetBit = 0; (RndNum & (1<<SetBit))==0 && SetBit < NBits; SetBit++) { }
90  if (SetBit >= NBits) SetBit = NBits-1;
91  const int BitPos = ApproxBytes * SetBit + approx / 8;
92  *(BitVPt + NodeOff + BitPos) |= (1<<(approx % 8)); // magically works better than code below (see anf.c)
93  }
94  NodeOff += BytesPerNd;
95  }
96 }
97 
98 template <class PGraph>
99 void TGraphAnf<PGraph>::Union(TAnfBitV& BitV1, const uint64& NId1Offset, TAnfBitV& BitV2, const uint64& NId2Offset) const {
100  uchar* DstI = (uchar *) BitV1.BegI() + NId1Offset;
101  uchar* SrcI = (uchar *) BitV2.BegI() + NId2Offset;
102  for (int b=0; b < ApproxBytes*NBits; b++, DstI++, SrcI++) { *DstI |= *SrcI; }
103 }
104 
105 // Average least zero bit position (least significant zero)
106 template <class PGraph>
107 double TGraphAnf<PGraph>::AvgLstZero(const TAnfBitV& BitV, const uint64& NIdOffset) const { //average least zero bit position (least significant zero)
108  int approx, bit, AvgBitPos = 0;
109  uchar* BitVPt = (uchar *) BitV.BegI();
110  for (approx = 0; approx < NApprox; approx++) {
111  for (bit = 0; bit < NBits; bit++) {
112  if ((*(BitVPt+NIdOffset + ApproxBytes*bit + approx/8) & (1<<(approx%8))) == 0) break; } // found zero
113  if (bit > NBits) bit = NBits;
114  AvgBitPos += bit;
115  }
116  return AvgBitPos/double(NApprox) ;
117 }
118 
119 template <class PGraph>
120 void TGraphAnf<PGraph>::GetNodeAnf(const int& SrcNId, TIntFltKdV& DistNbrsV, const int& MxDist, const bool& IsDir) {
121  //const int NNodes = Graph->GetNodes();
122  TAnfBitV CurBitsV, LastBitsV;
123  InitAnfBits(CurBitsV); IAssert(CurBitsV.BegI() != NULL);
124  LastBitsV.Gen(CurBitsV.Len()); IAssert(LastBitsV.BegI() != NULL);
125  DistNbrsV.Clr();
126  DistNbrsV.Add(TIntFltKd(1, Graph->GetNI(SrcNId).GetOutDeg()));
127  for (int dist = 1; dist < (MxDist==-1 ? TInt::Mx : MxDist); dist++) {
128  memcpy(LastBitsV.BegI(), CurBitsV.BegI(), sizeof(uint)*CurBitsV.Len()); //LastBitsV = CurBitsV;
129  for (typename PGraph::TObj::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
130  const uint64 NIdOffset = GetNIdOffset(NI.GetId());
131  for (int e = 0; e < NI.GetOutDeg(); e++) {
132  const uint64 NId2Offset = GetNIdOffset(NI.GetOutNId(e));
133  Union(CurBitsV, NIdOffset, LastBitsV, NId2Offset);
134  }
135  if (! IsDir) {
136  for (int e = 0; e < NI.GetInDeg(); e++) {
137  const uint64 NId2Offset = GetNIdOffset(NI.GetInNId(e));
138  Union(CurBitsV, NIdOffset, LastBitsV, NId2Offset);
139  }
140  }
141  }
142  DistNbrsV.Add(TIntFltKd(dist, GetCount(CurBitsV, GetNIdOffset(SrcNId))));
143  if (DistNbrsV.Len() > 1 && DistNbrsV.Last().Dat < 1.001*DistNbrsV[DistNbrsV.Len()-2].Dat) break; // 0.1% change
144  }
145 }
146 
147 template <class PGraph>
148 void TGraphAnf<PGraph>::GetGraphAnf(TIntFltKdV& DistNbrsV, const int& MxDist, const bool& IsDir) {
149  TAnfBitV CurBitsV, LastBitsV;
150  InitAnfBits(CurBitsV); IAssert(CurBitsV.BegI() != NULL);
151  LastBitsV.Gen(CurBitsV.Len()); IAssert(LastBitsV.BegI() != NULL);
152  int v, e;
153  double NPairs = 0.0;
154  DistNbrsV.Clr();
155  DistNbrsV.Add(TIntFltKd(0, Graph->GetNodes()));
156  DistNbrsV.Add(TIntFltKd(1, Graph->GetEdges()));
157  //TExeTm ExeTm;
158  for (int dist = 2; dist < (MxDist==-1 ? TInt::Mx : MxDist); dist++) {
159  //printf("ANF dist %d...", dist); ExeTm.Tick();
160  memcpy(LastBitsV.BegI(), CurBitsV.BegI(), sizeof(uint)*CurBitsV.Len()); //LastBitsV = CurBitsV;
161  for (typename PGraph::TObj::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
162  const uint64 NIdOffset = GetNIdOffset(NI.GetId());
163  for (e = 0; e < NI.GetOutDeg(); e++) {
164  const uint64 NId2Offset = GetNIdOffset(NI.GetOutNId(e));
165  Union(CurBitsV, NIdOffset, LastBitsV, NId2Offset);
166  }
167  if (! IsDir) {
168  for (e = 0; e < NI.GetInDeg(); e++) {
169  const uint64 NId2Offset = GetNIdOffset(NI.GetInNId(e));
170  Union(CurBitsV, NIdOffset, LastBitsV, NId2Offset);
171  }
172  }
173  }
174  NPairs = 0.0;
175  for (v = NIdToBitPosH.FFirstKeyId(); NIdToBitPosH.FNextKeyId(v); ) {
176  NPairs += GetCount(CurBitsV, NIdToBitPosH[v]);
177  }
178  DistNbrsV.Add(TIntFltKd(dist, NPairs));
179  //printf("pairs: %g %s\n", NPairs, ExeTm.GetTmStr());
180  if (NPairs == 0) { break; }
181  if (DistNbrsV.Len() > 1 && NPairs < 1.001*DistNbrsV.LastLast().Dat) { break; } // 0.1% change
182  //TGnuPlot::SaveTs(DistNbrsV, "hops.tab", "HOPS, REACHABLE PAIRS");
183  }
184 }
186 // Approximate Neighborhood Function
187 namespace TSnap {
188 
189 namespace TSnapDetail {
191 double CalcEffDiam(const TIntFltKdV& DistNbrsCdfV, const double& Percentile=0.9);
193 double CalcEffDiam(const TFltPrV& DistNbrsCdfV, const double& Percentile=0.9);
195 double CalcEffDiamPdf(const TIntFltKdV& DistNbrsPdfV, const double& Percentile=0.9);
197 double CalcEffDiamPdf(const TFltPrV& DistNbrsPdfV, const double& Percentile=0.9);
199 double CalcAvgDiamPdf(const TIntFltKdV& DistNbrsPdfV);
201 double CalcAvgDiamPdf(const TFltPrV& DistNbrsPdfV);
202 } // TSnapDetail
203 
204 template <class PGraph>
205 void GetAnf(const PGraph& Graph, const int& SrcNId, TIntFltKdV& DistNbrsV, const int& MxDist, const bool& IsDir, const int& NApprox) {
206  TGraphAnf<PGraph> Anf(Graph, NApprox, 5, 0);
207  Anf.GetNodeAnf(SrcNId, DistNbrsV, MxDist, IsDir);
208 }
209 
210 template <class PGraph>
211 void GetAnf(const PGraph& Graph, TIntFltKdV& DistNbrsV, const int& MxDist, const bool& IsDir, const int& NApprox) {
212  TGraphAnf<PGraph> Anf(Graph, NApprox, 5, 0);
213  Anf.GetGraphAnf(DistNbrsV, MxDist, IsDir);
214 }
215 
216 template <class PGraph>
217 double GetAnfEffDiam(const PGraph& Graph, const bool& IsDir, const double& Percentile, const int& NApprox) {
218  TIntFltKdV DistNbrsV;
219  TGraphAnf<PGraph> Anf(Graph, NApprox, 5, 0);
220  Anf.GetGraphAnf(DistNbrsV, -1, IsDir);
221  return TSnap::TSnapDetail::CalcEffDiam(DistNbrsV, Percentile);
222 }
223 
224 template<class PGraph>
225 double GetAnfEffDiam(const PGraph& Graph, const int NRuns, int NApprox) {
226  //return TSnap::GetEffDiam(Graph, IsDir, 0.9, 32);
227  TMom Mom;
228  if (NApprox == -1) {
229  if (Graph->GetNodes() < 100000) { NApprox = 64; }
230  else if (Graph->GetNodes() < 1000000) { NApprox = 32; }
231  else { NApprox = 16; }
232  }
233  const bool IsDir = false;
234  for (int r = 0; r < NRuns; r++) {
235  Mom.Add(TSnap::GetAnfEffDiam(Graph, IsDir, 0.9, NApprox));
236  }
237  Mom.Def();
238  return Mom.GetMean();
239 }
240 
241 template <class PGraph> void TestAnf() {
242  PGraph Graph = PGraph::TObj::New();
243  //Graph:
244  // 0 2 ----> 3
245  // ^ |
246  // | |
247  // | ^
248  // 1 5 <---- 4
249  for (int v = 0; v < 6; v++) { Graph->AddNode(v); }
250  Graph->AddEdge(2, 3);
251  Graph->AddEdge(3, 4);
252  Graph->AddEdge(4, 5);
253  Graph->AddEdge(5, 2);
254  TFltV AnfV;
255  for (int t = 0; t < 10; t++) {
256  TGraphAnf<PGraph> Anf(Graph, 128, 5, t+1);
257  TIntFltKdV DistToNbrsV;
258  Anf.GetGraphAnf(DistToNbrsV, 5, true);
259  printf("\n--seed: %d---------------------\n", t+1);
260  for (int i = 0; i < DistToNbrsV.Len(); i++) {
261  printf("dist: %d\t hops:%f\n", DistToNbrsV[i].Key(), DistToNbrsV[i].Dat());
262  }
263  AnfV.Add(DistToNbrsV.Last().Dat);
264  }
265  TMom Mom(AnfV);
266  printf("-----------\nAvgAnf: %f StDev: %f\n", Mom.GetMean(), Mom.GetSDev());//*/
267  // const int NApprox = 32;
268  /*printf("\nANF vs. SAMPLE diam test (10 runs of ANF, NApprox=%d):\n", NApprox);
269  //Graph = TGGen<PGraph>::GenGrid(20, 20);
270  Graph = TGAlg::GetMxWcc(TGGen<PGraph>::GenRnd(1000, 10000));
271  TFltV FullAnf, EffAnf;
272  for (int tryn = 0; tryn < 10; tryn++) {
273  FullAnf.Add(GetEffDiam(Graph, false, 1.0, NApprox));
274  EffAnf.Add(GetEffDiam(Graph, false, 0.9, NApprox));
275  }
276  TMom FullMom(FullAnf);
277  TMom AnfMom(EffAnf);
278  printf(" Sample FullDiam: %d\n", TGAlg::GetBfsFullDiam(Graph, 100, false));
279  printf(" Anf FullDiam: %f [%f]\n", FullMom.GetMean(), FullMom.GetSDev());
280  printf(" Sample EffDiam [90%%]: %f\n", TGAlg::GetBfsEffDiam(Graph, 100, false));
281  printf(" Anf EffDiam [90%%]: %f [%f]\n", AnfMom.GetMean(), AnfMom.GetSDev());
282  // epinions
283  printf("\nEpinions graph:\n");
284  { typedef PNGraph PGraph;
285  PGraph G = TGGen<PGraph>::GenEpinions();
286  TIntFltKdV DistToPairsV;
287  GetAnf(G, DistToPairsV, 50, true);
288  for(int i = 0; i < DistToPairsV.Len(); i++) {
289  printf("\t%d\t%f\n", DistToPairsV[i].Key, DistToPairsV[i].Dat); }
290  printf("\nUndir\n");
291  TAnf<PGraph>::GetAnf(G, DistToPairsV, 50, false);
292  for(int j = 0; j < DistToPairsV.Len(); j++) {
293  printf("\t%d\t%f\n", DistToPairsV[j].Key, DistToPairsV[j].Dat); }
294  }//*/
295 }
296 
297 } // namespace TSnap
TRnd Rnd
Definition: anf.h:40
void GetGraphAnf(TIntFltKdV &DistNbrsV, const int &MxDist, const bool &IsDir)
Definition: anf.h:148
#define IAssert(Cond)
Definition: bd.h:262
PGraph Graph
Definition: anf.h:39
#define IAssertR(Cond, Reason)
Definition: bd.h:265
Definition: dt.h:11
unsigned int uint
Definition: bd.h:11
static const int Mx
Definition: dt.h:1049
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:547
TKeyDat< TInt, TFlt > TIntFltKd
Definition: ds.h:380
void GetAnf(const PGraph &Graph, const int &SrcNId, TIntFltKdV &DistNbrsV, const int &MxDist, const bool &IsDir, const int &NApprox=32)
Definition: anf.h:205
double GetAnfEffDiam(const PGraph &Graph, const bool &IsDir, const double &Percentile, const int &NApprox)
Definition: anf.h:217
void Union(TAnfBitV &BitV1, const uint64 &NId1Offset, TAnfBitV &BitV2, const uint64 &NId2Offset) const
Definition: anf.h:99
TInt ApproxBytes
Definition: anf.h:38
Definition: xmath.h:129
const TDat & GetDat(const TKey &Key) const
Definition: hash.h:220
void TestAnf()
Definition: anf.h:241
double GetSDev() const
Definition: xmath.h:242
TGraphAnf(const PGraph &GraphPt, const int &Approx=32, const int &moreBits=5, const int &RndSeed=0)
Definition: anf.h:44
double AvgLstZero(const TAnfBitV &BitV, const uint64 &NIdOffset) const
Definition: anf.h:107
TInt NBits
Definition: anf.h:38
void Clr(const bool &DoDel=true, const TSizeTy &NoDelLim=-1)
Clears the contents of the vector.
Definition: ds.h:971
void Add(const TFlt &Val, const TFlt &Wgt=1)
Definition: xmath.h:217
void PutAll(const TVal &Val)
Sets all elements of the vector to value Val.
Definition: ds.h:1166
unsigned long long uint64
Definition: bd.h:38
const TVal & LastLast() const
Returns a reference to the one before last element of the vector.
Definition: ds.h:557
double CalcEffDiamPdf(const TIntFltKdV &DistNbrsPdfV, const double &Percentile)
Helper function for computing a given Percentile of a (unnormalized) probability distribution functio...
Definition: anf.cpp:29
void InitAnfBits(TAnfBitV &BitV)
Definition: anf.h:67
void GetNodeAnf(const int &SrcNId, TIntFltKdV &DistNbrsV, const int &MxDist, const bool &IsDir)
Definition: anf.h:120
const TVal & Last() const
Returns a reference to the last element of the vector.
Definition: ds.h:551
unsigned char uchar
Definition: bd.h:10
Definition: dt.h:1044
double CalcAvgDiamPdf(const TIntFltKdV &DistNbrsPdfV)
Helper function for computing the mean of a (unnormalized) probability distribution function...
Definition: anf.cpp:41
UndefDefaultCopyAssign(TGraphAnf)
double CalcEffDiam(const TIntFltKdV &DistNbrsCdfV, const double &Percentile)
Helper function for computing a given Percentile of a (unnormalized) cumulative distribution function...
Definition: anf.cpp:7
TStr GetStr() const
Definition: dt.h:1270
long long int64
Definition: bd.h:27
double GetMean() const
Definition: xmath.h:240
uint64 GetNIdOffset(const int &NId) const
Definition: anf.h:46
TIter BegI() const
Returns an iterator pointing to the first element in the vector.
Definition: ds.h:565
static TStr Fmt(const char *FmtStr,...)
Definition: dt.cpp:1599
static double Log2(const double &Val)
Definition: xmath.h:15
THash< TInt, uint64 > NIdToBitPosH
Definition: anf.h:36
void Gen(const TSizeTy &_Vals)
Constructs a vector (an array) of _Vals elements.
Definition: ds.h:495
TVec< uint64 > TAnfBitV
Definition: anf.h:35
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:574
Definition: anf.h:33
TInt MoreBits
Definition: anf.h:38
void Def()
Definition: xmath.cpp:339
TInt NApprox
Definition: anf.h:37
Vector is a sequence TVal objects representing an array that can change in size.
Definition: ds.h:429
double GetCount(const TAnfBitV &BitV, const uint64 &NIdOffset) const
Definition: anf.h:50