SNAP Library 4.0, Developer Reference  2017-07-27 13:18:06
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
anf.h
Go to the documentation of this file.
1 // Approximate Neighborhood Function.
3 namespace TSnap {
10 template <class PGraph> void GetAnf(const PGraph& Graph, const int& SrcNId, TIntFltKdV& DistNbrsV, const int& MxDist, const bool& IsDir, const int& NApprox=32);
17 template <class PGraph> void GetAnf(const PGraph& Graph, TIntFltKdV& DistNbrsV, const int& MxDist, const bool& IsDir, const int& NApprox=32);
20 template <class PGraph> double GetAnfEffDiam(const PGraph& Graph, const bool& IsDir, const double& Percentile, const int& NApprox);
23 template <class PGraph> double GetAnfEffDiam(const PGraph& Graph, const int NRuns=1, int NApprox=-1);
24 } // namespace TSnap
25 
32 template <class PGraph>
33 class TGraphAnf {
34 private:
36  THash<TInt, uint64> NIdToBitPosH; // NId to byte(!) offset in BitV
37  TInt NApprox; // maintain N parallel approximations (multiple of 8)
38  TInt NBits, MoreBits, ApproxBytes; // NBits=logNodes+MoreBits; MoreBits: additional R bits; ApproxBytes: Approx/8;
39  PGraph Graph;
41 private:
43 public:
44  TGraphAnf(const PGraph& GraphPt, const int& Approx=32, const int& moreBits=5, const int& RndSeed=0) :
45  NApprox(Approx), MoreBits(moreBits), Graph(GraphPt), Rnd(RndSeed) { IAssert(NApprox%8==0); IAssert(sizeof(uint)==4); }
46  uint64 GetNIdOffset(const int& NId) const { return NIdToBitPosH.GetDat(NId); }
47  void InitAnfBits(TAnfBitV& BitV);
48  void Union(TAnfBitV& BitV1, const uint64& NId1Offset, TAnfBitV& BitV2, const uint64& NId2Offset) const;
49  double AvgLstZero(const TAnfBitV& BitV, const uint64& NIdOffset) const;
50  double GetCount(const TAnfBitV& BitV, const uint64& NIdOffset) const {
51  return pow(2.0, AvgLstZero(BitV, NIdOffset)) / 0.77351; }
57  void GetNodeAnf(const int& SrcNId, TIntFltKdV& DistNbrsV, const int& MxDist, const bool& IsDir);
63  void GetGraphAnf(TIntFltKdV& DistNbrsV, const int& MxDist, const bool& IsDir);
64 };
65 
66 template <class PGraph>
68  const int NNodes = Graph->GetNodes();
69  const int LogNodes = int(ceil(TMath::Log2(NNodes)));
70  ApproxBytes = NApprox / 8;
71  NBits = LogNodes + MoreBits; // bits per node
72  const int BytesPerNd = ApproxBytes * NBits; // total bytes per node
73  int64 VSize = ((static_cast<int64>(NNodes) * static_cast<int64>(BytesPerNd))/sizeof(uint)) + 1;
74  IAssertR(VSize <= TInt::Mx,
75  TStr::Fmt("Your graph is too large for Approximate Neighborhood Function, %s is larger than %d",
76  TUInt64::GetStr(VSize).CStr(),TInt::Mx));
77  printf("size %d\n", static_cast<int>(VSize));
78  BitV.Gen((const int)VSize); IAssert(BitV.BegI() != NULL);
79  BitV.PutAll(0);
80  int SetBit = 0;
81  uint64 NodeOff = 0;
82  uchar* BitVPt = (uchar *) BitV.BegI();
83  // for each node: 1st bits of all approximations are at BitV[Offset+0], 2nd bits at BitV[Offset+NApprox/32], ...
84  for (typename PGraph::TObj::TNodeI NI = Graph->BegNI(); NI != Graph->EndNI(); NI++) {
85  NIdToBitPosH.AddDat(NI.GetId(), NodeOff);
86  // init vertex bits
87  for (int approx = 0; approx < NApprox; approx++) {
88  const int RndNum = Rnd.GetUniDevInt();
89  for (SetBit = 0; (RndNum & (1<<SetBit))==0 && SetBit < NBits; SetBit++) { }
90  if (SetBit >= NBits) SetBit = NBits-1;
91  const int BitPos = ApproxBytes * SetBit + approx / 8;
92  *(BitVPt + NodeOff + BitPos) |= (1<<(approx % 8)); // magically works better than code below (see anf.c)
93  }
94  NodeOff += BytesPerNd;
95  }
96 }
97 
98 template <class PGraph>
99 void TGraphAnf<PGraph>::Union(TAnfBitV& BitV1, const uint64& NId1Offset, TAnfBitV& BitV2, const uint64& NId2Offset) const {
100  uchar* DstI = (uchar *) BitV1.BegI() + NId1Offset;
101  uchar* SrcI = (uchar *) BitV2.BegI() + NId2Offset;
102  for (int b=0; b < ApproxBytes*NBits; b++, DstI++, SrcI++) { *DstI |= *SrcI; }
103 }
104 
105 // Average least zero bit position (least significant zero)
106 template <class PGraph>
107 double TGraphAnf<PGraph>::AvgLstZero(const TAnfBitV& BitV, const uint64& NIdOffset) const { //average least zero bit position (least significant zero)
108  int approx, bit, AvgBitPos = 0;
109  uchar* BitVPt = (uchar *) BitV.BegI();
110  for (approx = 0; approx < NApprox; approx++) {
111  for (bit = 0; bit < NBits; bit++) {
112  if ((*(BitVPt+NIdOffset + ApproxBytes*bit + approx/8) & (1<<(approx%8))) == 0) break; } // found zero
113  if (bit > NBits) bit = NBits;
114  AvgBitPos += bit;
115  }
116  return AvgBitPos/double(NApprox) ;
117 }
118 
119 template <class PGraph>
120 void TGraphAnf<PGraph>::GetNodeAnf(const int& SrcNId, TIntFltKdV& DistNbrsV, const int& MxDist, const bool& IsDir) {
121  //const int NNodes = Graph->GetNodes();
122  TAnfBitV CurBitsV, LastBitsV;
123  InitAnfBits(CurBitsV); IAssert(CurBitsV.BegI() != NULL);
124  LastBitsV.Gen(CurBitsV.Len()); IAssert(LastBitsV.BegI() != NULL);
125  DistNbrsV.Clr();
126  DistNbrsV.Add(TIntFltKd(1, Graph->GetNI(SrcNId).GetOutDeg()));
127  for (int dist = 1; dist < (MxDist==-1 ? TInt::Mx : MxDist); dist++) {
128  memcpy(LastBitsV.BegI(), CurBitsV.BegI(), sizeof(uint)*CurBitsV.Len()); //LastBitsV = CurBitsV;
129  for (typename PGraph::TObj::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
130  const uint64 NIdOffset = GetNIdOffset(NI.GetId());
131  for (int e = 0; e < NI.GetOutDeg(); e++) {
132  const uint64 NId2Offset = GetNIdOffset(NI.GetOutNId(e));
133  Union(CurBitsV, NIdOffset, LastBitsV, NId2Offset);
134  }
135  if (! IsDir) {
136  for (int e = 0; e < NI.GetInDeg(); e++) {
137  const uint64 NId2Offset = GetNIdOffset(NI.GetInNId(e));
138  Union(CurBitsV, NIdOffset, LastBitsV, NId2Offset);
139  }
140  }
141  }
142  DistNbrsV.Add(TIntFltKd(dist, GetCount(CurBitsV, GetNIdOffset(SrcNId))));
143  if (DistNbrsV.Len() > 1 && DistNbrsV.Last().Dat < 1.001*DistNbrsV[DistNbrsV.Len()-2].Dat) break; // 0.1% change
144  }
145 }
146 
147 template <class PGraph>
148 void TGraphAnf<PGraph>::GetGraphAnf(TIntFltKdV& DistNbrsV, const int& MxDist, const bool& IsDir) {
149  TAnfBitV CurBitsV, LastBitsV;
150  InitAnfBits(CurBitsV); IAssert(CurBitsV.BegI() != NULL);
151  LastBitsV.Gen(CurBitsV.Len()); IAssert(LastBitsV.BegI() != NULL);
152  int v, e;
153  double NPairs = 0.0;
154  DistNbrsV.Clr();
155  DistNbrsV.Add(TIntFltKd(0, Graph->GetNodes()));
156  //TExeTm ExeTm;
157  for (int dist = 1; dist < (MxDist==-1 ? TInt::Mx : MxDist); dist++) {
158  //printf("ANF dist %d...", dist); ExeTm.Tick();
159  memcpy(LastBitsV.BegI(), CurBitsV.BegI(), sizeof(uint)*CurBitsV.Len()); //LastBitsV = CurBitsV;
160  for (typename PGraph::TObj::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
161  const uint64 NIdOffset = GetNIdOffset(NI.GetId());
162  for (e = 0; e < NI.GetOutDeg(); e++) {
163  const uint64 NId2Offset = GetNIdOffset(NI.GetOutNId(e));
164  Union(CurBitsV, NIdOffset, LastBitsV, NId2Offset);
165  }
166  if (! IsDir) {
167  for (e = 0; e < NI.GetInDeg(); e++) {
168  const uint64 NId2Offset = GetNIdOffset(NI.GetInNId(e));
169  Union(CurBitsV, NIdOffset, LastBitsV, NId2Offset);
170  }
171  }
172  }
173  NPairs = 0.0;
174  for (v = NIdToBitPosH.FFirstKeyId(); NIdToBitPosH.FNextKeyId(v); ) {
175  NPairs += GetCount(CurBitsV, NIdToBitPosH[v]);
176  }
177  DistNbrsV.Add(TIntFltKd(dist, NPairs));
178  //printf("pairs: %g %s\n", NPairs, ExeTm.GetTmStr());
179  if (NPairs == 0) { break; }
180  if (DistNbrsV.Len() > 1 && NPairs < 1.001*DistNbrsV.LastLast().Dat) { break; } // 0.1% change
181  //TGnuPlot::SaveTs(DistNbrsV, "hops.tab", "HOPS, REACHABLE PAIRS");
182  }
183 }
185 // Approximate Neighborhood Function
186 namespace TSnap {
187 
188 namespace TSnapDetail {
190 double CalcEffDiam(const TIntFltKdV& DistNbrsCdfV, const double& Percentile=0.9);
192 double CalcEffDiam(const TFltPrV& DistNbrsCdfV, const double& Percentile=0.9);
194 double CalcEffDiamPdf(const TIntFltKdV& DistNbrsPdfV, const double& Percentile=0.9);
196 double CalcEffDiamPdf(const TFltPrV& DistNbrsPdfV, const double& Percentile=0.9);
198 double CalcAvgDiamPdf(const TIntFltKdV& DistNbrsPdfV);
200 double CalcAvgDiamPdf(const TFltPrV& DistNbrsPdfV);
201 } // TSnapDetail
202 
203 template <class PGraph>
204 void GetAnf(const PGraph& Graph, const int& SrcNId, TIntFltKdV& DistNbrsV, const int& MxDist, const bool& IsDir, const int& NApprox) {
205  TGraphAnf<PGraph> Anf(Graph, NApprox, 5, 0);
206  Anf.GetNodeAnf(SrcNId, DistNbrsV, MxDist, IsDir);
207 }
208 
209 template <class PGraph>
210 void GetAnf(const PGraph& Graph, TIntFltKdV& DistNbrsV, const int& MxDist, const bool& IsDir, const int& NApprox) {
211  TGraphAnf<PGraph> Anf(Graph, NApprox, 5, 0);
212  Anf.GetGraphAnf(DistNbrsV, MxDist, IsDir);
213 }
214 
215 template <class PGraph>
216 double GetAnfEffDiam(const PGraph& Graph, const bool& IsDir, const double& Percentile, const int& NApprox) {
217  TIntFltKdV DistNbrsV;
218  TGraphAnf<PGraph> Anf(Graph, NApprox, 5, 0);
219  Anf.GetGraphAnf(DistNbrsV, -1, IsDir);
220  return TSnap::TSnapDetail::CalcEffDiam(DistNbrsV, Percentile);
221 }
222 
223 template<class PGraph>
224 double GetAnfEffDiam(const PGraph& Graph, const int NRuns, int NApprox) {
225  //return TSnap::GetEffDiam(Graph, IsDir, 0.9, 32);
226  TMom Mom;
227  if (NApprox == -1) {
228  if (Graph->GetNodes() < 100000) { NApprox = 64; }
229  else if (Graph->GetNodes() < 1000000) { NApprox = 32; }
230  else { NApprox = 16; }
231  }
232  const bool IsDir = false;
233  for (int r = 0; r < NRuns; r++) {
234  Mom.Add(TSnap::GetAnfEffDiam(Graph, IsDir, 0.9, NApprox));
235  }
236  Mom.Def();
237  return Mom.GetMean();
238 }
239 
240 template <class PGraph> void TestAnf() {
241  PGraph Graph = PGraph::TObj::New();
242  //Graph:
243  // 0 2 ----> 3
244  // ^ |
245  // | |
246  // | ^
247  // 1 5 <---- 4
248  for (int v = 0; v < 6; v++) { Graph->AddNode(v); }
249  Graph->AddEdge(2, 3);
250  Graph->AddEdge(3, 4);
251  Graph->AddEdge(4, 5);
252  Graph->AddEdge(5, 2);
253  TFltV AnfV;
254  for (int t = 0; t < 10; t++) {
255  TGraphAnf<PGraph> Anf(Graph, 128, 5, t+1);
256  TIntFltKdV DistToNbrsV;
257  Anf.GetGraphAnf(DistToNbrsV, 5, true);
258  printf("\n--seed: %d---------------------\n", t+1);
259  for (int i = 0; i < DistToNbrsV.Len(); i++) {
260  printf("dist: %d\t hops:%f\n", DistToNbrsV[i].Key(), DistToNbrsV[i].Dat());
261  }
262  AnfV.Add(DistToNbrsV.Last().Dat);
263  }
264  TMom Mom(AnfV);
265  printf("-----------\nAvgAnf: %f StDev: %f\n", Mom.GetMean(), Mom.GetSDev());//*/
266  // const int NApprox = 32;
267  /*printf("\nANF vs. SAMPLE diam test (10 runs of ANF, NApprox=%d):\n", NApprox);
268  //Graph = TGGen<PGraph>::GenGrid(20, 20);
269  Graph = TGAlg::GetMxWcc(TGGen<PGraph>::GenRnd(1000, 10000));
270  TFltV FullAnf, EffAnf;
271  for (int tryn = 0; tryn < 10; tryn++) {
272  FullAnf.Add(GetEffDiam(Graph, false, 1.0, NApprox));
273  EffAnf.Add(GetEffDiam(Graph, false, 0.9, NApprox));
274  }
275  TMom FullMom(FullAnf);
276  TMom AnfMom(EffAnf);
277  printf(" Sample FullDiam: %d\n", TGAlg::GetBfsFullDiam(Graph, 100, false));
278  printf(" Anf FullDiam: %f [%f]\n", FullMom.GetMean(), FullMom.GetSDev());
279  printf(" Sample EffDiam [90%%]: %f\n", TGAlg::GetBfsEffDiam(Graph, 100, false));
280  printf(" Anf EffDiam [90%%]: %f [%f]\n", AnfMom.GetMean(), AnfMom.GetSDev());
281  // epinions
282  printf("\nEpinions graph:\n");
283  { typedef PNGraph PGraph;
284  PGraph G = TGGen<PGraph>::GenEpinions();
285  TIntFltKdV DistToPairsV;
286  GetAnf(G, DistToPairsV, 50, true);
287  for(int i = 0; i < DistToPairsV.Len(); i++) {
288  printf("\t%d\t%f\n", DistToPairsV[i].Key, DistToPairsV[i].Dat); }
289  printf("\nUndir\n");
290  TAnf<PGraph>::GetAnf(G, DistToPairsV, 50, false);
291  for(int j = 0; j < DistToPairsV.Len(); j++) {
292  printf("\t%d\t%f\n", DistToPairsV[j].Key, DistToPairsV[j].Dat); }
293  }//*/
294 }
295 
296 } // namespace TSnap
TRnd Rnd
Definition: anf.h:40
void GetGraphAnf(TIntFltKdV &DistNbrsV, const int &MxDist, const bool &IsDir)
Definition: anf.h:148
#define IAssert(Cond)
Definition: bd.h:262
PGraph Graph
Definition: anf.h:39
#define IAssertR(Cond, Reason)
Definition: bd.h:265
Definition: dt.h:11
unsigned int uint
Definition: bd.h:11
static const int Mx
Definition: dt.h:1139
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:575
TKeyDat< TInt, TFlt > TIntFltKd
Definition: ds.h:381
void GetAnf(const PGraph &Graph, const int &SrcNId, TIntFltKdV &DistNbrsV, const int &MxDist, const bool &IsDir, const int &NApprox=32)
Definition: anf.h:204
double GetAnfEffDiam(const PGraph &Graph, const bool &IsDir, const double &Percentile, const int &NApprox)
Definition: anf.h:216
void Union(TAnfBitV &BitV1, const uint64 &NId1Offset, TAnfBitV &BitV2, const uint64 &NId2Offset) const
Definition: anf.h:99
TInt ApproxBytes
Definition: anf.h:38
Definition: xmath.h:129
const TDat & GetDat(const TKey &Key) const
Definition: hash.h:262
void TestAnf()
Definition: anf.h:240
double GetSDev() const
Definition: xmath.h:242
TGraphAnf(const PGraph &GraphPt, const int &Approx=32, const int &moreBits=5, const int &RndSeed=0)
Definition: anf.h:44
double AvgLstZero(const TAnfBitV &BitV, const uint64 &NIdOffset) const
Definition: anf.h:107
TInt NBits
Definition: anf.h:38
void Clr(const bool &DoDel=true, const TSizeTy &NoDelLim=-1)
Clears the contents of the vector.
Definition: ds.h:1022
void Add(const TFlt &Val, const TFlt &Wgt=1)
Definition: xmath.h:217
void PutAll(const TVal &Val)
Sets all elements of the vector to value Val.
Definition: ds.h:1229
unsigned long long uint64
Definition: bd.h:38
const TVal & LastLast() const
Returns a reference to the one before last element of the vector.
Definition: ds.h:585
double CalcEffDiamPdf(const TIntFltKdV &DistNbrsPdfV, const double &Percentile)
Helper function for computing a given Percentile of a (unnormalized) probability distribution functio...
Definition: anf.cpp:29
void InitAnfBits(TAnfBitV &BitV)
Definition: anf.h:67
void GetNodeAnf(const int &SrcNId, TIntFltKdV &DistNbrsV, const int &MxDist, const bool &IsDir)
Definition: anf.h:120
const TVal & Last() const
Returns a reference to the last element of the vector.
Definition: ds.h:579
unsigned char uchar
Definition: bd.h:10
Definition: dt.h:1134
double CalcAvgDiamPdf(const TIntFltKdV &DistNbrsPdfV)
Helper function for computing the mean of a (unnormalized) probability distribution function...
Definition: anf.cpp:41
UndefDefaultCopyAssign(TGraphAnf)
double CalcEffDiam(const TIntFltKdV &DistNbrsCdfV, const double &Percentile)
Helper function for computing a given Percentile of a (unnormalized) cumulative distribution function...
Definition: anf.cpp:7
TStr GetStr() const
Definition: dt.h:1360
long long int64
Definition: bd.h:27
double GetMean() const
Definition: xmath.h:240
uint64 GetNIdOffset(const int &NId) const
Definition: anf.h:46
TIter BegI() const
Returns an iterator pointing to the first element in the vector.
Definition: ds.h:593
static TStr Fmt(const char *FmtStr,...)
Definition: dt.cpp:1599
static double Log2(const double &Val)
Definition: xmath.h:15
THash< TInt, uint64 > NIdToBitPosH
Definition: anf.h:36
void Gen(const TSizeTy &_Vals)
Constructs a vector (an array) of _Vals elements.
Definition: ds.h:523
TVec< uint64 > TAnfBitV
Definition: anf.h:35
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:602
Definition: anf.h:33
TInt MoreBits
Definition: anf.h:38
void Def()
Definition: xmath.cpp:339
TInt NApprox
Definition: anf.h:37
Vector is a sequence TVal objects representing an array that can change in size.
Definition: ds.h:430
double GetCount(const TAnfBitV &BitV, const uint64 &NIdOffset) const
Definition: anf.h:50