SNAP Library 4.0, Developer Reference  2017-07-27 13:18:06
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
sim.cpp
Go to the documentation of this file.
1 
3  int lenA = NI1.GetOutDeg();
4  int lenB = NI2.GetOutDeg();
5  int ct = 0;
6  int j = 0;
7  int i = 0;
8  while (i < lenA && j < lenB) {
9  if (NI1.GetOutNId(i) == NI2.GetOutNId(j)) {
10  ct++; i++; j++;
11  } else if (NI1.GetOutNId(i) > NI2.GetOutNId(j)) {
12  j++;
13  } else {
14  i++;
15  }
16  }
17  return ct*1.0/(lenA+lenB-ct);
18 
19 }
20 
21 void MergeNbrs(TIntV* NeighbourV, TIntV* list1, TNGraph::TNodeI NI2) {
22  int j = 0;
23  int k = 0;
24  int prev = -1;
25  int lenA = list1->Len();
26 
27  int lenB = NI2.GetInDeg();
28  if (lenA > 0 && lenB > 0) {
29  int v1 = (*list1)[j];
30  int v2 = NI2.GetInNId(k);
31  while (1) {
32  if (v1 <= v2) {
33  if (prev != v1) {
34  NeighbourV->Add(v1);
35  prev = v1;
36  }
37  j += 1;
38  if (j >= lenA) {
39  break;
40  }
41  v1 = (*list1)[j];
42  } else {
43  if (prev != v2) {
44  NeighbourV->Add(v2);
45  prev = v2;
46  }
47  k += 1;
48  if (k >= lenB) {
49  break;
50  }
51  v2 = NI2.GetInNId(k);
52  }
53  }
54  }
55  while (j < lenA) {
56  int v = (*list1)[j];
57  if (prev != v) {
58  NeighbourV->Add(v);
59  prev = v;
60  }
61  j += 1;
62  }
63  while (k < lenB) {
64  int v = NI2.GetInNId(k);
65  if (prev != v) {
66  NeighbourV->Add(v);
67  prev = v;
68  }
69  k += 1;
70  }
71 }
72 
73 PNGraph GetBiGraph(PTable P, int index_col_1, int index_col_2) {
74  TVec<TPair<TStr, TAttrType>, int > S = P->GetSchema();
75  PNGraph Graph = TSnap::ToGraph<PNGraph>(P, S[index_col_1].GetVal1(), S[index_col_2].GetVal1(), aaFirst);
76  return Graph;
77 }
78 
79 #ifdef GCC_ATOMIC
81  PNEANet KNN = TNEANet::New();
82  TIntV NIdV;
83  Graph->GetNIdV (NIdV);
84  int size = NIdV.Len();
85  for (int ind = 0; ind < size; ind++) {
86  KNN->AddNode(NIdV[ind]);
87  }
88  KNN->AddFltAttrE("sim");
89  TVec<TVec<TPair<TFlt, TInt>, int >, int > TopKList;
90  TVec<TVec<TPair<TFlt, TInt>, int >, int > ThTopK; // for each thread
91  TIntV NodeList;
92  TIntV ThNodeList;// for each thread
93  int NumThreads = omp_get_max_threads();
94  omp_set_num_threads(NumThreads);
95  #pragma omp parallel private(ThNodeList, ThTopK)
96  {
97  TIntV* Neighbors_old = new TIntV();
98  TIntV* Neighbors = new TIntV();
99  TIntV* temp;
100 
101  #pragma omp for schedule(dynamic,1000)
102  for (int ind = 0; ind < size; ind++) {
103  TNGraph::TNodeI NI = Graph->GetNI(NIdV[ind]);
104  if (NI.GetInDeg() > 0) {
105  continue;
106  }
107  if (NI.GetOutDeg() == 0) {
108  continue;
109  }
110 
111  TVec<TPair<TFlt, TInt>, int > TopK;
112  for (int i = 0; i < K; i++) {
113  TopK.Add(TPair<TFlt,TInt>(0.0, -1));
114  }
115 
116  Neighbors->Clr(false);
117  Neighbors_old->Clr(false);
118 
119  for (int i = 0; i < NI.GetOutDeg(); i++) {
120  TNGraph::TNodeI Inst_NI = Graph->GetNI(NI.GetOutNId(i));
121  MergeNbrs(Neighbors, Neighbors_old, Inst_NI);
122 
123  temp = Neighbors_old;
124  temp->Clr(false);
125  Neighbors_old = Neighbors;
126  Neighbors = temp;
127  }
128 
129  // Swap neighbors and Neighbors_old
130 
131  temp = Neighbors_old;
132  Neighbors_old = Neighbors;
133  Neighbors = temp;
134  for(int j = 0; j< Neighbors->Len(); j++) {
135 
136  TNGraph::TNodeI Auth_NI = Graph->GetNI((*Neighbors)[j]);
137 
138  float similarity = JaccardSim(NI, Auth_NI);
139  if (TopK[K-1].GetVal1() < similarity) {
140  int index = 0;
141  for (int i = K-2; i >= 0; i--)
142  if (TopK[i].GetVal1() < similarity) {
143  TopK.SetVal(i+1, TopK[i]);
144  } else {
145  index = i+1;
146  break;
147  }
148  TopK.SetVal(index, TPair<TFlt, TInt>(similarity, (*Neighbors)[j]));
149  }
150  }
151 
152  ThTopK.Add(TopK);
153  ThNodeList.Add(NIdV[ind]);
154 
155 // if (ct%10000 == 0)
156 // cout<<ct<<" avg neighbor degree = "<<sum_neighbors*1.0/ct<<" "<<currentDateTime()<<endl;
157 
158  }
159  #pragma omp critical
160  {
161  for (int j = 0; j < ThTopK.Len(); j++) {
162  TopKList.Add(ThTopK[j]);
163  NodeList.Add(ThNodeList[j]);
164  }
165  }
166  }
167 
168  int size2 = NodeList.Len();
169  for (int i= 0; i < size2 ; i++) {
170 
171  for (int j = 0; j < K; j++) {
172  if (TopKList[i][j].GetVal2() <= -1) {
173  break;
174  }
175  int EId = KNN->AddEdge(NodeList[i], TopKList[i][j].GetVal2());
176  KNN->AddFltAttrDatE(EId, TopKList[i][j].GetVal1(), "sim");
177  }
178  }
179  return KNN;
180 }
181 #endif
182 
183 PNEANet KNNJaccard(PNGraph Graph, int K) {
184  PNEANet KNN = TNEANet::New();
185 
186  int sum_neighbors = 0;
187  int ct;
188  int end;
189  end = Graph->GetNodes();
190  TIntV* Neighbors_old = new TIntV();
191  TIntV* Neighbors = new TIntV();
192  TIntV* temp;
193  TIntV NIdV;
194  Graph->GetNIdV (NIdV);
195  int size = NIdV.Len();
196  for (int ind = 0; ind < size; ind++) {
197  KNN->AddNode(NIdV[ind]);
198  }
199  KNN->AddFltAttrE("sim");
200 
201  for (int ind = 0; ind < size; ind++) {
202  TNGraph::TNodeI NI = Graph->GetNI(NIdV[ind]);
203  if (NI.GetInDeg() > 0) {
204  continue;
205  }
206  if (NI.GetOutDeg() == 0) {
207  continue;
208  }
209  ct ++;
210 
211  TVec<TPair<TFlt, TInt> > TopK;
212  for (int i = 0; i < K; i++) {
213  TopK.Add(TPair<TFlt,TInt>(0.0, -1));
214  }
215 
216  Neighbors->Clr(false);
217  Neighbors_old->Clr(false);
218 
219  for (int i = 0; i < NI.GetOutDeg(); i++) {
220  TNGraph::TNodeI Inst_NI = Graph->GetNI(NI.GetOutNId(i));
221  MergeNbrs(Neighbors, Neighbors_old, Inst_NI);
222 
223  temp = Neighbors_old;
224  temp->Clr(false);
225  Neighbors_old = Neighbors;
226  Neighbors = temp;
227  }
228  int num = Neighbors_old->Len();
229  sum_neighbors += num;
230 
231  //Swap neighbors and Neighbors_old
232 
233  temp = Neighbors_old;
234  Neighbors_old = Neighbors;
235  Neighbors = temp;
236  for (int j = 0; j< Neighbors->Len(); j++) {
237 
238  TNGraph::TNodeI Auth_NI = Graph->GetNI((*Neighbors)[j]);
239 
240  float similarity = JaccardSim(NI, Auth_NI);
241  if (TopK[K-1].GetVal1() < similarity) {
242  int index = 0;
243  for (int i = K-2; i >= 0; i--)
244  if (TopK[i].GetVal1() < similarity) {
245  TopK.SetVal(i+1, TopK[i]);
246  } else {
247  index = i+1;
248  break;
249  }
250  TopK.SetVal(index, TPair<TFlt, TInt>(similarity, (*Neighbors)[j]));
251  }
252  }
253 
254  for (int i = 0; i < K; i++) {
255  int EId = KNN->AddEdge(NI.GetId(), TopK[i].GetVal2());
256  KNN->AddFltAttrDatE(EId, TopK[i].GetVal1(), "sim");
257  }
258 
259 // if (ct%10000 == 0)
260 // cout<<ct<<" avg neighbor degree = "<<sum_neighbors*1.0/ct<<" "<<currentDateTime()<<endl;
261 
262  }
263 
264  return KNN;
265 }
266 
float JaccardSim(TNGraph::TNodeI NI1, TNGraph::TNodeI NI2)
Definition: sim.cpp:2
void GetNIdV(TIntV &NIdV) const
Gets a vector IDs of all nodes in the graph.
Definition: graph.cpp:376
TNodeI GetNI(const int &NId) const
Returns an iterator referring to the node of ID NId in the graph.
Definition: graph.h:548
PNEANet KNNJaccard(PNGraph Graph, int K)
Definition: sim.cpp:183
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:575
int GetNodes() const
Returns the number of nodes in the graph.
Definition: graph.h:499
void MergeNbrs(TIntV *NeighbourV, TIntV *list1, TNGraph::TNodeI NI2)
Definition: sim.cpp:21
PNGraph GetBiGraph(PTable P, int index_col_1, int index_col_2)
Definition: sim.cpp:73
Definition: table.h:257
void Clr(const bool &DoDel=true, const TSizeTy &NoDelLim=-1)
Clears the contents of the vector.
Definition: ds.h:1022
PNEANet KNNJaccardParallel(PNGraph Graph, int K)
Definition: sim.cpp:80
void SetVal(const TSizeTy &ValN, const TVal &Val)
Sets the value of element at position ValN to Val.
Definition: ds.h:653
Definition: ds.h:32
int GetId() const
Returns ID of the current node.
Definition: graph.h:396
int GetOutDeg() const
Returns out-degree of the current node.
Definition: graph.h:402
Node iterator. Only forward iteration (operator++) is supported.
Definition: graph.h:379
TVec< TInt > TIntV
Definition: ds.h:1594
int GetInDeg() const
Returns in-degree of the current node.
Definition: graph.h:400
int GetInNId(const int &NodeN) const
Returns ID of NodeN-th in-node (the node pointing to the current node).
Definition: graph.h:408
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:602
static PNEANet New()
Static cons returns pointer to graph. Ex: PNEANet Graph=TNEANet::New().
Definition: network.h:2176
int GetOutNId(const int &NodeN) const
Returns ID of NodeN-th out-node (the node the current node points to).
Definition: graph.h:412