SNAP Library 4.0, Developer Reference  2017-07-27 13:18:06
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
sim.cpp File Reference
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Functions

float JaccardSim (TNGraph::TNodeI NI1, TNGraph::TNodeI NI2)
 
void MergeNbrs (TIntV *NeighbourV, TIntV *list1, TNGraph::TNodeI NI2)
 
PNGraph GetBiGraph (PTable P, int index_col_1, int index_col_2)
 
PNEANet KNNJaccardParallel (PNGraph Graph, int K)
 
PNEANet KNNJaccard (PNGraph Graph, int K)
 

Function Documentation

PNGraph GetBiGraph ( PTable  P,
int  index_col_1,
int  index_col_2 
)

Definition at line 73 of file sim.cpp.

References aaFirst.

73  {
74  TVec<TPair<TStr, TAttrType>, int > S = P->GetSchema();
75  PNGraph Graph = TSnap::ToGraph<PNGraph>(P, S[index_col_1].GetVal1(), S[index_col_2].GetVal1(), aaFirst);
76  return Graph;
77 }
Definition: table.h:257
Vector is a sequence TVal objects representing an array that can change in size.
Definition: ds.h:430
float JaccardSim ( TNGraph::TNodeI  NI1,
TNGraph::TNodeI  NI2 
)

Definition at line 2 of file sim.cpp.

References TNGraph::TNodeI::GetOutDeg(), and TNGraph::TNodeI::GetOutNId().

Referenced by KNNJaccard(), and KNNJaccardParallel().

2  {
3  int lenA = NI1.GetOutDeg();
4  int lenB = NI2.GetOutDeg();
5  int ct = 0;
6  int j = 0;
7  int i = 0;
8  while (i < lenA && j < lenB) {
9  if (NI1.GetOutNId(i) == NI2.GetOutNId(j)) {
10  ct++; i++; j++;
11  } else if (NI1.GetOutNId(i) > NI2.GetOutNId(j)) {
12  j++;
13  } else {
14  i++;
15  }
16  }
17  return ct*1.0/(lenA+lenB-ct);
18 
19 }
int GetOutDeg() const
Returns out-degree of the current node.
Definition: graph.h:402
int GetOutNId(const int &NodeN) const
Returns ID of NodeN-th out-node (the node the current node points to).
Definition: graph.h:412

Here is the call graph for this function:

Here is the caller graph for this function:

PNEANet KNNJaccard ( PNGraph  Graph,
int  K 
)

Definition at line 183 of file sim.cpp.

References TVec< TVal, TSizeTy >::Add(), TVec< TVal, TSizeTy >::Clr(), TNGraph::TNodeI::GetId(), TNGraph::TNodeI::GetInDeg(), TNGraph::GetNI(), TNGraph::GetNIdV(), TNGraph::GetNodes(), TNGraph::TNodeI::GetOutDeg(), TNGraph::TNodeI::GetOutNId(), JaccardSim(), TVec< TVal, TSizeTy >::Len(), MergeNbrs(), TNEANet::New(), and TVec< TVal, TSizeTy >::SetVal().

183  {
184  PNEANet KNN = TNEANet::New();
185 
186  int sum_neighbors = 0;
187  int ct;
188  int end;
189  end = Graph->GetNodes();
190  TIntV* Neighbors_old = new TIntV();
191  TIntV* Neighbors = new TIntV();
192  TIntV* temp;
193  TIntV NIdV;
194  Graph->GetNIdV (NIdV);
195  int size = NIdV.Len();
196  for (int ind = 0; ind < size; ind++) {
197  KNN->AddNode(NIdV[ind]);
198  }
199  KNN->AddFltAttrE("sim");
200 
201  for (int ind = 0; ind < size; ind++) {
202  TNGraph::TNodeI NI = Graph->GetNI(NIdV[ind]);
203  if (NI.GetInDeg() > 0) {
204  continue;
205  }
206  if (NI.GetOutDeg() == 0) {
207  continue;
208  }
209  ct ++;
210 
211  TVec<TPair<TFlt, TInt> > TopK;
212  for (int i = 0; i < K; i++) {
213  TopK.Add(TPair<TFlt,TInt>(0.0, -1));
214  }
215 
216  Neighbors->Clr(false);
217  Neighbors_old->Clr(false);
218 
219  for (int i = 0; i < NI.GetOutDeg(); i++) {
220  TNGraph::TNodeI Inst_NI = Graph->GetNI(NI.GetOutNId(i));
221  MergeNbrs(Neighbors, Neighbors_old, Inst_NI);
222 
223  temp = Neighbors_old;
224  temp->Clr(false);
225  Neighbors_old = Neighbors;
226  Neighbors = temp;
227  }
228  int num = Neighbors_old->Len();
229  sum_neighbors += num;
230 
231  //Swap neighbors and Neighbors_old
232 
233  temp = Neighbors_old;
234  Neighbors_old = Neighbors;
235  Neighbors = temp;
236  for (int j = 0; j< Neighbors->Len(); j++) {
237 
238  TNGraph::TNodeI Auth_NI = Graph->GetNI((*Neighbors)[j]);
239 
240  float similarity = JaccardSim(NI, Auth_NI);
241  if (TopK[K-1].GetVal1() < similarity) {
242  int index = 0;
243  for (int i = K-2; i >= 0; i--)
244  if (TopK[i].GetVal1() < similarity) {
245  TopK.SetVal(i+1, TopK[i]);
246  } else {
247  index = i+1;
248  break;
249  }
250  TopK.SetVal(index, TPair<TFlt, TInt>(similarity, (*Neighbors)[j]));
251  }
252  }
253 
254  for (int i = 0; i < K; i++) {
255  int EId = KNN->AddEdge(NI.GetId(), TopK[i].GetVal2());
256  KNN->AddFltAttrDatE(EId, TopK[i].GetVal1(), "sim");
257  }
258 
259 // if (ct%10000 == 0)
260 // cout<<ct<<" avg neighbor degree = "<<sum_neighbors*1.0/ct<<" "<<currentDateTime()<<endl;
261 
262  }
263 
264  return KNN;
265 }
float JaccardSim(TNGraph::TNodeI NI1, TNGraph::TNodeI NI2)
Definition: sim.cpp:2
void GetNIdV(TIntV &NIdV) const
Gets a vector IDs of all nodes in the graph.
Definition: graph.cpp:376
TNodeI GetNI(const int &NId) const
Returns an iterator referring to the node of ID NId in the graph.
Definition: graph.h:548
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:575
int GetNodes() const
Returns the number of nodes in the graph.
Definition: graph.h:499
void MergeNbrs(TIntV *NeighbourV, TIntV *list1, TNGraph::TNodeI NI2)
Definition: sim.cpp:21
void Clr(const bool &DoDel=true, const TSizeTy &NoDelLim=-1)
Clears the contents of the vector.
Definition: ds.h:1022
void SetVal(const TSizeTy &ValN, const TVal &Val)
Sets the value of element at position ValN to Val.
Definition: ds.h:653
Definition: ds.h:32
int GetId() const
Returns ID of the current node.
Definition: graph.h:396
int GetOutDeg() const
Returns out-degree of the current node.
Definition: graph.h:402
Node iterator. Only forward iteration (operator++) is supported.
Definition: graph.h:379
TVec< TInt > TIntV
Definition: ds.h:1594
Definition: bd.h:196
int GetInDeg() const
Returns in-degree of the current node.
Definition: graph.h:400
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:602
static PNEANet New()
Static cons returns pointer to graph. Ex: PNEANet Graph=TNEANet::New().
Definition: network.h:2176
int GetOutNId(const int &NodeN) const
Returns ID of NodeN-th out-node (the node the current node points to).
Definition: graph.h:412

Here is the call graph for this function:

PNEANet KNNJaccardParallel ( PNGraph  Graph,
int  K 
)

Definition at line 80 of file sim.cpp.

References TVec< TVal, TSizeTy >::Add(), TVec< TVal, TSizeTy >::Clr(), TNGraph::TNodeI::GetInDeg(), TNGraph::GetNI(), TNGraph::GetNIdV(), TNGraph::TNodeI::GetOutDeg(), TNGraph::TNodeI::GetOutNId(), JaccardSim(), TVec< TVal, TSizeTy >::Len(), MergeNbrs(), TNEANet::New(), and TVec< TVal, TSizeTy >::SetVal().

80  {
81  PNEANet KNN = TNEANet::New();
82  TIntV NIdV;
83  Graph->GetNIdV (NIdV);
84  int size = NIdV.Len();
85  for (int ind = 0; ind < size; ind++) {
86  KNN->AddNode(NIdV[ind]);
87  }
88  KNN->AddFltAttrE("sim");
89  TVec<TVec<TPair<TFlt, TInt>, int >, int > TopKList;
90  TVec<TVec<TPair<TFlt, TInt>, int >, int > ThTopK; // for each thread
91  TIntV NodeList;
92  TIntV ThNodeList;// for each thread
93  int NumThreads = omp_get_max_threads();
94  omp_set_num_threads(NumThreads);
95  #pragma omp parallel private(ThNodeList, ThTopK)
96  {
97  TIntV* Neighbors_old = new TIntV();
98  TIntV* Neighbors = new TIntV();
99  TIntV* temp;
100 
101  #pragma omp for schedule(dynamic,1000)
102  for (int ind = 0; ind < size; ind++) {
103  TNGraph::TNodeI NI = Graph->GetNI(NIdV[ind]);
104  if (NI.GetInDeg() > 0) {
105  continue;
106  }
107  if (NI.GetOutDeg() == 0) {
108  continue;
109  }
110 
111  TVec<TPair<TFlt, TInt>, int > TopK;
112  for (int i = 0; i < K; i++) {
113  TopK.Add(TPair<TFlt,TInt>(0.0, -1));
114  }
115 
116  Neighbors->Clr(false);
117  Neighbors_old->Clr(false);
118 
119  for (int i = 0; i < NI.GetOutDeg(); i++) {
120  TNGraph::TNodeI Inst_NI = Graph->GetNI(NI.GetOutNId(i));
121  MergeNbrs(Neighbors, Neighbors_old, Inst_NI);
122 
123  temp = Neighbors_old;
124  temp->Clr(false);
125  Neighbors_old = Neighbors;
126  Neighbors = temp;
127  }
128 
129  // Swap neighbors and Neighbors_old
130 
131  temp = Neighbors_old;
132  Neighbors_old = Neighbors;
133  Neighbors = temp;
134  for(int j = 0; j< Neighbors->Len(); j++) {
135 
136  TNGraph::TNodeI Auth_NI = Graph->GetNI((*Neighbors)[j]);
137 
138  float similarity = JaccardSim(NI, Auth_NI);
139  if (TopK[K-1].GetVal1() < similarity) {
140  int index = 0;
141  for (int i = K-2; i >= 0; i--)
142  if (TopK[i].GetVal1() < similarity) {
143  TopK.SetVal(i+1, TopK[i]);
144  } else {
145  index = i+1;
146  break;
147  }
148  TopK.SetVal(index, TPair<TFlt, TInt>(similarity, (*Neighbors)[j]));
149  }
150  }
151 
152  ThTopK.Add(TopK);
153  ThNodeList.Add(NIdV[ind]);
154 
155 // if (ct%10000 == 0)
156 // cout<<ct<<" avg neighbor degree = "<<sum_neighbors*1.0/ct<<" "<<currentDateTime()<<endl;
157 
158  }
159  #pragma omp critical
160  {
161  for (int j = 0; j < ThTopK.Len(); j++) {
162  TopKList.Add(ThTopK[j]);
163  NodeList.Add(ThNodeList[j]);
164  }
165  }
166  }
167 
168  int size2 = NodeList.Len();
169  for (int i= 0; i < size2 ; i++) {
170 
171  for (int j = 0; j < K; j++) {
172  if (TopKList[i][j].GetVal2() <= -1) {
173  break;
174  }
175  int EId = KNN->AddEdge(NodeList[i], TopKList[i][j].GetVal2());
176  KNN->AddFltAttrDatE(EId, TopKList[i][j].GetVal1(), "sim");
177  }
178  }
179  return KNN;
180 }
float JaccardSim(TNGraph::TNodeI NI1, TNGraph::TNodeI NI2)
Definition: sim.cpp:2
void GetNIdV(TIntV &NIdV) const
Gets a vector IDs of all nodes in the graph.
Definition: graph.cpp:376
TNodeI GetNI(const int &NId) const
Returns an iterator referring to the node of ID NId in the graph.
Definition: graph.h:548
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:575
void MergeNbrs(TIntV *NeighbourV, TIntV *list1, TNGraph::TNodeI NI2)
Definition: sim.cpp:21
void Clr(const bool &DoDel=true, const TSizeTy &NoDelLim=-1)
Clears the contents of the vector.
Definition: ds.h:1022
void SetVal(const TSizeTy &ValN, const TVal &Val)
Sets the value of element at position ValN to Val.
Definition: ds.h:653
Definition: ds.h:32
int GetOutDeg() const
Returns out-degree of the current node.
Definition: graph.h:402
Node iterator. Only forward iteration (operator++) is supported.
Definition: graph.h:379
TVec< TInt > TIntV
Definition: ds.h:1594
Definition: bd.h:196
int GetInDeg() const
Returns in-degree of the current node.
Definition: graph.h:400
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:602
static PNEANet New()
Static cons returns pointer to graph. Ex: PNEANet Graph=TNEANet::New().
Definition: network.h:2176
int GetOutNId(const int &NodeN) const
Returns ID of NodeN-th out-node (the node the current node points to).
Definition: graph.h:412

Here is the call graph for this function:

void MergeNbrs ( TIntV NeighbourV,
TIntV list1,
TNGraph::TNodeI  NI2 
)

Definition at line 21 of file sim.cpp.

References TVec< TVal, TSizeTy >::Add(), TNGraph::TNodeI::GetInDeg(), TNGraph::TNodeI::GetInNId(), and TVec< TVal, TSizeTy >::Len().

Referenced by KNNJaccard(), and KNNJaccardParallel().

21  {
22  int j = 0;
23  int k = 0;
24  int prev = -1;
25  int lenA = list1->Len();
26 
27  int lenB = NI2.GetInDeg();
28  if (lenA > 0 && lenB > 0) {
29  int v1 = (*list1)[j];
30  int v2 = NI2.GetInNId(k);
31  while (1) {
32  if (v1 <= v2) {
33  if (prev != v1) {
34  NeighbourV->Add(v1);
35  prev = v1;
36  }
37  j += 1;
38  if (j >= lenA) {
39  break;
40  }
41  v1 = (*list1)[j];
42  } else {
43  if (prev != v2) {
44  NeighbourV->Add(v2);
45  prev = v2;
46  }
47  k += 1;
48  if (k >= lenB) {
49  break;
50  }
51  v2 = NI2.GetInNId(k);
52  }
53  }
54  }
55  while (j < lenA) {
56  int v = (*list1)[j];
57  if (prev != v) {
58  NeighbourV->Add(v);
59  prev = v;
60  }
61  j += 1;
62  }
63  while (k < lenB) {
64  int v = NI2.GetInNId(k);
65  if (prev != v) {
66  NeighbourV->Add(v);
67  prev = v;
68  }
69  k += 1;
70  }
71 }
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:575
int GetInDeg() const
Returns in-degree of the current node.
Definition: graph.h:400
int GetInNId(const int &NodeN) const
Returns ID of NodeN-th in-node (the node pointing to the current node).
Definition: graph.h:408
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:602

Here is the call graph for this function:

Here is the caller graph for this function: