SNAP Library 6.0, Developer Reference  2020-12-09 16:24:20
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
word2vec.h File Reference
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Functions

void LearnEmbeddings (TVVec< TInt, int64 > &WalksVV, const int &Dimensions, const int &WinSize, const int &Iter, const bool &Verbose, TIntFltVH &EmbeddingsHV)
 Learns embeddings with the Skip-gram model, trained by SGD with negative sampling. More...
 

Variables

const int MaxExp = 6
 
const int ExpTablePrecision = 10000
 
const int TableSize = MaxExp*ExpTablePrecision*2
 
const int NegSamN = 5
 
const double StartAlpha = 0.025
 

Function Documentation

void LearnEmbeddings ( TVVec< TInt, int64 > &  WalksVV,
const int &  Dimensions,
const int &  WinSize,
const int &  Iter,
const bool &  Verbose,
TIntFltVH &  EmbeddingsHV 
)

Learns embeddings with the Skip-gram model, trained by SGD with negative sampling.

Definition at line 160 of file word2vec.cpp.

References THash< TKey, TDat, THashFunc >::AddDat(), TMath::E, ExpTablePrecision, THash< TKey, TDat, THashFunc >::GetDat(), TVVec< TVal, TSizeTy >::GetXDim(), TVVec< TVal, TSizeTy >::GetYDim(), InitNegEmb(), InitPosEmb(), InitUnigramTable(), THash< TKey, TDat, THashFunc >::IsKey(), LearnVocab(), MaxExp, TMath::Power(), StartAlpha, TableSize, and TrainModel().

Referenced by node2vec().

162  {
163  TIntIntH RnmH;
164  TIntIntH RnmBackH;
165  int64 NNodes = 0;
166  //renaming nodes into consecutive numbers
167  for (int i = 0; i < WalksVV.GetXDim(); i++) {
168  for (int64 j = 0; j < WalksVV.GetYDim(); j++) {
169  if ( RnmH.IsKey(WalksVV(i, j)) ) {
170  WalksVV(i, j) = RnmH.GetDat(WalksVV(i, j));
171  } else {
172  RnmH.AddDat(WalksVV(i,j),NNodes);
173  RnmBackH.AddDat(NNodes,WalksVV(i, j));
174  WalksVV(i, j) = NNodes++;
175  }
176  }
177  }
178  TIntV Vocab(NNodes);
179  LearnVocab(WalksVV, Vocab);
180  TIntV KTable(NNodes);
181  TFltV UTable(NNodes);
182  TVVec<TFlt, int64> SynNeg;
183  TVVec<TFlt, int64> SynPos;
184  TRnd Rnd(time(NULL));
185  InitPosEmb(Vocab, Dimensions, Rnd, SynPos);
186  InitNegEmb(Vocab, Dimensions, SynNeg);
187  InitUnigramTable(Vocab, KTable, UTable);
188  TFltV ExpTable(TableSize);
189  double Alpha = StartAlpha; //learning rate
190 #pragma omp parallel for schedule(dynamic)
191  for (int i = 0; i < TableSize; i++ ) {
192  double Value = -MaxExp + static_cast<double>(i) / static_cast<double>(ExpTablePrecision);
193  ExpTable[i] = TMath::Power(TMath::E, Value);
194  }
195  int64 WordCntAll = 0;
196 // op RS 2016/09/26, collapse does not compile on Mac OS X
197 //#pragma omp parallel for schedule(dynamic) collapse(2)
198  for (int j = 0; j < Iter; j++) {
199 #pragma omp parallel for schedule(dynamic)
200  for (int64 i = 0; i < WalksVV.GetXDim(); i++) {
201  TrainModel(WalksVV, Dimensions, WinSize, Iter, Verbose, KTable, UTable,
202  WordCntAll, ExpTable, Alpha, i, Rnd, SynNeg, SynPos);
203  }
204  }
205  if (Verbose) { printf("\n"); fflush(stdout); }
206  for (int64 i = 0; i < SynPos.GetXDim(); i++) {
207  TFltV CurrV(SynPos.GetYDim());
208  for (int j = 0; j < SynPos.GetYDim(); j++) { CurrV[j] = SynPos(i, j); }
209  EmbeddingsHV.AddDat(RnmBackH.GetDat(i), CurrV);
210  }
211 }
TRnd
Definition: dt.h:11
void InitUnigramTable(TIntV &Vocab, TIntV &KTable, TFltV &UTable)
Definition: word2vec.cpp:18
void InitNegEmb(TIntV &Vocab, const int &Dimensions, TVVec< TFlt, int64 > &SynNeg)
Definition: word2vec.cpp:73
const TDat & GetDat(const TKey &Key) const
Definition: hash.h:262
const int MaxExp
Definition: word2vec.h:10
TVVec
Definition: ds.h:2223
TSizeTy GetYDim() const
Definition: ds.h:2251
static double Power(const double &Base, const double &Exponent)
Definition: xmath.h:25
void TrainModel(TVVec< TInt, int64 > &WalksVV, const int &Dimensions, const int &WinSize, const int &Iter, const bool &Verbose, TIntV &KTable, TFltV &UTable, int64 &WordCntAll, TFltV &ExpTable, double &Alpha, int64 CurrWalk, TRnd &Rnd, TVVec< TFlt, int64 > &SynNeg, TVVec< TFlt, int64 > &SynPos)
Definition: word2vec.cpp:92
long long int64
Definition: bd.h:27
const double StartAlpha
Definition: word2vec.h:20
TSizeTy GetXDim() const
Definition: ds.h:2250
THash
Definition: hash.h:97
const int ExpTablePrecision
Definition: word2vec.h:13
void LearnVocab(TVVec< TInt, int64 > &WalksVV, TIntV &Vocab)
Definition: word2vec.cpp:8
const int TableSize
Definition: word2vec.h:14
bool IsKey(const TKey &Key) const
Definition: hash.h:258
TDat & AddDat(const TKey &Key)
Definition: hash.h:238
static double E
Definition: xmath.h:7
void InitPosEmb(TIntV &Vocab, const int &Dimensions, TRnd &Rnd, TVVec< TFlt, int64 > &SynPos)
Definition: word2vec.cpp:83

Here is the call graph for this function:

Here is the caller graph for this function:

Variable Documentation

const int ExpTablePrecision = 10000

Definition at line 13 of file word2vec.h.

Referenced by LearnEmbeddings(), and TrainModel().

const int NegSamN = 5

Definition at line 17 of file word2vec.h.

Referenced by TrainModel().

const double StartAlpha = 0.025

Definition at line 20 of file word2vec.h.

Referenced by LearnEmbeddings(), and TrainModel().

const int TableSize = MaxExp*ExpTablePrecision*2

Definition at line 14 of file word2vec.h.

Referenced by LearnEmbeddings(), and TrainModel().