SNAP Library 4.0, Developer Reference  2017-07-27 13:18:06
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
word2vec.h File Reference
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Functions

void LearnEmbeddings (TVVec< TInt, int64 > &WalksVV, int &Dimensions, int &WinSize, int &Iter, bool &Verbose, TIntFltVH &EmbeddingsHV)
 Learns embeddings using SGD, Skip-gram with negative sampling.
 

Variables

const int MaxExp = 6
 
const int ExpTablePrecision = 10000
 
const int TableSize = MaxExp*ExpTablePrecision*2
 
const int NegSamN = 5
 
const double StartAlpha = 0.025
 

Function Documentation

void LearnEmbeddings ( TVVec< TInt, int64 > &  WalksVV,
                       int &                    Dimensions,
                       int &                    WinSize,
                       int &                    Iter,
                       bool &                   Verbose,
                       TIntFltVH &              EmbeddingsHV
                     )

Learns embeddings using SGD, Skip-gram with negative sampling.

Definition at line 148 of file word2vec.cpp.

References THash< TKey, TDat, THashFunc >::AddDat(), TMath::E, ExpTablePrecision, THash< TKey, TDat, THashFunc >::GetDat(), TVVec< TVal, TSizeTy >::GetXDim(), TVVec< TVal, TSizeTy >::GetYDim(), InitNegEmb(), InitPosEmb(), InitUnigramTable(), THash< TKey, TDat, THashFunc >::IsKey(), LearnVocab(), MaxExp, TMath::Power(), StartAlpha, TableSize, and TrainModel().

Referenced by node2vec().

149  {
150    TIntIntH RnmH;
151    TIntIntH RnmBackH;
152    int64 NNodes = 0;
153    //renaming nodes into consecutive numbers
154    for (int i = 0; i < WalksVV.GetXDim(); i++) {
155      for (int64 j = 0; j < WalksVV.GetYDim(); j++) {
156        if ( RnmH.IsKey(WalksVV(i, j)) ) {
157          WalksVV(i, j) = RnmH.GetDat(WalksVV(i, j));
158        } else {
159          RnmH.AddDat(WalksVV(i,j),NNodes);
160          RnmBackH.AddDat(NNodes,WalksVV(i, j));
161          WalksVV(i, j) = NNodes++;
162        }
163      }
164    }
165    TIntV Vocab(NNodes);
166    LearnVocab(WalksVV, Vocab);
167    TIntV KTable(NNodes);
168    TFltV UTable(NNodes);
169    TVVec<TFlt, int64> SynNeg;
170    TVVec<TFlt, int64> SynPos;
171    TRnd Rnd(time(NULL));
172    InitPosEmb(Vocab, Dimensions, Rnd, SynPos);
173    InitNegEmb(Vocab, Dimensions, SynNeg);
174    InitUnigramTable(Vocab, KTable, UTable);
175    TFltV ExpTable(TableSize);
176    double Alpha = StartAlpha; //learning rate
177  #pragma omp parallel for schedule(dynamic)
178    for (int i = 0; i < TableSize; i++ ) {
179      double Value = -MaxExp + static_cast<double>(i) / static_cast<double>(ExpTablePrecision);
180      ExpTable[i] = TMath::Power(TMath::E, Value);
181    }
182    int64 WordCntAll = 0;
183  // op RS 2016/09/26, collapse does not compile on Mac OS X
184  //#pragma omp parallel for schedule(dynamic) collapse(2)
185    for (int j = 0; j < Iter; j++) {
186  #pragma omp parallel for schedule(dynamic)
187      for (int64 i = 0; i < WalksVV.GetXDim(); i++) {
188        TrainModel(WalksVV, Dimensions, WinSize, Iter, Verbose, KTable, UTable,
189                   WordCntAll, ExpTable, Alpha, i, Rnd, SynNeg, SynPos);
190      }
191    }
192    if (Verbose) { printf("\n"); fflush(stdout); }
193    for (int64 i = 0; i < SynPos.GetXDim(); i++) {
194      TFltV CurrV(SynPos.GetYDim());
195      for (int j = 0; j < SynPos.GetYDim(); j++) { CurrV[j] = SynPos(i, j); }
196      EmbeddingsHV.AddDat(RnmBackH.GetDat(i), CurrV);
197    }
198  }

Here is the call graph for this function:

Here is the caller graph for this function:
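
As a usage illustration, the following is a minimal, hedged sketch of driving LearnEmbeddings() directly rather than through node2vec(): it fills a small walk matrix by hand and reads the result out of the returned hash. The toy node IDs, sizes, and main() scaffolding are illustrative assumptions only; in node2vec() the walks come from the random-walk simulation, and the include setup is assumed to follow the node2vec example directory.

// Hedged usage sketch (not from the SNAP sources): hand-built walks fed to LearnEmbeddings().
#include "stdafx.h"     // assumption: pulls in the SNAP core headers, as in the node2vec example
#include "word2vec.h"

int main() {
  const int64 NumWalks = 4, WalkLen = 5;
  TVVec<TInt, int64> WalksVV(NumWalks, WalkLen);   // one walk per row
  for (int64 i = 0; i < NumWalks; i++) {
    for (int64 j = 0; j < WalkLen; j++) {
      WalksVV(i, j) = int((i + j) % 3);            // toy node IDs; real walks come from the graph
    }
  }
  int Dimensions = 8;                              // embedding size
  int WinSize = 2;                                 // skip-gram context window
  int Iter = 1;                                    // SGD passes over the walks
  bool Verbose = true;
  TIntFltVH EmbeddingsHV;                          // output: node ID -> embedding vector
  LearnEmbeddings(WalksVV, Dimensions, WinSize, Iter, Verbose, EmbeddingsHV);
  printf("learned %d embeddings of dimension %d\n", EmbeddingsHV.Len(), Dimensions);
  return 0;
}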

Variable Documentation

const int ExpTablePrecision = 10000

Definition at line 12 of file word2vec.h.

Referenced by LearnEmbeddings(), and TrainModel().

const int NegSamN = 5

Definition at line 16 of file word2vec.h.

Referenced by TrainModel().
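
NegSamN is the number of negative samples drawn for each (center, context) pair during the skip-gram update. The actual update lives in TrainModel() in word2vec.cpp; the code below is only a hedged, standalone sketch of one skip-gram-with-negative-sampling step, with a hypothetical SampleNegative() standing in for the unigram-table draw and plain std::exp standing in for the ExpTable lookup.

// Hedged sketch of a single skip-gram-with-negative-sampling update; not the TrainModel() code.
#include <cmath>
#include <cstdlib>
#include <vector>

static const int NegSamNSketch = 5;        // mirrors NegSamN in word2vec.h
static const double AlphaSketch = 0.025;   // mirrors StartAlpha

static int SampleNegative(int NNodes) {    // hypothetical stand-in; word2vec samples from a unigram table
  return std::rand() % NNodes;
}

void UpdatePair(std::vector<std::vector<double> >& SynPos,   // center (input) vectors
                std::vector<std::vector<double> >& SynNeg,   // context/negative (output) vectors
                int Center, int Context, int Dim, int NNodes) {
  std::vector<double> Grad(Dim, 0.0);                        // accumulated gradient for the center vector
  for (int n = 0; n <= NegSamNSketch; n++) {
    int Target = (n == 0) ? Context : SampleNegative(NNodes);
    double Label = (n == 0) ? 1.0 : 0.0;                     // 1 for the true context, 0 for negatives
    double Dot = 0.0;
    for (int d = 0; d < Dim; d++) { Dot += SynPos[Center][d] * SynNeg[Target][d]; }
    double Pred = 1.0 / (1.0 + std::exp(-Dot));              // sigmoid; the real code reads ExpTable instead
    double G = AlphaSketch * (Label - Pred);
    for (int d = 0; d < Dim; d++) {
      Grad[d]           += G * SynNeg[Target][d];
      SynNeg[Target][d] += G * SynPos[Center][d];
    }
  }
  for (int d = 0; d < Dim; d++) { SynPos[Center][d] += Grad[d]; }
}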

const double StartAlpha = 0.025

Definition at line 19 of file word2vec.h.

Referenced by LearnEmbeddings(), and TrainModel().

const int TableSize = MaxExp*ExpTablePrecision*2

Definition at line 13 of file word2vec.h.

Referenced by LearnEmbeddings(), and TrainModel().
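
TableSize = MaxExp*ExpTablePrecision*2 gives 6*10000*2 = 120000 entries, one per step of 1/ExpTablePrecision across the interval [-MaxExp, MaxExp). LearnEmbeddings() fills the table with e^x at those points (lines 177-181 of the listing above); the sketch below reproduces that precomputation and adds an assumed LookupExp() helper to show how a clamped exponent could map back to a table slot. The lookup helper is an illustration, not the TrainModel() code.

// Hedged sketch of the exponent lookup table; names with the "Sketch" suffix mirror the word2vec.h constants.
#include <cmath>
#include <vector>

const int MaxExpSketch = 6;                 // mirrors MaxExp
const int ExpTablePrecisionSketch = 10000;  // mirrors ExpTablePrecision
const int TableSizeSketch = MaxExpSketch * ExpTablePrecisionSketch * 2;  // 120000 entries

// Precompute e^x for x = -MaxExp, -MaxExp + 1/ExpTablePrecision, ..., up to (but excluding) MaxExp.
std::vector<double> BuildExpTable() {
  std::vector<double> ExpTable(TableSizeSketch);
  for (int i = 0; i < TableSizeSketch; i++) {
    double Value = -MaxExpSketch + static_cast<double>(i) / ExpTablePrecisionSketch;
    ExpTable[i] = std::exp(Value);
  }
  return ExpTable;
}

// Assumed lookup (illustration only): clamp x to [-MaxExp, MaxExp) and map it to a table slot.
double LookupExp(const std::vector<double>& ExpTable, double x) {
  if (x <= -MaxExpSketch) { return ExpTable.front(); }
  if (x >=  MaxExpSketch) { return ExpTable.back(); }
  int i = static_cast<int>((x + MaxExpSketch) * ExpTablePrecisionSketch);
  return ExpTable[i];
}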