SNAP Library 4.0, Developer Reference  2017-07-27 13:18:06
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
word2vec.h File Reference
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Functions

void LearnEmbeddings (TVVec< TInt, int64 > &WalksVV, int &Dimensions, int &WinSize, int &Iter, bool &Verbose, TIntFltVH &EmbeddingsHV)
 Learns embeddings using SGD, Skip-gram with negative sampling.
 

Variables

const int MaxExp = 6
 
const int ExpTablePrecision = 10000
 
const int TableSize = MaxExp*ExpTablePrecision*2
 
const int NegSamN = 5
 
const double StartAlpha = 0.025
 

Function Documentation

void LearnEmbeddings ( TVVec< TInt, int64 > &  WalksVV,
                       int &                    Dimensions,
                       int &                    WinSize,
                       int &                    Iter,
                       bool &                   Verbose,
                       TIntFltVH &              EmbeddingsHV
                     )

Learns embeddings using SGD, Skip-gram with negative sampling.

Definition at line 148 of file word2vec.cpp.

References THash< TKey, TDat, THashFunc >::AddDat(), TMath::E, ExpTablePrecision, THash< TKey, TDat, THashFunc >::GetDat(), TVVec< TVal, TSizeTy >::GetXDim(), TVVec< TVal, TSizeTy >::GetYDim(), InitNegEmb(), InitPosEmb(), InitUnigramTable(), THash< TKey, TDat, THashFunc >::IsKey(), LearnVocab(), MaxExp, TMath::Power(), StartAlpha, TableSize, and TrainModel().

Referenced by node2vec().

149  {
150    TIntIntH RnmH;
151    TIntIntH RnmBackH;
152    int64 NNodes = 0;
153    //renaming nodes into consecutive numbers
154    for (int i = 0; i < WalksVV.GetXDim(); i++) {
155      for (int64 j = 0; j < WalksVV.GetYDim(); j++) {
156        if ( RnmH.IsKey(WalksVV(i, j)) ) {
157          WalksVV(i, j) = RnmH.GetDat(WalksVV(i, j));
158        } else {
159          RnmH.AddDat(WalksVV(i,j),NNodes);
160          RnmBackH.AddDat(NNodes,WalksVV(i, j));
161          WalksVV(i, j) = NNodes++;
162        }
163      }
164    }
165    TIntV Vocab(NNodes);
166    LearnVocab(WalksVV, Vocab);
167    TIntV KTable(NNodes);
168    TFltV UTable(NNodes);
169    TVVec<TFlt, int64> SynNeg;
170    TVVec<TFlt, int64> SynPos;
171    TRnd Rnd(time(NULL));
172    InitPosEmb(Vocab, Dimensions, Rnd, SynPos);
173    InitNegEmb(Vocab, Dimensions, SynNeg);
174    InitUnigramTable(Vocab, KTable, UTable);
175    TFltV ExpTable(TableSize);
176    double Alpha = StartAlpha; //learning rate
177  #pragma omp parallel for schedule(dynamic)
178    for (int i = 0; i < TableSize; i++ ) {
179      double Value = -MaxExp + static_cast<double>(i) / static_cast<double>(ExpTablePrecision);
180      ExpTable[i] = TMath::Power(TMath::E, Value);
181    }
182    int64 WordCntAll = 0;
183  // op RS 2016/09/26, collapse does not compile on Mac OS X
184  //#pragma omp parallel for schedule(dynamic) collapse(2)
185    for (int j = 0; j < Iter; j++) {
186  #pragma omp parallel for schedule(dynamic)
187      for (int64 i = 0; i < WalksVV.GetXDim(); i++) {
188        TrainModel(WalksVV, Dimensions, WinSize, Iter, Verbose, KTable, UTable,
189                   WordCntAll, ExpTable, Alpha, i, Rnd, SynNeg, SynPos);
190      }
191    }
192    if (Verbose) { printf("\n"); fflush(stdout); }
193    for (int64 i = 0; i < SynPos.GetXDim(); i++) {
194      TFltV CurrV(SynPos.GetYDim());
195      for (int j = 0; j < SynPos.GetYDim(); j++) { CurrV[j] = SynPos(i, j); }
196      EmbeddingsHV.AddDat(RnmBackH.GetDat(i), CurrV);
197    }
198  }

Here is the call graph for this function:

Here is the caller graph for this function:
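
As a usage illustration, the following is a minimal, hedged sketch of driving LearnEmbeddings() directly rather than through node2vec(): it fills a small walk matrix by hand and reads the result out of the returned hash. The toy node IDs, sizes, and main() scaffolding are illustrative assumptions only; in node2vec() the walks come from the random-walk simulation, and the include setup is assumed to follow the node2vec example directory.

// Hedged usage sketch (not from the SNAP sources): hand-built walks fed to LearnEmbeddings().
#include "stdafx.h"     // assumption: pulls in the SNAP core headers, as in the node2vec example
#include "word2vec.h"

int main() {
  const int64 NumWalks = 4, WalkLen = 5;
  TVVec<TInt, int64> WalksVV(NumWalks, WalkLen);   // one walk per row
  for (int64 i = 0; i < NumWalks; i++) {
    for (int64 j = 0; j < WalkLen; j++) {
      WalksVV(i, j) = int((i + j) % 3);            // toy node IDs; real walks come from the graph
    }
  }
  int Dimensions = 8;                              // embedding size
  int WinSize = 2;                                 // skip-gram context window
  int Iter = 1;                                    // SGD passes over the walks
  bool Verbose = true;
  TIntFltVH EmbeddingsHV;                          // output: node ID -> embedding vector
  LearnEmbeddings(WalksVV, Dimensions, WinSize, Iter, Verbose, EmbeddingsHV);
  printf("learned %d embeddings of dimension %d\n", EmbeddingsHV.Len(), Dimensions);
  return 0;
}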

Variable Documentation

const int ExpTablePrecision = 10000

Definition at line 12 of file word2vec.h.

Referenced by LearnEmbeddings(), and TrainModel().

const int NegSamN = 5

Definition at line 16 of file word2vec.h.

Referenced by TrainModel().
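
NegSamN is the number of negative samples drawn for each (center, context) pair during the skip-gram update. The actual update lives in TrainModel() in word2vec.cpp; the code below is only a hedged, standalone sketch of one skip-gram-with-negative-sampling step, with a hypothetical SampleNegative() standing in for the unigram-table draw and plain std::exp standing in for the ExpTable lookup.

// Hedged sketch of a single skip-gram-with-negative-sampling update; not the TrainModel() code.
#include <cmath>
#include <cstdlib>
#include <vector>

static const int NegSamNSketch = 5;        // mirrors NegSamN in word2vec.h
static const double AlphaSketch = 0.025;   // mirrors StartAlpha

static int SampleNegative(int NNodes) {    // hypothetical stand-in; word2vec samples from a unigram table
  return std::rand() % NNodes;
}

void UpdatePair(std::vector<std::vector<double> >& SynPos,   // center (input) vectors
                std::vector<std::vector<double> >& SynNeg,   // context/negative (output) vectors
                int Center, int Context, int Dim, int NNodes) {
  std::vector<double> Grad(Dim, 0.0);                        // accumulated gradient for the center vector
  for (int n = 0; n <= NegSamNSketch; n++) {
    int Target = (n == 0) ? Context : SampleNegative(NNodes);
    double Label = (n == 0) ? 1.0 : 0.0;                     // 1 for the true context, 0 for negatives
    double Dot = 0.0;
    for (int d = 0; d < Dim; d++) { Dot += SynPos[Center][d] * SynNeg[Target][d]; }
    double Pred = 1.0 / (1.0 + std::exp(-Dot));              // sigmoid; the real code reads ExpTable instead
    double G = AlphaSketch * (Label - Pred);
    for (int d = 0; d < Dim; d++) {
      Grad[d]           += G * SynNeg[Target][d];
      SynNeg[Target][d] += G * SynPos[Center][d];
    }
  }
  for (int d = 0; d < Dim; d++) { SynPos[Center][d] += Grad[d]; }
}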

const double StartAlpha = 0.025

Definition at line 19 of file word2vec.h.

Referenced by LearnEmbeddings(), and TrainModel().

const int TableSize = MaxExp*ExpTablePrecision*2

Definition at line 13 of file word2vec.h.

Referenced by LearnEmbeddings(), and TrainModel().
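
TableSize = MaxExp*ExpTablePrecision*2 gives 6*10000*2 = 120000 entries, one per step of 1/ExpTablePrecision across the interval [-MaxExp, MaxExp). LearnEmbeddings() fills the table with e^x at those points (lines 177-181 of the listing above); the sketch below reproduces that precomputation and adds an assumed LookupExp() helper to show how a clamped exponent could map back to a table slot. The lookup helper is an illustration, not the TrainModel() code.

// Hedged sketch of the exponent lookup table; names with the "Sketch" suffix mirror the word2vec.h constants.
#include <cmath>
#include <vector>

const int MaxExpSketch = 6;                 // mirrors MaxExp
const int ExpTablePrecisionSketch = 10000;  // mirrors ExpTablePrecision
const int TableSizeSketch = MaxExpSketch * ExpTablePrecisionSketch * 2;  // 120000 entries

// Precompute e^x for x = -MaxExp, -MaxExp + 1/ExpTablePrecision, ..., up to (but excluding) MaxExp.
std::vector<double> BuildExpTable() {
  std::vector<double> ExpTable(TableSizeSketch);
  for (int i = 0; i < TableSizeSketch; i++) {
    double Value = -MaxExpSketch + static_cast<double>(i) / ExpTablePrecisionSketch;
    ExpTable[i] = std::exp(Value);
  }
  return ExpTable;
}

// Assumed lookup (illustration only): clamp x to [-MaxExp, MaxExp) and map it to a table slot.
double LookupExp(const std::vector<double>& ExpTable, double x) {
  if (x <= -MaxExpSketch) { return ExpTable.front(); }
  if (x >=  MaxExpSketch) { return ExpTable.back(); }
  int i = static_cast<int>((x + MaxExpSketch) * ExpTablePrecisionSketch);
  return ExpTable[i];
}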