// Listing fragment: only some lines of the enclosing routine are visible.
// Reset every counter in Vocab to zero before counting.
9 for(
int64 i = 0; i < Vocab.
Len(); i++) { Vocab[i] = 0; }
// Visit every column (Y dimension) of WalksVV. The row index `i` used below
// presumably comes from an enclosing loop that is not shown here -- confirm.
11 for(
int j = 0; j < WalksVV.
GetYDim(); j++) {
// Increment the counter of the id stored at WalksVV(i,j); ids are used
// directly as indices into Vocab.
12 Vocab[WalksVV(i,j)]++;
// Listing fragment: several lines of the enclosing routine are not visible.
// The KTable/UTable/UnderV/OverV pattern looks like an alias-table
// construction -- confirm against the full source.
// Running total of ProbV, used below as the normalization constant.
19 double TrainWordsPow = 0;
22 for (
int64 i = 0; i < Vocab.
Len(); i++) {
// ProbV[i] is presumably assigned on a preceding line not shown here.
24 TrainWordsPow += ProbV[i];
// Divide every entry of ProbV by the accumulated total.
28 for (
int64 i = 0; i < ProbV.Len(); i++) {
29 ProbV[i] /= TrainWordsPow;
// Scale each entry of ProbV by the number of entries and store it in UTable.
33 for (
int64 i = 0; i < ProbV.Len(); i++) {
34 UTable[i] = ProbV[i] * ProbV.
Len();
// Entries whose scaled value is below 1 take this branch (body not shown).
35 if ( UTable[i] < 1 ) {
// Keep going only while both UnderV and OverV still hold elements.
41 while(UnderV.
Len() > 0 && OverV.
Len() > 0) {
// Small and Large are obtained on lines not shown here.
// Record Large as the alternative outcome for slot Small.
46 KTable[Small] = Large;
// Large's value is reduced by (1 - UTable[Small]).
47 UTable[Large] = UTable[Large] + UTable[Small] - 1;
// Re-test Large against the threshold 1 after the reduction (body not shown).
48 if (UTable[Large] < 1) {
// Returns X when Y is below UTable[X]; otherwise returns KTable[X].
// How X and Y are drawn is on lines not shown in this listing.
59 return Y < UTable[X] ? X : KTable[X];
// Listing fragment: iterates over every column (Y dimension) of SynNeg.
// The loop body and the enclosing row loop are not shown here.
66 for (
int j = 0; j < SynNeg.
GetYDim(); j++) {
// Listing fragment: fills the columns of row i of SynPos. The row index `i`
// presumably comes from an enclosing loop that is not shown here -- confirm.
76 for (
int j = 0; j < SynPos.
GetYDim(); j++) {
// Each cell gets (random deviate - 0.5) / Dimensions. Assumes GetUniDev()
// returns a uniform value in [0,1), which would centre the result on zero
// -- TODO confirm against the TRnd documentation.
77 SynPos(i,j) =(Rnd.
GetUniDev()-0.5)/Dimensions;
// Listing fragment of the per-walk training routine: many lines (declarations,
// closing braces, some statements) are not visible in this listing.
// Two scratch vectors constructed from Dimensions.
85 TFltV Neu1V(Dimensions);
86 TFltV Neu1eV(Dimensions);
// Copy row CurrWalk of WalksVV into WalkV.
89 for (
int j = 0; j < WalksVV.
GetYDim(); j++) { WalkV[j] = WalksVV(CurrWalk,j); }
// Visit every position of the copied walk.
90 for (
int64 WordI=0; WordI<WalkV.Len(); WordI++) {
// Progress report whenever WordCntAll is a multiple of 10000.
91 if ( WordCntAll%10000 == 0 ) {
// Prints WordCntAll as a percentage of Iter*AllWords; the leading "\r"
// rewrites the same console line.
93 printf(
"\rLearning Progress: %.2lf%% ",(
double)WordCntAll*100/(
double)(Iter*AllWords));
// Alpha shrinks linearly from StartAlpha as WordCntAll grows. The +1 keeps
// the denominator non-zero even when Iter*AllWords is 0.
96 Alpha =
StartAlpha * (1 - WordCntAll /
static_cast<double>(Iter * AllWords + 1));
// Id stored at the current position of the walk.
99 int64 Word = WalkV[WordI];
// Loop over all Dimensions (body not shown in this listing).
100 for (
int i = 0; i < Dimensions; i++) {
// Scan window offsets a in [Offset, 2*WinSize - Offset]; Offset is set on a
// line not shown here.
105 for (
int a = Offset; a < WinSize * 2 + 1 - Offset; a++) {
// a == WinSize maps to CurrWordI == WordI (see the next statement): skip the
// current position itself.
106 if (a == WinSize) {
continue; }
107 int64 CurrWordI = WordI - WinSize + a;
// Skip window positions that fall before the start or past the end of WalkV.
108 if (CurrWordI < 0){
continue; }
109 if (CurrWordI >= WalkV.Len()){
continue; }
110 int64 CurrWord = WalkV[CurrWordI];
// Clear the accumulator that collects the update for SynPos row CurrWord.
111 for (
int i = 0; i < Dimensions; i++) { Neu1eV[i] = 0; }
// NegSamN+1 rounds; Target and Label are chosen on lines not shown here.
113 for (
int j = 0; j <
NegSamN+1; j++) {
// Skip the round when the chosen Target equals Word.
120 if (Target == Word) {
continue; }
// Product accumulates the dot product of SynPos row CurrWord and SynNeg row
// Target.
124 for (
int i = 0; i < Dimensions; i++) {
125 Product += SynPos(CurrWord,i) * SynNeg(Target,i);
// Outside [-MaxExp, MaxExp] the gradient uses fixed values instead of Exp.
128 if (Product >
MaxExp) { Grad = (Label - 1) * Alpha; }
129 else if (Product < -
MaxExp) { Grad = Label * Alpha; }
// Otherwise the gradient uses Exp, which is computed on a line not shown here.
132 Grad = (Label - 1 + 1 / (1 + Exp)) * Alpha;
// Order matters: Neu1eV reads SynNeg(Target,i) before that same cell is
// updated on the following line.
134 for (
int i = 0; i < Dimensions; i++) {
135 Neu1eV[i] += Grad * SynNeg(Target,i);
136 SynNeg(Target,i) += Grad * SynPos(CurrWord,i);
// Apply the accumulated update to SynPos row CurrWord.
139 for (
int i = 0; i < Dimensions; i++) {
140 SynPos(CurrWord,i) += Neu1eV[i];
// Listing fragment: tail of the parameter list and selected body lines only.
149 int& Iter,
bool& Verbose,
TIntFltVH& EmbeddingsHV) {
// Renumber the ids stored in WalksVV. The inner loop over `j` is on a line not
// shown in this listing.
154 for (
int i = 0; i < WalksVV.
GetXDim(); i++) {
// Id already seen: overwrite the cell with its previously assigned number.
156 if ( RnmH.
IsKey(WalksVV(i, j)) ) {
157 WalksVV(i, j) = RnmH.
GetDat(WalksVV(i, j));
// Presumably the `else` path (the `else` line itself is not visible): record
// original -> NNodes and NNodes -> original, then store NNodes in the cell and
// advance the counter.
159 RnmH.
AddDat(WalksVV(i,j),NNodes);
160 RnmBackH.
AddDat(NNodes,WalksVV(i, j));
161 WalksVV(i, j) = NNodes++;
// Sampling tables constructed from NNodes.
167 TIntV KTable(NNodes);
168 TFltV UTable(NNodes);
// NOTE(review): presumably seeds the generator from wall-clock time, which
// would make runs non-reproducible -- confirm against the TRnd constructor.
171 TRnd Rnd(time(NULL));
// The loop this pragma applies to is not shown in this listing.
177 #pragma omp parallel for schedule(dynamic)
182 int64 WordCntAll = 0;
// One pass per iteration; the loop over `i` under the pragma is on a line not
// shown here.
185 for (
int j = 0; j < Iter; j++) {
186 #pragma omp parallel for schedule(dynamic)
// NOTE(review): WordCntAll, Alpha, Rnd, SynNeg and SynPos are passed by
// non-const reference (see the TrainModel signature) from inside a parallel
// loop. No synchronization is visible in this listing -- confirm whether
// concurrent updates are intentional.
188 TrainModel(WalksVV, Dimensions, WinSize, Iter, Verbose, KTable, UTable,
189 WordCntAll, ExpTable, Alpha, i, Rnd, SynNeg, SynPos);
// Emit a newline and flush stdout when Verbose is set.
192 if (Verbose) { printf(
"\n"); fflush(stdout); }
// Copy row i of SynPos into CurrV.
195 for (
int j = 0; j < SynPos.
GetYDim(); j++) { CurrV[j] = SynPos(i, j); }
void InitPosEmb(TIntV &Vocab, int &Dimensions, TRnd &Rnd, TVVec< TFlt, int64 > &SynPos)
void InitUnigramTable(TIntV &Vocab, TIntV &KTable, TFltV &UTable)
TSizeTy Len() const
Returns the number of elements in the vector.
const TDat & GetDat(const TKey &Key) const
static double Power(const double &Base, const double &Exponent)
const TVal & Last() const
Returns a reference to the last element of the vector.
void TrainModel(TVVec< TInt, int64 > &WalksVV, int &Dimensions, int &WinSize, int &Iter, bool &Verbose, TIntV &KTable, TFltV &UTable, int64 &WordCntAll, TFltV &ExpTable, double &Alpha, int64 CurrWalk, TRnd &Rnd, TVVec< TFlt, int64 > &SynNeg, TVVec< TFlt, int64 > &SynPos)
void LearnEmbeddings(TVVec< TInt, int64 > &WalksVV, int &Dimensions, int &WinSize, int &Iter, bool &Verbose, TIntFltVH &EmbeddingsHV)
Learns embeddings using SGD, Skip-gram with negative sampling.
void InitNegEmb(TIntV &Vocab, int &Dimensions, TVVec< TFlt, int64 > &SynNeg)
const int ExpTablePrecision
void LearnVocab(TVVec< TInt, int64 > &WalksVV, TIntV &Vocab)
int GetUniDevInt(const int &Range=0)
int64 RndUnigramInt(TIntV &KTable, TFltV &UTable, TRnd &Rnd)
bool IsKey(const TKey &Key) const
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
void DelLast()
Removes the last element of the vector.
TDat & AddDat(const TKey &Key)