SNAP Library 2.2, Developer Reference  2014-03-11 19:15:55
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
agmattr.cpp
00001 
00002 #include "stdafx.h"
00003 #include "agmattr.h"
00004 #include "Snap.h"
00005 #include "agm.h"
00006 
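// Overview: TCesna implements CESNA ("Communities from Edge Structure and Node
// Attributes", Yang, McAuley & Leskovec, ICDM 2013), an overlapping community
// model fit jointly to the network and to binary node attributes.
//   F[u]   - sparse nonnegative membership weights F_uc of node u (indexed 0..N-1)
//   SumFV  - per-community totals sum_u F_uc, kept in sync with F via AddCom/DelCom (see agmattr.h)
//   W[k]   - logistic weights coupling attribute k to each community
// Restating the model of the paper for orientation (Prediction() and
// PredictAttrK() themselves are declared in agmattr.h):
//   P(edge u-v) = 1 - exp(-F_u . F_v)
//   P(X_uk = 1) = 1 / (1 + exp(-sum_c W_kc * F_uc))
// WeightAttr in [0,1] balances the structural and attribute log-likelihoods,
// and NegWgt down-weights the contribution of non-edges.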
00007 
00008 
00009 void TCesna::RandomInit(const int InitComs) {
00010   F.Gen(G->GetNodes());
00011   SumFV.Gen(InitComs);
00012   NumComs = InitComs;
00013   for (int u = 0; u < F.Len(); u++) {
00014     //assign the node to up to min(degree, 10) random communities
00015     int Mem = G->GetNI(u).GetDeg();
00016     if (Mem > 10) { Mem = 10; }
00017     for (int c = 0; c < Mem; c++) {
00018       int CID = Rnd.GetUniDevInt(InitComs);
00019       AddCom(u, CID, Rnd.GetUniDev());
00020     }
00021   }
00022   //assign a random member to each empty community (if any)
00023   for (int c = 0; c < SumFV.Len(); c++) {
00024     if (SumFV[c] == 0.0) {
00025       int UID = Rnd.GetUniDevInt(G->GetNodes());
00026       AddCom(UID, c, Rnd.GetUniDev());
00027     }
00028   }
00029   InitW();
00030 }
00031 
00032 void TCesna::NeighborComInit(const int InitComs) {
00033   //initialize with best neighborhood communities (Gleich et al., KDD '12)
00034   TFltIntPrV NIdPhiV(F.Len(), 0);
00035   TCesnaUtil::GetNIdPhiV<PUNGraph>(G, NIdPhiV);
00036   NeighborComInit(NIdPhiV, InitComs);
00037 }
00038 
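// Seeding by locally minimal conductance (Gleich & Seshadhri, KDD '12):
// NIdPhiV is assumed to hold (neighborhood conductance, node id) pairs as
// produced by TCesnaUtil::GetNIdPhiV. After sorting in ascending order, the
// loop below greedily picks nodes whose neighborhoods have not yet been
// claimed, so that well-separated, low-conductance egonets become the initial
// communities; each seed node and its neighbors start with membership 1.0.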
00039 void TCesna::NeighborComInit(TFltIntPrV& NIdPhiV, const int InitComs) {
00040   //initialize with best neighborhood communities (Gleich et al., KDD '12)
00041   NIdPhiV.Sort(true);
00042   F.Gen(G->GetNodes());
00043   SumFV.Gen(InitComs);
00044   NumComs = InitComs;
00045   TIntSet InvalidNIDS(F.Len());
00046   TIntV ChosenNIDV(InitComs, 0); //FOR DEBUG
00047   //choose nodes with local minimum in conductance
00048   int CurCID = 0;
00049   for (int ui = 0; ui < NIdPhiV.Len(); ui++) {
00050     int UID = NIdPhiV[ui].Val2;
00051     fflush(stdout);
00052     if (InvalidNIDS.IsKey(UID)) { continue; }
00053     ChosenNIDV.Add(UID); //FOR DEBUG
00054     //add the node and its neighbors to the current community
00055     AddCom(UID, CurCID, 1.0);
00056     TUNGraph::TNodeI NI = G->GetNI(UID);
00057     fflush(stdout);
00058     for (int e = 0; e < NI.GetDeg(); e++) {
00059       AddCom(NI.GetNbrNId(e), CurCID, 1.0);
00060     }
00061     //exclude its neighbors from further consideration
00062     for (int e = 0; e < NI.GetDeg(); e++) {
00063       InvalidNIDS.AddKey(NI.GetNbrNId(e));
00064     }
00065     CurCID++;
00066     fflush(stdout);
00067     if (CurCID >= NumComs) { break;  }
00068   }
00069   if (NumComs > CurCID) {
00070     printf("%d communities remain empty and will be filled randomly\n", NumComs - CurCID);
00071   }
00072   //assign random members to each empty community (if any)
00073   for (int c = 0; c < SumFV.Len(); c++) {
00074     if (SumFV[c] == 0.0) {
00075       int ComSz = 10;
00076       for (int u = 0; u < ComSz; u++) {
00077         int UID = Rnd.GetUniDevInt(G->GetNodes());
00078         AddCom(UID, c, Rnd.GetUniDev());
00079       }
00080     }
00081   }
00082   InitW();
00083 }
00084 
00085 void TCesna::SetCmtyVV(const TVec<TIntV>& CmtyVV) {
00086   F.Gen(G->GetNodes());
00087   SumFV.Gen(CmtyVV.Len());
00088   NumComs = CmtyVV.Len();
00089   InitW();
00090   for (int c = 0; c < CmtyVV.Len(); c++) {
00091     for (int u = 0; u < CmtyVV[c].Len(); u++) {
00092       int UID = CmtyVV[c][u];
00093       if (! NIDToIdx.IsKey(UID)) { continue; }
00094       AddCom(NIDToIdx.GetKeyId(UID), c, 1.0);
00095     }
00096   }
00097 }
00098 
00099 void TCesna::SetGraph(const PUNGraph& GraphPt, const THash<TInt, TIntV>& NIDAttrH) {
00100   HOVIDSV.Gen(GraphPt->GetNodes());  
00101   HOKIDSV.Gen(GraphPt->GetNodes());  
00102   X.Gen(GraphPt->GetNodes());
00103   TIntV NIDV;
00104   GraphPt->GetNIdV(NIDV);
00105   NIDToIdx.Gen(NIDV.Len());
00106   NIDToIdx.AddKeyV(NIDV);
00107   // check that node IDs are {0, 1, ..., Nodes - 1}
00108   printf("rearrange nodes\n");
00109   G = TSnap::GetSubGraph(GraphPt, NIDV, true);
00110   for (int nid = 0; nid < G->GetNodes(); nid++) {
00111     IAssert(G->IsNode(nid)); 
00112   }
00113   TSnap::DelSelfEdges(G);
00114   
00115   PNoCom = 1.0 / (double) G->GetNodes();
00116   DoParallel = false;
00117   if (1.0 / PNoCom > sqrt(TFlt::Mx)) { PNoCom = 0.99 / sqrt(TFlt::Mx); } // to prevent overflow
00118   NegWgt = 1.0;
00119 
00120   // add node attributes, find number of attributes
00121   int NumAttr = 0;
00122   for (int u = 0; u < NIDAttrH.Len(); u++) {
00123     int UID = NIDAttrH.GetKey(u);
00124     if (! NIDToIdx.IsKey(UID)) { continue; }
00125     X[NIDToIdx.GetKeyId(UID)].Gen(NIDAttrH[u].Len());
00126     for (int k = 0; k < NIDAttrH[u].Len(); k++) {
00127       int KID = NIDAttrH[u][k];
00128       IAssert (KID >= 0);
00129       X[NIDToIdx.GetKeyId(UID)].AddKey(KID);
00130       if (NumAttr < KID + 1) { NumAttr = KID + 1; } 
00131     }
00132   }
00133   Attrs = NumAttr;
00134   InitW();
00135 }
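// A minimal driver sketch showing how these setup routines fit together. It is
// illustrative only: "graph.txt" and NIDAttrH are placeholders, the TCesna
// constructor is declared in agmattr.h, and 0.3/0.3 are example line-search
// parameters rather than recommended values.
//
//   PUNGraph Graph = TSnap::LoadEdgeList<PUNGraph>("graph.txt");
//   THash<TInt, TIntV> NIDAttrH;   // node id -> ids of the attributes set to 1
//   // ... fill NIDAttrH from an attribute file ...
//   // assuming CS is a TCesna instance constructed as declared in agmattr.h:
//   CS.SetGraph(Graph, NIDAttrH);
//   CS.NeighborComInit(10);        // initialize 10 communities
//   CS.MLEGradAscent(0.001, 1000 * Graph->GetNodes(), "", 0.3, 0.3);
//   TVec<TIntV> CmtyVV;
//   CS.GetCmtyVV(CmtyVV);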
00136 
00137 double TCesna::Likelihood(const bool _DoParallel) { 
00138   TExeTm ExeTm;
00139   double L = 0.0;
00140   if (_DoParallel) {
00141   #pragma omp parallel for 
00142     for (int u = 0; u < F.Len(); u++) {
00143       double LU = LikelihoodForRow(u);
00144       #pragma omp atomic
00145         L += LU;
00146     }
00147   }
00148   else {
00149     for (int u = 0; u < F.Len(); u++) {
00150       double LU = LikelihoodForRow(u);
00151         L += LU;
00152     }
00153   }
00154   return L;
00155 }
00156 
00157 double TCesna::LikelihoodForRow(const int UID) {
00158   return LikelihoodForRow(UID, F[UID]);
00159 }
00160 
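// Structural part of the per-node log-likelihood (before the WeightAttr
// blending below):
//   L_G(u) = sum_{v in N(u)} log(1 - exp(-F_u . F_v))
//            - NegWgt * sum_{v not in N(u), v != u} F_u . F_v
// Prediction(FU, F[v]) is taken to return exp(-F_u . F_v), the no-edge
// probability, consistent with its use in log(1 - Prediction(...)). The O(N)
// sum over non-neighbors is avoided: the loop over FU below subtracts
// F_uc * (SumFV[c] - F_uc - held-out part) for every community, i.e. over all
// other nodes, and the "+ NegWgt * DotProduct(FU, F[v])" term in the neighbor
// loop adds the neighbors back in, leaving exactly the non-neighbor sum.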
00161 double TCesna::LikelihoodForRow(const int UID, const TIntFltH& FU) {
00162   double L = 0.0;
00163   TFltV HOSumFV; //sum of F_v over this node's held-out pairs (to be excluded from SumFV)
00164   if (HOVIDSV[UID].Len() > 0) {
00165     HOSumFV.Gen(SumFV.Len());
00166     
00167     for (int e = 0; e < HOVIDSV[UID].Len(); e++) {
00168       for (int c = 0; c < SumFV.Len(); c++) {
00169         HOSumFV[c] += GetCom(HOVIDSV[UID][e], c);
00170       }
00171     }
00172   }
00173 
00174   TUNGraph::TNodeI NI = G->GetNI(UID);
00175   for (int e = 0; e < NI.GetDeg(); e++) {
00176     int v = NI.GetNbrNId(e);
00177     if (v == UID) { continue; }
00178     if (HOVIDSV[UID].IsKey(v)) { continue; }
00179     L += log (1.0 - Prediction(FU, F[v])) + NegWgt * DotProduct(FU, F[v]);
00180   }
00181   for (TIntFltH::TIter HI = FU.BegI(); HI < FU.EndI(); HI++) {
00182     double HOSum = HOVIDSV[UID].Len() > 0?  HOSumFV[HI.GetKey()].Val: 0.0; //subtract hold-out pairs only if any exist
00183     L -= NegWgt * (SumFV[HI.GetKey()] - HOSum - GetCom(UID, HI.GetKey())) * HI.GetDat();
00184   }
00185   //add regularization
00186   if (RegCoef > 0.0) { //L1
00187     L -= RegCoef * Sum(FU);
00188   }
00189   if (RegCoef < 0.0) { //L2
00190     L += RegCoef * Norm2(FU);
00191   }
00192   L *= (1.0 - WeightAttr);
00193   // add attribute part
00194   for (int k = 0; k < Attrs; k++) {
00195     if (HOKIDSV[UID].IsKey(k)) { continue; }
00196     L += WeightAttr * LikelihoodAttrKForRow(UID, k, FU);
00197   }
00198   return L;
00199 }
00200 
00201 
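// Attribute part of the log-likelihood for a single (node, attribute) pair.
// PredictAttrK is expected to return the logistic prediction
//   Q_uk = 1 / (1 + exp(-W_k . F_u))   (possibly including a bias term; see agmattr.h)
// and the Bernoulli log-likelihood is X_uk*log(Q_uk) + (1 - X_uk)*log(1 - Q_uk),
// clipped to -100 when the prediction saturates at exactly 0 or 1, so that a
// single impossible observation cannot drive the objective to -infinity.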
00202 double TCesna::LikelihoodAttrKForRow(const int UID, const int K, const TIntFltH& FU, const TFltV& WK) {
00203   double Prob = PredictAttrK(FU, WK);
00204   double L = 0.0;
00205   if (GetAttr(UID, K)) { 
00206     L = Prob == 0.0? -100.0: log(Prob);
00207   } else {
00208     L = Prob == 1.0? -100.0: log(1.0 - Prob);
00209   }
00210   return L;
00211 }
00212 
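// Gradient of the per-node objective with respect to F_uc for c in CIDSet:
//   dL/dF_uc = (1 - WeightAttr) * [ sum_{v in N(u)} F_vc * P_uv / (1 - P_uv)
//                                   - NegWgt * sum_{v not in N(u)} F_vc
//                                   + (L1/L2 regularization term) ]
//              + WeightAttr * sum_k (X_uk - Q_uk) * W_kc
// where P_uv = exp(-F_u . F_v). As in LikelihoodForRow, the non-neighbor sum is
// obtained from SumFV. The final loops act as a projection: a component is
// dropped when F_uc = 0 and the gradient points further negative, near-zero
// components are skipped, and the surviving entries are clipped to [-10, 10]
// for numerical stability.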
00213 void TCesna::GradientForRow(const int UID, TIntFltH& GradU, const TIntSet& CIDSet) {
00214   GradU.Gen(CIDSet.Len());
00215   TFltV HOSumFV; //sum of F_v over this node's held-out pairs (to be excluded from SumFV)
00216   if (HOVIDSV[UID].Len() > 0) {
00217     HOSumFV.Gen(SumFV.Len());
00218     
00219     for (int e = 0; e < HOVIDSV[UID].Len(); e++) {
00220       for (int c = 0; c < SumFV.Len(); c++) {
00221         HOSumFV[c] += GetCom(HOVIDSV[UID][e], c);
00222       }
00223     }
00224   }
00225   TUNGraph::TNodeI NI = G->GetNI(UID);
00226   int Deg = NI.GetDeg();
00227   TFltV PredV(Deg), GradV(CIDSet.Len());
00228   TIntV CIDV(CIDSet.Len());
00229   for (int e = 0; e < Deg; e++) {
00230     if (NI.GetNbrNId(e) == UID) { continue; }
00231     if (HOVIDSV[UID].IsKey(NI.GetNbrNId(e))) { continue; }
00232     PredV[e] = Prediction(UID, NI.GetNbrNId(e));
00233   }
00234   
00235   for (int c = 0; c < CIDSet.Len(); c++) {
00236     int CID = CIDSet.GetKey(c);
00237     double Val = 0.0;
00238     for (int e = 0; e < Deg; e++) {
00239       int VID = NI.GetNbrNId(e);
00240       if (VID == UID) { continue; }
00241       if (HOVIDSV[UID].IsKey(VID)) { continue; }
00242       Val += PredV[e] * GetCom(VID, CID) / (1.0 - PredV[e]) + NegWgt * GetCom(VID, CID);
00243     }
00244     double HOSum = HOVIDSV[UID].Len() > 0?  HOSumFV[CID].Val: 0.0; //subtract hold-out pairs only if any exist
00245     Val -= NegWgt * (SumFV[CID] - HOSum - GetCom(UID, CID));
00246     CIDV[c] = CID;
00247     GradV[c] = Val;
00248   }
00249   //add regularization
00250   if (RegCoef > 0.0) { //L1
00251     for (int c = 0; c < GradV.Len(); c++) {
00252       GradV[c] -= RegCoef; 
00253     }
00254   }
00255   if (RegCoef < 0.0) { //L2
00256     for (int c = 0; c < GradV.Len(); c++) {
00257       GradV[c] += 2 * RegCoef * GetCom(UID, CIDV[c]); 
00258     }
00259   }
00260   for (int c = 0; c < GradV.Len(); c++) {
00261     GradV[c] *= (1.0 - WeightAttr);
00262   }
00263   //add attribute part
00264   TFltV AttrPredV(Attrs);
00265   for (int k = 0; k < Attrs; k++) {
00266     if (HOKIDSV[UID].IsKey(k)) { continue; }
00267     AttrPredV[k] = PredictAttrK(F[UID], W[k]);
00268   }
00269   for (int c = 0; c < GradV.Len(); c++) {
00270     for (int k = 0; k < Attrs; k++) {
00271       if (HOKIDSV[UID].IsKey(k)) { continue; }
00272       GradV[c] += WeightAttr * (GetAttr(UID, k) - AttrPredV[k]) * GetW(CIDV[c], k);
00273     }
00274   }
00275 
00276 
00277   for (int c = 0; c < GradV.Len(); c++) {
00278     if (GetCom(UID, CIDV[c]) == 0.0 && GradV[c] < 0.0) { continue; }
00279     if (fabs(GradV[c]) < 0.0001) { continue; }
00280     GradU.AddDat(CIDV[c], GradV[c]);
00281   }
00282   for (int c = 0; c < GradU.Len(); c++) {
00283     if (GradU[c] >= 10) { GradU[c] = 10; }
00284     if (GradU[c] <= -10) { GradU[c] = -10; }
00285     IAssert(GradU[c] >= -10);
00286   }
00287 }
00288 
00289 void TCesna::GetCmtyVV(TVec<TIntV>& CmtyVV) {
00290   TVec<TFltV> TmpV;
00291   GetCmtyVV(CmtyVV, TmpV, sqrt(2.0 * (double) G->GetEdges() / G->GetNodes() / G->GetNodes()), 3);
00292 }
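// The default membership threshold used above, sqrt(2*E/N^2), is the square
// root of the background edge density epsilon = 2E/N^2: if two nodes both have
// F_uc >= sqrt(epsilon) in some community c, the model gives their edge a
// probability of at least 1 - exp(-epsilon), which is approximately epsilon,
// i.e. no less than what is expected by chance. MinSz = 3 additionally drops
// communities with fewer than three such members.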
00293 
00294 
00296 void TCesna::GetCmtyVV(TVec<TIntV>& CmtyVV, TVec<TFltV>& Wck, const double Thres, const int MinSz) {
00297   CmtyVV.Gen(NumComs, 0);
00298   Wck.Gen(NumComs, 0);
00299   TIntFltH CIDSumFH(NumComs);
00300   for (int c = 0; c < SumFV.Len(); c++) {
00301     CIDSumFH.AddDat(c, SumFV[c]);
00302   }
00303   CIDSumFH.SortByDat(false);
00304   for (int c = 0; c < NumComs; c++) {
00305     int CID = CIDSumFH.GetKey(c);
00306     TIntFltH NIDFucH(F.Len() / 10);
00307     TIntV CmtyV;
00308     IAssert(SumFV[CID] == CIDSumFH.GetDat(CID));
00309     if (SumFV[CID] < Thres) { continue; }
00310     for (int u = 0; u < F.Len(); u++) {
00311       int NID = NIDToIdx[u];
00312       if (GetCom(u, CID) >= Thres) { NIDFucH.AddDat(NID, GetCom(u, CID)); }
00313     }
00314     NIDFucH.SortByDat(false);
00315     NIDFucH.GetKeyV(CmtyV);
00316     if (CmtyV.Len() < MinSz) { continue; }
00317     CmtyVV.Add(CmtyV); 
00318     TFltV WV(Attrs);
00319     for (int k = 0; k < Attrs; k++) {
00320       WV[k] = GetW(CID, k);
00321     }
00322     Wck.Add(WV);
00323   }
00324   if ( NumComs != CmtyVV.Len()) {
00325     printf("Community vector generated. %d communities are omitted\n", NumComs.Val - CmtyVV.Len());
00326   }
00327 }
00328 
00329 void TCesna::GetCmtyVVUnSorted(TVec<TIntV>& CmtyVV) {
00330   GetCmtyVVUnSorted(CmtyVV, sqrt(2.0 * (double) G->GetEdges() / G->GetNodes() / G->GetNodes()), 3);
00331 }
00332 
00333 void TCesna::GetCmtyVVUnSorted(TVec<TIntV>& CmtyVV, const double Thres, const int MinSz) {
00334   CmtyVV.Gen(NumComs, 0);
00335   for (int c = 0; c < NumComs; c++) {
00336     TIntV CmtyV;
00337     for (int u = 0; u < G->GetNodes(); u++) {
00338       if (GetCom(u, c) > Thres) { CmtyV.Add(NIDToIdx[u]); }
00339     }
00340     if (CmtyV.Len() >= MinSz) { CmtyVV.Add(CmtyV); }
00341   }
00342   if ( NumComs != CmtyVV.Len()) {
00343     printf("Community vector generated. %d communities are omitted\n", NumComs.Val - CmtyVV.Len());
00344   }
00345 }
00346 
00348 int TCesna::FindComs(const int NumThreads, const int MaxComs, const int MinComs, const int DivComs, const TStr OutFNm, const bool UseBIC, const double HOFrac, const double StepAlpha, const double StepBeta) {
00349     double ComsGap = exp(TMath::Log((double) MaxComs / (double) MinComs) / (double) DivComs);
00350     TIntV ComsV;
00351     ComsV.Add(MinComs);
00352     while (ComsV.Len() < DivComs) {
00353       int NewComs = int(ComsV.Last() * ComsGap);
00354       if (NewComs == ComsV.Last().Val) { NewComs++; }
00355       ComsV.Add(NewComs);
00356     }
00357     if (ComsV.Last() < MaxComs) { ComsV.Add(MaxComs); }
00358     return FindComs(ComsV, UseBIC, HOFrac, NumThreads, OutFNm, StepAlpha, StepBeta);
00359 }
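// The candidate community counts grow geometrically from MinComs to MaxComs in
// DivComs steps. For example, MinComs = 2, MaxComs = 16, DivComs = 3 gives
// ComsGap = exp(log(8) / 3) = 2 and ComsV = {2, 4, 8, 16}; MaxComs itself is
// appended at the end whenever the geometric sequence falls short of it.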
00360 
00361 int TCesna::FindComs(TIntV& ComsV, const bool UseBIC, const double HOFrac, const int NumThreads, const TStr PlotLFNm, const double StepAlpha, const double StepBeta) {
00362   if (ComsV.Len() == 0) {
00363     int MaxComs = G->GetNodes() / 5;
00364     ComsV.Add(2);
00365     while(ComsV.Last() < MaxComs) { ComsV.Add(ComsV.Last() * 2); }
00366   }
00367   int MaxIterCV = 3;
00368 
00369   TVec<TVec<TIntSet> > HoldOutSets(MaxIterCV), HoldOutSetsAttr(MaxIterCV);
00370   TFltIntPrV NIdPhiV;
00371   TCesnaUtil::GetNIdPhiV<PUNGraph>(G, NIdPhiV);
00372   if (! UseBIC) { //if there are enough edges, use cross-validation instead of BIC
00373     //printf("generating hold out set\n");
00374     TIntV NIdV1, NIdV2;
00375     G->GetNIdV(NIdV1);
00376     G->GetNIdV(NIdV2);
00377     for (int IterCV = 0; IterCV < MaxIterCV; IterCV++) {
00378       // generate holdout sets
00379       TCesnaUtil::GenHoldOutPairs(G, HoldOutSets[IterCV], HOFrac, Rnd);
00380       GenHoldOutAttr(HOFrac, HoldOutSetsAttr[IterCV]);
00381     }
00382     //printf("hold out set generated\n");
00383   }
00384 
00385   TFltV HOLV(ComsV.Len());
00386   TIntFltPrV ComsLV;
00387   for (int c = 0; c < ComsV.Len(); c++) {
00388     const int Coms = ComsV[c];
00389     //printf("Try number of Coms:%d\n", Coms);
00390 
00391     if (! UseBIC) { //if there are enough edges, use cross-validation instead of BIC
00392       for (int IterCV = 0; IterCV < MaxIterCV; IterCV++) {
00393         HOVIDSV = HoldOutSets[IterCV];
00394         HOKIDSV = HoldOutSetsAttr[IterCV];
00395         NeighborComInit(NIdPhiV, Coms);
00396         //printf("Initialized\n");
00397 
00398         if (NumThreads == 1) {
00399           //printf("MLE without parallelization begins\n");
00400           MLEGradAscent(0.01, 100 * G->GetNodes(), "", StepAlpha, StepBeta);
00401           //printf("likelihood: train:%f, attr:%f, hold:%f\n", Likelihood(), LikelihoodAttr(), LikelihoodHoldOut());
00402         } else {
00403           //printf("MLE with parallelization begins\n");
00404           MLEGradAscentParallel(0.01, 100, NumThreads, "", StepAlpha, StepBeta);
00405         }
00406         double HOL = LikelihoodHoldOut();
00407         HOL = HOL < 0? HOL: TFlt::Mn;
00408         HOLV[c] += HOL;
00409       }
00410     }
00411     else {
00412       HOVIDSV.Gen(G->GetNodes());
00413       HOKIDSV.Gen(G->GetNodes());
00414       if (NumThreads == 1) {
00415         MLEGradAscent(0.005, 100 * G->GetNodes(), "", StepAlpha, StepBeta);
00416         printf("likelihood: train:%f, attr:%f, hold:%f\n", Likelihood(), LikelihoodAttr(), LikelihoodHoldOut());
00417       } else {
00418         MLEGradAscentParallel(0.005, 100, NumThreads, "", StepAlpha, StepBeta);
00419       }
00420       //int NumParams = G->GetNodes() * Coms + Coms * Attrs;
00421       double NumParams = (1.0 - WeightAttr) * Coms + WeightAttr * Coms * Attrs;
00422       double Observations = (1.0 - WeightAttr) * G->GetNodes() * (G->GetNodes() - 1) / 2 + WeightAttr * G->GetNodes() * Attrs;
00423       double BIC = 2 * Likelihood() - NumParams * log (Observations);
00424       HOLV[c] = BIC;
00425     }
00426   }
00427   int EstComs = 2;
00428   double MaxL = TFlt::Mn;
00429   if (UseBIC) {
00430     printf("Number of communities vs likelihood (criterion: BIC)\n");
00431   } else {
00432     printf("Number of communities vs likelihood (criterion: Cross validation)\n");
00433   }
00434   for (int c = 0; c < ComsV.Len(); c++) {
00435     ComsLV.Add(TIntFltPr(ComsV[c].Val, HOLV[c].Val));
00436     printf("%d(%f)\t", ComsV[c].Val, HOLV[c].Val);
00437     if (MaxL < HOLV[c]) {
00438       MaxL = HOLV[c];
00439       EstComs = ComsV[c];
00440     }
00441   }
00442   printf("\n");
00443   RandomInit(EstComs);
00444   HOVIDSV.Gen(G->GetNodes());
00445   HOKIDSV.Gen(G->GetNodes());
00446   if (! PlotLFNm.Empty()) {
00447     TGnuPlot::PlotValV(ComsLV, PlotLFNm, "hold-out likelihood", "communities", "likelihood");
00448   }
00449   return EstComs;
00450 }
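// Model selection: each candidate community count K is refit and scored either
// by the hold-out log-likelihood (summed over MaxIterCV = 3 random hold-out
// sets of node pairs and node-attribute entries) or, with UseBIC, by
//   BIC(K) = 2 * logL - NumParams * log(Observations),
// where NumParams and Observations are blended with WeightAttr in the same way
// as the likelihood rather than being raw counts. The K with the highest score
// is returned, and the object is re-initialized with RandomInit(EstComs) so a
// final fit can be run at that size.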
00451 
00452 double TCesna::LikelihoodHoldOut() { 
00453   double L = 0.0;
00454   for (int u = 0; u < HOVIDSV.Len(); u++) {
00455     for (int e = 0; e < HOVIDSV[u].Len(); e++) {
00456       int VID = HOVIDSV[u][e];
00457       if (VID == u) { continue; } 
00458       double Pred = Prediction(u, VID);
00459       if (G->IsEdge(u, VID)) {
00460         L += log(1.0 - Pred);
00461       }
00462       else {
00463         L += NegWgt * log(Pred);
00464       }
00465       //printf("NODE %d, %d: Dot Prod: %f, Prediction: %f L:%f\n", u, VID, DotProduct(F[u], F[VID]), Pred, L);
00466     } 
00467   }
00468   L *= (1.0 - WeightAttr);
00469   //printf("likelihood for hold out network:%f\n", L);
00470   for (int u = 0; u < HOKIDSV.Len(); u++) {
00471     for (int e = 0; e < HOKIDSV[u].Len(); e++) {
00472       IAssert(HOKIDSV[u][e] < Attrs);
00473       L += WeightAttr * LikelihoodAttrKForRow(u, HOKIDSV[u][e]);
00474       //printf("asbefsafd ATTRIBUTE %d, NODE %d:%f\n", u, HOKIDSV[u][e].Val, LikelihoodAttrKForRow(u, HOKIDSV[u][e]));
00475     }
00476   }
00477   return L;
00478 }
00479 
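// Backtracking (Armijo-style) line search along the ascent direction DeltaV:
// starting from StepSize = 1, a step is accepted as soon as
//   L(F_u + StepSize * Delta) >= L(F_u) + Alpha * StepSize * <GradV, DeltaV>
// with the trial memberships clamped to [MinVal, MaxVal]; otherwise StepSize is
// shrunk by the factor Beta. If no step passes within MaxIter trials, 0.0 is
// returned and the caller skips the update for this node.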
00480 double TCesna::GetStepSizeByLineSearch(const int UID, const TIntFltH& DeltaV, const TIntFltH& GradV, const double& Alpha, const double& Beta, const int MaxIter) {
00481   double StepSize = 1.0;
00482   double InitLikelihood = LikelihoodForRow(UID);
00483   TIntFltH NewVarV(DeltaV.Len());
00484   for(int iter = 0; iter < MaxIter; iter++) {
00485     for (int i = 0; i < DeltaV.Len(); i++){
00486       int CID = DeltaV.GetKey(i);
00487       double NewVal = GetCom(UID, CID) + StepSize * DeltaV.GetDat(CID);
00488       if (NewVal < MinVal) { NewVal = MinVal; }
00489       if (NewVal > MaxVal) { NewVal = MaxVal; }
00490       NewVarV.AddDat(CID, NewVal);
00491     }
00492     if (LikelihoodForRow(UID, NewVarV) < InitLikelihood + Alpha * StepSize * DotProduct(GradV, DeltaV)) {
00493       StepSize *= Beta;
00494     } else {
00495       break;
00496     }
00497     if (iter == MaxIter - 1) { 
00498       StepSize = 0.0;
00499       break;
00500     }
00501   }
00502   return StepSize;
00503 }
00504 
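// Block-coordinate ascent over nodes: in each sweep the nodes are visited in
// random order and every F_u is updated by a projected gradient step whose
// size comes from the line search above (memberships that would become
// non-positive are removed via DelCom). After each full sweep the logistic
// weights W[k] of every attribute are refit by one gradient step with their
// own line search. Iteration stops after MaxIter single-node updates, or once
// the relative likelihood improvement, measured roughly every two sweeps,
// falls below Thres.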
00505 int TCesna::MLEGradAscent(const double& Thres, const int& MaxIter, const TStr PlotNm, const double StepAlpha, const double StepBeta) {
00506   time_t InitTime = time(NULL);
00507   TExeTm ExeTm, CheckTm;
00508   int iter = 0, PrevIter = 0;
00509   TIntFltPrV IterLV;
00510   TUNGraph::TNodeI UI;
00511   double PrevL = TFlt::Mn, CurL = 0.0;
00512   TIntV NIdxV(F.Len(), 0);
00513   for (int i = 0; i < F.Len(); i++) { NIdxV.Add(i); }
00514   TIntFltH GradV;
00515   //consider all C
00516   TIntSet CIDSet(NumComs);
00517   for (int c = 0; c < NumComs; c++) { CIDSet.AddKey(c); }
00518   while(iter < MaxIter) {
00519     NIdxV.Shuffle(Rnd);
00520     for (int ui = 0; ui < F.Len(); ui++, iter++) {
00521       int u = NIdxV[ui]; //
00522       /*
00523       //find set of candidate c (we only need to consider c to which a neighbor of u belongs to)
00524       UI = G->GetNI(u);
00525       TIntSet CIDSet(20 * UI.GetDeg());
00526       for (int e = 0; e < UI.GetDeg(); e++) {
00527         if (HOVIDSV[u].IsKey(UI.GetNbrNId(e))) { continue; }
00528         TIntFltH& NbhCIDH = F[UI.GetNbrNId(e)];
00529         for (TIntFltH::TIter CI = NbhCIDH.BegI(); CI < NbhCIDH.EndI(); CI++) {
00530           CIDSet.AddKey(CI.GetKey());
00531         }
00532       }
00533       for (TIntFltH::TIter CI = F[u].BegI(); CI < F[u].EndI(); CI++) { //remove the community membership which U does not share with its neighbors
00534         if (! CIDSet.IsKey(CI.GetKey())) {
00535           DelCom(u, CI.GetKey());
00536         }
00537       }
00538       if (CIDSet.Empty()) { continue; }
00539       */
00540       
00541       GradientForRow(u, GradV, CIDSet);
00542       if (Norm2(GradV) < 1e-4) { continue; }
00543       double LearnRate = GetStepSizeByLineSearch(u, GradV, GradV, StepAlpha, StepBeta);
00544       if (LearnRate == 0.0) { continue; }
00545       for (int ci = 0; ci < GradV.Len(); ci++) {
00546         int CID = GradV.GetKey(ci);
00547         double Change = LearnRate * GradV.GetDat(CID);
00548         double NewFuc = GetCom(u, CID) + Change;
00549         if (NewFuc <= 0.0) {
00550           DelCom(u, CID);
00551         } else {
00552           AddCom(u, CID, NewFuc);
00553         }
00554       }
00555       if (! PlotNm.Empty() && (iter + 1) % G->GetNodes() == 0) {
00556         IterLV.Add(TIntFltPr(iter, Likelihood(false)));
00557       }
00558     }
00559     // fit W (logistic regression)
00560     for (int k = 0; k < Attrs; k++) {
00561       TFltV GradWV(NumComs);
00562       GradientForWK(GradWV, k);
00563       if (TLinAlg::Norm2(GradWV) < 1e-4) { continue; }
00564       double LearnRate = GetStepSizeByLineSearchForWK(k, GradWV, GradWV, StepAlpha, StepBeta);
00565       if (LearnRate == 0.0) { continue; }
00566       for (int c = 0; c < GradWV.Len(); c++){
00567         W[k][c] += LearnRate * GradWV[c];
00568         if (W[k][c] < MinValW) { W[k][c] = MinValW; }
00569         if (W[k][c] > MaxValW) { W[k][c] = MaxValW; }
00570       }
00571     }
00572     printf("\r%d iterations (%f) [%lu sec]", iter, CurL, time(NULL) - InitTime);
00573     fflush(stdout);
00574     if (iter - PrevIter >= 2 * G->GetNodes() && iter > 10000) {
00575       PrevIter = iter;
00576       CurL = Likelihood();
00577       if (PrevL > TFlt::Mn && ! PlotNm.Empty()) {
00578         printf("\r%d iterations, Likelihood: %f, Diff: %f", iter, CurL,  CurL - PrevL);
00579       }
00580       fflush(stdout);
00581       if (CurL - PrevL <= Thres * fabs(PrevL)) { break; }
00582       else { PrevL = CurL; }
00583     }
00584     
00585   }
00586   printf("\n");
00587   printf("MLE completed with %d iterations (%s)\n", iter, ExeTm.GetTmStr());
00588   if (! PlotNm.Empty()) {
00589     TGnuPlot::PlotValV(IterLV, PlotNm + ".likelihood_Q");
00590   }
00591   return iter;
00592 }
00593 
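// Parallel variant: in every iteration, ChunkNum chunks of ChunkSize nodes are
// optimized concurrently with OpenMP against a snapshot of F, and each thread
// writes its proposed memberships into NewF/NewNIDV. The shared totals SumFV
// are then adjusted serially and F is written back one node at a time, which
// avoids data races at the cost of slightly stale gradients within an
// iteration. NIDOPTV marks nodes whose gradient was already negligible so they
// are skipped until a neighbor changes; W is refit roughly once per pass over
// all nodes, and convergence is again a relative likelihood test against Thres.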
00594 int TCesna::MLEGradAscentParallel(const double& Thres, const int& MaxIter, const int ChunkNum, const int ChunkSize, const TStr PlotNm, const double StepAlpha, const double StepBeta) {
00595   //parallel
00596   time_t InitTime = time(NULL);
00597   uint64 StartTm = TSecTm::GetCurTm().GetAbsSecs();
00598   TExeTm ExeTm, CheckTm;
00599   double PrevL = Likelihood(true);
00600   TIntFltPrV IterLV;
00601   int PrevIter = 0;
00602   int iter = 0;
00603   TIntV NIdxV(F.Len(), 0);
00604   for (int i = 0; i < F.Len(); i++) { NIdxV.Add(i); }
00605   TIntV NIDOPTV(F.Len()); //per-node flag: 1 means the node currently requires no further optimization
00606   NIDOPTV.PutAll(0);
00607   TVec<TIntFltH> NewF(ChunkNum * ChunkSize);
00608   TIntV NewNIDV(ChunkNum * ChunkSize);
00609   for (iter = 0; iter < MaxIter; iter++) {
00610     NIdxV.Clr(false);
00611     for (int i = 0; i < F.Len(); i++) { 
00612       if (NIDOPTV[i] == 0) {  NIdxV.Add(i); }
00613     }
00614     IAssert (NIdxV.Len() <= F.Len());
00615     NIdxV.Shuffle(Rnd);
00616     // compute gradient for chunk of nodes
00617 #pragma omp parallel for schedule(static, 1)
00618     for (int TIdx = 0; TIdx < ChunkNum; TIdx++) {
00619       TIntFltH GradV;
00620       for (int ui = TIdx * ChunkSize; ui < (TIdx + 1) * ChunkSize; ui++) {
00621         NewNIDV[ui] = -1;
00622         if (ui >= NIdxV.Len()) { continue; }
00623         int u = NIdxV[ui]; //
00624         //find the set of candidate communities c (only those to which a neighbor of u belongs)
00625         TUNGraph::TNodeI UI = G->GetNI(u);
00626         TIntSet CIDSet(5 * UI.GetDeg());
00627         TIntFltH CurFU = F[u];
00628         for (int e = 0; e < UI.GetDeg(); e++) {
00629           if (HOVIDSV[u].IsKey(UI.GetNbrNId(e))) { continue; }
00630           TIntFltH& NbhCIDH = F[UI.GetNbrNId(e)];
00631           for (TIntFltH::TIter CI = NbhCIDH.BegI(); CI < NbhCIDH.EndI(); CI++) {
00632             CIDSet.AddKey(CI.GetKey());
00633           }
00634         }
00635         if (CIDSet.Empty()) { 
00636           CurFU.Clr();
00637         }
00638         else {
00639           for (TIntFltH::TIter CI = CurFU.BegI(); CI < CurFU.EndI(); CI++) { //remove community memberships that u does not share with any neighbor
00640             if (! CIDSet.IsKey(CI.GetKey())) {
00641               CurFU.DelIfKey(CI.GetKey());
00642             }
00643           }
00644           GradientForRow(u, GradV, CIDSet);
00645           if (Norm2(GradV) < 1e-4) { NIDOPTV[u] = 1; continue; }
00646           double LearnRate = GetStepSizeByLineSearch(u, GradV, GradV, StepAlpha, StepBeta);
00647           if (LearnRate == 0.0) { NewNIDV[ui] = -2; continue; }
00648           for (int ci = 0; ci < GradV.Len(); ci++) {
00649             int CID = GradV.GetKey(ci);
00650             double Change = LearnRate * GradV.GetDat(CID);
00651             double NewFuc = CurFU.IsKey(CID)? CurFU.GetDat(CID) + Change : Change;
00652             if (NewFuc <= 0.0) {
00653               CurFU.DelIfKey(CID);
00654             } else {
00655               CurFU.AddDat(CID) = NewFuc;
00656             }
00657           }
00658           CurFU.Defrag();
00659         }
00660         //store changes
00661         NewF[ui] = CurFU;
00662         NewNIDV[ui] = u;
00663       }
00664     }
00665     int NumNoChangeGrad = 0;
00666     int NumNoChangeStepSize = 0;
00667     for (int ui = 0; ui < NewNIDV.Len(); ui++) {
00668       int NewNID = NewNIDV[ui];
00669       if (NewNID == -1) { NumNoChangeGrad++; continue; }
00670       if (NewNID == -2) { NumNoChangeStepSize++; continue; }
00671       for (TIntFltH::TIter CI = F[NewNID].BegI(); CI < F[NewNID].EndI(); CI++) {
00672         SumFV[CI.GetKey()] -= CI.GetDat();
00673       }
00674     }
00675 #pragma omp parallel for
00676     for (int ui = 0; ui < NewNIDV.Len(); ui++) {
00677       int NewNID = NewNIDV[ui];
00678       if (NewNID < 0) { continue; }
00679       F[NewNID] = NewF[ui];
00680     }
00681     for (int ui = 0; ui < NewNIDV.Len(); ui++) {
00682       int NewNID = NewNIDV[ui];
00683       if (NewNID < 0) { continue; }
00684       for (TIntFltH::TIter CI = F[NewNID].BegI(); CI < F[NewNID].EndI(); CI++) {
00685         SumFV[CI.GetKey()] += CI.GetDat();
00686       }
00687     }
00688     // update the nodes who are optimal
00689     for (int ui = 0; ui < NewNIDV.Len(); ui++) {
00690       int NewNID = NewNIDV[ui];
00691       if (NewNID < 0) { continue; }
00692       TUNGraph::TNodeI UI = G->GetNI(NewNID);
00693       NIDOPTV[NewNID] = 0;
00694       for (int e = 0; e < UI.GetDeg(); e++) {
00695         NIDOPTV[UI.GetNbrNId(e)] = 0;
00696       }
00697     }
00698     int OPTCnt = 0;
00699     for (int i = 0; i < NIDOPTV.Len(); i++) { if (NIDOPTV[i] == 1) { OPTCnt++; } }
00700     // fit W (logistic regression)
00701     if (! PlotNm.Empty()) {
00702       printf("\r%d iterations [%s] %s secs", iter * ChunkSize * ChunkNum, ExeTm.GetTmStr(), TUInt64::GetStr(TSecTm::GetCurTm().GetAbsSecs()-StartTm).CStr());
00703       if (PrevL > TFlt::Mn) { printf(" (%f) %d g %d s %d OPT", PrevL, NumNoChangeGrad, NumNoChangeStepSize, OPTCnt); }
00704       fflush(stdout);
00705     }
00706     if (iter == 0 || (iter - PrevIter) * ChunkSize * ChunkNum >= G->GetNodes()) {
00707   #pragma omp parallel for
00708       for (int k = 0; k < Attrs; k++) {
00709         TFltV GradWV(NumComs);
00710         GradientForWK(GradWV, k);
00711         if (TLinAlg::Norm2(GradWV) < 1e-4) { continue; }
00712         double LearnRate = GetStepSizeByLineSearchForWK(k, GradWV, GradWV, StepAlpha, StepBeta);
00713         if (LearnRate == 0.0) { continue; }
00714         for (int c = 0; c < GradWV.Len(); c++){
00715           W[k][c] += LearnRate * GradWV[c];
00716           if (W[k][c] < MinValW) { W[k][c] = MinValW; }
00717           if (W[k][c] > MaxValW) { W[k][c] = MaxValW; }
00718         }
00719       }
00720       PrevIter = iter;
00721       double CurL = Likelihood(true);
00722       IterLV.Add(TIntFltPr(iter * ChunkSize * ChunkNum, CurL));
00723       printf("\r%d iterations, Likelihood: %f, Diff: %f [%lu secs]", iter, CurL,  CurL - PrevL, time(NULL) - InitTime);
00724        fflush(stdout);
00725       if (CurL - PrevL <= Thres * fabs(PrevL)) { 
00726         break;
00727       }
00728       else {
00729         PrevL = CurL;
00730       }
00731     }
00732   }
00733   if (! PlotNm.Empty()) {
00734     printf("\nMLE completed with %d iterations (%s secs)\n", iter, TUInt64::GetStr(TSecTm::GetCurTm().GetAbsSecs()-StartTm).CStr());
00735     TGnuPlot::PlotValV(IterLV, PlotNm + ".likelihood_Q");
00736   } else {
00737     printf("\rMLE completed with %d iterations (%lu secs)", iter, time(NULL) - InitTime);
00738     fflush(stdout);
00739   }
00740   return iter;
00741 }