SNAP Library 2.2, Developer Reference  2014-03-11 19:15:55
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
agmdirected.cpp
Go to the documentation of this file.
00001 
00002 #include "stdafx.h"
00003 #include "agmfast.h"
00004 #include "agmdirected.h"
00005 #include "Snap.h"
00006 #include "agm.h"
00007 
00008 void TCoda::Save(TSOut& SOut) {
00009   G->Save(SOut);
00010   F.Save(SOut);
00011   H.Save(SOut);
00012   NIDV.Save(SOut);
00013   RegCoef.Save(SOut);
00014   SumFV.Save(SOut);
00015   SumHV.Save(SOut);
00016   NodesOk.Save(SOut);
00017   MinVal.Save(SOut);
00018   MaxVal.Save(SOut);
00019   NegWgt.Save(SOut);
00020   NumComs.Save(SOut);
00021   HOVIDSV.Save(SOut);
00022   PNoCom.Save(SOut);
00023 }
00024 
00025 void TCoda::Load(TSIn& SIn, const int& RndSeed) {
00026   G->Load(SIn);
00027   F.Load(SIn);
00028   H.Load(SIn);
00029   NIDV.Load(SIn);
00030   RegCoef.Load(SIn);
00031   SumFV.Load(SIn);
00032   SumHV.Load(SIn);
00033   NodesOk.Load(SIn);
00034   MinVal.Load(SIn);
00035   MaxVal.Load(SIn);
00036   NegWgt.Load(SIn);
00037   NumComs.Load(SIn);
00038   HOVIDSV.Load(SIn);
00039   PNoCom.Load(SIn);
00040   Rnd.PutSeed(RndSeed);
00041 }
00042 
00043 void TCoda::RandomInit(const int InitComs) {
00044   F.Gen(G->GetNodes());
00045   H.Gen(G->GetNodes());
00046   SumFV.Gen(InitComs);
00047   SumHV.Gen(InitComs);
00048   NumComs = InitComs;
00049   for (int u = 0; u < F.Len(); u++) {
00050     //assign to just one community
00051     int Mem = G->GetNI(u).GetOutDeg();
00052     if (Mem > 10) { Mem = 10; }
00053     for (int c = 0; c < Mem; c++) {
00054       int CID = Rnd.GetUniDevInt(InitComs);
00055       AddComOut(u, CID, Rnd.GetUniDev());
00056     }
00057   }
00058   for (int u = 0; u < H.Len(); u++) {
00059     //assign to just one community
00060     int Mem = G->GetNI(u).GetInDeg();
00061     if (Mem > 10) { Mem = 10; }
00062     for (int c = 0; c < Mem; c++) {
00063       int CID = Rnd.GetUniDevInt(InitComs);
00064       AddComIn(u, CID, Rnd.GetUniDev());
00065     }
00066   }
00067   //assign a member to zero-member community (if any)
00068   for (int c = 0; c < SumFV.Len(); c++) {
00069     if (SumFV[c] == 0.0) {
00070       int UID = Rnd.GetUniDevInt(G->GetNodes());
00071       AddComOut(UID, c, Rnd.GetUniDev());
00072     }
00073   }
00074   //assign a member to zero-member community (if any)
00075   for (int c = 0; c < SumHV.Len(); c++) {
00076     if (SumHV[c] == 0.0) {
00077       int UID = Rnd.GetUniDevInt(G->GetNodes());
00078       AddComIn(UID, c, Rnd.GetUniDev());
00079     }
00080   }
00081 }
00082 
00083 void TCoda::NeighborComInit(const int InitComs) {
00084   //initialize with best neighborhood communities (Gleich et.al. KDD'12)
00085   TExeTm RunTm;
00086   TFltIntPrV NIdPhiV(F.Len(), 0);
00087   TAGMFastUtil::GetNIdPhiV<PNGraph>(G, NIdPhiV);
00088   NeighborComInit(NIdPhiV, InitComs);
00089 }
00090 
00091 void TCoda::NeighborComInit(TFltIntPrV& NIdPhiV, const int InitComs) {
00092   NIdPhiV.Sort(true);
00093   F.Gen(G->GetNodes());
00094   H.Gen(G->GetNodes());
00095   SumFV.Gen(InitComs);
00096   SumHV.Gen(InitComs);
00097   NumComs = InitComs;
00098   //TIntFltH NCPhiH(F.Len());
00099   TIntSet InvalidNIDS(F.Len());
00100   TIntV ChosenNIDV(InitComs, 0); //FOR DEBUG
00101   //choose nodes with local minimum in conductance
00102   int CurCID = 0;
00103   for (int ui = 0; ui < NIdPhiV.Len(); ui++) {
00104     int UID = NIdPhiV[ui].Val2;
00105     fflush(stdout);
00106     if (InvalidNIDS.IsKey(UID)) { continue; }
00107     ChosenNIDV.Add(UID); //FOR DEBUG
00108     //add the node and its neighbors to the current community
00109     TNGraph::TNodeI NI = G->GetNI(UID);
00110     if (NI.GetOutDeg() > 0) { AddComOut(UID, CurCID, 1.0); }
00111     if (NI.GetInDeg() > 0) { AddComIn(UID, CurCID, 1.0); }
00112     fflush(stdout);
00113     //add neighbors depending on whether they have incoming / outgoing edges from the center node (NI)
00114     for (int e = 0; e < NI.GetDeg(); e++) {
00115       int VID = NI.GetNbrNId(e);
00116       TNGraph::TNodeI VI = G->GetNI(VID);
00117       if (VI.GetOutDeg() > 0) { AddComOut(VID, CurCID, 1.0); }
00118       if (VI.GetInDeg() > 0) { AddComIn(VID, CurCID, 1.0); }
00119     }
00120     //exclude its neighbors from the next considerations
00121     for (int e = 0; e < NI.GetDeg(); e++) {
00122       InvalidNIDS.AddKey(NI.GetNbrNId(e));
00123     }
00124     CurCID++;
00125     fflush(stdout);
00126     if (CurCID >= NumComs) { break;  }
00127   }
00128   if (NumComs > CurCID) {
00129     printf("%d communities needed to fill randomly\n", NumComs - CurCID);
00130   }
00131   //assign a member to zero-member community (if any)
00132   for (int c = 0; c < SumFV.Len(); c++) {
00133     if (SumFV[c] == 0.0) {
00134       int ComSz = 10;
00135       for (int u = 0; u < ComSz; u++) {
00136         int UID = Rnd.GetUniDevInt(G->GetNodes());
00137         AddComOut(UID, c, Rnd.GetUniDev());
00138       }
00139     }
00140   }
00141   //assign a member to zero-member community (if any)
00142   for (int c = 0; c < SumHV.Len(); c++) {
00143     if (SumHV[c] == 0.0) {
00144       int ComSz = 10;
00145       for (int u = 0; u < ComSz; u++) {
00146         int UID = Rnd.GetUniDevInt(G->GetNodes());
00147         AddComIn(UID, c, Rnd.GetUniDev());
00148       }
00149     }
00150   }
00151 }
00152 
00153 void TCoda::GetNonEdgePairScores(TFltIntIntTrV& ScoreV) {
00154   ScoreV.Gen(G->GetNodes() * G->GetNodes(), 0);
00155   TIntV NIDV;
00156   G->GetNIdV(NIDV);
00157   TIntSet Cuv;
00158   for (int u = 0; u < NIDV.Len(); u++) {
00159     int UID = NIDV[u];
00160     for (int v = 0; v < NIDV.Len(); v++) {
00161       int VID = NIDV[v];
00162       if (UID == VID) { continue; }
00163       if (! G->IsEdge(UID, VID)) {
00164         double Val = 1.0 - Prediction(UID, VID);
00165         ScoreV.Add(TFltIntIntTr(Val, UID, VID));
00166       }
00167     }
00168   }
00169 }
00170 
00171 void TCoda::SetCmtyVV(const TVec<TIntV>& CmtyVVOut, const TVec<TIntV>& CmtyVVIn) {
00172   IAssert(CmtyVVOut.Len() == CmtyVVIn.Len());
00173   F.Gen(G->GetNodes());
00174   H.Gen(G->GetNodes());
00175   SumFV.Gen(CmtyVVOut.Len());
00176   SumHV.Gen(CmtyVVIn.Len());
00177   NumComs = CmtyVVOut.Len();
00178   TIntH NIDIdxH(NIDV.Len());
00179   if (! NodesOk) {
00180     for (int u = 0; u < NIDV.Len(); u++) {
00181       NIDIdxH.AddDat(NIDV[u], u);
00182     }
00183   }
00184   for (int c = 0; c < CmtyVVOut.Len(); c++) {
00185     for (int u = 0; u < CmtyVVOut[c].Len(); u++) {
00186       int UID = CmtyVVOut[c][u];
00187       if (! NodesOk) { UID = NIDIdxH.GetDat(UID); }
00188       if (G->IsNode(UID)) { 
00189         AddComOut(UID, c, 1.0);
00190       }
00191     }
00192   }
00193   for (int c = 0; c < CmtyVVIn.Len(); c++) {
00194     for (int u = 0; u < CmtyVVIn[c].Len(); u++) {
00195       int UID = CmtyVVIn[c][u];
00196       if (! NodesOk) { UID = NIDIdxH.GetDat(UID); }
00197       if (G->IsNode(UID)) { 
00198         AddComIn(UID, c, 1.0);
00199       }
00200     }
00201   }
00202 }
00203 
00204 void TCoda::SetGraph(const PNGraph& GraphPt) {
00205   G = GraphPt;
00206   HOVIDSV.Gen(G->GetNodes());  
00207   NodesOk = true;
00208   GraphPt->GetNIdV(NIDV);
00209   // check that nodes IDs are {0,1,..,Nodes-1}
00210   for (int nid = 0; nid < GraphPt->GetNodes(); nid++) {
00211     if (! GraphPt->IsNode(nid)) { 
00212       NodesOk = false; 
00213       break; 
00214     } 
00215   }
00216   if (! NodesOk) {
00217     printf("rearrage nodes\n");
00218     G = TSnap::GetSubGraph(GraphPt, NIDV, true);
00219     for (int nid = 0; nid < G->GetNodes(); nid++) {
00220       IAssert(G->IsNode(nid)); 
00221     }
00222   }
00223   TSnap::DelSelfEdges(G);
00224   
00225   PNoCom = 1.0 / (double) G->GetNodes();
00226   DoParallel = false;
00227   if (1.0 / PNoCom > sqrt(TFlt::Mx)) { PNoCom = 0.99 / sqrt(TFlt::Mx); } // to prevent overflow
00228   NegWgt = 1.0;
00229 }
00230 
00231 double TCoda::Likelihood(const bool _DoParallel) { 
00232   TExeTm ExeTm;
00233   double L = 0.0;
00234   if (_DoParallel) {
00235   #pragma omp parallel for 
00236     for (int u = 0; u < F.Len(); u++) {
00237       double LU = LikelihoodForNode(true, u);
00238       #pragma omp atomic
00239         L += LU;
00240     }
00241   }
00242   else {
00243     for (int u = 0; u < F.Len(); u++) {
00244       double LU = LikelihoodForNode(true, u);
00245         L += LU;
00246     }
00247   }
00248   return L;
00249 }
00250 
00251 double TCoda::LikelihoodForNode(const bool IsRow, const int UID) {
00252   if (IsRow) {
00253     return LikelihoodForNode(IsRow, UID, F[UID]);
00254   } else {
00255     return LikelihoodForNode(IsRow, UID, H[UID]);
00256   }
00257 }
00258 
00259 double TCoda::LikelihoodForNode(const bool IsRow, const int UID, const TIntFltH& FU) {
00260   double L = 0.0;
00261   TFltV HOSumHV; //adjust for Hv of v hold out
00262   if (HOVIDSV[UID].Len() > 0) {
00263     HOSumHV.Gen(NumComs);
00264     for (int e = 0; e < HOVIDSV[UID].Len(); e++) {
00265       for (int c = 0; c < SumHV.Len(); c++) {
00266         HOSumHV[c] += GetCom(! IsRow, HOVIDSV[UID][e], c);
00267       }
00268     }
00269   }
00270   TNGraph::TNodeI NI = G->GetNI(UID);
00271   const int Deg = IsRow ? NI.GetOutDeg(): NI.GetInDeg();
00272   for (int e = 0; e < Deg; e++) {
00273     const int v = IsRow ? NI.GetOutNId(e): NI.GetInNId(e);
00274     if (v == UID) { continue; }
00275     if (HOVIDSV[UID].IsKey(v)) { continue; }
00276     if (IsRow) {
00277       L += log (1.0 - Prediction(FU, H[v])) + NegWgt * DotProduct(FU, H[v]);
00278     } else {
00279       L += log (1.0 - Prediction(F[v], FU)) + NegWgt * DotProduct(F[v], FU);
00280     }
00281   }
00282   for (TIntFltH::TIter HI = FU.BegI(); HI < FU.EndI(); HI++) {
00283     double HOSum = HOVIDSV[UID].Len() > 0?  HOSumHV[HI.GetKey()].Val: 0.0;//subtract Hold out pairs only if hold out pairs exist
00284     L -= NegWgt * (GetSumVal(! IsRow, HI.GetKey()) - HOSum - GetCom(! IsRow, UID, HI.GetKey())) * HI.GetDat();
00285   }
00286   //add regularization
00287   if (RegCoef > 0.0) { //L1
00288     L -= RegCoef * Sum(FU);
00289   }
00290   if (RegCoef < 0.0) { //L2
00291     L += RegCoef * Norm2(FU);
00292   }
00293   return L;
00294 }
00295 
00296 
00297 /*
00298 double TCoda::LikelihoodForRow(const int UID) {
00299   return LikelihoodForRow(UID, F[UID]);
00300 }
00301 
00302 
00303 double TCoda::LikelihoodForRow(const int UID, const TIntFltH& FU) {
00304   double L = 0.0;
00305   TFltV HOSumHV; //adjust for Hv of v hold out
00306   if (HOVIDSV[UID].Len() > 0) {
00307     HOSumHV.Gen(SumFV.Len());
00308     
00309     for (int e = 0; e < HOVIDSV[UID].Len(); e++) {
00310       for (int c = 0; c < SumHV.Len(); c++) {
00311         HOSumHV[c] += GetComIn(HOVIDSV[UID][e], c);
00312       }
00313     }
00314   }
00315   TNGraph::TNodeI NI = G->GetNI(UID);
00316   for (int e = 0; e < NI.GetOutDeg(); e++) {
00317     int v = NI.GetOutNId(e);
00318     if (v == UID) { continue; }
00319     if (HOVIDSV[UID].IsKey(v)) { continue; }
00320     double LU = log (1.0 - Prediction(FU, H[v])) + NegWgt * DotProduct(FU, H[v]);
00321     L += LU;
00322   }
00323   for (TIntFltH::TIter HI = FU.BegI(); HI < FU.EndI(); HI++) {
00324     double HOSum = HOVIDSV[UID].Len() > 0?  HOSumHV[HI.GetKey()].Val: 0.0;//subtract Hold out pairs only if hold out pairs exist
00325     double LU = NegWgt * (SumHV[HI.GetKey()] - HOSum - GetComIn(UID, HI.GetKey())) * HI.GetDat();
00326     L -= LU;
00327   }
00328   //add regularization
00329   if (RegCoef > 0.0) { //L1
00330     L -= RegCoef * Sum(FU);
00331   }
00332   if (RegCoef < 0.0) { //L2
00333     L += RegCoef * Norm2(FU);
00334   }
00335 
00336   return L;
00337 }
00338 
00339 double TCoda::LikelihoodForCol(const int VID) {
00340   return LikelihoodForCol(VID, H[VID]);
00341 }
00342 
00343 
00344 double TCoda::LikelihoodForCol(const int VID, const TIntFltH& HV) {
00345   double L = 0.0;
00346   TFltV HOSumFV; //adjust for Fv of v hold out
00347   if (HOVIDSV[VID].Len() > 0) {
00348     HOSumFV.Gen(SumFV.Len());
00349     for (int e = 0; e < HOVIDSV[VID].Len(); e++) {
00350       for (int c = 0; c < SumFV.Len(); c++) {
00351         HOSumFV[c] += GetComOut(HOVIDSV[VID][e], c);
00352       }
00353     }
00354   }
00355   TNGraph::TNodeI NI = G->GetNI(VID);
00356   for (int e = 0; e < NI.GetInDeg(); e++) {
00357     int v = NI.GetInNId(e);
00358     if (v == VID) { continue; }
00359     if (HOVIDSV[VID].IsKey(v)) { continue; }
00360     L += log (1.0 - Prediction(F[v], HV)) + NegWgt * DotProduct(F[v], HV);
00361   }
00362   for (TIntFltH::TIter HI = HV.BegI(); HI < HV.EndI(); HI++) {
00363     double HOSum = HOVIDSV[VID].Len() > 0?  HOSumFV[HI.GetKey()].Val: 0.0;//subtract Hold out pairs only if hold out pairs exist
00364     L -= NegWgt * (SumFV[HI.GetKey()] - HOSum - GetComOut(VID, HI.GetKey())) * HI.GetDat();
00365   }
00366   //add regularization
00367   if (RegCoef > 0.0) { //L1
00368     L -= RegCoef * Sum(HV);
00369   }
00370   if (RegCoef < 0.0) { //L2
00371     L += RegCoef * Norm2(HV);
00372   }
00373 
00374   return L;
00375 }
00376 */
00377 void TCoda::GradientForNode(const bool IsRow, const int UID, TIntFltH& GradU, const TIntSet& CIDSet) {
00378   GradU.Gen(CIDSet.Len());
00379 
00380   TFltV HOSumHV; //adjust for Hv of v hold out
00381   if (HOVIDSV[UID].Len() > 0) {
00382     HOSumHV.Gen(NumComs);
00383     for (int e = 0; e < HOVIDSV[UID].Len(); e++) {
00384       for (int c = 0; c < SumHV.Len(); c++) {
00385         HOSumHV[c] += GetCom(! IsRow, HOVIDSV[UID][e], c);
00386       }
00387     }
00388   }
00389     
00390   TNGraph::TNodeI NI = G->GetNI(UID);
00391   int Deg = IsRow ? NI.GetOutDeg(): NI.GetInDeg();
00392   TFltV PredV(Deg), GradV(CIDSet.Len());
00393   TIntV CIDV(CIDSet.Len());
00394   for (int e = 0; e < Deg; e++) {
00395     int VID = IsRow? NI.GetOutNId(e): NI.GetInNId(e);
00396     if (VID == UID) { continue; }
00397     if (HOVIDSV[UID].IsKey(VID)) { continue; }
00398     PredV[e] = IsRow? Prediction(UID, VID): Prediction(VID, UID);
00399   }
00400   
00401   for (int c = 0; c < CIDSet.Len(); c++) {
00402     int CID = CIDSet.GetKey(c);
00403     double Val = 0.0;
00404     for (int e = 0; e < Deg; e++) {
00405       int VID = IsRow? NI.GetOutNId(e): NI.GetInNId(e);
00406       if (VID == UID) { continue; }
00407       if (HOVIDSV[UID].IsKey(VID)) { continue; }
00408       Val += PredV[e] * GetCom(! IsRow, VID, CID) / (1.0 - PredV[e]) + NegWgt * GetCom(! IsRow, VID, CID);
00409     }
00410     double HOSum = HOVIDSV[UID].Len() > 0?  HOSumHV[CID].Val: 0.0;//subtract Hold out pairs only if hold out pairs exist
00411     Val -= NegWgt * (GetSumVal(! IsRow, CID) - HOSum - GetCom(! IsRow, UID, CID));
00412     CIDV[c] = CID;
00413     GradV[c] = Val;
00414   }
00415   //add regularization
00416   if (RegCoef > 0.0) { //L1
00417     for (int c = 0; c < GradV.Len(); c++) {
00418       GradV[c] -= RegCoef; 
00419     }
00420   }
00421   if (RegCoef < 0.0) { //L2
00422     for (int c = 0; c < GradV.Len(); c++) {
00423       GradV[c] += 2 * RegCoef * GetCom(IsRow, UID, CIDV[c]); 
00424     }
00425   }
00426   for (int c = 0; c < GradV.Len(); c++) {
00427     if (GetCom(IsRow, UID, CIDV[c]) == 0.0 && GradV[c] < 0.0) { continue; }
00428     if (fabs(GradV[c]) < 0.0001) { continue; }
00429     GradU.AddDat(CIDV[c], GradV[c]);
00430   }
00431   for (int c = 0; c < GradU.Len(); c++) {
00432     if (GradU[c] >= 10) { GradU[c] = 10; }
00433     if (GradU[c] <= -10) { GradU[c] = -10; }
00434     IAssert(GradU[c] >= -10);
00435   }
00436 }
00437 
00438 /*
00439 void TCoda::GradientForRow(const int UID, TIntFltH& GradU, const TIntSet& CIDSet) {
00440   GradU.Gen(CIDSet.Len());
00441 
00442   TFltV HOSumHV; //adjust for Hv of v hold out
00443   if (HOVIDSV[UID].Len() > 0) {
00444     HOSumHV.Gen(SumHV.Len());
00445     
00446     for (int e = 0; e < HOVIDSV[UID].Len(); e++) {
00447       for (int c = 0; c < SumHV.Len(); c++) {
00448         HOSumHV[c] += GetComIn(HOVIDSV[UID][e], c);
00449       }
00450     }
00451   }
00452     
00453   TNGraph::TNodeI NI = G->GetNI(UID);
00454   int Deg = NI.GetOutDeg();
00455   TFltV PredV(Deg), GradV(CIDSet.Len());
00456   TIntV CIDV(CIDSet.Len());
00457   for (int e = 0; e < Deg; e++) {
00458     if (NI.GetOutNId(e) == UID) { continue; }
00459     if (HOVIDSV[UID].IsKey(NI.GetOutNId(e))) { continue; }
00460     PredV[e] = Prediction(UID, NI.GetOutNId(e));
00461   }
00462   
00463   for (int c = 0; c < CIDSet.Len(); c++) {
00464     int CID = CIDSet.GetKey(c);
00465     double Val = 0.0;
00466     for (int e = 0; e < Deg; e++) {
00467       int VID = NI.GetNbrNId(e);
00468       if (VID == UID) { continue; }
00469       if (HOVIDSV[UID].IsKey(VID)) { continue; }
00470       Val += PredV[e] * GetComIn(VID, CID) / (1.0 - PredV[e]) + NegWgt * GetComIn(VID, CID);
00471     }
00472     double HOSum = HOVIDSV[UID].Len() > 0?  HOSumHV[CID].Val: 0.0;//subtract Hold out pairs only if hold out pairs exist
00473     Val -= NegWgt * (SumHV[CID] - HOSum - GetComIn(UID, CID));
00474     CIDV[c] = CID;
00475     GradV[c] = Val;
00476   }
00477   //add regularization
00478   if (RegCoef > 0.0) { //L1
00479     for (int c = 0; c < GradV.Len(); c++) {
00480       GradV[c] -= RegCoef; 
00481     }
00482   }
00483   if (RegCoef < 0.0) { //L2
00484     for (int c = 0; c < GradV.Len(); c++) {
00485       GradV[c] += 2 * RegCoef * GetComOut(UID, CIDV[c]); 
00486     }
00487   }
00488   for (int c = 0; c < GradV.Len(); c++) {
00489     if (GetComOut(UID, CIDV[c]) == 0.0 && GradV[c] < 0.0) { continue; }
00490     if (fabs(GradV[c]) < 0.0001) { continue; }
00491     GradU.AddDat(CIDV[c], GradV[c]);
00492   }
00493   for (int c = 0; c < GradU.Len(); c++) {
00494     if (GradU[c] >= 10) { GradU[c] = 10; }
00495     if (GradU[c] <= -10) { GradU[c] = -10; }
00496     IAssert(GradU[c] >= -10);
00497   }
00498 }
00499 */
00500 
00501 void TCoda::GetCmtyVV(const bool IsOut, TVec<TIntV>& CmtyVV) {
00502   GetCmtyVV(IsOut, CmtyVV, sqrt(1.0 / G->GetNodes()), 3);
00503 }
00504 
00506 void TCoda::GetCommunity(TIntV& CmtyVIn, TIntV& CmtyVOut, const int CID, const double Thres) {
00507   TIntFltH NIDFucH(F.Len() / 10), NIDHucH(F.Len() / 10);
00508   for (int u = 0; u < NIDV.Len(); u++) {
00509     int NID = u;
00510     if (! NodesOk) { NID = NIDV[u]; }
00511     if (GetCom(true, u, CID) >= Thres) { NIDFucH.AddDat(NID, GetCom(true, u, CID)); }
00512     if (GetCom(false, u, CID) >= Thres) { NIDHucH.AddDat(NID, GetCom(false, u, CID)); }
00513   }
00514   NIDFucH.SortByDat(false);
00515   NIDHucH.SortByDat(false);
00516   NIDFucH.GetKeyV(CmtyVOut);
00517   NIDHucH.GetKeyV(CmtyVIn);
00518 }
00519 
00520 void TCoda::GetTopCIDs(TIntV& CIdV, const int TopK, const int IsAverage, const int MinSz) {
00521   TIntFltH CIdFHH;
00522   for (int c = 0; c < GetNumComs(); c++) {
00523     if (IsAverage == 1) {
00524       TIntV CmtyVIn, CmtyVOut;
00525       GetCommunity(CmtyVIn, CmtyVOut, c);
00526       if (CmtyVIn.Len() == 0 || CmtyVOut.Len() == 0) { continue; }
00527       if (CmtyVIn.Len() < MinSz || CmtyVOut.Len() < MinSz) { continue; }
00528       CIdFHH.AddDat(c, GetSumVal(true, c) * GetSumVal(false, c) / (double) CmtyVIn.Len() / (double) CmtyVOut.Len());
00529     } else {
00530       CIdFHH.AddDat(c, GetSumVal(true, c) * GetSumVal(false, c));
00531     }
00532   }
00533   CIdFHH.SortByDat(false);
00534   CIdFHH.GetKeyV(CIdV);
00535   if (TopK < CIdFHH.Len()) { CIdV.Trunc(TopK); }
00536 }
00537 
00539 void TCoda::GetCmtyVV(const bool IsOut, TVec<TIntV>& CmtyVV, const double Thres, const int MinSz) {
00540   CmtyVV.Gen(NumComs, 0);
00541   TIntFltH CIDSumFH(NumComs);
00542   for (int c = 0; c < NumComs; c++) {
00543     CIDSumFH.AddDat(c, GetSumVal(IsOut, c));
00544   }
00545   CIDSumFH.SortByDat(false);
00546   for (int c = 0; c < NumComs; c++) {
00547     int CID = CIDSumFH.GetKey(c);
00548     TIntFltH NIDFucH, NIDHucH, NIDInOutH;
00549     TIntV CmtyV;
00550     GetNIDValH(NIDInOutH, NIDFucH, NIDHucH, CID, Thres);
00551     if (IsOut) {
00552       NIDFucH.GetKeyV(CmtyV);
00553     } else {
00554       NIDHucH.GetKeyV(CmtyV);
00555     }
00556     if (CmtyV.Len() >= MinSz) { CmtyVV.Add(CmtyV); }
00557   }
00558   if ( NumComs != CmtyVV.Len()) {
00559     printf("Community vector generated. %d communities are ommitted\n", NumComs.Val - CmtyVV.Len());
00560   }
00561 }
00562 
00563 void TCoda::GetCmtyVVUnSorted(const bool IsOut, TVec<TIntV>& CmtyVV, const double Thres, const int MinSz) {
00564   CmtyVV.Gen(NumComs, 0);
00565   for (int c = 0; c < NumComs; c++) {
00566     TIntV CmtyV((int) (GetSumVal(IsOut, c) * 10), 0);
00567     for (int u = 0; u < G->GetNodes(); u++) {
00568       if (GetCom(IsOut, u, c) > Thres) { CmtyV.Add(NIDV[u]); }
00569     }
00570     if (CmtyV.Len() >= MinSz) { CmtyVV.Add(CmtyV); }
00571   }
00572   if ( NumComs != CmtyVV.Len()) {
00573     printf("Community vector generated. %d communities are ommitted\n", NumComs.Val - CmtyVV.Len());
00574   }
00575 }
00576 
00577 PNGraph TCoda::GetGraphRawNID() {
00578   PNGraph NewG = TNGraph::New(G->GetNodes(), -1);
00579   for (TNGraph::TNodeI NI = G->BegNI(); NI < G->EndNI(); NI++) {
00580     //add node
00581     int NIdx = NI.GetId();
00582     int NID = NIDV[NIdx];
00583     if (! NewG->IsNode(NID)) { NewG->AddNode(NID); }
00584     //add edge
00585     for (int e = 0; e < NI.GetOutDeg(); e++) {
00586       int OutNID = NIDV[NI.GetOutNId(e)];
00587       if (! NewG->IsNode(OutNID)) { NewG->AddNode(OutNID); }
00588       NewG->AddEdge(NID, OutNID);
00589     }
00590   }
00591   IAssert(G->GetNodes() == NewG->GetNodes());
00592   IAssert(G->GetEdges() == NewG->GetEdges());
00593   return NewG;
00594 }
00595 
00596 void TCoda::GetNIDValH(TIntFltH& NIdValInOutH, TIntFltH& NIdValOutH, TIntFltH& NIdValInH, const int CID, const double Thres) {
00597   NIdValOutH.Gen((int) GetSumVal(true, CID) + 1);
00598   NIdValInH.Gen((int) GetSumVal(false, CID) + 1);
00599   NIdValInOutH.Gen((int) GetSumVal(false, CID) + 1);
00600   if (GetSumVal(true, CID) < Thres && GetSumVal(false, CID) < Thres) { return; }
00601   for (int u = 0; u < NIDV.Len(); u++) {
00602     if (GetCom(true, u, CID) >= Thres && GetCom(false, u, CID) >= Thres) {
00603       NIdValInOutH.AddDat(NIDV[u], GetCom(true, u, CID) + GetCom(false, u, CID));
00604     }
00605     if (GetCom(true, u, CID) >= Thres) {
00606       NIdValOutH.AddDat(NIDV[u], GetCom(true, u, CID));
00607     }
00608     if (GetCom(false, u, CID) >= Thres) {
00609       NIdValInH.AddDat(NIDV[u], GetCom(false, u, CID));
00610     }
00611   }
00612   NIdValInH.SortByDat(false);
00613   NIdValOutH.SortByDat(false);
00614   NIdValInOutH.SortByDat(false);
00615 }
00616 
00617 void TCoda::DumpMemberships(const TStr& OutFNm, const TStrHash<TInt>& NodeNameH, const double Thres) {
00618   if (NodeNameH.Len() > 0) { IAssert(NodeNameH.Len() == G->GetNodes()); }
00619   FILE* FId = fopen(OutFNm.CStr(), "wt");
00620   TIntFltH CIDSumFH(NumComs);
00621   for (int c = 0; c < NumComs; c++) {
00622     CIDSumFH.AddDat(c, GetSumVal(true, c) * GetSumVal(false, c));
00623   }
00624   CIDSumFH.SortByDat(false);
00625   for (int c = 0; c < NumComs; c++) {
00626     int CID = CIDSumFH.GetKey(c);
00627     TIntFltH NIDOutFH, NIDInFH, NIDInOutFH;
00628     GetNIDValH(NIDInOutFH, NIDOutFH, NIDInFH, CID, Thres);
00629     if (NIDOutFH.Len() == 0 || NIDInFH.Len() == 0) { continue; }
00630     
00631     /*
00632     if (GetSumVal(true, CID) < Thres && GetSumVal(false, CID) < Thres) { continue; }
00633     for (int u = 0; u < NIDV.Len(); u++) {
00634       if (GetCom(true, u, CID) >= Thres && GetCom(false, u, CID) >= Thres) { 
00635         NIDInOutFH.AddDat(u, GetCom(true, u, CID) + GetCom(false, u, CID)); 
00636       } else if (GetCom(true, u, CID) >= Thres) {
00637         NIDOutFH.AddDat(u, GetCom(true, u, CID));
00638       } else if (GetCom(false, u, CID) >= Thres) {
00639         NIDInFH.AddDat(u, GetCom(false, u, CID));
00640       }
00641     }
00642     NIDInOutFH.SortByDat(false);
00643     NIDInFH.SortByDat(false);
00644     NIDOutFH.SortByDat(false);
00645     */
00646     fprintf(FId, "%d\t%d\t%d\t%f\t%f\t%f\t", NIDInOutFH.Len(), NIDInFH.Len() - NIDInOutFH.Len(), NIDOutFH.Len() - NIDInOutFH.Len(), CIDSumFH.GetDat(CID).Val, GetSumVal(false, CID).Val, GetSumVal(true, CID).Val);
00647     fprintf(FId, "InOut:\t");
00648     for (int u = 0; u < NIDInOutFH.Len(); u++) {
00649       int NIdx = NIDInOutFH.GetKey(u);
00650       fprintf(FId, "%s (%f)\t", NodeNameH.GetKey(NIdx), NIDInOutFH[u].Val);
00651     }
00652     fprintf(FId, "In:\t");
00653     for (int u = 0; u < NIDInFH.Len(); u++) {
00654       int NIdx = NIDInFH.GetKey(u);
00655       fprintf(FId, "%s (%f)\t", NodeNameH.GetKey(NIdx), NIDInFH[u].Val);
00656     }
00657     fprintf(FId, "Out:\t");
00658     for (int u = 0; u < NIDOutFH.Len(); u++) {
00659       int NIdx = NIDOutFH.GetKey(u);
00660       fprintf(FId, "%s (%f)\t", NodeNameH.GetKey(NIdx), NIDOutFH[u].Val);
00661     }
00662     fprintf(FId, "\n");
00663   }
00664   fclose(FId);
00665 }
00666 
00667 
00668 void TCoda::DumpMemberships(const TStr& OutFNm, const double Thres) {
00669   TStrHash<TInt> NodeNameH(G->GetNodes(), false);
00670   for (int u = 0; u < NIDV.Len(); u++) { NodeNameH.AddKey(TStr::Fmt("%d", NIDV[u].Val)); }
00671   DumpMemberships(OutFNm, NodeNameH, Thres);
00672 }
00673 
00674 void TCoda::GetCmtyS(TIntSet& CmtySOut, TIntSet& CmtySIn, const int CID, const double Thres) {
00675   CmtySOut.Gen(G->GetNodes() / 10);
00676   CmtySIn.Gen(G->GetNodes() / 10);
00677   for (int u = 0; u < NIDV.Len(); u++) {
00678     if (GetCom(true, u, CID) > Thres) {
00679       //CmtySOut.AddKey(
00680     }
00681   }
00682 }
00683 
00684 /*
00685 void TCoda::GetCmtyVVIn(TVec<TIntV>& CmtyVV) {
00686   GetCmtyVVIn(CmtyVV, sqrt(1.0 / G->GetNodes()), 3);
00687 }
00688 
00690 void TCoda::GetCmtyVVIn(TVec<TIntV>& CmtyVV, const double Thres, const int MinSz) {
00691   CmtyVV.Gen(NumComs, 0);
00692   TIntFltH CIDSumHH(NumComs);
00693   for (int c = 0; c < SumHV.Len(); c++) {
00694     CIDSumHH.AddDat(c, SumHV[c]);
00695   }
00696   CIDSumHH.SortByDat(false);
00697   for (int c = 0; c < NumComs; c++) {
00698     int CID = CIDSumHH.GetKey(c);
00699     TIntFltH NIDHucH(H.Len() / 10);
00700     TIntV CmtyV;
00701     IAssert(SumHV[CID] == CIDSumHH.GetDat(CID));
00702     if (SumHV[CID] < Thres) { continue; }
00703     for (int u = 0; u < H.Len(); u++) {
00704       int NID = u;
00705       if (! NodesOk) { NID = NIDV[u]; }
00706       if (GetComIn(u, CID) >= Thres) { NIDHucH.AddDat(NID, GetComIn(u, CID)); }
00707     }
00708     NIDHucH.SortByDat(false);
00709     NIDHucH.GetKeyV(CmtyV);
00710     if (CmtyV.Len() >= MinSz) { CmtyVV.Add(CmtyV); }
00711   }
00712   if ( NumComs != CmtyVV.Len()) {
00713     printf("Community vector generated. %d communities are ommitted\n", NumComs.Val - CmtyVV.Len());
00714   }
00715 }
00716 */
00717 void TCoda::GetCmtyVV(TVec<TIntV>& CmtyVVOut, TVec<TIntV>& CmtyVVIn, const int MinSz) {
00718   GetCmtyVV(false, CmtyVVIn, sqrt(1.0 / G->GetNodes()), MinSz);
00719   GetCmtyVV(true, CmtyVVOut, sqrt(1.0 / G->GetNodes()), MinSz);
00720 }
00721 
00722 void TCoda::GetCmtyVVUnSorted(TVec<TIntV>& CmtyVVOut, TVec<TIntV>& CmtyVVIn) {
00723   GetCmtyVVUnSorted(false, CmtyVVIn, sqrt(1.0 / G->GetNodes()));
00724   GetCmtyVVUnSorted(true, CmtyVVOut, sqrt(1.0 / G->GetNodes()));
00725 }
00726 
00727 void TCoda::GetCmtyVV(TVec<TIntV>& CmtyVVOut, TVec<TIntV>& CmtyVVIn, const double ThresOut, const double ThresIn, const int MinSz) {
00728   GetCmtyVV(false, CmtyVVIn, ThresIn, MinSz);
00729   GetCmtyVV(true, CmtyVVOut, ThresOut, MinSz);
00730 }
00731 
00733 int TCoda::FindComsByCV(const int NumThreads, const int MaxComs, const int MinComs, const int DivComs, const TStr OutFNm, const int EdgesForCV, const double StepAlpha, const double StepBeta) {
00734     double ComsGap = exp(TMath::Log((double) MaxComs / (double) MinComs) / (double) DivComs);
00735     TIntV ComsV;
00736     ComsV.Add(MinComs);
00737     while (ComsV.Len() < DivComs) {
00738       int NewComs = int(ComsV.Last() * ComsGap);
00739       if (NewComs == ComsV.Last().Val) { NewComs++; }
00740       ComsV.Add(NewComs);
00741     }
00742     if (ComsV.Last() < MaxComs) { ComsV.Add(MaxComs); }
00743     return FindComsByCV(ComsV, 0.1, NumThreads, OutFNm, EdgesForCV, StepAlpha, StepBeta);
00744 }
00745 
00746 int TCoda::FindComsByCV(TIntV& ComsV, const double HOFrac, const int NumThreads, const TStr PlotLFNm, const int EdgesForCV, const double StepAlpha, const double StepBeta) {
00747   if (ComsV.Len() == 0) {
00748     int MaxComs = G->GetNodes() / 5;
00749     ComsV.Add(2);
00750     while(ComsV.Last() < MaxComs) { ComsV.Add(ComsV.Last() * 2); }
00751   }
00752   int MaxIterCV = 3;
00753 
00754   TVec<TVec<TIntSet> > HoldOutSets(MaxIterCV);
00755   TFltIntPrV NIdPhiV;
00756   TAGMFastUtil::GetNIdPhiV<PNGraph>(G, NIdPhiV);
00757 
00758   if (G->GetEdges() > EdgesForCV) { //if edges are many enough, use CV
00759     printf("generating hold out set\n");
00760     TIntV NIdV1, NIdV2;
00761     G->GetNIdV(NIdV1);
00762     G->GetNIdV(NIdV2);
00763     for (int IterCV = 0; IterCV < MaxIterCV; IterCV++) {
00764       // generate holdout sets
00765       TAGMFastUtil::GenHoldOutPairs(G, HoldOutSets[IterCV], HOFrac, Rnd);
00766       /*
00767       HoldOutSets[IterCV].Gen(G->GetNodes());
00768       const int HOTotal = int(HOFrac * G->GetNodes() * (G->GetNodes() - 1) / 2.0);
00769       int HOCnt = 0;
00770       int HOEdges = (int) TMath::Round(HOFrac * G->GetEdges());
00771       printf("holding out %d edges...\n", HOEdges);
00772       for (int he = 0; he < (int) HOEdges; he++) {
00773         HoldOutSets[IterCV][EdgeV[he].Val1].AddKey(EdgeV[he].Val2);
00774         HoldOutSets[IterCV][EdgeV[he].Val2].AddKey(EdgeV[he].Val1);
00775         HOCnt++;
00776       }
00777       printf("%d Edges hold out\n", HOCnt);
00778       while(HOCnt++ < HOTotal) {
00779         int SrcNID = Rnd.GetUniDevInt(G->GetNodes());
00780         int DstNID = Rnd.GetUniDevInt(G->GetNodes());
00781         HoldOutSets[IterCV][SrcNID].AddKey(DstNID);
00782         HoldOutSets[IterCV][DstNID].AddKey(SrcNID);
00783       }
00784       */
00785     }
00786     
00787     printf("hold out set generated\n");
00788   }
00789 
00790   TFltV HOLV(ComsV.Len());
00791   TIntFltPrV ComsLV;
00792   for (int c = 0; c < ComsV.Len(); c++) {
00793     const int Coms = ComsV[c];
00794     printf("Try number of Coms:%d\n", Coms);
00795 
00796     if (G->GetEdges() > EdgesForCV) { //if edges are many enough, use CV
00797       for (int IterCV = 0; IterCV < MaxIterCV; IterCV++) {
00798         HOVIDSV = HoldOutSets[IterCV];
00799         NeighborComInit(NIdPhiV, Coms);
00800         printf("Initialized\n");
00801 
00802         if (NumThreads == 1) {
00803           printf("MLE without parallelization begins\n");
00804           MLEGradAscent(0.05, 10 * G->GetNodes(), "", StepAlpha, StepBeta);
00805         } else {
00806           printf("MLE with parallelization begins\n");
00807           MLEGradAscentParallel(0.05, 100, NumThreads, "", StepAlpha, StepBeta);
00808         }
00809         double HOL = LikelihoodHoldOut();
00810         HOL = HOL < 0? HOL: TFlt::Mn;
00811         HOLV[c] += HOL;
00812       }
00813     }
00814     else {
00815       HOVIDSV.Gen(G->GetNodes());
00816       MLEGradAscent(0.0001, 100 * G->GetNodes(), "");
00817       double BIC = 2 * Likelihood() - (double) G->GetNodes() * Coms * 2.0 * log ( (double) G->GetNodes());
00818       HOLV[c] = BIC;
00819     }
00820   }
00821   int EstComs = 2;
00822   double MaxL = TFlt::Mn;
00823   printf("\n");
00824   for (int c = 0; c < ComsV.Len(); c++) {
00825     ComsLV.Add(TIntFltPr(ComsV[c].Val, HOLV[c].Val));
00826     printf("%d(%f)\t", ComsV[c].Val, HOLV[c].Val);
00827     if (MaxL < HOLV[c]) {
00828       MaxL = HOLV[c];
00829       EstComs = ComsV[c];
00830     }
00831   }
00832   printf("\n");
00833   RandomInit(EstComs);
00834   HOVIDSV.Gen(G->GetNodes());
00835   if (! PlotLFNm.Empty()) {
00836     TGnuPlot::PlotValV(ComsLV, PlotLFNm, "hold-out likelihood", "communities", "likelihood");
00837   }
00838   return EstComs;
00839 }
00840 
00841 double TCoda::LikelihoodHoldOut(const bool DoParallel) { 
00842   double L = 0.0;
00843   for (int u = 0; u < HOVIDSV.Len(); u++) {
00844     for (int e = 0; e < HOVIDSV[u].Len(); e++) {
00845       int VID = HOVIDSV[u][e];
00846       if (VID == u) { continue; } 
00847       double Pred = Prediction(u, VID);
00848       if (G->IsEdge(u, VID)) {
00849         L += log(1.0 - Pred);
00850       }
00851       else {
00852         L += NegWgt * log(Pred);
00853       }
00854     }
00855   }
00856   return L;
00857 }
00858 
00859 double TCoda::GetStepSizeByLineSearch(const bool IsRow, const int UID, const TIntFltH& DeltaV, const TIntFltH& GradV, const double& Alpha, const double& Beta, const int MaxIter) {
00860   double StepSize = 1.0;
00861   double InitLikelihood = LikelihoodForNode(IsRow, UID);
00862   TIntFltH NewVarV(DeltaV.Len());
00863   for(int iter = 0; iter < MaxIter; iter++) {
00864     for (int i = 0; i < DeltaV.Len(); i++){
00865       int CID = DeltaV.GetKey(i);
00866       double NewVal;
00867       NewVal = GetCom(IsRow, UID, CID) + StepSize * DeltaV.GetDat(CID);
00868       if (NewVal < MinVal) { NewVal = MinVal; }
00869       if (NewVal > MaxVal) { NewVal = MaxVal; }
00870       NewVarV.AddDat(CID, NewVal);
00871     }
00872     if (LikelihoodForNode(IsRow, UID, NewVarV) < InitLikelihood + Alpha * StepSize * DotProduct(GradV, DeltaV)) {
00873       StepSize *= Beta;
00874     } else {
00875       break;
00876     }
00877     if (iter == MaxIter - 1) { 
00878       StepSize = 0.0;
00879       break;
00880     }
00881   }
00882   return StepSize;
00883 }
00884 
00885 int TCoda::MLEGradAscent(const double& Thres, const int& MaxIter, const TStr PlotNm, const double StepAlpha, const double StepBeta) {
00886   time_t InitTime = time(NULL);
00887   TExeTm ExeTm, CheckTm;
00888   int iter = 0, PrevIter = 0;
00889   TIntFltPrV IterLV;
00890   TNGraph::TNodeI UI;
00891   double PrevL = TFlt::Mn, CurL = 0.0;
00892   TIntV NIdxV(F.Len(), 0);
00893   for (int i = 0; i < F.Len(); i++) { NIdxV.Add(i); }
00894   IAssert(NIdxV.Len() == F.Len());
00895   TIntFltH GradV;
00896   while(iter < MaxIter) {
00897     NIdxV.Shuffle(Rnd);
00898     for (int ui = 0; ui < F.Len(); ui++, iter++) {
00899       const bool IsRow = (ui % 2 == 0);
00900       int u = NIdxV[ui]; //
00901       //find set of candidate c (we only need to consider c to which a neighbor of u belongs to)
00902       UI = G->GetNI(u);
00903       const int Deg = IsRow? UI.GetOutDeg(): UI.GetInDeg();
00904       TIntSet CIDSet(5 * Deg);
00905       for (int e = 0; e < Deg; e++) {
00906         int VID = IsRow? UI.GetOutNId(e): UI.GetInNId(e);
00907         if (HOVIDSV[u].IsKey(VID)) { continue; }
00908         TIntFltH NbhCIDH = IsRow? H[VID]: F[VID];
00909         for (TIntFltH::TIter CI = NbhCIDH.BegI(); CI < NbhCIDH.EndI(); CI++) {
00910           CIDSet.AddKey(CI.GetKey());
00911           //printf("CI.GetKey:%d\n", CI.GetKey());
00912           IAssert(CI.GetKey() <= NumComs);
00913         }
00914       }
00915       TIntFltH& CurMem = IsRow? F[u]: H[u];
00916       for (TIntFltH::TIter CI = CurMem.BegI(); CI < CurMem.EndI(); CI++) { //remove the community membership which U does not share with its neighbors
00917         if (! CIDSet.IsKey(CI.GetKey())) {
00918           DelCom(IsRow, u, CI.GetKey());
00919         }
00920       }
00921       if (CIDSet.Empty()) { continue; }
00922       GradientForNode(IsRow, u, GradV, CIDSet);
00923       if (Norm2(GradV) < 1e-4) { continue; }
00924       double LearnRate = GetStepSizeByLineSearch(IsRow, u, GradV, GradV, StepAlpha, StepBeta);
00925       if (LearnRate == 0.0) { continue; }
00926       for (int ci = 0; ci < GradV.Len(); ci++) {
00927         int CID = GradV.GetKey(ci);
00928         double Change = LearnRate * GradV.GetDat(CID);
00929         double NewFuc = GetCom(IsRow, u, CID) + Change;
00930         if (NewFuc <= 0.0) {
00931           DelCom(IsRow, u, CID);
00932         } else {
00933           AddCom(IsRow, u, CID, NewFuc);
00934         }
00935       }
00936       if (! PlotNm.Empty() && (iter + 1) % G->GetNodes() == 0) {
00937         IterLV.Add(TIntFltPr(iter, Likelihood(false)));
00938       }
00939     }
00940     printf("\r%d iterations (%f) [%lu sec]", iter, CurL, time(NULL) - InitTime);
00941     fflush(stdout);
00942     if (iter - PrevIter >= 2 * G->GetNodes() && iter > 10000) {
00943       PrevIter = iter;
00944       CurL = Likelihood();
00945       if (PrevL > TFlt::Mn && ! PlotNm.Empty()) {
00946         printf("\r%d iterations, Likelihood: %f, Diff: %f", iter, CurL,  CurL - PrevL);
00947       }
00948       fflush(stdout);
00949       if (CurL - PrevL <= Thres * fabs(PrevL)) { break; }
00950       else { PrevL = CurL; }
00951     }
00952     
00953   }
00954   printf("\n");
00955   printf("MLE for Lambda completed with %d iterations(%s)\n", iter, ExeTm.GetTmStr());
00956   if (! PlotNm.Empty()) {
00957     TGnuPlot::PlotValV(IterLV, PlotNm + ".likelihood_Q");
00958   }
00959   return iter;
00960 }
00961 
00962 int TCoda::MLEGradAscentParallel(const double& Thres, const int& MaxIter, const int ChunkNum, const int ChunkSize, const TStr PlotNm, const double StepAlpha, const double StepBeta) {
00963   //parallel
00964   time_t InitTime = time(NULL);
00965   //uint64 StartTm = TSecTm::GetCurTm().GetAbsSecs();
00966   TExeTm ExeTm, CheckTm;
00967   double PrevL = Likelihood(true);
00968   TIntFltPrV IterLV;
00969   int PrevIter = 0;
00970   int iter = 0;
00971   TIntV NIdxV(F.Len(), 0);
00972   for (int i = 0; i < F.Len(); i++) { NIdxV.Add(i); }
00973   TIntV NIDOPTV(F.Len()); //check if a node needs optimization or not 1: does not require optimization
00974   NIDOPTV.PutAll(0);
00975   TVec<TIntFltH> NewF(ChunkNum * ChunkSize);
00976   TIntV NewNIDV(ChunkNum * ChunkSize);
00977   TBoolV IsRowV(ChunkNum * ChunkSize);
00978   for (iter = 0; iter < MaxIter; iter++) {
00979     NIdxV.Clr(false);
00980     for (int i = 0; i < F.Len(); i++) { 
00981       //if (NIDOPTV[i] == 0) {  NIdxV.Add(i); }
00982       NIdxV.Add(i);
00983     }
00984     IAssert (NIdxV.Len() <= F.Len());
00985     NIdxV.Shuffle(Rnd);
00986     // compute gradient for chunk of nodes
00987 #pragma omp parallel for schedule(static, 1)
00988     for (int TIdx = 0; TIdx < ChunkNum; TIdx++) {
00989       TIntFltH GradV;
00990       for (int ui = TIdx * ChunkSize; ui < (TIdx + 1) * ChunkSize; ui++) {
00991         const bool IsRow = (ui % 2 == 0);
00992         NewNIDV[ui] = -1;
00993         if (ui > NIdxV.Len()) { continue; }
00994         const int u = NIdxV[ui]; //
00995         //find set of candidate c (we only need to consider c to which a neighbor of u belongs to)
00996         TNGraph::TNodeI UI = G->GetNI(u);
00997         const int Deg = IsRow? UI.GetOutDeg(): UI.GetInDeg();
00998         TIntSet CIDSet(5 * Deg);
00999         TIntFltH CurFU = IsRow? F[u]: H[u];
01000         for (int e = 0; e < Deg; e++) {
01001           int VID = IsRow? UI.GetOutNId(e): UI.GetInNId(e);
01002           if (HOVIDSV[u].IsKey(VID)) { continue; }
01003           TIntFltH& NbhCIDH = IsRow? H[VID]: F[VID];
01004           for (TIntFltH::TIter CI = NbhCIDH.BegI(); CI < NbhCIDH.EndI(); CI++) {
01005             CIDSet.AddKey(CI.GetKey());
01006           }
01007         }
01008         if (CIDSet.Empty()) { 
01009           CurFU.Clr();
01010         }
01011         else {
01012           for (TIntFltH::TIter CI = CurFU.BegI(); CI < CurFU.EndI(); CI++) { //remove the community membership which U does not share with its neighbors
01013             if (! CIDSet.IsKey(CI.GetKey())) {
01014               CurFU.DelIfKey(CI.GetKey());
01015             }
01016           }
01017           GradientForNode(IsRow, u, GradV, CIDSet);
01018           if (Norm2(GradV) < 1e-4) { NIDOPTV[u] = 1; continue; }
01019           double LearnRate = GetStepSizeByLineSearch(IsRow, u, GradV, GradV, StepAlpha, StepBeta);
01020           if (LearnRate == 0.0) { NewNIDV[ui] = -2; continue; }
01021           for (int ci = 0; ci < GradV.Len(); ci++) {
01022             int CID = GradV.GetKey(ci);
01023             double Change = LearnRate * GradV.GetDat(CID);
01024             double NewFuc = CurFU.IsKey(CID)? CurFU.GetDat(CID) + Change : Change;
01025             if (NewFuc <= 0.0) {
01026               CurFU.DelIfKey(CID);
01027             } else {
01028               CurFU.AddDat(CID) = NewFuc;
01029             }
01030           }
01031           CurFU.Defrag();
01032         }
01033         //store changes
01034         NewF[ui] = CurFU;
01035         NewNIDV[ui] = u;
01036         IsRowV[ui] = IsRow;
01037       }
01038     }
01039     int NumNoChangeGrad = 0;
01040     int NumNoChangeStepSize = 0;
01041     for (int ui = 0; ui < NewNIDV.Len(); ui++) {
01042       int NewNID = NewNIDV[ui];
01043       if (NewNID == -1) { NumNoChangeGrad++; continue; }
01044       if (NewNID == -2) { NumNoChangeStepSize++; continue; }
01045       if (IsRowV[ui]) {
01046         for (TIntFltH::TIter CI = F[NewNID].BegI(); CI < F[NewNID].EndI(); CI++) {
01047           SumFV[CI.GetKey()] -= CI.GetDat();
01048         }
01049       } else {
01050         for (TIntFltH::TIter CI = H[NewNID].BegI(); CI < H[NewNID].EndI(); CI++) {
01051           SumHV[CI.GetKey()] -= CI.GetDat();
01052         }
01053       }
01054     }
01055 #pragma omp parallel for
01056     for (int ui = 0; ui < NewNIDV.Len(); ui++) {
01057       int NewNID = NewNIDV[ui];
01058       if (NewNID < 0) { continue; }
01059       if (IsRowV[ui]) {
01060         F[NewNID] = NewF[ui];
01061       } else {
01062         H[NewNID] = NewF[ui];
01063       }
01064     }
01065     for (int ui = 0; ui < NewNIDV.Len(); ui++) {
01066       int NewNID = NewNIDV[ui];
01067       if (NewNID < 0) { continue; }
01068       if (IsRowV[ui]) {
01069         for (TIntFltH::TIter CI = F[NewNID].BegI(); CI < F[NewNID].EndI(); CI++) {
01070           SumFV[CI.GetKey()] += CI.GetDat();
01071         }
01072       } else {
01073         for (TIntFltH::TIter CI = H[NewNID].BegI(); CI < H[NewNID].EndI(); CI++) {
01074           SumHV[CI.GetKey()] += CI.GetDat();
01075         }
01076       }
01077     }
01078     // update the nodes who are optimal
01079     for (int ui = 0; ui < NewNIDV.Len(); ui++) {
01080       int NewNID = NewNIDV[ui];
01081       if (NewNID < 0) { continue; }
01082       TNGraph::TNodeI UI = G->GetNI(NewNID);
01083       NIDOPTV[NewNID] = 0;
01084       for (int e = 0; e < UI.GetDeg(); e++) {
01085         NIDOPTV[UI.GetNbrNId(e)] = 0;
01086       }
01087     }
01088     int OPTCnt = 0;
01089     for (int i = 0; i < NIDOPTV.Len(); i++) { if (NIDOPTV[i] == 1) { OPTCnt++; } }
01090     /*
01091     if (! PlotNm.Empty()) {
01092       printf("\r%d iterations [%s] %lu secs", iter * ChunkSize * ChunkNum, ExeTm.GetTmStr(), TSecTm::GetCurTm().GetAbsSecs() - StartTm);
01093       if (PrevL > TFlt::Mn) { printf(" (%f) %d g %d s %d OPT", PrevL, NumNoChangeGrad, NumNoChangeStepSize, OPTCnt); }
01094       fflush(stdout);
01095     }
01096     */
01097     if ((iter - PrevIter) * ChunkSize * ChunkNum >= G->GetNodes()) {
01098       PrevIter = iter;
01099       double CurL = Likelihood(true);
01100       IterLV.Add(TIntFltPr(iter * ChunkSize * ChunkNum, CurL));
01101       printf("\r%d iterations, Likelihood: %f, Diff: %f [%lu secs]", iter, CurL,  CurL - PrevL, time(NULL) - InitTime);
01102        fflush(stdout);
01103       if (CurL - PrevL <= Thres * fabs(PrevL)) { 
01104         break;
01105       }
01106       else {
01107         PrevL = CurL;
01108       }
01109     }
01110   }
01111   if (! PlotNm.Empty()) {
01112     printf("\nMLE completed with %d iterations(%lu secs)\n", iter, time(NULL) - InitTime);
01113     TGnuPlot::PlotValV(IterLV, PlotNm + ".likelihood_Q");
01114   } else {
01115     printf("\rMLE completed with %d iterations(%lu secs)\n", iter, time(NULL) - InitTime);
01116     fflush(stdout);
01117   }
01118   return iter;
01119 }