SNAP Library 2.2, Developer Reference  2014-03-11 19:15:55
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
agm.cpp
Go to the documentation of this file.
00001 #include "stdafx.h"
00002 #include "Snap.h"
00003 #include "agm.h"
00004 #include "agmfit.h"
00005 
00007 // AGM graph generation.
00008 
00010 void TAGM::RndConnectInsideCommunity(PUNGraph& Graph, const TIntV& CmtyV, const double& Prob, TRnd& Rnd){
00011   int CNodes = CmtyV.Len(), CEdges;
00012   if (CNodes < 20) {
00013     CEdges = (int) Rnd.GetBinomialDev(Prob, CNodes * (CNodes-1) / 2);
00014   } else {
00015     CEdges = (int) (Prob * CNodes * (CNodes - 1) / 2);
00016   }
00017   THashSet<TIntPr> NewEdgeSet(CEdges);
00018   for (int edge = 0; edge < CEdges; ) {
00019     int SrcNId = CmtyV[Rnd.GetUniDevInt(CNodes)];
00020     int DstNId = CmtyV[Rnd.GetUniDevInt(CNodes)];
00021     if (SrcNId > DstNId) { Swap(SrcNId,DstNId); }
00022     if (SrcNId != DstNId && ! NewEdgeSet.IsKey(TIntPr(SrcNId, DstNId))) { // is new edge
00023       NewEdgeSet.AddKey(TIntPr(SrcNId, DstNId));
00024       Graph->AddEdge(SrcNId, DstNId);
00025       edge++; 
00026     } 
00027   }
00028 }
00029 
00030 
00031 PUNGraph TAGM::GenAGM(TVec<TIntV>& CmtyVV, const double& DensityCoef, const int TargetEdges, TRnd& Rnd){
00032   PUNGraph TryG = TAGM::GenAGM(CmtyVV, DensityCoef, 1.0, Rnd);
00033   const double ScaleCoef = (double) TargetEdges / (double) TryG->GetEdges();
00034   return TAGM::GenAGM(CmtyVV, DensityCoef, ScaleCoef, Rnd);
00035 }
00036 
00037 PUNGraph TAGM::GenAGM(TVec<TIntV>& CmtyVV, const double& DensityCoef, const double& ScaleCoef, TRnd& Rnd){
00038   TFltV CProbV;
00039   double Prob;
00040   for (int i = 0; i < CmtyVV.Len(); i++) {
00041     Prob = ScaleCoef*pow( double( CmtyVV[i].Len()), - DensityCoef);
00042     if (Prob > 1.0) { Prob = 1; }
00043     CProbV.Add(Prob);
00044   }
00045   return TAGM::GenAGM(CmtyVV, CProbV, Rnd);
00046 }
00047 
00049 PUNGraph TAGM::GenAGM(TVec<TIntV>& CmtyVV, const TFltV& CProbV, TRnd& Rnd, const double PNoCom){
00050   PUNGraph G = TUNGraph::New(100 * CmtyVV.Len(), -1);
00051   printf("AGM begins\n");
00052   for (int i = 0; i < CmtyVV.Len(); i++) {
00053     TIntV& CmtyV = CmtyVV[i];
00054     for (int u = 0; u < CmtyV.Len(); u++) {
00055       if ( G->IsNode(CmtyV[u])) { continue; }
00056       G->AddNode(CmtyV[u]);
00057     }
00058     double Prob = CProbV[i];
00059     RndConnectInsideCommunity(G, CmtyV, Prob, Rnd);
00060   }
00061   if (PNoCom > 0.0) { //if we want to connect nodes that do not share any community
00062     TIntSet NIDS;
00063     for (int c = 0; c < CmtyVV.Len(); c++) {
00064       for (int u = 0; u < CmtyVV[c].Len(); u++) {
00065         NIDS.AddKey(CmtyVV[c][u]);
00066       }
00067     }
00068     TIntV NIDV;
00069     NIDS.GetKeyV(NIDV);
00070     RndConnectInsideCommunity(G,NIDV,PNoCom,Rnd);
00071   }
00072   printf("AGM completed (%d nodes %d edges)\n",G->GetNodes(),G->GetEdges());
00073   G->Defrag();
00074   return G;
00075 }
00076 
00079 
00081 void TAGMUtil::GenPLSeq(TIntV& SzSeq, const int& SeqLen, const double& Alpha, TRnd& Rnd, const int& Min, const int& Max) {
00082   SzSeq.Gen(SeqLen, 0);
00083   while (SzSeq.Len() < SeqLen) {
00084     int Sz = (int) TMath::Round(Rnd.GetPowerDev(Alpha));
00085     if (Sz >= Min && Sz <= Max) {
00086       SzSeq.Add(Sz);
00087     }
00088   }
00089 }
00090 
00092 void TAGMUtil::GenCmtyVVFromPL(TVec<TIntV>& CmtyVV, const int& Nodes, const int& Coms, const double& ComSzAlpha, const double& MemAlpha, const int& MinSz, const int& MaxSz, const int& MinK, const int& MaxK, TRnd& Rnd){
00093   TIntV NIDV(Nodes, 0);
00094   for (int i = 0; i < Nodes; i++) {
00095     NIDV.Add(i);
00096   }
00097   GenCmtyVVFromPL(CmtyVV, NIDV, Nodes, Coms, ComSzAlpha, MemAlpha, MinSz, MaxSz, MinK, MaxK, Rnd);
00098 }
00099 
00101 void TAGMUtil::GenCmtyVVFromPL(TVec<TIntV>& CmtyVV, const PUNGraph& Graph, const int& Nodes, const int& Coms, const double& ComSzAlpha, const double& MemAlpha, const int& MinSz, const int& MaxSz, const int& MinK, const int& MaxK, TRnd& Rnd){
00102   if (Coms == 0 || Nodes == 0) {
00103     CmtyVV.Clr();
00104     return;
00105   }
00106   TIntV NIDV;
00107   Graph->GetNIdV(NIDV);
00108   GenCmtyVVFromPL(CmtyVV, NIDV, Nodes, Coms, ComSzAlpha, MemAlpha, MinSz, MaxSz, MinK, MaxK, Rnd);
00109 }
00110 
00112 void TAGMUtil::GenCmtyVVFromPL(TVec<TIntV>& CmtyVV, const TIntV& NIDV, const int& Nodes, const int& Coms, const double& ComSzAlpha, const double& MemAlpha, const int& MinSz, const int& MaxSz, const int& MinK, const int& MaxK, TRnd& Rnd){
00113   if (Coms == 0 || Nodes == 0) {
00114     CmtyVV.Clr();
00115     return;
00116   }
00117   TIntV ComSzSeq, MemSeq;
00118   TAGMUtil::GenPLSeq(ComSzSeq,Coms,ComSzAlpha,Rnd,MinSz,MaxSz);
00119   TAGMUtil::GenPLSeq(MemSeq,Nodes,MemAlpha,Rnd,MinK,MaxK);
00120   TIntPrV CIDSzPrV, NIDMemPrV;
00121   for (int i = 0; i < ComSzSeq.Len(); i++) {
00122     CIDSzPrV.Add(TIntPr(i, ComSzSeq[i]));
00123   }
00124   for (int i = 0; i < MemSeq.Len(); i++) {
00125     NIDMemPrV.Add(TIntPr(NIDV[i], MemSeq[i]));
00126   }
00127   TAGMUtil::ConnectCmtyVV(CmtyVV, CIDSzPrV, NIDMemPrV, Rnd);
00128 }
00129 
00131 void TAGMUtil::ConnectCmtyVV(TVec<TIntV>& CmtyVV, const TIntPrV& CIDSzPrV, const TIntPrV& NIDMemPrV, TRnd& Rnd) {
00132   const int Nodes = NIDMemPrV.Len(), Coms = CIDSzPrV.Len();
00133   TIntV NDegV,CDegV;
00134   TIntPrSet CNIDSet;
00135   TIntSet HitNodes(Nodes);
00136   THash<TInt,TIntV> CmtyVH;
00137   for (int i = 0;i < CIDSzPrV.Len(); i++) {
00138     for (int j = 0; j < CIDSzPrV[i].Val2; j++) {
00139       CDegV.Add(CIDSzPrV[i].Val1);
00140     }
00141   }
00142   for (int i = 0; i < NIDMemPrV.Len(); i++) {
00143     for (int j = 0; j < NIDMemPrV[i].Val2; j++) {
00144       NDegV.Add(NIDMemPrV[i].Val1);
00145     }
00146   }
00147   while (CDegV.Len() < (int) (1.2 * Nodes)) {
00148     CDegV.Add(CIDSzPrV[Rnd.GetUniDevInt(Coms)].Val1);
00149   }
00150   while (NDegV.Len() < CDegV.Len()) {
00151     NDegV.Add(NIDMemPrV[Rnd.GetUniDevInt(Nodes)].Val1);
00152   }
00153   printf("Total Mem: %d, Total Sz: %d\n",NDegV.Len(), CDegV.Len());
00154   int c=0;
00155   while (c++ < 15 && CDegV.Len() > 1) {
00156     for (int i = 0; i < CDegV.Len(); i++) {
00157       int u = Rnd.GetUniDevInt(CDegV.Len());
00158       int v = Rnd.GetUniDevInt(NDegV.Len());
00159       if (CNIDSet.IsKey(TIntPr(CDegV[u], NDegV[v]))) { continue; }
00160       CNIDSet.AddKey(TIntPr(CDegV[u], NDegV[v]));
00161       HitNodes.AddKey(NDegV[v]);
00162       if (u == CDegV.Len() - 1) { CDegV.DelLast(); }
00163       else { 
00164         CDegV[u] = CDegV.Last(); 
00165         CDegV.DelLast();
00166       }
00167       if (v == NDegV.Len() - 1) { NDegV.DelLast(); }
00168       else { 
00169         NDegV[v] = NDegV.Last();
00170         NDegV.DelLast();
00171       }
00172     }
00173   }
00174   //make sure that every node belongs to at least one community
00175   for (int i = 0; i < Nodes; i++) {
00176     int NID = NIDMemPrV[i].Val1;
00177     if (! HitNodes.IsKey(NID)) {
00178       CNIDSet.AddKey(TIntPr(CIDSzPrV[Rnd.GetUniDevInt(Coms)].Val1, NID));
00179       HitNodes.AddKey(NID);
00180     }
00181   }
00182   IAssert(HitNodes.Len() == Nodes);
00183   for (int i = 0; i < CNIDSet.Len(); i++) {
00184     TIntPr CNIDPr = CNIDSet[i];
00185     CmtyVH.AddDat(CNIDPr.Val1);
00186     CmtyVH.GetDat(CNIDPr.Val1).Add(CNIDPr.Val2);
00187   }
00188   CmtyVH.GetDatV(CmtyVV);
00189 }
00190 
00192 void TAGMUtil::RewireCmtyVV(const TVec<TIntV>& CmtyVVIn, TVec<TIntV>& CmtyVVOut, TRnd& Rnd){
00193   THash<TInt,TIntV> CmtyVH;
00194   for (int i = 0; i < CmtyVVIn.Len(); i++) {
00195     CmtyVH.AddDat(i, CmtyVVIn[i]);
00196   }
00197   TAGMUtil::RewireCmtyNID(CmtyVH, Rnd);
00198   CmtyVH.GetDatV(CmtyVVOut);
00199 }
00200 
00202 void TAGMUtil::RewireCmtyNID(THash<TInt,TIntV >& CmtyVH, TRnd& Rnd) {
00203   THash<TInt,TIntV > NewCmtyVH(CmtyVH.Len());
00204   TIntV NDegV;
00205   TIntV CDegV;
00206   for (int i = 0; i < CmtyVH.Len(); i++) {
00207     int CID = CmtyVH.GetKey(i);
00208     for (int j = 0; j < CmtyVH[i].Len(); j++) {
00209       int NID = CmtyVH[i][j];
00210       NDegV.Add(NID);
00211       CDegV.Add(CID);
00212     }
00213   }
00214   TIntPrSet CNIDSet(CDegV.Len());
00215   int c=0;
00216   while (c++ < 15 && CDegV.Len() > 1){
00217     for (int i = 0; i < CDegV.Len(); i++) {
00218       int u = Rnd.GetUniDevInt(CDegV.Len());
00219       int v = Rnd.GetUniDevInt(NDegV.Len());
00220       if (CNIDSet.IsKey(TIntPr(CDegV[u], NDegV[v]))) { continue; }
00221       CNIDSet.AddKey(TIntPr(CDegV[u], NDegV[v]));
00222       if (u == CDegV.Len() - 1) { 
00223         CDegV.DelLast(); 
00224       }  else {
00225         CDegV[u] = CDegV.Last();
00226         CDegV.DelLast();
00227       }
00228       if ( v == NDegV.Len() - 1) {
00229         NDegV.DelLast();
00230       }  else{
00231         NDegV[v] = NDegV.Last();
00232         NDegV.DelLast();
00233       }
00234     }
00235   }
00236   for (int i = 0; i < CNIDSet.Len(); i++) {
00237     TIntPr CNIDPr = CNIDSet[i];
00238     IAssert(CmtyVH.IsKey(CNIDPr.Val1));
00239     NewCmtyVH.AddDat(CNIDPr.Val1);
00240     NewCmtyVH.GetDat(CNIDPr.Val1).Add(CNIDPr.Val2);
00241   }
00242   CmtyVH = NewCmtyVH;
00243 }
00244 
00246 void TAGMUtil::LoadCmtyVV(const TStr& InFNm, TVec<TIntV>& CmtyVV) {
00247   CmtyVV.Gen(Kilo(100), 0);
00248   TSsParser Ss(InFNm, ssfWhiteSep);
00249   while (Ss.Next()) {
00250     if(Ss.GetFlds() > 0) {
00251       TIntV CmtyV;
00252       for (int i = 0; i < Ss.GetFlds(); i++) {
00253         if (Ss.IsInt(i)) {
00254           CmtyV.Add(Ss.GetInt(i));
00255         }
00256       }
00257       CmtyVV.Add(CmtyV);
00258     }
00259   }
00260   CmtyVV.Pack();
00261   printf("community loading completed (%d communities)\n",CmtyVV.Len());
00262 
00263 }
00264 
00266 void TAGMUtil::LoadCmtyVV(const TStr& InFNm, TVec<TIntV>& CmtyVV, TStrHash<TInt>& StrToNIdH, const int BeginCol, const int MinSz, const TSsFmt Sep) {
00267   CmtyVV.Gen(Kilo(100), 0);
00268   TSsParser Ss(InFNm, Sep);
00269   while (Ss.Next()) {
00270     if(Ss.GetFlds() > BeginCol) {
00271       TIntV CmtyV;
00272       for (int i = BeginCol; i < Ss.GetFlds(); i++) {
00273         if (StrToNIdH.IsKey(Ss.GetFld(i))) {
00274           CmtyV.Add(StrToNIdH.GetKeyId(Ss.GetFld(i)));
00275         }
00276       }
00277       if (CmtyV.Len() < MinSz) { continue; }
00278       CmtyVV.Add(CmtyV);
00279     }
00280   }
00281   CmtyVV.Pack();
00282   printf("community loading completed (%d communities)\n",CmtyVV.Len());
00283 }
00284 
00286 void TAGMUtil::DumpCmtyVV(const TStr& OutFNm, const TVec<TIntV>& CmtyVV) {
00287   FILE* F = fopen(OutFNm.CStr(),"wt");
00288   for (int i = 0; i < CmtyVV.Len(); i++) {
00289     for (int j = 0; j < CmtyVV[i].Len(); j++) {
00290       fprintf(F,"%d\t", (int) CmtyVV[i][j]);
00291     }
00292     fprintf(F,"\n");
00293   }
00294   fclose(F);
00295 }
00296 
00298 void TAGMUtil::DumpCmtyVV(const TStr OutFNm, TVec<TIntV>& CmtyVV, TIntStrH& NIDNmH) {
00299   FILE* F = fopen(OutFNm.CStr(), "wt");
00300   for (int c = 0; c < CmtyVV.Len(); c++) {
00301     for (int u = 0; u < CmtyVV[c].Len(); u++) {
00302       if (NIDNmH.IsKey(CmtyVV[c][u])){
00303         fprintf(F, "%s\t", NIDNmH.GetDat(CmtyVV[c][u]).CStr());
00304       }
00305       else {
00306         fprintf(F, "%d\t", (int) CmtyVV[c][u]);
00307       }
00308     }
00309     fprintf(F, "\n");
00310   }
00311   fclose(F);
00312 }
00313 
00315 int TAGMUtil::TotalMemberships(const TVec<TIntV>& CmtyVV){
00316   int M = 0;
00317   for (int i = 0; i < CmtyVV.Len(); i++) {
00318     M += CmtyVV[i].Len();
00319   }
00320   return M;
00321 }
00322 
00324 void TAGMUtil::GetNodeMembership(TIntH& NIDComVH, const THash<TInt,TIntV >& CmtyVH) {
00325   NIDComVH.Clr();
00326   for (THash<TInt,TIntV>::TIter HI = CmtyVH.BegI(); HI < CmtyVH.EndI(); HI++){
00327     for (int j = 0;j < HI.GetDat().Len(); j++) {
00328       int NID = HI.GetDat()[j];
00329       NIDComVH.AddDat(NID)++;
00330     }
00331   }
00332 }
00333 
00335 void TAGMUtil::GetNodeMembership(THash<TInt,TIntSet >& NIDComVH, const TVec<TIntV>& CmtyVV) {
00336   NIDComVH.Clr();
00337   for (int i = 0; i < CmtyVV.Len(); i++){
00338     int CID = i;
00339     for (int j = 0; j < CmtyVV[i].Len(); j++) {
00340       int NID = CmtyVV[i][j];
00341       NIDComVH.AddDat(NID).AddKey(CID);
00342     }
00343   }
00344 }
00345 
00347 void TAGMUtil::GetNodeMembership(THash<TInt,TIntSet >& NIDComVH, const TVec<TIntV>& CmtyVV, const TIntV& NIDV) {
00348   NIDComVH.Clr();
00349   for (int u = 0; u < NIDV.Len(); u++) {
00350     NIDComVH.AddDat(NIDV[u]);
00351   }
00352   for (int i = 0; i < CmtyVV.Len(); i++){
00353     int CID = i;
00354     for (int j = 0; j < CmtyVV[i].Len(); j++) {
00355       int NID = CmtyVV[i][j];
00356       NIDComVH.AddDat(NID).AddKey(CID);
00357     }
00358   }
00359 }
00360 
00361 
00362 void TAGMUtil::GetNodeMembership(THash<TInt,TIntSet >& NIDComVH, const TVec<TIntSet>& CmtyVV) {
00363   for (int i = 0; i < CmtyVV.Len(); i++){
00364     int CID = i;
00365     for (TIntSet::TIter SI = CmtyVV[i].BegI(); SI < CmtyVV[i].EndI(); SI++) {
00366       int NID = SI.GetKey();
00367       NIDComVH.AddDat(NID).AddKey(CID);
00368     }
00369   }
00370 }
00371 void TAGMUtil::GetNodeMembership(THash<TInt,TIntSet >& NIDComVH, const THash<TInt,TIntV>& CmtyVH) {
00372   for (THash<TInt,TIntV>::TIter HI = CmtyVH.BegI(); HI < CmtyVH.EndI(); HI++){
00373     int CID = HI.GetKey();
00374     for (int j = 0; j < HI.GetDat().Len(); j++) {
00375       int NID = HI.GetDat()[j];
00376       NIDComVH.AddDat(NID).AddKey(CID);
00377     }
00378   }
00379 }
00380 
00381 void TAGMUtil::GetNodeMembership(THash<TInt,TIntV >& NIDComVH, const THash<TInt,TIntV>& CmtyVH) {
00382   for (int i = 0; i < CmtyVH.Len(); i++){
00383     int CID = CmtyVH.GetKey(i);
00384     for (int j = 0; j < CmtyVH[i].Len(); j++) {
00385       int NID = CmtyVH[i][j];
00386       NIDComVH.AddDat(NID).Add(CID);
00387     }
00388   }
00389 }
00390 
00391 void TAGMUtil::GetNodeMembership(THash<TInt,TIntV >& NIDComVH, const TVec<TIntV>& CmtyVV) {
00392   THash<TInt,TIntV> CmtyVH;
00393   for (int i = 0; i < CmtyVV.Len(); i++) {
00394     CmtyVH.AddDat(i, CmtyVV[i]);
00395   }
00396   GetNodeMembership(NIDComVH, CmtyVH);
00397 }
00398 
00399 int TAGMUtil::Intersection(const TIntV& C1, const TIntV& C2) {
00400   TIntSet S1(C1), S2(C2);
00401   return TAGMUtil::Intersection(S1, S2);
00402 }
00403 
00404 void TAGMUtil::GetIntersection(const THashSet<TInt>& A, const THashSet<TInt>& B, THashSet<TInt>& C) {
00405   C.Gen(A.Len());
00406   if (A.Len() < B.Len()) {
00407     for (THashSetKeyI<TInt> it = A.BegI(); it < A.EndI(); it++) 
00408       if (B.IsKey(it.GetKey())) C.AddKey(it.GetKey());
00409   } else {
00410     for (THashSetKeyI<TInt> it = B.BegI(); it < B.EndI(); it++) 
00411       if (A.IsKey(it.GetKey())) C.AddKey(it.GetKey());
00412   }
00413 }
00414 
00415 int TAGMUtil::Intersection(const THashSet<TInt>& A, const THashSet<TInt>& B) {
00416   int n = 0;
00417   if (A.Len() < B.Len()) {
00418     for (THashSetKeyI<TInt> it = A.BegI(); it < A.EndI(); it++) 
00419       if (B.IsKey(it.GetKey())) n++;
00420   } else {
00421     for (THashSetKeyI<TInt> it = B.BegI(); it < B.EndI(); it++) 
00422       if (A.IsKey(it.GetKey())) n++;
00423   }
00424   return n;
00425 }
00426 
00428 void TAGMUtil::SaveGephi(const TStr& OutFNm, const PUNGraph& G, const TVec<TIntV>& CmtyVVAtr, const double MaxSz, const double MinSz, const TIntStrH& NIDNameH, const THash<TInt, TIntTr>& NIDColorH ) {
00429   THash<TInt,TIntV> NIDComVHAtr;
00430   TAGMUtil::GetNodeMembership(NIDComVHAtr, CmtyVVAtr);
00431 
00432   FILE* F = fopen(OutFNm.CStr(), "wt");
00433   fprintf(F, "<?xml version='1.0' encoding='UTF-8'?>\n");
00434   fprintf(F, "<gexf xmlns='http://www.gexf.net/1.2draft' xmlns:viz='http://www.gexf.net/1.1draft/viz' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' xsi:schemaLocation='http://www.gexf.net/1.2draft http://www.gexf.net/1.2draft/gexf.xsd' version='1.2'>\n");
00435   fprintf(F, "\t<graph mode='static' defaultedgetype='undirected'>\n");
00436   if (CmtyVVAtr.Len() > 0) {
00437     fprintf(F, "\t<attributes class='node'>\n");
00438     for (int c = 0; c < CmtyVVAtr.Len(); c++) {
00439       fprintf(F, "\t\t<attribute id='%d' title='c%d' type='boolean'>", c, c);
00440       fprintf(F, "\t\t<default>false</default>\n");
00441       fprintf(F, "\t\t</attribute>\n");
00442     }
00443     fprintf(F, "\t</attributes>\n");
00444   }
00445   fprintf(F, "\t\t<nodes>\n");
00446   for (TUNGraph::TNodeI NI = G->BegNI(); NI < G->EndNI(); NI++) {
00447     int NID = NI.GetId();
00448     TStr Label = NIDNameH.IsKey(NID)? NIDNameH.GetDat(NID): "";
00449     TIntTr Color = NIDColorH.IsKey(NID)? NIDColorH.GetDat(NID) : TIntTr(120, 120, 120);
00450 
00451     double Size = MinSz;
00452     double SizeStep = (MaxSz - MinSz) / (double) CmtyVVAtr.Len();
00453     if (NIDComVHAtr.IsKey(NID)) {
00454       Size = MinSz +  SizeStep *  (double) NIDComVHAtr.GetDat(NID).Len();
00455     }
00456     double Alpha = 1.0;
00457     fprintf(F, "\t\t\t<node id='%d' label='%s'>\n", NID, Label.CStr());
00458     fprintf(F, "\t\t\t\t<viz:color r='%d' g='%d' b='%d' a='%.1f'/>\n", Color.Val1.Val, Color.Val2.Val, Color.Val3.Val, Alpha);
00459     fprintf(F, "\t\t\t\t<viz:size value='%.3f'/>\n", Size);
00460     //specify attributes
00461     if (NIDComVHAtr.IsKey(NID)) {
00462       fprintf(F, "\t\t\t\t<attvalues>\n");
00463       for (int c = 0; c < NIDComVHAtr.GetDat(NID).Len(); c++) {
00464         int CID = NIDComVHAtr.GetDat(NID)[c];
00465         fprintf(F, "\t\t\t\t\t<attvalue for='%d' value='true'/>\n", CID);
00466       }
00467       fprintf(F, "\t\t\t\t</attvalues>\n");
00468     }
00469 
00470     fprintf(F, "\t\t\t</node>\n");
00471   }
00472   fprintf(F, "\t\t</nodes>\n");
00473   //plot edges
00474   int EID = 0;
00475   fprintf(F, "\t\t<edges>\n");
00476   for (TUNGraph::TEdgeI EI = G->BegEI(); EI < G->EndEI(); EI++) {
00477     fprintf(F, "\t\t\t<edge id='%d' source='%d' target='%d'/>\n", EID++, EI.GetSrcNId(), EI.GetDstNId());
00478   }
00479   fprintf(F, "\t\t</edges>\n");
00480   fprintf(F, "\t</graph>\n");
00481   fprintf(F, "</gexf>\n");
00482   fclose(F);
00483 }
00484 
00486 void TAGMUtil::SaveBipartiteGephi(const TStr& OutFNm, const TIntV& NIDV, const TVec<TIntV>& CmtyVV, const double MaxSz, const double MinSz, const TIntStrH& NIDNameH, const THash<TInt, TIntTr>& NIDColorH, const THash<TInt, TIntTr>& CIDColorH ) {
00488   if (CmtyVV.Len() == 0) { return; }
00489   double NXMin = 0.1, YMin = 0.1, NXMax = 250.00, YMax = 30.0;
00490   double CXMin = 0.3 * NXMax, CXMax = 0.7 * NXMax;
00491   double CStep = (CXMax - CXMin) / (double) CmtyVV.Len(), NStep = (NXMax - NXMin) / (double) NIDV.Len();
00492   THash<TInt,TIntV> NIDComVH;
00493   TAGMUtil::GetNodeMembership(NIDComVH, CmtyVV);
00494 
00495   FILE* F = fopen(OutFNm.CStr(), "wt");
00496   fprintf(F, "<?xml version='1.0' encoding='UTF-8'?>\n");
00497   fprintf(F, "<gexf xmlns='http://www.gexf.net/1.2draft' xmlns:viz='http://www.gexf.net/1.1draft/viz' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' xsi:schemaLocation='http://www.gexf.net/1.2draft http://www.gexf.net/1.2draft/gexf.xsd' version='1.2'>\n");
00498   fprintf(F, "\t<graph mode='static' defaultedgetype='directed'>\n");
00499   fprintf(F, "\t\t<nodes>\n");
00500   for (int c = 0; c < CmtyVV.Len(); c++) {
00501     int CID = c;
00502     double XPos = c * CStep + CXMin;
00503     TIntTr Color = CIDColorH.IsKey(CID)? CIDColorH.GetDat(CID) : TIntTr(120, 120, 120);
00504     fprintf(F, "\t\t\t<node id='C%d' label='C%d'>\n", CID, CID);
00505     fprintf(F, "\t\t\t\t<viz:color r='%d' g='%d' b='%d'/>\n", Color.Val1.Val, Color.Val2.Val, Color.Val3.Val);
00506     fprintf(F, "\t\t\t\t<viz:size value='%.3f'/>\n", MaxSz);
00507     fprintf(F, "\t\t\t\t<viz:shape value='square'/>\n");
00508     fprintf(F, "\t\t\t\t<viz:position x='%f' y='%f' z='0.0'/>\n", XPos, YMax); 
00509     fprintf(F, "\t\t\t</node>\n");
00510   }
00511 
00512   for (int u = 0;u < NIDV.Len(); u++) {
00513     int NID = NIDV[u];
00514     TStr Label = NIDNameH.IsKey(NID)? NIDNameH.GetDat(NID): "";
00515     double Size = MinSz;
00516     double XPos = NXMin + u * NStep;
00517     TIntTr Color = NIDColorH.IsKey(NID)? NIDColorH.GetDat(NID) : TIntTr(120, 120, 120);
00518     double Alpha = 1.0;
00519     fprintf(F, "\t\t\t<node id='%d' label='%s'>\n", NID, Label.CStr());
00520     fprintf(F, "\t\t\t\t<viz:color r='%d' g='%d' b='%d' a='%.1f'/>\n", Color.Val1.Val, Color.Val2.Val, Color.Val3.Val, Alpha);
00521     fprintf(F, "\t\t\t\t<viz:size value='%.3f'/>\n", Size);
00522     fprintf(F, "\t\t\t\t<viz:shape value='square'/>\n");
00523     fprintf(F, "\t\t\t\t<viz:position x='%f' y='%f' z='0.0'/>\n", XPos, YMin); 
00524     fprintf(F, "\t\t\t</node>\n");
00525   }
00526   fprintf(F, "\t\t</nodes>\n");
00527   fprintf(F, "\t\t<edges>\n");
00528   int EID = 0;
00529   for (int u = 0;u < NIDV.Len(); u++) {
00530     int NID = NIDV[u];
00531     if (NIDComVH.IsKey(NID)) {
00532       for (int c = 0; c < NIDComVH.GetDat(NID).Len(); c++) {
00533         int CID = NIDComVH.GetDat(NID)[c];
00534         fprintf(F, "\t\t\t<edge id='%d' source='C%d' target='%d'/>\n", EID++, CID, NID);
00535       }
00536     }
00537   }
00538   fprintf(F, "\t\t</edges>\n");
00539   fprintf(F, "\t</graph>\n");
00540   fprintf(F, "</gexf>\n");
00541 }
00542 
00544 int TAGMUtil::FindComsByAGM(const PUNGraph& Graph, const int InitComs, const int MaxIter, const int RndSeed, const double RegGap, const double PNoCom, const TStr PltFPrx) {
00545   TRnd Rnd(RndSeed);
00546   int LambdaIter = 100;
00547   if (Graph->GetNodes() < 200) { LambdaIter = 1; } 
00548   if (Graph->GetNodes() < 200 && Graph->GetEdges() > 2000) { LambdaIter = 100; } 
00549 
00550   //Find coms with large C
00551   TAGMFit AGMFitM(Graph, InitComs, RndSeed);
00552   if (PNoCom > 0.0) { AGMFitM.SetPNoCom(PNoCom); }
00553   AGMFitM.RunMCMC(MaxIter, LambdaIter, "");
00554 
00555   int TE = Graph->GetEdges();
00556   TFltV RegV; 
00557   RegV.Add(0.3 * TE);
00558   for (int r = 0; r < 25; r++) {
00559     RegV.Add(RegV.Last() * RegGap);
00560   }
00561   TFltPrV RegComsV, RegLV, RegBICV;
00562   TFltV LV, BICV;
00563   //record likelihood and number of communities with nonzero P_c
00564   for (int r = 0; r < RegV.Len(); r++) {
00565     double RegCoef = RegV[r];
00566     AGMFitM.SetRegCoef(RegCoef);
00567     AGMFitM.MLEGradAscentGivenCAG(0.01, 1000);
00568     AGMFitM.SetRegCoef(0.0);
00569         
00570     TVec<TIntV> EstCmtyVV;
00571     AGMFitM.GetCmtyVV(EstCmtyVV, 0.99);
00572     int NumLowQ = EstCmtyVV.Len();
00573     RegComsV.Add(TFltPr(RegCoef, (double) NumLowQ));
00574 
00575     if (EstCmtyVV.Len() > 0) {
00576       TAGMFit AFTemp(Graph, EstCmtyVV, Rnd);
00577       AFTemp.MLEGradAscentGivenCAG(0.001, 1000);
00578       double CurL = AFTemp.Likelihood();
00579       LV.Add(CurL);
00580       BICV.Add(-2.0 * CurL + (double) EstCmtyVV.Len() * log((double) Graph->GetNodes() * (Graph->GetNodes() - 1) / 2.0));
00581     }
00582     else {
00583       break;
00584     }
00585   }
00586   // if likelihood does not exist or does not change at all, report the smallest number of communities or 2
00587   if (LV.Len() == 0) { return 2; }
00588   else if (LV[0] == LV.Last()) { return (int) TMath::Mx<TFlt>(2.0, RegComsV[LV.Len() - 1].Val2); }
00589 
00590 
00591   //normalize likelihood and BIC to 0~100
00592   int MaxL = 100;
00593   {
00594     TFltV& ValueV = LV;
00595     TFltPrV& RegValueV = RegLV;
00596     double MinValue = TFlt::Mx, MaxValue = TFlt::Mn;
00597     for (int l = 0; l < ValueV.Len(); l++) {
00598       if (ValueV[l] < MinValue) { MinValue = ValueV[l]; }
00599       if (ValueV[l] > MaxValue) { MaxValue = ValueV[l]; }
00600     }
00601     while (ValueV.Len() < RegV.Len()) { ValueV.Add(MinValue); }
00602     double RangeVal = MaxValue - MinValue;
00603     for (int l = 0; l < ValueV.Len(); l++) {
00604       RegValueV.Add(TFltPr(RegV[l], double(MaxL) * (ValueV[l] - MinValue) / RangeVal));
00605     }
00606     
00607   }
00608   {
00609     TFltV& ValueV = BICV;
00610     TFltPrV& RegValueV = RegBICV;
00611     double MinValue = TFlt::Mx, MaxValue = TFlt::Mn;
00612     for (int l = 0; l < ValueV.Len(); l++) {
00613       if (ValueV[l] < MinValue) { MinValue = ValueV[l]; }
00614       if (ValueV[l] > MaxValue) { MaxValue = ValueV[l]; }
00615     }
00616     while (ValueV.Len() < RegV.Len()) { ValueV.Add(MaxValue); }
00617     double RangeVal = MaxValue - MinValue;
00618     for (int l = 0; l < ValueV.Len(); l++) {
00619       RegValueV.Add(TFltPr(RegV[l], double(MaxL) * (ValueV[l] - MinValue) / RangeVal));
00620     }
00621   }
00622 
00623   //fit logistic regression to normalized likelihood.
00624   TVec<TFltV> XV(RegLV.Len());
00625   TFltV YV (RegLV.Len());
00626   for (int l = 0; l < RegLV.Len(); l++) {
00627     XV[l] = TFltV::GetV(log(RegLV[l].Val1));
00628     YV[l] = RegLV[l].Val2 / (double) MaxL;
00629   }
00630   TFltPrV LRVScaled, LRV;
00631   TLogRegFit LRFit;
00632   PLogRegPredict LRMd = LRFit.CalcLogRegNewton(XV, YV, PltFPrx);
00633   for (int l = 0; l < RegLV.Len(); l++) {
00634     LRV.Add(TFltPr(RegV[l], LRMd->GetCfy(XV[l])));
00635     LRVScaled.Add(TFltPr(RegV[l], double(MaxL) * LRV.Last().Val2));
00636   }
00637 
00638   //estimate # communities from fitted logistic regression
00639   int NumComs = 0, IdxRegDrop = 0;
00640   double LRThres = 1.1, RegDrop; // 1 / (1 + exp(1.1)) = 0.25
00641   double LeftReg = 0.0, RightReg = 0.0;
00642   TFltV Theta;
00643   LRMd->GetTheta(Theta);
00644   RegDrop = (- Theta[1] - LRThres) / Theta[0];
00645   if (RegDrop <= XV[0][0]) { NumComs = (int) RegComsV[0].Val2; }
00646   else if (RegDrop >= XV.Last()[0]) { NumComs = (int) RegComsV.Last().Val2; }
00647   else {  //interpolate for RegDrop
00648     for (int i = 0; i < XV.Len(); i++) {
00649       if (XV[i][0] > RegDrop) { IdxRegDrop = i; break; }
00650     }
00651     
00652     if (IdxRegDrop == 0) {
00653       printf("Error!! RegDrop:%f, Theta[0]:%f, Theta[1]:%f\n", RegDrop, Theta[0].Val, Theta[1].Val);
00654       for (int l = 0; l < RegLV.Len(); l++) {
00655         printf("X[%d]:%f, Y[%d]:%f\n", l, XV[l][0].Val, l, YV[l].Val);
00656       }
00657     }
00658     IAssert(IdxRegDrop > 0);
00659     LeftReg = RegDrop - XV[IdxRegDrop - 1][0];
00660     RightReg = XV[IdxRegDrop][0] - RegDrop;
00661     NumComs = (int) TMath::Round( (RightReg * RegComsV[IdxRegDrop - 1].Val2 + LeftReg * RegComsV[IdxRegDrop].Val2) / (LeftReg + RightReg));
00662 
00663   }
00664   //printf("Interpolation coeff: %f, %f, index at drop:%d (%f), Left-Right Vals: %f, %f\n", LeftReg, RightReg, IdxRegDrop, RegDrop, RegComsV[IdxRegDrop - 1].Val2, RegComsV[IdxRegDrop].Val2);
00665   printf("Num Coms:%d\n", NumComs);
00666   if (NumComs < 2) { NumComs = 2; }
00667 
00668   if (PltFPrx.Len() > 0) {
00669     TStr PlotTitle = TStr::Fmt("N:%d, E:%d ", Graph->GetNodes(), TE);
00670     TGnuPlot GPC(PltFPrx + ".l");
00671     GPC.AddPlot(RegComsV, gpwLinesPoints, "C");
00672     GPC.AddPlot(RegLV, gpwLinesPoints, "likelihood");
00673     GPC.AddPlot(RegBICV, gpwLinesPoints, "BIC");
00674     GPC.AddPlot(LRVScaled, gpwLinesPoints, "Sigmoid (scaled)");
00675     GPC.SetScale(gpsLog10X);
00676     GPC.SetTitle(PlotTitle);
00677     GPC.SavePng(PltFPrx + ".l.png");
00678   }
00679   
00680   return NumComs;
00681 }
00682 
00683 double TAGMUtil::GetConductance(const PUNGraph& Graph, const TIntSet& CmtyS, const int Edges) {
00684   const int Edges2 = Edges >= 0 ? 2*Edges : Graph->GetEdges();
00685   int Vol = 0,  Cut = 0; 
00686   double Phi = 0.0;
00687   for (int i = 0; i < CmtyS.Len(); i++) {
00688     if (! Graph->IsNode(CmtyS[i])) { continue; }
00689     TUNGraph::TNodeI NI = Graph->GetNI(CmtyS[i]);
00690     for (int e = 0; e < NI.GetOutDeg(); e++) {
00691       if (! CmtyS.IsKey(NI.GetOutNId(e))) { Cut += 1; }
00692     }
00693     Vol += NI.GetOutDeg();
00694   }
00695   // get conductance
00696   if (Vol != Edges2) {
00697     if (2 * Vol > Edges2) { Phi = Cut / double (Edges2 - Vol); }
00698     else if (Vol == 0) { Phi = 0.0; }
00699     else { Phi = Cut / double(Vol); }
00700   } else {
00701     if (Vol == Edges2) { Phi = 1.0; }
00702   }
00703   return Phi;
00704 }
00705 
00706 void TAGMUtil::GetNbhCom(const PUNGraph& Graph, const int NID, TIntSet& NBCmtyS) {
00707   TUNGraph::TNodeI NI = Graph->GetNI(NID);
00708   NBCmtyS.Gen(NI.GetDeg());
00709   NBCmtyS.AddKey(NID);
00710   for (int e = 0; e < NI.GetDeg(); e++) {
00711     NBCmtyS.AddKey(NI.GetNbrNId(e));
00712   }
00713 }
00714 
00716 // Logistic regression by gradient ascent
00717 
00718 void TLogRegFit::GetNewtonStep(TFltVV& HVV, const TFltV& GradV, TFltV& DeltaLV){
00719   bool HSingular = false;
00720   for (int i = 0; i < HVV.GetXDim(); i++) {
00721     if (HVV(i,i) == 0.0) {
00722       HVV(i,i) = 0.001;
00723       HSingular = true;
00724     }
00725     DeltaLV[i] = GradV[i] / HVV(i, i);
00726   }
00727   if (! HSingular) {
00728     if (HVV(0, 0) < 0) { // if Hessian is negative definite, convert it to positive definite
00729       for (int r = 0; r < Theta.Len(); r++) {
00730         for (int c = 0; c < Theta.Len(); c++) {
00731           HVV(r, c) = - HVV(r, c);
00732         }
00733       }
00734       TNumericalStuff::SolveSymetricSystem(HVV, GradV, DeltaLV);
00735     }
00736     else {
00737       TNumericalStuff::SolveSymetricSystem(HVV, GradV, DeltaLV);
00738       for (int i = 0; i < DeltaLV.Len(); i++) {
00739         DeltaLV[i] = - DeltaLV[i];
00740       }
00741     }
00742 
00743   }
00744 }
00745 
00746 void TLogRegFit::Hessian(TFltVV& HVV) {
00747   HVV.Gen(Theta.Len(), Theta.Len());
00748   TFltV OutV;
00749   TLogRegPredict::GetCfy(X, OutV, Theta);
00750   for (int i = 0; i < X.Len(); i++) {
00751     for (int r = 0; r < Theta.Len(); r++) {
00752       HVV.At(r, r) += - (X[i][r] * OutV[i] * (1 - OutV[i]) * X[i][r]);
00753       for (int c = r + 1; c < Theta.Len(); c++) {
00754         HVV.At(r, c) += - (X[i][r] * OutV[i] * (1 - OutV[i]) * X[i][c]);
00755         HVV.At(c, r) += - (X[i][r] * OutV[i] * (1 - OutV[i]) * X[i][c]);
00756       }
00757     }
00758   }
00759   /*
00760   printf("\n");
00761   for (int r = 0; r < Theta.Len(); r++) {
00762     for (int c = 0; c < Theta.Len(); c++) {
00763       printf("%f\t", HVV.At(r, c).Val);
00764     }
00765     printf("\n");
00766   }
00767   */
00768 }
00769 
00770 int TLogRegFit::MLENewton(const double& ChangeEps, const int& MaxStep, const TStr PlotNm) {
00771   TExeTm ExeTm;
00772   TFltV GradV(Theta.Len()), DeltaLV(Theta.Len());
00773   TFltVV HVV(Theta.Len(), Theta.Len());
00774   int iter = 0;
00775   double MinVal = -1e10, MaxVal = 1e10;
00776   for(iter = 0; iter < MaxStep; iter++) {
00777     Gradient(GradV);
00778     Hessian(HVV);
00779     GetNewtonStep(HVV, GradV, DeltaLV);
00780     double Increment = TLinAlg::DotProduct(GradV, DeltaLV);
00781     if (Increment <= ChangeEps) {break;}
00782     double LearnRate = GetStepSizeByLineSearch(DeltaLV, GradV, 0.15, 0.5);//InitLearnRate/double(0.01*(double)iter + 1);
00783     for(int i = 0; i < Theta.Len(); i++) {
00784       double Change = LearnRate * DeltaLV[i];
00785       Theta[i] += Change;
00786       if(Theta[i] < MinVal) { Theta[i] = MinVal;}
00787       if(Theta[i] > MaxVal) { Theta[i] = MaxVal;}
00788     }
00789   }
00790   if (! PlotNm.Empty()) {
00791     printf("MLE with Newton method completed with %d iterations(%s)\n",iter,ExeTm.GetTmStr());
00792   }
00793 
00794   return iter;
00795 }
00796 
00797 int TLogRegFit::MLEGradient(const double& ChangeEps, const int& MaxStep, const TStr PlotNm) {
00798   TExeTm ExeTm;
00799   TFltV GradV(Theta.Len());
00800   int iter = 0;
00801   TIntFltPrV IterLV, IterGradNormV;
00802   double MinVal = -1e10, MaxVal = 1e10;
00803   double GradCutOff = 100000;
00804   for(iter = 0; iter < MaxStep; iter++) {
00805     Gradient(GradV);    //if gradient is going out of the boundary, cut off
00806     for(int i = 0; i < Theta.Len(); i++) {
00807       if (GradV[i] < -GradCutOff) { GradV[i] = -GradCutOff; }
00808       if (GradV[i] > GradCutOff) { GradV[i] = GradCutOff; }
00809       if (Theta[i] <= MinVal && GradV[i] < 0) { GradV[i] = 0.0; }
00810       if (Theta[i] >= MaxVal && GradV[i] > 0) { GradV[i] = 0.0; }
00811     }
00812     double Alpha = 0.15, Beta = 0.9;
00813     //double LearnRate = 0.1 / (0.1 * iter + 1); //GetStepSizeByLineSearch(GradV, GradV, Alpha, Beta);
00814     double LearnRate = GetStepSizeByLineSearch(GradV, GradV, Alpha, Beta);
00815     if (TLinAlg::Norm(GradV) < ChangeEps) { break; }
00816     for(int i = 0; i < Theta.Len(); i++) {
00817       double Change = LearnRate * GradV[i];
00818       Theta[i] += Change;
00819       if(Theta[i] < MinVal) { Theta[i] = MinVal;}
00820       if(Theta[i] > MaxVal) { Theta[i] = MaxVal;}
00821     }
00822     if (! PlotNm.Empty()) {
00823       double L = Likelihood();
00824       IterLV.Add(TIntFltPr(iter, L));
00825       IterGradNormV.Add(TIntFltPr(iter, TLinAlg::Norm(GradV)));
00826     }
00827     
00828   }
00829   if (! PlotNm.Empty()) {
00830     TGnuPlot::PlotValV(IterLV, PlotNm + ".likelihood_Q");
00831     TGnuPlot::PlotValV(IterGradNormV, PlotNm + ".gradnorm_Q");
00832     printf("MLE for Lambda completed with %d iterations(%s)\n",iter,ExeTm.GetTmStr());
00833   }
00834   return iter;
00835 }
00836 
00837 double TLogRegFit::GetStepSizeByLineSearch(const TFltV& DeltaV, const TFltV& GradV, const double& Alpha, const double& Beta) {
00838   double StepSize = 1.0;
00839   double InitLikelihood = Likelihood();
00840   IAssert(Theta.Len() == DeltaV.Len());
00841   TFltV NewThetaV(Theta.Len());
00842   double MinVal = -1e10, MaxVal = 1e10;
00843   for(int iter = 0; ; iter++) {
00844     for (int i = 0; i < Theta.Len(); i++){
00845       NewThetaV[i] = Theta[i] + StepSize * DeltaV[i];
00846       if (NewThetaV[i] < MinVal) { NewThetaV[i] = MinVal;  }
00847       if (NewThetaV[i] > MaxVal) { NewThetaV[i] = MaxVal; }
00848     }
00849     if (Likelihood(NewThetaV) < InitLikelihood + Alpha * StepSize * TLinAlg::DotProduct(GradV, DeltaV)) {
00850       StepSize *= Beta;
00851     } else {
00852       break;
00853     }
00854   }
00855   return StepSize;
00856 }
00857 
00858 double TLogRegFit::Likelihood(const TFltV& NewTheta) {
00859   TFltV OutV;
00860   TLogRegPredict::GetCfy(X, OutV, NewTheta);
00861   double L = 0;
00862   for (int r = 0; r < OutV.Len(); r++) {
00863     L += Y[r] * log(OutV[r]);
00864     L += (1 - Y[r]) * log(1 - OutV[r]);
00865   }
00866   return L;
00867 }
00868 
00869 void TLogRegFit::Gradient(TFltV& GradV) {
00870   TFltV OutV;
00871   TLogRegPredict::GetCfy(X, OutV, Theta);
00872   GradV.Gen(M);
00873   for (int r = 0; r < X.Len(); r++) {
00874     //printf("Y[%d] = %f, Out[%d] = %f\n", r, Y[r].Val, r, OutV[r].Val);
00875     for (int m = 0; m < M; m++) {
00876       GradV[m] += (Y[r] - OutV[r]) * X[r][m];
00877     }
00878   }
00879   //for (int m = 0; m < M; m++) {  printf("Theta[%d] = %f, GradV[%d] = %f\n", m, Theta[m].Val, m, GradV[m].Val); }
00880 }
00881 
00882 PLogRegPredict TLogRegFit::CalcLogRegNewton(const TVec<TFltV>& XPt, const TFltV& yPt, const TStr& PlotNm, const double& ChangeEps, const int& MaxStep, const bool Intercept) {
00883 
00884   X = XPt;
00885   Y = yPt;
00886   IAssert(X.Len() == Y.Len());
00887   if (Intercept == false) { // if intercept is not included, add it
00888     for (int r = 0; r < X.Len(); r++) {  X[r].Add(1); }
00889   }
00890   M = X[0].Len();
00891   for (int r = 0; r < X.Len(); r++) {  IAssert(X[r].Len() == M); }
00892   for (int r = 0; r < Y.Len(); r++) {  
00893     if (Y[r] >= 0.99999) { Y[r] = 0.99999; }
00894     if (Y[r] <= 0.00001) { Y[r] = 0.00001; }
00895   }
00896   Theta.Gen(M);
00897   MLENewton(ChangeEps, MaxStep, PlotNm);
00898   return new TLogRegPredict(Theta); 
00899 };
00900 
00901 PLogRegPredict TLogRegFit::CalcLogRegGradient(const TVec<TFltV>& XPt, const TFltV& yPt, const TStr& PlotNm, const double& ChangeEps, const int& MaxStep, const bool Intercept) {
00902   X = XPt;
00903   Y = yPt;
00904   IAssert(X.Len() == Y.Len());
00905   if (Intercept == false) { // if intercept is not included, add it
00906     for (int r = 0; r < X.Len(); r++) {  X[r].Add(1); }
00907   }
00908   M = X[0].Len();
00909   for (int r = 0; r < X.Len(); r++) {  IAssert(X[r].Len() == M); }
00910   for (int r = 0; r < Y.Len(); r++) {  
00911     if (Y[r] >= 0.99999) { Y[r] = 0.99999; }
00912     if (Y[r] <= 0.00001) { Y[r] = 0.00001; }
00913   }
00914   Theta.Gen(M);
00915   MLEGradient(ChangeEps, MaxStep, PlotNm);
00916   return new TLogRegPredict(Theta); 
00917 };
00918 
00920 // Logistic-Regression-Model
00921 
00922 double TLogRegPredict::GetCfy(const TFltV& AttrV, const TFltV& NewTheta) {
00923     int len = AttrV.Len();
00924     double res = 0;
00925     if (len < NewTheta.Len()) { res = NewTheta.Last(); } //if feature vector is shorter, add an intercept
00926     for (int i = 0; i < len; i++) {
00927       if (i < NewTheta.Len()) { res += AttrV[i] * NewTheta[i]; }
00928     }
00929     double mu = 1 / (1 + exp(-res));
00930     return mu;
00931 }
00932 
00933 void TLogRegPredict::GetCfy(const TVec<TFltV>& X, TFltV& OutV, const TFltV& NewTheta) {
00934   OutV.Gen(X.Len());
00935   for (int r = 0; r < X.Len(); r++) {
00936     OutV[r] = GetCfy(X[r], NewTheta);
00937   }
00938 }