SNAP Library, User Reference  2012-10-15 15:06:59
SNAP, a general purpose network analysis and graph mining library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
gsvd.cpp
Go to the documentation of this file.
00001 
00002 // Directed Graph Matrix -- sparse {0,1} row matrix 
00003 bool TNGraphMtx::CheckNodeIds() {
00004   for (int NId = 0; NId < Graph->GetNodes(); NId++) {
00005     if (! Graph->IsNode(NId)) { return false; }
00006   }
00007   return true;
00008 }
00009 
00010 TNGraphMtx::TNGraphMtx(const PNGraph& GraphPt) : Graph() { 
00011   Graph = GraphPt;
00012   if (! CheckNodeIds()) {
00013     printf("  Renumbering nodes.\n");
00014     Graph = TSnap::ConvertGraph<PNGraph>(GraphPt, true);
00015   }
00016 }
00017 
00018 // Result = A * B(:,ColId)
00019 void TNGraphMtx::PMultiply(const TFltVV& B, int ColId, TFltV& Result) const {
00020   const int RowN = GetRows();
00021   Assert(B.GetRows() >= RowN && Result.Len() >= RowN);
00022   const THash<TInt, TNGraph::TNode>& NodeH = Graph->NodeH;
00023   for (int j = 0; j < RowN; j++) {
00024     const TIntV& RowV = NodeH[j].OutNIdV;
00025     Result[j] = 0.0;
00026     for (int i = 0; i < RowV.Len(); i++) {
00027       Result[j] += B(RowV[i], ColId);
00028     }
00029   }
00030 }
00031 
00032 // Result = A * Vec
00033 void TNGraphMtx::PMultiply(const TFltV& Vec, TFltV& Result) const {
00034   const int RowN = GetRows();
00035   Assert(Vec.Len() >= RowN && Result.Len() >= RowN);
00036   const THash<TInt, TNGraph::TNode>& NodeH = Graph->NodeH;
00037   for (int j = 0; j < RowN; j++) {
00038     const TIntV& RowV = NodeH[j].OutNIdV;
00039     Result[j] = 0.0;
00040     for (int i = 0; i < RowV.Len(); i++) {
00041       Result[j] += Vec[RowV[i]];
00042     }
00043   }
00044 }
00045 
00046 // Result = A' * B(:,ColId)
00047 void TNGraphMtx::PMultiplyT(const TFltVV& B, int ColId, TFltV& Result) const {
00048   const int ColN = GetCols();
00049   Assert(B.GetRows() >= ColN && Result.Len() >= ColN);
00050   const THash<TInt, TNGraph::TNode>& NodeH = Graph->NodeH;
00051   for (int i = 0; i < ColN; i++) Result[i] = 0.0;
00052   for (int j = 0; j < ColN; j++) {
00053     const TIntV& RowV = NodeH[j].OutNIdV;
00054     for (int i = 0; i < RowV.Len(); i++) {
00055       Result[RowV[i]] += B(j, ColId);
00056     }
00057   }
00058 }
00059 
00060 // Result = A' * Vec
00061 void TNGraphMtx::PMultiplyT(const TFltV& Vec, TFltV& Result) const {
00062   const int RowN = GetRows();
00063   Assert(Vec.Len() >= RowN && Result.Len() >= RowN);
00064   const THash<TInt, TNGraph::TNode>& NodeH = Graph->NodeH;
00065   for (int i = 0; i < RowN; i++) Result[i] = 0.0;
00066   for (int j = 0; j < RowN; j++) {
00067     const TIntV& RowV = NodeH[j].OutNIdV;
00068     for (int i = 0; i < RowV.Len(); i++) {
00069       Result[RowV[i]] += Vec[j];
00070     }
00071   }
00072 }
00073 
00075 // Undirected Graph Matrix -- sparse {0,1} row matrix 
00076 bool TUNGraphMtx::CheckNodeIds() {
00077   for (int NId = 0; NId < Graph->GetNodes(); NId++) {
00078     if (! Graph->IsNode(NId)) { return false; }
00079   }
00080   return true;
00081 }
00082 
00083 TUNGraphMtx::TUNGraphMtx(const PUNGraph& GraphPt) : Graph() { 
00084   Graph = GraphPt;
00085   if (! CheckNodeIds()) {
00086     printf("  Renumbering %d nodes....", GraphPt->GetNodes());
00087     TExeTm ExeTm;
00088     Graph = TSnap::ConvertGraph<PUNGraph>(GraphPt, true);
00089     /*TIntSet NIdSet;
00090     for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
00091       NIdSet.AddKey(NI.GetId());
00092     }
00093     Graph = TUNGraph::New();  *Graph = *GraphPt; */
00094     printf("done [%s]\n", ExeTm.GetStr());
00095   }
00096 }
00097 
00098 // Result = A * B(:,ColId)
00099 void TUNGraphMtx::PMultiply(const TFltVV& B, int ColId, TFltV& Result) const {
00100   const int RowN = GetRows();
00101   Assert(B.GetRows() >= RowN && Result.Len() >= RowN);
00102   const THash<TInt, TUNGraph::TNode>& NodeH = Graph->NodeH;
00103   for (int j = 0; j < RowN; j++) {
00104     const TIntV& RowV = NodeH[j].NIdV;
00105     Result[j] = 0.0;
00106     for (int i = 0; i < RowV.Len(); i++) {
00107       Result[j] += B(RowV[i], ColId);
00108     }
00109   }
00110 }
00111 
00112 // Result = A * Vec
00113 void TUNGraphMtx::PMultiply(const TFltV& Vec, TFltV& Result) const {
00114   const int RowN = GetRows();
00115   Assert(Vec.Len() >= RowN && Result.Len() >= RowN);
00116   const THash<TInt, TUNGraph::TNode>& NodeH = Graph->NodeH;
00117   for (int j = 0; j < RowN; j++) {
00118     const TIntV& RowV = NodeH[j].NIdV;
00119     Result[j] = 0.0;
00120     for (int i = 0; i < RowV.Len(); i++) {
00121       Result[j] += Vec[RowV[i]];
00122     }
00123   }
00124 }
00125 
00126 // Result = A' * B(:,ColId)
00127 void TUNGraphMtx::PMultiplyT(const TFltVV& B, int ColId, TFltV& Result) const {
00128   const int ColN = GetCols();
00129   Assert(B.GetRows() >= ColN && Result.Len() >= ColN);
00130   const THash<TInt, TUNGraph::TNode>& NodeH = Graph->NodeH;
00131   for (int i = 0; i < ColN; i++) Result[i] = 0.0;
00132   for (int j = 0; j < ColN; j++) {
00133     const TIntV& RowV = NodeH[j].NIdV;
00134     for (int i = 0; i < RowV.Len(); i++) {
00135       Result[RowV[i]] += B(j, ColId);
00136     }
00137   }
00138 }
00139 
00140 // Result = A' * Vec
00141 void TUNGraphMtx::PMultiplyT(const TFltV& Vec, TFltV& Result) const {
00142   const int RowN = GetRows();
00143   Assert(Vec.Len() >= RowN && Result.Len() >= RowN);
00144   const THash<TInt, TUNGraph::TNode>& NodeH = Graph->NodeH;
00145   for (int i = 0; i < RowN; i++) Result[i] = 0.0;
00146   for (int j = 0; j < RowN; j++) {
00147     const TIntV& RowV = NodeH[j].NIdV;
00148     for (int i = 0; i < RowV.Len(); i++) {
00149       Result[RowV[i]] += Vec[j];
00150     }
00151   }
00152 }
00153 
00155 // Graphs Singular Value Decomposition
00156 namespace TSnap {
00157 
00158 void SetAllInvertSign(TFltV& ValV, const double& Val) {
00159   for (int i = 0; i < ValV.Len(); i++) { 
00160     ValV[i] = -ValV[i]; 
00161   }
00162 }
00163 bool IsAllValVNeg(TFltV& ValV, const bool& InvertSign) {
00164   bool IsAllNeg=true;
00165   for (int i = 0; i < ValV.Len(); i++) {
00166     if (ValV[i]>0.0) { IsAllNeg=false; break; }
00167   }
00168   if (IsAllNeg && InvertSign) {
00169     for (int i = 0; i < ValV.Len(); i++) { 
00170       ValV[i] = -ValV[i]; }
00171   }
00172   return IsAllNeg;
00173 }
00174 
00175 void GetSngVals(const PNGraph& Graph, const int& SngVals, TFltV& SngValV) {
00176   const int Nodes = Graph->GetNodes();
00177   IAssert(SngVals > 0);
00178   if (Nodes < 100) {
00179     // perform full SVD
00180     TFltVV AdjMtx(Nodes+1, Nodes+1);
00181     TFltVV LSingV, RSingV;
00182     TIntH NodeIdH;
00183     // create adjecency matrix
00184     for (TNGraph::TNodeI NodeI = Graph->BegNI(); NodeI < Graph->EndNI(); NodeI++) {
00185       NodeIdH.AddKey(NodeI.GetId()); }
00186     for (TNGraph::TNodeI NodeI = Graph->BegNI(); NodeI < Graph->EndNI(); NodeI++) {
00187       const int NodeId = NodeIdH.GetKeyId(NodeI.GetId()) + 1;
00188       for (int e = 0; e < NodeI.GetOutDeg(); e++) {
00189         const int DstNId = NodeIdH.GetKeyId(NodeI.GetOutNId(e)) + 1;  // no self edges
00190         if (NodeId != DstNId) AdjMtx.At(NodeId, DstNId) = 1;
00191       }
00192     }
00193     try { // can fail to converge but results seem to be good
00194       TSvd::Svd1Based(AdjMtx, LSingV, SngValV, RSingV); }
00195     catch(...) {
00196       printf("\n***No SVD convergence: G(%d, %d)\n", Nodes, Graph->GetEdges()); }
00197   } else {
00198     // Lanczos
00199     TNGraphMtx GraphMtx(Graph);
00200     int CalcVals = int(2*SngVals);
00201     //if (CalcVals > Nodes) { CalcVals = int(2*Nodes); }
00202     //if (CalcVals > Nodes) { CalcVals = Nodes; }
00203     //while (SngValV.Len() < SngVals && CalcVals < 10*SngVals) {
00204     try {
00205       if (SngVals > 4) { 
00206         TSparseSVD::SimpleLanczosSVD(GraphMtx, 2*SngVals, SngValV, false); }
00207       else { TFltVV LSingV, RSingV;  // this is much more precise, but also much slower
00208         TSparseSVD::LanczosSVD(GraphMtx, SngVals, 3*SngVals, ssotFull, SngValV, LSingV, RSingV); }
00209     }
00210     catch(...) {
00211       printf("\n  ***EXCEPTION:  TRIED %d GOT %d values** \n", 2*SngVals, SngValV.Len()); }
00212     if (SngValV.Len() < SngVals) {
00213       printf("  ***TRIED %d GOT %d values** \n", CalcVals, SngValV.Len()); }
00214     //  CalcVals += SngVals;
00215     //}
00216   }
00217   SngValV.Sort(false);
00218   //if (SngValV.Len() > SngVals) {
00219   //  SngValV.Del(SngVals, SngValV.Len()-1); }
00220   //else {
00221   //  while (SngValV.Len() < SngVals) SngValV.Add(1e-6); }
00222   //IAssert(SngValV.Len() == SngVals);
00223 }
00224 
00225 void GetSngVec(const PNGraph& Graph, TFltV& LeftSV, TFltV& RightSV) {
00226   const int Nodes = Graph->GetNodes();
00227   TFltVV LSingV, RSingV;
00228   TFltV SngValV;
00229   if (Nodes < 500) {
00230     // perform full SVD
00231     TFltVV AdjMtx(Nodes+1, Nodes+1);
00232     TIntH NodeIdH;
00233     // create adjecency matrix
00234     for (TNGraph::TNodeI NodeI = Graph->BegNI(); NodeI < Graph->EndNI(); NodeI++) {
00235       NodeIdH.AddKey(NodeI.GetId()); }
00236     for (TNGraph::TNodeI NodeI = Graph->BegNI(); NodeI < Graph->EndNI(); NodeI++) {
00237       const int NodeId = NodeIdH.GetKeyId(NodeI.GetId()) + 1;
00238       for (int e = 0; e < NodeI.GetOutDeg(); e++) {
00239         const int DstNId = NodeIdH.GetKeyId(NodeI.GetOutNId(e)) + 1;  // no self edges
00240         if (NodeId != DstNId) AdjMtx.At(NodeId, DstNId) = 1;
00241       }
00242     }
00243     try { // can fail to converge but results seem to be good
00244       TSvd::Svd1Based(AdjMtx, LSingV, SngValV, RSingV); }
00245     catch(...) {
00246       printf("\n***No SVD convergence: G(%d, %d)\n", Nodes, Graph->GetEdges()); }
00247   } else { // Lanczos
00248     TNGraphMtx GraphMtx(Graph);
00249     TSparseSVD::LanczosSVD(GraphMtx, 1, 8, ssotFull, SngValV, LSingV, RSingV);
00250   }
00251   TFlt MxSngVal = TFlt::Mn;
00252   int ValN = 0;
00253   for (int i = 0; i < SngValV.Len(); i++) {
00254     if (MxSngVal < SngValV[i]) { MxSngVal = SngValV[i]; ValN = i; } }
00255   LSingV.GetCol(ValN, LeftSV);
00256   RSingV.GetCol(ValN, RightSV);
00257   IsAllValVNeg(LeftSV, true);
00258   IsAllValVNeg(RightSV, true);
00259 }
00260 
00261 void GetSngVec(const PNGraph& Graph, const int& SngVecs, TFltV& SngValV, TVec<TFltV>& LeftSV, TVec<TFltV>& RightSV) {
00262   const int Nodes = Graph->GetNodes();
00263   SngValV.Clr();
00264   LeftSV.Clr();
00265   RightSV.Clr();
00266   TFltVV LSingV, RSingV;
00267   if (Nodes < 100) {
00268     // perform full SVD
00269     TFltVV AdjMtx(Nodes+1, Nodes+1);
00270     TIntH NodeIdH;
00271     // create adjecency matrix (1-based)
00272     for (TNGraph::TNodeI NodeI = Graph->BegNI(); NodeI < Graph->EndNI(); NodeI++) {
00273       NodeIdH.AddKey(NodeI.GetId()); }
00274     for (TNGraph::TNodeI NodeI = Graph->BegNI(); NodeI < Graph->EndNI(); NodeI++) {
00275       const int NodeId = NodeIdH.GetKeyId(NodeI.GetId())+1;
00276       for (int e = 0; e < NodeI.GetOutDeg(); e++) {
00277         const int DstNId = NodeIdH.GetKeyId(NodeI.GetOutNId(e))+1;  // no self edges
00278         if (NodeId != DstNId) AdjMtx.At(NodeId, DstNId) = 1;
00279       }
00280     }
00281     try { // can fail to converge but results seem to be good
00282       TSvd::Svd1Based(AdjMtx, LSingV, SngValV, RSingV);
00283     } catch(...) {
00284       printf("\n***No SVD convergence: G(%d, %d)\n", Nodes, Graph->GetEdges()); 
00285     }
00286   } else { // Lanczos
00287     TNGraphMtx GraphMtx(Graph);
00288     TSparseSVD::LanczosSVD(GraphMtx, SngVecs, 2*SngVecs, ssotFull, SngValV, LSingV, RSingV);
00289     //TGAlg::SaveFullMtx(Graph, "adj_mtx.txt");
00290     //TLAMisc::DumpTFltVVMjrSubMtrx(LSingV, LSingV.GetRows(), LSingV.GetCols(), "LSingV2.txt"); // save MTX
00291   }
00292   TFltIntPrV SngValIdV;
00293   for (int i = 0; i < SngValV.Len(); i++) {
00294     SngValIdV.Add(TFltIntPr(SngValV[i], i)); 
00295   }
00296   SngValIdV.Sort(false);
00297   SngValV.Sort(false);
00298   for (int v = 0; v < SngValIdV.Len(); v++) { 
00299     LeftSV.Add();
00300     LSingV.GetCol(SngValIdV[v].Val2, LeftSV.Last());
00301     RightSV.Add();
00302     RSingV.GetCol(SngValIdV[v].Val2, RightSV.Last());
00303   }
00304   IsAllValVNeg(LeftSV[0], true);
00305   IsAllValVNeg(RightSV[0], true);
00306 }
00307 
00308 void GetEigVals(const PUNGraph& Graph, const int& EigVals, TFltV& EigValV) {
00309   // Lanczos
00310   TUNGraphMtx GraphMtx(Graph);
00311   //const int Nodes = Graph->GetNodes();
00312   //int CalcVals = int(2*EigVals);
00313   //if (CalcVals > Nodes) { CalcVals = Nodes; }
00314   //while (EigValV.Len() < EigVals && CalcVals < 3*EigVals) {
00315   try {
00316     if (EigVals > 4) { 
00317       TSparseSVD::SimpleLanczos(GraphMtx, 2*EigVals, EigValV, false); }
00318     else { TFltVV EigVecVV; // this is much more precise, but also much slower
00319       TSparseSVD::Lanczos(GraphMtx, EigVals, 3*EigVals, ssotFull, EigValV, EigVecVV, false); }
00320   }
00321   catch(...) {
00322     printf("\n  ***EXCEPTION:  TRIED %d GOT %d values** \n", 2*EigVals, EigValV.Len()); }
00323   if (EigValV.Len() < EigVals) {
00324     printf("  ***TRIED %d GOT %d values** \n", 2*EigVals, EigValV.Len()); }
00325   //  CalcVals += EigVals;
00326   //}
00327   EigValV.Sort(false);
00328   /*if (EigValV.Len() > EigVals) {
00329     EigValV.Del(EigVals, EigValV.Len()-1); }
00330   else {
00331     while (EigValV.Len() < EigVals) EigValV.Add(1e-6); 
00332   }
00333   IAssert(EigValV.Len() == EigVals);*/
00334 }
00335 
00336 void GetEigVec(const PUNGraph& Graph, TFltV& EigVecV) {
00337   TUNGraphMtx GraphMtx(Graph);
00338   TFltV EigValV;
00339   TFltVV EigVecVV;
00340   TSparseSVD::Lanczos(GraphMtx, 1, 8, ssotFull, EigValV, EigVecVV, false);
00341   EigVecVV.GetCol(0, EigVecV); // vector components are not sorted!!!
00342   IsAllValVNeg(EigVecV, true);
00343 }
00344 
00345 // to get first few eigenvectors
00346 void GetEigVec(const PUNGraph& Graph, const int& EigVecs, TFltV& EigValV, TVec<TFltV>& EigVecV) {
00347   const int Nodes = Graph->GetNodes();
00348   // Lanczos
00349   TUNGraphMtx GraphMtx(Graph);
00350   int CalcVals = int(2*EigVecs);
00351   if (CalcVals > Nodes) { CalcVals = Nodes; }
00352   TFltVV EigVecVV;
00353   //while (EigValV.Len() < EigVecs && CalcVals < 10*EigVecs) {
00354   try {
00355     TSparseSVD::Lanczos(GraphMtx, EigVecs, 2*EigVecs, ssotFull, EigValV, EigVecVV, false); }
00356   catch(...) {
00357     printf("\n  ***EXCEPTION:  TRIED %d GOT %d values** \n", CalcVals, EigValV.Len()); }
00358   if (EigValV.Len() < EigVecs) {
00359     printf("  ***TRIED %d GOT %d values** \n", CalcVals, EigValV.Len()); }
00360   //  CalcVals += EigVecs;
00361   //}
00362   TFltIntPrV EigValIdV;
00363   for (int i = 0; i < EigValV.Len(); i++) {
00364     EigValIdV.Add(TFltIntPr(EigValV[i], i)); 
00365   }
00366   EigValIdV.Sort(false);
00367   EigValV.Sort(false);
00368   for (int v = 0; v < EigValIdV.Len(); v++) { // vector components are not sorted!!!
00369     EigVecV.Add();
00370     EigVecVV.GetCol(EigValIdV[v].Val2, EigVecV.Last());
00371   }
00372   IsAllValVNeg(EigVecV[0], true);
00373 }
00374 
00375 // Inverse participation ratio: normalize EigVec to have L2=1 and then I=sum_k EigVec[i]^4
00376 // see Spectra of "real-world" graphs: Beyond the semicircle law by Farkas, Derenyi, Barabasi and Vicsek
00377 void GetInvParticipRat(const PUNGraph& Graph, int MaxEigVecs, int TimeLimit, TFltPrV& EigValIprV) {
00378   TUNGraphMtx GraphMtx(Graph);
00379   TFltVV EigVecVV;
00380   TFltV EigValV;
00381   TExeTm ExeTm;
00382   if (MaxEigVecs<=1) { MaxEigVecs=1000; }
00383   int EigVecs = TMath::Mn(Graph->GetNodes(), MaxEigVecs);
00384   printf("start %d vecs...", EigVecs);
00385   try {
00386     TSparseSVD::Lanczos2(GraphMtx, EigVecs, TimeLimit, ssotFull, EigValV, EigVecVV, false);
00387   } catch(...) {
00388     printf("\n  ***EXCEPTION:  TRIED %d GOT %d values** \n", EigVecs, EigValV.Len()); }
00389   printf("  ***TRIED %d GOT %d values in %s\n", EigVecs, EigValV.Len(), ExeTm.GetStr());
00390   TFltV EigVec;
00391   EigValIprV.Clr();
00392   if (EigValV.Empty()) { return; }
00393   for (int v = 0; v < EigVecVV.GetCols(); v++) {
00394     EigVecVV.GetCol(v, EigVec);
00395     EigValIprV.Add(TFltPr(EigValV[v], TSnapDetail::GetInvParticipRat(EigVec)));
00396   }
00397   EigValIprV.Sort();
00398 }
00399 
00400 namespace TSnapDetail {
00401 double GetInvParticipRat(const TFltV& EigVec) {
00402   double Sum2=0.0, Sum4=0.0;
00403   for (int i = 0; i < EigVec.Len(); i++) {
00404     Sum2 += EigVec[i]*EigVec[i];
00405     Sum4 += pow(EigVec[i].Val, 4.0);
00406   }
00407   return Sum4/(Sum2*Sum2);
00408 }
00409 } // namespace TSnapDetail
00410 
00411 }; // namespace TSnap