SNAP Library 3.0, User Reference  2016-07-20 17:56:49
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
kronecker.cpp
Go to the documentation of this file.
1 #include "stdafx.h"
2 #include "kronecker.h"
3 
5 // Kronecker Graphs
6 const double TKronMtx::NInf = -DBL_MAX;
8 
9 TKronMtx::TKronMtx(const TFltV& SeedMatrix) : SeedMtx(SeedMatrix) {
10  MtxDim = (int) sqrt((double)SeedMatrix.Len());
12 }
13 
14 void TKronMtx::SaveTxt(const TStr& OutFNm) const {
15  FILE *F = fopen(OutFNm.CStr(), "wt");
16  for (int i = 0; i < GetDim(); i++) {
17  for (int j = 0; j < GetDim(); j++) {
18  if (j > 0) fprintf(F, "\t");
19  fprintf(F, "%f", At(i,j)); }
20  fprintf(F, "\n");
21  }
22  fclose(F);
23 }
24 
26  if (this != &Kronecker){
27  MtxDim=Kronecker.MtxDim;
28  SeedMtx=Kronecker.SeedMtx;
29  }
30  return *this;
31 }
32 
33 bool TKronMtx::IsProbMtx() const {
34  for (int i = 0; i < Len(); i++) {
35  if (At(i) < 0.0 || At(i) > 1.0) return false;
36  }
37  return true;
38 }
39 
40 void TKronMtx::SetRndMtx(const int& PrmMtxDim, const double& MinProb) {
41  MtxDim = PrmMtxDim;
43  for (int p = 0; p < SeedMtx.Len(); p++) {
44  do {
45  SeedMtx[p] = TKronMtx::Rnd.GetUniDev();
46  } while (SeedMtx[p] < MinProb);
47  }
48 }
49 
50 void TKronMtx::SetEpsMtx(const double& Eps1, const double& Eps0, const int& Eps1Val, const int& Eps0Val) {
51  for (int i = 0; i < Len(); i++) {
52  double& Val = At(i);
53  if (Val == Eps1Val) Val = double(Eps1);
54  else if (Val == Eps0Val) Val = double(Eps0);
55  }
56 }
57 
58 // scales parameter values to allow Edges
59 void TKronMtx::SetForEdges(const int& Nodes, const int& Edges) {
60  const int KronIter = GetKronIter(Nodes);
61  const double EZero = pow((double) Edges, 1.0/double(KronIter));
62  const double Factor = EZero / GetMtxSum();
63  for (int i = 0; i < Len(); i++) {
64  At(i) *= Factor;
65  if (At(i) > 1) { At(i) = 1; }
66  }
67 }
68 
69 void TKronMtx::AddRndNoise(const double& SDev) {
70  Dump("before");
71  double NewVal;
72  int c =0;
73  for (int i = 0; i < Len(); i++) {
74  for(c = 0; ((NewVal = At(i)*Rnd.GetNrmDev(1, SDev, 0.8, 1.2)) < 0.01 || NewVal>0.99) && c <1000; c++) { }
75  if (c < 999) { At(i) = NewVal; } else { printf("XXXXX\n"); }
76  }
77  Dump("after");
78 }
79 
81  TChA ChA("[");
82  for (int i = 0; i < Len(); i++) {
83  ChA += TStr::Fmt("%g", At(i));
84  if ((i+1)%GetDim()==0 && (i+1<Len())) { ChA += "; "; }
85  else if (i+1<Len()) { ChA += ", "; }
86  }
87  ChA += "]";
88  return TStr(ChA);
89 }
90 
92  for (int i = 0; i < Len(); i++) {
93  IAssert(At(i) >= 0.0 && At(i) <= 1.0);
94  At(i) = 1.0 - At(i);
95  }
96 }
97 
99  LLMtx.GenMtx(MtxDim);
100  for (int i = 0; i < Len(); i++) {
101  if (At(i) != 0.0) { LLMtx.At(i) = log(At(i)); }
102  else { LLMtx.At(i) = NInf; }
103  }
104 }
105 
107  ProbMtx.GenMtx(MtxDim);
108  for (int i = 0; i < Len(); i++) {
109  if (At(i) != NInf) { ProbMtx.At(i) = exp(At(i)); }
110  else { ProbMtx.At(i) = 0.0; }
111  }
112 }
113 
114 void TKronMtx::Swap(TKronMtx& KronMtx) {
115  ::Swap(MtxDim, KronMtx.MtxDim);
116  SeedMtx.Swap(KronMtx.SeedMtx);
117 }
118 
119 int TKronMtx::GetNodes(const int& NIter) const {
120  return (int) pow(double(GetDim()), double(NIter));
121 }
122 
123 int TKronMtx::GetEdges(const int& NIter) const {
124  return (int) pow(double(GetMtxSum()), double(NIter));
125 }
126 
127 int TKronMtx::GetKronIter(const int& Nodes) const {
128  return (int) ceil(log(double(Nodes)) / log(double(GetDim()))); // upper bound
129  //return (int) TMath::Round(log(double(Nodes)) / log(double(GetDim()))); // round to nearest power
130 }
131 
132 int TKronMtx::GetNZeroK(const PNGraph& Graph) const {
133  return GetNodes(GetKronIter(Graph->GetNodes()));
134 }
135 
136 double TKronMtx::GetEZero(const int& Edges, const int& KronIters) const {
137  return pow((double) Edges, 1.0/double(KronIters));
138 }
139 
140 double TKronMtx::GetMtxSum() const {
141  double Sum = 0;
142  for (int i = 0; i < Len(); i++) {
143  Sum += At(i); }
144  return Sum;
145 }
146 
147 double TKronMtx::GetRowSum(const int& RowId) const {
148  double Sum = 0;
149  for (int c = 0; c < GetDim(); c++) {
150  Sum += At(RowId, c); }
151  return Sum;
152 }
153 
154 double TKronMtx::GetColSum(const int& ColId) const {
155  double Sum = 0;
156  for (int r = 0; r < GetDim(); r++) {
157  Sum += At(r, ColId); }
158  return Sum;
159 }
160 
161 double TKronMtx::GetEdgeProb(int NId1, int NId2, const int& NKronIters) const {
162  double Prob = 1.0;
163  for (int level = 0; level < NKronIters; level++) {
164  Prob *= At(NId1 % MtxDim, NId2 % MtxDim);
165  if (Prob == 0.0) { return 0.0; }
166  NId1 /= MtxDim; NId2 /= MtxDim;
167  }
168  return Prob;
169 }
170 
171 double TKronMtx::GetNoEdgeProb(int NId1, int NId2, const int& NKronIters) const {
172  return 1.0 - GetEdgeProb(NId1, NId2, NKronIters);
173 }
174 
175 double TKronMtx::GetEdgeLL(int NId1, int NId2, const int& NKronIters) const {
176  double LL = 0.0;
177  for (int level = 0; level < NKronIters; level++) {
178  const double& LLVal = At(NId1 % MtxDim, NId2 % MtxDim);
179  if (LLVal == NInf) return NInf;
180  LL += LLVal;
181  NId1 /= MtxDim; NId2 /= MtxDim;
182  }
183  return LL;
184 }
185 
186 double TKronMtx::GetNoEdgeLL(int NId1, int NId2, const int& NKronIters) const {
187  return log(1.0 - exp(GetEdgeLL(NId1, NId2, NKronIters)));
188 }
189 
190 // 2nd order Taylor approximation log(1-x) ~ -x - 0.5x^2
191 double TKronMtx::GetApxNoEdgeLL(int NId1, int NId2, const int& NKronIters) const {
192  const double EdgeLL = GetEdgeLL(NId1, NId2, NKronIters);
193  return -exp(EdgeLL) - 0.5*exp(2*EdgeLL);
194 }
195 
196 bool TKronMtx::IsEdgePlace(int NId1, int NId2, const int& NKronIters, const double& ProbTresh) const {
197  double Prob = 1.0;
198  for (int level = 0; level < NKronIters; level++) {
199  Prob *= At(NId1 % MtxDim, NId2 % MtxDim);
200  if (ProbTresh > Prob) { return false; }
201  NId1 /= MtxDim; NId2 /= MtxDim;
202  }
203  return true;
204 }
205 
206 // deriv a*log(x) = a/x
207 double TKronMtx::GetEdgeDLL(const int& ParamId, int NId1, int NId2, const int& NKronIters) const {
208  const int ThetaX = ParamId % GetDim();
209  const int ThetaY = ParamId / GetDim();
210  int ThetaCnt = 0;
211  for (int level = 0; level < NKronIters; level++) {
212  if ((NId1 % MtxDim) == ThetaX && (NId2 % MtxDim) == ThetaY) {
213  ThetaCnt++; }
214  NId1 /= MtxDim; NId2 /= MtxDim;
215  }
216  return double(ThetaCnt) / exp(At(ParamId));
217 }
218 
219 // deriv log(1-x^a*y^b..) = -x'/(1-x) = (-a*x^(a-1)*y^b..) / (1-x^a*y^b..)
220 double TKronMtx::GetNoEdgeDLL(const int& ParamId, int NId1, int NId2, const int& NKronIters) const {
221  const int& ThetaX = ParamId % GetDim();
222  const int& ThetaY = ParamId / GetDim();
223  int ThetaCnt = 0;
224  double DLL = 0, LL = 0;
225  for (int level = 0; level < NKronIters; level++) {
226  const int X = NId1 % MtxDim;
227  const int Y = NId2 % MtxDim;
228  const double LVal = At(X, Y);
229  if (X == ThetaX && Y == ThetaY) {
230  if (ThetaCnt != 0) { DLL += LVal; }
231  ThetaCnt++;
232  } else { DLL += LVal; }
233  LL += LVal;
234  NId1 /= MtxDim; NId2 /= MtxDim;
235  }
236  return -ThetaCnt*exp(DLL) / (1.0 - exp(LL));
237 }
238 
239 // 2nd order Taylor approximation log(1-x) ~ -x - 0.5x^2
240 double TKronMtx::GetApxNoEdgeDLL(const int& ParamId, int NId1, int NId2, const int& NKronIters) const {
241  const int& ThetaX = ParamId % GetDim();
242  const int& ThetaY = ParamId / GetDim();
243  int ThetaCnt = 0;
244  double DLL = 0;//, LL = 0;
245  for (int level = 0; level < NKronIters; level++) {
246  const int X = NId1 % MtxDim;
247  const int Y = NId2 % MtxDim;
248  const double LVal = At(X, Y); IAssert(LVal > NInf);
249  if (X == ThetaX && Y == ThetaY) {
250  if (ThetaCnt != 0) { DLL += LVal; }
251  ThetaCnt++;
252  } else { DLL += LVal; }
253  //LL += LVal;
254  NId1 /= MtxDim; NId2 /= MtxDim;
255  }
256  //return -ThetaCnt*exp(DLL)*(1.0 + exp(LL)); // -x'/(1+x) WRONG!
257  // deriv = -(ax^(a-1)*y^b..) - a*x^(2a-1)*y^2b..
258  // = - (ax^(a-1)*y^b..) - a*x*(x^(a-1)*y^b..)^2
259  return -ThetaCnt*exp(DLL) - ThetaCnt*exp(At(ThetaX, ThetaY)+2*DLL);
260 }
261 
262 uint TKronMtx::GetNodeSig(const double& OneProb) {
263  uint Sig = 0;
264  for (int i = 0; i < (int)(8*sizeof(uint)); i++) {
265  if (TKronMtx::Rnd.GetUniDev() < OneProb) {
266  Sig |= (1u<<i); }
267  }
268  return Sig;
269 }
270 
271 double TKronMtx::GetEdgeProb(const uint& NId1Sig, const uint& NId2Sig, const int& NIter) const {
272  Assert(GetDim() == 2);
273  double Prob = 1.0;
274  for (int i = 0; i < NIter; i++) {
275  const uint Mask = (1u<<i);
276  const uint Bit1 = NId1Sig & Mask;
277  const uint Bit2 = NId2Sig & Mask;
278  Prob *= At(int(Bit1!=0), int(Bit2!=0));
279  }
280  return Prob;
281 }
282 
283 PNGraph TKronMtx::GenThreshGraph(const double& Thresh) const {
284  PNGraph Graph = TNGraph::New();
285  for (int i = 0; i < GetDim(); i++) {
286  Graph->AddNode(i); }
287  for (int r = 0; r < GetDim(); r++) {
288  for (int c = 0; c < GetDim(); c++) {
289  if (At(r, c) >= Thresh) { Graph->AddEdge(r, c); }
290  }
291  }
292  return Graph;
293 }
294 
295 PNGraph TKronMtx::GenRndGraph(const double& RndFact) const {
296  PNGraph Graph = TNGraph::New();
297  for (int i = 0; i < GetDim(); i++) {
298  Graph->AddNode(i); }
299  for (int r = 0; r < GetDim(); r++) {
300  for (int c = 0; c < GetDim(); c++) {
301  if (RndFact * At(r, c) >= TKronMtx::Rnd.GetUniDev()) { Graph->AddEdge(r, c); }
302  }
303  }
304  return Graph;
305 }
306 
307 int TKronMtx::GetKronIter(const int& GNodes, const int& SeedMtxSz) {
308  return (int) ceil(log(double(GNodes)) / log(double(SeedMtxSz)));
309 }
310 
311 // slow but exaxt procedure (we flip all O(N^2) edges)
312 PNGraph TKronMtx::GenKronecker(const TKronMtx& SeedMtx, const int& NIter, const bool& IsDir, const int& Seed) {
313  const TKronMtx& SeedGraph = SeedMtx;
314  const int NNodes = SeedGraph.GetNodes(NIter);
315  printf(" Kronecker: %d nodes, %s...\n", NNodes, IsDir ? "Directed":"UnDirected");
316  PNGraph Graph = TNGraph::New(NNodes, -1);
317  TExeTm ExeTm;
318  TRnd Rnd(Seed);
319  int edges = 0;
320  for (int node1 = 0; node1 < NNodes; node1++) {
321  Graph->AddNode(node1); }
322  if (IsDir) {
323  for (int node1 = 0; node1 < NNodes; node1++) {
324  for (int node2 = 0; node2 < NNodes; node2++) {
325  if (SeedGraph.IsEdgePlace(node1, node2, NIter, Rnd.GetUniDev())) {
326  Graph->AddEdge(node1, node2);
327  edges++;
328  }
329  }
330  if (node1 % 1000 == 0) printf("\r...%dk, %dk", node1/1000, edges/1000);
331  }
332  } else {
333  for (int node1 = 0; node1 < NNodes; node1++) {
334  for (int node2 = node1; node2 < NNodes; node2++) {
335  if (SeedGraph.IsEdgePlace(node1, node2, NIter, Rnd.GetUniDev())) {
336  Graph->AddEdge(node1, node2);
337  Graph->AddEdge(node2, node1);
338  edges++;
339  }
340  }
341  if (node1 % 1000 == 0) printf("\r...%dk, %dk", node1/1000, edges/1000);
342  }
343  }
344  printf("\r %d edges [%s]\n", Graph->GetEdges(), ExeTm.GetTmStr());
345  return Graph;
346 }
347 
348 // use RMat like recursive descent to quickly generate a Kronecker graph
349 PNGraph TKronMtx::GenFastKronecker(const TKronMtx& SeedMtx, const int& NIter, const bool& IsDir, const int& Seed) {
350  const TKronMtx& SeedGraph = SeedMtx;
351  const int MtxDim = SeedGraph.GetDim();
352  const double MtxSum = SeedGraph.GetMtxSum();
353  const int NNodes = SeedGraph.GetNodes(NIter);
354  const int NEdges = SeedGraph.GetEdges(NIter);
355  //const double DiagEdges = NNodes * pow(SeedGraph.At(0,0), double(NIter));
356  //const int NEdges = (int) TMath::Round(((pow(double(SeedGraph.GetMtxSum()), double(NIter)) - DiagEdges) /2.0));
357  printf(" FastKronecker: %d nodes, %d edges, %s...\n", NNodes, NEdges, IsDir ? "Directed":"UnDirected");
358  PNGraph Graph = TNGraph::New(NNodes, -1);
359  TRnd Rnd(Seed);
360  TExeTm ExeTm;
361  // prepare cell probability vector
362  TVec<TFltIntIntTr> ProbToRCPosV; // row, col position
363  double CumProb = 0.0;
364  for (int r = 0; r < MtxDim; r++) {
365  for (int c = 0; c < MtxDim; c++) {
366  const double Prob = SeedGraph.At(r, c);
367  if (Prob > 0.0) {
368  CumProb += Prob;
369  ProbToRCPosV.Add(TFltIntIntTr(CumProb/MtxSum, r, c));
370  }
371  }
372  }
373  // add nodes
374  for (int i = 0; i < NNodes; i++) {
375  Graph->AddNode(i); }
376  // add edges
377  int Rng, Row, Col, Collision=0, n = 0;
378  for (int edges = 0; edges < NEdges; ) {
379  Rng=NNodes; Row=0; Col=0;
380  for (int iter = 0; iter < NIter; iter++) {
381  const double& Prob = Rnd.GetUniDev();
382  n = 0; while(Prob > ProbToRCPosV[n].Val1) { n++; }
383  const int MtxRow = ProbToRCPosV[n].Val2;
384  const int MtxCol = ProbToRCPosV[n].Val3;
385  Rng /= MtxDim;
386  Row += MtxRow * Rng;
387  Col += MtxCol * Rng;
388  }
389  if (! Graph->IsEdge(Row, Col)) { // allow self-loops
390  Graph->AddEdge(Row, Col); edges++;
391  if (! IsDir) {
392  if (Row != Col) Graph->AddEdge(Col, Row);
393  edges++;
394  }
395  } else { Collision++; }
396  //if (edges % 1000 == 0) printf("\r...%dk", edges/1000);
397  }
398  //printf(" %d edges [%s]\n", Graph->GetEdges(), ExeTm.GetTmStr());
399  printf(" collisions: %d (%.4f)\n", Collision, Collision/(double)Graph->GetEdges());
400  return Graph;
401 }
402 
403 // use RMat like recursive descent to quickly generate a Kronecker graph
404 PNGraph TKronMtx::GenFastKronecker(const TKronMtx& SeedMtx, const int& NIter, const int& Edges, const bool& IsDir, const int& Seed) {
405  const TKronMtx& SeedGraph = SeedMtx;
406  const int MtxDim = SeedGraph.GetDim();
407  const double MtxSum = SeedGraph.GetMtxSum();
408  const int NNodes = SeedGraph.GetNodes(NIter);
409  const int NEdges = Edges;
410  //const double DiagEdges = NNodes * pow(SeedGraph.At(0,0), double(NIter));
411  //const int NEdges = (int) TMath::Round(((pow(double(SeedGraph.GetMtxSum()), double(NIter)) - DiagEdges) /2.0));
412  printf(" RMat Kronecker: %d nodes, %d edges, %s...\n", NNodes, NEdges, IsDir ? "Directed":"UnDirected");
413  PNGraph Graph = TNGraph::New(NNodes, -1);
414  TRnd Rnd(Seed);
415  TExeTm ExeTm;
416  // prepare cell probability vector
417  TVec<TFltIntIntTr> ProbToRCPosV; // row, col position
418  double CumProb = 0.0;
419  for (int r = 0; r < MtxDim; r++) {
420  for (int c = 0; c < MtxDim; c++) {
421  const double Prob = SeedGraph.At(r, c);
422  if (Prob > 0.0) {
423  CumProb += Prob;
424  ProbToRCPosV.Add(TFltIntIntTr(CumProb/MtxSum, r, c));
425  }
426  }
427  }
428  // add nodes
429  for (int i = 0; i < NNodes; i++) {
430  Graph->AddNode(i); }
431  // add edges
432  int Rng, Row, Col, Collision=0, n = 0;
433  for (int edges = 0; edges < NEdges; ) {
434  Rng=NNodes; Row=0; Col=0;
435  for (int iter = 0; iter < NIter; iter++) {
436  const double& Prob = Rnd.GetUniDev();
437  n = 0; while(Prob > ProbToRCPosV[n].Val1) { n++; }
438  const int MtxRow = ProbToRCPosV[n].Val2;
439  const int MtxCol = ProbToRCPosV[n].Val3;
440  Rng /= MtxDim;
441  Row += MtxRow * Rng;
442  Col += MtxCol * Rng;
443  }
444  if (! Graph->IsEdge(Row, Col)) { // allow self-loops
445  Graph->AddEdge(Row, Col); edges++;
446  if (! IsDir) {
447  if (Row != Col) Graph->AddEdge(Col, Row);
448  edges++;
449  }
450  } else { Collision++; }
451  //if (edges % 1000 == 0) printf("\r...%dk", edges/1000);
452  }
453  //printf(" %d edges [%s]\n", Graph->GetEdges(), ExeTm.GetTmStr());
454  printf(" collisions: %d (%.4f)\n", Collision, Collision/(double)Graph->GetEdges());
455  return Graph;
456 }
457 
458 PNGraph TKronMtx::GenDetKronecker(const TKronMtx& SeedMtx, const int& NIter, const bool& IsDir) {
459  const TKronMtx& SeedGraph = SeedMtx;
460  const int NNodes = SeedGraph.GetNodes(NIter);
461  printf(" Deterministic Kronecker: %d nodes, %s...\n", NNodes, IsDir ? "Directed":"UnDirected");
462  PNGraph Graph = TNGraph::New(NNodes, -1);
463  TExeTm ExeTm;
464  int edges = 0;
465  for (int node1 = 0; node1 < NNodes; node1++) { Graph->AddNode(node1); }
466 
467  for (int node1 = 0; node1 < NNodes; node1++) {
468  for (int node2 = 0; node2 < NNodes; node2++) {
469  if (SeedGraph.IsEdgePlace(node1, node2, NIter, Rnd.GetUniDev())) {
470  Graph->AddEdge(node1, node2);
471  edges++;
472  }
473  }
474  if (node1 % 1000 == 0) printf("\r...%dk, %dk", node1/1000, edges/1000);
475  }
476  return Graph;
477 }
478 
479 void TKronMtx::PlotCmpGraphs(const TKronMtx& SeedMtx, const PNGraph& Graph, const TStr& FNmPref, const TStr& Desc) {
480  const int KronIters = SeedMtx.GetKronIter(Graph->GetNodes());
481  PNGraph KronG, WccG;
482  const bool FastGen = true;
483  if (FastGen) { KronG = TKronMtx::GenFastKronecker(SeedMtx, KronIters, true, 0); }
484  else { KronG = TKronMtx::GenKronecker(SeedMtx, KronIters, true, 0); }
485  TSnap::DelZeroDegNodes(KronG);
486  WccG = TSnap::GetMxWcc(KronG);
487  const TStr Desc1 = TStr::Fmt("%s", Desc.CStr());
489  //gsdHops
490  //gsWccHops, gsdSngVal, gsdSngVec, gsdClustCf
491  GS.Add(Graph, TSecTm(1), TStr::Fmt("GRAPH G(%d, %d)", Graph->GetNodes(), Graph->GetEdges()));
492  GS.Add(KronG, TSecTm(2), TStr::Fmt("KRONECKER K(%d, %d)", KronG->GetNodes(), KronG->GetEdges()));
493  GS.Add(WccG, TSecTm(3), TStr::Fmt("KRONECKER wccK(%d, %d)", WccG->GetNodes(), WccG->GetEdges()));
494  const TStr Style = "linewidth 1 pointtype 6 pointsize 1";
495  GS.ImposeDistr(gsdInDeg, FNmPref, Desc1, false, false, gpwLinesPoints, Style);
496  GS.ImposeDistr(gsdInDeg, FNmPref+"-B", Desc1, true, false, gpwLinesPoints, Style);
497  GS.ImposeDistr(gsdOutDeg, FNmPref, Desc1, false, false, gpwLinesPoints, Style);
498  GS.ImposeDistr(gsdOutDeg, FNmPref+"-B", Desc1, true, false, gpwLinesPoints, Style);
499  GS.ImposeDistr(gsdHops, FNmPref, Desc1, false, false, gpwLinesPoints, Style);
500  GS.ImposeDistr(gsdClustCf, FNmPref, Desc1, false, false, gpwLinesPoints, Style);
501  GS.ImposeDistr(gsdClustCf, FNmPref+"-B", Desc1, true, false, gpwLinesPoints, Style);
502  GS.ImposeDistr(gsdSngVal, FNmPref, Desc1, false, false, gpwLinesPoints, Style);
503  GS.ImposeDistr(gsdSngVal, FNmPref+"-B", Desc1, true, false, gpwLinesPoints, Style);
504  GS.ImposeDistr(gsdSngVec, FNmPref, Desc1, false, false, gpwLinesPoints, Style);
505  GS.ImposeDistr(gsdSngVec, FNmPref+"-B", Desc1, true, false, gpwLinesPoints, Style);
506  GS.ImposeDistr(gsdWcc, FNmPref, Desc1, false, false, gpwLinesPoints, Style);
507  GS.ImposeDistr(gsdWcc, FNmPref+"-B", Desc1, true, false, gpwLinesPoints, Style);
508  GS.ImposeDistr(gsdScc, FNmPref, Desc1, false, false, gpwLinesPoints, Style);
509  GS.ImposeDistr(gsdScc, FNmPref+"-B", Desc1, true, false, gpwLinesPoints, Style);
510 // typedef enum { distrUndef, distrInDeg, distrOutDeg, distrWcc, distrScc,
511 // distrHops, distrWccHops, distrSngVal, distrSngVec, distrClustCf, distrMx } TGraphDistr;*/
512 }
513 
514 void TKronMtx::PlotCmpGraphs(const TKronMtx& SeedMtx1, const TKronMtx& SeedMtx2, const PNGraph& Graph, const TStr& FNmPref, const TStr& Desc) {
515  const int KronIters1 = SeedMtx1.GetKronIter(Graph->GetNodes());
516  const int KronIters2 = SeedMtx2.GetKronIter(Graph->GetNodes());
517  PNGraph KronG1, KronG2;
518  const bool FastGen = true;
519  if (FastGen) {
520  KronG1 = TKronMtx::GenFastKronecker(SeedMtx1, KronIters1, true, 0);
521  KronG2 = TKronMtx::GenFastKronecker(SeedMtx2, KronIters2, false, 0); }
522  else {
523  KronG1 = TKronMtx::GenKronecker(SeedMtx1, KronIters1, true, 0);
524  KronG2 = TKronMtx::GenKronecker(SeedMtx2, KronIters2, true, 0); }
525  TSnap::DelZeroDegNodes(KronG1);
526  TSnap::DelZeroDegNodes(KronG2);
527  const TStr Desc1 = TStr::Fmt("%s", Desc.CStr());
529  //gsdHops
530  //gsWccHops, gsdSngVal, gsdSngVec, gsdClustCf
531  GS.Add(Graph, TSecTm(1), TStr::Fmt("GRAPH G(%d, %d)", Graph->GetNodes(), Graph->GetEdges()));
532  GS.Add(KronG1, TSecTm(2), TStr::Fmt("KRONECKER1 K(%d, %d) %s", KronG1->GetNodes(), KronG1->GetEdges(), SeedMtx1.GetMtxStr().CStr()));
533  GS.Add(KronG2, TSecTm(3), TStr::Fmt("KRONECKER2 K(%d, %d) %s", KronG2->GetNodes(), KronG2->GetEdges(), SeedMtx2.GetMtxStr().CStr()));
534  const TStr Style = "linewidth 1 pointtype 6 pointsize 1";
535  // raw data
536  GS.ImposeDistr(gsdInDeg, FNmPref, Desc1, false, false, gpwLinesPoints, Style);
537  GS.ImposeDistr(gsdOutDeg, FNmPref, Desc1, false, false, gpwLinesPoints, Style);
538  GS.ImposeDistr(gsdHops, FNmPref, Desc1, false, false, gpwLinesPoints, Style);
539  GS.ImposeDistr(gsdClustCf, FNmPref, Desc1, false, false, gpwLinesPoints, Style);
540  GS.ImposeDistr(gsdSngVal, FNmPref, Desc1, false, false, gpwLinesPoints, Style);
541  GS.ImposeDistr(gsdSngVec, FNmPref, Desc1, false, false, gpwLinesPoints, Style);
542  GS.ImposeDistr(gsdWcc, FNmPref, Desc1, false, false, gpwLinesPoints, Style);
543  GS.ImposeDistr(gsdScc, FNmPref, Desc1, false, false, gpwLinesPoints, Style);
544  GS.ImposeDistr(gsdTriadPart, FNmPref, Desc1, false, false, gpwLinesPoints, Style);
545  // smooth
546  GS.ImposeDistr(gsdInDeg, FNmPref+"-B", Desc1, true, false, gpwLinesPoints, Style);
547  GS.ImposeDistr(gsdOutDeg, FNmPref+"-B", Desc1, true, false, gpwLinesPoints, Style);
548  GS.ImposeDistr(gsdClustCf, FNmPref+"-B", Desc1, true, false, gpwLinesPoints, Style);
549  GS.ImposeDistr(gsdScc, FNmPref+"-B", Desc1, true, false, gpwLinesPoints, Style);
550  GS.ImposeDistr(gsdWcc, FNmPref+"-B", Desc1, true, false, gpwLinesPoints, Style);
551  GS.ImposeDistr(gsdSngVec, FNmPref+"-B", Desc1, true, false, gpwLinesPoints, Style);
552  GS.ImposeDistr(gsdSngVal, FNmPref+"-B", Desc1, true, false, gpwLinesPoints, Style);
553  GS.ImposeDistr(gsdTriadPart, FNmPref+"-B", Desc1, true, false, gpwLinesPoints, Style);
554 }
555 
556 void TKronMtx::PlotCmpGraphs(const TVec<TKronMtx>& SeedMtxV, const PNGraph& Graph, const TStr& FNmPref, const TStr& Desc) {
557  const TStr Desc1 = TStr::Fmt("%s", Desc.CStr());
559  GS.Add(Graph, TSecTm(1), TStr::Fmt("GRAPH G(%d, %d)", Graph->GetNodes(), Graph->GetEdges()));
560  //gsdHops
561  //gsWccHops, gsdSngVal, gsdSngVec, gsdClustCf
562  for (int m = 0; m < SeedMtxV.Len(); m++) {
563  const int KronIters = SeedMtxV[m].GetKronIter(Graph->GetNodes());
564  PNGraph KronG1 = TKronMtx::GenFastKronecker(SeedMtxV[m], KronIters, true, 0);
565  printf("*** K(%d, %d) n0=%d\n", KronG1->GetNodes(), KronG1->GetEdges(), SeedMtxV[m].GetDim());
566  TSnap::DelZeroDegNodes(KronG1);
567  printf(" del zero deg K(%d, %d) n0=%d\n", KronG1->GetNodes(), KronG1->GetEdges(), m);
568  GS.Add(KronG1, TSecTm(m+2), TStr::Fmt("K(%d, %d) n0^k=%d n0=%d", KronG1->GetNodes(), KronG1->GetEdges(), SeedMtxV[m].GetNZeroK(Graph), SeedMtxV[m].GetDim()));
569  // plot after each Kronecker is done
570  const TStr Style = "linewidth 1 pointtype 6 pointsize 1";
571  GS.ImposeDistr(gsdInDeg, FNmPref, Desc1, false, false, gpwLines, Style);
572  GS.ImposeDistr(gsdInDeg, FNmPref+"-B", Desc1, true, false, gpwLines, Style);
573  GS.ImposeDistr(gsdOutDeg, FNmPref, Desc1, false, false, gpwLines, Style);
574  GS.ImposeDistr(gsdOutDeg, FNmPref+"-B", Desc1, true, false, gpwLines, Style);
575  GS.ImposeDistr(gsdHops, FNmPref, Desc1, false, false, gpwLines, Style);
576  GS.ImposeDistr(gsdClustCf, FNmPref, Desc1, false, false, gpwLines, Style);
577  GS.ImposeDistr(gsdClustCf, FNmPref+"-B", Desc1, true, false, gpwLines, Style);
578  GS.ImposeDistr(gsdSngVal, FNmPref, Desc1, false, false, gpwLines, Style);
579  GS.ImposeDistr(gsdSngVal, FNmPref+"-B", Desc1, true, false, gpwLines, Style);
580  GS.ImposeDistr(gsdSngVec, FNmPref, Desc1, false, false, gpwLines, Style);
581  GS.ImposeDistr(gsdSngVec, FNmPref+"-B", Desc1, true, false, gpwLines, Style);
582  GS.ImposeDistr(gsdWcc, FNmPref, Desc1, false, false, gpwLines, Style);
583  GS.ImposeDistr(gsdWcc, FNmPref+"-B", Desc1, true, false, gpwLines, Style);
584  GS.ImposeDistr(gsdScc, FNmPref, Desc1, false, false, gpwLines, Style);
585  GS.ImposeDistr(gsdScc, FNmPref+"-B", Desc1, true, false, gpwLines, Style);
586  }
587  // typedef enum { distrUndef, distrInDeg, distrOutDeg, distrWcc, distrScc,
588  // distrHops, distrWccHops, distrSngVal, distrSngVec, distrClustCf, distrMx } TGraphDistr;*/
589 }
590 
591 void TKronMtx::KronMul(const TKronMtx& Left, const TKronMtx& Right, TKronMtx& Result) {
592  const int LDim = Left.GetDim();
593  const int RDim = Right.GetDim();
594  Result.GenMtx(LDim * RDim);
595  for (int r1 = 0; r1 < LDim; r1++) {
596  for (int c1 = 0; c1 < LDim; c1++) {
597  const double& Val = Left.At(r1, c1);
598  for (int r2 = 0; r2 < RDim; r2++) {
599  for (int c2 = 0; c2 < RDim; c2++) {
600  Result.At(r1*RDim+r2, c1*RDim+c2) = Val * Right.At(r2, c2);
601  }
602  }
603  }
604  }
605 }
606 
607 void TKronMtx::KronSum(const TKronMtx& Left, const TKronMtx& Right, TKronMtx& Result) {
608  const int LDim = Left.GetDim();
609  const int RDim = Right.GetDim();
610  Result.GenMtx(LDim * RDim);
611  for (int r1 = 0; r1 < LDim; r1++) {
612  for (int c1 = 0; c1 < LDim; c1++) {
613  const double& Val = Left.At(r1, c1);
614  for (int r2 = 0; r2 < RDim; r2++) {
615  for (int c2 = 0; c2 < RDim; c2++) {
616  if (Val == NInf || Right.At(r2, c2) == NInf) {
617  Result.At(r1*RDim+r2, c1*RDim+c2) = NInf; }
618  else {
619  Result.At(r1*RDim+r2, c1*RDim+c2) = Val + Right.At(r2, c2); }
620  }
621  }
622  }
623  }
624 }
625 
626 void TKronMtx::KronPwr(const TKronMtx& KronMtx, const int& NIter, TKronMtx& OutMtx) {
627  OutMtx = KronMtx;
628  TKronMtx NewOutMtx;
629  for (int iter = 0; iter < NIter; iter++) {
630  KronMul(OutMtx, KronMtx, NewOutMtx);
631  NewOutMtx.Swap(OutMtx);
632  }
633 
634 }
635 
636 void TKronMtx::Dump(const TStr& MtxNm, const bool& Sort) const {
637  /*printf("%s: %d x %d\n", MtxNm.Empty()?"Mtx":MtxNm.CStr(), GetDim(), GetDim());
638  for (int r = 0; r < GetDim(); r++) {
639  for (int c = 0; c < GetDim(); c++) { printf(" %8.2g", At(r, c)); }
640  printf("\n");
641  }*/
642  if (! MtxNm.Empty()) printf("%s\n", MtxNm.CStr());
643  double Sum=0.0;
644  TFltV ValV = SeedMtx;
645  if (Sort) { ValV.Sort(false); }
646  for (int i = 0; i < ValV.Len(); i++) {
647  printf(" %10.4g", ValV[i]());
648  Sum += ValV[i];
649  if ((i+1) % GetDim() == 0) { printf("\n"); }
650  }
651  printf(" (sum:%.4f)\n", Sum);
652 }
653 
654 // average difference in the parameters
655 double TKronMtx::GetAvgAbsErr(const TKronMtx& Kron1, const TKronMtx& Kron2) {
656  TFltV P1 = Kron1.GetMtx();
657  TFltV P2 = Kron2.GetMtx();
658  IAssert(P1.Len() == P2.Len());
659  P1.Sort(); P2.Sort();
660  double delta = 0.0;
661  for (int i = 0; i < P1.Len(); i++) {
662  delta += fabs(P1[i] - P2[i]);
663  }
664  return delta/P1.Len();
665 }
666 
667 // average L2 difference in the parameters
668 double TKronMtx::GetAvgFroErr(const TKronMtx& Kron1, const TKronMtx& Kron2) {
669  TFltV P1 = Kron1.GetMtx();
670  TFltV P2 = Kron2.GetMtx();
671  IAssert(P1.Len() == P2.Len());
672  P1.Sort(); P2.Sort();
673  double delta = 0.0;
674  for (int i = 0; i < P1.Len(); i++) {
675  delta += pow(P1[i] - P2[i], 2);
676  }
677  return sqrt(delta/P1.Len());
678 }
679 
680 // get matrix from matlab matrix notation
682  TStrV RowStrV, ColStrV;
683  MatlabMtxStr.ChangeChAll(',', ' ');
684  MatlabMtxStr.SplitOnAllCh(';', RowStrV); IAssert(! RowStrV.Empty());
685  RowStrV[0].SplitOnWs(ColStrV); IAssert(! ColStrV.Empty());
686  const int Rows = RowStrV.Len();
687  const int Cols = ColStrV.Len();
688  IAssert(Rows == Cols);
689  TKronMtx Mtx(Rows);
690  for (int r = 0; r < Rows; r++) {
691  RowStrV[r].SplitOnWs(ColStrV);
692  IAssert(ColStrV.Len() == Cols);
693  for (int c = 0; c < Cols; c++) {
694  Mtx.At(r, c) = (double) ColStrV[c].GetFlt(); }
695  }
696  return Mtx;
697 }
698 
699 TKronMtx TKronMtx::GetRndMtx(const int& Dim, const double& MinProb) {
700  TKronMtx Mtx;
701  Mtx.SetRndMtx(Dim, MinProb);
702  return Mtx;
703 }
704 
705 TKronMtx TKronMtx::GetInitMtx(const int& Dim, const int& Nodes, const int& Edges) {
706  const double MxParam = 0.8+TKronMtx::Rnd.GetUniDev()/5.0;
707  const double MnParam = 0.2-TKronMtx::Rnd.GetUniDev()/5.0;
708  const double Step = (MxParam-MnParam) / (Dim*Dim-1);
709  TFltV ParamV(Dim*Dim);
710  if (Dim == 1) { ParamV.PutAll(0.5); } // random graph
711  else {
712  for (int p = 0; p < ParamV.Len(); p++) {
713  ParamV[p] = MxParam - p*Step; }
714  }
715  //IAssert(ParamV[0]==MxParam && ParamV.Last()==MnParam);
716  TKronMtx Mtx(ParamV);
717  Mtx.SetForEdges(Nodes, Edges);
718  return Mtx;
719 }
720 
721 TKronMtx TKronMtx::GetInitMtx(const TStr& MtxStr, const int& Dim, const int& Nodes, const int& Edges) {
722  TKronMtx Mtx(Dim);
723  if (TCh::IsNum(MtxStr[0])) { Mtx = TKronMtx::GetMtx(MtxStr); }
724  else if (MtxStr[0] == 'r') { Mtx = TKronMtx::GetRndMtx(Dim, 0.1); }
725  else if (MtxStr[0] == 'a') {
726  const double Prob = TKronMtx::Rnd.GetUniDev();
727  if (Prob < 0.4) {
728  Mtx = TKronMtx::GetInitMtx(Dim, Nodes, Edges); }
729  else { // interpolate so that there are in the corners 0.9, 0.5, 0.1, 0.5
730  const double Max = 0.9+TKronMtx::Rnd.GetUniDev()/10.0;
731  const double Min = 0.1-TKronMtx::Rnd.GetUniDev()/10.0;
732  const double Med = (Max-Min)/2.0;
733  Mtx.At(0,0) = Max; Mtx.At(0,Dim-1) = Med;
734  Mtx.At(Dim-1, 0) = Med; Mtx.At(Dim-1, Dim-1) = Min;
735  for (int i = 1; i < Dim-1; i++) {
736  Mtx.At(i,i) = Max - double(i)*(Max-Min)/double(Dim-1);
737  Mtx.At(i, 0) = Mtx.At(0, i) = Max - double(i)*(Max-Med)/double(Dim-1);
738  Mtx.At(i, Dim-1) = Mtx.At(Dim-1, i) = Med - double(i)*(Med-Min)/double(Dim-1);
739  }
740  for (int i = 1; i < Dim-1; i++) {
741  for (int j = 1; j < Dim-1; j++) {
742  if (i >= j) { continue; }
743  Mtx.At(i,j) = Mtx.At(j,i) = Mtx.At(i,i) - (j-i)*(Mtx.At(i,i)-Mtx.At(i,Dim-1))/(Dim-i-1);
744  }
745  }
746  Mtx.AddRndNoise(0.1);
747  }
748  } else { FailR("Wrong mtx: matlab str, or random (r), or all (a)"); }
749  Mtx.SetForEdges(Nodes, Edges);
750  return Mtx;
751 }
752 
754  if (MtxNm == "3chain") return TKronMtx::GetMtx("1 1 0; 1 1 1; 0 1 1");
755  else if (MtxNm == "4star") return TKronMtx::GetMtx("1 1 1 1; 1 1 0 0 ; 1 0 1 0; 1 0 0 1");
756  else if (MtxNm == "4chain") return TKronMtx::GetMtx("1 1 0 0; 1 1 1 0 ; 0 1 1 1; 0 0 1 1");
757  else if (MtxNm == "4square") return TKronMtx::GetMtx("1 1 0 1; 1 1 1 0 ; 0 1 1 1; 1 0 1 1");
758  else if (MtxNm == "5star") return TKronMtx::GetMtx("1 1 1 1 1; 1 1 0 0 0; 1 0 1 0 0; 1 0 0 1 0; 1 0 0 0 1");
759  else if (MtxNm == "6star") return TKronMtx::GetMtx("1 1 1 1 1 1; 1 1 0 0 0 0; 1 0 1 0 0 0; 1 0 0 1 0 0; 1 0 0 0 1 0; 1 0 0 0 0 1");
760  else if (MtxNm == "7star") return TKronMtx::GetMtx("1 1 1 1 1 1 1; 1 1 0 0 0 0 0; 1 0 1 0 0 0 0; 1 0 0 1 0 0 0; 1 0 0 0 1 0 0; 1 0 0 0 0 1 0; 1 0 0 0 0 0 1");
761  else if (MtxNm == "5burst") return TKronMtx::GetMtx("1 1 1 1 0; 1 1 0 0 0; 1 0 1 0 0; 1 0 0 1 1; 0 0 0 1 1");
762  else if (MtxNm == "7burst") return TKronMtx::GetMtx("1 0 0 1 0 0 0; 0 1 0 1 0 0 0; 0 0 1 1 0 0 0; 1 1 1 1 1 0 0; 0 0 0 1 1 1 1; 0 0 0 0 1 1 0; 0 0 0 0 1 0 1");
763  else if (MtxNm == "7cross") return TKronMtx::GetMtx("1 0 0 1 0 0 0; 0 1 0 1 0 0 0; 0 0 1 1 0 0 0; 1 1 1 1 1 0 0; 0 0 0 1 1 1 0; 0 0 0 0 1 1 1; 0 0 0 0 0 1 1");
764  FailR(TStr::Fmt("Unknow matrix: '%s'", MtxNm.CStr()).CStr());
765  return TKronMtx();
766 }
767 
769  PSs Ss = TSs::LoadTxt(ssfTabSep, MtxFNm);
770  IAssertR(Ss->GetXLen() == Ss->GetYLen(), "Not a square matrix");
771  IAssert(Ss->GetYLen() == Ss->GetXLen());
772  TKronMtx Mtx(Ss->GetYLen());
773  for (int r = 0; r < Ss->GetYLen(); r++) {
774  for (int c = 0; c < Ss->GetXLen(); c++) {
775  Mtx.At(r, c) = (double) Ss->At(c, r).GetFlt(); }
776  }
777  return Mtx;
778 }
779 
780 
782 // Kronecker Log Likelihood
783 TKroneckerLL::TKroneckerLL(const PNGraph& GraphPt, const TFltV& ParamV, const double& PermPSwapNd): PermSwapNodeProb(PermPSwapNd) {
784  InitLL(GraphPt, TKronMtx(ParamV));
785 }
786 
787 TKroneckerLL::TKroneckerLL(const PNGraph& GraphPt, const TKronMtx& ParamMtx, const double& PermPSwapNd) : PermSwapNodeProb(PermPSwapNd) {
788  InitLL(GraphPt, ParamMtx);
789 }
790 
791 TKroneckerLL::TKroneckerLL(const PNGraph& GraphPt, const TKronMtx& ParamMtx, const TIntV& NodeIdPermV, const double& PermPSwapNd) : PermSwapNodeProb(PermPSwapNd) {
792  InitLL(GraphPt, ParamMtx);
793  NodePerm = NodeIdPermV;
795 }
796 
797 PKroneckerLL TKroneckerLL::New(const PNGraph& GraphPt, const TKronMtx& ParamMtx, const double& PermPSwapNd) {
798  return new TKroneckerLL(GraphPt, ParamMtx, PermPSwapNd);
799 }
800 
801 PKroneckerLL TKroneckerLL::New(const PNGraph& GraphPt, const TKronMtx& ParamMtx, const TIntV& NodeIdPermV, const double& PermPSwapNd) {
802  return new TKroneckerLL(GraphPt, ParamMtx, NodeIdPermV, PermPSwapNd);
803 }
804 
805 void TKroneckerLL::SetPerm(const char& PermId) {
806  if (PermId == 'o') { SetOrderPerm(); }
807  else if (PermId == 'd') { SetDegPerm(); }
808  else if (PermId == 'r') { SetRndPerm(); }
809  else if (PermId == 'b') { SetBestDegPerm(); }
810  else FailR("Unknown permutation type (o,d,r)");
811 }
812 
814  NodePerm.Gen(Nodes, 0);
815  for (int i = 0; i < Graph->GetNodes(); i++) {
816  NodePerm.Add(i); }
818 }
819 
821  NodePerm.Gen(Nodes, 0);
822  for (int i = 0; i < Graph->GetNodes(); i++) {
823  NodePerm.Add(i); }
824  NodePerm.Shuffle(TKronMtx::Rnd);
826 }
827 
829  TIntPrV DegNIdV;
830  for (TNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
831  DegNIdV.Add(TIntPr(NI.GetDeg(), NI.GetId()));
832  }
833  DegNIdV.Sort(false);
834  NodePerm.Gen(DegNIdV.Len(), 0);
835  for (int i = 0; i < DegNIdV.Len(); i++) {
836  NodePerm.Add(DegNIdV[i].Val2);
837  }
839 }
840 
843  NodePerm.Gen(Nodes);
844  const int NZero = ProbMtx.GetDim();
845  TFltIntPrV DegV(Nodes), CDegV(Nodes);
846  TFltV Row(NZero);
847  TFltV Col(NZero);
848  for(int i = 0; i < NZero; i++) {
849  for(int j = 0; j < NZero; j++) {
850  Row[i] += ProbMtx.At(i, j);
851  Col[i] += ProbMtx.At(j, i);
852  }
853  }
854 
855  for(int i = 0; i < Nodes; i++) {
856  TNGraph::TNodeI NodeI = Graph->GetNI(i);
857  int NId = i;
858  double RowP = 1.0, ColP = 1.0;
859  for(int j = 0; j < KronIters; j++) {
860  int Bit = NId % NZero;
861  RowP *= Row[Bit]; ColP *= Col[Bit];
862  NId /= NZero;
863  }
864  CDegV[i] = TFltIntPr(RowP + ColP, i);
865  DegV[i] = TFltIntPr(NodeI.GetDeg(), i);
866  }
867  DegV.Sort(false); CDegV.Sort(false);
868  for(int i = 0; i < Nodes; i++) {
869  NodePerm[DegV[i].Val2] = CDegV[i].Val2;
870  }
872 }
873 
875 void TKroneckerLL::SetIPerm(const TIntV& Perm) {
876  InvertPerm.Gen(Perm.Len());
877  for (int i = 0; i < Perm.Len(); i++) {
878  InvertPerm[Perm[i]] = i;
879  }
880 }
881 
882 void TKroneckerLL::SetGraph(const PNGraph& GraphPt) {
883  Graph = GraphPt;
884  bool NodesOk = true;
885  // check that nodes IDs are {0,1,..,Nodes-1}
886  for (int nid = 0; nid < Graph->GetNodes(); nid++) {
887  if (! Graph->IsNode(nid)) { NodesOk=false; break; } }
888  if (! NodesOk) {
889  TIntV NIdV; GraphPt->GetNIdV(NIdV);
890  Graph = TSnap::GetSubGraph(GraphPt, NIdV, true);
891  for (int nid = 0; nid < Graph->GetNodes(); nid++) {
892  IAssert(Graph->IsNode(nid)); }
893  }
894  Nodes = Graph->GetNodes();
895  IAssert(LLMtx.GetDim() > 1 && LLMtx.Len() == ProbMtx.Len());
896  KronIters = (int) ceil(log(double(Nodes)) / log(double(ProbMtx.GetDim())));
897  // edge vector (for swap-edge permutation proposal)
898 // if (PermSwapNodeProb < 1.0) { /// !!!!! MYUNGHWAN, CHECK! WHY IS THIS COMMENTED OUT
899  GEdgeV.Gen(Graph->GetEdges(), 0);
900  for (TNGraph::TEdgeI EI = Graph->BegEI(); EI < Graph->EndEI(); EI++) {
901  if (EI.GetSrcNId() != EI.GetDstNId()) {
902  GEdgeV.Add(TIntTr(EI.GetSrcNId(), EI.GetDstNId(), -1));
903  }
904  }
905 // }
906 
907  RealNodes = Nodes;
908  RealEdges = Graph->GetEdges();
909  LEdgeV = TIntTrV();
910  LSelfEdge = 0;
911 }
912 
913 
915  Nodes = (int) pow((double)ProbMtx.GetDim(), KronIters);
916  // add nodes until filling the Kronecker graph model
917  for (int nid = Graph->GetNodes(); nid < Nodes; nid++) {
918  Graph->AddNode(nid);
919  }
920 }
921 
923 void TKroneckerLL::RestoreGraph(const bool RestoreNodes) {
924  // remove from Graph
925  int NId1, NId2;
926  for (int e = 0; e < LEdgeV.Len(); e++) {
927  NId1 = LEdgeV[e].Val1; NId2 = LEdgeV[e].Val2;
928  Graph->DelEdge(NId1, NId2);
929 // GEdgeV.DelIfIn(LEdgeV[e]);
930  }
931  if(LEdgeV.Len() - LSelfEdge)
932  GEdgeV.Del(GEdgeV.Len() - LEdgeV.Len() + LSelfEdge, GEdgeV.Len() - 1);
933  LEdgeV.Clr();
934  LSelfEdge = 0;
935 
936  if(RestoreNodes) {
937  for(int i = Graph->GetNodes()-1; i >= RealNodes; i--) {
938  Graph->DelNode(i);
939  }
940  }
941 }
942 
944  // the number of times a seed matrix element appears in
945  // the full kronecker adjacency matrix after KronIter
946  // kronecker multiplications
947  double ElemCnt = 1;
948  const double dim = LLMtx.GetDim();
949  // count number of times x appears in the full kronecker matrix
950  for (int i = 1; i < KronIters; i++) {
951  ElemCnt = dim*dim*ElemCnt + TMath::Power(dim, 2*i);
952  }
953  return ElemCnt * LLMtx.GetMtxSum();
954 }
955 
956 double TKroneckerLL::GetFullRowLL(int RowId) const {
957  double RowLL = 0.0;
958  const int MtxDim = LLMtx.GetDim();
959  for (int level = 0; level < KronIters; level++) {
960  RowLL += LLMtx.GetRowSum(RowId % MtxDim);
961  RowId /= MtxDim;
962  }
963  return RowLL;
964 }
965 
966 double TKroneckerLL::GetFullColLL(int ColId) const {
967  double ColLL = 0.0;
968  const int MtxDim = LLMtx.GetDim();
969  for (int level = 0; level < KronIters; level++) {
970  ColLL += LLMtx.GetColSum(ColId % MtxDim);
971  ColId /= MtxDim;
972  }
973  return ColLL;
974 }
975 
977  double LL = 0;
978  for (int NId1 = 0; NId1 < LLMtx.GetNodes(KronIters); NId1++) {
979  for (int NId2 = 0; NId2 < LLMtx.GetNodes(KronIters); NId2++) {
980  LL = LL + LLMtx.GetNoEdgeLL(NId1, NId2, KronIters);
981  }
982  }
983  return LL;
984 }
985 
986 // 2nd prder Taylor approximation, log(1-x) ~ -x - 0.5x^2
988  double Sum=0.0, SumSq=0.0;
989  for (int i = 0; i < ProbMtx.Len(); i++) {
990  Sum += ProbMtx.At(i);
991  SumSq += TMath::Sqr(ProbMtx.At(i));
992  }
993  return -pow(Sum, KronIters) - 0.5*pow(SumSq, KronIters);
994 }
995 
996 void TKroneckerLL::InitLL(const TFltV& ParamV) {
997  InitLL(TKronMtx(ParamV));
998 }
999 
1000 void TKroneckerLL::InitLL(const TKronMtx& ParamMtx) {
1001  IAssert(ParamMtx.IsProbMtx());
1002  ProbMtx = ParamMtx;
1003  ProbMtx.GetLLMtx(LLMtx);
1005  if (GradV.Len() != ProbMtx.Len()) {
1006  GradV.Gen(ProbMtx.Len()); }
1007  GradV.PutAll(0.0);
1008 }
1009 
1010 void TKroneckerLL::InitLL(const PNGraph& GraphPt, const TKronMtx& ParamMtx) {
1011  IAssert(ParamMtx.IsProbMtx());
1012  ProbMtx = ParamMtx;
1013  ProbMtx.GetLLMtx(LLMtx);
1014  SetGraph(GraphPt);
1016  if (GradV.Len() != ProbMtx.Len()) {
1017  GradV.Gen(ProbMtx.Len()); }
1018  GradV.PutAll(0.0);
1019 }
1020 
1021 // exact graph log-likelihood, takes O(N^2 + E)
1023  LogLike = GetEmptyGraphLL(); // takes O(N^2)
1024  for (int nid = 0; nid < Nodes; nid++) {
1025  const TNGraph::TNodeI Node = Graph->GetNI(nid);
1026  const int SrcNId = NodePerm[nid];
1027  for (int e = 0; e < Node.GetOutDeg(); e++) {
1028  const int DstNId = NodePerm[Node.GetOutNId(e)];
1029  LogLike = LogLike - LLMtx.GetNoEdgeLL(SrcNId, DstNId, KronIters)
1030  + LLMtx.GetEdgeLL(SrcNId, DstNId, KronIters);
1031  }
1032  }
1033  return LogLike;
1034 }
1035 
1036 // approximate graph log-likelihood, takes O(E + N_0)
1038  LogLike = GetApxEmptyGraphLL(); // O(N_0)
1039  for (int nid = 0; nid < Nodes; nid++) {
1040  const TNGraph::TNodeI Node = Graph->GetNI(nid);
1041  const int SrcNId = NodePerm[nid];
1042  for (int e = 0; e < Node.GetOutDeg(); e++) {
1043  const int DstNId = NodePerm[Node.GetOutNId(e)];
1044  LogLike = LogLike - LLMtx.GetApxNoEdgeLL(SrcNId, DstNId, KronIters)
1045  + LLMtx.GetEdgeLL(SrcNId, DstNId, KronIters);
1046  }
1047  }
1048  return LogLike;
1049 }
1050 
1051 // Used in TKroneckerLL::SwapNodesLL: DeltaLL if we
1052 // add the node to the matrix (node gets/creates all
1053 // of its in- and out-edges).
1054 // Zero is for the empty row/column (isolated node)
1055 double TKroneckerLL::NodeLLDelta(const int& NId) const {
1056  if (! Graph->IsNode(NId)) { return 0.0; } // zero degree node
1057  double Delta = 0.0;
1058  const TNGraph::TNodeI Node = Graph->GetNI(NId);
1059  // out-edges
1060  const int SrcRow = NodePerm[NId];
1061  for (int e = 0; e < Node.GetOutDeg(); e++) {
1062  const int DstCol = NodePerm[Node.GetOutNId(e)];
1063  Delta += - LLMtx.GetApxNoEdgeLL(SrcRow, DstCol, KronIters)
1064  + LLMtx.GetEdgeLL(SrcRow, DstCol, KronIters);
1065  }
1066  //in-edges
1067  const int SrcCol = NodePerm[NId];
1068  for (int e = 0; e < Node.GetInDeg(); e++) {
1069  const int DstRow = NodePerm[Node.GetInNId(e)];
1070  Delta += - LLMtx.GetApxNoEdgeLL(DstRow, SrcCol, KronIters)
1071  + LLMtx.GetEdgeLL(DstRow, SrcCol, KronIters);
1072  }
1073  // double counted self-edge
1074  if (Graph->IsEdge(NId, NId)) {
1075  Delta += + LLMtx.GetApxNoEdgeLL(SrcRow, SrcCol, KronIters)
1076  - LLMtx.GetEdgeLL(SrcRow, SrcCol, KronIters);
1077  IAssert(SrcRow == SrcCol);
1078  }
1079  return Delta;
1080 }
1081 
1082 // swapping two nodes, only need to go over two rows and columns
1083 double TKroneckerLL::SwapNodesLL(const int& NId1, const int& NId2) {
1084  // subtract old LL (remove nodes)
1085  LogLike = LogLike - NodeLLDelta(NId1) - NodeLLDelta(NId2);
1086  const int PrevId1 = NodePerm[NId1], PrevId2 = NodePerm[NId2];
1087  // double-counted edges
1088  if (Graph->IsEdge(NId1, NId2)) {
1089  LogLike += - LLMtx.GetApxNoEdgeLL(PrevId1, PrevId2, KronIters)
1090  + LLMtx.GetEdgeLL(PrevId1, PrevId2, KronIters); }
1091  if (Graph->IsEdge(NId2, NId1)) {
1092  LogLike += - LLMtx.GetApxNoEdgeLL(PrevId2, PrevId1, KronIters)
1093  + LLMtx.GetEdgeLL(PrevId2, PrevId1, KronIters); }
1094  // swap
1095  NodePerm.Swap(NId1, NId2);
1096  InvertPerm.Swap(NodePerm[NId1], NodePerm[NId2]);
1097  // add new LL (add nodes)
1098  LogLike = LogLike + NodeLLDelta(NId1) + NodeLLDelta(NId2);
1099  const int NewId1 = NodePerm[NId1], NewId2 = NodePerm[NId2];
1100  // correct for double-counted edges
1101  if (Graph->IsEdge(NId1, NId2)) {
1102  LogLike += + LLMtx.GetApxNoEdgeLL(NewId1, NewId2, KronIters)
1103  - LLMtx.GetEdgeLL(NewId1, NewId2, KronIters); }
1104  if (Graph->IsEdge(NId2, NId1)) {
1105  LogLike += + LLMtx.GetApxNoEdgeLL(NewId2, NewId1, KronIters)
1106  - LLMtx.GetEdgeLL(NewId2, NewId1, KronIters); }
1107  return LogLike;
1108 }
1109 
1110 // metropolis sampling from P(permutation|graph)
1111 bool TKroneckerLL::SampleNextPerm(int& NId1, int& NId2) {
1112  // pick 2 uniform nodes and swap
1113  if (TKronMtx::Rnd.GetUniDev() < PermSwapNodeProb) {
1114  NId1 = TKronMtx::Rnd.GetUniDevInt(Nodes);
1115  NId2 = TKronMtx::Rnd.GetUniDevInt(Nodes);
1116  while (NId2 == NId1) { NId2 = TKronMtx::Rnd.GetUniDevInt(Nodes); }
1117  } else {
1118  // pick uniform edge and swap endpoints (slow as it moves around high degree nodes)
1119  const int e = TKronMtx::Rnd.GetUniDevInt(GEdgeV.Len());
1120  NId1 = GEdgeV[e].Val1; NId2 = GEdgeV[e].Val2;
1121  }
1122  const double U = TKronMtx::Rnd.GetUniDev();
1123  const double OldLL = LogLike;
1124  const double NewLL = SwapNodesLL(NId1, NId2);
1125  const double LogU = log(U);
1126  if (LogU > NewLL - OldLL) { // reject
1127  LogLike = OldLL;
1128  NodePerm.Swap(NId2, NId1); //swap back
1129  InvertPerm.Swap(NodePerm[NId2], NodePerm[NId1]); // swap back
1130  return false;
1131  }
1132  return true; // accept new sample
1133 }
1134 
1135 // exact gradient of an empty graph, O(N^2)
1136 double TKroneckerLL::GetEmptyGraphDLL(const int& ParamId) const {
1137  double DLL = 0.0;
1138  for (int NId1 = 0; NId1 < Nodes; NId1++) {
1139  for (int NId2 = 0; NId2 < Nodes; NId2++) {
1140  DLL += LLMtx.GetNoEdgeDLL(ParamId, NodePerm[NId1], NodePerm[NId2], KronIters);
1141  }
1142  }
1143  return DLL;
1144 }
1145 
1146 // approx gradient, using 2nd order Taylor approximation, O(N_0^2)
1147 double TKroneckerLL::GetApxEmptyGraphDLL(const int& ParamId) const {
1148  double Sum=0.0, SumSq=0.0;
1149  for (int i = 0; i < ProbMtx.Len(); i++) {
1150  Sum += ProbMtx.At(i);
1151  SumSq += TMath::Sqr(ProbMtx.At(i));
1152  }
1153  // d/dx -sum(x_i) - 0.5sum(x_i^2) = d/dx sum(theta)^k - 0.5 sum(theta^2)^k
1154  return -KronIters*pow(Sum, KronIters-1) - KronIters*pow(SumSq, KronIters-1)*ProbMtx.At(ParamId);
1155 }
1156 
1157 // exact graph gradient, runs O(N^2)
1159  for (int ParamId = 0; ParamId < LLMtx.Len(); ParamId++) {
1160  double DLL = 0.0;
1161  for (int NId1 = 0; NId1 < Nodes; NId1++) {
1162  for (int NId2 = 0; NId2 < Nodes; NId2++) {
1163  if (Graph->IsEdge(NId1, NId2)) {
1164  DLL += LLMtx.GetEdgeDLL(ParamId, NodePerm[NId1], NodePerm[NId2], KronIters);
1165  } else {
1166  DLL += LLMtx.GetNoEdgeDLL(ParamId, NodePerm[NId1], NodePerm[NId2], KronIters);
1167  }
1168  }
1169  }
1170  GradV[ParamId] = DLL;
1171  }
1172  return GradV;
1173 }
1174 
1175 // slow (but correct) approximate gradient, runs O(N^2)
1177  for (int ParamId = 0; ParamId < LLMtx.Len(); ParamId++) {
1178  double DLL = 0.0;
1179  for (int NId1 = 0; NId1 < Nodes; NId1++) {
1180  for (int NId2 = 0; NId2 < Nodes; NId2++) {
1181  if (Graph->IsEdge(NId1, NId2)) {
1182  DLL += LLMtx.GetEdgeDLL(ParamId, NodePerm[NId1], NodePerm[NId2], KronIters);
1183  } else {
1184  DLL += LLMtx.GetApxNoEdgeDLL(ParamId, NodePerm[NId1], NodePerm[NId2], KronIters);
1185  }
1186  }
1187  }
1188  GradV[ParamId] = DLL;
1189  }
1190  return GradV;
1191 }
1192 
1193 // fast approximate gradient, runs O(E)
1195  for (int ParamId = 0; ParamId < LLMtx.Len(); ParamId++) {
1196  double DLL = GetApxEmptyGraphDLL(ParamId);
1197  for (int nid = 0; nid < Nodes; nid++) {
1198  const TNGraph::TNodeI Node = Graph->GetNI(nid);
1199  const int SrcNId = NodePerm[nid];
1200  for (int e = 0; e < Node.GetOutDeg(); e++) {
1201  const int DstNId = NodePerm[Node.GetOutNId(e)];
1202  DLL = DLL - LLMtx.GetApxNoEdgeDLL(ParamId, SrcNId, DstNId, KronIters)
1203  + LLMtx.GetEdgeDLL(ParamId, SrcNId, DstNId, KronIters);
1204  }
1205  }
1206  GradV[ParamId] = DLL;
1207  }
1208  return GradV;
1209 }
1210 
1211 // Used in TKroneckerLL::UpdateGraphDLL: DeltaDLL if we
1212 // add the node to the empty matrix/graph (node
1213 // gets/creates all of its in- and out-edges).
1214 double TKroneckerLL::NodeDLLDelta(const int ParamId, const int& NId) const {
1215  if (! Graph->IsNode(NId)) { return 0.0; } // zero degree node
1216  double Delta = 0.0;
1217  const TNGraph::TNodeI Node = Graph->GetNI(NId);
1218  const int SrcRow = NodePerm[NId];
1219  for (int e = 0; e < Node.GetOutDeg(); e++) {
1220  const int DstCol = NodePerm[Node.GetOutNId(e)];
1221  Delta += - LLMtx.GetApxNoEdgeDLL(ParamId, SrcRow, DstCol, KronIters)
1222  + LLMtx.GetEdgeDLL(ParamId, SrcRow, DstCol, KronIters);
1223  }
1224  const int SrcCol = NodePerm[NId];
1225  for (int e = 0; e < Node.GetInDeg(); e++) {
1226  const int DstRow = NodePerm[Node.GetInNId(e)];
1227  Delta += - LLMtx.GetApxNoEdgeDLL(ParamId, DstRow, SrcCol, KronIters)
1228  + LLMtx.GetEdgeDLL(ParamId, DstRow, SrcCol, KronIters);
1229  }
1230  // double counter self-edge
1231  if (Graph->IsEdge(NId, NId)) {
1232  Delta += + LLMtx.GetApxNoEdgeDLL(ParamId, SrcRow, SrcCol, KronIters)
1233  - LLMtx.GetEdgeDLL(ParamId, SrcRow, SrcCol, KronIters);
1234  IAssert(SrcRow == SrcCol);
1235  }
1236  return Delta;
1237 }
1238 
1239 // given old DLL and new permutation, efficiently updates the DLL
1240 // permutation is new, but DLL is old
1241 void TKroneckerLL::UpdateGraphDLL(const int& SwapNId1, const int& SwapNId2) {
1242  for (int ParamId = 0; ParamId < LLMtx.Len(); ParamId++) {
1243  // permutation before the swap (swap back to previous position)
1244  NodePerm.Swap(SwapNId1, SwapNId2);
1245  // subtract old DLL
1246  TFlt& DLL = GradV[ParamId];
1247  DLL = DLL - NodeDLLDelta(ParamId, SwapNId1) - NodeDLLDelta(ParamId, SwapNId2);
1248  // double-counted edges
1249  const int PrevId1 = NodePerm[SwapNId1], PrevId2 = NodePerm[SwapNId2];
1250  if (Graph->IsEdge(SwapNId1, SwapNId2)) {
1251  DLL += - LLMtx.GetApxNoEdgeDLL(ParamId, PrevId1, PrevId2, KronIters)
1252  + LLMtx.GetEdgeDLL(ParamId, PrevId1, PrevId2, KronIters); }
1253  if (Graph->IsEdge(SwapNId2, SwapNId1)) {
1254  DLL += - LLMtx.GetApxNoEdgeDLL(ParamId, PrevId2, PrevId1, KronIters)
1255  + LLMtx.GetEdgeDLL(ParamId, PrevId2, PrevId1, KronIters); }
1256  // permutation after the swap (restore the swap)
1257  NodePerm.Swap(SwapNId1, SwapNId2);
1258  // add new DLL
1259  DLL = DLL + NodeDLLDelta(ParamId, SwapNId1) + NodeDLLDelta(ParamId, SwapNId2);
1260  const int NewId1 = NodePerm[SwapNId1], NewId2 = NodePerm[SwapNId2];
1261  // double-counted edges
1262  if (Graph->IsEdge(SwapNId1, SwapNId2)) {
1263  DLL += + LLMtx.GetApxNoEdgeDLL(ParamId, NewId1, NewId2, KronIters)
1264  - LLMtx.GetEdgeDLL(ParamId, NewId1, NewId2, KronIters); }
1265  if (Graph->IsEdge(SwapNId2, SwapNId1)) {
1266  DLL += + LLMtx.GetApxNoEdgeDLL(ParamId, NewId2, NewId1, KronIters)
1267  - LLMtx.GetEdgeDLL(ParamId, NewId2, NewId1, KronIters); }
1268  }
1269 }
1270 
1271 void TKroneckerLL::SampleGradient(const int& WarmUp, const int& NSamples, double& AvgLL, TFltV& AvgGradV) {
1272  printf("SampleGradient: %s (%s warm-up):", TInt::GetMegaStr(NSamples).CStr(), TInt::GetMegaStr(WarmUp).CStr());
1273  int NId1=0, NId2=0, NAccept=0;
1274  TExeTm ExeTm1;
1275  if (WarmUp > 0) {
1276  CalcApxGraphLL();
1277  for (int s = 0; s < WarmUp; s++) { SampleNextPerm(NId1, NId2); }
1278  printf(" warm-up:%s,", ExeTm1.GetTmStr()); ExeTm1.Tick();
1279  }
1280  CalcApxGraphLL(); // re-calculate LL (due to numerical errors)
1281  CalcApxGraphDLL();
1282  AvgLL = 0;
1283  AvgGradV.Gen(LLMtx.Len()); AvgGradV.PutAll(0.0);
1284  printf(" sampl");
1285  for (int s = 0; s < NSamples; s++) {
1286  if (SampleNextPerm(NId1, NId2)) { // new permutation
1287  UpdateGraphDLL(NId1, NId2); NAccept++; }
1288  for (int m = 0; m < LLMtx.Len(); m++) { AvgGradV[m] += GradV[m]; }
1289  AvgLL += GetLL();
1290  }
1291  printf("ing");
1292  AvgLL = AvgLL / double(NSamples);
1293  for (int m = 0; m < LLMtx.Len(); m++) {
1294  AvgGradV[m] = AvgGradV[m] / double(NSamples); }
1295  printf(":%s (%.0f/s), accept %.1f%%\n", ExeTm1.GetTmStr(), double(NSamples)/ExeTm1.GetSecs(),
1296  double(100*NAccept)/double(NSamples));
1297 }
1298 
1299 double TKroneckerLL::GradDescent(const int& NIter, const double& LrnRate, double MnStep, double MxStep, const int& WarmUp, const int& NSamples) {
1300  printf("\n----------------------------------------------------------------------\n");
1301  printf("Fitting graph on %d nodes, %d edges\n", Graph->GetNodes(), Graph->GetEdges());
1302  printf("Kron iters: %d (== %d nodes)\n\n", KronIters(), ProbMtx.GetNodes(KronIters()));
1303  TExeTm IterTm, TotalTm;
1304  double OldLL=-1e10, CurLL=0;
1305  const double EZero = pow((double) Graph->GetEdges(), 1.0/double(KronIters));
1306  TFltV CurGradV, LearnRateV(GetParams()), LastStep(GetParams());
1307  LearnRateV.PutAll(LrnRate);
1308  TKronMtx NewProbMtx = ProbMtx;
1309 
1310  if(DebugMode) {
1311  LLV.Gen(NIter, 0);
1312  MtxV.Gen(NIter, 0);
1313  }
1314 
1315  for (int Iter = 0; Iter < NIter; Iter++) {
1316  printf("%03d] ", Iter);
1317  SampleGradient(WarmUp, NSamples, CurLL, CurGradV);
1318  for (int p = 0; p < GetParams(); p++) {
1319  LearnRateV[p] *= 0.95;
1320  if (Iter < 1) {
1321  while (fabs(LearnRateV[p]*CurGradV[p]) > MxStep) { LearnRateV[p] *= 0.95; }
1322  while (fabs(LearnRateV[p]*CurGradV[p]) < 0.02) { LearnRateV[p] *= (1.0/0.95); } // move more
1323  } else {
1324  // set learn rate so that move for each parameter is inside the [MnStep, MxStep]
1325  while (fabs(LearnRateV[p]*CurGradV[p]) > MxStep) { LearnRateV[p] *= 0.95; printf(".");}
1326  while (fabs(LearnRateV[p]*CurGradV[p]) < MnStep) { LearnRateV[p] *= (1.0/0.95); printf("*");}
1327  if (MxStep > 3*MnStep) { MxStep *= 0.95; }
1328  }
1329  NewProbMtx.At(p) = ProbMtx.At(p) + LearnRateV[p]*CurGradV[p];
1330  if (NewProbMtx.At(p) > 0.9999) { NewProbMtx.At(p)=0.9999; }
1331  if (NewProbMtx.At(p) < 0.0001) { NewProbMtx.At(p)=0.0001; }
1332  }
1333  printf(" trueE0: %.2f (%d), estE0: %.2f (%d), ERR: %f\n", EZero, Graph->GetEdges(),
1335  printf(" currLL: %.4f, deltaLL: %.4f\n", CurLL, CurLL-OldLL); // positive is good
1336  for (int p = 0; p < GetParams(); p++) {
1337  printf(" %d] %f <-- %f + %9f Grad: %9.1f Rate: %g\n", p, NewProbMtx.At(p),
1338  ProbMtx.At(p), (double)(LearnRateV[p]*CurGradV[p]), CurGradV[p](), LearnRateV[p]());
1339  }
1340  if (Iter+1 < NIter) { // skip last update
1341  ProbMtx = NewProbMtx; ProbMtx.GetLLMtx(LLMtx); }
1342  OldLL=CurLL;
1343  printf("\n"); fflush(stdout);
1344 
1345  if(DebugMode) {
1346  LLV.Add(CurLL);
1347  MtxV.Add(NewProbMtx);
1348  }
1349  }
1350  printf("TotalExeTm: %s %g\n", TotalTm.GetStr(), TotalTm.GetSecs());
1351  ProbMtx.Dump("FITTED PARAMS", false);
1352  return CurLL;
1353 }
1354 
1355 double TKroneckerLL::GradDescent2(const int& NIter, const double& LrnRate, double MnStep, double MxStep, const int& WarmUp, const int& NSamples) {
1356  printf("\n----------------------------------------------------------------------\n");
1357  printf("GradDescent2\n");
1358  printf("Fitting graph on %d nodes, %d edges\n", Graph->GetNodes(), Graph->GetEdges());
1359  printf("Skip moves that make likelihood smaller\n");
1360  printf("Kron iters: %d (== %d nodes)\n\n", KronIters(), ProbMtx.GetNodes(KronIters()));
1361  TExeTm IterTm, TotalTm;
1362  double CurLL=0, NewLL=0;
1363  const double EZero = pow((double) Graph->GetEdges(), 1.0/double(KronIters));
1364  TFltV CurGradV, NewGradV, LearnRateV(GetParams()), LastStep(GetParams());
1365  LearnRateV.PutAll(LrnRate);
1366  TKronMtx NewProbMtx=ProbMtx, CurProbMtx=ProbMtx;
1367  bool GoodMove = false;
1368  // Start
1369  for (int Iter = 0; Iter < NIter; Iter++) {
1370  printf("%03d] ", Iter);
1371  if (! GoodMove) { SampleGradient(WarmUp, NSamples, CurLL, CurGradV); }
1372  CurProbMtx = ProbMtx;
1373  // update parameters
1374  for (int p = 0; p < GetParams(); p++) {
1375  while (fabs(LearnRateV[p]*CurGradV[p]) > MxStep) { LearnRateV[p] *= 0.95; printf(".");}
1376  while (fabs(LearnRateV[p]*CurGradV[p]) < MnStep) { LearnRateV[p] *= (1.0/0.95); printf("*");}
1377  NewProbMtx.At(p) = CurProbMtx.At(p) + LearnRateV[p]*CurGradV[p];
1378  if (NewProbMtx.At(p) > 0.9999) { NewProbMtx.At(p)=0.9999; }
1379  if (NewProbMtx.At(p) < 0.0001) { NewProbMtx.At(p)=0.0001; }
1380  LearnRateV[p] *= 0.95;
1381  }
1382  printf(" ");
1383  ProbMtx=NewProbMtx; ProbMtx.GetLLMtx(LLMtx);
1384  SampleGradient(WarmUp, NSamples, NewLL, NewGradV);
1385  if (NewLL > CurLL) { // accept the move
1386  printf("== Good move:\n");
1387  printf(" trueE0: %.2f (%d), estE0: %.2f (%d), ERR: %f\n", EZero, Graph->GetEdges(),
1389  printf(" currLL: %.4f deltaLL: %.4f\n", CurLL, NewLL-CurLL); // positive is good
1390  for (int p = 0; p < GetParams(); p++) {
1391  printf(" %d] %f <-- %f + %9f Grad: %9.1f Rate: %g\n", p, NewProbMtx.At(p),
1392  CurProbMtx.At(p), (double)(LearnRateV[p]*CurGradV[p]), CurGradV[p](), LearnRateV[p]()); }
1393  CurLL = NewLL;
1394  CurGradV = NewGradV;
1395  GoodMove = true;
1396  } else {
1397  printf("** BAD move:\n");
1398  printf(" *trueE0: %.2f (%d), estE0: %.2f (%d), ERR: %f\n", EZero, Graph->GetEdges(),
1400  printf(" *curLL: %.4f deltaLL: %.4f\n", CurLL, NewLL-CurLL); // positive is good
1401  for (int p = 0; p < GetParams(); p++) {
1402  printf(" b%d] %f <-- %f + %9f Grad: %9.1f Rate: %g\n", p, NewProbMtx.At(p),
1403  CurProbMtx.At(p), (double)(LearnRateV[p]*CurGradV[p]), CurGradV[p](), LearnRateV[p]()); }
1404  // move to old position
1405  ProbMtx = CurProbMtx; ProbMtx.GetLLMtx(LLMtx);
1406  GoodMove = false;
1407  }
1408  printf("\n"); fflush(stdout);
1409  }
1410  printf("TotalExeTm: %s %g\n", TotalTm.GetStr(), TotalTm.GetSecs());
1411  ProbMtx.Dump("FITTED PARAMS\n", false);
1412  return CurLL;
1413 }
1414 
1416 // filling in random edges for KronEM
1417 void TKroneckerLL::SetRandomEdges(const int& NEdges, const bool isDir) {
1418  int count = 0, added = 0, collision = 0;
1419  const int MtxDim = ProbMtx.GetDim();
1420  const double MtxSum = ProbMtx.GetMtxSum();
1421  TVec<TFltIntIntTr> ProbToRCPosV; // row, col position
1422  double CumProb = 0.0;
1423 
1424  for(int r = 0; r < MtxDim; r++) {
1425  for(int c = 0; c < MtxDim; c++) {
1426  const double Prob = ProbMtx.At(r, c);
1427  if (Prob > 0.0) {
1428  CumProb += Prob;
1429  ProbToRCPosV.Add(TFltIntIntTr(CumProb/MtxSum, r, c));
1430  }
1431  }
1432  }
1433 
1434  int Rng, Row, Col, n, NId1, NId2;
1435  while(added < NEdges) {
1436  Rng = Nodes; Row = 0; Col = 0;
1437  for (int iter = 0; iter < KronIters; iter++) {
1438  const double& Prob = TKronMtx::Rnd.GetUniDev();
1439  n = 0; while(Prob > ProbToRCPosV[n].Val1) { n++; }
1440  const int MtxRow = ProbToRCPosV[n].Val2;
1441  const int MtxCol = ProbToRCPosV[n].Val3;
1442  Rng /= MtxDim;
1443  Row += MtxRow * Rng;
1444  Col += MtxCol * Rng;
1445  }
1446 
1447  count++;
1448 
1449  NId1 = InvertPerm[Row]; NId2 = InvertPerm[Col];
1450 
1451  // Check conflicts
1452  if(EMType != kronEdgeMiss && IsObsEdge(NId1, NId2)) {
1453  continue;
1454  }
1455 
1456  if (! Graph->IsEdge(NId1, NId2)) {
1457  Graph->AddEdge(NId1, NId2);
1458  if(NId1 != NId2) { GEdgeV.Add(TIntTr(NId1, NId2, LEdgeV.Len())); }
1459  else { LSelfEdge++; }
1460  LEdgeV.Add(TIntTr(NId1, NId2, GEdgeV.Len()-1));
1461  added++;
1462  if (! isDir) {
1463  if (NId1 != NId2) {
1464  Graph->AddEdge(NId2, NId1);
1465  GEdgeV.Add(TIntTr(NId2, NId1, LEdgeV.Len()));
1466  LEdgeV.Add(TIntTr(NId2, NId1, GEdgeV.Len()-1));
1467  added++;
1468  }
1469  }
1470  } else { collision ++; }
1471  }
1472 // printf("total = %d / added = %d / collision = %d\n", count, added, collision);
1473 }
1474 
1475 // sampling setup for KronEM
1476 void TKroneckerLL::MetroGibbsSampleSetup(const int& WarmUp) {
1477  double alpha = log(ProbMtx.GetMtxSum()) / log(double(ProbMtx.GetDim()));
1478  int NId1 = 0, NId2 = 0;
1479  int NMissing;
1480 
1481  RestoreGraph(false);
1482  if(EMType == kronEdgeMiss) {
1483  CalcApxGraphLL();
1484  for (int s = 0; s < WarmUp; s++) SampleNextPerm(NId1, NId2);
1485  }
1486 
1487  if(EMType == kronFutureLink) {
1488  NMissing = (int) (pow(ProbMtx.GetMtxSum(), KronIters) - pow(double(RealNodes), alpha));
1489  } else if(EMType == kronEdgeMiss) {
1490  NMissing = MissEdges;
1491  } else {
1492  NMissing = (int) (pow(ProbMtx.GetMtxSum(), KronIters) * (1.0 - pow(double(RealNodes) / double(Nodes), 2)));
1493  }
1494  NMissing = (NMissing < 1) ? 1 : NMissing;
1495 
1496  SetRandomEdges(NMissing, true);
1497 
1498  CalcApxGraphLL();
1499  for (int s = 0; s < WarmUp; s++) SampleNextPerm(NId1, NId2);
1500 }
1501 
1502 // Metropolis-Hastings steps for KronEM
1503 void TKroneckerLL::MetroGibbsSampleNext(const int& WarmUp, const bool DLLUpdate) {
1504  int NId1 = 0, NId2 = 0, hit = 0, GId = 0;
1505  TIntTr EdgeToRemove, NewEdge;
1506  double RndAccept;
1507 
1508  if(LEdgeV.Len()) {
1509  for(int i = 0; i < WarmUp; i++) {
1510  hit = TKronMtx::Rnd.GetUniDevInt(LEdgeV.Len());
1511 
1512  NId1 = LEdgeV[hit].Val1; NId2 = LEdgeV[hit].Val2;
1513  GId = LEdgeV[hit].Val3;
1514  SetRandomEdges(1, true);
1515  NewEdge = LEdgeV.Last();
1516 
1517  RndAccept = (1.0 - exp(LLMtx.GetEdgeLL(NewEdge.Val1, NewEdge.Val2, KronIters))) / (1.0 - exp(LLMtx.GetEdgeLL(NId1, NId2, KronIters)));
1518  RndAccept = (RndAccept > 1.0) ? 1.0 : RndAccept;
1519 
1520  if(TKronMtx::Rnd.GetUniDev() > RndAccept) { // reject
1521  Graph->DelEdge(NewEdge.Val1, NewEdge.Val2);
1522  if(NewEdge.Val1 != NewEdge.Val2) { GEdgeV.DelLast(); }
1523  else { LSelfEdge--; }
1524  LEdgeV.DelLast();
1525  } else { // accept
1526  Graph->DelEdge(NId1, NId2);
1527  LEdgeV[hit] = LEdgeV.Last();
1528  LEdgeV.DelLast();
1529  if(NId1 == NId2) {
1530  LSelfEdge--;
1531  if(NewEdge.Val1 != NewEdge.Val2) {
1532  GEdgeV[GEdgeV.Len()-1].Val3 = hit;
1533  }
1534  } else {
1535  IAssertR(GEdgeV.Last().Val3 >= 0, "Invalid indexing");
1536 
1537  GEdgeV[GId] = GEdgeV.Last();
1538  if(NewEdge.Val1 != NewEdge.Val2) {
1539  GEdgeV[GId].Val3 = hit;
1540  }
1541  LEdgeV[GEdgeV[GId].Val3].Val3 = GId;
1542  GEdgeV.DelLast();
1543  }
1544 
1545  LogLike += LLMtx.GetApxNoEdgeLL(EdgeToRemove.Val1, EdgeToRemove.Val2, KronIters) - LLMtx.GetEdgeLL(EdgeToRemove.Val1, EdgeToRemove.Val2, KronIters);
1546  LogLike += -LLMtx.GetApxNoEdgeLL(NewEdge.Val1, NewEdge.Val2, KronIters) + LLMtx.GetEdgeLL(NewEdge.Val1, NewEdge.Val2, KronIters);
1547 
1548  if(DLLUpdate) {
1549  for (int p = 0; p < LLMtx.Len(); p++) {
1550  GradV[p] += LLMtx.GetApxNoEdgeDLL(p, EdgeToRemove.Val1, EdgeToRemove.Val2, KronIters) - LLMtx.GetEdgeDLL(p, EdgeToRemove.Val1, EdgeToRemove.Val2, KronIters);
1551  GradV[p] += -LLMtx.GetApxNoEdgeDLL(p, NewEdge.Val1, NewEdge.Val2, KronIters) + LLMtx.GetEdgeDLL(p, NewEdge.Val1, NewEdge.Val2, KronIters);
1552  }
1553  }
1554  }
1555  }
1556  }
1557 
1558 // CalcApxGraphLL();
1559  for (int s = 0; s < WarmUp; s++) {
1560  if(SampleNextPerm(NId1, NId2)) {
1561  if(DLLUpdate) UpdateGraphDLL(NId1, NId2);
1562  }
1563  }
1564 }
1565 
1566 // E-step in KronEM
1567 void TKroneckerLL::RunEStep(const int& GibbsWarmUp, const int& WarmUp, const int& NSamples, TFltV& LLV, TVec<TFltV>& DLLV) {
1568  TExeTm ExeTm, TotalTm;
1569  LLV.Gen(NSamples, 0);
1570  DLLV.Gen(NSamples, 0);
1571 
1572  ExeTm.Tick();
1573  for(int i = 0; i < 2; i++) MetroGibbsSampleSetup(WarmUp);
1574  printf(" Warm-Up [%u] : %s\n", WarmUp, ExeTm.GetTmStr());
1575  CalcApxGraphLL();
1576  for(int i = 0; i < GibbsWarmUp; i++) MetroGibbsSampleNext(10, false);
1577  printf(" Gibbs Warm-Up [%u] : %s\n", GibbsWarmUp, ExeTm.GetTmStr());
1578 
1579  ExeTm.Tick();
1580  CalcApxGraphLL();
1581  CalcApxGraphDLL();
1582  for(int i = 0; i < NSamples; i++) {
1583  MetroGibbsSampleNext(50, false);
1584 
1585  LLV.Add(LogLike);
1586  DLLV.Add(GradV);
1587 
1588  int OnePercent = (i+1) % (NSamples / 10);
1589  if(OnePercent == 0) {
1590  int TenPercent = ((i+1) / (NSamples / 10)) * 10;
1591  printf(" %3u%% done : %s\n", TenPercent, ExeTm.GetTmStr());
1592  }
1593  }
1594 }
1595 
1596 // M-step in KronEM
1597 double TKroneckerLL::RunMStep(const TFltV& LLV, const TVec<TFltV>& DLLV, const int& GradIter, const double& LrnRate, double MnStep, double MxStep) {
1598  TExeTm IterTm, TotalTm;
1599  double OldLL=LogLike, CurLL=0;
1600  const double alpha = log(double(RealEdges)) / log(double(RealNodes));
1601  const double EZero = pow(double(ProbMtx.GetDim()), alpha);
1602 
1603  TFltV CurGradV(GetParams()), LearnRateV(GetParams()), LastStep(GetParams());
1604  LearnRateV.PutAll(LrnRate);
1605 
1606  TKronMtx NewProbMtx = ProbMtx;
1607  const int NSamples = LLV.Len();
1608  const int ReCalcLen = NSamples / 10;
1609 
1610  for (int s = 0; s < LLV.Len(); s++) {
1611  CurLL += LLV[s];
1612  for(int p = 0; p < GetParams(); p++) { CurGradV[p] += DLLV[s][p]; }
1613  }
1614  CurLL /= NSamples;
1615  for(int p = 0; p < GetParams(); p++) { CurGradV[p] /= NSamples; }
1616 
1617  double MaxLL = CurLL;
1618  TKronMtx MaxProbMtx = ProbMtx;
1619  TKronMtx OldProbMtx = ProbMtx;
1620 
1621  for (int Iter = 0; Iter < GradIter; Iter++) {
1622  printf(" %03d] ", Iter+1);
1623  IterTm.Tick();
1624 
1625  for (int p = 0; p < GetParams(); p++) {
1626  if (Iter < 1) {
1627  while (fabs(LearnRateV[p]*CurGradV[p]) > MxStep) { LearnRateV[p] *= 0.95; }
1628  while (fabs(LearnRateV[p]*CurGradV[p]) < 5 * MnStep) { LearnRateV[p] *= (1.0/0.95); } // move more
1629  } else {
1630  // set learn rate so that move for each parameter is inside the [MnStep, MxStep]
1631  while (fabs(LearnRateV[p]*CurGradV[p]) > MxStep) { LearnRateV[p] *= 0.95; printf(".");}
1632  while (fabs(LearnRateV[p]*CurGradV[p]) < MnStep) { LearnRateV[p] *= (1.0/0.95); printf("*");}
1633  if (MxStep > 3*MnStep) { MxStep *= 0.95; }
1634  }
1635  NewProbMtx.At(p) = ProbMtx.At(p) + LearnRateV[p]*CurGradV[p];
1636  if (NewProbMtx.At(p) > 0.9999) { NewProbMtx.At(p)=0.9999; }
1637  if (NewProbMtx.At(p) < 0.0001) { NewProbMtx.At(p)=0.0001; }
1638  LearnRateV[p] *= 0.95;
1639  }
1640  printf(" trueE0: %.2f (%u from %u), estE0: %.2f (%u from %u), ERR: %f\n", EZero, RealEdges(), RealNodes(), ProbMtx.GetMtxSum(), Graph->GetEdges(), Graph->GetNodes(), fabs(EZero-ProbMtx.GetMtxSum()));
1641  printf(" currLL: %.4f, deltaLL: %.4f\n", CurLL, CurLL-OldLL); // positive is good
1642  for (int p = 0; p < GetParams(); p++) {
1643  printf(" %d] %f <-- %f + %9f Grad: %9.1f Rate: %g\n", p, NewProbMtx.At(p),
1644  ProbMtx.At(p), (double)(LearnRateV[p]*CurGradV[p]), CurGradV[p](), LearnRateV[p]());
1645  }
1646 
1647  OldLL=CurLL;
1648  if(Iter == GradIter - 1) {
1649  break;
1650  }
1651 
1652  CurLL = 0;
1653  CurGradV.PutAll(0.0);
1654  TFltV OneDLL;
1655 
1656  CalcApxGraphLL();
1657  CalcApxGraphDLL();
1658 
1659  for(int s = 0; s < NSamples; s++) {
1660  ProbMtx = OldProbMtx; ProbMtx.GetLLMtx(LLMtx);
1661  MetroGibbsSampleNext(10, true);
1662  ProbMtx = NewProbMtx; ProbMtx.GetLLMtx(LLMtx);
1663  if(s % ReCalcLen == ReCalcLen/2) {
1664  CurLL += CalcApxGraphLL();
1665  OneDLL = CalcApxGraphDLL();
1666  } else {
1667  CurLL += LogLike;
1668  OneDLL = GradV;
1669  }
1670  for(int p = 0; p < GetParams(); p++) {
1671  CurGradV[p] += OneDLL[p];
1672  }
1673  }
1674  CurLL /= NSamples;
1675 
1676  if(MaxLL < CurLL) {
1677  MaxLL = CurLL; MaxProbMtx = ProbMtx;
1678  }
1679 
1680  printf(" Time: %s\n", IterTm.GetTmStr());
1681  printf("\n"); fflush(stdout);
1682  }
1683  ProbMtx = MaxProbMtx; ProbMtx.GetLLMtx(LLMtx);
1684 
1685  printf(" FinalLL : %f, TotalExeTm: %s\n", MaxLL, TotalTm.GetTmStr());
1686  ProbMtx.Dump(" FITTED PARAMS", false);
1687 
1688  return MaxLL;
1689 }
1690 
1691 // KronEM
1692 void TKroneckerLL::RunKronEM(const int& EMIter, const int& GradIter, double LrnRate, double MnStep, double MxStep, const int& GibbsWarmUp, const int& WarmUp, const int& NSamples, const TKronEMType& Type, const int& NMissing) {
1693  printf("\n----------------------------------------------------------------------\n");
1694  printf("Fitting graph on %d nodes, %d edges\n", int(RealNodes), int(RealEdges));
1695  printf("Kron iters: %d (== %d nodes)\n\n", KronIters(), ProbMtx.GetNodes(KronIters()));
1696 
1697  TFltV LLV(NSamples);
1698  TVec<TFltV> DLLV(NSamples);
1699  //int count = 0;
1700 
1701  EMType = Type;
1702  MissEdges = NMissing;
1703  AppendIsoNodes();
1704  SetRndPerm();
1705 
1706  if(DebugMode) {
1707  LLV.Gen(EMIter, 0);
1708  MtxV.Gen(EMIter, 0);
1709  }
1710 
1711  for(int i = 0; i < EMIter; i++) {
1712  printf("\n----------------------------------------------------------------------\n");
1713  printf("%03d EM-iter] E-Step\n", i+1);
1714  RunEStep(GibbsWarmUp, WarmUp, NSamples, LLV, DLLV);
1715  printf("\n\n");
1716 
1717  printf("%03d EM-iter] M-Step\n", i+1);
1718  double CurLL = RunMStep(LLV, DLLV, GradIter, LrnRate, MnStep, MxStep);
1719  printf("\n\n");
1720 
1721  if(DebugMode) {
1722  LLV.Add(CurLL);
1723  MtxV.Add(ProbMtx);
1724  }
1725  }
1726 
1727  RestoreGraph();
1728 }
1729 
1730 
1731 
1732 void GetMinMax(const TFltPrV& XYValV, double& Min, double& Max, const bool& ResetMinMax) {
1733  if (ResetMinMax) { Min = TFlt::Mx; Max = TFlt::Mn; }
1734  for (int i = 0; i < XYValV.Len(); i++) {
1735  Min = TMath::Mn(Min, XYValV[i].Val2.Val);
1736  Max = TMath::Mx(Max, XYValV[i].Val2.Val);
1737  }
1738 }
1739 
1740 void PlotGrad(const TFltPrV& EstLLV, const TFltPrV& TrueLLV, const TVec<TFltPrV>& GradVV, const TFltPrV& AcceptV, const TStr& OutFNm, const TStr& Desc) {
1741  double Min, Max, Min1, Max1;
1742  // plot log-likelihood
1743  { TGnuPlot GP("sLL-"+OutFNm, TStr::Fmt("Log-likelihood (avg 1k samples). %s", Desc.CStr()), true);
1744  GP.AddPlot(EstLLV, gpwLines, "Esimated LL", "linewidth 1");
1745  if (! TrueLLV.Empty()) { GP.AddPlot(TrueLLV, gpwLines, "TRUE LL", "linewidth 1"); }
1746  //GetMinMax(EstLLV, Min, Max, true); GetMinMax(TrueLLV, Min, Max, false);
1747  //GP.SetYRange((int)floor(Min-1), (int)ceil(Max+1));
1748  GP.SetXYLabel("Sample Index (time)", "Log-likelihood");
1749  GP.SavePng(); }
1750  // plot accept
1751  { TGnuPlot GP("sAcc-"+OutFNm, TStr::Fmt("Pct. accepted rnd moves (over 1k samples). %s", Desc.CStr()), true);
1752  GP.AddPlot(AcceptV, gpwLines, "Pct accepted swaps", "linewidth 1");
1753  GP.SetXYLabel("Sample Index (time)", "Pct accept permutation swaps");
1754  GP.SavePng(); }
1755  // plot grads
1756  TGnuPlot GPAll("sGradAll-"+OutFNm, TStr::Fmt("Gradient (avg 1k samples). %s", Desc.CStr()), true);
1757  GetMinMax(GradVV[0], Min1, Max1, true);
1758  for (int g = 0; g < GradVV.Len(); g++) {
1759  GPAll.AddPlot(GradVV[g], gpwLines, TStr::Fmt("param %d", g+1), "linewidth 1");
1760  GetMinMax(GradVV[g], Min1, Max1, false);
1761  TGnuPlot GP(TStr::Fmt("sGrad%02d-", g+1)+OutFNm, TStr::Fmt("Gradient (avg 1k samples). %s", Desc.CStr()), true);
1762  GP.AddPlot(GradVV[g], gpwLines, TStr::Fmt("param id %d", g+1), "linewidth 1");
1763  GetMinMax(GradVV[g], Min, Max, true);
1764  GP.SetYRange((int)floor(Min-1), (int)ceil(Max+1));
1765  GP.SetXYLabel("Sample Index (time)", "Gradient");
1766  GP.SavePng();
1767  }
1768  GPAll.SetYRange((int)floor(Min1-1), (int)ceil(Max1+1));
1769  GPAll.SetXYLabel("Sample Index (time)", "Gradient");
1770  GPAll.SavePng();
1771 }
1772 
1773 void PlotAutoCorrelation(const TFltV& ValV, const int& MaxK, const TStr& OutFNm, const TStr& Desc) {
1774  double Avg=0.0, Var=0.0;
1775  for (int i = 0; i < ValV.Len(); i++) { Avg += ValV[i]; }
1776  Avg /= (double) ValV.Len();
1777  for (int i = 0; i < ValV.Len(); i++) { Var += TMath::Sqr(ValV[i]-Avg); }
1778  TFltPrV ACorrV;
1779  for (int k = 0; k < TMath::Mn(ValV.Len(), MaxK); k++) {
1780  double corr = 0.0;
1781  for (int i = 0; i < ValV.Len() - k; i++) {
1782  corr += (ValV[i]-Avg)*(ValV[i+k]-Avg);
1783  }
1784  ACorrV.Add(TFltPr(k, corr/Var));
1785  }
1786  // plot grads
1787  TGnuPlot GP("sAutoCorr-"+OutFNm, TStr::Fmt("AutoCorrelation (%d samples). %s", ValV.Len(), Desc.CStr()), true);
1788  GP.AddPlot(ACorrV, gpwLines, "", "linewidth 1");
1789  GP.SetXYLabel("Lag, k", "Autocorrelation, r_k");
1790  GP.SavePng();
1791 }
1792 
1793 // sample permutations and plot the current gradient and log-likelihood as the function
1794 // of the number of samples
1795 TFltV TKroneckerLL::TestSamplePerm(const TStr& OutFNm, const int& WarmUp, const int& NSamples, const TKronMtx& TrueMtx, const bool& DoPlot) {
1796  printf("Sample permutations: %s (warm-up: %s)\n", TInt::GetMegaStr(NSamples).CStr(), TInt::GetMegaStr(WarmUp).CStr());
1797  int NId1=0, NId2=0, NAccept=0;
1798  TExeTm ExeTm;
1799  const int PlotLen = NSamples/1000+1;
1800  double TrueLL=-1, AvgLL=0.0;
1801  TFltV AvgGradV(GetParams());
1802  TFltPrV TrueLLV(PlotLen, 0); // true log-likelihood (under the correct permutation)
1803  TFltPrV EstLLV(PlotLen, 0); // estiamted log-likelihood (averaged over last 1k permutation)
1804  TFltPrV AcceptV; // sample acceptance ratio
1805  TFltV SampleLLV(NSamples, 0);
1806  TVec<TFltPrV> GradVV(GetParams());
1807  for (int g = 0; g < GetParams(); g++) {
1808  GradVV[g].Gen(PlotLen, 0); }
1809  if (! TrueMtx.Empty()) {
1810  TIntV PermV=NodePerm; TKronMtx CurMtx=ProbMtx; ProbMtx.Dump();
1811  InitLL(TrueMtx); SetOrderPerm(); CalcApxGraphLL(); printf("TrueLL: %f\n", LogLike());
1812  TrueLL=LogLike; InitLL(CurMtx); NodePerm=PermV;
1813  }
1814  CalcApxGraphLL();
1815  printf("LogLike at start: %f\n", LogLike());
1816  if (WarmUp > 0) {
1817  EstLLV.Add(TFltPr(0, LogLike));
1818  if (TrueLL != -1) { TrueLLV.Add(TFltPr(0, TrueLL)); }
1819  for (int s = 0; s < WarmUp; s++) { SampleNextPerm(NId1, NId2); }
1820  printf(" warm-up:%s,", ExeTm.GetTmStr()); ExeTm.Tick();
1821  }
1822  printf("LogLike afterm warm-up: %f\n", LogLike());
1823  CalcApxGraphLL(); // re-calculate LL (due to numerical errors)
1824  CalcApxGraphDLL();
1825  EstLLV.Add(TFltPr(WarmUp, LogLike));
1826  if (TrueLL != -1) { TrueLLV.Add(TFltPr(WarmUp, TrueLL)); }
1827  printf(" recalculated: %f\n", LogLike());
1828  // start sampling
1829  printf(" sampling (average per 1000 samples)\n");
1830  TVec<TFltV> SamplVV(5);
1831  for (int s = 0; s < NSamples; s++) {
1832  if (SampleNextPerm(NId1, NId2)) { // new permutation
1833  UpdateGraphDLL(NId1, NId2); NAccept++; }
1834  for (int m = 0; m < AvgGradV.Len(); m++) { AvgGradV[m] += GradV[m]; }
1835  AvgLL += GetLL();
1836  SampleLLV.Add(GetLL());
1837  /*SamplVV[0].Add(GetLL()); // gives worse autocoreelation than the avg below
1838  SamplVV[1].Add(GradV[0]);
1839  SamplVV[2].Add(GradV[1]);
1840  SamplVV[3].Add(GradV[2]);
1841  SamplVV[4].Add(GradV[3]);*/
1842  if (s > 0 && s % 1000 == 0) {
1843  printf(".");
1844  for (int g = 0; g < AvgGradV.Len(); g++) {
1845  GradVV[g].Add(TFltPr(WarmUp+s, AvgGradV[g] / 1000.0)); }
1846  EstLLV.Add(TFltPr(WarmUp+s, AvgLL / 1000.0));
1847  if (TrueLL != -1) { TrueLLV.Add(TFltPr(WarmUp+s, TrueLL)); }
1848  AcceptV.Add(TFltPr(WarmUp+s, NAccept/1000.0));
1849  // better (faster decaying) autocorrelation when one takes avg. of 1000 consecutive samples
1850  /*SamplVV[0].Add(AvgLL);
1851  SamplVV[1].Add(AvgGradV[0]);
1852  SamplVV[2].Add(AvgGradV[1]);
1853  SamplVV[3].Add(AvgGradV[2]);
1854  SamplVV[4].Add(AvgGradV[3]); //*/
1855  if (s % 100000 == 0 && DoPlot) {
1856  const TStr Desc = TStr::Fmt("P(NodeSwap)=%g. Nodes: %d, Edges: %d, Params: %d, WarmUp: %s, Samples: %s", PermSwapNodeProb(),
1857  Graph->GetNodes(), Graph->GetEdges(), GetParams(), TInt::GetMegaStr(WarmUp).CStr(), TInt::GetMegaStr(NSamples).CStr());
1858  PlotGrad(EstLLV, TrueLLV, GradVV, AcceptV, OutFNm, Desc);
1859  for (int n = 0; n < SamplVV.Len(); n++) {
1860  PlotAutoCorrelation(SamplVV[n], 1000, TStr::Fmt("%s-n%d", OutFNm.CStr(), n), Desc); }
1861  printf(" samples: %d, time: %s, samples/s: %.1f\n", s, ExeTm.GetTmStr(), double(s+1)/ExeTm.GetSecs());
1862  }
1863  AvgLL = 0; AvgGradV.PutAll(0); NAccept=0;
1864  }
1865  }
1866  if (DoPlot) {
1867  const TStr Desc = TStr::Fmt("P(NodeSwap)=%g. Nodes: %d, Edges: %d, Params: %d, WarmUp: %s, Samples: %s", PermSwapNodeProb(),
1868  Graph->GetNodes(), Graph->GetEdges(), GetParams(), TInt::GetMegaStr(WarmUp).CStr(), TInt::GetMegaStr(NSamples).CStr());
1869  PlotGrad(EstLLV, TrueLLV, GradVV, AcceptV, OutFNm, Desc);
1870  for (int n = 0; n < SamplVV.Len(); n++) {
1871  PlotAutoCorrelation(SamplVV[n], 1000, TStr::Fmt("%s-n%d", OutFNm.CStr(), n), Desc); }
1872  }
1873  return SampleLLV; // seems to work better for potential scale reduction plot
1874 }
1875 
1876 void McMcGetAvgAvg(const TFltV& AvgJV, double& AvgAvg) {
1877  AvgAvg = 0.0;
1878  for (int j = 0; j < AvgJV.Len(); j++) {
1879  AvgAvg += AvgJV[j]; }
1880  AvgAvg /= AvgJV.Len();
1881 }
1882 
1883 void McMcGetAvgJ(const TVec<TFltV>& ChainLLV, TFltV& AvgJV) {
1884  for (int j = 0; j < ChainLLV.Len(); j++) {
1885  const TFltV& ChainV = ChainLLV[j];
1886  double Avg = 0;
1887  for (int i = 0; i < ChainV.Len(); i++) {
1888  Avg += ChainV[i];
1889  }
1890  AvgJV.Add(Avg/ChainV.Len());
1891  }
1892 }
1893 
1894 // calculates the chain potential scale reduction (see Gelman Bayesian Statistics book)
1895 double TKroneckerLL::CalcChainR2(const TVec<TFltV>& ChainLLV) {
1896  const double J = ChainLLV.Len();
1897  const double K = ChainLLV[0].Len();
1898  TFltV AvgJV; McMcGetAvgJ(ChainLLV, AvgJV);
1899  double AvgAvg; McMcGetAvgAvg(AvgJV, AvgAvg);
1900  IAssert(AvgJV.Len() == ChainLLV.Len());
1901  double InChainVar=0, OutChainVar=0;
1902  // between chain var
1903  for (int j = 0; j < AvgJV.Len(); j++) {
1904  OutChainVar += TMath::Sqr(AvgJV[j] - AvgAvg); }
1905  OutChainVar = OutChainVar * (K/double(J-1));
1906  printf("*** %g chains of len %g\n", J, K);
1907  printf(" ** between chain var: %f\n", OutChainVar);
1908  //within chain variance
1909  for (int j = 0; j < AvgJV.Len(); j++) {
1910  const TFltV& ChainV = ChainLLV[j];
1911  for (int k = 0; k < ChainV.Len(); k++) {
1912  InChainVar += TMath::Sqr(ChainV[k] - AvgJV[j]); }
1913  }
1914  InChainVar = InChainVar * 1.0/double(J*(K-1));
1915  printf(" ** within chain var: %f\n", InChainVar);
1916  const double PostVar = (K-1)/K * InChainVar + 1.0/K * OutChainVar;
1917  printf(" ** posterior var: %f\n", PostVar);
1918  const double ScaleRed = sqrt(PostVar/InChainVar);
1919  printf(" ** scale reduction (< 1.2): %f\n\n", ScaleRed);
1920  return ScaleRed;
1921 }
1922 
1923 // Gelman-Rubin-Brooks plot: how does potential scale reduction chainge with chain length
1924 void TKroneckerLL::ChainGelmapRubinPlot(const TVec<TFltV>& ChainLLV, const TStr& OutFNm, const TStr& Desc) {
1925  TFltPrV LenR2V; // how does potential scale reduction chainge with chain length
1926  TVec<TFltV> SmallLLV(ChainLLV.Len());
1927  const int K = ChainLLV[0].Len();
1928  const int Buckets=1000;
1929  const int BucketSz = K/Buckets;
1930  for (int b = 1; b < Buckets; b++) {
1931  const int End = TMath::Mn(BucketSz*b, K-1);
1932  for (int c = 0; c < ChainLLV.Len(); c++) {
1933  ChainLLV[c].GetSubValV(0, End, SmallLLV[c]); }
1934  LenR2V.Add(TFltPr(End, TKroneckerLL::CalcChainR2(SmallLLV)));
1935  }
1936  LenR2V.Add(TFltPr(K, TKroneckerLL::CalcChainR2(ChainLLV)));
1937  TGnuPlot::PlotValV(LenR2V, TStr::Fmt("gelman-%s", OutFNm.CStr()), TStr::Fmt("%s. %d chains of len %d. BucketSz: %d.",
1938  Desc.CStr(), ChainLLV.Len(), ChainLLV[0].Len(), BucketSz), "Chain length", "Potential scale reduction");
1939 }
1940 
1941 // given a Kronecker graph generate from TrueParam, try to recover the parameters
1942 TFltQu TKroneckerLL::TestKronDescent(const bool& DoExact, const bool& TruePerm, double LearnRate, const int& WarmUp, const int& NSamples, const TKronMtx& TrueParam) {
1943  printf("Test gradient descent on a synthetic kronecker graphs:\n");
1944  if (DoExact) { printf(" -- Exact gradient calculations\n"); }
1945  else { printf(" -- Approximate gradient calculations\n"); }
1946  if (TruePerm) { printf(" -- No permutation sampling (use true permutation)\n"); }
1947  else { printf(" -- Sample permutations (start with degree permutation)\n"); }
1948  TExeTm IterTm;
1949  int Iter;
1950  double OldLL=0, MyLL=0, AvgAbsErr, AbsSumErr;
1951  TFltV MyGradV, SDevV;
1952  TFltV LearnRateV(GetParams()); LearnRateV.PutAll(LearnRate);
1953  if (TruePerm) {
1954  SetOrderPerm();
1955  }
1956  else {
1957  /*printf("SET EIGEN VECTOR PERMUTATIONS\n");
1958  TFltV LeftSV, RightSV;
1959  TGSvd::GetSngVec(Graph, LeftSV, RightSV);
1960  TFltIntPrV V;
1961  for (int v=0; v<LeftSV.Len();v++) { V.Add(TFltIntPr(LeftSV[v], v)); }
1962  V.Sort(false);
1963  NodePerm.Gen(Nodes, 0);
1964  for (int v=0; v < V.Len();v++) { NodePerm.Add(V[v].Val2); } //*/
1965  //printf("RANDOM PERMUTATION\n"); SetRndPerm();
1966  printf("DEGREE PERMUTATION\n"); SetDegPerm();
1967  }
1968  for (Iter = 0; Iter < 100; Iter++) {
1969  if (TruePerm) {
1970  // don't sample over permutations
1971  if (DoExact) { CalcGraphDLL(); CalcGraphLL(); } // slow, O(N^2)
1972  else { CalcApxGraphDLL(); CalcApxGraphLL(); } // fast
1973  MyLL = LogLike; MyGradV = GradV;
1974  } else {
1975  printf(".");
1976  // sample over permutations (approximate calculations)
1977  SampleGradient(WarmUp, NSamples, MyLL, MyGradV);
1978  }
1979  printf("%d] LL: %g, ", Iter, MyLL);
1980  AvgAbsErr = TKronMtx::GetAvgAbsErr(ProbMtx, TrueParam);
1981  AbsSumErr = fabs(ProbMtx.GetMtxSum() - TrueParam.GetMtxSum());
1982  printf(" avgAbsErr: %.4f, absSumErr: %.4f, newLL: %.2f, deltaLL: %.2f\n", AvgAbsErr, AbsSumErr, MyLL, OldLL-MyLL);
1983  for (int p = 0; p < GetParams(); p++) {
1984  // set learn rate so that move for each parameter is inside the [0.01, 0.1]
1985  LearnRateV[p] *= 0.9;
1986  //printf("%d: rate: %f delta:%f\n", p, LearnRateV[p], fabs(LearnRateV[p]*MyGradV[p]));
1987  while (fabs(LearnRateV[p]*MyGradV[p]) > 0.1) { LearnRateV[p] *= 0.9; }
1988  //printf(" rate: %f delta:%f\n", LearnRateV[p], fabs(LearnRateV[p]*MyGradV[p]));
1989  while (fabs(LearnRateV[p]*MyGradV[p]) < 0.001) { LearnRateV[p] *= (1.0/0.9); }
1990  //printf(" rate: %f delta:%f\n", LearnRateV[p], fabs(LearnRateV[p]*MyGradV[p]));
1991  printf(" %d] %f <-- %f + %f lrnRate:%g\n", p, ProbMtx.At(p) + LearnRateV[p]*MyGradV[p],
1992  ProbMtx.At(p), (double)(LearnRateV[p]*MyGradV[p]), LearnRateV[p]());
1993  ProbMtx.At(p) = ProbMtx.At(p) + LearnRateV[p]*MyGradV[p];
1994  // box constraints
1995  if (ProbMtx.At(p) > 0.99) { ProbMtx.At(p)=0.99; }
1996  if (ProbMtx.At(p) < 0.01) { ProbMtx.At(p)=0.01; }
1997  }
1998  ProbMtx.GetLLMtx(LLMtx); OldLL = MyLL;
1999  if (AvgAbsErr < 0.01) { printf("***CONVERGED!\n"); break; }
2000  printf("\n"); fflush(stdout);
2001  }
2002  TrueParam.Dump("True Thetas", true);
2003  ProbMtx.Dump("Final Thetas", true);
2004  printf(" AvgAbsErr: %f\n AbsSumErr: %f\n Iterations: %d\n", AvgAbsErr, AbsSumErr, Iter);
2005  printf("Iteration run time: %s, sec: %g\n\n", IterTm.GetTmStr(), IterTm.GetSecs());
2006  return TFltQu(AvgAbsErr, AbsSumErr, Iter, IterTm.GetSecs());
2007 }
2008 
2009 void PlotTrueAndEst(const TStr& OutFNm, const TStr& Desc, const TStr& YLabel, const TFltPrV& EstV, const TFltPrV& TrueV) {
2010  TGnuPlot GP(OutFNm, Desc.CStr(), true);
2011  GP.AddPlot(EstV, gpwLinesPoints, YLabel, "linewidth 1 pointtype 6 pointsize 1");
2012  if (! TrueV.Empty()) { GP.AddPlot(TrueV, gpwLines, "TRUE"); }
2013  GP.SetXYLabel("Gradient descent iterations", YLabel);
2014  GP.SavePng();
2015 }
2016 
2017 void TKroneckerLL::GradDescentConvergence(const TStr& OutFNm, const TStr& Desc1, const bool& SamplePerm, const int& NIters,
2018  double LearnRate, const int& WarmUp, const int& NSamples, const int& AvgKGraphs, const TKronMtx& TrueParam) {
2019  TExeTm IterTm;
2020  int Iter;
2021  double OldLL=0, MyLL=0, AvgAbsErr=0, AbsSumErr=0;
2022  TFltV MyGradV, SDevV;
2023  TFltV LearnRateV(GetParams()); LearnRateV.PutAll(LearnRate);
2024  TFltPrV EZeroV, DiamV, Lambda1V, Lambda2V, AvgAbsErrV, AvgLLV;
2025  TFltPrV TrueEZeroV, TrueDiamV, TrueLambda1V, TrueLambda2V, TrueLLV;
2026  TFltV SngValV; TSnap::GetSngVals(Graph, 2, SngValV); SngValV.Sort(false);
2027  const double TrueEZero = pow((double) Graph->GetEdges(), 1.0/double(KronIters));
2028  const double TrueEffDiam = TSnap::GetAnfEffDiam(Graph, false, 10);
2029  const double TrueLambda1 = SngValV[0];
2030  const double TrueLambda2 = SngValV[1];
2031  if (! TrueParam.Empty()) {
2032  const TKronMtx CurParam = ProbMtx; ProbMtx.Dump();
2033  InitLL(TrueParam); SetOrderPerm(); CalcApxGraphLL(); printf("TrueLL: %f\n", LogLike());
2034  OldLL = LogLike; InitLL(CurParam);
2035  }
2036  const double TrueLL = OldLL;
2037  if (! SamplePerm) { SetOrderPerm(); } else { SetDegPerm(); }
2038  for (Iter = 0; Iter < NIters; Iter++) {
2039  if (! SamplePerm) {
2040  // don't sample over permutations
2041  CalcApxGraphDLL(); CalcApxGraphLL(); // fast
2042  MyLL = LogLike; MyGradV = GradV;
2043  } else {
2044  // sample over permutations (approximate calculations)
2045  SampleGradient(WarmUp, NSamples, MyLL, MyGradV);
2046  }
2047  double SumDiam=0, SumSngVal1=0, SumSngVal2=0;
2048  for (int trial = 0; trial < AvgKGraphs; trial++) {
2049  // generate kronecker graph
2050  PNGraph KronGraph = TKronMtx::GenFastKronecker(ProbMtx, KronIters, true, 0); // approx
2051  //PNGraph KronGraph = TKronMtx::GenKronecker(ProbMtx, KronIters, true, 0); // true
2052  SngValV.Clr(true); TSnap::GetSngVals(KronGraph, 2, SngValV); SngValV.Sort(false);
2053  SumDiam += TSnap::GetAnfEffDiam(KronGraph, false, 10);
2054  SumSngVal1 += SngValV[0]; SumSngVal2 += SngValV[1];
2055  }
2056  // how good is the current fit
2057  AvgLLV.Add(TFltPr(Iter, MyLL));
2058  EZeroV.Add(TFltPr(Iter, ProbMtx.GetMtxSum()));
2059  DiamV.Add(TFltPr(Iter, SumDiam/double(AvgKGraphs)));
2060  Lambda1V.Add(TFltPr(Iter, SumSngVal1/double(AvgKGraphs)));
2061  Lambda2V.Add(TFltPr(Iter, SumSngVal2/double(AvgKGraphs)));
2062  TrueLLV.Add(TFltPr(Iter, TrueLL));
2063  TrueEZeroV.Add(TFltPr(Iter, TrueEZero));
2064  TrueDiamV.Add(TFltPr(Iter, TrueEffDiam));
2065  TrueLambda1V.Add(TFltPr(Iter, TrueLambda1));
2066  TrueLambda2V.Add(TFltPr(Iter, TrueLambda2));
2067  if (Iter % 10 == 0) {
2068  const TStr Desc = TStr::Fmt("%s. Iter: %d, G(%d, %d) K(%d, %d)", Desc1.Empty()?OutFNm.CStr():Desc1.CStr(),
2070  PlotTrueAndEst("LL."+OutFNm, Desc, "Average LL", AvgLLV, TrueLLV);
2071  PlotTrueAndEst("E0."+OutFNm, Desc, "E0 (expected number of edges)", EZeroV, TrueEZeroV);
2072  PlotTrueAndEst("Diam."+OutFNm+"-Diam", Desc, "Effective diameter", DiamV, TrueDiamV);
2073  PlotTrueAndEst("Lambda1."+OutFNm, Desc, "Lambda 1", Lambda1V, TrueLambda1V);
2074  PlotTrueAndEst("Lambda2."+OutFNm, Desc, "Lambda 2", Lambda2V, TrueLambda2V);
2075  if (! TrueParam.Empty()) {
2076  PlotTrueAndEst("AbsErr."+OutFNm, Desc, "Average Absolute Error", AvgAbsErrV, TFltPrV()); }
2077  }
2078  if (! TrueParam.Empty()) {
2079  AvgAbsErr = TKronMtx::GetAvgAbsErr(ProbMtx, TrueParam);
2080  AvgAbsErrV.Add(TFltPr(Iter, AvgAbsErr));
2081  } else { AvgAbsErr = 1.0; }
2082  // update parameters
2083  AbsSumErr = fabs(ProbMtx.GetMtxSum() - TrueEZero);
2084  // update parameters
2085  for (int p = 0; p < GetParams(); p++) {
2086  LearnRateV[p] *= 0.99;
2087  while (fabs(LearnRateV[p]*MyGradV[p]) > 0.1) { LearnRateV[p] *= 0.99; printf(".");}
2088  while (fabs(LearnRateV[p]*MyGradV[p]) < 0.002) { LearnRateV[p] *= (1.0/0.95); printf("*");}
2089  printf(" %d] %f <-- %f + %9f Grad: %9.1f, Rate:%g\n", p, ProbMtx.At(p) + LearnRateV[p]*MyGradV[p],
2090  ProbMtx.At(p), (double)(LearnRateV[p]*MyGradV[p]), MyGradV[p](), LearnRateV[p]());
2091  ProbMtx.At(p) = ProbMtx.At(p) + LearnRateV[p]*MyGradV[p];
2092  // box constraints
2093  if (ProbMtx.At(p) > 1.0) { ProbMtx.At(p)=1.0; }
2094  if (ProbMtx.At(p) < 0.001) { ProbMtx.At(p)=0.001; }
2095  }
2096  printf("%d] LL: %g, ", Iter, MyLL);
2097  printf(" avgAbsErr: %.4f, absSumErr: %.4f, newLL: %.2f, deltaLL: %.2f\n", AvgAbsErr, AbsSumErr, MyLL, OldLL-MyLL);
2098  if (AvgAbsErr < 0.001) { printf("***CONVERGED!\n"); break; }
2099  printf("\n"); fflush(stdout);
2100  ProbMtx.GetLLMtx(LLMtx); OldLL = MyLL;
2101  }
2102  TrueParam.Dump("True Thetas", true);
2103  ProbMtx.Dump("Final Thetas", true);
2104  printf(" AvgAbsErr: %f\n AbsSumErr: %f\n Iterations: %d\n", AvgAbsErr, AbsSumErr, Iter);
2105  printf("Iteration run time: %s, sec: %g\n\n", IterTm.GetTmStr(), IterTm.GetSecs());
2106 }
2107 
2108 // given true N0, fit the parameters, get likelihood and calculate BIC (MDL), plot n0 vs. BIC
2109 void TKroneckerLL::TestBicCriterion(const TStr& OutFNm, const TStr& Desc1, const PNGraph& G, const int& GradIters,
2110  double LearnRate, const int& WarmUp, const int& NSamples, const int& TrueN0) {
2111  TFltPrV BicV, MdlV, LLV;
2112  const double rndGP = G->GetEdges()/TMath::Sqr(double(G->GetNodes()));
2113  const double RndGLL = G->GetEdges()*log(rndGP )+ (TMath::Sqr(double(G->GetNodes()))-G->GetEdges())*log(1-rndGP);
2114  LLV.Add(TFltPr(1, RndGLL));
2115  BicV.Add(TFltPr(1, -RndGLL + 0.5*TMath::Sqr(1)*log(TMath::Sqr(G->GetNodes()))));
2116  MdlV.Add(TFltPr(1, -RndGLL + 32*TMath::Sqr(1)+2*(log((double)1)+log((double)G->GetNodes()))));
2117  for (int NZero = 2; NZero < 10; NZero++) {
2118  const TKronMtx InitKronMtx = TKronMtx::GetInitMtx(NZero, G->GetNodes(), G->GetEdges());
2119  InitKronMtx.Dump("INIT PARAM", true);
2120  TKroneckerLL KronLL(G, InitKronMtx);
2121  KronLL.SetPerm('d'); // degree perm
2122  const double LastLL = KronLL.GradDescent(GradIters, LearnRate, 0.001, 0.01, WarmUp, NSamples);
2123  LLV.Add(TFltPr(NZero, LastLL));
2124  BicV.Add(TFltPr(NZero, -LastLL + 0.5*TMath::Sqr(NZero)*log(TMath::Sqr(G->GetNodes()))));
2125  MdlV.Add(TFltPr(NZero, -LastLL + 32*TMath::Sqr(NZero)+2*(log((double)NZero)+log((double)KronLL.GetKronIters()))));
2126  { TGnuPlot GP("LL-"+OutFNm, Desc1);
2127  GP.AddPlot(LLV, gpwLinesPoints, "Log-likelihood", "linewidth 1 pointtype 6 pointsize 2");
2128  GP.SetXYLabel("NZero", "Log-Likelihood"); GP.SavePng(); }
2129  { TGnuPlot GP("BIC-"+OutFNm, Desc1);
2130  GP.AddPlot(BicV, gpwLinesPoints, "BIC", "linewidth 1 pointtype 6 pointsize 2");
2131  GP.SetXYLabel("NZero", "BIC"); GP.SavePng(); }
2132  { TGnuPlot GP("MDL-"+OutFNm, Desc1);
2133  GP.AddPlot(MdlV, gpwLinesPoints, "MDL", "linewidth 1 pointtype 6 pointsize 2");
2134  GP.SetXYLabel("NZero", "MDL"); GP.SavePng(); }
2135  }
2136 }
2137 
2138 void TKroneckerLL::TestGradDescent(const int& KronIters, const int& KiloSamples, const TStr& Permutation) {
2139  const TStr OutFNm = TStr::Fmt("grad-%s%d-%dk", Permutation.CStr(), KronIters, KiloSamples);
2140  TKronMtx KronParam = TKronMtx::GetMtx("0.8 0.6; 0.6 0.4");
2141  PNGraph Graph = TKronMtx::GenFastKronecker(KronParam, KronIters, true, 0);
2142  TKroneckerLL KronLL(Graph, KronParam);
2143  TVec<TFltV> GradVV(4), SDevVV(4); TFltV XValV;
2144  int NId1 = 0, NId2 = 0, NAccept = 0;
2145  TVec<TMom> GradMomV(4);
2146  TExeTm ExeTm;
2147  if (Permutation == "r") KronLL.SetRndPerm();
2148  else if (Permutation == "d") KronLL.SetDegPerm();
2149  else if (Permutation == "o") KronLL.SetOrderPerm();
2150  else FailR("Unknown permutation (r,d,o)");
2151  KronLL.CalcApxGraphLL();
2152  KronLL.CalcApxGraphDLL();
2153  for (int s = 0; s < 1000*KiloSamples; s++) {
2154  if (KronLL.SampleNextPerm(NId1, NId2)) { // new permutation
2155  KronLL.UpdateGraphDLL(NId1, NId2); NAccept++; }
2156  if (s > 50000) { //warm up period
2157  for (int m = 0; m < 4; m++) { GradVV[m].Add(KronLL.GradV[m]); }
2158  if ((s+1) % 1000 == 0) {
2159  printf(".");
2160  for (int m = 0; m < 4; m++) { GradVV[m].Add(KronLL.GradV[m]); }
2161  XValV.Add((s+1));
2162  if ((s+1) % 100000 == 0) {
2163  TGnuPlot GP(OutFNm, TStr::Fmt("Gradient vs. samples. %d nodes, %d edges", Graph->GetNodes(), Graph->GetEdges()), true);
2164  for (int g = 0; g < GradVV.Len(); g++) {
2165  GP.AddPlot(XValV, GradVV[g], gpwLines, TStr::Fmt("grad %d", g)); }
2166  GP.SetXYLabel("sample index","log Gradient");
2167  GP.SavePng();
2168  }
2169  }
2170  }
2171  }
2172  printf("\n");
2173  for (int m = 0; m < 4; m++) {
2174  GradMomV[m].Def();
2175  printf("grad %d: mean: %12f sDev: %12f median: %12f\n", m,
2176  GradMomV[m].GetMean(), GradMomV[m].GetSDev(), GradMomV[m].GetMedian());
2177  }
2178 }
2179 /*
2180 // sample over permutations
2181 void TKroneckerLL::GradDescent(const double& LearnRate, const int& WarmUp, const int& NSamples, const int& NIter) {
2182  TFltV GradV, SDevV;
2183  double AvgLL;
2184  for (int Iter = 0; Iter < 100; Iter++) {
2185  //SampleGradient(WarmUp, NSamples, AvgLL, GradV, SDevV, true);
2186  SampleGradient(WarmUp, NSamples, AvgLL, GradV);
2187  for (int theta = 0; theta < GetParams(); theta++) {
2188  printf("%d] %f <-- %f + %f\n", theta, ProbMtx.At(theta) + LearnRate*GradV[theta], ProbMtx.At(theta), LearnRate*GradV[theta]);
2189  ProbMtx.At(theta) = ProbMtx.At(theta) + LearnRate*GradV[theta];
2190  // box constraints
2191  if (ProbMtx.At(theta) > 0.99) ProbMtx.At(theta)=0.99;
2192  if (ProbMtx.At(theta) < 0.01) ProbMtx.At(theta)=0.01;
2193  }
2194  ProbMtx.GetLLMtx(LLMtx);
2195  }
2196  ProbMtx.Dump("Final Thetas");
2197 }
2198 */
2199 
2200 
2202 // Add Noise to Graph
2204 int TKronNoise::RemoveNodeNoise(PNGraph& Graph, const int& NNodes, const bool Random) {
2205  IAssert(NNodes > 0 && NNodes < (Graph->GetNodes() / 2));
2206 
2207  int i = 0;
2208  TIntV ShufflePerm;
2209  Graph->GetNIdV(ShufflePerm);
2210  if(Random) {
2211  ShufflePerm.Shuffle(TKronMtx::Rnd);
2212  for(i = 0; i < NNodes; i++) {
2213  Graph->DelNode(int(ShufflePerm[i]));
2214  }
2215  } else {
2216  for(i = 0; i < NNodes; i++) {
2217  Graph->DelNode(int(ShufflePerm[ShufflePerm.Len() - 1 - i]));
2218  }
2219  }
2220 
2221  return Graph->GetNodes();
2222 }
2223 
2224 int TKronNoise::RemoveNodeNoise(PNGraph& Graph, const double& Rate, const bool Random) {
2225  IAssert(Rate > 0 && Rate < 0.5);
2226  return TKronNoise::RemoveNodeNoise(Graph, (int) floor(Rate * double(Graph->GetNodes())), Random);
2227 }
2228 
2229 int TKronNoise::FlipEdgeNoise(PNGraph& Graph, const int& NEdges, const bool Random) {
2230  IAssert(NEdges > 0 && NEdges < Graph->GetEdges());
2231 
2232  const int Nodes = Graph->GetNodes();
2233  const int Edges = Graph->GetEdges();
2234  int Src, Dst;
2235 
2236  TIntV NIdV, TempV;
2237  TIntPrV ToAdd, ToDel;
2238  Graph->GetNIdV(NIdV);
2239 
2240  ToAdd.Gen(NEdges / 2, 0);
2241  for(int i = 0; i < NEdges / 2; i++) {
2242  Src = NIdV[TKronMtx::Rnd.GetUniDevInt(Nodes)];
2243  Dst = NIdV[TKronMtx::Rnd.GetUniDevInt(Nodes)];
2244  if(Graph->IsEdge(Src, Dst)) { i--; continue; }
2245 
2246  ToAdd.Add(TIntPr(Src, Dst));
2247  }
2248 
2249  ToDel.Gen(Edges, 0);
2250  for(TNGraph::TEdgeI EI = Graph->BegEI(); EI < Graph->EndEI(); EI++) {
2251  ToDel.Add(TIntPr(EI.GetSrcNId(), EI.GetDstNId()));
2252  }
2253  ToDel.Shuffle(TKronMtx::Rnd);
2254 
2255  for(int i = 0; i < NEdges / 2; i++) {
2256  Graph->DelEdge(ToDel[i].Val1, ToDel[i].Val2);
2257  Graph->AddEdge(ToAdd[i].Val1, ToAdd[i].Val2);
2258  }
2259 
2260  return Graph->GetEdges();
2261 }
2262 
2263 int TKronNoise::FlipEdgeNoise(PNGraph& Graph, const double& Rate, const bool Random) {
2264  IAssert(Rate > 0 && Rate < 0.5);
2265  return TKronNoise::FlipEdgeNoise(Graph, (int) floor(Rate * double(Graph->GetEdges())), Random);
2266 }
2267 
2268 int TKronNoise::RemoveEdgeNoise(PNGraph& Graph, const int& NEdges) {
2269  IAssert(NEdges > 0 && NEdges < Graph->GetEdges());
2270 
2271  TIntPrV ToDel;
2272 
2273  ToDel.Gen(Graph->GetEdges(), 0);
2274  for(TNGraph::TEdgeI EI = Graph->BegEI(); EI < Graph->EndEI(); EI++) {
2275  if(EI.GetSrcNId() != EI.GetDstNId()) {
2276  ToDel.Add(TIntPr(EI.GetSrcNId(), EI.GetDstNId()));
2277  }
2278  }
2279  ToDel.Shuffle(TKronMtx::Rnd);
2280 
2281  for(int i = 0; i < NEdges; i++) {
2282  Graph->DelEdge(ToDel[i].Val1, ToDel[i].Val2);
2283  }
2284 
2285  return Graph->GetEdges();
2286 }
2287 
2288 int TKronNoise::RemoveEdgeNoise(PNGraph& Graph, const double& Rate) {
2289  IAssert(Rate > 0 && Rate < 0.5);
2290  return TKronNoise::RemoveEdgeNoise(Graph, (int) floor(Rate * double(Graph->GetEdges())));
2291 }
2292 
2293 
2294 
2296 // Kronecker Log Likelihood Maximization
2297 void TKronMaxLL::SetPerm(const char& PermId) {
2298  if (PermId == 'o') KronLL.SetOrderPerm();
2299  else if (PermId == 'd') KronLL.SetDegPerm();
2300  else if (PermId == 'r') KronLL.SetRndPerm();
2301  else FailR("Unknown permutation type (o,d,r)");
2302 }
2303 
2304 /* void TKronMaxLL::EvalNewEdgeP(const TKronMtx& ProbMtx) {
2305  ProbMtx.Dump("\n***EVAL:");
2306  for (int i = 0; i < ProbMtx.Len(); i++) {
2307  // parameters are out of bounds
2308  if (ProbMtx.At(i) < 0.05 || ProbMtx.At(i) > 0.95) {
2309  TFltV MxDerivV(ProbMtx.Len()); MxDerivV.PutAll(1e5);
2310  FEvalH.AddDat(ProbMtx, TFEval(-1e10, MxDerivV));
2311  return;
2312  }
2313  }
2314  double AvgLL;
2315  TFltV GradV, SDevV;
2316  KronLL.InitLL(ProbMtx); // set current point
2317  //KronLL.SampleGradient(WarmUp, NSamples, AvgLL, GradV, SDevV, true); //sample the gradient
2318  KronLL.SampleGradient(WarmUp, NSamples, AvgLL, GradV);
2319  FEvalH.AddDat(ProbMtx, TFEval(AvgLL, GradV));
2320 }
2321 
2322 double TKronMaxLL::GetLL(const TFltV& ThetaV) {
2323  TKronMtx ProbMtx; RoundTheta(ThetaV, ProbMtx);
2324  if (! FEvalH.IsKey(ProbMtx)) {
2325  EvalNewEdgeP(ProbMtx);
2326  }
2327  return FEvalH.GetDat(ProbMtx).LogLike;
2328 }
2329 
2330 void TKronMaxLL::GetDLL(const TFltV& ThetaV, TFltV& GradV) {
2331  TKronMtx ProbMtx; RoundTheta(ThetaV, ProbMtx);
2332  if (! FEvalH.IsKey(ProbMtx)) {
2333  EvalNewEdgeP(ProbMtx);
2334  }
2335  GradV = FEvalH.GetDat(ProbMtx).GradV;
2336 }
2337 
2338 double TKronMaxLL::GetDLL(const TFltV& ThetaV, const int& ParamId) {
2339  TKronMtx ProbMtx; RoundTheta(ThetaV, ProbMtx);
2340  if (! FEvalH.IsKey(ProbMtx)) {
2341  EvalNewEdgeP(ProbMtx);
2342  }
2343  return FEvalH.GetDat(ProbMtx).GradV[ParamId];
2344 }
2345 void TKronMaxLL::MaximizeLL(const int& NWarmUp, const int& Samples) {
2346  WarmUp = NWarmUp;
2347  NSamples = Samples;
2348  TConjGrad<TFunc> ConjGrad(KronLL.GetProbMtx().GetMtx(), TFunc(this));
2349  //TConjGrad<TLogBarFunc> ConjGrad(KronLL.GetEdgeP().GetV(), TLogBarFunc(this, 0.1));
2350  ConjGrad.ConjGradMin(0.1);
2351 }*/
2352 
2353 // round to 3 decimal places
2354 void TKronMaxLL::RoundTheta(const TFltV& ThetaV, TFltV& NewThetaV) {
2355  NewThetaV.Gen(ThetaV.Len());
2356  for (int i = 0; i < ThetaV.Len(); i++) {
2357  NewThetaV[i] = TMath::Round(ThetaV[i], 3); }
2358 }
2359 
2360 // round to 3 decimal places
2361 void TKronMaxLL::RoundTheta(const TFltV& ThetaV, TKronMtx& Kronecker) {
2362  Kronecker.GenMtx((int)sqrt((double)ThetaV.Len()));
2363  for (int i = 0; i < ThetaV.Len(); i++) {
2364  Kronecker.At(i) = TMath::Round(ThetaV[i], 3); }
2365 }
2366 
2369  TKronMtx KronParam = TKronMtx::GetMtx("0.8 0.7; 0.6 0.5");
2370  PNGraph Graph = TKronMtx::GenFastKronecker(KronParam, 8, true, 1);
2371 
2372  TKronMaxLL KronMaxLL(Graph, TFltV::GetV(0.9, 0.7, 0.5, 0.3));
2373  KronMaxLL.SetPerm('d');
2374  //KronMaxLL.MaximizeLL(10000, 50000);
2375  /*TKroneckerLL KronLL(Graph, *TKronMtx::GetMtx("0.9 0.7; 0.5 0.3"));
2376  KronLL.SetDegPerm();
2377  KronLL.GradDescent(0.005/double(Graph->GetNodes()));*/
2378 }
2379 
2381 // Kronecker Phase Plot
2382 /*
2383 void TKronPhasePlot::SaveMatlab(const TStr& OutFNm) const {
2384  FILE *F = fopen(OutFNm.CStr(), "wt");
2385  fprintf(F, "#Take last graph stats\n");
2386  fprintf(F, "#i\tAlpha\tBeta\tNodes\tNonZNodes\tEdges\tWccNodes\tWccEdges\tDiam\tEffDiam\tWccDiam\tWccEffDiam\t1StEigVal\tMxEigVal\n");
2387  for (int i = 0 ; i < PhaseV.Len(); i++) {
2388  const TPhasePoint& Point = PhaseV[i];
2389  const TGrowthStat& GrowthStat = Point.GrowthStat;
2390  const PGraphStat& FirstGrowth = GrowthStat[0];
2391  const PGraphStat& LastGrowth = GrowthStat.Last();
2392  fprintf(F, "%d\t%g\t%g\t", i, Point.Alpha, Point.Beta);
2393  fprintf(F, "%d\t%d\t%d\t", LastGrowth->Nodes, LastGrowth->Edges, LastGrowth->NonZNodes);
2394  fprintf(F, "%d\t%d\t", LastGrowth->WccNodes, LastGrowth->WccEdges);
2395  fprintf(F, "%f\t%f\t%f\t%f\t", LastGrowth->FullDiam, LastGrowth->EffDiam, LastGrowth->FullWccDiam, LastGrowth->EffWccDiam);
2396  //fprintf(F, "%f\t%f", FirstGrowth.MxEigVal, LastGrowth.MxEigVal);
2397  fprintf(F, "\n");
2398  }
2399  fclose(F);
2400 }
2401 
2402 void TKronPhasePlot::KroneckerPhase(const TStr& MtxId, const int& MxIter,
2403  const double& MnAlpha, const double& MxAlpha, const double& AlphaStep,
2404  const double& MnBeta, const double& MxBeta, const double& BetaStep,
2405  const TStr& FNmPref) {
2406  TKronPhasePlot PhasePlot;
2407  TExeTm KronTm;
2408  int AlphaCnt=0, BetaCnt=0;
2409  for (double Alpha = MnAlpha; (Alpha-1e-6) <= MxAlpha; Alpha += AlphaStep) {
2410  AlphaCnt++; BetaCnt = 0;
2411  printf("\n\n****A:%g***********************************************************************", Alpha);
2412  for (double Beta = MnBeta; (Beta-1e-6) <= MxBeta; Beta += BetaStep) {
2413  printf("\n\n==A[%d]:%g====B[%d]:%g=====================================================\n", AlphaCnt, Alpha, BetaCnt, Beta);
2414  BetaCnt++;
2415  TGrowthStat GrowthStat;
2416  PNGraph Graph;
2417  // run Kronecker
2418  TFullRectMtx SeedMtx = TFullRectMtx::GetMtxFromNm(MtxId);
2419  SeedMtx.Epsilonize(Alpha, Beta);
2420  for (int iter = 1; iter < MxIter + 1; iter++) {
2421  printf("%2d] at %s\n", iter, TExeTm::GetCurTm().CStr());
2422  Graph = PNGraph(); KronTm.Tick();
2423  Graph = SeedMtx.GenRMatKronecker(iter, false, 0);
2424  GrowthStat.Add(Graph, TNodeTm(iter));
2425  if (KronTm.GetSecs() > 30 * 60) {
2426  printf("*** TIME LIMIT [%s]\n", KronTm.GetTmStr().CStr()); break; }
2427  }
2428  const TStr Desc = TStr::Fmt("%s. Alpha:%g. Beta:%g", MtxId.CStr(), Alpha, Beta);
2429  const TStr FNmPref1 = TStr::Fmt("%s.a%02d.b%02d", FNmPref.CStr(), AlphaCnt, BetaCnt);
2430  TGPlot::PlotDegDist(Graph, FNmPref1, Desc, false, true, true);
2431  TGPlot::PlotWccDist(Graph, FNmPref1, Desc);
2432  TGPlot::PlotSccDist(Graph, FNmPref1, Desc);
2433  GrowthStat.PlotAll(FNmPref1, Desc);
2434  GrowthStat.SaveTxt(FNmPref1, Desc);
2435  PhasePlot.PhaseV.Add(TPhasePoint(Alpha, Beta, GrowthStat));
2436  }
2437  {TFOut FOut(TStr::Fmt("phase.%s.bin", FNmPref.CStr()));
2438  PhasePlot.Save(FOut); }
2439  }
2440 }
2441 */
2442 /*void TKroneckerLL::SetRndThetas() {
2443  ProbMtx.Dump("TRUE parameters");
2444  TFltV RndV;
2445  double SumRnd = 0.0;
2446  for (int i = 0; i < ProbMtx.Len(); i++) {
2447  RndV.Add(0.1+TKronMtx::Rnd.GetUniDev());
2448  SumRnd += RndV.Last();
2449  }
2450  RndV.Sort(false);
2451  for (int i = 0; i < ProbMtx.Len(); i++) { ProbMtx.At(i) = RndV[i]; }
2452  ProbMtx.Dump("Random parameters");
2453  const double EdgePSum = pow(Graph->GetEdges(), 1.0/KronIters);
2454  bool Repeat = true;
2455  while (Repeat) {
2456  const double Scale = EdgePSum / SumRnd;
2457  Repeat=false; SumRnd = 0.0;
2458  for (int i = 0; i < ProbMtx.Len(); i++) {
2459  ProbMtx.At(i) = ProbMtx.At(i)*Scale;
2460  if (ProbMtx.At(i) > 0.95) { ProbMtx.At(i)=0.95; Repeat=true; }
2461  SumRnd += ProbMtx.At(i);
2462  }
2463  }
2464  ProbMtx.Dump("INIT parameters");
2465  ProbMtx.GetLLMtx(LLMtx);
2466 }*/
2467 
2468 /*
2469 void TKroneckerLL::TestLL() {
2470  TExeTm ExeTm;
2471  // approximation to empty graph log-likelihood
2472  */
2473  /*{ PNGraph Graph = TNGraph::New();
2474  for (uint i = 0; i < TMath::Pow2(4); i++) { Graph->AddNode(i); } //8k nodes
2475  PKronecker KronParam = TKronMtx::GetMtx("0.8 0.6; 0.7 0.3");
2476  TKroneckerLL KronLL(Graph, KronParam);
2477  printf("\nNodes: %d\n", KronLL.GetNodes());
2478  printf("Full Graph LL: %f\n", KronLL.GetFullGraphLL());
2479  printf("Empty Graph Exact LL: %f\n", KronLL.GetEmptyGraphLL());
2480  printf("Empty Approx x=log(1-x) LL: %f\n", KronLL.GetApxEmptyGraphLL());
2481  printf("Empty Sample LL (100/node): %f\n", KronLL.GetSampleEmptyGraphLL(Graph->GetNodes() * 100));
2482  KronLL.SetOrderPerm();
2483  printf("\nEdge prob: %f, LL: %f\n", KronParam->GetEdgeProb(0,0,8), log(KronParam->GetEdgeProb(0,0,8)));
2484  printf("No Edge prob: %f, LL: %f\n", KronParam->GetNoEdgeProb(0,0,8), log(KronParam->GetNoEdgeProb(0,0,8)));
2485  printf("Empty Graph LL: %f\n", KronLL.CalcGraphLL());
2486  printf("Apx Empty Graph LL: %f\n", KronLL.CalcApxGraphLL());
2487  Graph->AddEdge(0, 0);
2488  printf("add 1 edge. LL: %f\n", KronLL.CalcGraphLL());
2489  printf("Apx add 1 edge. LL: %f\n", KronLL.CalcApxGraphLL()); }
2490  */
2491 
2492  // log-likelihood versus different Kronecker parameters
2493  /*{ PKronecker KronParam = TKronMtx::GetMtx("0.9 0.6; 0.6 0.2");
2494  PNGraph Graph = TKronMtx::GenKronecker(KronParam, 10, true, 10);
2495  TVec<PKronecker> ParamV;
2496  ParamV.Add(KronParam);
2497  ParamV.Add(TKronMtx::GetMtx("0.6 0.6; 0.6 0.5")); // sum = 2.3
2498  //ParamV.Add(TKronMtx::GetMtx("0.9 0.9; 0.4 0.1")); // sum = 2.3
2499  //ParamV.Add(TKronMtx::GetMtx("0.8 0.7; 0.6 0.2")); // sum = 2.3
2500  ParamV.Add(TKronMtx::GetMtx("0.9 0.9; 0.6 0.2")); // sum = 2.6
2501  for (int i = 0; i < ParamV.Len(); i++) {
2502  ParamV[i]->Dump();
2503  TKroneckerLL KronLL(Graph, ParamV[i]);
2504  for (int k = 0; k < 3; k++) {
2505  if (k==0) { KronLL.SetOrderPerm(); printf("Order permutation:\n"); }
2506  if (k==1) { KronLL.SetDegPerm(); printf("Degree permutation:\n"); }
2507  if (k==2) { KronLL.SetRndPerm(); printf("Random permutation:\n"); }
2508  const double LL = KronLL.CalcGraphLL(), aLL = KronLL.CalcApxGraphLL();
2509  printf(" Exact Graph LL: %f\n", LL);
2510  printf(" Approx Graph LL: %f\n", aLL);
2511  printf(" error : %.12f\n", -fabs(LL-aLL)/LL);
2512  }
2513  } }
2514  */
2515  // exact vs. approximate log-likelihood
2516  /*{ PKronecker KronParam = TKronMtx::GetMtx("0.9 0.6; 0.6 0.2");
2517  PNGraph Graph = TKronMtx::GenFastKronecker(KronParam, 16, true, 0);
2518  TKroneckerLL KronLL(Graph, KronParam);
2519  TMom ExactLL, ApxLL;
2520  printf("Random permutation:\n");
2521  for (int i = 0; i < 100; i++) {
2522  KronLL.SetRndPerm();
2523  //ExactLL.Add(KronLL.CalcGraphLL());
2524  ApxLL.Add(KronLL.CalcApxGraphLL());
2525  //printf(" Exact Graph LL: %f\n", ExactLL.GetVal(ExactLL.GetVals()-1));
2526  printf(" Approx Graph LL: %f\n", ApxLL.GetVal(ApxLL.GetVals()-1));
2527  }
2528  ExactLL.Def(); ApxLL.Def();
2529  //printf("EXACT: %f (%f)\n", ExactLL.GetMean(), ExactLL.GetSDev());
2530  printf("APPROX: %f (%f)\n", ApxLL.GetMean(), ApxLL.GetSDev());
2531  KronLL.SetOrderPerm();
2532  printf("Order permutation:\n");
2533  printf(" Exact Graph LL: %f\n", KronLL.CalcGraphLL());
2534  printf(" Approx Graph LL: %f\n", KronLL.CalcApxGraphLL());
2535  }
2536  */
2537 
2538  // start from random permultation and sort it using bubble sort
2539  // compare the end result with ordered permutation
2540  //PKronecker KronParam = TKronMtx::GetMtx("0.9 0.6; 0.6 0.2");
2541  //PNGraph Graph = TKronMtx::GenFastKronecker(KronParam, 10, true, 1);
2542  /*PKronecker KronParam = TKronMtx::GetMtx("0.9 0.7; 0.9 0.5");
2543  PNGraph Graph = TKronMtx::GenFastKronecker(KronParam, 6, true, 2);
2544  TGAlg::SaveFullMtx(Graph, "kron32.tab");
2545 
2546  TKroneckerLL KronLL(Graph, KronParam);
2547  KronLL.SetOrderPerm();
2548  KronLL.LogLike = KronLL.CalcApxGraphLL();
2549  printf(" Approx Graph LL: %f\n", KronLL.CalcApxGraphLL());
2550  printf(" swap 1-20: %f\n", KronLL.SwapNodesLL(1, 20));
2551  printf(" swap 20-30: %f\n", KronLL.SwapNodesLL(20, 30));
2552  printf(" swap 30-1: %f\n", KronLL.SwapNodesLL(1, 30));
2553  printf(" swap 20-30: %f\n", KronLL.SwapNodesLL(30, 20));
2554  IAssert(KronLL.GetPerm().IsSorted());
2555  KronLL.SetRndPerm();
2556  KronLL.LogLike = KronLL.CalcApxGraphLL();
2557  for (int i = 0; i < 1000000; i++) {
2558  const int nid1 = TInt::Rnd.GetUniDevInt(KronLL.Nodes);
2559  const int nid2 = TInt::Rnd.GetUniDevInt(KronLL.Nodes);
2560  printf("%3d] swap LL: %f\n", i, KronLL.SwapNodesLL(nid1, nid2));
2561  }
2562  printf("*** approx LL: %f\n", KronLL.CalcApxGraphLL());
2563  printf("*** exact LL: %f\n", KronLL.CalcGraphLL());
2564  */
2565  /*ExeTm.Tick();
2566  // bubble sort
2567  for (int i = 0; i < Graph->GetNodes()-1; i++) {
2568  for (int j = 1; j < Graph->GetNodes(); j++) {
2569  if (KronLL.GetPerm()[j-1] > KronLL.GetPerm()[j]) {
2570  const double oldLL = KronLL.GetLL();
2571  const double newLL = KronLL.SwapNodesLL(j-1, j);
2572  //const double trueLL = KronLL.CalcApxGraphLL();
2573  //printf("swap %3d - %3d: old: %f new: %f true:%f\n",
2574  // KronLL.GetPerm()[j-1], KronLL.GetPerm()[j], oldLL, newLL, trueLL);
2575  }
2576  }
2577  }
2578  //for (int i = 0; i < 100000; i++) {
2579  // KronLL.SwapNodesLL(TInt::Rnd.GetUniDevInt(TMath::Pow2(16)), TInt::Rnd.GetUniDevInt(TMath::Pow2(16))); }
2580  printf("\nPermutation is %s\n", KronLL.GetPerm().IsSorted()? "SORTED" : "NOT SORTED");
2581  printf(" Swap Graph LL: %f\n", KronLL.GetLL());
2582  printf(" Approx Graph LL: %f\n", KronLL.CalcApxGraphLL());
2583  KronLL.SetOrderPerm();
2584  printf(" Order Graph LL: %f\n\n", KronLL.CalcApxGraphLL());
2585  printf("Permutation is %s\n", KronLL.GetPerm().IsSorted()? "SORTED" : "NOT SORTED");
2586  printf("time: %f\n", ExeTm.GetSecs());
2587  */
2588  // evaluate the derivatives
2589  /*{ PNGraph Graph = TNGraph::New();
2590  TKronMtx KronParam = TKronMtx::GetMtx("0.8 0.4; 0.4 0.2");
2591  //for (uint i = 0; i < TMath::Pow2(4); i++) { Graph->AddNode(i); } //8k nodes
2592  Graph = TKronMtx::GenFastKronecker(KronParam, 8, true, 2); //TGAlg::SaveFullMtx(Graph, "kron16.txt");
2593  TKroneckerLL KronLL(Graph, KronParam);
2594  KronLL.SetOrderPerm();
2595  printf("\nNodes: %d\n", KronLL.GetNodes());
2596  printf("Full Graph Exact LL: %f\n", KronLL.GetFullGraphLL());
2597  printf("Empty Graph Exact LL: %f\n", KronLL.GetEmptyGraphLL());
2598  printf("Empty Approx LL: %f\n", KronLL.GetApxEmptyGraphLL());
2599  printf("Exact Graph LL: %f\n", KronLL.CalcGraphLL());
2600  printf("Apx Graph LL: %f\n\n", KronLL.CalcApxGraphLL());
2601  // derivatives
2602  printf("Empty graph Exact DLL: %f\n", KronLL.GetEmptyGraphDLL(0));
2603  printf("Empty graph Apx DLL: %f\n", KronLL.GetApxEmptyGraphDLL(0));
2604  printf("Theta0 edge(0,1) DLL: %f\n", KronLL.LLMtx.GetEdgeDLL(0, 0, 1, 4));
2605  printf("Theta0 NO edge(0,1) DLL: %f\n", KronLL.LLMtx.GetNoEdgeDLL(0, 0, 1, 4));
2606  printf("Theta0 NO edge(0,1) DLL: %f\n", KronLL.LLMtx.GetApxNoEdgeDLL(0, 0, 1, 4));
2607  KronLL.CalcGraphDLL(); printf("Exact Theta0 DLL:"); KronLL.GetDLL(0);
2608  KronLL.CalcApxGraphDLL(); printf("Apx Theta0 DLL:"); KronLL.GetDLL(0);
2609  KronLL.CalcFullApxGraphDLL(); printf("Apx Theta0 DLL:"); KronLL.GetDLL(0);
2610  // swap
2611  */
2612  /*for (int i = 0; i < 100; i++) {
2613  const int A = TInt::Rnd.GetUniDevInt(KronLL.Nodes);
2614  KronLL.NodePerm.Swap(i, A);
2615  //KronLL.UpdateGraphDLL(i, A); printf("Fast Theta0 DLL:"); KronLL.GetDLL(0);
2616  KronLL.CalcApxGraphDLL(); printf("Apx Theta0 DLL:"); KronLL.GetDLL(0);
2617  //KronLL.CalcFullApxGraphDLL(); printf("Apx Theta0 DLL:"); KronLL.GetDLL(0);
2618  //KronLL.CalcGraphDLL(); printf("Exact Theta0 DLL:"); KronLL.GetDLL(0);
2619  printf("\n");
2620  } */
2621  //}
2622 //}
2623 
2624 /*void TKroneckerLL::SampleGradient(const int& WarmUp, const int& NSamples, double& AvgLL, TFltV& AvgGradV, TFltV& SDevV, const bool& Plot) {
2625  printf("Samples: %s (warm-up: %s)\n", TInt::GetMegaStr(NSamples).CStr(), TInt::GetMegaStr(WarmUp).CStr());
2626  int NId1 = 0, NId2 = 0;
2627  TExeTm ExeTm;
2628  CalcApxGraphLL();
2629  for (int s = 0; s < WarmUp; s++) {
2630  SampleNextPerm(NId1, NId2); }
2631  printf(" warm-up:%s", ExeTm.GetTmStr()); ExeTm.Tick();
2632  CalcApxGraphLL();
2633  CalcApxGraphDLL();
2634  AvgLL = 0;
2635  TVec<TMom> DLLMomV(LLMtx.Len());
2636  for (int s = 0; s < NSamples; s++) {
2637  if (SampleNextPerm(NId1, NId2)) { // new permutation
2638  UpdateGraphDLL(NId1, NId2);
2639  }
2640  AvgLL += GetLL();
2641  for (int m = 0; m < LLMtx.Len(); m++) { DLLMomV[m].Add(GradV[m]); }
2642  }
2643  AvgLL = AvgLL / (NSamples*Nodes);
2644  // plot gradients over sampling time
2645  if (Plot) {
2646  TVec<TFltV> FltVV(LLMtx.Len()+1);
2647  for (int s = 0; s < DLLMomV[0].GetVals(); s += 1000) {
2648  for (int m = 0; m < LLMtx.Len(); m++) { FltVV[m].Add(DLLMomV[m].GetVal(s)); }
2649  FltVV.Last().Add(s); }
2650  const TStr FNm = TFile::GetUniqueFNm(TStr::Fmt("grad%dW%sS%s-#.png", KronIters, TInt::GetMegaStr(WarmUp).CStr(), TInt::GetMegaStr(NSamples).CStr()));
2651  TGnuPlot GP(FNm.GetFMid(), TStr::Fmt("Gradient vs. Sample Index. Nodes: %d, WarmUp: %s, Samples: %s, Avg LL: %f", Nodes,
2652  TInt::GetMegaStr(WarmUp).CStr(), TInt::GetMegaStr(NSamples).CStr(), AvgLL), true);
2653  for (int m = 0; m < LLMtx.Len(); m++) {
2654  GP.AddPlot(FltVV.Last(), FltVV[m], gpwLines, TStr::Fmt("Grad %d", m+1), "linewidth 5"); }
2655  GP.SetXYLabel("Sample Index (time)", "Log-likelihood gradient");
2656  GP.SavePng();
2657  }
2658  // average gradients
2659  printf(" sampling:%s\n", ExeTm.GetTmStr());
2660  printf(" AverageLL: %f\n", AvgLL);
2661  printf("Gradients:\n");
2662  AvgGradV.Gen(LLMtx.Len());
2663  SDevV.Gen(LLMtx.Len());
2664  for (int m = 0; m < LLMtx.Len(); m++) {
2665  DLLMomV[m].Def();
2666  AvgGradV[m] = DLLMomV[m].GetMean() / (Nodes*Nodes);
2667  SDevV[m] = DLLMomV[m].GetSDev() / (Nodes*Nodes);
2668  printf(" %d] mean: %16f sDev: %16f\n", m, AvgGradV[m], SDevV[m]);
2669  }
2670 }
2671 
2672 void TKronMaxLL::TFunc::FDeriv(const TFltV& Point, TFltV& GradV) {
2673  CallBack->GetDLL(Point, GradV);
2674  for (int i = 0; i < GradV.Len(); i++) { GradV[i] = -GradV[i]; }
2675 }
2676 
2677 double TKronMaxLL::TLogBarFunc::FVal(const TFltV& Point) {
2678  // log-likelihood
2679  const double LogLL = CallBack->GetLL(Point);
2680  // log-barrier
2681  const double MinBarrier = 0.05;
2682  const double MaxBarrier = 0.95;
2683  const double T1 = 1.0/T;
2684  double Barrier = 0.0;
2685  for (int i = 0; i < Point.Len(); i++) {
2686  if(Point[i].Val > MinBarrier && Point[i].Val < MaxBarrier) {
2687  Barrier += - T1 * (log(Point[i]-MinBarrier) + log(MaxBarrier-Point[i])); //log-barrier
2688  } else { Barrier = 1e5; }
2689  }
2690  IAssert(Barrier > 0.0);
2691  printf("barrrier: %f\n", Barrier);
2692  return -LogLL + Barrier; // minus LL since we want to maximize it
2693 }
2694 
2695 void TKronMaxLL::TLogBarFunc::FDeriv(const TFltV& Point, TFltV& DerivV) {
2696  // derivative of log-likelihood
2697  CallBack->GetDLL(Point, DerivV);
2698  // derivative of log barrier
2699  const double MinBarrier = 0.05;
2700  const double MaxBarrier = 0.95;
2701  const double T1 = 1.0/T;
2702  for (int i = 0; i < Point.Len(); i++) {
2703  DerivV[i] = - DerivV[i] + (- T1*(1.0/(Point[i]-MinBarrier) - 1.0/(MaxBarrier-Point[i])));
2704  }
2705 }
2706 */
#define IAssert(Cond)
Definition: bd.h:262
static const T & Mn(const T &LVal, const T &RVal)
Definition: xmath.h:36
void SetRndMtx(const int &MtxDim, const double &MinProb=0.0)
Definition: kronecker.cpp:40
TPair< TInt, TInt > TIntPr
Definition: ds.h:83
TKronMtx & operator=(const TKronMtx &Kronecker)
Definition: kronecker.cpp:25
static int RemoveNodeNoise(PNGraph &Graph, const int &NNodes, const bool Random=true)
!!!!! MYUNGHWAN, CHECK!
Definition: kronecker.cpp:2204
static PNGraph GenKronecker(const TKronMtx &SeedMtx, const int &NIter, const bool &IsDir, const int &Seed=0)
Definition: kronecker.cpp:312
static PKroneckerLL New()
Definition: kronecker.h:154
double GetEdgeProb(int NId1, int NId2, const int &NKronIters) const
Definition: kronecker.cpp:161
void GetNIdV(TIntV &NIdV) const
Gets a vector IDs of all nodes in the graph.
Definition: graph.cpp:376
#define IAssertR(Cond, Reason)
Definition: bd.h:265
static TRnd Rnd
Definition: kronecker.h:14
TPair< TFlt, TInt > TFltIntPr
Definition: ds.h:97
TNodeI BegNI() const
Returns an iterator referring to the first node in the graph.
Definition: graph.h:479
int GetParams() const
Definition: kronecker.h:164
int GetEdges(const int &NIter) const
Definition: kronecker.cpp:123
static void TestBicCriterion(const TStr &OutFNm, const TStr &Desc1, const PNGraph &G, const int &GradIters, double LearnRate, const int &WarmUp, const int &NSamples, const int &TrueN0)
Definition: kronecker.cpp:2109
Definition: tm.h:355
static TStr GetMegaStr(const int &Val)
Definition: dt.h:1133
double GetMtxSum() const
Definition: kronecker.cpp:140
static void KronMul(const TKronMtx &LeftPt, const TKronMtx &RightPt, TKronMtx &OutMtx)
Definition: kronecker.cpp:591
TFltV TestSamplePerm(const TStr &OutFNm, const int &WarmUp, const int &NSamples, const TKronMtx &TrueMtx, const bool &DoPlot=true)
Definition: kronecker.cpp:1795
static TKronMtx LoadTxt(const TStr &MtxFNm)
Definition: kronecker.cpp:768
static PSs LoadTxt(const TSsFmt &SsFmt, const TStr &FNm, const PNotify &Notify=NULL, const bool &IsExcelEoln=true, const int &MxY=-1, const TIntV &AllowedColNV=TIntV(), const bool &IsQStr=true)
Definition: ss.cpp:100
TFltV & GetMtx()
Definition: kronecker.h:35
static bool IsNum(const char &Ch)
Definition: dt.h:974
Definition: dt.h:11
void SetRandomEdges(const int &NEdges, const bool isDir=true)
!!!!! MYUNGHWAN, CHECK!
Definition: kronecker.cpp:1417
Definition: ds.h:129
void Del(const TSizeTy &ValN)
Removes the element at position ValN.
Definition: ds.h:1130
int GetKronIter(const int &Nodes) const
Definition: kronecker.cpp:127
TKronEMType
Definition: kronecker.h:114
void SavePng(const int &SizeX=1000, const int &SizeY=800, const TStr &Comment=TStr())
Definition: gnuplot.h:120
PGraph GetMxWcc(const PGraph &Graph)
Returns a graph representing the largest weakly connected component on an input Graph.
Definition: cncom.h:452
static const T & Mx(const T &LVal, const T &RVal)
Definition: xmath.h:32
static PNGraph New()
Static constructor that returns a pointer to the graph. Call: PNGraph Graph = TNGraph::New().
Definition: graph.h:425
void RunEStep(const int &GibbsWarmUp, const int &WarmUp, const int &NSamples, TFltV &LLV, TVec< TFltV > &DLLV)
Definition: kronecker.cpp:1567
TNodeI GetNI(const int &NId) const
Returns an iterator referring to the node of ID NId in the graph.
Definition: graph.h:483
void UpdateGraphDLL(const int &SwapNId1, const int &SwapNId2)
Definition: kronecker.cpp:1241
unsigned int uint
Definition: bd.h:11
static double CalcChainR2(const TVec< TFltV > &ChainLLV)
Definition: kronecker.cpp:1895
void SetOrderPerm()
Definition: kronecker.cpp:813
double GetApxNoEdgeLL(int NId1, int NId2, const int &NKronIters) const
Definition: kronecker.cpp:191
TEdgeI EndEI() const
Returns an iterator referring to the past-the-end edge in the graph.
Definition: graph.h:516
Definition: gstat.h:28
double GetFullGraphLL() const
Definition: kronecker.cpp:943
int GetEdges() const
Returns the number of edges in the graph.
Definition: graph.cpp:313
bool IsEdgePlace(int NId1, int NId2, const int &NKronIters, const double &ProbTresh) const
Definition: kronecker.cpp:196
Definition: bits.h:119
void Swap(TKronMtx &KronMtx)
Definition: kronecker.cpp:114
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:547
int Len() const
Definition: kronecker.h:31
void GetMinMax(const TFltPrV &XYValV, double &Min, double &Max, const bool &ResetMinMax)
Definition: kronecker.cpp:1732
void Dump(const TStr &MtxNm=TStr(), const bool &Sort=false) const
Definition: kronecker.cpp:636
TEdgeI BegEI() const
Returns an iterator referring to the first edge in the graph.
Definition: graph.h:514
double GetAnfEffDiam(const PGraph &Graph, const bool &IsDir, const double &Percentile, const int &NApprox)
Definition: anf.h:217
double NodeDLLDelta(const int ParamId, const int &NId) const
Definition: kronecker.cpp:1214
double GetFullRowLL(int RowId) const
Definition: kronecker.cpp:956
void SetPerm(const char &PermId)
Definition: kronecker.cpp:2297
int GetNodes() const
Returns the number of nodes in the graph.
Definition: graph.h:438
double GetEdgeLL(int NId1, int NId2, const int &NKronIters) const
Definition: kronecker.cpp:175
void SetXYLabel(const TStr &XLabel, const TStr &YLabel)
Definition: gnuplot.h:73
double GetEmptyGraphDLL(const int &ParamId) const
Definition: kronecker.cpp:1136
TVec< TFltPr > TFltPrV
Definition: ds.h:1539
double GetEmptyGraphLL() const
Definition: kronecker.cpp:976
int AddNode(int NId=-1)
Adds a node of ID NId to the graph.
Definition: graph.cpp:236
static const double NInf
Definition: kronecker.h:13
TVal1 Val1
Definition: ds.h:131
static uint GetNodeSig(const double &OneProb=0.5)
Definition: kronecker.cpp:262
bool SampleNextPerm(int &NId1, int &NId2)
Definition: kronecker.cpp:1111
static TKronMtx GetInitMtx(const int &Dim, const int &Nodes, const int &Edges)
Definition: kronecker.cpp:705
TStr GetMtxStr() const
Definition: kronecker.cpp:80
void RunKronEM(const int &EMIter, const int &GradIter, double LrnRate, double MnStep, double MxStep, const int &GibbsWarmUp, const int &WarmUp, const int &NSamples, const TKronEMType &Type=kronNodeMiss, const int &NMissing=-1)
Definition: kronecker.cpp:1692
static double Sqr(const double &x)
Definition: xmath.h:12
void SetIPerm(const TIntV &Perm)
!!!!! MYUNGHWAN, CHECK!
Definition: kronecker.cpp:875
void SetYRange(const double &Min, const double &Max)
Definition: gnuplot.h:80
void SetForEdges(const int &Nodes, const int &Edges)
Definition: kronecker.cpp:59
TKroneckerLL KronLL
Definition: kronecker.h:281
TVec< TIntTr > TIntTrV
Definition: ds.h:1537
static void KronPwr(const TKronMtx &KronPt, const int &NIter, TKronMtx &OutMtx)
Definition: kronecker.cpp:626
static const double Mx
Definition: dt.h:1298
TFlt LogLike
Definition: kronecker.h:135
int ChangeChAll(const char &SrcCh, const char &DstCh)
Definition: dt.cpp:1113
TFlt PermSwapNodeProb
Definition: kronecker.h:123
const double & At(const int &Row, const int &Col) const
Definition: kronecker.h:46
void SetEpsMtx(const double &Eps1, const double &Eps0, const int &Eps1Val=1, const int &Eps0Val=0)
Definition: kronecker.cpp:50
double CalcGraphLL()
Definition: kronecker.cpp:1022
double GetEdgeDLL(const int &ParamId, int NId1, int NId2, const int &NKronIters) const
Definition: kronecker.cpp:207
Definition: dt.h:1293
void RestoreGraph(const bool RestoreNodes=true)
!!!!! MYUNGHWAN, CHECK!
Definition: kronecker.cpp:923
bool Empty() const
Tests whether the vector is empty.
Definition: ds.h:542
Definition: gstat.h:29
static void TestGradDescent(const int &KronIters, const int &KiloSamples, const TStr &Permutation)
Definition: kronecker.cpp:2138
Definition: gstat.h:28
bool IsProbMtx() const
Definition: kronecker.cpp:33
bool IsObsEdge(const int &NId1, const int &NId2) const
Definition: kronecker.h:173
TIntV NodePerm
Definition: kronecker.h:128
TInt MtxDim
Definition: kronecker.h:16
Graph Statistics Sequence.
Definition: gstat.h:155
double CalcApxGraphLL()
Definition: kronecker.cpp:1037
void DelNode(const int &NId)
Deletes node of ID NId from the graph.
Definition: graph.cpp:294
void Swap(TVec< TVal, TSizeTy > &Vec)
Swaps the contents of the vector with Vec.
Definition: ds.h:1047
void GetProbMtx(TKronMtx &ProbMtx)
Definition: kronecker.cpp:106
const char * GetTmStr() const
Definition: tm.h:370
void SetBestDegPerm()
!!!!! MYUNGHWAN, CHECK!
Definition: kronecker.cpp:842
void DelZeroDegNodes(PGraph &Graph)
Removes all the zero-degree nodes, that isolated nodes, from the graph.
Definition: alg.h:432
TVal2 Val2
Definition: ds.h:132
void SetDegPerm()
Definition: kronecker.cpp:828
TVec< TKronMtx > MtxV
Definition: kronecker.h:143
void Clr(const bool &DoDel=true, const TSizeTy &NoDelLim=-1)
Clears the contents of the vector.
Definition: ds.h:971
void SetPerm(const char &PermId)
Definition: kronecker.cpp:805
static int FlipEdgeNoise(PNGraph &Graph, const int &NEdges, const bool Random=true)
Definition: kronecker.cpp:2229
void Sort(const bool &Asc=true)
Sorts the elements of the vector.
Definition: ds.h:1254
double GetApxEmptyGraphLL() const
Definition: kronecker.cpp:987
Edge iterator. Only forward iteration (operator++) is supported.
Definition: graph.h:386
static void PutRndSeed(const int &Seed)
Definition: kronecker.h:108
double GetApxNoEdgeDLL(const int &ParamId, int NId1, int NId2, const int &NKronIters) const
Definition: kronecker.cpp:240
static TKronMtx GetMtxFromNm(const TStr &MtxNm)
Definition: kronecker.cpp:753
int AddEdge(const int &SrcNId, const int &DstNId)
Adds an edge from node IDs SrcNId to node DstNId to the graph.
Definition: graph.cpp:321
void SetRndPerm()
Definition: kronecker.cpp:820
void PutAll(const TVal &Val)
Sets all elements of the vector to value Val.
Definition: ds.h:1166
void PlotGrad(const TFltPrV &EstLLV, const TFltPrV &TrueLLV, const TVec< TFltPrV > &GradVV, const TFltPrV &AcceptV, const TStr &OutFNm, const TStr &Desc)
Definition: kronecker.cpp:1740
void SampleGradient(const int &WarmUp, const int &NSamples, double &AvgLL, TFltV &GradV)
Definition: kronecker.cpp:1271
PNGraph GenRndGraph(const double &RndFact=1.0) const
Definition: kronecker.cpp:295
double NodeLLDelta(const int &NId) const
Definition: kronecker.cpp:1055
void DelEdge(const int &SrcNId, const int &DstNId, const bool &IsDir=true)
Deletes an edge from node IDs SrcNId to DstNId from the graph.
Definition: graph.cpp:345
bool IsEdge(const int &SrcNId, const int &DstNId, const bool &IsDir=true) const
Tests whether an edge from node IDs SrcNId to DstNId exists in the graph.
Definition: graph.cpp:363
PUNGraph GetSubGraph(const PUNGraph &Graph, const TIntV &NIdV, const bool &RenumberNodes)
Returns an induced subgraph of an undirected graph Graph with NIdV nodes with an optional node renumb...
Definition: subgraph.cpp:7
double GetApxEmptyGraphDLL(const int &ParamId) const
Definition: kronecker.cpp:1147
!!!!! MYUNGHWAN, CHECK!
Definition: kronecker.h:116
void PlotAutoCorrelation(const TFltV &ValV, const int &MaxK, const TStr &OutFNm, const TStr &Desc)
Definition: kronecker.cpp:1773
double RunMStep(const TFltV &LLV, const TVec< TFltV > &DLLV, const int &GradIter, const double &LrnRate, double MnStep, double MxStep)
Definition: kronecker.cpp:1597
static double Round(const double &Val)
Definition: xmath.h:16
int GetKronIters() const
Definition: kronecker.h:159
TBool DebugMode
Definition: kronecker.h:141
#define Assert(Cond)
Definition: bd.h:251
bool IsNode(const int &NId) const
Tests whether ID NId is a node.
Definition: graph.h:477
static double Power(const double &Base, const double &Exponent)
Definition: xmath.h:25
bool Empty() const
Definition: kronecker.h:32
TInt RealEdges
Definition: kronecker.h:132
void AddRndNoise(const double &SDev)
Definition: kronecker.cpp:69
const TVal & Last() const
Returns a reference to the last element of the vector.
Definition: ds.h:551
static double GetAvgFroErr(const TKronMtx &Kron1, const TKronMtx &Kron2)
Definition: kronecker.cpp:668
static void Test()
Definition: kronecker.cpp:2367
Tab separated.
Definition: ss.h:6
void ToOneMinusMtx()
Definition: kronecker.cpp:91
void GradDescentConvergence(const TStr &OutFNm, const TStr &Desc1, const bool &SamplePerm, const int &NIters, double LearnRate, const int &WarmUp, const int &NSamples, const int &AvgKGraphs, const TKronMtx &TrueParam)
Definition: kronecker.cpp:2017
static void ChainGelmapRubinPlot(const TVec< TFltV > &ChainLLV, const TStr &OutFNm, const TStr &Desc)
Definition: kronecker.cpp:1924
TQuad< TFlt, TFlt, TFlt, TFlt > TFltQu
Definition: ds.h:263
Definition: tm.h:14
#define FailR(Reason)
Definition: bd.h:240
double GetFullColLL(int ColId) const
Definition: kronecker.cpp:966
void Tick()
Definition: tm.h:364
void InitLL(const TFltV &ParamV)
Definition: kronecker.cpp:996
TPair< TFlt, TFlt > TFltPr
Definition: ds.h:99
int GetDeg() const
Returns degree of the current node, the sum of in-degree and out-degree.
Definition: graph.h:358
double GetEZero(const int &Edges, const int &KronIter) const
Definition: kronecker.cpp:136
double GetNoEdgeProb(int NId1, int NId2, const int &NKronIters) const
Definition: kronecker.cpp:171
int GetNZeroK(const PNGraph &Graph) const
Definition: kronecker.cpp:132
void GetLLMtx(TKronMtx &LLMtx)
Definition: kronecker.cpp:98
const TFltV & CalcGraphDLL()
Definition: kronecker.cpp:1158
void MetroGibbsSampleSetup(const int &WarmUp)
Definition: kronecker.cpp:1476
const TFltV & CalcFullApxGraphDLL()
Definition: kronecker.cpp:1176
void MetroGibbsSampleNext(const int &WarmUp, const bool DLLUpdate=false)
Definition: kronecker.cpp:1503
void GetSngVals(const PNGraph &Graph, const int &SngVals, TFltV &SngValV)
Computes largest SngVals singular values of the adjacency matrix representing a directed Graph...
Definition: gsvd.cpp:175
Definition: dt.h:201
double GradDescent2(const int &NIter, const double &LrnRate, double MnStep, double MxStep, const int &WarmUp, const int &NSamples)
Definition: kronecker.cpp:1355
static PNGraph GenDetKronecker(const TKronMtx &SeedMtx, const int &NIter, const bool &IsDir)
Definition: kronecker.cpp:458
double GetNrmDev()
Definition: dt.cpp:63
Definition: tm.h:81
void GenMtx(const int &Dim)
Definition: kronecker.h:40
void AppendIsoNodes()
Definition: kronecker.cpp:914
TIntTrV GEdgeV
Definition: kronecker.h:125
Definition: gstat.h:28
double GetColSum(const int &ColId) const
Definition: kronecker.cpp:154
TNodeI EndNI() const
Returns an iterator referring to the past-the-end node in the graph.
Definition: graph.h:481
TInt LSelfEdge
Definition: kronecker.h:127
TIntTrV LEdgeV
Definition: kronecker.h:126
TInt KronIters
Definition: kronecker.h:121
TKronMtx ProbMtx
Definition: kronecker.h:134
void PlotTrueAndEst(const TStr &OutFNm, const TStr &Desc, const TStr &YLabel, const TFltPrV &EstV, const TFltPrV &TrueV)
Definition: kronecker.cpp:2009
TInt MissEdges
Definition: kronecker.h:139
int GetOutDeg() const
Returns out-degree of the current node.
Definition: graph.h:362
int GetDim() const
Definition: kronecker.h:30
const TFltV & CalcApxGraphDLL()
Definition: kronecker.cpp:1194
double GetNoEdgeLL(int NId1, int NId2, const int &NKronIters) const
Definition: kronecker.cpp:186
static TKronMtx GetRndMtx(const int &Dim, const double &MinProb)
Definition: kronecker.cpp:699
Definition: dt.h:412
Definition: ds.h:218
void SaveTxt(const TStr &OutFNm) const
Definition: kronecker.cpp:14
double SwapNodesLL(const int &NId1, const int &NId2)
Definition: kronecker.cpp:1083
bool Empty() const
Definition: dt.h:488
static TStr Fmt(const char *FmtStr,...)
Definition: dt.cpp:1599
TKronMtx LLMtx
Definition: kronecker.h:134
void SplitOnAllCh(const char &SplitCh, TStrV &StrV, const bool &SkipEmpty=true) const
Definition: dt.cpp:926
TInt RealNodes
Definition: kronecker.h:131
void Shuffle(TRnd &Rnd)
Randomly shuffles the elements of the vector.
Definition: ds.h:1271
Node iterator. Only forward iteration (operator++) is supported.
Definition: graph.h:339
PNGraph Graph
Definition: kronecker.h:120
void McMcGetAvgAvg(const TFltV &AvgJV, double &AvgAvg)
Definition: kronecker.cpp:1876
double GetSecs() const
Definition: tm.h:366
double GetUniDev()
Definition: dt.h:30
TFltV GradV
Definition: kronecker.h:136
int AddPlot(const TIntV &YValV, const TGpSeriesTy &SeriesTy=gpwLinesPoints, const TStr &Label=TStr(), const TStr &Style=TStr())
Definition: gnuplot.cpp:186
static void KronSum(const TKronMtx &LeftPt, const TKronMtx &RightPt, TKronMtx &OutMtx)
Definition: kronecker.cpp:607
double GetRowSum(const int &RowId) const
Definition: kronecker.cpp:147
TTriple< TInt, TInt, TInt > TIntTr
Definition: ds.h:170
int GetNodes(const int &NIter) const
Definition: kronecker.cpp:119
void Gen(const TSizeTy &_Vals)
Constructs a vector (an array) of _Vals elements.
Definition: ds.h:495
int GetUniDevInt(const int &Range=0)
Definition: dt.cpp:39
PNGraph GenThreshGraph(const double &Thresh) const
Definition: kronecker.cpp:283
double GetNoEdgeDLL(const int &ParamId, int NId1, int NId2, const int &NKronIters) const
Definition: kronecker.cpp:220
static TVec< TFlt, TSizeTy > GetV(const TFlt &Val1)
Returns a vector on element Val1.
Definition: ds.h:817
void SetGraph(const PNGraph &GraphPt)
Definition: kronecker.cpp:882
int GetInDeg() const
Returns in-degree of the current node.
Definition: graph.h:360
TKronEMType EMType
Definition: kronecker.h:138
TFltV SeedMtx
Definition: kronecker.h:17
char * CStr()
Definition: dt.h:476
TKronMtx()
Definition: kronecker.h:19
int GetInNId(const int &NodeN) const
Returns ID of NodeN-th in-node (the node pointing to the current node).
Definition: graph.h:368
TFltQu TestKronDescent(const bool &DoExact, const bool &TruePerm, double LearnRate, const int &WarmUp, const int &NSamples, const TKronMtx &TrueParam)
Definition: kronecker.cpp:1942
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:574
void DelLast()
Removes the last element of the vector.
Definition: ds.h:635
TIntV InvertPerm
Definition: kronecker.h:129
double GetLL() const
Definition: kronecker.h:204
static double GetAvgAbsErr(const TKronMtx &Kron1, const TKronMtx &Kron2)
Definition: kronecker.cpp:655
static void PlotCmpGraphs(const TKronMtx &SeedMtx, const PNGraph &Graph, const TStr &OutFNm, const TStr &Desc)
Definition: kronecker.cpp:479
static void RoundTheta(const TFltV &ThetaV, TFltV &NewThetaV)
Definition: kronecker.cpp:2354
static void PlotValV(const TVec< TVal1 > &ValV, const TStr &OutFNmPref, const TStr &Desc="", const TStr &XLabel="", const TStr &YLabel="", const TGpScaleTy &ScaleTy=gpsAuto, const bool &PowerFit=false, const TGpSeriesTy &SeriesTy=gpwLinesPoints)
Definition: gnuplot.h:398
double GradDescent(const int &NIter, const double &LrnRate, double MnStep, double MxStep, const int &WarmUp, const int &NSamples)
Definition: kronecker.cpp:1299
int GetOutNId(const int &NodeN) const
Returns ID of NodeN-th out-node (the node the current node points to).
Definition: graph.h:372
TTriple< TFlt, TInt, TInt > TFltIntIntTr
Definition: ds.h:181
void McMcGetAvgJ(const TVec< TFltV > &ChainLLV, TFltV &AvgJV)
Definition: kronecker.cpp:1883
TVal3 Val3
Definition: ds.h:133
static int RemoveEdgeNoise(PNGraph &Graph, const int &NEdges)
Definition: kronecker.cpp:2268
static const double Mn
Definition: dt.h:1297
static PNGraph GenFastKronecker(const TKronMtx &SeedMtx, const int &NIter, const bool &IsDir, const int &Seed=0)
Definition: kronecker.cpp:349
void GetSubValV(const TSizeTy &BValN, const TSizeTy &EValN, TVec< TVal, TSizeTy > &ValV) const
Fills ValV with elements at positions BValN...EValN.
Definition: ds.h:1112