SNAP Library 4.0, Developer Reference  2017-07-27 13:18:06
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
agmattr.h
Go to the documentation of this file.
1 #ifndef yanglib_agmattr1_h
2 #define yanglib_agmattr1_h
3 #include "Snap.h"
4 
5 class TCesnaUtil {
6 public:
7  //static double GetConductance(const PUNGraph& Graph, const TIntSet& CmtyS, const int Edges);
8  //static double GetConductance(const PNGraph& Graph, const TIntSet& CmtyS, const int Edges);
9 template<class PGraph>
10 static double GetConductance(const PGraph& Graph, const TIntSet& CmtyS, const int Edges) {
11  const bool GraphType = HasGraphFlag(typename PGraph::TObj, gfDirected);
12  int Edges2;
13  if (GraphType) { Edges2 = Edges >= 0 ? Edges : Graph->GetEdges(); }
14  else { Edges2 = Edges >= 0 ? 2 * Edges : Graph->GetEdges(); }
15  int Vol = 0, Cut = 0;
16  double Phi = 0.0;
17  for (int i = 0; i < CmtyS.Len(); i++) {
18  if (! Graph->IsNode(CmtyS[i])) { continue; }
19  typename PGraph::TObj::TNodeI NI = Graph->GetNI(CmtyS[i]);
20  for (int e = 0; e < NI.GetOutDeg(); e++) {
21  if (! CmtyS.IsKey(NI.GetOutNId(e))) { Cut += 1; }
22  }
23  Vol += NI.GetOutDeg();
24  }
25  // get conductance
26  if (Vol != Edges2) {
27  if (2 * Vol > Edges2) { Phi = Cut / double (Edges2 - Vol); }
28  else if (Vol == 0) { Phi = 0.0; }
29  else { Phi = Cut / double(Vol); }
30  } else {
31  if (Vol == Edges2) { Phi = 1.0; }
32  }
33  return Phi;
34 }
35 
36 
37 template<class PGraph>
38  static void GenHoldOutPairs(const PGraph& G, TVec<TIntSet>& HoldOutSet, double HOFrac, TRnd& Rnd) {
39  TIntPrV EdgeV(G->GetEdges(), 0);
40  for (typename PGraph::TObj::TEdgeI EI = G->BegEI(); EI < G->EndEI(); EI++) {
41  EdgeV.Add(TIntPr(EI.GetSrcNId(), EI.GetDstNId()));
42  }
43  EdgeV.Shuffle(Rnd);
44 
45  const bool GraphType = HasGraphFlag(typename PGraph::TObj, gfDirected);
46  HoldOutSet.Gen(G->GetNodes());
47  int HOTotal = int(HOFrac * G->GetNodes() * (G->GetNodes() - 1) / 2.0);
48  if (GraphType) { HOTotal *= 2;}
49  int HOCnt = 0;
50  int HOEdges = (int) TMath::Round(HOFrac * G->GetEdges());
51  printf("holding out %d edges...\n", HOEdges);
52  for (int he = 0; he < (int) HOEdges; he++) {
53  HoldOutSet[EdgeV[he].Val1].AddKey(EdgeV[he].Val2);
54  if (! GraphType) { HoldOutSet[EdgeV[he].Val2].AddKey(EdgeV[he].Val1); }
55  HOCnt++;
56  }
57  printf("%d Edges hold out\n", HOCnt);
58  while(HOCnt++ < HOTotal) {
59  int SrcNID = Rnd.GetUniDevInt(G->GetNodes());
60  int DstNID = Rnd.GetUniDevInt(G->GetNodes());
61  if (SrcNID == DstNID) { continue; }
62  HoldOutSet[SrcNID].AddKey(DstNID);
63  if (! GraphType) { HoldOutSet[DstNID].AddKey(SrcNID); }
64  }
65  }
66 
67 template<class PGraph>
68  static void GetNbhCom(const PGraph& Graph, const int NID, TIntSet& NBCmtyS) {
69  typename PGraph::TObj::TNodeI NI = Graph->GetNI(NID);
70  NBCmtyS.Gen(NI.GetDeg());
71  NBCmtyS.AddKey(NID);
72  for (int e = 0; e < NI.GetDeg(); e++) {
73  NBCmtyS.AddKey(NI.GetNbrNId(e));
74  }
75  }
76 template<class PGraph>
77  static void GetNIdPhiV(const PGraph& G, TFltIntPrV& NIdPhiV) {
78  NIdPhiV.Gen(G->GetNodes(), 0);
79  const int Edges = G->GetEdges();
80  TExeTm RunTm;
81  //compute conductance of neighborhood community
82  for (typename PGraph::TObj::TNodeI NI = G->BegNI(); NI < G->EndNI(); NI++) {
83  TIntSet NBCmty(NI.GetDeg() + 1);
84  double Phi;
85  if (NI.GetDeg() < 5) { //do not include nodes with too few degree
86  Phi = 1.0;
87  } else {
88  TCesnaUtil::GetNbhCom<PGraph>(G, NI.GetId(), NBCmty);
89  //if (NBCmty.Len() != NI.GetDeg() + 1) { printf("NbCom:%d, Deg:%d\n", NBCmty.Len(), NI.GetDeg()); }
90  //IAssert(NBCmty.Len() == NI.GetDeg() + 1);
91  Phi = TCesnaUtil::GetConductance(G, NBCmty, Edges);
92  }
93  //NCPhiH.AddDat(u, Phi);
94  NIdPhiV.Add(TFltIntPr(Phi, NI.GetId()));
95  }
96  printf("conductance computation completed [%s]\n", RunTm.GetTmStr());
97  fflush(stdout);
98  }
99 
100  static void LoadNIDAttrHFromNIDKH(const TIntV& NIDV, const TStr& InFNm, THash<TInt, TIntV>& NIDAttrH, const TStrHash<TInt>& NodeNameH, const TSsFmt Sep = ssfTabSep) {
101  NIDAttrH.Clr();
102  NIDAttrH.Gen(NIDV.Len());
103  printf("nodes in the graph:%d\n", NIDV.Len());
104  for (int u = 0; u < NIDV.Len(); u++) { NIDAttrH.AddDat(NIDV[u]).Gen(0, 0); }
105  TSsParser Ss(InFNm, ssfTabSep);
106  while (Ss.Next()) {
107  TStr NodeName = Ss.GetFld(0);
108  TInt NID = NodeName.GetInt();
109  if (NodeNameH.Len() > 0 && ! NodeNameH.IsKey(NodeName)) { continue; }
110  if (NodeNameH.Len() > 0) {
111  IAssertR(NodeNameH.IsKey(NodeName), TStr::Fmt("NodeName:%s", NodeName.CStr()));
112  NID = NodeNameH.GetKeyId(NodeName);
113  }
114  if (! NIDAttrH.IsKey(NID)) {
115  //printf("NodeName %s, NID %d does not exist\n", NodeName.CStr(), NID);
116  continue; } //ignore nodes who are not in the graph
117  IAssertR(! NIDAttrH.GetDat(NID).IsIn(Ss.GetInt(1)), TStr::Fmt("NIdx:%d NID:%s, K:%d", NID.Val, NodeName.CStr(), Ss.GetInt(1)));
118  NIDAttrH.GetDat(NID).Add(Ss.GetInt(1));
119  }
120  printf("%s nodes, %s lines read \n", TUInt64::GetStr(NIDAttrH.Len()).CStr(), TUInt64::GetStr(Ss.GetLineNo()).CStr());
121  //printf("%d nodes, %d lines read \n", NIDAttrH.Len(), Ss.GetLineNo());
122  }
123  static void LoadNIDAttrHFromNIDKH(const TIntV& NIDV, const TStr& InFNm, THash<TInt, TIntV>& NIDAttrH) {
124  TStrHash<TInt> TmpH;
125  LoadNIDAttrHFromNIDKH(NIDV, InFNm, NIDAttrH, TmpH);
126  }
127  static void DumpNIDAttrHToNIDK(const TStr& FNm, const THash<TInt, TIntSet>& NIDAttrH, const TStrHash<TInt>& FeatNameH, const TStrHash<TInt>& NodeNameH) {
128  FILE* F = fopen(FNm.CStr(), "wt");
129  for (int u = 0; u < NIDAttrH.Len(); u++) {
130  int NID = NIDAttrH.GetKey(u);
131  TStr NodeName = NodeNameH.IsKeyId(NID)? NodeNameH.GetKey(NID): TStr::Fmt("%d", NID);
132  for (int k = 0; k < NIDAttrH[u].Len(); k++) {
133  int KID = NIDAttrH[u][k];
134  TStr FeatName = FeatNameH.IsKeyId(KID)? FeatNameH.GetKey(KID): TStr::Fmt("%d", KID);
135  fprintf(F,"%s\t%s\n", NodeName.CStr(), FeatName.CStr());
136  }
137  }
138  fclose(F);
139  }
140  static void DumpNIDAttrHToNIDK(const TStr& FNm, const THash<TInt, TIntSet>& NIDAttrH, const TStrHash<TInt>& FeatNameH) {
141  TStrHash<TInt> TmpH;
142  DumpNIDAttrHToNIDK(FNm, NIDAttrH, FeatNameH, TmpH);
143  }
144  static void DumpNIDAttrHToNIDK(const TStr& FNm, const THash<TInt, TIntSet>& NIDAttrH) {
145  TStrHash<TInt> TmpH1, TmpH2;
146  DumpNIDAttrHToNIDK(FNm, NIDAttrH, TmpH1, TmpH2);
147  }
148  static void DumpNIDAttrHToNIDK(const TStr& FNm, const THash<TInt, TIntV>& NIDAttrH, const TStrHash<TInt>& FeatNameH, const TStrHash<TInt>& NodeNameH) {
149  FILE* F = fopen(FNm.CStr(), "wt");
150  for (int u = 0; u < NIDAttrH.Len(); u++) {
151  int NID = NIDAttrH.GetKey(u);
152  TStr NodeName = NodeNameH.IsKeyId(NID)? NodeNameH.GetKey(NID): TStr::Fmt("%d", NID);
153  for (int k = 0; k < NIDAttrH[u].Len(); k++) {
154  int KID = NIDAttrH[u][k];
155  TStr FeatName = FeatNameH.IsKeyId(KID)? FeatNameH.GetKey(KID): TStr::Fmt("%d", KID);
156  fprintf(F,"%s\t%s\n", NodeName.CStr(), FeatName.CStr());
157  }
158  }
159  fclose(F);
160  }
161  static void DumpNIDAttrHToNIDK(const TStr& FNm, const THash<TInt, TIntV>& NIDAttrH, const TStrHash<TInt>& FeatNameH) {
162  TStrHash<TInt> TmpH;
163  DumpNIDAttrHToNIDK(FNm, NIDAttrH, FeatNameH, TmpH);
164  }
165  static void DumpNIDAttrHToNIDK(const TStr& FNm, const THash<TInt, TIntV>& NIDAttrH) {
166  TStrHash<TInt> TmpH1, TmpH2;
167  DumpNIDAttrHToNIDK(FNm, NIDAttrH, TmpH1, TmpH2);
168  }
169  static int GetAttrs(const THash<TInt, TIntV>& NIDAttrH) {
170  int Attrs = 0;
171  for (int u = 0; u < NIDAttrH.Len(); u++) {
172  for (int k = 0; k < NIDAttrH[u].Len(); k++) {
173  if (NIDAttrH[u][k] >= Attrs) { Attrs = NIDAttrH[u][k] + 1; }
174  }
175  }
176  return Attrs;
177  }
178  //Metis format (N + 1) line describes the attributes of N. ID start from 1
179  static void DumpNIDAttrHToMetis(const TStr& FNm, const THash<TInt, TIntV>& NIDAttrH, const TIntV& NIDV) {
180  int AttrCnt = 0;
181  for (int u = 1; u < NIDV.Len(); u++) {
182  if (! NIDAttrH.IsKey(NIDV[u])) { continue; }
183  AttrCnt += NIDAttrH.GetDat(NIDV[u]).Len();
184  }
185  IAssert (NIDV[0] == -1);
186  FILE* F = fopen(FNm.CStr(), "wt");
187  fprintf(F, "%d %d\n", NIDV.Len() - 1, AttrCnt);
188  int TmpCnt = 0;
189  for (int u = 1; u < NIDV.Len(); u++) {
190  if (NIDAttrH.IsKey(NIDV[u])) {
191  for (int k = 0; k < NIDAttrH.GetDat(NIDV[u]).Len(); k++) {
192  if (k > 0) { fprintf(F, " "); }
193  fprintf(F, "%d", NIDAttrH.GetDat(NIDV[u])[k].Val + 1);
194  TmpCnt++;
195  }
196  }
197  fprintf(F, "\n");
198  }
199  fclose(F);
200  IAssert(AttrCnt == TmpCnt);
201 
202  }
203  static void FilterLowEntropy(const THash<TInt, TIntV>& OldNIDAttrH, THash<TInt, TIntV>& NewNIDAttrH, const TIntStrH& OldNameH, TIntStrH& NewNameH, const double MinFrac = 0.00001, const double MaxFrac = 0.95, const int MinCnt = 3) {
204  TIntH KIDCntH;
205  for (int u = 0; u < OldNIDAttrH.Len(); u++) {
206  for (int k = 0; k < OldNIDAttrH[u].Len(); k++) {
207  KIDCntH.AddDat(OldNIDAttrH[u][k])++;
208  }
209  }
210  KIDCntH.SortByDat(false);
211 
212  TIntSet SelectedK(KIDCntH.Len());
213  for (int c = 0; c < KIDCntH.Len(); c++) {
214  double Frac = (double) KIDCntH[c].Val / (double) OldNIDAttrH.Len();
215  if (KIDCntH[c].Val < MinCnt) { continue; }
216  if (Frac > MaxFrac || Frac < MinFrac) { continue; }
217  SelectedK.AddKey(KIDCntH.GetKey(c));
218  }
219  printf("%d attributes selected from %d\n", SelectedK.Len(), KIDCntH.Len());
220  NewNIDAttrH.Gen(OldNIDAttrH.Len());
221  for (int u = 0; u < OldNIDAttrH.Len(); u++) {
222  int NID = OldNIDAttrH.GetKey(u);
223  TIntV& AttrV = NewNIDAttrH.AddDat(NID);
224  for (int k = 0; k < OldNIDAttrH[u].Len(); k++) {
225  if (! SelectedK.IsKey(OldNIDAttrH[u][k])) { continue; }
226  AttrV.Add(SelectedK.GetKeyId(OldNIDAttrH[u][k]));
227  }
228  }
229 
230  if (! OldNameH.Empty()) {
231  NewNameH.Gen(SelectedK.Len());
232  for (int k = 0; k < SelectedK.Len(); k++) {
233  int OldKID = SelectedK.GetKey(k);
234  if (OldNameH.IsKey(OldKID)) {
235  NewNameH.AddDat(k, OldNameH.GetDat(OldKID));
236  }
237  }
238  printf("%d attributes names copied\n", NewNameH.Len());
239  }
240  }
241  static void FilterLowEntropy(const THash<TInt, TIntV>& OldNIDAttrH, THash<TInt, TIntV>& NewNIDAttrH, const double MinFrac = 0.00001, const double MaxFrac = 0.95, const int MinCnt = 3) {
242  TIntStrH TmpH1, TmpH2;
243  FilterLowEntropy(OldNIDAttrH, NewNIDAttrH, TmpH1, TmpH2, MinFrac, MaxFrac, MinCnt);
244  }
245 };
246 class TCesna { //CESNA: community detection in networks with node attributes
247 private:
248  PUNGraph G; //graph to fit
249  TVec<TIntSet> X; // X[u] = {k| X_uk = 1}
250  TVec<TIntFltH> F; // membership for each user (Size: Nodes * Coms)
251  TVec<TFltV> W; // weight vector for logistic regression. w_ck = W[k][c] (Column vector)
252  TInt Attrs; // number of attributes
253  TRnd Rnd; // random number generator
254  TIntSet NIDToIdx; // original node ID vector NIDToIdx[i] = Node ID for index i, NIDToIdx.GetKey(NID) = index for NID
255  TFlt RegCoef; //Regularization coefficient when we fit for P_c +: L1, -: L2
256  TFltV SumFV; // sum_u F_uc for each community c. Needed for efficient calculation
257  TInt NumComs; // number of communities
258  TVec<TIntSet> HOVIDSV; //NID pairs to hold out for cross validation
259  TVec<TIntSet> HOKIDSV; //set of attribute index (k) to hold out
260 public:
261  TFlt MinVal; // minimum value of F (0)
262  TFlt MaxVal; // maximum value of F (for numerical reason)
263  TFlt MinValW; // minimum value of W (for numerical reason)
264  TFlt MaxValW; // maximum value of W (for numerical reason)
265  TFlt NegWgt; // weight of negative example (a pair of nodes without an edge)
266  TFlt LassoCoef; // L1 regularization coefficient for W (MLE = argmax P(X|F, W) - LassoCoef * |W|)
267  TFlt WeightAttr; // likelihood = log P(G|F) + WeightAttr * log P(X|F, W)
268  TFlt PNoCom; // base probability \varepsilon (edge probability between a pair of nodes sharing no community
269  TBool DoParallel; // whether to use parallelism for computation
270 
271  TCesna() { G = TUNGraph::New(10, -1); }
272  TCesna(const PUNGraph& GraphPt, const THash<TInt, TIntV>& NIDAttrH, const int& InitComs, const int RndSeed = 0): Rnd(RndSeed), RegCoef(0),
273  MinVal(0.0), MaxVal(10.0), MinValW(-10.0), MaxValW(10.0), NegWgt(1.0), LassoCoef(1.0), WeightAttr(1.0) { SetGraph(GraphPt, NIDAttrH); NeighborComInit(InitComs); }
// Serializes the model to the binary stream SOut.
// Fields are written in a fixed order, which is the same order in which Load reads them.
// Rnd, WeightAttr and DoParallel are not written.
274  void Save(TSOut& SOut) {
275  G->Save(SOut); // graph
276  X.Save(SOut); // node attributes
277  F.Save(SOut); // community memberships
278  W.Save(SOut); // logistic regression weights
279  Attrs.Save(SOut);
280  NIDToIdx.Save(SOut);
281  RegCoef.Save(SOut);
282  LassoCoef.Save(SOut);
283  SumFV.Save(SOut);
284  NumComs.Save(SOut);
285  HOVIDSV.Save(SOut); // held-out node pairs
286  HOKIDSV.Save(SOut); // held-out attributes
287  MinVal.Save(SOut);
288  MaxVal.Save(SOut);
289  MinValW.Save(SOut);
290  MaxValW.Save(SOut);
291  NegWgt.Save(SOut);
292  PNoCom.Save(SOut);
293  }
294  void Load(TSIn& SIn, const int& RndSeed = 0) {
295  G->Load(SIn);
296  X.Load(SIn);
297  F.Load(SIn);
298  W.Load(SIn);
299  Attrs.Load(SIn);
300  NIDToIdx.Load(SIn);
301  RegCoef.Load(SIn);
302  LassoCoef.Load(SIn);
303  SumFV.Load(SIn);
304  NumComs.Load(SIn);
305  HOVIDSV.Load(SIn);
306  HOKIDSV.Load(SIn);
307  MinVal.Load(SIn);
308  MaxVal.Load(SIn);
309  MinValW.Load(SIn);
310  MaxValW.Load(SIn);
311  NegWgt.Load(SIn);
312  PNoCom.Load(SIn);
313  }
314 
315  void SetGraph(const PUNGraph& GraphPt, const THash<TInt, TIntV>& NIDAttrH);
// Plain accessors for the regularization / weighting parameters and the attribute count.
316  void SetRegCoef(const double _RegCoef) { RegCoef = _RegCoef; } // stores the regularization coefficient
317  double GetRegCoef() { return RegCoef; }
318  void SetWeightAttr(const double _WeightAttr) { IAssert (_WeightAttr <= 1.0 && _WeightAttr >= 0.0); WeightAttr = _WeightAttr; } // asserts that the weight lies in [0, 1]
319  double GetWeightAttr() { return WeightAttr; }
320  void SetLassoCoef(const double _LassoCoef) { LassoCoef = _LassoCoef; } // stores the L1 coefficient for W
321  int GetAttrs() { return Attrs; } // number of attributes
322  double GetComFromNID(const int& NID, const int& CID) {
323  int NIdx = NIDToIdx.GetKeyId(NID);
324  if (F[NIdx].IsKey(CID)) {
325  return F[NIdx].GetDat(CID);
326  } else {
327  return 0.0;
328  }
329  }
// Returns the current L1 coefficient for W.
330  double GetLassoCoef() { return LassoCoef; }
331  void InitW() { // initialize W
332  W.Gen(Attrs);
333  for (int k = 0; k < Attrs; k++) {
334  W[k].Gen(NumComs + 1);
335  }
336  }
337  void SetAttrHoldOut(const int NID, const int KID) {
338  int NIdx = NIDToIdx.GetKeyId(NID);
339  HOKIDSV[NIdx].AddKey(KID);
340  }
341  void SetAttrHoldOutForOneNode(const int NID) {
342  for (int k = 0; k < Attrs; k++) {
343  SetAttrHoldOut(NID, k);
344  }
345  }
// Copy the whole weight matrix out of / into the model.
346  void GetW(TVec<TFltV>& _W) { _W = W; } // _W receives a copy of W
347  void SetW(TVec<TFltV>& _W) { W = _W; } // W is replaced by a copy of _W
348  void RandomInit(const int InitComs);
349  void NeighborComInit(const int InitComs);
350  void NeighborComInit(TFltIntPrV& NIdPhiV, const int InitComs);
// Returns the number of communities.
351  int GetNumComs() { return NumComs; }
352  void SetCmtyVV(const TVec<TIntV>& CmtyVV);
353  double Likelihood(const bool DoParallel = false);
354  double LikelihoodForRow(const int UID);
355  double LikelihoodForRow(const int UID, const TIntFltH& FU);
// Convenience overloads that fill in the model's own membership row F[UID]
// and weight vector W[K] before delegating to the four-argument overload.
356  double LikelihoodAttrKForRow(const int UID, const int K) { return LikelihoodAttrKForRow(UID, K, F[UID]); }
357  double LikelihoodAttrKForRow(const int UID, const int K, const TIntFltH& FU) { return LikelihoodAttrKForRow(UID, K, FU, W[K]); }
358  double LikelihoodAttrKForRow(const int UID, const int K, const TIntFltH& FU, const TFltV& WK);
359  double LikelihoodForWK(const int K, const TFltV& WK) {
360  double L = 0.0;
361  for (int u = 0; u < F.Len(); u++) {
362  if (HOKIDSV[u].IsKey(K)) { continue; }
363  L += LikelihoodAttrKForRow(u, K, F[u], WK);
364  }
365  for (int c = 0; c < WK.Len() - 1; c++) {
366  L -= LassoCoef * fabs(WK[c]);
367  }
368  return L;
369  }
// Objective for attribute K evaluated at the model's current weights W[K].
370  double LikelihoodForWK(const int K) { return LikelihoodForWK(K, W[K]); }
371  double LikelihoodAttr() {
372  double L = 0.0;
373  for (int k = 0; k < Attrs; k++) {
374  for (int u = 0; u < F.Len(); u++) {
375  if (HOKIDSV[u].IsKey(k)) { continue; }
376  L += LikelihoodAttrKForRow(u, k, F[u], W[k]);
377  }
378  }
379  return L;
380  }
381  double LikelihoodGraph() {
382  double L = Likelihood();
383  //add regularization
384  if (RegCoef > 0.0) { //L1
385  for (int u = 0; u < F.Len(); u++) {
386  L += RegCoef * Sum(F[u]);
387  }
388  }
389  if (RegCoef < 0.0) { //L2
390  for (int u = 0; u < F.Len(); u++) {
391  L -= RegCoef * Norm2(F[u]);
392  }
393  }
394 
395  return L - WeightAttr * LikelihoodAttr();
396  }
397  void GenHoldOutAttr(const double HOFrac, TVec<TIntSet>& HOSetV) {
398  HOSetV.Gen(F.Len());
399  int HoldOutCnt = (int) ceil(HOFrac * G->GetNodes() * Attrs);
400  TIntPrSet NIDKIDSet(HoldOutCnt);
401  int Cnt = 0;
402  for (int h = 0; h < 10 * HoldOutCnt; h++) {
403  int UID = Rnd.GetUniDevInt(F.Len());
404  int KID = Rnd.GetUniDevInt(Attrs);
405  if (! NIDKIDSet.IsKey(TIntPr(UID, KID))) {
406  NIDKIDSet.AddKey(TIntPr(UID, KID));
407  HOSetV[UID].AddKey(KID);
408  Cnt++;
409  }
410  if (Cnt >= HoldOutCnt) { break; }
411  }
412  printf("%d hold out pairs generated for attributes\n", Cnt);
413  }
414  void SetHoldOut(const double HOFrac) {
415  TVec<TIntSet> HoldOut;
416  TCesnaUtil::GenHoldOutPairs(G, HoldOut, HOFrac, Rnd);
417  GenHoldOutAttr(HOFrac, HOKIDSV);
418  HOVIDSV = HoldOut;
419  }
420  void GradientForRow(const int UID, TIntFltH& GradU, const TIntSet& CIDSet);
421  void GradientForWK(TFltV& GradV, const int K) {
422  GradV.Gen(NumComs + 1);
423  for (int u = 0; u < F.Len(); u++) {
424  if (HOKIDSV[u].IsKey(K)) { continue; }
425  double Pred = PredictAttrK(u, K);
426  for (TIntFltH::TIter CI = F[u].BegI(); CI < F[u].EndI(); CI++) {
427  GradV[CI.GetKey()] += (GetAttr(u, K) - Pred) * GetCom(u, CI.GetKey());
428  }
429  GradV[NumComs] += (GetAttr(u, K) - Pred);
430  }
431 
432  for (int c = 0; c < GradV.Len() - 1; c++) {
433  GradV[c] -= LassoCoef * TMath::Sign(GetW(c, K));
434  }
435  }
436  void GetCmtyVV(TVec<TIntV>& CmtyVV);
437  void GetCmtyVV(TVec<TIntV>& CmtyVV, TVec<TFltV>& Wck, const double Thres, const int MinSz = 3);
438  void GetCmtyVV(TVec<TIntV>& CmtyVV, const double Thres, const int MinSz = 3) {
439  TVec<TFltV> TmpV;
440  GetCmtyVV(CmtyVV, TmpV, Thres, MinSz);
441  }
442  void GetCmtyVV(TVec<TIntV>& CmtyVV, TVec<TFltV>& Wck) {
443  GetCmtyVV(CmtyVV, Wck, sqrt(2.0 * (double) G->GetEdges() / G->GetNodes() / G->GetNodes()), 3);
444  }
445 
446  void GetCmtyVVUnSorted(TVec<TIntV>& CmtyVV);
447  void GetCmtyVVUnSorted(TVec<TIntV>& CmtyVV, const double Thres, const int MinSz = 3);
448  /* GetCmtyVVRelative: NOT working well (low accuracy)
449  void GetCmtyVVRelative(TVec<TIntV>& CmtyVV, const int MinSz = 3) {
450  CmtyVV.Clr();
451  for (int c = 0; c < NumComs; c++) {
452  TIntV CmtyV;
453  double MaxVal = 0.0;
454  for (int u = 0; u < G->GetNodes(); u++) {
455  if (GetCom(u, c) > MaxVal) { MaxVal = GetCom(u, c); }
456  }
457  if (MaxVal == 0.0) { continue; }
458  for (int u = 0; u < G->GetNodes(); u++) {
459  if (GetCom(u, c) > 0.5 * MaxVal) { CmtyV.Add(NIDToIdx[u]); }
460  }
461  if (CmtyV.Len() >= MinSz) { CmtyVV.Add(CmtyV); }
462  }
463  if ( NumComs != CmtyVV.Len()) {
464  printf("Community vector generated. %d communities are ommitted\n", NumComs.Val - CmtyVV.Len());
465  }
466  }
467  */
468  int FindComs(TIntV& ComsV, const bool UseBIC = false, const double HOFrac = 0.2, const int NumThreads = 20, const TStr PlotLFNm = TStr(), const double StepAlpha = 0.3, const double StepBeta = 0.1);
469  int FindComs(const int NumThreads, const int MaxComs, const int MinComs, const int DivComs, const TStr OutFNm, const bool UseBIC = false, const double HOFrac = 0.1, const double StepAlpha = 0.3, const double StepBeta = 0.3);
470  void DisplayAttrs(const int TopK, const TStrHash<TInt>& NodeNameH) {
471  for (int u = 0; u < X.Len(); u++) {
472  if (NodeNameH.Len() > 0) {
473  printf("NID: %s\t Attrs: ", NodeNameH.GetKey(NIDToIdx[u]));
474  } else {
475  printf("NID: %d\t Attrs: ", NIDToIdx[u].Val);
476  }
477  for (int k = 0; k < X[u].Len(); k++) {
478  printf("%d, ", X[u][k].Val);
479  }
480  printf("\n");
481  if (u >= TopK) { break; }
482  }
483  }
484  double LikelihoodHoldOut();
485  double GetStepSizeByLineSearch(const int UID, const TIntFltH& DeltaV, const TIntFltH& GradV, const double& Alpha, const double& Beta, const int MaxIter = 10);
486  double GetStepSizeByLineSearchForWK(const int K, const TFltV& DeltaV, const TFltV& GradV, const double& Alpha, const double& Beta, const int MaxIter = 10) {
487  double StepSize = 1.0;
488  double InitLikelihood = LikelihoodForWK(K);
489  TFltV NewVarV(DeltaV.Len());
490  IAssert(DeltaV.Len() == NumComs + 1);
491  for(int iter = 0; iter < MaxIter; iter++) {
492  for (int c = 0; c < DeltaV.Len(); c++){
493  double NewVal = W[K][c] + StepSize * DeltaV[c];
494  if (NewVal < MinValW) { NewVal = MinValW; }
495  if (NewVal > MaxValW) { NewVal = MaxValW; }
496  NewVarV[c] = NewVal;
497  }
498  if (LikelihoodForWK(K, NewVarV) < InitLikelihood + Alpha * StepSize * TLinAlg::DotProduct(GradV, DeltaV)) {
499  StepSize *= Beta;
500  } else {
501  break;
502  }
503  if (iter == MaxIter - 1) {
504  StepSize = 0.0;
505  break;
506  }
507  }
508  return StepSize;
509  }
510  int GetPositiveW() {
511  int PosCnt = 0;
512  for (int c = 0; c < NumComs; c++) {
513  for (int k = 0; k < Attrs; k++) {
514  if (GetW(c, k) > 0.0) { PosCnt++; }
515  }
516  }
517  return PosCnt;
518  }
519  int MLEGradAscent(const double& Thres, const int& MaxIter, const TStr PlotNm, const double StepAlpha = 0.3, const double StepBeta = 0.1);
520  int MLEGradAscentParallel(const double& Thres, const int& MaxIter, const int ChunkNum, const int ChunkSize, const TStr PlotNm, const double StepAlpha = 0.3, const double StepBeta = 0.1);
521  int MLEGradAscentParallel(const double& Thres, const int& MaxIter, const int ChunkNum, const TStr PlotNm = TStr(), const double StepAlpha = 0.3, const double StepBeta = 0.1) {
522  int ChunkSize = G->GetNodes() / 10 / ChunkNum;
523  if (ChunkSize == 0) { ChunkSize = 1; }
524  return MLEGradAscentParallel(Thres, MaxIter, ChunkNum, ChunkSize, PlotNm, StepAlpha, StepBeta);
525  }
526  //double FindOptimalThres(const TVec<TIntV>& TrueCmtyVV, TVec<TIntV>& CmtyVV);
527  double inline GetCom(const int& NID, const int& CID) {
528  if (F[NID].IsKey(CID)) {
529  return F[NID].GetDat(CID);
530  } else {
531  return 0.0;
532  }
533  }
534  double inline GetAttr(const int& NID, const int& K) {
535  if (X[NID].IsKey(K)) {
536  return 1.0;
537  } else {
538  return 0.0;
539  }
540  }
541  void inline AddCom(const int& NID, const int& CID, const double& Val) {
542  if (F[NID].IsKey(CID)) {
543  SumFV[CID] -= F[NID].GetDat(CID);
544  }
545  F[NID].AddDat(CID) = Val;
546  SumFV[CID] += Val;
547  }
548 
549  void inline DelCom(const int& NID, const int& CID) {
550  if (F[NID].IsKey(CID)) {
551  SumFV[CID] -= F[NID].GetDat(CID);
552  F[NID].DelKey(CID);
553  }
554  }
555  /*
556  double inline DotProduct(const TIntFltH& UV, const TFltV& VV) {
557  double DP = 0;
558  for (TIntFltH::TIter HI = UV.BegI(); HI < UV.EndI(); HI++) {
559  DP += VV[HI.GetKey()] * HI.GetDat();
560  }
561  return DP;
562  }
563  */
564  double inline DotProduct(const TIntFltH& UV, const TIntFltH& VV) {
565  double DP = 0;
566  if (UV.Len() > VV.Len()) {
567  for (TIntFltH::TIter HI = UV.BegI(); HI < UV.EndI(); HI++) {
568  if (VV.IsKey(HI.GetKey())) {
569  DP += VV.GetDat(HI.GetKey()) * HI.GetDat();
570  }
571  }
572  } else {
573  for (TIntFltH::TIter HI = VV.BegI(); HI < VV.EndI(); HI++) {
574  if (UV.IsKey(HI.GetKey())) {
575  DP += UV.GetDat(HI.GetKey()) * HI.GetDat();
576  }
577  }
578  }
579  return DP;
580  }
581  double inline DotProduct(const int& UID, const int& VID) {
582  return DotProduct(F[UID], F[VID]);
583  }
584  double inline Prediction(const TIntFltH& FU, const TIntFltH& FV) {
585  double DP = log (1.0 / (1.0 - PNoCom)) + DotProduct(FU, FV);
586  IAssertR(DP > 0.0, TStr::Fmt("DP: %f", DP));
587  return exp(- DP);
588  }
589  double inline PredictAttrK(const TIntFltH& FU, const TFltV& WK) {
590  double DP = 0.0;
591  for (TIntFltH::TIter FI = FU.BegI(); FI < FU.EndI(); FI++) {
592  DP += FI.GetDat() * WK[FI.GetKey()];
593  }
594  DP += WK.Last();
595  return Sigmoid(DP);
596  }
597  double inline PredictAttrK(const TIntFltH& FU, const int K) {
598  return PredictAttrK(FU, W[K]);
599  }
600  double inline PredictAttrK(const int UID, const int K) {
601  return PredictAttrK(F[UID], W[K]);
602  }
603  double inline GetW(const int CID, const int K) {
604  return W[K][CID];
605  }
606  double inline Prediction(const int& UID, const int& VID) {
607  return Prediction(F[UID], F[VID]);
608  }
609  double inline Sum(const TIntFltH& UV) {
610  double N = 0.0;
611  for (TIntFltH::TIter HI = UV.BegI(); HI < UV.EndI(); HI++) {
612  N += HI.GetDat();
613  }
614  return N;
615  }
616  double inline Norm2(const TIntFltH& UV) {
617  double N = 0.0;
618  for (TIntFltH::TIter HI = UV.BegI(); HI < UV.EndI(); HI++) {
619  N += HI.GetDat() * HI.GetDat();
620  }
621  return N;
622  }
623  /*
624  double inline Norm1(const TFltV& UV) {
625  double N = 0.0;
626  for (int i = 0; i < UV.Len(); i++) {
627  N += fabs(UV[i]);
628  }
629  return N;
630  }
631  */
// Logistic function 1 / (1 + e^(-X)).
double inline Sigmoid(const double X) {
  const double ExpNegX = exp(-X);
  return 1.0 / (1.0 + ExpNegX);
}
635 };
636 
637 
638 #endif
double Sigmoid(const double X)
Definition: agmattr.h:632
int MLEGradAscentParallel(const double &Thres, const int &MaxIter, const int ChunkNum, const int ChunkSize, const TStr PlotNm, const double StepAlpha=0.3, const double StepBeta=0.1)
Definition: agmattr.cpp:594
#define IAssert(Cond)
Definition: bd.h:262
int GetInt() const
Definition: dt.h:578
void GenHoldOutAttr(const double HOFrac, TVec< TIntSet > &HOSetV)
Definition: agmattr.h:397
TPair< TInt, TInt > TIntPr
Definition: ds.h:83
PUNGraph G
Definition: agmattr.h:248
double LikelihoodForWK(const int K)
Definition: agmattr.h:370
TFltV SumFV
Definition: agmattr.h:256
#define IAssertR(Cond, Reason)
Definition: bd.h:265
static void GetNIdPhiV(const PGraph &G, TFltIntPrV &NIdPhiV)
Definition: agmattr.h:77
TPair< TFlt, TInt > TFltIntPr
Definition: ds.h:97
void SetWeightAttr(const double _WeightAttr)
Definition: agmattr.h:318
TFlt MaxVal
Definition: agmattr.h:262
TBool DoParallel
Definition: agmattr.h:269
int GetNumComs()
Definition: agmattr.h:351
Definition: tm.h:355
double PredictAttrK(const int UID, const int K)
Definition: agmattr.h:600
TFlt LassoCoef
Definition: agmattr.h:266
Definition: dt.h:11
double Sum(const TIntFltH &UV)
Definition: agmattr.h:609
double GetCom(const int &NID, const int &CID)
Definition: agmattr.h:527
TFlt NegWgt
Definition: agmattr.h:265
static void DumpNIDAttrHToMetis(const TStr &FNm, const THash< TInt, TIntV > &NIDAttrH, const TIntV &NIDV)
Definition: agmattr.h:179
TVec< TFltV > W
Definition: agmattr.h:251
int Val
Definition: dt.h:1136
int MLEGradAscent(const double &Thres, const int &MaxIter, const TStr PlotNm, const double StepAlpha=0.3, const double StepBeta=0.1)
Definition: agmattr.cpp:505
void Save(TSOut &SOut) const
Definition: dt.h:1150
bool IsIn(const TVal &Val) const
Checks whether element Val is a member of the vector.
Definition: ds.h:828
double Prediction(const TIntFltH &FU, const TIntFltH &FV)
Definition: agmattr.h:584
int GetKeyId(const TKey &Key) const
Definition: shash.h:1328
double DotProduct(const TIntFltH &UV, const TIntFltH &VV)
Definition: agmattr.h:564
TFlt PNoCom
Definition: agmattr.h:268
void SetRegCoef(const double _RegCoef)
Definition: agmattr.h:316
TIter BegI() const
Definition: hash.h:213
TFlt MinVal
Definition: agmattr.h:261
static void GetNbhCom(const PGraph &Graph, const int NID, TIntSet &NBCmtyS)
Definition: agmattr.h:68
int GetEdges() const
Returns the number of edges in the graph.
Definition: graph.cpp:82
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:575
int Len() const
Definition: hash.h:842
bool Empty() const
Definition: hash.h:227
double LikelihoodAttr()
Definition: agmattr.h:371
Definition: ss.h:72
void Gen(const int &ExpectVals)
Definition: shash.h:1115
double LikelihoodForWK(const int K, const TFltV &WK)
Definition: agmattr.h:359
double GetAttr(const int &NID, const int &K)
Definition: agmattr.h:534
bool GetInt(const int &FldN, int &Val) const
If the field FldN is an integer its value is returned in Val and the function returns true...
Definition: ss.cpp:447
const TDat & GetDat(const TKey &Key) const
Definition: hash.h:262
void Load(TSIn &SIn, const int &RndSeed=0)
Definition: agmattr.h:294
TIntSet NIDToIdx
Definition: agmattr.h:254
void Save(TSOut &SOut) const
Saves the graph to a (binary) stream SOut.
Definition: graph.h:170
TIter EndI() const
Definition: hash.h:218
void GetW(TVec< TFltV > &_W)
Definition: agmattr.h:346
void Load(TSIn &SIn)
Definition: ds.h:946
int GetAttrs()
Definition: agmattr.h:321
double GetStepSizeByLineSearch(const int UID, const TIntFltH &DeltaV, const TIntFltH &GradV, const double &Alpha, const double &Beta, const int MaxIter=10)
Definition: agmattr.cpp:480
double PredictAttrK(const TIntFltH &FU, const TFltV &WK)
Definition: agmattr.h:589
bool IsKey(const TKey &Key) const
Definition: shash.h:1148
double GetWeightAttr()
Definition: agmattr.h:319
double Likelihood(const bool DoParallel=false)
Definition: agmattr.cpp:137
int GetNodes() const
Returns the number of nodes in the graph.
Definition: graph.h:192
const char * GetFld(const int &FldN) const
Returns the contents of the field at index FldN.
Definition: ss.h:129
double LikelihoodGraph()
Definition: agmattr.h:381
double LikelihoodAttrKForRow(const int UID, const int K)
Definition: agmattr.h:356
Definition: dt.h:1383
static void FilterLowEntropy(const THash< TInt, TIntV > &OldNIDAttrH, THash< TInt, TIntV > &NewNIDAttrH, const TIntStrH &OldNameH, TIntStrH &NewNameH, const double MinFrac=0.00001, const double MaxFrac=0.95, const int MinCnt=3)
Definition: agmattr.h:203
static void LoadNIDAttrHFromNIDKH(const TIntV &NIDV, const TStr &InFNm, THash< TInt, TIntV > &NIDAttrH)
Definition: agmattr.h:123
Definition: fl.h:58
TVec< TIntFltH > F
Definition: agmattr.h:250
void Save(TSOut &SOut) const
Definition: ds.h:954
TSsFmt
Spread-Sheet Separator Format.
Definition: ss.h:5
static void DumpNIDAttrHToNIDK(const TStr &FNm, const THash< TInt, TIntSet > &NIDAttrH, const TStrHash< TInt > &FeatNameH, const TStrHash< TInt > &NodeNameH)
Definition: agmattr.h:127
void DisplayAttrs(const int TopK, const TStrHash< TInt > &NodeNameH)
Definition: agmattr.h:470
TFlt MaxValW
Definition: agmattr.h:264
TFlt MinValW
Definition: agmattr.h:263
const char * GetTmStr() const
Definition: tm.h:370
TInt Attrs
Definition: agmattr.h:252
void SetW(TVec< TFltV > &_W)
Definition: agmattr.h:347
void Gen(const int &ExpectVals)
Definition: hash.h:222
bool IsKey(const char *Key) const
Definition: hash.h:897
#define HasGraphFlag(TGraph, Flag)
For quick testing of the properties of the graph/network object (see TGraphFlag). ...
Definition: gbase.h:41
double GetComFromNID(const int &NID, const int &CID)
Definition: agmattr.h:322
void RandomInit(const int InitComs)
Definition: agmattr.cpp:9
void SetAttrHoldOut(const int NID, const int KID)
Definition: agmattr.h:337
double LikelihoodHoldOut()
Definition: agmattr.cpp:452
void GetCmtyVV(TVec< TIntV > &CmtyVV, TVec< TFltV > &Wck)
Definition: agmattr.h:442
const char * GetKey(const int &KeyId) const
Definition: hash.h:893
static double Round(const double &Val)
Definition: xmath.h:16
double Norm2(const TIntFltH &UV)
Definition: agmattr.h:616
void Load(TSIn &SIn)
Definition: dt.h:1149
TVec< TIntSet > X
Definition: agmattr.h:249
int MLEGradAscentParallel(const double &Thres, const int &MaxIter, const int ChunkNum, const TStr PlotNm=TStr(), const double StepAlpha=0.3, const double StepBeta=0.1)
Definition: agmattr.h:521
TFlt RegCoef
Definition: agmattr.h:255
TVec< TIntSet > HOKIDSV
Definition: agmattr.h:259
TCesna()
Definition: agmattr.h:271
static PUNGraph New()
Static constructor that returns a pointer to the graph. Call: PUNGraph Graph = TUNGraph::New().
Definition: graph.h:172
TVec< TIntSet > HOVIDSV
Definition: agmattr.h:258
int AddKey(const TKey &Key)
Definition: shash.h:1254
const TVal & Last() const
Returns a reference to the last element of the vector.
Definition: ds.h:579
void AddCom(const int &NID, const int &CID, const double &Val)
Definition: agmattr.h:541
Tab separated.
Definition: ss.h:6
void GetCmtyVVUnSorted(TVec< TIntV > &CmtyVV)
Definition: agmattr.cpp:329
void InitW()
Definition: agmattr.h:331
double PredictAttrK(const TIntFltH &FU, const int K)
Definition: agmattr.h:597
static void GenHoldOutPairs(const PGraph &G, TVec< TIntSet > &HoldOutSet, double HOFrac, TRnd &Rnd)
Definition: agmattr.h:38
TCesna(const PUNGraph &GraphPt, const THash< TInt, TIntV > &NIDAttrH, const int &InitComs, const int RndSeed=0)
Definition: agmattr.h:272
double GetRegCoef()
Definition: agmattr.h:317
TRnd Rnd
Definition: agmattr.h:253
Definition: fl.h:128
void GetCmtyVV(TVec< TIntV > &CmtyVV)
Definition: agmattr.cpp:289
static void DumpNIDAttrHToNIDK(const TStr &FNm, const THash< TInt, TIntV > &NIDAttrH)
Definition: agmattr.h:165
void SetCmtyVV(const TVec< TIntV > &CmtyVV)
Definition: agmattr.cpp:85
Definition: dt.h:1134
static PUNGraph Load(TSIn &SIn)
Static constructor that loads the graph from a stream SIn and returns a pointer to it...
Definition: graph.h:178
Definition: hash.h:781
Directed graph (TNGraph, TNEGraph); otherwise the graph is undirected (TUNGraph).
Definition: gbase.h:13
void Save(TSOut &SOut)
Definition: agmattr.h:274
static void LoadNIDAttrHFromNIDKH(const TIntV &NIDV, const TStr &InFNm, THash< TInt, TIntV > &NIDAttrH, const TStrHash< TInt > &NodeNameH, const TSsFmt Sep=ssfTabSep)
Definition: agmattr.h:100
void NeighborComInit(const int InitComs)
Definition: agmattr.cpp:32
int Len() const
Definition: shash.h:1121
Definition: ds.h:32
double DotProduct(const int &UID, const int &VID)
Definition: agmattr.h:581
static double GetConductance(const PGraph &Graph, const TIntSet &CmtyS, const int Edges)
Definition: agmattr.h:10
TStr GetStr() const
Definition: dt.h:1360
double GetStepSizeByLineSearchForWK(const int K, const TFltV &DeltaV, const TFltV &GradV, const double &Alpha, const double &Beta, const int MaxIter=10)
Definition: agmattr.h:486
Definition: dt.h:412
double LikelihoodForRow(const int UID)
Definition: agmattr.cpp:157
static TStr Fmt(const char *FmtStr,...)
Definition: dt.cpp:1599
double GetLassoCoef()
Definition: agmattr.h:330
TInt NumComs
Definition: agmattr.h:257
static void DumpNIDAttrHToNIDK(const TStr &FNm, const THash< TInt, TIntV > &NIDAttrH, const TStrHash< TInt > &FeatNameH)
Definition: agmattr.h:161
uint64 GetLineNo() const
Returns the line number of the current line.
Definition: ss.h:118
static void DumpNIDAttrHToNIDK(const TStr &FNm, const THash< TInt, TIntV > &NIDAttrH, const TStrHash< TInt > &FeatNameH, const TStrHash< TInt > &NodeNameH)
Definition: agmattr.h:148
static void DumpNIDAttrHToNIDK(const TStr &FNm, const THash< TInt, TIntSet > &NIDAttrH, const TStrHash< TInt > &FeatNameH)
Definition: agmattr.h:140
static int Sign(const T &Val)
Definition: xmath.h:29
void Load(TSIn &SIn)
Definition: dt.h:1402
void GradientForRow(const int UID, TIntFltH &GradU, const TIntSet &CIDSet)
Definition: agmattr.cpp:213
void SetLassoCoef(const double _LassoCoef)
Definition: agmattr.h:320
void SetGraph(const PUNGraph &GraphPt, const THash< TInt, TIntV > &NIDAttrH)
Definition: agmattr.cpp:99
void Clr(const bool &DoDel=true, const int &NoDelLim=-1, const bool &ResetDat=true)
Definition: hash.h:361
void Load(TSIn &SIn)
Definition: shash.h:1078
bool Next()
Loads next line from the input file.
Definition: ss.cpp:412
void GradientForWK(TFltV &GradV, const int K)
Definition: agmattr.h:421
static double DotProduct(const TFltV &x, const TFltV &y)
Definition: linalg.cpp:165
void Save(TSOut &SOut) const
Definition: shash.h:1082
void GetCmtyVV(TVec< TIntV > &CmtyVV, const double Thres, const int MinSz=3)
Definition: agmattr.h:438
void Gen(const TSizeTy &_Vals)
Constructs a vector (an array) of _Vals elements.
Definition: ds.h:523
double Prediction(const int &UID, const int &VID)
Definition: agmattr.h:606
int GetUniDevInt(const int &Range=0)
Definition: dt.cpp:39
static int GetAttrs(const THash< TInt, TIntV > &NIDAttrH)
Definition: agmattr.h:169
int FindComs(TIntV &ComsV, const bool UseBIC=false, const double HOFrac=0.2, const int NumThreads=20, const TStr PlotLFNm=TStr(), const double StepAlpha=0.3, const double StepBeta=0.1)
Definition: agmattr.cpp:361
double LikelihoodAttrKForRow(const int UID, const int K, const TIntFltH &FU)
Definition: agmattr.h:357
char * CStr()
Definition: dt.h:476
bool IsKey(const TKey &Key) const
Definition: hash.h:258
int GetPositiveW()
Definition: agmattr.h:510
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:602
void SetHoldOut(const double HOFrac)
Definition: agmattr.h:414
Definition: dt.h:971
int Len() const
Definition: hash.h:228
TDat & AddDat(const TKey &Key)
Definition: hash.h:238
void DelCom(const int &NID, const int &CID)
Definition: agmattr.h:549
void SetAttrHoldOutForOneNode(const int NID)
Definition: agmattr.h:341
TFlt WeightAttr
Definition: agmattr.h:267
double GetW(const int CID, const int K)
Definition: agmattr.h:603
static void DumpNIDAttrHToNIDK(const TStr &FNm, const THash< TInt, TIntSet > &NIDAttrH)
Definition: agmattr.h:144
void Save(TSOut &SOut) const
Definition: dt.h:1399
const TKey & GetKey(const int &KeyId) const
Definition: hash.h:252
static void FilterLowEntropy(const THash< TInt, TIntV > &OldNIDAttrH, THash< TInt, TIntV > &NewNIDAttrH, const double MinFrac=0.00001, const double MaxFrac=0.95, const int MinCnt=3)
Definition: agmattr.h:241
int GetKeyId(const char *Key) const
Definition: hash.h:994
Vector is a sequence of TVal objects representing an array that can change in size.
Definition: ds.h:430
bool IsKeyId(const int &KeyId) const
Definition: hash.h:904
void SortByDat(const bool &Asc=true)
Definition: hash.h:292