SNAP Library 6.0, User Reference  2020-12-09 16:24:20
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
agmattr.h
Go to the documentation of this file.
1 #ifndef yanglib_agmattr1_h
2 #define yanglib_agmattr1_h
3 #include "Snap.h"
4 
5 class TCesnaUtil {
6 public:
7  //static double GetConductance(const PUNGraph& Graph, const TIntSet& CmtyS, const int Edges);
8  //static double GetConductance(const PNGraph& Graph, const TIntSet& CmtyS, const int Edges);
9 template<class PGraph>
10 static double GetConductance(const PGraph& Graph, const TIntSet& CmtyS, const int Edges) {
11  const bool GraphType = HasGraphFlag(typename PGraph::TObj, gfDirected);
12  int Edges2;
13  if (GraphType) { Edges2 = Edges >= 0 ? Edges : Graph->GetEdges(); }
14  else { Edges2 = Edges >= 0 ? 2 * Edges : Graph->GetEdges(); }
15  int Vol = 0, Cut = 0;
16  double Phi = 0.0;
17  for (int i = 0; i < CmtyS.Len(); i++) {
18  if (! Graph->IsNode(CmtyS[i])) { continue; }
19  typename PGraph::TObj::TNodeI NI = Graph->GetNI(CmtyS[i]);
20  for (int e = 0; e < NI.GetOutDeg(); e++) {
21  if (! CmtyS.IsKey(NI.GetOutNId(e))) { Cut += 1; }
22  }
23  Vol += NI.GetOutDeg();
24  }
25  // get conductance
26  if (Vol != Edges2) {
27  if (2 * Vol > Edges2) { Phi = Cut / double (Edges2 - Vol); }
28  else if (Vol == 0) { Phi = 0.0; }
29  else { Phi = Cut / double(Vol); }
30  } else {
31  if (Vol == Edges2) { Phi = 1.0; }
32  }
33  return Phi;
34 }
35 
36 
37 template<class PGraph>
38  static void GenHoldOutPairs(const PGraph& G, TVec<TIntSet>& HoldOutSet, double HOFrac, TRnd& Rnd) {
39  TIntPrV EdgeV(G->GetEdges(), 0);
40  for (typename PGraph::TObj::TEdgeI EI = G->BegEI(); EI < G->EndEI(); EI++) {
41  EdgeV.Add(TIntPr(EI.GetSrcNId(), EI.GetDstNId()));
42  }
43  EdgeV.Shuffle(Rnd);
44 
45  const bool GraphType = HasGraphFlag(typename PGraph::TObj, gfDirected);
46  HoldOutSet.Gen(G->GetNodes());
47  int HOTotal = int(HOFrac * G->GetNodes() * (G->GetNodes() - 1) / 2.0);
48  if (GraphType) { HOTotal *= 2;}
49  int HOCnt = 0;
50  int HOEdges = (int) TMath::Round(HOFrac * G->GetEdges());
51  printf("holding out %d edges...\n", HOEdges);
52  for (int he = 0; he < (int) HOEdges; he++) {
53  HoldOutSet[EdgeV[he].Val1].AddKey(EdgeV[he].Val2);
54  if (! GraphType) { HoldOutSet[EdgeV[he].Val2].AddKey(EdgeV[he].Val1); }
55  HOCnt++;
56  }
57  printf("%d Edges hold out\n", HOCnt);
58  while(HOCnt++ < HOTotal) {
59  int SrcNID = Rnd.GetUniDevInt(G->GetNodes());
60  int DstNID = Rnd.GetUniDevInt(G->GetNodes());
61  if (SrcNID == DstNID) { continue; }
62  HoldOutSet[SrcNID].AddKey(DstNID);
63  if (! GraphType) { HoldOutSet[DstNID].AddKey(SrcNID); }
64  }
65  }
66 
67 template<class PGraph>
68  static void GetNbhCom(const PGraph& Graph, const int NID, TIntSet& NBCmtyS) {
69  typename PGraph::TObj::TNodeI NI = Graph->GetNI(NID);
70  NBCmtyS.Gen(NI.GetDeg());
71  NBCmtyS.AddKey(NID);
72  for (int e = 0; e < NI.GetDeg(); e++) {
73  NBCmtyS.AddKey(NI.GetNbrNId(e));
74  }
75  }
76 template<class PGraph>
77  static void GetNIdPhiV(const PGraph& G, TFltIntPrV& NIdPhiV) {
78  NIdPhiV.Gen(G->GetNodes(), 0);
79  const int Edges = G->GetEdges();
80  TExeTm RunTm;
81  //compute conductance of neighborhood community
82  for (typename PGraph::TObj::TNodeI NI = G->BegNI(); NI < G->EndNI(); NI++) {
83  TIntSet NBCmty(NI.GetDeg() + 1);
84  double Phi;
85  if (NI.GetDeg() < 5) { //do not include nodes with too few degree
86  Phi = 1.0;
87  } else {
88  TCesnaUtil::GetNbhCom<PGraph>(G, NI.GetId(), NBCmty);
89  //if (NBCmty.Len() != NI.GetDeg() + 1) { printf("NbCom:%d, Deg:%d\n", NBCmty.Len(), NI.GetDeg()); }
90  //IAssert(NBCmty.Len() == NI.GetDeg() + 1);
91  Phi = TCesnaUtil::GetConductance(G, NBCmty, Edges);
92  }
93  //NCPhiH.AddDat(u, Phi);
94  NIdPhiV.Add(TFltIntPr(Phi, NI.GetId()));
95  }
96  printf("conductance computation completed [%s]\n", RunTm.GetTmStr());
97  fflush(stdout);
98  }
99 
100  static void LoadNIDAttrHFromNIDKH(const TIntV& NIDV, const TStr& InFNm, THash<TInt, TIntV>& NIDAttrH, const TStrHash<TInt>& NodeNameH, const TSsFmt Sep = ssfTabSep) {
101  NIDAttrH.Clr();
102  NIDAttrH.Gen(NIDV.Len());
103  printf("nodes in the graph:%d\n", NIDV.Len());
104  for (int u = 0; u < NIDV.Len(); u++) { NIDAttrH.AddDat(NIDV[u]).Gen(0, 0); }
105  TSsParser Ss(InFNm, ssfTabSep);
106  while (Ss.Next()) {
107  TStr NodeName = Ss.GetFld(0);
108  TInt NID = NodeName.GetInt();
109  if (NodeNameH.Len() > 0 && ! NodeNameH.IsKey(NodeName)) { continue; }
110  if (NodeNameH.Len() > 0) {
111  IAssertR(NodeNameH.IsKey(NodeName), TStr::Fmt("NodeName:%s", NodeName.CStr()));
112  NID = NodeNameH.GetKeyId(NodeName);
113  }
114  if (! NIDAttrH.IsKey(NID)) {
115  //printf("NodeName %s, NID %d does not exist\n", NodeName.CStr(), NID);
116  continue; } //ignore nodes who are not in the graph
117  IAssertR(! NIDAttrH.GetDat(NID).IsIn(Ss.GetInt(1)), TStr::Fmt("NIdx:%d NID:%s, K:%d", NID.Val, NodeName.CStr(), Ss.GetInt(1)));
118  NIDAttrH.GetDat(NID).Add(Ss.GetInt(1));
119  }
120  printf("%s nodes, %s lines read \n", TUInt64::GetStr(NIDAttrH.Len()).CStr(), TUInt64::GetStr(Ss.GetLineNo()).CStr());
121  //printf("%d nodes, %d lines read \n", NIDAttrH.Len(), Ss.GetLineNo());
122  }
123  static void LoadNIDAttrHFromNIDKH(const TIntV& NIDV, const TStr& InFNm, THash<TInt, TIntV>& NIDAttrH) {
124  TStrHash<TInt> TmpH;
125  LoadNIDAttrHFromNIDKH(NIDV, InFNm, NIDAttrH, TmpH);
126  }
127  static void DumpNIDAttrHToNIDK(const TStr& FNm, const THash<TInt, TIntSet>& NIDAttrH, const TStrHash<TInt>& FeatNameH, const TStrHash<TInt>& NodeNameH) {
128  FILE* F = fopen(FNm.CStr(), "wt");
129  for (int u = 0; u < NIDAttrH.Len(); u++) {
130  int NID = NIDAttrH.GetKey(u);
131  TStr NodeName = NodeNameH.IsKeyId(NID)? NodeNameH.GetKey(NID): TStr::Fmt("%d", NID);
132  for (int k = 0; k < NIDAttrH[u].Len(); k++) {
133  int KID = NIDAttrH[u][k];
134  TStr FeatName = FeatNameH.IsKeyId(KID)? FeatNameH.GetKey(KID): TStr::Fmt("%d", KID);
135  fprintf(F,"%s\t%s\n", NodeName.CStr(), FeatName.CStr());
136  }
137  }
138  fclose(F);
139  }
140  static void DumpNIDAttrHToNIDK(const TStr& FNm, const THash<TInt, TIntSet>& NIDAttrH, const TStrHash<TInt>& FeatNameH) {
141  TStrHash<TInt> TmpH;
142  DumpNIDAttrHToNIDK(FNm, NIDAttrH, FeatNameH, TmpH);
143  }
144  static void DumpNIDAttrHToNIDK(const TStr& FNm, const THash<TInt, TIntSet>& NIDAttrH) {
145  TStrHash<TInt> TmpH1, TmpH2;
146  DumpNIDAttrHToNIDK(FNm, NIDAttrH, TmpH1, TmpH2);
147  }
148  static void DumpNIDAttrHToNIDK(const TStr& FNm, const THash<TInt, TIntV>& NIDAttrH, const TStrHash<TInt>& FeatNameH, const TStrHash<TInt>& NodeNameH) {
149  FILE* F = fopen(FNm.CStr(), "wt");
150  for (int u = 0; u < NIDAttrH.Len(); u++) {
151  int NID = NIDAttrH.GetKey(u);
152  TStr NodeName = NodeNameH.IsKeyId(NID)? NodeNameH.GetKey(NID): TStr::Fmt("%d", NID);
153  for (int k = 0; k < NIDAttrH[u].Len(); k++) {
154  int KID = NIDAttrH[u][k];
155  TStr FeatName = FeatNameH.IsKeyId(KID)? FeatNameH.GetKey(KID): TStr::Fmt("%d", KID);
156  fprintf(F,"%s\t%s\n", NodeName.CStr(), FeatName.CStr());
157  }
158  }
159  fclose(F);
160  }
161  static void DumpNIDAttrHToNIDK(const TStr& FNm, const THash<TInt, TIntV>& NIDAttrH, const TStrHash<TInt>& FeatNameH) {
162  TStrHash<TInt> TmpH;
163  DumpNIDAttrHToNIDK(FNm, NIDAttrH, FeatNameH, TmpH);
164  }
165  static void DumpNIDAttrHToNIDK(const TStr& FNm, const THash<TInt, TIntV>& NIDAttrH) {
166  TStrHash<TInt> TmpH1, TmpH2;
167  DumpNIDAttrHToNIDK(FNm, NIDAttrH, TmpH1, TmpH2);
168  }
169  static int GetAttrs(const THash<TInt, TIntV>& NIDAttrH) {
170  int Attrs = 0;
171  for (int u = 0; u < NIDAttrH.Len(); u++) {
172  for (int k = 0; k < NIDAttrH[u].Len(); k++) {
173  if (NIDAttrH[u][k] >= Attrs) { Attrs = NIDAttrH[u][k] + 1; }
174  }
175  }
176  return Attrs;
177  }
178  //Metis format (N + 1) line describes the attributes of N. ID start from 1
179  static void DumpNIDAttrHToMetis(const TStr& FNm, const THash<TInt, TIntV>& NIDAttrH, const TIntV& NIDV) {
180  int AttrCnt = 0;
181  for (int u = 1; u < NIDV.Len(); u++) {
182  if (! NIDAttrH.IsKey(NIDV[u])) { continue; }
183  AttrCnt += NIDAttrH.GetDat(NIDV[u]).Len();
184  }
185  IAssert (NIDV[0] == -1);
186  FILE* F = fopen(FNm.CStr(), "wt");
187  fprintf(F, "%d %d\n", NIDV.Len() - 1, AttrCnt);
188  int TmpCnt = 0;
189  for (int u = 1; u < NIDV.Len(); u++) {
190  if (NIDAttrH.IsKey(NIDV[u])) {
191  for (int k = 0; k < NIDAttrH.GetDat(NIDV[u]).Len(); k++) {
192  if (k > 0) { fprintf(F, " "); }
193  fprintf(F, "%d", NIDAttrH.GetDat(NIDV[u])[k].Val + 1);
194  TmpCnt++;
195  }
196  }
197  fprintf(F, "\n");
198  }
199  fclose(F);
200  IAssert(AttrCnt == TmpCnt);
201 
202  }
203  static void FilterLowEntropy(const THash<TInt, TIntV>& OldNIDAttrH, THash<TInt, TIntV>& NewNIDAttrH, const TIntStrH& OldNameH, TIntStrH& NewNameH, const double MinFrac = 0.00001, const double MaxFrac = 0.95, const int MinCnt = 3) {
204  TIntH KIDCntH;
205  for (int u = 0; u < OldNIDAttrH.Len(); u++) {
206  for (int k = 0; k < OldNIDAttrH[u].Len(); k++) {
207  KIDCntH.AddDat(OldNIDAttrH[u][k])++;
208  }
209  }
210  KIDCntH.SortByDat(false);
211 
212  TIntSet SelectedK(KIDCntH.Len());
213  for (int c = 0; c < KIDCntH.Len(); c++) {
214  double Frac = (double) KIDCntH[c].Val / (double) OldNIDAttrH.Len();
215  if (KIDCntH[c].Val < MinCnt) { continue; }
216  if (Frac > MaxFrac || Frac < MinFrac) { continue; }
217  SelectedK.AddKey(KIDCntH.GetKey(c));
218  }
219  printf("%d attributes selected from %d\n", SelectedK.Len(), KIDCntH.Len());
220  NewNIDAttrH.Gen(OldNIDAttrH.Len());
221  for (int u = 0; u < OldNIDAttrH.Len(); u++) {
222  int NID = OldNIDAttrH.GetKey(u);
223  TIntV& AttrV = NewNIDAttrH.AddDat(NID);
224  for (int k = 0; k < OldNIDAttrH[u].Len(); k++) {
225  if (! SelectedK.IsKey(OldNIDAttrH[u][k])) { continue; }
226  AttrV.Add(SelectedK.GetKeyId(OldNIDAttrH[u][k]));
227  }
228  }
229 
230  if (! OldNameH.Empty()) {
231  NewNameH.Gen(SelectedK.Len());
232  for (int k = 0; k < SelectedK.Len(); k++) {
233  int OldKID = SelectedK.GetKey(k);
234  if (OldNameH.IsKey(OldKID)) {
235  NewNameH.AddDat(k, OldNameH.GetDat(OldKID));
236  }
237  }
238  printf("%d attributes names copied\n", NewNameH.Len());
239  }
240  }
241  static void FilterLowEntropy(const THash<TInt, TIntV>& OldNIDAttrH, THash<TInt, TIntV>& NewNIDAttrH, const double MinFrac = 0.00001, const double MaxFrac = 0.95, const int MinCnt = 3) {
242  TIntStrH TmpH1, TmpH2;
243  FilterLowEntropy(OldNIDAttrH, NewNIDAttrH, TmpH1, TmpH2, MinFrac, MaxFrac, MinCnt);
244  }
245 };
246 class TCesna { //CESNA: community detection in networks with node attributes
247 private:
248  PUNGraph G; //graph to fit
249  TVec<TIntSet> X; // X[u] = {k| X_uk = 1}
250  TVec<TIntFltH> F; // membership for each user (Size: Nodes * Coms)
251  TVec<TFltV> W; // weight vector for logistic regression. w_ck = W[k][c] (Column vector)
252  TInt Attrs; // number of attributes
253  TRnd Rnd; // random number generator
254  TIntSet NIDToIdx; // original node ID vector NIDToIdx[i] = Node ID for index i, NIDToIdx.GetKey(NID) = index for NID
255  TFlt RegCoef; //Regularization coefficient when we fit for P_c +: L1, -: L2
256  TFltV SumFV; // sum_u F_uc for each community c. Needed for efficient calculation
257  TInt NumComs; // number of communities
258  TVec<TIntSet> HOVIDSV; //NID pairs to hold out for cross validation
259  TVec<TIntSet> HOKIDSV; //set of attribute index (k) to hold out
260 public:
261  TFlt MinVal; // minimum value of F (0)
262  TFlt MaxVal; // maximum value of F (for numerical reason)
263  TFlt MinValW; // minimum value of W (for numerical reason)
264  TFlt MaxValW; // maximum value of W (for numerical reason)
265  TFlt NegWgt; // weight of negative example (a pair of nodes without an edge)
266  TFlt LassoCoef; // L1 regularization coefficient for W (MLE = argmax P(X|F, W) - LassoCoef * |W|)
267  TFlt WeightAttr; // likelihood = log P(G|F) + WeightAttr * log P(X|F, W)
268  TFlt PNoCom; // base probability \varepsilon (edge probability between a pair of nodes sharing no community
269  TBool DoParallel; // whether to use parallelism for computation
270 
271  TCesna() { G = TUNGraph::New(10, -1); }
272  TCesna(const PUNGraph& GraphPt, const THash<TInt, TIntV>& NIDAttrH, const int& InitComs, const int RndSeed = 0): Rnd(RndSeed), RegCoef(0),
273  MinVal(0.0), MaxVal(10.0), MinValW(-10.0), MaxValW(10.0), NegWgt(1.0), LassoCoef(1.0), WeightAttr(1.0) { SetGraph(GraphPt, NIDAttrH); NeighborComInit(InitComs); }
274  void Save(TSOut& SOut) {
275  G->Save(SOut);
276  X.Save(SOut);
277  F.Save(SOut);
278  W.Save(SOut);
279  Attrs.Save(SOut);
280  NIDToIdx.Save(SOut);
281  RegCoef.Save(SOut);
282  LassoCoef.Save(SOut);
283  SumFV.Save(SOut);
284  NumComs.Save(SOut);
285  HOVIDSV.Save(SOut);
286  HOKIDSV.Save(SOut);
287  MinVal.Save(SOut);
288  MaxVal.Save(SOut);
289  MinValW.Save(SOut);
290  MaxValW.Save(SOut);
291  NegWgt.Save(SOut);
292  PNoCom.Save(SOut);
293  }
294  void Load(TSIn& SIn, const int& RndSeed = 0) {
295  G->Load(SIn);
296  X.Load(SIn);
297  F.Load(SIn);
298  W.Load(SIn);
299  Attrs.Load(SIn);
300  NIDToIdx.Load(SIn);
301  RegCoef.Load(SIn);
302  LassoCoef.Load(SIn);
303  SumFV.Load(SIn);
304  NumComs.Load(SIn);
305  HOVIDSV.Load(SIn);
306  HOKIDSV.Load(SIn);
307  MinVal.Load(SIn);
308  MaxVal.Load(SIn);
309  MinValW.Load(SIn);
310  MaxValW.Load(SIn);
311  NegWgt.Load(SIn);
312  PNoCom.Load(SIn);
313  }
314 
315  void SetGraph(const PUNGraph& GraphPt, const THash<TInt, TIntV>& NIDAttrH);
316  void SetRegCoef(const double _RegCoef) { RegCoef = _RegCoef; }
317  double GetRegCoef() { return RegCoef; }
318  void SetWeightAttr(const double _WeightAttr) { IAssert (_WeightAttr <= 1.0 && _WeightAttr >= 0.0); WeightAttr = _WeightAttr; }
319  double GetWeightAttr() { return WeightAttr; }
320  void SetLassoCoef(const double _LassoCoef) { LassoCoef = _LassoCoef; }
321  int GetAttrs() { return Attrs; }
322  double GetComFromNID(const int& NID, const int& CID) {
323  int NIdx = NIDToIdx.GetKeyId(NID);
324  if (F[NIdx].IsKey(CID)) {
325  return F[NIdx].GetDat(CID);
326  } else {
327  return 0.0;
328  }
329  }
330  double GetLassoCoef() { return LassoCoef; }
331  void InitW() { // initialize W
332  W.Gen(Attrs);
333  for (int k = 0; k < Attrs; k++) {
334  W[k].Gen(NumComs + 1);
335  }
336  }
337  void SetAttrHoldOut(const int NID, const int KID) {
338  int NIdx = NIDToIdx.GetKeyId(NID);
339  HOKIDSV[NIdx].AddKey(KID);
340  }
341  void SetAttrHoldOutForOneNode(const int NID) {
342  for (int k = 0; k < Attrs; k++) {
343  SetAttrHoldOut(NID, k);
344  }
345  }
346  void GetW(TVec<TFltV>& _W) { _W = W; }
347  void SetW(TVec<TFltV>& _W) { W = _W; }
348  void RandomInit(const int InitComs);
349  void NeighborComInit(const int InitComs);
350  void NeighborComInit(TFltIntPrV& NIdPhiV, const int InitComs);
351  int GetNumComs() { return NumComs; }
352  void SetCmtyVV(const TVec<TIntV>& CmtyVV);
353  double Likelihood(const bool DoParallel = false);
354  double LikelihoodForRow(const int UID);
355  double LikelihoodForRow(const int UID, const TIntFltH& FU);
356  double LikelihoodAttrKForRow(const int UID, const int K) { return LikelihoodAttrKForRow(UID, K, F[UID]); }
357  double LikelihoodAttrKForRow(const int UID, const int K, const TIntFltH& FU) { return LikelihoodAttrKForRow(UID, K, FU, W[K]); }
358  double LikelihoodAttrKForRow(const int UID, const int K, const TIntFltH& FU, const TFltV& WK);
359  double LikelihoodForWK(const int K, const TFltV& WK) {
360  double L = 0.0;
361  for (int u = 0; u < F.Len(); u++) {
362  if (HOKIDSV[u].IsKey(K)) { continue; }
363  L += LikelihoodAttrKForRow(u, K, F[u], WK);
364  }
365  for (int c = 0; c < WK.Len() - 1; c++) {
366  L -= LassoCoef * fabs(WK[c]);
367  }
368  return L;
369  }
370  double LikelihoodForWK(const int K) { return LikelihoodForWK(K, W[K]); }
371  double LikelihoodAttr() {
372  double L = 0.0;
373  for (int k = 0; k < Attrs; k++) {
374  for (int u = 0; u < F.Len(); u++) {
375  if (HOKIDSV[u].IsKey(k)) { continue; }
376  L += LikelihoodAttrKForRow(u, k, F[u], W[k]);
377  }
378  }
379  return L;
380  }
381  double LikelihoodGraph() {
382  double L = Likelihood();
383  //add regularization
384  if (RegCoef > 0.0) { //L1
385  for (int u = 0; u < F.Len(); u++) {
386  L += RegCoef * Sum(F[u]);
387  }
388  }
389  if (RegCoef < 0.0) { //L2
390  for (int u = 0; u < F.Len(); u++) {
391  L -= RegCoef * Norm2(F[u]);
392  }
393  }
394 
395  return L - WeightAttr * LikelihoodAttr();
396  }
397  void GenHoldOutAttr(const double HOFrac, TVec<TIntSet>& HOSetV) {
398  HOSetV.Gen(F.Len());
399  int HoldOutCnt = (int) ceil(HOFrac * G->GetNodes() * Attrs);
400  TIntPrSet NIDKIDSet(HoldOutCnt);
401  int Cnt = 0;
402  for (int h = 0; h < 10 * HoldOutCnt; h++) {
403  int UID = Rnd.GetUniDevInt(F.Len());
404  int KID = Rnd.GetUniDevInt(Attrs);
405  if (! NIDKIDSet.IsKey(TIntPr(UID, KID))) {
406  NIDKIDSet.AddKey(TIntPr(UID, KID));
407  HOSetV[UID].AddKey(KID);
408  Cnt++;
409  }
410  if (Cnt >= HoldOutCnt) { break; }
411  }
412  printf("%d hold out pairs generated for attributes\n", Cnt);
413  }
414  void SetHoldOut(const double HOFrac) {
415  TVec<TIntSet> HoldOut;
416  TCesnaUtil::GenHoldOutPairs(G, HoldOut, HOFrac, Rnd);
417  GenHoldOutAttr(HOFrac, HOKIDSV);
418  HOVIDSV = HoldOut;
419  }
420  void GradientForRow(const int UID, TIntFltH& GradU, const TIntSet& CIDSet);
421  void GradientForWK(TFltV& GradV, const int K) {
422  GradV.Gen(NumComs + 1);
423  for (int u = 0; u < F.Len(); u++) {
424  if (HOKIDSV[u].IsKey(K)) { continue; }
425  double Pred = PredictAttrK(u, K);
426  for (TIntFltH::TIter CI = F[u].BegI(); CI < F[u].EndI(); CI++) {
427  GradV[CI.GetKey()] += (GetAttr(u, K) - Pred) * GetCom(u, CI.GetKey());
428  }
429  GradV[NumComs] += (GetAttr(u, K) - Pred);
430  }
431 
432  for (int c = 0; c < GradV.Len() - 1; c++) {
433  GradV[c] -= LassoCoef * TMath::Sign(GetW(c, K));
434  }
435  }
436  void GetCmtyVV(TVec<TIntV>& CmtyVV);
437  void GetCmtyVV(TVec<TIntV>& CmtyVV, TVec<TFltV>& Wck, const double Thres, const int MinSz = 3);
438  void GetCmtyVV(TVec<TIntV>& CmtyVV, const double Thres, const int MinSz = 3) {
439  TVec<TFltV> TmpV;
440  GetCmtyVV(CmtyVV, TmpV, Thres, MinSz);
441  }
442  void GetCmtyVV(TVec<TIntV>& CmtyVV, TVec<TFltV>& Wck) {
443  GetCmtyVV(CmtyVV, Wck, sqrt(2.0 * (double) G->GetEdges() / G->GetNodes() / G->GetNodes()), 3);
444  }
445 
446  void GetCmtyVVUnSorted(TVec<TIntV>& CmtyVV);
447  void GetCmtyVVUnSorted(TVec<TIntV>& CmtyVV, const double Thres, const int MinSz = 3);
448  /* GetCmtyVVRelative: NOT working well (low accuracy)
449  void GetCmtyVVRelative(TVec<TIntV>& CmtyVV, const int MinSz = 3) {
450  CmtyVV.Clr();
451  for (int c = 0; c < NumComs; c++) {
452  TIntV CmtyV;
453  double MaxVal = 0.0;
454  for (int u = 0; u < G->GetNodes(); u++) {
455  if (GetCom(u, c) > MaxVal) { MaxVal = GetCom(u, c); }
456  }
457  if (MaxVal == 0.0) { continue; }
458  for (int u = 0; u < G->GetNodes(); u++) {
459  if (GetCom(u, c) > 0.5 * MaxVal) { CmtyV.Add(NIDToIdx[u]); }
460  }
461  if (CmtyV.Len() >= MinSz) { CmtyVV.Add(CmtyV); }
462  }
463  if ( NumComs != CmtyVV.Len()) {
464  printf("Community vector generated. %d communities are ommitted\n", NumComs.Val - CmtyVV.Len());
465  }
466  }
467  */
468  int FindComs(TIntV& ComsV, const bool UseBIC = false, const double HOFrac = 0.2, const int NumThreads = 20, const TStr PlotLFNm = TStr(), const double StepAlpha = 0.3, const double StepBeta = 0.1);
469  int FindComs(const int NumThreads, const int MaxComs, const int MinComs, const int DivComs, const TStr OutFNm, const bool UseBIC = false, const double HOFrac = 0.1, const double StepAlpha = 0.3, const double StepBeta = 0.3);
470  void DisplayAttrs(const int TopK, const TStrHash<TInt>& NodeNameH) {
471  for (int u = 0; u < X.Len(); u++) {
472  if (NodeNameH.Len() > 0) {
473  printf("NID: %s\t Attrs: ", NodeNameH.GetKey(NIDToIdx[u]));
474  } else {
475  printf("NID: %d\t Attrs: ", NIDToIdx[u].Val);
476  }
477  for (int k = 0; k < X[u].Len(); k++) {
478  printf("%d, ", X[u][k].Val);
479  }
480  printf("\n");
481  if (u >= TopK) { break; }
482  }
483  }
484  double LikelihoodHoldOut();
485  double GetStepSizeByLineSearch(const int UID, const TIntFltH& DeltaV, const TIntFltH& GradV, const double& Alpha, const double& Beta, const int MaxIter = 10);
486  double GetStepSizeByLineSearchForWK(const int K, const TFltV& DeltaV, const TFltV& GradV, const double& Alpha, const double& Beta, const int MaxIter = 10) {
487  double StepSize = 1.0;
488  double InitLikelihood = LikelihoodForWK(K);
489  TFltV NewVarV(DeltaV.Len());
490  IAssert(DeltaV.Len() == NumComs + 1);
491  for(int iter = 0; iter < MaxIter; iter++) {
492  for (int c = 0; c < DeltaV.Len(); c++){
493  double NewVal = W[K][c] + StepSize * DeltaV[c];
494  if (NewVal < MinValW) { NewVal = MinValW; }
495  if (NewVal > MaxValW) { NewVal = MaxValW; }
496  NewVarV[c] = NewVal;
497  }
498  if (LikelihoodForWK(K, NewVarV) < InitLikelihood + Alpha * StepSize * TLinAlg::DotProduct(GradV, DeltaV)) {
499  StepSize *= Beta;
500  } else {
501  break;
502  }
503  if (iter == MaxIter - 1) {
504  StepSize = 0.0;
505  break;
506  }
507  }
508  return StepSize;
509  }
510  int GetPositiveW() {
511  int PosCnt = 0;
512  for (int c = 0; c < NumComs; c++) {
513  for (int k = 0; k < Attrs; k++) {
514  if (GetW(c, k) > 0.0) { PosCnt++; }
515  }
516  }
517  return PosCnt;
518  }
519  int MLEGradAscent(const double& Thres, const int& MaxIter, const TStr PlotNm, const double StepAlpha = 0.3, const double StepBeta = 0.1);
520  int MLEGradAscentParallel(const double& Thres, const int& MaxIter, const int ChunkNum, const int ChunkSize, const TStr PlotNm, const double StepAlpha = 0.3, const double StepBeta = 0.1);
521  int MLEGradAscentParallel(const double& Thres, const int& MaxIter, const int ChunkNum, const TStr PlotNm = TStr(), const double StepAlpha = 0.3, const double StepBeta = 0.1) {
522  int ChunkSize = G->GetNodes() / 10 / ChunkNum;
523  if (ChunkSize == 0) { ChunkSize = 1; }
524  return MLEGradAscentParallel(Thres, MaxIter, ChunkNum, ChunkSize, PlotNm, StepAlpha, StepBeta);
525  }
526  //double FindOptimalThres(const TVec<TIntV>& TrueCmtyVV, TVec<TIntV>& CmtyVV);
527  double inline GetCom(const int& NID, const int& CID) {
528  if (F[NID].IsKey(CID)) {
529  return F[NID].GetDat(CID);
530  } else {
531  return 0.0;
532  }
533  }
534  double inline GetAttr(const int& NID, const int& K) {
535  if (X[NID].IsKey(K)) {
536  return 1.0;
537  } else {
538  return 0.0;
539  }
540  }
541  void inline AddCom(const int& NID, const int& CID, const double& Val) {
542  if (F[NID].IsKey(CID)) {
543  SumFV[CID] -= F[NID].GetDat(CID);
544  }
545  F[NID].AddDat(CID) = Val;
546  SumFV[CID] += Val;
547  }
548 
549  void inline DelCom(const int& NID, const int& CID) {
550  if (F[NID].IsKey(CID)) {
551  SumFV[CID] -= F[NID].GetDat(CID);
552  F[NID].DelKey(CID);
553  }
554  }
555  /*
556  double inline DotProduct(const TIntFltH& UV, const TFltV& VV) {
557  double DP = 0;
558  for (TIntFltH::TIter HI = UV.BegI(); HI < UV.EndI(); HI++) {
559  DP += VV[HI.GetKey()] * HI.GetDat();
560  }
561  return DP;
562  }
563  */
564  double inline DotProduct(const TIntFltH& UV, const TIntFltH& VV) {
565  double DP = 0;
566  if (UV.Len() > VV.Len()) {
567  for (TIntFltH::TIter HI = UV.BegI(); HI < UV.EndI(); HI++) {
568  if (VV.IsKey(HI.GetKey())) {
569  DP += VV.GetDat(HI.GetKey()) * HI.GetDat();
570  }
571  }
572  } else {
573  for (TIntFltH::TIter HI = VV.BegI(); HI < VV.EndI(); HI++) {
574  if (UV.IsKey(HI.GetKey())) {
575  DP += UV.GetDat(HI.GetKey()) * HI.GetDat();
576  }
577  }
578  }
579  return DP;
580  }
581  double inline DotProduct(const int& UID, const int& VID) {
582  return DotProduct(F[UID], F[VID]);
583  }
584  double inline Prediction(const TIntFltH& FU, const TIntFltH& FV) {
585  double DP = log (1.0 / (1.0 - PNoCom)) + DotProduct(FU, FV);
586  IAssertR(DP > 0.0, TStr::Fmt("DP: %f", DP));
587  return exp(- DP);
588  }
589  double inline PredictAttrK(const TIntFltH& FU, const TFltV& WK) {
590  double DP = 0.0;
591  for (TIntFltH::TIter FI = FU.BegI(); FI < FU.EndI(); FI++) {
592  DP += FI.GetDat() * WK[FI.GetKey()];
593  }
594  DP += WK.Last();
595  return Sigmoid(DP);
596  }
597  double inline PredictAttrK(const TIntFltH& FU, const int K) {
598  return PredictAttrK(FU, W[K]);
599  }
600  double inline PredictAttrK(const int UID, const int K) {
601  return PredictAttrK(F[UID], W[K]);
602  }
603  double inline GetW(const int CID, const int K) {
604  return W[K][CID];
605  }
606  double inline Prediction(const int& UID, const int& VID) {
607  return Prediction(F[UID], F[VID]);
608  }
609  double inline Sum(const TIntFltH& UV) {
610  double N = 0.0;
611  for (TIntFltH::TIter HI = UV.BegI(); HI < UV.EndI(); HI++) {
612  N += HI.GetDat();
613  }
614  return N;
615  }
616  double inline Norm2(const TIntFltH& UV) {
617  double N = 0.0;
618  for (TIntFltH::TIter HI = UV.BegI(); HI < UV.EndI(); HI++) {
619  N += HI.GetDat() * HI.GetDat();
620  }
621  return N;
622  }
623  /*
624  double inline Norm1(const TFltV& UV) {
625  double N = 0.0;
626  for (int i = 0; i < UV.Len(); i++) {
627  N += fabs(UV[i]);
628  }
629  return N;
630  }
631  */
// Logistic function: 1 / (1 + e^(-X)).
double inline Sigmoid(const double X) {
  const double ExpNeg = exp(-X);
  return 1.0 / (1.0 + ExpNeg);
}
635 };
636 
637 
638 #endif
double Sigmoid(const double X)
Definition: agmattr.h:632
int MLEGradAscentParallel(const double &Thres, const int &MaxIter, const int ChunkNum, const int ChunkSize, const TStr PlotNm, const double StepAlpha=0.3, const double StepBeta=0.1)
Definition: agmattr.cpp:594
#define IAssert(Cond)
Definition: bd.h:262
int GetInt() const
Definition: dt.h:581
void GenHoldOutAttr(const double HOFrac, TVec< TIntSet > &HOSetV)
Definition: agmattr.h:397
TPair< TInt, TInt > TIntPr
Definition: ds.h:83
PUNGraph G
Definition: agmattr.h:248
double LikelihoodForWK(const int K)
Definition: agmattr.h:370
TIter EndI() const
Returns an iterator referring to the past-the-end element in the vector.
Definition: ds.h:595
TFltV SumFV
Definition: agmattr.h:256
#define IAssertR(Cond, Reason)
Definition: bd.h:265
static void GetNIdPhiV(const PGraph &G, TFltIntPrV &NIdPhiV)
Definition: agmattr.h:77
TPair< TFlt, TInt > TFltIntPr
Definition: ds.h:97
void SetWeightAttr(const double _WeightAttr)
Definition: agmattr.h:318
TFlt MaxVal
Definition: agmattr.h:262
TBool DoParallel
Definition: agmattr.h:269
int GetNumComs()
Definition: agmattr.h:351
Definition: tm.h:355
double PredictAttrK(const int UID, const int K)
Definition: agmattr.h:600
TFlt LassoCoef
Definition: agmattr.h:266
Definition: dt.h:11
double Sum(const TIntFltH &UV)
Definition: agmattr.h:609
double GetCom(const int &NID, const int &CID)
Definition: agmattr.h:527
TFlt NegWgt
Definition: agmattr.h:265
static void DumpNIDAttrHToMetis(const TStr &FNm, const THash< TInt, TIntV > &NIDAttrH, const TIntV &NIDV)
Definition: agmattr.h:179
TVec< TFltV > W
Definition: agmattr.h:251
int Val
Definition: dt.h:1139
int MLEGradAscent(const double &Thres, const int &MaxIter, const TStr PlotNm, const double StepAlpha=0.3, const double StepBeta=0.1)
Definition: agmattr.cpp:505
void Save(TSOut &SOut) const
Definition: dt.h:1153
bool IsIn(const TVal &Val) const
Checks whether element Val is a member of the vector.
Definition: ds.h:828
double Prediction(const TIntFltH &FU, const TIntFltH &FV)
Definition: agmattr.h:584
int GetKeyId(const TKey &Key) const
Definition: shash.h:1328
double DotProduct(const TIntFltH &UV, const TIntFltH &VV)
Definition: agmattr.h:564
TFlt PNoCom
Definition: agmattr.h:268
void SetRegCoef(const double _RegCoef)
Definition: agmattr.h:316
TIter BegI() const
Definition: hash.h:213
TFlt MinVal
Definition: agmattr.h:261
static void GetNbhCom(const PGraph &Graph, const int NID, TIntSet &NBCmtyS)
Definition: agmattr.h:68
int GetEdges() const
Returns the number of edges in the graph.
Definition: graph.cpp:82
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:575
int Len() const
Definition: hash.h:842
bool Empty() const
Definition: hash.h:227
double LikelihoodAttr()
Definition: agmattr.h:371
Definition: ss.h:72
void Gen(const int &ExpectVals)
Definition: shash.h:1115
double LikelihoodForWK(const int K, const TFltV &WK)
Definition: agmattr.h:359
double GetAttr(const int &NID, const int &K)
Definition: agmattr.h:534
bool GetInt(const int &FldN, int &Val) const
If the field FldN is an integer its value is returned in Val and the function returns true...
Definition: ss.cpp:447
const TDat & GetDat(const TKey &Key) const
Definition: hash.h:262
void Load(TSIn &SIn, const int &RndSeed=0)
Definition: agmattr.h:294
TIntSet NIDToIdx
Definition: agmattr.h:254
void Save(TSOut &SOut) const
Saves the graph to a (binary) stream SOut.
Definition: graph.h:170
TIter EndI() const
Definition: hash.h:218
void GetW(TVec< TFltV > &_W)
Definition: agmattr.h:346
void Load(TSIn &SIn)
Definition: ds.h:946
int GetAttrs()
Definition: agmattr.h:321
double GetStepSizeByLineSearch(const int UID, const TIntFltH &DeltaV, const TIntFltH &GradV, const double &Alpha, const double &Beta, const int MaxIter=10)
Definition: agmattr.cpp:480
double PredictAttrK(const TIntFltH &FU, const TFltV &WK)
Definition: agmattr.h:589
bool IsKey(const TKey &Key) const
Definition: shash.h:1148
double GetWeightAttr()
Definition: agmattr.h:319
double Likelihood(const bool DoParallel=false)
Definition: agmattr.cpp:137
int GetNodes() const
Returns the number of nodes in the graph.
Definition: graph.h:192
const char * GetFld(const int &FldN) const
Returns the contents of the field at index FldN.
Definition: ss.h:129
double LikelihoodGraph()
Definition: agmattr.h:381
double LikelihoodAttrKForRow(const int UID, const int K)
Definition: agmattr.h:356
Definition: dt.h:1386
static void FilterLowEntropy(const THash< TInt, TIntV > &OldNIDAttrH, THash< TInt, TIntV > &NewNIDAttrH, const TIntStrH &OldNameH, TIntStrH &NewNameH, const double MinFrac=0.00001, const double MaxFrac=0.95, const int MinCnt=3)
Definition: agmattr.h:203
static void LoadNIDAttrHFromNIDKH(const TIntV &NIDV, const TStr &InFNm, THash< TInt, TIntV > &NIDAttrH)
Definition: agmattr.h:123
Definition: fl.h:58
TVec< TIntFltH > F
Definition: agmattr.h:250
void Save(TSOut &SOut) const
Definition: ds.h:954
TSsFmt
Spread-Sheet Separator Format.
Definition: ss.h:5
static void DumpNIDAttrHToNIDK(const TStr &FNm, const THash< TInt, TIntSet > &NIDAttrH, const TStrHash< TInt > &FeatNameH, const TStrHash< TInt > &NodeNameH)
Definition: agmattr.h:127
void DisplayAttrs(const int TopK, const TStrHash< TInt > &NodeNameH)
Definition: agmattr.h:470
TFlt MaxValW
Definition: agmattr.h:264
TFlt MinValW
Definition: agmattr.h:263
const char * GetTmStr() const
Definition: tm.h:370
TInt Attrs
Definition: agmattr.h:252
void SetW(TVec< TFltV > &_W)
Definition: agmattr.h:347
void Gen(const int &ExpectVals)
Definition: hash.h:222
bool IsKey(const char *Key) const
Definition: hash.h:897
#define HasGraphFlag(TGraph, Flag)
For quick testing of the properties of the graph/network object (see TGraphFlag). ...
Definition: gbase.h:41
double GetComFromNID(const int &NID, const int &CID)
Definition: agmattr.h:322
void RandomInit(const int InitComs)
Definition: agmattr.cpp:9
void SetAttrHoldOut(const int NID, const int KID)
Definition: agmattr.h:337
double LikelihoodHoldOut()
Definition: agmattr.cpp:452
const TVal & GetDat(const TVal &Val) const
Returns reference to the first occurrence of element Val.
Definition: ds.h:838
void GetCmtyVV(TVec< TIntV > &CmtyVV, TVec< TFltV > &Wck)
Definition: agmattr.h:442
const char * GetKey(const int &KeyId) const
Definition: hash.h:893
static double Round(const double &Val)
Definition: xmath.h:16
double Norm2(const TIntFltH &UV)
Definition: agmattr.h:616
void Load(TSIn &SIn)
Definition: dt.h:1152
TVec< TIntSet > X
Definition: agmattr.h:249
int MLEGradAscentParallel(const double &Thres, const int &MaxIter, const int ChunkNum, const TStr PlotNm=TStr(), const double StepAlpha=0.3, const double StepBeta=0.1)
Definition: agmattr.h:521
TFlt RegCoef
Definition: agmattr.h:255
TVec< TIntSet > HOKIDSV
Definition: agmattr.h:259
TCesna()
Definition: agmattr.h:271
static PUNGraph New()
Static constructor that returns a pointer to the graph. Call: PUNGraph Graph = TUNGraph::New().
Definition: graph.h:172
TVec< TIntSet > HOVIDSV
Definition: agmattr.h:258
int AddKey(const TKey &Key)
Definition: shash.h:1254
const TVal & Last() const
Returns a reference to the last element of the vector.
Definition: ds.h:579
void AddCom(const int &NID, const int &CID, const double &Val)
Definition: agmattr.h:541
Tab separated.
Definition: ss.h:6
void GetCmtyVVUnSorted(TVec< TIntV > &CmtyVV)
Definition: agmattr.cpp:329
void InitW()
Definition: agmattr.h:331
double PredictAttrK(const TIntFltH &FU, const int K)
Definition: agmattr.h:597
static void GenHoldOutPairs(const PGraph &G, TVec< TIntSet > &HoldOutSet, double HOFrac, TRnd &Rnd)
Definition: agmattr.h:38
TCesna(const PUNGraph &GraphPt, const THash< TInt, TIntV > &NIDAttrH, const int &InitComs, const int RndSeed=0)
Definition: agmattr.h:272
double GetRegCoef()
Definition: agmattr.h:317
TRnd Rnd
Definition: agmattr.h:253
Definition: fl.h:128
void GetCmtyVV(TVec< TIntV > &CmtyVV)
Definition: agmattr.cpp:289
static void DumpNIDAttrHToNIDK(const TStr &FNm, const THash< TInt, TIntV > &NIDAttrH)
Definition: agmattr.h:165
void SetCmtyVV(const TVec< TIntV > &CmtyVV)
Definition: agmattr.cpp:85
Definition: dt.h:1137
static PUNGraph Load(TSIn &SIn)
Static constructor that loads the graph from a stream SIn and returns a pointer to it.
Definition: graph.h:178
Definition: hash.h:781
Directed graph (TNGraph, TNEGraph); otherwise the graph is undirected (TUNGraph).
Definition: gbase.h:13
void Save(TSOut &SOut)
Definition: agmattr.h:274
static void LoadNIDAttrHFromNIDKH(const TIntV &NIDV, const TStr &InFNm, THash< TInt, TIntV > &NIDAttrH, const TStrHash< TInt > &NodeNameH, const TSsFmt Sep=ssfTabSep)
Definition: agmattr.h:100
void NeighborComInit(const int InitComs)
Definition: agmattr.cpp:32
int Len() const
Definition: shash.h:1121
Definition: ds.h:32
double DotProduct(const int &UID, const int &VID)
Definition: agmattr.h:581
static double GetConductance(const PGraph &Graph, const TIntSet &CmtyS, const int Edges)
Definition: agmattr.h:10
TStr GetStr() const
Definition: dt.h:1363
double GetStepSizeByLineSearchForWK(const int K, const TFltV &DeltaV, const TFltV &GradV, const double &Alpha, const double &Beta, const int MaxIter=10)
Definition: agmattr.h:486
Definition: dt.h:412
double LikelihoodForRow(const int UID)
Definition: agmattr.cpp:157
static TStr Fmt(const char *FmtStr,...)
Definition: dt.cpp:1599
double GetLassoCoef()
Definition: agmattr.h:330
TInt NumComs
Definition: agmattr.h:257
static void DumpNIDAttrHToNIDK(const TStr &FNm, const THash< TInt, TIntV > &NIDAttrH, const TStrHash< TInt > &FeatNameH)
Definition: agmattr.h:161
uint64 GetLineNo() const
Returns the line number of the current line.
Definition: ss.h:118
static void DumpNIDAttrHToNIDK(const TStr &FNm, const THash< TInt, TIntV > &NIDAttrH, const TStrHash< TInt > &FeatNameH, const TStrHash< TInt > &NodeNameH)
Definition: agmattr.h:148
static void DumpNIDAttrHToNIDK(const TStr &FNm, const THash< TInt, TIntSet > &NIDAttrH, const TStrHash< TInt > &FeatNameH)
Definition: agmattr.h:140
static int Sign(const T &Val)
Definition: xmath.h:29
void Load(TSIn &SIn)
Definition: dt.h:1405
void GradientForRow(const int UID, TIntFltH &GradU, const TIntSet &CIDSet)
Definition: agmattr.cpp:213
void SetLassoCoef(const double _LassoCoef)
Definition: agmattr.h:320
void SetGraph(const PUNGraph &GraphPt, const THash< TInt, TIntV > &NIDAttrH)
Definition: agmattr.cpp:99
void Clr(const bool &DoDel=true, const int &NoDelLim=-1, const bool &ResetDat=true)
Definition: hash.h:361
void Load(TSIn &SIn)
Definition: shash.h:1078
bool Next()
Loads next line from the input file.
Definition: ss.cpp:412
void GradientForWK(TFltV &GradV, const int K)
Definition: agmattr.h:421
static double DotProduct(const TFltV &x, const TFltV &y)
Definition: linalg.cpp:165
void Save(TSOut &SOut) const
Definition: shash.h:1082
void GetCmtyVV(TVec< TIntV > &CmtyVV, const double Thres, const int MinSz=3)
Definition: agmattr.h:438
void Gen(const TSizeTy &_Vals)
Constructs a vector (an array) of _Vals elements.
Definition: ds.h:523
double Prediction(const int &UID, const int &VID)
Definition: agmattr.h:606
int GetUniDevInt(const int &Range=0)
Definition: dt.cpp:39
static int GetAttrs(const THash< TInt, TIntV > &NIDAttrH)
Definition: agmattr.h:169
int FindComs(TIntV &ComsV, const bool UseBIC=false, const double HOFrac=0.2, const int NumThreads=20, const TStr PlotLFNm=TStr(), const double StepAlpha=0.3, const double StepBeta=0.1)
Definition: agmattr.cpp:361
double LikelihoodAttrKForRow(const int UID, const int K, const TIntFltH &FU)
Definition: agmattr.h:357
char * CStr()
Definition: dt.h:479
bool IsKey(const TKey &Key) const
Definition: hash.h:258
int GetPositiveW()
Definition: agmattr.h:510
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:602
void SetHoldOut(const double HOFrac)
Definition: agmattr.h:414
Definition: dt.h:974
int Len() const
Definition: hash.h:228
TDat & AddDat(const TKey &Key)
Definition: hash.h:238
void DelCom(const int &NID, const int &CID)
Definition: agmattr.h:549
void SetAttrHoldOutForOneNode(const int NID)
Definition: agmattr.h:341
TFlt WeightAttr
Definition: agmattr.h:267
double GetW(const int CID, const int K)
Definition: agmattr.h:603
static void DumpNIDAttrHToNIDK(const TStr &FNm, const THash< TInt, TIntSet > &NIDAttrH)
Definition: agmattr.h:144
void Save(TSOut &SOut) const
Definition: dt.h:1402
const TKey & GetKey(const int &KeyId) const
Definition: hash.h:252
static void FilterLowEntropy(const THash< TInt, TIntV > &OldNIDAttrH, THash< TInt, TIntV > &NewNIDAttrH, const double MinFrac=0.00001, const double MaxFrac=0.95, const int MinCnt=3)
Definition: agmattr.h:241
int GetKeyId(const char *Key) const
Definition: hash.h:994
Vector is a sequence of TVal objects representing an array that can change in size.
Definition: ds.h:430
bool IsKeyId(const int &KeyId) const
Definition: hash.h:904
void SortByDat(const bool &Asc=true)
Definition: hash.h:292