SNAP Library 3.0, User Reference  2016-07-20 17:56:49
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
table.h
Go to the documentation of this file.
1 #ifndef TABLE_H
2 #define TABLE_H
3 
// Operator tag stored in a predicate-tree node; the node constructors in this
// file initialise it to NOP unless an operator is passed explicitly.
5 typedef enum {NOT, AND, OR, NOP} TPredOp;
// Comparison selector for atomic predicates. LT..GT are handled by both
// TPredicate::EvalAtom and TPredicate::EvalStrAtom; SUBSTR and SUPERSTR are
// handled only by EvalStrAtom (EvalAtom returns false for them).
7 typedef enum {LT = 0, LTE, EQ, NEQ, GTE, GT, SUBSTR, SUPERSTR} TPredComp;
8 
9 class TAtomicPredicate;
10 class TPredicateNode;
11 class TPredicate;
12 
13 //#//////////////////////////////////////////////
16  private:
25  // OP RS: 2014/03/25, NonAtom does not work with Snap.py
26  //protected:
27  //static const TAtomicPredicate NonAtom;
28  public:
31  Compare(EQ), Lvar(""), Rvar(""),
32  IntConst(0), FltConst(0), StrConst("") {}
33  //TAtomicPredicate() : Type(NonAtom.Type), IsConst(NonAtom.IsConst),
34  // Compare(NonAtom.Compare), Lvar(NonAtom.Lvar), Rvar(NonAtom.Rvar),
35  // IntConst(NonAtom.IntConst), FltConst(NonAtom.FltConst), StrConst(NonAtom.StrConst) {}
38  TInt ICnst, TFlt FCnst, TStr SCnst) : Type(Typ), IsConst(IsCnst),
39  Compare(Cmp), Lvar(L), Rvar(R), IntConst(ICnst), FltConst(FCnst),
40  StrConst(SCnst) {}
43  Type(Typ), IsConst(IsCnst), Compare(Cmp), Lvar(L), Rvar(R), IntConst(0),
44  FltConst(0), StrConst("") {}
45  friend class TPredicate;
46  friend class TPredicateNode;
47 };
48 
49 //#//////////////////////////////////////////////
52  public:
59  TPredicateNode(): Op(NOP), Result(false), Atom(), Parent(NULL), Left(NULL),
61  Right(NULL) {}
63  TPredicateNode(const TAtomicPredicate& A): Op(NOP), Result(false), Atom(A),
64  Parent(NULL), Left(NULL), Right(NULL) {}
66  TPredicateNode(TPredOp Opr): Op(Opr), Result(false), Atom(), Parent(NULL),
67  Left(NULL), Right(NULL) {}
70  Parent(P.Parent), Left(P.Left), Right(P.Right) {}
// Stores Child as this node's Left pointer and sets Child's Parent to this node.
// Child is dereferenced unconditionally, so it must not be NULL.
72  void AddLeftChild(TPredicateNode* Child) { Left = Child; Child->Parent = this; }
// Stores Child as this node's Right pointer and sets Child's Parent to this node.
// Child is dereferenced unconditionally, so it must not be NULL.
74  void AddRightChild(TPredicateNode* Child) { Right = Child; Child->Parent = this; }
76  void GetVariables(TStrV& Variables);
77  friend class TPredicate;
78 };
79 
80 //#//////////////////////////////////////////////
82 class TPredicate {
83  protected:
88  public:
// Copy constructor: copies the three variable maps and the Root member as-is.
// NOTE(review): Root's declaration is not visible in this listing; if it is a
// raw pointer, both predicates end up referring to the same tree - confirm.
94  TPredicate(const TPredicate& Pred) : IntVars(Pred.IntVars), FltVars(Pred.FltVars), StrVars(Pred.StrVars), Root(Pred.Root) {}
// Declared here, defined elsewhere; takes the output vector by reference.
96  void GetVariables(TStrV& Variables);
// Stores VarVal under VarName in IntVars via AddDat.
98  void SetIntVal(TStr VarName, TInt VarVal) { IntVars.AddDat(VarName, VarVal); }
// Stores VarVal under VarName in FltVars via AddDat.
100  void SetFltVal(TStr VarName, TFlt VarVal) { FltVars.AddDat(VarName, VarVal); }
// Stores VarVal under VarName in StrVars via AddDat.
102  void SetStrVal(TStr VarName, TStr VarVal) { StrVars.AddDat(VarName, VarVal); }
// Declared here, defined elsewhere.
104  TBool Eval();
107 
109  template <class T>
110  static TBool EvalAtom(T Val1, T Val2, TPredComp Cmp) {
111  switch (Cmp) {
112  case LT: return Val1 < Val2;
113  case LTE: return Val1 <= Val2;
114  case EQ: return Val1 == Val2;
115  case NEQ: return Val1 != Val2;
116  case GTE: return Val1 >= Val2;
117  case GT: return Val1 > Val2;
118  default: return false;
119  }
120  };
121 
123  static TBool EvalStrAtom(const TStr& Val1, const TStr& Val2, TPredComp Cmp) {
124  switch (Cmp) {
125  case LT: return Val1 < Val2;
126  case LTE: return Val1 <= Val2;
127  case EQ: return Val1 == Val2;
128  case NEQ: return Val1 != Val2;
129  case GTE: return Val1 >= Val2;
130  case GT: return Val1 > Val2;
131  case SUBSTR: return Val2.IsStrIn(Val1);
132  case SUPERSTR: return Val1.IsStrIn(Val2);
133  default: return false;
134  }
135  }
136 };
137 
138 //#//////////////////////////////////////////////
140 class TTable;
142 typedef TPt<TTable> PTable;
143 
146 
148 // Haversine distance is used to calculate distance between two points on a sphere based on latitude and longitude
150 
151 #if 0
152 // TMetric and TEuclideanMetric are currently not used, kept for future use
153 //#//////////////////////////////////////////////
155 class TMetric {
156 protected:
157  TStr MetricName;
158 public:
159  TMetric(TStr Name) : MetricName(Name) {}
161  TStr GetName();
163  virtual TFlt NumDist(TFlt,TFlt) { return -1; }
165  virtual TFlt StrDist(TStr,TStr) { return -1; }
166 };
167 
168 //#//////////////////////////////////////////////
// Metric derived from TMetric that redefines NumDist only; StrDist is inherited.
170 class TEuclideanMetric: public TMetric {
171 public:
// Forwards Name to the TMetric constructor.
172  TEuclideanMetric(TStr Name) : TMetric(Name) {}
// Returns the absolute difference |x1 - x2|, computed with fabs.
174  TFlt NumDist(TFlt x1,TFlt x2) { return fabs(x1-x2); }
175 };
176 #endif
177 
178 //TODO: move to separate file (map.h / file with PR and HITS)
// Forward declarations of two function templates; their definitions are not
// part of this listing.
179 namespace TSnap {
180 
// Takes a sequence of graphs (read-only) and a sequence of tables (mutable).
// NOTE(review): presumably runs PageRank per graph with parameters C, Eps and
// MaxIter and emits one table per graph - confirm against the definition.
182  template <class PGraph>
183  void MapPageRank(const TVec<PGraph>& GraphSeq, TVec<PTable>& TableSeq,
184  TTableContext* Context, const double& C, const double& Eps, const int& MaxIter);
185 
// Same parameter shape as MapPageRank, without C and Eps.
// NOTE(review): presumably the HITS counterpart - confirm against the definition.
187  template <class PGraph>
188  void MapHits(const TVec<PGraph>& GraphSeq, TVec<PTable>& TableSeq,
189  TTableContext* Context, const int& MaxIter);
190 }
191 
192 //#//////////////////////////////////////////////
195 protected:
197  friend class TTable;
198 public:
// Reads StringVals from the input stream SIn.
204  void Load(TSIn& SIn) { StringVals.Load(SIn); }
// Writes StringVals to the output stream SOut.
206  void Save(TSOut& SOut) { StringVals.Save(SOut); }
208  TInt AddStr(const TStr& Key) {
209  TInt KeyId = TInt(StringVals.AddKey(Key));
210  return(KeyId);
211  }
// Returns the string that StringVals.GetKey associates with KeyId.
// No range check is performed here on KeyId.
213  TStr GetStr(const TInt& KeyId) const {
214  return StringVals.GetKey(KeyId);
215  }
216 };
217 
218 //#//////////////////////////////////////////////
// Value holder carrying an int, a float and a string field plus a type tag
// (AttrType) that records which constructor was used.
// The member declarations themselves are not present in this listing.
220 class TPrimitive {
221 private:
226 
227 public:
// Default: numeric fields set to -1, empty string, tagged atInt.
228  TPrimitive() : IntVal(-1), FltVal(-1), StrVal(""), AttrType(atInt) {}
// Integer value; the float field is set to -1 and the string left empty.
229  TPrimitive(const TInt& Val) : IntVal(Val), FltVal(-1), StrVal(""), AttrType(atInt) {}
// Float value; the integer field is set to -1 and the string left empty.
230  TPrimitive(const TFlt& Val) : IntVal(-1), FltVal(Val), StrVal(""), AttrType(atFlt) {}
// String value, initialised from Val's C string; numeric fields set to -1.
231  TPrimitive(const TStr& Val) : IntVal(-1), FltVal(-1), StrVal(Val.CStr()), AttrType(atStr) {}
// Copy constructor; the string field is rebuilt from Prim's C string.
232  TPrimitive(const TPrimitive& Prim) : IntVal(Prim.IntVal), FltVal(Prim.FltVal),
233  StrVal(Prim.StrVal.CStr()), AttrType(Prim.AttrType) {}
234 
// Accessors return the stored fields by value regardless of AttrType; callers
// should consult GetType() to know which field is meaningful.
235  TInt GetInt() const { return IntVal; }
236  TFlt GetFlt() const { return FltVal; }
237  TStr GetStr() const { return StrVal; }
238  TAttrType GetType() const { return AttrType; }
239 };
240 
241 //#//////////////////////////////////////////////
// Row buffer with three separate value lists: integers, floats and strings.
// The member declarations and constructor are not present in this listing.
243 class TTableRow {
244 protected:
248 public:
// Appends Val to the integer list.
252  void AddInt(const TInt& Val) { IntVals.Add(Val); }
// Appends Val to the float list.
254  void AddFlt(const TFlt& Val) { FltVals.Add(Val); }
// Appends Val to the string list.
256  void AddStr(const TStr& Val) { StrVals.Add(Val); }
// Each getter returns its list by value, i.e. a copy of the stored vector.
258  TIntV GetIntVals() const { return IntVals; }
260  TFltV GetFltVals() const { return FltVals; }
262  TStrV GetStrVals() const { return StrVals; }
263 };
264 
269 
272 
273 //#//////////////////////////////////////////////
275 class GroupStmt{
276 protected:
281 public:
// Builds a statement over Attrs with Ordered = true, UsePhysicalRowIds = true
// and Valid = true.
283  GroupStmt(const TStrV& Attrs): GroupByAttrs(Attrs), Ordered(true), UsePhysicalRowIds(true), Valid(true){}
// Same, but with caller-supplied Ordered and UsePhysicalRowIds flags; Valid
// still starts as true.
284  GroupStmt(const TStrV& Attrs, TBool ordered, TBool physical): GroupByAttrs(Attrs), Ordered(ordered), UsePhysicalRowIds(physical), Valid(true){}
287  TBool operator ==(const GroupStmt& stmt) const{
288  if(stmt.Ordered != Ordered || stmt.UsePhysicalRowIds != UsePhysicalRowIds){ return false;}
289  if(stmt.GroupByAttrs.Len() != GroupByAttrs.Len()){ return false;}
290  for(int i = 0; i < GroupByAttrs.Len(); i++){
291  if(stmt.GroupByAttrs[i] != GroupByAttrs[i]){ return false;}
292  }
293  return true;
294  }
// Returns the current Valid flag.
295  TBool IsValid(){ return Valid;}
// Clears the Valid flag; nothing visible in this class sets it back to true.
296  void Invalidate(){ Valid = false;}
297  TBool IncludesAttr(const TStr& Attr){
298  for(int i = 0; i < GroupByAttrs.Len(); i++){
299  if(GroupByAttrs[i] == Attr){ return true;}
300  }
301  return false;
302  }
303  TSize GetMemUsed() const{
304  TSize sz = 3 * sizeof(TBool);
305  sz += GroupByAttrs.GetMemUsed();
306  for(int i = 0; i < GroupByAttrs.Len(); i++){
307  sz += GroupByAttrs[i].GetMemUsed();
308  }
309  return sz;
310  }
311 
// Primary hash code: combines the attribute vector's primary hash with the
// primary hash of `flags` through TPairHashImpl::GetHashCd.
// NOTE(review): the line declaring `flags` is missing from this listing;
// presumably it packs the boolean members - confirm against the real header.
312  int GetPrimHashCd() const{
313  int hc1 = GroupByAttrs.GetPrimHashCd();
315  int hc2 = flags.GetPrimHashCd();
316  return TPairHashImpl::GetHashCd(hc1, hc2);
317  }
318 
// Secondary hash code: combines the attribute vector's secondary hash with
// the secondary hash of `flags` through TPairHashImpl::GetHashCd.
// NOTE(review): the line declaring `flags` is missing from this listing -
// confirm its definition against the real header.
319  int GetSecHashCd() const{
320  int hc1 = GroupByAttrs.GetSecHashCd();
322  int hc2 = flags.GetSecHashCd();
323  return TPairHashImpl::GetHashCd(hc1, hc2);
324  }
325 
326  void Print(){
327  for(int i = 0; i < GroupByAttrs.Len(); i++){
328  printf("%s ", GroupByAttrs[i].CStr());
329  }
330  printf("Ordered: %d, UsePhysicalRows: %d, Valid: %d\n", Ordered.Val, UsePhysicalRowIds.Val, Valid.Val);
331  }
332 };
333 
334 //#//////////////////////////////////////////////
336 
341  const TTable* Table;
342 public:
// Default iterator: row index 0 and no table attached (Table is NULL).
344  TRowIterator(): CurrRowIdx(0), Table(NULL) {}
// Iterator positioned at RowIdx of the table pointed to by TablePtr; the
// pointer is stored, not copied, so the table must outlive the iterator.
346  TRowIterator(TInt RowIdx, const TTable* TablePtr): CurrRowIdx(RowIdx), Table(TablePtr) {}
// Postfix increment; note that it is declared to return a reference rather
// than the usual by-value copy. Defined elsewhere.
350  TRowIterator& operator++(int);
// NOTE(review): presumably advances to the next row - confirm in the definition.
352  TRowIterator& Next();
// Ordering and equality between iterators. Defined elsewhere.
354  bool operator < (const TRowIterator& RowI) const;
356  bool operator == (const TRowIterator& RowI) const;
// Accessors below are declarations only; the ColIdx overloads take a column
// index, the Col overloads take a column name.
358  TInt GetRowIdx() const;
360  TInt GetIntAttr(TInt ColIdx) const;
362  TFlt GetFltAttr(TInt ColIdx) const;
364  TStr GetStrAttr(TInt ColIdx) const;
366  TInt GetStrMapById(TInt ColIdx) const;
368  TInt GetIntAttr(const TStr& Col) const;
370  TFlt GetFltAttr(const TStr& Col) const;
372  TStr GetStrAttr(const TStr& Col) const;
374  TInt GetStrMapByName(const TStr& Col) const;
// NOTE(review): presumably compare the value in column ColIdx of the current
// row against Val using Cmp - confirm in the definitions.
376  TBool CompareAtomicConst(TInt ColIdx, const TPrimitive& Val, TPredComp Cmp);
378  TBool CompareAtomicConstTStr(TInt ColIdx, const TStr& Val, TPredComp Cmp);
379 };
380 
381 //#//////////////////////////////////////////////
387 public:
391  TRowIteratorWithRemove(TInt RowIdx, TTable* TablePtr);
393  TRowIteratorWithRemove(TInt RowIdx, TTable* TablePtr, TBool IsStart) : CurrRowIdx(RowIdx),
394  Table(TablePtr), Start(IsStart) {}
397  Table(RowI.Table), Start(RowI.Start) {}
// Ordering and equality between iterators. Defined elsewhere.
403  bool operator < (const TRowIteratorWithRemove& RowI) const;
405  bool operator == (const TRowIteratorWithRemove& RowI) const;
// Declarations only. The "Next" accessors are named after the row following
// the current position; ColIdx overloads take an index, Col overloads a name.
407  TInt GetRowIdx() const;
409  TInt GetNextRowIdx() const;
411  TInt GetNextIntAttr(TInt ColIdx) const;
413  TFlt GetNextFltAttr(TInt ColIdx) const;
415  TStr GetNextStrAttr(TInt ColIdx) const;
417  TInt GetNextIntAttr(const TStr& Col) const;
419  TFlt GetNextFltAttr(const TStr& Col) const;
421  TStr GetNextStrAttr(const TStr& Col) const;
// NOTE(review): presumably reports whether the iterator is at the start of
// the table (cf. the Start member set by the constructors) - confirm.
423  TBool IsFirst() const;
// NOTE(review): presumably unlinks the row after the current one - confirm.
425  void RemoveNext();
427  TBool CompareAtomicConst(TInt ColIdx, const TPrimitive& Val, TPredComp Cmp);
428 };
429 
430 //#//////////////////////////////////////////////
435 public:
// Starts iteration at index 0. The parameter has the same name as the member
// it initialises; inside the initialiser list the member is the target and
// the parameter is the source.
437  TTableIterator(TVec<PTable>& PTableV): PTableV(PTableV), CurrTableIdx(0) {}
// Returns the table at the current index and then advances the index.
// No bounds check here: call HasNext() first.
439  PTable Next() { return PTableV[CurrTableIdx++]; }
// True while the current index is below the number of tables.
441  bool HasNext() { return CurrTableIdx < PTableV.Len(); }
442 };
443 
// Forward declarations of the table-to-graph conversion functions so that
// TTable can name them as friends further down. Definitions are elsewhere.
445 namespace TSnap{
// Graph built from the SrcCol / DstCol columns of Table.
447  template<class PGraph> PGraph ToGraph(PTable Table,
448  const TStr& SrcCol, const TStr& DstCol, TAttrAggr AggrPolicy);
// Network overload with separate source-node, destination-node and edge
// attribute lists.
450  template<class PGraph> PGraph ToNetwork(PTable Table,
451  const TStr& SrcCol, const TStr& DstCol,
452  TStrV& SrcAttrs, TStrV& DstAttrs, TStrV& EdgeAttrs,
453  TAttrAggr AggrPolicy);
// Network overload without attribute lists.
455  template<class PGraph> PGraph ToNetwork(PTable Table,
456  const TStr& SrcCol, const TStr& DstCol, TAttrAggr AggrPolicy);
457 
// Network overload with an edge attribute list only.
458  template<class PGraph> PGraph ToNetwork(PTable Table,
459  const TStr& SrcCol, const TStr& DstCol,
460  TStrV& EdgeAttrV,
461  TAttrAggr AggrPolicy);
462 
// Network overload that additionally takes a node table, its key column and
// a node attribute list.
463  template<class PGraph> PGraph ToNetwork(PTable Table,
464  const TStr& SrcCol, const TStr& DstCol,
465  TStrV& EdgeAttrV, PTable NodeTable, const TStr& NodeCol, TStrV& NodeAttrV,
466  TAttrAggr AggrPolicy);
// Loaders that fill an existing TCrossNet / TModeNet from a table; both
// return int (meaning of the value not visible here).
467  int LoadCrossNet(TCrossNet& Graph, PTable Table, const TStr& SrcCol, const TStr& DstCol,
468  TStrV& EdgeAttrV);
469 
470  int LoadMode(TModeNet& Graph, PTable Table, const TStr& NCol,
471  TStrV& NodeAttrV);
472 
// "MP" variants, declared only when GCC_ATOMIC is defined; their parameter
// lists mirror the declarations above.
473 #ifdef GCC_ATOMIC
474  template<class PGraphMP> PGraphMP ToGraphMP(PTable Table,
475  const TStr& SrcCol, const TStr& DstCol);
476  template<class PGraphMP> PGraphMP ToGraphMP3(PTable Table,
477  const TStr& SrcCol, const TStr& DstCol);
478  template<class PGraphMP> PGraphMP ToNetworkMP(PTable Table, const TStr& SrcCol, const TStr& DstCol,
479  TStrV& SrcAttrs, TStrV& DstAttrs, TStrV& EdgeAttrs, TAttrAggr AggrPolicy);
480  template<class PGraphMP> PGraphMP ToNetworkMP2(PTable Table, const TStr& SrcCol, const TStr& DstCol,
481  TStrV& SrcAttrs, TStrV& DstAttrs, TStrV& EdgeAttrs, TAttrAggr AggrPolicy);
482  template<class PGraphMP> PGraphMP ToNetworkMP(PTable Table, const TStr& SrcCol, const TStr& DstCol,
483  TStrV& EdgeAttrV, TAttrAggr AggrPolicy);
484  template<class PGraphMP> PGraphMP ToNetworkMP(PTable Table, const TStr& SrcCol, const TStr& DstCol,
485  TAttrAggr AggrPolicy);
486  template<class PGraphMP> PGraphMP ToNetworkMP(PTable Table, const TStr& SrcCol, const TStr& DstCol,
487  TStrV& EdgeAttrV, PTable NodeTable, const TStr& NodeCol, TStrV& NodeAttrV, TAttrAggr AggrPolicy);
488 
489 
490 #endif // GCC_ATOMIC
491 }
492 
493 //#//////////////////////////////////////////////
495 class TTable {
496 protected:
497  static const TInt Last;
498  static const TInt Invalid;
499 
500  static TInt UseMP;
501 public:
502  template<class PGraph> friend PGraph TSnap::ToGraph(PTable Table,
503  const TStr& SrcCol, const TStr& DstCol, TAttrAggr AggrPolicy);
504  template<class PGraph> friend PGraph TSnap::ToNetwork(PTable Table,
505  const TStr& SrcCol, const TStr& DstCol,
506  TStrV& SrcAttrs, TStrV& DstAttrs, TStrV& EdgeAttrs,
508  template<class PGraph> friend PGraph TSnap::ToNetwork(PTable Table,
509  const TStr& SrcCol, const TStr& DstCol,
510  TStrV& EdgeAttrV,
512  template<class PGraph> friend PGraph TSnap::ToNetwork(PTable Table,
513  const TStr& SrcCol, const TStr& DstCol,
515  template<class PGraph> friend PGraph TSnap::ToNetwork(PTable Table,
516  const TStr& SrcCol, const TStr& DstCol,
517  TStrV& EdgeAttrV, PTable NodeTable, const TStr& NodeCol, TStrV& NodeAttrV,
519  friend int TSnap::LoadCrossNet(TCrossNet& Graph, PTable Table, const TStr& SrcCol, const TStr& DstCol,
520  TStrV& EdgeAttrV);
521  friend int TSnap::LoadMode(TModeNet& Graph, PTable Table, const TStr& NCol,
522  TStrV& NodeAttrV);
523 
524 #ifdef GCC_ATOMIC
525  template<class PGraphMP> friend PGraphMP TSnap::ToGraphMP(PTable Table, const TStr& SrcCol, const TStr& DstCol);
526  template<class PGraphMP> friend PGraphMP TSnap::ToGraphMP3(PTable Table, const TStr& SrcCol, const TStr& DstCol);
527  template<class PGraphMP> friend PGraphMP TSnap::ToNetworkMP(PTable Table, const TStr& SrcCol, const TStr& DstCol, TStrV& SrcAttrs, TStrV& DstAttrs, TStrV& EdgeAttrs, TAttrAggr AggrPolicy);
528  template<class PGraphMP> friend PGraphMP TSnap::ToNetworkMP2(PTable Table, const TStr& SrcCol, const TStr& DstCol, TStrV& SrcAttrs, TStrV& DstAttrs, TStrV& EdgeAttrs, TAttrAggr AggrPolicy);
529  template<class PGraphMP> friend PGraphMP TSnap::ToNetworkMP(PTable Table, const TStr& SrcCol, const TStr& DstCol, TStrV& EdgeAttrV, TAttrAggr AggrPolicy);
530  template<class PGraphMP> friend PGraphMP TSnap::ToNetworkMP(PTable Table, const TStr& SrcCol, const TStr& DstCol, TAttrAggr AggrPolicy);
531  template<class PGraphMP> friend PGraphMP TSnap::ToNetworkMP(PTable Table, const TStr& SrcCol, const TStr& DstCol,
532  TStrV& EdgeAttrV, PTable NodeTable, const TStr& NodeCol, TStrV& NodeAttrV, TAttrAggr AggrPolicy);
533 
534 #endif // GCC_ATOMIC
535 
// Assigns the class-wide static UseMP flag; it is shared by every TTable.
536  static void SetMP(TInt Value) { UseMP = Value; }
// Returns the current value of the static UseMP flag.
537  static TInt GetMP() { return UseMP; }
538 
540  static TStr NormalizeColName(const TStr& ColName) {
541  TStr Result = ColName;
542  int RLen = Result.Len();
543  if (RLen == 0) { return Result; }
544  if (Result.GetCh(0) == '_') { return Result; }
545  if (RLen >= 2 && Result.GetCh(RLen-2) == '-') { return Result; }
546  return Result + "-1";
547  }
549  static TStrV NormalizeColNameV(const TStrV& Cols) {
550  TStrV NCols;
551  for (TInt i = 0; i < Cols.Len(); i++) { NCols.Add(NormalizeColName(Cols[i])); }
552  return NCols;
553  }
554 protected:
556 
566 
571 
577 
581 
582  // Group mapping data structures.
584 
588 
592 
595  void InvalidatePhysicalGroupings(); // to be called when rows are added / physically removed
596  void InvalidateAffectedGroupings(const TStr& Attr); // to be called when attributes are removed (projected) or values updated in-place
597 
598  // Fields to be used when constructing a graph.
605 
612 
614 
615 /***** Utility functions *****/
616 public:
618  void AddIntCol(const TStr& ColName);
620  void AddFltCol(const TStr& ColName);
622  void AddStrCol(const TStr& ColName);
623 protected:
625  void IncrementNext();
627  void ClassifyAux(const TIntV& SelectedRows, const TStr& LabelName,
628  const TInt& PositiveLabel = 1, const TInt& NegativeLabel= 0);
629 
630 /***** Utility functions for handling string values *****/
// Looks up Val in the context's StringVals and returns the result as a C
// string pointer. No range check is done here.
632  const char* GetContextKey(TInt Val) const {
633  return Context->StringVals.GetKey(Val);
634  }
// Resolves the string stored at (ColIdx, RowIdx): reads the entry from
// StrColMaps and looks it up in the context's StringVals. Neither index is
// range-checked here.
636  TStr GetStrVal(TInt ColIdx, TInt RowIdx) const {
637  return TStr(Context->StringVals.GetKey(StrColMaps[ColIdx][RowIdx]));
638  }
640  void AddStrVal(const TInt& ColIdx, const TStr& Val);
642  void AddStrVal(const TStr& Col, const TStr& Val);
643 
644 /***** Utility functions for handling Schema *****/
// Returns the stored IdColName member.
646  TStr GetIdColName() const { return IdColName; }
// Name (first component) of schema entry Idx; Idx is not range-checked here.
648  TStr GetSchemaColName(TInt Idx) const { return Sch[Idx].Val1; }
// Type (second component) of schema entry Idx; Idx is not range-checked here.
650  TAttrType GetSchemaColType(TInt Idx) const { return Sch[Idx].Val2; }
652  void AddSchemaCol(const TStr& ColName, TAttrType ColType) {
653  TStr NColName = NormalizeColName(ColName);
654  Sch.Add(TPair<TStr,TAttrType>(NColName, ColType));
655  }
656  TBool IsColName(const TStr& ColName) const {
657  TStr NColName = NormalizeColName(ColName);
658  return ColTypeMap.IsKey(NColName);
659  }
661  void AddColType(const TStr& ColName, TPair<TAttrType,TInt> ColType) {
662  TStr NColName = NormalizeColName(ColName);
663  ColTypeMap.AddDat(NColName, ColType);
664  }
666  void AddColType(const TStr& ColName, TAttrType ColType, TInt Index) {
667  TStr NColName = NormalizeColName(ColName);
668  AddColType(NColName, TPair<TAttrType,TInt>(ColType, Index));
669  }
671  void DelColType(const TStr& ColName) {
672  TStr NColName = NormalizeColName(ColName);
673  ColTypeMap.DelKey(NColName);
674  }
676  TPair<TAttrType, TInt> GetColTypeMap(const TStr& ColName) const {
677  TStr NColName = NormalizeColName(ColName);
678  return ColTypeMap.GetDat(NColName);
679  }
681  TStr RenumberColName(const TStr& ColName) const;
683  TStr DenormalizeColName(const TStr& ColName) const;
685  Schema DenormalizeSchema() const;
687  TBool IsAttr(const TStr& Attr);
688 
689 /***** Utility functions for adding rows and tables to TTable *****/
691  void AddTable(const TTable& T);
693  void ConcatTable(const PTable& T) {AddTable(*T); Reindex(); }
694 
696  void AddRow(const TRowIterator& RI);
698  void AddRow(const TIntV& IntVals, const TFltV& FltVals, const TStrV& StrVals);
699 
700 /***** Utility functions for building graph from TTable *****/
702  void AddGraphAttribute(const TStr& Attr, TBool IsEdge, TBool IsSrc, TBool IsDst);
704  void AddGraphAttributeV(TStrV& Attrs, TBool IsEdge, TBool IsSrc, TBool IsDst);
706  void CheckAndAddIntNode(PNEANet Graph, THashSet<TInt>& NodeVals, TInt NodeId);
708  template<class T> TInt CheckAndAddFltNode(T Graph, THash<TFlt, TInt>& NodeVals, TFlt FNodeVal);
710  void AddEdgeAttributes(PNEANet& Graph, int RowId);
712  void AddNodeAttributes(TInt NId, TStrV NodeAttrV, TInt RowId,
713  THash<TInt, TStrIntVH>& NodeIntAttrs, THash<TInt, TStrFltVH>& NodeFltAttrs,
714  THash<TInt, TStrStrVH>& NodeStrAttrs);
716  PNEANet BuildGraph(const TIntV& RowIds, TAttrAggr AggrPolicy);
718  void InitRowIdBuckets(int NumBuckets);
720 
723  void FillBucketsByWindow(TStr SplitAttr, TInt JumpSize, TInt WindowSize,
724  TInt StartVal, TInt EndVal);
726 
729  void FillBucketsByInterval(TStr SplitAttr, TIntPrV SplitIntervals);
731 
735 
740 
744 
746 
749  template <class T> T AggregateVector(TVec<T>& V, TAttrAggr Policy);
750 
751  /***** Grouping Utility functions *************/
753  void GroupingSanityCheck(const TStr& GroupBy, const TAttrType& AttrType) const;
755 
759  template <class T> void GroupByIntCol(const TStr& GroupBy, T& Grouping,
760  const TIntV& IndexSet, TBool All, TBool UsePhysicalIds = true) const;
761 #ifdef GCC_ATOMIC
762  public: //Should be protected - this is for debug only
764  void GroupByIntColMP(const TStr& GroupBy, THashMP<TInt, TIntV>& Grouping, TBool UsePhysicalIds = true) const;
765 #endif // GCC_ATOMIC
766  protected:
768  template <class T> void GroupByFltCol(const TStr& GroupBy, T& Grouping,
769  const TIntV& IndexSet, TBool All, TBool UsePhysicalIds = true) const;
771  template <class T> void GroupByStrCol(const TStr& GroupBy, T& Grouping,
772  const TIntV& IndexSet, TBool All, TBool UsePhysicalIds = true) const;
774  template <class T> void UpdateGrouping(THash<T,TIntV>& Grouping, T Key, TInt Val) const;
775 #ifdef GCC_ATOMIC
776  template <class T> void UpdateGrouping(THashMP<T,TIntV>& Grouping, T Key, TInt Val) const;
778 #endif // GCC_ATOMIC
779  void PrintGrouping(const THash<TGroupKey, TIntV>& Grouping) const;
780 
781  /***** Utility functions for sorting by columns *****/
783  inline TInt CompareRows(TInt R1, TInt R2, const TAttrType& CompareByType,
784  const TInt& CompareByIndex, TBool Asc = true);
786  inline TInt CompareRows(TInt R1, TInt R2, const TVec<TAttrType>& CompareByTypes,
787  const TIntV& CompareByIndices, TBool Asc = true);
789  TInt GetPivot(TIntV& V, TInt StartIdx, TInt EndIdx, const TVec<TAttrType>& SortByTypes,
790  const TIntV& SortByIndices, TBool Asc);
792  TInt Partition(TIntV& V, TInt StartIdx, TInt EndIdx, const TVec<TAttrType>& SortByTypes,
793  const TIntV& SortByIndices, TBool Asc);
795  void ISort(TIntV& V, TInt StartIdx, TInt EndIdx, const TVec<TAttrType>& SortByTypes,
796  const TIntV& SortByIndices, TBool Asc = true);
798  void QSort(TIntV& V, TInt StartIdx, TInt EndIdx, const TVec<TAttrType>& SortByTypes,
799  const TIntV& SortByIndices, TBool Asc = true);
801  void Merge(TIntV& V, TInt Idx1, TInt Idx2, TInt Idx3, const TVec<TAttrType>& SortByTypes,
802  const TIntV& SortByIndices, TBool Asc = true);
803 #ifdef USE_OPENMP
804  void QSortPar(TIntV& V, const TVec<TAttrType>& SortByTypes, const TIntV& SortByIndices,
806  TBool Asc = true);
807 #endif // USE_OPENMP
808 
809 /***** Utility functions for removing rows (not through iterator) *****/
// A row counts as valid when its entry in Next differs from the Invalid
// sentinel. RowIdx is not range-checked here.
811  bool IsRowValid(TInt RowIdx) const{ return Next[RowIdx] != Invalid;}
815  void RemoveFirstRow();
817  void RemoveRow(TInt RowIdx, TInt PrevRowIdx);
819  void KeepSortedRows(const TIntV& KeepV);
822  for (int i = 0; i < Next.Len(); i++) {
823  if(Next[i] != TTable::Invalid) { FirstValidRow = i; return;}
824  }
825  TExcept::Throw("SetFirstValidRow: Table is empty");
826  }
827 
828 /***** Utility functions for Join *****/
830  PTable InitializeJointTable(const TTable& Table);
832  void AddJointRow(const TTable& T1, const TTable& T2, TInt RowIdx1, TInt RowIdx2);
833 /***** Utility functions for Threshold Join *****/
834  void ThresholdJoinInputCorrectness(const TStr& KeyCol1, const TStr& JoinCol1, const TTable& Table,
835  const TStr& KeyCol2, const TStr& JoinCol2);
836  void ThresholdJoinCountCollisions(const TTable& TB, const TTable& TS,
837  const TIntIntVH& T, TInt JoinColIdxB, TInt KeyColIdxB, TInt KeyColIdxS,
838  THash<TIntPr,TIntTr>& Counters, TBool ThisIsSmaller, TAttrType JoinColType, TAttrType KeyType);
839  PTable ThresholdJoinOutputTable(const THash<TIntPr,TIntTr>& Counters, TInt Threshold, const TTable& Table);
840  void ThresholdJoinCountPerJoinKeyCollisions(const TTable& TB, const TTable& TS,
841  const TIntIntVH& T, TInt JoinColIdxB, TInt KeyColIdxB, TInt KeyColIdxS,
842  THash<TIntTr,TIntTr>& Counters, TBool ThisIsSmaller, TAttrType JoinColType, TAttrType KeyType);
843  PTable ThresholdJoinPerJoinKeyOutputTable(const THash<TIntTr,TIntTr>& Counters, TInt Threshold, const TTable& Table);
844 
846  void ResizeTable(int RowCount);
848  int GetEmptyRowsStart(int NewRows);
850  void AddSelectedRows(const TTable& Table, const TIntV& RowIDs);
852  void AddNRows(int NewRows, const TVec<TIntV>& IntColsP, const TVec<TFltV>& FltColsP,
853  const TVec<TIntV>& StrColMapsP);
854 #ifdef USE_OPENMP
855  void AddNJointRowsMP(const TTable& T1, const TTable& T2, const TVec<TIntPrV>& JointRowIDSet);
857 #endif // USE_OPENMP
858  void UpdateTableForNewRow();
860 
861 #ifdef GCC_ATOMIC
862  static void LoadSSPar(PTable& NewTable, const Schema& S, const TStr& InFNm, const TIntV& RelevantCols, const char& Separator, TBool HasTitleLine);
864 #endif // GCC_ATOMIC
865  static void LoadSSSeq(PTable& NewTable, const Schema& S, const TStr& InFNm, const TIntV& RelevantCols, const char& Separator, TBool HasTitleLine);
867 
868 /***** Utility functions for Group *****/
870 
873  void GroupAux(const TStrV& GroupBy, THash<TGroupKey, TPair<TInt, TIntV> >& Grouping,
874  TBool Ordered, const TStr& GroupColName, TBool KeepUnique, TIntV& UniqueVec, TBool UsePhysicalIds = true);
875 #ifdef USE_OPENMP
876  //void GroupAuxMP(const TStrV& GroupBy, THashGenericMP<TGroupKey, TPair<TInt, TIntV> >& Grouping,
878  // TBool Ordered, const TStr& GroupColName, TBool KeepUnique, TIntV& UniqueVec, TBool UsePhysicalIds = false);
879 #endif // USE_OPENMP
880  void StoreGroupCol(const TStr& GroupColName, const TVec<TPair<TInt, TInt> >& GroupAndRowIds);
884  //template<class T> void RegisterGrouping(const T& Grouping, const TStr& GroupByCol, TBool UsePhysicalRows);
885 
887  void Reindex();
889  void AddIdColumn(const TStr& IdColName);
890 
891  static TInt CompareKeyVal(const TInt& K1, const TInt& V1, const TInt& K2, const TInt& V2);
892  static TInt CheckSortedKeyVal(TIntV& Key, TIntV& Val, TInt Start, TInt End);
893  static void ISortKeyVal(TIntV& Key, TIntV& Val, TInt Start, TInt End);
894  static TInt GetPivotKeyVal(TIntV& Key, TIntV& Val, TInt Start, TInt End);
895  static TInt PartitionKeyVal(TIntV& Key, TIntV& Val, TInt Start, TInt End);
896  static void QSortKeyVal(TIntV& Key, TIntV& Val, TInt Start, TInt End);
897 
899  void GetCollidingRows(const TTable& T, THashSet<TInt>& Collisions);
900 
901 public:
902 /***** Constructors *****/
903  TTable();
905  TTable(const Schema& S, TTableContext* Context);
907 
909  TTable(const THash<TInt,TInt>& H, const TStr& Col1, const TStr& Col2,
910  TTableContext* Context, const TBool IsStrKeys = false);
912  TTable(const THash<TInt,TFlt>& H, const TStr& Col1, const TStr& Col2,
913  TTableContext* Context, const TBool IsStrKeys = false);
914  // TTable(const TStr& TableName, const THash<TInt,TStr>& H, const TStr& Col1,
915  // const TStr& Col2, TTableContext* Context);
916 
918  TTable(const TTable& Table): Context(Table.Context), Sch(Table.Sch),
920  LastValidRow(Table.LastValidRow), Next(Table.Next), IntCols(Table.IntCols),
921  FltCols(Table.FltCols), StrColMaps(Table.StrColMaps), ColTypeMap(Table.ColTypeMap),
924  SrcCol(Table.SrcCol), DstCol(Table.DstCol),
927  IsNextDirty(Table.IsNextDirty) {}
928 
929  TTable(const TTable& Table, const TIntV& RowIds);
930 
// Factory functions: each allocates a TTable with `new` and returns it as a
// PTable, forwarding its arguments to the matching constructor.
// Default-constructed table.
931  static PTable New() { return new TTable(); }
// Table bound to Context (the matching constructor is not visible in this listing).
932  static PTable New(TTableContext* Context) { return new TTable(Context); }
// Table with schema S bound to Context.
933  static PTable New(const Schema& S, TTableContext* Context) {
934  return new TTable(S, Context);
935  }
// Table built from an int-to-int hash, with column names Col1 and Col2.
937  static PTable New(const THash<TInt,TInt>& H, const TStr& Col1,
938  const TStr& Col2, TTableContext* Context, const TBool IsStrKeys = false) {
939  return new TTable(H, Col1, Col2, Context, IsStrKeys);
940  }
// Table built from an int-to-float hash, with column names Col1 and Col2.
942  static PTable New(const THash<TInt,TFlt>& H, const TStr& Col1,
943  const TStr& Col2, TTableContext* Context, const TBool IsStrKeys = false) {
944  return new TTable(H, Col1, Col2, Context, IsStrKeys);
945  }
// Copy of an existing table, made with the TTable copy constructor.
947  static PTable New(const PTable Table) { return new TTable(*Table); }
949  // static PTable New(const PTable Table, const TStr& TableName) {
950  // PTable T = New(Table); T->Name = TableName;
951  // return T;
952  // }
954  static void GetSchema(const TStr& InFNm, Schema& S, const char& Separator = '\t');
955 /***** Save / Load functions *****/
957  static PTable LoadSS(const Schema& S, const TStr& InFNm, TTableContext* Context,
958  const char& Separator = '\t', TBool HasTitleLine = false);
960  static PTable LoadSS(const Schema& S, const TStr& InFNm, TTableContext* Context,
961  const TIntV& RelevantCols, const char& Separator = '\t', TBool HasTitleLine = false);
963  void SaveSS(const TStr& OutFNm);
965  void SaveBin(const TStr& OutFNm);
967 
970  static PTable Load(TSIn& SIn, TTableContext* Context){ return new TTable(SIn, Context);}
972 
974  void Save(TSOut& SOut);
976  void Dump(FILE *OutF=stdout) const;
977 
979  static PTable TableFromHashMap(const THash<TInt,TInt>& H, const TStr& Col1, const TStr& Col2,
980  TTableContext* Context, const TBool IsStrKeys = false) {
981  PTable T = New(H, Col1, Col2, Context, IsStrKeys);
982  T->InitIds();
983  return T;
984  }
986  static PTable TableFromHashMap(const THash<TInt,TFlt>& H, const TStr& Col1, const TStr& Col2,
987  TTableContext* Context, const TBool IsStrKeys = false) {
988  PTable T = New(H, Col1, Col2, Context, IsStrKeys);
989  T->InitIds();
990  return T;
991  }
993  void AddRow(const TTableRow& Row) { AddRow(Row.GetIntVals(), Row.GetFltVals(), Row.GetStrVals()); };
994 
997  return Context;
998  }
1001 
1002 /***** Value Getters - getValue(column name, physical row Idx) *****/
1004  TInt GetColIdx(const TStr& ColName) const {
1005  TStr NColName = NormalizeColName(ColName);
1006  return ColTypeMap.IsKey(NColName) ? ColTypeMap.GetDat(NColName).Val2 : TInt(-1);
1007  }
1008 
1009  // No type checking. Assuming ColName actually refers to the right type.
1011  TInt GetIntVal(const TStr& ColName, const TInt& RowIdx) {
1012  return IntCols[GetColIdx(ColName)][RowIdx];
1013  }
1015  TFlt GetFltVal(const TStr& ColName, const TInt& RowIdx) {
1016  return FltCols[GetColIdx(ColName)][RowIdx];
1017  }
// String stored in column ColName at row RowIdx; resolves the column index
// with GetColIdx and delegates to the index-based GetStrVal. GetColIdx yields
// -1 for an unknown column and that value is passed on unchecked.
1019  TStr GetStrVal(const TStr& ColName, const TInt& RowIdx) const {
1020  return GetStrVal(GetColIdx(ColName), RowIdx);
1021  }
1022 
// Raw entry of StrColMaps at (ColIdx, RowIdx), i.e. the integer that stands
// for the string rather than the string itself. No bounds check.
1024  TInt GetStrMapById(TInt ColIdx, TInt RowIdx) const {
1025  return StrColMaps[ColIdx][RowIdx];
1026  }
1027 
// Same as GetStrMapById, with the column resolved by name through GetColIdx
// (-1 for an unknown column is used as an index unchecked).
1029  TInt GetStrMapByName(const TStr& ColName, TInt RowIdx) const {
1030  return StrColMaps[GetColIdx(ColName)][RowIdx];
1031  }
1032 
// Alias for the index-based GetStrVal.
1034  TStr GetStrValById(TInt ColIdx, TInt RowIdx) const {
1035  return GetStrVal(ColIdx, RowIdx);
1036  }
1037 
// Alias for the name-based GetStrVal.
1039  TStr GetStrValByName(const TStr& ColName, const TInt& RowIdx) const {
1040  return GetStrVal(ColName, RowIdx);
1041  }
1042 
1044 
1050  TIntV GetIntRowIdxByVal(const TStr& ColName, const TInt& Val) const;
1052 
1058  TIntV GetStrRowIdxByMap(const TStr& ColName, const TInt& Map) const;
1060 
1066  TIntV GetFltRowIdxByVal(const TStr& ColName, const TFlt& Val) const;
1067 
1069 
1077  TInt RequestIndexInt(const TStr& ColName);
1079 
1087  TInt RequestIndexFlt(const TStr& ColName);
1089 
1097  TInt RequestIndexStrMap(const TStr& ColName);
1098 
// Looks up KeyId in the context's StringVals and returns the result as a
// TStr. KeyId is not range-checked here.
1100  TStr GetStr(const TInt& KeyId) const {
1101  return Context->StringVals.GetKey(KeyId);
1102  }
1103 
1104 /***** Value Getters - getValue(col idx, row Idx) *****/
1105  // No type and bound checking
1107  TInt GetIntValAtRowIdx(const TInt& ColIdx, const TInt& RowIdx) {
1108  return IntCols[ColIdx][RowIdx];
1109  }
1111  TFlt GetFltValAtRowIdx(const TInt& ColIdx, const TInt& RowIdx) {
1112  return FltCols[ColIdx][RowIdx];
1113  }
1114 
1117 
1118 /***** Graph handling *****/
1121  TInt WindowSize, TInt JumpSize, TInt StartVal = TInt::Mn, TInt EndVal = TInt::Mx);
1123  TVec<PNEANet> ToVarGraphSequence(TStr SplitAttr, TAttrAggr AggrPolicy, TIntPrV SplitIntervals);
1126 
1128 
1132  TInt WindowSize, TInt JumpSize, TInt StartVal = TInt::Mn, TInt EndVal = TInt::Mx);
1134 
1137  PNEANet ToVarGraphSequenceIterator(TStr SplitAttr, TAttrAggr AggrPolicy, TIntPrV SplitIntervals);
1139 
1147 
// Returns the stored SrcCol member (SetSrcCol stores it in normalised form).
1149  TStr GetSrcCol() const { return SrcCol; }
1151  void SetSrcCol(const TStr& Src) {
1152  if (!IsColName(Src)) { TExcept::Throw(Src + ": no such column"); }
1153  SrcCol = NormalizeColName(Src);
1154  }
// Returns the stored DstCol member (SetDstCol stores it in normalised form).
1156  TStr GetDstCol() const { return DstCol; }
1158  void SetDstCol(const TStr& Dst) {
1159  if (!IsColName(Dst)) { TExcept::Throw(Dst + ": no such column"); }
1160  DstCol = NormalizeColName(Dst);
1161  }
1163  void AddEdgeAttr(const TStr& Attr) { AddGraphAttribute(Attr, true, false, false); }
1165  void AddEdgeAttr(TStrV& Attrs) { AddGraphAttributeV(Attrs, true, false, false); }
1167  void AddSrcNodeAttr(const TStr& Attr) { AddGraphAttribute(Attr, false, true, false); }
1169  void AddSrcNodeAttr(TStrV& Attrs) { AddGraphAttributeV(Attrs, false, true, false); }
1171  void AddDstNodeAttr(const TStr& Attr) { AddGraphAttribute(Attr, false, false, true); }
1173  void AddDstNodeAttr(TStrV& Attrs) { AddGraphAttributeV(Attrs, false, false, true); }
1175  void AddNodeAttr(const TStr& Attr) { AddSrcNodeAttr(Attr); AddDstNodeAttr(Attr); }
1177  void AddNodeAttr(TStrV& Attrs) { AddSrcNodeAttr(Attrs); AddDstNodeAttr(Attrs); }
1179  void SetCommonNodeAttrs(const TStr& SrcAttr, const TStr& DstAttr, const TStr& CommonAttrName){
1180  CommonNodeAttrs.Add(TStrTr(NormalizeColName(SrcAttr), NormalizeColName(DstAttr), NormalizeColName(CommonAttrName)));
1181  }
/// Gets src node int attribute name vector.
1183  TStrV GetSrcNodeIntAttrV() const;
/// Gets dst node int attribute name vector.
1185  TStrV GetDstNodeIntAttrV() const;
/// Gets edge int attribute name vector.
1187  TStrV GetEdgeIntAttrV() const;
/// Gets src node flt attribute name vector.
1189  TStrV GetSrcNodeFltAttrV() const;
/// Gets dst node flt attribute name vector.
1191  TStrV GetDstNodeFltAttrV() const;
/// Gets edge flt attribute name vector.
1193  TStrV GetEdgeFltAttrV() const;
/// Gets src node str attribute name vector.
1195  TStrV GetSrcNodeStrAttrV() const;
/// Gets dst node str attribute name vector.
1197  TStrV GetDstNodeStrAttrV() const;
/// Gets edge str attribute name vector.
1199  TStrV GetEdgeStrAttrV() const;
1200 
/// NOTE(review): by name, builds a table describing the nodes of Network, using Context
/// as the table's execution context - confirm in table.cpp.
1202  static PTable GetNodeTable(const PNEANet& Network, TTableContext* Context);
/// NOTE(review): by name, builds a table describing the edges of Network - confirm.
1204  static PTable GetEdgeTable(const PNEANet& Network, TTableContext* Context);
1205 
1206 #ifdef USE_OPENMP
/// Edge-table variant taking a PNGraphMP; only declared when USE_OPENMP is defined.
1207  static PTable GetEdgeTablePN(const PNGraphMP& Network, TTableContext* Context);
1209 #endif // USE_OPENMP
1210 
1212  static PTable GetFltNodePropertyTable(const PNEANet& Network, const TIntFltH& Property,
1213  const TStr& NodeAttrName, const TAttrType& NodeAttrType, const TStr& PropertyAttrName,
1215 
1216 /***** Basic Getters *****/
1218  TAttrType GetColType(const TStr& ColName) const {
1219  TStr NColName = NormalizeColName(ColName);
1220  return ColTypeMap.GetDat(NColName).Val1;
1221  }
1223  TInt GetNumRows() const { return NumRows;}
1225  TInt GetNumValidRows() const { return NumValidRows;}
1226 
1229 
1230 /***** Iterators *****/
1232  TRowIterator BegRI() const { return TRowIterator(FirstValidRow, this);}
1234  TRowIterator EndRI() const { return TRowIterator(TTable::Last, this);}
/// Partitions the table into NumPartitions and populates Partitions with the ranges.
1240  void GetPartitionRanges(TIntPrV& Partitions, TInt NumPartitions) const;
1241 
1242 /***** Table Operations *****/
/// Renames column Column to NewLabel.
1244  void Rename(const TStr& Column, const TStr& NewLabel);
1245 
/// NOTE(review): by name, keeps one row per distinct value of column Col - confirm.
1247  void Unique(const TStr& Col);
/// NOTE(review): multi-column form of Unique; the effect of Ordered is not visible in
/// this header - confirm in table.cpp.
1249  void Unique(const TStrV& Cols, TBool Ordered = true);
1250 
1252 
1256  void Select(TPredicate& Predicate, TIntV& SelectedRows, TBool Remove = true);
1257  void Select(TPredicate& Predicate) {
1258  TIntV SelectedRows;
1259  Select(Predicate, SelectedRows, true);
1260  }
/// NOTE(review): by name and parameters, evaluates Predicate on the rows and writes
/// PositiveLabel / NegativeLabel into column LabelName instead of removing rows - confirm.
1261  void Classify(TPredicate& Predicate, const TStr& LabelName, const TInt& PositiveLabel = 1,
1262  const TInt& NegativeLabel = 0);
1263 
1265 
1267  void SelectAtomic(const TStr& Col1, const TStr& Col2, TPredComp Cmp,
1268  TIntV& SelectedRows, TBool Remove = true);
1269  void SelectAtomic(const TStr& Col1, const TStr& Col2, TPredComp Cmp) {
1270  TIntV SelectedRows;
1271  SelectAtomic(Col1, Col2, Cmp, SelectedRows, true);
1272  }
/// NOTE(review): by name and parameters, compares column Col1 with column Col2 using Cmp
/// and writes PositiveLabel / NegativeLabel into column LabelName - confirm.
1273  void ClassifyAtomic(const TStr& Col1, const TStr& Col2, TPredComp Cmp,
1274  const TStr& LabelName, const TInt& PositiveLabel = 1, const TInt& NegativeLabel = 0);
1275 
/// Core selection routine comparing column Col against the constant Val using Cmp.
/// The templated overloads in this class wrap their value in a TPrimitive and forward
/// here with different Remove / Table flag combinations.
/// NOTE(review): SelectedRows / SelectedTable look like outputs, with Remove and Table
/// choosing between in-place filtering and filling SelectedTable - confirm in table.cpp.
1277  void SelectAtomicConst(const TStr& Col, const TPrimitive& Val, TPredComp Cmp,
1278  TIntV& SelectedRows, PTable& SelectedTable, TBool Remove = true, TBool Table = true);
1279 
1280  template <class T>
1281  void SelectAtomicConst(const TStr& Col, const T& Val, TPredComp Cmp) {
1282  TIntV SelectedRows;
1283  PTable SelectedTable;
1284  SelectAtomicConst(Col, TPrimitive(Val), Cmp, SelectedRows, SelectedTable, true, false);
1285  }
1286  template <class T>
1287  void SelectAtomicConst(const TStr& Col, const T& Val, TPredComp Cmp, PTable& SelectedTable) {
1288  TIntV SelectedRows;
1289  SelectAtomicConst(Col, TPrimitive(Val), Cmp, SelectedRows, SelectedTable, false, true);
1290  }
1291  template <class T>
1292  void ClassifyAtomicConst(const TStr& Col, const T& Val, TPredComp Cmp,
1293  const TStr& LabelName, const TInt& PositiveLabel = 1, const TInt& NegativeLabel = 0) {
1294  TIntV SelectedRows;
1295  PTable SelectedTable;
1296  SelectAtomicConst(Col, TPrimitive(Val), Cmp, SelectedRows, SelectedTable, false, false);
1297  ClassifyAux(SelectedRows, LabelName, PositiveLabel, NegativeLabel);
1298  }
1299 
1300  void SelectAtomicIntConst(const TStr& Col, const TInt& Val, TPredComp Cmp) {
1301  SelectAtomicConst(Col, Val, Cmp);
1302  }
1303  void SelectAtomicIntConst(const TStr& Col, const TInt& Val, TPredComp Cmp, PTable& SelectedTable) {
1304  SelectAtomicConst(Col, Val, Cmp, SelectedTable);
1305  }
1306 
1307  void SelectAtomicStrConst(const TStr& Col, const TStr& Val, TPredComp Cmp) {
1308  SelectAtomicConst(Col, Val, Cmp);
1309  }
1310  void SelectAtomicStrConst(const TStr& Col, const TStr& Val, TPredComp Cmp, PTable& SelectedTable) {
1311  SelectAtomicConst(Col, Val, Cmp, SelectedTable);
1312  }
1313 
1314  void SelectAtomicFltConst(const TStr& Col, const TFlt& Val, TPredComp Cmp) {
1315  SelectAtomicConst(Col, Val, Cmp);
1316  }
1317  void SelectAtomicFltConst(const TStr& Col, const TFlt& Val, TPredComp Cmp, PTable& SelectedTable) {
1318  SelectAtomicConst(Col, Val, Cmp, SelectedTable);
1319  }
1320 
1322 
/// Groups rows depending on values of GroupBy columns.
/// NOTE(review): GroupColName presumably names the column that receives the group id - confirm.
1325  void Group(const TStrV& GroupBy, const TStr& GroupColName, TBool Ordered = true, TBool UsePhysicalIds = true);
1326 
1328 
/// NOTE(review): by name, stores in column CountColName a count derived from the values
/// of column Col - confirm in table.cpp.
1331  void Count(const TStr& CountColName, const TStr& Col);
1332 
/// Orders the rows according to the values in columns of OrderBy.
/// NOTE(review): the reference text says "in descending lexicographic order" while Asc
/// defaults to true - confirm which direction is actually the default.
1334  void Order(const TStrV& OrderBy, TStr OrderColName = "", TBool ResetRankByMSC = false, TBool Asc = true);
1335 
/// NOTE(review): by name and parameters, groups by GroupByAttrs and stores the AggOp
/// aggregate of ValAttr per group in ResAttr - confirm.
1337  void Aggregate(const TStrV& GroupByAttrs, TAttrAggr AggOp, const TStr& ValAttr,
1338  const TStr& ResAttr, TBool Ordered = true);
1339 
/// NOTE(review): by name and parameters, aggregates across the columns AggrAttrs with
/// AggOp and stores the result in ResAttr - confirm.
1341  void AggregateCols(const TStrV& AggrAttrs, TAttrAggr AggOp, const TStr& ResAttr);
1342 
/// NOTE(review): by name and return type, splits this table into one table per group of
/// GroupByAttrs - confirm.
1344  TVec<PTable> SpliceByGroup(const TStrV& GroupByAttrs, TBool Ordered = true);
1345 
1347 
1350  PTable Join(const TStr& Col1, const TTable& Table, const TStr& Col2);
1351  PTable Join(const TStr& Col1, const PTable& Table, const TStr& Col2) {
1352  return Join(Col1, *Table, Col2);
1353  }
/// NOTE(review): join variant taking a key column and a join column per table plus an
/// integer Threshold; how Threshold and PerJoinKey are applied is defined in table.cpp
/// and not visible in this header - confirm before relying on it.
1354  PTable ThresholdJoin(const TStr& KeyCol1, const TStr& JoinCol1, const TTable& Table, const TStr& KeyCol2, const TStr& JoinCol2, TInt Threshold, TBool PerJoinKey = false);
1355 
1357  PTable SelfJoin(const TStr& Col) { return Join(Col, *this, Col); }
1358  PTable SelfSimJoin(const TStrV& Cols, const TStr& DistanceColName, const TSimType& SimType, const TFlt& Threshold) { return SimJoin(Cols, *this, Cols, DistanceColName, SimType, Threshold); }
1360 
/// Performs join if the distance between two rows is less than the specified threshold.
/// NOTE(review): this form takes a single grouping attribute GroupAttr - confirm how the
/// groups restrict the join.
1362  PTable SelfSimJoinPerGroup(const TStr& GroupAttr, const TStr& SimCol, const TStr& DistanceColName, const TSimType& SimType, const TFlt& Threshold);
1363 
/// Overload taking a vector of group-by columns instead of a single grouping attribute.
1365  PTable SelfSimJoinPerGroup(const TStrV& GroupBy, const TStr& SimCol, const TStr& DistanceColName, const TSimType& SimType, const TFlt& Threshold);
1366 
/// NOTE(review): similarity join between this table (columns Cols1) and Table (columns
/// Cols2); presumably the distance is written to DistanceColName and pairs are kept
/// according to Threshold - confirm in table.cpp.
1368  PTable SimJoin(const TStrV& Cols1, const TTable& Table, const TStrV& Cols2, const TStr& DistanceColName, const TSimType& SimType, const TFlt& Threshold);
/// Selects first N rows from the table.
1370  void SelectFirstNRows(const TInt& N);
1371 
1372  // Computes distances between elements in this->Col1 and Table->Col2 according
1373  // to given metric. Store the distances in DistCol, but keep only rows where
1374  // distance <= threshold
1375  // void Dist(const TStr& Col1, const TTable& Table, const TStr Col2, const TStr& DistColName,
1376  // const TMetric& Metric, TFlt threshold);
1377 
1379 
/// Releases memory of deleted rows, and defrags.
1382  void Defrag();
1383 
/// NOTE(review): by name, stores ColVals as the int column ColName - confirm whether an
/// existing column is overwritten or a new one is added.
1385  void StoreIntCol(const TStr& ColName, const TIntV& ColVals);
/// Float counterpart of StoreIntCol.
1387  void StoreFltCol(const TStr& ColName, const TFltV& ColVals);
/// String counterpart of StoreIntCol.
1389  void StoreStrCol(const TStr& ColName, const TStrV& ColVals);
1390 
1391  // Assumption: KeyAttr is a primary key in this table, and FKeyAttr is a primary key in
1392  // the argument table. Equivalent to SQL's: UPDATE this SET UpdateAttr = ReadAttr WHERE KeyAttr = FKeyAttr
// NOTE(review): DefaultFltVal is presumably written to rows without a matching key - confirm.
1393  void UpdateFltFromTable(const TStr& KeyAttr, const TStr& UpdateAttr, const TTable& Table,
1394  const TStr& FKeyAttr, const TStr& ReadAttr, TFlt DefaultFltVal = 0.0);
1395 #ifdef GCC_ATOMIC
// Variant of UpdateFltFromTable with the same parameters; only declared when GCC_ATOMIC is defined.
1396  void UpdateFltFromTableMP(const TStr& KeyAttr, const TStr& UpdateAttr, const TTable& Table,
1397  const TStr& FKeyAttr, const TStr& ReadAttr, TFlt DefaultFltVal = 0.0);
1398  // TODO: this should be a generic vector operation (parallel equivalent to TVec::PutAll)
1399  void SetFltColToConstMP(TInt UpdateColIdx, TFlt DefaultFltVal);
1400 #endif // GCC_ATOMIC
1401 
1403  PTable Union(const TTable& Table);
1404  PTable Union(const PTable& Table) { return Union(*Table); };
1406  PTable UnionAll(const TTable& Table);
1407  PTable UnionAll(const PTable& Table) { return UnionAll(*Table); };
1409  void UnionAllInPlace(const TTable& Table);
1410  void UnionAllInPlace(const PTable& Table) { return UnionAllInPlace(*Table); };
1412  PTable Intersection(const TTable& Table);
1413  PTable Intersection(const PTable& Table) { return Intersection(*Table); };
1415  PTable Minus(TTable& Table);
1416  PTable Minus(const PTable& Table) { return Minus(*Table); };
/// NOTE(review): by name and return type, returns a new table restricted to the columns
/// in ProjectCols - confirm.
1418  PTable Project(const TStrV& ProjectCols);
/// In-place form: restricts this table to the columns listed in ProjectCols.
1420  void ProjectInPlace(const TStrV& ProjectCols);
1421 
1422  /* Column-wise arithmetic operations */
1423 
1425 
/// Generic column-wise arithmetic on two columns of this table, selected by op.
/// NOTE(review): the result presumably goes to ResAttr - confirm in table.cpp.
1428  void ColGenericOp(const TStr& Attr1, const TStr& Attr2, const TStr& ResAttr, TArithOp op);
1429 #ifdef USE_OPENMP
// Index-based variant; only declared when USE_OPENMP is defined.
1430  void ColGenericOpMP(TInt ArgColIdx1, TInt ArgColIdx2, TAttrType ArgType1, TAttrType ArgType2, TInt ResColIdx, TArithOp op);
1431 #endif // USE_OPENMP
/// Performs columnwise addition. See TTable::ColGenericOp.
1432  void ColAdd(const TStr& Attr1, const TStr& Attr2, const TStr& ResultAttrName="");
/// Columnwise subtraction counterpart of ColAdd.
1435  void ColSub(const TStr& Attr1, const TStr& Attr2, const TStr& ResultAttrName="");
/// Columnwise multiplication counterpart of ColAdd.
1437  void ColMul(const TStr& Attr1, const TStr& Attr2, const TStr& ResultAttrName="");
/// Columnwise division counterpart of ColAdd.
1439  void ColDiv(const TStr& Attr1, const TStr& Attr2, const TStr& ResultAttrName="");
/// Columnwise modulo counterpart of ColAdd.
1441  void ColMod(const TStr& Attr1, const TStr& Attr2, const TStr& ResultAttrName="");
/// Columnwise minimum counterpart of ColAdd.
1443  void ColMin(const TStr& Attr1, const TStr& Attr2, const TStr& ResultAttrName="");
/// Columnwise maximum counterpart of ColAdd.
1445  void ColMax(const TStr& Attr1, const TStr& Attr2, const TStr& ResultAttrName="");
1446 
/// Generic column-wise arithmetic between a column of this table (Attr1) and a column of
/// another Table (Attr2).
/// NOTE(review): AddToFirstTable presumably selects which table receives ResAttr - confirm.
1448  void ColGenericOp(const TStr& Attr1, TTable& Table, const TStr& Attr2, const TStr& ResAttr,
1449  TArithOp op, TBool AddToFirstTable);
1450  // void ColGenericOpMP(TTable& Table, TBool AddToFirstTable, TInt ArgColIdx1, TInt ArgColIdx2,
1451  // TAttrType ArgType1, TAttrType ArgType2, TInt ResColIdx, TArithOp op);
/// Two-table form of ColAdd.
1453  void ColAdd(const TStr& Attr1, TTable& Table, const TStr& Attr2, const TStr& ResAttr="",
1454  TBool AddToFirstTable=true);
/// Two-table form of ColSub.
1456  void ColSub(const TStr& Attr1, TTable& Table, const TStr& Attr2, const TStr& ResAttr="",
1457  TBool AddToFirstTable=true);
/// Two-table form of ColMul.
1459  void ColMul(const TStr& Attr1, TTable& Table, const TStr& Attr2, const TStr& ResAttr="",
1460  TBool AddToFirstTable=true);
/// Two-table form of ColDiv.
1462  void ColDiv(const TStr& Attr1, TTable& Table, const TStr& Attr2, const TStr& ResAttr="",
1463  TBool AddToFirstTable=true);
/// Two-table form of ColMod.
1465  void ColMod(const TStr& Attr1, TTable& Table, const TStr& Attr2, const TStr& ResAttr="",
1466  TBool AddToFirstTable=true);
1467 
/// Generic column-wise arithmetic between column Attr1 and the scalar Num.
/// NOTE(review): floatCast presumably forces a float result for int columns - confirm.
1469  void ColGenericOp(const TStr& Attr1, const TFlt& Num, const TStr& ResAttr, TArithOp op, const TBool floatCast);
1470 #ifdef USE_OPENMP
// Index-based scalar variant; only declared when USE_OPENMP is defined.
1471  void ColGenericOpMP(const TInt& ColIdx1, const TInt& ColIdx2, TAttrType ArgType, const TFlt& Num, TArithOp op, TBool ShouldCast);
1472 #endif // USE_OPENMP
/// Scalar form of ColAdd.
1473  void ColAdd(const TStr& Attr1, const TFlt& Num, const TStr& ResultAttrName="", const TBool floatCast=false);
/// Scalar form of ColSub.
1476  void ColSub(const TStr& Attr1, const TFlt& Num, const TStr& ResultAttrName="", const TBool floatCast=false);
/// Scalar form of ColMul.
1478  void ColMul(const TStr& Attr1, const TFlt& Num, const TStr& ResultAttrName="", const TBool floatCast=false);
/// Scalar form of ColDiv.
1480  void ColDiv(const TStr& Attr1, const TFlt& Num, const TStr& ResultAttrName="", const TBool floatCast=false);
/// Scalar form of ColMod.
1482  void ColMod(const TStr& Attr1, const TFlt& Num, const TStr& ResultAttrName="", const TBool floatCast=false);
1483 
1484  /* Column-wise string operations */
1485 
/// Concatenates two string columns.
/// NOTE(review): Sep is presumably placed between the two values and ResAttr names the
/// result column - confirm.
1487  void ColConcat(const TStr& Attr1, const TStr& Attr2, const TStr& Sep = "", const TStr& ResAttr="");
/// Two-table form: Attr1 belongs to this table and Attr2 to Table.
1489  void ColConcat(const TStr& Attr1, TTable& Table, const TStr& Attr2, const TStr& Sep = "", const TStr& ResAttr="",
1490  TBool AddToFirstTable=true);
/// Constant form: concatenates column Attr1 with the fixed string Val.
1492  void ColConcatConst(const TStr& Attr1, const TStr& Val, const TStr& Sep = "", const TStr& ResAttr="");
1493 
/// NOTE(review): by name, reads the values of int column ColName into Result - confirm
/// whether invalid (deleted) rows are skipped.
1495  void ReadIntCol(const TStr& ColName, TIntV& Result) const;
/// Float counterpart of ReadIntCol.
1497  void ReadFltCol(const TStr& ColName, TFltV& Result) const;
/// String counterpart of ReadIntCol.
1499  void ReadStrCol(const TStr& ColName, TStrV& Result) const;
1500 
/// NOTE(review): by name, initializes the row id column of this table - confirm.
1502  void InitIds();
1503 
1505 
/// NOTE(review): the semantics are not visible in this header; the parameters suggest
/// rows are ordered by OrderCol within groups of GroupBy and related to their next K
/// successors - confirm in table.cpp.
1507  PTable IsNextK(const TStr& OrderCol, TInt K, const TStr& GroupBy, const TStr& RankColName = "");
1508 
1511  const double& C = 0.85, const double& Eps = 1e-4, const int& MaxIter = 100) {
1512  TVec<PTable> TableSeq(GraphSeq.Len());
1513  TSnap::MapPageRank(GraphSeq, TableSeq, Context, C, Eps, MaxIter);
1514  return TTableIterator(TableSeq);
1515  }
1516 
1519  TTableContext* Context, const int& MaxIter = 20) {
1520  TVec<PTable> TableSeq(GraphSeq.Len());
1521  TSnap::MapHits(GraphSeq, TableSeq, Context, MaxIter);
1522  return TTableIterator(TableSeq);
1523  }
1524 
// NOTE(review): by name, prints size information about this table - confirm the output format.
1525  void PrintSize();
// NOTE(review): by name, prints size information about the table's Context - confirm.
1526  void PrintContextSize();
/// Returns approximate memory used by table in [KB].
1528  TSize GetMemUsedKB();
1531 
// Both friends get access to the private members of TTable.
1532  friend class TPt<TTable>;
1533  friend class TRowIterator;
1535 };
1536 
1538 
// NOTE(review): the signature line (table.h:1540) is missing from this listing, so the
// return type and parameter list are not visible here.
// Looks FNodeVal up in NodeVals. If it is absent, a node whose id is the current size of
// NodeVals is added to Graph, the mapping FNodeVal -> id is recorded, and the new id is
// returned. Otherwise the id already recorded for FNodeVal is returned.
1539 template<class T>
1541  if (!NodeVals.IsKey(FNodeVal)) {
1542  TInt NodeVal = NodeVals.Len();
1543  Graph->AddNode(NodeVal);
1544  NodeVals.AddKey(FNodeVal); // NOTE(review): looks redundant with the AddDat on the next line - confirm
1545  NodeVals.AddDat(FNodeVal, NodeVal);
1546  return NodeVal;
1547  } else { return NodeVals.GetDat(FNodeVal); }
1548 }
1549 
// NOTE(review): the signature line (table.h:1551) is missing from this listing, so the
// return type and parameter list are not visible here.
// Reduces the vector V to a single value of type T according to Policy.
// Every case except aaMedian reads V[0] (aaLast reads V[V.Len()-1]) with no emptiness
// check, so V presumably must be non-empty - confirm at the call sites.
1550 template <class T>
1552  switch (Policy) {
1553  case aaMin: {
1554  T Res = V[0];
1555  for (TInt i = 1; i < V.Len(); i++) {
1556  if (V[i] < Res) { Res = V[i]; }
1557  }
1558  return Res;
1559  }
1560  case aaMax: {
1561  T Res = V[0];
1562  for (TInt i = 1; i < V.Len(); i++) {
1563  if (V[i] > Res) { Res = V[i]; }
1564  }
1565  return Res;
1566  }
1567  case aaFirst: {
1568  return V[0];
1569  }
1570  case aaLast:{
1571  return V[V.Len()-1];
1572  }
1573  case aaSum: {
1574  T Res = V[0];
1575  for (TInt i = 1; i < V.Len(); i++) {
1576  Res = Res + V[i];
1577  }
1578  return Res;
1579  }
// NOTE(review): with the division below commented out, aaMean returns the same value as
// aaSum (the plain sum), not the mean.
1580  case aaMean: {
1581  T Res = V[0];
1582  for (TInt i = 1; i < V.Len(); i++) {
1583  Res = Res + V[i];
1584  }
1585  //Res = Res / V.Len(); // TODO: Handle Str case separately?
1586  return Res;
1587  }
// Sorts V in place, so the caller's vector is reordered, then returns the element at
// index Len()/2 (the upper of the two middle elements when Len() is even).
1588  case aaMedian: {
1589  V.Sort();
1590  return V[V.Len()/2];
1591  }
1592  case aaCount: {
1593  // NOTE: Code should never reach here
1594  // I had to put this here to avoid a compiler warning.
1595  // Is there a better way to do this?
1596  return V[0];
1597  }
1598  }
1599  // Added to remove a compiler warning.
// Reached only when Policy matches none of the cases above; returns a default-initialized T.
1600  T ShouldNotComeHere;
1601  return ShouldNotComeHere;
1602 }
1603 
1604 template <class T>
1605 void TTable::GroupByIntCol(const TStr& GroupBy, T& Grouping,
1606  const TIntV& IndexSet, TBool All, TBool UsePhysicalIds) const {
1607  TInt IdColIdx = GetColIdx(IdColName);
1608  if(!UsePhysicalIds && IdColIdx < 0){
1609  TExcept::Throw("Grouping: Either use physical row ids, or have an id column");
1610  }
1611  // TO do: add a check if grouping already exists and is valid
1612  GroupingSanityCheck(GroupBy, atInt);
1613  if (All) {
1614  // Optimize for the common and most expensive case - iterate over only valid rows.
1615  for (TRowIterator it = BegRI(); it < EndRI(); it++) {
1616  TInt idx = UsePhysicalIds ? it.GetRowIdx() : it.GetIntAttr(IdColIdx);
1617  UpdateGrouping<TInt>(Grouping, it.GetIntAttr(GroupBy), idx);
1618  }
1619  } else {
1620  // Consider only rows in IndexSet.
1621  for (TInt i = 0; i < IndexSet.Len(); i++) {
1622  if (IsRowValid(IndexSet[i])) {
1623  TInt RowIdx = IndexSet[i];
1624  const TIntV& Col = IntCols[GetColIdx(GroupBy)];
1625  TInt idx = UsePhysicalIds ? RowIdx : IntCols[IdColIdx][RowIdx];
1626  UpdateGrouping<TInt>(Grouping, Col[RowIdx], idx);
1627  }
1628  }
1629  }
1630 }
1631 
1632 template <class T>
1633 void TTable::GroupByFltCol(const TStr& GroupBy, T& Grouping,
1634  const TIntV& IndexSet, TBool All, TBool UsePhysicalIds) const {
1635  TInt IdColIdx = GetColIdx(IdColName);
1636  if(!UsePhysicalIds && IdColIdx < 0){
1637  TExcept::Throw("Grouping: Either use physical row ids, or have an id column");
1638  }
1639  GroupingSanityCheck(GroupBy, atFlt);
1640  if (All) {
1641  // Optimize for the common and most expensive case - iterate over only valid rows.
1642  for (TRowIterator it = BegRI(); it < EndRI(); it++) {
1643  TInt idx = UsePhysicalIds ? it.GetRowIdx() : it.GetIntAttr(IdColIdx);
1644  UpdateGrouping<TFlt>(Grouping, it.GetFltAttr(GroupBy), idx);
1645  }
1646  } else {
1647  // Consider only rows in IndexSet.
1648  for (TInt i = 0; i < IndexSet.Len(); i++) {
1649  if (IsRowValid(IndexSet[i])) {
1650  TInt RowIdx = IndexSet[i];
1651  const TFltV& Col = FltCols[GetColIdx(GroupBy)];
1652  TInt idx = UsePhysicalIds ? RowIdx : IntCols[IdColIdx][RowIdx];
1653  UpdateGrouping<TFlt>(Grouping, Col[RowIdx], idx);
1654  }
1655  }
1656  }
1657 }
1658 
1659 template <class T>
1660 void TTable::GroupByStrCol(const TStr& GroupBy, T& Grouping,
1661  const TIntV& IndexSet, TBool All, TBool UsePhysicalIds) const {
1662  TInt IdColIdx = GetColIdx(IdColName);
1663  if(!UsePhysicalIds && IdColIdx < 0){
1664  TExcept::Throw("Grouping: Either use physical row ids, or have an id column");
1665  }
1666  GroupingSanityCheck(GroupBy, atStr);
1667  if (All) {
1668  // Optimize for the common and most expensive case - iterate over all valid rows.
1669  for (TRowIterator it = BegRI(); it < EndRI(); it++) {
1670  TInt idx = UsePhysicalIds ? it.GetRowIdx() : it.GetIntAttr(IdColIdx);
1671  UpdateGrouping<TInt>(Grouping, it.GetStrMapByName(GroupBy), idx);
1672  }
1673  } else {
1674  // Consider only rows in IndexSet.
1675  for (TInt i = 0; i < IndexSet.Len(); i++) {
1676  if (IsRowValid(IndexSet[i])) {
1677  TInt RowIdx = IndexSet[i];
1678  TInt ColIdx = GetColIdx(GroupBy);
1679  TInt idx = UsePhysicalIds ? RowIdx : IntCols[IdColIdx][RowIdx];
1680  UpdateGrouping<TInt>(Grouping, StrColMaps[ColIdx][RowIdx], idx);
1681  }
1682  }
1683  }
1684 }
1685 
1686 template <class T>
1687 void TTable::UpdateGrouping(THash<T,TIntV>& Grouping, T Key, TInt Val) const{
1688  if (Grouping.IsKey(Key)) {
1689  Grouping.GetDat(Key).Add(Val);
1690  } else {
1691  TIntV NewGroup;
1692  NewGroup.Add(Val);
1693  Grouping.AddDat(Key, NewGroup);
1694  }
1695 }
1696 
1697 #ifdef GCC_ATOMIC
1698 template <class T>
1699 void TTable::UpdateGrouping(THashMP<T,TIntV>& Grouping, T Key, TInt Val) const{
1700  if (Grouping.IsKey(Key)) {
1701  //printf("y\n");
1702  Grouping.GetDat(Key).Add(Val);
1703  } else {
1704  //printf("n\n");
1705  TIntV NewGroup;
1706  NewGroup.Add(Val);
1707  Grouping.AddDat(Key, NewGroup);
1708  }
1709 }
1710 #endif // GCC_ATOMIC
1711 
1712 /*
1713 template<class T>
1714 void TTable::RegisterGrouping(const T& Grouping, const TStr& GroupByCol, TBool UsePhysicalIds){
1715  TStrV GroupByVec;
1716  GroupByVec.Add(GroupByCol);
1717  GroupStmt Stmt(NormalizeColNameV(GroupByVec), true, UsePhysicalIds);
1718  GroupMapping.AddKey(Stmt);
1719  for(T::TIter it = Grouping.BegI(); it < Grouping.EndI(); it++){
1720  GroupMapping.GetDat(Stmt).AddDat(it.GetKey(), TIntV(it.GetDat()));
1721  }
1722 }
1723 */
1724 
1725 namespace TSnap {
1726 
1728  template <class PGraph>
1729  void MapPageRank(const TVec<PGraph>& GraphSeq, TVec<PTable>& TableSeq,
1730  TTableContext* Context, const double& C, const double& Eps, const int& MaxIter) {
1731  int NumGraphs = GraphSeq.Len();
1732  TableSeq.Reserve(NumGraphs, NumGraphs);
1733  // This loop is parallelizable.
1734  for (TInt i = 0; i < NumGraphs; i++){
1735  TIntFltH PRankH;
1736  GetPageRank(GraphSeq[i], PRankH, C, Eps, MaxIter);
1737  TableSeq[i] = TTable::TableFromHashMap(PRankH, "NodeId", "PageRank", Context, false);
1738  }
1739  }
1740 
1742  template <class PGraph>
1743  void MapHits(const TVec<PGraph>& GraphSeq, TVec<PTable>& TableSeq,
1744  TTableContext* Context, const int& MaxIter) {
1745  int NumGraphs = GraphSeq.Len();
1746  TableSeq.Reserve(NumGraphs, NumGraphs);
1747  // This loop is parallelizable.
1748  for (TInt i = 0; i < NumGraphs; i++){
1749  TIntFltH HubH;
1750  TIntFltH AuthH;
1751  GetHits(GraphSeq[i], HubH, AuthH, MaxIter);
1752  PTable HubT = TTable::TableFromHashMap(HubH, "NodeId", "Hub", Context, false);
1753  PTable AuthT = TTable::TableFromHashMap(AuthH, "NodeId", "Authority", Context, false);
1754  PTable HitsT = HubT->Join("NodeId", AuthT, "NodeId");
1755  HitsT->Rename("1.NodeId", "NodeId");
1756  HitsT->Rename("1.Hub", "Hub");
1757  HitsT->Rename("2.Authority", "Authority");
1758  TStrV V = TStrV(3, 0);
1759  V.Add("NodeId");
1760  V.Add("Hub");
1761  V.Add("Authority");
1762  HitsT->ProjectInPlace(V);
1763  TableSeq[i] = HitsT;
1764  }
1765  }
1766 }
1767 
1768 #endif //TABLE_H
1769 
Definition: bd.h:440
void UpdateGrouping(THash< T, TIntV > &Grouping, T Key, TInt Val) const
Template for utility function to update a grouping hash map.
Definition: table.h:1687
Definition: table.h:268
TStr GetDstCol() const
Gets the name of the column to be used as dst nodes in the graph.
Definition: table.h:1156
TSize GetMemUsedKB()
Returns approximate memory used by table in [KB].
Definition: table.cpp:3918
void ThresholdJoinInputCorrectness(const TStr &KeyCol1, const TStr &JoinCol1, const TTable &Table, const TStr &KeyCol2, const TStr &JoinCol2)
Definition: table.cpp:2458
void AddSchemaCol(const TStr &ColName, TAttrType ColType)
Adds column with name ColName and type ColType to the schema.
Definition: table.h:652
TFlt GetFltAttr(TInt ColIdx) const
Returns value of floating point attribute specified by float column index for current row...
Definition: table.cpp:159
TBool Valid
Definition: table.h:280
TInt RequestIndexInt(const TStr &ColName)
Creates Index for Int Column ColName.
Definition: table.cpp:5453
Definition: table.h:268
TBool IsLastGraphOfSequence()
Checks if the end of the graph sequence is reached.
Definition: table.cpp:3663
TBool IsAttr(const TStr &Attr)
Checks if Attr is an attribute of this table schema.
Definition: table.cpp:4605
void SetFltVal(TStr VarName, TFlt VarVal)
Set flt variable value in the predicate or all the children that use it.
Definition: table.h:100
void Order(const TStrV &OrderBy, TStr OrderColName="", TBool ResetRankByMSC=false, TBool Asc=true)
Orders the rows according to the values in columns of OrderBy (in descending lexicographic order)...
Definition: table.cpp:3220
TInt GetNumRows() const
Gets total number of rows in this table.
Definition: table.h:1223
void FillBucketsByInterval(TStr SplitAttr, TIntPrV SplitIntervals)
Fills RowIdBuckets with sets of row ids.
Definition: table.cpp:3577
void RemoveRow(TInt RowIdx, TInt PrevRowIdx)
Removes row with id RowIdx.
Definition: table.cpp:1115
Definition: table.h:268
void SelectAtomicConst(const TStr &Col, const T &Val, TPredComp Cmp, PTable &SelectedTable)
Definition: table.h:1287
void AddInt(const TInt &Val)
Adds int attribute to this row.
Definition: table.h:252
TStrV EdgeAttrV
List of columns (attributes) to serve as edge attributes.
Definition: table.h:601
THash< GroupStmt, THash< TGroupKey, TIntV > > GroupMapping
Maps grouping statements to their (group-by key –> group id) mapping.
Definition: table.h:591
TInt FirstValidRow
Physical index of first valid row.
Definition: table.h:563
int GetPrimHashCd() const
Returns primary hash code of the vector. Used by THash.
Definition: ds.h:948
TStr DenormalizeColName(const TStr &ColName) const
Removes suffix to column name if exists.
Definition: table.cpp:4625
int Len() const
Definition: dt.h:487
TInt GetPivot(TIntV &V, TInt StartIdx, TInt EndIdx, const TVec< TAttrType > &SortByTypes, const TIntV &SortByIndices, TBool Asc)
Gets pivot element for QSort.
Definition: table.cpp:3090
TInt GetColIdx(const TStr &ColName) const
Gets index of column ColName among columns of the same type in the schema.
Definition: table.h:1004
enum TAttrType_ TAttrType
Types for tables, sparse and dense attributes.
static PTable New(TTableContext *Context)
Definition: table.h:932
void StoreGroupCol(const TStr &GroupColName, const TVec< TPair< TInt, TInt > > &GroupAndRowIds)
Parallel helper function for grouping. - we currently don't support such parallel grouping by complex...
Definition: table.cpp:1290
static const TInt Last
Special value for Next vector entry - last row in table.
Definition: table.h:497
PTable UnionAll(const TTable &Table)
Returns union of this table with given Table, preserving duplicates.
Definition: table.cpp:4488
TStrV GetStrVals() const
Gets string attributes of this row.
Definition: table.h:262
static TInt PartitionKeyVal(TIntV &Key, TIntV &Val, TInt Start, TInt End)
Definition: table.cpp:5332
Primitive class: Wrapper around primitive data types.
Definition: table.h:220
bool operator==(const TRowIterator &RowI) const
Checks if this iterator points to the same row pointed by RowI.
Definition: table.cpp:147
TStrV GetSrcNodeIntAttrV() const
Gets src node int attribute name vector.
Definition: table.cpp:985
PTable Minus(const PTable &Table)
Definition: table.h:1416
void PrintGrouping(const THash< TGroupKey, TIntV > &Grouping) const
Definition: table.cpp:1768
Schema Sch
Table Schema.
Definition: table.h:559
void SelectFirstNRows(const TInt &N)
Selects first N rows from the table.
Definition: table.cpp:3337
TStrV GetDstNodeStrAttrV() const
Gets dst node str attribute name vector.
Definition: table.cpp:1062
Definition: ds.h:129
void GetPartitionRanges(TIntPrV &Partitions, TInt NumPartitions) const
Partitions the table into NumPartitions and populates Partitions with the ranges.
Definition: table.cpp:1157
TInt GetIntAttr(TInt ColIdx) const
Returns value of integer attribute specified by integer column index for current row.
Definition: table.cpp:155
TPredComp
Comparison operators for selection predicates.
Definition: table.h:7
TStr GetStr(const TInt &KeyId) const
Returns a string with KeyId.
Definition: table.h:213
TPredicateNode(TPredOp Opr)
Constructor for logical operation predicate node (internal node)
Definition: table.h:66
void Defrag()
Releases memory of deleted rows, and defrags.
Definition: table.cpp:3291
PGraphMP ToGraphMP(PTable Table, const TStr &SrcCol, const TStr &DstCol)
Performs table to graph conversion in parallel using the sort-first algorithm. This is the recommende...
Definition: conv.h:193
PNEANet ToVarGraphSequenceIterator(TStr SplitAttr, TAttrAggr AggrPolicy, TIntPrV SplitIntervals)
Creates the graph sequence one at a time.
Definition: table.cpp:3649
void SaveBin(const TStr &OutFNm)
Saves table schema and content to a binary file.
Definition: table.cpp:829
TStr GetStrAttr(TInt ColIdx) const
Returns value of string attribute specified by string column index for current row.
Definition: table.cpp:163
void AddIntCol(const TStr &ColName)
Adds an integer column with name ColName.
Definition: table.cpp:4650
THash< TStr, TPair< TAttrType, TInt > > ColTypeMap
Definition: table.h:574
TStr Rvar
Right variable of the comparison op.
Definition: table.h:21
void SetDstCol(const TStr &Dst)
Sets the name of the column to be used as dst nodes in the graph.
Definition: table.h:1158
TInt GetLastValidRowIdx()
Gets the id of the last valid row of the table.
static const int Mx
Definition: dt.h:1049
Definition: table.h:266
static PTable New(const THash< TInt, TInt > &H, const TStr &Col1, const TStr &Col2, TTableContext *Context, const TBool IsStrKeys=false)
Returns pointer to a table constructed from given int->int hash.
Definition: table.h:937
void ThresholdJoinCountCollisions(const TTable &TB, const TTable &TS, const TIntIntVH &T, TInt JoinColIdxB, TInt KeyColIdxB, TInt KeyColIdxS, THash< TIntPr, TIntTr > &Counters, TBool ThisIsSmaller, TAttrType JoinColType, TAttrType KeyType)
Definition: table.cpp:2486
void AddGraphAttributeV(TStrV &Attrs, TBool IsEdge, TBool IsSrc, TBool IsDst)
Adds vector of names of columns to be used as graph attributes.
Definition: table.cpp:972
void GroupByIntColMP(const TStr &GroupBy, THashMP< TInt, TIntV > &Grouping, TBool UsePhysicalIds=true) const
Groups/hashes by a single column with integer values, using OpenMP multi-threading.
Definition: table.cpp:1205
void SetFltColToConstMP(TInt UpdateColIdx, TFlt DefaultFltVal)
Definition: table.cpp:4129
TBool IsValid()
Definition: table.h:295
void GetPageRank(const PGraph &Graph, TIntFltH &PRankH, const double &C=0.85, const double &Eps=1e-4, const int &MaxIter=100)
Definition: centr.h:240
void ThresholdJoinCountPerJoinKeyCollisions(const TTable &TB, const TTable &TS, const TIntIntVH &T, TInt JoinColIdxB, TInt KeyColIdxB, TInt KeyColIdxS, THash< TIntTr, TIntTr > &Counters, TBool ThisIsSmaller, TAttrType JoinColType, TAttrType KeyType)
Definition: table.cpp:2537
void ColAdd(const TStr &Attr1, const TStr &Attr2, const TStr &ResultAttrName="")
Performs columnwise addition. See TTable::ColGenericOp.
Definition: table.cpp:4793
TArithOp
Possible column-wise arithmetic operations.
Definition: table.h:268
TInt RequestIndexStrMap(const TStr &ColName)
Creates Index for Str Column ColName.
Definition: table.cpp:5491
TFlt GetNextFltAttr(TInt ColIdx) const
Returns value of float attribute specified by float column index for next row.
Definition: table.cpp:252
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:547
void AddSelectedRows(const TTable &Table, const TIntV &RowIDs)
Adds rows from Table that correspond to ids in RowIDs.
Definition: table.cpp:4376
Definition: table.h:268
void SetSrcCol(const TStr &Src)
Sets the name of the column to be used as src nodes in the graph.
Definition: table.h:1151
TRowIteratorWithRemove(const TRowIteratorWithRemove &RowI)
Copy constructor.
Definition: table.h:396
TStr IdColName
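Name of the id column of this table. (The description extracted here, "A mapping from column name to column type and column index among columns of the same type...", reads like the documentation of ColTypeMap rather than of IdColName.)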
Definition: table.h:575
Predicate - encapsulates comparison operations.
Definition: table.h:82
TBool CompareAtomicConstTStr(TInt ColIdx, const TStr &Val, TPredComp Cmp)
Compares value in column ColIdx with given TStr Val.
Definition: table.cpp:208
PTable SelfSimJoinPerGroup(const TStr &GroupAttr, const TStr &SimCol, const TStr &DistanceColName, const TSimType &SimType, const TFlt &Threshold)
Performs join if the distance between two rows is less than the specified threshold.
Definition: table.cpp:2074
static TStrV NormalizeColNameV(const TStrV &Cols)
Adds suffix to column name if it doesn't exist.
Definition: table.h:549
static TInt CompareKeyVal(const TInt &K1, const TInt &V1, const TInt &K2, const TInt &V2)
Definition: table.cpp:5274
const char * GetContextKey(TInt Val) const
Gets the Key of the Context StringVals pool. Used by ToGraph method in conv.cpp.
Definition: table.h:632
void Save(TSOut &SOut)
Saves TTableContext in binary to SOut.
Definition: table.h:206
int GetSecHashCd() const
Returns secondary hash code of the vector. Used by THash.
Definition: ds.h:960
THash< TStr, THash< TInt, TIntV > > StrMapColIndexes
Indexes for String Columns.
Definition: table.h:579
THash< TStr, THash< TInt, TIntV > > IntColIndexes
Indexes for Int Columns.
Definition: table.h:578
void ColConcat(const TStr &Attr1, const TStr &Attr2, const TStr &Sep="", const TStr &ResAttr="")
Concatenates two string columns.
Definition: table.cpp:5060
TStrV GetSrcNodeStrAttrV() const
Gets src node str attribute name vector.
Definition: table.cpp:1051
void AddNodeAttr(const TStr &Attr)
Handles the common case where src and dst both belong to the same "universe" of entities.
Definition: table.h:1175
TTableContext * Context
Execution Context.
Definition: table.h:555
void AddRow(const TTableRow &Row)
Adds row with values taken from given TTableRow.
Definition: table.h:993
TSimType
Distance metrics for similarity joins.
Definition: table.h:149
TBool Start
A flag indicating whether the current row is the first valid row of the table.
Definition: table.h:386
void QSort(TIntV &V, TInt StartIdx, TInt EndIdx, const TVec< TAttrType > &SortByTypes, const TIntV &SortByIndices, TBool Asc=true)
Performs QSort on given vector V.
Definition: table.cpp:3134
TAttrType Type
Type of the predicate variables.
Definition: table.h:17
TPredicateNode * Left
Left child of this node.
Definition: table.h:57
THash< TStr, TInt > IntVars
Int variables in the current predicate tree.
Definition: table.h:84
void InvalidateAffectedGroupings(const TStr &Attr)
Definition: table.cpp:1561
void Dump(FILE *OutF=stdout) const
Prints table contents to a text file.
Definition: table.cpp:867
TInt LastValidRow
Physical index of last valid row.
Definition: table.h:564
void UnionAllInPlace(const PTable &Table)
Definition: table.h:1410
TPredicate(TPredicateNode *R)
Construct predicate with given root node R.
Definition: table.h:92
void Group(const TStrV &GroupBy, const TStr &GroupColName, TBool Ordered=true, TBool UsePhysicalIds=true)
Groups rows depending on values of GroupBy columns.
Definition: table.cpp:1549
void SelectAtomicStrConst(const TStr &Col, const TStr &Val, TPredComp Cmp, PTable &SelectedTable)
Definition: table.h:1310
void ResizeTable(int RowCount)
Resizes the table to hold RowCount rows.
Definition: table.cpp:4307
Iterator over a vector of tables.
Definition: table.h:432
void PrintContextSize()
Definition: table.cpp:3937
bool HasNext()
Checks if iterator has reached end of the sequence.
Definition: table.h:441
TPredicate()
Default constructor.
Definition: table.h:90
TPrimitive()
Definition: table.h:228
TPrimitive(const TPrimitive &Prim)
Definition: table.h:232
static TInt GetMP()
Definition: table.h:537
TTableContext()
Default constructor.
Definition: table.h:200
TAttrAggr
Possible policies for aggregating node attributes.
Definition: table.h:266
void ColDiv(const TStr &Attr1, const TStr &Attr2, const TStr &ResultAttrName="")
Performs columnwise division. See TTable::ColGenericOp.
Definition: table.cpp:4805
void Rename(const TStr &Column, const TStr &NewLabel)
Renames a column.
Definition: table.cpp:1085
void GroupAux(const TStrV &GroupBy, THash< TGroupKey, TPair< TInt, TIntV > > &Grouping, TBool Ordered, const TStr &GroupColName, TBool KeepUnique, TIntV &UniqueVec, TBool UsePhysicalIds=true)
Helper function for grouping.
Definition: table.cpp:1302
TStrV GetEdgeFltAttrV() const
Gets edge float attribute name vector.
Definition: table.cpp:1040
Definition: table.h:149
TStr GetNextStrAttr(TInt ColIdx) const
Returns value of string attribute specified by string column index for next row.
Definition: table.cpp:256
Execution context.
Definition: table.h:194
const TDat & GetDat(const TKey &Key) const
Definition: hash.h:220
static PTable New(const PTable Table)
Returns pointer to a new table created from given Table.
Definition: table.h:947
void AddRightChild(TPredicateNode *Child)
Add right child to this node.
Definition: table.h:74
static PTable TableFromHashMap(const THash< TInt, TInt > &H, const TStr &Col1, const TStr &Col2, TTableContext *Context, const TBool IsStrKeys=false)
Builds table from hash table of int->int.
Definition: table.h:979
Schema GetSchema()
Gets the schema of this table.
Definition: table.h:1116
TFltV GetFltVals() const
Gets float attributes of this row.
Definition: table.h:260
TVec< TIntV > RowIdBuckets
Partitioning of row ids into buckets corresponding to different graph objects when generating a seque...
Definition: table.h:609
TRowIteratorWithRemove BegRIWR()
Gets iterator with remove to the first valid row.
Definition: table.h:1236
TInt GetNumValidRows() const
Gets number of valid, i.e. not deleted, rows in this table.
Definition: table.h:1225
TStr GetStr(const TInt &KeyId) const
Gets the string with KeyId.
Definition: table.h:1100
TRowIterator BegRI() const
Gets iterator to the first valid row of the table.
Definition: table.h:1232
TPredicateNode()
Default constructor.
Definition: table.h:60
Definition: table.h:7
PNEANet ToGraphPerGroupIterator(TStr GroupAttr, TAttrAggr AggrPolicy)
Creates the graph sequence one at a time.
Definition: table.cpp:3654
TVec< TIntV > IntCols
Next[i] is the successor of row i. Table iterators follow the order dictated by Next ...
Definition: table.h:568
void SelectAtomicConst(const TStr &Col, const T &Val, TPredComp Cmp)
Definition: table.h:1281
Iterator class for TTable rows, that allows logical row removal while iterating.
Definition: table.h:383
TSizeTy GetMemUsed() const
Returns the memory footprint (the number of bytes) of the vector.
Definition: ds.h:483
void CheckAndAddIntNode(PNEANet Graph, THashSet< TInt > &NodeVals, TInt NodeId)
Checks if given NodeId is seen earlier; if not, add it to Graph and hashmap NodeVals.
Definition: table.cpp:3368
TVec< PNEANet > ToGraphSequence(TStr SplitAttr, TAttrAggr AggrPolicy, TInt WindowSize, TInt JumpSize, TInt StartVal=TInt::Mn, TInt EndVal=TInt::Mx)
Creates a sequence of graphs based on values of column SplitAttr and windows specified by JumpSize an...
Definition: table.cpp:3629
void GroupByFltCol(const TStr &GroupBy, T &Grouping, const TIntV &IndexSet, TBool All, TBool UsePhysicalIds=true) const
Groups/hashes by a single column with float values. Returns hash table with grouping.
Definition: table.h:1633
TInt GetStrMapByName(const TStr &Col) const
Returns integer mapping of string attribute specified by attribute name for current row...
Definition: table.cpp:181
PTable Minus(TTable &Table)
Returns table with rows that are present in this table but not in given Table.
Definition: table.cpp:4569
static PTable GetNodeTable(const PNEANet &Network, TTableContext *Context)
Extracts node TTable from PNEANet.
Definition: table.cpp:3667
THash< TStr, TStr > StrVars
String variables in the current predicate tree.
Definition: table.h:86
TIntV GetStrRowIdxByMap(const TStr &ColName, const TInt &Map) const
Gets the rows containing int mapping Map in str column ColName.
Definition: table.cpp:5408
TIntV GetIntVals() const
Gets int attributes of this row.
Definition: table.h:258
TStr GetIdColName() const
Gets name of the id column of this table.
Definition: table.h:646
static TBool EvalStrAtom(const TStr &Val1, const TStr &Val2, TPredComp Cmp)
Compare atomic string values Val1 and Val2 using predicate Cmp.
Definition: table.h:123
Definition: gbase.h:23
TTable(const TTable &Table)
Copy constructor.
Definition: table.h:918
TRowIteratorWithRemove()
Default constructor.
Definition: table.h:389
int GetSecHashCd() const
Definition: ds.h:156
static void LoadSSSeq(PTable &NewTable, const Schema &S, const TStr &InFNm, const TIntV &RelevantCols, const char &Separator, TBool HasTitleLine)
Sequentially loads data from input file at InFNm into NewTable.
Definition: table.cpp:649
Definition: table.h:7
Definition: dt.h:1293
void AddEdgeAttr(const TStr &Attr)
Adds column to be used as graph edge attribute.
Definition: table.h:1163
TRowIterator(const TRowIterator &RowI)
Copy constructor.
Definition: table.h:348
TStr StrVal
Definition: table.h:224
Definition: fl.h:58
void IncrementNext()
Increments the next vector and sets last, NumRows and NumValidRows.
Definition: table.cpp:2235
PTable SimJoin(const TStrV &Cols1, const TTable &Table, const TStrV &Cols2, const TStr &DistanceColName, const TSimType &SimType, const TFlt &Threshold)
Performs join if the distance between two rows is less than the specified threshold.
Definition: table.cpp:1974
void InitIds()
Adds explicit row ids, initialize hash set mapping ids to physical rows.
Definition: table.cpp:1863
TStrTrV CommonNodeAttrs
List of attribute pairs with values common to source and destination and their common given name...
Definition: table.h:604
void QSortPar(TIntV &V, const TVec< TAttrType > &SortByTypes, const TIntV &SortByIndices, TBool Asc=true)
Performs QSort in parallel on given vector V.
Definition: table.cpp:3186
void Save(TSOut &SOut)
Saves table schema and content to a binary format.
Definition: table.cpp:834
PTable Join(const TStr &Col1, const PTable &Table, const TStr &Col2)
Definition: table.h:1351
void MapHits(const TVec< PGraph > &GraphSeq, TVec< PTable > &TableSeq, TTableContext *Context, const int &MaxIter)
Gets sequence of Hits tables from given GraphSeq into TableSeq.
Definition: table.h:1743
TBool Result
Result of evaluating the predicate rooted at this node.
Definition: table.h:54
void ReadFltCol(const TStr &ColName, TFltV &Result) const
Reads values of entire float column into Result.
Definition: table.cpp:5198
void InvalidatePhysicalGroupings()
Definition: table.cpp:1557
TBool operator==(const GroupStmt &stmt) const
Definition: table.h:287
TPair< TIntV, TFltV > TGroupKey
Represents grouping key with IntV for integer and string attributes and FltV for float attributes...
Definition: table.h:145
Iterator class for TTable rows.
Definition: table.h:339
TInt GetNextRowIdx() const
Gets physical index of next row.
Definition: table.cpp:243
void DelKey(const TKey &Key)
Definition: hash.h:362
static const int Mn
Definition: dt.h:1048
void Aggregate(const TStrV &GroupByAttrs, TAttrAggr AggOp, const TStr &ValAttr, const TStr &ResAttr, TBool Ordered=true)
Aggregates values of ValAttr after grouping with respect to GroupByAttrs. Results are stored as new at...
Definition: table.cpp:1565
TAttrType GetSchemaColType(TInt Idx) const
Gets type of the column with index Idx in the schema.
Definition: table.h:650
Definition: table.h:266
PGraph ToGraph(PTable Table, const TStr &SrcCol, const TStr &DstCol, TAttrAggr AggrPolicy)
Sequentially converts the table into a graph with links from nodes in SrcCol to those in DstCol...
Definition: conv.h:8
PTable Intersection(const PTable &Table)
Definition: table.h:1413
void SetIntVal(TStr VarName, TInt VarVal)
Set int variable value in the predicate or all the children that use it.
Definition: table.h:98
TStrV GetEdgeIntAttrV() const
Gets edge int attribute name vector.
Definition: table.cpp:1007
Definition: table.h:149
Table Row (Record)
Definition: table.h:243
TRowIteratorWithRemove(TInt RowIdx, TTable *TablePtr, TBool IsStart)
Constructs iterator pointing to given row.
Definition: table.h:393
void SetStrVal(TStr VarName, TStr VarVal)
Set str variable value in the predicate or all the children that use it.
Definition: table.h:102
void RemoveNext()
Removes next row.
Definition: table.cpp:278
int GetPrimHashCd() const
Definition: table.h:312
TStr StrConst
Str const value if this object is a string constant.
Definition: table.h:24
TVec< PNEANet > ToGraphPerGroup(TStr GroupAttr, TAttrAggr AggrPolicy)
Creates a sequence of graphs based on grouping specified by GroupAttr.
Definition: table.cpp:3640
void AddColType(const TStr &ColName, TAttrType ColType, TInt Index)
Adds column with name ColName and type ColType to the ColTypeMap.
Definition: table.h:666
const TTable * Table
Reference to table containing this row.
Definition: table.h:341
int LoadCrossNet(TCrossNet &Graph, PTable Table, const TStr &SrcCol, const TStr &DstCol, TStrV &EdgeAttrV)
Loads the edges from the TTable and EdgeAttrV specifies columns containing edge attributes.
Definition: conv.cpp:69
void Sort(const bool &Asc=true)
Sorts the elements of the vector.
Definition: ds.h:1254
void SelectAtomicIntConst(const TStr &Col, const TInt &Val, TPredComp Cmp)
Definition: table.h:1300
static void Throw(const TStr &MsgStr)
Definition: ut.h:187
Schema DenormalizeSchema() const
Removes suffix from column names in the Schema.
Definition: table.cpp:4642
void AddDstNodeAttr(const TStr &Attr)
Adds column to be used as dst node attribute of the graph.
Definition: table.h:1171
TBool UsePhysicalRowIds
Definition: table.h:279
PNEANet NextGraphIterator()
Calls to this must be preceded by a call to one of the above ToGraph*Iterator functions.
Definition: table.cpp:3659
TInt IntVal
Definition: table.h:222
friend class TRowIterator
Definition: table.h:1533
TStr GetSrcCol() const
Gets the name of the column to be used as src nodes in the graph.
Definition: table.h:1149
PNEANet BuildGraph(const TIntV &RowIds, TAttrAggr AggrPolicy)
Makes a single pass over the rows in the given row id set, and creates nodes, edges, assigns node and edge attributes.
Definition: table.cpp:3425
PGraphMP ToNetworkMP(PTable Table, const TStr &SrcCol, const TStr &DstCol, TStrV &SrcAttrV, TStrV &DstAttrV, TStrV &EdgeAttrV, TAttrAggr AggrPolicy)
Does Table to Network conversion in parallel using the sort-first algorithm. This is the recommended ...
Definition: conv.h:698
TBool EvalAtomicPredicate(const TAtomicPredicate &Atom)
Evaluate the given atomic predicate.
Definition: table.cpp:102
void ColSub(const TStr &Attr1, const TStr &Attr2, const TStr &ResultAttrName="")
Performs columnwise subtraction. See TTable::ColGenericOp.
Definition: table.cpp:4797
TFlt GetFltValAtRowIdx(const TInt &ColIdx, const TInt &RowIdx)
Get the float value at column ColIdx and row RowIdx.
Definition: table.h:1111
int GetEmptyRowsStart(int NewRows)
Gets the start index to a chunk of empty rows of size NewRows.
Definition: table.cpp:4353
TSize GetMemUsed() const
Definition: table.h:303
void PrintSize()
Definition: table.cpp:3908
TStrV GroupByAttrs
Definition: table.h:277
THash< TStr, THash< TFlt, TIntV > > FltColIndexes
Indexes for Float Columns.
Definition: table.h:580
TStr Lvar
Left variable of the comparison op.
Definition: table.h:20
const char * GetKey(const int &KeyId) const
Definition: hash.h:821
void ProjectInPlace(const TStrV &ProjectCols)
Keeps only the columns specified in ProjectCols.
Definition: table.cpp:5216
TAtomicPredicate(TAttrType Typ, TBool IsCnst, TPredComp Cmp, TStr L, TStr R)
Compact prototype for constructing non-const atomic predicate.
Definition: table.h:42
TStr GetStr() const
Definition: table.h:237
Definition: table.h:7
void SelectAtomicStrConst(const TStr &Col, const TStr &Val, TPredComp Cmp)
Definition: table.h:1307
TBool CompareAtomicConst(TInt ColIdx, const TPrimitive &Val, TPredComp Cmp)
Compares value in column ColIdx with given primitive Val.
Definition: table.cpp:190
TRowIteratorWithRemove EndRIWR()
Gets iterator with remove to the last valid row.
Definition: table.h:1238
TFltV FltVals
Values of the flt columns for this row.
Definition: table.h:246
size_t TSize
Definition: bd.h:58
TInt GetStrMapById(TInt ColIdx, TInt RowIdx) const
Gets the integer mapping of the string at column ColIdx at row RowIdx.
Definition: table.h:1024
void SelectAtomic(const TStr &Col1, const TStr &Col2, TPredComp Cmp)
Definition: table.h:1269
void Reindex()
Reinitializes row ids.
Definition: table.cpp:1869
TInt CurrBucket
Current row id bucket - used when generating a sequence of graphs using an iterator.
Definition: table.h:610
PTable IsNextK(const TStr &OrderCol, TInt K, const TStr &GroupBy, const TStr &RankColName="")
Distance based filter.
Definition: table.cpp:3869
TAttrType GetColType(const TStr &ColName) const
Gets type of column ColName.
Definition: table.h:1218
TVec< TIntV > StrColMaps
Data columns of integer mappings of string attributes.
Definition: table.h:570
TRowIteratorWithRemove & Next()
Increments the iterator (For Python compatibility).
Definition: table.cpp:222
TPredicate(const TPredicate &Pred)
Copy constructor.
Definition: table.h:94
PNEANet ToGraphSequenceIterator(TStr SplitAttr, TAttrAggr AggrPolicy, TInt WindowSize, TInt JumpSize, TInt StartVal=TInt::Mn, TInt EndVal=TInt::Mx)
Creates the graph sequence one at a time.
Definition: table.cpp:3644
TPrimitive(const TFlt &Val)
Definition: table.h:230
PTable SelfJoin(const TStr &Col)
Joins table with itself, on values of Col.
Definition: table.h:1357
Definition: table.h:149
void GroupByIntCol(const TStr &GroupBy, T &Grouping, const TIntV &IndexSet, TBool All, TBool UsePhysicalIds=true) const
Groups/hashes by a single column with integer values.
Definition: table.h:1605
PTable Join(const TStr &Col1, const TTable &Table, const TStr &Col2)
Performs equijoin.
Definition: table.cpp:2252
static int GetHashCd(const int hc1, const int hc2)
Definition: bd.h:590
void Save(TSOut &SOut, bool PoolToo=true) const
Definition: hash.h:761
bool IsKey(const TKey &Key) const
Definition: hashmp.h:191
bool Val
Definition: dt.h:883
static void LoadSSPar(PTable &NewTable, const Schema &S, const TStr &InFNm, const TIntV &RelevantCols, const char &Separator, TBool HasTitleLine)
Loads data in parallel from input file at InFNm into NewTable. Only works when NewTable has no string c...
Definition: table.cpp:487
int LoadMode(TModeNet &Graph, PTable Table, const TStr &NCol, TStrV &NodeAttrV)
Loads the nodes specified in column NCol from the TTable with the attributes specified in NodeAttrV...
Definition: conv.cpp:14
TPair< TStr, TAttrType > TStrTypPr
Definition: table.h:1537
TIntV GetIntRowIdxByVal(const TStr &ColName, const TInt &Val) const
Gets the rows containing Val in int column ColName.
Definition: table.cpp:5387
TFlt FltVal
Definition: table.h:223
TInt GetRowIdx() const
Gets the id of the row pointed by this iterator.
Definition: table.cpp:151
A class representing a cached grouping statement identifier.
Definition: table.h:275
TStr GetSchemaColName(TInt Idx) const
Gets name of the column with index Idx in the schema.
Definition: table.h:648
TInt GetStrMapById(TInt ColIdx) const
Returns integer mapping of a string attribute value specified by string column index for current row...
Definition: table.cpp:186
TBool UsePhysicalIds()
Definition: table.h:286
TStrV SrcNodeAttrV
List of columns (attributes) to serve as source node attributes.
Definition: table.h:602
TAttrAggr AggrPolicy
Aggregation policy used for solving conflicts between different values of an attribute of the same no...
Definition: table.h:611
static void QSortKeyVal(TIntV &Key, TIntV &Val, TInt Start, TInt End)
Definition: table.cpp:5355
void Select(TPredicate &Predicate, TIntV &SelectedRows, TBool Remove=true)
Selects rows that satisfy given Predicate.
Definition: table.cpp:2730
PTable UnionAll(const PTable &Table)
Definition: table.h:1407
void UnionAllInPlace(const TTable &Table)
Same as TTable::ConcatTable.
Definition: table.cpp:4501
TInt GetInt() const
Definition: table.h:235
char GetCh(const int &ChN) const
Definition: dt.h:483
TIntIntH RowIdMap
Mapping of permanent row ids to physical id.
Definition: table.h:576
void SaveSS(const TStr &OutFNm)
Saves table schema and content to a TSV file.
Definition: table.cpp:780
PTable Union(const TTable &Table)
Returns union of this table with given Table.
Definition: table.cpp:4508
void SelectAtomicConst(const TStr &Col, const TPrimitive &Val, TPredComp Cmp, TIntV &SelectedRows, PTable &SelectedTable, TBool Remove=true, TBool Table=true)
Selects rows where the value of Col matches given primitive Val.
Definition: table.cpp:2853
Definition: table.h:5
void UpdateFltFromTable(const TStr &KeyAttr, const TStr &UpdateAttr, const TTable &Table, const TStr &FKeyAttr, const TStr &ReadAttr, TFlt DefaultFltVal=0.0)
Definition: table.cpp:4219
void ColConcatConst(const TStr &Attr1, const TStr &Val, const TStr &Sep="", const TStr &ResAttr="")
Concatenates column values with given string value.
Definition: table.cpp:5159
Definition: fl.h:128
void GetCollidingRows(const TTable &T, THashSet< TInt > &Collisions)
Gets set of row ids of rows common with table T.
Definition: table.cpp:3991
void AddGraphAttribute(const TStr &Attr, TBool IsEdge, TBool IsSrc, TBool IsDst)
Adds names of columns to be used as graph attributes.
Definition: table.cpp:965
TBool Ordered
Definition: table.h:278
void KeepSortedRows(const TIntV &KeepV)
Removes all rows that are not mentioned in the SORTED vector KeepV.
Definition: table.cpp:1132
The nodes of one particular mode in a TMMNet, and their neighbor vectors as TIntV attributes...
Definition: mmnet.h:23
TPair< TAttrType, TInt > GetColTypeMap(const TStr &ColName) const
Gets column type and index of ColName.
Definition: table.h:676
TTableRow()
Default constructor.
Definition: table.h:250
TAttrType GetType() const
Definition: table.h:238
Definition: table.h:7
void GroupingSanityCheck(const TStr &GroupBy, const TAttrType &AttrType) const
Checks if grouping key exists and matches given attr type.
Definition: table.cpp:1195
TStrHash< TInt, TBigStrPool > StringVals
StringPool - stores string data values and maps them to integers.
Definition: table.h:196
static PTable TableFromHashMap(const THash< TInt, TFlt > &H, const TStr &Col1, const TStr &Col2, TTableContext *Context, const TBool IsStrKeys=false)
Builds table from hash table of int->float.
Definition: table.h:986
void UpdateTableForNewRow()
Updates table state after adding one or more rows.
Definition: table.cpp:4117
void SetCommonNodeAttrs(const TStr &SrcAttr, const TStr &DstAttr, const TStr &CommonAttrName)
Sets the columns to be used as both src and dst node attributes.
Definition: table.h:1179
int AddKey(const char *Key)
Definition: hash.h:896
void GetHits(const PGraph &Graph, TIntFltH &NIdHubH, TIntFltH &NIdAuthH, const int &MaxIter=20)
Definition: centr.h:485
static TInt UseMP
Global switch for choosing multi-threaded versions of TTable functions.
Definition: table.h:500
TRowIterator()
Default constructor.
Definition: table.h:344
TPredComp Compare
Comparison op represented by this node.
Definition: table.h:19
TTableIterator(TVec< PTable > &PTableV)
Constructs iterator over the given vector of tables PTableV.
Definition: table.h:437
static TTableIterator GetMapHitsIterator(const TVec< PNEANet > &GraphSeq, TTableContext *Context, const int &MaxIter=20)
Gets sequence of Hits tables from given GraphSeq.
Definition: table.h:1518
void DelColType(const TStr &ColName)
Removes column with name ColName from the ColTypeMap.
Definition: table.h:671
Definition: dt.h:1044
void ReadIntCol(const TStr &ColName, TIntV &Result) const
Reads values of entire int column into Result.
Definition: table.cpp:5189
int GetPrimHashCd() const
Definition: ds.h:155
void FillBucketsByWindow(TStr SplitAttr, TInt JumpSize, TInt WindowSize, TInt StartVal, TInt EndVal)
Fills RowIdBuckets with sets of row ids.
Definition: table.cpp:3527
static TStr NormalizeColName(const TStr &ColName)
Adds suffix to column name if it doesn't exist.
Definition: table.h:540
void AddStrCol(const TStr &ColName)
Adds a string column with name ColName.
Definition: table.cpp:4664
THash< TStr, GroupStmt > GroupStmtNames
Maps user-given grouping statement names to their group-by attributes.
Definition: table.h:583
TTableContext(TSIn &SIn)
Loads TTableContext in binary from SIn.
Definition: table.h:202
TRowIterator & Next()
Increments the iterator (For Python compatibility).
Definition: table.cpp:135
static PTable Load(TSIn &SIn, TTableContext *Context)
Loads table from a binary format.
Definition: table.h:970
TStr SrcCol
Column (attribute) to serve as src nodes when constructing the graph.
Definition: table.h:599
PTable Project(const TStrV &ProjectCols)
Returns table with only the columns in ProjectCols.
Definition: table.cpp:4592
TVec< PTable > PTableV
Vector of TTables which are to be iterated over.
Definition: table.h:433
void StoreStrCol(const TStr &ColName, const TStrV &ColVals)
Adds entire str column to table.
Definition: table.cpp:4098
TPredicateNode * Right
Definition: table.h:58
TVec< TFltV > FltCols
Data columns of floating point attributes.
Definition: table.h:569
TVec< TStr > TStrV
Definition: ds.h:1534
void AddSrcNodeAttr(TStrV &Attrs)
Adds columns to be used as src node attributes of the graph.
Definition: table.h:1169
TStrV GetDstNodeFltAttrV() const
Gets dst node float attribute name vector.
Definition: table.cpp:1029
TStrV DstNodeAttrV
List of columns (attributes) to serve as destination node attributes.
Definition: table.h:603
TIntV Next
A vector describing the logical order of the rows.
Definition: table.h:565
void AddStr(const TStr &Val)
Adds string attribute to this row.
Definition: table.h:256
TPredicateNode(const TAtomicPredicate &A)
Constructor for atomic predicate node (leaf)
Definition: table.h:63
Definition: ds.h:32
TAtomicPredicate(TAttrType Typ, TBool IsCnst, TPredComp Cmp, TStr L, TStr R, TInt ICnst, TFlt FCnst, TStr SCnst)
Construct predicate from given comparison op, variables and constants.
Definition: table.h:37
void SelectAtomicFltConst(const TStr &Col, const TFlt &Val, TPredComp Cmp, PTable &SelectedTable)
Definition: table.h:1317
int AddKey(const TKey &Key)
Definition: hash.h:331
TRowIterator EndRI() const
Gets iterator to the last valid row of the table.
Definition: table.h:1234
void AddStrVal(const TInt &ColIdx, const TStr &Val)
Adds Val in column with id ColIdx.
Definition: table.cpp:951
TTable * Table
Reference to table containing this row.
Definition: table.h:385
PGraphMP ToGraphMP3(PTable Table, const TStr &SrcCol, const TStr &DstCol)
Performs table to graph conversion in parallel. Uses the hash-first method, which is less optimal...
Definition: conv.h:534
void AddRow(const TRowIterator &RI)
Adds row corresponding to RI.
Definition: table.cpp:4272
void Load(TSIn &SIn, bool PoolToo=true)
Definition: hash.h:759
TInt NumRows
Number of rows in the table (valid and invalid).
Definition: table.h:561
TFlt GetFltVal(const TStr &ColName, const TInt &RowIdx)
Gets the value of float attribute ColName at row RowIdx.
Definition: table.h:1015
static TTableIterator GetMapPageRank(const TVec< PNEANet > &GraphSeq, TTableContext *Context, const double &C=0.85, const double &Eps=1e-4, const int &MaxIter=100)
Gets sequence of PageRank tables from given GraphSeq.
Definition: table.h:1510
static PTable LoadSS(const Schema &S, const TStr &InFNm, TTableContext *Context, const char &Separator= '\t', TBool HasTitleLine=false)
Loads table from spreadsheet (TSV, CSV, etc). Note: HasTitleLine = true is not supported. Please comment title lines instead.
Definition: table.cpp:775
TPrimitive(const TInt &Val)
Definition: table.h:229
TStr GetStrVal(const TStr &ColName, const TInt &RowIdx) const
Gets the value of string attribute ColName at row RowIdx.
Definition: table.h:1019
void Unique(const TStr &Col)
Removes rows with duplicate values in given column.
Definition: table.cpp:1246
TRowIteratorWithRemove & operator++(int)
Increments the iterator.
Definition: table.cpp:218
void AddJointRow(const TTable &T1, const TTable &T2, TInt RowIdx1, TInt RowIdx2)
Adds joint row T1[RowIdx1]<=>T2[RowIdx2].
Definition: table.cpp:1937
void Classify(TPredicate &Predicate, const TStr &LabelName, const TInt &PositiveLabel=1, const TInt &NegativeLabel=0)
Definition: table.cpp:2785
void Merge(TIntV &V, TInt Idx1, TInt Idx2, TInt Idx3, const TVec< TAttrType > &SortByTypes, const TIntV &SortByIndices, TBool Asc=true)
Helper function for parallel QSort.
Definition: table.cpp:3158
TStr DstCol
Column (attribute) to serve as dst nodes when constructing the graph.
Definition: table.h:600
void AddSrcNodeAttr(const TStr &Attr)
Adds column to be used as src node attribute of the graph.
Definition: table.h:1167
void MapPageRank(const TVec< PGraph > &GraphSeq, TVec< PTable > &TableSeq, TTableContext *Context, const double &C, const double &Eps, const int &MaxIter)
Gets sequence of PageRank tables from given GraphSeq into TableSeq.
Definition: table.h:1729
void ReadStrCol(const TStr &ColName, TStrV &Result) const
Reads values of entire string column into Result.
Definition: table.cpp:5207
TStr GetStrVal(TInt ColIdx, TInt RowIdx) const
Gets the value in column with id ColIdx at row RowIdx.
Definition: table.h:636
void Invalidate()
Definition: table.h:296
static void SetMP(TInt Value)
Definition: table.h:536
static PTable GetEdgeTable(const PNEANet &Network, TTableContext *Context)
Extracts edge TTable from PNEANet.
Definition: table.cpp:3719
GroupStmt(const GroupStmt &stmt)
Definition: table.h:285
static const TInt Invalid
Special value for Next vector entry - logically removed row.
Definition: table.h:498
TStrV StrVals
Values of the str columns for this row.
Definition: table.h:247
void AddColType(const TStr &ColName, TPair< TAttrType, TInt > ColType)
Adds column with name ColName and type ColType to the ColTypeMap.
Definition: table.h:661
Definition: dt.h:412
PNEANet GetNextGraphFromSequence()
Returns the next graph in sequence corresponding to RowIdBuckets.
Definition: table.cpp:3612
TBool IncludesAttr(const TStr &Attr)
Definition: table.h:297
Definition: table.h:7
TBool CompareAtomicConst(TInt ColIdx, const TPrimitive &Val, TPredComp Cmp)
Compares value in column ColIdx with given primitive Val.
Definition: table.cpp:282
void StoreFltCol(const TStr &ColName, const TFltV &ColVals)
Adds entire flt column to table.
Definition: table.cpp:4081
THash< GroupStmt, THash< TInt, TGroupKey > > GroupIDMapping
Maps grouping statements to their (group id -> group-by key) mapping.
Definition: table.h:587
TInt IntConst
Int const value if this object is an integer constant.
Definition: table.h:22
void AddFlt(const TFlt &Val)
Adds float attribute to this row.
Definition: table.h:254
TTriple< TStr, TStr, TStr > TStrTr
Definition: ds.h:185
Definition: table.h:266
GroupStmt(const TStrV &Attrs, TBool ordered, TBool physical)
Definition: table.h:284
TPredOp Op
Logical op represented by this node.
Definition: table.h:53
GroupStmt()
Definition: table.h:282
TInt CurrTableIdx
Index of the current table pointed to by this iterator.
Definition: table.h:434
void GroupByStrCol(const TStr &GroupBy, T &Grouping, const TIntV &IndexSet, TBool All, TBool UsePhysicalIds=true) const
Groups/hashes by a single column with string values. Returns hash table with grouping.
Definition: table.h:1660
T AggregateVector(TVec< T > &V, TAttrAggr Policy)
Aggregates vector into a single scalar value according to a policy.
Definition: table.h:1551
TTableContext * ChangeContext(TTableContext *Context)
Changes the current context. Moves all object items to the new context.
Definition: table.cpp:901
TInt CurrRowIdx
Physical row index of current row pointed to by iterator.
Definition: table.h:384
void AddNodeAttr(TStrV &Attrs)
Handles the common case where src and dst both belong to the same "universe" of entities.
Definition: table.h:1177
TPredicateNode * Root
Root node of the current predicate tree.
Definition: table.h:87
Definition: gbase.h:23
Definition: table.h:268
void AggregateCols(const TStrV &AggrAttrs, TAttrAggr AggOp, const TStr &ResAttr)
Aggregates attributes in AggrAttrs across columns.
Definition: table.cpp:1730
bool operator==(const TRowIteratorWithRemove &RowI) const
Checks if this iterator points to the same row pointed by RowI.
Definition: table.cpp:235
Table class: Relational table with columnar data storage.
Definition: table.h:495
bool operator<(const TRowIterator &RowI) const
Checks if this iterator points to a row that is before the one pointed by RowI.
Definition: table.cpp:141
TPredicateNode(const TPredicateNode &P)
Copy constructor.
Definition: table.h:69
TStr GetStrValById(TInt ColIdx, TInt RowIdx) const
Gets the value of the string attribute at column ColIdx at row RowIdx.
Definition: table.h:1034
void SelectAtomicFltConst(const TStr &Col, const TFlt &Val, TPredComp Cmp)
Definition: table.h:1314
void UpdateFltFromTableMP(const TStr &KeyAttr, const TStr &UpdateAttr, const TTable &Table, const TStr &FKeyAttr, const TStr &ReadAttr, TFlt DefaultFltVal=0.0)
Definition: table.cpp:4151
static PTable GetEdgeTablePN(const PNGraphMP &Network, TTableContext *Context)
Extracts edge TTable from parallel graph PNGraphMP.
Definition: table.cpp:3777
void ISort(TIntV &V, TInt StartIdx, TInt EndIdx, const TVec< TAttrType > &SortByTypes, const TIntV &SortByIndices, TBool Asc=true)
Performs insertion sort on given vector V.
Definition: table.cpp:3076
TInt GetRowIdx() const
Gets physical index of current row.
Definition: table.cpp:239
TPredOp
Boolean operators for selection predicates.
Definition: table.h:5
TInt RequestIndexFlt(const TStr &ColName)
Creates Index for Flt Column ColName.
Definition: table.cpp:5472
static TBool EvalAtom(T Val1, T Val2, TPredComp Cmp)
Compare atomic values Val1 and Val2 using predicate Cmp.
Definition: table.h:110
static PTable New(const THash< TInt, TFlt > &H, const TStr &Col1, const TStr &Col2, TTableContext *Context, const TBool IsStrKeys=false)
Returns pointer to a table constructed from given int->float hash.
Definition: table.h:942
bool operator<(const TRowIteratorWithRemove &RowI) const
Checks if this iterator points to a row that is before the one pointed by RowI.
Definition: table.cpp:229
static PTable New(const Schema &S, TTableContext *Context)
Definition: table.h:933
void SelectAtomicIntConst(const TStr &Col, const TInt &Val, TPredComp Cmp, PTable &SelectedTable)
Definition: table.h:1303
void InitRowIdBuckets(int NumBuckets)
Initializes the RowIdBuckets vector which will be used for the graph sequence creation.
Definition: table.cpp:3515
void AddLeftChild(TPredicateNode *Child)
Add left child to this node.
Definition: table.h:72
TStrV GetSrcNodeFltAttrV() const
Gets src node float attribute name vector.
Definition: table.cpp:1018
static PTable GetFltNodePropertyTable(const PNEANet &Network, const TIntFltH &Property, const TStr &NodeAttrName, const TAttrType &NodeAttrType, const TStr &PropertyAttrName, TTableContext *Context)
Extracts node and edge property TTables from THash.
Definition: table.cpp:3830
void ConcatTable(const PTable &T)
Appends all rows of T to this table, and recalculates indices.
Definition: table.h:693
Hash-Table with multiprocessing support.
Definition: hashmp.h:81
PTable ThresholdJoinPerJoinKeyOutputTable(const THash< TIntTr, TIntTr > &Counters, TInt Threshold, const TTable &Table)
Definition: table.cpp:2602
TPrimitive(const TStr &Val)
Definition: table.h:231
PTable ThresholdJoin(const TStr &KeyCol1, const TStr &JoinCol1, const TTable &Table, const TStr &KeyCol2, const TStr &JoinCol2, TInt Threshold, TBool PerJoinKey=false)
Definition: table.cpp:2624
Definition: table.h:266
void Load(TSIn &SIn)
Loads TTableContext in binary from SIn.
Definition: table.h:204
static void ISortKeyVal(TIntV &Key, TIntV &Val, TInt Start, TInt End)
Definition: table.cpp:5298
TBool IsConst
Flag if this atomic node represents a constant value.
Definition: table.h:18
TInt CurrRowIdx
Physical row index of current row pointed by iterator.
Definition: table.h:340
static TInt GetPivotKeyVal(TIntV &Key, TIntV &Val, TInt Start, TInt End)
Definition: table.cpp:5315
TIntV IntVals
Values of the int columns for this row.
Definition: table.h:245
Definition: table.h:7
Definition: bd.h:196
TInt IsNextDirty
Flag to signify whether the rows are stored in logical sequence or reordered. Used for optimizing Get...
Definition: table.h:613
void Select(TPredicate &Predicate)
Definition: table.h:1257
TStrV GetEdgeStrAttrV() const
Gets edge str attribute name vector.
Definition: table.cpp:1074
friend class TRowIteratorWithRemove
Definition: table.h:1534
Definition: table.h:5
void AddFltCol(const TStr &ColName)
Adds a float column with name ColName.
Definition: table.cpp:4657
TInt CompareRows(TInt R1, TInt R2, const TAttrType &CompareByType, const TInt &CompareByIndex, TBool Asc=true)
Returns positive value if R1 is bigger, negative value if R2 is bigger, and 0 if they are equal (strc...
Definition: table.cpp:3044
TStr RenumberColName(const TStr &ColName) const
Returns a re-numbered column name based on number of existing columns with conflicting names...
Definition: table.cpp:4609
TAtomicPredicate()
Default constructor.
Definition: table.h:30
TInt NumValidRows
Number of valid rows in the table (i.e. rows that were not logically removed).
Definition: table.h:562
TTable()
Definition: table.cpp:302
PTable ThresholdJoinOutputTable(const THash< TIntPr, TIntTr > &Counters, TInt Threshold, const TTable &Table)
Definition: table.cpp:2588
TRowIterator(TInt RowIdx, const TTable *TablePtr)
Constructs iterator to row RowIdx of TablePtr.
Definition: table.h:346
void Count(const TStr &CountColName, const TStr &Col)
Counts number of unique elements.
Definition: table.cpp:1782
Definition: table.h:7
PTable InitializeJointTable(const TTable &Table)
Initializes an empty table for the join of this table with the given table.
Definition: table.cpp:1896
Definition: table.h:266
void ColMax(const TStr &Attr1, const TStr &Attr2, const TStr &ResultAttrName="")
Performs max of two columns. See TTable::ColGenericOp.
Definition: table.cpp:4817
TStr GetStrValByName(const TStr &ColName, const TInt &RowIdx) const
Gets the value of the string attribute at column ColName at row RowIdx.
Definition: table.h:1039
void Reserve(const TSizeTy &_MxVals)
Reserves enough memory for the vector to store _MxVals elements.
Definition: ds.h:515
void ClassifyAtomic(const TStr &Col1, const TStr &Col2, TPredComp Cmp, const TStr &LabelName, const TInt &PositiveLabel=1, const TInt &NegativeLabel=0)
Definition: table.cpp:2846
bool Cmp(const int &RelOp, const TRec &Rec1, const TRec &Rec2)
Definition: bd.h:426
void StoreIntCol(const TStr &ColName, const TIntV &ColVals)
Adds entire int column to table.
Definition: table.cpp:4064
void AddIdColumn(const TStr &IdColName)
Adds a column of explicit integer identifiers to the rows.
Definition: table.cpp:1880
void Print()
Definition: table.h:326
void GetVariables(TStrV &Variables)
Get variables in the predicate tree rooted at this node.
Definition: table.cpp:1
Definition: table.h:266
static TInt CheckSortedKeyVal(TIntV &Key, TIntV &Val, TInt Start, TInt End)
Definition: table.cpp:5287
void AddEdgeAttributes(PNEANet &Graph, int RowId)
Adds attributes of edge corresponding to RowId to the Graph.
Definition: table.cpp:3375
Definition: table.h:5
Definition: gbase.h:23
TPt< TTable > PTable
Definition: table.h:141
TVec< PNEANet > ToVarGraphSequence(TStr SplitAttr, TAttrAggr AggrPolicy, TIntPrV SplitIntervals)
Creates a sequence of graphs based on values of column SplitAttr and intervals specified by SplitInte...
Definition: table.cpp:3635
PTable Next()
Returns next table in the sequence and updates iterator.
Definition: table.h:439
Definition: table.h:7
TInt GetNextIntAttr(TInt ColIdx) const
Returns value of integer attribute specified by integer column index for next row.
Definition: table.cpp:248
void ColGenericOp(const TStr &Attr1, const TStr &Attr2, const TStr &ResAttr, TArithOp op)
Performs columnwise arithmetic operation.
Definition: table.cpp:4729
void SelectAtomic(const TStr &Col1, const TStr &Col2, TPredComp Cmp, TIntV &SelectedRows, TBool Remove=true)
Selects rows using atomic compare operation.
Definition: table.cpp:2793
TRowIterator & operator++(int)
Increments the iterator.
Definition: table.cpp:131
bool IsKey(const TKey &Key) const
Definition: hash.h:216
void GetVariables(TStrV &Variables)
Get variables in current predicate.
Definition: table.cpp:10
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:574
TAttrType AttrType
Definition: table.h:225
TDat & AddDat(const TKey &Key)
Definition: hashmp.h:181
PGraph ToNetwork(PTable Table, const TStr &SrcCol, const TStr &DstCol, TStrV &SrcAttrV, TStrV &DstAttrV, TStrV &EdgeAttrV, TAttrAggr AggrPolicy)
Converts table to a network. Suitable for PNEANet - Requires node and edge attribute column names as ...
Definition: conv.h:65
void ColMin(const TStr &Attr1, const TStr &Attr2, const TStr &ResultAttrName="")
Performs min of two columns. See TTable::ColGenericOp.
Definition: table.cpp:4813
Definition: dt.h:881
bool IsRowValid(TInt RowIdx) const
Checks if RowIdx corresponds to a valid (i.e. not deleted) row.
Definition: table.h:811
void ColMod(const TStr &Attr1, const TStr &Attr2, const TStr &ResultAttrName="")
Performs columnwise modulus. See TTable::ColGenericOp.
Definition: table.cpp:4809
TCRef CRef
Definition: table.h:560
void RemoveFirstRow()
Removes first valid row of the table.
Definition: table.cpp:1102
bool IsStrIn(const TStr &Str) const
Definition: dt.h:554
TBool IsFirst() const
Checks whether iterator points to first valid row of the table.
Definition: table.cpp:274
Atomic predicate - encapsulates comparison operations.
Definition: table.h:15
TInt GetStrMapByName(const TStr &ColName, TInt RowIdx) const
Gets the integer mapping of the string at column ColName at row RowIdx.
Definition: table.h:1029
TBool IsColName(const TStr &ColName) const
Definition: table.h:656
TInt GetIntValAtRowIdx(const TInt &ColIdx, const TInt &RowIdx)
Get the integer value at column ColIdx and row RowIdx.
Definition: table.h:1107
Definition: table.h:268
TInt CheckAndAddFltNode(T Graph, THash< TFlt, TInt > &NodeVals, TFlt FNodeVal)
Checks if given NodeVal is seen earlier; if not, add it to Graph and hashmap NodeVals.
Definition: table.h:1540
TFlt GetFlt() const
Definition: table.h:236
Predicate node - represents a binary predicate operation on two predicate nodes.
Definition: table.h:51
int Len() const
Definition: hash.h:186
PTable SelfSimJoin(const TStrV &Cols, const TStr &DistanceColName, const TSimType &SimType, const TFlt &Threshold)
Definition: table.h:1358
static PTable New()
Definition: table.h:931
void AddNodeAttributes(TInt NId, TStrV NodeAttrV, TInt RowId, THash< TInt, TStrIntVH > &NodeIntAttrs, THash< TInt, TStrFltVH > &NodeFltAttrs, THash< TInt, TStrStrVH > &NodeStrAttrs)
Takes as parameters, and updates, maps NodeXAttrs: Node Id –> (attribute name –> Vector of attribut...
Definition: table.cpp:3394
GroupStmt(const TStrV &Attrs)
Definition: table.h:283
PNEANet GetFirstGraphFromSequence(TAttrAggr AggrPolicy)
Returns the first graph of the sequence.
Definition: table.cpp:3606
TDat & AddDat(const TKey &Key)
Definition: hash.h:196
void ClassifyAtomicConst(const TStr &Col, const T &Val, TPredComp Cmp, const TStr &LabelName, const TInt &PositiveLabel=1, const TInt &NegativeLabel=0)
Definition: table.h:1292
PTable Intersection(const TTable &Table)
Returns intersection of this table with given Table.
Definition: table.cpp:4544
void AddDstNodeAttr(TStrV &Attrs)
Adds columns to be used as dst node attributes of the graph.
Definition: table.h:1173
void AddNJointRowsMP(const TTable &T1, const TTable &T2, const TVec< TIntPrV > &JointRowIDSet)
Adds rows from T1 and T2 to this table in a parallel manner. Used by Join.
Definition: table.cpp:4419
const TDat & GetDat(const TKey &Key) const
Definition: hashmp.h:195
TTableContext * GetContext()
Returns the context.
Definition: table.h:996
TFlt FltConst
Flt const value if this object is a float constant.
Definition: table.h:23
TBool Eval()
Return the result of evaluating current predicate.
Definition: table.cpp:14
TIntV GetFltRowIdxByVal(const TStr &ColName, const TFlt &Val) const
Gets the rows containing Val in flt column ColName.
Definition: table.cpp:5430
Definition: table.h:268
TSize GetContextMemUsedKB()
Returns approximate memory used by table context in [KB].
Definition: table.cpp:3946
TInt AddStr(const TStr &Key)
Adds string Key to the context, returns its KeyId.
Definition: table.h:208
TPredicateNode * Parent
Parent node of this node.
Definition: table.h:56
TInt GetIntVal(const TStr &ColName, const TInt &RowIdx)
Gets the value of integer attribute ColName at row RowIdx.
Definition: table.h:1011
THash< TInt, TInt > GetRowIdMap() const
Gets a map of logical to physical row ids.
Definition: table.h:1228
void SetFirstValidRow()
Sets the first valid row of the TTable.
Definition: table.h:821
void AddTable(const TTable &T)
Adds all the rows of the input table. Allows duplicate rows (not a union).
Definition: table.cpp:3952
void ColMul(const TStr &Attr1, const TStr &Attr2, const TStr &ResultAttrName="")
Performs columnwise multiplication. See TTable::ColGenericOp.
Definition: table.cpp:4801
void ClassifyAux(const TIntV &SelectedRows, const TStr &LabelName, const TInt &PositiveLabel=1, const TInt &NegativeLabel=0)
Adds a label attribute with positive labels on selected rows and negative labels on the rest...
Definition: table.cpp:4671
THash< TStr, TFlt > FltVars
Float variables in the current predicate tree.
Definition: table.h:85
void AddNRows(int NewRows, const TVec< TIntV > &IntColsP, const TVec< TFltV > &FltColsP, const TVec< TIntV > &StrColMapsP)
Adds NewRows rows from the given vectors for each column type.
Definition: table.cpp:4398
TVec< PTable > SpliceByGroup(const TStrV &GroupByAttrs, TBool Ordered=true)
Splices table into subtables according to a grouping statement.
Definition: table.cpp:1788
PGraphMP ToNetworkMP2(PTable Table, const TStr &SrcCol, const TStr &DstCol, TStrV &SrcAttrV, TStrV &DstAttrV, TStrV &EdgeAttrV, TAttrAggr AggrPolicy)
Implements table to network conversion in parallel. Not the recommended algorithm; use ToNetworkMP instead.
Definition: conv.h:1120
Definition: table.h:266
Definition: table.h:5
void AddEdgeAttr(TStrV &Attrs)
Adds columns to be used as graph edge attributes.
Definition: table.h:1165
TVec< TPair< TStr, TAttrType > > Schema
A table schema is a vector of pairs <attribute name, attribute type>.
Definition: table.h:271
void ColGenericOpMP(TInt ArgColIdx1, TInt ArgColIdx2, TAttrType ArgType1, TAttrType ArgType2, TInt ResColIdx, TArithOp op)
Definition: table.cpp:4685
Vector is a sequence of TVal objects representing an array that can change in size.
Definition: ds.h:429
TVec< PNEANet > GetGraphsFromSequence(TAttrAggr AggrPolicy)
Returns a sequence of graphs.
Definition: table.cpp:3594
TStrV GetDstNodeIntAttrV() const
Gets dst node int attribute name vector.
Definition: table.cpp:996
PTable Union(const PTable &Table)
Definition: table.h:1404
TAtomicPredicate Atom
Atomic predicate at this node.
Definition: table.h:55
TInt Partition(TIntV &V, TInt StartIdx, TInt EndIdx, const TVec< TAttrType > &SortByTypes, const TIntV &SortByIndices, TBool Asc)
Partitions vector for QSort.
Definition: table.cpp:3106
Implements a single CrossNet consisting of edges between two TModeNets (could be the same TModeNet) ...
Definition: mmnet.h:124
int GetSecHashCd() const
Definition: table.h:319