SNAP Library 4.0, Developer Reference  2017-07-27 13:18:06
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
table.h
Go to the documentation of this file.
1 #ifndef TABLE_H
2 #define TABLE_H
3 
// Logical operators a predicate node can carry. NOP is the value used by the
// TPredicateNode default constructor and by its atomic-predicate constructor.
5 typedef enum {NOT, AND, OR, NOP} TPredOp;
// Comparison operators for atomic predicates, numbered from LT = 0.
// SUBSTR and SUPERSTR are handled only by TPredicate::EvalStrAtom; the generic
// TPredicate::EvalAtom falls into its default branch and returns false for them.
7 typedef enum {LT = 0, LTE, EQ, NEQ, GTE, GT, SUBSTR, SUPERSTR} TPredComp;
8 
9 class TAtomicPredicate;
10 class TPredicateNode;
11 class TPredicate;
12 
13 //#//////////////////////////////////////////////
16  private:
25  // OP RS: 2014/03/25, NonAtom does not work with Snap.py
26  //protected:
27  //static const TAtomicPredicate NonAtom;
28  public:
31  Compare(EQ), Lvar(""), Rvar(""),
32  IntConst(0), FltConst(0), StrConst("") {}
33  //TAtomicPredicate() : Type(NonAtom.Type), IsConst(NonAtom.IsConst),
34  // Compare(NonAtom.Compare), Lvar(NonAtom.Lvar), Rvar(NonAtom.Rvar),
35  // IntConst(NonAtom.IntConst), FltConst(NonAtom.FltConst), StrConst(NonAtom.StrConst) {}
38  TInt ICnst, TFlt FCnst, TStr SCnst) : Type(Typ), IsConst(IsCnst),
39  Compare(Cmp), Lvar(L), Rvar(R), IntConst(ICnst), FltConst(FCnst),
40  StrConst(SCnst) {}
43  Type(Typ), IsConst(IsCnst), Compare(Cmp), Lvar(L), Rvar(R), IntConst(0),
44  FltConst(0), StrConst("") {}
45  friend class TPredicate;
46  friend class TPredicateNode;
47 };
48 
49 //#//////////////////////////////////////////////
52  public:
59  TPredicateNode(): Op(NOP), Result(false), Atom(), Parent(NULL), Left(NULL),
61  Right(NULL) {}
63  TPredicateNode(const TAtomicPredicate& A): Op(NOP), Result(false), Atom(A),
64  Parent(NULL), Left(NULL), Right(NULL) {}
66  TPredicateNode(TPredOp Opr): Op(Opr), Result(false), Atom(), Parent(NULL),
67  Left(NULL), Right(NULL) {}
70  Parent(P.Parent), Left(P.Left), Right(P.Right) {}
72  void AddLeftChild(TPredicateNode* Child) { Left = Child; Child->Parent = this; }
74  void AddRightChild(TPredicateNode* Child) { Right = Child; Child->Parent = this; }
76  void GetVariables(TStrV& Variables);
77  friend class TPredicate;
78 };
79 
80 //#//////////////////////////////////////////////
82 class TPredicate {
83  protected:
88  public:
94  TPredicate(const TPredicate& Pred) : IntVars(Pred.IntVars), FltVars(Pred.FltVars), StrVars(Pred.StrVars), Root(Pred.Root) {}
96  void GetVariables(TStrV& Variables);
98  void SetIntVal(TStr VarName, TInt VarVal) { IntVars.AddDat(VarName, VarVal); }
100  void SetFltVal(TStr VarName, TFlt VarVal) { FltVars.AddDat(VarName, VarVal); }
102  void SetStrVal(TStr VarName, TStr VarVal) { StrVars.AddDat(VarName, VarVal); }
104  TBool Eval();
107 
109  template <class T>
110  static TBool EvalAtom(T Val1, T Val2, TPredComp Cmp) {
111  switch (Cmp) {
112  case LT: return Val1 < Val2;
113  case LTE: return Val1 <= Val2;
114  case EQ: return Val1 == Val2;
115  case NEQ: return Val1 != Val2;
116  case GTE: return Val1 >= Val2;
117  case GT: return Val1 > Val2;
118  default: return false;
119  }
120  };
121 
123  static TBool EvalStrAtom(const TStr& Val1, const TStr& Val2, TPredComp Cmp) {
124  switch (Cmp) {
125  case LT: return Val1 < Val2;
126  case LTE: return Val1 <= Val2;
127  case EQ: return Val1 == Val2;
128  case NEQ: return Val1 != Val2;
129  case GTE: return Val1 >= Val2;
130  case GT: return Val1 > Val2;
131  case SUBSTR: return Val2.IsStrIn(Val1);
132  case SUPERSTR: return Val1.IsStrIn(Val2);
133  default: return false;
134  }
135  }
136 };
137 
138 //#//////////////////////////////////////////////
140 class TTable;
142 typedef TPt<TTable> PTable;
143 
146 
148 // Haversine distance is used to calculate distance between two points on a sphere based on latitude and longitude
150 
151 #if 0
152 // TMetric and TEuclideanMetric are currently not used, kept for future use
153 //#//////////////////////////////////////////////
155 class TMetric {
156 protected:
157  TStr MetricName;
158 public:
159  TMetric(TStr Name) : MetricName(Name) {}
161  TStr GetName();
163  virtual TFlt NumDist(TFlt,TFlt) { return -1; }
165  virtual TFlt StrDist(TStr,TStr) { return -1; }
166 };
167 
168 //#//////////////////////////////////////////////
170 class TEuclideanMetric: public TMetric {
171 public:
172  TEuclideanMetric(TStr Name) : TMetric(Name) {}
174  TFlt NumDist(TFlt x1,TFlt x2) { return fabs(x1-x2); }
175 };
176 #endif
177 
178 //#//////////////////////////////////////////////
181 protected:
183  friend class TTable;
184 
185 public:
191  void Load(TSIn& SIn) { StringVals.Load(SIn); }
193  void LoadShM(TShMIn& ShMIn) {
194  StringVals.LoadShM(ShMIn, true);
195  }
197  void Save(TSOut& SOut) { StringVals.Save(SOut); }
199  TInt AddStr(const TStr& Key) {
200  TInt KeyId = TInt(StringVals.AddKey(Key));
201  return(KeyId);
202  }
204  TStr GetStr(const TInt& KeyId) const {
205  return StringVals.GetKey(KeyId);
206  }
207 };
208 
209 //#//////////////////////////////////////////////
211 class TPrimitive {
212 private:
217 
218 public:
219  TPrimitive() : IntVal(-1), FltVal(-1), StrVal(""), AttrType(atInt) {}
220  TPrimitive(const TInt& Val) : IntVal(Val), FltVal(-1), StrVal(""), AttrType(atInt) {}
221  TPrimitive(const TFlt& Val) : IntVal(-1), FltVal(Val), StrVal(""), AttrType(atFlt) {}
222  TPrimitive(const TStr& Val) : IntVal(-1), FltVal(-1), StrVal(Val.CStr()), AttrType(atStr) {}
223  TPrimitive(const TPrimitive& Prim) : IntVal(Prim.IntVal), FltVal(Prim.FltVal),
224  StrVal(Prim.StrVal.CStr()), AttrType(Prim.AttrType) {}
225 
226  TInt GetInt() const { return IntVal; }
227  TFlt GetFlt() const { return FltVal; }
228  TStr GetStr() const { return StrVal; }
229  TAttrType GetType() const { return AttrType; }
230 };
231 
232 //#//////////////////////////////////////////////
234 class TTableRow {
235 protected:
239 public:
243  void AddInt(const TInt& Val) { IntVals.Add(Val); }
245  void AddFlt(const TFlt& Val) { FltVals.Add(Val); }
247  void AddStr(const TStr& Val) { StrVals.Add(Val); }
249  TIntV GetIntVals() const { return IntVals; }
251  TFltV GetFltVals() const { return FltVals; }
253  TStrV GetStrVals() const { return StrVals; }
254 };
255 
260 
263 
264 //#//////////////////////////////////////////////
266 class GroupStmt{
267 protected:
272 public:
274  GroupStmt(const TStrV& Attrs): GroupByAttrs(Attrs), Ordered(true), UsePhysicalRowIds(true), Valid(true){}
275  GroupStmt(const TStrV& Attrs, TBool ordered, TBool physical): GroupByAttrs(Attrs), Ordered(ordered), UsePhysicalRowIds(physical), Valid(true){}
278  TBool operator ==(const GroupStmt& stmt) const{
279  if(stmt.Ordered != Ordered || stmt.UsePhysicalRowIds != UsePhysicalRowIds){ return false;}
280  if(stmt.GroupByAttrs.Len() != GroupByAttrs.Len()){ return false;}
281  for(int i = 0; i < GroupByAttrs.Len(); i++){
282  if(stmt.GroupByAttrs[i] != GroupByAttrs[i]){ return false;}
283  }
284  return true;
285  }
286  TBool IsValid(){ return Valid;}
287  void Invalidate(){ Valid = false;}
288  TBool IncludesAttr(const TStr& Attr){
289  for(int i = 0; i < GroupByAttrs.Len(); i++){
290  if(GroupByAttrs[i] == Attr){ return true;}
291  }
292  return false;
293  }
294  TSize GetMemUsed() const{
295  TSize sz = 3 * sizeof(TBool);
296  sz += GroupByAttrs.GetMemUsed();
297  for(int i = 0; i < GroupByAttrs.Len(); i++){
298  sz += GroupByAttrs[i].GetMemUsed();
299  }
300  return sz;
301  }
302 
303  int GetPrimHashCd() const{
304  int hc1 = GroupByAttrs.GetPrimHashCd();
306  int hc2 = flags.GetPrimHashCd();
307  return TPairHashImpl::GetHashCd(hc1, hc2);
308  }
309 
310  int GetSecHashCd() const{
311  int hc1 = GroupByAttrs.GetSecHashCd();
313  int hc2 = flags.GetSecHashCd();
314  return TPairHashImpl::GetHashCd(hc1, hc2);
315  }
316 
317  void Print(){
318  for(int i = 0; i < GroupByAttrs.Len(); i++){
319  printf("%s ", GroupByAttrs[i].CStr());
320  }
321  printf("Ordered: %d, UsePhysicalRows: %d, Valid: %d\n", Ordered.Val, UsePhysicalRowIds.Val, Valid.Val);
322  }
323 };
324 
325 //#//////////////////////////////////////////////
327 
332  const TTable* Table;
333 public:
335  TRowIterator(): CurrRowIdx(0), Table(NULL) {}
337  TRowIterator(TInt RowIdx, const TTable* TablePtr): CurrRowIdx(RowIdx), Table(TablePtr) {}
341  TRowIterator& operator++(int);
343  TRowIterator& Next();
345  bool operator < (const TRowIterator& RowI) const;
347  bool operator == (const TRowIterator& RowI) const;
349  TInt GetRowIdx() const;
351  TInt GetIntAttr(TInt ColIdx) const;
353  TFlt GetFltAttr(TInt ColIdx) const;
355  TStr GetStrAttr(TInt ColIdx) const;
357  TInt GetStrMapById(TInt ColIdx) const;
359  TInt GetIntAttr(const TStr& Col) const;
361  TFlt GetFltAttr(const TStr& Col) const;
363  TStr GetStrAttr(const TStr& Col) const;
365  TInt GetStrMapByName(const TStr& Col) const;
367  TBool CompareAtomicConst(TInt ColIdx, const TPrimitive& Val, TPredComp Cmp);
369  TBool CompareAtomicConstTStr(TInt ColIdx, const TStr& Val, TPredComp Cmp);
370 };
371 
372 //#//////////////////////////////////////////////
378 public:
382  TRowIteratorWithRemove(TInt RowIdx, TTable* TablePtr);
384  TRowIteratorWithRemove(TInt RowIdx, TTable* TablePtr, TBool IsStart) : CurrRowIdx(RowIdx),
385  Table(TablePtr), Start(IsStart) {}
388  Table(RowI.Table), Start(RowI.Start) {}
394  bool operator < (const TRowIteratorWithRemove& RowI) const;
396  bool operator == (const TRowIteratorWithRemove& RowI) const;
398  TInt GetRowIdx() const;
400  TInt GetNextRowIdx() const;
402  TInt GetNextIntAttr(TInt ColIdx) const;
404  TFlt GetNextFltAttr(TInt ColIdx) const;
406  TStr GetNextStrAttr(TInt ColIdx) const;
408  TInt GetNextIntAttr(const TStr& Col) const;
410  TFlt GetNextFltAttr(const TStr& Col) const;
412  TStr GetNextStrAttr(const TStr& Col) const;
414  TBool IsFirst() const;
416  void RemoveNext();
418  TBool CompareAtomicConst(TInt ColIdx, const TPrimitive& Val, TPredComp Cmp);
419 };
420 
421 //#//////////////////////////////////////////////
426 public:
428  TTableIterator(TVec<PTable>& PTableV): PTableV(PTableV), CurrTableIdx(0) {}
430  PTable Next() { return PTableV[CurrTableIdx++]; }
432  bool HasNext() { return CurrTableIdx < PTableV.Len(); }
433 };
434 
436 namespace TSnap {
438  template<class PGraph> PGraph ToGraph(PTable Table,
439  const TStr& SrcCol, const TStr& DstCol, TAttrAggr AggrPolicy);
441  template<class PGraph> PGraph ToNetwork(PTable Table,
442  const TStr& SrcCol, const TStr& DstCol,
443  TStrV& SrcAttrs, TStrV& DstAttrs, TStrV& EdgeAttrs,
444  TAttrAggr AggrPolicy);
446  template<class PGraph> PGraph ToNetwork(PTable Table,
447  const TStr& SrcCol, const TStr& DstCol, TAttrAggr AggrPolicy);
448  template<class PGraph> PGraph ToNetwork(PTable Table,
449  const TStr& SrcCol, const TStr& DstCol,
450  TStrV& EdgeAttrV,
451  TAttrAggr AggrPolicy);
452  template<class PGraph> PGraph ToNetwork(PTable Table,
453  const TStr& SrcCol, const TStr& DstCol,
454  TStrV& EdgeAttrV, PTable NodeTable, const TStr& NodeCol, TStrV& NodeAttrV,
455  TAttrAggr AggrPolicy);
456  int LoadCrossNet(TCrossNet& Graph, PTable Table,
457  const TStr& SrcCol, const TStr& DstCol,
458  TStrV& EdgeAttrV);
459  int LoadMode(TModeNet& Graph, PTable Table, const TStr& NCol,
460  TStrV& NodeAttrV);
461 
462 #ifdef GCC_ATOMIC
463  template<class PGraphMP> PGraphMP ToGraphMP(PTable Table,
464  const TStr& SrcCol, const TStr& DstCol);
465  template<class PGraphMP> PGraphMP ToGraphMP3(PTable Table,
466  const TStr& SrcCol, const TStr& DstCol);
467  template<class PGraphMP> PGraphMP ToNetworkMP(PTable Table, const TStr& SrcCol, const TStr& DstCol,
468  TStrV& SrcAttrs, TStrV& DstAttrs, TStrV& EdgeAttrs, TAttrAggr AggrPolicy);
469  template<class PGraphMP> PGraphMP ToNetworkMP2(PTable Table, const TStr& SrcCol, const TStr& DstCol,
470  TStrV& SrcAttrs, TStrV& DstAttrs, TStrV& EdgeAttrs, TAttrAggr AggrPolicy);
471  template<class PGraphMP> PGraphMP ToNetworkMP(PTable Table, const TStr& SrcCol, const TStr& DstCol,
472  TStrV& EdgeAttrV, TAttrAggr AggrPolicy);
473  template<class PGraphMP> PGraphMP ToNetworkMP(PTable Table, const TStr& SrcCol, const TStr& DstCol,
474  TAttrAggr AggrPolicy);
475  template<class PGraphMP> PGraphMP ToNetworkMP(PTable Table, const TStr& SrcCol, const TStr& DstCol,
476  TStrV& EdgeAttrV, PTable NodeTable, const TStr& NodeCol, TStrV& NodeAttrV, TAttrAggr AggrPolicy);
477 
478 
479 #endif // GCC_ATOMIC
480 }
481 
482 //#//////////////////////////////////////////////
484 class TTable {
485 protected:
486  static const TInt Last;
487  static const TInt Invalid;
488 
489  static TInt UseMP;
490 public:
491  template<class PGraph> friend PGraph TSnap::ToGraph(PTable Table,
492  const TStr& SrcCol, const TStr& DstCol, TAttrAggr AggrPolicy);
493  template<class PGraph> friend PGraph TSnap::ToNetwork(PTable Table,
494  const TStr& SrcCol, const TStr& DstCol,
495  TStrV& SrcAttrs, TStrV& DstAttrs, TStrV& EdgeAttrs,
497  template<class PGraph> friend PGraph TSnap::ToNetwork(PTable Table,
498  const TStr& SrcCol, const TStr& DstCol,
500  template<class PGraph> friend PGraph TSnap::ToNetwork(PTable Table,
501  const TStr& SrcCol, const TStr& DstCol,
502  TStrV& EdgeAttrV,
504  template<class PGraph> friend PGraph TSnap::ToNetwork(PTable Table,
505  const TStr& SrcCol, const TStr& DstCol,
506  TStrV& EdgeAttrV, PTable NodeTable, const TStr& NodeCol, TStrV& NodeAttrV,
508  friend int TSnap::LoadCrossNet(TCrossNet& Graph, PTable Table,
509  const TStr& SrcCol, const TStr& DstCol,
510  TStrV& EdgeAttrV);
511  friend int TSnap::LoadMode(TModeNet& Graph, PTable Table,
512  const TStr& NCol, TStrV& NodeAttrV);
513 
514 #ifdef GCC_ATOMIC
515  template<class PGraphMP> friend PGraphMP TSnap::ToGraphMP(PTable Table, const TStr& SrcCol, const TStr& DstCol);
516  template<class PGraphMP> friend PGraphMP TSnap::ToGraphMP3(PTable Table, const TStr& SrcCol, const TStr& DstCol);
517  template<class PGraphMP> friend PGraphMP TSnap::ToNetworkMP(PTable Table, const TStr& SrcCol, const TStr& DstCol, TStrV& SrcAttrs, TStrV& DstAttrs, TStrV& EdgeAttrs, TAttrAggr AggrPolicy);
518  template<class PGraphMP> friend PGraphMP TSnap::ToNetworkMP2(PTable Table, const TStr& SrcCol, const TStr& DstCol, TStrV& SrcAttrs, TStrV& DstAttrs, TStrV& EdgeAttrs, TAttrAggr AggrPolicy);
519  template<class PGraphMP> friend PGraphMP TSnap::ToNetworkMP(PTable Table, const TStr& SrcCol, const TStr& DstCol, TStrV& EdgeAttrV, TAttrAggr AggrPolicy);
520  template<class PGraphMP> friend PGraphMP TSnap::ToNetworkMP(PTable Table, const TStr& SrcCol, const TStr& DstCol, TAttrAggr AggrPolicy);
521  template<class PGraphMP> friend PGraphMP TSnap::ToNetworkMP(PTable Table, const TStr& SrcCol, const TStr& DstCol,
522  TStrV& EdgeAttrV, PTable NodeTable, const TStr& NodeCol, TStrV& NodeAttrV, TAttrAggr AggrPolicy);
523 
524 #endif // GCC_ATOMIC
525 
526  static void SetMP(TInt Value) { UseMP = Value; }
527  static TInt GetMP() { return UseMP; }
528 
530  static TStr NormalizeColName(const TStr& ColName) {
531  TStr Result = ColName;
532  int RLen = Result.Len();
533  if (RLen == 0) { return Result; }
534  if (Result.GetCh(0) == '_') { return Result; }
535  if (RLen >= 2 && Result.GetCh(RLen-2) == '-') { return Result; }
536  return Result + "-1";
537  }
539  static TStrV NormalizeColNameV(const TStrV& Cols) {
540  TStrV NCols;
541  for (TInt i = 0; i < Cols.Len(); i++) { NCols.Add(NormalizeColName(Cols[i])); }
542  return NCols;
543  }
544 protected:
546 
556 
561 
567 
571 
572  // Group mapping data structures.
574 
578 
582 
585  void InvalidatePhysicalGroupings(); // to be called when rows are added / physically removed
586  void InvalidateAffectedGroupings(const TStr& Attr); // to be called when attributes are removed (projected) or values updated in-place
587 
588  // Fields to be used when constructing a graph.
595 
602 
604 
605 /***** Utility functions *****/
606 public:
608  void AddIntCol(const TStr& ColName);
610  void AddFltCol(const TStr& ColName);
612  void AddStrCol(const TStr& ColName);
613 protected:
615  void IncrementNext();
617  void ClassifyAux(const TIntV& SelectedRows, const TStr& LabelName,
618  const TInt& PositiveLabel = 1, const TInt& NegativeLabel= 0);
619 
620 /***** Utility functions for handling string values *****/
622  const char* GetContextKey(TInt Val) const {
623  return Context->StringVals.GetKey(Val);
624  }
626  TStr GetStrVal(TInt ColIdx, TInt RowIdx) const {
627  return TStr(Context->StringVals.GetKey(StrColMaps[ColIdx][RowIdx]));
628  }
630  void AddStrVal(const TInt& ColIdx, const TStr& Val);
632  void AddStrVal(const TStr& Col, const TStr& Val);
633 
634 /***** Utility functions for handling Schema *****/
636  TStr GetIdColName() const { return IdColName; }
638  TStr GetSchemaColName(TInt Idx) const { return Sch[Idx].Val1; }
640  TAttrType GetSchemaColType(TInt Idx) const { return Sch[Idx].Val2; }
642  void AddSchemaCol(const TStr& ColName, TAttrType ColType) {
643  TStr NColName = NormalizeColName(ColName);
644  Sch.Add(TPair<TStr,TAttrType>(NColName, ColType));
645  }
646  TBool IsColName(const TStr& ColName) const {
647  TStr NColName = NormalizeColName(ColName);
648  return ColTypeMap.IsKey(NColName);
649  }
651  void AddColType(const TStr& ColName, TPair<TAttrType,TInt> ColType) {
652  TStr NColName = NormalizeColName(ColName);
653  ColTypeMap.AddDat(NColName, ColType);
654  }
656  void AddColType(const TStr& ColName, TAttrType ColType, TInt Index) {
657  TStr NColName = NormalizeColName(ColName);
658  AddColType(NColName, TPair<TAttrType,TInt>(ColType, Index));
659  }
661  void DelColType(const TStr& ColName) {
662  TStr NColName = NormalizeColName(ColName);
663  ColTypeMap.DelKey(NColName);
664  }
666  TPair<TAttrType, TInt> GetColTypeMap(const TStr& ColName) const {
667  TStr NColName = NormalizeColName(ColName);
668  return ColTypeMap.GetDat(NColName);
669  }
671  TStr RenumberColName(const TStr& ColName) const;
673  TStr DenormalizeColName(const TStr& ColName) const;
675  Schema DenormalizeSchema() const;
677  TBool IsAttr(const TStr& Attr);
678 
679 /***** Utility functions for adding rows and tables to TTable *****/
681  void AddTable(const TTable& T);
683  void ConcatTable(const PTable& T) {AddTable(*T); Reindex(); }
684 
686  void AddRow(const TRowIterator& RI);
688  void AddRow(const TIntV& IntVals, const TFltV& FltVals, const TStrV& StrVals);
689 
690 /***** Utility functions for building graph from TTable *****/
692  void AddGraphAttribute(const TStr& Attr, TBool IsEdge, TBool IsSrc, TBool IsDst);
694  void AddGraphAttributeV(TStrV& Attrs, TBool IsEdge, TBool IsSrc, TBool IsDst);
696  void CheckAndAddIntNode(PNEANet Graph, THashSet<TInt>& NodeVals, TInt NodeId);
698  template<class T> TInt CheckAndAddFltNode(T Graph, THash<TFlt, TInt>& NodeVals, TFlt FNodeVal);
700  void AddEdgeAttributes(PNEANet& Graph, int RowId);
702  void AddNodeAttributes(TInt NId, TStrV NodeAttrV, TInt RowId,
703  THash<TInt, TStrIntVH>& NodeIntAttrs, THash<TInt, TStrFltVH>& NodeFltAttrs,
704  THash<TInt, TStrStrVH>& NodeStrAttrs);
706  PNEANet BuildGraph(const TIntV& RowIds, TAttrAggr AggrPolicy);
708  void InitRowIdBuckets(int NumBuckets);
710 
713  void FillBucketsByWindow(TStr SplitAttr, TInt JumpSize, TInt WindowSize,
714  TInt StartVal, TInt EndVal);
716 
719  void FillBucketsByInterval(TStr SplitAttr, TIntPrV SplitIntervals);
721 
725 
730 
734 
736 
739  template <class T> T AggregateVector(TVec<T>& V, TAttrAggr Policy);
740 
741  /***** Grouping Utility functions *************/
743  void GroupingSanityCheck(const TStr& GroupBy, const TAttrType& AttrType) const;
745 
749  template <class T> void GroupByIntCol(const TStr& GroupBy, T& Grouping,
750  const TIntV& IndexSet, TBool All, TBool UsePhysicalIds = true) const;
751 #ifdef GCC_ATOMIC
752  public: //Should be protected - this is for debug only
754  void GroupByIntColMP(const TStr& GroupBy, THashMP<TInt, TIntV>& Grouping, TBool UsePhysicalIds = true) const;
755 #endif // GCC_ATOMIC
756  protected:
758  template <class T> void GroupByFltCol(const TStr& GroupBy, T& Grouping,
759  const TIntV& IndexSet, TBool All, TBool UsePhysicalIds = true) const;
761  template <class T> void GroupByStrCol(const TStr& GroupBy, T& Grouping,
762  const TIntV& IndexSet, TBool All, TBool UsePhysicalIds = true) const;
764  template <class T> void UpdateGrouping(THash<T,TIntV>& Grouping, T Key, TInt Val) const;
765 #ifdef GCC_ATOMIC
766  template <class T> void UpdateGrouping(THashMP<T,TIntV>& Grouping, T Key, TInt Val) const;
768 #endif // GCC_ATOMIC
769  void PrintGrouping(const THash<TGroupKey, TIntV>& Grouping) const;
770 
771  /***** Utility functions for sorting by columns *****/
773  inline TInt CompareRows(TInt R1, TInt R2, const TAttrType& CompareByType,
774  const TInt& CompareByIndex, TBool Asc = true);
776  inline TInt CompareRows(TInt R1, TInt R2, const TVec<TAttrType>& CompareByTypes,
777  const TIntV& CompareByIndices, TBool Asc = true);
779  TInt GetPivot(TIntV& V, TInt StartIdx, TInt EndIdx, const TVec<TAttrType>& SortByTypes,
780  const TIntV& SortByIndices, TBool Asc);
782  TInt Partition(TIntV& V, TInt StartIdx, TInt EndIdx, const TVec<TAttrType>& SortByTypes,
783  const TIntV& SortByIndices, TBool Asc);
785  void ISort(TIntV& V, TInt StartIdx, TInt EndIdx, const TVec<TAttrType>& SortByTypes,
786  const TIntV& SortByIndices, TBool Asc = true);
788  void QSort(TIntV& V, TInt StartIdx, TInt EndIdx, const TVec<TAttrType>& SortByTypes,
789  const TIntV& SortByIndices, TBool Asc = true);
791  void Merge(TIntV& V, TInt Idx1, TInt Idx2, TInt Idx3, const TVec<TAttrType>& SortByTypes,
792  const TIntV& SortByIndices, TBool Asc = true);
793 #ifdef USE_OPENMP
794  void QSortPar(TIntV& V, const TVec<TAttrType>& SortByTypes, const TIntV& SortByIndices,
796  TBool Asc = true);
797 #endif // USE_OPENMP
798 
799 /***** Utility functions for removing rows (not through iterator) *****/
801  bool IsRowValid(TInt RowIdx) const{ return Next[RowIdx] != Invalid;}
805  void RemoveFirstRow();
807  void RemoveRow(TInt RowIdx, TInt PrevRowIdx);
809  void KeepSortedRows(const TIntV& KeepV);
812  for (int i = 0; i < Next.Len(); i++) {
813  if(Next[i] != TTable::Invalid) { FirstValidRow = i; return;}
814  }
815  TExcept::Throw("SetFirstValidRow: Table is empty");
816  }
817 
818 /***** Utility functions for Join *****/
820  PTable InitializeJointTable(const TTable& Table);
822  void AddJointRow(const TTable& T1, const TTable& T2, TInt RowIdx1, TInt RowIdx2);
823 /***** Utility functions for Threshold Join *****/
824  void ThresholdJoinInputCorrectness(const TStr& KeyCol1, const TStr& JoinCol1, const TTable& Table,
825  const TStr& KeyCol2, const TStr& JoinCol2);
826  void ThresholdJoinCountCollisions(const TTable& TB, const TTable& TS,
827  const TIntIntVH& T, TInt JoinColIdxB, TInt KeyColIdxB, TInt KeyColIdxS,
828  THash<TIntPr,TIntTr>& Counters, TBool ThisIsSmaller, TAttrType JoinColType, TAttrType KeyType);
829  PTable ThresholdJoinOutputTable(const THash<TIntPr,TIntTr>& Counters, TInt Threshold, const TTable& Table);
830  void ThresholdJoinCountPerJoinKeyCollisions(const TTable& TB, const TTable& TS,
831  const TIntIntVH& T, TInt JoinColIdxB, TInt KeyColIdxB, TInt KeyColIdxS,
832  THash<TIntTr,TIntTr>& Counters, TBool ThisIsSmaller, TAttrType JoinColType, TAttrType KeyType);
833  PTable ThresholdJoinPerJoinKeyOutputTable(const THash<TIntTr,TIntTr>& Counters, TInt Threshold, const TTable& Table);
834 
836  void ResizeTable(int RowCount);
838  int GetEmptyRowsStart(int NewRows);
840  void AddSelectedRows(const TTable& Table, const TIntV& RowIDs);
842  void AddNRows(int NewRows, const TVec<TIntV>& IntColsP, const TVec<TFltV>& FltColsP,
843  const TVec<TIntV>& StrColMapsP);
844 #ifdef USE_OPENMP
845  void AddNJointRowsMP(const TTable& T1, const TTable& T2, const TVec<TIntPrV>& JointRowIDSet);
847 #endif // USE_OPENMP
848  void UpdateTableForNewRow();
850 
851 #ifdef GCC_ATOMIC
852  static void LoadSSPar(PTable& NewTable, const Schema& S, const TStr& InFNm, const TIntV& RelevantCols, const char& Separator, TBool HasTitleLine);
854 #endif // GCC_ATOMIC
855  static void LoadSSSeq(PTable& NewTable, const Schema& S, const TStr& InFNm, const TIntV& RelevantCols, const char& Separator, TBool HasTitleLine);
857 
858 /***** Utility functions for Group *****/
860 
863  void GroupAux(const TStrV& GroupBy, THash<TGroupKey, TPair<TInt, TIntV> >& Grouping,
864  TBool Ordered, const TStr& GroupColName, TBool KeepUnique, TIntV& UniqueVec, TBool UsePhysicalIds = true);
865 #ifdef USE_OPENMP
866  //void GroupAuxMP(const TStrV& GroupBy, THashGenericMP<TGroupKey, TPair<TInt, TIntV> >& Grouping,
868  // TBool Ordered, const TStr& GroupColName, TBool KeepUnique, TIntV& UniqueVec, TBool UsePhysicalIds = false);
869 #endif // USE_OPENMP
870  void StoreGroupCol(const TStr& GroupColName, const TVec<TPair<TInt, TInt> >& GroupAndRowIds);
874  //template<class T> void RegisterGrouping(const T& Grouping, const TStr& GroupByCol, TBool UsePhysicalRows);
875 
877  void Reindex();
879  void AddIdColumn(const TStr& IdColName);
880 
881  static TInt CompareKeyVal(const TInt& K1, const TInt& V1, const TInt& K2, const TInt& V2);
882  static TInt CheckSortedKeyVal(TIntV& Key, TIntV& Val, TInt Start, TInt End);
883  static void ISortKeyVal(TIntV& Key, TIntV& Val, TInt Start, TInt End);
884  static TInt GetPivotKeyVal(TIntV& Key, TIntV& Val, TInt Start, TInt End);
885  static TInt PartitionKeyVal(TIntV& Key, TIntV& Val, TInt Start, TInt End);
886  static void QSortKeyVal(TIntV& Key, TIntV& Val, TInt Start, TInt End);
887 
889  void GetCollidingRows(const TTable& T, THashSet<TInt>& Collisions);
890 private:
891  class TLoadVecInit {
892  public:
894  template<typename TElem>
895  void operator() (TVec<TElem>* Node, TShMIn& ShMIn) {Node->LoadShM(ShMIn);}
896  };
897 private:
898  void GenerateColTypeMap(THash<TStr,TPair<TInt,TInt> > & ColTypeIntMap);
899  void LoadTableShM(TShMIn& ShMIn, TTableContext* ContextTable);
900 
901 
902 public:
903 /***** Constructors *****/
904  TTable();
906  TTable(const Schema& S, TTableContext* Context);
908 
910  TTable(const THash<TInt,TInt>& H, const TStr& Col1, const TStr& Col2,
911  TTableContext* Context, const TBool IsStrKeys = false);
913  TTable(const THash<TInt,TFlt>& H, const TStr& Col1, const TStr& Col2,
914  TTableContext* Context, const TBool IsStrKeys = false);
915  // TTable(const TStr& TableName, const THash<TInt,TStr>& H, const TStr& Col1,
916  // const TStr& Col2, TTableContext* Context);
917 
919  TTable(const TTable& Table): Context(Table.Context), Sch(Table.Sch),
921  LastValidRow(Table.LastValidRow), Next(Table.Next), IntCols(Table.IntCols),
922  FltCols(Table.FltCols), StrColMaps(Table.StrColMaps), ColTypeMap(Table.ColTypeMap),
925  SrcCol(Table.SrcCol), DstCol(Table.DstCol),
928  IsNextDirty(Table.IsNextDirty) {}
929 
930  TTable(const TTable& Table, const TIntV& RowIds);
931 
932  static PTable New() { return new TTable(); }
933  static PTable New(TTableContext* Context) { return new TTable(Context); }
934  static PTable New(const Schema& S, TTableContext* Context) {
935  return new TTable(S, Context);
936  }
938  static PTable New(const THash<TInt,TInt>& H, const TStr& Col1,
939  const TStr& Col2, TTableContext* Context, const TBool IsStrKeys = false) {
940  return new TTable(H, Col1, Col2, Context, IsStrKeys);
941  }
943  static PTable New(const THash<TInt,TFlt>& H, const TStr& Col1,
944  const TStr& Col2, TTableContext* Context, const TBool IsStrKeys = false) {
945  return new TTable(H, Col1, Col2, Context, IsStrKeys);
946  }
948  static PTable New(const PTable Table) { return new TTable(*Table); }
950  // static PTable New(const PTable Table, const TStr& TableName) {
951  // PTable T = New(Table); T->Name = TableName;
952  // return T;
953  // }
955  static void GetSchema(const TStr& InFNm, Schema& S, const char& Separator = '\t');
956 /***** Save / Load functions *****/
958  static PTable LoadSS(const Schema& S, const TStr& InFNm, TTableContext* Context,
959  const char& Separator = '\t', TBool HasTitleLine = false);
961  static PTable LoadSS(const Schema& S, const TStr& InFNm, TTableContext* Context,
962  const TIntV& RelevantCols, const char& Separator = '\t', TBool HasTitleLine = false);
964  void SaveSS(const TStr& OutFNm);
966  void SaveBin(const TStr& OutFNm);
968 
971  static PTable Load(TSIn& SIn, TTableContext* Context){ return new TTable(SIn, Context);}
973 
976  TTable* Table = new TTable();
977  Table->LoadTableShM(ShMIn, Context);
978  return PTable(Table);
979  }
981 
983  void Save(TSOut& SOut);
985  void Dump(FILE *OutF=stdout) const;
986 
988  static PTable TableFromHashMap(const THash<TInt,TInt>& H, const TStr& Col1, const TStr& Col2,
989  TTableContext* Context, const TBool IsStrKeys = false) {
990  PTable T = New(H, Col1, Col2, Context, IsStrKeys);
991  T->InitIds();
992  return T;
993  }
995  static PTable TableFromHashMap(const THash<TInt,TFlt>& H, const TStr& Col1, const TStr& Col2,
996  TTableContext* Context, const TBool IsStrKeys = false) {
997  PTable T = New(H, Col1, Col2, Context, IsStrKeys);
998  T->InitIds();
999  return T;
1000  }
1002  void AddRow(const TTableRow& Row) { AddRow(Row.GetIntVals(), Row.GetFltVals(), Row.GetStrVals()); };
1003 
1006  return Context;
1007  }
1010 
1011 /***** Value Getters - getValue(column name, physical row Idx) *****/
1013  TInt GetColIdx(const TStr& ColName) const {
1014  TStr NColName = NormalizeColName(ColName);
1015  return ColTypeMap.IsKey(NColName) ? ColTypeMap.GetDat(NColName).Val2 : TInt(-1);
1016  }
1017 
1018  // No type checking. Assuming ColName actually refers to the right type.
1020  TInt GetIntVal(const TStr& ColName, const TInt& RowIdx) {
1021  return IntCols[GetColIdx(ColName)][RowIdx];
1022  }
1024  TFlt GetFltVal(const TStr& ColName, const TInt& RowIdx) {
1025  return FltCols[GetColIdx(ColName)][RowIdx];
1026  }
1028  TStr GetStrVal(const TStr& ColName, const TInt& RowIdx) const {
1029  return GetStrVal(GetColIdx(ColName), RowIdx);
1030  }
1031 
1033  TInt GetStrMapById(TInt ColIdx, TInt RowIdx) const {
1034  return StrColMaps[ColIdx][RowIdx];
1035  }
1036 
1038  TInt GetStrMapByName(const TStr& ColName, TInt RowIdx) const {
1039  return StrColMaps[GetColIdx(ColName)][RowIdx];
1040  }
1041 
1043  TStr GetStrValById(TInt ColIdx, TInt RowIdx) const {
1044  return GetStrVal(ColIdx, RowIdx);
1045  }
1046 
1048  TStr GetStrValByName(const TStr& ColName, const TInt& RowIdx) const {
1049  return GetStrVal(ColName, RowIdx);
1050  }
1051 
1053 
1059  TIntV GetIntRowIdxByVal(const TStr& ColName, const TInt& Val) const;
1061 
1067  TIntV GetStrRowIdxByMap(const TStr& ColName, const TInt& Map) const;
1069 
1075  TIntV GetFltRowIdxByVal(const TStr& ColName, const TFlt& Val) const;
1076 
1078 
1086  TInt RequestIndexInt(const TStr& ColName);
1088 
1096  TInt RequestIndexFlt(const TStr& ColName);
1098 
1106  TInt RequestIndexStrMap(const TStr& ColName);
1107 
1109  TStr GetStr(const TInt& KeyId) const {
1110  return Context->StringVals.GetKey(KeyId);
1111  }
1112 
1113 /***** Value Getters - getValue(col idx, row Idx) *****/
1114  // No type and bound checking
1116  TInt GetIntValAtRowIdx(const TInt& ColIdx, const TInt& RowIdx) {
1117  return IntCols[ColIdx][RowIdx];
1118  }
1120  TFlt GetFltValAtRowIdx(const TInt& ColIdx, const TInt& RowIdx) {
1121  return FltCols[ColIdx][RowIdx];
1122  }
1123 
1126 
1127 /***** Graph handling *****/
1130  TInt WindowSize, TInt JumpSize, TInt StartVal = TInt::Mn, TInt EndVal = TInt::Mx);
1132  TVec<PNEANet> ToVarGraphSequence(TStr SplitAttr, TAttrAggr AggrPolicy, TIntPrV SplitIntervals);
1135 
1137 
1141  TInt WindowSize, TInt JumpSize, TInt StartVal = TInt::Mn, TInt EndVal = TInt::Mx);
1143 
1146  PNEANet ToVarGraphSequenceIterator(TStr SplitAttr, TAttrAggr AggrPolicy, TIntPrV SplitIntervals);
1148 
1156 
1158  TStr GetSrcCol() const { return SrcCol; }
1160  void SetSrcCol(const TStr& Src) {
1161  if (!IsColName(Src)) { TExcept::Throw(Src + ": no such column"); }
1162  SrcCol = NormalizeColName(Src);
1163  }
1165  TStr GetDstCol() const { return DstCol; }
/// Sets the name of the column to be used as dst nodes in the graph.
/// Raises via TExcept::Throw ("<Dst>: no such column") when IsColName(Dst) is false;
/// otherwise stores NormalizeColName(Dst) in DstCol.
1167  void SetDstCol(const TStr& Dst) {
1168  if (!IsColName(Dst)) { TExcept::Throw(Dst + ": no such column"); }
1169  DstCol = NormalizeColName(Dst);
1170  }
/// Registers column Attr as a graph attribute with the flag triple (true, false, false).
/// Presumably the flags are (IsEdge, IsSrc, IsDst), matching AddGraphAttributeV - confirm for the single-name helper.
1172  void AddEdgeAttr(const TStr& Attr) { AddGraphAttribute(Attr, true, false, false); }
/// Vector overload: registers every column in Attrs with IsEdge=true, IsSrc=false, IsDst=false.
1174  void AddEdgeAttr(TStrV& Attrs) { AddGraphAttributeV(Attrs, true, false, false); }
/// Registers column Attr as a graph attribute with the flag triple (false, true, false).
/// Presumably the flags are (IsEdge, IsSrc, IsDst), matching AddGraphAttributeV - confirm for the single-name helper.
1176  void AddSrcNodeAttr(const TStr& Attr) { AddGraphAttribute(Attr, false, true, false); }
/// Vector overload: registers every column in Attrs with IsEdge=false, IsSrc=true, IsDst=false.
1178  void AddSrcNodeAttr(TStrV& Attrs) { AddGraphAttributeV(Attrs, false, true, false); }
/// Registers column Attr as a graph attribute with the flag triple (false, false, true).
/// Presumably the flags are (IsEdge, IsSrc, IsDst), matching AddGraphAttributeV - confirm for the single-name helper.
1180  void AddDstNodeAttr(const TStr& Attr) { AddGraphAttribute(Attr, false, false, true); }
/// Vector overload: registers every column in Attrs with IsEdge=false, IsSrc=false, IsDst=true.
1182  void AddDstNodeAttr(TStrV& Attrs) { AddGraphAttributeV(Attrs, false, false, true); }
/// Handles the common case where src and dst both belong to the same "universe" of entities:
/// registers Attr through both AddSrcNodeAttr and AddDstNodeAttr.
1184  void AddNodeAttr(const TStr& Attr) { AddSrcNodeAttr(Attr); AddDstNodeAttr(Attr); }
/// Vector overload: registers Attrs through both AddSrcNodeAttr and AddDstNodeAttr.
1186  void AddNodeAttr(TStrV& Attrs) { AddSrcNodeAttr(Attrs); AddDstNodeAttr(Attrs); }
/// Appends the triple (SrcAttr, DstAttr, CommonAttrName) to CommonNodeAttrs.
/// Each of the three names is passed through NormalizeColName before being stored.
/// No check that the named columns exist is made in this method.
1188  void SetCommonNodeAttrs(const TStr& SrcAttr, const TStr& DstAttr, const TStr& CommonAttrName){
1189  CommonNodeAttrs.Add(TStrTr(NormalizeColName(SrcAttr), NormalizeColName(DstAttr), NormalizeColName(CommonAttrName)));
1190  }
// Attribute-name vector getters. The remaining declarations in this group share the
// naming scheme (role: src node / dst node / edge; type: int / flt / str) and are
// presumably analogous to the documented ones - confirm against table.cpp.
/// Gets src node int attribute name vector.
1192  TStrV GetSrcNodeIntAttrV() const;
1194  TStrV GetDstNodeIntAttrV() const;
1196  TStrV GetEdgeIntAttrV() const;
1198  TStrV GetSrcNodeFltAttrV() const;
1200  TStrV GetDstNodeFltAttrV() const;
/// Gets edge float attribute name vector.
1202  TStrV GetEdgeFltAttrV() const;
/// Gets src node str attribute name vector.
1204  TStrV GetSrcNodeStrAttrV() const;
/// Gets dst node str attribute name vector.
1206  TStrV GetDstNodeStrAttrV() const;
1208  TStrV GetEdgeStrAttrV() const;
1209 
1211  static PTable GetNodeTable(const PNEANet& Network, TTableContext* Context);
1213  static PTable GetEdgeTable(const PNEANet& Network, TTableContext* Context);
1214 
1215 #ifdef USE_OPENMP
1216  static PTable GetEdgeTablePN(const PNGraphMP& Network, TTableContext* Context);
1218 #endif // USE_OPENMP
1219 
1221  static PTable GetFltNodePropertyTable(const PNEANet& Network, const TIntFltH& Property,
1222  const TStr& NodeAttrName, const TAttrType& NodeAttrType, const TStr& PropertyAttrName,
1224 
1225 /***** Basic Getters *****/
/// Gets the type of column ColName.
/// Normalizes ColName, then returns the Val1 component of its ColTypeMap entry.
/// NOTE(review): there is no IsKey guard here, so the outcome for an unknown column
/// depends on THash::GetDat - confirm callers validate the name first.
1227  TAttrType GetColType(const TStr& ColName) const {
1228  TStr NColName = NormalizeColName(ColName);
1229  return ColTypeMap.GetDat(NColName).Val1;
1230  }
/// Gets total number of rows in this table.
1232  TInt GetNumRows() const { return NumRows;}
/// Gets number of valid, i.e. not deleted, rows in this table.
1234  TInt GetNumValidRows() const { return NumValidRows;}
1235 
1238 
1239 /***** Iterators *****/
/// Gets an iterator positioned at FirstValidRow of this table.
1241  TRowIterator BegRI() const { return TRowIterator(FirstValidRow, this);}
/// Gets the end iterator, built from the TTable::Last sentinel value.
1243  TRowIterator EndRI() const { return TRowIterator(TTable::Last, this);}
/// Partitions the table into NumPartitions and populates Partitions with the ranges.
1249  void GetPartitionRanges(TIntPrV& Partitions, TInt NumPartitions) const;
1250 
1251 /***** Table Operations *****/
/// Renames a column.
1253  void Rename(const TStr& Column, const TStr& NewLabel);
1254 
1256  void Unique(const TStr& Col);
1258  void Unique(const TStrV& Cols, TBool Ordered = true);
1259 
1261 
1265  void Select(TPredicate& Predicate, TIntV& SelectedRows, TBool Remove = true);
/// Convenience overload of Select: calls the three-argument Select with Remove = true.
/// The vector of selected rows is a local and is discarded on return.
1266  void Select(TPredicate& Predicate) {
1267  TIntV SelectedRows;
1268  Select(Predicate, SelectedRows, true);
1269  }
1270  void Classify(TPredicate& Predicate, const TStr& LabelName, const TInt& PositiveLabel = 1,
1271  const TInt& NegativeLabel = 0);
1272 
1274 
1276  void SelectAtomic(const TStr& Col1, const TStr& Col2, TPredComp Cmp,
1277  TIntV& SelectedRows, TBool Remove = true);
/// Convenience overload of SelectAtomic: calls the five-argument SelectAtomic with Remove = true.
/// The vector of selected rows is a local and is discarded on return.
1278  void SelectAtomic(const TStr& Col1, const TStr& Col2, TPredComp Cmp) {
1279  TIntV SelectedRows;
1280  SelectAtomic(Col1, Col2, Cmp, SelectedRows, true);
1281  }
1282  void ClassifyAtomic(const TStr& Col1, const TStr& Col2, TPredComp Cmp,
1283  const TStr& LabelName, const TInt& PositiveLabel = 1, const TInt& NegativeLabel = 0);
1284 
1286  void SelectAtomicConst(const TStr& Col, const TPrimitive& Val, TPredComp Cmp,
1287  TIntV& SelectedRows, PTable& SelectedTable, TBool Remove = true, TBool Table = true);
1288 
/// Typed convenience overload: wraps Val in a TPrimitive and calls the full
/// SelectAtomicConst overload with Remove = true and Table = false.
/// SelectedRows and SelectedTable are locals and are discarded on return.
1289  template <class T>
1290  void SelectAtomicConst(const TStr& Col, const T& Val, TPredComp Cmp) {
1291  TIntV SelectedRows;
1292  PTable SelectedTable;
1293  SelectAtomicConst(Col, TPrimitive(Val), Cmp, SelectedRows, SelectedTable, true, false);
1294  }
/// Typed convenience overload that reports its result through SelectedTable: wraps Val in a
/// TPrimitive and calls the full SelectAtomicConst overload with Remove = false and Table = true.
/// The caller's SelectedTable is passed through by reference; SelectedRows is a discarded local.
1295  template <class T>
1296  void SelectAtomicConst(const TStr& Col, const T& Val, TPredComp Cmp, PTable& SelectedTable) {
1297  TIntV SelectedRows;
1298  SelectAtomicConst(Col, TPrimitive(Val), Cmp, SelectedRows, SelectedTable, false, true);
1299  }
/// Two-step classification against a constant:
///  1. runs the full SelectAtomicConst overload with Remove = false and Table = false,
///     collecting the matching rows in a local SelectedRows;
///  2. passes SelectedRows together with LabelName, PositiveLabel and NegativeLabel to ClassifyAux.
/// The local SelectedTable is only a placeholder argument and is discarded.
1300  template <class T>
1301  void ClassifyAtomicConst(const TStr& Col, const T& Val, TPredComp Cmp,
1302  const TStr& LabelName, const TInt& PositiveLabel = 1, const TInt& NegativeLabel = 0) {
1303  TIntV SelectedRows;
1304  PTable SelectedTable;
1305  SelectAtomicConst(Col, TPrimitive(Val), Cmp, SelectedRows, SelectedTable, false, false);
1306  ClassifyAux(SelectedRows, LabelName, PositiveLabel, NegativeLabel);
1307  }
1308 
/// Non-template entry point for TInt constants: forwards to SelectAtomicConst(Col, Val, Cmp).
1309  void SelectAtomicIntConst(const TStr& Col, const TInt& Val, TPredComp Cmp) {
1310  SelectAtomicConst(Col, Val, Cmp);
1311  }
/// Non-template entry point for TInt constants: forwards to SelectAtomicConst(Col, Val, Cmp, SelectedTable).
1312  void SelectAtomicIntConst(const TStr& Col, const TInt& Val, TPredComp Cmp, PTable& SelectedTable) {
1313  SelectAtomicConst(Col, Val, Cmp, SelectedTable);
1314  }
1315 
/// Non-template entry point for TStr constants: forwards to SelectAtomicConst(Col, Val, Cmp).
1316  void SelectAtomicStrConst(const TStr& Col, const TStr& Val, TPredComp Cmp) {
1317  SelectAtomicConst(Col, Val, Cmp);
1318  }
/// Non-template entry point for TStr constants: forwards to SelectAtomicConst(Col, Val, Cmp, SelectedTable).
1319  void SelectAtomicStrConst(const TStr& Col, const TStr& Val, TPredComp Cmp, PTable& SelectedTable) {
1320  SelectAtomicConst(Col, Val, Cmp, SelectedTable);
1321  }
1322 
/// Non-template entry point for TFlt constants: forwards to SelectAtomicConst(Col, Val, Cmp).
1323  void SelectAtomicFltConst(const TStr& Col, const TFlt& Val, TPredComp Cmp) {
1324  SelectAtomicConst(Col, Val, Cmp);
1325  }
/// Non-template entry point for TFlt constants: forwards to SelectAtomicConst(Col, Val, Cmp, SelectedTable).
1326  void SelectAtomicFltConst(const TStr& Col, const TFlt& Val, TPredComp Cmp, PTable& SelectedTable) {
1327  SelectAtomicConst(Col, Val, Cmp, SelectedTable);
1328  }
1329 
1331 
/// Groups rows depending on values of GroupBy columns.
1334  void Group(const TStrV& GroupBy, const TStr& GroupColName, TBool Ordered = true, TBool UsePhysicalIds = true);
1335 
1337 
1340  void Count(const TStr& CountColName, const TStr& Col);
1341 
/// Orders the rows according to the values in columns of OrderBy.
1343  void Order(const TStrV& OrderBy, TStr OrderColName = "", TBool ResetRankByMSC = false, TBool Asc = true);
1344 
1346  void Aggregate(const TStrV& GroupByAttrs, TAttrAggr AggOp, const TStr& ValAttr,
1347  const TStr& ResAttr, TBool Ordered = true);
1348 
1350  void AggregateCols(const TStrV& AggrAttrs, TAttrAggr AggOp, const TStr& ResAttr);
1351 
1353  TVec<PTable> SpliceByGroup(const TStrV& GroupByAttrs, TBool Ordered = true);
1354 
1356 
1359  PTable Join(const TStr& Col1, const TTable& Table, const TStr& Col2);
/// Smart-pointer overload of Join: dereferences Table and forwards to Join(Col1, const TTable&, Col2).
/// Table is dereferenced without a visible null check.
1360  PTable Join(const TStr& Col1, const PTable& Table, const TStr& Col2) {
1361  return Join(Col1, *Table, Col2);
1362  }
1363  PTable ThresholdJoin(const TStr& KeyCol1, const TStr& JoinCol1, const TTable& Table, const TStr& KeyCol2, const TStr& JoinCol2, TInt Threshold, TBool PerJoinKey = false);
1364 
/// Joins this table with itself on column Col: calls Join(Col, *this, Col).
1366  PTable SelfJoin(const TStr& Col) { return Join(Col, *this, Col); }
/// Similarity-joins this table with itself: calls SimJoin with *this as the other table and Cols on both sides.
1367  PTable SelfSimJoin(const TStrV& Cols, const TStr& DistanceColName, const TSimType& SimType, const TFlt& Threshold) { return SimJoin(Cols, *this, Cols, DistanceColName, SimType, Threshold); }
1369 
/// Performs join if the distance between two rows is less than the specified threshold.
1371  PTable SelfSimJoinPerGroup(const TStr& GroupAttr, const TStr& SimCol, const TStr& DistanceColName, const TSimType& SimType, const TFlt& Threshold);
1372 
1374  PTable SelfSimJoinPerGroup(const TStrV& GroupBy, const TStr& SimCol, const TStr& DistanceColName, const TSimType& SimType, const TFlt& Threshold);
1375 
1377  PTable SimJoin(const TStrV& Cols1, const TTable& Table, const TStrV& Cols2, const TStr& DistanceColName, const TSimType& SimType, const TFlt& Threshold);
/// Selects first N rows from the table.
1379  void SelectFirstNRows(const TInt& N);
1380 
1381  // Computes distances between elements in this->Col1 and Table->Col2 according
1382  // to given metric. Store the distances in DistCol, but keep only rows where
1383  // distance <= threshold
1384  // void Dist(const TStr& Col1, const TTable& Table, const TStr Col2, const TStr& DistColName,
1385  // const TMetric& Metric, TFlt threshold);
1386 
1388 
/// Releases memory of deleted rows, and defrags.
1391  void Defrag();
1392 
1394  void StoreIntCol(const TStr& ColName, const TIntV& ColVals);
1396  void StoreFltCol(const TStr& ColName, const TFltV& ColVals);
1398  void StoreStrCol(const TStr& ColName, const TStrV& ColVals);
1399 
1400  // Assumption: KeyAttr is a primary key in this table, and FKeyAttr is a primary key in
1401  // the argument table. Equivalent to SQL's: UPDATE this SET UpdateAttr = ReadAttr WHERE KeyAttr = FKeyAttr
1402  void UpdateFltFromTable(const TStr& KeyAttr, const TStr& UpdateAttr, const TTable& Table,
1403  const TStr& FKeyAttr, const TStr& ReadAttr, TFlt DefaultFltVal = 0.0);
1404 #ifdef GCC_ATOMIC
1405  void UpdateFltFromTableMP(const TStr& KeyAttr, const TStr& UpdateAttr, const TTable& Table,
1406  const TStr& FKeyAttr, const TStr& ReadAttr, TFlt DefaultFltVal = 0.0);
1407  // TODO: this should be a generic vector operation (parallel equivalent to TVec::PutAll)
1408  void SetFltColToConstMP(TInt UpdateColIdx, TFlt DefaultFltVal);
1409 #endif // GCC_ATOMIC
1410 
1412  PTable Union(const TTable& Table);
/// Smart-pointer overload: dereferences Table and forwards to Union(const TTable&).
// NOTE(review): the ';' after the inline body is redundant.
1413  PTable Union(const PTable& Table) { return Union(*Table); };
/// Returns union of this table with given Table, preserving duplicates.
1415  PTable UnionAll(const TTable& Table);
/// Smart-pointer overload: dereferences Table and forwards to UnionAll(const TTable&).
// NOTE(review): the ';' after the inline body is redundant.
1416  PTable UnionAll(const PTable& Table) { return UnionAll(*Table); };
1418  void UnionAllInPlace(const TTable& Table);
/// Smart-pointer overload: dereferences Table and forwards to UnionAllInPlace(const TTable&).
/// The 'return' of a void expression is legal C++ and returns nothing.
// NOTE(review): the ';' after the inline body is redundant.
1419  void UnionAllInPlace(const PTable& Table) { return UnionAllInPlace(*Table); };
1421  PTable Intersection(const TTable& Table);
/// Smart-pointer overload: dereferences Table and forwards to Intersection(const TTable&).
// NOTE(review): the ';' after the inline body is redundant.
1422  PTable Intersection(const PTable& Table) { return Intersection(*Table); };
// NOTE(review): unlike the other set operations in this group, this overload takes a non-const TTable&.
1424  PTable Minus(TTable& Table);
/// Smart-pointer overload: dereferences Table and forwards to Minus(TTable&).
// NOTE(review): the ';' after the inline body is redundant.
1425  PTable Minus(const PTable& Table) { return Minus(*Table); };
1427  PTable Project(const TStrV& ProjectCols);
1429  void ProjectInPlace(const TStrV& ProjectCols);
1430 
1431  /* Column-wise arithmetic operations */
1432 
1434 
1437  void ColGenericOp(const TStr& Attr1, const TStr& Attr2, const TStr& ResAttr, TArithOp op);
1438 #ifdef USE_OPENMP
1439  void ColGenericOpMP(TInt ArgColIdx1, TInt ArgColIdx2, TAttrType ArgType1, TAttrType ArgType2, TInt ResColIdx, TArithOp op);
1440 #endif // USE_OPENMP
/// Performs columnwise addition. See TTable::ColGenericOp.
1441  void ColAdd(const TStr& Attr1, const TStr& Attr2, const TStr& ResultAttrName="");
1444  void ColSub(const TStr& Attr1, const TStr& Attr2, const TStr& ResultAttrName="");
1446  void ColMul(const TStr& Attr1, const TStr& Attr2, const TStr& ResultAttrName="");
1448  void ColDiv(const TStr& Attr1, const TStr& Attr2, const TStr& ResultAttrName="");
1450  void ColMod(const TStr& Attr1, const TStr& Attr2, const TStr& ResultAttrName="");
1452  void ColMin(const TStr& Attr1, const TStr& Attr2, const TStr& ResultAttrName="");
1454  void ColMax(const TStr& Attr1, const TStr& Attr2, const TStr& ResultAttrName="");
1455 
1457  void ColGenericOp(const TStr& Attr1, TTable& Table, const TStr& Attr2, const TStr& ResAttr,
1458  TArithOp op, TBool AddToFirstTable);
1459  // void ColGenericOpMP(TTable& Table, TBool AddToFirstTable, TInt ArgColIdx1, TInt ArgColIdx2,
1460  // TAttrType ArgType1, TAttrType ArgType2, TInt ResColIdx, TArithOp op);
1462  void ColAdd(const TStr& Attr1, TTable& Table, const TStr& Attr2, const TStr& ResAttr="",
1463  TBool AddToFirstTable=true);
1465  void ColSub(const TStr& Attr1, TTable& Table, const TStr& Attr2, const TStr& ResAttr="",
1466  TBool AddToFirstTable=true);
1468  void ColMul(const TStr& Attr1, TTable& Table, const TStr& Attr2, const TStr& ResAttr="",
1469  TBool AddToFirstTable=true);
1471  void ColDiv(const TStr& Attr1, TTable& Table, const TStr& Attr2, const TStr& ResAttr="",
1472  TBool AddToFirstTable=true);
1474  void ColMod(const TStr& Attr1, TTable& Table, const TStr& Attr2, const TStr& ResAttr="",
1475  TBool AddToFirstTable=true);
1476 
1478  void ColGenericOp(const TStr& Attr1, const TFlt& Num, const TStr& ResAttr, TArithOp op, const TBool floatCast);
1479 #ifdef USE_OPENMP
1480  void ColGenericOpMP(const TInt& ColIdx1, const TInt& ColIdx2, TAttrType ArgType, const TFlt& Num, TArithOp op, TBool ShouldCast);
1481 #endif // USE_OPENMP
1482  void ColAdd(const TStr& Attr1, const TFlt& Num, const TStr& ResultAttrName="", const TBool floatCast=false);
1485  void ColSub(const TStr& Attr1, const TFlt& Num, const TStr& ResultAttrName="", const TBool floatCast=false);
1487  void ColMul(const TStr& Attr1, const TFlt& Num, const TStr& ResultAttrName="", const TBool floatCast=false);
1489  void ColDiv(const TStr& Attr1, const TFlt& Num, const TStr& ResultAttrName="", const TBool floatCast=false);
1491  void ColMod(const TStr& Attr1, const TFlt& Num, const TStr& ResultAttrName="", const TBool floatCast=false);
1492 
1493  /* Column-wise string operations */
1494 
/// Concatenates two string columns.
1496  void ColConcat(const TStr& Attr1, const TStr& Attr2, const TStr& Sep = "", const TStr& ResAttr="");
1498  void ColConcat(const TStr& Attr1, TTable& Table, const TStr& Attr2, const TStr& Sep = "", const TStr& ResAttr="",
1499  TBool AddToFirstTable=true);
1501  void ColConcatConst(const TStr& Attr1, const TStr& Val, const TStr& Sep = "", const TStr& ResAttr="");
1502 
1504  void ReadIntCol(const TStr& ColName, TIntV& Result) const;
1506  void ReadFltCol(const TStr& ColName, TFltV& Result) const;
1508  void ReadStrCol(const TStr& ColName, TStrV& Result) const;
1509 
1511  void InitIds();
1512 
1514 
1516  PTable IsNextK(const TStr& OrderCol, TInt K, const TStr& GroupBy, const TStr& RankColName = "");
1517 
1518  void PrintSize();
1519  void PrintContextSize();
/// Returns approximate memory used by table in [KB].
1521  TSize GetMemUsedKB();
1524 
1525  friend class TPt<TTable>;
1526  friend class TRowIterator;
1528 };
1529 
1531 
// NOTE(review): the signature line of this template (listing line 1533) is missing from this
// listing, so the parameter types of Graph, NodeVals and FNodeVal cannot be confirmed here.
// Visible behaviour: maps FNodeVal to a node id, creating the node the first time it is seen.
1532 template<class T>
1534  if (!NodeVals.IsKey(FNodeVal)) {
      // First occurrence: the new node id is the number of values seen so far.
1535  TInt NodeVal = NodeVals.Len();
1536  Graph->AddNode(NodeVal);
      // NOTE(review): AddKey followed by AddDat looks redundant if AddDat inserts the key itself - confirm.
1537  NodeVals.AddKey(FNodeVal);
1538  NodeVals.AddDat(FNodeVal, NodeVal);
1539  return NodeVal;
      // Already known: return the id recorded earlier.
1540  } else { return NodeVals.GetDat(FNodeVal); }
1541 }
1542 
// NOTE(review): the signature line of this template (listing line 1544) is missing from this
// listing; the body reads a vector V of T and an aggregation policy Policy.
// Visible behaviour: reduces V to a single T according to Policy.
// Every branch below indexes V[0] or V[V.Len()-1] (or V[V.Len()/2]) without checking V.Len(),
// so V is presumably required to be non-empty - confirm at the call sites.
1543 template <class T>
1545  switch (Policy) {
1546  case aaMin: {
      // Linear scan keeping the smallest element (uses operator<).
1547  T Res = V[0];
1548  for (TInt i = 1; i < V.Len(); i++) {
1549  if (V[i] < Res) { Res = V[i]; }
1550  }
1551  return Res;
1552  }
1553  case aaMax: {
      // Linear scan keeping the largest element (uses operator>).
1554  T Res = V[0];
1555  for (TInt i = 1; i < V.Len(); i++) {
1556  if (V[i] > Res) { Res = V[i]; }
1557  }
1558  return Res;
1559  }
1560  case aaFirst: {
1561  return V[0];
1562  }
1563  case aaLast:{
1564  return V[V.Len()-1];
1565  }
1566  case aaSum: {
      // Left-to-right accumulation with operator+.
1567  T Res = V[0];
1568  for (TInt i = 1; i < V.Len(); i++) {
1569  Res = Res + V[i];
1570  }
1571  return Res;
1572  }
1573  case aaMean: {
      // NOTE(review): with the division below commented out, this branch returns the SUM of
      // the elements, i.e. the same value as aaSum, not the mean.
1574  T Res = V[0];
1575  for (TInt i = 1; i < V.Len(); i++) {
1576  Res = Res + V[i];
1577  }
1578  //Res = Res / V.Len(); // TODO: Handle Str case separately?
1579  return Res;
1580  }
1581  case aaMedian: {
      // Sorts V in place (the caller's vector is reordered if V is a reference - confirm),
      // then takes the element at index Len/2 (the upper middle for even lengths).
1582  V.Sort();
1583  return V[V.Len()/2];
1584  }
1585  case aaCount: {
1586  // NOTE: Code should never reach here
1587  // I had to put this here to avoid a compiler warning.
1588  // Is there a better way to do this?
1589  return V[0];
1590  }
1591  }
1592  // Added to remove a compiler warning.
      // Only reached when Policy matches none of the cases above; returns a default-initialized T.
1593  T ShouldNotComeHere;
1594  return ShouldNotComeHere;
1595 }
1596 
1597 template <class T>
1598 void TTable::GroupByIntCol(const TStr& GroupBy, T& Grouping,
1599  const TIntV& IndexSet, TBool All, TBool UsePhysicalIds) const {
1600  TInt IdColIdx = GetColIdx(IdColName);
1601  if(!UsePhysicalIds && IdColIdx < 0){
1602  TExcept::Throw("Grouping: Either use physical row ids, or have an id column");
1603  }
1604  // TO do: add a check if grouping already exists and is valid
1605  GroupingSanityCheck(GroupBy, atInt);
1606  if (All) {
1607  // Optimize for the common and most expensive case - iterate over only valid rows.
1608  for (TRowIterator it = BegRI(); it < EndRI(); it++) {
1609  TInt idx = UsePhysicalIds ? it.GetRowIdx() : it.GetIntAttr(IdColIdx);
1610  UpdateGrouping<TInt>(Grouping, it.GetIntAttr(GroupBy), idx);
1611  }
1612  } else {
1613  // Consider only rows in IndexSet.
1614  for (TInt i = 0; i < IndexSet.Len(); i++) {
1615  if (IsRowValid(IndexSet[i])) {
1616  TInt RowIdx = IndexSet[i];
1617  const TIntV& Col = IntCols[GetColIdx(GroupBy)];
1618  TInt idx = UsePhysicalIds ? RowIdx : IntCols[IdColIdx][RowIdx];
1619  UpdateGrouping<TInt>(Grouping, Col[RowIdx], idx);
1620  }
1621  }
1622  }
1623 }
1624 
1625 template <class T>
1626 void TTable::GroupByFltCol(const TStr& GroupBy, T& Grouping,
1627  const TIntV& IndexSet, TBool All, TBool UsePhysicalIds) const {
1628  TInt IdColIdx = GetColIdx(IdColName);
1629  if(!UsePhysicalIds && IdColIdx < 0){
1630  TExcept::Throw("Grouping: Either use physical row ids, or have an id column");
1631  }
1632  GroupingSanityCheck(GroupBy, atFlt);
1633  if (All) {
1634  // Optimize for the common and most expensive case - iterate over only valid rows.
1635  for (TRowIterator it = BegRI(); it < EndRI(); it++) {
1636  TInt idx = UsePhysicalIds ? it.GetRowIdx() : it.GetIntAttr(IdColIdx);
1637  UpdateGrouping<TFlt>(Grouping, it.GetFltAttr(GroupBy), idx);
1638  }
1639  } else {
1640  // Consider only rows in IndexSet.
1641  for (TInt i = 0; i < IndexSet.Len(); i++) {
1642  if (IsRowValid(IndexSet[i])) {
1643  TInt RowIdx = IndexSet[i];
1644  const TFltV& Col = FltCols[GetColIdx(GroupBy)];
1645  TInt idx = UsePhysicalIds ? RowIdx : IntCols[IdColIdx][RowIdx];
1646  UpdateGrouping<TFlt>(Grouping, Col[RowIdx], idx);
1647  }
1648  }
1649  }
1650 }
1651 
1652 template <class T>
1653 void TTable::GroupByStrCol(const TStr& GroupBy, T& Grouping,
1654  const TIntV& IndexSet, TBool All, TBool UsePhysicalIds) const {
1655  TInt IdColIdx = GetColIdx(IdColName);
1656  if(!UsePhysicalIds && IdColIdx < 0){
1657  TExcept::Throw("Grouping: Either use physical row ids, or have an id column");
1658  }
1659  GroupingSanityCheck(GroupBy, atStr);
1660  if (All) {
1661  // Optimize for the common and most expensive case - iterate over all valid rows.
1662  for (TRowIterator it = BegRI(); it < EndRI(); it++) {
1663  TInt idx = UsePhysicalIds ? it.GetRowIdx() : it.GetIntAttr(IdColIdx);
1664  UpdateGrouping<TInt>(Grouping, it.GetStrMapByName(GroupBy), idx);
1665  }
1666  } else {
1667  // Consider only rows in IndexSet.
1668  for (TInt i = 0; i < IndexSet.Len(); i++) {
1669  if (IsRowValid(IndexSet[i])) {
1670  TInt RowIdx = IndexSet[i];
1671  TInt ColIdx = GetColIdx(GroupBy);
1672  TInt idx = UsePhysicalIds ? RowIdx : IntCols[IdColIdx][RowIdx];
1673  UpdateGrouping<TInt>(Grouping, StrColMaps[ColIdx][RowIdx], idx);
1674  }
1675  }
1676  }
1677 }
1678 
1679 template <class T>
1680 void TTable::UpdateGrouping(THash<T,TIntV>& Grouping, T Key, TInt Val) const{
1681  if (Grouping.IsKey(Key)) {
1682  Grouping.GetDat(Key).Add(Val);
1683  } else {
1684  TIntV NewGroup;
1685  NewGroup.Add(Val);
1686  Grouping.AddDat(Key, NewGroup);
1687  }
1688 }
1689 
1690 #ifdef GCC_ATOMIC
// THashMP overload of UpdateGrouping, compiled only when GCC_ATOMIC is defined.
// Appends Val to the vector stored under Key; if Key is absent, inserts a new
// one-element vector {Val} for it.
// NOTE(review): IsKey followed by GetDat/AddDat is a check-then-act sequence; whether it is
// safe under concurrent callers depends on THashMP's guarantees and on how callers
// partition keys across threads - confirm before relying on it.
1691 template <class T>
1692 void TTable::UpdateGrouping(THashMP<T,TIntV>& Grouping, T Key, TInt Val) const{
1693  if (Grouping.IsKey(Key)) {
1694  //printf("y\n");
1695  Grouping.GetDat(Key).Add(Val);
1696  } else {
1697  //printf("n\n");
1698  TIntV NewGroup;
1699  NewGroup.Add(Val);
1700  Grouping.AddDat(Key, NewGroup);
1701  }
1702 }
1703 #endif // GCC_ATOMIC
1704 
1705 /*
1706 template<class T>
1707 void TTable::RegisterGrouping(const T& Grouping, const TStr& GroupByCol, TBool UsePhysicalIds){
1708  TStrV GroupByVec;
1709  GroupByVec.Add(GroupByCol);
1710  GroupStmt Stmt(NormalizeColNameV(GroupByVec), true, UsePhysicalIds);
1711  GroupMapping.AddKey(Stmt);
1712  for(T::TIter it = Grouping.BegI(); it < Grouping.EndI(); it++){
1713  GroupMapping.GetDat(Stmt).AddDat(it.GetKey(), TIntV(it.GetDat()));
1714  }
1715 }
1716 */
1717 
1718 #endif //TABLE_H
1719 
Definition: bd.h:440
void UpdateGrouping(THash< T, TIntV > &Grouping, T Key, TInt Val) const
Template for utility function to update a grouping hash map.
Definition: table.h:1680
Definition: table.h:259
TStr GetDstCol() const
Gets the name of the column to be used as dst nodes in the graph.
Definition: table.h:1165
TSize GetMemUsedKB()
Returns approximate memory used by table in [KB].
Definition: table.cpp:3940
void ThresholdJoinInputCorrectness(const TStr &KeyCol1, const TStr &JoinCol1, const TTable &Table, const TStr &KeyCol2, const TStr &JoinCol2)
Definition: table.cpp:2478
void AddSchemaCol(const TStr &ColName, TAttrType ColType)
Adds column with name ColName and type ColType to the schema.
Definition: table.h:642
TFlt GetFltAttr(TInt ColIdx) const
Returns value of floating point attribute specified by float column index for current row...
Definition: table.cpp:159
TBool Valid
Definition: table.h:271
TInt RequestIndexInt(const TStr &ColName)
Creates Index for Int Column ColName.
Definition: table.cpp:5476
Definition: table.h:259
TBool IsLastGraphOfSequence()
Checks if the end of the graph sequence is reached.
Definition: table.cpp:3685
TBool IsAttr(const TStr &Attr)
Checks if Attr is an attribute of this table schema.
Definition: table.cpp:4628
void SetFltVal(TStr VarName, TFlt VarVal)
Set flt variable value in the predicate or all the children that use it.
Definition: table.h:100
void Order(const TStrV &OrderBy, TStr OrderColName="", TBool ResetRankByMSC=false, TBool Asc=true)
Orders the rows according to the values in columns of OrderBy (in descending lexicographic order)...
Definition: table.cpp:3240
TInt GetNumRows() const
Gets total number of rows in this table.
Definition: table.h:1232
void FillBucketsByInterval(TStr SplitAttr, TIntPrV SplitIntervals)
Fills RowIdBuckets with sets of row ids.
Definition: table.cpp:3599
void RemoveRow(TInt RowIdx, TInt PrevRowIdx)
Removes row with id RowIdx.
Definition: table.cpp:1135
Definition: table.h:259
void SelectAtomicConst(const TStr &Col, const T &Val, TPredComp Cmp, PTable &SelectedTable)
Definition: table.h:1296
void AddInt(const TInt &Val)
Adds int attribute to this row.
Definition: table.h:243
TStrV EdgeAttrV
List of columns (attributes) to serve as edge attributes.
Definition: table.h:591
THash< GroupStmt, THash< TGroupKey, TIntV > > GroupMapping
Maps grouping statements to their (group-by key –> group id) mapping.
Definition: table.h:581
TInt FirstValidRow
Physical index of first valid row.
Definition: table.h:553
int GetPrimHashCd() const
Returns primary hash code of the vector. Used by THash.
Definition: ds.h:999
TStr DenormalizeColName(const TStr &ColName) const
Removes the suffix from the column name if it exists.
Definition: table.cpp:4648
int Len() const
Definition: dt.h:487
TInt GetPivot(TIntV &V, TInt StartIdx, TInt EndIdx, const TVec< TAttrType > &SortByTypes, const TIntV &SortByIndices, TBool Asc)
Gets pivot element for QSort.
Definition: table.cpp:3110
TInt GetColIdx(const TStr &ColName) const
Gets index of column ColName among columns of the same type in the schema.
Definition: table.h:1013
enum TAttrType_ TAttrType
Types for tables, sparse and dense attributes.
static PTable New(TTableContext *Context)
Definition: table.h:933
void StoreGroupCol(const TStr &GroupColName, const TVec< TPair< TInt, TInt > > &GroupAndRowIds)
Parallel helper function for grouping. - we currently don't support such parallel grouping by complex...
Definition: table.cpp:1310
static const TInt Last
Special value for Next vector entry - last row in table.
Definition: table.h:486
PTable UnionAll(const TTable &Table)
Returns union of this table with given Table, preserving duplicates.
Definition: table.cpp:4511
TStrV GetStrVals() const
Gets string attributes of this row.
Definition: table.h:253
static TInt PartitionKeyVal(TIntV &Key, TIntV &Val, TInt Start, TInt End)
Definition: table.cpp:5355
Primitive class: Wrapper around primitive data types.
Definition: table.h:211
bool operator==(const TRowIterator &RowI) const
Checks if this iterator points to the same row pointed by RowI.
Definition: table.cpp:147
TStrV GetSrcNodeIntAttrV() const
Gets src node int attribute name vector.
Definition: table.cpp:1005
PTable Minus(const PTable &Table)
Definition: table.h:1425
void PrintGrouping(const THash< TGroupKey, TIntV > &Grouping) const
Definition: table.cpp:1788
Schema Sch
Table Schema.
Definition: table.h:549
void SelectFirstNRows(const TInt &N)
Selects first N rows from the table.
Definition: table.cpp:3357
TStrV GetDstNodeStrAttrV() const
Gets dst node str attribute name vector.
Definition: table.cpp:1082
Definition: ds.h:130
void GetPartitionRanges(TIntPrV &Partitions, TInt NumPartitions) const
Partitions the table into NumPartitions and populate Partitions with the ranges.
Definition: table.cpp:1177
TInt GetIntAttr(TInt ColIdx) const
Returns value of integer attribute specified by integer column index for current row.
Definition: table.cpp:155
TPredComp
Comparison operators for selection predicates.
Definition: table.h:7
TStr GetStr(const TInt &KeyId) const
Returns a string with KeyId.
Definition: table.h:204
TPredicateNode(TPredOp Opr)
Constructor for logical operation predicate node (internal node)
Definition: table.h:66
void Defrag()
Releases memory of deleted rows, and defrags.
Definition: table.cpp:3311
PGraphMP ToGraphMP(PTable Table, const TStr &SrcCol, const TStr &DstCol)
Performs table to graph conversion in parallel using the sort-first algorithm. This is the recommende...
Definition: conv.h:192
PNEANet ToVarGraphSequenceIterator(TStr SplitAttr, TAttrAggr AggrPolicy, TIntPrV SplitIntervals)
Creates the graph sequence one at a time.
Definition: table.cpp:3671
void SaveBin(const TStr &OutFNm)
Saves table schema and content to a binary file.
Definition: table.cpp:849
TStr GetStrAttr(TInt ColIdx) const
Returns value of string attribute specified by string column index for current row.
Definition: table.cpp:163
void AddIntCol(const TStr &ColName)
Adds an integer column with name ColName.
Definition: table.cpp:4673
THash< TStr, TPair< TAttrType, TInt > > ColTypeMap
Definition: table.h:564
TStr Rvar
Right variable of the comparison op.
Definition: table.h:21
void SetDstCol(const TStr &Dst)
Sets the name of the column to be used as dst nodes in the graph.
Definition: table.h:1167
TInt GetLastValidRowIdx()
Gets the id of the last valid row of the table.
static const int Mx
Definition: dt.h:1139
Definition: table.h:257
static PTable New(const THash< TInt, TInt > &H, const TStr &Col1, const TStr &Col2, TTableContext *Context, const TBool IsStrKeys=false)
Returns pointer to a table constructed from given int->int hash.
Definition: table.h:938
void ThresholdJoinCountCollisions(const TTable &TB, const TTable &TS, const TIntIntVH &T, TInt JoinColIdxB, TInt KeyColIdxB, TInt KeyColIdxS, THash< TIntPr, TIntTr > &Counters, TBool ThisIsSmaller, TAttrType JoinColType, TAttrType KeyType)
Definition: table.cpp:2506
void AddGraphAttributeV(TStrV &Attrs, TBool IsEdge, TBool IsSrc, TBool IsDst)
Adds vector of names of columns to be used as graph attributes.
Definition: table.cpp:992
void GroupByIntColMP(const TStr &GroupBy, THashMP< TInt, TIntV > &Grouping, TBool UsePhysicalIds=true) const
Groups/hashes by a single column with integer values, using OpenMP multi-threading.
Definition: table.cpp:1225
void SetFltColToConstMP(TInt UpdateColIdx, TFlt DefaultFltVal)
Definition: table.cpp:4152
TBool IsValid()
Definition: table.h:286
void ThresholdJoinCountPerJoinKeyCollisions(const TTable &TB, const TTable &TS, const TIntIntVH &T, TInt JoinColIdxB, TInt KeyColIdxB, TInt KeyColIdxS, THash< TIntTr, TIntTr > &Counters, TBool ThisIsSmaller, TAttrType JoinColType, TAttrType KeyType)
Definition: table.cpp:2557
void ColAdd(const TStr &Attr1, const TStr &Attr2, const TStr &ResultAttrName="")
Performs columnwise addition. See TTable::ColGenericOp.
Definition: table.cpp:4816
TArithOp
Possible column-wise arithmetic operations.
Definition: table.h:259
TInt RequestIndexStrMap(const TStr &ColName)
Creates Index for Str Column ColName.
Definition: table.cpp:5514
TFlt GetNextFltAttr(TInt ColIdx) const
Returns value of float attribute specified by float column index for next row.
Definition: table.cpp:252
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:575
void AddSelectedRows(const TTable &Table, const TIntV &RowIDs)
Adds rows from Table that correspond to ids in RowIDs.
Definition: table.cpp:4399
Definition: table.h:259
void SetSrcCol(const TStr &Src)
Sets the name of the column to be used as src nodes in the graph.
Definition: table.h:1160
TRowIteratorWithRemove(const TRowIteratorWithRemove &RowI)
Copy constructor.
Definition: table.h:387
TStr IdColName
A mapping from column name to column type and column index among columns of the same type...
Definition: table.h:565
Predicate - encapsulates comparison operations.
Definition: table.h:82
TBool CompareAtomicConstTStr(TInt ColIdx, const TStr &Val, TPredComp Cmp)
Compares value in column ColIdx with given TStr Val.
Definition: table.cpp:208
PTable SelfSimJoinPerGroup(const TStr &GroupAttr, const TStr &SimCol, const TStr &DistanceColName, const TSimType &SimType, const TFlt &Threshold)
Performs join if the distance between two rows is less than the specified threshold.
Definition: table.cpp:2094
static TStrV NormalizeColNameV(const TStrV &Cols)
Adds suffix to column name if it doesn't exist.
Definition: table.h:539
static TInt CompareKeyVal(const TInt &K1, const TInt &V1, const TInt &K2, const TInt &V2)
Definition: table.cpp:5297
const char * GetContextKey(TInt Val) const
Gets the Key of the Context StringVals pool. Used by ToGraph method in conv.cpp.
Definition: table.h:622
void Save(TSOut &SOut)
Saves TTableContext in binary to SOut.
Definition: table.h:197
int GetSecHashCd() const
Returns secondary hash code of the vector. Used by THash.
Definition: ds.h:1011
THash< TStr, THash< TInt, TIntV > > StrMapColIndexes
Indexes for String Columns.
Definition: table.h:569
THash< TStr, THash< TInt, TIntV > > IntColIndexes
Indexes for Int Columns.
Definition: table.h:568
void ColConcat(const TStr &Attr1, const TStr &Attr2, const TStr &Sep="", const TStr &ResAttr="")
Concatenates two string columns.
Definition: table.cpp:5083
TStrV GetSrcNodeStrAttrV() const
Gets src node str attribute name vector.
Definition: table.cpp:1071
void AddNodeAttr(const TStr &Attr)
Handles the common case where src and dst both belong to the same "universe" of entities.
Definition: table.h:1184
TTableContext * Context
Execution Context.
Definition: table.h:545
void AddRow(const TTableRow &Row)
Adds row with values taken from given TTableRow.
Definition: table.h:1002
TSimType
Distance metrics for similarity joins.
Definition: table.h:149
TBool Start
A flag indicating whether the current row is the first valid row of the table.
Definition: table.h:377
void QSort(TIntV &V, TInt StartIdx, TInt EndIdx, const TVec< TAttrType > &SortByTypes, const TIntV &SortByIndices, TBool Asc=true)
Performs QSort on given vector V.
Definition: table.cpp:3154
TAttrType Type
Type of the predicate variables.
Definition: table.h:17
TPredicateNode * Left
Left child of this node.
Definition: table.h:57
THash< TStr, TInt > IntVars
Int variables in the current predicate tree.
Definition: table.h:84
void InvalidateAffectedGroupings(const TStr &Attr)
Definition: table.cpp:1581
void Dump(FILE *OutF=stdout) const
Prints table contents to a text file.
Definition: table.cpp:887
TInt LastValidRow
Physical index of last valid row.
Definition: table.h:554
void UnionAllInPlace(const PTable &Table)
Definition: table.h:1419
TPredicate(TPredicateNode *R)
Construct predicate with given root node R.
Definition: table.h:92
void Group(const TStrV &GroupBy, const TStr &GroupColName, TBool Ordered=true, TBool UsePhysicalIds=true)
Groups rows depending on values of GroupBy columns.
Definition: table.cpp:1569
void SelectAtomicStrConst(const TStr &Col, const TStr &Val, TPredComp Cmp, PTable &SelectedTable)
Definition: table.h:1319
void ResizeTable(int RowCount)
Resizes the table to hold RowCount rows.
Definition: table.cpp:4330
Iterator over a vector of tables.
Definition: table.h:423
void PrintContextSize()
Definition: table.cpp:3959
bool HasNext()
Checks if iterator has reached end of the sequence.
Definition: table.h:432
TPredicate()
Default constructor.
Definition: table.h:90
TPrimitive()
Definition: table.h:219
TPrimitive(const TPrimitive &Prim)
Definition: table.h:223
static TInt GetMP()
Definition: table.h:527
TTableContext()
Default constructor.
Definition: table.h:187
TAttrAggr
Possible policies for aggregating node attributes.
Definition: table.h:257
void ColDiv(const TStr &Attr1, const TStr &Attr2, const TStr &ResultAttrName="")
Performs columnwise division. See TTable::ColGenericOp.
Definition: table.cpp:4828
Definition: fl.h:384
void Rename(const TStr &Column, const TStr &NewLabel)
Renames a column.
Definition: table.cpp:1105
void GroupAux(const TStrV &GroupBy, THash< TGroupKey, TPair< TInt, TIntV > > &Grouping, TBool Ordered, const TStr &GroupColName, TBool KeepUnique, TIntV &UniqueVec, TBool UsePhysicalIds=true)
Helper function for grouping.
Definition: table.cpp:1322
TStrV GetEdgeFltAttrV() const
Gets edge float attribute name vector.
Definition: table.cpp:1060
Definition: table.h:149
TStr GetNextStrAttr(TInt ColIdx) const
Returns value of string attribute specified by string column index for next row.
Definition: table.cpp:256
Execution context.
Definition: table.h:180
const TDat & GetDat(const TKey &Key) const
Definition: hash.h:262
static PTable New(const PTable Table)
Returns pointer to a new table created from given Table.
Definition: table.h:948
void GenerateColTypeMap(THash< TStr, TPair< TInt, TInt > > &ColTypeIntMap)
Definition: table.cpp:337
void AddRightChild(TPredicateNode *Child)
Add right child to this node.
Definition: table.h:74
static PTable TableFromHashMap(const THash< TInt, TInt > &H, const TStr &Col1, const TStr &Col2, TTableContext *Context, const TBool IsStrKeys=false)
Builds table from hash table of int->int.
Definition: table.h:988
Schema GetSchema()
Gets the schema of this table.
Definition: table.h:1125
TFltV GetFltVals() const
Gets float attributes of this row.
Definition: table.h:251
TVec< TIntV > RowIdBuckets
Partitioning of row ids into buckets corresponding to different graph objects when generating a seque...
Definition: table.h:599
TRowIteratorWithRemove BegRIWR()
Gets iterator with remove to the first valid row.
Definition: table.h:1245
TInt GetNumValidRows() const
Gets number of valid, i.e. not deleted, rows in this table.
Definition: table.h:1234
TStr GetStr(const TInt &KeyId) const
Gets the string with KeyId.
Definition: table.h:1109
TRowIterator BegRI() const
Gets iterator to the first valid row of the table.
Definition: table.h:1241
TPredicateNode()
Default constructor.
Definition: table.h:60
Definition: table.h:7
PNEANet ToGraphPerGroupIterator(TStr GroupAttr, TAttrAggr AggrPolicy)
Creates the graph sequence one at a time.
Definition: table.cpp:3676
TVec< TIntV > IntCols
Data columns of integer attributes.
Definition: table.h:558
void SelectAtomicConst(const TStr &Col, const T &Val, TPredComp Cmp)
Definition: table.h:1290
Iterator class for TTable rows, that allows logical row removal while iterating.
Definition: table.h:374
TSizeTy GetMemUsed() const
Returns the memory footprint (the number of bytes) of the vector.
Definition: ds.h:511
void CheckAndAddIntNode(PNEANet Graph, THashSet< TInt > &NodeVals, TInt NodeId)
Checks if given NodeId is seen earlier; if not, add it to Graph and hashmap NodeVals.
Definition: table.cpp:3388
TVec< PNEANet > ToGraphSequence(TStr SplitAttr, TAttrAggr AggrPolicy, TInt WindowSize, TInt JumpSize, TInt StartVal=TInt::Mn, TInt EndVal=TInt::Mx)
Creates a sequence of graphs based on values of column SplitAttr and windows specified by JumpSize an...
Definition: table.cpp:3651
void GroupByFltCol(const TStr &GroupBy, T &Grouping, const TIntV &IndexSet, TBool All, TBool UsePhysicalIds=true) const
Groups/hashes by a single column with float values. Returns hash table with grouping.
Definition: table.h:1626
TInt GetStrMapByName(const TStr &Col) const
Returns integer mapping of string attribute specified by attribute name for current row...
Definition: table.cpp:181
PTable Minus(TTable &Table)
Returns table with rows that are present in this table but not in given Table.
Definition: table.cpp:4592
static PTable GetNodeTable(const PNEANet &Network, TTableContext *Context)
Extracts node TTable from PNEANet.
Definition: table.cpp:3689
THash< TStr, TStr > StrVars
String variables in the current predicate tree.
Definition: table.h:86
TIntV GetStrRowIdxByMap(const TStr &ColName, const TInt &Map) const
Gets the rows containing int mapping Map in str column ColName.
Definition: table.cpp:5431
TIntV GetIntVals() const
Gets int attributes of this row.
Definition: table.h:249
TStr GetIdColName() const
Gets name of the id column of this table.
Definition: table.h:636
static TBool EvalStrAtom(const TStr &Val1, const TStr &Val2, TPredComp Cmp)
Compare atomic string values Val1 and Val2 using predicate Cmp.
Definition: table.h:123
Definition: gbase.h:23
TTable(const TTable &Table)
Copy constructor.
Definition: table.h:919
TRowIteratorWithRemove()
Default constructor.
Definition: table.h:380
int GetSecHashCd() const
Definition: ds.h:157
static void LoadSSSeq(PTable &NewTable, const Schema &S, const TStr &InFNm, const TIntV &RelevantCols, const char &Separator, TBool HasTitleLine)
Sequentially loads data from input file at InFNm into NewTable.
Definition: table.cpp:669
Definition: table.h:7
Definition: dt.h:1383
void AddEdgeAttr(const TStr &Attr)
Adds column to be used as graph edge attribute.
Definition: table.h:1172
TRowIterator(const TRowIterator &RowI)
Copy constructor.
Definition: table.h:339
TStr StrVal
Definition: table.h:215
Definition: fl.h:58
void IncrementNext()
Increments the next vector and sets last, NumRows and NumValidRows.
Definition: table.cpp:2255
PTable SimJoin(const TStrV &Cols1, const TTable &Table, const TStrV &Cols2, const TStr &DistanceColName, const TSimType &SimType, const TFlt &Threshold)
Performs join if the distance between two rows is less than the specified threshold.
Definition: table.cpp:1994
void InitIds()
Adds explicit row ids and initializes hash set mapping ids to physical rows.
Definition: table.cpp:1883
TStrTrV CommonNodeAttrs
List of attribute pairs with values common to source and destination and their common given name...
Definition: table.h:594
void QSortPar(TIntV &V, const TVec< TAttrType > &SortByTypes, const TIntV &SortByIndices, TBool Asc=true)
Performs QSort in parallel on given vector V.
Definition: table.cpp:3206
void Save(TSOut &SOut)
Saves table schema and content to a binary format.
Definition: table.cpp:854
PTable Join(const TStr &Col1, const PTable &Table, const TStr &Col2)
Definition: table.h:1360
TBool Result
Result of evaluating the predicate rooted at this node.
Definition: table.h:54
void ReadFltCol(const TStr &ColName, TFltV &Result) const
Reads values of entire float column into Result.
Definition: table.cpp:5221
void InvalidatePhysicalGroupings()
Definition: table.cpp:1577
TBool operator==(const GroupStmt &stmt) const
Definition: table.h:278
TPair< TIntV, TFltV > TGroupKey
Represents grouping key with IntV for integer and string attributes and FltV for float attributes...
Definition: table.h:145
Iterator class for TTable rows.
Definition: table.h:330
TInt GetNextRowIdx() const
Gets physical index of next row.
Definition: table.cpp:243
void DelKey(const TKey &Key)
Definition: hash.h:404
static const int Mn
Definition: dt.h:1138
void Aggregate(const TStrV &GroupByAttrs, TAttrAggr AggOp, const TStr &ValAttr, const TStr &ResAttr, TBool Ordered=true)
Aggregates values of ValAttr after grouping with respect to GroupByAttrs. Results are stored as new at...
Definition: table.cpp:1585
TAttrType GetSchemaColType(TInt Idx) const
Gets type of the column with index Idx in the schema.
Definition: table.h:640
Definition: table.h:257
PGraph ToGraph(PTable Table, const TStr &SrcCol, const TStr &DstCol, TAttrAggr AggrPolicy)
Sequentially converts the table into a graph with links from nodes in SrcCol to those in DstCol...
Definition: conv.h:8
PTable Intersection(const PTable &Table)
Definition: table.h:1422
void SetIntVal(TStr VarName, TInt VarVal)
Set int variable value in the predicate or all the children that use it.
Definition: table.h:98
TStrV GetEdgeIntAttrV() const
Gets edge int attribute name vector.
Definition: table.cpp:1027
Definition: table.h:149
Table Row (Record)
Definition: table.h:234
TRowIteratorWithRemove(TInt RowIdx, TTable *TablePtr, TBool IsStart)
Constructs iterator pointing to given row.
Definition: table.h:384
void SetStrVal(TStr VarName, TStr VarVal)
Set str variable value in the predicate or all the children that use it.
Definition: table.h:102
void RemoveNext()
Removes next row.
Definition: table.cpp:278
int GetPrimHashCd() const
Definition: table.h:303
TStr StrConst
Str const value if this object is a string constant.
Definition: table.h:24
TVec< PNEANet > ToGraphPerGroup(TStr GroupAttr, TAttrAggr AggrPolicy)
Creates a sequence of graphs based on grouping specified by GroupAttr.
Definition: table.cpp:3662
void AddColType(const TStr &ColName, TAttrType ColType, TInt Index)
Adds column with name ColName and type ColType to the ColTypeMap.
Definition: table.h:656
const TTable * Table
Reference to table containing this row.
Definition: table.h:332
int LoadCrossNet(TCrossNet &Graph, PTable Table, const TStr &SrcCol, const TStr &DstCol, TStrV &EdgeAttrV)
Loads the edges from the TTable and EdgeAttrV specifies columns containing edge attributes.
Definition: conv.cpp:69
void Sort(const bool &Asc=true)
Sorts the elements of the vector.
Definition: ds.h:1318
void SelectAtomicIntConst(const TStr &Col, const TInt &Val, TPredComp Cmp)
Definition: table.h:1309
static void Throw(const TStr &MsgStr)
Definition: ut.h:187
Schema DenormalizeSchema() const
Removes suffix from column names in the Schema.
Definition: table.cpp:4665
void AddDstNodeAttr(const TStr &Attr)
Adds column to be used as dst node attribute of the graph.
Definition: table.h:1180
TBool UsePhysicalRowIds
Definition: table.h:270
PNEANet NextGraphIterator()
Calls to this must be preceded by a call to one of the above ToGraph*Iterator functions.
Definition: table.cpp:3681
TInt IntVal
Definition: table.h:213
friend class TRowIterator
Definition: table.h:1526
TStr GetSrcCol() const
Gets the name of the column to be used as src nodes in the graph.
Definition: table.h:1158
PNEANet BuildGraph(const TIntV &RowIds, TAttrAggr AggrPolicy)
Makes a single pass over the rows in the given row id set, and creates nodes, edges, assigns node and edge attributes.
Definition: table.cpp:3445
PGraphMP ToNetworkMP(PTable Table, const TStr &SrcCol, const TStr &DstCol, TStrV &SrcAttrV, TStrV &DstAttrV, TStrV &EdgeAttrV, TAttrAggr AggrPolicy)
Does Table to Network conversion in parallel using the sort-first algorithm. This is the recommended ...
Definition: conv.h:696
TBool EvalAtomicPredicate(const TAtomicPredicate &Atom)
Evaluates the given atomic predicate.
Definition: table.cpp:102
void ColSub(const TStr &Attr1, const TStr &Attr2, const TStr &ResultAttrName="")
Performs columnwise subtraction. See TTable::ColGenericOp.
Definition: table.cpp:4820
TFlt GetFltValAtRowIdx(const TInt &ColIdx, const TInt &RowIdx)
Get the float value at column ColIdx and row RowIdx.
Definition: table.h:1120
int GetEmptyRowsStart(int NewRows)
Gets the start index to a chunk of empty rows of size NewRows.
Definition: table.cpp:4376
TSize GetMemUsed() const
Definition: table.h:294
void PrintSize()
Definition: table.cpp:3930
TStrV GroupByAttrs
Definition: table.h:268
THash< TStr, THash< TFlt, TIntV > > FltColIndexes
Indexes for Float Columns.
Definition: table.h:570
TStr Lvar
Left variable of the comparison op.
Definition: table.h:20
const char * GetKey(const int &KeyId) const
Definition: hash.h:893
void ProjectInPlace(const TStrV &ProjectCols)
Keeps only the columns specified in ProjectCols.
Definition: table.cpp:5239
TAtomicPredicate(TAttrType Typ, TBool IsCnst, TPredComp Cmp, TStr L, TStr R)
Compact prototype for constructing non-const atomic predicate.
Definition: table.h:42
TStr GetStr() const
Definition: table.h:228
Definition: table.h:7
void SelectAtomicStrConst(const TStr &Col, const TStr &Val, TPredComp Cmp)
Definition: table.h:1316
TBool CompareAtomicConst(TInt ColIdx, const TPrimitive &Val, TPredComp Cmp)
Compares value in column ColIdx with given primitive Val.
Definition: table.cpp:190
TRowIteratorWithRemove EndRIWR()
Gets iterator with remove to the last valid row.
Definition: table.h:1247
TFltV FltVals
Values of the flt columns for this row.
Definition: table.h:237
size_t TSize
Definition: bd.h:58
TInt GetStrMapById(TInt ColIdx, TInt RowIdx) const
Gets the integer mapping of the string at column ColIdx at row RowIdx.
Definition: table.h:1033
void SelectAtomic(const TStr &Col1, const TStr &Col2, TPredComp Cmp)
Definition: table.h:1278
void Reindex()
Reinitializes row ids.
Definition: table.cpp:1889
TInt CurrBucket
Current row id bucket - used when generating a sequence of graphs using an iterator.
Definition: table.h:600
PTable IsNextK(const TStr &OrderCol, TInt K, const TStr &GroupBy, const TStr &RankColName="")
Distance based filter.
Definition: table.cpp:3891
TAttrType GetColType(const TStr &ColName) const
Gets type of column ColName.
Definition: table.h:1227
TVec< TIntV > StrColMaps
Data columns of integer mappings of string attributes.
Definition: table.h:560
TRowIteratorWithRemove & Next()
Increments the iterator (For Python compatibility).
Definition: table.cpp:222
void LoadShM(TShMIn &ShMIn, bool SharedPool=true)
Load hash from shared memory. If shared pool is true load pool from shared memory.
Definition: hash.h:815
TPredicate(const TPredicate &Pred)
Copy constructor.
Definition: table.h:94
PNEANet ToGraphSequenceIterator(TStr SplitAttr, TAttrAggr AggrPolicy, TInt WindowSize, TInt JumpSize, TInt StartVal=TInt::Mn, TInt EndVal=TInt::Mx)
Creates the graph sequence one at a time.
Definition: table.cpp:3666
TPrimitive(const TFlt &Val)
Definition: table.h:221
PTable SelfJoin(const TStr &Col)
Joins table with itself, on values of Col.
Definition: table.h:1366
Definition: table.h:149
void GroupByIntCol(const TStr &GroupBy, T &Grouping, const TIntV &IndexSet, TBool All, TBool UsePhysicalIds=true) const
Groups/hashes by a single column with integer values.
Definition: table.h:1598
PTable Join(const TStr &Col1, const TTable &Table, const TStr &Col2)
Performs equijoin.
Definition: table.cpp:2272
static int GetHashCd(const int hc1, const int hc2)
Definition: bd.h:590
PGraph ToNetwork(PTable Table, const TStr &SrcCol, const TStr &DstCol, TStrV &SrcAttrV, TStrV &DstAttrV, TStrV &EdgeAttrV, TAttrAggr AggrPolicy)
Converts the Table into a graph with edges from SrcCol to DstCol, and attribute vector defined by the...
Definition: conv.h:64
void Save(TSOut &SOut, bool PoolToo=true) const
Definition: hash.h:833
bool IsKey(const TKey &Key) const
Definition: hashmp.h:191
bool Val
Definition: dt.h:973
static void LoadSSPar(PTable &NewTable, const Schema &S, const TStr &InFNm, const TIntV &RelevantCols, const char &Separator, TBool HasTitleLine)
Loads data in parallel from input file at InFNm into NewTable. Only works when NewTable has no string c...
Definition: table.cpp:507
int LoadMode(TModeNet &Graph, PTable Table, const TStr &NCol, TStrV &NodeAttrV)
Loads the nodes specified in column NCol from the TTable with the attributes specified in NodeAttrV...
Definition: conv.cpp:14
TPair< TStr, TAttrType > TStrTypPr
Definition: table.h:1530
TIntV GetIntRowIdxByVal(const TStr &ColName, const TInt &Val) const
Gets the rows containing Val in int column ColName.
Definition: table.cpp:5410
TFlt FltVal
Definition: table.h:214
TInt GetRowIdx() const
Gets the id of the row pointed to by this iterator.
Definition: table.cpp:151
A class representing a cached grouping statement identifier.
Definition: table.h:266
TStr GetSchemaColName(TInt Idx) const
Gets name of the column with index Idx in the schema.
Definition: table.h:638
TInt GetStrMapById(TInt ColIdx) const
Returns integer mapping of a string attribute value specified by string column index for current row...
Definition: table.cpp:186
TBool UsePhysicalIds()
Definition: table.h:277
TStrV SrcNodeAttrV
List of columns (attributes) to serve as source node attributes.
Definition: table.h:592
TAttrAggr AggrPolicy
Aggregation policy used for solving conflicts between different values of an attribute of the same no...
Definition: table.h:601
static void QSortKeyVal(TIntV &Key, TIntV &Val, TInt Start, TInt End)
Definition: table.cpp:5378
void Select(TPredicate &Predicate, TIntV &SelectedRows, TBool Remove=true)
Selects rows that satisfy given Predicate.
Definition: table.cpp:2750
PTable UnionAll(const PTable &Table)
Definition: table.h:1416
void UnionAllInPlace(const TTable &Table)
Same as TTable::ConcatTable.
Definition: table.cpp:4524
TInt GetInt() const
Definition: table.h:226
char GetCh(const int &ChN) const
Definition: dt.h:483
TIntIntH RowIdMap
Mapping of permanent row ids to physical id.
Definition: table.h:566
void SaveSS(const TStr &OutFNm)
Saves table schema and content to a TSV file.
Definition: table.cpp:800
PTable Union(const TTable &Table)
Returns union of this table with given Table.
Definition: table.cpp:4531
void SelectAtomicConst(const TStr &Col, const TPrimitive &Val, TPredComp Cmp, TIntV &SelectedRows, PTable &SelectedTable, TBool Remove=true, TBool Table=true)
Selects rows where the value of Col matches given primitive Val.
Definition: table.cpp:2873
Definition: table.h:5
void UpdateFltFromTable(const TStr &KeyAttr, const TStr &UpdateAttr, const TTable &Table, const TStr &FKeyAttr, const TStr &ReadAttr, TFlt DefaultFltVal=0.0)
Definition: table.cpp:4242
void ColConcatConst(const TStr &Attr1, const TStr &Val, const TStr &Sep="", const TStr &ResAttr="")
Concatenates column values with given string value.
Definition: table.cpp:5182
Definition: fl.h:128
void GetCollidingRows(const TTable &T, THashSet< TInt > &Collisions)
Gets set of row ids of rows common with table T.
Definition: table.cpp:4014
void AddGraphAttribute(const TStr &Attr, TBool IsEdge, TBool IsSrc, TBool IsDst)
Adds names of columns to be used as graph attributes.
Definition: table.cpp:985
TBool Ordered
Definition: table.h:269
void KeepSortedRows(const TIntV &KeepV)
Removes all rows that are not mentioned in the SORTED vector KeepV.
Definition: table.cpp:1152
The nodes of one particular mode in a TMMNet, and their neighbor vectors as TIntV attributes...
Definition: mmnet.h:23
TPair< TAttrType, TInt > GetColTypeMap(const TStr &ColName) const
Gets column type and index of ColName.
Definition: table.h:666
TTableRow()
Default constructor.
Definition: table.h:241
TAttrType GetType() const
Definition: table.h:229
Definition: table.h:7
void GroupingSanityCheck(const TStr &GroupBy, const TAttrType &AttrType) const
Checks if grouping key exists and matches given attr type.
Definition: table.cpp:1215
TStrHash< TInt, TBigStrPool > StringVals
StringPool - stores string data values and maps them to integers.
Definition: table.h:182
static PTable TableFromHashMap(const THash< TInt, TFlt > &H, const TStr &Col1, const TStr &Col2, TTableContext *Context, const TBool IsStrKeys=false)
Builds table from hash table of int->float.
Definition: table.h:995
void UpdateTableForNewRow()
Updates table state after adding one or more rows.
Definition: table.cpp:4140
void SetCommonNodeAttrs(const TStr &SrcAttr, const TStr &DstAttr, const TStr &CommonAttrName)
Sets the columns to be used as both src and dst node attributes.
Definition: table.h:1188
int AddKey(const char *Key)
Definition: hash.h:968
static TInt UseMP
Global switch for choosing multi-threaded versions of TTable functions.
Definition: table.h:489
TRowIterator()
Default constructor.
Definition: table.h:335
TPredComp Compare
Comparison op represented by this node.
Definition: table.h:19
TTableIterator(TVec< PTable > &PTableV)
Constructs iterator over the given vector of tables PTableV.
Definition: table.h:428
void DelColType(const TStr &ColName)
Removes column with name ColName from the ColTypeMap.
Definition: table.h:661
Definition: dt.h:1134
void ReadIntCol(const TStr &ColName, TIntV &Result) const
Reads values of entire int column into Result.
Definition: table.cpp:5212
int GetPrimHashCd() const
Definition: ds.h:156
void FillBucketsByWindow(TStr SplitAttr, TInt JumpSize, TInt WindowSize, TInt StartVal, TInt EndVal)
Fills RowIdBuckets with sets of row ids.
Definition: table.cpp:3547
static TStr NormalizeColName(const TStr &ColName)
Adds suffix to column name if it doesn't exist.
Definition: table.h:530
void AddStrCol(const TStr &ColName)
Adds a string column with name ColName.
Definition: table.cpp:4687
THash< TStr, GroupStmt > GroupStmtNames
Maps user-given grouping statement names to their group-by attributes.
Definition: table.h:573
TTableContext(TSIn &SIn)
Loads TTableContext in binary from SIn.
Definition: table.h:189
TRowIterator & Next()
Increments the iterator (For Python compatibility).
Definition: table.cpp:135
static PTable Load(TSIn &SIn, TTableContext *Context)
Loads table from a binary format.
Definition: table.h:971
TStr SrcCol
Column (attribute) to serve as src nodes when constructing the graph.
Definition: table.h:589
PTable Project(const TStrV &ProjectCols)
Returns table with only the columns in ProjectCols.
Definition: table.cpp:4615
TVec< PTable > PTableV
Vector of TTables which are to be iterated over.
Definition: table.h:424
void StoreStrCol(const TStr &ColName, const TStrV &ColVals)
Adds entire str column to table.
Definition: table.cpp:4121
TPredicateNode * Right
Definition: table.h:58
void LoadShM(TShMIn &ShMIn)
Constructs the vector from a shared memory input.
Definition: ds.h:932
TVec< TFltV > FltCols
Data columns of floating point attributes.
Definition: table.h:559
void AddSrcNodeAttr(TStrV &Attrs)
Adds columns to be used as src node attributes of the graph.
Definition: table.h:1178
TStrV GetDstNodeFltAttrV() const
Gets dst node float attribute name vector.
Definition: table.cpp:1049
TStrV DstNodeAttrV
List of columns (attributes) to serve as destination node attributes.
Definition: table.h:593
TIntV Next
A vector describing the logical order of the rows.
Definition: table.h:555
void AddStr(const TStr &Val)
Adds string attribute to this row.
Definition: table.h:247
TPredicateNode(const TAtomicPredicate &A)
Constructor for atomic predicate node (leaf)
Definition: table.h:63
Definition: ds.h:32
TAtomicPredicate(TAttrType Typ, TBool IsCnst, TPredComp Cmp, TStr L, TStr R, TInt ICnst, TFlt FCnst, TStr SCnst)
Construct predicate from given comparison op, variables and constants.
Definition: table.h:37
void SelectAtomicFltConst(const TStr &Col, const TFlt &Val, TPredComp Cmp, PTable &SelectedTable)
Definition: table.h:1326
int AddKey(const TKey &Key)
Definition: hash.h:373
TRowIterator EndRI() const
Gets iterator to the last valid row of the table.
Definition: table.h:1243
void AddStrVal(const TInt &ColIdx, const TStr &Val)
Adds Val in column with id ColIdx.
Definition: table.cpp:971
TTable * Table
Reference to table containing this row.
Definition: table.h:376
PGraphMP ToGraphMP3(PTable Table, const TStr &SrcCol, const TStr &DstCol)
Performs table to graph conversion in parallel. Uses the hash-first method, which is less optimal...
Definition: conv.h:532
void AddRow(const TRowIterator &RI)
Adds row corresponding to RI.
Definition: table.cpp:4295
void Load(TSIn &SIn, bool PoolToo=true)
Definition: hash.h:811
TInt NumRows
Number of rows in the table (valid and invalid).
Definition: table.h:551
TFlt GetFltVal(const TStr &ColName, const TInt &RowIdx)
Gets the value of float attribute ColName at row RowIdx.
Definition: table.h:1024
static PTable LoadSS(const Schema &S, const TStr &InFNm, TTableContext *Context, const char &Separator= '\t', TBool HasTitleLine=false)
Loads table from spreadsheet (TSV, CSV, etc.). Note: HasTitleLine = true is not supported. Please comment title lines instead.
Definition: table.cpp:795
TPrimitive(const TInt &Val)
Definition: table.h:220
TStr GetStrVal(const TStr &ColName, const TInt &RowIdx) const
Gets the value of string attribute ColName at row RowIdx.
Definition: table.h:1028
void Unique(const TStr &Col)
Removes rows with duplicate values in given column.
Definition: table.cpp:1266
TRowIteratorWithRemove & operator++(int)
Increments the iterator.
Definition: table.cpp:218
void AddJointRow(const TTable &T1, const TTable &T2, TInt RowIdx1, TInt RowIdx2)
Adds joint row T1[RowIdx1]<=>T2[RowIdx2].
Definition: table.cpp:1957
void Classify(TPredicate &Predicate, const TStr &LabelName, const TInt &PositiveLabel=1, const TInt &NegativeLabel=0)
Definition: table.cpp:2805
void Merge(TIntV &V, TInt Idx1, TInt Idx2, TInt Idx3, const TVec< TAttrType > &SortByTypes, const TIntV &SortByIndices, TBool Asc=true)
Helper function for parallel QSort.
Definition: table.cpp:3178
TStr DstCol
Column (attribute) to serve as dst nodes when constructing the graph.
Definition: table.h:590
void AddSrcNodeAttr(const TStr &Attr)
Adds column to be used as src node attribute of the graph.
Definition: table.h:1176
void ReadStrCol(const TStr &ColName, TStrV &Result) const
Reads values of entire string column into Result.
Definition: table.cpp:5230
TStr GetStrVal(TInt ColIdx, TInt RowIdx) const
Gets the value in column with id ColIdx at row RowIdx.
Definition: table.h:626
void Invalidate()
Definition: table.h:287
static void SetMP(TInt Value)
Definition: table.h:526
static PTable GetEdgeTable(const PNEANet &Network, TTableContext *Context)
Extracts edge TTable from PNEANet.
Definition: table.cpp:3741
GroupStmt(const GroupStmt &stmt)
Definition: table.h:276
void operator()(TVec< TElem > *Node, TShMIn &ShMIn)
Definition: table.h:895
static const TInt Invalid
Special value for Next vector entry - logically removed row.
Definition: table.h:487
void LoadShM(TShMIn &ShMIn)
Loads TTableContext using shared memory, the object is read only.
Definition: table.h:193
TStrV StrVals
Values of the str columns for this row.
Definition: table.h:238
void AddColType(const TStr &ColName, TPair< TAttrType, TInt > ColType)
Adds column with name ColName and type ColType to the ColTypeMap.
Definition: table.h:651
Definition: dt.h:412
PNEANet GetNextGraphFromSequence()
Returns the next graph in sequence corresponding to RowIdBuckets.
Definition: table.cpp:3634
TBool IncludesAttr(const TStr &Attr)
Definition: table.h:288
Definition: table.h:7
TBool CompareAtomicConst(TInt ColIdx, const TPrimitive &Val, TPredComp Cmp)
Compares value in column ColIdx with given primitive Val.
Definition: table.cpp:282
void StoreFltCol(const TStr &ColName, const TFltV &ColVals)
Adds entire flt column to table.
Definition: table.cpp:4104
THash< GroupStmt, THash< TInt, TGroupKey > > GroupIDMapping
Maps grouping statements to their (group id -> group-by key) mapping.
Definition: table.h:577
TInt IntConst
Int const value if this object is an integer constant.
Definition: table.h:22
void AddFlt(const TFlt &Val)
Adds float attribute to this row.
Definition: table.h:245
TTriple< TStr, TStr, TStr > TStrTr
Definition: ds.h:186
Definition: table.h:257
GroupStmt(const TStrV &Attrs, TBool ordered, TBool physical)
Definition: table.h:275
TPredOp Op
Logical op represented by this node.
Definition: table.h:53
void LoadTableShM(TShMIn &ShMIn, TTableContext *ContextTable)
Definition: table.cpp:360
GroupStmt()
Definition: table.h:273
TInt CurrTableIdx
Index of the current table pointed to by this iterator.
Definition: table.h:425
void GroupByStrCol(const TStr &GroupBy, T &Grouping, const TIntV &IndexSet, TBool All, TBool UsePhysicalIds=true) const
Groups/hashes by a single column with string values. Returns hash table with grouping.
Definition: table.h:1653
T AggregateVector(TVec< T > &V, TAttrAggr Policy)
Aggregates vector into a single scalar value according to a policy.
Definition: table.h:1544
TTableContext * ChangeContext(TTableContext *Context)
Changes the current context. Moves all object items to the new context.
Definition: table.cpp:921
TInt CurrRowIdx
Physical row index of current row pointed to by iterator.
Definition: table.h:375
void AddNodeAttr(TStrV &Attrs)
Handles the common case where src and dst both belong to the same "universe" of entities.
Definition: table.h:1186
TPredicateNode * Root
Root node of the current predicate tree.
Definition: table.h:87
Definition: gbase.h:23
Definition: table.h:259
void AggregateCols(const TStrV &AggrAttrs, TAttrAggr AggOp, const TStr &ResAttr)
Aggregates attributes in AggrAttrs across columns.
Definition: table.cpp:1750
bool operator==(const TRowIteratorWithRemove &RowI) const
Checks if this iterator points to the same row pointed by RowI.
Definition: table.cpp:235
Table class: Relational table with columnar data storage.
Definition: table.h:484
bool operator<(const TRowIterator &RowI) const
Checks if this iterator points to a row that is before the one pointed by RowI.
Definition: table.cpp:141
TPredicateNode(const TPredicateNode &P)
Copy constructor.
Definition: table.h:69
TStr GetStrValById(TInt ColIdx, TInt RowIdx) const
Gets the value of the string attribute at column ColIdx at row RowIdx.
Definition: table.h:1043
void SelectAtomicFltConst(const TStr &Col, const TFlt &Val, TPredComp Cmp)
Definition: table.h:1323
void UpdateFltFromTableMP(const TStr &KeyAttr, const TStr &UpdateAttr, const TTable &Table, const TStr &FKeyAttr, const TStr &ReadAttr, TFlt DefaultFltVal=0.0)
Definition: table.cpp:4174
static PTable GetEdgeTablePN(const PNGraphMP &Network, TTableContext *Context)
Extracts edge TTable from parallel graph PNGraphMP.
Definition: table.cpp:3799
void ISort(TIntV &V, TInt StartIdx, TInt EndIdx, const TVec< TAttrType > &SortByTypes, const TIntV &SortByIndices, TBool Asc=true)
Performs insertion sort on given vector V.
Definition: table.cpp:3096
TInt GetRowIdx() const
Gets physical index of current row.
Definition: table.cpp:239
TPredOp
Boolean operators for selection predicates.
Definition: table.h:5
TInt RequestIndexFlt(const TStr &ColName)
Creates Index for Flt Column ColName.
Definition: table.cpp:5495
static TBool EvalAtom(T Val1, T Val2, TPredComp Cmp)
Compare atomic values Val1 and Val2 using predicate Cmp.
Definition: table.h:110
static PTable New(const THash< TInt, TFlt > &H, const TStr &Col1, const TStr &Col2, TTableContext *Context, const TBool IsStrKeys=false)
Returns pointer to a table constructed from given int->float hash.
Definition: table.h:943
bool operator<(const TRowIteratorWithRemove &RowI) const
Checks if this iterator points to a row that is before the one pointed by RowI.
Definition: table.cpp:229
static PTable New(const Schema &S, TTableContext *Context)
Definition: table.h:934
void SelectAtomicIntConst(const TStr &Col, const TInt &Val, TPredComp Cmp, PTable &SelectedTable)
Definition: table.h:1312
void InitRowIdBuckets(int NumBuckets)
Initializes the RowIdBuckets vector which will be used for the graph sequence creation.
Definition: table.cpp:3535
void AddLeftChild(TPredicateNode *Child)
Add left child to this node.
Definition: table.h:72
TStrV GetSrcNodeFltAttrV() const
Gets src node float attribute name vector.
Definition: table.cpp:1038
static PTable GetFltNodePropertyTable(const PNEANet &Network, const TIntFltH &Property, const TStr &NodeAttrName, const TAttrType &NodeAttrType, const TStr &PropertyAttrName, TTableContext *Context)
Extracts node and edge property TTables from THash.
Definition: table.cpp:3852
void ConcatTable(const PTable &T)
Appends all rows of T to this table, and recalculates indices.
Definition: table.h:683
Hash-Table with multiprocessing support.
Definition: hashmp.h:81
PTable ThresholdJoinPerJoinKeyOutputTable(const THash< TIntTr, TIntTr > &Counters, TInt Threshold, const TTable &Table)
Definition: table.cpp:2622
TPrimitive(const TStr &Val)
Definition: table.h:222
PTable ThresholdJoin(const TStr &KeyCol1, const TStr &JoinCol1, const TTable &Table, const TStr &KeyCol2, const TStr &JoinCol2, TInt Threshold, TBool PerJoinKey=false)
Definition: table.cpp:2644
Definition: table.h:257
void Load(TSIn &SIn)
Loads TTableContext in binary from SIn.
Definition: table.h:191
static void ISortKeyVal(TIntV &Key, TIntV &Val, TInt Start, TInt End)
Definition: table.cpp:5321
TBool IsConst
Flag if this atomic node represents a constant value.
Definition: table.h:18
TInt CurrRowIdx
Physical row index of current row pointed to by iterator.
Definition: table.h:331
static TInt GetPivotKeyVal(TIntV &Key, TIntV &Val, TInt Start, TInt End)
Definition: table.cpp:5338
TIntV IntVals
Values of the int columns for this row.
Definition: table.h:236
Definition: table.h:7
Definition: bd.h:196
TInt IsNextDirty
Flag to signify whether the rows are stored in logical sequence or reordered. Used for optimizing Get...
Definition: table.h:603
void Select(TPredicate &Predicate)
Definition: table.h:1266
TStrV GetEdgeStrAttrV() const
Gets edge str attribute name vector.
Definition: table.cpp:1094
friend class TRowIteratorWithRemove
Definition: table.h:1527
Definition: table.h:5
void AddFltCol(const TStr &ColName)
Adds a float column with name ColName.
Definition: table.cpp:4680
TInt CompareRows(TInt R1, TInt R2, const TAttrType &CompareByType, const TInt &CompareByIndex, TBool Asc=true)
Returns positive value if R1 is bigger, negative value if R2 is bigger, and 0 if they are equal (strc...
Definition: table.cpp:3064
TStr RenumberColName(const TStr &ColName) const
Returns a re-numbered column name based on number of existing columns with conflicting names...
Definition: table.cpp:4632
TAtomicPredicate()
Default constructor.
Definition: table.h:30
TInt NumValidRows
Number of valid rows in the table (i.e. rows that were not logically removed).
Definition: table.h:552
TTable()
Definition: table.cpp:302
PTable ThresholdJoinOutputTable(const THash< TIntPr, TIntTr > &Counters, TInt Threshold, const TTable &Table)
Definition: table.cpp:2608
TRowIterator(TInt RowIdx, const TTable *TablePtr)
Constructs iterator to row RowIdx of TablePtr.
Definition: table.h:337
void Count(const TStr &CountColName, const TStr &Col)
Counts number of unique elements.
Definition: table.cpp:1802
Definition: table.h:7
PTable InitializeJointTable(const TTable &Table)
Initializes an empty table for the join of this table with the given table.
Definition: table.cpp:1916
Definition: table.h:257
void ColMax(const TStr &Attr1, const TStr &Attr2, const TStr &ResultAttrName="")
Performs max of two columns. See TTable::ColGenericOp.
Definition: table.cpp:4840
TStr GetStrValByName(const TStr &ColName, const TInt &RowIdx) const
Gets the value of the string attribute at column ColName at row RowIdx.
Definition: table.h:1048
void ClassifyAtomic(const TStr &Col1, const TStr &Col2, TPredComp Cmp, const TStr &LabelName, const TInt &PositiveLabel=1, const TInt &NegativeLabel=0)
Definition: table.cpp:2866
bool Cmp(const int &RelOp, const TRec &Rec1, const TRec &Rec2)
Definition: bd.h:426
void StoreIntCol(const TStr &ColName, const TIntV &ColVals)
Adds entire int column to table.
Definition: table.cpp:4087
void AddIdColumn(const TStr &IdColName)
Adds a column of explicit integer identifiers to the rows.
Definition: table.cpp:1900
void Print()
Definition: table.h:317
void GetVariables(TStrV &Variables)
Get variables in the predicate tree rooted at this node.
Definition: table.cpp:1
Definition: table.h:257
static TInt CheckSortedKeyVal(TIntV &Key, TIntV &Val, TInt Start, TInt End)
Definition: table.cpp:5310
void AddEdgeAttributes(PNEANet &Graph, int RowId)
Adds attributes of edge corresponding to RowId to the Graph.
Definition: table.cpp:3395
Definition: table.h:5
Definition: gbase.h:23
TPt< TTable > PTable
Definition: table.h:141
TVec< PNEANet > ToVarGraphSequence(TStr SplitAttr, TAttrAggr AggrPolicy, TIntPrV SplitIntervals)
Creates a sequence of graphs based on values of column SplitAttr and intervals specified by SplitInte...
Definition: table.cpp:3657
PTable Next()
Returns next table in the sequence and updates the iterator.
Definition: table.h:430
Definition: table.h:7
TInt GetNextIntAttr(TInt ColIdx) const
Returns value of integer attribute specified by integer column index for next row.
Definition: table.cpp:248
void ColGenericOp(const TStr &Attr1, const TStr &Attr2, const TStr &ResAttr, TArithOp op)
Performs columnwise arithmetic operation.
Definition: table.cpp:4752
void SelectAtomic(const TStr &Col1, const TStr &Col2, TPredComp Cmp, TIntV &SelectedRows, TBool Remove=true)
Selects rows using atomic compare operation.
Definition: table.cpp:2813
TRowIterator & operator++(int)
Increments the iterator.
Definition: table.cpp:131
bool IsKey(const TKey &Key) const
Definition: hash.h:258
void GetVariables(TStrV &Variables)
Get variables in current predicate.
Definition: table.cpp:10
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:602
TAttrType AttrType
Definition: table.h:216
static PTable LoadShM(TShMIn &ShMIn, TTableContext *Context)
Static constructor to load table from memory.
Definition: table.h:975
TDat & AddDat(const TKey &Key)
Definition: hashmp.h:181
void ColMin(const TStr &Attr1, const TStr &Attr2, const TStr &ResultAttrName="")
Performs min of two columns. See TTable::ColGenericOp.
Definition: table.cpp:4836
Definition: dt.h:971
bool IsRowValid(TInt RowIdx) const
Checks if RowIdx corresponds to a valid (i.e. not deleted) row.
Definition: table.h:801
void ColMod(const TStr &Attr1, const TStr &Attr2, const TStr &ResultAttrName="")
Performs columnwise modulus. See TTable::ColGenericOp.
Definition: table.cpp:4832
TCRef CRef
Definition: table.h:550
void RemoveFirstRow()
Removes first valid row of the table.
Definition: table.cpp:1122
bool IsStrIn(const TStr &Str) const
Definition: dt.h:554
TBool IsFirst() const
Checks whether iterator points to first valid row of the table.
Definition: table.cpp:274
Atomic predicate - encapsulates comparison operations.
Definition: table.h:15
TInt GetStrMapByName(const TStr &ColName, TInt RowIdx) const
Gets the integer mapping of the string at column ColName at row RowIdx.
Definition: table.h:1038
TBool IsColName(const TStr &ColName) const
Definition: table.h:646
TInt GetIntValAtRowIdx(const TInt &ColIdx, const TInt &RowIdx)
Get the integer value at column ColIdx and row RowIdx.
Definition: table.h:1116
Definition: table.h:259
TInt CheckAndAddFltNode(T Graph, THash< TFlt, TInt > &NodeVals, TFlt FNodeVal)
Checks if given NodeVal is seen earlier; if not, add it to Graph and hashmap NodeVals.
Definition: table.h:1533
TFlt GetFlt() const
Definition: table.h:227
Predicate node - represents a binary predicate operation on two predicate nodes.
Definition: table.h:51
int Len() const
Definition: hash.h:228
PTable SelfSimJoin(const TStrV &Cols, const TStr &DistanceColName, const TSimType &SimType, const TFlt &Threshold)
Definition: table.h:1367
static PTable New()
Definition: table.h:932
void AddNodeAttributes(TInt NId, TStrV NodeAttrV, TInt RowId, THash< TInt, TStrIntVH > &NodeIntAttrs, THash< TInt, TStrFltVH > &NodeFltAttrs, THash< TInt, TStrStrVH > &NodeStrAttrs)
Takes as parameters, and updates, maps NodeXAttrs: Node Id –> (attribute name –> Vector of attribut...
Definition: table.cpp:3414
GroupStmt(const TStrV &Attrs)
Definition: table.h:274
PNEANet GetFirstGraphFromSequence(TAttrAggr AggrPolicy)
Returns the first graph of the sequence.
Definition: table.cpp:3628
TDat & AddDat(const TKey &Key)
Definition: hash.h:238
void ClassifyAtomicConst(const TStr &Col, const T &Val, TPredComp Cmp, const TStr &LabelName, const TInt &PositiveLabel=1, const TInt &NegativeLabel=0)
Definition: table.h:1301
PTable Intersection(const TTable &Table)
Returns intersection of this table with given Table.
Definition: table.cpp:4567
void AddDstNodeAttr(TStrV &Attrs)
Adds columns to be used as dst node attributes of the graph.
Definition: table.h:1182
void AddNJointRowsMP(const TTable &T1, const TTable &T2, const TVec< TIntPrV > &JointRowIDSet)
Adds rows from T1 and T2 to this table in a parallel manner. Used by Join.
Definition: table.cpp:4442
const TDat & GetDat(const TKey &Key) const
Definition: hashmp.h:195
TTableContext * GetContext()
Returns the context.
Definition: table.h:1005
TFlt FltConst
Flt const value if this object is a float constant.
Definition: table.h:23
TBool Eval()
Return the result of evaluating current predicate.
Definition: table.cpp:14
TIntV GetFltRowIdxByVal(const TStr &ColName, const TFlt &Val) const
Gets the rows containing Val in flt column ColName.
Definition: table.cpp:5453
Definition: table.h:259
TSize GetContextMemUsedKB()
Returns approximate memory used by table context in [KB].
Definition: table.cpp:3969
TInt AddStr(const TStr &Key)
Adds string Key to the context, returns its KeyId.
Definition: table.h:199
TPredicateNode * Parent
Parent node of this node.
Definition: table.h:56
TInt GetIntVal(const TStr &ColName, const TInt &RowIdx)
Gets the value of integer attribute ColName at row RowIdx.
Definition: table.h:1020
THash< TInt, TInt > GetRowIdMap() const
Gets a map of logical to physical row ids.
Definition: table.h:1237
void SetFirstValidRow()
Sets the first valid row of the TTable.
Definition: table.h:811
void AddTable(const TTable &T)
Adds all the rows of the input table. Allows duplicate rows (not a union).
Definition: table.cpp:3975
void ColMul(const TStr &Attr1, const TStr &Attr2, const TStr &ResultAttrName="")
Performs columnwise multiplication. See TTable::ColGenericOp.
Definition: table.cpp:4824
void ClassifyAux(const TIntV &SelectedRows, const TStr &LabelName, const TInt &PositiveLabel=1, const TInt &NegativeLabel=0)
Adds a label attribute with positive labels on selected rows and negative labels on the rest...
Definition: table.cpp:4694
THash< TStr, TFlt > FltVars
Float variables in the current predicate tree.
Definition: table.h:85
void AddNRows(int NewRows, const TVec< TIntV > &IntColsP, const TVec< TFltV > &FltColsP, const TVec< TIntV > &StrColMapsP)
Adds NewRows rows from the given vectors for each column type.
Definition: table.cpp:4421
TVec< PTable > SpliceByGroup(const TStrV &GroupByAttrs, TBool Ordered=true)
Splices table into subtables according to a grouping statement.
Definition: table.cpp:1808
PGraphMP ToNetworkMP2(PTable Table, const TStr &SrcCol, const TStr &DstCol, TStrV &SrcAttrV, TStrV &DstAttrV, TStrV &EdgeAttrV, TAttrAggr AggrPolicy)
Implements table to network conversion in parallel. Not the recommended algorithm; use ToNetworkMP instead.
Definition: conv.h:1118
Definition: table.h:257
Definition: table.h:5
void AddEdgeAttr(TStrV &Attrs)
Adds columns to be used as graph edge attributes.
Definition: table.h:1174
TVec< TPair< TStr, TAttrType > > Schema
A table schema is a vector of pairs (attribute name, attribute type).
Definition: table.h:262
void ColGenericOpMP(TInt ArgColIdx1, TInt ArgColIdx2, TAttrType ArgType1, TAttrType ArgType2, TInt ResColIdx, TArithOp op)
Definition: table.cpp:4708
TVec< PNEANet > GetGraphsFromSequence(TAttrAggr AggrPolicy)
Returns a sequence of graphs.
Definition: table.cpp:3616
TStrV GetDstNodeIntAttrV() const
Gets dst node int attribute name vector.
Definition: table.cpp:1016
PTable Union(const PTable &Table)
Definition: table.h:1413
TAtomicPredicate Atom
Atomic predicate at this node.
Definition: table.h:55
TInt Partition(TIntV &V, TInt StartIdx, TInt EndIdx, const TVec< TAttrType > &SortByTypes, const TIntV &SortByIndices, TBool Asc)
Partitions vector for QSort.
Definition: table.cpp:3126
Implements a single CrossNet consisting of edges between two TModeNets (could be the same TModeNet) ...
Definition: mmnet.h:133
int GetSecHashCd() const
Definition: table.h:310