// d6/dd0/table_8cpp_source.html (presumably the Doxygen page this listing came from)

 // Appends to Variables every variable name referenced in this subtree.
 // Children are visited first (left, then right); afterwards a NOP node
 // contributes its own atom's non-empty left and right variable names.
 void TPredicateNode::GetVariables(TStrV& Variables) {
   if (Left != NULL) {
     Left->GetVariables(Variables);
   }
   if (Right != NULL) {
     Right->GetVariables(Variables);
   }
   // Only NOP nodes contribute names from their own atom.
   if (Op != NOP) {
     return;
   }
   if (Atom.Lvar != "") {
     Variables.Add(Atom.Lvar);
   }
   if (Atom.Rvar != "") {
     Variables.Add(Atom.Rvar);
   }
 }


 // Collects the variable names used anywhere in the predicate by delegating
 // to the root node. Root is dereferenced unconditionally.
 void TPredicate::GetVariables(TStrV& Variables) {
   TPredicateNode* const Top = Root;
   Top->GetVariables(Variables);
 }


 // Evaluates the predicate tree iteratively (no recursion) and returns the
 // result stored in the root node.
 //
 // Two cursors drive the walk: Curr is the node being visited and Prev is the
 // node visited immediately before it. Comparing Prev with Curr's parent and
 // children tells whether we are descending, returning from the left child,
 // or returning from the right child. AND / OR short-circuit: the right child
 // is visited only when the left result does not already decide the outcome.
 //
 // Root is dereferenced unconditionally on return, so it must not be NULL.
 // NOTE(review): termination relies on the walk stepping from the root to a
 // NULL parent -- confirm Root->Parent is always NULL.
 TBool TPredicate::Eval() {
   TPredicateNode* Curr = Root;
   TPredicateNode* Prev = NULL;
   while (!(Curr == NULL && Prev == Root)) {
     if (Prev == NULL || Prev == Curr->Parent) {
       // going down the tree
       if (Curr->Left != NULL) {
         // left child exists and was not yet evaluated
         Prev = Curr;
         Curr = Curr->Left;
       } else if (Curr->Right != NULL) {
         Prev = Curr;
         Curr = Curr->Right;
       } else {
         // leaf: evaluate its atomic predicate and head back up
         Curr->Result = EvalAtomicPredicate(Curr->Atom);
         Prev = Curr;
         Curr = Curr->Parent;
       }
     } else if (Prev == Curr->Left) {
       // going back up through left (first) child
       bool VisitRight = false;
       switch (Curr->Op) {
         case NOT: {
           Assert(Curr->Right == NULL);
           Curr->Result = !(Prev->Result);
           break;
         }
         case AND: {
           Assert(Curr->Right != NULL);
           if (!Prev->Result) {
             // left operand is false: AND is decided, skip the right child
             Curr->Result = false;
           } else {
             VisitRight = true;
           }
           break;
         }
         case OR: {
           Assert(Curr->Right != NULL);
           if (Prev->Result) {
             // left operand is true: OR is decided, skip the right child
             Curr->Result = true;
           } else {
             VisitRight = true;
           }
           break;
         }
         case NOP: {
           // A NOP node with a left child previously left Prev/Curr untouched
           // here, which made the loop spin forever. Move up without touching
           // Result, mirroring how the right-child branch treats NOP.
           break;
         }
       }
       Prev = Curr;
       Curr = VisitRight ? Curr->Right : Curr->Parent;
     } else {
       // going back up the tree from right (second) child
       Assert(Prev == Curr->Right);
       switch (Curr->Op) {
         case NOT: {
           Assert(Curr->Left == NULL);
           Curr->Result = !(Prev->Result);
           break;
         }
         case AND: {
           // we only got here because the left operand was true
           Assert(Curr->Left != NULL);
           Assert(Curr->Left->Result);
           Curr->Result = Prev->Result;
           break;
         }
         case OR: {
           // we only got here because the left operand was false
           Assert(Curr->Left != NULL);
           Assert(!Curr->Left->Result);
           Curr->Result = Prev->Result;
           break;
         }
         case NOP: {
           break;
         }
       }
       Prev = Curr;
       Curr = Curr->Parent;
     }
   }
   return Root->Result;
 }


 // Evaluates one atomic comparison. The left operand is always looked up by
 // name (Atom.Lvar) in the variable map matching Atom.Type; the right operand
 // is either the atom's constant (when Atom.IsConst) or a second variable
 // looked up by Atom.Rvar. Types other than int / float / string yield false.
 TBool TPredicate::EvalAtomicPredicate(const TAtomicPredicate& Atom) {
   switch (Atom.Type) {
     case atInt: {
       if (!Atom.IsConst) {
         return EvalAtom<TInt>(IntVars.GetDat(Atom.Lvar), IntVars.GetDat(Atom.Rvar), Atom.Compare);
       }
       return EvalAtom<TInt>(IntVars.GetDat(Atom.Lvar), Atom.IntConst, Atom.Compare);
     }
     case atFlt: {
       if (!Atom.IsConst) {
         return EvalAtom<TFlt>(FltVars.GetDat(Atom.Lvar), FltVars.GetDat(Atom.Rvar), Atom.Compare);
       }
       return EvalAtom<TFlt>(FltVars.GetDat(Atom.Lvar), Atom.FltConst, Atom.Compare);
     }
     case atStr: {
       if (!Atom.IsConst) {
         return EvalAtom<TStr>(StrVars.GetDat(Atom.Lvar), StrVars.GetDat(Atom.Rvar), Atom.Compare);
       }
       return EvalAtom<TStr>(StrVars.GetDat(Atom.Lvar), Atom.StrConst, Atom.Compare);
     }
   }
   return false;
 }


 // Sentinel row index: written into Next for the final row of a table and
 // compared against by the row iterators' operator< to detect the end.
 TInt const TTable::Last = -1;

 // Second sentinel row index, distinct from Last. The only use visible here is
 // the assertion in TRowIteratorWithRemove::Next(); presumably it marks rows
 // that are no longer part of the table -- TODO confirm.
 TInt const TTable::Invalid = -2;


 // Multi-processing switch, initialized to 1. Presumably the value GetMP()
 // reports when LoadSS() chooses a loader -- TODO confirm.
 TInt TTable::UseMP = 1;


 // Postfix increment. Advances this iterator in place and returns a reference
 // to it (not a copy of the previous position).
 TRowIterator& TRowIterator::operator++(int) {
   return Next();
 }


 // Moves to the successor of the current row as recorded in the table's Next
 // vector and returns this iterator. No check is made that the new index is
 // not TTable::Invalid.
 TRowIterator& TRowIterator::Next() {
   const TInt Successor = Table->Next[CurrRowIdx];
   CurrRowIdx = Successor;
   return *this;
 }


 // Orders iterators by row index, treating TTable::Last as greater than every
 // real index: an iterator at Last is never less than anything, and any other
 // iterator is less than one at Last.
 bool TRowIterator::operator < (const TRowIterator& RowI) const {
   const bool SelfAtEnd = (CurrRowIdx == TTable::Last);
   if (SelfAtEnd) {
     return false;
   }
   const bool OtherAtEnd = (RowI.CurrRowIdx == TTable::Last);
   if (OtherAtEnd) {
     return true;
   }
   return CurrRowIdx < RowI.CurrRowIdx;
 }


 // Two iterators are equal when they point at the same row index. The owning
 // table is not compared.
 bool TRowIterator::operator == (const TRowIterator& RowI) const {
   const bool SameRow = (CurrRowIdx == RowI.CurrRowIdx);
   return SameRow;
 }


 // Returns the physical index of the row this iterator currently points at.
 TInt TRowIterator::GetRowIdx() const {
   const TInt Position = CurrRowIdx;
   return Position;
 }

 // We do not check column type in the iterator.

 // Reads the current row's value from the integer column at position ColIdx
 // within IntCols. The column's declared type is not verified.
 TInt TRowIterator::GetIntAttr(TInt ColIdx) const {
   const TIntV& Column = Table->IntCols[ColIdx];
   return Column[CurrRowIdx];
 }


 // Reads the current row's value from the float column at position ColIdx
 // within FltCols. The column's declared type is not verified.
 TFlt TRowIterator::GetFltAttr(TInt ColIdx) const {
   const TFltV& Column = Table->FltCols[ColIdx];
   return Column[CurrRowIdx];
 }


 // Returns the string stored in the current row of the string column at
 // position ColIdx, resolved through the table's GetStrValIdx().
 TStr TRowIterator::GetStrAttr(TInt ColIdx) const {
   const TInt Row = CurrRowIdx;
   return Table->GetStrValIdx(ColIdx, Row);
 }


 // Name-based variant: resolves Col to its column index through the table,
 // then reads the current row from that integer column. The column's declared
 // type is not verified.
 TInt TRowIterator::GetIntAttr(const TStr& Col) const {
   const TIntV& Column = Table->IntCols[Table->GetColIdx(Col)];
   return Column[CurrRowIdx];
 }


 // Name-based variant: resolves Col to its column index through the table,
 // then reads the current row from that float column. The column's declared
 // type is not verified.
 TFlt TRowIterator::GetFltAttr(const TStr& Col) const {
   const TFltV& Column = Table->FltCols[Table->GetColIdx(Col)];
   return Column[CurrRowIdx];
 }


 // Name-based variant: returns the string stored in column Col for the
 // current row, resolved through the table's GetStrVal().
 TStr TRowIterator::GetStrAttr(const TStr& Col) const {
   const TInt Row = CurrRowIdx;
   return Table->GetStrVal(Col, Row);
 }


 // Returns the integer stored in StrColMaps for column Col at the current row
 // (the string's id rather than the string itself).
 TInt TRowIterator::GetStrMapByName(const TStr& Col) const {
   const TIntV& IdColumn = Table->StrColMaps[Table->GetColIdx(Col)];
   return IdColumn[CurrRowIdx];
 }


 // Returns the integer stored in StrColMaps[ColIdx] at the current row
 // (the string's id rather than the string itself).
 TInt TRowIterator::GetStrMapById(TInt ColIdx) const {
   const TIntV& IdColumn = Table->StrColMaps[ColIdx];
   return IdColumn[CurrRowIdx];
 }


 // Compares the current row's value in column ColIdx against the constant Val
 // using comparison Cmp. The column family that is read (int / float / string)
 // is chosen by Val's type; any other type yields false.
 TBool TRowIterator::CompareAtomicConst(TInt ColIdx, const TPrimitive& Val, TPredComp Cmp) {
   switch (Val.GetType()) {
     case atInt:
       return TPredicate::EvalAtom(GetIntAttr(ColIdx), Val.GetInt(), Cmp);
     case atFlt:
       return TPredicate::EvalAtom(GetFltAttr(ColIdx), Val.GetFlt(), Cmp);
     case atStr:
       return TPredicate::EvalStrAtom(GetStrAttr(ColIdx), Val.GetStr(), Cmp);
     default:
       return TBool(false);
   }
 }


 // String-only shortcut: compares the current row's string in column ColIdx
 // against Val using comparison Cmp.
 TBool TRowIterator::CompareAtomicConstTStr(TInt ColIdx, const TStr& Val, TPredComp Cmp) {
   return TPredicate::EvalStrAtom(GetStrAttr(ColIdx), Val, Cmp);
 }


 // Builds a removal-capable iterator over TablePtr positioned at RowIdx.
 // Start records whether RowIdx is the table's first valid row; while it is
 // set, GetNextRowIdx() reports the first valid row itself.
 TRowIteratorWithRemove::TRowIteratorWithRemove(TInt RowIdx, TTable* TablePtr)
   : CurrRowIdx(RowIdx),
     Table(TablePtr),
     Start(RowIdx == TablePtr->FirstValidRow) {
 }


 // Postfix increment. Advances this iterator in place and returns a reference
 // to it (not a copy of the previous position).
 TRowIteratorWithRemove& TRowIteratorWithRemove::operator++(int) {
   return Next();
 }


 // Steps to the row reported by GetNextRowIdx(), clears the Start flag, and
 // asserts that the new position is not TTable::Invalid.
 TRowIteratorWithRemove& TRowIteratorWithRemove::Next() {
   // Must be read before Start is cleared: GetNextRowIdx() depends on it.
   const TInt Upcoming = GetNextRowIdx();
   CurrRowIdx = Upcoming;
   Start = false;
   Assert(CurrRowIdx != TTable::Invalid);
   return *this;
 }


 // Orders iterators by row index, treating TTable::Last as greater than every
 // real index: an iterator at Last is never less than anything, and any other
 // iterator is less than one at Last.
 bool TRowIteratorWithRemove::operator < (const TRowIteratorWithRemove& RowI) const {
   const bool SelfAtEnd = (CurrRowIdx == TTable::Last);
   if (SelfAtEnd) {
     return false;
   }
   const bool OtherAtEnd = (RowI.CurrRowIdx == TTable::Last);
   if (OtherAtEnd) {
     return true;
   }
   return CurrRowIdx < RowI.CurrRowIdx;
 }


 // Two iterators are equal when they point at the same row index. Neither the
 // owning table nor the Start flag is compared.
 bool TRowIteratorWithRemove::operator == (const TRowIteratorWithRemove& RowI) const {
   const bool SameRow = (CurrRowIdx == RowI.CurrRowIdx);
   return SameRow;
 }


 // Returns the physical index of the row this iterator currently points at.
 TInt TRowIteratorWithRemove::GetRowIdx() const {
   const TInt Position = CurrRowIdx;
   return Position;
 }


 // Index of the row the GetNext*/RemoveNext operations act on: the table's
 // first valid row while Start is set, otherwise the successor of the current
 // row in the table's Next vector.
 TInt TRowIteratorWithRemove::GetNextRowIdx() const {
   if (Start) {
     return Table->FirstValidRow;
   }
   return Table->Next[CurrRowIdx];
 }


 // We do not check column type in the iterator.

 // Reads the integer column at position ColIdx for the row given by
 // GetNextRowIdx(). The column's declared type is not verified.
 TInt TRowIteratorWithRemove::GetNextIntAttr(TInt ColIdx) const {
   const TIntV& Column = Table->IntCols[ColIdx];
   return Column[GetNextRowIdx()];
 }


 // Reads the float column at position ColIdx for the row given by
 // GetNextRowIdx(). The column's declared type is not verified.
 TFlt TRowIteratorWithRemove::GetNextFltAttr(TInt ColIdx) const {
   const TFltV& Column = Table->FltCols[ColIdx];
   return Column[GetNextRowIdx()];
 }


 // Returns the string in the string column at position ColIdx for the row
 // given by GetNextRowIdx(), resolved through the table's GetStrValIdx().
 TStr TRowIteratorWithRemove::GetNextStrAttr(TInt ColIdx) const {
   const TInt Row = GetNextRowIdx();
   return Table->GetStrValIdx(ColIdx, Row);
 }


 // Name-based variant: resolves Col to its column index, then reads that
 // integer column for the row given by GetNextRowIdx().
 TInt TRowIteratorWithRemove::GetNextIntAttr(const TStr& Col) const {
   const TIntV& Column = Table->IntCols[Table->GetColIdx(Col)];
   return Column[GetNextRowIdx()];
 }


 // Name-based variant: resolves Col to its column index, then reads that
 // float column for the row given by GetNextRowIdx().
 TFlt TRowIteratorWithRemove::GetNextFltAttr(const TStr& Col) const {
   const TFltV& Column = Table->FltCols[Table->GetColIdx(Col)];
   return Column[GetNextRowIdx()];
 }


 // Name-based variant: returns the string in column Col for the row given by
 // GetNextRowIdx(), resolved through the table's GetStrVal().
 TStr TRowIteratorWithRemove::GetNextStrAttr(const TStr& Col) const {
   const TInt Row = GetNextRowIdx();
   return Table->GetStrVal(Col, Row);
 }


 // True when the iterator's current row is the table's first valid row.
 TBool TRowIteratorWithRemove::IsFirst() const {
   const bool AtHead = (CurrRowIdx == Table->FirstValidRow);
   return AtHead;
 }


 void TRowIteratorWithRemove::RemoveNext() {

   Table->RemoveRow(GetNextRowIdx(), CurrRowIdx);

 }


 // Compares the value in column ColIdx of the row given by GetNextRowIdx()
 // against the constant Val using comparison Cmp. The column family that is
 // read (int / float / string) is chosen by Val's type; any other type
 // yields false.
 TBool TRowIteratorWithRemove::CompareAtomicConst(TInt ColIdx, const TPrimitive& Val, TPredComp Cmp) {
   switch (Val.GetType()) {
     case atInt:
       return TPredicate::EvalAtom(GetNextIntAttr(ColIdx), Val.GetInt(), Cmp);
     case atFlt:
       return TPredicate::EvalAtom(GetNextFltAttr(ColIdx), Val.GetFlt(), Cmp);
     case atStr:
       return TPredicate::EvalStrAtom(GetNextStrAttr(ColIdx), Val.GetStr(), Cmp);
     default:
       return TBool(false);
   }
 }


 // Better not use default constructor as it leads to a memory leak.

 // - OR - implement a destructor.

 // Default constructor: creates an empty table (no rows, LastValidRow = -1)
 // bound to a freshly heap-allocated TTableContext. Nothing in this
 // constructor arranges for that context to be released.
 TTable::TTable()
   : Context(new TTableContext),
     NumRows(0),
     NumValidRows(0),
     FirstValidRow(0),
     LastValidRow(-1) {
 }


 // Creates an empty table (no rows, LastValidRow = -1) that uses the
 // caller-supplied context. The pointer is stored as given.
 TTable::TTable(TTableContext* Context)
   : Context(Context),
     NumRows(0),
     NumValidRows(0),
     FirstValidRow(0),
     LastValidRow(-1) {
 }


 // Creates an empty table with the given schema. Every schema entry is
 // registered via AddSchemaCol() and mapped via AddColType() to the next free
 // slot within its own type family (int / float / string). The three column
 // containers are then sized to the number of columns of each type.
 TTable::TTable(const Schema& TableSchema, TTableContext* Context)
   : Context(Context), NumRows(0), NumValidRows(0), FirstValidRow(0),
     LastValidRow(-1), IsNextDirty(0) {
   // running count of columns seen so far, per type family
   int NumInt = 0;
   int NumFlt = 0;
   int NumStr = 0;
   const int NumCols = TableSchema.Len();
   for (int Pos = 0; Pos < NumCols; Pos++) {
     const TStr ColName = TableSchema[Pos].Val1;
     const TAttrType ColType = TableSchema[Pos].Val2;
     AddSchemaCol(ColName, ColType);
     if (ColType == atInt) {
       AddColType(ColName, atInt, NumInt);
       NumInt++;
     } else if (ColType == atFlt) {
       AddColType(ColName, atFlt, NumFlt);
       NumFlt++;
     } else if (ColType == atStr) {
       AddColType(ColName, atStr, NumStr);
       NumStr++;
     }
   }
   IntCols = TVec<TIntV>(NumInt);
   FltCols = TVec<TFltV>(NumFlt);
   StrColMaps = TVec<TIntV>(NumStr);
 }


 // Rebuilds ColTypeMap and Sch from a serialized map of
 // column name -> (type code, index within that type's column container).
 // Type codes are 0 = int, 1 = float, 2 = string; entries carrying any other
 // code are ignored. Also resets IsNextDirty to 0.
 void TTable::GenerateColTypeMap(THash<TStr,TPair<TInt,TInt> > & ColTypeIntMap) {
   ColTypeMap.Clr();
   Sch.Clr();
   for (THash<TStr,TPair<TInt,TInt> >::TIter it = ColTypeIntMap.BegI(); it < ColTypeIntMap.EndI(); it++) {
     TPair<TInt,TInt> Entry = it.GetDat();
     const int TypeCode = Entry.GetVal1();
     TAttrType Type;
     if (TypeCode == 0) {
       Type = atInt;
     } else if (TypeCode == 1) {
       Type = atFlt;
     } else if (TypeCode == 2) {
       Type = atStr;
     } else {
       continue;  // unknown code: skip the entry
     }
     AddColType(it.GetKey(), Type, Entry.GetVal2());
     AddSchemaCol(it.GetKey(), Type);
   }
   IsNextDirty = 0;
 }


 // Populates this table from a shared-memory input stream and binds it to
 // ContextTable. Fields are consumed in this order: the four row counters,
 // Next, IntCols, FltCols, StrColMaps, then the name -> (type, index) map
 // from which the schema is rebuilt.
 void TTable::LoadTableShM(TShMIn& ShMIn, TTableContext* ContextTable) {
   Context = ContextTable;

   // scalar bookkeeping
   NumRows = TInt(ShMIn);
   NumValidRows = TInt(ShMIn);
   FirstValidRow = TInt(ShMIn);
   LastValidRow = TInt(ShMIn);
   Next.LoadShM(ShMIn);

   // column data; FltCols goes through Load() rather than LoadShM()
   TLoadVecInit VecInit;
   IntCols.LoadShM(ShMIn, VecInit);
   FltCols.Load(ShMIn);
   StrColMaps.LoadShM(ShMIn, VecInit);

   // schema
   THash<TStr,TPair<TInt,TInt> > SavedColTypes;
   SavedColTypes.LoadShM(ShMIn);
   GenerateColTypeMap(SavedColTypes);
 }


 // Deserializing constructor: the initializer list names the members in the
 // order Save() writes them (row counters, Next, IntCols, FltCols,
 // StrColMaps); the body then reads the name -> (type, index) map and
 // rebuilds the schema from it.
 TTable::TTable(TSIn& SIn, TTableContext* Context)
   : Context(Context),
     NumRows(SIn),
     NumValidRows(SIn),
     FirstValidRow(SIn),
     LastValidRow(SIn),
     Next(SIn),
     IntCols(SIn),
     FltCols(SIn),
     StrColMaps(SIn) {
   THash<TStr,TPair<TInt,TInt> > SavedColTypes(SIn);
   GenerateColTypeMap(SavedColTypes);
 }


 // Builds a two-column table from an int -> int hash: Col1 receives the keys
 // and Col2 the values, one row per hash entry.
 //
 // When IsStrKeys is set the keys are stored in the single string-map column
 // and the values become the only integer column; otherwise keys and values
 // are integer columns 0 and 1.
 //
 // Rows are chained in storage order through Next, the final row pointing to
 // Last. An empty hash yields an empty table.
 TTable::TTable(const TIntIntH& H, const TStr& Col1, const TStr& Col2,
  TTableContext* Context, const TBool IsStrKeys) : Context(Context), NumRows(H.Len()),
   NumValidRows(H.Len()), FirstValidRow(0), LastValidRow(H.Len()-1) {
     TAttrType KeyType = IsStrKeys ? atStr : atInt;
     AddSchemaCol(Col1, KeyType);
     AddSchemaCol(Col2, atInt);
     AddColType(Col1, KeyType, 0);
     // The index is relative to the column's own type family. With string keys
     // the values live in IntCols[0]; registering index 1 (as before) pointed
     // Col2 at a slot that does not hold the values.
     AddColType(Col2, atInt, IsStrKeys ? 0 : 1);
     if (IsStrKeys) {
       StrColMaps = TVec<TIntV>(1);
       IntCols = TVec<TIntV>(1);
       H.GetKeyV(StrColMaps[0]);
       H.GetDatV(IntCols[0]);
     } else {
       IntCols = TVec<TIntV>(2);
       H.GetKeyV(IntCols[0]);
       H.GetDatV(IntCols[1]);
     }
     Next = TIntV(NumRows);
     for (TInt i = 0; i < NumRows; i++) {
       Next[i] = i+1;
     }
     // Terminate the chain; skipped for an empty hash, where NumRows-1 would
     // index before the start of Next.
     if (NumRows > 0) {
       Next[NumRows-1] = Last;
     }
     IsNextDirty = 0;
     InitIds();
 }


 // Builds a two-column table from an int -> float hash: Col1 receives the
 // keys and Col2 the values, one row per hash entry.
 //
 // Keys go into the single string-map column when IsStrKeys is set and into
 // the single integer column otherwise; values always go into the single
 // float column.
 //
 // Rows are chained in storage order through Next, the final row pointing to
 // Last. An empty hash yields an empty table.
 TTable::TTable(const TIntFltH& H, const TStr& Col1, const TStr& Col2,
  TTableContext* Context, const TBool IsStrKeys) : Context(Context),
   NumRows(H.Len()), NumValidRows(H.Len()), FirstValidRow(0), LastValidRow(H.Len()-1) {
   TAttrType KeyType = IsStrKeys ? atStr : atInt;
   AddSchemaCol(Col1, KeyType);
   AddSchemaCol(Col2, atFlt);
   // both columns are the first (index 0) of their respective type family
   AddColType(Col1, KeyType, 0);
   AddColType(Col2, atFlt, 0);
   if (IsStrKeys) {
     StrColMaps = TVec<TIntV>(1);
     H.GetKeyV(StrColMaps[0]);
   } else {
     IntCols = TVec<TIntV>(1);
     H.GetKeyV(IntCols[0]);
   }
   FltCols = TVec<TFltV>(1);
   H.GetDatV(FltCols[0]);
   Next = TIntV(NumRows);
   for (TInt i = 0; i < NumRows; i++) {
     Next[i] = i+1;
   }
   // Terminate the chain; skipped for an empty hash, where NumRows-1 would
   // index before the start of Next.
   if (NumRows > 0) {
     Next[NumRows-1] = Last;
   }
   IsNextDirty = 0;
   InitIds();
 }


 // Builds a new table that shares Table's context, schema, column-type map and
 // graph attribute settings, and contains only the rows listed in RowIDs
 // (copied in by AddSelectedRows()). Row ids are then initialized via
 // InitIds().
 TTable::TTable(const TTable& Table, const TIntV& RowIDs)
   : Context(Table.Context),
     Sch(Table.Sch),
     SrcCol(Table.SrcCol),
     DstCol(Table.DstCol),
     EdgeAttrV(Table.EdgeAttrV),
     SrcNodeAttrV(Table.SrcNodeAttrV),
     DstNodeAttrV(Table.DstNodeAttrV),
     CommonNodeAttrs(Table.CommonNodeAttrs) {
   ColTypeMap = Table.ColTypeMap;

   // start out empty ...
   NumRows = 0;
   NumValidRows = 0;
   FirstValidRow = 0;
   LastValidRow = -1;

   // ... with one (still empty) column per source column
   IntCols = TVec<TIntV>(Table.IntCols.Len());
   FltCols = TVec<TFltV>(Table.FltCols.Len());
   StrColMaps = TVec<TIntV>(Table.StrColMaps.Len());

   AddSelectedRows(Table, RowIDs);
   IsNextDirty = 0;
   InitIds();
 }


 void TTable::GetSchema(const TStr& InFNm, Schema& S, const char& Separator) {

   // Determine Attr Type

   // Assume that the data is tab separated

   TSsParser Ss(InFNm, '\t', false, false, false);

   TInt rowsToPeek = 1000;

   TInt currRow = 0;

   TInt lastComment = 0;

   while (Ss.Next()) {

     if (Ss.IsCmt()) {

       lastComment += 1;

     }

     else break;

   }

   if (Ss.Eof()) {TExcept::Throw("No Data to determine attribute types!");}

   TInt numCols = Ss.GetFlds();

   TVec<TAttrType> colAttrV(numCols);

   colAttrV.PutAll(atInt);

   while (true) {

     for (TInt i = 0; i < numCols; i++) {

       if (Ss.IsInt(i)) {

       }

       else if (Ss.IsFlt(i)) {

         colAttrV[i] = atFlt;

       }

       else {

         colAttrV[i] = atStr;

       }

     }

     currRow++;

     if (currRow > rowsToPeek || Ss.Eof()) break;

     Ss.Next();

   }

   // Default Separator is tab

   TSsParser SsNames(InFNm, Separator, false, false, false);

   for (int i = 0; i < lastComment; i++) { SsNames.Next();}

   TVec<TStr> attrV;

   TStr first(SsNames[0]);

   int begin = 0;

   TStr comment('#');

   if (first != comment) {

     for (int i = 1; i < first.Len(); i++){

       if (first[i] != ' ') { begin = i; break;}

     }

     attrV.Add(first.GetSubStr(begin));

   }

   for (int i = 1; i < SsNames.GetFlds(); i++) {attrV.Add(SsNames[i]);}

   for (TInt i = 0; i < numCols; i++) {

     S.Add(TPair<TStr,TAttrType>(attrV[i],colAttrV[i]));

   }

 }


 #ifdef GCC_ATOMIC

 // Parallel (OpenMP) loader for separated-value files that contain only
 // integer and float columns.
 //
 // Steps:
 //  1. optionally validate the title line against schema S;
 //  2. split the rest of the stream into one byte range per thread and count
 //     the lines in each range;
 //  3. turn the per-range counts into row offsets and pre-size every column;
 //  4. parse each range in parallel, writing rows at their final position;
 //  5. build the Next chain and the "_id" column.
 //
 // RelevantCols, when non-empty, maps each destination column to the source
 // field it is read from. A string column in T's schema makes the parse throw.
 //
 // Fixes relative to the previous version: an empty title field no longer
 // indexes one character before the field, and a file without data rows no
 // longer writes to Next[-1].
 //
 // NOTE(review): TExcept::Throw is called from inside the parallel loop in
 // step 4 -- confirm how the exception mechanism behaves when raised inside an
 // OpenMP region.
 void TTable::LoadSSPar(PTable& T, const Schema& S, const TStr& InFNm, const TIntV& RelevantCols,
                         const char& Separator, TBool HasTitleLine) {
   // preloaded necessary variables
   TInt RowLen = T->Sch.Len();
   TVec<TAttrType> ColTypes = TVec<TAttrType>(RowLen);
   for (TInt i = 0; i < RowLen; i++) {
     ColTypes[i] = T->GetSchemaColType(i);
   }

   TSsParserMP Ss(InFNm, Separator);
   Ss.SkipCommentLines();

   // if title line (i.e. names of the columns) is included as first row in the
   // input file - use it to validate schema
   if (HasTitleLine) {
     Ss.Next();
     if (S.Len() != Ss.GetFlds()) {
       printf("%s\n", Ss[0]); TExcept::Throw("Table Schema Mismatch!");
     }
     for (TInt i = 0; i < Ss.GetFlds(); i++) {
       // remove carriage return char (only if the field is non-empty)
       TInt L = strlen(Ss[i]);
       if (L > 0 && Ss[i][L-1] < ' ') { Ss[i][L-1] = 0; }
       if (NormalizeColName(S[i].Val1) != NormalizeColName(Ss[i])) { TExcept::Throw("Table Schema Mismatch!"); }
     }
   }

   // Divide remaining part of stream into equal sized chunks
   // Find starting position in stream for each thread
   int64 Cnt = 0;
   uint64 Pos = Ss.GetStreamPos();
   uint64 Len = Ss.GetStreamLen();
   uint64 Rem = Len - Pos;
   int NumThreads = omp_get_max_threads();

   uint64 Delta = Rem / NumThreads;
   if (Delta < 1) Delta = 1;

   TVec<uint64> StartIntV(NumThreads);
   TVec<uint64> LineCountV(NumThreads);
   TVec<uint64> PrefixSumV(NumThreads);

   StartIntV[0] = Pos;
   for (int i = 1; i < NumThreads; i++) {
     StartIntV[i] = StartIntV[i-1] + Delta;
   }
   // extra end marker so that range i is [StartIntV[i], StartIntV[i+1])
   StartIntV.Add(Len);

   // Find number of lines handled by each thread
   omp_set_num_threads(NumThreads);
   #pragma omp parallel for schedule(dynamic) reduction(+:Cnt)
   for (int i = 0; i < NumThreads; i++) {
     LineCountV[i] = Ss.CountNewLinesInRange(StartIntV[i], StartIntV[i+1]);
     Cnt += LineCountV[i];
   }

   // Calculate row index offsets for each thread
   PrefixSumV[0] = 0;
   for (int i = 1; i < NumThreads; i++) {
     PrefixSumV[i] = PrefixSumV[i-1] + LineCountV[i-1];
   }
   Ss.SetStreamPos(Pos);

   // allocate memory for columns
   TInt IntColIdx = 0;
   TInt FltColIdx = 0;
   for (TInt i = 0; i < RowLen; i++) {
     switch (ColTypes[i]) {
       case atInt:
         T->IntCols[IntColIdx].Gen(Cnt);
         IntColIdx++;
         break;
       case atFlt:
         T->FltCols[FltColIdx].Gen(Cnt);
         FltColIdx++;
         break;
       case atStr:
         break;
     }
   }

   // Second pass: Cnt is recounted from the rows actually parsed.
   Cnt = 0;
   omp_set_num_threads(NumThreads);
   #pragma omp parallel for schedule(dynamic) reduction(+:Cnt)
   for (int i = 0; i < NumThreads; i++) {
     // calculate beginning of each line handled by thread
     TVec<uint64> LineStartPosV = Ss.GetStartPosV(StartIntV[i], StartIntV[i+1]);

     // parse line and fill rows
     for (uint64 k = 0; k < (uint64) LineStartPosV.Len(); k++) {
       TVec<char*> FieldsV;
       Ss.NextFromIndex(LineStartPosV[k], FieldsV);
       if (FieldsV.Len() != S.Len()) {
         TExcept::Throw("Error reading tsv file");
       }
       // per-row cursors into the int / float column containers
       TInt IntColIdx = 0;
       TInt FltColIdx = 0;
       // final row position: this range's offset plus the line number in it
       TInt RowIdx = PrefixSumV[i] + k;

       for (TInt j = 0; j < RowLen; j++) {
         switch (ColTypes[j]) {
           case atInt:
             if (RelevantCols.Len() == 0) {
               T->IntCols[IntColIdx][RowIdx] = (Ss.GetIntFromFldV(FieldsV, j));
             } else {
               T->IntCols[IntColIdx][RowIdx] = (Ss.GetIntFromFldV(FieldsV, RelevantCols[j]));
             }
             IntColIdx++;
             break;
           case atFlt:
             if (RelevantCols.Len() == 0) {
               T->FltCols[FltColIdx][RowIdx] = (Ss.GetFltFromFldV(FieldsV, j));
             } else {
               T->FltCols[FltColIdx][RowIdx] = (Ss.GetFltFromFldV(FieldsV, RelevantCols[j]));
             }
             FltColIdx++;
             break;
           case atStr:
             TExcept::Throw("TTable::LoadSS:: Str Col found\n");
             break;
         }
       }
       Cnt++;
     }
   }

   // set number of rows and "Next" vector
   T->NumRows = Cnt;
   T->NumValidRows = T->NumRows;

   T->Next.Clr();
   T->Next.Gen(Cnt);

   omp_set_num_threads(NumThreads);
   #pragma omp parallel for schedule(dynamic, 10000)
   for (int64 i = 0; i < Cnt-1; i++) {
     T->Next[i] = i+1;
   }
   T->IsNextDirty = 0;
   // Terminate the chain; with no rows there is no last element to mark.
   if (Cnt > 0) {
     T->Next[Cnt-1] = Last;
   }
   T->LastValidRow = T->NumRows - 1;

   T->IdColName = "_id";
   TInt IdCol = T->IntCols.Add();
   T->IntCols[IdCol].Gen(Cnt);

   // initialize ID column
   omp_set_num_threads(NumThreads);
   #pragma omp parallel for schedule(dynamic, 10000)
   for (int64 i = 0; i < Cnt; i++) {
     T->IntCols[IdCol][i] = i;
   }

   T->AddSchemaCol(T->IdColName, atInt);
   T->AddColType(T->IdColName, atInt, T->IntCols.Len()-1);
 }

 #endif // GCC_ATOMIC


 // Sequential loader for separated-value files; handles integer, float and
 // string columns.
 //
 // When HasTitleLine is set, the first row is checked against schema S (field
 // count and normalized names) and a mismatch throws. Every following row is
 // appended column by column to T. RelevantCols, when non-empty, maps each
 // destination column to the source field it is read from. Afterwards the
 // row counters, the Next chain and the row ids are initialized.
 //
 // Fixes relative to the previous version:
 //  - a file without data rows no longer underflows the unsigned row counter
 //    (Cnt-1), which made the Next loop run far past the end of the vector;
 //  - the diagnostic printf no longer indexes field S.Len() on rows that are
 //    too short to have it;
 //  - an empty title field no longer indexes one character before the field.
 void TTable::LoadSSSeq(
  PTable& T, const Schema& S, const TStr& InFNm, const TIntV& RelevantCols,
  const char& Separator, TBool HasTitleLine) {
   // preloaded necessary variables
   int RowLen = T->Sch.Len();
   TVec<TAttrType> ColTypes = TVec<TAttrType>(RowLen);
   for (int i = 0; i < RowLen; i++) {
     ColTypes[i] = T->GetSchemaColType(i);
   }

   // Sequential load
   TSsParser Ss(InFNm, Separator);
   // if title line (i.e. names of the columns) is included as first row in the
   // input file - use it to validate schema
   if (HasTitleLine) {
     Ss.Next();
     if (S.Len() != Ss.GetFlds()) {
       printf("%s\n", Ss[0]); TExcept::Throw("Table Schema Mismatch!");
     }
     for (int i = 0; i < Ss.GetFlds(); i++) {
       // remove carriage return char (only if the field is non-empty)
       int L = strlen(Ss[i]);
       if (L > 0 && Ss[i][L-1] < ' ') { Ss[i][L-1] = 0; }
       if (NormalizeColName(S[i].Val1) != NormalizeColName(Ss[i])) { TExcept::Throw("Table Schema Mismatch!"); }
     }
   }

   // populate table columns
   uint64 Cnt = 0;
   while (Ss.Next()) {
     // per-row cursors into the int / float / string column containers
     int IntColIdx = 0;
     int FltColIdx = 0;
     int StrColIdx = 0;
     Assert(Ss.GetFlds() == S.Len()); // compiled only in debug
     if (Ss.GetFlds() != S.Len()) {
       // Show the first surplus field, if the row is long enough to have one.
       if (Ss.GetFlds() > S.Len()) { printf("%s\n", Ss[S.Len()]); }
       TExcept::Throw("Error reading tsv file");
     }
     for (int i = 0; i < RowLen; i++) {
       switch (ColTypes[i]) {
         case atInt:
           if (RelevantCols.Len() == 0) {
             T->IntCols[IntColIdx].Add(Ss.GetInt(i));
           } else {
             T->IntCols[IntColIdx].Add(Ss.GetInt(RelevantCols[i]));
           }
           IntColIdx++;
           break;
         case atFlt:
           if (RelevantCols.Len() == 0) {
             T->FltCols[FltColIdx].Add(Ss.GetFlt(i));
           } else {
             T->FltCols[FltColIdx].Add(Ss.GetFlt(RelevantCols[i]));
           }
           FltColIdx++;
           break;
         case atStr: {
           int ColIdx;
           if (RelevantCols.Len() == 0) {
             ColIdx = i;
           } else {
             ColIdx = RelevantCols[i];
           }
           TStr Sval = TStr(Ss[ColIdx]);
           T->AddStrVal(StrColIdx, Sval);
           StrColIdx++;
           break;
         }
       }
     }
     Cnt += 1;
   }

   // set number of rows and "Next" vector
   T->NumRows = static_cast<int>(Cnt);
   T->NumValidRows = T->NumRows;

   T->Next.Clr();
   T->Next.Gen(static_cast<int>(Cnt));
   // "i + 1 < Cnt" instead of "i < Cnt - 1": Cnt is unsigned, so Cnt - 1 would
   // wrap around to a huge value when no rows were read.
   for (uint64 i = 0; i + 1 < Cnt; i++) {
     T->Next[static_cast<int>(i)] = static_cast<int>(i+1);
   }
   T->IsNextDirty = 0;
   // Terminate the chain; with no rows there is no last element to mark.
   if (Cnt > 0) {
     T->Next[static_cast<int>(Cnt-1)] = Last;
   }
   T->LastValidRow = T->NumRows - 1;

   T->InitIds();
 }


 // Loads a table from the separated-value file InFNm.
 //
 // S describes every field in the file. RelevantCols, when non-empty, lists
 // the positions in S of the columns to keep (in the order given); when empty,
 // all columns are loaded. The new table is created in Context.
 //
 // The parallel loader is used only when GetMP() is set, the selected columns
 // contain no string column, and the build provides it; otherwise the
 // sequential loader runs.
 PTable TTable::LoadSS(const Schema& S, const TStr& InFNm, TTableContext* Context,
  const TIntV& RelevantCols, const char& Separator, TBool HasTitleLine) {
   bool NoStringCols = true;

   // find the schema for the new table which contains only relevant columns
   Schema SR;
   if (RelevantCols.Len() == 0) {
     SR = S;
   } else {
     for (int i = 0; i < RelevantCols.Len(); i++) {
       SR.Add(S[RelevantCols[i]]);
     }
   }
   PTable T = New(SR, Context);

   // find col types and check for string cols
   for (int i = 0; i < SR.Len(); i++) {
     if (T->GetSchemaColType(i) == atStr) {
       NoStringCols = false;
       break;
     }
   }

   if (GetMP() && NoStringCols) {
     // Right now, can load in parallel only in Linux (for mmap) and if
     // there are no string columns. LoadSSPar() itself is only compiled when
     // GCC_ATOMIC is defined, so require both macros before calling it.
 #if defined(GLib_LINUX) && defined(GCC_ATOMIC)
     LoadSSPar(T, S, InFNm, RelevantCols, Separator, HasTitleLine);
 #else
     LoadSSSeq(T, S, InFNm, RelevantCols, Separator, HasTitleLine);
 #endif
   } else {
     LoadSSSeq(T, S, InFNm, RelevantCols, Separator, HasTitleLine);
   }
   return T;
 }


 // Convenience overload: loads every column of the file by forwarding to the
 // full LoadSS() with an empty RelevantCols vector.
 PTable TTable::LoadSS(const Schema& S, const TStr& InFNm, TTableContext* Context,
  const char& Separator, TBool HasTitleLine) {
   const TIntV AllColumns;  // empty selection
   return LoadSS(S, InFNm, Context, AllColumns, Separator, HasTitleLine);
 }


 // Writes the table to the text file OutFNm in the format produced by Dump().
 //
 // Nothing is written (and no file is created) when the table has no valid
 // rows; a message is printed instead. If the file cannot be opened, the
 // failure is reported on stdout / stderr and the function returns without
 // throwing.
 //
 // The disabled (#if 0) copy of Dump()'s body that used to follow the Dump()
 // call has been removed.
 void TTable::SaveSS(const TStr& OutFNm) {
   if (NumValidRows == 0) {
     printf("Table is empty");
     return;
   }
   FILE* F = fopen(OutFNm.CStr(), "w");
   if (F == NULL) {
     printf("failed to open file %s\n", OutFNm.CStr());
     perror("fail ");
     return;
   }

   Dump(F);

   fclose(F);
 }


 void TTable::SaveBin(const TStr& OutFNm) {

   TFOut SOut(OutFNm);

   Save(SOut);

 }


 // Writes the table to SOut: the four row counters, Next, the three column
 // containers, and finally a map from denormalized column name to
 // (type code, index) where the type code is 0 = int, 1 = float, 2 = string.
 // The stream is flushed at the end.
 void TTable::Save(TSOut& SOut) {
   // bookkeeping and data
   NumRows.Save(SOut);
   NumValidRows.Save(SOut);
   FirstValidRow.Save(SOut);
   LastValidRow.Save(SOut);
   Next.Save(SOut);
   IntCols.Save(SOut);
   FltCols.Save(SOut);
   StrColMaps.Save(SOut);

   // column types, with the enum replaced by a plain integer code
   THash<TStr,TPair<TInt,TInt> > SavedColTypes;
   for (THash<TStr,TPair<TAttrType,TInt> >::TIter it = ColTypeMap.BegI(); it < ColTypeMap.EndI(); it++) {
     TPair<TAttrType,TInt> Entry = it.GetDat();
     TStr PlainName = DenormalizeColName(it.GetKey());
     TInt TypeCode;
     switch (Entry.GetVal1()) {
       case atInt: TypeCode = TInt(0); break;
       case atFlt: TypeCode = TInt(1); break;
       case atStr: TypeCode = TInt(2); break;
       default: continue;  // any other type is not written
     }
     SavedColTypes.AddDat(PlainName, TPair<TInt,TInt>(TypeCode, Entry.GetVal2()));
   }
   SavedColTypes.Save(SOut);
   SOut.Flush();
 }


 // Prints the table to OutF as tab-separated text.
 //
 // The first line is "# " followed by the denormalized column names; each
 // valid row follows on its own line, integers printed with %d, floats with
 // %f and strings with %s.
 //
 // A table with an empty schema now prints just the "# " header line; the
 // previous version indexed the schema at position -1 in that case.
 void TTable::Dump(FILE *OutF) const {
   TInt L = Sch.Len();
   Schema DSch = DenormalizeSchema();

   // print title (schema), LoadSS() will take first line as (optional) schema
   fprintf(OutF, "# ");
   for (TInt i = 0; i < L; i++) {
     char C = (i == L-1) ? '\n' : '\t';
     fprintf(OutF, "%s%c", DSch[i].Val1.CStr(), C);
   }
   if (L == 0) {
     // no column terminated the header line above
     fprintf(OutF, "\n");
   }
   // print table contents
   for (TRowIterator RowI = BegRI(); RowI < EndRI(); RowI++) {
     for (TInt i = 0; i < L; i++) {
       // tab between fields, newline after the last one
       char C = (i == L-1) ? '\n' : '\t';
       switch (GetSchemaColType(i)) {
         case atInt: {
           fprintf(OutF, "%d%c", RowI.GetIntAttr(GetSchemaColName(i)).Val, C);
           break;
         }
         case atFlt: {
           fprintf(OutF, "%f%c", RowI.GetFltAttr(GetSchemaColName(i)).Val, C);
           break;
         }
         case atStr: {
           fprintf(OutF, "%s%c", RowI.GetStrAttr(GetSchemaColName(i)).CStr(), C);
           break;
         }
       }
     }
   }
 }


 // Re-homes the table's strings in NewContext and makes it the table's
 // context.
 //
 // For every string column and every valid row, the string is looked up via
 // the current context, added to NewContext->StringVals, and the row's entry
 // in StrColMaps is overwritten with the id returned by the new context.
 // Returns the new context pointer. The previous context is left untouched.
 //
 // The disabled (#if 0) debug dump that used to precede the loop has been
 // removed.
 TTableContext* TTable::ChangeContext(TTableContext* NewContext) {
   TInt L = Sch.Len();

   // iterate over all columns
   for (TInt i = 0; i < L; i++) {
     // skip non-string columns
     if (GetSchemaColType(i) != atStr) {
       continue;
     }

     TInt ColIdx = GetColIdx(GetSchemaColName(i));

     // iterate over all rows
     for (TRowIterator RowI = BegRI(); RowI < EndRI(); RowI++) {
       TInt RowIdx = RowI.GetRowIdx();
       // get the string (still resolved through the old context)
       TStr Key = GetStrValIdx(ColIdx, RowIdx);
       // add the string to the new context
       TInt KeyId = TInt(NewContext->StringVals.AddKey(Key));
       // change the value in the table
       StrColMaps[ColIdx][RowIdx] = KeyId;
     }
   }

   // set the new context
   Context = NewContext;
   return Context;
 }


 void TTable::AddStrVal(const TInt& ColIdx, const TStr& Key) {

   TInt KeyId = TInt(Context->StringVals.AddKey(Key));

   //printf("TTable::AddStrVal2  %d  .%s.  %d\n", ColIdx.Val, Key.CStr(), KeyId.Val);

   StrColMaps[ColIdx].Add(KeyId);

 }


 void TTable::AddStrVal(const TStr& Col, const TStr& Key) {

   if (GetColType(Col) != atStr) {

     TExcept::Throw(Col + " is not a string valued column");

   }

   //printf("TTable::AddStrVal1  .%s.  .%s.\n", Col.CStr(), Key.CStr());

   AddStrVal(GetColIdx(Col), Key);

 }


 // Marks column Attr as a graph attribute. Its normalized name is appended to
 // the edge, source-node and/or destination-node attribute lists according to
 // the three flags. Throws if Attr is not a column of this table.
 void TTable::AddGraphAttribute(const TStr& Attr, TBool IsEdge, TBool IsSrc, TBool IsDst) {
   if (!IsColName(Attr)) {
     TExcept::Throw(Attr + ": No such column");
   }
   if (IsEdge) {
     EdgeAttrV.Add(NormalizeColName(Attr));
   }
   if (IsSrc) {
     SrcNodeAttrV.Add(NormalizeColName(Attr));
   }
   if (IsDst) {
     DstNodeAttrV.Add(NormalizeColName(Attr));
   }
 }


 // Vector form of AddGraphAttribute(). All names are validated first, so an
 // unknown column makes the call throw before any attribute list is modified.
 // Then each normalized name is appended to the lists selected by the flags.
 void TTable::AddGraphAttributeV(TStrV& Attrs, TBool IsEdge, TBool IsSrc, TBool IsDst) {
   // pass 1: validate
   for (int Pos = 0; Pos < Attrs.Len(); Pos++) {
     const TStr& Name = Attrs[Pos];
     if (IsColName(Name)) {
       continue;
     }
     TExcept::Throw(Name + ": no such column");
   }
   // pass 2: record
   for (int Pos = 0; Pos < Attrs.Len(); Pos++) {
     const TStr& Name = Attrs[Pos];
     if (IsEdge) {
       EdgeAttrV.Add(NormalizeColName(Name));
     }
     if (IsSrc) {
       SrcNodeAttrV.Add(NormalizeColName(Name));
     }
     if (IsDst) {
       DstNodeAttrV.Add(NormalizeColName(Name));
     }
   }
 }


 // Returns the source-node attributes whose column type is integer, in the
 // order they appear in SrcNodeAttrV.
 TStrV TTable::GetSrcNodeIntAttrV() const {
   // constructed from (IntCols.Len(), 0) and filled only through Add()
   TStrV Picked(IntCols.Len(), 0);
   for (int Pos = 0; Pos < SrcNodeAttrV.Len(); Pos++) {
     const TStr& Name = SrcNodeAttrV[Pos];
     if (GetColType(Name) != atInt) {
       continue;
     }
     Picked.Add(Name);
   }
   return Picked;
 }


 // Returns the destination-node attributes whose column type is integer, in
 // the order they appear in DstNodeAttrV.
 TStrV TTable::GetDstNodeIntAttrV() const {
   // constructed from (IntCols.Len(), 0) and filled only through Add()
   TStrV Picked(IntCols.Len(), 0);
   for (int Pos = 0; Pos < DstNodeAttrV.Len(); Pos++) {
     const TStr& Name = DstNodeAttrV[Pos];
     if (GetColType(Name) != atInt) {
       continue;
     }
     Picked.Add(Name);
   }
   return Picked;
 }


 // Returns the edge attributes whose column type is integer, in the order
 // they appear in EdgeAttrV.
 TStrV TTable::GetEdgeIntAttrV() const {
   // constructed from (IntCols.Len(), 0) and filled only through Add()
   TStrV Picked(IntCols.Len(), 0);
   for (int Pos = 0; Pos < EdgeAttrV.Len(); Pos++) {
     const TStr& Name = EdgeAttrV[Pos];
     if (GetColType(Name) != atInt) {
       continue;
     }
     Picked.Add(Name);
   }
   return Picked;
 }


 // Returns the source-node attributes whose column type is atFlt,
 // in the order in which they appear in SrcNodeAttrV.
 TStrV TTable::GetSrcNodeFltAttrV() const {
   // Constructed as (FltCols.Len(), 0); presumably a capacity hint with no initial elements.
   TStrV Selected(FltCols.Len(), 0);
   for (int k = 0; k < SrcNodeAttrV.Len(); k++) {
     const TStr& Name = SrcNodeAttrV[k];
     if (GetColType(Name) != atFlt) { continue; }
     Selected.Add(Name);
   }
   return Selected;
 }


 // Returns the destination-node attributes whose column type is atFlt,
 // in the order in which they appear in DstNodeAttrV.
 TStrV TTable::GetDstNodeFltAttrV() const {
   // Constructed as (FltCols.Len(), 0); presumably a capacity hint with no initial elements.
   TStrV Selected(FltCols.Len(), 0);
   for (int k = 0; k < DstNodeAttrV.Len(); k++) {
     const TStr& Name = DstNodeAttrV[k];
     if (GetColType(Name) != atFlt) { continue; }
     Selected.Add(Name);
   }
   return Selected;
 }


 // Returns the edge attributes whose column type is atFlt,
 // in the order in which they appear in EdgeAttrV.
 TStrV TTable::GetEdgeFltAttrV() const {
   // Constructed as (FltCols.Len(), 0); presumably a capacity hint with no initial elements.
   TStrV Selected(FltCols.Len(), 0);
   for (int k = 0; k < EdgeAttrV.Len(); k++) {
     const TStr& Name = EdgeAttrV[k];
     if (GetColType(Name) != atFlt) { continue; }
     Selected.Add(Name);
   }
   return Selected;
 }


 // Returns the source-node attributes whose column type is atStr,
 // in the order in which they appear in SrcNodeAttrV.
 TStrV TTable::GetSrcNodeStrAttrV() const {
   // Constructed as (StrColMaps.Len(), 0); presumably a capacity hint with no initial elements.
   TStrV Selected(StrColMaps.Len(), 0);
   for (int k = 0; k < SrcNodeAttrV.Len(); k++) {
     const TStr& Name = SrcNodeAttrV[k];
     if (GetColType(Name) != atStr) { continue; }
     Selected.Add(Name);
   }
   return Selected;
 }


 // Returns the destination-node attributes whose column type is atStr,
 // in the order in which they appear in DstNodeAttrV.
 TStrV TTable::GetDstNodeStrAttrV() const {
   // Constructed as (StrColMaps.Len(), 0); presumably a capacity hint with no initial elements.
   TStrV Selected(StrColMaps.Len(), 0);
   for (int k = 0; k < DstNodeAttrV.Len(); k++) {
     const TStr& Name = DstNodeAttrV[k];
     if (GetColType(Name) != atStr) { continue; }
     Selected.Add(Name);
   }
   return Selected;
 }


 // Returns the edge attributes whose column type is atStr,
 // in the order in which they appear in EdgeAttrV.
 TStrV TTable::GetEdgeStrAttrV() const {
   // Constructed as (StrColMaps.Len(), 0); presumably a capacity hint with no initial elements.
   TStrV Selected(StrColMaps.Len(), 0);
   for (int k = 0; k < EdgeAttrV.Len(); k++) {
     const TStr& Name = EdgeAttrV[k];
     if (GetColType(Name) != atStr) { continue; }
     Selected.Add(Name);
   }
   return Selected;
 }


 void TTable::Rename(const TStr& column, const TStr& NewLabel) {

   // This function is necessary, for example to take the union of two tables

   // where the attribute names don't match.

   if (!IsColName(column)) { TExcept::Throw("no such column " + column); }

   TPair<TAttrType,TInt> ColVal = GetColTypeMap(column);

   DelColType(column);

   AddColType(NewLabel, ColVal);

   TStr NColName = NormalizeColName(column);

   TStr NLabel = NormalizeColName(NewLabel);

   for (TInt c = 0; c < Sch.Len(); c++) {

     if (Sch[c].Val1 == NColName) {

       Sch.SetVal(c, TPair<TStr, TAttrType>(NLabel, Sch[c].Val2));

       break;

     }

   }

 }


 void TTable::RemoveFirstRow() {

   if (FirstValidRow == LastValidRow) {

     LastValidRow = -1;

   }


   TInt Old = FirstValidRow;

   FirstValidRow = Next[FirstValidRow];

   Next[Old] = TTable::Invalid;

   NumValidRows--;

   TInt IdColIdx = GetColIdx(GetIdColName());

   RowIdMap.AddDat(IntCols[IdColIdx][Old], Invalid);

 }


 // Unlinks row RowIdx from the row chain.
 //   RowIdx     - physical index of the row to remove.
 //   PrevRowIdx - physical index of the row that precedes RowIdx in the chain
 //                (not used when RowIdx is the first valid row).
 // Removing the first valid row is delegated to RemoveFirstRow(); passing
 // TTable::Last is a no-op. The removed row's Next entry becomes Invalid and
 // its id is mapped to Invalid in RowIdMap.
 void TTable::RemoveRow(TInt RowIdx, TInt PrevRowIdx) {
   if (RowIdx == FirstValidRow) {
     RemoveFirstRow();
     return;
   }
   Assert(RowIdx != TTable::Invalid);
   if (RowIdx == TTable::Last) { return; }
   // Bypass the removed row in the chain.
   Next[PrevRowIdx] = Next[RowIdx];
   // When the last valid row is removed, its predecessor becomes the last
   // valid row (previously LastValidRow was re-assigned to the removed row).
   if (LastValidRow == RowIdx) {
     LastValidRow = PrevRowIdx;
   }
   Next[RowIdx] = TTable::Invalid;
   NumValidRows--;
   TInt IdColIdx = GetColIdx(GetIdColName());
   RowIdMap.AddDat(IntCols[IdColIdx][RowIdx], Invalid);
 }


 // Removes every row whose physical index is not listed in KeepV.
 // KeepV is expected to list row indices in table traversal order (its last
 // element becomes LastValidRow). An empty KeepV removes all rows.
 void TTable::KeepSortedRows(const TIntV& KeepV) {
   // Hash set of the rows to keep, for constant-time membership tests.
   TIntIntH KeepH(KeepV.Len());
   for (TInt i = 0; i < KeepV.Len(); i++) {
     KeepH.AddKey(KeepV[i]);
   }

   TRowIteratorWithRemove RowI = BegRIWR();
   TInt KeepSize = 0;  // number of kept rows encountered so far
   while (RowI.GetNextRowIdx() != Last) {
     if (KeepSize < KeepV.Len()) {
       if (KeepH.IsKey(RowI.GetNextRowIdx())) {
         KeepSize++;
         RowI++;
       } else {
         RowI.RemoveNext();
       }
     } else {
       // Covered all of KeepV. Remove the rest of the rows.
       // Current RowI.CurrRowIdx is the last element of KeepV.
       RowI.RemoveNext();
     }
   }
   // With an empty KeepV there is no last kept row; indexing KeepV[-1] would
   // be out of range. LastValidRow is then left as the removal calls above
   // set it (presumably -1 once the final row is removed - confirm against
   // TRowIteratorWithRemove::RemoveNext).
   if (KeepV.Len() > 0) {
     LastValidRow = KeepV[KeepV.Len()-1];
   }
 }


 // Splits the valid rows into consecutive ranges and appends them to
 // Partitions as (start row index, end row index) pairs.
 //   Partitions    - output vector; ranges are appended to it.
 //   NumPartitions - requested number of ranges; recomputed when that would
 //                   give ranges smaller than 10 rows.
 // Each range ends at the row index where the next range starts; the final
 // range ends at TTable::Last.
 // NOTE(review): NumPartitions == 0 divides by zero below - confirm callers
 // never pass 0.
 void TTable::GetPartitionRanges(TIntPrV& Partitions, TInt NumPartitions) const {

   // Ceiling of NumValidRows / NumPartitions.
   TInt PartitionSize = NumValidRows / (NumPartitions);

   if (NumValidRows % NumPartitions != 0) PartitionSize++;

   // Enforce a minimum of 10 rows per range and derive the range count from it.
   if (PartitionSize < 10) {

     PartitionSize = 10;

     NumPartitions = NumValidRows / PartitionSize;

   }

   Partitions.Reserve(NumPartitions+1);


   TInt currRow = FirstValidRow;

   TInt currStart = currRow;

   if (IsNextDirty) {

     // Follow the Next chain row by row, closing a range every PartitionSize rows.
     TInt currCount = PartitionSize;

     while (currRow != TTable::Last) {

       if (currCount == 0) {

         Partitions.Add(TIntPr(currStart, currRow));

         currStart = currRow;

         currCount = PartitionSize;

       }

       currRow = Next[currRow];

       currCount--;

     }

     // currRow equals TTable::Last here, so the final range ends at Last.
     Partitions.Add(TIntPr(currStart, currRow));

   } else {

     // Optimize for the case when rows are logically in sequence.

     // Jump PartitionSize physical indices at a time instead of walking the chain.
     currRow += PartitionSize;

     while (currRow != TTable::Last && currRow < Next.Len()) {

       // A range boundary must not fall on a row marked Invalid; advance by one until it does not.
       if (Next[currRow] == TTable::Invalid) { currRow++; continue; }

       Partitions.Add(TIntPr(currStart, currRow));

       currStart = currRow;

       currRow += PartitionSize;

     }

     Partitions.Add(TIntPr(currStart, TTable::Last));

   }

   //printf("Num partitions: %d\n", Partitions.Len());

 }


 /*****  Grouping Utility functions ****/

 // Validates a group-by column: throws when GroupBy is not a column of this
 // table, or when its type differs from the expected AttrType.
 void TTable::GroupingSanityCheck(const TStr& GroupBy, const TAttrType& AttrType) const {
   const bool Exists = IsColName(GroupBy);
   if (!Exists) {
     TExcept::Throw("no such column " + GroupBy);
   }
   const bool TypeMatches = (GetColType(GroupBy) == AttrType);
   if (!TypeMatches) {
     TExcept::Throw(GroupBy + " values are not of expected type");
   }
 }


 #ifdef GCC_ATOMIC

 // Groups rows by the integer column GroupBy, processing row ranges under an
 // OpenMP parallel-for.
 //   GroupBy        - name of the integer column to group on.
 //   Grouping       - output: group-by value -> rows of that group; it is
 //                    (re)generated with room for NumValidRows entries.
 //   UsePhysicalIds - when true, groups hold physical row indices; otherwise
 //                    they hold the value of the id column.
 // Throws when neither physical ids nor an id column are available, or when
 // GroupBy fails GroupingSanityCheck for type atInt.
 void TTable::GroupByIntColMP(const TStr& GroupBy, THashMP<TInt, TIntV>& Grouping, TBool UsePhysicalIds) const {
   TInt IdColIdx = GetColIdx(IdColName);
   TInt GroupByColIdx = GetColIdx(GroupBy);
   if (!UsePhysicalIds && IdColIdx < 0) {
     TExcept::Throw("Grouping: Either use physical row ids, or have an id column");
   }
   GroupingSanityCheck(GroupBy, atInt);

   // Row ranges that the loop iterations work on independently.
   TIntPrV Partitions;
   GetPartitionRanges(Partitions, 8*CHUNKS_PER_THREAD);

   Grouping.Gen(NumValidRows);

   #pragma omp parallel for schedule(dynamic, CHUNKS_PER_THREAD) //num_threads(1)
   for (int i = 0; i < Partitions.Len(); i++) {
     // Each range is traversed from its start row up to, but excluding, its end row.
     TRowIterator RowI(Partitions[i].GetVal1(), this);
     TRowIterator EndI(Partitions[i].GetVal2(), this);
     while (RowI < EndI) {
       TInt idx = UsePhysicalIds ? RowI.GetRowIdx() : RowI.GetIntAttr(IdColIdx);
       UpdateGrouping<TInt>(Grouping, RowI.GetIntAttr(GroupByColIdx), idx);
       RowI++;
     }
   }
 }

 #endif // GCC_ATOMIC


 void TTable::Unique(const TStr& Col) {

   TIntV RemainingRows;

   TStr NCol = NormalizeColName(Col);

   switch (GetColType(NCol)) {

     case atInt: {

       TIntIntVH Grouping;

       GroupByIntCol(NCol, Grouping, TIntV(), true, true);

       for (TIntIntVH::TIter it = Grouping.BegI(); it < Grouping.EndI(); it++) {

         RemainingRows.Add(it->Dat[0]);

       }

       break;

     }

     case atFlt: {

       THash<TFlt,TIntV> Grouping;

       GroupByFltCol(NCol, Grouping, TIntV(), true, true);

       for (THash<TFlt,TIntV>::TIter it = Grouping.BegI(); it < Grouping.EndI(); it++) {

         RemainingRows.Add(it->Dat[0]);

       }

       break;

     }

     case atStr: {

       TIntIntVH Grouping;

       GroupByStrCol(NCol, Grouping, TIntV(), true, true);

       for (TIntIntVH::TIter it = Grouping.BegI(); it < Grouping.EndI(); it++) {

         RemainingRows.Add(it->Dat[0]);

       }

       break;

     }

   }

   KeepSortedRows(RemainingRows);

 }


 void TTable::Unique(const TStrV& Cols, TBool Ordered) {

   if(Cols.Len() == 1){

         Unique(Cols[0]);

         return;

   }

   TStrV NCols = NormalizeColNameV(Cols);

   THash<TGroupKey, TPair<TInt, TIntV> > Grouping;

   TIntV UniqueVec;

   GroupAux(NCols, Grouping, Ordered, "", true, UniqueVec, true);

   KeepSortedRows(UniqueVec);

 }


 void TTable::StoreGroupCol(const TStr& GroupColName, const TVec<TPair<TInt, TInt> >& GroupAndRowIds) {

   // Add a column where the value of the i'th row is the group id of row i.

   IntCols.Add(TIntV(NumRows));

   TInt L = IntCols.Len();

   AddColType(GroupColName, atInt, L-1);

   // Store group id for each row.

   for (TInt i = 0; i < GroupAndRowIds.Len(); i++) {

     IntCols[L-1][GroupAndRowIds[i].Val2] = GroupAndRowIds[i].Val1;

   }

 }


 // Core grouping logic.

 // Groups the rows of the table by the columns listed in GroupBy.
 //   GroupBy        - names of the group-by columns (int, flt and str columns may be mixed).
 //   Grouping       - output: group key -> (group number, ids of the rows in the group).
 //   Ordered        - when false, each part of the key is sorted, so the order of
 //                    the values within a key does not distinguish groups.
 //   GroupColName   - when non-empty, an integer column of this name is added that
 //                    holds the group number of each recorded row.
 //   KeepUnique     - when true, only the first row of every group is recorded
 //                    (in Grouping and UniqueVec) and the table's group mappings
 //                    are not updated.
 //   UniqueVec      - output (KeepUnique only): id of the first row of each group.
 //   UsePhysicalIds - when true, row ids are physical row indices; otherwise they
 //                    are read from the id column.
 // Throws when an id column is required but missing, or when a group-by column
 // does not exist.
 void TTable::GroupAux(const TStrV& GroupBy, THash<TGroupKey, TPair<TInt, TIntV> >& Grouping,

  TBool Ordered, const TStr& GroupColName, TBool KeepUnique, TIntV& UniqueVec, TBool UsePhysicalIds) {

   TInt IdColIdx = GetColIdx(IdColName);

   if(!UsePhysicalIds && IdColIdx < 0){

         TExcept::Throw("Grouping: Either use physical row ids, or have an id column");

   }

   // Column indices of the group-by columns, separated by column type.
   TIntV IntGroupByCols;

   TIntV FltGroupByCols;

   TIntV StrGroupByCols;

   // get indices for each column type

   for (TInt c = 0; c < GroupBy.Len(); c++) {

         //printf("GroupBy col %d: %s\n", c.Val, GroupBy[c].CStr());

     if (!IsColName(GroupBy[c])) {

       TExcept::Throw("no such column " + GroupBy[c]);

     }


     TPair<TAttrType, TInt> ColType = GetColTypeMap(GroupBy[c]);

     switch (ColType.Val1) {

       case atInt:

         IntGroupByCols.Add(ColType.Val2);

         break;

       case atFlt:

         FltGroupByCols.Add(ColType.Val2);

         break;

       case atStr:

         StrGroupByCols.Add(ColType.Val2);

         break;

     }

   }


   TInt IKLen = IntGroupByCols.Len();

   TInt FKLen = FltGroupByCols.Len();

   TInt SKLen = StrGroupByCols.Len();


   // Next group number to hand out.
   TInt GroupNum = 0;

   // (group number, physical row index) pairs, collected only when a group column is requested.
   TVec<TPair<TInt, TInt> > GroupAndRowIds;

   //printf("done GroupAux initialization\n");


   // iterate over rows

   for (TRowIterator it = BegRI(); it < EndRI(); it++) {

     // IKey is sized for the int values plus the string values appended further down.
     TIntV IKey(IKLen + SKLen, 0);

     TFltV FKey(FKLen, 0);

     TIntV SKey(SKLen, 0);


     // find group key

     for (TInt c = 0; c < IKLen; c++) {

       IKey.Add(it.GetIntAttr(IntGroupByCols[c]));

     }

     for (TInt c = 0; c < FKLen; c++) {

       FKey.Add(it.GetFltAttr(FltGroupByCols[c]));

     }

     for (TInt c = 0; c < SKLen; c++) {

       SKey.Add(it.GetStrMapById(StrGroupByCols[c]));

     }

     // Unordered grouping: sort each key part so permutations of the same values collide.
     if (!Ordered) {

       if (IKLen > 0) { IKey.ISort(0, IKey.Len()-1, true); }

       if (FKLen > 0) { FKey.ISort(0, FKey.Len()-1, true); }

       if (SKLen > 0) { SKey.ISort(0, SKey.Len()-1, true); }

     }

     // The string part of the key is stored after the int part inside IKey.
     for (TInt c = 0; c < SKLen; c++) {

       IKey.Add(SKey[c]);

     }


     // look for group matching the key

     TGroupKey GroupKey = TGroupKey(IKey, FKey);


     TInt RowIdx = it.GetRowIdx();

     // Id recorded for this row: physical index, or the value of the id column.
     TInt idx = UsePhysicalIds ? it.GetRowIdx() : IntCols[IdColIdx][it.GetRowIdx()];

     if (!Grouping.IsKey(GroupKey)) {

       // Grouping key hasn't been seen before, create a new group

       TPair<TInt, TIntV> NewGroup;

       NewGroup.Val1 = GroupNum;

       NewGroup.Val2.Add(idx);

       Grouping.AddDat(GroupKey, NewGroup);

       if (GroupColName != "") {

         GroupAndRowIds.Add(TPair<TInt, TInt>(GroupNum, RowIdx));

       }

       if (KeepUnique) {

         UniqueVec.Add(idx);

       }

       GroupNum++;

     } else {

       // Grouping key has been seen before, update corresponding group

       // In KeepUnique mode later rows of a known group are ignored entirely.
       if (!KeepUnique) {

         TPair<TInt, TIntV>& NewGroup = Grouping.GetDat(GroupKey);

         NewGroup.Val2.Add(idx);

         if (GroupColName != "") {

           GroupAndRowIds.Add(TPair<TInt, TInt>(NewGroup.Val1, RowIdx));

         }

       }

     }

   }

   // printf("KeepUnique: %d\n", KeepUnique.Val);

   // update group mapping

   if (!KeepUnique) {

     // Register the statement under the group column name and record both
     // directions: group number -> key, and key -> row ids.
     GroupStmt Stmt(NormalizeColNameV(GroupBy), Ordered, UsePhysicalIds);

     GroupStmtNames.AddDat(GroupColName, Stmt);

     GroupIDMapping.AddKey(Stmt);

     GroupMapping.AddKey(Stmt);

     //printf("Adding statement: ");

     //Stmt.Print();

     for (THash<TGroupKey, TPair<TInt, TIntV> >::TIter it = Grouping.BegI(); it < Grouping.EndI(); it++) {

       TGroupKey key = it.GetKey();

       TPair<TInt, TIntV> group = it.GetDat();

       GroupIDMapping.GetDat(Stmt).AddDat(group.Val1, TGroupKey(key));

       GroupMapping.GetDat(Stmt).AddDat(TGroupKey(key), TIntV(group.Val2));

     }

   }


   // add a column to the table

   // NOTE(review): in KeepUnique mode only the first row of each group was
   // added to GroupAndRowIds, so the other rows keep the new column's initial value.
   if (GroupColName != "") {

     StoreGroupCol(GroupColName, GroupAndRowIds);

     AddSchemaCol(GroupColName, atInt);  // update schema

   }

 }


 /*

 // Core grouping logic.

 #ifdef USE_OPENMP

 void TTable::GroupAuxMP(const TStrV& GroupBy, THashGenericMP<TGroupKey, TPair<TInt, TIntV> >& Grouping,

  TBool Ordered, const TStr& GroupColName, TBool KeepUnique, TIntV& UniqueVec, TBool UsePhysicalIds) {

   //double startFn = omp_get_wtime();

   TIntV IntGroupByCols;

   TIntV FltGroupByCols;

   TIntV StrGroupByCols;

   // get indices for each column type

   for (TInt c = 0; c < GroupBy.Len(); c++) {

     if (!IsColName(GroupBy[c])) {

       TExcept::Throw("no such column " + GroupBy[c]);

     }


     TPair<TAttrType, TInt> ColType = GetColTypeMap(GroupBy[c]);

     switch (ColType.Val1) {

       case atInt:

         IntGroupByCols.Add(ColType.Val2);

         break;

       case atFlt:

         FltGroupByCols.Add(ColType.Val2);

         break;

       case atStr:

         StrGroupByCols.Add(ColType.Val2);

         break;

     }

   }


   TInt IKLen = IntGroupByCols.Len();

   TInt FKLen = FltGroupByCols.Len();

   TInt SKLen = StrGroupByCols.Len();


   TInt GroupNum = 0;

   TInt IdColIdx = GetColIdx(IdColName);


   //double endInit = omp_get_wtime();

   //printf("Init time = %f\n", endInit-startFn);


   TVec<TPair<TInt, TInt> > GroupAndRowIds;


   // iterate over rows

   for (TRowIterator it = BegRI(); it < EndRI(); it++) {

     TIntV IKey(IKLen + SKLen, 0);

     TFltV FKey(FKLen, 0);

     TIntV SKey(SKLen, 0);


     // find group key

     for (TInt c = 0; c < IKLen; c++) {

       IKey.Add(it.GetIntAttr(IntGroupByCols[c]));

     }

     for (TInt c = 0; c < FKLen; c++) {

       FKey.Add(it.GetFltAttr(FltGroupByCols[c]));

     }

     for (TInt c = 0; c < SKLen; c++) {

       SKey.Add(it.GetStrMapById(StrGroupByCols[c]));

     }

     if (!Ordered) {

       if (IKLen > 0) { IKey.ISort(0, IKey.Len()-1, true); }

       if (FKLen > 0) { FKey.ISort(0, FKey.Len()-1, true); }

       if (SKLen > 0) { SKey.ISort(0, SKey.Len()-1, true); }

     }

     for (TInt c = 0; c < SKLen; c++) {

       IKey.Add(SKey[c]);

     }


     // look for group matching the key

     TGroupKey GroupKey = TGroupKey(IKey, FKey);


     TInt RowIdx = it.GetRowIdx();

     if (!Grouping.IsKey(GroupKey)) {

       // Grouping key hasn't been seen before, create a new group

       TPair<TInt, TIntV> NewGroup;

       NewGroup.Val1 = GroupNum;

       if(IdColIdx > 0){

         NewGroup.Val2.Add(IntCols[IdColIdx][RowIdx]);

       }

       Grouping.AddDat(GroupKey, NewGroup);

       if (GroupColName != "") {

         GroupAndRowIds.Add(TPair<TInt, TInt>(GroupNum, RowIdx));

       }

       if (KeepUnique) {

         UniqueVec.Add(RowIdx);

       }

       GroupNum++;

     } else {

       // Grouping key has been seen before, update corresponding group

       if (!KeepUnique) {

         TPair<TInt, TIntV>& NewGroup = Grouping.GetDat(GroupKey);

         if(IdColIdx > 0){

                 NewGroup.Val2.Add(IntCols[IdColIdx][RowIdx]);

         }

         if (GroupColName != "") {

           GroupAndRowIds.Add(TPair<TInt, TInt>(NewGroup.Val1, RowIdx));

         }

       }

     }

   }


   //double endIter = omp_get_wtime();

   //printf("Iter time = %f\n", endIter-endInit);


   // update group mapping

   if (!KeepUnique) {

     TPair<TStrV, TBool> GroupStmt(GroupBy, Ordered);

     GroupStmtNames.AddDat(GroupColName, GroupStmt);

     GroupIDMapping.AddDat(GroupStmt);

     GroupMapping.AddDat(GroupStmt);

     for (THash<TGroupKey, TPair<TInt, TIntV> >::TIter it = Grouping.BegI(); it < Grouping.EndI(); it++) {

       TGroupKey key = it.GetKey();

       TPair<TInt, TIntV> group = it.GetDat();

       GroupIDMapping.GetDat(GroupStmt).AddDat(group.Val1, key);

       GroupMapping.GetDat(GroupStmt).AddDat(key, group.Val2);

     }

   }


   //double endMapping = omp_get_wtime();

   //printf("Mapping time = %f\n", endMapping-endIter);


   // add a column to the table

   if (GroupColName != "") {

     StoreGroupCol(GroupColName, GroupAndRowIds);

     AddSchemaCol(GroupColName, atInt);  // update schema

   }


   //double endStore = omp_get_wtime();

   //printf("Store time = %f\n", endStore-endMapping);

 }

 #endif // USE_OPENMP

 */


 void TTable::Group(const TStrV& GroupBy, const TStr& GroupColName, TBool Ordered, TBool UsePhysicalIds) {

   TStrV NGroupBy = NormalizeColNameV(GroupBy);

   TStr NGroupColName = NormalizeColName(GroupColName);

   TIntV UniqueVec;

   THash<TGroupKey, TPair<TInt, TIntV> > Grouping;

   GroupAux(NGroupBy, Grouping, Ordered, NGroupColName, false, UniqueVec, UsePhysicalIds);

 }


 // Not implemented yet: the body is empty, so calling this has no effect.
 // NOTE(review): judging by the name, this is presumably meant to drop stored
 // groupings that rely on physical row ids - confirm the intended behavior.
 void TTable::InvalidatePhysicalGroupings(){

         //TODO

 }


 // Not implemented yet: the body is empty and Attr is ignored.
 // NOTE(review): judging by the name, this is presumably meant to drop stored
 // groupings that involve column Attr - confirm the intended behavior.
 void TTable::InvalidateAffectedGroupings(const TStr& Attr){

         //TODO

 }


 void TTable::Aggregate(const TStrV& GroupByAttrs, TAttrAggr AggOp,

  const TStr& ValAttr, const TStr& ResAttr, TBool Ordered) {


    for (TInt c = 0; c < GroupByAttrs.Len(); c++) {

     if (!IsColName(GroupByAttrs[c])) {

       TExcept::Throw("no such column " + GroupByAttrs[c]);

     }

    }


   // double startFn = omp_get_wtime();

   TStrV NGroupByAttrs = NormalizeColNameV(GroupByAttrs);

   TBool UsePhysicalIds = (GetColIdx(IdColName) < 0);


   THash<TInt,TIntV> GroupByIntMapping;

   THash<TFlt,TIntV> GroupByFltMapping;

   THash<TInt,TIntV> GroupByStrMapping;

   THash<TGroupKey,TIntV> Mapping;

 #ifdef GCC_ATOMIC

   THashMP<TInt,TIntV> GroupByIntMapping_MP(NumValidRows);

   TIntV GroupByIntMPKeys(NumValidRows);

 #endif

   TInt NumOfGroups = 0;

   TInt GroupingCase = 0;


   // check if grouping already exists

   GroupStmt Stmt(NGroupByAttrs, Ordered, UsePhysicalIds);

   if (GroupMapping.IsKey(Stmt)) {

     Mapping = GroupMapping.GetDat(Stmt);

   } else{

         if(NGroupByAttrs.Len() == 1){

                 switch(GetColType(NGroupByAttrs[0])){

                         case atInt:

 #ifdef GCC_ATOMIC

                                 if(GetMP()){

                                         GroupByIntColMP(NGroupByAttrs[0], GroupByIntMapping_MP, UsePhysicalIds);

                                         int x = 0;

                                         for(THashMP<TInt,TIntV>::TIter it = GroupByIntMapping_MP.BegI(); it < GroupByIntMapping_MP.EndI(); it++){

                                                 GroupByIntMPKeys[x] = it.GetKey();

                                                 x++;

                                                 /*

                                                 printf("%d --> ", it.GetKey().Val);

                                                 TIntV& V = it.GetDat();

                                                 for(int i = 0; i < V.Len(); i++){

                                                         printf(" %d", V[i].Val);

                                                 }

                                                 printf("\n");

                                                 */

                                         }

                                         NumOfGroups = x;

                                         GroupingCase = 4;

                                         //printf("Number of groups: %d\n", NumOfGroups.Val);

                                         break;

                                 }

 #endif // GCC_ATOMIC

                                 GroupByIntCol(NGroupByAttrs[0], GroupByIntMapping, TIntV(), true, UsePhysicalIds);

                                 NumOfGroups = GroupByIntMapping.Len();

                                 GroupingCase = 1;

                                 break;

                         case atFlt:

                                 GroupByFltCol(NGroupByAttrs[0], GroupByFltMapping, TIntV(), true, UsePhysicalIds);

                                 NumOfGroups = GroupByFltMapping.Len();

                                 GroupingCase = 2;

                                 break;

                         case atStr:

                                 GroupByStrCol(NGroupByAttrs[0], GroupByStrMapping, TIntV(), true, UsePhysicalIds);

                                 NumOfGroups = GroupByStrMapping.Len();

                                 GroupingCase = 3;

                                 break;

                 }

         }

         else{

                 TIntV UniqueVector;

                 THash<TGroupKey, TPair<TInt, TIntV> > Mapping_aux;

                 GroupAux(NGroupByAttrs, Mapping_aux, Ordered, "", false, UniqueVector, UsePhysicalIds);

                 for(THash<TGroupKey, TPair<TInt, TIntV> >::TIter it = Mapping_aux.BegI(); it < Mapping_aux.EndI(); it++){

                         Mapping.AddDat(it.GetKey(), it.GetDat().Val2);

                 }

                 NumOfGroups = Mapping.Len();

         }

   }


   // double endGroup = omp_get_wtime();

   // printf("Group time = %f\n", endGroup-startFn);


   TAttrType T = GetColType(ValAttr);


   // add column corresponding to result attribute type

   if (AggOp == aaCount) { AddIntCol(ResAttr); }

   else {

     if (T == atInt) { AddIntCol(ResAttr); }

     else if (T == atFlt) { AddFltCol(ResAttr); }

     else {

       // Count is the only aggregation operation handled for Str

       TExcept::Throw("Invalid aggregation for Str type!");

     }

   }

   TInt ColIdx = GetColIdx(ResAttr);

   TInt AggrColIdx = GetColIdx(ValAttr);


   // double endAdd = omp_get_wtime();

   // printf("AddCol time = %f\n", endAdd-endGroup);


 #ifdef USE_OPENMP

   #pragma omp parallel for schedule(dynamic)

 #endif

   for (int g = 0; g < NumOfGroups; g++) {

         TIntV* GroupRows = NULL;

         switch(GroupingCase){

                 case 0:

                         GroupRows = & Mapping.GetDat(Mapping.GetKey(g));

                         break;

                 case 1:

                         GroupRows = & GroupByIntMapping.GetDat(GroupByIntMapping.GetKey(g));

                         break;

                 case 2:

                         GroupRows = & GroupByIntMapping.GetDat(GroupByIntMapping.GetKey(g));

                         break;

             case 3:

                         GroupRows = & GroupByStrMapping.GetDat(GroupByStrMapping.GetKey(g));

                         break;

                 case 4:

 #ifdef GCC_ATOMIC

                         GroupRows = & GroupByIntMapping_MP.GetDat(GroupByIntMPKeys[g]);

 #endif

                         break;

         }


     // find valid rows of group

     /*

     TIntV ValidRows;

     for (TInt i = 0; i < GroupRows.Len(); i++) {

       // TODO: This should not be necessary

       if (!RowIdMap.IsKey(GroupRows[i])) { continue; }

       TInt RowId = RowIdMap.GetDat(GroupRows[i]);

       // GroupRows has physical row indices

       if (RowId != Invalid) { ValidRows.Add(RowId); }

     }

     */

         TIntV& ValidRows = *GroupRows;

     TInt sz = ValidRows.Len();

     if (sz <= 0) continue;

     // Count is handled separately (other operations have aggregation policies defined in a template)

     if (AggOp == aaCount) {

       for (TInt i = 0; i < sz; i++) { IntCols[ColIdx][ValidRows[i]] = sz; }

     } else {

       // aggregate based on column type

       if (T == atInt) {

         TIntV V;

         for (TInt i = 0; i < sz; i++) { V.Add(IntCols[AggrColIdx][ValidRows[i]]); }

         TInt Res = AggregateVector<TInt>(V, AggOp);

         if (AggOp == aaMean) { Res = Res / sz; }

         for (TInt i = 0; i < sz; i++) { IntCols[ColIdx][ValidRows[i]] = Res; }

       } else {

         TFltV V;

         for (TInt i = 0; i < sz; i++) { V.Add(FltCols[AggrColIdx][ValidRows[i]]); }

         TFlt Res = AggregateVector<TFlt>(V, AggOp);

         if (AggOp == aaMean) { Res /= sz; }

         for (TInt i = 0; i < sz; i++) { FltCols[ColIdx][ValidRows[i]] = Res; }

       }

     }

   }

   // double endIter = omp_get_wtime();

   // printf("Iter time = %f\n", endIter-endAdd);

 }


 // Aggregates, row by row, the values of the columns in AggrAttrs and stores
 // the result in a new column ResAttr.
 //   AggrAttrs - columns to combine; all must have the same type (int or flt).
 //   AggOp     - aggregation operation applied to the values of each row.
 //   ResAttr   - name of the result column; it has the type of the inputs.
 // Throws when AggrAttrs is empty, when the column types differ, or when the
 // columns are neither int nor flt.
 void TTable::AggregateCols(const TStrV& AggrAttrs, TAttrAggr AggOp, const TStr& ResAttr) {
   // Without any input column there is no type to decide on; Info[0] below
   // would be read out of range.
   if (AggrAttrs.Len() == 0) {
     TExcept::Throw("AggregateCols: No aggregation attributes given");
   }

   // (type, column index) of every input column.
   TVec<TPair<TAttrType, TInt> > Info;
   for (TInt i = 0; i < AggrAttrs.Len(); i++) {
     Info.Add(GetColTypeMap(AggrAttrs[i]));
     if (Info[i].Val1 != Info[0].Val1) {
       TExcept::Throw("AggregateCols: Aggregation attributes must have the same type");
     }
   }

   if (Info[0].Val1 == atInt) {
     AddIntCol(ResAttr);
     TInt ResIdx = GetColIdx(ResAttr);

     for (TRowIterator RI = BegRI(); RI < EndRI(); RI++) {
       TInt RowIdx = RI.GetRowIdx();
       // Values of this row across the input columns.
       TIntV V;
       for (TInt i = 0; i < AggrAttrs.Len(); i++) {
         V.Add(IntCols[Info[i].Val2][RowIdx]);
       }
       IntCols[ResIdx][RowIdx] = AggregateVector<TInt>(V, AggOp);
     }
   } else if (Info[0].Val1 == atFlt) {
     AddFltCol(ResAttr);
     TInt ResIdx = GetColIdx(ResAttr);

     for (TRowIterator RI = BegRI(); RI < EndRI(); RI++) {
       TInt RowIdx = RI.GetRowIdx();
       TFltV V;
       for (TInt i = 0; i < AggrAttrs.Len(); i++) {
         V.Add(FltCols[Info[i].Val2][RowIdx]);
       }
       FltCols[ResIdx][RowIdx] = AggregateVector<TFlt>(V, AggOp);
     }
   } else {
     TExcept::Throw("AggregateCols: Only Int and Flt aggregation supported right now");
   }
 }


 void TTable::PrintGrouping(const THash<TGroupKey, TIntV>& Mapping) const{

         for(THash<TGroupKey, TIntV>::TIter it = Mapping.BegI(); it < Mapping.EndI(); it++){

                 TGroupKey gk = it.GetKey();

                 TIntV ik = gk.Val1;

                 TFltV fk = gk.Val2;

                 for(int i = 0; i < ik.Len(); i++){ printf("%d ",ik[i].Val);}

                 for(int i = 0; i < fk.Len(); i++){ printf("%f ",fk[i].Val);}

                 printf("-->");

                 TIntV v = it.GetDat();

                 for(int i = 0; i < v.Len(); i++){ printf("%d ",v[i].Val);}

                 printf("\n");

         }

 }


 void TTable::Count(const TStr& CountColName, const TStr& Col) {

   TStrV GroupByAttrs;

   GroupByAttrs.Add(CountColName);

   Aggregate(GroupByAttrs, aaCount, "", Col);

 }


 // Splits the table into one new table per group and returns the new tables.
 //   GroupBy - names of the group-by columns.
 //   Ordered - forwarded to GroupAux.
 // Every group table gets the schema of this table without the id column,
 // receives copies of the group's rows and is then given a fresh id column
 // through InitIds().
 TVec<PTable> TTable::SpliceByGroup(const TStrV& GroupBy, TBool Ordered) {

   TStrV NGroupBy = NormalizeColNameV(GroupBy);

   TIntV UniqueVec;

   THash<TGroupKey, TPair<TInt, TIntV> >Grouping;

   TVec<PTable> Result;


   // Schema of the group tables: every column except the id column.
   Schema NewSchema;

   for (TInt c = 0; c < Sch.Len(); c++) {

     if (Sch[c].Val1 != GetIdColName()) {

       NewSchema.Add(Sch[c]);

     }

   }


   // NOTE(review): UsePhysicalIds is not passed, so GroupAux's default applies
   // (not visible here). The loop below treats group entries as ids to be
   // resolved through RowIdMap - confirm the default matches.
   GroupAux(NGroupBy, Grouping, Ordered, "", false, UniqueVec);


   // Incremented once per group below; not read anywhere in this function.
   TInt cnt = 0;

   // iterate over groups

   for (THash<TGroupKey, TPair<TInt, TIntV> >::TIter it = Grouping.BegI(); it != Grouping.EndI(); it++) {

     PTable GroupTable = TTable::New(NewSchema, Context);


     // ColInfo[i]: (type, column index) of schema column i inside GroupTable.
     // V[i]: index of schema column i inside this table.
     TVec<TPair<TAttrType, TInt> > ColInfo;

     TIntV V;

     for (TInt i = 0; i < Sch.Len(); i++) {

       // NOTE(review): this lookup is also made for the id column, which
       // NewSchema omits - confirm GetColTypeMap tolerates a missing column.
       ColInfo.Add(GroupTable->GetColTypeMap(Sch[i].Val1));

       // The id column is marked with index -1 so the copy loop skips it.
       if (Sch[i].Val1 == IdColName()) {

         ColInfo[i].Val2 = -1;

       }

       V.Add(GetColIdx(Sch[i].Val1));

     }


     TIntV& Rows = it.GetDat().Val2;


     // iterate over rows in group

     for (TInt i = 0; i < Rows.Len(); i++) {

       // convert from permanent ID to row ID

       TInt RowIdx = RowIdMap.GetDat(Rows[i]);


       // iterate over schema

       for (TInt c = 0; c < Sch.Len(); c++) {

         TPair<TAttrType, TInt> Info = ColInfo[c];

         TInt ColIdx = Info.Val2;


         if (ColIdx == -1) { continue; }


         // add row to new group

         switch (Info.Val1) {

           case atInt:

             GroupTable->IntCols[ColIdx].Add(IntCols[V[c]][RowIdx]);

             break;

           case atFlt:

             GroupTable->FltCols[ColIdx].Add(FltCols[V[c]][RowIdx]);

             break;

           case atStr:

             GroupTable->StrColMaps[ColIdx].Add(StrColMaps[V[c]][RowIdx]);

             break;

         }


       }

       // Append the new row to GroupTable's row chain: the previous last row
       // (if any) now points to it, and it becomes the new last row.
       if (GroupTable->LastValidRow >= 0) {

         GroupTable->Next[GroupTable->LastValidRow] = GroupTable->NumRows;

       }

       GroupTable->Next.Add(GroupTable->Last);

       GroupTable->LastValidRow = GroupTable->NumRows;


       GroupTable->NumRows++;

       GroupTable->NumValidRows++;

     }

     // Give the finished group table its own id column.
     GroupTable->InitIds();

     Result.Add(GroupTable);


     cnt += 1;

   }

   return Result;

 }


 void TTable::InitIds() {

   IdColName = "_id";

   //Assert(NumRows == NumValidRows);

   AddIdColumn(IdColName);

 }


 // Renumbers the ids of all valid rows consecutively from 0, in traversal
 // order, and rebuilds RowIdMap to match.
 void TTable::Reindex() {
   RowIdMap.Clr();
   TInt IdColIdx = GetColIdx(IdColName);
   TInt IdCnt = 0;
   for (TRowIterator RI = BegRI(); RI < EndRI(); RI++) {
     IntCols[IdColIdx][RI.GetRowIdx()] = IdCnt;
     // RowIdMap is keyed by row id and yields the physical row index, as in
     // AddIdColumn and RemoveRow; the two arguments were swapped here before.
     RowIdMap.AddDat(IdCnt, RI.GetRowIdx());
     IdCnt++;
   }
 }


 void TTable::AddIdColumn(const TStr& ColName) {

   //printf("NumRows: %d\n", NumRows.Val);

   TInt IdCol = IntCols.Add();

   IntCols[IdCol].Reserve(NumRows, NumRows);

   //printf("IdCol Reserved\n");

   TInt IdCnt = 0;

   RowIdMap.Clr();

   for (TRowIterator RI = BegRI(); RI < EndRI(); RI++) {

     IntCols[IdCol][RI.GetRowIdx()] = IdCnt;

     RowIdMap.AddDat(IdCnt, RI.GetRowIdx());

     IdCnt++;

   }

   AddSchemaCol(ColName, atInt);

   AddColType(ColName, atInt, IntCols.Len()-1);

 }


  PTable TTable::InitializeJointTable(const TTable& Table) {

   PTable JointTable = New(Context);

   JointTable->IntCols = TVec<TIntV>(IntCols.Len() + Table.IntCols.Len() + 1);

   JointTable->FltCols = TVec<TFltV>(FltCols.Len() + Table.FltCols.Len());

   JointTable->StrColMaps = TVec<TIntV>(StrColMaps.Len() + Table.StrColMaps.Len());

   for (TInt i = 0; i < Sch.Len(); i++) {

     TStr ColName = GetSchemaColName(i);

     TAttrType ColType = GetSchemaColType(i);

     TStr CName = JointTable->RenumberColName(ColName);

     TPair<TAttrType, TInt> TypeMap = GetColTypeMap(ColName);

     JointTable->AddColType(CName, TypeMap);

     //JointTable->AddLabel(CName, ColName);

     JointTable->AddSchemaCol(CName, ColType);

   }

   for (TInt i = 0; i < Table.Sch.Len(); i++) {

     TStr ColName = Table.GetSchemaColName(i);

     TAttrType ColType = Table.GetSchemaColType(i);

     TStr CName = JointTable->RenumberColName(ColName);

     TPair<TAttrType, TInt> NewDat = Table.GetColTypeMap(ColName);

     Assert(ColType == NewDat.Val1);

     // add offsets

     switch (NewDat.Val1) {

       case atInt:

         NewDat.Val2 += IntCols.Len();

         break;

       case atFlt:

         NewDat.Val2 += FltCols.Len();

         break;

       case atStr:

         NewDat.Val2 += StrColMaps.Len();

         break;

     }

     JointTable->AddColType(CName, NewDat);

     JointTable->AddSchemaCol(CName, ColType);

   }

   TStr IdColName = "_id";

   JointTable->AddColType(IdColName, atInt, IntCols.Len() + Table.IntCols.Len());

   JointTable->AddSchemaCol(IdColName, atInt);

   return JointTable;

 }


 void TTable::AddJointRow(const TTable& T1, const TTable& T2, TInt RowIdx1, TInt RowIdx2) {

   for (TInt i = 0; i < T1.IntCols.Len(); i++) {

     IntCols[i].Add(T1.IntCols[i][RowIdx1]);

   }

   for (TInt i = 0; i < T1.FltCols.Len(); i++) {

     FltCols[i].Add(T1.FltCols[i][RowIdx1]);

   }

   for (TInt i = 0; i < T1.StrColMaps.Len(); i++) {

     StrColMaps[i].Add(T1.StrColMaps[i][RowIdx1]);

   }

   TInt IntOffset = T1.IntCols.Len();

   TInt FltOffset = T1.FltCols.Len();

   TInt StrOffset = T1.StrColMaps.Len();

   for (TInt i = 0; i < T2.IntCols.Len(); i++) {

     IntCols[i+IntOffset].Add(T2.IntCols[i][RowIdx2]);

   }

   for (TInt i = 0; i < T2.FltCols.Len(); i++) {

     FltCols[i+FltOffset].Add(T2.FltCols[i][RowIdx2]);

   }

   for (TInt i = 0; i < T2.StrColMaps.Len(); i++) {

     StrColMaps[i+StrOffset].Add(T2.StrColMaps[i][RowIdx2]);

   }

   TInt IdOffset = IntOffset + T2.IntCols.Len();

   NumRows++;

   NumValidRows++;

   if (!Next.Empty()) {

     Next[Next.Len()-1] = NumValidRows-1;

     LastValidRow = NumValidRows-1;

   }

   Next.Add(Last);

   RowIdMap.AddDat(NumRows-1,NumRows-1);

   IntCols[IdOffset].Add(NumRows-1);

 }


 // Similarity join: pairs every row of this table with every row of Table and
 // keeps the pairs whose distance over the given attribute columns is at most
 // Threshold.
 //   Cols1            - attribute columns of this table
 //   Table            - the other table
 //   Cols2            - attribute columns of Table, matched by position with Cols1
 //   DistanceColName  - name of the float column that receives the distance
 //   SimType          - L2Norm or Haversine; L1Norm and Jaccard throw
 //   Threshold        - maximum distance for a pair to be emitted
 // Throws (TExcept) on mismatched column vectors, unknown columns, mismatched
 // or unsupported column types, and unsupported metrics.
 PTable TTable::SimJoin(const TStrV& Cols1, const TTable& Table, const TStrV& Cols2, const TStr& DistanceColName, const TSimType& SimType, const TFlt& Threshold)
 {
   Assert(Cols1.Len() == Cols2.Len());

   if (Cols1.Len() != Cols2.Len()) {
     TExcept::Throw("Column vectors must match in type and length");
   }

   for (TInt i = 0; i < Cols1.Len(); i++) {
     if (!IsColName(Cols1[i]) || !Table.IsColName(Cols2[i])) {
       TExcept::Throw("Column not found in Table");
     }

     TAttrType Type1 = GetColType(Cols1[i]);
     // Cols2 names columns of Table, so the type must be looked up there
     // (previously this queried *this, validating the wrong table).
     TAttrType Type2 = Table.GetColType(Cols2[i]);

     if (Type1 != Type2) {
       TExcept::Throw("Column types on the two tables must match.");
     }

     // When supporting more distance metrics, check if the types are supported for given metric.
     if ((Type1 != atInt && Type1 != atFlt) || (Type2 != atInt && Type2 != atFlt)) {
       TExcept::Throw("Column type not supported. Only Flt and Int column types are supported.");
     }
   }

   // Initialize Join table and add the similarity column
   PTable JointTable = InitializeJointTable(Table);
   // Distances of the emitted pairs, in the order the joint rows are added.
   TFltV DistanceV;

   // O(n^2): Parallelize
   for (TRowIterator RowI = this->BegRI(); RowI < this->EndRI(); RowI++) {
     for (TRowIterator RowI2 = Table.BegRI(); RowI2 < Table.EndRI(); RowI2++) {
       float distance = 0;

       switch (SimType)
       {
         // Euclidean distance over all attribute columns
         case L2Norm:
           for (TInt i = 0; i < Cols1.Len(); i++) {
             float attrVal1, attrVal2;
             attrVal1 = GetColType(Cols1[i])==atInt ? (float)RowI.GetIntAttr(Cols1[i]) : (float)RowI.GetFltAttr(Cols1[i]);
             attrVal2 = Table.GetColType(Cols2[i])==atInt ? (float)RowI2.GetIntAttr(Cols2[i]) : (float)RowI2.GetFltAttr(Cols2[i]);
             distance += pow(attrVal1 - attrVal2, 2);
           }

           distance = sqrt(distance);

           // Add row to the joint table if distance <= Threshold
           if (distance <= Threshold) {
             JointTable->AddJointRow(*this, Table, RowI.GetRowIdx(), RowI2.GetRowIdx());
             DistanceV.Add(distance);
           }
           break;

         // Haversine distance to calculate the distance between two points on Earth from latitude/longitude
         case Haversine:
           {
             if (Cols1.Len() != 2) {
               TExcept::Throw("Haversine disance expects exactly two attributes - latitude and longitude - in that order.");
             }

             // Block to prevent cross-initialization error from compiler
             TFlt Radius = 6373; // km
             float Latitude1  = GetColType(Cols1[0])==atInt ? (float)RowI.GetIntAttr(Cols1[0]) : (float)RowI.GetFltAttr(Cols1[0]);
             float Latitude2 = Table.GetColType(Cols2[0])==atInt ? (float)RowI2.GetIntAttr(Cols2[0]) : (float)RowI2.GetFltAttr(Cols2[0]);

             float Longitude1  = GetColType(Cols1[1])==atInt ? (float)RowI.GetIntAttr(Cols1[1]) : (float)RowI.GetFltAttr(Cols1[1]);
             float Longitude2  = Table.GetColType(Cols2[1])==atInt ? (float)RowI2.GetIntAttr(Cols2[1]) : (float)RowI2.GetFltAttr(Cols2[1]);

             // Degrees to radians
             Latitude1 *= static_cast<float>(M_PI/180.0);
             Latitude2 *= static_cast<float>(M_PI/180.0);
             Longitude1 *= static_cast<float>(M_PI/180.0);
             Longitude2 *= static_cast<float>(M_PI/180.0);

             float dlon = Longitude2 - Longitude1;
             float dlat = Latitude2 - Latitude1;
             float a = pow(sin(dlat/2), 2) + cos(Latitude1)*cos(Latitude2)*pow(sin(dlon/2), 2);
             float c = 2*atan2(sqrt(a), sqrt(1-a));
             distance = (static_cast<float>(Radius.Val))*c;

             if (distance <= Threshold) {
               JointTable->AddJointRow(*this, Table, RowI.GetRowIdx(), RowI2.GetRowIdx());
               DistanceV.Add(distance);
             }
           }
           break;

         case L1Norm:
         case Jaccard:
           TExcept::Throw("This distance metric is not supported");
       }
     }
   }

   // Add the value for the similarity column
   JointTable->StoreFltCol(DistanceColName, DistanceV);
   JointTable->InitIds();
   return JointTable;
 }


 // Self similarity join between groups of rows.
 // Rows are grouped by the value of GroupAttr (read from the integer column
 // storage); each group is represented by the set of values it has in SimCol
 // (integer values, or the integer mappings of string values). For every
 // ordered pair of groups, including a group with itself, the distance
 //   1 - |intersection| / |union|
 // of the two value sets is computed, and pairs with distance <= Threshold
 // are emitted.
 // The result has the columns GroupId_1, GroupId_2 and DistanceColName.
 // NOTE(review): SimType is not consulted here; the set-based distance above
 // is always used.
 // Throws (TExcept) if a column is missing, or on the first row when SimCol
 // is neither an integer nor a string column.
 PTable TTable::SelfSimJoinPerGroup(const TStr& GroupAttr, const TStr& SimCol, const TStr& DistanceColName, const TSimType& SimType, const TFlt& Threshold)
 {
   if (!IsColName(SimCol) || !IsColName(GroupAttr)) {
     TExcept::Throw("No such column found in table");
   }

   PTable JointTable = New(Context);
   // Initialize the joint table - (GroupId1, GroupId2, Similarity)
   JointTable->IntCols = TVec<TIntV>(2);
   JointTable->FltCols = TVec<TFltV>(1);

   for (TInt i = 0; i < 2; i++) {
     TInt Suffix = i+1;
     TStr CName = "GroupId_" + Suffix.GetStr();
     TPair<TAttrType, TInt> Group(atInt, (int)i);
     JointTable->AddColType(CName, Group);
     JointTable->AddSchemaCol(CName, atInt);
   }

   TPair<TAttrType, TInt> DistColMap(atFlt, 0);
   JointTable->AddColType(DistanceColName, DistColMap);
   JointTable->AddSchemaCol(DistanceColName, atFlt);

   // Group id --> set of SimCol values seen in that group (stored as hash
   // keys; the data value is unused).
   THash<TInt, THash<TInt, TInt> > GroupVals;

   TAttrType attrType = GetColType(SimCol);
   TInt GroupColIdx = GetColIdx(GroupAttr);
   TInt SimColIdx = GetColIdx(SimCol);

   for (TRowIterator RowI = this->BegRI(); RowI < this->EndRI(); RowI++) {
     TInt GroupId = IntCols[GroupColIdx][RowI.GetRowIdx()];

     if (attrType != atInt && attrType != atStr) {
       TExcept::Throw("Attribute type not supported.");
     }

     if (!GroupVals.IsKey(GroupId)) {
       THash<TInt, TInt> EmptySet;
       GroupVals.AddDat(GroupId, EmptySet);
     }

     THash<TInt, TInt>& ValSet = GroupVals.GetDat(GroupId);
     TInt SimAttrVal = (attrType==atInt ? IntCols[SimColIdx][RowI.GetRowIdx()] : StrColMaps[SimColIdx][RowI.GetRowIdx()]);
     ValSet.AddDat(SimAttrVal, 0);
   }

   // Iterate through every pair of groups and calculate the distance.
   // The value sets are only read here, so they are bound by const reference
   // instead of being copied on every iteration.
   for (THash<TInt, THash<TInt, TInt> >::TIter it1 = GroupVals.BegI(); it1 < GroupVals.EndI(); it1++) {
     const THash<TInt, TInt>& Vals1H = it1.GetDat();
     const TInt GroupId1 = it1.GetKey();

     for (THash<TInt, THash<TInt, TInt> >::TIter it2 = GroupVals.BegI(); it2 < GroupVals.EndI(); it2++) {
       const THash<TInt, TInt>& Vals2H = it2.GetDat();
       const TInt GroupId2 = it2.GetKey();

       int intersectionCount = 0;
       for (THash<TInt, TInt>::TIter it = Vals1H.BegI(); it < Vals1H.EndI(); it++) {
         if (Vals2H.IsKey(it.GetKey())) {
           intersectionCount += 1;
         }
       }

       // Every group holds at least one value, so unionCount is never zero.
       int unionCount = Vals1H.Len() + Vals2H.Len() - intersectionCount;
       float distance = 1.0f - (float)intersectionCount/unionCount;

       // Add a new row to the JointTable
       if (distance <= Threshold) {
         JointTable->IntCols[0].Add(GroupId1);
         JointTable->IntCols[1].Add(GroupId2);
         JointTable->FltCols[0].Add(distance);
         JointTable->IncrementNext();
       }
     }
   }

   JointTable->InitIds();
   return JointTable;
 }


 // Self similarity join between groups defined by several GroupBy columns.
 // Steps:
 //   1. project this table IN PLACE onto the GroupBy columns plus SimCol
 //      (the table the method is called on is modified),
 //   2. group the rows with GroupAux using the group column name "Group",
 //   3. run the single-column SelfSimJoinPerGroup on that group column,
 //   4. for every resulting pair of group ids, join one representative row of
 //      each group and attach the distance column,
 //   5. project the result onto the distance column and the columns whose
 //      names contain one of the GroupBy names.
 PTable TTable::SelfSimJoinPerGroup(const TStrV& GroupBy, const TStr& SimCol,
  const TStr& DistanceColName, const TSimType& SimType, const TFlt& Threshold) {
   TStrV NGroupBy = NormalizeColNameV(GroupBy);
   TStrV ProjectionV;

   // Only keep the GroupBy cols and the SimCol
   for (TInt i = 0; i < GroupBy.Len(); i++) {
     ProjectionV.Add(GroupBy[i]);
   }
   ProjectionV.Add(SimCol);
   ProjectInPlace(ProjectionV);

   TStr CName = "Group";
   TIntV UniqueVec;
   THash<TGroupKey, TPair<TInt, TIntV> > Grouping;
   GroupAux(NGroupBy, Grouping, false, CName, false, UniqueVec);
   PTable GroupJointTable = SelfSimJoinPerGroup(CName, SimCol, DistanceColName, SimType, Threshold);
   PTable JointTable = InitializeJointTable(*this);

   // Hash of groupid to any arbitrary row of that group. Arbitrary because the GroupBy
   // columns within that group are the same, so we can choose any one.
   THash<TInt, TInt> GroupIdH;

   for (THash<TGroupKey, TPair<TInt, TIntV> >::TIter it = Grouping.BegI(); it < Grouping.EndI(); it++) {
     // Bind by reference: only the group number and the first row id are
     // needed, so copying the whole row-id vector is unnecessary.
     const TPair<TInt, TIntV>& GroupDat = it.GetDat();
     const TInt GroupNum = GroupDat.Val1;

     if (!GroupIdH.IsKey(GroupNum)) {
       const TInt RepresentativeRowId = GroupDat.Val2[0];  // Arbitrarily select the 1st row.
       GroupIdH.AddDat(GroupNum, RepresentativeRowId);
     }
   }

   for (TRowIterator RowI = GroupJointTable->BegRI(); RowI < GroupJointTable->EndRI(); RowI++) {
     // The GroupJoinTable has a well defined structure - columns 0 and 1 are GroupIds
     TInt GroupId1 = GroupJointTable->IntCols[0][RowI.GetRowIdx()];
     TInt GroupId2 = GroupJointTable->IntCols[1][RowI.GetRowIdx()];

     // Get the representative rows for GroupId1 and GroupId2
     TInt RowId1 = GroupIdH.GetDat(GroupId1);
     TInt RowId2 = GroupIdH.GetDat(GroupId2);
     JointTable->AddJointRow(*this, *this, RowId1, RowId2);
   }

   // Add the similarity column from the GroupJointTable - GroupJointTable has a
   // well defined structure - the first float column is the similarity.
   JointTable->StoreFltCol(DistanceColName, GroupJointTable->FltCols[0]);
   ProjectionV.Clr();
   ProjectionV.Add(DistanceColName);

   // Find the GroupBy columns in the JointTable by matching the schema column
   // names against the original GroupBy names - note that Join renames columns.
   for (TInt i = 0; i < GroupBy.Len(); i++) {
     for (TInt j = 0; j < JointTable->Sch.Len(); j++) {
       TStr ColName = JointTable->Sch[j].Val1;
       if (ColName.IsStrIn(GroupBy[i])) {
         ProjectionV.Add(ColName);
       }
     }
   }

   JointTable->ProjectInPlace(ProjectionV);
   JointTable->InitIds();
   return JointTable;
 }


 // Increments the next vector and set last, NumRows and NumValidRows.

 void TTable::IncrementNext()

 {

         // Advance the Next vector

         NumRows++;

         NumValidRows++;

         if (!Next.Empty()) {

                 Next[Next.Len()-1] = NumValidRows-1;

                 LastValidRow = NumValidRows-1;

         }

         Next.Add(Last);

 }


 // Q: Do we want to have any guarantees in terms of order of the output rows - i.e.
 // ordered by "this" table row idx as primary key and "Table" row idx as secondary key?
 // This means only keeping joint row indices (pairs of original row indices), sorting them
 // and adding all rows in the end. Sorting can be expensive, but we would be able to pre-allocate
 // memory for the joint table.
 //
 // Equi-join of this table (on Col1) with Table (on Col2).
 // The table with fewer valid rows is hashed on its join column; the rows of
 // the other table are then scanned and matched against that hash. In every
 // emitted joint row the columns of this table come first, then Table's.
 // Throws (TExcept) if a column does not exist or the column types differ.
 PTable TTable::Join(const TStr& Col1, const TTable& Table, const TStr& Col2) {
   // Input validation. TExcept::Throw does not return normally (the code after
   // these checks relies on that), so the printf calls that used to follow
   // each Throw were unreachable and have been removed.
   if (!IsColName(Col1)) {
     TExcept::Throw("no such column " + Col1);
   }
   if (!Table.IsColName(Col2)) {
     TExcept::Throw("no such column " + Col2);
   }
   if (GetColType(Col1) != Table.GetColType(Col2)) {
     TExcept::Throw("Trying to Join on columns of different type");
   }
   // initialize result table
   PTable JointTable = InitializeJointTable(Table);
   // hash smaller table (group by column)
   TAttrType ColType = GetColType(Col1);
   TBool ThisIsSmaller = (NumValidRows <= Table.NumValidRows);
   const TTable& TS = ThisIsSmaller ? *this : Table;   // smaller table (hashed)
   const TTable& TB = ThisIsSmaller ?  Table : *this;  // bigger table (scanned)
   TStr ColS = ThisIsSmaller ? Col1 : Col2;
   TStr ColB = ThisIsSmaller ? Col2 : Col1;
   TInt ColBId = TB.GetColIdx(ColB);
   // iterate over the rows of the bigger table and check for "collisions"
   // with the group keys for the small table.
 #ifdef GCC_ATOMIC
   if (GetMP()) {
     // Parallel path: the bigger table is split into partitions, each loop
     // iteration collects the matching (row of this, row of Table) pairs of
     // one partition into its own vector, and all pairs are added at the end.
     switch(ColType){
       case atInt:{
         THashMP<TInt, TIntV> T(TS.GetNumValidRows());
         TS.GroupByIntColMP(ColS, T, true);

         TIntPrV Partitions;
         TB.GetPartitionRanges(Partitions, omp_get_max_threads()*CHUNKS_PER_THREAD);
         TInt PartitionSize = Partitions[0].GetVal2()-Partitions[0].GetVal1()+1;
         TVec<TIntPrV> JointRowIDSet(Partitions.Len());

         #pragma omp parallel for schedule(dynamic, CHUNKS_PER_THREAD)
         for (int i = 0; i < Partitions.Len(); i++){
           JointRowIDSet[i].Reserve(PartitionSize);
           TRowIterator RowI(Partitions[i].GetVal1(), &TB);
           TRowIterator EndI(Partitions[i].GetVal2(), &TB);
           while (RowI < EndI) {
             TInt K = RowI.GetIntAttr(ColBId);
             if(T.IsKey(K)){
               TIntV& Group = T.GetDat(K);
               for(TInt j = 0; j < Group.Len(); j++){
                 // keep the pair ordered as (row of this, row of Table)
                 if(ThisIsSmaller){
                   JointRowIDSet[i].Add(TIntPr(Group[j], RowI.GetRowIdx()));
                 } else{
                   JointRowIDSet[i].Add(TIntPr(RowI.GetRowIdx(), Group[j]));
                 }
               }
             }
             RowI++;
           }
         }
         JointTable->AddNJointRowsMP(*this, Table, JointRowIDSet);
         break;
       }
       case atFlt:{
         THashMP<TFlt, TIntV> T(TS.GetNumValidRows());
         TS.GroupByFltCol(ColS, T, TIntV(), true);

         TIntPrV Partitions;
         TB.GetPartitionRanges(Partitions, omp_get_max_threads()*CHUNKS_PER_THREAD);
         TInt PartitionSize = Partitions[0].GetVal2()-Partitions[0].GetVal1()+1;
         TVec<TIntPrV> JointRowIDSet(Partitions.Len());

         #pragma omp parallel for schedule(dynamic)
         for (int i = 0; i < Partitions.Len(); i++){
           JointRowIDSet[i].Reserve(PartitionSize);
           TRowIterator RowI(Partitions[i].GetVal1(), &TB);
           TRowIterator EndI(Partitions[i].GetVal2(), &TB);
           while (RowI < EndI) {
             TFlt K = RowI.GetFltAttr(ColBId);
             if(T.IsKey(K)){
               TIntV& Group = T.GetDat(K);
               for(TInt j = 0; j < Group.Len(); j++){
                 if(ThisIsSmaller){
                   JointRowIDSet[i].Add(TIntPr(Group[j], RowI.GetRowIdx()));
                 } else{
                   JointRowIDSet[i].Add(TIntPr(RowI.GetRowIdx(), Group[j]));
                 }
               }
             }
             RowI++;
           }
         }
         JointTable->AddNJointRowsMP(*this, Table, JointRowIDSet);
         break;
       }
       case atStr:{
         // string columns are joined on their integer mappings
         THashMP<TInt, TIntV> T(TS.GetNumValidRows());
         TS.GroupByStrCol(ColS, T, TIntV(), true);

         TIntPrV Partitions;
         TB.GetPartitionRanges(Partitions, omp_get_max_threads()*CHUNKS_PER_THREAD);
         TInt PartitionSize = Partitions[0].GetVal2()-Partitions[0].GetVal1()+1;
         TVec<TIntPrV> JointRowIDSet(Partitions.Len());

         #pragma omp parallel for schedule(dynamic)
         for (int i = 0; i < Partitions.Len(); i++){
           JointRowIDSet[i].Reserve(PartitionSize);
           TRowIterator RowI(Partitions[i].GetVal1(), &TB);
           TRowIterator EndI(Partitions[i].GetVal2(), &TB);
           while (RowI < EndI) {
             TInt K = RowI.GetStrMapById(ColBId);
             if(T.IsKey(K)){
               TIntV& Group = T.GetDat(K);
               for(TInt j = 0; j < Group.Len(); j++){
                 if(ThisIsSmaller){
                   JointRowIDSet[i].Add(TIntPr(Group[j], RowI.GetRowIdx()));
                 } else{
                   JointRowIDSet[i].Add(TIntPr(RowI.GetRowIdx(), Group[j]));
                 }
               }
             }
             RowI++;
           }
         }
         JointTable->AddNJointRowsMP(*this, Table, JointRowIDSet);
       }
       break;
     }
   } else {
 #endif // GCC_ATOMIC
     // Sequential path: joint rows are appended one at a time as matches are found.
     switch (ColType) {
       case atInt:{
         TIntIntVH T;
         TS.GroupByIntCol(ColS, T, TIntV(), true);
         for (TRowIterator RowI = TB.BegRI(); RowI < TB.EndRI(); RowI++) {
           TInt K = RowI.GetIntAttr(ColBId);
           if (T.IsKey(K)) {
             TIntV& Group = T.GetDat(K);
             for (TInt i = 0; i < Group.Len(); i++) {
               if (ThisIsSmaller) {
                 JointTable->AddJointRow(*this, Table, Group[i], RowI.GetRowIdx());
               } else {
                 JointTable->AddJointRow(*this, Table, RowI.GetRowIdx(), Group[i]);
               }
             }
           }
         }
         break;
       }
       case atFlt:{
         THash<TFlt, TIntV> T;
         TS.GroupByFltCol(ColS, T, TIntV(), true);
         for (TRowIterator RowI = TB.BegRI(); RowI < TB.EndRI(); RowI++) {
           TFlt K = RowI.GetFltAttr(ColBId);
           if (T.IsKey(K)) {
             TIntV& Group = T.GetDat(K);
             for (TInt i = 0; i < Group.Len(); i++) {
               if (ThisIsSmaller) {
                 JointTable->AddJointRow(*this, Table, Group[i], RowI.GetRowIdx());
               } else {
                 JointTable->AddJointRow(*this, Table, RowI.GetRowIdx(), Group[i]);
               }
             }
           }
         }
         break;
       }
       case atStr:{
         // string columns are joined on their integer mappings
         TIntIntVH T;
         TS.GroupByStrCol(ColS, T, TIntV(), true);
         for (TRowIterator RowI = TB.BegRI(); RowI < TB.EndRI(); RowI++) {
           TInt K = RowI.GetStrMapById(ColBId);
           if (T.IsKey(K)) {
             TIntV& Group = T.GetDat(K);
             for (TInt i = 0; i < Group.Len(); i++) {
               if (ThisIsSmaller) {
                 JointTable->AddJointRow(*this, Table, Group[i], RowI.GetRowIdx());
               } else {
                 JointTable->AddJointRow(*this, Table, RowI.GetRowIdx(), Group[i]);
               }
             }
           }
         }
       }
       break;
     }
 #ifdef GCC_ATOMIC
   }
 #endif
   return JointTable;
 }


 void TTable::ThresholdJoinInputCorrectness(const TStr& KeyCol1, const TStr& JoinCol1, const TTable& Table,

   const TStr& KeyCol2, const TStr& JoinCol2){

   if (!IsColName(KeyCol1)) {

     printf("no such column %s\n", KeyCol1.CStr());

     TExcept::Throw("no such column " + KeyCol1);

   }

   if (!Table.IsColName(KeyCol2)) {

     printf("no such column %s\n", KeyCol2.CStr());

     TExcept::Throw("no such column " + KeyCol2);

   }

   if (!IsColName(JoinCol1)) {

     printf("no such column %s\n", JoinCol1.CStr());

     TExcept::Throw("no such column " + JoinCol1);

   }

   if (!Table.IsColName(JoinCol2)) {

     printf("no such column %s\n", JoinCol2.CStr());

     TExcept::Throw("no such column " + JoinCol2);

   }

   if (GetColType(JoinCol1) != Table.GetColType(JoinCol2)) {

     printf("Trying to Join on columns of different type\n");

     TExcept::Throw("Trying to Join on columns of different type");

   }

   if (GetColType(KeyCol1) != Table.GetColType(KeyCol2)) {

     printf("Key type mismatch\n");

     TExcept::Throw("Key type mismatch");

   }

 }


 void TTable::ThresholdJoinCountCollisions(const TTable& TB, const TTable& TS,

   const TIntIntVH& T, TInt JoinColIdxB, TInt KeyColIdxB, TInt KeyColIdxS,

   THash<TIntPr,TIntTr>& Counters, TBool ThisIsSmaller, TAttrType JoinColType, TAttrType KeyType){

     // iterate over big table and count / record joint tuples

     for (TRowIterator RowI = TB.BegRI(); RowI < TB.EndRI(); RowI++) {

     // value to join on from big table

       TInt JVal = 0;

       if(JoinColType == atStr){

         JVal = RowI.GetStrMapById(JoinColIdxB);

       } else{

         JVal = RowI.GetIntAttr(JoinColIdxB);

       }

       //printf("JVal: %d\n", JVal.Val);

       if(T.IsKey(JVal)){

         // read key attribute of big table row

         TInt KeyB = 0;

         if(KeyType == atStr){

           KeyB = RowI.GetStrMapById(KeyColIdxB);

         } else{

           KeyB = RowI.GetIntAttr(KeyColIdxB);

         }

         // read row ids from small table with join attribute value of JVal

         const TIntV& RelevantRows = T.GetDat(JVal);

         for(int i = 0; i < RelevantRows.Len(); i++){

           // read key attribute of relevant row from small table

           TInt KeyS = 0;

           if(KeyType == atStr){

             KeyS = TS.StrColMaps[KeyColIdxS][RelevantRows[i]];

           } else{

             KeyS = TS.IntCols[KeyColIdxS][RelevantRows[i]];

           }

           // create a pair of keys - serves as a key in Counters

           TIntPr Keys = ThisIsSmaller ? TIntPr(KeyS, KeyB) : TIntPr(KeyB, KeyS);

           if(Counters.IsKey(Keys)){

           // if the key pair has been seen before - increment its counter by 1

             TIntTr& V = Counters.GetDat(Keys);

             V.Val3 = V.Val3 + 1;

           } else{

             // if the key pair hasn't been seen before - add it with value of

             // row indices that create a joint record with this key pair

             if(ThisIsSmaller){

               Counters.AddDat(Keys, TIntTr(RelevantRows[i], RowI.GetRowIdx(),1));

             } else{

               Counters.AddDat(Keys, TIntTr(RowI.GetRowIdx(), RelevantRows[i],1));

             }

           }

         }       // end of for loop

       } // end of if statement

     } // end of for loop

 }


 void TTable::ThresholdJoinCountPerJoinKeyCollisions(const TTable& TB, const TTable& TS,

   const TIntIntVH& T, TInt JoinColIdxB, TInt KeyColIdxB, TInt KeyColIdxS,

   THash<TIntTr,TIntTr>& Counters, TBool ThisIsSmaller, TAttrType JoinColType, TAttrType KeyType){

     for (TRowIterator RowI = TB.BegRI(); RowI < TB.EndRI(); RowI++) {

       // value to join on from big table

       TInt JVal = 0;

       if(JoinColType == atStr){

         JVal = RowI.GetStrMapById(JoinColIdxB);

        } else{

         JVal = RowI.GetIntAttr(JoinColIdxB);

        }

       //printf("JVal: %d\n", JVal.Val);

       if(T.IsKey(JVal)){

         // read key attribute of big table row

         TInt KeyB = 0;

         if(KeyType == atStr){

           KeyB = RowI.GetStrMapById(KeyColIdxB);

         } else{

           KeyB = RowI.GetIntAttr(KeyColIdxB);

         }

         // read row ids from small table with join attribute value of JVal

         const TIntV& RelevantRows = T.GetDat(JVal);

         for(int i = 0; i < RelevantRows.Len(); i++){

           // read key attribute of relevant row from small table

           TInt KeyS = 0;

           if(KeyType == atStr){

             KeyS = TS.StrColMaps[KeyColIdxS][RelevantRows[i]];

           } else{

             KeyS = TS.IntCols[KeyColIdxS][RelevantRows[i]];

           }

                 // create a pair of keys - serves as a key in Counters

                 TIntPr Keys = ThisIsSmaller ? TIntPr(KeyS, KeyB) : TIntPr(KeyB, KeyS);

                 TIntTr K(Keys.Val1,Keys.Val2,JVal);

           if(Counters.IsKey(K)){

             // if the key pair has been seen before - increment its counter by 1

             TIntTr& V = Counters.GetDat(K);

             V.Val3 = V.Val3 + 1;

           } else{

             // if the key pair hasn't been seen before - add it with value of

             // row indices that create a joint record with this key pair

             if(ThisIsSmaller){

               Counters.AddDat(K, TIntTr(RelevantRows[i], RowI.GetRowIdx(),1));

             } else{

               Counters.AddDat(K, TIntTr(RowI.GetRowIdx(), RelevantRows[i],1));

             }

           }

         }       // end of for loop

       } // end of if statement

     } // end of for loop

   }


 // Builds the result of a threshold join from the collected counters: for
 // every key pair whose count reaches Threshold, the joint row made of the
 // recorded row of this table and the recorded row of Table is emitted.
 PTable TTable::ThresholdJoinOutputTable(const THash<TIntPr,TIntTr>& Counters, TInt Threshold, const TTable& Table){
   PTable Result = InitializeJointTable(Table);
   THash<TIntPr,TIntTr>::TIter CntI = Counters.BegI();
   while (CntI < Counters.EndI()) {
     // Entry = (row idx in this, row idx in Table, number of joint tuples)
     const TIntTr& Entry = CntI.GetDat();
     if (Entry.Val3 >= Threshold) {
       Result->AddJointRow(*this, Table, Entry.Val1, Entry.Val2);
     }
     CntI++;
   }
   return Result;
 }


 // Builds the result of a per-join-key threshold join.
 // Counters maps (key1, key2, join value) --> (row idx in this, row idx in
 // Table, count). A key pair (key1, key2) qualifies when at least one of its
 // join values has a count >= Threshold; one joint row is emitted per
 // qualifying key pair, using the rows recorded for the first qualifying
 // entry encountered.
 PTable TTable::ThresholdJoinPerJoinKeyOutputTable(const THash<TIntTr,TIntTr>& Counters, TInt Threshold, const TTable& Table){
   PTable JointTable = InitializeJointTable(Table);
   // Key pairs already emitted. This set must outlive the loop: when it was
   // declared inside the loop body it was empty on every iteration, so the
   // IsKey() test below never filtered anything and the same key pair could
   // be emitted once per qualifying join value.
   THashSet<TIntPr> Pairs;
   for(THash<TIntTr,TIntTr>::TIter iter = Counters.BegI(); iter < Counters.EndI(); iter++){
     const TIntTr& Counter = iter.GetDat();
     const TIntTr& Keys = iter.GetKey();
     if(Counter.Val3 >= Threshold){
       TIntPr K(Keys.Val1,Keys.Val2);
       if(!Pairs.IsKey(K)){
         Pairs.AddKey(K);
         JointTable->AddJointRow(*this, Table, Counter.Val1, Counter.Val2);
       }
     }
   }
   return JointTable;
 }


 // Expected output: one joint tuple (R1,R2) with
 // (1) R1[KeyCol1] = K1 and R2[KeyCol2] = K2
 // for every pair of keys (K1,K2) such that the number of joint tuples
 // (joined on R1[JoinCol1] = R2[JoinCol2]) that satisfy property (1) is at least Threshold.

 PTable TTable::ThresholdJoin(const TStr& KeyCol1, const TStr& JoinCol1, const TTable& Table,

   const TStr& KeyCol2, const TStr& JoinCol2, TInt Threshold, TBool PerJoinKey){

   // test input correctness

   ThresholdJoinInputCorrectness(KeyCol1, JoinCol1, Table, KeyCol2, JoinCol2);

   //printf("verified input correctness\n");

   // type of column on which we join (currently support only int)

   TAttrType JoinColType = GetColType(JoinCol1);

   // type of key column (currently support only int)

   TAttrType KeyType = GetColType(KeyCol1);

   // Determine which table is smaller

   TBool ThisIsSmaller = (NumValidRows <= Table.NumValidRows);

   const TTable& TS = ThisIsSmaller ? *this : Table;

   const TTable& TB = ThisIsSmaller ?  Table : *this;

   TStr JoinColS = JoinCol1;

   TInt JoinColIdxB = GetColIdx(JoinCol2);

   TInt KeyColIdxS = GetColIdx(KeyCol1);

   TInt KeyColIdxB = GetColIdx(KeyCol2);

   if(!ThisIsSmaller){

         JoinColS = JoinCol2;

     JoinColIdxB = GetColIdx(JoinCol1);

         KeyColIdxS = GetColIdx(KeyCol2);

     KeyColIdxB = GetColIdx(KeyCol1);

   }


   // debug print

   //printf("JoinColS = %d, JoinColIdxB = %d, KeyColIdxS = %d, KeyColIdxB = %d\n",

         //GetColIdx(JoinColS).Val, JoinColIdxB.Val, KeyColIdxS.Val, KeyColIdxB.Val);

   //printf("starting switch-case\n");


   if(KeyType != atInt && KeyType != atStr){

     printf("ThresholdJoin only supports integer or string key attributes\n");

     TExcept::Throw("ThresholdJoin only supports integer or string key attributes");

   }

   if(JoinColType != atInt && JoinColType != atStr){

     printf("ThresholdJoin only supports integer or string join attributes\n");

     TExcept::Throw("ThresholdJoin only supports integer or string join attributes");

   }

   //printf("starting the real stuff!\n");

   // hash the smaller table T: join col value --> physical row ids of rows with that value

   TIntIntVH T;

   if(JoinColType == atInt){

     TS.GroupByIntCol(JoinColS, T, TIntV(), true);

   } else if(JoinColType == atStr){

     TS.GroupByStrCol(JoinColS, T, TIntV(), true);

   } else{

     TExcept::Throw("ThresholdJoin only supports integer or string join attributes");

   }


  /*

   for(THash<TInt,TIntV>::TIter it = T.BegI(); it < T.EndI(); it++){

     if(JoinColType == atStr){

       printf("%s -->", Context.StringVals.GetKey(it.GetKey().Val));

     } else{

       printf("%d -->", it.GetKey().Val);

     }

     const TIntV& V = it.GetDat();

     for(int sr = 0; sr < V.Len(); sr++){

       printf(" %d", V[sr].Val);

     }

     printf("\n");

   }

   */


   // Counters: (K1,K2) --> (RowIdx1,RowIdx2, count) where K1 is a key from KeyCol1,

   // K2 is a key from Table's KeyCol2; RowIdx1 and RowIdx2 are physical row ids

   // that participates in a joint tuple that satisfies (1).

   // count is the count of joint records that satisfy (1).

   // In case of string attributes - the integer mappings of the key attribute values are used.

   if(PerJoinKey){

     //printf("PerJoinKey\n");

     THash<TIntTr,TIntTr> Counters;

     ThresholdJoinCountPerJoinKeyCollisions(TB, TS, T, JoinColIdxB, KeyColIdxB, KeyColIdxS, Counters, ThisIsSmaller, JoinColType, KeyType);

     /*

     for(THash<TIntTr,TIntTr>::TIter it = Counters.BegI(); it < Counters.EndI(); it++){

       const TIntTr& K = it.GetKey();

       const TIntTr& V = it.GetDat();

       if(KeyType == atStr){

         printf("%s %s --> %d %d %d\n", Context->StringVals.GetKey(K.Val1), Context->StringVals.GetKey(K.Val2), V.Val1.Val, V.Val2.Val, V.Val3.Val);

       } else{

         printf("%d %d --> %d %d %d\n", K.Val1.Val, K.Val2.Val, V.Val1.Val, V.Val2.Val, V.Val3.Val);

       }

     }

     */

     //printf("found collisions\n");

     return ThresholdJoinPerJoinKeyOutputTable(Counters, Threshold, Table);

   } else{

     //printf("not PerJoinKey\n");

     THash<TIntPr,TIntTr> Counters;

     ThresholdJoinCountCollisions(TB, TS, T, JoinColIdxB, KeyColIdxB, KeyColIdxS, Counters, ThisIsSmaller, JoinColType, KeyType);

     /*

     for(THash<TIntPr,TIntTr>::TIter it = Counters.BegI(); it < Counters.EndI(); it++){

       const TIntPr& K = it.GetKey();

       const TIntTr& V = it.GetDat();

       if(KeyType == atStr){

         printf("%s %s --> %d %d %d\n", Context->StringVals.GetKey(K.Val1), Context->StringVals.GetKey(K.Val2), V.Val1.Val, V.Val2.Val, V.Val3.Val);

       } else{

         printf("%d %d --> %d %d %d\n", K.Val1.Val, K.Val2.Val, V.Val1.Val, V.Val2.Val, V.Val3.Val);

       }

     }

     */

     //printf("found collisions\n");

     return ThresholdJoinOutputTable(Counters, Threshold, Table);

   }

 }


 // Evaluates Predicate on every row of the table.
 // Remove == true : rows for which the predicate is false are removed from this
 //                  table in place; SelectedRows is not touched.
 // Remove == false: the table is left as is and the row index of every row for
 //                  which the predicate holds is appended to SelectedRows.
 void TTable::Select(TPredicate& Predicate, TIntV& SelectedRows, TBool Remove) {
   // columns referenced by the predicate; their types and indices are resolved once up front
   TStrV RelevantCols;
   Predicate.GetVariables(RelevantCols);
   TInt NumRelevantCols = RelevantCols.Len();
   TVec<TAttrType> ColTypes = TVec<TAttrType>(NumRelevantCols);
   TIntV ColIndices = TIntV(NumRelevantCols);
   for (TInt i = 0; i < NumRelevantCols; i++) {
     ColTypes[i] = GetColType(RelevantCols[i]);
     ColIndices[i] = GetColIdx(RelevantCols[i]);
   }

   if (Remove) {
     // the removing iterator always inspects the row *after* its current position
     TRowIteratorWithRemove RowI = BegRIWR();
     while (RowI.GetNextRowIdx() != Last) {
       // prepare arguments for predicate evaluation
       for (TInt i = 0; i < NumRelevantCols; i++) {
         switch (ColTypes[i]) {
         case atInt:
           Predicate.SetIntVal(RelevantCols[i], RowI.GetNextIntAttr(ColIndices[i]));
           break;
         case atFlt:
           Predicate.SetFltVal(RelevantCols[i], RowI.GetNextFltAttr(ColIndices[i]));
           break;
         case atStr:
           Predicate.SetStrVal(RelevantCols[i], RowI.GetNextStrAttr(ColIndices[i]));
           break;
         }
       }
       // drop the inspected row when the predicate fails, otherwise step over it
       if (!Predicate.Eval()) {
         RowI.RemoveNext();
       } else {
         RowI++;
       }
     }
   } else {
     for (TRowIterator RowI = BegRI(); RowI < EndRI(); RowI++) {
       // bind the current row's values to the predicate variables (looked up by column name)
       for (TInt i = 0; i < NumRelevantCols; i++) {
         switch (ColTypes[i]) {
         case atInt:
           Predicate.SetIntVal(RelevantCols[i], RowI.GetIntAttr(RelevantCols[i]));
           break;
         case atFlt:
           Predicate.SetFltVal(RelevantCols[i], RowI.GetFltAttr(RelevantCols[i]));
           break;
         case atStr:
           Predicate.SetStrVal(RelevantCols[i], RowI.GetStrAttr(RelevantCols[i]));
           break;
         }
       }
       if (Predicate.Eval()) { SelectedRows.Add(RowI.GetRowIdx()); }
     }
   }
 }


 // Collects (without removing anything) the rows that satisfy Predicate and
 // passes them, together with the label arguments, to ClassifyAux.
 void TTable::Classify(TPredicate& Predicate, const TStr& LabelName, const TInt& PositiveLabel, const TInt& NegativeLabel) {
   TIntV MatchingRows;
   const TBool RemoveRows = false;  // selection only, the table keeps all its rows
   Select(Predicate, MatchingRows, RemoveRows);
   ClassifyAux(MatchingRows, LabelName, PositiveLabel, NegativeLabel);
 }


 // Further optimization: both comparison operation and type of columns don't change between rows..

 void TTable::SelectAtomic(const TStr& Col1, const TStr& Col2, TPredComp Cmp, TIntV& SelectedRows, TBool Remove) {

   const TAttrType Ty1 = GetColType(Col1);

   const TAttrType Ty2 = GetColType(Col2);

   const TInt ColIdx1 = GetColIdx(Col1);

   const TInt ColIdx2 = GetColIdx(Col2);

   if (Ty1 != Ty2) {

     TExcept::Throw("SelectAtomic: diff types");

   }

   if (Cmp == SUBSTR || Cmp == SUPERSTR) { Assert(Ty1 == atStr); }


   if (Remove) {

     TRowIteratorWithRemove RowI = BegRIWR();

     while (RowI.GetNextRowIdx() != Last) {


       TBool Result;

       switch (Ty1) {

         case atInt:

           Result = TPredicate::EvalAtom(RowI.GetNextIntAttr(ColIdx1), RowI.GetNextIntAttr(ColIdx2), Cmp);

           break;

         case atFlt:

           Result = TPredicate::EvalAtom(RowI.GetNextFltAttr(ColIdx1), RowI.GetNextFltAttr(ColIdx2), Cmp);

           break;

         case atStr:

           Result = TPredicate::EvalStrAtom(RowI.GetNextStrAttr(ColIdx1), RowI.GetNextStrAttr(ColIdx2), Cmp);

           break;

       }


       if (!Result) {

         RowI.RemoveNext();

       } else {

         RowI++;

       }


     }

   } else {

     for (TRowIterator RowI = BegRI(); RowI < EndRI(); RowI++) {

       TBool Result;

       switch (Ty1) {

         case atInt:

           Result = TPredicate::EvalAtom(RowI.GetIntAttr(Col1), RowI.GetIntAttr(Col2), Cmp);

           break;

         case atFlt:

           Result = TPredicate::EvalAtom(RowI.GetFltAttr(Col1), RowI.GetFltAttr(Col2), Cmp);

           break;

         case atStr:

           Result = TPredicate::EvalStrAtom(RowI.GetStrAttr(Col1), RowI.GetStrAttr(Col2), Cmp);

           break;

       }

       if (Result) { SelectedRows.Add(RowI.GetRowIdx()); }

     }

   }

 }


 void TTable::ClassifyAtomic(const TStr& Col1, const TStr& Col2, TPredComp Cmp,

   const TStr& LabelName, const TInt& PositiveLabel, const TInt& NegativeLabel) {

   TIntV SelectedRows;

   SelectAtomic(Col1, Col2, Cmp, SelectedRows, false);

   ClassifyAux(SelectedRows, LabelName, PositiveLabel, NegativeLabel);

 }


 // Compares column Col of every row against the constant Val using Cmp.
 // Throws if the type of Val differs from the type of the column.
 // The result is delivered in one of three ways:
 //   Remove          : rows that fail the comparison are removed from this table in place
 //   !Remove && Table: rows that pass are added to SelectedTable
 //   otherwise       : indices of rows that pass are appended to SelectedRows
 // With USE_OPENMP defined and GetMP() true, the first two modes run on row
 // partitions in parallel.
 void TTable::SelectAtomicConst(const TStr& Col, const TPrimitive& Val, TPredComp Cmp,
   TIntV& SelectedRows, PTable& SelectedTable, TBool Remove, TBool Table) {
   // string form of the constant, used by the string comparisons of the parallel paths
   TStr ValTStr(Val.GetStr());
   TAttrType Type = GetColType(Col);
   TInt ColIdx = GetColIdx(Col);

   if (Type != Val.GetType()) {
     TExcept::Throw("SelectAtomicConst: coltype does not match const type");
   }

   if(Remove){
 #ifdef USE_OPENMP
     if (GetMP()) {
       TIntPrV Partitions;
       GetPartitionRanges(Partitions, omp_get_max_threads()*CHUNKS_PER_THREAD);
       int RemoveCount = 0;

       // Bounds[i] = (first, last) surviving row of partition i, or Invalid if none survived
       TIntPrV Bounds(Partitions.Len());

       #pragma omp parallel for schedule(dynamic, CHUNKS_PER_THREAD) reduction(+:RemoveCount)
       for (int i = 0; i < Partitions.Len(); i++){
         TRowIterator RowI(Partitions[i].GetVal1(), this);
         TRowIterator EndI(Partitions[i].GetVal2(), this);
         TInt FirstRowIdx = TTable::Invalid;
         TInt LastRowIdx = TTable::Invalid;
         TBool First = true;
         while (RowI < EndI) {
           TInt CurrRowIdx = RowI.GetRowIdx();
           TBool Result;
           if (Type != atStr) {
             Result = RowI.CompareAtomicConst(ColIdx, Val, Cmp);
           } else {
             Result = RowI.CompareAtomicConstTStr(ColIdx, ValTStr, Cmp);
           }
           // advance before Next[CurrRowIdx] is possibly overwritten below
           RowI++;
           if(!Result) {
             Next[CurrRowIdx] = TTable::Invalid;
             RemoveCount++;
           } else {
             // chain the surviving rows of this partition together
             if (First) { FirstRowIdx = CurrRowIdx; First = false; }
             else { Next[LastRowIdx] = CurrRowIdx; }
             LastRowIdx = CurrRowIdx;
           }
         }
         Bounds[i] = TIntPr(FirstRowIdx, LastRowIdx);
       }

       // repair the next vector: skip leading partitions without survivors
       TInt CurrBound = 0;
       while (CurrBound < Bounds.Len() && Bounds[CurrBound].Val1 == TTable::Invalid) {
         CurrBound++;
       }
       if (CurrBound == Bounds.Len()) {
         // selected table is empty
         Assert(NumValidRows == RemoveCount);
         NumValidRows = 0;
         FirstValidRow = TTable::Invalid;
         LastValidRow = TTable::Invalid;
       } else {
         NumValidRows -= RemoveCount;
         FirstValidRow = Bounds[CurrBound].Val1;
         LastValidRow = Bounds[CurrBound].Val2;
         TInt PrevBound = CurrBound;
         CurrBound++;
         // link the last survivor of each non-empty partition to the first survivor of the next one
         while (CurrBound < Bounds.Len()) {
           if (Bounds[CurrBound].Val1 == TTable::Invalid) { CurrBound++; continue; }
           Next[Bounds[PrevBound].Val2] = Bounds[CurrBound].Val1;
           LastValidRow = Bounds[CurrBound].Val2;
           PrevBound = CurrBound;
           CurrBound++;
         }
         Next[Bounds[PrevBound].Val2] = TTable::Last;
       }
       IsNextDirty = 1;
     } else {
 #endif
       // sequential removal: the removing iterator inspects the row after its position
       TRowIteratorWithRemove RowI = BegRIWR();
       while(RowI.GetNextRowIdx() != Last){
         if (!RowI.CompareAtomicConst(ColIdx, Val, Cmp)) {
           RowI.RemoveNext();
         } else {
           RowI++;
         }
       }
       IsNextDirty = 1;
 #ifdef USE_OPENMP
     }
 #endif
   } else if (Table) {
 #ifdef USE_OPENMP
     if (GetMP()) {
       TIntPrV Partitions;
       GetPartitionRanges(Partitions, omp_get_max_threads()*CHUNKS_PER_THREAD);
       // size of the first partition, used below as the capacity reserved per partition
       TInt PartitionSize = Partitions[0].GetVal2()-Partitions[0].GetVal1()+1;

       // first pass: count the matching rows so the output table can be sized up front
       int TotalSelectedRows = 0;
       #pragma omp parallel for schedule(dynamic, CHUNKS_PER_THREAD) reduction(+:TotalSelectedRows)
       for (int i = 0; i < Partitions.Len(); i++){
         TRowIterator RowI(Partitions[i].GetVal1(), this);
         TRowIterator EndI(Partitions[i].GetVal2(), this);
         while (RowI < EndI) {
           if (Type != atStr) {
             if (RowI.CompareAtomicConst(ColIdx, Val, Cmp)) {
               TotalSelectedRows++;
             }
           } else {
             if (RowI.CompareAtomicConstTStr(ColIdx, ValTStr, Cmp)) {
               TotalSelectedRows++;
             }
           }
           RowI++;
         }
       }

       SelectedTable->ResizeTable(TotalSelectedRows);

       if (TotalSelectedRows == 0) {
         // empty output, nothing to copy
         return;
       }

       // second pass: gather the matching row indices per partition and add them to the output
       #pragma omp parallel for schedule(dynamic, CHUNKS_PER_THREAD)
       for (int i = 0; i < Partitions.Len(); i++){
         TIntV LocalSelectedRows;
         LocalSelectedRows.Reserve(PartitionSize);
         TRowIterator RowI(Partitions[i].GetVal1(), this);
         TRowIterator EndI(Partitions[i].GetVal2(), this);
         while (RowI < EndI) {
           if (Type != atStr) {
             if (RowI.CompareAtomicConst(ColIdx, Val, Cmp)) {
               LocalSelectedRows.Add(RowI.GetRowIdx());
             }
           } else {
             if (RowI.CompareAtomicConstTStr(ColIdx, ValTStr, Cmp)) {
               LocalSelectedRows.Add(RowI.GetRowIdx());
             }
           }
           RowI++;
         }
         SelectedTable->AddSelectedRows(*this, LocalSelectedRows);
       }

       SelectedTable->SetFirstValidRow();
     } else {
 #endif
       // sequential copy of the matching rows into SelectedTable
       for(TRowIterator RowI = BegRI(); RowI < EndRI(); RowI++){
         if (RowI.CompareAtomicConst(ColIdx, Val, Cmp)) {
           SelectedTable->AddRowI(RowI);
         }
       }
 #ifdef USE_OPENMP
     }
 #endif
   } else {
     // index-only mode: report the matching row indices
     for(TRowIterator RowI = BegRI(); RowI < EndRI(); RowI++){
       if (RowI.CompareAtomicConst(ColIdx, Val, Cmp)) {
         SelectedRows.Add(RowI.GetRowIdx());
       }
     }
   }
 }


 // Three-way comparison of rows R1 and R2 on a single column.
 // CompareByType selects the column family, CompareByIndex the column within it.
 // For Asc the result is positive when R1's value is greater, negative when it
 // is smaller and 0 when equal; for !Asc the sign is flipped.
 inline TInt TTable::CompareRows(TInt R1, TInt R2, const TAttrType& CompareByType, const TInt& CompareByIndex, TBool Asc) {
   // value reported when R1's entry is the greater one
   const int WhenGreater = Asc ? 1 : -1;
   if (CompareByType == atInt) {
     if (IntCols[CompareByIndex][R1] > IntCols[CompareByIndex][R2]) { return WhenGreater; }
     if (IntCols[CompareByIndex][R1] < IntCols[CompareByIndex][R2]) { return -WhenGreater; }
     return 0;
   }
   if (CompareByType == atFlt) {
     if (FltCols[CompareByIndex][R1] > FltCols[CompareByIndex][R2]) { return WhenGreater; }
     if (FltCols[CompareByIndex][R1] < FltCols[CompareByIndex][R2]) { return -WhenGreater; }
     return 0;
   }
   if (CompareByType == atStr) {
     // strings are ordered by strcmp on their C-string form
     TStr Lhs = GetStrValIdx(CompareByIndex, R1);
     TStr Rhs = GetStrValIdx(CompareByIndex, R2);
     int Order = strcmp(Lhs.CStr(), Rhs.CStr());
     return (Asc ? Order : -Order);
   }
   // code should not come here, added to remove a compiler warning
   return 0;
 }


 // Lexicographic three-way comparison of rows R1 and R2 over several columns:
 // the columns are tried in order and the first non-zero single-column result
 // is returned; 0 means the rows agree on every listed column.
 inline TInt TTable::CompareRows(TInt R1, TInt R2, const TVec<TAttrType>& CompareByTypes, const TIntV& CompareByIndices, TBool Asc) {
   TInt Col = 0;
   while (Col < CompareByTypes.Len()) {
     TInt Outcome = CompareRows(R1, R2, CompareByTypes[Col], CompareByIndices[Col], Asc);
     if (Outcome != 0) { return Outcome; }
     Col++;
   }
   return 0;
 }


 // Insertion sort of the row indices V[StartIdx..EndIdx] (both inclusive),
 // ordered by CompareRows on the given columns.
 void TTable::ISort(TIntV& V, TInt StartIdx, TInt EndIdx, const TVec<TAttrType>& SortByTypes, const TIntV& SortByIndices, TBool Asc) {
   if (StartIdx >= EndIdx) { return; }  // zero or one element, nothing to do
   for (TInt Pos = StartIdx+1; Pos <= EndIdx; Pos++) {
     TInt Moving = V[Pos];
     // shift greater elements one slot to the right until Moving's place is found
     TInt Hole = Pos;
     for (; (StartIdx < Hole) && (CompareRows(V[Hole-1], Moving, SortByTypes, SortByIndices, Asc) > 0); Hole--) {
       V[Hole] = V[Hole-1];
     }
     V[Hole] = Moving;
   }
 }


 // Picks a pivot position for V[StartIdx..EndIdx]: draws three random positions
 // in the range and returns the one holding the median of the three rows.
 TInt TTable::GetPivot(TIntV& V, TInt StartIdx, TInt EndIdx, const TVec<TAttrType>& SortByTypes, const TIntV& SortByIndices, TBool Asc) {
   TInt RangeLen = EndIdx - StartIdx + 1;
   const TInt PosA = StartIdx + TInt::GetRnd(RangeLen);
   const TInt PosB = StartIdx + TInt::GetRnd(RangeLen);
   const TInt PosC = StartIdx + TInt::GetRnd(RangeLen);
   if (CompareRows(V[PosA], V[PosB], SortByTypes, SortByIndices, Asc) < 0) {
     // A < B: the median is B if B < C, otherwise whichever of A and C is larger
     if (CompareRows(V[PosB], V[PosC], SortByTypes, SortByIndices, Asc) < 0) { return PosB; }
     return (CompareRows(V[PosA], V[PosC], SortByTypes, SortByIndices, Asc) < 0) ? PosC : PosA;
   }
   // B <= A: the median is B if C < B, otherwise whichever of A and C is smaller
   if (CompareRows(V[PosC], V[PosB], SortByTypes, SortByIndices, Asc) < 0) { return PosB; }
   return (CompareRows(V[PosC], V[PosA], SortByTypes, SortByIndices, Asc) < 0) ? PosC : PosA;
 }


 // Partition step of the quicksort over V[StartIdx..EndIdx].
 // Returns EndIdx+1 when the range is already sorted (nothing is moved).
 // Otherwise rearranges the range around a pivot chosen by GetPivot and returns
 // the pivot's final position: elements comparing <= pivot end up before it.
 TInt TTable::Partition(TIntV& V, TInt StartIdx, TInt EndIdx, const TVec<TAttrType>& SortByTypes, const TIntV& SortByIndices, TBool Asc) {
   // test if the elements are already sorted: advance while neighbours are in order
   TInt Scan = StartIdx;
   while (Scan < EndIdx && CompareRows(V[Scan], V[Scan+1], SortByTypes, SortByIndices, Asc) <= 0) {
     Scan++;
   }
   if (Scan >= EndIdx) {
     return EndIdx+1;
   }

   // park the pivot at the end of the range while the rest is partitioned
   TInt PivotPos = GetPivot(V, StartIdx, EndIdx, SortByTypes, SortByIndices, Asc);
   TInt PivotRow = V[PivotPos];
   V.Swap(PivotPos, EndIdx);
   TInt Boundary = StartIdx;  // first slot not yet known to hold an element <= pivot
   for (TInt Cur = StartIdx; Cur < EndIdx; Cur++) {
     if (CompareRows(V[Cur], PivotRow, SortByTypes, SortByIndices, Asc) > 0) { continue; }
     V.Swap(Cur, Boundary);
     Boundary++;
   }
   // move pivot value to its place
   V.Swap(Boundary, EndIdx);
   return Boundary;
 }


 // Quicksort of the row indices V[StartIdx..EndIdx] (both inclusive), ordered
 // by CompareRows on the given columns. Ranges of fewer than 21 elements are
 // handed to ISort.
 void TTable::QSort(TIntV& V, TInt StartIdx, TInt EndIdx, const TVec<TAttrType>& SortByTypes, const TIntV& SortByIndices, TBool Asc) {
   if (StartIdx >= EndIdx) { return; }
   if (EndIdx - StartIdx < 20) {
     ISort(V, StartIdx, EndIdx, SortByTypes, SortByIndices, Asc);
     return;
   }
   TInt Pivot = Partition(V, StartIdx, EndIdx, SortByTypes, SortByIndices, Asc);
   // Partition reports an already sorted range with a position past EndIdx
   if (Pivot > EndIdx) { return; }
   // Everything <= Pivot will be in StartIdx, Pivot-1. Shrink this
   // range to ignore elements equal to the pivot in the first
   // recursive call, to optimize for the case when a lot of
   // rows are equal.
   int LeftEnd = Pivot - 1;
   while (LeftEnd >= StartIdx &&
     CompareRows(V[LeftEnd], V[Pivot], SortByTypes, SortByIndices, Asc) == 0) {
     --LeftEnd;
   }
   QSort(V, StartIdx, LeftEnd, SortByTypes, SortByIndices, Asc);
   QSort(V, Pivot+1, EndIdx, SortByTypes, SortByIndices, Asc);
 }


 // Merges the two adjacent sorted runs V[Idx1..Idx2-1] and V[Idx2..Idx3-1] into
 // one sorted run occupying V[Idx1..Idx3-1]. On ties the element of the first
 // run is taken first.
 void TTable::Merge(TIntV& V, TInt Idx1, TInt Idx2, TInt Idx3, const TVec<TAttrType>& SortByTypes, const TIntV& SortByIndices, TBool Asc) {
   TInt Left = Idx1, Right = Idx2;
   TIntV Merged;  // scratch buffer holding the merged run
   while (Left < Idx2 && Right < Idx3) {
     const bool TakeLeft = CompareRows(V[Left], V[Right], SortByTypes, SortByIndices, Asc) <= 0;
     // consume one element from whichever run currently has the smaller head
     TInt& Cursor = TakeLeft ? Left : Right;
     Merged.Add(V[Cursor]);
     Cursor++;
   }
   // at most one of the two runs still has elements left
   for (; Left < Idx2; Left++) { Merged.Add(V[Left]); }
   for (; Right < Idx3; Right++) { Merged.Add(V[Right]); }

   // copy the merged run back in place
   for (TInt Off = 0; Off < Idx3 - Idx1; Off++) {
     V[Idx1 + Off] = Merged[Off];
   }
 }


 #ifdef USE_OPENMP

 // Parallel sort of the row indices in V: V is cut into NumThreads slices that
 // are quick-sorted in parallel, then neighbouring slices are merged pairwise,
 // halving the number of slices each round until one sorted run remains.
 void TTable::QSortPar(TIntV& V, const TVec<TAttrType>& SortByTypes, const TIntV& SortByIndices, TBool Asc) {
   TInt NumThreads = 8; // Setting this to 8 because that results in the fastest sorting on Madmax.
   TInt Sz = V.Len();
   // Cuts[k] is the first position of slice k; the final entry Sz closes the last slice
   TIntV Cuts;
   for (TInt k = 0; k < NumThreads; k++) {
     Cuts.Add(k * (Sz / NumThreads));
   }
   Cuts.Add(Sz);

   omp_set_num_threads(NumThreads);
   #pragma omp parallel for
   for (int i = 0; i < NumThreads; i++) {
     QSort(V, Cuts[i], Cuts[i+1] - 1, SortByTypes, SortByIndices, Asc);
   }

   while (NumThreads > 1) {
     // merge slices (0,1), (2,3), ... in parallel
     omp_set_num_threads(NumThreads / 2);
     #pragma omp parallel for
     for (int i = 0; i < NumThreads; i += 2) {
       Merge(V, Cuts[i], Cuts[i+1], Cuts[i+2], SortByTypes, SortByIndices, Asc);
     }

     // keep every second cut: these are the starts of the merged slices
     TIntV Coarser;
     for (TInt k = 0; k < NumThreads; k+=2) {
       Coarser.Add(Cuts[k]);
     }
     Coarser.Add(Sz);
     Cuts = Coarser;

     NumThreads = NumThreads / 2;
   }
 }

 #endif // USE_OPENMP


 void TTable::Order(const TStrV& OrderBy, TStr OrderColName, TBool ResetRankByMSC, TBool Asc) {

   // get a vector of all valid row indices

   TIntV ValidRows = TIntV(NumValidRows);

   if (NumRows == NumValidRows) {

     for (TInt i = 0; i < NumValidRows; i++) {

       ValidRows[i] = i;

     }

   } else {

     TInt i = 0;

     for (TRowIterator RI = BegRI(); RI < EndRI(); RI++) {

       ValidRows[i] = RI.GetRowIdx();

       i++;

     }

   }

   TVec<TAttrType> OrderByTypes(OrderBy.Len());

   TIntV OrderByIndices(OrderBy.Len());

   for (TInt i = 0; i < OrderBy.Len(); i++) {

     OrderByTypes[i] = GetColType(OrderBy[i]);

     OrderByIndices[i] = GetColIdx(OrderBy[i]);

   }


   // sort that vector according to the attributes given in "OrderBy" in lexicographic order

 #ifdef USE_OPENMP

   if (GetMP()) {

     QSortPar(ValidRows, OrderByTypes, OrderByIndices, Asc);

   } else {

 #endif

     QSort(ValidRows, 0, NumValidRows-1, OrderByTypes, OrderByIndices, Asc);

 #ifdef USE_OPENMP

   }

 #endif


   // rewire Next vector

   IsNextDirty = 1;

   if (NumValidRows > 0) {

     FirstValidRow = ValidRows[0];

   } else {

     FirstValidRow = Last;

   }

   for (TInt i = 0; i < NumValidRows-1; i++) {

     Next[ValidRows[i]] = ValidRows[i+1];

   }

   if (NumValidRows > 0) {

     Next[ValidRows[NumValidRows-1]] = Last;

     LastValidRow = ValidRows[NumValidRows-1];

   } else {

     LastValidRow = Last;

   }


   // add rank column

   if (!OrderColName.Empty()) {

     TIntV RankCol = TIntV(NumRows);

     for (TInt i = 0; i < NumValidRows; i++) {

       RankCol[ValidRows[i]] = i;

     }

     if (ResetRankByMSC) {

       for (TInt i = 1; i < NumValidRows; i++) {

         TStr GroupName = OrderBy[0];

         if (GetStrVal(GroupName, ValidRows[i]) != GetStrVal(GroupName, ValidRows[i-1])) {

           RankCol[ValidRows[i]] = 0;

         } else {

           RankCol[ValidRows[i]] = RankCol[ValidRows[i-1]] + 1;

         }

       }

     }

     IntCols.Add(RankCol);

     AddSchemaCol(OrderColName, atInt);

     AddColType(OrderColName, atInt, IntCols.Len()-1);

   }

 }


 // Compacts the table: every row whose Next entry is not Invalid is copied down
 // to the lowest free physical index, in increasing physical order, and its
 // Next entry is rewritten to point at the following slot. RowIdMap is updated
 // with the new position of each row id and NumRows is decremented for every
 // invalid row.
 // NOTE(review): the column vectors and Next are not shrunk here, and the first
 // valid physical row is asserted to be FirstValidRow -- confirm callers only
 // defragment tables whose Next chain follows physical order.
 void TTable::Defrag() {
   TInt FreeIndex = 0;  // next physical slot to be filled

   TInt IdColIdx = GetColIdx(IdColName);

   for (TInt i = 0; i < Next.Len(); i++) {
     if (Next[i] != TTable::Invalid) {
       // "first row" properly set beforehand
       if (FreeIndex == 0) {
         Assert (i == FirstValidRow);
         FirstValidRow = 0;
       }

       // rows become physically consecutive; the row that ended the chain stays the end
       if (Next[i] != Last) {
         Next[FreeIndex] = FreeIndex + 1;
       } else {
         Next[FreeIndex] = Last;
         LastValidRow = FreeIndex;
       }

       RowIdMap.AddDat(IntCols[IdColIdx][i], FreeIndex);

       // move the row's values down into the free slot
       for (TInt j = 0; j < IntCols.Len(); j++) {
         IntCols[j][FreeIndex] = IntCols[j][i];
       }
       for (TInt j = 0; j < FltCols.Len(); j++) {
         FltCols[j][FreeIndex] = FltCols[j][i];
       }
       for (TInt j = 0; j < StrColMaps.Len(); j++) {
         StrColMaps[j][FreeIndex] = StrColMaps[j][i];
       }

       FreeIndex++;
     } else {
       NumRows--;
     }
   }

   // should match, or bug somewhere
   Assert (NumValidRows == NumRows);
 }


 // Keeps only the first N rows (in Next-chain order) and invalidates the rest.
 // A table holding N rows or fewer is left unchanged.
 // NOTE(review): for N == 0 only LastValidRow is reset; NumValidRows,
 // FirstValidRow and Next are left as they were -- confirm this is intended.
 void TTable::SelectFirstNRows(const TInt& N) {
   if (N == 0) {
     LastValidRow = -1;
     return;
   }
   // advance to the N-th row, stopping early if the table runs out of rows
   TRowIterator RowI = BegRI();
   for (TInt count = 1; count < N && RowI < EndRI(); count++) {
     RowI++;
   }
   // Also covers the iterator stepping off the final row (table holds exactly
   // N-1 rows, or is empty), which must not be treated as a valid N-th row.
   if (!(RowI < EndRI())) {
     return; // The table contains less than N rows
   }
   NumValidRows = N;
   TInt LastId = RowI.GetRowIdx();
   if (Next[LastId] == Last) {
     return; // The table contains exactly N rows
   }
   // The table contains more than N rows: invalidate everything after LastId
   TInt CurrId = LastId;
   while (Next[CurrId] != Last) {
     Assert(Next[CurrId] != Invalid);
     TInt NextId = Next[CurrId];
     Next[CurrId] = Invalid;
     CurrId = NextId;
   }
   // CurrId is the old tail row (distinct from LastId here); without this it
   // would keep Next == Last and still look valid to scans testing Next[i] != Invalid.
   Next[CurrId] = Invalid;
   Next[LastId] = Last;
   LastValidRow = LastId;
 }


 // Adds node NodeId to Graph unless NodeVals already records it; NodeVals is
 // updated so the same id is added only once.
 inline void TTable::CheckAndAddIntNode(PNEANet Graph, THashSet<TInt>& NodeVals, TInt NodeId) {
   if (NodeVals.IsKey(NodeId)) { return; }  // already added
   Graph->AddNode(NodeId);
   NodeVals.AddKey(NodeId);
 }


 // Copies the values of all columns listed in EdgeAttrV from row RowId onto the
 // edge with id RowId in Graph, one attribute per column, named after the column.
 inline void TTable::AddEdgeAttributes(PNEANet& Graph, int RowId) {
   for (TInt a = 0; a < EdgeAttrV.Len(); a++) {
     TStr AttrName = EdgeAttrV[a];
     TAttrType AttrType = GetColType(AttrName);
     TInt ColPos = GetColIdx(AttrName);
     // dispatch on the column type to the matching typed setter
     if (AttrType == atInt) {
       Graph->AddIntAttrDatE(RowId, IntCols[ColPos][RowId], AttrName);
     } else if (AttrType == atFlt) {
       Graph->AddFltAttrDatE(RowId, FltCols[ColPos][RowId], AttrName);
     } else if (AttrType == atStr) {
       Graph->AddStrAttrDatE(RowId, GetStrValIdx(ColPos, RowId), AttrName);
     }
   }
 }


 // Records, for node NId, the value of every column listed in NodeAttrV taken
 // from row RowId. Values are appended to the per-node, per-attribute vectors in
 // NodeIntAttrs / NodeFltAttrs / NodeStrAttrs according to the column type.
 // A column that appears in CommonNodeAttrs (as Val1 or Val2) is stored under
 // the common name Val3 instead of its own name.
 inline void TTable::AddNodeAttributes(TInt NId, TStrV NodeAttrV, TInt RowId, THash<TInt, TStrIntVH>& NodeIntAttrs,
   THash<TInt, TStrFltVH>& NodeFltAttrs, THash<TInt, TStrStrVH>& NodeStrAttrs) {
   for (TInt a = 0; a < NodeAttrV.Len(); a++) {
     TStr AttrName = NodeAttrV[a];
     // type and index are resolved with the column's own name, before any renaming
     TAttrType ColType = GetColType(AttrName);
     int ColId = GetColIdx(AttrName);
     // check if this is a common src-dst attribute
     for (TInt c = 0; c < CommonNodeAttrs.Len(); c++) {
       if (CommonNodeAttrs[c].Val1 == AttrName || CommonNodeAttrs[c].Val2 == AttrName) {
         AttrName = CommonNodeAttrs[c].Val3;
         break;
       }
     }
     // make sure the node and attribute entries exist, then append the value
     if (ColType == atInt) {
       if (!NodeIntAttrs.IsKey(NId)) { NodeIntAttrs.AddKey(NId); }
       TStrIntVH& PerNode = NodeIntAttrs.GetDat(NId);
       if (!PerNode.IsKey(AttrName)) { PerNode.AddKey(AttrName); }
       PerNode.GetDat(AttrName).Add(IntCols[ColId][RowId]);
     } else if (ColType == atFlt) {
       if (!NodeFltAttrs.IsKey(NId)) { NodeFltAttrs.AddKey(NId); }
       TStrFltVH& PerNode = NodeFltAttrs.GetDat(NId);
       if (!PerNode.IsKey(AttrName)) { PerNode.AddKey(AttrName); }
       PerNode.GetDat(AttrName).Add(FltCols[ColId][RowId]);
     } else {
       if (!NodeStrAttrs.IsKey(NId)) { NodeStrAttrs.AddKey(NId); }
       TStrStrVH& PerNode = NodeStrAttrs.GetDat(NId);
       if (!PerNode.IsKey(AttrName)) { PerNode.AddKey(AttrName); }
       PerNode.GetDat(AttrName).Add(GetStrValIdx(ColId, RowId));
     }
   }
 }


 // Makes one pass over all the rows in the vector RowIds, and builds
 // a PNEANet, with each row as an edge between SrcCol and DstCol.
 // The edge id is the row index. Edge attributes (EdgeAttrV) are copied per row;
 // node attributes (SrcNodeAttrV / DstNodeAttrV) are collected per node over all
 // rows and reduced to one value per attribute with AggrPolicy.
 // For string node columns, rows whose source or destination string is empty
 // are skipped.
 PNEANet TTable::BuildGraph(const TIntV& RowIds, TAttrAggr AggrPolicy) {
   PNEANet Graph = TNEANet::New();

   const TAttrType NodeType = GetColType(SrcCol);
   Assert(NodeType == GetColType(DstCol));
   const TInt SrcColIdx = GetColIdx(SrcCol);
   const TInt DstColIdx = GetColIdx(DstCol);

   // node values - i.e. the unique values of src/dst col (float node columns only)
   THash<TFlt, TInt> FltNodeVals;

   // node attributes
   THash<TInt, TStrIntVH> NodeIntAttrs;
   THash<TInt, TStrFltVH> NodeFltAttrs;
   THash<TInt, TStrStrVH> NodeStrAttrs;

   // make single pass over all rows in given row id set
   for (TVec<TInt>::TIter it = RowIds.BegI(); it < RowIds.EndI(); it++) {
     TInt CurrRowIdx = *it;

     // add src and dst nodes to graph if they are not seen earlier
     TInt SVal, DVal;
     if (NodeType == atFlt) {
       TFlt FSVal = FltCols[SrcColIdx][CurrRowIdx];
       SVal = CheckAndAddFltNode(Graph, FltNodeVals, FSVal);
       // the destination value comes from the destination column
       // (reading the source column here would turn every edge into a self-loop)
       TFlt FDVal = FltCols[DstColIdx][CurrRowIdx];
       DVal = CheckAndAddFltNode(Graph, FltNodeVals, FDVal);
     } else if (NodeType == atInt || NodeType == atStr) {
       if (NodeType == atInt) {
         SVal = IntCols[SrcColIdx][CurrRowIdx];
         DVal = IntCols[DstColIdx][CurrRowIdx];
       } else {
         // string nodes are identified by the integer mapping of their string
         SVal = StrColMaps[SrcColIdx][CurrRowIdx];
         if (strlen(Context->StringVals.GetKey(SVal)) == 0) { continue; }  //illegal value
         DVal = StrColMaps[DstColIdx][CurrRowIdx];
         if (strlen(Context->StringVals.GetKey(DVal)) == 0) { continue; }  //illegal value
       }
       if (!Graph->IsNode(SVal)) { Graph->AddNode(SVal); }
       if (!Graph->IsNode(DVal)) { Graph->AddNode(DVal); }
     }

     // add edge and edge attributes
     Graph->AddEdge(SVal, DVal, CurrRowIdx);
     if (EdgeAttrV.Len() > 0) { AddEdgeAttributes(Graph, CurrRowIdx); }

     // get src and dst node attributes into hashmaps
     if (SrcNodeAttrV.Len() > 0) {
       AddNodeAttributes(SVal, SrcNodeAttrV, CurrRowIdx, NodeIntAttrs, NodeFltAttrs, NodeStrAttrs);
     }
     if (DstNodeAttrV.Len() > 0) {
       AddNodeAttributes(DVal, DstNodeAttrV, CurrRowIdx, NodeIntAttrs, NodeFltAttrs, NodeStrAttrs);
     }
   }

   // aggregate node attributes and add to graph
   if (SrcNodeAttrV.Len() > 0 || DstNodeAttrV.Len() > 0) {
     for (TNEANet::TNodeI NodeI = Graph->BegNI(); NodeI < Graph->EndNI(); NodeI++) {
       TInt NId = NodeI.GetId();
       // the per-node hashes are accessed by reference; they are only read here
       if (NodeIntAttrs.IsKey(NId)) {
         TStrIntVH& IntAttrVals = NodeIntAttrs.GetDat(NId);
         for (TStrIntVH::TIter it = IntAttrVals.BegI(); it < IntAttrVals.EndI(); it++) {
           TInt AttrVal = AggregateVector<TInt>(it.GetDat(), AggrPolicy);
           Graph->AddIntAttrDatN(NId, AttrVal, it.GetKey());
         }
       }
       if (NodeFltAttrs.IsKey(NId)) {
         TStrFltVH& FltAttrVals = NodeFltAttrs.GetDat(NId);
         for (TStrFltVH::TIter it = FltAttrVals.BegI(); it < FltAttrVals.EndI(); it++) {
           TFlt AttrVal = AggregateVector<TFlt>(it.GetDat(), AggrPolicy);
           Graph->AddFltAttrDatN(NId, AttrVal, it.GetKey());
         }
       }
       if (NodeStrAttrs.IsKey(NId)) {
         TStrStrVH& StrAttrVals = NodeStrAttrs.GetDat(NId);
         for (TStrStrVH::TIter it = StrAttrVals.BegI(); it < StrAttrVals.EndI(); it++) {
           TStr AttrVal = AggregateVector<TStr>(it.GetDat(), AggrPolicy);
           Graph->AddStrAttrDatN(NId, AttrVal, it.GetKey());
         }
       }
     }
   }

   return Graph;
 }


 void TTable::InitRowIdBuckets(int NumBuckets) {

   for (TInt i = 0; i < RowIdBuckets.Len(); i++) {

     RowIdBuckets[i].Clr();

   }

   RowIdBuckets.Clr();


   RowIdBuckets.Gen(NumBuckets);

   for (TInt i = 0; i < NumBuckets; i++) {

     RowIdBuckets[i].Gen(10, 0);

   }

 }


 // Distributes the valid rows into RowIdBuckets according to the value of the
 // integer column SplitAttr, using windows of width WindowSize that start every
 // JumpSize units from StartVal:
 //   JumpSize == 0          : expanding windows
 //   JumpSize == WindowSize : disjoint windows
 //   otherwise              : sliding (overlapping) windows
 // StartVal == TInt::Mn / EndVal == TInt::Mx are replaced by the minimum /
 // maximum value found in the column. Rows outside [StartVal, EndVal] are skipped.
 void TTable::FillBucketsByWindow(TStr SplitAttr, TInt JumpSize, TInt WindowSize, TInt StartVal, TInt EndVal) {
   Assert (JumpSize <= WindowSize);
   TInt SplitColId = GetColIdx(SplitAttr);

   if (StartVal == TInt::Mn || EndVal == TInt::Mx) {
     // calculate min and max value of the column 'SplitAttr' over the valid rows
     TInt Lowest = TInt::Mx;
     TInt Highest = TInt::Mn;
     for (TInt Row = 0; Row < Next.Len(); Row++) {
       if (Next[Row] == Invalid) { continue; }
       if (Lowest > IntCols[SplitColId][Row]) {
         Lowest = IntCols[SplitColId][Row];
       }
       if (Highest < IntCols[SplitColId][Row]) {
         Highest = IntCols[SplitColId][Row];
       }
     }

     if (StartVal == TInt::Mn) StartVal = Lowest;
     if (EndVal == TInt::Mx) EndVal = Highest;
   }

   // initialize buckets: a single bucket unless the windows actually advance
   int NumBuckets = 1;
   if (JumpSize > 0) {
     NumBuckets = (EndVal - StartVal)/JumpSize + 1;
   }

   InitRowIdBuckets(NumBuckets);

   // populate RowIdSets by computing the range of buckets for each row
   for (TInt Row = 0; Row < Next.Len(); Row++) {
     if (Next[Row] == Invalid) { continue; }
     int SplitVal = IntCols[SplitColId][Row];
     if (SplitVal < StartVal || SplitVal > EndVal) { continue; }
     int Offset = SplitVal - StartVal;  // position of the row relative to StartVal
     int FirstBucket, LastBucket;
     if (JumpSize == 0) { // expanding windows
       FirstBucket = Offset/WindowSize;
       LastBucket = NumBuckets-1;
     } else if (JumpSize == WindowSize) { // disjoint windows
       FirstBucket = LastBucket = Offset/JumpSize;
     } else { // sliding windows
       if (Offset < WindowSize) { FirstBucket = 0; }
       else { FirstBucket = (Offset-WindowSize)/JumpSize + 1; }
       LastBucket = Offset/JumpSize;
     }
     for (TInt b = FirstBucket; b <= LastBucket; b++) { RowIdBuckets[b].Add(Row); }
   }
 }


 // Creates one bucket per entry of SplitIntervals and places each row that is not
 // marked Invalid into every bucket j whose half-open interval
 // [SplitIntervals[j].Val1, SplitIntervals[j].Val2) contains the row's SplitAttr value.
 void TTable::FillBucketsByInterval(TStr SplitAttr, TIntPrV SplitIntervals) {
   TInt SplitColId = GetColIdx(SplitAttr);
   int NumBuckets = SplitIntervals.Len();
   InitRowIdBuckets(NumBuckets);

   for (TInt Row = 0; Row < Next.Len(); Row++) {
     if (Next[Row] != Invalid) {
       int RowVal = IntCols[SplitColId][Row];
       // intervals may overlap, so a row can end up in several buckets
       for (TInt Bucket = 0; Bucket < SplitIntervals.Len(); Bucket++) {
         if (RowVal < SplitIntervals[Bucket].Val1) { continue; }
         if (RowVal < SplitIntervals[Bucket].Val2) {
           RowIdBuckets[Bucket].Add(Row);
         }
       }
     }
   }
 }


 // Builds one graph per non-empty bucket of RowIdBuckets (in bucket order) and
 // returns them as a vector. Each BuildGraph call is independent of the others.
 TVec<PNEANet> TTable::GetGraphsFromSequence(TAttrAggr AggrPolicy) {
   TVec<PNEANet> Graphs;
   for (TInt Bucket = 0; Bucket < RowIdBuckets.Len(); Bucket++) {
     if (RowIdBuckets[Bucket].Len() != 0) {
       // empty buckets contribute no graph at all
       Graphs.Add(BuildGraph(RowIdBuckets[Bucket], AggrPolicy));
     }
   }
   return Graphs;
 }


 // Resets the bucket cursor, remembers the aggregation policy for later calls and
 // returns the graph of the first non-empty bucket (or whatever
 // GetNextGraphFromSequence returns when there is none).
 PNEANet TTable::GetFirstGraphFromSequence(TAttrAggr AggrPolicy) {
   this->AggrPolicy = AggrPolicy;
   // -1 so that the increment in GetNextGraphFromSequence lands on bucket 0
   CurrBucket = -1;
   PNEANet FirstGraph = GetNextGraphFromSequence();
   return FirstGraph;
 }


 // Advances CurrBucket to the next non-empty bucket and builds its graph using the
 // stored AggrPolicy. Returns NULL once no non-empty bucket is left.
 PNEANet TTable::GetNextGraphFromSequence() {
   for (CurrBucket++; CurrBucket < RowIdBuckets.Len(); CurrBucket++) {
     if (RowIdBuckets[CurrBucket].Len() != 0) {
       return BuildGraph(RowIdBuckets[CurrBucket], AggrPolicy);
     }
   }
   // ran past the last bucket
   return NULL;
 }


 // Splits the table into windows over SplitAttr and builds one graph per non-empty window.
 // Only integer SplitAttr supported
 // Setting JumpSize = WindowSize will give disjoint windows
 // Setting JumpSize < WindowSize will give sliding windows
 // Setting JumpSize > WindowSize will drop certain rows (currently not supported)
 // Setting JumpSize = 0 will give expanding windows (i.e. starting at 0 and ending at i*WindowSize)
 // To set the range of values of SplitAttr to be considered, use StartVal and EndVal (inclusive)
 // If StartVal == TInt::Mn, then the buckets will start from the min value of SplitAttr in the table.
 // If EndVal == TInt::Mx, then the buckets will end at the max value of SplitAttr in the table.
 TVec<PNEANet> TTable::ToGraphSequence(TStr SplitAttr, TAttrAggr AggrPolicy, TInt WindowSize, TInt JumpSize, TInt StartVal, TInt EndVal) {
   // note the argument order: FillBucketsByWindow takes JumpSize before WindowSize
   FillBucketsByWindow(SplitAttr, JumpSize, WindowSize, StartVal, EndVal);
   // (a leftover debug printf("buckets filled\n") was removed here; the iterator
   //  variant of this function never printed anything)
   return GetGraphsFromSequence(AggrPolicy);
 }


 // Buckets the rows by the caller-supplied intervals over SplitAttr and returns one
 // graph per non-empty bucket.
 TVec<PNEANet> TTable::ToVarGraphSequence(TStr SplitAttr, TAttrAggr AggrPolicy, TIntPrV SplitIntervals) {
   FillBucketsByInterval(SplitAttr, SplitIntervals);
   TVec<PNEANet> Graphs = GetGraphsFromSequence(AggrPolicy);
   return Graphs;
 }


 // One graph per distinct value of GroupAttr: implemented as disjoint windows of
 // width 1 spanning the whole value range of the column.
 TVec<PNEANet> TTable::ToGraphPerGroup(TStr GroupAttr, TAttrAggr AggrPolicy) {
   TInt WindowSize(1);
   TInt JumpSize(1);
   return ToGraphSequence(GroupAttr, AggrPolicy, WindowSize, JumpSize, TInt::Mn, TInt::Mx);
 }


 // Iterator flavour of ToGraphSequence: fills the window buckets and returns only the
 // first graph; the remaining graphs are produced one by one by NextGraphIterator.
 PNEANet TTable::ToGraphSequenceIterator(TStr SplitAttr, TAttrAggr AggrPolicy, TInt WindowSize, TInt JumpSize, TInt StartVal, TInt EndVal) {
   // note the argument order: FillBucketsByWindow takes JumpSize before WindowSize
   FillBucketsByWindow(SplitAttr, JumpSize, WindowSize, StartVal, EndVal);
   PNEANet FirstGraph = GetFirstGraphFromSequence(AggrPolicy);
   return FirstGraph;
 }


 // Iterator flavour of ToVarGraphSequence: fills the interval buckets and returns only
 // the first graph; continue with NextGraphIterator.
 PNEANet TTable::ToVarGraphSequenceIterator(TStr SplitAttr, TAttrAggr AggrPolicy, TIntPrV SplitIntervals) {
   FillBucketsByInterval(SplitAttr, SplitIntervals);
   PNEANet FirstGraph = GetFirstGraphFromSequence(AggrPolicy);
   return FirstGraph;
 }


 // Iterator flavour of ToGraphPerGroup: disjoint windows of width 1 over the whole
 // value range of GroupAttr; returns the first graph only.
 PNEANet TTable::ToGraphPerGroupIterator(TStr GroupAttr, TAttrAggr AggrPolicy) {
   TInt WindowSize(1);
   TInt JumpSize(1);
   return ToGraphSequenceIterator(GroupAttr, AggrPolicy, WindowSize, JumpSize, TInt::Mn, TInt::Mx);
 }


 // Returns the next graph of the sequence currently being iterated.
 // Precondition: one of the ToGraph*Iterator functions above was called first, since
 // they are what fill the buckets and set the aggregation policy used here.
 PNEANet TTable::NextGraphIterator() {
   PNEANet NextGraph = GetNextGraphFromSequence();
   return NextGraph;
 }


 // Returns true when no further graph can be obtained from the current sequence,
 // i.e. when every bucket after CurrBucket is empty (or there is none).
 // GetNextGraphFromSequence skips empty buckets, so merely comparing CurrBucket with
 // the last bucket index would report "not last" while the next call returns NULL.
 TBool TTable::IsLastGraphOfSequence() {
   for (int Bucket = CurrBucket + 1; Bucket < RowIdBuckets.Len(); Bucket++) {
     if (RowIdBuckets[Bucket].Len() > 0) { return false; }
   }
   return true;
 }


 // Builds a table with one row per node of Network. Column 0 is "node_id"; it is
 // followed by one column per int, float and string node attribute. The attribute
 // names are read from the first node iterator of the network.
 PTable TTable::GetNodeTable(const PNEANet& Network, TTableContext* Context) {
   TNEANet::TNodeI NodeI = Network->BegNI();

   // collect attribute names per type
   TStrV IntAttrNames;
   TStrV FltAttrNames;
   TStrV StrAttrNames;
   NodeI.GetIntAttrNames(IntAttrNames);
   NodeI.GetFltAttrNames(FltAttrNames);
   NodeI.GetStrAttrNames(StrAttrNames);

   // schema: id column first, then int / float / string attributes
   Schema SR;
   SR.Add(TPair<TStr,TAttrType>("node_id",atInt));
   for (TInt a = 0; a < IntAttrNames.Len(); a++) {
     SR.Add(TPair<TStr,TAttrType>(IntAttrNames[a],atInt));
   }
   for (TInt a = 0; a < FltAttrNames.Len(); a++) {
     SR.Add(TPair<TStr,TAttrType>(FltAttrNames[a],atFlt));
   }
   for (TInt a = 0; a < StrAttrNames.Len(); a++) {
     SR.Add(TPair<TStr,TAttrType>(StrAttrNames[a],atStr));
   }

   PTable T = New(SR, Context);

   // append one row per node
   TInt RowCnt = 0;
   for (; NodeI < Network->EndNI(); NodeI++) {
     T->IntCols[0].Add(NodeI.GetId());
     for (TInt a = 0; a < IntAttrNames.Len(); a++) {
       // +1: int column 0 is occupied by node_id
       T->IntCols[a+1].Add(Network->GetIntAttrDatN(NodeI,IntAttrNames[a]));
     }
     for (TInt a = 0; a < FltAttrNames.Len(); a++) {
       T->FltCols[a].Add(Network->GetFltAttrDatN(NodeI,FltAttrNames[a]));
     }
     for (TInt a = 0; a < StrAttrNames.Len(); a++) {
       T->AddStrVal(a, Network->GetStrAttrDatN(NodeI,StrAttrNames[a]));
     }
     RowCnt++;
   }

   // row bookkeeping: every row is valid and linked to its successor
   T->NumRows = RowCnt;
   T->NumValidRows = T->NumRows;
   T->Next = TIntV(T->NumRows,0);
   for (TInt Succ = 1; Succ < T->NumRows; Succ++) {
     T->Next.Add(Succ);
   }
   T->LastValidRow = T->NumRows-1;
   T->Next.Add(Last);
   return T;
 }


 // Builds a table with one row per edge of Network. The first three int columns are
 // "edg_id", "src_id" and "dst_id"; they are followed by one column per int, float
 // and string edge attribute. Attribute names are read from the first edge iterator.
 PTable TTable::GetEdgeTable(const PNEANet& Network, TTableContext* Context) {
   TNEANet::TEdgeI EdgeI = Network->BegEI();

   // collect attribute names per type
   TStrV IntAttrNames;
   TStrV FltAttrNames;
   TStrV StrAttrNames;
   EdgeI.GetIntAttrNames(IntAttrNames);
   EdgeI.GetFltAttrNames(FltAttrNames);
   EdgeI.GetStrAttrNames(StrAttrNames);

   // schema: the three fixed id columns, then int / float / string attributes
   Schema SR;
   SR.Add(TPair<TStr,TAttrType>("edg_id",atInt));
   SR.Add(TPair<TStr,TAttrType>("src_id",atInt));
   SR.Add(TPair<TStr,TAttrType>("dst_id",atInt));
   for (TInt a = 0; a < IntAttrNames.Len(); a++) {
     SR.Add(TPair<TStr,TAttrType>(IntAttrNames[a],atInt));
   }
   for (TInt a = 0; a < FltAttrNames.Len(); a++) {
     SR.Add(TPair<TStr,TAttrType>(FltAttrNames[a],atFlt));
   }
   for (TInt a = 0; a < StrAttrNames.Len(); a++) {
     SR.Add(TPair<TStr,TAttrType>(StrAttrNames[a],atStr));
   }

   PTable T = New(SR, Context);

   // append one row per edge
   TInt RowCnt = 0;
   for (; EdgeI < Network->EndEI(); EdgeI++) {
     T->IntCols[0].Add(EdgeI.GetId());
     T->IntCols[1].Add(EdgeI.GetSrcNId());
     T->IntCols[2].Add(EdgeI.GetDstNId());
     for (TInt a = 0; a < IntAttrNames.Len(); a++) {
       // +3: int columns 0..2 hold the edge / source / destination ids
       T->IntCols[a+3].Add(Network->GetIntAttrDatE(EdgeI,IntAttrNames[a]));
     }
     for (TInt a = 0; a < FltAttrNames.Len(); a++) {
       T->FltCols[a].Add(Network->GetFltAttrDatE(EdgeI,FltAttrNames[a]));
     }
     for (TInt a = 0; a < StrAttrNames.Len(); a++) {
       T->AddStrVal(a, Network->GetStrAttrDatE(EdgeI,StrAttrNames[a]));
     }
     RowCnt++;
   }

   // row bookkeeping: every row is valid and linked to its successor
   T->NumRows = RowCnt;
   T->NumValidRows = T->NumRows;
   T->Next = TIntV(T->NumRows,0);
   for (TInt Succ = 1; Succ < T->NumRows; Succ++) {
     T->Next.Add(Succ);
   }
   T->LastValidRow = T->NumRows-1;
   T->Next.Add(Last);
   return T;
 }


 #ifdef GCC_ATOMIC

 // Builds a two-column ("src_id", "dst_id") table with one row per edge of Network.
 // The edge range is first cut sequentially into partitions of PartitionSize edges;
 // the partitions are then copied into the pre-sized table in parallel, each one into
 // a contiguous row range reserved through GetEmptyRowsStart.
 PTable TTable::GetEdgeTablePN(const PNGraphMP& Network, TTableContext* Context){
   Schema SR;
   SR.Add(TPair<TStr,TAttrType>("src_id",atInt));
   SR.Add(TPair<TStr,TAttrType>("dst_id",atInt));

   TNGraphMP::TEdgeI FirstEI = Network->BegEI();
   PTable T = New(SR, Context);
   TInt NumEdges = Network->GetEdges();
   TInt NumPartitions = omp_get_max_threads()*CHUNKS_PER_THREAD;
   TInt PartitionSize = NumEdges/NumPartitions;
   // With fewer edges than partitions the quotient is 0. A size of 0 made the loop
   // below emit an empty partition, and reserving 0 rows in GetEmptyRowsStart writes
   // Next[start-1], i.e. Next[-1] on the still-empty table.
   if (PartitionSize < 1) { PartitionSize = 1; }

   typedef TPair<TNGraphMP::TEdgeI, TNGraphMP::TEdgeI> TEIPr;
   TVec<TEIPr> Partitions;
   TIntV PartitionSizes;
   TNGraphMP::TEdgeI currStart = FirstEI;
   TInt currCount = 0;
   while (FirstEI < Network->EndEI()){
     if (currCount == PartitionSize) {
       // close the current partition [currStart, FirstEI) and start a new one
       Partitions.Add(TEIPr(currStart, FirstEI));
       currStart = FirstEI;
       PartitionSizes.Add(currCount);
       currCount = 0;
     }
     FirstEI++;
     currCount++;
   }
   // trailing partition; skipped for a graph without edges so that no empty
   // partition is ever handed to GetEmptyRowsStart
   if (currCount > 0) {
     Partitions.Add(TEIPr(currStart, FirstEI));
     PartitionSizes.Add(currCount);
   }

   // must happen before the parallel region: GetEmptyRowsStart expects the storage
   // to be allocated already
   T->ResizeTable(NumEdges);
   #pragma omp parallel for schedule(dynamic, CHUNKS_PER_THREAD)
   for (int p = 0; p < Partitions.Len(); p++) {
     TNGraphMP::TEdgeI EdgeI = Partitions[p].GetVal1();
     TNGraphMP::TEdgeI EndI = Partitions[p].GetVal2();
     int start = T->GetEmptyRowsStart(PartitionSizes[p]);
     while (EdgeI < EndI) {
       T->IntCols[0][start] = EdgeI.GetSrcNId();
       T->IntCols[1][start] = EdgeI.GetDstNId();
       EdgeI++;
       // link rows inside the partition; the partition's last row keeps the value
       // written by GetEmptyRowsStart
       if (EdgeI < EndI) { T->Next[start] = start+1;}
       start++;
     }
   }

   Assert(T->NumRows == NumEdges);
   return T;
 }

 #endif // GCC_ATOMIC


 // Builds a two-column table with one row per node of Network:
 //   NodeAttrName     - the node attribute of type NodeAttrType identifying the node
 //   PropertyAttrName - the float value stored in Property under the node's id
 // Property.GetDat is called for every node id, so Property is expected to contain an
 // entry for each node (NOTE(review): behaviour for a missing key depends on
 // TIntFltH::GetDat - confirm).
 PTable TTable::GetFltNodePropertyTable(const PNEANet& Network, const TIntFltH& Property,
  const TStr& NodeAttrName, const TAttrType& NodeAttrType, const TStr& PropertyAttrName,
  TTableContext* Context) {
   Schema SR;
   // Determine type of node id
   SR.Add(TPair<TStr,TAttrType>(NodeAttrName,NodeAttrType));
   SR.Add(TPair<TStr,TAttrType>(PropertyAttrName,atFlt));
   PTable T = New(SR, Context);
   TInt NodeColIdx = T->GetColIdx(NodeAttrName);
   // The property column is FltCols[0] only when the node attribute is not itself a
   // float column. Hard-coding 0 appended the property to the node column (and left
   // the property column empty) whenever NodeAttrType == atFlt.
   TInt PropertyColIdx = T->GetColIdx(PropertyAttrName);
   TInt Cnt = 0;
   // populate table columns
   for (TNEANet::TNodeI NodeI = Network->BegNI(); NodeI < Network->EndNI(); NodeI++) {
     switch (NodeAttrType) {
       case atInt:
         T->IntCols[NodeColIdx].Add(Network->GetIntAttrDatN(NodeI,NodeAttrName));
         break;
       case atFlt:
         T->FltCols[NodeColIdx].Add(Network->GetFltAttrDatN(NodeI,NodeAttrName));
         break;
       case atStr:
         T->AddStrVal(NodeColIdx, Network->GetStrAttrDatN(NodeI,NodeAttrName));
         break;
     }
     T->FltCols[PropertyColIdx].Add(Property.GetDat(NodeI.GetId()));
     Cnt++;
   }
   // set number of rows and "Next" vector
   T->NumRows = Cnt;
   T->NumValidRows = T->NumRows;
   T->Next = TIntV(T->NumRows,0);
   for (TInt i = 0; i < T->NumRows-1; i++) {
     T->Next.Add(i+1);
   }
   T->LastValidRow = T->NumRows-1;
   T->Next.Add(Last);
   return T;
 }


 /*** Special Filters ***/

 // Orders this table by (GroupBy, OrderCol) - or by OrderCol alone when GroupBy is
 // empty - and returns a joint table pairing every row with each of its up to K
 // successors in that order. When GroupBy is given, pairing stops at the first
 // successor whose GroupBy value differs from the row's own value.
 // If RankColName is non-empty it is passed on to Order() together with `true`.
 // Side effect: this table itself is re-ordered.
 PTable TTable::IsNextK(const TStr& OrderCol, TInt K, const TStr& GroupBy, const TStr& RankColName) {
   const bool HasGroup = !GroupBy.Empty();
   TStrV OrderBy;
   if (HasGroup) { OrderBy.Add(GroupBy); }
   OrderBy.Add(OrderCol);
   if (RankColName.Empty()) {
     Order(OrderBy);
   } else {
     Order(OrderBy, RankColName, true);
   }
   // An empty GroupBy names no column: it must not be handed to GetColType or to the
   // Get*Val lookups below. The placeholder type is never inspected in that case.
   TAttrType GroupByAttrType = HasGroup ? GetColType(GroupBy) : atInt;
   PTable T = InitializeJointTable(*this);
   for (TRowIterator RI = BegRI(); RI < EndRI(); RI++) {
     TInt Succ = RI.GetRowIdx();
     TBool OutOfGroup = false;
     for (TInt i = 0; i < K; i++) {
       Succ = Next[Succ];
       if (Succ == Last) { break; }
       if (HasGroup) {
         switch (GroupByAttrType) {
           case atInt:
             if (GetIntVal(GroupBy, Succ) != RI.GetIntAttr(GroupBy)) { OutOfGroup = true; }
             break;
           case atFlt:
             if (GetFltVal(GroupBy, Succ) != RI.GetFltAttr(GroupBy)) { OutOfGroup = true; }
             break;
           case atStr:
             if (GetStrVal(GroupBy, Succ) != RI.GetStrAttr(GroupBy)) { OutOfGroup = true; }
             break;
         }
       }
       if (OutOfGroup) { break; }  // break out of inner for loop
       T->AddJointRow(*this, *this, RI.GetRowIdx(), Succ);
     }
   }
   return T;
 }


 void TTable::PrintSize(){

         printf("Total number of rows: %d\n", NumRows.Val);

         printf("Number of valid rows: %d\n", NumValidRows.Val);

         printf("Number of Int columns: %d\n", IntCols.Len());

         printf("Number of Flt columns: %d\n", FltCols.Len());

         printf("Number of Str columns: %d\n", StrColMaps.Len());

         TSize MemUsed = GetMemUsedKB();

         printf("Approximate table size is %s KB\n", TUInt64::GetStr(MemUsed).CStr());

 }


 // Approximate memory used by this table, in units of 1000 bytes. Every component is
 // divided by 1000 on its own before being summed, so the total is rounded down per
 // component.
 TSize TTable::GetMemUsedKB() {
   TSize TotalKB = 0;
   // row linkage
   TotalKB += Next.GetMemUsed()/1000;
   // column storage, one vector per column
   int Col;
   for (Col = 0; Col < IntCols.Len(); Col++) {
     TotalKB += IntCols[Col].GetMemUsed()/1000;
   }
   for (Col = 0; Col < FltCols.Len(); Col++) {
     TotalKB += FltCols[Col].GetMemUsed()/1000;
   }
   for (Col = 0; Col < StrColMaps.Len(); Col++) {
     TotalKB += StrColMaps[Col].GetMemUsed()/1000;
   }
   // auxiliary maps and buckets
   TotalKB += RowIdMap.GetMemUsed()/1000;
   TotalKB += GroupIDMapping.GetMemUsed()/1000;
   TotalKB += GroupMapping.GetMemUsed()/1000;
   TotalKB += RowIdBuckets.GetMemUsed() / 1000;
   return TotalKB;
 }


 void TTable::PrintContextSize(){

         printf("Number of strings in pool: ");

         printf("%d\n", Context->StringVals.Len());

         printf("Number of entries in hash table: ");

         printf("%d\n", Context->StringVals.Reserved());

         TSize MemUsed = GetContextMemUsedKB();

         printf("Approximate context size is %s KB\n",

           TUInt64::GetStr(MemUsed).CStr());

 }


 // Approximate memory used by the context's string pool, in units of 1000 bytes.
 // The raw GetMemUsed() value is scaled by 1000 exactly as GetMemUsedKB does for the
 // table itself; previously the unscaled value was returned and then printed as "KB".
 TSize TTable::GetContextMemUsedKB(){
   TSize ApproxSize = 0;
   ApproxSize += Context->StringVals.GetMemUsed()/1000;
   return ApproxSize;
 }


 void TTable::AddTable(const TTable& T) {

   //for (TInt c = 0; c < S.Len(); c++) {

   //  if (S[c] != T.S[c]) { printf("(%s,%d) != (%s,%d)\n", S[c].Val1.CStr(), S[c].Val2, T.S[c].Val1.CStr(), T.S[c].Val2); TExcept::Throw("when adding tables, their schemas must match!"); }

   //}

   for (TInt c = 0; c < Sch.Len(); c++) {

     TStr ColName = GetSchemaColName(c);

     TInt ColIdx = GetColIdx(ColName);

     TInt TColIdx = ColName == IdColName ? T.GetColIdx(T.IdColName) : T.GetColIdx(ColName);

     if (TColIdx < 0) { TExcept::Throw("when adding a table, it must contain all columns of source table!"); }

     switch (GetColType(ColName)) {

     case atInt:

        IntCols[ColIdx].AddV(T.IntCols[TColIdx]);

        break;

     case atFlt:

        FltCols[ColIdx].AddV(T.FltCols[TColIdx]);

        break;

     case atStr:

        StrColMaps[ColIdx].AddV(T.StrColMaps[TColIdx]);

        break;

     }

   }


   TIntV TNext(T.Next);

   for (TInt i = 0; i < TNext.Len(); i++) {

     if (TNext[i] != Last && TNext[i] != Invalid) { TNext[i] += NumRows; }

   }


   Next.AddV(TNext);

   // checks if table is empty

   if (LastValidRow >= 0) {

     Next[LastValidRow] = NumRows + T.FirstValidRow;

   }

   LastValidRow = NumRows + T.LastValidRow;

   NumRows += T.NumRows;

   NumValidRows += T.NumValidRows;

 }


 // returns physical indices of rows of given table present in our table

 // we assume that schema matches exactly (including index of id cols)

 void TTable::GetCollidingRows(const TTable& Table, THashSet<TInt>& Collisions) {

   TIntV UniqueVec;

   THash<TGroupKey, TPair<TInt, TIntV> >Grouping;

   TStrV GroupBy;


   // indices of columns of each type

   TIntV IntGroupByCols;

   TIntV FltGroupByCols;

   TIntV StrGroupByCols;


   TInt IKLen, FKLen, SKLen;


   // check that schemas match

   for (TInt c = 0; c < Sch.Len(); c++) {

     if (Sch[c].Val1 == IdColName) {

       if (Table.Sch[c].Val1 != Table.GetIdColName()) {

         TExcept::Throw("GetCollidingRows: schemas do not match!");

       }

       continue;

     }

     if (Sch[c] != Table.Sch[c]) {

       printf("(%s,%d) != (%s,%d)\n", Sch[c].Val1.CStr(), Sch[c].Val2, Table.Sch[c].Val1.CStr(), Table.Sch[c].Val2);

       TExcept::Throw("GetCollidingRows: schemas do not match!");

     }

     GroupBy.Add(NormalizeColName(Sch[c].Val1));

     TPair<TAttrType, TInt> ColType = Table.GetColTypeMap(Sch[c].Val1);

     switch (ColType.Val1) {

       case atInt:

         IntGroupByCols.Add(ColType.Val2);

         break;

       case atFlt:

         FltGroupByCols.Add(ColType.Val2);

         break;

       case atStr:

         StrGroupByCols.Add(ColType.Val2);

         break;

     }

   }


   IKLen = IntGroupByCols.Len();

   FKLen = FltGroupByCols.Len();

   SKLen = StrGroupByCols.Len();


   // group rows of first table

   GroupAux(GroupBy, Grouping, true, "", false, UniqueVec, true);


   // find colliding rows of second table

   for (TRowIterator it = Table.BegRI(); it < Table.EndRI(); it++) {

     // read keys from row

     TIntV IKey(IKLen + SKLen, 0);

     TFltV FKey(FKLen, 0);


     // find group key

     for (TInt c = 0; c < IKLen; c++) {

       IKey.Add(it.GetIntAttr(IntGroupByCols[c]));

     }

     for (TInt c = 0; c < FKLen; c++) {

       FKey.Add(it.GetFltAttr(FltGroupByCols[c]));

     }

     for (TInt c = 0; c < SKLen; c++) {

       IKey.Add(it.GetStrMapById(StrGroupByCols[c]));

     }

     // look for group matching the key

     TGroupKey GroupKey = TGroupKey(IKey, FKey);


     TInt RowIdx = it.GetRowIdx();

     if (Grouping.IsKey(GroupKey)) {

       // row exists in first table

       Collisions.AddKey(RowIdx);

     }

   }

 }


 void TTable::StoreIntCol(const TStr& ColName, const TIntV& ColVals) {

   if (ColVals.Len() != NumRows) {

     printf("new column dimension must agree with number of rows\n");

     return;

   }

   AddSchemaCol(ColName, atInt);

   IntCols.Add(TIntV(NumRows));

   TInt ColIdx = IntCols.Len()-1;

   TInt i = 0;

   for (TRowIterator RI = BegRI(); RI < EndRI(); RI++) {

     IntCols[ColIdx][RI.GetRowIdx()] = ColVals[i];

     i++;

   }

   TInt L = IntCols.Len();

   AddColType(ColName, atInt, L-1);

 }


 void TTable::StoreFltCol(const TStr& ColName, const TFltV& ColVals) {

   if (ColVals.Len() != NumRows) {

     printf("new column dimension must agree with number of rows\n");

     return;

   }

   AddSchemaCol(ColName, atFlt);

   FltCols.Add(TFltV(NumRows));

   TInt ColIdx = FltCols.Len()-1;

   TInt i = 0;

   for (TRowIterator RI = BegRI(); RI < EndRI(); RI++) {

     FltCols[ColIdx][RI.GetRowIdx()] = ColVals[i];

     i++;

   }

   TInt L = FltCols.Len();

   AddColType(ColName, atFlt, L-1);

 }


 // Adds a new string column named ColName. ColVals must have exactly NumRows entries,
 // otherwise a message is printed and the table is left unchanged. Every string is
 // looked up in (and, if absent, added to) the context's string pool; the column
 // stores the resulting pool key ids. The i-th row visited by BegRI .. EndRI receives
 // ColVals[i].
 void TTable::StoreStrCol(const TStr& ColName, const TStrV& ColVals) {
   if (ColVals.Len() != NumRows) {
     printf("new column dimension must agree with number of rows\n");
     return;
   }
   AddSchemaCol(ColName, atStr);
   // Same construction as StoreIntCol / StoreFltCol: a vector of NumRows elements.
   // The former TIntV(NumRows,0) is the reserve-then-Add form (cf. Next = TIntV(n,0)
   // followed by Add elsewhere in this file), so indexing it below was out of range.
   StrColMaps.Add(TIntV(NumRows));
   // index of the column just added - must come from StrColMaps, not FltCols
   TInt ColIdx = StrColMaps.Len()-1;
   TInt i = 0;
   for (TRowIterator RI = BegRI(); RI < EndRI(); RI++) {
     TInt Key = Context->StringVals.GetKeyId(ColVals[i]);
     // a string new to the pool must be stored under the id AddKey assigns to it,
     // not under the -1 "not found" marker
     if (Key == -1) { Key = Context->StringVals.AddKey(ColVals[i]); }
     StrColMaps[ColIdx][RI.GetRowIdx()] = Key;
     i++;
   }
   TInt L = StrColMaps.Len();
   AddColType(ColName, atStr, L-1);
 }


 void TTable::UpdateTableForNewRow() {

   if (LastValidRow >= 0) {

     Next[LastValidRow] = NumRows;

   }

   Next.Add(Last);

   LastValidRow = NumRows;


   NumRows++;

   NumValidRows++;

 }


 #ifdef GCC_ATOMIC

 // Sets the float column UpdateColIdx to DefaultFltVal for the rows covered by the
 // ranges that GetPartitionRanges returns; each range is processed by one iteration of
 // an OpenMP parallel loop. Throws when the table is not in MP mode.
 void TTable::SetFltColToConstMP(TInt UpdateColIdx, TFlt DefaultFltVal){
   if(!GetMP()){ TExcept::Throw("Not Using MP!");}
   TIntPrV Partitions;
   GetPartitionRanges(Partitions, omp_get_max_threads()*CHUNKS_PER_THREAD);
   // (an unused "PartitionSize" local was removed: it read Partitions[0] and therefore
   //  failed whenever no partition was produced)
   #pragma omp parallel for schedule(dynamic, CHUNKS_PER_THREAD)
   for (int i = 0; i < Partitions.Len(); i++){
     TRowIterator RowI(Partitions[i].GetVal1(), this);
     TRowIterator EndI(Partitions[i].GetVal2(), this);
     while(RowI < EndI){
       FltCols[UpdateColIdx][RowI.GetRowIdx()] = DefaultFltVal;
       RowI++;
     }
   }
 }


 // OP RS 2016/06/30: this wrapper function is required
 //   for the code to compile on Mac OS X gcc 4.2.1
 // Atomically changes *lock from 0 to 1. Returns non-zero when the swap took place
 // (i.e. *lock was 0) and 0 when *lock already held a different value.
 int sync_bool_compare_and_swap(int *lock) {
   const bool Swapped = __sync_bool_compare_and_swap(lock, 0, 1);
   return Swapped ? 1 : 0;
 }


 void TTable::UpdateFltFromTableMP(const TStr& KeyAttr, const TStr& UpdateAttr,

     const TTable& Table, const TStr& FKeyAttr, const TStr& ReadAttr,

     TFlt DefaultFltVal) {

         if (!GetMP()) {

     TExcept::Throw("Not Using MP!");

   }


   TAttrType KeyType = GetColType(KeyAttr);

   TAttrType FKeyType = Table.GetColType(FKeyAttr);

   if(KeyType != FKeyType){TExcept::Throw("Key Type Mismatch");}

   if(GetColType(UpdateAttr) != atFlt || Table.GetColType(ReadAttr) != atFlt){

     TExcept::Throw("Expecting Float values");

   }

   TStr NKeyAttr = NormalizeColName(KeyAttr);

   //TStr NUpdateAttr = NormalizeColName(UpdateAttr);

   //TStr NFKeyAttr = Table.NormalizeColName(FKeyAttr);

   //TStr NReadAttr = Table.NormalizeColName(ReadAttr);

   TInt UpdateColIdx = GetColIdx(UpdateAttr);

   TInt FKeyColIdx = GetColIdx(FKeyAttr);

   TInt ReadColIdx = GetColIdx(ReadAttr);


   // TODO: this should be a generic vector operation

   SetFltColToConstMP(UpdateColIdx, DefaultFltVal);


         TIntPrV Partitions;

         Table.GetPartitionRanges(Partitions, omp_get_max_threads()*CHUNKS_PER_THREAD);

         TInt PartitionSize = Partitions[0].GetVal2()-Partitions[0].GetVal1()+1;

         TIntV Locks(NumRows);

         Locks.PutAll(0);        // need to parallelize this...


   switch (KeyType) {

     // TODO: add support for other cases of KeyType

     case atInt: {

         THashMP<TInt,TIntV> Grouping;

         // must use physical row ids

         GroupByIntColMP(NKeyAttr, Grouping, true);

         #pragma omp parallel for schedule(dynamic, CHUNKS_PER_THREAD) // num_threads(1)

                           for (int i = 0; i < Partitions.Len(); i++) {

                                   TRowIterator RowI(Partitions[i].GetVal1(), &Table);

                                   TRowIterator EndI(Partitions[i].GetVal2(), &Table);

                                   while (RowI < EndI) {

             TInt K = RowI.GetIntAttr(FKeyColIdx);

             if (Grouping.IsKey(K)) {

               TIntV& UpdateRows = Grouping.GetDat(K);

               for (int j = 0; j < UpdateRows.Len(); j++) {

                 int* lock = &Locks[UpdateRows[j]].Val;

                 // OP RS 2016/06/30: needed to define a wrapper function

                 //   for the code to compile on Mac OS X gcc 4.2.1

                 //if (!__sync_bool_compare_and_swap(lock, 0, 1)) {

                 if (!sync_bool_compare_and_swap(lock)) {

                   continue;

                 }

                 //printf("key = %d, row = %d, old_score = %f\n", K.Val, j, UpdateRows[j].Val, FltCols[UpdateColIdx][UpdateRows[j]].Val);

                                                           FltCols[UpdateColIdx][UpdateRows[j]] = RowI.GetFltAttr(ReadColIdx);

                                                           //printf("key = %d, new_score = %f\n", K.Val, j, FltCols[UpdateColIdx][UpdateRows[j]].Val);

               } // end of for loop

             } // end of if statement

             RowI++;

           } // end of while loop

         }       // end of for loop

       } // end of case atInt

       break;

     default:

       break;

   } // end of outer switch statement

 }

 #endif  // GCC_ATOMIC


 void TTable::UpdateFltFromTable(const TStr& KeyAttr, const TStr& UpdateAttr, const TTable& Table,

   const TStr& FKeyAttr, const TStr& ReadAttr, TFlt DefaultFltVal){

   if(!IsColName(KeyAttr)){ TExcept::Throw("Bad KeyAttr parameter");}

   if(!IsColName(UpdateAttr)){ TExcept::Throw("Bad UpdateAttr parameter");}

   if(!Table.IsColName(FKeyAttr)){ TExcept::Throw("Bad FKeyAttr parameter");}

   if(!Table.IsColName(ReadAttr)){ TExcept::Throw("Bad ReadAttr parameter");}


 #ifdef GCC_ATOMIC

   if(GetMP()){

     UpdateFltFromTableMP(KeyAttr, UpdateAttr,Table, FKeyAttr, ReadAttr, DefaultFltVal);

     return;

   }

 #endif  // GCC_ATOMIC


   TAttrType KeyType = GetColType(KeyAttr);

   TAttrType FKeyType = Table.GetColType(FKeyAttr);

   if(KeyType != FKeyType){TExcept::Throw("Key Type Mismatch");}

   if(GetColType(UpdateAttr) != atFlt || Table.GetColType(ReadAttr) != atFlt){

     TExcept::Throw("Expecting Float values");

   }

   TStr NKeyAttr = NormalizeColName(KeyAttr);

   TStr NUpdateAttr = NormalizeColName(UpdateAttr);

   TStr NFKeyAttr = Table.NormalizeColName(FKeyAttr);

   TStr NReadAttr = Table.NormalizeColName(ReadAttr);

   TInt UpdateColIdx = GetColIdx(UpdateAttr);


   for(TRowIterator iter = BegRI(); iter < EndRI(); iter++){

     FltCols[UpdateColIdx][iter.GetRowIdx()] = DefaultFltVal;

   }


   switch(KeyType) {

     // TODO: add support for other cases of KeyType

     case atInt: {

         TIntIntVH Grouping;

         GroupByIntCol(NKeyAttr, Grouping, TIntV(), true, true);

         for (TRowIterator RI = Table.BegRI(); RI < Table.EndRI(); RI++) {

           TInt K = RI.GetIntAttr(NFKeyAttr);

           if (Grouping.IsKey(K)) {

             TIntV& UpdateRows = Grouping.GetDat(K);

             for (int i = 0; i < UpdateRows.Len(); i++) {

               FltCols[UpdateColIdx][UpdateRows[i]] = RI.GetFltAttr(NReadAttr);

             } // end of for loop

           } // end of if statement

         } // end of for loop

       } // end of case atInt

       break;

     default:

       break;

   } // end of outer switch statement

 }


 // can ONLY be called when a table is being initialised (before IDs are allocated)

 void TTable::AddRowI(const TRowIterator& RI) {

   for (TInt c = 0; c < Sch.Len(); c++) {

     TStr ColName = GetSchemaColName(c);

     if (ColName == IdColName) { continue; }


     TInt ColIdx = GetColIdx(ColName);


     switch (GetColType(ColName)) {

     case atInt:

        IntCols[ColIdx].Add(RI.GetIntAttr(ColName));

        break;

     case atFlt:

        FltCols[ColIdx].Add(RI.GetFltAttr(ColName));

        break;

     case atStr:

        StrColMaps[ColIdx].Add(RI.GetStrMapByName(ColName));

        break;

     }

   }

   UpdateTableForNewRow();

 }


 void TTable::AddRowV(const TIntV& IntVals, const TFltV& FltVals, const TStrV& StrVals) {

   for (TInt c = 0; c < IntVals.Len(); c++) {

     IntCols[c].Add(IntVals[c]);

   }

   for (TInt c = 0; c < FltVals.Len(); c++) {

     FltCols[c].Add(FltVals[c]);

   }

   for (TInt c = 0; c < StrVals.Len(); c++) {

     AddStrVal(c, StrVals[c]);

   }

   UpdateTableForNewRow();

 }


 // Resizes the storage of every column and of the Next vector to RowCount physical
 // rows: Reserve(RowCount, RowCount) when growing, Trunc(RowCount) when shrinking,
 // nothing when Next already has that length. RowCount == 0 additionally resets the
 // valid-row bookkeeping to "empty table". With USE_OPENMP the vectors are processed
 // by a parallel loop, one vector per iteration.
 void TTable::ResizeTable(int RowCount) {
   if (RowCount == 0) {
     // initialize empty table
     NumValidRows = 0;
     FirstValidRow = TTable::Invalid;
     LastValidRow = TTable::Invalid;
   }
   const int CurrLen = Next.Len();
   if (CurrLen == RowCount) { return; }
   const bool Grow = (CurrLen < RowCount);

   // flat index space over all vectors: [int cols | flt cols | str cols | Next]
   const int FltOffset = IntCols.Len();
   const int StrOffset = FltOffset + FltCols.Len();
   const int TotalCols = StrOffset + StrColMaps.Len();
 #ifdef USE_OPENMP
   #pragma omp parallel for schedule(static)
 #endif
   for (int i = 0; i < TotalCols+1; i++) {
     if (i < FltOffset) {
       if (Grow) { IntCols[i].Reserve(RowCount, RowCount); }
       else { IntCols[i].Trunc(RowCount); }
     } else if (i < StrOffset) {
       if (Grow) { FltCols[i-FltOffset].Reserve(RowCount, RowCount); }
       else { FltCols[i-FltOffset].Trunc(RowCount); }
     } else if (i < TotalCols) {
       if (Grow) { StrColMaps[i-StrOffset].Reserve(RowCount, RowCount); }
       else { StrColMaps[i-StrOffset].Trunc(RowCount); }
     } else {
       // last slot of the index space: the row-link vector
       if (Grow) { Next.Reserve(RowCount, RowCount); }
       else { Next.Trunc(RowCount); }
     }
   }
 }


 // Reserves NewRows consecutive physical rows at the end of the table and returns the
 // index of the first one. Updates NumRows / NumValidRows, links the previous last
 // valid row to the start of the new range and marks the last new row as the end of
 // the chain. Links inside the new range are left for the caller to fill in.
 // A request for zero (or fewer) rows reserves nothing and leaves the table untouched.
 // With USE_OPENMP the bookkeeping runs inside a critical section.
 int TTable::GetEmptyRowsStart(int NewRows) {
   int start = -1;
 #ifdef USE_OPENMP
   #pragma omp critical
   {
 #endif
     start = NumRows;
     // Without this guard NewRows == 0 wrote Next[start-1] (index -1 on an empty
     // table) and moved LastValidRow backwards.
     if (NewRows > 0) {
       NumRows += NewRows;
       NumValidRows += NewRows;
       // To make this function thread-safe, the following call must be done before the
       // code enters parallel region.
       // ResizeTable(NumRows);
       Assert(NumRows <= Next.Len());
       if (LastValidRow >= 0) {Next[LastValidRow] = start;}
       LastValidRow = start+NewRows-1;
       Next[LastValidRow] = Last;
     }
 #ifdef USE_OPENMP
   }
 #endif
   Assert (start >= 0);
   return start;
 }


 void TTable::AddSelectedRows(const TTable& Table, const TIntV& RowIDs) {

   int NewRows = RowIDs.Len();

   if (NewRows == 0) { return; }

   // this call should be thread-safe

   int start = GetEmptyRowsStart(NewRows);

   for (TInt r = 0; r < NewRows; r++) {

     TInt CurrRowIdx = RowIDs[r];

     for (TInt i = 0; i < Table.IntCols.Len(); i++) {

       IntCols[i][start+r] = Table.IntCols[i][CurrRowIdx];

     }

     for (TInt i = 0; i < Table.FltCols.Len(); i++) {

       FltCols[i][start+r] = Table.FltCols[i][CurrRowIdx];

     }

     for (TInt i = 0; i < Table.StrColMaps.Len(); i++) {

       StrColMaps[i][start+r] = Table.StrColMaps[i][CurrRowIdx];

     }

   }

   for (TInt r = 0; r < NewRows-1; r++) {

     Next[start+r] = start+r+1;

   }

 }


 void TTable::AddNRows(int NewRows, const TVec<TIntV>& IntColsP, const TVec<TFltV>& FltColsP, const TVec<TIntV>& StrColMapsP) {

   if (NewRows == 0) { return; }

   // this call should be thread-safe

   int start = GetEmptyRowsStart(NewRows);

   for (TInt r = 0; r < NewRows; r++) {

     for (TInt i = 0; i < IntColsP.Len(); i++) {

       IntCols[i][start+r] = IntColsP[i][r];

     }

     for (TInt i = 0; i < FltColsP.Len(); i++) {

       FltCols[i][start+r] = FltColsP[i][r];

     }

     for (TInt i = 0; i < StrColMapsP.Len(); i++) {

       StrColMaps[i][start+r] = StrColMapsP[i][r];

     }

   }

   for (TInt r = 0; r < NewRows-1; r++) {

     Next[start+r] = start+r+1;

   }

 }


 #ifdef USE_OPENMP

 void TTable::AddNJointRowsMP(const TTable& T1, const TTable& T2, const TVec<TIntPrV>& JointRowIDSet) {

   //double startFn = omp_get_wtime();

   int JointTableSize = 0;

   TIntV StartOffsets(JointRowIDSet.Len());

   for (int i = 0; i < JointRowIDSet.Len(); i++) {

     StartOffsets[i] = JointTableSize;

     JointTableSize += JointRowIDSet[i].Len();

   }

   if (JointTableSize == 0) {

     TExcept::Throw("Joint table is empty");

   }

   //double endOffsets = omp_get_wtime();

   //printf("Offsets time = %f\n",endOffsets-startFn);

   ResizeTable(JointTableSize);

   //double endResize = omp_get_wtime();

   //printf("Resize time = %f\n",endResize-endOffsets);

   NumRows = JointTableSize;

   NumValidRows = JointTableSize;

   Assert(NumRows <= Next.Len());


   TInt IntOffset = T1.IntCols.Len();

   TInt FltOffset = T1.FltCols.Len();

   TInt StrOffset = T1.StrColMaps.Len();


   TInt IdOffset = IntOffset + T2.IntCols.Len();

   RowIdMap.Clr();

   for (TInt IdCnt = 0; IdCnt < JointTableSize; IdCnt++) {

     RowIdMap.AddDat(IdCnt, IdCnt);

   }


   #pragma omp parallel for schedule(dynamic, CHUNKS_PER_THREAD)

   for (int j = 0; j < JointRowIDSet.Len(); j++) {

     const TIntPrV& RowIDs = JointRowIDSet[j];

     int start = StartOffsets[j];

     int NewRows = RowIDs.Len();

     if (NewRows == 0) {continue;}

     for (TInt r = 0; r < NewRows; r++){

       TIntPr CurrRowIdPr = RowIDs[r];

       for(TInt i = 0; i < T1.IntCols.Len(); i++){

         IntCols[i][start+r] = T1.IntCols[i][CurrRowIdPr.GetVal1()];

       }

       for(TInt i = 0; i < T1.FltCols.Len(); i++){

         FltCols[i][start+r] = T1.FltCols[i][CurrRowIdPr.GetVal1()];

       }

       for(TInt i = 0; i < T1.StrColMaps.Len(); i++){

         StrColMaps[i][start+r] = T1.StrColMaps[i][CurrRowIdPr.GetVal1()];

       }

       for(TInt i = 0; i < T2.IntCols.Len(); i++){

         IntCols[i+IntOffset][start+r] = T2.IntCols[i][CurrRowIdPr.GetVal2()];

       }

       for(TInt i = 0; i < T2.FltCols.Len(); i++){

         FltCols[i+FltOffset][start+r] = T2.FltCols[i][CurrRowIdPr.GetVal2()];

       }

       for(TInt i = 0; i < T2.StrColMaps.Len(); i++){

         StrColMaps[i+StrOffset][start+r] = T2.StrColMaps[i][CurrRowIdPr.GetVal2()];

       }

       IntCols[IdOffset][start+r] = start+r;

     }

     for(TInt r = 0; r < NewRows; r++){

       Next[start+r] = start+r+1;

     }

   }

   LastValidRow = JointTableSize-1;

   Next[LastValidRow] = Last;

   //double endIterate = omp_get_wtime();

   //printf("Iterate time = %f\n",endIterate-endResize);

 }

 #endif // USE_OPENMP


 // Builds a new table whose schema is this table's schema without the id column,
 // then fills it with AddTable(*this) followed by UnionAllInPlace(Table).
 PTable TTable::UnionAll(const TTable& Table) {
   Schema NewSchema;
   for (TInt Col = 0; Col < Sch.Len(); Col++) {
     const TStr& Name = Sch[Col].Val1;
     if (Name == GetIdColName()) { continue; }  // the id column is not carried over
     NewSchema.Add(TPair<TStr,TAttrType>(Name, Sch[Col].Val2));
   }
   PTable Merged = TTable::New(NewSchema, Context);
   Merged->AddTable(*this);
   Merged->UnionAllInPlace(Table);
   return Merged;
 }


 void TTable::UnionAllInPlace(const TTable& Table) {

   AddTable(Table);

   // TODO: For the moment, IDs are not initialized (to avoid having too many ID columns)

   //result->InitIds();

 }


 // Returns a new table holding this table's rows (reduced with Unique over all
 // non-id columns) plus every row of Table whose row index is not reported by
 // GetCollidingRows(Table, ...). The result schema is this table's schema
 // without the id column; ids are re-created with InitIds() at the end.
 PTable TTable::Union(const TTable& Table) {
   Schema NewSchema;
   TStrV ColNames;
   for (TInt Col = 0; Col < Sch.Len(); Col++) {
     const TStr& Name = Sch[Col].Val1;
     if (Name == GetIdColName()) { continue; }
     NewSchema.Add(TPair<TStr,TAttrType>(Name, Sch[Col].Val2));
     ColNames.Add(Name);
   }
   PTable Merged = TTable::New(NewSchema, Context);

   THashSet<TInt> Collisions;
   GetCollidingRows(Table, Collisions);

   Merged->AddTable(*this);
   Merged->Unique(ColNames);

   // this part should be made faster by adding all the rows in one go
   for (TRowIterator RowI = Table.BegRI(); RowI < Table.EndRI(); RowI++) {
     if (Collisions.IsKey(RowI.GetRowIdx())) { continue; }
     Merged->AddRowI(RowI);
   }

   Merged->InitIds();
   return Merged;
 }


 // Returns a new table containing the rows of Table whose row index is reported
 // by GetCollidingRows(Table, ...). The result schema is this table's schema
 // without the id column; ids are re-created with InitIds().
 PTable TTable::Intersection(const TTable& Table) {
   Schema NewSchema;
   for (TInt Col = 0; Col < Sch.Len(); Col++) {
     const TStr& Name = Sch[Col].Val1;
     if (Name == GetIdColName()) { continue; }
     NewSchema.Add(TPair<TStr,TAttrType>(Name, Sch[Col].Val2));
   }
   PTable Common = TTable::New(NewSchema, Context);

   THashSet<TInt> Collisions;
   GetCollidingRows(Table, Collisions);

   // this part should be made faster by adding all the rows in one go
   for (TRowIterator RowI = Table.BegRI(); RowI < Table.EndRI(); RowI++) {
     if (!Collisions.IsKey(RowI.GetRowIdx())) { continue; }
     Common->AddRowI(RowI);
   }
   Common->InitIds();
   return Common;
 }


 // The Table argument cannot be const because we will eventually call Table->GroupAux;
 // as of now, GroupAux cannot be const because it modifies the table in some cases.

 // Returns a new table containing the rows of this table whose row index is not
 // reported by Table.GetCollidingRows(*this, ...). The result schema is this
 // table's schema without the id column; ids are re-created with InitIds().
 PTable TTable::Minus(TTable& Table) {
   Schema NewSchema;
   for (TInt Col = 0; Col < Sch.Len(); Col++) {
     const TStr& Name = Sch[Col].Val1;
     if (Name == GetIdColName()) { continue; }
     NewSchema.Add(TPair<TStr,TAttrType>(Name, Sch[Col].Val2));
   }
   PTable Remaining = TTable::New(NewSchema, Context);

   THashSet<TInt> Collisions;
   Table.GetCollidingRows(*this, Collisions);

   // this part should be made faster by adding all the rows in one go
   for (TRowIterator RowI = BegRI(); RowI < EndRI(); RowI++) {
     if (Collisions.IsKey(RowI.GetRowIdx())) { continue; }
     Remaining->AddRowI(RowI);
   }
   Remaining->InitIds();
   return Remaining;
 }


 // Returns a new table restricted to the columns named in ProjectCols (in that
 // order), filled with AddTable(*this) and given fresh ids with InitIds().
 // Throws "no such column <name>" for any name that is not a column of this table.
 PTable TTable::Project(const TStrV& ProjectCols) {
   Schema NewSchema;
   for (TInt Col = 0; Col < ProjectCols.Len(); Col++) {
     const TStr& Name = ProjectCols[Col];
     if (!IsColName(Name)) { TExcept::Throw("no such column " + Name); }
     NewSchema.Add(TPair<TStr,TAttrType>(Name, GetColType(Name)));
   }

   PTable Projected = TTable::New(NewSchema, Context);
   Projected->AddTable(*this);
   Projected->InitIds();
   return Projected;
 }


 // Reports whether Attr names a column of this table; forwards to IsColName.
 TBool TTable::IsAttr(const TStr& Attr) {
   const TBool Known = IsColName(Attr);
   return Known;
 }


 // Produces a renumbered variant of ColName of the form "<base>-<k>".
 // <base> is ColName with a trailing "-<c>" (dash + one character) removed if
 // present; <k> is one more than the number of schema column names whose text,
 // minus their last two characters, equals <base>.
 TStr TTable::RenumberColName(const TStr& ColName) const {
   TStr NColName = ColName;
   // Only probe the second-to-last character when it exists; for names shorter
   // than two characters Len()-2 would be a negative (out-of-range) index.
   if (NColName.Len() >= 2 && NColName.GetCh(NColName.Len()-2) == '-') {
     NColName = NColName.GetSubStr(0,NColName.Len()-3);
   }
   TInt Conflicts = 0;
   for (TInt i = 0; i < Sch.Len(); i++) {
     // Compare against each schema name with its two-character suffix dropped.
     if (NColName == Sch[i].Val1.GetSubStr(0, Sch[i].Val1.Len()-3)) {
       Conflicts++;
     }
   }
   Conflicts++;
   NColName = NColName + "-" + Conflicts.GetStr();
   return NColName;
 }


 // Returns ColName without its trailing "-<c>" suffix when that shortened name is
 // unambiguous in the schema, otherwise returns ColName unchanged.
 // Empty names and names starting with '_' are returned as is. A name is
 // ambiguous when more than one schema column name, minus its last two
 // characters, equals the shortened name.
 TStr TTable::DenormalizeColName(const TStr& ColName) const {
   TStr DColName = ColName;
   if (DColName.Len() == 0) { return DColName; }
   if (DColName.GetCh(0) == '_') { return DColName; }
   // Only probe the second-to-last character when it exists; a one-character
   // name would otherwise be indexed at position -1.
   if (DColName.Len() >= 2 && DColName.GetCh(DColName.Len()-2) == '-') {
     DColName = DColName.GetSubStr(0,DColName.Len()-3);
   }
   TInt Conflicts = 0;
   for (TInt i = 0; i < Sch.Len(); i++) {
     if (DColName == Sch[i].Val1.GetSubStr(0, Sch[i].Val1.Len()-3)) {
       Conflicts++;
     }
   }
   if (Conflicts > 1) { return ColName; }
   else { return DColName; }
 }


 // Returns a copy of the schema in which every column name has been passed
 // through DenormalizeColName; column types and order are kept.
 Schema TTable::DenormalizeSchema() const {
   Schema Result;
   for (TInt Col = 0; Col < Sch.Len(); Col++) {
     const TStr ShortName = DenormalizeColName(Sch[Col].Val1);
     Result.Add(TPair<TStr, TAttrType>(ShortName, Sch[Col].Val2));
   }
   return Result;
 }


 void TTable::AddIntCol(const TStr& ColName) {

   AddSchemaCol(ColName, atInt);

   IntCols.Add(TIntV(NumRows));

   TInt L = IntCols.Len();

   AddColType(ColName, atInt, L-1);

 }


 void TTable::AddFltCol(const TStr& ColName) {

   AddSchemaCol(ColName, atFlt);

   FltCols.Add(TFltV(NumRows));

   TInt L = FltCols.Len();

   AddColType(ColName, atFlt, L-1);

 }


 void TTable::AddStrCol(const TStr& ColName) {

   AddSchemaCol(ColName, atStr);

   StrColMaps.Add(TIntV(NumRows));

   TInt L = StrColMaps.Len();

   AddColType(ColName, atStr, L-1);

 }


 void TTable::ClassifyAux(const TIntV& SelectedRows, const TStr& LabelName, const TInt& PositiveLabel, const TInt& NegativeLabel) {

   AddSchemaCol(LabelName, atInt);

   TInt LabelColIdx = IntCols.Len();

   AddColType(LabelName, atInt, LabelColIdx);

   IntCols.Add(TIntV(NumRows));

   for (TInt i = 0; i < NumRows; i++) {

     IntCols[LabelColIdx][i] = NegativeLabel;

   }

   for (TInt i = 0; i < SelectedRows.Len(); i++) {

     IntCols[LabelColIdx][SelectedRows[i]] = PositiveLabel;

   }

 }


 #ifdef USE_OPENMP

 // OpenMP version of the two-column arithmetic operation on this table.
 // ArgColIdx1/ArgColIdx2 index the source columns inside the column group given
 // by ArgType1/ArgType2; ResColIdx indexes the destination column, which is an
 // int column when both arguments are int and a float column otherwise.
 // Rows are processed per partition range obtained from GetPartitionRanges.
 // Throws "Cannot find modulo for float columns" for aoMod with a float result.
 void TTable::ColGenericOpMP(TInt ArgColIdx1, TInt ArgColIdx2, TAttrType ArgType1, TAttrType ArgType2, TInt ResColIdx, TArithOp op){
   TAttrType ResType = atFlt;
   if (ArgType1 == atInt && ArgType2 == atInt) { ResType = atInt; }

   // Reject float modulo before going parallel: an exception must not leave an
   // OpenMP worksharing loop, so it cannot be raised from inside the loop body.
   if (ResType == atFlt && op == aoMod) {
     TExcept::Throw("Cannot find modulo for float columns");
   }

   TIntPrV Partitions;
   GetPartitionRanges(Partitions, omp_get_max_threads()*CHUNKS_PER_THREAD);

   #pragma omp parallel for schedule(dynamic, CHUNKS_PER_THREAD)
   for (int i = 0; i < Partitions.Len(); i++){
     TRowIterator RowI(Partitions[i].GetVal1(), this);
     TRowIterator EndI(Partitions[i].GetVal2(), this);
     while (RowI < EndI) {
       if (ResType == atInt) {
         TInt V1 = RowI.GetIntAttr(ArgColIdx1);
         TInt V2 = RowI.GetIntAttr(ArgColIdx2);
         TInt& Dst = IntCols[ResColIdx][RowI.GetRowIdx()];
         if (op == aoAdd) { Dst = V1 + V2; }
         if (op == aoSub) { Dst = V1 - V2; }
         if (op == aoMul) { Dst = V1 * V2; }
         if (op == aoDiv) { Dst = V1 / V2; }
         if (op == aoMod) { Dst = V1 % V2; }
         if (op == aoMin) { Dst = (V1 < V2) ? V1 : V2; }
         if (op == aoMax) { Dst = (V1 > V2) ? V1 : V2; }
       } else {
         // Mixed or float arguments: promote int values to float first.
         TFlt V1 = (ArgType1 == atInt) ? (TFlt)RowI.GetIntAttr(ArgColIdx1) : RowI.GetFltAttr(ArgColIdx1);
         TFlt V2 = (ArgType2 == atInt) ? (TFlt)RowI.GetIntAttr(ArgColIdx2) : RowI.GetFltAttr(ArgColIdx2);
         TFlt& Dst = FltCols[ResColIdx][RowI.GetRowIdx()];
         if (op == aoAdd) { Dst = V1 + V2; }
         if (op == aoSub) { Dst = V1 - V2; }
         if (op == aoMul) { Dst = V1 * V2; }
         if (op == aoDiv) { Dst = V1 / V2; }
         if (op == aoMin) { Dst = (V1 < V2) ? V1 : V2; }
         if (op == aoMax) { Dst = (V1 > V2) ? V1 : V2; }
       }
       RowI++;
     }
   }
 }

 #endif  // USE_OPENMP


 /* Performs a generic arithmetic operation on two numeric attributes.
  * The operation can be +, -, *, /, %, min or max.
  * An alternative would be to write a separate function for each operation,
  * but branch prediction may make this generic version just as fast.
  */

 void TTable::ColGenericOp(const TStr& Attr1, const TStr& Attr2, const TStr& ResAttr, TArithOp op) {

   // check if attributes are valid

   if (!IsAttr(Attr1)) TExcept::Throw("No attribute present: " + Attr1);

   if (!IsAttr(Attr2)) TExcept::Throw("No attribute present: " + Attr2);

   TPair<TAttrType, TInt> Info1 = GetColTypeMap(Attr1);

   TPair<TAttrType, TInt> Info2 = GetColTypeMap(Attr2);

   TAttrType Arg1Type = Info1.Val1;

   TAttrType Arg2Type = Info2.Val1;

   if (Arg1Type == atStr || Arg2Type == atStr) {

     TExcept::Throw("Only numeric columns supported in arithmetic operations.");

   }

   if(Arg1Type == atInt && Arg2Type == atFlt && ResAttr == ""){

         TExcept::Throw("Trying to write float values to an existing int-typed column");

   }

   // source column indices

   TInt ColIdx1 = Info1.Val2;

   TInt ColIdx2 = Info2.Val2;


   // destination column index

   TInt ColIdx3 = ColIdx1;

   // Create empty result column with type that of first attribute

   if (ResAttr != "") {

       if (Arg1Type == atInt && Arg2Type == atInt) {

           AddIntCol(ResAttr);

       }

       else {

           AddFltCol(ResAttr);

       }

       ColIdx3 = GetColIdx(ResAttr);

   }

 #ifdef USE_OPENMP

   if(GetMP()){

         ColGenericOpMP(ColIdx1, ColIdx2, Arg1Type, Arg2Type, ColIdx3, op);

         return;

   }

 #endif  //USE_OPENMP

   TAttrType ResType = atFlt;

   if(Arg1Type == atInt && Arg2Type == atInt){ printf("hooray!\n"); ResType = atInt;}

   for (TRowIterator RowI = BegRI(); RowI < EndRI(); RowI++) {

         //printf("%d %d %d %d\n", ColIdx1.Val, ColIdx2.Val, ColIdx3.Val, RowI.GetRowIdx().Val);

                 if(ResType == atInt){

                         TInt V1 = RowI.GetIntAttr(ColIdx1);

                         TInt V2 = RowI.GetIntAttr(ColIdx2);

                         if (op == aoAdd) { IntCols[ColIdx3][RowI.GetRowIdx()] = V1 + V2; }

                 if (op == aoSub) { IntCols[ColIdx3][RowI.GetRowIdx()] = V1 - V2; }

                 if (op == aoMul) { IntCols[ColIdx3][RowI.GetRowIdx()] = V1 * V2; }

                 if (op == aoDiv) { IntCols[ColIdx3][RowI.GetRowIdx()] = V1 / V2; }

                 if (op == aoMod) { IntCols[ColIdx3][RowI.GetRowIdx()] = V1 % V2; }

                 if (op == aoMin) { IntCols[ColIdx3][RowI.GetRowIdx()] = (V1 < V2) ? V1 : V2;}

                 if (op == aoMax) { IntCols[ColIdx3][RowI.GetRowIdx()] = (V1 > V2) ? V1 : V2;}

                 } else{

                         TFlt V1 = (Arg1Type == atInt) ? (TFlt)RowI.GetIntAttr(ColIdx1) : RowI.GetFltAttr(ColIdx1);

                         TFlt V2 = (Arg2Type == atInt) ? (TFlt)RowI.GetIntAttr(ColIdx2) : RowI.GetFltAttr(ColIdx2);

                         if (op == aoAdd) { FltCols[ColIdx3][RowI.GetRowIdx()] = V1 + V2; }

                 if (op == aoSub) { FltCols[ColIdx3][RowI.GetRowIdx()] = V1 - V2; }

                 if (op == aoMul) { FltCols[ColIdx3][RowI.GetRowIdx()] = V1 * V2; }

                 if (op == aoDiv) { FltCols[ColIdx3][RowI.GetRowIdx()] = V1 / V2; }

                 if (op == aoMod) { TExcept::Throw("Cannot find modulo for float columns");  }

                 if (op == aoMin) { FltCols[ColIdx3][RowI.GetRowIdx()] = (V1 < V2) ? V1 : V2;}

                 if (op == aoMax) { FltCols[ColIdx3][RowI.GetRowIdx()] = (V1 > V2) ? V1 : V2;}

                 }

   }

 }


 void TTable::ColAdd(const TStr& Attr1, const TStr& Attr2, const TStr& ResultAttrName) {

   ColGenericOp(Attr1, Attr2, ResultAttrName, aoAdd);

 }


 void TTable::ColSub(const TStr& Attr1, const TStr& Attr2, const TStr& ResultAttrName) {

   ColGenericOp(Attr1, Attr2, ResultAttrName, aoSub);

 }


 void TTable::ColMul(const TStr& Attr1, const TStr& Attr2, const TStr& ResultAttrName) {

   ColGenericOp(Attr1, Attr2, ResultAttrName, aoMul);

 }


 void TTable::ColDiv(const TStr& Attr1, const TStr& Attr2, const TStr& ResultAttrName) {

   ColGenericOp(Attr1, Attr2, ResultAttrName, aoDiv);

 }


 void TTable::ColMod(const TStr& Attr1, const TStr& Attr2, const TStr& ResultAttrName) {

   ColGenericOp(Attr1, Attr2, ResultAttrName, aoMod);

 }


 void TTable::ColMin(const TStr& Attr1, const TStr& Attr2, const TStr& ResultAttrName) {

   ColGenericOp(Attr1, Attr2, ResultAttrName, aoMin);

 }


 void TTable::ColMax(const TStr& Attr1, const TStr& Attr2, const TStr& ResultAttrName) {

   ColGenericOp(Attr1, Attr2, ResultAttrName, aoMax);

 }


 void TTable::ColGenericOp(const TStr& Attr1, TTable& Table, const TStr& Attr2, const TStr& ResAttr,

  TArithOp op, TBool AddToFirstTable) {

   // check if attributes are valid

   if (!IsAttr(Attr1)) { TExcept::Throw("No attribute present: " + Attr1); }

   if (!Table.IsAttr(Attr2)) { TExcept::Throw("No attribute present: " + Attr2); }


   if (NumValidRows != Table.NumValidRows) {

     TExcept::Throw("Tables do not have equal number of rows");

   }


   TPair<TAttrType, TInt> Info1 = GetColTypeMap(Attr1);

   TPair<TAttrType, TInt> Info2 = Table.GetColTypeMap(Attr2);

   TAttrType Arg1Type = Info1.Val1;

   TAttrType Arg2Type = Info2.Val1;

   if (Info1.Val1 == atStr || Info2.Val1 == atStr) {

     TExcept::Throw("Only numeric columns supported in arithmetic operations.");

   }

   if(Arg1Type == atInt && Arg2Type == atFlt && ResAttr == ""){

         TExcept::Throw("Trying to write float values to an existing int-typed column");

   }

   // source column indices

   TInt ColIdx1 = Info1.Val2;

   TInt ColIdx2 = Info2.Val2;


   // destination column index

   TInt ColIdx3 = AddToFirstTable ? ColIdx1 : ColIdx2;


   // Create empty result column in appropriate table with type that of first attribute

   if (ResAttr != "") {

     if (AddToFirstTable) {

       if (Arg1Type == atInt && Arg2Type == atInt) {

           AddIntCol(ResAttr);

       } else {

           AddFltCol(ResAttr);

       }

       ColIdx3 = GetColIdx(ResAttr);

     }

     else {

       if (Arg1Type == atInt && Arg2Type == atInt) {

           Table.AddIntCol(ResAttr);

       } else {

           Table.AddFltCol(ResAttr);

       }

       ColIdx3 = Table.GetColIdx(ResAttr);

     }

   }


   /*

   #ifdef USE_OPENMP

   if(GetMP()){

         ColGenericOpMP(Table, AddToFirstTable, ColIdx1, ColIdx2, Arg1Type, Arg2Type, ColIdx3, op);

         return;

   }

   #endif        //USE_OPENMP

   */


   TRowIterator RI1, RI2;

   RI1 = BegRI();

   RI2 = Table.BegRI();

   TAttrType ResType = atFlt;

   if(Arg1Type == atInt && Arg2Type == atInt){ ResType = atInt;}

   while (RI1 < EndRI() && RI2 < Table.EndRI()) {

     if (ResType == atInt) {

                 TInt V1 = RI1.GetIntAttr(ColIdx1);

                 TInt V2 = RI2.GetIntAttr(ColIdx2);

         if (AddToFirstTable) {

                 if (op == aoAdd) { IntCols[ColIdx3][RI1.GetRowIdx()] = V1 + V2; }

                 if (op == aoSub) { IntCols[ColIdx3][RI1.GetRowIdx()] = V1 - V2; }

                 if (op == aoMul) { IntCols[ColIdx3][RI1.GetRowIdx()] = V1 * V2; }

                 if (op == aoDiv) { IntCols[ColIdx3][RI1.GetRowIdx()] = V1 / V2; }

                 if (op == aoMod) { IntCols[ColIdx3][RI1.GetRowIdx()] = V1 % V2; }

         }

         else {

                 if (op == aoAdd) { Table.IntCols[ColIdx3][RI2.GetRowIdx()] = V1 + V2; }

                 if (op == aoSub) { Table.IntCols[ColIdx3][RI2.GetRowIdx()] = V1 - V2; }

                 if (op == aoMul) { Table.IntCols[ColIdx3][RI2.GetRowIdx()] = V1 * V2; }

                 if (op == aoDiv) { Table.IntCols[ColIdx3][RI2.GetRowIdx()] = V1 / V2; }

                 if (op == aoMod) { Table.IntCols[ColIdx3][RI2.GetRowIdx()] = V1 % V2; }

         }

     } else {

                 TFlt V1 = (Arg1Type == atInt) ? (TFlt)RI1.GetIntAttr(ColIdx1) : RI2.GetFltAttr(ColIdx1);

                 TFlt V2 = (Arg2Type == atInt) ? (TFlt)RI1.GetIntAttr(ColIdx2) : RI2.GetFltAttr(ColIdx2);

         if (AddToFirstTable) {

                 if (op == aoAdd) { FltCols[ColIdx3][RI1.GetRowIdx()] = V1 + V2; }

                 if (op == aoSub) { FltCols[ColIdx3][RI1.GetRowIdx()] = V1 - V2; }

                 if (op == aoMul) { FltCols[ColIdx3][RI1.GetRowIdx()] = V1 * V2; }

                 if (op == aoDiv) { FltCols[ColIdx3][RI1.GetRowIdx()] = V1 / V2; }

                 if (op == aoMod) { TExcept::Throw("Cannot find modulo for float columns"); }

         } else {

                 if (op == aoAdd) { Table.FltCols[ColIdx3][RI2.GetRowIdx()] = V1 + V2; }

                 if (op == aoSub) { Table.FltCols[ColIdx3][RI2.GetRowIdx()] = V1 - V2; }

                 if (op == aoMul) { Table.FltCols[ColIdx3][RI2.GetRowIdx()] = V1 * V2; }

                 if (op == aoDiv) { Table.FltCols[ColIdx3][RI2.GetRowIdx()] = V1 / V2; }

                 if (op == aoMod) { TExcept::Throw("Cannot find modulo for float columns"); }

         }

     }

     RI1++;

     RI2++;

   }


   if (RI1 != EndRI() || RI2 != Table.EndRI()) {

     TExcept::Throw("ColGenericOp: Iteration error");

   }

 }


 void TTable::ColAdd(const TStr& Attr1, TTable& Table, const TStr& Attr2,

  const TStr& ResultAttrName, TBool AddToFirstTable) {

   ColGenericOp(Attr1, Table, Attr2, ResultAttrName, aoAdd, AddToFirstTable);

 }


 void TTable::ColSub(const TStr& Attr1, TTable& Table, const TStr& Attr2,

  const TStr& ResultAttrName, TBool AddToFirstTable) {

   ColGenericOp(Attr1, Table, Attr2, ResultAttrName, aoSub, AddToFirstTable);

 }


 void TTable::ColMul(const TStr& Attr1, TTable& Table, const TStr& Attr2,

  const TStr& ResultAttrName, TBool AddToFirstTable) {

   ColGenericOp(Attr1, Table, Attr2, ResultAttrName, aoMul, AddToFirstTable);

 }


 void TTable::ColDiv(const TStr& Attr1, TTable& Table, const TStr& Attr2,

  const TStr& ResultAttrName, TBool AddToFirstTable) {

   ColGenericOp(Attr1, Table, Attr2, ResultAttrName, aoDiv, AddToFirstTable);

 }


 void TTable::ColMod(const TStr& Attr1, TTable& Table, const TStr& Attr2,

  const TStr& ResultAttrName, TBool AddToFirstTable) {

   ColGenericOp(Attr1, Table, Attr2, ResultAttrName, aoMod, AddToFirstTable);

 }


 void TTable::ColGenericOp(const TStr& Attr1, const TFlt& Num, const TStr& ResAttr, TArithOp op, const TBool floatCast) {

   // check if attribute is valid

   if (!IsAttr(Attr1)) { TExcept::Throw("No attribute present: " + Attr1); }


   TPair<TAttrType, TInt> Info1 = GetColTypeMap(Attr1);

   TAttrType ArgType = Info1.Val1;

   if (ArgType == atStr) {

     TExcept::Throw("Only numeric columns supported in arithmetic operations.");

   }

   // source column index

   TInt ColIdx1 = Info1.Val2;

   // destination column index

   TInt ColIdx2 = ColIdx1;


   // Create empty result column with type that of first attribute

   TBool shouldCast = floatCast;

   if (ResAttr != "") {

       if ((ArgType == atInt) & !shouldCast) {

           AddIntCol(ResAttr);

       } else {

           AddFltCol(ResAttr);

       }

       ColIdx2 = GetColIdx(ResAttr);

   } else {

     // Cannot change type of existing attribute

     shouldCast = false;

   }


   #ifdef USE_OPENMP

   if(GetMP()){

         ColGenericOpMP(ColIdx1, ColIdx2, ArgType, Num, op, shouldCast);

         return;

   }

   #endif  //USE_OPENMP


   for (TRowIterator RowI = BegRI(); RowI < EndRI(); RowI++) {

     if ((ArgType == atInt) && !shouldCast) {

       TInt CurVal = RowI.GetIntAttr(ColIdx1);

       TInt Val = static_cast<int>(Num);

       if (op == aoAdd) { IntCols[ColIdx2][RowI.GetRowIdx()] = CurVal + Val; }

       if (op == aoSub) { IntCols[ColIdx2][RowI.GetRowIdx()] = CurVal - Val; }

       if (op == aoMul) { IntCols[ColIdx2][RowI.GetRowIdx()] = CurVal * Val; }

       if (op == aoDiv) { IntCols[ColIdx2][RowI.GetRowIdx()] = CurVal / Val; }

       if (op == aoMod) { IntCols[ColIdx2][RowI.GetRowIdx()] = CurVal % Val; }

     }

     else {

       TFlt CurVal = (ArgType == atFlt) ? RowI.GetFltAttr(ColIdx1) : (TFlt) RowI.GetIntAttr(ColIdx1);

       if (op == aoAdd) { FltCols[ColIdx2][RowI.GetRowIdx()] = CurVal + Num; }

       if (op == aoSub) { FltCols[ColIdx2][RowI.GetRowIdx()] = CurVal - Num; }

       if (op == aoMul) { FltCols[ColIdx2][RowI.GetRowIdx()] = CurVal * Num; }

       if (op == aoDiv) { FltCols[ColIdx2][RowI.GetRowIdx()] = CurVal / Num; }

       if (op == aoMod) { TExcept::Throw("Cannot find modulo for float columns"); }

     }

   }

 }


 #ifdef USE_OPENMP

 void TTable::ColGenericOpMP(const TInt& ColIdx1, const TInt& ColIdx2, TAttrType ArgType, const TFlt& Num, TArithOp op, TBool ShouldCast){

         TIntPrV Partitions;

         GetPartitionRanges(Partitions, omp_get_max_threads()*CHUNKS_PER_THREAD);

         TInt PartitionSize = Partitions[0].GetVal2()-Partitions[0].GetVal1()+1;

         #pragma omp parallel for schedule(dynamic, CHUNKS_PER_THREAD)

         for (int i = 0; i < Partitions.Len(); i++){

                 TRowIterator RowI(Partitions[i].GetVal1(), this);

                 TRowIterator EndI(Partitions[i].GetVal2(), this);

                 while(RowI < EndI){

                         if ((ArgType == atInt) && !ShouldCast) {

                         TInt CurVal = RowI.GetIntAttr(ColIdx1);

                         TInt Val = static_cast<int>(Num);

                         if (op == aoAdd) { IntCols[ColIdx2][RowI.GetRowIdx()] = CurVal + Val; }

                         if (op == aoSub) { IntCols[ColIdx2][RowI.GetRowIdx()] = CurVal - Val; }

                         if (op == aoMul) { IntCols[ColIdx2][RowI.GetRowIdx()] = CurVal * Val; }

                         if (op == aoDiv) { IntCols[ColIdx2][RowI.GetRowIdx()] = CurVal / Val; }

                         if (op == aoMod) { IntCols[ColIdx2][RowI.GetRowIdx()] = CurVal % Val; }

                 } else {

                         TFlt CurVal = (ArgType == atFlt) ? RowI.GetFltAttr(ColIdx1) : (TFlt) RowI.GetIntAttr(ColIdx1);

                         if (op == aoAdd) { FltCols[ColIdx2][RowI.GetRowIdx()] = CurVal + Num; }

                         if (op == aoSub) { FltCols[ColIdx2][RowI.GetRowIdx()] = CurVal - Num; }

                         if (op == aoMul) { FltCols[ColIdx2][RowI.GetRowIdx()] = CurVal * Num; }

                         if (op == aoDiv) { FltCols[ColIdx2][RowI.GetRowIdx()] = CurVal / Num; }

                         if (op == aoMod) { TExcept::Throw("Cannot find modulo for float columns"); }

                 }

                 RowI++;

                 }

         }

 }

 #endif


 void TTable::ColAdd(const TStr& Attr1, const TFlt& Num, const TStr& ResultAttrName, const TBool floatCast) {

   ColGenericOp(Attr1, Num, ResultAttrName, aoAdd, floatCast);

 }


 void TTable::ColSub(const TStr& Attr1, const TFlt& Num, const TStr& ResultAttrName, const TBool floatCast) {

   ColGenericOp(Attr1, Num, ResultAttrName, aoSub, floatCast);

 }


 void TTable::ColMul(const TStr& Attr1, const TFlt& Num, const TStr& ResultAttrName, const TBool floatCast) {

   ColGenericOp(Attr1, Num, ResultAttrName, aoMul, floatCast);

 }


 void TTable::ColDiv(const TStr& Attr1, const TFlt& Num, const TStr& ResultAttrName, const TBool floatCast) {

   ColGenericOp(Attr1, Num, ResultAttrName, aoDiv, floatCast);

 }


 void TTable::ColMod(const TStr& Attr1, const TFlt& Num, const TStr& ResultAttrName, const TBool floatCast) {

   ColGenericOp(Attr1, Num, ResultAttrName, aoMod, floatCast);

 }


 void TTable::ColConcat(const TStr& Attr1, const TStr& Attr2, const TStr& Sep, const TStr& ResAttr) {

   // check if attributes are valid

   if (!IsAttr(Attr1)) TExcept::Throw("No attribute present: " + Attr1);

   if (!IsAttr(Attr2)) TExcept::Throw("No attribute present: " + Attr2);


   TPair<TAttrType, TInt> Info1 = GetColTypeMap(Attr1);

   TPair<TAttrType, TInt> Info2 = GetColTypeMap(Attr2);


   if (Info1.Val1 != atStr || Info2.Val1 != atStr) {

     TExcept::Throw("Only string columns supported in concat.");

   }


   // source column indices

   TInt ColIdx1 = Info1.Val2;

   TInt ColIdx2 = Info2.Val2;


   // destination column index

   TInt ColIdx3 = ColIdx1;


   // Create empty result column with type that of first attribute

   if (ResAttr != "") {

       AddStrCol(ResAttr);

       ColIdx3 = GetColIdx(ResAttr);

   }


   for (TRowIterator RowI = BegRI(); RowI < EndRI(); RowI++) {

     TStr CurVal1 = RowI.GetStrAttr(ColIdx1);

     TStr CurVal2 = RowI.GetStrAttr(ColIdx2);

     TStr NewVal = CurVal1 + Sep + CurVal2;

     TInt Key = TInt(Context->StringVals.AddKey(NewVal));

     StrColMaps[ColIdx3][RowI.GetRowIdx()] = Key;

   }

 }


 void TTable::ColConcat(const TStr& Attr1, TTable& Table, const TStr& Attr2, const TStr& Sep,

  const TStr& ResAttr, TBool AddToFirstTable) {

   // check if attributes are valid

   if (!IsAttr(Attr1)) { TExcept::Throw("No attribute present: " + Attr1); }

   if (!Table.IsAttr(Attr2)) { TExcept::Throw("No attribute present: " + Attr2); }


   if (NumValidRows != Table.NumValidRows) {

     TExcept::Throw("Tables do not have equal number of rows");

   }


   TPair<TAttrType, TInt> Info1 = GetColTypeMap(Attr1);

   TPair<TAttrType, TInt> Info2 = Table.GetColTypeMap(Attr2);


   if (Info1.Val1 != atStr || Info2.Val1 != atStr) {

     TExcept::Throw("Only string columns supported in concat.");

   }


   // source column indices

   TInt ColIdx1 = Info1.Val2;

   TInt ColIdx2 = Info2.Val2;


   // destination column index

   TInt ColIdx3 = ColIdx1;


   if (!AddToFirstTable) {

     ColIdx3 = ColIdx2;

   }


   // Create empty result column in appropriate table with type that of first attribute

   if (ResAttr != "") {

     if (AddToFirstTable) {

       AddStrCol(ResAttr);

       ColIdx3 = GetColIdx(ResAttr);

     }

     else {

       Table.AddStrCol(ResAttr);

       ColIdx3 = Table.GetColIdx(ResAttr);

     }

   }


   TRowIterator RI1, RI2;


   RI1 = BegRI();

   RI2 = Table.BegRI();


   while (RI1 < EndRI() && RI2 < Table.EndRI()) {

     TStr CurVal1 = RI1.GetStrAttr(ColIdx1);

     TStr CurVal2 = RI2.GetStrAttr(ColIdx2);

     TStr NewVal = CurVal1 + Sep + CurVal2;

     TInt Key = TInt(Context->StringVals.AddKey(NewVal));

     if (AddToFirstTable) {

       StrColMaps[ColIdx3][RI1.GetRowIdx()] = Key;

     }

     else {

       Table.StrColMaps[ColIdx3][RI2.GetRowIdx()] = Key;

     }

     RI1++;

     RI2++;

   }


   if (RI1 != EndRI() || RI2 != Table.EndRI()) {

     TExcept::Throw("ColGenericOp: Iteration error");

   }

 }


 void TTable::ColConcatConst(const TStr& Attr1, const TStr& Val, const TStr& Sep, const TStr& ResAttr) {

   // check if attribute is valid

   if (!IsAttr(Attr1)) { TExcept::Throw("No attribute present: " + Attr1); }


   TPair<TAttrType, TInt> Info1 = GetColTypeMap(Attr1);


   if (Info1.Val1 != atStr) {

     TExcept::Throw("Only string columns supported in concat.");

   }


   // source column index

   TInt ColIdx1 = Info1.Val2;


   // destination column index

   TInt ColIdx2 = ColIdx1;


   // Create empty result column with type that of first attribute

   if (ResAttr != "") {

     AddStrCol(ResAttr);

     ColIdx2 = GetColIdx(ResAttr);

   }


   for (TRowIterator RowI = BegRI(); RowI < EndRI(); RowI++) {

     TStr CurVal = RowI.GetStrAttr(ColIdx1);

     TStr NewVal = CurVal + Sep + Val;

     TInt Key = TInt(Context->StringVals.AddKey(NewVal));

     StrColMaps[ColIdx2][RowI.GetRowIdx()] = Key;

   }

 }


 void TTable::ReadIntCol(const TStr& ColName, TIntV& Result) const{

   if (!IsColName(ColName)) { TExcept::Throw("no such column " + ColName); }

   if (GetColType(ColName) != atInt) { TExcept::Throw("not an integer column " + ColName); }

   TInt ColId = GetColIdx(ColName);

   for (TRowIterator it = BegRI(); it < EndRI(); it++) {

     Result.Add(it.GetIntAttr(ColId));

   }

 }


 void TTable::ReadFltCol(const TStr& ColName, TFltV& Result) const{

   if (!IsColName(ColName)) { TExcept::Throw("no such column " + ColName); }

   if (GetColType(ColName) != atFlt) { TExcept::Throw("not a floating point column " + ColName); }

   TInt ColId = GetColIdx(ColName);

   for (TRowIterator it = BegRI(); it < EndRI(); it++) {

     Result.Add(it.GetFltAttr(ColId));

   }

 }


 void TTable::ReadStrCol(const TStr& ColName, TStrV& Result) const{

   if (!IsColName(ColName)) { TExcept::Throw("no such column " + ColName); }

   if (GetColType(ColName) != atStr) { TExcept::Throw("not a string column " + ColName); }

   TInt ColId = GetColIdx(ColName);

   for (TRowIterator it = BegRI(); it < EndRI(); it++) {

     Result.Add(it.GetStrAttr(ColId));

   }

 }


 void TTable::ProjectInPlace(const TStrV& ProjectCols) {

   TStrV NProjectCols = NormalizeColNameV(ProjectCols);

   for (TInt c = 0; c < NProjectCols.Len(); c++) {

     if (!IsColName(NProjectCols[c])) { TExcept::Throw("no such column " + NProjectCols[c]); }

   }

   THashSet<TStr> ProjectColsSet = THashSet<TStr>(NProjectCols);

   // Delete the column vectors

   for (TInt i = Sch.Len() - 1; i >= 0; i--) {

     TStr ColName = GetSchemaColName(i);

     if (ProjectColsSet.IsKey(ColName) || ColName == IdColName) { continue; }

     TAttrType ColType = GetSchemaColType(i);

     TInt ColId = GetColIdx(ColName);

     switch (ColType) {

       case atInt:

         IntCols.Del(ColId);

         break;

       case atFlt:

         FltCols.Del(ColId);

         break;

       case atStr:

         StrColMaps.Del(ColId);

         break;

     }

   }


   // Rebuild the ColTypeMap with new indexes of the column vectors

   TInt IntColCnt = 0;

   TInt FltColCnt = 0;

   TInt StrColCnt = 0;

   ColTypeMap.Clr();

   for (TInt i = 0; i < Sch.Len(); i++) {

     TStr ColName = GetSchemaColName(i);

     if (!ProjectColsSet.IsKey(ColName) && ColName != IdColName) { continue; }

     TAttrType ColType = GetSchemaColType(i);

     switch (ColType) {

       case atInt:

         AddColType(ColName, atInt, IntColCnt);

         IntColCnt++;

         break;

       case atFlt:

         AddColType(ColName, atFlt, FltColCnt);

         FltColCnt++;

         break;

       case atStr:

         AddColType(ColName, atStr, StrColCnt);

         StrColCnt++;

         break;

     }

   }


   // Update schema

   for (TInt i = Sch.Len() - 1; i >= 0; i--) {

     TStr ColName = GetSchemaColName(i);

     if (ProjectColsSet.IsKey(ColName) || ColName == IdColName) { continue; }

     Sch.Del(i);

   }

 }


 /// Three-way comparison of the pairs (K1, V1) and (K2, V2), ordered by key
 /// first and by value on equal keys.
 /// Returns -1 if (K1, V1) sorts before (K2, V2), 1 if it sorts after, and 0
 /// if both pairs are equal. Only the sign of the result is meaningful.
 TInt TTable::CompareKeyVal(const TInt& K1, const TInt& V1, const TInt& K2, const TInt& V2) {
   // Compare explicitly rather than returning a difference: K1 - K2 (or
   // V1 - V2) overflows for operands of large opposite sign, which would
   // yield a result with the wrong sign.
   if (K1 == K2) {
     if (V1 == V2) { return 0; }
     return (V1 < V2) ? -1 : 1;
   }
   return (K1 < K2) ? -1 : 1;
 }


 /// Checks whether the (Key, Val) pairs at positions Start..End (both
 /// inclusive) are already in non-decreasing CompareKeyVal order.
 /// Returns 0 if they are sorted and 1 as soon as an out-of-order adjacent
 /// pair is found.
 TInt TTable::CheckSortedKeyVal(TIntV& Key, TIntV& Val, TInt Start, TInt End) {
   for (TInt Pos = Start; Pos < End; Pos++) {
     // Each element is compared with its right neighbour, so Pos+1 <= End.
     if (CompareKeyVal(Key[Pos], Val[Pos], Key[Pos+1], Val[Pos+1]) > 0) {
       return 1;
     }
   }
   return 0;
 }


 /// Sorts the (Key, Val) pairs at positions Start..End (both inclusive) in
 /// place, keeping Key[i] and Val[i] together, using insertion sort with
 /// CompareKeyVal as the ordering.
 void TTable::ISortKeyVal(TIntV& Key, TIntV& Val, TInt Start, TInt End) {
   if (!(Start < End)) { return; }
   for (TInt Cur = Start+1; Cur <= End; Cur++) {
     // Pair being inserted into the already sorted prefix Start..Cur-1.
     TInt HeldKey = Key[Cur];
     TInt HeldVal = Val[Cur];
     TInt Slot = Cur;
     // Shift larger pairs one position right until the insertion slot is found.
     for (; (Start < Slot) && (CompareKeyVal(Key[Slot-1], Val[Slot-1], HeldKey, HeldVal) > 0); Slot--) {
       Key[Slot] = Key[Slot-1];
       Val[Slot] = Val[Slot-1];
     }
     Key[Slot] = HeldKey;
     Val[Slot] = HeldVal;
   }
 }


 /// Picks a pivot position for partitioning Start..End (both inclusive):
 /// draws three random positions in that range and returns the one whose
 /// (Key, Val) pair is the median of the three under CompareKeyVal.
 TInt TTable::GetPivotKeyVal(TIntV& Key, TIntV& Val, TInt Start, TInt End) {
   TInt Span = End - Start + 1;
   const TInt PosA = Start + TInt::GetRnd(Span);
   const TInt PosB = Start + TInt::GetRnd(Span);
   const TInt PosC = Start + TInt::GetRnd(Span);
   if (CompareKeyVal(Key[PosA], Val[PosA], Key[PosB], Val[PosB]) < 0) {
     // A < B: the median is B if B < C, else C if A < C, else A.
     if (CompareKeyVal(Key[PosB], Val[PosB], Key[PosC], Val[PosC]) < 0) {
       return PosB;
     }
     if (CompareKeyVal(Key[PosA], Val[PosA], Key[PosC], Val[PosC]) < 0) {
       return PosC;
     }
     return PosA;
   }
   // B <= A: the median is B if C < B, else C if C < A, else A.
   if (CompareKeyVal(Key[PosC], Val[PosC], Key[PosB], Val[PosB]) < 0) {
     return PosB;
   }
   if (CompareKeyVal(Key[PosC], Val[PosC], Key[PosA], Val[PosA]) < 0) {
     return PosC;
   }
   return PosA;
 }


 /// Partitions the (Key, Val) pairs at positions Start..End (both inclusive)
 /// around a pivot chosen by GetPivotKeyVal. On return, every pair left of the
 /// returned position compares <= the pivot pair and every pair right of it
 /// compares greater; the pivot pair itself sits at the returned position.
 TInt TTable::PartitionKeyVal(TIntV& Key, TIntV& Val, TInt Start, TInt End) {
   TInt PivotIdx = GetPivotKeyVal(Key, Val, Start, End);
   TInt PivotKey = Key[PivotIdx];
   TInt PivotVal = Val[PivotIdx];
   // Park the pivot at the end of the range while the rest is partitioned.
   Key.Swap(PivotIdx, End);
   Val.Swap(PivotIdx, End);
   // Boundary is the first position not yet known to hold a pair <= pivot.
   TInt Boundary = Start;
   for (TInt Scan = Start; Scan < End; Scan++) {
     if (CompareKeyVal(Key[Scan], Val[Scan], PivotKey, PivotVal) > 0) { continue; }
     Key.Swap(Scan, Boundary);
     Val.Swap(Scan, Boundary);
     Boundary++;
   }
   // Move the pivot pair into its final place.
   Key.Swap(Boundary, End);
   Val.Swap(Boundary, End);
   return Boundary;
 }


 // Sorts the (Key, Val) pairs at positions Start..End (both inclusive) in
 // place by CompareKeyVal order, keeping Key[i] and Val[i] together.
 //
 // Strategy, by range width L = End - Start:
 //   - L <= 0, or range already sorted: nothing to do;
 //   - L <= 20: insertion sort (ISortKeyVal);
 //   - otherwise: partition and recurse on both sides. For L > 500000 the two
 //     recursive calls are annotated as OpenMP tasks when USE_OPENMP is
 //     defined and GLib_WIN32 is not; without those macros they run inline.
 //
 // NOTE(review): no taskwait is issued here, so presumably the caller is
 // responsible for waiting on outstanding tasks - confirm at the call site.
 void TTable::QSortKeyVal(TIntV& Key, TIntV& Val, TInt Start, TInt End) {

   //printf("Thread=%d, Start=%d, End=%d\n", omp_get_thread_num(), Start.Val, End.Val);

   TInt L = End-Start;

   if (L <= 0) { return; }

   // Linear pre-check: skip all work for ranges that are already in order.
   if (CheckSortedKeyVal(Key, Val, Start, End) == 0) { return; }


   if (L <= 20) { ISortKeyVal(Key, Val, Start, End); }

   else {

     TInt Pivot = PartitionKeyVal(Key, Val, Start, End);


     if (Pivot > End) { return; }

     if (L <= 500000) {

       // Small enough: recurse sequentially on both halves.
       QSortKeyVal(Key, Val, Start, Pivot-1);

       QSortKeyVal(Key, Val, Pivot+1, End);

     } else {

       // Large range: each half becomes a task when OpenMP tasking is enabled;
       // Key and Val are explicitly shared between the tasks.
 #ifdef USE_OPENMP

 #ifndef GLib_WIN32

       #pragma omp task untied shared(Key, Val)

 #endif

 #endif

       { QSortKeyVal(Key, Val, Start, Pivot-1); }


 #ifdef USE_OPENMP

 #ifndef GLib_WIN32

       #pragma omp task untied shared(Key, Val)

 #endif

 #endif

       { QSortKeyVal(Key, Val, Pivot+1, End); }

     }

   }

 }


 /// Returns the row indices (as reported by TRowIterator::GetRowIdx) of all
 /// rows whose integer column ColName holds Val.
 /// If IntColIndexes holds an index for ColName, the answer is read from it
 /// (empty vector when Val is not a key); otherwise all rows are scanned.
 TIntV TTable::GetIntRowIdxByVal(const TStr& ColName, const TInt& Val) const {
   if (IntColIndexes.IsKey(ColName)) {
     // Bind by const reference: copying the whole per-column index on every
     // lookup would cost as much as the scan the index is meant to avoid.
     const THash<TInt, TIntV>& ColIndex = IntColIndexes.GetDat(ColName);
     if (ColIndex.IsKey(Val)) {
       return ColIndex.GetDat(Val);
     }
     return TIntV();
   }
   // No index for this column: fall back to a linear scan over the rows.
   TIntV ToReturn;
   for (TRowIterator RowI = BegRI(); RowI < EndRI(); RowI++) {
     TInt ValAtRow = RowI.GetIntAttr(ColName);
     if (Val == ValAtRow) {
       ToReturn.Add(RowI.GetRowIdx());
     }
   }
   return ToReturn;
 }

 /// Returns the row indices (as reported by TRowIterator::GetRowIdx) of all
 /// rows whose string column ColName holds the integer string-mapping Map.
 /// If StrMapColIndexes holds an index for ColName, the answer is read from it
 /// (empty vector when Map is not a key); otherwise all rows are scanned.
 TIntV TTable::GetStrRowIdxByMap(const TStr& ColName, const TInt& Map) const {
   if (StrMapColIndexes.IsKey(ColName)) {
     // Bind by const reference: copying the whole per-column index on every
     // lookup would cost as much as the scan the index is meant to avoid.
     const THash<TInt, TIntV>& ColIndex = StrMapColIndexes.GetDat(ColName);
     if (ColIndex.IsKey(Map)) {
       return ColIndex.GetDat(Map);
     }
     return TIntV();
   }
   // No index for this column: fall back to a linear scan over the rows.
   TIntV ToReturn;
   for (TRowIterator RowI = BegRI(); RowI < EndRI(); RowI++) {
     TInt MapAtRow = RowI.GetStrMapByName(ColName);
     if (Map == MapAtRow) {
       ToReturn.Add(RowI.GetRowIdx());
     }
   }
   return ToReturn;
 }


 /// Returns the row indices (as reported by TRowIterator::GetRowIdx) of all
 /// rows whose float column ColName holds exactly Val (compared with ==).
 /// If FltColIndexes holds an index for ColName, the answer is read from it
 /// (empty vector when Val is not a key); otherwise all rows are scanned.
 TIntV TTable::GetFltRowIdxByVal(const TStr& ColName, const TFlt& Val) const {
   if (FltColIndexes.IsKey(ColName)) {
     // Bind by const reference: copying the whole per-column index on every
     // lookup would cost as much as the scan the index is meant to avoid.
     const THash<TFlt, TIntV>& ColIndex = FltColIndexes.GetDat(ColName);
     if (ColIndex.IsKey(Val)) {
       return ColIndex.GetDat(Val);
     }
     return TIntV();
   }
   // No index for this column: fall back to a linear scan over the rows.
   TIntV ToReturn;
   for (TRowIterator RowI = BegRI(); RowI < EndRI(); RowI++) {
     TFlt ValAtRow = RowI.GetFltAttr(ColName);
     if (Val == ValAtRow) {
       ToReturn.Add(RowI.GetRowIdx());
     }
   }
   return ToReturn;
 }


 /// Builds an index for integer column ColName, mapping each distinct value to
 /// the vector of row indices holding it, and stores it in IntColIndexes under
 /// ColName. Always returns 0.
 TInt TTable::RequestIndexInt(const TStr& ColName) {
   THash<TInt, TIntV> NewIndex;
   for (TRowIterator RowI = BegRI(); RowI < EndRI(); RowI++) {
     const TInt ValAtRow = RowI.GetIntAttr(ColName);
     // AddDat(Key) yields a reference to the vector stored for the key,
     // creating an empty one on first sight. Appending through that reference
     // is essential: adding to a by-value copy of the vector would leave the
     // index with only the first row of every value.
     NewIndex.AddDat(ValAtRow).Add(RowI.GetRowIdx());
   }
   IntColIndexes.AddDat(ColName, NewIndex);
   return 0;
 }

 /// Builds an index for float column ColName, mapping each distinct value to
 /// the vector of row indices holding it, and stores it in FltColIndexes under
 /// ColName. Always returns 0.
 TInt TTable::RequestIndexFlt(const TStr& ColName) {
   THash<TFlt, TIntV> NewIndex;
   for (TRowIterator RowI = BegRI(); RowI < EndRI(); RowI++) {
     const TFlt ValAtRow = RowI.GetFltAttr(ColName);
     // AddDat(Key) yields a reference to the vector stored for the key,
     // creating an empty one on first sight. Appending through that reference
     // is essential: adding to a by-value copy of the vector would leave the
     // index with only the first row of every value.
     NewIndex.AddDat(ValAtRow).Add(RowI.GetRowIdx());
   }
   FltColIndexes.AddDat(ColName, NewIndex);
   return 0;
 }

 /// Builds an index for string column ColName, mapping each distinct integer
 /// string-mapping to the vector of row indices holding it, and stores it in
 /// StrMapColIndexes under ColName. Always returns 0.
 TInt TTable::RequestIndexStrMap(const TStr& ColName) {
   THash<TInt, TIntV> NewIndex;
   for (TRowIterator RowI = BegRI(); RowI < EndRI(); RowI++) {
     const TInt MapAtRow = RowI.GetStrMapByName(ColName);
     // AddDat(Key) yields a reference to the vector stored for the key,
     // creating an empty one on first sight. Appending through that reference
     // is essential: adding to a by-value copy of the vector would leave the
     // index with only the first row of every mapping.
     NewIndex.AddDat(MapAtRow).Add(RowI.GetRowIdx());
   }
   StrMapColIndexes.AddDat(ColName, NewIndex);
   return 0;
 }

aoMod
Definition: table.h:259

TTable::GetMemUsedKB
TSize GetMemUsedKB()
Returns approximate memory used by table in [KB].
Definition: table.cpp:3940

TTable::ThresholdJoinInputCorrectness
void ThresholdJoinInputCorrectness(const TStr &KeyCol1, const TStr &JoinCol1, const TTable &Table, const TStr &KeyCol2, const TStr &JoinCol2)
Definition: table.cpp:2478

TTable::AddSchemaCol
void AddSchemaCol(const TStr &ColName, TAttrType ColType)
Adds column with name ColName and type ColType to the schema.
Definition: table.h:642

TRowIterator::GetFltAttr
TFlt GetFltAttr(TInt ColIdx) const
Returns value of floating point attribute specified by float column index for current row...
Definition: table.cpp:159

TIntPr
TPair< TInt, TInt > TIntPr
Definition: ds.h:83

TTable::RequestIndexInt
TInt RequestIndexInt(const TStr &ColName)
Creates Index for Int Column ColName.
Definition: table.cpp:5476

aoDiv
Definition: table.h:259

TTable::IsLastGraphOfSequence
TBool IsLastGraphOfSequence()
Checks if the end of the graph sequence is reached.
Definition: table.cpp:3685

TTable::IsAttr
TBool IsAttr(const TStr &Attr)
Checks if Attr is an attribute of this table schema.
Definition: table.cpp:4628

TPredicate::SetFltVal
void SetFltVal(TStr VarName, TFlt VarVal)
Set flt variable value in the predicate or all the children that use it.
Definition: table.h:100

TTable::Order
void Order(const TStrV &OrderBy, TStr OrderColName="", TBool ResetRankByMSC=false, TBool Asc=true)
Orders the rows according to the values in columns of OrderBy (in descending lexicographic order)...
Definition: table.cpp:3240

TTable::FillBucketsByInterval
void FillBucketsByInterval(TStr SplitAttr, TIntPrV SplitIntervals)
Fills RowIdBuckets with sets of row ids.
Definition: table.cpp:3599

TSsParserMP::Next
bool Next()
Loads next line from the input file.
Definition: ssmp.cpp:17

TVec::EndI
TIter EndI() const
Returns an iterator referring to the past-the-end element in the vector.
Definition: ds.h:595

TTable::RemoveRow
void RemoveRow(TInt RowIdx, TInt PrevRowIdx)
Removes row with id RowIdx.
Definition: table.cpp:1135

TStrHash::Reserved
int Reserved() const
Definition: hash.h:843

aoAdd
Definition: table.h:259

TTable::EdgeAttrV
TStrV EdgeAttrV
List of columns (attributes) to serve as edge attributes.
Definition: table.h:591

TInt::GetStr
TStr GetStr() const
Definition: dt.h:1200

TTable::GroupMapping
THash< GroupStmt, THash< TGroupKey, TIntV > > GroupMapping
Maps grouping statements to their (group-by key –> group id) mapping.
Definition: table.h:581

TTable::FirstValidRow
TInt FirstValidRow
Physical index of first valid row.
Definition: table.h:553

TTable::DenormalizeColName
TStr DenormalizeColName(const TStr &ColName) const
Removes suffix to column name if exists.
Definition: table.cpp:4648

TStr::Len
int Len() const
Definition: dt.h:490

THash::GetDatV
void GetDatV(TVec< TDat > &DatV) const
Definition: hash.h:492

TTable::GetPivot
TInt GetPivot(TIntV &V, TInt StartIdx, TInt EndIdx, const TVec< TAttrType > &SortByTypes, const TIntV &SortByIndices, TBool Asc)
Gets pivot element for QSort.
Definition: table.cpp:3110

TTable::GetColIdx
TInt GetColIdx(const TStr &ColName) const
Gets index of column ColName among columns of the same type in the schema.
Definition: table.h:1013

TAttrType
enum TAttrType_ TAttrType
Types for tables, sparse and dense attributes.

TSsParserMP::GetStartPosV
TVec< uint64 > GetStartPosV(uint64 Lb, uint64 Ub) const
Finds start positions of all lines ending somewhere in [Lb, Ub)
Definition: ssmp.cpp:106

TTable::StoreGroupCol
void StoreGroupCol(const TStr &GroupColName, const TVec< TPair< TInt, TInt > > &GroupAndRowIds)
Parallel helper function for grouping. - we currently don't support such parallel grouping by complex...
Definition: table.cpp:1310

TTable::Last
static const TInt Last
Special value for Next vector entry - last row in table.
Definition: table.h:486

TTable::UnionAll
PTable UnionAll(const TTable &Table)
Returns union of this table with given Table, preserving duplicates.
Definition: table.cpp:4511

TStrHash::GetMemUsed
::TSize GetMemUsed() const
Definition: hash.h:866

THashKeyDatI
Definition: hash.h:49

TTable::PartitionKeyVal
static TInt PartitionKeyVal(TIntV &Key, TIntV &Val, TInt Start, TInt End)
Definition: table.cpp:5355

TPrimitive
Primitive class: Wrapper around primitive data types.
Definition: table.h:211

TRowIterator::operator==
bool operator==(const TRowIterator &RowI) const
Checks if this iterator points to the same row pointed by RowI.
Definition: table.cpp:147

TTable::GetSrcNodeIntAttrV
TStrV GetSrcNodeIntAttrV() const
Gets src node int attribute name vector.
Definition: table.cpp:1005

TTable::PrintGrouping
void PrintGrouping(const THash< TGroupKey, TIntV > &Grouping) const
Definition: table.cpp:1788

TTable::Sch
Schema Sch
Table Schema.
Definition: table.h:549

TTable::SelectFirstNRows
void SelectFirstNRows(const TInt &N)
Selects first N rows from the table.
Definition: table.cpp:3357

TTable::GetDstNodeStrAttrV
TStrV GetDstNodeStrAttrV() const
Gets dst node str attribute name vector.
Definition: table.cpp:1082

TTriple
Definition: ds.h:130

TVec::Del
void Del(const TSizeTy &ValN)
Removes the element at position ValN.
Definition: ds.h:1189

TTable::GetPartitionRanges
void GetPartitionRanges(TIntPrV &Partitions, TInt NumPartitions) const
Partitions the table into NumPartitions and populate Partitions with the ranges.
Definition: table.cpp:1177

TRowIterator::GetIntAttr
TInt GetIntAttr(TInt ColIdx) const
Returns value of integer attribute specified by integer column index for current row.
Definition: table.cpp:155

TPredComp
TPredComp
Comparison operators for selection predicates.
Definition: table.h:7

TInt::Val
int Val
Definition: dt.h:1139

TTable::Defrag
void Defrag()
Releases memory of deleted rows, and defrags.
Definition: table.cpp:3311

TTable::ToVarGraphSequenceIterator
PNEANet ToVarGraphSequenceIterator(TStr SplitAttr, TAttrAggr AggrPolicy, TIntPrV SplitIntervals)
Creates the graph sequence one at a time.
Definition: table.cpp:3671

THashMPKeyDatI
Definition: hashmp.h:42

TTable::SaveBin
void SaveBin(const TStr &OutFNm)
Saves table schema and content to a binary file.
Definition: table.cpp:849

TRowIterator::GetStrAttr
TStr GetStrAttr(TInt ColIdx) const
Returns value of string attribute specified by string column index for current row.
Definition: table.cpp:163

TInt::Save
void Save(TSOut &SOut) const
Definition: dt.h:1153

TTable::AddIntCol
void AddIntCol(const TStr &ColName)
Adds an integer column with name ColName.
Definition: table.cpp:4673

TTable::ColTypeMap
THash< TStr, TPair< TAttrType, TInt > > ColTypeMap
Definition: table.h:564

TAtomicPredicate::Rvar
TStr Rvar
Right variable of the comparison op.
Definition: table.h:21

TInt::Mx
static const int Mx
Definition: dt.h:1142

aaMean
Definition: table.h:257

TTable::ThresholdJoinCountCollisions
void ThresholdJoinCountCollisions(const TTable &TB, const TTable &TS, const TIntIntVH &T, TInt JoinColIdxB, TInt KeyColIdxB, TInt KeyColIdxS, THash< TIntPr, TIntTr > &Counters, TBool ThisIsSmaller, TAttrType JoinColType, TAttrType KeyType)
Definition: table.cpp:2506

TTable::AddGraphAttributeV
void AddGraphAttributeV(TStrV &Attrs, TBool IsEdge, TBool IsSrc, TBool IsDst)
Adds vector of names of columns to be used as graph attributes.
Definition: table.cpp:992

TTable::GroupByIntColMP
void GroupByIntColMP(const TStr &GroupBy, THashMP< TInt, TIntV > &Grouping, TBool UsePhysicalIds=true) const
Groups/hashes by a single column with integer values, using OpenMP multi-threading.
Definition: table.cpp:1225

TTable::SetFltColToConstMP
void SetFltColToConstMP(TInt UpdateColIdx, TFlt DefaultFltVal)
Definition: table.cpp:4152

TSsParserMP::GetFlds
int GetFlds() const
Returns the number of fields in the current line.
Definition: ssmp.h:51

TPair::GetVal1
const TVal1 & GetVal1() const
Definition: ds.h:60

TTable::ThresholdJoinCountPerJoinKeyCollisions
void ThresholdJoinCountPerJoinKeyCollisions(const TTable &TB, const TTable &TS, const TIntIntVH &T, TInt JoinColIdxB, TInt KeyColIdxB, TInt KeyColIdxS, THash< TIntTr, TIntTr > &Counters, TBool ThisIsSmaller, TAttrType JoinColType, TAttrType KeyType)
Definition: table.cpp:2557

TSsParserMP::GetStreamPos
uint64 GetStreamPos() const
Returns position of stream pointer.
Definition: ssmp.h:89

THash::BegI
TIter BegI() const
Definition: hash.h:213

TTable::ColAdd
void ColAdd(const TStr &Attr1, const TStr &Attr2, const TStr &ResultAttrName="")
Performs columnwise addition. See TTable::ColGenericOp.
Definition: table.cpp:4816

TArithOp
TArithOp
Possible column-wise arithmetic operations.
Definition: table.h:259

TTable::RequestIndexStrMap
TInt RequestIndexStrMap(const TStr &ColName)
Creates Index for Str Column ColName.
Definition: table.cpp:5514

TFlt::Val
double Val
Definition: dt.h:1388

TFOut
Definition: fl.h:319

TRowIteratorWithRemove::GetNextFltAttr
TFlt GetNextFltAttr(TInt ColIdx) const
Returns value of float attribute specified by float column index for next row.
Definition: table.cpp:252

TVec::Len
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:575

aoMul
Definition: table.h:259

TTable::AddSelectedRows
void AddSelectedRows(const TTable &Table, const TIntV &RowIDs)
Adds rows from Table that correspond to ids in RowIDs.
Definition: table.cpp:4399

TStrHash::Len
int Len() const
Definition: hash.h:842

TTable::IdColName
TStr IdColName
A mapping from column name to column type and column index among columns of the same type...
Definition: table.h:565

TPredicate
Predicate - encapsulates comparison operations.
Definition: table.h:82

TRowIterator::CompareAtomicConstTStr
TBool CompareAtomicConstTStr(TInt ColIdx, const TStr &Val, TPredComp Cmp)
Compares value in column ColIdx with given TStr Val.
Definition: table.cpp:208

TTable::SelfSimJoinPerGroup
PTable SelfSimJoinPerGroup(const TStr &GroupAttr, const TStr &SimCol, const TStr &DistanceColName, const TSimType &SimType, const TFlt &Threshold)
Performs join if the distance between two rows is less than the specified threshold.
Definition: table.cpp:2094

TTable::NormalizeColNameV
static TStrV NormalizeColNameV(const TStrV &Cols)
Adds suffix to column name if it doesn't exist.
Definition: table.h:539

TTable::CompareKeyVal
static TInt CompareKeyVal(const TInt &K1, const TInt &V1, const TInt &K2, const TInt &V2)
Definition: table.cpp:5297

TTable::StrMapColIndexes
THash< TStr, THash< TInt, TIntV > > StrMapColIndexes
Indexes for String Columns.
Definition: table.h:569

TTable::IntColIndexes
THash< TStr, THash< TInt, TIntV > > IntColIndexes
Indexes for Int Columns.
Definition: table.h:568

TTable::ColConcat
void ColConcat(const TStr &Attr1, const TStr &Attr2, const TStr &Sep="", const TStr &ResAttr="")
Concatenates two string columns.
Definition: table.cpp:5083

THash::Save
void Save(TSOut &SOut) const
Definition: hash.h:183

TTable::GetSrcNodeStrAttrV
TStrV GetSrcNodeStrAttrV() const
Gets src node str attribute name vector.
Definition: table.cpp:1071

TTable::Context
TTableContext * Context
Execution Context.
Definition: table.h:545

TSimType
TSimType
Distance metrics for similarity joins.
Definition: table.h:149

TRowIteratorWithRemove::Start
TBool Start
A flag indicating whether the current row is the first valid row of the table.
Definition: table.h:377

TTable::QSort
void QSort(TIntV &V, TInt StartIdx, TInt EndIdx, const TVec< TAttrType > &SortByTypes, const TIntV &SortByIndices, TBool Asc=true)
Performs QSort on given vector V.
Definition: table.cpp:3154

TAtomicPredicate::Type
TAttrType Type
Type of the predicate variables.
Definition: table.h:17

TPredicateNode::Left
TPredicateNode * Left
Left child of this node.
Definition: table.h:57

TPredicate::IntVars
THash< TStr, TInt > IntVars
Int variables in the current predicate tree.
Definition: table.h:84

TSsParser
Definition: ss.h:72

TTable::InvalidateAffectedGroupings
void InvalidateAffectedGroupings(const TStr &Attr)
Definition: table.cpp:1581

TTable::Dump
void Dump(FILE *OutF=stdout) const
Prints table contents to a text file.
Definition: table.cpp:887

TTable::LastValidRow
TInt LastValidRow
Physical index of last valid row.
Definition: table.h:554

TTable::Group
void Group(const TStrV &GroupBy, const TStr &GroupColName, TBool Ordered=true, TBool UsePhysicalIds=true)
Groups rows depending on values of GroupBy columns.
Definition: table.cpp:1569

TStr::GetSubStr
TStr GetSubStr(const int &BChN, const int &EChN) const
Definition: dt.cpp:811

TTable::ResizeTable
void ResizeTable(int RowCount)
Resizes the table to hold RowCount rows.
Definition: table.cpp:4330

TTable::PrintContextSize
void PrintContextSize()
Definition: table.cpp:3959

TTable::GetMP
static TInt GetMP()
Definition: table.h:527

TAttrAggr
TAttrAggr
Possible policies for aggregating node attributes.
Definition: table.h:257

TTable::ColDiv
void ColDiv(const TStr &Attr1, const TStr &Attr2, const TStr &ResultAttrName="")
Performs columnwise division. See TTable::ColGenericOp.
Definition: table.cpp:4828

TShMIn
Definition: fl.h:384

TTable::Rename
void Rename(const TStr &Column, const TStr &NewLabel)
Renames a column.
Definition: table.cpp:1105

TTable::GroupAux
void GroupAux(const TStrV &GroupBy, THash< TGroupKey, TPair< TInt, TIntV > > &Grouping, TBool Ordered, const TStr &GroupColName, TBool KeepUnique, TIntV &UniqueVec, TBool UsePhysicalIds=true)
Helper function for grouping.
Definition: table.cpp:1322

TPair::GetVal2
const TVal2 & GetVal2() const
Definition: ds.h:61

TTriple::Val1
TVal1 Val1
Definition: ds.h:132

TTable::GetEdgeFltAttrV
TStrV GetEdgeFltAttrV() const
Gets edge float attribute name vector.
Definition: table.cpp:1060

L2Norm
Definition: table.h:149

TSsParser::GetInt
bool GetInt(const int &FldN, int &Val) const
If the field FldN is an integer its value is returned in Val and the function returns true...
Definition: ss.cpp:447

TRowIteratorWithRemove::GetNextStrAttr
TStr GetNextStrAttr(TInt ColIdx) const
Returns value of string attribute specified by string column index for next row.
Definition: table.cpp:256

TTableContext
Execution context.
Definition: table.h:180

THash::GetDat
const TDat & GetDat(const TKey &Key) const
Definition: hash.h:262

TTable::GenerateColTypeMap
void GenerateColTypeMap(THash< TStr, TPair< TInt, TInt > > &ColTypeIntMap)
Definition: table.cpp:337

TNEANet::TNodeI
Node iterator. Only forward iteration (operator++) is supported.
Definition: network.h:1792

TNEANet::TEdgeI::GetStrAttrNames
void GetStrAttrNames(TStrV &Names) const
Gets vector of str attribute names.
Definition: network.h:1900

THash::EndI
TIter EndI() const
Definition: hash.h:218

TPt::Clr
void Clr()
Definition: bd.h:502

TVec::Load
void Load(TSIn &SIn)
Definition: ds.h:946

TTable::GetSchema
Schema GetSchema()
Gets the schema of this table.
Definition: table.h:1125

TTable::RowIdBuckets
TVec< TIntV > RowIdBuckets
Partitioning of row ids into buckets corresponding to different graph objects when generating a seque...
Definition: table.h:599

TTable::BegRIWR
TRowIteratorWithRemove BegRIWR()
Gets iterator with remove to the first valid row.
Definition: table.h:1245

TTable::GetNumValidRows
TInt GetNumValidRows() const
Gets number of valid, i.e. not deleted, rows in this table.
Definition: table.h:1234

TTable::BegRI
TRowIterator BegRI() const
Gets iterator to the first valid row of the table.
Definition: table.h:1241

TSsParser::GetFlds
int GetFlds() const
Returns the number of fields in the current line.
Definition: ss.h:116

TTable::ToGraphPerGroupIterator
PNEANet ToGraphPerGroupIterator(TStr GroupAttr, TAttrAggr AggrPolicy)
Creates the graph sequence one at a time.
Definition: table.cpp:3676

TTable::IntCols
TVec< TIntV > IntCols
Next[i] is the successor of row i. Table iterators follow the order dictated by Next ...
Definition: table.h:558

TRowIteratorWithRemove
Iterator class for TTable rows, that allows logical row removal while iterating.
Definition: table.h:374

TVec::GetMemUsed
TSizeTy GetMemUsed() const
Returns the memory footprint (the number of bytes) of the vector.
Definition: ds.h:511

TTable::CheckAndAddIntNode
void CheckAndAddIntNode(PNEANet Graph, THashSet< TInt > &NodeVals, TInt NodeId)
Checks if given NodeId is seen earlier; if not, add it to Graph and hashmap NodeVals.
Definition: table.cpp:3388

TTable::ToGraphSequence
TVec< PNEANet > ToGraphSequence(TStr SplitAttr, TAttrAggr AggrPolicy, TInt WindowSize, TInt JumpSize, TInt StartVal=TInt::Mn, TInt EndVal=TInt::Mx)
Creates a sequence of graphs based on values of column SplitAttr and windows specified by JumpSize an...
Definition: table.cpp:3651

TTable::GroupByFltCol
void GroupByFltCol(const TStr &GroupBy, T &Grouping, const TIntV &IndexSet, TBool All, TBool UsePhysicalIds=true) const
Groups/hashes by a single column with float values. Returns hash table with grouping.
Definition: table.h:1626

TRowIterator::GetStrMapByName
TInt GetStrMapByName(const TStr &Col) const
Returns integer mapping of string attribute specified by attribute name for current row...
Definition: table.cpp:181

TTable::Minus
PTable Minus(TTable &Table)
Returns table with rows that are present in this table but not in given Table.
Definition: table.cpp:4592

THashSet::IsKey
bool IsKey(const TKey &Key) const
Definition: shash.h:1148

TTable::GetNodeTable
static PTable GetNodeTable(const PNEANet &Network, TTableContext *Context)
Extracts node TTable from PNEANet.
Definition: table.cpp:3689

TPredicate::StrVars
THash< TStr, TStr > StrVars
String variables in the current predicate tree.
Definition: table.h:86

TTable::GetStrRowIdxByMap
TIntV GetStrRowIdxByMap(const TStr &ColName, const TInt &Map) const
Gets the rows containing int mapping Map in str column ColName.
Definition: table.cpp:5431

TNEANet::TEdgeI::GetId
int GetId() const
Returns edge ID.
Definition: network.h:1882

THash::LoadShM
void LoadShM(TShMIn &ShMIn)
Load THash from shared memory file. Copying/Deleting Keys is illegal.
Definition: hash.h:157

TTable::GetIdColName
TStr GetIdColName() const
Gets name of the id column of this table.
Definition: table.h:636

TPredicate::EvalStrAtom
static TBool EvalStrAtom(const TStr &Val1, const TStr &Val2, TPredComp Cmp)
Compare atomic string values Val1 and Val2 using predicate Cmp.
Definition: table.h:123

atInt
Definition: gbase.h:23

TRowIteratorWithRemove::TRowIteratorWithRemove
TRowIteratorWithRemove()
Default constructor.
Definition: table.h:380

TTable::LoadSSSeq
static void LoadSSSeq(PTable &NewTable, const Schema &S, const TStr &InFNm, const TIntV &RelevantCols, const char &Separator, TBool HasTitleLine)
Sequentially loads data from input file at InFNm into NewTable.
Definition: table.cpp:669

SUBSTR
Definition: table.h:7

TFlt
Definition: dt.h:1386

TTable::GetStrValIdx
TStr GetStrValIdx(TInt ColIdx, TInt RowIdx) const
Gets the value in column with id ColIdx at row RowIdx.
Definition: table.h:626

TSIn
Definition: fl.h:58

TVec::Save
void Save(TSOut &SOut) const
Definition: ds.h:954

TTable::IncrementNext
void IncrementNext()
Increments the next vector and set last, NumRows and NumValidRows.
Definition: table.cpp:2255

TTable::SimJoin
PTable SimJoin(const TStrV &Cols1, const TTable &Table, const TStrV &Cols2, const TStr &DistanceColName, const TSimType &SimType, const TFlt &Threshold)
Performs join if the distance between two rows is less than the specified threshold.
Definition: table.cpp:1994

TVec::Empty
bool Empty() const
Tests whether the vector is empty.
Definition: ds.h:570

TTable::InitIds
void InitIds()
Adds explicit row ids, initialize hash set mapping ids to physical rows.
Definition: table.cpp:1883

TTable::CommonNodeAttrs
TStrTrV CommonNodeAttrs
List of attribute pairs with values common to source and destination and their common given name...
Definition: table.h:594

TTable::QSortPar
void QSortPar(TIntV &V, const TVec< TAttrType > &SortByTypes, const TIntV &SortByIndices, TBool Asc=true)
Performs QSort in parallel on given vector V.
Definition: table.cpp:3206

TTable::Save
void Save(TSOut &SOut)
Saves table schema and content to a binary format.
Definition: table.cpp:854

TNEANet::TEdgeI::GetDstNId
int GetDstNId() const
Returns the destination of the edge.
Definition: network.h:1886

TVec::Swap
void Swap(TVec< TVal, TSizeTy > &Vec)
Swaps the contents of the vector with Vec.
Definition: ds.h:1101

TPredicateNode::Result
TBool Result
Result of evaluating the predicate rooted at this node.
Definition: table.h:54

THashSet
Definition: shash.h:1047

TTable::ReadFltCol
void ReadFltCol(const TStr &ColName, TFltV &Result) const
Reads values of entire float column into Result.
Definition: table.cpp:5221

TTable::InvalidatePhysicalGroupings
void InvalidatePhysicalGroupings()
Definition: table.cpp:1577

THashMP::EndI
TIter EndI() const
Definition: hashmp.h:156

TSsParserMP::SkipCommentLines
void SkipCommentLines()
Skips lines that begin with a comment character.
Definition: ssmp.cpp:12

TGroupKey
TPair< TIntV, TFltV > TGroupKey
Represents grouping key with IntV for integer and string attributes and FltV for float attributes...
Definition: table.h:145

TRowIterator
Iterator class for TTable rows.
Definition: table.h:330

TRowIteratorWithRemove::GetNextRowIdx
TInt GetNextRowIdx() const
Gets physical index of next row.
Definition: table.cpp:243

TTriple::Val2
TVal2 Val2
Definition: ds.h:133

TNEANet::TNodeI::GetId
int GetId() const
Returns ID of the current node.
Definition: network.h:1807

TInt::Mn
static const int Mn
Definition: dt.h:1141

TSsParser::Eof
bool Eof() const
Checks for end of file.
Definition: ss.h:122

TTable::Aggregate
void Aggregate(const TStrV &GroupByAttrs, TAttrAggr AggOp, const TStr &ValAttr, const TStr &ResAttr, TBool Ordered=true)
Aggregates values of ValAttr after grouping with respect to GroupByAttrs. Results are stored as new at...
Definition: table.cpp:1585

TTable::GetSchemaColType
TAttrType GetSchemaColType(TInt Idx) const
Gets type of the column with index Idx in the schema.
Definition: table.h:640

TVec::Clr
void Clr(const bool &DoDel=true, const TSizeTy &NoDelLim=-1)
Clears the contents of the vector.
Definition: ds.h:1022

TPredicate::SetIntVal
void SetIntVal(TStr VarName, TInt VarVal)
Set int variable value in the predicate or all the children that use it.
Definition: table.h:98

TTable::GetEdgeIntAttrV
TStrV GetEdgeIntAttrV() const
Gets edge int attribute name vector.
Definition: table.cpp:1027

L1Norm
Definition: table.h:149

TTable::TLoadVecInit
Definition: table.h:891

Haversine
Definition: table.h:149

TPredicate::SetStrVal
void SetStrVal(TStr VarName, TStr VarVal)
Set str variable value in the predicate or all the children that use it.
Definition: table.h:102

TRowIteratorWithRemove::RemoveNext
void RemoveNext()
Removes next row.
Definition: table.cpp:278

TAtomicPredicate::StrConst
TStr StrConst
Str const value if this object is a string constant.
Definition: table.h:24

TTable::ToGraphPerGroup
TVec< PNEANet > ToGraphPerGroup(TStr GroupAttr, TAttrAggr AggrPolicy)
Creates a sequence of graphs based on grouping specified by GroupAttr.
Definition: table.cpp:3662

TRowIterator::Table
const TTable * Table
Reference to table containing this row.
Definition: table.h:332

TExcept::Throw
static void Throw(const TStr &MsgStr)
Definition: ut.h:187

TTable::DenormalizeSchema
Schema DenormalizeSchema() const
Removes suffix to column names in the Schema.
Definition: table.cpp:4665

TTable::NextGraphIterator
PNEANet NextGraphIterator()
Calls to this must be preceded by a call to one of the above ToGraph*Iterator functions.
Definition: table.cpp:3681

TVec::PutAll
void PutAll(const TVal &Val)
Sets all elements of the vector to value Val.
Definition: ds.h:1229

uint64
unsigned long long uint64
Definition: bd.h:38

TTable::BuildGraph
PNEANet BuildGraph(const TIntV &RowIds, TAttrAggr AggrPolicy)
Makes a single pass over the rows in the given row id set, and creates nodes, edges, assigns node and edge attributes.
Definition: table.cpp:3445

TPredicate::EvalAtomicPredicate
TBool EvalAtomicPredicate(const TAtomicPredicate &Atom)
Evaluate the given atomic predicate.
Definition: table.cpp:102

TTable::ColSub
void ColSub(const TStr &Attr1, const TStr &Attr2, const TStr &ResultAttrName="")
Performs columnwise subtraction. See TTable::ColGenericOp.
Definition: table.cpp:4820

TNGraphMP::TEdgeI::GetSrcNId
int GetSrcNId() const
Gets the source node of an edge.
Definition: graphmp.h:119

TVec::GetDat
const TVal & GetDat(const TVal &Val) const
Returns reference to the first occurrence of element Val.
Definition: ds.h:838

TTable::GetEmptyRowsStart
int GetEmptyRowsStart(int NewRows)
Gets the start index to a chunk of empty rows of size NewRows.
Definition: table.cpp:4376

TTable::PrintSize
void PrintSize()
Definition: table.cpp:3930

TTable::FltColIndexes
THash< TStr, THash< TFlt, TIntV > > FltColIndexes
Indexes for Float Columns.
Definition: table.h:570

TAtomicPredicate::Lvar
TStr Lvar
Left variable of the comparison op.
Definition: table.h:20

TStrHash::GetKey
const char * GetKey(const int &KeyId) const
Definition: hash.h:893

TTable::ProjectInPlace
void ProjectInPlace(const TStrV &ProjectCols)
Keeps only the columns specified in ProjectCols.
Definition: table.cpp:5239

TPrimitive::GetStr
TStr GetStr() const
Definition: table.h:228

TRowIterator::CompareAtomicConst
TBool CompareAtomicConst(TInt ColIdx, const TPrimitive &Val, TPredComp Cmp)
Compares value in column ColIdx with given primitive Val.
Definition: table.cpp:190

TSize
size_t TSize
Definition: bd.h:58

Assert
#define Assert(Cond)
Definition: bd.h:251

TTable::Reindex
void Reindex()
Reinitializes row ids.
Definition: table.cpp:1889

TTable::CurrBucket
TInt CurrBucket
Current row id bucket - used when generating a sequence of graphs using an iterator.
Definition: table.h:600

TTable::IsNextK
PTable IsNextK(const TStr &OrderCol, TInt K, const TStr &GroupBy, const TStr &RankColName="")
Distance based filter.
Definition: table.cpp:3891

TTable::GetColType
TAttrType GetColType(const TStr &ColName) const
Gets type of column ColName.
Definition: table.h:1227

TTable::StrColMaps
TVec< TIntV > StrColMaps
Data columns of integer mappings of string attributes.
Definition: table.h:560

sync_bool_compare_and_swap
int sync_bool_compare_and_swap(int *lock)
Definition: table.cpp:4170

TRowIteratorWithRemove::Next
TRowIteratorWithRemove & Next()
Increments the iterator (For Python compatibility).
Definition: table.cpp:222

TTable::ToGraphSequenceIterator
PNEANet ToGraphSequenceIterator(TStr SplitAttr, TAttrAggr AggrPolicy, TInt WindowSize, TInt JumpSize, TInt StartVal=TInt::Mn, TInt EndVal=TInt::Mx)
Creates the graph sequence one at a time.
Definition: table.cpp:3666

Jaccard
Definition: table.h:149

TNGraphMP::TEdgeI::GetDstNId
int GetDstNId() const
Gets destination node of an edge.
Definition: graphmp.h:121

THashSet::AddKey
int AddKey(const TKey &Key)
Definition: shash.h:1254

THash::GetMemUsed
::TSize GetMemUsed() const
Definition: hash.h:201

TTable::GroupByIntCol
void GroupByIntCol(const TStr &GroupBy, T &Grouping, const TIntV &IndexSet, TBool All, TBool UsePhysicalIds=true) const
Groups/hashes by a single column with integer values.
Definition: table.h:1598

TTable::Join
PTable Join(const TStr &Col1, const TTable &Table, const TStr &Col2)
Performs equijoin.
Definition: table.cpp:2272

THashMP::IsKey
bool IsKey(const TKey &Key) const
Definition: hashmp.h:191

TTable::LoadSSPar
static void LoadSSPar(PTable &NewTable, const Schema &S, const TStr &InFNm, const TIntV &RelevantCols, const char &Separator, TBool HasTitleLine)
Loads data in parallel from input file at InFNm into NewTable. Only works when NewTable has no string c...
Definition: table.cpp:507

TTable::GetIntRowIdxByVal
TIntV GetIntRowIdxByVal(const TStr &ColName, const TInt &Val) const
Gets the rows containing Val in int column ColName.
Definition: table.cpp:5410

TRowIterator::GetRowIdx
TInt GetRowIdx() const
Gets the id of the row pointed by this iterator.
Definition: table.cpp:151

TSsParser::GetFlt
bool GetFlt(const int &FldN, double &Val) const
If the field FldN is a float its value is returned in Val and the function returns true...
Definition: ss.cpp:485

GroupStmt
A class representing a cached grouping statement identifier.
Definition: table.h:266

TTable::GetSchemaColName
TStr GetSchemaColName(TInt Idx) const
Gets name of the column with index Idx in the schema.
Definition: table.h:638

TNEANet::TEdgeI::GetSrcNId
int GetSrcNId() const
Returns the source of the edge.
Definition: network.h:1884

TRowIterator::GetStrMapById
TInt GetStrMapById(TInt ColIdx) const
Returns integer mapping of a string attribute value specified by string column index for current row...
Definition: table.cpp:186

TTable::SrcNodeAttrV
TStrV SrcNodeAttrV
List of columns (attributes) to serve as source node attributes.
Definition: table.h:592

TTable::AggrPolicy
TAttrAggr AggrPolicy
Aggregation policy used for solving conflicts between different values of an attribute of the same no...
Definition: table.h:601

TTable::QSortKeyVal
static void QSortKeyVal(TIntV &Key, TIntV &Val, TInt Start, TInt End)
Definition: table.cpp:5378

TTable::Select
void Select(TPredicate &Predicate, TIntV &SelectedRows, TBool Remove=true)
Selects rows that satisfy given Predicate.
Definition: table.cpp:2750

TTable::UnionAllInPlace
void UnionAllInPlace(const TTable &Table)
Same as TTable::ConcatTable.
Definition: table.cpp:4524

TTable::AddRowI
void AddRowI(const TRowIterator &RI)
Adds row corresponding to RI.
Definition: table.cpp:4295

TPrimitive::GetInt
TInt GetInt() const
Definition: table.h:226

TStr::GetCh
char GetCh(const int &ChN) const
Definition: dt.h:486

TTable::RowIdMap
TIntIntH RowIdMap
Mapping of permanent row ids to physical id.
Definition: table.h:566

TTable::SaveSS
void SaveSS(const TStr &OutFNm)
Saves table schema and content to a TSV file.
Definition: table.cpp:800

TTable::Union
PTable Union(const TTable &Table)
Returns union of this table with given Table.
Definition: table.cpp:4531

TTable::SelectAtomicConst
void SelectAtomicConst(const TStr &Col, const TPrimitive &Val, TPredComp Cmp, TIntV &SelectedRows, PTable &SelectedTable, TBool Remove=true, TBool Table=true)
Selects rows where the value of Col matches given primitive Val.
Definition: table.cpp:2873

NOT
Definition: table.h:5

TTable::UpdateFltFromTable
void UpdateFltFromTable(const TStr &KeyAttr, const TStr &UpdateAttr, const TTable &Table, const TStr &FKeyAttr, const TStr &ReadAttr, TFlt DefaultFltVal=0.0)
Definition: table.cpp:4242

TNGraphMP::TEdgeI
Edge iterator. Only forward iteration (operator++) is supported.
Definition: graphmp.h:102

TTable::ColConcatConst
void ColConcatConst(const TStr &Attr1, const TStr &Val, const TStr &Sep="", const TStr &ResAttr="")
Concatenates column values with given string value.
Definition: table.cpp:5182

TSOut
Definition: fl.h:128

TTable::GetCollidingRows
void GetCollidingRows(const TTable &T, THashSet< TInt > &Collisions)
Gets set of row ids of rows common with table T.
Definition: table.cpp:4014

TTable::AddGraphAttribute
void AddGraphAttribute(const TStr &Attr, TBool IsEdge, TBool IsSrc, TBool IsDst)
Adds names of columns to be used as graph attributes.
Definition: table.cpp:985

TTable::KeepSortedRows
void KeepSortedRows(const TIntV &KeepV)
Removes all rows that are not mentioned in the SORTED vector KeepV.
Definition: table.cpp:1152

TTable::GetColTypeMap
TPair< TAttrType, TInt > GetColTypeMap(const TStr &ColName) const
Gets column type and index of ColName.
Definition: table.h:666

TPrimitive::GetType
TAttrType GetType() const
Definition: table.h:229

TIntH
THash< TInt, TInt > TIntH
Definition: hash.h:607

TTable::GroupingSanityCheck
void GroupingSanityCheck(const TStr &GroupBy, const TAttrType &AttrType) const
Checks if grouping key exists and matches given attr type.
Definition: table.cpp:1215

TNEANet::TEdgeI::GetFltAttrNames
void GetFltAttrNames(TStrV &Names) const
Gets vector of flt attribute names.
Definition: network.h:1904

TTableContext::StringVals
TStrHash< TInt, TBigStrPool > StringVals
StringPool - stores string data values and maps them to integers.
Definition: table.h:182

TTable::UpdateTableForNewRow
void UpdateTableForNewRow()
Updates table state after adding one or more rows.
Definition: table.cpp:4140

TVec::SetVal
void SetVal(const TSizeTy &ValN, const TVal &Val)
Sets the value of element at position ValN to Val.
Definition: ds.h:653

TStrHash::AddKey
int AddKey(const char *Key)
Definition: hash.h:968

TTable::UseMP
static TInt UseMP
Global switch for choosing multi-threaded versions of TTable functions.
Definition: table.h:489

TSOut::Flush
virtual void Flush()=0

TAtomicPredicate::Compare
TPredComp Compare
Comparison op represented by this node.
Definition: table.h:19

TTable::DelColType
void DelColType(const TStr &ColName)
Removes the column with name ColName from the ColTypeMap.
Definition: table.h:661

TInt
Definition: dt.h:1137

TTable::ReadIntCol
void ReadIntCol(const TStr &ColName, TIntV &Result) const
Reads values of entire int column into Result.
Definition: table.cpp:5212

TTable::FillBucketsByWindow
void FillBucketsByWindow(TStr SplitAttr, TInt JumpSize, TInt WindowSize, TInt StartVal, TInt EndVal)
Fills RowIdBuckets with sets of row ids.
Definition: table.cpp:3547

TTable::NormalizeColName
static TStr NormalizeColName(const TStr &ColName)
Adds suffix to column name if it doesn't exist.
Definition: table.h:530

TTable::AddStrCol
void AddStrCol(const TStr &ColName)
Adds a string column with name ColName.
Definition: table.cpp:4687

TTable::GroupStmtNames
THash< TStr, GroupStmt > GroupStmtNames
Maps user-given grouping statement names to their group-by attributes.
Definition: table.h:573

TRowIterator::Next
TRowIterator & Next()
Increments the iterator (For Python compatibility).
Definition: table.cpp:135

TTable::SrcCol
TStr SrcCol
Column (attribute) to serve as src nodes when constructing the graph.
Definition: table.h:589

TNEANet::TNodeI::GetIntAttrNames
void GetIntAttrNames(TStrV &Names) const
Gets vector of int attribute names.
Definition: network.h:1849

TVec::ISort
void ISort(const TSizeTy &MnLValN, const TSizeTy &MxRValN, const bool &Asc)
Insertion sorts the values between positions MnLValN...MxRValN.
Definition: ds.h:1248

TTable::Project
PTable Project(const TStrV &ProjectCols)
Returns table with only the columns in ProjectCols.
Definition: table.cpp:4615

TTable::StoreStrCol
void StoreStrCol(const TStr &ColName, const TStrV &ColVals)
Adds entire str column to table.
Definition: table.cpp:4121

TPredicateNode::Right
TPredicateNode * Right
Definition: table.h:58

TVec::LoadShM
void LoadShM(TShMIn &ShMIn)
Constructs the vector from a shared memory input.
Definition: ds.h:932

TTable::FltCols
TVec< TFltV > FltCols
Data columns of floating point attributes.
Definition: table.h:559

TStrV
TVec< TStr > TStrV
Definition: ds.h:1599

TTable::GetDstNodeFltAttrV
TStrV GetDstNodeFltAttrV() const
Gets dst node float attribute name vector.
Definition: table.cpp:1049

TTable::DstNodeAttrV
TStrV DstNodeAttrV
List of columns (attributes) to serve as destination node attributes.
Definition: table.h:593

TSsParserMP::CountNewLinesInRange
uint64 CountNewLinesInRange(uint64 Lb, uint64 Ub) const
Counts the number of occurrences of '\n' (newline) in [Lb, Ub).
Definition: ssmp.cpp:102

TNEANet::TEdgeI
Edge iterator. Only forward iteration (operator++) is supported.
Definition: network.h:1867

TTable::Next
TIntV Next
A vector describing the logical order of the rows.
Definition: table.h:555

TInt::GetRnd
static int GetRnd(const int &Range=0)
Definition: dt.h:1178

TPair
Definition: ds.h:32

THashMP::Gen
void Gen(const int &ExpectVals)
Definition: hashmp.h:160

THash::AddKey
int AddKey(const TKey &Key)
Definition: hash.h:373

TTable::EndRI
TRowIterator EndRI() const
Gets iterator to the last valid row of the table.
Definition: table.h:1243

TTable::AddStrVal
void AddStrVal(const TInt &ColIdx, const TStr &Val)
Adds Val in column with id ColIdx.
Definition: table.cpp:971

TRowIteratorWithRemove::Table
TTable * Table
Reference to table containing this row.
Definition: table.h:376

TSsParserMP::GetIntFromFldV
int GetIntFromFldV(TVec< char * > &FieldsV, const int &FldN)
Gets integer at field FldN.
Definition: ssmp.cpp:152

TSsParserMP::NextFromIndex
void NextFromIndex(uint64 Index, TVec< char * > &FieldsV)
Loads next line starting from a given position.
Definition: ssmp.cpp:128

TTable::NumRows
TInt NumRows
Number of rows in the table (valid and invalid).
Definition: table.h:551

TTable::GetFltVal
TFlt GetFltVal(const TStr &ColName, const TInt &RowIdx)
Gets the value of float attribute ColName at row RowIdx.
Definition: table.h:1024

TTable::LoadSS
static PTable LoadSS(const Schema &S, const TStr &InFNm, TTableContext *Context, const char &Separator= '\t', TBool HasTitleLine=false)
Loads table from spread sheet (TSV, CSV, etc). Note: HasTitleLine = true is not supported. Please comment title lines instead.
Definition: table.cpp:795

TFltV
TVec< TFlt > TFltV
Definition: ds.h:1596

TTable::GetStrVal
TStr GetStrVal(const TStr &ColName, const TInt &RowIdx) const
Gets the value of string attribute ColName at row RowIdx.
Definition: table.h:1028

TTable::Unique
void Unique(const TStr &Col)
Removes rows with duplicate values in given column.
Definition: table.cpp:1266

TUInt64::GetStr
TStr GetStr() const
Definition: dt.h:1363

TRowIteratorWithRemove::operator++
TRowIteratorWithRemove & operator++(int)
Increments the iterator.
Definition: table.cpp:218

TTable::AddJointRow
void AddJointRow(const TTable &T1, const TTable &T2, TInt RowIdx1, TInt RowIdx2)
Adds joint row T1[RowIdx1]<=>T2[RowIdx2].
Definition: table.cpp:1957

TTable::Classify
void Classify(TPredicate &Predicate, const TStr &LabelName, const TInt &PositiveLabel=1, const TInt &NegativeLabel=0)
Definition: table.cpp:2805

TTable::Merge
void Merge(TIntV &V, TInt Idx1, TInt Idx2, TInt Idx3, const TVec< TAttrType > &SortByTypes, const TIntV &SortByIndices, TBool Asc=true)
Helper function for parallel QSort.
Definition: table.cpp:3178

TTable::DstCol
TStr DstCol
Column (attribute) to serve as dst nodes when constructing the graph.
Definition: table.h:590

THashMP::BegI
TIter BegI() const
Definition: hashmp.h:153

TTable::ReadStrCol
void ReadStrCol(const TStr &ColName, TStrV &Result) const
Reads values of entire string column into Result.
Definition: table.cpp:5230

int64
long long int64
Definition: bd.h:27

THash::GetKeyV
void GetKeyV(TVec< TKey > &KeyV) const
Definition: hash.h:484

TTable::GetEdgeTable
static PTable GetEdgeTable(const PNEANet &Network, TTableContext *Context)
Extracts edge TTable from PNEANet.
Definition: table.cpp:3741

TTable::Invalid
static const TInt Invalid
Special value for Next vector entry - logically removed row.
Definition: table.h:487

TTable::AddColType
void AddColType(const TStr &ColName, TPair< TAttrType, TInt > ColType)
Adds column with name ColName and type ColType to the ColTypeMap.
Definition: table.h:651

TStr
Definition: dt.h:412

TTable::GetNextGraphFromSequence
PNEANet GetNextGraphFromSequence()
Returns the next graph in sequence corresponding to RowIdBuckets.
Definition: table.cpp:3634

TStr::Empty
bool Empty() const
Definition: dt.h:491

TRowIteratorWithRemove::CompareAtomicConst
TBool CompareAtomicConst(TInt ColIdx, const TPrimitive &Val, TPredComp Cmp)
Compares value in column ColIdx with given primitive Val.
Definition: table.cpp:282

TTable::StoreFltCol
void StoreFltCol(const TStr &ColName, const TFltV &ColVals)
Adds entire flt column to table.
Definition: table.cpp:4104

TTable::GroupIDMapping
THash< GroupStmt, THash< TInt, TGroupKey > > GroupIDMapping
Maps grouping statements to their (group id –> group-by key) mapping.
Definition: table.h:577

TAtomicPredicate::IntConst
TInt IntConst
Int const value if this object is an integer constant.
Definition: table.h:22

TVec::BegI
TIter BegI() const
Returns an iterator pointing to the first element in the vector.
Definition: ds.h:593

TPredicateNode::Op
TPredOp Op
Logical op represented by this node.
Definition: table.h:53

TTable::LoadTableShM
void LoadTableShM(TShMIn &ShMIn, TTableContext *ContextTable)
Definition: table.cpp:360

TTable::GroupByStrCol
void GroupByStrCol(const TStr &GroupBy, T &Grouping, const TIntV &IndexSet, TBool All, TBool UsePhysicalIds=true) const
Groups/hashes by a single column with string values. Returns hash table with grouping.
Definition: table.h:1653

TTable::ChangeContext
TTableContext * ChangeContext(TTableContext *Context)
Changes the current context. Moves all object items to the new context.
Definition: table.cpp:921

THash
Definition: hash.h:97

TRowIteratorWithRemove::CurrRowIdx
TInt CurrRowIdx
Physical row index of current row pointed to by the iterator.
Definition: table.h:375

TPredicate::Root
TPredicateNode * Root
Root node of the current predicate tree.
Definition: table.h:87

atFlt
Definition: gbase.h:23

aoMax
Definition: table.h:259

TTable::AggregateCols
void AggregateCols(const TStrV &AggrAttrs, TAttrAggr AggOp, const TStr &ResAttr)
Aggregates attributes in AggrAttrs across columns.
Definition: table.cpp:1750

TRowIteratorWithRemove::operator==
bool operator==(const TRowIteratorWithRemove &RowI) const
Checks if this iterator points to the same row pointed by RowI.
Definition: table.cpp:235

TTable
Table class: Relational table with columnar data storage.
Definition: table.h:484

TRowIterator::operator<
bool operator<(const TRowIterator &RowI) const
Checks if this iterator points to a row that is before the one pointed by RowI.
Definition: table.cpp:141

TSsParserMP::SetStreamPos
void SetStreamPos(uint64 Pos)
Sets position of stream pointer.
Definition: ssmp.h:97

TTable::UpdateFltFromTableMP
void UpdateFltFromTableMP(const TStr &KeyAttr, const TStr &UpdateAttr, const TTable &Table, const TStr &FKeyAttr, const TStr &ReadAttr, TFlt DefaultFltVal=0.0)
Definition: table.cpp:4174

TTable::GetEdgeTablePN
static PTable GetEdgeTablePN(const PNGraphMP &Network, TTableContext *Context)
Extracts edge TTable from parallel graph PNGraphMP.
Definition: table.cpp:3799

TTable::ISort
void ISort(TIntV &V, TInt StartIdx, TInt EndIdx, const TVec< TAttrType > &SortByTypes, const TIntV &SortByIndices, TBool Asc=true)
Performs insertion sort on given vector V.
Definition: table.cpp:3096

TRowIteratorWithRemove::GetRowIdx
TInt GetRowIdx() const
Gets physical index of current row.
Definition: table.cpp:239

TTable::RequestIndexFlt
TInt RequestIndexFlt(const TStr &ColName)
Creates Index for Flt Column ColName.
Definition: table.cpp:5495

TPredicate::EvalAtom
static TBool EvalAtom(T Val1, T Val2, TPredComp Cmp)
Compare atomic values Val1 and Val2 using predicate Cmp.
Definition: table.h:110

TRowIteratorWithRemove::operator<
bool operator<(const TRowIteratorWithRemove &RowI) const
Checks if this iterator points to a row that is before the one pointed by RowI.
Definition: table.cpp:229

TTable::InitRowIdBuckets
void InitRowIdBuckets(int NumBuckets)
Initializes the RowIdBuckets vector which will be used for the graph sequence creation.
Definition: table.cpp:3535

TTable::GetSrcNodeFltAttrV
TStrV GetSrcNodeFltAttrV() const
Gets src node float attribute name vector.
Definition: table.cpp:1038

TTable::GetFltNodePropertyTable
static PTable GetFltNodePropertyTable(const PNEANet &Network, const TIntFltH &Property, const TStr &NodeAttrName, const TAttrType &NodeAttrType, const TStr &PropertyAttrName, TTableContext *Context)
Extracts node and edge property TTables from THash.
Definition: table.cpp:3852

THashMP
Hash-Table with multiprocessing support.
Definition: hashmp.h:81

TTable::ThresholdJoinPerJoinKeyOutputTable
PTable ThresholdJoinPerJoinKeyOutputTable(const THash< TIntTr, TIntTr > &Counters, TInt Threshold, const TTable &Table)
Definition: table.cpp:2622

TPair::Val1
TVal1 Val1
Definition: ds.h:34

TTable::ThresholdJoin
PTable ThresholdJoin(const TStr &KeyCol1, const TStr &JoinCol1, const TTable &Table, const TStr &KeyCol2, const TStr &JoinCol2, TInt Threshold, TBool PerJoinKey=false)
Definition: table.cpp:2644

TTable::ISortKeyVal
static void ISortKeyVal(TIntV &Key, TIntV &Val, TInt Start, TInt End)
Definition: table.cpp:5321

TAtomicPredicate::IsConst
TBool IsConst
Flag if this atomic node represents a constant value.
Definition: table.h:18

TRowIterator::CurrRowIdx
TInt CurrRowIdx
Physical row index of current row pointed by iterator.
Definition: table.h:331

TPair::Val2
TVal2 Val2
Definition: ds.h:35

TIntV
TVec< TInt > TIntV
Definition: ds.h:1594

TTable::GetPivotKeyVal
static TInt GetPivotKeyVal(TIntV &Key, TIntV &Val, TInt Start, TInt End)
Definition: table.cpp:5338

THash::Clr
void Clr(const bool &DoDel=true, const int &NoDelLim=-1, const bool &ResetDat=true)
Definition: hash.h:361

SUPERSTR
Definition: table.h:7

TSsParser::Next
bool Next()
Loads next line from the input file.
Definition: ss.cpp:412

TPt
Definition: bd.h:196

TTable::IsNextDirty
TInt IsNextDirty
Flag to signify whether the rows are stored in logical sequence or reordered. Used for optimizing Get...
Definition: table.h:603

TTable::GetEdgeStrAttrV
TStrV GetEdgeStrAttrV() const
Gets edge str attribute name vector.
Definition: table.cpp:1094

OR
Definition: table.h:5

TTable::AddFltCol
void AddFltCol(const TStr &ColName)
Adds a float column with name ColName.
Definition: table.cpp:4680

TTable::CompareRows
TInt CompareRows(TInt R1, TInt R2, const TAttrType &CompareByType, const TInt &CompareByIndex, TBool Asc=true)
Returns positive value if R1 is bigger, negative value if R2 is bigger, and 0 if they are equal (strc...
Definition: table.cpp:3064

TTable::RenumberColName
TStr RenumberColName(const TStr &ColName) const
Returns a re-numbered column name based on number of existing columns with conflicting names...
Definition: table.cpp:4632

TIntTr
TTriple< TInt, TInt, TInt > TIntTr
Definition: ds.h:171

TTable::NumValidRows
TInt NumValidRows
Number of valid rows in the table (i.e. rows that were not logically removed).
Definition: table.h:552

TTable::TTable
TTable()
Definition: table.cpp:302

TVec::Gen
void Gen(const TSizeTy &_Vals)
Constructs a vector (an array) of _Vals elements.
Definition: ds.h:523

TTable::ThresholdJoinOutputTable
PTable ThresholdJoinOutputTable(const THash< TIntPr, TIntTr > &Counters, TInt Threshold, const TTable &Table)
Definition: table.cpp:2608

TTable::Count
void Count(const TStr &CountColName, const TStr &Col)
Counts number of unique elements.
Definition: table.cpp:1802

TTable::InitializeJointTable
PTable InitializeJointTable(const TTable &Table)
Initializes an empty table for the join of this table with the given table.
Definition: table.cpp:1916

TTable::ColMax
void ColMax(const TStr &Attr1, const TStr &Attr2, const TStr &ResultAttrName="")
Performs max of two columns. See TTable::ColGenericOp.
Definition: table.cpp:4840

TVec::Reserve
void Reserve(const TSizeTy &_MxVals)
Reserves enough memory for the vector to store _MxVals elements.
Definition: ds.h:543

TTable::ClassifyAtomic
void ClassifyAtomic(const TStr &Col1, const TStr &Col2, TPredComp Cmp, const TStr &LabelName, const TInt &PositiveLabel=1, const TInt &NegativeLabel=0)
Definition: table.cpp:2866

Cmp
bool Cmp(const int &RelOp, const TRec &Rec1, const TRec &Rec2)
Definition: bd.h:426

TTable::StoreIntCol
void StoreIntCol(const TStr &ColName, const TIntV &ColVals)
Adds entire int column to table.
Definition: table.cpp:4087

TTable::AddIdColumn
void AddIdColumn(const TStr &IdColName)
Adds a column of explicit integer identifiers to the rows.
Definition: table.cpp:1900

TPredicateNode::GetVariables
void GetVariables(TStrV &Variables)
Get variables in the predicate tree rooted at this node.
Definition: table.cpp:1

TTable::CheckSortedKeyVal
static TInt CheckSortedKeyVal(TIntV &Key, TIntV &Val, TInt Start, TInt End)
Definition: table.cpp:5310

TTable::AddEdgeAttributes
void AddEdgeAttributes(PNEANet &Graph, int RowId)
Adds attributes of edge corresponding to RowId to the Graph.
Definition: table.cpp:3395

TNEANet::TEdgeI::GetIntAttrNames
void GetIntAttrNames(TStrV &Names) const
Gets vector of int attribute names.
Definition: network.h:1892

NOP
Definition: table.h:5

atStr
Definition: gbase.h:23

TTable::ToVarGraphSequence
TVec< PNEANet > ToVarGraphSequence(TStr SplitAttr, TAttrAggr AggrPolicy, TIntPrV SplitIntervals)
Creates a sequence of graphs based on values of column SplitAttr and intervals specified by SplitInte...
Definition: table.cpp:3657

TStr::CStr
char * CStr()
Definition: dt.h:479

TRowIteratorWithRemove::GetNextIntAttr
TInt GetNextIntAttr(TInt ColIdx) const
Returns value of integer attribute specified by integer column index for next row.
Definition: table.cpp:248

TTable::ColGenericOp
void ColGenericOp(const TStr &Attr1, const TStr &Attr2, const TStr &ResAttr, TArithOp op)
Performs columnwise arithmetic operation.
Definition: table.cpp:4752

TTable::SelectAtomic
void SelectAtomic(const TStr &Col1, const TStr &Col2, TPredComp Cmp, TIntV &SelectedRows, TBool Remove=true)
Selects rows using atomic compare operation.
Definition: table.cpp:2813

TRowIterator::operator++
TRowIterator & operator++(int)
Increments the iterator.
Definition: table.cpp:131

THash::IsKey
bool IsKey(const TKey &Key) const
Definition: hash.h:258

TPredicate::GetVariables
void GetVariables(TStrV &Variables)
Get variables in current predicate.
Definition: table.cpp:10

TSsParser::IsInt
bool IsInt(const int &FldN) const
Checks whether field FldN is an integer.
Definition: ss.h:143

TVec::Add
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:602

TTable::ColMin
void ColMin(const TStr &Attr1, const TStr &Attr2, const TStr &ResultAttrName="")
Performs min of two columns. See TTable::ColGenericOp.
Definition: table.cpp:4836

TBool
Definition: dt.h:974

TTable::ColMod
void ColMod(const TStr &Attr1, const TStr &Attr2, const TStr &ResultAttrName="")
Performs columnwise modulus. See TTable::ColGenericOp.
Definition: table.cpp:4832

TNEANet::New
static PNEANet New()
Static cons returns pointer to graph. Ex: PNEANet Graph=TNEANet::New().
Definition: network.h:2226

TNEANet::TNodeI::GetFltAttrNames
void GetFltAttrNames(TStrV &Names) const
Gets vector of flt attribute names.
Definition: network.h:1861

TTable::RemoveFirstRow
void RemoveFirstRow()
Removes first valid row of the table.
Definition: table.cpp:1122

TStr::IsStrIn
bool IsStrIn(const TStr &Str) const
Definition: dt.h:557

TRowIteratorWithRemove::IsFirst
TBool IsFirst() const
Checks whether iterator points to first valid row of the table.
Definition: table.cpp:274

TVec::Trunc
void Trunc(const TSizeTy &_Vals=-1)
Truncates the vector's length and capacity to _Vals elements.
Definition: ds.h:1033

TAtomicPredicate
Atomic predicate - encapsulates comparison operations.
Definition: table.h:15

TTable::IsColName
TBool IsColName(const TStr &ColName) const
Definition: table.h:646

aoSub
Definition: table.h:259

TTable::CheckAndAddFltNode
TInt CheckAndAddFltNode(T Graph, THash< TFlt, TInt > &NodeVals, TFlt FNodeVal)
Checks if given NodeVal is seen earlier; if not, add it to Graph and hashmap NodeVals.
Definition: table.h:1533

TPrimitive::GetFlt
TFlt GetFlt() const
Definition: table.h:227

TPredicateNode
Predicate node - represents a binary predicate operation on two predicate nodes.
Definition: table.h:51

THash::Len
int Len() const
Definition: hash.h:228

TTable::New
static PTable New()
Definition: table.h:932

TTable::AddNodeAttributes
void AddNodeAttributes(TInt NId, TStrV NodeAttrV, TInt RowId, THash< TInt, TStrIntVH > &NodeIntAttrs, THash< TInt, TStrFltVH > &NodeFltAttrs, THash< TInt, TStrStrVH > &NodeStrAttrs)
Takes as parameters, and updates, maps NodeXAttrs: Node Id –> (attribute name –> Vector of attribut...
Definition: table.cpp:3414

TNEANet::TNodeI::GetStrAttrNames
void GetStrAttrNames(TStrV &Names) const
Gets vector of str attribute names.
Definition: network.h:1857

TTable::GetFirstGraphFromSequence
PNEANet GetFirstGraphFromSequence(TAttrAggr AggrPolicy)
Returns the first graph of the sequence.
Definition: table.cpp:3628

THash::AddDat
TDat & AddDat(const TKey &Key)
Definition: hash.h:238

TTable::Intersection
PTable Intersection(const TTable &Table)
Returns intersection of this table with given Table.
Definition: table.cpp:4567

TSsParserMP
Definition: ssmp.h:7

TTable::AddNJointRowsMP
void AddNJointRowsMP(const TTable &T1, const TTable &T2, const TVec< TIntPrV > &JointRowIDSet)
Adds rows from T1 and T2 to this table in a parallel manner. Used by Join.
Definition: table.cpp:4442

TTable::AddRowV
void AddRowV(const TIntV &IntVals, const TFltV &FltVals, const TStrV &StrVals)
Adds row with values corresponding to the given vectors by type.
Definition: table.cpp:4317

THashMP::GetDat
const TDat & GetDat(const TKey &Key) const
Definition: hashmp.h:195

TAtomicPredicate::FltConst
TFlt FltConst
Flt const value if this object is a float constant.
Definition: table.h:23

TPredicate::Eval
TBool Eval()
Return the result of evaluating current predicate.
Definition: table.cpp:14

TTable::GetFltRowIdxByVal
TIntV GetFltRowIdxByVal(const TStr &ColName, const TFlt &Val) const
Gets the rows containing Val in flt column ColName.
Definition: table.cpp:5453

aoMin
Definition: table.h:259

TTable::GetContextMemUsedKB
TSize GetContextMemUsedKB()
Returns approximate memory used by table context in [KB].
Definition: table.cpp:3969

TSsParserMP::GetStreamLen
uint64 GetStreamLen() const
Returns length of stream.
Definition: ssmp.h:93

TPredicateNode::Parent
TPredicateNode * Parent
Parent node of this node.
Definition: table.h:56

THash::GetKey
const TKey & GetKey(const int &KeyId) const
Definition: hash.h:252

TTable::GetIntVal
TInt GetIntVal(const TStr &ColName, const TInt &RowIdx)
Gets the value of integer attribute ColName at row RowIdx.
Definition: table.h:1020

TTable::AddTable
void AddTable(const TTable &T)
Adds all the rows of the input table. Allows duplicate rows (not a union).
Definition: table.cpp:3975

TSsParser::IsCmt
bool IsCmt() const
Checks whether the current line is a comment (starts with '#').
Definition: ss.h:120

TTable::ColMul
void ColMul(const TStr &Attr1, const TStr &Attr2, const TStr &ResultAttrName="")
Performs columnwise multiplication. See TTable::ColGenericOp.
Definition: table.cpp:4824

TTriple::Val3
TVal3 Val3
Definition: ds.h:134

TTable::ClassifyAux
void ClassifyAux(const TIntV &SelectedRows, const TStr &LabelName, const TInt &PositiveLabel=1, const TInt &NegativeLabel=0)
Adds a label attribute with positive labels on selected rows and negative labels on the rest...
Definition: table.cpp:4694

TPredicate::FltVars
THash< TStr, TFlt > FltVars
Float variables in the current predicate tree.
Definition: table.h:85

TTable::AddNRows
void AddNRows(int NewRows, const TVec< TIntV > &IntColsP, const TVec< TFltV > &FltColsP, const TVec< TIntV > &StrColMapsP)
Adds NewRows rows from the given vectors for each column type.
Definition: table.cpp:4421

TTable::SpliceByGroup
TVec< PTable > SpliceByGroup(const TStrV &GroupByAttrs, TBool Ordered=true)
Splices table into subtables according to a grouping statement.
Definition: table.cpp:1808

aaCount
Definition: table.h:257

TStrHash::GetKeyId
int GetKeyId(const char *Key) const
Definition: hash.h:994

AND
Definition: table.h:5

TTable::ColGenericOpMP
void ColGenericOpMP(TInt ArgColIdx1, TInt ArgColIdx2, TAttrType ArgType1, TAttrType ArgType2, TInt ResColIdx, TArithOp op)
Definition: table.cpp:4708

TVec< TStr >

TTable::GetGraphsFromSequence
TVec< PNEANet > GetGraphsFromSequence(TAttrAggr AggrPolicy)
Returns a sequence of graphs.
Definition: table.cpp:3616

TTable::GetDstNodeIntAttrV
TStrV GetDstNodeIntAttrV() const
Gets dst node int attribute name vector.
Definition: table.cpp:1016

TPredicateNode::Atom
TAtomicPredicate Atom
Atomic predicate at this node.
Definition: table.h:55

TSsParser::IsFlt
bool IsFlt(const int &FldN) const
Checks whether field FldN is a float.
Definition: ss.h:148

TVec::AddV
TSizeTy AddV(const TVec< TVal, TSizeTy > &ValV)
Adds the elements of the vector ValV to the end of the vector.
Definition: ds.h:1110

TTable::Partition
TInt Partition(TIntV &V, TInt StartIdx, TInt EndIdx, const TVec< TAttrType > &SortByTypes, const TIntV &SortByIndices, TBool Asc)
Partitions vector for QSort.
Definition: table.cpp:3126

TSsParserMP::GetFltFromFldV
double GetFltFromFldV(TVec< char * > &FieldsV, const int &FldN)
Gets float at field FldN.
Definition: ssmp.cpp:170