SNAP Library, Developer Reference  2012-10-15 15:06:59
SNAP, a general purpose network analysis and graph mining library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
lx.h
Go to the documentation of this file.
00001 
// Lexical-Char-Definition
// Character classes. TLxChDef keeps one of these values per character and
// its Is*() predicates compare against them.
typedef enum {
  lctUndef,
  lctSpace,
  lctNum,
  lctAlpha,
  lctSSym,
  lctTerm
} TLxChTy;

// Identifies the character-definition variant handed to the TLxChDef
// constructor and to TILx/TOLx (default everywhere in this header: lcdtUsAscii).
typedef enum {
  lcdtUsAscii,
  lcdtYuAscii
} TLxChDefTy;
00005 
00006 ClassTP(TLxChDef, PLxChDef)//{
00007 private:
00008   TIntV ChTyV;
00009   TChV UcChV;
00010   void SetUcCh(const TStr& Str);
00011   void SetChTy(const TLxChTy& ChTy, const TStr& Str);
00012 public:
00013   TLxChDef(const TLxChDefTy& ChDefTy);
00014   static PLxChDef New(const TLxChDefTy& ChDefTy=lcdtUsAscii){
00015     return PLxChDef(new TLxChDef(ChDefTy));}
00016   TLxChDef(TSIn& SIn): ChTyV(SIn), UcChV(SIn){}
00017   static PLxChDef Load(TSIn& SIn){return new TLxChDef(SIn);}
00018   void Save(TSOut& SOut){ChTyV.Save(SOut); UcChV.Save(SOut);}
00019 
00020   TLxChDef& operator=(const TLxChDef& ChDef){
00021     ChTyV=ChDef.ChTyV; UcChV=ChDef.UcChV; return *this;}
00022 
00023   int GetChTy(const char& Ch) const {return ChTyV[Ch-TCh::Mn];}
00024   bool IsTerm(const char& Ch) const {return ChTyV[Ch-TCh::Mn]==TInt(lctTerm);}
00025   bool IsSpace(const char& Ch) const {return ChTyV[Ch-TCh::Mn]==TInt(lctSpace);}
00026   bool IsAlpha(const char& Ch) const {return ChTyV[Ch-TCh::Mn]==TInt(lctAlpha);}
00027   bool IsNum(const char& Ch) const {return ChTyV[Ch-TCh::Mn]==TInt(lctNum);}
00028   bool IsAlNum(const char& Ch) const {
00029     return (ChTyV[Ch-TCh::Mn]==TInt(lctAlpha))||(ChTyV[Ch-TCh::Mn]==TInt(lctNum));}
00030   char GetUc(const char& Ch) const {return UcChV[Ch-TCh::Mn];}
00031 
00032   bool IsNmStr(const TStr& Str) const;
00033   TStr GetUcStr(const TStr& Str) const;
00034 
00035   // standard entry points
00036   static PLxChDef GetChDef(const TLxChDefTy& ChDefTy=lcdtUsAscii);
00037 //  static TLxChDef& GetChDefRef(const TLxChDefTy& ChDefTy=lcdtUsAscii);
00038 };
00039 
// Lexical-Symbols
// Symbol codes shared by the lexical input (TILx) and output (TOLx) classes.
// The enumerator order is significant: values are implicit, and the range
// syMnRw..syMxRw brackets the reserved-word codes syRw1..syRw17.
typedef enum {
  // undefined / typed-value symbols
  syUndef, syLn, syTab, syBool, syInt, syFlt, syStr, syIdStr, syQStr,
  // punctuation
  syPeriod, syDPeriod, syComma, syColon, syDColon, sySemicolon,
  // arithmetic
  syPlus, syMinus, syAsterisk, sySlash, syPercent,
  // miscellaneous single characters
  syExclamation, syVBar, syAmpersand, syQuestion, syHash,
  // comparison
  syEq, syNEq, syLss, syGtr, syLEq, syGEq,
  // brackets
  syLParen, syRParen, syLBracket, syRBracket, syLBrace, syRBrace,
  // end markers
  syEoln, syEof,
  // reserved words
  syMnRw,
  syRw1, syRw2, syRw3, syRw4, syRw5, syRw6, syRw7, syRw8, syRw9,
  syRw10, syRw11, syRw12, syRw13, syRw14, syRw15, syRw16, syRw17,
  syMxRw
} TLxSym;
00053 
00054 class TLxSymStr{
00055 public:
00056   static const TStr UndefStr;
00057   static const TStr LnStr;
00058   static const TStr TabStr;
00059   static const TStr BoolStr;
00060   static const TStr IntStr;
00061   static const TStr FltStr;
00062   static const TStr StrStr;
00063   static const TStr IdStrStr;
00064   static const TStr QStrStr;
00065   static const TStr PeriodStr;
00066   static const TStr DPeriodStr;
00067   static const TStr CommaStr;
00068   static const TStr ColonStr;
00069   static const TStr DColonStr;
00070   static const TStr SemicolonStr;
00071   static const TStr PlusStr;
00072   static const TStr MinusStr;
00073   static const TStr AsteriskStr;
00074   static const TStr SlashStr;
00075   static const TStr PercentStr;
00076   static const TStr ExclamationStr;
00077   static const TStr VBarStr;
00078   static const TStr AmpersandStr;
00079   static const TStr QuestionStr;
00080   static const TStr HashStr;
00081   static const TStr EqStr;
00082   static const TStr NEqStr;
00083   static const TStr LssStr;
00084   static const TStr GtrStr;
00085   static const TStr LEqStr;
00086   static const TStr GEqStr;
00087   static const TStr LParenStr;
00088   static const TStr RParenStr;
00089   static const TStr LBracketStr;
00090   static const TStr RBracketStr;
00091   static const TStr LBraceStr;
00092   static const TStr RBraceStr;
00093   static const TStr EolnStr;
00094   static const TStr EofStr;
00095   static TStr GetSymStr(const TLxSym& Sym);
00096   static TLxSym GetSSym(const TStr& Str);
00097 public:
00098   static bool IsSep(const TLxSym& PrevSym, const TLxSym& Sym);
00099 };
00100 
00102 // Lexical-Input-Symbol-State
00103 class TILx;
00104 
00105 class TILxSymSt{
00106 private:
00107   TLxSym Sym;
00108   TStr Str, UcStr, CmtStr;
00109   bool Bool; int Int; double Flt;
00110   int SymLnN, SymLnChN, SymChN;
00111 public:
00112   TILxSymSt();
00113   TILxSymSt(const TILxSymSt& SymSt);
00114   TILxSymSt(TILx& Lx);
00115   TILxSymSt(TSIn&){Fail;}
00116   void Save(TSOut&){Fail;}
00117 
00118   void Restore(TILx& Lx);
00119 };
00120 
// Lexical-Input
// Option identifiers for TILx; each has a matching Is* flag in the class.
// iloMx is the terminating enumerator (number of options).
typedef enum {
  iloCmtAlw,
  iloRetEoln,
  iloSigNum,
  iloUniStr,
  iloCsSens,
  iloExcept,
  iloTabSep,
  iloList,
  iloMx
} TILxOpt;
00126 
00127 class TILx{
00128 private:
00129   PLxChDef ChDef;
00130   PSIn SIn;
00131   TSIn& RSIn;
00132   char PrevCh, Ch;
00133   int LnN, LnChN, ChN;
00134   TSStack<TILxSymSt> PrevSymStStack;
00135   TStrIntH RwStrH;
00136   bool IsCmtAlw, IsRetEoln, IsSigNum, IsUniStr, IsCsSens;
00137   bool IsExcept, IsTabSep, IsList;
00138   char GetCh(){
00139     Assert(Ch!=TCh::EofCh);
00140     PrevCh=Ch; LnChN++; ChN++;
00141     Ch=((RSIn.Eof()) ? TCh::EofCh : RSIn.GetCh());
00142     if (IsList){putchar(Ch);}
00143     return Ch;
00144   }
00145   char GetChX(){char Ch=GetChX(); printf("%c", Ch); return Ch;}
00146 public: // symbol state
00147   TLxSym Sym;
00148   TChA Str, UcStr, CmtStr;
00149   bool Bool; int Int; double Flt;
00150   int SymLnN, SymLnChN, SymChN;
00151   bool QuoteP;
00152   char QuoteCh;
00153 public:
00154   TILx(const PSIn& _SIn, const TFSet& OptSet=TFSet(),
00155    const TLxChDefTy& ChDefTy=lcdtUsAscii);
00156 
00157   TILx& operator=(const TILx&){Fail; return *this;}
00158 
00159   void SetOpt(const int& Opt, const bool& Val);
00160   TLxSym AddRw(const TStr& Str);
00161   TLxSym GetRw(const TStr& Str){
00162     return TLxSym(int(RwStrH.GetDat(Str)));}
00163   PSIn GetSIn(const char& SepCh);
00164   int GetLnN() const {return LnN;}
00165   bool IsBof() const {return ChN==-1;}
00166   bool IsEof() const {return Ch==TCh::EofCh;}
00167 
00168   TLxSym GetSym(const TFSet& Expect);
00169   TLxSym GetSym(){return GetSym(TFSet());}
00170   TLxSym GetSym(const TLxSym& Sym){return GetSym(TFSet()|Sym);}
00171   TLxSym GetSym(const TLxSym& Sym1, const TLxSym& Sym2){
00172     return GetSym(TFSet()|Sym1|Sym2);}
00173   TLxSym GetSym(const TLxSym& Sym1, const TLxSym& Sym2, const TLxSym& Sym3){
00174     return GetSym(TFSet()|Sym1|Sym2|Sym3);}
00175   TLxSym GetSym(const TLxSym& Sym1, const TLxSym& Sym2, const TLxSym& Sym3,
00176    const TLxSym& Sym4){
00177     return GetSym(TFSet()|Sym1|Sym2|Sym3|Sym4);}
00178   bool GetBool(){GetSym(TFSet()|syBool); return Bool;}
00179   int GetInt(){GetSym(TFSet()|syInt); return Int;}
00180   double GetFlt(){GetSym(TFSet()|syFlt); return Flt;}
00181   TStr GetStr(const TStr& _Str=TStr()){
00182     GetSym(TFSet()|syStr); IAssert(_Str.Empty()||(_Str==Str)); return Str;}
00183   TStr GetIdStr(const TStr& IdStr=TStr()){
00184     GetSym(TFSet()|syIdStr); IAssert(IdStr.Empty()||(IdStr==Str)); return Str;}
00185   TStr GetQStr(const TStr& QStr=TStr()){
00186     GetSym(TFSet()|syQStr); IAssert(QStr.Empty()||(Str==QStr)); return Str;}
00187   void GetEoln(){GetSym(TFSet()|syEoln);}
00188   TStr GetStrToCh(const char& ToCh);
00189   TStr GetStrToEolnOrCh(const char& ToCh);
00190   TStr GetStrToEoln(const bool& DoTrunc=false);
00191   TStr GetStrToEolnAndCh(const char& ToCh);
00192   void SkipToEoln();
00193   void SkipToSym(const TLxSym& SkipToSym){
00194     while (Sym!=SkipToSym){GetSym();}}
00195 
00196   void PutSym(const TILxSymSt& SymSt){PrevSymStStack.Push(TILxSymSt(SymSt));}
00197   void PutSym(){PrevSymStStack.Push(TILxSymSt(*this));}
00198   TLxSym PeekSym(){TLxSym NextSym=GetSym(); PutSym(); return NextSym;}
00199   TLxSym PeekSym(const int& Syms);
00200 
00201   TStr GetSymStr() const;
00202   TStr GetFPosStr() const;
00203   static TStr GetQStr(const TStr& Str, const bool& QuoteP, const char& QuoteCh);
00204 
00205   bool IsVar(const TStr& VarNm){
00206     GetSym(); bool Var=((Sym==syIdStr)&&(Str==VarNm)); PutSym(); return Var;}
00207   void GetVar(const TStr& VarNm,
00208    const bool& LBracket=false, const bool& NewLn=false){
00209     GetIdStr(VarNm); GetSym(syColon);
00210     if (LBracket){GetSym(syLBracket);} if (NewLn){GetEoln();}}
00211   void GetVarEnd(const bool& RBracket=false, const bool& NewLn=false){
00212     if (RBracket){GetSym(syRBracket);}
00213     if (NewLn){GetEoln();}}
00214   bool PeekVarEnd(const bool& RBracket=false, const bool& NewLn=false){
00215     if (RBracket){return PeekSym()==syRBracket;}
00216     if (NewLn){return PeekSym()==syEoln;} Fail; return false;}
00217   bool GetVarBool(const TStr& VarNm, const bool& NewLn=true){
00218     GetIdStr(VarNm); GetSym(syColon); bool Bool=GetBool();
00219     if (NewLn){GetEoln();} return Bool;}
00220   int GetVarInt(const TStr& VarNm, const bool& NewLn=true){
00221     GetIdStr(VarNm); GetSym(syColon); int Int=GetInt();
00222     if (NewLn){GetEoln();} return Int;}
00223   double GetVarFlt(const TStr& VarNm, const bool& NewLn=true){
00224     GetIdStr(VarNm); GetSym(syColon); double Flt=GetFlt();
00225     if (NewLn){GetEoln();} return Flt;}
00226   TStr GetVarStr(const TStr& VarNm, const bool& NewLn=true){
00227     GetIdStr(VarNm); GetSym(syColon); TStr Str=GetQStr();
00228     if (NewLn){GetEoln();} return Str;}
00229   TSecTm GetVarSecTm(const TStr& VarNm, const bool& NewLn=true){
00230     GetIdStr(VarNm); GetSym(syColon); TSecTm SecTm=TSecTm::LoadTxt(*this);
00231     if (NewLn){GetEoln();} return SecTm;}
00232   void GetVarBoolV(const TStr& VarNm, TBoolV& BoolV, const bool& NewLn=true);
00233   void GetVarIntV(const TStr& VarNm, TIntV& IntV, const bool& NewLn=true);
00234   void GetVarFltV(const TStr& VarNm, TFltV& FltV, const bool& NewLn=true);
00235   void GetVarStrV(const TStr& VarNm, TStrV& StrV, const bool& NewLn=true);
00236   void GetVarStrPrV(const TStr& VarNm, TStrPrV& StrPrV, const bool& NewLn=true);
00237   void GetVarStrVV(const TStr& VarNm, TVec<TStrV>& StrVV, const bool& NewLn=true);
00238 
00239   // file-of-lines
00240   static void GetLnV(const TStr& FNm, TStrV& LnV);
00241 };
00242 
// Lexical-Output
// Option identifiers for TOLx; each has a matching Is* flag in the class.
// oloMx is the terminating enumerator (number of options).
typedef enum {
  oloCmtAlw,
  oloFrcEoln,
  oloSigNum,
  oloUniStr,
  oloCsSens,
  oloTabSep,
  oloVarIndent,
  oloMx
} TOLxOpt;
00248 
00249 class TOLx{
00250 private:
00251   PLxChDef ChDef;
00252   PSOut SOut;
00253   TSOut& RSOut;
00254   bool IsCmtAlw, IsFrcEoln, IsSigNum, IsUniStr;
00255   bool IsCsSens, IsTabSep, IsVarIndent;
00256   int VarIndentLev;
00257   TStrIntH RwStrH;
00258   TIntStrH RwSymH;
00259   TLxSym PrevSym;
00260   void PutSep(const TLxSym& Sym);
00261 public:
00262   TOLx(const PSOut& _SOut, const TFSet& OptSet,
00263    const TLxChDefTy& ChDefTy=lcdtUsAscii);
00264 
00265   TOLx& operator=(const TOLx&){Fail; return *this;}
00266 
00267   void SetOpt(const int& Opt, const bool& Val);
00268   TLxSym AddRw(const TStr& Str);
00269   PSOut GetSOut(const char& SepCh){
00270     RSOut.PutCh(SepCh); return SOut;}
00271 
00272   void PutSym(const TLxSym& Sym);
00273   void PutBool(const TBool& Bool){
00274     PutSep(syIdStr); RSOut.PutStr(TBool::GetStr(Bool));}
00275   void PutInt(const TInt& Int){
00276     if (!IsSigNum){Assert(int(Int)>=0);}
00277     PutSep(syInt); RSOut.PutStr(TInt::GetStr(Int));}
00278   void PutFlt(const TFlt& Flt, const int& Width=-1, const int& Prec=-1){
00279     if (!IsSigNum){Assert(Flt>=0);}
00280     PutSep(syFlt); RSOut.PutStr(TFlt::GetStr(Flt, Width, Prec));}
00281   void PutStr(const TStr& Str){
00282     if ((IsUniStr)&&(ChDef->IsNmStr(Str))){PutSep(syIdStr); RSOut.PutStr(Str);}
00283     else {PutSep(syStr); RSOut.PutCh('"'); RSOut.PutStr(Str); RSOut.PutCh('"');}}
00284   void PutIdStr(const TStr& Str, const bool& CheckIdStr=true){
00285     if (CheckIdStr){Assert(ChDef->IsNmStr(Str));}
00286     PutSep(syIdStr); RSOut.PutStr(Str);}
00287   void PutQStr(const TStr& Str){
00288     PutSep(syQStr); RSOut.PutCh('"'); RSOut.PutStr(Str); RSOut.PutCh('"');}
00289   void PutQStr(const TChA& ChA){
00290     PutSep(syQStr); RSOut.PutCh('"'); RSOut.PutStr(ChA); RSOut.PutCh('"');}
00291   void PutUQStr(const TStr& Str){
00292     PutSep(syIdStr); RSOut.PutStr(Str);}
00293   void PutLnCmt(const TStr& Str, const int& IndentLev=0){
00294     Assert(IsCmtAlw); PutStr(" // "); PutStr(Str); PutLn(IndentLev);}
00295   void PutParCmt(const TStr& Str){
00296     Assert(IsCmtAlw); PutStr(" /* "); PutStr(Str); PutStr(" */ ");}
00297   void PutIndent(const int& IndentLev){
00298     RSOut.PutCh(' ', IndentLev*2);}
00299   void PutTab() const {RSOut.PutCh(TCh::TabCh);}
00300   void PutLn(const int& IndentLev=0){
00301     Assert(IsFrcEoln);
00302     PutSep(syEoln); RSOut.PutLn(); RSOut.PutCh(' ', IndentLev*2);}
00303   void PutDosLn(const int& IndentLev=0){
00304     Assert(IsFrcEoln);
00305     PutSep(syEoln); RSOut.PutDosLn(); RSOut.PutCh(' ', IndentLev*2);}
00306 
00307   void PutVar(const TStr& VarNm, const bool& LBracket=false,
00308    const bool& NewLn=false, const bool& CheckIdStr=true){
00309     if (IsVarIndent){PutIndent(VarIndentLev);}
00310     PutIdStr(VarNm, CheckIdStr); PutSym(syColon);
00311     if (LBracket){PutSym(syLBracket);}
00312     if (NewLn){PutLn(); VarIndentLev++;}}
00313   void PutVarEnd(const bool& RBracket=false, const bool& NewLn=false){
00314     if (IsVarIndent){PutIndent(VarIndentLev-1);}
00315     if (RBracket){PutSym(syRBracket);}
00316     if (NewLn){PutLn(); VarIndentLev--;}}
00317   void PutVarBool(const TStr& VarNm, const bool& Bool,
00318    const bool& NewLn=true, const bool& CheckIdStr=true){
00319     if (IsVarIndent){PutIndent(VarIndentLev);}
00320     PutIdStr(VarNm, CheckIdStr); PutSym(syColon); PutBool(Bool);
00321     if (NewLn){PutLn();}}
00322   void PutVarInt(const TStr& VarNm, const int& Int,
00323    const bool& NewLn=true, const bool& CheckIdStr=true){
00324     if (IsVarIndent){PutIndent(VarIndentLev);}
00325     PutIdStr(VarNm, CheckIdStr); PutSym(syColon); PutInt(Int);
00326     if (NewLn){PutLn();}}
00327   void PutVarFlt(const TStr& VarNm, const double& Flt,
00328    const bool& NewLn=true, const bool& CheckIdStr=true){
00329     if (IsVarIndent){PutIndent(VarIndentLev);}
00330     PutIdStr(VarNm, CheckIdStr); PutSym(syColon); PutFlt(Flt);
00331     if (NewLn){PutLn();}}
00332   void PutVarStr(const TStr& VarNm, const TStr& Str,
00333    const bool& NewLn=true, const bool& CheckIdStr=true){
00334     if (IsVarIndent){PutIndent(VarIndentLev);}
00335     PutIdStr(VarNm, CheckIdStr); PutSym(syColon); PutQStr(Str);
00336     if (NewLn){PutLn();}}
00337   void PutVarSecTm(const TStr& VarNm, const TSecTm& SecTm,
00338    const bool& NewLn=true, const bool& CheckIdStr=true){
00339     if (IsVarIndent){PutIndent(VarIndentLev);}
00340     PutIdStr(VarNm, CheckIdStr); PutSym(syColon); SecTm.SaveTxt(*this);
00341     if (NewLn){PutLn();}}
00342   void PutVarBoolV(const TStr& VarNm, const TBoolV& BoolV,
00343    const bool& NewLn=true, const bool& CheckIdStr=true);
00344   void PutVarIntV(const TStr& VarNm, const TIntV& IntV,
00345    const bool& NewLn=true, const bool& CheckIdStr=true);
00346   void PutVarFltV(const TStr& VarNm, const TFltV& FltV,
00347    const bool& NewLn=true, const bool& CheckIdStr=true);
00348   void PutVarStrV(const TStr& VarNm, const TStrV& StrV,
00349    const bool& NewLn=true, const bool& CheckIdStr=true);
00350   void PutVarStrPrV(const TStr& VarNm, const TStrPrV& StrPrV,
00351    const bool& NewLn=true, const bool& CheckIdStr=true);
00352   void PutVarStrVV(const TStr& VarNm, const TVec<TStrV>& StrVV,
00353    const bool& NewLn=true, const bool& CheckIdStr=true);
00354 };
00355 
00357 // Preprocessor
00358 class TPreproc{
00359 private:
00360   PSIn SIn;
00361   TStrV SubstKeyIdV;
00362   char PrevCh, Ch;
00363   THash<TStr, TStrPrV> SubstIdToKeyIdValPrVH;
00364   char GetCh();
00365   bool IsSubstId(const TStr& SubstId, TStr& SubstValStr) const;
00366   UndefDefaultCopyAssign(TPreproc);
00367 public:
00368   TPreproc(const TStr& InFNm, const TStr& OutFNm,
00369    const TStr& SubstFNm, const TStrV& _SubstKeyIdV);
00370 
00371   static void Execute(const TStr& InFNm, const TStr& OutFNm,
00372    const TStr& InSubstFNm, const TStrV& SubstKeyIdV){
00373     TPreproc Preproc(InFNm, OutFNm, InSubstFNm, SubstKeyIdV);}
00374 };
00375 
/* Sample Subst-File
<SubstList>

<Subst Id="TId">
  <Str Key="MSSQL">TId</Str>
  <Str Key="Oracle">NUMBER(15) NOT NULL</Str>
</Subst>

<Subst Id="TStr2NN">
  <Str Key="MSSQL">TStr2NN</Str>
  <Str Key="Oracle">VARCHAR2(2) NOT NULL</Str>
</Subst>

</SubstList>
*/