SNAP Library, Developer Reference  2012-10-02 12:56:23
SNAP, a general purpose network analysis and graph mining library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
lx.cpp
Go to the documentation of this file.
00001 
00002 // Lexical-Chars
00003 void TLxChDef::SetUcCh(const TStr& Str){
00004   for (int CC=1; CC<Str.Len(); CC++){
00005     UcChV[Str[CC]-TCh::Mn]=TCh(Str[0]);}
00006 }
00007 
00008 void TLxChDef::SetChTy(const TLxChTy& ChTy, const TStr& Str){
00009   for (int CC=0; CC<Str.Len(); CC++){
00010     ChTyV[Str[CC]-TCh::Mn]=TInt(ChTy);}
00011 }
00012 
00013 TLxChDef::TLxChDef(const TLxChDefTy& ChDefTy):
00014   ChTyV(TCh::Vals), UcChV(TCh::Vals){
00015 
00016   if (ChDefTy==lcdtUsAscii){
00017     // Character-Types
00018     ChTyV.PutAll(TInt(lctSpace));
00019     SetChTy(lctNum, "0123456789");
00020     SetChTy(lctAlpha, "ABCDEFGHIJKLMNOPQRSTUVWXYZ");
00021     SetChTy(lctAlpha, "abcdefghijklmnopqrstuvwxyz");
00022     SetChTy(lctAlpha, "@_");
00023     SetChTy(lctSSym, "\"'.,:;+-*/%!#|&<=>?()[]{}");
00024     SetChTy(lctTerm, TStr(TCh::CrCh));
00025     SetChTy(lctTerm, TStr(TCh::LfCh));
00026     SetChTy(lctTerm, TStr(TCh::EofCh));
00027 
00028     // Upper-Case
00029     for (int Ch=TCh::Mn; Ch<=TCh::Mx; Ch++){UcChV[Ch-TCh::Mn]=TCh(char(Ch));}
00030     SetUcCh("Aa"); SetUcCh("Bb"); SetUcCh("Cc"); SetUcCh("Dd"); SetUcCh("Ee");
00031     SetUcCh("Ff"); SetUcCh("Gg"); SetUcCh("Hh"); SetUcCh("Ii"); SetUcCh("Jj");
00032     SetUcCh("Kk"); SetUcCh("Ll"); SetUcCh("Mm"); SetUcCh("Nn"); SetUcCh("Oo");
00033     SetUcCh("Pp"); SetUcCh("Qq"); SetUcCh("Rr"); SetUcCh("Ss"); SetUcCh("Tt");
00034     SetUcCh("Uu"); SetUcCh("Vv"); SetUcCh("Ww"); SetUcCh("Xx"); SetUcCh("Yy");
00035     SetUcCh("Zz");
00036   } else
00037   if (ChDefTy==lcdtYuAscii){
00038     // Character-Types
00039     ChTyV.PutAll(TInt(lctSpace));
00040     SetChTy(lctNum, "0123456789");
00041     SetChTy(lctAlpha, "ABC^]D\\EFGHIJKLMNOPQRS[TUVWXYZ@");
00042     SetChTy(lctAlpha, "abc~}d|efghijklmnopqrs{tuvwxyz`");
00043     SetChTy(lctAlpha, "_");
00044     SetChTy(lctSSym, "\".,:;+-*/%!#&<=>?()");
00045     SetChTy(lctTerm, TStr(TCh::CrCh));
00046     SetChTy(lctTerm, TStr(TCh::LfCh));
00047     SetChTy(lctTerm, TStr(TCh::EofCh));
00048 
00049     // Upper-Case
00050     for (int Ch=TCh::Mn; Ch<=TCh::Mx; Ch++){UcChV[Ch-TCh::Mn]=TCh(char(Ch));}
00051     SetUcCh("Aa"); SetUcCh("Bb"); SetUcCh("Cc"); SetUcCh("^~"); SetUcCh("]}");
00052     SetUcCh("Dd"); SetUcCh("\\|"); SetUcCh("Ee"); SetUcCh("Ff"); SetUcCh("Gg");
00053     SetUcCh("Hh"); SetUcCh("Ii"); SetUcCh("Jj"); SetUcCh("Kk"); SetUcCh("Ll");
00054     SetUcCh("Mm"); SetUcCh("Nn"); SetUcCh("Oo"); SetUcCh("Pp"); SetUcCh("Qq");
00055     SetUcCh("Rr"); SetUcCh("Ss"); SetUcCh("[{"); SetUcCh("Tt"); SetUcCh("Uu");
00056     SetUcCh("Vv"); SetUcCh("Ww"); SetUcCh("Xx"); SetUcCh("Yy"); SetUcCh("Zz");
00057     SetUcCh("@`");
00058   } else {
00059     Fail;
00060   }
00061 }
00062 
00063 bool TLxChDef::IsNmStr(const TStr& Str) const {
00064   if (Str.Len()==0){return false;}
00065   if (!IsAlpha(Str.GetCh(0))){return false;}
00066   for (int ChN=1; ChN<Str.Len(); ChN++){
00067     if (!IsAlNum(Str.GetCh(ChN))){return false;}}
00068   return true;
00069 }
00070 
00071 TStr TLxChDef::GetUcStr(const TStr& Str) const {
00072   TChA UcStr;
00073   for (int ChN=0; ChN<Str.Len(); ChN++){
00074     UcStr.AddCh(GetUc(Str.GetCh(ChN)));}
00075   return UcStr;
00076 }
00077 
00078 
00079 PLxChDef TLxChDef::GetChDef(const TLxChDefTy& ChDefTy){
00080   static PLxChDef UsAsciiChDef=NULL;
00081   static PLxChDef YuAsciiChDef=NULL;
00082   switch (ChDefTy){
00083     case lcdtUsAscii:
00084       if (UsAsciiChDef.Empty()){UsAsciiChDef=TLxChDef::New(lcdtUsAscii);}
00085       return UsAsciiChDef;
00086     case lcdtYuAscii:
00087       if (YuAsciiChDef.Empty()){YuAsciiChDef=TLxChDef::New(lcdtYuAscii);}
00088       return YuAsciiChDef;
00089     default: Fail; return NULL;
00090   }
00091 }
00092 
00093 //TLxChDef& TLxChDef::GetChDefRef(const TLxChDefTy& ChDefTy){
00094 //  switch (ChDefTy){
00095 //    case lcdtUsAscii: return *UsAsciiChDef;
00096 //    case lcdtYuAscii: return *YuAsciiChDef;
00097 //    default: Fail; return *UsAsciiChDef;;
00098 //  }
00099 //}
00100 
00102 // Lexical-Symbols
00103 const TStr TLxSymStr::UndefStr="<undefined>";
00104 const TStr TLxSymStr::LnStr="<line>";
00105 const TStr TLxSymStr::TabStr="<tab>";
00106 const TStr TLxSymStr::IntStr="<integer>";
00107 const TStr TLxSymStr::FltStr="<float>";
00108 const TStr TLxSymStr::StrStr="<string>";
00109 const TStr TLxSymStr::IdStrStr="<id-string>";
00110 const TStr TLxSymStr::QStrStr="<q-string>";
00111 const TStr TLxSymStr::PeriodStr=".";
00112 const TStr TLxSymStr::DPeriodStr="..";
00113 const TStr TLxSymStr::CommaStr=",";
00114 const TStr TLxSymStr::ColonStr=":";
00115 const TStr TLxSymStr::DColonStr="::";
00116 const TStr TLxSymStr::SemicolonStr=";";
00117 const TStr TLxSymStr::PlusStr="+";
00118 const TStr TLxSymStr::MinusStr="-";
00119 const TStr TLxSymStr::AsteriskStr="*";
00120 const TStr TLxSymStr::SlashStr="/";
00121 const TStr TLxSymStr::PercentStr="%";
00122 const TStr TLxSymStr::ExclamationStr="!";
00123 const TStr TLxSymStr::VBarStr="|";
00124 const TStr TLxSymStr::AmpersandStr="&";
00125 const TStr TLxSymStr::QuestionStr="?";
00126 const TStr TLxSymStr::HashStr="#";
00127 const TStr TLxSymStr::EqStr="=";
00128 const TStr TLxSymStr::NEqStr="<>";
00129 const TStr TLxSymStr::LssStr="<";
00130 const TStr TLxSymStr::GtrStr=">";
00131 const TStr TLxSymStr::LEqStr="<=";
00132 const TStr TLxSymStr::GEqStr=">=";
00133 const TStr TLxSymStr::LParenStr="(";
00134 const TStr TLxSymStr::RParenStr=")";
00135 const TStr TLxSymStr::LBracketStr="[";
00136 const TStr TLxSymStr::RBracketStr="]";
00137 const TStr TLxSymStr::LBraceStr="{";
00138 const TStr TLxSymStr::RBraceStr="}";
00139 const TStr TLxSymStr::EolnStr="<end-of-line>";
00140 const TStr TLxSymStr::EofStr="<end-of-file>";
00141 
00142 TStr TLxSymStr::GetSymStr(const TLxSym& Sym){
00143   switch (Sym){
00144     case syUndef: return UndefStr;
00145     case syLn: return LnStr;
00146     case syTab: return TabStr;
00147     case syInt: return IntStr;
00148     case syFlt: return FltStr;
00149     case syStr: return StrStr;
00150     case syIdStr: return IdStrStr;
00151     case syQStr: return QStrStr;
00152     case syPeriod: return PeriodStr;
00153     case syDPeriod: return DPeriodStr;
00154     case syComma: return CommaStr;
00155     case syColon: return ColonStr;
00156     case syDColon: return DColonStr;
00157     case sySemicolon: return SemicolonStr;
00158     case syPlus: return PlusStr;
00159     case syMinus: return MinusStr;
00160     case syAsterisk: return AsteriskStr;
00161     case sySlash: return SlashStr;
00162     case syPercent: return PercentStr;
00163     case syExclamation: return ExclamationStr;
00164     case syVBar: return VBarStr;
00165     case syAmpersand: return AmpersandStr;
00166     case syQuestion: return QuestionStr;
00167     case syHash: return HashStr;
00168     case syEq: return EqStr;
00169     case syNEq: return NEqStr;
00170     case syLss: return LssStr;
00171     case syGtr: return GtrStr;
00172     case syLEq: return LEqStr;
00173     case syGEq: return GEqStr;
00174     case syLParen: return LParenStr;
00175     case syRParen: return RParenStr;
00176     case syLBracket: return LBracketStr;
00177     case syRBracket: return RBracketStr;
00178     case syLBrace: return LBraceStr;
00179     case syRBrace: return RBraceStr;
00180     case syEoln: return EolnStr;
00181     case syEof: return EofStr;
00182     default: Fail; return TStr();
00183   }
00184 }
00185 
00186 TLxSym TLxSymStr::GetSSym(const TStr& Str){
00187   static TStrIntH StrToLxSymH(100);
00188   if (StrToLxSymH.Len()==0){
00189     StrToLxSymH.AddDat(PeriodStr, syPeriod);
00190     StrToLxSymH.AddDat(DPeriodStr, syDPeriod);
00191     StrToLxSymH.AddDat(CommaStr, syComma);
00192     StrToLxSymH.AddDat(ColonStr, syColon);
00193     StrToLxSymH.AddDat(DColonStr, syDColon);
00194     StrToLxSymH.AddDat(SemicolonStr, sySemicolon);
00195     StrToLxSymH.AddDat(PlusStr, syPlus);
00196     StrToLxSymH.AddDat(MinusStr, syMinus);
00197     StrToLxSymH.AddDat(AsteriskStr, syAsterisk);
00198     StrToLxSymH.AddDat(SlashStr, sySlash);
00199     StrToLxSymH.AddDat(PercentStr, syPercent);
00200     StrToLxSymH.AddDat(ExclamationStr, syExclamation);
00201     StrToLxSymH.AddDat(VBarStr, syVBar);
00202     StrToLxSymH.AddDat(AmpersandStr, syAmpersand);
00203     StrToLxSymH.AddDat(QuestionStr, syQuestion);
00204     StrToLxSymH.AddDat(HashStr, syHash);
00205     StrToLxSymH.AddDat(EqStr, syEq);
00206     StrToLxSymH.AddDat(NEqStr, syNEq);
00207     StrToLxSymH.AddDat(LssStr, syLss);
00208     StrToLxSymH.AddDat(GtrStr, syGtr);
00209     StrToLxSymH.AddDat(LEqStr, syLEq);
00210     StrToLxSymH.AddDat(GEqStr, syGEq);
00211     StrToLxSymH.AddDat(LParenStr, syLParen);
00212     StrToLxSymH.AddDat(RParenStr, syRParen);
00213     StrToLxSymH.AddDat(LBracketStr, syLBracket);
00214     StrToLxSymH.AddDat(RBracketStr, syRBracket);
00215     StrToLxSymH.AddDat(LBraceStr, syLBrace);
00216     StrToLxSymH.AddDat(RBraceStr, syRBrace);
00217   }
00218   int KeyId=StrToLxSymH.GetKeyId(Str);
00219   if (KeyId==-1){
00220     return syUndef;
00221   } else {
00222     return TLxSym(int(StrToLxSymH[KeyId]));
00223   }
00224 }
00225 
00226 bool TLxSymStr::IsSep(const TLxSym& PrevSym, const TLxSym& Sym){
00227   static TFSet SepPrevSymSet=TFSet()|
00228     syUndef|syColon|syDColon|syEq|
00229     syLParen|syRParen|syLBracket|syRBracket|syLBrace|syRBrace|
00230     syEoln|syEof;
00231 
00232   static TFSet SepSymSet=TFSet()|
00233     syPeriod|syComma|syColon|syDColon|sySemicolon|
00234     syEq|
00235     syExclamation|syQuestion|
00236     syLParen|syRParen|syLBracket|syRBracket|syLBrace|syRBrace|
00237     syEoln|syEof;
00238 
00239   return !SepPrevSymSet.In(PrevSym) && !SepSymSet.In(Sym);
00240 }
00241 
00243 // Lexical-Symbol-State
00244 TILxSymSt::TILxSymSt():
00245   Sym(syUndef),
00246   Str(), UcStr(), CmtStr(),
00247   Bool(false), Int(0), Flt(0),
00248   SymLnN(-1), SymLnChN(-1), SymChN(-1){}
00249 
00250 TILxSymSt::TILxSymSt(const TILxSymSt& SymSt):
00251   Sym(SymSt.Sym),
00252   Str(SymSt.Str), UcStr(SymSt.UcStr), CmtStr(SymSt.CmtStr),
00253   Bool(SymSt.Bool), Int(SymSt.Int), Flt(SymSt.Flt),
00254   SymLnN(SymSt.SymLnN), SymLnChN(SymSt.SymLnChN), SymChN(SymSt.SymChN){Fail;}
00255 
00256 TILxSymSt::TILxSymSt(TILx& Lx):
00257   Sym(Lx.Sym),
00258   Str(Lx.Str), UcStr(Lx.UcStr), CmtStr(Lx.CmtStr),
00259   Bool(Lx.Bool), Int(Lx.Int), Flt(Lx.Flt),
00260   SymLnN(Lx.SymLnN), SymLnChN(Lx.SymLnChN), SymChN(Lx.SymChN){}
00261 
00262 void TILxSymSt::Restore(TILx& Lx){
00263   Lx.Sym=Sym;
00264   Lx.Str=Str; Lx.UcStr=UcStr; Lx.CmtStr=CmtStr;
00265   Lx.Bool=Bool; Lx.Int=Int; Lx.Flt=Flt;
00266   Lx.SymLnN=SymLnN; Lx.SymLnChN=SymLnChN; Lx.SymChN=Lx.SymChN;}
00267 
00269 // Lexical-Input
00270 TILx::TILx(const PSIn& _SIn, const TFSet& OptSet, const TLxChDefTy& ChDefTy):
00271   ChDef(TLxChDef::GetChDef(ChDefTy)),
00272   SIn(_SIn), RSIn(*SIn),
00273   PrevCh(' '), Ch(' '), LnN(0), LnChN(0-1), ChN(0-1),
00274   PrevSymStStack(), RwStrH(50),
00275   IsCmtAlw(false), IsRetEoln(false), IsSigNum(false),
00276   IsUniStr(false), IsCsSens(false), IsExcept(false),
00277   IsTabSep(false), IsList(false),
00278   Sym(syUndef),
00279   Str(), UcStr(), CmtStr(),
00280   Bool(false), Int(0), Flt(0),
00281   SymLnN(-1), SymLnChN(-1), SymChN(-1){
00282   for (int Opt=0; Opt<iloMx; Opt++){
00283     if (OptSet.In(Opt)){SetOpt(Opt, true);}}
00284 }
00285 
00286 void TILx::SetOpt(const int& Opt, const bool& Val){
00287   switch (Opt){
00288     case iloCmtAlw: IsCmtAlw=Val; break;
00289     case iloRetEoln: IsRetEoln=Val; break;
00290     case iloSigNum: IsSigNum=Val; break;
00291     case iloUniStr: IsUniStr=Val; break;
00292     case iloCsSens: IsCsSens=Val; break;
00293     case iloExcept: IsExcept=Val; break;
00294     case iloTabSep: IsTabSep=Val; break;
00295     case iloList: IsList=Val; break;
00296     default: Fail;
00297   }
00298 }
00299 
00300 TLxSym TILx::AddRw(const TStr& Str){
00301   IAssert(RwStrH.Len()<syMxRw-syMnRw+1);
00302   TStr UcStr=ChDef->GetUcStr(Str);
00303   IAssert(!RwStrH.IsKey(UcStr));
00304   TLxSym RwSym=TLxSym(syMnRw+RwStrH.Len());
00305   RwStrH.AddDat(Str, TInt(int(RwSym)));
00306   return RwSym;
00307 }
00308 
00309 PSIn TILx::GetSIn(const char& SepCh){
00310   IAssert(PrevSymStStack.Empty());
00311   while ((Ch!=TCh::EofCh)&&(Ch!=SepCh)){GetCh();}
00312   return SIn;
00313 }
00314 
00315 TLxSym TILx::GetSym(const TFSet& Expect){
00316   CmtStr.Clr();
00317   if (!PrevSymStStack.Empty()){
00318     // symbols already on the stack
00319     PrevSymStStack.Top().Restore(*this); PrevSymStStack.Pop();
00320   } else
00321   if (Expect.In(syLn)){
00322     // symbol is the whole line string
00323     if (Ch==TCh::EofCh){
00324       Sym=syEof;
00325     } else {
00326       Str.Clr();
00327       if (IsBof()){GetCh();}
00328       while (!ChDef->IsTerm(Ch)){Str.AddCh(Ch); GetCh();}
00329       bool _IsRetEoln=IsRetEoln; IsRetEoln=true;
00330       GetSym(TFSet()|syEoln|syEof); Sym=syLn;
00331       IsRetEoln=_IsRetEoln;
00332     }
00333   } else
00334   if (IsTabSep){
00335     // symbol is between tab characters
00336     if (IsBof()){GetCh();}
00337     if (Ch==TCh::TabCh){ // tab character
00338       Sym=syTab; GetCh();
00339     } else
00340     if (ChDef->IsTerm(Ch)){ // eoln & eof characters
00341       bool _IsRetEoln=IsRetEoln; IsRetEoln=true; IsTabSep=false;
00342       GetSym(TFSet()|syEoln|syEof);
00343       IsRetEoln=_IsRetEoln; IsTabSep=true;
00344     } else {
00345       Str.Clr();
00346       while ((!ChDef->IsTerm(Ch))&&(Ch!=TCh::TabCh)){
00347         Str.AddCh(Ch); UcStr.AddCh(ChDef->GetUc(Ch)); GetCh();}
00348       Sym=syStr; QuoteP=false;
00349     }
00350   } else {
00351     // usual symbol
00352     while (ChDef->IsSpace(Ch)){GetCh();}
00353     SymLnN=LnN; SymLnChN=LnChN; SymChN=ChN;
00354 
00355     if (ChDef->IsAlpha(Ch)){
00356       if (IsUniStr){Sym=syStr;} else {Sym=syIdStr;}
00357       Str.Clr(); UcStr.Clr(); QuoteP=false;
00358       do {Str.AddCh(Ch); UcStr.AddCh(ChDef->GetUc(Ch));}
00359       while (ChDef->IsAlNum(GetCh()));
00360       if (!RwStrH.Empty()){
00361         TStr RwStr=Str; if (!IsCsSens){RwStr=UcStr;}
00362         int SymKeyId=RwStrH.GetKeyId(RwStr);
00363         if (SymKeyId!=-1){Sym=TLxSym(int(RwStrH[SymKeyId]));}
00364       }
00365       if (Expect.In(syBool)){
00366         Sym=syBool; IAssert(TBool::IsValStr(Str));
00367         Bool=TBool::GetValFromStr(Str);
00368       }
00369     } else
00370     if ((Ch=='"')||(Ch=='\'')){
00371       if (IsUniStr){Sym=syStr;} else {Sym=syQStr;}
00372       Str.Clr(); UcStr.Clr(); QuoteP=true; QuoteCh=Ch;
00373       GetCh();
00374       forever{
00375         while ((Ch!=QuoteCh)&&(Ch!='\\')&&(Ch!=TCh::EofCh)){
00376           Str.AddCh(Ch); UcStr.AddCh(ChDef->GetUc(Ch)); GetCh();}
00377         if (Ch==TCh::EofCh){
00378           Sym=syUndef; break;
00379         } else if (Ch==QuoteCh){
00380           GetCh(); break;
00381         } else {
00382           GetCh();
00383           switch (Ch){
00384             case '"': Str.AddCh(Ch); UcStr.AddCh(ChDef->GetUc(Ch)); GetCh(); break;
00385             case '\'': Str.AddCh(Ch); UcStr.AddCh(ChDef->GetUc(Ch)); GetCh(); break;
00386             case '/': Str.AddCh(Ch); UcStr.AddCh(ChDef->GetUc(Ch)); GetCh(); break;
00387             case 'b': Str.AddCh('\b'); UcStr.AddCh(ChDef->GetUc(Ch)); GetCh(); break;
00388             case 'f': Str.AddCh('\f'); UcStr.AddCh(ChDef->GetUc(Ch)); GetCh(); break;
00389             case 'n': Str.AddCh('\n'); UcStr.AddCh(ChDef->GetUc(Ch)); GetCh(); break;
00390             case 'r': Str.AddCh('\r'); UcStr.AddCh(ChDef->GetUc(Ch)); GetCh(); break;
00391             case 't': Str.AddCh('\t'); UcStr.AddCh(ChDef->GetUc(Ch)); GetCh(); break;
00392             case 'u': 
00393               // needs unicode support to be JSON compatible - now it replaces the code with blank
00394               GetCh(); GetCh(); GetCh(); Str.AddCh(' '); UcStr.AddCh(ChDef->GetUc(' ')); GetCh(); break; 
00395             default: Sym=syUndef; break;
00396           }
00397           if (Sym==syUndef){
00398             throw PExcept(new TExcept("Invalid Escape Sequence in Quoted String"));}
00399         }
00400       }
00401     } else
00402     if ((ChDef->IsNum(Ch))||(IsSigNum&&((Ch=='+')||(Ch=='-')))){
00403       Str.Clr(); bool IntP=true;
00404       do {Str.AddCh(Ch);} while (ChDef->IsNum(GetCh()));
00405       if (Expect.In(syFlt)){
00406         if (Ch=='.'){
00407           Str.AddCh(Ch); IntP=false;
00408           while (ChDef->IsNum(GetCh())){Str.AddCh(Ch);}
00409         }
00410         if ((Ch=='e')||(Ch=='E')){
00411           Str.AddCh(Ch); GetCh(); IntP=false;
00412           if ((Ch=='+')||(Ch=='-')){Str.AddCh(Ch); GetCh();}
00413           while (ChDef->IsNum(Ch)){Str.AddCh(Ch); GetCh();}
00414         }
00415       }
00416       UcStr=Str;
00417       if (IntP&&(Expect.In(syInt))){
00418         Sym=syInt; Int=atoi(Str.CStr());
00419       } else {
00420         Sym=syFlt; Flt=atof(Str.CStr());
00421       }
00422     } else
00423     if ((Ch==TCh::CrCh)||(Ch==TCh::LfCh)){
00424       Sym=syEoln;
00425       if (Ch==TCh::CrCh){if (GetCh()==TCh::LfCh){GetCh();}} else
00426       if (Ch==TCh::LfCh){if (GetCh()==TCh::CrCh){GetCh();}}
00427       LnN++; LnChN=0; if (!IsRetEoln){GetSym(Expect);}
00428     } else
00429     if (Ch=='/'){
00430       GetCh();
00431       if ((IsCmtAlw)&&(Ch=='/')){
00432         TChA _CmtStr;
00433         do {_CmtStr+=GetCh();} while (!ChDef->IsTerm(Ch));
00434         _CmtStr.Pop(); _CmtStr.Trunc();
00435         if (Ch==TCh::CrCh){
00436           if (GetCh()==TCh::LfCh){GetCh();}
00437         } else
00438         if (Ch==TCh::LfCh){
00439           if (GetCh()==TCh::CrCh){GetCh();}
00440         }
00441         if (IsRetEoln){Sym=syEoln;} else {GetSym(Expect);}
00442         CmtStr=_CmtStr;
00443       } else
00444       if (Ch=='*'){
00445         TChA _CmtStr;
00446         do {
00447           while (GetCh()!='*'){_CmtStr+=Ch;}
00448           _CmtStr+=GetCh();
00449         } while (Ch!='/');
00450         _CmtStr.Pop(); _CmtStr.Pop(); _CmtStr.Trunc();
00451         GetCh(); GetSym(Expect);
00452         CmtStr=_CmtStr;
00453       } else {
00454         Sym=sySlash;
00455       }
00456     } else
00457     if (Ch==TCh::EofCh){
00458       Sym=syEof;
00459     } else {
00460       switch (Ch){
00461         case '.':
00462           if (GetCh()=='.'){Sym=syDPeriod; GetCh();}
00463           else {Sym=syPeriod;} break;
00464         case ',': Sym=syComma; GetCh(); break;
00465         case ':':
00466           if (GetCh()==':'){Sym=syDColon; GetCh();}
00467           else {Sym=syColon;} break;
00468         case ';': Sym=sySemicolon; GetCh(); break;
00469         case '+': Sym=syPlus; GetCh(); break;
00470         case '-': Sym=syMinus; GetCh(); break;
00471         case '*': Sym=syAsterisk; GetCh(); break;
00472         case '/': Sym=sySlash; GetCh(); break;
00473         case '%': Sym=syPercent; GetCh(); break;
00474         case '!': Sym=syExclamation; GetCh(); break;
00475         case '|': Sym=syVBar; GetCh(); break;
00476         case '&': Sym=syAmpersand; GetCh(); break;
00477         case '=': Sym=syEq; GetCh(); break;
00478         case '<':
00479           GetCh();
00480           if (Ch=='='){Sym=syLEq; GetCh();}
00481           else if (Ch=='>'){Sym=syNEq; GetCh();}
00482           else {Sym=syLss;} break;
00483         case '>':
00484           if (GetCh()=='='){Sym=syGEq; GetCh();}
00485           else {Sym=syGtr;} break;
00486         case '?': Sym=syQuestion; GetCh(); break;
00487         case '#':
00488           if (IsCmtAlw){
00489             TChA _CmtStr;
00490             do {_CmtStr+=GetCh();} while (!ChDef->IsTerm(Ch));
00491             _CmtStr.Pop(); _CmtStr.Trunc();
00492             if (Ch==TCh::CrCh){
00493               if (GetCh()==TCh::LfCh){GetCh();}
00494             } else
00495             if (Ch==TCh::LfCh){
00496               if (GetCh()==TCh::CrCh){GetCh();}
00497             }
00498             if (IsRetEoln){Sym=syEoln;} else {GetSym(Expect);}
00499             CmtStr=_CmtStr;
00500           } else {
00501             Sym=syHash; GetCh();
00502           }
00503           break;
00504         case '(': Sym=syLParen; GetCh(); break;
00505         case ')': Sym=syRParen; GetCh(); break;
00506         case '[': Sym=syLBracket; GetCh(); break;
00507         case ']': Sym=syRBracket; GetCh(); break;
00508         case '{': Sym=syLBrace; GetCh(); break;
00509         case '}': Sym=syRBrace; GetCh(); break;
00510         default: Sym=syUndef; GetCh(); break;
00511       }
00512     }
00513   }
00514 
00515   if ((!Expect.In(Sym))&&(!Expect.Empty())){
00516     if (IsExcept){
00517      TStr MsgStr=
00518       TStr("Unexpected symbol (")+GetSymStr()+") ["+GetFPosStr()+"]";
00519      throw PExcept(new TExcept(MsgStr));
00520     } else {
00521       Fail;
00522     }
00523   }
00524   return Sym;
00525 }
00526 
00527 TStr TILx::GetStrToCh(const char& ToCh){
00528   Sym=syStr; Str.Clr(); UcStr.Clr();
00529   while ((Ch!=ToCh)&&(Ch!=TCh::EofCh)){
00530     Str.AddCh(Ch); UcStr.AddCh(ChDef->GetUc(Ch)); GetCh();}
00531   return Str;
00532 }
00533 
00534 TStr TILx::GetStrToEolnOrCh(const char& ToCh){
00535   Sym=syStr; Str.Clr(); UcStr.Clr();
00536   while ((Ch!=ToCh)&&(Ch!=TCh::CrCh)&&(Ch!=TCh::LfCh)&&(Ch!=TCh::EofCh)){
00537     Str.AddCh(Ch); UcStr.AddCh(ChDef->GetUc(Ch)); GetCh();}
00538   return Str;
00539 }
00540 
00541 TStr TILx::GetStrToEoln(const bool& DoTrunc){
00542   Sym=syStr; Str.Clr(); UcStr.Clr();
00543   while ((Ch!=TCh::CrCh)&&(Ch!=TCh::LfCh)&&(Ch!=TCh::EofCh)){
00544     Str.AddCh(Ch); UcStr.AddCh(ChDef->GetUc(Ch)); GetCh();}
00545   if (DoTrunc){Str.ToTrunc(); UcStr.ToTrunc();}
00546   return Str;
00547 }
00548 
00549 TStr TILx::GetStrToEolnAndCh(const char& ToCh){
00550   Sym=syStr; Str.Clr(); UcStr.Clr();
00551   if (IsBof()){GetCh();}
00552   forever {
00553     if (Ch==TCh::EofCh){break;}
00554     if (((ChN==0)||(PrevCh==TCh::CrCh)||(PrevCh==TCh::LfCh))&&(Ch==ToCh)){
00555       GetCh(); break;}
00556     else {Str.AddCh(Ch); UcStr.AddCh(ChDef->GetUc(Ch)); GetCh();}
00557   }
00558   return Str;
00559 }
00560 
00561 void TILx::SkipToEoln(){
00562   while ((Ch!=TCh::CrCh)&&(Ch!=TCh::LfCh)&&(Ch!=TCh::EofCh)){
00563     GetCh();}
00564   if (Ch==TCh::CrCh){if (GetCh()==TCh::LfCh){GetCh();}} else
00565   if (Ch==TCh::LfCh){if (GetCh()==TCh::CrCh){GetCh();}}
00566 }
00567 
00568 TLxSym TILx::PeekSym(const int& Syms){
00569   TILxSymSt CurSymSt(*this);
00570   TSStack<TILxSymSt> SymStStack;
00571   for (int SymN=0; SymN<Syms; SymN++){
00572     GetSym(); SymStStack.Push(TILxSymSt(*this));}
00573   TLxSym PeekedSym=Sym;
00574   while (!SymStStack.Empty()){
00575     SymStStack.Top().Restore(*this); SymStStack.Pop();
00576     PutSym();
00577   }
00578   CurSymSt.Restore(*this);
00579   return PeekedSym;
00580 }
00581 
00582 TStr TILx::GetSymStr() const {
00583   switch (Sym){
00584     case syInt: return Str;
00585     case syFlt: return Str;
00586     case syStr: return Str;
00587     case syIdStr: return Str;
00588     case syQStr: return Str;
00589     default:
00590       if ((syMnRw<=Sym)&&(Sym<=syMxRw)){return Str;}
00591       else {return TLxSymStr::GetSymStr(Sym);}
00592   }
00593 }
00594 
00595 TStr TILx::GetFPosStr() const {
00596   TChA ChA;
00597   ChA+="File:"; ChA+=SIn->GetSNm();
00598   ChA+=" Line:"; ChA+=TInt::GetStr(LnN+1);
00599   ChA+=" Char:"; ChA+=TInt::GetStr(LnChN);
00600   return ChA;
00601 }
00602 
00603 TStr TILx::GetQStr(const TStr& Str, const bool& QuoteP, const char& QuoteCh){
00604   if (QuoteP){
00605     TChA ChA;
00606     ChA+=QuoteCh;
00607     int StrLen=Str.Len();
00608     for (int ChN=0; ChN<StrLen; ChN++){
00609       char Ch=Str.CStr()[ChN];
00610       if (Ch==QuoteCh){ChA+=QuoteCh; ChA+=QuoteCh;}
00611       else {ChA+=Ch;}
00612     }
00613     ChA+=QuoteCh;
00614     return ChA;
00615   } else {
00616     return Str;
00617   }
00618 }
00619 
00620 void TILx::GetVarBoolV(const TStr& VarNm, TBoolV& BoolV, const bool& NewLn){
00621   BoolV.Clr();
00622   GetVar(VarNm, true, NewLn);
00623   while (GetSym(syRBracket, syBool)==syQStr){
00624     BoolV.Add(Bool); if (NewLn){GetEoln();}}
00625   if (NewLn){GetEoln();}
00626 }
00627 
00628 void TILx::GetVarIntV(const TStr& VarNm, TIntV& IntV, const bool& NewLn){
00629   IntV.Clr();
00630   GetVar(VarNm, true, NewLn);
00631   while (GetSym(syRBracket, syInt)==syInt){
00632     IntV.Add(Int); if (NewLn){GetEoln();}}
00633   if (NewLn){GetEoln();}
00634 }
00635 
00636 void TILx::GetVarFltV(const TStr& VarNm, TFltV& FltV, const bool& NewLn){
00637   FltV.Clr();
00638   GetVar(VarNm, true, NewLn);
00639   while (GetSym(syRBracket, syFlt)==syFlt){
00640     FltV.Add(Flt); if (NewLn){GetEoln();}}
00641   if (NewLn){GetEoln();}
00642 }
00643 
00644 void TILx::GetVarStrV(const TStr& VarNm, TStrV& StrV, const bool& NewLn){
00645   StrV.Clr();
00646   GetVar(VarNm, true, NewLn);
00647   while (GetSym(syRBracket, syQStr)==syQStr){
00648     StrV.Add(Str); if (NewLn){GetEoln();}}
00649   if (NewLn){GetEoln();}
00650 }
00651 
00652 void TILx::GetVarStrPrV(const TStr& VarNm, TStrPrV& StrPrV, const bool& NewLn){
00653   StrPrV.Clr();
00654   GetVar(VarNm, true, NewLn);
00655   while (GetSym(syRBracket, syLBracket)==syLBracket){
00656     TStr Str1=GetQStr(); TStr Str2=GetQStr();
00657     GetSym(syRBracket);
00658     StrPrV.Add(TStrPr(Str1, Str2)); if (NewLn){GetEoln();}
00659   }
00660   if (NewLn){GetEoln();}
00661 }
00662 
00663 void TILx::GetVarStrVV(const TStr& VarNm, TVec<TStrV>& StrVV, const bool& NewLn){
00664   StrVV.Clr();
00665   GetVar(VarNm, true, NewLn);
00666   while (GetSym(syRBracket, syLBracket)==syLBracket){
00667     StrVV.Add();
00668     while (GetSym(syQStr, syRBracket)==syQStr){
00669       StrVV.Last().Add(Str);}
00670     if (NewLn){GetEoln();}
00671   }
00672   if (NewLn){GetEoln();}
00673 }
00674 
00675 void TILx::GetLnV(const TStr& FNm, TStrV& LnV){
00676   TFIn SIn(FNm); LnV.Clr(); TChA Ln;
00677   if (!SIn.Eof()){
00678     char Ch=SIn.GetCh();
00679     while (!SIn.Eof()){
00680       if ((Ch==TCh::CrCh)||(Ch==TCh::LfCh)){
00681         if (!SIn.Eof()){
00682           char PrevCh=Ch; Ch=SIn.GetCh();
00683           if (!SIn.Eof()){
00684             if (PrevCh==TCh::CrCh){if (Ch==TCh::LfCh){Ch=SIn.GetCh();}} else
00685             if (PrevCh==TCh::LfCh){if (Ch==TCh::CrCh){Ch=SIn.GetCh();}}
00686           }
00687         }
00688         LnV.Add(Ln); Ln.Clr();
00689       } else {
00690         Ln+=Ch; Ch=SIn.GetCh();
00691       }
00692     }
00693     if (!Ln.Empty()){
00694       LnV.Add(Ln);}
00695   }
00696 }
00697 
00699 // Lexical-Output
00700 void TOLx::PutSep(const TLxSym& Sym){
00701   if (TLxSymStr::IsSep(PrevSym, Sym)){
00702     if (IsTabSep){RSOut.PutCh(TCh::TabCh);} else {RSOut.PutCh(' ');}}
00703   PrevSym=Sym;
00704 }
00705 
00706 TOLx::TOLx(const PSOut& _SOut, const TFSet& OptSet, const TLxChDefTy& ChDefTy):
00707   ChDef(TLxChDef::GetChDef(ChDefTy)), SOut(_SOut), RSOut(*SOut),
00708   IsCmtAlw(false), IsFrcEoln(false), IsSigNum(false),
00709   IsUniStr(false), IsCsSens(false), IsTabSep(false), IsVarIndent(false),
00710   VarIndentLev(0),
00711   RwStrH(50), RwSymH(50), PrevSym(syUndef){
00712   for (int Opt=0; Opt<oloMx; Opt++){
00713     if (OptSet.In(Opt)){SetOpt(Opt, true);}}
00714 }
00715 
00716 void TOLx::SetOpt(const int& Opt, const bool& Val){
00717   switch (Opt){
00718     case oloCmtAlw: IsCmtAlw=Val; break;
00719     case oloFrcEoln: IsFrcEoln=Val; break;
00720     case oloSigNum: IsSigNum=Val; break;
00721     case oloUniStr: IsUniStr=Val; break;
00722     case oloCsSens: IsCsSens=Val; break;
00723     case oloTabSep: IsTabSep=Val; break;
00724     case oloVarIndent: IsVarIndent=Val; break;
00725     default: Fail;
00726   }
00727 }
00728 
00729 TLxSym TOLx::AddRw(const TStr& Str){
00730   IAssert(RwStrH.Len()<syMxRw-syMnRw+1);
00731   TStr UcStr=ChDef->GetUcStr(Str);
00732   IAssert(!RwStrH.IsKey(UcStr));
00733   TLxSym RwSym=TLxSym(syMnRw+RwStrH.Len());
00734   RwStrH.AddDat(Str, TInt(int(RwSym)));
00735   RwSymH.AddDat(TInt(int(RwSym)), Str);
00736   return RwSym;
00737 }
00738 
00739 void TOLx::PutSym(const TLxSym& Sym){
00740   TStr Str;
00741   if ((syMnRw<=Sym)&&(Sym<=syMxRw)){
00742     Str=Str=RwSymH[Sym];
00743   } else {
00744     Str=TLxSymStr::GetSymStr(Sym);
00745   }
00746   PutSep(Sym); RSOut.PutStr(Str);
00747 }
00748 
00749 void TOLx::PutVarBoolV(const TStr& VarNm, const TBoolV& BoolV,
00750  const bool& NewLn, const bool& CheckIdStr){
00751   PutVar(VarNm, true, NewLn, CheckIdStr);
00752   for (int BoolN=0; BoolN<BoolV.Len(); BoolN++){
00753     if (IsVarIndent){PutIndent(VarIndentLev);}
00754     PutBool(BoolV[BoolN]);
00755     if (NewLn){PutLn();}
00756   }
00757   PutVarEnd(true, NewLn);
00758 }
00759 
00760 void TOLx::PutVarIntV(const TStr& VarNm, const TIntV& IntV,
00761  const bool& NewLn, const bool& CheckIdStr){
00762   PutVar(VarNm, true, NewLn, CheckIdStr);
00763   for (int IntN=0; IntN<IntV.Len(); IntN++){
00764     if (IsVarIndent){PutIndent(VarIndentLev);}
00765     PutInt(IntV[IntN]);
00766     if (NewLn){PutLn();}
00767   }
00768   PutVarEnd(true, NewLn);
00769 }
00770 
00771 void TOLx::PutVarFltV(const TStr& VarNm, const TFltV& FltV,
00772  const bool& NewLn, const bool& CheckIdStr){
00773   PutVar(VarNm, true, NewLn, CheckIdStr);
00774   for (int FltN=0; FltN<FltV.Len(); FltN++){
00775     if (IsVarIndent){PutIndent(VarIndentLev);}
00776     PutFlt(FltV[FltN]);
00777     if (NewLn){PutLn();}
00778   }
00779   PutVarEnd(true, NewLn);
00780 }
00781 
00782 void TOLx::PutVarStrV(const TStr& VarNm, const TStrV& StrV,
00783  const bool& NewLn, const bool& CheckIdStr){
00784   PutVar(VarNm, true, NewLn, CheckIdStr);
00785   for (int StrN=0; StrN<StrV.Len(); StrN++){
00786     if (IsVarIndent){PutIndent(VarIndentLev);}
00787     PutQStr(StrV[StrN]);
00788     if (NewLn){PutLn();}
00789   }
00790   PutVarEnd(true, NewLn);
00791 }
00792 
00793 void TOLx::PutVarStrPrV(const TStr& VarNm, const TStrPrV& StrPrV,
00794  const bool& NewLn, const bool& CheckIdStr){
00795   PutVar(VarNm, true, NewLn, CheckIdStr);
00796   for (int StrPrN=0; StrPrN<StrPrV.Len(); StrPrN++){
00797     if (IsVarIndent){PutIndent(VarIndentLev);}
00798     PutSym(syLBracket);
00799     PutQStr(StrPrV[StrPrN].Val1); PutQStr(StrPrV[StrPrN].Val2);
00800     PutSym(syRBracket);
00801     if (NewLn){PutLn();}
00802   }
00803   PutVarEnd(true, NewLn);
00804 }
00805 
00806 void TOLx::PutVarStrVV(const TStr& VarNm, const TVec<TStrV>& StrVV,
00807  const bool& NewLn, const bool& CheckIdStr){
00808   PutVar(VarNm, true, NewLn, CheckIdStr);
00809   for (int StrVN=0; StrVN<StrVV.Len(); StrVN++){
00810     if (IsVarIndent){PutIndent(VarIndentLev);}
00811     PutSym(syLBracket);
00812     for (int StrN=0; StrN<StrVV[StrVN].Len(); StrN++){
00813       PutQStr(StrVV[StrVN][StrN]);}
00814     PutSym(syRBracket);
00815     if (NewLn){PutLn();}
00816   }
00817   PutVarEnd(true, NewLn);
00818 }
00819 
00821 // Preprocessor
00822 char TPreproc::GetCh(){
00823   Assert(Ch!=TCh::EofCh);
00824   PrevCh=Ch;
00825   Ch=((SIn->Eof()) ? TCh::EofCh : SIn->GetCh());
00826   //putchar(Ch);
00827   return Ch;
00828 }
00829 
00830 bool TPreproc::IsSubstId(const TStr& SubstId, TStr& SubstValStr) const {
00831   if (SubstIdToKeyIdValPrVH.IsKey(SubstId)){
00832     const TStrPrV& KeyIdValPrV=SubstIdToKeyIdValPrVH.GetDat(SubstId);
00833     for (int KeyN=0; KeyN<KeyIdValPrV.Len(); KeyN++){
00834       if (SubstKeyIdV.IsIn(KeyIdValPrV[KeyN].Val1)){
00835         SubstValStr=KeyIdValPrV[KeyN].Val2;
00836         return true;
00837       }
00838     }
00839     return false;
00840   } else {
00841     return false;
00842   }
00843 }
00844 
00845 TPreproc::TPreproc(const TStr& InFNm, const TStr& OutFNm,
00846  const TStr& SubstFNm, const TStrV& _SubstKeyIdV):
00847   SIn(), SubstKeyIdV(_SubstKeyIdV),
00848   PrevCh('\0'), Ch('\0'){
00849   // load substitution file
00850   if (!SubstFNm.Empty()){
00851     PXmlDoc XmlDoc=TXmlDoc::LoadTxt(SubstFNm);
00852     // get list of substitutions
00853     TXmlTokV SubstTokV; XmlDoc->GetTok()->GetTagTokV("Subst", SubstTokV);
00854     for (int SubstTokN=0; SubstTokN<SubstTokV.Len(); SubstTokN++){
00855       PXmlTok SubstTok=SubstTokV[SubstTokN];
00856       // get substitution-id
00857       TStr SubstId=SubstTok->GetArgVal("Id", "");
00858       if (!SubstId.Empty()){
00859         // create substitution
00860         TStrPrV& KeyIdValPrV=SubstIdToKeyIdValPrVH.AddDat(SubstId);
00861         // get list of substitution-strings
00862         TXmlTokV StrTokV; SubstTok->GetTagTokV("Str", StrTokV);
00863         for (int StrTokN=0; StrTokN<StrTokV.Len(); StrTokN++){
00864           PXmlTok StrTok=StrTokV[StrTokN];
00865           // get key-value pair
00866           TStr KeyId=StrTok->GetArgVal("Key", "");
00867           TStr ValStr=StrTok->GetTokStr(false);
00868           // assign key-value-pair
00869           if (!KeyId.Empty()){
00870             KeyIdValPrV.Add(TStrPr(KeyId, ValStr));
00871           }
00872         }
00873       }
00874     }
00875   }
00876   // substitution
00877   // open files
00878   SIn=TFIn::New(InFNm);
00879   PSOut SOut=TFOut::New(OutFNm);
00880   // set copy & ignore mode
00881   bool CopyModeP=false; bool IgnoreModeP=false;
00882   GetCh();
00883   while (Ch!=TCh::EofCh){
00884     if (isalpha(Ch)||(((PrevCh=='\0')||(PrevCh=='\r')||(PrevCh=='\n'))&&(Ch=='#'))){
00885       // collect identifier
00886       TChA IdChA;
00887       do {
00888         IdChA+=Ch; GetCh();
00889       } while ((Ch!=TCh::EofCh)&&(isalnum(Ch)));
00890       // check identifier
00891       if (IdChA=="#ifdef"){
00892         // collect condition-key-id
00893         TChA CondKeyIdChA;
00894         while ((Ch!=TCh::EofCh)&&(Ch!='\n')&&(Ch!='\r')){
00895           CondKeyIdChA+=Ch; GetCh();}
00896         // skip eoln
00897         if (Ch=='\n'){GetCh(); if (Ch=='\r'){GetCh();}}
00898         else if (Ch=='\r'){GetCh(); if (Ch=='\n'){GetCh();}}
00899         // check for key
00900         CondKeyIdChA.Trunc();
00901         IAssert(CopyModeP==false);
00902         IAssert(IgnoreModeP==false);
00903         if (SubstKeyIdV.IsIn(CondKeyIdChA)){
00904           CopyModeP=true; IgnoreModeP=false;
00905         } else {
00906           CopyModeP=false; IgnoreModeP=true;
00907         }
00908       } else
00909       if (IdChA=="#endif"){
00910         // move to eoln
00911         while ((Ch!=TCh::EofCh)&&(Ch!='\n')&&(Ch!='\r')){
00912           GetCh();}
00913         // skip eoln
00914         if (Ch=='\n'){GetCh(); if (Ch=='\r'){GetCh();}}
00915         else if (Ch=='\r'){GetCh(); if (Ch=='\n'){GetCh();}}
00916         // reset copy&ignore modes
00917         IAssert(CopyModeP||IgnoreModeP);
00918         CopyModeP=false; IgnoreModeP=false;
00919       } else {
00920         // substitution or add id-as-seen
00921         TStr SubstValStr;
00922         if ((!CopyModeP)&&(IsSubstId(IdChA, SubstValStr))){
00923           if (!IgnoreModeP){SOut->PutStr(SubstValStr);}
00924         } else {
00925           if (!IgnoreModeP){SOut->PutStr(IdChA);}
00926         }
00927       }
00928     } else {
00929       // single character
00930       if (!IgnoreModeP){SOut->PutCh(Ch);}
00931       GetCh();
00932     }
00933   }
00934 }
00935