SNAP Library, Developer Reference  2012-10-15 15:06:59
SNAP, a general purpose network analysis and graph mining library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
xml.cpp
Go to the documentation of this file.
00001 
00002 // Xml-Object-Saving
00003 TStrStrH TXmlObjSer::TypeNmToTagNmH;
00004 
00005 TStr TXmlObjSer::GetTagNm(const TStr& TypeNm){
00006   TStr& XmlTagNm=TypeNmToTagNmH.AddDat(TypeNm);
00007   if (XmlTagNm.Empty()){
00008     TChA XmlTagChA=TypeNm;
00009     for (int ChN=0; ChN<XmlTagChA.Len(); ChN++){
00010       char Ch=XmlTagChA[ChN];
00011       if (!((('A'<=Ch)&&(Ch<='Z'))||(('a'<=Ch)&&(Ch<='z'))||(('0'<=Ch)&&(Ch<='9')))){
00012         XmlTagChA.PutCh(ChN, '_');
00013       }
00014     }
00015     while ((XmlTagChA.Len()>0)&&(XmlTagChA.LastCh()=='_')){
00016       XmlTagChA.Pop();}
00017     XmlTagNm=XmlTagChA;
00018   }
00019   return XmlTagNm;
00020 }
00021 
00022 void TXmlObjSer::AssertXmlHd(
00023  const PXmlTok& XmlTok, const TStr& Nm, const TStr& TypeNm){
00024   // check if the token is full
00025   EAssertR(!XmlTok.Empty(), "Xml-Token Empty");
00026   // if name is empty then tag=type else tag=name
00027   if (!Nm.Empty()){
00028     // check if the token is tag
00029     if (!XmlTok->IsTag()){
00030       TStr ArgStr1="Expected: Tag";
00031       TStr ArgStr2=TStr("Found: ")+XmlTok->GetSymStr();
00032       TExcept::Throw("Invalid Xml-Token", ArgStr1, ArgStr2);
00033     }
00034     if (Nm!="-"){
00035       // check if the tag is correct
00036       if (!XmlTok->IsTag(Nm)){
00037         TStr ArgStr1=TStr("Expected: ")+Nm;
00038         TStr ArgStr2=TStr("Found: ")+XmlTok->GetStr();
00039         TExcept::Throw("Invalid Xml-Tag", ArgStr1, ArgStr2);
00040       }
00041       // check if the type is correct
00042       TStr TypeArgVal=XmlTok->GetStrArgVal("Type");
00043       if (TypeArgVal!=TypeNm){
00044         TStr ArgStr1=TStr("Expected: ")+TypeNm;
00045         TStr ArgStr2=TStr("Found: ")+TypeArgVal;
00046         TExcept::Throw("Invalid Xml-Type", ArgStr1, ArgStr2);
00047       }
00048     }
00049   } else {
00050     // check if the tag is correct
00051     if (!XmlTok->IsTag(TypeNm)){
00052       TStr ArgStr1=TStr("Expected: ")+TypeNm;
00053       TStr ArgStr2=TStr("Found: ")+XmlTok->GetSymStr();
00054       TExcept::Throw("Invalid Xml-Type-Tag", ArgStr1, ArgStr2);
00055     }
00056   }
00057 }
00058 
00059 bool TXmlObjSer::GetBoolArg(const PXmlTok& XmlTok, const TStr& Nm){
00060   TStr ValStr;
00061   if (XmlTok->IsArg(Nm, ValStr)){
00062     bool Val;
00063     if (ValStr.IsBool(Val)){
00064       return Val;
00065     } else {
00066       TExcept::Throw("Invalid Xml-Argument Boolean-Value", Nm, ValStr);
00067     }
00068   } else {
00069     TExcept::Throw("Xml-Argument Missing", Nm);
00070   }
00071   Fail; return 0;
00072 }
00073 
00074 int TXmlObjSer::GetIntArg(const PXmlTok& XmlTok, const TStr& Nm){
00075   TStr ValStr;
00076   if (XmlTok->IsArg(Nm, ValStr)){
00077     int Val;
00078     if (ValStr.IsInt(Val)){
00079       return Val;
00080     } else {
00081       TExcept::Throw("Invalid Xml-Argument Integer-Value", Nm, ValStr);
00082     }
00083   } else {
00084     TExcept::Throw("Xml-Argument Missing", Nm);
00085   }
00086   Fail; return 0;
00087 }
00088 
00089 int64 TXmlObjSer::GetInt64Arg(const PXmlTok& XmlTok, const TStr& Nm){
00090   TStr ValStr;
00091   if (XmlTok->IsArg(Nm, ValStr)){
00092     int64 Val;
00093     if (ValStr.IsInt64(Val)){
00094       return Val;
00095     } else {
00096       TExcept::Throw("Invalid Xml-Argument Integer64-Value", Nm, ValStr);
00097     }
00098   } else {
00099     TExcept::Throw("Xml-Argument Missing", Nm);
00100   }
00101   Fail; return 0;
00102 }
00103 
00104 double TXmlObjSer::GetFltArg(const PXmlTok& XmlTok, const TStr& Nm){
00105   TStr ValStr;
00106   if (XmlTok->IsArg(Nm, ValStr)){
00107     double Val;
00108     if (ValStr.IsFlt(Val)){
00109       return Val;
00110     } else {
00111       TExcept::Throw("Invalid Xml-Argument Double-Value", Nm, ValStr);
00112     }
00113   } else {
00114     TExcept::Throw("Xml-Argument Missing", Nm);
00115   }
00116   Fail; return 0;
00117 }
00118 
00120 // Xml-Object-Serialization-Tag-Name
00121 TXmlObjSerTagNm::TXmlObjSerTagNm(
00122  TSOut& _SOut, const bool& ETagP,
00123  const TStr& Nm, const TStr& TypeNm,
00124  const TStr& ArgNm, const TStr& ArgVal):
00125   TagNm(), SOut(&_SOut){
00126   if (Nm!="-"){
00127     SOut->PutCh('<');
00128     if (Nm.Empty()){
00129       SOut->PutStr(TagNm=TypeNm);
00130     } else {
00131       SOut->PutStr(TagNm=Nm);
00132       SOut->PutStr(" Type=\""); SOut->PutStr(TypeNm); SOut->PutCh('"');
00133     }
00134     if (!ArgNm.Empty()){
00135       SOut->PutCh(' '); SOut->PutStr(ArgNm); SOut->PutCh('=');
00136       SOut->PutCh('"'); SOut->PutStr(ArgVal); SOut->PutCh('"');
00137     }
00138     if (ETagP){
00139       SOut->PutCh('/'); TagNm="";}
00140     SOut->PutCh('>');
00141   }
00142 }
00143 
00144 TXmlObjSerTagNm::TXmlObjSerTagNm(
00145  TSOut& _SOut, const bool& ETagP,
00146  const TStr& Nm, const TStr& TypeNm,
00147  const TStr& ArgNm1, const TStr& ArgVal1,
00148  const TStr& ArgNm2, const TStr& ArgVal2,
00149  const TStr& ArgNm3, const TStr& ArgVal3,
00150  const TStr& ArgNm4, const TStr& ArgVal4):
00151   TagNm(), SOut(&_SOut){
00152   if (Nm!="-"){
00153     SOut->PutCh('<');
00154     if (Nm.Empty()){
00155       SOut->PutStr(TagNm=TypeNm);
00156     } else {
00157       SOut->PutStr(TagNm=Nm);
00158       SOut->PutStr(" Type=\""); SOut->PutStr(TypeNm); SOut->PutCh('"');
00159     }
00160     if (!ArgNm1.Empty()){
00161       SOut->PutCh(' '); SOut->PutStr(ArgNm1); SOut->PutCh('=');
00162       SOut->PutCh('"'); SOut->PutStr(ArgVal1); SOut->PutCh('"');
00163     }
00164     if (!ArgNm2.Empty()){
00165       SOut->PutCh(' '); SOut->PutStr(ArgNm2); SOut->PutCh('=');
00166       SOut->PutCh('"'); SOut->PutStr(ArgVal2); SOut->PutCh('"');
00167     }
00168     if (!ArgNm3.Empty()){
00169       SOut->PutCh(' '); SOut->PutStr(ArgNm3); SOut->PutCh('=');
00170       SOut->PutCh('"'); SOut->PutStr(ArgVal3); SOut->PutCh('"');
00171     }
00172     if (!ArgNm4.Empty()){
00173       SOut->PutCh(' '); SOut->PutStr(ArgNm4); SOut->PutCh('=');
00174       SOut->PutCh('"'); SOut->PutStr(ArgVal4); SOut->PutCh('"');
00175     }
00176     if (ETagP){
00177       SOut->PutCh('/'); TagNm="";}
00178     SOut->PutCh('>');
00179   }
00180 }
00181 
00182 TXmlObjSerTagNm::~TXmlObjSerTagNm(){
00183   if (!TagNm.Empty()){
00184     SOut->PutCh('<'); SOut->PutCh('/'); SOut->PutStr(TagNm); SOut->PutCh('>');
00185   }
00186 }
00187 
00189 // Xml-Chars
00190 void TXmlChDef::SetChTy(TBSet& ChSet, const int& MnCh, const int& MxCh){
00191   IAssert((0<=MnCh)&&((MxCh==-1)||((MnCh<=MxCh)&&(MxCh<Chs))));
00192   ChSet.Incl(MnCh);
00193   for (int Ch=MnCh+1; Ch<=MxCh; Ch++){
00194     ChSet.Incl(Ch);}
00195 }
00196 
00197 void TXmlChDef::SetChTy(TBSet& ChSet, const TStr& Str){
00198   for (int ChN=0; ChN<Str.Len(); ChN++){
00199     uchar Ch=Str[ChN];
00200     ChSet.Incl(Ch);
00201   }
00202 }
00203 
00204 void TXmlChDef::SetEntityVal(const TStr& Nm, const TStr& Val){
00205   EntityNmToValH.AddDat(Nm, Val);
00206 }
00207 
00208 TXmlChDef::TXmlChDef():
00209   Chs(TUCh::Vals),
00210   CharChSet(), CombChSet(), ExtChSet(),
00211   LetterChSet(), DigitChSet(), NameChSet(), PubidChSet(),
00212   EntityNmToValH(100){
00213 
00214   // Character-Sets
00215   // Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | ...
00216   CharChSet.Gen(Chs);
00217   // ... because of DMoz (temporary patch)
00218   SetChTy(CharChSet, 0x1); SetChTy(CharChSet, 0x3); SetChTy(CharChSet, 0x6);
00219   SetChTy(CharChSet, 11); SetChTy(CharChSet, 24); SetChTy(CharChSet, 27);
00220   // regular characters
00221   SetChTy(CharChSet, 0x9); SetChTy(CharChSet, 0xA); SetChTy(CharChSet, 0xD);
00222   SetChTy(CharChSet, 0x20, TUCh::Mx);
00223   // BaseChar ::=  [#x0041-#x005A] | [#x0061-#x007A] | [#x00C0-#x00D6] |
00224   //  [#x00D8-#x00F6] | [#x00F8-#x00FF] | ...
00225   TBSet BaseChSet(Chs);
00226   SetChTy(BaseChSet, 0x41, 0x5A); SetChTy(BaseChSet, 0x61, 0x7A);
00227   SetChTy(BaseChSet, 0xC0, 0xD6); SetChTy(BaseChSet, 0xD8, 0xF6);
00228   SetChTy(BaseChSet, 0xF8, 0xFF);
00229   // Ideographic ::= ...
00230   TBSet IdeoChSet(Chs);
00231   // CombiningChar ::= ...
00232   CombChSet.Gen(Chs);
00233   // Extender ::=  #x00B7 | ...
00234   ExtChSet.Gen(Chs);
00235   SetChTy(ExtChSet, 0xB7);
00236   // Letter ::=  BaseChar | Ideographic
00237   LetterChSet=BaseChSet|IdeoChSet;
00238   // Digit ::=  [#x0030-#x0039] | ...
00239   DigitChSet.Gen(Chs);
00240   SetChTy(DigitChSet, 0x30, 0x39);
00241   // NameChar ::=  Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar
00242   NameChSet=LetterChSet|DigitChSet|
00243    uchar('.')|uchar('-')|uchar('_')|uchar(':')|CombChSet;
00244   // PubidChar ::=  #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
00245   PubidChSet.Gen(Chs);
00246   SetChTy(PubidChSet, 0x20); SetChTy(PubidChSet, 0xD); SetChTy(PubidChSet, 0xA);
00247   SetChTy(PubidChSet, 'a', 'z'); SetChTy(PubidChSet, 'A', 'Z');
00248   SetChTy(PubidChSet, '0', '9'); SetChTy(PubidChSet, "-'()+,./:=?;!*#@$_%");
00249 
00250   // Standard-Entity-Sequences
00251   SetEntityVal("amp", "&");
00252   SetEntityVal("lt", "<"); SetEntityVal("gt", ">");
00253   SetEntityVal("apos", "'"); SetEntityVal("quot", "\"");
00254 }
00255 
00257 // Xml-Lexical
00258 TXmlChDef TXmlLx::ChDef;
00259 
00260 uchar TXmlLx::GetCh(){
00261   EAssert(Ch!=TCh::EofCh);
00262   PrevCh=Ch;
00263   if (ChStack.Empty()){Ch=(RSIn.Eof()) ? TCh::EofCh : RSIn.GetCh();}
00264   else {Ch=ChStack.Pop();}
00265   ChN++; if (Ch==TCh::LfCh){LnN++; LnChN=0;} else {LnChN++;}
00266   //putchar(Ch);
00267   return Ch;
00268 }
00269 
00270 void TXmlLx::ToNrSpacing(){
00271   if (Spacing==xspIntact){
00272   } else
00273   if (Spacing==xspPreserve){
00274     int SrcChN=0; int DstChN=0;
00275     while (SrcChN<TxtChA.Len()){
00276       if (TxtChA[SrcChN]==TCh::CrCh){
00277         TxtChA.PutCh(DstChN, TCh::LfCh); SrcChN++; DstChN++;
00278         if ((SrcChN<TxtChA.Len())&&(TxtChA[SrcChN]==TCh::LfCh)){SrcChN++;}
00279       } else {
00280         if (SrcChN!=DstChN){
00281           TxtChA.PutCh(DstChN, TxtChA[SrcChN]);}
00282         SrcChN++; DstChN++;
00283       }
00284     }
00285     TxtChA.Trunc(DstChN);
00286   } else
00287   if (Spacing==xspSeparate){
00288     // squeeze series of white-spaces to single space
00289     int SrcChN=0; int DstChN=0;
00290     while (SrcChN<TxtChA.Len()){
00291       if (ChDef.IsWs(TxtChA[SrcChN])){
00292         if ((DstChN>0)&&(TxtChA[DstChN-1]==' ')){
00293           SrcChN++;
00294         } else {
00295           TxtChA.PutCh(DstChN, ' ');
00296           SrcChN++; DstChN++;
00297         }
00298       } else {
00299         TxtChA.PutCh(DstChN, TxtChA[SrcChN]);
00300         SrcChN++; DstChN++;
00301       }
00302     }
00303     TxtChA.Trunc(DstChN);
00304   } else
00305   if (Spacing==xspTruncate){
00306     // cut leading and trailing white-spaces and
00307     // squeeze series of white-spaces to single space
00308     int SrcChN=0; int DstChN=0;
00309     while (SrcChN<TxtChA.Len()){
00310       if (ChDef.IsWs(TxtChA[SrcChN])){
00311         if ((DstChN>0)&&(TxtChA[DstChN-1]==' ')){
00312           SrcChN++;
00313         } else {
00314           TxtChA.PutCh(DstChN, ' ');
00315           SrcChN++; DstChN++;
00316         }
00317       } else {
00318         TxtChA.PutCh(DstChN, TxtChA[SrcChN]);
00319         SrcChN++; DstChN++;
00320       }
00321     }
00322     TxtChA.Trunc(DstChN);
00323     // delete trailing white-spaces
00324     while ((TxtChA.Len()>0)&&(ChDef.IsWs(TxtChA.LastCh()))){
00325       TxtChA.Pop();}
00326   } else {
00327     Fail;
00328   }
00329 }
00330 
00331 void TXmlLx::GetWs(const bool& IsRq){
00332   // [3] S ::=  (#x20 | #x9 | #xD | #xA)+
00333   int WSpaces=0; TxtChA.Clr();
00334   while (ChDef.IsWs(Ch)){
00335     WSpaces++; TxtChA+=Ch; GetCh();}
00336   if (IsRq&&(WSpaces==0)){
00337     EThrow("White-space required.");}
00338 }
00339 
00340 TStr TXmlLx::GetReference(){
00341   // [67] Reference ::=  EntityRef | CharRef
00342   if (Ch=='#'){
00343     // [66]  CharRef ::=  '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
00344     TChA RefChA; int RefCd=0;
00345     if (GetCh()=='x'){
00346       // hex-decimal character code
00347       forever {
00348         GetCh();
00349         if (TCh::IsHex(Ch)){
00350           RefChA+=Ch;
00351           RefCd=RefCd*16+TCh::GetHex(Ch);
00352         } else {
00353           break;
00354         }
00355       }
00356     } else {
00357       // decimal character code
00358       forever {
00359         if (TCh::IsNum(Ch)){
00360           RefChA+=Ch;
00361           RefCd=RefCd*10+TCh::GetNum(Ch);
00362         } else {
00363           break;
00364         }
00365         GetCh();
00366       }
00367     }
00368     if ((!RefChA.Empty())&&(Ch==';')){
00369       GetCh();
00370           if (RefCd < 0x100) {
00371                   // 8-bit char
00372               uchar RefCh=uchar(RefCd);
00373                   return TStr(RefCh);
00374           } else {
00375                   TStr ResStr = TUStr::EncodeUtf8(RefCd);
00376                   return ResStr;
00377           }
00378     } else {
00379       EThrow("Invalid Char-Reference."); Fail; return TStr();
00380     }
00381   } else {
00382     // [68]  EntityRef ::=  '&' Name ';'
00383     TStr EntityNm=GetName();
00384     if ((!EntityNm.Empty())&&(Ch==';')){
00385       GetCh();
00386       TStr EntityVal;
00387       if (IsEntityNm(EntityNm, EntityVal)){/*intentionaly empty*/}
00388       else if (ChDef.IsEntityNm(EntityNm, EntityVal)){/*intentionaly empty*/}
00389       else {EThrow(TStr("Entity-Reference (")+EntityNm+") does not exist.");}
00390       return EntityVal;
00391     } else {
00392       EThrow("Invalid Entity-Reference."); Fail; return TStr();
00393     }
00394   }
00395 }
00396 
00397 TStr TXmlLx::GetPEReference(){
00398   // [69]  PEReference ::=  '%' Name ';'
00399   TStr EntityNm=GetName();
00400   if ((EntityNm.Empty())||(Ch!=';')){EThrow("Invalid PEntity-Reference.");}
00401   GetCh();
00402   TStr EntityVal;
00403   if (IsPEntityNm(EntityNm, EntityVal)){/*intentionaly empty*/}
00404   else {EThrow(TStr("PEntity-Reference (")+EntityNm+") does not exist.");}
00405   return EntityVal;
00406 }
00407 
00408 void TXmlLx::GetEq(){
00409   // [25] Eq ::=  S? '=' S?
00410   GetWs(false);
00411   if (Ch=='='){GetCh();}
00412   else {EThrow("Equality ('=') character expected.");}
00413   GetWs(false);
00414 }
00415 
00416 TStr TXmlLx::GetName(){
00417   // [5] Name ::=  (Letter | '_' | ':') (NameChar)*
00418   TChA NmChA;
00419   if (ChDef.IsFirstNameCh(Ch)){
00420     do {NmChA+=Ch;} while (ChDef.IsName(GetCh()));
00421   } else {
00422     EThrow("Invalid first name character.");
00423     // EThrow(TStr::Fmt("Invalid first name character [%u:'%c%c%c%c%c'].",
00424     //  uint(Ch), Ch, RSIn.GetCh(), RSIn.GetCh(), RSIn.GetCh(), RSIn.GetCh()));
00425   }
00426   return NmChA;
00427 }
00428 
00429 TStr TXmlLx::GetName(const TStr& RqNm){
00430   TStr Nm=GetName();
00431   // test if the name is equal to the required name
00432   if (Nm==RqNm){return RqNm;}
00433   else {EThrow(TStr("Name '")+RqNm+"' expected."); Fail; return TStr();}
00434 }
00435 
00436 void TXmlLx::GetComment(){
00437   // [15] Comment ::=  {{'<!-}}-' ((Char - '-') | ('-' (Char - '-')))* '-->'
00438   if (GetCh()!='-'){EThrow("Invalid comment start.");}
00439   TxtChA.Clr();
00440   forever {
00441     GetCh();
00442     if (!ChDef.IsChar(Ch)){EThrow("Invalid comment character.");}
00443     if (Ch=='-'){
00444       if (GetCh()=='-'){
00445         if (GetCh()=='>'){GetCh(); break;} // final bracket
00446         else {EThrow("Invalid comment end.");}
00447       } else {
00448         if (!ChDef.IsChar(Ch)){EThrow("Invalid comment character.");}
00449         TxtChA+='-'; TxtChA+=Ch; // special case if single '-'
00450       }
00451     } else {
00452       TxtChA+=Ch; // usual char
00453     }
00454   }
00455 }
00456 
00457 TStr TXmlLx::GetAttValue(){
00458   // [10]  AttValue ::=  '"' ([^<&"] | Reference)* '"'
00459   //  |  "'" ([^<&'] | Reference)* "'"
00460   uchar QCh=Ch;
00461   if ((QCh!='"')&&(QCh!='\'')){EThrow("Invalid attribute-value start.");}
00462   TChA ValChA; GetCh();
00463   forever {
00464     if ((Ch=='<')||(!ChDef.IsChar(Ch))){
00465       EThrow("Invalid attribute-value character.");}
00466     if (Ch==QCh){GetCh(); break;} // final quote
00467     else if (Ch=='&'){GetCh(); ValChA+=GetReference();} // reference
00468     else {ValChA+=Ch; GetCh();} // usual char
00469   }
00470   return ValChA;
00471 }
00472 
00473 TStr TXmlLx::GetVersionNum(){
00474   // [24] VersionInfo ::=  {{S 'version' Eq}} (' VersionNum ' | " VersionNum ")
00475   // [26] VersionNum ::=  ([a-zA-Z0-9_.:] | '-')+
00476   char QCh=Ch;
00477   if ((Ch!='\'')&&(Ch!='"')){EThrow("Quote character (' or \") expected.");}
00478   TChA VerNumChA;
00479   GetCh();
00480   do {
00481     if ((('a'<=Ch)&&(Ch<='z'))||(('A'<=Ch)&&(Ch<='Z'))||
00482      (('0'<=Ch)&&(Ch<='9'))||(Ch=='_')||(Ch=='.')||(Ch==':')||(Ch=='-')){
00483       VerNumChA+=Ch;
00484     } else {
00485       EThrow("Invalid version-number character.");
00486     }
00487     GetCh();
00488   } while (Ch!=QCh);
00489   GetCh();
00490   return VerNumChA;
00491 }
00492 
00493 TStr TXmlLx::GetEncName(){
00494   // [80] EncodingDecl ::=  {{S 'encoding' Eq}} ('"' EncName '"' |  "'" EncName "'" )
00495   // [81] EncName ::=  [A-Za-z] ([A-Za-z0-9._] | '-')*
00496   char QCh=Ch;
00497   if ((Ch!='\'')&&(Ch!='"')){EThrow("Quote character (' or \") expected.");}
00498   TChA EncNmChA;
00499   GetCh();
00500   if ((('a'<=Ch)&&(Ch<='z'))||(('A'<=Ch)&&(Ch<='Z'))){EncNmChA+=Ch;}
00501   else {EThrow("Invalid encoding-name character.");}
00502   GetCh();
00503   while (Ch!=QCh){
00504     if ((('a'<=Ch)&&(Ch<='z'))||(('A'<=Ch)&&(Ch<='Z'))||
00505      (('0'<=Ch)&&(Ch<='9'))||(Ch=='.')||(Ch=='_')||(Ch=='-')){EncNmChA+=Ch;}
00506     else {EThrow("Invalid version-number character.");}
00507     GetCh();
00508   }
00509   GetCh();
00510   return EncNmChA;
00511 }
00512 
00513 TStr TXmlLx::GetStalVal(){
00514   // [32] SDDecl ::=  {{S 'standalone' Eq}}
00515   //  (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"'))
00516   char QCh=Ch;
00517   if ((Ch!='\'')&&(Ch!='"')){EThrow("Quote character (' or \") expected.");}
00518   TChA StalChA;
00519   GetCh();
00520   while (Ch!=QCh){
00521     if (('a'<=Ch)&&(Ch<='z')){StalChA+=Ch;}
00522     else {EThrow("Invalid standalone-value character.");}
00523     GetCh();
00524   }
00525   GetCh();
00526   TStr StalVal=StalChA;
00527   if ((StalVal=="yes")||(StalVal=="no")){return StalVal;}
00528   else {EThrow("Invalid standalone-value."); Fail; return TStr();}
00529 }
00530 
00531 void TXmlLx::GetXmlDecl(){
00532   // [23] XMLDecl ::=  {{'<?xml'}}... VersionInfo EncodingDecl? SDDecl? S? '?>'
00533   // [24] VersionInfo ::=  S 'version' Eq (' VersionNum ' | " VersionNum ")
00534   GetWs(true);
00535   TStr VerNm=GetName("version"); GetEq(); TStr VerVal=GetVersionNum();
00536   if (VerVal!="1.0"){EThrow("Invalid XML version.");}
00537   AddArg(VerNm, VerVal);
00538   GetWs(false);
00539   if (Ch!='?'){
00540     // EncodingDecl ::=  {{S}} 'encoding' Eq
00541     //  ('"' EncName '"' |  "'" EncName "'" )
00542     TStr EncNm=GetName("encoding"); GetEq(); TStr EncVal=GetEncName();
00543     AddArg(EncNm, EncVal);
00544   }
00545   GetWs(false);
00546   if (Ch!='?'){
00547     // SDDecl ::=  {{S}} 'standalone' Eq
00548     //  (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"'))
00549     TStr StalNm=GetName("standalone"); GetEq(); TStr StalVal=GetStalVal();
00550     AddArg(StalNm, StalVal);
00551   }
00552   GetWs(false);
00553   if (Ch=='?'){
00554     GetCh();
00555     if (Ch=='>'){GetCh();}
00556     else {EThrow("Invalid end-of-tag in XML-declaration.");}
00557   } else {
00558     EThrow("Invalid end-of-tag in XML-declaration.");
00559   }
00560 }
00561 
00562 void TXmlLx::GetPI(){
00563   // [16]  PI ::=  {{'<?' PITarget}} (S (Char* - (Char* '?>' Char*)))? '?>'
00564   // [17]  PITarget ::=  Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
00565   GetWs(false);
00566   TxtChA.Clr();
00567   forever {
00568     if (!ChDef.IsChar(Ch)){EThrow("Invalid PI character.");}
00569     if (Ch=='?'){
00570       if (GetCh()=='>'){
00571         GetCh(); break;
00572       } else {
00573         if (!ChDef.IsChar(Ch)){EThrow("Invalid PI character.");}
00574         TxtChA+='?'; TxtChA+=Ch; // special case if single '?'
00575       }
00576     } else {
00577       TxtChA+=Ch; // usual char
00578     }
00579     GetCh();
00580   }
00581 }
00582 
00583 TStr TXmlLx::GetSystemLiteral(){
00584   // [11]  SystemLiteral ::=  ('"' [^"]* '"') | ("'" [^']* "'")
00585   char QCh=Ch;
00586   if ((Ch!='\'')&&(Ch!='"')){EThrow("Quote character (' or \") expected.");}
00587   TChA LitChA; GetCh();
00588   while (Ch!=QCh){
00589     if (!ChDef.IsChar(Ch)){EThrow("Invalid System-Literal character.");}
00590     LitChA+=Ch; GetCh();
00591   }
00592   GetCh();
00593   return LitChA;
00594 }
00595 
00596 TStr TXmlLx::GetPubidLiteral(){
00597   // [12]  PubidLiteral ::=  '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
00598   char QCh=Ch;
00599   if ((Ch!='\'')&&(Ch!='"')){EThrow("Quote character (' or \") expected.");}
00600   TChA LitChA; GetCh();
00601   while (Ch!=QCh){
00602     if (!ChDef.IsPubid(Ch)){EThrow("Invalid Public-Id-Literal character.");}
00603     LitChA+=Ch; GetCh();
00604   }
00605   GetCh();
00606   return LitChA;
00607 }
00608 
00609 void TXmlLx::GetExternalId(){
00610   // ExternalID ::=  'SYSTEM' S SystemLiteral
00611   //  | 'PUBLIC' S PubidLiteral S SystemLiteral
00612   TStr ExtIdNm=GetName();
00613   if (ExtIdNm=="SYSTEM"){
00614     GetWs(true); GetSystemLiteral();
00615   } else if (ExtIdNm=="PUBLIC"){
00616     GetWs(true); GetPubidLiteral(); GetWs(true); GetSystemLiteral();
00617   } else {
00618     EThrow("Invalid external-id ('SYSTEM' or 'PUBLIC' expected).");
00619   }
00620 }
00621 
00622 void TXmlLx::GetNData(){
00623   // [76]  NDataDecl ::=  S 'NDATA' S Name
00624   GetName("NDATA"); GetWs(true); GetName();
00625 }
00626 
00627 void TXmlLx::GetDocTypeDecl(){
00628   // [28] doctypedecl ::=  {{'<!DOCTYPE'}} S Name (S ExternalID)? S?
00629   //  ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
00630   GetWs(true);
00631   TStr DocTypeDeclNm=GetName();
00632   GetWs(false);
00633   if (Ch=='>'){GetCh(); return;}
00634   if (Ch!='['){GetExternalId();}
00635   GetWs(false);
00636   if (Ch=='['){
00637     GetCh();
00638     // [28] (markupdecl | PEReference | S)*
00639     GetWs(false);
00640     while (Ch!=']'){
00641       if (ChDef.IsWs(Ch)){GetWs(true);}
00642       else if (Ch=='%'){GetPEReference();}
00643       else {
00644         GetSym();
00645       }
00646     }
00647     GetCh();
00648   }
00649   GetWs(false);
00650   // '>'
00651   if (Ch=='>'){GetCh();}
00652   else {EThrow("Invalid end-of-tag in document-type-declaration.");}
00653   TagNm=DocTypeDeclNm;
00654 }
00655 
00656 void TXmlLx::GetElement(){
00657   TxtChA.Clr();
00658   while (Ch!='>'){
00659     if (!ChDef.IsChar(Ch)){EThrow("Invalid Element character.");}
00660     TxtChA+=Ch; GetCh();
00661   }
00662   GetCh();
00663 }
00664 
00665 void TXmlLx::GetAttList(){
00666   TxtChA.Clr();
00667   while (Ch!='>'){
00668     if (!ChDef.IsChar(Ch)){EThrow("Invalid Element character.");}
00669     TxtChA+=Ch; GetCh();
00670   }
00671   GetCh();
00672 }
00673 
00674 TStr TXmlLx::GetEntityValue(){
00675   // [9]  EntityValue ::=  '"' ([^%&"] | PEReference | Reference)* '"'
00676   //  | "'" ([^%&'] | PEReference | Reference)* "'"
00677   uchar QCh=Ch;
00678   if ((QCh!='"')&&(QCh!='\'')){EThrow("Invalid entity-value start.");}
00679   TChA ValChA; GetCh();
00680   forever {
00681     if (!ChDef.IsChar(Ch)){EThrow("Invalid entity-value character.");}
00682     if (Ch==QCh){GetCh(); break;} // final quote
00683     else if (Ch=='&'){GetCh(); ValChA+=GetReference();} // reference
00684     else if (Ch=='%'){GetCh(); ValChA+=GetPEReference();} // pereference
00685     else {ValChA+=Ch; GetCh();} // usual char
00686   }
00687   return ValChA;
00688 }
00689 
00690 void TXmlLx::GetEntity(){
00691   // [70] EntityDecl ::=  GEDecl | PEDecl
00692   // [71] GEDecl ::=  '<!ENTITY' S Name S EntityDef S? '>'
00693   // [72] PEDecl ::=  '<!ENTITY' S '%' S Name S PEDef S? '>'
00694   GetWs(true); TStr EntityNm;
00695   if (Ch=='%'){
00696     GetCh(); GetWs(true); EntityNm=GetName(); GetWs(true);
00697     // [74] PEDef ::=  EntityValue | ExternalID
00698     if ((Ch=='\"')||(Ch=='\'')){
00699       TStr EntityVal=GetEntityValue();
00700       PutPEntityVal(EntityNm, EntityVal);
00701     } else {
00702       GetExternalId();
00703       GetWs(false);
00704       if (Ch!='>'){GetNData();}
00705     }
00706   } else {
00707     EntityNm=GetName(); GetWs(true);
00708     // [73] EntityDef ::=  EntityValue | (ExternalID NDataDecl?)
00709     if ((Ch=='\"')||(Ch=='\'')){
00710       TStr EntityVal=GetEntityValue();
00711       PutEntityVal(EntityNm, EntityVal);
00712     } else {
00713       GetExternalId();
00714     }
00715   }
00716   GetWs(false);
00717   if (Ch=='>'){GetCh();}
00718   else {EThrow("Invalid end-of-tag in entity-declaration.");}
00719   TagNm=EntityNm;
00720 }
00721 
00722 void TXmlLx::GetNotation(){
00723   // [82] NotationDecl ::=  '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
00724   // [83]  PublicID ::=  'PUBLIC' S PubidLiteral
00725   TxtChA.Clr();
00726   while (Ch!='>'){
00727     if (!ChDef.IsChar(Ch)){EThrow("Invalid Element character.");}
00728     TxtChA+=Ch; GetCh();
00729   }
00730   GetCh();
00731 }
00732 
00733 void TXmlLx::GetCDSect(){
00734   // [18]  CDSect ::=  CDStart CData CDEnd
00735   // [19]  CDStart ::=  '<![CDATA{{['}}
00736   // [20]  CData ::=  (Char* - (Char* ']]>' Char*))
00737   // [21]  CDEnd ::=  ']]>'
00738   if (Ch=='['){GetCh();}
00739   else {EThrow("Invalid start of CDATA section.");}
00740   TxtChA.Clr();
00741   forever {
00742     if (!ChDef.IsChar(Ch)){EThrow("Invalid CDATA character.");}
00743     if ((Ch=='>')&&(TxtChA.Len()>=2)&&
00744      (TxtChA.LastLastCh()==']') && (TxtChA.LastCh()==']')){
00745       GetCh(); TxtChA.Pop(); TxtChA.Pop(); break;
00746     } else {
00747       TxtChA+=Ch; GetCh();
00748     }
00749   }
00750 }
00751 
00752 void TXmlLx::SkipWs(){
00753   // [3] S ::=  (#x20 | #x9 | #xD | #xA)+
00754   while (ChDef.IsWs(Ch)){GetCh();}
00755 }
00756 
// Reads the next lexical symbol starting at the current character Ch,
// stores its kind in Sym and returns it.
//  - '<' starts markup: XML declaration / PI ("<?"), CDATA section
//    ("<![CDATA["), comment ("<!-"), DOCTYPE / ELEMENT / ATTLIST / ENTITY /
//    NOTATION declarations ("<!NAME"), end tag ("</"), or a start /
//    empty-element tag. The tag name is left in TagNm; attributes of a
//    start tag are recorded through AddArg (the list is cleared first).
//  - white-space yields xsyWs (text in TxtChA, normalized by ToNrSpacing);
//    with xspTruncate spacing the white-space is dropped and the following
//    symbol is returned instead.
//  - TCh::EofCh yields xsyEof without consuming anything.
//  - anything else is character data (xsyStr): text up to the next '<' is
//    collected in TxtChA with references expanded, then normalized.
// Errors are reported through EThrow.
00757 TXmlLxSym TXmlLx::GetSym(){
00758   if (Ch=='<'){
00759     GetCh(); ClrArgV();
00760     if (Ch=='?'){
          // "<?": XML declaration when the target is "xml" (any case), else PI
00761       GetCh(); TagNm=GetName();
00762       if (TagNm.GetLc()=="xml"){Sym=xsyXmlDecl; GetXmlDecl();}
00763       else {Sym=xsyPI; GetPI();}
00764     } else
00765     if (Ch=='!'){
00766       GetCh();
00767       if (Ch=='['){
            // "<![": only a CDATA section is accepted
00768         GetCh(); TagNm=GetName();
00769         if (TagNm=="CDATA"){Sym=xsyQStr; GetCDSect();}
00770         else {EThrow(TStr("Invalid tag after '<![' (")+TagNm+").");}
00771       } else
00772       if (Ch=='-'){
            // "<!-": comment (GetComment checks the second '-')
00773         Sym=xsyComment; GetComment();
00774       } else {
            // "<!NAME": markup declaration selected by its keyword; Sym is
            // assigned after parsing because the parser may call GetSym itself
00775         TagNm=GetName();
00776         if (TagNm=="DOCTYPE"){GetDocTypeDecl(); Sym=xsyDocTypeDecl;}
00777         else if (TagNm=="ELEMENT"){GetElement(); Sym=xsyElement;}
00778         else if (TagNm=="ATTLIST"){GetAttList(); Sym=xsyAttList;}
00779         else if (TagNm=="ENTITY"){GetEntity(); Sym=xsyEntity;}
00780         else if (TagNm=="NOTATION"){GetNotation(); Sym=xsyNotation;}
00781         else {EThrow(TStr("Invalid tag (")+TagNm+").");}
00782       }
00783     } else
00784     if (Ch=='/'){
00785       // xsyETag
00786       GetCh(); Sym=xsyETag; TagNm=GetName(); GetWs(false);
00787       if (Ch=='>'){GetCh();}
00788       else {EThrow("Invalid End-Tag.");}
00789     } else {
00790       // xsySTag or xsySETag
00791       TagNm=GetName(); GetWs(false);
          // attributes: Name Eq AttValue, repeated until '>' or '/'
00792       while ((Ch!='>')&&(Ch!='/')){
00793         TStr AttrNm=GetName();
00794         GetEq();
00795         TStr AttrVal=GetAttValue();
00796         GetWs(false);
00797         AddArg(AttrNm, AttrVal);
00798       }
00799       if (Ch=='/'){
00800         if (GetCh()=='>'){Sym=xsySETag; GetCh();}
00801         else {EThrow("Invalid Empty-Element-Tag.");}
00802       } else {
00803         Sym=xsySTag; GetCh();
00804       }
00805     }
        // in truncate mode white-space directly after markup is dropped
00806     if (Spacing==xspTruncate){SkipWs();}
00807   } else
00808   if (ChDef.IsWs(Ch)){
00809     Sym=xsyWs; GetWs(true); ToNrSpacing();
        // in truncate mode the white-space symbol is replaced by the next one
00810     if (Spacing==xspTruncate){GetSym();}
00811   } else
00812   if (Ch==TCh::EofCh){
00813     Sym=xsyEof;
00814   } else {
00815     Sym=xsyStr; TxtChA.Clr();
00816     // [14]  CharData ::=  [^<&]* - ([^<&]* ']]>' [^<&]*)
00817     forever {
00818       if (!ChDef.IsChar(Ch)){
00819         EThrow(TUInt::GetStr(Ch, "Invalid character (%d)."));}
00820                 // GetCh();  continue; // skip invalid characters
00821       if (Ch=='<'){break;} // tag
00822       if (Ch=='&'){GetCh(); TxtChA+=GetReference();} // reference
00823       else {
            // "]]>" must not appear in character data
00824         if ((Ch=='>')&&(TxtChA.Len()>=2)&&
00825          (TxtChA.LastLastCh()==']')&&(TxtChA.LastCh()==']')){
00826           EThrow("Forbidden substring ']]>' in character data.");}
00827         TxtChA+=Ch; GetCh(); // usual char
00828       }
00829     }
00830     ToNrSpacing();
00831   }
00832   return Sym;
00833 }
00834 
00835 TStr TXmlLx::GetSymStr() const {
00836   TChA SymChA;
00837   switch (Sym){
00838     case xsyUndef:
00839       SymChA="{Undef}"; break;
00840     case xsyWs:
00841       SymChA+="{Space:'"; SymChA+=TStr(TxtChA).GetHex(); SymChA+="'}"; break;
00842     case xsyComment:
00843       SymChA+="<!--"; SymChA+=TxtChA; SymChA+="-->"; break;
00844     case xsyXmlDecl:{
00845       SymChA+="<?"; SymChA+=TagNm;
00846       for (int ArgN=0; ArgN<GetArgs(); ArgN++){
00847         TStr ArgNm; TStr ArgVal; GetArg(ArgN, ArgNm, ArgVal);
00848         char ArgValQCh=GetArgValQCh(ArgVal);
00849         SymChA+=' '; SymChA+=ArgNm; SymChA+='=';
00850         SymChA+=ArgValQCh; SymChA+=ArgVal; SymChA+=ArgValQCh;
00851       }
00852       SymChA+="?>"; break;}
00853     case xsyPI:
00854       SymChA+="<?"; SymChA+=TagNm;
00855       if (!TxtChA.Empty()){SymChA+=' '; SymChA+=TxtChA;}
00856       SymChA+="?>"; break;
00857     case xsyDocTypeDecl:
00858       SymChA+="<!DOCTYPE "; SymChA+=TagNm; SymChA+=">"; break;
00859     case xsySTag:
00860     case xsySETag:{
00861       SymChA+="<"; SymChA+=TagNm;
00862       for (int ArgN=0; ArgN<GetArgs(); ArgN++){
00863         TStr ArgNm; TStr ArgVal; GetArg(ArgN, ArgNm, ArgVal);
00864         char ArgValQCh=GetArgValQCh(ArgVal);
00865         SymChA+=' '; SymChA+=ArgNm; SymChA+='=';
00866         SymChA+=ArgValQCh; SymChA+=ArgVal; SymChA+=ArgValQCh;
00867       }
00868       if (Sym==xsySTag){SymChA+=">";}
00869       else if (Sym==xsySETag){SymChA+="/>";}
00870       else {Fail;}
00871       break;}
00872     case xsyETag:
00873       SymChA+="</"; SymChA+=TagNm; SymChA+=">"; break;
00874     case xsyStr:
00875       SymChA="{String:'"; SymChA+=TxtChA; SymChA+="'}"; break;
00876     case xsyQStr:
00877       SymChA="{QString:'"; SymChA+=TxtChA; SymChA+="'}"; break;
00878     case xsyEof:
00879       SymChA="{Eof}"; break;
00880     default: Fail;
00881   }
00882   return SymChA;
00883 }
00884 
00885 void TXmlLx::EThrow(const TStr& MsgStr) const {
00886   TChA FPosChA;
00887   FPosChA+=" [File:"; FPosChA+=SIn->GetSNm();
00888   FPosChA+=" Line:"; FPosChA+=TInt::GetStr(LnN);
00889   FPosChA+=" Char:"; FPosChA+=TInt::GetStr(LnChN);
00890   FPosChA+="]";
00891   TStr FullMsgStr=MsgStr+FPosChA;
00892   TExcept::Throw(FullMsgStr);
00893 }
00894 
00895 TStr TXmlLx::GetFPosStr() const {
00896   TChA FPosChA;
00897   FPosChA+=" [File:"; FPosChA+=SIn->GetSNm();
00898   FPosChA+=" Line:"; FPosChA+=TInt::GetStr(LnN);
00899   FPosChA+=" Char:"; FPosChA+=TInt::GetStr(LnChN);
00900   FPosChA+="]";
00901   return FPosChA;
00902 }
00903 
00904 TStr TXmlLx::GetXmlLxSymStr(const TXmlLxSym& XmlLxSym){
00905   switch (XmlLxSym){
00906     case xsyUndef: return "Undef";
00907     case xsyWs: return "White-Space";
00908     case xsyComment: return "Comment";
00909     case xsyXmlDecl: return "Declaration";
00910     case xsyPI: return "PI";
00911     case xsyDocTypeDecl: return "Document-Type";
00912     case xsyElement: return "Element";
00913     case xsyAttList: return "Attribute-List";
00914     case xsyEntity: return "Entity";
00915     case xsyNotation: return "Notation";
00916     case xsyTag: return "Tag";
00917     case xsySTag: return "Start-Tag";
00918     case xsyETag: return "End-Tag";
00919     case xsySETag: return "Start-End-Tag";
00920     case xsyStr: return "String";
00921     case xsyQStr: return "Quoted-String";
00922     case xsyEof: return "Eon-Of-File";
00923     default: return "Undef";
00924   }
00925 }
00926 
00927 bool TXmlLx::IsTagNm(const TStr& Str){
00928   TChA ChA=Str;
00929   if (ChA.Len()>0){
00930     if (TXmlLx::ChDef.IsFirstNameCh(ChA[0])){
00931       for (int ChN=1; ChN<ChA.Len(); ChN++){
00932         if (!TXmlLx::ChDef.IsName(ChA[ChN])){
00933           return false;
00934         }
00935       }
00936       return true;
00937     } else {
00938       return false;
00939     }
00940   } else {
00941     return false;
00942   }
00943 }
00944 
00945 TStr TXmlLx::GetXmlStrFromPlainMem(const TMem& PlainMem){
00946   TChA XmlChA;
00947   for (int ChN=0; ChN<PlainMem.Len(); ChN++){
00948     uchar Ch=PlainMem[ChN];
00949     if ((' '<=Ch)&&(Ch<='~')){
00950       switch (Ch){
00951         case '"': XmlChA+="&quot;"; break;
00952         case '&': XmlChA+="&amp;"; break;
00953         case '\'': XmlChA+="&apos;"; break;
00954         case '<': XmlChA+="&lt;"; break;
00955         case '>': XmlChA+="&gt;"; break;
00956         default: XmlChA+=Ch;
00957       }
00958     } else
00959     if ((Ch=='\r')||(Ch=='\n')){
00960       XmlChA+=Ch;
00961     } else {
00962       XmlChA+='&'; XmlChA+='#'; XmlChA+=TUInt::GetStr(Ch); XmlChA+=';';
00963     }
00964   }
00965   return XmlChA;
00966 }
00967 
00968 TStr TXmlLx::GetXmlStrFromPlainStr(const TChA& PlainChA){
00969   TChA XmlChA;
00970   for (int ChN=0; ChN<PlainChA.Len(); ChN++){
00971     uchar Ch=PlainChA[ChN];
00972     if ((' '<=Ch)&&(Ch<='~')){
00973       switch (Ch){
00974         case '"': XmlChA+="&quot;"; break;
00975         case '&': XmlChA+="&amp;"; break;
00976         case '\'': XmlChA+="&apos;"; break;
00977         case '<': XmlChA+="&lt;"; break;
00978         case '>': XmlChA+="&gt;"; break;
00979         default: XmlChA+=Ch;
00980       }
00981     } else
00982     if ((Ch=='\r')||(Ch=='\n')){
00983       XmlChA+=Ch;
00984     } else {
00985       XmlChA+='&'; XmlChA+='#'; XmlChA+=TUInt::GetStr(Ch); XmlChA+=';';
00986     }
00987   }
00988   return XmlChA;
00989 }
00990 
// Decodes the references in XmlStr and returns the resulting plain string.
//  - Characters other than '&' are copied unchanged.
//  - "&#<digits>;" and "&#x<hex-digits>;" append the single character
//    uchar(code); the accumulated code is narrowed by that cast.
//  - "&quot;", "&amp;", "&apos;", "&lt;", "&gt;" append the matching character.
//  - Any other "&<name>;" is consumed and appends nothing.
//  - When a reference is malformed (no digits/name, or no closing ';'), the
//    characters read so far for it are dropped and scanning resumes at the
//    current character.
00991 TStr TXmlLx::GetPlainStrFromXmlStr(const TStr& XmlStr){
00992   TChA PlainChA;
00993   TChRet Ch(TStrIn::New(XmlStr));
00994   Ch.GetCh();
00995   while (!Ch.Eof()){
00996     if (Ch()!='&'){
00997       PlainChA+=Ch(); Ch.GetCh();
00998     } else {
00999       // [67] Reference ::=  EntityRef | CharRef
01000       if (Ch.GetCh()=='#'){
01001         // [66]  CharRef ::=  '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
// RefChA collects the digit characters (used only for the non-empty check);
// RefCd accumulates the numeric value
01002         TChA RefChA; int RefCd=0;
01003         if (Ch.GetCh()=='x'){
01004           // hex-decimal character code
// the 'x' is skipped by advancing before each test
01005           forever {
01006             Ch.GetCh();
01007             if (TCh::IsHex(Ch())){
01008               RefChA+=Ch();
01009               RefCd=RefCd*16+TCh::GetHex(Ch());
01010             } else {
01011               break;
01012             }
01013           }
01014         } else {
01015           // decimal character code
// the current character is already the first candidate digit, so test
// before advancing
01016           forever {
01017             if (TCh::IsNum(Ch())){
01018               RefChA+=Ch();
01019               RefCd=RefCd*10+TCh::GetNum(Ch());
01020             } else {
01021               break;
01022             }
01023             Ch.GetCh();
01024           }
01025         }
// emit only for a well-formed reference: at least one digit and a ';'
01026         if ((!RefChA.Empty())&&(Ch()==';')){
01027           Ch.GetCh();
01028           uchar RefCh=uchar(RefCd);
01029           PlainChA+=RefCh;
01030         }
01031       } else {
01032         // [68]  EntityRef ::=  '&' Name ';'
01033         TChA EntityNm;
01034         while ((!Ch.Eof())&&(Ch()!=';')){
01035           EntityNm+=Ch(); Ch.GetCh();}
01036         if ((!EntityNm.Empty())&&(Ch()==';')){
01037           Ch.GetCh();
// only the five predefined entities produce output; others are dropped
01038           if (EntityNm=="quot"){PlainChA+='"';}
01039           else if (EntityNm=="amp"){PlainChA+='&';}
01040           else if (EntityNm=="apos"){PlainChA+='\'';}
01041           else if (EntityNm=="lt"){PlainChA+='<';}
01042           else if (EntityNm=="gt"){PlainChA+='>';}
01043         }
01044       }
01045     }
01046   }
01047   return PlainChA;
01048 }
01049 
01050 TStr TXmlLx::GetUsAsciiStrFromXmlStr(const TStr& XmlStr){
01051   TStr UsAsciiStr=XmlStr;
01052   UsAsciiStr.ChangeStrAll("&#232;", "c");
01053   UsAsciiStr.ChangeStrAll("&#200;", "C");
01054   UsAsciiStr.ChangeStrAll("&#154;", "s");
01055   UsAsciiStr.ChangeStrAll("&#138;", "S");
01056   UsAsciiStr.ChangeStrAll("&#158;", "z");
01057   UsAsciiStr.ChangeStrAll("&#142;", "Z");
01058   TChA UsAsciiChA=TXmlLx::GetPlainStrFromXmlStr(UsAsciiStr);
01059   for (int ChN=0; ChN<UsAsciiChA.Len(); ChN++){
01060     char Ch=UsAsciiChA[ChN];
01061     if ((Ch<' ')||('~'<Ch)){UsAsciiChA.PutCh(ChN, 'x');}
01062   }
01063   return UsAsciiChA;
01064 }
01065 
01066 TStr TXmlLx::GetChRefFromYuEntRef(const TStr& YuEntRefStr){
01067   TStr ChRefStr=YuEntRefStr;
01068   ChRefStr.ChangeStrAll("&ch;", "&#232;");
01069   ChRefStr.ChangeStrAll("&Ch;", "&#200;");
01070   ChRefStr.ChangeStrAll("&sh;", "&#154;");
01071   ChRefStr.ChangeStrAll("&Sh;", "&#138;");
01072   ChRefStr.ChangeStrAll("&zh;", "&#158;");
01073   ChRefStr.ChangeStrAll("&Zh;", "&#142;");
01074   ChRefStr.ChangeStrAll("&cs", "c");
01075   ChRefStr.ChangeStrAll("&Cs;", "C");
01076   ChRefStr.ChangeStrAll("&dz;", "dz");
01077   ChRefStr.ChangeStrAll("&Dz;", "Dz");
01078   return ChRefStr;
01079 }
01080 
01082 // Xml-Token
01083 bool TXmlTok::GetBoolArgVal(const TStr& ArgNm, const bool& DfVal) const {
01084   int ArgN=ArgNmValV.SearchForw(TStrKd(ArgNm));
01085   return (ArgN==-1) ? DfVal : (ArgNmValV[ArgN].Dat==TBool::TrueStr);
01086 }
01087 
01088 bool TXmlTok::GetBoolArgVal(
01089  const TStr& ArgNm, const TStr& TrueVal, const bool& DfVal) const {
01090   int ArgN=ArgNmValV.SearchForw(TStrKd(ArgNm));
01091   return (ArgN==-1) ? DfVal : (ArgNmValV[ArgN].Dat==TrueVal);
01092 }
01093 
01094 bool TXmlTok::GetBoolArgVal(const TStr& ArgNm,
01095  const TStr& TrueVal, const TStr& FalseVal, const bool& DfVal) const {
01096   int ArgN=ArgNmValV.SearchForw(TStrKd(ArgNm));
01097   if (ArgN==-1){return DfVal;}
01098   TStr ArgVal=ArgNmValV[ArgN].Dat;
01099   if (ArgVal==TrueVal){return true;}
01100   IAssert(ArgVal == FalseVal); return false;
01101 }
01102 
01103 int TXmlTok::GetIntArgVal(const TStr& ArgNm, const int& DfVal) const {
01104   int ArgN=ArgNmValV.SearchForw(TStrKd(ArgNm));
01105   if (ArgN==-1){
01106     return DfVal;
01107   } else {
01108     int Val;
01109     if (ArgNmValV[ArgN].Dat.IsInt(Val)){return Val;} else {return DfVal;}
01110   }
01111 }
01112 
01113 double TXmlTok::GetFltArgVal(const TStr& ArgNm, const double& DfVal) const {
01114   int ArgN=ArgNmValV.SearchForw(TStrKd(ArgNm));
01115   if (ArgN==-1){
01116     return DfVal;
01117   } else {
01118     double Val;
01119     if (ArgNmValV[ArgN].Dat.IsFlt(Val)){return Val;} else {return DfVal;}
01120   }
01121 }
01122 
01123 TStr TXmlTok::GetStrArgVal(const TStr& ArgNm, const TStr& DfVal) const {
01124   int ArgN=ArgNmValV.SearchForw(TStrKd(ArgNm));
01125   return (ArgN==-1) ? DfVal : ArgNmValV[ArgN].Dat;
01126 }
01127 
01128 void TXmlTok::PutSubTok(const PXmlTok& Tok, const int& SubTokN){
01129   if (SubTokN==-1){
01130     ClrSubTok(); AddSubTok(Tok);
01131   } else {
01132     SubTokV[SubTokN]=Tok;
01133   }
01134 }
01135 
01136 PXmlTok TXmlTok::GetTagTok(const TStr& TagPath) const {
01137   if (TagPath.Empty()){
01138     return (TXmlTok*)this;
01139   } else {
01140     TStr TagNm; TStr RestTagPath; TagPath.SplitOnCh(TagNm, '|', RestTagPath);
01141     PXmlTok SubTok;
01142     for (int SubTokN=0; SubTokN<SubTokV.Len(); SubTokN++){
01143       SubTok=SubTokV[SubTokN];
01144       if ((SubTok->GetSym()==xsyTag)&&(SubTok->GetStr()==TagNm)){break;}
01145       else {SubTok=NULL;}
01146     }
01147     if ((SubTok.Empty())||(RestTagPath.Empty())){return SubTok;}
01148     else {return SubTok->GetTagTok(RestTagPath);}
01149   }
01150 }
01151 
01152 void TXmlTok::GetTagTokV(const TStr& TagPath, TXmlTokV& XmlTokV) const {
01153   XmlTokV.Clr();
01154   TStr PreTagPath; TStr TagNm; TagPath.SplitOnLastCh(PreTagPath, '|', TagNm);
01155   PXmlTok Tok=GetTagTok(PreTagPath);
01156   if (!Tok.Empty()){
01157     for (int SubTokN=0; SubTokN<Tok->GetSubToks(); SubTokN++){
01158       PXmlTok SubTok=Tok->GetSubTok(SubTokN);
01159       if ((SubTok->GetSym()==xsyTag)&&(SubTok->GetStr()==TagNm)){
01160         XmlTokV.Add(SubTok);}
01161     }
01162   }
01163 }
01164 
01165 void TXmlTok::GetTagValV(const TStr& TagNm, const bool& XmlP, TStrV& ValV) const {
01166   if ((Sym==xsyTag)&&(Str==TagNm)){
01167     ValV.Add(GetTokStr(XmlP));
01168   } else {
01169     for (int SubTokN=0; SubTokN<GetSubToks(); SubTokN++){
01170       GetSubTok(SubTokN)->GetTagValV(TagNm, XmlP, ValV);}
01171   }
01172 }
01173 
01174 TStr TXmlTok::GetTagVal(const TStr& TagNm, const bool& XmlP) const {
01175   TStrV ValV; GetTagValV(TagNm, XmlP, ValV);
01176   if (ValV.Len()>0){return ValV[0];} else {return "";}
01177 }
01178 
01179 void TXmlTok::AddTokToChA(const bool& XmlP, TChA& ChA) const {
01180   switch (Sym){
01181     case xsyWs:
01182       ChA+=Str; break;
01183     case xsyStr:
01184       if (XmlP){ChA+=TXmlLx::GetXmlStrFromPlainStr(Str);} else {ChA+=Str;} break;
01185     case xsyQStr:
01186       if (XmlP){ChA+="<![CDATA[";}
01187       ChA+=Str;
01188       if (XmlP){ChA+="]]>";} break;
01189     case xsyTag:
01190       if (XmlP){
01191         ChA+='<'; ChA+=Str;
01192         for (int ArgN=0; ArgN<GetArgs(); ArgN++){
01193           TStr ArgNm; TStr ArgVal; GetArg(ArgN, ArgNm, ArgVal);
01194           if (XmlP){ArgVal=TXmlLx::GetXmlStrFromPlainStr(ArgVal);}
01195           char ArgValQCh=TXmlLx::GetArgValQCh(ArgVal);
01196           ChA+=' '; ChA+=ArgNm; ChA+='=';
01197           ChA+=ArgValQCh; ChA+=ArgVal; ChA+=ArgValQCh;
01198         }
01199       }
01200       if (GetSubToks()==0){
01201         if (XmlP){ChA+="/>";}
01202       } else {
01203         if (XmlP){ChA+=">";}
01204         for (int SubTokN=0; SubTokN<GetSubToks(); SubTokN++){
01205           GetSubTok(SubTokN)->AddTokToChA(XmlP, ChA);}
01206         if (XmlP){ChA+="</"; ChA+=Str; ChA+='>';}
01207       }
01208       break;
01209     default: Fail;
01210   }
01211 }
01212 
01213 TStr TXmlTok::GetTokVStr(const TXmlTokV& TokV, const bool& XmlP){
01214   TChA TokVChA;
01215   for (int TokN=0; TokN<TokV.Len(); TokN++){
01216     if (TokN>0){TokVChA+=' ';}
01217     TokVChA+=TokV[TokN]->GetTokStr(XmlP);
01218   }
01219   return TokVChA;
01220 }
01221 
01222 PXmlTok TXmlTok::GetTok(TXmlLx& Lx){
01223   switch (Lx.Sym){
01224     case xsyWs:
01225     case xsyStr:
01226     case xsyQStr:
01227       return TXmlTok::New(Lx.Sym, Lx.TxtChA);
01228     case xsySTag:
01229     case xsySETag:
01230       return TXmlTok::New(xsyTag, Lx.TagNm, Lx.ArgNmValKdV);
01231     default: Fail; return NULL;
01232   }
01233 }
01234 
01236 // Xml-Document
01237 void TXmlDoc::LoadTxtMiscStar(TXmlLx& Lx){
01238   // [27] Misc ::=  Comment | PI |  S
01239   while ((Lx.Sym==xsyComment)||(Lx.Sym==xsyPI)||(Lx.Sym==xsyWs)){
01240     Lx.GetSym();}
01241 }
01242 
01243 PXmlTok TXmlDoc::LoadTxtElement(TXmlLx& Lx){
01244   // [39]  element ::=  EmptyElemTag | STag content ETag
01245   PXmlTok Tok;
01246   if (Lx.Sym==xsySETag){
01247     Tok=TXmlTok::GetTok(Lx);
01248   } else
01249   if (Lx.Sym==xsySTag){
01250     Tok=TXmlTok::GetTok(Lx);
01251     forever {
01252       Lx.GetSym();
01253       if (Lx.Sym==xsyETag){
01254         if (Tok->GetStr()==Lx.TagNm){
01255           break;
01256         } else {
01257           TStr MsgStr=TStr("Invalid End-Tag '")+Lx.TagNm+
01258            "' ('"+Tok->GetStr()+"' expected).";
01259           Lx.EThrow(MsgStr);
01260         }
01261       } else {
01262         PXmlTok SubTok;
01263         switch (Lx.Sym){
01264           case xsySTag:
01265             SubTok=LoadTxtElement(Lx); break;
01266           case xsySETag:
01267           case xsyStr:
01268           case xsyQStr:
01269           case xsyWs:
01270             SubTok=TXmlTok::GetTok(Lx); break;
01271           case xsyPI:
01272           case xsyComment:
01273             break;
01274           default: Lx.EThrow("Content or End-Tag expected.");
01275         }
01276         if (!SubTok.Empty()){
01277           Tok->AddSubTok(SubTok);}
01278       }
01279     }
01280   } else
01281   if (Lx.Sym==xsyETag){
01282     TStr MsgStr=
01283      TStr("Xml-Element (Start-Tag or Empty-Element-Tag) required.")+
01284      TStr::GetStr(Lx.TagNm, " End-Tag </%s> encountered.");
01285     Lx.EThrow(MsgStr);
01286   } else {
01287     Lx.EThrow("Xml-Element (Start-Tag or Empty-Element-Tag) required.");
01288   }
01289   return Tok;
01290 }
01291 
01292 PXmlTok TXmlDoc::GetTagTok(const TStr& TagPath) const {
01293   if (TagPath.Empty()){
01294     return Tok;
01295   } else {
01296     TStr TagNm; TStr RestTagPath; TagPath.SplitOnCh(TagNm, '|', RestTagPath);
01297     if ((Tok->GetSym()==xsyTag)&&(Tok->GetStr()==TagNm)){
01298       if (RestTagPath.Empty()){return Tok;}
01299       else {return Tok->GetTagTok(RestTagPath);}
01300     } else {
01301       return NULL;
01302     }
01303   }
01304 }
01305 
01306 void TXmlDoc::PutTagTokStr(const TStr& TagPath, const TStr& TokStr) const {
01307   PXmlTok Tok=GetTagTok(TagPath);
01308   Tok->ClrSubTok();
01309   PXmlTok StrTok=TXmlTok::New(xsyStr, TokStr);
01310   Tok->AddSubTok(StrTok);
01311 }
01312 
01313 void TXmlDoc::GetTagTokV(const TStr& TagPath, TXmlTokV& XmlTokV) const {
01314   XmlTokV.Clr();
01315   TStr PreTagPath; TStr TagNm; TagPath.SplitOnLastCh(PreTagPath, '|', TagNm);
01316   PXmlTok Tok=GetTagTok(PreTagPath);
01317   if (!Tok.Empty()){
01318     for (int SubTokN=0; SubTokN<Tok->GetSubToks(); SubTokN++){
01319       PXmlTok SubTok=Tok->GetSubTok(SubTokN);
01320       if ((SubTok->GetSym()==xsyTag)&&(SubTok->GetStr()==TagNm)){
01321         XmlTokV.Add(SubTok);}
01322     }
01323   }
01324 }
01325 
01326 bool TXmlDoc::GetTagTokBoolArgVal(
01327  const TStr& TagPath, const TStr& ArgNm, const bool& DfVal) const {
01328   PXmlTok TagTok;
01329   if (IsTagTok(TagPath, TagTok)){
01330     return TagTok->GetBoolArgVal(ArgNm, DfVal);}
01331   else {return DfVal;}
01332 }
01333 
01334 int TXmlDoc::GetTagTokIntArgVal(
01335  const TStr& TagPath, const TStr& ArgNm, const int& DfVal) const {
01336   PXmlTok TagTok;
01337   if (IsTagTok(TagPath, TagTok)){
01338     return TagTok->GetIntArgVal(ArgNm, DfVal);}
01339   else {return DfVal;}
01340 }
01341 
01342 double TXmlDoc::GetTagTokFltArgVal(
01343  const TStr& TagPath, const TStr& ArgNm, const double& DfVal) const {
01344   PXmlTok TagTok;
01345   if (IsTagTok(TagPath, TagTok)){
01346     return TagTok->GetFltArgVal(ArgNm, DfVal);}
01347   else {return DfVal;}
01348 }
01349 
01350 TStr TXmlDoc::GetTagTokStrArgVal(
01351  const TStr& TagPath, const TStr& ArgNm, const TStr& DfVal) const {
01352   PXmlTok TagTok;
01353   if (IsTagTok(TagPath, TagTok)){
01354     return TagTok->GetStrArgVal(ArgNm, DfVal);}
01355   else {return DfVal;}
01356 }
01357 
01358 TStr TXmlDoc::GetXmlStr(const TStr& Str){
01359   TChA ChA=Str;
01360   TChA XmlChA;
01361   for (int ChN=0; ChN<ChA.Len(); ChN++){
01362     uchar Ch=ChA[ChN];
01363     if ((' '<=Ch)&&(Ch<='~')){
01364       if (Ch=='&'){XmlChA+="&amp;";}
01365       else if (Ch=='>'){XmlChA+="&lt;";}
01366       else if (Ch=='<'){XmlChA+="&gt;";}
01367       else if (Ch=='\''){XmlChA+="&apos;";}
01368       else if (Ch=='\"'){XmlChA+="&quot;";}
01369       else {XmlChA+=Ch;}
01370     } else {
01371       XmlChA+="&#"; XmlChA+=TUInt::GetStr(Ch); XmlChA+=";";
01372     }
01373   }
01374   return XmlChA;
01375 }
01376 
01377 bool TXmlDoc::SkipTopTag(const PSIn& SIn){
01378   bool Ok=true;
01379   TXmlLx Lx(SIn, xspIntact);
01380   try {
01381     Lx.GetSym();
01382     // [22] prolog ::=  XMLDecl? Misc* (doctypedecl Misc*)?
01383     if (Lx.Sym==xsyXmlDecl){Lx.GetSym();}
01384     LoadTxtMiscStar(Lx);
01385     if (Lx.Sym==xsyDocTypeDecl){Lx.GetSym();}
01386     LoadTxtMiscStar(Lx);
01387     Ok=true;
01388   }
01389   catch (PExcept Except){
01390     Ok=false;
01391   }
01392   return Ok;
01393 }
01394 
01395 PXmlDoc TXmlDoc::LoadTxt(TXmlLx& Lx){
01396   PXmlDoc Doc=TXmlDoc::New();
01397   // [1]  document ::=  prolog element Misc*
01398   try {
01399     Lx.GetSym();
01400     // [22] prolog ::=  XMLDecl? Misc* (doctypedecl Misc*)?
01401     if (Lx.Sym==xsyXmlDecl){Lx.GetSym();}
01402     LoadTxtMiscStar(Lx);
01403     if (Lx.Sym==xsyDocTypeDecl){Lx.GetSym();}
01404     LoadTxtMiscStar(Lx);
01405     Doc->Tok=LoadTxtElement(Lx);
01406     LoadTxtMiscStar(Lx);
01407     Doc->Ok=true; Doc->MsgStr="Ok";
01408   }
01409   catch (PExcept& Except){
01410     Doc->Ok=false; Doc->MsgStr=Except->GetMsgStr();
01411   }
01412   return Doc;
01413 }
01414 
01415 PXmlDoc TXmlDoc::LoadTxt(const PSIn& SIn, const TXmlSpacing& Spacing){
01416   TXmlLx Lx(SIn, Spacing); return LoadTxt(Lx);
01417 }
01418 
01419 PXmlDoc TXmlDoc::LoadTxt(const TStr& FNm, const TXmlSpacing& Spacing){
01420   PSIn SIn=TFIn::New(FNm); return LoadTxt(SIn, Spacing);
01421 }
01422 
01423 void TXmlDoc::LoadTxt(
01424  const TStr& FNm, TXmlDocV& XmlDocV, const TXmlSpacing& Spacing){
01425   XmlDocV.Clr();
01426   PSIn SIn=TFIn::New(FNm);
01427   TXmlLx Lx(SIn, Spacing);
01428   PXmlDoc XmlDoc;
01429   forever {
01430     Lx.SkipWs();
01431     XmlDoc=LoadTxt(Lx);
01432     if (XmlDoc->IsOk()){XmlDocV.Add(XmlDoc);}
01433     else {break;}
01434   }
01435 }
01436 
01437 PXmlDoc TXmlDoc::LoadStr(const TStr& Str){
01438   PSIn SIn=TStrIn::New(Str);
01439   return LoadTxt(SIn);
01440 }
01441 
01442 void TXmlDoc::SaveStr(TStr& Str){
01443   PSOut SOut=TMOut::New(); TMOut& MOut=*(TMOut*)SOut();
01444   SaveTxt(SOut);
01445   Str=MOut.GetAsStr();
01446 }
01447 
01449 // Fast and dirty XML parser
01450 // Very basic: it handles only <item>string</item>; no comments, no arguments.
// Reads the next symbol, stores its kind in Sym and its text in SymStr, and
// returns the kind.
//  - A symbol buffered in NextSym/NextSymStr (PeekSym fills these) is
//    returned first and the buffer is reset.
//  - Leading white space is skipped; end of input yields xsyEof.
//  - "<name ...>" yields xsySTag and "</name>" yields xsyETag, with SymStr
//    holding the characters between the brackets (without the leading '/').
//  - A tag whose text ends in '/' is also reported as xsyETag.
//  - Anything else is read up to the next '<' (or end of input), decoded
//    with GetPlainStrFromXmlStr and returned as xsyStr.
01451 TXmlLxSym TXmlParser::GetSym() {
01452   if (NextSym != xsyUndef) {
01453     Sym = NextSym;  NextSym=xsyUndef;
01454     SymStr=NextSymStr;  NextSymStr.Clr();
01455     return Sym;
01456   }
01457   SymStr.Clr();
01458   char Ch;
// skip white space; Ch ends up holding the first non-white-space character
01459   while (TCh::IsWs(Ch=GetCh())) { }
01460   if (Ch == TCh::EofCh) { Sym = xsyEof; return xsyEof; }
01461   if (Ch == '<') { // load tag
01462     Ch = GetCh();
01463     if (Ch == '/') { Sym = xsyETag; }
01464     else { Sym = xsySTag;  SymStr.Push(Ch); }
// collect the rest of the tag text up to '>' or end of input
01465     while((Ch=GetCh())!='>' && Ch!=TCh::EofCh) { SymStr.Push(Ch); }
01466     const int StrLen = SymStr.Len();
// trailing '/': overwrite it, and any white space before it, with 0
// NOTE(review): the backward scan has no lower bound, and writing 0 may not
// change what SymStr.Len() reports - confirm against TChA
01467     if (StrLen > 1 && SymStr[StrLen-1] == '/') {
01468       Sym = xsyETag; SymStr[StrLen-1] = 0;
01469       for (char *c = SymStr.CStr()+StrLen-2; TCh::IsWs(*c); c--) { *c=0; }
01470     }
01471   } else { // load string
// _SymStr is not declared in this function - presumably a member scratch
// buffer holding the raw, still-escaped text
01472     _SymStr.Clr();  _SymStr.Push(Ch);
01473     while (! RSIn.Eof() && RSIn.PeekCh() != '<') { _SymStr.Push(GetCh()); }
01474     GetPlainStrFromXmlStr(_SymStr, SymStr);
01475     Sym = xsyStr;
01476   }
// a tag cut short by end of input is discarded and reported as xsyEof
01477   if (Ch == TCh::EofCh) { SymStr.Clr(); Sym = xsyEof; return xsyEof; }
01478   return Sym;
01479 }
01480 
01481 TXmlLxSym TXmlParser::GetSym(TChA& _SymStr) {
01482   GetSym();
01483   _SymStr = SymStr;
01484   return Sym;
01485 }
01486 
01487 TXmlLxSym TXmlParser::PeekSym() {
01488   if (NextSym == xsyUndef) {
01489     const TXmlLxSym TmpSim=Sym;
01490     const TChA TmpSymStr=SymStr;
01491     NextSym=GetSym(NextSymStr);
01492     Sym=TmpSim;
01493     SymStr=TmpSymStr;
01494   }
01495   return NextSym;
01496 }
01497 
01498 TXmlLxSym TXmlParser::PeekSym(TChA& _SymStr) {
01499   PeekSym();
01500   _SymStr = NextSymStr;
01501   return NextSym;
01502 }
01503 
01504 void TXmlParser::SkipTillTag(const TChA& _SymStr) {
01505   while(PeekSym() != xsyEof) {
01506     if (NextSymStr == _SymStr) { return; }
01507     GetSym();
01508   }
01509 }
01510 
01511 // get <tag>value</tag>
01512 void TXmlParser::GetTagVal(const TChA& TagStr, TChA& TagVal) {
01513   EAssertR(GetTag(TagStr) == xsySTag, TStr::Fmt("Expected '<%s>'. Found '%s'", TagStr.CStr(), SymStr.CStr()).CStr());
01514   EAssertR(GetSym(TagVal) == xsyStr, "Expected string tag.");
01515   EAssertR(GetTag(TagStr) == xsyETag, TStr::Fmt("Expected '</%s>'. Found '%s'", TagStr.CStr(), SymStr.CStr()).CStr());
01516 }
01517 
01518 TXmlLxSym TXmlParser::GetTag(const TChA& TagStr) {
01519   GetSym();
01520   EAssertR(TagStr==SymStr, TStr::Fmt("Expected xml symbol '%s'. Found '%s'",
01521     TagStr.CStr(), SymStr.CStr()).CStr());
01522   return Sym;
01523 }
01524 
// Decodes the references in XmlStr into PlainChA (cleared first).
//  - Characters other than '&' are copied unchanged.
//  - "&#<digits>;" and "&#x<hex-digits>;" append the single character
//    uchar(code); the accumulated code is narrowed by that cast.
//  - "&quot;", "&amp;", "&apos;", "&lt;", "&gt;" append the matching character.
//  - Any other "&<name>;" is consumed and appends nothing.
//  - When a reference is malformed (no digits/name, or no closing ';'), the
//    characters read so far for it are dropped and scanning resumes at the
//    current character.
// Scanning walks the C string of XmlStr and stops at its first 0 character.
01525 void TXmlParser::GetPlainStrFromXmlStr(const TChA& XmlStr, TChA& PlainChA) {
// function-local static: one EntityNm buffer is shared by all calls
// NOTE(review): this looks unsafe for concurrent calls - confirm whether
// the parser is ever used from several threads
01526   static TChA EntityNm;
01527   PlainChA.Clr();
01528   const char *Ch = XmlStr.CStr();
01529   while (*Ch){
01530     if (*Ch!='&'){ PlainChA+=*Ch; Ch++; }
01531     else {
01532       if (*++Ch=='#'){
// RefChA collects the digit characters (used only for the non-empty check);
// RefCd accumulates the numeric value
01533         TChA RefChA; int RefCd=0;
01534         if (*++Ch=='x'){
// hexadecimal character code: the 'x' is skipped by advancing before each test
01535           forever {  Ch++;
01536             if (TCh::IsHex(*Ch)){ RefChA+=*Ch;  RefCd=RefCd*16+TCh::GetHex(*Ch); }
01537             else { break; } }
01538         } else { // decimal character code
01539           forever {
01540             if (TCh::IsNum(*Ch)){ RefChA+=*Ch; RefCd=RefCd*10+TCh::GetNum(*Ch); }
01541             else { break; } Ch++; }
01542         }
// emit only for a well-formed reference: at least one digit and a ';'
01543         if ((!RefChA.Empty())&&(*Ch==';')){
01544           Ch++;  const uchar RefCh=uchar(RefCd);  PlainChA+=RefCh; }
01545       } else {
// named entity: collect the name up to ';' or the end of the string
01546         EntityNm.Clr();
01547         while ((*Ch)&&(*Ch!=';')){EntityNm+=*Ch; Ch++;}
01548         if ((!EntityNm.Empty())&&(*Ch==';')){  Ch++;
// only the five predefined entities produce output; others are dropped
01549           if (EntityNm=="quot"){PlainChA+='"';}
01550           else if (EntityNm=="amp"){PlainChA+='&';}
01551           else if (EntityNm=="apos"){PlainChA+='\'';}
01552           else if (EntityNm=="lt"){PlainChA+='<';}
01553           else if (EntityNm=="gt"){PlainChA+='>';}
01554         }
01555       }
01556     }
01557   }
01558 }