SNAP Library 2.2, User Reference  2014-03-11 19:15:55
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
xml.cpp
Go to the documentation of this file.
00001 
00002 // Xml-Object-Saving
00003 TStrStrH TXmlObjSer::TypeNmToTagNmH; // static table filled by GetTagNm: type name -> XML tag name derived from it
00004 
00005 TStr TXmlObjSer::GetTagNm(const TStr& TypeNm){
00006   TStr& XmlTagNm=TypeNmToTagNmH.AddDat(TypeNm);
00007   if (XmlTagNm.Empty()){
00008     TChA XmlTagChA=TypeNm;
00009     for (int ChN=0; ChN<XmlTagChA.Len(); ChN++){
00010       char Ch=XmlTagChA[ChN];
00011       if (!((('A'<=Ch)&&(Ch<='Z'))||(('a'<=Ch)&&(Ch<='z'))||(('0'<=Ch)&&(Ch<='9')))){
00012         XmlTagChA.PutCh(ChN, '_');
00013       }
00014     }
00015     while ((XmlTagChA.Len()>0)&&(XmlTagChA.LastCh()=='_')){
00016       XmlTagChA.Pop();}
00017     XmlTagNm=XmlTagChA;
00018   }
00019   return XmlTagNm;
00020 }
00021 
00022 void TXmlObjSer::AssertXmlHd(
00023  const PXmlTok& XmlTok, const TStr& Nm, const TStr& TypeNm){
00024   // check if the token is full
00025   EAssertR(!XmlTok.Empty(), "Xml-Token Empty");
00026   // if name is empty then tag=type else tag=name
00027   if (!Nm.Empty()){
00028     // check if the token is tag
00029     if (!XmlTok->IsTag()){
00030       TStr ArgStr1="Expected: Tag";
00031       TStr ArgStr2=TStr("Found: ")+XmlTok->GetSymStr();
00032       TExcept::Throw("Invalid Xml-Token", ArgStr1, ArgStr2);
00033     }
00034     if (Nm!="-"){
00035       // check if the tag is correct
00036       if (!XmlTok->IsTag(Nm)){
00037         TStr ArgStr1=TStr("Expected: ")+Nm;
00038         TStr ArgStr2=TStr("Found: ")+XmlTok->GetStr();
00039         TExcept::Throw("Invalid Xml-Tag", ArgStr1, ArgStr2);
00040       }
00041       // check if the type is correct
00042       TStr TypeArgVal=XmlTok->GetStrArgVal("Type");
00043       if (TypeArgVal!=TypeNm){
00044         TStr ArgStr1=TStr("Expected: ")+TypeNm;
00045         TStr ArgStr2=TStr("Found: ")+TypeArgVal;
00046         TExcept::Throw("Invalid Xml-Type", ArgStr1, ArgStr2);
00047       }
00048     }
00049   } else {
00050     // check if the tag is correct
00051     if (!XmlTok->IsTag(TypeNm)){
00052       TStr ArgStr1=TStr("Expected: ")+TypeNm;
00053       TStr ArgStr2=TStr("Found: ")+XmlTok->GetSymStr();
00054       TExcept::Throw("Invalid Xml-Type-Tag", ArgStr1, ArgStr2);
00055     }
00056   }
00057 }
00058 
00059 bool TXmlObjSer::GetBoolArg(const PXmlTok& XmlTok, const TStr& Nm){
00060   TStr ValStr;
00061   if (XmlTok->IsArg(Nm, ValStr)){
00062     bool Val;
00063     if (ValStr.IsBool(Val)){
00064       return Val;
00065     } else {
00066       TExcept::Throw("Invalid Xml-Argument Boolean-Value", Nm, ValStr);
00067     }
00068   } else {
00069     TExcept::Throw("Xml-Argument Missing", Nm);
00070   }
00071   Fail; return 0;
00072 }
00073 
00074 int TXmlObjSer::GetIntArg(const PXmlTok& XmlTok, const TStr& Nm){
00075   TStr ValStr;
00076   if (XmlTok->IsArg(Nm, ValStr)){
00077     int Val;
00078     if (ValStr.IsInt(Val)){
00079       return Val;
00080     } else {
00081       TExcept::Throw("Invalid Xml-Argument Integer-Value", Nm, ValStr);
00082     }
00083   } else {
00084     TExcept::Throw("Xml-Argument Missing", Nm);
00085   }
00086   Fail; return 0;
00087 }
00088 
00089 int64 TXmlObjSer::GetInt64Arg(const PXmlTok& XmlTok, const TStr& Nm){
00090   TStr ValStr;
00091   if (XmlTok->IsArg(Nm, ValStr)){
00092     int64 Val;
00093     if (ValStr.IsInt64(Val)){
00094       return Val;
00095     } else {
00096       TExcept::Throw("Invalid Xml-Argument Integer64-Value", Nm, ValStr);
00097     }
00098   } else {
00099     TExcept::Throw("Xml-Argument Missing", Nm);
00100   }
00101   Fail; return 0;
00102 }
00103 
00104 double TXmlObjSer::GetFltArg(const PXmlTok& XmlTok, const TStr& Nm){
00105   TStr ValStr;
00106   if (XmlTok->IsArg(Nm, ValStr)){
00107     double Val;
00108     if (ValStr.IsFlt(Val)){
00109       return Val;
00110     } else {
00111       TExcept::Throw("Invalid Xml-Argument Double-Value", Nm, ValStr);
00112     }
00113   } else {
00114     TExcept::Throw("Xml-Argument Missing", Nm);
00115   }
00116   Fail; return 0;
00117 }
00118 
00120 // Xml-Object-Serialization-Tag-Name
00121 TXmlObjSerTagNm::TXmlObjSerTagNm(
00122  TSOut& _SOut, const bool& ETagP,
00123  const TStr& Nm, const TStr& TypeNm,
00124  const TStr& ArgNm, const TStr& ArgVal):
00125   TagNm(), SOut(&_SOut){
00126   if (Nm!="-"){
00127     SOut->PutCh('<');
00128     if (Nm.Empty()){
00129       SOut->PutStr(TagNm=TypeNm);
00130     } else {
00131       SOut->PutStr(TagNm=Nm);
00132       SOut->PutStr(" Type=\""); SOut->PutStr(TypeNm); SOut->PutCh('"');
00133     }
00134     if (!ArgNm.Empty()){
00135       SOut->PutCh(' '); SOut->PutStr(ArgNm); SOut->PutCh('=');
00136       SOut->PutCh('"'); SOut->PutStr(ArgVal); SOut->PutCh('"');
00137     }
00138     if (ETagP){
00139       SOut->PutCh('/'); TagNm="";}
00140     SOut->PutCh('>');
00141   }
00142 }
00143 
00144 TXmlObjSerTagNm::TXmlObjSerTagNm(
00145  TSOut& _SOut, const bool& ETagP,
00146  const TStr& Nm, const TStr& TypeNm,
00147  const TStr& ArgNm1, const TStr& ArgVal1,
00148  const TStr& ArgNm2, const TStr& ArgVal2,
00149  const TStr& ArgNm3, const TStr& ArgVal3,
00150  const TStr& ArgNm4, const TStr& ArgVal4):
00151   TagNm(), SOut(&_SOut){
00152   if (Nm!="-"){
00153     SOut->PutCh('<');
00154     if (Nm.Empty()){
00155       SOut->PutStr(TagNm=TypeNm);
00156     } else {
00157       SOut->PutStr(TagNm=Nm);
00158       SOut->PutStr(" Type=\""); SOut->PutStr(TypeNm); SOut->PutCh('"');
00159     }
00160     if (!ArgNm1.Empty()){
00161       SOut->PutCh(' '); SOut->PutStr(ArgNm1); SOut->PutCh('=');
00162       SOut->PutCh('"'); SOut->PutStr(ArgVal1); SOut->PutCh('"');
00163     }
00164     if (!ArgNm2.Empty()){
00165       SOut->PutCh(' '); SOut->PutStr(ArgNm2); SOut->PutCh('=');
00166       SOut->PutCh('"'); SOut->PutStr(ArgVal2); SOut->PutCh('"');
00167     }
00168     if (!ArgNm3.Empty()){
00169       SOut->PutCh(' '); SOut->PutStr(ArgNm3); SOut->PutCh('=');
00170       SOut->PutCh('"'); SOut->PutStr(ArgVal3); SOut->PutCh('"');
00171     }
00172     if (!ArgNm4.Empty()){
00173       SOut->PutCh(' '); SOut->PutStr(ArgNm4); SOut->PutCh('=');
00174       SOut->PutCh('"'); SOut->PutStr(ArgVal4); SOut->PutCh('"');
00175     }
00176     if (ETagP){
00177       SOut->PutCh('/'); TagNm="";}
00178     SOut->PutCh('>');
00179   }
00180 }
00181 
00182 TXmlObjSerTagNm::~TXmlObjSerTagNm(){
00183   if (!TagNm.Empty()){
00184     SOut->PutCh('<'); SOut->PutCh('/'); SOut->PutStr(TagNm); SOut->PutCh('>');
00185   }
00186 }
00187 
00189 // Xml-Chars
00190 void TXmlChDef::SetChTy(TBSet& ChSet, const int& MnCh, const int& MxCh){
00191   IAssert((0<=MnCh)&&((MxCh==-1)||((MnCh<=MxCh)&&(MxCh<Chs))));
00192   ChSet.Incl(MnCh);
00193   for (int Ch=MnCh+1; Ch<=MxCh; Ch++){
00194     ChSet.Incl(Ch);}
00195 }
00196 
00197 void TXmlChDef::SetChTy(TBSet& ChSet, const TStr& Str){
00198   for (int ChN=0; ChN<Str.Len(); ChN++){
00199     uchar Ch=Str[ChN];
00200     ChSet.Incl(Ch);
00201   }
00202 }
00203 
00204 void TXmlChDef::SetEntityVal(const TStr& Nm, const TStr& Val){
00205   EntityNmToValH.AddDat(Nm, Val);
00206 }
00207 
00208 TXmlChDef::TXmlChDef():
00209   Chs(TUCh::Vals),
00210   CharChSet(), CombChSet(), ExtChSet(),
00211   LetterChSet(), DigitChSet(), NameChSet(), PubidChSet(),
00212   EntityNmToValH(100){
00213 
00214   // Character-Sets
00215   // Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | ...
00216   CharChSet.Gen(Chs);
00217   // ... because of DMoz (temporary patch)
00218   SetChTy(CharChSet, 0x1); SetChTy(CharChSet, 0x3); SetChTy(CharChSet, 0x6);
00219   SetChTy(CharChSet, 11); SetChTy(CharChSet, 24); SetChTy(CharChSet, 27);
00220   // regular characters
00221   SetChTy(CharChSet, 0x9); SetChTy(CharChSet, 0xA); SetChTy(CharChSet, 0xD);
00222   SetChTy(CharChSet, 0x20, TUCh::Mx);
00223   // BaseChar ::=  [#x0041-#x005A] | [#x0061-#x007A] | [#x00C0-#x00D6] |
00224   //  [#x00D8-#x00F6] | [#x00F8-#x00FF] | ...
00225   TBSet BaseChSet(Chs);
00226   SetChTy(BaseChSet, 0x41, 0x5A); SetChTy(BaseChSet, 0x61, 0x7A);
00227   SetChTy(BaseChSet, 0xC0, 0xD6); SetChTy(BaseChSet, 0xD8, 0xF6);
00228   SetChTy(BaseChSet, 0xF8, 0xFF);
00229   // Ideographic ::= ...
00230   TBSet IdeoChSet(Chs);
00231   // CombiningChar ::= ...
00232   CombChSet.Gen(Chs);
00233   // Extender ::=  #x00B7 | ...
00234   ExtChSet.Gen(Chs);
00235   SetChTy(ExtChSet, 0xB7);
00236   // Letter ::=  BaseChar | Ideographic
00237   LetterChSet=BaseChSet|IdeoChSet;
00238   // Digit ::=  [#x0030-#x0039] | ...
00239   DigitChSet.Gen(Chs);
00240   SetChTy(DigitChSet, 0x30, 0x39);
00241   // NameChar ::=  Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar
00242   NameChSet=LetterChSet|DigitChSet|
00243    uchar('.')|uchar('-')|uchar('_')|uchar(':')|CombChSet;
00244   // PubidChar ::=  #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
00245   PubidChSet.Gen(Chs);
00246   SetChTy(PubidChSet, 0x20); SetChTy(PubidChSet, 0xD); SetChTy(PubidChSet, 0xA);
00247   SetChTy(PubidChSet, 'a', 'z'); SetChTy(PubidChSet, 'A', 'Z');
00248   SetChTy(PubidChSet, '0', '9'); SetChTy(PubidChSet, "-'()+,./:=?;!*#@$_%");
00249 
00250   // Standard-Entity-Sequences
00251   SetEntityVal("amp", "&");
00252   SetEntityVal("lt", "<"); SetEntityVal("gt", ">");
00253   SetEntityVal("apos", "'"); SetEntityVal("quot", "\"");
00254 }
00255 
00257 // Xml-Lexical
00258 TXmlChDef TXmlLx::ChDef; // static character-class and predefined-entity tables consulted by the lexer methods below
00259 
00260 uchar TXmlLx::GetCh(){
00261   EAssert(Ch!=TCh::EofCh);
00262   PrevCh=Ch;
00263   if (ChStack.Empty()){Ch=(RSIn.Eof()) ? TCh::EofCh : RSIn.GetCh();}
00264   else {Ch=ChStack.Pop();}
00265   ChN++; if (Ch==TCh::LfCh){LnN++; LnChN=0;} else {LnChN++;}
00266   //putchar(Ch);
00267   return Ch;
00268 }
00269 
00270 void TXmlLx::ToNrSpacing(){
00271   if (Spacing==xspIntact){
00272   } else
00273   if (Spacing==xspPreserve){
00274     int SrcChN=0; int DstChN=0;
00275     while (SrcChN<TxtChA.Len()){
00276       if (TxtChA[SrcChN]==TCh::CrCh){
00277         TxtChA.PutCh(DstChN, TCh::LfCh); SrcChN++; DstChN++;
00278         if ((SrcChN<TxtChA.Len())&&(TxtChA[SrcChN]==TCh::LfCh)){SrcChN++;}
00279       } else {
00280         if (SrcChN!=DstChN){
00281           TxtChA.PutCh(DstChN, TxtChA[SrcChN]);}
00282         SrcChN++; DstChN++;
00283       }
00284     }
00285     TxtChA.Trunc(DstChN);
00286   } else
00287   if (Spacing==xspSeparate){
00288     // squeeze series of white-spaces to single space
00289     int SrcChN=0; int DstChN=0;
00290     while (SrcChN<TxtChA.Len()){
00291       if (ChDef.IsWs(TxtChA[SrcChN])){
00292         if ((DstChN>0)&&(TxtChA[DstChN-1]==' ')){
00293           SrcChN++;
00294         } else {
00295           TxtChA.PutCh(DstChN, ' ');
00296           SrcChN++; DstChN++;
00297         }
00298       } else {
00299         TxtChA.PutCh(DstChN, TxtChA[SrcChN]);
00300         SrcChN++; DstChN++;
00301       }
00302     }
00303     TxtChA.Trunc(DstChN);
00304   } else
00305   if (Spacing==xspTruncate){
00306     // cut leading and trailing white-spaces and
00307     // squeeze series of white-spaces to single space
00308     int SrcChN=0; int DstChN=0;
00309     while (SrcChN<TxtChA.Len()){
00310       if (ChDef.IsWs(TxtChA[SrcChN])){
00311         if ((DstChN>0)&&(TxtChA[DstChN-1]==' ')){
00312           SrcChN++;
00313         } else {
00314           TxtChA.PutCh(DstChN, ' ');
00315           SrcChN++; DstChN++;
00316         }
00317       } else {
00318         TxtChA.PutCh(DstChN, TxtChA[SrcChN]);
00319         SrcChN++; DstChN++;
00320       }
00321     }
00322     TxtChA.Trunc(DstChN);
00323     // delete trailing white-spaces
00324     while ((TxtChA.Len()>0)&&(ChDef.IsWs(TxtChA.LastCh()))){
00325       TxtChA.Pop();}
00326   } else {
00327     Fail;
00328   }
00329 }
00330 
00331 void TXmlLx::GetWs(const bool& IsRq){
00332   // [3] S ::=  (#x20 | #x9 | #xD | #xA)+
00333   int WSpaces=0; TxtChA.Clr();
00334   while (ChDef.IsWs(Ch)){
00335     WSpaces++; TxtChA+=Ch; GetCh();}
00336   if (IsRq&&(WSpaces==0)){
00337     EThrow("White-space required.");}
00338 }
00339 
00340 TStr TXmlLx::GetReference(){
00341   // [67] Reference ::=  EntityRef | CharRef
00342   if (Ch=='#'){
00343     // [66]  CharRef ::=  '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
00344     TChA RefChA; int RefCd=0;
00345     if (GetCh()=='x'){
00346       // hex-decimal character code
00347       forever {
00348         GetCh();
00349         if (TCh::IsHex(Ch)){
00350           RefChA+=Ch;
00351           RefCd=RefCd*16+TCh::GetHex(Ch);
00352         } else {
00353           break;
00354         }
00355       }
00356     } else {
00357       // decimal character code
00358       forever {
00359         if (TCh::IsNum(Ch)){
00360           RefChA+=Ch;
00361           RefCd=RefCd*10+TCh::GetNum(Ch);
00362         } else {
00363           break;
00364         }
00365         GetCh();
00366       }
00367     }
00368     if ((!RefChA.Empty())&&(Ch==';')){
00369       GetCh();
00370           if (RefCd < 0x80) {
00371                   // 8-bit char
00372               uchar RefCh=uchar(RefCd);
00373                   return TStr(RefCh);
00374           } else {
00375                   TStr ResStr = TUnicode::EncodeUtf8(RefCd);
00376                   return ResStr;
00377           }
00378     } else {
00379       EThrow("Invalid Char-Reference."); Fail; return TStr();
00380     }
00381   } else {
00382     // [68]  EntityRef ::=  '&' Name ';'
00383     TStr EntityNm=GetName();
00384     if ((!EntityNm.Empty())&&(Ch==';')){
00385       GetCh();
00386       TStr EntityVal;
00387       if (IsEntityNm(EntityNm, EntityVal)){/*intentionaly empty*/}
00388       else if (ChDef.IsEntityNm(EntityNm, EntityVal)){/*intentionaly empty*/}
00389       else {EThrow(TStr("Entity-Reference (")+EntityNm+") does not exist.");}
00390       return EntityVal;
00391     } else {
00392       EThrow("Invalid Entity-Reference."); Fail; return TStr();
00393     }
00394   }
00395 }
00396 
00397 TStr TXmlLx::GetPEReference(){
00398   // [69]  PEReference ::=  '%' Name ';'
00399   TStr EntityNm=GetName();
00400   if ((EntityNm.Empty())||(Ch!=';')){EThrow("Invalid PEntity-Reference.");}
00401   GetCh();
00402   TStr EntityVal;
00403   if (IsPEntityNm(EntityNm, EntityVal)){/*intentionaly empty*/}
00404   else {EThrow(TStr("PEntity-Reference (")+EntityNm+") does not exist.");}
00405   return EntityVal;
00406 }
00407 
00408 void TXmlLx::GetEq(){
00409   // [25] Eq ::=  S? '=' S?
00410   GetWs(false);
00411   if (Ch=='='){GetCh();}
00412   else {EThrow("Equality ('=') character expected.");}
00413   GetWs(false);
00414 }
00415 
00416 TStr TXmlLx::GetName(){
00417   // [5] Name ::=  (Letter | '_' | ':') (NameChar)*
00418   TChA NmChA;
00419   if (ChDef.IsFirstNameCh(Ch)){
00420     do {NmChA+=Ch;} while (ChDef.IsName(GetCh()));
00421   } else {
00422     EThrow("Invalid first name character.");
00423     // EThrow(TStr::Fmt("Invalid first name character [%u:'%c%c%c%c%c'].", 
00424     //  uint(Ch), Ch, RSIn.GetCh(), RSIn.GetCh(), RSIn.GetCh(), RSIn.GetCh()));
00425   }
00426   return NmChA;
00427 }
00428 
00429 TStr TXmlLx::GetName(const TStr& RqNm){
00430   TStr Nm=GetName();
00431   // test if the name is equal to the required name
00432   if (Nm==RqNm){return RqNm;}
00433   else {EThrow(TStr("Name '")+RqNm+"' expected."); Fail; return TStr();}
00434 }
00435 
00436 void TXmlLx::GetComment(){
00437   // [15] Comment ::=  {{'<!-}}-' ((Char - '-') | ('-' (Char - '-')))* '-->'
00438   if (GetCh()!='-'){EThrow("Invalid comment start.");}
00439   TxtChA.Clr();
00440   forever {
00441     GetCh();
00442     if (!ChDef.IsChar(Ch)){EThrow("Invalid comment character.");}
00443     if (Ch=='-'){
00444       if (GetCh()=='-'){
00445         if (GetCh()=='>'){GetCh(); break;} // final bracket
00446         else {EThrow("Invalid comment end.");}
00447       } else {
00448         if (!ChDef.IsChar(Ch)){EThrow("Invalid comment character.");}
00449         TxtChA+='-'; TxtChA+=Ch; // special case if single '-'
00450       }
00451     } else {
00452       TxtChA+=Ch; // usual char
00453     }
00454   }
00455 }
00456 
00457 TStr TXmlLx::GetAttValue(){
00458   // [10]  AttValue ::=  '"' ([^<&"] | Reference)* '"'
00459   //  |  "'" ([^<&'] | Reference)* "'"
00460   uchar QCh=Ch;
00461   if ((QCh!='"')&&(QCh!='\'')){EThrow("Invalid attribute-value start.");}
00462   TChA ValChA; GetCh();
00463   forever {
00464     if ((Ch=='<')||(!ChDef.IsChar(Ch))){
00465       EThrow("Invalid attribute-value character.");}
00466     if (Ch==QCh){GetCh(); break;} // final quote
00467     else if (Ch=='&'){GetCh(); ValChA+=GetReference();} // reference
00468     else {ValChA+=Ch; GetCh();} // usual char
00469   }
00470   return ValChA;
00471 }
00472 
00473 TStr TXmlLx::GetVersionNum(){
00474   // [24] VersionInfo ::=  {{S 'version' Eq}} (' VersionNum ' | " VersionNum ")
00475   // [26] VersionNum ::=  ([a-zA-Z0-9_.:] | '-')+
00476   char QCh=Ch;
00477   if ((Ch!='\'')&&(Ch!='"')){EThrow("Quote character (' or \") expected.");}
00478   TChA VerNumChA;
00479   GetCh();
00480   do {
00481     if ((('a'<=Ch)&&(Ch<='z'))||(('A'<=Ch)&&(Ch<='Z'))||
00482      (('0'<=Ch)&&(Ch<='9'))||(Ch=='_')||(Ch=='.')||(Ch==':')||(Ch=='-')){
00483       VerNumChA+=Ch;
00484     } else {
00485       EThrow("Invalid version-number character.");
00486     }
00487     GetCh();
00488   } while (Ch!=QCh);
00489   GetCh();
00490   return VerNumChA;
00491 }
00492 
00493 TStr TXmlLx::GetEncName(){
00494   // [80] EncodingDecl ::=  {{S 'encoding' Eq}} ('"' EncName '"' |  "'" EncName "'" )
00495   // [81] EncName ::=  [A-Za-z] ([A-Za-z0-9._] | '-')*
00496   char QCh=Ch;
00497   if ((Ch!='\'')&&(Ch!='"')){EThrow("Quote character (' or \") expected.");}
00498   TChA EncNmChA;
00499   GetCh();
00500   if ((('a'<=Ch)&&(Ch<='z'))||(('A'<=Ch)&&(Ch<='Z'))){EncNmChA+=Ch;}
00501   else {EThrow("Invalid encoding-name character.");}
00502   GetCh();
00503   while (Ch!=QCh){
00504     if ((('a'<=Ch)&&(Ch<='z'))||(('A'<=Ch)&&(Ch<='Z'))||
00505      (('0'<=Ch)&&(Ch<='9'))||(Ch=='.')||(Ch=='_')||(Ch=='-')){EncNmChA+=Ch;}
00506     else {EThrow("Invalid version-number character.");}
00507     GetCh();
00508   }
00509   GetCh();
00510   return EncNmChA;
00511 }
00512 
00513 TStr TXmlLx::GetStalVal(){
00514   // [32] SDDecl ::=  {{S 'standalone' Eq}}
00515   //  (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"'))
00516   char QCh=Ch;
00517   if ((Ch!='\'')&&(Ch!='"')){EThrow("Quote character (' or \") expected.");}
00518   TChA StalChA;
00519   GetCh();
00520   while (Ch!=QCh){
00521     if (('a'<=Ch)&&(Ch<='z')){StalChA+=Ch;}
00522     else {EThrow("Invalid standalone-value character.");}
00523     GetCh();
00524   }
00525   GetCh();
00526   TStr StalVal=StalChA;
00527   if ((StalVal=="yes")||(StalVal=="no")){return StalVal;}
00528   else {EThrow("Invalid standalone-value."); Fail; return TStr();}
00529 }
00530 
00531 void TXmlLx::GetXmlDecl(){
00532   // [23] XMLDecl ::=  {{'<?xml'}}... VersionInfo EncodingDecl? SDDecl? S? '?>'
00533   // [24] VersionInfo ::=  S 'version' Eq (' VersionNum ' | " VersionNum ")
00534   GetWs(true);
00535   TStr VerNm=GetName("version"); GetEq(); TStr VerVal=GetVersionNum();
00536   if (VerVal!="1.0"){EThrow("Invalid XML version.");}
00537   AddArg(VerNm, VerVal);
00538   GetWs(false);
00539   if (Ch!='?'){
00540     // EncodingDecl ::=  {{S}} 'encoding' Eq
00541     //  ('"' EncName '"' |  "'" EncName "'" )
00542     TStr EncNm=GetName("encoding"); GetEq(); TStr EncVal=GetEncName();
00543     AddArg(EncNm, EncVal);
00544   }
00545   GetWs(false);
00546   if (Ch!='?'){
00547     // SDDecl ::=  {{S}} 'standalone' Eq
00548     //  (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"'))
00549     TStr StalNm=GetName("standalone"); GetEq(); TStr StalVal=GetStalVal();
00550     AddArg(StalNm, StalVal);
00551   }
00552   GetWs(false);
00553   if (Ch=='?'){
00554     GetCh();
00555     if (Ch=='>'){GetCh();}
00556     else {EThrow("Invalid end-of-tag in XML-declaration.");}
00557   } else {
00558     EThrow("Invalid end-of-tag in XML-declaration.");
00559   }
00560 }
00561 
00562 void TXmlLx::GetPI(){
00563   // [16]  PI ::=  {{'<?' PITarget}} (S (Char* - (Char* '?>' Char*)))? '?>'
00564   // [17]  PITarget ::=  Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
00565   GetWs(false);
00566   TxtChA.Clr();
00567   forever {
00568     if (!ChDef.IsChar(Ch)){EThrow("Invalid PI character.");}
00569     if (Ch=='?'){
00570       if (GetCh()=='>'){
00571         GetCh(); break;
00572       } else {
00573         if (!ChDef.IsChar(Ch)){EThrow("Invalid PI character.");}
00574         TxtChA+='?'; TxtChA+=Ch; // special case if single '?'
00575       }
00576     } else {
00577       TxtChA+=Ch; // usual char
00578     }
00579     GetCh();
00580   }
00581 }
00582 
00583 TStr TXmlLx::GetSystemLiteral(){
00584   // [11]  SystemLiteral ::=  ('"' [^"]* '"') | ("'" [^']* "'")
00585   char QCh=Ch;
00586   if ((Ch!='\'')&&(Ch!='"')){EThrow("Quote character (' or \") expected.");}
00587   TChA LitChA; GetCh();
00588   while (Ch!=QCh){
00589     if (!ChDef.IsChar(Ch)){EThrow("Invalid System-Literal character.");}
00590     LitChA+=Ch; GetCh();
00591   }
00592   GetCh();
00593   return LitChA;
00594 }
00595 
00596 TStr TXmlLx::GetPubidLiteral(){
00597   // [12]  PubidLiteral ::=  '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
00598   char QCh=Ch;
00599   if ((Ch!='\'')&&(Ch!='"')){EThrow("Quote character (' or \") expected.");}
00600   TChA LitChA; GetCh();
00601   while (Ch!=QCh){
00602     if (!ChDef.IsPubid(Ch)){EThrow("Invalid Public-Id-Literal character.");}
00603     LitChA+=Ch; GetCh();
00604   }
00605   GetCh();
00606   return LitChA;
00607 }
00608 
00609 void TXmlLx::GetExternalId(){
00610   // ExternalID ::=  'SYSTEM' S SystemLiteral
00611   //  | 'PUBLIC' S PubidLiteral S SystemLiteral
00612   TStr ExtIdNm=GetName();
00613   if (ExtIdNm=="SYSTEM"){
00614     GetWs(true); GetSystemLiteral();
00615   } else if (ExtIdNm=="PUBLIC"){
00616     GetWs(true); GetPubidLiteral(); GetWs(true); GetSystemLiteral();
00617   } else {
00618     EThrow("Invalid external-id ('SYSTEM' or 'PUBLIC' expected).");
00619   }
00620 }
00621 
00622 void TXmlLx::GetNData(){
00623   // [76]  NDataDecl ::=  S 'NDATA' S Name
00624   GetName("NDATA"); GetWs(true); GetName();
00625 }
00626 
00627 void TXmlLx::GetDocTypeDecl(){
00628   // [28] doctypedecl ::=  {{'<!DOCTYPE'}} S Name (S ExternalID)? S?
00629   //  ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
00630   GetWs(true);
00631   TStr DocTypeDeclNm=GetName();
00632   GetWs(false);
00633   if (Ch=='>'){GetCh(); return;}
00634   if (Ch!='['){GetExternalId();}
00635   GetWs(false);
00636   if (Ch=='['){
00637     GetCh();
00638     // [28] (markupdecl | PEReference | S)*
00639     GetWs(false);
00640     while (Ch!=']'){
00641       if (ChDef.IsWs(Ch)){GetWs(true);}
00642       else if (Ch=='%'){GetPEReference();}
00643       else {
00644         GetSym();
00645       }
00646     }
00647     GetCh();
00648   }
00649   GetWs(false);
00650   // '>'
00651   if (Ch=='>'){GetCh();}
00652   else {EThrow("Invalid end-of-tag in document-type-declaration.");}
00653   TagNm=DocTypeDeclNm;
00654 }
00655 
00656 void TXmlLx::GetElement(){
00657   TxtChA.Clr();
00658   while (Ch!='>'){
00659     if (!ChDef.IsChar(Ch)){EThrow("Invalid Element character.");}
00660     TxtChA+=Ch; GetCh();
00661   }
00662   GetCh();
00663 }
00664 
00665 void TXmlLx::GetAttList(){
00666   TxtChA.Clr();
00667   while (Ch!='>'){
00668     if (!ChDef.IsChar(Ch)){EThrow("Invalid Element character.");}
00669     TxtChA+=Ch; GetCh();
00670   }
00671   GetCh();
00672 }
00673 
00674 TStr TXmlLx::GetEntityValue(){
00675   // [9]  EntityValue ::=  '"' ([^%&"] | PEReference | Reference)* '"'
00676   //  | "'" ([^%&'] | PEReference | Reference)* "'"
00677   uchar QCh=Ch;
00678   if ((QCh!='"')&&(QCh!='\'')){EThrow("Invalid entity-value start.");}
00679   TChA ValChA; GetCh();
00680   forever {
00681     if (!ChDef.IsChar(Ch)){EThrow("Invalid entity-value character.");}
00682     if (Ch==QCh){GetCh(); break;} // final quote
00683     else if (Ch=='&'){GetCh(); ValChA+=GetReference();} // reference
00684     else if (Ch=='%'){GetCh(); ValChA+=GetPEReference();} // pereference
00685     else {ValChA+=Ch; GetCh();} // usual char
00686   }
00687   return ValChA;
00688 }
00689 
00690 void TXmlLx::GetEntity(){
00691   // [70] EntityDecl ::=  GEDecl | PEDecl
00692   // [71] GEDecl ::=  '<!ENTITY' S Name S EntityDef S? '>'
00693   // [72] PEDecl ::=  '<!ENTITY' S '%' S Name S PEDef S? '>'
00694   GetWs(true); TStr EntityNm;
00695   if (Ch=='%'){
00696     GetCh(); GetWs(true); EntityNm=GetName(); GetWs(true);
00697     // [74] PEDef ::=  EntityValue | ExternalID
00698     if ((Ch=='\"')||(Ch=='\'')){
00699       TStr EntityVal=GetEntityValue();
00700       PutPEntityVal(EntityNm, EntityVal);
00701     } else {
00702       GetExternalId();
00703       GetWs(false);
00704       if (Ch!='>'){GetNData();}
00705     }
00706   } else {
00707     EntityNm=GetName(); GetWs(true);
00708     // [73] EntityDef ::=  EntityValue | (ExternalID NDataDecl?)
00709     if ((Ch=='\"')||(Ch=='\'')){
00710       TStr EntityVal=GetEntityValue();
00711       PutEntityVal(EntityNm, EntityVal);
00712     } else {
00713       GetExternalId();
00714     }
00715   }
00716   GetWs(false);
00717   if (Ch=='>'){GetCh();}
00718   else {EThrow("Invalid end-of-tag in entity-declaration.");}
00719   TagNm=EntityNm;
00720 }
00721 
00722 void TXmlLx::GetNotation(){
00723   // [82] NotationDecl ::=  '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
00724   // [83]  PublicID ::=  'PUBLIC' S PubidLiteral
00725   TxtChA.Clr();
00726   while (Ch!='>'){
00727     if (!ChDef.IsChar(Ch)){EThrow("Invalid Element character.");}
00728     TxtChA+=Ch; GetCh();
00729   }
00730   GetCh();
00731 }
00732 
00733 void TXmlLx::GetCDSect(){
00734   // [18]  CDSect ::=  CDStart CData CDEnd
00735   // [19]  CDStart ::=  '<![CDATA{{['}}
00736   // [20]  CData ::=  (Char* - (Char* ']]>' Char*))
00737   // [21]  CDEnd ::=  ']]>'
00738   if (Ch=='['){GetCh();}
00739   else {EThrow("Invalid start of CDATA section.");}
00740   TxtChA.Clr();
00741   forever {
00742     if (!ChDef.IsChar(Ch)){EThrow("Invalid CDATA character.");}
00743     if ((Ch=='>')&&(TxtChA.Len()>=2)&&
00744      (TxtChA.LastLastCh()==']') && (TxtChA.LastCh()==']')){
00745       GetCh(); TxtChA.Pop(); TxtChA.Pop(); break;
00746     } else {
00747       TxtChA+=Ch; GetCh();
00748     }
00749   }
00750 }
00751 
00752 void TXmlLx::SkipWs(){
00753   // [3] S ::=  (#x20 | #x9 | #xD | #xA)+
00754   while (ChDef.IsWs(Ch)){GetCh();}
00755 }
00756 
// Reads the next lexical symbol from the input, stores its kind in Sym and
// returns it. Depending on the symbol, TagNm, the argument list and TxtChA are
// filled by the helper that parses it.
//  - '<' starts markup: "<?" (XML declaration or PI), "<!" (CDATA section,
//    comment, DOCTYPE/ELEMENT/ATTLIST/ENTITY/NOTATION), "</" (end tag), or a
//    start / empty-element tag with its attributes.
//  - white-space yields xsyWs, end of input yields xsyEof, anything else is
//    character data (xsyStr) read up to the next '<' with references expanded.
// With Spacing==xspTruncate, white-space after markup is skipped and a
// white-space symbol is replaced by the symbol that follows it.
00757 TXmlLxSym TXmlLx::GetSym(){
00758   if (Ch=='<'){
          // markup: the argument list of the previous symbol is dropped first
00759     GetCh(); ClrArgV();
00760     if (Ch=='?'){
00761       GetCh(); TagNm=GetName();
            // target "xml" in any letter case is treated as the XML declaration
00762       if (TagNm.GetLc()=="xml"){Sym=xsyXmlDecl; GetXmlDecl();}
00763       else {Sym=xsyPI; GetPI();}
00764     } else
00765     if (Ch=='!'){
00766       GetCh();
00767       if (Ch=='['){
00768         GetCh(); TagNm=GetName();
00769         if (TagNm=="CDATA"){Sym=xsyQStr; GetCDSect();}
00770         else {EThrow(TStr("Invalid tag after '<![' (")+TagNm+").");}
00771       } else
00772       if (Ch=='-'){
00773         Sym=xsyComment; GetComment();
00774       } else {
              // declarations: Sym is assigned after the helper returns
00775         TagNm=GetName();
00776         if (TagNm=="DOCTYPE"){GetDocTypeDecl(); Sym=xsyDocTypeDecl;}
00777         else if (TagNm=="ELEMENT"){GetElement(); Sym=xsyElement;}
00778         else if (TagNm=="ATTLIST"){GetAttList(); Sym=xsyAttList;}
00779         else if (TagNm=="ENTITY"){GetEntity(); Sym=xsyEntity;}
00780         else if (TagNm=="NOTATION"){GetNotation(); Sym=xsyNotation;}
00781         else {EThrow(TStr("Invalid tag (")+TagNm+").");}
00782       }
00783     } else
00784     if (Ch=='/'){
00785       // xsyETag
00786       GetCh(); Sym=xsyETag; TagNm=GetName(); GetWs(false);
00787       if (Ch=='>'){GetCh();}
00788       else {EThrow("Invalid End-Tag.");}
00789     } else {
00790       // xsySTag or xsySETag
00791       TagNm=GetName(); GetWs(false);
            // attributes: Name Eq AttValue, repeated until '>' or '/'
00792       while ((Ch!='>')&&(Ch!='/')){
00793         TStr AttrNm=GetName();
00794         GetEq();
00795         TStr AttrVal=GetAttValue();
00796         GetWs(false);
00797         AddArg(AttrNm, AttrVal);
00798       }
00799       if (Ch=='/'){
00800         if (GetCh()=='>'){Sym=xsySETag; GetCh();}
00801         else {EThrow("Invalid Empty-Element-Tag.");}
00802       } else {
00803         Sym=xsySTag; GetCh();
00804       }
00805     }
          // truncating mode: white-space following any markup is discarded
00806     if (Spacing==xspTruncate){SkipWs();}
00807   } else
00808   if (ChDef.IsWs(Ch)){
00809     Sym=xsyWs; GetWs(true); ToNrSpacing();
          // truncating mode: drop the white-space symbol and read the next one
00810     if (Spacing==xspTruncate){GetSym();}
00811   } else
00812   if (Ch==TCh::EofCh){
00813     Sym=xsyEof;
00814   } else {
00815     Sym=xsyStr; TxtChA.Clr();
00816     // [14]  CharData ::=  [^<&]* - ([^<&]* ']]>' [^<&]*)
00817     forever {
00818       if (!ChDef.IsChar(Ch)){
00819         EThrow(TUInt::GetStr(Ch, "Invalid character (%d)."));}
00820                 // GetCh();  continue; // skip invalid characters
00821       if (Ch=='<'){break;} // tag
00822       if (Ch=='&'){GetCh(); TxtChA+=GetReference();} // reference
00823       else {
              // "]]>" may appear only as the end of a CDATA section
00824         if ((Ch=='>')&&(TxtChA.Len()>=2)&&
00825          (TxtChA.LastLastCh()==']')&&(TxtChA.LastCh()==']')){
00826           EThrow("Forbidden substring ']]>' in character data.");}
00827         TxtChA+=Ch; GetCh(); // usual char
00828       }
00829     }
00830     ToNrSpacing();
00831   }
00832   return Sym;
00833 }
00834 
00835 TStr TXmlLx::GetSymStr() const {
00836   TChA SymChA;
00837   switch (Sym){
00838     case xsyUndef:
00839       SymChA="{Undef}"; break;
00840     case xsyWs:
00841       SymChA+="{Space:'"; SymChA+=TStr(TxtChA).GetHex(); SymChA+="'}"; break;
00842     case xsyComment:
00843       SymChA+="<!--"; SymChA+=TxtChA; SymChA+="-->"; break;
00844     case xsyXmlDecl:{
00845       SymChA+="<?"; SymChA+=TagNm;
00846       for (int ArgN=0; ArgN<GetArgs(); ArgN++){
00847         TStr ArgNm; TStr ArgVal; GetArg(ArgN, ArgNm, ArgVal);
00848         char ArgValQCh=GetArgValQCh(ArgVal);
00849         SymChA+=' '; SymChA+=ArgNm; SymChA+='=';
00850         SymChA+=ArgValQCh; SymChA+=ArgVal; SymChA+=ArgValQCh;
00851       }
00852       SymChA+="?>"; break;}
00853     case xsyPI:
00854       SymChA+="<?"; SymChA+=TagNm;
00855       if (!TxtChA.Empty()){SymChA+=' '; SymChA+=TxtChA;}
00856       SymChA+="?>"; break;
00857     case xsyDocTypeDecl:
00858       SymChA+="<!DOCTYPE "; SymChA+=TagNm; SymChA+=">"; break;
00859     case xsySTag:
00860     case xsySETag:{
00861       SymChA+="<"; SymChA+=TagNm;
00862       for (int ArgN=0; ArgN<GetArgs(); ArgN++){
00863         TStr ArgNm; TStr ArgVal; GetArg(ArgN, ArgNm, ArgVal);
00864         char ArgValQCh=GetArgValQCh(ArgVal);
00865         SymChA+=' '; SymChA+=ArgNm; SymChA+='=';
00866         SymChA+=ArgValQCh; SymChA+=ArgVal; SymChA+=ArgValQCh;
00867       }
00868       if (Sym==xsySTag){SymChA+=">";}
00869       else if (Sym==xsySETag){SymChA+="/>";}
00870       else {Fail;}
00871       break;}
00872     case xsyETag:
00873       SymChA+="</"; SymChA+=TagNm; SymChA+=">"; break;
00874     case xsyStr:
00875       SymChA="{String:'"; SymChA+=TxtChA; SymChA+="'}"; break;
00876     case xsyQStr:
00877       SymChA="{QString:'"; SymChA+=TxtChA; SymChA+="'}"; break;
00878     case xsyEof:
00879       SymChA="{Eof}"; break;
00880     default: Fail;
00881   }
00882   return SymChA;
00883 }
00884 
00885 void TXmlLx::EThrow(const TStr& MsgStr) const {
00886   TChA FPosChA;
00887   FPosChA+=" [File:"; FPosChA+=SIn->GetSNm();
00888   FPosChA+=" Line:"; FPosChA+=TInt::GetStr(LnN);
00889   FPosChA+=" Char:"; FPosChA+=TInt::GetStr(LnChN);
00890   FPosChA+="]";
00891   TStr FullMsgStr=MsgStr+FPosChA;
00892   TExcept::Throw(FullMsgStr);
00893 }
00894 
00895 TStr TXmlLx::GetFPosStr() const {
00896   TChA FPosChA;
00897   FPosChA+=" [File:"; FPosChA+=SIn->GetSNm();
00898   FPosChA+=" Line:"; FPosChA+=TInt::GetStr(LnN);
00899   FPosChA+=" Char:"; FPosChA+=TInt::GetStr(LnChN);
00900   FPosChA+="]";
00901   return FPosChA;
00902 }
00903 
00904 TStr TXmlLx::GetXmlLxSymStr(const TXmlLxSym& XmlLxSym){
00905   switch (XmlLxSym){
00906     case xsyUndef: return "Undef";
00907     case xsyWs: return "White-Space";
00908     case xsyComment: return "Comment";
00909     case xsyXmlDecl: return "Declaration";
00910     case xsyPI: return "PI";
00911     case xsyDocTypeDecl: return "Document-Type";
00912     case xsyElement: return "Element";
00913     case xsyAttList: return "Attribute-List";
00914     case xsyEntity: return "Entity";
00915     case xsyNotation: return "Notation";
00916     case xsyTag: return "Tag";
00917     case xsySTag: return "Start-Tag";
00918     case xsyETag: return "End-Tag";
00919     case xsySETag: return "Start-End-Tag";
00920     case xsyStr: return "String";
00921     case xsyQStr: return "Quoted-String";
00922     case xsyEof: return "Eon-Of-File";
00923     default: return "Undef";
00924   }
00925 }
00926 
00927 bool TXmlLx::IsTagNm(const TStr& Str){
00928   TChA ChA=Str;
00929   if (ChA.Len()>0){
00930     if (TXmlLx::ChDef.IsFirstNameCh(ChA[0])){
00931       for (int ChN=1; ChN<ChA.Len(); ChN++){
00932         if (!TXmlLx::ChDef.IsName(ChA[ChN])){
00933           return false;
00934         }
00935       }
00936       return true;
00937     } else {
00938       return false;
00939     }
00940   } else {
00941     return false;
00942   }
00943 }
00944 
00945 TStr TXmlLx::GetXmlStrFromPlainMem(const TMem& PlainMem){
00946   TChA XmlChA;
00947   for (int ChN=0; ChN<PlainMem.Len(); ChN++){
00948     uchar Ch=PlainMem[ChN];
00949     if ((' '<=Ch)&&(Ch<='~')){
00950       switch (Ch){
00951         case '"': XmlChA+="&quot;"; break;
00952         case '&': XmlChA+="&amp;"; break;
00953         case '\'': XmlChA+="&apos;"; break;
00954         case '<': XmlChA+="&lt;"; break;
00955         case '>': XmlChA+="&gt;"; break;
00956         default: XmlChA+=Ch;
00957       }
00958     } else
00959     if ((Ch=='\r')||(Ch=='\n')){
00960       XmlChA+=Ch;
00961     } else {
00962       XmlChA+='&'; XmlChA+='#'; XmlChA+=TUInt::GetStr(Ch); XmlChA+=';';
00963     }
00964   }
00965   return XmlChA;
00966 }
00967 
00968 TStr TXmlLx::GetXmlStrFromPlainStr(const TChA& PlainChA){
00969   TChA XmlChA;
00970   for (int ChN=0; ChN<PlainChA.Len(); ChN++){
00971     uchar Ch=PlainChA[ChN];
00972     if ((' '<=Ch)&&(Ch<='~')){
00973       switch (Ch){
00974         case '"': XmlChA+="&quot;"; break;
00975         case '&': XmlChA+="&amp;"; break;
00976         case '\'': XmlChA+="&apos;"; break;
00977         case '<': XmlChA+="&lt;"; break;
00978         case '>': XmlChA+="&gt;"; break;
00979         default: XmlChA+=Ch;
00980       }
00981     } else
00982     if ((Ch=='\r')||(Ch=='\n')){
00983       XmlChA+=Ch;
00984     } else {
00985       XmlChA+='&'; XmlChA+='#'; XmlChA+=TUInt::GetStr(Ch); XmlChA+=';';
00986     }
00987   }
00988   return XmlChA;
00989 }
00990 
00991 TStr TXmlLx::GetPlainStrFromXmlStr(const TStr& XmlStr){
00992   TChA PlainChA;
00993   TChRet Ch(TStrIn::New(XmlStr));
00994   Ch.GetCh();
00995   while (!Ch.Eof()){
00996     if (Ch()!='&'){
00997       PlainChA+=Ch(); Ch.GetCh();
00998     } else {
00999       // [67] Reference ::=  EntityRef | CharRef
01000       if (Ch.GetCh()=='#'){
01001         // [66]  CharRef ::=  '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
01002         TChA RefChA; int RefCd=0;
01003         if (Ch.GetCh()=='x'){
01004           // hex-decimal character code
01005           forever {
01006             Ch.GetCh();
01007             if (TCh::IsHex(Ch())){
01008               RefChA+=Ch();
01009               RefCd=RefCd*16+TCh::GetHex(Ch());
01010             } else {
01011               break;
01012             }
01013           }
01014         } else {
01015           // decimal character code
01016           forever {
01017             if (TCh::IsNum(Ch())){
01018               RefChA+=Ch();
01019               RefCd=RefCd*10+TCh::GetNum(Ch());
01020             } else {
01021               break;
01022             }
01023             Ch.GetCh();
01024           }
01025         }
01026         if ((!RefChA.Empty())&&(Ch()==';')){
01027           Ch.GetCh();
01028                   if (RefCd < 0x80) {
01029                         // ascii character
01030                         uchar RefCh=uchar(RefCd);
01031                         PlainChA+=RefCh;
01032                   } else {
01033                         // unicode
01034                         TUnicode::EncodeUtf8(RefCd, PlainChA);
01035                   }
01036         }
01037       } else {
01038         // [68]  EntityRef ::=  '&' Name ';'
01039         TChA EntityNm;
01040         while ((!Ch.Eof())&&(Ch()!=';')){
01041           EntityNm+=Ch(); Ch.GetCh();}
01042         if ((!EntityNm.Empty())&&(Ch()==';')){
01043           Ch.GetCh();
01044           if (EntityNm=="quot"){PlainChA+='"';}
01045           else if (EntityNm=="amp"){PlainChA+='&';}
01046           else if (EntityNm=="apos"){PlainChA+='\'';}
01047           else if (EntityNm=="lt"){PlainChA+='<';}
01048           else if (EntityNm=="gt"){PlainChA+='>';}
01049         }
01050       }
01051     }
01052   }
01053   return PlainChA;
01054 }
01055 
01056 TStr TXmlLx::GetUsAsciiStrFromXmlStr(const TStr& XmlStr){
01057   TStr UsAsciiStr=XmlStr;
01058   UsAsciiStr.ChangeStrAll("&#232;", "c");
01059   UsAsciiStr.ChangeStrAll("&#200;", "C");
01060   UsAsciiStr.ChangeStrAll("&#154;", "s");
01061   UsAsciiStr.ChangeStrAll("&#138;", "S");
01062   UsAsciiStr.ChangeStrAll("&#158;", "z");
01063   UsAsciiStr.ChangeStrAll("&#142;", "Z");
01064   TChA UsAsciiChA=TXmlLx::GetPlainStrFromXmlStr(UsAsciiStr);
01065   for (int ChN=0; ChN<UsAsciiChA.Len(); ChN++){
01066     char Ch=UsAsciiChA[ChN];
01067     if ((Ch<' ')||('~'<Ch)){UsAsciiChA.PutCh(ChN, 'x');}
01068   }
01069   return UsAsciiChA;
01070 }
01071 
01072 TStr TXmlLx::GetChRefFromYuEntRef(const TStr& YuEntRefStr){
01073   TStr ChRefStr=YuEntRefStr;
01074   ChRefStr.ChangeStrAll("&ch;", "&#232;");
01075   ChRefStr.ChangeStrAll("&Ch;", "&#200;");
01076   ChRefStr.ChangeStrAll("&sh;", "&#154;");
01077   ChRefStr.ChangeStrAll("&Sh;", "&#138;");
01078   ChRefStr.ChangeStrAll("&zh;", "&#158;");
01079   ChRefStr.ChangeStrAll("&Zh;", "&#142;");
01080   ChRefStr.ChangeStrAll("&cs", "c");
01081   ChRefStr.ChangeStrAll("&Cs;", "C");
01082   ChRefStr.ChangeStrAll("&dz;", "dz");
01083   ChRefStr.ChangeStrAll("&Dz;", "Dz");
01084   return ChRefStr;
01085 }
01086 
01088 // Xml-Token
01089 bool TXmlTok::GetBoolArgVal(const TStr& ArgNm, const bool& DfVal) const {
01090   int ArgN=ArgNmValV.SearchForw(TStrKd(ArgNm));
01091   return (ArgN==-1) ? DfVal : (ArgNmValV[ArgN].Dat==TBool::TrueStr);
01092 }
01093 
01094 bool TXmlTok::GetBoolArgVal(
01095  const TStr& ArgNm, const TStr& TrueVal, const bool& DfVal) const {
01096   int ArgN=ArgNmValV.SearchForw(TStrKd(ArgNm));
01097   return (ArgN==-1) ? DfVal : (ArgNmValV[ArgN].Dat==TrueVal);
01098 }
01099 
01100 bool TXmlTok::GetBoolArgVal(const TStr& ArgNm,
01101  const TStr& TrueVal, const TStr& FalseVal, const bool& DfVal) const {
01102   int ArgN=ArgNmValV.SearchForw(TStrKd(ArgNm));
01103   if (ArgN==-1){return DfVal;}
01104   TStr ArgVal=ArgNmValV[ArgN].Dat;
01105   if (ArgVal==TrueVal){return true;}
01106   IAssert(ArgVal == FalseVal); return false;
01107 }
01108 
01109 int TXmlTok::GetIntArgVal(const TStr& ArgNm, const int& DfVal) const {
01110   int ArgN=ArgNmValV.SearchForw(TStrKd(ArgNm));
01111   if (ArgN==-1){
01112     return DfVal;
01113   } else {
01114     int Val;
01115     if (ArgNmValV[ArgN].Dat.IsInt(Val)){return Val;} else {return DfVal;}
01116   }
01117 }
01118 
01119 double TXmlTok::GetFltArgVal(const TStr& ArgNm, const double& DfVal) const {
01120   int ArgN=ArgNmValV.SearchForw(TStrKd(ArgNm));
01121   if (ArgN==-1){
01122     return DfVal;
01123   } else {
01124     double Val;
01125     if (ArgNmValV[ArgN].Dat.IsFlt(Val)){return Val;} else {return DfVal;}
01126   }
01127 }
01128 
01129 TStr TXmlTok::GetStrArgVal(const TStr& ArgNm, const TStr& DfVal) const {
01130   int ArgN=ArgNmValV.SearchForw(TStrKd(ArgNm));
01131   return (ArgN==-1) ? DfVal : ArgNmValV[ArgN].Dat;
01132 }
01133 
01134 void TXmlTok::PutSubTok(const PXmlTok& Tok, const int& SubTokN){
01135   if (SubTokN==-1){
01136     ClrSubTok(); AddSubTok(Tok);
01137   } else {
01138     SubTokV[SubTokN]=Tok;
01139   }
01140 }
01141 
01142 PXmlTok TXmlTok::GetTagTok(const TStr& TagPath) const {
01143   if (TagPath.Empty()){
01144     return (TXmlTok*)this;
01145   } else {
01146     TStr TagNm; TStr RestTagPath; TagPath.SplitOnCh(TagNm, '|', RestTagPath);
01147     PXmlTok SubTok;
01148     for (int SubTokN=0; SubTokN<SubTokV.Len(); SubTokN++){
01149       SubTok=SubTokV[SubTokN];
01150       if ((SubTok->GetSym()==xsyTag)&&(SubTok->GetStr()==TagNm)){break;}
01151       else {SubTok=NULL;}
01152     }
01153     if ((SubTok.Empty())||(RestTagPath.Empty())){return SubTok;}
01154     else {return SubTok->GetTagTok(RestTagPath);}
01155   }
01156 }
01157 
01158 void TXmlTok::GetTagTokV(const TStr& TagPath, TXmlTokV& XmlTokV) const {
01159   XmlTokV.Clr();
01160   TStr PreTagPath; TStr TagNm; TagPath.SplitOnLastCh(PreTagPath, '|', TagNm);
01161   PXmlTok Tok=GetTagTok(PreTagPath);
01162   if (!Tok.Empty()){
01163     for (int SubTokN=0; SubTokN<Tok->GetSubToks(); SubTokN++){
01164       PXmlTok SubTok=Tok->GetSubTok(SubTokN);
01165       if ((SubTok->GetSym()==xsyTag)&&(SubTok->GetStr()==TagNm)){
01166         XmlTokV.Add(SubTok);}
01167     }
01168   }
01169 }
01170 
01171 void TXmlTok::GetTagValV(const TStr& TagNm, const bool& XmlP, TStrV& ValV) const {
01172   if ((Sym==xsyTag)&&(Str==TagNm)){
01173     ValV.Add(GetTokStr(XmlP));
01174   } else {
01175     for (int SubTokN=0; SubTokN<GetSubToks(); SubTokN++){
01176       GetSubTok(SubTokN)->GetTagValV(TagNm, XmlP, ValV);}
01177   }
01178 }
01179 
01180 TStr TXmlTok::GetTagVal(const TStr& TagNm, const bool& XmlP) const {
01181   TStrV ValV; GetTagValV(TagNm, XmlP, ValV);
01182   if (ValV.Len()>0){return ValV[0];} else {return "";}
01183 }
01184 
01185 void TXmlTok::AddTokToChA(const bool& XmlP, TChA& ChA) const {
01186   switch (Sym){
01187     case xsyWs:
01188       ChA+=Str; break;
01189     case xsyStr:
01190       if (XmlP){ChA+=TXmlLx::GetXmlStrFromPlainStr(Str);} else {ChA+=Str;} break;
01191     case xsyQStr:
01192       if (XmlP){ChA+="<![CDATA[";}
01193       ChA+=Str;
01194       if (XmlP){ChA+="]]>";} break;
01195     case xsyTag:
01196       if (XmlP){
01197         ChA+='<'; ChA+=Str;
01198         for (int ArgN=0; ArgN<GetArgs(); ArgN++){
01199           TStr ArgNm; TStr ArgVal; GetArg(ArgN, ArgNm, ArgVal);
01200           if (XmlP){ArgVal=TXmlLx::GetXmlStrFromPlainStr(ArgVal);}
01201           char ArgValQCh=TXmlLx::GetArgValQCh(ArgVal);
01202           ChA+=' '; ChA+=ArgNm; ChA+='=';
01203           ChA+=ArgValQCh; ChA+=ArgVal; ChA+=ArgValQCh;
01204         }
01205       }
01206       if (GetSubToks()==0){
01207         if (XmlP){ChA+="/>";}
01208       } else {
01209         if (XmlP){ChA+=">";}
01210         for (int SubTokN=0; SubTokN<GetSubToks(); SubTokN++){
01211           GetSubTok(SubTokN)->AddTokToChA(XmlP, ChA);}
01212         if (XmlP){ChA+="</"; ChA+=Str; ChA+='>';}
01213       }
01214       break;
01215     default: Fail;
01216   }
01217 }
01218 
01219 TStr TXmlTok::GetTokVStr(const TXmlTokV& TokV, const bool& XmlP){
01220   TChA TokVChA;
01221   for (int TokN=0; TokN<TokV.Len(); TokN++){
01222     if (TokN>0){TokVChA+=' ';}
01223     TokVChA+=TokV[TokN]->GetTokStr(XmlP);
01224   }
01225   return TokVChA;
01226 }
01227 
01228 PXmlTok TXmlTok::GetTok(TXmlLx& Lx){
01229   switch (Lx.Sym){
01230     case xsyWs:
01231     case xsyStr:
01232     case xsyQStr:
01233       return TXmlTok::New(Lx.Sym, Lx.TxtChA);
01234     case xsySTag:
01235     case xsySETag:
01236       return TXmlTok::New(xsyTag, Lx.TagNm, Lx.ArgNmValKdV);
01237     default: Fail; return NULL;
01238   }
01239 }
01240 
01242 // Xml-Document
01243 void TXmlDoc::LoadTxtMiscStar(TXmlLx& Lx){
01244   // [27] Misc ::=  Comment | PI |  S
01245   while ((Lx.Sym==xsyComment)||(Lx.Sym==xsyPI)||(Lx.Sym==xsyWs)){
01246     Lx.GetSym();}
01247 }
01248 
01249 PXmlTok TXmlDoc::LoadTxtElement(TXmlLx& Lx){
01250   // [39]  element ::=  EmptyElemTag | STag content ETag
01251   PXmlTok Tok;
01252   if (Lx.Sym==xsySETag){
01253     Tok=TXmlTok::GetTok(Lx);
01254   } else
01255   if (Lx.Sym==xsySTag){
01256     Tok=TXmlTok::GetTok(Lx);
01257     forever {
01258       Lx.GetSym();
01259       if (Lx.Sym==xsyETag){
01260         if (Tok->GetStr()==Lx.TagNm){
01261           break;
01262         } else {
01263           TStr MsgStr=TStr("Invalid End-Tag '")+Lx.TagNm+
01264            "' ('"+Tok->GetStr()+"' expected).";
01265           Lx.EThrow(MsgStr);
01266         }
01267       } else {
01268         PXmlTok SubTok;
01269         switch (Lx.Sym){
01270           case xsySTag:
01271             SubTok=LoadTxtElement(Lx); break;
01272           case xsySETag:
01273           case xsyStr:
01274           case xsyQStr:
01275           case xsyWs:
01276             SubTok=TXmlTok::GetTok(Lx); break;
01277           case xsyPI:
01278           case xsyComment:
01279             break;
01280           default: Lx.EThrow("Content or End-Tag expected.");
01281         }
01282         if (!SubTok.Empty()){
01283           Tok->AddSubTok(SubTok);}
01284       }
01285     }
01286   } else
01287   if (Lx.Sym==xsyETag){
01288     TStr MsgStr=
01289      TStr("Xml-Element (Start-Tag or Empty-Element-Tag) required.")+
01290      TStr::GetStr(Lx.TagNm, " End-Tag </%s> encountered.");
01291     Lx.EThrow(MsgStr);
01292   } else {
01293     Lx.EThrow("Xml-Element (Start-Tag or Empty-Element-Tag) required.");
01294   }
01295   return Tok;
01296 }
01297 
01298 PXmlTok TXmlDoc::GetTagTok(const TStr& TagPath) const {
01299   if (TagPath.Empty()){
01300     return Tok;
01301   } else {
01302     TStr TagNm; TStr RestTagPath; TagPath.SplitOnCh(TagNm, '|', RestTagPath);
01303     if ((Tok->GetSym()==xsyTag)&&(Tok->GetStr()==TagNm)){
01304       if (RestTagPath.Empty()){return Tok;}
01305       else {return Tok->GetTagTok(RestTagPath);}
01306     } else {
01307       return NULL;
01308     }
01309   }
01310 }
01311 
01312 void TXmlDoc::PutTagTokStr(const TStr& TagPath, const TStr& TokStr) const {
01313   PXmlTok Tok=GetTagTok(TagPath);
01314   Tok->ClrSubTok();
01315   PXmlTok StrTok=TXmlTok::New(xsyStr, TokStr);
01316   Tok->AddSubTok(StrTok);
01317 }
01318 
01319 void TXmlDoc::GetTagTokV(const TStr& TagPath, TXmlTokV& XmlTokV) const {
01320   XmlTokV.Clr();
01321   TStr PreTagPath; TStr TagNm; TagPath.SplitOnLastCh(PreTagPath, '|', TagNm);
01322   PXmlTok Tok=GetTagTok(PreTagPath);
01323   if (!Tok.Empty()){
01324     for (int SubTokN=0; SubTokN<Tok->GetSubToks(); SubTokN++){
01325       PXmlTok SubTok=Tok->GetSubTok(SubTokN);
01326       if ((SubTok->GetSym()==xsyTag)&&(SubTok->GetStr()==TagNm)){
01327         XmlTokV.Add(SubTok);}
01328     }
01329   }
01330 }
01331 
01332 bool TXmlDoc::GetTagTokBoolArgVal(
01333  const TStr& TagPath, const TStr& ArgNm, const bool& DfVal) const {
01334   PXmlTok TagTok;
01335   if (IsTagTok(TagPath, TagTok)){
01336     return TagTok->GetBoolArgVal(ArgNm, DfVal);}
01337   else {return DfVal;}
01338 }
01339 
01340 int TXmlDoc::GetTagTokIntArgVal(
01341  const TStr& TagPath, const TStr& ArgNm, const int& DfVal) const {
01342   PXmlTok TagTok;
01343   if (IsTagTok(TagPath, TagTok)){
01344     return TagTok->GetIntArgVal(ArgNm, DfVal);}
01345   else {return DfVal;}
01346 }
01347 
01348 double TXmlDoc::GetTagTokFltArgVal(
01349  const TStr& TagPath, const TStr& ArgNm, const double& DfVal) const {
01350   PXmlTok TagTok;
01351   if (IsTagTok(TagPath, TagTok)){
01352     return TagTok->GetFltArgVal(ArgNm, DfVal);}
01353   else {return DfVal;}
01354 }
01355 
01356 TStr TXmlDoc::GetTagTokStrArgVal(
01357  const TStr& TagPath, const TStr& ArgNm, const TStr& DfVal) const {
01358   PXmlTok TagTok;
01359   if (IsTagTok(TagPath, TagTok)){
01360     return TagTok->GetStrArgVal(ArgNm, DfVal);}
01361   else {return DfVal;}
01362 }
01363 
01364 TStr TXmlDoc::GetXmlStr(const TStr& Str){
01365   TChA ChA=Str;
01366   TChA XmlChA;
01367   for (int ChN=0; ChN<ChA.Len(); ChN++){
01368     uchar Ch=ChA[ChN];
01369     if ((' '<=Ch)&&(Ch<='~')){
01370       if (Ch=='&'){XmlChA+="&amp;";}
01371       else if (Ch=='>'){XmlChA+="&lt;";}
01372       else if (Ch=='<'){XmlChA+="&gt;";}
01373       else if (Ch=='\''){XmlChA+="&apos;";}
01374       else if (Ch=='\"'){XmlChA+="&quot;";}
01375       else {XmlChA+=Ch;}
01376     } else {
01377       XmlChA+="&#"; XmlChA+=TUInt::GetStr(Ch); XmlChA+=";";
01378     }
01379   }
01380   return XmlChA;
01381 }
01382 
01383 bool TXmlDoc::SkipTopTag(const PSIn& SIn){
01384   bool Ok=true;
01385   TXmlLx Lx(SIn, xspIntact);
01386   try {
01387     Lx.GetSym();
01388     // [22] prolog ::=  XMLDecl? Misc* (doctypedecl Misc*)?
01389     if (Lx.Sym==xsyXmlDecl){Lx.GetSym();}
01390     LoadTxtMiscStar(Lx);
01391     if (Lx.Sym==xsyDocTypeDecl){Lx.GetSym();}
01392     LoadTxtMiscStar(Lx);
01393     Ok=true;
01394   }
01395   catch (PExcept Except){
01396     Ok=false;
01397   }
01398   return Ok;
01399 }
01400 
01401 PXmlDoc TXmlDoc::LoadTxt(TXmlLx& Lx){
01402   PXmlDoc Doc=TXmlDoc::New();
01403   // [1]  document ::=  prolog element Misc*
01404   try {
01405     Lx.GetSym();
01406     // [22] prolog ::=  XMLDecl? Misc* (doctypedecl Misc*)?
01407     if (Lx.Sym==xsyXmlDecl){Lx.GetSym();}
01408     LoadTxtMiscStar(Lx);
01409     if (Lx.Sym==xsyDocTypeDecl){Lx.GetSym();}
01410     LoadTxtMiscStar(Lx);
01411     Doc->Tok=LoadTxtElement(Lx);
01412     LoadTxtMiscStar(Lx);
01413     Doc->Ok=true; Doc->MsgStr="Ok";
01414   }
01415   catch (PExcept& Except){
01416     Doc->Ok=false; Doc->MsgStr=Except->GetMsgStr();
01417   }
01418   return Doc;
01419 }
01420 
01421 PXmlDoc TXmlDoc::LoadTxt(const PSIn& SIn, const TXmlSpacing& Spacing){
01422   TXmlLx Lx(SIn, Spacing); return LoadTxt(Lx);
01423 }
01424 
01425 PXmlDoc TXmlDoc::LoadTxt(const TStr& FNm, const TXmlSpacing& Spacing){
01426   PSIn SIn=TFIn::New(FNm); return LoadTxt(SIn, Spacing);
01427 }
01428 
01429 void TXmlDoc::LoadTxt(
01430  const TStr& FNm, TXmlDocV& XmlDocV, const TXmlSpacing& Spacing){
01431   XmlDocV.Clr();
01432   PSIn SIn=TFIn::New(FNm);
01433   TXmlLx Lx(SIn, Spacing);
01434   PXmlDoc XmlDoc;
01435   forever {
01436     Lx.SkipWs();
01437     XmlDoc=LoadTxt(Lx);
01438     if (XmlDoc->IsOk()){XmlDocV.Add(XmlDoc);}
01439     else {break;}
01440   }
01441 }
01442 
01443 PXmlDoc TXmlDoc::LoadStr(const TStr& Str){
01444   PSIn SIn=TStrIn::New(Str);
01445   return LoadTxt(SIn);
01446 }
01447 
01448 void TXmlDoc::SaveStr(TStr& Str){
01449   PSOut SOut=TMOut::New(); TMOut& MOut=*(TMOut*)SOut();
01450   SaveTxt(SOut);
01451   Str=MOut.GetAsStr();
01452 }
01453 
01455 // Fast and dirty XML parser
01456 // very basic it does only <item>string</item>, no comments, no arguments
01457 TXmlLxSym TXmlParser::GetSym() {
01458   if (NextSym != xsyUndef) {
01459     Sym = NextSym;  NextSym=xsyUndef;
01460     SymStr=NextSymStr;  NextSymStr.Clr();
01461     return Sym;
01462   }
01463   SymStr.Clr();
01464   char Ch;
01465   while (TCh::IsWs(Ch=GetCh())) { }
01466   if (Ch == TCh::EofCh) { Sym = xsyEof; return xsyEof; }
01467   if (Ch == '<') { // load tag
01468     Ch = GetCh();
01469     if (Ch == '/') { Sym = xsyETag; }
01470     else { Sym = xsySTag;  SymStr.Push(Ch); }
01471     while((Ch=GetCh())!='>' && Ch!=TCh::EofCh) { SymStr.Push(Ch); }
01472     const int StrLen = SymStr.Len();
01473     if (StrLen > 1 && SymStr[StrLen-1] == '/') {
01474       Sym = xsyETag; SymStr[StrLen-1] = 0;
01475       for (char *c = SymStr.CStr()+StrLen-2; TCh::IsWs(*c); c--) { *c=0; }
01476     }
01477   } else { // load string
01478     _SymStr.Clr();  _SymStr.Push(Ch);
01479     while (! RSIn.Eof() && RSIn.PeekCh() != '<') { _SymStr.Push(GetCh()); }
01480     GetPlainStrFromXmlStr(_SymStr, SymStr);
01481     Sym = xsyStr;
01482   }
01483   if (Ch == TCh::EofCh) { SymStr.Clr(); Sym = xsyEof; return xsyEof; }
01484   return Sym;
01485 }
01486 
01487 TXmlLxSym TXmlParser::GetSym(TChA& _SymStr) {
01488   GetSym();
01489   _SymStr = SymStr;
01490   return Sym;
01491 }
01492 
01493 TXmlLxSym TXmlParser::PeekSym() {
01494   if (NextSym == xsyUndef) {
01495     const TXmlLxSym TmpSim=Sym;
01496     const TChA TmpSymStr=SymStr;
01497     NextSym=GetSym(NextSymStr);
01498     Sym=TmpSim;
01499     SymStr=TmpSymStr;
01500   }
01501   return NextSym;
01502 }
01503 
01504 TXmlLxSym TXmlParser::PeekSym(TChA& _SymStr) {
01505   PeekSym();
01506   _SymStr = NextSymStr;
01507   return NextSym;
01508 }
01509 
01510 void TXmlParser::SkipTillTag(const TChA& _SymStr) {
01511   while(PeekSym() != xsyEof) {
01512     if (NextSymStr == _SymStr) { return; }
01513     GetSym();
01514   }
01515 }
01516 
01517 // get <tag>value</tag>
01518 void TXmlParser::GetTagVal(const TChA& TagStr, TChA& TagVal) {
01519   EAssertR(GetTag(TagStr) == xsySTag, TStr::Fmt("Expected '<%s>'. Found '%s'", TagStr.CStr(), SymStr.CStr()).CStr());
01520   EAssertR(GetSym(TagVal) == xsyStr, "Expected string tag.");
01521   EAssertR(GetTag(TagStr) == xsyETag, TStr::Fmt("Expected '</%s>'. Found '%s'", TagStr.CStr(), SymStr.CStr()).CStr());
01522 }
01523 
01524 TXmlLxSym TXmlParser::GetTag(const TChA& TagStr) {
01525   GetSym();
01526   EAssertR(TagStr==SymStr, TStr::Fmt("Expected xml symbol '%s'. Found '%s'",
01527     TagStr.CStr(), SymStr.CStr()).CStr());
01528   return Sym;
01529 }
01530 
01531 void TXmlParser::GetPlainStrFromXmlStr(const TChA& XmlStr, TChA& PlainChA) {
01532   static TChA EntityNm;
01533   PlainChA.Clr();
01534   const char *Ch = XmlStr.CStr();
01535   while (*Ch){
01536     if (*Ch!='&'){ PlainChA+=*Ch; Ch++; }
01537     else {
01538       if (*++Ch=='#'){
01539         TChA RefChA; int RefCd=0;
01540         if (*++Ch=='x'){
01541           forever {  Ch++;
01542             if (TCh::IsHex(*Ch)){ RefChA+=*Ch;  RefCd=RefCd*16+TCh::GetHex(*Ch); }
01543             else { break; } }
01544         } else { // decimal character code
01545           forever {
01546             if (TCh::IsNum(*Ch)){ RefChA+=*Ch; RefCd=RefCd*10+TCh::GetNum(*Ch); }
01547             else { break; } Ch++; }
01548         }
01549         if ((!RefChA.Empty())&&(*Ch==';')){
01550           Ch++;  const uchar RefCh=uchar(RefCd);  PlainChA+=RefCh; }
01551       } else {
01552         EntityNm.Clr();
01553         while ((*Ch)&&(*Ch!=';')){EntityNm+=*Ch; Ch++;}
01554         if ((!EntityNm.Empty())&&(*Ch==';')){  Ch++;
01555           if (EntityNm=="quot"){PlainChA+='"';}
01556           else if (EntityNm=="amp"){PlainChA+='&';}
01557           else if (EntityNm=="apos"){PlainChA+='\'';}
01558           else if (EntityNm=="lt"){PlainChA+='<';}
01559           else if (EntityNm=="gt"){PlainChA+='>';}
01560         }
01561       }
01562     }
01563   }
01564 }