SNAP Library 2.1, User Reference  2013-09-25 10:47:25
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
THtmlLxChDef Class Reference

#include <html.h>

List of all members.

Public Member Functions

 THtmlLxChDef ()
 THtmlLxChDef (TSIn &SIn)
void Save (TSOut &SOut)
THtmlLxChDef & operator= (const THtmlLxChDef &)
int GetChTy (const char &Ch) const
bool IsEoln (const char &Ch) const
bool IsWs (const char &Ch) const
bool IsSpace (const char &Ch) const
bool IsAlpha (const char &Ch) const
bool IsNum (const char &Ch) const
bool IsAlNum (const char &Ch) const
bool IsSym (const char &Ch) const
bool IsUrl (const char &Ch) const
bool IsUc (const char &Ch) const
bool IsLc (const char &Ch) const
char GetUc (const char &Ch) const
char GetLc (const char &Ch) const
void GetUcChA (TChA &ChA) const
void GetLcChA (TChA &ChA) const
TStr GetUcStr (const TStr &Str) const
TStr GetLcStr (const TStr &Str) const
TStr GetEscStr (const TStr &Str) const

Static Public Member Functions

static PHtmlLxChDef Load (TSIn &SIn)
static PHtmlLxChDef GetChDef ()
static THtmlLxChDef & GetChDefRef ()
static TStr GetCSZFromYuascii (const TChA &ChA)
static TStr GetCSZFromWin1250 (const TChA &ChA)
static TStr GetWin1250FromYuascii (const TChA &ChA)
static TStr GetIsoCeFromYuascii (const TChA &ChA)

Static Public Attributes

static PHtmlLxChDef ChDef = PHtmlLxChDef(new THtmlLxChDef())

Private Member Functions

void SetUcCh (const char &UcCh, const char &LcCh)
void SetUcCh (const TStr &Str)
void SetChTy (const THtmlLxChTy &ChTy, const TStr &Str)
void SetEscStr (const TStr &SrcStr, const TStr &DstStr)

Private Attributes

TCRef CRef
TIntV ChTyV
TChV UcChV
TChV LcChV
TStrStrH EscStrH

Friends

class TPt< THtmlLxChDef >

Detailed Description

Definition at line 14 of file html.h.


Constructor & Destructor Documentation

THtmlLxChDef::THtmlLxChDef ( )

Definition at line 48 of file html.cpp.

                          :
  ChTyV(TCh::Vals), UcChV(TCh::Vals), LcChV(TCh::Vals), EscStrH(100){
  // Default constructor: fills the character-type table (ChTyV), the
  // upper-/lower-case tables (UcChV, LcChV) and the escape-sequence table
  // (EscStrH) with the built-in definitions below.

  // Character-Types
  // Every character starts out as hlctSpace; the SetChTy calls that follow
  // overwrite the entries of the characters they list.
  ChTyV.PutAll(TInt(hlctSpace));
  SetChTy(hlctAlpha, "ABCDEFGHIJKLMNOPQRSTUVWXYZ");
  SetChTy(hlctAlpha, "abcdefghijklmnopqrstuvwxyz");
  // '@' and '_' are classified as letters.
  SetChTy(hlctAlpha, "@_");
  SetChTy(hlctNum, "0123456789");
  SetChTy(hlctSym, "`~!#$%^&*()-=+[{]}\\|;:'\",<.>/?");
  // '<' and '>' were tagged hlctSym by the line above; the later assignment
  // wins, so they end up as hlctLTag / hlctRTag.
  SetChTy(hlctLTag, "<"); SetChTy(hlctRTag, ">");
  SetChTy(hlctEof, TStr(TCh::EofCh));
  // Every character code below 0 or above 127 is classified as a letter.
  for (int Ch=TCh::Mn; Ch<=TCh::Mx; Ch++){
    if ((Ch<0)||(127<Ch)){SetChTy(hlctAlpha, TStr(TCh(char(Ch))));}}
  //SetChTy(hlctSpace, TStr(TCh(char(160))));

  // Upper-Case
  // First map every character to itself, then register the real case pairs.
  // In each SetUcCh(string) call the first character is the upper-case form
  // and the remaining characters are its lower-case forms; the call also
  // marks all of them as hlctAlpha.
  {for (int Ch=TCh::Mn; Ch<=TCh::Mx; Ch++){
    SetUcCh(char(Ch), char(Ch));}}
  SetUcCh("Aa"); SetUcCh("\xc0\xe0"); SetUcCh("\xc1\xe1"); SetUcCh("\xc2\xe2");
  SetUcCh("\xc3\xe3"); SetUcCh("\xc4\xe4"); SetUcCh("\xc5\xe5"); SetUcCh("\xc6\xe6");
  SetUcCh("Bb"); SetUcCh("Cc"); SetUcCh("\xc7\xe7"); SetUcCh("Dd");
  SetUcCh("\xd0\xf0"); SetUcCh("Ee"); SetUcCh("\xc8\xe8"); SetUcCh("\xc9\xe9");
  SetUcCh("\xca\xea"); SetUcCh("\xcb\xeb"); SetUcCh("Ff"); SetUcCh("Gg");
  SetUcCh("Hh"); SetUcCh("Ii"); SetUcCh("\xcc\xec"); SetUcCh("\xcd\xed");
  SetUcCh("\xce\xee"); SetUcCh("\xcf\xef"); SetUcCh("Jj"); SetUcCh("Kk");
  SetUcCh("Ll"); SetUcCh("Mm"); SetUcCh("Nn"); SetUcCh("\xd1\xf1");
  SetUcCh("Oo"); SetUcCh("\xd2\xf2"); SetUcCh("\xd3\xf3"); SetUcCh("\xd4\xf4");
  SetUcCh("\xd5\xf5"); SetUcCh("\xd6\xf6"); SetUcCh("\xd8\xf8"); SetUcCh("Pp");
  SetUcCh("Qq"); SetUcCh("Rr"); SetUcCh("Ss"); SetUcCh("\x8a\x9a");
  // "Yy\xff": both 'y' and '\xff' get 'Y' as upper case, while 'Y' keeps 'y'
  // (the first one registered) as its single lower case.
  SetUcCh("Tt"); SetUcCh("Uu"); SetUcCh("\xd9\xf9"); SetUcCh("\xda\xfa");
  SetUcCh("\xdb\xfb"); SetUcCh("\xdc\xfc"); SetUcCh("Vv"); SetUcCh("Ww");
  SetUcCh("Xx"); SetUcCh("Yy\xff"); SetUcCh("\xdd\xfd"); SetUcCh("Zz");
  SetUcCh("\x8e\x9e");
  // ISO-CE
  //SetUcCh(uchar(169), uchar(185)); /*Sh - \xa9\xb9*/
  //SetUcCh(uchar(174), uchar(190)); /*Zh - \xae\xbe*/
  //SetUcCh(uchar(200), uchar(232)); /*Ch - \xc8\xe8*/
  //SetUcCh(uchar(198), uchar(230)); /*Cs - \xc6\xe6*/
  //SetUcCh(uchar(208), uchar(240)); /*Dz - \xd0\xf0*/

  // Annoying Unicode-characters
  //SetChTy(hlctSpace, "\xc2\xef");

  // Escape-Sequences
  // Keys are registered without the terminating ';'.
  SetEscStr("&quot", "\""); SetEscStr("&amp", "&");
  SetEscStr("&lt", "<"); SetEscStr("&gt", ">");
  SetEscStr("&nbsp", " ");

  // These entities are replaced by a single 8-bit character code.
  SetEscStr("&auml", "\xe4"); SetEscStr("&Auml", "\xc4");
  SetEscStr("&ouml", "\xf6"); SetEscStr("&Ouml", "\xd6");
  SetEscStr("&uuml", "\xfc"); SetEscStr("&Uuml", "\xdc");
  SetEscStr("&aring", "\xe5"); SetEscStr("&Aring", "\xc5");
  SetEscStr("&oslash", "\xf8"); SetEscStr("&Oslash", "\xd8");
  SetEscStr("&Aelig", "\xc6"); SetEscStr("&aelig", "\xe6");

  // These accented-letter entities are folded to the plain ASCII letter.
  SetEscStr("&eacute", "e"); SetEscStr("&Eacute", "E");
  SetEscStr("&egrave", "e"); SetEscStr("&Egrave", "E");
  SetEscStr("&agrave", "a"); SetEscStr("&Agrave", "A");
}
THtmlLxChDef::THtmlLxChDef ( TSIn & SIn) [inline]

Definition at line 26 of file html.h.

// Stream constructor: the four tables are read from SIn in the order
// ChTyV, UcChV, LcChV, EscStrH, which is the order Save() writes them.
: ChTyV(SIn), UcChV(SIn), LcChV(SIn), EscStrH(SIn){}

Member Function Documentation

static PHtmlLxChDef THtmlLxChDef::GetChDef ( ) [inline, static]

Definition at line 66 of file html.h.

// Returns the static ChDef smart pointer after asserting that it is not empty.
{IAssert(!ChDef.Empty()); return ChDef;}
static THtmlLxChDef& THtmlLxChDef::GetChDefRef ( ) [inline, static]

Definition at line 67 of file html.h.

// Returns a reference to the object held by the static ChDef pointer after
// asserting that the pointer is not empty.
{IAssert(!ChDef.Empty()); return *ChDef;}
int THtmlLxChDef::GetChTy ( const char &  Ch) const [inline]

Definition at line 34 of file html.h.

// Returns the character-type code stored for Ch. The table index is Ch-TCh::Mn
// (presumably so that negative char values map to non-negative indices).
{return ChTyV[Ch-TCh::Mn];}
TStr THtmlLxChDef::GetCSZFromWin1250 ( const TChA & ChA) [static]

Definition at line 132 of file html.cpp.

                                                   {
  // Returns a copy of ChA in which six byte values are replaced by the plain
  // ASCII letters c/C, s/S and z/Z; all other bytes are copied unchanged.
  // NOTE(review): in the Windows-1250 code page these six values are the
  // caron forms of c, s and z - confirm against the code-page table.
  TChA DstChA;
  for (int ChN=0; ChN<ChA.Len(); ChN++){
    // Read as uchar so that the case labels 138..232 compare against
    // unsigned byte values.
    const uchar Ch=ChA[ChN];
    switch (Ch){
      case 232: DstChA+='c'; break;
      case 200: DstChA+='C'; break;
      case 154: DstChA+='s'; break;
      case 138: DstChA+='S'; break;
      case 158: DstChA+='z'; break;
      case 142: DstChA+='Z'; break;
      default: DstChA+=Ch;
    }
  }
  return DstChA;
}
TStr THtmlLxChDef::GetCSZFromYuascii ( const TChA & ChA) [static]

Definition at line 111 of file html.cpp.

                                                   {
  // Returns a copy of ChA in which ten punctuation characters are replaced by
  // plain ASCII letters (c/C, d/D, s/S, z/Z); all other characters are copied
  // unchanged. '~' and '}' both become 'c', '^' and ']' both become 'C'.
  // NOTE(review): presumably these are the positions YUSCII uses for its
  // national letters - confirm against the YUSCII table.
  TChA DstChA;
  for (int ChN=0; ChN<ChA.Len(); ChN++){
    char Ch=ChA[ChN];
    switch (Ch){
      case '~': DstChA+='c'; break;
      case '^': DstChA+='C'; break;
      case '}': DstChA+='c'; break;
      case ']': DstChA+='C'; break;
      case '|': DstChA+='d'; break;
      case '\\': DstChA+='D'; break;
      case '{': DstChA+='s'; break;
      case '[': DstChA+='S'; break;
      case '`': DstChA+='z'; break;
      case '@': DstChA+='Z'; break;
      default: DstChA+=Ch;
    }
  }
  return DstChA;
}
TStr THtmlLxChDef::GetEscStr ( const TStr & Str) const

Definition at line 33 of file html.cpp.

                                                  {
  // Resolves an escape token to its replacement text, trying in order:
  //  1. an exact-match lookup of Str in EscStrH;
  //  2. a numeric reference, i.e. Str starting with "&#";
  //  3. otherwise a single space.
  int EscStrId;
  if ((EscStrId=EscStrH.GetKeyId(Str))!=-1){
    return EscStrH[EscStrId];
  } else
  if ((Str.Len()>=2)&&(Str[0]=='&')&&(Str[1]=='#')){
    // Accumulate everything after "&#" as a decimal number; accumulation
    // stops once the value exceeds 0xFFFF.
    // NOTE(review): the characters are not checked to be digits, so a hex
    // form such as "&#x41" or a trailing ';' contributes non-digit values.
    int ChCd=0;
    for (int ChN=2; ChN<Str.Len(); ChN++){
      if (ChCd<=0xFFFF){ChCd=ChCd*10+Str[ChN]-'0';}}
    // NOTE(review): the code is narrowed to a single char, so values that do
    // not fit in a char are truncated.
    return TStr((char)ChCd);
  } else {
    return TStr(' ');
  }
}
TStr THtmlLxChDef::GetIsoCeFromYuascii ( const TChA & ChA) [static]

Definition at line 170 of file html.cpp.

                                                     {
  // Returns a copy of ChA in which ten punctuation characters are replaced by
  // 8-bit character codes; all other characters are copied unchanged.
  // The emitted codes are the same values that the commented-out "ISO-CE"
  // case pairs in the default constructor list (169/185, 174/190, 200/232,
  // 198/230, 208/240).
  TChA DstChA;
  for (int ChN=0; ChN<ChA.Len(); ChN++){
    char Ch=ChA[ChN];
    switch (Ch){
      case '~': DstChA+=uchar(232); break;
      case '^': DstChA+=uchar(200); break;
      case '}': DstChA+=uchar(230); break;
      case ']': DstChA+=uchar(198); break;
      case '|': DstChA+=uchar(240); break;
      case '\\': DstChA+=uchar(208); break;
      case '{': DstChA+=uchar(185); break;
      case '[': DstChA+=uchar(169); break;
      case '`': DstChA+=uchar(190); break;
      case '@': DstChA+=uchar(174); break;
      default: DstChA+=Ch;
    }
  }
  return DstChA;
}
char THtmlLxChDef::GetLc ( const char &  Ch) const [inline]

Definition at line 53 of file html.h.

// Returns the lower-case form stored for Ch in LcChV (indexed by Ch-TCh::Mn).
{return LcChV[Ch-TCh::Mn];}
void THtmlLxChDef::GetLcChA ( TChA & ChA) const [inline]

Definition at line 56 of file html.h.

                                 {
    // Overwrites each character of ChA with its GetLc() mapping.
    for (int ChN=0; ChN<ChA.Len(); ChN++){ChA.PutCh(ChN, GetLc(ChA[ChN]));}}
TStr THtmlLxChDef::GetLcStr ( const TStr & Str) const [inline]

Definition at line 60 of file html.h.

                                       {
    // Copies Str into a TChA, lower-cases the copy with GetLcChA and returns it;
    // Str itself is not modified.
    TChA ChA(Str); GetLcChA(ChA); return ChA;}
char THtmlLxChDef::GetUc ( const char &  Ch) const [inline]

Definition at line 52 of file html.h.

// Returns the upper-case form stored for Ch in UcChV (indexed by Ch-TCh::Mn).
{return UcChV[Ch-TCh::Mn];}
void THtmlLxChDef::GetUcChA ( TChA & ChA) const [inline]

Definition at line 54 of file html.h.

                                 {
    // Overwrites each character of ChA with its GetUc() mapping.
    for (int ChN=0; ChN<ChA.Len(); ChN++){ChA.PutCh(ChN, GetUc(ChA[ChN]));}}
TStr THtmlLxChDef::GetUcStr ( const TStr & Str) const [inline]

Definition at line 58 of file html.h.

                                       {
    // Copies Str into a TChA, upper-cases the copy with GetUcChA and returns it;
    // Str itself is not modified.
    TChA ChA(Str); GetUcChA(ChA); return ChA;}
TStr THtmlLxChDef::GetWin1250FromYuascii ( const TChA & ChA) [static]

Definition at line 149 of file html.cpp.

                                                       {
  // Returns a copy of ChA in which ten punctuation characters are replaced;
  // all other characters are copied unchanged. Six of them ('~', '^', '{',
  // '[', '`', '@') become the same 8-bit codes that GetCSZFromWin1250
  // recognizes.
  // NOTE(review): the other four ('}', ']', '|', '\\') are folded to the plain
  // ASCII letters c/C/d/D instead of 8-bit codes, unlike GetIsoCeFromYuascii
  // which emits 8-bit codes for all ten - confirm this is intentional.
  TChA DstChA;
  for (int ChN=0; ChN<ChA.Len(); ChN++){
    char Ch=ChA[ChN];
    switch (Ch){
      case '~': DstChA+=uchar(232); break;
      case '^': DstChA+=uchar(200); break;
      case '}': DstChA+='c'; break;
      case ']': DstChA+='C'; break;
      case '|': DstChA+='d'; break;
      case '\\': DstChA+='D'; break;
      case '{': DstChA+=uchar(154); break;
      case '[': DstChA+=uchar(138); break;
      case '`': DstChA+=uchar(158); break;
      case '@': DstChA+=uchar(142); break;
      default: DstChA+=Ch;
    }
  }
  return DstChA;
}
bool THtmlLxChDef::IsAlNum ( const char &  Ch) const [inline]

Definition at line 41 of file html.h.

                                     {
    // True when the type stored for Ch is hlctAlpha or hlctNum.
    return (int(ChTyV[Ch-TCh::Mn])==hlctAlpha)||(int(ChTyV[Ch-TCh::Mn])==hlctNum);}
bool THtmlLxChDef::IsAlpha ( const char &  Ch) const [inline]

Definition at line 39 of file html.h.

// True when the type stored for Ch is hlctAlpha.
{return int(ChTyV[Ch-TCh::Mn])==hlctAlpha;}
bool THtmlLxChDef::IsEoln ( const char &  Ch) const [inline]

Definition at line 35 of file html.h.

// True when Ch equals TCh::CrCh or TCh::LfCh; does not consult ChTyV.
{return (Ch==TCh::CrCh)||(Ch==TCh::LfCh);}
bool THtmlLxChDef::IsLc ( const char &  Ch) const [inline]

Definition at line 51 of file html.h.

// True when Ch is its own lower-case form, i.e. the LcChV entry for Ch is Ch.
{return Ch==LcChV[Ch-TCh::Mn];}
bool THtmlLxChDef::IsNum ( const char &  Ch) const [inline]

Definition at line 40 of file html.h.

// True when the type stored for Ch is hlctNum.
{return int(ChTyV[Ch-TCh::Mn])==hlctNum;}
bool THtmlLxChDef::IsSpace ( const char &  Ch) const [inline]

Definition at line 38 of file html.h.

// True when the type stored for Ch is hlctSpace.
{return int(ChTyV[Ch-TCh::Mn])==hlctSpace;}
bool THtmlLxChDef::IsSym ( const char &  Ch) const [inline]

Definition at line 43 of file html.h.

// True when the type stored for Ch is hlctSym.
{return int(ChTyV[Ch-TCh::Mn])==hlctSym;}
bool THtmlLxChDef::IsUc ( const char &  Ch) const [inline]

Definition at line 50 of file html.h.

// True when Ch is its own upper-case form, i.e. the UcChV entry for Ch is Ch.
{return Ch==UcChV[Ch-TCh::Mn];}
bool THtmlLxChDef::IsUrl ( const char &  Ch) const [inline]

Definition at line 44 of file html.h.

                                   {
    // True when Ch has type hlctAlpha or hlctNum, or is one of the five
    // punctuation characters '.', '-', ':', '/', '~'.
    int ChTy=ChTyV[Ch-TCh::Mn];
    return (ChTy==hlctAlpha)||(ChTy==hlctNum)||
     (Ch=='.')||(Ch=='-')||(Ch==':')||(Ch=='/')||(Ch=='~');}
bool THtmlLxChDef::IsWs ( const char &  Ch) const [inline]

Definition at line 36 of file html.h.

                                  {
    // True for a blank, TCh::TabCh, TCh::CrCh or TCh::LfCh; does not consult ChTyV.
    return (Ch==' ')||(Ch==TCh::TabCh)||(Ch==TCh::CrCh)||(Ch==TCh::LfCh);}
static PHtmlLxChDef THtmlLxChDef::Load ( TSIn & SIn) [inline, static]

Definition at line 27 of file html.h.

// Allocates a new THtmlLxChDef with the stream constructor; the pointer is
// returned as a PHtmlLxChDef.
{return new THtmlLxChDef(SIn);}
THtmlLxChDef& THtmlLxChDef::operator= ( const THtmlLxChDef & ) [inline]

Definition at line 31 of file html.h.

// Assignment copies nothing: the body only invokes Fail (presumably an
// abort/assert macro - confirm) and returns *this.
{Fail; return *this;}
void THtmlLxChDef::Save ( TSOut & SOut) [inline]

Definition at line 28 of file html.h.

                        {
    // Writes the four tables to SOut in the order ChTyV, UcChV, LcChV, EscStrH,
    // which is the order the stream constructor reads them back.
    ChTyV.Save(SOut); UcChV.Save(SOut); LcChV.Save(SOut); EscStrH.Save(SOut);}
void THtmlLxChDef::SetChTy ( const THtmlLxChTy & ChTy,
const TStr & Str 
) [private]

Definition at line 24 of file html.cpp.

                                                                  {
  // Assigns type ChTy to every character that occurs in Str, overwriting
  // whatever type was stored for it before.
  for (int ChN=0; ChN<Str.Len(); ChN++){
    ChTyV[Str[ChN]-TCh::Mn]=TInt(ChTy);}
}
void THtmlLxChDef::SetEscStr ( const TStr & SrcStr,
const TStr & DstStr 
) [private]

Definition at line 29 of file html.cpp.

                                                                  {
  // Registers DstStr in EscStrH as the replacement text for the key SrcStr.
  EscStrH.AddDat(SrcStr, DstStr);
}
void THtmlLxChDef::SetUcCh ( const char &  UcCh,
const char &  LcCh 
) [private]

Definition at line 3 of file html.cpp.

                                                            {
  // Registers the case pair (UcCh, LcCh): LcCh always gets UcCh as its
  // upper-case form; UcCh gets LcCh as its lower-case form only if it does
  // not already have a different one.
  // update upper-case (more lower cases may have one upper case)
  // Precondition: the upper-case entry of LcCh is still 0 or still LcCh itself.
  IAssert(
   (UcChV[LcCh-TCh::Mn]==TCh(0))||
   (UcChV[LcCh-TCh::Mn]==TCh(LcCh)));
  UcChV[LcCh-TCh::Mn]=TCh(UcCh);
  // update lower-case (one upper case may have only one lower case)
  // Written only while the entry is still 0 or still UcCh itself, so the
  // first lower-case form registered for UcCh is the one that is kept.
  if ((LcChV[UcCh-TCh::Mn]==TCh(0))||(LcChV[UcCh-TCh::Mn]==TCh(UcCh))){
    LcChV[UcCh-TCh::Mn]=TCh(LcCh);
  }
}
void THtmlLxChDef::SetUcCh ( const TStr & Str) [private]

Definition at line 15 of file html.cpp.

                                         {
  // Registers a group of case-related characters given as one string:
  // Str[0] is the upper-case form and Str[1..] are its lower-case forms.
  // set type of characters as letters
  SetChTy(hlctAlpha, Str);
  // first char in string is upper-case, rest are lower-case
  // The loop starts at index 1, so a one-character string registers no pair.
  for (int ChN=1; ChN<Str.Len(); ChN++){
    SetUcCh(Str[0], Str[ChN]);
  }
}

Friends And Related Function Documentation

friend class TPt< THtmlLxChDef > [friend]

Definition at line 14 of file html.h.


Member Data Documentation

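PHtmlLxChDef THtmlLxChDef::ChDef = PHtmlLxChDef(new THtmlLxChDef()) [static]

Definition at line 65 of file html.h.

TIntV THtmlLxChDef::ChTyV [private]

Definition at line 16 of file html.h.

TCRef THtmlLxChDef::CRef [private]

Definition at line 14 of file html.h.

TStrStrH THtmlLxChDef::EscStrH [private]

Definition at line 19 of file html.h.

TChV THtmlLxChDef::LcChV [private]

Definition at line 18 of file html.h.

TChV THtmlLxChDef::UcChV [private]

Definition at line 17 of file html.h.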


The documentation for this class was generated from the following files: