SNAP Library 2.1, Developer Reference  2013-09-25 10:47:25
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
THtmlLxChDef Class Reference

#include <html.h>

Collaboration diagram for THtmlLxChDef:

List of all members.

Public Member Functions

 THtmlLxChDef ()
 THtmlLxChDef (TSIn &SIn)
void Save (TSOut &SOut)
THtmlLxChDef & operator= (const THtmlLxChDef &)
int GetChTy (const char &Ch) const
bool IsEoln (const char &Ch) const
bool IsWs (const char &Ch) const
bool IsSpace (const char &Ch) const
bool IsAlpha (const char &Ch) const
bool IsNum (const char &Ch) const
bool IsAlNum (const char &Ch) const
bool IsSym (const char &Ch) const
bool IsUrl (const char &Ch) const
bool IsUc (const char &Ch) const
bool IsLc (const char &Ch) const
char GetUc (const char &Ch) const
char GetLc (const char &Ch) const
void GetUcChA (TChA &ChA) const
void GetLcChA (TChA &ChA) const
TStr GetUcStr (const TStr &Str) const
TStr GetLcStr (const TStr &Str) const
TStr GetEscStr (const TStr &Str) const

Static Public Member Functions

static PHtmlLxChDef Load (TSIn &SIn)
static PHtmlLxChDef GetChDef ()
static THtmlLxChDef & GetChDefRef ()
static TStr GetCSZFromYuascii (const TChA &ChA)
static TStr GetCSZFromWin1250 (const TChA &ChA)
static TStr GetWin1250FromYuascii (const TChA &ChA)
static TStr GetIsoCeFromYuascii (const TChA &ChA)

Static Public Attributes

static PHtmlLxChDef ChDef = PHtmlLxChDef(new THtmlLxChDef())

Private Member Functions

void SetUcCh (const char &UcCh, const char &LcCh)
void SetUcCh (const TStr &Str)
void SetChTy (const THtmlLxChTy &ChTy, const TStr &Str)
void SetEscStr (const TStr &SrcStr, const TStr &DstStr)

Private Attributes

TCRef CRef
TIntV ChTyV
TChV UcChV
TChV LcChV
TStrStrH EscStrH

Friends

class TPt< THtmlLxChDef >

Detailed Description

Definition at line 14 of file html.h.


Constructor & Destructor Documentation

THtmlLxChDef::THtmlLxChDef ( )

Definition at line 48 of file html.cpp.

References ChTyV, TCh::EofCh, hlctAlpha, hlctEof, hlctLTag, hlctNum, hlctRTag, hlctSpace, hlctSym, TCh::Mn, TCh::Mx, TVec< TVal, TSizeTy >::PutAll(), SetChTy(), SetEscStr(), and SetUcCh().

                          :
  ChTyV(TCh::Vals), UcChV(TCh::Vals), LcChV(TCh::Vals), EscStrH(100){
  // Default constructor: fills the character-type table (ChTyV), the
  // upper/lower-case tables (UcChV, LcChV) and the escape-sequence table
  // (EscStrH) with their built-in contents.

  // Character-Types
  // every character starts as hlctSpace; the calls below re-classify the rest
  ChTyV.PutAll(TInt(hlctSpace));
  SetChTy(hlctAlpha, "ABCDEFGHIJKLMNOPQRSTUVWXYZ");
  SetChTy(hlctAlpha, "abcdefghijklmnopqrstuvwxyz");
  // '@' and '_' are classified as letters as well
  SetChTy(hlctAlpha, "@_");
  SetChTy(hlctNum, "0123456789");
  // the symbol list contains '<' and '>', but the next line overrides both
  // with the dedicated tag-delimiter types
  SetChTy(hlctSym, "`~!#$%^&*()-=+[{]}\\|;:'\",<.>/?");
  SetChTy(hlctLTag, "<"); SetChTy(hlctRTag, ">");
  SetChTy(hlctEof, TStr(TCh::EofCh));
  // every character code outside 0..127 is classified as a letter
  for (int Ch=TCh::Mn; Ch<=TCh::Mx; Ch++){
    if ((Ch<0)||(127<Ch)){SetChTy(hlctAlpha, TStr(TCh(char(Ch))));}}
  //SetChTy(hlctSpace, TStr(TCh(char(160))));

  // Upper-Case
  // start from the identity mapping: each character is its own upper/lower case
  {for (int Ch=TCh::Mn; Ch<=TCh::Mx; Ch++){
    SetUcCh(char(Ch), char(Ch));}}
  // each string below is "<upper-case char><lower-case char(s)>";
  // SetUcCh(TStr) also marks all of its characters as hlctAlpha
  SetUcCh("Aa"); SetUcCh("\xc0\xe0"); SetUcCh("\xc1\xe1"); SetUcCh("\xc2\xe2");
  SetUcCh("\xc3\xe3"); SetUcCh("\xc4\xe4"); SetUcCh("\xc5\xe5"); SetUcCh("\xc6\xe6");
  SetUcCh("Bb"); SetUcCh("Cc"); SetUcCh("\xc7\xe7"); SetUcCh("Dd");
  SetUcCh("\xd0\xf0"); SetUcCh("Ee"); SetUcCh("\xc8\xe8"); SetUcCh("\xc9\xe9");
  SetUcCh("\xca\xea"); SetUcCh("\xcb\xeb"); SetUcCh("Ff"); SetUcCh("Gg");
  SetUcCh("Hh"); SetUcCh("Ii"); SetUcCh("\xcc\xec"); SetUcCh("\xcd\xed");
  SetUcCh("\xce\xee"); SetUcCh("\xcf\xef"); SetUcCh("Jj"); SetUcCh("Kk");
  SetUcCh("Ll"); SetUcCh("Mm"); SetUcCh("Nn"); SetUcCh("\xd1\xf1");
  SetUcCh("Oo"); SetUcCh("\xd2\xf2"); SetUcCh("\xd3\xf3"); SetUcCh("\xd4\xf4");
  SetUcCh("\xd5\xf5"); SetUcCh("\xd6\xf6"); SetUcCh("\xd8\xf8"); SetUcCh("Pp");
  SetUcCh("Qq"); SetUcCh("Rr"); SetUcCh("Ss"); SetUcCh("\x8a\x9a");
  SetUcCh("Tt"); SetUcCh("Uu"); SetUcCh("\xd9\xf9"); SetUcCh("\xda\xfa");
  SetUcCh("\xdb\xfb"); SetUcCh("\xdc\xfc"); SetUcCh("Vv"); SetUcCh("Ww");
  // "Yy\xff": both 'y' and \xff get 'Y' as their upper case
  SetUcCh("Xx"); SetUcCh("Yy\xff"); SetUcCh("\xdd\xfd"); SetUcCh("Zz");
  SetUcCh("\x8e\x9e");
  // ISO-CE
  //SetUcCh(uchar(169), uchar(185)); /*Sh - \xa9\xb9*/
  //SetUcCh(uchar(174), uchar(190)); /*Zh - \xae\xbe*/
  //SetUcCh(uchar(200), uchar(232)); /*Ch - \xc8\xe8*/
  //SetUcCh(uchar(198), uchar(230)); /*Cs - \xc6\xe6*/
  //SetUcCh(uchar(208), uchar(240)); /*Dz - \xd0\xf0*/

  // Annoying Unicode-characters
  //SetChTy(hlctSpace, "\xc2\xef");

  // Escape-Sequences
  // keys are the entity names without a trailing ';'
  SetEscStr("&quot", "\""); SetEscStr("&amp", "&");
  SetEscStr("&lt", "<"); SetEscStr("&gt", ">");
  // &nbsp is mapped to an ordinary space
  SetEscStr("&nbsp", " ");

  SetEscStr("&auml", "\xe4"); SetEscStr("&Auml", "\xc4");
  SetEscStr("&ouml", "\xf6"); SetEscStr("&Ouml", "\xd6");
  SetEscStr("&uuml", "\xfc"); SetEscStr("&Uuml", "\xdc");
  SetEscStr("&aring", "\xe5"); SetEscStr("&Aring", "\xc5");
  SetEscStr("&oslash", "\xf8"); SetEscStr("&Oslash", "\xd8");
  SetEscStr("&Aelig", "\xc6"); SetEscStr("&aelig", "\xe6");

  // these accented e/a entities are mapped to the unaccented ASCII letter
  SetEscStr("&eacute", "e"); SetEscStr("&Eacute", "E");
  SetEscStr("&egrave", "e"); SetEscStr("&Egrave", "E");
  SetEscStr("&agrave", "a"); SetEscStr("&Agrave", "A");
}

Here is the call graph for this function:

THtmlLxChDef::THtmlLxChDef ( TSIn & SIn) [inline]

Definition at line 26 of file html.h.

// Stream constructor: each of the four tables is constructed from SIn.
: ChTyV(SIn), UcChV(SIn), LcChV(SIn), EscStrH(SIn){}

Member Function Documentation

static PHtmlLxChDef THtmlLxChDef::GetChDef ( ) [inline, static]

Definition at line 66 of file html.h.

References TPt< TRec >::Empty(), and IAssert.

// Returns the static ChDef smart pointer after asserting that it is not empty.
{IAssert(!ChDef.Empty()); return ChDef;}

Here is the call graph for this function:

static THtmlLxChDef& THtmlLxChDef::GetChDefRef ( ) [inline, static]

Definition at line 67 of file html.h.

References TPt< TRec >::Empty(), and IAssert.

// Returns a reference to the object held by the static ChDef pointer after
// asserting that the pointer is not empty.
{IAssert(!ChDef.Empty()); return *ChDef;}

Here is the call graph for this function:

int THtmlLxChDef::GetChTy ( const char &  Ch) const [inline]

Definition at line 34 of file html.h.

References TCh::Mn.

Referenced by THtmlLx::GetSym().

// Returns the character type stored for Ch; the table index is Ch-TCh::Mn.
{return ChTyV[Ch-TCh::Mn];}

Here is the caller graph for this function:

TStr THtmlLxChDef::GetCSZFromWin1250 ( const TChA & ChA) [static]

Definition at line 132 of file html.cpp.

References TChA::Len().

                                                   {
  // Returns a copy of ChA in which six specific byte values are replaced by
  // the plain letters c/C, s/S, z/Z; all other characters are copied as-is.
  // NOTE(review): the byte values are presumably the Windows-1250 codes of
  // the accented letters, as the function name suggests -- confirm.
  TChA DstChA;
  for (int ChN=0; ChN<ChA.Len(); ChN++){
    // held as uchar; all case labels below are values above 127
    const uchar Ch=ChA[ChN];
    switch (Ch){
      case 232: DstChA+='c'; break;
      case 200: DstChA+='C'; break;
      case 154: DstChA+='s'; break;
      case 138: DstChA+='S'; break;
      case 158: DstChA+='z'; break;
      case 142: DstChA+='Z'; break;
      default: DstChA+=Ch;
    }
  }
  return DstChA;
}

Here is the call graph for this function:

TStr THtmlLxChDef::GetCSZFromYuascii ( const TChA & ChA) [static]

Definition at line 111 of file html.cpp.

References TChA::Len().

                                                   {
  // Returns a copy of ChA in which ten ASCII punctuation characters are
  // replaced by plain letters (c/C, d/D, s/S, z/Z); all other characters are
  // copied unchanged.
  // NOTE(review): the punctuation characters are presumably the YUSCII
  // stand-ins for accented letters, as the function name suggests -- confirm.
  TChA DstChA;
  for (int ChN=0; ChN<ChA.Len(); ChN++){
    char Ch=ChA[ChN];
    switch (Ch){
      // two source pairs ('~','^' and '}',']') both collapse to c/C
      case '~': DstChA+='c'; break;
      case '^': DstChA+='C'; break;
      case '}': DstChA+='c'; break;
      case ']': DstChA+='C'; break;
      case '|': DstChA+='d'; break;
      case '\\': DstChA+='D'; break;
      case '{': DstChA+='s'; break;
      case '[': DstChA+='S'; break;
      case '`': DstChA+='z'; break;
      case '@': DstChA+='Z'; break;
      default: DstChA+=Ch;
    }
  }
  return DstChA;
}

Here is the call graph for this function:

TStr THtmlLxChDef::GetEscStr ( const TStr & Str) const

Definition at line 33 of file html.cpp.

References EscStrH, THash< TKey, TDat, THashFunc >::GetKeyId(), and TStr::Len().

Referenced by THtmlLx::GetEscCh().

                                                  {
  // Translates an escape sequence Str into its replacement string:
  //  - if Str is a key of EscStrH, the stored replacement is returned;
  //  - otherwise, if Str starts with "&#", the characters after the prefix
  //    are accumulated as a decimal number and returned as a one-character
  //    string;
  //  - otherwise a single space is returned.
  int EscStrId;
  if ((EscStrId=EscStrH.GetKeyId(Str))!=-1){
    return EscStrH[EscStrId];
  } else
  if ((Str.Len()>=2)&&(Str[0]=='&')&&(Str[1]=='#')){
    int ChCd=0;
    // accumulation stops once ChCd has exceeded 0xFFFF
    // NOTE(review): characters are not checked to be digits here; presumably
    // the caller passes digits only -- confirm
    for (int ChN=2; ChN<Str.Len(); ChN++){
      if (ChCd<=0xFFFF){ChCd=ChCd*10+Str[ChN]-'0';}}
    // NOTE(review): the code is narrowed to char by the cast, so values that
    // do not fit in a char are not preserved
    return TStr((char)ChCd);
  } else {
    return TStr(' ');
  }
}

Here is the call graph for this function:

Here is the caller graph for this function:

TStr THtmlLxChDef::GetIsoCeFromYuascii ( const TChA & ChA) [static]

Definition at line 170 of file html.cpp.

References TChA::Len().

                                                     {
  // Returns a copy of ChA in which ten ASCII punctuation characters are
  // replaced by single byte values above 127; all other characters are
  // copied unchanged.
  // NOTE(review): the byte values are presumably the ISO Central-European
  // codes of the accented letters, as the function name suggests -- confirm.
  TChA DstChA;
  for (int ChN=0; ChN<ChA.Len(); ChN++){
    char Ch=ChA[ChN];
    switch (Ch){
      case '~': DstChA+=uchar(232); break;
      case '^': DstChA+=uchar(200); break;
      case '}': DstChA+=uchar(230); break;
      case ']': DstChA+=uchar(198); break;
      case '|': DstChA+=uchar(240); break;
      case '\\': DstChA+=uchar(208); break;
      case '{': DstChA+=uchar(185); break;
      case '[': DstChA+=uchar(169); break;
      case '`': DstChA+=uchar(190); break;
      case '@': DstChA+=uchar(174); break;
      default: DstChA+=Ch;
    }
  }
  return DstChA;
}

Here is the call graph for this function:

char THtmlLxChDef::GetLc ( const char &  Ch) const [inline]

Definition at line 53 of file html.h.

References TCh::Mn.

// Returns the lower-case table entry for Ch; the table index is Ch-TCh::Mn.
{return LcChV[Ch-TCh::Mn];}
void THtmlLxChDef::GetLcChA ( TChA & ChA) const [inline]

Definition at line 56 of file html.h.

References TChA::Len(), and TChA::PutCh().

                                 {
    // Converts ChA in place: every character is overwritten with GetLc() of itself.
    for (int ChN=0; ChN<ChA.Len(); ChN++){ChA.PutCh(ChN, GetLc(ChA[ChN]));}}

Here is the call graph for this function:

TStr THtmlLxChDef::GetLcStr ( const TStr & Str) const [inline]

Definition at line 60 of file html.h.

                                       {
    // Copies Str into a TChA, lower-cases the copy with GetLcChA() and returns it.
    TChA ChA(Str); GetLcChA(ChA); return ChA;}
char THtmlLxChDef::GetUc ( const char &  Ch) const [inline]

Definition at line 52 of file html.h.

References TCh::Mn.

Referenced by THtmlLx::GetSym(), and THtmlLx::GetTag().

// Returns the upper-case table entry for Ch; the table index is Ch-TCh::Mn.
{return UcChV[Ch-TCh::Mn];}

Here is the caller graph for this function:

void THtmlLxChDef::GetUcChA ( TChA & ChA) const [inline]

Definition at line 54 of file html.h.

References TChA::Len(), and TChA::PutCh().

                                 {
    // Converts ChA in place: every character is overwritten with GetUc() of itself.
    for (int ChN=0; ChN<ChA.Len(); ChN++){ChA.PutCh(ChN, GetUc(ChA[ChN]));}}

Here is the call graph for this function:

TStr THtmlLxChDef::GetUcStr ( const TStr & Str) const [inline]

Definition at line 58 of file html.h.

                                       {
    // Copies Str into a TChA, upper-cases the copy with GetUcChA() and returns it.
    TChA ChA(Str); GetUcChA(ChA); return ChA;}
TStr THtmlLxChDef::GetWin1250FromYuascii ( const TChA & ChA) [static]

Definition at line 149 of file html.cpp.

References TChA::Len().

                                                       {
  // Returns a copy of ChA in which ten ASCII punctuation characters are
  // replaced; all other characters are copied unchanged.
  // Six of them become byte values above 127, while '}', ']', '|' and '\\'
  // become the plain letters c, C, d and D.
  // NOTE(review): the byte values are presumably Windows-1250 codes, as the
  // function name suggests -- confirm.
  TChA DstChA;
  for (int ChN=0; ChN<ChA.Len(); ChN++){
    char Ch=ChA[ChN];
    switch (Ch){
      case '~': DstChA+=uchar(232); break;
      case '^': DstChA+=uchar(200); break;
      case '}': DstChA+='c'; break;
      case ']': DstChA+='C'; break;
      case '|': DstChA+='d'; break;
      case '\\': DstChA+='D'; break;
      case '{': DstChA+=uchar(154); break;
      case '[': DstChA+=uchar(138); break;
      case '`': DstChA+=uchar(158); break;
      case '@': DstChA+=uchar(142); break;
      default: DstChA+=Ch;
    }
  }
  return DstChA;
}

Here is the call graph for this function:

bool THtmlLxChDef::IsAlNum ( const char &  Ch) const [inline]

Definition at line 41 of file html.h.

References hlctAlpha, hlctNum, and TCh::Mn.

Referenced by THtmlLx::GetSym(), and THtmlLx::GetTag().

                                     {
    // True when the character type stored for Ch is hlctAlpha or hlctNum.
    return (int(ChTyV[Ch-TCh::Mn])==hlctAlpha)||(int(ChTyV[Ch-TCh::Mn])==hlctNum);}

Here is the caller graph for this function:

bool THtmlLxChDef::IsAlpha ( const char &  Ch) const [inline]

Definition at line 39 of file html.h.

References hlctAlpha, and TCh::Mn.

Referenced by THtmlLx::GetSym(), and THtmlLx::GetTag().

// True when the character type stored for Ch is hlctAlpha.
{return int(ChTyV[Ch-TCh::Mn])==hlctAlpha;}

Here is the caller graph for this function:

bool THtmlLxChDef::IsEoln ( const char &  Ch) const [inline]

Definition at line 35 of file html.h.

References TCh::CrCh, and TCh::LfCh.

Referenced by THtmlLx::GetTag().

// True when Ch equals TCh::CrCh or TCh::LfCh; the type table is not consulted.
{return (Ch==TCh::CrCh)||(Ch==TCh::LfCh);}

Here is the caller graph for this function:

bool THtmlLxChDef::IsLc ( const char &  Ch) const [inline]

Definition at line 51 of file html.h.

References TCh::Mn.

// True when Ch is equal to its own entry in the lower-case table.
{return Ch==LcChV[Ch-TCh::Mn];}
bool THtmlLxChDef::IsNum ( const char &  Ch) const [inline]

Definition at line 40 of file html.h.

References hlctNum, and TCh::Mn.

Referenced by THtmlLx::GetSym().

// True when the character type stored for Ch is hlctNum.
{return int(ChTyV[Ch-TCh::Mn])==hlctNum;}

Here is the caller graph for this function:

bool THtmlLxChDef::IsSpace ( const char &  Ch) const [inline]

Definition at line 38 of file html.h.

References hlctSpace, and TCh::Mn.

Referenced by THtmlLx::GetSym().

// True when the character type stored for Ch is hlctSpace.
{return int(ChTyV[Ch-TCh::Mn])==hlctSpace;}

Here is the caller graph for this function:

bool THtmlLxChDef::IsSym ( const char &  Ch) const [inline]

Definition at line 43 of file html.h.

References hlctSym, and TCh::Mn.

// True when the character type stored for Ch is hlctSym.
{return int(ChTyV[Ch-TCh::Mn])==hlctSym;}
bool THtmlLxChDef::IsUc ( const char &  Ch) const [inline]

Definition at line 50 of file html.h.

References TCh::Mn.

// True when Ch is equal to its own entry in the upper-case table.
{return Ch==UcChV[Ch-TCh::Mn];}
bool THtmlLxChDef::IsUrl ( const char &  Ch) const [inline]

Definition at line 44 of file html.h.

References hlctAlpha, hlctNum, and TCh::Mn.

                                   {
    // True when Ch has type hlctAlpha or hlctNum, or is one of the
    // characters '.', '-', ':', '/', '~'.
    int ChTy=ChTyV[Ch-TCh::Mn];
    return (ChTy==hlctAlpha)||(ChTy==hlctNum)||
     (Ch=='.')||(Ch=='-')||(Ch==':')||(Ch=='/')||(Ch=='~');}
bool THtmlLxChDef::IsWs ( const char &  Ch) const [inline]

Definition at line 36 of file html.h.

References TCh::CrCh, TCh::LfCh, and TCh::TabCh.

Referenced by THtmlLx::GetTag().

                                  {
    // True when Ch is a blank, TCh::TabCh, TCh::CrCh or TCh::LfCh; the type
    // table is not consulted.
    return (Ch==' ')||(Ch==TCh::TabCh)||(Ch==TCh::CrCh)||(Ch==TCh::LfCh);}

Here is the caller graph for this function:

static PHtmlLxChDef THtmlLxChDef::Load ( TSIn & SIn) [inline, static]

Definition at line 27 of file html.h.

// Creates a new THtmlLxChDef from SIn via the stream constructor and returns
// it as a PHtmlLxChDef.
{return new THtmlLxChDef(SIn);}
THtmlLxChDef& THtmlLxChDef::operator= ( const THtmlLxChDef & ) [inline]

Definition at line 31 of file html.h.

References Fail.

// Invokes Fail before returning *this; nothing is copied from the argument.
// NOTE(review): Fail presumably aborts, i.e. assignment is unsupported -- confirm.
{Fail; return *this;}
void THtmlLxChDef::Save ( TSOut & SOut) [inline]

Definition at line 28 of file html.h.

                        {
    // Writes the four tables to SOut in the order ChTyV, UcChV, LcChV, EscStrH.
    ChTyV.Save(SOut); UcChV.Save(SOut); LcChV.Save(SOut); EscStrH.Save(SOut);}
void THtmlLxChDef::SetChTy ( const THtmlLxChTy & ChTy,
const TStr & Str 
) [private]

Definition at line 24 of file html.cpp.

References ChTyV, TStr::Len(), and TCh::Mn.

Referenced by SetUcCh(), and THtmlLxChDef().

                                                                  {
  // Assigns the character type ChTy to every character that occurs in Str.
  for (int ChN=0; ChN<Str.Len(); ChN++){
    ChTyV[Str[ChN]-TCh::Mn]=TInt(ChTy);}
}

Here is the call graph for this function:

Here is the caller graph for this function:

void THtmlLxChDef::SetEscStr ( const TStr & SrcStr,
const TStr & DstStr 
) [private]

Definition at line 29 of file html.cpp.

References THash< TKey, TDat, THashFunc >::AddDat(), and EscStrH.

Referenced by THtmlLxChDef().

                                                                  {
  // Registers DstStr as the replacement for the escape sequence SrcStr.
  EscStrH.AddDat(SrcStr, DstStr);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void THtmlLxChDef::SetUcCh ( const char &  UcCh,
const char &  LcCh 
) [private]

Definition at line 3 of file html.cpp.

References IAssert, LcChV, TCh::Mn, and UcChV.

Referenced by SetUcCh(), and THtmlLxChDef().

                                                            {
  // Registers UcCh as the upper case of LcCh and, when UcCh has no other
  // lower case registered yet, LcCh as the lower case of UcCh.

  // update upper-case (more lower cases may have one upper case)
  // LcCh's current upper-case entry must still be unset (0) or LcCh itself
  IAssert(
   (UcChV[LcCh-TCh::Mn]==TCh(0))||
   (UcChV[LcCh-TCh::Mn]==TCh(LcCh)));
  UcChV[LcCh-TCh::Mn]=TCh(UcCh);
  // update lower-case (one upper case may have only one lower case)
  // the entry is overwritten only while it is unset (0) or UcCh itself, so
  // a different lower case registered earlier is kept
  if ((LcChV[UcCh-TCh::Mn]==TCh(0))||(LcChV[UcCh-TCh::Mn]==TCh(UcCh))){
    LcChV[UcCh-TCh::Mn]=TCh(LcCh);
  }
}

Here is the caller graph for this function:

void THtmlLxChDef::SetUcCh ( const TStr & Str) [private]

Definition at line 15 of file html.cpp.

References hlctAlpha, TStr::Len(), SetChTy(), and SetUcCh().

                                         {
  // Registers a case group given as a string: Str[0] is the upper-case
  // character and every following character is one of its lower cases.

  // set type of characters as letters
  SetChTy(hlctAlpha, Str);
  // first char in string is upper-case, rest are lower-case
  for (int ChN=1; ChN<Str.Len(); ChN++){
    SetUcCh(Str[0], Str[ChN]);
  }
}

Here is the call graph for this function:


Friends And Related Function Documentation

friend class TPt< THtmlLxChDef > [friend]

Definition at line 14 of file html.h.


Member Data Documentation

PHtmlLxChDef THtmlLxChDef::ChDef = PHtmlLxChDef(new THtmlLxChDef()) [static]

Definition at line 65 of file html.h.

TIntV THtmlLxChDef::ChTyV [private]

Definition at line 16 of file html.h.

Referenced by SetChTy(), and THtmlLxChDef().

TCRef THtmlLxChDef::CRef [private]

Definition at line 14 of file html.h.

TStrStrH THtmlLxChDef::EscStrH [private]

Definition at line 19 of file html.h.

Referenced by GetEscStr(), and SetEscStr().

TChV THtmlLxChDef::LcChV [private]

Definition at line 18 of file html.h.

Referenced by SetUcCh().

TChV THtmlLxChDef::UcChV [private]

Definition at line 17 of file html.h.

Referenced by SetUcCh().


The documentation for this class was generated from the following files: