SNAP Library, Developer Reference  2012-10-15 15:06:59
SNAP, a general purpose network analysis and graph mining library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
html.h File Reference
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Classes

class  THtmlLx

Enumerations

enum  THtmlLxChTy {
  hlctSpace, hlctAlpha, hlctNum, hlctSym,
  hlctLTag, hlctRTag, hlctEof
}
enum  THtmlLxSym {
  hsyUndef, hsyStr, hsyNum, hsySSym,
  hsyUrl, hsyBTag, hsyETag, hsyMTag,
  hsyEof
}
enum  THtmlDocType {
  hdtAll, hdtStr, hdtStrNum, hdtTag,
  hdtA, hdtHRef, hdtUL
}

Functions

 ClassHdTP (THtmlTok, PHtmlTok) ClassHdTP(THtmlDoc
void SetUcCh (const char &UcCh, const char &LcCh)
void SetUcCh (const TStr &Str)
void SetChTy (const THtmlLxChTy &ChTy, const TStr &Str)
void SetEscStr (const TStr &SrcStr, const TStr &DstStr)
 THtmlLxChDef ()
 THtmlLxChDef (TSIn &SIn)
static PHtmlLxChDef Load (TSIn &SIn)
void Save (TSOut &SOut)
THtmlLxChDef & operator= (const THtmlLxChDef &)
int GetChTy (const char &Ch) const
bool IsEoln (const char &Ch) const
bool IsWs (const char &Ch) const
bool IsSpace (const char &Ch) const
bool IsAlpha (const char &Ch) const
bool IsNum (const char &Ch) const
bool IsAlNum (const char &Ch) const
bool IsSym (const char &Ch) const
bool IsUrl (const char &Ch) const
bool IsUc (const char &Ch) const
bool IsLc (const char &Ch) const
char GetUc (const char &Ch) const
char GetLc (const char &Ch) const
void GetUcChA (TChA &ChA) const
void GetLcChA (TChA &ChA) const
TStr GetUcStr (const TStr &Str) const
TStr GetLcStr (const TStr &Str) const
TStr GetEscStr (const TStr &Str) const
static PHtmlLxChDef GetChDef ()
static THtmlLxChDef & GetChDefRef ()
static TStr GetCSZFromYuascii (const TChA &ChA)
static TStr GetCSZFromWin1250 (const TChA &ChA)
static TStr GetWin1250FromYuascii (const TChA &ChA)
static TStr GetIsoCeFromYuascii (const TChA &ChA)
 THtmlTok ()
 THtmlTok (const THtmlLxSym &_Sym)
 THtmlTok (const THtmlLxSym &_Sym, const TStr &_Str)
 THtmlTok (const THtmlLxSym &_Sym, const TStr &_Str, const THtmlLx::TArgNmValV &_ArgNmValV)
 THtmlTok (TSIn &)
THtmlTok & operator= (const THtmlTok &)
THtmlLxSym GetSym () const
TStr GetStr () const
TStr GetFullStr () const
bool IsArg (const TStr &ArgNm) const
TStr GetArg (const TStr &ArgNm) const
TStr GetArg (const TStr &ArgNm, const TStr &DfArgVal) const
bool IsUrlTok (TStr &RelUrlStr) const
bool IsRedirUrlTok () const
void SaveTxt (const PSOut &SOut, const bool &TxtMode=true)
static bool IsBreakTag (const TStr &TagNm)
static bool IsBreakTok (const PHtmlTok &Tok)
static bool IsHTag (const TStr &TagNm, int &HTagN)
static PHtmlTok GetHTok (const bool &IsBTag, const int &HTagN)
 THtmlDoc ()
 THtmlDoc (const PSIn &SIn, const THtmlDocType &Type=hdtAll, const bool &DoUc=true)
static PHtmlDoc New (const PSIn &SIn, const THtmlDocType &Type=hdtAll, const bool &DoUc=true)
 THtmlDoc (TSIn &)
THtmlDoc & operator= (const THtmlDoc &)
int GetToks () const
PHtmlTok GetTok (const int &TokN) const
PHtmlTok GetTok (const int &TokN, THtmlLxSym &Sym, TStr &Str) const
void AddTokV (const THtmlTokV &_TokV)
static TStr GetTxtLnDoc (const TStr &HtmlStr)
static TStr GetTxtLnDoc (const TStr &HtmlStr, const TStr &BaseUrlStr, const bool &OutUrlP, const bool &OutTagsP)
static PHtmlDoc LoadTxt (const TStr &FNm, const THtmlDocType &Type=hdtAll, const bool &DoUc=true)
static void SaveHtmlToTxt (const TStr &HtmlStr, const PSOut &TxtSOut, const TStr &BaseUrlStr, const bool &OutUrlP, const bool &OutToksP)
static void SaveHtmlToTxt (const TStr &HtmlStr, const TStr &TxtFNm, const TStr &BaseUrlStr, const bool &OutUrlP, const bool &OutToksP)
static void SaveHtmlToXml (const TStr &HtmlStr, const PSOut &XmlSOut, const TStr &BaseUrlStr, const bool &OutTextP, const bool &OutUrlP, const bool &OutToksP, const bool &OutTagsP, const bool &OutArgsP)
static void SaveHtmlToXml (const TStr &HtmlStr, const TStr &XmlFNm, const TStr &BaseUrlStr, const bool &OutTextP, const bool &OutUrlP, const bool &OutToksP, const bool &OutTagsP, const bool &OutArgsP)
static TLxSym GetLxSym (const THtmlLxSym &HtmlLxSym, const TChA &ChA)
static bool _IsTagRedir (const TStr &TagStr, const TStr &ArgNm, THtmlLx &Lx, const TStr &BaseUrlStr, const TStr &RedirUrlStr)
static TStr GetRedirHtmlDocStr (const TStr &HtmlStr, const TStr &BaseUrlStr, const TStr &RedirUrlStr)
 THtmlHldV (const PHtmlDoc &_RefHtmlDoc, const int &HldWnLen=10)
 THtmlHldV (TSIn &)
THtmlHldV & operator= (const THtmlHldV &)
PHtmlDoc GetRefHtmlDoc ()
int GetHlds ()
PHtmlDoc GetHld (const int &HldN)
 TWebPg ()
 TWebPg (const TStrV &_UrlStrV, const TStrV &_IpNumV, const PHttpResp &_HttpResp)
static PWebPg New (const TStrV &UrlStrV, const TStrV &IpNumV, const PHttpResp &HttpResp)
static PWebPg New (const TStrV &UrlStrV, const PHttpResp &HttpResp)
static PWebPg New (const TStr &UrlStr, const PHttpResp &HttpResp)
 ~TWebPg ()
 TWebPg (TSIn &)
TWebPg & operator= (const TWebPg &)
int GetUrls () const
TStr GetUrlStr (const int &UrlN=-1) const
PUrl GetUrl (const int &UrlN=-1) const
int GetIps () const
TStr GetIpNum (const int &IpN=-1) const
PHttpResp GetHttpResp () const
TStr GetHttpHdStr () const
TStr GetHttpBodyAsStr () const
void GetOutUrlV (TUrlV &OutUrlV, TUrlV &OutRedirUrlV) const
void GetOutUrlV (TUrlV &OutUrlV) const
void GetOutDescUrlStrKdV (TStrKdV &OutDescUrlStrKdV) const
void PutFetchMSecs (const uint64 &_FetchMSecs)
uint64 GetFetchMSecs () const
void SaveAsHttpBody (const TStr &FNm) const
void SaveAsHttp (const TStr &FNm) const
bool IsTxt () const

Variables

ClassTP(THtmlLxChDef,
PHtmlLxChDef) private TChV 
UcChV
TChV LcChV
TStrStrH EscStrH
static PHtmlLxChDef ChDef
ClassTPV(THtmlTok, PHtmlTok,
THtmlTokV) private TStr 
Str
THtmlLx::TArgNmValV ArgNmValV
static const TStr ATagNm = "<A>"
static const TStr AreaTagNm = "<AREA>"
static const TStr BrTagNm = "<BR>"
static const TStr CardTagNm = "<CARD>"
static const TStr CenterTagNm = "<CENTER>"
static const TStr FrameTagNm = "<FRAME>"
static const TStr H1TagNm = "<H1>"
static const TStr H2TagNm = "<H2>"
static const TStr H3TagNm = "<H3>"
static const TStr H4TagNm = "<H4>"
static const TStr H5TagNm = "<H5>"
static const TStr H6TagNm = "<H6>"
static const TStr ImgTagNm = "<IMG>"
static const TStr LiTagNm = "<LI>"
static const TStr MetaTagNm = "<META>"
static const TStr PTagNm = "<P>"
static const TStr UlTagNm = "<UL>"
static const TStr TitleTagNm = "<TITLE>"
static const TStr TitleETagNm = "</TITLE>"
static const TStr AltArgNm = "ALT"
static const TStr HRefArgNm = "HREF"
static const TStr SrcArgNm = "SRC"
static const TStr TitleArgNm = "TITLE"
static const TStr HttpEquivArgNm = "HTTP-EQUIV"
ClassTP(THtmlHldV, PHtmlHldV)
private THtmlDocV 
HldV
ClassTPV(TWebPg, PWebPg,
TWebPgV) private TStrV 
IpNumV
PHttpResp HttpResp
uint64 FetchMSecs

Enumeration Type Documentation

enum THtmlDocType
Enumerator:
hdtAll 
hdtStr 
hdtStrNum 
hdtTag 
hdtA 
hdtHRef 
hdtUL 

Definition at line 249 of file html.h.

enum THtmlLxChTy
Enumerator:
hlctSpace 
hlctAlpha 
hlctNum 
hlctSym 
hlctLTag 
hlctRTag 
hlctEof 

Definition at line 8 of file html.h.

enum THtmlLxSym
Enumerator:
hsyUndef 
hsyStr 
hsyNum 
hsySSym 
hsyUrl 
hsyBTag 
hsyETag 
hsyMTag 
hsyEof 

Definition at line 76 of file html.h.


Function Documentation

bool THtmlDoc::_IsTagRedir ( const TStr &  TagStr,
const TStr &  ArgNm,
THtmlLx &  Lx,
const TStr &  BaseUrlStr,
const TStr &  RedirUrlStr 
) [static]

Definition at line 1106 of file html.cpp.

References THtmlLx::ChA, THtmlLx::GetArg(), hsyBTag, IAssert, THtmlLx::IsArg(), New(), THtmlLx::PutArg(), THtmlLx::Sym, UrlStr, and usHttp.

Referenced by GetRedirHtmlDocStr().

                                                 {
  IAssert(Lx.Sym==hsyBTag);
  if ((Lx.ChA==TagStr)&&(Lx.IsArg(ArgNm))){
    TStr RelUrlStr=Lx.GetArg(ArgNm);
    PUrl Url=TUrl::New(RelUrlStr, BaseUrlStr);
    if (Url->IsOk(usHttp)){
      TStr UrlStr=Url->GetUrlStr();
      PUrlEnv RedirUrlEnv=TUrlEnv::New(RedirUrlStr, "url", UrlStr);
      Lx.PutArg(ArgNm, RedirUrlEnv->GetFullUrlStr());
      return true;
    } else {
      return false;
    }
  } else {
    return false;
  }
}

Here is the call graph for this function:

Here is the caller graph for this function:

void AddTokV ( const THtmlTokV &  _TokV)

Definition at line 272 of file html.h.

{TokV.AddV(_TokV);}
ClassHdTP ( THtmlTok  ,
PHtmlTok   
)
TStr GetArg ( const TStr &  ArgNm) const

Definition at line 205 of file html.h.

References ArgNmValV, and TVec< TVal >::SearchForw().

Referenced by AddTokToChA(), IsRedirUrlTok(), and IsUrlTok().

                                       {
    return ArgNmValV[ArgNmValV.SearchForw(TStrKd(ArgNm))].Dat;}

Here is the call graph for this function:

Here is the caller graph for this function:

TStr GetArg ( const TStr &  ArgNm,
const TStr &  DfArgVal 
) const

Definition at line 207 of file html.h.

References ArgNmValV, and TVec< TVal >::SearchForw().

                                                             {
    int ArgN=ArgNmValV.SearchForw(TStrKd(ArgNm));
    if (ArgN==-1){return DfArgVal;} else {return ArgNmValV[ArgN].Dat;}}

Here is the call graph for this function:

static PHtmlLxChDef GetChDef ( ) [static]

Definition at line 64 of file html.h.

References ChDef, and IAssert.

{IAssert(!ChDef.Empty()); return ChDef;}
static THtmlLxChDef& GetChDefRef ( ) [static]

Definition at line 65 of file html.h.

References ChDef, and IAssert.

{IAssert(!ChDef.Empty()); return *ChDef;}
int GetChTy ( const char &  Ch) const

Definition at line 32 of file html.h.

References TCh::Mn.

{return ChTyV[Ch-TCh::Mn];}
TStr THtmlLxChDef::GetCSZFromWin1250 ( const TChA &  ChA) [static]

Definition at line 132 of file html.cpp.

References TChA::Len().

                                                   {
  TChA DstChA;
  for (int ChN=0; ChN<ChA.Len(); ChN++){
    const uchar Ch=ChA[ChN];
    switch (Ch){
      case 232: DstChA+='c'; break;
      case 200: DstChA+='C'; break;
      case 154: DstChA+='s'; break;
      case 138: DstChA+='S'; break;
      case 158: DstChA+='z'; break;
      case 142: DstChA+='Z'; break;
      default: DstChA+=Ch;
    }
  }
  return DstChA;
}

Here is the call graph for this function:

TStr THtmlLxChDef::GetCSZFromYuascii ( const TChA &  ChA) [static]

Definition at line 111 of file html.cpp.

References TChA::Len().

                                                   {
  TChA DstChA;
  for (int ChN=0; ChN<ChA.Len(); ChN++){
    char Ch=ChA[ChN];
    switch (Ch){
      case '~': DstChA+='c'; break;
      case '^': DstChA+='C'; break;
      case '}': DstChA+='c'; break;
      case ']': DstChA+='C'; break;
      case '|': DstChA+='d'; break;
      case '\\': DstChA+='D'; break;
      case '{': DstChA+='s'; break;
      case '[': DstChA+='S'; break;
      case '`': DstChA+='z'; break;
      case '@': DstChA+='Z'; break;
      default: DstChA+=Ch;
    }
  }
  return DstChA;
}

Here is the call graph for this function:

TStr THtmlLxChDef::GetEscStr ( const TStr &  Str) const

Definition at line 33 of file html.cpp.

References EscStrH, THash< TKey, TDat, THashFunc >::GetKeyId(), and TStr::Len().

                                                  {
  int EscStrId;
  if ((EscStrId=EscStrH.GetKeyId(Str))!=-1){
    return EscStrH[EscStrId];
  } else
  if ((Str.Len()>=2)&&(Str[0]=='&')&&(Str[1]=='#')){
    int ChCd=0;
    for (int ChN=2; ChN<Str.Len(); ChN++){
      if (ChCd<=0xFFFF){ChCd=ChCd*10+Str[ChN]-'0';}}
    return TStr((char)ChCd);
  } else {
    return TStr(' ');
  }
}

Here is the call graph for this function:

uint64 GetFetchMSecs ( ) const

Definition at line 375 of file html.h.

References FetchMSecs.

{return FetchMSecs;}

TStr THtmlTok::GetFullStr ( ) const

Definition at line 628 of file html.cpp.

References ArgNmValV, GetStr(), TStr::GetSubStr(), hsyBTag, hsyETag, TVec< TVal >::Len(), and TStr::Len().

                                {
  if ((Sym==hsyBTag)&&(ArgNmValV.Len()>0)){
    TChA FullChA;
    FullChA+=Str.GetSubStr(0, Str.Len()-2);
    for (int ArgNmValN=0; ArgNmValN<ArgNmValV.Len(); ArgNmValN++){
      FullChA+=' '; FullChA+=ArgNmValV[ArgNmValN].Key; FullChA+='=';
      FullChA+='"'; FullChA+=ArgNmValV[ArgNmValN].Dat; FullChA+='"';
    }
    FullChA+='>';
    return FullChA;
  } else
  if (Sym==hsyETag){
    TChA FullChA;
    FullChA+='<'; FullChA+='/'; FullChA+=Str.GetSubStr(1, Str.Len()-1);
    return FullChA;
  } else {
    return GetStr();
  }
}

Here is the call graph for this function:

PHtmlDoc GetHld ( const int &  HldN)

Definition at line 323 of file html.h.

References HldV.

{return HldV[HldN];}
int GetHlds ( )

Definition at line 322 of file html.h.

References HldV.

{return HldV.Len();}
PHtmlTok THtmlTok::GetHTok ( const bool &  IsBTag,
const int &  HTagN 
) [static]

Definition at line 762 of file html.cpp.

References Fail, H1TagNm, H2TagNm, H3TagNm, H4TagNm, H5TagNm, H6TagNm, hsyBTag, hsyETag, and THtmlTok().

Referenced by THtmlHldV().

                                                              {
  THtmlLxSym HTagSym=IsBTag?hsyBTag:hsyETag;
  TStr HTagNm;
  switch (HTagN){
    case 1: HTagNm=H1TagNm; break;
    case 2: HTagNm=H2TagNm; break;
    case 3: HTagNm=H3TagNm; break;
    case 4: HTagNm=H4TagNm; break;
    case 5: HTagNm=H5TagNm; break;
    case 6: HTagNm=H6TagNm; break;
    default: Fail;
  }
  return PHtmlTok(new THtmlTok(HTagSym, HTagNm));
}

Here is the call graph for this function:

Here is the caller graph for this function:

TStr GetHttpBodyAsStr ( ) const

Definition at line 366 of file html.h.

References GetHttpResp().

Referenced by GetOutDescUrlStrKdV(), and GetOutUrlV().

{return GetHttpResp()->GetBodyAsStr();}

Here is the call graph for this function:

Here is the caller graph for this function:

TStr GetHttpHdStr ( ) const

Definition at line 365 of file html.h.

References GetHttpResp().

{return GetHttpResp()->GetHdStr();}

Here is the call graph for this function:

PHttpResp GetHttpResp ( ) const

Definition at line 364 of file html.h.

References HttpResp.

Referenced by GetHttpBodyAsStr(), and GetHttpHdStr().

{return HttpResp;}

Here is the caller graph for this function:

TStr GetIpNum ( const int &  IpN = -1) const

Definition at line 361 of file html.h.

References IpNumV, and TVec< TVal >::Last().

Referenced by GetIpNumOrHostNm().

                                         {
    if (IpN==-1){return IpNumV.Last();} else {return IpNumV[IpN];}}

Here is the call graph for this function:

Here is the caller graph for this function:

int GetIps ( ) const

Definition at line 360 of file html.h.

References IpNumV, and TVec< TVal >::Len().

{return IpNumV.Len();}

Here is the call graph for this function:

TStr THtmlLxChDef::GetIsoCeFromYuascii ( const TChA &  ChA) [static]

Definition at line 170 of file html.cpp.

References TChA::Len().

                                                     {
  TChA DstChA;
  for (int ChN=0; ChN<ChA.Len(); ChN++){
    char Ch=ChA[ChN];
    switch (Ch){
      case '~': DstChA+=uchar(232); break;
      case '^': DstChA+=uchar(200); break;
      case '}': DstChA+=uchar(230); break;
      case ']': DstChA+=uchar(198); break;
      case '|': DstChA+=uchar(240); break;
      case '\\': DstChA+=uchar(208); break;
      case '{': DstChA+=uchar(185); break;
      case '[': DstChA+=uchar(169); break;
      case '`': DstChA+=uchar(190); break;
      case '@': DstChA+=uchar(174); break;
      default: DstChA+=Ch;
    }
  }
  return DstChA;
}

Here is the call graph for this function:

char GetLc ( const char &  Ch) const

Definition at line 51 of file html.h.

References LcChV, and TCh::Mn.

Referenced by GetLcChA().

{return LcChV[Ch-TCh::Mn];}

Here is the caller graph for this function:

void GetLcChA ( TChA &  ChA) const

Definition at line 54 of file html.h.

References GetLc(), TChA::Len(), and TChA::PutCh().

Referenced by GetLcStr().

                                 {
    for (int ChN=0; ChN<ChA.Len(); ChN++){ChA.PutCh(ChN, GetLc(ChA[ChN]));}}

Here is the call graph for this function:

Here is the caller graph for this function:

TStr GetLcStr ( const TStr &  Str) const

Definition at line 58 of file html.h.

References GetLcChA().

                                       {
    TChA ChA(Str); GetLcChA(ChA); return ChA;}

Here is the call graph for this function:

TLxSym THtmlDoc::GetLxSym ( const THtmlLxSym &  HtmlLxSym,
const TChA &  ChA 
) [static]

Definition at line 1092 of file html.cpp.

References Fail, TLxSymStr::GetSSym(), hsyBTag, hsyEof, hsyETag, hsyNum, hsySSym, hsyStr, hsyUndef, hsyUrl, syEof, syFlt, syStr, and syUndef.

                                                                     {
  switch (HtmlLxSym){
    case hsyUndef: return syUndef;
    case hsyStr: return syStr;
    case hsyNum: return syFlt;
    case hsySSym: return TLxSymStr::GetSSym(ChA);
    case hsyUrl: return syStr;
    case hsyBTag: return syStr;
    case hsyETag: return syStr;
    case hsyEof: return syEof;
    default: Fail; return syUndef;
  }
}

Here is the call graph for this function:

void TWebPg::GetOutDescUrlStrKdV ( TStrKdV &  OutDescUrlStrKdV) const

Definition at line 1258 of file html.cpp.

References TVec< TVal >::Add(), TVec< TVal >::Clr(), TChA::Empty(), GetHttpBodyAsStr(), GetUrlStr(), hsyBTag, hsyETag, hsyNum, hsySSym, hsyStr, New(), TStrIn::New(), RelUrlStr, Tok, and UrlStr.

                                                                {
  // create outgoing url vector
  OutDescUrlStrKdV.Clr();
  // take interesting web-page components
  TStr UrlStr=GetUrlStr();
  TStr HtmlStr=GetHttpBodyAsStr();
  // prepare html parsing
  PSIn HtmlSIn=TStrIn::New(HtmlStr);
  PHtmlDoc HtmlDoc=THtmlDoc::New(HtmlSIn);
  // traverse html documents
  PHtmlTok Tok; THtmlLxSym TokSym; TStr TokStr;
  int TokN=0; int Toks=HtmlDoc->GetToks();
  while (TokN<Toks){
    Tok=HtmlDoc->GetTok(TokN, TokSym, TokStr); TokN++;
    if ((TokSym==hsyBTag)&&(TokStr==THtmlTok::ATagNm)){
      TStr RelUrlStr;
      if (Tok->IsUrlTok(RelUrlStr)){
        PUrl Url=TUrl::New(RelUrlStr, UrlStr);
        if (Url->IsOk()){
          TChA DescChA;
          while (TokN<Toks){
            Tok=HtmlDoc->GetTok(TokN, TokSym, TokStr); TokN++;
            if ((TokSym==hsyETag)&&(TokStr==THtmlTok::ATagNm)){
              break;
            } else {
              if ((TokSym==hsyStr)||(TokSym==hsyNum)||(TokSym==hsySSym)){
                if (!DescChA.Empty()){DescChA+=' ';}
                DescChA+=TokStr;
              }
            }
          }
          OutDescUrlStrKdV.Add(TStrKd(DescChA, Url->GetUrlStr()));
        }
      }
    }
  }
}

Here is the call graph for this function:

void TWebPg::GetOutUrlV ( TUrlV &  OutUrlV,
TUrlV &  OutRedirUrlV 
) const

Definition at line 1230 of file html.cpp.

References GetHttpBodyAsStr(), GetUrlStr(), hsyBTag, New(), TStrIn::New(), RelUrlStr, Tok, UrlStr, and usHttp.

Referenced by GetOutUrlV().

                                                                 {
  // create outgoing url vector
  OutUrlV.Clr(); OutRedirUrlV.Clr();
  // take interesting web-page components
  TStr UrlStr=GetUrlStr();
  TStr HtmlStr=GetHttpBodyAsStr();
  // prepare html parsing
  PSIn HtmlSIn=TStrIn::New(HtmlStr);
  PHtmlDoc HtmlDoc=THtmlDoc::New(HtmlSIn);
  PHtmlTok Tok;
  // traverse html
  for (int TokN=0; TokN<HtmlDoc->GetToks(); TokN++){
    PHtmlTok Tok=HtmlDoc->GetTok(TokN);
    if (Tok->GetSym()==hsyBTag){
      TStr RelUrlStr;
      if (Tok->IsUrlTok(RelUrlStr)){
        PUrl Url=TUrl::New(RelUrlStr, UrlStr);
        if (Url->IsOk(usHttp)){
          OutUrlV.Add(Url);
          if (Tok->IsRedirUrlTok()){
            OutRedirUrlV.Add(Url);
          }
        }
      }
    }
  }
}

Here is the call graph for this function:

Here is the caller graph for this function:

void GetOutUrlV ( TUrlV &  OutUrlV) const

Definition at line 369 of file html.h.

References GetOutUrlV().

                                        {
    TUrlV OutRedirUrlV; GetOutUrlV(OutUrlV, OutRedirUrlV);}

Here is the call graph for this function:

TStr THtmlDoc::GetRedirHtmlDocStr ( const TStr &  HtmlStr,
const TStr &  BaseUrlStr,
const TStr &  RedirUrlStr 
) [static]

Definition at line 1126 of file html.cpp.

References _IsTagRedir(), TMOut::GetAsStr(), THtmlLx::GetFullBTagStr(), THtmlLx::GetSym(), hsyBTag, hsyEof, TStrIn::New(), THtmlLx::PreSpaceChA, TSOut::PutStr(), THtmlLx::Sym, and THtmlLx::SymChA.

                                                 {
  PSIn SIn=TStrIn::New(HtmlStr);
  TMOut SOut;
  THtmlLx Lx(SIn);
  while (Lx.GetSym()!=hsyEof){
    SOut.PutStr(Lx.PreSpaceChA);
    if ((Lx.Sym==hsyBTag)&&(
     (_IsTagRedir(THtmlTok::ATagNm, THtmlTok::HRefArgNm, Lx, BaseUrlStr, RedirUrlStr))||
     (_IsTagRedir(THtmlTok::AreaTagNm, THtmlTok::HRefArgNm, Lx, BaseUrlStr, RedirUrlStr))||
     (_IsTagRedir(THtmlTok::FrameTagNm, THtmlTok::SrcArgNm, Lx, BaseUrlStr, RedirUrlStr))||
     (_IsTagRedir(THtmlTok::ImgTagNm, THtmlTok::SrcArgNm, Lx, BaseUrlStr, RedirUrlStr)))){
      SOut.PutStr(Lx.GetFullBTagStr());
    } else {
      SOut.PutStr(Lx.SymChA());
    }
  }
  return SOut.GetAsStr();
}

Here is the call graph for this function:

PHtmlDoc GetRefHtmlDoc ( )

Definition at line 321 of file html.h.

{return RefHtmlDoc;}
TStr GetStr ( ) const

Definition at line 201 of file html.h.

{return Str;}
THtmlLxSym GetSym ( ) const

Definition at line 200 of file html.h.

Referenced by IsRedirUrlTok(), and IsUrlTok().

{return Sym;}

Here is the caller graph for this function:

PHtmlTok GetTok ( const int &  TokN) const

Definition at line 269 of file html.h.

Referenced by LoadTxtElement(), and SaveTxt().

{return TokV[TokN];}

Here is the caller graph for this function:

PHtmlTok GetTok ( const int &  TokN,
THtmlLxSym &  Sym,
TStr &  Str 
) const

Definition at line 270 of file html.h.

References TStr::GetStr().

                                                                     {
    Sym=TokV[TokN]->GetSym(); Str=TokV[TokN]->GetStr(); return TokV[TokN];}

Here is the call graph for this function:

int GetToks ( ) const

Definition at line 268 of file html.h.

{return TokV.Len();}
static TStr GetTxtLnDoc ( const TStr &  HtmlStr) [static]
static TStr GetTxtLnDoc ( const TStr &  HtmlStr,
const TStr &  BaseUrlStr,
const bool &  OutUrlP,
const bool &  OutTagsP 
) [static]
char GetUc ( const char &  Ch) const

Definition at line 50 of file html.h.

References TCh::Mn, and UcChV.

Referenced by GetUcChA(), and THttp::IsHtmlFExt().

{return UcChV[Ch-TCh::Mn];}

Here is the caller graph for this function:

void GetUcChA ( TChA &  ChA) const

Definition at line 52 of file html.h.

References GetUc(), TChA::Len(), and TChA::PutCh().

Referenced by GetUcStr().

                                 {
    for (int ChN=0; ChN<ChA.Len(); ChN++){ChA.PutCh(ChN, GetUc(ChA[ChN]));}}

Here is the call graph for this function:

Here is the caller graph for this function:

TStr GetUcStr ( const TStr &  Str) const

Definition at line 56 of file html.h.

References GetUcChA().

                                       {
    TChA ChA(Str); GetUcChA(ChA); return ChA;}

Here is the call graph for this function:

PUrl GetUrl ( const int &  UrlN = -1) const

Definition at line 355 of file html.h.

References New(), and UrlStr.

                                        {
    TStr UrlStr;
    if (UrlN==-1){UrlStr=UrlStrV.Last();} else {UrlStr=UrlStrV[UrlN];}
    return TUrl::New(UrlStr);}

Here is the call graph for this function:

int GetUrls ( ) const

Definition at line 352 of file html.h.

{return UrlStrV.Len();}
TStr GetUrlStr ( const int &  UrlN = -1) const

Definition at line 353 of file html.h.

Referenced by GetOutDescUrlStrKdV(), and GetOutUrlV().

                                           {
    if (UrlN==-1){return UrlStrV.Last();} else {return UrlStrV[UrlN];}}

Here is the caller graph for this function:

TStr THtmlLxChDef::GetWin1250FromYuascii ( const TChA &  ChA) [static]

Definition at line 149 of file html.cpp.

References TChA::Len().

                                                       {
  TChA DstChA;
  for (int ChN=0; ChN<ChA.Len(); ChN++){
    char Ch=ChA[ChN];
    switch (Ch){
      case '~': DstChA+=uchar(232); break;
      case '^': DstChA+=uchar(200); break;
      case '}': DstChA+='c'; break;
      case ']': DstChA+='C'; break;
      case '|': DstChA+='d'; break;
      case '\\': DstChA+='D'; break;
      case '{': DstChA+=uchar(154); break;
      case '[': DstChA+=uchar(138); break;
      case '`': DstChA+=uchar(158); break;
      case '@': DstChA+=uchar(142); break;
      default: DstChA+=Ch;
    }
  }
  return DstChA;
}

Here is the call graph for this function:

bool IsAlNum ( const char &  Ch) const

Definition at line 39 of file html.h.

References hlctAlpha, hlctNum, and TCh::Mn.

Referenced by IsNmStr(), and TStrUtil::SplitWords().

                                     {
    return (int(ChTyV[Ch-TCh::Mn])==hlctAlpha)||(int(ChTyV[Ch-TCh::Mn])==hlctNum);}

Here is the caller graph for this function:

bool IsAlpha ( const char &  Ch) const

Definition at line 37 of file html.h.

References hlctAlpha, and TCh::Mn.

Referenced by IsNmStr().

{return int(ChTyV[Ch-TCh::Mn])==hlctAlpha;}

Here is the caller graph for this function:

bool IsArg ( const TStr &  ArgNm) const

Definition at line 203 of file html.h.

References ArgNmValV, and TVec< TVal >::SearchForw().

Referenced by IsRedirUrlTok(), and IsUrlTok().

                                      {
    return ArgNmValV.SearchForw(TStrKd(ArgNm))!=-1;}

Here is the call graph for this function:

Here is the caller graph for this function:

bool THtmlTok::IsBreakTag ( const TStr &  TagNm) [static]

Definition at line 726 of file html.cpp.

References THash< TKey, TDat, THashFunc >::AddKey(), THash< TKey, TDat, THashFunc >::IsKey(), and THash< TKey, TDat, THashFunc >::Len().

Referenced by IsBreakTok().

                                          {
  static TStrH BreakTagNmH(50);
  if (BreakTagNmH.Len()==0){
    BreakTagNmH.AddKey(TStr("<H1>")); BreakTagNmH.AddKey(TStr("<H2>"));
    BreakTagNmH.AddKey(TStr("<H3>")); BreakTagNmH.AddKey(TStr("<H4>"));
    BreakTagNmH.AddKey(TStr("<H5>")); BreakTagNmH.AddKey(TStr("<H6>"));
    BreakTagNmH.AddKey(TStr("<BR>")); BreakTagNmH.AddKey(TStr("<HR>"));
    BreakTagNmH.AddKey(TStr("<P>")); BreakTagNmH.AddKey(TStr("<DL>"));
    BreakTagNmH.AddKey(TStr("<UL>")); BreakTagNmH.AddKey(TStr("<OL>"));
    BreakTagNmH.AddKey(TStr("<LI>")); BreakTagNmH.AddKey(TStr("<DT>"));
    BreakTagNmH.AddKey(TStr("<DD>")); BreakTagNmH.AddKey(TStr("<HEAD>"));
    BreakTagNmH.AddKey(TStr("<TITLE>")); BreakTagNmH.AddKey(TStr("<META>"));
    BreakTagNmH.AddKey(TStr("<SCRIPT>"));
    BreakTagNmH.AddKey(TStr("<HEAD>")); BreakTagNmH.AddKey(TStr("<BODY>"));
  }
  return BreakTagNmH.IsKey(TagNm);
}

Here is the call graph for this function:

Here is the caller graph for this function:

bool THtmlTok::IsBreakTok ( const PHtmlTok &  Tok) [static]

Definition at line 744 of file html.cpp.

References hsyBTag, hsyETag, and IsBreakTag().

Referenced by THtmlHldV().

                                            {
  if ((Tok->GetSym()==hsyBTag)||(Tok->GetSym()==hsyETag)){
    return IsBreakTag(Tok->GetStr());
  } else {
    return false;
  }
}

Here is the call graph for this function:

Here is the caller graph for this function:

bool IsEoln ( const char &  Ch) const

Definition at line 33 of file html.h.

References TCh::CrCh, and TCh::LfCh.

{return (Ch==TCh::CrCh)||(Ch==TCh::LfCh);}
bool THtmlTok::IsHTag ( const TStr &  TagNm,
int &  HTagN 
) [static]

Definition at line 752 of file html.cpp.

References TStr::Len().

Referenced by THtmlHldV().

                                                  {
  if ((TagNm.Len()==4)&&(TagNm[0]=='<')&&(TagNm[1]=='H')&&(TagNm[3]=='>')){
    char Ch=TagNm[2];
    if (('1'<=Ch)&&(Ch<='6')){HTagN=Ch-'0'; return true;}
    else {HTagN=-1; return false;}
  } else {
    HTagN=-1; return false;
  }
}

Here is the call graph for this function:

Here is the caller graph for this function:

bool IsLc ( const char &  Ch) const

Definition at line 49 of file html.h.

References LcChV, and TCh::Mn.

{return Ch==LcChV[Ch-TCh::Mn];}
bool IsNum ( const char &  Ch) const

Definition at line 38 of file html.h.

References hlctNum, and TCh::Mn.

Referenced by GetNum().

{return int(ChTyV[Ch-TCh::Mn])==hlctNum;}

Here is the caller graph for this function:

bool THtmlTok::IsRedirUrlTok ( ) const

Definition at line 676 of file html.cpp.

References GetArg(), GetStr(), GetSym(), TStr::GetUc(), hsyBTag, HttpEquivArgNm, IsArg(), and MetaTagNm.

                                   {
  if (GetSym()==hsyBTag){
    TStr TagNm=GetStr();
    if ((TagNm==MetaTagNm)&&(IsArg(HttpEquivArgNm))){
      TStr HttpEquivArgVal=GetArg(HttpEquivArgNm).GetUc();
      if ((HttpEquivArgVal=="REFRESH")&&IsArg("CONTENT")){
        return true;
      } else {
        return false;
      }
    }
  }
  return false;
}

Here is the call graph for this function:

bool IsSpace ( const char &  Ch) const

Definition at line 36 of file html.h.

References hlctSpace, and TCh::Mn.

{return int(ChTyV[Ch-TCh::Mn])==hlctSpace;}
bool IsSym ( const char &  Ch) const

Definition at line 41 of file html.h.

References hlctSym, and TCh::Mn.

{return int(ChTyV[Ch-TCh::Mn])==hlctSym;}
bool TWebPg::IsTxt ( ) const

Definition at line 1310 of file html.cpp.

References TCh::CrCh, HttpResp, TStr::Len(), TCh::LfCh, TCh::TabCh, and THttp::TextFldVal.

                         {
  if ((!HttpResp->IsContType())||HttpResp->IsContType(THttp::TextFldVal)){
    TStr Str=HttpResp->GetBodyAsStr();
    int StrLen=Str.Len(); int ChN=0; int PrintChs=0;
    while ((ChN<100)&&(ChN<StrLen)){
      char Ch=Str[ChN++];
      if (((' '<=Ch)&&(Ch<='~'))||(Ch==TCh::TabCh)||(Ch==TCh::LfCh)||(Ch==TCh::CrCh)){
        PrintChs++;}
    }
    double PrintPrb=double(PrintChs)/double(ChN+1);
    return PrintPrb>0.9;
  } else {
    return false;
  }
}

Here is the call graph for this function:

bool IsUc ( const char &  Ch) const

Definition at line 48 of file html.h.

References TCh::Mn, and UcChV.

{return Ch==UcChV[Ch-TCh::Mn];}
bool IsUrl ( const char &  Ch) const

Definition at line 42 of file html.h.

References hlctAlpha, hlctNum, and TCh::Mn.

                                   {
    int ChTy=ChTyV[Ch-TCh::Mn];
    return (ChTy==hlctAlpha)||(ChTy==hlctNum)||
     (Ch=='.')||(Ch=='-')||(Ch==':')||(Ch=='/')||(Ch=='~');}
bool THtmlTok::IsUrlTok ( TStr &  RelUrlStr) const

Definition at line 648 of file html.cpp.

References AreaTagNm, ATagNm, TStr::Empty(), FrameTagNm, GetArg(), GetStr(), TStr::GetSubStr(), GetSym(), TStr::GetUc(), HRefArgNm, hsyBTag, HttpEquivArgNm, ImgTagNm, IsArg(), TStr::Len(), MetaTagNm, TStr::SplitOnStr(), and SrcArgNm.

                                             {
  if (GetSym()==hsyBTag){
    TStr TagNm=GetStr();
    if ((TagNm==ATagNm)&&(IsArg(HRefArgNm))){
      RelUrlStr=GetArg(HRefArgNm); return true;}
    else if ((TagNm==AreaTagNm)&&(IsArg(HRefArgNm))){
      RelUrlStr=GetArg(HRefArgNm); return true;}
    else if ((TagNm==FrameTagNm)&&(IsArg(SrcArgNm))){
      RelUrlStr=GetArg(SrcArgNm); return true;}
    else if ((TagNm==ImgTagNm)&&(IsArg(SrcArgNm))){
      RelUrlStr=GetArg(SrcArgNm); return true;}
    else if ((TagNm==MetaTagNm)&&(IsArg(HttpEquivArgNm))){
      TStr HttpEquivArgVal=GetArg(HttpEquivArgNm).GetUc();
      if ((HttpEquivArgVal=="REFRESH")&&IsArg("CONTENT")){
        TStr ContentStr=GetArg("CONTENT");
        TStr LeftStr; TStr RightStr; TStr UrlEqStr="URL=";
        ContentStr.GetUc().SplitOnStr(LeftStr, UrlEqStr, RightStr);
        RelUrlStr=ContentStr.GetSubStr(
         LeftStr.Len()+UrlEqStr.Len(), ContentStr.Len());
        return !RelUrlStr.Empty();
      } else {
        return false;
      }
    }
  }
  return false;
}

Here is the call graph for this function:

bool IsWs ( const char &  Ch) const

Definition at line 34 of file html.h.

References TCh::CrCh, TCh::LfCh, and TCh::TabCh.

Referenced by TXmlParser::GetSym().

                                  {
    return (Ch==' ')||(Ch==TCh::TabCh)||(Ch==TCh::CrCh)||(Ch==TCh::LfCh);}

Here is the caller graph for this function:

static PHtmlLxChDef Load ( TSIn & SIn) [static]

Definition at line 25 of file html.h.

References THtmlLxChDef().

{return new THtmlLxChDef(SIn);}

Here is the call graph for this function:

static PHtmlDoc LoadTxt ( const TStr FNm,
const THtmlDocType Type = hdtAll,
const bool &  DoUc = true 
) [static]

Definition at line 278 of file html.h.

References TFIn::New(), and THtmlDoc().

                                                                           {
    PSIn SIn=TFIn::New(FNm); return PHtmlDoc(new THtmlDoc(SIn, Type, DoUc));}

Here is the call graph for this function:

static PHtmlDoc New ( const PSIn SIn,
const THtmlDocType Type = hdtAll,
const bool &  DoUc = true 
) [static]

Definition at line 259 of file html.h.

References THtmlDoc().

                                                                           {
    return PHtmlDoc(new THtmlDoc(SIn, Type, DoUc));}

Here is the call graph for this function:

static PWebPg New ( const TStrV UrlStrV,
const TStrV IpNumV,
const PHttpResp &  HttpResp 
) [static]

Definition at line 338 of file html.h.

References TWebPg().

                                                                                         {
    return new TWebPg(UrlStrV, IpNumV, HttpResp);}

Here is the call graph for this function:

static PWebPg New ( const TStrV UrlStrV,
const PHttpResp &  HttpResp 
) [static]

Definition at line 340 of file html.h.

References TWebPg().

                                                                    {
    return new TWebPg(UrlStrV, TStrV(), HttpResp);}

Here is the call graph for this function:

static PWebPg New ( const TStr UrlStr,
const PHttpResp &  HttpResp 
) [static]

Definition at line 342 of file html.h.

References TVec< TVal >::Add(), and TWebPg().

                                                                  {
    TStrV UrlStrV; UrlStrV.Add(UrlStr);
    return new TWebPg(UrlStrV, TStrV(), HttpResp);}

Here is the call graph for this function:

THtmlLxChDef& operator= ( const THtmlLxChDef & )

Definition at line 29 of file html.h.

References Fail.

{Fail; return *this;}
THtmlTok& operator= ( const THtmlTok )

Definition at line 198 of file html.h.

References Fail.

{Fail; return *this;}
THtmlDoc& operator= ( const THtmlDoc )

Definition at line 266 of file html.h.

References Fail.

{Fail; return *this;}
THtmlHldV& operator= ( const THtmlHldV )

Definition at line 319 of file html.h.

References Fail.

{Fail; return *this;}
TWebPg& operator= ( const TWebPg )

Definition at line 350 of file html.h.

References Fail.

{Fail; return *this;}
void PutFetchMSecs ( const uint64 _FetchMSecs)

Definition at line 374 of file html.h.

References FetchMSecs.

{FetchMSecs=_FetchMSecs;}
void Save ( TSOut & SOut)

Definition at line 26 of file html.h.

References EscStrH, LcChV, THash< TKey, TDat, THashFunc >::Save(), TVec< TVal >::Save(), and UcChV.

                        {
    ChTyV.Save(SOut); UcChV.Save(SOut); LcChV.Save(SOut); EscStrH.Save(SOut);}

Here is the call graph for this function:

void TWebPg::SaveAsHttp ( const TStr FNm) const

Definition at line 1303 of file html.cpp.

References HttpResp, and TFOut::New().

                                             {
  // open FNm for writing
  PSOut FOut=TFOut::New(FNm);
  // dump the stored http response into it as text
  HttpResp->SaveTxt(FOut);
}

Here is the call graph for this function:

void TWebPg::SaveAsHttpBody ( const TStr FNm) const

Definition at line 1296 of file html.cpp.

References HttpResp, and TFOut::New().

                                                 {
  // open FNm for writing
  PSOut FOut=TFOut::New(FNm);
  // dump only the body of the stored http response into it
  HttpResp->SaveBody(FOut);
}

Here is the call graph for this function:

static void SaveHtmlToTxt ( const TStr HtmlStr,
const PSOut TxtSOut,
const TStr BaseUrlStr,
const bool &  OutUrlP,
const bool &  OutToksP 
) [static]
static void SaveHtmlToTxt ( const TStr HtmlStr,
const TStr TxtFNm,
const TStr BaseUrlStr,
const bool &  OutUrlP,
const bool &  OutToksP 
) [static]
static void SaveHtmlToXml ( const TStr HtmlStr,
const PSOut XmlSOut,
const TStr BaseUrlStr,
const bool &  OutTextP,
const bool &  OutUrlP,
const bool &  OutToksP,
const bool &  OutTagsP,
const bool &  OutArgsP 
) [static]
static void SaveHtmlToXml ( const TStr HtmlStr,
const TStr XmlFNm,
const TStr BaseUrlStr,
const bool &  OutTextP,
const bool &  OutUrlP,
const bool &  OutToksP,
const bool &  OutTagsP,
const bool &  OutArgsP 
) [static]
void SaveTxt ( const PSOut SOut,
const bool &  TxtMode = true 
)
void SetChTy ( const THtmlLxChTy & ChTy,
const TStr & Str 
)

Referenced by SetUcCh(), THtmlLxChDef(), TLxChDef(), and TXmlChDef().

Here is the caller graph for this function:

void THtmlLxChDef::SetEscStr ( const TStr & SrcStr,
const TStr & DstStr 
)

Definition at line 29 of file html.cpp.

References THash< TKey, TDat, THashFunc >::AddDat(), and EscStrH.

Referenced by THtmlLxChDef().

                                                                  {
  EscStrH.AddDat(SrcStr, DstStr);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void THtmlLxChDef::SetUcCh ( const char &  UcCh,
const char &  LcCh 
)

Definition at line 3 of file html.cpp.

References IAssert, LcChV, TCh::Mn, and UcChV.

Referenced by SetUcCh(), THtmlLxChDef(), and TLxChDef().

                                                            {
  // both tables are indexed by the character's offset from TCh::Mn
  const int LcChN=LcCh-TCh::Mn;
  const int UcChN=UcCh-TCh::Mn;
  // upper-case table: several lower-case characters may share one
  // upper-case character, but a slot may only be set while it is still
  // empty or still maps the character onto itself
  IAssert(
   (UcChV[LcChN]==TCh(0))||
   (UcChV[LcChN]==TCh(LcCh)));
  UcChV[LcChN]=TCh(UcCh);
  // lower-case table: an upper-case character keeps a single lower-case
  // character, so an entry that was already paired is left untouched
  const bool IsLcSlotFree=
   (LcChV[UcChN]==TCh(0))||(LcChV[UcChN]==TCh(UcCh));
  if (IsLcSlotFree){
    LcChV[UcChN]=TCh(LcCh);
  }
}

Here is the caller graph for this function:

void THtmlLxChDef::SetUcCh ( const TStr & Str)

Definition at line 15 of file html.cpp.

References hlctAlpha, TStr::Len(), SetChTy(), and SetUcCh().

                                         {
  // all characters of Str are classified as letters
  SetChTy(hlctAlpha, Str);
  // Str[0] is the upper-case form; each following character is registered
  // as one of its lower-case forms
  int LcChN=1;
  while (LcChN<Str.Len()){
    SetUcCh(Str[0], Str[LcChN]);
    LcChN++;
  }
}

Here is the call graph for this function:

THtmlDoc ( )

Definition at line 256 of file html.h.

Referenced by LoadTxt(), New(), and THtmlHldV().

: TokV(){}

Here is the caller graph for this function:

THtmlDoc::THtmlDoc ( const PSIn SIn,
const THtmlDocType Type = hdtAll,
const bool &  DoUc = true 
)

Definition at line 779 of file html.cpp.

References AreaTagNm, Fail, THtmlLx::GetSym(), THtmlLx::GetTok(), hdtA, hdtAll, hdtHRef, hdtStr, hdtStrNum, hdtTag, hdtUL, hsyBTag, hsyEof, hsyETag, hsyNum, hsyStr, ImgTagNm, THtmlLx::Sym, THtmlTok(), THtmlLx::UcChA, and UlTagNm.

                                                                             :
  TokV(1000, 0){
  // Tokenizes SIn and keeps the tokens selected by Type; an end-of-file
  // token is always appended last.
  THtmlLx Lx(SIn);
  bool InUL=false; // true while between <UL> and </UL>
  bool MkTok=false; // whether the current symbol becomes a token
  while (Lx.GetSym()!=hsyEof){
    const bool IsBTag=(Lx.Sym==hsyBTag);
    const bool IsETag=(Lx.Sym==hsyETag);
    switch (Type){
      case hdtAll:
        MkTok=true; break;
      case hdtStr:
        MkTok=(Lx.Sym==hsyStr); break;
      case hdtStrNum:
        MkTok=(Lx.Sym==hsyStr)||(Lx.Sym==hsyNum); break;
      case hdtTag:
        MkTok=IsBTag||IsETag; break;
      case hdtA:
        MkTok=IsBTag&&(Lx.UcChA==THtmlTok::ATagNm); break;
      case hdtHRef:
        // begin-tags that can carry a link target
        MkTok=IsBTag&&
         ((Lx.UcChA==THtmlTok::ATagNm)||(Lx.UcChA==THtmlTok::AreaTagNm)||
         (Lx.UcChA==THtmlTok::FrameTagNm)||(Lx.UcChA==THtmlTok::ImgTagNm)||
         (Lx.UcChA==THtmlTok::MetaTagNm));
        break;
      case hdtUL: {
        // the opening <UL> and the closing </UL> are both kept: the flag is
        // raised before, and lowered after, MkTok is taken from it
        if (IsBTag&&(Lx.UcChA==THtmlTok::UlTagNm)){InUL=true;}
        MkTok=InUL;
        if (IsETag&&(Lx.UcChA==THtmlTok::UlTagNm)){InUL=false;}
        break;
      }
      default: Fail;
    }
    if (MkTok){
      TokV.Add(Lx.GetTok(DoUc));
    }
  }
  TokV.Add(PHtmlTok(new THtmlTok(hsyEof)));
}

Here is the call graph for this function:

THtmlDoc ( TSIn )

Definition at line 262 of file html.h.

References Fail.

{Fail;}
THtmlHldV::THtmlHldV ( const PHtmlDoc &  _RefHtmlDoc,
const int &  HldWnLen = 10 
)

Definition at line 1148 of file html.cpp.

References TVec< TVal >::Add(), TVec< TVal >::Clr(), forever, GetHTok(), HldV, hsyBTag, hsyETag, hsyNum, hsySSym, hsyStr, IsBreakTok(), IsHTag(), TSOut::StdOut, THtmlDoc(), and Tok.

                                                                    :
  RefHtmlDoc(_RefHtmlDoc), HldV(){
  // Scans the reference document once. For every <A> begin-tag a separate
  // "hyper-link context" document is assembled from the current title
  // tokens, the currently open heading tokens (H1..H6), the tokens before
  // the link, the link itself and the tokens after it; each such document
  // is appended to HldV.
  // HldWnLen is the maximal number of string/number tokens collected on
  // each side of the link.
  bool IsTitleAct=false; THtmlTokV TitleTokV;
  bool IsHAct=false; int ActHTagN=-1;
  TVec<THtmlTokV> HTokV(6); // one token list per heading level 1..6
  PHtmlTok Tok; THtmlLxSym TokSym; TStr TokStr;
  for (int TokN=0; TokN<RefHtmlDoc->GetToks(); TokN++){
    Tok=RefHtmlDoc->GetTok(TokN, TokSym, TokStr);
    if ((TokSym==hsyBTag)&&(TokStr==THtmlTok::ATagNm)){
      // collect tokens before, inside and after <a> ... </a> tags
      int ATokN; PHtmlTok ATok; THtmlLxSym ATokSym; TStr ATokStr;
      // inside <A> tags: everything except hsySSym tokens, up to and
      // including the closing </A> (or the end of the document)
      THtmlTokV ATokV; ATokN=TokN;
      forever{
        ATok=RefHtmlDoc->GetTok(ATokN, ATokSym, ATokStr);
        if (ATokSym!=hsySSym){ATokV.Add(ATok);}
        if ((ATokSym==hsyETag)&&(ATokStr==THtmlTok::ATagNm)){break;}
        ATokN++;
        if (ATokN>=RefHtmlDoc->GetToks()){break;}
      }
      int ETagATokN=ATokN+1;
      // before <A> tags: walk backwards until a break token, the start of
      // the document, or a full window; tokens are stored nearest-first
      THtmlTokV PrevATokV; ATokN=TokN;
      forever{
        ATokN--;
        if (ATokN<0){break;}
        ATok=RefHtmlDoc->GetTok(ATokN, ATokSym, ATokStr);
        if (THtmlTok::IsBreakTok(ATok)){break;}
        if ((ATokSym==hsyStr)||(ATokSym==hsyNum)){PrevATokV.Add(ATok);}
        // the window limit applies to the vector filled by this loop
        // (testing ATokV here could never change while the loop runs)
        if (PrevATokV.Len()>=HldWnLen){break;}
      }
      // after <A> tags: walk forwards under the same stop conditions
      // NOTE(review): ETagATokN is already one past the position where the
      // scan above stopped and the loop increments before reading, so the
      // token directly following that position is never examined - confirm
      // whether this is intended
      THtmlTokV NextATokV; ATokN=ETagATokN;
      forever{
        ATokN++;
        if (ATokN>=RefHtmlDoc->GetToks()){break;}
        ATok=RefHtmlDoc->GetTok(ATokN, ATokSym, ATokStr);
        if (THtmlTok::IsBreakTok(ATok)){break;}
        if ((ATokSym==hsyStr)||(ATokSym==hsyNum)){NextATokV.Add(ATok);}
        // the window limit applies to the vector filled by this loop
        if (NextATokV.Len()>=HldWnLen){break;}
      }
      // construct html-document with hyper-link context
      PHtmlDoc HtmlDoc=PHtmlDoc(new THtmlDoc());
      HtmlDoc->AddTokV(TitleTokV);
      for (int HTagN=1; HTagN<=6; HTagN++){HtmlDoc->AddTokV(HTokV[HTagN-1]);}
      HtmlDoc->AddTokV(PrevATokV);
      HtmlDoc->AddTokV(ATokV);
      HtmlDoc->AddTokV(NextATokV);
      HldV.Add(HtmlDoc);
      // every context document is also written to standard output
      // NOTE(review): looks like leftover debugging output - confirm before
      // removing, callers may rely on it
      HtmlDoc->SaveTxt(TSOut::StdOut);
    } else
    if (TokSym==hsyBTag){
      int HTagN;
      if (TokStr==THtmlTok::TitleTagNm){
        // a new title replaces whatever title was collected before
        IsTitleAct=true; TitleTokV.Clr(); TitleTokV.Add(Tok);
      } else
      if (THtmlTok::IsHTag(TokStr, HTagN)){
        if (IsHAct){// conclude previous <H?> tag if left open
          HTokV[ActHTagN-1].Add(THtmlTok::GetHTok(false, ActHTagN));}
        IsHAct=true; ActHTagN=HTagN;
        // a new heading discards the stored headings of its own level and
        // of all deeper levels
        {for (int HTagN=ActHTagN; HTagN<=6; HTagN++){HTokV[HTagN-1].Clr();}}
        HTokV[ActHTagN-1].Add(Tok);
      }
    } else
    if (TokSym==hsyETag){
      int HTagN;
      if (TokStr==THtmlTok::TitleTagNm){
        if (IsTitleAct){TitleTokV.Add(Tok); IsTitleAct=false;}
      } else
      if (THtmlTok::IsHTag(TokStr, HTagN)){
        if (IsHAct){HTokV[ActHTagN-1].Add(Tok); IsHAct=false;}
      }
    } else
    if (TokSym!=hsySSym){
      // ordinary content goes to the title and/or the heading that is
      // currently open
      if (IsTitleAct){TitleTokV.Add(Tok);}
      if (IsHAct){HTokV[ActHTagN-1].Add(Tok);}
    }
  }
}

Here is the call graph for this function:

THtmlHldV ( TSIn )

Definition at line 315 of file html.h.

References Fail.

{Fail;}

THtmlLxChDef::THtmlLxChDef ( )

Definition at line 48 of file html.cpp.

References TCh::EofCh, hlctAlpha, hlctEof, hlctLTag, hlctNum, hlctRTag, hlctSpace, hlctSym, TCh::Mn, TCh::Mx, SetChTy(), SetEscStr(), and SetUcCh().

Referenced by Load().

                          :
  ChTyV(TCh::Vals), UcChV(TCh::Vals), LcChV(TCh::Vals), EscStrH(100){
  // Fills the default lexer tables: the character-type table (ChTyV), the
  // upper/lower-case maps (UcChV, LcChV) and the escape-sequence table
  // (EscStrH). The order of the calls below matters: later calls refine
  // what earlier ones have set.

  // Character-Types
  // every character starts out as hlctSpace and is reclassified below
  ChTyV.PutAll(TInt(hlctSpace));
  SetChTy(hlctAlpha, "ABCDEFGHIJKLMNOPQRSTUVWXYZ");
  SetChTy(hlctAlpha, "abcdefghijklmnopqrstuvwxyz");
  SetChTy(hlctAlpha, "@_");
  SetChTy(hlctNum, "0123456789");
  // '<' and '>' are part of the symbol list and are then given their own
  // tag-delimiter types by the two calls that follow
  SetChTy(hlctSym, "`~!#$%^&*()-=+[{]}\\|;:'\",<.>/?");
  SetChTy(hlctLTag, "<"); SetChTy(hlctRTag, ">");
  SetChTy(hlctEof, TStr(TCh::EofCh));
  // all character values outside 0..127 are classified as letters
  for (int Ch=TCh::Mn; Ch<=TCh::Mx; Ch++){
    if ((Ch<0)||(127<Ch)){SetChTy(hlctAlpha, TStr(TCh(char(Ch))));}}
  //SetChTy(hlctSpace, TStr(TCh(char(160))));

  // Upper-Case
  // start with every character mapped onto itself ...
  {for (int Ch=TCh::Mn; Ch<=TCh::Mx; Ch++){
    SetUcCh(char(Ch), char(Ch));}}
  // ... then register the case pairs; in each string the first character
  // is the upper-case form and the remaining ones are its lower-case forms
  SetUcCh("Aa"); SetUcCh("\xc0\xe0"); SetUcCh("\xc1\xe1"); SetUcCh("\xc2\xe2");
  SetUcCh("\xc3\xe3"); SetUcCh("\xc4\xe4"); SetUcCh("\xc5\xe5"); SetUcCh("\xc6\xe6");
  SetUcCh("Bb"); SetUcCh("Cc"); SetUcCh("\xc7\xe7"); SetUcCh("Dd");
  SetUcCh("\xd0\xf0"); SetUcCh("Ee"); SetUcCh("\xc8\xe8"); SetUcCh("\xc9\xe9");
  SetUcCh("\xca\xea"); SetUcCh("\xcb\xeb"); SetUcCh("Ff"); SetUcCh("Gg");
  SetUcCh("Hh"); SetUcCh("Ii"); SetUcCh("\xcc\xec"); SetUcCh("\xcd\xed");
  SetUcCh("\xce\xee"); SetUcCh("\xcf\xef"); SetUcCh("Jj"); SetUcCh("Kk");
  SetUcCh("Ll"); SetUcCh("Mm"); SetUcCh("Nn"); SetUcCh("\xd1\xf1");
  SetUcCh("Oo"); SetUcCh("\xd2\xf2"); SetUcCh("\xd3\xf3"); SetUcCh("\xd4\xf4");
  SetUcCh("\xd5\xf5"); SetUcCh("\xd6\xf6"); SetUcCh("\xd8\xf8"); SetUcCh("Pp");
  SetUcCh("Qq"); SetUcCh("Rr"); SetUcCh("Ss"); SetUcCh("\x8a\x9a");
  SetUcCh("Tt"); SetUcCh("Uu"); SetUcCh("\xd9\xf9"); SetUcCh("\xda\xfa");
  // "Yy\xff": both 'y' and \xff are registered as lower-case forms of 'Y'
  SetUcCh("\xdb\xfb"); SetUcCh("\xdc\xfc"); SetUcCh("Vv"); SetUcCh("Ww");
  SetUcCh("Xx"); SetUcCh("Yy\xff"); SetUcCh("\xdd\xfd"); SetUcCh("Zz");
  SetUcCh("\x8e\x9e");
  // ISO-CE
  //SetUcCh(uchar(169), uchar(185)); /*Sh - \xa9\xb9*/
  //SetUcCh(uchar(174), uchar(190)); /*Zh - \xae\xbe*/
  //SetUcCh(uchar(200), uchar(232)); /*Ch - \xc8\xe8*/
  //SetUcCh(uchar(198), uchar(230)); /*Cs - \xc6\xe6*/
  //SetUcCh(uchar(208), uchar(240)); /*Dz - \xd0\xf0*/

  // Annoying Unicode-characters
  //SetChTy(hlctSpace, "\xc2\xef");

  // Escape-Sequences
  // entity names are registered without a trailing ';'
  SetEscStr("&quot", "\""); SetEscStr("&amp", "&");
  SetEscStr("&lt", "<"); SetEscStr("&gt", ">");
  SetEscStr("&nbsp", " ");

  // these entities map to single 8-bit characters
  SetEscStr("&auml", "\xe4"); SetEscStr("&Auml", "\xc4");
  SetEscStr("&ouml", "\xf6"); SetEscStr("&Ouml", "\xd6");
  SetEscStr("&uuml", "\xfc"); SetEscStr("&Uuml", "\xdc");
  SetEscStr("&aring", "\xe5"); SetEscStr("&Aring", "\xc5");
  SetEscStr("&oslash", "\xf8"); SetEscStr("&Oslash", "\xd8");
  SetEscStr("&Aelig", "\xc6"); SetEscStr("&aelig", "\xe6");

  // these accented entities are mapped to plain unaccented ASCII letters
  SetEscStr("&eacute", "e"); SetEscStr("&Eacute", "E");
  SetEscStr("&egrave", "e"); SetEscStr("&Egrave", "E");
  SetEscStr("&agrave", "a"); SetEscStr("&Agrave", "A");
}

Here is the call graph for this function:

Here is the caller graph for this function:

THtmlLxChDef ( TSIn & SIn)

Definition at line 24 of file html.h.

: ChTyV(SIn), UcChV(SIn), LcChV(SIn), EscStrH(SIn){}
THtmlTok ( )

Definition at line 186 of file html.h.

Referenced by GetHTok(), THtmlLx::GetTok(), and THtmlDoc().

: Sym(hsyUndef), Str(), ArgNmValV(){}

Here is the caller graph for this function:

THtmlTok ( const THtmlLxSym & _Sym)

Definition at line 187 of file html.h.

                                  :
    Sym(_Sym), Str(), ArgNmValV(){}
THtmlTok ( const THtmlLxSym _Sym,
const TStr _Str 
)

Definition at line 189 of file html.h.

                                                    :
    Sym(_Sym), Str(_Str), ArgNmValV(){}
THtmlTok ( const THtmlLxSym _Sym,
const TStr _Str,
const THtmlLx::TArgNmValV _ArgNmValV 
)

Definition at line 191 of file html.h.

                                       :
    Sym(_Sym), Str(_Str), ArgNmValV(_ArgNmValV){}
THtmlTok ( TSIn )

Definition at line 194 of file html.h.

References Fail.

{Fail;}
TWebPg ( )

Definition at line 335 of file html.h.

Referenced by New().

: UrlStrV(), IpNumV(), HttpResp(){}

Here is the caller graph for this function:

TWebPg ( const TStrV _UrlStrV,
const TStrV _IpNumV,
const PHttpResp &  _HttpResp 
)

Definition at line 336 of file html.h.

                                                                                 :
    UrlStrV(_UrlStrV), IpNumV(_IpNumV), HttpResp(_HttpResp){}
TWebPg ( TSIn )

Definition at line 346 of file html.h.

References Fail.

{Fail;}
~TWebPg ( )

Definition at line 345 of file html.h.

{}

Variable Documentation

const TStr THtmlTok::AltArgNm = "ALT" [static]

Definition at line 235 of file html.h.

const TStr THtmlTok::AreaTagNm = "<AREA>" [static]

Definition at line 216 of file html.h.

Referenced by IsUrlTok(), and THtmlDoc().

const TStr THtmlTok::ATagNm = "<A>" [static]

Definition at line 215 of file html.h.

Referenced by IsUrlTok().

const TStr THtmlTok::BrTagNm = "<BR>" [static]

Definition at line 217 of file html.h.

const TStr THtmlTok::CardTagNm = "<CARD>" [static]

Definition at line 218 of file html.h.

const TStr THtmlTok::CenterTagNm = "<CENTER>" [static]

Definition at line 219 of file html.h.

PHtmlLxChDef ChDef [static]

Definition at line 63 of file html.h.

Referenced by GetChDef(), and GetChDefRef().

Definition at line 17 of file html.h.

Referenced by GetEscStr(), Save(), and SetEscStr().

Definition at line 333 of file html.h.

Referenced by GetFetchMSecs(), and PutFetchMSecs().

const TStr THtmlTok::FrameTagNm = "<FRAME>" [static]

Definition at line 220 of file html.h.

Referenced by IsUrlTok().

const TStr THtmlTok::H1TagNm = "<H1>" [static]

Definition at line 221 of file html.h.

Referenced by GetHTok().

const TStr THtmlTok::H2TagNm = "<H2>" [static]

Definition at line 222 of file html.h.

Referenced by GetHTok().

const TStr THtmlTok::H3TagNm = "<H3>" [static]

Definition at line 223 of file html.h.

Referenced by GetHTok().

const TStr THtmlTok::H4TagNm = "<H4>" [static]

Definition at line 224 of file html.h.

Referenced by GetHTok().

const TStr THtmlTok::H5TagNm = "<H5>" [static]

Definition at line 225 of file html.h.

Referenced by GetHTok().

const TStr THtmlTok::H6TagNm = "<H6>" [static]

Definition at line 226 of file html.h.

Referenced by GetHTok().

ClassTP (THtmlHldV, PHtmlHldV) private THtmlDocV HldV

Definition at line 309 of file html.h.

Referenced by GetHld(), GetHlds(), and THtmlHldV().

const TStr THtmlTok::HRefArgNm = "HREF" [static]

Definition at line 236 of file html.h.

Referenced by IsUrlTok().

const TStr THtmlTok::HttpEquivArgNm = "HTTP-EQUIV" [static]

Definition at line 239 of file html.h.

Referenced by IsRedirUrlTok(), and IsUrlTok().

PHttpResp HttpResp

Definition at line 332 of file html.h.

Referenced by GetHttpResp(), IsTxt(), SaveAsHttp(), and SaveAsHttpBody().

const TStr THtmlTok::ImgTagNm = "<IMG>" [static]

Definition at line 227 of file html.h.

Referenced by IsUrlTok(), and THtmlDoc().

ClassTPV (TWebPg, PWebPg, TWebPgV) private TStrV IpNumV

Definition at line 328 of file html.h.

Referenced by GetIpNum(), and GetIps().

Definition at line 16 of file html.h.

Referenced by GetLc(), IsLc(), Save(), and SetUcCh().

const TStr THtmlTok::LiTagNm = "<LI>" [static]

Definition at line 228 of file html.h.

const TStr THtmlTok::MetaTagNm = "<META>" [static]

Definition at line 229 of file html.h.

Referenced by IsRedirUrlTok(), and IsUrlTok().

const TStr THtmlTok::PTagNm = "<P>" [static]

Definition at line 230 of file html.h.

const TStr THtmlTok::SrcArgNm = "SRC" [static]

Definition at line 237 of file html.h.

Referenced by IsUrlTok().

ClassTPV (THtmlTok, PHtmlTok, THtmlTokV) private TStr Str

Definition at line 180 of file html.h.

const TStr THtmlTok::TitleArgNm = "TITLE" [static]

Definition at line 238 of file html.h.

const TStr THtmlTok::TitleETagNm = "</TITLE>" [static]

Definition at line 233 of file html.h.

const TStr THtmlTok::TitleTagNm = "<TITLE>" [static]

Definition at line 232 of file html.h.

ClassTP (THtmlLxChDef, PHtmlLxChDef) private TChV UcChV

Definition at line 12 of file html.h.

Referenced by GetUc(), IsUc(), operator=(), Save(), SetUcCh(), and TLxChDef().

const TStr THtmlTok::UlTagNm = "<UL>" [static]

Definition at line 231 of file html.h.

Referenced by THtmlDoc().