SNAP Library, User Reference  2012-10-02 12:56:23
SNAP, a general purpose network analysis and graph mining library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
html.h File Reference

Go to the source code of this file.

Classes

class  THtmlLx

Enumerations

enum  THtmlLxChTy {
  hlctSpace, hlctAlpha, hlctNum, hlctSym,
  hlctLTag, hlctRTag, hlctEof
}
enum  THtmlLxSym {
  hsyUndef, hsyStr, hsyNum, hsySSym,
  hsyUrl, hsyBTag, hsyETag, hsyMTag,
  hsyEof
}
enum  THtmlDocType {
  hdtAll, hdtStr, hdtStrNum, hdtTag,
  hdtA, hdtHRef, hdtUL
}

Functions

 ClassHdTP (THtmlTok, PHtmlTok) ClassHdTP(THtmlDoc
void SetUcCh (const char &UcCh, const char &LcCh)
void SetUcCh (const TStr &Str)
void SetChTy (const THtmlLxChTy &ChTy, const TStr &Str)
void SetEscStr (const TStr &SrcStr, const TStr &DstStr)
 THtmlLxChDef ()
 THtmlLxChDef (TSIn &SIn)
static PHtmlLxChDef Load (TSIn &SIn)
void Save (TSOut &SOut)
THtmlLxChDef & operator= (const THtmlLxChDef &)
int GetChTy (const char &Ch) const
bool IsEoln (const char &Ch) const
bool IsWs (const char &Ch) const
bool IsSpace (const char &Ch) const
bool IsAlpha (const char &Ch) const
bool IsNum (const char &Ch) const
bool IsAlNum (const char &Ch) const
bool IsSym (const char &Ch) const
bool IsUrl (const char &Ch) const
bool IsUc (const char &Ch) const
bool IsLc (const char &Ch) const
char GetUc (const char &Ch) const
char GetLc (const char &Ch) const
void GetUcChA (TChA &ChA) const
void GetLcChA (TChA &ChA) const
TStr GetUcStr (const TStr &Str) const
TStr GetLcStr (const TStr &Str) const
TStr GetEscStr (const TStr &Str) const
static PHtmlLxChDef GetChDef ()
static THtmlLxChDef & GetChDefRef ()
static TStr GetCSZFromYuascii (const TChA &ChA)
static TStr GetCSZFromWin1250 (const TChA &ChA)
static TStr GetWin1250FromYuascii (const TChA &ChA)
static TStr GetIsoCeFromYuascii (const TChA &ChA)
 THtmlTok ()
 THtmlTok (const THtmlLxSym &_Sym)
 THtmlTok (const THtmlLxSym &_Sym, const TStr &_Str)
 THtmlTok (const THtmlLxSym &_Sym, const TStr &_Str, const THtmlLx::TArgNmValV &_ArgNmValV)
 THtmlTok (TSIn &)
THtmlTok & operator= (const THtmlTok &)
THtmlLxSym GetSym () const
TStr GetStr () const
TStr GetFullStr () const
bool IsArg (const TStr &ArgNm) const
TStr GetArg (const TStr &ArgNm) const
TStr GetArg (const TStr &ArgNm, const TStr &DfArgVal) const
bool IsUrlTok (TStr &RelUrlStr) const
bool IsRedirUrlTok () const
void SaveTxt (const PSOut &SOut, const bool &TxtMode=true)
static bool IsBreakTag (const TStr &TagNm)
static bool IsBreakTok (const PHtmlTok &Tok)
static bool IsHTag (const TStr &TagNm, int &HTagN)
static PHtmlTok GetHTok (const bool &IsBTag, const int &HTagN)
 THtmlDoc ()
 THtmlDoc (const PSIn &SIn, const THtmlDocType &Type=hdtAll, const bool &DoUc=true)
static PHtmlDoc New (const PSIn &SIn, const THtmlDocType &Type=hdtAll, const bool &DoUc=true)
 THtmlDoc (TSIn &)
THtmlDoc & operator= (const THtmlDoc &)
int GetToks () const
PHtmlTok GetTok (const int &TokN) const
PHtmlTok GetTok (const int &TokN, THtmlLxSym &Sym, TStr &Str) const
void AddTokV (const THtmlTokV &_TokV)
static TStr GetTxtLnDoc (const TStr &HtmlStr)
static TStr GetTxtLnDoc (const TStr &HtmlStr, const TStr &BaseUrlStr, const bool &OutUrlP, const bool &OutTagsP)
static PHtmlDoc LoadTxt (const TStr &FNm, const THtmlDocType &Type=hdtAll, const bool &DoUc=true)
static void SaveHtmlToTxt (const TStr &HtmlStr, const PSOut &TxtSOut, const TStr &BaseUrlStr, const bool &OutUrlP, const bool &OutToksP)
static void SaveHtmlToTxt (const TStr &HtmlStr, const TStr &TxtFNm, const TStr &BaseUrlStr, const bool &OutUrlP, const bool &OutToksP)
static void SaveHtmlToXml (const TStr &HtmlStr, const PSOut &XmlSOut, const TStr &BaseUrlStr, const bool &OutTextP, const bool &OutUrlP, const bool &OutToksP, const bool &OutTagsP, const bool &OutArgsP)
static void SaveHtmlToXml (const TStr &HtmlStr, const TStr &XmlFNm, const TStr &BaseUrlStr, const bool &OutTextP, const bool &OutUrlP, const bool &OutToksP, const bool &OutTagsP, const bool &OutArgsP)
static TLxSym GetLxSym (const THtmlLxSym &HtmlLxSym, const TChA &ChA)
static bool _IsTagRedir (const TStr &TagStr, const TStr &ArgNm, THtmlLx &Lx, const TStr &BaseUrlStr, const TStr &RedirUrlStr)
static TStr GetRedirHtmlDocStr (const TStr &HtmlStr, const TStr &BaseUrlStr, const TStr &RedirUrlStr)
 THtmlHldV (const PHtmlDoc &_RefHtmlDoc, const int &HldWnLen=10)
 THtmlHldV (TSIn &)
THtmlHldV & operator= (const THtmlHldV &)
PHtmlDoc GetRefHtmlDoc ()
int GetHlds ()
PHtmlDoc GetHld (const int &HldN)
 TWebPg ()
 TWebPg (const TStrV &_UrlStrV, const TStrV &_IpNumV, const PHttpResp &_HttpResp)
static PWebPg New (const TStrV &UrlStrV, const TStrV &IpNumV, const PHttpResp &HttpResp)
static PWebPg New (const TStrV &UrlStrV, const PHttpResp &HttpResp)
static PWebPg New (const TStr &UrlStr, const PHttpResp &HttpResp)
 ~TWebPg ()
 TWebPg (TSIn &)
TWebPg & operator= (const TWebPg &)
int GetUrls () const
TStr GetUrlStr (const int &UrlN=-1) const
PUrl GetUrl (const int &UrlN=-1) const
int GetIps () const
TStr GetIpNum (const int &IpN=-1) const
PHttpResp GetHttpResp () const
TStr GetHttpHdStr () const
TStr GetHttpBodyAsStr () const
void GetOutUrlV (TUrlV &OutUrlV, TUrlV &OutRedirUrlV) const
void GetOutUrlV (TUrlV &OutUrlV) const
void GetOutDescUrlStrKdV (TStrKdV &OutDescUrlStrKdV) const
void PutFetchMSecs (const uint64 &_FetchMSecs)
uint64 GetFetchMSecs () const
void SaveAsHttpBody (const TStr &FNm) const
void SaveAsHttp (const TStr &FNm) const
bool IsTxt () const

Variables

ClassTP (THtmlLxChDef, PHtmlLxChDef) private TChV UcChV
TChV LcChV
TStrStrH EscStrH
static PHtmlLxChDef ChDef
ClassTPV (THtmlTok, PHtmlTok, THtmlTokV) private TStr Str
THtmlLx::TArgNmValV ArgNmValV
static const TStr ATagNm = "<A>"
static const TStr AreaTagNm = "<AREA>"
static const TStr BrTagNm = "<BR>"
static const TStr CardTagNm = "<CARD>"
static const TStr CenterTagNm = "<CENTER>"
static const TStr FrameTagNm = "<FRAME>"
static const TStr H1TagNm = "<H1>"
static const TStr H2TagNm = "<H2>"
static const TStr H3TagNm = "<H3>"
static const TStr H4TagNm = "<H4>"
static const TStr H5TagNm = "<H5>"
static const TStr H6TagNm = "<H6>"
static const TStr ImgTagNm = "<IMG>"
static const TStr LiTagNm = "<LI>"
static const TStr MetaTagNm = "<META>"
static const TStr PTagNm = "<P>"
static const TStr UlTagNm = "<UL>"
static const TStr TitleTagNm = "<TITLE>"
static const TStr TitleETagNm = "</TITLE>"
static const TStr AltArgNm = "ALT"
static const TStr HRefArgNm = "HREF"
static const TStr SrcArgNm = "SRC"
static const TStr TitleArgNm = "TITLE"
static const TStr HttpEquivArgNm = "HTTP-EQUIV"
ClassTP (THtmlHldV, PHtmlHldV) private THtmlDocV HldV
ClassTPV (TWebPg, PWebPg, TWebPgV) private TStrV IpNumV
PHttpResp HttpResp
uint64 FetchMSecs

Enumeration Type Documentation

enum THtmlDocType
Enumerator:
hdtAll 
hdtStr 
hdtStrNum 
hdtTag 
hdtA 
hdtHRef 
hdtUL 
enum THtmlLxChTy
Enumerator:
hlctSpace 
hlctAlpha 
hlctNum 
hlctSym 
hlctLTag 
hlctRTag 
hlctEof 
enum THtmlLxSym
Enumerator:
hsyUndef 
hsyStr 
hsyNum 
hsySSym 
hsyUrl 
hsyBTag 
hsyETag 
hsyMTag 
hsyEof 

Function Documentation

bool THtmlDoc::_IsTagRedir ( const TStr &  TagStr,
const TStr &  ArgNm,
THtmlLx &  Lx,
const TStr &  BaseUrlStr,
const TStr &  RedirUrlStr 
) [static]
void AddTokV ( const THtmlTokV &  _TokV)
ClassHdTP ( THtmlTok  ,
PHtmlTok   
)
TStr GetArg ( const TStr &  ArgNm) const
TStr GetArg ( const TStr &  ArgNm,
const TStr &  DfArgVal 
) const
static PHtmlLxChDef GetChDef ( ) [static]
static THtmlLxChDef& GetChDefRef ( ) [static]
int GetChTy ( const char &  Ch) const
TStr THtmlLxChDef::GetCSZFromWin1250 ( const TChA &  ChA) [static]
TStr THtmlLxChDef::GetCSZFromYuascii ( const TChA &  ChA) [static]
TStr THtmlLxChDef::GetEscStr ( const TStr &  Str) const
uint64 GetFetchMSecs ( ) const
PHtmlDoc GetHld ( const int &  HldN)
int GetHlds ( )
PHtmlTok THtmlTok::GetHTok ( const bool &  IsBTag,
const int &  HTagN 
) [static]
TStr GetHttpHdStr ( ) const
PHttpResp GetHttpResp ( ) const
TStr GetIpNum ( const int &  IpN = -1) const
int GetIps ( ) const
TStr THtmlLxChDef::GetIsoCeFromYuascii ( const TChA &  ChA) [static]
char GetLc ( const char &  Ch) const
void GetLcChA ( TChA &  ChA) const
TStr GetLcStr ( const TStr &  Str) const
TLxSym THtmlDoc::GetLxSym ( const THtmlLxSym &  HtmlLxSym,
const TChA &  ChA 
) [static]
void TWebPg::GetOutDescUrlStrKdV ( TStrKdV &  OutDescUrlStrKdV) const
void TWebPg::GetOutUrlV ( TUrlV &  OutUrlV,
TUrlV &  OutRedirUrlV 
) const
void GetOutUrlV ( TUrlV &  OutUrlV) const
TStr THtmlDoc::GetRedirHtmlDocStr ( const TStr &  HtmlStr,
const TStr &  BaseUrlStr,
const TStr &  RedirUrlStr 
) [static]
PHtmlDoc GetRefHtmlDoc ( )
TStr GetStr ( ) const
THtmlLxSym GetSym ( ) const
PHtmlTok GetTok ( const int &  TokN) const
PHtmlTok GetTok ( const int &  TokN,
THtmlLxSym &  Sym,
TStr &  Str 
) const
int GetToks ( ) const
static TStr GetTxtLnDoc ( const TStr &  HtmlStr) [static]
static TStr GetTxtLnDoc ( const TStr &  HtmlStr,
const TStr &  BaseUrlStr,
const bool &  OutUrlP,
const bool &  OutTagsP 
) [static]
char GetUc ( const char &  Ch) const
void GetUcChA ( TChA &  ChA) const
TStr TLxChDef::GetUcStr ( const TStr &  Str) const
PUrl GetUrl ( const int &  UrlN = -1) const
int GetUrls ( ) const
TStr GetUrlStr ( const int &  UrlN = -1) const
TStr THtmlLxChDef::GetWin1250FromYuascii ( const TChA &  ChA) [static]
bool IsAlNum ( const char &  Ch) const
bool IsAlpha ( const char &  Ch) const
bool IsArg ( const TStr &  ArgNm) const
bool THtmlTok::IsBreakTag ( const TStr &  TagNm) [static]
bool THtmlTok::IsBreakTok ( const PHtmlTok &  Tok) [static]
bool IsEoln ( const char &  Ch) const
bool THtmlTok::IsHTag ( const TStr &  TagNm,
int &  HTagN 
) [static]
bool IsLc ( const char &  Ch) const
bool IsNum ( const char &  Ch) const
bool THtmlTok::IsRedirUrlTok ( ) const
bool IsSpace ( const char &  Ch) const
bool IsSym ( const char &  Ch) const
bool TWebPg::IsTxt ( ) const
bool IsUc ( const char &  Ch) const
bool IsUrl ( const char &  Ch) const
bool THtmlTok::IsUrlTok ( TStr &  RelUrlStr) const
bool IsWs ( const char &  Ch) const
static PWebPg Load ( TSIn &  SIn) [static]
static PHtmlDoc LoadTxt ( const TStr &  FNm,
const THtmlDocType &  Type = hdtAll,
const bool &  DoUc = true 
) [static]
static PHtmlDoc New ( const PSIn &  SIn,
const THtmlDocType &  Type = hdtAll,
const bool &  DoUc = true 
) [static]
static PWebPg New ( const TStrV &  UrlStrV,
const TStrV &  IpNumV,
const PHttpResp &  HttpResp 
) [static]
static PWebPg New ( const TStrV &  UrlStrV,
const PHttpResp &  HttpResp 
) [static]
static PWebPg New ( const TStr &  UrlStr,
const PHttpResp &  HttpResp 
) [static]
THtmlLxChDef& operator= ( const THtmlLxChDef & )
THtmlTok& operator= ( const THtmlTok & )
THtmlDoc& operator= ( const THtmlDoc & )
THtmlHldV& operator= ( const THtmlHldV & )
TWebPg& operator= ( const TWebPg & )
void PutFetchMSecs ( const uint64 &  _FetchMSecs)
void Save ( TSOut &  SOut)
void TWebPg::SaveAsHttp ( const TStr &  FNm) const
void TWebPg::SaveAsHttpBody ( const TStr &  FNm) const
static void SaveHtmlToTxt ( const TStr &  HtmlStr,
const PSOut &  TxtSOut,
const TStr &  BaseUrlStr,
const bool &  OutUrlP,
const bool &  OutToksP 
) [static]
static void SaveHtmlToTxt ( const TStr &  HtmlStr,
const TStr &  TxtFNm,
const TStr &  BaseUrlStr,
const bool &  OutUrlP,
const bool &  OutToksP 
) [static]
static void SaveHtmlToXml ( const TStr &  HtmlStr,
const PSOut &  XmlSOut,
const TStr &  BaseUrlStr,
const bool &  OutTextP,
const bool &  OutUrlP,
const bool &  OutToksP,
const bool &  OutTagsP,
const bool &  OutArgsP 
) [static]
static void SaveHtmlToXml ( const TStr &  HtmlStr,
const TStr &  XmlFNm,
const TStr &  BaseUrlStr,
const bool &  OutTextP,
const bool &  OutUrlP,
const bool &  OutToksP,
const bool &  OutTagsP,
const bool &  OutArgsP 
) [static]
void SaveTxt ( const PSOut &  SOut,
const bool &  TxtMode = true 
)
void SetChTy ( const THtmlLxChTy &  ChTy,
const TStr &  Str 
)
void THtmlLxChDef::SetEscStr ( const TStr &  SrcStr,
const TStr &  DstStr 
)
void THtmlLxChDef::SetUcCh ( const char &  UcCh,
const char &  LcCh 
)
void TLxChDef::SetUcCh ( const TStr &  Str)
THtmlDoc ( )
THtmlDoc::THtmlDoc ( const PSIn &  SIn,
const THtmlDocType &  Type = hdtAll,
const bool &  DoUc = true 
)
THtmlDoc ( TSIn & )
THtmlHldV::THtmlHldV ( const PHtmlDoc &  _RefHtmlDoc,
const int &  HldWnLen = 10 
)
THtmlHldV ( TSIn & )
THtmlLxChDef ( TSIn &  SIn)
THtmlTok ( )
THtmlTok ( const THtmlLxSym &  _Sym)
THtmlTok ( const THtmlLxSym &  _Sym,
const TStr &  _Str 
)
THtmlTok ( const THtmlLxSym &  _Sym,
const TStr &  _Str,
const THtmlLx::TArgNmValV &  _ArgNmValV 
)
THtmlTok ( TSIn & )
TWebPg ( )
TWebPg ( const TStrV &  _UrlStrV,
const TStrV &  _IpNumV,
const PHttpResp &  _HttpResp 
)
TWebPg ( TSIn & )
~TWebPg ( )

Variable Documentation

const TStr THtmlTok::AltArgNm = "ALT" [static]
const TStr THtmlTok::AreaTagNm = "<AREA>" [static]
const TStr THtmlTok::ATagNm = "<A>" [static]
const TStr THtmlTok::BrTagNm = "<BR>" [static]
const TStr THtmlTok::CardTagNm = "<CARD>" [static]
const TStr THtmlTok::CenterTagNm = "<CENTER>" [static]
PHtmlLxChDef ChDef [static]
const TStr THtmlTok::FrameTagNm = "<FRAME>" [static]
const TStr THtmlTok::H1TagNm = "<H1>" [static]
const TStr THtmlTok::H2TagNm = "<H2>" [static]
const TStr THtmlTok::H3TagNm = "<H3>" [static]
const TStr THtmlTok::H4TagNm = "<H4>" [static]
const TStr THtmlTok::H5TagNm = "<H5>" [static]
const TStr THtmlTok::H6TagNm = "<H6>" [static]
ClassTP (THtmlHldV, PHtmlHldV) private THtmlDocV HldV
const TStr THtmlTok::HRefArgNm = "HREF" [static]
const TStr THtmlTok::HttpEquivArgNm = "HTTP-EQUIV" [static]
PHttpResp HttpResp
const TStr THtmlTok::ImgTagNm = "<IMG>" [static]
ClassTPV (TWebPg, PWebPg, TWebPgV) private TStrV IpNumV
const TStr THtmlTok::LiTagNm = "<LI>" [static]
const TStr THtmlTok::MetaTagNm = "<META>" [static]
const TStr THtmlTok::PTagNm = "<P>" [static]
const TStr THtmlTok::SrcArgNm = "SRC" [static]
ClassTPV (THtmlTok, PHtmlTok, THtmlTokV) private TStr Str
const TStr THtmlTok::TitleArgNm = "TITLE" [static]
const TStr THtmlTok::TitleETagNm = "</TITLE>" [static]
const TStr THtmlTok::TitleTagNm = "<TITLE>" [static]
ClassTP (THtmlLxChDef, PHtmlLxChDef) private TChV UcChV
const TStr THtmlTok::UlTagNm = "<UL>" [static]