SNAP Library 3.0, User Reference  2016-07-20 17:56:49
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
html.h
Go to the documentation of this file.
1 #include "bd.h"
2 
4 // Forward
7 
9 // Html-Lexical-Chars
10 typedef enum {
13 
15 private:
16  TIntV ChTyV;
17  TChV UcChV;
18  TChV LcChV;
19  TStrStrH EscStrH;
20  void SetUcCh(const char& UcCh, const char& LcCh);
21  void SetUcCh(const TStr& Str);
22  void SetChTy(const THtmlLxChTy& ChTy, const TStr& Str);
23  void SetEscStr(const TStr& SrcStr, const TStr& DstStr);
24 public:
25  THtmlLxChDef();
26  THtmlLxChDef(TSIn& SIn): ChTyV(SIn), UcChV(SIn), LcChV(SIn), EscStrH(SIn){}
27  static PHtmlLxChDef Load(TSIn& SIn){return new THtmlLxChDef(SIn);}
28  void Save(TSOut& SOut){
29  ChTyV.Save(SOut); UcChV.Save(SOut); LcChV.Save(SOut); EscStrH.Save(SOut);}
30 
31  THtmlLxChDef& operator=(const THtmlLxChDef&){Fail; return *this;}
32 
33  // character type operations
34  int GetChTy(const char& Ch) const {return ChTyV[Ch-TCh::Mn];}
35  bool IsEoln(const char& Ch) const {return (Ch==TCh::CrCh)||(Ch==TCh::LfCh);}
36  bool IsWs(const char& Ch) const {
37  return (Ch==' ')||(Ch==TCh::TabCh)||(Ch==TCh::CrCh)||(Ch==TCh::LfCh);}
38  bool IsSpace(const char& Ch) const {return int(ChTyV[Ch-TCh::Mn])==hlctSpace;}
39  bool IsAlpha(const char& Ch) const {return int(ChTyV[Ch-TCh::Mn])==hlctAlpha;}
40  bool IsNum(const char& Ch) const {return int(ChTyV[Ch-TCh::Mn])==hlctNum;}
41  bool IsAlNum(const char& Ch) const { // true when Ch's entry in ChTyV is hlctAlpha or hlctNum
42  const int ChTy=ChTyV[Ch-TCh::Mn]; return (ChTy==hlctAlpha)||(ChTy==hlctNum);}
43  bool IsSym(const char& Ch) const {return int(ChTyV[Ch-TCh::Mn])==hlctSym;}
44  bool IsUrl(const char& Ch) const { // true for hlctAlpha/hlctNum chars and for . - : / ~
45  const int ChTy=ChTyV[Ch-TCh::Mn];
46  if ((ChTy==hlctAlpha)||(ChTy==hlctNum)){return true;}
47  return (Ch=='.')||(Ch=='-')||(Ch==':')||(Ch=='/')||(Ch=='~');}
48 
49  // upper/lower-case & escape-string operations
50  bool IsUc(const char& Ch) const {return Ch==UcChV[Ch-TCh::Mn];}
51  bool IsLc(const char& Ch) const {return Ch==LcChV[Ch-TCh::Mn];}
52  char GetUc(const char& Ch) const {return UcChV[Ch-TCh::Mn];}
53  char GetLc(const char& Ch) const {return LcChV[Ch-TCh::Mn];}
54  void GetUcChA(TChA& ChA) const {
55  for (int ChN=0; ChN<ChA.Len(); ChN++){ChA.PutCh(ChN, GetUc(ChA[ChN]));}}
56  void GetLcChA(TChA& ChA) const {
57  for (int ChN=0; ChN<ChA.Len(); ChN++){ChA.PutCh(ChN, GetLc(ChA[ChN]));}}
58  TStr GetUcStr(const TStr& Str) const {
59  TChA ChA(Str); GetUcChA(ChA); return ChA;}
60  TStr GetLcStr(const TStr& Str) const {
61  TChA ChA(Str); GetLcChA(ChA); return ChA;}
62  TStr GetEscStr(const TStr& Str) const;
63 
64  // standard entry points
66  static PHtmlLxChDef GetChDef(){IAssert(!ChDef.Empty()); return ChDef;}
67  static THtmlLxChDef& GetChDefRef(){IAssert(!ChDef.Empty()); return *ChDef;}
68 
69  // character-set transformations
70  static TStr GetCSZFromYuascii(const TChA& ChA);
71  static TStr GetCSZFromWin1250(const TChA& ChA);
72  static TStr GetWin1250FromYuascii(const TChA& ChA);
73  static TStr GetIsoCeFromYuascii(const TChA& ChA);
74 };
75 
77 // Html-Lexical
78 typedef enum {
81 
82 class THtmlLx{
83 private:
87  bool DoParseArg;
89  char Ch;
90  int ChX;
91  bool EscCh;
95  void GetCh(){ // load the next character into Ch: ChStack first, then the input stream
96  if (!ChStack.Empty()){
97  Ch=ChStack.Pop(); ChX++; // take a character that was pushed onto ChStack
98  } else if (RSIn.Eof()){
99  Ch=TCh::EofCh; // input exhausted: ChX is left unchanged
100  } else {Ch=RSIn.GetCh(); ChX++;}
101  SymChA+=Ch; // the fetched character (EofCh included) is appended to SymChA
102  }
103  void GetEscCh();
104  void GetMetaTag();
105  void GetTag();
106 public:
116 public:
117  THtmlLx(const PSIn& _SIn, const bool& _DoParseArg=true):
118  SIn(_SIn), RSIn(*SIn), DoParseArg(_DoParseArg),
119  ChStack(), Ch(' '), ChX(0), EscCh(false),
120  EscChA(), ArgNm(), ArgVal(),
121  Sym(hsyUndef), SymBChX(0), SymEChX(0), ChA(), UcChA(),
122  PreSpaces(0), PreSpaceChA(), ArgNmValV(){}
123 
124  THtmlLx& operator=(const THtmlLx&){Fail; return *this;}
125 
126  void PutCh(const char& _Ch){ // push-back: the current Ch is saved on ChStack and _Ch becomes the current character
127  ChStack.Push(Ch); if (!SymChA.Empty()){SymChA.Pop();} Ch=_Ch; ChX--;} // also drops the last SymChA character (if any) and decrements ChX
128  void PutStr(const TStr& Str){ // calls PutCh for each character of Str, from the last one down to Str[0]
129  int ChN=Str.Len(); while (ChN>0){ChN--; PutCh(Str[ChN]);}}
130  THtmlLxSym GetSym();
131  PHtmlTok GetTok(const bool& DoUc=true);
133  return TStr::GetSpaceStr(PreSpaces);}
134 
135  int GetArgs() const {return ArgNmValV.Len();}
136  TStr GetArgNm(const int& ArgN) const {return ArgNmValV[ArgN].Key;}
137  TStr GetArgVal(const int& ArgN) const {return ArgNmValV[ArgN].Dat;}
138  bool IsArg(const TStr& ArgNm) const {return ArgNmValV.IsIn(TStrKd(ArgNm));}
139  TStr GetArg(const TStr& ArgNm, const TStr& DfArgVal=TStr()) const { // value stored for ArgNm, or DfArgVal when SearchForw reports -1
140  const int ArgN=ArgNmValV.SearchForw(TStrKd(ArgNm));
141  return (ArgN==-1) ? DfArgVal : ArgNmValV[ArgN].Dat;}
142  void PutArg(const TStr& ArgNm, const TStr& ArgVal){ // overwrite the entry found for ArgNm, or append a new (ArgNm, ArgVal) pair
143  const int ArgN=ArgNmValV.SearchForw(TStrKd(ArgNm));
144  if (ArgN!=-1){ArgNmValV[ArgN]=TStrKd(ArgNm, ArgVal);}
145  else {ArgNmValV.Add(TStrKd(ArgNm, ArgVal));}}
146  TStr GetFullBTagStr() const;
147 
148  void MoveToStrOrEof(const TStr& Str);
149  void MoveToBTagOrEof(const TStr& TagNm);
150  void MoveToBTag2OrEof(const TStr& TagNm1, const TStr& TagNm2);
151  void MoveToBTag3OrEof(const TStr& TagNm1, const TStr& TagNm2, const TStr& TagNm3);
152  void MoveToBTagOrETagOrEof(const TStr& BTagNm, const TStr& ETagNm);
153  void MoveToBTagArgOrEof(
154  const TStr& TagNm, const TStr& ArgNm, const TStr& ArgVal);
155  void MoveToBTagArg2OrEof(const TStr& TagNm,
156  const TStr& ArgNm1, const TStr& ArgVal1,
157  const TStr& ArgNm2, const TStr& ArgVal2, const bool& AndOpP=true);
158  void MoveToBTagOrEof(
159  const TStr& TagNm1, const TStr& ArgNm1, const TStr& ArgVal1,
160  const TStr& TagNm2, const TStr& ArgNm2, const TStr& ArgVal2);
161  void MoveToETagOrEof(const TStr& TagNm);
163  TStr GetStrToBTag(const TStr& TagNm, const bool& TxtOnlyP=false);
164  TStr GetStrToBTag(const TStr& TagNm, const TStr& ArgNm,
165  const TStr& ArgVal, const bool& TxtOnlyP=false);
166  TStr GetStrToETag(const TStr& TagNm, const bool& TxtOnlyP=false);
167  TStr GetStrToETag2(const TStr& TagNm1, const TStr& TagNm2, const bool& TxtOnlyP=false);
168  TStr GetStrInTag(const TStr& TagNm, const bool& TxtOnlyP=false);
169  TStr GetHRefBeforeStr(const TStr& Str);
170  bool IsGetBTag(const TStr& TagNm);
171  bool IsGetETag(const TStr& TagNm);
172 
173  static TStr GetSymStr(const THtmlLxSym& Sym);
174  static TStr GetEscapedStr(const TChA& ChA);
175  static TStr GetAsciiStr(const TChA& ChA, const char& GenericCh='_');
176  static void GetTokStrV(const TStr& Str, TStrV& TokStrV);
177  static TStr GetNoTag(const TStr& Str);
178 };
179 
181 // Html-Token
183 private:
185  TStr Str;
186  THtmlLx::TArgNmValV ArgNmValV;
187 public:
188  THtmlTok(): Sym(hsyUndef), Str(), ArgNmValV(){}
189  THtmlTok(const THtmlLxSym& _Sym):
190  Sym(_Sym), Str(), ArgNmValV(){}
191  THtmlTok(const THtmlLxSym& _Sym, const TStr& _Str):
192  Sym(_Sym), Str(_Str), ArgNmValV(){}
193  THtmlTok(const THtmlLxSym& _Sym, const TStr& _Str,
194  const THtmlLx::TArgNmValV& _ArgNmValV):
195  Sym(_Sym), Str(_Str), ArgNmValV(_ArgNmValV){}
197  static PHtmlTok Load(TSIn&){Fail; return NULL;}
198  void Save(TSOut&){Fail;}
199 
200  THtmlTok& operator=(const THtmlTok&){Fail; return *this;}
201 
202  THtmlLxSym GetSym() const {return Sym;}
203  TStr GetStr() const {return Str;}
204  TStr GetFullStr() const;
205  bool IsArg(const TStr& ArgNm) const { // true when SearchForw finds an entry for ArgNm in ArgNmValV
206  const int ArgN=ArgNmValV.SearchForw(TStrKd(ArgNm)); return ArgN!=-1;}
207  TStr GetArg(const TStr& ArgNm) const { // value stored for ArgNm; the argument must be present (check with IsArg, or use the overload taking DfArgVal)
208  const int ArgN=ArgNmValV.SearchForw(TStrKd(ArgNm)); IAssert(ArgN!=-1); return ArgNmValV[ArgN].Dat;} // assert instead of indexing ArgNmValV with the -1 "not found" result
209  TStr GetArg(const TStr& ArgNm, const TStr& DfArgVal) const { // value stored for ArgNm, or DfArgVal when SearchForw reports -1
210  const int ArgN=ArgNmValV.SearchForw(TStrKd(ArgNm));
211  return (ArgN==-1) ? DfArgVal : ArgNmValV[ArgN].Dat;}
212  bool IsUrlTok(TStr& RelUrlStr) const;
213  bool IsRedirUrlTok() const;
214 
215  void SaveTxt(const PSOut& SOut, const bool& TxtMode=true);
216 
217  static const TStr ATagNm;
218  static const TStr AreaTagNm;
219  static const TStr BrTagNm;
220  static const TStr CardTagNm;
221  static const TStr CenterTagNm;
222  static const TStr FrameTagNm;
223  static const TStr H1TagNm;
224  static const TStr H2TagNm;
225  static const TStr H3TagNm;
226  static const TStr H4TagNm;
227  static const TStr H5TagNm;
228  static const TStr H6TagNm;
229  static const TStr ImgTagNm;
230  static const TStr LiTagNm;
231  static const TStr MetaTagNm;
232  static const TStr PTagNm;
233  static const TStr UlTagNm;
234  static const TStr TitleTagNm;
235  static const TStr TitleETagNm;
236 
237  static const TStr AltArgNm;
238  static const TStr HRefArgNm;
239  static const TStr SrcArgNm;
240  static const TStr TitleArgNm;
241  static const TStr HttpEquivArgNm;
242 
243  static bool IsBreakTag(const TStr& TagNm);
244  static bool IsBreakTok(const PHtmlTok& Tok);
245  static bool IsHTag(const TStr& TagNm, int& HTagN);
246  static PHtmlTok GetHTok(const bool& IsBTag, const int& HTagN);
247 };
248 
250 // Html-Document
251 typedef enum {
253 
255 private:
256  THtmlTokV TokV;
257 public:
258  THtmlDoc(): TokV(){}
259  THtmlDoc(
260  const PSIn& SIn, const THtmlDocType& Type=hdtAll, const bool& DoUc=true);
261  static PHtmlDoc New(
262  const PSIn& SIn, const THtmlDocType& Type=hdtAll, const bool& DoUc=true){
263  return PHtmlDoc(new THtmlDoc(SIn, Type, DoUc));}
265  static PHtmlDoc Load(TSIn&){Fail; return NULL;}
266  void Save(TSOut&){Fail;}
267 
268  THtmlDoc& operator=(const THtmlDoc&){Fail; return *this;}
269 
270  int GetToks() const {return TokV.Len();}
271  PHtmlTok GetTok(const int& TokN) const {return TokV[TokN];}
272  PHtmlTok GetTok(const int& TokN, THtmlLxSym& Sym, TStr& Str) const { // returns token TokN and copies its symbol and string into Sym and Str
273  const PHtmlTok& Tok=TokV[TokN]; Sym=Tok->GetSym(); Str=Tok->GetStr(); return Tok;}
274  void AddTokV(const THtmlTokV& _TokV){TokV.AddV(_TokV);}
275 
276  static TStr GetTxtLnDoc(const TStr& HtmlStr);
277  static TStr GetTxtLnDoc(const TStr& HtmlStr, const TStr& BaseUrlStr,
278  const bool& OutUrlP, const bool& OutTagsP);
279 
281  const TStr& FNm, const THtmlDocType& Type=hdtAll, const bool& DoUc=true){
282  PSIn SIn=TFIn::New(FNm); return PHtmlDoc(new THtmlDoc(SIn, Type, DoUc));}
283  void SaveTxt(const PSOut& SOut, const bool& TxtMode=true) const;
284 
285  static void SaveHtmlToTxt(
286  const TStr& HtmlStr, const PSOut& TxtSOut, const TStr& BaseUrlStr,
287  const bool& OutUrlP, const bool& OutToksP);
288  static void SaveHtmlToTxt(
289  const TStr& HtmlStr, const TStr& TxtFNm, const TStr& BaseUrlStr,
290  const bool& OutUrlP, const bool& OutToksP);
291  static void SaveHtmlToXml(
292  const TStr& HtmlStr, const PSOut& XmlSOut, const TStr& BaseUrlStr,
293  const bool& OutTextP, const bool& OutUrlP, const bool& OutToksP,
294  const bool& OutTagsP, const bool& OutArgsP);
295  static void SaveHtmlToXml(
296  const TStr& HtmlStr, const TStr& XmlFNm, const TStr& BaseUrlStr,
297  const bool& OutTextP, const bool& OutUrlP, const bool& OutToksP,
298  const bool& OutTagsP, const bool& OutArgsP);
299 
300  static TLxSym GetLxSym(const THtmlLxSym& HtmlLxSym, const TChA& ChA);
301 
302  static bool _IsTagRedir(
303  const TStr& TagStr, const TStr& ArgNm, THtmlLx& Lx,
304  const TStr& BaseUrlStr, const TStr& RedirUrlStr);
305  static TStr GetRedirHtmlDocStr(const TStr& HtmlStr,
306  const TStr& BaseUrlStr, const TStr& RedirUrlStr);
307 };
308 
310 // Html-Hyper-Link-Document-Vector
312 private:
313  PHtmlDoc RefHtmlDoc;
314  THtmlDocV HldV;
315 public:
316  THtmlHldV(const PHtmlDoc& _RefHtmlDoc, const int& HldWnLen=10);
318  static PHtmlHldV Load(TSIn&){Fail; return NULL;}
319  void Save(TSOut&){Fail;}
320 
321  THtmlHldV& operator=(const THtmlHldV&){Fail; return *this;}
322 
323  PHtmlDoc GetRefHtmlDoc() const {return RefHtmlDoc;} // read-only accessor: the stored RefHtmlDoc pointer
324  int GetHlds() const {return HldV.Len();} // number of entries in HldV
325  PHtmlDoc GetHld(const int& HldN) const {return HldV[HldN];} // entry HldN of HldV; const so it is callable on const objects
326 };
327 
329 // Web-Page
331 private:
332  TStrV UrlStrV;
333  TStrV IpNumV;
334  PHttpResp HttpResp;
335  uint64 FetchMSecs;
336 public:
337  TWebPg(): UrlStrV(), IpNumV(), HttpResp(), FetchMSecs(0){} // FetchMSecs starts at 0 so GetFetchMSecs() is defined before PutFetchMSecs() is called
338  TWebPg(const TStrV& _UrlStrV, const TStrV& _IpNumV, const PHttpResp& _HttpResp):
339  UrlStrV(_UrlStrV), IpNumV(_IpNumV), HttpResp(_HttpResp), FetchMSecs(0){} // copies the three arguments; FetchMSecs gets the same 0 default
340  static PWebPg New(const TStrV& UrlStrV, const TStrV& IpNumV, const PHttpResp& HttpResp){
341  return new TWebPg(UrlStrV, IpNumV, HttpResp);}
342  static PWebPg New(const TStrV& UrlStrV, const PHttpResp& HttpResp){
343  return new TWebPg(UrlStrV, TStrV(), HttpResp);}
344  static PWebPg New(const TStr& UrlStr, const PHttpResp& HttpResp){
345  TStrV UrlStrV; UrlStrV.Add(UrlStr);
346  return new TWebPg(UrlStrV, TStrV(), HttpResp);}
349  static PWebPg Load(TSIn&){Fail; return NULL;}
350  void Save(TSOut&){Fail;}
351 
352  TWebPg& operator=(const TWebPg&){Fail; return *this;}
353 
354  int GetUrls() const {return UrlStrV.Len();}
355  TStr GetUrlStr(const int& UrlN=-1) const { // UrlN==-1 selects UrlStrV.Last(), otherwise entry UrlN
356  return (UrlN==-1) ? UrlStrV.Last() : UrlStrV[UrlN];}
357  PUrl GetUrl(const int& UrlN=-1) const {
358  // UrlN==-1 selects UrlStrV.Last(), otherwise entry UrlN
359  const TStr UrlStr=(UrlN==-1) ? UrlStrV.Last() : UrlStrV[UrlN];
360  return TUrl::New(UrlStr);}
361 
362  int GetIps() const {return IpNumV.Len();}
363  TStr GetIpNum(const int& IpN=-1) const { // IpN==-1 selects IpNumV.Last(), otherwise entry IpN
364  return (IpN==-1) ? IpNumV.Last() : IpNumV[IpN];}
365 
366  PHttpResp GetHttpResp() const {return HttpResp;}
367  TStr GetHttpHdStr() const {return GetHttpResp()->GetHdStr();}
368  TStr GetHttpBodyAsStr() const {return GetHttpResp()->GetBodyAsStr();}
369  //void GetOutUrlStrV(TStrV& OutUrlStrV) const;
370  void GetOutUrlV(TUrlV& OutUrlV, TUrlV& OutRedirUrlV) const;
371  void GetOutUrlV(TUrlV& OutUrlV) const { // convenience overload: forwards to the two-vector GetOutUrlV
372  TUrlV OutRedirUrlV; GetOutUrlV(OutUrlV, OutRedirUrlV);} // OutRedirUrlV is a local and is discarded on return
373  void GetOutDescUrlStrKdV(TStrKdV& OutDescUrlStrKdV) const;
374 
375  // fetch time
376  void PutFetchMSecs(const uint64& _FetchMSecs){FetchMSecs=_FetchMSecs;}
377  uint64 GetFetchMSecs() const {return FetchMSecs;}
378 
379  void SaveAsHttpBody(const TStr& FNm) const;
380  void SaveAsHttp(const TStr& FNm) const;
381 
382  bool IsTxt() const;
383 };
Definition: html.h:252
THtmlDocType
Definition: html.h:251
#define IAssert(Cond)
Definition: bd.h:262
static PWebPg Load(TSIn &)
Definition: html.h:349
static const TStr H5TagNm
Definition: html.h:227
THtmlLxSym
Definition: html.h:78
bool IsGetETag(const TStr &TagNm)
Definition: html.cpp:547
THtmlDoc(TSIn &)
Definition: html.h:264
static const TStr H4TagNm
Definition: html.h:226
TChA ArgNm
Definition: html.h:93
TStr GetHRefBeforeStr(const TStr &Str)
Definition: html.cpp:530
static TStr GetSpaceStr(const int &Spaces)
Definition: dt.cpp:1608
TStr GetHttpHdStr() const
Definition: html.h:367
static const TStr FrameTagNm
Definition: html.h:222
static THtmlLxChDef ChDef
Definition: html.h:84
static const TStr H3TagNm
Definition: html.h:225
bool IsWs(const char &Ch) const
Definition: html.h:36
int Len() const
Definition: dt.h:487
static const TStr H1TagNm
Definition: html.h:223
#define ClassHdTP(TNm, PNm)
Definition: bd.h:135
static const TStr TitleArgNm
Definition: html.h:240
void GetLcChA(TChA &ChA) const
Definition: html.h:56
TSIn & RSIn
Definition: html.h:86
Definition: html.h:252
PUrl GetUrl(const int &UrlN=-1) const
Definition: html.h:357
static const TStr LiTagNm
Definition: html.h:230
TChA EscChA
Definition: html.h:92
PHtmlDoc GetRefHtmlDoc()
Definition: html.h:323
Definition: html.h:252
Definition: html.h:79
THtmlTok(const THtmlLxSym &_Sym, const TStr &_Str)
Definition: html.h:191
void MoveToETagOrEof(const TStr &TagNm)
Definition: html.cpp:441
static const TStr HRefArgNm
Definition: html.h:238
TStr GetFullBTagStr() const
Definition: html.cpp:358
TStr GetUrlStr(const int &UrlN=-1) const
Definition: html.h:355
int SymBChX
Definition: html.h:108
void PutStr(const TStr &Str)
Definition: html.h:128
bool IsIn(const TVal &Val) const
Checks whether element Val is a member of the vector.
Definition: ds.h:797
bool IsNum(const char &Ch) const
Definition: html.h:40
Definition: html.h:182
void MoveToBTagArgOrEof(const TStr &TagNm, const TStr &ArgNm, const TStr &ArgVal)
Definition: html.cpp:400
bool IsSym(const char &Ch) const
Definition: html.h:43
bool Empty() const
Definition: dt.h:260
#define Fail
Definition: bd.h:238
static void GetTokStrV(const TStr &Str, TStrV &TokStrV)
Definition: html.cpp:595
TStrKdV TArgNmValV
Definition: html.h:114
bool Empty() const
Definition: bd.h:501
void PutCh(const int &ChN, const char &Ch)
Definition: dt.h:278
Definition: html.h:79
TStr GetArg(const TStr &ArgNm, const TStr &DfArgVal) const
Definition: html.h:209
void GetUcChA(TChA &ChA) const
Definition: html.h:54
TLxSym
Definition: lx.h:44
bool IsEoln(const char &Ch) const
Definition: html.h:35
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:547
void PutCh(const char &_Ch)
Definition: html.h:126
bool DoParseArg
Definition: html.h:87
THtmlLxSym GetSym()
Definition: html.cpp:277
static THtmlLxChDef & GetChDefRef()
Definition: html.h:67
THtmlLx & operator=(const THtmlLx &)
Definition: html.h:124
void Save(TSOut &)
Definition: html.h:266
TStr GetArg(const TStr &ArgNm) const
Definition: html.h:207
void Save(TSOut &)
Definition: html.h:350
PHtmlTok GetTok(const int &TokN, THtmlLxSym &Sym, TStr &Str) const
Definition: html.h:272
static const TStr MetaTagNm
Definition: html.h:231
Definition: html.h:12
static PUrl New(const TStr &RelUrlStr, const TStr &BaseUrlStr=TStr())
Definition: url.h:25
int Len() const
Definition: dt.h:259
void MoveToBTagArg2OrEof(const TStr &TagNm, const TStr &ArgNm1, const TStr &ArgVal1, const TStr &ArgNm2, const TStr &ArgVal2, const bool &AndOpP=true)
Definition: html.cpp:410
static TStr GetEscapedStr(const TChA &ChA)
Definition: html.cpp:568
void Save(TSOut &)
Definition: html.h:319
int GetHlds()
Definition: html.h:324
bool IsUrl(const char &Ch) const
Definition: html.h:44
TStr GetStr() const
Definition: html.h:203
static PHtmlLxChDef ChDef
Definition: html.h:65
static const TStr TitleTagNm
Definition: html.h:234
static PWebPg New(const TStrV &UrlStrV, const PHttpResp &HttpResp)
Definition: html.h:342
char Ch
Definition: html.h:89
int GetUrls() const
Definition: html.h:354
THtmlTok(const THtmlLxSym &_Sym, const TStr &_Str, const THtmlLx::TArgNmValV &_ArgNmValV)
Definition: html.h:193
void GetTag()
Definition: html.cpp:236
bool IsArg(const TStr &ArgNm) const
Definition: html.h:205
THtmlTok(TSIn &)
Definition: html.h:196
bool IsGetBTag(const TStr &TagNm)
Definition: html.cpp:541
static const TStr AreaTagNm
Definition: html.h:218
TPt< THtmlDoc > PHtmlDoc
Definition: html.h:6
void Save(TSOut &)
Definition: html.h:198
THtmlLxSym GetSym() const
Definition: html.h:202
Definition: html.h:11
Definition: fl.h:58
Definition: html.h:11
static PWebPg New(const TStrV &UrlStrV, const TStrV &IpNumV, const PHttpResp &HttpResp)
Definition: html.h:340
TChA PreSpaceChA
Definition: html.h:113
static const TStr HttpEquivArgNm
Definition: html.h:241
TStr GetLcStr(const TStr &Str) const
Definition: html.h:60
TStr GetArgVal(const int &ArgN) const
Definition: html.h:137
TChA ChStack
Definition: html.h:88
Definition: html.h:330
static const char EofCh
Definition: dt.h:947
Definition: html.h:252
#define ClassTP(TNm, PNm)
Definition: bd.h:126
Definition: html.h:252
static const TStr H2TagNm
Definition: html.h:224
THtmlDoc & operator=(const THtmlDoc &)
Definition: html.h:268
static PSIn New(const TStr &FNm)
Definition: fl.cpp:290
static const TStr ATagNm
Definition: html.h:217
#define ClassTPV(TNm, PNm, TNmV)
Definition: bd.h:162
static const TStr CenterTagNm
Definition: html.h:221
void Save(TSOut &SOut)
Definition: html.h:28
THtmlTok & operator=(const THtmlTok &)
Definition: html.h:200
static PHtmlHldV Load(TSIn &)
Definition: html.h:318
bool IsArg(const TStr &ArgNm) const
Definition: html.h:138
static const TStr UlTagNm
Definition: html.h:233
virtual bool Eof()=0
static TStr GetNoTag(const TStr &Str)
Definition: html.cpp:606
THtmlLx(const PSIn &_SIn, const bool &_DoParseArg=true)
Definition: html.h:117
static const TStr CardTagNm
Definition: html.h:220
unsigned long long uint64
Definition: bd.h:38
static const char TabCh
Definition: dt.h:944
static const char Mn
Definition: dt.h:939
TStr GetArgNm(const int &ArgN) const
Definition: html.h:136
void GetOutUrlV(TUrlV &OutUrlV) const
Definition: html.h:371
TStr GetStrToETag(const TStr &TagNm, const bool &TxtOnlyP=false)
Definition: html.cpp:494
bool IsUc(const char &Ch) const
Definition: html.h:50
void PutArg(const TStr &ArgNm, const TStr &ArgVal)
Definition: html.h:142
static PHtmlDoc LoadTxt(const TStr &FNm, const THtmlDocType &Type=hdtAll, const bool &DoUc=true)
Definition: html.h:280
bool EscCh
Definition: html.h:91
Definition: html.h:80
PHtmlTok GetTok(const bool &DoUc=true)
Definition: html.cpp:353
int SymEChX
Definition: html.h:108
Definition: html.h:12
TStr GetArg(const TStr &ArgNm, const TStr &DfArgVal=TStr()) const
Definition: html.h:139
~TWebPg()
Definition: html.h:347
int GetToks() const
Definition: html.h:270
TWebPg & operator=(const TWebPg &)
Definition: html.h:352
PHttpResp GetHttpResp() const
Definition: html.h:366
Definition: html.h:11
bool IsAlNum(const char &Ch) const
Definition: html.h:41
TStr GetStrInTag(const TStr &TagNm, const bool &TxtOnlyP=false)
Definition: html.cpp:525
TChA UcChA
Definition: html.h:110
static PHtmlDoc Load(TSIn &)
Definition: html.h:265
static const TStr H6TagNm
Definition: html.h:228
void MoveToBTagOrETagOrEof(const TStr &BTagNm, const TStr &ETagNm)
Definition: html.cpp:394
TChA ChA
Definition: html.h:109
Definition: html.h:79
Definition: fl.h:128
TStr GetIpNum(const int &IpN=-1) const
Definition: html.h:363
void MoveToBTag3OrEof(const TStr &TagNm1, const TStr &TagNm2, const TStr &TagNm3)
Definition: html.cpp:388
static const char LfCh
Definition: dt.h:945
int GetChTy(const char &Ch) const
Definition: html.h:34
void GetMetaTag()
Definition: html.cpp:225
TStr GetStr() const
Definition: dt.h:678
int ChX
Definition: html.h:90
bool IsSpace(const char &Ch) const
Definition: html.h:38
void AddTokV(const THtmlTokV &_TokV)
Definition: html.h:274
Definition: dt.h:201
TStr GetTextOnlyStrToEof()
Definition: html.cpp:447
Definition: html.h:79
TStr GetUcStr(const TStr &Str) const
Definition: html.h:58
Definition: html.h:80
PSIn SIn
Definition: html.h:85
void GetCh()
Definition: html.h:95
TVec< TStr > TStrV
Definition: ds.h:1534
TWebPg(const TStrV &_UrlStrV, const TStrV &_IpNumV, const PHttpResp &_HttpResp)
Definition: html.h:338
uint64 GetFetchMSecs() const
Definition: html.h:377
Definition: html.h:12
int GetIps() const
Definition: html.h:362
static const TStr ImgTagNm
Definition: html.h:229
TStr GetPreSpaceStr() const
Definition: html.h:132
static const TStr PTagNm
Definition: html.h:232
TStr GetStrToETag2(const TStr &TagNm1, const TStr &TagNm2, const bool &TxtOnlyP=false)
Definition: html.cpp:509
void MoveToStrOrEof(const TStr &Str)
Definition: html.cpp:370
bool IsAlpha(const char &Ch) const
Definition: html.h:39
static TStr GetAsciiStr(const TChA &ChA, const char &GenericCh='_')
Definition: html.cpp:584
void GetEscCh()
Definition: html.cpp:195
Definition: html.h:80
static const TStr AltArgNm
Definition: html.h:237
Definition: dt.h:412
TChA SymChA
Definition: html.h:111
static PHtmlLxChDef GetChDef()
Definition: html.h:66
char GetUc(const char &Ch) const
Definition: html.h:52
static PHtmlLxChDef Load(TSIn &SIn)
Definition: html.h:27
THtmlLxSym Sym
Definition: html.h:107
Definition: html.h:79
void MoveToBTag2OrEof(const TStr &TagNm1, const TStr &TagNm2)
Definition: html.cpp:382
TSizeTy SearchForw(const TVal &Val, const TSizeTy &BValN=0) const
Returns the position of an element with value Val.
Definition: ds.h:1487
Definition: html.h:252
static const char CrCh
Definition: dt.h:946
int GetArgs() const
Definition: html.h:135
Definition: html.h:80
void Push(const char &Ch)
Definition: dt.h:264
THtmlHldV & operator=(const THtmlHldV &)
Definition: html.h:321
Definition: html.h:82
TWebPg(TSIn &)
Definition: html.h:348
Definition: bd.h:196
char GetLc(const char &Ch) const
Definition: html.h:53
static const TStr BrTagNm
Definition: html.h:219
THtmlTok(const THtmlLxSym &_Sym)
Definition: html.h:189
virtual char GetCh()=0
Definition: html.h:254
THtmlLxChTy
Definition: html.h:10
TArgNmValV ArgNmValV
Definition: html.h:115
TChA ArgVal
Definition: html.h:94
void PutFetchMSecs(const uint64 &_FetchMSecs)
Definition: html.h:376
PHtmlDoc GetHld(const int &HldN)
Definition: html.h:325
Definition: html.h:11
int PreSpaces
Definition: html.h:112
THtmlLxChDef & operator=(const THtmlLxChDef &)
Definition: html.h:31
char Pop()
Definition: dt.h:265
void MoveToBTagOrEof(const TStr &TagNm)
Definition: html.cpp:376
static PHtmlDoc New(const PSIn &SIn, const THtmlDocType &Type=hdtAll, const bool &DoUc=true)
Definition: html.h:261
static PWebPg New(const TStr &UrlStr, const PHttpResp &HttpResp)
Definition: html.h:344
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:574
static const TStr TitleETagNm
Definition: html.h:235
PHtmlTok GetTok(const int &TokN) const
Definition: html.h:271
static const TStr SrcArgNm
Definition: html.h:239
static PHtmlTok Load(TSIn &)
Definition: html.h:197
TKeyDat< TStr, TStr > TStrKd
Definition: ds.h:404
static TStr GetSymStr(const THtmlLxSym &Sym)
Definition: html.cpp:553
bool IsLc(const char &Ch) const
Definition: html.h:51
TStr GetStrToBTag(const TStr &TagNm, const bool &TxtOnlyP=false)
Definition: html.cpp:462
TStr GetHttpBodyAsStr() const
Definition: html.h:368