SNAP Library 4.0, Developer Reference  2017-07-27 13:18:06
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
url.h
Go to the documentation of this file.
1 #include "bd.h"
2 
4 // Url
// Scheme classification of a URL. IsOk() treats usUndef as "not ok"; IsOk(usHttp) is what
// the final-URL accessors assert on.
5 typedef enum {usUndef, usHttp, usOther} TUrlScheme;
6 
8 private:
// Prefix string constants; only declared in this header.
9  static const TStr UrlHttpPrefixStr;
10  static const TStr UrlHttpAbsPrefixStr;
// Scheme of this URL; IsOk() reports false while it is usUndef.
11  TUrlScheme Scheme;
// Strings returned by GetUrlStr(), GetRelUrlStr() and GetBaseUrlStr() respectively.
12  TStr UrlStr, RelUrlStr, BaseUrlStr;
// URL components exposed through the matching Get...() accessors.
13  TStr SchemeNm, HostNm;
14  TStr PortStr, PathStr, SearchStr, FragIdStr;
// Numeric port. GetPortN() asserts PortN!=-1, so -1 looks like the "unset" marker — TODO confirm.
15  int PortN;
// Path segments, read by GetPathSegs()/GetPathSeg().
16  TStrV PathSegV;
// IP number assigned via PutIpNum(); empty means none (see IsIpNum()).
17  TStr IpNum;
// Final URL and host; an empty FinalUrlStr means no final URL is defined (see IsDefFinalUrl()).
18  TStr FinalUrlStr, FinalHostNm;
// HTTP request string managed by PutHttpRqStr()/ChangeHttpRqStr().
19  TStr HttpRqStr;
// Private helpers declared here and defined elsewhere; presumably they fill the fields above
// from an absolute URL, or from a relative URL plus a base URL — confirm in the implementation.
20  void GetAbs(const TStr& AbsUrlStr);
21  void GetAbsFromBase(const TStr& RelUrlStr, const TStr& BaseUrlStr);
23 public:
// Constructs a URL from _RelUrlStr and an optional _BaseUrlStr (defaults to an empty TStr).
// Only declared here; the definition is outside this header.
24  TUrl(const TStr& _RelUrlStr, const TStr& _BaseUrlStr=TStr());
// Factory: heap-allocates a TUrl from the same two arguments and wraps it in a PUrl.
25  static PUrl New(const TStr& RelUrlStr, const TStr& BaseUrlStr=TStr()){
26  return PUrl(new TUrl(RelUrlStr, BaseUrlStr));}
// Destructor with an empty body.
27  ~TUrl(){}
// Stream loading is not implemented: the body invokes the Fail macro (bd.h). The trailing
// `return NULL` is presumably never reached — confirm against Fail's definition.
29  static PUrl Load(TSIn&){Fail; return NULL;}
// Stream saving is likewise not implemented; the body only invokes Fail.
30  void Save(TSOut&){Fail;}
31 
// Validity check. Called with the default argument (usUndef) it reports whether Scheme
// differs from usUndef; called with a concrete scheme it reports whether Scheme equals it.
32  bool IsOk(const TUrlScheme _Scheme=usUndef) const {
33  const bool AnySchemeP=(_Scheme==usUndef);
34  return AnySchemeP ? (Scheme!=usUndef) : (Scheme==_Scheme);}
// Returns the stored scheme. Const-qualified, like the other read-only accessors of this
// class, so it can be called through a const TUrl object or reference.
35  TUrlScheme GetScheme() const {return Scheme;}
// Returns a copy of UrlStr; no IsOk() assertion is made.
36  TStr GetUrlStr() const {return UrlStr;}
// Returns a copy of RelUrlStr; no IsOk() assertion is made.
37  TStr GetRelUrlStr() const {return RelUrlStr;}
// True when a non-empty base URL string is stored. Const-qualified because it only reads
// BaseUrlStr, which lets it be called on const TUrl objects.
38  bool IsBaseUrl() const {return !BaseUrlStr.Empty();}
// Returns a copy of BaseUrlStr.
39  TStr GetBaseUrlStr() const {return BaseUrlStr;}
// The component getters below first EAssert(IsOk()) and then return a copy of the field.
40  TStr GetSchemeNm() const {EAssert(IsOk()); return SchemeNm;}
41  TStr GetHostNm() const {EAssert(IsOk()); return HostNm;}
// Declared here, defined elsewhere. The meaning of MxDmSegs=-1 is not visible in this header.
42  TStr GetDmNm(const int& MxDmSegs=-1) const;
// True when PortN is strictly positive (after asserting IsOk()).
43  bool IsPortOk() const { EAssert(IsOk()); return (PortN > 0); }
44  TStr GetPortStr() const {EAssert(IsOk()); return PortStr;}
// NOTE(review): this asserts PortN!=-1 whereas IsPortOk() tests PortN>0, so PortN==0 passes
// here but is rejected by IsPortOk() — confirm which condition is intended.
45  int GetPortN() const {EAssert(IsOk()&&(PortN!=-1)); return PortN;}
46  TStr GetPathStr() const {EAssert(IsOk()); return PathStr;}
// Number of path segments. Unlike the neighbouring getters, no IsOk() assertion is made.
47  int GetPathSegs() const {return PathSegV.Len();}
// Returns segment PathSegN by indexing PathSegV directly; no IsOk() assertion is made here.
48  TStr GetPathSeg(const int& PathSegN) const {return PathSegV[PathSegN];}
49  TStr GetSearchStr() const {EAssert(IsOk()); return SearchStr;}
50  TStr GetFragIdStr() const {EAssert(IsOk()); return FragIdStr;}
51 
// True when a non-empty IP number string has been stored.
52  bool IsIpNum() const {return !IpNum.Empty();}
// Stores _IpNum; no validation is performed in this header.
53  void PutIpNum(const TStr& _IpNum){IpNum=_IpNum;}
// Returns the stored IP number; asserts (EAssert) that one is present.
54  TStr GetIpNum() const {EAssert(IsIpNum()); return IpNum;}
// Returns the IP number when one is stored, otherwise the host name (which asserts IsOk()).
55  TStr GetIpNumOrHostNm() const {return IsIpNum() ? GetIpNum() : GetHostNm();}
56 
// True when FinalUrlStr is non-empty. Asserts that the URL is ok with scheme usHttp.
57  bool IsDefFinalUrl() const {
58  EAssert(IsOk(usHttp)); return !FinalUrlStr.Empty();}
// Returns FinalUrlStr; asserts that a final URL is defined (see IsDefFinalUrl()).
59  TStr GetFinalUrlStr() const {
60  EAssert(IsDefFinalUrl()); return FinalUrlStr;}
62  if (IsDefFinalUrl()){return FinalUrlStr;} else {return UrlStr;}}
// Returns FinalHostNm; asserts that a final URL is defined (see IsDefFinalUrl()).
63  TStr GetFinalHostNm() const {
64  EAssert(IsDefFinalUrl()); return FinalHostNm;}
66  if (IsDefFinalUrl()){return FinalHostNm;} else {return HostNm;}}
// Marks the current URL as the final one by copying HostNm and UrlStr into the final-URL
// fields. Requires an ok usHttp URL that has no final URL defined yet (both EAssert-ed).
67  void DefUrlAsFinal(){
68  EAssert(IsOk(usHttp));
68  EAssert(!IsDefFinalUrl());
69  FinalHostNm=HostNm;
69  FinalUrlStr=UrlStr;}
// Declared here, defined elsewhere.
// NOTE(review): the parameter is named _FinalHostNm although the method name refers to the
// final URL — confirm in the implementation which value callers are expected to pass.
70  void DefFinalUrl(const TStr& _FinalHostNm);
71 
// Stores the HTTP request string.
72  void PutHttpRqStr(const TStr& _HttpRqStr){HttpRqStr=_HttpRqStr;}
// Returns a copy of the stored HTTP request string (may be empty).
73  TStr GetHttpRqStr() const {return HttpRqStr;}
// True when a non-empty HTTP request string is stored.
74  bool IsHttpRqStr() const {return !HttpRqStr.Empty();}
// Forwards to TStr::ChangeStr(SrcStr, DstStr) on the stored request string, modifying it in place.
75  void ChangeHttpRqStr(const TStr& SrcStr, const TStr& DstStr){
76  HttpRqStr.ChangeStr(SrcStr, DstStr);}
77 
// Asserts IsOk(), then applies TStr::IsSuffix to the GetUc() forms of HostNm and _HostNm;
// both sides go through GetUc(), so the comparison presumably ignores letter case.
78  bool IsInHost(const TStr& _HostNm) const {
79  EAssert(IsOk()); return HostNm.GetUc().IsSuffix(_HostNm.GetUc());}
// Same idea for the path: TStr::IsPrefix on the GetUc() forms of PathStr and _PathStr.
80  bool IsInPath(const TStr& _PathStr) const {
81  EAssert(IsOk()); return PathStr.GetUc().IsPrefix(_PathStr.GetUc());}
// Declared here, defined elsewhere; presumably lower-cases the path — confirm in the implementation.
82  void ToLcPath();
83 
// Static classification helpers operating on a raw URL string; declared here, defined elsewhere.
84  static bool IsAbs(const TStr& UrlStr);
85  static bool IsScript(const TStr& UrlStr);
86  static bool IsSite(const TStr& UrlStr);
87 
// Builds a URL from a shortcut string and default host-name prefix/suffix; defined elsewhere.
88  static PUrl GetUrlFromShortcut(const TStr& ShortcutUrlStr,
89  const TStr& DfHostNmPrefix, const TStr& DfHostNmSufix);
// Declared here, defined elsewhere.
90  static TStr GetUrlSearchStr(const TStr& Str);
// Alias: encoding simply forwards to GetUrlSearchStr(Str).
91  static TStr EncodeUrlStr(const TStr& Str){return GetUrlSearchStr(Str);}
// Presumably the inverse of EncodeUrlStr — confirm in the implementation.
92  static TStr DecodeUrlStr(const TStr& UrlStr);
// Declared here, defined elsewhere; Copies defaults to 1.
93  static TStr GetDocStrFromUrlStr(const TStr& UrlStr, const int& Copies=1);
// Declared here, defined elsewhere; MxLen defaults to -1 and HostOnlyP to false.
94  static TStr GetTopDownDocNm(
95  const TStr& UrlStr, const int& MxLen=-1, const bool& HostOnlyP=false);
96 };
100 
102 // Url-Environment
104 private:
// Base URL string, set via PutBaseUrlStr() and read via GetBaseUrlStr().
105  TStr BaseUrlStr;
// Key names; AddKeyVal()/AddToKeyVal() add a name here the first time a key is seen.
106  TStrV KeyNmV;
// Maps each key name to its vector of value strings.
107  TStrStrVH KeyNmToValH;
108 public:
110  KeyNmV(), KeyNmToValH(10){}
// Copy constructor. Copies the base URL together with the key list and the key-to-values
// table, so that a copy reports the same GetBaseUrlStr() as its source. Initializers are
// listed in member declaration order (BaseUrlStr, KeyNmV, KeyNmToValH).
111  TUrlEnv(const TUrlEnv& UrlEnv):
112  BaseUrlStr(UrlEnv.BaseUrlStr), KeyNmV(UrlEnv.KeyNmV), KeyNmToValH(UrlEnv.KeyNmToValH){}
// Factory: heap-allocates a default-constructed TUrlEnv and returns it as a PUrlEnv.
113  static PUrlEnv New(){return new TUrlEnv();}
// Convenience factory: creates an empty environment, sets its base URL and registers up to
// four key/value pairs. A pair is skipped when its key name is empty.
114  static PUrlEnv New(const TStr& BaseUrlStr,
115  const TStr& KeyNm1=TStr(), const TStr& ValStr1=TStr(),
116  const TStr& KeyNm2=TStr(), const TStr& ValStr2=TStr(),
117  const TStr& KeyNm3=TStr(), const TStr& ValStr3=TStr(),
118  const TStr& KeyNm4=TStr(), const TStr& ValStr4=TStr()){
119  PUrlEnv NewUrlEnv(New());
120  NewUrlEnv->PutBaseUrlStr(BaseUrlStr);
// Pairs are added in argument order, each one only if its key is non-empty.
121  if (!KeyNm1.Empty()){
121  NewUrlEnv->AddKeyVal(KeyNm1, ValStr1);}
122  if (!KeyNm2.Empty()){
122  NewUrlEnv->AddKeyVal(KeyNm2, ValStr2);}
123  if (!KeyNm3.Empty()){
123  NewUrlEnv->AddKeyVal(KeyNm3, ValStr3);}
124  if (!KeyNm4.Empty()){
124  NewUrlEnv->AddKeyVal(KeyNm4, ValStr4);}
125  return NewUrlEnv;}
// Stream constructor: reads KeyNmV and then KeyNmToValH from SIn.
// NOTE(review): BaseUrlStr is neither written by Save() nor read here, so it does not survive
// a Save/Load round trip — confirm whether that is intended before changing the stream format.
127  TUrlEnv(TSIn& SIn): KeyNmV(SIn), KeyNmToValH(SIn){}
// Factory wrapper around the stream constructor.
128  static PUrlEnv Load(TSIn& SIn){return new TUrlEnv(SIn);}
// Writes KeyNmV and then KeyNmToValH to SOut, in the order the stream constructor reads them.
129  void Save(TSOut& SOut){KeyNmV.Save(SOut); KeyNmToValH.Save(SOut);}
130 
132  if (this!=&Env){KeyNmV=Env.KeyNmV; KeyNmToValH=Env.KeyNmToValH;}
133  return *this;}
134 
135  // base url
// Stores the base URL string; no validation is performed here.
136  void PutBaseUrlStr(const TStr& _BaseUrlStr){BaseUrlStr=_BaseUrlStr;}
// Returns a copy of the stored base URL string.
137  TStr GetBaseUrlStr() const {return BaseUrlStr;}
138 
139  // adding key-value
// Sets the value list of KeyNm to exactly one entry, ValStr. An unknown key is first
// registered in both KeyNmV and KeyNmToValH; any values already stored are cleared.
140  void AddKeyVal(const TStr& KeyNm, const TStr& ValStr){
141  if (!IsKey(KeyNm)){
141  KeyNmV.Add(KeyNm);
141  KeyNmToValH.AddKey(KeyNm);}
142  TStrV& ValStrV=KeyNmToValH.GetDat(KeyNm);
142  ValStrV.Clr();
143  ValStrV.Add(ValStr);}
// Adds ValStr to the value list of KeyNm, keeping values already stored (unlike AddKeyVal,
// which clears them first). An unknown key is first registered in KeyNmV and KeyNmToValH.
144  void AddToKeyVal(const TStr& KeyNm, const TStr& ValStr){
145  if (!IsKey(KeyNm)){KeyNmV.Add(KeyNm); KeyNmToValH.AddKey(KeyNm);}
146  KeyNmToValH.GetDat(KeyNm).Add(ValStr);}
147 
148  // key retrieval
// True when no key has been registered.
149  bool Empty() const {return KeyNmV.Empty();}
// Number of registered keys.
150  int GetKeys() const {return KeyNmV.Len();}
// Key membership test via KeyNmV.SearchForw(); -1 is taken to mean "not found".
// NOTE(review): GetVal(const TStr&, ...) tests membership through KeyNmToValH.IsKey() instead.
151  bool IsKey(const TStr& KeyNm) const {return KeyNmV.SearchForw(KeyNm)!=-1;}
// Returns the result of KeyNmV.SearchForw(KeyNm) unchanged (so -1 when IsKey() would be false).
152  int GetKeyN(const TStr& KeyNm) const {return KeyNmV.SearchForw(KeyNm);}
// Returns the key name at position KeyN by indexing KeyNmV directly.
153  TStr GetKeyNm(const int& KeyN) const {return KeyNmV[KeyN];}
154 
155  // value retrieval
// Number of values stored for the key at position KeyN.
156  int GetVals(const int& KeyN) const {
157  return KeyNmToValH.GetDat(KeyNmV[KeyN]).Len();}
// Number of values stored for KeyNm; the key is looked up without a prior IsKey() check.
158  int GetVals(const TStr& KeyNm) const {
159  return KeyNmToValH.GetDat(KeyNm).Len();}
// Value number ValN (default 0) of the key at position KeyN; both indices are used as given.
160  TStr GetVal(const int& KeyN, const int& ValN=0) const {
161  return KeyNmToValH.GetDat(KeyNmV[KeyN])[ValN];}
// Value number ValN (default 0) stored for KeyNm. When KeyNm is not a key of KeyNmToValH
// the supplied default DfVal is returned instead. ValN itself is used as given.
162  TStr GetVal(const TStr& KeyNm, const int& ValN=0, const TStr& DfVal="") const {
163  if (!KeyNmToValH.IsKey(KeyNm)){
163  return DfVal;}
164  return KeyNmToValH.GetDat(KeyNm)[ValN];}
166 
167  // full-url-string
// Declared here, defined elsewhere; presumably combines the base URL with the stored
// key/value pairs — confirm in the implementation.
168  TStr GetFullUrlStr() const;
169 
// Declared here, defined elsewhere; returns a PUrlEnv produced from the given environment.
170  static PUrlEnv MkClone(const PUrlEnv& UrlEnv);
171 };
172 
~TUrlEnv()
Definition: url.h:126
TStr GetIpNum() const
Definition: url.h:54
TStr GetRelUrlStr() const
Definition: url.h:37
TUrlScheme
Definition: url.h:5
TStr GetSchemeNm() const
Definition: url.h:40
#define UndefDefaultCopyAssign(TNm)
Definition: bd.h:203
static TStr EncodeUrlStr(const TStr &Str)
Definition: url.h:91
TStr GetVal(const TStr &KeyNm, const int &ValN=0, const TStr &DfVal="") const
Definition: url.h:162
TStr GetBaseUrlStr() const
Definition: url.h:39
static PUrlEnv New(const TStr &BaseUrlStr, const TStr &KeyNm1=TStr(), const TStr &ValStr1=TStr(), const TStr &KeyNm2=TStr(), const TStr &ValStr2=TStr(), const TStr &KeyNm3=TStr(), const TStr &ValStr3=TStr(), const TStr &KeyNm4=TStr(), const TStr &ValStr4=TStr())
Definition: url.h:114
Definition: url.h:7
Definition: url.h:5
TStr GetFinalHostNm() const
Definition: url.h:63
void AddKeyVal(const TStr &KeyNm, const TStr &ValStr)
Definition: url.h:140
#define Fail
Definition: bd.h:238
TUrlEnv(const TUrlEnv &UrlEnv)
Definition: url.h:111
TStr GetUc() const
Definition: dt.h:493
int GetPathSegs() const
Definition: url.h:47
void AddToKeyVal(const TStr &KeyNm, const TStr &ValStr)
Definition: url.h:144
bool IsHttpRqStr() const
Definition: url.h:74
void Save(TSOut &)
Definition: url.h:30
bool IsInPath(const TStr &_PathStr) const
Definition: url.h:80
bool IsOk(const TUrlScheme _Scheme=usUndef) const
Definition: url.h:32
THash< TInt, PUrl > TIdToUrlH
Definition: url.h:99
TStr GetAsFinalUrlStr() const
Definition: url.h:61
~TUrl()
Definition: url.h:27
bool IsBaseUrl()
Definition: url.h:38
int GetPortN() const
Definition: url.h:45
bool IsIpNum() const
Definition: url.h:52
TPt< TUrl > PUrl
Definition: url.h:7
bool Empty() const
Definition: url.h:149
void PutHttpRqStr(const TStr &_HttpRqStr)
Definition: url.h:72
Definition: url.h:5
static PUrl Load(TSIn &)
Definition: url.h:29
Definition: fl.h:58
static PUrlEnv Load(TSIn &SIn)
Definition: url.h:128
TStrV KeyNmV
Definition: url.h:106
bool IsPortOk() const
Definition: url.h:43
#define ClassTP(TNm, PNm)
Definition: bd.h:126
int GetKeys() const
Definition: url.h:150
TStr GetPathStr() const
Definition: url.h:46
Definition: url.h:103
#define ClassTPV(TNm, PNm, TNmV)
Definition: bd.h:162
TEnv Env
Definition: env.cpp:297
TStr GetHostNm() const
Definition: url.h:41
TStr GetIpNumOrHostNm() const
Definition: url.h:55
void ChangeHttpRqStr(const TStr &SrcStr, const TStr &DstStr)
Definition: url.h:75
bool IsInHost(const TStr &_HostNm) const
Definition: url.h:78
TStr GetVal(const int &KeyN, const int &ValN=0) const
Definition: url.h:160
int GetVals(const int &KeyN) const
Definition: url.h:156
TStr GetPathSeg(const int &PathSegN) const
Definition: url.h:48
TQQueue< TIdUrlPr > TIdUrlPrQ
Definition: url.h:98
static PUrlEnv New()
Definition: url.h:113
TStr GetUrlStr() const
Definition: url.h:36
TStr GetFragIdStr() const
Definition: url.h:50
Definition: ds.h:2609
Definition: fl.h:128
TStr GetKeyNm(const int &KeyN) const
Definition: url.h:153
void DefUrlAsFinal()
Definition: url.h:67
bool IsKey(const TStr &KeyNm) const
Definition: url.h:151
TStr GetSearchStr() const
Definition: url.h:49
#define EAssert(Cond)
Definition: bd.h:280
void PutBaseUrlStr(const TStr &_BaseUrlStr)
Definition: url.h:136
TStr GetAsFinalHostNm() const
Definition: url.h:65
Definition: ds.h:32
TUrlEnv(TSIn &SIn)
Definition: url.h:127
Definition: url.h:5
Definition: dt.h:412
Definition: hash.h:97
bool IsDefFinalUrl() const
Definition: url.h:57
int GetKeyN(const TStr &KeyNm) const
Definition: url.h:152
TStr GetPortStr() const
Definition: url.h:44
TUrl(TSIn &)
Definition: url.h:28
void PutIpNum(const TStr &_IpNum)
Definition: url.h:53
Definition: bd.h:196
TStrStrVH KeyNmToValH
Definition: url.h:107
TStr GetFinalUrlStr() const
Definition: url.h:59
TStr GetHttpRqStr() const
Definition: url.h:73
TPair< TInt, PUrl > TIdUrlPr
Definition: url.h:97
int GetVals(const TStr &KeyNm) const
Definition: url.h:158
void Save(TSOut &SOut)
Definition: url.h:129
TStr GetBaseUrlStr() const
Definition: url.h:137
TUrlEnv & operator=(const TUrlEnv &Env)
Definition: url.h:131
Vector is a sequence TVal objects representing an array that can change in size.
Definition: ds.h:430
TUrlScheme GetScheme()
Definition: url.h:35