1 // Url-Lexical-Chars
// Character-class definitions used by the URL lexer.
// Each Is...Ch() predicate looks the character up in a per-class table
// (IsDigitV, IsSchemeV, IsHostV, IsHSegmentV) indexed by the char value.
// NOTE(review): the declarations of those tables are not among the lines
// visible here.
3 class TUrlLxChDef{
4 private:
 // sets BoolV[Ch] to true
11  void InclCh(TBoolV& BoolV, const char& Ch);
 // sets BoolV[c] to true for every character c of Str
12  void InclStr(TBoolV& BoolV, const TStr& Str);
 // element-wise OR of OrBoolV into BoolV
13  void InclBoolV(TBoolV& BoolV, const TBoolV& OrBoolV);
14 public:
15  static const char EofCh;
16  static const char EscCh;
17  TUrlLxChDef();
 // the (Ch>=0) guard makes negative char values yield false instead of
 // being used as a table index
19  bool IsDigitCh(const char& Ch) const {return (Ch>=0)&&IsDigitV[Ch];}
20  bool IsSchemeCh(const char& Ch) const {return (Ch>=0)&&IsSchemeV[Ch];}
21  bool IsHostCh(const char& Ch) const {return (Ch>=0)&&IsHostV[Ch];}
 // unlike the predicates above, every negative char value is accepted
 // as a path-segment character; non-negative ones are looked up
22  bool IsHSegmentCh(const char& Ch) const {
23  return (Ch<0)||((Ch>=0)&&IsHSegmentV[Ch]);}
24 };
// NUL character; used as the default terminator argument of TUrlLx::GetToCh
25 const char TUrlLxChDef::EofCh=0;
// percent sign; presumably the URL escape introducer - its use is not
// visible in this part of the file
26 const char TUrlLxChDef::EscCh='%';
28 void TUrlLxChDef::InclCh(TBoolV& BoolV, const char& Ch){BoolV[Ch]=true;}
30 void TUrlLxChDef::InclStr(TBoolV& BoolV, const TStr& Str){
31  for (int CC=0; CC<Str.Len(); CC++){BoolV[Str.GetCh(CC)]=true;}}
33 void TUrlLxChDef::InclBoolV(TBoolV& BoolV, const TBoolV& OrBoolV){
34  for (int BoolN=0; BoolN<BoolV.Len(); BoolN++){
35  BoolV[BoolN]=BoolV[BoolN]||OrBoolV[BoolN];}}
// Member-initializer list and body of the table-building constructor.
// NOTE(review): the line carrying the constructor header is not visible here.
// Every table is constructed with TCh::Vals (presumably its number of
// entries - confirm against TBoolV).
38  IsLoAlphaV(TCh::Vals), IsHiAlphaV(TCh::Vals), IsAlphaV(TCh::Vals),
39  IsDigitV(TCh::Vals), IsSafeV(TCh::Vals), IsExtraV(TCh::Vals),
40  IsNationalV(TCh::Vals), IsPunctuationV(TCh::Vals),
41  IsReservedV(TCh::Vals), IsHexV(TCh::Vals),
42  IsUnreservedV(TCh::Vals), IsUCharV(TCh::Vals), IsXCharV(TCh::Vals),
43  IsSchemeV(TCh::Vals), IsHostV(TCh::Vals), IsHSegmentV(TCh::Vals){
 // basic classes, each listed literally
45  InclStr(IsLoAlphaV, "abcdefghijklmnopqrstuvwxyz");
48  InclStr(IsDigitV, "0123456789");
49  InclStr(IsSafeV, "$-_.+");
50  InclStr(IsExtraV, "!*'(),");
51  InclStr(IsNationalV, "{}|\\^~[]`");
52  InclStr(IsPunctuationV, "<>#%\"");
53  InclStr(IsReservedV, ";/?:@&=");
 // hex digits = decimal digits plus A-F in both cases
54  InclBoolV(IsHexV, IsDigitV); InclStr(IsHexV, "ABCDEFabcdef");
 // extra characters added to the scheme and host classes; whatever else
 // these classes contain is set on lines not visible here
65  InclStr(IsSchemeV, "+-.");
68  InclStr(IsHostV, "-_");
72 }
75 // Url-Lexical
// Character-level lexer over a URL string.
// Reads from the buffer Bf at position BfC and classifies characters through
// the shared ChDef tables.
// NOTE(review): the declaration of Bf and the header lines of two inline
// methods are not among the lines visible here.
76 class TUrlLx{
77 private:
 // value returned by GetCh()/PeekCh() once the input is exhausted
78  static const char EofCh;
 // index of the next unread character in Bf
80  int BfC;
81 public:
82  static const TUrlLxChDef ChDef;
83  TUrlLx(const TStr& _Str): Bf(_Str), BfC(0){}
 // true when all characters have been consumed
84  bool Eof() const {return BfC==Bf.Len();};
 // consumes and returns the next character (EofCh at end of input)
85  char GetCh(){if (Eof()){return EofCh;} else {return Bf[BfC++];}}
 // returns the next character without consuming it (EofCh at end of input)
86  char PeekCh() const {if (Eof()){return EofCh;} else {return Bf[BfC];}}
 // consumes the next character and asserts (EAssertR) that it equals Ch;
 // the consuming GetCh() call sits inside the assertion condition
87  char GetCh(const char& Ch){EAssertR(GetCh()==Ch, ""); return Ch;}
 // consumes the characters of Str one by one, asserting each of them
88  TStr GetStr(const TStr& Str){
89  for (int ChN=0; ChN<Str.Len(); ChN++){GetCh(Str[ChN]);} return Str;}
 // same as above for a NUL-terminated C string
90  const char* GetStr(const char *Str){
91  int Len = (int) strlen(Str);
92  for (int ChN=0; ChN<Len; ChN++){GetCh(Str[ChN]);}
93  return Str;
94  }
 // Is...Ch() test the next (peeked) character against a class;
 // Get...Ch() assert the class and then consume the character
96  bool IsSchemeCh() const {return ChDef.IsSchemeCh(PeekCh());}
97  char GetSchemeCh(){EAssertR(IsSchemeCh(), ""); return GetCh();}
98  bool IsDigitCh() const {return ChDef.IsDigitCh(PeekCh());}
99  char GetDigitCh(){EAssertR(IsDigitCh(), ""); return GetCh();}
100  bool IsHSegmentCh() const {return ChDef.IsHSegmentCh(PeekCh());}
101  char GetHSegmentCh(){EAssertR(IsHSegmentCh(), ""); return GetCh();}
 // consumes and returns everything up to (not including) the first
 // occurrence of Ch or the end of input
102  TStr GetToCh(const char& Ch=TUrlLxChDef::EofCh){TChA Str;
103  while ((PeekCh()!=EofCh)&&(PeekCh()!=Ch)){Str+=GetCh();} return Str;}
 // NOTE(review): header line not visible - presumably GetScheme(), which
 // TUrl::GetAbs calls. Reads one or more scheme characters and returns
 // them lower-cased.
106  Str+=GetSchemeCh(); while (IsSchemeCh()){Str+=GetCh();}
107  Str.ToLc(); return Str;}
108  TStr GetHost();
 // NOTE(review): header line not visible - presumably GetDigits(), which
 // GetHostPort calls. Reads one or more digit characters.
110  do {Str+=GetDigitCh();} while (IsDigitCh()); return Str;}
111  TStr GetHostPort(TStr& HostNm, TStr& PortStr, int& PortN);
112  TStr GetHPath(TStrV& PathSegV);
 // the search part extends up to '#' or the end of input
113  TStr GetSearch(){return GetToCh('#');}
114 };
// Body of the host-name reader; returns the lower-cased host name.
// NOTE(review): the header line (and the declaration of Str) is not visible
// here - presumably TUrlLx::GetHost(), which is declared in the class.
 // at least one host character is required
120  EAssertR(ChDef.IsHostCh(PeekCh()), "");
121  do {
 // read one run of host characters
122  while (ChDef.IsHostCh(PeekCh())){Str+=GetCh();}
 // a '.' after the run is kept as part of the host name
123  if (PeekCh()=='.'){Str+=GetCh('.');}
 // on '@' the '@' is consumed and everything collected so far is dropped;
 // the original author marked this as "still unexplained" - presumably it
 // skips a user-info prefix, TODO confirm
124  else if (PeekCh()=='@'){GetCh('@'); Str.Clr();} // still unexplained
125  } while (ChDef.IsHostCh(PeekCh()));
126  Str.ToLc();
127  return Str;
128 }
130 TStr TUrlLx::GetHostPort(TStr& HostNm, TStr& PortStr, int& PortN){TChA Str;
131  Str+=HostNm=GetHost();
132  if (PeekCh()==':'){
133  Str+=GetCh(':');
134  if (IsDigitCh()){Str+=PortStr=GetDigits(); PortN=PortStr.GetInt();}
135  }
136  return Str;
137 }
139 TStr TUrlLx::GetHPath(TStrV& PathSegV){TChA Str; TChA HSegStr; bool Cont;
140  do {
141  while (PeekCh()=='/'){GetCh('/');} // prevent multiple '/'
142  HSegStr.Clr(); while (IsHSegmentCh()){HSegStr+=GetHSegmentCh();}
143  Str+=HSegStr; PathSegV.Add(HSegStr);
144  Cont=(PeekCh()=='/'); if (Cont){Str+=GetCh('/');}
145  } while (Cont);
146  return Str;
147 }
150 // Url
// scheme prefix of an http url; IsAbs and GetAbsFromBase test for it
151 const TStr TUrl::UrlHttpPrefixStr="http:";
// prefix an http url must have to be treated as absolute by IsAbs
152 const TStr TUrl::UrlHttpAbsPrefixStr="http://";
154 void TUrl::GetAbs(const TStr& AbsUrlStr){
155  EAssertR(IsAbs(AbsUrlStr), AbsUrlStr);
156  TUrlLx Lx(AbsUrlStr); TChA Str;
157  Str+=SchemeNm=Lx.GetScheme(); Str+=Lx.GetCh(':');
158  if (SchemeNm=="http"){
159  Scheme=usHttp;
160  const char *DbSlashStr="//";
161  Str+=Lx.GetStr(DbSlashStr);
162  Str+=Lx.GetHostPort(HostNm, PortStr, PortN);
163  if (PortN==-1){PortN=THttp::DfPortN; PortStr.Clr();}
164  else if (PortN==THttp::DfPortN){PortStr.Clr();}
165  //**if (!PortStr.Empty()){Str+=':'; Str+=PortStr;}
166  if (Lx.PeekCh()=='/'){
167  PathStr=Lx.GetCh('/'); PathStr+=Lx.GetHPath(PathSegV); Str+=PathStr;}
168  if (PathStr.Empty()){PathStr="/"; Str+=PathStr;}
169  if (Lx.PeekCh()=='?'){
170  SearchStr=Lx.GetCh('?'); SearchStr+=Lx.GetSearch(); Str+=SearchStr;}
171  } else {
172  Scheme=usOther; Str+=Lx.GetToCh();
173  }
174  while (Lx.PeekCh()==' '){Lx.GetCh();}
175  if (Lx.PeekCh()=='#'){
176  FragIdStr=Lx.GetCh('#'); FragIdStr+=Lx.GetToCh();
177  }
178  EAssertR(Lx.Eof(), "");
179  UrlStr=Str;
180 }
// Resolves the relative url RelUrlStr against the absolute url BaseUrlStr
// and parses the result with GetAbs.
// BaseUrlStr must be non-empty, parse to a valid url and be absolute;
// each of these is checked with EAssertR.
// NOTE(review): the index arguments of DelSubStr/GetSubStr look inclusive
// (they are called with Len()-1 as the end) - confirm against TStr.
182 void TUrl::GetAbsFromBase(const TStr& RelUrlStr, const TStr& BaseUrlStr){
183  EAssertR(!BaseUrlStr.Empty(), "");
184  PUrl Url=TUrl::New(BaseUrlStr); EAssertR(Url->IsOk(), "");
185  EAssertR(IsAbs(BaseUrlStr), "");
186  TStr AbsUrlStr=BaseUrlStr;
187  TStr NrRelUrlStr=RelUrlStr;
 // drop a leading "http:" (any letter case) from the relative url
188  if (NrRelUrlStr.GetLc().IsPrefix(UrlHttpPrefixStr)){
189  NrRelUrlStr.DelSubStr(0, UrlHttpPrefixStr.Len()-1);}
 // an empty relative url leaves the base unchanged
190  if (NrRelUrlStr.Len()>0){
191  if (NrRelUrlStr[0]=='/'){
 // relative url starts with a run of '/': SlashStr is that run
192  TStr SlashStr; int SlashChN=0;
193  while ((SlashChN<NrRelUrlStr.Len())&&(NrRelUrlStr[SlashChN]=='/')){
194  SlashChN++; SlashStr+="/";}
 // look in the base for a run of '/' of the same length, i.e. an
 // occurrence of SlashStr with no '/' directly before or after it
195  int ChN=0; bool Found=false;
196  while ((!Found)&&((ChN=AbsUrlStr.SearchStr(SlashStr, ChN))!=-1)){
 // Str is the occurrence plus one character on each side
 // NOTE(review): for ChN==0 the begin index is -1 and at the end of the
 // string the end index is Len() - relies on GetSubStr tolerating this
197  TStr Str=AbsUrlStr.GetSubStr(ChN-1, ChN+SlashStr.Len()-1+1);
198  Found=((ChN==0)||(Str[0]!='/'))&&
199  ((ChN+SlashStr.Len()-1==AbsUrlStr.Len()-1)||(Str[Str.Len()-1]!='/'));
200  if (!Found){ChN++;}
201  }
 // cut the base at the matching run and append the relative url;
 // if no such run exists the base is left as it is
202  if (Found){
203  AbsUrlStr.DelSubStr(ChN, AbsUrlStr.Len()-1);
204  AbsUrlStr+=NrRelUrlStr;
205  }
206  } else {
 // no leading '/': replace everything after the last '/' of the base
207  int ChN=AbsUrlStr.Len()-1;
208  while ((ChN>=0)&&(AbsUrlStr[ChN]!='/')){ChN--;}
209  AbsUrlStr.DelSubStr(ChN+1, AbsUrlStr.Len()-1);
210  AbsUrlStr+=NrRelUrlStr;
211  }
212  }
 // resolve "/../": delete it together with the segment in front of it
214  const char *PrevDirStr="/../";
215  {int ChN;
216  while ((ChN=AbsUrlStr.SearchStr(PrevDirStr))!=-1){
 // BChN moves back to the first character after the previous '/',
 // EChN is the last character of "/../"
217  int BChN=ChN; int EChN=ChN+(int) strlen(PrevDirStr)-1;
218  while ((BChN-1>=0)&&(AbsUrlStr[BChN-1]!='/')){BChN--;}
219  AbsUrlStr.DelSubStr(BChN, EChN);
220  }}
 // remove occurrences of "/." until DelStr reports nothing left to delete
 // NOTE(review): this matches "/." anywhere, not only a complete "."
 // segment - confirm this is intended
222  const char *CurDirStr="/.";
223  while (AbsUrlStr.DelStr(CurDirStr)){}
 // parse the composed absolute url
225  GetAbs(AbsUrlStr);
226 }
// Constructs a url from a (possibly relative) url string and a base url.
// All fields start empty, with Scheme=usUndef and PortN=-1.
// Any PExcept raised while parsing leaves the object with Scheme=usUndef.
228 TUrl::TUrl(const TStr& _RelUrlStr, const TStr& _BaseUrlStr):
229  Scheme(usUndef),
230  UrlStr(), RelUrlStr(_RelUrlStr), BaseUrlStr(_BaseUrlStr),
231  SchemeNm(), HostNm(),
232  PortStr(), PathStr(), SearchStr(), FragIdStr(),
233  PortN(-1), PathSegV(),
234  IpNum(),
235  FinalUrlStr(), FinalHostNm(),
236  HttpRqStr(){
 // normalize the input: ToTrunc presumably trims surrounding white-space
 // (confirm against TStr); the remaining blanks are escaped as "%20"
237  RelUrlStr.ToTrunc();
238  RelUrlStr.ChangeStrAll(" ", "%20");
239  try {
 // an absolute url is parsed directly
240  if (IsAbs(RelUrlStr)){
241  GetAbs(RelUrlStr);
242  } else
 // NOTE(review): this branch is empty in the visible source, so a relative
 // url with an absolute base is left with Scheme=usUndef; the commented-out
 // old version below calls GetAbsFromBase(RelUrlStr, BaseUrlStr) at this
 // point - confirm whether a line is missing here
243  if (IsAbs(BaseUrlStr)){
245  } else {
246  Scheme=usUndef;
247  }
248  }
 // parsing failures are reported through the undefined scheme
249  catch (PExcept&){Scheme=usUndef;}
251  //** old version
252  /*
253  PUrl BaseUrl;
254  if (!BaseUrlStr.Empty()){ // must be outside try-block (CBuilder3.0 bug)
255  BaseUrl=TUrl::New(BaseUrlStr);}
256  try {
257  if (!BaseUrlStr.Empty()){
258  EAssertR(BaseUrl->IsOk(), "");}
259  if (IsAbs(RelUrlStr)){
260  GetAbs(RelUrlStr);
261  } else {
262  GetAbsFromBase(RelUrlStr, BaseUrlStr);
263  }
264  }
265  catch (PExcept&){Scheme=usUndef;}
266  */
267 }
269 TStr TUrl::GetDmNm(const int& MxDmSegs) const {
270  EAssert(IsOk());
271  TChA DmChA; int DmSegs=0;
272  for (int ChN=HostNm.Len()-1; ChN>=0; ChN--){
273  if (HostNm[ChN]=='.'){
274  DmSegs++;
275  if (DmSegs==MxDmSegs){break;} else {DmChA+='.';}
276  } else {
277  DmChA+=HostNm[ChN];
278  }
279  }
280  DmChA.Reverse();
281  return DmChA;
282 }
// Defines the final host name of an http url and, when it differs from the
// host name of the url, composes the corresponding final url.
// The url must be a valid http url (EAssert on IsOk(usHttp)).
284 void TUrl::DefFinalUrl(const TStr& _FinalHostNm){
285  EAssert(IsOk(usHttp));
 // the final host name is stored lower-cased
287  FinalHostNm=_FinalHostNm.GetLc();
 // NOTE(review): the equal-host branch is empty in the visible source, so
 // FinalUrlStr is not assigned on this path - confirm whether a line is
 // missing here
288  if (HostNm==FinalHostNm){
290  } else {
 // same url with the host replaced: scheme "://" final-host [":" port]
 // path search
291  TChA FinalUrlChA;
292  FinalUrlChA+=SchemeNm; FinalUrlChA+="://";
293  FinalUrlChA+=FinalHostNm;
294  if (!PortStr.Empty()){
295  FinalUrlChA+=":"; FinalUrlChA+=PortStr;}
296  FinalUrlChA+=PathStr;
297  FinalUrlChA+=SearchStr;
298  FinalUrlStr=FinalUrlChA;
299  }
300 }
// Body of the routine that converts the path of the url to lower-case and
// rebuilds UrlStr from its parts.
// NOTE(review): the header line and the statement inside the final
// IsDefFinalUrl() test are not visible here.
303  // test if the conversion is needed
304  if (!PathStr.IsLc()){
305  // convert path strings to lower-case
306  PathStr.ToLc();
307  for (int PathSegN=0; PathSegN<PathSegV.Len(); PathSegN++){
308  PathSegV[PathSegN].ToLc();}
309  // recompose url
 // layout: scheme "://" host [":" port] path search
310  TChA UrlChA;
311  UrlChA+=SchemeNm; UrlChA+="://";
312  UrlChA+=HostNm;
313  if (!PortStr.Empty()){
314  UrlChA+=":"; UrlChA+=PortStr;}
315  UrlChA+=PathStr;
316  UrlChA+=SearchStr;
317  UrlStr=UrlChA;
318  // recompose final-url
319  if (IsDefFinalUrl()){
321  }
322 }
324 bool TUrl::IsAbs(const TStr& UrlStr){
325  if (UrlStr.GetLc().IsPrefix(UrlHttpPrefixStr)){
326  return UrlStr.GetLc().IsPrefix(UrlHttpAbsPrefixStr);
327  } else {
328  int ColonChN=UrlStr.SearchCh(':'); int SlashChN=UrlStr.SearchCh('/');
329  return (ColonChN!=-1)&&((SlashChN==-1)||((SlashChN!=-1)&&(ColonChN<SlashChN)));
330  }
331 }
333 bool TUrl::IsScript(const TStr& UrlStr){
334  return UrlStr.IsChIn('?');
335 }
337 bool TUrl::IsSite(const TStr& UrlStr){
338  PUrl Url=TUrl::New(UrlStr);
339  return Url->IsOk(usHttp) && (Url->GetPathStr()=="/") &&
340  Url->GetSearchStr().Empty() && Url->GetFragIdStr().Empty();
341 }
343 PUrl TUrl::GetUrlFromShortcut(const TStr& ShortcutUrlStr,
344  const TStr& DfHostNmPrefix, const TStr& DfHostNmSufix){
345  // shortcut is already correct url
346  TStr UrlStr=ShortcutUrlStr;
347  PUrl Url=TUrl::New(UrlStr);
348  if (Url->IsOk()){return Url;}
349  // add 'http://' to shortcut (if shortcut is from more segments)
350  if (ShortcutUrlStr.IsChIn('.')){
351  UrlStr=TUrl::UrlHttpAbsPrefixStr+ShortcutUrlStr;
352  Url=TUrl::New(UrlStr);
353  if (Url->IsOk()){return Url;}
354  }
355  // add 'http://' and '/' to shortcut (if shortcut is from more segments)
356  if (ShortcutUrlStr.IsChIn('.')){
357  UrlStr=TUrl::UrlHttpAbsPrefixStr+ShortcutUrlStr+"/";
358  Url=TUrl::New(UrlStr);
359  if (Url->IsOk()){return Url;}
360  }
361  // add 'http://', prefix, postfix and '/' to shortcut
362  UrlStr=UrlHttpAbsPrefixStr+
363  DfHostNmPrefix+"."+ShortcutUrlStr+"."+DfHostNmSufix+"/";
364  Url=TUrl::New(UrlStr);
365  return Url;
366 }
// Body of a routine that encodes the string Str for use in the search part
// of a url.
// NOTE(review): the header line is not visible here - presumably
// TUrl::GetUrlSearchStr, which the url-environment code calls.
369  TChA InChA=Str; TChA OutChA;
370  for (int ChN=0; ChN<InChA.Len(); ChN++){
371  char Ch=InChA[ChN];
 // a blank is written as '+'
372  if (Ch==' '){
373  OutChA+='+';
374  } else
 // characters between '!' and '~' are copied as they are, except for
 // '+', '&' and '%', which have a special meaning in the encoded form
375  if ((' '<Ch)&&(Ch<='~')&&(Ch!='+')&&(Ch!='&')&&(Ch!='%')){
376  OutChA+=Ch;
377  } else {
 // everything else becomes '%' followed by the two hex digits of the
 // character code; uchar() keeps codes above 127 from being negative
378  OutChA+='%';
379  OutChA+=TInt::GetHexStr(uchar(Ch)/16);
380  OutChA+=TInt::GetHexStr(uchar(Ch)%16);
381  }
382  }
383  return OutChA;
384 }
386 TStr TUrl::DecodeUrlStr(const TStr& UrlStr) {
387  TChA InChA=UrlStr; TChA OutChA;
388  for (int ChN=0; ChN<InChA.Len(); ChN++){
389  char Ch=InChA[ChN];
390  if (Ch=='+'){
391  OutChA+=' ';
392  } else if (Ch=='%') {
393  ChN++; if (ChN==InChA.Len()) { break; }
394  char FirstCh = InChA[ChN];
395  if (!TCh::IsHex(FirstCh)) { break; }
396  ChN++; if (ChN==InChA.Len()) { break; }
397  char SecondCh = InChA[ChN];
398  if (!TCh::IsHex(SecondCh)) { break; }
399  OutChA+=char(TCh::GetHex(FirstCh)*16 + TCh::GetHex(SecondCh));
400  } else {
401  OutChA+=Ch;
402  }
403  }
404  return OutChA;
405 }
407 TStr TUrl::GetDocStrFromUrlStr(const TStr& UrlStr, const int& Copies){
408  TStrV StrV; UrlStr.SplitOnNonAlNum(StrV);
409  TChA DocChA;
410  for (int StrN=0; StrN<StrV.Len(); StrN++){
411  TStr UcStr=StrV[StrN].GetUc();
412  if ((UcStr.Len()>3)&&(UcStr!="HTTP")&&(UcStr!="HTML")&&(UcStr!="INDEX")&&(UcStr!="DEFAULT")){
413  for (int CopyN=0; CopyN<Copies; CopyN++){
414  if (!DocChA.Empty()){DocChA+=' ';} DocChA+=StrV[StrN];
415  }
416  }
417  }
418  return DocChA;
419 }
422  const TStr& UrlStr, const int& MxLen, const bool& HostOnlyP){
 // Builds a document name from UrlStr.
 // NOTE(review): the first line of the signature (return type and name)
 // is not visible here.
423  PUrl Url=TUrl::New(UrlStr);
424  TChA DocNm;
425  if (Url->IsOk()){
 // valid url: lower-cased host name with its dot-separated segments
 // written in reverse order
426  TStr HostNm=Url->GetHostNm().GetLc();
427  TStrV HostNmSegV; HostNm.SplitOnAllCh('.', HostNmSegV, false);
428  for (int HostNmSegN=0; HostNmSegN<HostNmSegV.Len(); HostNmSegN++){
429  if (HostNmSegN>0){DocNm+='.';}
430  DocNm+=HostNmSegV[HostNmSegV.Len()-HostNmSegN-1];
431  }
 // unless only the host is wanted, the lower-cased path follows
432  if (!HostOnlyP){
433  DocNm+=Url->GetPathStr().GetLc();
434  }
435  } else {
 // invalid url: fall back to the lower-cased input string
436  DocNm=UrlStr.GetLc();
437  }
 // MxLen==-1 means no length limit
438  if (MxLen!=-1){
439  DocNm.Trunc(MxLen);}
440  return DocNm;
441 }
444 // Url-Search-Environment
// Body of a routine that composes a url string from BaseUrlStr and the
// key/value pairs of the environment: base "?" key "=" value "&" ...
// NOTE(review): the header line is not visible here.
 // no keys: an empty string is returned (not the bare base url)
446  if (GetKeys()==0){return TStr();}
447  TChA SearchChA;
448  SearchChA+=BaseUrlStr;
449  SearchChA+="?";
 // number of key=value pairs written so far
450  int KeyVals=0;
451  for (int KeyN=0; KeyN<GetKeys(); KeyN++){
452  TStr KeyNm=GetKeyNm(KeyN);
 // a key may carry several values; each one yields its own pair
453  TStrV ValStrV=KeyNmToValH.GetDat(KeyNm);
454  for (int ValStrN=0; ValStrN<ValStrV.Len(); ValStrN++){
 // pairs are separated by '&'
455  if (KeyVals>0){SearchChA+="&";}
 // key and value are both passed through TUrl::GetUrlSearchStr
456  SearchChA+=TUrl::GetUrlSearchStr(KeyNm);
457  SearchChA+='=';
458  SearchChA+=TUrl::GetUrlSearchStr(ValStrV[ValStrN]);
459  KeyVals++;
460  }
461  }
462  return SearchChA;
463 }
// Body of a routine that returns a new TUrlEnv copy-constructed from
// *UrlEnv, wrapped in a PUrlEnv.
// NOTE(review): the header line is not visible here.
466  PUrlEnv CloneUrlEnv=
467  PUrlEnv(new TUrlEnv(*UrlEnv));
468  return CloneUrlEnv;
469 }
