SNAP Library 2.0, User Reference  2013-05-13 16:33:57
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
TUrl Class Reference

#include <url.h>

List of all members.

Public Member Functions

 TUrl (const TStr &_RelUrlStr, const TStr &_BaseUrlStr=TStr())
 ~TUrl ()
 TUrl (TSIn &)
void Save (TSOut &)
bool IsOk (const TUrlScheme _Scheme=usUndef) const
TUrlScheme GetScheme ()
TStr GetUrlStr () const
TStr GetRelUrlStr () const
bool IsBaseUrl ()
TStr GetBaseUrlStr () const
TStr GetSchemeNm () const
TStr GetHostNm () const
TStr GetDmNm (const int &MxDmSegs=-1) const
bool IsPortOk () const
TStr GetPortStr () const
int GetPortN () const
TStr GetPathStr () const
int GetPathSegs () const
TStr GetPathSeg (const int &PathSegN) const
TStr GetSearchStr () const
TStr GetFragIdStr () const
bool IsIpNum () const
void PutIpNum (const TStr &_IpNum)
TStr GetIpNum () const
TStr GetIpNumOrHostNm () const
bool IsDefFinalUrl () const
TStr GetFinalUrlStr () const
TStr GetAsFinalUrlStr () const
TStr GetFinalHostNm () const
TStr GetAsFinalHostNm () const
void DefUrlAsFinal ()
void DefFinalUrl (const TStr &_FinalHostNm)
void PutHttpRqStr (const TStr &_HttpRqStr)
TStr GetHttpRqStr () const
bool IsHttpRqStr () const
void ChangeHttpRqStr (const TStr &SrcStr, const TStr &DstStr)
bool IsInHost (const TStr &_HostNm) const
bool IsInPath (const TStr &_PathStr) const
void ToLcPath ()

Static Public Member Functions

static PUrl New (const TStr &RelUrlStr, const TStr &BaseUrlStr=TStr())
static PUrl Load (TSIn &)
static bool IsAbs (const TStr &UrlStr)
static bool IsScript (const TStr &UrlStr)
static bool IsSite (const TStr &UrlStr)
static PUrl GetUrlFromShortcut (const TStr &ShortcutUrlStr, const TStr &DfHostNmPrefix, const TStr &DfHostNmSufix)
static TStr GetUrlSearchStr (const TStr &Str)
static TStr EncodeUrlStr (const TStr &Str)
static TStr DecodeUrlStr (const TStr &UrlStr)
static TStr GetDocStrFromUrlStr (const TStr &UrlStr, const int &Copies=1)
static TStr GetTopDownDocNm (const TStr &UrlStr, const int &MxLen=-1, const bool &HostOnlyP=false)

Private Member Functions

void GetAbs (const TStr &AbsUrlStr)
void GetAbsFromBase (const TStr &RelUrlStr, const TStr &BaseUrlStr)
 UndefDefaultCopyAssign (TUrl)

Private Attributes

TCRef CRef
TUrlScheme Scheme
TStr UrlStr
TStr RelUrlStr
TStr BaseUrlStr
TStr SchemeNm
TStr HostNm
TStr PortStr
TStr PathStr
TStr SearchStr
TStr FragIdStr
int PortN
TStrV PathSegV
TStr IpNum
TStr FinalUrlStr
TStr FinalHostNm
TStr HttpRqStr

Static Private Attributes

static const TStr UrlHttpPrefixStr = "http:"
static const TStr UrlHttpAbsPrefixStr = "http://"

Friends

class TPt< TUrl >

Detailed Description

Definition at line 7 of file url.h.


Constructor & Destructor Documentation

TUrl::TUrl ( const TStr & _RelUrlStr,
const TStr & _BaseUrlStr = TStr()
)

Definition at line 228 of file url.cpp.

                                                         :
  Scheme(usUndef),
  UrlStr(), RelUrlStr(_RelUrlStr), BaseUrlStr(_BaseUrlStr),
  SchemeNm(), HostNm(),
  PortStr(), PathStr(), SearchStr(), FragIdStr(),
  PortN(-1), PathSegV(),
  IpNum(),
  FinalUrlStr(), FinalHostNm(),
  HttpRqStr(){
  RelUrlStr.ToTrunc();
  RelUrlStr.ChangeStrAll(" ", "%20");
  try {
    if (IsAbs(RelUrlStr)){
      GetAbs(RelUrlStr);
    } else
    if (IsAbs(BaseUrlStr)){
      GetAbsFromBase(RelUrlStr, BaseUrlStr);
    } else {
      Scheme=usUndef;
    }
  }
  catch (PExcept&){Scheme=usUndef;}

  //** old version
  /*
  PUrl BaseUrl;
  if (!BaseUrlStr.Empty()){ // must be outside try-block (CBuilder3.0 bug)
    BaseUrl=TUrl::New(BaseUrlStr);}
  try {
    if (!BaseUrlStr.Empty()){
      EAssertR(BaseUrl->IsOk(), "");}
    if (IsAbs(RelUrlStr)){
      GetAbs(RelUrlStr);
    } else {
      GetAbsFromBase(RelUrlStr, BaseUrlStr);
    }
  }
  catch (PExcept&){Scheme=usUndef;}
  */
}
TUrl::~TUrl ( ) [inline]

Definition at line 27 of file url.h.

{}
TUrl::TUrl ( TSIn & ) [inline]

Definition at line 28 of file url.h.

{Fail;}

Member Function Documentation

void TUrl::ChangeHttpRqStr ( const TStr & SrcStr,
const TStr & DstStr
) [inline]

Definition at line 75 of file url.h.

                                                              {
    HttpRqStr.ChangeStr(SrcStr, DstStr);}
TStr TUrl::DecodeUrlStr ( const TStr & UrlStr) [static]

Definition at line 386 of file url.cpp.

                                          {
  TChA InChA=UrlStr; TChA OutChA;
  for (int ChN=0; ChN<InChA.Len(); ChN++){
    char Ch=InChA[ChN];
    if (Ch=='+'){
      OutChA+=' ';
    } else if (Ch=='%') {
      ChN++; if (ChN==InChA.Len()) { break; }
      char FirstCh = InChA[ChN];
      if (!TCh::IsHex(FirstCh)) { break; }
      ChN++; if (ChN==InChA.Len()) { break; }
      char SecondCh = InChA[ChN];
      if (!TCh::IsHex(SecondCh)) { break; }
      OutChA+=char(TCh::GetHex(FirstCh)*16 + TCh::GetHex(SecondCh));
    } else {
      OutChA+=Ch;
    }
  }
  return OutChA;
}
void TUrl::DefFinalUrl ( const TStr & _FinalHostNm)

Definition at line 284 of file url.cpp.

                                              {
  EAssert(IsOk(usHttp));
  EAssert(!IsDefFinalUrl());
  FinalHostNm=_FinalHostNm.GetLc();
  if (HostNm==FinalHostNm){
    FinalUrlStr=UrlStr;
  } else {
    TChA FinalUrlChA;
    FinalUrlChA+=SchemeNm; FinalUrlChA+="://";
    FinalUrlChA+=FinalHostNm;
    if (!PortStr.Empty()){
      FinalUrlChA+=":"; FinalUrlChA+=PortStr;}
    FinalUrlChA+=PathStr;
    FinalUrlChA+=SearchStr;
    FinalUrlStr=FinalUrlChA;
  }
}
void TUrl::DefUrlAsFinal ( ) [inline]

Definition at line 67 of file url.h.

static TStr TUrl::EncodeUrlStr ( const TStr & Str) [inline, static]

Definition at line 91 of file url.h.

{return GetUrlSearchStr(Str);}
void TUrl::GetAbs ( const TStr & AbsUrlStr) [private]

Definition at line 154 of file url.cpp.

                                      {
  EAssertR(IsAbs(AbsUrlStr), AbsUrlStr);
  TUrlLx Lx(AbsUrlStr); TChA Str;
  Str+=SchemeNm=Lx.GetScheme(); Str+=Lx.GetCh(':');
  if (SchemeNm=="http"){
    Scheme=usHttp;
    const char *DbSlashStr="//";
    Str+=Lx.GetStr(DbSlashStr);
    Str+=Lx.GetHostPort(HostNm, PortStr, PortN);
    if (PortN==-1){PortN=THttp::DfPortN; PortStr.Clr();}
    else if (PortN==THttp::DfPortN){PortStr.Clr();}
    //**if (!PortStr.Empty()){Str+=':'; Str+=PortStr;}
    if (Lx.PeekCh()=='/'){
      PathStr=Lx.GetCh('/'); PathStr+=Lx.GetHPath(PathSegV); Str+=PathStr;}
    if (PathStr.Empty()){PathStr="/"; Str+=PathStr;}
    if (Lx.PeekCh()=='?'){
      SearchStr=Lx.GetCh('?'); SearchStr+=Lx.GetSearch(); Str+=SearchStr;}
  } else {
    Scheme=usOther; Str+=Lx.GetToCh();
  }
  while (Lx.PeekCh()==' '){Lx.GetCh();}
  if (Lx.PeekCh()=='#'){
    FragIdStr=Lx.GetCh('#'); FragIdStr+=Lx.GetToCh();
  }
  EAssertR(Lx.Eof(), "");
  UrlStr=Str;
}
void TUrl::GetAbsFromBase ( const TStr & RelUrlStr,
const TStr & BaseUrlStr
) [private]

Definition at line 182 of file url.cpp.

                                                                      {
  EAssertR(!BaseUrlStr.Empty(), "");
  PUrl Url=TUrl::New(BaseUrlStr); EAssertR(Url->IsOk(), "");
  EAssertR(IsAbs(BaseUrlStr), "");
  TStr AbsUrlStr=BaseUrlStr;
  TStr NrRelUrlStr=RelUrlStr;
  if (NrRelUrlStr.GetLc().IsPrefix(UrlHttpPrefixStr)){
    NrRelUrlStr.DelSubStr(0, UrlHttpPrefixStr.Len()-1);}
  if (NrRelUrlStr.Len()>0){
    if (NrRelUrlStr[0]=='/'){
      TStr SlashStr; int SlashChN=0;
      while ((SlashChN<NrRelUrlStr.Len())&&(NrRelUrlStr[SlashChN]=='/')){
        SlashChN++; SlashStr+="/";}
      int ChN=0; bool Found=false;
      while ((!Found)&&((ChN=AbsUrlStr.SearchStr(SlashStr, ChN))!=-1)){
        TStr Str=AbsUrlStr.GetSubStr(ChN-1, ChN+SlashStr.Len()-1+1);
        Found=((ChN==0)||(Str[0]!='/'))&&
         ((ChN+SlashStr.Len()-1==AbsUrlStr.Len()-1)||(Str[Str.Len()-1]!='/'));
        if (!Found){ChN++;}
      }
      if (Found){
        AbsUrlStr.DelSubStr(ChN, AbsUrlStr.Len()-1);
        AbsUrlStr+=NrRelUrlStr;
      }
    } else {
      int ChN=AbsUrlStr.Len()-1;
      while ((ChN>=0)&&(AbsUrlStr[ChN]!='/')){ChN--;}
      AbsUrlStr.DelSubStr(ChN+1, AbsUrlStr.Len()-1);
      AbsUrlStr+=NrRelUrlStr;
    }
  }

  const char *PrevDirStr="/../";
  {int ChN;
  while ((ChN=AbsUrlStr.SearchStr(PrevDirStr))!=-1){
    int BChN=ChN; int EChN=ChN+(int) strlen(PrevDirStr)-1;
    while ((BChN-1>=0)&&(AbsUrlStr[BChN-1]!='/')){BChN--;}
    AbsUrlStr.DelSubStr(BChN, EChN);
  }}

  const char *CurDirStr="/.";
  while (AbsUrlStr.DelStr(CurDirStr)){}

  GetAbs(AbsUrlStr);
}
TStr TUrl::GetAsFinalHostNm ( ) const [inline]

Definition at line 65 of file url.h.

                                {
    if (IsDefFinalUrl()){return FinalHostNm;} else {return HostNm;}}
TStr TUrl::GetAsFinalUrlStr ( ) const [inline]

Definition at line 61 of file url.h.

                                {
    if (IsDefFinalUrl()){return FinalUrlStr;} else {return UrlStr;}}
TStr TUrl::GetBaseUrlStr ( ) const [inline]

Definition at line 39 of file url.h.

{return BaseUrlStr;}
TStr TUrl::GetDmNm ( const int &  MxDmSegs = -1) const

Definition at line 269 of file url.cpp.

                                            {
  EAssert(IsOk());
  TChA DmChA; int DmSegs=0;
  for (int ChN=HostNm.Len()-1; ChN>=0; ChN--){
    if (HostNm[ChN]=='.'){
      DmSegs++;
      if (DmSegs==MxDmSegs){break;} else {DmChA+='.';}
    } else {
      DmChA+=HostNm[ChN];
    }
  }
  DmChA.Reverse();
  return DmChA;
}
TStr TUrl::GetDocStrFromUrlStr ( const TStr & UrlStr,
const int &  Copies = 1 
) [static]

Definition at line 407 of file url.cpp.

                                                                   {
  TStrV StrV; UrlStr.SplitOnNonAlNum(StrV);
  TChA DocChA;
  for (int StrN=0; StrN<StrV.Len(); StrN++){
    TStr UcStr=StrV[StrN].GetUc();
    if ((UcStr.Len()>3)&&(UcStr!="HTTP")&&(UcStr!="HTML")&&(UcStr!="INDEX")&&(UcStr!="DEFAULT")){
      for (int CopyN=0; CopyN<Copies; CopyN++){
        if (!DocChA.Empty()){DocChA+=' ';} DocChA+=StrV[StrN];
      }
    }
  }
  return DocChA;
}
TStr TUrl::GetFinalHostNm ( ) const [inline]

Definition at line 63 of file url.h.

TStr TUrl::GetFinalUrlStr ( ) const [inline]

Definition at line 59 of file url.h.

TStr TUrl::GetFragIdStr ( ) const [inline]

Definition at line 50 of file url.h.

{EAssert(IsOk()); return FragIdStr;}
TStr TUrl::GetHostNm ( ) const [inline]

Definition at line 41 of file url.h.

{EAssert(IsOk()); return HostNm;}
TStr TUrl::GetHttpRqStr ( ) const [inline]

Definition at line 73 of file url.h.

{return HttpRqStr;}
TStr TUrl::GetIpNum ( ) const [inline]

Definition at line 54 of file url.h.

{EAssert(IsIpNum()); return IpNum;}
TStr TUrl::GetIpNumOrHostNm ( ) const [inline]

Definition at line 55 of file url.h.

{return IsIpNum() ? GetIpNum() : GetHostNm();}
TStr TUrl::GetPathSeg ( const int &  PathSegN) const [inline]

Definition at line 48 of file url.h.

{return PathSegV[PathSegN];}
int TUrl::GetPathSegs ( ) const [inline]

Definition at line 47 of file url.h.

{return PathSegV.Len();}
TStr TUrl::GetPathStr ( ) const [inline]

Definition at line 46 of file url.h.

{EAssert(IsOk()); return PathStr;}
int TUrl::GetPortN ( ) const [inline]

Definition at line 45 of file url.h.

{EAssert(IsOk()&&(PortN!=-1)); return PortN;}
TStr TUrl::GetPortStr ( ) const [inline]

Definition at line 44 of file url.h.

{EAssert(IsOk()); return PortStr;}
TStr TUrl::GetRelUrlStr ( ) const [inline]

Definition at line 37 of file url.h.

{return RelUrlStr;}

TUrlScheme TUrl::GetScheme ( ) [inline]

Definition at line 35 of file url.h.

{return Scheme;}
TStr TUrl::GetSchemeNm ( ) const [inline]

Definition at line 40 of file url.h.

{EAssert(IsOk()); return SchemeNm;}
TStr TUrl::GetSearchStr ( ) const [inline]

Definition at line 49 of file url.h.

{EAssert(IsOk()); return SearchStr;}
TStr TUrl::GetTopDownDocNm ( const TStr & UrlStr,
const int &  MxLen = -1,
const bool &  HostOnlyP = false 
) [static]

Definition at line 421 of file url.cpp.

                                                             {
  PUrl Url=TUrl::New(UrlStr);
  TChA DocNm;
  if (Url->IsOk()){
    TStr HostNm=Url->GetHostNm().GetLc();
    TStrV HostNmSegV; HostNm.SplitOnAllCh('.', HostNmSegV, false);
    for (int HostNmSegN=0; HostNmSegN<HostNmSegV.Len(); HostNmSegN++){
      if (HostNmSegN>0){DocNm+='.';}
      DocNm+=HostNmSegV[HostNmSegV.Len()-HostNmSegN-1];
    }
    if (!HostOnlyP){
      DocNm+=Url->GetPathStr().GetLc();
    }
  } else {
    DocNm=UrlStr.GetLc();
  }
  if (MxLen!=-1){
    DocNm.Trunc(MxLen);}
  return DocNm;
}
PUrl TUrl::GetUrlFromShortcut ( const TStr & ShortcutUrlStr,
const TStr & DfHostNmPrefix,
const TStr & DfHostNmSufix
) [static]

Definition at line 343 of file url.cpp.

                                                       {
  // shortcut is already correct url
  TStr UrlStr=ShortcutUrlStr;
  PUrl Url=TUrl::New(UrlStr);
  if (Url->IsOk()){return Url;}
  // add 'http://' to shortcut (if shortcut is from more segments)
  if (ShortcutUrlStr.IsChIn('.')){
    UrlStr=TUrl::UrlHttpAbsPrefixStr+ShortcutUrlStr;
    Url=TUrl::New(UrlStr);
    if (Url->IsOk()){return Url;}
  }
  // add 'http://' and '/' to shortcut (if shortcut is from more segments)
  if (ShortcutUrlStr.IsChIn('.')){
    UrlStr=TUrl::UrlHttpAbsPrefixStr+ShortcutUrlStr+"/";
    Url=TUrl::New(UrlStr);
    if (Url->IsOk()){return Url;}
  }
  // add 'http://', prefix, postfix and '/' to shortcut
  UrlStr=UrlHttpAbsPrefixStr+
   DfHostNmPrefix+"."+ShortcutUrlStr+"."+DfHostNmSufix+"/";
  Url=TUrl::New(UrlStr);
  return Url;
}
TStr TUrl::GetUrlSearchStr ( const TStr & Str) [static]

Definition at line 368 of file url.cpp.

                                         {
  TChA InChA=Str; TChA OutChA;
  for (int ChN=0; ChN<InChA.Len(); ChN++){
    char Ch=InChA[ChN];
    if (Ch==' '){
      OutChA+='+';
    } else
    if ((' '<Ch)&&(Ch<='~')&&(Ch!='+')&&(Ch!='&')&&(Ch!='%')){
      OutChA+=Ch;
    } else {
      OutChA+='%';
      OutChA+=TInt::GetHexStr(uchar(Ch)/16);
      OutChA+=TInt::GetHexStr(uchar(Ch)%16);
    }
  }
  return OutChA;
}
TStr TUrl::GetUrlStr ( ) const [inline]

Definition at line 36 of file url.h.

{return UrlStr;}
bool TUrl::IsAbs ( const TStr & UrlStr) [static]

Definition at line 324 of file url.cpp.

                                  {
  if (UrlStr.GetLc().IsPrefix(UrlHttpPrefixStr)){
    return UrlStr.GetLc().IsPrefix(UrlHttpAbsPrefixStr);
  } else {
    int ColonChN=UrlStr.SearchCh(':'); int SlashChN=UrlStr.SearchCh('/');
    return (ColonChN!=-1)&&((SlashChN==-1)||((SlashChN!=-1)&&(ColonChN<SlashChN)));
  }
}
bool TUrl::IsBaseUrl ( ) [inline]

Definition at line 38 of file url.h.

{return !BaseUrlStr.Empty();}
bool TUrl::IsDefFinalUrl ( ) const [inline]

Definition at line 57 of file url.h.

                             {
    EAssert(IsOk(usHttp)); return !FinalUrlStr.Empty();}
bool TUrl::IsHttpRqStr ( ) const [inline]

Definition at line 74 of file url.h.

{return !HttpRqStr.Empty();}
bool TUrl::IsInHost ( const TStr & _HostNm) const [inline]

Definition at line 78 of file url.h.

                                           {
    EAssert(IsOk()); return HostNm.GetUc().IsSuffix(_HostNm.GetUc());}
bool TUrl::IsInPath ( const TStr & _PathStr) const [inline]

Definition at line 80 of file url.h.

                                            {
    EAssert(IsOk()); return PathStr.GetUc().IsPrefix(_PathStr.GetUc());}
bool TUrl::IsIpNum ( ) const [inline]

Definition at line 52 of file url.h.

{return !IpNum.Empty();}
bool TUrl::IsOk ( const TUrlScheme  _Scheme = usUndef) const [inline]

Definition at line 32 of file url.h.

                                                    {
    if (_Scheme==usUndef){return Scheme!=usUndef;}
    else {return Scheme==_Scheme;}}
bool TUrl::IsPortOk ( ) const [inline]

Definition at line 43 of file url.h.

{ EAssert(IsOk()); return (PortN > 0); }
bool TUrl::IsScript ( const TStr & UrlStr) [static]

Definition at line 333 of file url.cpp.

                                     {
  return UrlStr.IsChIn('?');
}
bool TUrl::IsSite ( const TStr & UrlStr) [static]

Definition at line 337 of file url.cpp.

                                   {
  PUrl Url=TUrl::New(UrlStr);
  return Url->IsOk(usHttp) && (Url->GetPathStr()=="/") &&
   Url->GetSearchStr().Empty() && Url->GetFragIdStr().Empty();
}
static PUrl TUrl::Load ( TSIn & ) [inline, static]

Definition at line 29 of file url.h.

{Fail; return NULL;}
static PUrl TUrl::New ( const TStr & RelUrlStr,
const TStr & BaseUrlStr = TStr()
) [inline, static]

Definition at line 25 of file url.h.

                                                                       {
    return PUrl(new TUrl(RelUrlStr, BaseUrlStr));}
void TUrl::PutHttpRqStr ( const TStr & _HttpRqStr) [inline]

Definition at line 72 of file url.h.

{HttpRqStr=_HttpRqStr;}
void TUrl::PutIpNum ( const TStr & _IpNum) [inline]

Definition at line 53 of file url.h.

{IpNum=_IpNum;}
void TUrl::Save ( TSOut & ) [inline]

Definition at line 30 of file url.h.

{Fail;}
void TUrl::ToLcPath ( )

Definition at line 302 of file url.cpp.

                   {
  // test if the conversion is needed
  if (!PathStr.IsLc()){
    // convert path strings to lower-case
    PathStr.ToLc();
    for (int PathSegN=0; PathSegN<PathSegV.Len(); PathSegN++){
      PathSegV[PathSegN].ToLc();}
    // recompose url
    TChA UrlChA;
    UrlChA+=SchemeNm; UrlChA+="://";
    UrlChA+=HostNm;
    if (!PortStr.Empty()){
      UrlChA+=":"; UrlChA+=PortStr;}
    UrlChA+=PathStr;
    UrlChA+=SearchStr;
    UrlStr=UrlChA;
    // recompose final-url
    if (IsDefFinalUrl()){
      FinalUrlStr.Clr(); DefFinalUrl(FinalHostNm);}
  }
}

Friends And Related Function Documentation

friend class TPt< TUrl > [friend]

Definition at line 7 of file url.h.


Member Data Documentation

TStr TUrl::BaseUrlStr [private]

Definition at line 12 of file url.h.

TCRef TUrl::CRef [private]

Definition at line 7 of file url.h.

TStr TUrl::FinalHostNm [private]

Definition at line 18 of file url.h.

TStr TUrl::FinalUrlStr [private]

Definition at line 18 of file url.h.

TStr TUrl::FragIdStr [private]

Definition at line 14 of file url.h.

TStr TUrl::HostNm [private]

Definition at line 13 of file url.h.

TStr TUrl::HttpRqStr [private]

Definition at line 19 of file url.h.

TStr TUrl::IpNum [private]

Definition at line 17 of file url.h.

TStrV TUrl::PathSegV [private]

Definition at line 16 of file url.h.

TStr TUrl::PathStr [private]

Definition at line 14 of file url.h.

int TUrl::PortN [private]

Definition at line 15 of file url.h.

TStr TUrl::PortStr [private]

Definition at line 14 of file url.h.

TStr TUrl::RelUrlStr [private]

Definition at line 12 of file url.h.

TUrlScheme TUrl::Scheme [private]

Definition at line 11 of file url.h.

TStr TUrl::SchemeNm [private]

Definition at line 13 of file url.h.

TStr TUrl::SearchStr [private]

Definition at line 14 of file url.h.

const TStr TUrl::UrlHttpAbsPrefixStr = "http://" [static, private]

Definition at line 10 of file url.h.

const TStr TUrl::UrlHttpPrefixStr = "http:" [static, private]

Definition at line 9 of file url.h.

TStr TUrl::UrlStr [private]

Definition at line 12 of file url.h.


The documentation for this class was generated from the following files: