SNAP Library, Developer Reference  2013-01-07 14:03:36
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
url.h
Go to the documentation of this file.
00001 #include "bd.h"
00002 
00004 // Url
// Scheme kind recorded for a URL.
// TUrl::IsOk() reports a URL as not ok while its scheme is usUndef.
typedef enum {
  usUndef=0,
  usHttp=1,
  usOther=2
} TUrlScheme;
00006 
00007 ClassTPV(TUrl, PUrl, TUrlV)//{
00008 private:
00009   static const TStr UrlHttpPrefixStr;
00010   static const TStr UrlHttpAbsPrefixStr;
00011   TUrlScheme Scheme;
00012   TStr UrlStr, RelUrlStr, BaseUrlStr;
00013   TStr SchemeNm, HostNm;
00014   TStr PortStr, PathStr, SearchStr, FragIdStr;
00015   int PortN;
00016   TStrV PathSegV;
00017   TStr IpNum;
00018   TStr FinalUrlStr, FinalHostNm;
00019   TStr HttpRqStr;
00020   void GetAbs(const TStr& AbsUrlStr);
00021   void GetAbsFromBase(const TStr& RelUrlStr, const TStr& BaseUrlStr);
00022   UndefDefaultCopyAssign(TUrl);
00023 public:
00024   TUrl(const TStr& _RelUrlStr, const TStr& _BaseUrlStr=TStr());
00025   static PUrl New(const TStr& RelUrlStr, const TStr& BaseUrlStr=TStr()){
00026     return PUrl(new TUrl(RelUrlStr, BaseUrlStr));}
00027   ~TUrl(){}
00028   TUrl(TSIn&){Fail;}
00029   static PUrl Load(TSIn&){Fail; return NULL;}
00030   void Save(TSOut&){Fail;}
00031 
00032   bool IsOk(const TUrlScheme _Scheme=usUndef) const {
00033     if (_Scheme==usUndef){return Scheme!=usUndef;}
00034     else {return Scheme==_Scheme;}}
00035   TUrlScheme GetScheme(){return Scheme;}
00036   TStr GetUrlStr() const {return UrlStr;}
00037   TStr GetRelUrlStr() const {return RelUrlStr;}
00038   bool IsBaseUrl(){return !BaseUrlStr.Empty();}
00039   TStr GetBaseUrlStr() const {return BaseUrlStr;}
00040   TStr GetSchemeNm() const {EAssert(IsOk()); return SchemeNm;}
00041   TStr GetHostNm() const {EAssert(IsOk()); return HostNm;}
00042   TStr GetDmNm(const int& MxDmSegs=-1) const;
00043   bool IsPortOk() const { EAssert(IsOk()); return (PortN > 0); }
00044   TStr GetPortStr() const {EAssert(IsOk()); return PortStr;}
00045   int GetPortN() const {EAssert(IsOk()&&(PortN!=-1)); return PortN;}
00046   TStr GetPathStr() const {EAssert(IsOk()); return PathStr;}
00047   int GetPathSegs() const {return PathSegV.Len();}
00048   TStr GetPathSeg(const int& PathSegN) const {return PathSegV[PathSegN];}
00049   TStr GetSearchStr() const {EAssert(IsOk()); return SearchStr;}
00050   TStr GetFragIdStr() const {EAssert(IsOk()); return FragIdStr;}
00051 
00052   bool IsIpNum() const {return !IpNum.Empty();}
00053   void PutIpNum(const TStr& _IpNum){IpNum=_IpNum;}
00054   TStr GetIpNum() const {EAssert(IsIpNum()); return IpNum;}
00055   TStr GetIpNumOrHostNm() const {return IsIpNum() ? GetIpNum() : GetHostNm();}
00056 
00057   bool IsDefFinalUrl() const {
00058     EAssert(IsOk(usHttp)); return !FinalUrlStr.Empty();}
00059   TStr GetFinalUrlStr() const {
00060     EAssert(IsDefFinalUrl()); return FinalUrlStr;}
00061   TStr GetAsFinalUrlStr() const {
00062     if (IsDefFinalUrl()){return FinalUrlStr;} else {return UrlStr;}}
00063   TStr GetFinalHostNm() const {
00064     EAssert(IsDefFinalUrl()); return FinalHostNm;}
00065   TStr GetAsFinalHostNm() const {
00066     if (IsDefFinalUrl()){return FinalHostNm;} else {return HostNm;}}
00067   void DefUrlAsFinal(){
00068     EAssert(IsOk(usHttp)); EAssert(!IsDefFinalUrl());
00069     FinalUrlStr=UrlStr; FinalHostNm=HostNm;}
00070   void DefFinalUrl(const TStr& _FinalHostNm);
00071 
00072   void PutHttpRqStr(const TStr& _HttpRqStr){HttpRqStr=_HttpRqStr;}
00073   TStr GetHttpRqStr() const {return HttpRqStr;}
00074   bool IsHttpRqStr() const {return !HttpRqStr.Empty();}
00075   void ChangeHttpRqStr(const TStr& SrcStr, const TStr& DstStr){
00076     HttpRqStr.ChangeStr(SrcStr, DstStr);}
00077 
00078   bool IsInHost(const TStr& _HostNm) const {
00079     EAssert(IsOk()); return HostNm.GetUc().IsSuffix(_HostNm.GetUc());}
00080   bool IsInPath(const TStr& _PathStr) const {
00081     EAssert(IsOk()); return PathStr.GetUc().IsPrefix(_PathStr.GetUc());}
00082   void ToLcPath();
00083 
00084   static bool IsAbs(const TStr& UrlStr);
00085   static bool IsScript(const TStr& UrlStr);
00086   static bool IsSite(const TStr& UrlStr);
00087 
00088   static PUrl GetUrlFromShortcut(const TStr& ShortcutUrlStr,
00089    const TStr& DfHostNmPrefix, const TStr& DfHostNmSufix);
00090   static TStr GetUrlSearchStr(const TStr& Str);
00091   static TStr DecodeUrlStr(const TStr& UrlStr);
00092   static TStr GetDocStrFromUrlStr(const TStr& UrlStr, const int& Copies=1);
00093   static TStr GetTopDownDocNm(
00094    const TStr& UrlStr, const int& MxLen=-1, const bool& HostOnlyP=false);
00095 };
00096 typedef TPair<TInt, PUrl> TIdUrlPr;
00097 typedef TQQueue<TIdUrlPr> TIdUrlPrQ;
00098 typedef THash<TInt, PUrl> TIdToUrlH;
00099 
00101 // Url-Environment
00102 ClassTP(TUrlEnv, PUrlEnv)//{
00103 private:
00104   TStr BaseUrlStr;
00105   TStrV KeyNmV;
00106   TStrStrVH KeyNmToValH;
00107 public:
00108   TUrlEnv():
00109     KeyNmV(), KeyNmToValH(10){}
00110   TUrlEnv(const TUrlEnv& UrlEnv):
00111     KeyNmV(UrlEnv.KeyNmV), KeyNmToValH(UrlEnv.KeyNmToValH){}
00112   static PUrlEnv New(){return new TUrlEnv();}
00113   static PUrlEnv New(const TStr& BaseUrlStr,
00114    const TStr& KeyNm1=TStr(), const TStr& ValStr1=TStr(),
00115    const TStr& KeyNm2=TStr(), const TStr& ValStr2=TStr(),
00116    const TStr& KeyNm3=TStr(), const TStr& ValStr3=TStr(),
00117    const TStr& KeyNm4=TStr(), const TStr& ValStr4=TStr()){
00118     PUrlEnv UrlEnv=New();
00119     UrlEnv->PutBaseUrlStr(BaseUrlStr);
00120     if (!KeyNm1.Empty()){UrlEnv->AddKeyVal(KeyNm1, ValStr1);}
00121     if (!KeyNm2.Empty()){UrlEnv->AddKeyVal(KeyNm2, ValStr2);}
00122     if (!KeyNm3.Empty()){UrlEnv->AddKeyVal(KeyNm3, ValStr3);}
00123     if (!KeyNm4.Empty()){UrlEnv->AddKeyVal(KeyNm4, ValStr4);}
00124     return UrlEnv;}
00125   ~TUrlEnv(){}
00126   TUrlEnv(TSIn& SIn): KeyNmV(SIn), KeyNmToValH(SIn){}
00127   static PUrlEnv Load(TSIn& SIn){return new TUrlEnv(SIn);}
00128   void Save(TSOut& SOut){KeyNmV.Save(SOut); KeyNmToValH.Save(SOut);}
00129 
00130   TUrlEnv& operator=(const TUrlEnv& Env){
00131     if (this!=&Env){KeyNmV=Env.KeyNmV; KeyNmToValH=Env.KeyNmToValH;}
00132     return *this;}
00133 
00134   // base url
00135   void PutBaseUrlStr(const TStr& _BaseUrlStr){BaseUrlStr=_BaseUrlStr;}
00136   TStr GetBaseUrlStr() const {return BaseUrlStr;}
00137 
00138   // adding key-value
00139   void AddKeyVal(const TStr& KeyNm, const TStr& ValStr){
00140     if (!IsKey(KeyNm)){KeyNmV.Add(KeyNm); KeyNmToValH.AddKey(KeyNm);}
00141     KeyNmToValH.GetDat(KeyNm).Clr();
00142     KeyNmToValH.GetDat(KeyNm).Add(ValStr);}
00143   void AddToKeyVal(const TStr& KeyNm, const TStr& ValStr){
00144     if (!IsKey(KeyNm)){KeyNmV.Add(KeyNm); KeyNmToValH.AddKey(KeyNm);}
00145     KeyNmToValH.GetDat(KeyNm).Add(ValStr);}
00146 
00147   // key retrieval
00148   bool Empty() const {return KeyNmV.Empty();}
00149   int GetKeys() const {return KeyNmV.Len();}
00150   bool IsKey(const TStr& KeyNm) const {return KeyNmV.SearchForw(KeyNm)!=-1;}
00151   int GetKeyN(const TStr& KeyNm) const {return KeyNmV.SearchForw(KeyNm);}
00152   TStr GetKeyNm(const int& KeyN) const {return KeyNmV[KeyN];}
00153 
00154   // value retrieval
00155   int GetVals(const int& KeyN) const {
00156     return KeyNmToValH.GetDat(KeyNmV[KeyN]).Len();}
00157   int GetVals(const TStr& KeyNm) const {
00158     return KeyNmToValH.GetDat(KeyNm).Len();}
00159   TStr GetVal(const int& KeyN, const int& ValN=0) const {
00160     return KeyNmToValH.GetDat(KeyNmV[KeyN])[ValN];}
00161   TStr GetVal(const TStr& KeyNm, const int& ValN=0, const TStr& DfVal="") const {
00162     if (KeyNmToValH.IsKey(KeyNm)){
00163       return KeyNmToValH.GetDat(KeyNm)[ValN];}
00164     else {return DfVal;}}
00165 
00166   // full-url-string
00167   TStr GetFullUrlStr() const;
00168 
00169   static PUrlEnv MkClone(const PUrlEnv& UrlEnv);
00170 };
00171