SNAP Library 2.0, Developer Reference  2013-05-13 16:33:57
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
url.h
Go to the documentation of this file.
00001 #include "bd.h"
00002 
00004 // Url
// Scheme classification stored in a TUrl.
typedef enum {
  usUndef,   // first enumerator (value 0); TUrl::IsOk() reports false for it
  usHttp,    // the scheme checked by the final-URL members of TUrl
  usOther
} TUrlScheme;
00006 
00007 ClassTPV(TUrl, PUrl, TUrlV)//{
00008 private:
00009   static const TStr UrlHttpPrefixStr;
00010   static const TStr UrlHttpAbsPrefixStr;
00011   TUrlScheme Scheme;
00012   TStr UrlStr, RelUrlStr, BaseUrlStr;
00013   TStr SchemeNm, HostNm;
00014   TStr PortStr, PathStr, SearchStr, FragIdStr;
00015   int PortN;
00016   TStrV PathSegV;
00017   TStr IpNum;
00018   TStr FinalUrlStr, FinalHostNm;
00019   TStr HttpRqStr;
00020   void GetAbs(const TStr& AbsUrlStr);
00021   void GetAbsFromBase(const TStr& RelUrlStr, const TStr& BaseUrlStr);
00022   UndefDefaultCopyAssign(TUrl);
00023 public:
00024   TUrl(const TStr& _RelUrlStr, const TStr& _BaseUrlStr=TStr());
00025   static PUrl New(const TStr& RelUrlStr, const TStr& BaseUrlStr=TStr()){
00026     return PUrl(new TUrl(RelUrlStr, BaseUrlStr));}
00027   ~TUrl(){}
00028   TUrl(TSIn&){Fail;}
00029   static PUrl Load(TSIn&){Fail; return NULL;}
00030   void Save(TSOut&){Fail;}
00031 
00032   bool IsOk(const TUrlScheme _Scheme=usUndef) const {
00033     if (_Scheme==usUndef){return Scheme!=usUndef;}
00034     else {return Scheme==_Scheme;}}
00035   TUrlScheme GetScheme(){return Scheme;}
00036   TStr GetUrlStr() const {return UrlStr;}
00037   TStr GetRelUrlStr() const {return RelUrlStr;}
00038   bool IsBaseUrl(){return !BaseUrlStr.Empty();}
00039   TStr GetBaseUrlStr() const {return BaseUrlStr;}
00040   TStr GetSchemeNm() const {EAssert(IsOk()); return SchemeNm;}
00041   TStr GetHostNm() const {EAssert(IsOk()); return HostNm;}
00042   TStr GetDmNm(const int& MxDmSegs=-1) const;
00043   bool IsPortOk() const { EAssert(IsOk()); return (PortN > 0); }
00044   TStr GetPortStr() const {EAssert(IsOk()); return PortStr;}
00045   int GetPortN() const {EAssert(IsOk()&&(PortN!=-1)); return PortN;}
00046   TStr GetPathStr() const {EAssert(IsOk()); return PathStr;}
00047   int GetPathSegs() const {return PathSegV.Len();}
00048   TStr GetPathSeg(const int& PathSegN) const {return PathSegV[PathSegN];}
00049   TStr GetSearchStr() const {EAssert(IsOk()); return SearchStr;}
00050   TStr GetFragIdStr() const {EAssert(IsOk()); return FragIdStr;}
00051 
00052   bool IsIpNum() const {return !IpNum.Empty();}
00053   void PutIpNum(const TStr& _IpNum){IpNum=_IpNum;}
00054   TStr GetIpNum() const {EAssert(IsIpNum()); return IpNum;}
00055   TStr GetIpNumOrHostNm() const {return IsIpNum() ? GetIpNum() : GetHostNm();}
00056 
00057   bool IsDefFinalUrl() const {
00058     EAssert(IsOk(usHttp)); return !FinalUrlStr.Empty();}
00059   TStr GetFinalUrlStr() const {
00060     EAssert(IsDefFinalUrl()); return FinalUrlStr;}
00061   TStr GetAsFinalUrlStr() const {
00062     if (IsDefFinalUrl()){return FinalUrlStr;} else {return UrlStr;}}
00063   TStr GetFinalHostNm() const {
00064     EAssert(IsDefFinalUrl()); return FinalHostNm;}
00065   TStr GetAsFinalHostNm() const {
00066     if (IsDefFinalUrl()){return FinalHostNm;} else {return HostNm;}}
00067   void DefUrlAsFinal(){
00068     EAssert(IsOk(usHttp)); EAssert(!IsDefFinalUrl());
00069     FinalUrlStr=UrlStr; FinalHostNm=HostNm;}
00070   void DefFinalUrl(const TStr& _FinalHostNm);
00071 
00072   void PutHttpRqStr(const TStr& _HttpRqStr){HttpRqStr=_HttpRqStr;}
00073   TStr GetHttpRqStr() const {return HttpRqStr;}
00074   bool IsHttpRqStr() const {return !HttpRqStr.Empty();}
00075   void ChangeHttpRqStr(const TStr& SrcStr, const TStr& DstStr){
00076     HttpRqStr.ChangeStr(SrcStr, DstStr);}
00077 
00078   bool IsInHost(const TStr& _HostNm) const {
00079     EAssert(IsOk()); return HostNm.GetUc().IsSuffix(_HostNm.GetUc());}
00080   bool IsInPath(const TStr& _PathStr) const {
00081     EAssert(IsOk()); return PathStr.GetUc().IsPrefix(_PathStr.GetUc());}
00082   void ToLcPath();
00083 
00084   static bool IsAbs(const TStr& UrlStr);
00085   static bool IsScript(const TStr& UrlStr);
00086   static bool IsSite(const TStr& UrlStr);
00087 
00088   static PUrl GetUrlFromShortcut(const TStr& ShortcutUrlStr,
00089    const TStr& DfHostNmPrefix, const TStr& DfHostNmSufix);
00090   static TStr GetUrlSearchStr(const TStr& Str);
00091   static TStr EncodeUrlStr(const TStr& Str){return GetUrlSearchStr(Str);}
00092   static TStr DecodeUrlStr(const TStr& UrlStr);
00093   static TStr GetDocStrFromUrlStr(const TStr& UrlStr, const int& Copies=1);
00094   static TStr GetTopDownDocNm(
00095    const TStr& UrlStr, const int& MxLen=-1, const bool& HostOnlyP=false);
00096 };
00097 typedef TPair<TInt, PUrl> TIdUrlPr;
00098 typedef TQQueue<TIdUrlPr> TIdUrlPrQ;
00099 typedef THash<TInt, PUrl> TIdToUrlH;
00100 
00102 // Url-Environment
00103 ClassTP(TUrlEnv, PUrlEnv)//{
00104 private:
00105   TStr BaseUrlStr;
00106   TStrV KeyNmV;
00107   TStrStrVH KeyNmToValH;
00108 public:
00109   TUrlEnv():
00110     KeyNmV(), KeyNmToValH(10){}
00111   TUrlEnv(const TUrlEnv& UrlEnv):
00112     KeyNmV(UrlEnv.KeyNmV), KeyNmToValH(UrlEnv.KeyNmToValH){}
00113   static PUrlEnv New(){return new TUrlEnv();}
00114   static PUrlEnv New(const TStr& BaseUrlStr,
00115    const TStr& KeyNm1=TStr(), const TStr& ValStr1=TStr(),
00116    const TStr& KeyNm2=TStr(), const TStr& ValStr2=TStr(),
00117    const TStr& KeyNm3=TStr(), const TStr& ValStr3=TStr(),
00118    const TStr& KeyNm4=TStr(), const TStr& ValStr4=TStr()){
00119     PUrlEnv UrlEnv=New();
00120     UrlEnv->PutBaseUrlStr(BaseUrlStr);
00121     if (!KeyNm1.Empty()){UrlEnv->AddKeyVal(KeyNm1, ValStr1);}
00122     if (!KeyNm2.Empty()){UrlEnv->AddKeyVal(KeyNm2, ValStr2);}
00123     if (!KeyNm3.Empty()){UrlEnv->AddKeyVal(KeyNm3, ValStr3);}
00124     if (!KeyNm4.Empty()){UrlEnv->AddKeyVal(KeyNm4, ValStr4);}
00125     return UrlEnv;}
00126   ~TUrlEnv(){}
00127   TUrlEnv(TSIn& SIn): KeyNmV(SIn), KeyNmToValH(SIn){}
00128   static PUrlEnv Load(TSIn& SIn){return new TUrlEnv(SIn);}
00129   void Save(TSOut& SOut){KeyNmV.Save(SOut); KeyNmToValH.Save(SOut);}
00130 
00131   TUrlEnv& operator=(const TUrlEnv& Env){
00132     if (this!=&Env){KeyNmV=Env.KeyNmV; KeyNmToValH=Env.KeyNmToValH;}
00133     return *this;}
00134 
00135   // base url
00136   void PutBaseUrlStr(const TStr& _BaseUrlStr){BaseUrlStr=_BaseUrlStr;}
00137   TStr GetBaseUrlStr() const {return BaseUrlStr;}
00138 
00139   // adding key-value
00140   void AddKeyVal(const TStr& KeyNm, const TStr& ValStr){
00141     if (!IsKey(KeyNm)){KeyNmV.Add(KeyNm); KeyNmToValH.AddKey(KeyNm);}
00142     KeyNmToValH.GetDat(KeyNm).Clr();
00143     KeyNmToValH.GetDat(KeyNm).Add(ValStr);}
00144   void AddToKeyVal(const TStr& KeyNm, const TStr& ValStr){
00145     if (!IsKey(KeyNm)){KeyNmV.Add(KeyNm); KeyNmToValH.AddKey(KeyNm);}
00146     KeyNmToValH.GetDat(KeyNm).Add(ValStr);}
00147 
00148   // key retrieval
00149   bool Empty() const {return KeyNmV.Empty();}
00150   int GetKeys() const {return KeyNmV.Len();}
00151   bool IsKey(const TStr& KeyNm) const {return KeyNmV.SearchForw(KeyNm)!=-1;}
00152   int GetKeyN(const TStr& KeyNm) const {return KeyNmV.SearchForw(KeyNm);}
00153   TStr GetKeyNm(const int& KeyN) const {return KeyNmV[KeyN];}
00154 
00155   // value retrieval
00156   int GetVals(const int& KeyN) const {
00157     return KeyNmToValH.GetDat(KeyNmV[KeyN]).Len();}
00158   int GetVals(const TStr& KeyNm) const {
00159     return KeyNmToValH.GetDat(KeyNm).Len();}
00160   TStr GetVal(const int& KeyN, const int& ValN=0) const {
00161     return KeyNmToValH.GetDat(KeyNmV[KeyN])[ValN];}
00162   TStr GetVal(const TStr& KeyNm, const int& ValN=0, const TStr& DfVal="") const {
00163     if (KeyNmToValH.IsKey(KeyNm)){
00164       return KeyNmToValH.GetDat(KeyNm)[ValN];}
00165     else {return DfVal;}}
00166 
00167   // full-url-string
00168   TStr GetFullUrlStr() const;
00169 
00170   static PUrlEnv MkClone(const PUrlEnv& UrlEnv);
00171 };
00172