SNAP Library 4.0, Developer Reference
2017-07-27 13:18:06
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
|
String helper functions and utilities. Quick and dirty! More...
#include <util.h>
Static Public Member Functions | |
static TChA & | GetXmlTagVal (TXmlLx &XmlLx, const TChA &TagNm) |
static void | GetXmlTagNmVal (TXmlLx &XmlLx, TChA &TagNm, TChA &TagVal) |
static bool | GetXmlTagNmVal2 (TXmlLx &XmlLx, TChA &TagNm, TChA &TagVal, const bool &TakeTagNms) |
static TChA | GetDomNm (const TChA &UrlChA) |
static TChA | GetDomNm2 (const TChA &UrlChA) |
static TChA | GetWebsiteNm (const TChA &UrlChA) |
static bool | GetNormalizedUrl (const TChA &UrlIn, const TChA &BaseUrl, TChA &UrlOut) |
Quick URL normalization: Remove ending /, /index.html, etc. and strip starting www. More... | |
static bool | StripEnd (const TChA &Str, const TChA &SearchStr, TChA &NewStr) |
static TChA | GetShorStr (const TChA &LongStr, const int MaxLen=50) |
static TChA | GetCleanStr (const TChA &ChA) |
static TChA | GetCleanWrdStr (const TChA &ChA) |
static int | CountWords (const char *CStr) |
static int | CountWords (const TChA &ChA) |
static int | CountWords (const TChA &ChA, const TStrHash< TInt > &StopWordH) |
static int | SplitWords (TChA &ChA, TVec< char * > &WrdV, const bool &SplitOnWs=true) |
static int | SplitOnCh (TChA &ChA, TVec< char * > &WrdV, const char &Ch, const bool &SkipEmpty=false) |
static int | SplitLines (TChA &ChA, TVec< char * > &LineV, const bool &SkipEmpty=false) |
static int | SplitSentences (TChA &ChA, TVec< char * > &SentenceV) |
static void | RemoveHtmlTags (const TChA &HtmlStr, TChA &TextStr) |
static bool | IsLatinStr (const TChA &Str, const double &MinAlFrac) |
static void | GetWIdV (const TStrHash< TInt > &StrH, const char *CStr, TIntV &WIdV) |
static void | GetAddWIdV (TStrHash< TInt > &StrH, const char *CStr, TIntV &WIdV) |
static bool | GetTmFromStr (const char *TmStr, TSecTm &Tm) |
Parses time in many different text formats. See source code for details. More... | |
static TStr | GetStdName (TStr AuthorName) |
Puts a person's name (first middle last) in a standard form: <last_name>_<first name initial> More... | |
static void | GetStdNameV (TStr AuthorNames, TStrV &StdNameV) |
Splits a list of people's names. More... | |
|
static |
Definition at line 393 of file util.cpp.
References TCh::IsWs().
Referenced by CountWords().
|
static |
Definition at line 389 of file util.cpp.
References CountWords(), and TChA::CStr().
Definition at line 401 of file util.cpp.
References TStrHash< TDat, TStringPool, THashFunc >::IsKey(), TVec< TVal, TSizeTy >::Len(), and SplitWords().
Definition at line 552 of file util.cpp.
Definition at line 372 of file util.cpp.
References TChA::AddCh(), TChA::CStr(), TCh::IsAlNum(), TCh::IsWs(), and TChA::Len().
Definition at line 350 of file util.cpp.
References TChA::AddCh(), TChA::CStr(), TCh::IsAlNum(), and TChA::Len().
Definition at line 187 of file util.cpp.
References TChA::GetSubStr(), TChA::IsPrefix(), TChA::Len(), TChA::SearchCh(), TChA::SearchChBack(), and TChA::ToLc().
Referenced by GetDomNm2().
Definition at line 201 of file util.cpp.
References GetDomNm(), TChA::GetSubStr(), TChA::IsPrefix(), and TInt::Mx.
Referenced by GetWebsiteNm().
Quick URL normalization: Remove ending /, /index.html, etc. and strip starting www.
Definition at line 306 of file util.cpp.
References TChA::AddCh(), TChA::Empty(), TChA::GetSubStr(), TChA::IsPrefix(), TInt::Mx, StripEnd(), and TChA::ToLc().
Definition at line 342 of file util.cpp.
References TChA::GetSubStr(), and TChA::Len().
Splits a list of people's names.
Definition at line 664 of file util.cpp.
|
static |
Parses time in many different text formats. See source code for details.
Definition at line 571 of file util.cpp.
Definition at line 218 of file util.cpp.
References GetDomNm2(), GetNthOccurence(), TChA::GetSubStr(), TChA::IsPrefix(), TChA::IsSuffix(), TChA::Len(), TChA::SearchCh(), and TChA::SearchStr().
Definition at line 538 of file util.cpp.
Definition at line 149 of file util.cpp.
References EAssertR, TXmlLx::GetSym(), TXmlLx::TagNm, TXmlLx::TxtChA, xsyETag, xsySTag, and xsyStr.
|
static |
Definition at line 163 of file util.cpp.
References TChA::Clr(), TChA::CStr(), TXmlLx::GetSym(), TXmlLx::Sym, TXmlLx::TagNm, TXmlLx::TxtChA, xsyETag, xsySTag, and xsyStr.
Definition at line 132 of file util.cpp.
References TStr::CStr(), EAssertR, TXmlLx::GetSym(), TXmlLx::TagNm, TXmlLx::TxtChA, xsyETag, xsySTag, and xsyStr.
|
static |
Definition at line 481 of file util.cpp.
References TChA::AddCh(), TChA::Clr(), TChA::CStr(), and TChA::Len().
|
static |
Definition at line 439 of file util.cpp.
References TVec< TVal, TSizeTy >::Add(), TVec< TVal, TSizeTy >::Clr(), TChA::CStr(), and TVec< TVal, TSizeTy >::Len().
|
static |
Definition at line 425 of file util.cpp.
References TVec< TVal, TSizeTy >::Add(), TVec< TVal, TSizeTy >::Clr(), TChA::CStr(), TVec< TVal, TSizeTy >::DelLast(), TVec< TVal, TSizeTy >::Empty(), TVec< TVal, TSizeTy >::Last(), and TVec< TVal, TSizeTy >::Len().
Definition at line 460 of file util.cpp.
References TVec< TVal, TSizeTy >::Add(), TVec< TVal, TSizeTy >::Clr(), TChA::CStr(), TCh::IsAlNum(), TCh::IsWs(), TChA::Len(), and TVec< TVal, TSizeTy >::Len().
|
static |
Definition at line 412 of file util.cpp.
References TVec< TVal, TSizeTy >::Add(), TVec< TVal, TSizeTy >::Clr(), TChA::CStr(), TVec< TVal, TSizeTy >::DelLast(), TVec< TVal, TSizeTy >::Empty(), TCh::IsAlNum(), TVec< TVal, TSizeTy >::Last(), and TVec< TVal, TSizeTy >::Len().
Referenced by CountWords().
Definition at line 331 of file util.cpp.
References TChA::GetSubStr(), and TChA::Len().
Referenced by GetNormalizedUrl().