SNAP Library 2.2, Developer Reference  2014-03-11 19:15:55
SNAP, a general-purpose, high-performance system for analysis and manipulation of large networks
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
util.h
Go to the documentation of this file.
00001 //#//////////////////////////////////////////////
// TGUtil: a class that only groups public static helper functions operating on
// vectors of pairs / key-data entries (TIntPrV, TFltPrV, TIntFltKdV, TFltKdV)
// and on plain value vectors (TFltV, TIntV). Only declarations are visible in
// this listing; the definitions live elsewhere.
// NOTE(review): judging by the names, "Pdf", "Cdf" and "CCdf" presumably denote
// a distribution, its cumulative form and its complementary cumulative form --
// confirm against the implementation before relying on that reading.
00003 class TGUtil {
00004 public:
  // GetCdf: the first three overloads take the input by const reference and
  // fill an output vector passed by non-const reference; the last two return
  // the result by value. There is no by-value overload for TIntFltKdV.
00005   static void GetCdf(const TIntPrV& PdfV, TIntPrV& CdfV);
00006   static void GetCdf(const TFltPrV& PdfV, TFltPrV& CdfV);
00007   static void GetCdf(const TIntFltKdV& PdfV, TIntFltKdV& CdfV);
00008   static TIntPrV GetCdf(const TIntPrV& PdfV);
00009   static TFltPrV GetCdf(const TFltPrV& PdfV);
00010 
  // GetCCdf: same overload set and calling conventions as GetCdf above.
00011   static void GetCCdf(const TIntPrV& PdfV, TIntPrV& CCdfV);
00012   static void GetCCdf(const TFltPrV& PdfV, TFltPrV& CCdfV);
00013   static void GetCCdf(const TIntFltKdV& PdfV, TIntFltKdV& CCdfV);
00014   static TIntPrV GetCCdf(const TIntPrV& PdfV);
00015   static TFltPrV GetCCdf(const TFltPrV& PdfV);
00016 
  // GetPdf: takes a CdfV input and fills PdfV; only output-parameter overloads
  // are declared (no by-value variants).
00017   static void GetPdf(const TIntPrV& CdfV, TIntPrV& PdfV);
00018   static void GetPdf(const TFltPrV& CdfV, TFltPrV& PdfV);
00019   static void GetPdf(const TIntFltKdV& CdfV, TIntFltKdV& PdfV);
00020 
  // Normalize: modifies the vector passed by non-const reference in place.
  // NOTE(review): the normalization rule is not visible here -- see the definition.
00021   static void Normalize(TFltPrV& PdfV);
00022   static void Normalize(TIntFltKdV& PdfV);
00023 
  // MakeExpBins: reads the first argument and writes the second.
  // Defaults: the pair / key-data overloads use BinFactor = 2 and MinYVal = 1;
  // the plain-vector overloads use BinFactor = 1.01 and have no MinYVal.
  // NOTE(review): presumably "exponential binning" with bin widths growing by
  // BinFactor -- confirm against the definition.
00024   static void MakeExpBins(const TFltPrV& XYValV, TFltPrV& ExpXYValV,
00025     const double& BinFactor = 2, const double& MinYVal = 1);
00026   static void MakeExpBins(const TFltKdV& XYValV, TFltKdV& ExpXYValV,
00027     const double& BinFactor = 2, const double& MinYVal = 1);
00028   static void MakeExpBins(const TFltV& YValV, TFltV& ExpYValV, const double& BinFactor = 1.01);
00029   static void MakeExpBins(const TIntV& YValV, TIntV& ExpYValV, const double& BinFactor = 1.01);
00030 };
00031 
00032 //#//////////////////////////////////////////////
// TStrUtil: a class that only groups public static string helpers (XML tag
// access, URL handling, word/line/sentence splitting, word-id lookup, time and
// author-name parsing). Only declarations are visible in this listing; the
// definitions live elsewhere, so behavioral notes below are hedged.
00034 class TStrUtil {
00035 public:
  // XML helpers operating on a TXmlLx passed by non-const reference.
  // NOTE(review): GetXmlTagVal returns a TChA by reference; what object that
  // reference is bound to (and how long it stays valid) is not visible here.
00036   static TChA& GetXmlTagVal(TXmlLx& XmlLx, const TChA& TagNm);
00037   static void GetXmlTagNmVal(TXmlLx& XmlLx, TChA& TagNm, TChA& TagVal);
00038   static bool GetXmlTagNmVal2(TXmlLx& XmlLx, TChA& TagNm, TChA& TagVal, const bool& TakeTagNms);
  // URL helpers; the trailing comments are the original author's notes.
00039   static TChA GetDomNm(const TChA& UrlChA);  // www.cs.cmu.edu
00040   static TChA GetDomNm2(const TChA& UrlChA); // also strip starting www.
00041   static TChA GetWebsiteNm(const TChA& UrlChA); // get website (GetDomNm2 or blog url)
  // The two functions below return bool and deliver their result through the
  // last (non-const reference) parameter.
00042   static bool GetNormalizedUrl(const TChA& UrlIn, const TChA& BaseUrl, TChA& UrlOut);
00043   static bool StripEnd(const TChA& Str, const TChA& SearchStr, TChA& NewStr);
00044   
  // NOTE(review): the identifier is spelled "GetShorStr" (sic, presumably
  // "short"); it is part of the public interface, so it is left unchanged.
  // MaxLen defaults to 50.
00045   static TChA GetShorStr(const TChA& LongStr, const int MaxLen=50);
00046   static TChA GetCleanStr(const TChA& ChA);
00047   static TChA GetCleanWrdStr(const TChA& ChA);
  // CountWords: overloads for a C string, a TChA, and a TChA plus a
  // TStrHash<TInt> named StopWordH.
00048   static int CountWords(const char* CStr);
00049   static int CountWords(const TChA& ChA);
00050   static int CountWords(const TChA& ChA, const TStrHash<TInt>& StopWordH);
  // Split*: take the input TChA by NON-const reference and fill a vector of
  // raw char pointers; each returns an int.
  // NOTE(review): presumably the input buffer is split in place and the
  // pointers refer into it (so they are only valid while ChA is alive and
  // unmodified), and the int is the number of pieces -- confirm both against
  // the definitions.
  // Defaults: SplitOnWs = true, SkipEmpty = false.
00051   static int SplitWords(TChA& ChA, TVec<char *>& WrdV, const bool& SplitOnWs=true);
00052   static int SplitOnCh(TChA& ChA, TVec<char *>& WrdV, const char& Ch, const bool& SkipEmpty=false);
00053   static int SplitLines(TChA& ChA, TVec<char *>& LineV, const bool& SkipEmpty=false);
00054   static int SplitSentences(TChA& ChA, TVec<char *>& SentenceV);
00055   static void RemoveHtmlTags(const TChA& HtmlStr, TChA& TextStr);
00056   static bool IsLatinStr(const TChA& Str, const double& MinAlFrac);
  // GetWIdV takes the hash by const reference, GetAddWIdV by non-const
  // reference; both fill WIdV.
  // NOTE(review): presumably the "Add" variant inserts unseen words into StrH
  // -- confirm against the definition.
00057   static void GetWIdV(const TStrHash<TInt>& StrH, const char *CStr, TIntV& WIdV);
00058   static void GetAddWIdV(TStrHash<TInt>& StrH, const char *CStr, TIntV& WIdV);
  // Returns bool and writes the result into Tm. The accepted input format is
  // not visible in this listing.
00060   static bool GetTmFromStr(const char* TmStr, TSecTm& Tm);
00061 
  // Author-name helpers; both take their TStr argument by value.
00063   static TStr GetStdName(TStr AuthorName);
00065   static void GetStdNameV(TStr AuthorNames, TStrV& StdNameV);
00066 };
00067 
00068 //#//////////////////////////////////////////////
00070 
00071 #if defined(SW_WRITEN)
00072 
// WriteN is declared here (only when SW_WRITEN is defined) and defined elsewhere.
// Usage visible in this file: SendVec passes a file descriptor, a byte buffer
// and a byte count, treats a negative return value as failure, and adds any
// non-negative return value to its running byte total.
// NOTE(review): presumably writes exactly nbytes or fails -- confirm against
// the definition.
00074 extern int WriteN(int fd, char *ptr, int nbytes);
00075 
// SendVec: writes a TVec to a file descriptor through WriteN.
//
// Bytes are emitted in this order:
//   1. V.Len() as a raw TSizeTy (sizeof(TSizeTy) bytes),
//   2. the same V.Len() value a second time,
//   3. the elements, as raw bytes read starting at &V[ValN], in chunks of at
//      most 25600 elements per WriteN call.
//
// Template parameters:
//   TVal    - element type; elements are sent as their in-memory bytes
//             (n * sizeof(TVal) per chunk), with no per-element serialization.
//   TSizeTy - index/length type of the vector.
// Parameters:
//   V        - vector to send (not modified).
//   FileDesc - descriptor forwarded to WriteN.
// Returns: the sum of all WriteN return values on success, or the first
//   negative WriteN return value (the function stops at that point).
//
// NOTE(review): the length is written twice with identical arguments.
//   Presumably this mirrors a stored layout that has two size fields (for
//   example capacity followed by length) -- confirm against the receiver
//   before treating it as a duplicate.
// NOTE(review): the byte total `l` is an int, so it can overflow once more
//   than INT_MAX bytes have been written.
// NOTE(review): `ValN += ChunkSize` can step past the maximum of TSizeTy when
//   Vals is within ChunkSize of that maximum.
00078 template <class TVal, class TSizeTy>
00079 int SendVec(const TVec<TVal, TSizeTy>& V, int FileDesc) {
00080   int l = 0;                      // running total of WriteN return values
00081   int n;                          // number of elements in the current chunk
00082   int r;                          // return value of the latest WriteN call
00083   TSizeTy Vals = V.Len();
00084   int ChunkSize = 25600;          // maximum number of elements per WriteN call
00085 
  // First size field: the vector length as raw bytes.
00086   r = WriteN(FileDesc, (char *) &Vals, (int) sizeof(TSizeTy));
00087   if (r < 0) {
00088     return r;
00089   }
00090   l += r;
00091 
  // Second size field: the same length value again (see the review note above).
00092   r = WriteN(FileDesc, (char *) &Vals, (int) sizeof(TSizeTy));
00093   if (r < 0) {
00094     return r;
00095   }
00096   l += r;
00097 
  // Element payload, ChunkSize elements at a time; the last chunk is shortened
  // to the number of remaining elements.
00098   for (TSizeTy ValN = 0; ValN < Vals; ValN += ChunkSize) {
00099     n = ChunkSize;
00100     if ((Vals - ValN) < ChunkSize) {
00101       n = Vals - ValN;
00102     }
    // Reads n*sizeof(TVal) bytes starting at element ValN, i.e. the code
    // treats elements ValN .. ValN+n-1 as contiguous in memory.
00103     r = WriteN(FileDesc, (char *) &V[ValN], (int) (n*sizeof(TVal)));
00104     if (r < 0) {
00105       return r;
00106     }
00107     l += r;
00108   }
00109   return l;
00110 }
00111 #endif
00112