SNAP Library 2.4, User Reference  2015-05-11 19:40:56
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
util.h
Go to the documentation of this file.
1 //#//////////////////////////////////////////////
/// Graph Utilities.
// Static helpers operating on vectors of (x, y) pairs / key-data records.
// Only declarations are visible here; the definitions are in util.cpp.
3 class TGUtil {
4 public:
 // PdfV -> CdfV. Judging by the names these turn a density into a cumulative
 // distribution -- confirm in util.cpp. The two-argument overloads fill the
 // output parameter; the one-argument overloads return the result by value.
5  static void GetCdf(const TIntPrV& PdfV, TIntPrV& CdfV);
6  static void GetCdf(const TFltPrV& PdfV, TFltPrV& CdfV);
7  static void GetCdf(const TIntFltKdV& PdfV, TIntFltKdV& CdfV);
8  static TIntPrV GetCdf(const TIntPrV& PdfV);
9  static TFltPrV GetCdf(const TFltPrV& PdfV);
10 
 // PdfV -> CCdfV. Presumably the complementary cumulative distribution --
 // confirm in util.cpp. Same overload pattern as GetCdf.
11  static void GetCCdf(const TIntPrV& PdfV, TIntPrV& CCdfV);
12  static void GetCCdf(const TFltPrV& PdfV, TFltPrV& CCdfV);
13  static void GetCCdf(const TIntFltKdV& PdfV, TIntFltKdV& CCdfV);
14  static TIntPrV GetCCdf(const TIntPrV& PdfV);
15  static TFltPrV GetCCdf(const TFltPrV& PdfV);
16 
 // CdfV -> PdfV; by the parameter names, the reverse direction of GetCdf.
17  static void GetPdf(const TIntPrV& CdfV, TIntPrV& PdfV);
18  static void GetPdf(const TFltPrV& CdfV, TFltPrV& PdfV);
19  static void GetPdf(const TIntFltKdV& CdfV, TIntFltKdV& PdfV);
20 
 // Modifies PdfV in place (non-const reference, no separate output).
21  static void Normalize(TFltPrV& PdfV);
22  static void Normalize(TIntFltKdV& PdfV);
23 
 // Writes a binned version of the input into the Exp* output parameter.
 // BinFactor defaults to 2 for the (x, y) overloads and to 1.01 for the
 // plain value-vector overloads; MinYVal defaults to 1.
24  static void MakeExpBins(const TFltPrV& XYValV, TFltPrV& ExpXYValV,
25  const double& BinFactor = 2, const double& MinYVal = 1);
26  static void MakeExpBins(const TFltKdV& XYValV, TFltKdV& ExpXYValV,
27  const double& BinFactor = 2, const double& MinYVal = 1);
28  static void MakeExpBins(const TFltV& YValV, TFltV& ExpYValV, const double& BinFactor = 1.01);
29  static void MakeExpBins(const TIntV& YValV, TIntV& ExpYValV, const double& BinFactor = 1.01);
30 };
31 
32 //#//////////////////////////////////////////////
/// String helper functions and utilities. Quick and dirty!
// Only declarations are visible here; the definitions are in util.cpp.
34 class TStrUtil {
35 public:
 // XML helpers that read from the lexer XmlLx.
36  static TChA& GetXmlTagVal(TXmlLx& XmlLx, const TChA& TagNm);
37  static void GetXmlTagNmVal(TXmlLx& XmlLx, TChA& TagNm, TChA& TagVal);
38  static bool GetXmlTagNmVal2(TXmlLx& XmlLx, TChA& TagNm, TChA& TagVal, const bool& TakeTagNms);
 // URL helpers.
39  static TChA GetDomNm(const TChA& UrlChA); // www.cs.cmu.edu
40  static TChA GetDomNm2(const TChA& UrlChA); // also strip starting www.
41  static TChA GetWebsiteNm(const TChA& UrlChA); // get website (GetDomNm2 or blog url)
 /// Quick URL normalization: Remove ending /, /index.html, etc. and strip starting www.
42  static bool GetNormalizedUrl(const TChA& UrlIn, const TChA& BaseUrl, TChA& UrlOut);
43  static bool StripEnd(const TChA& Str, const TChA& SearchStr, TChA& NewStr);
44 
 // MaxLen defaults to 50.
45  static TChA GetShorStr(const TChA& LongStr, const int MaxLen=50);
46  static TChA GetCleanStr(const TChA& ChA);
47  static TChA GetCleanWrdStr(const TChA& ChA);
48  static int CountWords(const char* CStr);
49  static int CountWords(const TChA& ChA);
50  static int CountWords(const TChA& ChA, const TStrHash<TInt>& StopWordH);
 // The Split* functions take ChA by non-const reference and fill a vector of
 // char* -- presumably pointers into ChA's own buffer, so ChA would have to
 // outlive the vector; confirm in util.cpp.
51  static int SplitWords(TChA& ChA, TVec<char *>& WrdV, const bool& SplitOnWs=true);
52  static int SplitOnCh(TChA& ChA, TVec<char *>& WrdV, const char& Ch, const bool& SkipEmpty=false);
53  static int SplitLines(TChA& ChA, TVec<char *>& LineV, const bool& SkipEmpty=false);
54  static int SplitSentences(TChA& ChA, TVec<char *>& SentenceV);
55  static void RemoveHtmlTags(const TChA& HtmlStr, TChA& TextStr);
56  static bool IsLatinStr(const TChA& Str, const double& MinAlFrac);
 // GetWIdV takes the hash as const; GetAddWIdV takes it as non-const.
57  static void GetWIdV(const TStrHash<TInt>& StrH, const char *CStr, TIntV& WIdV);
58  static void GetAddWIdV(TStrHash<TInt>& StrH, const char *CStr, TIntV& WIdV);
 /// Parses time in many different text formats. See source code for details.
60  static bool GetTmFromStr(const char* TmStr, TSecTm& Tm);
61 
 /// Puts person's name (first middle last) in a standard form.
63  static TStr GetStdName(TStr AuthorName);
 /// Splits a list of people's names.
65  static void GetStdNameV(TStr AuthorNames, TStrV& StdNameV);
66 };
67 
68 //#//////////////////////////////////////////////
70 
71 #if defined(SW_WRITEN)
72 
// Declared here, defined elsewhere (only compiled when SW_WRITEN is defined).
// The callers below pass a file descriptor, a byte buffer and a byte count,
// treat a negative result as failure and add a non-negative result to their
// byte total -- presumably the return value is the number of bytes written;
// confirm against the definition.
74 extern int WriteN(int fd, char *ptr, int nbytes);
75 
77 
80 template <class TVal, class TSizeTy>
81 int64 SendVec(const TVec<TVal, TSizeTy>& V, int FileDesc) {
82  int64 l = 0;
83  int n;
84  int r;
85  TSizeTy Vals = V.Len();
86  int ChunkSize = 25600;
87 
88  r = WriteN(FileDesc, (char *) &Vals, (int) sizeof(TSizeTy));
89  if (r < 0) {
90  return r;
91  }
92  l += r;
93 
94  r = WriteN(FileDesc, (char *) &Vals, (int) sizeof(TSizeTy));
95  if (r < 0) {
96  return r;
97  }
98  l += r;
99 
100  for (TSizeTy ValN = 0; ValN < Vals; ValN += ChunkSize) {
101  n = ChunkSize;
102  if ((Vals - ValN) < ChunkSize) {
103  n = Vals - ValN;
104  }
105  r = WriteN(FileDesc, (char *) &V[ValN], (int) (n*sizeof(TVal)));
106  if (r < 0) {
107  return r;
108  }
109  l += r;
110  }
111  return l;
112 }
113 
114 
116 
119 template <class TVal, class TSizeTy>
120 int64 SendVec64(const TVec< TVec< TVal, TSizeTy > , TSizeTy >&Vec64, int FileDesc) {
121  TSizeTy N =Vec64.Len();
122  int64 l=0;
123  int r;
124 
125  r = WriteN(FileDesc, (char *) &N, (int) sizeof(TSizeTy));
126  if (r < 0) {
127  return r;
128  }
129  l += r;
130 
131  r = WriteN(FileDesc, (char *) &N, (int) sizeof(TSizeTy));
132  if (r < 0) {
133  return r;
134  }
135  l += r;
136 
137  for (typename TVec< TVec< TVal, TSizeTy >, TSizeTy >::TIter it=Vec64.BegI(); it!=Vec64.EndI(); ++it) {
138  r = SendVec(*it, FileDesc);
139  if (r < 0) {
140  return r;
141  }
142  l += r;
143  }
144 
145  return l;
146 }
147 #endif
String helper functions and utilities. Quick and dirty!
Definition: util.h:34
static TChA GetDomNm(const TChA &UrlChA)
Definition: util.cpp:187
static bool GetNormalizedUrl(const TChA &UrlIn, const TChA &BaseUrl, TChA &UrlOut)
Quick URL normalization: Remove ending /, /index.html, etc. and strip starting www.
Definition: util.cpp:306
static void GetStdNameV(TStr AuthorNames, TStrV &StdNameV)
Splits a list of people's names.
Definition: util.cpp:664
static TChA GetWebsiteNm(const TChA &UrlChA)
Definition: util.cpp:218
static TChA GetDomNm2(const TChA &UrlChA)
Definition: util.cpp:201
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:535
static void GetWIdV(const TStrHash< TInt > &StrH, const char *CStr, TIntV &WIdV)
Definition: util.cpp:538
static int SplitSentences(TChA &ChA, TVec< char * > &SentenceV)
Definition: util.cpp:460
static void GetXmlTagNmVal(TXmlLx &XmlLx, TChA &TagNm, TChA &TagVal)
Definition: util.cpp:149
static void MakeExpBins(const TFltPrV &XYValV, TFltPrV &ExpXYValV, const double &BinFactor=2, const double &MinYVal=1)
Definition: util.cpp:99
static void RemoveHtmlTags(const TChA &HtmlStr, TChA &TextStr)
Definition: util.cpp:481
static bool GetXmlTagNmVal2(TXmlLx &XmlLx, TChA &TagNm, TChA &TagVal, const bool &TakeTagNms)
Definition: util.cpp:163
static void GetPdf(const TIntPrV &CdfV, TIntPrV &PdfV)
Definition: util.cpp:63
static TChA GetShorStr(const TChA &LongStr, const int MaxLen=50)
Definition: util.cpp:342
static int SplitLines(TChA &ChA, TVec< char * > &LineV, const bool &SkipEmpty=false)
Definition: util.cpp:439
Definition: xml.h:98
static int CountWords(const char *CStr)
Definition: util.cpp:393
static int SplitOnCh(TChA &ChA, TVec< char * > &WrdV, const char &Ch, const bool &SkipEmpty=false)
Definition: util.cpp:425
static int SplitWords(TChA &ChA, TVec< char * > &WrdV, const bool &SplitOnWs=true)
Definition: util.cpp:412
static TChA GetCleanWrdStr(const TChA &ChA)
Definition: util.cpp:350
static TChA & GetXmlTagVal(TXmlLx &XmlLx, const TChA &TagNm)
Definition: util.cpp:132
static void GetCdf(const TIntPrV &PdfV, TIntPrV &CdfV)
Definition: util.cpp:3
static void GetCCdf(const TIntPrV &PdfV, TIntPrV &CCdfV)
Definition: util.cpp:33
Definition: hash.h:716
Definition: dt.h:201
Definition: tm.h:81
static TStr GetStdName(TStr AuthorName)
Puts person's name (first middle last) in a standard form: _ ...
Definition: util.cpp:621
long long int64
Definition: bd.h:27
Definition: dt.h:412
static bool IsLatinStr(const TChA &Str, const double &MinAlFrac)
Definition: util.cpp:527
TIter BegI() const
Returns an iterator pointing to the first element in the vector.
Definition: ds.h:550
static void Normalize(TFltPrV &PdfV)
Definition: util.cpp:81
static bool GetTmFromStr(const char *TmStr, TSecTm &Tm)
Parses time in many different text formats. See source code for details.
Definition: util.cpp:571
static TChA GetCleanStr(const TChA &ChA)
Definition: util.cpp:372
static bool StripEnd(const TChA &Str, const TChA &SearchStr, TChA &NewStr)
Definition: util.cpp:331
static void GetAddWIdV(TStrHash< TInt > &StrH, const char *CStr, TIntV &WIdV)
Definition: util.cpp:552
Graph Utilities.
Definition: util.h:3