SNAP Library 4.0, Developer Reference  2017-07-27 13:18:06
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
util.h
Go to the documentation of this file.
1 //#//////////////////////////////////////////////
// Graph Utilities.
// Every member is a static function and is only declared here; the cross-reference
// list for this listing places the definitions in util.cpp.
3 class TGUtil {
4 public:
  // GetCdf: take a PdfV and produce a CdfV. The two-argument overloads fill the CdfV
  // out-parameter; the one-argument overloads return the result by value.
  // NOTE(review): presumably a PDF -> CDF conversion (inferred from the names) -- confirm in util.cpp.
5  static void GetCdf(const TIntPrV& PdfV, TIntPrV& CdfV);
6  static void GetCdf(const TFltPrV& PdfV, TFltPrV& CdfV);
7  static void GetCdf(const TIntFltKdV& PdfV, TIntFltKdV& CdfV);
8  static TIntPrV GetCdf(const TIntPrV& PdfV);
9  static TFltPrV GetCdf(const TFltPrV& PdfV);
10 
  // GetCCdf: same overload set as GetCdf, but producing a CCdfV.
  // NOTE(review): presumably the complementary CDF -- confirm in util.cpp.
11  static void GetCCdf(const TIntPrV& PdfV, TIntPrV& CCdfV);
12  static void GetCCdf(const TFltPrV& PdfV, TFltPrV& CCdfV);
13  static void GetCCdf(const TIntFltKdV& PdfV, TIntFltKdV& CCdfV);
14  static TIntPrV GetCCdf(const TIntPrV& PdfV);
15  static TFltPrV GetCCdf(const TFltPrV& PdfV);
16 
  // GetPdf: the opposite direction (CdfV in, PdfV out); only out-parameter overloads exist.
17  static void GetPdf(const TIntPrV& CdfV, TIntPrV& PdfV);
18  static void GetPdf(const TFltPrV& CdfV, TFltPrV& PdfV);
19  static void GetPdf(const TIntFltKdV& CdfV, TIntFltKdV& PdfV);
20 
  // Normalize: PdfV is passed by non-const reference and nothing is returned.
  // NOTE(review): presumably the vector is rescaled in place -- confirm in util.cpp.
21  static void Normalize(TFltPrV& PdfV);
22  static void Normalize(TIntFltKdV& PdfV);
23 
  // MakeExpBins: read the first argument and fill the second (Exp...) out-parameter.
  // BinFactor defaults to 2 for the pair / key-dat overloads and to 1.01 for the plain
  // TFltV / TIntV overloads; MinYVal (pair / key-dat overloads only) defaults to 1.
  // NOTE(review): presumably exponentially growing bins -- confirm in util.cpp.
24  static void MakeExpBins(const TFltPrV& XYValV, TFltPrV& ExpXYValV,
25  const double& BinFactor = 2, const double& MinYVal = 1);
26  static void MakeExpBins(const TFltKdV& XYValV, TFltKdV& ExpXYValV,
27  const double& BinFactor = 2, const double& MinYVal = 1);
28  static void MakeExpBins(const TFltV& YValV, TFltV& ExpYValV, const double& BinFactor = 1.01);
29  static void MakeExpBins(const TIntV& YValV, TIntV& ExpYValV, const double& BinFactor = 1.01);
30 };
31 
32 //#//////////////////////////////////////////////
// String helper functions and utilities. Quick and dirty!
// Every member is a static function and is only declared here; the cross-reference
// list for this listing places the definitions in util.cpp.
34 class TStrUtil {
35 public:
  // XML helpers that read from a TXmlLx.
  // NOTE(review): GetXmlTagVal returns a TChA by reference; what that reference refers
  // to (and how long it stays valid) is not visible in this header -- check util.cpp.
36  static TChA& GetXmlTagVal(TXmlLx& XmlLx, const TChA& TagNm);
37  static void GetXmlTagNmVal(TXmlLx& XmlLx, TChA& TagNm, TChA& TagVal);
38  static bool GetXmlTagNmVal2(TXmlLx& XmlLx, TChA& TagNm, TChA& TagVal, const bool& TakeTagNms);
  // URL helpers.
39  static TChA GetDomNm(const TChA& UrlChA); // www.cs.cmu.edu
40  static TChA GetDomNm2(const TChA& UrlChA); // also strip starting www.
41  static TChA GetWebsiteNm(const TChA& UrlChA); // get website (GetDomNm2 or blog url)
  // Quick URL normalization: remove ending /, /index.html, etc. and strip starting www.
  // The result is delivered through UrlOut; the meaning of the bool return is not
  // visible here (presumably success/failure -- confirm in util.cpp).
42  static bool GetNormalizedUrl(const TChA& UrlIn, const TChA& BaseUrl, TChA& UrlOut);
43  static bool StripEnd(const TChA& Str, const TChA& SearchStr, TChA& NewStr);
44 
  // Text cleaning and word counting. MaxLen defaults to 50 for GetShorStr.
45  static TChA GetShorStr(const TChA& LongStr, const int MaxLen=50);
46  static TChA GetCleanStr(const TChA& ChA);
47  static TChA GetCleanWrdStr(const TChA& ChA);
48  static int CountWords(const char* CStr);
49  static int CountWords(const TChA& ChA);
50  static int CountWords(const TChA& ChA, const TStrHash<TInt>& StopWordH);
  // Split* functions take ChA by non-const reference and fill a vector of char*.
  // NOTE(review): presumably ChA is modified in place and the returned pointers point
  // into its buffer, so they would only stay valid while ChA is alive and unchanged --
  // confirm in util.cpp before storing them.
51  static int SplitWords(TChA& ChA, TVec<char *>& WrdV, const bool& SplitOnWs=true);
52  static int SplitOnCh(TChA& ChA, TVec<char *>& WrdV, const char& Ch, const bool& SkipEmpty=false);
53  static int SplitLines(TChA& ChA, TVec<char *>& LineV, const bool& SkipEmpty=false);
54  static int SplitSentences(TChA& ChA, TVec<char *>& SentenceV);
55  static void RemoveHtmlTags(const TChA& HtmlStr, TChA& TextStr);
56  static bool IsLatinStr(const TChA& Str, const double& MinAlFrac);
  // Word-id lookups: GetWIdV takes the hash by const reference, GetAddWIdV by
  // non-const reference; both write into WIdV.
57  static void GetWIdV(const TStrHash<TInt>& StrH, const char *CStr, TIntV& WIdV);
58  static void GetAddWIdV(TStrHash<TInt>& StrH, const char *CStr, TIntV& WIdV);
  // Parses time in many different text formats. See source code for details.
60  static bool GetTmFromStr(const char* TmStr, TSecTm& Tm);
61 
  // Puts a person's name (first middle last) in a standard form; the exact form is
  // cut off in this page's cross-reference text.
63  static TStr GetStdName(TStr AuthorName);
  // Splits a list of people's names.
65  static void GetStdNameV(TStr AuthorNames, TStrV& StdNameV);
66 };
67 
68 //#//////////////////////////////////////////////
70 
// Routines to benchmark table operations.
// The class hands out one function-local static instance through GetInstance();
// the constructor body and the copy operations sit in a private section.
// NOTE(review): this rendering skips listing lines 73-74, 76, 78-79 and 88, so the
// TExperiment enum (cross-referenced as "List of all experiments", util.h:74) and the
// opening line of the TStopwatch() constructor (util.h:88) are not visible below.
71 class TStopwatch {
72 public:
75  // graph construction
77  // subgraph construction
  // The expected number of experiments (must be at least equal to the size of the
  // TExperiment list). Also the length of every per-experiment array below.
80  static const int NEXPS = 25;
81 
  // Returns the address of the function-local static TStopwatch.
82  static TStopwatch* GetInstance() {
83  static TStopwatch instance; // Guaranteed to be destroyed. Instantiated on first use.
84  return &instance;
85  }
86 
87 private:
  // Constructor body (its head, listing line 88, is missing from this rendering).
  // Every Mins slot is seeded with a large value (1e11).
  // NOTE(review): how Starts/Cnts/Sums/Maxs are initialised cannot be seen here.
89  for (int i = 0; i < NEXPS; i++) { Mins[i] = 100000000000.0; }
90  }; // End of the constructor body; the braces are required, the trailing ';' is redundant.
91 
92  // Don't forget to declare these two. You want to make sure they
93  // are inaccessible, otherwise you may accidentally get copies of
94  // your singleton appearing.
95  TStopwatch(TStopwatch const&); // Don't Implement
96  void operator=(TStopwatch const&); // Don't implement
97 
  // Per-experiment state, one slot per experiment (NEXPS slots each).
98  double Starts[NEXPS];
99  int Cnts[NEXPS];
100  double Sums[NEXPS];
101  double Maxs[NEXPS];
102  double Mins[NEXPS];
103 
104 private:
  // Defined in util.cpp. NOTE(review): presumably returns the current time -- confirm.
105  double Tick();
106 
107 public:
  // Start a new experiment.
109  void Start(const TExperiment Exp);
  // Stop the current experiment.
111  void Stop(const TExperiment Exp);
  // Returns the number of experiments.
113  int Cnt(const TExperiment Exp) const;
  // Returns the total time of all experiments.
115  double Sum(const TExperiment Exp) const;
  // Returns the average time of all experiments.
117  double Avg(const TExperiment Exp) const;
  // Returns the maximum time of all experiments.
119  double Max(const TExperiment Exp) const;
  // Returns the minimum time of all experiments.
121  double Min(const TExperiment Exp) const;
122 };
123 
124 //#//////////////////////////////////////////////
126 
// The declarations and templates from here to the matching #endif are compiled only
// when SW_WRITEN is defined.
127 #if defined(SW_WRITEN)
128 
// Declared here, defined elsewhere. The templates in this header pass it a file
// descriptor, a byte pointer and a byte count, treat a negative return as failure,
// and add a non-negative return to their running byte total.
// NOTE(review): presumably writes exactly nbytes bytes or fails -- confirm at the definition.
130 extern int WriteN(int fd, char *ptr, int nbytes);
131 
133 
136 template <class TVal, class TSizeTy>
137 int64 SendVec(const TVec<TVal, TSizeTy>& V, int FileDesc) {
138  int64 l = 0;
139  int n;
140  int r;
141  TSizeTy Vals = V.Len();
142  int ChunkSize = 25600;
143 
144  r = WriteN(FileDesc, (char *) &Vals, (int) sizeof(TSizeTy));
145  if (r < 0) {
146  return r;
147  }
148  l += r;
149 
150  r = WriteN(FileDesc, (char *) &Vals, (int) sizeof(TSizeTy));
151  if (r < 0) {
152  return r;
153  }
154  l += r;
155 
156  for (TSizeTy ValN = 0; ValN < Vals; ValN += ChunkSize) {
157  n = ChunkSize;
158  if ((Vals - ValN) < ChunkSize) {
159  n = Vals - ValN;
160  }
161  r = WriteN(FileDesc, (char *) &V[ValN], (int) (n*sizeof(TVal)));
162  if (r < 0) {
163  return r;
164  }
165  l += r;
166  }
167  return l;
168 }
169 
170 
172 
175 template <class TVal, class TSizeTy>
176 int64 SendVec64(const TVec< TVec< TVal, TSizeTy > , TSizeTy >&Vec64, int FileDesc) {
177  TSizeTy N =Vec64.Len();
178  int64 l=0;
179  int r;
180 
181  r = WriteN(FileDesc, (char *) &N, (int) sizeof(TSizeTy));
182  if (r < 0) {
183  return r;
184  }
185  l += r;
186 
187  r = WriteN(FileDesc, (char *) &N, (int) sizeof(TSizeTy));
188  if (r < 0) {
189  return r;
190  }
191  l += r;
192 
193  for (typename TVec< TVec< TVal, TSizeTy >, TSizeTy >::TIter it=Vec64.BegI(); it!=Vec64.EndI(); ++it) {
194  r = SendVec(*it, FileDesc);
195  if (r < 0) {
196  return r;
197  }
198  l += r;
199  }
200 
201  return l;
202 }
203 #endif
String helper functions and utilities. Quick and dirty!
Definition: util.h:34
static const int NEXPS
The expected number of experiments (must be at least equal to the size of the above list) ...
Definition: util.h:80
static TChA GetDomNm(const TChA &UrlChA)
Definition: util.cpp:187
static bool GetNormalizedUrl(const TChA &UrlIn, const TChA &BaseUrl, TChA &UrlOut)
Quick URL normalization: Remove ending /, /index.html, etc. and strip starting www.
Definition: util.cpp:306
static void GetStdNameV(TStr AuthorNames, TStrV &StdNameV)
Splits a list of people's names.
Definition: util.cpp:664
static TChA GetWebsiteNm(const TChA &UrlChA)
Definition: util.cpp:218
TStopwatch()
Definition: util.h:88
static TChA GetDomNm2(const TChA &UrlChA)
Definition: util.cpp:201
double Tick()
Routines to benchmark table operations.
Definition: util.cpp:701
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:575
double Min(const TExperiment Exp) const
Returns the minimum time of all experiments.
Definition: util.cpp:761
static void GetWIdV(const TStrHash< TInt > &StrH, const char *CStr, TIntV &WIdV)
Definition: util.cpp:538
static int SplitSentences(TChA &ChA, TVec< char * > &SentenceV)
Definition: util.cpp:460
static void GetXmlTagNmVal(TXmlLx &XmlLx, TChA &TagNm, TChA &TagVal)
Definition: util.cpp:149
void Start(const TExperiment Exp)
Start a new experiment.
Definition: util.cpp:733
static void MakeExpBins(const TFltPrV &XYValV, TFltPrV &ExpXYValV, const double &BinFactor=2, const double &MinYVal=1)
Definition: util.cpp:99
static void RemoveHtmlTags(const TChA &HtmlStr, TChA &TextStr)
Definition: util.cpp:481
int Cnt(const TExperiment Exp) const
Returns the number of experiments.
Definition: util.cpp:745
static bool GetXmlTagNmVal2(TXmlLx &XmlLx, TChA &TagNm, TChA &TagVal, const bool &TakeTagNms)
Definition: util.cpp:163
void Stop(const TExperiment Exp)
Stop the current experiment.
Definition: util.cpp:737
void operator=(TStopwatch const &)
static void GetPdf(const TIntPrV &CdfV, TIntPrV &PdfV)
Definition: util.cpp:63
static TChA GetShorStr(const TChA &LongStr, const int MaxLen=50)
Definition: util.cpp:342
static int SplitLines(TChA &ChA, TVec< char * > &LineV, const bool &SkipEmpty=false)
Definition: util.cpp:439
Definition: xml.h:98
static int CountWords(const char *CStr)
Definition: util.cpp:393
static int SplitOnCh(TChA &ChA, TVec< char * > &WrdV, const char &Ch, const bool &SkipEmpty=false)
Definition: util.cpp:425
static int SplitWords(TChA &ChA, TVec< char * > &WrdV, const bool &SplitOnWs=true)
Definition: util.cpp:412
static TChA GetCleanWrdStr(const TChA &ChA)
Definition: util.cpp:350
static TChA & GetXmlTagVal(TXmlLx &XmlLx, const TChA &TagNm)
Definition: util.cpp:132
static void GetCdf(const TIntPrV &PdfV, TIntPrV &CdfV)
Definition: util.cpp:3
double Starts[NEXPS]
Definition: util.h:98
double Max(const TExperiment Exp) const
Returns the maximum time of all experiments.
Definition: util.cpp:757
static void GetCCdf(const TIntPrV &PdfV, TIntPrV &CCdfV)
Definition: util.cpp:33
Definition: hash.h:781
Definition: dt.h:201
Definition: tm.h:81
static TStr GetStdName(TStr AuthorName)
Puts person's name (first middle last) in a standard form: _ ...
Definition: util.cpp:621
double Avg(const TExperiment Exp) const
Returns the average time of all experiments.
Definition: util.cpp:753
long long int64
Definition: bd.h:27
Definition: dt.h:412
static bool IsLatinStr(const TChA &Str, const double &MinAlFrac)
Definition: util.cpp:527
int Cnts[NEXPS]
Definition: util.h:99
TIter BegI() const
Returns an iterator pointing to the first element in the vector.
Definition: ds.h:593
static void Normalize(TFltPrV &PdfV)
Definition: util.cpp:81
static bool GetTmFromStr(const char *TmStr, TSecTm &Tm)
Parses time in many different text formats. See source code for details.
Definition: util.cpp:571
static TStopwatch * GetInstance()
Definition: util.h:82
static TChA GetCleanStr(const TChA &ChA)
Definition: util.cpp:372
static bool StripEnd(const TChA &Str, const TChA &SearchStr, TChA &NewStr)
Definition: util.cpp:331
TExperiment
List of all experiments.
Definition: util.h:74
double Sums[NEXPS]
Definition: util.h:100
double Sum(const TExperiment Exp) const
Returns the total time of all experiments.
Definition: util.cpp:749
double Mins[NEXPS]
Definition: util.h:102
double Maxs[NEXPS]
Definition: util.h:101
static void GetAddWIdV(TStrHash< TInt > &StrH, const char *CStr, TIntV &WIdV)
Definition: util.cpp:552
Graph Utilities.
Definition: util.h:3
Routines to benchmark table operations.
Definition: util.h:71