SNAP Library 4.0, Developer Reference  2017-07-27 13:18:06
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
cascdynetinf.h
Go to the documentation of this file.
1 #ifndef snap_cascdynetinf_h
2 #define snap_cascdynetinf_h
3 
4 #include "Snap.h"
5 
6 // pairwise transmission models
7 typedef enum {
8  EXP, // exponential
9  POW, // powerlaw
10  RAY, // rayleigh
11  WEI // weibull
12 } TModel;
13 
14 // tx rates trends over time for synthetic experiments
15 typedef enum {
16  CONSTANT, // constant
17  LINEAR, // linear trend up/down
18  EXPONENTIAL, // exponential trend
19  RAYLEIGH, // rayleigh trend
20  SLAB, // slab
21  SQUARE, // square
22  CHAINSAW, // chainsaw
23  RANDOM // random noise around alpha value
24 } TVarying;
25 
26 // optimization methods
27 typedef enum {
28  OSG, // stochastic gradient
29  OWSG, // windowed stochastic gradient
30  OESG, // exponential decay stochastic gradient
31  OWESG, // windowed exponential decay stochastic gradient
32  ORSG, // rayleigh decay stochastic gradient
33  OBSG, // no decay batch stochastic gradient
34  OWBSG, // windowed batch stochastic gradient
35  OEBSG, // exponential decay batch stochastic gradient
36  ORBSG, // rayleigh decay batch stochastic gradient
38 } TOptMethod;
39 
40 typedef enum {
46 } TSampling;
47 
48 // l2 regularizer on/off
49 typedef enum {
50  NONE, // no regularizer
51  L2REG // L2 regularizer
52 } TRegularizer;
53 
54 typedef enum {
55  TIME_STEP, // run inference every time step
56  INFECTION_STEP, // run inference every # number of infections
57  CASCADE_STEP, // run inference every time a cascade "finishes"
59 } TRunningMode;
60 
63 
66 
67 // Hit info (node id, timestamp) about a node in a cascade
68 class THitInfo {
69 public:
73 public:
74  THitInfo(const int& NodeId=-1, const double& HitTime=0) : NId(NodeId), Tm(HitTime) { }
75  THitInfo(TSIn& SIn) : NId(SIn), Tm(SIn), Keywords(SIn) { }
76  void AddKeyword(const int& KId) { Keywords.AddUnique(KId); }
77  void DelKeywords() { Keywords.Clr(); }
78  void Save(TSOut& SOut) const { NId.Save(SOut); Tm.Save(SOut); Keywords.Save(SOut); }
79  bool operator < (const THitInfo& Hit) const {
80  return Tm < Hit.Tm; }
81 };
82 
83 // Cascade
84 class TCascade {
85 public:
86  TInt CId; // cascade id
87  THash<TInt, THitInfo> NIdHitH; // infected nodes
88  TInt Model; // pairwise transmission model
89 public:
90  TCascade() : CId(0), NIdHitH(), Model(0) { }
91  TCascade(const int &model) : NIdHitH() { Model = model; }
92  TCascade(const int &cid, const int& model) : NIdHitH() { CId = cid; Model = model; }
93  TCascade(TSIn& SIn) : CId(SIn), NIdHitH(SIn), Model(SIn) { }
94  void Save(TSOut& SOut) const { CId.Save(SOut); NIdHitH.Save(SOut); Model.Save(SOut); }
95  void Clr() { NIdHitH.Clr(); }
96  int GetId() { return CId; }
97  int Len() const { return NIdHitH.Len(); }
98  int LenBeforeT(const double& T) { int len = 0; while (len < NIdHitH.Len() && NIdHitH[len].Tm <= T) { len++; } return len; }
99  int LenAfterT(const double& T) { int len = 0; while (len < NIdHitH.Len() && NIdHitH[NIdHitH.Len()-1-len].Tm >= T) { len++; } return len; }
100  int GetNode(const int& i) const { return NIdHitH.GetKey(i); }
103  int GetModel() const { return Model; }
104  double GetTm(const int& NId) const { return NIdHitH.GetDat(NId).Tm; }
105  double GetMaxTm() const { return NIdHitH[NIdHitH.Len()-1].Tm; } // we assume the cascade is sorted
106  double GetMinTm() const { return NIdHitH[0].Tm; } // we assume the cascade is sorted
107  void Add(const int& NId, const double& HitTm) { NIdHitH.AddDat(NId, THitInfo(NId, HitTm)); }
108  void Del(const int& NId) { NIdHitH.DelKey(NId); }
109  bool IsNode(const int& NId) const { return NIdHitH.IsKey(NId); }
110  void Sort() { NIdHitH.SortByDat(true); }
111  bool operator < (const TCascade& Cascade) const {
112  return Len() < Cascade.Len(); }
113 };
114 
115 // Node info (name and number of cascades)
116 class TNodeInfo {
117 public:
120 public:
121  TNodeInfo() { }
122  TNodeInfo(const TStr& NodeNm, const int& Volume) : Name(NodeNm), Vol(Volume) { }
123  TNodeInfo(TSIn& SIn) : Name(SIn), Vol(SIn) { }
124  void Save(TSOut& SOut) const { Name.Save(SOut); Vol.Save(SOut); }
125  bool operator < (const TNodeInfo& NodeInfo) const {
126  return Vol < NodeInfo.Vol; }
127 };
128 
129 // Stochastic gradient network inference class
130 class TNIBs {
131 public:
132  THash<TInt, TCascade> CascH; // cascades, indexed by id
133  THash<TInt, TNodeInfo> NodeNmH; // node info (name, volume), indexed by node id
134  TStrIntH DomainsIdH; // domain, DomainId hash table
135  TStrIntH CascadeIdH; // quote, CascadeId hash table, QuoteId is equivalent to cascadeId
136 
137  // cascades per edge
139 
140  // network
142 
143  // pairwise transmission model
145 
146  // time horizon per cascade (if it is fixed), and totaltime
148 
149  // delta for power-law and k for weibull
151 
152  // step (gamma), regularizer (mu), tolerance, and min/max alpha for stochastic gradient descend
156 
157  // inferred network
160 
161  // gradients (per alpha & cascade)
164 
165  // sampled cascades
167 
168  // performance measures
171 
172 public:
173  TNIBs( ) { }
174  TNIBs(TSIn& SIn) : CascH(SIn), NodeNmH(SIn), CascPerEdge(SIn), InferredNetwork(SIn) { Model = EXP; }
175  void Save(TSOut& SOut) const { CascH.Save(SOut); NodeNmH.Save(SOut); CascPerEdge.Save(SOut); InferredNetwork.Save(SOut); }
176 
177  // functions to load text cascades & network files
178  void LoadCascadesTxt(TSIn& SIn);
179  void LoadGroundTruthTxt(TSIn& SIn);
180  void LoadGroundTruthNodesTxt(TSIn& SIn);
181  void LoadInferredTxt(TSIn& SIn);
182  void LoadInferredNodesTxt(TSIn& SIn);
183 
184  // maximum time for synthetic generation, tx model & window per cascade (if any)
185  void SetTotalTime(const float& tt) { TotalTime = tt; }
186  void SetModel(const TModel& model) { Model = model; }
187  void SetWindow(const double& window) { Window = window; }
188 
189  // delta for power law & k for weibull
190  void SetDelta(const double& delta) { Delta = delta; }
191  void SetK(const double& k) { K = k; }
192 
193  // optimization parameters
194  void SetGamma(const double& gamma) { Gamma = gamma; }
195  void SetAging(const double& aging) { Aging = aging; }
196  void SetRegularizer(const TRegularizer& reg) { Regularizer = reg; }
197  void SetMu(const double& mu) { Mu = mu; }
198  void SetTolerance(const double& tol) { Tol = tol; }
199  void SetMaxAlpha(const double& ma) { MaxAlpha = ma; }
200  void SetMinAlpha(const double& ma) { MinAlpha = ma; }
201  void SetInitAlpha(const double& ia) { InitAlpha = ia; }
202 
203  // processing cascades
204  void AddCasc(const TStr& CascStr, const TModel& Model=EXP);
205  void AddCasc(const TCascade& Cascade) { CascH.AddDat(Cascade.CId) = Cascade; }
206  void AddCasc(const TIntFltH& Cascade, const int& CId=-1, const TModel& Model=EXP);
207  void GenCascade(TCascade& C);
208  bool IsCascade(int c) { return CascH.IsKey(c); }
209  TCascade & GetCasc(int c) { return CascH.GetDat(c); }
210  int GetCascs() { return CascH.Len(); }
211  int GetCascadeId(const TStr& Cascade) { return CascadeIdH.GetDat(Cascade); }
212 
213  // node info
214  int GetNodes() { return InferredNetwork.GetNodes(); }
215  void AddNodeNm(const int& NId, const TNodeInfo& Info) { NodeNmH.AddDat(NId, Info); }
216  TStr GetNodeNm(const int& NId) const { return NodeNmH.GetDat(NId).Name; }
217  TNodeInfo GetNodeInfo(const int& NId) const { return NodeNmH.GetDat(NId); }
218  bool IsNodeNm(const int& NId) const { return NodeNmH.IsKey(NId); }
219  void SortNodeNmByVol(const bool& asc=false) { NodeNmH.SortByDat(asc); }
220 
221  // domains
222  void AddDomainNm(const TStr& Domain, const int& DomainId=-1) { DomainsIdH.AddDat(Domain) = TInt(DomainId==-1? DomainsIdH.Len() : DomainId); }
223  bool IsDomainNm(const TStr& Domain) const { return DomainsIdH.IsKey(Domain); }
224  int GetDomainId(const TStr& Domain) { return DomainsIdH.GetDat(Domain); }
225 
226  // get network or graph at a given time
227  void GetGroundTruthGraphAtT(const double& Step, PNGraph &GraphAtT);
228  void GetGroundTruthNetworkAtT(const double& Step, PStrFltNEDNet& NetworkAtT);
229  void GetInferredGraphAtT(const double& Step, PNGraph &GraphAtT);
230  void GetInferredNetworkAtT(const double& Step, PStrFltNEDNet& NetworkAtT);
231 
232  // reset/init for optimization
233  void Reset();
234  void Init(const TFltV& Steps);
235 
236  // optimization methods
237  void SG(const int& NId, const int& Iters, const TFltV& Steps, const TSampling& Sampling, const TStr& ParamSampling=TStr(""), const bool& PlotPerformance=false);
238  void BSG(const int& NId, const int& Iters, const TFltV& Steps, const int& BatchLen, const TSampling& Sampling, const TStr& ParamSampling=TStr(""), const bool& PlotPerformance=false);
239  void FG(const int& NId, const int& Iters, const TFltV& Steps);
240 
241  // auxiliary function for optimization
242  void UpdateDiff(const TOptMethod& OptMethod, const int& NId, TCascade& Cascade, TIntPrV& AlphasToUpdate, const double& CurrentTime=TFlt::Mx);
243 
244  // functions to compute burstiness
245  void find_C( int t, TFltV &x, TFltVV &C, const int& k, const double& s, const double& gamma, const double& T );
246  void find_min_state( TFltVV &C, TIntV &states, const int& k, const double& s, const double& gamma, const double& T );
247  void LabelBurstAutomaton(const int& SrcId, const int& DstId, TIntV &state_labels, TFltV &state_times, const bool& inferred=false, const int& k = 5, const double& s = 2.0, const double& gamma = 1.0, const TSecTm& MinTime=TSecTm(), const TSecTm& MaxTime=TSecTm() );
248 
249  // function to compute performance for a particular time step and node given groundtruth + inferred network
250  void ComputePerformanceNId(const int& NId, const int& Step, const TFltV& Steps);
251 
252  // storing ground truth and inferred network in pajek and text format
253  void SaveInferredPajek(const TStr& OutFNm, const double& Step, const TIntV& NIdV=TIntV());
254  void SaveInferred(const TStr& OutFNm, const TIntV& NIdV=TIntV());
255  void SaveInferred(const TStr& OutFNm, const double& Step, const TIntV& NIdV=TIntV());
256  void SaveInferredEdges(const TStr& OutFNm);
257 
258  // store network
259  void SaveGroundTruthPajek(const TStr& OutFNm, const double& Step);
260  void SaveGroundTruth(const TStr& OutFNm);
261 
262  // storing NodeId, site name
263  void SaveSites(const TStr& OutFNm, const TIntFltVH& CascadesPerNode=TIntFltVH());
264 
265  // storing cascades in text format
266  void SaveCascades(const TStr& OutFNm);
267 };
268 
269 #endif
TSizeTy AddUnique(const TVal &Val)
Adds element Val to a vector only if the element Val is not already in the vector.
Definition: ds.h:1162
TIntV Keywords
Definition: cascdynetinf.h:72
int GetCascs()
Definition: cascdynetinf.h:210
TNIBs(TSIn &SIn)
Definition: cascdynetinf.h:174
void BSG(const int &NId, const int &Iters, const TFltV &Steps, const int &BatchLen, const TSampling &Sampling, const TStr &ParamSampling=TStr(""), const bool &PlotPerformance=false)
void SaveGroundTruth(const TStr &OutFNm)
void Save(TSOut &SOut) const
Definition: cascdynetinf.h:94
void SaveInferred(const TStr &OutFNm, const TIntV &NIdV=TIntV())
void SortNodeNmByVol(const bool &asc=false)
Definition: cascdynetinf.h:219
void AddCasc(const TCascade &Cascade)
Definition: cascdynetinf.h:205
TNodeInfo GetNodeInfo(const int &NId) const
Definition: cascdynetinf.h:217
THitInfo(const int &NodeId=-1, const double &HitTime=0)
Definition: cascdynetinf.h:74
void SetK(const double &k)
Definition: cascdynetinf.h:191
bool operator<(const TCascade &Cascade) const
Definition: cascdynetinf.h:111
TIntIntPrH SampledCascadesH
Definition: cascdynetinf.h:166
virtual void Save(TSOut &SOut) const
Saves the network to a (binary) stream SOut.
Definition: network.h:659
void SaveGroundTruthPajek(const TStr &OutFNm, const double &Step)
void SetInitAlpha(const double &ia)
Definition: cascdynetinf.h:201
int GetId()
Definition: cascdynetinf.h:96
TInt NId
Definition: cascdynetinf.h:70
TFlt Mu
Definition: cascdynetinf.h:153
TCascade(TSIn &SIn)
Definition: cascdynetinf.h:93
TCascade(const int &model)
Definition: cascdynetinf.h:91
void DelKeywords()
Definition: cascdynetinf.h:77
TFlt TotalTime
Definition: cascdynetinf.h:147
void Save(TSOut &SOut) const
Definition: dt.h:1150
void Init(const TFltV &Steps)
THash< TInt, TCascade > CascH
Definition: cascdynetinf.h:132
THash< TInt, THitInfo >::TIter EndI() const
Definition: cascdynetinf.h:102
int Len() const
Definition: cascdynetinf.h:97
TStrFltFltHNEDNet Network
Definition: cascdynetinf.h:141
void SetMu(const double &mu)
Definition: cascdynetinf.h:197
TStrFltFltHNEDNet InferredNetwork
Definition: cascdynetinf.h:158
TCascade & GetCasc(int c)
Definition: cascdynetinf.h:209
TFlt MaxAlpha
Definition: cascdynetinf.h:155
void AddDomainNm(const TStr &Domain, const int &DomainId=-1)
Definition: cascdynetinf.h:222
void LoadInferredNodesTxt(TSIn &SIn)
TIter BegI() const
Definition: hash.h:213
void LoadCascadesTxt(TSIn &SIn)
Definition: cascdynetinf.cpp:4
int GetNodes()
Definition: cascdynetinf.h:214
TRunningMode
Definition: cascdynetinf.h:54
THash< TInt, TNodeInfo > NodeNmH
Definition: cascdynetinf.h:133
bool IsDomainNm(const TStr &Domain) const
Definition: cascdynetinf.h:223
void Save(TSOut &SOut) const
Definition: hash.h:183
TModel
Definition: cascdynetinf.h:7
TNodeInfo(const TStr &NodeNm, const int &Volume)
Definition: cascdynetinf.h:122
void AddCasc(const TStr &CascStr, const TModel &Model=EXP)
TFlt Window
Definition: cascdynetinf.h:147
TNodeInfo(TSIn &SIn)
Definition: cascdynetinf.h:123
void LoadGroundTruthTxt(TSIn &SIn)
TPt< TStrFltFltHNEDNet > PStrFltFltHNEDNet
Definition: cascdynetinf.h:62
void AddKeyword(const int &KId)
Definition: cascdynetinf.h:76
THash< TInt, THitInfo > NIdHitH
Definition: cascdynetinf.h:87
void SetRegularizer(const TRegularizer &reg)
Definition: cascdynetinf.h:196
void SetMinAlpha(const double &ma)
Definition: cascdynetinf.h:200
const TDat & GetDat(const TKey &Key) const
Definition: hash.h:262
TIter EndI() const
Definition: hash.h:218
int LenAfterT(const double &T)
Definition: cascdynetinf.h:99
void SetDelta(const double &delta)
Definition: cascdynetinf.h:190
void GetInferredGraphAtT(const double &Step, PNGraph &GraphAtT)
TFlt K
Definition: cascdynetinf.h:150
bool operator<(const THitInfo &Hit) const
Definition: cascdynetinf.h:79
static const double Mx
Definition: dt.h:1388
TIntFltH AveDiffAlphas
Definition: cascdynetinf.h:162
void SaveInferredPajek(const TStr &OutFNm, const double &Step, const TIntV &NIdV=TIntV())
double GetMinTm() const
Definition: cascdynetinf.h:106
Definition: dt.h:1383
TFlt Delta
Definition: cascdynetinf.h:150
TNodeEDatNet< TStr, TFltFltH > TStrFltFltHNEDNet
Definition: cascdynetinf.h:61
Definition: fl.h:58
void Save(TSOut &SOut) const
Definition: ds.h:954
TCascade(const int &cid, const int &model)
Definition: cascdynetinf.h:92
TInt Model
Definition: cascdynetinf.h:88
TFltPrV PrecisionRecall
Definition: cascdynetinf.h:169
double GetTm(const int &NId) const
Definition: cascdynetinf.h:104
TFlt MinAlpha
Definition: cascdynetinf.h:155
THitInfo(TSIn &SIn)
Definition: cascdynetinf.h:75
void DelKey(const TKey &Key)
Definition: hash.h:404
TFlt Gamma
Definition: cascdynetinf.h:153
void Clr(const bool &DoDel=true, const TSizeTy &NoDelLim=-1)
Clears the contents of the vector.
Definition: ds.h:1022
THash< TInt, TFltV > TIntFltVH
Definition: hash.h:618
TOptMethod
Definition: cascdynetinf.h:27
void Save(TSOut &SOut, const bool &IsSmall=false) const
Definition: dt.h:440
TFltPrV MSE
Definition: cascdynetinf.h:170
void SG(const int &NId, const int &Iters, const TFltV &Steps, const TSampling &Sampling, const TStr &ParamSampling=TStr(""), const bool &PlotPerformance=false)
TInt CId
Definition: cascdynetinf.h:86
void Del(const int &NId)
Definition: cascdynetinf.h:108
void LoadInferredTxt(TSIn &SIn)
void SetWindow(const double &window)
Definition: cascdynetinf.h:187
void SetAging(const double &aging)
Definition: cascdynetinf.h:195
void GenCascade(TCascade &C)
void Save(TSOut &SOut) const
Definition: cascdynetinf.h:175
void Save(TSOut &SOut) const
Definition: cascdynetinf.h:78
TPt< TStrFltNEDNet > PStrFltNEDNet
Definition: cascdynetinf.h:65
bool operator<(const TNodeInfo &NodeInfo) const
Definition: cascdynetinf.h:125
void ComputePerformanceNId(const int &NId, const int &Step, const TFltV &Steps)
void FG(const int &NId, const int &Iters, const TFltV &Steps)
TStrIntH DomainsIdH
Definition: cascdynetinf.h:134
TFlt InitAlpha
Definition: cascdynetinf.h:155
void SaveSites(const TStr &OutFNm, const TIntFltVH &CascadesPerNode=TIntFltVH())
Definition: fl.h:128
TFltPrV Accuracy
Definition: cascdynetinf.h:170
void SaveInferredEdges(const TStr &OutFNm)
void GetInferredNetworkAtT(const double &Step, PStrFltNEDNet &NetworkAtT)
bool IsNodeNm(const int &NId) const
Definition: cascdynetinf.h:218
void Clr()
Definition: cascdynetinf.h:95
void GetGroundTruthNetworkAtT(const double &Step, PStrFltNEDNet &NetworkAtT)
void Reset()
int GetModel() const
Definition: cascdynetinf.h:103
Definition: dt.h:1134
int GetCascadeId(const TStr &Cascade)
Definition: cascdynetinf.h:211
void SetTolerance(const double &tol)
Definition: cascdynetinf.h:198
TStrIntH CascadeIdH
Definition: cascdynetinf.h:135
TNodeEDatNet< TStr, TFlt > TStrFltNEDNet
Definition: cascdynetinf.h:64
Definition: tm.h:81
TFlt Aging
Definition: cascdynetinf.h:153
void UpdateDiff(const TOptMethod &OptMethod, const int &NId, TCascade &Cascade, TIntPrV &AlphasToUpdate, const double &CurrentTime=TFlt::Mx)
void find_C(int t, TFltV &x, TFltVV &C, const int &k, const double &s, const double &gamma, const double &T)
THash< TInt, THitInfo >::TIter BegI() const
Definition: cascdynetinf.h:101
double GetMaxTm() const
Definition: cascdynetinf.h:105
Definition: dt.h:412
void LoadGroundTruthNodesTxt(TSIn &SIn)
TFltPrV MAE
Definition: cascdynetinf.h:170
void Add(const int &NId, const double &HitTm)
Definition: cascdynetinf.h:107
bool IsCascade(int c)
Definition: cascdynetinf.h:208
TRegularizer Regularizer
Definition: cascdynetinf.h:154
void LabelBurstAutomaton(const int &SrcId, const int &DstId, TIntV &state_labels, TFltV &state_times, const bool &inferred=false, const int &k=5, const double &s=2.0, const double &gamma=1.0, const TSecTm &MinTime=TSecTm(), const TSecTm &MaxTime=TSecTm())
void SetMaxAlpha(const double &ma)
Definition: cascdynetinf.h:199
void SetModel(const TModel &model)
Definition: cascdynetinf.h:186
TVec< TInt > TIntV
Definition: ds.h:1594
void Clr(const bool &DoDel=true, const int &NoDelLim=-1, const bool &ResetDat=true)
Definition: hash.h:361
Definition: bd.h:196
int LenBeforeT(const double &T)
Definition: cascdynetinf.h:98
void SetTotalTime(const float &tt)
Definition: cascdynetinf.h:185
void SaveCascades(const TStr &OutFNm)
TSampling
Definition: cascdynetinf.h:40
int GetNode(const int &i) const
Definition: cascdynetinf.h:100
void Save(TSOut &SOut) const
Definition: cascdynetinf.h:124
THash< TIntPr, TIntV > CascPerEdge
Definition: cascdynetinf.h:138
bool IsKey(const TKey &Key) const
Definition: hash.h:258
TStr GetNodeNm(const int &NId) const
Definition: cascdynetinf.h:216
int Len() const
Definition: hash.h:228
int GetDomainId(const TStr &Domain)
Definition: cascdynetinf.h:224
TVarying
Definition: cascdynetinf.h:15
void Sort()
Definition: cascdynetinf.h:110
TDat & AddDat(const TKey &Key)
Definition: hash.h:238
void SetGamma(const double &gamma)
Definition: cascdynetinf.h:194
void AddNodeNm(const int &NId, const TNodeInfo &Info)
Definition: cascdynetinf.h:215
THash< TInt, TIntFltH > DiffAlphas
Definition: cascdynetinf.h:163
void Save(TSOut &SOut) const
Definition: dt.h:1399
const TKey & GetKey(const int &KeyId) const
Definition: hash.h:252
TRegularizer
Definition: cascdynetinf.h:49
void GetGroundTruthGraphAtT(const double &Step, PNGraph &GraphAtT)
bool IsNode(const int &NId) const
Definition: cascdynetinf.h:109
void find_min_state(TFltVV &C, TIntV &states, const int &k, const double &s, const double &gamma, const double &T)
TModel Model
Definition: cascdynetinf.h:144
TFlt Tol
Definition: cascdynetinf.h:155
int GetNodes() const
Returns the number of nodes in the network.
Definition: network.h:680
TIntFltH TotalCascadesAlpha
Definition: cascdynetinf.h:159
void SortByDat(const bool &Asc=true)
Definition: hash.h:292