SNAP Library 4.0, Developer Reference  2017-07-27 13:18:06
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
agmfit.h
Go to the documentation of this file.
1 #ifndef snap_agmfit_h
2 #define snap_agmfit_h
3 
4 #include "Snap.h"
5 
8 class TAGMFit {
9 private:
24 
25 public:
26  TAGMFit() { }
27  ~TAGMFit() { }
29  TAGMFit(const PUNGraph& GraphPt, const TVec<TIntV>& CmtyVVPt, const int RndSeed = 0): G(GraphPt), PNoCom(0.0), Rnd(RndSeed), MinLambda(0.00001), MaxLambda(10.0), RegCoef(0), BaseCID(-1) { SetCmtyVV(CmtyVVPt); }
31  TAGMFit(const PUNGraph& GraphPt, const int InitComs, const int RndSeed = 0): G(GraphPt), PNoCom(0.0), Rnd(RndSeed), MinLambda(0.00001), MaxLambda(10.0), RegCoef(0), BaseCID(-1) { NeighborComInit(InitComs); }//RandomInitCmtyVV(InitComs); }
33  TAGMFit(const PUNGraph& GraphPt, const TVec<TIntV>& CmtyVVPt, const TRnd& RndPt): G(GraphPt), PNoCom(0.0), Rnd(RndPt), MinLambda(0.00001), MaxLambda(10.0), RegCoef(0), BaseCID(-1) { SetCmtyVV(CmtyVVPt); }
34  void Save(TSOut& SOut);
35  void Load(TSIn& SIn, const int& RndSeed = 0);
36 
38  void RandomInitCmtyVV(const int InitComs, const double ComSzAlpha = 1.3, const double MemAlpha = 1.8, const int MinComSz = 8, const int MaxComSz = 200, const int MinMem = 1, const int MaxMem = 10);
40  void AddBaseCmty();
42  double Likelihood();
43  double Likelihood(const TFltV& NewLambdaV) { double Tmp1, Tmp2; return Likelihood(NewLambdaV, Tmp1, Tmp2); }
44  double Likelihood(const TFltV& NewLambdaV, double& LEdges, double& LNoEdges);
45  void SetRegCoef(const double Val) { RegCoef = Val; }
47  void GetEdgeJointCom();
49  void NeighborComInit(const int InitComs);
50  // Gradient of likelihood for \c P_c.
51  void GradLogLForLambda(TFltV& GradV);
53  int MLEGradAscentGivenCAG(const double& Thres=0.001, const int& MaxIter=10000, const TStr PlotNm = TStr());
55  void SetDefaultPNoCom();
57  void SetPNoCom(const double& Epsilon) { if (BaseCID == -1 && Epsilon > 0.0) { PNoCom = Epsilon; } }
58  double GetPNoCom() { return PNoCom; }
60  double CalcPNoComByCmtyVV(const int& SamplePairs = -1);
61  // OP RS 2014/04/10 commented out since there is no implementation
62  //void GetNewtonStep(TFltVV& HVV, TFltV& GradV, TFltV& DeltaLV);
64  double SelectLambdaSum(const TIntSet& ComK);
66  double SelectLambdaSum(const TFltV& NewLambdaV, const TIntSet& ComK);
67 
69  void RandomInit(const int& MaxK);
71  void RunMCMC(const int& MaxIter, const int& EvalLambdaIter, const TStr& PlotFPrx = TStr());
73  void SampleTransition(int& NID, int& JoinCID, int& LeaveCID, double& DeltaL);
75  void InitNodeData();
77  void LeaveCom(const int& NID, const int& CID);
78  // After MCMC, \c NID joins community \c CID.
79  void JoinCom(const int& NID, const int& JoinCID);
81  int RemoveEmptyCom();
83  double SeekLeave(const int& UID, const int& CID);
85  double SeekJoin(const int& UID, const int& CID);
86  // Compute the change in likelihood (Delta) if node \c UID switches from \c CurCID to \c NewCID.
87  double SeekSwitch(const int& UID, const int& CurCID, const int& NewCID);
88 
90  double GetStepSizeByLineSearchForLambda(const TFltV& DeltaV, const TFltV& GradV, const double& Alpha, const double& Beta);
92  void SetLambdaV(const TFltV& LambdaPt) {LambdaV = LambdaPt;}
94  void GetLambdaV(TFltV& OutV) {OutV = LambdaV;}
96  void GetQV(TFltV& OutV);
98  void GetCmtyVV(TVec<TIntV>& CmtyVV, const double QMax = 2.0);
100  void GetCmtyVV(TVec<TIntV>& CmtyVV, TFltV& QV, const double QMax = 2.0);
102  void SetCmtyVV(const TVec<TIntV>& CmtyVV);
104  void PrintSummary();
105 };
106 
107 #endif
void Load(TSIn &SIn, const int &RndSeed=0)
Definition: agmfit.cpp:24
void SetLambdaV(const TFltV &LambdaPt)
COMMENT.
Definition: agmfit.h:92
TRnd Rnd
Definition: agmfit.h:17
double SeekSwitch(const int &UID, const int &CurCID, const int &NewCID)
Definition: agmfit.cpp:525
Definition: dt.h:11
~TAGMFit()
Definition: agmfit.h:27
void SetDefaultPNoCom()
Set Epsilon (the probability that two nodes sharing no communities connect) to the default value...
Definition: agmfit.cpp:72
void SetPNoCom(const double &Epsilon)
Set Epsilon (the probability that two nodes sharing no communities connect) to the default value...
Definition: agmfit.h:57
THash< TIntPr, TInt > NIDCIDPrS
pairs (for sampling MCMC moves).
Definition: agmfit.h:19
double GetStepSizeByLineSearchForLambda(const TFltV &DeltaV, const TFltV &GradV, const double &Alpha, const double &Beta)
Step size search for updating P_c (which is parametarized by regularization parameter lambda)...
Definition: agmfit.cpp:109
double Likelihood()
COMMENT.
Definition: agmfit.cpp:104
TFltV LambdaV
Parametrization of P_c (edge probability in community c), P_c = 1 - exp(-lambda). ...
Definition: agmfit.h:16
void PrintSummary()
COMMENT.
Definition: agmfit.cpp:659
TAGMFit(const PUNGraph &GraphPt, const int InitComs, const int RndSeed=0)
COMMENT. Use to describribe parameters.
Definition: agmfit.h:31
TInt BaseCID
ID of the Epsilon-community (in case we fit P_c of the epsilon community). We do not fit for the Epsi...
Definition: agmfit.h:23
void GetLambdaV(TFltV &OutV)
COMMENT.
Definition: agmfit.h:94
THash< TIntPr, TIntSet > EdgeComVH
Edge -> Shared Community ID Set.
Definition: agmfit.h:12
Fitting the Affilialiton Graph Model (AGM).
Definition: agmfit.h:8
void GetEdgeJointCom()
For each (u, v) in edges, precompute C_uv (the set of communities nodes u and v share).
Definition: agmfit.cpp:50
TAGMFit()
Definition: agmfit.h:26
Definition: dt.h:1383
Definition: fl.h:58
void GradLogLForLambda(TFltV &GradV)
Definition: agmfit.cpp:595
TAGMFit(const PUNGraph &GraphPt, const TVec< TIntV > &CmtyVVPt, const TRnd &RndPt)
COMMENT. Use to describribe parameters.
Definition: agmfit.h:33
double CalcPNoComByCmtyVV(const int &SamplePairs=-1)
Compute the empirical edge probability between a pair of nodes who share no community (epsilon)...
Definition: agmfit.cpp:632
void Save(TSOut &SOut)
Definition: agmfit.cpp:8
void LeaveCom(const int &NID, const int &CID)
After MCMC, NID leaves community CID.
Definition: agmfit.cpp:293
TFlt MinLambda
Minimum value of regularization parameter lambda (default = 1e-5).
Definition: agmfit.h:20
void GetCmtyVV(TVec< TIntV > &CmtyVV, const double QMax=2.0)
Get communities whose p_c is higher than 1 - QMax.
Definition: agmfit.cpp:554
int RemoveEmptyCom()
Remove all communities with no members.
Definition: agmfit.cpp:459
int MLEGradAscentGivenCAG(const double &Thres=0.001, const int &MaxIter=10000, const TStr PlotNm=TStr())
Gradient descent for p_c while keeping the community affiliation graph (CAG) fixed.
Definition: agmfit.cpp:130
THash< TInt, TIntSet > NIDComVH
Node ID -> Communitie IDs the node belongs to.
Definition: agmfit.h:13
double SelectLambdaSum(const TIntSet &ComK)
Compute sum of lambda_c (which is log (1 - p_c)) over C_uv (ComK). The function is used to compute ed...
Definition: agmfit.cpp:618
TFlt RegCoef
Regularization parameter when we fit for P_c (for finding # communities).
Definition: agmfit.h:22
void SetRegCoef(const double Val)
Definition: agmfit.h:45
void NeighborComInit(const int InitComs)
Initialize node community memberships using best neighborhood communities (see D. Gleich et al...
Definition: agmfit.cpp:186
void GetQV(TFltV &OutV)
Returns QV, a vector of (1 - p_c) for each community c.
Definition: agmfit.cpp:451
TAGMFit(const PUNGraph &GraphPt, const TVec< TIntV > &CmtyVVPt, const int RndSeed=0)
COMMENT. Use to describribe parameters.
Definition: agmfit.h:29
Definition: fl.h:128
Definition: dt.h:1134
double SeekJoin(const int &UID, const int &CID)
Compute the change in likelihood (Delta) if node UID joins community CID.
Definition: agmfit.cpp:502
void RunMCMC(const int &MaxIter, const int &EvalLambdaIter, const TStr &PlotFPrx=TStr())
Main procedure for fitting the AGM to a given graph using MCMC.
Definition: agmfit.cpp:366
Definition: dt.h:412
void InitNodeData()
COMMENT.
Definition: agmfit.cpp:264
double SeekLeave(const int &UID, const int &CID)
Compute the change in likelihood (Delta) if node UID leaves community CID.
Definition: agmfit.cpp:475
void JoinCom(const int &NID, const int &JoinCID)
Definition: agmfit.cpp:309
Definition: hash.h:97
void RandomInitCmtyVV(const int InitComs, const double ComSzAlpha=1.3, const double MemAlpha=1.8, const int MinComSz=8, const int MaxComSz=200, const int MinMem=1, const int MaxMem=10)
Randomly initialize bipartite community affiliation graph.
Definition: agmfit.cpp:42
TFlt MaxLambda
Maximum value of regularization parameter lambda (default = 10).
Definition: agmfit.h:21
TVec< TIntSet > CIDNSetV
Community ID -> Member Node ID Sets.
Definition: agmfit.h:11
void AddBaseCmty()
Add Epsilon community (base community which includes all nodes) into community affiliation graph...
Definition: agmfit.cpp:251
void SetCmtyVV(const TVec< TIntV > &CmtyVV)
COMMENT.
Definition: agmfit.cpp:584
double Likelihood(const TFltV &NewLambdaV)
Definition: agmfit.h:43
PUNGraph G
Graph to fit.
Definition: agmfit.h:10
TIntV ComEdgesV
The number of edges in each community.
Definition: agmfit.h:14
void SampleTransition(int &NID, int &JoinCID, int &LeaveCID, double &DeltaL)
Sample MMCM transitions: Choose among (join, leave, switch), and then sample (NID, CID).
Definition: agmfit.cpp:325
TFlt PNoCom
Probability of edge when two nodes share no community (epsilon in the paper).
Definition: agmfit.h:15
void RandomInit(const int &MaxK)
COMMENT.
Definition: agmfit.cpp:169
THash< TIntPr, TFlt > NIDCIDPrH
pairs (for sampling MCMC moves).
Definition: agmfit.h:18
double GetPNoCom()
Definition: agmfit.h:58
Vector is a sequence TVal objects representing an array that can change in size.
Definition: ds.h:430