SNAP Library 6.0, Developer Reference  2020-12-09 16:24:20
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
agmfit.h
Go to the documentation of this file.
1 #ifndef snap_agmfit_h
2 #define snap_agmfit_h
3 
4 #include "Snap.h"
5 
8 class TAGMFit {
9 private:
24 
25 public:
26  TAGMFit() { }
27  ~TAGMFit() { }
29  TAGMFit(const PUNGraph& GraphPt, const TVec<TIntV>& CmtyVVPt, const int RndSeed = 0): G(GraphPt), PNoCom(0.0), Rnd(RndSeed), MinLambda(0.00001), MaxLambda(10.0), RegCoef(0), BaseCID(-1) { SetCmtyVV(CmtyVVPt); }
31  TAGMFit(const PUNGraph& GraphPt, const int InitComs, const int RndSeed = 0): G(GraphPt), PNoCom(0.0), Rnd(RndSeed), MinLambda(0.00001), MaxLambda(10.0), RegCoef(0), BaseCID(-1) { NeighborComInit(InitComs); }//RandomInitCmtyVV(InitComs); }
33  TAGMFit(const PUNGraph& GraphPt, const TVec<TIntV>& CmtyVVPt, const TRnd& RndPt): G(GraphPt), PNoCom(0.0), Rnd(RndPt), MinLambda(0.00001), MaxLambda(10.0), RegCoef(0), BaseCID(-1) { SetCmtyVV(CmtyVVPt); }
34  void Save(TSOut& SOut);
35  void Load(TSIn& SIn, const int& RndSeed = 0);
36 
38  void RandomInitCmtyVV(const int InitComs, const double ComSzAlpha = 1.3, const double MemAlpha = 1.8, const int MinComSz = 8, const int MaxComSz = 200, const int MinMem = 1, const int MaxMem = 10);
40  void AddBaseCmty();
42  double Likelihood();
43  double Likelihood(const TFltV& NewLambdaV) { double Tmp1, Tmp2; return Likelihood(NewLambdaV, Tmp1, Tmp2); }
44  double Likelihood(const TFltV& NewLambdaV, double& LEdges, double& LNoEdges);
45  void SetRegCoef(const double Val) { RegCoef = Val; }
47  void GetEdgeJointCom();
49  void NeighborComInit(const int InitComs);
50  // Gradient of likelihood for \c P_c.
51  void GradLogLForLambda(TFltV& GradV);
53  int MLEGradAscentGivenCAG(const double& Thres=0.001, const int& MaxIter=10000, const TStr PlotNm = TStr());
55  void SetDefaultPNoCom();
57  void SetPNoCom(const double& Epsilon) { if (BaseCID == -1 && Epsilon > 0.0) { PNoCom = Epsilon; } }
58  double GetPNoCom() { return PNoCom; }
60  double CalcPNoComByCmtyVV(const int& SamplePairs = -1);
61  // OP RS 2014/04/10 commented out since there is no implementation
62  //void GetNewtonStep(TFltVV& HVV, TFltV& GradV, TFltV& DeltaLV);
64  double SelectLambdaSum(const TIntSet& ComK);
66  double SelectLambdaSum(const TFltV& NewLambdaV, const TIntSet& ComK);
67 
69  void RandomInit(const int& MaxK);
71  void RunMCMC(const int& MaxIter, const int& EvalLambdaIter, const TStr& PlotFPrx = TStr());
73  void SampleTransition(int& NID, int& JoinCID, int& LeaveCID, double& DeltaL);
75  void InitNodeData();
77  void LeaveCom(const int& NID, const int& CID);
78  // After MCMC, \c NID joins community \c CID.
79  void JoinCom(const int& NID, const int& JoinCID);
81  int RemoveEmptyCom();
83  double SeekLeave(const int& UID, const int& CID);
85  double SeekJoin(const int& UID, const int& CID);
86  // Compute the change in likelihood (Delta) if node \c UID switches from \c CurCID to \c NewCID.
87  double SeekSwitch(const int& UID, const int& CurCID, const int& NewCID);
88 
90  double GetStepSizeByLineSearchForLambda(const TFltV& DeltaV, const TFltV& GradV, const double& Alpha, const double& Beta);
92  void SetLambdaV(const TFltV& LambdaPt) {LambdaV = LambdaPt;}
94  void GetLambdaV(TFltV& OutV) {OutV = LambdaV;}
96  void GetQV(TFltV& OutV);
98  void GetCmtyVV(TVec<TIntV>& CmtyVV, const double QMax = 2.0);
100  void GetCmtyVV(TVec<TIntV>& CmtyVV, TFltV& QV, const double QMax = 2.0);
102  void SetCmtyVV(const TVec<TIntV>& CmtyVV);
104  void PrintSummary();
105 };
106 
107 #endif
void Load(TSIn &SIn, const int &RndSeed=0)
Definition: agmfit.cpp:24
void SetLambdaV(const TFltV &LambdaPt)
COMMENT.
Definition: agmfit.h:92
TRnd Rnd
Definition: agmfit.h:17
double SeekSwitch(const int &UID, const int &CurCID, const int &NewCID)
Definition: agmfit.cpp:525
Definition: dt.h:11
~TAGMFit()
Definition: agmfit.h:27
void SetDefaultPNoCom()
Set Epsilon (the probability that two nodes sharing no communities connect) to the default value...
Definition: agmfit.cpp:72
void SetPNoCom(const double &Epsilon)
Set Epsilon (the probability that two nodes sharing no communities connect) to the default value...
Definition: agmfit.h:57
THash< TIntPr, TInt > NIDCIDPrS
pairs (for sampling MCMC moves).
Definition: agmfit.h:19
double GetStepSizeByLineSearchForLambda(const TFltV &DeltaV, const TFltV &GradV, const double &Alpha, const double &Beta)
Step size search for updating P_c (which is parametarized by regularization parameter lambda)...
Definition: agmfit.cpp:109
double Likelihood()
COMMENT.
Definition: agmfit.cpp:104
TFltV LambdaV
Parametrization of P_c (edge probability in community c), P_c = 1 - exp(-lambda). ...
Definition: agmfit.h:16
void PrintSummary()
COMMENT.
Definition: agmfit.cpp:659
TAGMFit(const PUNGraph &GraphPt, const int InitComs, const int RndSeed=0)
COMMENT. Use to describribe parameters.
Definition: agmfit.h:31
TInt BaseCID
ID of the Epsilon-community (in case we fit P_c of the epsilon community). We do not fit for the Epsi...
Definition: agmfit.h:23
void GetLambdaV(TFltV &OutV)
COMMENT.
Definition: agmfit.h:94
THash< TIntPr, TIntSet > EdgeComVH
Edge -> Shared Community ID Set.
Definition: agmfit.h:12
Fitting the Affilialiton Graph Model (AGM).
Definition: agmfit.h:8
void GetEdgeJointCom()
For each (u, v) in edges, precompute C_uv (the set of communities nodes u and v share).
Definition: agmfit.cpp:50
TAGMFit()
Definition: agmfit.h:26
Definition: dt.h:1386
Definition: fl.h:58
void GradLogLForLambda(TFltV &GradV)
Definition: agmfit.cpp:595
TAGMFit(const PUNGraph &GraphPt, const TVec< TIntV > &CmtyVVPt, const TRnd &RndPt)
COMMENT. Use to describribe parameters.
Definition: agmfit.h:33
double CalcPNoComByCmtyVV(const int &SamplePairs=-1)
Compute the empirical edge probability between a pair of nodes who share no community (epsilon)...
Definition: agmfit.cpp:632
void Save(TSOut &SOut)
Definition: agmfit.cpp:8
void LeaveCom(const int &NID, const int &CID)
After MCMC, NID leaves community CID.
Definition: agmfit.cpp:293
TFlt MinLambda
Minimum value of regularization parameter lambda (default = 1e-5).
Definition: agmfit.h:20
void GetCmtyVV(TVec< TIntV > &CmtyVV, const double QMax=2.0)
Get communities whose p_c is higher than 1 - QMax.
Definition: agmfit.cpp:554
int RemoveEmptyCom()
Remove all communities with no members.
Definition: agmfit.cpp:459
int MLEGradAscentGivenCAG(const double &Thres=0.001, const int &MaxIter=10000, const TStr PlotNm=TStr())
Gradient descent for p_c while keeping the community affiliation graph (CAG) fixed.
Definition: agmfit.cpp:130
THash< TInt, TIntSet > NIDComVH
Node ID -> Communitie IDs the node belongs to.
Definition: agmfit.h:13
double SelectLambdaSum(const TIntSet &ComK)
Compute sum of lambda_c (which is log (1 - p_c)) over C_uv (ComK). The function is used to compute ed...
Definition: agmfit.cpp:618
TFlt RegCoef
Regularization parameter when we fit for P_c (for finding # communities).
Definition: agmfit.h:22
void SetRegCoef(const double Val)
Definition: agmfit.h:45
void NeighborComInit(const int InitComs)
Initialize node community memberships using best neighborhood communities (see D. Gleich et al...
Definition: agmfit.cpp:186
void GetQV(TFltV &OutV)
Returns QV, a vector of (1 - p_c) for each community c.
Definition: agmfit.cpp:451
TAGMFit(const PUNGraph &GraphPt, const TVec< TIntV > &CmtyVVPt, const int RndSeed=0)
COMMENT. Use to describribe parameters.
Definition: agmfit.h:29
Definition: fl.h:128
Definition: dt.h:1137
double SeekJoin(const int &UID, const int &CID)
Compute the change in likelihood (Delta) if node UID joins community CID.
Definition: agmfit.cpp:502
void RunMCMC(const int &MaxIter, const int &EvalLambdaIter, const TStr &PlotFPrx=TStr())
Main procedure for fitting the AGM to a given graph using MCMC.
Definition: agmfit.cpp:366
Definition: dt.h:412
void InitNodeData()
COMMENT.
Definition: agmfit.cpp:264
double SeekLeave(const int &UID, const int &CID)
Compute the change in likelihood (Delta) if node UID leaves community CID.
Definition: agmfit.cpp:475
void JoinCom(const int &NID, const int &JoinCID)
Definition: agmfit.cpp:309
Definition: hash.h:97
void RandomInitCmtyVV(const int InitComs, const double ComSzAlpha=1.3, const double MemAlpha=1.8, const int MinComSz=8, const int MaxComSz=200, const int MinMem=1, const int MaxMem=10)
Randomly initialize bipartite community affiliation graph.
Definition: agmfit.cpp:42
TFlt MaxLambda
Maximum value of regularization parameter lambda (default = 10).
Definition: agmfit.h:21
TVec< TIntSet > CIDNSetV
Community ID -> Member Node ID Sets.
Definition: agmfit.h:11
void AddBaseCmty()
Add Epsilon community (base community which includes all nodes) into community affiliation graph...
Definition: agmfit.cpp:251
void SetCmtyVV(const TVec< TIntV > &CmtyVV)
COMMENT.
Definition: agmfit.cpp:584
double Likelihood(const TFltV &NewLambdaV)
Definition: agmfit.h:43
PUNGraph G
Graph to fit.
Definition: agmfit.h:10
TIntV ComEdgesV
The number of edges in each community.
Definition: agmfit.h:14
void SampleTransition(int &NID, int &JoinCID, int &LeaveCID, double &DeltaL)
Sample MMCM transitions: Choose among (join, leave, switch), and then sample (NID, CID).
Definition: agmfit.cpp:325
TFlt PNoCom
Probability of edge when two nodes share no community (epsilon in the paper).
Definition: agmfit.h:15
void RandomInit(const int &MaxK)
COMMENT.
Definition: agmfit.cpp:169
THash< TIntPr, TFlt > NIDCIDPrH
pairs (for sampling MCMC moves).
Definition: agmfit.h:18
double GetPNoCom()
Definition: agmfit.h:58
Vector is a sequence TVal objects representing an array that can change in size.
Definition: ds.h:430