SNAP Library 6.0, Developer Reference  2020-12-09 16:24:20 SNAP, a general purpose, high performance system for analysis and manipulation of large networks
localmotifcluster.h
Go to the documentation of this file.
1 #ifndef snap_localmotifcluster_h
2 #define snap_localmotifcluster_h
3
4 #include "Snap.h"
5
8
9
10 /*
11 Section: Motif
12 */
13 // Definition of motifs, on both directed graphs and undirected graphs
14 enum MotifType {
15  // directed graph 2-node motifs
16  UniDE, // uni-directed edge, u --> v but not v --> u
17  BiDE, // bi-directed edge, u <--> v
18  DE, // directed edge, = UniDE + 2 * BiDE
19  DE_any, // any edge, counted as 1 if(u --> v or v --> u), = UniDE + BiDE
20
21  // directed graph 3-node motifs, see [Austin et al. 2016 Science] for more information.
22  M1, // u --> v, v --> w, w --> u
23  M2, // u <--> v, v --> w, w --> u
24  M3, // u <--> v, v <--> w, w --> u
25  M4, // u <--> v, v <--> w, w <--> u
26  M5, // u --> v, v --> w, u --> w
27  M6, // u <--> v, w --> u, w --> v
28  M7, // u <--> v, u --> w, v --> w
29  triad, // M1 + M2 + M3 + M4 + M5 + M6 + M7
30  cycle, // M1 + M2 + M3 + 2 * M4
31  FFLoop, // feed-forward loop, M5 + 2 * M6 + 2 * M7
32
33  // undirected graph motifs
34  UEdge, // undirected edges
35  clique3, // |
36  clique4, // |
37  clique5, // |
38  clique6, // |
39  clique7, // |
40  clique8, // |
41  clique9, // |
42 };
43
44 // Given a string representation of a motif name, parse it to a MotifType.
45 // Will throw an exception if (1) the graph is directed but the motif is for undirected graph, or (2) the other way.
46 MotifType ParseMotifType(const TStr& motif, const bool& IsDirected = false);
47
48
49
50
51 /*
52 Section: Preprocessing graphs, including:
53 1. counting motif on each edge
54 2. obtain a transformed weighted graph
55 */
57  private :
58  PUNGraph Graph_org; // A pointer to the original input graph.
59  // If the input is a directed graph, we will create its undirected version.
60  // This is used in forming the weighted graph.
61
62  PUNGraph Graph_trans; // The transformed graph which we will use in MAPPR (the first step of MAPPR)
63  // Note this is an unweighted graph, and the weights are stored in variable {WeightVH Weights}
64  // Note that is graph may be much more sparse than the original graph
65
66  CountVH Counts; // To store the number of motif instances on each edge.
67  // vec = Counts[u].GetDat(v) are the numbers of instances of several motif types on edge (u, v)
68  // For undirected graph, vec[i] is the number of (i+3)-cliques on this edge, i.e., vec[0] is for triangle
69  // For directed graph motif,
70  // vec[0] is for uni-directed edges,
71  // vec[1] is for bi-directed edges,
72  // vec[i] is for M_{i-1} as defined in Austin et al. (Science 2016).
73
74  WeightVH Weights; // The weights of the weighted graph
75  // Weights[u].GetDat(v) is the weight of edge (u, v)
76  // Weights[u].GetDat(u) is the weighted degree of node u
77
78  float TotalVol; // The total volume of the whole graph, needed in computing the conductance
79
80
81  // This function counts the undirected graph motif (clique) instances on each edge.
82  // It uses recursive method for clique enumeration proposed by Chiba and Nishizeki (SIAM J. Comput. 1985).
83  // Input {int KSize} denotes the size of the clique we are to enumerate in the current graph {PUNGraph& G}
84  // Input {TIntV& PrevNodes} denotes a set of nodes that are directed connected to any node in the current graph G
85  // and {int level = PrevNodes.Len()} is the number of PreNodes. Therefore, any k-clique in G corresponds to
86  // a (k+level)-clique after all nodes in PrevNodes are added in the current graph G.
87  void countClique(PUNGraph& G, int KSize, TIntV& PrevNodes, int level);
88
89  // This function counts the directed graph motif instances on each edge.
90  // void countDirEdgeMotif(PNGraph graph);
92
93
94  public :
95  // Initialing, which will run assignWeights* functions and obtain the weighted transformed graph.
97  // For undirected input graph.
99  // For directed input graph
100  ProcessedGraph(PNGraph graph, MotifType mt);
101
102  // Two functions, for undirected graph and directed graph respectively, that
103  // 1) counts motifs on each pair of nodes
104  // 2) assign weights
105  // 3) obtain the transformed graph
107  void assignWeights_dir(MotifType mt);
108
109  // Output and printing
110  PUNGraph getOriginalGraph() const {return Graph_org; };
112  CountVH getCounts() const { return Counts; };
113  const WeightVH& getWeights() const { return Weights; };
115  void printCounts();
116  void printWeights();
117 };
118
119
120
121
122 /*
123 Section: MAPPR main part, including:
124 1. compute the APPR vector on the weighted transformed graph
125 2. obtain the cluster by sweeping the NCP profile
126 */
127 class MAPPR {
128  private:
129  THash<TInt, TFlt> appr_vec; // To store the APPR vector of the specified input
130
131  int NumPushs; // Number of pushes in computing the APPR vector
132
133  float appr_norm; // L1-norm of the APPR vector
134
135  TIntV NodeInOrder; // Node Ids in the NCP in order
136
137  TFltV MtfCondProfile; // Conductance of the NCP in order
138
139  int SizeGlobalMin; // Size of the cluster given by the global minimum of NCP
140  // Value is initialized as 0 when this has not been computed.
141
142  int SizeFirstLocalMin; // Size of the cluster given by the first local minimum of NCP
143  // Value is initialized as -1 when this has not been computed.
144
145  TIntV Cluster; // To store the desired cluster
146
147  // To compute the NCP of the graph with precomputed APPR vector
148  // Results are stored in {this->NodeInOrder and MtfCondProfile}.
149  // It will also compute the global min and first local min of the NCP.
150  void computeProfile(const ProcessedGraph& graph_p);
151
152  // To compute the size the global minimum in NCP
153  void findGlobalMin();
154
155  // To compute the size of the first local minimum in NCP
156  void findFirstlocalMin();
157
158  // To check whether the specified {idx} is a local min in NCP.
159  // Local minimum is defined in [Yin et al 2017 KDD}.
160  bool isLocalMin(int idx, double thresh=1.2);
161
162
163  public:
164  MAPPR();
165
166  // Function to compute the APPR vector on the weighted transformed graph {ProcessedGraph graph_p}
167  // with seed {int SeedNodeId} alpha = {float alpha}, and epsilon = {float eps}.
168  // It will also compute the NCP as well as with {SizeGlobalMin} and {SizeFirstLocalMin}, using {computeProfile(*)}.
169  // Results are stored in {this->appr_vec}, {this->NodeInOrder, MtfCondProfile}, and {this->SizeGlobalMin, SizeFirstLocalMin}.
170  void computeAPPR(const ProcessedGraph& graph_p, const int SeedNodeId, float alpha, float eps);
171
172  // Function to find the desired cluster based on the NCP.
173  // Option > 0 will give the cluster of size option;
174  // Option = 0 will give the cluster of global minimum conductance;
175  // Option = -1 will give the cluster of first local minimum in the NCP.
176  // Result is stored in {TIntV Cluster}.
177  // Note that this function can only be run after finishing {computeAPPR(...)}
178  void sweepAPPR(int option = -1);
179
180  // Output and printing
182  TIntV getCluster() { return Cluster; };
183  void printAPPR();
184  void printProfile();
185 };
186
187
188
189
190 #endif // snap_localmotifcluster_h
THash< TInt, TFlt > appr_vec
void computeAPPR(const ProcessedGraph &graph_p, const int SeedNodeId, float alpha, float eps)
void countClique(PUNGraph &G, int KSize, TIntV &PrevNodes, int level)
void assignWeights_undir(MotifType mt)
void computeProfile(const ProcessedGraph &graph_p)
const WeightVH & getWeights() const
THash< TInt, TFlt > getAPPR()
TIntV Cluster
void findFirstlocalMin()
TVec< THash< TInt, TIntV > > CountVH
int SizeFirstLocalMin
void printAPPR()
void assignWeights_dir(MotifType mt)
MotifType
TFltV MtfCondProfile
int SizeGlobalMin
MotifType ParseMotifType(const TStr &motif, const bool &IsDirected=false)
TVec< THash< TInt, TFlt > > WeightVH
void sweepAPPR(int option=-1)
void printProfile()
void findGlobalMin()
CountVH getCounts() const
Definition: dt.h:412
PUNGraph getTransformedGraph() const
TIntV NodeInOrder
PUNGraph getOriginalGraph() const
TIntV getCluster()
bool isLocalMin(int idx, double thresh=1.2)
float getTotalVolume() const
float appr_norm
Vector is a sequence TVal objects representing an array that can change in size.
Definition: ds.h:430