SNAP Library 4.0, Developer Reference  2017-07-27 13:18:06
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
localmotifcluster.h
Go to the documentation of this file.
1 #ifndef snap_localmotifcluster_h
2 #define snap_localmotifcluster_h
3 
4 #include "Snap.h"
5 
8 
9 
10 /*
11 Section: Motif
12 */
13 // Definition of motifs, on both directed graphs and undirected graphs
14 enum MotifType {
15  // directed graph 2-node motifs
16  UniDE, // uni-directed edge, u --> v but not v --> u
17  BiDE, // bi-directed edge, u <--> v
18  DE, // directed edge, = UniDE + 2 * BiDE
19  DE_any, // any edge, counted as 1 if(u --> v or v --> u), = UniDE + BiDE
20 
21  // directed graph 3-node motifs, see [Austin et al. 2016 Science] for more information.
22  M1, // u --> v, v --> w, w --> u
23  M2, // u <--> v, v --> w, w --> u
24  M3, // u <--> v, v <--> w, w --> u
25  M4, // u <--> v, v <--> w, w <--> u
26  M5, // u --> v, v --> w, u --> w
27  M6, // u <--> v, w --> u, w --> v
28  M7, // u <--> v, u --> w, v --> w
29  triad, // M1 + M2 + M3 + M4 + M5 + M6 + M7
30  cycle, // M1 + M2 + M3 + 2 * M4
31  FFLoop, // feed-forward loop, M5 + 2 * M6 + 2 * M7
32 
33  // undirected graph motifs
34  UEdge, // undirected edges
35  clique3, // |
36  clique4, // |
37  clique5, // |
38  clique6, // |
39  clique7, // |
40  clique8, // |
41  clique9, // |
42 };
43 
44 // Given a string representation of a motif name, parse it to a MotifType.
45 // Will throw an exception if (1) the graph is directed but the motif is for undirected graph, or (2) the other way.
46 MotifType ParseMotifType(const TStr& motif, const bool& IsDirected = false);
47 
48 
49 
50 
51 /*
52 Section: Preprocessing graphs, including:
53 1. counting motif on each edge
54 2. obtain a transformed weighted graph
55 */
57  private :
58  PUNGraph Graph_org; // A pointer to the original input graph.
59  // If the input is a directed graph, we will create its undirected version.
60  // This is used in forming the weighted graph.
61 
62  PUNGraph Graph_trans; // The transformed graph which we will use in MAPPR (the first step of MAPPR)
63  // Note this is an unweighted graph, and the weights are stored in variable {WeightVH Weights}
64  // Note that is graph may be much more sparse than the original graph
65 
66  CountVH Counts; // To store the number of motif instances on each edge.
67  // vec = Counts[u].GetDat(v) are the numbers of instances of several motif types on edge (u, v)
68  // For undirected graph, vec[i] is the number of (i+3)-cliques on this edge, i.e., vec[0] is for triangle
69  // For directed graph motif,
70  // vec[0] is for uni-directed edges,
71  // vec[1] is for bi-directed edges,
72  // vec[i] is for M_{i-1} as defined in Austin et al. (Science 2016).
73 
74  WeightVH Weights; // The weights of the weighted graph
75  // Weights[u].GetDat(v) is the weight of edge (u, v)
76  // Weights[u].GetDat(u) is the weighted degree of node u
77 
78  float TotalVol; // The total volume of the whole graph, needed in computing the conductance
79 
80 
81  // This function counts the undirected graph motif (clique) instances on each edge.
82  // It uses recursive method for clique enumeration proposed by Chiba and Nishizeki (SIAM J. Comput. 1985).
83  // Input {int KSize} denotes the size of the clique we are to enumerate in the current graph {PUNGraph& G}
84  // Input {TIntV& PrevNodes} denotes a set of nodes that are directed connected to any node in the current graph G
85  // and {int level = PrevNodes.Len()} is the number of PreNodes. Therefore, any k-clique in G corresponds to
86  // a (k+level)-clique after all nodes in PrevNodes are added in the current graph G.
87  void countClique(PUNGraph& G, int KSize, TIntV& PrevNodes, int level);
88 
89  // This function counts the directed graph motif instances on each edge.
90  // void countDirEdgeMotif(PNGraph graph);
91  void countDirTriadMotif(PNGraph graph);
92 
93 
94  public :
95  // Initialing, which will run assignWeights* functions and obtain the weighted transformed graph.
97  // For undirected input graph.
99  // For directed input graph
100  ProcessedGraph(PNGraph graph, MotifType mt);
101 
102  // Two functions, for undirected graph and directed graph respectively, that
103  // 1) counts motifs on each pair of nodes
104  // 2) assign weights
105  // 3) obtain the transformed graph
107  void assignWeights_dir(MotifType mt);
108 
109  // Output and printing
110  PUNGraph getOriginalGraph() const {return Graph_org; };
112  CountVH getCounts() const { return Counts; };
113  const WeightVH& getWeights() const { return Weights; };
114  float getTotalVolume() const { return TotalVol; };
115  void printCounts();
116  void printWeights();
117 };
118 
119 
120 
121 
122 /*
123 Section: MAPPR main part, including:
124 1. compute the APPR vector on the weighted transformed graph
125 2. obtain the cluster by sweeping the NCP profile
126 */
127 class MAPPR {
128  private:
129  THash<TInt, TFlt> appr_vec; // To store the APPR vector of the specified input
130 
131  int NumPushs; // Number of pushes in computing the APPR vector
132 
133  float appr_norm; // L1-norm of the APPR vector
134 
135  TIntV NodeInOrder; // Node Ids in the NCP in order
136 
137  TFltV MtfCondProfile; // Conductance of the NCP in order
138 
139  int SizeGlobalMin; // Size of the cluster given by the global minimum of NCP
140  // Value is initialized as 0 when this has not been computed.
141 
142  int SizeFirstLocalMin; // Size of the cluster given by the first local minimum of NCP
143  // Value is initialized as -1 when this has not been computed.
144 
145  TIntV Cluster; // To store the desired cluster
146 
147  // To compute the NCP of the graph with precomputed APPR vector
148  // Results are stored in {this->NodeInOrder and MtfCondProfile}.
149  // It will also compute the global min and first local min of the NCP.
150  void computeProfile(const ProcessedGraph& graph_p);
151 
152  // To compute the size the global minimum in NCP
153  void findGlobalMin();
154 
155  // To compute the size of the first local minimum in NCP
156  void findFirstlocalMin();
157 
158  // To check whether the specified {idx} is a local min in NCP.
159  // Local minimum is defined in [Yin et al 2017 KDD}.
160  bool isLocalMin(int idx, double thresh=1.2);
161 
162 
163  public:
164  MAPPR();
165 
166  // Function to compute the APPR vector on the weighted transformed graph {ProcessedGraph graph_p}
167  // with seed {int SeedNodeId} alpha = {float alpha}, and epsilon = {float eps}.
168  // It will also compute the NCP as well as with {SizeGlobalMin} and {SizeFirstLocalMin}, using {computeProfile(*)}.
169  // Results are stored in {this->appr_vec}, {this->NodeInOrder, MtfCondProfile}, and {this->SizeGlobalMin, SizeFirstLocalMin}.
170  void computeAPPR(const ProcessedGraph& graph_p, const int SeedNodeId, float alpha, float eps);
171 
172  // Function to find the desired cluster based on the NCP.
173  // Option > 0 will give the cluster of size option;
174  // Option = 0 will give the cluster of global minimum conductance;
175  // Option = -1 will give the cluster of first local minimum in the NCP.
176  // Result is stored in {TIntV Cluster}.
177  // Note that this function can only be run after finishing {computeAPPR(...)}
178  void sweepAPPR(int option = -1);
179 
180  // Output and printing
182  TIntV getCluster() { return Cluster; };
183  void printAPPR();
184  void printProfile();
185 };
186 
187 
188 
189 
190 #endif // snap_localmotifcluster_h
THash< TInt, TFlt > appr_vec
void computeAPPR(const ProcessedGraph &graph_p, const int SeedNodeId, float alpha, float eps)
void countClique(PUNGraph &G, int KSize, TIntV &PrevNodes, int level)
void assignWeights_undir(MotifType mt)
void computeProfile(const ProcessedGraph &graph_p)
const WeightVH & getWeights() const
THash< TInt, TFlt > getAPPR()
TIntV Cluster
void findFirstlocalMin()
TVec< THash< TInt, TIntV > > CountVH
int SizeFirstLocalMin
void printAPPR()
void assignWeights_dir(MotifType mt)
MotifType
void countDirTriadMotif(PNGraph graph)
TFltV MtfCondProfile
int SizeGlobalMin
MotifType ParseMotifType(const TStr &motif, const bool &IsDirected=false)
TVec< THash< TInt, TFlt > > WeightVH
void sweepAPPR(int option=-1)
void printProfile()
void findGlobalMin()
CountVH getCounts() const
Definition: dt.h:412
PUNGraph getTransformedGraph() const
TIntV NodeInOrder
PUNGraph getOriginalGraph() const
TIntV getCluster()
bool isLocalMin(int idx, double thresh=1.2)
float getTotalVolume() const
float appr_norm
Vector is a sequence TVal objects representing an array that can change in size.
Definition: ds.h:430