SNAP Library 6.0, User Reference  2020-12-09 16:24:20
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
TCodaAnalyzer Class Reference

#include <agmdirected.h>

Public Member Functions

 TCodaAnalyzer ()
 
 TCodaAnalyzer (TCoda &Coda, const double MemThres=-1.0)
 
void GetAllCmtyVV (TVec< TIntV > &CmtyVV, const int MinSz)
 
double GetFrac2Mode (const double Thres2Mode=0.2, const int MinSzEach=2)
 
void Summary (const int TopK=10, const double Thres2Mode=0.2)
 
int GetNumComs ()
 
void GetCmtyVAll (TIntV &CmtyVAll, const int CID)
 Fills CmtyVAll with the union of the in-member and out-member node IDs of community CID. More...
 
PNGraph Net2ModeCommunities (const double MaxJac, const double JacEdge, const bool GetWcc=true)
 
void Dump2ModeCommunities (const TStr &OutFNm, const double MaxJac, const TIntStrH &NIDNameH)
 
void Draw2ModeCommunity (const int CID, const TStr &OutFNm, const TIntStrH &NIDNameH, const THash< TInt, TIntTr > &NIDColorH)
 

Public Attributes

PNGraph G
 
TVec< TIntFltH > InCmtyValHV
 
TVec< TIntFltH > OutCmtyValHV
 
TVec< TIntFltH > InOutCmtyValHV
 

Detailed Description

Definition at line 198 of file agmdirected.h.

Constructor & Destructor Documentation

TCodaAnalyzer::TCodaAnalyzer ( )
inline

Definition at line 204 of file agmdirected.h.

204 { G = TNGraph::New(); }
static PNGraph New()
Static constructor that returns a pointer to the graph. Call: PNGraph Graph = TNGraph::New().
Definition: graph.h:481
TCodaAnalyzer::TCodaAnalyzer ( TCoda &  Coda,
const double  MemThres = -1.0 
)
inline

Definition at line 205 of file agmdirected.h.

205  {
206  G = Coda.GetGraphRawNID();
207  printf("graph copied (%d nodes %d edges)\n", G->GetNodes(), G->GetEdges());
208  TIntV CIdV;
209  Coda.GetTopCIDs(CIdV, Coda.GetNumComs());
210  double Delta = MemThres == -1.0 ? sqrt(Coda.PNoCom): MemThres;
211  for (int c = 0; c < CIdV.Len(); c++) {
212  int CID = CIdV[c];
213  TIntFltH InMemH, OutMemH, InOutMemH;
214  Coda.GetNIDValH(InOutMemH, OutMemH, InMemH, CID, Delta);
215  InCmtyValHV.Add(InMemH);
216  OutCmtyValHV.Add(OutMemH);
217  InOutCmtyValHV.Add(InOutMemH);
218  }
219  printf("Communities copied (%d communities)\n", InCmtyValHV.Len());
220  }
PNGraph GetGraphRawNID()
void GetNIDValH(TIntFltH &NIdValInOutH, TIntFltH &NIdValOutH, TIntFltH &NIdValInH, const int CID, const double Thres)
int GetEdges() const
Returns the number of edges in the graph.
Definition: graph.cpp:313
TVec< TIntFltH > OutCmtyValHV
Definition: agmdirected.h:202
int GetNodes() const
Returns the number of nodes in the graph.
Definition: graph.h:503
TVec< TIntFltH > InCmtyValHV
Definition: agmdirected.h:201
void GetTopCIDs(TIntV &CIdV, const int TopK, const int IsAverage=1, const int MinSz=1)
int GetNumComs()
Definition: agmdirected.h:36
TFlt PNoCom
Definition: agmdirected.h:24
TVec< TIntFltH > InOutCmtyValHV
Definition: agmdirected.h:203

Member Function Documentation

void TCodaAnalyzer::Draw2ModeCommunity ( const int  CID,
const TStr &  OutFNm,
const TIntStrH &  NIDNameH,
const THash< TInt, TIntTr > &  NIDColorH 
)
inline

Plot bipartite graph for the 2-mode community

Definition at line 355 of file agmdirected.h.

355  {
356  TIntV CmtyVIn, CmtyVOut, CmtyVAll;
357  InCmtyValHV[CID].GetKeyV(CmtyVIn);
358  OutCmtyValHV[CID].GetKeyV(CmtyVOut);
359  GetCmtyVAll(CmtyVAll, CID);
360 
361  //adjust for the nodes who belong to both cmtyvin and cmtyvout
362  for (int u = 0; u < InOutCmtyValHV[CID].Len(); u++) {
363  int UID = InOutCmtyValHV[CID].GetKey(u);
364  if (CmtyVIn.Len() >= CmtyVOut.Len()) {
365  CmtyVIn.DelIfIn(UID);
366  } else {
367  CmtyVOut.DelIfIn(UID);
368  }
369  }
370 
371  PNGraph SG = TSnap::GetSubGraph(G, CmtyVAll);
373  if (CmtyVAll.Len() == 0) { return; }
374  double OXMin = 0.1, YMin = 0.1, OXMax = 2500.00, YMax = 1000.0, IXMin = 0.1, IXMax = 2500.00;
375  double OStep = (OXMax - OXMin) / (double) CmtyVOut.Len(), IStep = (IXMax - IXMin) / (double) CmtyVIn.Len();
376 
377  FILE* F = fopen(OutFNm.CStr(), "wt");
378  fprintf(F, "<?xml version='1.0' encoding='UTF-8'?>\n");
379  fprintf(F, "<gexf xmlns='http://www.gexf.net/1.2draft' xmlns:viz='http://www.gexf.net/1.1draft/viz' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' xsi:schemaLocation='http://www.gexf.net/1.2draft http://www.gexf.net/1.2draft/gexf.xsd' version='1.2'>\n");
380  fprintf(F, "\t<graph mode='static' defaultedgetype='directed'>\n");
381  fprintf(F, "\t\t<nodes>\n");
382  for (int c = 0; c < CmtyVOut.Len(); c++) {
383  int NID = CmtyVOut[c];
384  double XPos = c * OStep + OXMin;
385  TStr Label = NIDNameH.IsKey(NID)? NIDNameH.GetDat(NID): "";
386  Label.ChangeChAll('<', ' ');
387  Label.ChangeChAll('>', ' ');
388  Label.ChangeChAll('&', ' ');
389  Label.ChangeChAll('\'', ' ');
390  TIntTr Color = NIDColorH.IsKey(NID)? NIDColorH.GetDat(NID) : TIntTr(120, 120, 120);
391  fprintf(F, "\t\t\t<node id='%d' label='%s'>\n", NID, Label.CStr());
392  fprintf(F, "\t\t\t\t<viz:color r='%d' g='%d' b='%d'/>\n", Color.Val1.Val, Color.Val2.Val, Color.Val3.Val);
393  fprintf(F, "\t\t\t\t<viz:size value='4.0'/>\n");
394  fprintf(F, "\t\t\t\t<viz:shape value='square'/>\n");
395  fprintf(F, "\t\t\t\t<viz:position x='%f' y='%f' z='0.0'/>\n", XPos, YMax);
396  fprintf(F, "\t\t\t</node>\n");
397  }
398 
399  for (int u = 0; u < CmtyVIn.Len(); u++) {
400  int NID = CmtyVIn[u];
401  TStr Label = NIDNameH.IsKey(NID)? NIDNameH.GetDat(NID): "";
402  Label.ChangeChAll('<', ' ');
403  Label.ChangeChAll('>', ' ');
404  Label.ChangeChAll('&', ' ');
405  Label.ChangeChAll('\'', ' ');
406  double XPos = IXMin + u * IStep;
407  TIntTr Color = NIDColorH.IsKey(NID)? NIDColorH.GetDat(NID) : TIntTr(120, 120, 120);
408  double Alpha = 1.0;
409  fprintf(F, "\t\t\t<node id='%d' label='%s'>\n", NID, Label.CStr());
410  fprintf(F, "\t\t\t\t<viz:color r='%d' g='%d' b='%d' a='%.1f'/>\n", Color.Val1.Val, Color.Val2.Val, Color.Val3.Val, Alpha);
411  fprintf(F, "\t\t\t\t<viz:size value='4.0'/>\n");
412  fprintf(F, "\t\t\t\t<viz:shape value='square'/>\n");
413  fprintf(F, "\t\t\t\t<viz:position x='%f' y='%f' z='0.0'/>\n", XPos, YMin);
414  fprintf(F, "\t\t\t</node>\n");
415  }
416  fprintf(F, "\t\t</nodes>\n");
417  //plot edges
418  int EID = 0;
419  fprintf(F, "\t\t<edges>\n");
420  for (TNGraph::TNodeI NI = SG->BegNI(); NI < SG->EndNI(); NI++) {
421  if (NI.GetOutDeg() == 0 && NI.GetInDeg() == 0 ) { continue; }
422  for (int e = 0; e < NI.GetOutDeg(); e++) {
423  fprintf(F, "\t\t\t<edge id='%d' source='%d' target='%d'/>\n", EID++, NI.GetId(), NI.GetOutNId(e));
424  }
425  }
426  fprintf(F, "\t\t</edges>\n");
427  fprintf(F, "\t</graph>\n");
428  fprintf(F, "</gexf>\n");
429  fclose(F);
430  }
bool DelIfIn(const TVal &Val)
Removes the first occurrence of element Val.
Definition: ds.h:1212
Definition: ds.h:130
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:575
TVec< TIntFltH > OutCmtyValHV
Definition: agmdirected.h:202
TVal1 Val1
Definition: ds.h:132
const TDat & GetDat(const TKey &Key) const
Definition: hash.h:262
TVec< TIntFltH > InCmtyValHV
Definition: agmdirected.h:201
int ChangeChAll(const char &SrcCh, const char &DstCh)
Definition: dt.cpp:1113
TVal2 Val2
Definition: ds.h:133
PUNGraph GetSubGraph(const PUNGraph &Graph, const TIntV &NIdV, const bool &RenumberNodes)
Returns an induced subgraph of an undirected graph Graph with NIdV nodes with an optional node renumbering.
Definition: subgraph.cpp:7
Definition: dt.h:412
Node iterator. Only forward iteration (operator++) is supported.
Definition: graph.h:383
Definition: bd.h:196
TTriple< TInt, TInt, TInt > TIntTr
Definition: ds.h:171
void GetCmtyVAll(TIntV &CmtyVAll, const int CID)
Fills CmtyVAll with the union of the in-member and out-member node IDs of community CID.
Definition: agmdirected.h:263
char * CStr()
Definition: dt.h:479
bool IsKey(const TKey &Key) const
Definition: hash.h:258
TVec< TIntFltH > InOutCmtyValHV
Definition: agmdirected.h:203
TVal3 Val3
Definition: ds.h:134
void TCodaAnalyzer::Dump2ModeCommunities ( const TStr &  OutFNm,
const double  MaxJac,
const TIntStrH &  NIDNameH 
)
inline

Definition at line 317 of file agmdirected.h.

317  {
318  FILE* F = fopen(OutFNm.CStr(), "wt");
319  for (int c = 0; c < InCmtyValHV.Len(); c++) {
320  double Jacc = (double) InOutCmtyValHV[c].Len() / (double) (InCmtyValHV[c].Len() + OutCmtyValHV[c].Len() - InOutCmtyValHV[c].Len());
321  if (Jacc > MaxJac) { continue; }
322  TIntV CmtyVIn, CmtyVOut, CmtyVAll;
323  InCmtyValHV[c].GetKeyV(CmtyVIn);
324  OutCmtyValHV[c].GetKeyV(CmtyVOut);
325  GetCmtyVAll(CmtyVAll, c);
326  //adjust for the nodes who belong to both cmtyvin and cmtyvout
327  for (int u = 0; u < InOutCmtyValHV[c].Len(); u++) {
328  int UID = InOutCmtyValHV[c].GetKey(u);
329  if (CmtyVIn.Len() >= CmtyVOut.Len()) {
330  CmtyVIn.DelIfIn(UID);
331  } else {
332  CmtyVOut.DelIfIn(UID);
333  }
334  }
335  if (CmtyVAll.Len() == 0) { continue; }
336  fprintf(F, "Com %d\n", c);
337  for (int u = 0; u < CmtyVOut.Len(); u++) {
338  int NID = CmtyVOut[u];
339  TStr Label = NIDNameH.IsKey(NID)? NIDNameH.GetDat(NID): TStr::Fmt("Concept %d", NID);
340  fprintf(F, "%s:%f\n", Label.CStr(), OutCmtyValHV[c].GetDat(NID).Val);
341  }
342  fprintf(F, "||==>||\n");
343  for (int u = 0; u < CmtyVIn.Len(); u++) {
344  int NID = CmtyVIn[u];
345  TStr Label = NIDNameH.IsKey(NID)? NIDNameH.GetDat(NID): TStr::Fmt("Concept %d", NID);
346  fprintf(F, "%s:%f\n", Label.CStr(), InCmtyValHV[c].GetDat(NID).Val);
347  }
348  fprintf(F, "\n");
349  }
350  fclose(F);
351  }
bool DelIfIn(const TVal &Val)
Removes the first occurrence of element Val.
Definition: ds.h:1212
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:575
TVec< TIntFltH > OutCmtyValHV
Definition: agmdirected.h:202
const TDat & GetDat(const TKey &Key) const
Definition: hash.h:262
TVec< TIntFltH > InCmtyValHV
Definition: agmdirected.h:201
Definition: dt.h:412
static TStr Fmt(const char *FmtStr,...)
Definition: dt.cpp:1599
void GetCmtyVAll(TIntV &CmtyVAll, const int CID)
Fills CmtyVAll with the union of the in-member and out-member node IDs of community CID.
Definition: agmdirected.h:263
char * CStr()
Definition: dt.h:479
bool IsKey(const TKey &Key) const
Definition: hash.h:258
TVec< TIntFltH > InOutCmtyValHV
Definition: agmdirected.h:203
void TCodaAnalyzer::GetAllCmtyVV ( TVec< TIntV > &  CmtyVV,
const int  MinSz 
)
inline

Definition at line 221 of file agmdirected.h.

221  {
222  for (int c = 0; c < InCmtyValHV.Len(); c++) {
223  TIntV CmtyVIn, CmtyVOut, CmtyVInOut;
224  if (InCmtyValHV[c].Len() < MinSz || OutCmtyValHV[c].Len() < MinSz) { continue; }
225  InOutCmtyValHV[c].GetKeyV(CmtyVInOut);
226  InCmtyValHV[c].GetKeyV(CmtyVIn);
227  OutCmtyValHV[c].GetKeyV(CmtyVOut);
228  CmtyVV.Add(CmtyVInOut);
229  CmtyVV.Add(CmtyVOut);
230  CmtyVV.Add(CmtyVIn);
231  }
232  }
TVec< TIntFltH > OutCmtyValHV
Definition: agmdirected.h:202
TVec< TIntFltH > InCmtyValHV
Definition: agmdirected.h:201
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:602
TVec< TIntFltH > InOutCmtyValHV
Definition: agmdirected.h:203
void TCodaAnalyzer::GetCmtyVAll ( TIntV &  CmtyVAll,
const int  CID 
)
inline

Fills CmtyVAll with the union of the in-member and out-member node IDs of community CID.

Definition at line 263 of file agmdirected.h.

263  {
264  TIntV CmtyVIn, CmtyVOut;
265  InCmtyValHV[CID].GetKeyV(CmtyVIn);
266  OutCmtyValHV[CID].GetKeyV(CmtyVOut);
267  CmtyVIn.Sort();
268  CmtyVOut.Sort();
269  CmtyVAll.Gen(CmtyVIn.Len() + CmtyVOut.Len(), 0);
270  CmtyVIn.Union(CmtyVOut, CmtyVAll);
271  }
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:575
TVec< TIntFltH > OutCmtyValHV
Definition: agmdirected.h:202
TVec< TIntFltH > InCmtyValHV
Definition: agmdirected.h:201
void Sort(const bool &Asc=true)
Sorts the elements of the vector.
Definition: ds.h:1318
void Union(const TVec< TVal, TSizeTy > &ValV)
Sets this vector to its union with ValV. Assumes the vectors are sorted!
Definition: ds.h:1418
void Gen(const TSizeTy &_Vals)
Constructs a vector (an array) of _Vals elements.
Definition: ds.h:523
double TCodaAnalyzer::GetFrac2Mode ( const double  Thres2Mode = 0.2,
const int  MinSzEach = 2 
)
inline

Definition at line 234 of file agmdirected.h.

234  {
235  int Cnt2Mode = 0;
236  int CntAll = 0;
237  for (int c = 0; c < InCmtyValHV.Len(); c++) {
238  double Jacc = (double) InOutCmtyValHV[c].Len() / (double) (InCmtyValHV[c].Len() + OutCmtyValHV[c].Len() - InOutCmtyValHV[c].Len());
239  if (InCmtyValHV[c].Len() < MinSzEach || OutCmtyValHV[c].Len() < MinSzEach) { continue; }
240  if (Jacc <= Thres2Mode) { Cnt2Mode++; }
241  CntAll++;
242  }
243  return (double) Cnt2Mode / (double) CntAll;
244  }
TVec< TIntFltH > OutCmtyValHV
Definition: agmdirected.h:202
TVec< TIntFltH > InCmtyValHV
Definition: agmdirected.h:201
TVec< TIntFltH > InOutCmtyValHV
Definition: agmdirected.h:203
int TCodaAnalyzer::GetNumComs ( )
inline

Definition at line 260 of file agmdirected.h.

260 { return InCmtyValHV.Len(); }
TVec< TIntFltH > InCmtyValHV
Definition: agmdirected.h:201
PNGraph TCodaAnalyzer::Net2ModeCommunities ( const double  MaxJac,
const double  JacEdge,
const bool  GetWcc = true 
)
inline

Definition at line 273 of file agmdirected.h.

273  {
274  //if In(A) is similar to Out(B), create an edge A->B between 2 communities A, B
275  int Coms = InCmtyValHV.Len();
276  PNGraph ComG = TNGraph::New(Coms, -1);
277  for (int c = 0; c < InCmtyValHV.Len(); c++) {
278  double Jacc = (double) InOutCmtyValHV[c].Len() / (double) (InCmtyValHV[c].Len() + OutCmtyValHV[c].Len() - InOutCmtyValHV[c].Len());
279  if (Jacc > MaxJac) { continue; }
280  ComG->AddNode(c);
281  }
282  TVec<TIntSet> CmtySVIn, CmtySVOut;
283  for (int c = 0; c < Coms; c++) {
284  TIntV CmtyVIn, CmtyVOut;
285  InCmtyValHV[c].GetKeyV(CmtyVIn);
286  OutCmtyValHV[c].GetKeyV(CmtyVOut);
287  TIntSet CmtySIn(CmtyVIn), CmtySOut(CmtyVOut);
288  CmtySVIn.Add(CmtySIn);
289  CmtySVOut.Add(CmtySOut);
290  }
291  for (int c1 = 0; c1 < Coms; c1++) {
292  if (! ComG->IsNode(c1)) { continue; }
293  for (int c2 = 0; c2 < Coms; c2++) {
294  if (! ComG->IsNode(c2)) { continue; }
295  int IntC1C2 = TAGMUtil::Intersection(CmtySVIn[c1], CmtySVOut[c2]);
296  double Jac = (double) IntC1C2 / (CmtySVIn[c1].Len() + CmtySVOut[c2].Len() - IntC1C2);
297  if (Jac >= JacEdge) {
298  ComG->AddEdge(c1, c2);
299  }
300  }
301  }
302  //PNGraph Wcc = TSnap::GetMxWcc(ComG);
303  TIntV NIDV;
304  ComG->GetNIdV(NIDV);
305  for (int u = 0; u < NIDV.Len(); u++) {
306  int NID = NIDV[u];
307  TNGraph::TNodeI NI = ComG->GetNI(NID);
308  if (NI.GetDeg() == 0) { ComG->DelNode(NID); }
309  if (NI.GetInDeg() == 1 && NI.GetOutDeg() == 1 && NI.GetOutNId(0) == NID) { ComG->DelNode(NID); }
310  }
311  printf("Community graph made (Jaccard similarity for edges: %f, %d nodes, %d edges)\n", JacEdge, ComG->GetNodes(), ComG->GetEdges());
312  return ComG;
313  }
static PNGraph New()
Static constructor that returns a pointer to the graph. Call: PNGraph Graph = TNGraph::New().
Definition: graph.h:481
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:575
static int Intersection(const TIntV &C1, const TIntV &C2)
Definition: agm.cpp:399
TVec< TIntFltH > OutCmtyValHV
Definition: agmdirected.h:202
TVec< TIntFltH > InCmtyValHV
Definition: agmdirected.h:201
int GetDeg() const
Returns degree of the current node, the sum of in-degree and out-degree.
Definition: graph.h:402
int GetOutDeg() const
Returns out-degree of the current node.
Definition: graph.h:406
Node iterator. Only forward iteration (operator++) is supported.
Definition: graph.h:383
Definition: bd.h:196
int GetInDeg() const
Returns in-degree of the current node.
Definition: graph.h:404
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:602
TVec< TIntFltH > InOutCmtyValHV
Definition: agmdirected.h:203
int GetOutNId(const int &NodeN) const
Returns ID of NodeN-th out-node (the node the current node points to).
Definition: graph.h:416
Vector is a sequence of TVal objects representing an array that can change in size.
Definition: ds.h:430
void TCodaAnalyzer::Summary ( const int  TopK = 10,
const double  Thres2Mode = 0.2 
)
inline

Definition at line 246 of file agmdirected.h.

246  {
247  int Cnt2Mode = 0;
248  double SumJacc = 0.0;
249  for (int c = 0; c < InCmtyValHV.Len(); c++) {
250  double Jacc = (double) InOutCmtyValHV[c].Len() / (double) (InCmtyValHV[c].Len() + OutCmtyValHV[c].Len() - InOutCmtyValHV[c].Len());
251  if (Jacc <= Thres2Mode) { Cnt2Mode++; }
252  SumJacc += Jacc;
253  if (c < TopK) {
254  printf("Cmty %d: InOut: %d, In:%d, Out:%d, Jacc;%.3f\n", c, InCmtyValHV[c].Len(), InCmtyValHV[c].Len(), OutCmtyValHV[c].Len(), Jacc);
255  }
256  }
257  double AvgJacc = SumJacc / (double) InCmtyValHV.Len();
258  printf("Average jaccard similarity = %.3f. (%d / %d communities are 2-mode)\n", AvgJacc, Cnt2Mode, InCmtyValHV.Len());
259  }
TVec< TIntFltH > OutCmtyValHV
Definition: agmdirected.h:202
TVec< TIntFltH > InCmtyValHV
Definition: agmdirected.h:201
TVec< TIntFltH > InOutCmtyValHV
Definition: agmdirected.h:203

Member Data Documentation

PNGraph TCodaAnalyzer::G

Definition at line 200 of file agmdirected.h.

TVec<TIntFltH> TCodaAnalyzer::InCmtyValHV

Definition at line 201 of file agmdirected.h.

TVec<TIntFltH> TCodaAnalyzer::InOutCmtyValHV

Definition at line 203 of file agmdirected.h.

TVec<TIntFltH> TCodaAnalyzer::OutCmtyValHV

Definition at line 202 of file agmdirected.h.


The documentation for this class was generated from the following file: