1 #ifndef yanglib_agmdirected_h
2 #define yanglib_agmdirected_h
27 TCoda(
const PNGraph& GraphPt,
const int& InitComs,
const int RndSeed = 0): Rnd(RndSeed), RegCoef(0),
28 NodesOk(true), MinVal(0.0), MaxVal(1000.0), NegWgt(1.0) {
SetGraph(GraphPt);
RandomInit(InitComs); }
33 void SetRegCoef(
const double _RegCoef) { RegCoef = _RegCoef; }
40 double Likelihood(
const bool DoParallel =
false);
52 void GetTopCIDs(
TIntV& CIdV,
const int TopK,
const int IsAverage = 1,
const int MinSz = 1);
68 int FindComsByCV(
TIntV& ComsV,
const double HOFrac = 0.2,
const int NumThreads = 20,
const TStr PlotLFNm =
TStr(),
const int EdgesForCV = 100,
const double StepAlpha = 0.3,
const double StepBeta = 0.1);
69 int FindComsByCV(
const int NumThreads,
const int MaxComs,
const int MinComs,
const int DivComs,
const TStr OutFNm,
const int EdgesForCV = 100,
const double StepAlpha = 0.3,
const double StepBeta = 0.3);
72 int MLEGradAscent(
const double& Thres,
const int& MaxIter,
const TStr PlotNm,
const double StepAlpha = 0.3,
const double StepBeta = 0.1);
73 int MLEGradAscentParallel(
const double& Thres,
const int& MaxIter,
const int ChunkNum,
const int ChunkSize,
const TStr PlotNm,
const double StepAlpha = 0.3,
const double StepBeta = 0.1);
74 int MLEGradAscentParallel(
const double& Thres,
const int& MaxIter,
const int ChunkNum,
const TStr PlotNm =
TStr(),
const double StepAlpha = 0.3,
const double StepBeta = 0.1) {
75 int ChunkSize = G->
GetNodes() / 10 / ChunkNum;
76 if (ChunkSize == 0) { ChunkSize = 1; }
85 void Load(
TSIn& SIn,
const int& RndSeed = 0);
93 double inline GetCom(
const bool IsOut,
const int& NID,
const int& CID) {
100 double inline GetComOut(
const int& NID,
const int& CID) {
101 if (F[NID].IsKey(CID)) {
102 return F[NID].
GetDat(CID);
107 double inline GetComIn(
const int& NID,
const int& CID) {
108 if (H[NID].IsKey(CID)) {
109 return H[NID].
GetDat(CID);
114 void inline AddCom(
const bool IsOut,
const int& NID,
const int& CID,
const double& Val) {
121 void inline AddComOut(
const int& NID,
const int& CID,
const double& Val) {
122 if (F[NID].IsKey(CID)) {
123 SumFV[CID] -= F[NID].
GetDat(CID);
125 F[NID].AddDat(CID) = Val;
128 void inline AddComIn(
const int& NID,
const int& CID,
const double& Val) {
129 if (H[NID].IsKey(CID)) {
130 SumHV[CID] -= H[NID].
GetDat(CID);
132 H[NID].AddDat(CID) = Val;
135 void inline DelCom(
const bool IsOut,
const int& NID,
const int& CID) {
143 if (F[NID].IsKey(CID)) {
144 SumFV[CID] -= F[NID].
GetDat(CID);
148 void inline DelComIn(
const int& NID,
const int& CID) {
149 if (H[NID].IsKey(CID)) {
150 SumHV[CID] -= H[NID].
GetDat(CID);
156 if (UV.
Len() > VV.
Len()) {
158 if (VV.
IsKey(HI.GetKey())) {
159 DP += VV.
GetDat(HI.GetKey()) * HI.GetDat();
164 if (UV.
IsKey(HI.GetKey())) {
165 DP += UV.
GetDat(HI.GetKey()) * HI.GetDat();
175 double DP = log (1.0 / (1.0 - PNoCom)) +
DotProduct(FU, HV);
192 N += HI.GetDat() * HI.GetDat();
210 double Delta = MemThres == -1.0 ? sqrt(Coda.
PNoCom): MemThres;
211 for (
int c = 0; c < CIdV.Len(); c++) {
213 TIntFltH InMemH, OutMemH, InOutMemH;
214 Coda.
GetNIDValH(InOutMemH, OutMemH, InMemH, CID, Delta);
215 InCmtyValHV.
Add(InMemH);
216 OutCmtyValHV.
Add(OutMemH);
217 InOutCmtyValHV.
Add(InOutMemH);
219 printf(
"Communities copied (%d communities)\n", InCmtyValHV.
Len());
222 for (
int c = 0; c < InCmtyValHV.
Len(); c++) {
223 TIntV CmtyVIn, CmtyVOut, CmtyVInOut;
224 if (InCmtyValHV[c].Len() < MinSz || OutCmtyValHV[c].
Len() < MinSz) {
continue; }
225 InOutCmtyValHV[c].GetKeyV(CmtyVInOut);
226 InCmtyValHV[c].GetKeyV(CmtyVIn);
227 OutCmtyValHV[c].GetKeyV(CmtyVOut);
228 CmtyVV.
Add(CmtyVInOut);
229 CmtyVV.
Add(CmtyVOut);
234 double GetFrac2Mode(
const double Thres2Mode = 0.2,
const int MinSzEach = 2) {
237 for (
int c = 0; c < InCmtyValHV.
Len(); c++) {
238 double Jacc = (double) InOutCmtyValHV[c].Len() / (double) (InCmtyValHV[c].Len() + OutCmtyValHV[c].
Len() - InOutCmtyValHV[c].
Len());
239 if (InCmtyValHV[c].Len() < MinSzEach || OutCmtyValHV[c].
Len() < MinSzEach) {
continue; }
240 if (Jacc <= Thres2Mode) { Cnt2Mode++; }
243 return (
double) Cnt2Mode / (double) CntAll;
246 void Summary(
const int TopK = 10,
const double Thres2Mode = 0.2) {
248 double SumJacc = 0.0;
249 for (
int c = 0; c < InCmtyValHV.
Len(); c++) {
250 double Jacc = (double) InOutCmtyValHV[c].Len() / (double) (InCmtyValHV[c].Len() + OutCmtyValHV[c].
Len() - InOutCmtyValHV[c].
Len());
251 if (Jacc <= Thres2Mode) { Cnt2Mode++; }
254 printf(
"Cmty %d: InOut: %d, In:%d, Out:%d, Jacc;%.3f\n", c, InCmtyValHV[c].Len(), InCmtyValHV[c].Len(), OutCmtyValHV[c].Len(), Jacc);
257 double AvgJacc = SumJacc / (double) InCmtyValHV.
Len();
258 printf(
"Average jaccard similarity = %.3f. (%d / %d communities are 2-mode)\n", AvgJacc, Cnt2Mode, InCmtyValHV.
Len());
264 TIntV CmtyVIn, CmtyVOut;
265 InCmtyValHV[CID].GetKeyV(CmtyVIn);
266 OutCmtyValHV[CID].GetKeyV(CmtyVOut);
269 CmtyVAll.
Gen(CmtyVIn.
Len() + CmtyVOut.
Len(), 0);
270 CmtyVIn.
Union(CmtyVOut, CmtyVAll);
275 int Coms = InCmtyValHV.
Len();
277 for (
int c = 0; c < InCmtyValHV.
Len(); c++) {
278 double Jacc = (double) InOutCmtyValHV[c].Len() / (double) (InCmtyValHV[c].Len() + OutCmtyValHV[c].
Len() - InOutCmtyValHV[c].
Len());
279 if (Jacc > MaxJac) {
continue; }
283 for (
int c = 0; c < Coms; c++) {
284 TIntV CmtyVIn, CmtyVOut;
285 InCmtyValHV[c].GetKeyV(CmtyVIn);
286 OutCmtyValHV[c].GetKeyV(CmtyVOut);
287 TIntSet CmtySIn(CmtyVIn), CmtySOut(CmtyVOut);
288 CmtySVIn.
Add(CmtySIn);
289 CmtySVOut.
Add(CmtySOut);
291 for (
int c1 = 0; c1 < Coms; c1++) {
292 if (! ComG->
IsNode(c1)) {
continue; }
293 for (
int c2 = 0; c2 < Coms; c2++) {
294 if (! ComG->
IsNode(c2)) {
continue; }
296 double Jac = (double) IntC1C2 / (CmtySVIn[c1].Len() + CmtySVOut[c2].
Len() - IntC1C2);
297 if (Jac >= JacEdge) {
305 for (
int u = 0; u < NIDV.
Len(); u++) {
311 printf(
"Community graph made (Jaccard similarity for edges: %f, %d nodes, %d edges)\n", JacEdge, ComG->
GetNodes(), ComG->
GetEdges());
318 FILE* F = fopen(OutFNm.
CStr(),
"wt");
319 for (
int c = 0; c < InCmtyValHV.
Len(); c++) {
320 double Jacc = (double) InOutCmtyValHV[c].Len() / (double) (InCmtyValHV[c].Len() + OutCmtyValHV[c].
Len() - InOutCmtyValHV[c].
Len());
321 if (Jacc > MaxJac) {
continue; }
322 TIntV CmtyVIn, CmtyVOut, CmtyVAll;
323 InCmtyValHV[c].GetKeyV(CmtyVIn);
324 OutCmtyValHV[c].GetKeyV(CmtyVOut);
327 for (
int u = 0; u < InOutCmtyValHV[c].
Len(); u++) {
328 int UID = InOutCmtyValHV[c].GetKey(u);
329 if (CmtyVIn.
Len() >= CmtyVOut.
Len()) {
335 if (CmtyVAll.
Len() == 0) {
continue; }
336 fprintf(F,
"Com %d\n", c);
337 for (
int u = 0; u < CmtyVOut.
Len(); u++) {
338 int NID = CmtyVOut[u];
340 fprintf(F,
"%s:%f\n", Label.
CStr(), OutCmtyValHV[c].
GetDat(NID).Val);
342 fprintf(F,
"||==>||\n");
343 for (
int u = 0; u < CmtyVIn.
Len(); u++) {
344 int NID = CmtyVIn[u];
346 fprintf(F,
"%s:%f\n", Label.
CStr(), InCmtyValHV[c].
GetDat(NID).Val);
356 TIntV CmtyVIn, CmtyVOut, CmtyVAll;
357 InCmtyValHV[CID].GetKeyV(CmtyVIn);
358 OutCmtyValHV[CID].GetKeyV(CmtyVOut);
362 for (
int u = 0; u < InOutCmtyValHV[CID].
Len(); u++) {
363 int UID = InOutCmtyValHV[CID].GetKey(u);
364 if (CmtyVIn.
Len() >= CmtyVOut.
Len()) {
373 if (CmtyVAll.
Len() == 0) {
return; }
374 double OXMin = 0.1, YMin = 0.1, OXMax = 2500.00, YMax = 1000.0, IXMin = 0.1, IXMax = 2500.00;
375 double OStep = (OXMax - OXMin) / (
double) CmtyVOut.
Len(), IStep = (IXMax - IXMin) / (
double) CmtyVIn.
Len();
377 FILE* F = fopen(OutFNm.
CStr(),
"wt");
378 fprintf(F,
"<?xml version='1.0' encoding='UTF-8'?>\n");
379 fprintf(F,
"<gexf xmlns='http://www.gexf.net/1.2draft' xmlns:viz='http://www.gexf.net/1.1draft/viz' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' xsi:schemaLocation='http://www.gexf.net/1.2draft http://www.gexf.net/1.2draft/gexf.xsd' version='1.2'>\n");
380 fprintf(F,
"\t<graph mode='static' defaultedgetype='directed'>\n");
381 fprintf(F,
"\t\t<nodes>\n");
382 for (
int c = 0; c < CmtyVOut.
Len(); c++) {
383 int NID = CmtyVOut[c];
384 double XPos = c * OStep + OXMin;
391 fprintf(F,
"\t\t\t<node id='%d' label='%s'>\n", NID, Label.
CStr());
392 fprintf(F,
"\t\t\t\t<viz:color r='%d' g='%d' b='%d'/>\n", Color.
Val1.Val, Color.
Val2.Val, Color.
Val3.Val);
393 fprintf(F,
"\t\t\t\t<viz:size value='4.0'/>\n");
394 fprintf(F,
"\t\t\t\t<viz:shape value='square'/>\n");
395 fprintf(F,
"\t\t\t\t<viz:position x='%f' y='%f' z='0.0'/>\n", XPos, YMax);
396 fprintf(F,
"\t\t\t</node>\n");
399 for (
int u = 0; u < CmtyVIn.
Len(); u++) {
400 int NID = CmtyVIn[u];
406 double XPos = IXMin + u * IStep;
409 fprintf(F,
"\t\t\t<node id='%d' label='%s'>\n", NID, Label.
CStr());
410 fprintf(F,
"\t\t\t\t<viz:color r='%d' g='%d' b='%d' a='%.1f'/>\n", Color.
Val1.Val, Color.
Val2.Val, Color.
Val3.Val, Alpha);
411 fprintf(F,
"\t\t\t\t<viz:size value='4.0'/>\n");
412 fprintf(F,
"\t\t\t\t<viz:shape value='square'/>\n");
413 fprintf(F,
"\t\t\t\t<viz:position x='%f' y='%f' z='0.0'/>\n", XPos, YMin);
414 fprintf(F,
"\t\t\t</node>\n");
416 fprintf(F,
"\t\t</nodes>\n");
419 fprintf(F,
"\t\t<edges>\n");
421 if (NI.GetOutDeg() == 0 && NI.GetInDeg() == 0 ) {
continue; }
422 for (
int e = 0; e < NI.GetOutDeg(); e++) {
423 fprintf(F,
"\t\t\t<edge id='%d' source='%d' target='%d'/>\n", EID++, NI.GetId(), NI.GetOutNId(e));
426 fprintf(F,
"\t\t</edges>\n");
427 fprintf(F,
"\t</graph>\n");
428 fprintf(F,
"</gexf>\n");
bool DelIfIn(const TVal &Val)
Removes the first occurrence of element Val.
void GetNIdV(TIntV &NIdV) const
Gets a vector IDs of all nodes in the graph.
#define IAssertR(Cond, Reason)
TNodeI BegNI() const
Returns an iterator referring to the first node in the graph.
double LikelihoodForNode(const bool IsRow, const int UID)
void AddComIn(const int &NID, const int &CID, const double &Val)
void DelComOut(const int &NID, const int &CID)
void Dump2ModeCommunities(const TStr &OutFNm, const double MaxJac, const TIntStrH &NIDNameH)
void DumpMemberships(const TStr &OutFNm)
static PNGraph New()
Static constructor that returns a pointer to the graph. Call: PNGraph Graph = TNGraph::New().
TNodeI GetNI(const int &NId) const
Returns an iterator referring to the node of ID NId in the graph.
void SetGraph(const PNGraph &GraphPt)
void GetNIDValH(TIntFltH &NIdValInOutH, TIntFltH &NIdValOutH, TIntFltH &NIdValInH, const int CID, const double Thres)
void GetAllCmtyVV(TVec< TIntV > &CmtyVV, const int MinSz)
double Likelihood(const bool DoParallel=false)
void NeighborComInit(const int InitComs)
int GetEdges() const
Returns the number of edges in the graph.
TSizeTy Len() const
Returns the number of elements in the vector.
void DelComIn(const int &NID, const int &CID)
static int Intersection(const TIntV &C1, const TIntV &C2)
TVec< TIntFltH > OutCmtyValHV
double GetComOut(const int &NID, const int &CID)
void SetCmtyVV(const TVec< TIntV > &CmtyVVOut, const TVec< TIntV > &CmtyVVIn)
int GetNodes() const
Returns the number of nodes in the graph.
TFlt & GetSumVal(const bool IsOut, const int CID)
int AddNode(int NId=-1)
Adds a node of ID NId to the graph.
void GetCmtyS(TIntSet &CmtySOut, TIntSet &CmtySIn, const int CID, const double Thres)
int FindComsByCV(TIntV &ComsV, const double HOFrac=0.2, const int NumThreads=20, const TStr PlotLFNm=TStr(), const int EdgesForCV=100, const double StepAlpha=0.3, const double StepBeta=0.1)
const TDat & GetDat(const TKey &Key) const
TVec< TIntFltH > InCmtyValHV
void GetTopCIDs(TIntV &CIdV, const int TopK, const int IsAverage=1, const int MinSz=1)
void SetRegCoef(const double _RegCoef)
int ChangeChAll(const char &SrcCh, const char &DstCh)
double DotProduct(const TIntFltH &UV, const TIntFltH &VV)
int MLEGradAscentParallel(const double &Thres, const int &MaxIter, const int ChunkNum, const TStr PlotNm=TStr(), const double StepAlpha=0.3, const double StepBeta=0.1)
void GetCmtyVVUnSorted(const bool IsOut, TVec< TIntV > &CmtyVV, const double Thres, const int MinSz=3)
double GetFrac2Mode(const double Thres2Mode=0.2, const int MinSzEach=2)
void DelNode(const int &NId)
Deletes node of ID NId from the graph.
int MLEGradAscentParallel(const double &Thres, const int &MaxIter, const int ChunkNum, const int ChunkSize, const TStr PlotNm, const double StepAlpha=0.3, const double StepBeta=0.1)
double Prediction(const int &UID, const int &VID)
double GetStepSizeByLineSearch(const bool IsRow, const int UID, const TIntFltH &DeltaV, const TIntFltH &GradV, const double &Alpha, const double &Beta, const int MaxIter=10)
void AddComOut(const int &NID, const int &CID, const double &Val)
void GetCmtyVV(TVec< TIntV > &CmtyVVOut, TVec< TIntV > &CmtyVVIn, const int MinSz=3)
double LikelihoodHoldOut(const bool DoParallel=false)
void Sort(const bool &Asc=true)
Sorts the elements of the vector.
int AddEdge(const int &SrcNId, const int &DstNId)
Adds an edge from node SrcNId to node DstNId to the graph.
TCoda(const PNGraph &GraphPt, const int &InitComs, const int RndSeed=0)
void Summary(const int TopK=10, const double Thres2Mode=0.2)
const TVal & GetDat(const TVal &Val) const
Returns reference to the first occurrence of element Val.
PUNGraph GetSubGraph(const PUNGraph &Graph, const TIntV &NIdV, const bool &RenumberNodes)
Returns an induced subgraph of an undirected graph Graph with NIdV nodes with an optional node renumb...
int MLEGradAscent(const double &Thres, const int &MaxIter, const TStr PlotNm, const double StepAlpha=0.3, const double StepBeta=0.1)
bool IsNode(const int &NId) const
Tests whether ID NId is a node.
void Load(TSIn &SIn, const int &RndSeed=0)
TCodaAnalyzer(TCoda &Coda, const double MemThres=-1.0)
double Prediction(const TIntFltH &FU, const TIntFltH &HV)
double GetComIn(const int &NID, const int &CID)
int GetDeg() const
Returns degree of the current node, the sum of in-degree and out-degree.
void DelCom(const bool IsOut, const int &NID, const int &CID)
void GetNonEdgePairScores(TFltIntIntTrV &ScoreV)
void RandomInit(const int InitComs)
PNGraph Net2ModeCommunities(const double MaxJac, const double JacEdge, const bool GetWcc=true)
double Norm2(const TIntFltH &UV)
void SetHoldOut(const double HOFrac)
TNodeI EndNI() const
Returns an iterator referring to the past-the-end node in the graph.
double DotProductUtoV(const int &UID, const int &VID)
void Union(const TVec< TVal, TSizeTy > &ValV)
Sets this vector to its union with ValV. Assumes the vectors are sorted!
int GetOutDeg() const
Returns out-degree of the current node.
static TStr Fmt(const char *FmtStr,...)
Node iterator. Only forward iteration (operator++) is supported.
void DumpMemberships(const TStr &OutFNm, const TStrHash< TInt > &NodeNameH)
static void GenHoldOutPairs(const PGraph &G, TVec< TIntSet > &HoldOutSet, double HOFrac, TRnd &Rnd)
void AddCom(const bool IsOut, const int &NID, const int &CID, const double &Val)
TTriple< TInt, TInt, TInt > TIntTr
void Gen(const TSizeTy &_Vals)
Constructs a vector (an array) of _Vals elements.
void GetCommunity(TIntV &CmtyVIn, TIntV &CmtyVOut, const int CID)
void GetCmtyVAll(TIntV &CmtyVAll, const int CID)
save bipartite community affiliation into gexf file
int GetInDeg() const
Returns in-degree of the current node.
bool IsKey(const TKey &Key) const
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
double Sum(const TIntFltH &UV)
double GetCom(const bool IsOut, const int &NID, const int &CID)
TVec< TIntFltH > InOutCmtyValHV
void GradientForNode(const bool IsRow, const int UID, TIntFltH &GradU, const TIntSet &CIDSet)
int GetOutNId(const int &NodeN) const
Returns ID of NodeN-th out-node (the node the current node points to).
void Draw2ModeCommunity(const int CID, const TStr &OutFNm, const TIntStrH &NIDNameH, const THash< TInt, TIntTr > &NIDColorH)
Vector is a sequence TVal objects representing an array that can change in size.