SNAP Library 2.2, Developer Reference  2014-03-11 19:15:55
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
bignet.h
Go to the documentation of this file.
00001 //#//////////////////////////////////////////////
00003 
00010 template <class TNodeData, bool IsDir>
00011 class TBigNet {
00012 public:
00013   typedef TNodeData TNodeDat;
00014   typedef TBigNet<TNodeData, IsDir> TNet;
00015   typedef TPt<TNet> PNet;
00016 public:
00017   class TNode;
00018   typedef THash<TInt, TNode> TNodeH; // use SaveToDisk to convert between the two hash table types
00019   typedef TPt<TBigNet<TNodeData, IsDir> > PBigNet;
00020   typedef TVecPool<TInt> TVPool;
00021   typedef TPt<TVPool> PVPool;
00022 
00024 
00026   class TNode {
00027   public:
00029     TInt InVId;
00031 
00033     TInt OutVId; 
00035     TNodeDat Dat;
00036   public:
00037     TNode() : InVId(-1), OutVId(-1), Dat() { }
00038     TNode(const int& InVecId, const int& OutVecId) : InVId(InVecId), OutVId(OutVecId), Dat() { }
00039     TNode(const int& InVecId, const int& OutVecId, const TNodeDat& NodeDat) :
00040       InVId(InVecId), OutVId(OutVecId), Dat(NodeDat) { }
00041     TNode(const TNode& Node) : InVId(Node.InVId), OutVId(Node.OutVId), Dat(Node.Dat) { }
00042     TNode(TSIn& SIn) : InVId(SIn), OutVId(SIn), Dat(SIn) { }
00043     void Save(TSOut& SOut) const { SOut.Save(InVId); SOut.Save(OutVId); Dat.Save(SOut); }
00045     bool IsUnused() const { return InVId==-1 && OutVId==-1; }
00046   };
00048 
00050   class TNodeI {
00051   protected:
00052     typedef typename TNodeH::TIter THashIter;
00053     THashIter NodeHI;
00054     TVPool *Pool;
00055     int InDeg, OutDeg, *InNIdV, *OutNIdV; // if undirected, InNIdV==OutNIdV
00056   public:
00057     inline void GetInOutNIdV();
00058     int GetInVId() const { return NodeHI->Dat.InVId; }
00059     int GetOutVId() const { return NodeHI->Dat.OutVId; }
00060   public:
00061     TNodeI() : NodeHI(), Pool(NULL), InDeg(0), OutDeg(0), InNIdV(NULL), OutNIdV(NULL) { }
00062     TNodeI(const THashIter& NodeHIter, TVPool *PoolPt) : NodeHI(NodeHIter), Pool(PoolPt) { GetInOutNIdV(); }
00063     TNodeI(const TNodeI& NodeI) : NodeHI(NodeI.NodeHI), Pool(NodeI.Pool) { GetInOutNIdV(); }
00064     TNodeI& operator = (const TNodeI& NI) { NodeHI=NI.NodeHI; Pool=NI.Pool; GetInOutNIdV(); return *this; }
00066     TNodeI& operator++ (int) { NodeHI++; GetInOutNIdV(); return *this; }
00067     bool operator < (const TNodeI& NI) const { return NodeHI < NI.NodeHI; }
00068     bool operator == (const TNodeI& NI) const { return NodeHI == NI.NodeHI; }
00069     int GetId() const { return NodeHI->Key(); }
00070     int GetDeg() const { return GetInDeg()+(InNIdV!=OutNIdV?GetOutDeg():0); }
00071     int GetInDeg() const { return InDeg; }
00072     int GetOutDeg() const { return OutDeg; }
00073     int GetInNId(const int& NodeN) const { return InNIdV[NodeN]; }
00074     int GetOutNId(const int& NodeN) const { return OutNIdV[NodeN]; }
00075     int GetOutNbrId(const int& NodeN) const { return NodeN<OutDeg ? OutNIdV[NodeN]:InNIdV[NodeN-OutDeg]; }
00076     bool IsInNId(const int& NId) const { return BinSearch(InNIdV, InNIdV+InDeg, NId)!=NULL; }
00077     bool IsOutNId(const int& NId) const { return BinSearch(OutNIdV, OutNIdV+OutDeg, NId)!=NULL; }
00078     bool IsNbrNId(const int& NId) const { return IsOutNId(NId) || IsInNId(NId); }
00079     const TNodeData& operator () () const { return GetDat(); }
00080     TNodeData& operator () () { return GetDat(); }
00081     const TNodeData& GetDat() const { return NodeHI->Dat.Dat; }
00082     TNodeData& GetDat() { return NodeHI->Dat.Dat; }
00083     // requires pointer back to the network (see as in TNodeNet)
00084     //const TNodeData& GetInNDat(const int& NodeN) const { return Net->GetNDat(GetInNId(NodeN)); }
00085     //TNodeData& GetInNDat(const int& NodeN) { return Net->GetNDat(GetInNId(NodeN)); }
00086     //const TNodeData& GetOutNDat(const int& NodeN) const { return Net->GetNDat(GetOutNId(NodeN)); }
00087     //TNodeData& GetOutNDat(const int& NodeN) { return Net->GetNDat(GetOutNId(NodeN)); }
00088     //const TNodeData& GetNbrNDat(const int& NodeN) const { return Net->GetNDat(GetNbrNId(NodeN)); }
00089     //TNodeData& GetNbrNDat(const int& NodeN) { return Net->GetNDat(GetNbrNId(NodeN)); }
00090     void Dump() const;
00091     friend class TBigNet<TNodeData, IsDir>;
00092   };
00093 
00095 
00097   class TEdgeI {
00098   private:
00099     TNodeI CurNode, EndNode;
00100     int CurEdge;
00101   public:
00102     TEdgeI() : CurNode(), EndNode(), CurEdge(0) { }
00103     TEdgeI(const TNodeI& NodeI, const TNodeI& EndNodeI, const int& EdgeN=0) : CurNode(NodeI), EndNode(EndNodeI), CurEdge(0) { }
00104     TEdgeI(const TEdgeI& EdgeI) : CurNode(EdgeI.CurNode), EndNode(EdgeI.EndNode), CurEdge(EdgeI.CurEdge) { }
00105     TEdgeI& operator = (const TEdgeI& EdgeI) { if (this!=&EdgeI) { CurNode=EdgeI.CurNode;  EndNode=EdgeI.EndNode;  CurEdge=EdgeI.CurEdge; }  return *this; }
00106     TEdgeI& operator++ (int) { CurEdge++; if (CurEdge >= CurNode.GetOutDeg()) { CurEdge=0;  CurNode++;
00107       while (CurNode < EndNode && CurNode.GetOutDeg()==0) { CurNode++; } }  return *this; }
00108     bool operator < (const TEdgeI& EdgeI) const { return CurNode<EdgeI.CurNode || (CurNode==EdgeI.CurNode && CurEdge<EdgeI.CurEdge); }
00109     bool operator == (const TEdgeI& EdgeI) const { return CurNode == EdgeI.CurNode && CurEdge == EdgeI.CurEdge; }
00110     int GetId() const { return -1; }
00111     int GetSrcNId() const { return CurNode.GetId(); }
00112     int GetDstNId() const { return CurNode.GetOutNId(CurEdge); }
00113     const TNodeData& GetSrcNDat() const { return CurNode.GetNDat(); }
00114     TNodeData& GetDstNDat() { return CurNode.GetOutNDat(CurEdge); }
00115     friend class TNodeNet<TNodeData>;
00116   };
00117 protected:
00118   bool IsNode(const int& NId, TNode& Node) const { return NodeH.IsKeyGetDat(NId, Node); }
00119   int* GetInNIdVPt(const int& NId) const { return (int *) Pool.GetValVPt(GetNode(NId).InVId); }
00120   int* GetOutNIdVPt(const int& NId) const { return (int *) Pool.GetValVPt(GetNode(NId).OutVId); }
00121   static void AddSorted(int* Beg, int* End, const int& Val);
00122   static const int* BinSearch(const int* Beg, const int* End, const int& Val);
00123   static const int* BinSearch2(const int* Beg, const int* End, const int& Val);
00124   const TNode& GetNode(const int& NId) const { return NodeH.GetDat(NId); }
00125   TNode& GetNode(const int& NId) { return NodeH.GetDat(NId); }
00126 public:
00127   enum { DelNId = INT_MAX }; // id of a deleted node
00128 protected:
00129   TCRef CRef;
00130   TInt MxNId;
00131   TB32Set Flags;
00132   TVPool Pool;
00133   TNodeH NodeH;
00134 public:
00135   TBigNet(const int& Nodes, const TSize& Edges, const bool& Sources=false);
00136   TBigNet(TSIn& SIn) : MxNId(SIn), Flags(SIn), Pool(SIn), NodeH(SIn) { TBool Dir(SIn); IAssert(Dir()==IsDir); }
00137   virtual ~TBigNet() { }
00138   virtual void Save(TSOut& SOut) const;
00139   static PBigNet New(const int& Nodes, const TSize& Edges, const bool& Sources=false) {
00140     return PBigNet(new TBigNet(Nodes, Edges, Sources)); }
00141   static PBigNet Load(TSIn& SIn) { return PBigNet(new TBigNet(SIn)); }
00142   TBigNet& operator = (const TBigNet& Net) { if (this!=&Net) {
00143     MxNId=Net.MxNId; Flags=Net.Flags; Pool=Net.Pool; NodeH=Net.NodeH; }  return *this; }
00144 
00145   bool OnlySources() const { return Flags.In(gfSources); }
00146   bool HasFlag(const TGraphFlag& Flag) const {
00147     return HasGraphFlag(typename TBigNet, Flag) || (Flag==gfSources && OnlySources()); }
00148   void DumpFlags() const;
00149 
00150   // vertices
00151   int GetNodes() const { return NodeH.Len(); }
00153   int GetMxNId() const { return MxNId; }
00154   int AddNode(int NId, const int& InDeg, const int& OutDeg);
00155   int AddNode(int NId, const int& InDeg, const int& OutDeg, const TNodeDat& NodeDat);
00156   int AddNode(int NId, const TIntV& InNIdV, const TIntV& OutNIdV);
00157   int AddNode(int NId, const TIntV& InNIdV, const TIntV& OutNIdV, const TNodeDat& NodeDat);
00158   int AddUndirNode(int NId, const int& Deg);
00159   int AddUndirNode(int NId, const int& Deg, const TNodeDat& NodeDat);
00160   int AddUndirNode(int NId, const TIntV& EdgeNIdV);
00161   int AddUndirNode(int NId, const TIntV& EdgeNIdV, const TNodeDat& NodeDat);
00162   void SetInNIdV(int NId, const TIntV& InNIdV);
00163   void SetOutNIdV(int NId, const TIntV& OutNIdV);
00164   void GetInNIdV(int NId, TIntV& OutNIdV) const;
00165   void GetOutNIdV(int NId, TIntV& OutNIdV) const;
00166   bool IsNode(const int& NId) const { return NodeH.IsKey(NId); }
00167   TNodeI BegNI() const { return TNodeI(NodeH.BegI(), (TVPool *)&Pool); }
00168   TNodeI EndNI() const { return TNodeI(NodeH.EndI(), (TVPool *)&Pool); }
00169   TNodeI GetNI(const int& NId) const { return TNodeI(NodeH.GetI(NId), (TVPool *)&Pool); }
00170   TNodeDat& GetNDat(const int& NId) { return NodeH.GetDat(NId).Dat; }
00171   const TNodeDat& GetNDat(const int& NId) const { return NodeH.GetDat(NId).Dat; }
00172   // edges
00173   TEdgeI BegEI() const { TNodeI NI=BegNI();  while(NI<EndNI() && NI.GetOutDeg()==0) NI++;  return TEdgeI(NI, EndNI()); }
00174   TEdgeI EndEI() const { return TEdgeI(EndNI(), EndNI()); }
00175   TEdgeI GetEI(const int& EId) const; // not supported
00176 
00177   // delete nodes
00178   int IsolateNode(int NId); // isolate the node by setting edge endpoints to point to node id DelNId, IsNode(DelNId)==false)
00179   int DelNode(int NId); // set neighbors to point to DelNId, delete node from the node table
00180   bool IsIsoNode(const int& NId) const;
00181   uint GetDelEdges(); // the number deleted edges
00182   void CompactEdgePool(); // after nodes are isolated and deleted, we compact the empty space
00183 
00184   // edges
00185   ::TSize GetEdges() const { return Pool.GetVals(); }
00186   int AddEdge(const int& SrcNId, const int& DstNId);
00187   bool IsEdge(const int& SrcNId, const int& DstNId, const bool& Dir=true) const;
00188 
00189   void SortEdgeV();
00190   void InvertFromSources(uint ExpectNodes=0); // add missing nodes and in-links
00191   void Rewire(TRnd& Rnd = TInt::Rnd); // create a random network with same degree distribution (configuration model)
00192 
00193   // manipulation
00194   PNGraph GetNGraph(const bool& RenumberNodes=false) const;
00195   PNGraph GetSubNGraph(const TIntV& NIdV) const;
00196   PBigNet GetSubGraph(const TIntV& NIdV, const bool& RenumberNodes=false) const;
00197   void GetSubGraph(const TIntV& NIdV, TBigNet* NewNet, const bool& RenumberNodes=false) const;
00198 
00199   int GetRndNId(TRnd& Rnd=TInt::Rnd) const { return NodeH.GetKey(NodeH.GetRndKeyId(Rnd)); }
00200   TNodeI GetRndNI(TRnd& Rnd=TInt::Rnd) const { return GetNI(GetRndNId(Rnd)); }
00201   void GetNIdV(TIntV& NIdV) const;
00202 
00203   bool Empty() const { return GetNodes()==0; }
00204   void Clr(const bool& DoDel = true) { MxNId = 0;  NodeH.Clr(DoDel); Pool.Clr(DoDel); }
00205   void Reserve(const int& Nodes, const TSize& Edges);
00206   void Defrag(const bool& OnlyNodeLinks = false) { }
00207   bool IsOk() const;
00208   void Dump(const TStr& Desc = TStr()) const;
00209   void SaveForDisk(const TStr& OutFNm) const;
00210 
00211   static void LoadNodeDatH(const TStr& InFNm, TNodeH& NodeH);
00212   static void SaveToDisk(const TStr& InFNm, const TStr& OutFNm, const bool& SaveSparseHash);
00213   friend class TPt<TBigNet>;
00214 };
00215 
00216 // set flags
00217 namespace TSnap {
00218 template <class TNodeData, bool IsDir> struct IsDirected<TBigNet<TNodeData, IsDir> > { enum { Val = 0 }; };
00219 template <class TNodeData> struct IsDirected<TBigNet<TNodeData, true> > { enum { Val = 1 }; };
00220 template <class TNodeData, bool IsDir> struct IsNodeDat<TBigNet<TNodeData, IsDir> > { enum { Val = 1 }; };
00221 }
00222 
00223 template <class TNodeData, bool IsDir>
00224 inline void TBigNet<TNodeData, IsDir>::TNodeI::GetInOutNIdV() {
00225   if (NodeHI.IsEnd()) return;
00226   const TNode& N = NodeHI->Dat;
00227   if (! Pool->IsVId(N.InVId)) {
00228     InDeg=0; OutDeg=0; }
00229   else {
00230     InDeg=Pool->GetVLen(N.InVId);
00231     OutDeg=Pool->GetVLen(N.OutVId);
00232     InNIdV=(int *)Pool->GetValVPt(N.InVId);
00233     OutNIdV=(int *)Pool->GetValVPt(N.OutVId);
00234   }
00235 }
00236 
00237 template <class TNodeData, bool IsDir>
00238 void TBigNet<TNodeData, IsDir>::TNodeI::Dump() const {
00239   printf("NodeId: %d\n", GetId());
00240   printf("  I:%4d]", GetInDeg());
00241   for (int i = 0; i < GetInDeg(); i++) { printf("  %d", GetInNId(i)); }
00242   printf("\n  O:%4d]", GetOutDeg());
00243   for (int i = 0; i < GetOutDeg(); i++) { printf("  %d", GetOutNId(i)); }
00244   printf("\n");
00245 }
00246 
00247 template <class TNodeData, bool IsDir>
00248 void TBigNet<TNodeData, IsDir>::AddSorted(int* Beg, int* End, const int& Val) {
00249   // there is at least one free slot available for the Val
00250   IAssertR(*(End-1)==TInt::Mx, "Edge can not be added: no free space");
00251   // find position to insert
00252   int Half, Len = int(End-Beg);
00253   while (Len > 0) {
00254     Half = Len/2;
00255     int* Mid=Beg+Half;
00256     if (*Mid < Val) { Beg=Mid+1; Len=Len-Half-1; } else { Len=Half; } }
00257   IAssertR(*Beg != Val, "Value already existis");
00258   // insert
00259   memmove(Beg+1, Beg, sizeof(int)*(End-Beg-1));
00260   *Beg = Val;
00261 }
00262 
00263 template <class TNodeData, bool IsDir>
00264 const int* TBigNet<TNodeData, IsDir>::BinSearch(const int* Beg, const int* End, const int& Val) {
00265   End--;  const int *Mid;
00266   while (Beg <= End) { Mid = Beg+(End-Beg)/2;
00267     if (*Mid == Val) { return Mid; }
00268     else if (Val < *Mid) { End=Mid-1; } else { Beg=Mid+1; }
00269   }
00270   return NULL;
00271 }
00272 
00273 template <class TNodeData, bool IsDir>
00274 const int* TBigNet<TNodeData, IsDir>::BinSearch2(const int* Beg, const int* End, const int& Val) {
00275   const int* First = Beg;
00276   const int* Last = End;
00277   const int* Mid;
00278   TSize len = End-Beg, half;
00279   while (len > 0) {
00280     half = len>>1;  Mid=First+half;
00281     if (*Mid < Val) { First = Mid; First++; len=len-half-1; }
00282     else { len=half; }
00283   }
00284   return First==Last ? Last-1 : First;
00285 }
00286 
00287 template <class TNodeData, bool IsDir>
00288 TBigNet<TNodeData, IsDir>::TBigNet(const int& Nodes, const TSize& Edges, const bool& Sources) :
00289 CRef(), MxNId(0), Flags(), Pool(IsDir ? 2*Edges:Edges, 10000000, true, TInt::Mx), NodeH(Nodes) {
00290   //Flags.Incl(gfNodeGraph);
00291   //Flags.Incl(gfNetwork);
00292   //if (IsDir) { Flags.Incl(gfDirected); }
00293   if (Sources) {
00294     Flags.Incl(gfSources);
00295     IAssertR(! IsDir, "Jure: not clear what happens is graph is Undirected and has only sources.");
00296   }
00297   //DumpFlags();
00298 }
00299 
00300 template <class TNodeData, bool IsDir>
00301 void TBigNet<TNodeData, IsDir>::Save(TSOut& SOut) const {
00302   MxNId.Save(SOut);
00303   Flags.Save(SOut);
00304   Pool.Save(SOut);
00305   NodeH.Save(SOut);
00306   TBool(IsDir).Save(SOut);
00307 }
00308 
00309 template <class TNodeData, bool IsDir>
00310 void TBigNet<TNodeData, IsDir>::DumpFlags() const {
00311   for (int i = 1; i <int(gfMx); i++) {
00312     if (HasFlag(TGraphFlag(i))) { printf("  +"); }
00313     else { printf("  -"); }
00314     printf("%s", TSnap::GetFlagStr(TGraphFlag(i)).CStr());
00315   }
00316   printf("\n");
00317 }
00318 
00319 template <class TNodeData, bool IsDir>
00320 int TBigNet<TNodeData, IsDir>::AddNode(int NId, const int& InDeg, const int& OutDeg) {
00321   CAssert(IsDir);
00322   if (NId == -1) { NId = MxNId;  MxNId.Val++; }
00323   else { MxNId = TMath::Mx(NId+1, MxNId()); }
00324   TNode& Node = NodeH.AddDat(NId);
00325   IAssertR(Node.IsUnused(), TStr::Fmt("NodeId %d already exists", NId));
00326   Node.InVId = Pool.AddEmptyV(InDeg);
00327   Node.OutVId = Pool.AddEmptyV(OutDeg);
00328   return NId;
00329 }
00330 
00331 template <class TNodeData, bool IsDir>
00332 int TBigNet<TNodeData, IsDir>::AddNode(int NId, const int& InDeg, const int& OutDeg, const TNodeData& NodeDat) {
00333   CAssert(IsDir);
00334   if (NId == -1) { NId = MxNId;  MxNId.Val++; }
00335   else { MxNId = TMath::Mx(NId+1, MxNId()); }
00336   TNode& Node = NodeH.AddDat(NId);
00337   IAssertR(Node.IsUnused(), TStr::Fmt("NodeId %d already exists", NId));
00338   Node.InVId = Pool.AddEmptyV(InDeg);
00339   Node.OutVId = Pool.AddEmptyV(OutDeg);
00340   Node.Dat = NodeDat;
00341   return NId;
00342 }
00343 
00344 template <class TNodeData, bool IsDir>
00345 int TBigNet<TNodeData, IsDir>::AddUndirNode(int NId, const int& Deg) {
00346   CAssert(! IsDir);
00347   if (NId == -1) { NId = MxNId;  MxNId.Val++; }
00348   else { MxNId = TMath::Mx(NId+1, MxNId()); }
00349   TNode& Node = NodeH.AddDat(NId);
00350   IAssertR(Node.IsUnused(), TStr::Fmt("NodeId %d already exists", NId));
00351   Node.InVId = Pool.AddEmptyV(Deg);
00352   Node.OutVId = Node.InVId; // same vector
00353   return NId;
00354 }
00355 
00356 template <class TNodeData, bool IsDir>
00357 int TBigNet<TNodeData, IsDir>::AddUndirNode(int NId, const int& Deg, const TNodeData& NodeDat) {
00358   CAssert(! IsDir);
00359   if (NId == -1) { NId = MxNId;  MxNId.Val++; }
00360   else { MxNId = TMath::Mx(NId+1, MxNId()); }
00361   TNode& Node = NodeH.AddDat(NId);
00362   IAssertR(Node.IsUnused(), TStr::Fmt("NodeId %d already exists", NId));
00363   Node.InVId = Pool.AddEmptyV(Deg);
00364   Node.OutVId = Node.InVId; // same vector
00365   Node.Dat = NodeDat;
00366   return NId;
00367 }
00368 
00369 template <class TNodeData, bool IsDir>
00370 int TBigNet<TNodeData, IsDir>::AddNode(int NId, const TIntV& InNIdV, const TIntV& OutNIdV) {
00371   CAssert(IsDir);
00372   IAssert(InNIdV.IsSorted() && OutNIdV.IsSorted());
00373   if (NId == -1) { NId = MxNId;  MxNId.Val++; }
00374   else { MxNId = TMath::Mx(NId+1, MxNId()); }
00375   TNode& Node = NodeH.AddDat(NId);
00376   IAssertR(Node.IsUnused(), TStr::Fmt("NodeId %d already exists", NId));
00377   Node.InVId = Pool.AddV(InNIdV);
00378   Node.OutVId = Pool.AddV(OutNIdV);
00379   return NId;
00380 }
00381 
00382 template <class TNodeData, bool IsDir>
00383 int TBigNet<TNodeData, IsDir>::AddNode(int NId, const TIntV& InNIdV, const TIntV& OutNIdV, const TNodeData& NodeDat) {
00384   CAssert(IsDir);
00385   IAssert(InNIdV.IsSorted() && OutNIdV.IsSorted());
00386   if (NId == -1) { NId = MxNId;  MxNId.Val++; }
00387   else { MxNId = TMath::Mx(NId+1, MxNId()); }
00388   TNode& Node = NodeH.AddDat(NId);
00389   IAssertR(Node.IsUnused(), TStr::Fmt("NodeId %d already exists", NId));
00390   Node.InVId = Pool.AddV(InNIdV);
00391   Node.OutVId = Pool.AddV(OutNIdV);
00392   Node.Dat = NodeDat;
00393   return NId;
00394 }
00395 
00396 template <class TNodeData, bool IsDir>
00397 int TBigNet<TNodeData, IsDir>::AddUndirNode(int NId, const TIntV& EdgeNIdV) {
00398   CAssert(! IsDir);
00399   IAssert(EdgeNIdV.IsSorted());
00400   if (NId == -1) { NId = MxNId;  MxNId.Val++; }
00401   else { MxNId = TMath::Mx(NId+1, MxNId()); }
00402   TNode& Node = NodeH.AddDat(NId);
00403   IAssertR(Node.IsUnused(), TStr::Fmt("NodeId %d already exists", NId));
00404   Node.InVId = Pool.AddV(EdgeNIdV);
00405   Node.OutVId = Node.InVId;
00406   return NId;
00407 }
00408 
00409 template <class TNodeData, bool IsDir>
00410 int TBigNet<TNodeData, IsDir>::AddUndirNode(int NId, const TIntV& EdgeNIdV, const TNodeData& NodeDat) {
00411   CAssert(! IsDir);
00412   IAssert(EdgeNIdV.IsSorted());
00413   if (NId == -1) { NId = MxNId;  MxNId.Val++; }
00414   else { MxNId = TMath::Mx(NId+1, MxNId()); }
00415   TNode& Node = NodeH.AddDat(NId);
00416   IAssertR(Node.IsUnused(), TStr::Fmt("NodeId %d already exists", NId));
00417   Node.InVId = Pool.AddV(EdgeNIdV);
00418   Node.OutVId = Node.InVId;
00419   Node.Dat = NodeDat;
00420   return NId;
00421 }
00422 
00423 template <class TNodeData, bool IsDir>
00424 void TBigNet<TNodeData, IsDir>::SetInNIdV(int NId, const TIntV& InNIdV) {
00425   TNode Node;  EAssert(IsNode(NId, Node));
00426   TIntV InNodesV;  Pool.GetV(Node.InVId, InNodesV);
00427   EAssert(InNIdV.Len() == InNodesV.Len());
00428   memcpy(InNodesV.BegI(), InNIdV.BegI(), sizeof(TInt)*InNIdV.Len());
00429 }
00430 
00431 template <class TNodeData, bool IsDir>
00432 void TBigNet<TNodeData, IsDir>::SetOutNIdV(int NId, const TIntV& OutNIdV) {
00433   TNode Node;  EAssert(IsNode(NId, Node));
00434   TIntV OutNodesV;  Pool.GetV(Node.OutVId, OutNodesV);
00435   EAssert(OutNIdV.Len() == OutNodesV.Len());
00436   memcpy(OutNodesV.BegI(), OutNIdV.BegI(), sizeof(TInt)*OutNIdV.Len());
00437 }
00438 
00439 template <class TNodeData, bool IsDir>
00440 void TBigNet<TNodeData, IsDir>::GetInNIdV(int NId, TIntV& InNIdV) const {
00441   TNode Node;  EAssertR(IsNode(NId, Node), TStr::Fmt("Not node: NId=%d\n", NId).CStr());
00442   Pool.GetV(Node.InVId, InNIdV);
00443 }
00444 
00445 template <class TNodeData, bool IsDir>
00446 void TBigNet<TNodeData, IsDir>::GetOutNIdV(int NId, TIntV& OutNIdV) const {
00447   TNode Node;  EAssert(IsNode(NId, Node));
00448   Pool.GetV(Node.OutVId, OutNIdV);
00449 }
00450 
00451 // Node is deleted by setting edge endpoints to point to node id -1 (DelNId)
00452 // No memory is freed
00453 template <class TNodeData, bool IsDir>
00454 int  TBigNet<TNodeData, IsDir>::IsolateNode(int NId) {
00455   TIntV OutV;
00456   int NDel = 0;
00457   // out-edges
00458   GetOutNIdV(NId, OutV);
00459   for (int i = 0; i < OutV.Len(); i++) {
00460     if (OutV[i] == DelNId) { break; } // because they are sorted
00461     // fast implementation
00462     const TNode& N = NodeH.GetDat(OutV[i]);
00463     int *InNIdV = (int *) Pool.GetValVPt(N.InVId);
00464     const int Deg = Pool.GetVLen(N.InVId);
00465     int* Val = (int *) BinSearch(InNIdV, InNIdV+Deg, NId);
00466     if (Val == NULL) {
00467       printf("BAD: Can't find: OUT: NId: %d -- OutNbrId: %d\n", NId, OutV[i]());
00468       continue;
00469     }
00470     IAssert(Val != 0);
00471     memcpy(Val, Val+1, sizeof(int)*int(InNIdV+Deg-Val));
00472     *(InNIdV+Deg-1) = DelNId;  NDel++;
00473   }
00474   OutV.PutAll(DelNId);
00475   // in-edges
00476   if (IsDir) {
00477     TIntV InV;
00478     GetInNIdV(NId, InV);
00479     for (int i = 0; i < InV.Len(); i++) {
00480       if (InV[i] == DelNId) { break; }
00481       // fast implementation
00482       const TNode& N = NodeH.GetDat(InV[i]);
00483       int *OutNIdV = (int *) Pool.GetValVPt(N.OutVId);
00484       const int Deg = Pool.GetVLen(N.OutVId);
00485       int* Val = (int *) BinSearch(OutNIdV, OutNIdV+Deg, NId);
00486       if (Val == NULL) {
00487         printf("IN: NId: %d -- InNbrId: %d\n", NId, OutV[i]());
00488         continue;
00489       }
00490       IAssert(Val != 0);
00491       memcpy(Val, Val+1, sizeof(int)*int(OutNIdV+Deg-Val));
00492       *(OutNIdV+Deg-1) = DelNId;  NDel++;
00493     }
00494     InV.PutAll(DelNId);
00495   }
00496   return NDel;
00497 }
00498 
00499 // set neigbors to point to DelNId, delete node from the node table
00500 template <class TNodeData, bool IsDir>
00501 int  TBigNet<TNodeData, IsDir>::DelNode(int NId) {
00502   const int DelEdges = IsolateNode(NId);
00503   NodeH.DelKey(NId);
00504   return DelEdges;
00505 }
00506 
00507 template <class TNodeData, bool IsDir>
00508 bool TBigNet<TNodeData, IsDir>::IsIsoNode(const int& NId) const {
00509   if (NId == DelNId) { return true; }
00510   TIntV OutV;
00511   GetOutNIdV(NId, OutV);
00512   if (OutV.Empty() || OutV[0] == DelNId) { return true; }
00513   return false;
00514 }
00515 
00516 // the number deleted edges
00517 template <class TNodeData, bool IsDir>
00518 uint TBigNet<TNodeData, IsDir>::GetDelEdges() {
00519   uint DelEdges = 0;
00520   TIntV OutV;
00521   for (TNodeI NI = BegNI(); NI < EndNI(); NI++) {
00522     const int NId = NI.GetId();
00523     GetOutNIdV(NId, OutV);
00524     for (int i = 0; i < OutV.Len(); i++) {
00525       if (OutV[i] == DelNId) { DelEdges++; }
00526     }
00527   }
00528   return DelEdges;
00529 }
00530 
00531 template <class TNodeData, bool IsDir>
00532 void TBigNet<TNodeData, IsDir>::CompactEdgePool() {
00533   Pool.CompactPool(DelNId);
00534 }
00535 
00536 template <class TNodeData, bool IsDir>
00537 int TBigNet<TNodeData, IsDir>::AddEdge(const int& SrcNId, const int& DstNId) {
00538   TNode Src;  IAssert(IsNode(SrcNId, Src));
00539   int* OutV = (int *)Pool.GetValVPt(Src.OutVId);
00540   const int OutVLen = Pool.GetVLen(Src.OutVId);
00541   Assert(BinSearch(OutV, OutV+OutVLen, DstNId)==NULL); // no edge yet
00542   AddSorted(OutV, OutV+OutVLen, DstNId);
00543   if (! OnlySources()) {
00544     TNode Dst;  IAssert(IsNode(DstNId, Dst));
00545     int* InV = (int *)Pool.GetValVPt(Dst.InVId);
00546     const int InVLen = Pool.GetVLen(Dst.InVId);
00547     AddSorted(InV, InV+InVLen, SrcNId);
00548   }
00549   return -1; // edge id
00550 }
00551 
00552 template <class TNodeData, bool IsDir>
00553 bool TBigNet<TNodeData, IsDir>::IsEdge(const int& SrcNId, const int& DstNId, const bool& Dir) const {
00554   TNode Src, Dst;
00555   if (! IsNode(SrcNId, Src)) { return false; } // no source node
00556   int* OutV1 = (int *)Pool.GetValVPt(Src.OutVId);
00557   const bool IsEdge = BinSearch(OutV1, OutV1+Pool.GetVLen(Src.OutVId), DstNId) != NULL;
00558   if (IsEdge && OnlySources()) { return true; }
00559   const bool IsDstNode = IsNode(DstNId, Dst);
00560   if (! IsDstNode) { return false; } // no destination node
00561   else if (IsEdge) { return true; } // destination and link found
00562   else if (! Dir) { // check for the undirected version of the edge
00563     int* OutV2 = (int *)Pool.GetValVPt(Dst.OutVId);
00564     return BinSearch(OutV2, OutV2+Pool.GetVLen(Dst.OutVId), SrcNId) != NULL; }
00565   else { return false; }
00566 }
00567 
00568 template <class TNodeData, bool IsDir>
00569 void TBigNet<TNodeData, IsDir>::GetNIdV(TIntV& NIdV) const {
00570   NIdV.Reserve(GetNodes(), 0);
00571   for (typename TNodeH::TIter I = NodeH.BegI(); I < NodeH.EndI(); I++) {
00572     NIdV.Add(I->Key); }
00573 }
00574 
00575 template <class TNodeData, bool IsDir>
00576 void TBigNet<TNodeData, IsDir>::SortEdgeV() {
00577   printf("Sorting Edges... ");
00578   TExeTm ExeTm;
00579   TIntV OutV, InV;
00580   int cnt = 0;
00581   for (TNodeI NI = BegNI(); NI < EndNI(); NI++) {
00582     const int NId = NI.GetId();
00583     GetOutNIdV(NId, OutV);
00584     OutV.Sort();
00585     if (IsDir) {
00586       GetInNIdV(NId, InV);
00587       InV.Sort();
00588     }
00589     if (++cnt % Mega(1) == 0) { printf("\r  sort:%dm  %s", cnt/Mega(1), ExeTm.GetStr()); }
00590   }
00591   for (TNodeI NI = BegNI(); NI < EndNI(); NI++) {
00592     const int NId = NI.GetId();
00593     GetOutNIdV(NId, OutV);
00594     IAssert(OutV.IsSorted());
00595     GetInNIdV(NId, InV);
00596     IAssert(InV.IsSorted());
00597     if (++cnt % Mega(1) == 0) { printf("\r  check sorted:%dm  %s", cnt/Mega(1), ExeTm.GetStr()); }
00598   }
00599   printf("done. [%s]\n", ExeTm.GetStr());
00600 }
00601 
00602 // add missing nodes and in-links from a network of sources
00603 template <class TNodeData, bool IsDir>
00604 void TBigNet<TNodeData, IsDir>::InvertFromSources(uint ExpectNodes) {
00605   typedef THash<TInt, TInt> TDegHash;
00606   typedef int* TIntPt;
00607   if (ExpectNodes == 0) ExpectNodes=4*GetNodes();
00608   printf("\nInverting BigNet: reserved for %s nodes\n", TInt::GetMegaStr(ExpectNodes).CStr());
00609   CAssert(IsDir);
00610   IAssert(OnlySources());
00611   TDegHash InDegH(ExpectNodes);
00612   TSize NDest=0;
00613   // get node in-degree
00614   uint c=0;  TExeTm ExeTm;
00615   for (TNodeI NI = BegNI(); NI < EndNI(); NI++, c++) {
00616     for (int e = 0; e < NI.GetOutDeg(); e++) {
00617       InDegH.AddDat(NI.GetOutNId(e)) += 1; }
00618     if (c%100000==0) printf("\r%s h:%d [%g]    ", TInt::GetMegaStr(c).CStr(), InDegH.Len(), ExeTm.GetSecs());
00619   }
00620   printf("\n Resizing NodePool: %lld -> %lld\n", uint64(Pool.Reserved()), uint64(GetEdges()));
00621   if (2*GetEdges() > Pool.Reserved()) {
00622     Pool.Reserve(2*GetEdges()); }
00623   // add nodes
00624   printf("NodeH: %s nodes, InDeg: %s nodes, Reserve: %s\n", TInt::GetMegaStr(NodeH.Len()).CStr(),
00625     TInt::GetMegaStr(InDegH.Len()).CStr(), TInt::GetMegaStr(ExpectNodes).CStr());
00626   NodeH.Reserve(ExpectNodes);
00627   for (TDegHash::TIter I = InDegH.BegI(); I < InDegH.EndI(); I++) {
00628     const int NId = I->Key, InDeg = I->Dat;
00629     if (! IsNode(NId)) {
00630       AddNode(NId, InDeg, 0); NDest++; } // new destination node, no out-links
00631     else {
00632       TNode& Node = GetNode(NId);
00633       IAssert(Node.InVId == 0); // no in-links
00634       Node.InVId = Pool.AddEmptyV(InDeg); }
00635   }
00636   InDegH.Clr(true);
00637   printf("Added: %lld destination nodes\n", uint64(NDest));
00638   printf("Graph nodes: %lld nodes\n", uint64(GetNodes()));
00639   // pointer to in-links vector
00640   THash<TInt, int*> NIdToPtH(GetNodes());
00641   for (TNodeI NI = BegNI(); NI < EndNI(); NI++, c++)
00642     NIdToPtH.AddDat(NI.GetId(), (int *)Pool.GetValVPt(NI.GetInVId()));
00643   // add in-edges
00644   printf("Adding edges...\n");
00645   c = 0;
00646   for (TNodeI NI = BegNI(); NI < EndNI(); NI++, c++) {
00647     const int SrcNId = NI.GetId();
00648     for (int e = 0; e < NI.GetOutDeg(); e++) {
00649       TIntPt& InV = NIdToPtH.GetDat(NI.GetOutNId(e));
00650       IAssert(*InV == TInt::Mx);
00651       *InV = SrcNId;  InV++;
00652     }
00653     if (c%100000==0) printf("\r%s [%g]   ", TInt::GetMegaStr(c).CStr(), ExeTm.GetSecs());
00654   }
00655   // sort in-links
00656   printf("\nSorting in-links...\n");
00657   TIntV InNIdV;  c = 0;
00658   for (TNodeI NI = BegNI(); NI < EndNI(); NI++, c++) {
00659     Pool.GetV(NI.GetInVId(), InNIdV);
00660     InNIdV.Sort();
00661     if (c%100000==0) printf("\r%s [%g]    ", TInt::GetMegaStr(c).CStr(), ExeTm.GetSecs());
00662   }
00663   printf("\r...done [%g]\n", ExeTm.GetSecs());
00664   Flags.Excl(gfSources);
00665 }
00666 
00667 template <class TNodeData, bool IsDir>
00668 void TBigNet<TNodeData, IsDir>::Rewire(TRnd& Rnd) {
00669   uint64 NDup=0, NResolve=0, NAddit=0, cnt = 0;
00670   TExeTm ExeTm;
00671   IAssertR(! IsDir, "Only undirected networks are supported");
00672   printf("Rewiring the network... 1");
00673   Pool.ShuffleAll(Rnd);  printf("[%s]\n", ExeTm.GetStr());
00674   //Pool.ShuffleAll(Rnd);  printf(" done [%s]\n", ExeTm.GetStr());
00675   printf("  sorting edges...\n");
00676   SortEdgeV(); // sort individual edge vectors
00677   printf(" done [%s]\n", ExeTm.GetStr());
00678   // remove duplicates and self edges
00679   printf("  removing duplicates...\n");
00680   for (TNodeI NI = BegNI(); NI < EndNI(); NI++, cnt++) {
00681     const int VId = NI.GetOutVId();
00682     int Len = Pool.GetVLen(VId);
00683     int* V = (int *)Pool.GetValVPt(VId);
00684     for (int i = 0; i < Len-1 && V[i]!=DelNId; i++) {
00685       if (V[i] == V[i+1] || V[i]==NI.GetId()) {
00686         memcpy(V+i, V+i+1, sizeof(int)*(Len-i-1)); i--;
00687         V[Len-1] = DelNId;  NDup++;
00688       }
00689     }
00690     if (Len > 0 && V[Len-1]==NI.GetId()) { V[Len-1] = DelNId;  NDup++; }
00691     if (cnt % Mega(10) == 0) { printf(".");  fflush(stdout); }
00692   }
00693   printf("\n    %llu duplicate edges removed [%s]\n", NDup, ExeTm.GetStr());
00694   if (OnlySources()) { return; }
00695   // resolve one way edges
00696   printf("  resolving one way edges...\n"); cnt=0; fflush(stdout);
00697   for (TNodeI NI = BegNI(); NI < EndNI(); NI++, cnt++) {
00698     for (int e = 0; e < NI.GetOutDeg(); e++) {
00699       if (NI.GetOutNId(e) == DelNId) { continue; } // skip deleted nodes
00700       const int InVId = GetNode(NI.GetOutNId(e)).InVId;
00701       const int InVLen = Pool.GetVLen(InVId); IAssert(InVLen>=0 && InVLen < Mega(3));
00702       int* InV = (int *) Pool.GetValVPt(InVId);
00703       int* Val = (int *) BinSearch2(InV, InV+InVLen, NI.GetId());
00704       if (*Val == NI.GetId()) { continue; } // points back, everything is ok
00705       if (InVLen>0 && InV[InVLen-1] == DelNId) { // there is space at the end, insert
00706         IAssert((InVLen-(Val-InV)-1) >= 0);
00707         memmove(Val+1, Val, sizeof(int)*(InVLen-(Val-InV)-1));
00708         *Val = NI.GetId();
00709       } else { // the other end could point at us but it doesn't
00710         memmove(NI.OutNIdV+e, NI.OutNIdV+e+1, sizeof(int)*(NI.GetOutDeg()-e-1));
00711         NI.OutNIdV[NI.GetOutDeg()-1]=DelNId;  e--;
00712       }
00713       NResolve++;
00714     }
00715     if (cnt % Mega(10) == 0) { printf(".");  fflush(stdout); }
00716   }
00717   printf("\n    %llu resolved edges [%s]\n", NResolve, ExeTm.GetStr());
00718   // check if there are two nodes that still have space and are not yet connected and connect them
00719   printf("  filling empty edge slots...\n");
00720   TIntPrV SlotNIdV;
00721   THash<TInt, TInt> SlotNIdH;
00722   int CumSlots=0;
00723   for (TNodeI NI = BegNI(); NI < EndNI(); NI++) {
00724     if (NI.GetOutNId(NI.GetOutDeg()-1) == DelNId) {
00725       int NSlots = 0;
00726       for (int s = NI.GetOutDeg()-1; s >= 0; s--) {
00727         if (NI.GetOutNId(s) == DelNId) { NSlots++; }
00728         else { break; }
00729       }
00730       SlotNIdV.Add(TIntPr(NI.GetId(), NSlots));
00731       SlotNIdH.AddDat(NI.GetId(), NSlots);
00732       CumSlots+=NSlots;
00733     }
00734   }
00735   printf("    %d nodes, with %d spokes available, %d edges\n", SlotNIdH.Len(), CumSlots, CumSlots/2);
00736   TIntV NIdV;  SlotNIdH.GetKeyV(NIdV);
00737   for (int ni1 = 0; ni1 < NIdV.Len(); ni1++) {
00738     const int nid = NIdV[ni1];
00739     if (! SlotNIdH.IsKey(nid) || SlotNIdH.GetDat(nid) == 0) { continue; }
00740     const int NI1Slots = SlotNIdH.GetDat(nid);
00741     if ((SlotNIdH.GetMxKeyIds() - SlotNIdH.Len())/double(SlotNIdH.GetMxKeyIds()) > 0.5) { SlotNIdH.Defrag(); }
00742     TNodeI NI  = GetNI(nid);
00743     for (int NTries = 0; NTries < 4*NI1Slots && NI.GetOutNId(NI.GetOutDeg()-1) == DelNId; NTries++) {
00744       const int nid2 = SlotNIdH.GetKey(SlotNIdH.GetRndKeyId(Rnd));
00745       if (nid == nid2) { continue; }
00746       TNodeI NI2  = GetNI(nid2);
00747       // insert the edge
00748       if (NI2.GetOutNId(NI2.GetOutDeg()-1)==DelNId && ! NI2.IsInNId(NI.GetId())) {
00749         int *V1 = (int *) BinSearch2(NI.OutNIdV, NI.OutNIdV+NI.GetOutDeg(), NI2.GetId());
00750         int *V2 = (int *) BinSearch2(NI2.InNIdV, NI2.InNIdV+NI2.GetInDeg(), NI.GetId());
00751         if (*V1 != DelNId) { memmove(V1+1, V1, sizeof(int)*(NI.GetOutDeg()-(V1-NI.OutNIdV)-1)); }
00752         if (*V2 != DelNId) { memmove(V2+1, V2, sizeof(int)*(NI2.GetInDeg()-(V2-NI2.InNIdV)-1)); }
00753         *V1 = NI2.GetId();  *V2 = NI.GetId();
00754         NAddit++;
00755         SlotNIdH.GetDat(nid)--;  SlotNIdH.GetDat(nid2)--;
00756       }
00757       if (SlotNIdH.GetDat(nid2) == 0) { SlotNIdH.DelKey(nid2); continue; }
00758       if (SlotNIdH.GetDat(nid) == 0) { SlotNIdH.DelKey(nid); break; }
00759     }
00760     if (ni1 % Mega(1) == 0) { printf(".");  fflush(stdout); }
00761   }
00762   printf("    %llu edges added.\nDONE. TOTAL REWIRE TIME [%s]\n\n", NAddit, ExeTm.GetStr());
00763 }
00764 
00765 template <class TNodeData, bool IsDir>
00766 PNGraph TBigNet<TNodeData, IsDir>::GetNGraph(const bool& RenumberNodes) const {
00767   IAssert(RenumberNodes == false);
00768   PNGraph Graph = TNGraph::New();
00769   Graph->Reserve(GetNodes(), 0);
00770   for (TNodeI NI = BegNI(); NI < EndNI(); NI++) {
00771     Graph->AddNode(NI.GetId(), Pool, NI.GetInVId(), NI.GetOutVId());
00772   }
00773   return Graph;
00774 }
00775 
00776 template <class TNodeData, bool IsDir>
00777 PNGraph TBigNet<TNodeData, IsDir>::GetSubNGraph(const TIntV& NIdV) const {
00778   PNGraph Graph = TNGraph::New(NIdV.Len(), 0);
00779   // add nodes
00780   for (int i = 0; i < NIdV.Len(); i++) {
00781     Graph->AddNode(NIdV[i]); }
00782   // reserve node in- and out-degree
00783   for (int i = 0; i < NIdV.Len(); i++) {
00784     const int SrcNId = NIdV[i];
00785     const TNodeI NI = GetNI(SrcNId);
00786     int InDeg=0, OutDeg=0;
00787     for (int e = 0; e < NI.GetInDeg(); e++) { if (Graph->IsNode(NI.GetInNId(e))) InDeg++; }
00788     for (int e = 0; e < NI.GetOutDeg(); e++) { if (Graph->IsNode(NI.GetOutNId(e))) OutDeg++; }
00789     Graph->ReserveNIdInDeg(SrcNId, InDeg);
00790     Graph->ReserveNIdOutDeg(SrcNId, OutDeg);
00791   }
00792   // add edges
00793   for (int i = 0; i < NIdV.Len(); i++) {
00794     const int SrcNId = NIdV[i];
00795     const TNodeI NI = GetNI(SrcNId);
00796     for (int e = 0; e < NI.GetOutDeg(); e++) {
00797       const int DstNId = NI.GetOutNId(e);
00798       if (Graph->IsNode(DstNId)) {
00799         Graph->AddEdge(SrcNId, DstNId); }
00800     }
00801   }
00802   return Graph;
00803 }
00804 
00805 template <class TNodeData, bool IsDir>
00806 TPt<TBigNet<TNodeData, IsDir> > TBigNet<TNodeData, IsDir>::GetSubGraph(const TIntV& NIdV, const bool& RenumberNodes) const {
00807   const bool isDir = IsDir, onlySources = HasFlag(gfSources);
00808   TSize Edges=0;
00809   // find in- and out-degree
00810   TSparseHash<TInt, TIntPr> NIdDegH(NIdV.Len());
00811   for (int i = 0; i < NIdV.Len(); i++) { NIdDegH.AddDat(NIdV[i]); }
00812   for (int i = 0; i < NIdV.Len(); i++) {
00813     const TNodeI NI = GetNI(NIdV[i]);
00814     int InDeg=0, OutDeg=0;
00815     for (int n = 0; n < NI.GetOutDeg(); n++) {
00816       if (NIdDegH.IsKey(NI.GetOutNId(n))) OutDeg++; }
00817     if (! onlySources && isDir) {
00818       for (int n = 0; n < NI.GetInDeg(); n++) {
00819         if (NIdDegH.IsKey(NI.GetInNId(n))) InDeg++; }
00820     }
00821     Edges += OutDeg;
00822     NIdDegH.GetDat(NIdV[i]) = TIntPr(InDeg, OutDeg);
00823   }
00824   // make network
00825   typedef TBigNet<TNodeData, IsDir> TBNet;
00826   TPt<TBNet> NewNetPt = TBNet::New(NIdV.Len(), Edges, HasFlag(gfDirected));
00827   TBNet& NewNet = *NewNetPt;
00828   NewNet.Flags = Flags;
00829   // add nodes
00830   if (isDir || onlySources) {
00831     for (int i = 0; i < NIdV.Len(); i++) {
00832       const TIntPr& Deg = NIdDegH.GetDat(NIdV[i]);
00833       NewNet.AddNode(NIdV[i], Deg.Val1, Deg.Val2, GetNDat(NIdV[i])); } // also copy the node data
00834   } else {
00835     for (int i = 0; i < NIdV.Len(); i++) {
00836       const TIntPr& Deg = NIdDegH.GetDat(NIdV[i]);
00837       NewNet.AddUndirNode(NIdV[i], Deg.Val2, GetNDat(NIdV[i])); } // also copy the node data
00838   }
00839   // add edges
00840   for (int i = 0; i < NIdV.Len(); i++) {
00841     int NId = NIdV[i];
00842     const TNodeI NI = GetNI(NId);
00843     int *NIdVPt = (int *) NewNet.GetOutNIdVPt(NId);
00844     int deg = 0;
00845     for (int e = 0; e < NI.GetOutDeg(); e++) {
00846       const int Dst = NI.GetOutNId(e);
00847       if (NewNet.IsNode(Dst)) {
00848         *NIdVPt = Dst;  NIdVPt++;  deg++; }
00849     }
00850     EAssert(deg == NIdDegH.GetDat(NId).Val2);
00851     if (isDir && ! onlySources) {
00852       EAssert((NI.GetInVId()==NI.GetOutVId() && NI.GetInVId()==0)
00853         || (NI.GetInVId() != NI.GetOutVId()));
00854       int * NIdVPt = (int *) NewNet.GetInNIdVPt(NId);
00855       int deg = 0;
00856       for (int e = 0; e < NI.GetInDeg(); e++) {
00857         const int Dst = NI.GetInNId(e);
00858         if (NewNet.IsNode(Dst)) {
00859           *NIdVPt = Dst;  NIdVPt++;  deg++; }
00860       }
00861       EAssert(deg == NIdDegH.GetDat(NId).Val1);
00862     }
00863   }
00864   return NewNetPt;
00865 }
00866 
00867 template <class TNodeData, bool IsDir>
00868 void TBigNet<TNodeData, IsDir>::GetSubGraph(const TIntV& NIdV, TBigNet* NewNetPt, const bool& RenumberNodes) const {
00869   printf("Set subgraph on %d nodes\n", NIdV.Len()); TExeTm ExeTm;
00870   const bool isDir = IsDir, onlySources = HasFlag(gfSources);
00871   TSize Edges=0;
00872   // find in- and out-degree
00873   THash<TInt, TIntPr> NIdDegH(NIdV.Len());
00874   for (int i = 0; i < NIdV.Len(); i++) { NIdDegH.AddDat(NIdV[i]); }
00875   for (int i = 0; i < NIdV.Len(); i++) {
00876     const TNodeI NI = GetNI(NIdV[i]);
00877     int InDeg=0, OutDeg=0;
00878     for (int n = 0; n < NI.GetOutDeg(); n++) {
00879       if (NIdDegH.IsKey(NI.GetOutNId(n))) OutDeg++; }
00880     if (! onlySources && isDir) {
00881       for (int n = 0; n < NI.GetInDeg(); n++) {
00882         if (NIdDegH.IsKey(NI.GetInNId(n))) InDeg++; }
00883     }
00884     Edges += OutDeg;
00885     NIdDegH.GetDat(NIdV[i]) = TIntPr(InDeg, OutDeg);
00886   }
00887   // make network
00888   //typedef TBigNet<TNodeData, IsDir> TBNet;
00889   //TPt<TBNet> NewNetPt = TBNet::New(NIdV.Len(), Edges, HasFlag(gfDirected));
00890   NewNetPt->Reserve(NIdV.Len(), Edges);
00891   TBigNet<TNodeData, IsDir>& NewNet = *NewNetPt;
00892   NewNet.Flags = Flags;
00893   TIntSet NIdMap;
00894   // add nodes
00895   if (! RenumberNodes) {
00896     if (isDir || onlySources) {
00897       for (int i = 0; i < NIdV.Len(); i++) {
00898         const TIntPr& Deg = NIdDegH.GetDat(NIdV[i]);
00899         NewNet.AddNode(NIdV[i], Deg.Val1, Deg.Val2, GetNDat(NIdV[i])); } // also copy the node data
00900     } else {
00901       for (int i = 0; i < NIdV.Len(); i++) {
00902         const TIntPr& Deg = NIdDegH.GetDat(NIdV[i]);
00903         NewNet.AddUndirNode(NIdV[i], Deg.Val2, GetNDat(NIdV[i])); } // also copy the node data
00904     }
00905   } else { // renumber nodes
00906     NIdMap.Gen(NIdV.Len());
00907     for (int i = 0; i < NIdV.Len(); i++) { NIdMap.AddKey(NIdV[i]);  }
00908     if (isDir || onlySources) {
00909       for (int i = 0; i < NIdV.Len(); i++) {
00910         const TIntPr& Deg = NIdDegH.GetDat(NIdV[i]);
00911         NewNet.AddNode(NIdMap.GetKeyId(NIdV[i]), Deg.Val1, Deg.Val2, GetNDat(NIdV[i])); } // also copy the node data
00912     } else {
00913       for (int i = 0; i < NIdV.Len(); i++) {
00914         const TIntPr& Deg = NIdDegH.GetDat(NIdV[i]);
00915         NewNet.AddUndirNode(NIdMap.GetKeyId(NIdV[i]), Deg.Val2, GetNDat(NIdV[i])); } // also copy the node data
00916     }
00917   }
00918   // add edges
00919   for (int i = 0; i < NIdV.Len(); i++) {
00920     int NId = NIdV[i];
00921     const TNodeI NI = GetNI(NId);
00922     int *NIdVPt = (int *) NewNet.GetOutNIdVPt(RenumberNodes?NIdMap.GetKeyId(NId):NId);
00923     int deg = 0;
00924     for (int e = 0; e < NI.GetOutDeg(); e++) {
00925       const int Dst = RenumberNodes?NIdMap.GetKeyId(NI.GetOutNId(e)):NI.GetOutNId(e);
00926       if (NewNet.IsNode(Dst)) {
00927         *NIdVPt = Dst;  NIdVPt++;  deg++; }
00928     }
00929     EAssert(deg == NIdDegH.GetDat(NId).Val2);
00930     if (isDir && ! onlySources) {
00931       EAssert((NI.GetInVId()==NI.GetOutVId() && NI.GetInVId()==0)
00932         || (NI.GetInVId() != NI.GetOutVId()));
00933       int * NIdVPt = (int *) NewNet.GetInNIdVPt(RenumberNodes?NIdMap.GetKeyId(NId):NId);
00934       int deg = 0;
00935       for (int e = 0; e < NI.GetInDeg(); e++) {
00936         const int Dst = RenumberNodes?NIdMap.GetKeyId(NI.GetInNId(e)):NI.GetInNId(e);
00937         if (NewNet.IsNode(Dst)) {
00938           *NIdVPt = Dst;  NIdVPt++;  deg++; }
00939       }
00940       EAssert(deg == NIdDegH.GetDat(NId).Val1);
00941     }
00942   }
00943   printf("  %u edges [%s]\n", uint(Edges), ExeTm.GetStr());
00944 }
00945 
00946 template <class TNodeData, bool IsDir>
00947 void TBigNet<TNodeData, IsDir>::Reserve(const int& Nodes, const TSize& Edges) {
00948   NodeH.Gen(TMath::Mx(int(1.1*Nodes), 100));
00949   Pool = TVPool(TMath::Mx(Edges, (TSize) Mega(1)), Mega(100), true);
00950 }
00951 
00952 // check that in- and out-edges matchs, the table is sorted and so on
00953 template <class TNodeData, bool IsDir>
00954 bool TBigNet<TNodeData, IsDir>::IsOk() const {
00955   // check the node pool
00956   TIntV ValV; TExeTm ExeTm;
00957   printf("Is ok network:\n  Check Vec...");
00958   for (uint id = 1; id < Pool.GetVecs(); id++) {
00959     Pool.GetV(id, ValV);
00960     if (! ValV.Empty()) {
00961       EAssert((0<=ValV[0] && ValV[0]<MxNId) || ValV[0]==DelNId);
00962       try {
00963       for (int i = 1; i < ValV.Len(); i++) {
00964         //if (ValV[i]==DelNId) { continue; }
00965         // sorted & no duplicates & no empty values (can have deleted nodes)
00966         EAssertR((ValV[i-1]<ValV[i]) || (ValV[i-1]==ValV[i] && ValV[i]==DelNId),
00967           TStr::Fmt("NId1: %d NId2:%d", ValV[i-1](),ValV[i]()));
00968         EAssertR((0<=ValV[i] && ValV[i]<MxNId) || ValV[i]==DelNId,
00969           TStr::Fmt("NId1: %dm MxNId: %d", ValV[i](), MxNId));
00970         if (! OnlySources()) {
00971           EAssertR(IsNode(ValV[i]) || ValV[i]==DelNId,
00972             TStr::Fmt("NId1: %dm MxNId: %d", ValV[i](), MxNId)); } // every link is a node
00973       }
00974       } catch (PExcept Except){
00975         printf("  %s\n", Except->GetStr().CStr());
00976         printf("  vec id: %d, lenght: %d\n", id, ValV.Len());
00977         for (int i = 1; i < ValV.Len(); i++) {
00978           printf("  %d", ValV[i]());
00979           if (!((ValV[i-1]<ValV[i]) || (ValV[i-1]==ValV[i] && ValV[i]==DelNId))) { printf("*"); }
00980         }
00981         printf("\n");
00982         return false;
00983       }
00984     }
00985     if (id % 10000 == 0) {
00986       printf("\r  %dk / %dk [%s]", id/1000, Pool.GetVecs()/1000, ExeTm.GetStr()); }
00987   }
00988   printf("[%s]\n  Check Edges...\n", ExeTm.GetStr());
00989   // check edges
00990   int ErrCnt = 0;
00991   if (! OnlySources()) {
00992     int Cnt=0;
00993     for (TNodeI NI = BegNI(); NI < EndNI(); NI++, Cnt++) {
00994       // nodes that point to NI, have it on out-list
00995       for (int e = 0; e < NI.GetInDeg(); e++) {
00996         if (NI.GetInNId(e) == DelNId) { continue; } // skip deleted nodes
00997     if (! IsEdge(NI.GetInNId(e), NI.GetId())) {
00998       printf("\nno edge: %d --> %d", NI.GetInNId(e), NI.GetId());
00999       printf("NId: %d   deg %d,%d\n", NI.GetId(), NI.GetOutDeg(), NI.GetInDeg());
01000       for (int i = 0; i < NI.GetInDeg(); i++) { printf("  %d", NI.GetInNId(i)); }
01001       TNodeI NI2 = GetNI(NI.GetInNId(e));
01002       printf("\nNId2: %d   deg %d,%d\n", NI2.GetId(), NI2.GetOutDeg(), NI2.GetInDeg());
01003       for (int j = 0; j < NI2.GetOutDeg(); j++) { printf("  %d", NI2.GetOutNId(j)); }
01004       printf("\n");
01005       ErrCnt++;
01006     }
01007         //EAssertR(IsEdge(NI.GetInNId(e), NI.GetId()),
01008         //  TStr::Fmt("no edge: %d --> %d", NI.GetInNId(e), NI.GetId()));
01009     }
01010       // nodes NI points to, have it on in-list
01011       for (int e = 0; e < NI.GetOutDeg(); e++) {
01012         if (NI.GetOutNId(e) == DelNId) { continue; }
01013         const int InVId = GetNode(NI.GetOutNId(e)).InVId;
01014         int* DstInV = (int *)Pool.GetValVPt(InVId);
01015     if (BinSearch(DstInV, DstInV+Pool.GetVLen(InVId), NI.GetId()) == NULL) {
01016       printf("no edge: %d --> %d", NI.GetId(), NI.GetInNId(e));
01017       printf("NId: %d   deg %d,%d\n", NI.GetId(), NI.GetOutDeg(), NI.GetInDeg());
01018       for (int i = 0; i < NI.GetOutDeg(); i++) { printf("  %d", NI.GetOutNId(i)); }
01019       TNodeI NI2 = GetNI(NI.GetOutNId(e));
01020       printf("\nNId2: %d   deg %d,%d\n", NI2.GetId(), NI2.GetOutDeg(), NI2.GetInDeg());
01021       for (int j = 0; j < NI2.GetInDeg(); j++) { printf("  %d", NI2.GetInNId(j)); }
01022       printf("\n");  ErrCnt++;
01023     }
01024         //EAssertR(BinSearch(DstInV, DstInV+Pool.GetVLen(InVId), NI.GetId()) != NULL,
01025         //  TStr::Fmt("no edge: %d --> %d", NI.GetId(), NI.GetInNId(e)));
01026     }
01027     if (ErrCnt > 5) { FailR("error count too large!"); }
01028       if (Cnt % 100000 == 0) {
01029         printf("\r%s [%s]", TInt::GetMegaStr(Cnt).CStr(), ExeTm.GetStr()); }
01030     }
01031     printf("\r%s [%s]\n", TInt::GetMegaStr(Cnt).CStr(), ExeTm.GetStr());
01032   }
01033   return true;
01034 }
01035 
01036 template <class TNodeData, bool IsDir>
01037 void TBigNet<TNodeData, IsDir>::Dump(const TStr& Desc) const {
01038   if (! Desc.Empty()) { printf("\n%s (%d, %u):\n", Desc.CStr(), GetNodes(), GetEdges()); }
01039   else { printf("\nNodes: %d,  Edges: %u\n", GetNodes(), GetEdges()); }
01040   for (TNodeI NI = BegNI(); NI < EndNI(); NI++) {
01041     printf("%d]\n  IN %d]", NI.GetId(), NI.GetInDeg());
01042     for (int i=0; i<NI.GetInDeg(); i++) { printf(" %d", NI.GetInNId(i)); }
01043     if (IsDir) {
01044       printf("\n  OUT %d]", NI.GetOutDeg());
01045       for (int i=0; i<NI.GetOutDeg(); i++) { printf(" %d", NI.GetOutNId(i)); }
01046     }
01047     printf("\n");
01048   }
01049 }
01050 
01051 // header: <Nodes, Edges, IsDir>
01052 // format: undirected <NId, Dat, OutDeg, OutNodeV>
01053 // format: directed <NId, Dat, OutDeg, OutNodeV, InDeg, InNodeV>
01054 template <class TNodeData, bool IsDir>
01055 void TBigNet<TNodeData, IsDir>::SaveForDisk(const TStr& OutFNm) const {
01056   const bool IsDirected = IsDir;
01057   TFOut FOut(OutFNm);
01058   FOut.Save(GetNodes());
01059   FOut.Save(GetEdges());
01060   FOut.Save(IsDirected);
01061   for (TNodeI NI = BegNI(); NI < EndNI(); NI++) {
01062     FOut.Save(NI.GetId());
01063     NI.GetDat().Save(FOut);
01064     FOut.Save(NI.GetOutDeg());
01065     for (int i = 0; i < NI.GetOutDeg(); i++) {
01066       FOut.Save(NI.GetOutNId(i)); }
01067     if (IsDirected) {
01068       FOut.Save(NI.GetInDeg());
01069       for (int i = 0; i < NI.GetInDeg(); i++) {
01070         FOut.Save(NI.GetInNId(i)); }
01071     }
01072   }
01073 }
01074 
01075 // skip the edge pool and only load the node data hash table
01076 template <class TNodeData, bool IsDir>
01077 void TBigNet<TNodeData, IsDir>::LoadNodeDatH(const TStr& InFNm, TNodeH& NodeH) {
01078   TFIn SIn(InFNm);
01079   TInt MxNId(SIn);
01080   TB32Set Flags(SIn);
01081   printf("skipping Pool...\n");
01082   TBool FastCopy(SIn);
01083   uint64 _GrowBy, _MxVals, _Vals;
01084   SIn.Load(_GrowBy); SIn.Load(_MxVals);  SIn.Load(_Vals);
01085   TInt EmptyVal(SIn);
01086   int Tmp;
01087   for (TSize ValN = 0; ValN < _Vals; ValN++) { SIn.Load(Tmp); }
01088   TInt MxVals(SIn), Vals(SIn);
01089   uint64 Offset;
01090   for (int ValN = 0; ValN < Vals; ValN++) { SIn.Load(Offset); }
01091   printf("loading Hode hash table...\n");
01092   NodeH.Load(SIn);
01093 }
01094 
01095 // Save the network without loading it. Save the node hash table as THash or TSparseHash
01096 template <class TNodeData, bool IsDir>
01097 void TBigNet<TNodeData, IsDir>::SaveToDisk(const TStr& InFNm, const TStr& OutFNm, const bool& SaveSparseHash) {
01098   TFIn FIn(InFNm);
01099   TFOut FOut(OutFNm);
01100   { TInt MxNId(FIn);  MxNId.Save(FOut);
01101   TB32Set Flags(FIn);  Flags.Save(FOut);
01102   TVPool Pool(FIn);  Pool.Save(FOut); }
01103   { TNodeH NodeH(FIn);
01104   if (! SaveSparseHash) {
01105     THash<TInt, TNode> NewH(NodeH.Len(), true);
01106     for (typename TNodeH::TIter I = NodeH.BegI(); I < NodeH.EndI(); I++) {
01107       NewH.AddDat(I->Key, I->Dat);
01108     }
01109     NewH.Save(FOut);
01110   } else {
01111     FailR("Not implemented");
01112   } }
01113 }