SNAP Library 2.0, User Reference  2013-05-13 16:33:57
SNAP, a general-purpose, high-performance system for analysis and manipulation of large networks
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
zipfl.cpp
Go to the documentation of this file.
00001 
00002 // ZIP Input-File
00003 TStrStrH TZipIn::FExtToCmdH; // file extension -> decompression command line; filled by FillFExtToCmdH()
00004 const int TZipIn::MxBfL=32*1024; // size in bytes of the read buffer the constructors allocate
00005 
00006 void TZipIn::CreateZipProcess(const TStr& Cmd, const TStr& ZipFNm) {
00007   const TStr CmdLine = TStr::Fmt("%s %s", Cmd.CStr(), ZipFNm.CStr());
00008   #ifdef GLib_WIN
00009   PROCESS_INFORMATION piProcInfo;
00010   STARTUPINFO siStartInfo;
00011   ZeroMemory( &piProcInfo, sizeof(PROCESS_INFORMATION));
00012   ZeroMemory( &siStartInfo, sizeof(STARTUPINFO));
00013   siStartInfo.cb = sizeof(STARTUPINFO);
00014   siStartInfo.hStdOutput = ZipStdoutWr;
00015   siStartInfo.dwFlags |= STARTF_USESTDHANDLES;
00016   // Create the child process.
00017   const BOOL FuncRetn = CreateProcess(NULL,
00018     (LPSTR) CmdLine.CStr(),  // command line
00019     NULL,          // process security attributes
00020     NULL,          // primary thread security attributes
00021     TRUE,          // handles are inherited
00022     0,             // creation flags
00023     NULL,          // use parent's environment
00024     NULL,          // use parent's current directory
00025     &siStartInfo,  // STARTUPINFO pointer
00026     &piProcInfo);  // receives PROCESS_INFORMATION
00027   EAssertR(FuncRetn!=0, TStr::Fmt("Can not execute '%s'", CmdLine.CStr()).CStr());
00028   CloseHandle(piProcInfo.hProcess);
00029   CloseHandle(piProcInfo.hThread);
00030   #else
00031   ZipStdoutRd = popen(CmdLine.CStr(), "r");
00032   EAssertR(ZipStdoutRd != NULL,  TStr::Fmt("Can not execute '%s'", CmdLine.CStr()).CStr());
00033   #endif
00034 }
00035 
00036 void TZipIn::FillBf(){
00037   EAssertR(CurFPos < FLen, "End of file "+GetSNm()+" reached.");
00038   EAssertR((BfC==BfL)/*&&((BfL==-1)||(BfL==MxBfL))*/, "Error reading file '"+GetSNm()+"'.");
00039   #ifdef GLib_WIN
00040   // Read output from the child process
00041   DWORD BytesRead;
00042   EAssert(ReadFile(ZipStdoutRd, Bf, MxBfL, &BytesRead, NULL) != 0);
00043   #else
00044   size_t BytesRead = fread(Bf, 1, MxBfL, ZipStdoutRd);
00045   EAssert(BytesRead != 0);
00046   #endif
00047   BfL = (int) BytesRead;
00048   CurFPos += BytesRead;
00049   EAssertR((BfC!=0)||(BfL!=0), "Error reading file '"+GetSNm()+"'.");
00050   BfC = 0;
00051 }
00052 
00053 TZipIn::TZipIn(const TStr& FNm) : TSBase(FNm.CStr()), TSIn(FNm), ZipStdoutRd(NULL), ZipStdoutWr(NULL),
00054   FLen(0), CurFPos(0), Bf(NULL), BfC(0), BfL(0) {
00055   EAssertR(! FNm.Empty(), "Empty file-name.");
00056   EAssertR(TFile::Exists(FNm), TStr::Fmt("File %s does not exist", FNm.CStr()).CStr());
00057   FLen = TZipIn::GetFLen(FNm);
00058   if (FLen == 0) { return; } // empty file
00059   #ifdef GLib_WIN
00060   // create pipes
00061   SECURITY_ATTRIBUTES saAttr;
00062   saAttr.nLength = sizeof(SECURITY_ATTRIBUTES);
00063   saAttr.bInheritHandle = TRUE;
00064   saAttr.lpSecurityDescriptor = NULL;
00065     // Create a pipe for the child process's STDOUT.
00066   const int PipeBufferSz = 32*1024;
00067   EAssertR(CreatePipe(&ZipStdoutRd, &ZipStdoutWr, &saAttr, PipeBufferSz), "Stdout pipe creation failed");
00068   // Ensure the read handle to the pipe for STDOUT is not inherited.
00069   SetHandleInformation(ZipStdoutRd, HANDLE_FLAG_INHERIT, 0);
00070   #else
00071   // no implementation needed
00072   #endif
00073   CreateZipProcess(GetCmd(FNm), FNm);
00074   Bf = new char[MxBfL]; BfC = BfL=-1;
00075   FillBf();
00076 }
00077 
00078 TZipIn::TZipIn(const TStr& FNm, bool& OpenedP) : TSBase(FNm.CStr()), TSIn(FNm), ZipStdoutRd(NULL), ZipStdoutWr(NULL),
00079   FLen(0), CurFPos(0), Bf(NULL), BfC(0), BfL(0) {
00080   EAssertR(! FNm.Empty(), "Empty file-name.");
00081   FLen = TZipIn::GetFLen(FNm);
00082   OpenedP = TFile::Exists(FNm);
00083   if (OpenedP) {
00084     #ifdef GLib_WIN
00085     SECURITY_ATTRIBUTES saAttr;
00086     saAttr.nLength = sizeof(SECURITY_ATTRIBUTES);
00087     saAttr.bInheritHandle = TRUE;
00088     saAttr.lpSecurityDescriptor = NULL;
00089     // Create a pipe for the child process's STDOUT.
00090     EAssertR(CreatePipe(&ZipStdoutRd, &ZipStdoutWr, &saAttr, 0), "Stdout pipe creation failed");
00091     // Ensure the read handle to the pipe for STDOUT is not inherited.
00092     SetHandleInformation(ZipStdoutRd, HANDLE_FLAG_INHERIT, 0);
00093     #else
00094     // no implementation needed
00095     #endif
00096     CreateZipProcess(GetCmd(FNm.GetFExt()), FNm);
00097     Bf = new char[MxBfL]; BfC = BfL=-1;
00098     FillBf();
00099   }
00100 }
00101 
00102 PSIn TZipIn::New(const TStr& FNm) {
00103   return PSIn(new TZipIn(FNm));
00104 }
00105 
00106 PSIn TZipIn::New(const TStr& FNm, bool& OpenedP){
00107   return PSIn(new TZipIn(FNm, OpenedP));
00108 }
00109 
00110 TZipIn::~TZipIn(){
00111   #ifdef GLib_WIN
00112   if (ZipStdoutRd != NULL) {
00113     EAssertR(CloseHandle(ZipStdoutRd), "Closing read-end of pipe failed"); }
00114   if (ZipStdoutWr != NULL) {
00115     EAssertR(CloseHandle(ZipStdoutWr)!=0, "Closing write-end of pipe failed"); }
00116   #else
00117   if (ZipStdoutRd != NULL) {
00118     EAssertR(pclose(ZipStdoutRd) != -1, "Closing of the process failed"); }
00119   #endif
00120   if (Bf != NULL) { delete[] Bf; }
00121 }
00122 
00123 int TZipIn::GetBf(const void* LBf, const TSize& LBfL){
00124   int LBfS=0;
00125   if (TSize(BfC+LBfL)>TSize(BfL)){
00126     for (TSize LBfC=0; LBfC<LBfL; LBfC++){
00127       if (BfC==BfL){FillBf();}
00128       LBfS+=((char*)LBf)[LBfC]=Bf[BfC++];}
00129   } else {
00130     for (TSize LBfC=0; LBfC<LBfL; LBfC++){
00131       LBfS+=(((char*)LBf)[LBfC]=Bf[BfC++]);}
00132   }
00133   return LBfS;
00134 }
00135 
00136 // Gets the next line to LnChA.
00137 // Returns true, if LnChA contains a valid line.
00138 // Returns false, if LnChA is empty, such as end of file was encountered.
00139 
00140 bool TZipIn::GetNextLnBf(TChA& LnChA) {
00141   int Status;
00142   int BfN;        // new pointer to the end of line
00143   int BfP;        // previous pointer to the line start
00144 
00145   LnChA.Clr();
00146 
00147   do {
00148     if (BfC >= BfL) {
00149       // reset the current pointer, FindEol() will read a new buffer
00150       BfP = 0;
00151     } else {
00152       BfP = BfC;
00153     }
00154     Status = FindEol(BfN);
00155     if (Status >= 0) {
00156       LnChA.AddBf(&Bf[BfP],BfN-BfP);
00157       if (Status == 1) {
00158         // got a complete line
00159         return true;
00160       }
00161     }
00162     // get more data, if the line is incomplete
00163   } while (Status == 0);
00164 
00165   // eof or the last line has no newline
00166   return !LnChA.Empty();
00167 }
00168 
00169 // Sets BfN to the end of line or end of buffer. Reads more data, if needed.
00170 // Returns 1, when an end of line was found, BfN is end of line.
00171 // Returns 0, when an end of line was not found and more data is required,
00172 //    BfN is end of buffer.
00173 // Returns -1, when an end of file was found, BfN is not defined.
00174 
00175 int TZipIn::FindEol(int& BfN) {
00176   char Ch;
00177 
00178   if (BfC >= BfL) {
00179     // check for eof, read more data
00180     if (Eof()) {
00181       return -1;
00182     }
00183     FillBf();
00184   }
00185 
00186   while (BfC < BfL) {
00187     Ch = Bf[BfC++];
00188     if (Ch=='\n') {
00189       BfN = BfC-1;
00190       return 1;
00191     }
00192     if (Ch=='\r' && Bf[BfC+1]=='\n') {
00193       BfC++;
00194       BfN = BfC-2;
00195       return 1;
00196     }
00197   }
00198   BfN = BfC;
00199 
00200   return 0;
00201 }
00202 
00203 bool TZipIn::IsZipExt(const TStr& FNmExt) {
00204   if (FExtToCmdH.Empty()) FillFExtToCmdH();
00205   return FExtToCmdH.IsKey(FNmExt);
00206 }
00207 
00208 void TZipIn::FillFExtToCmdH() {
00209   // 7za decompress: "e -y -bd -so";
00210   #ifdef GLib_WIN
00211   const char* ZipCmd = "7z.exe e -y -bd -so";
00212   #else
00213   const char* ZipCmd = "7za e -y -bd -so";
00214   #endif
00215   if (FExtToCmdH.Empty()) {
00216     FExtToCmdH.AddDat(".gz",  ZipCmd);
00217     FExtToCmdH.AddDat(".7z",  ZipCmd);
00218     FExtToCmdH.AddDat(".rar", ZipCmd);
00219     FExtToCmdH.AddDat(".zip", ZipCmd);
00220     FExtToCmdH.AddDat(".cab", ZipCmd);
00221     FExtToCmdH.AddDat(".arj", ZipCmd);
00222     FExtToCmdH.AddDat(".bzip2", ZipCmd);
00223     FExtToCmdH.AddDat(".bz2", ZipCmd);
00224   }
00225 }
00226 
00227 TStr TZipIn::GetCmd(const TStr& ZipFNm) {
00228   if (FExtToCmdH.Empty()) FillFExtToCmdH();
00229   const TStr Ext = ZipFNm.GetFExt().GetLc();
00230   EAssertR(FExtToCmdH.IsKey(Ext), TStr::Fmt("Unsupported file extension '%s'", Ext.CStr()));
00231   return FExtToCmdH.GetDat(Ext);
00232 }
00233 
00234 uint64 TZipIn::GetFLen(const TStr& ZipFNm) {
00235   #ifdef GLib_WIN
00236   HANDLE ZipStdoutRd, ZipStdoutWr;
00237   // create pipes
00238   SECURITY_ATTRIBUTES saAttr;
00239   saAttr.nLength = sizeof(SECURITY_ATTRIBUTES);
00240   saAttr.bInheritHandle = TRUE;
00241   saAttr.lpSecurityDescriptor = NULL;
00242     // Create a pipe for the child process's STDOUT.
00243   const int PipeBufferSz = 32*1024;
00244   EAssertR(CreatePipe(&ZipStdoutRd, &ZipStdoutWr, &saAttr, PipeBufferSz), "Stdout pipe creation failed");
00245   // Ensure the read handle to the pipe for STDOUT is not inherited.
00246   SetHandleInformation(ZipStdoutRd, HANDLE_FLAG_INHERIT, 0);
00247   //CreateZipProcess(GetCmd(FNm), FNm);
00248   { const TStr CmdLine = TStr::Fmt("7z.exe l %s", ZipFNm.CStr());
00249   PROCESS_INFORMATION piProcInfo;
00250   STARTUPINFO siStartInfo;
00251   ZeroMemory( &piProcInfo, sizeof(PROCESS_INFORMATION));
00252   ZeroMemory( &siStartInfo, sizeof(STARTUPINFO));
00253   siStartInfo.cb = sizeof(STARTUPINFO);
00254   siStartInfo.hStdOutput = ZipStdoutWr;
00255   siStartInfo.dwFlags |= STARTF_USESTDHANDLES;
00256   // Create the child process.
00257   const BOOL FuncRetn = CreateProcess(NULL, (LPSTR) CmdLine.CStr(),
00258     NULL, NULL, TRUE, 0, NULL, NULL, &siStartInfo, &piProcInfo);
00259   EAssertR(FuncRetn!=0, TStr::Fmt("Can not execute '%s'", CmdLine.CStr()).CStr());
00260   CloseHandle(piProcInfo.hProcess);
00261   CloseHandle(piProcInfo.hThread); }
00262   #else
00263   const TStr CmdLine = TStr::Fmt("7za l %s", ZipFNm.CStr());
00264   FILE* ZipStdoutRd = popen(CmdLine.CStr(), "r");
00265   EAssertR(ZipStdoutRd != NULL, TStr::Fmt("Can not execute '%s'", CmdLine.CStr()).CStr());
00266   #endif
00267   // Read output from the child process
00268   const int BfSz = 32*1024;
00269   char* Bf = new char [BfSz];
00270   int BfC=0, BfL=0;
00271   memset(Bf, 0, BfSz);
00272   #ifdef GLib_WIN
00273   DWORD BytesRead;
00274   EAssert(ReadFile(ZipStdoutRd, Bf, MxBfL, &BytesRead, NULL) != 0);
00275   #else
00276   size_t BytesRead = fread(Bf, 1, MxBfL, ZipStdoutRd);
00277   EAssert(BytesRead != 0);
00278   EAssert(pclose(ZipStdoutRd) != -1);
00279   #endif
00280   BfL = (int) BytesRead;  IAssert((BfC!=0)||(BfL!=0));
00281   BfC = 0; Bf[BfL] = 0;
00282   // find file lenght
00283   TStr Str(Bf);  delete [] Bf;
00284   TStrV StrV; Str.SplitOnWs(StrV);
00285   int n = StrV.Len()-1;
00286   while (n > 0 && ! StrV[n].IsPrefix("-----")) { n--; }
00287   if (n-7 <= 0) {
00288     WrNotify(TStr::Fmt("Corrupt file %s: MESSAGE:\n", ZipFNm.CStr()).CStr(), Str.CStr());
00289     SaveToErrLog(TStr::Fmt("Corrupt file %s. Message:\n:%s\n", ZipFNm.CStr(), Str.CStr()).CStr());
00290     return 0;
00291   }
00292   return StrV[n-7].GetInt64();
00293 }
00294 
00296 // Output-File
00297 TStrStrH TZipOut::FExtToCmdH; // file extension -> compression command line; filled by FillFExtToCmdH()
00298 const TSize TZipOut::MxBfL=4*1024; // size in bytes of the write buffer the constructor allocates
00299 
00300 void TZipOut::FlushBf() {
00301   #ifdef GLib_WIN
00302   DWORD BytesOut;
00303   EAssertR(WriteFile(ZipStdinWr, Bf, DWORD(BfL), &BytesOut, NULL)!=0, "Error writting to the file '"+GetSNm()+"'.");
00304   #else
00305   size_t BytesOut = fwrite(Bf, 1, BfL, ZipStdinWr);
00306   #endif
00307   EAssert(BytesOut == BfL);
00308   BfL = 0;
00309 }
00310 
00311 void TZipOut::CreateZipProcess(const TStr& Cmd, const TStr& ZipFNm) {
00312   const TStr CmdLine = TStr::Fmt("%s %s", Cmd.CStr(), ZipFNm.CStr());
00313   #ifdef GLib_WIN
00314   PROCESS_INFORMATION piProcInfo;
00315   STARTUPINFO siStartInfo;
00316   ZeroMemory( &piProcInfo, sizeof(PROCESS_INFORMATION));
00317   ZeroMemory( &siStartInfo, sizeof(STARTUPINFO));
00318   siStartInfo.cb = sizeof(STARTUPINFO);
00319   siStartInfo.hStdInput = ZipStdinRd;
00320   siStartInfo.dwFlags |= STARTF_USESTDHANDLES;
00321   // Create the child process.
00322   const BOOL FuncRetn = CreateProcess(NULL,
00323     (LPSTR) CmdLine.CStr(),  // command line
00324     NULL,          // process security attributes
00325     NULL,          // primary thread security attributes
00326     TRUE,          // handles are inherited
00327     0,             // creation flags
00328     NULL,          // use parent's environment
00329     NULL,          // use parent's current directory
00330     &siStartInfo,  // STARTUPINFO pointer
00331     &piProcInfo);  // receives PROCESS_INFORMATION
00332   EAssertR(FuncRetn!=0, TStr::Fmt("Can not execute '%s'", CmdLine.CStr()).CStr());
00333   CloseHandle(piProcInfo.hProcess);
00334   CloseHandle(piProcInfo.hThread);
00335   #else
00336   ZipStdinWr = popen(CmdLine.CStr(),"w");
00337   EAssertR(ZipStdinWr != NULL,  TStr::Fmt("Can not execute '%s'", CmdLine.CStr()).CStr());
00338   #endif
00339 }
00340 
00341 TZipOut::TZipOut(const TStr& FNm) : TSBase(FNm.CStr()), TSOut(FNm), ZipStdinRd(NULL), ZipStdinWr(NULL), Bf(NULL), BfL(0){
00342   EAssertR(! FNm.Empty(), "Empty file-name.");
00343   #ifdef GLib_WIN
00344   // create pipes
00345   SECURITY_ATTRIBUTES saAttr;
00346   saAttr.nLength = sizeof(SECURITY_ATTRIBUTES);
00347   saAttr.bInheritHandle = TRUE;
00348   saAttr.lpSecurityDescriptor = NULL;
00349   // Create a pipe for the child process's STDOUT.
00350   EAssertR(CreatePipe(&ZipStdinRd, &ZipStdinWr, &saAttr, 0), "Stdout pipe creation failed");
00351   // Ensure the read handle to the pipe for STDOUT is not inherited.
00352   SetHandleInformation(ZipStdinWr, HANDLE_FLAG_INHERIT, 0);
00353   #else
00354   // no implementation necessary
00355   #endif
00356   CreateZipProcess(GetCmd(FNm), FNm);
00357   Bf=new char[MxBfL];  BfL=0;
00358 }
00359 
00360 PSOut TZipOut::New(const TStr& FNm){
00361   return PSOut(new TZipOut(FNm));
00362 }
00363 
00364 TZipOut::~TZipOut() {
00365   if (BfL!=0) { FlushBf(); }
00366   #ifdef GLib_WIN
00367   if (ZipStdinWr != NULL) { EAssertR(CloseHandle(ZipStdinWr), "Closing write-end of pipe failed"); }
00368   if (ZipStdinRd != NULL) { EAssertR(CloseHandle(ZipStdinRd), "Closing read-end of pipe failed"); }
00369   #else
00370   if (ZipStdinWr != NULL) { EAssertR(pclose(ZipStdinWr) != -1, "Closing of the process failed"); }
00371   #endif
00372   if (Bf!=NULL) { delete[] Bf; }
00373 }
00374 
00375 int TZipOut::PutCh(const char& Ch){
00376   if (BfL==MxBfL) {FlushBf();}
00377   return Bf[BfL++]=Ch;
00378 }
00379 
00380 int TZipOut::PutBf(const void* LBf, const TSize& LBfL){
00381   int LBfS=0;
00382   if (BfL+LBfL>MxBfL){
00383     for (TSize LBfC=0; LBfC<LBfL; LBfC++){
00384       LBfS+=PutCh(((char*)LBf)[LBfC]);}
00385   } else {
00386     for (TSize LBfC=0; LBfC<LBfL; LBfC++){
00387       LBfS+=(Bf[BfL++]=((char*)LBf)[LBfC]);}
00388   }
00389   return LBfS;
00390 }
00391 
00392 void TZipOut::Flush(){
00393   FlushBf();
00394   #ifdef GLib_WIN
00395   EAssertR(FlushFileBuffers(ZipStdinWr)!=0, "Can not flush file '"+GetSNm()+"'.");
00396   #else
00397   EAssertR(fflush(ZipStdinWr)==0, "Can not flush file '"+GetSNm()+"'.");
00398   #endif
00399 }
00400 
00401 bool TZipOut::IsZipExt(const TStr& FNmExt) {
00402   if (FExtToCmdH.Empty()) FillFExtToCmdH();
00403   return FExtToCmdH.IsKey(FNmExt);
00404 }
00405 
00406 void TZipOut::FillFExtToCmdH() {
00407    // 7za compress: "a -y -bd -si{CompressedFNm}"
00408   #ifdef GLib_WIN
00409   const char* ZipCmd = "7z.exe a -y -bd -si";
00410   #else
00411   const char* ZipCmd = "7za a -y -bd -si";
00412   #endif
00413   if (FExtToCmdH.Empty()) {
00414     FExtToCmdH.AddDat(".gz",  ZipCmd);
00415     FExtToCmdH.AddDat(".7z",  ZipCmd);
00416     FExtToCmdH.AddDat(".rar", ZipCmd);
00417     FExtToCmdH.AddDat(".zip", ZipCmd);
00418     FExtToCmdH.AddDat(".cab", ZipCmd);
00419     FExtToCmdH.AddDat(".arj", ZipCmd);
00420     FExtToCmdH.AddDat(".bzip2", ZipCmd);
00421     FExtToCmdH.AddDat(".bz2", ZipCmd);
00422   }
00423 }
00424 
00425 TStr TZipOut::GetCmd(const TStr& ZipFNm) {
00426   if (FExtToCmdH.Empty()) FillFExtToCmdH();
00427   const TStr Ext = ZipFNm.GetFExt().GetLc();
00428   EAssertR(FExtToCmdH.IsKey(Ext), TStr::Fmt("Unsupported file extension '%s'", Ext.CStr()));
00429   return FExtToCmdH.GetDat(Ext)+ZipFNm.GetFMid();
00430 }