SNAP Library 2.2, Developer Reference  2014-03-11 19:15:55
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
zipfl.cpp
Go to the documentation of this file.
00001 
00002 // ZIP Input-File
00003 
00004 #if defined(GLib_WIN)
00005   TStr TZipIn::SevenZipPath = "C:\\7Zip";
00006 #elif defined(GLib_CYGWIN)
00007   TStr TZipIn::SevenZipPath = "/usr/bin";
00008 #elif defined(GLib_MACOSX) 
00009   TStr TZipIn::SevenZipPath = "/opt/local/bin";
00010 #else 
00011   TStr TZipIn::SevenZipPath = "/usr/bin";
00012 #endif
00013 
00014 
00015 TStrStrH TZipIn::FExtToCmdH;
00016 const int TZipIn::MxBfL=32*1024;
00017 
00018 void TZipIn::CreateZipProcess(const TStr& Cmd, const TStr& ZipFNm) {
00019   const TStr CmdLine = TStr::Fmt("%s %s", Cmd.CStr(), ZipFNm.CStr());
00020   #ifdef GLib_WIN
00021   PROCESS_INFORMATION piProcInfo;
00022   STARTUPINFO siStartInfo;
00023   ZeroMemory( &piProcInfo, sizeof(PROCESS_INFORMATION));
00024   ZeroMemory( &siStartInfo, sizeof(STARTUPINFO));
00025   siStartInfo.cb = sizeof(STARTUPINFO);
00026   siStartInfo.hStdOutput = ZipStdoutWr;
00027   siStartInfo.dwFlags |= STARTF_USESTDHANDLES;
00028   // Create the child process.
00029   const BOOL FuncRetn = CreateProcess(NULL,
00030     (LPSTR) CmdLine.CStr(),  // command line
00031     NULL,          // process security attributes
00032     NULL,          // primary thread security attributes
00033     TRUE,          // handles are inherited
00034     0,             // creation flags
00035     NULL,          // use parent's environment
00036     NULL,          // use parent's current directory
00037     &siStartInfo,  // STARTUPINFO pointer
00038     &piProcInfo);  // receives PROCESS_INFORMATION
00039   EAssertR(FuncRetn!=0, TStr::Fmt("Can not execute '%s'", CmdLine.CStr()).CStr());
00040   CloseHandle(piProcInfo.hProcess);
00041   CloseHandle(piProcInfo.hThread);
00042   #else
00043   ZipStdoutRd = popen(CmdLine.CStr(), "r");
00044   if (ZipStdoutRd == 0) { // try using SevenZipPath
00045     ZipStdoutRd = popen((TZipIn::SevenZipPath+"/"+CmdLine).CStr(), "r");
00046   }
00047   EAssertR(ZipStdoutRd != NULL,  TStr::Fmt("Can not execute '%s'", CmdLine.CStr()).CStr());
00048   #endif
00049 }
00050 
00051 void TZipIn::FillBf(){
00052   EAssertR(CurFPos < FLen, "End of file "+GetSNm()+" reached.");
00053   EAssertR((BfC==BfL)/*&&((BfL==-1)||(BfL==MxBfL))*/, "Error reading file '"+GetSNm()+"'.");
00054   #ifdef GLib_WIN
00055   // Read output from the child process
00056   DWORD BytesRead;
00057   EAssert(ReadFile(ZipStdoutRd, Bf, MxBfL, &BytesRead, NULL) != 0);
00058   #else
00059   size_t BytesRead = fread(Bf, 1, MxBfL, ZipStdoutRd);
00060   EAssert(BytesRead != 0);
00061   #endif
00062   BfL = (int) BytesRead;
00063   CurFPos += BytesRead;
00064   EAssertR((BfC!=0)||(BfL!=0), "Error reading file '"+GetSNm()+"'.");
00065   BfC = 0;
00066 }
00067 
00068 TZipIn::TZipIn(const TStr& FNm) : TSBase(FNm.CStr()), TSIn(FNm), ZipStdoutRd(NULL), ZipStdoutWr(NULL),
00069   FLen(0), CurFPos(0), Bf(NULL), BfC(0), BfL(0) {
00070   EAssertR(! FNm.Empty(), "Empty file-name.");
00071   EAssertR(TFile::Exists(FNm), TStr::Fmt("File %s does not exist", FNm.CStr()).CStr());
00072   FLen = 0;
00073   // non-zip files not supported, need uncompressed file length information
00074   if (FNm.GetFExt() != ".zip") {
00075     printf("*** Error: file %s, compression format %s not supported\n", FNm.CStr(), FNm.GetFExt().CStr());
00076     EFailR(TStr::Fmt("File %s: compression format %s not supported", FNm.CStr(), FNm.GetFExt().CStr()).CStr());
00077   }
00078   FLen = TZipIn::GetFLen(FNm);
00079   // return for malformed files
00080   if (FLen == 0) { return; } // empty file
00081   #ifdef GLib_WIN
00082   // create pipes
00083   SECURITY_ATTRIBUTES saAttr;
00084   saAttr.nLength = sizeof(SECURITY_ATTRIBUTES);
00085   saAttr.bInheritHandle = TRUE;
00086   saAttr.lpSecurityDescriptor = NULL;
00087     // Create a pipe for the child process's STDOUT.
00088   const int PipeBufferSz = 32*1024;
00089   EAssertR(CreatePipe(&ZipStdoutRd, &ZipStdoutWr, &saAttr, PipeBufferSz), "Stdout pipe creation failed");
00090   // Ensure the read handle to the pipe for STDOUT is not inherited.
00091   SetHandleInformation(ZipStdoutRd, HANDLE_FLAG_INHERIT, 0);
00092   #else
00093   // no implementation needed
00094   #endif
00095   CreateZipProcess(GetCmd(FNm), FNm);
00096   Bf = new char[MxBfL]; BfC = BfL=-1;
00097   FillBf();
00098 }
00099 
00100 TZipIn::TZipIn(const TStr& FNm, bool& OpenedP) : TSBase(FNm.CStr()), TSIn(FNm), ZipStdoutRd(NULL), ZipStdoutWr(NULL),
00101   FLen(0), CurFPos(0), Bf(NULL), BfC(0), BfL(0) {
00102   EAssertR(! FNm.Empty(), "Empty file-name.");
00103   FLen = TZipIn::GetFLen(FNm);
00104   OpenedP = TFile::Exists(FNm);
00105   if (OpenedP) {
00106     #ifdef GLib_WIN
00107     SECURITY_ATTRIBUTES saAttr;
00108     saAttr.nLength = sizeof(SECURITY_ATTRIBUTES);
00109     saAttr.bInheritHandle = TRUE;
00110     saAttr.lpSecurityDescriptor = NULL;
00111     // Create a pipe for the child process's STDOUT.
00112     EAssertR(CreatePipe(&ZipStdoutRd, &ZipStdoutWr, &saAttr, 0), "Stdout pipe creation failed");
00113     // Ensure the read handle to the pipe for STDOUT is not inherited.
00114     SetHandleInformation(ZipStdoutRd, HANDLE_FLAG_INHERIT, 0);
00115     #else
00116     // no implementation needed
00117     #endif
00118     CreateZipProcess(GetCmd(FNm.GetFExt()), FNm);
00119     Bf = new char[MxBfL]; BfC = BfL=-1;
00120     FillBf();
00121   }
00122 }
00123 
00124 PSIn TZipIn::New(const TStr& FNm) {
00125   return PSIn(new TZipIn(FNm));
00126 }
00127 
00128 PSIn TZipIn::New(const TStr& FNm, bool& OpenedP){
00129   return PSIn(new TZipIn(FNm, OpenedP));
00130 }
00131 
00132 TZipIn::~TZipIn(){
00133   #ifdef GLib_WIN
00134   if (ZipStdoutRd != NULL) {
00135     EAssertR(CloseHandle(ZipStdoutRd), "Closing read-end of pipe failed"); }
00136   if (ZipStdoutWr != NULL) {
00137     EAssertR(CloseHandle(ZipStdoutWr)!=0, "Closing write-end of pipe failed"); }
00138   #else
00139   if (ZipStdoutRd != NULL) {
00140     EAssertR(pclose(ZipStdoutRd) != -1, "Closing of the process failed"); }
00141   #endif
00142   if (Bf != NULL) { delete[] Bf; }
00143 }
00144 
00145 int TZipIn::GetBf(const void* LBf, const TSize& LBfL){
00146   int LBfS=0;
00147   if (TSize(BfC+LBfL)>TSize(BfL)){
00148     for (TSize LBfC=0; LBfC<LBfL; LBfC++){
00149       if (BfC==BfL){FillBf();}
00150       LBfS+=((char*)LBf)[LBfC]=Bf[BfC++];}
00151   } else {
00152     for (TSize LBfC=0; LBfC<LBfL; LBfC++){
00153       LBfS+=(((char*)LBf)[LBfC]=Bf[BfC++]);}
00154   }
00155   return LBfS;
00156 }
00157 
00158 // Gets the next line to LnChA.
00159 // Returns true, if LnChA contains a valid line.
00160 // Returns false, if LnChA is empty, such as end of file was encountered.
00161 bool TZipIn::GetNextLnBf(TChA& LnChA) {
00162   int Status;
00163   int BfN;        // new pointer to the end of line
00164   int BfP;        // previous pointer to the line start
00165   LnChA.Clr();
00166   do {
00167     if (BfC >= BfL) { BfP = 0; } // reset the current pointer, FindEol() will read a new buffer
00168     else { BfP = BfC; }
00169     Status = FindEol(BfN);
00170     if (Status >= 0) {
00171       LnChA.AddBf(&Bf[BfP],BfN-BfP);
00172       if (Status == 1) { return true; } // got a complete line
00173     }
00174     // get more data, if the line is incomplete
00175   } while (Status == 0);
00176   // eof or the last line has no newline
00177   return !LnChA.Empty();
00178 }
00179 
00180 // Sets BfN to the end of line or end of buffer. Reads more data, if needed.
00181 // Returns 1, when an end of line was found, BfN is end of line.
00182 // Returns 0, when an end of line was not found and more data is required,
00183 //    BfN is end of buffer.
00184 // Returns -1, when an end of file was found, BfN is not defined.
00185 int TZipIn::FindEol(int& BfN) {
00186   char Ch;
00187   if (BfC >= BfL) { // check for eof, read more data
00188     if (Eof()) { return -1; }
00189     FillBf();
00190   }
00191   while (BfC < BfL) {
00192     Ch = Bf[BfC++];
00193     if (Ch=='\n') { BfN = BfC-1; return 1; }
00194     if (Ch=='\r' && Bf[BfC+1]=='\n') {
00195       BfC++;  BfN = BfC-2;  return 1; }
00196   }
00197   BfN = BfC;
00198   return 0;
00199 }
00200 
00201 bool TZipIn::IsZipExt(const TStr& FNmExt) {
00202   if (FExtToCmdH.Empty()) FillFExtToCmdH();
00203   return FExtToCmdH.IsKey(FNmExt);
00204 }
00205 
00206 void TZipIn::FillFExtToCmdH() {
00207   // 7za decompress: "e -y -bd -so";
00208   #ifdef GLib_WIN
00209   const char* ZipCmd = "7z.exe e -y -bd -so";
00210   #else
00211   const char* ZipCmd = "7za e -y -bd -so";
00212   #endif
00213   if (FExtToCmdH.Empty()) {
00214     FExtToCmdH.AddDat(".gz",  ZipCmd);
00215     FExtToCmdH.AddDat(".7z",  ZipCmd);
00216     FExtToCmdH.AddDat(".rar", ZipCmd);
00217     FExtToCmdH.AddDat(".zip", ZipCmd);
00218     FExtToCmdH.AddDat(".cab", ZipCmd);
00219     FExtToCmdH.AddDat(".arj", ZipCmd);
00220     FExtToCmdH.AddDat(".bzip2", ZipCmd);
00221     FExtToCmdH.AddDat(".bz2", ZipCmd);
00222   }
00223 }
00224 
00225 TStr TZipIn::GetCmd(const TStr& ZipFNm) {
00226   if (FExtToCmdH.Empty()) FillFExtToCmdH();
00227   const TStr Ext = ZipFNm.GetFExt().GetLc();
00228   EAssertR(FExtToCmdH.IsKey(Ext), TStr::Fmt("Unsupported file extension '%s'", Ext.CStr()));
00229   return FExtToCmdH.GetDat(Ext);
00230 }
00231 
00232 uint64 TZipIn::GetFLen(const TStr& ZipFNm) {
00233   #ifdef GLib_WIN
00234   HANDLE ZipStdoutRd, ZipStdoutWr;
00235   // create pipes
00236   SECURITY_ATTRIBUTES saAttr;
00237   saAttr.nLength = sizeof(SECURITY_ATTRIBUTES);
00238   saAttr.bInheritHandle = TRUE;
00239   saAttr.lpSecurityDescriptor = NULL;
00240     // Create a pipe for the child process's STDOUT.
00241   const int PipeBufferSz = 32*1024;
00242   EAssertR(CreatePipe(&ZipStdoutRd, &ZipStdoutWr, &saAttr, PipeBufferSz), "Stdout pipe creation failed");
00243   // Ensure the read handle to the pipe for STDOUT is not inherited.
00244   SetHandleInformation(ZipStdoutRd, HANDLE_FLAG_INHERIT, 0);
00245   //CreateZipProcess(GetCmd(FNm), FNm);
00246   { const TStr CmdLine = TStr::Fmt("7z.exe l %s", ZipFNm.CStr());
00247   PROCESS_INFORMATION piProcInfo;
00248   STARTUPINFO siStartInfo;
00249   ZeroMemory( &piProcInfo, sizeof(PROCESS_INFORMATION));
00250   ZeroMemory( &siStartInfo, sizeof(STARTUPINFO));
00251   siStartInfo.cb = sizeof(STARTUPINFO);
00252   siStartInfo.hStdOutput = ZipStdoutWr;
00253   siStartInfo.dwFlags |= STARTF_USESTDHANDLES;
00254   // Create the child process.
00255   const BOOL FuncRetn = CreateProcess(NULL, (LPSTR) CmdLine.CStr(),
00256     NULL, NULL, TRUE, 0, NULL, NULL, &siStartInfo, &piProcInfo);
00257   EAssertR(FuncRetn!=0, TStr::Fmt("Can not execute '%s'", CmdLine.CStr()).CStr());
00258   CloseHandle(piProcInfo.hProcess);
00259   CloseHandle(piProcInfo.hThread); }
00260   #else
00261   const TStr CmdLine = TStr::Fmt("7za l %s", ZipFNm.CStr());
00262   FILE* ZipStdoutRd = popen(CmdLine.CStr(), "r");
00263   if (ZipStdoutRd == NULL) { // try using SevenZipPath
00264     ZipStdoutRd = popen((TZipIn::SevenZipPath+"/"+CmdLine).CStr(), "r");
00265   }
00266   EAssertR(ZipStdoutRd != NULL, TStr::Fmt("Can not execute '%s'", CmdLine.CStr()).CStr());
00267   #endif
00268   // Read output from the child process
00269   const int BfSz = 32*1024;
00270   char* Bf = new char [BfSz];
00271   int BfC=0, BfL=0;
00272   memset(Bf, 0, BfSz);
00273   #ifdef GLib_WIN
00274   DWORD BytesRead;
00275   EAssert(ReadFile(ZipStdoutRd, Bf, MxBfL, &BytesRead, NULL) != 0);
00276   #else
00277   size_t BytesRead = fread(Bf, 1, MxBfL, ZipStdoutRd);
00278   EAssert(BytesRead != 0);
00279   EAssert(pclose(ZipStdoutRd) != -1);
00280   #endif
00281   BfL = (int) BytesRead;  IAssert((BfC!=0)||(BfL!=0));
00282   BfC = 0; Bf[BfL] = 0;
00283   // find file lenght
00284   TStr Str(Bf);  delete [] Bf;
00285   TStrV StrV; Str.SplitOnWs(StrV);
00286   int n = StrV.Len()-1;
00287   while (n > 0 && ! StrV[n].IsPrefix("-----")) { n--; }
00288   if (n-7 <= 0) {
00289     WrNotify(TStr::Fmt("Corrupt file %s: MESSAGE:\n", ZipFNm.CStr()).CStr(), Str.CStr());
00290     SaveToErrLog(TStr::Fmt("Corrupt file %s. Message:\n:%s\n", ZipFNm.CStr(), Str.CStr()).CStr());
00291     return 0;
00292   }
00293   return StrV[n-7].GetInt64();
00294 }
00295 
00297 // Output-File
00298 TStrStrH TZipOut::FExtToCmdH;
00299 const TSize TZipOut::MxBfL=4*1024;
00300 
00301 void TZipOut::FlushBf() {
00302   #ifdef GLib_WIN
00303   DWORD BytesOut;
00304   EAssertR(WriteFile(ZipStdinWr, Bf, DWORD(BfL), &BytesOut, NULL)!=0, "Error writting to the file '"+GetSNm()+"'.");
00305   #else
00306   size_t BytesOut = fwrite(Bf, 1, BfL, ZipStdinWr);
00307   #endif
00308   EAssert(BytesOut == BfL);
00309   BfL = 0;
00310 }
00311 
00312 void TZipOut::CreateZipProcess(const TStr& Cmd, const TStr& ZipFNm) {
00313   const TStr CmdLine = TStr::Fmt("%s %s", Cmd.CStr(), ZipFNm.CStr());
00314   #ifdef GLib_WIN
00315   PROCESS_INFORMATION piProcInfo;
00316   STARTUPINFO siStartInfo;
00317   ZeroMemory( &piProcInfo, sizeof(PROCESS_INFORMATION));
00318   ZeroMemory( &siStartInfo, sizeof(STARTUPINFO));
00319   siStartInfo.cb = sizeof(STARTUPINFO);
00320   siStartInfo.hStdInput = ZipStdinRd;
00321   siStartInfo.dwFlags |= STARTF_USESTDHANDLES;
00322   // Create the child process.
00323   const BOOL FuncRetn = CreateProcess(NULL,
00324     (LPSTR) CmdLine.CStr(),  // command line
00325     NULL,          // process security attributes
00326     NULL,          // primary thread security attributes
00327     TRUE,          // handles are inherited
00328     0,             // creation flags
00329     NULL,          // use parent's environment
00330     NULL,          // use parent's current directory
00331     &siStartInfo,  // STARTUPINFO pointer
00332     &piProcInfo);  // receives PROCESS_INFORMATION
00333   EAssertR(FuncRetn!=0, TStr::Fmt("Can not execute '%s'", CmdLine.CStr()).CStr());
00334   CloseHandle(piProcInfo.hProcess);
00335   CloseHandle(piProcInfo.hThread);
00336   #else
00337   ZipStdinWr = popen(CmdLine.CStr(),"w");
00338   if (ZipStdinWr == NULL) { // try using SevenZipPath
00339     ZipStdinWr = popen((TZipIn::SevenZipPath+"/"+CmdLine).CStr(), "r");
00340   }
00341   EAssertR(ZipStdinWr != NULL,  TStr::Fmt("Can not execute '%s'", CmdLine.CStr()).CStr());
00342   #endif
00343 }
00344 
00345 TZipOut::TZipOut(const TStr& FNm) : TSBase(FNm.CStr()), TSOut(FNm), ZipStdinRd(NULL), ZipStdinWr(NULL), Bf(NULL), BfL(0){
00346   EAssertR(! FNm.Empty(), "Empty file-name.");
00347   #ifdef GLib_WIN
00348   // create pipes
00349   SECURITY_ATTRIBUTES saAttr;
00350   saAttr.nLength = sizeof(SECURITY_ATTRIBUTES);
00351   saAttr.bInheritHandle = TRUE;
00352   saAttr.lpSecurityDescriptor = NULL;
00353   // Create a pipe for the child process's STDOUT.
00354   EAssertR(CreatePipe(&ZipStdinRd, &ZipStdinWr, &saAttr, 0), "Stdout pipe creation failed");
00355   // Ensure the read handle to the pipe for STDOUT is not inherited.
00356   SetHandleInformation(ZipStdinWr, HANDLE_FLAG_INHERIT, 0);
00357   #else
00358   // no implementation necessary
00359   #endif
00360   CreateZipProcess(GetCmd(FNm), FNm);
00361   Bf=new char[MxBfL];  BfL=0;
00362 }
00363 
00364 PSOut TZipOut::New(const TStr& FNm){
00365   return PSOut(new TZipOut(FNm));
00366 }
00367 
00368 TZipOut::~TZipOut() {
00369   if (BfL!=0) { FlushBf(); }
00370   #ifdef GLib_WIN
00371   if (ZipStdinWr != NULL) { EAssertR(CloseHandle(ZipStdinWr), "Closing write-end of pipe failed"); }
00372   if (ZipStdinRd != NULL) { EAssertR(CloseHandle(ZipStdinRd), "Closing read-end of pipe failed"); }
00373   #else
00374   if (ZipStdinWr != NULL) { EAssertR(pclose(ZipStdinWr) != -1, "Closing of the process failed"); }
00375   #endif
00376   if (Bf!=NULL) { delete[] Bf; }
00377 }
00378 
00379 int TZipOut::PutCh(const char& Ch){
00380   if (BfL==MxBfL) {FlushBf();}
00381   return Bf[BfL++]=Ch;
00382 }
00383 
00384 int TZipOut::PutBf(const void* LBf, const TSize& LBfL){
00385   int LBfS=0;
00386   if (BfL+LBfL>MxBfL){
00387     for (TSize LBfC=0; LBfC<LBfL; LBfC++){
00388       LBfS+=PutCh(((char*)LBf)[LBfC]);}
00389   } else {
00390     for (TSize LBfC=0; LBfC<LBfL; LBfC++){
00391       LBfS+=(Bf[BfL++]=((char*)LBf)[LBfC]);}
00392   }
00393   return LBfS;
00394 }
00395 
00396 void TZipOut::Flush(){
00397   FlushBf();
00398   #ifdef GLib_WIN
00399   EAssertR(FlushFileBuffers(ZipStdinWr)!=0, "Can not flush file '"+GetSNm()+"'.");
00400   #else
00401   EAssertR(fflush(ZipStdinWr)==0, "Can not flush file '"+GetSNm()+"'.");
00402   #endif
00403 }
00404 
00405 bool TZipOut::IsZipExt(const TStr& FNmExt) {
00406   if (FExtToCmdH.Empty()) FillFExtToCmdH();
00407   return FExtToCmdH.IsKey(FNmExt);
00408 }
00409 
00410 void TZipOut::FillFExtToCmdH() {
00411    // 7za compress: "a -y -bd -si{CompressedFNm}"
00412   #ifdef GLib_WIN
00413   const char* ZipCmd = "7z.exe a -y -bd -si";
00414   #else
00415   const char* ZipCmd = "7za a -y -bd -si";
00416   #endif
00417   if (FExtToCmdH.Empty()) {
00418     FExtToCmdH.AddDat(".gz",  ZipCmd);
00419     FExtToCmdH.AddDat(".7z",  ZipCmd);
00420     FExtToCmdH.AddDat(".rar", ZipCmd);
00421     FExtToCmdH.AddDat(".zip", ZipCmd);
00422     FExtToCmdH.AddDat(".cab", ZipCmd);
00423     FExtToCmdH.AddDat(".arj", ZipCmd);
00424     FExtToCmdH.AddDat(".bzip2", ZipCmd);
00425     FExtToCmdH.AddDat(".bz2", ZipCmd);
00426   }
00427 }
00428 
00429 TStr TZipOut::GetCmd(const TStr& ZipFNm) {
00430   if (FExtToCmdH.Empty()) FillFExtToCmdH();
00431   const TStr Ext = ZipFNm.GetFExt().GetLc();
00432   EAssertR(FExtToCmdH.IsKey(Ext), TStr::Fmt("Unsupported file extension '%s'", Ext.CStr()));
00433   return FExtToCmdH.GetDat(Ext)+ZipFNm.GetFMid();
00434 }