SNAP Library, Developer Reference  2013-01-07 14:03:36
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
zipfl.cpp
Go to the documentation of this file.
00001 
00002 // ZIP Input-File
00003 TStrStrH TZipIn::FExtToCmdH;
00004 const int TZipIn::MxBfL=32*1024;
00005 
00006 void TZipIn::CreateZipProcess(const TStr& Cmd, const TStr& ZipFNm) {
00007   const TStr CmdLine = TStr::Fmt("%s %s", Cmd.CStr(), ZipFNm.CStr());
00008   #ifdef GLib_WIN
00009   PROCESS_INFORMATION piProcInfo;
00010   STARTUPINFO siStartInfo;
00011   ZeroMemory( &piProcInfo, sizeof(PROCESS_INFORMATION));
00012   ZeroMemory( &siStartInfo, sizeof(STARTUPINFO));
00013   siStartInfo.cb = sizeof(STARTUPINFO);
00014   siStartInfo.hStdOutput = ZipStdoutWr;
00015   siStartInfo.dwFlags |= STARTF_USESTDHANDLES;
00016   // Create the child process.
00017   const BOOL FuncRetn = CreateProcess(NULL,
00018     (LPSTR) CmdLine.CStr(),  // command line
00019     NULL,          // process security attributes
00020     NULL,          // primary thread security attributes
00021     TRUE,          // handles are inherited
00022     0,             // creation flags
00023     NULL,          // use parent's environment
00024     NULL,          // use parent's current directory
00025     &siStartInfo,  // STARTUPINFO pointer
00026     &piProcInfo);  // receives PROCESS_INFORMATION
00027   EAssertR(FuncRetn!=0, TStr::Fmt("Can not execute '%s'", CmdLine.CStr()).CStr());
00028   CloseHandle(piProcInfo.hProcess);
00029   CloseHandle(piProcInfo.hThread);
00030   #else
00031   ZipStdoutRd = popen(CmdLine.CStr(), "r");
00032   EAssertR(ZipStdoutRd != NULL,  TStr::Fmt("Can not execute '%s'", CmdLine.CStr()).CStr());
00033   #endif
00034 }
00035 
00036 void TZipIn::FillBf(){
00037   EAssertR(CurFPos < FLen, "End of file "+GetSNm()+" reached.");
00038   EAssertR((BfC==BfL)/*&&((BfL==-1)||(BfL==MxBfL))*/, "Error reading file '"+GetSNm()+"'.");
00039   #ifdef GLib_WIN
00040   // Read output from the child process
00041   DWORD BytesRead;
00042   EAssert(ReadFile(ZipStdoutRd, Bf, MxBfL, &BytesRead, NULL) != 0);
00043   #else
00044   size_t BytesRead = fread(Bf, 1, MxBfL, ZipStdoutRd);
00045   EAssert(BytesRead != 0);
00046   #endif
00047   BfL = (int) BytesRead;
00048   CurFPos += BytesRead;
00049   EAssertR((BfC!=0)||(BfL!=0), "Error reading file '"+GetSNm()+"'.");
00050   BfC = 0;
00051 }
00052 
00053 TZipIn::TZipIn(const TStr& FNm) : TSBase(FNm.CStr()), TSIn(FNm), ZipStdoutRd(NULL), ZipStdoutWr(NULL),
00054   FLen(0), CurFPos(0), Bf(NULL), BfC(0), BfL(0) {
00055   EAssertR(! FNm.Empty(), "Empty file-name.");
00056   EAssertR(TFile::Exists(FNm), TStr::Fmt("File %s does not exist", FNm.CStr()).CStr());
00057   FLen = TZipIn::GetFLen(FNm);
00058   if (FLen == 0) { return; } // empty file
00059   #ifdef GLib_WIN
00060   // create pipes
00061   SECURITY_ATTRIBUTES saAttr;
00062   saAttr.nLength = sizeof(SECURITY_ATTRIBUTES);
00063   saAttr.bInheritHandle = TRUE;
00064   saAttr.lpSecurityDescriptor = NULL;
00065     // Create a pipe for the child process's STDOUT.
00066   const int PipeBufferSz = 32*1024;
00067   EAssertR(CreatePipe(&ZipStdoutRd, &ZipStdoutWr, &saAttr, PipeBufferSz), "Stdout pipe creation failed");
00068   // Ensure the read handle to the pipe for STDOUT is not inherited.
00069   SetHandleInformation(ZipStdoutRd, HANDLE_FLAG_INHERIT, 0);
00070   #else
00071   // no implementation needed
00072   #endif
00073   CreateZipProcess(GetCmd(FNm), FNm);
00074   Bf = new char[MxBfL]; BfC = BfL=-1;
00075   FillBf();
00076 }
00077 
00078 TZipIn::TZipIn(const TStr& FNm, bool& OpenedP) : TSBase(FNm.CStr()), TSIn(FNm), ZipStdoutRd(NULL), ZipStdoutWr(NULL),
00079   FLen(0), CurFPos(0), Bf(NULL), BfC(0), BfL(0) {
00080   EAssertR(! FNm.Empty(), "Empty file-name.");
00081   FLen = TZipIn::GetFLen(FNm);
00082   OpenedP = TFile::Exists(FNm);
00083   if (OpenedP) {
00084     #ifdef GLib_WIN
00085     SECURITY_ATTRIBUTES saAttr;
00086     saAttr.nLength = sizeof(SECURITY_ATTRIBUTES);
00087     saAttr.bInheritHandle = TRUE;
00088     saAttr.lpSecurityDescriptor = NULL;
00089     // Create a pipe for the child process's STDOUT.
00090     EAssertR(CreatePipe(&ZipStdoutRd, &ZipStdoutWr, &saAttr, 0), "Stdout pipe creation failed");
00091     // Ensure the read handle to the pipe for STDOUT is not inherited.
00092     SetHandleInformation(ZipStdoutRd, HANDLE_FLAG_INHERIT, 0);
00093     #else
00094     // no implementation needed
00095     #endif
00096     CreateZipProcess(GetCmd(FNm.GetFExt()), FNm);
00097     Bf = new char[MxBfL]; BfC = BfL=-1;
00098     FillBf();
00099   }
00100 }
00101 
00102 PSIn TZipIn::New(const TStr& FNm) {
00103   return PSIn(new TZipIn(FNm));
00104 }
00105 
00106 PSIn TZipIn::New(const TStr& FNm, bool& OpenedP){
00107   return PSIn(new TZipIn(FNm, OpenedP));
00108 }
00109 
00110 TZipIn::~TZipIn(){
00111   #ifdef GLib_WIN
00112   if (ZipStdoutRd != NULL) {
00113     EAssertR(CloseHandle(ZipStdoutRd), "Closing read-end of pipe failed"); }
00114   if (ZipStdoutWr != NULL) {
00115     EAssertR(CloseHandle(ZipStdoutWr)!=0, "Closing write-end of pipe failed"); }
00116   #else
00117   if (ZipStdoutRd != NULL) {
00118     EAssertR(pclose(ZipStdoutRd) != -1, "Closing of the process failed"); }
00119   #endif
00120   if (Bf != NULL) { delete[] Bf; }
00121 }
00122 
00123 int TZipIn::GetBf(const void* LBf, const TSize& LBfL){
00124   int LBfS=0;
00125   if (TSize(BfC+LBfL)>TSize(BfL)){
00126     for (TSize LBfC=0; LBfC<LBfL; LBfC++){
00127       if (BfC==BfL){FillBf();}
00128       LBfS+=((char*)LBf)[LBfC]=Bf[BfC++];}
00129   } else {
00130     for (TSize LBfC=0; LBfC<LBfL; LBfC++){
00131       LBfS+=(((char*)LBf)[LBfC]=Bf[BfC++]);}
00132   }
00133   return LBfS;
00134 }
00135 
00136 bool TZipIn::IsZipExt(const TStr& FNmExt) {
00137   if (FExtToCmdH.Empty()) FillFExtToCmdH();
00138   return FExtToCmdH.IsKey(FNmExt);
00139 }
00140 
00141 void TZipIn::FillFExtToCmdH() {
00142   // 7za decompress: "e -y -bd -so";
00143   #ifdef GLib_WIN
00144   const char* ZipCmd = "7z.exe e -y -bd -so";
00145   #else
00146   const char* ZipCmd = "7za e -y -bd -so";
00147   #endif
00148   if (FExtToCmdH.Empty()) {
00149     FExtToCmdH.AddDat(".gz",  ZipCmd);
00150     FExtToCmdH.AddDat(".7z",  ZipCmd);
00151     FExtToCmdH.AddDat(".rar", ZipCmd);
00152     FExtToCmdH.AddDat(".zip", ZipCmd);
00153     FExtToCmdH.AddDat(".cab", ZipCmd);
00154     FExtToCmdH.AddDat(".arj", ZipCmd);
00155     FExtToCmdH.AddDat(".bzip2", ZipCmd);
00156     FExtToCmdH.AddDat(".bz2", ZipCmd);
00157   }
00158 }
00159 
00160 TStr TZipIn::GetCmd(const TStr& ZipFNm) {
00161   if (FExtToCmdH.Empty()) FillFExtToCmdH();
00162   const TStr Ext = ZipFNm.GetFExt().GetLc();
00163   EAssertR(FExtToCmdH.IsKey(Ext), TStr::Fmt("Unsupported file extension '%s'", Ext.CStr()));
00164   return FExtToCmdH.GetDat(Ext);
00165 }
00166 
00167 uint64 TZipIn::GetFLen(const TStr& ZipFNm) {
00168   #ifdef GLib_WIN
00169   HANDLE ZipStdoutRd, ZipStdoutWr;
00170   // create pipes
00171   SECURITY_ATTRIBUTES saAttr;
00172   saAttr.nLength = sizeof(SECURITY_ATTRIBUTES);
00173   saAttr.bInheritHandle = TRUE;
00174   saAttr.lpSecurityDescriptor = NULL;
00175     // Create a pipe for the child process's STDOUT.
00176   const int PipeBufferSz = 32*1024;
00177   EAssertR(CreatePipe(&ZipStdoutRd, &ZipStdoutWr, &saAttr, PipeBufferSz), "Stdout pipe creation failed");
00178   // Ensure the read handle to the pipe for STDOUT is not inherited.
00179   SetHandleInformation(ZipStdoutRd, HANDLE_FLAG_INHERIT, 0);
00180   //CreateZipProcess(GetCmd(FNm), FNm);
00181   { const TStr CmdLine = TStr::Fmt("7z.exe l %s", ZipFNm.CStr());
00182   PROCESS_INFORMATION piProcInfo;
00183   STARTUPINFO siStartInfo;
00184   ZeroMemory( &piProcInfo, sizeof(PROCESS_INFORMATION));
00185   ZeroMemory( &siStartInfo, sizeof(STARTUPINFO));
00186   siStartInfo.cb = sizeof(STARTUPINFO);
00187   siStartInfo.hStdOutput = ZipStdoutWr;
00188   siStartInfo.dwFlags |= STARTF_USESTDHANDLES;
00189   // Create the child process.
00190   const BOOL FuncRetn = CreateProcess(NULL, (LPSTR) CmdLine.CStr(),
00191     NULL, NULL, TRUE, 0, NULL, NULL, &siStartInfo, &piProcInfo);
00192   EAssertR(FuncRetn!=0, TStr::Fmt("Can not execute '%s'", CmdLine.CStr()).CStr());
00193   CloseHandle(piProcInfo.hProcess);
00194   CloseHandle(piProcInfo.hThread); }
00195   #else
00196   const TStr CmdLine = TStr::Fmt("7za l %s", ZipFNm.CStr());
00197   FILE* ZipStdoutRd = popen(CmdLine.CStr(), "r");
00198   EAssertR(ZipStdoutRd != NULL, TStr::Fmt("Can not execute '%s'", CmdLine.CStr()).CStr());
00199   #endif
00200   // Read output from the child process
00201   const int BfSz = 32*1024;
00202   char* Bf = new char [BfSz];
00203   int BfC=0, BfL=0;
00204   memset(Bf, 0, BfSz);
00205   #ifdef GLib_WIN
00206   DWORD BytesRead;
00207   EAssert(ReadFile(ZipStdoutRd, Bf, MxBfL, &BytesRead, NULL) != 0);
00208   #else
00209   size_t BytesRead = fread(Bf, 1, MxBfL, ZipStdoutRd);
00210   EAssert(BytesRead != 0);
00211   EAssert(pclose(ZipStdoutRd) != -1);
00212   #endif
00213   BfL = (int) BytesRead;  IAssert((BfC!=0)||(BfL!=0));
00214   BfC = 0; Bf[BfL] = 0;
00215   // find file lenght
00216   TStr Str(Bf);  delete [] Bf;
00217   TStrV StrV; Str.SplitOnWs(StrV);
00218   int n = StrV.Len()-1;
00219   while (n > 0 && ! StrV[n].IsPrefix("-----")) { n--; }
00220   if (n-7 <= 0) {
00221     WrNotify(TStr::Fmt("Corrupt file %s: MESSAGE:\n", ZipFNm.CStr()).CStr(), Str.CStr());
00222     SaveToErrLog(TStr::Fmt("Corrupt file %s. Message:\n:%s\n", ZipFNm.CStr(), Str.CStr()).CStr());
00223     return 0;
00224   }
00225   return StrV[n-7].GetInt64();
00226 }
00227 
00229 // Output-File
00230 TStrStrH TZipOut::FExtToCmdH;
00231 const TSize TZipOut::MxBfL=4*1024;
00232 
00233 void TZipOut::FlushBf() {
00234   #ifdef GLib_WIN
00235   DWORD BytesOut;
00236   EAssertR(WriteFile(ZipStdinWr, Bf, DWORD(BfL), &BytesOut, NULL)!=0, "Error writting to the file '"+GetSNm()+"'.");
00237   #else
00238   size_t BytesOut = fwrite(Bf, 1, BfL, ZipStdinWr);
00239   #endif
00240   EAssert(BytesOut == BfL);
00241   BfL = 0;
00242 }
00243 
00244 void TZipOut::CreateZipProcess(const TStr& Cmd, const TStr& ZipFNm) {
00245   const TStr CmdLine = TStr::Fmt("%s %s", Cmd.CStr(), ZipFNm.CStr());
00246   #ifdef GLib_WIN
00247   PROCESS_INFORMATION piProcInfo;
00248   STARTUPINFO siStartInfo;
00249   ZeroMemory( &piProcInfo, sizeof(PROCESS_INFORMATION));
00250   ZeroMemory( &siStartInfo, sizeof(STARTUPINFO));
00251   siStartInfo.cb = sizeof(STARTUPINFO);
00252   siStartInfo.hStdInput = ZipStdinRd;
00253   siStartInfo.dwFlags |= STARTF_USESTDHANDLES;
00254   // Create the child process.
00255   const BOOL FuncRetn = CreateProcess(NULL,
00256     (LPSTR) CmdLine.CStr(),  // command line
00257     NULL,          // process security attributes
00258     NULL,          // primary thread security attributes
00259     TRUE,          // handles are inherited
00260     0,             // creation flags
00261     NULL,          // use parent's environment
00262     NULL,          // use parent's current directory
00263     &siStartInfo,  // STARTUPINFO pointer
00264     &piProcInfo);  // receives PROCESS_INFORMATION
00265   EAssertR(FuncRetn!=0, TStr::Fmt("Can not execute '%s'", CmdLine.CStr()).CStr());
00266   CloseHandle(piProcInfo.hProcess);
00267   CloseHandle(piProcInfo.hThread);
00268   #else
00269   ZipStdinWr = popen(CmdLine.CStr(),"w");
00270   EAssertR(ZipStdinWr != NULL,  TStr::Fmt("Can not execute '%s'", CmdLine.CStr()).CStr());
00271   #endif
00272 }
00273 
00274 TZipOut::TZipOut(const TStr& FNm) : TSBase(FNm.CStr()), TSOut(FNm), ZipStdinRd(NULL), ZipStdinWr(NULL), Bf(NULL), BfL(0){
00275   EAssertR(! FNm.Empty(), "Empty file-name.");
00276   #ifdef GLib_WIN
00277   // create pipes
00278   SECURITY_ATTRIBUTES saAttr;
00279   saAttr.nLength = sizeof(SECURITY_ATTRIBUTES);
00280   saAttr.bInheritHandle = TRUE;
00281   saAttr.lpSecurityDescriptor = NULL;
00282   // Create a pipe for the child process's STDOUT.
00283   EAssertR(CreatePipe(&ZipStdinRd, &ZipStdinWr, &saAttr, 0), "Stdout pipe creation failed");
00284   // Ensure the read handle to the pipe for STDOUT is not inherited.
00285   SetHandleInformation(ZipStdinWr, HANDLE_FLAG_INHERIT, 0);
00286   #else
00287   // no implementation necessary
00288   #endif
00289   CreateZipProcess(GetCmd(FNm), FNm);
00290   Bf=new char[MxBfL];  BfL=0;
00291 }
00292 
00293 PSOut TZipOut::New(const TStr& FNm){
00294   return PSOut(new TZipOut(FNm));
00295 }
00296 
00297 TZipOut::~TZipOut() {
00298   if (BfL!=0) { FlushBf(); }
00299   #ifdef GLib_WIN
00300   if (ZipStdinWr != NULL) { EAssertR(CloseHandle(ZipStdinWr), "Closing write-end of pipe failed"); }
00301   if (ZipStdinRd != NULL) { EAssertR(CloseHandle(ZipStdinRd), "Closing read-end of pipe failed"); }
00302   #else
00303   if (ZipStdinWr != NULL) { EAssertR(pclose(ZipStdinWr) != -1, "Closing of the process failed"); }
00304   #endif
00305   if (Bf!=NULL) { delete[] Bf; }
00306 }
00307 
00308 int TZipOut::PutCh(const char& Ch){
00309   if (BfL==MxBfL) {FlushBf();}
00310   return Bf[BfL++]=Ch;
00311 }
00312 
00313 int TZipOut::PutBf(const void* LBf, const TSize& LBfL){
00314   int LBfS=0;
00315   if (BfL+LBfL>MxBfL){
00316     for (TSize LBfC=0; LBfC<LBfL; LBfC++){
00317       LBfS+=PutCh(((char*)LBf)[LBfC]);}
00318   } else {
00319     for (TSize LBfC=0; LBfC<LBfL; LBfC++){
00320       LBfS+=(Bf[BfL++]=((char*)LBf)[LBfC]);}
00321   }
00322   return LBfS;
00323 }
00324 
00325 void TZipOut::Flush(){
00326   FlushBf();
00327   #ifdef GLib_WIN
00328   EAssertR(FlushFileBuffers(ZipStdinWr)!=0, "Can not flush file '"+GetSNm()+"'.");
00329   #else
00330   EAssertR(fflush(ZipStdinWr)==0, "Can not flush file '"+GetSNm()+"'.");
00331   #endif
00332 }
00333 
00334 bool TZipOut::IsZipExt(const TStr& FNmExt) {
00335   if (FExtToCmdH.Empty()) FillFExtToCmdH();
00336   return FExtToCmdH.IsKey(FNmExt);
00337 }
00338 
00339 void TZipOut::FillFExtToCmdH() {
00340    // 7za compress: "a -y -bd -si{CompressedFNm}"
00341   #ifdef GLib_WIN
00342   const char* ZipCmd = "7z.exe a -y -bd -si";
00343   #else
00344   const char* ZipCmd = "7za a -y -bd -si";
00345   #endif
00346   if (FExtToCmdH.Empty()) {
00347     FExtToCmdH.AddDat(".gz",  ZipCmd);
00348     FExtToCmdH.AddDat(".7z",  ZipCmd);
00349     FExtToCmdH.AddDat(".rar", ZipCmd);
00350     FExtToCmdH.AddDat(".zip", ZipCmd);
00351     FExtToCmdH.AddDat(".cab", ZipCmd);
00352     FExtToCmdH.AddDat(".arj", ZipCmd);
00353     FExtToCmdH.AddDat(".bzip2", ZipCmd);
00354     FExtToCmdH.AddDat(".bz2", ZipCmd);
00355   }
00356 }
00357 
00358 TStr TZipOut::GetCmd(const TStr& ZipFNm) {
00359   if (FExtToCmdH.Empty()) FillFExtToCmdH();
00360   const TStr Ext = ZipFNm.GetFExt().GetLc();
00361   EAssertR(FExtToCmdH.IsKey(Ext), TStr::Fmt("Unsupported file extension '%s'", Ext.CStr()));
00362   return FExtToCmdH.GetDat(Ext)+ZipFNm.GetFMid();
00363 }