SNAP Library 2.1, User Reference  2013-09-25 10:47:25
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
zipfl.cpp
Go to the documentation of this file.
00001 
00002 // ZIP Input-File
00003 
00004 #if defined(GLib_WIN)
00005   TStr TZipIn::SevenZipPath = "C:\\7Zip";
00006 #elif defined(GLib_CYGWIN)
00007   TStr TZipIn::SevenZipPath = "/usr/bin";
00008 #elif defined(GLib_MACOSX) 
00009   TStr TZipIn::SevenZipPath = "/opt/local/bin";
00010 #else 
00011   TStr TZipIn::SevenZipPath = "/usr/bin";
00012 #endif
00013 
00014 
00015 TStrStrH TZipIn::FExtToCmdH;
00016 const int TZipIn::MxBfL=32*1024;
00017 
00018 void TZipIn::CreateZipProcess(const TStr& Cmd, const TStr& ZipFNm) {
00019   const TStr CmdLine = TStr::Fmt("%s %s", Cmd.CStr(), ZipFNm.CStr());
00020   #ifdef GLib_WIN
00021   PROCESS_INFORMATION piProcInfo;
00022   STARTUPINFO siStartInfo;
00023   ZeroMemory( &piProcInfo, sizeof(PROCESS_INFORMATION));
00024   ZeroMemory( &siStartInfo, sizeof(STARTUPINFO));
00025   siStartInfo.cb = sizeof(STARTUPINFO);
00026   siStartInfo.hStdOutput = ZipStdoutWr;
00027   siStartInfo.dwFlags |= STARTF_USESTDHANDLES;
00028   // Create the child process.
00029   const BOOL FuncRetn = CreateProcess(NULL,
00030     (LPSTR) CmdLine.CStr(),  // command line
00031     NULL,          // process security attributes
00032     NULL,          // primary thread security attributes
00033     TRUE,          // handles are inherited
00034     0,             // creation flags
00035     NULL,          // use parent's environment
00036     NULL,          // use parent's current directory
00037     &siStartInfo,  // STARTUPINFO pointer
00038     &piProcInfo);  // receives PROCESS_INFORMATION
00039   EAssertR(FuncRetn!=0, TStr::Fmt("Can not execute '%s'", CmdLine.CStr()).CStr());
00040   CloseHandle(piProcInfo.hProcess);
00041   CloseHandle(piProcInfo.hThread);
00042   #else
00043   ZipStdoutRd = popen(CmdLine.CStr(), "r");
00044   if (ZipStdoutRd == 0) { // try using SevenZipPath
00045     ZipStdoutRd = popen((TZipIn::SevenZipPath+"/"+CmdLine).CStr(), "r");
00046   }
00047   EAssertR(ZipStdoutRd != NULL,  TStr::Fmt("Can not execute '%s'", CmdLine.CStr()).CStr());
00048   #endif
00049 }
00050 
00051 void TZipIn::FillBf(){
00052   EAssertR(CurFPos < FLen, "End of file "+GetSNm()+" reached.");
00053   EAssertR((BfC==BfL)/*&&((BfL==-1)||(BfL==MxBfL))*/, "Error reading file '"+GetSNm()+"'.");
00054   #ifdef GLib_WIN
00055   // Read output from the child process
00056   DWORD BytesRead;
00057   EAssert(ReadFile(ZipStdoutRd, Bf, MxBfL, &BytesRead, NULL) != 0);
00058   #else
00059   size_t BytesRead = fread(Bf, 1, MxBfL, ZipStdoutRd);
00060   EAssert(BytesRead != 0);
00061   #endif
00062   BfL = (int) BytesRead;
00063   CurFPos += BytesRead;
00064   EAssertR((BfC!=0)||(BfL!=0), "Error reading file '"+GetSNm()+"'.");
00065   BfC = 0;
00066 }
00067 
00068 TZipIn::TZipIn(const TStr& FNm) : TSBase(FNm.CStr()), TSIn(FNm), ZipStdoutRd(NULL), ZipStdoutWr(NULL),
00069   FLen(0), CurFPos(0), Bf(NULL), BfC(0), BfL(0) {
00070   EAssertR(! FNm.Empty(), "Empty file-name.");
00071   EAssertR(TFile::Exists(FNm), TStr::Fmt("File %s does not exist", FNm.CStr()).CStr());
00072   FLen = TZipIn::GetFLen(FNm);
00073   if (FLen == 0) { return; } // empty file
00074   #ifdef GLib_WIN
00075   // create pipes
00076   SECURITY_ATTRIBUTES saAttr;
00077   saAttr.nLength = sizeof(SECURITY_ATTRIBUTES);
00078   saAttr.bInheritHandle = TRUE;
00079   saAttr.lpSecurityDescriptor = NULL;
00080     // Create a pipe for the child process's STDOUT.
00081   const int PipeBufferSz = 32*1024;
00082   EAssertR(CreatePipe(&ZipStdoutRd, &ZipStdoutWr, &saAttr, PipeBufferSz), "Stdout pipe creation failed");
00083   // Ensure the read handle to the pipe for STDOUT is not inherited.
00084   SetHandleInformation(ZipStdoutRd, HANDLE_FLAG_INHERIT, 0);
00085   #else
00086   // no implementation needed
00087   #endif
00088   CreateZipProcess(GetCmd(FNm), FNm);
00089   Bf = new char[MxBfL]; BfC = BfL=-1;
00090   FillBf();
00091 }
00092 
00093 TZipIn::TZipIn(const TStr& FNm, bool& OpenedP) : TSBase(FNm.CStr()), TSIn(FNm), ZipStdoutRd(NULL), ZipStdoutWr(NULL),
00094   FLen(0), CurFPos(0), Bf(NULL), BfC(0), BfL(0) {
00095   EAssertR(! FNm.Empty(), "Empty file-name.");
00096   FLen = TZipIn::GetFLen(FNm);
00097   OpenedP = TFile::Exists(FNm);
00098   if (OpenedP) {
00099     #ifdef GLib_WIN
00100     SECURITY_ATTRIBUTES saAttr;
00101     saAttr.nLength = sizeof(SECURITY_ATTRIBUTES);
00102     saAttr.bInheritHandle = TRUE;
00103     saAttr.lpSecurityDescriptor = NULL;
00104     // Create a pipe for the child process's STDOUT.
00105     EAssertR(CreatePipe(&ZipStdoutRd, &ZipStdoutWr, &saAttr, 0), "Stdout pipe creation failed");
00106     // Ensure the read handle to the pipe for STDOUT is not inherited.
00107     SetHandleInformation(ZipStdoutRd, HANDLE_FLAG_INHERIT, 0);
00108     #else
00109     // no implementation needed
00110     #endif
00111     CreateZipProcess(GetCmd(FNm.GetFExt()), FNm);
00112     Bf = new char[MxBfL]; BfC = BfL=-1;
00113     FillBf();
00114   }
00115 }
00116 
00117 PSIn TZipIn::New(const TStr& FNm) {
00118   return PSIn(new TZipIn(FNm));
00119 }
00120 
00121 PSIn TZipIn::New(const TStr& FNm, bool& OpenedP){
00122   return PSIn(new TZipIn(FNm, OpenedP));
00123 }
00124 
00125 TZipIn::~TZipIn(){
00126   #ifdef GLib_WIN
00127   if (ZipStdoutRd != NULL) {
00128     EAssertR(CloseHandle(ZipStdoutRd), "Closing read-end of pipe failed"); }
00129   if (ZipStdoutWr != NULL) {
00130     EAssertR(CloseHandle(ZipStdoutWr)!=0, "Closing write-end of pipe failed"); }
00131   #else
00132   if (ZipStdoutRd != NULL) {
00133     EAssertR(pclose(ZipStdoutRd) != -1, "Closing of the process failed"); }
00134   #endif
00135   if (Bf != NULL) { delete[] Bf; }
00136 }
00137 
00138 int TZipIn::GetBf(const void* LBf, const TSize& LBfL){
00139   int LBfS=0;
00140   if (TSize(BfC+LBfL)>TSize(BfL)){
00141     for (TSize LBfC=0; LBfC<LBfL; LBfC++){
00142       if (BfC==BfL){FillBf();}
00143       LBfS+=((char*)LBf)[LBfC]=Bf[BfC++];}
00144   } else {
00145     for (TSize LBfC=0; LBfC<LBfL; LBfC++){
00146       LBfS+=(((char*)LBf)[LBfC]=Bf[BfC++]);}
00147   }
00148   return LBfS;
00149 }
00150 
00151 // Gets the next line to LnChA.
00152 // Returns true, if LnChA contains a valid line.
00153 // Returns false, if LnChA is empty, such as end of file was encountered.
00154 bool TZipIn::GetNextLnBf(TChA& LnChA) {
00155   int Status;
00156   int BfN;        // new pointer to the end of line
00157   int BfP;        // previous pointer to the line start
00158   LnChA.Clr();
00159   do {
00160     if (BfC >= BfL) { BfP = 0; } // reset the current pointer, FindEol() will read a new buffer
00161     else { BfP = BfC; }
00162     Status = FindEol(BfN);
00163     if (Status >= 0) {
00164       LnChA.AddBf(&Bf[BfP],BfN-BfP);
00165       if (Status == 1) { return true; } // got a complete line
00166     }
00167     // get more data, if the line is incomplete
00168   } while (Status == 0);
00169   // eof or the last line has no newline
00170   return !LnChA.Empty();
00171 }
00172 
00173 // Sets BfN to the end of line or end of buffer. Reads more data, if needed.
00174 // Returns 1, when an end of line was found, BfN is end of line.
00175 // Returns 0, when an end of line was not found and more data is required,
00176 //    BfN is end of buffer.
00177 // Returns -1, when an end of file was found, BfN is not defined.
00178 int TZipIn::FindEol(int& BfN) {
00179   char Ch;
00180   if (BfC >= BfL) { // check for eof, read more data
00181     if (Eof()) { return -1; }
00182     FillBf();
00183   }
00184   while (BfC < BfL) {
00185     Ch = Bf[BfC++];
00186     if (Ch=='\n') { BfN = BfC-1; return 1; }
00187     if (Ch=='\r' && Bf[BfC+1]=='\n') {
00188       BfC++;  BfN = BfC-2;  return 1; }
00189   }
00190   BfN = BfC;
00191   return 0;
00192 }
00193 
00194 bool TZipIn::IsZipExt(const TStr& FNmExt) {
00195   if (FExtToCmdH.Empty()) FillFExtToCmdH();
00196   return FExtToCmdH.IsKey(FNmExt);
00197 }
00198 
00199 void TZipIn::FillFExtToCmdH() {
00200   // 7za decompress: "e -y -bd -so";
00201   #ifdef GLib_WIN
00202   const char* ZipCmd = "7z.exe e -y -bd -so";
00203   #else
00204   const char* ZipCmd = "7za e -y -bd -so";
00205   #endif
00206   if (FExtToCmdH.Empty()) {
00207     FExtToCmdH.AddDat(".gz",  ZipCmd);
00208     FExtToCmdH.AddDat(".7z",  ZipCmd);
00209     FExtToCmdH.AddDat(".rar", ZipCmd);
00210     FExtToCmdH.AddDat(".zip", ZipCmd);
00211     FExtToCmdH.AddDat(".cab", ZipCmd);
00212     FExtToCmdH.AddDat(".arj", ZipCmd);
00213     FExtToCmdH.AddDat(".bzip2", ZipCmd);
00214     FExtToCmdH.AddDat(".bz2", ZipCmd);
00215   }
00216 }
00217 
00218 TStr TZipIn::GetCmd(const TStr& ZipFNm) {
00219   if (FExtToCmdH.Empty()) FillFExtToCmdH();
00220   const TStr Ext = ZipFNm.GetFExt().GetLc();
00221   EAssertR(FExtToCmdH.IsKey(Ext), TStr::Fmt("Unsupported file extension '%s'", Ext.CStr()));
00222   return FExtToCmdH.GetDat(Ext);
00223 }
00224 
00225 uint64 TZipIn::GetFLen(const TStr& ZipFNm) {
00226   #ifdef GLib_WIN
00227   HANDLE ZipStdoutRd, ZipStdoutWr;
00228   // create pipes
00229   SECURITY_ATTRIBUTES saAttr;
00230   saAttr.nLength = sizeof(SECURITY_ATTRIBUTES);
00231   saAttr.bInheritHandle = TRUE;
00232   saAttr.lpSecurityDescriptor = NULL;
00233     // Create a pipe for the child process's STDOUT.
00234   const int PipeBufferSz = 32*1024;
00235   EAssertR(CreatePipe(&ZipStdoutRd, &ZipStdoutWr, &saAttr, PipeBufferSz), "Stdout pipe creation failed");
00236   // Ensure the read handle to the pipe for STDOUT is not inherited.
00237   SetHandleInformation(ZipStdoutRd, HANDLE_FLAG_INHERIT, 0);
00238   //CreateZipProcess(GetCmd(FNm), FNm);
00239   { const TStr CmdLine = TStr::Fmt("7z.exe l %s", ZipFNm.CStr());
00240   PROCESS_INFORMATION piProcInfo;
00241   STARTUPINFO siStartInfo;
00242   ZeroMemory( &piProcInfo, sizeof(PROCESS_INFORMATION));
00243   ZeroMemory( &siStartInfo, sizeof(STARTUPINFO));
00244   siStartInfo.cb = sizeof(STARTUPINFO);
00245   siStartInfo.hStdOutput = ZipStdoutWr;
00246   siStartInfo.dwFlags |= STARTF_USESTDHANDLES;
00247   // Create the child process.
00248   const BOOL FuncRetn = CreateProcess(NULL, (LPSTR) CmdLine.CStr(),
00249     NULL, NULL, TRUE, 0, NULL, NULL, &siStartInfo, &piProcInfo);
00250   EAssertR(FuncRetn!=0, TStr::Fmt("Can not execute '%s'", CmdLine.CStr()).CStr());
00251   CloseHandle(piProcInfo.hProcess);
00252   CloseHandle(piProcInfo.hThread); }
00253   #else
00254   const TStr CmdLine = TStr::Fmt("7za l %s", ZipFNm.CStr());
00255   FILE* ZipStdoutRd = popen(CmdLine.CStr(), "r");
00256   if (ZipStdoutRd == NULL) { // try using SevenZipPath
00257     ZipStdoutRd = popen((TZipIn::SevenZipPath+"/"+CmdLine).CStr(), "r");
00258   }
00259   EAssertR(ZipStdoutRd != NULL, TStr::Fmt("Can not execute '%s'", CmdLine.CStr()).CStr());
00260   #endif
00261   // Read output from the child process
00262   const int BfSz = 32*1024;
00263   char* Bf = new char [BfSz];
00264   int BfC=0, BfL=0;
00265   memset(Bf, 0, BfSz);
00266   #ifdef GLib_WIN
00267   DWORD BytesRead;
00268   EAssert(ReadFile(ZipStdoutRd, Bf, MxBfL, &BytesRead, NULL) != 0);
00269   #else
00270   size_t BytesRead = fread(Bf, 1, MxBfL, ZipStdoutRd);
00271   EAssert(BytesRead != 0);
00272   EAssert(pclose(ZipStdoutRd) != -1);
00273   #endif
00274   BfL = (int) BytesRead;  IAssert((BfC!=0)||(BfL!=0));
00275   BfC = 0; Bf[BfL] = 0;
00276   // find file lenght
00277   TStr Str(Bf);  delete [] Bf;
00278   TStrV StrV; Str.SplitOnWs(StrV);
00279   int n = StrV.Len()-1;
00280   while (n > 0 && ! StrV[n].IsPrefix("-----")) { n--; }
00281   if (n-7 <= 0) {
00282     WrNotify(TStr::Fmt("Corrupt file %s: MESSAGE:\n", ZipFNm.CStr()).CStr(), Str.CStr());
00283     SaveToErrLog(TStr::Fmt("Corrupt file %s. Message:\n:%s\n", ZipFNm.CStr(), Str.CStr()).CStr());
00284     return 0;
00285   }
00286   return StrV[n-7].GetInt64();
00287 }
00288 
00290 // Output-File
00291 TStrStrH TZipOut::FExtToCmdH;
00292 const TSize TZipOut::MxBfL=4*1024;
00293 
00294 void TZipOut::FlushBf() {
00295   #ifdef GLib_WIN
00296   DWORD BytesOut;
00297   EAssertR(WriteFile(ZipStdinWr, Bf, DWORD(BfL), &BytesOut, NULL)!=0, "Error writting to the file '"+GetSNm()+"'.");
00298   #else
00299   size_t BytesOut = fwrite(Bf, 1, BfL, ZipStdinWr);
00300   #endif
00301   EAssert(BytesOut == BfL);
00302   BfL = 0;
00303 }
00304 
00305 void TZipOut::CreateZipProcess(const TStr& Cmd, const TStr& ZipFNm) {
00306   const TStr CmdLine = TStr::Fmt("%s %s", Cmd.CStr(), ZipFNm.CStr());
00307   #ifdef GLib_WIN
00308   PROCESS_INFORMATION piProcInfo;
00309   STARTUPINFO siStartInfo;
00310   ZeroMemory( &piProcInfo, sizeof(PROCESS_INFORMATION));
00311   ZeroMemory( &siStartInfo, sizeof(STARTUPINFO));
00312   siStartInfo.cb = sizeof(STARTUPINFO);
00313   siStartInfo.hStdInput = ZipStdinRd;
00314   siStartInfo.dwFlags |= STARTF_USESTDHANDLES;
00315   // Create the child process.
00316   const BOOL FuncRetn = CreateProcess(NULL,
00317     (LPSTR) CmdLine.CStr(),  // command line
00318     NULL,          // process security attributes
00319     NULL,          // primary thread security attributes
00320     TRUE,          // handles are inherited
00321     0,             // creation flags
00322     NULL,          // use parent's environment
00323     NULL,          // use parent's current directory
00324     &siStartInfo,  // STARTUPINFO pointer
00325     &piProcInfo);  // receives PROCESS_INFORMATION
00326   EAssertR(FuncRetn!=0, TStr::Fmt("Can not execute '%s'", CmdLine.CStr()).CStr());
00327   CloseHandle(piProcInfo.hProcess);
00328   CloseHandle(piProcInfo.hThread);
00329   #else
00330   ZipStdinWr = popen(CmdLine.CStr(),"w");
00331   if (ZipStdinWr == NULL) { // try using SevenZipPath
00332     ZipStdinWr = popen((TZipIn::SevenZipPath+"/"+CmdLine).CStr(), "r");
00333   }
00334   EAssertR(ZipStdinWr != NULL,  TStr::Fmt("Can not execute '%s'", CmdLine.CStr()).CStr());
00335   #endif
00336 }
00337 
00338 TZipOut::TZipOut(const TStr& FNm) : TSBase(FNm.CStr()), TSOut(FNm), ZipStdinRd(NULL), ZipStdinWr(NULL), Bf(NULL), BfL(0){
00339   EAssertR(! FNm.Empty(), "Empty file-name.");
00340   #ifdef GLib_WIN
00341   // create pipes
00342   SECURITY_ATTRIBUTES saAttr;
00343   saAttr.nLength = sizeof(SECURITY_ATTRIBUTES);
00344   saAttr.bInheritHandle = TRUE;
00345   saAttr.lpSecurityDescriptor = NULL;
00346   // Create a pipe for the child process's STDOUT.
00347   EAssertR(CreatePipe(&ZipStdinRd, &ZipStdinWr, &saAttr, 0), "Stdout pipe creation failed");
00348   // Ensure the read handle to the pipe for STDOUT is not inherited.
00349   SetHandleInformation(ZipStdinWr, HANDLE_FLAG_INHERIT, 0);
00350   #else
00351   // no implementation necessary
00352   #endif
00353   CreateZipProcess(GetCmd(FNm), FNm);
00354   Bf=new char[MxBfL];  BfL=0;
00355 }
00356 
00357 PSOut TZipOut::New(const TStr& FNm){
00358   return PSOut(new TZipOut(FNm));
00359 }
00360 
00361 TZipOut::~TZipOut() {
00362   if (BfL!=0) { FlushBf(); }
00363   #ifdef GLib_WIN
00364   if (ZipStdinWr != NULL) { EAssertR(CloseHandle(ZipStdinWr), "Closing write-end of pipe failed"); }
00365   if (ZipStdinRd != NULL) { EAssertR(CloseHandle(ZipStdinRd), "Closing read-end of pipe failed"); }
00366   #else
00367   if (ZipStdinWr != NULL) { EAssertR(pclose(ZipStdinWr) != -1, "Closing of the process failed"); }
00368   #endif
00369   if (Bf!=NULL) { delete[] Bf; }
00370 }
00371 
00372 int TZipOut::PutCh(const char& Ch){
00373   if (BfL==MxBfL) {FlushBf();}
00374   return Bf[BfL++]=Ch;
00375 }
00376 
00377 int TZipOut::PutBf(const void* LBf, const TSize& LBfL){
00378   int LBfS=0;
00379   if (BfL+LBfL>MxBfL){
00380     for (TSize LBfC=0; LBfC<LBfL; LBfC++){
00381       LBfS+=PutCh(((char*)LBf)[LBfC]);}
00382   } else {
00383     for (TSize LBfC=0; LBfC<LBfL; LBfC++){
00384       LBfS+=(Bf[BfL++]=((char*)LBf)[LBfC]);}
00385   }
00386   return LBfS;
00387 }
00388 
00389 void TZipOut::Flush(){
00390   FlushBf();
00391   #ifdef GLib_WIN
00392   EAssertR(FlushFileBuffers(ZipStdinWr)!=0, "Can not flush file '"+GetSNm()+"'.");
00393   #else
00394   EAssertR(fflush(ZipStdinWr)==0, "Can not flush file '"+GetSNm()+"'.");
00395   #endif
00396 }
00397 
00398 bool TZipOut::IsZipExt(const TStr& FNmExt) {
00399   if (FExtToCmdH.Empty()) FillFExtToCmdH();
00400   return FExtToCmdH.IsKey(FNmExt);
00401 }
00402 
00403 void TZipOut::FillFExtToCmdH() {
00404    // 7za compress: "a -y -bd -si{CompressedFNm}"
00405   #ifdef GLib_WIN
00406   const char* ZipCmd = "7z.exe a -y -bd -si";
00407   #else
00408   const char* ZipCmd = "7za a -y -bd -si";
00409   #endif
00410   if (FExtToCmdH.Empty()) {
00411     FExtToCmdH.AddDat(".gz",  ZipCmd);
00412     FExtToCmdH.AddDat(".7z",  ZipCmd);
00413     FExtToCmdH.AddDat(".rar", ZipCmd);
00414     FExtToCmdH.AddDat(".zip", ZipCmd);
00415     FExtToCmdH.AddDat(".cab", ZipCmd);
00416     FExtToCmdH.AddDat(".arj", ZipCmd);
00417     FExtToCmdH.AddDat(".bzip2", ZipCmd);
00418     FExtToCmdH.AddDat(".bz2", ZipCmd);
00419   }
00420 }
00421 
00422 TStr TZipOut::GetCmd(const TStr& ZipFNm) {
00423   if (FExtToCmdH.Empty()) FillFExtToCmdH();
00424   const TStr Ext = ZipFNm.GetFExt().GetLc();
00425   EAssertR(FExtToCmdH.IsKey(Ext), TStr::Fmt("Unsupported file extension '%s'", Ext.CStr()));
00426   return FExtToCmdH.GetDat(Ext)+ZipFNm.GetFMid();
00427 }