SNAP Library, Developer Reference  2012-10-15 15:06:59
SNAP, a general purpose network analysis and graph mining library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
zipfl.cpp
Go to the documentation of this file.
00001 
00002 // ZIP Input-File
// Table from lower-case file extension (e.g. ".gz") to the decompression command
// line; it starts empty and is populated on demand by FillFExtToCmdH().
00003 TStrStrH TZipIn::FExtToCmdH;
// Size in bytes of the read buffer Bf and the maximum amount requested per read.
00004 const int TZipIn::MxBfL=32*1024;
00005 
00006 void TZipIn::CreateZipProcess(const TStr& Cmd, const TStr& ZipFNm) {
00007   const TStr CmdLine = TStr::Fmt("%s %s", Cmd.CStr(), ZipFNm.CStr());
00008   #ifdef GLib_WIN
00009   PROCESS_INFORMATION piProcInfo;
00010   STARTUPINFO siStartInfo;
00011   ZeroMemory( &piProcInfo, sizeof(PROCESS_INFORMATION));
00012   ZeroMemory( &siStartInfo, sizeof(STARTUPINFO));
00013   siStartInfo.cb = sizeof(STARTUPINFO);
00014   siStartInfo.hStdOutput = ZipStdoutWr;
00015   siStartInfo.dwFlags |= STARTF_USESTDHANDLES;
00016   // Create the child process.
00017   const BOOL FuncRetn = CreateProcess(NULL,
00018     (LPSTR) CmdLine.CStr(),  // command line
00019     NULL,          // process security attributes
00020     NULL,          // primary thread security attributes
00021     TRUE,          // handles are inherited
00022     0,             // creation flags
00023     NULL,          // use parent's environment
00024     NULL,          // use parent's current directory
00025     &siStartInfo,  // STARTUPINFO pointer
00026     &piProcInfo);  // receives PROCESS_INFORMATION
00027   EAssertR(FuncRetn!=0, TStr::Fmt("Can not execute '%s'", CmdLine.CStr()).CStr());
00028   CloseHandle(piProcInfo.hProcess);
00029   CloseHandle(piProcInfo.hThread);
00030   #else
00031   ZipStdoutRd = popen(CmdLine.CStr(), "r");
00032   EAssertR(ZipStdoutRd != NULL,  TStr::Fmt("Can not execute '%s'", CmdLine.CStr()).CStr());
00033   #endif
00034 }
00035 
// Refills the read buffer Bf with the next chunk of decompressor output.
// Preconditions (asserted): fewer than FLen bytes have been read so far and the
// current buffer content is fully consumed (BfC==BfL).
// Requests up to MxBfL bytes from ZipStdoutRd, stores the number of bytes
// obtained in BfL, advances CurFPos by that amount and resets BfC to 0.
00036 void TZipIn::FillBf(){
00037   EAssertR(CurFPos < FLen, "End of file "+GetSNm()+" reached.");
00038   EAssertR((BfC==BfL)/*&&((BfL==-1)||(BfL==MxBfL))*/, "Error reading file '"+GetSNm()+"'.");
00039   #ifdef GLib_WIN
00040   // Read output from the child process
00041   DWORD BytesRead;
00042   EAssert(ReadFile(ZipStdoutRd, Bf, MxBfL, &BytesRead, NULL) != 0);
00043   #else
00044   size_t BytesRead = fread(Bf, 1, MxBfL, ZipStdoutRd);
  // a read that yields no bytes is treated as an error on this platform
00045   EAssert(BytesRead != 0);
00046   #endif
00047   BfL = (int) BytesRead;
00048   CurFPos += BytesRead;
  // BfC still holds its value from before the read here (the constructors set it
  // to -1 before the first call); the check fails only if both BfC and BfL are 0
00049   EAssertR((BfC!=0)||(BfL!=0), "Error reading file '"+GetSNm()+"'.");
00050   BfC = 0;
00051 }
00052 
// Opens the compressed file FNm for reading through an external decompressor.
// Asserts that FNm is non-empty and that the file exists. The uncompressed
// length is obtained with GetFLen(); when it is 0 the constructor returns early
// without starting a process, leaving Bf NULL and BfC==BfL==0.
// Otherwise the decompressor is started and the first buffer is read.
00053 TZipIn::TZipIn(const TStr& FNm) : TSBase(FNm.CStr()), TSIn(FNm), ZipStdoutRd(NULL), ZipStdoutWr(NULL),
00054   FLen(0), CurFPos(0), Bf(NULL), BfC(0), BfL(0) {
00055   EAssertR(! FNm.Empty(), "Empty file-name.");
00056   EAssertR(TFile::Exists(FNm), TStr::Fmt("File %s does not exist", FNm.CStr()).CStr());
00057   FLen = TZipIn::GetFLen(FNm);
00058   if (FLen == 0) { return; } // empty file
00059   #ifdef GLib_WIN
00060   // create pipes
00061   SECURITY_ATTRIBUTES saAttr;
00062   saAttr.nLength = sizeof(SECURITY_ATTRIBUTES);
  // pipe handles are created inheritable so the child can receive the write end
00063   saAttr.bInheritHandle = TRUE;
00064   saAttr.lpSecurityDescriptor = NULL;
00065     // Create a pipe for the child process's STDOUT.
00066   const int PipeBufferSz = 32*1024;
00067   EAssertR(CreatePipe(&ZipStdoutRd, &ZipStdoutWr, &saAttr, PipeBufferSz), "Stdout pipe creation failed");
00068   // Ensure the read handle to the pipe for STDOUT is not inherited.
00069   SetHandleInformation(ZipStdoutRd, HANDLE_FLAG_INHERIT, 0);
00070   #else
00071   // no implementation needed
00072   #endif
00073   CreateZipProcess(GetCmd(FNm), FNm);
  // BfC==BfL==-1 marks the buffer as consumed, satisfying FillBf()'s BfC==BfL precondition
00074   Bf = new char[MxBfL]; BfC = BfL=-1;
00075   FillBf();
00076 }
00077 
00078 TZipIn::TZipIn(const TStr& FNm, bool& OpenedP) : TSBase(FNm.CStr()), TSIn(FNm), ZipStdoutRd(NULL), ZipStdoutWr(NULL),
00079   FLen(0), CurFPos(0), Bf(NULL), BfC(0), BfL(0) {
00080   EAssertR(! FNm.Empty(), "Empty file-name.");
00081   FLen = TZipIn::GetFLen(FNm);
00082   OpenedP = TFile::Exists(FNm);
00083   if (OpenedP) {
00084     #ifdef GLib_WIN
00085     SECURITY_ATTRIBUTES saAttr;
00086     saAttr.nLength = sizeof(SECURITY_ATTRIBUTES);
00087     saAttr.bInheritHandle = TRUE;
00088     saAttr.lpSecurityDescriptor = NULL;
00089     // Create a pipe for the child process's STDOUT.
00090     EAssertR(CreatePipe(&ZipStdoutRd, &ZipStdoutWr, &saAttr, 0), "Stdout pipe creation failed");
00091     // Ensure the read handle to the pipe for STDOUT is not inherited.
00092     SetHandleInformation(ZipStdoutRd, HANDLE_FLAG_INHERIT, 0);
00093     #else
00094     // no implementation needed
00095     #endif
00096     CreateZipProcess(GetCmd(FNm.GetFExt()), FNm);
00097     Bf = new char[MxBfL]; BfC = BfL=-1;
00098     FillBf();
00099   }
00100 }
00101 
00102 PSIn TZipIn::New(const TStr& FNm) {
00103   return PSIn(new TZipIn(FNm));
00104 }
00105 
00106 PSIn TZipIn::New(const TStr& FNm, bool& OpenedP){
00107   return PSIn(new TZipIn(FNm, OpenedP));
00108 }
00109 
// Releases the pipe to the decompressor and the read buffer.
// Windows: closes whichever of the two pipe handles are non-NULL.
// Other platforms: pclose()s the stream opened by popen(), if any.
// NOTE(review): the close calls are checked with EAssertR; if that macro throws,
// the buffer below is not freed - confirm against the macro definition.
00110 TZipIn::~TZipIn(){
00111   #ifdef GLib_WIN
00112   if (ZipStdoutRd != NULL) {
00113     EAssertR(CloseHandle(ZipStdoutRd), "Closing read-end of pipe failed"); }
00114   if (ZipStdoutWr != NULL) {
00115     EAssertR(CloseHandle(ZipStdoutWr)!=0, "Closing write-end of pipe failed"); }
00116   #else
00117   if (ZipStdoutRd != NULL) {
00118     EAssertR(pclose(ZipStdoutRd) != -1, "Closing of the process failed"); }
00119   #endif
  // Bf is NULL when no decompressor was started (e.g. empty file)
00120   if (Bf != NULL) { delete[] Bf; }
00121 }
00122 
00123 int TZipIn::GetBf(const void* LBf, const TSize& LBfL){
00124   int LBfS=0;
00125   if (TSize(BfC+LBfL)>TSize(BfL)){
00126     for (TSize LBfC=0; LBfC<LBfL; LBfC++){
00127       if (BfC==BfL){FillBf();}
00128       LBfS+=((char*)LBf)[LBfC]=Bf[BfC++];}
00129   } else {
00130     for (TSize LBfC=0; LBfC<LBfL; LBfC++){
00131       LBfS+=(((char*)LBf)[LBfC]=Bf[BfC++]);}
00132   }
00133   return LBfS;
00134 }
00135 
00136 bool TZipIn::IsZipExt(const TStr& FNmExt) {
00137   if (FExtToCmdH.Empty()) FillFExtToCmdH();
00138   return FExtToCmdH.IsKey(FNmExt);
00139 }
00140 
00141 void TZipIn::FillFExtToCmdH() {
00142   // 7za decompress: "e -y -bd -so";
00143   #ifdef GLib_WIN
00144   const char* ZipCmd = "7z.exe e -y -bd -so";
00145   #else
00146   const char* ZipCmd = "7za e -y -bd -so";
00147   #endif
00148   if (FExtToCmdH.Empty()) {
00149     FExtToCmdH.AddDat(".gz",  ZipCmd);
00150     FExtToCmdH.AddDat(".7z",  ZipCmd);
00151     FExtToCmdH.AddDat(".rar", ZipCmd);
00152     FExtToCmdH.AddDat(".zip", ZipCmd);
00153     FExtToCmdH.AddDat(".cab", ZipCmd);
00154     FExtToCmdH.AddDat(".arj", ZipCmd);
00155     FExtToCmdH.AddDat(".bzip2", ZipCmd);
00156     FExtToCmdH.AddDat(".bz2", ZipCmd);
00157   }
00158 }
00159 
00160 TStr TZipIn::GetCmd(const TStr& ZipFNm) {
00161   if (FExtToCmdH.Empty()) FillFExtToCmdH();
00162   const TStr Ext = ZipFNm.GetFExt().GetLc();
00163   EAssertR(FExtToCmdH.IsKey(Ext), TStr::Fmt("Unsupported file extension '%s'", Ext.CStr()));
00164   return FExtToCmdH.GetDat(Ext);
00165 }
00166 
00167 uint64 TZipIn::GetFLen(const TStr& ZipFNm) {
00168   #ifdef GLib_WIN
00169   HANDLE ZipStdoutRd, ZipStdoutWr;
00170   // create pipes
00171   SECURITY_ATTRIBUTES saAttr;
00172   saAttr.nLength = sizeof(SECURITY_ATTRIBUTES);
00173   saAttr.bInheritHandle = TRUE;
00174   saAttr.lpSecurityDescriptor = NULL;
00175     // Create a pipe for the child process's STDOUT.
00176   const int PipeBufferSz = 32*1024;
00177   EAssertR(CreatePipe(&ZipStdoutRd, &ZipStdoutWr, &saAttr, PipeBufferSz), "Stdout pipe creation failed");
00178   // Ensure the read handle to the pipe for STDOUT is not inherited.
00179   SetHandleInformation(ZipStdoutRd, HANDLE_FLAG_INHERIT, 0);
00180   //CreateZipProcess(GetCmd(FNm), FNm);
00181   { const TStr CmdLine = TStr::Fmt("7z.exe l %s", ZipFNm.CStr());
00182   PROCESS_INFORMATION piProcInfo;
00183   STARTUPINFO siStartInfo;
00184   ZeroMemory( &piProcInfo, sizeof(PROCESS_INFORMATION));
00185   ZeroMemory( &siStartInfo, sizeof(STARTUPINFO));
00186   siStartInfo.cb = sizeof(STARTUPINFO);
00187   siStartInfo.hStdOutput = ZipStdoutWr;
00188   siStartInfo.dwFlags |= STARTF_USESTDHANDLES;
00189   // Create the child process.
00190   const BOOL FuncRetn = CreateProcess(NULL, (LPSTR) CmdLine.CStr(),
00191     NULL, NULL, TRUE, 0, NULL, NULL, &siStartInfo, &piProcInfo);
00192   EAssertR(FuncRetn!=0, TStr::Fmt("Can not execute '%s'", CmdLine.CStr()).CStr());
00193   CloseHandle(piProcInfo.hProcess);
00194   CloseHandle(piProcInfo.hThread); }
00195   #else
00196   const TStr CmdLine = TStr::Fmt("7za l %s", ZipFNm.CStr());
00197   FILE* ZipStdoutRd = popen(CmdLine.CStr(), "r");
00198   EAssertR(ZipStdoutRd != NULL, TStr::Fmt("Can not execute '%s'", CmdLine.CStr()).CStr());
00199   #endif
00200   // Read output from the child process
00201   const int BfSz = 32*1024;
00202   char* Bf = new char [BfSz];
00203   int BfC=0, BfL=0;
00204   memset(Bf, 0, BfSz);
00205   #ifdef GLib_WIN
00206   DWORD BytesRead;
00207   EAssert(ReadFile(ZipStdoutRd, Bf, MxBfL, &BytesRead, NULL) != 0);
00208   #else
00209   size_t BytesRead = fread(Bf, 1, MxBfL, ZipStdoutRd);
00210   EAssert(BytesRead != 0);
00211   EAssert(pclose(ZipStdoutRd) != -1);
00212   #endif
00213   BfL = (int) BytesRead;  IAssert((BfC!=0)||(BfL!=0));
00214   BfC = 0; Bf[BfL] = 0;
00215   // find file lenght
00216   TStr Str(Bf);  delete [] Bf;
00217   TStrV StrV; Str.SplitOnWs(StrV);
00218   int n = StrV.Len()-1;
00219   while (n > 0 && ! StrV[n].IsPrefix("-----")) { n--; }
00220   if (n-7 <= 0) {
00221     WrNotify(TStr::Fmt("Corrupt file %s: MESSAGE:\n", ZipFNm.CStr()).CStr(), Str.CStr());
00222     SaveToErrLog(TStr::Fmt("Corrupt file %s. Message:\n:%s\n", ZipFNm.CStr(), Str.CStr()).CStr());
00223     return 0;
00224   }
00225   return StrV[n-7].GetInt64();
00226 }
00227 
00229 // Output-File
// Table from lower-case file extension (e.g. ".gz") to the compression command
// line; it starts empty and is populated on demand by FillFExtToCmdH().
00230 TStrStrH TZipOut::FExtToCmdH;
// Capacity in bytes of the write buffer Bf.
00231 const TSize TZipOut::MxBfL=4*1024;
00232 
// Writes the BfL buffered bytes of Bf to the compressor's standard input
// (ZipStdinWr) and marks the buffer as empty.
// Asserts that the write succeeds (Windows) and that all BfL bytes were written.
00233 void TZipOut::FlushBf() {
00234   #ifdef GLib_WIN
00235   DWORD BytesOut;
00236   EAssertR(WriteFile(ZipStdinWr, Bf, DWORD(BfL), &BytesOut, NULL)!=0, "Error writting to the file '"+GetSNm()+"'.");
00237   #else
00238   size_t BytesOut = fwrite(Bf, 1, BfL, ZipStdinWr);
00239   #endif
  // a short write is treated as an error
00240   EAssert(BytesOut == BfL);
00241   BfL = 0;
00242 }
00243 
00244 void TZipOut::CreateZipProcess(const TStr& Cmd, const TStr& ZipFNm) {
00245   const TStr CmdLine = TStr::Fmt("%s %s", Cmd.CStr(), ZipFNm.CStr());
00246   #ifdef GLib_WIN
00247   PROCESS_INFORMATION piProcInfo;
00248   STARTUPINFO siStartInfo;
00249   ZeroMemory( &piProcInfo, sizeof(PROCESS_INFORMATION));
00250   ZeroMemory( &siStartInfo, sizeof(STARTUPINFO));
00251   siStartInfo.cb = sizeof(STARTUPINFO);
00252   siStartInfo.hStdInput = ZipStdinRd;
00253   siStartInfo.dwFlags |= STARTF_USESTDHANDLES;
00254   // Create the child process.
00255   const BOOL FuncRetn = CreateProcess(NULL,
00256     (LPSTR) CmdLine.CStr(),  // command line
00257     NULL,          // process security attributes
00258     NULL,          // primary thread security attributes
00259     TRUE,          // handles are inherited
00260     0,             // creation flags
00261     NULL,          // use parent's environment
00262     NULL,          // use parent's current directory
00263     &siStartInfo,  // STARTUPINFO pointer
00264     &piProcInfo);  // receives PROCESS_INFORMATION
00265   EAssertR(FuncRetn!=0, TStr::Fmt("Can not execute '%s'", CmdLine.CStr()).CStr());
00266   CloseHandle(piProcInfo.hProcess);
00267   CloseHandle(piProcInfo.hThread);
00268   #else
00269   ZipStdinWr = popen(CmdLine.CStr(),"w");
00270   EAssertR(ZipStdinWr != NULL,  TStr::Fmt("Can not execute '%s'", CmdLine.CStr()).CStr());
00271   #endif
00272 }
00273 
00274 TZipOut::TZipOut(const TStr& FNm) : TSBase(FNm.CStr()), TSOut(FNm), ZipStdinRd(NULL), ZipStdinWr(NULL), Bf(NULL), BfL(0){
00275   EAssertR(! FNm.Empty(), "Empty file-name.");
00276   #ifdef GLib_WIN
00277   // create pipes
00278   SECURITY_ATTRIBUTES saAttr;
00279   saAttr.nLength = sizeof(SECURITY_ATTRIBUTES);
00280   saAttr.bInheritHandle = TRUE;
00281   saAttr.lpSecurityDescriptor = NULL;
00282   // Create a pipe for the child process's STDOUT.
00283   EAssertR(CreatePipe(&ZipStdinRd, &ZipStdinWr, &saAttr, 0), "Stdout pipe creation failed");
00284   // Ensure the read handle to the pipe for STDOUT is not inherited.
00285   SetHandleInformation(ZipStdinWr, HANDLE_FLAG_INHERIT, 0);
00286   #else
00287   // no implementation necessary
00288   #endif
00289   CreateZipProcess(GetCmd(FNm), FNm);
00290   Bf=new char[MxBfL];  BfL=0;
00291 }
00292 
00293 PSOut TZipOut::New(const TStr& FNm){
00294   return PSOut(new TZipOut(FNm));
00295 }
00296 
// Writes out any bytes still buffered, then releases the pipe to the compressor
// and the write buffer.
// Windows: closes the write end first, then the read end, skipping NULL handles.
// Other platforms: pclose()s the stream opened by popen(), if any.
// NOTE(review): FlushBf() and the close calls use assertion macros; if those
// throw, the buffer below is not freed - confirm against the macro definitions.
00297 TZipOut::~TZipOut() {
00298   if (BfL!=0) { FlushBf(); }
00299   #ifdef GLib_WIN
00300   if (ZipStdinWr != NULL) { EAssertR(CloseHandle(ZipStdinWr), "Closing write-end of pipe failed"); }
00301   if (ZipStdinRd != NULL) { EAssertR(CloseHandle(ZipStdinRd), "Closing read-end of pipe failed"); }
00302   #else
00303   if (ZipStdinWr != NULL) { EAssertR(pclose(ZipStdinWr) != -1, "Closing of the process failed"); }
00304   #endif
00305   if (Bf!=NULL) { delete[] Bf; }
00306 }
00307 
00308 int TZipOut::PutCh(const char& Ch){
00309   if (BfL==MxBfL) {FlushBf();}
00310   return Bf[BfL++]=Ch;
00311 }
00312 
00313 int TZipOut::PutBf(const void* LBf, const TSize& LBfL){
00314   int LBfS=0;
00315   if (BfL+LBfL>MxBfL){
00316     for (TSize LBfC=0; LBfC<LBfL; LBfC++){
00317       LBfS+=PutCh(((char*)LBf)[LBfC]);}
00318   } else {
00319     for (TSize LBfC=0; LBfC<LBfL; LBfC++){
00320       LBfS+=(Bf[BfL++]=((char*)LBf)[LBfC]);}
00321   }
00322   return LBfS;
00323 }
00324 
// Pushes all buffered data towards the compressor: first the object's own buffer
// via FlushBf() (called unconditionally, even when nothing is buffered), then
// the underlying pipe handle / stdio stream. Asserts that the flush succeeds.
00325 void TZipOut::Flush(){
00326   FlushBf();
00327   #ifdef GLib_WIN
00328   EAssertR(FlushFileBuffers(ZipStdinWr)!=0, "Can not flush file '"+GetSNm()+"'.");
00329   #else
00330   EAssertR(fflush(ZipStdinWr)==0, "Can not flush file '"+GetSNm()+"'.");
00331   #endif
00332 }
00333 
00334 bool TZipOut::IsZipExt(const TStr& FNmExt) {
00335   if (FExtToCmdH.Empty()) FillFExtToCmdH();
00336   return FExtToCmdH.IsKey(FNmExt);
00337 }
00338 
00339 void TZipOut::FillFExtToCmdH() {
00340    // 7za compress: "a -y -bd -si{CompressedFNm}"
00341   #ifdef GLib_WIN
00342   const char* ZipCmd = "7z.exe a -y -bd -si";
00343   #else
00344   const char* ZipCmd = "7za a -y -bd -si";
00345   #endif
00346   if (FExtToCmdH.Empty()) {
00347     FExtToCmdH.AddDat(".gz",  ZipCmd);
00348     FExtToCmdH.AddDat(".7z",  ZipCmd);
00349     FExtToCmdH.AddDat(".rar", ZipCmd);
00350     FExtToCmdH.AddDat(".zip", ZipCmd);
00351     FExtToCmdH.AddDat(".cab", ZipCmd);
00352     FExtToCmdH.AddDat(".arj", ZipCmd);
00353     FExtToCmdH.AddDat(".bzip2", ZipCmd);
00354     FExtToCmdH.AddDat(".bz2", ZipCmd);
00355   }
00356 }
00357 
00358 TStr TZipOut::GetCmd(const TStr& ZipFNm) {
00359   if (FExtToCmdH.Empty()) FillFExtToCmdH();
00360   const TStr Ext = ZipFNm.GetFExt().GetLc();
00361   EAssertR(FExtToCmdH.IsKey(Ext), TStr::Fmt("Unsupported file extension '%s'", Ext.CStr()));
00362   return FExtToCmdH.GetDat(Ext)+ZipFNm.GetFMid();
00363 }