SNAP Library 2.0, User Reference  2013-05-13 16:33:57
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
TZipIn Class Reference

#include <zipfl.h>

Inherits TSIn.

List of all members.

Public Member Functions

 TZipIn (const TStr &FNm)
 TZipIn (const TStr &FNm, bool &OpenedP)
 ~TZipIn ()
bool Eof ()
int Len () const
char GetCh ()
char PeekCh ()
int GetBf (const void *LBf, const TSize &LBfL)
bool GetNextLnBf (TChA &LnChA)
uint64 GetFLen () const
uint64 GetCurFPos () const

Static Public Member Functions

static PSIn New (const TStr &FNm)
static PSIn New (const TStr &FNm, bool &OpenedP)
static bool IsZipFNm (const TStr &FNm)
 Check whether the file extension of FNm is that of a compressed file (.gz, .7z, .rar, .zip, .cab, .arj, .bzip2, .bz2).
static bool IsZipExt (const TStr &FNmExt)
 Check whether the file extension FNmExt is that of a compressed file (.gz, .7z, .rar, .zip, .cab, .arj, .bzip2, .bz2).
static TStr GetCmd (const TStr &ZipFNm)
 Return a command-line string that is executed in order to decompress a file to standard output.
static uint64 GetFLen (const TStr &ZipFNm)
 Return the uncompressed size (in bytes) of the compressed file ZipFNm.
static PSIn NewIfZip (const TStr &FNm)

Private Member Functions

void FillBf ()
int FindEol (int &BfN)
void CreateZipProcess (const TStr &Cmd, const TStr &ZipFNm)
 TZipIn ()
 TZipIn (const TZipIn &)
TZipIn & operator= (const TZipIn &)

Static Private Member Functions

static void FillFExtToCmdH ()

Private Attributes

FILE * ZipStdoutRd
FILE * ZipStdoutWr
uint64 FLen
uint64 CurFPos
char * Bf
int BfC
int BfL

Static Private Attributes

static TStrStrH FExtToCmdH
static const int MxBfL = 32*1024

Detailed Description

Compressed File Input Stream. The class reads from a compressed file without explicitly uncompressing it. This is achieved by running the external 7ZIP program, which uncompresses to standard output, which is then piped to TZipIn. The class requires 7ZIP to be installed on the machine. Go to http://www.7-zip.org to install the software. 7z (7z.exe) is an executable and can decompress the following formats: .gz, .7z, .rar, .zip, .cab, .arj, .bzip2. The class TZipIn expects that '7z' ('7z.exe') is in the working path (non-Windows builds invoke '7za'). Make sure you can execute '7z e -y -bd -so <FILENAME>'. Note: You can only load .gz files of uncompressed size <2GB. If you load some other format (like .bz2 or .rar) there is no such limitation. Note2: For 7z to work properly you need both the 7z executable and the directory 'Codecs'.

Definition at line 12 of file zipfl.h.


Constructor & Destructor Documentation

TZipIn::TZipIn ( ) [private]
TZipIn::TZipIn ( const TZipIn & ) [private]
TZipIn::TZipIn ( const TStr & FNm)

Definition at line 53 of file zipfl.cpp.

                              : TSBase(FNm.CStr()), TSIn(FNm), ZipStdoutRd(NULL), ZipStdoutWr(NULL),
  FLen(0), CurFPos(0), Bf(NULL), BfC(0), BfL(0) {
  // Opens the compressed file FNm for reading: validates the name, looks up the
  // uncompressed length, starts the decompression process and loads the first buffer.
  // Both checks below are assertions, so a missing or unnamed file is an error here
  // (the two-argument constructor reports a missing file through OpenedP instead).
  EAssertR(! FNm.Empty(), "Empty file-name.");
  EAssertR(TFile::Exists(FNm), TStr::Fmt("File %s does not exist", FNm.CStr()).CStr());
  // uncompressed size as reported by the static GetFLen(FNm)
  FLen = TZipIn::GetFLen(FNm);
  // With FLen == 0 no process is started and Bf stays NULL; since CurFPos == FLen
  // and BfC == BfL (both 0), Eof() is true right away.
  if (FLen == 0) { return; } // empty file
  #ifdef GLib_WIN
  // create pipes
  SECURITY_ATTRIBUTES saAttr;
  saAttr.nLength = sizeof(SECURITY_ATTRIBUTES);
  saAttr.bInheritHandle = TRUE;
  saAttr.lpSecurityDescriptor = NULL;
    // Create a pipe for the child process's STDOUT.
  const int PipeBufferSz = 32*1024;
  EAssertR(CreatePipe(&ZipStdoutRd, &ZipStdoutWr, &saAttr, PipeBufferSz), "Stdout pipe creation failed");
  // Ensure the read handle to the pipe for STDOUT is not inherited.
  SetHandleInformation(ZipStdoutRd, HANDLE_FLAG_INHERIT, 0);
  #else
  // no implementation needed
  // (CreateZipProcess obtains the read stream from popen on this platform)
  #endif
  CreateZipProcess(GetCmd(FNm), FNm);
  // BfC == BfL is the precondition FillBf() asserts before it reads a new buffer
  Bf = new char[MxBfL]; BfC = BfL=-1;
  FillBf();
}
TZipIn::TZipIn ( const TStr & FNm,
bool &  OpenedP 
)

Definition at line 78 of file zipfl.cpp.

                                             : TSBase(FNm.CStr()), TSIn(FNm), ZipStdoutRd(NULL), ZipStdoutWr(NULL),
  FLen(0), CurFPos(0), Bf(NULL), BfC(0), BfL(0) {
  // Opens the compressed file FNm for reading. Unlike the single-argument
  // constructor, a missing file is not an assertion failure: OpenedP is set to
  // false and the stream is left empty (FLen == 0, Bf == NULL).
  EAssertR(! FNm.Empty(), "Empty file-name.");
  // Check for existence BEFORE asking 7-Zip for the length, so that no external
  // process is started for a file that is not there.
  OpenedP = TFile::Exists(FNm);
  if (! OpenedP) { return; }
  FLen = TZipIn::GetFLen(FNm);
  // Same guard as in the single-argument constructor: FillBf() asserts
  // CurFPos < FLen, so it must not be called for an empty file. With no buffer
  // and CurFPos == FLen, Eof() is true right away.
  if (FLen == 0) { return; } // empty file
  #ifdef GLib_WIN
  SECURITY_ATTRIBUTES saAttr;
  saAttr.nLength = sizeof(SECURITY_ATTRIBUTES);
  saAttr.bInheritHandle = TRUE;
  saAttr.lpSecurityDescriptor = NULL;
  // Create a pipe for the child process's STDOUT.
  EAssertR(CreatePipe(&ZipStdoutRd, &ZipStdoutWr, &saAttr, 0), "Stdout pipe creation failed");
  // Ensure the read handle to the pipe for STDOUT is not inherited.
  SetHandleInformation(ZipStdoutRd, HANDLE_FLAG_INHERIT, 0);
  #else
  // no implementation needed
  #endif
  // GetCmd() extracts the extension itself, so the full file name is passed
  // (as the single-argument constructor does).
  CreateZipProcess(GetCmd(FNm), FNm);
  // BfC == BfL is the precondition FillBf() asserts before it reads a new buffer
  Bf = new char[MxBfL]; BfC = BfL=-1;
  FillBf();
}

TZipIn::~TZipIn ( )

Definition at line 110 of file zipfl.cpp.

               {
  // Release the read buffer first: it needs no checks, and doing it up front
  // means it is not leaked if one of the close assertions below fails.
  // (delete[] on a NULL pointer is a no-op, so no guard is required.)
  delete[] Bf;
  #ifdef GLib_WIN
  // close both ends of the pipe created by the constructor
  if (ZipStdoutRd != NULL) {
    EAssertR(CloseHandle(ZipStdoutRd), "Closing read-end of pipe failed"); }
  if (ZipStdoutWr != NULL) {
    EAssertR(CloseHandle(ZipStdoutWr)!=0, "Closing write-end of pipe failed"); }
  #else
  // the stream was opened with popen() in CreateZipProcess, so it is closed with pclose()
  if (ZipStdoutRd != NULL) {
    EAssertR(pclose(ZipStdoutRd) != -1, "Closing of the process failed"); }
  #endif
  // NOTE(review): if EAssertR raises an exception on failure, it would escape
  // from a destructor here - confirm this is intended.
}

Member Function Documentation

void TZipIn::CreateZipProcess ( const TStr & Cmd,
const TStr & ZipFNm 
) [private]

Definition at line 6 of file zipfl.cpp.

                                                                 {
  // Starts the external decompression command and connects its standard output
  // to this stream.
  //   Cmd    - command with its options (see GetCmd)
  //   ZipFNm - file name appended to the command as its last argument
  // The file name is inserted as-is, without quoting.
  // NOTE(review): a path containing spaces or shell metacharacters would presumably
  // be split or interpreted by the command processor - confirm against callers.
  const TStr CmdLine = TStr::Fmt("%s %s", Cmd.CStr(), ZipFNm.CStr());
  #ifdef GLib_WIN
  PROCESS_INFORMATION piProcInfo;
  STARTUPINFO siStartInfo;
  ZeroMemory( &piProcInfo, sizeof(PROCESS_INFORMATION));
  ZeroMemory( &siStartInfo, sizeof(STARTUPINFO));
  siStartInfo.cb = sizeof(STARTUPINFO);
  // the child writes into ZipStdoutWr, the write end of the pipe set up by the constructor
  siStartInfo.hStdOutput = ZipStdoutWr;
  siStartInfo.dwFlags |= STARTF_USESTDHANDLES;
  // Create the child process.
  const BOOL FuncRetn = CreateProcess(NULL,
    (LPSTR) CmdLine.CStr(),  // command line
    NULL,          // process security attributes
    NULL,          // primary thread security attributes
    TRUE,          // handles are inherited
    0,             // creation flags
    NULL,          // use parent's environment
    NULL,          // use parent's current directory
    &siStartInfo,  // STARTUPINFO pointer
    &piProcInfo);  // receives PROCESS_INFORMATION
  EAssertR(FuncRetn!=0, TStr::Fmt("Can not execute '%s'", CmdLine.CStr()).CStr());
  // the process and thread handles are not used afterwards; only the pipe is kept
  CloseHandle(piProcInfo.hProcess);
  CloseHandle(piProcInfo.hThread);
  #else
  // popen() runs the command line and returns a stream reading its standard output
  ZipStdoutRd = popen(CmdLine.CStr(), "r");
  EAssertR(ZipStdoutRd != NULL,  TStr::Fmt("Can not execute '%s'", CmdLine.CStr()).CStr());
  #endif
}
bool TZipIn::Eof ( ) [inline, virtual]

Implements TSIn.

Definition at line 40 of file zipfl.h.

{
  // End of stream: every uncompressed byte has been read from the pipe
  // (CurFPos == FLen) and the buffer holds no unread characters (BfC == BfL).
  const bool BfDrainedP = (BfC == BfL);
  return CurFPos==FLen && BfDrainedP;
}
void TZipIn::FillBf ( ) [private]

Definition at line 36 of file zipfl.cpp.

                   {
  // Reads the next chunk (at most MxBfL bytes) of decompressed output into Bf
  // and resets the read position to the start of the buffer.
  // Preconditions (asserted): there is still data left (CurFPos < FLen) and the
  // current buffer has been consumed completely (BfC == BfL).
  EAssertR(CurFPos < FLen, "End of file "+GetSNm()+" reached.");
  EAssertR((BfC==BfL)/*&&((BfL==-1)||(BfL==MxBfL))*/, "Error reading file '"+GetSNm()+"'.");
  #ifdef GLib_WIN
  // Read output from the child process
  DWORD BytesRead;
  EAssert(ReadFile(ZipStdoutRd, Bf, MxBfL, &BytesRead, NULL) != 0);
  #else
  size_t BytesRead = fread(Bf, 1, MxBfL, ZipStdoutRd);
  // a read that returns no bytes while data is still expected is treated as an error
  EAssert(BytesRead != 0);
  #endif
  // BfL = number of valid bytes in Bf; CurFPos counts all bytes read from the pipe so far
  BfL = (int) BytesRead;
  CurFPos += BytesRead;
  EAssertR((BfC!=0)||(BfL!=0), "Error reading file '"+GetSNm()+"'.");
  BfC = 0;
}
void TZipIn::FillFExtToCmdH ( ) [static, private]

Definition at line 208 of file zipfl.cpp.

                            {
  // Builds the extension -> decompression command table. Every supported
  // extension maps to the same 7-Zip invocation ("e -y -bd -so"); only the
  // executable name differs between platforms.
  #ifdef GLib_WIN
  const char* ZipCmd = "7z.exe e -y -bd -so";
  #else
  const char* ZipCmd = "7za e -y -bd -so";
  #endif
  // the table is filled only once
  if (! FExtToCmdH.Empty()) { return; }
  const char* const ExtV[] = {
    ".gz", ".7z", ".rar", ".zip", ".cab", ".arj", ".bzip2", ".bz2" };
  const int Exts = int(sizeof(ExtV) / sizeof(ExtV[0]));
  for (int ExtN = 0; ExtN < Exts; ExtN++) {
    FExtToCmdH.AddDat(ExtV[ExtN], ZipCmd);
  }
}
int TZipIn::FindEol ( int &  BfN) [private]

Definition at line 175 of file zipfl.cpp.

                            {
  // Scans the buffer from the current position BfC for the end of a line.
  // Returns:
  //    1 - end of line found; BfN is the index of the first end-of-line
  //        character and BfC points just past the line terminator
  //    0 - buffer exhausted without finding an end of line; BfN == BfC == BfL
  //   -1 - nothing left to read (Eof()); BfN is not set
  // Recognised terminators are "\n" and "\r\n".
  if (BfC >= BfL) {
    // check for eof, read more data
    if (Eof()) {
      return -1;
    }
    FillBf();
  }

  while (BfC < BfL) {
    const char Ch = Bf[BfC++];
    if (Ch=='\n') {
      BfN = BfC-1;
      return 1;
    }
    // BfC was already advanced, so the character following Ch is Bf[BfC]
    // (not Bf[BfC+1]). The bounds check keeps the look-ahead inside the valid
    // part of the buffer. A "\r\n" pair split across two buffer fills is not
    // joined: the '\r' then stays at the end of the returned line.
    if (Ch=='\r' && BfC < BfL && Bf[BfC]=='\n') {
      BfC++;
      BfN = BfC-2;
      return 1;
    }
  }
  BfN = BfC;

  return 0;
}
int TZipIn::GetBf ( const void *  LBf,
const TSize & LBfL 
) [virtual]

Implements TSIn.

Definition at line 123 of file zipfl.cpp.

                                                   {
  // Copies LBfL characters from the stream into the memory at LBf and returns
  // the sum of the copied character values.
  char* const DstBf = (char*) LBf;
  int ChSum = 0;
  // does the request reach beyond the data currently held in the buffer?
  const bool BeyondBfP = TSize(BfC+LBfL) > TSize(BfL);
  if (BeyondBfP) {
    // slow path: refill whenever the buffer runs dry
    for (TSize DstN = 0; DstN < LBfL; DstN++) {
      if (BfC == BfL) { FillBf(); }
      const char Ch = Bf[BfC++];
      DstBf[DstN] = Ch;
      ChSum += Ch;
    }
  } else {
    // fast path: everything requested is already buffered
    for (TSize DstN = 0; DstN < LBfL; DstN++) {
      const char Ch = Bf[BfC++];
      DstBf[DstN] = Ch;
      ChSum += Ch;
    }
  }
  return ChSum;
}
char TZipIn::GetCh ( ) [inline, virtual]

Implements TSIn.

Definition at line 42 of file zipfl.h.

{
  // Returns the next character and advances the read position; a fully
  // consumed buffer is refilled first.
  if (BfC == BfL) {
    FillBf();
  }
  const char Ch = Bf[BfC];
  BfC++;
  return Ch;
}
TStr TZipIn::GetCmd ( const TStr & ZipFNm) [static]

Return a command-line string that is executed in order to decompress a file to standard output.

Definition at line 227 of file zipfl.cpp.

                                      {
  // Looks up the decompression command registered for the (lower-cased)
  // extension of ZipFNm; an extension without an entry fails the assertion.
  if (FExtToCmdH.Empty()) {
    FillFExtToCmdH();
  }
  const TStr Ext = ZipFNm.GetFExt().GetLc();
  EAssertR(FExtToCmdH.IsKey(Ext), TStr::Fmt("Unsupported file extension '%s'", Ext.CStr()));
  const TStr& Cmd = FExtToCmdH.GetDat(Ext);
  return Cmd;
}
uint64 TZipIn::GetCurFPos ( ) const [inline]

Definition at line 48 of file zipfl.h.

{
  // CurFPos is advanced by FillBf() by the number of bytes of each read.
  return CurFPos;
}
uint64 TZipIn::GetFLen ( ) const [inline]

Definition at line 47 of file zipfl.h.

{
  // FLen is set by the constructors from the static GetFLen(FNm).
  return FLen;
}
uint64 TZipIn::GetFLen ( const TStr & ZipFNm) [static]

Return the uncompressed size (in bytes) of the compressed file ZipFNm.

Definition at line 234 of file zipfl.cpp.

                                         {
  // Determines the uncompressed size of ZipFNm by running the 7-Zip list
  // command ("7z.exe l" / "7za l") on it and parsing the captured output.
  // Returns 0, after writing a notification and an error-log entry, when the
  // output does not have the expected shape.
  // Only a single read of at most 32KB of the listing is examined.
  #ifdef GLib_WIN
  HANDLE ZipStdoutRd, ZipStdoutWr;
  // create pipes
  SECURITY_ATTRIBUTES saAttr;
  saAttr.nLength = sizeof(SECURITY_ATTRIBUTES);
  saAttr.bInheritHandle = TRUE;
  saAttr.lpSecurityDescriptor = NULL;
    // Create a pipe for the child process's STDOUT.
  const int PipeBufferSz = 32*1024;
  EAssertR(CreatePipe(&ZipStdoutRd, &ZipStdoutWr, &saAttr, PipeBufferSz), "Stdout pipe creation failed");
  // Ensure the read handle to the pipe for STDOUT is not inherited.
  SetHandleInformation(ZipStdoutRd, HANDLE_FLAG_INHERIT, 0);
  //CreateZipProcess(GetCmd(FNm), FNm);
  { const TStr CmdLine = TStr::Fmt("7z.exe l %s", ZipFNm.CStr());
  PROCESS_INFORMATION piProcInfo;
  STARTUPINFO siStartInfo;
  ZeroMemory( &piProcInfo, sizeof(PROCESS_INFORMATION));
  ZeroMemory( &siStartInfo, sizeof(STARTUPINFO));
  siStartInfo.cb = sizeof(STARTUPINFO);
  siStartInfo.hStdOutput = ZipStdoutWr;
  siStartInfo.dwFlags |= STARTF_USESTDHANDLES;
  // Create the child process.
  const BOOL FuncRetn = CreateProcess(NULL, (LPSTR) CmdLine.CStr(),
    NULL, NULL, TRUE, 0, NULL, NULL, &siStartInfo, &piProcInfo);
  EAssertR(FuncRetn!=0, TStr::Fmt("Can not execute '%s'", CmdLine.CStr()).CStr());
  CloseHandle(piProcInfo.hProcess);
  CloseHandle(piProcInfo.hThread); }
  #else
  const TStr CmdLine = TStr::Fmt("7za l %s", ZipFNm.CStr());
  FILE* ZipStdoutRd = popen(CmdLine.CStr(), "r");
  EAssertR(ZipStdoutRd != NULL, TStr::Fmt("Can not execute '%s'", CmdLine.CStr()).CStr());
  #endif
  // Read output from the child process
  const int BfSz = 32*1024;
  // One byte more than the maximum read size (MxBfL == BfSz), so that the
  // terminating 0 stored at Bf[BfL] below stays inside the allocation even
  // when the read fills the whole buffer.
  char* Bf = new char [BfSz+1];
  int BfC=0, BfL=0;
  memset(Bf, 0, BfSz+1);
  #ifdef GLib_WIN
  DWORD BytesRead;
  EAssert(ReadFile(ZipStdoutRd, Bf, MxBfL, &BytesRead, NULL) != 0);
  // Both pipe handles are local to this function; release them once the
  // listing has been read so that they are not leaked on every call.
  CloseHandle(ZipStdoutRd);
  CloseHandle(ZipStdoutWr);
  #else
  size_t BytesRead = fread(Bf, 1, MxBfL, ZipStdoutRd);
  EAssert(BytesRead != 0);
  EAssert(pclose(ZipStdoutRd) != -1);
  #endif
  BfL = (int) BytesRead;  IAssert((BfC!=0)||(BfL!=0));
  BfC = 0; Bf[BfL] = 0;
  // find file length
  TStr Str(Bf);  delete [] Bf;
  TStrV StrV; Str.SplitOnWs(StrV);
  // Search backwards for the last token starting with "-----"; the size is
  // taken from the token 7 positions before it.
  int n = StrV.Len()-1;
  while (n > 0 && ! StrV[n].IsPrefix("-----")) { n--; }
  if (n-7 <= 0) {
    WrNotify(TStr::Fmt("Corrupt file %s: MESSAGE:\n", ZipFNm.CStr()).CStr(), Str.CStr());
    SaveToErrLog(TStr::Fmt("Corrupt file %s. Message:\n:%s\n", ZipFNm.CStr(), Str.CStr()).CStr());
    return 0;
  }
  return StrV[n-7].GetInt64();
}
bool TZipIn::GetNextLnBf ( TChA & LnChA) [virtual]

Implements TSIn.

Definition at line 140 of file zipfl.cpp.

                                    {
  // Reads the next line into LnChA (without the line terminator reported by
  // FindEol). A line that spans several buffer fills is assembled piece by piece.
  // Returns true when a complete line was read, or when the input ended after
  // at least one character was collected; false when nothing was read.
  int Status;
  int BfN;        // new pointer to the end of line
  int BfP;        // previous pointer to the line start

  LnChA.Clr();

  do {
    // BfP must be chosen before calling FindEol(), because FindEol() may
    // refill the buffer and restart reading at index 0
    if (BfC >= BfL) {
      // reset the current pointer, FindEol() will read a new buffer
      BfP = 0;
    } else {
      BfP = BfC;
    }
    Status = FindEol(BfN);
    if (Status >= 0) {
      // append the characters scanned in this pass: Bf[BfP] .. Bf[BfN-1]
      LnChA.AddBf(&Bf[BfP],BfN-BfP);
      if (Status == 1) {
        // got a complete line
        return true;
      }
    }
    // get more data, if the line is incomplete
  } while (Status == 0);

  // eof or the last line has no newline
  return !LnChA.Empty();
}
bool TZipIn::IsZipExt ( const TStr & FNmExt) [static]

Check whether the file extension FNmExt is that of a compressed file (.gz, .7z, .rar, .zip, .cab, .arj, .bzip2, .bz2).

Definition at line 203 of file zipfl.cpp.

                                        {
  // True when FNmExt has an entry in the extension table. The extension is
  // looked up exactly as given (GetCmd, by contrast, lower-cases it first).
  if (FExtToCmdH.Empty()) {
    FillFExtToCmdH();
  }
  const bool KnownExtP = FExtToCmdH.IsKey(FNmExt);
  return KnownExtP;
}
static bool TZipIn::IsZipFNm ( const TStr & FNm) [inline, static]

Check whether the file extension of FNm is that of a compressed file (.gz, .7z, .rar, .zip, .cab, .arj, .bzip2, .bz2).

Definition at line 51 of file zipfl.h.

{
  // delegate to IsZipExt() with the extension of the file name
  const TStr FNmExt = FNm.GetFExt();
  return IsZipExt(FNmExt);
}
int TZipIn::Len ( ) const [inline, virtual]

Implements TSIn.

Definition at line 41 of file zipfl.h.

{
  // bytes not yet read from the pipe (FLen-CurFPos) plus bytes still unread
  // in the buffer (BfL-BfC); the result is converted to int
  const uint64 RemainingLen = FLen-CurFPos+BfL-BfC;
  return int(RemainingLen);
}
PSIn TZipIn::New ( const TStr & FNm) [static]

Definition at line 102 of file zipfl.cpp.

                                {
  // create the stream on the heap and hand it over to a PSIn smart pointer
  TZipIn* const ZipIn = new TZipIn(FNm);
  return PSIn(ZipIn);
}
PSIn TZipIn::New ( const TStr & FNm,
bool &  OpenedP 
) [static]

Definition at line 106 of file zipfl.cpp.

                                              {
  // create the stream on the heap and hand it over to a PSIn smart pointer;
  // OpenedP is filled in by the constructor
  TZipIn* const ZipIn = new TZipIn(FNm, OpenedP);
  return PSIn(ZipIn);
}
static PSIn TZipIn::NewIfZip ( const TStr & FNm) [inline, static]

Definition at line 58 of file zipfl.h.

{
  // compressed extension -> TZipIn stream, anything else -> plain TFIn stream
  if (IsZipFNm(FNm)) {
    return New(FNm);
  }
  return TFIn::New(FNm);
}
TZipIn& TZipIn::operator= ( const TZipIn & ) [private]
char TZipIn::PeekCh ( ) [inline, virtual]

Implements TSIn.

Definition at line 43 of file zipfl.h.

{
  // Returns the next character without advancing the read position; a fully
  // consumed buffer is refilled first.
  if (BfC == BfL) {
    FillBf();
  }
  const char Ch = Bf[BfC];
  return Ch;
}

Member Data Documentation

char* TZipIn::Bf [private]

Definition at line 22 of file zipfl.h.

int TZipIn::BfC [private]

Definition at line 23 of file zipfl.h.

int TZipIn::BfL [private]

Definition at line 23 of file zipfl.h.

uint64 TZipIn::CurFPos [private]

Definition at line 21 of file zipfl.h.

TStrStrH TZipIn::FExtToCmdH [static, private]

Definition at line 14 of file zipfl.h.

uint64 TZipIn::FLen [private]

Definition at line 21 of file zipfl.h.

const int TZipIn::MxBfL = 32*1024 [static, private]

Definition at line 15 of file zipfl.h.

FILE* TZipIn::ZipStdoutRd [private]

Definition at line 19 of file zipfl.h.

FILE * TZipIn::ZipStdoutWr [private]

Definition at line 19 of file zipfl.h.


The documentation for this class was generated from the following files: