modules/libjar/nsZipArchive.h

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

     1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     2 /* This Source Code Form is subject to the terms of the Mozilla Public
     3  * License, v. 2.0. If a copy of the MPL was not distributed with this
     4  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     6 #ifndef nsZipArchive_h_
     7 #define nsZipArchive_h_
     9 #include "mozilla/Attributes.h"
    11 #define ZIP_TABSIZE   256
    12 #define ZIP_BUFLEN    (4*1024)      /* Used as output buffer when deflating items to a file */
    14 #include "plarena.h"
    15 #include "zlib.h"
    16 #include "zipstruct.h"
    17 #include "nsAutoPtr.h"
    18 #include "nsIFile.h"
    19 #include "nsISupportsImpl.h" // For mozilla::ThreadSafeAutoRefCnt
    20 #include "mozilla/FileUtils.h"
    21 #include "mozilla/FileLocation.h"
    23 #if defined(XP_WIN) && defined(_MSC_VER)
       /*
        * MSVC-on-Windows variant: MOZ_WIN_MEM_TRY_BEGIN opens a structured
        * exception handling __try block and MOZ_WIN_MEM_TRY_CATCH(cmd) closes it
        * with an __except filter that handles only EXCEPTION_IN_PAGE_ERROR. When
        * that exception is caught, a warning naming the enclosing function is
        * emitted and `cmd` is executed; every other exception code continues the
        * handler search.
        * NOTE(review): presumably meant to guard reads of the memory-mapped
        * archive -- confirm against the call sites.
        */
    24 #define MOZ_WIN_MEM_TRY_BEGIN __try {
    25 #define MOZ_WIN_MEM_TRY_CATCH(cmd) }                                \
    26   __except(GetExceptionCode()==EXCEPTION_IN_PAGE_ERROR ?            \
    27            EXCEPTION_EXECUTE_HANDLER : EXCEPTION_CONTINUE_SEARCH)   \
    28   {                                                                 \
    29     NS_WARNING("EXCEPTION_IN_PAGE_ERROR in " __FUNCTION__);         \
    30     cmd;                                                            \
    31   }
    32 #else
       /*
        * All other builds: the pair expands to a plain brace-delimited scope and
        * the `cmd` argument is discarded.
        */
    33 #define MOZ_WIN_MEM_TRY_BEGIN {
    34 #define MOZ_WIN_MEM_TRY_CATCH(cmd) }
    35 #endif
    37 class nsZipFind;
    38 struct PRFileDesc;
    40 /**
    41  * This file defines some of the basic structures used by libjar to
    42  * read Zip files. It makes use of zlib in order to do the decompression.
    43  *
    44  * A few notes on the classes/structs:
    45  * nsZipArchive   represents a single Zip file, and maintains an index
    46  *                of all the items in the file.
    47  * nsZipItem      represents a single item (file) in the Zip archive.
    48  * nsZipFind      represents the metadata involved in doing a search,
    49  *                and current state of the iteration of found objects.
    50  * Thread-safe ('MT-safe') reading from the zipfile is performed through
    51  * JARInputStream, which maintains its own file descriptor, allowing for
    52  * multiple concurrent reads from the same zip file.
    53  */
    55 /**
    56  * nsZipItem -- a helper struct for nsZipArchive
    57  *
    58  * each nsZipItem represents one file in the archive and all the
    59  * information needed to manipulate it.
    60  */
    61 class nsZipItem
    62 {
    63 public:
       /**
        * @return pointer to the bytes that start ZIPCENTRAL_SIZE past the
        *         beginning of the central directory record (`central`).
        *         NOTE(review): presumably the entry name is not NUL-terminated
        *         and must be paired with nameLength -- confirm in zipstruct.h.
        */
    64   const char* Name() { return ((const char*)central) + ZIPCENTRAL_SIZE; }
       // Accessors for the item's metadata; declared here, defined outside
       // this header.
    66   uint32_t LocalOffset();
    67   uint32_t Size();
    68   uint32_t RealSize();
    69   uint32_t CRC32();
    70   uint16_t Date();
    71   uint16_t Time();
    72   uint16_t Compression();
    73   bool     IsDirectory();
    74   uint16_t Mode();
       // aBlockSize is an out-parameter.
       // NOTE(review): presumably receives the size of the extra-field block
       // matching aTag -- confirm in the implementation.
    75   const uint8_t* GetExtraField(uint16_t aTag, uint16_t *aBlockSize);
    76   PRTime   LastModTime();
    78 #ifdef XP_UNIX
       // Only declared on XP_UNIX builds.
    79   bool     IsSymlink();
    80 #endif
       // Link to another item.
       // NOTE(review): presumably chains the items stored in one slot of
       // nsZipArchive::mFiles -- confirm.
    82   nsZipItem*         next;
       // Central directory record for this item; Name() is computed from it.
    83   const ZipCentral*  central;
       // Length of the entry name.
    84   uint16_t           nameLength;
       // NOTE(review): presumably true when the entry was synthesized by
       // nsZipArchive::BuildSynthetics() rather than read from the file -- confirm.
    85   bool               isSynthetic;
    86 };
    88 class nsZipHandle;
    90 /** 
    91  * nsZipArchive -- a class for reading the PKZIP file format.
    92  *
    93  */
    94 class nsZipArchive 
    95 {
    96   friend class nsZipFind;
    98 public:
    99   /** constructing does not open the archive. See OpenArchive() */
   100   nsZipArchive();
   102   /** destructing the object closes the archive */
   103   ~nsZipArchive();
   105   /**
   106    * OpenArchive
   107    *
   108    * It's an error to call this more than once on the same nsZipArchive
   109    * object. If we were allowed to use exceptions this would have been
   110    * part of the constructor
   111    *
   112    * @param   aZipHandle  The nsZipHandle used to access the zip
   113    * @param   aFd         Optional PRFileDesc for Windows readahead optimization
   114    * @return  status code
   115    */
   116   nsresult OpenArchive(nsZipHandle *aZipHandle, PRFileDesc *aFd = nullptr);
   118   /**
   119    * OpenArchive
   120    *
   121    * Convenience function that generates nsZipHandle
   122    *
   123    * @param   aFile  The file used to access the zip
   124    * @return  status code
   125    */
   126   nsresult OpenArchive(nsIFile *aFile);
   128   /**
   129    * Test the integrity of items in this archive by running
   130    * a CRC check after extracting each item into a memory
   131    * buffer.  If an entry name is supplied only the
   132    * specified item is tested.  Else, if null is supplied
   133    * then all the items in the archive are tested.
   134    *
          * @param   aEntryName  Name of the item to test, or null to test all items
   135    * @return  status code
   136    */
   137   nsresult Test(const char *aEntryName);
   139   /**
   140    * Closes an open archive.
   141    */
   142   nsresult CloseArchive();
   144   /**
   145    * GetItem
   146    * @param   aEntryName Name of file in the archive
   147    * @return  pointer to nsZipItem
   148    */
   149   nsZipItem* GetItem(const char * aEntryName);
   151   /**
   152    * ExtractFile
   153    *
   154    * @param   zipEntry   Item in the archive to extract
   155    * @param   outname    Name of file to write to
   156    * @param   outFD      Filedescriptor to write contents to
   157    * @return  status code
   158    */
   159   nsresult ExtractFile(nsZipItem * zipEntry, const char *outname, PRFileDesc * outFD);
   161   /**
   162    * FindInit
   163    *
   164    * Initializes a search for files in the archive. FindNext() returns
   165    * the actual matches. The nsZipFind must be deleted when you're done
   166    *
   167    * @param   aPattern    a string or RegExp pattern to search for
   168    *                      (may be nullptr to find all files in archive)
   169    * @param   aFind       a pointer to a pointer to a structure used
   170    *                      in FindNext.  In the case of an error this
   171    *                      will be set to nullptr.
   172    * @return  status code
   173    */
   174   nsresult FindInit(const char * aPattern, nsZipFind** aFind);
   176   /*
   177    * Gets an independent handle to the mapped file.
   178    */
   179   nsZipHandle* GetFD();
   181   /**
   182    * Get pointer to the data of the item.
   183    * @param   aItem       Pointer to nsZipItem
   184    * @return  null when zip file is corrupt.
   185    */
   186   const uint8_t* GetData(nsZipItem* aItem);
         /**
          * Retrieves the archive comment into aComment.
          * NOTE(review): presumably built from mCommentPtr/mCommentLen; the
          * meaning of the bool result is not visible in this header -- confirm
          * in the implementation.
          */
   188   bool GetComment(nsACString &aComment);
   190   /**
   191    * Gets the amount of memory taken up by the archive's mapping.
   192    * @return the size
   193    */
   194   int64_t SizeOfMapping();
   196   /*
   197    * Refcounting
   198    */
   199   NS_METHOD_(MozExternalRefCountType) AddRef(void);
   200   NS_METHOD_(MozExternalRefCountType) Release(void);
   202 private:
   203   //--- private members ---
   204   mozilla::ThreadSafeAutoRefCnt mRefCnt; /* ref count */
   205   NS_DECL_OWNINGTHREAD
         // Table of item pointers with ZIP_TABSIZE slots.
         // NOTE(review): presumably hash buckets chained through
         // nsZipItem::next -- confirm.
   207   nsZipItem*    mFiles[ZIP_TABSIZE];
         // NOTE(review): presumably the arena nsZipItems are allocated from
         // (see CreateZipItem) -- confirm.
   208   PLArenaPool   mArena;
         // Archive comment: pointer and length.
   210   const char*   mCommentPtr;
   211   uint16_t      mCommentLen;
   213   // Whether we synthesized the directory entries
   214   bool          mBuiltSynthetics;
   216   // file handle
   217   nsRefPtr<nsZipHandle> mFd;
   219   // file URI, for logging
   220   nsCString mURI;
   222 private:
   223   //--- private methods ---
   224   nsZipItem*        CreateZipItem();
   225   nsresult          BuildFileList(PRFileDesc *aFd = nullptr);
   226   nsresult          BuildSynthetics();
         // Copying is disabled: both copy operations are marked MOZ_DELETE.
   228   nsZipArchive& operator=(const nsZipArchive& rhs) MOZ_DELETE;
   229   nsZipArchive(const nsZipArchive& rhs) MOZ_DELETE;
   230 };
   232 /** 
   233  * nsZipFind 
   234  *
   235  * a helper class for nsZipArchive, representing a search
   236  */
   237 class nsZipFind
   238 {
   239 public:
         /**
          * @param   aZip      Archive to search; a reference is held in mArchive
          * @param   aPattern  Pattern to match.
          *                    NOTE(review): ownership of this buffer is not
          *                    visible here -- confirm whether the destructor
          *                    frees it.
          * @param   regExp    Stored in mRegExp; presumably selects RegExp
          *                    matching over a plain string compare -- confirm.
          */
   240   nsZipFind(nsZipArchive* aZip, char* aPattern, bool regExp);
   241   ~nsZipFind();
         /**
          * Advances the search.
          * @param   aResult   Out-parameter; presumably the name of the next match
          * @param   aNameLen  Out-parameter; presumably the length of that name
          * @return  status code
          */
   243   nsresult      FindNext(const char** aResult, uint16_t* aNameLen);
   245 private:
         // Strong reference to the archive being searched.
   246   nsRefPtr<nsZipArchive> mArchive;
   247   char*         mPattern;
         // NOTE(review): mItem/mSlot presumably record the current position in
         // nsZipArchive::mFiles (this class is a friend of nsZipArchive) -- confirm.
   248   nsZipItem*    mItem;
   249   uint16_t      mSlot;
   250   bool          mRegExp;
         // Copying is disabled: both copy operations are marked MOZ_DELETE.
   252   nsZipFind& operator=(const nsZipFind& rhs) MOZ_DELETE;
   253   nsZipFind(const nsZipFind& rhs) MOZ_DELETE;
   254 };
   256 /** 
   257  * nsZipCursor -- a low-level class for reading the individual items in a zip.
   258  */
   259 class nsZipCursor {
   260 public:
   261   /**
   262    * Initializes the cursor
   263    *
   264    * @param   aItem       Item of interest
   265    * @param   aZip        Archive
   266    * @param   aBuf        Buffer used for decompression.
   267    *                      This determines the maximum Read() size in the compressed case.
   268    * @param   aBufSize    Buffer size
   269    * @param   doCRC       When set to true Read() will check crc
   270    */
   271   nsZipCursor(nsZipItem *aItem, nsZipArchive *aZip, uint8_t* aBuf = nullptr, uint32_t aBufSize = 0, bool doCRC = false);
   273   ~nsZipCursor();
   275   /**
   276    * Performs reads. In the compressed case it uses aBuf(passed in constructor), for stored files
   277    * it returns a zero-copy buffer.
   278    *
   279    * @param   aBytesRead  Outparam for number of bytes read.
   280    * @return  data read or nullptr if item is corrupted.
   281    */
   282   uint8_t* Read(uint32_t *aBytesRead) {
           // Forwards to the shared implementation with aCopy == false.
   283     return ReadOrCopy(aBytesRead, false);
   284   }
   286   /**
   287    * Performs a copy. It always uses aBuf(passed in constructor).
   288    *
   289    * @param   aBytesRead  Outparam for number of bytes read.
   290    * @return  data read or nullptr if item is corrupted.
   291    */
   292   uint8_t* Copy(uint32_t *aBytesRead) {
           // Forwards to the shared implementation with aCopy == true.
   293     return ReadOrCopy(aBytesRead, true);
   294   }
   296 private:
   297   /* Actual implementation for both Read and Copy above */
   298   uint8_t* ReadOrCopy(uint32_t *aBytesRead, bool aCopy);
         // NOTE(review): the members below presumably mirror the constructor
         // arguments (aItem, aBuf, aBufSize, doCRC) -- the constructor is
         // defined outside this header, confirm there.
   300   nsZipItem *mItem; 
   301   uint8_t  *mBuf; 
   302   uint32_t  mBufSize; 
         // zlib stream state.
   303   z_stream  mZs;
         // NOTE(review): presumably the running CRC accumulated while reading
         // when mDoCRC is set -- confirm.
   304   uint32_t mCRC;
   305   bool mDoCRC;
   306 };
   308 /**
   309  * nsZipItemPtr - a RAII convenience class for reading the individual items in a zip.
   310  * It reads whole files and does zero-copy IO for stored files. A buffer is allocated
   311  * for decompression.
   312  * Do not use when the file may be very large.
   313  */
   314 class nsZipItemPtr_base {
   315 public:
   316   /**
   317    * Initializes the reader
   318    *
   319    * @param   aZip        Archive
   320    * @param   aEntryName  Archive membername
   321    * @param   doCRC       When set to true the crc is checked while reading
   322    */
   323   nsZipItemPtr_base(nsZipArchive *aZip, const char *aEntryName, bool doCRC);
         /**
          * @return the value of mReadlen.
          *         NOTE(review): presumably the number of bytes available
          *         through mReturnBuf -- confirm in the constructor.
          */
   325   uint32_t Length() const {
   326     return mReadlen;
   327   }
   329 protected:
         // NOTE(review): presumably keeps the archive's mapping alive while
         // mReturnBuf points into it -- confirm.
   330   nsRefPtr<nsZipHandle> mZipHandle;
         // Owned buffer; nsZipItemPtr::Forget() hands it to the caller when
         // mReturnBuf points at it.
   331   nsAutoArrayPtr<uint8_t> mAutoBuf;
         // Pointer exposed by nsZipItemPtr::Buffer(); null means no data.
   332   uint8_t *mReturnBuf;
         // Value reported by Length().
   333   uint32_t mReadlen;
   334 };
   336 template <class T>
   337 class nsZipItemPtr : public nsZipItemPtr_base {
   338 public:
         /**
          * Typed front end for nsZipItemPtr_base: the item bytes are exposed
          * as `const T*`.
          *
          * @param   aZip        Archive
          * @param   aEntryName  Archive membername
          * @param   doCRC       Forwarded unchanged to nsZipItemPtr_base
          */
   339   nsZipItemPtr(nsZipArchive *aZip, const char *aEntryName, bool doCRC = false) : nsZipItemPtr_base(aZip, aEntryName, doCRC) { }
   340   /**
   341    * @return buffer containing the whole zip member or nullptr on error.
   342    * The returned buffer is owned by this nsZipItemPtr.
   343    */
   344   const T* Buffer() const {
   345     return (const T*)mReturnBuf;
   346   }
         /** Implicit conversion; identical to Buffer(). */
   348   operator const T*() const {
   349     return Buffer();
   350   }
   352   /**
   353    * Relinquish ownership of zip member if compressed.
   354    * Copy member into a new buffer if uncompressed.
          * After this call Buffer() returns nullptr.
   355    * @return a buffer with whole zip member. It is caller's responsibility to free() it.
          *         nullptr if there is no member to hand out or if the copy
          *         could not be allocated.
   356    */
   357   T* Forget() {
   358     if (!mReturnBuf)
   359       return nullptr;
   360     // Compressed case: mReturnBuf is the buffer we own, so give it up as-is.
           // NOTE(review): mAutoBuf is an nsAutoArrayPtr, so this buffer was
           // presumably allocated with new[] -- confirm that free() is a valid
           // way for the caller to release it.
   361     if (mAutoBuf.get() == mReturnBuf) {
   362       mReturnBuf = nullptr;
   363       return (T*) mAutoBuf.forget();
   364     }
           // Otherwise the data is not ours to give away: hand out a copy.
   365     T *ret = (T*) malloc(Length());
           // malloc() can fail (and may return null for a zero-length member);
           // never pass a null destination to memcpy().
           if (ret) {
   366       memcpy(ret, mReturnBuf, Length());
           }
   367     mReturnBuf = nullptr;
   368     return ret;
   369   }
   370 };
   372 class nsZipHandle {
       // Reference-counted holder for the bytes of a zip file (see mFileData
       // and mLen). The constructor and destructor are private; instances are
       // handed out through the static Init() overloads via their `ret`
       // out-parameter.
   373 friend class nsZipArchive;
   374 friend class mozilla::FileLocation;
   375 public:
         // Creates a handle for a file on disk.
         // NOTE(review): aFd is optional; presumably it receives the open file
         // descriptor for the caller's use -- confirm.
   376   static nsresult Init(nsIFile *file, nsZipHandle **ret,
   377                        PRFileDesc **aFd = nullptr);
         // Creates a handle for the entry named `entry` inside `zip`.
   378   static nsresult Init(nsZipArchive *zip, const char *entry,
   379                        nsZipHandle **ret);
   381   NS_METHOD_(MozExternalRefCountType) AddRef(void);
   382   NS_METHOD_(MozExternalRefCountType) Release(void);
   384   int64_t SizeOfMapping();
   386 protected:
   387   const uint8_t * mFileData; /* pointer to mmaped file */
   388   uint32_t        mLen;      /* length of file and memory mapped area */
   389   mozilla::FileLocation mFile; /* source file if any, for logging */
   391 private:
   392   nsZipHandle();
   393   ~nsZipHandle();
   395   PRFileMap *                       mMap;    /* nspr datastructure for mmap */
         // NOTE(review): presumably holds the item data when the handle was
         // created from an entry of another archive (second Init overload)
         // -- confirm.
   396   nsAutoPtr<nsZipItemPtr<uint8_t> > mBuf;
   397   mozilla::ThreadSafeAutoRefCnt     mRefCnt; /* ref count */
   398   NS_DECL_OWNINGTHREAD
   399 };
   401 nsresult gZlibInit(z_stream *zs); /* Defined outside this header. NOTE(review): presumably prepares zs for inflating -- confirm. */
   403 #endif /* nsZipArchive_h_ */

mercurial