michael@0: /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this michael@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: michael@0: #ifndef nsZipArchive_h_ michael@0: #define nsZipArchive_h_ michael@0: michael@0: #include "mozilla/Attributes.h" michael@0: michael@0: #define ZIP_TABSIZE 256 michael@0: #define ZIP_BUFLEN (4*1024) /* Used as output buffer when deflating items to a file */ michael@0: michael@0: #include "plarena.h" michael@0: #include "zlib.h" michael@0: #include "zipstruct.h" michael@0: #include "nsAutoPtr.h" michael@0: #include "nsIFile.h" michael@0: #include "nsISupportsImpl.h" // For mozilla::ThreadSafeAutoRefCnt michael@0: #include "mozilla/FileUtils.h" michael@0: #include "mozilla/FileLocation.h" michael@0: michael@0: #if defined(XP_WIN) && defined(_MSC_VER) michael@0: #define MOZ_WIN_MEM_TRY_BEGIN __try { michael@0: #define MOZ_WIN_MEM_TRY_CATCH(cmd) } \ michael@0: __except(GetExceptionCode()==EXCEPTION_IN_PAGE_ERROR ? \ michael@0: EXCEPTION_EXECUTE_HANDLER : EXCEPTION_CONTINUE_SEARCH) \ michael@0: { \ michael@0: NS_WARNING("EXCEPTION_IN_PAGE_ERROR in " __FUNCTION__); \ michael@0: cmd; \ michael@0: } michael@0: #else michael@0: #define MOZ_WIN_MEM_TRY_BEGIN { michael@0: #define MOZ_WIN_MEM_TRY_CATCH(cmd) } michael@0: #endif michael@0: michael@0: class nsZipFind; michael@0: struct PRFileDesc; michael@0: michael@0: /** michael@0: * This file defines some of the basic structures used by libjar to michael@0: * read Zip files. It makes use of zlib in order to do the decompression. michael@0: * michael@0: * A few notes on the classes/structs: michael@0: * nsZipArchive represents a single Zip file, and maintains an index michael@0: * of all the items in the file. michael@0: * nsZipItem represents a single item (file) in the Zip archive. michael@0: * nsZipFind represents the metadata involved in doing a search, michael@0: * and current state of the iteration of found objects. michael@0: * 'MT''safe' reading from the zipfile is performed through JARInputStream, michael@0: * which maintains its own file descriptor, allowing for multiple reads michael@0: * concurrently from the same zip file. michael@0: */ michael@0: michael@0: /** michael@0: * nsZipItem -- a helper struct for nsZipArchive michael@0: * michael@0: * each nsZipItem represents one file in the archive and all the michael@0: * information needed to manipulate it. michael@0: */ michael@0: class nsZipItem michael@0: { michael@0: public: michael@0: const char* Name() { return ((const char*)central) + ZIPCENTRAL_SIZE; } michael@0: michael@0: uint32_t LocalOffset(); michael@0: uint32_t Size(); michael@0: uint32_t RealSize(); michael@0: uint32_t CRC32(); michael@0: uint16_t Date(); michael@0: uint16_t Time(); michael@0: uint16_t Compression(); michael@0: bool IsDirectory(); michael@0: uint16_t Mode(); michael@0: const uint8_t* GetExtraField(uint16_t aTag, uint16_t *aBlockSize); michael@0: PRTime LastModTime(); michael@0: michael@0: #ifdef XP_UNIX michael@0: bool IsSymlink(); michael@0: #endif michael@0: michael@0: nsZipItem* next; michael@0: const ZipCentral* central; michael@0: uint16_t nameLength; michael@0: bool isSynthetic; michael@0: }; michael@0: michael@0: class nsZipHandle; michael@0: michael@0: /** michael@0: * nsZipArchive -- a class for reading the PKZIP file format. michael@0: * michael@0: */ michael@0: class nsZipArchive michael@0: { michael@0: friend class nsZipFind; michael@0: michael@0: public: michael@0: /** constructing does not open the archive. See OpenArchive() */ michael@0: nsZipArchive(); michael@0: michael@0: /** destructing the object closes the archive */ michael@0: ~nsZipArchive(); michael@0: michael@0: /** michael@0: * OpenArchive michael@0: * michael@0: * It's an error to call this more than once on the same nsZipArchive michael@0: * object. If we were allowed to use exceptions this would have been michael@0: * part of the constructor michael@0: * michael@0: * @param aZipHandle The nsZipHandle used to access the zip michael@0: * @param aFd Optional PRFileDesc for Windows readahead optimization michael@0: * @return status code michael@0: */ michael@0: nsresult OpenArchive(nsZipHandle *aZipHandle, PRFileDesc *aFd = nullptr); michael@0: michael@0: /** michael@0: * OpenArchive michael@0: * michael@0: * Convenience function that generates nsZipHandle michael@0: * michael@0: * @param aFile The file used to access the zip michael@0: * @return status code michael@0: */ michael@0: nsresult OpenArchive(nsIFile *aFile); michael@0: michael@0: /** michael@0: * Test the integrity of items in this archive by running michael@0: * a CRC check after extracting each item into a memory michael@0: * buffer. If an entry name is supplied only the michael@0: * specified item is tested. Else, if null is supplied michael@0: * then all the items in the archive are tested. michael@0: * michael@0: * @return status code michael@0: */ michael@0: nsresult Test(const char *aEntryName); michael@0: michael@0: /** michael@0: * Closes an open archive. michael@0: */ michael@0: nsresult CloseArchive(); michael@0: michael@0: /** michael@0: * GetItem michael@0: * @param aEntryName Name of file in the archive michael@0: * @return pointer to nsZipItem michael@0: */ michael@0: nsZipItem* GetItem(const char * aEntryName); michael@0: michael@0: /** michael@0: * ExtractFile michael@0: * michael@0: * @param zipEntry Name of file in archive to extract michael@0: * @param outFD Filedescriptor to write contents to michael@0: * @param outname Name of file to write to michael@0: * @return status code michael@0: */ michael@0: nsresult ExtractFile(nsZipItem * zipEntry, const char *outname, PRFileDesc * outFD); michael@0: michael@0: /** michael@0: * FindInit michael@0: * michael@0: * Initializes a search for files in the archive. FindNext() returns michael@0: * the actual matches. The nsZipFind must be deleted when you're done michael@0: * michael@0: * @param aPattern a string or RegExp pattern to search for michael@0: * (may be nullptr to find all files in archive) michael@0: * @param aFind a pointer to a pointer to a structure used michael@0: * in FindNext. In the case of an error this michael@0: * will be set to nullptr. michael@0: * @return status code michael@0: */ michael@0: nsresult FindInit(const char * aPattern, nsZipFind** aFind); michael@0: michael@0: /* michael@0: * Gets an undependent handle to the mapped file. michael@0: */ michael@0: nsZipHandle* GetFD(); michael@0: michael@0: /** michael@0: * Get pointer to the data of the item. michael@0: * @param aItem Pointer to nsZipItem michael@0: * reutrns null when zip file is corrupt. michael@0: */ michael@0: const uint8_t* GetData(nsZipItem* aItem); michael@0: michael@0: bool GetComment(nsACString &aComment); michael@0: michael@0: /** michael@0: * Gets the amount of memory taken up by the archive's mapping. michael@0: * @return the size michael@0: */ michael@0: int64_t SizeOfMapping(); michael@0: michael@0: /* michael@0: * Refcounting michael@0: */ michael@0: NS_METHOD_(MozExternalRefCountType) AddRef(void); michael@0: NS_METHOD_(MozExternalRefCountType) Release(void); michael@0: michael@0: private: michael@0: //--- private members --- michael@0: mozilla::ThreadSafeAutoRefCnt mRefCnt; /* ref count */ michael@0: NS_DECL_OWNINGTHREAD michael@0: michael@0: nsZipItem* mFiles[ZIP_TABSIZE]; michael@0: PLArenaPool mArena; michael@0: michael@0: const char* mCommentPtr; michael@0: uint16_t mCommentLen; michael@0: michael@0: // Whether we synthesized the directory entries michael@0: bool mBuiltSynthetics; michael@0: michael@0: // file handle michael@0: nsRefPtr mFd; michael@0: michael@0: // file URI, for logging michael@0: nsCString mURI; michael@0: michael@0: private: michael@0: //--- private methods --- michael@0: nsZipItem* CreateZipItem(); michael@0: nsresult BuildFileList(PRFileDesc *aFd = nullptr); michael@0: nsresult BuildSynthetics(); michael@0: michael@0: nsZipArchive& operator=(const nsZipArchive& rhs) MOZ_DELETE; michael@0: nsZipArchive(const nsZipArchive& rhs) MOZ_DELETE; michael@0: }; michael@0: michael@0: /** michael@0: * nsZipFind michael@0: * michael@0: * a helper class for nsZipArchive, representing a search michael@0: */ michael@0: class nsZipFind michael@0: { michael@0: public: michael@0: nsZipFind(nsZipArchive* aZip, char* aPattern, bool regExp); michael@0: ~nsZipFind(); michael@0: michael@0: nsresult FindNext(const char** aResult, uint16_t* aNameLen); michael@0: michael@0: private: michael@0: nsRefPtr mArchive; michael@0: char* mPattern; michael@0: nsZipItem* mItem; michael@0: uint16_t mSlot; michael@0: bool mRegExp; michael@0: michael@0: nsZipFind& operator=(const nsZipFind& rhs) MOZ_DELETE; michael@0: nsZipFind(const nsZipFind& rhs) MOZ_DELETE; michael@0: }; michael@0: michael@0: /** michael@0: * nsZipCursor -- a low-level class for reading the individual items in a zip. michael@0: */ michael@0: class nsZipCursor { michael@0: public: michael@0: /** michael@0: * Initializes the cursor michael@0: * michael@0: * @param aItem Item of interest michael@0: * @param aZip Archive michael@0: * @param aBuf Buffer used for decompression. michael@0: * This determines the maximum Read() size in the compressed case. michael@0: * @param aBufSize Buffer size michael@0: * @param doCRC When set to true Read() will check crc michael@0: */ michael@0: nsZipCursor(nsZipItem *aItem, nsZipArchive *aZip, uint8_t* aBuf = nullptr, uint32_t aBufSize = 0, bool doCRC = false); michael@0: michael@0: ~nsZipCursor(); michael@0: michael@0: /** michael@0: * Performs reads. In the compressed case it uses aBuf(passed in constructor), for stored files michael@0: * it returns a zero-copy buffer. michael@0: * michael@0: * @param aBytesRead Outparam for number of bytes read. michael@0: * @return data read or nullptr if item is corrupted. michael@0: */ michael@0: uint8_t* Read(uint32_t *aBytesRead) { michael@0: return ReadOrCopy(aBytesRead, false); michael@0: } michael@0: michael@0: /** michael@0: * Performs a copy. It always uses aBuf(passed in constructor). michael@0: * michael@0: * @param aBytesRead Outparam for number of bytes read. michael@0: * @return data read or nullptr if item is corrupted. michael@0: */ michael@0: uint8_t* Copy(uint32_t *aBytesRead) { michael@0: return ReadOrCopy(aBytesRead, true); michael@0: } michael@0: michael@0: private: michael@0: /* Actual implementation for both Read and Copy above */ michael@0: uint8_t* ReadOrCopy(uint32_t *aBytesRead, bool aCopy); michael@0: michael@0: nsZipItem *mItem; michael@0: uint8_t *mBuf; michael@0: uint32_t mBufSize; michael@0: z_stream mZs; michael@0: uint32_t mCRC; michael@0: bool mDoCRC; michael@0: }; michael@0: michael@0: /** michael@0: * nsZipItemPtr - a RAII convenience class for reading the individual items in a zip. michael@0: * It reads whole files and does zero-copy IO for stored files. A buffer is allocated michael@0: * for decompression. michael@0: * Do not use when the file may be very large. michael@0: */ michael@0: class nsZipItemPtr_base { michael@0: public: michael@0: /** michael@0: * Initializes the reader michael@0: * michael@0: * @param aZip Archive michael@0: * @param aEntryName Archive membername michael@0: * @param doCRC When set to true Read() will check crc michael@0: */ michael@0: nsZipItemPtr_base(nsZipArchive *aZip, const char *aEntryName, bool doCRC); michael@0: michael@0: uint32_t Length() const { michael@0: return mReadlen; michael@0: } michael@0: michael@0: protected: michael@0: nsRefPtr mZipHandle; michael@0: nsAutoArrayPtr mAutoBuf; michael@0: uint8_t *mReturnBuf; michael@0: uint32_t mReadlen; michael@0: }; michael@0: michael@0: template michael@0: class nsZipItemPtr : public nsZipItemPtr_base { michael@0: public: michael@0: nsZipItemPtr(nsZipArchive *aZip, const char *aEntryName, bool doCRC = false) : nsZipItemPtr_base(aZip, aEntryName, doCRC) { } michael@0: /** michael@0: * @return buffer containing the whole zip member or nullptr on error. michael@0: * The returned buffer is owned by nsZipItemReader. michael@0: */ michael@0: const T* Buffer() const { michael@0: return (const T*)mReturnBuf; michael@0: } michael@0: michael@0: operator const T*() const { michael@0: return Buffer(); michael@0: } michael@0: michael@0: /** michael@0: * Relinquish ownership of zip member if compressed. michael@0: * Copy member into a new buffer if uncompressed. michael@0: * @return a buffer with whole zip member. It is caller's responsibility to free() it. michael@0: */ michael@0: T* Forget() { michael@0: if (!mReturnBuf) michael@0: return nullptr; michael@0: // In uncompressed mmap case, give up buffer michael@0: if (mAutoBuf.get() == mReturnBuf) { michael@0: mReturnBuf = nullptr; michael@0: return (T*) mAutoBuf.forget(); michael@0: } michael@0: T *ret = (T*) malloc(Length()); michael@0: memcpy(ret, mReturnBuf, Length()); michael@0: mReturnBuf = nullptr; michael@0: return ret; michael@0: } michael@0: }; michael@0: michael@0: class nsZipHandle { michael@0: friend class nsZipArchive; michael@0: friend class mozilla::FileLocation; michael@0: public: michael@0: static nsresult Init(nsIFile *file, nsZipHandle **ret, michael@0: PRFileDesc **aFd = nullptr); michael@0: static nsresult Init(nsZipArchive *zip, const char *entry, michael@0: nsZipHandle **ret); michael@0: michael@0: NS_METHOD_(MozExternalRefCountType) AddRef(void); michael@0: NS_METHOD_(MozExternalRefCountType) Release(void); michael@0: michael@0: int64_t SizeOfMapping(); michael@0: michael@0: protected: michael@0: const uint8_t * mFileData; /* pointer to mmaped file */ michael@0: uint32_t mLen; /* length of file and memory mapped area */ michael@0: mozilla::FileLocation mFile; /* source file if any, for logging */ michael@0: michael@0: private: michael@0: nsZipHandle(); michael@0: ~nsZipHandle(); michael@0: michael@0: PRFileMap * mMap; /* nspr datastructure for mmap */ michael@0: nsAutoPtr > mBuf; michael@0: mozilla::ThreadSafeAutoRefCnt mRefCnt; /* ref count */ michael@0: NS_DECL_OWNINGTHREAD michael@0: }; michael@0: michael@0: nsresult gZlibInit(z_stream *zs); michael@0: michael@0: #endif /* nsZipArchive_h_ */