michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this file, michael@0: * You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: #include "mozilla/Assertions.h" michael@0: #include "mozilla/Scoped.h" michael@0: #include "SeekableZStream.h" michael@0: #include "Utils.h" michael@0: #include "Logging.h" michael@0: michael@0: Logging Logging::Singleton; michael@0: michael@0: const char *filterName[] = { michael@0: "none", michael@0: "thumb", michael@0: "arm", michael@0: "x86", michael@0: "auto" michael@0: }; michael@0: michael@0: /* Maximum supported size for chunkSize */ michael@0: static const size_t maxChunkSize = michael@0: 1 << (8 * std::min(sizeof(((SeekableZStreamHeader *)nullptr)->chunkSize), michael@0: sizeof(((SeekableZStreamHeader *)nullptr)->lastChunkSize)) - 1); michael@0: michael@0: class Buffer: public MappedPtr michael@0: { michael@0: public: michael@0: virtual ~Buffer() { } michael@0: michael@0: virtual bool Resize(size_t size) michael@0: { michael@0: MemoryRange buf = mmap(nullptr, size, PROT_READ | PROT_WRITE, michael@0: MAP_PRIVATE | MAP_ANON, -1, 0); michael@0: if (buf == MAP_FAILED) michael@0: return false; michael@0: if (*this != MAP_FAILED) michael@0: memcpy(buf, *this, std::min(size, GetLength())); michael@0: Assign(buf); michael@0: return true; michael@0: } michael@0: michael@0: bool Fill(Buffer &other) michael@0: { michael@0: size_t size = other.GetLength(); michael@0: if (!size || !Resize(size)) michael@0: return false; michael@0: memcpy(static_cast(*this), static_cast(other), size); michael@0: return true; michael@0: } michael@0: }; michael@0: michael@0: class FileBuffer: public Buffer michael@0: { michael@0: public: michael@0: bool Init(const char *name, bool writable_ = false) michael@0: { michael@0: fd = open(name, writable_ ? O_RDWR | O_CREAT | O_TRUNC : O_RDONLY, 0666); michael@0: if (fd == -1) michael@0: return false; michael@0: writable = writable_; michael@0: return true; michael@0: } michael@0: michael@0: virtual bool Resize(size_t size) michael@0: { michael@0: if (writable) { michael@0: if (ftruncate(fd, size) == -1) michael@0: return false; michael@0: } michael@0: Assign(MemoryRange::mmap(nullptr, size, michael@0: PROT_READ | (writable ? PROT_WRITE : 0), michael@0: writable ? MAP_SHARED : MAP_PRIVATE, fd, 0)); michael@0: return this != MAP_FAILED; michael@0: } michael@0: michael@0: int getFd() michael@0: { michael@0: return fd; michael@0: } michael@0: michael@0: private: michael@0: AutoCloseFD fd; michael@0: bool writable; michael@0: }; michael@0: michael@0: class FilteredBuffer: public Buffer michael@0: { michael@0: public: michael@0: void Filter(Buffer &other, SeekableZStream::FilterId filter, size_t chunkSize) michael@0: { michael@0: SeekableZStream::ZStreamFilter filterCB = michael@0: SeekableZStream::GetFilter(filter); michael@0: MOZ_ASSERT(filterCB); michael@0: Fill(other); michael@0: size_t size = other.GetLength(); michael@0: Bytef *data = reinterpret_cast(static_cast(*this)); michael@0: size_t avail = 0; michael@0: /* Filter needs to be applied in chunks. */ michael@0: while (size) { michael@0: avail = std::min(size, chunkSize); michael@0: filterCB(data - static_cast(static_cast(*this)), michael@0: SeekableZStream::FILTER, data, avail); michael@0: size -= avail; michael@0: data += avail; michael@0: } michael@0: } michael@0: }; michael@0: michael@0: template michael@0: class Dictionary: public Buffer michael@0: { michael@0: typedef T piece; michael@0: typedef std::pair stat_pair; michael@0: michael@0: static bool stat_cmp(stat_pair a, stat_pair b) michael@0: { michael@0: return a.second < b.second; michael@0: } michael@0: michael@0: public: michael@0: Dictionary(Buffer &inBuf, size_t size) michael@0: { michael@0: if (!size || !Resize(size)) michael@0: return; michael@0: DEBUG_LOG("Creating dictionary"); michael@0: piece *origBufPieces = reinterpret_cast( michael@0: static_cast(inBuf)); michael@0: std::map stats; michael@0: for (unsigned int i = 0; i < inBuf.GetLength() / sizeof(piece); i++) { michael@0: stats[origBufPieces[i]]++; michael@0: } michael@0: std::vector statsVec(stats.begin(), stats.end()); michael@0: std::sort(statsVec.begin(), statsVec.end(), stat_cmp); michael@0: michael@0: piece *dictPieces = reinterpret_cast( michael@0: static_cast(*this)); michael@0: typename std::vector::reverse_iterator it = statsVec.rbegin(); michael@0: for (int i = size / sizeof(piece); i > 0 && it < statsVec.rend(); michael@0: i--, ++it) { michael@0: dictPieces[i - 1] = it->first; michael@0: } michael@0: } michael@0: }; michael@0: michael@0: class SzipAction michael@0: { michael@0: public: michael@0: virtual int run(const char *name, Buffer &origBuf, michael@0: const char *outName, Buffer &outBuf) = 0; michael@0: michael@0: virtual ~SzipAction() {} michael@0: }; michael@0: michael@0: class SzipDecompress: public SzipAction michael@0: { michael@0: public: michael@0: int run(const char *name, Buffer &origBuf, michael@0: const char *outName, Buffer &outBuf); michael@0: }; michael@0: michael@0: michael@0: class SzipCompress: public SzipAction michael@0: { michael@0: public: michael@0: int run(const char *name, Buffer &origBuf, michael@0: const char *outName, Buffer &outBuf); michael@0: michael@0: SzipCompress(size_t aChunkSize, SeekableZStream::FilterId aFilter, michael@0: size_t aDictSize) michael@0: : chunkSize(aChunkSize ? aChunkSize : 16384) michael@0: , filter(aFilter) michael@0: , dictSize(aDictSize) michael@0: {} michael@0: michael@0: const static signed char winSizeLog = 15; michael@0: const static size_t winSize = 1 << winSizeLog; michael@0: michael@0: const static SeekableZStream::FilterId DEFAULT_FILTER = michael@0: #if defined(TARGET_THUMB) michael@0: SeekableZStream::BCJ_THUMB; michael@0: #elif defined(TARGET_ARM) michael@0: SeekableZStream::BCJ_ARM; michael@0: #elif defined(TARGET_X86) michael@0: SeekableZStream::BCJ_X86; michael@0: #else michael@0: SeekableZStream::NONE; michael@0: #endif michael@0: michael@0: private: michael@0: michael@0: int do_compress(Buffer &origBuf, Buffer &outBuf, const unsigned char *aDict, michael@0: size_t aDictSize, SeekableZStream::FilterId aFilter); michael@0: michael@0: size_t chunkSize; michael@0: SeekableZStream::FilterId filter; michael@0: size_t dictSize; michael@0: }; michael@0: michael@0: /* Decompress a seekable compressed stream */ michael@0: int SzipDecompress::run(const char *name, Buffer &origBuf, michael@0: const char *outName, Buffer &outBuf) michael@0: { michael@0: size_t origSize = origBuf.GetLength(); michael@0: if (origSize < sizeof(SeekableZStreamHeader)) { michael@0: LOG("%s is not compressed", name); michael@0: return 0; michael@0: } michael@0: michael@0: SeekableZStream zstream; michael@0: if (!zstream.Init(origBuf, origSize)) michael@0: return 0; michael@0: michael@0: size_t size = zstream.GetUncompressedSize(); michael@0: michael@0: /* Give enough room for the uncompressed data */ michael@0: if (!outBuf.Resize(size)) { michael@0: LOG("Error resizing %s: %s", outName, strerror(errno)); michael@0: return 1; michael@0: } michael@0: michael@0: if (!zstream.Decompress(outBuf, 0, size)) michael@0: return 1; michael@0: michael@0: return 0; michael@0: } michael@0: michael@0: /* Generate a seekable compressed stream. */ michael@0: int SzipCompress::run(const char *name, Buffer &origBuf, michael@0: const char *outName, Buffer &outBuf) michael@0: { michael@0: size_t origSize = origBuf.GetLength(); michael@0: if (origSize == 0) { michael@0: LOG("Won't compress %s: it's empty", name); michael@0: return 1; michael@0: } michael@0: if (SeekableZStreamHeader::validate(origBuf)) { michael@0: LOG("Skipping %s: it's already a szip", name); michael@0: return 0; michael@0: } michael@0: bool compressed = false; michael@0: LOG("Size = %" PRIuSize, origSize); michael@0: michael@0: /* Allocate a buffer the size of the uncompressed data: we don't want michael@0: * a compressed file larger than that anyways. */ michael@0: if (!outBuf.Resize(origSize)) { michael@0: LOG("Couldn't allocate output buffer: %s", strerror(errno)); michael@0: return 1; michael@0: } michael@0: michael@0: /* Find the most appropriate filter */ michael@0: SeekableZStream::FilterId firstFilter, lastFilter; michael@0: bool scanFilters; michael@0: if (filter == SeekableZStream::FILTER_MAX) { michael@0: firstFilter = SeekableZStream::NONE; michael@0: lastFilter = SeekableZStream::FILTER_MAX; michael@0: scanFilters = true; michael@0: } else { michael@0: firstFilter = lastFilter = filter; michael@0: ++lastFilter; michael@0: scanFilters = false; michael@0: } michael@0: michael@0: mozilla::ScopedDeletePtr filteredBuf; michael@0: Buffer *origData; michael@0: for (SeekableZStream::FilterId f = firstFilter; f < lastFilter; ++f) { michael@0: FilteredBuffer *filteredTmp = nullptr; michael@0: Buffer tmpBuf; michael@0: if (f != SeekableZStream::NONE) { michael@0: DEBUG_LOG("Applying filter \"%s\"", filterName[f]); michael@0: filteredTmp = new FilteredBuffer(); michael@0: filteredTmp->Filter(origBuf, f, chunkSize); michael@0: origData = filteredTmp; michael@0: } else { michael@0: origData = &origBuf; michael@0: } michael@0: if (dictSize && !scanFilters) { michael@0: filteredBuf = filteredTmp; michael@0: break; michael@0: } michael@0: DEBUG_LOG("Compressing with no dictionary"); michael@0: if (do_compress(*origData, tmpBuf, nullptr, 0, f) == 0) { michael@0: if (tmpBuf.GetLength() < outBuf.GetLength()) { michael@0: outBuf.Fill(tmpBuf); michael@0: compressed = true; michael@0: filter = f; michael@0: filteredBuf = filteredTmp; michael@0: continue; michael@0: } michael@0: } michael@0: delete filteredTmp; michael@0: } michael@0: michael@0: origData = filteredBuf ? filteredBuf : &origBuf; michael@0: michael@0: if (dictSize) { michael@0: Dictionary dict(*origData, dictSize ? SzipCompress::winSize : 0); michael@0: michael@0: /* Find the most appropriate dictionary size */ michael@0: size_t firstDictSize, lastDictSize; michael@0: if (dictSize == (size_t) -1) { michael@0: /* If we scanned for filters, we effectively already tried dictSize=0 */ michael@0: firstDictSize = scanFilters ? 4096 : 0; michael@0: lastDictSize = SzipCompress::winSize; michael@0: } else { michael@0: firstDictSize = lastDictSize = dictSize; michael@0: } michael@0: michael@0: Buffer tmpBuf; michael@0: for (size_t d = firstDictSize; d <= lastDictSize; d += 4096) { michael@0: DEBUG_LOG("Compressing with dictionary of size %" PRIuSize, d); michael@0: if (do_compress(*origData, tmpBuf, static_cast(dict) michael@0: + SzipCompress::winSize - d, d, filter)) michael@0: continue; michael@0: if (!compressed || tmpBuf.GetLength() < outBuf.GetLength()) { michael@0: outBuf.Fill(tmpBuf); michael@0: compressed = true; michael@0: dictSize = d; michael@0: } michael@0: } michael@0: } michael@0: michael@0: if (!compressed) { michael@0: outBuf.Fill(origBuf); michael@0: LOG("Not compressed"); michael@0: return 0; michael@0: } michael@0: michael@0: if (dictSize == (size_t) -1) michael@0: dictSize = 0; michael@0: michael@0: DEBUG_LOG("Used filter \"%s\" and dictionary size of %" PRIuSize, michael@0: filterName[filter], dictSize); michael@0: LOG("Compressed size is %" PRIuSize, outBuf.GetLength()); michael@0: michael@0: /* Sanity check */ michael@0: Buffer tmpBuf; michael@0: SzipDecompress decompress; michael@0: if (decompress.run("buffer", outBuf, "buffer", tmpBuf)) michael@0: return 1; michael@0: michael@0: size_t size = tmpBuf.GetLength(); michael@0: if (size != origSize) { michael@0: LOG("Compression error: %" PRIuSize " != %" PRIuSize, size, origSize); michael@0: return 1; michael@0: } michael@0: if (memcmp(static_cast(origBuf), static_cast(tmpBuf), size)) { michael@0: LOG("Compression error: content mismatch"); michael@0: return 1; michael@0: } michael@0: return 0; michael@0: } michael@0: michael@0: int SzipCompress::do_compress(Buffer &origBuf, Buffer &outBuf, michael@0: const unsigned char *aDict, size_t aDictSize, michael@0: SeekableZStream::FilterId aFilter) michael@0: { michael@0: size_t origSize = origBuf.GetLength(); michael@0: MOZ_ASSERT(origSize != 0); michael@0: michael@0: /* Expected total number of chunks */ michael@0: size_t nChunks = ((origSize + chunkSize - 1) / chunkSize); michael@0: michael@0: /* The first chunk is going to be stored after the header, the dictionary michael@0: * and the offset table */ michael@0: size_t offset = sizeof(SeekableZStreamHeader) + aDictSize michael@0: + nChunks * sizeof(uint32_t); michael@0: michael@0: if (offset >= origSize) michael@0: return 1; michael@0: michael@0: /* Allocate a buffer the size of the uncompressed data: we don't want michael@0: * a compressed file larger than that anyways. */ michael@0: if (!outBuf.Resize(origSize)) { michael@0: LOG("Couldn't allocate output buffer: %s", strerror(errno)); michael@0: return 1; michael@0: } michael@0: michael@0: SeekableZStreamHeader *header = new (outBuf) SeekableZStreamHeader; michael@0: unsigned char *dictionary = static_cast( michael@0: outBuf + sizeof(SeekableZStreamHeader)); michael@0: le_uint32 *entry = michael@0: reinterpret_cast(dictionary + aDictSize); michael@0: michael@0: /* Initialize header */ michael@0: header->chunkSize = chunkSize; michael@0: header->dictSize = aDictSize; michael@0: header->totalSize = offset; michael@0: header->windowBits = -SzipCompress::winSizeLog; // Raw stream, michael@0: // window size of 32k. michael@0: header->filter = aFilter; michael@0: if (aDictSize) michael@0: memcpy(dictionary, aDict, aDictSize); michael@0: michael@0: /* Initialize zlib structure */ michael@0: z_stream zStream; michael@0: memset(&zStream, 0, sizeof(zStream)); michael@0: zStream.avail_out = origSize - offset; michael@0: zStream.next_out = static_cast(outBuf) + offset; michael@0: michael@0: size_t avail = 0; michael@0: size_t size = origSize; michael@0: unsigned char *data = reinterpret_cast( michael@0: static_cast(origBuf)); michael@0: while (size) { michael@0: avail = std::min(size, chunkSize); michael@0: michael@0: /* Compress chunk */ michael@0: int ret = deflateInit2(&zStream, 9, Z_DEFLATED, header->windowBits, michael@0: MAX_MEM_LEVEL, Z_DEFAULT_STRATEGY); michael@0: if (aDictSize) michael@0: deflateSetDictionary(&zStream, dictionary, aDictSize); michael@0: MOZ_ASSERT(ret == Z_OK); michael@0: zStream.avail_in = avail; michael@0: zStream.next_in = data; michael@0: ret = deflate(&zStream, Z_FINISH); michael@0: MOZ_ASSERT(ret == Z_STREAM_END); michael@0: ret = deflateEnd(&zStream); michael@0: MOZ_ASSERT(ret == Z_OK); michael@0: if (zStream.avail_out <= 0) michael@0: return 1; michael@0: michael@0: size_t len = origSize - offset - zStream.avail_out; michael@0: michael@0: /* Adjust headers */ michael@0: header->totalSize += len; michael@0: *entry++ = offset; michael@0: header->nChunks++; michael@0: michael@0: /* Prepare for next iteration */ michael@0: size -= avail; michael@0: data += avail; michael@0: offset += len; michael@0: } michael@0: header->lastChunkSize = avail; michael@0: MOZ_ASSERT(header->totalSize == offset); michael@0: MOZ_ASSERT(header->nChunks == nChunks); michael@0: michael@0: if (!outBuf.Resize(offset)) { michael@0: LOG("Error truncating output: %s", strerror(errno)); michael@0: return 1; michael@0: } michael@0: michael@0: return 0; michael@0: michael@0: } michael@0: michael@0: bool GetSize(const char *str, size_t *out) michael@0: { michael@0: char *end; michael@0: MOZ_ASSERT(out); michael@0: errno = 0; michael@0: *out = strtol(str, &end, 10); michael@0: return (!errno && !*end); michael@0: } michael@0: michael@0: int main(int argc, char* argv[]) michael@0: { michael@0: mozilla::ScopedDeletePtr action; michael@0: char **firstArg; michael@0: bool compress = true; michael@0: size_t chunkSize = 0; michael@0: SeekableZStream::FilterId filter = SzipCompress::DEFAULT_FILTER; michael@0: size_t dictSize = (size_t) 0; michael@0: michael@0: Logging::Init(); michael@0: michael@0: for (firstArg = &argv[1]; argc > 2; argc--, firstArg++) { michael@0: if (!firstArg[0] || firstArg[0][0] != '-') michael@0: break; michael@0: if (strcmp(firstArg[0], "-d") == 0) { michael@0: compress = false; michael@0: } else if (strcmp(firstArg[0], "-c") == 0) { michael@0: firstArg++; michael@0: argc--; michael@0: if (!firstArg[0]) michael@0: break; michael@0: if (!GetSize(firstArg[0], &chunkSize) || !chunkSize || michael@0: (chunkSize % 4096) || (chunkSize > maxChunkSize)) { michael@0: LOG("Invalid chunk size"); michael@0: return 1; michael@0: } michael@0: } else if (strcmp(firstArg[0], "-f") == 0) { michael@0: firstArg++; michael@0: argc--; michael@0: if (!firstArg[0]) michael@0: break; michael@0: bool matched = false; michael@0: for (unsigned int i = 0; i < sizeof(filterName) / sizeof(char *); ++i) { michael@0: if (strcmp(firstArg[0], filterName[i]) == 0) { michael@0: filter = static_cast(i); michael@0: matched = true; michael@0: break; michael@0: } michael@0: } michael@0: if (!matched) { michael@0: LOG("Invalid filter"); michael@0: return 1; michael@0: } michael@0: } else if (strcmp(firstArg[0], "-D") == 0) { michael@0: firstArg++; michael@0: argc--; michael@0: if (!firstArg[0]) michael@0: break; michael@0: if (strcmp(firstArg[0], "auto") == 0) { michael@0: dictSize = -1; michael@0: } else if (!GetSize(firstArg[0], &dictSize) || (dictSize >= 1 << 16)) { michael@0: LOG("Invalid dictionary size"); michael@0: return 1; michael@0: } michael@0: } michael@0: } michael@0: michael@0: if (argc != 2 || !firstArg[0]) { michael@0: LOG("usage: %s [-d] [-c CHUNKSIZE] [-f FILTER] [-D DICTSIZE] file", michael@0: argv[0]); michael@0: return 1; michael@0: } michael@0: michael@0: if (compress) { michael@0: action = new SzipCompress(chunkSize, filter, dictSize); michael@0: } else { michael@0: if (chunkSize) { michael@0: LOG("-c is incompatible with -d"); michael@0: return 1; michael@0: } michael@0: if (dictSize) { michael@0: LOG("-D is incompatible with -d"); michael@0: return 1; michael@0: } michael@0: action = new SzipDecompress(); michael@0: } michael@0: michael@0: std::stringstream tmpOutStream; michael@0: tmpOutStream << firstArg[0] << ".sz." << getpid(); michael@0: std::string tmpOut(tmpOutStream.str()); michael@0: int ret; michael@0: struct stat st; michael@0: { michael@0: FileBuffer origBuf; michael@0: if (!origBuf.Init(firstArg[0])) { michael@0: LOG("Couldn't open %s: %s", firstArg[0], strerror(errno)); michael@0: return 1; michael@0: } michael@0: michael@0: ret = fstat(origBuf.getFd(), &st); michael@0: if (ret == -1) { michael@0: LOG("Couldn't stat %s: %s", firstArg[0], strerror(errno)); michael@0: return 1; michael@0: } michael@0: michael@0: size_t origSize = st.st_size; michael@0: michael@0: /* Mmap the original file */ michael@0: if (!origBuf.Resize(origSize)) { michael@0: LOG("Couldn't mmap %s: %s", firstArg[0], strerror(errno)); michael@0: return 1; michael@0: } michael@0: michael@0: /* Create the compressed file */ michael@0: FileBuffer outBuf; michael@0: if (!outBuf.Init(tmpOut.c_str(), true)) { michael@0: LOG("Couldn't open %s: %s", tmpOut.c_str(), strerror(errno)); michael@0: return 1; michael@0: } michael@0: michael@0: ret = action->run(firstArg[0], origBuf, tmpOut.c_str(), outBuf); michael@0: if ((ret == 0) && (fstat(outBuf.getFd(), &st) == -1)) { michael@0: st.st_size = 0; michael@0: } michael@0: } michael@0: michael@0: if ((ret == 0) && st.st_size) { michael@0: rename(tmpOut.c_str(), firstArg[0]); michael@0: } else { michael@0: unlink(tmpOut.c_str()); michael@0: } michael@0: return ret; michael@0: }