1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/mozglue/linker/szip.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,589 @@ 1.4 +/* This Source Code Form is subject to the terms of the Mozilla Public 1.5 + * License, v. 2.0. If a copy of the MPL was not distributed with this file, 1.6 + * You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.7 + 1.8 +#include <algorithm> 1.9 +#include <map> 1.10 +#include <sys/stat.h> 1.11 +#include <string> 1.12 +#include <sstream> 1.13 +#include <cstring> 1.14 +#include <cstdlib> 1.15 +#include <zlib.h> 1.16 +#include <fcntl.h> 1.17 +#include <errno.h> 1.18 +#include "mozilla/Assertions.h" 1.19 +#include "mozilla/Scoped.h" 1.20 +#include "SeekableZStream.h" 1.21 +#include "Utils.h" 1.22 +#include "Logging.h" 1.23 + 1.24 +Logging Logging::Singleton; 1.25 + 1.26 +const char *filterName[] = { 1.27 + "none", 1.28 + "thumb", 1.29 + "arm", 1.30 + "x86", 1.31 + "auto" 1.32 +}; 1.33 + 1.34 +/* Maximum supported size for chunkSize */ 1.35 +static const size_t maxChunkSize = 1.36 + 1 << (8 * std::min(sizeof(((SeekableZStreamHeader *)nullptr)->chunkSize), 1.37 + sizeof(((SeekableZStreamHeader *)nullptr)->lastChunkSize)) - 1); 1.38 + 1.39 +class Buffer: public MappedPtr 1.40 +{ 1.41 +public: 1.42 + virtual ~Buffer() { } 1.43 + 1.44 + virtual bool Resize(size_t size) 1.45 + { 1.46 + MemoryRange buf = mmap(nullptr, size, PROT_READ | PROT_WRITE, 1.47 + MAP_PRIVATE | MAP_ANON, -1, 0); 1.48 + if (buf == MAP_FAILED) 1.49 + return false; 1.50 + if (*this != MAP_FAILED) 1.51 + memcpy(buf, *this, std::min(size, GetLength())); 1.52 + Assign(buf); 1.53 + return true; 1.54 + } 1.55 + 1.56 + bool Fill(Buffer &other) 1.57 + { 1.58 + size_t size = other.GetLength(); 1.59 + if (!size || !Resize(size)) 1.60 + return false; 1.61 + memcpy(static_cast<void *>(*this), static_cast<void *>(other), size); 1.62 + return true; 1.63 + } 1.64 +}; 1.65 + 1.66 +class FileBuffer: public Buffer 1.67 +{ 1.68 +public: 1.69 + bool Init(const char *name, bool writable_ = false) 1.70 + { 1.71 + fd = open(name, writable_ ? O_RDWR | O_CREAT | O_TRUNC : O_RDONLY, 0666); 1.72 + if (fd == -1) 1.73 + return false; 1.74 + writable = writable_; 1.75 + return true; 1.76 + } 1.77 + 1.78 + virtual bool Resize(size_t size) 1.79 + { 1.80 + if (writable) { 1.81 + if (ftruncate(fd, size) == -1) 1.82 + return false; 1.83 + } 1.84 + Assign(MemoryRange::mmap(nullptr, size, 1.85 + PROT_READ | (writable ? PROT_WRITE : 0), 1.86 + writable ? MAP_SHARED : MAP_PRIVATE, fd, 0)); 1.87 + return this != MAP_FAILED; 1.88 + } 1.89 + 1.90 + int getFd() 1.91 + { 1.92 + return fd; 1.93 + } 1.94 + 1.95 +private: 1.96 + AutoCloseFD fd; 1.97 + bool writable; 1.98 +}; 1.99 + 1.100 +class FilteredBuffer: public Buffer 1.101 +{ 1.102 +public: 1.103 + void Filter(Buffer &other, SeekableZStream::FilterId filter, size_t chunkSize) 1.104 + { 1.105 + SeekableZStream::ZStreamFilter filterCB = 1.106 + SeekableZStream::GetFilter(filter); 1.107 + MOZ_ASSERT(filterCB); 1.108 + Fill(other); 1.109 + size_t size = other.GetLength(); 1.110 + Bytef *data = reinterpret_cast<Bytef *>(static_cast<void *>(*this)); 1.111 + size_t avail = 0; 1.112 + /* Filter needs to be applied in chunks. */ 1.113 + while (size) { 1.114 + avail = std::min(size, chunkSize); 1.115 + filterCB(data - static_cast<unsigned char *>(static_cast<void *>(*this)), 1.116 + SeekableZStream::FILTER, data, avail); 1.117 + size -= avail; 1.118 + data += avail; 1.119 + } 1.120 + } 1.121 +}; 1.122 + 1.123 +template <typename T> 1.124 +class Dictionary: public Buffer 1.125 +{ 1.126 + typedef T piece; 1.127 + typedef std::pair<piece, int> stat_pair; 1.128 + 1.129 + static bool stat_cmp(stat_pair a, stat_pair b) 1.130 + { 1.131 + return a.second < b.second; 1.132 + } 1.133 + 1.134 +public: 1.135 + Dictionary(Buffer &inBuf, size_t size) 1.136 + { 1.137 + if (!size || !Resize(size)) 1.138 + return; 1.139 + DEBUG_LOG("Creating dictionary"); 1.140 + piece *origBufPieces = reinterpret_cast<piece *>( 1.141 + static_cast<void *>(inBuf)); 1.142 + std::map<piece, int> stats; 1.143 + for (unsigned int i = 0; i < inBuf.GetLength() / sizeof(piece); i++) { 1.144 + stats[origBufPieces[i]]++; 1.145 + } 1.146 + std::vector<stat_pair> statsVec(stats.begin(), stats.end()); 1.147 + std::sort(statsVec.begin(), statsVec.end(), stat_cmp); 1.148 + 1.149 + piece *dictPieces = reinterpret_cast<piece *>( 1.150 + static_cast<void *>(*this)); 1.151 + typename std::vector<stat_pair>::reverse_iterator it = statsVec.rbegin(); 1.152 + for (int i = size / sizeof(piece); i > 0 && it < statsVec.rend(); 1.153 + i--, ++it) { 1.154 + dictPieces[i - 1] = it->first; 1.155 + } 1.156 + } 1.157 +}; 1.158 + 1.159 +class SzipAction 1.160 +{ 1.161 +public: 1.162 + virtual int run(const char *name, Buffer &origBuf, 1.163 + const char *outName, Buffer &outBuf) = 0; 1.164 + 1.165 + virtual ~SzipAction() {} 1.166 +}; 1.167 + 1.168 +class SzipDecompress: public SzipAction 1.169 +{ 1.170 +public: 1.171 + int run(const char *name, Buffer &origBuf, 1.172 + const char *outName, Buffer &outBuf); 1.173 +}; 1.174 + 1.175 + 1.176 +class SzipCompress: public SzipAction 1.177 +{ 1.178 +public: 1.179 + int run(const char *name, Buffer &origBuf, 1.180 + const char *outName, Buffer &outBuf); 1.181 + 1.182 + SzipCompress(size_t aChunkSize, SeekableZStream::FilterId aFilter, 1.183 + size_t aDictSize) 1.184 + : chunkSize(aChunkSize ? aChunkSize : 16384) 1.185 + , filter(aFilter) 1.186 + , dictSize(aDictSize) 1.187 + {} 1.188 + 1.189 + const static signed char winSizeLog = 15; 1.190 + const static size_t winSize = 1 << winSizeLog; 1.191 + 1.192 + const static SeekableZStream::FilterId DEFAULT_FILTER = 1.193 +#if defined(TARGET_THUMB) 1.194 + SeekableZStream::BCJ_THUMB; 1.195 +#elif defined(TARGET_ARM) 1.196 + SeekableZStream::BCJ_ARM; 1.197 +#elif defined(TARGET_X86) 1.198 + SeekableZStream::BCJ_X86; 1.199 +#else 1.200 + SeekableZStream::NONE; 1.201 +#endif 1.202 + 1.203 +private: 1.204 + 1.205 + int do_compress(Buffer &origBuf, Buffer &outBuf, const unsigned char *aDict, 1.206 + size_t aDictSize, SeekableZStream::FilterId aFilter); 1.207 + 1.208 + size_t chunkSize; 1.209 + SeekableZStream::FilterId filter; 1.210 + size_t dictSize; 1.211 +}; 1.212 + 1.213 +/* Decompress a seekable compressed stream */ 1.214 +int SzipDecompress::run(const char *name, Buffer &origBuf, 1.215 + const char *outName, Buffer &outBuf) 1.216 +{ 1.217 + size_t origSize = origBuf.GetLength(); 1.218 + if (origSize < sizeof(SeekableZStreamHeader)) { 1.219 + LOG("%s is not compressed", name); 1.220 + return 0; 1.221 + } 1.222 + 1.223 + SeekableZStream zstream; 1.224 + if (!zstream.Init(origBuf, origSize)) 1.225 + return 0; 1.226 + 1.227 + size_t size = zstream.GetUncompressedSize(); 1.228 + 1.229 + /* Give enough room for the uncompressed data */ 1.230 + if (!outBuf.Resize(size)) { 1.231 + LOG("Error resizing %s: %s", outName, strerror(errno)); 1.232 + return 1; 1.233 + } 1.234 + 1.235 + if (!zstream.Decompress(outBuf, 0, size)) 1.236 + return 1; 1.237 + 1.238 + return 0; 1.239 +} 1.240 + 1.241 +/* Generate a seekable compressed stream. */ 1.242 +int SzipCompress::run(const char *name, Buffer &origBuf, 1.243 + const char *outName, Buffer &outBuf) 1.244 +{ 1.245 + size_t origSize = origBuf.GetLength(); 1.246 + if (origSize == 0) { 1.247 + LOG("Won't compress %s: it's empty", name); 1.248 + return 1; 1.249 + } 1.250 + if (SeekableZStreamHeader::validate(origBuf)) { 1.251 + LOG("Skipping %s: it's already a szip", name); 1.252 + return 0; 1.253 + } 1.254 + bool compressed = false; 1.255 + LOG("Size = %" PRIuSize, origSize); 1.256 + 1.257 + /* Allocate a buffer the size of the uncompressed data: we don't want 1.258 + * a compressed file larger than that anyways. */ 1.259 + if (!outBuf.Resize(origSize)) { 1.260 + LOG("Couldn't allocate output buffer: %s", strerror(errno)); 1.261 + return 1; 1.262 + } 1.263 + 1.264 + /* Find the most appropriate filter */ 1.265 + SeekableZStream::FilterId firstFilter, lastFilter; 1.266 + bool scanFilters; 1.267 + if (filter == SeekableZStream::FILTER_MAX) { 1.268 + firstFilter = SeekableZStream::NONE; 1.269 + lastFilter = SeekableZStream::FILTER_MAX; 1.270 + scanFilters = true; 1.271 + } else { 1.272 + firstFilter = lastFilter = filter; 1.273 + ++lastFilter; 1.274 + scanFilters = false; 1.275 + } 1.276 + 1.277 + mozilla::ScopedDeletePtr<Buffer> filteredBuf; 1.278 + Buffer *origData; 1.279 + for (SeekableZStream::FilterId f = firstFilter; f < lastFilter; ++f) { 1.280 + FilteredBuffer *filteredTmp = nullptr; 1.281 + Buffer tmpBuf; 1.282 + if (f != SeekableZStream::NONE) { 1.283 + DEBUG_LOG("Applying filter \"%s\"", filterName[f]); 1.284 + filteredTmp = new FilteredBuffer(); 1.285 + filteredTmp->Filter(origBuf, f, chunkSize); 1.286 + origData = filteredTmp; 1.287 + } else { 1.288 + origData = &origBuf; 1.289 + } 1.290 + if (dictSize && !scanFilters) { 1.291 + filteredBuf = filteredTmp; 1.292 + break; 1.293 + } 1.294 + DEBUG_LOG("Compressing with no dictionary"); 1.295 + if (do_compress(*origData, tmpBuf, nullptr, 0, f) == 0) { 1.296 + if (tmpBuf.GetLength() < outBuf.GetLength()) { 1.297 + outBuf.Fill(tmpBuf); 1.298 + compressed = true; 1.299 + filter = f; 1.300 + filteredBuf = filteredTmp; 1.301 + continue; 1.302 + } 1.303 + } 1.304 + delete filteredTmp; 1.305 + } 1.306 + 1.307 + origData = filteredBuf ? filteredBuf : &origBuf; 1.308 + 1.309 + if (dictSize) { 1.310 + Dictionary<uint64_t> dict(*origData, dictSize ? SzipCompress::winSize : 0); 1.311 + 1.312 + /* Find the most appropriate dictionary size */ 1.313 + size_t firstDictSize, lastDictSize; 1.314 + if (dictSize == (size_t) -1) { 1.315 + /* If we scanned for filters, we effectively already tried dictSize=0 */ 1.316 + firstDictSize = scanFilters ? 4096 : 0; 1.317 + lastDictSize = SzipCompress::winSize; 1.318 + } else { 1.319 + firstDictSize = lastDictSize = dictSize; 1.320 + } 1.321 + 1.322 + Buffer tmpBuf; 1.323 + for (size_t d = firstDictSize; d <= lastDictSize; d += 4096) { 1.324 + DEBUG_LOG("Compressing with dictionary of size %" PRIuSize, d); 1.325 + if (do_compress(*origData, tmpBuf, static_cast<unsigned char *>(dict) 1.326 + + SzipCompress::winSize - d, d, filter)) 1.327 + continue; 1.328 + if (!compressed || tmpBuf.GetLength() < outBuf.GetLength()) { 1.329 + outBuf.Fill(tmpBuf); 1.330 + compressed = true; 1.331 + dictSize = d; 1.332 + } 1.333 + } 1.334 + } 1.335 + 1.336 + if (!compressed) { 1.337 + outBuf.Fill(origBuf); 1.338 + LOG("Not compressed"); 1.339 + return 0; 1.340 + } 1.341 + 1.342 + if (dictSize == (size_t) -1) 1.343 + dictSize = 0; 1.344 + 1.345 + DEBUG_LOG("Used filter \"%s\" and dictionary size of %" PRIuSize, 1.346 + filterName[filter], dictSize); 1.347 + LOG("Compressed size is %" PRIuSize, outBuf.GetLength()); 1.348 + 1.349 + /* Sanity check */ 1.350 + Buffer tmpBuf; 1.351 + SzipDecompress decompress; 1.352 + if (decompress.run("buffer", outBuf, "buffer", tmpBuf)) 1.353 + return 1; 1.354 + 1.355 + size_t size = tmpBuf.GetLength(); 1.356 + if (size != origSize) { 1.357 + LOG("Compression error: %" PRIuSize " != %" PRIuSize, size, origSize); 1.358 + return 1; 1.359 + } 1.360 + if (memcmp(static_cast<void *>(origBuf), static_cast<void *>(tmpBuf), size)) { 1.361 + LOG("Compression error: content mismatch"); 1.362 + return 1; 1.363 + } 1.364 + return 0; 1.365 +} 1.366 + 1.367 +int SzipCompress::do_compress(Buffer &origBuf, Buffer &outBuf, 1.368 + const unsigned char *aDict, size_t aDictSize, 1.369 + SeekableZStream::FilterId aFilter) 1.370 +{ 1.371 + size_t origSize = origBuf.GetLength(); 1.372 + MOZ_ASSERT(origSize != 0); 1.373 + 1.374 + /* Expected total number of chunks */ 1.375 + size_t nChunks = ((origSize + chunkSize - 1) / chunkSize); 1.376 + 1.377 + /* The first chunk is going to be stored after the header, the dictionary 1.378 + * and the offset table */ 1.379 + size_t offset = sizeof(SeekableZStreamHeader) + aDictSize 1.380 + + nChunks * sizeof(uint32_t); 1.381 + 1.382 + if (offset >= origSize) 1.383 + return 1; 1.384 + 1.385 + /* Allocate a buffer the size of the uncompressed data: we don't want 1.386 + * a compressed file larger than that anyways. */ 1.387 + if (!outBuf.Resize(origSize)) { 1.388 + LOG("Couldn't allocate output buffer: %s", strerror(errno)); 1.389 + return 1; 1.390 + } 1.391 + 1.392 + SeekableZStreamHeader *header = new (outBuf) SeekableZStreamHeader; 1.393 + unsigned char *dictionary = static_cast<unsigned char *>( 1.394 + outBuf + sizeof(SeekableZStreamHeader)); 1.395 + le_uint32 *entry = 1.396 + reinterpret_cast<le_uint32 *>(dictionary + aDictSize); 1.397 + 1.398 + /* Initialize header */ 1.399 + header->chunkSize = chunkSize; 1.400 + header->dictSize = aDictSize; 1.401 + header->totalSize = offset; 1.402 + header->windowBits = -SzipCompress::winSizeLog; // Raw stream, 1.403 + // window size of 32k. 1.404 + header->filter = aFilter; 1.405 + if (aDictSize) 1.406 + memcpy(dictionary, aDict, aDictSize); 1.407 + 1.408 + /* Initialize zlib structure */ 1.409 + z_stream zStream; 1.410 + memset(&zStream, 0, sizeof(zStream)); 1.411 + zStream.avail_out = origSize - offset; 1.412 + zStream.next_out = static_cast<Bytef*>(outBuf) + offset; 1.413 + 1.414 + size_t avail = 0; 1.415 + size_t size = origSize; 1.416 + unsigned char *data = reinterpret_cast<unsigned char *>( 1.417 + static_cast<void *>(origBuf)); 1.418 + while (size) { 1.419 + avail = std::min(size, chunkSize); 1.420 + 1.421 + /* Compress chunk */ 1.422 + int ret = deflateInit2(&zStream, 9, Z_DEFLATED, header->windowBits, 1.423 + MAX_MEM_LEVEL, Z_DEFAULT_STRATEGY); 1.424 + if (aDictSize) 1.425 + deflateSetDictionary(&zStream, dictionary, aDictSize); 1.426 + MOZ_ASSERT(ret == Z_OK); 1.427 + zStream.avail_in = avail; 1.428 + zStream.next_in = data; 1.429 + ret = deflate(&zStream, Z_FINISH); 1.430 + MOZ_ASSERT(ret == Z_STREAM_END); 1.431 + ret = deflateEnd(&zStream); 1.432 + MOZ_ASSERT(ret == Z_OK); 1.433 + if (zStream.avail_out <= 0) 1.434 + return 1; 1.435 + 1.436 + size_t len = origSize - offset - zStream.avail_out; 1.437 + 1.438 + /* Adjust headers */ 1.439 + header->totalSize += len; 1.440 + *entry++ = offset; 1.441 + header->nChunks++; 1.442 + 1.443 + /* Prepare for next iteration */ 1.444 + size -= avail; 1.445 + data += avail; 1.446 + offset += len; 1.447 + } 1.448 + header->lastChunkSize = avail; 1.449 + MOZ_ASSERT(header->totalSize == offset); 1.450 + MOZ_ASSERT(header->nChunks == nChunks); 1.451 + 1.452 + if (!outBuf.Resize(offset)) { 1.453 + LOG("Error truncating output: %s", strerror(errno)); 1.454 + return 1; 1.455 + } 1.456 + 1.457 + return 0; 1.458 + 1.459 +} 1.460 + 1.461 +bool GetSize(const char *str, size_t *out) 1.462 +{ 1.463 + char *end; 1.464 + MOZ_ASSERT(out); 1.465 + errno = 0; 1.466 + *out = strtol(str, &end, 10); 1.467 + return (!errno && !*end); 1.468 +} 1.469 + 1.470 +int main(int argc, char* argv[]) 1.471 +{ 1.472 + mozilla::ScopedDeletePtr<SzipAction> action; 1.473 + char **firstArg; 1.474 + bool compress = true; 1.475 + size_t chunkSize = 0; 1.476 + SeekableZStream::FilterId filter = SzipCompress::DEFAULT_FILTER; 1.477 + size_t dictSize = (size_t) 0; 1.478 + 1.479 + Logging::Init(); 1.480 + 1.481 + for (firstArg = &argv[1]; argc > 2; argc--, firstArg++) { 1.482 + if (!firstArg[0] || firstArg[0][0] != '-') 1.483 + break; 1.484 + if (strcmp(firstArg[0], "-d") == 0) { 1.485 + compress = false; 1.486 + } else if (strcmp(firstArg[0], "-c") == 0) { 1.487 + firstArg++; 1.488 + argc--; 1.489 + if (!firstArg[0]) 1.490 + break; 1.491 + if (!GetSize(firstArg[0], &chunkSize) || !chunkSize || 1.492 + (chunkSize % 4096) || (chunkSize > maxChunkSize)) { 1.493 + LOG("Invalid chunk size"); 1.494 + return 1; 1.495 + } 1.496 + } else if (strcmp(firstArg[0], "-f") == 0) { 1.497 + firstArg++; 1.498 + argc--; 1.499 + if (!firstArg[0]) 1.500 + break; 1.501 + bool matched = false; 1.502 + for (unsigned int i = 0; i < sizeof(filterName) / sizeof(char *); ++i) { 1.503 + if (strcmp(firstArg[0], filterName[i]) == 0) { 1.504 + filter = static_cast<SeekableZStream::FilterId>(i); 1.505 + matched = true; 1.506 + break; 1.507 + } 1.508 + } 1.509 + if (!matched) { 1.510 + LOG("Invalid filter"); 1.511 + return 1; 1.512 + } 1.513 + } else if (strcmp(firstArg[0], "-D") == 0) { 1.514 + firstArg++; 1.515 + argc--; 1.516 + if (!firstArg[0]) 1.517 + break; 1.518 + if (strcmp(firstArg[0], "auto") == 0) { 1.519 + dictSize = -1; 1.520 + } else if (!GetSize(firstArg[0], &dictSize) || (dictSize >= 1 << 16)) { 1.521 + LOG("Invalid dictionary size"); 1.522 + return 1; 1.523 + } 1.524 + } 1.525 + } 1.526 + 1.527 + if (argc != 2 || !firstArg[0]) { 1.528 + LOG("usage: %s [-d] [-c CHUNKSIZE] [-f FILTER] [-D DICTSIZE] file", 1.529 + argv[0]); 1.530 + return 1; 1.531 + } 1.532 + 1.533 + if (compress) { 1.534 + action = new SzipCompress(chunkSize, filter, dictSize); 1.535 + } else { 1.536 + if (chunkSize) { 1.537 + LOG("-c is incompatible with -d"); 1.538 + return 1; 1.539 + } 1.540 + if (dictSize) { 1.541 + LOG("-D is incompatible with -d"); 1.542 + return 1; 1.543 + } 1.544 + action = new SzipDecompress(); 1.545 + } 1.546 + 1.547 + std::stringstream tmpOutStream; 1.548 + tmpOutStream << firstArg[0] << ".sz." << getpid(); 1.549 + std::string tmpOut(tmpOutStream.str()); 1.550 + int ret; 1.551 + struct stat st; 1.552 + { 1.553 + FileBuffer origBuf; 1.554 + if (!origBuf.Init(firstArg[0])) { 1.555 + LOG("Couldn't open %s: %s", firstArg[0], strerror(errno)); 1.556 + return 1; 1.557 + } 1.558 + 1.559 + ret = fstat(origBuf.getFd(), &st); 1.560 + if (ret == -1) { 1.561 + LOG("Couldn't stat %s: %s", firstArg[0], strerror(errno)); 1.562 + return 1; 1.563 + } 1.564 + 1.565 + size_t origSize = st.st_size; 1.566 + 1.567 + /* Mmap the original file */ 1.568 + if (!origBuf.Resize(origSize)) { 1.569 + LOG("Couldn't mmap %s: %s", firstArg[0], strerror(errno)); 1.570 + return 1; 1.571 + } 1.572 + 1.573 + /* Create the compressed file */ 1.574 + FileBuffer outBuf; 1.575 + if (!outBuf.Init(tmpOut.c_str(), true)) { 1.576 + LOG("Couldn't open %s: %s", tmpOut.c_str(), strerror(errno)); 1.577 + return 1; 1.578 + } 1.579 + 1.580 + ret = action->run(firstArg[0], origBuf, tmpOut.c_str(), outBuf); 1.581 + if ((ret == 0) && (fstat(outBuf.getFd(), &st) == -1)) { 1.582 + st.st_size = 0; 1.583 + } 1.584 + } 1.585 + 1.586 + if ((ret == 0) && st.st_size) { 1.587 + rename(tmpOut.c_str(), firstArg[0]); 1.588 + } else { 1.589 + unlink(tmpOut.c_str()); 1.590 + } 1.591 + return ret; 1.592 +}