mozglue/linker/szip.cpp

Wed, 31 Dec 2014 07:22:50 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 07:22:50 +0100
branch
TOR_BUG_3246
changeset 4
fc2d59ddac77
permissions
-rw-r--r--

Correct previous dual key logic pending first delivery installment.

     1 /* This Source Code Form is subject to the terms of the Mozilla Public
     2  * License, v. 2.0. If a copy of the MPL was not distributed with this file,
     3  * You can obtain one at http://mozilla.org/MPL/2.0/. */
     5 #include <algorithm>
     6 #include <map>
     7 #include <sys/stat.h>
     8 #include <string>
     9 #include <sstream>
    10 #include <cstring>
    11 #include <cstdlib>
    12 #include <zlib.h>
    13 #include <fcntl.h>
    14 #include <errno.h>
    15 #include "mozilla/Assertions.h"
    16 #include "mozilla/Scoped.h"
    17 #include "SeekableZStream.h"
    18 #include "Utils.h"
    19 #include "Logging.h"
    21 Logging Logging::Singleton;
    23 const char *filterName[] = {
    24   "none",
    25   "thumb",
    26   "arm",
    27   "x86",
    28   "auto"
    29 };
    31 /* Maximum supported size for chunkSize */
    32 static const size_t maxChunkSize =
    33   1 << (8 * std::min(sizeof(((SeekableZStreamHeader *)nullptr)->chunkSize),
    34                      sizeof(((SeekableZStreamHeader *)nullptr)->lastChunkSize)) - 1);
    36 class Buffer: public MappedPtr
    37 {
    38 public:
    39   virtual ~Buffer() { }
    41   virtual bool Resize(size_t size)
    42   {
    43     MemoryRange buf = mmap(nullptr, size, PROT_READ | PROT_WRITE,
    44                            MAP_PRIVATE | MAP_ANON, -1, 0);
    45     if (buf == MAP_FAILED)
    46       return false;
    47     if (*this != MAP_FAILED)
    48       memcpy(buf, *this, std::min(size, GetLength()));
    49     Assign(buf);
    50     return true;
    51   }
    53   bool Fill(Buffer &other)
    54   {
    55     size_t size = other.GetLength();
    56     if (!size || !Resize(size))
    57       return false;
    58     memcpy(static_cast<void *>(*this), static_cast<void *>(other), size);
    59     return true;
    60   }
    61 };
    63 class FileBuffer: public Buffer
    64 {
    65 public:
    66   bool Init(const char *name, bool writable_ = false)
    67   {
    68     fd = open(name, writable_ ? O_RDWR | O_CREAT | O_TRUNC : O_RDONLY, 0666);
    69     if (fd == -1)
    70       return false;
    71     writable = writable_;
    72     return true;
    73   }
    75   virtual bool Resize(size_t size)
    76   {
    77     if (writable) {
    78       if (ftruncate(fd, size) == -1)
    79         return false;
    80     }
    81     Assign(MemoryRange::mmap(nullptr, size,
    82                              PROT_READ | (writable ? PROT_WRITE : 0),
    83                              writable ? MAP_SHARED : MAP_PRIVATE, fd, 0));
    84     return this != MAP_FAILED;
    85   }
    87   int getFd()
    88   {
    89     return fd;
    90   }
    92 private:
    93   AutoCloseFD fd;
    94   bool writable;
    95 };
    97 class FilteredBuffer: public Buffer
    98 {
    99 public:
   100   void Filter(Buffer &other, SeekableZStream::FilterId filter, size_t chunkSize)
   101   {
   102     SeekableZStream::ZStreamFilter filterCB =
   103       SeekableZStream::GetFilter(filter);
   104     MOZ_ASSERT(filterCB);
   105     Fill(other);
   106     size_t size = other.GetLength();
   107     Bytef *data = reinterpret_cast<Bytef *>(static_cast<void *>(*this));
   108     size_t avail = 0;
   109     /* Filter needs to be applied in chunks. */
   110     while (size) {
   111       avail = std::min(size, chunkSize);
   112       filterCB(data - static_cast<unsigned char *>(static_cast<void *>(*this)),
   113                SeekableZStream::FILTER, data, avail);
   114       size -= avail;
   115       data += avail;
   116     }
   117   }
   118 };
   120 template <typename T>
   121 class Dictionary: public Buffer
   122 {
   123   typedef T piece;
   124   typedef std::pair<piece, int> stat_pair;
   126   static bool stat_cmp(stat_pair a, stat_pair b)
   127   {
   128     return a.second < b.second;
   129   }
   131 public:
   132   Dictionary(Buffer &inBuf, size_t size)
   133   {
   134     if (!size || !Resize(size))
   135       return;
   136     DEBUG_LOG("Creating dictionary");
   137     piece *origBufPieces = reinterpret_cast<piece *>(
   138                            static_cast<void *>(inBuf));
   139     std::map<piece, int> stats;
   140     for (unsigned int i = 0; i < inBuf.GetLength() / sizeof(piece); i++) {
   141       stats[origBufPieces[i]]++;
   142     }
   143     std::vector<stat_pair> statsVec(stats.begin(), stats.end());
   144     std::sort(statsVec.begin(), statsVec.end(), stat_cmp);
   146     piece *dictPieces = reinterpret_cast<piece *>(
   147                         static_cast<void *>(*this));
   148     typename std::vector<stat_pair>::reverse_iterator it = statsVec.rbegin();
   149     for (int i = size / sizeof(piece); i > 0 && it < statsVec.rend();
   150          i--, ++it) {
   151       dictPieces[i - 1] = it->first;
   152     }
   153   }
   154 };
   156 class SzipAction
   157 {
   158 public:
   159   virtual int run(const char *name, Buffer &origBuf,
   160                   const char *outName, Buffer &outBuf) = 0;
   162   virtual ~SzipAction() {}
   163 };
   165 class SzipDecompress: public SzipAction
   166 {
   167 public:
   168   int run(const char *name, Buffer &origBuf,
   169           const char *outName, Buffer &outBuf);
   170 };
   173 class SzipCompress: public SzipAction
   174 {
   175 public:
   176   int run(const char *name, Buffer &origBuf,
   177           const char *outName, Buffer &outBuf);
   179   SzipCompress(size_t aChunkSize, SeekableZStream::FilterId aFilter,
   180                size_t aDictSize)
   181   : chunkSize(aChunkSize ? aChunkSize : 16384)
   182   , filter(aFilter)
   183   , dictSize(aDictSize)
   184   {}
   186   const static signed char winSizeLog = 15;
   187   const static size_t winSize = 1 << winSizeLog;
   189   const static SeekableZStream::FilterId DEFAULT_FILTER =
   190 #if defined(TARGET_THUMB)
   191     SeekableZStream::BCJ_THUMB;
   192 #elif defined(TARGET_ARM)
   193     SeekableZStream::BCJ_ARM;
   194 #elif defined(TARGET_X86)
   195     SeekableZStream::BCJ_X86;
   196 #else
   197     SeekableZStream::NONE;
   198 #endif
   200 private:
   202   int do_compress(Buffer &origBuf, Buffer &outBuf, const unsigned char *aDict,
   203                   size_t aDictSize, SeekableZStream::FilterId aFilter);
   205   size_t chunkSize;
   206   SeekableZStream::FilterId filter;
   207   size_t dictSize;
   208 };
   210 /* Decompress a seekable compressed stream */
   211 int SzipDecompress::run(const char *name, Buffer &origBuf,
   212                         const char *outName, Buffer &outBuf)
   213 {
   214   size_t origSize = origBuf.GetLength();
   215   if (origSize < sizeof(SeekableZStreamHeader)) {
   216     LOG("%s is not compressed", name);
   217     return 0;
   218   }
   220   SeekableZStream zstream;
   221   if (!zstream.Init(origBuf, origSize))
   222     return 0;
   224   size_t size = zstream.GetUncompressedSize();
   226   /* Give enough room for the uncompressed data */
   227   if (!outBuf.Resize(size)) {
   228     LOG("Error resizing %s: %s", outName, strerror(errno));
   229     return 1;
   230   }
   232   if (!zstream.Decompress(outBuf, 0, size))
   233     return 1;
   235   return 0;
   236 }
   238 /* Generate a seekable compressed stream. */
   239 int SzipCompress::run(const char *name, Buffer &origBuf,
   240                       const char *outName, Buffer &outBuf)
   241 {
   242   size_t origSize = origBuf.GetLength();
   243   if (origSize == 0) {
   244     LOG("Won't compress %s: it's empty", name);
   245     return 1;
   246   }
   247   if (SeekableZStreamHeader::validate(origBuf)) {
   248     LOG("Skipping %s: it's already a szip", name);
   249     return 0;
   250   }
   251   bool compressed = false;
   252   LOG("Size = %" PRIuSize, origSize);
   254   /* Allocate a buffer the size of the uncompressed data: we don't want
   255    * a compressed file larger than that anyways. */
   256   if (!outBuf.Resize(origSize)) {
   257     LOG("Couldn't allocate output buffer: %s", strerror(errno));
   258     return 1;
   259   }
   261   /* Find the most appropriate filter */
   262   SeekableZStream::FilterId firstFilter, lastFilter;
   263   bool scanFilters;
   264   if (filter == SeekableZStream::FILTER_MAX) {
   265     firstFilter = SeekableZStream::NONE;
   266     lastFilter = SeekableZStream::FILTER_MAX;
   267     scanFilters = true;
   268   } else {
   269     firstFilter = lastFilter = filter;
   270     ++lastFilter;
   271     scanFilters = false;
   272   }
   274   mozilla::ScopedDeletePtr<Buffer> filteredBuf;
   275   Buffer *origData;
   276   for (SeekableZStream::FilterId f = firstFilter; f < lastFilter; ++f) {
   277     FilteredBuffer *filteredTmp = nullptr;
   278     Buffer tmpBuf;
   279     if (f != SeekableZStream::NONE) {
   280       DEBUG_LOG("Applying filter \"%s\"", filterName[f]);
   281       filteredTmp = new FilteredBuffer();
   282       filteredTmp->Filter(origBuf, f, chunkSize);
   283       origData = filteredTmp;
   284     } else {
   285       origData = &origBuf;
   286     }
   287     if (dictSize  && !scanFilters) {
   288       filteredBuf = filteredTmp;
   289       break;
   290     }
   291     DEBUG_LOG("Compressing with no dictionary");
   292     if (do_compress(*origData, tmpBuf, nullptr, 0, f) == 0) {
   293       if (tmpBuf.GetLength() < outBuf.GetLength()) {
   294         outBuf.Fill(tmpBuf);
   295         compressed = true;
   296         filter = f;
   297         filteredBuf = filteredTmp;
   298         continue;
   299       }
   300     }
   301     delete filteredTmp;
   302   }
   304   origData = filteredBuf ? filteredBuf : &origBuf;
   306   if (dictSize) {
   307     Dictionary<uint64_t> dict(*origData, dictSize ? SzipCompress::winSize : 0);
   309     /* Find the most appropriate dictionary size */
   310     size_t firstDictSize, lastDictSize;
   311     if (dictSize == (size_t) -1) {
   312       /* If we scanned for filters, we effectively already tried dictSize=0 */
   313       firstDictSize = scanFilters ? 4096 : 0;
   314       lastDictSize = SzipCompress::winSize;
   315     } else {
   316       firstDictSize = lastDictSize = dictSize;
   317     }
   319     Buffer tmpBuf;
   320     for (size_t d = firstDictSize; d <= lastDictSize; d += 4096) {
   321       DEBUG_LOG("Compressing with dictionary of size %" PRIuSize, d);
   322       if (do_compress(*origData, tmpBuf, static_cast<unsigned char *>(dict)
   323                       + SzipCompress::winSize - d, d, filter))
   324         continue;
   325       if (!compressed || tmpBuf.GetLength() < outBuf.GetLength()) {
   326         outBuf.Fill(tmpBuf);
   327         compressed = true;
   328         dictSize = d;
   329       }
   330     }
   331   }
   333   if (!compressed) {
   334     outBuf.Fill(origBuf);
   335     LOG("Not compressed");
   336     return 0;
   337   }
   339   if (dictSize == (size_t) -1)
   340     dictSize = 0;
   342   DEBUG_LOG("Used filter \"%s\" and dictionary size of %" PRIuSize,
   343             filterName[filter], dictSize);
   344   LOG("Compressed size is %" PRIuSize, outBuf.GetLength());
   346   /* Sanity check */
   347   Buffer tmpBuf;
   348   SzipDecompress decompress;
   349   if (decompress.run("buffer", outBuf, "buffer", tmpBuf))
   350     return 1;
   352   size_t size = tmpBuf.GetLength();
   353   if (size != origSize) {
   354     LOG("Compression error: %" PRIuSize " != %" PRIuSize, size, origSize);
   355     return 1;
   356   }
   357   if (memcmp(static_cast<void *>(origBuf), static_cast<void *>(tmpBuf), size)) {
   358     LOG("Compression error: content mismatch");
   359     return 1;
   360   }
   361   return 0;
   362 }
   364 int SzipCompress::do_compress(Buffer &origBuf, Buffer &outBuf,
   365                               const unsigned char *aDict, size_t aDictSize,
   366                               SeekableZStream::FilterId aFilter)
   367 {
   368   size_t origSize = origBuf.GetLength();
   369   MOZ_ASSERT(origSize != 0);
   371   /* Expected total number of chunks */
   372   size_t nChunks = ((origSize + chunkSize - 1) / chunkSize);
   374   /* The first chunk is going to be stored after the header, the dictionary
   375    * and the offset table */
   376   size_t offset = sizeof(SeekableZStreamHeader) + aDictSize
   377                   + nChunks * sizeof(uint32_t);
   379   if (offset >= origSize)
   380     return 1;
   382     /* Allocate a buffer the size of the uncompressed data: we don't want
   383    * a compressed file larger than that anyways. */
   384   if (!outBuf.Resize(origSize)) {
   385     LOG("Couldn't allocate output buffer: %s", strerror(errno));
   386     return 1;
   387   }
   389   SeekableZStreamHeader *header = new (outBuf) SeekableZStreamHeader;
   390   unsigned char *dictionary = static_cast<unsigned char *>(
   391                               outBuf + sizeof(SeekableZStreamHeader));
   392   le_uint32 *entry =
   393     reinterpret_cast<le_uint32 *>(dictionary + aDictSize);
   395   /* Initialize header */
   396   header->chunkSize = chunkSize;
   397   header->dictSize = aDictSize;
   398   header->totalSize = offset;
   399   header->windowBits = -SzipCompress::winSizeLog; // Raw stream,
   400                                                   // window size of 32k.
   401   header->filter = aFilter;
   402   if (aDictSize)
   403     memcpy(dictionary, aDict, aDictSize);
   405   /* Initialize zlib structure */
   406   z_stream zStream;
   407   memset(&zStream, 0, sizeof(zStream));
   408   zStream.avail_out = origSize - offset;
   409   zStream.next_out = static_cast<Bytef*>(outBuf) + offset;
   411   size_t avail = 0;
   412   size_t size = origSize;
   413   unsigned char *data = reinterpret_cast<unsigned char *>(
   414                         static_cast<void *>(origBuf));
   415   while (size) {
   416     avail = std::min(size, chunkSize);
   418     /* Compress chunk */
   419     int ret = deflateInit2(&zStream, 9, Z_DEFLATED, header->windowBits,
   420                            MAX_MEM_LEVEL, Z_DEFAULT_STRATEGY);
   421     if (aDictSize)
   422       deflateSetDictionary(&zStream, dictionary, aDictSize);
   423     MOZ_ASSERT(ret == Z_OK);
   424     zStream.avail_in = avail;
   425     zStream.next_in = data;
   426     ret = deflate(&zStream, Z_FINISH);
   427     MOZ_ASSERT(ret == Z_STREAM_END);
   428     ret = deflateEnd(&zStream);
   429     MOZ_ASSERT(ret == Z_OK);
   430     if (zStream.avail_out <= 0)
   431       return 1;
   433     size_t len = origSize - offset - zStream.avail_out;
   435     /* Adjust headers */
   436     header->totalSize += len;
   437     *entry++ = offset;
   438     header->nChunks++;
   440     /* Prepare for next iteration */
   441     size -= avail;
   442     data += avail;
   443     offset += len;
   444   }
   445   header->lastChunkSize = avail;
   446   MOZ_ASSERT(header->totalSize == offset);
   447   MOZ_ASSERT(header->nChunks == nChunks);
   449   if (!outBuf.Resize(offset)) {
   450     LOG("Error truncating output: %s", strerror(errno));
   451     return 1;
   452   }
   454   return 0;
   456 }
   458 bool GetSize(const char *str, size_t *out)
   459 {
   460   char *end;
   461   MOZ_ASSERT(out);
   462   errno = 0;
   463   *out = strtol(str, &end, 10);
   464   return (!errno && !*end);
   465 }
   467 int main(int argc, char* argv[])
   468 {
   469   mozilla::ScopedDeletePtr<SzipAction> action;
   470   char **firstArg;
   471   bool compress = true;
   472   size_t chunkSize = 0;
   473   SeekableZStream::FilterId filter = SzipCompress::DEFAULT_FILTER;
   474   size_t dictSize = (size_t) 0;
   476   Logging::Init();
   478   for (firstArg = &argv[1]; argc > 2; argc--, firstArg++) {
   479     if (!firstArg[0] || firstArg[0][0] != '-')
   480       break;
   481     if (strcmp(firstArg[0], "-d") == 0) {
   482       compress = false;
   483     } else if (strcmp(firstArg[0], "-c") == 0) {
   484       firstArg++;
   485       argc--;
   486       if (!firstArg[0])
   487         break;
   488       if (!GetSize(firstArg[0], &chunkSize) || !chunkSize ||
   489           (chunkSize % 4096) || (chunkSize > maxChunkSize)) {
   490         LOG("Invalid chunk size");
   491         return 1;
   492       }
   493     } else if (strcmp(firstArg[0], "-f") == 0) {
   494       firstArg++;
   495       argc--;
   496       if (!firstArg[0])
   497         break;
   498       bool matched = false;
   499       for (unsigned int i = 0; i < sizeof(filterName) / sizeof(char *); ++i) {
   500         if (strcmp(firstArg[0], filterName[i]) == 0) {
   501           filter = static_cast<SeekableZStream::FilterId>(i);
   502           matched = true;
   503           break;
   504         }
   505       }
   506       if (!matched) {
   507         LOG("Invalid filter");
   508         return 1;
   509       }
   510     } else if (strcmp(firstArg[0], "-D") == 0) {
   511       firstArg++;
   512       argc--;
   513       if (!firstArg[0])
   514         break;
   515       if (strcmp(firstArg[0], "auto") == 0) {
   516         dictSize = -1;
   517       } else if (!GetSize(firstArg[0], &dictSize) || (dictSize >= 1 << 16)) {
   518         LOG("Invalid dictionary size");
   519         return 1;
   520       }
   521     }
   522   }
   524   if (argc != 2 || !firstArg[0]) {
   525     LOG("usage: %s [-d] [-c CHUNKSIZE] [-f FILTER] [-D DICTSIZE] file",
   526         argv[0]);
   527     return 1;
   528   }
   530   if (compress) {
   531     action = new SzipCompress(chunkSize, filter, dictSize);
   532   } else {
   533     if (chunkSize) {
   534       LOG("-c is incompatible with -d");
   535       return 1;
   536     }
   537     if (dictSize) {
   538       LOG("-D is incompatible with -d");
   539       return 1;
   540     }
   541     action = new SzipDecompress();
   542   }
   544   std::stringstream tmpOutStream;
   545   tmpOutStream << firstArg[0] << ".sz." << getpid();
   546   std::string tmpOut(tmpOutStream.str());
   547   int ret;
   548   struct stat st;
   549   {
   550     FileBuffer origBuf;
   551     if (!origBuf.Init(firstArg[0])) {
   552       LOG("Couldn't open %s: %s", firstArg[0], strerror(errno));
   553       return 1;
   554     }
   556     ret = fstat(origBuf.getFd(), &st);
   557     if (ret == -1) {
   558       LOG("Couldn't stat %s: %s", firstArg[0], strerror(errno));
   559       return 1;
   560     }
   562     size_t origSize = st.st_size;
   564     /* Mmap the original file */
   565     if (!origBuf.Resize(origSize)) {
   566       LOG("Couldn't mmap %s: %s", firstArg[0], strerror(errno));
   567       return 1;
   568     }
   570     /* Create the compressed file */
   571     FileBuffer outBuf;
   572     if (!outBuf.Init(tmpOut.c_str(), true)) {
   573       LOG("Couldn't open %s: %s", tmpOut.c_str(), strerror(errno));
   574       return 1;
   575     }
   577     ret = action->run(firstArg[0], origBuf, tmpOut.c_str(), outBuf);
   578     if ((ret == 0) && (fstat(outBuf.getFd(), &st) == -1)) {
   579       st.st_size = 0;
   580     }
   581   }
   583   if ((ret == 0) && st.st_size) {
   584     rename(tmpOut.c_str(), firstArg[0]);
   585   } else {
   586     unlink(tmpOut.c_str());
   587   }
   588   return ret;
   589 }

mercurial