mozglue/linker/szip.cpp

branch
TOR_BUG_3246
changeset 4
fc2d59ddac77
equal deleted inserted replaced
-1:000000000000 0:cd4e4f222c4b
1 /* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
3 * You can obtain one at http://mozilla.org/MPL/2.0/. */
4
5 #include <algorithm>
6 #include <map>
7 #include <sys/stat.h>
8 #include <string>
9 #include <sstream>
10 #include <cstring>
11 #include <cstdlib>
12 #include <zlib.h>
13 #include <fcntl.h>
14 #include <errno.h>
15 #include "mozilla/Assertions.h"
16 #include "mozilla/Scoped.h"
17 #include "SeekableZStream.h"
18 #include "Utils.h"
19 #include "Logging.h"
20
21 Logging Logging::Singleton;
22
/* Human-readable filter names. main() turns the index of the matching name
 * into a SeekableZStream::FilterId, and the compressor indexes this table
 * with a filter id when logging. */
const char *filterName[] = { "none", "thumb", "arm", "x86", "auto" };
30
31 /* Maximum supported size for chunkSize */
32 static const size_t maxChunkSize =
33 1 << (8 * std::min(sizeof(((SeekableZStreamHeader *)nullptr)->chunkSize),
34 sizeof(((SeekableZStreamHeader *)nullptr)->lastChunkSize)) - 1);
35
36 class Buffer: public MappedPtr
37 {
38 public:
39 virtual ~Buffer() { }
40
41 virtual bool Resize(size_t size)
42 {
43 MemoryRange buf = mmap(nullptr, size, PROT_READ | PROT_WRITE,
44 MAP_PRIVATE | MAP_ANON, -1, 0);
45 if (buf == MAP_FAILED)
46 return false;
47 if (*this != MAP_FAILED)
48 memcpy(buf, *this, std::min(size, GetLength()));
49 Assign(buf);
50 return true;
51 }
52
53 bool Fill(Buffer &other)
54 {
55 size_t size = other.GetLength();
56 if (!size || !Resize(size))
57 return false;
58 memcpy(static_cast<void *>(*this), static_cast<void *>(other), size);
59 return true;
60 }
61 };
62
63 class FileBuffer: public Buffer
64 {
65 public:
66 bool Init(const char *name, bool writable_ = false)
67 {
68 fd = open(name, writable_ ? O_RDWR | O_CREAT | O_TRUNC : O_RDONLY, 0666);
69 if (fd == -1)
70 return false;
71 writable = writable_;
72 return true;
73 }
74
75 virtual bool Resize(size_t size)
76 {
77 if (writable) {
78 if (ftruncate(fd, size) == -1)
79 return false;
80 }
81 Assign(MemoryRange::mmap(nullptr, size,
82 PROT_READ | (writable ? PROT_WRITE : 0),
83 writable ? MAP_SHARED : MAP_PRIVATE, fd, 0));
84 return this != MAP_FAILED;
85 }
86
87 int getFd()
88 {
89 return fd;
90 }
91
92 private:
93 AutoCloseFD fd;
94 bool writable;
95 };
96
97 class FilteredBuffer: public Buffer
98 {
99 public:
100 void Filter(Buffer &other, SeekableZStream::FilterId filter, size_t chunkSize)
101 {
102 SeekableZStream::ZStreamFilter filterCB =
103 SeekableZStream::GetFilter(filter);
104 MOZ_ASSERT(filterCB);
105 Fill(other);
106 size_t size = other.GetLength();
107 Bytef *data = reinterpret_cast<Bytef *>(static_cast<void *>(*this));
108 size_t avail = 0;
109 /* Filter needs to be applied in chunks. */
110 while (size) {
111 avail = std::min(size, chunkSize);
112 filterCB(data - static_cast<unsigned char *>(static_cast<void *>(*this)),
113 SeekableZStream::FILTER, data, avail);
114 size -= avail;
115 data += avail;
116 }
117 }
118 };
119
120 template <typename T>
121 class Dictionary: public Buffer
122 {
123 typedef T piece;
124 typedef std::pair<piece, int> stat_pair;
125
126 static bool stat_cmp(stat_pair a, stat_pair b)
127 {
128 return a.second < b.second;
129 }
130
131 public:
132 Dictionary(Buffer &inBuf, size_t size)
133 {
134 if (!size || !Resize(size))
135 return;
136 DEBUG_LOG("Creating dictionary");
137 piece *origBufPieces = reinterpret_cast<piece *>(
138 static_cast<void *>(inBuf));
139 std::map<piece, int> stats;
140 for (unsigned int i = 0; i < inBuf.GetLength() / sizeof(piece); i++) {
141 stats[origBufPieces[i]]++;
142 }
143 std::vector<stat_pair> statsVec(stats.begin(), stats.end());
144 std::sort(statsVec.begin(), statsVec.end(), stat_cmp);
145
146 piece *dictPieces = reinterpret_cast<piece *>(
147 static_cast<void *>(*this));
148 typename std::vector<stat_pair>::reverse_iterator it = statsVec.rbegin();
149 for (int i = size / sizeof(piece); i > 0 && it < statsVec.rend();
150 i--, ++it) {
151 dictPieces[i - 1] = it->first;
152 }
153 }
154 };
155
156 class SzipAction
157 {
158 public:
159 virtual int run(const char *name, Buffer &origBuf,
160 const char *outName, Buffer &outBuf) = 0;
161
162 virtual ~SzipAction() {}
163 };
164
165 class SzipDecompress: public SzipAction
166 {
167 public:
168 int run(const char *name, Buffer &origBuf,
169 const char *outName, Buffer &outBuf);
170 };
171
172
173 class SzipCompress: public SzipAction
174 {
175 public:
176 int run(const char *name, Buffer &origBuf,
177 const char *outName, Buffer &outBuf);
178
179 SzipCompress(size_t aChunkSize, SeekableZStream::FilterId aFilter,
180 size_t aDictSize)
181 : chunkSize(aChunkSize ? aChunkSize : 16384)
182 , filter(aFilter)
183 , dictSize(aDictSize)
184 {}
185
186 const static signed char winSizeLog = 15;
187 const static size_t winSize = 1 << winSizeLog;
188
189 const static SeekableZStream::FilterId DEFAULT_FILTER =
190 #if defined(TARGET_THUMB)
191 SeekableZStream::BCJ_THUMB;
192 #elif defined(TARGET_ARM)
193 SeekableZStream::BCJ_ARM;
194 #elif defined(TARGET_X86)
195 SeekableZStream::BCJ_X86;
196 #else
197 SeekableZStream::NONE;
198 #endif
199
200 private:
201
202 int do_compress(Buffer &origBuf, Buffer &outBuf, const unsigned char *aDict,
203 size_t aDictSize, SeekableZStream::FilterId aFilter);
204
205 size_t chunkSize;
206 SeekableZStream::FilterId filter;
207 size_t dictSize;
208 };
209
210 /* Decompress a seekable compressed stream */
211 int SzipDecompress::run(const char *name, Buffer &origBuf,
212 const char *outName, Buffer &outBuf)
213 {
214 size_t origSize = origBuf.GetLength();
215 if (origSize < sizeof(SeekableZStreamHeader)) {
216 LOG("%s is not compressed", name);
217 return 0;
218 }
219
220 SeekableZStream zstream;
221 if (!zstream.Init(origBuf, origSize))
222 return 0;
223
224 size_t size = zstream.GetUncompressedSize();
225
226 /* Give enough room for the uncompressed data */
227 if (!outBuf.Resize(size)) {
228 LOG("Error resizing %s: %s", outName, strerror(errno));
229 return 1;
230 }
231
232 if (!zstream.Decompress(outBuf, 0, size))
233 return 1;
234
235 return 0;
236 }
237
238 /* Generate a seekable compressed stream. */
239 int SzipCompress::run(const char *name, Buffer &origBuf,
240 const char *outName, Buffer &outBuf)
241 {
242 size_t origSize = origBuf.GetLength();
243 if (origSize == 0) {
244 LOG("Won't compress %s: it's empty", name);
245 return 1;
246 }
247 if (SeekableZStreamHeader::validate(origBuf)) {
248 LOG("Skipping %s: it's already a szip", name);
249 return 0;
250 }
251 bool compressed = false;
252 LOG("Size = %" PRIuSize, origSize);
253
254 /* Allocate a buffer the size of the uncompressed data: we don't want
255 * a compressed file larger than that anyways. */
256 if (!outBuf.Resize(origSize)) {
257 LOG("Couldn't allocate output buffer: %s", strerror(errno));
258 return 1;
259 }
260
261 /* Find the most appropriate filter */
262 SeekableZStream::FilterId firstFilter, lastFilter;
263 bool scanFilters;
264 if (filter == SeekableZStream::FILTER_MAX) {
265 firstFilter = SeekableZStream::NONE;
266 lastFilter = SeekableZStream::FILTER_MAX;
267 scanFilters = true;
268 } else {
269 firstFilter = lastFilter = filter;
270 ++lastFilter;
271 scanFilters = false;
272 }
273
274 mozilla::ScopedDeletePtr<Buffer> filteredBuf;
275 Buffer *origData;
276 for (SeekableZStream::FilterId f = firstFilter; f < lastFilter; ++f) {
277 FilteredBuffer *filteredTmp = nullptr;
278 Buffer tmpBuf;
279 if (f != SeekableZStream::NONE) {
280 DEBUG_LOG("Applying filter \"%s\"", filterName[f]);
281 filteredTmp = new FilteredBuffer();
282 filteredTmp->Filter(origBuf, f, chunkSize);
283 origData = filteredTmp;
284 } else {
285 origData = &origBuf;
286 }
287 if (dictSize && !scanFilters) {
288 filteredBuf = filteredTmp;
289 break;
290 }
291 DEBUG_LOG("Compressing with no dictionary");
292 if (do_compress(*origData, tmpBuf, nullptr, 0, f) == 0) {
293 if (tmpBuf.GetLength() < outBuf.GetLength()) {
294 outBuf.Fill(tmpBuf);
295 compressed = true;
296 filter = f;
297 filteredBuf = filteredTmp;
298 continue;
299 }
300 }
301 delete filteredTmp;
302 }
303
304 origData = filteredBuf ? filteredBuf : &origBuf;
305
306 if (dictSize) {
307 Dictionary<uint64_t> dict(*origData, dictSize ? SzipCompress::winSize : 0);
308
309 /* Find the most appropriate dictionary size */
310 size_t firstDictSize, lastDictSize;
311 if (dictSize == (size_t) -1) {
312 /* If we scanned for filters, we effectively already tried dictSize=0 */
313 firstDictSize = scanFilters ? 4096 : 0;
314 lastDictSize = SzipCompress::winSize;
315 } else {
316 firstDictSize = lastDictSize = dictSize;
317 }
318
319 Buffer tmpBuf;
320 for (size_t d = firstDictSize; d <= lastDictSize; d += 4096) {
321 DEBUG_LOG("Compressing with dictionary of size %" PRIuSize, d);
322 if (do_compress(*origData, tmpBuf, static_cast<unsigned char *>(dict)
323 + SzipCompress::winSize - d, d, filter))
324 continue;
325 if (!compressed || tmpBuf.GetLength() < outBuf.GetLength()) {
326 outBuf.Fill(tmpBuf);
327 compressed = true;
328 dictSize = d;
329 }
330 }
331 }
332
333 if (!compressed) {
334 outBuf.Fill(origBuf);
335 LOG("Not compressed");
336 return 0;
337 }
338
339 if (dictSize == (size_t) -1)
340 dictSize = 0;
341
342 DEBUG_LOG("Used filter \"%s\" and dictionary size of %" PRIuSize,
343 filterName[filter], dictSize);
344 LOG("Compressed size is %" PRIuSize, outBuf.GetLength());
345
346 /* Sanity check */
347 Buffer tmpBuf;
348 SzipDecompress decompress;
349 if (decompress.run("buffer", outBuf, "buffer", tmpBuf))
350 return 1;
351
352 size_t size = tmpBuf.GetLength();
353 if (size != origSize) {
354 LOG("Compression error: %" PRIuSize " != %" PRIuSize, size, origSize);
355 return 1;
356 }
357 if (memcmp(static_cast<void *>(origBuf), static_cast<void *>(tmpBuf), size)) {
358 LOG("Compression error: content mismatch");
359 return 1;
360 }
361 return 0;
362 }
363
364 int SzipCompress::do_compress(Buffer &origBuf, Buffer &outBuf,
365 const unsigned char *aDict, size_t aDictSize,
366 SeekableZStream::FilterId aFilter)
367 {
368 size_t origSize = origBuf.GetLength();
369 MOZ_ASSERT(origSize != 0);
370
371 /* Expected total number of chunks */
372 size_t nChunks = ((origSize + chunkSize - 1) / chunkSize);
373
374 /* The first chunk is going to be stored after the header, the dictionary
375 * and the offset table */
376 size_t offset = sizeof(SeekableZStreamHeader) + aDictSize
377 + nChunks * sizeof(uint32_t);
378
379 if (offset >= origSize)
380 return 1;
381
382 /* Allocate a buffer the size of the uncompressed data: we don't want
383 * a compressed file larger than that anyways. */
384 if (!outBuf.Resize(origSize)) {
385 LOG("Couldn't allocate output buffer: %s", strerror(errno));
386 return 1;
387 }
388
389 SeekableZStreamHeader *header = new (outBuf) SeekableZStreamHeader;
390 unsigned char *dictionary = static_cast<unsigned char *>(
391 outBuf + sizeof(SeekableZStreamHeader));
392 le_uint32 *entry =
393 reinterpret_cast<le_uint32 *>(dictionary + aDictSize);
394
395 /* Initialize header */
396 header->chunkSize = chunkSize;
397 header->dictSize = aDictSize;
398 header->totalSize = offset;
399 header->windowBits = -SzipCompress::winSizeLog; // Raw stream,
400 // window size of 32k.
401 header->filter = aFilter;
402 if (aDictSize)
403 memcpy(dictionary, aDict, aDictSize);
404
405 /* Initialize zlib structure */
406 z_stream zStream;
407 memset(&zStream, 0, sizeof(zStream));
408 zStream.avail_out = origSize - offset;
409 zStream.next_out = static_cast<Bytef*>(outBuf) + offset;
410
411 size_t avail = 0;
412 size_t size = origSize;
413 unsigned char *data = reinterpret_cast<unsigned char *>(
414 static_cast<void *>(origBuf));
415 while (size) {
416 avail = std::min(size, chunkSize);
417
418 /* Compress chunk */
419 int ret = deflateInit2(&zStream, 9, Z_DEFLATED, header->windowBits,
420 MAX_MEM_LEVEL, Z_DEFAULT_STRATEGY);
421 if (aDictSize)
422 deflateSetDictionary(&zStream, dictionary, aDictSize);
423 MOZ_ASSERT(ret == Z_OK);
424 zStream.avail_in = avail;
425 zStream.next_in = data;
426 ret = deflate(&zStream, Z_FINISH);
427 MOZ_ASSERT(ret == Z_STREAM_END);
428 ret = deflateEnd(&zStream);
429 MOZ_ASSERT(ret == Z_OK);
430 if (zStream.avail_out <= 0)
431 return 1;
432
433 size_t len = origSize - offset - zStream.avail_out;
434
435 /* Adjust headers */
436 header->totalSize += len;
437 *entry++ = offset;
438 header->nChunks++;
439
440 /* Prepare for next iteration */
441 size -= avail;
442 data += avail;
443 offset += len;
444 }
445 header->lastChunkSize = avail;
446 MOZ_ASSERT(header->totalSize == offset);
447 MOZ_ASSERT(header->nChunks == nChunks);
448
449 if (!outBuf.Resize(offset)) {
450 LOG("Error truncating output: %s", strerror(errno));
451 return 1;
452 }
453
454 return 0;
455
456 }
457
458 bool GetSize(const char *str, size_t *out)
459 {
460 char *end;
461 MOZ_ASSERT(out);
462 errno = 0;
463 *out = strtol(str, &end, 10);
464 return (!errno && !*end);
465 }
466
467 int main(int argc, char* argv[])
468 {
469 mozilla::ScopedDeletePtr<SzipAction> action;
470 char **firstArg;
471 bool compress = true;
472 size_t chunkSize = 0;
473 SeekableZStream::FilterId filter = SzipCompress::DEFAULT_FILTER;
474 size_t dictSize = (size_t) 0;
475
476 Logging::Init();
477
478 for (firstArg = &argv[1]; argc > 2; argc--, firstArg++) {
479 if (!firstArg[0] || firstArg[0][0] != '-')
480 break;
481 if (strcmp(firstArg[0], "-d") == 0) {
482 compress = false;
483 } else if (strcmp(firstArg[0], "-c") == 0) {
484 firstArg++;
485 argc--;
486 if (!firstArg[0])
487 break;
488 if (!GetSize(firstArg[0], &chunkSize) || !chunkSize ||
489 (chunkSize % 4096) || (chunkSize > maxChunkSize)) {
490 LOG("Invalid chunk size");
491 return 1;
492 }
493 } else if (strcmp(firstArg[0], "-f") == 0) {
494 firstArg++;
495 argc--;
496 if (!firstArg[0])
497 break;
498 bool matched = false;
499 for (unsigned int i = 0; i < sizeof(filterName) / sizeof(char *); ++i) {
500 if (strcmp(firstArg[0], filterName[i]) == 0) {
501 filter = static_cast<SeekableZStream::FilterId>(i);
502 matched = true;
503 break;
504 }
505 }
506 if (!matched) {
507 LOG("Invalid filter");
508 return 1;
509 }
510 } else if (strcmp(firstArg[0], "-D") == 0) {
511 firstArg++;
512 argc--;
513 if (!firstArg[0])
514 break;
515 if (strcmp(firstArg[0], "auto") == 0) {
516 dictSize = -1;
517 } else if (!GetSize(firstArg[0], &dictSize) || (dictSize >= 1 << 16)) {
518 LOG("Invalid dictionary size");
519 return 1;
520 }
521 }
522 }
523
524 if (argc != 2 || !firstArg[0]) {
525 LOG("usage: %s [-d] [-c CHUNKSIZE] [-f FILTER] [-D DICTSIZE] file",
526 argv[0]);
527 return 1;
528 }
529
530 if (compress) {
531 action = new SzipCompress(chunkSize, filter, dictSize);
532 } else {
533 if (chunkSize) {
534 LOG("-c is incompatible with -d");
535 return 1;
536 }
537 if (dictSize) {
538 LOG("-D is incompatible with -d");
539 return 1;
540 }
541 action = new SzipDecompress();
542 }
543
544 std::stringstream tmpOutStream;
545 tmpOutStream << firstArg[0] << ".sz." << getpid();
546 std::string tmpOut(tmpOutStream.str());
547 int ret;
548 struct stat st;
549 {
550 FileBuffer origBuf;
551 if (!origBuf.Init(firstArg[0])) {
552 LOG("Couldn't open %s: %s", firstArg[0], strerror(errno));
553 return 1;
554 }
555
556 ret = fstat(origBuf.getFd(), &st);
557 if (ret == -1) {
558 LOG("Couldn't stat %s: %s", firstArg[0], strerror(errno));
559 return 1;
560 }
561
562 size_t origSize = st.st_size;
563
564 /* Mmap the original file */
565 if (!origBuf.Resize(origSize)) {
566 LOG("Couldn't mmap %s: %s", firstArg[0], strerror(errno));
567 return 1;
568 }
569
570 /* Create the compressed file */
571 FileBuffer outBuf;
572 if (!outBuf.Init(tmpOut.c_str(), true)) {
573 LOG("Couldn't open %s: %s", tmpOut.c_str(), strerror(errno));
574 return 1;
575 }
576
577 ret = action->run(firstArg[0], origBuf, tmpOut.c_str(), outBuf);
578 if ((ret == 0) && (fstat(outBuf.getFd(), &st) == -1)) {
579 st.st_size = 0;
580 }
581 }
582
583 if ((ret == 0) && st.st_size) {
584 rename(tmpOut.c_str(), firstArg[0]);
585 } else {
586 unlink(tmpOut.c_str());
587 }
588 return ret;
589 }

mercurial