|
1 /* This Source Code Form is subject to the terms of the Mozilla Public |
|
2 * License, v. 2.0. If a copy of the MPL was not distributed with this file, |
|
3 * You can obtain one at http://mozilla.org/MPL/2.0/. */ |
|
4 |
|
5 #include <algorithm> |
|
6 #include <map> |
|
7 #include <sys/stat.h> |
|
8 #include <string> |
|
9 #include <sstream> |
|
10 #include <cstring> |
|
11 #include <cstdlib> |
|
12 #include <zlib.h> |
|
13 #include <fcntl.h> |
|
14 #include <errno.h> |
|
15 #include "mozilla/Assertions.h" |
|
16 #include "mozilla/Scoped.h" |
|
17 #include "SeekableZStream.h" |
|
18 #include "Utils.h" |
|
19 #include "Logging.h" |
|
20 |
|
21 Logging Logging::Singleton; |
|
22 |
|
/* Command-line names of the filters. An entry's index is used as the
 * numeric SeekableZStream::FilterId value: main() casts the matching index
 * to a FilterId, and SzipCompress::run() indexes this table with a FilterId.
 * NOTE(review): "auto" is presumably the slot of SeekableZStream::FILTER_MAX
 * (try every filter) -- confirm against SeekableZStream.h. */
const char *filterName[] = {
  "none",
  "thumb",
  "arm",
  "x86",
  "auto"
};
|
30 |
|
31 /* Maximum supported size for chunkSize */ |
|
32 static const size_t maxChunkSize = |
|
33 1 << (8 * std::min(sizeof(((SeekableZStreamHeader *)nullptr)->chunkSize), |
|
34 sizeof(((SeekableZStreamHeader *)nullptr)->lastChunkSize)) - 1); |
|
35 |
|
36 class Buffer: public MappedPtr |
|
37 { |
|
38 public: |
|
39 virtual ~Buffer() { } |
|
40 |
|
41 virtual bool Resize(size_t size) |
|
42 { |
|
43 MemoryRange buf = mmap(nullptr, size, PROT_READ | PROT_WRITE, |
|
44 MAP_PRIVATE | MAP_ANON, -1, 0); |
|
45 if (buf == MAP_FAILED) |
|
46 return false; |
|
47 if (*this != MAP_FAILED) |
|
48 memcpy(buf, *this, std::min(size, GetLength())); |
|
49 Assign(buf); |
|
50 return true; |
|
51 } |
|
52 |
|
53 bool Fill(Buffer &other) |
|
54 { |
|
55 size_t size = other.GetLength(); |
|
56 if (!size || !Resize(size)) |
|
57 return false; |
|
58 memcpy(static_cast<void *>(*this), static_cast<void *>(other), size); |
|
59 return true; |
|
60 } |
|
61 }; |
|
62 |
|
63 class FileBuffer: public Buffer |
|
64 { |
|
65 public: |
|
66 bool Init(const char *name, bool writable_ = false) |
|
67 { |
|
68 fd = open(name, writable_ ? O_RDWR | O_CREAT | O_TRUNC : O_RDONLY, 0666); |
|
69 if (fd == -1) |
|
70 return false; |
|
71 writable = writable_; |
|
72 return true; |
|
73 } |
|
74 |
|
75 virtual bool Resize(size_t size) |
|
76 { |
|
77 if (writable) { |
|
78 if (ftruncate(fd, size) == -1) |
|
79 return false; |
|
80 } |
|
81 Assign(MemoryRange::mmap(nullptr, size, |
|
82 PROT_READ | (writable ? PROT_WRITE : 0), |
|
83 writable ? MAP_SHARED : MAP_PRIVATE, fd, 0)); |
|
84 return this != MAP_FAILED; |
|
85 } |
|
86 |
|
87 int getFd() |
|
88 { |
|
89 return fd; |
|
90 } |
|
91 |
|
92 private: |
|
93 AutoCloseFD fd; |
|
94 bool writable; |
|
95 }; |
|
96 |
|
/* Buffer holding a filtered copy of another buffer. */
class FilteredBuffer: public Buffer
{
public:
  /* Copy `other` into this buffer and run the filter identified by `filter`
   * over the copy, in FILTER mode, one chunk of at most `chunkSize` bytes at
   * a time. The callback receives the chunk's offset from the start of the
   * buffer, the chunk's address and its length.
   * If the copy cannot be made, nothing is filtered and the buffer is left
   * as Fill() left it. */
  void Filter(Buffer &other, SeekableZStream::FilterId filter, size_t chunkSize)
  {
    SeekableZStream::ZStreamFilter filterCB =
      SeekableZStream::GetFilter(filter);
    MOZ_ASSERT(filterCB);
    /* Without a successful copy there is no valid mapping to walk over;
     * filtering anyway would dereference an invalid pointer. */
    if (!Fill(other)) {
      LOG("Couldn't copy data to filter");
      return;
    }
    size_t size = other.GetLength();
    Bytef *start = reinterpret_cast<Bytef *>(static_cast<void *>(*this));
    Bytef *data = start;
    /* Filter needs to be applied in chunks. */
    while (size) {
      size_t avail = std::min(size, chunkSize);
      filterCB(data - start, SeekableZStream::FILTER, data, avail);
      size -= avail;
      data += avail;
    }
  }
};
|
119 |
|
120 template <typename T> |
|
121 class Dictionary: public Buffer |
|
122 { |
|
123 typedef T piece; |
|
124 typedef std::pair<piece, int> stat_pair; |
|
125 |
|
126 static bool stat_cmp(stat_pair a, stat_pair b) |
|
127 { |
|
128 return a.second < b.second; |
|
129 } |
|
130 |
|
131 public: |
|
132 Dictionary(Buffer &inBuf, size_t size) |
|
133 { |
|
134 if (!size || !Resize(size)) |
|
135 return; |
|
136 DEBUG_LOG("Creating dictionary"); |
|
137 piece *origBufPieces = reinterpret_cast<piece *>( |
|
138 static_cast<void *>(inBuf)); |
|
139 std::map<piece, int> stats; |
|
140 for (unsigned int i = 0; i < inBuf.GetLength() / sizeof(piece); i++) { |
|
141 stats[origBufPieces[i]]++; |
|
142 } |
|
143 std::vector<stat_pair> statsVec(stats.begin(), stats.end()); |
|
144 std::sort(statsVec.begin(), statsVec.end(), stat_cmp); |
|
145 |
|
146 piece *dictPieces = reinterpret_cast<piece *>( |
|
147 static_cast<void *>(*this)); |
|
148 typename std::vector<stat_pair>::reverse_iterator it = statsVec.rbegin(); |
|
149 for (int i = size / sizeof(piece); i > 0 && it < statsVec.rend(); |
|
150 i--, ++it) { |
|
151 dictPieces[i - 1] = it->first; |
|
152 } |
|
153 } |
|
154 }; |
|
155 |
|
156 class SzipAction |
|
157 { |
|
158 public: |
|
159 virtual int run(const char *name, Buffer &origBuf, |
|
160 const char *outName, Buffer &outBuf) = 0; |
|
161 |
|
162 virtual ~SzipAction() {} |
|
163 }; |
|
164 |
|
165 class SzipDecompress: public SzipAction |
|
166 { |
|
167 public: |
|
168 int run(const char *name, Buffer &origBuf, |
|
169 const char *outName, Buffer &outBuf); |
|
170 }; |
|
171 |
|
172 |
|
173 class SzipCompress: public SzipAction |
|
174 { |
|
175 public: |
|
176 int run(const char *name, Buffer &origBuf, |
|
177 const char *outName, Buffer &outBuf); |
|
178 |
|
179 SzipCompress(size_t aChunkSize, SeekableZStream::FilterId aFilter, |
|
180 size_t aDictSize) |
|
181 : chunkSize(aChunkSize ? aChunkSize : 16384) |
|
182 , filter(aFilter) |
|
183 , dictSize(aDictSize) |
|
184 {} |
|
185 |
|
186 const static signed char winSizeLog = 15; |
|
187 const static size_t winSize = 1 << winSizeLog; |
|
188 |
|
189 const static SeekableZStream::FilterId DEFAULT_FILTER = |
|
190 #if defined(TARGET_THUMB) |
|
191 SeekableZStream::BCJ_THUMB; |
|
192 #elif defined(TARGET_ARM) |
|
193 SeekableZStream::BCJ_ARM; |
|
194 #elif defined(TARGET_X86) |
|
195 SeekableZStream::BCJ_X86; |
|
196 #else |
|
197 SeekableZStream::NONE; |
|
198 #endif |
|
199 |
|
200 private: |
|
201 |
|
202 int do_compress(Buffer &origBuf, Buffer &outBuf, const unsigned char *aDict, |
|
203 size_t aDictSize, SeekableZStream::FilterId aFilter); |
|
204 |
|
205 size_t chunkSize; |
|
206 SeekableZStream::FilterId filter; |
|
207 size_t dictSize; |
|
208 }; |
|
209 |
|
210 /* Decompress a seekable compressed stream */ |
|
211 int SzipDecompress::run(const char *name, Buffer &origBuf, |
|
212 const char *outName, Buffer &outBuf) |
|
213 { |
|
214 size_t origSize = origBuf.GetLength(); |
|
215 if (origSize < sizeof(SeekableZStreamHeader)) { |
|
216 LOG("%s is not compressed", name); |
|
217 return 0; |
|
218 } |
|
219 |
|
220 SeekableZStream zstream; |
|
221 if (!zstream.Init(origBuf, origSize)) |
|
222 return 0; |
|
223 |
|
224 size_t size = zstream.GetUncompressedSize(); |
|
225 |
|
226 /* Give enough room for the uncompressed data */ |
|
227 if (!outBuf.Resize(size)) { |
|
228 LOG("Error resizing %s: %s", outName, strerror(errno)); |
|
229 return 1; |
|
230 } |
|
231 |
|
232 if (!zstream.Decompress(outBuf, 0, size)) |
|
233 return 1; |
|
234 |
|
235 return 0; |
|
236 } |
|
237 |
|
238 /* Generate a seekable compressed stream. */ |
|
239 int SzipCompress::run(const char *name, Buffer &origBuf, |
|
240 const char *outName, Buffer &outBuf) |
|
241 { |
|
242 size_t origSize = origBuf.GetLength(); |
|
243 if (origSize == 0) { |
|
244 LOG("Won't compress %s: it's empty", name); |
|
245 return 1; |
|
246 } |
|
247 if (SeekableZStreamHeader::validate(origBuf)) { |
|
248 LOG("Skipping %s: it's already a szip", name); |
|
249 return 0; |
|
250 } |
|
251 bool compressed = false; |
|
252 LOG("Size = %" PRIuSize, origSize); |
|
253 |
|
254 /* Allocate a buffer the size of the uncompressed data: we don't want |
|
255 * a compressed file larger than that anyways. */ |
|
256 if (!outBuf.Resize(origSize)) { |
|
257 LOG("Couldn't allocate output buffer: %s", strerror(errno)); |
|
258 return 1; |
|
259 } |
|
260 |
|
261 /* Find the most appropriate filter */ |
|
262 SeekableZStream::FilterId firstFilter, lastFilter; |
|
263 bool scanFilters; |
|
264 if (filter == SeekableZStream::FILTER_MAX) { |
|
265 firstFilter = SeekableZStream::NONE; |
|
266 lastFilter = SeekableZStream::FILTER_MAX; |
|
267 scanFilters = true; |
|
268 } else { |
|
269 firstFilter = lastFilter = filter; |
|
270 ++lastFilter; |
|
271 scanFilters = false; |
|
272 } |
|
273 |
|
274 mozilla::ScopedDeletePtr<Buffer> filteredBuf; |
|
275 Buffer *origData; |
|
276 for (SeekableZStream::FilterId f = firstFilter; f < lastFilter; ++f) { |
|
277 FilteredBuffer *filteredTmp = nullptr; |
|
278 Buffer tmpBuf; |
|
279 if (f != SeekableZStream::NONE) { |
|
280 DEBUG_LOG("Applying filter \"%s\"", filterName[f]); |
|
281 filteredTmp = new FilteredBuffer(); |
|
282 filteredTmp->Filter(origBuf, f, chunkSize); |
|
283 origData = filteredTmp; |
|
284 } else { |
|
285 origData = &origBuf; |
|
286 } |
|
287 if (dictSize && !scanFilters) { |
|
288 filteredBuf = filteredTmp; |
|
289 break; |
|
290 } |
|
291 DEBUG_LOG("Compressing with no dictionary"); |
|
292 if (do_compress(*origData, tmpBuf, nullptr, 0, f) == 0) { |
|
293 if (tmpBuf.GetLength() < outBuf.GetLength()) { |
|
294 outBuf.Fill(tmpBuf); |
|
295 compressed = true; |
|
296 filter = f; |
|
297 filteredBuf = filteredTmp; |
|
298 continue; |
|
299 } |
|
300 } |
|
301 delete filteredTmp; |
|
302 } |
|
303 |
|
304 origData = filteredBuf ? filteredBuf : &origBuf; |
|
305 |
|
306 if (dictSize) { |
|
307 Dictionary<uint64_t> dict(*origData, dictSize ? SzipCompress::winSize : 0); |
|
308 |
|
309 /* Find the most appropriate dictionary size */ |
|
310 size_t firstDictSize, lastDictSize; |
|
311 if (dictSize == (size_t) -1) { |
|
312 /* If we scanned for filters, we effectively already tried dictSize=0 */ |
|
313 firstDictSize = scanFilters ? 4096 : 0; |
|
314 lastDictSize = SzipCompress::winSize; |
|
315 } else { |
|
316 firstDictSize = lastDictSize = dictSize; |
|
317 } |
|
318 |
|
319 Buffer tmpBuf; |
|
320 for (size_t d = firstDictSize; d <= lastDictSize; d += 4096) { |
|
321 DEBUG_LOG("Compressing with dictionary of size %" PRIuSize, d); |
|
322 if (do_compress(*origData, tmpBuf, static_cast<unsigned char *>(dict) |
|
323 + SzipCompress::winSize - d, d, filter)) |
|
324 continue; |
|
325 if (!compressed || tmpBuf.GetLength() < outBuf.GetLength()) { |
|
326 outBuf.Fill(tmpBuf); |
|
327 compressed = true; |
|
328 dictSize = d; |
|
329 } |
|
330 } |
|
331 } |
|
332 |
|
333 if (!compressed) { |
|
334 outBuf.Fill(origBuf); |
|
335 LOG("Not compressed"); |
|
336 return 0; |
|
337 } |
|
338 |
|
339 if (dictSize == (size_t) -1) |
|
340 dictSize = 0; |
|
341 |
|
342 DEBUG_LOG("Used filter \"%s\" and dictionary size of %" PRIuSize, |
|
343 filterName[filter], dictSize); |
|
344 LOG("Compressed size is %" PRIuSize, outBuf.GetLength()); |
|
345 |
|
346 /* Sanity check */ |
|
347 Buffer tmpBuf; |
|
348 SzipDecompress decompress; |
|
349 if (decompress.run("buffer", outBuf, "buffer", tmpBuf)) |
|
350 return 1; |
|
351 |
|
352 size_t size = tmpBuf.GetLength(); |
|
353 if (size != origSize) { |
|
354 LOG("Compression error: %" PRIuSize " != %" PRIuSize, size, origSize); |
|
355 return 1; |
|
356 } |
|
357 if (memcmp(static_cast<void *>(origBuf), static_cast<void *>(tmpBuf), size)) { |
|
358 LOG("Compression error: content mismatch"); |
|
359 return 1; |
|
360 } |
|
361 return 0; |
|
362 } |
|
363 |
|
/* Compress origBuf into outBuf as a seekable stream: a header, the aDictSize
 * bytes of dictionary at aDict, a table with the offset of each chunk, then
 * the chunks themselves, each one an independent raw deflate stream of at
 * most chunkSize bytes of input.
 *
 * aFilter is only recorded in the header; the data in origBuf is compressed
 * as it is.
 *
 * Returns 0 on success, in which case outBuf is truncated to the size of the
 * stream. Returns 1 when the result would not be smaller than the input, or
 * when allocation or zlib fails. */
int SzipCompress::do_compress(Buffer &origBuf, Buffer &outBuf,
                              const unsigned char *aDict, size_t aDictSize,
                              SeekableZStream::FilterId aFilter)
{
  size_t origSize = origBuf.GetLength();
  MOZ_ASSERT(origSize != 0);

  /* Expected total number of chunks */
  size_t nChunks = ((origSize + chunkSize - 1) / chunkSize);

  /* The first chunk is going to be stored after the header, the dictionary
   * and the offset table */
  size_t offset = sizeof(SeekableZStreamHeader) + aDictSize
                  + nChunks * sizeof(uint32_t);

  if (offset >= origSize)
    return 1;

  /* Allocate a buffer the size of the uncompressed data: we don't want
   * a compressed file larger than that anyways. */
  if (!outBuf.Resize(origSize)) {
    LOG("Couldn't allocate output buffer: %s", strerror(errno));
    return 1;
  }

  SeekableZStreamHeader *header = new (outBuf) SeekableZStreamHeader;
  unsigned char *dictionary = static_cast<unsigned char *>(
                              outBuf + sizeof(SeekableZStreamHeader));
  le_uint32 *entry =
    reinterpret_cast<le_uint32 *>(dictionary + aDictSize);

  /* Initialize header */
  header->chunkSize = chunkSize;
  header->dictSize = aDictSize;
  header->totalSize = offset;
  header->windowBits = -SzipCompress::winSizeLog; // Raw stream,
                                                  // window size of 32k.
  header->filter = aFilter;
  if (aDictSize)
    memcpy(dictionary, aDict, aDictSize);

  /* Initialize zlib structure */
  z_stream zStream;
  memset(&zStream, 0, sizeof(zStream));
  zStream.avail_out = origSize - offset;
  zStream.next_out = static_cast<Bytef*>(outBuf) + offset;

  size_t avail = 0;
  size_t size = origSize;
  unsigned char *data = reinterpret_cast<unsigned char *>(
                        static_cast<void *>(origBuf));
  while (size) {
    avail = std::min(size, chunkSize);

    /* Compress chunk */
    int ret = deflateInit2(&zStream, 9, Z_DEFLATED, header->windowBits,
                           MAX_MEM_LEVEL, Z_DEFAULT_STRATEGY);
    if (ret != Z_OK) {
      LOG("Couldn't initialize compression");
      return 1;
    }
    if (aDictSize &&
        deflateSetDictionary(&zStream, dictionary, aDictSize) != Z_OK) {
      LOG("Couldn't set compression dictionary");
      deflateEnd(&zStream);
      return 1;
    }
    zStream.avail_in = avail;
    zStream.next_in = data;
    ret = deflate(&zStream, Z_FINISH);
    /* Anything but Z_STREAM_END means the chunk didn't fit in what is left
     * of the output buffer (or zlib failed): either way, this attempt is not
     * usable. This must be a runtime check, not an assertion, because
     * running out of room is an expected outcome for incompressible data. */
    const bool finished = (ret == Z_STREAM_END);
    ret = deflateEnd(&zStream);
    if (!finished || zStream.avail_out <= 0)
      return 1;
    MOZ_ASSERT(ret == Z_OK);

    /* Bytes written for this chunk: what was available before the chunk
     * minus what is still available. */
    size_t len = origSize - offset - zStream.avail_out;

    /* Adjust headers */
    header->totalSize += len;
    *entry++ = offset;
    header->nChunks++;

    /* Prepare for next iteration */
    size -= avail;
    data += avail;
    offset += len;
  }
  header->lastChunkSize = avail;
  MOZ_ASSERT(header->totalSize == offset);
  MOZ_ASSERT(header->nChunks == nChunks);

  if (!outBuf.Resize(offset)) {
    LOG("Error truncating output: %s", strerror(errno));
    return 1;
  }

  return 0;

}
|
457 |
|
/* Parse `str` as a non-negative base-10 size.
 *
 * On success, stores the value in *out and returns true. Returns false,
 * leaving *out untouched, when `str` or `out` is null, when `str` contains
 * no digits, has trailing characters, is negative, or is out of range. */
bool GetSize(const char *str, size_t *out)
{
  if (!str || !out)
    return false;

  char *end = nullptr;
  errno = 0;
  const long value = strtol(str, &end, 10);
  /* end == str: nothing was converted (e.g. empty string).
   * *end: garbage after the number.
   * value < 0: would turn into a huge size once converted to size_t. */
  if (errno || end == str || *end || value < 0)
    return false;

  *out = static_cast<size_t>(value);
  return true;
}
|
466 |
|
467 int main(int argc, char* argv[]) |
|
468 { |
|
469 mozilla::ScopedDeletePtr<SzipAction> action; |
|
470 char **firstArg; |
|
471 bool compress = true; |
|
472 size_t chunkSize = 0; |
|
473 SeekableZStream::FilterId filter = SzipCompress::DEFAULT_FILTER; |
|
474 size_t dictSize = (size_t) 0; |
|
475 |
|
476 Logging::Init(); |
|
477 |
|
478 for (firstArg = &argv[1]; argc > 2; argc--, firstArg++) { |
|
479 if (!firstArg[0] || firstArg[0][0] != '-') |
|
480 break; |
|
481 if (strcmp(firstArg[0], "-d") == 0) { |
|
482 compress = false; |
|
483 } else if (strcmp(firstArg[0], "-c") == 0) { |
|
484 firstArg++; |
|
485 argc--; |
|
486 if (!firstArg[0]) |
|
487 break; |
|
488 if (!GetSize(firstArg[0], &chunkSize) || !chunkSize || |
|
489 (chunkSize % 4096) || (chunkSize > maxChunkSize)) { |
|
490 LOG("Invalid chunk size"); |
|
491 return 1; |
|
492 } |
|
493 } else if (strcmp(firstArg[0], "-f") == 0) { |
|
494 firstArg++; |
|
495 argc--; |
|
496 if (!firstArg[0]) |
|
497 break; |
|
498 bool matched = false; |
|
499 for (unsigned int i = 0; i < sizeof(filterName) / sizeof(char *); ++i) { |
|
500 if (strcmp(firstArg[0], filterName[i]) == 0) { |
|
501 filter = static_cast<SeekableZStream::FilterId>(i); |
|
502 matched = true; |
|
503 break; |
|
504 } |
|
505 } |
|
506 if (!matched) { |
|
507 LOG("Invalid filter"); |
|
508 return 1; |
|
509 } |
|
510 } else if (strcmp(firstArg[0], "-D") == 0) { |
|
511 firstArg++; |
|
512 argc--; |
|
513 if (!firstArg[0]) |
|
514 break; |
|
515 if (strcmp(firstArg[0], "auto") == 0) { |
|
516 dictSize = -1; |
|
517 } else if (!GetSize(firstArg[0], &dictSize) || (dictSize >= 1 << 16)) { |
|
518 LOG("Invalid dictionary size"); |
|
519 return 1; |
|
520 } |
|
521 } |
|
522 } |
|
523 |
|
524 if (argc != 2 || !firstArg[0]) { |
|
525 LOG("usage: %s [-d] [-c CHUNKSIZE] [-f FILTER] [-D DICTSIZE] file", |
|
526 argv[0]); |
|
527 return 1; |
|
528 } |
|
529 |
|
530 if (compress) { |
|
531 action = new SzipCompress(chunkSize, filter, dictSize); |
|
532 } else { |
|
533 if (chunkSize) { |
|
534 LOG("-c is incompatible with -d"); |
|
535 return 1; |
|
536 } |
|
537 if (dictSize) { |
|
538 LOG("-D is incompatible with -d"); |
|
539 return 1; |
|
540 } |
|
541 action = new SzipDecompress(); |
|
542 } |
|
543 |
|
544 std::stringstream tmpOutStream; |
|
545 tmpOutStream << firstArg[0] << ".sz." << getpid(); |
|
546 std::string tmpOut(tmpOutStream.str()); |
|
547 int ret; |
|
548 struct stat st; |
|
549 { |
|
550 FileBuffer origBuf; |
|
551 if (!origBuf.Init(firstArg[0])) { |
|
552 LOG("Couldn't open %s: %s", firstArg[0], strerror(errno)); |
|
553 return 1; |
|
554 } |
|
555 |
|
556 ret = fstat(origBuf.getFd(), &st); |
|
557 if (ret == -1) { |
|
558 LOG("Couldn't stat %s: %s", firstArg[0], strerror(errno)); |
|
559 return 1; |
|
560 } |
|
561 |
|
562 size_t origSize = st.st_size; |
|
563 |
|
564 /* Mmap the original file */ |
|
565 if (!origBuf.Resize(origSize)) { |
|
566 LOG("Couldn't mmap %s: %s", firstArg[0], strerror(errno)); |
|
567 return 1; |
|
568 } |
|
569 |
|
570 /* Create the compressed file */ |
|
571 FileBuffer outBuf; |
|
572 if (!outBuf.Init(tmpOut.c_str(), true)) { |
|
573 LOG("Couldn't open %s: %s", tmpOut.c_str(), strerror(errno)); |
|
574 return 1; |
|
575 } |
|
576 |
|
577 ret = action->run(firstArg[0], origBuf, tmpOut.c_str(), outBuf); |
|
578 if ((ret == 0) && (fstat(outBuf.getFd(), &st) == -1)) { |
|
579 st.st_size = 0; |
|
580 } |
|
581 } |
|
582 |
|
583 if ((ret == 0) && st.st_size) { |
|
584 rename(tmpOut.c_str(), firstArg[0]); |
|
585 } else { |
|
586 unlink(tmpOut.c_str()); |
|
587 } |
|
588 return ret; |
|
589 } |