|
1 # This Source Code Form is subject to the terms of the Mozilla Public |
|
2 # License, v. 2.0. If a copy of the MPL was not distributed with this |
|
3 # file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
4 |
|
5 from io import BytesIO |
|
6 import struct |
|
7 import zlib |
|
8 import os |
|
9 from zipfile import ( |
|
10 ZIP_STORED, |
|
11 ZIP_DEFLATED, |
|
12 ) |
|
13 from collections import OrderedDict |
|
14 from urlparse import urlparse, ParseResult |
|
15 import mozpack.path |
|
16 |
|
# Compression methods for jar members, aliased from the zipfile module.
JAR_STORED = ZIP_STORED
JAR_DEFLATED = ZIP_DEFLATED
# Maximum zlib window size (same value as zlib.MAX_WBITS); negated when
# passed to zlib so that raw deflate streams (no zlib header/checksum) are
# produced/consumed, as required for ZIP member data.
MAX_WBITS = 15
|
20 |
|
21 |
|
class JarReaderError(Exception):
    '''Exception raised for errors encountered while reading a Jar archive.'''
|
24 |
|
25 |
|
class JarWriterError(Exception):
    '''Exception raised for errors encountered while writing a Jar archive.'''
|
28 |
|
29 |
|
class JarStruct(object):
    '''
    Helper used to define ZIP archive raw data structures. Data structures
    handled by this helper all start with a magic number, defined in
    subclasses MAGIC field as a 32-bits unsigned integer, followed by data
    structured as described in subclasses STRUCT field.

    The STRUCT field contains a list of (name, type) pairs where name is a
    field name, and the type can be one of 'uint32', 'uint16' or one of the
    field names. In the latter case, the field is considered to be a string
    buffer with a length given in that field.
    For example,
        STRUCT = [
            ('version', 'uint32'),
            ('filename_size', 'uint16'),
            ('filename', 'filename_size')
        ]
    describes a structure with a 'version' 32-bits unsigned integer field,
    followed by a 'filename_size' 16-bits unsigned integer field, followed by a
    filename_size-long string buffer 'filename'.

    Fields that are used as other fields size are not stored in objects. In the
    above example, an instance of such subclass would only have two attributes:
        obj['version']
        obj['filename']
    filename_size would be obtained with len(obj['filename']).

    JarStruct subclasses instances can be either initialized from existing data
    (deserialized), or with empty fields.
    '''

    # struct module format character and byte size for each supported field
    # type. Everything is little-endian (see the '<' prefixes below), as
    # mandated by the ZIP format.
    TYPE_MAPPING = {'uint32': ('I', 4), 'uint16': ('H', 2)}

    def __init__(self, data=None):
        '''
        Create an instance from the given data. Data may be omitted to create
        an instance with empty fields.
        '''
        assert self.MAGIC and isinstance(self.STRUCT, OrderedDict)
        # Names of fields that hold the size of another, variable length
        # field. These are computed from the field they describe and are not
        # stored in self._values.
        self.size_fields = set(t for t in self.STRUCT.itervalues()
                               if not t in JarStruct.TYPE_MAPPING)
        self._values = {}
        if data:
            self._init_data(data)
        else:
            self._init_empty()

    def _init_data(self, data):
        '''
        Initialize an instance from data, following the data structure
        described in self.STRUCT. The self.MAGIC signature is expected at
        data[:4].

        Raises JarReaderError when the magic number does not match.
        '''
        assert data is not None
        self.signature, size = JarStruct.get_data('uint32', data)
        if self.signature != self.MAGIC:
            raise JarReaderError('Bad magic')
        offset = size
        # For all fields used as other fields sizes, keep track of their value
        # separately.
        sizes = dict((t, 0) for t in self.size_fields)
        for name, t in self.STRUCT.iteritems():
            if t in JarStruct.TYPE_MAPPING:
                value, size = JarStruct.get_data(t, data[offset:])
            else:
                # Variable length buffer; its size was read as an earlier
                # field (STRUCT order guarantees the size comes first).
                size = sizes[t]
                value = data[offset:offset + size]
                if isinstance(value, memoryview):
                    value = value.tobytes()
            if not name in sizes:
                self._values[name] = value
            else:
                # Size fields are not exposed as values; they only drive how
                # much data the field they describe takes.
                sizes[name] = value
            offset += size

    def _init_empty(self):
        '''
        Initialize an instance with empty fields.
        '''
        self.signature = self.MAGIC
        for name, t in self.STRUCT.iteritems():
            if name in self.size_fields:
                continue
            # Integers default to 0, variable length buffers to the empty
            # string.
            self._values[name] = 0 if t in JarStruct.TYPE_MAPPING else ''

    @staticmethod
    def get_data(type, data):
        '''
        Deserialize a single field of given type (must be one of
        JarStruct.TYPE_MAPPING) at the given offset in the given data.
        Return a (value, size) pair where size is the number of bytes
        consumed.
        '''
        assert type in JarStruct.TYPE_MAPPING
        assert data is not None
        format, size = JarStruct.TYPE_MAPPING[type]
        data = data[:size]
        # struct.unpack requires a real byte string, not a memoryview.
        if isinstance(data, memoryview):
            data = data.tobytes()
        return struct.unpack('<' + format, data)[0], size

    def serialize(self):
        '''
        Serialize the data structure according to the data structure definition
        from self.STRUCT.
        '''
        serialized = struct.pack('<I', self.signature)
        # Map each size field name to the name of the variable length field
        # it describes, so its value can be derived below.
        sizes = dict((t, name) for name, t in self.STRUCT.iteritems()
                     if not t in JarStruct.TYPE_MAPPING)
        for name, t in self.STRUCT.iteritems():
            if t in JarStruct.TYPE_MAPPING:
                format, size = JarStruct.TYPE_MAPPING[t]
                if name in sizes:
                    # Size fields are emitted from the current length of the
                    # field they describe.
                    value = len(self[sizes[name]])
                else:
                    value = self[name]
                serialized += struct.pack('<' + format, value)
            else:
                serialized += self[name]
        return serialized

    @property
    def size(self):
        '''
        Return the size of the data structure, given the current values of all
        variable length fields.
        '''
        # Start with the size of the magic number.
        size = JarStruct.TYPE_MAPPING['uint32'][1]
        for name, type in self.STRUCT.iteritems():
            if type in JarStruct.TYPE_MAPPING:
                size += JarStruct.TYPE_MAPPING[type][1]
            else:
                size += len(self[name])
        return size

    def __getitem__(self, key):
        return self._values[key]

    def __setitem__(self, key, value):
        if not key in self.STRUCT:
            raise KeyError(key)
        if key in self.size_fields:
            # Size fields are always computed from the field they describe;
            # they can't be set directly.
            raise AttributeError("can't set attribute")
        self._values[key] = value

    def __contains__(self, key):
        return key in self._values

    def __iter__(self):
        # Iterates over (name, value) pairs, size fields excluded.
        return self._values.iteritems()

    def __repr__(self):
        return "<%s %s>" % (self.__class__.__name__,
                            ' '.join('%s=%s' % (n, v) for n, v in self))
|
182 |
|
183 |
|
class JarCdirEnd(JarStruct):
    '''
    End of central directory record.
    '''
    MAGIC = 0x06054b50
    # Field layout mandated by the ZIP format; do not reorder, JarStruct
    # (de)serializes fields in this exact order.
    STRUCT = OrderedDict([
        ('disk_num', 'uint16'),
        ('cdir_disk', 'uint16'),
        ('disk_entries', 'uint16'),
        ('cdir_entries', 'uint16'),
        ('cdir_size', 'uint32'),
        ('cdir_offset', 'uint32'),
        ('comment_size', 'uint16'),
        ('comment', 'comment_size'),
    ])

# Size of an end of central directory record with an empty comment.
CDIR_END_SIZE = JarCdirEnd().size
|
201 |
|
202 |
|
class JarCdirEntry(JarStruct):
    '''
    Central directory file header
    '''
    MAGIC = 0x02014b50
    # Field layout mandated by the ZIP format; do not reorder, JarStruct
    # (de)serializes fields in this exact order. The three variable length
    # fields at the end are sized by the three *_size fields above them.
    STRUCT = OrderedDict([
        ('creator_version', 'uint16'),
        ('min_version', 'uint16'),
        ('general_flag', 'uint16'),
        ('compression', 'uint16'),
        ('lastmod_time', 'uint16'),
        ('lastmod_date', 'uint16'),
        ('crc32', 'uint32'),
        ('compressed_size', 'uint32'),
        ('uncompressed_size', 'uint32'),
        ('filename_size', 'uint16'),
        ('extrafield_size', 'uint16'),
        ('filecomment_size', 'uint16'),
        ('disknum', 'uint16'),
        ('internal_attr', 'uint16'),
        ('external_attr', 'uint32'),
        ('offset', 'uint32'),
        ('filename', 'filename_size'),
        ('extrafield', 'extrafield_size'),
        ('filecomment', 'filecomment_size'),
    ])
|
229 |
|
230 |
|
class JarLocalFileHeader(JarStruct):
    '''
    Local file header
    '''
    MAGIC = 0x04034b50
    # Field layout mandated by the ZIP format; do not reorder. This is a
    # subset of JarCdirEntry's fields, which is what allows JarWriter.finish
    # to fill a header by copying the central directory entry fields with
    # matching names.
    STRUCT = OrderedDict([
        ('min_version', 'uint16'),
        ('general_flag', 'uint16'),
        ('compression', 'uint16'),
        ('lastmod_time', 'uint16'),
        ('lastmod_date', 'uint16'),
        ('crc32', 'uint32'),
        ('compressed_size', 'uint32'),
        ('uncompressed_size', 'uint32'),
        ('filename_size', 'uint16'),
        ('extra_field_size', 'uint16'),
        ('filename', 'filename_size'),
        ('extra_field', 'extra_field_size'),
    ])
|
250 |
|
251 |
|
class JarFileReader(object):
    '''
    File-like class for use by JarReader to give access to individual files
    within a Jar archive.
    '''
    def __init__(self, header, data):
        '''
        Create a reader for a single archive member. header is the local file
        header corresponding to the member in the jar archive, and data is a
        buffer starting at the member's (possibly compressed) file data.
        '''
        assert header['compression'] in [JAR_DEFLATED, JAR_STORED]
        self._data = data
        # Mirror the interesting local file header fields as attributes.
        self.filename = header['filename']
        self.compressed_size = header['compressed_size']
        self.uncompressed_size = header['uncompressed_size']
        self.crc32 = header['crc32']
        self.compressed = header['compression'] == JAR_DEFLATED

    def read(self, length=-1):
        '''
        Read up to length bytes of uncompressed data (all remaining data
        when length is negative).
        '''
        return self.uncompressed_data.read(length)

    def readlines(self):
        '''
        Return the uncompressed data split into lines, keeping line endings.
        '''
        return self.read().splitlines(True)

    def __iter__(self):
        '''
        Support "for line in fileobj" style constructs.
        '''
        return iter(self.readlines())

    def seek(self, pos, whence=os.SEEK_SET):
        '''
        Move the current position within the uncompressed data. Subsequent
        reads start from there.
        '''
        return self.uncompressed_data.seek(pos, whence)

    def close(self):
        '''
        Release the buffer holding the uncompressed data.
        '''
        self.uncompressed_data.close()

    @property
    def compressed_data(self):
        '''
        Return the raw (possibly compressed) file data.
        '''
        return self._data[:self.compressed_size]

    @property
    def uncompressed_data(self):
        '''
        Return a file-like object over the uncompressed file data, inflating
        it on first access and caching the result.
        '''
        try:
            return self._uncompressed_data
        except AttributeError:
            pass
        buf = self.compressed_data
        if self.compressed:
            # Negative wbits: the data is a raw deflate stream.
            content = zlib.decompress(buf.tobytes(), -MAX_WBITS)
        else:
            content = buf.tobytes()
        if len(content) != self.uncompressed_size:
            raise JarReaderError('Corrupted file? %s' % self.filename)
        self._uncompressed_data = BytesIO(content)
        return self._uncompressed_data
|
326 |
|
327 |
|
class JarReader(object):
    '''
    Class with methods to read Jar files. Can open standard jar files as well
    as Mozilla jar files (see further details in the JarWriter documentation).
    '''
    def __init__(self, file=None, fileobj=None):
        '''
        Opens the given file as a Jar archive. Use the given file-like object
        if one is given instead of opening the given file name.
        Raises JarReaderError when no end of central directory record can be
        found.
        '''
        if fileobj:
            data = fileobj.read()
        else:
            # Close the file as soon as its content is read, instead of
            # relying on garbage collection to eventually close it.
            fd = open(file, 'rb')
            try:
                data = fd.read()
            finally:
                fd.close()
        # A memoryview lets the slicing below avoid copying data around.
        self._data = memoryview(data)
        # The End of Central Directory Record has a variable size because of
        # comments it may contain, so scan for it from the end of the file.
        offset = -CDIR_END_SIZE
        while True:
            signature = JarStruct.get_data('uint32', self._data[offset:])[0]
            if signature == JarCdirEnd.MAGIC:
                break
            if offset == -len(self._data):
                raise JarReaderError('Not a jar?')
            offset -= 1
        self._cdir_end = JarCdirEnd(self._data[offset:])

    def close(self):
        '''
        Free some resources associated with the Jar.
        '''
        del self._data

    @property
    def entries(self):
        '''
        Return an ordered dict of central directory entries, indexed by
        filename, in the order they appear in the Jar archive central
        directory. Directory entries are skipped. The result is computed
        lazily on first access and cached.
        '''
        if hasattr(self, '_entries'):
            return self._entries
        preload = 0
        if self.is_optimized:
            # In optimized archives, the first 32-bits value is the length of
            # data to preload (see JarWriter.finish).
            preload = JarStruct.get_data('uint32', self._data)[0]
        entries = OrderedDict()
        offset = self._cdir_end['cdir_offset']
        for _ in xrange(self._cdir_end['cdir_entries']):
            entry = JarCdirEntry(self._data[offset:])
            offset += entry.size
            # Creator host system. 0 is MSDOS, 3 is Unix
            host = entry['creator_version'] >> 8
            # External attributes values depend on host above. On Unix the
            # higher bits are the stat.st_mode value. On MSDOS, the lower bits
            # are the FAT attributes.
            xattr = entry['external_attr']
            # Skip directories: FAT directory flag on MSDOS, S_IFDIR
            # (0o40000) in the stat.st_mode bits on Unix.
            if (host == 0 and xattr & 0x10) or (host == 3 and
                                                xattr & (0o40000 << 16)):
                continue
            entries[entry['filename']] = entry
            if entry['offset'] < preload:
                self._last_preloaded = entry['filename']
        self._entries = entries
        return entries

    @property
    def is_optimized(self):
        '''
        Return whether the jar archive is optimized.
        '''
        # In optimized jars, the central directory is at the beginning of the
        # file, after a single 32-bits value, which is the length of data
        # preloaded.
        return self._cdir_end['cdir_offset'] == \
            JarStruct.TYPE_MAPPING['uint32'][1]

    @property
    def last_preloaded(self):
        '''
        Return the name of the last file that is set to be preloaded.
        See JarWriter documentation for more details on preloading.
        '''
        if hasattr(self, '_last_preloaded'):
            return self._last_preloaded
        self._last_preloaded = None
        # Reading the entries sets _last_preloaded as a side effect.
        self.entries
        return self._last_preloaded

    def _getreader(self, entry):
        '''
        Helper to create a JarFileReader corresponding to the given central
        directory entry. Raises JarReaderError when the local file header
        disagrees with the central directory entry.
        '''
        header = JarLocalFileHeader(self._data[entry['offset']:])
        for key, value in entry:
            if key in header and header[key] != value:
                raise JarReaderError('Central directory and file header ' +
                                     'mismatch. Corrupted archive?')
        return JarFileReader(header,
                             self._data[entry['offset'] + header.size:])

    def __iter__(self):
        '''
        Iterate over all files in the Jar archive, in the form of
        JarFileReaders.
            for file in jarReader:
                ...
        '''
        for entry in self.entries.itervalues():
            yield self._getreader(entry)

    def __getitem__(self, name):
        '''
        Get a JarFileReader for the given file name.
        '''
        return self._getreader(self.entries[name])

    def __contains__(self, name):
        '''
        Return whether the given file name appears in the Jar archive.
        '''
        return name in self.entries
|
451 |
|
452 |
|
class JarWriter(object):
    '''
    Class with methods to write Jar files. Can write more-or-less standard jar
    archives as well as jar archives optimized for Gecko. See the documentation
    for the finish() member function for a description of both layouts.
    '''
    def __init__(self, file=None, fileobj=None, compress=True, optimize=True):
        '''
        Initialize a Jar archive in the given file. Use the given file-like
        object if one is given instead of opening the given file name.
        The compress option determines the default behavior for storing data
        in the jar archive. The optimize options determines whether the jar
        archive should be optimized for Gecko or not.
        '''
        if fileobj:
            self._data = fileobj
        else:
            self._data = open(file, 'wb')
        self._compress = compress
        # Maps member name to a (JarCdirEntry, content) pair, in the order
        # the members will be laid out in the archive.
        self._contents = OrderedDict()
        self._last_preloaded = None
        self._optimize = optimize

    def __enter__(self):
        '''
        Context manager __enter__ method for JarWriter.
        '''
        return self

    def __exit__(self, type, value, tb):
        '''
        Context manager __exit__ method for JarWriter.
        '''
        self.finish()

    def finish(self):
        '''
        Flush and close the Jar archive.

        Standard jar archives are laid out like the following:
            - Local file header 1
            - File data 1
            - Local file header 2
            - File data 2
            - (...)
            - Central directory entry pointing at Local file header 1
            - Central directory entry pointing at Local file header 2
            - (...)
            - End of central directory, pointing at first central directory
              entry.

        Jar archives optimized for Gecko are laid out like the following:
            - 32-bits unsigned integer giving the amount of data to preload.
            - Central directory entry pointing at Local file header 1
            - Central directory entry pointing at Local file header 2
            - (...)
            - End of central directory, pointing at first central directory
              entry.
            - Local file header 1
            - File data 1
            - Local file header 2
            - File data 2
            - (...)
            - End of central directory, pointing at first central directory
              entry.
        The duplication of the End of central directory is to accommodate some
        Zip reading tools that want an end of central directory structure to
        follow the central directory entries.
        '''
        offset = 0
        headers = {}
        preload_size = 0
        # Prepare central directory entries
        for entry, content in self._contents.itervalues():
            header = JarLocalFileHeader()
            # Copy the fields the local file header shares with the central
            # directory entry.
            for name in entry.STRUCT:
                if name in header:
                    header[name] = entry[name]
            # offset is relative to the start of the local file headers
            # section at this point; it is shifted below for optimized jars.
            entry['offset'] = offset
            offset += len(content) + header.size
            if entry['filename'] == self._last_preloaded:
                # Preload everything up to and including the last preloaded
                # member's data.
                preload_size = offset
            headers[entry] = header
        # Prepare end of central directory
        end = JarCdirEnd()
        end['disk_entries'] = len(self._contents)
        end['cdir_entries'] = end['disk_entries']
        end['cdir_size'] = reduce(lambda x, y: x + y[0].size,
                                  self._contents.values(), 0)
        # On optimized archives, store the preloaded size and the central
        # directory entries, followed by the first end of central directory.
        if self._optimize:
            # The central directory is stored right after the 32-bits preload
            # size value.
            end['cdir_offset'] = 4
            offset = end['cdir_size'] + end['cdir_offset'] + end.size
            if preload_size:
                # Account for the header data preceding the file data.
                preload_size += offset
            self._data.write(struct.pack('<I', preload_size))
            for entry, _ in self._contents.itervalues():
                # Shift local file header offsets past the central directory.
                entry['offset'] += offset
                self._data.write(entry.serialize())
            self._data.write(end.serialize())
        # Store local file entries followed by compressed data
        for entry, content in self._contents.itervalues():
            self._data.write(headers[entry].serialize())
            self._data.write(content)
        # On non optimized archives, store the central directory entries.
        if not self._optimize:
            end['cdir_offset'] = offset
            for entry, _ in self._contents.itervalues():
                self._data.write(entry.serialize())
        # Store the end of central directory.
        self._data.write(end.serialize())
        self._data.close()

    def add(self, name, data, compress=None, mode=None):
        '''
        Add a new member to the jar archive, with the given name and the given
        data.
        The compress option indicates if the given data should be compressed
        (True), not compressed (False), or compressed according to the default
        defined when creating the JarWriter (None).
        When the data should be compressed (True or None with self.compress ==
        True), it is only really compressed if the compressed size is smaller
        than the uncompressed size.
        The mode option gives the unix permissions that should be stored
        for the jar entry.
        The given data may be a buffer, a file-like instance, a Deflater or a
        JarFileReader instance. The latter two allow to avoid uncompressing
        data to recompress it.
        Raises JarWriterError when the name is already present or the data
        type is not supported.
        '''
        if name in self._contents:
            raise JarWriterError("File %s already in JarWriter" % name)
        if compress is None:
            compress = self._compress
        # Reuse the existing stream when the source already holds the data in
        # the wanted form, avoiding a decompress/recompress cycle.
        if (isinstance(data, JarFileReader) and data.compressed == compress) \
                or (isinstance(data, Deflater) and data.compress == compress):
            deflater = data
        else:
            deflater = Deflater(compress)
            if isinstance(data, basestring):
                deflater.write(data)
            elif hasattr(data, 'read'):
                data.seek(0)
                deflater.write(data.read())
            else:
                raise JarWriterError("Don't know how to handle %s" %
                                     type(data))
        # Fill a central directory entry for this new member.
        entry = JarCdirEntry()
        entry['creator_version'] = 20
        if mode is not None:
            # Set creator host system (upper byte of creator_version)
            # to 3 (Unix) so mode is honored when there is one.
            entry['creator_version'] |= 3 << 8
            # Unix modes live in the upper 16 bits of external_attr.
            entry['external_attr'] = (mode & 0xFFFF) << 16L
        if deflater.compressed:
            entry['min_version'] = 20  # Version 2.0 supports deflated streams
            entry['general_flag'] = 2  # Max compression
            entry['compression'] = JAR_DEFLATED
        else:
            entry['min_version'] = 10  # Version 1.0 for stored streams
            entry['general_flag'] = 0
            entry['compression'] = JAR_STORED
        # January 1st, 2010. See bug 592369.
        entry['lastmod_date'] = ((2010 - 1980) << 9) | (1 << 5) | 1
        entry['lastmod_time'] = 0
        entry['crc32'] = deflater.crc32
        entry['compressed_size'] = deflater.compressed_size
        entry['uncompressed_size'] = deflater.uncompressed_size
        entry['filename'] = name
        self._contents[name] = entry, deflater.compressed_data
|
641 |
|
642 |
|
class Deflater(object):
    '''
    File-like interface to zlib compression. The data is actually not
    compressed unless the compressed form is smaller than the uncompressed
    data.
    '''
    def __init__(self, compress=True):
        '''
        Create a Deflater. The compress argument determines whether
        compression is attempted at all.
        '''
        self._data = BytesIO()
        self.compress = compress
        self._deflater = None
        if compress:
            # Negative wbits: produce a raw deflate stream, as ZIP expects.
            self._deflater = zlib.compressobj(9, zlib.DEFLATED, -MAX_WBITS)
            self._deflated = BytesIO()

    def write(self, data):
        '''
        Append a buffer to the Deflater.
        Raises JarWriterError when writing after the compressor was flushed.
        '''
        self._data.write(data)
        if not self.compress:
            return
        if self._deflater is None:
            raise JarWriterError("Can't write after flush")
        if isinstance(data, memoryview):
            data = data.tobytes()
        self._deflated.write(self._deflater.compress(data))

    def close(self):
        '''
        Close the Deflater.
        '''
        self._data.close()
        if self.compress:
            self._deflated.close()

    def _flush(self):
        '''
        Finalize the underlying zlib compression object, after which no
        further writes are possible.
        '''
        if self.compress and self._deflater is not None:
            self._deflated.write(self._deflater.flush())
            self._deflater = None

    @property
    def compressed(self):
        '''
        Return whether the data should be stored in compressed form, i.e.
        whether compression actually made it smaller.
        '''
        return self._compressed_size < self.uncompressed_size

    @property
    def _compressed_size(self):
        '''
        Return the real compressed size of the data written to the Deflater.
        If the Deflater is set not to compress, the uncompressed size is
        returned. Otherwise, the actual compressed size is returned, whether
        or not it is a win over the uncompressed size. Accessing this flushes
        the compressor.
        '''
        if not self.compress:
            return self.uncompressed_size
        self._flush()
        return self._deflated.tell()

    @property
    def compressed_size(self):
        '''
        Return the size the data will occupy: the deflated size when
        compression wins, the uncompressed size otherwise (including when the
        Deflater is set not to compress).
        '''
        if self.compressed:
            return self._compressed_size
        return self.uncompressed_size

    @property
    def uncompressed_size(self):
        '''
        Return the total number of bytes written to the Deflater.
        '''
        return self._data.tell()

    @property
    def crc32(self):
        '''
        Return the crc32 of the data written to the Deflater.
        '''
        # Mask to get a consistent unsigned value across python versions.
        return zlib.crc32(self._data.getvalue()) & 0xffffffff

    @property
    def compressed_data(self):
        '''
        Return the bytes to store: the deflated stream when compression wins,
        the original data otherwise.
        '''
        if self.compressed:
            return self._deflated.getvalue()
        return self._data.getvalue()
|
748 |
|
749 |
|
class JarLog(dict):
    '''
    Helper to read the file Gecko generates when setting MOZ_JAR_LOG_FILE.
    The jar log is then available as a dict with the jar path as key (see
    canonicalize for more details on the key value), and the corresponding
    access log as a list value. Only the first access to a given member of
    a jar is stored.
    '''
    def __init__(self, file=None, fileobj=None):
        '''
        Initialize the log from the given file name or file-like object.
        When a file name is given, the file is closed once it is read.
        '''
        if fileobj:
            self._load(fileobj)
        else:
            fileobj = open(file, 'r')
            try:
                self._load(fileobj)
            finally:
                # Close the file we opened instead of relying on garbage
                # collection to do so.
                fileobj.close()

    def _load(self, fileobj):
        '''
        Fill the dict from the given open log file object. Each line contains
        a url and a member path separated by whitespace; malformed lines are
        skipped.
        '''
        # Cache the canonicalized form of each url, as the same jar appears
        # on many lines.
        urlmap = {}
        for line in fileobj:
            parts = line.strip().split(None, 1)
            if len(parts) != 2:
                # Skip empty or malformed lines instead of choking on them.
                continue
            url, path = parts
            if url not in urlmap:
                urlmap[url] = JarLog.canonicalize(url)
            jar = urlmap[url]
            entry = self.setdefault(jar, [])
            if path not in entry:
                entry.append(path)

    @staticmethod
    def canonicalize(url):
        '''
        The jar path is stored in a MOZ_JAR_LOG_FILE log as a url. This method
        returns a unique value corresponding to such urls.
        - file:///{path} becomes {path}
        - jar:file:///{path}!/{subpath} becomes ({path}, {subpath})
        - jar:jar:file:///{path}!/{subpath}!/{subpath2} becomes
          ({path}, {subpath}, {subpath2})
        '''
        if not isinstance(url, ParseResult):
            # Assume that if it doesn't start with jar: or file:, it's a path.
            if not url.startswith(('jar:', 'file:')):
                url = 'file:///' + os.path.abspath(url)
            url = urlparse(url)
        assert url.scheme
        assert url.scheme in ('jar', 'file')
        if url.scheme == 'jar':
            # Recurse on the inner url, then split the last component on the
            # first '!/' separator.
            path = JarLog.canonicalize(url.path)
            if isinstance(path, tuple):
                return path[:-1] + tuple(path[-1].split('!/', 1))
            return tuple(path.split('!/', 1))
        if url.scheme == 'file':
            assert os.path.isabs(url.path)
            path = url.path
            # On Windows, url.path will be /drive:/path ; on Unix systems,
            # /path. As we want drive:/path instead of /drive:/path on Windows,
            # remove the leading /.
            if os.path.isabs(path[1:]):
                path = path[1:]
            path = os.path.realpath(path)
            return mozpack.path.normsep(os.path.normcase(path))