Wed, 31 Dec 2014 06:55:50 +0100
Added tag UPSTREAM_283F7C6 for changeset ca08bd8f51b2
michael@0 | 1 | # This Source Code Form is subject to the terms of the Mozilla Public |
michael@0 | 2 | # License, v. 2.0. If a copy of the MPL was not distributed with this |
michael@0 | 3 | # file, You can obtain one at http://mozilla.org/MPL/2.0/. |
michael@0 | 4 | |
michael@0 | 5 | from io import BytesIO |
michael@0 | 6 | import struct |
michael@0 | 7 | import zlib |
michael@0 | 8 | import os |
michael@0 | 9 | from zipfile import ( |
michael@0 | 10 | ZIP_STORED, |
michael@0 | 11 | ZIP_DEFLATED, |
michael@0 | 12 | ) |
michael@0 | 13 | from collections import OrderedDict |
michael@0 | 14 | from urlparse import urlparse, ParseResult |
michael@0 | 15 | import mozpack.path |
michael@0 | 16 | |
michael@0 | 17 | JAR_STORED = ZIP_STORED |
michael@0 | 18 | JAR_DEFLATED = ZIP_DEFLATED |
michael@0 | 19 | MAX_WBITS = 15 |
michael@0 | 20 | |
michael@0 | 21 | |
class JarReaderError(Exception):
    '''Raised when parsing or reading a Jar archive fails.'''
michael@0 | 24 | |
michael@0 | 25 | |
class JarWriterError(Exception):
    '''Raised when writing to a Jar archive fails.'''
michael@0 | 28 | |
michael@0 | 29 | |
class JarStruct(object):
    '''
    Helper used to define ZIP archive raw data structures. Data structures
    handled by this helper all start with a magic number, defined in
    subclasses MAGIC field as a 32-bits unsigned integer, followed by data
    structured as described in subclasses STRUCT field.

    The STRUCT field contains a list of (name, type) pairs where name is a
    field name, and the type can be one of 'uint32', 'uint16' or one of the
    field names. In the latter case, the field is considered to be a string
    buffer with a length given in that field.
    For example,
        STRUCT = [
            ('version', 'uint32'),
            ('filename_size', 'uint16'),
            ('filename', 'filename_size')
        ]
    describes a structure with a 'version' 32-bits unsigned integer field,
    followed by a 'filename_size' 16-bits unsigned integer field, followed by
    a filename_size-long string buffer 'filename'.

    Fields that are used as other fields size are not stored in objects. In
    the above example, an instance of such subclass would only have two
    attributes:
        obj['version']
        obj['filename']
    filename_size would be obtained with len(obj['filename']).

    JarStruct subclasses instances can be either initialized from existing
    data (deserialized), or with empty fields.
    '''

    # struct format character and byte size for each primitive field type.
    TYPE_MAPPING = {'uint32': ('I', 4), 'uint16': ('H', 2)}

    def __init__(self, data=None):
        '''
        Create an instance from the given data. Data may be omitted to
        create an instance with empty fields.
        '''
        assert self.MAGIC and isinstance(self.STRUCT, OrderedDict)
        # Names of fields that only encode the length of another field; they
        # are derived on serialization and never stored in _values.
        # Note: .values()/.items() are used instead of the Python-2-only
        # itervalues()/iteritems() so the class works on Python 2 and 3.
        self.size_fields = set(t for t in self.STRUCT.values()
                               if t not in JarStruct.TYPE_MAPPING)
        self._values = {}
        if data:
            self._init_data(data)
        else:
            self._init_empty()

    def _init_data(self, data):
        '''
        Initialize an instance from data, following the data structure
        described in self.STRUCT. The self.MAGIC signature is expected at
        data[:4].

        Raises JarReaderError when the magic number doesn't match.
        '''
        assert data is not None
        self.signature, size = JarStruct.get_data('uint32', data)
        if self.signature != self.MAGIC:
            raise JarReaderError('Bad magic')
        offset = size
        # For all fields used as other fields sizes, keep track of their
        # value separately.
        sizes = dict((t, 0) for t in self.size_fields)
        for name, t in self.STRUCT.items():
            if t in JarStruct.TYPE_MAPPING:
                value, size = JarStruct.get_data(t, data[offset:])
            else:
                # Variable-length buffer; its size was read in an earlier
                # field and stashed in sizes.
                size = sizes[t]
                value = data[offset:offset + size]
                if isinstance(value, memoryview):
                    value = value.tobytes()
            if name not in sizes:
                self._values[name] = value
            else:
                sizes[name] = value
            offset += size

    def _init_empty(self):
        '''
        Initialize an instance with empty fields.
        '''
        self.signature = self.MAGIC
        for name, t in self.STRUCT.items():
            if name in self.size_fields:
                continue
            # b'' (identical to '' on Python 2) keeps serialize()'s bytes
            # concatenation working with struct.pack output on Python 3.
            self._values[name] = 0 if t in JarStruct.TYPE_MAPPING else b''

    @staticmethod
    def get_data(type, data):
        '''
        Deserialize a single field of given type (must be one of
        JarStruct.TYPE_MAPPING) at the beginning of the given data.
        Return a (value, size) tuple, where size is the number of bytes
        the field occupies.
        '''
        # `type` shadows the builtin; kept as-is for interface compatibility.
        assert type in JarStruct.TYPE_MAPPING
        assert data is not None
        format, size = JarStruct.TYPE_MAPPING[type]
        data = data[:size]
        if isinstance(data, memoryview):
            data = data.tobytes()
        # All ZIP integers are little-endian, hence the '<' prefix.
        return struct.unpack('<' + format, data)[0], size

    def serialize(self):
        '''
        Serialize the data structure according to the data structure
        definition from self.STRUCT.
        '''
        serialized = struct.pack('<I', self.signature)
        # Map each size field to the name of the buffer whose length it
        # stores, so its value can be computed instead of stored.
        sizes = dict((t, name) for name, t in self.STRUCT.items()
                     if t not in JarStruct.TYPE_MAPPING)
        for name, t in self.STRUCT.items():
            if t in JarStruct.TYPE_MAPPING:
                format, size = JarStruct.TYPE_MAPPING[t]
                if name in sizes:
                    value = len(self[sizes[name]])
                else:
                    value = self[name]
                serialized += struct.pack('<' + format, value)
            else:
                serialized += self[name]
        return serialized

    @property
    def size(self):
        '''
        Return the size of the data structure, given the current values of
        all variable length fields.
        '''
        # Start with the 4-byte magic number.
        size = JarStruct.TYPE_MAPPING['uint32'][1]
        for name, type in self.STRUCT.items():
            if type in JarStruct.TYPE_MAPPING:
                size += JarStruct.TYPE_MAPPING[type][1]
            else:
                size += len(self[name])
        return size

    def __getitem__(self, key):
        return self._values[key]

    def __setitem__(self, key, value):
        if key not in self.STRUCT:
            raise KeyError(key)
        if key in self.size_fields:
            # Size fields are always derived from their buffer's length.
            raise AttributeError("can't set attribute")
        self._values[key] = value

    def __contains__(self, key):
        return key in self._values

    def __iter__(self):
        return iter(self._values.items())

    def __repr__(self):
        return "<%s %s>" % (self.__class__.__name__,
                            ' '.join('%s=%s' % (n, v) for n, v in self))
michael@0 | 182 | |
michael@0 | 183 | |
class JarCdirEnd(JarStruct):
    '''
    End of central directory record.

    Field names follow the ZIP end-of-central-directory structure: counts
    and location of the central directory, plus an optional archive comment.
    '''
    MAGIC = 0x06054b50
    STRUCT = OrderedDict([
        ('disk_num', 'uint16'),       # number of this disk
        ('cdir_disk', 'uint16'),      # disk where central directory starts
        ('disk_entries', 'uint16'),   # central directory entries on this disk
        ('cdir_entries', 'uint16'),   # total central directory entries
        ('cdir_size', 'uint32'),      # size in bytes of central directory
        ('cdir_offset', 'uint32'),    # offset where central directory starts
        ('comment_size', 'uint16'),   # length of the comment buffer below
        ('comment', 'comment_size'),  # archive comment
    ])

# Size in bytes of an end-of-central-directory record with an empty comment.
CDIR_END_SIZE = JarCdirEnd().size
michael@0 | 201 | |
michael@0 | 202 | |
class JarCdirEntry(JarStruct):
    '''
    Central directory file header
    '''
    MAGIC = 0x02014b50
    STRUCT = OrderedDict([
        ('creator_version', 'uint16'),       # host system (high byte) / spec
                                             # version used to create entry
        ('min_version', 'uint16'),           # minimum version to extract
        ('general_flag', 'uint16'),          # general purpose bit flag
        ('compression', 'uint16'),           # compression method
        ('lastmod_time', 'uint16'),          # last modified time (DOS format)
        ('lastmod_date', 'uint16'),          # last modified date (DOS format)
        ('crc32', 'uint32'),                 # CRC-32 of uncompressed data
        ('compressed_size', 'uint32'),
        ('uncompressed_size', 'uint32'),
        ('filename_size', 'uint16'),
        ('extrafield_size', 'uint16'),
        ('filecomment_size', 'uint16'),
        ('disknum', 'uint16'),               # disk number where file starts
        ('internal_attr', 'uint16'),
        ('external_attr', 'uint32'),         # host-dependent file attributes
        ('offset', 'uint32'),                # offset of local file header
        ('filename', 'filename_size'),
        ('extrafield', 'extrafield_size'),
        ('filecomment', 'filecomment_size'),
    ])
michael@0 | 229 | |
michael@0 | 230 | |
class JarLocalFileHeader(JarStruct):
    '''
    Local file header

    Subset of the central directory entry fields, stored immediately before
    each member's data in the archive.
    '''
    MAGIC = 0x04034b50
    STRUCT = OrderedDict([
        ('min_version', 'uint16'),           # minimum version to extract
        ('general_flag', 'uint16'),          # general purpose bit flag
        ('compression', 'uint16'),           # compression method
        ('lastmod_time', 'uint16'),          # last modified time (DOS format)
        ('lastmod_date', 'uint16'),          # last modified date (DOS format)
        ('crc32', 'uint32'),                 # CRC-32 of uncompressed data
        ('compressed_size', 'uint32'),
        ('uncompressed_size', 'uint32'),
        ('filename_size', 'uint16'),
        ('extra_field_size', 'uint16'),
        ('filename', 'filename_size'),
        ('extra_field', 'extra_field_size'),
    ])
michael@0 | 250 | |
michael@0 | 251 | |
class JarFileReader(object):
    '''
    File-like class for use by JarReader to give access to individual files
    within a Jar archive.
    '''
    def __init__(self, header, data):
        '''
        Initialize a JarFileReader. header is the local file header
        corresponding to the file in the jar archive, data a buffer
        containing the file data.
        '''
        assert header['compression'] in [JAR_DEFLATED, JAR_STORED]
        self._data = data
        # Mirror a few header fields as plain attributes for convenience.
        self.filename = header['filename']
        self.compressed_size = header['compressed_size']
        self.uncompressed_size = header['uncompressed_size']
        self.crc32 = header['crc32']
        self.compressed = header['compression'] == JAR_DEFLATED

    def read(self, length=-1):
        '''
        Read some amount of uncompressed data.
        '''
        return self.uncompressed_data.read(length)

    def readlines(self):
        '''
        Return a list containing all the lines of data in the uncompressed
        data, keeping line endings.
        '''
        return self.read().splitlines(True)

    def __iter__(self):
        '''
        Iterator, to support the "for line in fileobj" constructs.
        '''
        return iter(self.readlines())

    def seek(self, pos, whence=os.SEEK_SET):
        '''
        Change the current position in the uncompressed data. Subsequent
        reads will start from there.
        '''
        return self.uncompressed_data.seek(pos, whence)

    def close(self):
        '''
        Free the uncompressed data buffer.
        '''
        self.uncompressed_data.close()

    @property
    def compressed_data(self):
        '''
        Return the raw compressed data.
        '''
        return self._data[:self.compressed_size]

    @property
    def uncompressed_data(self):
        '''
        Return the uncompressed data as a BytesIO instance (computed lazily
        and cached on first access).
        '''
        cached = getattr(self, '_uncompressed_data', None)
        if cached is not None:
            return cached
        raw = self.compressed_data
        if self.compressed:
            # Raw deflate stream: negative wbits means no zlib header.
            buf = zlib.decompress(raw.tobytes(), -MAX_WBITS)
        else:
            buf = raw.tobytes()
        if len(buf) != self.uncompressed_size:
            raise JarReaderError('Corrupted file? %s' % self.filename)
        self._uncompressed_data = BytesIO(buf)
        return self._uncompressed_data
michael@0 | 326 | |
michael@0 | 327 | |
class JarReader(object):
    '''
    Class with methods to read Jar files. Can open standard jar files as
    well as Mozilla jar files (see further details in the JarWriter
    documentation).
    '''
    def __init__(self, file=None, fileobj=None):
        '''
        Opens the given file as a Jar archive. Use the given file-like
        object if one is given instead of opening the given file name.
        Raises JarReaderError if no end-of-central-directory record is
        found.
        '''
        if fileobj:
            data = fileobj.read()
        else:
            # Use a context manager so the file handle isn't leaked (the
            # original open(...).read() never closed it).
            with open(file, 'rb') as fh:
                data = fh.read()
        self._data = memoryview(data)
        # The End of Central Directory Record has a variable size because of
        # comments it may contain, so scan for it from the end of the file.
        offset = -CDIR_END_SIZE
        while True:
            signature = JarStruct.get_data('uint32', self._data[offset:])[0]
            if signature == JarCdirEnd.MAGIC:
                break
            if offset == -len(self._data):
                raise JarReaderError('Not a jar?')
            offset -= 1
        self._cdir_end = JarCdirEnd(self._data[offset:])

    def close(self):
        '''
        Free some resources associated with the Jar.
        '''
        del self._data

    @property
    def entries(self):
        '''
        Return an ordered dict of central directory entries, indexed by
        filename, in the order they appear in the Jar archive central
        directory. Directory entries are skipped. The result is computed
        once and cached.
        '''
        if hasattr(self, '_entries'):
            return self._entries
        preload = 0
        if self.is_optimized:
            preload = JarStruct.get_data('uint32', self._data)[0]
        entries = OrderedDict()
        offset = self._cdir_end['cdir_offset']
        for _ in range(self._cdir_end['cdir_entries']):
            entry = JarCdirEntry(self._data[offset:])
            offset += entry.size
            # Creator host system. 0 is MSDOS, 3 is Unix
            host = entry['creator_version'] >> 8
            # External attributes values depend on host above. On Unix the
            # higher bits are the stat.st_mode value. On MSDOS, the lower
            # bits are the FAT attributes.
            xattr = entry['external_attr']
            # Skip directories: FAT directory bit on MSDOS, S_IFDIR
            # (0o40000, was written 040000 — a syntax error on Python 3) in
            # the mode bits on Unix.
            if (host == 0 and xattr & 0x10) or \
                    (host == 3 and xattr & (0o40000 << 16)):
                continue
            entries[entry['filename']] = entry
            if entry['offset'] < preload:
                self._last_preloaded = entry['filename']
        self._entries = entries
        return entries

    @property
    def is_optimized(self):
        '''
        Return whether the jar archive is optimized.
        '''
        # In optimized jars, the central directory is at the beginning of
        # the file, after a single 32-bits value, which is the length of
        # data preloaded.
        return self._cdir_end['cdir_offset'] == \
            JarStruct.TYPE_MAPPING['uint32'][1]

    @property
    def last_preloaded(self):
        '''
        Return the name of the last file that is set to be preloaded.
        See JarWriter documentation for more details on preloading.
        '''
        if hasattr(self, '_last_preloaded'):
            return self._last_preloaded
        self._last_preloaded = None
        # Scanning the entries sets _last_preloaded as a side effect.
        self.entries
        return self._last_preloaded

    def _getreader(self, entry):
        '''
        Helper to create a JarFileReader corresponding to the given central
        directory entry. Raises JarReaderError when the local file header
        disagrees with the central directory.
        '''
        header = JarLocalFileHeader(self._data[entry['offset']:])
        for key, value in entry:
            if key in header and header[key] != value:
                raise JarReaderError('Central directory and file header ' +
                                     'mismatch. Corrupted archive?')
        return JarFileReader(header,
                             self._data[entry['offset'] + header.size:])

    def __iter__(self):
        '''
        Iterate over all files in the Jar archive, in the form of
        JarFileReaders.
            for file in jarReader:
                ...
        '''
        for entry in self.entries.values():
            yield self._getreader(entry)

    def __getitem__(self, name):
        '''
        Get a JarFileReader for the given file name.
        '''
        return self._getreader(self.entries[name])

    def __contains__(self, name):
        '''
        Return whether the given file name appears in the Jar archive.
        '''
        return name in self.entries
michael@0 | 451 | |
michael@0 | 452 | |
class JarWriter(object):
    '''
    Class with methods to write Jar files. Can write more-or-less standard
    jar archives as well as jar archives optimized for Gecko. See the
    documentation for the finish() member function for a description of both
    layouts.
    '''
    def __init__(self, file=None, fileobj=None, compress=True, optimize=True):
        '''
        Initialize a Jar archive in the given file. Use the given file-like
        object if one is given instead of opening the given file name.
        The compress option determines the default behavior for storing data
        in the jar archive. The optimize option determines whether the jar
        archive should be optimized for Gecko or not.
        '''
        if fileobj:
            self._data = fileobj
        else:
            self._data = open(file, 'wb')
        self._compress = compress
        # Maps member name -> (JarCdirEntry, compressed data buffer), in
        # archive order.
        self._contents = OrderedDict()
        self._last_preloaded = None
        self._optimize = optimize

    def __enter__(self):
        '''
        Context manager __enter__ method for JarWriter.
        '''
        return self

    def __exit__(self, type, value, tb):
        '''
        Context manager __exit__ method for JarWriter.
        '''
        self.finish()

    def finish(self):
        '''
        Flush and close the Jar archive.

        Standard jar archives are laid out like the following:
            - Local file header 1
            - File data 1
            - Local file header 2
            - File data 2
            - (...)
            - Central directory entry pointing at Local file header 1
            - Central directory entry pointing at Local file header 2
            - (...)
            - End of central directory, pointing at first central directory
              entry.

        Jar archives optimized for Gecko are laid out like the following:
            - 32-bits unsigned integer giving the amount of data to preload.
            - Central directory entry pointing at Local file header 1
            - Central directory entry pointing at Local file header 2
            - (...)
            - End of central directory, pointing at first central directory
              entry.
            - Local file header 1
            - File data 1
            - Local file header 2
            - File data 2
            - (...)
            - End of central directory, pointing at first central directory
              entry.
        The duplication of the End of central directory is to accomodate
        some Zip reading tools that want an end of central directory
        structure to follow the central directory entries.
        '''
        offset = 0
        headers = {}
        preload_size = 0
        # Prepare local file headers and member offsets.
        for entry, content in self._contents.values():
            header = JarLocalFileHeader()
            for name in entry.STRUCT:
                if name in header:
                    header[name] = entry[name]
            entry['offset'] = offset
            offset += len(content) + header.size
            if entry['filename'] == self._last_preloaded:
                preload_size = offset
            headers[entry] = header
        # Prepare end of central directory.
        end = JarCdirEnd()
        end['disk_entries'] = len(self._contents)
        end['cdir_entries'] = end['disk_entries']
        # sum() replaces the original reduce(), which is a builtin only on
        # Python 2, and is clearer anyway.
        end['cdir_size'] = sum(entry.size
                               for entry, _ in self._contents.values())
        # On optimized archives, store the preloaded size and the central
        # directory entries, followed by the first end of central directory.
        if self._optimize:
            end['cdir_offset'] = 4
            offset = end['cdir_size'] + end['cdir_offset'] + end.size
            if preload_size:
                preload_size += offset
            self._data.write(struct.pack('<I', preload_size))
            for entry, _ in self._contents.values():
                # Member data now lives after the central directory; shift
                # every local header offset accordingly.
                entry['offset'] += offset
                self._data.write(entry.serialize())
            self._data.write(end.serialize())
        # Store local file entries followed by compressed data.
        for entry, content in self._contents.values():
            self._data.write(headers[entry].serialize())
            self._data.write(content)
        # On non optimized archives, store the central directory entries.
        if not self._optimize:
            end['cdir_offset'] = offset
            for entry, _ in self._contents.values():
                self._data.write(entry.serialize())
        # Store the end of central directory.
        self._data.write(end.serialize())
        self._data.close()

    def add(self, name, data, compress=None, mode=None):
        '''
        Add a new member to the jar archive, with the given name and the
        given data.
        The compress option indicates if the given data should be compressed
        (True), not compressed (False), or compressed according to the
        default defined when creating the JarWriter (None).
        When the data should be compressed (True or None with self.compress
        == True), it is only really compressed if the compressed size is
        smaller than the uncompressed size.
        The mode option gives the unix permissions that should be stored
        for the jar entry.
        The given data may be a buffer, a file-like instance, a Deflater or
        a JarFileReader instance. The latter two allow to avoid
        uncompressing data to recompress it.
        '''
        if name in self._contents:
            raise JarWriterError("File %s already in JarWriter" % name)
        if compress is None:
            compress = self._compress
        if (isinstance(data, JarFileReader) and data.compressed == compress) \
                or (isinstance(data, Deflater) and data.compress == compress):
            # Reuse the existing (de)compressed stream instead of
            # recompressing.
            deflater = data
        else:
            deflater = Deflater(compress)
            # basestring only exists on Python 2 (where str is bytes and
            # basestring also covers unicode); on Python 3, accept str and
            # bytes. The false branch is never evaluated on Python 3, so the
            # basestring name lookup doesn't occur there.
            string_types = (basestring,) if str is bytes else (str, bytes)
            if isinstance(data, string_types):
                deflater.write(data)
            elif hasattr(data, 'read'):
                data.seek(0)
                deflater.write(data.read())
            else:
                raise JarWriterError("Don't know how to handle %s" %
                                     type(data))
        # Fill a central directory entry for this new member.
        entry = JarCdirEntry()
        entry['creator_version'] = 20
        if mode is not None:
            # Set creator host system (upper byte of creator_version)
            # to 3 (Unix) so mode is honored when there is one.
            entry['creator_version'] |= 3 << 8
            # Unix st_mode goes in the upper 16 bits of external_attr. The
            # original 16L long literal is a Python 3 syntax error; plain 16
            # behaves identically on Python 2 (ints auto-promote).
            entry['external_attr'] = (mode & 0xFFFF) << 16
        if deflater.compressed:
            entry['min_version'] = 20  # Version 2.0 supports deflated streams
            entry['general_flag'] = 2  # Max compression
            entry['compression'] = JAR_DEFLATED
        else:
            entry['min_version'] = 10  # Version 1.0 for stored streams
            entry['general_flag'] = 0
            entry['compression'] = JAR_STORED
        # January 1st, 2010. See bug 592369.
        entry['lastmod_date'] = ((2010 - 1980) << 9) | (1 << 5) | 1
        entry['lastmod_time'] = 0
        entry['crc32'] = deflater.crc32
        entry['compressed_size'] = deflater.compressed_size
        entry['uncompressed_size'] = deflater.uncompressed_size
        entry['filename'] = name
        self._contents[name] = entry, deflater.compressed_data

    def preload(self, files):
        '''
        Set which members of the jar archive should be preloaded when
        opening the archive in Gecko. This reorders the members according to
        the order of given list.
        '''
        new_contents = OrderedDict()
        for f in files:
            if f not in self._contents:
                continue
            new_contents[f] = self._contents[f]
            self._last_preloaded = f
        # Keep all remaining members, in their original relative order,
        # after the preloaded ones.
        for f in self._contents:
            if f not in new_contents:
                new_contents[f] = self._contents[f]
        self._contents = new_contents
michael@0 | 641 | |
michael@0 | 642 | |
michael@0 | 643 | class Deflater(object): |
michael@0 | 644 | ''' |
michael@0 | 645 | File-like interface to zlib compression. The data is actually not |
michael@0 | 646 | compressed unless the compressed form is smaller than the uncompressed |
michael@0 | 647 | data. |
michael@0 | 648 | ''' |
    def __init__(self, compress=True):
        '''
        Initialize a Deflater. The compress argument determines whether to
        try to compress at all.
        '''
        # The raw input is always kept in full, so the uncompressed form is
        # available when compression isn't a win.
        self._data = BytesIO()
        self.compress = compress
        if compress:
            # Level 9 deflate; negative wbits produce a raw deflate stream
            # without the zlib header/checksum.
            self._deflater = zlib.compressobj(9, zlib.DEFLATED, -MAX_WBITS)
            self._deflated = BytesIO()
        else:
            self._deflater = None
michael@0 | 661 | |
    def write(self, data):
        '''
        Append a buffer to the Deflater.

        Raises JarWriterError if called after the compression stream has
        been flushed.
        '''
        self._data.write(data)
        if self.compress:
            if self._deflater:
                if isinstance(data, memoryview):
                    # Copy memoryview slices to bytes before handing them to
                    # zlib.
                    data = data.tobytes()
                self._deflated.write(self._deflater.compress(data))
            else:
                # _flush() has already finalized and dropped the compressobj.
                raise JarWriterError("Can't write after flush")
michael@0 | 674 | |
michael@0 | 675 | def close(self): |
michael@0 | 676 | ''' |
michael@0 | 677 | Close the Deflater. |
michael@0 | 678 | ''' |
michael@0 | 679 | self._data.close() |
michael@0 | 680 | if self.compress: |
michael@0 | 681 | self._deflated.close() |
michael@0 | 682 | |
michael@0 | 683 | def _flush(self): |
michael@0 | 684 | ''' |
michael@0 | 685 | Flush the underlying zlib compression object. |
michael@0 | 686 | ''' |
michael@0 | 687 | if self.compress and self._deflater: |
michael@0 | 688 | self._deflated.write(self._deflater.flush()) |
michael@0 | 689 | self._deflater = None |
michael@0 | 690 | |
michael@0 | 691 | @property |
michael@0 | 692 | def compressed(self): |
michael@0 | 693 | ''' |
michael@0 | 694 | Return whether the data should be compressed. |
michael@0 | 695 | ''' |
michael@0 | 696 | return self._compressed_size < self.uncompressed_size |
michael@0 | 697 | |
michael@0 | 698 | @property |
michael@0 | 699 | def _compressed_size(self): |
michael@0 | 700 | ''' |
michael@0 | 701 | Return the real compressed size of the data written to the Deflater. If |
michael@0 | 702 | the Deflater is set not to compress, the uncompressed size is returned. |
michael@0 | 703 | Otherwise, the actual compressed size is returned, whether or not it is |
michael@0 | 704 | a win over the uncompressed size. |
michael@0 | 705 | ''' |
michael@0 | 706 | if self.compress: |
michael@0 | 707 | self._flush() |
michael@0 | 708 | return self._deflated.tell() |
michael@0 | 709 | return self.uncompressed_size |
michael@0 | 710 | |
michael@0 | 711 | @property |
michael@0 | 712 | def compressed_size(self): |
michael@0 | 713 | ''' |
michael@0 | 714 | Return the compressed size of the data written to the Deflater. If the |
michael@0 | 715 | Deflater is set not to compress, the uncompressed size is returned. |
michael@0 | 716 | Otherwise, if the data should not be compressed (the real compressed |
michael@0 | 717 | size is bigger than the uncompressed size), return the uncompressed |
michael@0 | 718 | size. |
michael@0 | 719 | ''' |
michael@0 | 720 | if self.compressed: |
michael@0 | 721 | return self._compressed_size |
michael@0 | 722 | return self.uncompressed_size |
michael@0 | 723 | |
michael@0 | 724 | @property |
michael@0 | 725 | def uncompressed_size(self): |
michael@0 | 726 | ''' |
michael@0 | 727 | Return the size of the data written to the Deflater. |
michael@0 | 728 | ''' |
michael@0 | 729 | return self._data.tell() |
michael@0 | 730 | |
michael@0 | 731 | @property |
michael@0 | 732 | def crc32(self): |
michael@0 | 733 | ''' |
michael@0 | 734 | Return the crc32 of the data written to the Deflater. |
michael@0 | 735 | ''' |
michael@0 | 736 | return zlib.crc32(self._data.getvalue()) & 0xffffffff |
michael@0 | 737 | |
michael@0 | 738 | @property |
michael@0 | 739 | def compressed_data(self): |
michael@0 | 740 | ''' |
michael@0 | 741 | Return the compressed data, if the data should be compressed (real |
michael@0 | 742 | compressed size smaller than the uncompressed size), or the |
michael@0 | 743 | uncompressed data otherwise. |
michael@0 | 744 | ''' |
michael@0 | 745 | if self.compressed: |
michael@0 | 746 | return self._deflated.getvalue() |
michael@0 | 747 | return self._data.getvalue() |
michael@0 | 748 | |
michael@0 | 749 | |
michael@0 | 750 | class JarLog(dict): |
michael@0 | 751 | ''' |
michael@0 | 752 | Helper to read the file Gecko generates when setting MOZ_JAR_LOG_FILE. |
michael@0 | 753 | The jar log is then available as a dict with the jar path as key (see |
michael@0 | 754 | canonicalize for more details on the key value), and the corresponding |
michael@0 | 755 | access log as a list value. Only the first access to a given member of |
michael@0 | 756 | a jar is stored. |
michael@0 | 757 | ''' |
michael@0 | 758 | def __init__(self, file=None, fileobj=None): |
michael@0 | 759 | if not fileobj: |
michael@0 | 760 | fileobj = open(file, 'r') |
michael@0 | 761 | urlmap = {} |
michael@0 | 762 | for line in fileobj: |
michael@0 | 763 | url, path = line.strip().split(None, 1) |
michael@0 | 764 | if not url or not path: |
michael@0 | 765 | continue |
michael@0 | 766 | if url not in urlmap: |
michael@0 | 767 | urlmap[url] = JarLog.canonicalize(url) |
michael@0 | 768 | jar = urlmap[url] |
michael@0 | 769 | entry = self.setdefault(jar, []) |
michael@0 | 770 | if path not in entry: |
michael@0 | 771 | entry.append(path) |
michael@0 | 772 | |
michael@0 | 773 | @staticmethod |
michael@0 | 774 | def canonicalize(url): |
michael@0 | 775 | ''' |
michael@0 | 776 | The jar path is stored in a MOZ_JAR_LOG_FILE log as a url. This method |
michael@0 | 777 | returns a unique value corresponding to such urls. |
michael@0 | 778 | - file:///{path} becomes {path} |
michael@0 | 779 | - jar:file:///{path}!/{subpath} becomes ({path}, {subpath}) |
michael@0 | 780 | - jar:jar:file:///{path}!/{subpath}!/{subpath2} becomes |
michael@0 | 781 | ({path}, {subpath}, {subpath2}) |
michael@0 | 782 | ''' |
michael@0 | 783 | if not isinstance(url, ParseResult): |
michael@0 | 784 | # Assume that if it doesn't start with jar: or file:, it's a path. |
michael@0 | 785 | if not url.startswith(('jar:', 'file:')): |
michael@0 | 786 | url = 'file:///' + os.path.abspath(url) |
michael@0 | 787 | url = urlparse(url) |
michael@0 | 788 | assert url.scheme |
michael@0 | 789 | assert url.scheme in ('jar', 'file') |
michael@0 | 790 | if url.scheme == 'jar': |
michael@0 | 791 | path = JarLog.canonicalize(url.path) |
michael@0 | 792 | if isinstance(path, tuple): |
michael@0 | 793 | return path[:-1] + tuple(path[-1].split('!/', 1)) |
michael@0 | 794 | return tuple(path.split('!/', 1)) |
michael@0 | 795 | if url.scheme == 'file': |
michael@0 | 796 | assert os.path.isabs(url.path) |
michael@0 | 797 | path = url.path |
michael@0 | 798 | # On Windows, url.path will be /drive:/path ; on Unix systems, |
michael@0 | 799 | # /path. As we want drive:/path instead of /drive:/path on Windows, |
michael@0 | 800 | # remove the leading /. |
michael@0 | 801 | if os.path.isabs(path[1:]): |
michael@0 | 802 | path = path[1:] |
michael@0 | 803 | path = os.path.realpath(path) |
michael@0 | 804 | return mozpack.path.normsep(os.path.normcase(path)) |