python/mozbuild/mozpack/mozjar.py

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tag tor-browser-31.3.0esr-4.5-1-build1,
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f, for hacking purposes.

michael@0 1 # This Source Code Form is subject to the terms of the Mozilla Public
michael@0 2 # License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 3 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
michael@0 4
michael@0 5 from io import BytesIO
michael@0 6 import struct
michael@0 7 import zlib
michael@0 8 import os
michael@0 9 from zipfile import (
michael@0 10 ZIP_STORED,
michael@0 11 ZIP_DEFLATED,
michael@0 12 )
michael@0 13 from collections import OrderedDict
michael@0 14 from urlparse import urlparse, ParseResult
michael@0 15 import mozpack.path
michael@0 16
# Jar members reuse the compression method identifiers of the zipfile module.
JAR_STORED, JAR_DEFLATED = ZIP_STORED, ZIP_DEFLATED
# Window size handed (negated) to zlib by the reader and the Deflater below.
MAX_WBITS = 15
michael@0 20
michael@0 21
class JarReaderError(Exception):
    '''
    Error type for Jar reader errors.
    '''
    pass
michael@0 24
michael@0 25
class JarWriterError(Exception):
    '''
    Error type for Jar writer errors.
    '''
    pass
michael@0 28
michael@0 29
class JarStruct(object):
    '''
    Base class describing raw ZIP archive records.

    Every record starts with a 32-bits unsigned magic number (the MAGIC
    attribute of the subclass), followed by the fields listed in the STRUCT
    attribute of the subclass.

    STRUCT is an OrderedDict mapping a field name to a type. The type is
    either 'uint32', 'uint16', or the name of another field. In the latter
    case the field is a string buffer whose length is stored in that other
    field. For example,
        STRUCT = OrderedDict([
            ('version', 'uint32'),
            ('filename_size', 'uint16'),
            ('filename', 'filename_size')
        ])
    describes a 32-bits unsigned 'version', then a 16-bits unsigned
    'filename_size', then a 'filename' buffer of filename_size bytes.

    Fields that only hold the size of another field are not stored in the
    instance: with the example above only obj['version'] and obj['filename']
    exist, and the size is len(obj['filename']).

    Instances are either deserialized from existing data or created with
    empty fields.
    '''

    TYPE_MAPPING = {'uint32': ('I', 4), 'uint16': ('H', 2)}

    def __init__(self, data=None):
        '''
        Build an instance from the given data, or with empty fields when no
        (or empty) data is given.
        '''
        assert self.MAGIC and isinstance(self.STRUCT, OrderedDict)
        # Names of the fields that only carry the length of another field.
        self.size_fields = set(kind for kind in self.STRUCT.values()
                               if kind not in JarStruct.TYPE_MAPPING)
        self._values = {}
        if not data:
            self._init_empty()
            return
        self._init_data(data)

    def _init_data(self, data):
        '''
        Deserialize the record from data according to self.STRUCT. The
        self.MAGIC signature is expected in data[:4]; JarReaderError is
        raised when it is not there.
        '''
        assert data is not None
        self.signature, offset = JarStruct.get_data('uint32', data)
        if self.signature != self.MAGIC:
            raise JarReaderError('Bad magic')
        # Values of the size fields, needed to slice the buffers that follow.
        lengths = dict((field, 0) for field in self.size_fields)
        for name, kind in self.STRUCT.items():
            if kind in JarStruct.TYPE_MAPPING:
                value, consumed = JarStruct.get_data(kind, data[offset:])
            else:
                consumed = lengths[kind]
                value = data[offset:offset + consumed]
                if isinstance(value, memoryview):
                    value = value.tobytes()
            if name in lengths:
                lengths[name] = value
            else:
                self._values[name] = value
            offset += consumed

    def _init_empty(self):
        '''
        Fill the record with default values: 0 for integers, '' for buffers.
        '''
        self.signature = self.MAGIC
        for name, kind in self.STRUCT.items():
            if name not in self.size_fields:
                self._values[name] = \
                    0 if kind in JarStruct.TYPE_MAPPING else ''

    @staticmethod
    def get_data(type, data):
        '''
        Deserialize a single little-endian field of the given type (one of
        the JarStruct.TYPE_MAPPING keys) from the start of the given data.
        Return a (value, size in bytes) tuple.
        '''
        assert type in JarStruct.TYPE_MAPPING
        assert data is not None
        code, width = JarStruct.TYPE_MAPPING[type]
        raw = data[:width]
        if isinstance(raw, memoryview):
            raw = raw.tobytes()
        (value,) = struct.unpack('<' + code, raw)
        return value, width

    def serialize(self):
        '''
        Return the record as a byte string laid out as described by
        self.STRUCT, size fields being computed from the buffers they
        describe.
        '''
        # Maps each size field to the buffer field whose length it holds.
        sized = dict((kind, name) for name, kind in self.STRUCT.items()
                     if kind not in JarStruct.TYPE_MAPPING)
        output = struct.pack('<I', self.signature)
        for name, kind in self.STRUCT.items():
            if kind not in JarStruct.TYPE_MAPPING:
                output += self[name]
                continue
            code = JarStruct.TYPE_MAPPING[kind][0]
            if name in sized:
                number = len(self[sized[name]])
            else:
                number = self[name]
            output += struct.pack('<' + code, number)
        return output

    @property
    def size(self):
        '''
        Size in bytes of the serialized record, given the current length of
        all variable length fields.
        '''
        total = JarStruct.TYPE_MAPPING['uint32'][1]
        for name, kind in self.STRUCT.items():
            fixed = JarStruct.TYPE_MAPPING.get(kind)
            if fixed is not None:
                total += fixed[1]
            else:
                total += len(self[name])
        return total

    def __getitem__(self, key):
        return self._values[key]

    def __setitem__(self, key, value):
        # Only known, non-size fields can be assigned.
        if key not in self.STRUCT:
            raise KeyError(key)
        if key in self.size_fields:
            raise AttributeError("can't set attribute")
        self._values[key] = value

    def __contains__(self, key):
        return key in self._values

    def __iter__(self):
        # Iterating a record yields (name, value) pairs.
        return iter(self._values.items())

    def __repr__(self):
        fields = ' '.join('%s=%s' % pair for pair in self)
        return "<%s %s>" % (self.__class__.__name__, fields)
michael@0 182
michael@0 183
class JarCdirEnd(JarStruct):
    '''
    End of central directory record.
    '''
    MAGIC = 0x06054b50
    STRUCT = OrderedDict((
        ('disk_num', 'uint16'),
        ('cdir_disk', 'uint16'),
        ('disk_entries', 'uint16'),
        ('cdir_entries', 'uint16'),
        ('cdir_size', 'uint32'),
        ('cdir_offset', 'uint32'),
        # Length of the variable-size comment that closes the record.
        ('comment_size', 'uint16'),
        ('comment', 'comment_size'),
    ))


# Size of an end of central directory record carrying an empty comment.
CDIR_END_SIZE = JarCdirEnd().size
michael@0 201
michael@0 202
class JarCdirEntry(JarStruct):
    '''
    Central directory file header
    '''
    MAGIC = 0x02014b50
    STRUCT = OrderedDict((
        ('creator_version', 'uint16'),
        ('min_version', 'uint16'),
        ('general_flag', 'uint16'),
        ('compression', 'uint16'),
        ('lastmod_time', 'uint16'),
        ('lastmod_date', 'uint16'),
        ('crc32', 'uint32'),
        ('compressed_size', 'uint32'),
        ('uncompressed_size', 'uint32'),
        # Lengths of the three variable-size buffers ending the record.
        ('filename_size', 'uint16'),
        ('extrafield_size', 'uint16'),
        ('filecomment_size', 'uint16'),
        ('disknum', 'uint16'),
        ('internal_attr', 'uint16'),
        ('external_attr', 'uint32'),
        ('offset', 'uint32'),
        # Variable-size buffers, in the order of their size fields.
        ('filename', 'filename_size'),
        ('extrafield', 'extrafield_size'),
        ('filecomment', 'filecomment_size'),
    ))
michael@0 229
michael@0 230
class JarLocalFileHeader(JarStruct):
    '''
    Local file header
    '''
    MAGIC = 0x04034b50
    STRUCT = OrderedDict((
        ('min_version', 'uint16'),
        ('general_flag', 'uint16'),
        ('compression', 'uint16'),
        ('lastmod_time', 'uint16'),
        ('lastmod_date', 'uint16'),
        ('crc32', 'uint32'),
        ('compressed_size', 'uint32'),
        ('uncompressed_size', 'uint32'),
        # Lengths of the two variable-size buffers ending the record.
        ('filename_size', 'uint16'),
        ('extra_field_size', 'uint16'),
        ('filename', 'filename_size'),
        ('extra_field', 'extra_field_size'),
    ))
michael@0 250
michael@0 251
class JarFileReader(object):
    '''
    File-like object giving access to a single member of a Jar archive.
    Instances are created by JarReader.
    '''
    def __init__(self, header, data):
        '''
        header is the local file header of the member in the jar archive,
        data is a buffer starting at the member data (it may extend past
        its end; only compressed_size bytes are used).
        '''
        assert header['compression'] in [JAR_DEFLATED, JAR_STORED]
        self._data = data
        # Keep a copy of the local file header fields callers care about.
        self.filename = header['filename']
        self.compressed_size = header['compressed_size']
        self.uncompressed_size = header['uncompressed_size']
        self.crc32 = header['crc32']
        self.compressed = header['compression'] == JAR_DEFLATED

    def read(self, length=-1):
        '''
        Read up to length bytes of uncompressed data (everything left when
        length is negative).
        '''
        stream = self.uncompressed_data
        return stream.read(length)

    def readlines(self):
        '''
        Return the remaining uncompressed data as a list of lines, line
        endings included.
        '''
        remaining = self.read()
        return remaining.splitlines(True)

    def __iter__(self):
        '''
        Support for the "for line in fileobj" construct.
        '''
        lines = self.readlines()
        return iter(lines)

    def seek(self, pos, whence=os.SEEK_SET):
        '''
        Move the current position in the uncompressed data; the next read
        starts from there.
        '''
        stream = self.uncompressed_data
        return stream.seek(pos, whence)

    def close(self):
        '''
        Free the uncompressed data buffer.
        '''
        self.uncompressed_data.close()

    @property
    def compressed_data(self):
        '''
        The raw member data as stored in the archive.
        '''
        return self._data[:self.compressed_size]

    @property
    def uncompressed_data(self):
        '''
        A BytesIO over the uncompressed member data. It is created on first
        access and reused afterwards. JarReaderError is raised when the
        data length does not match the size announced in the header.
        '''
        try:
            return self._uncompressed_data
        except AttributeError:
            pass
        raw = self.compressed_data.tobytes()
        if self.compressed:
            # Negative window bits: the archive stores headerless deflate.
            raw = zlib.decompress(raw, -MAX_WBITS)
        if len(raw) != self.uncompressed_size:
            raise JarReaderError('Corrupted file? %s' % self.filename)
        self._uncompressed_data = BytesIO(raw)
        return self._uncompressed_data
michael@0 326
michael@0 327
class JarReader(object):
    '''
    Class with methods to read Jar files. Can open standard jar files as well
    as Mozilla jar files (see further details in the JarWriter documentation).
    '''
    def __init__(self, file=None, fileobj=None):
        '''
        Opens the given file as a Jar archive. Use the given file-like object
        if one is given instead of opening the given file name.
        Raises JarReaderError when no end of central directory record can be
        found in the data.
        '''
        if fileobj:
            data = fileobj.read()
        else:
            # Close the file as soon as its content is in memory.
            with open(file, 'rb') as fh:
                data = fh.read()
        self._data = memoryview(data)
        size = len(self._data)
        # An archive cannot be smaller than an end of central directory
        # record. Bail out early: with shorter data the backwards scan
        # below would start past the beginning of the buffer and never hit
        # its termination condition.
        if size < CDIR_END_SIZE:
            raise JarReaderError('Not a jar?')
        # The End of Central Directory Record has a variable size because of
        # comments it may contain, so scan for it from the end of the file.
        offset = -CDIR_END_SIZE
        while True:
            signature = JarStruct.get_data('uint32', self._data[offset:])[0]
            if signature == JarCdirEnd.MAGIC:
                break
            if offset == -size:
                raise JarReaderError('Not a jar?')
            offset -= 1
        self._cdir_end = JarCdirEnd(self._data[offset:])

    def close(self):
        '''
        Free some resources associated with the Jar.
        '''
        del self._data

    @property
    def entries(self):
        '''
        Return an ordered dict of central directory entries, indexed by
        filename, in the order they appear in the Jar archive central
        directory. Directory entries are skipped. The result is computed on
        first access and cached.
        '''
        if hasattr(self, '_entries'):
            return self._entries
        preload = 0
        if self.is_optimized:
            # Optimized jars start with the amount of data to preload.
            preload = JarStruct.get_data('uint32', self._data)[0]
        entries = OrderedDict()
        offset = self._cdir_end['cdir_offset']
        for _ in range(self._cdir_end['cdir_entries']):
            entry = JarCdirEntry(self._data[offset:])
            offset += entry.size
            # Creator host system. 0 is MSDOS, 3 is Unix
            host = entry['creator_version'] >> 8
            # External attributes values depend on host above. On Unix the
            # higher bits are the stat.st_mode value. On MSDOS, the lower bits
            # are the FAT attributes.
            xattr = entry['external_attr']
            # Skip directories
            if (host == 0 and xattr & 0x10) or (host == 3 and
                                                xattr & (0o40000 << 16)):
                continue
            entries[entry['filename']] = entry
            # The last entry starting within the preloaded data wins.
            if entry['offset'] < preload:
                self._last_preloaded = entry['filename']
        self._entries = entries
        return entries

    @property
    def is_optimized(self):
        '''
        Return whether the jar archive is optimized.
        '''
        # In optimized jars, the central directory is at the beginning of the
        # file, after a single 32-bits value, which is the length of data
        # preloaded.
        return self._cdir_end['cdir_offset'] == \
            JarStruct.TYPE_MAPPING['uint32'][1]

    @property
    def last_preloaded(self):
        '''
        Return the name of the last file that is set to be preloaded, or
        None when there is none.
        See JarWriter documentation for more details on preloading.
        '''
        if hasattr(self, '_last_preloaded'):
            return self._last_preloaded
        self._last_preloaded = None
        # Reading the entries sets _last_preloaded as a side effect.
        self.entries
        return self._last_preloaded

    def _getreader(self, entry):
        '''
        Helper to create a JarFileReader corresponding to the given central
        directory entry. Raises JarReaderError when a field shared by the
        central directory entry and the local file header differs.
        '''
        header = JarLocalFileHeader(self._data[entry['offset']:])
        for key, value in entry:
            if key in header and header[key] != value:
                raise JarReaderError('Central directory and file header ' +
                                     'mismatch. Corrupted archive?')
        return JarFileReader(header,
                             self._data[entry['offset'] + header.size:])

    def __iter__(self):
        '''
        Iterate over all files in the Jar archive, in the form of
        JarFileReaders.
            for file in jarReader:
                ...
        '''
        for entry in self.entries.values():
            yield self._getreader(entry)

    def __getitem__(self, name):
        '''
        Get a JarFileReader for the given file name.
        '''
        return self._getreader(self.entries[name])

    def __contains__(self, name):
        '''
        Return whether the given file name appears in the Jar archive.
        '''
        return name in self.entries
michael@0 451
michael@0 452
class JarWriter(object):
    '''
    Class with methods to write Jar files. Can write more-or-less standard jar
    archives as well as jar archives optimized for Gecko. See the documentation
    for the finish() member function for a description of both layouts.
    '''
    def __init__(self, file=None, fileobj=None, compress=True, optimize=True):
        '''
        Initialize a Jar archive in the given file. Use the given file-like
        object if one is given instead of opening the given file name.
        The compress option determines the default behavior for storing data
        in the jar archive. The optimize options determines whether the jar
        archive should be optimized for Gecko or not.
        '''
        if fileobj:
            self._data = fileobj
        else:
            self._data = open(file, 'wb')
        self._compress = compress
        # Maps member names to (central directory entry, stored data).
        self._contents = OrderedDict()
        self._last_preloaded = None
        self._optimize = optimize

    def __enter__(self):
        '''
        Context manager __enter__ method for JarWriter.
        '''
        return self

    def __exit__(self, type, value, tb):
        '''
        Context manager __exit__ method for JarWriter. The archive is
        written out and closed whether or not the block raised.
        '''
        self.finish()

    def finish(self):
        '''
        Flush and close the Jar archive.

        Standard jar archives are laid out like the following:
            - Local file header 1
            - File data 1
            - Local file header 2
            - File data 2
            - (...)
            - Central directory entry pointing at Local file header 1
            - Central directory entry pointing at Local file header 2
            - (...)
            - End of central directory, pointing at first central directory
              entry.

        Jar archives optimized for Gecko are laid out like the following:
            - 32-bits unsigned integer giving the amount of data to preload.
            - Central directory entry pointing at Local file header 1
            - Central directory entry pointing at Local file header 2
            - (...)
            - End of central directory, pointing at first central directory
              entry.
            - Local file header 1
            - File data 1
            - Local file header 2
            - File data 2
            - (...)
            - End of central directory, pointing at first central directory
              entry.
        The duplication of the End of central directory is to accommodate some
        Zip reading tools that want an end of central directory structure to
        follow the central directory entries.
        '''
        offset = 0
        headers = {}
        preload_size = 0
        # Prepare central directory entries
        for entry, content in self._contents.values():
            header = JarLocalFileHeader()
            # Copy every field the two record types have in common.
            for name in entry.STRUCT:
                if name in header:
                    header[name] = entry[name]
            entry['offset'] = offset
            offset += len(content) + header.size
            if entry['filename'] == self._last_preloaded:
                preload_size = offset
            headers[entry] = header
        # Prepare end of central directory
        end = JarCdirEnd()
        end['disk_entries'] = len(self._contents)
        end['cdir_entries'] = end['disk_entries']
        end['cdir_size'] = sum(entry.size
                               for entry, _ in self._contents.values())
        # On optimized archives, store the preloaded size and the central
        # directory entries, followed by the first end of central directory.
        if self._optimize:
            end['cdir_offset'] = 4
            offset = end['cdir_size'] + end['cdir_offset'] + end.size
            if preload_size:
                preload_size += offset
            self._data.write(struct.pack('<I', preload_size))
            for entry, _ in self._contents.values():
                # Member data now comes after the leading central directory.
                entry['offset'] += offset
                self._data.write(entry.serialize())
            self._data.write(end.serialize())
        # Store local file entries followed by compressed data
        for entry, content in self._contents.values():
            self._data.write(headers[entry].serialize())
            self._data.write(content)
        # On non optimized archives, store the central directory entries.
        if not self._optimize:
            end['cdir_offset'] = offset
            for entry, _ in self._contents.values():
                self._data.write(entry.serialize())
        # Store the end of central directory.
        self._data.write(end.serialize())
        self._data.close()

    def add(self, name, data, compress=None, mode=None):
        '''
        Add a new member to the jar archive, with the given name and the given
        data.
        The compress option indicates if the given data should be compressed
        (True), not compressed (False), or compressed according to the default
        defined when creating the JarWriter (None).
        When the data should be compressed (True or None with self.compress ==
        True), it is only really compressed if the compressed size is smaller
        than the uncompressed size.
        The mode option gives the unix permissions that should be stored
        for the jar entry.
        The given data may be a buffer, a file-like instance, a Deflater or a
        JarFileReader instance. The latter two allow to avoid uncompressing
        data to recompress it.
        Raises JarWriterError when a member with the same name was already
        added, or when the type of data is not supported.
        '''
        if name in self._contents:
            raise JarWriterError("File %s already in JarWriter" % name)
        if compress is None:
            compress = self._compress
        if (isinstance(data, JarFileReader) and data.compressed == compress) \
                or (isinstance(data, Deflater) and data.compress == compress):
            # Already in the requested form: reuse it as is.
            deflater = data
        else:
            deflater = Deflater(compress)
            if isinstance(data, basestring):
                deflater.write(data)
            elif hasattr(data, 'read'):
                # Rewind when possible, but accept readable objects that
                # cannot seek instead of failing with AttributeError.
                if hasattr(data, 'seek'):
                    data.seek(0)
                deflater.write(data.read())
            else:
                raise JarWriterError("Don't know how to handle %s" %
                                     type(data))
        # Fill a central directory entry for this new member.
        entry = JarCdirEntry()
        entry['creator_version'] = 20
        if mode is not None:
            # Set creator host system (upper byte of creator_version)
            # to 3 (Unix) so mode is honored when there is one.
            entry['creator_version'] |= 3 << 8
            entry['external_attr'] = (mode & 0xFFFF) << 16
        if deflater.compressed:
            entry['min_version'] = 20  # Version 2.0 supports deflated streams
            entry['general_flag'] = 2  # Max compression
            entry['compression'] = JAR_DEFLATED
        else:
            entry['min_version'] = 10  # Version 1.0 for stored streams
            entry['general_flag'] = 0
            entry['compression'] = JAR_STORED
        # January 1st, 2010. See bug 592369.
        entry['lastmod_date'] = ((2010 - 1980) << 9) | (1 << 5) | 1
        entry['lastmod_time'] = 0
        entry['crc32'] = deflater.crc32
        entry['compressed_size'] = deflater.compressed_size
        entry['uncompressed_size'] = deflater.uncompressed_size
        entry['filename'] = name
        self._contents[name] = entry, deflater.compressed_data

    def preload(self, files):
        '''
        Set which members of the jar archive should be preloaded when opening
        the archive in Gecko. This reorders the members according to the order
        of given list. Names that are not in the archive are ignored.
        '''
        new_contents = OrderedDict()
        for f in files:
            if f not in self._contents:
                continue
            new_contents[f] = self._contents[f]
            self._last_preloaded = f
        # Members that are not preloaded keep their relative order.
        for f in self._contents:
            if f not in new_contents:
                new_contents[f] = self._contents[f]
        self._contents = new_contents
michael@0 641
michael@0 642
class Deflater(object):
    '''
    File-like interface to zlib compression. The data is actually not
    compressed unless the compressed form is smaller than the uncompressed
    data.
    '''
    def __init__(self, compress=True):
        '''
        Initialize a Deflater. The compress argument determines whether to
        try to compress at all.
        '''
        # Uncompressed copy of everything written so far.
        self._data = BytesIO()
        self.compress = compress
        if compress:
            self._deflater = zlib.compressobj(9, zlib.DEFLATED, -MAX_WBITS)
            self._deflated = BytesIO()
        else:
            self._deflater = None

    def write(self, data):
        '''
        Append a buffer to the Deflater.
        Raises JarWriterError, leaving the Deflater untouched, when the
        compression stream was already flushed (which happens as soon as
        one of the size or data properties is read).
        '''
        # Check before touching any buffer: a rejected write must not leave
        # the uncompressed copy out of sync with the compressed stream.
        if self.compress and self._deflater is None:
            raise JarWriterError("Can't write after flush")
        self._data.write(data)
        if self.compress:
            if isinstance(data, memoryview):
                data = data.tobytes()
            self._deflated.write(self._deflater.compress(data))

    def close(self):
        '''
        Close the Deflater.
        '''
        self._data.close()
        if self.compress:
            self._deflated.close()

    def _flush(self):
        '''
        Flush the underlying zlib compression object. No more data can be
        written afterwards.
        '''
        if self.compress and self._deflater:
            self._deflated.write(self._deflater.flush())
            self._deflater = None

    @property
    def compressed(self):
        '''
        Return whether the data should be compressed.
        '''
        return self._compressed_size < self.uncompressed_size

    @property
    def _compressed_size(self):
        '''
        Return the real compressed size of the data written to the Deflater. If
        the Deflater is set not to compress, the uncompressed size is returned.
        Otherwise, the actual compressed size is returned, whether or not it is
        a win over the uncompressed size.
        '''
        if self.compress:
            self._flush()
            return self._deflated.tell()
        return self.uncompressed_size

    @property
    def compressed_size(self):
        '''
        Return the compressed size of the data written to the Deflater. If the
        Deflater is set not to compress, the uncompressed size is returned.
        Otherwise, if the data should not be compressed (the real compressed
        size is bigger than the uncompressed size), return the uncompressed
        size.
        '''
        if self.compressed:
            return self._compressed_size
        return self.uncompressed_size

    @property
    def uncompressed_size(self):
        '''
        Return the size of the data written to the Deflater.
        '''
        return self._data.tell()

    @property
    def crc32(self):
        '''
        Return the crc32 of the data written to the Deflater.
        '''
        # Mask to get an unsigned 32-bits value.
        return zlib.crc32(self._data.getvalue()) & 0xffffffff

    @property
    def compressed_data(self):
        '''
        Return the compressed data, if the data should be compressed (real
        compressed size smaller than the uncompressed size), or the
        uncompressed data otherwise.
        '''
        if self.compressed:
            return self._deflated.getvalue()
        return self._data.getvalue()
michael@0 748
michael@0 749
class JarLog(dict):
    '''
    Helper to read the file Gecko generates when setting MOZ_JAR_LOG_FILE.
    The jar log is then available as a dict with the jar path as key (see
    canonicalize for more details on the key value), and the corresponding
    access log as a list value. Only the first access to a given member of
    a jar is stored.
    '''
    def __init__(self, file=None, fileobj=None):
        '''
        Read the log from the given file-like object if one is given,
        otherwise from the file with the given name.
        '''
        if fileobj:
            self._parse(fileobj)
        else:
            # The file is ours: make sure it gets closed.
            with open(file, 'r') as fh:
                self._parse(fh)

    def _parse(self, fileobj):
        '''
        Fill the dict from the lines of the given file-like object. Each
        line is expected to hold a url and a path separated by whitespace;
        lines that do not have both are skipped.
        '''
        urlmap = {}
        for line in fileobj:
            fields = line.strip().split(None, 1)
            # Blank or truncated line: nothing usable to record.
            if len(fields) != 2:
                continue
            url, path = fields
            # Canonicalize each distinct url only once.
            if url not in urlmap:
                urlmap[url] = JarLog.canonicalize(url)
            jar = urlmap[url]
            entry = self.setdefault(jar, [])
            if path not in entry:
                entry.append(path)

    @staticmethod
    def canonicalize(url):
        '''
        The jar path is stored in a MOZ_JAR_LOG_FILE log as a url. This method
        returns a unique value corresponding to such urls.
        - file:///{path} becomes {path}
        - jar:file:///{path}!/{subpath} becomes ({path}, {subpath})
        - jar:jar:file:///{path}!/{subpath}!/{subpath2} becomes
           ({path}, {subpath}, {subpath2})
        '''
        if not isinstance(url, ParseResult):
            # Assume that if it doesn't start with jar: or file:, it's a path.
            if not url.startswith(('jar:', 'file:')):
                url = 'file:///' + os.path.abspath(url)
            url = urlparse(url)
        assert url.scheme
        assert url.scheme in ('jar', 'file')
        if url.scheme == 'jar':
            # The path of a jar: url is itself a url; resolve it first, then
            # split the innermost "!/" separator off its last component.
            path = JarLog.canonicalize(url.path)
            if isinstance(path, tuple):
                return path[:-1] + tuple(path[-1].split('!/', 1))
            return tuple(path.split('!/', 1))
        if url.scheme == 'file':
            assert os.path.isabs(url.path)
            path = url.path
            # On Windows, url.path will be /drive:/path ; on Unix systems,
            # /path. As we want drive:/path instead of /drive:/path on Windows,
            # remove the leading /.
            if os.path.isabs(path[1:]):
                path = path[1:]
            path = os.path.realpath(path)
            return mozpack.path.normsep(os.path.normcase(path))

mercurial