# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

from io import BytesIO
import struct
import zlib
import os
from zipfile import (
    ZIP_STORED,
    ZIP_DEFLATED,
)
from collections import OrderedDict
try:
    from urlparse import urlparse, ParseResult  # Python 2
except ImportError:
    from urllib.parse import urlparse, ParseResult  # Python 3
try:
    import mozpack.path
except ImportError:
    # NOTE(review): project-local module, only needed by code further down
    # the file; guarded so this module can be imported standalone. Confirm
    # against the rest of the file.
    pass

JAR_STORED = ZIP_STORED
JAR_DEFLATED = ZIP_DEFLATED
# zlib window size. Deflate streams inside jars are raw (no zlib header),
# so decompression below uses -MAX_WBITS.
MAX_WBITS = 15


class JarReaderError(Exception):
    '''Error type for Jar reader errors.'''


class JarWriterError(Exception):
    '''Error type for Jar writer errors.'''


class JarStruct(object):
    '''
    Helper used to define ZIP archive raw data structures. Data structures
    handled by this helper all start with a magic number, defined in
    subclasses MAGIC field as a 32-bits unsigned integer, followed by data
    structured as described in subclasses STRUCT field.

    The STRUCT field contains a list of (name, type) pairs where name is a
    field name, and the type can be one of 'uint32', 'uint16' or one of the
    field names. In the latter case, the field is considered to be a string
    buffer with a length given in that field.
    For example,
        STRUCT = OrderedDict([
            ('version', 'uint32'),
            ('filename_size', 'uint16'),
            ('filename', 'filename_size'),
        ])
    describes a structure with a 'version' 32-bits unsigned integer field,
    followed by a 'filename_size' 16-bits unsigned integer field, followed by
    a filename_size-long string buffer 'filename'.

    Fields that are used as other fields sizes are not stored in objects. In
    the above example, an instance of such subclass would only have two
    attributes:
        obj['version']
        obj['filename']
    filename_size would be obtained with len(obj['filename']).

    JarStruct subclasses instances can be either initialized from existing
    data (deserialized), or with empty fields.
    '''

    # struct format character and byte size for each supported scalar type.
    TYPE_MAPPING = {'uint32': ('I', 4), 'uint16': ('H', 2)}

    def __init__(self, data=None):
        '''
        Create an instance from the given data. Data may be omitted to create
        an instance with empty fields.
        '''
        assert self.MAGIC and isinstance(self.STRUCT, OrderedDict)
        # Names of the fields that only encode the length of another field.
        self.size_fields = set(t for t in self.STRUCT.values()
                               if t not in JarStruct.TYPE_MAPPING)
        self._values = {}
        if data:
            self._init_data(data)
        else:
            self._init_empty()

    def _init_data(self, data):
        '''
        Initialize an instance from data, following the data structure
        described in self.STRUCT. The self.MAGIC signature is expected at
        data[:4].
        '''
        assert data is not None
        self.signature, size = JarStruct.get_data('uint32', data)
        if self.signature != self.MAGIC:
            raise JarReaderError('Bad magic')
        offset = size
        # For all fields used as other fields sizes, keep track of their value
        # separately.
        sizes = dict((t, 0) for t in self.size_fields)
        for name, t in self.STRUCT.items():
            if t in JarStruct.TYPE_MAPPING:
                value, size = JarStruct.get_data(t, data[offset:])
            else:
                # Variable-length buffer; its size was read earlier.
                size = sizes[t]
                value = data[offset:offset + size]
                if isinstance(value, memoryview):
                    value = value.tobytes()
            if name not in sizes:
                self._values[name] = value
            else:
                sizes[name] = value
            offset += size

    def _init_empty(self):
        '''
        Initialize an instance with empty fields.
        '''
        self.signature = self.MAGIC
        for name, t in self.STRUCT.items():
            if name in self.size_fields:
                continue
            self._values[name] = 0 if t in JarStruct.TYPE_MAPPING else b''

    @staticmethod
    def get_data(type, data):
        '''
        Deserialize a single field of given type (must be one of
        JarStruct.TYPE_MAPPING) at the beginning of the given data.
        Return a (value, size) tuple.
        '''
        assert type in JarStruct.TYPE_MAPPING
        assert data is not None
        format, size = JarStruct.TYPE_MAPPING[type]
        data = data[:size]
        if isinstance(data, memoryview):
            data = data.tobytes()
        # All ZIP values are little-endian.
        return struct.unpack('<' + format, data)[0], size

    def serialize(self):
        '''
        Serialize the data structure according to the data structure
        definition from self.STRUCT.
        '''
        # NOTE(review): this method and the ones below were garbled in the
        # source dump (everything between '<' and '>' was stripped);
        # reconstructed from the surviving structure and STRUCT definitions.
        serialized = struct.pack('<I', self.signature)
        # Map each size field to the variable-length field it describes, so
        # its value can be derived instead of stored.
        sizes = dict((t, name) for name, t in self.STRUCT.items()
                     if t not in JarStruct.TYPE_MAPPING)
        for name, t in self.STRUCT.items():
            if t in JarStruct.TYPE_MAPPING:
                format, size = JarStruct.TYPE_MAPPING[t]
                if name in sizes:
                    value = len(self[sizes[name]])
                else:
                    value = self[name]
                serialized += struct.pack('<' + format, value)
            else:
                serialized += self[name]
        return serialized

    @property
    def size(self):
        '''
        Return the size of the data structure, given the current values of
        all variable length fields.
        '''
        size = JarStruct.TYPE_MAPPING['uint32'][1]
        for name, type in self.STRUCT.items():
            if type in JarStruct.TYPE_MAPPING:
                size += JarStruct.TYPE_MAPPING[type][1]
            else:
                size += len(self[name])
        return size

    def __getitem__(self, key):
        return self._values[key]

    def __setitem__(self, key, value):
        if key not in self.STRUCT:
            raise KeyError(key)
        if key in self.size_fields:
            # Size fields are derived from the field they describe.
            raise AttributeError("can't set attribute")
        self._values[key] = value

    def __contains__(self, key):
        return key in self._values

    def __iter__(self):
        return iter(self._values.items())

    def __repr__(self):
        return "<%s %s>" % (self.__class__.__name__,
                            ' '.join('%s=%s' % (n, v) for n, v in self))


class JarCdirEnd(JarStruct):
    '''
    End of central directory record.
    '''
    MAGIC = 0x06054b50
    STRUCT = OrderedDict([
        ('disk_num', 'uint16'),
        ('cdir_disk', 'uint16'),
        ('disk_entries', 'uint16'),
        ('cdir_entries', 'uint16'),
        ('cdir_size', 'uint32'),
        ('cdir_offset', 'uint32'),
        ('comment_size', 'uint16'),
        ('comment', 'comment_size'),
    ])

# Size of an end-of-central-directory record with an empty comment (22).
CDIR_END_SIZE = JarCdirEnd().size


class JarCdirEntry(JarStruct):
    '''
    Central directory file header.
    '''
    MAGIC = 0x02014b50
    STRUCT = OrderedDict([
        ('creator_version', 'uint16'),
        ('min_version', 'uint16'),
        ('general_flag', 'uint16'),
        ('compression', 'uint16'),
        ('lastmod_time', 'uint16'),
        ('lastmod_date', 'uint16'),
        ('crc32', 'uint32'),
        ('compressed_size', 'uint32'),
        ('uncompressed_size', 'uint32'),
        ('filename_size', 'uint16'),
        ('extrafield_size', 'uint16'),
        ('filecomment_size', 'uint16'),
        ('disknum', 'uint16'),
        ('internal_attr', 'uint16'),
        ('external_attr', 'uint32'),
        ('offset', 'uint32'),
        ('filename', 'filename_size'),
        ('extrafield', 'extrafield_size'),
        ('filecomment', 'filecomment_size'),
    ])


class JarLocalFileHeader(JarStruct):
    '''
    Local file header.
    '''
    MAGIC = 0x04034b50
    STRUCT = OrderedDict([
        ('min_version', 'uint16'),
        ('general_flag', 'uint16'),
        ('compression', 'uint16'),
        ('lastmod_time', 'uint16'),
        ('lastmod_date', 'uint16'),
        ('crc32', 'uint32'),
        ('compressed_size', 'uint32'),
        ('uncompressed_size', 'uint32'),
        ('filename_size', 'uint16'),
        ('extra_field_size', 'uint16'),
        ('filename', 'filename_size'),
        ('extra_field', 'extra_field_size'),
    ])


class JarFileReader(object):
    '''
    File-like class for use by JarReader to give access to individual files
    within a Jar archive.
    '''
    def __init__(self, header, data):
        '''
        Initialize a JarFileReader. header is the local file header
        corresponding to the file in the jar archive, data a buffer
        containing the file data.
        '''
        assert header['compression'] in [JAR_DEFLATED, JAR_STORED]
        self._data = data
        # Copy some local file header fields.
        for name in ['filename', 'compressed_size',
                     'uncompressed_size', 'crc32']:
            setattr(self, name, header[name])
        self.compressed = header['compression'] == JAR_DEFLATED

    def read(self, length=-1):
        '''
        Read some amount of uncompressed data.
        '''
        return self.uncompressed_data.read(length)

    def readlines(self):
        '''
        Return a list containing all the lines of data in the uncompressed
        data.
        '''
        return self.read().splitlines(True)

    def __iter__(self):
        '''
        Iterator, to support the "for line in fileobj" constructs.
        '''
        return iter(self.readlines())

    def seek(self, pos, whence=os.SEEK_SET):
        '''
        Change the current position in the uncompressed data. Subsequent
        reads will start from there.
        '''
        return self.uncompressed_data.seek(pos, whence)

    def close(self):
        '''
        Free the uncompressed data buffer.
        '''
        self.uncompressed_data.close()

    @property
    def compressed_data(self):
        '''
        Return the raw compressed data.
        '''
        return self._data[:self.compressed_size]

    @property
    def uncompressed_data(self):
        '''
        Return the uncompressed data, lazily inflated and cached.
        '''
        if hasattr(self, '_uncompressed_data'):
            return self._uncompressed_data
        data = self.compressed_data
        if self.compressed:
            # Raw deflate stream: negative wbits means no zlib header.
            data = zlib.decompress(data.tobytes(), -MAX_WBITS)
        else:
            data = data.tobytes()
        if len(data) != self.uncompressed_size:
            raise JarReaderError('Corrupted file? %s' % self.filename)
        self._uncompressed_data = BytesIO(data)
        return self._uncompressed_data


class JarReader(object):
    '''
    Class with methods to read Jar files. Can open standard jar files as
    well as Mozilla jar files (see further details in the JarWriter
    documentation).
    '''
    def __init__(self, file=None, fileobj=None):
        '''
        Open the given file as a Jar archive. Use the given file-like object
        if one is given instead of opening the given file name.
        '''
        if fileobj:
            data = fileobj.read()
        else:
            # Close the file promptly instead of leaking the handle.
            with open(file, 'rb') as fh:
                data = fh.read()
        self._data = memoryview(data)
        # The End of Central Directory Record has a variable size because of
        # comments it may contain, so scan for it from the end of the file.
        offset = -CDIR_END_SIZE
        while True:
            signature = JarStruct.get_data('uint32', self._data[offset:])[0]
            if signature == JarCdirEnd.MAGIC:
                break
            if offset == -len(self._data):
                raise JarReaderError('Not a jar?')
            offset -= 1
        self._cdir_end = JarCdirEnd(self._data[offset:])

    def close(self):
        '''
        Free some resources associated with the Jar.
        '''
        del self._data

    @property
    def entries(self):
        '''
        Return an ordered dict of central directory entries, indexed by
        filename, in the order they appear in the Jar archive central
        directory. Directory entries are skipped.
        '''
        if hasattr(self, '_entries'):
            return self._entries
        preload = 0
        if self.is_optimized:
            # Optimized jars start with the preloaded-data length.
            preload = JarStruct.get_data('uint32', self._data)[0]
        entries = OrderedDict()
        offset = self._cdir_end['cdir_offset']
        for e in range(self._cdir_end['cdir_entries']):
            entry = JarCdirEntry(self._data[offset:])
            offset += entry.size
            # Creator host system. 0 is MSDOS, 3 is Unix.
            host = entry['creator_version'] >> 8
            # External attributes values depend on host above. On Unix the
            # higher bits are the stat.st_mode value. On MSDOS, the lower
            # bits are the FAT attributes.
            xattr = entry['external_attr']
            # Skip directories.
            if (host == 0 and xattr & 0x10) or (host == 3 and
                                                xattr & (0o40000 << 16)):
                continue
            entries[entry['filename']] = entry
            if entry['offset'] < preload:
                self._last_preloaded = entry['filename']
        self._entries = entries
        return entries

    @property
    def is_optimized(self):
        '''
        Return whether the jar archive is optimized.
        '''
        # In optimized jars, the central directory is at the beginning of
        # the file, after a single 32-bits value, which is the length of
        # data preloaded.
        return self._cdir_end['cdir_offset'] == \
            JarStruct.TYPE_MAPPING['uint32'][1]

    @property
    def last_preloaded(self):
        '''
        Return the name of the last file that is set to be preloaded.
        See JarWriter documentation for more details on preloading.
        '''
        if hasattr(self, '_last_preloaded'):
            return self._last_preloaded
        self._last_preloaded = None
        # Force the entries scan, which fills in _last_preloaded.
        self.entries
        return self._last_preloaded

    def _getreader(self, entry):
        '''
        Helper to create a JarFileReader corresponding to the given central
        directory entry.
        '''
        header = JarLocalFileHeader(self._data[entry['offset']:])
        for key, value in entry:
            if key in header and header[key] != value:
                raise JarReaderError('Central directory and file header ' +
                                     'mismatch. Corrupted archive?')
        return JarFileReader(header,
                             self._data[entry['offset'] + header.size:])

    def __iter__(self):
        '''
        Iterate over all files in the Jar archive, in the form of
        JarFileReaders.
            for file in jarReader:
                ...
        '''
        for entry in self.entries.values():
            yield self._getreader(entry)

    def __getitem__(self, name):
        '''
        Get a JarFileReader for the given file name.
        '''
        return self._getreader(self.entries[name])

    def __contains__(self, name):
        '''
        Return whether the given file name appears in the Jar archive.
        '''
        return name in self.entries


class JarWriter(object):
    '''
    Class with methods to write Jar files. Can write more-or-less standard
    jar archives as well as jar archives optimized for Gecko. See the
    documentation for the finish() member function for a description of both
    layouts.
    '''
    def __init__(self, file=None, fileobj=None, compress=True, optimize=True):
        '''
        Initialize a Jar archive in the given file. Use the given file-like
        object if one is given instead of opening the given file name.
        The compress option determines the default behavior for storing data
        in the jar archive. The optimize option determines whether the jar
        archive should be optimized for Gecko or not.
        '''
        if fileobj:
            self._data = fileobj
        else:
            self._data = open(file, 'wb')
        self._compress = compress
        # Maps filename -> (JarCdirEntry, compressed content bytes).
        self._contents = OrderedDict()
        self._last_preloaded = None
        self._optimize = optimize

    def __enter__(self):
        '''
        Context manager __enter__ method for JarWriter.
        '''
        return self

    def __exit__(self, type, value, tb):
        '''
        Context manager __exit__ method for JarWriter.
        '''
        self.finish()

    def finish(self):
        '''
        Flush and close the Jar archive.

        Standard jar archives are laid out like the following:
            - Local file header 1
            - File data 1
            - Local file header 2
            - File data 2
            - (...)
            - Central directory entry pointing at Local file header 1
            - Central directory entry pointing at Local file header 2
            - (...)
            - End of central directory, pointing at first central directory
              entry.

        Jar archives optimized for Gecko are laid out like the following:
            - 32-bits unsigned integer giving the amount of data to preload.
            - Central directory entry pointing at Local file header 1
            - Central directory entry pointing at Local file header 2
            - (...)
            - End of central directory, pointing at first central directory
              entry.
            - Local file header 1
            - File data 1
            - Local file header 2
            - File data 2
            - (...)
            - End of central directory, pointing at first central directory
              entry.
        The duplication of the End of central directory is to accomodate
        some Zip reading tools that want an end of central directory
        structure to follow the central directory entries.
        '''
        # NOTE(review): the tail of this method was cut off in the source
        # dump; reconstructed to match the two layouts documented above.
        offset = 0
        headers = {}
        preload_size = 0
        # Prepare central directory entries, and compute each local file
        # header's offset within the data section.
        for entry, content in self._contents.values():
            header = JarLocalFileHeader()
            for name in entry.STRUCT:
                if name in header:
                    header[name] = entry[name]
            entry['offset'] = offset
            offset += len(content) + header.size
            if entry['filename'] == self._last_preloaded:
                preload_size = offset
            headers[entry] = header
        # Prepare end of central directory.
        end = JarCdirEnd()
        end['disk_entries'] = len(self._contents)
        end['cdir_entries'] = end['disk_entries']
        end['cdir_size'] = sum(e.size for e, _ in self._contents.values())
        # On optimized archives, store the preloaded size and the central
        # directory entries, followed by the first end of central directory.
        if self._optimize:
            end['cdir_offset'] = 4
            offset = end['cdir_size'] + end['cdir_offset'] + end.size
            if preload_size:
                preload_size += offset
            self._data.write(struct.pack('<I', preload_size))
            for entry, _ in self._contents.values():
                # Local file headers follow the central directory, so shift
                # every recorded offset past it.
                entry['offset'] += offset
                self._data.write(entry.serialize())
            self._data.write(end.serialize())
        # Store local file entries followed by compressed data.
        for entry, content in self._contents.values():
            self._data.write(headers[entry].serialize())
            self._data.write(content)
        # On non optimized archives, store the central directory entries.
        if not self._optimize:
            end['cdir_offset'] = offset
            for entry, _ in self._contents.values():
                self._data.write(entry.serialize())
        # Store the end of central directory.
        self._data.write(end.serialize())
        self._data.close()