# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

from io import BytesIO
import struct
import zlib
import os
from zipfile import (
    ZIP_STORED,
    ZIP_DEFLATED,
)
from collections import OrderedDict
try:
    from urlparse import urlparse, ParseResult  # Python 2
except ImportError:
    from urllib.parse import urlparse, ParseResult  # Python 3
try:
    import mozpack.path
except ImportError:
    # NOTE(review): project-local module, only needed by code further down
    # the file; guarded so this module can be imported standalone. Confirm
    # against the rest of the file.
    pass

JAR_STORED = ZIP_STORED
JAR_DEFLATED = ZIP_DEFLATED
# zlib window size. Deflate streams inside jars are raw (no zlib header),
# so decompression below uses -MAX_WBITS.
MAX_WBITS = 15


class JarReaderError(Exception):
    '''Error type for Jar reader errors.'''


class JarWriterError(Exception):
    '''Error type for Jar writer errors.'''


class JarStruct(object):
    '''
    Helper used to define ZIP archive raw data structures. Data structures
    handled by this helper all start with a magic number, defined in
    subclasses MAGIC field as a 32-bits unsigned integer, followed by data
    structured as described in subclasses STRUCT field.

    The STRUCT field contains a list of (name, type) pairs where name is a
    field name, and the type can be one of 'uint32', 'uint16' or one of the
    field names. In the latter case, the field is considered to be a string
    buffer with a length given in that field.
    For example,
        STRUCT = OrderedDict([
            ('version', 'uint32'),
            ('filename_size', 'uint16'),
            ('filename', 'filename_size'),
        ])
    describes a structure with a 'version' 32-bits unsigned integer field,
    followed by a 'filename_size' 16-bits unsigned integer field, followed by
    a filename_size-long string buffer 'filename'.

    Fields that are used as other fields sizes are not stored in objects. In
    the above example, an instance of such subclass would only have two
    attributes:
        obj['version']
        obj['filename']
    filename_size would be obtained with len(obj['filename']).

    JarStruct subclasses instances can be either initialized from existing
    data (deserialized), or with empty fields.
    '''

    # struct format character and byte size for each supported scalar type.
    TYPE_MAPPING = {'uint32': ('I', 4), 'uint16': ('H', 2)}

    def __init__(self, data=None):
        '''
        Create an instance from the given data. Data may be omitted to create
        an instance with empty fields.
        '''
        assert self.MAGIC and isinstance(self.STRUCT, OrderedDict)
        # Names of the fields that only encode the length of another field.
        self.size_fields = set(t for t in self.STRUCT.values()
                               if t not in JarStruct.TYPE_MAPPING)
        self._values = {}
        if data:
            self._init_data(data)
        else:
            self._init_empty()

    def _init_data(self, data):
        '''
        Initialize an instance from data, following the data structure
        described in self.STRUCT. The self.MAGIC signature is expected at
        data[:4].
        '''
        assert data is not None
        self.signature, size = JarStruct.get_data('uint32', data)
        if self.signature != self.MAGIC:
            raise JarReaderError('Bad magic')
        offset = size
        # For all fields used as other fields sizes, keep track of their value
        # separately.
        sizes = dict((t, 0) for t in self.size_fields)
        for name, t in self.STRUCT.items():
            if t in JarStruct.TYPE_MAPPING:
                value, size = JarStruct.get_data(t, data[offset:])
            else:
                # Variable-length buffer; its size was read earlier.
                size = sizes[t]
                value = data[offset:offset + size]
                if isinstance(value, memoryview):
                    value = value.tobytes()
            if name not in sizes:
                self._values[name] = value
            else:
                sizes[name] = value
            offset += size

    def _init_empty(self):
        '''
        Initialize an instance with empty fields.
        '''
        self.signature = self.MAGIC
        for name, t in self.STRUCT.items():
            if name in self.size_fields:
                continue
            self._values[name] = 0 if t in JarStruct.TYPE_MAPPING else b''

    @staticmethod
    def get_data(type, data):
        '''
        Deserialize a single field of given type (must be one of
        JarStruct.TYPE_MAPPING) at the beginning of the given data.
        Return a (value, size) tuple.
        '''
        assert type in JarStruct.TYPE_MAPPING
        assert data is not None
        format, size = JarStruct.TYPE_MAPPING[type]
        data = data[:size]
        if isinstance(data, memoryview):
            data = data.tobytes()
        # All ZIP values are little-endian.
        return struct.unpack('<' + format, data)[0], size

    def serialize(self):
        '''
        Serialize the data structure according to the data structure
        definition from self.STRUCT.
        '''
        # NOTE(review): this method and the ones below were garbled in the
        # source dump (everything between '<' and '>' was stripped);
        # reconstructed from the surviving structure and STRUCT definitions.
        serialized = struct.pack('<I', self.signature)
        # Map each size field to the variable-length field it describes, so
        # its value can be derived instead of stored.
        sizes = dict((t, name) for name, t in self.STRUCT.items()
                     if t not in JarStruct.TYPE_MAPPING)
        for name, t in self.STRUCT.items():
            if t in JarStruct.TYPE_MAPPING:
                format, size = JarStruct.TYPE_MAPPING[t]
                if name in sizes:
                    value = len(self[sizes[name]])
                else:
                    value = self[name]
                serialized += struct.pack('<' + format, value)
            else:
                serialized += self[name]
        return serialized

    @property
    def size(self):
        '''
        Return the size of the data structure, given the current values of
        all variable length fields.
        '''
        size = JarStruct.TYPE_MAPPING['uint32'][1]
        for name, type in self.STRUCT.items():
            if type in JarStruct.TYPE_MAPPING:
                size += JarStruct.TYPE_MAPPING[type][1]
            else:
                size += len(self[name])
        return size

    def __getitem__(self, key):
        return self._values[key]

    def __setitem__(self, key, value):
        if key not in self.STRUCT:
            raise KeyError(key)
        if key in self.size_fields:
            # Size fields are derived from the field they describe.
            raise AttributeError("can't set attribute")
        self._values[key] = value

    def __contains__(self, key):
        return key in self._values

    def __iter__(self):
        return iter(self._values.items())

    def __repr__(self):
        return "<%s %s>" % (self.__class__.__name__,
                            ' '.join('%s=%s' % (n, v) for n, v in self))


class JarCdirEnd(JarStruct):
    '''
    End of central directory record.
    '''
    MAGIC = 0x06054b50
    STRUCT = OrderedDict([
        ('disk_num', 'uint16'),
        ('cdir_disk', 'uint16'),
        ('disk_entries', 'uint16'),
        ('cdir_entries', 'uint16'),
        ('cdir_size', 'uint32'),
        ('cdir_offset', 'uint32'),
        ('comment_size', 'uint16'),
        ('comment', 'comment_size'),
    ])

# Size of an end-of-central-directory record with an empty comment (22).
CDIR_END_SIZE = JarCdirEnd().size


class JarCdirEntry(JarStruct):
    '''
    Central directory file header.
    '''
    MAGIC = 0x02014b50
    STRUCT = OrderedDict([
        ('creator_version', 'uint16'),
        ('min_version', 'uint16'),
        ('general_flag', 'uint16'),
        ('compression', 'uint16'),
        ('lastmod_time', 'uint16'),
        ('lastmod_date', 'uint16'),
        ('crc32', 'uint32'),
        ('compressed_size', 'uint32'),
        ('uncompressed_size', 'uint32'),
        ('filename_size', 'uint16'),
        ('extrafield_size', 'uint16'),
        ('filecomment_size', 'uint16'),
        ('disknum', 'uint16'),
        ('internal_attr', 'uint16'),
        ('external_attr', 'uint32'),
        ('offset', 'uint32'),
        ('filename', 'filename_size'),
        ('extrafield', 'extrafield_size'),
        ('filecomment', 'filecomment_size'),
    ])


class JarLocalFileHeader(JarStruct):
    '''
    Local file header.
    '''
    MAGIC = 0x04034b50
    STRUCT = OrderedDict([
        ('min_version', 'uint16'),
        ('general_flag', 'uint16'),
        ('compression', 'uint16'),
        ('lastmod_time', 'uint16'),
        ('lastmod_date', 'uint16'),
        ('crc32', 'uint32'),
        ('compressed_size', 'uint32'),
        ('uncompressed_size', 'uint32'),
        ('filename_size', 'uint16'),
        ('extra_field_size', 'uint16'),
        ('filename', 'filename_size'),
        ('extra_field', 'extra_field_size'),
    ])


class JarFileReader(object):
    '''
    File-like class for use by JarReader to give access to individual files
    within a Jar archive.
    '''
    def __init__(self, header, data):
        '''
        Initialize a JarFileReader. header is the local file header
        corresponding to the file in the jar archive, data a buffer
        containing the file data.
        '''
        assert header['compression'] in [JAR_DEFLATED, JAR_STORED]
        self._data = data
        # Copy some local file header fields.
        for name in ['filename', 'compressed_size',
                     'uncompressed_size', 'crc32']:
            setattr(self, name, header[name])
        self.compressed = header['compression'] == JAR_DEFLATED

    def read(self, length=-1):
        '''
        Read some amount of uncompressed data.
        '''
        return self.uncompressed_data.read(length)

    def readlines(self):
        '''
        Return a list containing all the lines of data in the uncompressed
        data.
        '''
        return self.read().splitlines(True)

    def __iter__(self):
        '''
        Iterator, to support the "for line in fileobj" constructs.
        '''
        return iter(self.readlines())

    def seek(self, pos, whence=os.SEEK_SET):
        '''
        Change the current position in the uncompressed data. Subsequent
        reads will start from there.
        '''
        return self.uncompressed_data.seek(pos, whence)

    def close(self):
        '''
        Free the uncompressed data buffer.
        '''
        self.uncompressed_data.close()

    @property
    def compressed_data(self):
        '''
        Return the raw compressed data.
        '''
        return self._data[:self.compressed_size]

    @property
    def uncompressed_data(self):
        '''
        Return the uncompressed data, lazily inflated and cached.
        '''
        if hasattr(self, '_uncompressed_data'):
            return self._uncompressed_data
        data = self.compressed_data
        if self.compressed:
            # Raw deflate stream: negative wbits means no zlib header.
            data = zlib.decompress(data.tobytes(), -MAX_WBITS)
        else:
            data = data.tobytes()
        if len(data) != self.uncompressed_size:
            raise JarReaderError('Corrupted file? %s' % self.filename)
        self._uncompressed_data = BytesIO(data)
        return self._uncompressed_data


class JarReader(object):
    '''
    Class with methods to read Jar files. Can open standard jar files as
    well as Mozilla jar files (see further details in the JarWriter
    documentation).
    '''
    def __init__(self, file=None, fileobj=None):
        '''
        Open the given file as a Jar archive. Use the given file-like object
        if one is given instead of opening the given file name.
        '''
        if fileobj:
            data = fileobj.read()
        else:
            # Close the file promptly instead of leaking the handle.
            with open(file, 'rb') as fh:
                data = fh.read()
        self._data = memoryview(data)
        # The End of Central Directory Record has a variable size because of
        # comments it may contain, so scan for it from the end of the file.
        offset = -CDIR_END_SIZE
        while True:
            signature = JarStruct.get_data('uint32', self._data[offset:])[0]
            if signature == JarCdirEnd.MAGIC:
                break
            if offset == -len(self._data):
                raise JarReaderError('Not a jar?')
            offset -= 1
        self._cdir_end = JarCdirEnd(self._data[offset:])

    def close(self):
        '''
        Free some resources associated with the Jar.
        '''
        del self._data

    @property
    def entries(self):
        '''
        Return an ordered dict of central directory entries, indexed by
        filename, in the order they appear in the Jar archive central
        directory. Directory entries are skipped.
        '''
        if hasattr(self, '_entries'):
            return self._entries
        preload = 0
        if self.is_optimized:
            # Optimized jars start with the preloaded-data length.
            preload = JarStruct.get_data('uint32', self._data)[0]
        entries = OrderedDict()
        offset = self._cdir_end['cdir_offset']
        for e in range(self._cdir_end['cdir_entries']):
            entry = JarCdirEntry(self._data[offset:])
            offset += entry.size
            # Creator host system. 0 is MSDOS, 3 is Unix.
            host = entry['creator_version'] >> 8
            # External attributes values depend on host above. On Unix the
            # higher bits are the stat.st_mode value. On MSDOS, the lower
            # bits are the FAT attributes.
            xattr = entry['external_attr']
            # Skip directories.
            if (host == 0 and xattr & 0x10) or (host == 3 and
                                                xattr & (0o40000 << 16)):
                continue
            entries[entry['filename']] = entry
            if entry['offset'] < preload:
                self._last_preloaded = entry['filename']
        self._entries = entries
        return entries

    @property
    def is_optimized(self):
        '''
        Return whether the jar archive is optimized.
        '''
        # In optimized jars, the central directory is at the beginning of
        # the file, after a single 32-bits value, which is the length of
        # data preloaded.
        return self._cdir_end['cdir_offset'] == \
            JarStruct.TYPE_MAPPING['uint32'][1]

    @property
    def last_preloaded(self):
        '''
        Return the name of the last file that is set to be preloaded.
        See JarWriter documentation for more details on preloading.
        '''
        if hasattr(self, '_last_preloaded'):
            return self._last_preloaded
        self._last_preloaded = None
        # Force the entries scan, which fills in _last_preloaded.
        self.entries
        return self._last_preloaded

    def _getreader(self, entry):
        '''
        Helper to create a JarFileReader corresponding to the given central
        directory entry.
        '''
        header = JarLocalFileHeader(self._data[entry['offset']:])
        for key, value in entry:
            if key in header and header[key] != value:
                raise JarReaderError('Central directory and file header ' +
                                     'mismatch. Corrupted archive?')
        return JarFileReader(header,
                             self._data[entry['offset'] + header.size:])

    def __iter__(self):
        '''
        Iterate over all files in the Jar archive, in the form of
        JarFileReaders.
            for file in jarReader:
                ...
        '''
        for entry in self.entries.values():
            yield self._getreader(entry)

    def __getitem__(self, name):
        '''
        Get a JarFileReader for the given file name.
        '''
        return self._getreader(self.entries[name])

    def __contains__(self, name):
        '''
        Return whether the given file name appears in the Jar archive.
        '''
        return name in self.entries


class JarWriter(object):
    '''
    Class with methods to write Jar files. Can write more-or-less standard
    jar archives as well as jar archives optimized for Gecko. See the
    documentation for the finish() member function for a description of both
    layouts.
    '''
    def __init__(self, file=None, fileobj=None, compress=True, optimize=True):
        '''
        Initialize a Jar archive in the given file. Use the given file-like
        object if one is given instead of opening the given file name.
        The compress option determines the default behavior for storing data
        in the jar archive. The optimize option determines whether the jar
        archive should be optimized for Gecko or not.
        '''
        if fileobj:
            self._data = fileobj
        else:
            self._data = open(file, 'wb')
        self._compress = compress
        # Maps filename -> (JarCdirEntry, compressed content bytes).
        self._contents = OrderedDict()
        self._last_preloaded = None
        self._optimize = optimize

    def __enter__(self):
        '''
        Context manager __enter__ method for JarWriter.
        '''
        return self

    def __exit__(self, type, value, tb):
        '''
        Context manager __exit__ method for JarWriter.
        '''
        self.finish()

    def finish(self):
        '''
        Flush and close the Jar archive.

        Standard jar archives are laid out like the following:
            - Local file header 1
            - File data 1
            - Local file header 2
            - File data 2
            - (...)
            - Central directory entry pointing at Local file header 1
            - Central directory entry pointing at Local file header 2
            - (...)
            - End of central directory, pointing at first central directory
              entry.

        Jar archives optimized for Gecko are laid out like the following:
            - 32-bits unsigned integer giving the amount of data to preload.
            - Central directory entry pointing at Local file header 1
            - Central directory entry pointing at Local file header 2
            - (...)
            - End of central directory, pointing at first central directory
              entry.
            - Local file header 1
            - File data 1
            - Local file header 2
            - File data 2
            - (...)
            - End of central directory, pointing at first central directory
              entry.
        The duplication of the End of central directory is to accomodate
        some Zip reading tools that want an end of central directory
        structure to follow the central directory entries.
        '''
        # NOTE(review): the tail of this method was cut off in the source
        # dump; reconstructed to match the two layouts documented above.
        offset = 0
        headers = {}
        preload_size = 0
        # Prepare central directory entries, and compute each local file
        # header's offset within the data section.
        for entry, content in self._contents.values():
            header = JarLocalFileHeader()
            for name in entry.STRUCT:
                if name in header:
                    header[name] = entry[name]
            entry['offset'] = offset
            offset += len(content) + header.size
            if entry['filename'] == self._last_preloaded:
                preload_size = offset
            headers[entry] = header
        # Prepare end of central directory.
        end = JarCdirEnd()
        end['disk_entries'] = len(self._contents)
        end['cdir_entries'] = end['disk_entries']
        end['cdir_size'] = sum(e.size for e, _ in self._contents.values())
        # On optimized archives, store the preloaded size and the central
        # directory entries, followed by the first end of central directory.
        if self._optimize:
            end['cdir_offset'] = 4
            offset = end['cdir_size'] + end['cdir_offset'] + end.size
            if preload_size:
                preload_size += offset
            self._data.write(struct.pack('<I', preload_size))
            for entry, _ in self._contents.values():
                # Local file headers follow the central directory, so shift
                # every recorded offset past it.
                entry['offset'] += offset
                self._data.write(entry.serialize())
            self._data.write(end.serialize())
        # Store local file entries followed by compressed data.
        for entry, content in self._contents.values():
            self._data.write(headers[entry].serialize())
            self._data.write(content)
        # On non optimized archives, store the central directory entries.
        if not self._optimize:
            end['cdir_offset'] = offset
            for entry, _ in self._contents.values():
                self._data.write(entry.serialize())
        # Store the end of central directory.
        self._data.write(end.serialize())
        self._data.close()