python/mozbuild/mozpack/mozjar.py

author       Michael Schloh von Bennewitz <michael@schloh.com>
date         Fri, 16 Jan 2015 18:13:44 +0100
branch       TOR_BUG_9701
changeset    14:925c144e1f1f
permissions  -rw-r--r--

Integrate suggestion from review to improve consistency with existing code.

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

from io import BytesIO
import struct
import zlib
import os
from zipfile import (
    ZIP_STORED,
    ZIP_DEFLATED,
)
from collections import OrderedDict
from urlparse import urlparse, ParseResult
import mozpack.path

JAR_STORED = ZIP_STORED
JAR_DEFLATED = ZIP_DEFLATED
MAX_WBITS = 15


class JarReaderError(Exception):
    '''Error type for Jar reader errors.'''


class JarWriterError(Exception):
    '''Error type for Jar writer errors.'''


class JarStruct(object):
    '''
    Helper used to define ZIP archive raw data structures. Data structures
    handled by this helper all start with a magic number, defined in the
    subclass's MAGIC field as a 32-bit unsigned integer, followed by data
    structured as described in the subclass's STRUCT field.

    The STRUCT field contains a list of (name, type) pairs where name is a
    field name, and the type can be one of 'uint32', 'uint16' or one of the
    field names. In the latter case, the field is considered to be a string
    buffer with a length given in that field.
    For example,
        STRUCT = [
            ('version', 'uint32'),
            ('filename_size', 'uint16'),
            ('filename', 'filename_size')
        ]
    describes a structure with a 'version' 32-bit unsigned integer field,
    followed by a 'filename_size' 16-bit unsigned integer field, followed by a
    filename_size-long string buffer 'filename'.

    Fields that are used as other fields' sizes are not stored in objects. In
    the above example, an instance of such a subclass would only have two
    attributes:
        obj['version']
        obj['filename']
    filename_size would be obtained with len(obj['filename']).

    Instances of JarStruct subclasses can either be initialized from existing
    data (deserialized), or with empty fields.
    '''

    TYPE_MAPPING = {'uint32': ('I', 4), 'uint16': ('H', 2)}

    def __init__(self, data=None):
        '''
        Create an instance from the given data. Data may be omitted to create
        an instance with empty fields.
        '''
        assert self.MAGIC and isinstance(self.STRUCT, OrderedDict)
        self.size_fields = set(t for t in self.STRUCT.itervalues()
                               if not t in JarStruct.TYPE_MAPPING)
        self._values = {}
        if data:
            self._init_data(data)
        else:
            self._init_empty()

    def _init_data(self, data):
        '''
        Initialize an instance from data, following the data structure
        described in self.STRUCT. The self.MAGIC signature is expected at
        data[:4].
        '''
        assert data is not None
        self.signature, size = JarStruct.get_data('uint32', data)
        if self.signature != self.MAGIC:
            raise JarReaderError('Bad magic')
        offset = size
        # For all fields used as other fields' sizes, keep track of their
        # value separately.
        sizes = dict((t, 0) for t in self.size_fields)
        for name, t in self.STRUCT.iteritems():
            if t in JarStruct.TYPE_MAPPING:
                value, size = JarStruct.get_data(t, data[offset:])
            else:
                size = sizes[t]
                value = data[offset:offset + size]
                if isinstance(value, memoryview):
                    value = value.tobytes()
            if not name in sizes:
                self._values[name] = value
            else:
                sizes[name] = value
            offset += size

    def _init_empty(self):
        '''
        Initialize an instance with empty fields.
        '''
        self.signature = self.MAGIC
        for name, t in self.STRUCT.iteritems():
            if name in self.size_fields:
                continue
            self._values[name] = 0 if t in JarStruct.TYPE_MAPPING else ''

    @staticmethod
    def get_data(type, data):
        '''
        Deserialize a single field of given type (must be one of
        JarStruct.TYPE_MAPPING) from the beginning of the given data.
        '''
        assert type in JarStruct.TYPE_MAPPING
        assert data is not None
        format, size = JarStruct.TYPE_MAPPING[type]
        data = data[:size]
        if isinstance(data, memoryview):
            data = data.tobytes()
        return struct.unpack('<' + format, data)[0], size

    def serialize(self):
        '''
        Serialize the data structure according to the data structure definition
        from self.STRUCT.
        '''
        serialized = struct.pack('<I', self.signature)
        sizes = dict((t, name) for name, t in self.STRUCT.iteritems()
                     if not t in JarStruct.TYPE_MAPPING)
        for name, t in self.STRUCT.iteritems():
            if t in JarStruct.TYPE_MAPPING:
                format, size = JarStruct.TYPE_MAPPING[t]
                if name in sizes:
                    value = len(self[sizes[name]])
                else:
                    value = self[name]
                serialized += struct.pack('<' + format, value)
            else:
                serialized += self[name]
        return serialized

    @property
    def size(self):
        '''
        Return the size of the data structure, given the current values of all
        variable length fields.
        '''
        size = JarStruct.TYPE_MAPPING['uint32'][1]
        for name, type in self.STRUCT.iteritems():
            if type in JarStruct.TYPE_MAPPING:
                size += JarStruct.TYPE_MAPPING[type][1]
            else:
                size += len(self[name])
        return size

    def __getitem__(self, key):
        return self._values[key]

    def __setitem__(self, key, value):
        if not key in self.STRUCT:
            raise KeyError(key)
        if key in self.size_fields:
            raise AttributeError("can't set attribute")
        self._values[key] = value

    def __contains__(self, key):
        return key in self._values

    def __iter__(self):
        return self._values.iteritems()

    def __repr__(self):
        return "<%s %s>" % (self.__class__.__name__,
                            ' '.join('%s=%s' % (n, v) for n, v in self))


class JarCdirEnd(JarStruct):
    '''
    End of central directory record.
    '''
    MAGIC = 0x06054b50
    STRUCT = OrderedDict([
        ('disk_num', 'uint16'),
        ('cdir_disk', 'uint16'),
        ('disk_entries', 'uint16'),
        ('cdir_entries', 'uint16'),
        ('cdir_size', 'uint32'),
        ('cdir_offset', 'uint32'),
        ('comment_size', 'uint16'),
        ('comment', 'comment_size'),
    ])

CDIR_END_SIZE = JarCdirEnd().size
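

# A minimal sketch (not part of the original module) of how the JarStruct
# machinery described above is used: round-trip an end-of-central-directory
# record through serialize() and the deserializing constructor. The helper
# name _example_jarstruct_roundtrip and the sample comment are hypothetical.
def _example_jarstruct_roundtrip():
    end = JarCdirEnd()
    end['comment'] = 'example comment'
    blob = end.serialize()
    # The computed size accounts for the variable-length comment field.
    assert len(blob) == end.size
    parsed = JarCdirEnd(blob)
    assert parsed['comment'] == 'example comment'
    return parsed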


class JarCdirEntry(JarStruct):
    '''
    Central directory file header
    '''
    MAGIC = 0x02014b50
    STRUCT = OrderedDict([
        ('creator_version', 'uint16'),
        ('min_version', 'uint16'),
        ('general_flag', 'uint16'),
        ('compression', 'uint16'),
        ('lastmod_time', 'uint16'),
        ('lastmod_date', 'uint16'),
        ('crc32', 'uint32'),
        ('compressed_size', 'uint32'),
        ('uncompressed_size', 'uint32'),
        ('filename_size', 'uint16'),
        ('extrafield_size', 'uint16'),
        ('filecomment_size', 'uint16'),
        ('disknum', 'uint16'),
        ('internal_attr', 'uint16'),
        ('external_attr', 'uint32'),
        ('offset', 'uint32'),
        ('filename', 'filename_size'),
        ('extrafield', 'extrafield_size'),
        ('filecomment', 'filecomment_size'),
    ])


class JarLocalFileHeader(JarStruct):
    '''
    Local file header
    '''
    MAGIC = 0x04034b50
    STRUCT = OrderedDict([
        ('min_version', 'uint16'),
        ('general_flag', 'uint16'),
        ('compression', 'uint16'),
        ('lastmod_time', 'uint16'),
        ('lastmod_date', 'uint16'),
        ('crc32', 'uint32'),
        ('compressed_size', 'uint32'),
        ('uncompressed_size', 'uint32'),
        ('filename_size', 'uint16'),
        ('extra_field_size', 'uint16'),
        ('filename', 'filename_size'),
        ('extra_field', 'extra_field_size'),
    ])


class JarFileReader(object):
    '''
    File-like class for use by JarReader to give access to individual files
    within a Jar archive.
    '''
    def __init__(self, header, data):
        '''
        Initialize a JarFileReader. header is the local file header
        corresponding to the file in the jar archive, and data is a buffer
        containing the file data.
        '''
        assert header['compression'] in [JAR_DEFLATED, JAR_STORED]
        self._data = data
        # Copy some local file header fields.
        for name in ['filename', 'compressed_size',
                     'uncompressed_size', 'crc32']:
            setattr(self, name, header[name])
        self.compressed = header['compression'] == JAR_DEFLATED

    def read(self, length=-1):
        '''
        Read some amount of uncompressed data.
        '''
        return self.uncompressed_data.read(length)

    def readlines(self):
        '''
        Return a list containing all the lines of data in the uncompressed
        data.
        '''
        return self.read().splitlines(True)

    def __iter__(self):
        '''
        Iterator, to support the "for line in fileobj" constructs.
        '''
        return iter(self.readlines())

    def seek(self, pos, whence=os.SEEK_SET):
        '''
        Change the current position in the uncompressed data. Subsequent reads
        will start from there.
        '''
        return self.uncompressed_data.seek(pos, whence)

    def close(self):
        '''
        Free the uncompressed data buffer.
        '''
        self.uncompressed_data.close()

    @property
    def compressed_data(self):
        '''
        Return the raw compressed data.
        '''
        return self._data[:self.compressed_size]

    @property
    def uncompressed_data(self):
        '''
        Return the uncompressed data.
        '''
        if hasattr(self, '_uncompressed_data'):
            return self._uncompressed_data
        data = self.compressed_data
        if self.compressed:
            data = zlib.decompress(data.tobytes(), -MAX_WBITS)
        else:
            data = data.tobytes()
        if len(data) != self.uncompressed_size:
            raise JarReaderError('Corrupted file? %s' % self.filename)
        self._uncompressed_data = BytesIO(data)
        return self._uncompressed_data
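

# A minimal sketch (not part of the original module) showing the file-like
# interface JarFileReader exposes. It hand-builds a local file header for a
# stored (uncompressed) member; the helper name and the sample payload are
# hypothetical. In practice, JarFileReader instances come from JarReader.
def _example_jarfilereader_sketch():
    payload = 'hello, jar'
    header = JarLocalFileHeader()
    header['compression'] = JAR_STORED
    header['filename'] = 'greeting.txt'
    header['compressed_size'] = len(payload)
    header['uncompressed_size'] = len(payload)
    header['crc32'] = zlib.crc32(payload) & 0xffffffff
    # The reader expects a sliceable buffer such as the memoryview JarReader
    # hands out.
    reader = JarFileReader(header, memoryview(payload))
    assert reader.read() == payload
    reader.seek(0)
    return reader.readlines()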


class JarReader(object):
    '''
    Class with methods to read Jar files. Can open standard jar files as well
    as Mozilla jar files (see further details in the JarWriter documentation).
    '''
    def __init__(self, file=None, fileobj=None):
        '''
        Opens the given file as a Jar archive. Use the given file-like object
        if one is given instead of opening the given file name.
        '''
        if fileobj:
            data = fileobj.read()
        else:
            data = open(file, 'rb').read()
        self._data = memoryview(data)
        # The End of Central Directory Record has a variable size because of
        # comments it may contain, so scan for it from the end of the file.
        offset = -CDIR_END_SIZE
        while True:
            signature = JarStruct.get_data('uint32', self._data[offset:])[0]
            if signature == JarCdirEnd.MAGIC:
                break
            if offset == -len(self._data):
                raise JarReaderError('Not a jar?')
            offset -= 1
        self._cdir_end = JarCdirEnd(self._data[offset:])

    def close(self):
        '''
        Free some resources associated with the Jar.
        '''
        del self._data

    @property
    def entries(self):
        '''
        Return an ordered dict of central directory entries, indexed by
        filename, in the order they appear in the Jar archive central
        directory. Directory entries are skipped.
        '''
        if hasattr(self, '_entries'):
            return self._entries
        preload = 0
        if self.is_optimized:
            preload = JarStruct.get_data('uint32', self._data)[0]
        entries = OrderedDict()
        offset = self._cdir_end['cdir_offset']
        for e in xrange(self._cdir_end['cdir_entries']):
            entry = JarCdirEntry(self._data[offset:])
            offset += entry.size
            # Creator host system. 0 is MSDOS, 3 is Unix
            host = entry['creator_version'] >> 8
            # External attributes values depend on host above. On Unix the
            # higher bits are the stat.st_mode value. On MSDOS, the lower bits
            # are the FAT attributes.
            xattr = entry['external_attr']
            # Skip directories
            if (host == 0 and xattr & 0x10) or (host == 3 and
                                                xattr & (040000 << 16)):
                continue
            entries[entry['filename']] = entry
            if entry['offset'] < preload:
                self._last_preloaded = entry['filename']
        self._entries = entries
        return entries

    @property
    def is_optimized(self):
        '''
        Return whether the jar archive is optimized.
        '''
        # In optimized jars, the central directory is at the beginning of the
        # file, after a single 32-bit value, which is the length of data
        # preloaded.
        return self._cdir_end['cdir_offset'] == \
            JarStruct.TYPE_MAPPING['uint32'][1]

    @property
    def last_preloaded(self):
        '''
        Return the name of the last file that is set to be preloaded.
        See JarWriter documentation for more details on preloading.
        '''
        if hasattr(self, '_last_preloaded'):
            return self._last_preloaded
        self._last_preloaded = None
        self.entries
        return self._last_preloaded

    def _getreader(self, entry):
        '''
        Helper to create a JarFileReader corresponding to the given central
        directory entry.
        '''
        header = JarLocalFileHeader(self._data[entry['offset']:])
        for key, value in entry:
            if key in header and header[key] != value:
                raise JarReaderError('Central directory and file header ' +
                                     'mismatch. Corrupted archive?')
        return JarFileReader(header,
                             self._data[entry['offset'] + header.size:])

    def __iter__(self):
        '''
        Iterate over all files in the Jar archive, in the form of
        JarFileReaders.
            for file in jarReader:
                ...
        '''
        for entry in self.entries.itervalues():
            yield self._getreader(entry)

    def __getitem__(self, name):
        '''
        Get a JarFileReader for the given file name.
        '''
        return self._getreader(self.entries[name])

    def __contains__(self, name):
        '''
        Return whether the given file name appears in the Jar archive.
        '''
        return name in self.entries
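

# A minimal sketch (not part of the original module) of typical JarReader
# usage; 'example.jar' and 'chrome.manifest' are hypothetical names. Members
# come back as JarFileReader objects exposing a file-like interface.
def _example_jarreader_sketch(path='example.jar'):
    jar = JarReader(path)
    names = [member.filename for member in jar]
    manifest = None
    if 'chrome.manifest' in jar:
        manifest = jar['chrome.manifest'].read()
    jar.close()
    return names, manifest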


class JarWriter(object):
    '''
    Class with methods to write Jar files. Can write more-or-less standard jar
    archives as well as jar archives optimized for Gecko. See the documentation
    for the finish() member function for a description of both layouts.
    '''
    def __init__(self, file=None, fileobj=None, compress=True, optimize=True):
        '''
        Initialize a Jar archive in the given file. Use the given file-like
        object if one is given instead of opening the given file name.
        The compress option determines the default behavior for storing data
        in the jar archive. The optimize option determines whether the jar
        archive should be optimized for Gecko or not.
        '''
        if fileobj:
            self._data = fileobj
        else:
            self._data = open(file, 'wb')
        self._compress = compress
        self._contents = OrderedDict()
        self._last_preloaded = None
        self._optimize = optimize

    def __enter__(self):
        '''
        Context manager __enter__ method for JarWriter.
        '''
        return self

    def __exit__(self, type, value, tb):
        '''
        Context manager __exit__ method for JarWriter.
        '''
        self.finish()

    def finish(self):
        '''
        Flush and close the Jar archive.

        Standard jar archives are laid out like the following:
            - Local file header 1
            - File data 1
            - Local file header 2
            - File data 2
            - (...)
            - Central directory entry pointing at Local file header 1
            - Central directory entry pointing at Local file header 2
            - (...)
            - End of central directory, pointing at first central directory
              entry.

        Jar archives optimized for Gecko are laid out like the following:
            - 32-bit unsigned integer giving the amount of data to preload.
            - Central directory entry pointing at Local file header 1
            - Central directory entry pointing at Local file header 2
            - (...)
            - End of central directory, pointing at first central directory
              entry.
            - Local file header 1
            - File data 1
            - Local file header 2
            - File data 2
            - (...)
            - End of central directory, pointing at first central directory
              entry.
        The duplication of the End of central directory is to accommodate some
        Zip reading tools that want an end of central directory structure to
        follow the central directory entries.
        '''
        offset = 0
        headers = {}
        preload_size = 0
        # Prepare central directory entries
        for entry, content in self._contents.itervalues():
            header = JarLocalFileHeader()
            for name in entry.STRUCT:
                if name in header:
                    header[name] = entry[name]
            entry['offset'] = offset
            offset += len(content) + header.size
            if entry['filename'] == self._last_preloaded:
                preload_size = offset
            headers[entry] = header
        # Prepare end of central directory
        end = JarCdirEnd()
        end['disk_entries'] = len(self._contents)
        end['cdir_entries'] = end['disk_entries']
        end['cdir_size'] = reduce(lambda x, y: x + y[0].size,
                                  self._contents.values(), 0)
        # On optimized archives, store the preloaded size and the central
        # directory entries, followed by the first end of central directory.
        if self._optimize:
            end['cdir_offset'] = 4
            offset = end['cdir_size'] + end['cdir_offset'] + end.size
            if preload_size:
                preload_size += offset
            self._data.write(struct.pack('<I', preload_size))
            for entry, _ in self._contents.itervalues():
                entry['offset'] += offset
                self._data.write(entry.serialize())
            self._data.write(end.serialize())
        # Store local file entries followed by compressed data
        for entry, content in self._contents.itervalues():
            self._data.write(headers[entry].serialize())
            self._data.write(content)
        # On non optimized archives, store the central directory entries.
        if not self._optimize:
            end['cdir_offset'] = offset
            for entry, _ in self._contents.itervalues():
                self._data.write(entry.serialize())
        # Store the end of central directory.
        self._data.write(end.serialize())
        self._data.close()

    def add(self, name, data, compress=None, mode=None):
        '''
        Add a new member to the jar archive, with the given name and the given
        data.
        The compress option indicates if the given data should be compressed
        (True), not compressed (False), or compressed according to the default
        defined when creating the JarWriter (None).
        When the data should be compressed (True or None with self.compress ==
        True), it is only really compressed if the compressed size is smaller
        than the uncompressed size.
        The mode option gives the unix permissions that should be stored
        for the jar entry.
        The given data may be a buffer, a file-like instance, a Deflater or a
        JarFileReader instance. The latter two make it possible to avoid
        decompressing data only to recompress it.
        '''
        if name in self._contents:
            raise JarWriterError("File %s already in JarWriter" % name)
        if compress is None:
            compress = self._compress
        if (isinstance(data, JarFileReader) and data.compressed == compress) \
                or (isinstance(data, Deflater) and data.compress == compress):
            deflater = data
        else:
            deflater = Deflater(compress)
            if isinstance(data, basestring):
                deflater.write(data)
            elif hasattr(data, 'read'):
                data.seek(0)
                deflater.write(data.read())
            else:
                raise JarWriterError("Don't know how to handle %s" %
                                     type(data))
        # Fill a central directory entry for this new member.
        entry = JarCdirEntry()
        entry['creator_version'] = 20
        if mode is not None:
            # Set creator host system (upper byte of creator_version)
            # to 3 (Unix) so mode is honored when there is one.
            entry['creator_version'] |= 3 << 8
            entry['external_attr'] = (mode & 0xFFFF) << 16L
        if deflater.compressed:
            entry['min_version'] = 20  # Version 2.0 supports deflated streams
            entry['general_flag'] = 2  # Max compression
            entry['compression'] = JAR_DEFLATED
        else:
            entry['min_version'] = 10  # Version 1.0 for stored streams
            entry['general_flag'] = 0
            entry['compression'] = JAR_STORED
        # January 1st, 2010. See bug 592369.
        entry['lastmod_date'] = ((2010 - 1980) << 9) | (1 << 5) | 1
        entry['lastmod_time'] = 0
        entry['crc32'] = deflater.crc32
        entry['compressed_size'] = deflater.compressed_size
        entry['uncompressed_size'] = deflater.uncompressed_size
        entry['filename'] = name
        self._contents[name] = entry, deflater.compressed_data

    def preload(self, files):
        '''
        Set which members of the jar archive should be preloaded when opening
        the archive in Gecko. This reorders the members according to the order
        of the given list.
        '''
        new_contents = OrderedDict()
        for f in files:
            if not f in self._contents:
                continue
            new_contents[f] = self._contents[f]
            self._last_preloaded = f
        for f in self._contents:
            if not f in new_contents:
                new_contents[f] = self._contents[f]
        self._contents = new_contents
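

# A minimal sketch (not part of the original module) of typical JarWriter
# usage; 'example.jar' and the member names are hypothetical. With
# optimize=True the resulting archive uses the Gecko-optimized layout
# described in finish(), and preload() moves the listed members first.
def _example_jarwriter_sketch(path='example.jar'):
    with JarWriter(path, compress=True, optimize=True) as writer:
        writer.add('chrome.manifest', 'manifest chrome/chrome.manifest\n')
        writer.add('README', 'stored without compression\n', compress=False)
        writer.preload(['chrome.manifest'])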


class Deflater(object):
    '''
    File-like interface to zlib compression. The data is actually not
    compressed unless the compressed form is smaller than the uncompressed
    data.
    '''
    def __init__(self, compress=True):
        '''
        Initialize a Deflater. The compress argument determines whether to
        try to compress at all.
        '''
        self._data = BytesIO()
        self.compress = compress
        if compress:
            self._deflater = zlib.compressobj(9, zlib.DEFLATED, -MAX_WBITS)
            self._deflated = BytesIO()
        else:
            self._deflater = None

    def write(self, data):
        '''
        Append a buffer to the Deflater.
        '''
        self._data.write(data)
        if self.compress:
            if self._deflater:
                if isinstance(data, memoryview):
                    data = data.tobytes()
                self._deflated.write(self._deflater.compress(data))
            else:
                raise JarWriterError("Can't write after flush")

    def close(self):
        '''
        Close the Deflater.
        '''
        self._data.close()
        if self.compress:
            self._deflated.close()

    def _flush(self):
        '''
        Flush the underlying zlib compression object.
        '''
        if self.compress and self._deflater:
            self._deflated.write(self._deflater.flush())
            self._deflater = None

    @property
    def compressed(self):
        '''
        Return whether the data should be compressed.
        '''
        return self._compressed_size < self.uncompressed_size

    @property
    def _compressed_size(self):
        '''
        Return the real compressed size of the data written to the Deflater. If
        the Deflater is set not to compress, the uncompressed size is returned.
        Otherwise, the actual compressed size is returned, whether or not it is
        a win over the uncompressed size.
        '''
        if self.compress:
            self._flush()
            return self._deflated.tell()
        return self.uncompressed_size

    @property
    def compressed_size(self):
        '''
        Return the compressed size of the data written to the Deflater. If the
        Deflater is set not to compress, the uncompressed size is returned.
        Otherwise, if the data should not be compressed (the real compressed
        size is bigger than the uncompressed size), return the uncompressed
        size.
        '''
        if self.compressed:
            return self._compressed_size
        return self.uncompressed_size

    @property
    def uncompressed_size(self):
        '''
        Return the size of the data written to the Deflater.
        '''
        return self._data.tell()

    @property
    def crc32(self):
        '''
        Return the crc32 of the data written to the Deflater.
        '''
        return zlib.crc32(self._data.getvalue()) & 0xffffffff

    @property
    def compressed_data(self):
        '''
        Return the compressed data, if the data should be compressed (real
        compressed size smaller than the uncompressed size), or the
        uncompressed data otherwise.
        '''
        if self.compressed:
            return self._deflated.getvalue()
        return self._data.getvalue()
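

# A minimal sketch (not part of the original module) of how JarWriter uses a
# Deflater; the helper name and the sample buffer are hypothetical. The
# deflated form is only kept when it is smaller than the input.
def _example_deflater_sketch():
    deflater = Deflater(compress=True)
    deflater.write('some highly repetitive data ' * 64)
    if deflater.compressed:
        assert deflater.compressed_size < deflater.uncompressed_size
    # compressed_data falls back to the raw buffer when deflating is a loss.
    return deflater.compressed_data, deflater.crc32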


class JarLog(dict):
    '''
    Helper to read the file Gecko generates when setting MOZ_JAR_LOG_FILE.
    The jar log is then available as a dict with the jar path as key (see
    canonicalize for more details on the key value), and the corresponding
    access log as a list value. Only the first access to a given member of
    a jar is stored.
    '''
    def __init__(self, file=None, fileobj=None):
        if not fileobj:
            fileobj = open(file, 'r')
        urlmap = {}
        for line in fileobj:
            url, path = line.strip().split(None, 1)
            if not url or not path:
                continue
            if url not in urlmap:
                urlmap[url] = JarLog.canonicalize(url)
            jar = urlmap[url]
            entry = self.setdefault(jar, [])
            if path not in entry:
                entry.append(path)

    @staticmethod
    def canonicalize(url):
        '''
        The jar path is stored in a MOZ_JAR_LOG_FILE log as a url. This method
        returns a unique value corresponding to such urls.
        - file:///{path} becomes {path}
        - jar:file:///{path}!/{subpath} becomes ({path}, {subpath})
        - jar:jar:file:///{path}!/{subpath}!/{subpath2} becomes
           ({path}, {subpath}, {subpath2})
        '''
        if not isinstance(url, ParseResult):
            # Assume that if it doesn't start with jar: or file:, it's a path.
            if not url.startswith(('jar:', 'file:')):
                url = 'file:///' + os.path.abspath(url)
            url = urlparse(url)
        assert url.scheme
        assert url.scheme in ('jar', 'file')
        if url.scheme == 'jar':
            path = JarLog.canonicalize(url.path)
            if isinstance(path, tuple):
                return path[:-1] + tuple(path[-1].split('!/', 1))
            return tuple(path.split('!/', 1))
        if url.scheme == 'file':
            assert os.path.isabs(url.path)
            path = url.path
            # On Windows, url.path will be /drive:/path ; on Unix systems,
            # /path. As we want drive:/path instead of /drive:/path on Windows,
            # remove the leading /.
            if os.path.isabs(path[1:]):
                path = path[1:]
            path = os.path.realpath(path)
            return mozpack.path.normsep(os.path.normcase(path))
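

# A minimal sketch (not part of the original module) of parsing a jar log
# from an in-memory buffer. The two log lines, the /tmp path and the member
# names are hypothetical; each line is assumed to be '<jar url> <member>',
# which is the format the parser above expects.
def _example_jarlog_sketch():
    log = JarLog(fileobj=BytesIO(
        'file:///tmp/omni.ja chrome.manifest\n'
        'file:///tmp/omni.ja modules/Services.jsm\n'
    ))
    # Keys are canonicalized jar paths, values the members in access order.
    return dict(log)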
