python/mozbuild/mozpack/copier.py

Fri, 16 Jan 2015 18:13:44 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Fri, 16 Jan 2015 18:13:44 +0100
branch
TOR_BUG_9701
changeset 14
925c144e1f1f
permissions
-rw-r--r--

Integrate suggestion from review to improve consistency with existing code.

     1 # This Source Code Form is subject to the terms of the Mozilla Public
     2 # License, v. 2.0. If a copy of the MPL was not distributed with this
     3 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
     5 import os
     6 import stat
     8 from mozpack.errors import errors
     9 from mozpack.files import (
    10     BaseFile,
    11     Dest,
    12 )
    13 import mozpack.path
    14 import errno
    15 from collections import (
    16     Counter,
    17     OrderedDict,
    18 )
    class FileRegistry(object):
        '''
        Generic container to keep track of a set of BaseFile instances. It
        preserves the order under which the files are added, but doesn't keep
        track of empty directories (directories are not stored at all).
        The paths associated with the BaseFile instances are relative to an
        unspecified (virtual) root directory.

            registry = FileRegistry()
            registry.add('foo/bar', file_instance)
        '''

        def __init__(self):
            # Registered path -> BaseFile instance, in insertion order.
            self._files = OrderedDict()
            # Directory -> number of registered paths stored below it.
            self._required_directories = Counter()

        def _partial_paths(self, path):
            '''
            Turn "foo/bar/baz/zot" into ["foo/bar/baz", "foo/bar", "foo"].
            '''
            partial_paths = []
            partial_path = path
            while partial_path:
                partial_path = mozpack.path.dirname(partial_path)
                if partial_path:
                    partial_paths.append(partial_path)
            return partial_paths

        def add(self, path, content):
            '''
            Add a BaseFile instance to the container, under the given path.

            The addition is refused, and the result of errors.error() is
            returned, when the path is already registered, when it is
            currently needed as a directory by another registered path, or
            when one of its parent directories is registered as a file.
            '''
            assert isinstance(content, BaseFile)
            if path in self._files:
                return errors.error("%s already added" % path)
            if self._required_directories[path] > 0:
                return errors.error("Can't add %s: it is a required directory" %
                                    path)
            # Check whether any parent of the given path is already stored
            partial_paths = self._partial_paths(path)
            for partial_path in partial_paths:
                if partial_path in self._files:
                    return errors.error("Can't add %s: %s is a file" %
                                        (path, partial_path))
            self._files[path] = content
            self._required_directories.update(partial_paths)

        def match(self, pattern):
            '''
            Return the list of paths, stored in the container, matching the
            given pattern. See the mozpack.path.match documentation for a
            description of the handled patterns.
            '''
            if '*' in pattern:
                return [p for p in self.paths()
                        if mozpack.path.match(p, pattern)]
            if pattern == '':
                return self.paths()
            if pattern in self._files:
                return [pattern]
            return [p for p in self.paths()
                    if mozpack.path.basedir(p, [pattern]) == pattern]

        def remove(self, pattern):
            '''
            Remove paths matching the given pattern from the container. See the
            mozpack.path.match documentation for a description of the handled
            patterns.

            When nothing matches, the result of errors.error() is returned and
            the container is left untouched.
            '''
            # match() always hands back a list that is independent from
            # self._files, so deleting entries while looping over it is safe.
            items = self.match(pattern)
            if not items:
                return errors.error("Can't remove %s: %s" % (pattern,
                                    "not matching anything previously added"))
            for i in items:
                del self._files[i]
                self._required_directories.subtract(self._partial_paths(i))

        def paths(self):
            '''
            Return all paths stored in the container, in the order they were added.

            The result is a new list: it does not change when the container
            is modified afterwards.
            '''
            # list() rather than keys(): keys() is only a list on Python 2,
            # and callers such as remove() modify the container while walking
            # the returned paths.
            return list(self._files)

        def __len__(self):
            '''
            Return number of paths stored in the container.
            '''
            return len(self._files)

        def __contains__(self, pattern):
            # Membership is pattern based here, which the 'in' operator would
            # hide; callers are pointed at the explicit contains() method.
            raise RuntimeError("'in' operator forbidden for %s. Use contains()." %
                               self.__class__.__name__)

        def contains(self, pattern):
            '''
            Return whether the container contains paths matching the given
            pattern. See the mozpack.path.match documentation for a description of
            the handled patterns.
            '''
            return len(self.match(pattern)) > 0

        def __getitem__(self, path):
            '''
            Return the BaseFile instance stored in the container for the given
            path.
            '''
            return self._files[path]

        def __iter__(self):
            '''
            Iterate over all (path, BaseFile instance) pairs from the container.
                for path, file in registry:
                    (...)
            '''
            # items() exists on every Python version, unlike iteritems().
            return iter(self._files.items())

        def required_directories(self):
            '''
            Return the set of directories required by the paths in the container,
            in no particular order.  The returned directories are relative to an
            unspecified (virtual) root directory (and do not include said root
            directory).
            '''
            # remove() leaves zero-count entries behind in the counter, hence
            # the filter on strictly positive counts.
            return set(k for k, v in self._required_directories.items() if v > 0)
   class FileCopyResult(object):
       """Outcome of one FileCopier.copy call, as four sets of paths.

       Every set starts out empty; the *_count properties give the current
       size of the corresponding set.
       """

       def __init__(self):
           # Destination files, split by whether copying reported a change.
           self.updated_files, self.existing_files = set(), set()
           # Entries deleted from the destination.
           self.removed_files, self.removed_directories = set(), set()

       @property
       def updated_files_count(self):
           """Number of entries in updated_files."""
           return len(self.updated_files)

       @property
       def existing_files_count(self):
           """Number of entries in existing_files."""
           return len(self.existing_files)

       @property
       def removed_files_count(self):
           """Number of entries in removed_files."""
           return len(self.removed_files)

       @property
       def removed_directories_count(self):
           """Number of entries in removed_directories."""
           return len(self.removed_directories)
   class FileCopier(FileRegistry):
       '''
       FileRegistry with the ability to copy the registered files to a separate
       directory.
       '''
       def copy(self, destination, skip_if_older=True,
                remove_unaccounted=True,
                remove_all_directory_symlinks=True,
                remove_empty_directories=True):
           '''
           Copy all registered files to the given destination path. The given
           destination can be an existing directory, or not exist at all. It
           can't be e.g. a file.
           The copy process acts a bit like rsync: files are not copied when they
           don't need to (see mozpack.files for details on file.copy).

           By default, files in the destination directory that aren't
           registered are removed and empty directories are deleted. In
           addition, all directory symlinks in the destination directory
           are deleted: this is a conservative approach to ensure that we
           never accidentally write files into a directory that is not the
           destination directory. In the worst case, we might have a
           directory symlink in the object directory to the source
           directory.

           To disable removing of unregistered files, pass
           remove_unaccounted=False. To disable removing empty
           directories, pass remove_empty_directories=False. In rare
           cases, you might want to maintain directory symlinks in the
           destination directory (at least those that are not required to
           be regular directories): pass
           remove_all_directory_symlinks=False. Exercise caution with
           this flag: you almost certainly do not want to preserve
           directory symlinks.

           skip_if_older is handed unchanged to the copy() method of every
           registered file.

           Returns a FileCopyResult that details what changed: files whose
           copy() returned a true value are listed in updated_files, the
           other registered files in existing_files.
           '''
           assert isinstance(destination, basestring)
           assert not os.path.exists(destination) or os.path.isdir(destination)

           result = FileCopyResult()
           have_symlinks = hasattr(os, 'symlink')
           destination = os.path.normpath(destination)

           # We create the destination directory specially. We can't do this as
           # part of the loop doing mkdir() below because that loop munges
           # symlinks and permissions and parent directories of the destination
           # directory may have their own weird schema. The contract is we only
           # manage children of destination, not its parents.
           try:
               os.makedirs(destination)
           except OSError as e:
               if e.errno != errno.EEXIST:
                   raise

           # Because we could be handling thousands of files, code in this
           # function is optimized to minimize system calls. We prefer CPU time
           # in Python over possibly I/O bound filesystem calls to stat() and
           # friends.

           required_dirs = set([destination])
           dest_files = set(os.path.normpath(os.path.join(destination, p))
                            for p, _ in self)

           required_dirs |= set(os.path.normpath(os.path.join(destination, d))
                                for d in self.required_directories())

           # Ensure destination directories are in place and proper.
           #
           # The "proper" bit is important. We need to ensure that directories
           # have appropriate permissions or we will be unable to discover
           # and write files. Furthermore, we need to verify directories aren't
           # symlinks.
           #
           # Symlinked directories (a symlink whose target is a directory) are
           # incompatible with us because our manifest talks in terms of files,
           # not directories. If we leave symlinked directories unchecked, we
           # would blindly follow symlinks and this might confuse file
           # installation. For example, if an existing directory is a symlink
           # to directory X and we attempt to install a symlink in this directory
           # to a file in directory X, we may create a recursive symlink!
           #
           # Sorting by length guarantees a parent is handled before any of
           # its children, so a plain mkdir() is enough.
           for d in sorted(required_dirs, key=len):
               try:
                   os.mkdir(d)
               except OSError as error:
                   if error.errno != errno.EEXIST:
                       raise

               # We allow the destination to be a symlink because the caller
               # is responsible for managing the destination and we assume
               # they know what they are doing.
               if have_symlinks and d != destination:
                   st = os.lstat(d)
                   if stat.S_ISLNK(st.st_mode):
                       # While we have remove_unaccounted, it doesn't apply
                       # to directory symlinks because if it did, our behavior
                       # could be very wrong.
                       os.remove(d)
                       os.mkdir(d)

               if not os.access(d, os.W_OK):
                   # os.umask() can only be read by setting it, so set a
                   # throwaway value and restore the previous one right away.
                   umask = os.umask(0o077)
                   os.umask(umask)
                   os.chmod(d, 0o777 & ~umask)

           # While we have remove_unaccounted, it doesn't apply to empty
           # directories because it wouldn't make sense: an empty directory
           # is empty, so removing it should have no effect.
           existing_dirs = set()
           existing_files = set()
           for root, dirs, files in os.walk(destination):
               # We need to perform the same symlink detection as above. os.walk()
               # doesn't follow symlinks into directories by default, so we need
               # to check dirs (we can't wait for root).
               if have_symlinks:
                   filtered = []
                   for d in dirs:
                       full = os.path.join(root, d)
                       st = os.lstat(full)
                       if stat.S_ISLNK(st.st_mode):
                           # This directory symlink is not a required
                           # directory: any such symlink would have been
                           # removed and a directory created above.
                           if remove_all_directory_symlinks:
                               os.remove(full)
                               result.removed_files.add(os.path.normpath(full))
                           else:
                               existing_files.add(os.path.normpath(full))
                       else:
                           filtered.append(d)

                   # In-place update, so os.walk() itself skips the symlinks.
                   dirs[:] = filtered

               existing_dirs.add(os.path.normpath(root))

               for d in dirs:
                   existing_dirs.add(os.path.normpath(os.path.join(root, d)))

               for f in files:
                   existing_files.add(os.path.normpath(os.path.join(root, f)))

           # Now we reconcile the state of the world against what we want.

           # Remove files no longer accounted for.
           if remove_unaccounted:
               for f in existing_files - dest_files:
                   # Windows requires write access to remove files.
                   if os.name == 'nt' and not os.access(f, os.W_OK):
                       # It doesn't matter what we set permissions to since we
                       # will remove this file shortly.
                       os.chmod(f, 0o600)

                   os.remove(f)
                   result.removed_files.add(f)

           # Install files.
           for p, f in self:
               destfile = os.path.normpath(os.path.join(destination, p))
               if f.copy(destfile, skip_if_older):
                   result.updated_files.add(destfile)
               else:
                   result.existing_files.add(destfile)

           if not remove_empty_directories:
               return result

           # Figure out which directories can be removed. This is complicated
           # by the fact we optionally remove existing files. This would be easy
           # if we walked the directory tree after installing files. But, we're
           # trying to minimize system calls.

           # Start with the ideal set.
           remove_dirs = existing_dirs - required_dirs

           # Then don't remove directories if we didn't remove unaccounted files
           # and one of those files exists.
           if not remove_unaccounted:
               # Every ancestor of a surviving file must be kept. Climbing
               # stops at the first ancestor that is already recorded:
               # everything above it was recorded together with it, and
               # os.path.dirname() ends up returning its own argument once the
               # top of the path is reached.
               protected = set()
               for f in existing_files:
                   parent = os.path.dirname(f)
                   while parent not in protected:
                       protected.add(parent)
                       parent = os.path.dirname(parent)

               remove_dirs -= protected

           # Remove empty directories that aren't required. Longest paths go
           # first so children are gone before their parents are attempted.
           for d in sorted(remove_dirs, key=len, reverse=True):
               # Permissions may not allow deletion. So ensure write access is
               # in place before attempting delete.
               os.chmod(d, 0o700)
               os.rmdir(d)
               result.removed_directories.add(d)

           return result
   class FilePurger(FileCopier):
       """FileCopier flavour whose only job is deleting untracked files.

       Register every path that is allowed to exist with .add(), then call
       .purge() on a target directory: files and empty directories found
       there that are not part of the registry get deleted.
       """

       class FakeFile(BaseFile):
           """Placeholder registered for each path; copying it does nothing."""

           def copy(self, dest, skip_if_older=True):
               # Nothing is written; a true value is always reported back.
               return True

       def add(self, path):
           """Register a path that is allowed to exist.

           Only the path is recorded, not the kind of entity expected behind
           it. Type tracking could be added later so that purging also
           deletes entities of the wrong type.
           """
           placeholder = FilePurger.FakeFile()
           return FileCopier.add(self, path, placeholder)

       def purge(self, dest):
           """Delete the files and empty directories of dest not registered."""
           outcome = FileCopier.copy(self, dest)
           return outcome

       def copy(self, *args, **kwargs):
           # The inherited entry point is blocked on purpose; purge() is the
           # supported way to run the operation.
           raise Exception('copy() disabled on FilePurger. Use purge().')
   class Jarrer(FileRegistry, BaseFile):
       '''
       FileRegistry able to pack its registered files into a jar file. It is
       a BaseFile as well, so a FileCopier can copy it like any other file.
       '''
       def __init__(self, compress=True, optimize=True):
           '''
           Create a Jarrer instance. The compress and optimize arguments are
           passed on to mozpack.mozjar.JarWriter; see its documentation for
           their meaning.
           '''
           FileRegistry.__init__(self)
           self._preload = []
           self.compress = compress
           self.optimize = optimize

       def copy(self, dest, skip_if_older=True):
           '''
           Pack all registered files in the given destination jar. The given
           destination jar may be a path to jar file, or a Dest instance for
           a jar file.
           If the destination jar file exists, its (compressed) contents are used
           instead of the registered BaseFile instances when appropriate.
           '''
           class DeflaterDest(Dest):
               '''
               Dest-like class, reading from a file-like object initially, but
               switching to a Deflater object if written to.

                   dest = DeflaterDest(original_file)
                   dest.read()      # Reads original_file
                   dest.write(data) # Creates a Deflater and write data there

               Reading is only accepted before the first write: the assertion
               in read() rejects a switch from write mode back to read mode.
               '''
               def __init__(self, orig=None, compress=True):
                   # Despite its name, the attribute first holds the original
                   # file-like object (or None) until something is written.
                   self.deflater = orig
                   self.compress = compress
                   self.mode = None

               def read(self, length=-1):
                   if self.mode != 'r':
                       # Only the initial, mode-less state may start reading.
                       assert self.mode is None
                       self.mode = 'r'
                   return self.deflater.read(length)

               def write(self, data):
                   if self.mode != 'w':
                       # First write: drop the original content and start a
                       # fresh Deflater.
                       from mozpack.mozjar import Deflater
                       self.deflater = Deflater(self.compress)
                       self.mode = 'w'
                   self.deflater.write(data)

               def exists(self):
                   return self.deflater is not None

           if isinstance(dest, basestring):
               dest = Dest(dest)
           assert isinstance(dest, Dest)

           from mozpack.mozjar import JarWriter, JarReader
           # Best effort: when the destination cannot be read as a jar for
           # any reason, carry on as if it had no previous content.
           try:
               old_jar = JarReader(fileobj=dest)
           except Exception:
               old_jar = []

           old_contents = {member.filename: member for member in old_jar}

           with JarWriter(fileobj=dest, compress=self.compress,
                          optimize=self.optimize) as jar:
               for path, entry in self:
                   # Seed the destination with the member already stored under
                   # this path, if there is one, so entry.copy() can leave it
                   # as is.
                   previous = old_contents.get(path)
                   deflater = DeflaterDest(previous, self.compress)
                   entry.copy(deflater, skip_if_older)
                   jar.add(path, deflater.deflater, mode=entry.mode)
               if self._preload:
                   jar.preload(self._preload)

       def open(self):
           # A Jarrer cannot be read back as a file-like object.
           raise RuntimeError('unsupported')

       def preload(self, paths):
           '''
           Append the given paths to the list of files to preload. See the
           mozpack.mozjar.JarWriter documentation for details on jar
           preloading.
           '''
           self._preload.extend(paths)

mercurial