python/mozbuild/mozpack/copier.py

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/python/mozbuild/mozpack/copier.py	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,494 @@
     1.4 +# This Source Code Form is subject to the terms of the Mozilla Public
     1.5 +# License, v. 2.0. If a copy of the MPL was not distributed with this
     1.6 +# file, You can obtain one at http://mozilla.org/MPL/2.0/.
     1.7 +
     1.8 +import os
     1.9 +import stat
    1.10 +
    1.11 +from mozpack.errors import errors
    1.12 +from mozpack.files import (
    1.13 +    BaseFile,
    1.14 +    Dest,
    1.15 +)
    1.16 +import mozpack.path
    1.17 +import errno
    1.18 +from collections import (
    1.19 +    Counter,
    1.20 +    OrderedDict,
    1.21 +)
    1.22 +
    1.23 +
    1.24 +class FileRegistry(object):
    1.25 +    '''
    1.26 +    Generic container to keep track of a set of BaseFile instances. It
    1.27 +    preserves the order under which the files are added, but doesn't keep
    1.28 +    track of empty directories (directories are not stored at all).
    1.29 +    The paths associated with the BaseFile instances are relative to an
    1.30 +    unspecified (virtual) root directory.
    1.31 +
    1.32 +        registry = FileRegistry()
    1.33 +        registry.add('foo/bar', file_instance)
    1.34 +    '''
    1.35 +
    1.36 +    def __init__(self):
    1.37 +        self._files = OrderedDict()
    1.38 +        self._required_directories = Counter()
    1.39 +
    1.40 +    def _partial_paths(self, path):
    1.41 +        '''
    1.42 +        Turn "foo/bar/baz/zot" into ["foo/bar/baz", "foo/bar", "foo"].
    1.43 +        '''
    1.44 +        partial_paths = []
    1.45 +        partial_path = path
    1.46 +        while partial_path:
    1.47 +            partial_path = mozpack.path.dirname(partial_path)
    1.48 +            if partial_path:
    1.49 +                partial_paths.append(partial_path)
    1.50 +        return partial_paths
    1.51 +
    1.52 +    def add(self, path, content):
    1.53 +        '''
    1.54 +        Add a BaseFile instance to the container, under the given path.
    1.55 +        '''
    1.56 +        assert isinstance(content, BaseFile)
    1.57 +        if path in self._files:
    1.58 +            return errors.error("%s already added" % path)
    1.59 +        if self._required_directories[path] > 0:
    1.60 +            return errors.error("Can't add %s: it is a required directory" %
    1.61 +                                path)
    1.62 +        # Check whether any parent of the given path is already stored
    1.63 +        partial_paths = self._partial_paths(path)
    1.64 +        for partial_path in partial_paths:
    1.65 +            if partial_path in self._files:
    1.66 +                return errors.error("Can't add %s: %s is a file" %
    1.67 +                                    (path, partial_path))
    1.68 +        self._files[path] = content
    1.69 +        self._required_directories.update(partial_paths)
    1.70 +
    1.71 +    def match(self, pattern):
    1.72 +        '''
    1.73 +        Return the list of paths, stored in the container, matching the
    1.74 +        given pattern. See the mozpack.path.match documentation for a
    1.75 +        description of the handled patterns.
    1.76 +        '''
    1.77 +        if '*' in pattern:
    1.78 +            return [p for p in self.paths()
    1.79 +                    if mozpack.path.match(p, pattern)]
    1.80 +        if pattern == '':
    1.81 +            return self.paths()
    1.82 +        if pattern in self._files:
    1.83 +            return [pattern]
    1.84 +        return [p for p in self.paths()
    1.85 +                if mozpack.path.basedir(p, [pattern]) == pattern]
    1.86 +
    1.87 +    def remove(self, pattern):
    1.88 +        '''
    1.89 +        Remove paths matching the given pattern from the container. See the
    1.90 +        mozpack.path.match documentation for a description of the handled
    1.91 +        patterns.
    1.92 +        '''
    1.93 +        items = self.match(pattern)
    1.94 +        if not items:
    1.95 +            return errors.error("Can't remove %s: %s" % (pattern,
    1.96 +                                "not matching anything previously added"))
    1.97 +        for i in items:
    1.98 +            del self._files[i]
    1.99 +            self._required_directories.subtract(self._partial_paths(i))
   1.100 +
   1.101 +    def paths(self):
   1.102 +        '''
   1.103 +        Return all paths stored in the container, in the order they were added.
   1.104 +        '''
   1.105 +        return self._files.keys()
   1.106 +
   1.107 +    def __len__(self):
   1.108 +        '''
   1.109 +        Return number of paths stored in the container.
   1.110 +        '''
   1.111 +        return len(self._files)
   1.112 +
   1.113 +    def __contains__(self, pattern):
   1.114 +        raise RuntimeError("'in' operator forbidden for %s. Use contains()." %
   1.115 +                           self.__class__.__name__)
   1.116 +
   1.117 +    def contains(self, pattern):
   1.118 +        '''
   1.119 +        Return whether the container contains paths matching the given
   1.120 +        pattern. See the mozpack.path.match documentation for a description of
   1.121 +        the handled patterns.
   1.122 +        '''
   1.123 +        return len(self.match(pattern)) > 0
   1.124 +
   1.125 +    def __getitem__(self, path):
   1.126 +        '''
   1.127 +        Return the BaseFile instance stored in the container for the given
   1.128 +        path.
   1.129 +        '''
   1.130 +        return self._files[path]
   1.131 +
   1.132 +    def __iter__(self):
   1.133 +        '''
   1.134 +        Iterate over all (path, BaseFile instance) pairs from the container.
   1.135 +            for path, file in registry:
   1.136 +                (...)
   1.137 +        '''
   1.138 +        return self._files.iteritems()
   1.139 +
   1.140 +    def required_directories(self):
   1.141 +        '''
   1.142 +        Return the set of directories required by the paths in the container,
   1.143 +        in no particular order.  The returned directories are relative to an
   1.144 +        unspecified (virtual) root directory (and do not include said root
   1.145 +        directory).
   1.146 +        '''
   1.147 +        return set(k for k, v in self._required_directories.items() if v > 0)
   1.148 +
   1.149 +
   1.150 +class FileCopyResult(object):
   1.151 +    """Represents results of a FileCopier.copy operation."""
   1.152 +
   1.153 +    def __init__(self):
   1.154 +        self.updated_files = set()
   1.155 +        self.existing_files = set()
   1.156 +        self.removed_files = set()
   1.157 +        self.removed_directories = set()
   1.158 +
   1.159 +    @property
   1.160 +    def updated_files_count(self):
   1.161 +        return len(self.updated_files)
   1.162 +
   1.163 +    @property
   1.164 +    def existing_files_count(self):
   1.165 +        return len(self.existing_files)
   1.166 +
   1.167 +    @property
   1.168 +    def removed_files_count(self):
   1.169 +        return len(self.removed_files)
   1.170 +
   1.171 +    @property
   1.172 +    def removed_directories_count(self):
   1.173 +        return len(self.removed_directories)
   1.174 +
   1.175 +
   1.176 +class FileCopier(FileRegistry):
   1.177 +    '''
   1.178 +    FileRegistry with the ability to copy the registered files to a separate
   1.179 +    directory.
   1.180 +    '''
   1.181 +    def copy(self, destination, skip_if_older=True,
   1.182 +             remove_unaccounted=True,
   1.183 +             remove_all_directory_symlinks=True,
   1.184 +             remove_empty_directories=True):
   1.185 +        '''
   1.186 +        Copy all registered files to the given destination path. The given
   1.187 +        destination can be an existing directory, or not exist at all. It
   1.188 +        can't be e.g. a file.
   1.189 +        The copy process acts a bit like rsync: files are not copied when they
   1.190 +        don't need to (see mozpack.files for details on file.copy).
   1.191 +
   1.192 +        By default, files in the destination directory that aren't
   1.193 +        registered are removed and empty directories are deleted. In
   1.194 +        addition, all directory symlinks in the destination directory
   1.195 +        are deleted: this is a conservative approach to ensure that we
   1.196 +        never accidently write files into a directory that is not the
   1.197 +        destination directory. In the worst case, we might have a
   1.198 +        directory symlink in the object directory to the source
   1.199 +        directory.
   1.200 +
   1.201 +        To disable removing of unregistered files, pass
   1.202 +        remove_unaccounted=False. To disable removing empty
   1.203 +        directories, pass remove_empty_directories=False. In rare
   1.204 +        cases, you might want to maintain directory symlinks in the
   1.205 +        destination directory (at least those that are not required to
   1.206 +        be regular directories): pass
   1.207 +        remove_all_directory_symlinks=False. Exercise caution with
   1.208 +        this flag: you almost certainly do not want to preserve
   1.209 +        directory symlinks.
   1.210 +
   1.211 +        Returns a FileCopyResult that details what changed.
   1.212 +        '''
   1.213 +        assert isinstance(destination, basestring)
   1.214 +        assert not os.path.exists(destination) or os.path.isdir(destination)
   1.215 +
   1.216 +        result = FileCopyResult()
   1.217 +        have_symlinks = hasattr(os, 'symlink')
   1.218 +        destination = os.path.normpath(destination)
   1.219 +
   1.220 +        # We create the destination directory specially. We can't do this as
   1.221 +        # part of the loop doing mkdir() below because that loop munges
   1.222 +        # symlinks and permissions and parent directories of the destination
   1.223 +        # directory may have their own weird schema. The contract is we only
   1.224 +        # manage children of destination, not its parents.
   1.225 +        try:
   1.226 +            os.makedirs(destination)
   1.227 +        except OSError as e:
   1.228 +            if e.errno != errno.EEXIST:
   1.229 +                raise
   1.230 +
   1.231 +        # Because we could be handling thousands of files, code in this
   1.232 +        # function is optimized to minimize system calls. We prefer CPU time
   1.233 +        # in Python over possibly I/O bound filesystem calls to stat() and
   1.234 +        # friends.
   1.235 +
   1.236 +        required_dirs = set([destination])
   1.237 +        dest_files = set()
   1.238 +
   1.239 +        for p, f in self:
   1.240 +            dest_files.add(os.path.normpath(os.path.join(destination, p)))
   1.241 +
   1.242 +        required_dirs |= set(os.path.normpath(os.path.join(destination, d))
   1.243 +            for d in self.required_directories())
   1.244 +
   1.245 +        # Ensure destination directories are in place and proper.
   1.246 +        #
   1.247 +        # The "proper" bit is important. We need to ensure that directories
   1.248 +        # have appropriate permissions or we will be unable to discover
   1.249 +        # and write files. Furthermore, we need to verify directories aren't
   1.250 +        # symlinks.
   1.251 +        #
   1.252 +        # Symlinked directories (a symlink whose target is a directory) are
   1.253 +        # incompatible with us because our manifest talks in terms of files,
   1.254 +        # not directories. If we leave symlinked directories unchecked, we
   1.255 +        # would blindly follow symlinks and this might confuse file
   1.256 +        # installation. For example, if an existing directory is a symlink
   1.257 +        # to directory X and we attempt to install a symlink in this directory
   1.258 +        # to a file in directory X, we may create a recursive symlink!
   1.259 +        for d in sorted(required_dirs, key=len):
   1.260 +            try:
   1.261 +                os.mkdir(d)
   1.262 +            except OSError as error:
   1.263 +                if error.errno != errno.EEXIST:
   1.264 +                    raise
   1.265 +
   1.266 +            # We allow the destination to be a symlink because the caller
   1.267 +            # is responsible for managing the destination and we assume
   1.268 +            # they know what they are doing.
   1.269 +            if have_symlinks and d != destination:
   1.270 +                st = os.lstat(d)
   1.271 +                if stat.S_ISLNK(st.st_mode):
   1.272 +                    # While we have remove_unaccounted, it doesn't apply
   1.273 +                    # to directory symlinks because if it did, our behavior
   1.274 +                    # could be very wrong.
   1.275 +                    os.remove(d)
   1.276 +                    os.mkdir(d)
   1.277 +
   1.278 +            if not os.access(d, os.W_OK):
   1.279 +                umask = os.umask(0077)
   1.280 +                os.umask(umask)
   1.281 +                os.chmod(d, 0777 & ~umask)
   1.282 +
   1.283 +        # While we have remove_unaccounted, it doesn't apply to empty
   1.284 +        # directories because it wouldn't make sense: an empty directory
   1.285 +        # is empty, so removing it should have no effect.
   1.286 +        existing_dirs = set()
   1.287 +        existing_files = set()
   1.288 +        for root, dirs, files in os.walk(destination):
   1.289 +            # We need to perform the same symlink detection as above. os.walk()
   1.290 +            # doesn't follow symlinks into directories by default, so we need
   1.291 +            # to check dirs (we can't wait for root).
   1.292 +            if have_symlinks:
   1.293 +                filtered = []
   1.294 +                for d in dirs:
   1.295 +                    full = os.path.join(root, d)
   1.296 +                    st = os.lstat(full)
   1.297 +                    if stat.S_ISLNK(st.st_mode):
   1.298 +                        # This directory symlink is not a required
   1.299 +                        # directory: any such symlink would have been
   1.300 +                        # removed and a directory created above.
   1.301 +                        if remove_all_directory_symlinks:
   1.302 +                            os.remove(full)
   1.303 +                            result.removed_files.add(os.path.normpath(full))
   1.304 +                        else:
   1.305 +                            existing_files.add(os.path.normpath(full))
   1.306 +                    else:
   1.307 +                        filtered.append(d)
   1.308 +
   1.309 +                dirs[:] = filtered
   1.310 +
   1.311 +            existing_dirs.add(os.path.normpath(root))
   1.312 +
   1.313 +            for d in dirs:
   1.314 +                existing_dirs.add(os.path.normpath(os.path.join(root, d)))
   1.315 +
   1.316 +            for f in files:
   1.317 +                existing_files.add(os.path.normpath(os.path.join(root, f)))
   1.318 +
   1.319 +        # Now we reconcile the state of the world against what we want.
   1.320 +
   1.321 +        # Remove files no longer accounted for.
   1.322 +        if remove_unaccounted:
   1.323 +            for f in existing_files - dest_files:
   1.324 +                # Windows requires write access to remove files.
   1.325 +                if os.name == 'nt' and not os.access(f, os.W_OK):
   1.326 +                    # It doesn't matter what we set permissions to since we
   1.327 +                    # will remove this file shortly.
   1.328 +                    os.chmod(f, 0600)
   1.329 +
   1.330 +                os.remove(f)
   1.331 +                result.removed_files.add(f)
   1.332 +
   1.333 +        # Install files.
   1.334 +        for p, f in self:
   1.335 +            destfile = os.path.normpath(os.path.join(destination, p))
   1.336 +            if f.copy(destfile, skip_if_older):
   1.337 +                result.updated_files.add(destfile)
   1.338 +            else:
   1.339 +                result.existing_files.add(destfile)
   1.340 +
   1.341 +        if not remove_empty_directories:
   1.342 +            return result
   1.343 +
   1.344 +        # Figure out which directories can be removed. This is complicated
   1.345 +        # by the fact we optionally remove existing files. This would be easy
   1.346 +        # if we walked the directory tree after installing files. But, we're
   1.347 +        # trying to minimize system calls.
   1.348 +
   1.349 +        # Start with the ideal set.
   1.350 +        remove_dirs = existing_dirs - required_dirs
   1.351 +
   1.352 +        # Then don't remove directories if we didn't remove unaccounted files
   1.353 +        # and one of those files exists.
   1.354 +        if not remove_unaccounted:
   1.355 +            for f in existing_files:
   1.356 +                parent = f
   1.357 +                previous = ''
   1.358 +                parents = set()
   1.359 +                while True:
   1.360 +                    parent = os.path.dirname(parent)
   1.361 +                    parents.add(parent)
   1.362 +
   1.363 +                    if previous == parent:
   1.364 +                        break
   1.365 +
   1.366 +                    previous = parent
   1.367 +
   1.368 +                remove_dirs -= parents
   1.369 +
   1.370 +        # Remove empty directories that aren't required.
   1.371 +        for d in sorted(remove_dirs, key=len, reverse=True):
   1.372 +            # Permissions may not allow deletion. So ensure write access is
   1.373 +            # in place before attempting delete.
   1.374 +            os.chmod(d, 0700)
   1.375 +            os.rmdir(d)
   1.376 +            result.removed_directories.add(d)
   1.377 +
   1.378 +        return result
   1.379 +
   1.380 +
   1.381 +class FilePurger(FileCopier):
   1.382 +    """A variation of FileCopier that is used to purge untracked files.
   1.383 +
   1.384 +    Callers create an instance then call .add() to register files/paths that
   1.385 +    should exist. Once the canonical set of files that may exist is defined,
   1.386 +    .purge() is called against a target directory. All files and empty
   1.387 +    directories in the target directory that aren't in the registry will be
   1.388 +    deleted.
   1.389 +    """
   1.390 +    class FakeFile(BaseFile):
   1.391 +        def copy(self, dest, skip_if_older=True):
   1.392 +            return True
   1.393 +
   1.394 +    def add(self, path):
   1.395 +        """Record that a path should exist.
   1.396 +
   1.397 +        We currently do not track what kind of entity should be behind that
   1.398 +        path. We presumably could add type tracking later and have purging
   1.399 +        delete entities if there is a type mismatch.
   1.400 +        """
   1.401 +        return FileCopier.add(self, path, FilePurger.FakeFile())
   1.402 +
   1.403 +    def purge(self, dest):
   1.404 +        """Deletes all files and empty directories not in the registry."""
   1.405 +        return FileCopier.copy(self, dest)
   1.406 +
   1.407 +    def copy(self, *args, **kwargs):
   1.408 +        raise Exception('copy() disabled on FilePurger. Use purge().')
   1.409 +
   1.410 +
   1.411 +class Jarrer(FileRegistry, BaseFile):
   1.412 +    '''
   1.413 +    FileRegistry with the ability to copy and pack the registered files as a
   1.414 +    jar file. Also acts as a BaseFile instance, to be copied with a FileCopier.
   1.415 +    '''
   1.416 +    def __init__(self, compress=True, optimize=True):
   1.417 +        '''
   1.418 +        Create a Jarrer instance. See mozpack.mozjar.JarWriter documentation
   1.419 +        for details on the compress and optimize arguments.
   1.420 +        '''
   1.421 +        self.compress = compress
   1.422 +        self.optimize = optimize
   1.423 +        self._preload = []
   1.424 +        FileRegistry.__init__(self)
   1.425 +
   1.426 +    def copy(self, dest, skip_if_older=True):
   1.427 +        '''
   1.428 +        Pack all registered files in the given destination jar. The given
   1.429 +        destination jar may be a path to jar file, or a Dest instance for
   1.430 +        a jar file.
   1.431 +        If the destination jar file exists, its (compressed) contents are used
   1.432 +        instead of the registered BaseFile instances when appropriate.
   1.433 +        '''
   1.434 +        class DeflaterDest(Dest):
   1.435 +            '''
   1.436 +            Dest-like class, reading from a file-like object initially, but
   1.437 +            switching to a Deflater object if written to.
   1.438 +
   1.439 +                dest = DeflaterDest(original_file)
   1.440 +                dest.read()      # Reads original_file
   1.441 +                dest.write(data) # Creates a Deflater and write data there
   1.442 +                dest.read()      # Re-opens the Deflater and reads from it
   1.443 +            '''
   1.444 +            def __init__(self, orig=None, compress=True):
   1.445 +                self.mode = None
   1.446 +                self.deflater = orig
   1.447 +                self.compress = compress
   1.448 +
   1.449 +            def read(self, length=-1):
   1.450 +                if self.mode != 'r':
   1.451 +                    assert self.mode is None
   1.452 +                    self.mode = 'r'
   1.453 +                return self.deflater.read(length)
   1.454 +
   1.455 +            def write(self, data):
   1.456 +                if self.mode != 'w':
   1.457 +                    from mozpack.mozjar import Deflater
   1.458 +                    self.deflater = Deflater(self.compress)
   1.459 +                    self.mode = 'w'
   1.460 +                self.deflater.write(data)
   1.461 +
   1.462 +            def exists(self):
   1.463 +                return self.deflater is not None
   1.464 +
   1.465 +        if isinstance(dest, basestring):
   1.466 +            dest = Dest(dest)
   1.467 +        assert isinstance(dest, Dest)
   1.468 +
   1.469 +        from mozpack.mozjar import JarWriter, JarReader
   1.470 +        try:
   1.471 +            old_jar = JarReader(fileobj=dest)
   1.472 +        except Exception:
   1.473 +            old_jar = []
   1.474 +
   1.475 +        old_contents = dict([(f.filename, f) for f in old_jar])
   1.476 +
   1.477 +        with JarWriter(fileobj=dest, compress=self.compress,
   1.478 +                       optimize=self.optimize) as jar:
   1.479 +            for path, file in self:
   1.480 +                if path in old_contents:
   1.481 +                    deflater = DeflaterDest(old_contents[path], self.compress)
   1.482 +                else:
   1.483 +                    deflater = DeflaterDest(compress=self.compress)
   1.484 +                file.copy(deflater, skip_if_older)
   1.485 +                jar.add(path, deflater.deflater, mode=file.mode)
   1.486 +            if self._preload:
   1.487 +                jar.preload(self._preload)
   1.488 +
   1.489 +    def open(self):
   1.490 +        raise RuntimeError('unsupported')
   1.491 +
   1.492 +    def preload(self, paths):
   1.493 +        '''
   1.494 +        Add the given set of paths to the list of preloaded files. See
   1.495 +        mozpack.mozjar.JarWriter documentation for details on jar preloading.
   1.496 +        '''
   1.497 +        self._preload.extend(paths)

mercurial