# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

import errno
import os
import stat
from collections import (
    Counter,
    OrderedDict,
)

from mozpack.errors import errors
from mozpack.files import (
    BaseFile,
    Dest,
)
import mozpack.path


class FileRegistry(object):
    '''
    Ordered mapping from paths to BaseFile instances.

    Paths are relative to an unspecified (virtual) root directory and are
    remembered in insertion order. Directories themselves are never stored,
    so empty directories cannot be represented; the registry only counts,
    for each directory, how many registered paths live below it.

        registry = FileRegistry()
        registry.add('foo/bar', file_instance)
    '''

    def __init__(self):
        # Directory path -> number of registered files underneath it.
        self._required_directories = Counter()
        # Registered path -> BaseFile, in insertion order.
        self._files = OrderedDict()

    def _partial_paths(self, path):
        '''
        Return every ancestor directory of the given path, deepest first:
        "foo/bar/baz/zot" gives ["foo/bar/baz", "foo/bar", "foo"].
        '''
        ancestors = []
        current = path
        while current:
            current = mozpack.path.dirname(current)
            if not current:
                break
            ancestors.append(current)
        return ancestors

    def add(self, path, content):
        '''
        Register a BaseFile instance under the given path.

        An error is reported through mozpack.errors when the path is
        already registered, when it is a directory required by another
        registered path, or when one of its ancestors is registered as a
        file.
        '''
        assert isinstance(content, BaseFile)
        if path in self._files:
            return errors.error("%s already added" % path)
        if self._required_directories[path] > 0:
            return errors.error("Can't add %s: it is a required directory" %
                                path)
        # No ancestor of the new path may already be stored as a file.
        ancestors = self._partial_paths(path)
        blocker = next((a for a in ancestors if a in self._files), None)
        if blocker is not None:
            return errors.error("Can't add %s: %s is a file" %
                                (path, blocker))
        self._files[path] = content
        self._required_directories.update(ancestors)

    def match(self, pattern):
        '''
        Return the list of registered paths matching the given pattern.
        See the mozpack.path.match documentation for a description of the
        handled patterns.
        '''
        if pattern == '':
            return self.paths()
        if '*' not in pattern:
            # Without a wildcard the pattern is either an exact path or a
            # base directory of registered paths.
            if pattern in self._files:
                return [pattern]
            return [candidate for candidate in self.paths()
                    if mozpack.path.basedir(candidate, [pattern]) == pattern]
        return [candidate for candidate in self.paths()
                if mozpack.path.match(candidate, pattern)]

    def remove(self, pattern):
        '''
        Unregister every path matching the given pattern. See the
        mozpack.path.match documentation for a description of the handled
        patterns. An error is reported when nothing matches.
        '''
        matched = self.match(pattern)
        if not matched:
            return errors.error("Can't remove %s: %s" % (pattern,
                                "not matching anything previously added"))
        for path in matched:
            del self._files[path]
            self._required_directories.subtract(self._partial_paths(path))

    def paths(self):
        '''
        Return all registered paths, in the order they were added.
        '''
        return self._files.keys()

    def __len__(self):
        '''
        Return how many paths are registered.
        '''
        return len(self._files)

    def __contains__(self, pattern):
        class_name = self.__class__.__name__
        raise RuntimeError("'in' operator forbidden for %s. Use contains()." %
                           class_name)

    def contains(self, pattern):
        '''
        Return whether at least one registered path matches the given
        pattern. See the mozpack.path.match documentation for a description
        of the handled patterns.
        '''
        matched = self.match(pattern)
        return len(matched) > 0

    def __getitem__(self, path):
        '''
        Return the BaseFile instance registered under the given path.
        '''
        return self._files[path]

    def __iter__(self):
        '''
        Iterate over all (path, BaseFile instance) pairs in the registry.
            for path, file in registry:
                (...)
        '''
        return self._files.iteritems()

    def required_directories(self):
        '''
        Return the set of directories needed by the registered paths, in no
        particular order. The directories are relative to an unspecified
        (virtual) root directory, which is itself not included.
        '''
        return {directory
                for directory, count in self._required_directories.items()
                if count > 0}


class FileCopyResult(object):
    """Outcome of a FileCopier.copy operation, as sets of paths."""

    def __init__(self):
        self.updated_files = set()
        self.existing_files = set()
        self.removed_files = set()
        self.removed_directories = set()

    @property
    def updated_files_count(self):
        return len(self.updated_files)

    @property
    def existing_files_count(self):
        return len(self.existing_files)

    @property
    def removed_files_count(self):
        return len(self.removed_files)

    @property
    def removed_directories_count(self):
        return len(self.removed_directories)


class FileCopier(FileRegistry):
    '''
    FileRegistry that can install its registered files into a separate
    directory.
    '''
    def copy(self, destination, skip_if_older=True,
             remove_unaccounted=True,
             remove_all_directory_symlinks=True,
             remove_empty_directories=True):
        '''
        Copy all registered files to the given destination path. The
        destination may be an existing directory or may not exist at all;
        it can't be e.g. a file.

        The process acts a bit like rsync: files are not copied when they
        don't need to be (see mozpack.files for details on file.copy).

        By default, files in the destination directory that aren't
        registered are removed and empty directories are deleted. In
        addition, all directory symlinks in the destination directory are
        deleted: this is a conservative approach to ensure that we never
        accidentally write files into a directory that is not the
        destination directory. In the worst case, we might have a
        directory symlink in the object directory to the source directory.

        Pass remove_unaccounted=False to keep unregistered files, and
        remove_empty_directories=False to keep empty directories. In rare
        cases you might want to keep directory symlinks in the destination
        directory (at least those that are not required to be regular
        directories): pass remove_all_directory_symlinks=False. Exercise
        caution with this flag: you almost certainly do not want to
        preserve directory symlinks.

        Returns a FileCopyResult that details what changed.
        '''
        assert isinstance(destination, basestring)
        assert not os.path.exists(destination) or os.path.isdir(destination)

        destination = os.path.normpath(destination)
        symlinks_supported = hasattr(os, 'symlink')
        result = FileCopyResult()

        def under_destination(relative):
            return os.path.normpath(os.path.join(destination, relative))

        # The destination itself is created separately from the mkdir()
        # loop below: that loop rewrites symlinks and permissions, and the
        # parents of the destination may have their own conventions. The
        # contract is that only children of destination are managed.
        try:
            os.makedirs(destination)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise

        # Thousands of files may be handled here, so this function favors
        # CPU time in Python over possibly I/O bound calls such as stat().

        dest_files = set(under_destination(p) for p, _ in self)

        required_dirs = {destination}
        required_dirs.update(under_destination(d)
                             for d in self.required_directories())

        # Make sure every required directory exists, is writable, and is a
        # real directory rather than a symlink to one.
        #
        # The manifest talks in terms of files, not directories, so a
        # symlinked directory would be followed blindly and could confuse
        # installation. For example, if an existing directory is a symlink
        # to directory X and we install into it a symlink to a file in X,
        # we may create a recursive symlink.
        #
        # Sorting by length handles parents before their children.
        for directory in sorted(required_dirs, key=len):
            try:
                os.mkdir(directory)
            except OSError as e:
                if e.errno != errno.EEXIST:
                    raise

            # The destination itself may be a symlink: the caller manages
            # it and is assumed to know what they are doing.
            if symlinks_supported and directory != destination:
                if stat.S_ISLNK(os.lstat(directory).st_mode):
                    # This happens regardless of remove_unaccounted: a
                    # required directory must never stay a symlink.
                    os.remove(directory)
                    os.mkdir(directory)

            if not os.access(directory, os.W_OK):
                # os.umask() only reports the current mask by replacing
                # it, so set a temporary one and restore it right away.
                current_umask = os.umask(0o077)
                os.umask(current_umask)
                os.chmod(directory, 0o777 & ~current_umask)

        # Take stock of what is currently on disk under the destination.
        existing_dirs = set()
        existing_files = set()
        for root, dirs, files in os.walk(destination):
            # Same symlink detection as above. os.walk() doesn't follow
            # symlinks into directories by default, so they have to be
            # dealt with here, when listed in dirs, rather than as a root.
            if symlinks_supported:
                real_dirs = []
                for name in dirs:
                    full = os.path.join(root, name)
                    if not stat.S_ISLNK(os.lstat(full).st_mode):
                        real_dirs.append(name)
                        continue
                    # A directory symlink found here is not a required
                    # directory: those were replaced by real directories
                    # in the loop above.
                    normalized = os.path.normpath(full)
                    if remove_all_directory_symlinks:
                        os.remove(full)
                        result.removed_files.add(normalized)
                    else:
                        existing_files.add(normalized)

                dirs[:] = real_dirs

            existing_dirs.add(os.path.normpath(root))
            existing_dirs.update(
                os.path.normpath(os.path.join(root, name)) for name in dirs)
            existing_files.update(
                os.path.normpath(os.path.join(root, name)) for name in files)

        # Now reconcile the state of the world against what we want.

        # Remove files no longer accounted for.
        if remove_unaccounted:
            for stale in existing_files - dest_files:
                # Windows requires write access to remove files. The exact
                # permissions don't matter: the file is removed right after.
                if os.name == 'nt' and not os.access(stale, os.W_OK):
                    os.chmod(stale, 0o600)

                os.remove(stale)
                result.removed_files.add(stale)

        # Install files.
        for p, f in self:
            destfile = under_destination(p)
            if f.copy(destfile, skip_if_older):
                result.updated_files.add(destfile)
            else:
                result.existing_files.add(destfile)

        if remove_empty_directories:
            # Working out which directories can go would be easy with a
            # second walk after installing, but system calls are being
            # kept to a minimum, so it is derived from the sets instead.

            # Start with the ideal set.
            remove_dirs = existing_dirs - required_dirs

            # If unaccounted files were kept, every ancestor of a file
            # seen on disk has to stay.
            if not remove_unaccounted:
                kept = set()
                for leftover in existing_files:
                    current = os.path.dirname(leftover)
                    kept.add(current)
                    last = ''
                    # Climb until dirname() stops changing the path.
                    while current != last:
                        last = current
                        current = os.path.dirname(current)
                        kept.add(current)
                remove_dirs -= kept

            # Remove the remaining directories, longest path first so that
            # children go before their parents.
            for directory in sorted(remove_dirs, key=len, reverse=True):
                # Permissions may not allow deletion, so ensure write
                # access is in place before attempting it.
                os.chmod(directory, 0o700)
                os.rmdir(directory)
                result.removed_directories.add(directory)

        return result


class FilePurger(FileCopier):
    """A variation of FileCopier that is used to purge untracked files.

    Callers create an instance, then call .add() for every file/path that
    should exist. Once the canonical set of files that may exist is
    defined, .purge() is called against a target directory. All files and
    empty directories in the target directory that aren't in the registry
    are deleted.
    """
    class FakeFile(BaseFile):
        # Placeholder registered for each path; its copy() writes nothing.
        def copy(self, dest, skip_if_older=True):
            return True

    def add(self, path):
        """Record that a path should exist.

        What kind of entity should be behind that path is currently not
        tracked. Type tracking could presumably be added later, with
        purging deleting entities on a type mismatch.
        """
        placeholder = FilePurger.FakeFile()
        return FileCopier.add(self, path, placeholder)

    def purge(self, dest):
        """Deletes all files and empty directories not in the registry."""
        return FileCopier.copy(self, dest)

    def copy(self, *args, **kwargs):
        raise Exception('copy() disabled on FilePurger. Use purge().')


class Jarrer(FileRegistry, BaseFile):
    '''
    FileRegistry that can pack its registered files into a jar file. It
    also acts as a BaseFile instance, so it can be copied by a FileCopier.
    '''
    def __init__(self, compress=True, optimize=True):
        '''
        Create a Jarrer instance. See mozpack.mozjar.JarWriter documentation
        for details on the compress and optimize arguments.
        '''
        FileRegistry.__init__(self)
        self._preload = []
        self.compress = compress
        self.optimize = optimize

    def copy(self, dest, skip_if_older=True):
        '''
        Pack all registered files in the given destination jar. The
        destination may be a path to a jar file, or a Dest instance for a
        jar file.
        If the destination jar file exists, its (compressed) contents are
        used instead of the registered BaseFile instances when appropriate.
        '''
        class DeflaterDest(Dest):
            '''
            Dest-like class that initially reads from a file-like object
            and switches to a Deflater object once written to.

                dest = DeflaterDest(original_file)
                dest.read()      # Reads original_file
                dest.write(data) # Creates a Deflater and write data there
                dest.read()      # Re-opens the Deflater and reads from it
            '''
            def __init__(self, orig=None, compress=True):
                self.compress = compress
                self.deflater = orig
                self.mode = None

            def read(self, length=-1):
                if self.mode != 'r':
                    assert self.mode is None
                    self.mode = 'r'
                return self.deflater.read(length)

            def write(self, data):
                if self.mode != 'w':
                    from mozpack.mozjar import Deflater
                    self.deflater = Deflater(self.compress)
                    self.mode = 'w'
                self.deflater.write(data)

            def exists(self):
                return self.deflater is not None

        if isinstance(dest, basestring):
            dest = Dest(dest)
        assert isinstance(dest, Dest)

        from mozpack.mozjar import JarWriter, JarReader
        # Best effort: if the destination cannot be read as a jar, carry
        # on as if it had no previous contents.
        try:
            old_jar = JarReader(fileobj=dest)
        except Exception:
            old_jar = []

        old_contents = {member.filename: member for member in old_jar}

        with JarWriter(fileobj=dest, compress=self.compress,
                       optimize=self.optimize) as jar:
            for path, entry in self:
                # Seed the destination with the member previously stored
                # under the same path, if any (None otherwise).
                deflater = DeflaterDest(old_contents.get(path),
                                        self.compress)
                entry.copy(deflater, skip_if_older)
                jar.add(path, deflater.deflater, mode=entry.mode)
            if self._preload:
                jar.preload(self._preload)

    def open(self):
        raise RuntimeError('unsupported')

    def preload(self, paths):
        '''
        Append the given paths to the list of preloaded files. See
        mozpack.mozjar.JarWriter documentation for details on jar
        preloading.
        '''
        self._preload.extend(paths)