# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

import os
import stat

from mozpack.errors import errors
from mozpack.files import (
    BaseFile,
    Dest,
)
import mozpack.path
import errno
from collections import (
    Counter,
    OrderedDict,
)

# ``basestring`` only exists on Python 2. Fall back to ``str`` so that the
# string-type checks below work on both major versions.
try:
    _string_types = basestring
except NameError:
    _string_types = str


class FileRegistry(object):
    '''
    Generic container to keep track of a set of BaseFile instances. It
    preserves the order under which the files are added, but doesn't keep
    track of empty directories (directories are not stored at all).
    The paths associated with the BaseFile instances are relative to an
    unspecified (virtual) root directory.

        registry = FileRegistry()
        registry.add('foo/bar', file_instance)
    '''

    def __init__(self):
        # Maps path -> BaseFile instance, in insertion order.
        self._files = OrderedDict()
        # Maps directory path -> number of registered files below it.
        self._required_directories = Counter()

    def _partial_paths(self, path):
        '''
        Turn "foo/bar/baz/zot" into ["foo/bar/baz", "foo/bar", "foo"].
        '''
        partial_paths = []
        partial_path = path
        while partial_path:
            partial_path = mozpack.path.dirname(partial_path)
            if partial_path:
                partial_paths.append(partial_path)
        return partial_paths

    def add(self, path, content):
        '''
        Add a BaseFile instance to the container, under the given path.

        An error is reported through errors.error() (and its result is
        returned) when the path is already registered, when the path is a
        directory required by an already registered file, or when one of
        the parents of the path is itself registered as a file.
        '''
        assert isinstance(content, BaseFile)
        if path in self._files:
            return errors.error("%s already added" % path)
        # Looking up a missing key in a Counter yields 0 without inserting
        # the key, so this check doesn't pollute the counter.
        if self._required_directories[path] > 0:
            return errors.error("Can't add %s: it is a required directory" %
                                path)
        # Check whether any parent of the given path is already stored
        partial_paths = self._partial_paths(path)
        for partial_path in partial_paths:
            if partial_path in self._files:
                return errors.error("Can't add %s: %s is a file" %
                                    (path, partial_path))
        self._files[path] = content
        self._required_directories.update(partial_paths)

    def match(self, pattern):
        '''
        Return the list of paths, stored in the container, matching the
        given pattern. See the mozpack.path.match documentation for a
        description of the handled patterns.

        An empty pattern matches every stored path. A pattern that is
        exactly a stored path matches only that path.
        '''
        if '*' in pattern:
            return [p for p in self.paths()
                    if mozpack.path.match(p, pattern)]
        if pattern == '':
            return self.paths()
        if pattern in self._files:
            return [pattern]
        # Presumably this selects the paths located under the directory
        # named by pattern; see mozpack.path.basedir for the exact rules.
        return [p for p in self.paths()
                if mozpack.path.basedir(p, [pattern]) == pattern]

    def remove(self, pattern):
        '''
        Remove paths matching the given pattern from the container. See the
        mozpack.path.match documentation for a description of the handled
        patterns.

        An error is reported through errors.error() when nothing matches.
        '''
        items = self.match(pattern)
        if not items:
            return errors.error("Can't remove %s: %s" % (pattern,
                                "not matching anything previously added"))
        for i in items:
            del self._files[i]
            # Directories whose count drops to zero stay in the counter;
            # required_directories() filters them out.
            self._required_directories.subtract(self._partial_paths(i))

    def paths(self):
        '''
        Return all paths stored in the container, in the order they were added.
        '''
        # Always hand out a new list, never a live view of the registry, so
        # callers may add or remove entries while looping over the result.
        return list(self._files)

    def __len__(self):
        '''
        Return number of paths stored in the container.
        '''
        return len(self._files)

    def __contains__(self, pattern):
        # Deliberately disabled: membership here is pattern-based, which
        # contains() implements explicitly.
        raise RuntimeError("'in' operator forbidden for %s. Use contains()." %
                           self.__class__.__name__)

    def contains(self, pattern):
        '''
        Return whether the container contains paths matching the given
        pattern. See the mozpack.path.match documentation for a description of
        the handled patterns.
        '''
        return len(self.match(pattern)) > 0

    def __getitem__(self, path):
        '''
        Return the BaseFile instance stored in the container for the given
        path.
        '''
        return self._files[path]

    def __iter__(self):
        '''
        Iterate over all (path, BaseFile instance) pairs from the container.
            for path, file in registry:
                (...)
        '''
        # items() exists on both Python 2 and 3, unlike iteritems().
        return iter(self._files.items())

    def required_directories(self):
        '''
        Return the set of directories required by the paths in the container,
        in no particular order. The returned directories are relative to an
        unspecified (virtual) root directory (and do not include said root
        directory).
        '''
        return {k for k, v in self._required_directories.items() if v > 0}


class FileCopyResult(object):
    """Represents results of a FileCopier.copy operation."""

    def __init__(self):
        # Each attribute is a set of paths, filled in by FileCopier.copy().
        self.updated_files = set()
        self.existing_files = set()
        self.removed_files = set()
        self.removed_directories = set()

    @property
    def updated_files_count(self):
        return len(self.updated_files)

    @property
    def existing_files_count(self):
        return len(self.existing_files)

    @property
    def removed_files_count(self):
        return len(self.removed_files)

    @property
    def removed_directories_count(self):
        return len(self.removed_directories)


class FileCopier(FileRegistry):
    '''
    FileRegistry with the ability to copy the registered files to a separate
    directory.
    '''
    def copy(self, destination, skip_if_older=True,
             remove_unaccounted=True,
             remove_all_directory_symlinks=True,
             remove_empty_directories=True):
        '''
        Copy all registered files to the given destination path. The given
        destination can be an existing directory, or not exist at all. It
        can't be e.g. a file.
        The copy process acts a bit like rsync: files are not copied when they
        don't need to (see mozpack.files for details on file.copy).

        By default, files in the destination directory that aren't
        registered are removed and empty directories are deleted. In
        addition, all directory symlinks in the destination directory
        are deleted: this is a conservative approach to ensure that we
        never accidentally write files into a directory that is not the
        destination directory. In the worst case, we might have a
        directory symlink in the object directory to the source
        directory.

        To disable removing of unregistered files, pass
        remove_unaccounted=False. To disable removing empty
        directories, pass remove_empty_directories=False. In rare
        cases, you might want to maintain directory symlinks in the
        destination directory (at least those that are not required to
        be regular directories): pass
        remove_all_directory_symlinks=False. Exercise caution with
        this flag: you almost certainly do not want to preserve
        directory symlinks.

        Returns a FileCopyResult that details what changed.
        '''
        assert isinstance(destination, _string_types)
        assert not os.path.exists(destination) or os.path.isdir(destination)

        result = FileCopyResult()
        have_symlinks = hasattr(os, 'symlink')
        destination = os.path.normpath(destination)

        # We create the destination directory specially. We can't do this as
        # part of the loop doing mkdir() below because that loop munges
        # symlinks and permissions and parent directories of the destination
        # directory may have their own weird schema. The contract is we only
        # manage children of destination, not its parents.
        try:
            os.makedirs(destination)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise

        # Because we could be handling thousands of files, code in this
        # function is optimized to minimize system calls. We prefer CPU time
        # in Python over possibly I/O bound filesystem calls to stat() and
        # friends.

        required_dirs = {destination}
        dest_files = set()

        for p, f in self:
            dest_files.add(os.path.normpath(os.path.join(destination, p)))

        required_dirs |= {os.path.normpath(os.path.join(destination, d))
                          for d in self.required_directories()}

        # Ensure destination directories are in place and proper.
        #
        # The "proper" bit is important. We need to ensure that directories
        # have appropriate permissions or we will be unable to discover
        # and write files. Furthermore, we need to verify directories aren't
        # symlinks.
        #
        # Symlinked directories (a symlink whose target is a directory) are
        # incompatible with us because our manifest talks in terms of files,
        # not directories. If we leave symlinked directories unchecked, we
        # would blindly follow symlinks and this might confuse file
        # installation. For example, if an existing directory is a symlink
        # to directory X and we attempt to install a symlink in this directory
        # to a file in directory X, we may create a recursive symlink!
        #
        # Sorting by length guarantees parents are handled before children.
        for d in sorted(required_dirs, key=len):
            try:
                os.mkdir(d)
            except OSError as error:
                if error.errno != errno.EEXIST:
                    raise

            # We allow the destination to be a symlink because the caller
            # is responsible for managing the destination and we assume
            # they know what they are doing.
            if have_symlinks and d != destination:
                st = os.lstat(d)
                if stat.S_ISLNK(st.st_mode):
                    # While we have remove_unaccounted, it doesn't apply
                    # to directory symlinks because if it did, our behavior
                    # could be very wrong.
                    os.remove(d)
                    os.mkdir(d)

            if not os.access(d, os.W_OK):
                # os.umask() can only be read by setting it, so set a
                # temporary value and immediately restore the original.
                umask = os.umask(0o077)
                os.umask(umask)
                os.chmod(d, 0o777 & ~umask)

        # While we have remove_unaccounted, it doesn't apply to empty
        # directories because it wouldn't make sense: an empty directory
        # is empty, so removing it should have no effect.
        existing_dirs = set()
        existing_files = set()
        for root, dirs, files in os.walk(destination):
            # We need to perform the same symlink detection as above. os.walk()
            # doesn't follow symlinks into directories by default, so we need
            # to check dirs (we can't wait for root).
            if have_symlinks:
                filtered = []
                for d in dirs:
                    full = os.path.join(root, d)
                    st = os.lstat(full)
                    if stat.S_ISLNK(st.st_mode):
                        # This directory symlink is not a required
                        # directory: any such symlink would have been
                        # removed and a directory created above.
                        if remove_all_directory_symlinks:
                            os.remove(full)
                            result.removed_files.add(os.path.normpath(full))
                        else:
                            existing_files.add(os.path.normpath(full))
                    else:
                        filtered.append(d)

                # In-place assignment so os.walk() sees the filtered list.
                dirs[:] = filtered

            existing_dirs.add(os.path.normpath(root))

            for d in dirs:
                existing_dirs.add(os.path.normpath(os.path.join(root, d)))

            for f in files:
                existing_files.add(os.path.normpath(os.path.join(root, f)))

        # Now we reconcile the state of the world against what we want.

        # Remove files no longer accounted for.
        if remove_unaccounted:
            for f in existing_files - dest_files:
                # Windows requires write access to remove files.
                if os.name == 'nt' and not os.access(f, os.W_OK):
                    # It doesn't matter what we set permissions to since we
                    # will remove this file shortly.
                    os.chmod(f, 0o600)

                os.remove(f)
                result.removed_files.add(f)

        # Install files.
        for p, f in self:
            destfile = os.path.normpath(os.path.join(destination, p))
            if f.copy(destfile, skip_if_older):
                result.updated_files.add(destfile)
            else:
                result.existing_files.add(destfile)

        if not remove_empty_directories:
            return result

        # Figure out which directories can be removed. This is complicated
        # by the fact we optionally remove existing files. This would be easy
        # if we walked the directory tree after installing files. But, we're
        # trying to minimize system calls.

        # Start with the ideal set.
        remove_dirs = existing_dirs - required_dirs

        # Then don't remove directories if we didn't remove unaccounted files
        # and one of those files exists.
        if not remove_unaccounted:
            for f in existing_files:
                parent = f
                previous = ''
                parents = set()
                # Walk up until dirname() stops changing, i.e. the top of
                # the path has been reached.
                while True:
                    parent = os.path.dirname(parent)
                    parents.add(parent)

                    if previous == parent:
                        break

                    previous = parent

                remove_dirs -= parents

        # Remove empty directories that aren't required. Longest paths go
        # first so that children are removed before their parents.
        for d in sorted(remove_dirs, key=len, reverse=True):
            # Permissions may not allow deletion. So ensure write access is
            # in place before attempting delete.
            os.chmod(d, 0o700)
            os.rmdir(d)
            result.removed_directories.add(d)

        return result


class FilePurger(FileCopier):
    """A variation of FileCopier that is used to purge untracked files.

    Callers create an instance then call .add() to register files/paths that
    should exist. Once the canonical set of files that may exist is defined,
    .purge() is called against a target directory. All files and empty
    directories in the target directory that aren't in the registry will be
    deleted.
    """
    class FakeFile(BaseFile):
        # Placeholder content: copying it does nothing and reports success,
        # so registered paths are merely left alone by FileCopier.copy().
        def copy(self, dest, skip_if_older=True):
            return True

    def add(self, path):
        """Record that a path should exist.

        We currently do not track what kind of entity should be behind that
        path. We presumably could add type tracking later and have purging
        delete entities if there is a type mismatch.
        """
        return FileCopier.add(self, path, FilePurger.FakeFile())

    def purge(self, dest):
        """Deletes all files and empty directories not in the registry."""
        return FileCopier.copy(self, dest)

    def copy(self, *args, **kwargs):
        raise Exception('copy() disabled on FilePurger. Use purge().')


class Jarrer(FileRegistry, BaseFile):
    '''
    FileRegistry with the ability to copy and pack the registered files as a
    jar file. Also acts as a BaseFile instance, to be copied with a FileCopier.
    '''
    def __init__(self, compress=True, optimize=True):
        '''
        Create a Jarrer instance. See mozpack.mozjar.JarWriter documentation
        for details on the compress and optimize arguments.
        '''
        self.compress = compress
        self.optimize = optimize
        self._preload = []
        FileRegistry.__init__(self)

    def copy(self, dest, skip_if_older=True):
        '''
        Pack all registered files in the given destination jar. The given
        destination jar may be a path to jar file, or a Dest instance for
        a jar file.
        If the destination jar file exists, its (compressed) contents are used
        instead of the registered BaseFile instances when appropriate.
        '''
        class DeflaterDest(Dest):
            '''
            Dest-like class, reading from a file-like object initially, but
            switching to a Deflater object if written to.

                dest = DeflaterDest(original_file)
                dest.read()      # Reads original_file
                dest.write(data) # Creates a Deflater and write data there

            NOTE(review): as written, calling read() after write() trips the
            assertion in read(); only reads that happen before the first
            write are supported.
            '''
            def __init__(self, orig=None, compress=True):
                self.mode = None
                self.deflater = orig
                self.compress = compress

            def read(self, length=-1):
                if self.mode != 'r':
                    assert self.mode is None
                    self.mode = 'r'
                return self.deflater.read(length)

            def write(self, data):
                if self.mode != 'w':
                    # The first write discards whatever was being read and
                    # starts a new Deflater.
                    from mozpack.mozjar import Deflater
                    self.deflater = Deflater(self.compress)
                    self.mode = 'w'
                self.deflater.write(data)

            def exists(self):
                return self.deflater is not None

        if isinstance(dest, _string_types):
            dest = Dest(dest)
        assert isinstance(dest, Dest)

        from mozpack.mozjar import JarWriter, JarReader
        try:
            old_jar = JarReader(fileobj=dest)
        except Exception:
            # Best effort: if the destination can't be read as a jar, carry
            # on as if there were no previous contents.
            old_jar = []

        old_contents = {f.filename: f for f in old_jar}

        with JarWriter(fileobj=dest, compress=self.compress,
                       optimize=self.optimize) as jar:
            for path, content in self:
                if path in old_contents:
                    deflater = DeflaterDest(old_contents[path], self.compress)
                else:
                    deflater = DeflaterDest(compress=self.compress)
                content.copy(deflater, skip_if_older)
                jar.add(path, deflater.deflater, mode=content.mode)
            if self._preload:
                jar.preload(self._preload)

    def open(self):
        raise RuntimeError('unsupported')

    def preload(self, paths):
        '''
        Add the given set of paths to the list of preloaded files. See
        mozpack.mozjar.JarWriter documentation for details on jar preloading.
        '''
        self._preload.extend(paths)