python/mozbuild/mozpack/copier.py

Wed, 31 Dec 2014 06:55:50 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:55:50 +0100
changeset 2
7e26c7da4463
permissions
-rw-r--r--

Added tag UPSTREAM_283F7C6 for changeset ca08bd8f51b2

michael@0 1 # This Source Code Form is subject to the terms of the Mozilla Public
michael@0 2 # License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 3 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
michael@0 4
michael@0 5 import os
michael@0 6 import stat
michael@0 7
michael@0 8 from mozpack.errors import errors
michael@0 9 from mozpack.files import (
michael@0 10 BaseFile,
michael@0 11 Dest,
michael@0 12 )
michael@0 13 import mozpack.path
michael@0 14 import errno
michael@0 15 from collections import (
michael@0 16 Counter,
michael@0 17 OrderedDict,
michael@0 18 )
michael@0 19
michael@0 20
class FileRegistry(object):
    '''
    Ordered collection of BaseFile instances, keyed by path.

    Paths are relative to an unspecified (virtual) root directory and are
    remembered in the order they were added. Directories are never stored as
    entries, so an empty directory cannot be represented; the registry only
    counts how many registered files live below each parent directory.

        registry = FileRegistry()
        registry.add('foo/bar', file_instance)
    '''

    def __init__(self):
        # path -> BaseFile instance, in insertion order.
        self._files = OrderedDict()
        # directory -> number of registered files located below it.
        self._required_directories = Counter()

    def _partial_paths(self, path):
        '''
        List every ancestor directory of the given path, deepest first:
        "foo/bar/baz/zot" gives ["foo/bar/baz", "foo/bar", "foo"].
        '''
        ancestors = []
        parent = mozpack.path.dirname(path) if path else ''
        while parent:
            ancestors.append(parent)
            parent = mozpack.path.dirname(parent)
        return ancestors

    def add(self, path, content):
        '''
        Register a BaseFile instance under the given path.

        An error is reported through errors.error() (and its result
        returned) when the path is already registered, when the path is
        currently needed as a directory by another entry, or when one of its
        ancestors is itself registered as a file.
        '''
        assert isinstance(content, BaseFile)
        if path in self._files:
            return errors.error("%s already added" % path)
        if self._required_directories[path] > 0:
            return errors.error("Can't add %s: it is a required directory" %
                                path)
        # A file cannot live below something that is registered as a file.
        ancestors = self._partial_paths(path)
        clash = next((a for a in ancestors if a in self._files), None)
        if clash is not None:
            return errors.error("Can't add %s: %s is a file" %
                                (path, clash))
        self._files[path] = content
        self._required_directories.update(ancestors)

    def match(self, pattern):
        '''
        Return the registered paths matching the given pattern.

        Patterns containing '*' are handed to mozpack.path.match (see its
        documentation for the supported syntax). The empty pattern matches
        everything, an exact registered path matches itself, and any other
        pattern selects the paths for which mozpack.path.basedir reports the
        pattern as their base directory.
        '''
        if '*' in pattern:
            return [candidate for candidate in self.paths()
                    if mozpack.path.match(candidate, pattern)]
        if pattern == '':
            return self.paths()
        if pattern in self._files:
            return [pattern]
        return [candidate for candidate in self.paths()
                if mozpack.path.basedir(candidate, [pattern]) == pattern]

    def remove(self, pattern):
        '''
        Drop every registered path matching the given pattern (same pattern
        rules as match()). Reports an error through errors.error() when
        nothing matches.
        '''
        matched = self.match(pattern)
        if not matched:
            return errors.error("Can't remove %s: %s" % (pattern,
                                "not matching anything previously added"))
        for path in matched:
            del self._files[path]
            # Release the directories this file was keeping alive.
            self._required_directories.subtract(self._partial_paths(path))

    def paths(self):
        '''
        Return all registered paths, in the order they were added.
        '''
        return self._files.keys()

    def __len__(self):
        '''
        Return how many paths are registered.
        '''
        return len(self._files)

    def __contains__(self, pattern):
        # Membership is pattern based; force callers to be explicit about it.
        raise RuntimeError("'in' operator forbidden for %s. Use contains()." %
                           self.__class__.__name__)

    def contains(self, pattern):
        '''
        Tell whether at least one registered path matches the given pattern
        (same pattern rules as match()).
        '''
        return bool(self.match(pattern))

    def __getitem__(self, path):
        '''
        Return the BaseFile instance registered under the given path.
        '''
        return self._files[path]

    def __iter__(self):
        '''
        Iterate over the registered (path, BaseFile instance) pairs, in
        insertion order.
            for path, file in registry:
                (...)
        '''
        return self._files.iteritems()

    def required_directories(self):
        '''
        Return, as an unordered set, the directories needed to hold the
        registered paths. They are relative to the same unspecified
        (virtual) root directory as the paths, and the root itself is not
        included.
        '''
        # Counts can drop to zero after remove(); only report live entries.
        return {directory
                for directory, count in self._required_directories.items()
                if count > 0}
michael@0 145
michael@0 146
class FileCopyResult(object):
    """Outcome of a FileCopier.copy operation.

    Each attribute is a set of filesystem paths; the ``*_count`` properties
    expose the size of the corresponding set.
    """

    def __init__(self):
        # Files whose copy() reported that the destination was written.
        self.updated_files = set()
        # Files whose copy() reported that nothing had to be done.
        self.existing_files = set()
        # Files (and directory symlinks) deleted from the destination.
        self.removed_files = set()
        # Directories deleted from the destination.
        self.removed_directories = set()

    @property
    def updated_files_count(self):
        """Number of entries in updated_files."""
        return len(self.updated_files)

    @property
    def existing_files_count(self):
        """Number of entries in existing_files."""
        return len(self.existing_files)

    @property
    def removed_files_count(self):
        """Number of entries in removed_files."""
        return len(self.removed_files)

    @property
    def removed_directories_count(self):
        """Number of entries in removed_directories."""
        return len(self.removed_directories)
michael@0 171
michael@0 172
class FileCopier(FileRegistry):
    '''
    FileRegistry that can install its registered files into a directory on
    disk.
    '''
    def copy(self, destination, skip_if_older=True,
             remove_unaccounted=True,
             remove_all_directory_symlinks=True,
             remove_empty_directories=True):
        '''
        Install every registered file below the given destination path.

        The destination may be an existing directory or may not exist yet;
        it must not be anything else (e.g. a regular file). The process is
        rsync-like: each file decides through its own copy() method whether
        it actually needs to be written (see mozpack.files for details).

        With the default flags, files found in the destination that are not
        registered are deleted, and so are directories that end up empty.
        Every directory symlink found in the destination is deleted as well:
        this is deliberately conservative, so that we never accidentally
        write through a symlink into a directory that is not the
        destination. In the worst case the object directory could contain a
        directory symlink pointing into the source directory.

        Flags:
        - skip_if_older is passed through to each file's copy().
        - remove_unaccounted=False keeps unregistered files.
        - remove_empty_directories=False keeps empty directories.
        - remove_all_directory_symlinks=False keeps directory symlinks that
          are not required to be regular directories. Use with caution: you
          almost certainly do not want to preserve directory symlinks.

        Returns a FileCopyResult describing what changed.
        '''
        assert isinstance(destination, basestring)
        assert not os.path.exists(destination) or os.path.isdir(destination)

        result = FileCopyResult()
        have_symlinks = hasattr(os, 'symlink')
        destination = os.path.normpath(destination)

        def in_destination(relative):
            # Normalized on-disk location of a registry-relative path.
            return os.path.normpath(os.path.join(destination, relative))

        # The destination itself is created separately from the required
        # directories below: that later loop rewrites symlinks and
        # permissions, whereas the parents of the destination may follow
        # their own conventions. We only manage children of destination.
        try:
            os.makedirs(destination)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise

        # Thousands of files may go through here, so the rest of this
        # function trades Python-side bookkeeping for fewer stat()-like
        # system calls.

        dest_files = set(in_destination(path) for path, _ in self)
        required_dirs = set([destination])
        required_dirs.update(in_destination(directory)
                             for directory in self.required_directories())

        # Make sure every required directory exists, is a real directory and
        # is writable. Directory symlinks are not tolerated because the
        # registry talks about files, not directories: following one blindly
        # could confuse installation. For instance, if an existing directory
        # is a symlink to directory X and we install into it a symlink to a
        # file of X, we may create a recursive symlink.
        #
        # Shortest paths first, so parents are handled before children.
        for directory in sorted(required_dirs, key=len):
            try:
                os.mkdir(directory)
            except OSError as error:
                if error.errno != errno.EEXIST:
                    raise

            # The destination itself may be a symlink: the caller manages it
            # and is assumed to know what they are doing. Any other required
            # directory that is a symlink is replaced by a real directory,
            # regardless of remove_unaccounted.
            if (have_symlinks and directory != destination and
                    stat.S_ISLNK(os.lstat(directory).st_mode)):
                os.remove(directory)
                os.mkdir(directory)

            if not os.access(directory, os.W_OK):
                # os.umask() can only be read by setting it; restore it
                # right away.
                umask = os.umask(0o077)
                os.umask(umask)
                os.chmod(directory, 0o777 & ~umask)

        # Take inventory of what is currently in the destination.
        # remove_unaccounted does not cover empty directories; those are
        # handled by remove_empty_directories further down.
        existing_dirs = set()
        existing_files = set()
        for root, dirs, files in os.walk(destination):
            # Same symlink policy as above. os.walk() lists directory
            # symlinks in dirs without descending into them by default, so
            # they have to be caught here rather than when they become root.
            if have_symlinks:
                real_dirs = []
                for name in dirs:
                    candidate = os.path.join(root, name)
                    if not stat.S_ISLNK(os.lstat(candidate).st_mode):
                        real_dirs.append(name)
                        continue
                    # Not a required directory: those were turned into real
                    # directories by the loop above.
                    normalized = os.path.normpath(candidate)
                    if remove_all_directory_symlinks:
                        os.remove(candidate)
                        result.removed_files.add(normalized)
                    else:
                        existing_files.add(normalized)

                # In-place assignment so os.walk() sees the pruned list.
                dirs[:] = real_dirs

            existing_dirs.add(os.path.normpath(root))
            existing_dirs.update(os.path.normpath(os.path.join(root, name))
                                 for name in dirs)
            existing_files.update(os.path.normpath(os.path.join(root, name))
                                  for name in files)

        # Reconcile what exists with what is wanted.

        # Delete files that are not registered.
        if remove_unaccounted:
            for stale in existing_files - dest_files:
                # Windows requires write access to remove files. The mode we
                # pick is irrelevant since the file is deleted right after.
                if os.name == 'nt' and not os.access(stale, os.W_OK):
                    os.chmod(stale, 0o600)

                os.remove(stale)
                result.removed_files.add(stale)

        # Install the registered files.
        for path, content in self:
            destfile = in_destination(path)
            if content.copy(destfile, skip_if_older):
                result.updated_files.add(destfile)
            else:
                result.existing_files.add(destfile)

        if remove_empty_directories:
            # Work out which directories can go without walking the tree a
            # second time (again, to limit system calls). Ideally, every
            # existing directory that is not required.
            remove_dirs = existing_dirs - required_dirs

            # If unregistered files were kept, every ancestor directory of
            # such a file must be kept too.
            if not remove_unaccounted:
                for kept in existing_files:
                    ancestors = set()
                    current = os.path.dirname(kept)
                    # dirname() eventually stops changing; that is the top.
                    while current not in ancestors:
                        ancestors.add(current)
                        current = os.path.dirname(current)

                    remove_dirs -= ancestors

            # Longest paths first, so children go before their parents.
            for directory in sorted(remove_dirs, key=len, reverse=True):
                # Permissions may not allow deletion, so ensure write access
                # is in place before attempting it.
                os.chmod(directory, 0o700)
                os.rmdir(directory)
                result.removed_directories.add(directory)

        return result
michael@0 376
michael@0 377
class FilePurger(FileCopier):
    """FileCopier variant that only deletes what is not registered.

    Register, with .add(), every path that is allowed to exist. Then call
    .purge() on a target directory: files and empty directories found there
    that are not in the registry are deleted.
    """
    class FakeFile(BaseFile):
        """Placeholder entry whose copy() writes nothing."""

        def copy(self, dest, skip_if_older=True):
            # Nothing is written; always report success.
            return True

    def add(self, path):
        """Record that the given path is allowed to exist.

        The kind of entity expected behind the path is not tracked for now.
        Type tracking could presumably be added later, so that purging also
        deletes entities of the wrong type.
        """
        placeholder = FilePurger.FakeFile()
        return FileCopier.add(self, path, placeholder)

    def purge(self, dest):
        """Delete from dest the files and empty directories that are not in
        the registry, and return the resulting FileCopyResult."""
        return FileCopier.copy(self, dest)

    def copy(self, *args, **kwargs):
        # Copying makes no sense for a purger; point callers at purge().
        raise Exception('copy() disabled on FilePurger. Use purge().')
michael@0 406
michael@0 407
class Jarrer(FileRegistry, BaseFile):
    '''
    FileRegistry that packs its registered files into a jar file. It is also
    a BaseFile, so a Jarrer can itself be registered in a FileCopier.
    '''
    def __init__(self, compress=True, optimize=True):
        '''
        Create a Jarrer. The compress and optimize arguments are forwarded
        to mozpack.mozjar.JarWriter; see its documentation for details.
        '''
        FileRegistry.__init__(self)
        self.compress = compress
        self.optimize = optimize
        # Paths to hand to JarWriter.preload() when the jar is written.
        self._preload = []

    def copy(self, dest, skip_if_older=True):
        '''
        Pack all registered files into the given destination jar, which is
        either a path to a jar file or a Dest instance for one.

        When the destination jar can already be read, its (compressed)
        entries are offered to the registered files as their existing
        destination, so that an entry is only rewritten when the file's own
        copy() decides to write to it.
        '''
        class DeflaterDest(Dest):
            '''
            Dest-like object that initially reads from a file-like object
            and switches to a fresh Deflater as soon as it is written to.

                dest = DeflaterDest(original_file)
                dest.read()      # Reads original_file
                dest.write(data) # Creates a Deflater and writes data there
                dest.read()      # Re-opens the Deflater and reads from it
            '''
            def __init__(self, orig=None, compress=True):
                self.mode = None
                self.deflater = orig
                self.compress = compress

            def read(self, length=-1):
                if self.mode != 'r':
                    assert self.mode is None
                    self.mode = 'r'
                return self.deflater.read(length)

            def write(self, data):
                if self.mode != 'w':
                    # First write: discard the original entry, start afresh.
                    from mozpack.mozjar import Deflater
                    self.deflater = Deflater(self.compress)
                    self.mode = 'w'
                self.deflater.write(data)

            def exists(self):
                return self.deflater is not None

        if isinstance(dest, basestring):
            dest = Dest(dest)
        assert isinstance(dest, Dest)

        from mozpack.mozjar import JarWriter, JarReader
        # Best effort: if the destination cannot be read as a jar, carry on
        # as if it had no previous contents.
        try:
            old_jar = JarReader(fileobj=dest)
        except Exception:
            old_jar = []

        old_contents = {entry.filename: entry for entry in old_jar}

        with JarWriter(fileobj=dest, compress=self.compress,
                       optimize=self.optimize) as jar:
            for path, content in self:
                # A missing previous entry gives orig=None, the same as not
                # passing it at all.
                deflater = DeflaterDest(old_contents.get(path), self.compress)
                content.copy(deflater, skip_if_older)
                jar.add(path, deflater.deflater, mode=content.mode)
            if self._preload:
                jar.preload(self._preload)

    def open(self):
        # A Jarrer can be copied but not read back as a stream.
        raise RuntimeError('unsupported')

    def preload(self, paths):
        '''
        Append the given paths to the list of preloaded files. See the
        mozpack.mozjar.JarWriter documentation for details on jar
        preloading.
        '''
        self._preload.extend(paths)

mercurial