Fri, 16 Jan 2015 18:13:44 +0100
Integrate suggestion from review to improve consistency with existing code.
1 # This Source Code Form is subject to the terms of the Mozilla Public
2 # License, v. 2.0. If a copy of the MPL was not distributed with this
3 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
5 import os
6 import stat
8 from mozpack.errors import errors
9 from mozpack.files import (
10 BaseFile,
11 Dest,
12 )
13 import mozpack.path
14 import errno
15 from collections import (
16 Counter,
17 OrderedDict,
18 )
class FileRegistry(object):
    '''
    Ordered collection of BaseFile instances, keyed by path.

    Entries are kept in the order in which they were added. Directories are
    never stored themselves (so empty directories cannot be represented);
    only a per-directory count of the registered paths located below each
    directory is maintained. All paths are relative to an unspecified
    (virtual) root directory.

        registry = FileRegistry()
        registry.add('foo/bar', file_instance)
    '''

    def __init__(self):
        # path -> BaseFile instance, in insertion order.
        self._files = OrderedDict()
        # directory -> number of registered paths located underneath it.
        self._required_directories = Counter()

    def _partial_paths(self, path):
        '''
        List every ancestor directory of the given path, deepest first:
        "foo/bar/baz/zot" gives ["foo/bar/baz", "foo/bar", "foo"].
        '''
        ancestors = []
        current = path
        while current:
            current = mozpack.path.dirname(current)
            if not current:
                break
            ancestors.append(current)
        return ancestors

    def add(self, path, content):
        '''
        Register the given BaseFile instance under the given path.

        The request is rejected through errors.error() (whose result is then
        returned) when the path is already registered, when the path is
        currently needed as a directory by another entry, or when one of its
        ancestors is registered as a file.
        '''
        assert isinstance(content, BaseFile)
        if path in self._files:
            return errors.error("%s already added" % path)
        # Indexing a Counter with an unknown key yields 0 and stores nothing.
        if self._required_directories[path] > 0:
            return errors.error("Can't add %s: it is a required directory" %
                                path)
        # A path cannot live below something that is registered as a file.
        ancestors = self._partial_paths(path)
        blocking_file = next((a for a in ancestors if a in self._files), None)
        if blocking_file is not None:
            return errors.error("Can't add %s: %s is a file" %
                                (path, blocking_file))
        self._files[path] = content
        self._required_directories.update(ancestors)

    def match(self, pattern):
        '''
        Return the registered paths matching the given pattern. See the
        mozpack.path.match documentation for a description of the handled
        patterns.

        An empty pattern selects every path, a pattern equal to a registered
        path selects that path only, and any other wildcard-free pattern
        selects the paths for which mozpack.path.basedir reports the pattern
        as their base directory.
        '''
        if '*' in pattern:
            return [candidate for candidate in self.paths()
                    if mozpack.path.match(candidate, pattern)]
        if pattern == '':
            return self.paths()
        if pattern in self._files:
            return [pattern]
        return [candidate for candidate in self.paths()
                if mozpack.path.basedir(candidate, [pattern]) == pattern]

    def remove(self, pattern):
        '''
        Unregister every path matching the given pattern. See the
        mozpack.path.match documentation for a description of the handled
        patterns. A pattern matching nothing is reported through
        errors.error().
        '''
        matched = self.match(pattern)
        if not matched:
            return errors.error("Can't remove %s: %s" % (pattern,
                                "not matching anything previously added"))
        for path in matched:
            del self._files[path]
            # Release the claim this path had on each of its ancestors.
            self._required_directories.subtract(self._partial_paths(path))

    def paths(self):
        '''
        Return all registered paths, in the order they were added.
        '''
        return self._files.keys()

    def __len__(self):
        '''
        Return how many paths are registered.
        '''
        return len(self._files)

    def __contains__(self, pattern):
        # Deliberately unsupported: callers must use contains(), which takes
        # a pattern rather than an exact key.
        raise RuntimeError("'in' operator forbidden for %s. Use contains()." %
                           self.__class__.__name__)

    def contains(self, pattern):
        '''
        Return whether at least one registered path matches the given
        pattern. See the mozpack.path.match documentation for a description
        of the handled patterns.
        '''
        return bool(self.match(pattern))

    def __getitem__(self, path):
        '''
        Return the BaseFile instance registered under the given path.
        '''
        return self._files[path]

    def __iter__(self):
        '''
        Iterate over all (path, BaseFile instance) pairs of the registry.
            for path, file in registry:
                (...)
        '''
        return self._files.iteritems()

    def required_directories(self):
        '''
        Return the set of directories needed by the registered paths, in no
        particular order. The directories are relative to an unspecified
        (virtual) root directory, which itself is not part of the result.
        '''
        # Counts can drop to zero after remove(); such entries are skipped.
        return {directory
                for directory, count in self._required_directories.items()
                if count > 0}
class FileCopyResult(object):
    """Outcome of a FileCopier.copy operation.

    Holds four sets that the copy operation fills in, plus a read-only
    ``*_count`` property giving the size of each set.
    """

    def __init__(self):
        # Every instance starts out with its own four empty sets.
        self.updated_files = set()
        self.existing_files = set()
        self.removed_files = set()
        self.removed_directories = set()

    @property
    def updated_files_count(self):
        """Number of entries in updated_files."""
        return len(self.updated_files)

    @property
    def existing_files_count(self):
        """Number of entries in existing_files."""
        return len(self.existing_files)

    @property
    def removed_files_count(self):
        """Number of entries in removed_files."""
        return len(self.removed_files)

    @property
    def removed_directories_count(self):
        """Number of entries in removed_directories."""
        return len(self.removed_directories)
class FileCopier(FileRegistry):
    '''
    FileRegistry with the ability to copy the registered files to a separate
    directory.
    '''
    def copy(self, destination, skip_if_older=True,
             remove_unaccounted=True,
             remove_all_directory_symlinks=True,
             remove_empty_directories=True):
        '''
        Copy all registered files to the given destination path. The given
        destination can be an existing directory, or not exist at all. It
        can't be e.g. a file.
        The copy process acts a bit like rsync: files are not copied when they
        don't need to (see mozpack.files for details on file.copy).

        By default, files in the destination directory that aren't
        registered are removed and empty directories are deleted. In
        addition, all directory symlinks in the destination directory
        are deleted: this is a conservative approach to ensure that we
        never accidentally write files into a directory that is not the
        destination directory. In the worst case, we might have a
        directory symlink in the object directory to the source
        directory.

        To disable removing of unregistered files, pass
        remove_unaccounted=False. To disable removing empty
        directories, pass remove_empty_directories=False. In rare
        cases, you might want to maintain directory symlinks in the
        destination directory (at least those that are not required to
        be regular directories): pass
        remove_all_directory_symlinks=False. Exercise caution with
        this flag: you almost certainly do not want to preserve
        directory symlinks.

        skip_if_older is handed unchanged to the copy() method of every
        registered file.

        Returns a FileCopyResult that details what changed. OSError raised
        by the underlying filesystem calls propagates to the caller, except
        for EEXIST when creating directories that already exist.
        '''
        assert isinstance(destination, basestring)
        assert not os.path.exists(destination) or os.path.isdir(destination)

        result = FileCopyResult()
        have_symlinks = hasattr(os, 'symlink')
        destination = os.path.normpath(destination)

        # We create the destination directory specially. We can't do this as
        # part of the loop doing mkdir() below because that loop munges
        # symlinks and permissions and parent directories of the destination
        # directory may have their own weird schema. The contract is we only
        # manage children of destination, not its parents.
        try:
            os.makedirs(destination)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise

        # Because we could be handling thousands of files, code in this
        # function is optimized to minimize system calls. We prefer CPU time
        # in Python over possibly I/O bound filesystem calls to stat() and
        # friends.

        required_dirs = set([destination])
        dest_files = set()

        for p, f in self:
            dest_files.add(os.path.normpath(os.path.join(destination, p)))

        required_dirs |= set(os.path.normpath(os.path.join(destination, d))
                             for d in self.required_directories())

        # Ensure destination directories are in place and proper.
        #
        # The "proper" bit is important. We need to ensure that directories
        # have appropriate permissions or we will be unable to discover
        # and write files. Furthermore, we need to verify directories aren't
        # symlinks.
        #
        # Symlinked directories (a symlink whose target is a directory) are
        # incompatible with us because our manifest talks in terms of files,
        # not directories. If we leave symlinked directories unchecked, we
        # would blindly follow symlinks and this might confuse file
        # installation. For example, if an existing directory is a symlink
        # to directory X and we attempt to install a symlink in this directory
        # to a file in directory X, we may create a recursive symlink!
        #
        # Sorting by length guarantees a parent is handled before its
        # children, since a parent path is always shorter.
        for d in sorted(required_dirs, key=len):
            try:
                os.mkdir(d)
            except OSError as error:
                if error.errno != errno.EEXIST:
                    raise

            # We allow the destination to be a symlink because the caller
            # is responsible for managing the destination and we assume
            # they know what they are doing.
            if have_symlinks and d != destination:
                st = os.lstat(d)
                if stat.S_ISLNK(st.st_mode):
                    # While we have remove_unaccounted, it doesn't apply
                    # to directory symlinks because if it did, our behavior
                    # could be very wrong.
                    os.remove(d)
                    os.mkdir(d)

            if not os.access(d, os.W_OK):
                # os.umask() can only be read by setting it, so set a
                # temporary value and immediately restore the previous one.
                umask = os.umask(0o077)
                os.umask(umask)
                os.chmod(d, 0o777 & ~umask)

        # While we have remove_unaccounted, it doesn't apply to empty
        # directories because it wouldn't make sense: an empty directory
        # is empty, so removing it should have no effect.
        existing_dirs = set()
        existing_files = set()
        for root, dirs, files in os.walk(destination):
            # We need to perform the same symlink detection as above. os.walk()
            # doesn't follow symlinks into directories by default, so we need
            # to check dirs (we can't wait for root).
            if have_symlinks:
                filtered = []
                for d in dirs:
                    full = os.path.join(root, d)
                    st = os.lstat(full)
                    if stat.S_ISLNK(st.st_mode):
                        # This directory symlink is not a required
                        # directory: any such symlink would have been
                        # removed and a directory created above.
                        if remove_all_directory_symlinks:
                            os.remove(full)
                            result.removed_files.add(os.path.normpath(full))
                        else:
                            # A preserved directory symlink is accounted for
                            # as a file from here on.
                            existing_files.add(os.path.normpath(full))
                    else:
                        filtered.append(d)

                # In-place assignment so os.walk() sees the pruned list.
                dirs[:] = filtered

            existing_dirs.add(os.path.normpath(root))

            for d in dirs:
                existing_dirs.add(os.path.normpath(os.path.join(root, d)))

            for f in files:
                existing_files.add(os.path.normpath(os.path.join(root, f)))

        # Now we reconcile the state of the world against what we want.

        # Remove files no longer accounted for.
        if remove_unaccounted:
            for f in existing_files - dest_files:
                # Windows requires write access to remove files.
                if os.name == 'nt' and not os.access(f, os.W_OK):
                    # It doesn't matter what we set permissions to since we
                    # will remove this file shortly.
                    os.chmod(f, 0o600)

                os.remove(f)
                result.removed_files.add(f)

        # Install files.
        for p, f in self:
            destfile = os.path.normpath(os.path.join(destination, p))
            if f.copy(destfile, skip_if_older):
                result.updated_files.add(destfile)
            else:
                result.existing_files.add(destfile)

        if not remove_empty_directories:
            return result

        # Figure out which directories can be removed. This is complicated
        # by the fact we optionally remove existing files. This would be easy
        # if we walked the directory tree after installing files. But, we're
        # trying to minimize system calls.

        # Start with the ideal set.
        remove_dirs = existing_dirs - required_dirs

        # Then don't remove directories if we didn't remove unaccounted files
        # and one of those files exists.
        if not remove_unaccounted:
            for f in existing_files:
                parent = f
                previous = ''
                parents = set()
                # Climb until os.path.dirname() stops changing the path,
                # i.e. until the top of the path has been reached.
                while True:
                    parent = os.path.dirname(parent)
                    parents.add(parent)

                    if previous == parent:
                        break

                    previous = parent

                remove_dirs -= parents

        # Remove empty directories that aren't required. Longest paths go
        # first so that children are removed before their parents.
        for d in sorted(remove_dirs, key=len, reverse=True):
            # Permissions may not allow deletion. So ensure write access is
            # in place before attempting delete.
            os.chmod(d, 0o700)
            os.rmdir(d)
            result.removed_directories.add(d)

        return result
class FilePurger(FileCopier):
    """FileCopier variant whose only purpose is purging untracked files.

    Usage: create an instance, call .add() for each file/path that is
    allowed to exist, then call .purge() on a target directory. Every file
    and empty directory of that target directory that is not in the registry
    gets deleted.
    """
    class FakeFile(BaseFile):
        """Placeholder entry: copying it writes nothing and reports True."""
        def copy(self, dest, skip_if_older=True):
            return True

    def add(self, path):
        """Register a path that is expected to exist.

        What kind of entity lives behind the path is currently not tracked.
        Type tracking could presumably be added later, so that purging
        deletes entities whose type does not match.
        """
        placeholder = FilePurger.FakeFile()
        return FileCopier.add(self, path, placeholder)

    def purge(self, dest):
        """Delete all files and empty directories not in the registry.

        Runs FileCopier.copy on dest with its default options and returns
        its result.
        """
        return FileCopier.copy(self, dest)

    def copy(self, *args, **kwargs):
        # Copying makes no sense for a purger; purge() is the entry point.
        raise Exception('copy() disabled on FilePurger. Use purge().')
class Jarrer(FileRegistry, BaseFile):
    '''
    FileRegistry able to pack its registered files into a jar file. It is
    also a BaseFile, so it can itself be copied with a FileCopier.
    '''
    def __init__(self, compress=True, optimize=True):
        '''
        Create a Jarrer instance. See mozpack.mozjar.JarWriter documentation
        for details on the compress and optimize arguments.
        '''
        self.compress = compress
        self.optimize = optimize
        # Paths handed to the jar writer's preload() at copy time.
        self._preload = []
        FileRegistry.__init__(self)

    def copy(self, dest, skip_if_older=True):
        '''
        Pack all registered files in the given destination jar. The given
        destination jar may be a path to jar file, or a Dest instance for
        a jar file.
        If the destination jar file exists, its (compressed) contents are used
        instead of the registered BaseFile instances when appropriate.
        '''
        class DeflaterDest(Dest):
            '''
            Dest-like class that starts out backed by an optional existing
            file-like object and switches to a fresh Deflater as soon as it
            is written to.

                dest = DeflaterDest(original_file)
                dest.read()      # Reads original_file
                dest.write(data) # Creates a Deflater and writes data there

            read() asserts that the instance has not been switched to
            another mode, so it must not be called after write(). Whichever
            object currently backs the instance is available as .deflater.
            '''
            def __init__(self, orig=None, compress=True):
                # mode is None until the first read ('r') or write ('w').
                self.mode = None
                self.deflater = orig
                self.compress = compress

            def read(self, length=-1):
                if self.mode != 'r':
                    assert self.mode is None
                    self.mode = 'r'
                return self.deflater.read(length)

            def write(self, data):
                if self.mode != 'w':
                    # The first write replaces whatever backed the instance.
                    from mozpack.mozjar import Deflater
                    self.deflater = Deflater(self.compress)
                    self.mode = 'w'
                self.deflater.write(data)

            def exists(self):
                return self.deflater is not None

        if isinstance(dest, basestring):
            dest = Dest(dest)
        assert isinstance(dest, Dest)

        from mozpack.mozjar import JarWriter, JarReader
        # Best effort: a destination that cannot be read as a jar is treated
        # as having no previous contents.
        try:
            old_jar = JarReader(fileobj=dest)
        except Exception:
            old_jar = []

        previous_entries = {member.filename: member for member in old_jar}

        with JarWriter(fileobj=dest, compress=self.compress,
                       optimize=self.optimize) as jar:
            for path, entry in self:
                # Paths absent from the old jar get no backing object (None).
                deflater = DeflaterDest(previous_entries.get(path),
                                        self.compress)
                entry.copy(deflater, skip_if_older)
                jar.add(path, deflater.deflater, mode=entry.mode)
            if self._preload:
                jar.preload(self._preload)

    def open(self):
        # Reading a Jarrer back as a file is not supported.
        raise RuntimeError('unsupported')

    def preload(self, paths):
        '''
        Add the given set of paths to the list of preloaded files. See
        mozpack.mozjar.JarWriter documentation for details on jar preloading.
        '''
        self._preload.extend(paths)