python/mozbuild/mozpack/copier.py

branch
TOR_BUG_9701
changeset 14
925c144e1f1f
equal deleted inserted replaced
-1:000000000000 0:b6ad23da86e7
1 # This Source Code Form is subject to the terms of the Mozilla Public
2 # License, v. 2.0. If a copy of the MPL was not distributed with this
3 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
4
5 import os
6 import stat
7
8 from mozpack.errors import errors
9 from mozpack.files import (
10 BaseFile,
11 Dest,
12 )
13 import mozpack.path
14 import errno
15 from collections import (
16 Counter,
17 OrderedDict,
18 )
19
20
class FileRegistry(object):
    '''
    Ordered collection of BaseFile instances, keyed by path.

    Files are kept in insertion order. Directories are never stored
    themselves (so empty directories cannot be represented); the registry
    only counts how many registered files need each parent directory.
    All paths are relative to an unspecified (virtual) root directory.

        registry = FileRegistry()
        registry.add('foo/bar', file_instance)
    '''

    def __init__(self):
        # path -> BaseFile, in insertion order.
        self._files = OrderedDict()
        # directory -> number of registered files living below it.
        self._required_directories = Counter()

    def _partial_paths(self, path):
        '''
        Return every ancestor directory of the given path, deepest first:
        "foo/bar/baz/zot" gives ["foo/bar/baz", "foo/bar", "foo"].
        '''
        ancestors = []
        if not path:
            return ancestors
        parent = mozpack.path.dirname(path)
        while parent:
            ancestors.append(parent)
            parent = mozpack.path.dirname(parent)
        return ancestors

    def add(self, path, content):
        '''
        Register the given BaseFile instance under the given path.

        An error is reported through mozpack.errors (and nothing is added)
        when the path is already registered, when it is currently needed as
        a directory, or when one of its ancestors is a registered file.
        '''
        assert isinstance(content, BaseFile)
        if path in self._files:
            return errors.error("%s already added" % path)
        if self._required_directories[path] > 0:
            return errors.error("Can't add %s: it is a required directory" %
                                path)
        ancestors = self._partial_paths(path)
        # An ancestor that is itself a registered file cannot also act as a
        # directory. Ancestors are never empty strings, so None is a safe
        # "nothing found" marker.
        blocking = next((a for a in ancestors if a in self._files), None)
        if blocking is not None:
            return errors.error("Can't add %s: %s is a file" %
                                (path, blocking))
        self._files[path] = content
        self._required_directories.update(ancestors)

    def match(self, pattern):
        '''
        Return the registered paths matching the given pattern. See the
        mozpack.path.match documentation for a description of the handled
        patterns.
        '''
        if '*' in pattern:
            return [candidate for candidate in self.paths()
                    if mozpack.path.match(candidate, pattern)]
        if pattern == '':
            # The empty pattern designates the (virtual) root: everything.
            return self.paths()
        if pattern in self._files:
            return [pattern]
        # Otherwise treat the pattern as a directory and return what lives
        # underneath it.
        bases = [pattern]
        return [candidate for candidate in self.paths()
                if mozpack.path.basedir(candidate, bases) == pattern]

    def remove(self, pattern):
        '''
        Unregister every path matching the given pattern. See the
        mozpack.path.match documentation for a description of the handled
        patterns. An error is reported when nothing matches.
        '''
        matched = self.match(pattern)
        if not matched:
            return errors.error("Can't remove %s: %s" % (pattern,
                                "not matching anything previously added"))
        for path in matched:
            del self._files[path]
            # Release the claim this file had on each of its ancestors.
            self._required_directories.subtract(self._partial_paths(path))

    def paths(self):
        '''
        Return all registered paths, in the order they were added.
        '''
        return self._files.keys()

    def __len__(self):
        '''
        Return the number of registered paths.
        '''
        return len(self._files)

    def __contains__(self, pattern):
        # Membership tests are pattern based here, so the plain 'in'
        # operator is rejected in favor of the explicit contains() method.
        raise RuntimeError("'in' operator forbidden for %s. Use contains()." %
                           self.__class__.__name__)

    def contains(self, pattern):
        '''
        Return whether at least one registered path matches the given
        pattern. See the mozpack.path.match documentation for a description
        of the handled patterns.
        '''
        return bool(self.match(pattern))

    def __getitem__(self, path):
        '''
        Return the BaseFile instance registered under the given path.
        '''
        return self._files[path]

    def __iter__(self):
        '''
        Iterate over all (path, BaseFile instance) pairs, in insertion order.
            for path, file in registry:
                (...)
        '''
        return self._files.iteritems()

    def required_directories(self):
        '''
        Return the set of directories needed by the registered paths, in no
        particular order. The directories are relative to an unspecified
        (virtual) root directory, which is itself not part of the result.
        '''
        # Counts can drop back to zero after remove(); skip those entries.
        return {directory
                for directory, count in self._required_directories.items()
                if count > 0}
145
146
class FileCopyResult(object):
    """Outcome of a FileCopier.copy operation.

    Each attribute is a set of paths; the ``*_count`` properties expose the
    size of the matching set.
    """

    def __init__(self):
        # Registered files whose copy reported a change.
        self.updated_files = set()
        # Registered files whose copy reported no change.
        self.existing_files = set()
        # Files deleted from the destination.
        self.removed_files = set()
        # Directories deleted from the destination.
        self.removed_directories = set()

    @property
    def updated_files_count(self):
        """Number of entries in updated_files."""
        return len(self.updated_files)

    @property
    def existing_files_count(self):
        """Number of entries in existing_files."""
        return len(self.existing_files)

    @property
    def removed_files_count(self):
        """Number of entries in removed_files."""
        return len(self.removed_files)

    @property
    def removed_directories_count(self):
        """Number of entries in removed_directories."""
        return len(self.removed_directories)
171
172
class FileCopier(FileRegistry):
    '''
    FileRegistry with the ability to copy the registered files to a separate
    directory.
    '''
    def copy(self, destination, skip_if_older=True,
             remove_unaccounted=True,
             remove_all_directory_symlinks=True,
             remove_empty_directories=True):
        '''
        Copy all registered files to the given destination path. The given
        destination can be an existing directory, or not exist at all. It
        can't be e.g. a file.
        The copy process acts a bit like rsync: files are not copied when they
        don't need to (see mozpack.files for details on file.copy).
        skip_if_older is passed unchanged to each registered file's copy()
        method.

        By default, files in the destination directory that aren't
        registered are removed and empty directories are deleted. In
        addition, all directory symlinks in the destination directory
        are deleted: this is a conservative approach to ensure that we
        never accidentally write files into a directory that is not the
        destination directory. In the worst case, we might have a
        directory symlink in the object directory to the source
        directory.

        To disable removing of unregistered files, pass
        remove_unaccounted=False. To disable removing empty
        directories, pass remove_empty_directories=False. In rare
        cases, you might want to maintain directory symlinks in the
        destination directory (at least those that are not required to
        be regular directories): pass
        remove_all_directory_symlinks=False. Exercise caution with
        this flag: you almost certainly do not want to preserve
        directory symlinks. Note that a preserved directory symlink is
        then treated like any other existing file, so it is still
        deleted when remove_unaccounted is true and its path is not
        registered.

        Returns a FileCopyResult that details what changed:
        updated_files and existing_files hold the destination paths for
        which file.copy() returned a true, respectively false, value;
        removed_files holds the deleted unregistered files and the
        directory symlinks deleted while scanning the destination;
        removed_directories holds the deleted directories.
        '''
        assert isinstance(destination, basestring)
        assert not os.path.exists(destination) or os.path.isdir(destination)

        result = FileCopyResult()
        # Platforms without os.symlink can skip all symlink checks below.
        have_symlinks = hasattr(os, 'symlink')
        destination = os.path.normpath(destination)

        # We create the destination directory specially. We can't do this as
        # part of the loop doing mkdir() below because that loop munges
        # symlinks and permissions and parent directories of the destination
        # directory may have their own weird schema. The contract is we only
        # manage children of destination, not its parents.
        try:
            os.makedirs(destination)
        except OSError as e:
            # An already existing destination is fine; anything else isn't.
            if e.errno != errno.EEXIST:
                raise

        # Because we could be handling thousands of files, code in this
        # function is optimized to minimize system calls. We prefer CPU time
        # in Python over possibly I/O bound filesystem calls to stat() and
        # friends.

        # Normalized absolute-ish paths (destination joined with the
        # registered relative paths) of what must exist once we are done.
        required_dirs = set([destination])
        dest_files = set()

        for p, f in self:
            dest_files.add(os.path.normpath(os.path.join(destination, p)))

        required_dirs |= set(os.path.normpath(os.path.join(destination, d))
                             for d in self.required_directories())

        # Ensure destination directories are in place and proper.
        #
        # The "proper" bit is important. We need to ensure that directories
        # have appropriate permissions or we will be unable to discover
        # and write files. Furthermore, we need to verify directories aren't
        # symlinks.
        #
        # Symlinked directories (a symlink whose target is a directory) are
        # incompatible with us because our manifest talks in terms of files,
        # not directories. If we leave symlinked directories unchecked, we
        # would blindly follow symlinks and this might confuse file
        # installation. For example, if an existing directory is a symlink
        # to directory X and we attempt to install a symlink in this directory
        # to a file in directory X, we may create a recursive symlink!
        #
        # Sorting by length visits a parent before any of its children, since
        # a parent path is always shorter than the paths below it.
        for d in sorted(required_dirs, key=len):
            try:
                os.mkdir(d)
            except OSError as error:
                if error.errno != errno.EEXIST:
                    raise

            # We allow the destination to be a symlink because the caller
            # is responsible for managing the destination and we assume
            # they know what they are doing.
            if have_symlinks and d != destination:
                st = os.lstat(d)
                if stat.S_ISLNK(st.st_mode):
                    # While we have remove_unaccounted, it doesn't apply
                    # to directory symlinks because if it did, our behavior
                    # could be very wrong.
                    # The symlink is replaced by a real directory; this
                    # replacement is not recorded in the result.
                    os.remove(d)
                    os.mkdir(d)

            if not os.access(d, os.W_OK):
                # os.umask() can only report the current mask by setting a
                # new one, so set a temporary value and restore the old one
                # immediately.
                umask = os.umask(0077)
                os.umask(umask)
                os.chmod(d, 0777 & ~umask)

        # While we have remove_unaccounted, it doesn't apply to empty
        # directories because it wouldn't make sense: an empty directory
        # is empty, so removing it should have no effect.
        existing_dirs = set()
        existing_files = set()
        for root, dirs, files in os.walk(destination):
            # We need to perform the same symlink detection as above. os.walk()
            # doesn't follow symlinks into directories by default, so we need
            # to check dirs (we can't wait for root).
            if have_symlinks:
                filtered = []
                for d in dirs:
                    full = os.path.join(root, d)
                    st = os.lstat(full)
                    if stat.S_ISLNK(st.st_mode):
                        # This directory symlink is not a required
                        # directory: any such symlink would have been
                        # removed and a directory created above.
                        if remove_all_directory_symlinks:
                            os.remove(full)
                            result.removed_files.add(os.path.normpath(full))
                        else:
                            # Kept symlinks are tracked as plain files from
                            # here on.
                            existing_files.add(os.path.normpath(full))
                    else:
                        filtered.append(d)

                # In-place assignment so that the list held by os.walk()
                # itself no longer contains the symlinks.
                dirs[:] = filtered

            existing_dirs.add(os.path.normpath(root))

            for d in dirs:
                existing_dirs.add(os.path.normpath(os.path.join(root, d)))

            for f in files:
                existing_files.add(os.path.normpath(os.path.join(root, f)))

        # Now we reconcile the state of the world against what we want.

        # Remove files no longer accounted for.
        if remove_unaccounted:
            for f in existing_files - dest_files:
                # Windows requires write access to remove files.
                if os.name == 'nt' and not os.access(f, os.W_OK):
                    # It doesn't matter what we set permissions to since we
                    # will remove this file shortly.
                    os.chmod(f, 0600)

                os.remove(f)
                result.removed_files.add(f)

        # Install files.
        for p, f in self:
            destfile = os.path.normpath(os.path.join(destination, p))
            # file.copy() reports whether it changed the destination file.
            if f.copy(destfile, skip_if_older):
                result.updated_files.add(destfile)
            else:
                result.existing_files.add(destfile)

        if not remove_empty_directories:
            return result

        # Figure out which directories can be removed. This is complicated
        # by the fact we optionally remove existing files. This would be easy
        # if we walked the directory tree after installing files. But, we're
        # trying to minimize system calls.

        # Start with the ideal set.
        remove_dirs = existing_dirs - required_dirs

        # Then don't remove directories if we didn't remove unaccounted files
        # and one of those files exists.
        if not remove_unaccounted:
            for f in existing_files:
                parent = f
                previous = ''
                parents = set()
                # Collect every ancestor of f; the walk stops once
                # os.path.dirname() no longer changes the path.
                while True:
                    parent = os.path.dirname(parent)
                    parents.add(parent)

                    if previous == parent:
                        break

                    previous = parent

                remove_dirs -= parents

        # Remove empty directories that aren't required.
        # Longest paths first, so children are removed before their parents.
        for d in sorted(remove_dirs, key=len, reverse=True):
            # Permissions may not allow deletion. So ensure write access is
            # in place before attempting delete.
            os.chmod(d, 0700)
            os.rmdir(d)
            result.removed_directories.add(d)

        return result
376
377
class FilePurger(FileCopier):
    """FileCopier variant that only deletes untracked files.

    Register every path that is allowed to exist with .add(), then call
    .purge() on a target directory: files and empty directories found there
    that are not in the registry are deleted.
    """
    class FakeFile(BaseFile):
        """Placeholder registered for each path; it never writes anything."""

        def copy(self, dest, skip_if_older=True):
            # Nothing is copied; a change is always reported.
            return True

    def add(self, path):
        """Record that a path should exist.

        The kind of entity expected behind the path is not tracked for now.
        Type tracking could presumably be added later, so that purging
        deletes entities whose type does not match.
        """
        placeholder = FilePurger.FakeFile()
        return FileCopier.add(self, path, placeholder)

    def purge(self, dest):
        """Delete all files and empty directories not in the registry.

        Returns what FileCopier.copy returns when run against dest with its
        default options.
        """
        return FileCopier.copy(self, dest)

    def copy(self, *_args, **_kwargs):
        """Always raise: purging goes through purge(), never copy()."""
        raise Exception('copy() disabled on FilePurger. Use purge().')
406
407
class Jarrer(FileRegistry, BaseFile):
    '''
    FileRegistry able to pack its registered files into a jar file. It is
    also a BaseFile, so that it can itself be copied by a FileCopier.
    '''
    def __init__(self, compress=True, optimize=True):
        '''
        Create a Jarrer instance. See mozpack.mozjar.JarWriter documentation
        for details on the compress and optimize arguments.
        '''
        FileRegistry.__init__(self)
        self.compress = compress
        self.optimize = optimize
        # Paths to hand over to JarWriter.preload() when packing.
        self._preload = []

    def copy(self, dest, skip_if_older=True):
        '''
        Pack all registered files in the given destination jar. The given
        destination jar may be a path to jar file, or a Dest instance for
        a jar file.
        If the destination jar file exists, its (compressed) contents are used
        instead of the registered BaseFile instances when appropriate.
        '''
        class DeflaterDest(Dest):
            '''
            Dest-like class, reading from a file-like object initially, but
            switching to a Deflater object if written to.

                dest = DeflaterDest(original_file)
                dest.read()      # Reads original_file
                dest.write(data) # Creates a Deflater and writes data there

            Reading is only expected before any write: read() asserts that
            the object has not been switched to write mode.
            '''
            def __init__(self, orig=None, compress=True):
                self.mode = None
                self.deflater = orig
                self.compress = compress

            def read(self, length=-1):
                if self.mode != 'r':
                    assert self.mode is None
                    self.mode = 'r'
                return self.deflater.read(length)

            def write(self, data):
                if self.mode != 'w':
                    # First write: drop whatever was wrapped so far and
                    # start from a fresh Deflater.
                    from mozpack.mozjar import Deflater
                    self.deflater = Deflater(self.compress)
                    self.mode = 'w'
                self.deflater.write(data)

            def exists(self):
                return self.deflater is not None

        if isinstance(dest, basestring):
            dest = Dest(dest)
        assert isinstance(dest, Dest)

        from mozpack.mozjar import JarWriter, JarReader
        try:
            old_jar = JarReader(fileobj=dest)
        except Exception:
            # The destination could not be read as a jar: carry on as if
            # there were no previous contents.
            old_jar = []

        previous_entries = {entry.filename: entry for entry in old_jar}

        with JarWriter(fileobj=dest, compress=self.compress,
                       optimize=self.optimize) as jar:
            for path, content in self:
                # A missing entry yields None, i.e. a DeflaterDest with
                # nothing to read from.
                deflater = DeflaterDest(previous_entries.get(path),
                                        self.compress)
                content.copy(deflater, skip_if_older)
                jar.add(path, deflater.deflater, mode=content.mode)
            if self._preload:
                jar.preload(self._preload)

    def open(self):
        raise RuntimeError('unsupported')

    def preload(self, paths):
        '''
        Append the given paths to the list of preloaded files. See
        mozpack.mozjar.JarWriter documentation for details on jar preloading.
        '''
        self._preload.extend(paths)

mercurial