|
1 # This Source Code Form is subject to the terms of the Mozilla Public |
|
2 # License, v. 2.0. If a copy of the MPL was not distributed with this |
|
3 # file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
4 |
|
5 import os |
|
6 import stat |
|
7 |
|
8 from mozpack.errors import errors |
|
9 from mozpack.files import ( |
|
10 BaseFile, |
|
11 Dest, |
|
12 ) |
|
13 import mozpack.path |
|
14 import errno |
|
15 from collections import ( |
|
16 Counter, |
|
17 OrderedDict, |
|
18 ) |
|
19 |
|
20 |
|
class FileRegistry(object):
    '''
    Generic container to keep track of a set of BaseFile instances. It
    preserves the order under which the files are added, but doesn't keep
    track of empty directories (directories are not stored at all).
    The paths associated with the BaseFile instances are relative to an
    unspecified (virtual) root directory.

        registry = FileRegistry()
        registry.add('foo/bar', file_instance)
    '''

    def __init__(self):
        # path -> BaseFile instance, in insertion order.
        self._files = OrderedDict()
        # directory -> number of registered paths that live below it.
        self._required_directories = Counter()

    def _partial_paths(self, path):
        '''
        Turn "foo/bar/baz/zot" into ["foo/bar/baz", "foo/bar", "foo"].
        '''
        partial_paths = []
        partial_path = path
        while partial_path:
            partial_path = mozpack.path.dirname(partial_path)
            if partial_path:
                partial_paths.append(partial_path)
        return partial_paths

    def add(self, path, content):
        '''
        Add a BaseFile instance to the container, under the given path.

        The addition is refused (and reported through errors.error, whose
        result is returned) when the path is already registered, when the
        path is a directory required by an already registered path, or when
        one of the parents of the path is itself registered as a file.
        '''
        assert isinstance(content, BaseFile)
        if path in self._files:
            return errors.error("%s already added" % path)
        if self._required_directories[path] > 0:
            return errors.error("Can't add %s: it is a required directory" %
                                path)
        # Check whether any parent of the given path is already stored
        partial_paths = self._partial_paths(path)
        for partial_path in partial_paths:
            if partial_path in self._files:
                return errors.error("Can't add %s: %s is a file" %
                                    (path, partial_path))
        self._files[path] = content
        self._required_directories.update(partial_paths)

    def match(self, pattern):
        '''
        Return the list of paths, stored in the container, matching the
        given pattern. See the mozpack.path.match documentation for a
        description of the handled patterns.

        An empty pattern matches every stored path. A pattern without '*'
        matches either the stored path equal to it, or every stored path
        whose base directory is the pattern.
        '''
        if '*' in pattern:
            return [p for p in self.paths()
                    if mozpack.path.match(p, pattern)]
        if pattern == '':
            return self.paths()
        if pattern in self._files:
            return [pattern]
        return [p for p in self.paths()
                if mozpack.path.basedir(p, [pattern]) == pattern]

    def remove(self, pattern):
        '''
        Remove paths matching the given pattern from the container. See the
        mozpack.path.match documentation for a description of the handled
        patterns.

        When nothing matches, the problem is reported through errors.error
        and its result is returned.
        '''
        # match() always hands back a list that is independent from
        # self._files, so deleting entries while looping is safe.
        items = self.match(pattern)
        if not items:
            return errors.error("Can't remove %s: %s" % (pattern,
                                "not matching anything previously added"))
        for i in items:
            del self._files[i]
            self._required_directories.subtract(self._partial_paths(i))

    def paths(self):
        '''
        Return all paths stored in the container, in the order they were added.

        The result is a new list: it is a snapshot that does not change when
        the container is modified afterwards, on Python 2 and Python 3 alike.
        '''
        return list(self._files)

    def __len__(self):
        '''
        Return number of paths stored in the container.
        '''
        return len(self._files)

    def __contains__(self, pattern):
        # The 'in' operator is deliberately disabled; contains() is the
        # supported way to test for membership.
        raise RuntimeError("'in' operator forbidden for %s. Use contains()." %
                           self.__class__.__name__)

    def contains(self, pattern):
        '''
        Return whether the container contains paths matching the given
        pattern. See the mozpack.path.match documentation for a description of
        the handled patterns.
        '''
        return len(self.match(pattern)) > 0

    def __getitem__(self, path):
        '''
        Return the BaseFile instance stored in the container for the given
        path. Raises KeyError when the path is not registered.
        '''
        return self._files[path]

    def __iter__(self):
        '''
        Iterate over all (path, BaseFile instance) pairs from the container.
            for path, file in registry:
                (...)
        '''
        # items() exists on both Python 2 and Python 3, unlike iteritems().
        return iter(self._files.items())

    def required_directories(self):
        '''
        Return the set of directories required by the paths in the container,
        in no particular order. The returned directories are relative to an
        unspecified (virtual) root directory (and do not include said root
        directory).
        '''
        # Entries whose count dropped to zero after remove() are skipped.
        return set(k for k, v in self._required_directories.items() if v > 0)
|
145 |
|
146 |
|
class FileCopyResult(object):
    """Outcome of a FileCopier.copy operation.

    Four sets, all empty right after construction, collect the paths touched
    by the copy. Each set has a companion read-only ``*_count`` property
    giving its current size.
    """

    def __init__(self):
        self.updated_files, self.existing_files = set(), set()
        self.removed_files, self.removed_directories = set(), set()

    @property
    def updated_files_count(self):
        """Number of entries in ``updated_files``."""
        count = len(self.updated_files)
        return count

    @property
    def existing_files_count(self):
        """Number of entries in ``existing_files``."""
        count = len(self.existing_files)
        return count

    @property
    def removed_files_count(self):
        """Number of entries in ``removed_files``."""
        count = len(self.removed_files)
        return count

    @property
    def removed_directories_count(self):
        """Number of entries in ``removed_directories``."""
        count = len(self.removed_directories)
        return count
|
171 |
|
172 |
|
class FileCopier(FileRegistry):
    '''
    FileRegistry with the ability to copy the registered files to a separate
    directory.
    '''
    def copy(self, destination, skip_if_older=True,
             remove_unaccounted=True,
             remove_all_directory_symlinks=True,
             remove_empty_directories=True):
        '''
        Copy all registered files to the given destination path. The given
        destination can be an existing directory, or not exist at all. It
        can't be e.g. a file.
        The copy process acts a bit like rsync: files are not copied when they
        don't need to (see mozpack.files for details on file.copy).

        By default, files in the destination directory that aren't
        registered are removed and empty directories are deleted. In
        addition, all directory symlinks in the destination directory
        are deleted: this is a conservative approach to ensure that we
        never accidentally write files into a directory that is not the
        destination directory. In the worst case, we might have a
        directory symlink in the object directory to the source
        directory.

        To disable removing of unregistered files, pass
        remove_unaccounted=False. To disable removing empty
        directories, pass remove_empty_directories=False. In rare
        cases, you might want to maintain directory symlinks in the
        destination directory (at least those that are not required to
        be regular directories): pass
        remove_all_directory_symlinks=False. Exercise caution with
        this flag: you almost certainly do not want to preserve
        directory symlinks.

        skip_if_older is handed unchanged to the copy() method of every
        registered file.

        Returns a FileCopyResult that details what changed.
        '''
        # basestring only exists on Python 2; fall back to str elsewhere so
        # the sanity check works on both major versions.
        try:
            string_types = basestring  # noqa: F821
        except NameError:
            string_types = str
        assert isinstance(destination, string_types)
        assert not os.path.exists(destination) or os.path.isdir(destination)

        result = FileCopyResult()
        have_symlinks = hasattr(os, 'symlink')
        destination = os.path.normpath(destination)

        # We create the destination directory specially. We can't do this as
        # part of the loop doing mkdir() below because that loop munges
        # symlinks and permissions and parent directories of the destination
        # directory may have their own weird schema. The contract is we only
        # manage children of destination, not its parents.
        try:
            os.makedirs(destination)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise

        # Because we could be handling thousands of files, code in this
        # function is optimized to minimize system calls. We prefer CPU time
        # in Python over possibly I/O bound filesystem calls to stat() and
        # friends.

        required_dirs = {destination}
        dest_files = set()

        for p, f in self:
            dest_files.add(os.path.normpath(os.path.join(destination, p)))

        required_dirs |= set(os.path.normpath(os.path.join(destination, d))
                             for d in self.required_directories())

        # Ensure destination directories are in place and proper.
        #
        # The "proper" bit is important. We need to ensure that directories
        # have appropriate permissions or we will be unable to discover
        # and write files. Furthermore, we need to verify directories aren't
        # symlinks.
        #
        # Symlinked directories (a symlink whose target is a directory) are
        # incompatible with us because our manifest talks in terms of files,
        # not directories. If we leave symlinked directories unchecked, we
        # would blindly follow symlinks and this might confuse file
        # installation. For example, if an existing directory is a symlink
        # to directory X and we attempt to install a symlink in this directory
        # to a file in directory X, we may create a recursive symlink!
        #
        # Sorting by length guarantees a parent is handled before its
        # children, since a parent path is always shorter.
        for d in sorted(required_dirs, key=len):
            try:
                os.mkdir(d)
            except OSError as error:
                if error.errno != errno.EEXIST:
                    raise

            # We allow the destination to be a symlink because the caller
            # is responsible for managing the destination and we assume
            # they know what they are doing.
            if have_symlinks and d != destination:
                st = os.lstat(d)
                if stat.S_ISLNK(st.st_mode):
                    # While we have remove_unaccounted, it doesn't apply
                    # to directory symlinks because if it did, our behavior
                    # could be very wrong.
                    os.remove(d)
                    os.mkdir(d)

            if not os.access(d, os.W_OK):
                # os.umask() can only be read by setting it, so set a
                # throwaway value and restore the previous one right away.
                umask = os.umask(0o077)
                os.umask(umask)
                os.chmod(d, 0o777 & ~umask)

        # While we have remove_unaccounted, it doesn't apply to empty
        # directories because it wouldn't make sense: an empty directory
        # is empty, so removing it should have no effect.
        existing_dirs = set()
        existing_files = set()
        for root, dirs, files in os.walk(destination):
            # We need to perform the same symlink detection as above. os.walk()
            # doesn't follow symlinks into directories by default, so we need
            # to check dirs (we can't wait for root).
            if have_symlinks:
                filtered = []
                for d in dirs:
                    full = os.path.join(root, d)
                    st = os.lstat(full)
                    if stat.S_ISLNK(st.st_mode):
                        # This directory symlink is not a required
                        # directory: any such symlink would have been
                        # removed and a directory created above.
                        if remove_all_directory_symlinks:
                            os.remove(full)
                            result.removed_files.add(os.path.normpath(full))
                        else:
                            existing_files.add(os.path.normpath(full))
                    else:
                        filtered.append(d)

                # In-place update so os.walk() only descends into the
                # directories that are left.
                dirs[:] = filtered

            existing_dirs.add(os.path.normpath(root))

            for d in dirs:
                existing_dirs.add(os.path.normpath(os.path.join(root, d)))

            for f in files:
                existing_files.add(os.path.normpath(os.path.join(root, f)))

        # Now we reconcile the state of the world against what we want.

        # Remove files no longer accounted for.
        if remove_unaccounted:
            for f in existing_files - dest_files:
                # Windows requires write access to remove files.
                if os.name == 'nt' and not os.access(f, os.W_OK):
                    # It doesn't matter what we set permissions to since we
                    # will remove this file shortly.
                    os.chmod(f, 0o600)

                os.remove(f)
                result.removed_files.add(f)

        # Install files.
        for p, f in self:
            destfile = os.path.normpath(os.path.join(destination, p))
            if f.copy(destfile, skip_if_older):
                result.updated_files.add(destfile)
            else:
                result.existing_files.add(destfile)

        if not remove_empty_directories:
            return result

        # Figure out which directories can be removed. This is complicated
        # by the fact we optionally remove existing files. This would be easy
        # if we walked the directory tree after installing files. But, we're
        # trying to minimize system calls.

        # Start with the ideal set.
        remove_dirs = existing_dirs - required_dirs

        # Then don't remove directories if we didn't remove unaccounted files
        # and one of those files exists.
        if not remove_unaccounted:
            for f in existing_files:
                parent = f
                previous = ''
                parents = set()
                # Collect every ancestor of f; os.path.dirname() stops
                # changing its result once the top of the path is reached.
                while True:
                    parent = os.path.dirname(parent)
                    parents.add(parent)

                    if previous == parent:
                        break

                    previous = parent

                remove_dirs -= parents

        # Remove empty directories that aren't required. Longest paths go
        # first so children are removed before their parents.
        for d in sorted(remove_dirs, key=len, reverse=True):
            # Permissions may not allow deletion. So ensure write access is
            # in place before attempting delete.
            os.chmod(d, 0o700)
            os.rmdir(d)
            result.removed_directories.add(d)

        return result
|
376 |
|
377 |
|
class FilePurger(FileCopier):
    """FileCopier flavor whose only job is deleting untracked entries.

    Usage: instantiate, call .add() once per path that is allowed to exist,
    then call .purge() on a target directory. Any file or empty directory
    found in that directory and absent from the registry gets deleted.
    """
    class FakeFile(BaseFile):
        # Placeholder entry: copying it does nothing and always reports
        # True to the caller.
        def copy(self, dest, skip_if_older=True):
            return True

    def add(self, path):
        """Register a path that is expected to exist.

        The kind of entity behind the path is not tracked for now. Type
        tracking could be added later so that purging deletes entities
        whose type does not match.
        """
        placeholder = FilePurger.FakeFile()
        return FileCopier.add(self, path, placeholder)

    def purge(self, dest):
        """Delete every file and empty directory missing from the registry."""
        # Call the parent implementation explicitly: copy() is disabled on
        # this class.
        outcome = FileCopier.copy(self, dest)
        return outcome

    def copy(self, *args, **kwargs):
        # Copying makes no sense for a purger; purge() is the entry point.
        raise Exception('copy() disabled on FilePurger. Use purge().')
|
406 |
|
407 |
|
class Jarrer(FileRegistry, BaseFile):
    '''
    FileRegistry with the ability to copy and pack the registered files as a
    jar file. Also acts as a BaseFile instance, to be copied with a FileCopier.
    '''
    def __init__(self, compress=True, optimize=True):
        '''
        Create a Jarrer instance. See mozpack.mozjar.JarWriter documentation
        for details on the compress and optimize arguments.
        '''
        self.compress = compress
        self.optimize = optimize
        # Paths handed to JarWriter.preload() when the jar is written.
        self._preload = []
        FileRegistry.__init__(self)

    def copy(self, dest, skip_if_older=True):
        '''
        Pack all registered files in the given destination jar. The given
        destination jar may be a path to jar file, or a Dest instance for
        a jar file.
        If the destination jar file exists, its (compressed) contents are used
        instead of the registered BaseFile instances when appropriate.

        skip_if_older is handed unchanged to the copy() method of every
        registered file.
        '''
        class DeflaterDest(Dest):
            '''
            Dest-like class, reading from a file-like object initially, but
            switching to a Deflater object if written to.

                dest = DeflaterDest(original_file)
                dest.read()      # Reads original_file
                dest.write(data) # Creates a Deflater and write data there

            Reading is only allowed before the first write: read() asserts
            that the instance has not been written to yet.
            '''
            def __init__(self, orig=None, compress=True):
                # None until the first read() or write(), then 'r' or 'w'.
                self.mode = None
                self.deflater = orig
                self.compress = compress

            def read(self, length=-1):
                if self.mode != 'r':
                    assert self.mode is None
                    self.mode = 'r'
                return self.deflater.read(length)

            def write(self, data):
                if self.mode != 'w':
                    # The first write drops whatever was wrapped so far and
                    # starts over with a fresh Deflater.
                    from mozpack.mozjar import Deflater
                    self.deflater = Deflater(self.compress)
                    self.mode = 'w'
                self.deflater.write(data)

            def exists(self):
                return self.deflater is not None

        # basestring only exists on Python 2; fall back to str elsewhere so
        # that plain paths are recognized on both major versions.
        try:
            string_types = basestring  # noqa: F821
        except NameError:
            string_types = str
        if isinstance(dest, string_types):
            dest = Dest(dest)
        assert isinstance(dest, Dest)

        from mozpack.mozjar import JarWriter, JarReader
        # Best effort: when the destination cannot be read as a jar, carry
        # on as if it had no previous contents.
        try:
            old_jar = JarReader(fileobj=dest)
        except Exception:
            old_jar = []

        old_contents = {f.filename: f for f in old_jar}

        with JarWriter(fileobj=dest, compress=self.compress,
                       optimize=self.optimize) as jar:
            for path, entry in self:
                if path in old_contents:
                    deflater = DeflaterDest(old_contents[path], self.compress)
                else:
                    deflater = DeflaterDest(compress=self.compress)
                entry.copy(deflater, skip_if_older)
                # deflater.deflater is the previous jar member when
                # entry.copy() wrote nothing, a new Deflater otherwise.
                jar.add(path, deflater.deflater, mode=entry.mode)
            if self._preload:
                jar.preload(self._preload)

    def open(self):
        raise RuntimeError('unsupported')

    def preload(self, paths):
        '''
        Add the given set of paths to the list of preloaded files. See
        mozpack.mozjar.JarWriter documentation for details on jar preloading.
        '''
        self._preload.extend(paths)