|
1 # This Source Code Form is subject to the terms of the Mozilla Public |
|
2 # License, v. 2.0. If a copy of the MPL was not distributed with this |
|
3 # file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
4 |
|
5 import sys |
|
6 import hashlib |
|
7 from mozpack.packager.unpack import UnpackFinder |
|
8 from collections import OrderedDict |
|
9 |
|
10 ''' |
|
11 Find files duplicated in a given packaged directory, independently of its |
|
12 package format. |
|
13 ''' |
|
14 |
|
15 |
|
def find_dupes(source):
    """Report files with identical content found under ``source``.

    Every ``(path, file)`` pair yielded by ``UnpackFinder(source)`` is read
    in full and grouped by the MD5 digest of its content. For each digest
    shared by more than one path, the content size and the list of paths
    are printed to stdout. If at least one such group exists, a final
    warning line gives the number of groups and the number of redundant
    (uncompressed) bytes they account for.

    The function only uses constructs that behave the same on Python 2 and
    Python 3 (single-argument ``print(...)``, ``dict.values()``).

    :param source: value passed unchanged to ``UnpackFinder``; the command
        line entry point passes the directory given as its argument.
    :returns: None. The report is written to stdout.
    """
    # digest -> (content size, [paths]); ordered so groups are reported in
    # the order their first member was encountered.
    md5s = OrderedDict()
    for p, f in UnpackFinder(source):
        content = f.open().read()
        m = hashlib.md5(content).digest()
        if m not in md5s:
            # The size is stored once per digest: files with the same
            # digest are treated as having the same content.
            md5s[m] = (len(content), [])
        md5s[m][1].append(p)

    total = 0
    num_dupes = 0
    for size, paths in md5s.values():
        if len(paths) > 1:
            # Only the copies beyond the first one count as waste.
            extra_copies = len(paths) - 1
            # The repeat count is only mentioned for three or more copies.
            times = ' (%d times)' % extra_copies if len(paths) > 2 else ''
            print('Duplicates %d bytes%s:' % (size, times))
            # Each path ends with its own newline, so print() adds a blank
            # separator line after every group.
            print(''.join(' %s\n' % p for p in paths))
            total += extra_copies * size
            num_dupes += 1
    if num_dupes:
        print("WARNING: Found %d duplicated files taking %d bytes"
              " (uncompressed)" % (num_dupes, total))
|
36 |
|
37 |
|
def main():
    """Command line entry point: ``<script> directory``.

    Expects exactly one command line argument and passes it to
    ``find_dupes``. With any other number of arguments, a usage message is
    written to stderr and the process exits with status 1.

    :raises SystemExit: with code 1 when the argument count is wrong.
    """
    if len(sys.argv) != 2:
        # os is only needed to build the usage message.
        import os
        # sys.stderr.write() emits the same text as the former Python 2
        # ``print >>sys.stderr`` statement and also works on Python 3.
        sys.stderr.write("Usage: %s directory\n" %
                         os.path.basename(sys.argv[0]))
        sys.exit(1)

    find_dupes(sys.argv[1])
|
46 |
|
# Run the command line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()