toolkit/mozapps/installer/find-dupes.py

branch
TOR_BUG_3246
changeset 7
129ffea94266
equal deleted inserted replaced
-1:000000000000 0:3e0c15dd073b
1 # This Source Code Form is subject to the terms of the Mozilla Public
2 # License, v. 2.0. If a copy of the MPL was not distributed with this
3 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
4
5 import sys
6 import hashlib
7 from mozpack.packager.unpack import UnpackFinder
8 from collections import OrderedDict
9
10 '''
11 Find files duplicated in a given packaged directory, independently of its
12 package format.
13 '''
14
15
def find_dupes(source):
    """Report files with identical content inside a packaged directory.

    Every (path, file) pair produced by iterating ``UnpackFinder(source)``
    is read in full and grouped by the MD5 digest of its content.  Each
    group containing more than one path is printed to standard output,
    followed by a one-line WARNING summary when at least one such group
    was found.  Nothing is printed when there are no duplicates.

    :param source: value passed unchanged to ``UnpackFinder``; the
        command-line entry point supplies its single argument here.
    :returns: None.  The report goes to standard output only.
    """
    # digest -> (content size in bytes, [paths with that content]).
    # OrderedDict keeps groups in the order their first member was seen,
    # so the report order follows the finder's iteration order.
    md5s = OrderedDict()
    for p, f in UnpackFinder(source):
        # NOTE(review): the object returned by f.open() is not closed
        # explicitly -- confirm this is fine for every file type the
        # finder can yield before adding a close() here.
        content = f.open().read()
        m = hashlib.md5(content).digest()
        if m not in md5s:
            md5s[m] = (len(content), [])
        md5s[m][1].append(p)

    # print(...) is always called with exactly ONE argument below: that
    # form produces the same output as a Python 2 print statement and as
    # the Python 3 print function, so the script runs under both.
    total = 0
    num_dupes = 0
    for size, paths in md5s.values():
        copies = len(paths) - 1  # redundant copies beyond the first
        if copies < 1:
            continue
        # The "(N times)" suffix is only shown when there is more than one
        # redundant copy, i.e. three or more identical files.
        times = ' (%d times)' % copies if copies > 1 else ''
        print('Duplicates %d bytes%s:' % (size, times))
        # The joined text already ends in a newline; print adds another,
        # leaving a blank line between groups.
        print(''.join(' %s\n' % p for p in paths))
        total += copies * size
        num_dupes += 1
    if num_dupes:
        # num_dupes counts duplicate groups; total counts the bytes taken
        # by the redundant copies, measured on the content as read.
        print("WARNING: Found %d duplicated files taking %d bytes"
              " (uncompressed)" % (num_dupes, total))
36
37
def main():
    """Command-line entry point: check the directory named in ``sys.argv``.

    Exactly one command-line argument is expected.  With any other
    argument count a usage line is written to standard error and the
    process exits with status 1; otherwise the argument is passed to
    ``find_dupes``.
    """
    if len(sys.argv) != 2:
        # os is only needed to build the usage message, so it is imported
        # on this error path only.
        import os
        # sys.stderr.write replaces the Python-2-only "print >>sys.stderr"
        # statement; the trailing newline is the one print used to add.
        sys.stderr.write("Usage: %s directory\n" %
                         os.path.basename(sys.argv[0]))
        sys.exit(1)

    find_dupes(sys.argv[1])
46
# Only run the duplicate check when this file is executed as a script;
# importing it as a module must not trigger a scan.
if __name__ == '__main__':
    main()

mercurial