# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

'''
Find files duplicated in a given packaged directory, independently of its
package format.
'''

import hashlib
import os
import sys
from collections import OrderedDict

from mozpack.packager.unpack import UnpackFinder


def find_dupes(source):
    '''
    Report files under `source` whose contents are identical.

    Every (path, file) pair yielded by UnpackFinder(source) is read in full
    and grouped by the MD5 digest of its content. Each group holding more
    than one path is printed to stdout, followed by a summary line giving
    the number of duplicated groups and the bytes taken by the extra copies.
    Nothing is returned.
    '''
    # digest -> (content size, [paths]); ordered so that groups are reported
    # in the order their first member was encountered.
    md5s = OrderedDict()
    for p, f in UnpackFinder(source):
        content = f.open().read()
        m = hashlib.md5(content).digest()
        if m not in md5s:
            md5s[m] = (len(content), [])
        md5s[m][1].append(p)

    total = 0
    num_dupes = 0
    # NOTE: every print() below takes exactly one argument, so it produces
    # the same output as a Python 2 print statement and as the Python 3
    # print function.
    for size, paths in md5s.values():
        if len(paths) > 1:
            extra_copies = len(paths) - 1
            # The count is only spelled out when there are 2+ extra copies.
            times = ' (%d times)' % extra_copies if len(paths) > 2 else ''
            print('Duplicates %d bytes%s:' % (size, times))
            print(''.join(' %s\n' % p for p in paths))
            # Only the copies beyond the first one count as wasted space.
            total += extra_copies * size
            num_dupes += 1
    if num_dupes:
        print("WARNING: Found %d duplicated files taking %d bytes" %
              (num_dupes, total) + " (uncompressed)")


def main():
    '''
    Command-line entry point.

    Expects exactly one argument (the directory to inspect). Otherwise a
    usage message is written to stderr and the process exits with status 1.
    '''
    if len(sys.argv) != 2:
        sys.stderr.write("Usage: %s directory\n" %
                         os.path.basename(sys.argv[0]))
        sys.exit(1)

    find_dupes(sys.argv[1])


if __name__ == "__main__":
    main()