toolkit/mozapps/installer/find-dupes.py

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/toolkit/mozapps/installer/find-dupes.py	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,48 @@
     1.4 +# This Source Code Form is subject to the terms of the Mozilla Public
     1.5 +# License, v. 2.0. If a copy of the MPL was not distributed with this
     1.6 +# file, You can obtain one at http://mozilla.org/MPL/2.0/.
     1.7 +
     1.8 +import sys
     1.9 +import hashlib
    1.10 +from mozpack.packager.unpack import UnpackFinder
    1.11 +from collections import OrderedDict
    1.12 +
    1.13 +'''
    1.14 +Find files duplicated in a given packaged directory, independently of its
    1.15 +package format.
    1.16 +'''
    1.17 +
    1.18 +
    1.19 +def find_dupes(source):
    1.20 +    md5s = OrderedDict()
    1.21 +    for p, f in UnpackFinder(source):
    1.22 +        content = f.open().read()
    1.23 +        m = hashlib.md5(content).digest()
    1.24 +        if not m in md5s:
    1.25 +            md5s[m] = (len(content), [])
    1.26 +        md5s[m][1].append(p)
    1.27 +    total = 0
    1.28 +    num_dupes = 0
    1.29 +    for m, (size, paths) in md5s.iteritems():
    1.30 +        if len(paths) > 1:
    1.31 +            print 'Duplicates %d bytes%s:' % (size,
    1.32 +                  ' (%d times)' % (len(paths) - 1) if len(paths) > 2 else '')
    1.33 +            print ''.join('  %s\n' % p for p in paths)
    1.34 +            total += (len(paths) - 1) * size
    1.35 +            num_dupes += 1
    1.36 +    if num_dupes:
    1.37 +        print "WARNING: Found %d duplicated files taking %d bytes" % \
    1.38 +              (num_dupes, total) + " (uncompressed)"
    1.39 +
    1.40 +
    1.41 +def main():
    1.42 +    if len(sys.argv) != 2:
    1.43 +        import os
    1.44 +        print >>sys.stderr, "Usage: %s directory" % \
    1.45 +                            os.path.basename(sys.argv[0])
    1.46 +        sys.exit(1)
    1.47 +
    1.48 +    find_dupes(sys.argv[1])
    1.49 +
    1.50 +if __name__ == "__main__":
    1.51 +    main()

mercurial