# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

import hashlib
import os
import sys
from collections import OrderedDict

from mozpack.packager.unpack import UnpackFinder

'''
Find files duplicated in a given packaged directory, independently of its
package format.
'''


def find_dupes(source):
    '''
    Print a report of the files under `source` that have identical content.

    Every (path, file) pair yielded by UnpackFinder(source) is read fully
    into memory and keyed by the MD5 digest of its content. For each digest
    seen under more than one path, the content size and all matching paths
    are printed. If any such group exists, a final warning line gives the
    number of duplicated contents and the bytes taken by the extra copies.

    Nothing is returned; all output goes to stdout.
    '''
    # digest -> (content size, [paths]); ordered so that the report follows
    # the order in which each content was first encountered.
    md5s = OrderedDict()
    for p, f in UnpackFinder(source):
        content = f.open().read()
        m = hashlib.md5(content).digest()
        if m not in md5s:
            md5s[m] = (len(content), [])
        md5s[m][1].append(p)

    total = 0
    num_dupes = 0
    for size, paths in md5s.values():
        if len(paths) > 1:
            # Copies beyond the first one are the wasted ones.
            extra = len(paths) - 1
            times = ' (%d times)' % extra if len(paths) > 2 else ''
            # Single-argument print(...) behaves the same under Python 2
            # (print statement) and Python 3 (print function).
            print('Duplicates %d bytes%s:' % (size, times))
            print(''.join(' %s\n' % p for p in paths))
            total += extra * size
            num_dupes += 1
    if num_dupes:
        print("WARNING: Found %d duplicated files taking %d bytes"
              " (uncompressed)" % (num_dupes, total))


def main():
    '''
    Command line entry point: expects exactly one argument, the directory to
    scan. Prints a usage message to stderr and exits with status 1 otherwise.
    '''
    if len(sys.argv) != 2:
        sys.stderr.write("Usage: %s directory\n" %
                         os.path.basename(sys.argv[0]))
        sys.exit(1)

    find_dupes(sys.argv[1])


if __name__ == "__main__":
    main()