Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1,
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f, for hacking purposes.
michael@0 | 1 | # This Source Code Form is subject to the terms of the Mozilla Public |
michael@0 | 2 | # License, v. 2.0. If a copy of the MPL was not distributed with this |
michael@0 | 3 | # file, You can obtain one at http://mozilla.org/MPL/2.0/. |
michael@0 | 4 | |
michael@0 | 5 | import sys |
michael@0 | 6 | import hashlib |
michael@0 | 7 | from mozpack.packager.unpack import UnpackFinder |
michael@0 | 8 | from collections import OrderedDict |
michael@0 | 9 | |
michael@0 | 10 | ''' |
michael@0 | 11 | Find files duplicated in a given packaged directory, independently of its |
michael@0 | 12 | package format. |
michael@0 | 13 | ''' |
michael@0 | 14 | |
michael@0 | 15 | |
def _digest_and_size(fileobj, chunk_size=64 * 1024):
    """Return ``(md5_digest, byte_count)`` for the data read from *fileobj*.

    The data is consumed in pieces of at most *chunk_size* bytes so that a
    large file is never held in memory in its entirety.
    """
    md5 = hashlib.md5()
    size = 0
    while True:
        chunk = fileobj.read(chunk_size)
        if not chunk:
            break
        md5.update(chunk)
        size += len(chunk)
    return md5.digest(), size


def find_dupes(source):
    """Report files with identical content found under *source*.

    Every ``(path, file)`` pair yielded by ``UnpackFinder(source)`` is hashed
    with MD5, and paths sharing a digest are grouped together. Each group
    holding two or more paths is printed to stdout along with the size of
    the shared content; a final WARNING line summarizes the number of groups
    and the bytes taken by the redundant copies.

    Nothing is returned; the report on stdout is the only result.
    """
    # Keyed by digest. OrderedDict keeps the groups in first-seen order, so
    # the report order follows the order in which UnpackFinder yields files.
    md5s = OrderedDict()
    for p, f in UnpackFinder(source):
        # NOTE(review): assumes the object returned by f.open() accepts
        # read(size) like a regular file object -- confirm for every file
        # type UnpackFinder can yield.
        m, size = _digest_and_size(f.open())
        if m not in md5s:
            md5s[m] = (size, [])
        md5s[m][1].append(p)

    total = 0
    num_dupes = 0
    for size, paths in md5s.values():
        if len(paths) > 1:
            # The first path is the "original"; the others are extra copies.
            extra = len(paths) - 1
            times = ' (%d times)' % extra if extra > 1 else ''
            print('Duplicates %d bytes%s:' % (size, times))
            print(''.join(' %s\n' % p for p in paths))
            total += extra * size
            num_dupes += 1
    if num_dupes:
        print("WARNING: Found %d duplicated files taking %d bytes"
              " (uncompressed)" % (num_dupes, total))
michael@0 | 36 | |
michael@0 | 37 | |
def main():
    """Command-line entry point: ``<script> directory``.

    When the script is not given exactly one argument, a usage line is
    written to stderr and the process exits with status 1. Otherwise
    find_dupes() is run on that argument.
    """
    if len(sys.argv) != 2:
        # Only needed for the usage message, so imported on this path only.
        import os
        # sys.stderr.write() emits the same text as the Python-2-only
        # ``print >>sys.stderr`` form while also being valid on Python 3.
        sys.stderr.write("Usage: %s directory\n" %
                         os.path.basename(sys.argv[0]))
        sys.exit(1)

    find_dupes(sys.argv[1])


if __name__ == "__main__":
    main()