build/checksums.py

Tue, 06 Jan 2015 21:39:09 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Tue, 06 Jan 2015 21:39:09 +0100
branch
TOR_BUG_9701
changeset 8
97036ab72558
permissions
-rwxr-xr-x

Conditionally force memory storage according to privacy.thirdparty.isolate.
This solves Tor bug #9701, complying with disk avoidance documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

michael@0 1 #!/usr/bin/python
michael@0 2 # This Source Code Form is subject to the terms of the Mozilla Public
michael@0 3 # License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 4 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
michael@0 5
michael@0 6 from __future__ import with_statement
michael@0 7
michael@0 8 from optparse import OptionParser
michael@0 9 import logging
michael@0 10 import os
michael@0 11 try:
michael@0 12 import hashlib
michael@0 13 except:
michael@0 14 hashlib = None
michael@0 15
def digest_file(filename, digest, chunk_size=1024):
    '''Produce a checksum for the file specified by 'filename'.

    'filename' is a string path to a file that is opened in binary mode and
    read in this function. 'digest' names the checksum algorithm and is
    handed to hashlib.new(). 'chunk_size' is the number of bytes requested
    from the file on each read and defaults to 1K.

    Returns the hexadecimal digest string. Returns None instead when the
    hashlib module could not be imported, or when hashlib.new() rejects
    'digest' with a ValueError.
    '''
    # Kept as an assert so callers that rely on AssertionError still see it.
    assert not os.path.isdir(filename), 'this function only works with files'
    logger = logging.getLogger('checksums.py')
    if hashlib is None:
        # In this case we could subprocess.Popen and .communicate with
        # sha1sum or md5sum
        logger.warning('The python module for hashlib is missing!')
        return None

    logger.debug('Creating new %s object', digest)
    try:
        h = hashlib.new(digest)
    except ValueError:
        # Honour the documented contract: an unusable digest name yields
        # None rather than an exception escaping to the caller.
        logger.warning('Could not create a "%s" hash object', digest)
        return None

    with open(filename, 'rb') as f:
        while True:
            data = f.read(chunk_size)
            if not data:
                logger.debug('Finished reading in file')
                break
            h.update(data)
    hexdigest = h.hexdigest()
    logger.debug('Hash for %s is %s', filename, hexdigest)
    return hexdigest
michael@0 44
michael@0 45
def process_files(files, output_filename, digests, strip):
    '''Compute checksums for 'files' and write them to 'output_filename'.

    'files' is a list of file names. Each entry that is a directory is
    skipped with a warning; every other entry is passed to digest_file()
    once per algorithm in 'digests', which needs to be a list of valid
    OpenSSL algorithm names. When digest_file() returns None for a
    file/algorithm pair, a warning is logged and that pair is skipped.

    The path written in the output file has a leading 'strip' prefix (and
    any '/' characters that follow it) removed. The output file is opened
    with mode 'w+', so an existing file is overwritten, and it is closed
    before this function returns. Nothing is returned.

    The output file is written in the format:
        <hash> <algorithm> <filesize> <filepath>
    Example:
        d1fa09a<snip>e4220 sha1 14250744 firefox-4.0b6pre.en-US.mac64.dmg
    '''

    logger = logging.getLogger('checksums.py')
    if os.path.exists(output_filename):
        logger.debug('Overwriting existing checksums file "%s"',
                     output_filename)
    else:
        logger.debug('Creating a new checksums file "%s"', output_filename)
    with open(output_filename, 'w+') as output:
        for path in files:
            if os.path.isdir(path):
                logger.warning('%s is a directory, skipping', path)
                continue
            # The reported path does not depend on the digest, so work it
            # out once per file rather than once per algorithm.
            if path.startswith(strip):
                short_file = path[len(strip):].lstrip('/')
            else:
                short_file = path
            for digest in digests:
                checksum = digest_file(path, digest)
                if checksum is None:
                    # The whole message is one format string; previously
                    # '%' bound to 'Skipping.' alone and raised TypeError.
                    logger.warning(
                        'Unable to generate a hash for %s. Skipping.', path)
                    continue
                # write() rather than the Python 2 'print >>' statement so
                # the line is emitted the same way on Python 2 and 3.
                output.write('%s %s %s %s\n' % (checksum, digest,
                                                os.path.getsize(path),
                                                short_file))
michael@0 87
def setup_logging(level=logging.DEBUG):
    '''Attach a stream handler to the 'checksums.py' logger.

    'level' is the threshold applied to the new handler and defaults to
    DEBUG. The logger itself is always set to DEBUG, so the handler's
    level decides what is actually emitted.

    The output is in the format:
        <level> - <message>
    Example:
        DEBUG - Finished reading in file
    '''

    stream_handler = logging.StreamHandler()
    stream_handler.setLevel(level)
    stream_handler.setFormatter(
        logging.Formatter("%(levelname)s - %(message)s"))

    checksum_logger = logging.getLogger('checksums.py')
    checksum_logger.setLevel(logging.DEBUG)
    checksum_logger.addHandler(stream_handler)
michael@0 105
def main():
    '''Parse arguments, set up logging and generate a checksum file.

    Command line options:
        -d/--digest   checksum algorithm to use; may be repeated, and
                      defaults to sha1 when not given
        -o/--output   output file to write (default 'checksums')
        -v/--verbose  log at DEBUG level (takes precedence over quiet)
        -q/--quiet    log at ERROR level
        -s/--strip    path prefix to strip from the file names written
                      (default is the current working directory)

    Positional arguments are the files to checksum; ones that do not exist
    are reported and left out. Exits with status 1 when a requested digest
    cannot be constructed by hashlib.
    '''
    # Parse command line arguments
    parser = OptionParser()
    parser.add_option('-d', '--digest', help='checksum algorithm to use',
                      action='append', dest='digests')
    parser.add_option('-o', '--output', help='output file to use',
                      action='store', dest='outfile', default='checksums')
    parser.add_option('-v', '--verbose',
                      help='Be noisy (takes precedence over quiet)',
                      action='store_true', dest='verbose', default=False)
    parser.add_option('-q', '--quiet', help='Be quiet', action='store_true',
                      dest='quiet', default=False)
    parser.add_option('-s', '--strip',
                      help='strip this path from the filenames',
                      dest='strip', default=os.getcwd())
    options, args = parser.parse_args()

    # Figure out which logging level to use
    if options.verbose:
        loglevel = logging.DEBUG
    elif options.quiet:
        loglevel = logging.ERROR
    else:
        loglevel = logging.INFO

    # Set up logging
    setup_logging(loglevel)
    logger = logging.getLogger('checksums.py')

    # Validate the digest type to use
    if not options.digests:
        options.digests = ['sha1']
    # hashlib is None when its import failed; digest_file() already copes
    # with that by warning and returning None, so only validate the digest
    # names when the module is actually there instead of crashing on None.
    if hashlib is not None:
        for digest in options.digests:
            try:
                hashlib.new(digest)
            except ValueError as ve:
                logger.error('Could not create a "%s" hash object (%s)' %
                             (digest, ve.args[0]))
                # SystemExit directly: the exit() builtin is only injected
                # by the site module and is not guaranteed to exist.
                raise SystemExit(1)

    # Validate the files to checksum
    files = []
    for i in args:
        if os.path.exists(i):
            files.append(i)
        else:
            logger.info('File "%s" was not found on the filesystem' % i)
    process_files(files, options.outfile, options.digests, options.strip)
michael@0 156
# Run the command line entry point only when executed as a script, not
# when this module is imported.
if __name__ == '__main__':
    main()

mercurial