build/checksums.py


author       Michael Schloh von Bennewitz <michael@schloh.com>
date         Tue, 06 Jan 2015 21:39:09 +0100
branch       TOR_BUG_9701
changeset    8:97036ab72558
permissions  -rwxr-xr-x

Conditionally force memory storage according to privacy.thirdparty.isolate;
this solves Tor bug #9701, complying with the disk-avoidance requirement
documented at https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

#!/usr/bin/python
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

from __future__ import with_statement

from optparse import OptionParser
import logging
import os
try:
    import hashlib
except ImportError:
    hashlib = None

def digest_file(filename, digest, chunk_size=1024):
    '''Produce a checksum for the file specified by 'filename'.  'filename'
    is a string path to a file that is opened and read in this function.  The
    checksum algorithm is specified by 'digest' and is a valid OpenSSL
    algorithm.  If the digest used is not valid or Python's hashlib doesn't
    work, the None object will be returned instead.  The size of blocks
    that this function will read from the file object it opens based on
    'filename' can be specified by 'chunk_size', which defaults to 1K'''
    assert not os.path.isdir(filename), 'this function only works with files'
    logger = logging.getLogger('checksums.py')
    if hashlib is not None:
        logger.debug('Creating new %s object' % digest)
        h = hashlib.new(digest)
        with open(filename, 'rb') as f:
            while True:
                data = f.read(chunk_size)
                if not data:
                    logger.debug('Finished reading in file')
                    break
                h.update(data)
        hash = h.hexdigest()
        logger.debug('Hash for %s is %s' % (filename, hash))
        return hash
    else:
        # In this case we could subprocess.Popen and .communicate with
        # sha1sum or md5sum
        logger.warn('The python module for hashlib is missing!')
        return None


def process_files(files, output_filename, digests, strip):
    '''This function takes a list of file names, 'files'.  It will then
    compute the checksum for each of the files by opening the files.
    Once each file is read and its checksum is computed, this function
    will write the information to the file specified by 'output_filename'.
    The path written in the output file will have anything specified by 'strip'
    removed from the path.  The output file is closed before the function
    returns; nothing is returned.  The algorithm to compute checksums with
    can be specified by 'digests' and needs to be a list of valid OpenSSL
    algorithms.

    The output file is written in the format:
        <hash> <algorithm> <filesize> <filepath>
    Example:
        d1fa09a<snip>e4220 sha1 14250744 firefox-4.0b6pre.en-US.mac64.dmg
    '''

    logger = logging.getLogger('checksums.py')
    if os.path.exists(output_filename):
        logger.debug('Overwriting existing checksums file "%s"' %
                     output_filename)
    else:
        logger.debug('Creating a new checksums file "%s"' % output_filename)
    with open(output_filename, 'w+') as output:
        for file in files:
            if os.path.isdir(file):
                logger.warn('%s is a directory, skipping' % file)
            else:
                for digest in digests:
                    hash = digest_file(file, digest)
                    if hash is None:
                        logger.warn('Unable to generate a hash for %s. '
                                    'Skipping.' % file)
                        continue
                    if file.startswith(strip):
                        short_file = file[len(strip):]
                        short_file = short_file.lstrip('/')
                    else:
                        short_file = file
                    print >>output, '%s %s %s %s' % (hash, digest,
                                                     os.path.getsize(file),
                                                     short_file)

def setup_logging(level=logging.DEBUG):
    '''This function sets up the logging module using a specifiable logging
    level.  The default log level is DEBUG.

    The output is in the format:
        <level> - <message>
    Example:
        DEBUG - Finished reading in file
    '''

    logger = logging.getLogger('checksums.py')
    logger.setLevel(logging.DEBUG)
    handler = logging.StreamHandler()
    handler.setLevel(level)
    formatter = logging.Formatter("%(levelname)s - %(message)s")
    handler.setFormatter(formatter)
    logger.addHandler(handler)

def main():
    '''This is a main function that parses arguments, sets up logging
    and generates a checksum file'''
    # Parse command line arguments
    parser = OptionParser()
    parser.add_option('-d', '--digest', help='checksum algorithm to use',
                      action='append', dest='digests')
    parser.add_option('-o', '--output', help='output file to use',
                      action='store', dest='outfile', default='checksums')
    parser.add_option('-v', '--verbose',
                      help='Be noisy (takes precedence over quiet)',
                      action='store_true', dest='verbose', default=False)
    parser.add_option('-q', '--quiet', help='Be quiet', action='store_true',
                      dest='quiet', default=False)
    parser.add_option('-s', '--strip',
                      help='strip this path from the filenames',
                      dest='strip', default=os.getcwd())
    options, args = parser.parse_args()

    # Figure out which logging level to use
    if options.verbose:
        loglevel = logging.DEBUG
    elif options.quiet:
        loglevel = logging.ERROR
    else:
        loglevel = logging.INFO

    # Set up logging
    setup_logging(loglevel)
    logger = logging.getLogger('checksums.py')

    # Validate the digest type to use
    if not options.digests:
        options.digests = ['sha1']
    try:
        for digest in options.digests:
            hashlib.new(digest)
    except ValueError, ve:
        logger.error('Could not create a "%s" hash object (%s)' %
                     (digest, ve.args[0]))
        exit(1)

    # Validate the files to checksum
    files = []
    for i in args:
        if os.path.exists(i):
            files.append(i)
        else:
            logger.info('File "%s" was not found on the filesystem' % i)
    process_files(files, options.outfile, options.digests, options.strip)

if __name__ == '__main__':
    main()
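
For reference, here is a minimal usage sketch of the two public helpers. It assumes
the script is importable as a module named checksums (i.e. checksums.py is on the
Python path) and uses hypothetical artifact paths; both are illustrative assumptions,
not part of the file above.

    # Sketch: compute SHA-1 and SHA-512 checksums for two hypothetical build
    # artifacts, writing '<hash> <algorithm> <filesize> <filepath>' lines to a
    # file named 'checksums'.
    import logging
    import checksums  # assumes checksums.py is importable under this name

    checksums.setup_logging(logging.INFO)
    artifacts = ['dist/firefox.tar.bz2', 'dist/firefox.zip']  # hypothetical paths
    checksums.process_files(artifacts, 'checksums',
                            digests=['sha1', 'sha512'],  # OpenSSL digest names
                            strip='dist')  # prefix removed from recorded paths

The same result can be produced by running the script directly, for example:
python checksums.py -d sha1 -d sha512 -s dist -o checksums dist/firefox.tar.bz2 dist/firefox.zip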
