|
1 #!/usr/bin/python |
|
2 # This Source Code Form is subject to the terms of the Mozilla Public |
|
3 # License, v. 2.0. If a copy of the MPL was not distributed with this |
|
4 # file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
5 |
|
6 from __future__ import with_statement |
|
7 |
|
8 from optparse import OptionParser |
|
9 import logging |
|
10 import os |
|
11 try: |
|
12 import hashlib |
|
13 except: |
|
14 hashlib = None |
|
15 |
|
def digest_file(filename, digest, chunk_size=1024):
    '''Produce a checksum for the file specified by 'filename'.

    'filename' is a string path to a file that is opened in binary mode and
    read in this function.  The checksum algorithm is named by 'digest' and
    is handed to hashlib.new(), so it must be a name hashlib accepts (for
    example 'sha1' or 'md5').  The file is read in blocks of 'chunk_size'
    bytes, which defaults to 1K, so the whole file is never held in memory.

    Returns the hexadecimal digest as a string.  Returns None (after logging
    a warning) if the module-level 'hashlib' is None or if hashlib rejects
    'digest' as an unknown algorithm.

    Raises AssertionError if 'filename' is a directory; errors from opening
    or reading the file are propagated to the caller.
    '''
    assert not os.path.isdir(filename), 'this function only works with files'
    logger = logging.getLogger('checksums.py')

    if hashlib is None:
        # In this case we could subprocess.Popen and .communicate with
        # sha1sum or md5sum
        logger.warning('The python module for hashlib is missing!')
        return None

    logger.debug('Creating new %s object', digest)
    try:
        hasher = hashlib.new(digest)
    except ValueError:
        # hashlib raises ValueError for unknown algorithm names; the
        # documented contract of this function is to return None instead.
        logger.warning('"%s" is not a valid digest algorithm', digest)
        return None

    with open(filename, 'rb') as f:
        while True:
            data = f.read(chunk_size)
            if not data:
                logger.debug('Finished reading in file')
                break
            hasher.update(data)

    checksum = hasher.hexdigest()
    logger.debug('Hash for %s is %s', filename, checksum)
    return checksum
|
44 |
|
45 |
|
def process_files(files, output_filename, digests, strip):
    '''Compute checksums for 'files' and write them to 'output_filename'.

    'files' is a list of file names.  Each entry that is not a directory is
    checksummed once per algorithm in 'digests' (a list of algorithm names
    passed on to digest_file), and one line per (file, algorithm) pair is
    written to the file named by 'output_filename', which is created or
    overwritten.  Directories are skipped with a warning, as is any
    (file, algorithm) pair for which digest_file returns None.

    If a file name starts with 'strip', that prefix and any leading '/'
    left after it are removed from the path written to the output file.
    This is a plain string-prefix match, not a path-component match.

    The output file is closed before returning; nothing is returned.

    The output file is written in the format:
        <hash> <algorithm> <filesize> <filepath>
    Example:
        d1fa09a<snip>e4220 sha1 14250744 firefox-4.0b6pre.en-US.mac64.dmg
    '''
    logger = logging.getLogger('checksums.py')
    if os.path.exists(output_filename):
        logger.debug('Overwriting existing checksums file "%s"' %
                     output_filename)
    else:
        logger.debug('Creating a new checksums file "%s"' % output_filename)

    with open(output_filename, 'w+') as output:
        for path in files:
            if os.path.isdir(path):
                logger.warning('%s is a directory, skipping' % path)
                continue

            # The stripped name depends only on the path, so compute it once
            # rather than once per digest.
            if path.startswith(strip):
                short_file = path[len(strip):].lstrip('/')
            else:
                short_file = path

            for digest in digests:
                checksum = digest_file(path, digest)
                if checksum is None:
                    # Format the complete message; applying '%' to only the
                    # second half of a '+' concatenation raises TypeError.
                    logger.warning('Unable to generate a hash for %s. '
                                   'Skipping.' % path)
                    continue
                output.write('%s %s %s %s\n' % (checksum, digest,
                                                os.path.getsize(path),
                                                short_file))
|
87 |
|
def setup_logging(level=logging.DEBUG):
    '''Attach a stream handler to the 'checksums.py' logger.

    'level' is the logging-module level applied to the new handler and
    defaults to DEBUG.  The logger itself is always set to DEBUG, so the
    handler's level is what decides which messages are emitted.

    The output is in the format:
        <level> - <message>
    Example:
        DEBUG - Finished reading in file
    '''
    # Build the fully configured handler first, then hook it up.
    stream_handler = logging.StreamHandler()
    stream_handler.setLevel(level)
    stream_handler.setFormatter(
        logging.Formatter("%(levelname)s - %(message)s"))

    checksums_logger = logging.getLogger('checksums.py')
    checksums_logger.setLevel(logging.DEBUG)
    checksums_logger.addHandler(stream_handler)
|
105 |
|
def main():
    '''Parse the command line, set up logging and generate a checksum file.

    Options:
        -d/--digest   checksum algorithm to use; may be given more than
                      once (defaults to sha1 when omitted)
        -o/--output   output file to use (defaults to 'checksums')
        -v/--verbose  log at DEBUG level (takes precedence over --quiet)
        -q/--quiet    log at ERROR level
        -s/--strip    path to strip from the file names (defaults to the
                      current working directory)

    The positional arguments are the files to checksum.  Arguments that do
    not exist on the filesystem are reported and left out.

    Exits with status 1 if hashlib is unavailable or if hashlib cannot
    create a hash object for one of the requested digests.
    '''
    # Function-scope import keeps this block self-contained; sys.exit is
    # used instead of the site-provided exit(), which is not guaranteed to
    # exist (for example under 'python -S').
    import sys

    # Parse command line arguments
    parser = OptionParser()
    parser.add_option('-d', '--digest', help='checksum algorithm to use',
                      action='append', dest='digests')
    parser.add_option('-o', '--output', help='output file to use',
                      action='store', dest='outfile', default='checksums')
    parser.add_option('-v', '--verbose',
                      help='Be noisy (takes precedence over quiet)',
                      action='store_true', dest='verbose', default=False)
    parser.add_option('-q', '--quiet', help='Be quiet', action='store_true',
                      dest='quiet', default=False)
    parser.add_option('-s', '--strip',
                      help='strip this path from the filenames',
                      dest='strip', default=os.getcwd())
    options, args = parser.parse_args()

    # Figure out which logging level to use
    if options.verbose:
        loglevel = logging.DEBUG
    elif options.quiet:
        loglevel = logging.ERROR
    else:
        loglevel = logging.INFO

    # Set up logging
    setup_logging(loglevel)
    logger = logging.getLogger('checksums.py')

    # The module-level import falls back to hashlib = None; without this
    # check the validation below would fail with an AttributeError.
    if hashlib is None:
        logger.error('The python module for hashlib is missing!')
        sys.exit(1)

    # Validate the digest type to use
    if not options.digests:
        options.digests = ['sha1']
    for digest in options.digests:
        try:
            hashlib.new(digest)
        except ValueError as ve:
            logger.error('Could not create a "%s" hash object (%s)' %
                         (digest, ve))
            sys.exit(1)

    # Validate the files to checksum
    files = []
    for candidate in args:
        if os.path.exists(candidate):
            files.append(candidate)
        else:
            logger.info('File "%s" was not found on the filesystem' %
                        candidate)
    process_files(files, options.outfile, options.digests, options.strip)
|
156 |
|
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()