#!/usr/bin/env python

#tooltool is a lookaside cache implemented in Python
#Copyright (C) 2011 John H. Ford <john@johnford.info>
#
#This program is free software; you can redistribute it and/or
#modify it under the terms of the GNU General Public License
#as published by the Free Software Foundation version 2
#
#This program is distributed in the hope that it will be useful,
#but WITHOUT ANY WARRANTY; without even the implied warranty of
#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#GNU General Public License for more details.
#
#You should have received a copy of the GNU General Public License
#along with this program; if not, write to the Free Software
#Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.

# A manifest file specifies files in that directory that are stored
# elsewhere.
# This file should only contain files in the directory
# which the manifest file resides in and it should be called 'manifest.manifest'

__version__ = '1'

import copy
import json
import os
import optparse
import logging
import hashlib

try:  # Python 2
    import urllib2
    import ConfigParser
except ImportError:  # Python 3: same names, relocated modules
    import urllib.request as urllib2
    import configparser as ConfigParser

log = logging.getLogger(__name__)


class FileRecordJSONEncoderException(Exception):
    """Raised when the JSON encoder is handed something that is not a FileRecord."""


class InvalidManifest(Exception):
    """Raised when a manifest file is missing or cannot be parsed."""


class ExceptionWithFilename(Exception):
    """Base class for errors that concern one particular file."""

    def __init__(self, filename):
        # Hand the filename to Exception so str(exc) and tracebacks are not empty.
        Exception.__init__(self, filename)
        self.filename = filename


class DigestMismatchException(ExceptionWithFilename):
    """The digest of a local file differs from the recorded one."""


class MissingFileException(ExceptionWithFilename):
    """A file that was supposed to be checked does not exist locally."""


class FileRecord(object):
    """Name, size, digest and digest algorithm describing one tracked file."""

    def __init__(self, filename, size, digest, algorithm):
        object.__init__(self)
        self.filename = filename
        self.size = size
        self.digest = digest
        self.algorithm = algorithm
        log.debug("creating %s 0x%x" % (self.__class__.__name__, id(self)))

    def __eq__(self, other):
        """Two records are equal when all four fields match."""
        if self is other:
            return True
        return (self.filename == other.filename and
                self.size == other.size and
                self.digest == other.digest and
                self.algorithm == other.algorithm)

    def __ne__(self, other):
        return not self.__eq__(other)

    def __str__(self):
        return repr(self)

    def __repr__(self):
        return "%s.%s(filename='%s', size='%s', digest='%s', algorithm='%s')" % (
            __name__, self.__class__.__name__,
            self.filename, self.size, self.digest, self.algorithm)

    def present(self):
        """Return True if the file exists locally (its contents are not checked)."""
        return os.path.exists(self.filename)

    def validate_size(self):
        """Return True if the local file has the recorded size.

        Raises MissingFileException if the file does not exist.
        """
        if self.present():
            return self.size == os.path.getsize(self.filename)
        log.debug("trying to validate size on a missing file, %s", self.filename)
        raise MissingFileException(filename=self.filename)

    def validate_digest(self):
        """Return True if the local file hashes to the recorded digest.

        Raises MissingFileException if the file does not exist.
        """
        if self.present():
            with open(self.filename, 'rb') as f:
                return self.digest == digest_file(f, self.algorithm)
        # The filename is a logging argument; it used to be buried inside the
        # format string and was therefore never printed.
        log.debug("trying to validate digest on a missing file, %s", self.filename)
        raise MissingFileException(filename=self.filename)

    def validate(self):
        """Return True if both size and digest match (size is checked first)."""
        return self.validate_size() and self.validate_digest()

    def describe(self):
        """Return a one-line human readable status for the file."""
        if self.present() and self.validate():
            return "'%s' is present and valid" % self.filename
        elif self.present():
            return "'%s' is present and invalid" % self.filename
        else:
            return "'%s' is absent" % self.filename


def create_file_record(filename, algorithm):
    """Build a FileRecord for the file at 'filename', hashed with 'algorithm'.

    Only the basename is stored in the record.
    """
    stored_filename = os.path.split(filename)[1]
    # 'with' guarantees the handle is released even if hashing raises.
    with open(filename, 'rb') as fo:
        digest = digest_file(fo, algorithm)
    return FileRecord(stored_filename, os.path.getsize(filename), digest, algorithm)


class FileRecordJSONEncoder(json.JSONEncoder):
    """JSON encoder that serialises FileRecords (and lists of them) as dicts."""

    def encode_file_record(self, obj):
        """Return the dict form of a FileRecord or raise for anything else."""
        if not issubclass(type(obj), FileRecord):
            err = "FileRecordJSONEncoder is only for FileRecord and lists of FileRecords, not %s" % obj.__class__.__name__
            log.warning(err)
            raise FileRecordJSONEncoderException(err)
        else:
            return {'filename': obj.filename, 'size': obj.size, 'algorithm': obj.algorithm, 'digest': obj.digest}

    def default(self, f):
        """Hook called by the json module for objects it cannot serialise itself."""
        if issubclass(type(f), list):
            return [self.encode_file_record(i) for i in f]
        else:
            return self.encode_file_record(f)


class FileRecordJSONDecoder(json.JSONDecoder):
    """I help the json module materialize a FileRecord from
    a JSON file.  I understand FileRecords and lists of
    FileRecords.  I ignore things that I don't expect for now"""
    # TODO: make this more explicit in what it's looking for
    # and error out on unexpected things

    _record_keys = ('filename', 'size', 'algorithm', 'digest')

    def process_file_records(self, obj):
        """Convert decoded JSON into FileRecords.

        A list yields a list holding only the entries that turned into
        FileRecords; a dict with exactly the four record keys yields a
        FileRecord; anything else is returned unchanged.
        """
        if isinstance(obj, list):
            record_list = []
            for i in obj:
                record = self.process_file_records(i)
                if issubclass(type(record), FileRecord):
                    record_list.append(record)
            return record_list
        if isinstance(obj, dict) and \
           len(obj) == len(self._record_keys) and \
           all(key in obj for key in self._record_keys):
            rv = FileRecord(obj['filename'], obj['size'], obj['digest'], obj['algorithm'])
            log.debug("materialized %s" % rv)
            return rv
        return obj

    def decode(self, s):
        decoded = json.JSONDecoder.decode(self, s)
        return self.process_file_records(decoded)


class Manifest(object):
    """An ordered collection of FileRecords that can be read/written as JSON."""

    valid_formats = ('json',)

    def __init__(self, file_records=None):
        # A literal [] default would be shared by every Manifest() and, since
        # load()/loads() extend the list in place, records would leak between
        # unrelated manifests.  A list passed by the caller is still used
        # as-is (not copied), as before.
        self.file_records = [] if file_records is None else file_records

    def __eq__(self, other):
        """Manifests are equal when they hold equal records in the same order."""
        if self is other:
            return True
        if len(self.file_records) != len(other.file_records):
            log.debug('Manifests differ in number of files')
            return False
        #TODO: Lists in a different order should be equal
        for mine, theirs in zip(self.file_records, other.file_records):
            if mine != theirs:
                log.debug('FileRecords differ, %s vs %s' % (mine, theirs))
                return False
        return True

    def __ne__(self, other):
        # Python 2 does not derive != from ==.
        return not self.__eq__(other)

    def __deepcopy__(self, memo):
        # Copy the records too; copying only the list would leave both
        # manifests sharing (and mutating) the same FileRecord objects.
        return Manifest(copy.deepcopy(self.file_records, memo))

    def __copy__(self):
        return Manifest(self.file_records)

    def copy(self):
        """Return a new Manifest with its own list of the same records."""
        return Manifest(self.file_records[:])

    def present(self):
        return all(i.present() for i in self.file_records)

    def validate_sizes(self):
        return all(i.validate_size() for i in self.file_records)

    def validate_digests(self):
        return all(i.validate_digest() for i in self.file_records)

    def validate(self):
        return all(i.validate() for i in self.file_records)

    def sort(self):
        """Sort the records in place, smallest file first."""
        #TODO: WRITE TESTS
        self.file_records.sort(key=lambda x: x.size)

    def _absorb(self, decoded):
        """Append decoded records, rejecting JSON that is not a record list."""
        if isinstance(decoded, FileRecord):
            decoded = [decoded]
        if not isinstance(decoded, list):
            raise InvalidManifest("trying to read invalid manifest file")
        self.file_records.extend(decoded)
        self.sort()

    def load(self, data_file, fmt='json'):
        """Add the records found in the open file 'data_file' to this manifest.

        Raises InvalidManifest if the content is not a valid manifest.
        """
        assert fmt in self.valid_formats
        if fmt == 'json':
            try:
                decoded = json.load(data_file, cls=FileRecordJSONDecoder)
            except ValueError:
                raise InvalidManifest("trying to read invalid manifest file")
            self._absorb(decoded)

    def loads(self, data_string, fmt='json'):
        """Add the records found in the string 'data_string' to this manifest.

        Raises InvalidManifest if the content is not a valid manifest.
        """
        assert fmt in self.valid_formats
        if fmt == 'json':
            try:
                decoded = json.loads(data_string, cls=FileRecordJSONDecoder)
            except ValueError:
                raise InvalidManifest("trying to read invalid manifest file")
            self._absorb(decoded)

    def dump(self, output_file, fmt='json'):
        """Write the sorted manifest, followed by a newline, to 'output_file'."""
        assert fmt in self.valid_formats
        self.sort()
        if fmt == 'json':
            text = json.dumps(self.file_records, indent=0, cls=FileRecordJSONEncoder) + '\n'
            try:
                output_file.write(text)
            except TypeError:
                # Binary-mode stream on Python 3 (add_files opens with 'wb').
                output_file.write(text.encode('utf-8'))

    def dumps(self, fmt='json'):
        """Return the sorted manifest serialised as a string."""
        assert fmt in self.valid_formats
        self.sort()
        if fmt == 'json':
            return json.dumps(self.file_records, cls=FileRecordJSONEncoder)


def digest_file(f, a):
    """Return the hex digest of file-like object 'f' under hashlib algorithm 'a'.

    The file is read in fixed-size chunks from its current position."""
    h = hashlib.new(a)
    chunk_size = 1024*10
    data = f.read(chunk_size)
    while data:
        h.update(data)
        data = f.read(chunk_size)
    if hasattr(f, 'name'):
        log.debug('hashed %s with %s to be %s', f.name, a, h.hexdigest())
    else:
        log.debug('hashed a file with %s to be %s', a, h.hexdigest())
    return h.hexdigest()


# TODO: write tests for this function
def open_manifest(manifest_file):
    """Load the file named 'manifest_file' into a new Manifest and return it.

    Raises InvalidManifest if the file is absent or cannot be parsed."""
    if os.path.exists(manifest_file):
        manifest = Manifest()
        with open(manifest_file) as f:
            manifest.load(f)
            log.debug("loaded manifest from file '%s'" % manifest_file)
        return manifest
    else:
        log.debug("tried to load absent file '%s' as manifest" % manifest_file)
        raise InvalidManifest("manifest file '%s' does not exist" % manifest_file)


# TODO: write tests for this function
def list_manifest(manifest_file):
    """Print one 'present / valid / filename' line per record in the manifest.

    Returns False if the manifest cannot be loaded, True otherwise."""
    try:
        manifest = open_manifest(manifest_file)
    except InvalidManifest:
        log.error("failed to load manifest file at '%s'" % manifest_file)
        return False
    for f in manifest.file_records:
        # A single parenthesised argument prints identically on Python 2 and 3.
        print("%s\t%s\t%s" % ("P" if f.present() else "-",
                              "V" if f.present() and f.validate() else "-",
                              f.filename))
    return True


def validate_manifest(manifest_file):
    """I validate that all files in a manifest are present and valid but
    don't fetch or delete them if they aren't"""
    try:
        manifest = open_manifest(manifest_file)
    except InvalidManifest:
        log.error("failed to load manifest file at '%s'" % manifest_file)
        return False
    invalid_files = []
    absent_files = []
    for f in manifest.file_records:
        if not f.present():
            absent_files.append(f)
        elif not f.validate():
            invalid_files.append(f)
    return not invalid_files and not absent_files


# TODO: write tests for this function
def add_files(manifest_file, algorithm, filenames):
    """Add 'filenames' to the manifest at 'manifest_file', creating it if needed.

    Returns True if all files were added and False otherwise; files that are
    already tracked count as not added.  Library exceptions are not caught.
    Records already in the manifest are kept in the rewritten file."""
    all_files_added = True
    # Create a old_manifest object to add to
    if os.path.exists(manifest_file):
        old_manifest = open_manifest(manifest_file)
    else:
        old_manifest = Manifest()
        log.debug("creating a new manifest file")
    # Work on an explicit copy of the existing records.  Existing entries used
    # to survive only because every Manifest() shared one default list.
    new_manifest = old_manifest.copy()
    for filename in filenames:
        log.debug("adding %s" % filename)
        name = os.path.split(filename)[1]
        new_fr = create_file_record(filename, algorithm)
        log.debug("appending a new file record to manifest file")
        log.debug("manifest file has '%s'" % "', ".join([x.filename for x in new_manifest.file_records]))
        add = True
        for fr in new_manifest.file_records:
            if new_fr == fr:
                # TODO: Decide if this case should really cause a False return
                if new_fr.validate():
                    log.info("file already in old_manifest file and matches")
                else:
                    log.error("file already in old_manifest file but is invalid")
                add = False
            elif name == fr.filename:
                # Records store basenames, so compare the basename rather
                # than the path that was passed in.
                log.error("manifest already contains file named %s" % filename)
                add = False
        if add:
            new_manifest.file_records.append(new_fr)
            log.debug("added '%s' to manifest" % filename)
        else:
            all_files_added = False
    with open(manifest_file, 'wb') as output:
        new_manifest.dump(output, fmt='json')
    return all_files_added


# TODO: write tests for this function
def fetch_file(base_url, file_record, overwrite=False, grabchunk=1024*4):
    """Download the file described by 'file_record' from 'base_url'.

    A file which is requested to be fetched that exists locally will be hashed.
    If the hash matches the requested file's hash, nothing will be done and the
    function will return.  If the function is told to overwrite and there is a
    digest mismatch, the existing file will be overwritten.

    Returns True if a valid local copy already existed or the download
    transferred the expected number of bytes, False otherwise."""
    if file_record.present():
        if file_record.validate():
            log.info("existing '%s' is valid, not fetching" % file_record.filename)
            return True
        if overwrite:
            log.info("overwriting '%s' as requested" % file_record.filename)
        else:
            # All of the following is for a useful error message
            with open(file_record.filename, 'rb') as f:
                d = digest_file(f, file_record.algorithm)
            log.error("digest mismatch between manifest(%s...) and local file(%s...)" % \
                    (file_record.digest[:8], d[:8]))
            log.debug("full digests: manifest (%s) local file (%s)" % (file_record.digest, d))
            # Let's bail!
            return False

    # Generate the URL for the file on the server side
    url = "%s/%s/%s" % (base_url, file_record.algorithm, file_record.digest)

    log.debug("fetching from '%s'" % url)

    # TODO: This should be abstracted to make generic retreival protocol handling easy
    # Well, the file doesn't exist locally.  Lets fetch it.
    try:
        f = urllib2.urlopen(url)
        try:
            log.debug("opened %s for reading" % url)
            with open(file_record.filename, 'wb') as out:
                size = 0
                while True:
                    # TODO: print statistics as file transfers happen both for info and to stop
                    # buildbot timeouts
                    indata = f.read(grabchunk)
                    # Truthiness covers both '' (Python 2) and b'' (Python 3);
                    # comparing against '' never terminates on Python 3.
                    if not indata:
                        break
                    out.write(indata)
                    size += len(indata)
        finally:
            # Always release the connection, also when writing fails.
            f.close()
        if size != file_record.size:
            log.error("transfer from %s to %s failed due to a difference of %d bytes" % (url,
                        file_record.filename, file_record.size - size))
            return False
        log.info("fetched %s" % file_record.filename)
    except (urllib2.URLError, urllib2.HTTPError) as e:
        log.error("failed to fetch '%s': %s" % (file_record.filename, e),
                  exc_info=True)
        return False
    except IOError:
        log.error("failed to write to '%s'" % file_record.filename,
                  exc_info=True)
        return False
    return True


# TODO: write tests for this function
def fetch_files(manifest_file, base_url, overwrite, filenames=None):
    """Fetch the manifest's files (or only those named in 'filenames').

    Returns True only if the manifest loaded and every selected file was
    fetched and validates against the manifest."""
    # Lets load the manifest file
    try:
        manifest = open_manifest(manifest_file)
    except InvalidManifest:
        log.error("failed to load manifest file at '%s'" % manifest_file)
        return False
    # An empty/omitted selection means "everything in the manifest".
    wanted = filenames or []
    # We want to track files that fail to be fetched as well as
    # files that are fetched
    failed_files = []

    # Lets go through the manifest and fetch the files that we want
    fetched_files = []
    for f in manifest.file_records:
        if not wanted or f.filename in wanted:
            log.debug("fetching %s" % f.filename)
            if fetch_file(base_url, f, overwrite):
                fetched_files.append(f)
            else:
                failed_files.append(f.filename)
        else:
            log.debug("skipping %s" % f.filename)

    # Even if we get the file, lets ensure that it matches what the
    # manifest specified
    for localfile in fetched_files:
        if not localfile.validate():
            log.error("'%s'" % localfile.describe())
            # A file that fails validation counts as a failure; it used to
            # be logged and then ignored.
            failed_files.append(localfile.filename)

    # If we failed to fetch or validate a file, we need to fail
    if len(failed_files) > 0:
        log.error("The following files failed: '%s'" % "', ".join(failed_files))
        return False
    return True


# TODO: write tests for this function
def process_command(options, args):
    """Dispatch on the command in args[0], passing the rest as its arguments.

    'options' is the dict of parsed command-line options.  Returns the
    command's boolean result, or False for an unknown command."""
    cmd = args[0]
    cmd_args = args[1:]
    log.debug("processing '%s' command with args '%s'" % (cmd, '", "'.join(cmd_args)))
    log.debug("using options: %s" % options)
    if cmd == 'list':
        return list_manifest(options['manifest'])
    if cmd == 'validate':
        return validate_manifest(options['manifest'])
    elif cmd == 'add':
        return add_files(options['manifest'], options['algorithm'], cmd_args)
    elif cmd == 'fetch':
        # .get() covers both a missing key and an explicit None.
        if options.get('base_url') is None:
            log.critical('fetch command requires url option')
            return False
        return fetch_files(options['manifest'], options['base_url'], options['overwrite'], cmd_args)
    else:
        log.critical('command "%s" is not implemented' % cmd)
        return False

# fetching api:
#   http://hostname/algorithm/hash
#   example: http://people.mozilla.org/sha1/1234567890abcedf
# This will make it possible to have the server allow clients to
# use different algorithms than what was uploaded to the server

# TODO: Implement the following features:
#   -optimization: do small files first, justification is that they are faster
#    and cause a faster failure if they are invalid
#   -store permissions
#   -local renames i.e. call the file one thing on the server and
#    something different locally
#   -deal with the cases:
#     -local data matches file requested with different filename
#     -two different files with same name, different hash
#   -?only ever locally to digest as filename, symlink to real name
#   -?maybe deal with files as a dir of the filename with all files in that dir as the versions of that file
#      - e.g. ./python-2.6.7.dmg/0123456789abcdef and ./python-2.6.7.dmg/abcdef0123456789

def main():
    """Command-line entry point.

    Parses options, sets up console logging, fills in 'base_url' and
    'algorithm' from the config files when they were not given on the
    command line, runs the requested command and exits with status 0 on
    success or 1 on failure."""
    # Imported here because the module's import block does not provide it.
    import sys

    # The logger must pass everything on; the console handler's level below
    # then decides what is shown.  Without this the logger inherits the root
    # level (WARNING) and --verbose / the default INFO output never appear.
    log.setLevel(logging.DEBUG)

    # Set up logging, for now just to the console
    ch = logging.StreamHandler()
    cf = logging.Formatter("%(levelname)s - %(message)s")
    ch.setFormatter(cf)

    # Set up option parsing
    parser = optparse.OptionParser()
    # I wish there was a way to say "only allow args to be
    # sequential and at the end of the argv.
    # OH! i could step through sys.argv and check for things starting without -/-- before things starting with them
    parser.add_option('-q', '--quiet', default=False,
            dest='quiet', action='store_true')
    parser.add_option('-v', '--verbose', default=False,
            dest='verbose', action='store_true')
    parser.add_option('-m', '--manifest', default='manifest.tt',
            dest='manifest', action='store',
            help='specify the manifest file to be operated on')
    parser.add_option('-d', '--algorithm', default='sha512',
            dest='algorithm', action='store',
            help='openssl hashing algorithm to use')
    parser.add_option('-o', '--overwrite', default=False,
            dest='overwrite', action='store_true',
            help='if fetching, remote copy will overwrite a local copy that is different. ')
    parser.add_option('--url', dest='base_url', action='store',
            help='base url for fetching files')
    parser.add_option('--ignore-config-files', action='store_true', default=False,
                     dest='ignore_cfg_files')
    (options_obj, args) = parser.parse_args()
    # Dictionaries are easier to work with
    options = vars(options_obj)

    # Use some of the option parser to figure out application
    # log level
    if options.get('verbose'):
        ch.setLevel(logging.DEBUG)
    elif options.get('quiet'):
        ch.setLevel(logging.ERROR)
    else:
        ch.setLevel(logging.INFO)
    log.addHandler(ch)

    # SafeConfigParser became plain ConfigParser in Python 3 and the old
    # name was removed in 3.12.
    if sys.version_info[0] >= 3:
        cfg_file = ConfigParser.ConfigParser()
    else:
        cfg_file = ConfigParser.SafeConfigParser()
    if not options.get("ignore_cfg_files"):
        read_files = cfg_file.read(['/etc/tooltool', os.path.expanduser('~/.tooltool'),
                   os.path.join(os.getcwd(), '.tooltool')])
        log.debug("read in the config files '%s'" % '", '.join(read_files))
    else:
        log.debug("skipping config files")

    # Command-line values win; the config file only fills in what is unset.
    for option in ('base_url', 'algorithm'):
        if not options.get(option):
            try:
                options[option] = cfg_file.get('general', option)
                log.debug("read '%s' as '%s' from cfg_file" % (option, options[option]))
            except (ConfigParser.NoSectionError, ConfigParser.NoOptionError) as e:
                log.debug("%s in config file" % e, exc_info=True)

    # The key always exists because the option has a default, so test the
    # value instead (catches e.g. -m '').
    if not options.get('manifest'):
        parser.error("no manifest file specified")

    if len(args) < 1:
        parser.error('You must specify a command')
    # sys.exit rather than the interactive-only exit() helper from 'site'.
    sys.exit(0 if process_command(options, args) else 1)

if __name__ == "__main__":
    main()
else:
    log.addHandler(logging.NullHandler())
    #log.addHandler(logging.StreamHandler())