The Tor Browser: build/unix/build-clang/tooltool.py@ac0c01689b40

Implement a real Private Browsing Mode condition by changing the API/ABI.
This solves Tor bug #9701 and complies with the disk-avoidance requirements documented at
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

     1 #!/usr/bin/env python

     3 #tooltool is a lookaside cache implemented in Python

     4 #Copyright (C) 2011 John H. Ford <john@johnford.info>

5 #

     6 #This program is free software; you can redistribute it and/or

     7 #modify it under the terms of the GNU General Public License

     8 #as published by the Free Software Foundation version 2

9 #

    10 #This program is distributed in the hope that it will be useful,

    11 #but WITHOUT ANY WARRANTY; without even the implied warranty of

    12 #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

    13 #GNU General Public License for more details.

    14 #

    15 #You should have received a copy of the GNU General Public License

    16 #along with this program; if not, write to the Free Software

    17 #Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.

    19 # A manifest file specifies files in that directory that are stored

    20 # elsewhere.  This file should only contain files in the directory

    21 # which the manifest file resides in and it should be called 'manifest.manifest'

    23 __version__ = '1'

    25 import json

    26 import os

    27 import optparse

    28 import logging

    29 import hashlib

    30 import urllib2

    31 import ConfigParser

    33 log = logging.getLogger(__name__)

    35 class FileRecordJSONEncoderException(Exception): pass

    36 class InvalidManifest(Exception): pass

    37 class ExceptionWithFilename(Exception):

    38     def __init__(self, filename):

    39         Exception.__init__(self)

    40         self.filename = filename

    42 class DigestMismatchException(ExceptionWithFilename): pass

    43 class MissingFileException(ExceptionWithFilename): pass

    45 class FileRecord(object):

    46     def __init__(self, filename, size, digest, algorithm):

    47         object.__init__(self)

    48         self.filename = filename

    49         self.size = size

    50         self.digest = digest

    51         self.algorithm = algorithm

    52         log.debug("creating %s 0x%x" % (self.__class__.__name__, id(self)))

    54     def __eq__(self, other):

    55         if self is other:

    56             return True

    57         if self.filename == other.filename and \

    58             self.size == other.size and \

    59             self.digest == other.digest and \

    60             self.algorithm == other.algorithm:

    61             return True

    62         else:

    63             return False

    65     def __ne__(self, other):

    66         return not self.__eq__(other)

    68     def __str__(self):

    69         return repr(self)

    71     def __repr__(self):

    72         return "%s.%s(filename='%s', size='%s', digest='%s', algorithm='%s')" % (__name__,

    73                 self.__class__.__name__,

    74                 self.filename, self.size, self.digest, self.algorithm)

    76     def present(self):

    77         # Doesn't check validity

    78         return os.path.exists(self.filename)

    80     def validate_size(self):

    81         if self.present():

    82             return self.size == os.path.getsize(self.filename)

    83         else:

    84             log.debug("trying to validate size on a missing file, %s", self.filename)

    85             raise MissingFileException(filename=self.filename)

    87     def validate_digest(self):

    88         if self.present():

    89             with open(self.filename, 'rb') as f:

    90                 return self.digest == digest_file(f, self.algorithm)

    91         else:

    92             log.debug("trying to validate digest on a missing file, %s', self.filename")

    93             raise MissingFileException(filename=self.filename)

    95     def validate(self):

    96         if self.validate_size():

    97             if self.validate_digest():

    98                 return True

    99         return False

   101     def describe(self):

   102         if self.present() and self.validate():

   103             return "'%s' is present and valid" % self.filename

   104         elif self.present():

   105             return "'%s' is present and invalid" % self.filename

   106         else:

   107             return "'%s' is absent" % self.filename

   110 def create_file_record(filename, algorithm):

   111     fo = open(filename, 'rb')

   112     stored_filename = os.path.split(filename)[1]

   113     fr = FileRecord(stored_filename, os.path.getsize(filename), digest_file(fo, algorithm), algorithm)

   114     fo.close()

   115     return fr

   118 class FileRecordJSONEncoder(json.JSONEncoder):

   119     def encode_file_record(self, obj):

   120         if not issubclass(type(obj), FileRecord):

   121             err = "FileRecordJSONEncoder is only for FileRecord and lists of FileRecords, not %s" % obj.__class__.__name__

   122             log.warn(err)

   123             raise FileRecordJSONEncoderException(err)

   124         else:

   125             return {'filename': obj.filename, 'size': obj.size, 'algorithm': obj.algorithm, 'digest': obj.digest}

   127     def default(self, f):

   128         if issubclass(type(f), list):

   129             record_list = []

   130             for i in f:

   131                 record_list.append(self.encode_file_record(i))

   132             return record_list

   133         else:

   134             return self.encode_file_record(f)

   137 class FileRecordJSONDecoder(json.JSONDecoder):

   138     """I help the json module materialize a FileRecord from

   139     a JSON file.  I understand FileRecords and lists of

   140     FileRecords.  I ignore things that I don't expect for now"""

   141     # TODO: make this more explicit in what it's looking for

   142     # and error out on unexpected things

   143     def process_file_records(self, obj):

   144         if isinstance(obj, list):

   145             record_list = []

   146             for i in obj:

   147                 record = self.process_file_records(i)

   148                 if issubclass(type(record), FileRecord):

   149                     record_list.append(record)

   150             return record_list

   151         if isinstance(obj, dict) and \

   152            len(obj.keys()) == 4 and \

   153            obj.has_key('filename') and \

   154            obj.has_key('size') and \

   155            obj.has_key('algorithm') and \

   156            obj.has_key('digest'):

   157             rv = FileRecord(obj['filename'], obj['size'], obj['digest'], obj['algorithm'])

   158             log.debug("materialized %s" % rv)

   159             return rv

   160         return obj

   162     def decode(self, s):

   163         decoded = json.JSONDecoder.decode(self, s)

   164         rv = self.process_file_records(decoded)

   165         return rv

   168 class Manifest(object):

   170     valid_formats = ('json',)

   172     def __init__(self, file_records=[]):

   173         self.file_records = file_records

   175     def __eq__(self, other):

   176         if self is other:

   177             return True

   178         if len(self.file_records) != len(other.file_records):

   179             log.debug('Manifests differ in number of files')

   180             return False

   181         #TODO: Lists in a different order should be equal

   182         for record in range(0,len(self.file_records)):

   183             if self.file_records[record] != other.file_records[record]:

   184                 log.debug('FileRecords differ, %s vs %s' % (self.file_records[record],

   185                                                             other.file_records[record]))

   186                 return False

   187         return True

   189     def __deepcopy__(self, memo):

   190         # This is required for a deep copy

   191         return Manifest(self.file_records[:])

   193     def __copy__(self):

   194         return Manifest(self.file_records)

   196     def copy(self):

   197         return Manifest(self.file_records[:])

   199     def present(self):

   200         return all(i.present() for i in self.file_records)

   202     def validate_sizes(self):

   203         return all(i.validate_size() for i in self.file_records)

   205     def validate_digests(self):

   206         return all(i.validate_digest() for i in self.file_records)

   208     def validate(self):

   209         return all(i.validate() for i in self.file_records)

   211     def sort(self):

   212         #TODO: WRITE TESTS

   213         self.file_records.sort(key=lambda x: x.size)

   215     def load(self, data_file, fmt='json'):

   216         assert fmt in self.valid_formats

   217         if fmt == 'json':

   218             try:

   219                 self.file_records.extend(json.load(data_file, cls=FileRecordJSONDecoder))

   220                 self.sort()

   221             except ValueError:

   222                 raise InvalidManifest("trying to read invalid manifest file")

   224     def loads(self, data_string, fmt='json'):

   225         assert fmt in self.valid_formats

   226         if fmt == 'json':

   227             try:

   228                 self.file_records.extend(json.loads(data_string, cls=FileRecordJSONDecoder))

   229                 self.sort()

   230             except ValueError:

   231                 raise InvalidManifest("trying to read invalid manifest file")

   233     def dump(self, output_file, fmt='json'):

   234         assert fmt in self.valid_formats

   235         self.sort()

   236         if fmt == 'json':

   237             rv = json.dump(self.file_records, output_file, indent=0, cls=FileRecordJSONEncoder)

   238             print >> output_file, ''

   239             return rv

   241     def dumps(self, fmt='json'):

   242         assert fmt in self.valid_formats

   243         self.sort()

   244         if fmt == 'json':

   245             return json.dumps(self.file_records, cls=FileRecordJSONEncoder)

   248 def digest_file(f, a):

   249     """I take a file like object 'f' and return a hex-string containing

   250     of the result of the algorithm 'a' applied to 'f'."""

   251     h = hashlib.new(a)

   252     chunk_size = 1024*10

   253     data = f.read(chunk_size)

   254     while data:

   255         h.update(data)

   256         data = f.read(chunk_size)

   257     if hasattr(f, 'name'):

   258         log.debug('hashed %s with %s to be %s', f.name, a, h.hexdigest())

   259     else:

   260         log.debug('hashed a file with %s to be %s', a, h.hexdigest())

   261     return h.hexdigest()

   263 # TODO: write tests for this function

   264 def open_manifest(manifest_file):

   265     """I know how to take a filename and load it into a Manifest object"""

   266     if os.path.exists(manifest_file):

   267         manifest = Manifest()

   268         with open(manifest_file) as f:

   269             manifest.load(f)

   270             log.debug("loaded manifest from file '%s'" % manifest_file)

   271         return manifest

   272     else:

   273         log.debug("tried to load absent file '%s' as manifest" % manifest_file)

   274         raise InvalidManifest("manifest file '%s' does not exist" % manifest_file)

   276 # TODO: write tests for this function

   277 def list_manifest(manifest_file):

   278     """I know how print all the files in a location"""

   279     try:

   280         manifest = open_manifest(manifest_file)

   281     except InvalidManifest:

   282         log.error("failed to load manifest file at '%s'" % manifest_file)

   283         return False

   284     for f in manifest.file_records:

   285         print "%s\t%s\t%s" % ("P" if f.present() else "-",

   286                               "V" if f.present() and f.validate() else "-",

   287                               f.filename)

   288     return True

   290 def validate_manifest(manifest_file):

   291     """I validate that all files in a manifest are present and valid but

   292     don't fetch or delete them if they aren't"""

   293     try:

   294         manifest = open_manifest(manifest_file)

   295     except InvalidManifest:

   296         log.error("failed to load manifest file at '%s'" % manifest_file)

   297         return False

   298     invalid_files = []

   299     absent_files = []

   300     for f in manifest.file_records:

   301         if not f.present():

   302             absent_files.append(f)

   303         else:

   304             if not f.validate():

   305                 invalid_files.append(f)

   306     if len(invalid_files + absent_files) == 0:

   307         return True

   308     else:

   309         return False

   311 # TODO: write tests for this function

   312 def add_files(manifest_file, algorithm, filenames):

   313     # returns True if all files successfully added, False if not

   314     # and doesn't catch library Exceptions.  If any files are already

   315     # tracked in the manifest, return will be False because they weren't

   316     # added

   317     all_files_added = True

   318     # Create a old_manifest object to add to

   319     if os.path.exists(manifest_file):

   320         old_manifest = open_manifest(manifest_file)

   321     else:

   322         old_manifest = Manifest()

   323         log.debug("creating a new manifest file")

   324     new_manifest = Manifest() # use a different manifest for the output

   325     for filename in filenames:

   326         log.debug("adding %s" % filename)

   327         path, name = os.path.split(filename)

   328         new_fr = create_file_record(filename, algorithm)

   329         log.debug("appending a new file record to manifest file")

   330         add = True

   331         for fr in old_manifest.file_records:

   332             log.debug("manifest file has '%s'" % "', ".join([x.filename for x in old_manifest.file_records]))

   333             if new_fr == fr and new_fr.validate():

   334                 # TODO: Decide if this case should really cause a False return

   335                 log.info("file already in old_manifest file and matches")

   336                 add = False

   337             elif new_fr == fr and not new_fr.validate():

   338                 log.error("file already in old_manifest file but is invalid")

   339                 add = False

   340             if filename == fr.filename:

   341                 log.error("manifest already contains file named %s" % filename)

   342                 add = False

   343         if add:

   344             new_manifest.file_records.append(new_fr)

   345             log.debug("added '%s' to manifest" % filename)

   346         else:

   347             all_files_added = False

   348     with open(manifest_file, 'wb') as output:

   349         new_manifest.dump(output, fmt='json')

   350     return all_files_added

   353 # TODO: write tests for this function

   354 def fetch_file(base_url, file_record, overwrite=False, grabchunk=1024*4):

   355     # A file which is requested to be fetched that exists locally will be hashed.

   356     # If the hash matches the requested file's hash, nothing will be done and the

   357     # function will return.  If the function is told to overwrite and there is a

   358     # digest mismatch, the exiting file will be overwritten

   359     if file_record.present():

   360         if file_record.validate():

   361             log.info("existing '%s' is valid, not fetching" % file_record.filename)

   362             return True

   363         if overwrite:

   364             log.info("overwriting '%s' as requested" % file_record.filename)

   365         else:

   366             # All of the following is for a useful error message

   367             with open(file_record.filename, 'rb') as f:

   368                 d = digest_file(f, file_record.algorithm)

   369             log.error("digest mismatch between manifest(%s...) and local file(%s...)" % \

   370                     (file_record.digest[:8], d[:8]))

   371             log.debug("full digests: manifest (%s) local file (%s)" % (file_record.digest, d))

   372             # Let's bail!

   373             return False

   375     # Generate the URL for the file on the server side

   376     url = "%s/%s/%s" % (base_url, file_record.algorithm, file_record.digest)

   378     log.debug("fetching from '%s'" % url)

   380     # TODO: This should be abstracted to make generic retreival protocol handling easy

   381     # Well, the file doesn't exist locally.  Lets fetch it.

   382     try:

   383         f = urllib2.urlopen(url)

   384         log.debug("opened %s for reading" % url)

   385         with open(file_record.filename, 'wb') as out:

   386             k = True

   387             size = 0

   388             while k:

   389                 # TODO: print statistics as file transfers happen both for info and to stop

   390                 # buildbot timeouts

   391                 indata = f.read(grabchunk)

   392                 out.write(indata)

   393                 size += len(indata)

   394                 if indata == '':

   395                     k = False

   396             if size != file_record.size:

   397                 log.error("transfer from %s to %s failed due to a difference of %d bytes" % (url,

   398                             file_record.filename, file_record.size - size))

   399                 return False

   400             log.info("fetched %s" % file_record.filename)

   401     except (urllib2.URLError, urllib2.HTTPError) as e:

   402         log.error("failed to fetch '%s': %s" % (file_record.filename, e),

   403                   exc_info=True)

   404         return False

   405     except IOError:

   406         log.error("failed to write to '%s'" % file_record.filename,

   407                   exc_info=True)

   408         return False

   409     return True

   412 # TODO: write tests for this function

   413 def fetch_files(manifest_file, base_url, overwrite, filenames=[]):

   414     # Lets load the manifest file

   415     try:

   416         manifest = open_manifest(manifest_file)

   417     except InvalidManifest:

   418         log.error("failed to load manifest file at '%s'" % manifest_file)

   419         return False

   420     # We want to track files that fail to be fetched as well as

   421     # files that are fetched

   422     failed_files = []

   424     # Lets go through the manifest and fetch the files that we want

   425     fetched_files = []

   426     for f in manifest.file_records:

   427         if f.filename in filenames or len(filenames) == 0:

   428             log.debug("fetching %s" % f.filename)

   429             if fetch_file(base_url, f, overwrite):

   430                 fetched_files.append(f)

   431             else:

   432                 failed_files.append(f.filename)

   433         else:

   434             log.debug("skipping %s" % f.filename)

   436     # Even if we get the file, lets ensure that it matches what the

   437     # manifest specified

   438     for localfile in fetched_files:

   439         if not localfile.validate():

   440             log.error("'%s'" % localfile.describe())

   442     # If we failed to fetch or validate a file, we need to fail

   443     if len(failed_files) > 0:

   444         log.error("The following files failed: '%s'" % "', ".join(failed_files))

   445         return False

   446     return True

   449 # TODO: write tests for this function

   450 def process_command(options, args):

   451     """ I know how to take a list of program arguments and

   452     start doing the right thing with them"""

   453     cmd = args[0]

   454     cmd_args = args[1:]

   455     log.debug("processing '%s' command with args '%s'" % (cmd, '", "'.join(cmd_args)))

   456     log.debug("using options: %s" % options)

   457     if cmd == 'list':

   458         return list_manifest(options['manifest'])

   459     if cmd == 'validate':

   460         return validate_manifest(options['manifest'])

   461     elif cmd == 'add':

   462         return add_files(options['manifest'], options['algorithm'], cmd_args)

   463     elif cmd == 'fetch':

   464         if not options.has_key('base_url') or options.get('base_url') is None:

   465             log.critical('fetch command requires url option')

   466             return False

   467         return fetch_files(options['manifest'], options['base_url'], options['overwrite'], cmd_args)

   468     else:

   469         log.critical('command "%s" is not implemented' % cmd)

   470         return False

   472 # fetching api:

   473 #   http://hostname/algorithm/hash

   474 #   example: http://people.mozilla.org/sha1/1234567890abcedf

   475 # This will make it possible to have the server allow clients to

   476 # use different algorithms than what was uploaded to the server

   478 # TODO: Implement the following features:

   479 #   -optimization: do small files first, justification is that they are faster

   480 #    and cause a faster failure if they are invalid

   481 #   -store permissions

   482 #   -local renames i.e. call the file one thing on the server and

   483 #    something different locally

   484 #   -deal with the cases:

   485 #     -local data matches file requested with different filename

   486 #     -two different files with same name, different hash

   487 #   -?only ever locally to digest as filename, symlink to real name

   488 #   -?maybe deal with files as a dir of the filename with all files in that dir as the versions of that file

   489 #      - e.g. ./python-2.6.7.dmg/0123456789abcdef and ./python-2.6.7.dmg/abcdef0123456789

   491 def main():

   492     # Set up logging, for now just to the console

   493     ch = logging.StreamHandler()

   494     cf = logging.Formatter("%(levelname)s - %(message)s")

   495     ch.setFormatter(cf)

   497     # Set up option parsing

   498     parser = optparse.OptionParser()

   499     # I wish there was a way to say "only allow args to be

   500     # sequential and at the end of the argv.

   501     # OH! i could step through sys.argv and check for things starting without -/-- before things starting with them

   502     parser.add_option('-q', '--quiet', default=False,

   503             dest='quiet', action='store_true')

   504     parser.add_option('-v', '--verbose', default=False,

   505             dest='verbose', action='store_true')

   506     parser.add_option('-m', '--manifest', default='manifest.tt',

   507             dest='manifest', action='store',

   508             help='specify the manifest file to be operated on')

   509     parser.add_option('-d', '--algorithm', default='sha512',

   510             dest='algorithm', action='store',

   511             help='openssl hashing algorithm to use')

   512     parser.add_option('-o', '--overwrite', default=False,

   513             dest='overwrite', action='store_true',

   514             help='if fetching, remote copy will overwrite a local copy that is different. ')

   515     parser.add_option('--url', dest='base_url', action='store',

   516             help='base url for fetching files')

   517     parser.add_option('--ignore-config-files', action='store_true', default=False,

   518                      dest='ignore_cfg_files')

   519     (options_obj, args) = parser.parse_args()

   520     # Dictionaries are easier to work with

   521     options = vars(options_obj)

   524     # Use some of the option parser to figure out application

   525     # log level

   526     if options.get('verbose'):

   527         ch.setLevel(logging.DEBUG)

   528     elif options.get('quiet'):

   529         ch.setLevel(logging.ERROR)

   530     else:

   531         ch.setLevel(logging.INFO)

   532     log.addHandler(ch)

   534     cfg_file = ConfigParser.SafeConfigParser()

   535     if not options.get("ignore_cfg_files"):

   536         read_files = cfg_file.read(['/etc/tooltool', os.path.expanduser('~/.tooltool'),

   537                    os.path.join(os.getcwd(), '.tooltool')])

   538         log.debug("read in the config files '%s'" % '", '.join(read_files))

   539     else:

   540         log.debug("skipping config files")

   542     for option in ('base_url', 'algorithm'):

   543         if not options.get(option):

   544             try:

   545                 options[option] = cfg_file.get('general', option)

   546                 log.debug("read '%s' as '%s' from cfg_file" % (option, options[option]))

   547             except (ConfigParser.NoSectionError, ConfigParser.NoOptionError) as e:

   548                 log.debug("%s in config file" % e, exc_info=True)

   550     if not options.has_key('manifest'):

   551         parser.error("no manifest file specified")

   553     if len(args) < 1:

   554         parser.error('You must specify a command')

   555     exit(0 if process_command(options, args) else 1)

   557 if __name__ == "__main__":

   558     main()

   559 else:

   560     log.addHandler(logging.NullHandler())

   561     #log.addHandler(logging.StreamHandler())

The Tor Browser / file revision

build/unix/build-clang/tooltool.py@ac0c01689b40

build/unix/build-clang/tooltool.py