tools/performance/diff-talos.py

author      Michael Schloh von Bennewitz <michael@schloh.com>
date        Tue, 06 Jan 2015 21:39:09 +0100
branch      TOR_BUG_9701
changeset   8:97036ab72558
permissions -rwxr-xr-x

Conditionally force memory storage according to privacy.thirdparty.isolate;
This solves Tor bug #9701, complying with disk avoidance documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

#!/usr/bin/env python

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
     7 """
     8 This is a simple script that does one thing only: compare talos runs from
     9 two revisions. It is intended to check which of two try runs is best or if
    10 a try improves over the m-c or m-i revision in branches from.
    12 A big design goal is to avoid bit rot and to assert when bit rot is detected.
    13 The set of tests we run is a moving target. When possible this script
    14 should work with any test set, but in parts where it has to hard code
    15 information, it should try to assert that it is valid so that changes
    16 are detected and it is fixed earlier.
    17 """

import json
import urllib2
import math
import sys
from optparse import OptionParser

# FIXME: currently we assert that we know all the benchmarks just so
# we are sure to keep the bigger_is_better set up to date. Is there a
# better way to find/compute it?
bigger_is_better = frozenset(('v8_7', 'dromaeo_dom', 'dromaeo_css'))

smaller_is_better = frozenset(('tdhtmlr_paint', 'tp5n_main_rss_paint',
                               'ts_paint', 'tp5n_paint', 'tsvgr_opacity',
                               'a11yr_paint', 'kraken',
                               'tdhtmlr_nochrome_paint',
                               'tspaint_places_generated_med', 'tpaint',
                               'tp5n_shutdown_paint', 'tsvgr',
                               'tp5n_pbytes_paint', 'tscrollr_paint',
                               'tspaint_places_generated_max',
                               'tp5n_responsiveness_paint',
                               'sunspider', 'tp5n_xres_paint', 'num_ctors',
                               'tresize', 'trobopan', 'tcheckerboard',
                               'tcheck3', 'tcheck2', 'tprovider',
                               'tp5n_modlistbytes_paint',
                               'trace_malloc_maxheap', 'tp4m_nochrome',
                               'trace_malloc_leaks', 'tp4m_main_rss_nochrome',
                               'tp4m_shutdown_nochrome', 'tdhtml_nochrome',
                               'ts_shutdown', 'tp5n_%cpu_paint',
                               'trace_malloc_allocs', 'ts',
                               'tsvg_nochrome', 'tp5n_content_rss_paint',
                               'tp5n_main_startup_fileio_paint',
                               'tp5n_nonmain_normal_netio_paint',
                               'tp5n_nonmain_startup_fileio_paint',
                               'tp5n_main_normal_fileio_paint',
                               'tp5n_nonmain_normal_fileio_paint',
                               'tp5n_main_startup_netio_paint',
                               'tp5n_main_normal_netio_paint',
                               'tp5n_main_shutdown_netio_paint',
                               'tp5n_main_shutdown_fileio_paint'))

all_benchmarks = smaller_is_better | bigger_is_better
assert len(smaller_is_better & bigger_is_better) == 0
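
# The graph server replies with JSON of the form
# {"stat": "ok", "revisions": {<revision>: <per-benchmark data>, ...}};
# the asserts below pin this shape down so that a format change trips an
# assertion instead of silently producing bogus numbers.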
def get_raw_data_for_revisions(revisions):
    """Loads data for the revisions, returns an array with one element for
    each revision."""
    selectors = ["revision=%s" % revision for revision in revisions]
    selector = '&'.join(selectors)
    url = "http://graphs.mozilla.org/api/test/runs/revisions?%s" % selector
    url_stream = urllib2.urlopen(url)
    data = json.load(url_stream)
    assert frozenset(data.keys()) == frozenset(('stat', 'revisions'))
    assert data['stat'] == 'ok'
    rev_data = data['revisions']
    assert frozenset(rev_data.keys()) == frozenset(revisions)
    return [rev_data[r] for r in revisions]

def mean(values):
    return float(sum(values))/len(values)
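
# c4(n) is the standard correction factor that makes the sample standard
# deviation an unbiased estimator of sigma for normally distributed data:
#   c4(n) = sqrt(2/(n-1)) * gamma(n/2) / gamma((n-1)/2)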
def c4(n):
    n = float(n)
    numerator = math.gamma(n/2)*math.sqrt(2/(n-1))
    denominator = math.gamma((n-1)/2)
    return numerator/denominator
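
# Bessel-corrected sample standard deviation, further divided by c4(n) so
# the estimate is unbiased for normal data. Returns None when there is only
# one run, since no spread can be estimated from a single value.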
def unbiased_standard_deviation(values):
    n = len(values)
    if n == 1:
        return None
    acc = 0
    avg = mean(values)
    for i in values:
        dist = i - avg
        acc += dist * dist
    return math.sqrt(acc/(n-1))/c4(n)

class BenchmarkResult:
    """Stores the summary (mean and standard deviation) of a set of talos
    runs on the same revision and OS."""
    def __init__(self, avg, std):
        self.avg = avg
        self.std = std
    def __str__(self):
        t = "%s," % self.avg
        return "(%-13s %s)" % (t, self.std)

# FIXME: This function computes the statistics of multiple runs of talos on
# a single revision. Should it also support computing statistics over runs
# of different revisions, assuming the revisions are equivalent from a
# performance perspective?
def digest_revision_data(data):
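    # Result shape: {benchmark_name: {os_name: BenchmarkResult}}.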
    ret = {}
    benchmarks = frozenset(data.keys())
    # assert that all the benchmarks are known. If they are not,
    # smaller_is_better or bigger_is_better needs to be updated depending on
    # the benchmark type.
    assert all_benchmarks.issuperset(benchmarks), \
        "%s not found in all_benchmarks" % ','.join(benchmarks - all_benchmarks)
    for benchmark in benchmarks:
        benchmark_data = data[benchmark]
        expected_keys = frozenset(("test_runs", "name", "id"))
        assert frozenset(benchmark_data.keys()) == expected_keys
        test_runs = benchmark_data["test_runs"]
        operating_systems = test_runs.keys()
        results = {}
        for os in operating_systems:
            os_runs = test_runs[os]
            values = []
            for os_run in os_runs:
                # each run has 4 fields: test run id, build id, timestamp,
                # mean value
                assert len(os_run) == 4
                values.append(os_run[3])
            avg = mean(values)
            std = unbiased_standard_deviation(values)
            results[os] = BenchmarkResult(avg, std)
        ret[benchmark] = results
    return ret

def get_data_for_revisions(revisions):
    raw_data = get_raw_data_for_revisions(revisions)
    return [digest_revision_data(x) for x in raw_data]
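
# a and b are closed intervals represented as [low, high] pairs.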
def overlaps(a, b):
    return a[1] >= b[0] and b[1] >= a[0]

def is_significant(old, new):
    # conservative hack: if we don't know, say it is significant.
    if old.std is None or new.std is None:
        return True
    # Build a one-standard-deviation interval around each mean; requiring
    # the intervals not to overlap separates the means by at least the sum
    # of the two deviations, roughly a two-sigma (about 95% confidence) test.
    old_interval = [old.avg - old.std, old.avg + old.std]
    new_interval = [new.avg - new.std, new.avg + new.std]
    return not overlaps(old_interval, new_interval)
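
# Report the regression or improvement as a ratio >= 1. For example, on a
# smaller-is-better benchmark such as ts_paint, old.avg = 100 and
# new.avg = 110 prints as "1.1000x worse". For bigger-is-better benchmarks
# the operands are swapped first so the same comparison applies.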
def compute_difference(benchmark, old, new):
    if benchmark in bigger_is_better:
        new, old = old, new

    if new.avg >= old.avg:
        return "%1.4fx worse" % (new.avg/old.avg)
    else:
        return "%1.4fx better" % (old.avg/new.avg)

# FIXME: the printing could use a table class that computes the sizes of the
# cells instead of the current hard coded values.
def print_data_comparison(datav):
    assert len(datav) == 2
    old_data = datav[0]
    new_data = datav[1]
    old_benchmarks = frozenset(old_data.keys())
    new_benchmarks = frozenset(new_data.keys())
    benchmarks = old_benchmarks.intersection(new_benchmarks)
    for benchmark in sorted(benchmarks):
        print benchmark
        old_benchmark_data = old_data[benchmark]
        new_benchmark_data = new_data[benchmark]
        old_operating_systems = frozenset(old_benchmark_data.keys())
        new_operating_systems = frozenset(new_benchmark_data.keys())
        operating_systems = old_operating_systems.intersection(new_operating_systems)
        for os in sorted(operating_systems):
            old_os_data = old_benchmark_data[os]
            new_os_data = new_benchmark_data[os]
            if not is_significant(old_os_data, new_os_data):
                continue

            diff = compute_difference(benchmark, old_os_data, new_os_data)
            print '%-33s | %-30s -> %-30s %s' % \
                (os, old_os_data, new_os_data, diff)
        print

def main():
    parser = OptionParser(usage='Usage: %prog old_revision new_revision')
    options, args = parser.parse_args()
    if len(args) != 2:
        parser.print_help()
        sys.exit(1)

    print_data_comparison(get_data_for_revisions(args))

if __name__ == '__main__':
    main()
