toolkit/components/telemetry/histogram_tools.py

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/toolkit/components/telemetry/histogram_tools.py	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,231 @@
     1.4 +# This Source Code Form is subject to the terms of the Mozilla Public
     1.5 +# License, v. 2.0. If a copy of the MPL was not distributed with this
     1.6 +# file, You can obtain one at http://mozilla.org/MPL/2.0/.
     1.7 +
     1.8 +import json
     1.9 +import math
    1.10 +import re
    1.11 +
    1.12 +from collections import OrderedDict
    1.13 +
    1.14 +def table_dispatch(kind, table, body):
    1.15 +    """Call body with table[kind] if it exists.  Raise an error otherwise."""
    1.16 +    if kind in table:
    1.17 +        return body(table[kind])
    1.18 +    else:
    1.19 +        raise BaseException, "don't know how to handle a histogram of kind %s" % kind
    1.20 +
    1.21 +class DefinitionException(BaseException):
    1.22 +    pass
    1.23 +
    1.24 +def check_numeric_limits(dmin, dmax, n_buckets):
    1.25 +    if type(dmin) != int:
    1.26 +        raise DefinitionException, "minimum is not a number"
    1.27 +    if type(dmax) != int:
    1.28 +        raise DefinitionException, "maximum is not a number"
    1.29 +    if type(n_buckets) != int:
    1.30 +        raise DefinitionException, "number of buckets is not a number"
    1.31 +
    1.32 +def linear_buckets(dmin, dmax, n_buckets):
    1.33 +    check_numeric_limits(dmin, dmax, n_buckets)
    1.34 +    ret_array = [0] * n_buckets
    1.35 +    dmin = float(dmin)
    1.36 +    dmax = float(dmax)
    1.37 +    for i in range(1, n_buckets):
    1.38 +        linear_range = (dmin * (n_buckets - 1 - i) + dmax * (i - 1)) / (n_buckets - 2)
    1.39 +        ret_array[i] = int(linear_range + 0.5)
    1.40 +    return ret_array
    1.41 +
    1.42 +def exponential_buckets(dmin, dmax, n_buckets):
    1.43 +    check_numeric_limits(dmin, dmax, n_buckets)
    1.44 +    log_max = math.log(dmax);
    1.45 +    bucket_index = 2;
    1.46 +    ret_array = [0] * n_buckets
    1.47 +    current = dmin
    1.48 +    ret_array[1] = current
    1.49 +    for bucket_index in range(2, n_buckets):
    1.50 +        log_current = math.log(current)
    1.51 +        log_ratio = (log_max - log_current) / (n_buckets - bucket_index)
    1.52 +        log_next = log_current + log_ratio
    1.53 +        next_value = int(math.floor(math.exp(log_next) + 0.5))
    1.54 +        if next_value > current:
    1.55 +            current = next_value
    1.56 +        else:
    1.57 +            current = current + 1
    1.58 +        ret_array[bucket_index] = current
    1.59 +    return ret_array
    1.60 +
    1.61 +always_allowed_keys = ['kind', 'description', 'cpp_guard', 'expires_in_version']
    1.62 +
    1.63 +class Histogram:
    1.64 +    """A class for representing a histogram definition."""
    1.65 +
    1.66 +    def __init__(self, name, definition):
    1.67 +        """Initialize a histogram named name with the given definition.
    1.68 +definition is a dict-like object that must contain at least the keys:
    1.69 +
    1.70 + - 'kind': The kind of histogram.  Must be one of 'boolean', 'flag',
    1.71 +   'enumerated', 'linear', or 'exponential'.
    1.72 + - 'description': A textual description of the histogram.
    1.73 +
    1.74 +The key 'cpp_guard' is optional; if present, it denotes a preprocessor
    1.75 +symbol that should guard C/C++ definitions associated with the histogram."""
    1.76 +        self.verify_attributes(name, definition)
    1.77 +        self._name = name
    1.78 +        self._description = definition['description']
    1.79 +        self._kind = definition['kind']
    1.80 +        self._cpp_guard = definition.get('cpp_guard')
    1.81 +        self._extended_statistics_ok = definition.get('extended_statistics_ok', False)
    1.82 +        self._expiration = definition.get('expires_in_version')
    1.83 +        self.compute_bucket_parameters(definition)
    1.84 +        table = { 'boolean': 'BOOLEAN',
    1.85 +                  'flag': 'FLAG',
    1.86 +                  'enumerated': 'LINEAR',
    1.87 +                  'linear': 'LINEAR',
    1.88 +                  'exponential': 'EXPONENTIAL' }
    1.89 +        table_dispatch(self.kind(), table,
    1.90 +                       lambda k: self._set_nsITelemetry_kind(k))
    1.91 +
    1.92 +    def name(self):
    1.93 +        """Return the name of the histogram."""
    1.94 +        return self._name
    1.95 +
    1.96 +    def description(self):
    1.97 +        """Return the description of the histogram."""
    1.98 +        return self._description
    1.99 +
   1.100 +    def kind(self):
   1.101 +        """Return the kind of the histogram.
   1.102 +Will be one of 'boolean', 'flag', 'enumerated', 'linear', or 'exponential'."""
   1.103 +        return self._kind
   1.104 +
   1.105 +    def expiration(self):
   1.106 +        """Return the expiration version of the histogram."""
   1.107 +        return self._expiration
   1.108 +
   1.109 +    def nsITelemetry_kind(self):
   1.110 +        """Return the nsITelemetry constant corresponding to the kind of
   1.111 +the histogram."""
   1.112 +        return self._nsITelemetry_kind
   1.113 +
   1.114 +    def _set_nsITelemetry_kind(self, kind):
   1.115 +        self._nsITelemetry_kind = "nsITelemetry::HISTOGRAM_%s" % kind
   1.116 +
   1.117 +    def low(self):
   1.118 +        """Return the lower bound of the histogram.  May be a string."""
   1.119 +        return self._low
   1.120 +
   1.121 +    def high(self):
   1.122 +        """Return the high bound of the histogram.  May be a string."""
   1.123 +        return self._high
   1.124 +
   1.125 +    def n_buckets(self):
   1.126 +        """Return the number of buckets in the histogram.  May be a string."""
   1.127 +        return self._n_buckets
   1.128 +
   1.129 +    def cpp_guard(self):
   1.130 +        """Return the preprocessor symbol that should guard C/C++ definitions
   1.131 +associated with the histogram.  Returns None if no guarding is necessary."""
   1.132 +        return self._cpp_guard
   1.133 +
   1.134 +    def extended_statistics_ok(self):
   1.135 +        """Return True if gathering extended statistics for this histogram
   1.136 +is enabled."""
   1.137 +        return self._extended_statistics_ok
   1.138 +
   1.139 +    def ranges(self):
   1.140 +        """Return an array of lower bounds for each bucket in the histogram."""
   1.141 +        table = { 'boolean': linear_buckets,
   1.142 +                  'flag': linear_buckets,
   1.143 +                  'enumerated': linear_buckets,
   1.144 +                  'linear': linear_buckets,
   1.145 +                  'exponential': exponential_buckets }
   1.146 +        return table_dispatch(self.kind(), table,
   1.147 +                              lambda p: p(self.low(), self.high(), self.n_buckets()))
   1.148 +
   1.149 +    def compute_bucket_parameters(self, definition):
   1.150 +        table = {
   1.151 +            'boolean': Histogram.boolean_flag_bucket_parameters,
   1.152 +            'flag': Histogram.boolean_flag_bucket_parameters,
   1.153 +            'enumerated': Histogram.enumerated_bucket_parameters,
   1.154 +            'linear': Histogram.linear_bucket_parameters,
   1.155 +            'exponential': Histogram.exponential_bucket_parameters
   1.156 +            }
   1.157 +        table_dispatch(self.kind(), table,
   1.158 +                       lambda p: self.set_bucket_parameters(*p(definition)))
   1.159 +
   1.160 +    def verify_attributes(self, name, definition):
   1.161 +        global always_allowed_keys
   1.162 +        general_keys = always_allowed_keys + ['low', 'high', 'n_buckets']
   1.163 +
   1.164 +        table = {
   1.165 +            'boolean': always_allowed_keys,
   1.166 +            'flag': always_allowed_keys,
   1.167 +            'enumerated': always_allowed_keys + ['n_values'],
   1.168 +            'linear': general_keys,
   1.169 +            'exponential': general_keys + ['extended_statistics_ok']
   1.170 +            }
   1.171 +        table_dispatch(definition['kind'], table,
   1.172 +                       lambda allowed_keys: Histogram.check_keys(name, definition, allowed_keys))
   1.173 +
   1.174 +        Histogram.check_expiration(name, definition)
   1.175 +
   1.176 +    @staticmethod
   1.177 +    def check_expiration(name, definition):
   1.178 +        expiration = definition.get('expires_in_version')
   1.179 +
   1.180 +        if not expiration:
   1.181 +            return
   1.182 +
   1.183 +        if re.match(r'^[1-9][0-9]*$', expiration):
   1.184 +            expiration = expiration + ".0a1"
   1.185 +        elif re.match(r'^[1-9][0-9]*\.0$', expiration):
   1.186 +            expiration = expiration + "a1"
   1.187 +
   1.188 +        definition['expires_in_version'] = expiration
   1.189 +
   1.190 +    @staticmethod
   1.191 +    def check_keys(name, definition, allowed_keys):
   1.192 +        for key in definition.iterkeys():
   1.193 +            if key not in allowed_keys:
   1.194 +                raise KeyError, '%s not permitted for %s' % (key, name)
   1.195 +
   1.196 +    def set_bucket_parameters(self, low, high, n_buckets):
   1.197 +        def try_to_coerce_to_number(v):
   1.198 +            try:
   1.199 +                return eval(v, {})
   1.200 +            except:
   1.201 +                return v
   1.202 +        self._low = try_to_coerce_to_number(low)
   1.203 +        self._high = try_to_coerce_to_number(high)
   1.204 +        self._n_buckets = try_to_coerce_to_number(n_buckets)
   1.205 +
   1.206 +    @staticmethod
   1.207 +    def boolean_flag_bucket_parameters(definition):
   1.208 +        return (1, 2, 3)
   1.209 +
   1.210 +    @staticmethod
   1.211 +    def linear_bucket_parameters(definition):
   1.212 +        return (definition.get('low', 1),
   1.213 +                definition['high'],
   1.214 +                definition['n_buckets'])
   1.215 +
   1.216 +    @staticmethod
   1.217 +    def enumerated_bucket_parameters(definition):
   1.218 +        n_values = definition['n_values']
   1.219 +        return (1, n_values, "%s+1" % n_values)
   1.220 +
   1.221 +    @staticmethod
   1.222 +    def exponential_bucket_parameters(definition):
   1.223 +        return (definition.get('low', 1),
   1.224 +                definition['high'],
   1.225 +                definition['n_buckets'])
   1.226 +
   1.227 +def from_file(filename):
   1.228 +    """Return an iterator that provides a sequence of Histograms for
   1.229 +the histograms defined in filename.
   1.230 +    """
   1.231 +    with open(filename, 'r') as f:
   1.232 +        histograms = json.load(f, object_pairs_hook=OrderedDict)
   1.233 +        for (name, definition) in histograms.iteritems():
   1.234 +            yield Histogram(name, definition)

mercurial