toolkit/components/crashes/CrashManager.jsm

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/toolkit/components/crashes/CrashManager.jsm	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,1035 @@
     1.4 +/* This Source Code Form is subject to the terms of the Mozilla Public
     1.5 + * License, v. 2.0. If a copy of the MPL was not distributed with this
     1.6 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     1.7 +
     1.8 +"use strict";
     1.9 +
    1.10 +const {classes: Cc, interfaces: Ci, utils: Cu} = Components;
    1.11 +
    1.12 +Cu.import("resource://gre/modules/Log.jsm", this);
    1.13 +Cu.import("resource://gre/modules/osfile.jsm", this)
    1.14 +Cu.import("resource://gre/modules/Promise.jsm", this);
    1.15 +Cu.import("resource://gre/modules/Services.jsm", this);
    1.16 +Cu.import("resource://gre/modules/Task.jsm", this);
    1.17 +Cu.import("resource://gre/modules/Timer.jsm", this);
    1.18 +Cu.import("resource://gre/modules/XPCOMUtils.jsm", this);
    1.19 +Cu.import("resource://services-common/utils.js", this);
    1.20 +
    1.21 +this.EXPORTED_SYMBOLS = [
    1.22 +  "CrashManager",
    1.23 +];
    1.24 +
    1.25 +/**
    1.26 + * How long to wait after application startup before crash event files are
    1.27 + * automatically aggregated.
    1.28 + *
    1.29 + * We defer aggregation for performance reasons, as we don't want too many
    1.30 + * services competing for I/O immediately after startup.
    1.31 + */
    1.32 +const AGGREGATE_STARTUP_DELAY_MS = 57000;
    1.33 +
    1.34 +const MILLISECONDS_IN_DAY = 24 * 60 * 60 * 1000;
    1.35 +
    1.36 +// Converts Date to days since UNIX epoch.
    1.37 +// This was copied from /services/metrics.storage.jsm. The implementation
    1.38 +// does not account for leap seconds.
    1.39 +function dateToDays(date) {
    1.40 +  return Math.floor(date.getTime() / MILLISECONDS_IN_DAY);
    1.41 +}
    1.42 +
    1.43 +
    1.44 +/**
    1.45 + * A gateway to crash-related data.
    1.46 + *
    1.47 + * This type is generic and can be instantiated any number of times.
    1.48 + * However, most applications will typically only have one instance
    1.49 + * instantiated and that instance will point to profile and user appdata
    1.50 + * directories.
    1.51 + *
    1.52 + * Instances are created by passing an object with properties.
    1.53 + * Recognized properties are:
    1.54 + *
    1.55 + *   pendingDumpsDir (string) (required)
    1.56 + *     Where dump files that haven't been uploaded are located.
    1.57 + *
    1.58 + *   submittedDumpsDir (string) (required)
    1.59 + *     Where records of uploaded dumps are located.
    1.60 + *
    1.61 + *   eventsDirs (array)
    1.62 + *     Directories (defined as strings) where events files are written. This
    1.63 + *     instance will collects events from files in the directories specified.
    1.64 + *
    1.65 + *   storeDir (string)
    1.66 + *     Directory we will use for our data store. This instance will write
    1.67 + *     data files into the directory specified.
    1.68 + *
    1.69 + *   telemetryStoreSizeKey (string)
    1.70 + *     Telemetry histogram to report store size under.
    1.71 + */
    1.72 +this.CrashManager = function (options) {
    1.73 +  for (let k of ["pendingDumpsDir", "submittedDumpsDir", "eventsDirs",
    1.74 +    "storeDir"]) {
    1.75 +    if (!(k in options)) {
    1.76 +      throw new Error("Required key not present in options: " + k);
    1.77 +    }
    1.78 +  }
    1.79 +
    1.80 +  this._log = Log.repository.getLogger("Crashes.CrashManager");
    1.81 +
    1.82 +  for (let k in options) {
    1.83 +    let v = options[k];
    1.84 +
    1.85 +    switch (k) {
    1.86 +      case "pendingDumpsDir":
    1.87 +        this._pendingDumpsDir = v;
    1.88 +        break;
    1.89 +
    1.90 +      case "submittedDumpsDir":
    1.91 +        this._submittedDumpsDir = v;
    1.92 +        break;
    1.93 +
    1.94 +      case "eventsDirs":
    1.95 +        this._eventsDirs = v;
    1.96 +        break;
    1.97 +
    1.98 +      case "storeDir":
    1.99 +        this._storeDir = v;
   1.100 +        break;
   1.101 +
   1.102 +      case "telemetryStoreSizeKey":
   1.103 +        this._telemetryStoreSizeKey = v;
   1.104 +        break;
   1.105 +
   1.106 +      default:
   1.107 +        throw new Error("Unknown property in options: " + k);
   1.108 +    }
   1.109 +  }
   1.110 +
   1.111 +  // Promise for in-progress aggregation operation. We store it on the
   1.112 +  // object so it can be returned for in-progress operations.
   1.113 +  this._aggregatePromise = null;
   1.114 +
   1.115 +  // The CrashStore currently attached to this object.
   1.116 +  this._store = null;
   1.117 +
   1.118 +  // The timer controlling the expiration of the CrashStore instance.
   1.119 +  this._storeTimer = null;
   1.120 +
   1.121 +  // This is a semaphore that prevents the store from being freed by our
   1.122 +  // timer-based resource freeing mechanism.
   1.123 +  this._storeProtectedCount = 0;
   1.124 +};
   1.125 +
   1.126 +this.CrashManager.prototype = Object.freeze({
   1.127 +  DUMP_REGEX: /^([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})\.dmp$/i,
   1.128 +  SUBMITTED_REGEX: /^bp-(?:hr-)?([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})\.txt$/i,
   1.129 +  ALL_REGEX: /^(.*)$/,
   1.130 +
   1.131 +  // How long the store object should persist in memory before being
   1.132 +  // automatically garbage collected.
   1.133 +  STORE_EXPIRATION_MS: 60 * 1000,
   1.134 +
   1.135 +  // Number of days after which a crash with no activity will get purged.
   1.136 +  PURGE_OLDER_THAN_DAYS: 180,
   1.137 +
   1.138 +  // The following are return codes for individual event file processing.
   1.139 +  // File processed OK.
   1.140 +  EVENT_FILE_SUCCESS: "ok",
   1.141 +  // The event appears to be malformed.
   1.142 +  EVENT_FILE_ERROR_MALFORMED: "malformed",
   1.143 +  // The type of event is unknown.
   1.144 +  EVENT_FILE_ERROR_UNKNOWN_EVENT: "unknown-event",
   1.145 +
   1.146 +  /**
   1.147 +   * Obtain a list of all dumps pending upload.
   1.148 +   *
   1.149 +   * The returned value is a promise that resolves to an array of objects
   1.150 +   * on success. Each element in the array has the following properties:
   1.151 +   *
   1.152 +   *   id (string)
   1.153 +   *      The ID of the crash (a UUID).
   1.154 +   *
   1.155 +   *   path (string)
   1.156 +   *      The filename of the crash (<UUID.dmp>)
   1.157 +   *
   1.158 +   *   date (Date)
   1.159 +   *      When this dump was created
   1.160 +   *
   1.161 +   * The returned arry is sorted by the modified time of the file backing
   1.162 +   * the entry, oldest to newest.
   1.163 +   *
   1.164 +   * @return Promise<Array>
   1.165 +   */
   1.166 +  pendingDumps: function () {
   1.167 +    return this._getDirectoryEntries(this._pendingDumpsDir, this.DUMP_REGEX);
   1.168 +  },
   1.169 +
   1.170 +  /**
   1.171 +   * Obtain a list of all dump files corresponding to submitted crashes.
   1.172 +   *
   1.173 +   * The returned value is a promise that resolves to an Array of
   1.174 +   * objects. Each object has the following properties:
   1.175 +   *
   1.176 +   *   path (string)
   1.177 +   *     The path of the file this entry comes from.
   1.178 +   *
   1.179 +   *   id (string)
   1.180 +   *     The crash UUID.
   1.181 +   *
   1.182 +   *   date (Date)
   1.183 +   *     The (estimated) date this crash was submitted.
   1.184 +   *
   1.185 +   * The returned array is sorted by the modified time of the file backing
   1.186 +   * the entry, oldest to newest.
   1.187 +   *
   1.188 +   * @return Promise<Array>
   1.189 +   */
   1.190 +  submittedDumps: function () {
   1.191 +    return this._getDirectoryEntries(this._submittedDumpsDir,
   1.192 +                                     this.SUBMITTED_REGEX);
   1.193 +  },
   1.194 +
   1.195 +  /**
   1.196 +   * Aggregates "loose" events files into the unified "database."
   1.197 +   *
   1.198 +   * This function should be called periodically to collect metadata from
   1.199 +   * all events files into the central data store maintained by this manager.
   1.200 +   *
   1.201 +   * Once events have been stored in the backing store the corresponding
   1.202 +   * source files are deleted.
   1.203 +   *
   1.204 +   * Only one aggregation operation is allowed to occur at a time. If this
   1.205 +   * is called when an existing aggregation is in progress, the promise for
   1.206 +   * the original call will be returned.
   1.207 +   *
   1.208 +   * @return promise<int> The number of event files that were examined.
   1.209 +   */
   1.210 +  aggregateEventsFiles: function () {
   1.211 +    if (this._aggregatePromise) {
   1.212 +      return this._aggregatePromise;
   1.213 +    }
   1.214 +
   1.215 +    return this._aggregatePromise = Task.spawn(function* () {
   1.216 +      if (this._aggregatePromise) {
   1.217 +        return this._aggregatePromise;
   1.218 +      }
   1.219 +
   1.220 +      try {
   1.221 +        let unprocessedFiles = yield this._getUnprocessedEventsFiles();
   1.222 +
   1.223 +        let deletePaths = [];
   1.224 +        let needsSave = false;
   1.225 +
   1.226 +        this._storeProtectedCount++;
   1.227 +        for (let entry of unprocessedFiles) {
   1.228 +          try {
   1.229 +            let result = yield this._processEventFile(entry);
   1.230 +
   1.231 +            switch (result) {
   1.232 +              case this.EVENT_FILE_SUCCESS:
   1.233 +                needsSave = true;
   1.234 +                // Fall through.
   1.235 +
   1.236 +              case this.EVENT_FILE_ERROR_MALFORMED:
   1.237 +                deletePaths.push(entry.path);
   1.238 +                break;
   1.239 +
   1.240 +              case this.EVENT_FILE_ERROR_UNKNOWN_EVENT:
   1.241 +                break;
   1.242 +
   1.243 +              default:
   1.244 +                Cu.reportError("Unhandled crash event file return code. Please " +
   1.245 +                               "file a bug: " + result);
   1.246 +            }
   1.247 +          } catch (ex if ex instanceof OS.File.Error) {
   1.248 +            this._log.warn("I/O error reading " + entry.path + ": " +
   1.249 +                           CommonUtils.exceptionStr(ex));
   1.250 +          } catch (ex) {
   1.251 +            // We should never encounter an exception. This likely represents
   1.252 +            // a coding error because all errors should be detected and
   1.253 +            // converted to return codes.
   1.254 +            //
   1.255 +            // If we get here, report the error and delete the source file
   1.256 +            // so we don't see it again.
   1.257 +            Cu.reportError("Exception when processing crash event file: " +
   1.258 +                           CommonUtils.exceptionStr(ex));
   1.259 +            deletePaths.push(entry.path);
   1.260 +          }
   1.261 +        }
   1.262 +
   1.263 +        if (needsSave) {
   1.264 +          let store = yield this._getStore();
   1.265 +          yield store.save();
   1.266 +        }
   1.267 +
   1.268 +        for (let path of deletePaths) {
   1.269 +          try {
   1.270 +            yield OS.File.remove(path);
   1.271 +          } catch (ex) {
   1.272 +            this._log.warn("Error removing event file (" + path + "): " +
   1.273 +                           CommonUtils.exceptionStr(ex));
   1.274 +          }
   1.275 +        }
   1.276 +
   1.277 +        return unprocessedFiles.length;
   1.278 +
   1.279 +      } finally {
   1.280 +        this._aggregatePromise = false;
   1.281 +        this._storeProtectedCount--;
   1.282 +      }
   1.283 +    }.bind(this));
   1.284 +  },
   1.285 +
   1.286 +  /**
   1.287 +   * Prune old crash data.
   1.288 +   *
   1.289 +   * @param date
   1.290 +   *        (Date) The cutoff point for pruning. Crashes without data newer
   1.291 +   *        than this will be pruned.
   1.292 +   */
   1.293 +  pruneOldCrashes: function (date) {
   1.294 +    return Task.spawn(function* () {
   1.295 +      let store = yield this._getStore();
   1.296 +      store.pruneOldCrashes(date);
   1.297 +      yield store.save();
   1.298 +    }.bind(this));
   1.299 +  },
   1.300 +
   1.301 +  /**
   1.302 +   * Run tasks that should be periodically performed.
   1.303 +   */
   1.304 +  runMaintenanceTasks: function () {
   1.305 +    return Task.spawn(function* () {
   1.306 +      yield this.aggregateEventsFiles();
   1.307 +
   1.308 +      let offset = this.PURGE_OLDER_THAN_DAYS * MILLISECONDS_IN_DAY;
   1.309 +      yield this.pruneOldCrashes(new Date(Date.now() - offset));
   1.310 +    }.bind(this));
   1.311 +  },
   1.312 +
   1.313 +  /**
   1.314 +   * Schedule maintenance tasks for some point in the future.
   1.315 +   *
   1.316 +   * @param delay
   1.317 +   *        (integer) Delay in milliseconds when maintenance should occur.
   1.318 +   */
   1.319 +  scheduleMaintenance: function (delay) {
   1.320 +    let deferred = Promise.defer();
   1.321 +
   1.322 +    setTimeout(() => {
   1.323 +      this.runMaintenanceTasks().then(deferred.resolve, deferred.reject);
   1.324 +    }, delay);
   1.325 +
   1.326 +    return deferred.promise;
   1.327 +  },
   1.328 +
   1.329 +  /**
   1.330 +   * Obtain the paths of all unprocessed events files.
   1.331 +   *
   1.332 +   * The promise-resolved array is sorted by file mtime, oldest to newest.
   1.333 +   */
   1.334 +  _getUnprocessedEventsFiles: function () {
   1.335 +    return Task.spawn(function* () {
   1.336 +      let entries = [];
   1.337 +
   1.338 +      for (let dir of this._eventsDirs) {
   1.339 +        for (let e of yield this._getDirectoryEntries(dir, this.ALL_REGEX)) {
   1.340 +          entries.push(e);
   1.341 +        }
   1.342 +      }
   1.343 +
   1.344 +      entries.sort((a, b) => { return a.date - b.date; });
   1.345 +
   1.346 +      return entries;
   1.347 +    }.bind(this));
   1.348 +  },
   1.349 +
   1.350 +  // See docs/crash-events.rst for the file format specification.
   1.351 +  _processEventFile: function (entry) {
   1.352 +    return Task.spawn(function* () {
   1.353 +      let data = yield OS.File.read(entry.path);
   1.354 +      let store = yield this._getStore();
   1.355 +
   1.356 +      let decoder = new TextDecoder();
   1.357 +      data = decoder.decode(data);
   1.358 +
   1.359 +      let type, time, payload;
   1.360 +      let start = 0;
   1.361 +      for (let i = 0; i < 2; i++) {
   1.362 +        let index = data.indexOf("\n", start);
   1.363 +        if (index == -1) {
   1.364 +          return this.EVENT_FILE_ERROR_MALFORMED;
   1.365 +        }
   1.366 +
   1.367 +        let sub = data.substring(start, index);
   1.368 +        switch (i) {
   1.369 +          case 0:
   1.370 +            type = sub;
   1.371 +            break;
   1.372 +          case 1:
   1.373 +            time = sub;
   1.374 +            try {
   1.375 +              time = parseInt(time, 10);
   1.376 +            } catch (ex) {
   1.377 +              return this.EVENT_FILE_ERROR_MALFORMED;
   1.378 +            }
   1.379 +        }
   1.380 +
   1.381 +        start = index + 1;
   1.382 +      }
   1.383 +      let date = new Date(time * 1000);
   1.384 +      let payload = data.substring(start);
   1.385 +
   1.386 +      return this._handleEventFilePayload(store, entry, type, date, payload);
   1.387 +    }.bind(this));
   1.388 +  },
   1.389 +
   1.390 +  _handleEventFilePayload: function (store, entry, type, date, payload) {
   1.391 +      // The payload types and formats are documented in docs/crash-events.rst.
   1.392 +      // Do not change the format of an existing type. Instead, invent a new
   1.393 +      // type.
   1.394 +
   1.395 +      let eventMap = {
   1.396 +        "crash.main.1": "addMainProcessCrash",
   1.397 +        "crash.plugin.1": "addPluginCrash",
   1.398 +        "hang.plugin.1": "addPluginHang",
   1.399 +      };
   1.400 +
   1.401 +      if (type in eventMap) {
   1.402 +        let lines = payload.split("\n");
   1.403 +        if (lines.length > 1) {
   1.404 +          this._log.warn("Multiple lines unexpected in payload for " +
   1.405 +                         entry.path);
   1.406 +          return this.EVENT_FILE_ERROR_MALFORMED;
   1.407 +        }
   1.408 +
   1.409 +        store[eventMap[type]](payload, date);
   1.410 +        return this.EVENT_FILE_SUCCESS;
   1.411 +      }
   1.412 +
   1.413 +      // DO NOT ADD NEW TYPES WITHOUT DOCUMENTING!
   1.414 +
   1.415 +      return this.EVENT_FILE_ERROR_UNKNOWN_EVENT;
   1.416 +  },
   1.417 +
   1.418 +  /**
   1.419 +   * The resolved promise is an array of objects with the properties:
   1.420 +   *
   1.421 +   *   path -- String filename
   1.422 +   *   id -- regexp.match()[1] (likely the crash ID)
   1.423 +   *   date -- Date mtime of the file
   1.424 +   */
   1.425 +  _getDirectoryEntries: function (path, re) {
   1.426 +    return Task.spawn(function* () {
   1.427 +      try {
   1.428 +        yield OS.File.stat(path);
   1.429 +      } catch (ex if ex instanceof OS.File.Error && ex.becauseNoSuchFile) {
   1.430 +          return [];
   1.431 +      }
   1.432 +
   1.433 +      let it = new OS.File.DirectoryIterator(path);
   1.434 +      let entries = [];
   1.435 +
   1.436 +      try {
   1.437 +        yield it.forEach((entry, index, it) => {
   1.438 +          if (entry.isDir) {
   1.439 +            return;
   1.440 +          }
   1.441 +
   1.442 +          let match = re.exec(entry.name);
   1.443 +          if (!match) {
   1.444 +            return;
   1.445 +          }
   1.446 +
   1.447 +          return OS.File.stat(entry.path).then((info) => {
   1.448 +            entries.push({
   1.449 +              path: entry.path,
   1.450 +              id: match[1],
   1.451 +              date: info.lastModificationDate,
   1.452 +            });
   1.453 +          });
   1.454 +        });
   1.455 +      } finally {
   1.456 +        it.close();
   1.457 +      }
   1.458 +
   1.459 +      entries.sort((a, b) => { return a.date - b.date; });
   1.460 +
   1.461 +      return entries;
   1.462 +    }.bind(this));
   1.463 +  },
   1.464 +
   1.465 +  _getStore: function () {
   1.466 +    return Task.spawn(function* () {
   1.467 +      if (!this._store) {
   1.468 +        yield OS.File.makeDir(this._storeDir, {
   1.469 +          ignoreExisting: true,
   1.470 +          unixMode: OS.Constants.libc.S_IRWXU,
   1.471 +        });
   1.472 +
   1.473 +        let store = new CrashStore(this._storeDir, this._telemetryStoreSizeKey);
   1.474 +        yield store.load();
   1.475 +
   1.476 +        this._store = store;
   1.477 +        this._storeTimer = Cc["@mozilla.org/timer;1"].createInstance(Ci.nsITimer);
   1.478 +      }
   1.479 +
   1.480 +      // The application can go long periods without interacting with the
   1.481 +      // store. Since the store takes up resources, we automatically "free"
   1.482 +      // the store after inactivity so resources can be returned to the system.
   1.483 +      // We do this via a timer and a mechanism that tracks when the store
   1.484 +      // is being accessed.
   1.485 +      this._storeTimer.cancel();
   1.486 +
   1.487 +      // This callback frees resources from the store unless the store
   1.488 +      // is protected from freeing by some other process.
   1.489 +      let timerCB = function () {
   1.490 +        if (this._storeProtectedCount) {
   1.491 +          this._storeTimer.initWithCallback(timerCB, this.STORE_EXPIRATION_MS,
   1.492 +                                            this._storeTimer.TYPE_ONE_SHOT);
   1.493 +          return;
   1.494 +        }
   1.495 +
   1.496 +        // We kill the reference that we hold. GC will kill it later. If
   1.497 +        // someone else holds a reference, that will prevent GC until that
   1.498 +        // reference is gone.
   1.499 +        this._store = null;
   1.500 +        this._storeTimer = null;
   1.501 +      }.bind(this);
   1.502 +
   1.503 +      this._storeTimer.initWithCallback(timerCB, this.STORE_EXPIRATION_MS,
   1.504 +                                        this._storeTimer.TYPE_ONE_SHOT);
   1.505 +
   1.506 +      return this._store;
   1.507 +    }.bind(this));
   1.508 +  },
   1.509 +
   1.510 +  /**
   1.511 +   * Obtain information about all known crashes.
   1.512 +   *
   1.513 +   * Returns an array of CrashRecord instances. Instances are read-only.
   1.514 +   */
   1.515 +  getCrashes: function () {
   1.516 +    return Task.spawn(function* () {
   1.517 +      let store = yield this._getStore();
   1.518 +
   1.519 +      return store.crashes;
   1.520 +    }.bind(this));
   1.521 +  },
   1.522 +
   1.523 +  getCrashCountsByDay: function () {
   1.524 +    return Task.spawn(function* () {
   1.525 +      let store = yield this._getStore();
   1.526 +
   1.527 +      return store._countsByDay;
   1.528 +    }.bind(this));
   1.529 +  },
   1.530 +});
   1.531 +
   1.532 +let gCrashManager;
   1.533 +
   1.534 +/**
   1.535 + * Interface to storage of crash data.
   1.536 + *
   1.537 + * This type handles storage of crash metadata. It exists as a separate type
   1.538 + * from the crash manager for performance reasons: since all crash metadata
   1.539 + * needs to be loaded into memory for access, we wish to easily dispose of all
   1.540 + * associated memory when this data is no longer needed. Having an isolated
   1.541 + * object whose references can easily be lost faciliates that simple disposal.
   1.542 + *
   1.543 + * When metadata is updated, the caller must explicitly persist the changes
   1.544 + * to disk. This prevents excessive I/O during updates.
   1.545 + *
   1.546 + * The store has a mechanism for ensuring it doesn't grow too large. A ceiling
   1.547 + * is placed on the number of daily events that can occur for events that can
   1.548 + * occur with relatively high frequency, notably plugin crashes and hangs
   1.549 + * (plugins can enter cycles where they repeatedly crash). If we've reached
   1.550 + * the high water mark and new data arrives, it's silently dropped.
   1.551 + * However, the count of actual events is always preserved. This allows
   1.552 + * us to report on the severity of problems beyond the storage threshold.
   1.553 + *
   1.554 + * Main process crashes are excluded from limits because they are both
   1.555 + * important and should be rare.
   1.556 + *
   1.557 + * @param storeDir (string)
   1.558 + *        Directory the store should be located in.
   1.559 + * @param telemetrySizeKey (string)
   1.560 + *        The telemetry histogram that should be used to store the size
   1.561 + *        of the data file.
   1.562 + */
   1.563 +function CrashStore(storeDir, telemetrySizeKey) {
   1.564 +  this._storeDir = storeDir;
   1.565 +  this._telemetrySizeKey = telemetrySizeKey;
   1.566 +
   1.567 +  this._storePath = OS.Path.join(storeDir, "store.json.mozlz4");
   1.568 +
   1.569 +  // Holds the read data from disk.
   1.570 +  this._data = null;
   1.571 +
   1.572 +  // Maps days since UNIX epoch to a Map of event types to counts.
   1.573 +  // This data structure is populated when the JSON file is loaded
   1.574 +  // and is also updated when new events are added.
   1.575 +  this._countsByDay = new Map();
   1.576 +}
   1.577 +
   1.578 +CrashStore.prototype = Object.freeze({
   1.579 +  // A crash that occurred in the main process.
   1.580 +  TYPE_MAIN_CRASH: "main-crash",
   1.581 +
   1.582 +  // A crash in a plugin process.
   1.583 +  TYPE_PLUGIN_CRASH: "plugin-crash",
   1.584 +
   1.585 +  // A hang in a plugin process.
   1.586 +  TYPE_PLUGIN_HANG: "plugin-hang",
   1.587 +
   1.588 +  // Maximum number of events to store per day. This establishes a
   1.589 +  // ceiling on the per-type/per-day records that will be stored.
   1.590 +  HIGH_WATER_DAILY_THRESHOLD: 100,
   1.591 +
   1.592 +  /**
   1.593 +   * Load data from disk.
   1.594 +   *
   1.595 +   * @return Promise
   1.596 +   */
   1.597 +  load: function () {
   1.598 +    return Task.spawn(function* () {
   1.599 +      // Loading replaces data. So reset data structures.
   1.600 +      this._data = {
   1.601 +        v: 1,
   1.602 +        crashes: new Map(),
   1.603 +        corruptDate: null,
   1.604 +      };
   1.605 +      this._countsByDay = new Map();
   1.606 +
   1.607 +      try {
   1.608 +        let decoder = new TextDecoder();
   1.609 +        let data = yield OS.File.read(this._storePath, {compression: "lz4"});
   1.610 +        data = JSON.parse(decoder.decode(data));
   1.611 +
   1.612 +        if (data.corruptDate) {
   1.613 +          this._data.corruptDate = new Date(data.corruptDate);
   1.614 +        }
   1.615 +
   1.616 +        // actualCounts is used to validate that the derived counts by
   1.617 +        // days stored in the payload matches up to actual data.
   1.618 +        let actualCounts = new Map();
   1.619 +
   1.620 +        for (let id in data.crashes) {
   1.621 +          let crash = data.crashes[id];
   1.622 +          let denormalized = this._denormalize(crash);
   1.623 +
   1.624 +          this._data.crashes.set(id, denormalized);
   1.625 +
   1.626 +          let key = dateToDays(denormalized.crashDate) + "-" + denormalized.type;
   1.627 +          actualCounts.set(key, (actualCounts.get(key) || 0) + 1);
   1.628 +        }
   1.629 +
   1.630 +        // The validation in this loop is arguably not necessary. We perform
   1.631 +        // it as a defense against unknown bugs.
   1.632 +        for (let dayKey in data.countsByDay) {
   1.633 +          let day = parseInt(dayKey, 10);
   1.634 +          for (let type in data.countsByDay[day]) {
   1.635 +            this._ensureCountsForDay(day);
   1.636 +
   1.637 +            let count = data.countsByDay[day][type];
   1.638 +            let key = day + "-" + type;
   1.639 +
   1.640 +            // If the payload says we have data for a given day but we
   1.641 +            // don't, the payload is wrong. Ignore it.
   1.642 +            if (!actualCounts.has(key)) {
   1.643 +              continue;
   1.644 +            }
   1.645 +
   1.646 +            // If we encountered more data in the payload than what the
   1.647 +            // data structure says, use the proper value.
   1.648 +            count = Math.max(count, actualCounts.get(key));
   1.649 +
   1.650 +            this._countsByDay.get(day).set(type, count);
   1.651 +          }
   1.652 +        }
   1.653 +      } catch (ex if ex instanceof OS.File.Error && ex.becauseNoSuchFile) {
   1.654 +        // Missing files (first use) are allowed.
   1.655 +      } catch (ex) {
   1.656 +        // If we can't load for any reason, mark a corrupt date in the instance
   1.657 +        // and swallow the error.
   1.658 +        //
   1.659 +        // The marking of a corrupted file is intentionally not persisted to
   1.660 +        // disk yet. Instead, we wait until the next save(). This is to give
   1.661 +        // non-permanent failures the opportunity to recover on their own.
   1.662 +        this._data.corruptDate = new Date();
   1.663 +      }
   1.664 +    }.bind(this));
   1.665 +  },
   1.666 +
   1.667 +  /**
   1.668 +   * Save data to disk.
   1.669 +   *
   1.670 +   * @return Promise<null>
   1.671 +   */
   1.672 +  save: function () {
   1.673 +    return Task.spawn(function* () {
   1.674 +      if (!this._data) {
   1.675 +        return;
   1.676 +      }
   1.677 +
   1.678 +      let normalized = {
   1.679 +        // The version should be incremented whenever the format
   1.680 +        // changes.
   1.681 +        v: 1,
   1.682 +        // Maps crash IDs to objects defining the crash.
   1.683 +        crashes: {},
   1.684 +        // Maps days since UNIX epoch to objects mapping event types to
   1.685 +        // counts. This is a mirror of this._countsByDay. e.g.
   1.686 +        // {
   1.687 +        //    15000: {
   1.688 +        //        "main-crash": 2,
   1.689 +        //        "plugin-crash": 1
   1.690 +        //    }
   1.691 +        // }
   1.692 +        countsByDay: {},
   1.693 +
   1.694 +        // When the store was last corrupted.
   1.695 +        corruptDate: null,
   1.696 +      };
   1.697 +
   1.698 +      if (this._data.corruptDate) {
   1.699 +        normalized.corruptDate = this._data.corruptDate.getTime();
   1.700 +      }
   1.701 +
   1.702 +      for (let [id, crash] of this._data.crashes) {
   1.703 +        let c = this._normalize(crash);
   1.704 +        normalized.crashes[id] = c;
   1.705 +      }
   1.706 +
   1.707 +      for (let [day, m] of this._countsByDay) {
   1.708 +        normalized.countsByDay[day] = {};
   1.709 +        for (let [type, count] of m) {
   1.710 +          normalized.countsByDay[day][type] = count;
   1.711 +        }
   1.712 +      }
   1.713 +
   1.714 +      let encoder = new TextEncoder();
   1.715 +      let data = encoder.encode(JSON.stringify(normalized));
   1.716 +      let size = yield OS.File.writeAtomic(this._storePath, data, {
   1.717 +                                           tmpPath: this._storePath + ".tmp",
   1.718 +                                           compression: "lz4"});
   1.719 +      if (this._telemetrySizeKey) {
   1.720 +        Services.telemetry.getHistogramById(this._telemetrySizeKey).add(size);
   1.721 +      }
   1.722 +    }.bind(this));
   1.723 +  },
   1.724 +
   1.725 +  /**
   1.726 +   * Normalize an object into one fit for serialization.
   1.727 +   *
   1.728 +   * This function along with _denormalize() serve to hack around the
   1.729 +   * default handling of Date JSON serialization because Date serialization
   1.730 +   * is undefined by JSON.
   1.731 +   *
   1.732 +   * Fields ending with "Date" are assumed to contain Date instances.
   1.733 +   * We convert these to milliseconds since epoch on output and back to
   1.734 +   * Date on input.
   1.735 +   */
   1.736 +  _normalize: function (o) {
   1.737 +    let normalized = {};
   1.738 +
   1.739 +    for (let k in o) {
   1.740 +      let v = o[k];
   1.741 +      if (v && k.endsWith("Date")) {
   1.742 +        normalized[k] = v.getTime();
   1.743 +      } else {
   1.744 +        normalized[k] = v;
   1.745 +      }
   1.746 +    }
   1.747 +
   1.748 +    return normalized;
   1.749 +  },
   1.750 +
   1.751 +  /**
   1.752 +   * Convert a serialized object back to its native form.
   1.753 +   */
   1.754 +  _denormalize: function (o) {
   1.755 +    let n = {};
   1.756 +
   1.757 +    for (let k in o) {
   1.758 +      let v = o[k];
   1.759 +      if (v && k.endsWith("Date")) {
   1.760 +        n[k] = new Date(parseInt(v, 10));
   1.761 +      } else {
   1.762 +        n[k] = v;
   1.763 +      }
   1.764 +    }
   1.765 +
   1.766 +    return n;
   1.767 +  },
   1.768 +
   1.769 +  /**
   1.770 +   * Prune old crash data.
   1.771 +   *
   1.772 +   * Crashes without recent activity are pruned from the store so the
   1.773 +   * size of the store is not unbounded. If there is activity on a crash,
   1.774 +   * that activity will keep the crash and all its data around for longer.
   1.775 +   *
   1.776 +   * @param date
   1.777 +   *        (Date) The cutoff at which data will be pruned. If an entry
   1.778 +   *        doesn't have data newer than this, it will be pruned.
   1.779 +   */
   1.780 +  pruneOldCrashes: function (date) {
   1.781 +    for (let crash of this.crashes) {
   1.782 +      let newest = crash.newestDate;
   1.783 +      if (!newest || newest.getTime() < date.getTime()) {
   1.784 +        this._data.crashes.delete(crash.id);
   1.785 +      }
   1.786 +    }
   1.787 +  },
   1.788 +
   1.789 +  /**
   1.790 +   * Date the store was last corrupted and required a reset.
   1.791 +   *
   1.792 +   * May be null (no corruption has ever occurred) or a Date instance.
   1.793 +   */
   1.794 +  get corruptDate() {
   1.795 +    return this._data.corruptDate;
   1.796 +  },
   1.797 +
   1.798 +  /**
   1.799 +   * The number of distinct crashes tracked.
   1.800 +   */
   1.801 +  get crashesCount() {
   1.802 +    return this._data.crashes.size;
   1.803 +  },
   1.804 +
   1.805 +  /**
   1.806 +   * All crashes tracked.
   1.807 +   *
   1.808 +   * This is an array of CrashRecord.
   1.809 +   */
   1.810 +  get crashes() {
   1.811 +    let crashes = [];
   1.812 +    for (let [id, crash] of this._data.crashes) {
   1.813 +      crashes.push(new CrashRecord(crash));
   1.814 +    }
   1.815 +
   1.816 +    return crashes;
   1.817 +  },
   1.818 +
   1.819 +  /**
   1.820 +   * Obtain a particular crash from its ID.
   1.821 +   *
   1.822 +   * A CrashRecord will be returned if the crash exists. null will be returned
   1.823 +   * if the crash is unknown.
   1.824 +   */
   1.825 +  getCrash: function (id) {
   1.826 +    for (let crash of this.crashes) {
   1.827 +      if (crash.id == id) {
   1.828 +        return crash;
   1.829 +      }
   1.830 +    }
   1.831 +
   1.832 +    return null;
   1.833 +  },
   1.834 +
   1.835 +  _ensureCountsForDay: function (day) {
   1.836 +    if (!this._countsByDay.has(day)) {
   1.837 +      this._countsByDay.set(day, new Map());
   1.838 +    }
   1.839 +  },
   1.840 +
   1.841 +  /**
   1.842 +   * Ensure the crash record is present in storage.
   1.843 +   *
   1.844 +   * Returns the crash record if we're allowed to store it or null
   1.845 +   * if we've hit the high water mark.
   1.846 +   *
   1.847 +   * @param id
   1.848 +   *        (string) The crash ID.
   1.849 +   * @param type
   1.850 +   *        (string) One of the this.TYPE_* constants describing the crash type.
   1.851 +   * @param date
   1.852 +   *        (Date) When this crash occurred.
   1.853 +   *
   1.854 +   * @return null | object crash record
   1.855 +   */
   1.856 +  _ensureCrashRecord: function (id, type, date) {
   1.857 +    let day = dateToDays(date);
   1.858 +    this._ensureCountsForDay(day);
   1.859 +
   1.860 +    let count = (this._countsByDay.get(day).get(type) || 0) + 1;
   1.861 +    this._countsByDay.get(day).set(type, count);
   1.862 +
   1.863 +    if (count > this.HIGH_WATER_DAILY_THRESHOLD && type != this.TYPE_MAIN_CRASH) {
   1.864 +      return null;
   1.865 +    }
   1.866 +
   1.867 +    if (!this._data.crashes.has(id)) {
   1.868 +      this._data.crashes.set(id, {
   1.869 +        id: id,
   1.870 +        type: type,
   1.871 +        crashDate: date,
   1.872 +      });
   1.873 +    }
   1.874 +
   1.875 +    let crash = this._data.crashes.get(id);
   1.876 +    crash.type = type;
   1.877 +    crash.date = date;
   1.878 +
   1.879 +    return crash;
   1.880 +  },
   1.881 +
   1.882 +  /**
   1.883 +   * Record the occurrence of a crash in the main process.
   1.884 +   *
   1.885 +   * @param id (string) Crash ID. Likely a UUID.
   1.886 +   * @param date (Date) When the crash occurred.
   1.887 +   */
   1.888 +  addMainProcessCrash: function (id, date) {
   1.889 +    this._ensureCrashRecord(id, this.TYPE_MAIN_CRASH, date);
   1.890 +  },
   1.891 +
   1.892 +  /**
   1.893 +   * Record the occurrence of a crash in a plugin process.
   1.894 +   *
   1.895 +   * @param id (string) Crash ID. Likely a UUID.
   1.896 +   * @param date (Date) When the crash occurred.
   1.897 +   */
   1.898 +  addPluginCrash: function (id, date) {
   1.899 +    this._ensureCrashRecord(id, this.TYPE_PLUGIN_CRASH, date);
   1.900 +  },
   1.901 +
   1.902 +  /**
   1.903 +   * Record the occurrence of a hang in a plugin process.
   1.904 +   *
   1.905 +   * @param id (string) Crash ID. Likely a UUID.
   1.906 +   * @param date (Date) When the hang was reported.
   1.907 +   */
   1.908 +  addPluginHang: function (id, date) {
   1.909 +    this._ensureCrashRecord(id, this.TYPE_PLUGIN_HANG, date);
   1.910 +  },
   1.911 +
   1.912 +  get mainProcessCrashes() {
   1.913 +    let crashes = [];
   1.914 +    for (let crash of this.crashes) {
   1.915 +      if (crash.isMainProcessCrash) {
   1.916 +        crashes.push(crash);
   1.917 +      }
   1.918 +    }
   1.919 +
   1.920 +    return crashes;
   1.921 +  },
   1.922 +
   1.923 +  get pluginCrashes() {
   1.924 +    let crashes = [];
   1.925 +    for (let crash of this.crashes) {
   1.926 +      if (crash.isPluginCrash) {
   1.927 +        crashes.push(crash);
   1.928 +      }
   1.929 +    }
   1.930 +
   1.931 +    return crashes;
   1.932 +  },
   1.933 +
   1.934 +  get pluginHangs() {
   1.935 +    let crashes = [];
   1.936 +    for (let crash of this.crashes) {
   1.937 +      if (crash.isPluginHang) {
   1.938 +        crashes.push(crash);
   1.939 +      }
   1.940 +    }
   1.941 +
   1.942 +    return crashes;
   1.943 +  },
   1.944 +});
   1.945 +
   1.946 +/**
   1.947 + * Represents an individual crash with metadata.
   1.948 + *
   1.949 + * This is a wrapper around the low-level anonymous JS objects that define
   1.950 + * crashes. It exposes a consistent and helpful API.
   1.951 + *
   1.952 + * Instances of this type should only be constructured inside this module,
   1.953 + * not externally. The constructor is not considered a public API.
   1.954 + *
   1.955 + * @param o (object)
   1.956 + *        The crash's entry from the CrashStore.
   1.957 + */
   1.958 +function CrashRecord(o) {
   1.959 +  this._o = o;
   1.960 +}
   1.961 +
   1.962 +CrashRecord.prototype = Object.freeze({
   1.963 +  get id() {
   1.964 +    return this._o.id;
   1.965 +  },
   1.966 +
   1.967 +  get crashDate() {
   1.968 +    return this._o.crashDate;
   1.969 +  },
   1.970 +
   1.971 +  /**
   1.972 +   * Obtain the newest date in this record.
   1.973 +   *
   1.974 +   * This is a convenience getter. The returned value is used to determine when
   1.975 +   * to expire a record.
   1.976 +   */
   1.977 +  get newestDate() {
   1.978 +    // We currently only have 1 date, so this is easy.
   1.979 +    return this._o.crashDate;
   1.980 +  },
   1.981 +
   1.982 +  get oldestDate() {
   1.983 +    return this._o.crashDate;
   1.984 +  },
   1.985 +
   1.986 +  get type() {
   1.987 +    return this._o.type;
   1.988 +  },
   1.989 +
   1.990 +  get isMainProcessCrash() {
   1.991 +    return this._o.type == CrashStore.prototype.TYPE_MAIN_CRASH;
   1.992 +  },
   1.993 +
   1.994 +  get isPluginCrash() {
   1.995 +    return this._o.type == CrashStore.prototype.TYPE_PLUGIN_CRASH;
   1.996 +  },
   1.997 +
   1.998 +  get isPluginHang() {
   1.999 +    return this._o.type == CrashStore.prototype.TYPE_PLUGIN_HANG;
  1.1000 +  },
  1.1001 +});
  1.1002 +
  1.1003 +/**
  1.1004 + * Obtain the global CrashManager instance used by the running application.
  1.1005 + *
  1.1006 + * CrashManager is likely only ever instantiated once per application lifetime.
  1.1007 + * The main reason it's implemented as a reusable type is to facilitate testing.
  1.1008 + */
  1.1009 +XPCOMUtils.defineLazyGetter(this.CrashManager, "Singleton", function () {
  1.1010 +  if (gCrashManager) {
  1.1011 +    return gCrashManager;
  1.1012 +  }
  1.1013 +
  1.1014 +  let crPath = OS.Path.join(OS.Constants.Path.userApplicationDataDir,
  1.1015 +                            "Crash Reports");
  1.1016 +  let storePath = OS.Path.join(OS.Constants.Path.profileDir, "crashes");
  1.1017 +
  1.1018 +  gCrashManager = new CrashManager({
  1.1019 +    pendingDumpsDir: OS.Path.join(crPath, "pending"),
  1.1020 +    submittedDumpsDir: OS.Path.join(crPath, "submitted"),
  1.1021 +    eventsDirs: [OS.Path.join(crPath, "events"), OS.Path.join(storePath, "events")],
  1.1022 +    storeDir: storePath,
  1.1023 +    telemetryStoreSizeKey: "CRASH_STORE_COMPRESSED_BYTES",
  1.1024 +  });
  1.1025 +
  1.1026 +  // Automatically aggregate event files shortly after startup. This
  1.1027 +  // ensures it happens with some frequency.
  1.1028 +  //
  1.1029 +  // There are performance considerations here. While this is doing
  1.1030 +  // work and could negatively impact performance, the amount of work
  1.1031 +  // is kept small per run by periodically aggregating event files.
  1.1032 +  // Furthermore, well-behaving installs should not have much work
  1.1033 +  // here to do. If there is a lot of work, that install has bigger
  1.1034 +  // issues beyond reduced performance near startup.
  1.1035 +  gCrashManager.scheduleMaintenance(AGGREGATE_STARTUP_DELAY_MS);
  1.1036 +
  1.1037 +  return gCrashManager;
  1.1038 +});

mercurial