toolkit/components/crashes/CrashManager.jsm

Fri, 16 Jan 2015 18:13:44 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Fri, 16 Jan 2015 18:13:44 +0100
branch
TOR_BUG_9701
changeset 14
925c144e1f1f
permissions
-rw-r--r--

Integrate suggestion from review to improve consistency with existing code.

michael@0 1 /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0 2 * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0 4
michael@0 5 "use strict";
michael@0 6
michael@0 7 const {classes: Cc, interfaces: Ci, utils: Cu} = Components;
michael@0 8
michael@0 9 Cu.import("resource://gre/modules/Log.jsm", this);
michael@0 10 Cu.import("resource://gre/modules/osfile.jsm", this)
michael@0 11 Cu.import("resource://gre/modules/Promise.jsm", this);
michael@0 12 Cu.import("resource://gre/modules/Services.jsm", this);
michael@0 13 Cu.import("resource://gre/modules/Task.jsm", this);
michael@0 14 Cu.import("resource://gre/modules/Timer.jsm", this);
michael@0 15 Cu.import("resource://gre/modules/XPCOMUtils.jsm", this);
michael@0 16 Cu.import("resource://services-common/utils.js", this);
michael@0 17
michael@0 18 this.EXPORTED_SYMBOLS = [
michael@0 19 "CrashManager",
michael@0 20 ];
michael@0 21
michael@0 22 /**
michael@0 23 * How long to wait after application startup before crash event files are
michael@0 24 * automatically aggregated.
michael@0 25 *
michael@0 26 * We defer aggregation for performance reasons, as we don't want too many
michael@0 27 * services competing for I/O immediately after startup.
michael@0 28 */
michael@0 29 const AGGREGATE_STARTUP_DELAY_MS = 57000;
michael@0 30
michael@0 31 const MILLISECONDS_IN_DAY = 24 * 60 * 60 * 1000;
michael@0 32
michael@0 33 // Converts Date to days since UNIX epoch.
michael@0 34 // This was copied from /services/metrics.storage.jsm. The implementation
michael@0 35 // does not account for leap seconds.
function dateToDays(date) {
  // Whole days elapsed since the UNIX epoch, rounded down.
  let elapsedMs = date.getTime();
  return Math.floor(elapsedMs / MILLISECONDS_IN_DAY);
}
michael@0 39
michael@0 40
michael@0 41 /**
michael@0 42 * A gateway to crash-related data.
michael@0 43 *
michael@0 44 * This type is generic and can be instantiated any number of times.
michael@0 45 * However, most applications will typically only have one instance
michael@0 46 * instantiated and that instance will point to profile and user appdata
michael@0 47 * directories.
michael@0 48 *
michael@0 49 * Instances are created by passing an object with properties.
michael@0 50 * Recognized properties are:
michael@0 51 *
michael@0 52 * pendingDumpsDir (string) (required)
michael@0 53 * Where dump files that haven't been uploaded are located.
michael@0 54 *
michael@0 55 * submittedDumpsDir (string) (required)
michael@0 56 * Where records of uploaded dumps are located.
michael@0 57 *
michael@0 58 * eventsDirs (array)
michael@0 59 * Directories (defined as strings) where events files are written. This
michael@0 60 * instance will collect events from files in the directories specified.
michael@0 61 *
michael@0 62 * storeDir (string)
michael@0 63 * Directory we will use for our data store. This instance will write
michael@0 64 * data files into the directory specified.
michael@0 65 *
michael@0 66 * telemetryStoreSizeKey (string)
michael@0 67 * Telemetry histogram to report store size under.
michael@0 68 */
this.CrashManager = function (options) {
  // These options must always be supplied by the caller.
  let requiredKeys = ["pendingDumpsDir", "submittedDumpsDir", "eventsDirs",
                      "storeDir"];
  for (let required of requiredKeys) {
    if (!(required in options)) {
      throw new Error("Required key not present in options: " + required);
    }
  }

  this._log = Log.repository.getLogger("Crashes.CrashManager");

  // Recognized option name -> instance field that receives its value.
  let fieldForOption = {
    pendingDumpsDir: "_pendingDumpsDir",
    submittedDumpsDir: "_submittedDumpsDir",
    eventsDirs: "_eventsDirs",
    storeDir: "_storeDir",
    telemetryStoreSizeKey: "_telemetryStoreSizeKey",
  };

  for (let name in options) {
    let value = options[name];

    // Anything that is not in the table above is rejected outright.
    if (!Object.prototype.hasOwnProperty.call(fieldForOption, name)) {
      throw new Error("Unknown property in options: " + name);
    }

    this[fieldForOption[name]] = value;
  }

  // Promise of the aggregation currently running, if any. Kept on the
  // instance so concurrent callers can be handed the same promise.
  this._aggregatePromise = null;

  // CrashStore instance currently held by this manager.
  this._store = null;

  // Timer that expires the held CrashStore instance.
  this._storeTimer = null;

  // Non-zero while some operation needs the store to stay alive; the
  // timer-based freeing mechanism checks this before releasing the store.
  this._storeProtectedCount = 0;
};
michael@0 122
michael@0 123 this.CrashManager.prototype = Object.freeze({
michael@0 124 DUMP_REGEX: /^([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})\.dmp$/i,
michael@0 125 SUBMITTED_REGEX: /^bp-(?:hr-)?([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})\.txt$/i,
michael@0 126 ALL_REGEX: /^(.*)$/,
michael@0 127
michael@0 128 // How long the store object should persist in memory before being
michael@0 129 // automatically garbage collected.
michael@0 130 STORE_EXPIRATION_MS: 60 * 1000,
michael@0 131
michael@0 132 // Number of days after which a crash with no activity will get purged.
michael@0 133 PURGE_OLDER_THAN_DAYS: 180,
michael@0 134
michael@0 135 // The following are return codes for individual event file processing.
michael@0 136 // File processed OK.
michael@0 137 EVENT_FILE_SUCCESS: "ok",
michael@0 138 // The event appears to be malformed.
michael@0 139 EVENT_FILE_ERROR_MALFORMED: "malformed",
michael@0 140 // The type of event is unknown.
michael@0 141 EVENT_FILE_ERROR_UNKNOWN_EVENT: "unknown-event",
michael@0 142
michael@0 143 /**
michael@0 144 * Obtain a list of all dumps pending upload.
michael@0 145 *
michael@0 146 * The returned value is a promise that resolves to an array of objects
michael@0 147 * on success. Each element in the array has the following properties:
michael@0 148 *
michael@0 149 * id (string)
michael@0 150 * The ID of the crash (a UUID).
michael@0 151 *
michael@0 152 * path (string)
michael@0 153 * The filename of the crash (<UUID.dmp>)
michael@0 154 *
michael@0 155 * date (Date)
michael@0 156 * When this dump was created
michael@0 157 *
michael@0 158 * The returned array is sorted by the modified time of the file backing
michael@0 159 * the entry, oldest to newest.
michael@0 160 *
michael@0 161 * @return Promise<Array>
michael@0 162 */
michael@0 163 pendingDumps: function () {
michael@0 164 return this._getDirectoryEntries(this._pendingDumpsDir, this.DUMP_REGEX);
michael@0 165 },
michael@0 166
michael@0 167 /**
michael@0 168 * Obtain a list of all dump files corresponding to submitted crashes.
michael@0 169 *
michael@0 170 * The returned value is a promise that resolves to an Array of
michael@0 171 * objects. Each object has the following properties:
michael@0 172 *
michael@0 173 * path (string)
michael@0 174 * The path of the file this entry comes from.
michael@0 175 *
michael@0 176 * id (string)
michael@0 177 * The crash UUID.
michael@0 178 *
michael@0 179 * date (Date)
michael@0 180 * The (estimated) date this crash was submitted.
michael@0 181 *
michael@0 182 * The returned array is sorted by the modified time of the file backing
michael@0 183 * the entry, oldest to newest.
michael@0 184 *
michael@0 185 * @return Promise<Array>
michael@0 186 */
michael@0 187 submittedDumps: function () {
michael@0 188 return this._getDirectoryEntries(this._submittedDumpsDir,
michael@0 189 this.SUBMITTED_REGEX);
michael@0 190 },
michael@0 191
michael@0 192 /**
michael@0 193 * Aggregates "loose" events files into the unified "database."
michael@0 194 *
michael@0 195 * This function should be called periodically to collect metadata from
michael@0 196 * all events files into the central data store maintained by this manager.
michael@0 197 *
michael@0 198 * Once events have been stored in the backing store the corresponding
michael@0 199 * source files are deleted.
michael@0 200 *
michael@0 201 * Only one aggregation operation is allowed to occur at a time. If this
michael@0 202 * is called when an existing aggregation is in progress, the promise for
michael@0 203 * the original call will be returned.
michael@0 204 *
michael@0 205 * @return promise<int> The number of event files that were examined.
michael@0 206 */
michael@0 207 aggregateEventsFiles: function () {
michael@0 208 if (this._aggregatePromise) {
michael@0 209 return this._aggregatePromise;
michael@0 210 }
michael@0 211
michael@0 212 return this._aggregatePromise = Task.spawn(function* () {
michael@0 213 if (this._aggregatePromise) {
michael@0 214 return this._aggregatePromise;
michael@0 215 }
michael@0 216
michael@0 217 try {
michael@0 218 let unprocessedFiles = yield this._getUnprocessedEventsFiles();
michael@0 219
michael@0 220 let deletePaths = [];
michael@0 221 let needsSave = false;
michael@0 222
michael@0 223 this._storeProtectedCount++;
michael@0 224 for (let entry of unprocessedFiles) {
michael@0 225 try {
michael@0 226 let result = yield this._processEventFile(entry);
michael@0 227
michael@0 228 switch (result) {
michael@0 229 case this.EVENT_FILE_SUCCESS:
michael@0 230 needsSave = true;
michael@0 231 // Fall through.
michael@0 232
michael@0 233 case this.EVENT_FILE_ERROR_MALFORMED:
michael@0 234 deletePaths.push(entry.path);
michael@0 235 break;
michael@0 236
michael@0 237 case this.EVENT_FILE_ERROR_UNKNOWN_EVENT:
michael@0 238 break;
michael@0 239
michael@0 240 default:
michael@0 241 Cu.reportError("Unhandled crash event file return code. Please " +
michael@0 242 "file a bug: " + result);
michael@0 243 }
michael@0 244 } catch (ex if ex instanceof OS.File.Error) {
michael@0 245 this._log.warn("I/O error reading " + entry.path + ": " +
michael@0 246 CommonUtils.exceptionStr(ex));
michael@0 247 } catch (ex) {
michael@0 248 // We should never encounter an exception. This likely represents
michael@0 249 // a coding error because all errors should be detected and
michael@0 250 // converted to return codes.
michael@0 251 //
michael@0 252 // If we get here, report the error and delete the source file
michael@0 253 // so we don't see it again.
michael@0 254 Cu.reportError("Exception when processing crash event file: " +
michael@0 255 CommonUtils.exceptionStr(ex));
michael@0 256 deletePaths.push(entry.path);
michael@0 257 }
michael@0 258 }
michael@0 259
michael@0 260 if (needsSave) {
michael@0 261 let store = yield this._getStore();
michael@0 262 yield store.save();
michael@0 263 }
michael@0 264
michael@0 265 for (let path of deletePaths) {
michael@0 266 try {
michael@0 267 yield OS.File.remove(path);
michael@0 268 } catch (ex) {
michael@0 269 this._log.warn("Error removing event file (" + path + "): " +
michael@0 270 CommonUtils.exceptionStr(ex));
michael@0 271 }
michael@0 272 }
michael@0 273
michael@0 274 return unprocessedFiles.length;
michael@0 275
michael@0 276 } finally {
michael@0 277 this._aggregatePromise = false;
michael@0 278 this._storeProtectedCount--;
michael@0 279 }
michael@0 280 }.bind(this));
michael@0 281 },
michael@0 282
michael@0 283 /**
michael@0 284 * Prune old crash data.
michael@0 285 *
michael@0 286 * @param date
michael@0 287 * (Date) The cutoff point for pruning. Crashes without data newer
michael@0 288 * than this will be pruned.
michael@0 289 */
michael@0 290 pruneOldCrashes: function (date) {
michael@0 291 return Task.spawn(function* () {
michael@0 292 let store = yield this._getStore();
michael@0 293 store.pruneOldCrashes(date);
michael@0 294 yield store.save();
michael@0 295 }.bind(this));
michael@0 296 },
michael@0 297
michael@0 298 /**
michael@0 299 * Run tasks that should be periodically performed.
michael@0 300 */
michael@0 301 runMaintenanceTasks: function () {
michael@0 302 return Task.spawn(function* () {
michael@0 303 yield this.aggregateEventsFiles();
michael@0 304
michael@0 305 let offset = this.PURGE_OLDER_THAN_DAYS * MILLISECONDS_IN_DAY;
michael@0 306 yield this.pruneOldCrashes(new Date(Date.now() - offset));
michael@0 307 }.bind(this));
michael@0 308 },
michael@0 309
michael@0 310 /**
michael@0 311 * Schedule maintenance tasks for some point in the future.
michael@0 312 *
michael@0 313 * @param delay
michael@0 314 * (integer) Delay in milliseconds when maintenance should occur.
michael@0 315 */
michael@0 316 scheduleMaintenance: function (delay) {
michael@0 317 let deferred = Promise.defer();
michael@0 318
michael@0 319 setTimeout(() => {
michael@0 320 this.runMaintenanceTasks().then(deferred.resolve, deferred.reject);
michael@0 321 }, delay);
michael@0 322
michael@0 323 return deferred.promise;
michael@0 324 },
michael@0 325
michael@0 326 /**
michael@0 327 * Obtain the paths of all unprocessed events files.
michael@0 328 *
michael@0 329 * The promise-resolved array is sorted by file mtime, oldest to newest.
michael@0 330 */
michael@0 331 _getUnprocessedEventsFiles: function () {
michael@0 332 return Task.spawn(function* () {
michael@0 333 let entries = [];
michael@0 334
michael@0 335 for (let dir of this._eventsDirs) {
michael@0 336 for (let e of yield this._getDirectoryEntries(dir, this.ALL_REGEX)) {
michael@0 337 entries.push(e);
michael@0 338 }
michael@0 339 }
michael@0 340
michael@0 341 entries.sort((a, b) => { return a.date - b.date; });
michael@0 342
michael@0 343 return entries;
michael@0 344 }.bind(this));
michael@0 345 },
michael@0 346
michael@0 347 // See docs/crash-events.rst for the file format specification.
michael@0 348 _processEventFile: function (entry) {
michael@0 349 return Task.spawn(function* () {
michael@0 350 let data = yield OS.File.read(entry.path);
michael@0 351 let store = yield this._getStore();
michael@0 352
michael@0 353 let decoder = new TextDecoder();
michael@0 354 data = decoder.decode(data);
michael@0 355
michael@0 356 let type, time, payload;
michael@0 357 let start = 0;
michael@0 358 for (let i = 0; i < 2; i++) {
michael@0 359 let index = data.indexOf("\n", start);
michael@0 360 if (index == -1) {
michael@0 361 return this.EVENT_FILE_ERROR_MALFORMED;
michael@0 362 }
michael@0 363
michael@0 364 let sub = data.substring(start, index);
michael@0 365 switch (i) {
michael@0 366 case 0:
michael@0 367 type = sub;
michael@0 368 break;
michael@0 369 case 1:
michael@0 370 time = sub;
michael@0 371 try {
michael@0 372 time = parseInt(time, 10);
michael@0 373 } catch (ex) {
michael@0 374 return this.EVENT_FILE_ERROR_MALFORMED;
michael@0 375 }
michael@0 376 }
michael@0 377
michael@0 378 start = index + 1;
michael@0 379 }
michael@0 380 let date = new Date(time * 1000);
michael@0 381 let payload = data.substring(start);
michael@0 382
michael@0 383 return this._handleEventFilePayload(store, entry, type, date, payload);
michael@0 384 }.bind(this));
michael@0 385 },
michael@0 386
michael@0 387 _handleEventFilePayload: function (store, entry, type, date, payload) {
michael@0 388 // The payload types and formats are documented in docs/crash-events.rst.
michael@0 389 // Do not change the format of an existing type. Instead, invent a new
michael@0 390 // type.
michael@0 391
michael@0 392 let eventMap = {
michael@0 393 "crash.main.1": "addMainProcessCrash",
michael@0 394 "crash.plugin.1": "addPluginCrash",
michael@0 395 "hang.plugin.1": "addPluginHang",
michael@0 396 };
michael@0 397
michael@0 398 if (type in eventMap) {
michael@0 399 let lines = payload.split("\n");
michael@0 400 if (lines.length > 1) {
michael@0 401 this._log.warn("Multiple lines unexpected in payload for " +
michael@0 402 entry.path);
michael@0 403 return this.EVENT_FILE_ERROR_MALFORMED;
michael@0 404 }
michael@0 405
michael@0 406 store[eventMap[type]](payload, date);
michael@0 407 return this.EVENT_FILE_SUCCESS;
michael@0 408 }
michael@0 409
michael@0 410 // DO NOT ADD NEW TYPES WITHOUT DOCUMENTING!
michael@0 411
michael@0 412 return this.EVENT_FILE_ERROR_UNKNOWN_EVENT;
michael@0 413 },
michael@0 414
michael@0 415 /**
michael@0 416 * The resolved promise is an array of objects with the properties:
michael@0 417 *
michael@0 418 * path -- String filename
michael@0 419 * id -- regexp.match()[1] (likely the crash ID)
michael@0 420 * date -- Date mtime of the file
michael@0 421 */
michael@0 422 _getDirectoryEntries: function (path, re) {
michael@0 423 return Task.spawn(function* () {
michael@0 424 try {
michael@0 425 yield OS.File.stat(path);
michael@0 426 } catch (ex if ex instanceof OS.File.Error && ex.becauseNoSuchFile) {
michael@0 427 return [];
michael@0 428 }
michael@0 429
michael@0 430 let it = new OS.File.DirectoryIterator(path);
michael@0 431 let entries = [];
michael@0 432
michael@0 433 try {
michael@0 434 yield it.forEach((entry, index, it) => {
michael@0 435 if (entry.isDir) {
michael@0 436 return;
michael@0 437 }
michael@0 438
michael@0 439 let match = re.exec(entry.name);
michael@0 440 if (!match) {
michael@0 441 return;
michael@0 442 }
michael@0 443
michael@0 444 return OS.File.stat(entry.path).then((info) => {
michael@0 445 entries.push({
michael@0 446 path: entry.path,
michael@0 447 id: match[1],
michael@0 448 date: info.lastModificationDate,
michael@0 449 });
michael@0 450 });
michael@0 451 });
michael@0 452 } finally {
michael@0 453 it.close();
michael@0 454 }
michael@0 455
michael@0 456 entries.sort((a, b) => { return a.date - b.date; });
michael@0 457
michael@0 458 return entries;
michael@0 459 }.bind(this));
michael@0 460 },
michael@0 461
michael@0 462 _getStore: function () {
michael@0 463 return Task.spawn(function* () {
michael@0 464 if (!this._store) {
michael@0 465 yield OS.File.makeDir(this._storeDir, {
michael@0 466 ignoreExisting: true,
michael@0 467 unixMode: OS.Constants.libc.S_IRWXU,
michael@0 468 });
michael@0 469
michael@0 470 let store = new CrashStore(this._storeDir, this._telemetryStoreSizeKey);
michael@0 471 yield store.load();
michael@0 472
michael@0 473 this._store = store;
michael@0 474 this._storeTimer = Cc["@mozilla.org/timer;1"].createInstance(Ci.nsITimer);
michael@0 475 }
michael@0 476
michael@0 477 // The application can go long periods without interacting with the
michael@0 478 // store. Since the store takes up resources, we automatically "free"
michael@0 479 // the store after inactivity so resources can be returned to the system.
michael@0 480 // We do this via a timer and a mechanism that tracks when the store
michael@0 481 // is being accessed.
michael@0 482 this._storeTimer.cancel();
michael@0 483
michael@0 484 // This callback frees resources from the store unless the store
michael@0 485 // is protected from freeing by some other process.
michael@0 486 let timerCB = function () {
michael@0 487 if (this._storeProtectedCount) {
michael@0 488 this._storeTimer.initWithCallback(timerCB, this.STORE_EXPIRATION_MS,
michael@0 489 this._storeTimer.TYPE_ONE_SHOT);
michael@0 490 return;
michael@0 491 }
michael@0 492
michael@0 493 // We kill the reference that we hold. GC will kill it later. If
michael@0 494 // someone else holds a reference, that will prevent GC until that
michael@0 495 // reference is gone.
michael@0 496 this._store = null;
michael@0 497 this._storeTimer = null;
michael@0 498 }.bind(this);
michael@0 499
michael@0 500 this._storeTimer.initWithCallback(timerCB, this.STORE_EXPIRATION_MS,
michael@0 501 this._storeTimer.TYPE_ONE_SHOT);
michael@0 502
michael@0 503 return this._store;
michael@0 504 }.bind(this));
michael@0 505 },
michael@0 506
michael@0 507 /**
michael@0 508 * Obtain information about all known crashes.
michael@0 509 *
michael@0 510 * Returns an array of CrashRecord instances. Instances are read-only.
michael@0 511 */
michael@0 512 getCrashes: function () {
michael@0 513 return Task.spawn(function* () {
michael@0 514 let store = yield this._getStore();
michael@0 515
michael@0 516 return store.crashes;
michael@0 517 }.bind(this));
michael@0 518 },
michael@0 519
michael@0 520 getCrashCountsByDay: function () {
michael@0 521 return Task.spawn(function* () {
michael@0 522 let store = yield this._getStore();
michael@0 523
michael@0 524 return store._countsByDay;
michael@0 525 }.bind(this));
michael@0 526 },
michael@0 527 });
michael@0 528
michael@0 529 let gCrashManager;
michael@0 530
michael@0 531 /**
michael@0 532 * Interface to storage of crash data.
michael@0 533 *
michael@0 534 * This type handles storage of crash metadata. It exists as a separate type
michael@0 535 * from the crash manager for performance reasons: since all crash metadata
michael@0 536 * needs to be loaded into memory for access, we wish to easily dispose of all
michael@0 537 * associated memory when this data is no longer needed. Having an isolated
michael@0 538 * object whose references can easily be lost facilitates that simple disposal.
michael@0 539 *
michael@0 540 * When metadata is updated, the caller must explicitly persist the changes
michael@0 541 * to disk. This prevents excessive I/O during updates.
michael@0 542 *
michael@0 543 * The store has a mechanism for ensuring it doesn't grow too large. A ceiling
michael@0 544 * is placed on the number of daily events that can occur for events that can
michael@0 545 * occur with relatively high frequency, notably plugin crashes and hangs
michael@0 546 * (plugins can enter cycles where they repeatedly crash). If we've reached
michael@0 547 * the high water mark and new data arrives, it's silently dropped.
michael@0 548 * However, the count of actual events is always preserved. This allows
michael@0 549 * us to report on the severity of problems beyond the storage threshold.
michael@0 550 *
michael@0 551 * Main process crashes are excluded from limits because they are both
michael@0 552 * important and should be rare.
michael@0 553 *
michael@0 554 * @param storeDir (string)
michael@0 555 * Directory the store should be located in.
michael@0 556 * @param telemetrySizeKey (string)
michael@0 557 * The telemetry histogram that should be used to store the size
michael@0 558 * of the data file.
michael@0 559 */
function CrashStore(storeDir, telemetrySizeKey) {
  this._storeDir = storeDir;
  this._telemetrySizeKey = telemetrySizeKey;

  // The data file lives directly inside the store directory.
  let dataFile = OS.Path.join(storeDir, "store.json.mozlz4");
  this._storePath = dataFile;

  // In-memory copy of what was read from disk; null until load() is called.
  this._data = null;

  // Days since UNIX epoch -> Map of event type -> count. Filled in when
  // the JSON file is loaded and kept current as new events are added.
  this._countsByDay = new Map();
}
michael@0 574
michael@0 575 CrashStore.prototype = Object.freeze({
michael@0 576 // A crash that occurred in the main process.
michael@0 577 TYPE_MAIN_CRASH: "main-crash",
michael@0 578
michael@0 579 // A crash in a plugin process.
michael@0 580 TYPE_PLUGIN_CRASH: "plugin-crash",
michael@0 581
michael@0 582 // A hang in a plugin process.
michael@0 583 TYPE_PLUGIN_HANG: "plugin-hang",
michael@0 584
michael@0 585 // Maximum number of events to store per day. This establishes a
michael@0 586 // ceiling on the per-type/per-day records that will be stored.
michael@0 587 HIGH_WATER_DAILY_THRESHOLD: 100,
michael@0 588
michael@0 589 /**
michael@0 590 * Load data from disk.
michael@0 591 *
michael@0 592 * @return Promise
michael@0 593 */
michael@0 594 load: function () {
michael@0 595 return Task.spawn(function* () {
michael@0 596 // Loading replaces data. So reset data structures.
michael@0 597 this._data = {
michael@0 598 v: 1,
michael@0 599 crashes: new Map(),
michael@0 600 corruptDate: null,
michael@0 601 };
michael@0 602 this._countsByDay = new Map();
michael@0 603
michael@0 604 try {
michael@0 605 let decoder = new TextDecoder();
michael@0 606 let data = yield OS.File.read(this._storePath, {compression: "lz4"});
michael@0 607 data = JSON.parse(decoder.decode(data));
michael@0 608
michael@0 609 if (data.corruptDate) {
michael@0 610 this._data.corruptDate = new Date(data.corruptDate);
michael@0 611 }
michael@0 612
michael@0 613 // actualCounts is used to validate that the derived counts by
michael@0 614 // days stored in the payload matches up to actual data.
michael@0 615 let actualCounts = new Map();
michael@0 616
michael@0 617 for (let id in data.crashes) {
michael@0 618 let crash = data.crashes[id];
michael@0 619 let denormalized = this._denormalize(crash);
michael@0 620
michael@0 621 this._data.crashes.set(id, denormalized);
michael@0 622
michael@0 623 let key = dateToDays(denormalized.crashDate) + "-" + denormalized.type;
michael@0 624 actualCounts.set(key, (actualCounts.get(key) || 0) + 1);
michael@0 625 }
michael@0 626
michael@0 627 // The validation in this loop is arguably not necessary. We perform
michael@0 628 // it as a defense against unknown bugs.
michael@0 629 for (let dayKey in data.countsByDay) {
michael@0 630 let day = parseInt(dayKey, 10);
michael@0 631 for (let type in data.countsByDay[day]) {
michael@0 632 this._ensureCountsForDay(day);
michael@0 633
michael@0 634 let count = data.countsByDay[day][type];
michael@0 635 let key = day + "-" + type;
michael@0 636
michael@0 637 // If the payload says we have data for a given day but we
michael@0 638 // don't, the payload is wrong. Ignore it.
michael@0 639 if (!actualCounts.has(key)) {
michael@0 640 continue;
michael@0 641 }
michael@0 642
michael@0 643 // If we encountered more data in the payload than what the
michael@0 644 // data structure says, use the proper value.
michael@0 645 count = Math.max(count, actualCounts.get(key));
michael@0 646
michael@0 647 this._countsByDay.get(day).set(type, count);
michael@0 648 }
michael@0 649 }
michael@0 650 } catch (ex if ex instanceof OS.File.Error && ex.becauseNoSuchFile) {
michael@0 651 // Missing files (first use) are allowed.
michael@0 652 } catch (ex) {
michael@0 653 // If we can't load for any reason, mark a corrupt date in the instance
michael@0 654 // and swallow the error.
michael@0 655 //
michael@0 656 // The marking of a corrupted file is intentionally not persisted to
michael@0 657 // disk yet. Instead, we wait until the next save(). This is to give
michael@0 658 // non-permanent failures the opportunity to recover on their own.
michael@0 659 this._data.corruptDate = new Date();
michael@0 660 }
michael@0 661 }.bind(this));
michael@0 662 },
michael@0 663
michael@0 664 /**
michael@0 665 * Save data to disk.
michael@0 666 *
michael@0 667 * @return Promise<null>
michael@0 668 */
michael@0 669 save: function () {
michael@0 670 return Task.spawn(function* () {
michael@0 671 if (!this._data) {
michael@0 672 return;
michael@0 673 }
michael@0 674
michael@0 675 let normalized = {
michael@0 676 // The version should be incremented whenever the format
michael@0 677 // changes.
michael@0 678 v: 1,
michael@0 679 // Maps crash IDs to objects defining the crash.
michael@0 680 crashes: {},
michael@0 681 // Maps days since UNIX epoch to objects mapping event types to
michael@0 682 // counts. This is a mirror of this._countsByDay. e.g.
michael@0 683 // {
michael@0 684 // 15000: {
michael@0 685 // "main-crash": 2,
michael@0 686 // "plugin-crash": 1
michael@0 687 // }
michael@0 688 // }
michael@0 689 countsByDay: {},
michael@0 690
michael@0 691 // When the store was last corrupted.
michael@0 692 corruptDate: null,
michael@0 693 };
michael@0 694
michael@0 695 if (this._data.corruptDate) {
michael@0 696 normalized.corruptDate = this._data.corruptDate.getTime();
michael@0 697 }
michael@0 698
michael@0 699 for (let [id, crash] of this._data.crashes) {
michael@0 700 let c = this._normalize(crash);
michael@0 701 normalized.crashes[id] = c;
michael@0 702 }
michael@0 703
michael@0 704 for (let [day, m] of this._countsByDay) {
michael@0 705 normalized.countsByDay[day] = {};
michael@0 706 for (let [type, count] of m) {
michael@0 707 normalized.countsByDay[day][type] = count;
michael@0 708 }
michael@0 709 }
michael@0 710
michael@0 711 let encoder = new TextEncoder();
michael@0 712 let data = encoder.encode(JSON.stringify(normalized));
michael@0 713 let size = yield OS.File.writeAtomic(this._storePath, data, {
michael@0 714 tmpPath: this._storePath + ".tmp",
michael@0 715 compression: "lz4"});
michael@0 716 if (this._telemetrySizeKey) {
michael@0 717 Services.telemetry.getHistogramById(this._telemetrySizeKey).add(size);
michael@0 718 }
michael@0 719 }.bind(this));
michael@0 720 },
michael@0 721
michael@0 722 /**
michael@0 723 * Normalize an object into one fit for serialization.
michael@0 724 *
michael@0 725 * This function along with _denormalize() serve to hack around the
michael@0 726 * default handling of Date JSON serialization because Date serialization
michael@0 727 * is undefined by JSON.
michael@0 728 *
michael@0 729 * Fields ending with "Date" are assumed to contain Date instances.
michael@0 730 * We convert these to milliseconds since epoch on output and back to
michael@0 731 * Date on input.
michael@0 732 */
michael@0 733 _normalize: function (o) {
michael@0 734 let normalized = {};
michael@0 735
michael@0 736 for (let k in o) {
michael@0 737 let v = o[k];
michael@0 738 if (v && k.endsWith("Date")) {
michael@0 739 normalized[k] = v.getTime();
michael@0 740 } else {
michael@0 741 normalized[k] = v;
michael@0 742 }
michael@0 743 }
michael@0 744
michael@0 745 return normalized;
michael@0 746 },
michael@0 747
michael@0 748 /**
michael@0 749 * Convert a serialized object back to its native form.
michael@0 750 */
michael@0 751 _denormalize: function (o) {
michael@0 752 let n = {};
michael@0 753
michael@0 754 for (let k in o) {
michael@0 755 let v = o[k];
michael@0 756 if (v && k.endsWith("Date")) {
michael@0 757 n[k] = new Date(parseInt(v, 10));
michael@0 758 } else {
michael@0 759 n[k] = v;
michael@0 760 }
michael@0 761 }
michael@0 762
michael@0 763 return n;
michael@0 764 },
michael@0 765
michael@0 766 /**
michael@0 767 * Prune old crash data.
michael@0 768 *
michael@0 769 * Crashes without recent activity are pruned from the store so the
michael@0 770 * size of the store is not unbounded. If there is activity on a crash,
michael@0 771 * that activity will keep the crash and all its data around for longer.
michael@0 772 *
michael@0 773 * @param date
michael@0 774 * (Date) The cutoff at which data will be pruned. If an entry
michael@0 775 * doesn't have data newer than this, it will be pruned.
michael@0 776 */
michael@0 777 pruneOldCrashes: function (date) {
michael@0 778 for (let crash of this.crashes) {
michael@0 779 let newest = crash.newestDate;
michael@0 780 if (!newest || newest.getTime() < date.getTime()) {
michael@0 781 this._data.crashes.delete(crash.id);
michael@0 782 }
michael@0 783 }
michael@0 784 },
michael@0 785
michael@0 786 /**
michael@0 787 * Date the store was last corrupted and required a reset.
michael@0 788 *
michael@0 789 * May be null (no corruption has ever occurred) or a Date instance.
michael@0 790 */
michael@0 791 get corruptDate() {
michael@0 792 return this._data.corruptDate;
michael@0 793 },
michael@0 794
michael@0 795 /**
michael@0 796 * The number of distinct crashes tracked.
michael@0 797 */
michael@0 798 get crashesCount() {
michael@0 799 return this._data.crashes.size;
michael@0 800 },
michael@0 801
michael@0 802 /**
michael@0 803 * All crashes tracked.
michael@0 804 *
michael@0 805 * This is an array of CrashRecord.
michael@0 806 */
michael@0 807 get crashes() {
michael@0 808 let crashes = [];
michael@0 809 for (let [id, crash] of this._data.crashes) {
michael@0 810 crashes.push(new CrashRecord(crash));
michael@0 811 }
michael@0 812
michael@0 813 return crashes;
michael@0 814 },
michael@0 815
michael@0 816 /**
michael@0 817 * Obtain a particular crash from its ID.
michael@0 818 *
michael@0 819 * A CrashRecord will be returned if the crash exists. null will be returned
michael@0 820 * if the crash is unknown.
michael@0 821 */
michael@0 822 getCrash: function (id) {
michael@0 823 for (let crash of this.crashes) {
michael@0 824 if (crash.id == id) {
michael@0 825 return crash;
michael@0 826 }
michael@0 827 }
michael@0 828
michael@0 829 return null;
michael@0 830 },
michael@0 831
michael@0 832 _ensureCountsForDay: function (day) {
michael@0 833 if (!this._countsByDay.has(day)) {
michael@0 834 this._countsByDay.set(day, new Map());
michael@0 835 }
michael@0 836 },
michael@0 837
michael@0 838 /**
michael@0 839 * Ensure the crash record is present in storage.
michael@0 840 *
michael@0 841 * Returns the crash record if we're allowed to store it or null
michael@0 842 * if we've hit the high water mark.
michael@0 843 *
michael@0 844 * @param id
michael@0 845 * (string) The crash ID.
michael@0 846 * @param type
michael@0 847 * (string) One of the this.TYPE_* constants describing the crash type.
michael@0 848 * @param date
michael@0 849 * (Date) When this crash occurred.
michael@0 850 *
michael@0 851 * @return null | object crash record
michael@0 852 */
michael@0 853 _ensureCrashRecord: function (id, type, date) {
michael@0 854 let day = dateToDays(date);
michael@0 855 this._ensureCountsForDay(day);
michael@0 856
michael@0 857 let count = (this._countsByDay.get(day).get(type) || 0) + 1;
michael@0 858 this._countsByDay.get(day).set(type, count);
michael@0 859
michael@0 860 if (count > this.HIGH_WATER_DAILY_THRESHOLD && type != this.TYPE_MAIN_CRASH) {
michael@0 861 return null;
michael@0 862 }
michael@0 863
michael@0 864 if (!this._data.crashes.has(id)) {
michael@0 865 this._data.crashes.set(id, {
michael@0 866 id: id,
michael@0 867 type: type,
michael@0 868 crashDate: date,
michael@0 869 });
michael@0 870 }
michael@0 871
michael@0 872 let crash = this._data.crashes.get(id);
michael@0 873 crash.type = type;
michael@0 874 crash.date = date;
michael@0 875
michael@0 876 return crash;
michael@0 877 },
michael@0 878
michael@0 879 /**
michael@0 880 * Record the occurrence of a crash in the main process.
michael@0 881 *
michael@0 882 * @param id (string) Crash ID. Likely a UUID.
michael@0 883 * @param date (Date) When the crash occurred.
michael@0 884 */
michael@0 885 addMainProcessCrash: function (id, date) {
michael@0 886 this._ensureCrashRecord(id, this.TYPE_MAIN_CRASH, date);
michael@0 887 },
michael@0 888
michael@0 889 /**
michael@0 890 * Record the occurrence of a crash in a plugin process.
michael@0 891 *
michael@0 892 * @param id (string) Crash ID. Likely a UUID.
michael@0 893 * @param date (Date) When the crash occurred.
michael@0 894 */
michael@0 895 addPluginCrash: function (id, date) {
michael@0 896 this._ensureCrashRecord(id, this.TYPE_PLUGIN_CRASH, date);
michael@0 897 },
michael@0 898
michael@0 899 /**
michael@0 900 * Record the occurrence of a hang in a plugin process.
michael@0 901 *
michael@0 902 * @param id (string) Crash ID. Likely a UUID.
michael@0 903 * @param date (Date) When the hang was reported.
michael@0 904 */
michael@0 905 addPluginHang: function (id, date) {
michael@0 906 this._ensureCrashRecord(id, this.TYPE_PLUGIN_HANG, date);
michael@0 907 },
michael@0 908
michael@0 909 get mainProcessCrashes() {
michael@0 910 let crashes = [];
michael@0 911 for (let crash of this.crashes) {
michael@0 912 if (crash.isMainProcessCrash) {
michael@0 913 crashes.push(crash);
michael@0 914 }
michael@0 915 }
michael@0 916
michael@0 917 return crashes;
michael@0 918 },
michael@0 919
michael@0 920 get pluginCrashes() {
michael@0 921 let crashes = [];
michael@0 922 for (let crash of this.crashes) {
michael@0 923 if (crash.isPluginCrash) {
michael@0 924 crashes.push(crash);
michael@0 925 }
michael@0 926 }
michael@0 927
michael@0 928 return crashes;
michael@0 929 },
michael@0 930
michael@0 931 get pluginHangs() {
michael@0 932 let crashes = [];
michael@0 933 for (let crash of this.crashes) {
michael@0 934 if (crash.isPluginHang) {
michael@0 935 crashes.push(crash);
michael@0 936 }
michael@0 937 }
michael@0 938
michael@0 939 return crashes;
michael@0 940 },
michael@0 941 });
michael@0 942
/**
 * A single crash together with its metadata.
 *
 * Thin read-only wrapper around the low-level anonymous JS object that the
 * CrashStore keeps for each crash, exposing a consistent and helpful API.
 *
 * Instances of this type should only be constructed inside this module,
 * not externally. The constructor is not considered a public API.
 *
 * @param o (object)
 *        The crash's entry from the CrashStore.
 */
function CrashRecord(o) {
  this._o = o;
}

CrashRecord.prototype = Object.freeze({
  /** The crash ID stored on the wrapped entry. */
  get id() {
    return this._o.id;
  },

  /** The crash date stored on the wrapped entry. */
  get crashDate() {
    return this._o.crashDate;
  },

  /**
   * The most recent date held by this record.
   *
   * Convenience getter; the value is used to decide when a record expires.
   * A record currently carries a single date, so this is the crash date.
   */
  get newestDate() {
    return this.crashDate;
  },

  /**
   * The earliest date held by this record.
   *
   * A record currently carries a single date, so this is the crash date.
   */
  get oldestDate() {
    return this.crashDate;
  },

  /** The crash type stored on the wrapped entry. */
  get type() {
    return this._o.type;
  },

  /** Whether the entry's type equals CrashStore's TYPE_MAIN_CRASH. */
  get isMainProcessCrash() {
    return this.type == CrashStore.prototype.TYPE_MAIN_CRASH;
  },

  /** Whether the entry's type equals CrashStore's TYPE_PLUGIN_CRASH. */
  get isPluginCrash() {
    return this.type == CrashStore.prototype.TYPE_PLUGIN_CRASH;
  },

  /** Whether the entry's type equals CrashStore's TYPE_PLUGIN_HANG. */
  get isPluginHang() {
    return this.type == CrashStore.prototype.TYPE_PLUGIN_HANG;
  },
});
michael@0 999
/**
 * The global CrashManager instance used by the running application.
 *
 * CrashManager is likely only ever instantiated once per application
 * lifetime. The main reason it's implemented as a reusable type is to
 * facilitate testing.
 */
XPCOMUtils.defineLazyGetter(this.CrashManager, "Singleton", function () {
  if (!gCrashManager) {
    let reportsDir = OS.Path.join(OS.Constants.Path.userApplicationDataDir,
                                  "Crash Reports");
    let storeDir = OS.Path.join(OS.Constants.Path.profileDir, "crashes");

    let options = {
      pendingDumpsDir: OS.Path.join(reportsDir, "pending"),
      submittedDumpsDir: OS.Path.join(reportsDir, "submitted"),
      eventsDirs: [
        OS.Path.join(reportsDir, "events"),
        OS.Path.join(storeDir, "events"),
      ],
      storeDir: storeDir,
      telemetryStoreSizeKey: "CRASH_STORE_COMPRESSED_BYTES",
    };

    gCrashManager = new CrashManager(options);

    // Aggregate event files automatically shortly after startup so that it
    // happens with some frequency.
    //
    // This does cost some work near startup, but aggregating periodically
    // keeps the amount of work per run small, and a well-behaving install
    // should have little to do here. An install with a lot of work pending
    // has bigger problems than reduced performance near startup.
    gCrashManager.scheduleMaintenance(AGGREGATE_STARTUP_DELAY_MS);
  }

  return gCrashManager;
});

mercurial