memory/replace/dmd/DMD.cpp

changeset 0:6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/memory/replace/dmd/DMD.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,2612 @@
     1.4 +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     1.5 +/* vim: set ts=8 sts=2 et sw=2 tw=80: */
     1.6 +/* This Source Code Form is subject to the terms of the Mozilla Public
     1.7 + * License, v. 2.0. If a copy of the MPL was not distributed with this
     1.8 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     1.9 +
    1.10 +#include "DMD.h"
    1.11 +
    1.12 +#include <ctype.h>
    1.13 +#include <errno.h>
    1.14 +#include <limits.h>
    1.15 +#include <stdarg.h>
    1.16 +#include <stdio.h>
    1.17 +#include <stdlib.h>
    1.18 +#include <string.h>
    1.19 +
    1.20 +#ifdef XP_WIN
    1.21 +#if defined(MOZ_OPTIMIZE) && !defined(MOZ_PROFILING)
    1.22 +#error "Optimized, DMD-enabled builds on Windows must be built with --enable-profiling"
    1.23 +#endif
    1.24 +#include <windows.h>
    1.25 +#include <process.h>
    1.26 +#else
    1.27 +#include <unistd.h>
    1.28 +#endif
    1.29 +
    1.30 +#ifdef ANDROID
    1.31 +#include <android/log.h>
    1.32 +#endif
    1.33 +
    1.34 +#include "nscore.h"
    1.35 +#include "nsStackWalk.h"
    1.36 +
    1.37 +#include "js/HashTable.h"
    1.38 +#include "js/Vector.h"
    1.39 +
    1.40 +#include "mozilla/Assertions.h"
    1.41 +#include "mozilla/HashFunctions.h"
    1.42 +#include "mozilla/Likely.h"
    1.43 +#include "mozilla/MemoryReporting.h"
    1.44 +
    1.45 +// MOZ_REPLACE_ONLY_MEMALIGN saves us from having to define
    1.46 +// replace_{posix_memalign,aligned_alloc,valloc}.  It requires defining
    1.47 +// PAGE_SIZE.  Nb: sysconf() is expensive, but it's only used for (the obsolete
    1.48 +// and rarely used) valloc.
    1.49 +#define MOZ_REPLACE_ONLY_MEMALIGN 1
    1.50 +#ifdef XP_WIN
    1.51 +#define PAGE_SIZE GetPageSize()
    1.52 +static long GetPageSize()
    1.53 +{
    1.54 +  SYSTEM_INFO si;
    1.55 +  GetSystemInfo(&si);
    1.56 +  return si.dwPageSize;
    1.57 +}
    1.58 +#else
    1.59 +#define PAGE_SIZE sysconf(_SC_PAGESIZE)
    1.60 +#endif
    1.61 +#include "replace_malloc.h"
    1.62 +#undef MOZ_REPLACE_ONLY_MEMALIGN
    1.63 +#undef PAGE_SIZE
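           +
           +// With MOZ_REPLACE_ONLY_MEMALIGN set, replace_malloc.h supplies the aligned
           +// allocation functions itself in terms of replace_memalign, roughly along
           +// these lines (an illustrative sketch, not the actual header):
           +//
           +//   void* replace_valloc(size_t aSize)
           +//   {
           +//     return replace_memalign(PAGE_SIZE, aSize);
           +//   }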
    1.64 +
    1.65 +namespace mozilla {
    1.66 +namespace dmd {
    1.67 +
    1.68 +//---------------------------------------------------------------------------
    1.69 +// Utilities
    1.70 +//---------------------------------------------------------------------------
    1.71 +
    1.72 +#ifndef DISALLOW_COPY_AND_ASSIGN
    1.73 +#define DISALLOW_COPY_AND_ASSIGN(T) \
    1.74 +  T(const T&);                      \
    1.75 +  void operator=(const T&)
    1.76 +#endif
    1.77 +
    1.78 +static const malloc_table_t* gMallocTable = nullptr;
    1.79 +
    1.80 +// This enables/disables DMD.
    1.81 +static bool gIsDMDRunning = false;
    1.82 +
    1.83 +// This provides infallible allocations (they abort on OOM).  We use it for all
    1.84 +// of DMD's own allocations, which fall into the following three cases.
    1.85 +// - Direct allocations (the easy case).
    1.86 +// - Indirect allocations in js::{Vector,HashSet,HashMap} -- this class serves
    1.87 +//   as their AllocPolicy.
    1.88 +// - Other indirect allocations (e.g. NS_StackWalk) -- see the comments on
    1.89 +//   Thread::mBlockIntercepts and in replace_malloc for how these work.
    1.90 +//
    1.91 +class InfallibleAllocPolicy
    1.92 +{
    1.93 +  static void ExitOnFailure(const void* aP);
    1.94 +
    1.95 +public:
    1.96 +  static void* malloc_(size_t aSize)
    1.97 +  {
    1.98 +    void* p = gMallocTable->malloc(aSize);
    1.99 +    ExitOnFailure(p);
   1.100 +    return p;
   1.101 +  }
   1.102 +
   1.103 +  static void* calloc_(size_t aSize)
   1.104 +  {
   1.105 +    void* p = gMallocTable->calloc(1, aSize);
   1.106 +    ExitOnFailure(p);
   1.107 +    return p;
   1.108 +  }
   1.109 +
   1.110 +  // This realloc_ is the one we use for direct reallocs within DMD.
   1.111 +  static void* realloc_(void* aPtr, size_t aNewSize)
   1.112 +  {
   1.113 +    void* p = gMallocTable->realloc(aPtr, aNewSize);
   1.114 +    ExitOnFailure(p);
   1.115 +    return p;
   1.116 +  }
   1.117 +
   1.118 +  // This realloc_ is required for this to be a JS container AllocPolicy.
   1.119 +  static void* realloc_(void* aPtr, size_t aOldSize, size_t aNewSize)
   1.120 +  {
   1.121 +    return InfallibleAllocPolicy::realloc_(aPtr, aNewSize);
   1.122 +  }
   1.123 +
   1.124 +  static void* memalign_(size_t aAlignment, size_t aSize)
   1.125 +  {
   1.126 +    void* p = gMallocTable->memalign(aAlignment, aSize);
   1.127 +    ExitOnFailure(p);
   1.128 +    return p;
   1.129 +  }
   1.130 +
   1.131 +  static void free_(void* aPtr) { gMallocTable->free(aPtr); }
   1.132 +
   1.133 +  static char* strdup_(const char* aStr)
   1.134 +  {
   1.135 +    char* s = (char*) InfallibleAllocPolicy::malloc_(strlen(aStr) + 1);
   1.136 +    strcpy(s, aStr);
   1.137 +    return s;
   1.138 +  }
   1.139 +
   1.140 +  template <class T>
   1.141 +  static T* new_()
   1.142 +  {
   1.143 +    void* mem = malloc_(sizeof(T));
   1.144 +    ExitOnFailure(mem);
   1.145 +    return new (mem) T;
   1.146 +  }
   1.147 +
   1.148 +  template <class T, typename P1>
   1.149 +  static T* new_(P1 p1)
   1.150 +  {
   1.151 +    void* mem = malloc_(sizeof(T));
   1.152 +    ExitOnFailure(mem);
   1.153 +    return new (mem) T(p1);
   1.154 +  }
   1.155 +
   1.156 +  template <class T>
   1.157 +  static void delete_(T *p)
   1.158 +  {
   1.159 +    if (p) {
   1.160 +      p->~T();
   1.161 +      InfallibleAllocPolicy::free_(p);
   1.162 +    }
   1.163 +  }
   1.164 +
   1.165 +  static void reportAllocOverflow() { ExitOnFailure(nullptr); }
   1.166 +};
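           +
           +// An illustrative sketch of how this policy plugs into the JS containers
           +// (DMD's real tables are defined later in this file):
           +//
           +//   js::Vector<char, 0, InfallibleAllocPolicy> v;
           +//   (void)v.append('!');  // on OOM this aborts via ExitOnFailure()
           +//                         // instead of returning false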
   1.167 +
   1.168 +// This is only needed because of the |const void*| vs |void*| arg mismatch.
   1.169 +static size_t
   1.170 +MallocSizeOf(const void* aPtr)
   1.171 +{
   1.172 +  return gMallocTable->malloc_usable_size(const_cast<void*>(aPtr));
   1.173 +}
   1.174 +
   1.175 +static void
   1.176 +StatusMsg(const char* aFmt, ...)
   1.177 +{
   1.178 +  va_list ap;
   1.179 +  va_start(ap, aFmt);
   1.180 +#ifdef ANDROID
   1.181 +  __android_log_vprint(ANDROID_LOG_INFO, "DMD", aFmt, ap);
   1.182 +#else
   1.183 +  // The +64 is easily enough for the "DMD[<pid>] " prefix and the NUL.
   1.184 +  char* fmt = (char*) InfallibleAllocPolicy::malloc_(strlen(aFmt) + 64);
   1.185 +  sprintf(fmt, "DMD[%d] %s", getpid(), aFmt);
   1.186 +  vfprintf(stderr, fmt, ap);
   1.187 +  InfallibleAllocPolicy::free_(fmt);
   1.188 +#endif
   1.189 +  va_end(ap);
   1.190 +}
   1.191 +
   1.192 +/* static */ void
   1.193 +InfallibleAllocPolicy::ExitOnFailure(const void* aP)
   1.194 +{
   1.195 +  if (!aP) {
   1.196 +    StatusMsg("out of memory;  aborting\n");
   1.197 +    MOZ_CRASH();
   1.198 +  }
   1.199 +}
   1.200 +
   1.201 +void
   1.202 +Writer::Write(const char* aFmt, ...) const
   1.203 +{
   1.204 +  va_list ap;
   1.205 +  va_start(ap, aFmt);
   1.206 +  mWriterFun(mWriteState, aFmt, ap);
   1.207 +  va_end(ap);
   1.208 +}
   1.209 +
   1.210 +#define W(...) aWriter.Write(__VA_ARGS__);
   1.211 +
   1.212 +#define WriteTitle(...)                                                       \
   1.213 +  W("------------------------------------------------------------------\n");  \
   1.214 +  W(__VA_ARGS__);                                                             \
   1.215 +  W("------------------------------------------------------------------\n\n");
   1.216 +
   1.217 +MOZ_EXPORT void
   1.218 +FpWrite(void* aWriteState, const char* aFmt, va_list aAp)
   1.219 +{
   1.220 +  FILE* fp = static_cast<FILE*>(aWriteState);
   1.221 +  vfprintf(fp, aFmt, aAp);
   1.222 +}
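           +
           +// Together these let DMD dump anywhere a FILE* can reach.  An illustrative
           +// sketch, assuming Writer's constructor (declared in DMD.h) takes the
           +// writer function and its opaque write state:
           +//
           +//   Writer writer(FpWrite, stderr);
           +//   writer.Write("DMD[%d] dump follows\n", getpid());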
   1.223 +
   1.224 +static double
   1.225 +Percent(size_t part, size_t whole)
   1.226 +{
   1.227 +  return (whole == 0) ? 0 : 100 * (double)part / whole;
   1.228 +}
   1.229 +
   1.230 +// Commifies the number and prepends a '~' if requested.  Best used with
   1.231 +// |kBufLen| and |gBuf[1234]|, because they should be big enough for any number
   1.232 +// we'll see.
   1.233 +static char*
   1.234 +Show(size_t n, char* buf, size_t buflen, bool addTilde = false)
   1.235 +{
   1.236 +  int nc = 0, i = 0, lasti = buflen - 2;
   1.237 +  buf[lasti + 1] = '\0';
   1.238 +  if (n == 0) {
   1.239 +    buf[lasti - i] = '0';
   1.240 +    i++;
   1.241 +  } else {
   1.242 +    while (n > 0) {
   1.243 +      if (((i - nc) % 3) == 0 && i != 0) {
   1.244 +        buf[lasti - i] = ',';
   1.245 +        i++;
   1.246 +        nc++;
   1.247 +      }
   1.248 +      buf[lasti - i] = static_cast<char>((n % 10) + '0');
   1.249 +      i++;
   1.250 +      n /= 10;
   1.251 +    }
   1.252 +  }
   1.253 +  int firstCharIndex = lasti - i + 1;
   1.254 +
   1.255 +  if (addTilde) {
   1.256 +    firstCharIndex--;
   1.257 +    buf[firstCharIndex] = '~';
   1.258 +  }
   1.259 +
   1.260 +  MOZ_ASSERT(firstCharIndex >= 0);
   1.261 +  return &buf[firstCharIndex];
   1.262 +}
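           +
           +// For example (values worked through the loop above by hand):
           +//
           +//   Show(0,       gBuf1, kBufLen)        -> "0"
           +//   Show(1234567, gBuf1, kBufLen)        -> "1,234,567"
           +//   Show(1234567, gBuf1, kBufLen, true)  -> "~1,234,567"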
   1.263 +
   1.264 +static const char*
   1.265 +Plural(size_t aN)
   1.266 +{
   1.267 +  return aN == 1 ? "" : "s";
   1.268 +}
   1.269 +
   1.270 +// Used by calls to Show().
   1.271 +static const size_t kBufLen = 64;
   1.272 +static char gBuf1[kBufLen];
   1.273 +static char gBuf2[kBufLen];
   1.274 +static char gBuf3[kBufLen];
   1.275 +static char gBuf4[kBufLen];
   1.276 +
   1.277 +//---------------------------------------------------------------------------
   1.278 +// Options (Part 1)
   1.279 +//---------------------------------------------------------------------------
   1.280 +
   1.281 +class Options
   1.282 +{
   1.283 +  template <typename T>
   1.284 +  struct NumOption
   1.285 +  {
   1.286 +    const T mDefault;
   1.287 +    const T mMax;
   1.288 +    T       mActual;
   1.289 +    NumOption(T aDefault, T aMax)
   1.290 +      : mDefault(aDefault), mMax(aMax), mActual(aDefault)
   1.291 +    {}
   1.292 +  };
   1.293 +
   1.294 +  enum Mode {
   1.295 +    Normal,   // run normally
   1.296 +    Test,     // do some basic correctness tests
   1.297 +    Stress    // do some performance stress tests
   1.298 +  };
   1.299 +
   1.300 +  char* mDMDEnvVar;   // a saved copy, for printing during Dump()
   1.301 +
   1.302 +  NumOption<size_t>   mSampleBelowSize;
   1.303 +  NumOption<uint32_t> mMaxFrames;
   1.304 +  NumOption<uint32_t> mMaxRecords;
   1.305 +  Mode mMode;
   1.306 +
   1.307 +  void BadArg(const char* aArg);
   1.308 +  static const char* ValueIfMatch(const char* aArg, const char* aOptionName);
   1.309 +  static bool GetLong(const char* aArg, const char* aOptionName,
   1.310 +                      long aMin, long aMax, long* aN);
   1.311 +
   1.312 +public:
   1.313 +  Options(const char* aDMDEnvVar);
   1.314 +
   1.315 +  const char* DMDEnvVar() const { return mDMDEnvVar; }
   1.316 +
   1.317 +  size_t SampleBelowSize() const { return mSampleBelowSize.mActual; }
   1.318 +  size_t MaxFrames()       const { return mMaxFrames.mActual; }
   1.319 +  size_t MaxRecords()      const { return mMaxRecords.mActual; }
   1.320 +
   1.321 +  void SetSampleBelowSize(size_t aN) { mSampleBelowSize.mActual = aN; }
   1.322 +
   1.323 +  bool IsTestMode()   const { return mMode == Test; }
   1.324 +  bool IsStressMode() const { return mMode == Stress; }
   1.325 +};
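           +
           +// The DMD environment variable holds the option string.  An illustrative
           +// sketch (the option spellings here are assumptions inferred from the
           +// member names; the actual parsing lives in Options::Options, defined
           +// later):
           +//
           +//   DMD="--sample-below=4093 --max-frames=16 --mode=test" firefox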
   1.326 +
   1.327 +static Options *gOptions;
   1.328 +
   1.329 +//---------------------------------------------------------------------------
   1.330 +// The global lock
   1.331 +//---------------------------------------------------------------------------
   1.332 +
   1.333 +// MutexBase implements the platform-specific parts of a mutex.
   1.334 +
   1.335 +#ifdef XP_WIN
   1.336 +
   1.337 +class MutexBase
   1.338 +{
   1.339 +  CRITICAL_SECTION mCS;
   1.340 +
   1.341 +  DISALLOW_COPY_AND_ASSIGN(MutexBase);
   1.342 +
   1.343 +public:
   1.344 +  MutexBase()
   1.345 +  {
   1.346 +    InitializeCriticalSection(&mCS);
   1.347 +  }
   1.348 +
   1.349 +  ~MutexBase()
   1.350 +  {
   1.351 +    DeleteCriticalSection(&mCS);
   1.352 +  }
   1.353 +
   1.354 +  void Lock()
   1.355 +  {
   1.356 +    EnterCriticalSection(&mCS);
   1.357 +  }
   1.358 +
   1.359 +  void Unlock()
   1.360 +  {
   1.361 +    LeaveCriticalSection(&mCS);
   1.362 +  }
   1.363 +};
   1.364 +
   1.365 +#else
   1.366 +
   1.367 +#include <pthread.h>
   1.368 +#include <sys/types.h>
   1.369 +
   1.370 +class MutexBase
   1.371 +{
   1.372 +  pthread_mutex_t mMutex;
   1.373 +
   1.374 +  DISALLOW_COPY_AND_ASSIGN(MutexBase);
   1.375 +
   1.376 +public:
   1.377 +  MutexBase()
   1.378 +  {
   1.379 +    pthread_mutex_init(&mMutex, nullptr);
   1.380 +  }
   1.381 +
   1.382 +  void Lock()
   1.383 +  {
   1.384 +    pthread_mutex_lock(&mMutex);
   1.385 +  }
   1.386 +
   1.387 +  void Unlock()
   1.388 +  {
   1.389 +    pthread_mutex_unlock(&mMutex);
   1.390 +  }
   1.391 +};
   1.392 +
   1.393 +#endif
   1.394 +
   1.395 +class Mutex : private MutexBase
   1.396 +{
   1.397 +  bool mIsLocked;
   1.398 +
   1.399 +  DISALLOW_COPY_AND_ASSIGN(Mutex);
   1.400 +
   1.401 +public:
   1.402 +  Mutex()
   1.403 +    : mIsLocked(false)
   1.404 +  {}
   1.405 +
   1.406 +  void Lock()
   1.407 +  {
   1.408 +    MutexBase::Lock();
   1.409 +    MOZ_ASSERT(!mIsLocked);
   1.410 +    mIsLocked = true;
   1.411 +  }
   1.412 +
   1.413 +  void Unlock()
   1.414 +  {
   1.415 +    MOZ_ASSERT(mIsLocked);
   1.416 +    mIsLocked = false;
   1.417 +    MutexBase::Unlock();
   1.418 +  }
   1.419 +
   1.420 +  bool IsLocked()
   1.421 +  {
   1.422 +    return mIsLocked;
   1.423 +  }
   1.424 +};
   1.425 +
   1.426 +// This lock must be held while manipulating global state, such as
   1.427 +// gStackTraceTable, gBlockTable, etc.
   1.428 +static Mutex* gStateLock = nullptr;
   1.429 +
   1.430 +class AutoLockState
   1.431 +{
   1.432 +  DISALLOW_COPY_AND_ASSIGN(AutoLockState);
   1.433 +
   1.434 +public:
   1.435 +  AutoLockState()
   1.436 +  {
   1.437 +    gStateLock->Lock();
   1.438 +  }
   1.439 +  ~AutoLockState()
   1.440 +  {
   1.441 +    gStateLock->Unlock();
   1.442 +  }
   1.443 +};
   1.444 +
   1.445 +class AutoUnlockState
   1.446 +{
   1.447 +  DISALLOW_COPY_AND_ASSIGN(AutoUnlockState);
   1.448 +
   1.449 +public:
   1.450 +  AutoUnlockState()
   1.451 +  {
   1.452 +    gStateLock->Unlock();
   1.453 +  }
   1.454 +  ~AutoUnlockState()
   1.455 +  {
   1.456 +    gStateLock->Lock();
   1.457 +  }
   1.458 +};
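           +
           +// Typical nesting of the two guards (illustrative):
           +//
           +//   {
           +//     AutoLockState lock;        // acquires gStateLock
           +//     // ... manipulate gBlockTable, gStackTraceTable, ...
           +//     {
           +//       AutoUnlockState unlock;  // drop the lock around calls that can
           +//                                // re-enter malloc, e.g. NS_StackWalk
           +//     }                          // gStateLock re-acquired here
           +//   }                            // gStateLock released here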
   1.459 +
   1.460 +//---------------------------------------------------------------------------
   1.461 +// Thread-local storage and blocking of intercepts
   1.462 +//---------------------------------------------------------------------------
   1.463 +
   1.464 +#ifdef XP_WIN
   1.465 +
   1.466 +#define DMD_TLS_INDEX_TYPE              DWORD
   1.467 +#define DMD_CREATE_TLS_INDEX(i_)        do {                                  \
   1.468 +                                          (i_) = TlsAlloc();                  \
   1.469 +                                        } while (0)
   1.470 +#define DMD_DESTROY_TLS_INDEX(i_)       TlsFree((i_))
   1.471 +#define DMD_GET_TLS_DATA(i_)            TlsGetValue((i_))
   1.472 +#define DMD_SET_TLS_DATA(i_, v_)        TlsSetValue((i_), (v_))
   1.473 +
   1.474 +#else
   1.475 +
   1.476 +#include <pthread.h>
   1.477 +
   1.478 +#define DMD_TLS_INDEX_TYPE               pthread_key_t
   1.479 +#define DMD_CREATE_TLS_INDEX(i_)         pthread_key_create(&(i_), nullptr)
   1.480 +#define DMD_DESTROY_TLS_INDEX(i_)        pthread_key_delete((i_))
   1.481 +#define DMD_GET_TLS_DATA(i_)             pthread_getspecific((i_))
   1.482 +#define DMD_SET_TLS_DATA(i_, v_)         pthread_setspecific((i_), (v_))
   1.483 +
   1.484 +#endif
   1.485 +
   1.486 +static DMD_TLS_INDEX_TYPE gTlsIndex;
   1.487 +
   1.488 +class Thread
   1.489 +{
   1.490 +  // Required for allocation via InfallibleAllocPolicy::new_.
   1.491 +  friend class InfallibleAllocPolicy;
   1.492 +
   1.493 +  // When true, this blocks intercepts, which allows malloc interception
   1.494 +  // functions to themselves call malloc.  (Nb: for direct calls to malloc we
   1.495 +  // can just use InfallibleAllocPolicy::{malloc_,new_}, but we sometimes
   1.496 +  // indirectly call vanilla malloc via functions like NS_StackWalk.)
   1.497 +  bool mBlockIntercepts;
   1.498 +
   1.499 +  Thread()
   1.500 +    : mBlockIntercepts(false)
   1.501 +  {}
   1.502 +
   1.503 +  DISALLOW_COPY_AND_ASSIGN(Thread);
   1.504 +
   1.505 +public:
   1.506 +  static Thread* Fetch();
   1.507 +
   1.508 +  bool BlockIntercepts()
   1.509 +  {
   1.510 +    MOZ_ASSERT(!mBlockIntercepts);
   1.511 +    return mBlockIntercepts = true;
   1.512 +  }
   1.513 +
   1.514 +  bool UnblockIntercepts()
   1.515 +  {
   1.516 +    MOZ_ASSERT(mBlockIntercepts);
   1.517 +    return mBlockIntercepts = false;
   1.518 +  }
   1.519 +
   1.520 +  bool InterceptsAreBlocked() const
   1.521 +  {
   1.522 +    return mBlockIntercepts;
   1.523 +  }
   1.524 +};
   1.525 +
   1.526 +/* static */ Thread*
   1.527 +Thread::Fetch()
   1.528 +{
   1.529 +  Thread* t = static_cast<Thread*>(DMD_GET_TLS_DATA(gTlsIndex));
   1.530 +
   1.531 +  if (MOZ_UNLIKELY(!t)) {
   1.532 +    // This memory is never freed, even if the thread dies.  It's a leak, but
   1.533 +    // only a tiny one.
   1.534 +    t = InfallibleAllocPolicy::new_<Thread>();
   1.535 +    DMD_SET_TLS_DATA(gTlsIndex, t);
   1.536 +  }
   1.537 +
   1.538 +  return t;
   1.539 +}
   1.540 +
   1.541 +// An object of this class must be created (on the stack) before running any
   1.542 +// code that might allocate.
   1.543 +class AutoBlockIntercepts
   1.544 +{
   1.545 +  Thread* const mT;
   1.546 +
   1.547 +  DISALLOW_COPY_AND_ASSIGN(AutoBlockIntercepts);
   1.548 +
   1.549 +public:
   1.550 +  AutoBlockIntercepts(Thread* aT)
   1.551 +    : mT(aT)
   1.552 +  {
   1.553 +    mT->BlockIntercepts();
   1.554 +  }
   1.555 +  ~AutoBlockIntercepts()
   1.556 +  {
   1.557 +    MOZ_ASSERT(mT->InterceptsAreBlocked());
   1.558 +    mT->UnblockIntercepts();
   1.559 +  }
   1.560 +};
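           +
           +// The pattern used by the allocation callbacks below (illustrative):
           +//
           +//   Thread* t = Thread::Fetch();
           +//   AutoLockState lock;
           +//   AutoBlockIntercepts block(t);  // from here on, any malloc this thread
           +//                                  // triggers is treated as DMD's own
           +//                                  // (see replace_malloc)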
   1.561 +
   1.562 +//---------------------------------------------------------------------------
   1.563 +// Location service
   1.564 +//---------------------------------------------------------------------------
   1.565 +
   1.566 +// This class is used to print details about code locations.
   1.567 +class LocationService
   1.568 +{
   1.569 +  // WriteLocation() is the key function in this class.  It's basically a
   1.570 +  // wrapper around NS_DescribeCodeAddress.
   1.571 +  //
   1.572 +  // However, NS_DescribeCodeAddress is very slow on some platforms, and we
   1.573 +  // have lots of repeated (i.e. same PC) calls to it.  So we do some caching
   1.574 +  // of results.  Each cached result includes two strings (|mFunction| and
   1.575 +  // |mLibrary|), so we also optimize them for space in the following ways.
   1.576 +  //
   1.577 +  // - The number of distinct library names is small, e.g. a few dozen.  There
   1.578 +  //   is lots of repetition, especially of libxul.  So we intern them in their
   1.579 +  //   own table, which saves space over duplicating them for each cache entry.
   1.580 +  //
   1.581 +  // - The number of distinct function names is much higher, so we duplicate
   1.582 +  //   them in each cache entry.  That's more space-efficient than interning
   1.583 +  //   because entries containing single-occurrence function names are quickly
   1.584 +  //   overwritten, and their copies released.  In addition, empty function
   1.585 +  //   names are common, so we use nullptr to represent them compactly.
   1.586 +
   1.587 +  struct StringHasher
   1.588 +  {
   1.589 +      typedef const char* Lookup;
   1.590 +
   1.591 +      static uint32_t hash(const char* const& aS)
   1.592 +      {
   1.593 +          return HashString(aS);
   1.594 +      }
   1.595 +
   1.596 +      static bool match(const char* const& aA, const char* const& aB)
   1.597 +      {
   1.598 +          return strcmp(aA, aB) == 0;
   1.599 +      }
   1.600 +  };
   1.601 +
   1.602 +  typedef js::HashSet<const char*, StringHasher, InfallibleAllocPolicy>
   1.603 +          StringTable;
   1.604 +
   1.605 +  StringTable mLibraryStrings;
   1.606 +
   1.607 +  struct Entry
   1.608 +  {
   1.609 +    const void* mPc;
   1.610 +    char*       mFunction;  // owned by the Entry;  may be null
   1.611 +    const char* mLibrary;   // owned by mLibraryStrings;  never null
     1.612 +                            //   in an entry that is in use
   1.613 +    ptrdiff_t   mLOffset;
   1.614 +    char*       mFileName;  // owned by the Entry; may be null
   1.615 +    uint32_t    mLineNo:31;
   1.616 +    uint32_t    mInUse:1;   // is the entry used?
   1.617 +
   1.618 +    Entry()
     1.619 +      : mPc(0), mFunction(nullptr), mLibrary(nullptr), mLOffset(0),
           +        mFileName(nullptr), mLineNo(0), mInUse(0)
   1.620 +    {}
   1.621 +
   1.622 +    ~Entry()
   1.623 +    {
   1.624 +      // We don't free mLibrary because it's externally owned.
   1.625 +      InfallibleAllocPolicy::free_(mFunction);
   1.626 +      InfallibleAllocPolicy::free_(mFileName);
   1.627 +    }
   1.628 +
   1.629 +    void Replace(const void* aPc, const char* aFunction,
   1.630 +                 const char* aLibrary, ptrdiff_t aLOffset,
   1.631 +                 const char* aFileName, unsigned long aLineNo)
   1.632 +    {
   1.633 +      mPc = aPc;
   1.634 +
   1.635 +      // Convert "" to nullptr.  Otherwise, make a copy of the name.
   1.636 +      InfallibleAllocPolicy::free_(mFunction);
   1.637 +      mFunction =
   1.638 +        !aFunction[0] ? nullptr : InfallibleAllocPolicy::strdup_(aFunction);
   1.639 +      InfallibleAllocPolicy::free_(mFileName);
   1.640 +      mFileName =
   1.641 +        !aFileName[0] ? nullptr : InfallibleAllocPolicy::strdup_(aFileName);
     1.642 +
   1.644 +      mLibrary = aLibrary;
   1.645 +      mLOffset = aLOffset;
   1.646 +      mLineNo = aLineNo;
   1.647 +
   1.648 +      mInUse = 1;
   1.649 +    }
   1.650 +
   1.651 +    size_t SizeOfExcludingThis() {
   1.652 +      // Don't measure mLibrary because it's externally owned.
   1.653 +      return MallocSizeOf(mFunction) + MallocSizeOf(mFileName);
   1.654 +    }
   1.655 +  };
   1.656 +
   1.657 +  // A direct-mapped cache.  When doing a dump just after starting desktop
   1.658 +  // Firefox (which is similar to dumping after a longer-running session,
   1.659 +  // thanks to the limit on how many records we dump), a cache with 2^24
   1.660 +  // entries (which approximates an infinite-entry cache) has a ~91% hit rate.
   1.661 +  // A cache with 2^12 entries has a ~83% hit rate, and takes up ~85 KiB (on
   1.662 +  // 32-bit platforms) or ~150 KiB (on 64-bit platforms).
   1.663 +  static const size_t kNumEntries = 1 << 12;
   1.664 +  static const size_t kMask = kNumEntries - 1;
   1.665 +  Entry mEntries[kNumEntries];
   1.666 +
   1.667 +  size_t mNumCacheHits;
   1.668 +  size_t mNumCacheMisses;
   1.669 +
   1.670 +public:
   1.671 +  LocationService()
   1.672 +    : mEntries(), mNumCacheHits(0), mNumCacheMisses(0)
   1.673 +  {
   1.674 +    (void)mLibraryStrings.init(64);
   1.675 +  }
   1.676 +
   1.677 +  void WriteLocation(const Writer& aWriter, const void* aPc)
   1.678 +  {
   1.679 +    MOZ_ASSERT(gStateLock->IsLocked());
   1.680 +
   1.681 +    uint32_t index = HashGeneric(aPc) & kMask;
   1.682 +    MOZ_ASSERT(index < kNumEntries);
   1.683 +    Entry& entry = mEntries[index];
   1.684 +
   1.685 +    if (!entry.mInUse || entry.mPc != aPc) {
   1.686 +      mNumCacheMisses++;
   1.687 +
   1.688 +      // NS_DescribeCodeAddress can (on Linux) acquire a lock inside
   1.689 +      // the shared library loader.  Another thread might call malloc
   1.690 +      // while holding that lock (when loading a shared library).  So
   1.691 +      // we have to exit gStateLock around this call.  For details, see
   1.692 +      // https://bugzilla.mozilla.org/show_bug.cgi?id=363334#c3
   1.693 +      nsCodeAddressDetails details;
   1.694 +      {
   1.695 +        AutoUnlockState unlock;
   1.696 +        (void)NS_DescribeCodeAddress(const_cast<void*>(aPc), &details);
   1.697 +      }
   1.698 +
   1.699 +      // Intern the library name.
   1.700 +      const char* library = nullptr;
   1.701 +      StringTable::AddPtr p = mLibraryStrings.lookupForAdd(details.library);
   1.702 +      if (!p) {
   1.703 +        library = InfallibleAllocPolicy::strdup_(details.library);
   1.704 +        (void)mLibraryStrings.add(p, library);
   1.705 +      } else {
   1.706 +        library = *p;
   1.707 +      }
   1.708 +
     1.709 +      entry.Replace(aPc, details.function, library, details.loffset,
           +                    details.filename, details.lineno);
   1.710 +
   1.711 +    } else {
   1.712 +      mNumCacheHits++;
   1.713 +    }
   1.714 +
   1.715 +    MOZ_ASSERT(entry.mPc == aPc);
   1.716 +
   1.717 +    uintptr_t entryPc = (uintptr_t)(entry.mPc);
   1.718 +    // Sometimes we get nothing useful.  Just print "???" for the entire entry
   1.719 +    // so that fix-linux-stack.pl doesn't complain about an empty filename.
   1.720 +    if (!entry.mFunction && !entry.mLibrary[0] && entry.mLOffset == 0) {
   1.721 +      W("   ??? 0x%x\n", entryPc);
   1.722 +    } else {
   1.723 +      // Use "???" for unknown functions.
   1.724 +      const char* entryFunction = entry.mFunction ? entry.mFunction : "???";
   1.725 +      if (entry.mFileName) {
   1.726 +        // On Windows we can get the filename and line number at runtime.
   1.727 +        W("   %s (%s:%lu) 0x%x\n",
   1.728 +          entryFunction, entry.mFileName, entry.mLineNo, entryPc);
   1.729 +      } else {
   1.730 +        // On Linux and Mac we cannot get the filename and line number at
   1.731 +        // runtime, so we print the offset in a form that fix-linux-stack.pl and
   1.732 +        // fix_macosx_stack.py can post-process.
   1.733 +        W("   %s[%s +0x%X] 0x%x\n",
   1.734 +          entryFunction, entry.mLibrary, entry.mLOffset, entryPc);
   1.735 +      }
   1.736 +    }
   1.737 +  }
   1.738 +
   1.739 +  size_t SizeOfIncludingThis()
   1.740 +  {
   1.741 +    size_t n = MallocSizeOf(this);
   1.742 +    for (uint32_t i = 0; i < kNumEntries; i++) {
   1.743 +      n += mEntries[i].SizeOfExcludingThis();
   1.744 +    }
   1.745 +
   1.746 +    n += mLibraryStrings.sizeOfExcludingThis(MallocSizeOf);
   1.747 +    for (StringTable::Range r = mLibraryStrings.all();
   1.748 +         !r.empty();
   1.749 +         r.popFront()) {
   1.750 +      n += MallocSizeOf(r.front());
   1.751 +    }
   1.752 +
   1.753 +    return n;
   1.754 +  }
   1.755 +
   1.756 +  size_t CacheCapacity() const { return kNumEntries; }
   1.757 +
   1.758 +  size_t CacheCount() const
   1.759 +  {
   1.760 +    size_t n = 0;
   1.761 +    for (size_t i = 0; i < kNumEntries; i++) {
   1.762 +      if (mEntries[i].mInUse) {
   1.763 +        n++;
   1.764 +      }
   1.765 +    }
   1.766 +    return n;
   1.767 +  }
   1.768 +
   1.769 +  size_t NumCacheHits()   const { return mNumCacheHits; }
   1.770 +  size_t NumCacheMisses() const { return mNumCacheMisses; }
   1.771 +};
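           +
           +// The three output forms WriteLocation() can produce, with invented field
           +// values (illustrative only):
           +//
           +//   ??? 0x41ed76
           +//   nsTArray_base::EnsureCapacity (nsTArray.h:123) 0x5f03c2
           +//   PL_DHashTableOperate[libxul.so +0x2E19F] 0x7f0c3f2e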
   1.772 +
   1.773 +//---------------------------------------------------------------------------
   1.774 +// Stack traces
   1.775 +//---------------------------------------------------------------------------
   1.776 +
   1.777 +class StackTrace
   1.778 +{
   1.779 +public:
   1.780 +  static const uint32_t MaxFrames = 24;
   1.781 +
   1.782 +private:
   1.783 +  uint32_t mLength;         // The number of PCs.
   1.784 +  void* mPcs[MaxFrames];    // The PCs themselves.  If --max-frames is less
   1.785 +                            // than 24, this array is bigger than necessary,
   1.786 +                            // but that case is unusual.
   1.787 +
   1.788 +public:
   1.789 +  StackTrace() : mLength(0) {}
   1.790 +
   1.791 +  uint32_t Length() const { return mLength; }
   1.792 +  void* Pc(uint32_t i) const { MOZ_ASSERT(i < mLength); return mPcs[i]; }
   1.793 +
   1.794 +  uint32_t Size() const { return mLength * sizeof(mPcs[0]); }
   1.795 +
   1.796 +  // The stack trace returned by this function is interned in gStackTraceTable,
   1.797 +  // and so is immortal and unmovable.
   1.798 +  static const StackTrace* Get(Thread* aT);
   1.799 +
   1.800 +  void Sort()
   1.801 +  {
   1.802 +    qsort(mPcs, mLength, sizeof(mPcs[0]), StackTrace::QsortCmp);
   1.803 +  }
   1.804 +
   1.805 +  void Print(const Writer& aWriter, LocationService* aLocService) const;
   1.806 +
   1.807 +  // Hash policy.
   1.808 +
   1.809 +  typedef StackTrace* Lookup;
   1.810 +
   1.811 +  static uint32_t hash(const StackTrace* const& aSt)
   1.812 +  {
   1.813 +    return mozilla::HashBytes(aSt->mPcs, aSt->Size());
   1.814 +  }
   1.815 +
   1.816 +  static bool match(const StackTrace* const& aA,
   1.817 +                    const StackTrace* const& aB)
   1.818 +  {
   1.819 +    return aA->mLength == aB->mLength &&
   1.820 +           memcmp(aA->mPcs, aB->mPcs, aA->Size()) == 0;
   1.821 +  }
   1.822 +
   1.823 +private:
   1.824 +  static void StackWalkCallback(void* aPc, void* aSp, void* aClosure)
   1.825 +  {
   1.826 +    StackTrace* st = (StackTrace*) aClosure;
   1.827 +    MOZ_ASSERT(st->mLength < MaxFrames);
   1.828 +    st->mPcs[st->mLength] = aPc;
   1.829 +    st->mLength++;
   1.830 +  }
   1.831 +
   1.832 +  static int QsortCmp(const void* aA, const void* aB)
   1.833 +  {
   1.834 +    const void* const a = *static_cast<const void* const*>(aA);
   1.835 +    const void* const b = *static_cast<const void* const*>(aB);
   1.836 +    if (a < b) return -1;
   1.837 +    if (a > b) return  1;
   1.838 +    return 0;
   1.839 +  }
   1.840 +};
   1.841 +
   1.842 +typedef js::HashSet<StackTrace*, StackTrace, InfallibleAllocPolicy>
   1.843 +        StackTraceTable;
   1.844 +static StackTraceTable* gStackTraceTable = nullptr;
   1.845 +
     1.846 +// We won't GC the stack trace table until it exceeds this many elements.
   1.847 +static uint32_t gGCStackTraceTableWhenSizeExceeds = 4 * 1024;
   1.848 +
   1.849 +void
   1.850 +StackTrace::Print(const Writer& aWriter, LocationService* aLocService) const
   1.851 +{
   1.852 +  if (mLength == 0) {
   1.853 +    W("   (empty)\n");  // StackTrace::Get() must have failed
   1.854 +    return;
   1.855 +  }
   1.856 +
   1.857 +  for (uint32_t i = 0; i < mLength; i++) {
   1.858 +    aLocService->WriteLocation(aWriter, Pc(i));
   1.859 +  }
   1.860 +}
   1.861 +
   1.862 +/* static */ const StackTrace*
   1.863 +StackTrace::Get(Thread* aT)
   1.864 +{
   1.865 +  MOZ_ASSERT(gStateLock->IsLocked());
   1.866 +  MOZ_ASSERT(aT->InterceptsAreBlocked());
   1.867 +
   1.868 +  // On Windows, NS_StackWalk can acquire a lock from the shared library
   1.869 +  // loader.  Another thread might call malloc while holding that lock (when
   1.870 +  // loading a shared library).  So we can't be in gStateLock during the call
   1.871 +  // to NS_StackWalk.  For details, see
   1.872 +  // https://bugzilla.mozilla.org/show_bug.cgi?id=374829#c8
   1.873 +  // On Linux, something similar can happen;  see bug 824340.
   1.874 +  // So let's just release it on all platforms.
   1.875 +  nsresult rv;
   1.876 +  StackTrace tmp;
   1.877 +  {
   1.878 +    AutoUnlockState unlock;
   1.879 +    uint32_t skipFrames = 2;
   1.880 +    rv = NS_StackWalk(StackWalkCallback, skipFrames,
   1.881 +                      gOptions->MaxFrames(), &tmp, 0, nullptr);
   1.882 +  }
   1.883 +
   1.884 +  if (rv == NS_OK) {
   1.885 +    // Handle the common case first.  All is ok.  Nothing to do.
   1.886 +  } else if (rv == NS_ERROR_NOT_IMPLEMENTED || rv == NS_ERROR_FAILURE) {
   1.887 +    tmp.mLength = 0;
   1.888 +  } else if (rv == NS_ERROR_UNEXPECTED) {
   1.889 +    // XXX: This |rv| only happens on Mac, and it indicates that we're handling
   1.890 +    // a call to malloc that happened inside a mutex-handling function.  Any
   1.891 +    // attempt to create a semaphore (which can happen in printf) could
   1.892 +    // deadlock.
   1.893 +    //
   1.894 +    // However, the most complex thing DMD does after Get() returns is to put
   1.895 +    // something in a hash table, which might call
   1.896 +    // InfallibleAllocPolicy::malloc_.  I'm not yet sure if this needs special
   1.897 +    // handling, hence the forced abort.  Sorry.  If you hit this, please file
   1.898 +    // a bug and CC nnethercote.
   1.899 +    MOZ_CRASH();
   1.900 +  } else {
   1.901 +    MOZ_CRASH();  // should be impossible
   1.902 +  }
   1.903 +
   1.904 +  StackTraceTable::AddPtr p = gStackTraceTable->lookupForAdd(&tmp);
   1.905 +  if (!p) {
   1.906 +    StackTrace* stnew = InfallibleAllocPolicy::new_<StackTrace>(tmp);
   1.907 +    (void)gStackTraceTable->add(p, stnew);
   1.908 +  }
   1.909 +  return *p;
   1.910 +}
   1.911 +
   1.912 +//---------------------------------------------------------------------------
   1.913 +// Heap blocks
   1.914 +//---------------------------------------------------------------------------
   1.915 +
   1.916 +// This class combines a 2-byte-aligned pointer (i.e. one whose bottom bit
   1.917 +// is zero) with a 1-bit tag.
   1.918 +//
   1.919 +// |T| is the pointer type, e.g. |int*|, not the pointed-to type.  This makes
     1.920 +// it easier to have const pointers, e.g. |TaggedPtr<const int*>|.
   1.921 +template <typename T>
   1.922 +class TaggedPtr
   1.923 +{
   1.924 +  union
   1.925 +  {
   1.926 +    T         mPtr;
   1.927 +    uintptr_t mUint;
   1.928 +  };
   1.929 +
   1.930 +  static const uintptr_t kTagMask = uintptr_t(0x1);
   1.931 +  static const uintptr_t kPtrMask = ~kTagMask;
   1.932 +
   1.933 +  static bool IsTwoByteAligned(T aPtr)
   1.934 +  {
   1.935 +    return (uintptr_t(aPtr) & kTagMask) == 0;
   1.936 +  }
   1.937 +
   1.938 +public:
   1.939 +  TaggedPtr()
   1.940 +    : mPtr(nullptr)
   1.941 +  {}
   1.942 +
   1.943 +  TaggedPtr(T aPtr, bool aBool)
   1.944 +    : mPtr(aPtr)
   1.945 +  {
   1.946 +    MOZ_ASSERT(IsTwoByteAligned(aPtr));
   1.947 +    uintptr_t tag = uintptr_t(aBool);
   1.948 +    MOZ_ASSERT(tag <= kTagMask);
   1.949 +    mUint |= (tag & kTagMask);
   1.950 +  }
   1.951 +
   1.952 +  void Set(T aPtr, bool aBool)
   1.953 +  {
   1.954 +    MOZ_ASSERT(IsTwoByteAligned(aPtr));
   1.955 +    mPtr = aPtr;
   1.956 +    uintptr_t tag = uintptr_t(aBool);
   1.957 +    MOZ_ASSERT(tag <= kTagMask);
   1.958 +    mUint |= (tag & kTagMask);
   1.959 +  }
   1.960 +
   1.961 +  T Ptr() const { return reinterpret_cast<T>(mUint & kPtrMask); }
   1.962 +
   1.963 +  bool Tag() const { return bool(mUint & kTagMask); }
   1.964 +};
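           +
           +// Illustrative use (not DMD code): any 2-byte-aligned pointer has a zero
           +// bottom bit, so a bool rides along in it for free.
           +//
           +//   static int gX;                   // &gX is at least 2-byte aligned
           +//   TaggedPtr<int*> tp(&gX, true);
           +//   MOZ_ASSERT(tp.Ptr() == &gX);
           +//   MOZ_ASSERT(tp.Tag());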
   1.965 +
   1.966 +// A live heap block.
   1.967 +class Block
   1.968 +{
   1.969 +  const void*  mPtr;
   1.970 +  const size_t mReqSize;    // size requested
   1.971 +
   1.972 +  // Ptr: |mAllocStackTrace| - stack trace where this block was allocated.
   1.973 +  // Tag bit 0: |mSampled| - was this block sampled? (if so, slop == 0).
   1.974 +  TaggedPtr<const StackTrace* const>
   1.975 +    mAllocStackTrace_mSampled;
   1.976 +
   1.977 +  // This array has two elements because we record at most two reports of a
   1.978 +  // block.
   1.979 +  // - Ptr: |mReportStackTrace| - stack trace where this block was reported.
   1.980 +  //   nullptr if not reported.
   1.981 +  // - Tag bit 0: |mReportedOnAlloc| - was the block reported immediately on
   1.982 +  //   allocation?  If so, DMD must not clear the report at the end of Dump().
   1.983 +  //   Only relevant if |mReportStackTrace| is non-nullptr.
   1.984 +  //
   1.985 +  // |mPtr| is used as the key in BlockTable, so it's ok for this member
   1.986 +  // to be |mutable|.
   1.987 +  mutable TaggedPtr<const StackTrace*> mReportStackTrace_mReportedOnAlloc[2];
   1.988 +
   1.989 +public:
   1.990 +  Block(const void* aPtr, size_t aReqSize, const StackTrace* aAllocStackTrace,
   1.991 +        bool aSampled)
   1.992 +    : mPtr(aPtr),
   1.993 +      mReqSize(aReqSize),
   1.994 +      mAllocStackTrace_mSampled(aAllocStackTrace, aSampled),
   1.995 +      mReportStackTrace_mReportedOnAlloc()     // all fields get zeroed
   1.996 +  {
   1.997 +    MOZ_ASSERT(aAllocStackTrace);
   1.998 +  }
   1.999 +
  1.1000 +  size_t ReqSize() const { return mReqSize; }
  1.1001 +
  1.1002 +  // Sampled blocks always have zero slop.
  1.1003 +  size_t SlopSize() const
  1.1004 +  {
  1.1005 +    return IsSampled() ? 0 : MallocSizeOf(mPtr) - mReqSize;
  1.1006 +  }
  1.1007 +
  1.1008 +  size_t UsableSize() const
  1.1009 +  {
  1.1010 +    return IsSampled() ? mReqSize : MallocSizeOf(mPtr);
  1.1011 +  }
  1.1012 +
  1.1013 +  bool IsSampled() const
  1.1014 +  {
  1.1015 +    return mAllocStackTrace_mSampled.Tag();
  1.1016 +  }
  1.1017 +
  1.1018 +  const StackTrace* AllocStackTrace() const
  1.1019 +  {
  1.1020 +    return mAllocStackTrace_mSampled.Ptr();
  1.1021 +  }
  1.1022 +
  1.1023 +  const StackTrace* ReportStackTrace1() const {
  1.1024 +    return mReportStackTrace_mReportedOnAlloc[0].Ptr();
  1.1025 +  }
  1.1026 +
  1.1027 +  const StackTrace* ReportStackTrace2() const {
  1.1028 +    return mReportStackTrace_mReportedOnAlloc[1].Ptr();
  1.1029 +  }
  1.1030 +
  1.1031 +  bool ReportedOnAlloc1() const {
  1.1032 +    return mReportStackTrace_mReportedOnAlloc[0].Tag();
  1.1033 +  }
  1.1034 +
  1.1035 +  bool ReportedOnAlloc2() const {
  1.1036 +    return mReportStackTrace_mReportedOnAlloc[1].Tag();
  1.1037 +  }
  1.1038 +
  1.1039 +  uint32_t NumReports() const {
  1.1040 +    if (ReportStackTrace2()) {
  1.1041 +      MOZ_ASSERT(ReportStackTrace1());
  1.1042 +      return 2;
  1.1043 +    }
  1.1044 +    if (ReportStackTrace1()) {
  1.1045 +      return 1;
  1.1046 +    }
  1.1047 +    return 0;
  1.1048 +  }
  1.1049 +
  1.1050 +  // This is |const| thanks to the |mutable| fields above.
  1.1051 +  void Report(Thread* aT, bool aReportedOnAlloc) const
  1.1052 +  {
  1.1053 +    // We don't bother recording reports after the 2nd one.
  1.1054 +    uint32_t numReports = NumReports();
  1.1055 +    if (numReports < 2) {
  1.1056 +      mReportStackTrace_mReportedOnAlloc[numReports].Set(StackTrace::Get(aT),
  1.1057 +                                                         aReportedOnAlloc);
  1.1058 +    }
  1.1059 +  }
  1.1060 +
  1.1061 +  void UnreportIfNotReportedOnAlloc() const
  1.1062 +  {
  1.1063 +    if (!ReportedOnAlloc1() && !ReportedOnAlloc2()) {
  1.1064 +      mReportStackTrace_mReportedOnAlloc[0].Set(nullptr, 0);
  1.1065 +      mReportStackTrace_mReportedOnAlloc[1].Set(nullptr, 0);
  1.1066 +
  1.1067 +    } else if (!ReportedOnAlloc1() && ReportedOnAlloc2()) {
  1.1068 +      // Shift the 2nd report down to the 1st one.
  1.1069 +      mReportStackTrace_mReportedOnAlloc[0] =
  1.1070 +        mReportStackTrace_mReportedOnAlloc[1];
  1.1071 +      mReportStackTrace_mReportedOnAlloc[1].Set(nullptr, 0);
  1.1072 +
  1.1073 +    } else if (ReportedOnAlloc1() && !ReportedOnAlloc2()) {
  1.1074 +      mReportStackTrace_mReportedOnAlloc[1].Set(nullptr, 0);
  1.1075 +    }
  1.1076 +  }
  1.1077 +
  1.1078 +  // Hash policy.
  1.1079 +
  1.1080 +  typedef const void* Lookup;
  1.1081 +
  1.1082 +  static uint32_t hash(const void* const& aPtr)
  1.1083 +  {
  1.1084 +    return mozilla::HashGeneric(aPtr);
  1.1085 +  }
  1.1086 +
  1.1087 +  static bool match(const Block& aB, const void* const& aPtr)
  1.1088 +  {
  1.1089 +    return aB.mPtr == aPtr;
  1.1090 +  }
  1.1091 +};
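           +
           +// An illustrative sketch of the two-report cap (|b| is a Block and |t| the
           +// current thread, both hypothetical here):
           +//
           +//   b.Report(t, /* aReportedOnAlloc */ false);  // fills slot 0
           +//   b.Report(t, false);                         // fills slot 1
           +//   b.Report(t, false);                         // ignored: NumReports() == 2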
  1.1092 +
  1.1093 +typedef js::HashSet<Block, Block, InfallibleAllocPolicy> BlockTable;
  1.1094 +static BlockTable* gBlockTable = nullptr;
  1.1095 +
  1.1096 +typedef js::HashSet<const StackTrace*, js::DefaultHasher<const StackTrace*>,
  1.1097 +                    InfallibleAllocPolicy>
  1.1098 +        StackTraceSet;
  1.1099 +
  1.1100 +// Add a pointer to each live stack trace into the given StackTraceSet.  (A
  1.1101 +// stack trace is live if it's used by one of the live blocks.)
  1.1102 +static void
  1.1103 +GatherUsedStackTraces(StackTraceSet& aStackTraces)
  1.1104 +{
  1.1105 +  MOZ_ASSERT(gStateLock->IsLocked());
  1.1106 +  MOZ_ASSERT(Thread::Fetch()->InterceptsAreBlocked());
  1.1107 +
  1.1108 +  aStackTraces.finish();
  1.1109 +  aStackTraces.init(1024);
  1.1110 +
  1.1111 +  for (BlockTable::Range r = gBlockTable->all(); !r.empty(); r.popFront()) {
  1.1112 +    const Block& b = r.front();
  1.1113 +    aStackTraces.put(b.AllocStackTrace());
  1.1114 +    aStackTraces.put(b.ReportStackTrace1());
  1.1115 +    aStackTraces.put(b.ReportStackTrace2());
  1.1116 +  }
  1.1117 +
  1.1118 +  // Any of the stack traces added above may have been null.  For the sake of
  1.1119 +  // cleanliness, don't leave the null pointer in the set.
  1.1120 +  aStackTraces.remove(nullptr);
  1.1121 +}
  1.1122 +
  1.1123 +// Delete stack traces that we aren't using, and compact our hashtable.
  1.1124 +static void
  1.1125 +GCStackTraces()
  1.1126 +{
  1.1127 +  MOZ_ASSERT(gStateLock->IsLocked());
  1.1128 +  MOZ_ASSERT(Thread::Fetch()->InterceptsAreBlocked());
  1.1129 +
  1.1130 +  StackTraceSet usedStackTraces;
  1.1131 +  GatherUsedStackTraces(usedStackTraces);
  1.1132 +
  1.1133 +  // Delete all unused stack traces from gStackTraceTable.  The Enum destructor
  1.1134 +  // will automatically rehash and compact the table.
  1.1135 +  for (StackTraceTable::Enum e(*gStackTraceTable);
  1.1136 +       !e.empty();
  1.1137 +       e.popFront()) {
  1.1138 +    StackTrace* const& st = e.front();
  1.1139 +
  1.1140 +    if (!usedStackTraces.has(st)) {
  1.1141 +      e.removeFront();
  1.1142 +      InfallibleAllocPolicy::delete_(st);
  1.1143 +    }
  1.1144 +  }
  1.1145 +
  1.1146 +  // Schedule a GC when we have twice as many stack traces as we had right after
  1.1147 +  // this GC finished.
  1.1148 +  gGCStackTraceTableWhenSizeExceeds = 2 * gStackTraceTable->count();
  1.1149 +}
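           +
           +// E.g. if 5,000 stack traces survive this GC, the next GC is triggered once
           +// the table grows past 10,000 entries (checked in FreeCallback below).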
  1.1150 +
  1.1151 +//---------------------------------------------------------------------------
  1.1152 +// malloc/free callbacks
  1.1153 +//---------------------------------------------------------------------------
  1.1154 +
  1.1155 +static size_t gSmallBlockActualSizeCounter = 0;
  1.1156 +
  1.1157 +static void
  1.1158 +AllocCallback(void* aPtr, size_t aReqSize, Thread* aT)
  1.1159 +{
  1.1160 +  MOZ_ASSERT(gIsDMDRunning);
  1.1161 +
  1.1162 +  if (!aPtr) {
  1.1163 +    return;
  1.1164 +  }
  1.1165 +
  1.1166 +  AutoLockState lock;
  1.1167 +  AutoBlockIntercepts block(aT);
  1.1168 +
  1.1169 +  size_t actualSize = gMallocTable->malloc_usable_size(aPtr);
  1.1170 +  size_t sampleBelowSize = gOptions->SampleBelowSize();
  1.1171 +
  1.1172 +  if (actualSize < sampleBelowSize) {
  1.1173 +    // If this allocation is smaller than the sample-below size, increment the
  1.1174 +    // cumulative counter.  Then, if that counter now exceeds the sample size,
  1.1175 +    // blame this allocation for |sampleBelowSize| bytes.  This precludes the
  1.1176 +    // measurement of slop.
  1.1177 +    gSmallBlockActualSizeCounter += actualSize;
  1.1178 +    if (gSmallBlockActualSizeCounter >= sampleBelowSize) {
  1.1179 +      gSmallBlockActualSizeCounter -= sampleBelowSize;
  1.1180 +
  1.1181 +      Block b(aPtr, sampleBelowSize, StackTrace::Get(aT), /* sampled */ true);
  1.1182 +      (void)gBlockTable->putNew(aPtr, b);
  1.1183 +    }
  1.1184 +  } else {
  1.1185 +    // If this block size is larger than the sample size, record it exactly.
  1.1186 +    Block b(aPtr, aReqSize, StackTrace::Get(aT), /* sampled */ false);
  1.1187 +    (void)gBlockTable->putNew(aPtr, b);
  1.1188 +  }
  1.1189 +}
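           +
           +// A worked example of the sampling arithmetic above: with a sample-below
           +// size of 4,096 and a stream of allocations whose usable size is 1,000
           +// bytes, the counter reaches 1,000, 2,000, 3,000, 4,000, then 5,000 on the
           +// fifth call.  Only that fifth call records a block (a sampled block of
           +// 4,096 bytes), and the counter drops back to 904.  On average, the
           +// recorded bytes match the allocated bytes.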
  1.1190 +
  1.1191 +static void
  1.1192 +FreeCallback(void* aPtr, Thread* aT)
  1.1193 +{
  1.1194 +  MOZ_ASSERT(gIsDMDRunning);
  1.1195 +
  1.1196 +  if (!aPtr) {
  1.1197 +    return;
  1.1198 +  }
  1.1199 +
  1.1200 +  AutoLockState lock;
  1.1201 +  AutoBlockIntercepts block(aT);
  1.1202 +
  1.1203 +  gBlockTable->remove(aPtr);
  1.1204 +
  1.1205 +  if (gStackTraceTable->count() > gGCStackTraceTableWhenSizeExceeds) {
  1.1206 +    GCStackTraces();
  1.1207 +  }
  1.1208 +}
  1.1209 +
  1.1210 +//---------------------------------------------------------------------------
  1.1211 +// malloc/free interception
  1.1212 +//---------------------------------------------------------------------------
  1.1213 +
  1.1214 +static void Init(const malloc_table_t* aMallocTable);
  1.1215 +
  1.1216 +}   // namespace dmd
  1.1217 +}   // namespace mozilla
  1.1218 +
  1.1219 +void
  1.1220 +replace_init(const malloc_table_t* aMallocTable)
  1.1221 +{
  1.1222 +  mozilla::dmd::Init(aMallocTable);
  1.1223 +}
  1.1224 +
  1.1225 +void*
  1.1226 +replace_malloc(size_t aSize)
  1.1227 +{
  1.1228 +  using namespace mozilla::dmd;
  1.1229 +
  1.1230 +  if (!gIsDMDRunning) {
  1.1231 +    // DMD hasn't started up, either because it wasn't enabled by the user, or
  1.1232 +    // we're still in Init() and something has indirectly called malloc.  Do a
  1.1233 +    // vanilla malloc.  (In the latter case, if it fails we'll crash.  But
  1.1234 +    // OOM is highly unlikely so early on.)
  1.1235 +    return gMallocTable->malloc(aSize);
  1.1236 +  }
  1.1237 +
  1.1238 +  Thread* t = Thread::Fetch();
  1.1239 +  if (t->InterceptsAreBlocked()) {
  1.1240 +    // Intercepts are blocked, which means this must be a call to malloc
  1.1241 +    // triggered indirectly by DMD (e.g. via NS_StackWalk).  Be infallible.
  1.1242 +    return InfallibleAllocPolicy::malloc_(aSize);
  1.1243 +  }
  1.1244 +
  1.1245 +  // This must be a call to malloc from outside DMD.  Intercept it.
  1.1246 +  void* ptr = gMallocTable->malloc(aSize);
  1.1247 +  AllocCallback(ptr, aSize, t);
  1.1248 +  return ptr;
  1.1249 +}
  1.1250 +
  1.1251 +void*
  1.1252 +replace_calloc(size_t aCount, size_t aSize)
  1.1253 +{
  1.1254 +  using namespace mozilla::dmd;
  1.1255 +
  1.1256 +  if (!gIsDMDRunning) {
  1.1257 +    return gMallocTable->calloc(aCount, aSize);
  1.1258 +  }
  1.1259 +
  1.1260 +  Thread* t = Thread::Fetch();
  1.1261 +  if (t->InterceptsAreBlocked()) {
  1.1262 +    return InfallibleAllocPolicy::calloc_(aCount * aSize);
  1.1263 +  }
  1.1264 +
  1.1265 +  void* ptr = gMallocTable->calloc(aCount, aSize);
  1.1266 +  AllocCallback(ptr, aCount * aSize, t);
  1.1267 +  return ptr;
  1.1268 +}
  1.1269 +
  1.1270 +void*
  1.1271 +replace_realloc(void* aOldPtr, size_t aSize)
  1.1272 +{
  1.1273 +  using namespace mozilla::dmd;
  1.1274 +
  1.1275 +  if (!gIsDMDRunning) {
  1.1276 +    return gMallocTable->realloc(aOldPtr, aSize);
  1.1277 +  }
  1.1278 +
  1.1279 +  Thread* t = Thread::Fetch();
  1.1280 +  if (t->InterceptsAreBlocked()) {
  1.1281 +    return InfallibleAllocPolicy::realloc_(aOldPtr, aSize);
  1.1282 +  }
  1.1283 +
  1.1284 +  // If |aOldPtr| is nullptr, the call is equivalent to |malloc(aSize)|.
  1.1285 +  if (!aOldPtr) {
  1.1286 +    return replace_malloc(aSize);
  1.1287 +  }
  1.1288 +
  1.1289 +  // Be very careful here!  Must remove the block from the table before doing
  1.1290 +  // the realloc to avoid races, just like in replace_free().
  1.1291 +  // Nb: This does an unnecessary hashtable remove+add if the block doesn't
  1.1292 +  // move, but doing better isn't worth the effort.
  1.1293 +  FreeCallback(aOldPtr, t);
  1.1294 +  void* ptr = gMallocTable->realloc(aOldPtr, aSize);
  1.1295 +  if (ptr) {
  1.1296 +    AllocCallback(ptr, aSize, t);
  1.1297 +  } else {
  1.1298 +    // If realloc fails, we re-insert the old pointer.  It will look like it
  1.1299 +    // was allocated for the first time here, which is untrue, and the slop
  1.1300 +    // bytes will be zero, which may be untrue.  But this case is rare and
  1.1301 +    // doing better isn't worth the effort.
  1.1302 +    AllocCallback(aOldPtr, gMallocTable->malloc_usable_size(aOldPtr), t);
  1.1303 +  }
  1.1304 +  return ptr;
  1.1305 +}
  1.1306 +
  1.1307 +void*
  1.1308 +replace_memalign(size_t aAlignment, size_t aSize)
  1.1309 +{
  1.1310 +  using namespace mozilla::dmd;
  1.1311 +
  1.1312 +  if (!gIsDMDRunning) {
  1.1313 +    return gMallocTable->memalign(aAlignment, aSize);
  1.1314 +  }
  1.1315 +
  1.1316 +  Thread* t = Thread::Fetch();
  1.1317 +  if (t->InterceptsAreBlocked()) {
  1.1318 +    return InfallibleAllocPolicy::memalign_(aAlignment, aSize);
  1.1319 +  }
  1.1320 +
  1.1321 +  void* ptr = gMallocTable->memalign(aAlignment, aSize);
  1.1322 +  AllocCallback(ptr, aSize, t);
  1.1323 +  return ptr;
  1.1324 +}
  1.1325 +
  1.1326 +void
  1.1327 +replace_free(void* aPtr)
  1.1328 +{
  1.1329 +  using namespace mozilla::dmd;
  1.1330 +
  1.1331 +  if (!gIsDMDRunning) {
  1.1332 +    gMallocTable->free(aPtr);
  1.1333 +    return;
  1.1334 +  }
  1.1335 +
  1.1336 +  Thread* t = Thread::Fetch();
  1.1337 +  if (t->InterceptsAreBlocked()) {
  1.1338 +    return InfallibleAllocPolicy::free_(aPtr);
  1.1339 +  }
  1.1340 +
  1.1341 +  // Do the actual free after updating the table.  Otherwise, another thread
  1.1342 +  // could call malloc and get the freed block and update the table, and then
  1.1343 +  // our update here would remove the newly-malloc'd block.
  1.1344 +  FreeCallback(aPtr, t);
  1.1345 +  gMallocTable->free(aPtr);
  1.1346 +}
  1.1347 +
  1.1348 +namespace mozilla {
  1.1349 +namespace dmd {
  1.1350 +
  1.1351 +//---------------------------------------------------------------------------
  1.1352 +// Stack trace records
  1.1353 +//---------------------------------------------------------------------------
  1.1354 +
  1.1355 +class TraceRecordKey
  1.1356 +{
  1.1357 +public:
  1.1358 +  const StackTrace* const mAllocStackTrace;   // never null
  1.1359 +protected:
  1.1360 +  const StackTrace* const mReportStackTrace1; // nullptr if unreported
  1.1361 +  const StackTrace* const mReportStackTrace2; // nullptr if not 2x-reported
  1.1362 +
  1.1363 +public:
  1.1364 +  TraceRecordKey(const Block& aB)
  1.1365 +    : mAllocStackTrace(aB.AllocStackTrace()),
  1.1366 +      mReportStackTrace1(aB.ReportStackTrace1()),
  1.1367 +      mReportStackTrace2(aB.ReportStackTrace2())
  1.1368 +  {
  1.1369 +    MOZ_ASSERT(mAllocStackTrace);
  1.1370 +  }
  1.1371 +
  1.1372 +  // Hash policy.
  1.1373 +
  1.1374 +  typedef TraceRecordKey Lookup;
  1.1375 +
  1.1376 +  static uint32_t hash(const TraceRecordKey& aKey)
  1.1377 +  {
  1.1378 +    return mozilla::HashGeneric(aKey.mAllocStackTrace,
  1.1379 +                                aKey.mReportStackTrace1,
  1.1380 +                                aKey.mReportStackTrace2);
  1.1381 +  }
  1.1382 +
  1.1383 +  static bool match(const TraceRecordKey& aA, const TraceRecordKey& aB)
  1.1384 +  {
  1.1385 +    return aA.mAllocStackTrace   == aB.mAllocStackTrace &&
  1.1386 +           aA.mReportStackTrace1 == aB.mReportStackTrace1 &&
  1.1387 +           aA.mReportStackTrace2 == aB.mReportStackTrace2;
  1.1388 +  }
  1.1389 +};
  1.1390 +
  1.1391 +class RecordSize
  1.1392 +{
  1.1393 +  static const size_t kReqBits = sizeof(size_t) * 8 - 1;  // 31 or 63
  1.1394 +
  1.1395 +  size_t mReq;              // size requested
  1.1396 +  size_t mSlop:kReqBits;    // slop bytes
  1.1397 +  size_t mSampled:1;        // were one or more blocks contributing to this
  1.1398 +                            //   RecordSize sampled?
  1.1399 +public:
  1.1400 +  RecordSize()
  1.1401 +    : mReq(0),
  1.1402 +      mSlop(0),
  1.1403 +      mSampled(false)
  1.1404 +  {}
  1.1405 +
  1.1406 +  size_t Req()    const { return mReq; }
  1.1407 +  size_t Slop()   const { return mSlop; }
  1.1408 +  size_t Usable() const { return mReq + mSlop; }
  1.1409 +
  1.1410 +  bool IsSampled() const { return mSampled; }
  1.1411 +
  1.1412 +  void Add(const Block& aB)
  1.1413 +  {
  1.1414 +    mReq  += aB.ReqSize();
  1.1415 +    mSlop += aB.SlopSize();
  1.1416 +    mSampled = mSampled || aB.IsSampled();
  1.1417 +  }
  1.1418 +
  1.1419 +  void Add(const RecordSize& aRecordSize)
  1.1420 +  {
  1.1421 +    mReq  += aRecordSize.Req();
  1.1422 +    mSlop += aRecordSize.Slop();
  1.1423 +    mSampled = mSampled || aRecordSize.IsSampled();
  1.1424 +  }
  1.1425 +
  1.1426 +  static int Cmp(const RecordSize& aA, const RecordSize& aB)
  1.1427 +  {
  1.1428 +    // Primary sort: put bigger usable sizes first.
  1.1429 +    if (aA.Usable() > aB.Usable()) return -1;
  1.1430 +    if (aA.Usable() < aB.Usable()) return  1;
  1.1431 +
  1.1432 +    // Secondary sort: put bigger requested sizes first.
  1.1433 +    if (aA.Req() > aB.Req()) return -1;
  1.1434 +    if (aA.Req() < aB.Req()) return  1;
  1.1435 +
  1.1436 +    // Tertiary sort: put non-sampled records before sampled records.
  1.1437 +    if (!aA.mSampled &&  aB.mSampled) return -1;
  1.1438 +    if ( aA.mSampled && !aB.mSampled) return  1;
  1.1439 +
  1.1440 +    return 0;
  1.1441 +  }
  1.1442 +};
  1.1443 +
  1.1444 +// A collection of one or more heap blocks with a common TraceRecordKey.
  1.1445 +class TraceRecord : public TraceRecordKey
  1.1446 +{
  1.1447 +  // The TraceRecordKey base class serves as the key in TraceRecordTables.
  1.1448 +  // These two fields constitute the value, so it's ok for them to be
  1.1449 +  // |mutable|.
  1.1450 +  mutable uint32_t    mNumBlocks; // number of blocks with this TraceRecordKey
  1.1451 +  mutable RecordSize mRecordSize; // combined size of those blocks
  1.1452 +
  1.1453 +public:
  1.1454 +  explicit TraceRecord(const TraceRecordKey& aKey)
  1.1455 +    : TraceRecordKey(aKey),
  1.1456 +      mNumBlocks(0),
  1.1457 +      mRecordSize()
  1.1458 +  {}
  1.1459 +
  1.1460 +  uint32_t NumBlocks() const { return mNumBlocks; }
  1.1461 +
  1.1462 +  const RecordSize& GetRecordSize() const { return mRecordSize; }
  1.1463 +
  1.1464 +  // This is |const| thanks to the |mutable| fields above.
  1.1465 +  void Add(const Block& aB) const
  1.1466 +  {
  1.1467 +    mNumBlocks++;
  1.1468 +    mRecordSize.Add(aB);
  1.1469 +  }
  1.1470 +
  1.1471 +  // For PrintSortedRecords.
  1.1472 +  static const char* const kRecordKind;
  1.1473 +  static bool recordsOverlap() { return false; }
  1.1474 +
  1.1475 +  void Print(const Writer& aWriter, LocationService* aLocService,
  1.1476 +             uint32_t aM, uint32_t aN, const char* aStr, const char* astr,
  1.1477 +             size_t aCategoryUsableSize, size_t aCumulativeUsableSize,
  1.1478 +             size_t aTotalUsableSize) const;
  1.1479 +
  1.1480 +  static int QsortCmp(const void* aA, const void* aB)
  1.1481 +  {
  1.1482 +    const TraceRecord* const a = *static_cast<const TraceRecord* const*>(aA);
  1.1483 +    const TraceRecord* const b = *static_cast<const TraceRecord* const*>(aB);
  1.1484 +
  1.1485 +    return RecordSize::Cmp(a->mRecordSize, b->mRecordSize);
  1.1486 +  }
  1.1487 +};
  1.1488 +
  1.1489 +const char* const TraceRecord::kRecordKind = "trace";
  1.1490 +
  1.1491 +typedef js::HashSet<TraceRecord, TraceRecord, InfallibleAllocPolicy>
  1.1492 +        TraceRecordTable;
  1.1493 +
  1.1494 +void
  1.1495 +TraceRecord::Print(const Writer& aWriter, LocationService* aLocService,
  1.1496 +                   uint32_t aM, uint32_t aN, const char* aStr, const char* astr,
  1.1497 +                   size_t aCategoryUsableSize, size_t aCumulativeUsableSize,
  1.1498 +                   size_t aTotalUsableSize) const
  1.1499 +{
  1.1500 +  bool showTilde = mRecordSize.IsSampled();
  1.1501 +
  1.1502 +  W("%s: %s block%s in stack trace record %s of %s\n",
  1.1503 +    aStr,
  1.1504 +    Show(mNumBlocks, gBuf1, kBufLen, showTilde), Plural(mNumBlocks),
  1.1505 +    Show(aM, gBuf2, kBufLen),
  1.1506 +    Show(aN, gBuf3, kBufLen));
  1.1507 +
  1.1508 +  W(" %s bytes (%s requested / %s slop)\n",
  1.1509 +    Show(mRecordSize.Usable(), gBuf1, kBufLen, showTilde),
  1.1510 +    Show(mRecordSize.Req(),    gBuf2, kBufLen, showTilde),
  1.1511 +    Show(mRecordSize.Slop(),   gBuf3, kBufLen, showTilde));
  1.1512 +
  1.1513 +  W(" %4.2f%% of the heap (%4.2f%% cumulative); "
  1.1514 +    " %4.2f%% of %s (%4.2f%% cumulative)\n",
  1.1515 +    Percent(mRecordSize.Usable(), aTotalUsableSize),
  1.1516 +    Percent(aCumulativeUsableSize, aTotalUsableSize),
  1.1517 +    Percent(mRecordSize.Usable(), aCategoryUsableSize),
  1.1518 +    astr,
  1.1519 +    Percent(aCumulativeUsableSize, aCategoryUsableSize));
  1.1520 +
  1.1521 +  W(" Allocated at\n");
  1.1522 +  mAllocStackTrace->Print(aWriter, aLocService);
  1.1523 +
  1.1524 +  if (mReportStackTrace1) {
  1.1525 +    W("\n Reported at\n");
  1.1526 +    mReportStackTrace1->Print(aWriter, aLocService);
  1.1527 +  }
  1.1528 +  if (mReportStackTrace2) {
  1.1529 +    W("\n Reported again at\n");
  1.1530 +    mReportStackTrace2->Print(aWriter, aLocService);
  1.1531 +  }
  1.1532 +
  1.1533 +  W("\n");
  1.1534 +}
  1.1535 +
  1.1536 +//---------------------------------------------------------------------------
  1.1537 +// Stack frame records
  1.1538 +//---------------------------------------------------------------------------
  1.1539 +
  1.1540 +// A collection of one or more stack frames (from heap block allocation stack
  1.1541 +// traces) with a common PC.
  1.1542 +class FrameRecord
  1.1543 +{
  1.1544 +  // mPc is used as the key in FrameRecordTable, and the other members
  1.1545 +  // constitute the value, so it's ok for them to be |mutable|.
  1.1546 +  const void* const  mPc;
  1.1547 +  mutable size_t     mNumBlocks;
  1.1548 +  mutable size_t     mNumTraceRecords;
  1.1549 +  mutable RecordSize mRecordSize;
  1.1550 +
  1.1551 +public:
  1.1552 +  explicit FrameRecord(const void* aPc)
  1.1553 +    : mPc(aPc),
  1.1554 +      mNumBlocks(0),
  1.1555 +      mNumTraceRecords(0),
  1.1556 +      mRecordSize()
  1.1557 +  {}
  1.1558 +
  1.1559 +  const RecordSize& GetRecordSize() const { return mRecordSize; }
  1.1560 +
  1.1561 +  // This is |const| thanks to the |mutable| fields above.
  1.1562 +  void Add(const TraceRecord& aTr) const
  1.1563 +  {
  1.1564 +    mNumBlocks += aTr.NumBlocks();
  1.1565 +    mNumTraceRecords++;
  1.1566 +    mRecordSize.Add(aTr.GetRecordSize());
  1.1567 +  }
  1.1568 +
  1.1569 +  void Print(const Writer& aWriter, LocationService* aLocService,
  1.1570 +             uint32_t aM, uint32_t aN, const char* aStr, const char* astr,
  1.1571 +             size_t aCategoryUsableSize, size_t aCumulativeUsableSize,
  1.1572 +             size_t aTotalUsableSize) const;
  1.1573 +
  1.1574 +  static int QsortCmp(const void* aA, const void* aB)
  1.1575 +  {
  1.1576 +    const FrameRecord* const a = *static_cast<const FrameRecord* const*>(aA);
  1.1577 +    const FrameRecord* const b = *static_cast<const FrameRecord* const*>(aB);
  1.1578 +
  1.1579 +    return RecordSize::Cmp(a->mRecordSize, b->mRecordSize);
  1.1580 +  }
  1.1581 +
  1.1582 +  // For PrintSortedRecords.
  1.1583 +  static const char* const kRecordKind;
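          +  // A block contributes to one frame record per distinct PC in its
          +  // allocation stack trace, so frame record sizes can overlap and their
          +  // cumulative total can exceed the category total.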
  1.1584 +  static bool recordsOverlap() { return true; }
  1.1585 +
  1.1586 +  // Hash policy.
  1.1587 +
  1.1588 +  typedef const void* Lookup;
  1.1589 +
  1.1590 +  static uint32_t hash(const void* const& aPc)
  1.1591 +  {
  1.1592 +    return mozilla::HashGeneric(aPc);
  1.1593 +  }
  1.1594 +
  1.1595 +  static bool match(const FrameRecord& aFr, const void* const& aPc)
  1.1596 +  {
  1.1597 +    return aFr.mPc == aPc;
  1.1598 +  }
  1.1599 +};
  1.1600 +
  1.1601 +const char* const FrameRecord::kRecordKind = "frame";
  1.1602 +
  1.1603 +typedef js::HashSet<FrameRecord, FrameRecord, InfallibleAllocPolicy>
  1.1604 +        FrameRecordTable;
  1.1605 +
  1.1606 +void
  1.1607 +FrameRecord::Print(const Writer& aWriter, LocationService* aLocService,
  1.1608 +                   uint32_t aM, uint32_t aN, const char* aStr, const char* astr,
  1.1609 +                   size_t aCategoryUsableSize, size_t aCumulativeUsableSize,
  1.1610 +                   size_t aTotalUsableSize) const
  1.1611 +{
  1.1612 +  (void)aCumulativeUsableSize;
  1.1613 +
  1.1614 +  bool showTilde = mRecordSize.IsSampled();
  1.1615 +
  1.1616 +  W("%s: %s block%s from %s stack trace record%s in stack frame record %s of %s\n",
  1.1617 +    aStr,
  1.1618 +    Show(mNumBlocks, gBuf1, kBufLen, showTilde), Plural(mNumBlocks),
  1.1619 +    Show(mNumTraceRecords, gBuf2, kBufLen, showTilde), Plural(mNumTraceRecords),
  1.1620 +    Show(aM, gBuf3, kBufLen),
  1.1621 +    Show(aN, gBuf4, kBufLen));
  1.1622 +
  1.1623 +  W(" %s bytes (%s requested / %s slop)\n",
  1.1624 +    Show(mRecordSize.Usable(), gBuf1, kBufLen, showTilde),
  1.1625 +    Show(mRecordSize.Req(),    gBuf2, kBufLen, showTilde),
  1.1626 +    Show(mRecordSize.Slop(),   gBuf3, kBufLen, showTilde));
  1.1627 +
  1.1628 +  W(" %4.2f%% of the heap;  %4.2f%% of %s\n",
  1.1629 +    Percent(mRecordSize.Usable(), aTotalUsableSize),
  1.1630 +    Percent(mRecordSize.Usable(), aCategoryUsableSize),
  1.1631 +    astr);
  1.1632 +
  1.1633 +  W(" PC is\n");
  1.1634 +  aLocService->WriteLocation(aWriter, mPc);
  1.1635 +  W("\n");
  1.1636 +}
  1.1637 +
  1.1638 +//---------------------------------------------------------------------------
  1.1639 +// Options (Part 2)
  1.1640 +//---------------------------------------------------------------------------
  1.1641 +
  1.1642 +// Given an |aOptionName| like "foo", succeed if |aArg| has the form "foo=blah"
  1.1643 +// (where "blah" is non-empty) and return the pointer to "blah".  |aArg| can
  1.1644 +// have leading space chars (but not other whitespace).
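          +// E.g. ValueIfMatch("--sample-below=4093", "--sample-below") returns a
          +// pointer to "4093", while ValueIfMatch("--sample-below=", "--sample-below")
          +// returns nullptr because the value is empty.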
  1.1645 +const char*
  1.1646 +Options::ValueIfMatch(const char* aArg, const char* aOptionName)
  1.1647 +{
  1.1648 +  MOZ_ASSERT(!isspace(*aArg));  // any leading whitespace should not remain
  1.1649 +  size_t optionLen = strlen(aOptionName);
  1.1650 +  if (strncmp(aArg, aOptionName, optionLen) == 0 && aArg[optionLen] == '=' &&
  1.1651 +      aArg[optionLen + 1]) {
  1.1652 +    return aArg + optionLen + 1;
  1.1653 +  }
  1.1654 +  return nullptr;
  1.1655 +}
  1.1656 +
  1.1657 +// Extracts a |long| value for an option from an argument.  It must be within
  1.1658 +// the range |aMin..aMax| (inclusive).
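          +// E.g. GetLong("--max-frames=24", "--max-frames", 1, 24, &n) succeeds and
          +// sets n to 24;  a value with trailing junk, such as "--max-frames=24x",
          +// fails because strtol() stops before the end of the string.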
  1.1659 +bool
  1.1660 +Options::GetLong(const char* aArg, const char* aOptionName,
  1.1661 +                 long aMin, long aMax, long* aN)
  1.1662 +{
  1.1663 +  if (const char* optionValue = ValueIfMatch(aArg, aOptionName)) {
  1.1664 +    char* endPtr;
  1.1665 +    *aN = strtol(optionValue, &endPtr, /* base */ 10);
  1.1666 +    if (!*endPtr && aMin <= *aN && *aN <= aMax &&
  1.1667 +        *aN != LONG_MIN && *aN != LONG_MAX) {
  1.1668 +      return true;
  1.1669 +    }
  1.1670 +  }
  1.1671 +  return false;
  1.1672 +}
  1.1673 +
  1.1674 +// The sample-below default is a prime number close to 4096.
  1.1675 +// - Why that size?  Because it's *much* faster but only moderately less precise
  1.1676 +//   than a size of 1.
  1.1677 +// - Why prime?  Because it makes our sampling more random.  If we used a size
  1.1678 +//   of 4096, for example, then our alloc counter would only take on even
   1.1679 +//   values, because jemalloc always rounds up request sizes.  In contrast, a
  1.1680 +//   prime size will explore all possible values of the alloc counter.
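          +//
          +// For example (cf. RunTestMode below):  with --sample-below=128, a 144-byte
          +// block is recorded exactly, while sixteen 8-byte blocks together make up
          +// exactly one 128-byte sample.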
  1.1681 +//
  1.1682 +Options::Options(const char* aDMDEnvVar)
  1.1683 +  : mDMDEnvVar(InfallibleAllocPolicy::strdup_(aDMDEnvVar)),
  1.1684 +    mSampleBelowSize(4093, 100 * 100 * 1000),
  1.1685 +    mMaxFrames(StackTrace::MaxFrames, StackTrace::MaxFrames),
  1.1686 +    mMaxRecords(1000, 1000000),
  1.1687 +    mMode(Normal)
  1.1688 +{
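          +  // E.g. DMD="--sample-below=4093 --mode=test" is split into two
          +  // NUL-isolated args, each handled in turn by the loop below.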
  1.1689 +  char* e = mDMDEnvVar;
  1.1690 +  if (strcmp(e, "1") != 0) {
  1.1691 +    bool isEnd = false;
  1.1692 +    while (!isEnd) {
  1.1693 +      // Consume leading whitespace.
  1.1694 +      while (isspace(*e)) {
  1.1695 +        e++;
  1.1696 +      }
  1.1697 +
  1.1698 +      // Save the start of the arg.
  1.1699 +      const char* arg = e;
  1.1700 +
  1.1701 +      // Find the first char after the arg, and temporarily change it to '\0'
  1.1702 +      // to isolate the arg.
  1.1703 +      while (!isspace(*e) && *e != '\0') {
  1.1704 +        e++;
  1.1705 +      }
  1.1706 +      char replacedChar = *e;
  1.1707 +      isEnd = replacedChar == '\0';
  1.1708 +      *e = '\0';
  1.1709 +
  1.1710 +      // Handle arg
  1.1711 +      long myLong;
  1.1712 +      if (GetLong(arg, "--sample-below", 1, mSampleBelowSize.mMax, &myLong)) {
  1.1713 +        mSampleBelowSize.mActual = myLong;
  1.1714 +
  1.1715 +      } else if (GetLong(arg, "--max-frames", 1, mMaxFrames.mMax, &myLong)) {
  1.1716 +        mMaxFrames.mActual = myLong;
  1.1717 +
  1.1718 +      } else if (GetLong(arg, "--max-records", 1, mMaxRecords.mMax, &myLong)) {
  1.1719 +        mMaxRecords.mActual = myLong;
  1.1720 +
  1.1721 +      } else if (strcmp(arg, "--mode=normal") == 0) {
  1.1722 +        mMode = Options::Normal;
  1.1723 +      } else if (strcmp(arg, "--mode=test")   == 0) {
  1.1724 +        mMode = Options::Test;
  1.1725 +      } else if (strcmp(arg, "--mode=stress") == 0) {
  1.1726 +        mMode = Options::Stress;
  1.1727 +
  1.1728 +      } else if (strcmp(arg, "") == 0) {
  1.1729 +        // This can only happen if there is trailing whitespace.  Ignore.
  1.1730 +        MOZ_ASSERT(isEnd);
  1.1731 +
  1.1732 +      } else {
  1.1733 +        BadArg(arg);
  1.1734 +      }
  1.1735 +
  1.1736 +      // Undo the temporary isolation.
  1.1737 +      *e = replacedChar;
  1.1738 +    }
  1.1739 +  }
  1.1740 +}
  1.1741 +
  1.1742 +void
  1.1743 +Options::BadArg(const char* aArg)
  1.1744 +{
  1.1745 +  StatusMsg("\n");
  1.1746 +  StatusMsg("Bad entry in the $DMD environment variable: '%s'.\n", aArg);
  1.1747 +  StatusMsg("\n");
  1.1748 +  StatusMsg("Valid values of $DMD are:\n");
  1.1749 +  StatusMsg("- undefined or \"\" or \"0\", which disables DMD, or\n");
  1.1750 +  StatusMsg("- \"1\", which enables it with the default options, or\n");
  1.1751 +  StatusMsg("- a whitespace-separated list of |--option=val| entries, which\n");
  1.1752 +  StatusMsg("  enables it with non-default options.\n");
  1.1753 +  StatusMsg("\n");
  1.1754 +  StatusMsg("The following options are allowed;  defaults are shown in [].\n");
  1.1755 +  StatusMsg("  --sample-below=<1..%d> Sample blocks smaller than this [%d]\n",
  1.1756 +            int(mSampleBelowSize.mMax),
  1.1757 +            int(mSampleBelowSize.mDefault));
  1.1758 +  StatusMsg("                               (prime numbers are recommended)\n");
  1.1759 +  StatusMsg("  --max-frames=<1..%d>         Max. depth of stack traces [%d]\n",
  1.1760 +            int(mMaxFrames.mMax),
  1.1761 +            int(mMaxFrames.mDefault));
  1.1762 +  StatusMsg("  --max-records=<1..%u>   Max. number of records printed [%u]\n",
  1.1763 +            mMaxRecords.mMax,
  1.1764 +            mMaxRecords.mDefault);
  1.1765 +  StatusMsg("  --mode=<normal|test|stress>  Mode of operation [normal]\n");
  1.1766 +  StatusMsg("\n");
  1.1767 +  exit(1);
  1.1768 +}
  1.1769 +
  1.1770 +//---------------------------------------------------------------------------
  1.1771 +// DMD start-up
  1.1772 +//---------------------------------------------------------------------------
  1.1773 +
  1.1774 +#ifdef XP_MACOSX
  1.1775 +static void
  1.1776 +NopStackWalkCallback(void* aPc, void* aSp, void* aClosure)
  1.1777 +{
  1.1778 +}
  1.1779 +#endif
  1.1780 +
  1.1781 +// Note that fopen() can allocate.
  1.1782 +static FILE*
  1.1783 +OpenOutputFile(const char* aFilename)
  1.1784 +{
  1.1785 +  FILE* fp = fopen(aFilename, "w");
  1.1786 +  if (!fp) {
  1.1787 +    StatusMsg("can't create %s file: %s\n", aFilename, strerror(errno));
  1.1788 +    exit(1);
  1.1789 +  }
  1.1790 +  return fp;
  1.1791 +}
  1.1792 +
  1.1793 +static void RunTestMode(FILE* fp);
  1.1794 +static void RunStressMode(FILE* fp);
  1.1795 +
  1.1796 +// WARNING: this function runs *very* early -- before all static initializers
  1.1797 +// have run.  For this reason, non-scalar globals such as gStateLock and
  1.1798 +// gStackTraceTable are allocated dynamically (so we can guarantee their
  1.1799 +// construction in this function) rather than statically.
  1.1800 +static void
  1.1801 +Init(const malloc_table_t* aMallocTable)
  1.1802 +{
  1.1803 +  MOZ_ASSERT(!gIsDMDRunning);
  1.1804 +
  1.1805 +  gMallocTable = aMallocTable;
  1.1806 +
  1.1807 +  // DMD is controlled by the |DMD| environment variable.
  1.1808 +  // - If it's unset or empty or "0", DMD doesn't run.
  1.1809 +  // - Otherwise, the contents dictate DMD's behaviour.
  1.1810 +
  1.1811 +  char* e = getenv("DMD");
   1.1812 +  StatusMsg("$DMD = '%s'\n", e ? e : "");  // don't pass nullptr to %s
  1.1813 +
  1.1814 +  if (!e || strcmp(e, "") == 0 || strcmp(e, "0") == 0) {
  1.1815 +    StatusMsg("DMD is not enabled\n");
  1.1816 +    return;
  1.1817 +  }
  1.1818 +
  1.1819 +  // Parse $DMD env var.
  1.1820 +  gOptions = InfallibleAllocPolicy::new_<Options>(e);
  1.1821 +
  1.1822 +  StatusMsg("DMD is enabled\n");
  1.1823 +
  1.1824 +#ifdef XP_MACOSX
  1.1825 +  // On Mac OS X we need to call StackWalkInitCriticalAddress() very early
  1.1826 +  // (prior to the creation of any mutexes, apparently) otherwise we can get
  1.1827 +  // hangs when getting stack traces (bug 821577).  But
  1.1828 +  // StackWalkInitCriticalAddress() isn't exported from xpcom/, so instead we
  1.1829 +  // just call NS_StackWalk, because that calls StackWalkInitCriticalAddress().
  1.1830 +  // See the comment above StackWalkInitCriticalAddress() for more details.
  1.1831 +  (void)NS_StackWalk(NopStackWalkCallback, /* skipFrames */ 0,
  1.1832 +                     /* maxFrames */ 1, nullptr, 0, nullptr);
  1.1833 +#endif
  1.1834 +
  1.1835 +  gStateLock = InfallibleAllocPolicy::new_<Mutex>();
  1.1836 +
  1.1837 +  gSmallBlockActualSizeCounter = 0;
  1.1838 +
  1.1839 +  DMD_CREATE_TLS_INDEX(gTlsIndex);
  1.1840 +
  1.1841 +  {
  1.1842 +    AutoLockState lock;
  1.1843 +
  1.1844 +    gStackTraceTable = InfallibleAllocPolicy::new_<StackTraceTable>();
   1.1845 +    (void)gStackTraceTable->init(8192);
  1.1846 +
  1.1847 +    gBlockTable = InfallibleAllocPolicy::new_<BlockTable>();
   1.1848 +    (void)gBlockTable->init(8192);
  1.1849 +  }
  1.1850 +
  1.1851 +  if (gOptions->IsTestMode()) {
  1.1852 +    // OpenOutputFile() can allocate.  So do this before setting
  1.1853 +    // gIsDMDRunning so those allocations don't show up in our results.  Once
  1.1854 +    // gIsDMDRunning is set we are intercepting malloc et al. in earnest.
  1.1855 +    FILE* fp = OpenOutputFile("test.dmd");
  1.1856 +    gIsDMDRunning = true;
  1.1857 +
  1.1858 +    StatusMsg("running test mode...\n");
  1.1859 +    RunTestMode(fp);
  1.1860 +    StatusMsg("finished test mode\n");
  1.1861 +    fclose(fp);
  1.1862 +    exit(0);
  1.1863 +  }
  1.1864 +
  1.1865 +  if (gOptions->IsStressMode()) {
  1.1866 +    FILE* fp = OpenOutputFile("stress.dmd");
  1.1867 +    gIsDMDRunning = true;
  1.1868 +
  1.1869 +    StatusMsg("running stress mode...\n");
  1.1870 +    RunStressMode(fp);
  1.1871 +    StatusMsg("finished stress mode\n");
  1.1872 +    fclose(fp);
  1.1873 +    exit(0);
  1.1874 +  }
  1.1875 +
  1.1876 +  gIsDMDRunning = true;
  1.1877 +}
  1.1878 +
  1.1879 +//---------------------------------------------------------------------------
  1.1880 +// DMD reporting and unreporting
  1.1881 +//---------------------------------------------------------------------------
  1.1882 +
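          +// Report() and ReportOnAlloc() mark a block as reported.  An illustrative
          +// caller (names are hypothetical):  a memory reporter measures a block with
          +// a malloc_size_of-style function and reports it to DMD in the same breath:
          +//
          +//   size_t n = MyMallocSizeOf(aPtr);  // hypothetical measuring function
          +//   mozilla::dmd::Report(aPtr);       // mark aPtr's block as reported
          +//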
  1.1883 +static void
  1.1884 +ReportHelper(const void* aPtr, bool aReportedOnAlloc)
  1.1885 +{
  1.1886 +  if (!gIsDMDRunning || !aPtr) {
  1.1887 +    return;
  1.1888 +  }
  1.1889 +
  1.1890 +  Thread* t = Thread::Fetch();
  1.1891 +
  1.1892 +  AutoBlockIntercepts block(t);
  1.1893 +  AutoLockState lock;
  1.1894 +
  1.1895 +  if (BlockTable::Ptr p = gBlockTable->lookup(aPtr)) {
  1.1896 +    p->Report(t, aReportedOnAlloc);
  1.1897 +  } else {
  1.1898 +    // We have no record of the block.  Do nothing.  Either:
  1.1899 +    // - We're sampling and we skipped this block.  This is likely.
  1.1900 +    // - It's a bogus pointer.  This is unlikely because Report() is almost
  1.1901 +    //   always called in conjunction with a malloc_size_of-style function.
  1.1902 +  }
  1.1903 +}
  1.1904 +
  1.1905 +MOZ_EXPORT void
  1.1906 +Report(const void* aPtr)
  1.1907 +{
  1.1908 +  ReportHelper(aPtr, /* onAlloc */ false);
  1.1909 +}
  1.1910 +
  1.1911 +MOZ_EXPORT void
  1.1912 +ReportOnAlloc(const void* aPtr)
  1.1913 +{
  1.1914 +  ReportHelper(aPtr, /* onAlloc */ true);
  1.1915 +}
  1.1916 +
  1.1917 +//---------------------------------------------------------------------------
  1.1918 +// DMD output
  1.1919 +//---------------------------------------------------------------------------
  1.1920 +
   1.1921 +// This works for both TraceRecords and FrameRecords.
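          +// It relies on each record class providing the same interface:
          +// kRecordKind, recordsOverlap(), QsortCmp(), and a Print() method.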
  1.1922 +template <class Record>
  1.1923 +static void
  1.1924 +PrintSortedRecords(const Writer& aWriter, LocationService* aLocService,
  1.1925 +                   const char* aStr, const char* astr,
  1.1926 +                   const js::HashSet<Record, Record, InfallibleAllocPolicy>&
  1.1927 +                         aRecordTable,
  1.1928 +                   size_t aCategoryUsableSize, size_t aTotalUsableSize)
  1.1929 +{
  1.1930 +  const char* kind = Record::kRecordKind;
  1.1931 +  StatusMsg("  creating and sorting %s stack %s record array...\n", astr, kind);
  1.1932 +
  1.1933 +  // Convert the table into a sorted array.
  1.1934 +  js::Vector<const Record*, 0, InfallibleAllocPolicy> recordArray;
  1.1935 +  recordArray.reserve(aRecordTable.count());
  1.1936 +  typedef js::HashSet<Record, Record, InfallibleAllocPolicy> RecordTable;
  1.1937 +  for (typename RecordTable::Range r = aRecordTable.all();
  1.1938 +       !r.empty();
  1.1939 +       r.popFront()) {
  1.1940 +    recordArray.infallibleAppend(&r.front());
  1.1941 +  }
  1.1942 +  qsort(recordArray.begin(), recordArray.length(), sizeof(recordArray[0]),
  1.1943 +        Record::QsortCmp);
  1.1944 +
  1.1945 +  WriteTitle("%s stack %s records\n", aStr, kind);
  1.1946 +
  1.1947 +  if (recordArray.length() == 0) {
  1.1948 +    W("(none)\n\n");
  1.1949 +    return;
  1.1950 +  }
  1.1951 +
  1.1952 +  StatusMsg("  printing %s stack %s record array...\n", astr, kind);
  1.1953 +  size_t cumulativeUsableSize = 0;
  1.1954 +
  1.1955 +  // Limit the number of records printed, because fix-linux-stack.pl is too
  1.1956 +  // damn slow.  Note that we don't break out of this loop because we need to
  1.1957 +  // keep adding to |cumulativeUsableSize|.
  1.1958 +  uint32_t numRecords = recordArray.length();
  1.1959 +  uint32_t maxRecords = gOptions->MaxRecords();
  1.1960 +  for (uint32_t i = 0; i < numRecords; i++) {
  1.1961 +    const Record* r = recordArray[i];
  1.1962 +    cumulativeUsableSize += r->GetRecordSize().Usable();
  1.1963 +    if (i < maxRecords) {
  1.1964 +      r->Print(aWriter, aLocService, i+1, numRecords, aStr, astr,
  1.1965 +               aCategoryUsableSize, cumulativeUsableSize, aTotalUsableSize);
  1.1966 +    } else if (i == maxRecords) {
  1.1967 +      W("%s: stopping after %s stack %s records\n\n", aStr,
  1.1968 +        Show(maxRecords, gBuf1, kBufLen), kind);
  1.1969 +    }
  1.1970 +  }
  1.1971 +
  1.1972 +  // This holds for TraceRecords, but not for FrameRecords.
  1.1973 +  MOZ_ASSERT_IF(!Record::recordsOverlap(),
  1.1974 +                aCategoryUsableSize == cumulativeUsableSize);
  1.1975 +}
  1.1976 +
  1.1977 +static void
  1.1978 +PrintSortedTraceAndFrameRecords(const Writer& aWriter,
  1.1979 +                                LocationService* aLocService,
  1.1980 +                                const char* aStr, const char* astr,
  1.1981 +                                const TraceRecordTable& aTraceRecordTable,
  1.1982 +                                size_t aCategoryUsableSize,
  1.1983 +                                size_t aTotalUsableSize)
  1.1984 +{
  1.1985 +  PrintSortedRecords(aWriter, aLocService, aStr, astr, aTraceRecordTable,
  1.1986 +                     aCategoryUsableSize, aTotalUsableSize);
  1.1987 +
  1.1988 +  FrameRecordTable frameRecordTable;
  1.1989 +  (void)frameRecordTable.init(2048);
  1.1990 +  for (TraceRecordTable::Range r = aTraceRecordTable.all();
  1.1991 +       !r.empty();
  1.1992 +       r.popFront()) {
  1.1993 +    const TraceRecord& tr = r.front();
  1.1994 +    const StackTrace* st = tr.mAllocStackTrace;
  1.1995 +
  1.1996 +    // A single PC can appear multiple times in a stack trace.  We ignore
  1.1997 +    // duplicates by first sorting and then ignoring adjacent duplicates.
  1.1998 +    StackTrace sorted(*st);
  1.1999 +    sorted.Sort();              // sorts the copy, not the original
  1.2000 +    void* prevPc = (void*)intptr_t(-1);
  1.2001 +    for (uint32_t i = 0; i < sorted.Length(); i++) {
  1.2002 +      void* pc = sorted.Pc(i);
  1.2003 +      if (pc == prevPc) {
  1.2004 +        continue;               // ignore duplicate
  1.2005 +      }
  1.2006 +      prevPc = pc;
  1.2007 +
  1.2008 +      FrameRecordTable::AddPtr p = frameRecordTable.lookupForAdd(pc);
  1.2009 +      if (!p) {
  1.2010 +        FrameRecord fr(pc);
  1.2011 +        (void)frameRecordTable.add(p, fr);
  1.2012 +      }
  1.2013 +      p->Add(tr);
  1.2014 +    }
  1.2015 +  }
  1.2016 +
  1.2017 +  PrintSortedRecords(aWriter, aLocService, aStr, astr, frameRecordTable,
  1.2018 +                     aCategoryUsableSize, aTotalUsableSize);
  1.2019 +}
  1.2020 +
  1.2021 +// Note that, unlike most SizeOf* functions, this function does not take a
  1.2022 +// |mozilla::MallocSizeOf| argument.  That's because those arguments are
   1.2023 +// primarily to help DMD track heap blocks... but DMD deliberately doesn't track
  1.2024 +// heap blocks it allocated for itself!
  1.2025 +//
  1.2026 +// SizeOfInternal should be called while you're holding the state lock and
  1.2027 +// while intercepts are blocked; SizeOf acquires the lock and blocks
  1.2028 +// intercepts.
  1.2029 +
  1.2030 +static void
  1.2031 +SizeOfInternal(Sizes* aSizes)
  1.2032 +{
  1.2033 +  MOZ_ASSERT(gStateLock->IsLocked());
  1.2034 +  MOZ_ASSERT(Thread::Fetch()->InterceptsAreBlocked());
  1.2035 +
  1.2036 +  aSizes->Clear();
  1.2037 +
  1.2038 +  if (!gIsDMDRunning) {
  1.2039 +    return;
  1.2040 +  }
  1.2041 +
  1.2042 +  StackTraceSet usedStackTraces;
  1.2043 +  GatherUsedStackTraces(usedStackTraces);
  1.2044 +
  1.2045 +  for (StackTraceTable::Range r = gStackTraceTable->all();
  1.2046 +       !r.empty();
  1.2047 +       r.popFront()) {
  1.2048 +    StackTrace* const& st = r.front();
  1.2049 +
  1.2050 +    if (usedStackTraces.has(st)) {
  1.2051 +      aSizes->mStackTracesUsed += MallocSizeOf(st);
  1.2052 +    } else {
  1.2053 +      aSizes->mStackTracesUnused += MallocSizeOf(st);
  1.2054 +    }
  1.2055 +  }
  1.2056 +
  1.2057 +  aSizes->mStackTraceTable =
  1.2058 +    gStackTraceTable->sizeOfIncludingThis(MallocSizeOf);
  1.2059 +
  1.2060 +  aSizes->mBlockTable = gBlockTable->sizeOfIncludingThis(MallocSizeOf);
  1.2061 +}
  1.2062 +
  1.2063 +MOZ_EXPORT void
  1.2064 +SizeOf(Sizes* aSizes)
  1.2065 +{
  1.2066 +  aSizes->Clear();
  1.2067 +
  1.2068 +  if (!gIsDMDRunning) {
  1.2069 +    return;
  1.2070 +  }
  1.2071 +
  1.2072 +  AutoBlockIntercepts block(Thread::Fetch());
  1.2073 +  AutoLockState lock;
  1.2074 +  SizeOfInternal(aSizes);
  1.2075 +}
  1.2076 +
  1.2077 +void
  1.2078 +ClearReportsInternal()
  1.2079 +{
  1.2080 +  MOZ_ASSERT(gStateLock->IsLocked());
  1.2081 +
  1.2082 +  // Unreport all blocks that were marked reported by a memory reporter.  This
  1.2083 +  // excludes those that were reported on allocation, because they need to keep
  1.2084 +  // their reported marking.
  1.2085 +  for (BlockTable::Range r = gBlockTable->all(); !r.empty(); r.popFront()) {
  1.2086 +    r.front().UnreportIfNotReportedOnAlloc();
  1.2087 +  }
  1.2088 +}
  1.2089 +
  1.2090 +MOZ_EXPORT void
  1.2091 +ClearReports()
  1.2092 +{
  1.2093 +  if (!gIsDMDRunning) {
  1.2094 +    return;
  1.2095 +  }
  1.2096 +
  1.2097 +  AutoLockState lock;
  1.2098 +  ClearReportsInternal();
  1.2099 +}
  1.2100 +
  1.2101 +MOZ_EXPORT bool
  1.2102 +IsEnabled()
  1.2103 +{
  1.2104 +  return gIsDMDRunning;
  1.2105 +}
  1.2106 +
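          +// A minimal usage sketch, mirroring RunTestMode() below ("my.dmd" is an
          +// illustrative filename):
          +//
          +//   FILE* fp = OpenOutputFile("my.dmd");
          +//   Dump(Writer(FpWrite, fp));
          +//   fclose(fp);
          +//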
  1.2107 +MOZ_EXPORT void
  1.2108 +Dump(Writer aWriter)
  1.2109 +{
  1.2110 +  if (!gIsDMDRunning) {
  1.2111 +    const char* msg = "cannot Dump();  DMD was not enabled at startup\n";
  1.2112 +    StatusMsg("%s", msg);
  1.2113 +    W("%s", msg);
  1.2114 +    return;
  1.2115 +  }
  1.2116 +
  1.2117 +  AutoBlockIntercepts block(Thread::Fetch());
  1.2118 +  AutoLockState lock;
  1.2119 +
  1.2120 +  static int dumpCount = 1;
  1.2121 +  StatusMsg("Dump %d {\n", dumpCount++);
  1.2122 +
  1.2123 +  StatusMsg("  gathering stack trace records...\n");
  1.2124 +
  1.2125 +  TraceRecordTable unreportedTraceRecordTable;
  1.2126 +  (void)unreportedTraceRecordTable.init(1024);
  1.2127 +  size_t unreportedUsableSize = 0;
  1.2128 +  size_t unreportedNumBlocks = 0;
  1.2129 +
  1.2130 +  TraceRecordTable onceReportedTraceRecordTable;
  1.2131 +  (void)onceReportedTraceRecordTable.init(1024);
  1.2132 +  size_t onceReportedUsableSize = 0;
  1.2133 +  size_t onceReportedNumBlocks = 0;
  1.2134 +
  1.2135 +  TraceRecordTable twiceReportedTraceRecordTable;
  1.2136 +  (void)twiceReportedTraceRecordTable.init(0);
  1.2137 +  size_t twiceReportedUsableSize = 0;
  1.2138 +  size_t twiceReportedNumBlocks = 0;
  1.2139 +
  1.2140 +  bool anyBlocksSampled = false;
  1.2141 +
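          +  // Partition the blocks by report count.  A block can be reported at most
          +  // twice (cf. mReportStackTrace1/mReportStackTrace2), hence exactly three
          +  // tables.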
  1.2142 +  for (BlockTable::Range r = gBlockTable->all(); !r.empty(); r.popFront()) {
  1.2143 +    const Block& b = r.front();
  1.2144 +
  1.2145 +    TraceRecordTable* table;
  1.2146 +    uint32_t numReports = b.NumReports();
  1.2147 +    if (numReports == 0) {
  1.2148 +      unreportedUsableSize += b.UsableSize();
  1.2149 +      unreportedNumBlocks++;
  1.2150 +      table = &unreportedTraceRecordTable;
  1.2151 +    } else if (numReports == 1) {
  1.2152 +      onceReportedUsableSize += b.UsableSize();
  1.2153 +      onceReportedNumBlocks++;
  1.2154 +      table = &onceReportedTraceRecordTable;
  1.2155 +    } else {
  1.2156 +      MOZ_ASSERT(numReports == 2);
  1.2157 +      twiceReportedUsableSize += b.UsableSize();
  1.2158 +      twiceReportedNumBlocks++;
  1.2159 +      table = &twiceReportedTraceRecordTable;
  1.2160 +    }
  1.2161 +    TraceRecordKey key(b);
  1.2162 +    TraceRecordTable::AddPtr p = table->lookupForAdd(key);
  1.2163 +    if (!p) {
  1.2164 +      TraceRecord tr(b);
  1.2165 +      (void)table->add(p, tr);
  1.2166 +    }
  1.2167 +    p->Add(b);
  1.2168 +
  1.2169 +    anyBlocksSampled = anyBlocksSampled || b.IsSampled();
  1.2170 +  }
  1.2171 +  size_t totalUsableSize =
  1.2172 +    unreportedUsableSize + onceReportedUsableSize + twiceReportedUsableSize;
  1.2173 +  size_t totalNumBlocks =
  1.2174 +    unreportedNumBlocks + onceReportedNumBlocks + twiceReportedNumBlocks;
  1.2175 +
  1.2176 +  WriteTitle("Invocation\n");
  1.2177 +  W("$DMD = '%s'\n", gOptions->DMDEnvVar());
  1.2178 +  W("Sample-below size = %lld\n\n",
  1.2179 +    (long long)(gOptions->SampleBelowSize()));
  1.2180 +
  1.2181 +  // Allocate this on the heap instead of the stack because it's fairly large.
  1.2182 +  LocationService* locService = InfallibleAllocPolicy::new_<LocationService>();
  1.2183 +
  1.2184 +  PrintSortedRecords(aWriter, locService, "Twice-reported", "twice-reported",
  1.2185 +                     twiceReportedTraceRecordTable, twiceReportedUsableSize,
  1.2186 +                     totalUsableSize);
  1.2187 +
  1.2188 +  PrintSortedTraceAndFrameRecords(aWriter, locService,
  1.2189 +                                  "Unreported", "unreported",
  1.2190 +                                  unreportedTraceRecordTable,
  1.2191 +                                  unreportedUsableSize, totalUsableSize);
  1.2192 +
  1.2193 +  PrintSortedTraceAndFrameRecords(aWriter, locService,
   1.2194 +                                  "Once-reported", "once-reported",
   1.2195 +                                  onceReportedTraceRecordTable,
   1.2196 +                                  onceReportedUsableSize, totalUsableSize);
  1.2197 +
  1.2198 +  bool showTilde = anyBlocksSampled;
  1.2199 +  WriteTitle("Summary\n");
  1.2200 +
  1.2201 +  W("Total:          %12s bytes (%6.2f%%) in %7s blocks (%6.2f%%)\n",
  1.2202 +    Show(totalUsableSize, gBuf1, kBufLen, showTilde),
  1.2203 +    100.0,
  1.2204 +    Show(totalNumBlocks,  gBuf2, kBufLen, showTilde),
  1.2205 +    100.0);
  1.2206 +
  1.2207 +  W("Unreported:     %12s bytes (%6.2f%%) in %7s blocks (%6.2f%%)\n",
  1.2208 +    Show(unreportedUsableSize, gBuf1, kBufLen, showTilde),
  1.2209 +    Percent(unreportedUsableSize, totalUsableSize),
  1.2210 +    Show(unreportedNumBlocks, gBuf2, kBufLen, showTilde),
  1.2211 +    Percent(unreportedNumBlocks, totalNumBlocks));
  1.2212 +
  1.2213 +  W("Once-reported:  %12s bytes (%6.2f%%) in %7s blocks (%6.2f%%)\n",
  1.2214 +    Show(onceReportedUsableSize, gBuf1, kBufLen, showTilde),
  1.2215 +    Percent(onceReportedUsableSize, totalUsableSize),
  1.2216 +    Show(onceReportedNumBlocks, gBuf2, kBufLen, showTilde),
  1.2217 +    Percent(onceReportedNumBlocks, totalNumBlocks));
  1.2218 +
  1.2219 +  W("Twice-reported: %12s bytes (%6.2f%%) in %7s blocks (%6.2f%%)\n",
  1.2220 +    Show(twiceReportedUsableSize, gBuf1, kBufLen, showTilde),
  1.2221 +    Percent(twiceReportedUsableSize, totalUsableSize),
  1.2222 +    Show(twiceReportedNumBlocks, gBuf2, kBufLen, showTilde),
  1.2223 +    Percent(twiceReportedNumBlocks, totalNumBlocks));
  1.2224 +
  1.2225 +  W("\n");
  1.2226 +
  1.2227 +  // Stats are non-deterministic, so don't show them in test mode.
  1.2228 +  if (!gOptions->IsTestMode()) {
  1.2229 +    Sizes sizes;
  1.2230 +    SizeOfInternal(&sizes);
  1.2231 +
  1.2232 +    WriteTitle("Execution measurements\n");
  1.2233 +
  1.2234 +    W("Data structures that persist after Dump() ends:\n");
  1.2235 +
  1.2236 +    W("  Used stack traces:    %10s bytes\n",
  1.2237 +      Show(sizes.mStackTracesUsed, gBuf1, kBufLen));
  1.2238 +
  1.2239 +    W("  Unused stack traces:  %10s bytes\n",
  1.2240 +      Show(sizes.mStackTracesUnused, gBuf1, kBufLen));
  1.2241 +
  1.2242 +    W("  Stack trace table:    %10s bytes (%s entries, %s used)\n",
  1.2243 +      Show(sizes.mStackTraceTable,       gBuf1, kBufLen),
  1.2244 +      Show(gStackTraceTable->capacity(), gBuf2, kBufLen),
  1.2245 +      Show(gStackTraceTable->count(),    gBuf3, kBufLen));
  1.2246 +
  1.2247 +    W("  Block table:          %10s bytes (%s entries, %s used)\n",
  1.2248 +      Show(sizes.mBlockTable,       gBuf1, kBufLen),
  1.2249 +      Show(gBlockTable->capacity(), gBuf2, kBufLen),
  1.2250 +      Show(gBlockTable->count(),    gBuf3, kBufLen));
  1.2251 +
  1.2252 +    W("\nData structures that are destroyed after Dump() ends:\n");
  1.2253 +
  1.2254 +    size_t unreportedSize =
  1.2255 +      unreportedTraceRecordTable.sizeOfIncludingThis(MallocSizeOf);
  1.2256 +    W("  Unreported table:     %10s bytes (%s entries, %s used)\n",
  1.2257 +      Show(unreportedSize,                        gBuf1, kBufLen),
  1.2258 +      Show(unreportedTraceRecordTable.capacity(), gBuf2, kBufLen),
  1.2259 +      Show(unreportedTraceRecordTable.count(),    gBuf3, kBufLen));
  1.2260 +
  1.2261 +    size_t onceReportedSize =
  1.2262 +      onceReportedTraceRecordTable.sizeOfIncludingThis(MallocSizeOf);
  1.2263 +    W("  Once-reported table:  %10s bytes (%s entries, %s used)\n",
  1.2264 +      Show(onceReportedSize,                        gBuf1, kBufLen),
  1.2265 +      Show(onceReportedTraceRecordTable.capacity(), gBuf2, kBufLen),
  1.2266 +      Show(onceReportedTraceRecordTable.count(),    gBuf3, kBufLen));
  1.2267 +
  1.2268 +    size_t twiceReportedSize =
  1.2269 +      twiceReportedTraceRecordTable.sizeOfIncludingThis(MallocSizeOf);
  1.2270 +    W("  Twice-reported table: %10s bytes (%s entries, %s used)\n",
  1.2271 +      Show(twiceReportedSize,                        gBuf1, kBufLen),
  1.2272 +      Show(twiceReportedTraceRecordTable.capacity(), gBuf2, kBufLen),
  1.2273 +      Show(twiceReportedTraceRecordTable.count(),    gBuf3, kBufLen));
  1.2274 +
  1.2275 +    W("  Location service:     %10s bytes\n",
  1.2276 +      Show(locService->SizeOfIncludingThis(), gBuf1, kBufLen));
  1.2277 +
  1.2278 +    W("\nCounts:\n");
  1.2279 +
  1.2280 +    size_t hits   = locService->NumCacheHits();
  1.2281 +    size_t misses = locService->NumCacheMisses();
  1.2282 +    size_t requests = hits + misses;
  1.2283 +    W("  Location service:    %10s requests\n",
  1.2284 +      Show(requests, gBuf1, kBufLen));
  1.2285 +
  1.2286 +    size_t count    = locService->CacheCount();
  1.2287 +    size_t capacity = locService->CacheCapacity();
  1.2288 +    W("  Location service cache:  %4.1f%% hit rate, %.1f%% occupancy at end\n",
  1.2289 +      Percent(hits, requests), Percent(count, capacity));
  1.2290 +
  1.2291 +    W("\n");
  1.2292 +  }
  1.2293 +
  1.2294 +  InfallibleAllocPolicy::delete_(locService);
  1.2295 +
  1.2296 +  ClearReportsInternal(); // Use internal version, we already have the lock.
  1.2297 +
  1.2298 +  StatusMsg("}\n");
  1.2299 +}
  1.2300 +
  1.2301 +//---------------------------------------------------------------------------
  1.2302 +// Testing
  1.2303 +//---------------------------------------------------------------------------
  1.2304 +
  1.2305 +// This function checks that heap blocks that have the same stack trace but
  1.2306 +// different (or no) reporters get aggregated separately.
  1.2307 +void foo()
  1.2308 +{
   1.2309 +  char* a[6];
   1.2310 +  for (int i = 0; i < 6; i++) {
   1.2311 +    a[i] = (char*) malloc(128 - 16*i);
   1.2312 +  }
   1.2313 +
   1.2314 +  for (int i = 0; i <= 1; i++)
   1.2315 +    Report(a[i]);                      // reported
   1.2316 +  Report(a[2]);                        // reported
   1.2317 +  Report(a[3]);                        // reported
   1.2318 +  // a[4], a[5] unreported
  1.2319 +}
  1.2320 +
  1.2321 +// This stops otherwise-unused variables from being optimized away.
  1.2322 +static void
  1.2323 +UseItOrLoseIt(void* a)
  1.2324 +{
  1.2325 +  char buf[64];
  1.2326 +  sprintf(buf, "%p\n", a);
   1.2327 +  fwrite(buf, 1, strlen(buf), stderr);  // don't write the trailing '\0'
  1.2328 +}
  1.2329 +
  1.2330 +// The output from this should be compared against test-expected.dmd.  It's
  1.2331 +// been tested on Linux64, and probably will give different results on other
  1.2332 +// platforms.
  1.2333 +static void
  1.2334 +RunTestMode(FILE* fp)
  1.2335 +{
  1.2336 +  Writer writer(FpWrite, fp);
  1.2337 +
  1.2338 +  // The first part of this test requires sampling to be disabled.
  1.2339 +  gOptions->SetSampleBelowSize(1);
  1.2340 +
  1.2341 +  // Dump 1.  Zero for everything.
  1.2342 +  Dump(writer);
  1.2343 +
  1.2344 +  // Dump 2: 1 freed, 9 out of 10 unreported.
  1.2345 +  // Dump 3: still present and unreported.
  1.2346 +  int i;
  1.2347 +  char* a;
  1.2348 +  for (i = 0; i < 10; i++) {
   1.2349 +    a = (char*) malloc(100);
   1.2350 +    UseItOrLoseIt(a);
  1.2351 +  }
  1.2352 +  free(a);
  1.2353 +
  1.2354 +  // Min-sized block.
  1.2355 +  // Dump 2: reported.
  1.2356 +  // Dump 3: thrice-reported.
  1.2357 +  char* a2 = (char*) malloc(0);
  1.2358 +  Report(a2);
  1.2359 +
  1.2360 +  // Operator new[].
  1.2361 +  // Dump 2: reported.
  1.2362 +  // Dump 3: reportedness carries over, due to ReportOnAlloc.
  1.2363 +  char* b = new char[10];
  1.2364 +  ReportOnAlloc(b);
  1.2365 +
  1.2366 +  // ReportOnAlloc, then freed.
  1.2367 +  // Dump 2: freed, irrelevant.
  1.2368 +  // Dump 3: freed, irrelevant.
  1.2369 +  char* b2 = new char;
  1.2370 +  ReportOnAlloc(b2);
  1.2371 +  free(b2);
  1.2372 +
  1.2373 +  // Dump 2: reported 4 times.
  1.2374 +  // Dump 3: freed, irrelevant.
  1.2375 +  char* c = (char*) calloc(10, 3);
  1.2376 +  Report(c);
  1.2377 +  for (int i = 0; i < 3; i++) {
  1.2378 +    Report(c);
  1.2379 +  }
  1.2380 +
  1.2381 +  // Dump 2: ignored.
  1.2382 +  // Dump 3: irrelevant.
  1.2383 +  Report((void*)(intptr_t)i);
  1.2384 +
  1.2385 +  // jemalloc rounds this up to 8192.
  1.2386 +  // Dump 2: reported.
  1.2387 +  // Dump 3: freed.
  1.2388 +  char* e = (char*) malloc(4096);
  1.2389 +  e = (char*) realloc(e, 4097);
  1.2390 +  Report(e);
  1.2391 +
  1.2392 +  // First realloc is like malloc;  second realloc is shrinking.
  1.2393 +  // Dump 2: reported.
  1.2394 +  // Dump 3: re-reported.
  1.2395 +  char* e2 = (char*) realloc(nullptr, 1024);
  1.2396 +  e2 = (char*) realloc(e2, 512);
  1.2397 +  Report(e2);
  1.2398 +
  1.2399 +  // First realloc is like malloc;  second realloc creates a min-sized block.
  1.2400 +  // XXX: on Windows, second realloc frees the block.
  1.2401 +  // Dump 2: reported.
  1.2402 +  // Dump 3: freed, irrelevant.
  1.2403 +  char* e3 = (char*) realloc(nullptr, 1023);
  1.2404 +//e3 = (char*) realloc(e3, 0);
  1.2405 +  MOZ_ASSERT(e3);
  1.2406 +  Report(e3);
  1.2407 +
  1.2408 +  // Dump 2: freed, irrelevant.
  1.2409 +  // Dump 3: freed, irrelevant.
  1.2410 +  char* f = (char*) malloc(64);
  1.2411 +  free(f);
  1.2412 +
  1.2413 +  // Dump 2: ignored.
  1.2414 +  // Dump 3: irrelevant.
  1.2415 +  Report((void*)(intptr_t)0x0);
  1.2416 +
  1.2417 +  // Dump 2: mixture of reported and unreported.
  1.2418 +  // Dump 3: all unreported.
  1.2419 +  foo();
  1.2420 +  foo();
  1.2421 +
  1.2422 +  // Dump 2: twice-reported.
  1.2423 +  // Dump 3: twice-reported.
  1.2424 +  char* g1 = (char*) malloc(77);
  1.2425 +  ReportOnAlloc(g1);
  1.2426 +  ReportOnAlloc(g1);
  1.2427 +
  1.2428 +  // Dump 2: twice-reported.
  1.2429 +  // Dump 3: once-reported.
  1.2430 +  char* g2 = (char*) malloc(78);
  1.2431 +  Report(g2);
  1.2432 +  ReportOnAlloc(g2);
  1.2433 +
  1.2434 +  // Dump 2: twice-reported.
  1.2435 +  // Dump 3: once-reported.
  1.2436 +  char* g3 = (char*) malloc(79);
  1.2437 +  ReportOnAlloc(g3);
  1.2438 +  Report(g3);
  1.2439 +
  1.2440 +  // All the odd-ball ones.
  1.2441 +  // Dump 2: all unreported.
  1.2442 +  // Dump 3: all freed, irrelevant.
  1.2443 +  // XXX: no memalign on Mac
  1.2444 +//void* x = memalign(64, 65);           // rounds up to 128
  1.2445 +//UseItOrLoseIt(x);
  1.2446 +  // XXX: posix_memalign doesn't work on B2G
  1.2447 +//void* y;
  1.2448 +//posix_memalign(&y, 128, 129);         // rounds up to 256
  1.2449 +//UseItOrLoseIt(y);
  1.2450 +  // XXX: valloc doesn't work on Windows.
  1.2451 +//void* z = valloc(1);                  // rounds up to 4096
  1.2452 +//UseItOrLoseIt(z);
  1.2453 +//aligned_alloc(64, 256);               // XXX: C11 only
  1.2454 +
  1.2455 +  // Dump 2.
  1.2456 +  Dump(writer);
  1.2457 +
  1.2458 +  //---------
  1.2459 +
  1.2460 +  Report(a2);
  1.2461 +  Report(a2);
  1.2462 +  free(c);
  1.2463 +  free(e);
  1.2464 +  Report(e2);
  1.2465 +  free(e3);
  1.2466 +//free(x);
  1.2467 +//free(y);
  1.2468 +//free(z);
  1.2469 +
  1.2470 +  // Dump 3.
  1.2471 +  Dump(writer);
  1.2472 +
  1.2473 +  //---------
  1.2474 +
  1.2475 +  // Clear all knowledge of existing blocks to give us a clean slate.
  1.2476 +  gBlockTable->clear();
  1.2477 +
  1.2478 +  gOptions->SetSampleBelowSize(128);
  1.2479 +
  1.2480 +  char* s;
  1.2481 +
  1.2482 +  // This equals the sample size, and so is reported exactly.  It should be
  1.2483 +  // listed before records of the same size that are sampled.
  1.2484 +  s = (char*) malloc(128);
  1.2485 +  UseItOrLoseIt(s);
  1.2486 +
  1.2487 +  // This exceeds the sample size, and so is reported exactly.
  1.2488 +  s = (char*) malloc(144);
  1.2489 +  UseItOrLoseIt(s);
  1.2490 +
  1.2491 +  // These together constitute exactly one sample.
  1.2492 +  for (int i = 0; i < 16; i++) {
  1.2493 +    s = (char*) malloc(8);
  1.2494 +    UseItOrLoseIt(s);
  1.2495 +  }
  1.2496 +  MOZ_ASSERT(gSmallBlockActualSizeCounter == 0);
  1.2497 +
  1.2498 +  // These fall 8 bytes short of a full sample.
  1.2499 +  for (int i = 0; i < 15; i++) {
  1.2500 +    s = (char*) malloc(8);
  1.2501 +    UseItOrLoseIt(s);
  1.2502 +  }
  1.2503 +  MOZ_ASSERT(gSmallBlockActualSizeCounter == 120);
  1.2504 +
  1.2505 +  // This exceeds the sample size, and so is recorded exactly.
  1.2506 +  s = (char*) malloc(256);
  1.2507 +  UseItOrLoseIt(s);
  1.2508 +  MOZ_ASSERT(gSmallBlockActualSizeCounter == 120);
  1.2509 +
   1.2510 +  // Together with the 120 bytes left over from the |i < 15| loop above,
          +  // this gets us past a full sample:  120 + 96 = 216 = 128 + 88.
  1.2511 +  s = (char*) malloc(96);
  1.2512 +  UseItOrLoseIt(s);
  1.2513 +  MOZ_ASSERT(gSmallBlockActualSizeCounter == 88);
  1.2514 +
   1.2515 +  // This gets us to exactly one more full sample:  88 + 5*8 = 128.
  1.2516 +  for (int i = 0; i < 5; i++) {
  1.2517 +    s = (char*) malloc(8);
  1.2518 +    UseItOrLoseIt(s);
  1.2519 +  }
  1.2520 +  MOZ_ASSERT(gSmallBlockActualSizeCounter == 0);
  1.2521 +
  1.2522 +  // This allocates 16, 32, ..., 128 bytes, which results in a stack trace
   1.2523 +  // record that contains a mix of sampled and non-sampled blocks, and so should
  1.2524 +  // be printed with '~' signs.
  1.2525 +  for (int i = 1; i <= 8; i++) {
  1.2526 +    s = (char*) malloc(i * 16);
  1.2527 +    UseItOrLoseIt(s);
  1.2528 +  }
  1.2529 +  MOZ_ASSERT(gSmallBlockActualSizeCounter == 64);
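          +  // The sampled sizes here sum to 16 + 32 + ... + 112 = 448 = 3*128 + 64,
          +  // so three full samples were recorded and the counter ends at 64.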
  1.2530 +
  1.2531 +  // At the end we're 64 bytes into the current sample so we report ~1,424
  1.2532 +  // bytes of allocation overall, which is 64 less than the real value 1,488.
  1.2533 +
  1.2534 +  // Dump 4.
  1.2535 +  Dump(writer);
  1.2536 +}
  1.2537 +
  1.2538 +//---------------------------------------------------------------------------
  1.2539 +// Stress testing microbenchmark
  1.2540 +//---------------------------------------------------------------------------
  1.2541 +
  1.2542 +// This stops otherwise-unused variables from being optimized away.
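          +// Unlike UseItOrLoseIt(), it does no I/O on the common path (the 0x42 check
          +// never realistically succeeds), so it adds negligible overhead to the
          +// timed allocation loops below.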
  1.2543 +static void
  1.2544 +UseItOrLoseIt2(void* a)
  1.2545 +{
  1.2546 +  if (a == (void*)0x42) {
  1.2547 +    printf("UseItOrLoseIt2\n");
  1.2548 +  }
  1.2549 +}
  1.2550 +
  1.2551 +MOZ_NEVER_INLINE static void
  1.2552 +stress5()
  1.2553 +{
  1.2554 +  for (int i = 0; i < 10; i++) {
  1.2555 +    void* x = malloc(64);
  1.2556 +    UseItOrLoseIt2(x);
  1.2557 +    if (i & 1) {
  1.2558 +      free(x);
  1.2559 +    }
  1.2560 +  }
  1.2561 +}
  1.2562 +
  1.2563 +MOZ_NEVER_INLINE static void
  1.2564 +stress4()
  1.2565 +{
  1.2566 +  stress5(); stress5(); stress5(); stress5(); stress5();
  1.2567 +  stress5(); stress5(); stress5(); stress5(); stress5();
  1.2568 +}
  1.2569 +
  1.2570 +MOZ_NEVER_INLINE static void
  1.2571 +stress3()
  1.2572 +{
  1.2573 +  for (int i = 0; i < 10; i++) {
  1.2574 +    stress4();
  1.2575 +  }
  1.2576 +}
  1.2577 +
  1.2578 +MOZ_NEVER_INLINE static void
  1.2579 +stress2()
  1.2580 +{
  1.2581 +  stress3(); stress3(); stress3(); stress3(); stress3();
  1.2582 +  stress3(); stress3(); stress3(); stress3(); stress3();
  1.2583 +}
  1.2584 +
  1.2585 +MOZ_NEVER_INLINE static void
  1.2586 +stress1()
  1.2587 +{
  1.2588 +  for (int i = 0; i < 10; i++) {
  1.2589 +    stress2();
  1.2590 +  }
  1.2591 +}
  1.2592 +
  1.2593 +// This stress test does lots of allocations and frees, which is where most of
  1.2594 +// DMD's overhead occurs.  It allocates 1,000,000 64-byte blocks, spread evenly
  1.2595 +// across 1,000 distinct stack traces.  It frees every second one immediately
  1.2596 +// after allocating it.
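          +// (Ten calls at each of six nested levels give 10^6 allocations;  the ten
          +// distinct call sites in each of stress4(), stress2() and RunStressMode()
          +// give 10 * 10 * 10 = 1,000 distinct stack traces.)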
  1.2597 +//
  1.2598 +// It's highly artificial, but it's deterministic and easy to run.  It can be
  1.2599 +// timed under different conditions to glean performance data.
  1.2600 +static void
  1.2601 +RunStressMode(FILE* fp)
  1.2602 +{
  1.2603 +  Writer writer(FpWrite, fp);
  1.2604 +
  1.2605 +  // Disable sampling for maximum stress.
  1.2606 +  gOptions->SetSampleBelowSize(1);
  1.2607 +
  1.2608 +  stress1(); stress1(); stress1(); stress1(); stress1();
  1.2609 +  stress1(); stress1(); stress1(); stress1(); stress1();
  1.2610 +
  1.2611 +  Dump(writer);
  1.2612 +}
  1.2613 +
  1.2614 +}   // namespace dmd
  1.2615 +}   // namespace mozilla
