memory/replace/dmd/DMD.cpp

author       Michael Schloh von Bennewitz <michael@schloh.com>
date         Tue, 06 Jan 2015 21:39:09 +0100
branch       TOR_BUG_9701
changeset    8:97036ab72558
permissions  -rw-r--r--

Conditionally force memory storage according to privacy.thirdparty.isolate.
This solves Tor bug #9701, complying with the disk avoidance requirement
documented at https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.
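
The description implies a runtime gate on that preference. As a rough sketch
(hypothetical helper name; it assumes a boolean pref readable through
mozilla::Preferences, which may not match the actual patch):

    #include "mozilla/Preferences.h"

    // Hypothetical illustration, not part of the listing below: storage
    // backends consult a gate like this and keep data in memory (never on
    // disk) when third-party isolation is enabled.
    static bool ShouldForceMemoryStorage()
    {
      return mozilla::Preferences::GetBool("privacy.thirdparty.isolate", false);
    }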

/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "DMD.h"

#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#ifdef XP_WIN
#if defined(MOZ_OPTIMIZE) && !defined(MOZ_PROFILING)
#error "Optimized, DMD-enabled builds on Windows must be built with --enable-profiling"
#endif
#include <windows.h>
#include <process.h>
#else
#include <unistd.h>
#endif

#ifdef ANDROID
#include <android/log.h>
#endif

#include "nscore.h"
#include "nsStackWalk.h"

#include "js/HashTable.h"
#include "js/Vector.h"

#include "mozilla/Assertions.h"
#include "mozilla/HashFunctions.h"
#include "mozilla/Likely.h"
#include "mozilla/MemoryReporting.h"

// MOZ_REPLACE_ONLY_MEMALIGN saves us from having to define
// replace_{posix_memalign,aligned_alloc,valloc}.  It requires defining
// PAGE_SIZE.  Nb: sysconf() is expensive, but it's only used for (the obsolete
// and rarely used) valloc.
#define MOZ_REPLACE_ONLY_MEMALIGN 1
#ifdef XP_WIN
#define PAGE_SIZE GetPageSize()
static long GetPageSize()
{
  SYSTEM_INFO si;
  GetSystemInfo(&si);
  return si.dwPageSize;
}
#else
#define PAGE_SIZE sysconf(_SC_PAGESIZE)
#endif
#include "replace_malloc.h"
#undef MOZ_REPLACE_ONLY_MEMALIGN
#undef PAGE_SIZE

namespace mozilla {
namespace dmd {

//---------------------------------------------------------------------------
// Utilities
//---------------------------------------------------------------------------

#ifndef DISALLOW_COPY_AND_ASSIGN
#define DISALLOW_COPY_AND_ASSIGN(T) \
  T(const T&);                      \
  void operator=(const T&)
#endif

static const malloc_table_t* gMallocTable = nullptr;

// This enables/disables DMD.
static bool gIsDMDRunning = false;

// This provides infallible allocations (they abort on OOM).  We use it for all
// of DMD's own allocations, which fall into the following three cases.
// - Direct allocations (the easy case).
// - Indirect allocations in js::{Vector,HashSet,HashMap} -- this class serves
//   as their AllocPolicy.
// - Other indirect allocations (e.g. NS_StackWalk) -- see the comments on
//   Thread::mBlockIntercepts and in replace_malloc for how these work.
//
class InfallibleAllocPolicy
{
  static void ExitOnFailure(const void* aP);

public:
  static void* malloc_(size_t aSize)
  {
    void* p = gMallocTable->malloc(aSize);
    ExitOnFailure(p);
    return p;
  }

  static void* calloc_(size_t aSize)
  {
    void* p = gMallocTable->calloc(1, aSize);
    ExitOnFailure(p);
    return p;
  }

  // This realloc_ is the one we use for direct reallocs within DMD.
  static void* realloc_(void* aPtr, size_t aNewSize)
  {
    void* p = gMallocTable->realloc(aPtr, aNewSize);
    ExitOnFailure(p);
    return p;
  }

  // This realloc_ is required for this to be a JS container AllocPolicy.
  static void* realloc_(void* aPtr, size_t aOldSize, size_t aNewSize)
  {
    return InfallibleAllocPolicy::realloc_(aPtr, aNewSize);
  }

  static void* memalign_(size_t aAlignment, size_t aSize)
  {
    void* p = gMallocTable->memalign(aAlignment, aSize);
    ExitOnFailure(p);
    return p;
  }

  static void free_(void* aPtr) { gMallocTable->free(aPtr); }

  static char* strdup_(const char* aStr)
  {
    char* s = (char*) InfallibleAllocPolicy::malloc_(strlen(aStr) + 1);
    strcpy(s, aStr);
    return s;
  }

  template <class T>
  static T* new_()
  {
    void* mem = malloc_(sizeof(T));
    ExitOnFailure(mem);
    return new (mem) T;
  }

  template <class T, typename P1>
  static T* new_(P1 p1)
  {
    void* mem = malloc_(sizeof(T));
    ExitOnFailure(mem);
    return new (mem) T(p1);
  }

  template <class T>
  static void delete_(T *p)
  {
    if (p) {
      p->~T();
      InfallibleAllocPolicy::free_(p);
    }
  }

  static void reportAllocOverflow() { ExitOnFailure(nullptr); }
};
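
// Usage sketch (illustrative, not part of the original file): because this
// class is an AllocPolicy, insertion into the js:: containers used below
// aborts on OOM instead of failing:
//
//   js::Vector<int, 0, InfallibleAllocPolicy> v;
//   (void)v.append(42);   // on OOM this aborts via ExitOnFailure()
//
//   char* s = InfallibleAllocPolicy::strdup_("copy me");
//   InfallibleAllocPolicy::free_(s);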
// This is only needed because of the |const void*| vs |void*| arg mismatch.
static size_t
MallocSizeOf(const void* aPtr)
{
  return gMallocTable->malloc_usable_size(const_cast<void*>(aPtr));
}

static void
StatusMsg(const char* aFmt, ...)
{
  va_list ap;
  va_start(ap, aFmt);
#ifdef ANDROID
  __android_log_vprint(ANDROID_LOG_INFO, "DMD", aFmt, ap);
#else
  // The +64 is easily enough for the "DMD[<pid>] " prefix and the NUL.
  char* fmt = (char*) InfallibleAllocPolicy::malloc_(strlen(aFmt) + 64);
  sprintf(fmt, "DMD[%d] %s", getpid(), aFmt);
  vfprintf(stderr, fmt, ap);
  InfallibleAllocPolicy::free_(fmt);
#endif
  va_end(ap);
}

/* static */ void
InfallibleAllocPolicy::ExitOnFailure(const void* aP)
{
  if (!aP) {
    StatusMsg("out of memory;  aborting\n");
    MOZ_CRASH();
  }
}

void
Writer::Write(const char* aFmt, ...) const
{
  va_list ap;
  va_start(ap, aFmt);
  mWriterFun(mWriteState, aFmt, ap);
  va_end(ap);
}

#define W(...) aWriter.Write(__VA_ARGS__);

#define WriteTitle(...)                                                       \
  W("------------------------------------------------------------------\n"); \
  W(__VA_ARGS__);                                                             \
  W("------------------------------------------------------------------\n\n");

MOZ_EXPORT void
FpWrite(void* aWriteState, const char* aFmt, va_list aAp)
{
  FILE* fp = static_cast<FILE*>(aWriteState);
  vfprintf(fp, aFmt, aAp);
}

static double
Percent(size_t part, size_t whole)
{
  return (whole == 0) ? 0 : 100 * (double)part / whole;
}

// Commifies the number and prepends a '~' if requested.  Best used with
// |kBufLen| and |gBuf[1234]|, because they should be big enough for any number
// we'll see.
static char*
Show(size_t n, char* buf, size_t buflen, bool addTilde = false)
{
  int nc = 0, i = 0, lasti = buflen - 2;
  buf[lasti + 1] = '\0';
  if (n == 0) {
    buf[lasti - i] = '0';
    i++;
  } else {
    while (n > 0) {
      if (((i - nc) % 3) == 0 && i != 0) {
        buf[lasti - i] = ',';
        i++;
        nc++;
      }
      buf[lasti - i] = static_cast<char>((n % 10) + '0');
      i++;
      n /= 10;
    }
  }
  int firstCharIndex = lasti - i + 1;

  if (addTilde) {
    firstCharIndex--;
    buf[firstCharIndex] = '~';
  }

  MOZ_ASSERT(firstCharIndex >= 0);
  return &buf[firstCharIndex];
}
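
// For example (illustrative): Show(1234567, gBuf1, kBufLen) builds the string
// "1,234,567" in the tail of |gBuf1| and returns a pointer to its first
// character; Show(1234567, gBuf1, kBufLen, true) yields "~1,234,567".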
static const char*
Plural(size_t aN)
{
  return aN == 1 ? "" : "s";
}

// Used by calls to Show().
static const size_t kBufLen = 64;
static char gBuf1[kBufLen];
static char gBuf2[kBufLen];
static char gBuf3[kBufLen];
static char gBuf4[kBufLen];

//---------------------------------------------------------------------------
// Options (Part 1)
//---------------------------------------------------------------------------

class Options
{
  template <typename T>
  struct NumOption
  {
    const T mDefault;
    const T mMax;
    T       mActual;
    NumOption(T aDefault, T aMax)
      : mDefault(aDefault), mMax(aMax), mActual(aDefault)
    {}
  };

  enum Mode {
    Normal,   // run normally
    Test,     // do some basic correctness tests
    Stress    // do some performance stress tests
  };

  char* mDMDEnvVar;   // a saved copy, for printing during Dump()

  NumOption<size_t>   mSampleBelowSize;
  NumOption<uint32_t> mMaxFrames;
  NumOption<uint32_t> mMaxRecords;
  Mode mMode;

  void BadArg(const char* aArg);
  static const char* ValueIfMatch(const char* aArg, const char* aOptionName);
  static bool GetLong(const char* aArg, const char* aOptionName,
                      long aMin, long aMax, long* aN);

public:
  Options(const char* aDMDEnvVar);

  const char* DMDEnvVar() const { return mDMDEnvVar; }

  size_t SampleBelowSize() const { return mSampleBelowSize.mActual; }
  size_t MaxFrames()       const { return mMaxFrames.mActual; }
  size_t MaxRecords()      const { return mMaxRecords.mActual; }

  void SetSampleBelowSize(size_t aN) { mSampleBelowSize.mActual = aN; }

  bool IsTestMode()   const { return mMode == Test; }
  bool IsStressMode() const { return mMode == Stress; }
};

static Options *gOptions;

//---------------------------------------------------------------------------
// The global lock
//---------------------------------------------------------------------------

// MutexBase implements the platform-specific parts of a mutex.

#ifdef XP_WIN

class MutexBase
{
  CRITICAL_SECTION mCS;

  DISALLOW_COPY_AND_ASSIGN(MutexBase);

public:
  MutexBase()
  {
    InitializeCriticalSection(&mCS);
  }

  ~MutexBase()
  {
    DeleteCriticalSection(&mCS);
  }

  void Lock()
  {
    EnterCriticalSection(&mCS);
  }

  void Unlock()
  {
    LeaveCriticalSection(&mCS);
  }
};

#else

#include <pthread.h>
#include <sys/types.h>

class MutexBase
{
  pthread_mutex_t mMutex;

  DISALLOW_COPY_AND_ASSIGN(MutexBase);

public:
  MutexBase()
  {
    pthread_mutex_init(&mMutex, nullptr);
  }

  void Lock()
  {
    pthread_mutex_lock(&mMutex);
  }

  void Unlock()
  {
    pthread_mutex_unlock(&mMutex);
  }
};

#endif

class Mutex : private MutexBase
{
  bool mIsLocked;

  DISALLOW_COPY_AND_ASSIGN(Mutex);

public:
  Mutex()
    : mIsLocked(false)
  {}

  void Lock()
  {
    MutexBase::Lock();
    MOZ_ASSERT(!mIsLocked);
    mIsLocked = true;
  }

  void Unlock()
  {
    MOZ_ASSERT(mIsLocked);
    mIsLocked = false;
    MutexBase::Unlock();
  }

  bool IsLocked()
  {
    return mIsLocked;
  }
};

// This lock must be held while manipulating global state, such as
// gStackTraceTable, gBlockTable, etc.
static Mutex* gStateLock = nullptr;

class AutoLockState
{
  DISALLOW_COPY_AND_ASSIGN(AutoLockState);

public:
  AutoLockState()
  {
    gStateLock->Lock();
  }
  ~AutoLockState()
  {
    gStateLock->Unlock();
  }
};

class AutoUnlockState
{
  DISALLOW_COPY_AND_ASSIGN(AutoUnlockState);

public:
  AutoUnlockState()
  {
    gStateLock->Unlock();
  }
  ~AutoUnlockState()
  {
    gStateLock->Lock();
  }
};
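
// Usage sketch (illustrative): these RAII classes nest when a call that may
// itself allocate (and take other locks) has to run outside gStateLock, as
// WriteLocation() and StackTrace::Get() do below:
//
//   AutoLockState lock;                  // acquire gStateLock
//   {
//     AutoUnlockState unlock;            // temporarily release it...
//     CallThatMightTakeOtherLocks();     // (hypothetical callee)
//   }                                    // ...and re-acquire it here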
//---------------------------------------------------------------------------
// Thread-local storage and blocking of intercepts
//---------------------------------------------------------------------------

#ifdef XP_WIN

#define DMD_TLS_INDEX_TYPE              DWORD
#define DMD_CREATE_TLS_INDEX(i_)        do {                                  \
                                          (i_) = TlsAlloc();                  \
                                        } while (0)
#define DMD_DESTROY_TLS_INDEX(i_)       TlsFree((i_))
#define DMD_GET_TLS_DATA(i_)            TlsGetValue((i_))
#define DMD_SET_TLS_DATA(i_, v_)        TlsSetValue((i_), (v_))

#else

#include <pthread.h>

#define DMD_TLS_INDEX_TYPE               pthread_key_t
#define DMD_CREATE_TLS_INDEX(i_)         pthread_key_create(&(i_), nullptr)
#define DMD_DESTROY_TLS_INDEX(i_)        pthread_key_delete((i_))
#define DMD_GET_TLS_DATA(i_)             pthread_getspecific((i_))
#define DMD_SET_TLS_DATA(i_, v_)         pthread_setspecific((i_), (v_))

#endif

static DMD_TLS_INDEX_TYPE gTlsIndex;

class Thread
{
  // Required for allocation via InfallibleAllocPolicy::new_.
  friend class InfallibleAllocPolicy;

  // When true, this blocks intercepts, which allows malloc interception
  // functions to themselves call malloc.  (Nb: for direct calls to malloc we
  // can just use InfallibleAllocPolicy::{malloc_,new_}, but we sometimes
  // indirectly call vanilla malloc via functions like NS_StackWalk.)
  bool mBlockIntercepts;

  Thread()
    : mBlockIntercepts(false)
  {}

  DISALLOW_COPY_AND_ASSIGN(Thread);

public:
  static Thread* Fetch();

  bool BlockIntercepts()
  {
    MOZ_ASSERT(!mBlockIntercepts);
    return mBlockIntercepts = true;
  }

  bool UnblockIntercepts()
  {
    MOZ_ASSERT(mBlockIntercepts);
    return mBlockIntercepts = false;
  }

  bool InterceptsAreBlocked() const
  {
    return mBlockIntercepts;
  }
};

/* static */ Thread*
Thread::Fetch()
{
  Thread* t = static_cast<Thread*>(DMD_GET_TLS_DATA(gTlsIndex));

  if (MOZ_UNLIKELY(!t)) {
    // This memory is never freed, even if the thread dies.  It's a leak, but
    // only a tiny one.
    t = InfallibleAllocPolicy::new_<Thread>();
    DMD_SET_TLS_DATA(gTlsIndex, t);
  }

  return t;
}

// An object of this class must be created (on the stack) before running any
// code that might allocate.
class AutoBlockIntercepts
{
  Thread* const mT;

  DISALLOW_COPY_AND_ASSIGN(AutoBlockIntercepts);

public:
  AutoBlockIntercepts(Thread* aT)
    : mT(aT)
  {
    mT->BlockIntercepts();
  }
  ~AutoBlockIntercepts()
  {
    MOZ_ASSERT(mT->InterceptsAreBlocked());
    mT->UnblockIntercepts();
  }
};
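
// Usage sketch (illustrative): AllocCallback() and FreeCallback() below use
// this pattern so that DMD's own bookkeeping allocations are not themselves
// intercepted and recorded:
//
//   Thread* t = Thread::Fetch();
//   AutoLockState lock;
//   AutoBlockIntercepts block(t);
//   // ...mallocs made here are routed to InfallibleAllocPolicy...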
//---------------------------------------------------------------------------
// Location service
//---------------------------------------------------------------------------

// This class is used to print details about code locations.
class LocationService
{
  // WriteLocation() is the key function in this class.  It's basically a
  // wrapper around NS_DescribeCodeAddress.
  //
  // However, NS_DescribeCodeAddress is very slow on some platforms, and we
  // have lots of repeated (i.e. same PC) calls to it.  So we do some caching
  // of results.  Each cached result includes two strings (|mFunction| and
  // |mLibrary|), so we also optimize them for space in the following ways.
  //
  // - The number of distinct library names is small, e.g. a few dozen.  There
  //   is lots of repetition, especially of libxul.  So we intern them in their
  //   own table, which saves space over duplicating them for each cache entry.
  //
  // - The number of distinct function names is much higher, so we duplicate
  //   them in each cache entry.  That's more space-efficient than interning
  //   because entries containing single-occurrence function names are quickly
  //   overwritten, and their copies released.  In addition, empty function
  //   names are common, so we use nullptr to represent them compactly.

  struct StringHasher
  {
      typedef const char* Lookup;

      static uint32_t hash(const char* const& aS)
      {
          return HashString(aS);
      }

      static bool match(const char* const& aA, const char* const& aB)
      {
          return strcmp(aA, aB) == 0;
      }
  };

  typedef js::HashSet<const char*, StringHasher, InfallibleAllocPolicy>
          StringTable;

  StringTable mLibraryStrings;

  struct Entry
  {
    const void* mPc;
    char*       mFunction;  // owned by the Entry;  may be null
    const char* mLibrary;   // owned by mLibraryStrings;  never null
                            //   in an entry that is in use
    ptrdiff_t   mLOffset;
    char*       mFileName;  // owned by the Entry; may be null
    uint32_t    mLineNo:31;
    uint32_t    mInUse:1;   // is the entry used?

    Entry()
      : mPc(0), mFunction(nullptr), mLibrary(nullptr), mLOffset(0),
        mFileName(nullptr), mLineNo(0), mInUse(0)
    {}

    ~Entry()
    {
      // We don't free mLibrary because it's externally owned.
      InfallibleAllocPolicy::free_(mFunction);
      InfallibleAllocPolicy::free_(mFileName);
    }

    void Replace(const void* aPc, const char* aFunction,
                 const char* aLibrary, ptrdiff_t aLOffset,
                 const char* aFileName, unsigned long aLineNo)
    {
      mPc = aPc;

      // Convert "" to nullptr.  Otherwise, make a copy of the name.
      InfallibleAllocPolicy::free_(mFunction);
      mFunction =
        !aFunction[0] ? nullptr : InfallibleAllocPolicy::strdup_(aFunction);
      InfallibleAllocPolicy::free_(mFileName);
      mFileName =
        !aFileName[0] ? nullptr : InfallibleAllocPolicy::strdup_(aFileName);

      mLibrary = aLibrary;
      mLOffset = aLOffset;
      mLineNo = aLineNo;

      mInUse = 1;
    }

    size_t SizeOfExcludingThis() {
      // Don't measure mLibrary because it's externally owned.
      return MallocSizeOf(mFunction) + MallocSizeOf(mFileName);
    }
  };

  // A direct-mapped cache.  When doing a dump just after starting desktop
  // Firefox (which is similar to dumping after a longer-running session,
  // thanks to the limit on how many records we dump), a cache with 2^24
  // entries (which approximates an infinite-entry cache) has a ~91% hit rate.
  // A cache with 2^12 entries has a ~83% hit rate, and takes up ~85 KiB (on
  // 32-bit platforms) or ~150 KiB (on 64-bit platforms).
  static const size_t kNumEntries = 1 << 12;
  static const size_t kMask = kNumEntries - 1;
  Entry mEntries[kNumEntries];

  size_t mNumCacheHits;
  size_t mNumCacheMisses;

public:
  LocationService()
    : mEntries(), mNumCacheHits(0), mNumCacheMisses(0)
  {
    (void)mLibraryStrings.init(64);
  }

  void WriteLocation(const Writer& aWriter, const void* aPc)
  {
    MOZ_ASSERT(gStateLock->IsLocked());

    uint32_t index = HashGeneric(aPc) & kMask;
    MOZ_ASSERT(index < kNumEntries);
    Entry& entry = mEntries[index];

    if (!entry.mInUse || entry.mPc != aPc) {
      mNumCacheMisses++;

      // NS_DescribeCodeAddress can (on Linux) acquire a lock inside
      // the shared library loader.  Another thread might call malloc
      // while holding that lock (when loading a shared library).  So
      // we have to exit gStateLock around this call.  For details, see
      // https://bugzilla.mozilla.org/show_bug.cgi?id=363334#c3
      nsCodeAddressDetails details;
      {
        AutoUnlockState unlock;
        (void)NS_DescribeCodeAddress(const_cast<void*>(aPc), &details);
      }

      // Intern the library name.
      const char* library = nullptr;
      StringTable::AddPtr p = mLibraryStrings.lookupForAdd(details.library);
      if (!p) {
        library = InfallibleAllocPolicy::strdup_(details.library);
        (void)mLibraryStrings.add(p, library);
      } else {
        library = *p;
      }

      entry.Replace(aPc, details.function, library, details.loffset,
                    details.filename, details.lineno);

    } else {
      mNumCacheHits++;
    }

    MOZ_ASSERT(entry.mPc == aPc);

    uintptr_t entryPc = (uintptr_t)(entry.mPc);
    // Sometimes we get nothing useful.  Just print "???" for the entire entry
    // so that fix-linux-stack.pl doesn't complain about an empty filename.
    if (!entry.mFunction && !entry.mLibrary[0] && entry.mLOffset == 0) {
      W("   ??? 0x%x\n", entryPc);
    } else {
      // Use "???" for unknown functions.
      const char* entryFunction = entry.mFunction ? entry.mFunction : "???";
      if (entry.mFileName) {
        // On Windows we can get the filename and line number at runtime.
        W("   %s (%s:%lu) 0x%x\n",
          entryFunction, entry.mFileName, entry.mLineNo, entryPc);
      } else {
        // On Linux and Mac we cannot get the filename and line number at
        // runtime, so we print the offset in a form that fix-linux-stack.pl
        // and fix_macosx_stack.py can post-process.
        W("   %s[%s +0x%X] 0x%x\n",
          entryFunction, entry.mLibrary, entry.mLOffset, entryPc);
      }
    }
  }

  size_t SizeOfIncludingThis()
  {
    size_t n = MallocSizeOf(this);
    for (uint32_t i = 0; i < kNumEntries; i++) {
      n += mEntries[i].SizeOfExcludingThis();
    }

    n += mLibraryStrings.sizeOfExcludingThis(MallocSizeOf);
    for (StringTable::Range r = mLibraryStrings.all();
         !r.empty();
         r.popFront()) {
      n += MallocSizeOf(r.front());
    }

    return n;
  }

  size_t CacheCapacity() const { return kNumEntries; }

  size_t CacheCount() const
  {
    size_t n = 0;
    for (size_t i = 0; i < kNumEntries; i++) {
      if (mEntries[i].mInUse) {
        n++;
      }
    }
    return n;
  }

  size_t NumCacheHits()   const { return mNumCacheHits; }
  size_t NumCacheMisses() const { return mNumCacheMisses; }
};
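
// Cache arithmetic (illustrative): kNumEntries == 1 << 12 == 4096, so kMask ==
// 0xFFF and |HashGeneric(aPc) & kMask| maps each PC to one of 4096 slots; two
// PCs whose hashes agree in the low 12 bits simply evict one another, which is
// what bounds the cache at the ~83% hit rate quoted above.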
//---------------------------------------------------------------------------
// Stack traces
//---------------------------------------------------------------------------

class StackTrace
{
public:
  static const uint32_t MaxFrames = 24;

private:
  uint32_t mLength;         // The number of PCs.
  void* mPcs[MaxFrames];    // The PCs themselves.  If --max-frames is less
                            // than 24, this array is bigger than necessary,
                            // but that case is unusual.

public:
  StackTrace() : mLength(0) {}

  uint32_t Length() const { return mLength; }
  void* Pc(uint32_t i) const { MOZ_ASSERT(i < mLength); return mPcs[i]; }

  uint32_t Size() const { return mLength * sizeof(mPcs[0]); }

  // The stack trace returned by this function is interned in gStackTraceTable,
  // and so is immortal and unmovable.
  static const StackTrace* Get(Thread* aT);

  void Sort()
  {
    qsort(mPcs, mLength, sizeof(mPcs[0]), StackTrace::QsortCmp);
  }

  void Print(const Writer& aWriter, LocationService* aLocService) const;

  // Hash policy.

  typedef StackTrace* Lookup;

  static uint32_t hash(const StackTrace* const& aSt)
  {
    return mozilla::HashBytes(aSt->mPcs, aSt->Size());
  }

  static bool match(const StackTrace* const& aA,
                    const StackTrace* const& aB)
  {
    return aA->mLength == aB->mLength &&
           memcmp(aA->mPcs, aB->mPcs, aA->Size()) == 0;
  }

private:
  static void StackWalkCallback(void* aPc, void* aSp, void* aClosure)
  {
    StackTrace* st = (StackTrace*) aClosure;
    MOZ_ASSERT(st->mLength < MaxFrames);
    st->mPcs[st->mLength] = aPc;
    st->mLength++;
  }

  static int QsortCmp(const void* aA, const void* aB)
  {
    const void* const a = *static_cast<const void* const*>(aA);
    const void* const b = *static_cast<const void* const*>(aB);
    if (a < b) return -1;
    if (a > b) return  1;
    return 0;
  }
};

typedef js::HashSet<StackTrace*, StackTrace, InfallibleAllocPolicy>
        StackTraceTable;
static StackTraceTable* gStackTraceTable = nullptr;

// We won't GC the stack trace table until it exceeds this many elements.
static uint32_t gGCStackTraceTableWhenSizeExceeds = 4 * 1024;

void
StackTrace::Print(const Writer& aWriter, LocationService* aLocService) const
{
  if (mLength == 0) {
    W("   (empty)\n");  // StackTrace::Get() must have failed
    return;
  }

  for (uint32_t i = 0; i < mLength; i++) {
    aLocService->WriteLocation(aWriter, Pc(i));
  }
}

/* static */ const StackTrace*
StackTrace::Get(Thread* aT)
{
  MOZ_ASSERT(gStateLock->IsLocked());
  MOZ_ASSERT(aT->InterceptsAreBlocked());

  // On Windows, NS_StackWalk can acquire a lock from the shared library
  // loader.  Another thread might call malloc while holding that lock (when
  // loading a shared library).  So we can't be in gStateLock during the call
  // to NS_StackWalk.  For details, see
  // https://bugzilla.mozilla.org/show_bug.cgi?id=374829#c8
  // On Linux, something similar can happen;  see bug 824340.
  // So let's just release it on all platforms.
  nsresult rv;
  StackTrace tmp;
  {
    AutoUnlockState unlock;
    uint32_t skipFrames = 2;
    rv = NS_StackWalk(StackWalkCallback, skipFrames,
                      gOptions->MaxFrames(), &tmp, 0, nullptr);
  }

  if (rv == NS_OK) {
    // Handle the common case first.  All is ok.  Nothing to do.
  } else if (rv == NS_ERROR_NOT_IMPLEMENTED || rv == NS_ERROR_FAILURE) {
    tmp.mLength = 0;
  } else if (rv == NS_ERROR_UNEXPECTED) {
    // XXX: This |rv| only happens on Mac, and it indicates that we're handling
    // a call to malloc that happened inside a mutex-handling function.  Any
    // attempt to create a semaphore (which can happen in printf) could
    // deadlock.
    //
    // However, the most complex thing DMD does after Get() returns is to put
    // something in a hash table, which might call
    // InfallibleAllocPolicy::malloc_.  I'm not yet sure if this needs special
    // handling, hence the forced abort.  Sorry.  If you hit this, please file
    // a bug and CC nnethercote.
    MOZ_CRASH();
  } else {
    MOZ_CRASH();  // should be impossible
  }

  StackTraceTable::AddPtr p = gStackTraceTable->lookupForAdd(&tmp);
  if (!p) {
    StackTrace* stnew = InfallibleAllocPolicy::new_<StackTrace>(tmp);
    (void)gStackTraceTable->add(p, stnew);
  }
  return *p;
}

//---------------------------------------------------------------------------
// Heap blocks
//---------------------------------------------------------------------------

// This class combines a 2-byte-aligned pointer (i.e. one whose bottom bit
// is zero) with a 1-bit tag.
//
// |T| is the pointer type, e.g. |int*|, not the pointed-to type.  This makes
// it easier to have const pointers, e.g. |TaggedPtr<const int*>|.
template <typename T>
class TaggedPtr
{
  union
  {
    T         mPtr;
    uintptr_t mUint;
  };

  static const uintptr_t kTagMask = uintptr_t(0x1);
  static const uintptr_t kPtrMask = ~kTagMask;

  static bool IsTwoByteAligned(T aPtr)
  {
    return (uintptr_t(aPtr) & kTagMask) == 0;
  }

public:
  TaggedPtr()
    : mPtr(nullptr)
  {}

  TaggedPtr(T aPtr, bool aBool)
    : mPtr(aPtr)
  {
    MOZ_ASSERT(IsTwoByteAligned(aPtr));
    uintptr_t tag = uintptr_t(aBool);
    MOZ_ASSERT(tag <= kTagMask);
    mUint |= (tag & kTagMask);
  }

  void Set(T aPtr, bool aBool)
  {
    MOZ_ASSERT(IsTwoByteAligned(aPtr));
    mPtr = aPtr;
    uintptr_t tag = uintptr_t(aBool);
    MOZ_ASSERT(tag <= kTagMask);
    mUint |= (tag & kTagMask);
  }

  T Ptr() const { return reinterpret_cast<T>(mUint & kPtrMask); }

  bool Tag() const { return bool(mUint & kTagMask); }
};
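
// Usage sketch (illustrative): Block below uses this to pack a stack-trace
// pointer and its "sampled" flag into a single word:
//
//   TaggedPtr<const StackTrace*> p(trace, /* aBool */ true);
//   p.Ptr();   // == trace
//   p.Tag();   // == true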
// A live heap block.
class Block
{
  const void*  mPtr;
  const size_t mReqSize;    // size requested

  // Ptr: |mAllocStackTrace| - stack trace where this block was allocated.
  // Tag bit 0: |mSampled| - was this block sampled? (if so, slop == 0).
  TaggedPtr<const StackTrace* const>
    mAllocStackTrace_mSampled;

  // This array has two elements because we record at most two reports of a
  // block.
  // - Ptr: |mReportStackTrace| - stack trace where this block was reported.
  //   nullptr if not reported.
  // - Tag bit 0: |mReportedOnAlloc| - was the block reported immediately on
  //   allocation?  If so, DMD must not clear the report at the end of Dump().
  //   Only relevant if |mReportStackTrace| is non-nullptr.
  //
  // |mPtr| is used as the key in BlockTable, so it's ok for this member
  // to be |mutable|.
  mutable TaggedPtr<const StackTrace*> mReportStackTrace_mReportedOnAlloc[2];

public:
  Block(const void* aPtr, size_t aReqSize, const StackTrace* aAllocStackTrace,
        bool aSampled)
    : mPtr(aPtr),
      mReqSize(aReqSize),
      mAllocStackTrace_mSampled(aAllocStackTrace, aSampled),
      mReportStackTrace_mReportedOnAlloc()     // all fields get zeroed
  {
    MOZ_ASSERT(aAllocStackTrace);
  }

  size_t ReqSize() const { return mReqSize; }

  // Sampled blocks always have zero slop.
  size_t SlopSize() const
  {
    return IsSampled() ? 0 : MallocSizeOf(mPtr) - mReqSize;
  }

  size_t UsableSize() const
  {
    return IsSampled() ? mReqSize : MallocSizeOf(mPtr);
  }

  bool IsSampled() const
  {
    return mAllocStackTrace_mSampled.Tag();
  }

  const StackTrace* AllocStackTrace() const
  {
    return mAllocStackTrace_mSampled.Ptr();
  }

  const StackTrace* ReportStackTrace1() const {
    return mReportStackTrace_mReportedOnAlloc[0].Ptr();
  }

  const StackTrace* ReportStackTrace2() const {
    return mReportStackTrace_mReportedOnAlloc[1].Ptr();
  }

  bool ReportedOnAlloc1() const {
    return mReportStackTrace_mReportedOnAlloc[0].Tag();
  }

  bool ReportedOnAlloc2() const {
    return mReportStackTrace_mReportedOnAlloc[1].Tag();
  }

  uint32_t NumReports() const {
    if (ReportStackTrace2()) {
      MOZ_ASSERT(ReportStackTrace1());
      return 2;
    }
    if (ReportStackTrace1()) {
      return 1;
    }
    return 0;
  }

  // This is |const| thanks to the |mutable| fields above.
  void Report(Thread* aT, bool aReportedOnAlloc) const
  {
    // We don't bother recording reports after the 2nd one.
    uint32_t numReports = NumReports();
    if (numReports < 2) {
      mReportStackTrace_mReportedOnAlloc[numReports].Set(StackTrace::Get(aT),
                                                         aReportedOnAlloc);
    }
  }

  void UnreportIfNotReportedOnAlloc() const
  {
    if (!ReportedOnAlloc1() && !ReportedOnAlloc2()) {
      mReportStackTrace_mReportedOnAlloc[0].Set(nullptr, 0);
      mReportStackTrace_mReportedOnAlloc[1].Set(nullptr, 0);

    } else if (!ReportedOnAlloc1() && ReportedOnAlloc2()) {
      // Shift the 2nd report down to the 1st one.
      mReportStackTrace_mReportedOnAlloc[0] =
        mReportStackTrace_mReportedOnAlloc[1];
      mReportStackTrace_mReportedOnAlloc[1].Set(nullptr, 0);

    } else if (ReportedOnAlloc1() && !ReportedOnAlloc2()) {
      mReportStackTrace_mReportedOnAlloc[1].Set(nullptr, 0);
    }
  }

  // Hash policy.

  typedef const void* Lookup;

  static uint32_t hash(const void* const& aPtr)
  {
    return mozilla::HashGeneric(aPtr);
  }

  static bool match(const Block& aB, const void* const& aPtr)
  {
    return aB.mPtr == aPtr;
  }
};

typedef js::HashSet<Block, Block, InfallibleAllocPolicy> BlockTable;
static BlockTable* gBlockTable = nullptr;

typedef js::HashSet<const StackTrace*, js::DefaultHasher<const StackTrace*>,
                    InfallibleAllocPolicy>
        StackTraceSet;

// Add a pointer to each live stack trace into the given StackTraceSet.  (A
// stack trace is live if it's used by one of the live blocks.)
static void
GatherUsedStackTraces(StackTraceSet& aStackTraces)
{
  MOZ_ASSERT(gStateLock->IsLocked());
  MOZ_ASSERT(Thread::Fetch()->InterceptsAreBlocked());

  aStackTraces.finish();
  aStackTraces.init(1024);

  for (BlockTable::Range r = gBlockTable->all(); !r.empty(); r.popFront()) {
    const Block& b = r.front();
    aStackTraces.put(b.AllocStackTrace());
    aStackTraces.put(b.ReportStackTrace1());
    aStackTraces.put(b.ReportStackTrace2());
  }

  // Any of the stack traces added above may have been null.  For the sake of
  // cleanliness, don't leave the null pointer in the set.
  aStackTraces.remove(nullptr);
}

// Delete stack traces that we aren't using, and compact our hashtable.
static void
GCStackTraces()
{
  MOZ_ASSERT(gStateLock->IsLocked());
  MOZ_ASSERT(Thread::Fetch()->InterceptsAreBlocked());

  StackTraceSet usedStackTraces;
  GatherUsedStackTraces(usedStackTraces);

  // Delete all unused stack traces from gStackTraceTable.  The Enum destructor
  // will automatically rehash and compact the table.
  for (StackTraceTable::Enum e(*gStackTraceTable);
       !e.empty();
       e.popFront()) {
    StackTrace* const& st = e.front();

    if (!usedStackTraces.has(st)) {
      e.removeFront();
      InfallibleAllocPolicy::delete_(st);
    }
  }

  // Schedule a GC when we have twice as many stack traces as we had right
  // after this GC finished.
  gGCStackTraceTableWhenSizeExceeds = 2 * gStackTraceTable->count();
}

//---------------------------------------------------------------------------
// malloc/free callbacks
//---------------------------------------------------------------------------

static size_t gSmallBlockActualSizeCounter = 0;

static void
AllocCallback(void* aPtr, size_t aReqSize, Thread* aT)
{
  MOZ_ASSERT(gIsDMDRunning);

  if (!aPtr) {
    return;
  }

  AutoLockState lock;
  AutoBlockIntercepts block(aT);

  size_t actualSize = gMallocTable->malloc_usable_size(aPtr);
  size_t sampleBelowSize = gOptions->SampleBelowSize();

  if (actualSize < sampleBelowSize) {
    // If this allocation is smaller than the sample-below size, increment the
    // cumulative counter.  Then, if that counter now exceeds the sample size,
    // blame this allocation for |sampleBelowSize| bytes.  This precludes the
    // measurement of slop.
    gSmallBlockActualSizeCounter += actualSize;
    if (gSmallBlockActualSizeCounter >= sampleBelowSize) {
      gSmallBlockActualSizeCounter -= sampleBelowSize;

      Block b(aPtr, sampleBelowSize, StackTrace::Get(aT), /* sampled */ true);
      (void)gBlockTable->putNew(aPtr, b);
    }
  } else {
    // If this block size is larger than the sample size, record it exactly.
    Block b(aPtr, aReqSize, StackTrace::Get(aT), /* sampled */ false);
    (void)gBlockTable->putNew(aPtr, b);
  }
}
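
// Worked example (illustrative): with the default sample-below size of 4093
// and a run of allocations whose usable size is 1000 bytes each, the counter
// above goes 1000, 2000, 3000, 4000, 5000; the fifth allocation pushes it past
// 4093, so that allocation is recorded as one sampled Block of 4093 bytes and
// the counter drops to 5000 - 4093 == 907.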
static void
FreeCallback(void* aPtr, Thread* aT)
{
  MOZ_ASSERT(gIsDMDRunning);

  if (!aPtr) {
    return;
  }

  AutoLockState lock;
  AutoBlockIntercepts block(aT);

  gBlockTable->remove(aPtr);

  if (gStackTraceTable->count() > gGCStackTraceTableWhenSizeExceeds) {
    GCStackTraces();
  }
}

//---------------------------------------------------------------------------
// malloc/free interception
//---------------------------------------------------------------------------

static void Init(const malloc_table_t* aMallocTable);

}   // namespace dmd
}   // namespace mozilla

void
replace_init(const malloc_table_t* aMallocTable)
{
  mozilla::dmd::Init(aMallocTable);
}

void*
replace_malloc(size_t aSize)
{
  using namespace mozilla::dmd;

  if (!gIsDMDRunning) {
    // DMD hasn't started up, either because it wasn't enabled by the user, or
    // we're still in Init() and something has indirectly called malloc.  Do a
    // vanilla malloc.  (In the latter case, if it fails we'll crash.  But
    // OOM is highly unlikely so early on.)
    return gMallocTable->malloc(aSize);
  }

  Thread* t = Thread::Fetch();
  if (t->InterceptsAreBlocked()) {
    // Intercepts are blocked, which means this must be a call to malloc
    // triggered indirectly by DMD (e.g. via NS_StackWalk).  Be infallible.
    return InfallibleAllocPolicy::malloc_(aSize);
  }

  // This must be a call to malloc from outside DMD.  Intercept it.
  void* ptr = gMallocTable->malloc(aSize);
  AllocCallback(ptr, aSize, t);
  return ptr;
}

void*
replace_calloc(size_t aCount, size_t aSize)
{
  using namespace mozilla::dmd;

  if (!gIsDMDRunning) {
    return gMallocTable->calloc(aCount, aSize);
  }

  Thread* t = Thread::Fetch();
  if (t->InterceptsAreBlocked()) {
    return InfallibleAllocPolicy::calloc_(aCount * aSize);
  }

  void* ptr = gMallocTable->calloc(aCount, aSize);
  AllocCallback(ptr, aCount * aSize, t);
  return ptr;
}

void*
replace_realloc(void* aOldPtr, size_t aSize)
{
  using namespace mozilla::dmd;

  if (!gIsDMDRunning) {
    return gMallocTable->realloc(aOldPtr, aSize);
  }

  Thread* t = Thread::Fetch();
  if (t->InterceptsAreBlocked()) {
    return InfallibleAllocPolicy::realloc_(aOldPtr, aSize);
  }

  // If |aOldPtr| is nullptr, the call is equivalent to |malloc(aSize)|.
  if (!aOldPtr) {
    return replace_malloc(aSize);
  }

  // Be very careful here!  Must remove the block from the table before doing
  // the realloc to avoid races, just like in replace_free().
  // Nb: This does an unnecessary hashtable remove+add if the block doesn't
  // move, but doing better isn't worth the effort.
  FreeCallback(aOldPtr, t);
  void* ptr = gMallocTable->realloc(aOldPtr, aSize);
  if (ptr) {
    AllocCallback(ptr, aSize, t);
  } else {
    // If realloc fails, we re-insert the old pointer.  It will look like it
    // was allocated for the first time here, which is untrue, and the slop
    // bytes will be zero, which may be untrue.  But this case is rare and
    // doing better isn't worth the effort.
    AllocCallback(aOldPtr, gMallocTable->malloc_usable_size(aOldPtr), t);
  }
  return ptr;
}

void*
replace_memalign(size_t aAlignment, size_t aSize)
{
  using namespace mozilla::dmd;

  if (!gIsDMDRunning) {
    return gMallocTable->memalign(aAlignment, aSize);
  }

  Thread* t = Thread::Fetch();
  if (t->InterceptsAreBlocked()) {
    return InfallibleAllocPolicy::memalign_(aAlignment, aSize);
  }

  void* ptr = gMallocTable->memalign(aAlignment, aSize);
  AllocCallback(ptr, aSize, t);
  return ptr;
}

void
replace_free(void* aPtr)
{
  using namespace mozilla::dmd;

  if (!gIsDMDRunning) {
    gMallocTable->free(aPtr);
    return;
  }

  Thread* t = Thread::Fetch();
  if (t->InterceptsAreBlocked()) {
    return InfallibleAllocPolicy::free_(aPtr);
  }

  // Do the actual free after updating the table.  Otherwise, another thread
  // could call malloc and get the freed block and update the table, and then
  // our update here would remove the newly-malloc'd block.
  FreeCallback(aPtr, t);
  gMallocTable->free(aPtr);
}

namespace mozilla {
namespace dmd {
//---------------------------------------------------------------------------
// Stack trace records
//---------------------------------------------------------------------------

class TraceRecordKey
{
public:
  const StackTrace* const mAllocStackTrace;   // never null
protected:
  const StackTrace* const mReportStackTrace1; // nullptr if unreported
  const StackTrace* const mReportStackTrace2; // nullptr if not 2x-reported

public:
  TraceRecordKey(const Block& aB)
    : mAllocStackTrace(aB.AllocStackTrace()),
      mReportStackTrace1(aB.ReportStackTrace1()),
      mReportStackTrace2(aB.ReportStackTrace2())
  {
    MOZ_ASSERT(mAllocStackTrace);
  }

  // Hash policy.

  typedef TraceRecordKey Lookup;

  static uint32_t hash(const TraceRecordKey& aKey)
  {
    return mozilla::HashGeneric(aKey.mAllocStackTrace,
                                aKey.mReportStackTrace1,
                                aKey.mReportStackTrace2);
  }

  static bool match(const TraceRecordKey& aA, const TraceRecordKey& aB)
  {
    return aA.mAllocStackTrace   == aB.mAllocStackTrace &&
           aA.mReportStackTrace1 == aB.mReportStackTrace1 &&
           aA.mReportStackTrace2 == aB.mReportStackTrace2;
  }
};

class RecordSize
{
  static const size_t kReqBits = sizeof(size_t) * 8 - 1;  // 31 or 63

  size_t mReq;              // size requested
  size_t mSlop:kReqBits;    // slop bytes
  size_t mSampled:1;        // were one or more blocks contributing to this
                            //   RecordSize sampled?
public:
  RecordSize()
    : mReq(0),
      mSlop(0),
      mSampled(false)
  {}

  size_t Req()    const { return mReq; }
  size_t Slop()   const { return mSlop; }
  size_t Usable() const { return mReq + mSlop; }

  bool IsSampled() const { return mSampled; }

  void Add(const Block& aB)
  {
    mReq  += aB.ReqSize();
    mSlop += aB.SlopSize();
    mSampled = mSampled || aB.IsSampled();
  }

  void Add(const RecordSize& aRecordSize)
  {
    mReq  += aRecordSize.Req();
    mSlop += aRecordSize.Slop();
    mSampled = mSampled || aRecordSize.IsSampled();
  }

  static int Cmp(const RecordSize& aA, const RecordSize& aB)
  {
    // Primary sort: put bigger usable sizes first.
    if (aA.Usable() > aB.Usable()) return -1;
    if (aA.Usable() < aB.Usable()) return  1;

    // Secondary sort: put bigger requested sizes first.
    if (aA.Req() > aB.Req()) return -1;
    if (aA.Req() < aB.Req()) return  1;

    // Tertiary sort: put non-sampled records before sampled records.
    if (!aA.mSampled &&  aB.mSampled) return -1;
    if ( aA.mSampled && !aB.mSampled) return  1;

    return 0;
  }
};

// A collection of one or more heap blocks with a common TraceRecordKey.
class TraceRecord : public TraceRecordKey
{
  // The TraceRecordKey base class serves as the key in TraceRecordTables.
  // These two fields constitute the value, so it's ok for them to be
  // |mutable|.
  mutable uint32_t   mNumBlocks;  // number of blocks with this TraceRecordKey
  mutable RecordSize mRecordSize; // combined size of those blocks

public:
  explicit TraceRecord(const TraceRecordKey& aKey)
    : TraceRecordKey(aKey),
      mNumBlocks(0),
      mRecordSize()
  {}

  uint32_t NumBlocks() const { return mNumBlocks; }

  const RecordSize& GetRecordSize() const { return mRecordSize; }

  // This is |const| thanks to the |mutable| fields above.
  void Add(const Block& aB) const
  {
    mNumBlocks++;
    mRecordSize.Add(aB);
  }

  // For PrintSortedRecords.
  static const char* const kRecordKind;
  static bool recordsOverlap() { return false; }

  void Print(const Writer& aWriter, LocationService* aLocService,
             uint32_t aM, uint32_t aN, const char* aStr, const char* astr,
             size_t aCategoryUsableSize, size_t aCumulativeUsableSize,
             size_t aTotalUsableSize) const;

  static int QsortCmp(const void* aA, const void* aB)
  {
    const TraceRecord* const a = *static_cast<const TraceRecord* const*>(aA);
    const TraceRecord* const b = *static_cast<const TraceRecord* const*>(aB);

    return RecordSize::Cmp(a->mRecordSize, b->mRecordSize);
  }
};

const char* const TraceRecord::kRecordKind = "trace";

typedef js::HashSet<TraceRecord, TraceRecord, InfallibleAllocPolicy>
        TraceRecordTable;

void
TraceRecord::Print(const Writer& aWriter, LocationService* aLocService,
                   uint32_t aM, uint32_t aN, const char* aStr, const char* astr,
                   size_t aCategoryUsableSize, size_t aCumulativeUsableSize,
                   size_t aTotalUsableSize) const
{
  bool showTilde = mRecordSize.IsSampled();

  W("%s: %s block%s in stack trace record %s of %s\n",
    aStr,
    Show(mNumBlocks, gBuf1, kBufLen, showTilde), Plural(mNumBlocks),
    Show(aM, gBuf2, kBufLen),
    Show(aN, gBuf3, kBufLen));

  W(" %s bytes (%s requested / %s slop)\n",
    Show(mRecordSize.Usable(), gBuf1, kBufLen, showTilde),
    Show(mRecordSize.Req(),    gBuf2, kBufLen, showTilde),
    Show(mRecordSize.Slop(),   gBuf3, kBufLen, showTilde));

  W(" %4.2f%% of the heap (%4.2f%% cumulative); "
    " %4.2f%% of %s (%4.2f%% cumulative)\n",
    Percent(mRecordSize.Usable(), aTotalUsableSize),
    Percent(aCumulativeUsableSize, aTotalUsableSize),
    Percent(mRecordSize.Usable(), aCategoryUsableSize),
    astr,
    Percent(aCumulativeUsableSize, aCategoryUsableSize));

  W(" Allocated at\n");
  mAllocStackTrace->Print(aWriter, aLocService);

  if (mReportStackTrace1) {
    W("\n Reported at\n");
    mReportStackTrace1->Print(aWriter, aLocService);
  }
  if (mReportStackTrace2) {
    W("\n Reported again at\n");
    mReportStackTrace2->Print(aWriter, aLocService);
  }

  W("\n");
}
//---------------------------------------------------------------------------
// Stack frame records
//---------------------------------------------------------------------------

// A collection of one or more stack frames (from heap block allocation stack
// traces) with a common PC.
class FrameRecord
{
  // mPc is used as the key in FrameRecordTable, and the other members
  // constitute the value, so it's ok for them to be |mutable|.
  const void* const  mPc;
  mutable size_t     mNumBlocks;
  mutable size_t     mNumTraceRecords;
  mutable RecordSize mRecordSize;

public:
  explicit FrameRecord(const void* aPc)
    : mPc(aPc),
      mNumBlocks(0),
      mNumTraceRecords(0),
      mRecordSize()
  {}

  const RecordSize& GetRecordSize() const { return mRecordSize; }

  // This is |const| thanks to the |mutable| fields above.
  void Add(const TraceRecord& aTr) const
  {
    mNumBlocks += aTr.NumBlocks();
    mNumTraceRecords++;
    mRecordSize.Add(aTr.GetRecordSize());
  }

  void Print(const Writer& aWriter, LocationService* aLocService,
             uint32_t aM, uint32_t aN, const char* aStr, const char* astr,
             size_t aCategoryUsableSize, size_t aCumulativeUsableSize,
             size_t aTotalUsableSize) const;

  static int QsortCmp(const void* aA, const void* aB)
  {
    const FrameRecord* const a = *static_cast<const FrameRecord* const*>(aA);
    const FrameRecord* const b = *static_cast<const FrameRecord* const*>(aB);

    return RecordSize::Cmp(a->mRecordSize, b->mRecordSize);
  }

  // For PrintSortedRecords.
  static const char* const kRecordKind;
  static bool recordsOverlap() { return true; }

  // Hash policy.

  typedef const void* Lookup;

  static uint32_t hash(const void* const& aPc)
  {
    return mozilla::HashGeneric(aPc);
  }

  static bool match(const FrameRecord& aFr, const void* const& aPc)
  {
    return aFr.mPc == aPc;
  }
};

const char* const FrameRecord::kRecordKind = "frame";

typedef js::HashSet<FrameRecord, FrameRecord, InfallibleAllocPolicy>
        FrameRecordTable;

void
FrameRecord::Print(const Writer& aWriter, LocationService* aLocService,
                   uint32_t aM, uint32_t aN, const char* aStr, const char* astr,
                   size_t aCategoryUsableSize, size_t aCumulativeUsableSize,
                   size_t aTotalUsableSize) const
{
  (void)aCumulativeUsableSize;

  bool showTilde = mRecordSize.IsSampled();

  W("%s: %s block%s from %s stack trace record%s in stack frame record %s of %s\n",
    aStr,
    Show(mNumBlocks, gBuf1, kBufLen, showTilde), Plural(mNumBlocks),
    Show(mNumTraceRecords, gBuf2, kBufLen, showTilde), Plural(mNumTraceRecords),
    Show(aM, gBuf3, kBufLen),
    Show(aN, gBuf4, kBufLen));

  W(" %s bytes (%s requested / %s slop)\n",
    Show(mRecordSize.Usable(), gBuf1, kBufLen, showTilde),
    Show(mRecordSize.Req(),    gBuf2, kBufLen, showTilde),
    Show(mRecordSize.Slop(),   gBuf3, kBufLen, showTilde));

  W(" %4.2f%% of the heap;  %4.2f%% of %s\n",
    Percent(mRecordSize.Usable(), aTotalUsableSize),
    Percent(mRecordSize.Usable(), aCategoryUsableSize),
    astr);

  W(" PC is\n");
  aLocService->WriteLocation(aWriter, mPc);
  W("\n");
}
//---------------------------------------------------------------------------
// Options (Part 2)
//---------------------------------------------------------------------------

// Given an |aOptionName| like "foo", succeed if |aArg| has the form "foo=blah"
// (where "blah" is non-empty) and return the pointer to "blah".  |aArg| can
// have leading space chars (but not other whitespace).
const char*
Options::ValueIfMatch(const char* aArg, const char* aOptionName)
{
  MOZ_ASSERT(!isspace(*aArg));  // any leading whitespace should not remain
  size_t optionLen = strlen(aOptionName);
  if (strncmp(aArg, aOptionName, optionLen) == 0 && aArg[optionLen] == '=' &&
      aArg[optionLen + 1]) {
    return aArg + optionLen + 1;
  }
  return nullptr;
}

// Extracts a |long| value for an option from an argument.  It must be within
// the range |aMin..aMax| (inclusive).
bool
Options::GetLong(const char* aArg, const char* aOptionName,
                 long aMin, long aMax, long* aN)
{
  if (const char* optionValue = ValueIfMatch(aArg, aOptionName)) {
    char* endPtr;
    *aN = strtol(optionValue, &endPtr, /* base */ 10);
    if (!*endPtr && aMin <= *aN && *aN <= aMax &&
        *aN != LONG_MIN && *aN != LONG_MAX) {
      return true;
    }
  }
  return false;
}
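
// For example (illustrative): GetLong("--max-frames=16", "--max-frames",
// 1, 24, &n) matches the option name, parses 16, checks 1 <= 16 <= 24, stores
// it in |n| and returns true; "--max-frames=abc" fails the !*endPtr check and
// returns false.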
// The sample-below default is a prime number close to 4096.
// - Why that size?  Because it's *much* faster but only moderately less precise
//   than a size of 1.
// - Why prime?  Because it makes our sampling more random.  If we used a size
//   of 4096, for example, then our alloc counter would only take on even
//   values, because jemalloc always rounds up request sizes.  In contrast, a
//   prime size will explore all possible values of the alloc counter.
//
Options::Options(const char* aDMDEnvVar)
  : mDMDEnvVar(InfallibleAllocPolicy::strdup_(aDMDEnvVar)),
    mSampleBelowSize(4093, 100 * 100 * 1000),
    mMaxFrames(StackTrace::MaxFrames, StackTrace::MaxFrames),
    mMaxRecords(1000, 1000000),
    mMode(Normal)
{
  char* e = mDMDEnvVar;
  if (strcmp(e, "1") != 0) {
    bool isEnd = false;
    while (!isEnd) {
      // Consume leading whitespace.
      while (isspace(*e)) {
        e++;
      }

      // Save the start of the arg.
      const char* arg = e;

      // Find the first char after the arg, and temporarily change it to '\0'
      // to isolate the arg.
      while (!isspace(*e) && *e != '\0') {
        e++;
      }
      char replacedChar = *e;
      isEnd = replacedChar == '\0';
      *e = '\0';

      // Handle the arg.
      long myLong;
      if (GetLong(arg, "--sample-below", 1, mSampleBelowSize.mMax, &myLong)) {
        mSampleBelowSize.mActual = myLong;

      } else if (GetLong(arg, "--max-frames", 1, mMaxFrames.mMax, &myLong)) {
        mMaxFrames.mActual = myLong;

      } else if (GetLong(arg, "--max-records", 1, mMaxRecords.mMax, &myLong)) {
        mMaxRecords.mActual = myLong;

      } else if (strcmp(arg, "--mode=normal") == 0) {
        mMode = Options::Normal;
      } else if (strcmp(arg, "--mode=test")   == 0) {
        mMode = Options::Test;
      } else if (strcmp(arg, "--mode=stress") == 0) {
        mMode = Options::Stress;

      } else if (strcmp(arg, "") == 0) {
        // This can only happen if there is trailing whitespace.  Ignore.
        MOZ_ASSERT(isEnd);

      } else {
        BadArg(arg);
      }

      // Undo the temporary isolation.
      *e = replacedChar;
    }
  }
}

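// Illustrative walk-through (not part of the original source): parsing
// DMD="--sample-below=4093 --mode=test" first isolates "--sample-below=4093"
// (GetLong() sets mSampleBelowSize.mActual to 4093), restores the replaced
// space, then isolates "--mode=test" and sets mMode to Options::Test; the
// trailing '\0' ends the loop.
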
void
Options::BadArg(const char* aArg)
{
  StatusMsg("\n");
  StatusMsg("Bad entry in the $DMD environment variable: '%s'.\n", aArg);
  StatusMsg("\n");
  StatusMsg("Valid values of $DMD are:\n");
  StatusMsg("- undefined or \"\" or \"0\", which disables DMD, or\n");
  StatusMsg("- \"1\", which enables it with the default options, or\n");
  StatusMsg("- a whitespace-separated list of |--option=val| entries, which\n");
  StatusMsg("  enables it with non-default options.\n");
  StatusMsg("\n");
  StatusMsg("The following options are allowed;  defaults are shown in [].\n");
  StatusMsg("  --sample-below=<1..%d>       Sample blocks smaller than this [%d]\n",
            int(mSampleBelowSize.mMax),
            int(mSampleBelowSize.mDefault));
  StatusMsg("                               (prime numbers are recommended)\n");
  StatusMsg("  --max-frames=<1..%d>         Max. depth of stack traces [%d]\n",
            int(mMaxFrames.mMax),
            int(mMaxFrames.mDefault));
  StatusMsg("  --max-records=<1..%u>        Max. number of records printed [%u]\n",
            mMaxRecords.mMax,
            mMaxRecords.mDefault);
  StatusMsg("  --mode=<normal|test|stress>  Mode of operation [normal]\n");
  StatusMsg("\n");
  exit(1);
}

//---------------------------------------------------------------------------
// DMD start-up
//---------------------------------------------------------------------------

#ifdef XP_MACOSX
static void
NopStackWalkCallback(void* aPc, void* aSp, void* aClosure)
{
}
#endif

// Note that fopen() can allocate.
static FILE*
OpenOutputFile(const char* aFilename)
{
  FILE* fp = fopen(aFilename, "w");
  if (!fp) {
    StatusMsg("can't create %s file: %s\n", aFilename, strerror(errno));
    exit(1);
  }
  return fp;
}

static void RunTestMode(FILE* fp);
static void RunStressMode(FILE* fp);

// WARNING: this function runs *very* early -- before all static initializers
// have run.  For this reason, non-scalar globals such as gStateLock and
// gStackTraceTable are allocated dynamically (so we can guarantee their
// construction in this function) rather than statically.
static void
Init(const malloc_table_t* aMallocTable)
{
  MOZ_ASSERT(!gIsDMDRunning);

  gMallocTable = aMallocTable;

  // DMD is controlled by the |DMD| environment variable.
  // - If it's unset or empty or "0", DMD doesn't run.
  // - Otherwise, the contents dictate DMD's behaviour.

  char* e = getenv("DMD");
  StatusMsg("$DMD = '%s'\n", e);

  if (!e || strcmp(e, "") == 0 || strcmp(e, "0") == 0) {
    StatusMsg("DMD is not enabled\n");
    return;
  }

  // Parse $DMD env var.
  gOptions = InfallibleAllocPolicy::new_<Options>(e);

  StatusMsg("DMD is enabled\n");

#ifdef XP_MACOSX
  // On Mac OS X we need to call StackWalkInitCriticalAddress() very early
  // (prior to the creation of any mutexes, apparently) otherwise we can get
  // hangs when getting stack traces (bug 821577).  But
  // StackWalkInitCriticalAddress() isn't exported from xpcom/, so instead we
  // just call NS_StackWalk, because that calls StackWalkInitCriticalAddress().
  // See the comment above StackWalkInitCriticalAddress() for more details.
  (void)NS_StackWalk(NopStackWalkCallback, /* skipFrames */ 0,
                     /* maxFrames */ 1, nullptr, 0, nullptr);
#endif

  gStateLock = InfallibleAllocPolicy::new_<Mutex>();

  gSmallBlockActualSizeCounter = 0;

  DMD_CREATE_TLS_INDEX(gTlsIndex);

  {
    AutoLockState lock;

    gStackTraceTable = InfallibleAllocPolicy::new_<StackTraceTable>();
    gStackTraceTable->init(8192);

    gBlockTable = InfallibleAllocPolicy::new_<BlockTable>();
    gBlockTable->init(8192);
  }

  if (gOptions->IsTestMode()) {
    // OpenOutputFile() can allocate.  So do this before setting
    // gIsDMDRunning so those allocations don't show up in our results.  Once
    // gIsDMDRunning is set we are intercepting malloc et al. in earnest.
    FILE* fp = OpenOutputFile("test.dmd");
    gIsDMDRunning = true;

    StatusMsg("running test mode...\n");
    RunTestMode(fp);
    StatusMsg("finished test mode\n");
    fclose(fp);
    exit(0);
  }

  if (gOptions->IsStressMode()) {
    FILE* fp = OpenOutputFile("stress.dmd");
    gIsDMDRunning = true;

    StatusMsg("running stress mode...\n");
    RunStressMode(fp);
    StatusMsg("finished stress mode\n");
    fclose(fp);
    exit(0);
  }

  gIsDMDRunning = true;
}

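// Illustrative invocations (not part of the original source; the executable
// name is hypothetical) showing how $DMD selects these paths:
//
//   DMD=1 ./app                  # normal mode, default options
//   DMD="--mode=test" ./app      # writes test.dmd, then exits
//   DMD=0 ./app                  # DMD stays disabled
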
//---------------------------------------------------------------------------
// DMD reporting and unreporting
//---------------------------------------------------------------------------

static void
ReportHelper(const void* aPtr, bool aReportedOnAlloc)
{
  if (!gIsDMDRunning || !aPtr) {
    return;
  }

  Thread* t = Thread::Fetch();

  AutoBlockIntercepts block(t);
  AutoLockState lock;

  if (BlockTable::Ptr p = gBlockTable->lookup(aPtr)) {
    p->Report(t, aReportedOnAlloc);
  } else {
    // We have no record of the block.  Do nothing.  Either:
    // - We're sampling and we skipped this block.  This is likely.
    // - It's a bogus pointer.  This is unlikely because Report() is almost
    //   always called in conjunction with a malloc_size_of-style function.
  }
}

MOZ_EXPORT void
Report(const void* aPtr)
{
  ReportHelper(aPtr, /* onAlloc */ false);
}

MOZ_EXPORT void
ReportOnAlloc(const void* aPtr)
{
  ReportHelper(aPtr, /* onAlloc */ true);
}

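// A minimal usage sketch (illustrative only, hence compiled out): a memory
// reporter measures a live block and reports it to DMD.
#if 0
  void* p = malloc(100);
  Report(p);   // once-reported in the next Dump()
  Report(p);   // a second report makes it twice-reported
  free(p);     // freed blocks are forgotten and appear in no later Dump()
#endif
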
//---------------------------------------------------------------------------
// DMD output
//---------------------------------------------------------------------------

// This works for both TraceRecords and StackFrameRecords.
template <class Record>
static void
PrintSortedRecords(const Writer& aWriter, LocationService* aLocService,
                   const char* aStr, const char* astr,
                   const js::HashSet<Record, Record, InfallibleAllocPolicy>&
                         aRecordTable,
                   size_t aCategoryUsableSize, size_t aTotalUsableSize)
{
  const char* kind = Record::kRecordKind;
  StatusMsg("  creating and sorting %s stack %s record array...\n", astr, kind);

  // Convert the table into a sorted array.
  js::Vector<const Record*, 0, InfallibleAllocPolicy> recordArray;
  recordArray.reserve(aRecordTable.count());
  typedef js::HashSet<Record, Record, InfallibleAllocPolicy> RecordTable;
  for (typename RecordTable::Range r = aRecordTable.all();
       !r.empty();
       r.popFront()) {
    recordArray.infallibleAppend(&r.front());
  }
  qsort(recordArray.begin(), recordArray.length(), sizeof(recordArray[0]),
        Record::QsortCmp);

  WriteTitle("%s stack %s records\n", aStr, kind);

  if (recordArray.length() == 0) {
    W("(none)\n\n");
    return;
  }

  StatusMsg("  printing %s stack %s record array...\n", astr, kind);
  size_t cumulativeUsableSize = 0;

  // Limit the number of records printed, because fix-linux-stack.pl is too
  // damn slow.  Note that we don't break out of this loop because we need to
  // keep adding to |cumulativeUsableSize|.
  uint32_t numRecords = recordArray.length();
  uint32_t maxRecords = gOptions->MaxRecords();
  for (uint32_t i = 0; i < numRecords; i++) {
    const Record* r = recordArray[i];
    cumulativeUsableSize += r->GetRecordSize().Usable();
    if (i < maxRecords) {
      r->Print(aWriter, aLocService, i+1, numRecords, aStr, astr,
               aCategoryUsableSize, cumulativeUsableSize, aTotalUsableSize);
    } else if (i == maxRecords) {
      W("%s: stopping after %s stack %s records\n\n", aStr,
        Show(maxRecords, gBuf1, kBufLen), kind);
    }
  }

  // This holds for TraceRecords, but not for FrameRecords.
  MOZ_ASSERT_IF(!Record::recordsOverlap(),
                aCategoryUsableSize == cumulativeUsableSize);
}

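// Worked example (illustrative, not part of the original source): with
// --max-records=1000 and 1,500 records, records 1..1000 are printed, the
// "stopping after" line is emitted once at i == 1000, and the loop still
// runs to the end so |cumulativeUsableSize| covers all 1,500 records.
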
static void
PrintSortedTraceAndFrameRecords(const Writer& aWriter,
                                LocationService* aLocService,
                                const char* aStr, const char* astr,
                                const TraceRecordTable& aTraceRecordTable,
                                size_t aCategoryUsableSize,
                                size_t aTotalUsableSize)
{
  PrintSortedRecords(aWriter, aLocService, aStr, astr, aTraceRecordTable,
                     aCategoryUsableSize, aTotalUsableSize);

  FrameRecordTable frameRecordTable;
  (void)frameRecordTable.init(2048);
  for (TraceRecordTable::Range r = aTraceRecordTable.all();
       !r.empty();
       r.popFront()) {
    const TraceRecord& tr = r.front();
    const StackTrace* st = tr.mAllocStackTrace;

    // A single PC can appear multiple times in a stack trace.  We ignore
    // duplicates by first sorting and then ignoring adjacent duplicates.
    StackTrace sorted(*st);
    sorted.Sort();              // sorts the copy, not the original
    void* prevPc = (void*)intptr_t(-1);
    for (uint32_t i = 0; i < sorted.Length(); i++) {
      void* pc = sorted.Pc(i);
      if (pc == prevPc) {
        continue;               // ignore duplicate
      }
      prevPc = pc;

      FrameRecordTable::AddPtr p = frameRecordTable.lookupForAdd(pc);
      if (!p) {
        FrameRecord fr(pc);
        (void)frameRecordTable.add(p, fr);
      }
      p->Add(tr);
    }
  }
  PrintSortedRecords(aWriter, aLocService, aStr, astr, frameRecordTable,
                     aCategoryUsableSize, aTotalUsableSize);
}

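// Illustrative example (not part of the original source): if a trace's PCs
// are {A, B, A, C}, the sorted copy is {A, A, B, C}; the adjacent-duplicate
// check then adds each of A, B and C to |frameRecordTable| exactly once, so
// a recursive frame doesn't double-count its trace's bytes.
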
// Note that, unlike most SizeOf* functions, this function does not take a
// |mozilla::MallocSizeOf| argument.  That's because those arguments are
// primarily there to help DMD track heap blocks... but DMD deliberately
// doesn't track heap blocks it allocated for itself!
//
// SizeOfInternal should be called while you're holding the state lock and
// while intercepts are blocked; SizeOf acquires the lock and blocks
// intercepts.

static void
SizeOfInternal(Sizes* aSizes)
{
  MOZ_ASSERT(gStateLock->IsLocked());
  MOZ_ASSERT(Thread::Fetch()->InterceptsAreBlocked());

  aSizes->Clear();

  if (!gIsDMDRunning) {
    return;
  }

  StackTraceSet usedStackTraces;
  GatherUsedStackTraces(usedStackTraces);

  for (StackTraceTable::Range r = gStackTraceTable->all();
       !r.empty();
       r.popFront()) {
    StackTrace* const& st = r.front();

    if (usedStackTraces.has(st)) {
      aSizes->mStackTracesUsed += MallocSizeOf(st);
    } else {
      aSizes->mStackTracesUnused += MallocSizeOf(st);
    }
  }

  aSizes->mStackTraceTable =
    gStackTraceTable->sizeOfIncludingThis(MallocSizeOf);

  aSizes->mBlockTable = gBlockTable->sizeOfIncludingThis(MallocSizeOf);
}

MOZ_EXPORT void
SizeOf(Sizes* aSizes)
{
  aSizes->Clear();

  if (!gIsDMDRunning) {
    return;
  }

  AutoBlockIntercepts block(Thread::Fetch());
  AutoLockState lock;
  SizeOfInternal(aSizes);
}

void
ClearReportsInternal()
{
  MOZ_ASSERT(gStateLock->IsLocked());

  // Unreport all blocks that were marked reported by a memory reporter.  This
  // excludes those that were reported on allocation, because they need to keep
  // their reported marking.
  for (BlockTable::Range r = gBlockTable->all(); !r.empty(); r.popFront()) {
    r.front().UnreportIfNotReportedOnAlloc();
  }
}

MOZ_EXPORT void
ClearReports()
{
  if (!gIsDMDRunning) {
    return;
  }

  AutoLockState lock;
  ClearReportsInternal();
}

MOZ_EXPORT bool
IsEnabled()
{
  return gIsDMDRunning;
}

MOZ_EXPORT void
Dump(Writer aWriter)
{
  if (!gIsDMDRunning) {
    const char* msg = "cannot Dump();  DMD was not enabled at startup\n";
    StatusMsg("%s", msg);
    W("%s", msg);
    return;
  }

  AutoBlockIntercepts block(Thread::Fetch());
  AutoLockState lock;

  static int dumpCount = 1;
  StatusMsg("Dump %d {\n", dumpCount++);

  StatusMsg("  gathering stack trace records...\n");

  TraceRecordTable unreportedTraceRecordTable;
  (void)unreportedTraceRecordTable.init(1024);
  size_t unreportedUsableSize = 0;
  size_t unreportedNumBlocks = 0;

  TraceRecordTable onceReportedTraceRecordTable;
  (void)onceReportedTraceRecordTable.init(1024);
  size_t onceReportedUsableSize = 0;
  size_t onceReportedNumBlocks = 0;

  TraceRecordTable twiceReportedTraceRecordTable;
  (void)twiceReportedTraceRecordTable.init(0);
  size_t twiceReportedUsableSize = 0;
  size_t twiceReportedNumBlocks = 0;

  bool anyBlocksSampled = false;

  for (BlockTable::Range r = gBlockTable->all(); !r.empty(); r.popFront()) {
    const Block& b = r.front();

    TraceRecordTable* table;
    uint32_t numReports = b.NumReports();
    if (numReports == 0) {
      unreportedUsableSize += b.UsableSize();
      unreportedNumBlocks++;
      table = &unreportedTraceRecordTable;
    } else if (numReports == 1) {
      onceReportedUsableSize += b.UsableSize();
      onceReportedNumBlocks++;
      table = &onceReportedTraceRecordTable;
    } else {
      MOZ_ASSERT(numReports == 2);
      twiceReportedUsableSize += b.UsableSize();
      twiceReportedNumBlocks++;
      table = &twiceReportedTraceRecordTable;
    }
    TraceRecordKey key(b);
    TraceRecordTable::AddPtr p = table->lookupForAdd(key);
    if (!p) {
      TraceRecord tr(b);
      (void)table->add(p, tr);
    }
    p->Add(b);

    anyBlocksSampled = anyBlocksSampled || b.IsSampled();
  }
  size_t totalUsableSize =
    unreportedUsableSize + onceReportedUsableSize + twiceReportedUsableSize;
  size_t totalNumBlocks =
    unreportedNumBlocks + onceReportedNumBlocks + twiceReportedNumBlocks;

  WriteTitle("Invocation\n");
  W("$DMD = '%s'\n", gOptions->DMDEnvVar());
  W("Sample-below size = %lld\n\n",
    (long long)(gOptions->SampleBelowSize()));

  // Allocate this on the heap instead of the stack because it's fairly large.
  LocationService* locService = InfallibleAllocPolicy::new_<LocationService>();

  PrintSortedRecords(aWriter, locService, "Twice-reported", "twice-reported",
                     twiceReportedTraceRecordTable, twiceReportedUsableSize,
                     totalUsableSize);

  PrintSortedTraceAndFrameRecords(aWriter, locService,
                                  "Unreported", "unreported",
                                  unreportedTraceRecordTable,
                                  unreportedUsableSize, totalUsableSize);

  PrintSortedTraceAndFrameRecords(aWriter, locService,
                                  "Once-reported", "once-reported",
                                  onceReportedTraceRecordTable,
                                  onceReportedUsableSize, totalUsableSize);

  bool showTilde = anyBlocksSampled;
  WriteTitle("Summary\n");

  W("Total:          %12s bytes (%6.2f%%) in %7s blocks (%6.2f%%)\n",
    Show(totalUsableSize, gBuf1, kBufLen, showTilde),
    100.0,
    Show(totalNumBlocks,  gBuf2, kBufLen, showTilde),
    100.0);

  W("Unreported:     %12s bytes (%6.2f%%) in %7s blocks (%6.2f%%)\n",
    Show(unreportedUsableSize, gBuf1, kBufLen, showTilde),
    Percent(unreportedUsableSize, totalUsableSize),
    Show(unreportedNumBlocks, gBuf2, kBufLen, showTilde),
    Percent(unreportedNumBlocks, totalNumBlocks));

  W("Once-reported:  %12s bytes (%6.2f%%) in %7s blocks (%6.2f%%)\n",
    Show(onceReportedUsableSize, gBuf1, kBufLen, showTilde),
    Percent(onceReportedUsableSize, totalUsableSize),
    Show(onceReportedNumBlocks, gBuf2, kBufLen, showTilde),
    Percent(onceReportedNumBlocks, totalNumBlocks));

  W("Twice-reported: %12s bytes (%6.2f%%) in %7s blocks (%6.2f%%)\n",
    Show(twiceReportedUsableSize, gBuf1, kBufLen, showTilde),
    Percent(twiceReportedUsableSize, totalUsableSize),
    Show(twiceReportedNumBlocks, gBuf2, kBufLen, showTilde),
    Percent(twiceReportedNumBlocks, totalNumBlocks));

  W("\n");

  // Stats are non-deterministic, so don't show them in test mode.
  if (!gOptions->IsTestMode()) {
    Sizes sizes;
    SizeOfInternal(&sizes);

    WriteTitle("Execution measurements\n");

    W("Data structures that persist after Dump() ends:\n");

    W("  Used stack traces:    %10s bytes\n",
      Show(sizes.mStackTracesUsed, gBuf1, kBufLen));

    W("  Unused stack traces:  %10s bytes\n",
      Show(sizes.mStackTracesUnused, gBuf1, kBufLen));

    W("  Stack trace table:    %10s bytes (%s entries, %s used)\n",
      Show(sizes.mStackTraceTable,       gBuf1, kBufLen),
      Show(gStackTraceTable->capacity(), gBuf2, kBufLen),
      Show(gStackTraceTable->count(),    gBuf3, kBufLen));

    W("  Block table:          %10s bytes (%s entries, %s used)\n",
      Show(sizes.mBlockTable,       gBuf1, kBufLen),
      Show(gBlockTable->capacity(), gBuf2, kBufLen),
      Show(gBlockTable->count(),    gBuf3, kBufLen));

    W("\nData structures that are destroyed after Dump() ends:\n");

    size_t unreportedSize =
      unreportedTraceRecordTable.sizeOfIncludingThis(MallocSizeOf);
    W("  Unreported table:     %10s bytes (%s entries, %s used)\n",
      Show(unreportedSize,                        gBuf1, kBufLen),
      Show(unreportedTraceRecordTable.capacity(), gBuf2, kBufLen),
      Show(unreportedTraceRecordTable.count(),    gBuf3, kBufLen));

    size_t onceReportedSize =
      onceReportedTraceRecordTable.sizeOfIncludingThis(MallocSizeOf);
    W("  Once-reported table:  %10s bytes (%s entries, %s used)\n",
      Show(onceReportedSize,                        gBuf1, kBufLen),
      Show(onceReportedTraceRecordTable.capacity(), gBuf2, kBufLen),
      Show(onceReportedTraceRecordTable.count(),    gBuf3, kBufLen));

    size_t twiceReportedSize =
      twiceReportedTraceRecordTable.sizeOfIncludingThis(MallocSizeOf);
    W("  Twice-reported table: %10s bytes (%s entries, %s used)\n",
      Show(twiceReportedSize,                        gBuf1, kBufLen),
      Show(twiceReportedTraceRecordTable.capacity(), gBuf2, kBufLen),
      Show(twiceReportedTraceRecordTable.count(),    gBuf3, kBufLen));

    W("  Location service:     %10s bytes\n",
      Show(locService->SizeOfIncludingThis(), gBuf1, kBufLen));

    W("\nCounts:\n");

    size_t hits   = locService->NumCacheHits();
    size_t misses = locService->NumCacheMisses();
    size_t requests = hits + misses;
    W("  Location service:    %10s requests\n",
      Show(requests, gBuf1, kBufLen));

    size_t count    = locService->CacheCount();
    size_t capacity = locService->CacheCapacity();
    W("  Location service cache:  %4.1f%% hit rate, %.1f%% occupancy at end\n",
      Percent(hits, requests), Percent(count, capacity));

    W("\n");
  }

  InfallibleAllocPolicy::delete_(locService);

  ClearReportsInternal(); // Use internal version, we already have the lock.

  StatusMsg("}\n");
}

//---------------------------------------------------------------------------
// Testing
//---------------------------------------------------------------------------

// This function checks that heap blocks that have the same stack trace but
// different (or no) reporters get aggregated separately.
void foo()
{
   char* a[6];
   for (int i = 0; i < 6; i++) {
      a[i] = (char*) malloc(128 - 16*i);
   }

   for (int i = 0; i <= 1; i++)
      Report(a[i]);                     // reported
   Report(a[2]);                        // reported
   Report(a[3]);                        // reported
   // a[4], a[5] unreported
}

// This stops otherwise-unused variables from being optimized away.
static void
UseItOrLoseIt(void* a)
{
  char buf[64];
  sprintf(buf, "%p\n", a);
  fwrite(buf, 1, strlen(buf) + 1, stderr);
}

// The output from this should be compared against test-expected.dmd.  It's
// been tested on Linux64, and probably will give different results on other
// platforms.
static void
RunTestMode(FILE* fp)
{
  Writer writer(FpWrite, fp);

  // The first part of this test requires sampling to be disabled.
  gOptions->SetSampleBelowSize(1);

  // Dump 1.  Zero for everything.
  Dump(writer);

  // Dump 2: 1 freed, 9 out of 10 unreported.
  // Dump 3: still present and unreported.
  int i;
  char* a;
  for (i = 0; i < 10; i++) {
      a = (char*) malloc(100);
      UseItOrLoseIt(a);
  }
  free(a);

  // Min-sized block.
  // Dump 2: reported.
  // Dump 3: thrice-reported.
  char* a2 = (char*) malloc(0);
  Report(a2);

  // Operator new[].
  // Dump 2: reported.
  // Dump 3: reportedness carries over, due to ReportOnAlloc.
  char* b = new char[10];
  ReportOnAlloc(b);

  // ReportOnAlloc, then freed.
  // Dump 2: freed, irrelevant.
  // Dump 3: freed, irrelevant.
  char* b2 = new char;
  ReportOnAlloc(b2);
  free(b2);

  // Dump 2: reported 4 times.
  // Dump 3: freed, irrelevant.
  char* c = (char*) calloc(10, 3);
  Report(c);
  for (int i = 0; i < 3; i++) {
    Report(c);
  }

  // Dump 2: ignored.
  // Dump 3: irrelevant.
  Report((void*)(intptr_t)i);

  // jemalloc rounds this up to 8192.
  // Dump 2: reported.
  // Dump 3: freed.
  char* e = (char*) malloc(4096);
  e = (char*) realloc(e, 4097);
  Report(e);

  // First realloc is like malloc;  second realloc is shrinking.
  // Dump 2: reported.
  // Dump 3: re-reported.
  char* e2 = (char*) realloc(nullptr, 1024);
  e2 = (char*) realloc(e2, 512);
  Report(e2);

  // First realloc is like malloc;  second realloc creates a min-sized block.
  // XXX: on Windows, second realloc frees the block.
  // Dump 2: reported.
  // Dump 3: freed, irrelevant.
  char* e3 = (char*) realloc(nullptr, 1023);
//e3 = (char*) realloc(e3, 0);
  MOZ_ASSERT(e3);
  Report(e3);

  // Dump 2: freed, irrelevant.
  // Dump 3: freed, irrelevant.
  char* f = (char*) malloc(64);
  free(f);

  // Dump 2: ignored.
  // Dump 3: irrelevant.
  Report((void*)(intptr_t)0x0);

  // Dump 2: mixture of reported and unreported.
  // Dump 3: all unreported.
  foo();
  foo();

  // Dump 2: twice-reported.
  // Dump 3: twice-reported.
  char* g1 = (char*) malloc(77);
  ReportOnAlloc(g1);
  ReportOnAlloc(g1);

  // Dump 2: twice-reported.
  // Dump 3: once-reported.
  char* g2 = (char*) malloc(78);
  Report(g2);
  ReportOnAlloc(g2);

  // Dump 2: twice-reported.
  // Dump 3: once-reported.
  char* g3 = (char*) malloc(79);
  ReportOnAlloc(g3);
  Report(g3);

  // All the odd-ball ones.
  // Dump 2: all unreported.
  // Dump 3: all freed, irrelevant.
  // XXX: no memalign on Mac
//void* x = memalign(64, 65);           // rounds up to 128
//UseItOrLoseIt(x);
  // XXX: posix_memalign doesn't work on B2G
//void* y;
//posix_memalign(&y, 128, 129);         // rounds up to 256
//UseItOrLoseIt(y);
  // XXX: valloc doesn't work on Windows.
//void* z = valloc(1);                  // rounds up to 4096
//UseItOrLoseIt(z);
//aligned_alloc(64, 256);               // XXX: C11 only

  // Dump 2.
  Dump(writer);

  //---------

  Report(a2);
  Report(a2);
  free(c);
  free(e);
  Report(e2);
  free(e3);
//free(x);
//free(y);
//free(z);

  // Dump 3.
  Dump(writer);

  //---------

  // Clear all knowledge of existing blocks to give us a clean slate.
  gBlockTable->clear();

  gOptions->SetSampleBelowSize(128);

  char* s;

  // This equals the sample size, and so is reported exactly.  It should be
  // listed before records of the same size that are sampled.
  s = (char*) malloc(128);
  UseItOrLoseIt(s);

  // This exceeds the sample size, and so is reported exactly.
  s = (char*) malloc(144);
  UseItOrLoseIt(s);

  // These together constitute exactly one sample.
  for (int i = 0; i < 16; i++) {
    s = (char*) malloc(8);
    UseItOrLoseIt(s);
  }
  MOZ_ASSERT(gSmallBlockActualSizeCounter == 0);

  // These fall 8 bytes short of a full sample.
  for (int i = 0; i < 15; i++) {
    s = (char*) malloc(8);
    UseItOrLoseIt(s);
  }
  MOZ_ASSERT(gSmallBlockActualSizeCounter == 120);

  // This exceeds the sample size, and so is recorded exactly.
  s = (char*) malloc(256);
  UseItOrLoseIt(s);
  MOZ_ASSERT(gSmallBlockActualSizeCounter == 120);

  // Together with the |i < 15| loop above, this gets more than a full sample:
  // one sample is taken and the remaining 88 bytes stay in the counter.
  s = (char*) malloc(96);
  UseItOrLoseIt(s);
  MOZ_ASSERT(gSmallBlockActualSizeCounter == 88);

  // This gets to another full sample.
  for (int i = 0; i < 5; i++) {
    s = (char*) malloc(8);
    UseItOrLoseIt(s);
  }
  MOZ_ASSERT(gSmallBlockActualSizeCounter == 0);

  // This allocates 16, 32, ..., 128 bytes, which results in a stack trace
  // record that contains a mix of sampled and non-sampled blocks, and so
  // should be printed with '~' signs.
  for (int i = 1; i <= 8; i++) {
    s = (char*) malloc(i * 16);
    UseItOrLoseIt(s);
  }
  MOZ_ASSERT(gSmallBlockActualSizeCounter == 64);

  // At the end we're 64 bytes into the current sample so we report ~1,424
  // bytes of allocation overall, which is 64 less than the real value 1,488.

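  // Worked totals (illustrative, not part of the original source):
  // 128 + 144 + 16*8 + 15*8 + 256 + 96 + 5*8 + (16+32+...+128) = 1,488 bytes
  // were allocated since the clear; the counter holds the final 64, so the
  // sampled records account for ~1,424 bytes.
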
  // Dump 4.
  Dump(writer);
}

//---------------------------------------------------------------------------
// Stress testing microbenchmark
//---------------------------------------------------------------------------

// This stops otherwise-unused variables from being optimized away.
static void
UseItOrLoseIt2(void* a)
{
  if (a == (void*)0x42) {
    printf("UseItOrLoseIt2\n");
  }
}

MOZ_NEVER_INLINE static void
stress5()
{
  for (int i = 0; i < 10; i++) {
    void* x = malloc(64);
    UseItOrLoseIt2(x);
    if (i & 1) {
      free(x);
    }
  }
}

MOZ_NEVER_INLINE static void
stress4()
{
  stress5(); stress5(); stress5(); stress5(); stress5();
  stress5(); stress5(); stress5(); stress5(); stress5();
}

MOZ_NEVER_INLINE static void
stress3()
{
  for (int i = 0; i < 10; i++) {
    stress4();
  }
}

MOZ_NEVER_INLINE static void
stress2()
{
  stress3(); stress3(); stress3(); stress3(); stress3();
  stress3(); stress3(); stress3(); stress3(); stress3();
}

MOZ_NEVER_INLINE static void
stress1()
{
  for (int i = 0; i < 10; i++) {
    stress2();
  }
}

// This stress test does lots of allocations and frees, which is where most of
// DMD's overhead occurs.  It allocates 1,000,000 64-byte blocks, spread evenly
// across 1,000 distinct stack traces.  It frees every second one immediately
// after allocating it.
//
// It's highly artificial, but it's deterministic and easy to run.  It can be
// timed under different conditions to glean performance data.
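//
// Illustrative arithmetic (not part of the original source): stress5() does
// 10 allocations; stress4() runs it 10 times (100), stress3() 10 times more
// (1,000), stress2() (10,000) and stress1() (100,000); RunStressMode() below
// calls stress1() 10 times, for 1,000,000 allocations in total.  The 10
// textual call sites each in RunStressMode(), stress2() and stress4() give
// 10 * 10 * 10 = 1,000 distinct stack traces.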
static void
RunStressMode(FILE* fp)
{
  Writer writer(FpWrite, fp);

  // Disable sampling for maximum stress.
  gOptions->SetSampleBelowSize(1);

  stress1(); stress1(); stress1(); stress1(); stress1();
  stress1(); stress1(); stress1(); stress1(); stress1();

  Dump(writer);
}

}   // namespace dmd
}   // namespace mozilla
