1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/memory/replace/dmd/DMD.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,2612 @@ 1.4 +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 1.5 +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ 1.6 +/* This Source Code Form is subject to the terms of the Mozilla Public 1.7 + * License, v. 2.0. If a copy of the MPL was not distributed with this 1.8 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.9 + 1.10 +#include "DMD.h" 1.11 + 1.12 +#include <ctype.h> 1.13 +#include <errno.h> 1.14 +#include <limits.h> 1.15 +#include <stdarg.h> 1.16 +#include <stdio.h> 1.17 +#include <stdlib.h> 1.18 +#include <string.h> 1.19 + 1.20 +#ifdef XP_WIN 1.21 +#if defined(MOZ_OPTIMIZE) && !defined(MOZ_PROFILING) 1.22 +#error "Optimized, DMD-enabled builds on Windows must be built with --enable-profiling" 1.23 +#endif 1.24 +#include <windows.h> 1.25 +#include <process.h> 1.26 +#else 1.27 +#include <unistd.h> 1.28 +#endif 1.29 + 1.30 +#ifdef ANDROID 1.31 +#include <android/log.h> 1.32 +#endif 1.33 + 1.34 +#include "nscore.h" 1.35 +#include "nsStackWalk.h" 1.36 + 1.37 +#include "js/HashTable.h" 1.38 +#include "js/Vector.h" 1.39 + 1.40 +#include "mozilla/Assertions.h" 1.41 +#include "mozilla/HashFunctions.h" 1.42 +#include "mozilla/Likely.h" 1.43 +#include "mozilla/MemoryReporting.h" 1.44 + 1.45 +// MOZ_REPLACE_ONLY_MEMALIGN saves us from having to define 1.46 +// replace_{posix_memalign,aligned_alloc,valloc}. It requires defining 1.47 +// PAGE_SIZE. Nb: sysconf() is expensive, but it's only used for (the obsolete 1.48 +// and rarely used) valloc. 1.49 +#define MOZ_REPLACE_ONLY_MEMALIGN 1 1.50 +#ifdef XP_WIN 1.51 +#define PAGE_SIZE GetPageSize() 1.52 +static long GetPageSize() 1.53 +{ 1.54 + SYSTEM_INFO si; 1.55 + GetSystemInfo(&si); 1.56 + return si.dwPageSize; 1.57 +} 1.58 +#else 1.59 +#define PAGE_SIZE sysconf(_SC_PAGESIZE) 1.60 +#endif 1.61 +#include "replace_malloc.h" 1.62 +#undef MOZ_REPLACE_ONLY_MEMALIGN 1.63 +#undef PAGE_SIZE 1.64 + 1.65 +namespace mozilla { 1.66 +namespace dmd { 1.67 + 1.68 +//--------------------------------------------------------------------------- 1.69 +// Utilities 1.70 +//--------------------------------------------------------------------------- 1.71 + 1.72 +#ifndef DISALLOW_COPY_AND_ASSIGN 1.73 +#define DISALLOW_COPY_AND_ASSIGN(T) \ 1.74 + T(const T&); \ 1.75 + void operator=(const T&) 1.76 +#endif 1.77 + 1.78 +static const malloc_table_t* gMallocTable = nullptr; 1.79 + 1.80 +// This enables/disables DMD. 1.81 +static bool gIsDMDRunning = false; 1.82 + 1.83 +// This provides infallible allocations (they abort on OOM). We use it for all 1.84 +// of DMD's own allocations, which fall into the following three cases. 1.85 +// - Direct allocations (the easy case). 1.86 +// - Indirect allocations in js::{Vector,HashSet,HashMap} -- this class serves 1.87 +// as their AllocPolicy. 1.88 +// - Other indirect allocations (e.g. NS_StackWalk) -- see the comments on 1.89 +// Thread::mBlockIntercepts and in replace_malloc for how these work. 
1.90 +// 1.91 +class InfallibleAllocPolicy 1.92 +{ 1.93 + static void ExitOnFailure(const void* aP); 1.94 + 1.95 +public: 1.96 + static void* malloc_(size_t aSize) 1.97 + { 1.98 + void* p = gMallocTable->malloc(aSize); 1.99 + ExitOnFailure(p); 1.100 + return p; 1.101 + } 1.102 + 1.103 + static void* calloc_(size_t aSize) 1.104 + { 1.105 + void* p = gMallocTable->calloc(1, aSize); 1.106 + ExitOnFailure(p); 1.107 + return p; 1.108 + } 1.109 + 1.110 + // This realloc_ is the one we use for direct reallocs within DMD. 1.111 + static void* realloc_(void* aPtr, size_t aNewSize) 1.112 + { 1.113 + void* p = gMallocTable->realloc(aPtr, aNewSize); 1.114 + ExitOnFailure(p); 1.115 + return p; 1.116 + } 1.117 + 1.118 + // This realloc_ is required for this to be a JS container AllocPolicy. 1.119 + static void* realloc_(void* aPtr, size_t aOldSize, size_t aNewSize) 1.120 + { 1.121 + return InfallibleAllocPolicy::realloc_(aPtr, aNewSize); 1.122 + } 1.123 + 1.124 + static void* memalign_(size_t aAlignment, size_t aSize) 1.125 + { 1.126 + void* p = gMallocTable->memalign(aAlignment, aSize); 1.127 + ExitOnFailure(p); 1.128 + return p; 1.129 + } 1.130 + 1.131 + static void free_(void* aPtr) { gMallocTable->free(aPtr); } 1.132 + 1.133 + static char* strdup_(const char* aStr) 1.134 + { 1.135 + char* s = (char*) InfallibleAllocPolicy::malloc_(strlen(aStr) + 1); 1.136 + strcpy(s, aStr); 1.137 + return s; 1.138 + } 1.139 + 1.140 + template <class T> 1.141 + static T* new_() 1.142 + { 1.143 + void* mem = malloc_(sizeof(T)); 1.144 + ExitOnFailure(mem); 1.145 + return new (mem) T; 1.146 + } 1.147 + 1.148 + template <class T, typename P1> 1.149 + static T* new_(P1 p1) 1.150 + { 1.151 + void* mem = malloc_(sizeof(T)); 1.152 + ExitOnFailure(mem); 1.153 + return new (mem) T(p1); 1.154 + } 1.155 + 1.156 + template <class T> 1.157 + static void delete_(T *p) 1.158 + { 1.159 + if (p) { 1.160 + p->~T(); 1.161 + InfallibleAllocPolicy::free_(p); 1.162 + } 1.163 + } 1.164 + 1.165 + static void reportAllocOverflow() { ExitOnFailure(nullptr); } 1.166 +}; 1.167 + 1.168 +// This is only needed because of the |const void*| vs |void*| arg mismatch. 1.169 +static size_t 1.170 +MallocSizeOf(const void* aPtr) 1.171 +{ 1.172 + return gMallocTable->malloc_usable_size(const_cast<void*>(aPtr)); 1.173 +} 1.174 + 1.175 +static void 1.176 +StatusMsg(const char* aFmt, ...) 1.177 +{ 1.178 + va_list ap; 1.179 + va_start(ap, aFmt); 1.180 +#ifdef ANDROID 1.181 + __android_log_vprint(ANDROID_LOG_INFO, "DMD", aFmt, ap); 1.182 +#else 1.183 + // The +64 is easily enough for the "DMD[<pid>] " prefix and the NUL. 1.184 + char* fmt = (char*) InfallibleAllocPolicy::malloc_(strlen(aFmt) + 64); 1.185 + sprintf(fmt, "DMD[%d] %s", getpid(), aFmt); 1.186 + vfprintf(stderr, fmt, ap); 1.187 + InfallibleAllocPolicy::free_(fmt); 1.188 +#endif 1.189 + va_end(ap); 1.190 +} 1.191 + 1.192 +/* static */ void 1.193 +InfallibleAllocPolicy::ExitOnFailure(const void* aP) 1.194 +{ 1.195 + if (!aP) { 1.196 + StatusMsg("out of memory; aborting\n"); 1.197 + MOZ_CRASH(); 1.198 + } 1.199 +} 1.200 + 1.201 +void 1.202 +Writer::Write(const char* aFmt, ...) const 1.203 +{ 1.204 + va_list ap; 1.205 + va_start(ap, aFmt); 1.206 + mWriterFun(mWriteState, aFmt, ap); 1.207 + va_end(ap); 1.208 +} 1.209 + 1.210 +#define W(...) aWriter.Write(__VA_ARGS__); 1.211 + 1.212 +#define WriteTitle(...) 
\ 1.213 + W("------------------------------------------------------------------\n"); \ 1.214 + W(__VA_ARGS__); \ 1.215 + W("------------------------------------------------------------------\n\n"); 1.216 + 1.217 +MOZ_EXPORT void 1.218 +FpWrite(void* aWriteState, const char* aFmt, va_list aAp) 1.219 +{ 1.220 + FILE* fp = static_cast<FILE*>(aWriteState); 1.221 + vfprintf(fp, aFmt, aAp); 1.222 +} 1.223 + 1.224 +static double 1.225 +Percent(size_t part, size_t whole) 1.226 +{ 1.227 + return (whole == 0) ? 0 : 100 * (double)part / whole; 1.228 +} 1.229 + 1.230 +// Commifies the number and prepends a '~' if requested. Best used with 1.231 +// |kBufLen| and |gBuf[1234]|, because they should be big enough for any number 1.232 +// we'll see. 1.233 +static char* 1.234 +Show(size_t n, char* buf, size_t buflen, bool addTilde = false) 1.235 +{ 1.236 + int nc = 0, i = 0, lasti = buflen - 2; 1.237 + buf[lasti + 1] = '\0'; 1.238 + if (n == 0) { 1.239 + buf[lasti - i] = '0'; 1.240 + i++; 1.241 + } else { 1.242 + while (n > 0) { 1.243 + if (((i - nc) % 3) == 0 && i != 0) { 1.244 + buf[lasti - i] = ','; 1.245 + i++; 1.246 + nc++; 1.247 + } 1.248 + buf[lasti - i] = static_cast<char>((n % 10) + '0'); 1.249 + i++; 1.250 + n /= 10; 1.251 + } 1.252 + } 1.253 + int firstCharIndex = lasti - i + 1; 1.254 + 1.255 + if (addTilde) { 1.256 + firstCharIndex--; 1.257 + buf[firstCharIndex] = '~'; 1.258 + } 1.259 + 1.260 + MOZ_ASSERT(firstCharIndex >= 0); 1.261 + return &buf[firstCharIndex]; 1.262 +} 1.263 + 1.264 +static const char* 1.265 +Plural(size_t aN) 1.266 +{ 1.267 + return aN == 1 ? "" : "s"; 1.268 +} 1.269 + 1.270 +// Used by calls to Show(). 1.271 +static const size_t kBufLen = 64; 1.272 +static char gBuf1[kBufLen]; 1.273 +static char gBuf2[kBufLen]; 1.274 +static char gBuf3[kBufLen]; 1.275 +static char gBuf4[kBufLen]; 1.276 + 1.277 +//--------------------------------------------------------------------------- 1.278 +// Options (Part 1) 1.279 +//--------------------------------------------------------------------------- 1.280 + 1.281 +class Options 1.282 +{ 1.283 + template <typename T> 1.284 + struct NumOption 1.285 + { 1.286 + const T mDefault; 1.287 + const T mMax; 1.288 + T mActual; 1.289 + NumOption(T aDefault, T aMax) 1.290 + : mDefault(aDefault), mMax(aMax), mActual(aDefault) 1.291 + {} 1.292 + }; 1.293 + 1.294 + enum Mode { 1.295 + Normal, // run normally 1.296 + Test, // do some basic correctness tests 1.297 + Stress // do some performance stress tests 1.298 + }; 1.299 + 1.300 + char* mDMDEnvVar; // a saved copy, for printing during Dump() 1.301 + 1.302 + NumOption<size_t> mSampleBelowSize; 1.303 + NumOption<uint32_t> mMaxFrames; 1.304 + NumOption<uint32_t> mMaxRecords; 1.305 + Mode mMode; 1.306 + 1.307 + void BadArg(const char* aArg); 1.308 + static const char* ValueIfMatch(const char* aArg, const char* aOptionName); 1.309 + static bool GetLong(const char* aArg, const char* aOptionName, 1.310 + long aMin, long aMax, long* aN); 1.311 + 1.312 +public: 1.313 + Options(const char* aDMDEnvVar); 1.314 + 1.315 + const char* DMDEnvVar() const { return mDMDEnvVar; } 1.316 + 1.317 + size_t SampleBelowSize() const { return mSampleBelowSize.mActual; } 1.318 + size_t MaxFrames() const { return mMaxFrames.mActual; } 1.319 + size_t MaxRecords() const { return mMaxRecords.mActual; } 1.320 + 1.321 + void SetSampleBelowSize(size_t aN) { mSampleBelowSize.mActual = aN; } 1.322 + 1.323 + bool IsTestMode() const { return mMode == Test; } 1.324 + bool IsStressMode() const { return mMode == Stress; } 1.325 +}; 1.326 + 1.327 
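//---------------------------------------------------------------------------
// Illustrative, standalone sketch (not DMD code): the InfallibleAllocPolicy
// pattern defined earlier -- abort on OOM, construct with placement new --
// reduced to the two members needed to show the idea. It uses plain libc
// malloc where DMD goes through gMallocTable, and a variadic new_() where
// DMD spells out fixed-arity overloads. Because every path aborts on
// failure, callers (including js::Vector/js::HashSet, which take such a
// class as their AllocPolicy) never need nullptr checks.
//---------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <new>        // placement new
#include <utility>    // std::forward

class InfallibleAlloc
{
  static void ExitOnFailure(const void* aP)
  {
    if (!aP) {
      fprintf(stderr, "out of memory; aborting\n");
      abort();
    }
  }

public:
  static void* malloc_(size_t aSize)
  {
    void* p = malloc(aSize);
    ExitOnFailure(p);
    return p;                        // never nullptr
  }

  // Construct a T in infallibly-allocated memory, like new_<T>(...) above.
  template <class T, class... Args>
  static T* new_(Args&&... aArgs)
  {
    void* mem = malloc_(sizeof(T));
    return new (mem) T(std::forward<Args>(aArgs)...);
  }
};
//---------------------------------------------------------------------------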
+static Options *gOptions; 1.328 + 1.329 +//--------------------------------------------------------------------------- 1.330 +// The global lock 1.331 +//--------------------------------------------------------------------------- 1.332 + 1.333 +// MutexBase implements the platform-specific parts of a mutex. 1.334 + 1.335 +#ifdef XP_WIN 1.336 + 1.337 +class MutexBase 1.338 +{ 1.339 + CRITICAL_SECTION mCS; 1.340 + 1.341 + DISALLOW_COPY_AND_ASSIGN(MutexBase); 1.342 + 1.343 +public: 1.344 + MutexBase() 1.345 + { 1.346 + InitializeCriticalSection(&mCS); 1.347 + } 1.348 + 1.349 + ~MutexBase() 1.350 + { 1.351 + DeleteCriticalSection(&mCS); 1.352 + } 1.353 + 1.354 + void Lock() 1.355 + { 1.356 + EnterCriticalSection(&mCS); 1.357 + } 1.358 + 1.359 + void Unlock() 1.360 + { 1.361 + LeaveCriticalSection(&mCS); 1.362 + } 1.363 +}; 1.364 + 1.365 +#else 1.366 + 1.367 +#include <pthread.h> 1.368 +#include <sys/types.h> 1.369 + 1.370 +class MutexBase 1.371 +{ 1.372 + pthread_mutex_t mMutex; 1.373 + 1.374 + DISALLOW_COPY_AND_ASSIGN(MutexBase); 1.375 + 1.376 +public: 1.377 + MutexBase() 1.378 + { 1.379 + pthread_mutex_init(&mMutex, nullptr); 1.380 + } 1.381 + 1.382 + void Lock() 1.383 + { 1.384 + pthread_mutex_lock(&mMutex); 1.385 + } 1.386 + 1.387 + void Unlock() 1.388 + { 1.389 + pthread_mutex_unlock(&mMutex); 1.390 + } 1.391 +}; 1.392 + 1.393 +#endif 1.394 + 1.395 +class Mutex : private MutexBase 1.396 +{ 1.397 + bool mIsLocked; 1.398 + 1.399 + DISALLOW_COPY_AND_ASSIGN(Mutex); 1.400 + 1.401 +public: 1.402 + Mutex() 1.403 + : mIsLocked(false) 1.404 + {} 1.405 + 1.406 + void Lock() 1.407 + { 1.408 + MutexBase::Lock(); 1.409 + MOZ_ASSERT(!mIsLocked); 1.410 + mIsLocked = true; 1.411 + } 1.412 + 1.413 + void Unlock() 1.414 + { 1.415 + MOZ_ASSERT(mIsLocked); 1.416 + mIsLocked = false; 1.417 + MutexBase::Unlock(); 1.418 + } 1.419 + 1.420 + bool IsLocked() 1.421 + { 1.422 + return mIsLocked; 1.423 + } 1.424 +}; 1.425 + 1.426 +// This lock must be held while manipulating global state, such as 1.427 +// gStackTraceTable, gBlockTable, etc. 
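//---------------------------------------------------------------------------
// Illustrative, standalone sketch (not DMD code): the assertion-checked mutex
// idea restated with std::mutex, purely for brevity. DMD hand-rolls MutexBase
// above (CRITICAL_SECTION / pthread_mutex_t), presumably because this code
// runs extremely early and must avoid heavier library machinery.
//---------------------------------------------------------------------------
#include <assert.h>
#include <mutex>

class CheckedMutex
{
  std::mutex mMutex;
  bool mIsLocked;

public:
  CheckedMutex() : mIsLocked(false) {}

  void Lock()
  {
    mMutex.lock();
    assert(!mIsLocked);   // the flag must never already be set here
    mIsLocked = true;
  }

  void Unlock()
  {
    assert(mIsLocked);    // catches unlocking a mutex we don't hold
    mIsLocked = false;
    mMutex.unlock();
  }

  // Only meaningful when asked by the thread that holds the lock, which is
  // exactly how DMD uses it (e.g. MOZ_ASSERT(gStateLock->IsLocked())).
  bool IsLocked() const { return mIsLocked; }
};
//---------------------------------------------------------------------------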
1.428 +static Mutex* gStateLock = nullptr; 1.429 + 1.430 +class AutoLockState 1.431 +{ 1.432 + DISALLOW_COPY_AND_ASSIGN(AutoLockState); 1.433 + 1.434 +public: 1.435 + AutoLockState() 1.436 + { 1.437 + gStateLock->Lock(); 1.438 + } 1.439 + ~AutoLockState() 1.440 + { 1.441 + gStateLock->Unlock(); 1.442 + } 1.443 +}; 1.444 + 1.445 +class AutoUnlockState 1.446 +{ 1.447 + DISALLOW_COPY_AND_ASSIGN(AutoUnlockState); 1.448 + 1.449 +public: 1.450 + AutoUnlockState() 1.451 + { 1.452 + gStateLock->Unlock(); 1.453 + } 1.454 + ~AutoUnlockState() 1.455 + { 1.456 + gStateLock->Lock(); 1.457 + } 1.458 +}; 1.459 + 1.460 +//--------------------------------------------------------------------------- 1.461 +// Thread-local storage and blocking of intercepts 1.462 +//--------------------------------------------------------------------------- 1.463 + 1.464 +#ifdef XP_WIN 1.465 + 1.466 +#define DMD_TLS_INDEX_TYPE DWORD 1.467 +#define DMD_CREATE_TLS_INDEX(i_) do { \ 1.468 + (i_) = TlsAlloc(); \ 1.469 + } while (0) 1.470 +#define DMD_DESTROY_TLS_INDEX(i_) TlsFree((i_)) 1.471 +#define DMD_GET_TLS_DATA(i_) TlsGetValue((i_)) 1.472 +#define DMD_SET_TLS_DATA(i_, v_) TlsSetValue((i_), (v_)) 1.473 + 1.474 +#else 1.475 + 1.476 +#include <pthread.h> 1.477 + 1.478 +#define DMD_TLS_INDEX_TYPE pthread_key_t 1.479 +#define DMD_CREATE_TLS_INDEX(i_) pthread_key_create(&(i_), nullptr) 1.480 +#define DMD_DESTROY_TLS_INDEX(i_) pthread_key_delete((i_)) 1.481 +#define DMD_GET_TLS_DATA(i_) pthread_getspecific((i_)) 1.482 +#define DMD_SET_TLS_DATA(i_, v_) pthread_setspecific((i_), (v_)) 1.483 + 1.484 +#endif 1.485 + 1.486 +static DMD_TLS_INDEX_TYPE gTlsIndex; 1.487 + 1.488 +class Thread 1.489 +{ 1.490 + // Required for allocation via InfallibleAllocPolicy::new_. 1.491 + friend class InfallibleAllocPolicy; 1.492 + 1.493 + // When true, this blocks intercepts, which allows malloc interception 1.494 + // functions to themselves call malloc. (Nb: for direct calls to malloc we 1.495 + // can just use InfallibleAllocPolicy::{malloc_,new_}, but we sometimes 1.496 + // indirectly call vanilla malloc via functions like NS_StackWalk.) 1.497 + bool mBlockIntercepts; 1.498 + 1.499 + Thread() 1.500 + : mBlockIntercepts(false) 1.501 + {} 1.502 + 1.503 + DISALLOW_COPY_AND_ASSIGN(Thread); 1.504 + 1.505 +public: 1.506 + static Thread* Fetch(); 1.507 + 1.508 + bool BlockIntercepts() 1.509 + { 1.510 + MOZ_ASSERT(!mBlockIntercepts); 1.511 + return mBlockIntercepts = true; 1.512 + } 1.513 + 1.514 + bool UnblockIntercepts() 1.515 + { 1.516 + MOZ_ASSERT(mBlockIntercepts); 1.517 + return mBlockIntercepts = false; 1.518 + } 1.519 + 1.520 + bool InterceptsAreBlocked() const 1.521 + { 1.522 + return mBlockIntercepts; 1.523 + } 1.524 +}; 1.525 + 1.526 +/* static */ Thread* 1.527 +Thread::Fetch() 1.528 +{ 1.529 + Thread* t = static_cast<Thread*>(DMD_GET_TLS_DATA(gTlsIndex)); 1.530 + 1.531 + if (MOZ_UNLIKELY(!t)) { 1.532 + // This memory is never freed, even if the thread dies. It's a leak, but 1.533 + // only a tiny one. 1.534 + t = InfallibleAllocPolicy::new_<Thread>(); 1.535 + DMD_SET_TLS_DATA(gTlsIndex, t); 1.536 + } 1.537 + 1.538 + return t; 1.539 +} 1.540 + 1.541 +// An object of this class must be created (on the stack) before running any 1.542 +// code that might allocate. 
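//---------------------------------------------------------------------------
// Illustrative, standalone sketch (not DMD code): the thread-local
// re-entrancy guard that Thread/AutoBlockIntercepts implement. my_malloc_hook()
// stands in for replace_malloc() and record_block() for AllocCallback(); both
// names are invented. C++11 thread_local is used for brevity where DMD uses
// the TlsAlloc/pthread_key macros above.
//---------------------------------------------------------------------------
#include <stdlib.h>

static thread_local bool tInsideHook = false;

class AutoBlockReentry
{
  bool mSaved;
public:
  AutoBlockReentry() : mSaved(tInsideHook) { tInsideHook = true; }
  ~AutoBlockReentry() { tInsideHook = mSaved; }
};

static void record_block(void* aPtr, size_t aSize)
{
  (void) aPtr;
  (void) aSize;
  // Bookkeeping that may itself allocate (hash-table growth, stack walking).
  // Because tInsideHook is set by our caller, any nested malloc skips the
  // recording step instead of recursing forever.
}

void* my_malloc_hook(size_t aSize)
{
  void* p = malloc(aSize);
  if (p && !tInsideHook) {
    AutoBlockReentry guard;
    record_block(p, aSize);
  }
  return p;
}
//---------------------------------------------------------------------------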
1.543 +class AutoBlockIntercepts 1.544 +{ 1.545 + Thread* const mT; 1.546 + 1.547 + DISALLOW_COPY_AND_ASSIGN(AutoBlockIntercepts); 1.548 + 1.549 +public: 1.550 + AutoBlockIntercepts(Thread* aT) 1.551 + : mT(aT) 1.552 + { 1.553 + mT->BlockIntercepts(); 1.554 + } 1.555 + ~AutoBlockIntercepts() 1.556 + { 1.557 + MOZ_ASSERT(mT->InterceptsAreBlocked()); 1.558 + mT->UnblockIntercepts(); 1.559 + } 1.560 +}; 1.561 + 1.562 +//--------------------------------------------------------------------------- 1.563 +// Location service 1.564 +//--------------------------------------------------------------------------- 1.565 + 1.566 +// This class is used to print details about code locations. 1.567 +class LocationService 1.568 +{ 1.569 + // WriteLocation() is the key function in this class. It's basically a 1.570 + // wrapper around NS_DescribeCodeAddress. 1.571 + // 1.572 + // However, NS_DescribeCodeAddress is very slow on some platforms, and we 1.573 + // have lots of repeated (i.e. same PC) calls to it. So we do some caching 1.574 + // of results. Each cached result includes two strings (|mFunction| and 1.575 + // |mLibrary|), so we also optimize them for space in the following ways. 1.576 + // 1.577 + // - The number of distinct library names is small, e.g. a few dozen. There 1.578 + // is lots of repetition, especially of libxul. So we intern them in their 1.579 + // own table, which saves space over duplicating them for each cache entry. 1.580 + // 1.581 + // - The number of distinct function names is much higher, so we duplicate 1.582 + // them in each cache entry. That's more space-efficient than interning 1.583 + // because entries containing single-occurrence function names are quickly 1.584 + // overwritten, and their copies released. In addition, empty function 1.585 + // names are common, so we use nullptr to represent them compactly. 1.586 + 1.587 + struct StringHasher 1.588 + { 1.589 + typedef const char* Lookup; 1.590 + 1.591 + static uint32_t hash(const char* const& aS) 1.592 + { 1.593 + return HashString(aS); 1.594 + } 1.595 + 1.596 + static bool match(const char* const& aA, const char* const& aB) 1.597 + { 1.598 + return strcmp(aA, aB) == 0; 1.599 + } 1.600 + }; 1.601 + 1.602 + typedef js::HashSet<const char*, StringHasher, InfallibleAllocPolicy> 1.603 + StringTable; 1.604 + 1.605 + StringTable mLibraryStrings; 1.606 + 1.607 + struct Entry 1.608 + { 1.609 + const void* mPc; 1.610 + char* mFunction; // owned by the Entry; may be null 1.611 + const char* mLibrary; // owned by mLibraryStrings; never null 1.612 + // in a non-empty entry is in use 1.613 + ptrdiff_t mLOffset; 1.614 + char* mFileName; // owned by the Entry; may be null 1.615 + uint32_t mLineNo:31; 1.616 + uint32_t mInUse:1; // is the entry used? 1.617 + 1.618 + Entry() 1.619 + : mPc(0), mFunction(nullptr), mLibrary(nullptr), mLOffset(0), mFileName(nullptr), mLineNo(0), mInUse(0) 1.620 + {} 1.621 + 1.622 + ~Entry() 1.623 + { 1.624 + // We don't free mLibrary because it's externally owned. 1.625 + InfallibleAllocPolicy::free_(mFunction); 1.626 + InfallibleAllocPolicy::free_(mFileName); 1.627 + } 1.628 + 1.629 + void Replace(const void* aPc, const char* aFunction, 1.630 + const char* aLibrary, ptrdiff_t aLOffset, 1.631 + const char* aFileName, unsigned long aLineNo) 1.632 + { 1.633 + mPc = aPc; 1.634 + 1.635 + // Convert "" to nullptr. Otherwise, make a copy of the name. 1.636 + InfallibleAllocPolicy::free_(mFunction); 1.637 + mFunction = 1.638 + !aFunction[0] ? 
nullptr : InfallibleAllocPolicy::strdup_(aFunction); 1.639 + InfallibleAllocPolicy::free_(mFileName); 1.640 + mFileName = 1.641 + !aFileName[0] ? nullptr : InfallibleAllocPolicy::strdup_(aFileName); 1.642 + 1.643 + 1.644 + mLibrary = aLibrary; 1.645 + mLOffset = aLOffset; 1.646 + mLineNo = aLineNo; 1.647 + 1.648 + mInUse = 1; 1.649 + } 1.650 + 1.651 + size_t SizeOfExcludingThis() { 1.652 + // Don't measure mLibrary because it's externally owned. 1.653 + return MallocSizeOf(mFunction) + MallocSizeOf(mFileName); 1.654 + } 1.655 + }; 1.656 + 1.657 + // A direct-mapped cache. When doing a dump just after starting desktop 1.658 + // Firefox (which is similar to dumping after a longer-running session, 1.659 + // thanks to the limit on how many records we dump), a cache with 2^24 1.660 + // entries (which approximates an infinite-entry cache) has a ~91% hit rate. 1.661 + // A cache with 2^12 entries has a ~83% hit rate, and takes up ~85 KiB (on 1.662 + // 32-bit platforms) or ~150 KiB (on 64-bit platforms). 1.663 + static const size_t kNumEntries = 1 << 12; 1.664 + static const size_t kMask = kNumEntries - 1; 1.665 + Entry mEntries[kNumEntries]; 1.666 + 1.667 + size_t mNumCacheHits; 1.668 + size_t mNumCacheMisses; 1.669 + 1.670 +public: 1.671 + LocationService() 1.672 + : mEntries(), mNumCacheHits(0), mNumCacheMisses(0) 1.673 + { 1.674 + (void)mLibraryStrings.init(64); 1.675 + } 1.676 + 1.677 + void WriteLocation(const Writer& aWriter, const void* aPc) 1.678 + { 1.679 + MOZ_ASSERT(gStateLock->IsLocked()); 1.680 + 1.681 + uint32_t index = HashGeneric(aPc) & kMask; 1.682 + MOZ_ASSERT(index < kNumEntries); 1.683 + Entry& entry = mEntries[index]; 1.684 + 1.685 + if (!entry.mInUse || entry.mPc != aPc) { 1.686 + mNumCacheMisses++; 1.687 + 1.688 + // NS_DescribeCodeAddress can (on Linux) acquire a lock inside 1.689 + // the shared library loader. Another thread might call malloc 1.690 + // while holding that lock (when loading a shared library). So 1.691 + // we have to exit gStateLock around this call. For details, see 1.692 + // https://bugzilla.mozilla.org/show_bug.cgi?id=363334#c3 1.693 + nsCodeAddressDetails details; 1.694 + { 1.695 + AutoUnlockState unlock; 1.696 + (void)NS_DescribeCodeAddress(const_cast<void*>(aPc), &details); 1.697 + } 1.698 + 1.699 + // Intern the library name. 1.700 + const char* library = nullptr; 1.701 + StringTable::AddPtr p = mLibraryStrings.lookupForAdd(details.library); 1.702 + if (!p) { 1.703 + library = InfallibleAllocPolicy::strdup_(details.library); 1.704 + (void)mLibraryStrings.add(p, library); 1.705 + } else { 1.706 + library = *p; 1.707 + } 1.708 + 1.709 + entry.Replace(aPc, details.function, library, details.loffset, details.filename, details.lineno); 1.710 + 1.711 + } else { 1.712 + mNumCacheHits++; 1.713 + } 1.714 + 1.715 + MOZ_ASSERT(entry.mPc == aPc); 1.716 + 1.717 + uintptr_t entryPc = (uintptr_t)(entry.mPc); 1.718 + // Sometimes we get nothing useful. Just print "???" for the entire entry 1.719 + // so that fix-linux-stack.pl doesn't complain about an empty filename. 1.720 + if (!entry.mFunction && !entry.mLibrary[0] && entry.mLOffset == 0) { 1.721 + W(" ??? 0x%x\n", entryPc); 1.722 + } else { 1.723 + // Use "???" for unknown functions. 1.724 + const char* entryFunction = entry.mFunction ? entry.mFunction : "???"; 1.725 + if (entry.mFileName) { 1.726 + // On Windows we can get the filename and line number at runtime. 
1.727 + W(" %s (%s:%lu) 0x%x\n", 1.728 + entryFunction, entry.mFileName, entry.mLineNo, entryPc); 1.729 + } else { 1.730 + // On Linux and Mac we cannot get the filename and line number at 1.731 + // runtime, so we print the offset in a form that fix-linux-stack.pl and 1.732 + // fix_macosx_stack.py can post-process. 1.733 + W(" %s[%s +0x%X] 0x%x\n", 1.734 + entryFunction, entry.mLibrary, entry.mLOffset, entryPc); 1.735 + } 1.736 + } 1.737 + } 1.738 + 1.739 + size_t SizeOfIncludingThis() 1.740 + { 1.741 + size_t n = MallocSizeOf(this); 1.742 + for (uint32_t i = 0; i < kNumEntries; i++) { 1.743 + n += mEntries[i].SizeOfExcludingThis(); 1.744 + } 1.745 + 1.746 + n += mLibraryStrings.sizeOfExcludingThis(MallocSizeOf); 1.747 + for (StringTable::Range r = mLibraryStrings.all(); 1.748 + !r.empty(); 1.749 + r.popFront()) { 1.750 + n += MallocSizeOf(r.front()); 1.751 + } 1.752 + 1.753 + return n; 1.754 + } 1.755 + 1.756 + size_t CacheCapacity() const { return kNumEntries; } 1.757 + 1.758 + size_t CacheCount() const 1.759 + { 1.760 + size_t n = 0; 1.761 + for (size_t i = 0; i < kNumEntries; i++) { 1.762 + if (mEntries[i].mInUse) { 1.763 + n++; 1.764 + } 1.765 + } 1.766 + return n; 1.767 + } 1.768 + 1.769 + size_t NumCacheHits() const { return mNumCacheHits; } 1.770 + size_t NumCacheMisses() const { return mNumCacheMisses; } 1.771 +}; 1.772 + 1.773 +//--------------------------------------------------------------------------- 1.774 +// Stack traces 1.775 +//--------------------------------------------------------------------------- 1.776 + 1.777 +class StackTrace 1.778 +{ 1.779 +public: 1.780 + static const uint32_t MaxFrames = 24; 1.781 + 1.782 +private: 1.783 + uint32_t mLength; // The number of PCs. 1.784 + void* mPcs[MaxFrames]; // The PCs themselves. If --max-frames is less 1.785 + // than 24, this array is bigger than necessary, 1.786 + // but that case is unusual. 1.787 + 1.788 +public: 1.789 + StackTrace() : mLength(0) {} 1.790 + 1.791 + uint32_t Length() const { return mLength; } 1.792 + void* Pc(uint32_t i) const { MOZ_ASSERT(i < mLength); return mPcs[i]; } 1.793 + 1.794 + uint32_t Size() const { return mLength * sizeof(mPcs[0]); } 1.795 + 1.796 + // The stack trace returned by this function is interned in gStackTraceTable, 1.797 + // and so is immortal and unmovable. 1.798 + static const StackTrace* Get(Thread* aT); 1.799 + 1.800 + void Sort() 1.801 + { 1.802 + qsort(mPcs, mLength, sizeof(mPcs[0]), StackTrace::QsortCmp); 1.803 + } 1.804 + 1.805 + void Print(const Writer& aWriter, LocationService* aLocService) const; 1.806 + 1.807 + // Hash policy. 
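//---------------------------------------------------------------------------
// Illustrative, standalone sketch (not DMD code): the direct-mapped cache
// strategy LocationService uses above. The low bits of a cheap hash of the
// key select exactly one slot, and a miss simply overwrites that slot; there
// is no chaining and no eviction bookkeeping. Location and DescribeSlow() are
// invented stand-ins for nsCodeAddressDetails and NS_DescribeCodeAddress.
//---------------------------------------------------------------------------
#include <stddef.h>
#include <stdint.h>

struct Location { const char* mLibrary; unsigned long mLineNo; };

static Location DescribeSlow(const void* aPc)
{
  (void) aPc;
  Location loc = { "libxul.so", 0 };   // dummy result for the sketch
  return loc;
}

class PcCache
{
  static const size_t kNumEntries = 1 << 12;   // same size DMD picked (~83% hits)
  static const size_t kMask = kNumEntries - 1;

  struct Slot { const void* mPc; Location mLoc; bool mInUse; };
  Slot mSlots[kNumEntries];

public:
  PcCache() : mSlots() {}

  const Location& Lookup(const void* aPc)
  {
    Slot& slot = mSlots[(uintptr_t(aPc) >> 3) & kMask];   // cheap stand-in hash
    if (!slot.mInUse || slot.mPc != aPc) {
      slot.mPc = aPc;
      slot.mLoc = DescribeSlow(aPc);   // the expensive call the cache avoids
      slot.mInUse = true;
    }
    return slot.mLoc;
  }
};
//---------------------------------------------------------------------------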
1.808 +
1.809 +  typedef StackTrace* Lookup;
1.810 +
1.811 +  static uint32_t hash(const StackTrace* const& aSt)
1.812 +  {
1.813 +    return mozilla::HashBytes(aSt->mPcs, aSt->Size());
1.814 +  }
1.815 +
1.816 +  static bool match(const StackTrace* const& aA,
1.817 +                    const StackTrace* const& aB)
1.818 +  {
1.819 +    return aA->mLength == aB->mLength &&
1.820 +           memcmp(aA->mPcs, aB->mPcs, aA->Size()) == 0;
1.821 +  }
1.822 +
1.823 +private:
1.824 +  static void StackWalkCallback(void* aPc, void* aSp, void* aClosure)
1.825 +  {
1.826 +    StackTrace* st = (StackTrace*) aClosure;
1.827 +    MOZ_ASSERT(st->mLength < MaxFrames);
1.828 +    st->mPcs[st->mLength] = aPc;
1.829 +    st->mLength++;
1.830 +  }
1.831 +
1.832 +  static int QsortCmp(const void* aA, const void* aB)
1.833 +  {
1.834 +    const void* const a = *static_cast<const void* const*>(aA);
1.835 +    const void* const b = *static_cast<const void* const*>(aB);
1.836 +    if (a < b) return -1;
1.837 +    if (a > b) return 1;
1.838 +    return 0;
1.839 +  }
1.840 +};
1.841 +
1.842 +typedef js::HashSet<StackTrace*, StackTrace, InfallibleAllocPolicy>
1.843 +        StackTraceTable;
1.844 +static StackTraceTable* gStackTraceTable = nullptr;
1.845 +
1.846 +// We won't GC the stack trace table until it exceeds this many elements.
1.847 +static uint32_t gGCStackTraceTableWhenSizeExceeds = 4 * 1024;
1.848 +
1.849 +void
1.850 +StackTrace::Print(const Writer& aWriter, LocationService* aLocService) const
1.851 +{
1.852 +  if (mLength == 0) {
1.853 +    W(" (empty)\n"); // StackTrace::Get() must have failed
1.854 +    return;
1.855 +  }
1.856 +
1.857 +  for (uint32_t i = 0; i < mLength; i++) {
1.858 +    aLocService->WriteLocation(aWriter, Pc(i));
1.859 +  }
1.860 +}
1.861 +
1.862 +/* static */ const StackTrace*
1.863 +StackTrace::Get(Thread* aT)
1.864 +{
1.865 +  MOZ_ASSERT(gStateLock->IsLocked());
1.866 +  MOZ_ASSERT(aT->InterceptsAreBlocked());
1.867 +
1.868 +  // On Windows, NS_StackWalk can acquire a lock from the shared library
1.869 +  // loader. Another thread might call malloc while holding that lock (when
1.870 +  // loading a shared library). So we can't be in gStateLock during the call
1.871 +  // to NS_StackWalk. For details, see
1.872 +  // https://bugzilla.mozilla.org/show_bug.cgi?id=374829#c8
1.873 +  // On Linux, something similar can happen; see bug 824340.
1.874 +  // So let's just release it on all platforms.
1.875 +  nsresult rv;
1.876 +  StackTrace tmp;
1.877 +  {
1.878 +    AutoUnlockState unlock;
1.879 +    uint32_t skipFrames = 2;
1.880 +    rv = NS_StackWalk(StackWalkCallback, skipFrames,
1.881 +                      gOptions->MaxFrames(), &tmp, 0, nullptr);
1.882 +  }
1.883 +
1.884 +  if (rv == NS_OK) {
1.885 +    // Handle the common case first. All is ok. Nothing to do.
1.886 +  } else if (rv == NS_ERROR_NOT_IMPLEMENTED || rv == NS_ERROR_FAILURE) {
1.887 +    tmp.mLength = 0;
1.888 +  } else if (rv == NS_ERROR_UNEXPECTED) {
1.889 +    // XXX: This |rv| only happens on Mac, and it indicates that we're handling
1.890 +    // a call to malloc that happened inside a mutex-handling function. Any
1.891 +    // attempt to create a semaphore (which can happen in printf) could
1.892 +    // deadlock.
1.893 +    //
1.894 +    // However, the most complex thing DMD does after Get() returns is to put
1.895 +    // something in a hash table, which might call
1.896 +    // InfallibleAllocPolicy::malloc_. I'm not yet sure if this needs special
1.897 +    // handling, hence the forced abort. Sorry. If you hit this, please file
1.898 +    // a bug and CC nnethercote.
1.899 +    MOZ_CRASH();
1.900 +  } else {
1.901 +    MOZ_CRASH(); // should be impossible
1.902 +  }
1.903 +
1.904 +  StackTraceTable::AddPtr p = gStackTraceTable->lookupForAdd(&tmp);
1.905 +  if (!p) {
1.906 +    StackTrace* stnew = InfallibleAllocPolicy::new_<StackTrace>(tmp);
1.907 +    (void)gStackTraceTable->add(p, stnew);
1.908 +  }
1.909 +  return *p;
1.910 +}
1.911 +
1.912 +//---------------------------------------------------------------------------
1.913 +// Heap blocks
1.914 +//---------------------------------------------------------------------------
1.915 +
1.916 +// This class combines a 2-byte-aligned pointer (i.e. one whose bottom bit
1.917 +// is zero) with a 1-bit tag.
1.918 +//
1.919 +// |T| is the pointer type, e.g. |int*|, not the pointed-to type. This makes
1.920 +// it easier to have const pointers, e.g. |TaggedPtr<const int*>|.
1.921 +template <typename T>
1.922 +class TaggedPtr
1.923 +{
1.924 +  union
1.925 +  {
1.926 +    T mPtr;
1.927 +    uintptr_t mUint;
1.928 +  };
1.929 +
1.930 +  static const uintptr_t kTagMask = uintptr_t(0x1);
1.931 +  static const uintptr_t kPtrMask = ~kTagMask;
1.932 +
1.933 +  static bool IsTwoByteAligned(T aPtr)
1.934 +  {
1.935 +    return (uintptr_t(aPtr) & kTagMask) == 0;
1.936 +  }
1.937 +
1.938 +public:
1.939 +  TaggedPtr()
1.940 +    : mPtr(nullptr)
1.941 +  {}
1.942 +
1.943 +  TaggedPtr(T aPtr, bool aBool)
1.944 +    : mPtr(aPtr)
1.945 +  {
1.946 +    MOZ_ASSERT(IsTwoByteAligned(aPtr));
1.947 +    uintptr_t tag = uintptr_t(aBool);
1.948 +    MOZ_ASSERT(tag <= kTagMask);
1.949 +    mUint |= (tag & kTagMask);
1.950 +  }
1.951 +
1.952 +  void Set(T aPtr, bool aBool)
1.953 +  {
1.954 +    MOZ_ASSERT(IsTwoByteAligned(aPtr));
1.955 +    mPtr = aPtr;
1.956 +    uintptr_t tag = uintptr_t(aBool);
1.957 +    MOZ_ASSERT(tag <= kTagMask);
1.958 +    mUint |= (tag & kTagMask);
1.959 +  }
1.960 +
1.961 +  T Ptr() const { return reinterpret_cast<T>(mUint & kPtrMask); }
1.962 +
1.963 +  bool Tag() const { return bool(mUint & kTagMask); }
1.964 +};
1.965 +
1.966 +// A live heap block.
1.967 +class Block
1.968 +{
1.969 +  const void* mPtr;
1.970 +  const size_t mReqSize;    // size requested
1.971 +
1.972 +  // Ptr: |mAllocStackTrace| - stack trace where this block was allocated.
1.973 +  // Tag bit 0: |mSampled| - was this block sampled? (if so, slop == 0).
1.974 +  TaggedPtr<const StackTrace* const>
1.975 +    mAllocStackTrace_mSampled;
1.976 +
1.977 +  // This array has two elements because we record at most two reports of a
1.978 +  // block.
1.979 +  // - Ptr: |mReportStackTrace| - stack trace where this block was reported.
1.980 +  //   nullptr if not reported.
1.981 +  // - Tag bit 0: |mReportedOnAlloc| - was the block reported immediately on
1.982 +  //   allocation? If so, DMD must not clear the report at the end of Dump().
1.983 +  //   Only relevant if |mReportStackTrace| is non-nullptr.
1.984 +  //
1.985 +  // |mPtr| is used as the key in BlockTable, so it's ok for this member
1.986 +  // to be |mutable|.
1.987 +  mutable TaggedPtr<const StackTrace*> mReportStackTrace_mReportedOnAlloc[2];
1.988 +
1.989 +public:
1.990 +  Block(const void* aPtr, size_t aReqSize, const StackTrace* aAllocStackTrace,
1.991 +        bool aSampled)
1.992 +    : mPtr(aPtr),
1.993 +      mReqSize(aReqSize),
1.994 +      mAllocStackTrace_mSampled(aAllocStackTrace, aSampled),
1.995 +      mReportStackTrace_mReportedOnAlloc()  // all fields get zeroed
1.996 +  {
1.997 +    MOZ_ASSERT(aAllocStackTrace);
1.998 +  }
1.999 +
1.1000 +  size_t ReqSize() const { return mReqSize; }
1.1001 +
1.1002 +  // Sampled blocks always have zero slop.
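//---------------------------------------------------------------------------
// Illustrative, standalone sketch (not DMD code): the TaggedPtr trick above,
// restated without the template. Heap pointers are at least 2-byte aligned,
// so bit 0 is always zero and can carry a boolean for free; this is what lets
// Block keep |mSampled| and |mReportedOnAlloc| inside existing pointer fields
// instead of adding bool members.
//---------------------------------------------------------------------------
#include <assert.h>
#include <stdint.h>

struct TaggedBoolPtr
{
  uintptr_t mBits;

  TaggedBoolPtr(const void* aPtr, bool aFlag)
    : mBits(uintptr_t(aPtr) | uintptr_t(aFlag))
  {
    assert((uintptr_t(aPtr) & 1) == 0);   // pointer must be 2-byte aligned
  }

  const void* Ptr() const
  {
    return reinterpret_cast<const void*>(mBits & ~uintptr_t(1));
  }

  bool Flag() const { return (mBits & 1) != 0; }
};

// Usage: TaggedBoolPtr t(somePtr, true); t.Ptr() recovers somePtr, t.Flag()
// recovers the boolean, and the whole thing still fits in one word.
//---------------------------------------------------------------------------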
1.1003 + size_t SlopSize() const 1.1004 + { 1.1005 + return IsSampled() ? 0 : MallocSizeOf(mPtr) - mReqSize; 1.1006 + } 1.1007 + 1.1008 + size_t UsableSize() const 1.1009 + { 1.1010 + return IsSampled() ? mReqSize : MallocSizeOf(mPtr); 1.1011 + } 1.1012 + 1.1013 + bool IsSampled() const 1.1014 + { 1.1015 + return mAllocStackTrace_mSampled.Tag(); 1.1016 + } 1.1017 + 1.1018 + const StackTrace* AllocStackTrace() const 1.1019 + { 1.1020 + return mAllocStackTrace_mSampled.Ptr(); 1.1021 + } 1.1022 + 1.1023 + const StackTrace* ReportStackTrace1() const { 1.1024 + return mReportStackTrace_mReportedOnAlloc[0].Ptr(); 1.1025 + } 1.1026 + 1.1027 + const StackTrace* ReportStackTrace2() const { 1.1028 + return mReportStackTrace_mReportedOnAlloc[1].Ptr(); 1.1029 + } 1.1030 + 1.1031 + bool ReportedOnAlloc1() const { 1.1032 + return mReportStackTrace_mReportedOnAlloc[0].Tag(); 1.1033 + } 1.1034 + 1.1035 + bool ReportedOnAlloc2() const { 1.1036 + return mReportStackTrace_mReportedOnAlloc[1].Tag(); 1.1037 + } 1.1038 + 1.1039 + uint32_t NumReports() const { 1.1040 + if (ReportStackTrace2()) { 1.1041 + MOZ_ASSERT(ReportStackTrace1()); 1.1042 + return 2; 1.1043 + } 1.1044 + if (ReportStackTrace1()) { 1.1045 + return 1; 1.1046 + } 1.1047 + return 0; 1.1048 + } 1.1049 + 1.1050 + // This is |const| thanks to the |mutable| fields above. 1.1051 + void Report(Thread* aT, bool aReportedOnAlloc) const 1.1052 + { 1.1053 + // We don't bother recording reports after the 2nd one. 1.1054 + uint32_t numReports = NumReports(); 1.1055 + if (numReports < 2) { 1.1056 + mReportStackTrace_mReportedOnAlloc[numReports].Set(StackTrace::Get(aT), 1.1057 + aReportedOnAlloc); 1.1058 + } 1.1059 + } 1.1060 + 1.1061 + void UnreportIfNotReportedOnAlloc() const 1.1062 + { 1.1063 + if (!ReportedOnAlloc1() && !ReportedOnAlloc2()) { 1.1064 + mReportStackTrace_mReportedOnAlloc[0].Set(nullptr, 0); 1.1065 + mReportStackTrace_mReportedOnAlloc[1].Set(nullptr, 0); 1.1066 + 1.1067 + } else if (!ReportedOnAlloc1() && ReportedOnAlloc2()) { 1.1068 + // Shift the 2nd report down to the 1st one. 1.1069 + mReportStackTrace_mReportedOnAlloc[0] = 1.1070 + mReportStackTrace_mReportedOnAlloc[1]; 1.1071 + mReportStackTrace_mReportedOnAlloc[1].Set(nullptr, 0); 1.1072 + 1.1073 + } else if (ReportedOnAlloc1() && !ReportedOnAlloc2()) { 1.1074 + mReportStackTrace_mReportedOnAlloc[1].Set(nullptr, 0); 1.1075 + } 1.1076 + } 1.1077 + 1.1078 + // Hash policy. 1.1079 + 1.1080 + typedef const void* Lookup; 1.1081 + 1.1082 + static uint32_t hash(const void* const& aPtr) 1.1083 + { 1.1084 + return mozilla::HashGeneric(aPtr); 1.1085 + } 1.1086 + 1.1087 + static bool match(const Block& aB, const void* const& aPtr) 1.1088 + { 1.1089 + return aB.mPtr == aPtr; 1.1090 + } 1.1091 +}; 1.1092 + 1.1093 +typedef js::HashSet<Block, Block, InfallibleAllocPolicy> BlockTable; 1.1094 +static BlockTable* gBlockTable = nullptr; 1.1095 + 1.1096 +typedef js::HashSet<const StackTrace*, js::DefaultHasher<const StackTrace*>, 1.1097 + InfallibleAllocPolicy> 1.1098 + StackTraceSet; 1.1099 + 1.1100 +// Add a pointer to each live stack trace into the given StackTraceSet. (A 1.1101 +// stack trace is live if it's used by one of the live blocks.) 
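//---------------------------------------------------------------------------
// Illustrative, container-level sketch (not DMD code) of the trace-table GC
// described above and implemented in GatherUsedStackTraces()/GCStackTraces()
// below: mark every interned stack trace still referenced by a live block,
// then sweep the rest. std containers stand in for DMD's js::HashSet; Trace
// and LiveBlock are invented for the sketch.
//---------------------------------------------------------------------------
#include <unordered_map>
#include <unordered_set>

struct Trace {};
struct LiveBlock { const Trace* mAllocTrace; const Trace* mReportTrace; };

void GCTraces(std::unordered_set<Trace*>& aInterned,
              const std::unordered_map<const void*, LiveBlock>& aLiveBlocks)
{
  // Mark: collect every trace a live block still points at.
  std::unordered_set<const Trace*> used;
  for (const auto& kv : aLiveBlocks) {
    used.insert(kv.second.mAllocTrace);
    used.insert(kv.second.mReportTrace);   // may insert nullptr; harmless
  }
  used.erase(nullptr);

  // Sweep: free interned traces that nothing references any more.
  for (auto it = aInterned.begin(); it != aInterned.end();) {
    if (!used.count(*it)) {
      delete *it;
      it = aInterned.erase(it);
    } else {
      ++it;
    }
  }
}
//---------------------------------------------------------------------------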
1.1102 +static void 1.1103 +GatherUsedStackTraces(StackTraceSet& aStackTraces) 1.1104 +{ 1.1105 + MOZ_ASSERT(gStateLock->IsLocked()); 1.1106 + MOZ_ASSERT(Thread::Fetch()->InterceptsAreBlocked()); 1.1107 + 1.1108 + aStackTraces.finish(); 1.1109 + aStackTraces.init(1024); 1.1110 + 1.1111 + for (BlockTable::Range r = gBlockTable->all(); !r.empty(); r.popFront()) { 1.1112 + const Block& b = r.front(); 1.1113 + aStackTraces.put(b.AllocStackTrace()); 1.1114 + aStackTraces.put(b.ReportStackTrace1()); 1.1115 + aStackTraces.put(b.ReportStackTrace2()); 1.1116 + } 1.1117 + 1.1118 + // Any of the stack traces added above may have been null. For the sake of 1.1119 + // cleanliness, don't leave the null pointer in the set. 1.1120 + aStackTraces.remove(nullptr); 1.1121 +} 1.1122 + 1.1123 +// Delete stack traces that we aren't using, and compact our hashtable. 1.1124 +static void 1.1125 +GCStackTraces() 1.1126 +{ 1.1127 + MOZ_ASSERT(gStateLock->IsLocked()); 1.1128 + MOZ_ASSERT(Thread::Fetch()->InterceptsAreBlocked()); 1.1129 + 1.1130 + StackTraceSet usedStackTraces; 1.1131 + GatherUsedStackTraces(usedStackTraces); 1.1132 + 1.1133 + // Delete all unused stack traces from gStackTraceTable. The Enum destructor 1.1134 + // will automatically rehash and compact the table. 1.1135 + for (StackTraceTable::Enum e(*gStackTraceTable); 1.1136 + !e.empty(); 1.1137 + e.popFront()) { 1.1138 + StackTrace* const& st = e.front(); 1.1139 + 1.1140 + if (!usedStackTraces.has(st)) { 1.1141 + e.removeFront(); 1.1142 + InfallibleAllocPolicy::delete_(st); 1.1143 + } 1.1144 + } 1.1145 + 1.1146 + // Schedule a GC when we have twice as many stack traces as we had right after 1.1147 + // this GC finished. 1.1148 + gGCStackTraceTableWhenSizeExceeds = 2 * gStackTraceTable->count(); 1.1149 +} 1.1150 + 1.1151 +//--------------------------------------------------------------------------- 1.1152 +// malloc/free callbacks 1.1153 +//--------------------------------------------------------------------------- 1.1154 + 1.1155 +static size_t gSmallBlockActualSizeCounter = 0; 1.1156 + 1.1157 +static void 1.1158 +AllocCallback(void* aPtr, size_t aReqSize, Thread* aT) 1.1159 +{ 1.1160 + MOZ_ASSERT(gIsDMDRunning); 1.1161 + 1.1162 + if (!aPtr) { 1.1163 + return; 1.1164 + } 1.1165 + 1.1166 + AutoLockState lock; 1.1167 + AutoBlockIntercepts block(aT); 1.1168 + 1.1169 + size_t actualSize = gMallocTable->malloc_usable_size(aPtr); 1.1170 + size_t sampleBelowSize = gOptions->SampleBelowSize(); 1.1171 + 1.1172 + if (actualSize < sampleBelowSize) { 1.1173 + // If this allocation is smaller than the sample-below size, increment the 1.1174 + // cumulative counter. Then, if that counter now exceeds the sample size, 1.1175 + // blame this allocation for |sampleBelowSize| bytes. This precludes the 1.1176 + // measurement of slop. 1.1177 + gSmallBlockActualSizeCounter += actualSize; 1.1178 + if (gSmallBlockActualSizeCounter >= sampleBelowSize) { 1.1179 + gSmallBlockActualSizeCounter -= sampleBelowSize; 1.1180 + 1.1181 + Block b(aPtr, sampleBelowSize, StackTrace::Get(aT), /* sampled */ true); 1.1182 + (void)gBlockTable->putNew(aPtr, b); 1.1183 + } 1.1184 + } else { 1.1185 + // If this block size is larger than the sample size, record it exactly. 
1.1186 + Block b(aPtr, aReqSize, StackTrace::Get(aT), /* sampled */ false); 1.1187 + (void)gBlockTable->putNew(aPtr, b); 1.1188 + } 1.1189 +} 1.1190 + 1.1191 +static void 1.1192 +FreeCallback(void* aPtr, Thread* aT) 1.1193 +{ 1.1194 + MOZ_ASSERT(gIsDMDRunning); 1.1195 + 1.1196 + if (!aPtr) { 1.1197 + return; 1.1198 + } 1.1199 + 1.1200 + AutoLockState lock; 1.1201 + AutoBlockIntercepts block(aT); 1.1202 + 1.1203 + gBlockTable->remove(aPtr); 1.1204 + 1.1205 + if (gStackTraceTable->count() > gGCStackTraceTableWhenSizeExceeds) { 1.1206 + GCStackTraces(); 1.1207 + } 1.1208 +} 1.1209 + 1.1210 +//--------------------------------------------------------------------------- 1.1211 +// malloc/free interception 1.1212 +//--------------------------------------------------------------------------- 1.1213 + 1.1214 +static void Init(const malloc_table_t* aMallocTable); 1.1215 + 1.1216 +} // namespace dmd 1.1217 +} // namespace mozilla 1.1218 + 1.1219 +void 1.1220 +replace_init(const malloc_table_t* aMallocTable) 1.1221 +{ 1.1222 + mozilla::dmd::Init(aMallocTable); 1.1223 +} 1.1224 + 1.1225 +void* 1.1226 +replace_malloc(size_t aSize) 1.1227 +{ 1.1228 + using namespace mozilla::dmd; 1.1229 + 1.1230 + if (!gIsDMDRunning) { 1.1231 + // DMD hasn't started up, either because it wasn't enabled by the user, or 1.1232 + // we're still in Init() and something has indirectly called malloc. Do a 1.1233 + // vanilla malloc. (In the latter case, if it fails we'll crash. But 1.1234 + // OOM is highly unlikely so early on.) 1.1235 + return gMallocTable->malloc(aSize); 1.1236 + } 1.1237 + 1.1238 + Thread* t = Thread::Fetch(); 1.1239 + if (t->InterceptsAreBlocked()) { 1.1240 + // Intercepts are blocked, which means this must be a call to malloc 1.1241 + // triggered indirectly by DMD (e.g. via NS_StackWalk). Be infallible. 1.1242 + return InfallibleAllocPolicy::malloc_(aSize); 1.1243 + } 1.1244 + 1.1245 + // This must be a call to malloc from outside DMD. Intercept it. 1.1246 + void* ptr = gMallocTable->malloc(aSize); 1.1247 + AllocCallback(ptr, aSize, t); 1.1248 + return ptr; 1.1249 +} 1.1250 + 1.1251 +void* 1.1252 +replace_calloc(size_t aCount, size_t aSize) 1.1253 +{ 1.1254 + using namespace mozilla::dmd; 1.1255 + 1.1256 + if (!gIsDMDRunning) { 1.1257 + return gMallocTable->calloc(aCount, aSize); 1.1258 + } 1.1259 + 1.1260 + Thread* t = Thread::Fetch(); 1.1261 + if (t->InterceptsAreBlocked()) { 1.1262 + return InfallibleAllocPolicy::calloc_(aCount * aSize); 1.1263 + } 1.1264 + 1.1265 + void* ptr = gMallocTable->calloc(aCount, aSize); 1.1266 + AllocCallback(ptr, aCount * aSize, t); 1.1267 + return ptr; 1.1268 +} 1.1269 + 1.1270 +void* 1.1271 +replace_realloc(void* aOldPtr, size_t aSize) 1.1272 +{ 1.1273 + using namespace mozilla::dmd; 1.1274 + 1.1275 + if (!gIsDMDRunning) { 1.1276 + return gMallocTable->realloc(aOldPtr, aSize); 1.1277 + } 1.1278 + 1.1279 + Thread* t = Thread::Fetch(); 1.1280 + if (t->InterceptsAreBlocked()) { 1.1281 + return InfallibleAllocPolicy::realloc_(aOldPtr, aSize); 1.1282 + } 1.1283 + 1.1284 + // If |aOldPtr| is nullptr, the call is equivalent to |malloc(aSize)|. 1.1285 + if (!aOldPtr) { 1.1286 + return replace_malloc(aSize); 1.1287 + } 1.1288 + 1.1289 + // Be very careful here! Must remove the block from the table before doing 1.1290 + // the realloc to avoid races, just like in replace_free(). 1.1291 + // Nb: This does an unnecessary hashtable remove+add if the block doesn't 1.1292 + // move, but doing better isn't worth the effort. 
1.1293 + FreeCallback(aOldPtr, t); 1.1294 + void* ptr = gMallocTable->realloc(aOldPtr, aSize); 1.1295 + if (ptr) { 1.1296 + AllocCallback(ptr, aSize, t); 1.1297 + } else { 1.1298 + // If realloc fails, we re-insert the old pointer. It will look like it 1.1299 + // was allocated for the first time here, which is untrue, and the slop 1.1300 + // bytes will be zero, which may be untrue. But this case is rare and 1.1301 + // doing better isn't worth the effort. 1.1302 + AllocCallback(aOldPtr, gMallocTable->malloc_usable_size(aOldPtr), t); 1.1303 + } 1.1304 + return ptr; 1.1305 +} 1.1306 + 1.1307 +void* 1.1308 +replace_memalign(size_t aAlignment, size_t aSize) 1.1309 +{ 1.1310 + using namespace mozilla::dmd; 1.1311 + 1.1312 + if (!gIsDMDRunning) { 1.1313 + return gMallocTable->memalign(aAlignment, aSize); 1.1314 + } 1.1315 + 1.1316 + Thread* t = Thread::Fetch(); 1.1317 + if (t->InterceptsAreBlocked()) { 1.1318 + return InfallibleAllocPolicy::memalign_(aAlignment, aSize); 1.1319 + } 1.1320 + 1.1321 + void* ptr = gMallocTable->memalign(aAlignment, aSize); 1.1322 + AllocCallback(ptr, aSize, t); 1.1323 + return ptr; 1.1324 +} 1.1325 + 1.1326 +void 1.1327 +replace_free(void* aPtr) 1.1328 +{ 1.1329 + using namespace mozilla::dmd; 1.1330 + 1.1331 + if (!gIsDMDRunning) { 1.1332 + gMallocTable->free(aPtr); 1.1333 + return; 1.1334 + } 1.1335 + 1.1336 + Thread* t = Thread::Fetch(); 1.1337 + if (t->InterceptsAreBlocked()) { 1.1338 + return InfallibleAllocPolicy::free_(aPtr); 1.1339 + } 1.1340 + 1.1341 + // Do the actual free after updating the table. Otherwise, another thread 1.1342 + // could call malloc and get the freed block and update the table, and then 1.1343 + // our update here would remove the newly-malloc'd block. 1.1344 + FreeCallback(aPtr, t); 1.1345 + gMallocTable->free(aPtr); 1.1346 +} 1.1347 + 1.1348 +namespace mozilla { 1.1349 +namespace dmd { 1.1350 + 1.1351 +//--------------------------------------------------------------------------- 1.1352 +// Stack trace records 1.1353 +//--------------------------------------------------------------------------- 1.1354 + 1.1355 +class TraceRecordKey 1.1356 +{ 1.1357 +public: 1.1358 + const StackTrace* const mAllocStackTrace; // never null 1.1359 +protected: 1.1360 + const StackTrace* const mReportStackTrace1; // nullptr if unreported 1.1361 + const StackTrace* const mReportStackTrace2; // nullptr if not 2x-reported 1.1362 + 1.1363 +public: 1.1364 + TraceRecordKey(const Block& aB) 1.1365 + : mAllocStackTrace(aB.AllocStackTrace()), 1.1366 + mReportStackTrace1(aB.ReportStackTrace1()), 1.1367 + mReportStackTrace2(aB.ReportStackTrace2()) 1.1368 + { 1.1369 + MOZ_ASSERT(mAllocStackTrace); 1.1370 + } 1.1371 + 1.1372 + // Hash policy. 
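//---------------------------------------------------------------------------
// Illustrative, standalone sketch (not DMD code) of the ordering rule that
// replace_free() and replace_realloc() above follow. remove_block() is an
// invented stand-in for FreeCallback(); the point is only the ordering.
//---------------------------------------------------------------------------
#include <stdlib.h>

static void remove_block(void* aPtr) { (void) aPtr; }

void hooked_free(void* aPtr)
{
  // Wrong order: free(aPtr) first, remove_block(aPtr) second. In that gap,
  // another thread may call malloc(), be handed this same address, and record
  // it; our late remove_block() would then erase that thread's fresh entry.
  // Removing the entry before the memory can be reused closes the window.
  remove_block(aPtr);
  free(aPtr);
}
//---------------------------------------------------------------------------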
1.1373 + 1.1374 + typedef TraceRecordKey Lookup; 1.1375 + 1.1376 + static uint32_t hash(const TraceRecordKey& aKey) 1.1377 + { 1.1378 + return mozilla::HashGeneric(aKey.mAllocStackTrace, 1.1379 + aKey.mReportStackTrace1, 1.1380 + aKey.mReportStackTrace2); 1.1381 + } 1.1382 + 1.1383 + static bool match(const TraceRecordKey& aA, const TraceRecordKey& aB) 1.1384 + { 1.1385 + return aA.mAllocStackTrace == aB.mAllocStackTrace && 1.1386 + aA.mReportStackTrace1 == aB.mReportStackTrace1 && 1.1387 + aA.mReportStackTrace2 == aB.mReportStackTrace2; 1.1388 + } 1.1389 +}; 1.1390 + 1.1391 +class RecordSize 1.1392 +{ 1.1393 + static const size_t kReqBits = sizeof(size_t) * 8 - 1; // 31 or 63 1.1394 + 1.1395 + size_t mReq; // size requested 1.1396 + size_t mSlop:kReqBits; // slop bytes 1.1397 + size_t mSampled:1; // were one or more blocks contributing to this 1.1398 + // RecordSize sampled? 1.1399 +public: 1.1400 + RecordSize() 1.1401 + : mReq(0), 1.1402 + mSlop(0), 1.1403 + mSampled(false) 1.1404 + {} 1.1405 + 1.1406 + size_t Req() const { return mReq; } 1.1407 + size_t Slop() const { return mSlop; } 1.1408 + size_t Usable() const { return mReq + mSlop; } 1.1409 + 1.1410 + bool IsSampled() const { return mSampled; } 1.1411 + 1.1412 + void Add(const Block& aB) 1.1413 + { 1.1414 + mReq += aB.ReqSize(); 1.1415 + mSlop += aB.SlopSize(); 1.1416 + mSampled = mSampled || aB.IsSampled(); 1.1417 + } 1.1418 + 1.1419 + void Add(const RecordSize& aRecordSize) 1.1420 + { 1.1421 + mReq += aRecordSize.Req(); 1.1422 + mSlop += aRecordSize.Slop(); 1.1423 + mSampled = mSampled || aRecordSize.IsSampled(); 1.1424 + } 1.1425 + 1.1426 + static int Cmp(const RecordSize& aA, const RecordSize& aB) 1.1427 + { 1.1428 + // Primary sort: put bigger usable sizes first. 1.1429 + if (aA.Usable() > aB.Usable()) return -1; 1.1430 + if (aA.Usable() < aB.Usable()) return 1; 1.1431 + 1.1432 + // Secondary sort: put bigger requested sizes first. 1.1433 + if (aA.Req() > aB.Req()) return -1; 1.1434 + if (aA.Req() < aB.Req()) return 1; 1.1435 + 1.1436 + // Tertiary sort: put non-sampled records before sampled records. 1.1437 + if (!aA.mSampled && aB.mSampled) return -1; 1.1438 + if ( aA.mSampled && !aB.mSampled) return 1; 1.1439 + 1.1440 + return 0; 1.1441 + } 1.1442 +}; 1.1443 + 1.1444 +// A collection of one or more heap blocks with a common TraceRecordKey. 1.1445 +class TraceRecord : public TraceRecordKey 1.1446 +{ 1.1447 + // The TraceRecordKey base class serves as the key in TraceRecordTables. 1.1448 + // These two fields constitute the value, so it's ok for them to be 1.1449 + // |mutable|. 1.1450 + mutable uint32_t mNumBlocks; // number of blocks with this TraceRecordKey 1.1451 + mutable RecordSize mRecordSize; // combined size of those blocks 1.1452 + 1.1453 +public: 1.1454 + explicit TraceRecord(const TraceRecordKey& aKey) 1.1455 + : TraceRecordKey(aKey), 1.1456 + mNumBlocks(0), 1.1457 + mRecordSize() 1.1458 + {} 1.1459 + 1.1460 + uint32_t NumBlocks() const { return mNumBlocks; } 1.1461 + 1.1462 + const RecordSize& GetRecordSize() const { return mRecordSize; } 1.1463 + 1.1464 + // This is |const| thanks to the |mutable| fields above. 1.1465 + void Add(const Block& aB) const 1.1466 + { 1.1467 + mNumBlocks++; 1.1468 + mRecordSize.Add(aB); 1.1469 + } 1.1470 + 1.1471 + // For PrintSortedRecords. 
1.1472 + static const char* const kRecordKind; 1.1473 + static bool recordsOverlap() { return false; } 1.1474 + 1.1475 + void Print(const Writer& aWriter, LocationService* aLocService, 1.1476 + uint32_t aM, uint32_t aN, const char* aStr, const char* astr, 1.1477 + size_t aCategoryUsableSize, size_t aCumulativeUsableSize, 1.1478 + size_t aTotalUsableSize) const; 1.1479 + 1.1480 + static int QsortCmp(const void* aA, const void* aB) 1.1481 + { 1.1482 + const TraceRecord* const a = *static_cast<const TraceRecord* const*>(aA); 1.1483 + const TraceRecord* const b = *static_cast<const TraceRecord* const*>(aB); 1.1484 + 1.1485 + return RecordSize::Cmp(a->mRecordSize, b->mRecordSize); 1.1486 + } 1.1487 +}; 1.1488 + 1.1489 +const char* const TraceRecord::kRecordKind = "trace"; 1.1490 + 1.1491 +typedef js::HashSet<TraceRecord, TraceRecord, InfallibleAllocPolicy> 1.1492 + TraceRecordTable; 1.1493 + 1.1494 +void 1.1495 +TraceRecord::Print(const Writer& aWriter, LocationService* aLocService, 1.1496 + uint32_t aM, uint32_t aN, const char* aStr, const char* astr, 1.1497 + size_t aCategoryUsableSize, size_t aCumulativeUsableSize, 1.1498 + size_t aTotalUsableSize) const 1.1499 +{ 1.1500 + bool showTilde = mRecordSize.IsSampled(); 1.1501 + 1.1502 + W("%s: %s block%s in stack trace record %s of %s\n", 1.1503 + aStr, 1.1504 + Show(mNumBlocks, gBuf1, kBufLen, showTilde), Plural(mNumBlocks), 1.1505 + Show(aM, gBuf2, kBufLen), 1.1506 + Show(aN, gBuf3, kBufLen)); 1.1507 + 1.1508 + W(" %s bytes (%s requested / %s slop)\n", 1.1509 + Show(mRecordSize.Usable(), gBuf1, kBufLen, showTilde), 1.1510 + Show(mRecordSize.Req(), gBuf2, kBufLen, showTilde), 1.1511 + Show(mRecordSize.Slop(), gBuf3, kBufLen, showTilde)); 1.1512 + 1.1513 + W(" %4.2f%% of the heap (%4.2f%% cumulative); " 1.1514 + " %4.2f%% of %s (%4.2f%% cumulative)\n", 1.1515 + Percent(mRecordSize.Usable(), aTotalUsableSize), 1.1516 + Percent(aCumulativeUsableSize, aTotalUsableSize), 1.1517 + Percent(mRecordSize.Usable(), aCategoryUsableSize), 1.1518 + astr, 1.1519 + Percent(aCumulativeUsableSize, aCategoryUsableSize)); 1.1520 + 1.1521 + W(" Allocated at\n"); 1.1522 + mAllocStackTrace->Print(aWriter, aLocService); 1.1523 + 1.1524 + if (mReportStackTrace1) { 1.1525 + W("\n Reported at\n"); 1.1526 + mReportStackTrace1->Print(aWriter, aLocService); 1.1527 + } 1.1528 + if (mReportStackTrace2) { 1.1529 + W("\n Reported again at\n"); 1.1530 + mReportStackTrace2->Print(aWriter, aLocService); 1.1531 + } 1.1532 + 1.1533 + W("\n"); 1.1534 +} 1.1535 + 1.1536 +//--------------------------------------------------------------------------- 1.1537 +// Stack frame records 1.1538 +//--------------------------------------------------------------------------- 1.1539 + 1.1540 +// A collection of one or more stack frames (from heap block allocation stack 1.1541 +// traces) with a common PC. 1.1542 +class FrameRecord 1.1543 +{ 1.1544 + // mPc is used as the key in FrameRecordTable, and the other members 1.1545 + // constitute the value, so it's ok for them to be |mutable|. 1.1546 + const void* const mPc; 1.1547 + mutable size_t mNumBlocks; 1.1548 + mutable size_t mNumTraceRecords; 1.1549 + mutable RecordSize mRecordSize; 1.1550 + 1.1551 +public: 1.1552 + explicit FrameRecord(const void* aPc) 1.1553 + : mPc(aPc), 1.1554 + mNumBlocks(0), 1.1555 + mNumTraceRecords(0), 1.1556 + mRecordSize() 1.1557 + {} 1.1558 + 1.1559 + const RecordSize& GetRecordSize() const { return mRecordSize; } 1.1560 + 1.1561 + // This is |const| thanks to the |mutable| fields above. 
1.1562 + void Add(const TraceRecord& aTr) const 1.1563 + { 1.1564 + mNumBlocks += aTr.NumBlocks(); 1.1565 + mNumTraceRecords++; 1.1566 + mRecordSize.Add(aTr.GetRecordSize()); 1.1567 + } 1.1568 + 1.1569 + void Print(const Writer& aWriter, LocationService* aLocService, 1.1570 + uint32_t aM, uint32_t aN, const char* aStr, const char* astr, 1.1571 + size_t aCategoryUsableSize, size_t aCumulativeUsableSize, 1.1572 + size_t aTotalUsableSize) const; 1.1573 + 1.1574 + static int QsortCmp(const void* aA, const void* aB) 1.1575 + { 1.1576 + const FrameRecord* const a = *static_cast<const FrameRecord* const*>(aA); 1.1577 + const FrameRecord* const b = *static_cast<const FrameRecord* const*>(aB); 1.1578 + 1.1579 + return RecordSize::Cmp(a->mRecordSize, b->mRecordSize); 1.1580 + } 1.1581 + 1.1582 + // For PrintSortedRecords. 1.1583 + static const char* const kRecordKind; 1.1584 + static bool recordsOverlap() { return true; } 1.1585 + 1.1586 + // Hash policy. 1.1587 + 1.1588 + typedef const void* Lookup; 1.1589 + 1.1590 + static uint32_t hash(const void* const& aPc) 1.1591 + { 1.1592 + return mozilla::HashGeneric(aPc); 1.1593 + } 1.1594 + 1.1595 + static bool match(const FrameRecord& aFr, const void* const& aPc) 1.1596 + { 1.1597 + return aFr.mPc == aPc; 1.1598 + } 1.1599 +}; 1.1600 + 1.1601 +const char* const FrameRecord::kRecordKind = "frame"; 1.1602 + 1.1603 +typedef js::HashSet<FrameRecord, FrameRecord, InfallibleAllocPolicy> 1.1604 + FrameRecordTable; 1.1605 + 1.1606 +void 1.1607 +FrameRecord::Print(const Writer& aWriter, LocationService* aLocService, 1.1608 + uint32_t aM, uint32_t aN, const char* aStr, const char* astr, 1.1609 + size_t aCategoryUsableSize, size_t aCumulativeUsableSize, 1.1610 + size_t aTotalUsableSize) const 1.1611 +{ 1.1612 + (void)aCumulativeUsableSize; 1.1613 + 1.1614 + bool showTilde = mRecordSize.IsSampled(); 1.1615 + 1.1616 + W("%s: %s block%s from %s stack trace record%s in stack frame record %s of %s\n", 1.1617 + aStr, 1.1618 + Show(mNumBlocks, gBuf1, kBufLen, showTilde), Plural(mNumBlocks), 1.1619 + Show(mNumTraceRecords, gBuf2, kBufLen, showTilde), Plural(mNumTraceRecords), 1.1620 + Show(aM, gBuf3, kBufLen), 1.1621 + Show(aN, gBuf4, kBufLen)); 1.1622 + 1.1623 + W(" %s bytes (%s requested / %s slop)\n", 1.1624 + Show(mRecordSize.Usable(), gBuf1, kBufLen, showTilde), 1.1625 + Show(mRecordSize.Req(), gBuf2, kBufLen, showTilde), 1.1626 + Show(mRecordSize.Slop(), gBuf3, kBufLen, showTilde)); 1.1627 + 1.1628 + W(" %4.2f%% of the heap; %4.2f%% of %s\n", 1.1629 + Percent(mRecordSize.Usable(), aTotalUsableSize), 1.1630 + Percent(mRecordSize.Usable(), aCategoryUsableSize), 1.1631 + astr); 1.1632 + 1.1633 + W(" PC is\n"); 1.1634 + aLocService->WriteLocation(aWriter, mPc); 1.1635 + W("\n"); 1.1636 +} 1.1637 + 1.1638 +//--------------------------------------------------------------------------- 1.1639 +// Options (Part 2) 1.1640 +//--------------------------------------------------------------------------- 1.1641 + 1.1642 +// Given an |aOptionName| like "foo", succeed if |aArg| has the form "foo=blah" 1.1643 +// (where "blah" is non-empty) and return the pointer to "blah". |aArg| can 1.1644 +// have leading space chars (but not other whitespace). 
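//---------------------------------------------------------------------------
// Illustrative, standalone demo (not DMD code) of the $DMD parsing that
// follows. Example values the parser accepts (taken from BadArg()'s help
// text): DMD=1, DMD="--sample-below=4093 --max-frames=16", DMD="--mode=test".
// The runnable snippet below exercises the in-place tokenisation trick the
// Options constructor uses: temporarily NUL-terminate each whitespace-
// separated token, handle it, then restore the overwritten character.
//---------------------------------------------------------------------------
#include <ctype.h>
#include <stdio.h>

int main()
{
  char value[] = "--sample-below=4093 --max-frames=16 --mode=test";
  char* e = value;
  bool isEnd = false;
  while (!isEnd) {
    while (isspace(*e)) {               // skip leading whitespace
      e++;
    }
    char* arg = e;
    while (!isspace(*e) && *e != '\0') { // find the end of the token
      e++;
    }
    char replacedChar = *e;
    isEnd = (replacedChar == '\0');
    *e = '\0';                           // isolate the token in place
    printf("option: '%s'\n", arg);       // Options dispatches on it here
    *e = replacedChar;                   // undo the temporary isolation
  }
  return 0;
}
//---------------------------------------------------------------------------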
1.1645 +const char*
1.1646 +Options::ValueIfMatch(const char* aArg, const char* aOptionName)
1.1647 +{
1.1648 +  MOZ_ASSERT(!isspace(*aArg)); // any leading whitespace should not remain
1.1649 +  size_t optionLen = strlen(aOptionName);
1.1650 +  if (strncmp(aArg, aOptionName, optionLen) == 0 && aArg[optionLen] == '=' &&
1.1651 +      aArg[optionLen + 1]) {
1.1652 +    return aArg + optionLen + 1;
1.1653 +  }
1.1654 +  return nullptr;
1.1655 +}
1.1656 +
1.1657 +// Extracts a |long| value for an option from an argument. It must be within
1.1658 +// the range |aMin..aMax| (inclusive).
1.1659 +bool
1.1660 +Options::GetLong(const char* aArg, const char* aOptionName,
1.1661 +                 long aMin, long aMax, long* aN)
1.1662 +{
1.1663 +  if (const char* optionValue = ValueIfMatch(aArg, aOptionName)) {
1.1664 +    char* endPtr;
1.1665 +    *aN = strtol(optionValue, &endPtr, /* base */ 10);
1.1666 +    if (!*endPtr && aMin <= *aN && *aN <= aMax &&
1.1667 +        *aN != LONG_MIN && *aN != LONG_MAX) {
1.1668 +      return true;
1.1669 +    }
1.1670 +  }
1.1671 +  return false;
1.1672 +}
1.1673 +
1.1674 +// The sample-below default is a prime number close to 4096.
1.1675 +// - Why that size? Because it's *much* faster but only moderately less precise
1.1676 +//   than a size of 1.
1.1677 +// - Why prime? Because it makes our sampling more random. If we used a size
1.1678 +//   of 4096, for example, then our alloc counter would only take on even
1.1679 +//   values, because jemalloc always rounds up request sizes. In contrast, a
1.1680 +//   prime size will explore all possible values of the alloc counter.
1.1681 +//
1.1682 +Options::Options(const char* aDMDEnvVar)
1.1683 +  : mDMDEnvVar(InfallibleAllocPolicy::strdup_(aDMDEnvVar)),
1.1684 +    mSampleBelowSize(4093, 100 * 100 * 1000),
1.1685 +    mMaxFrames(StackTrace::MaxFrames, StackTrace::MaxFrames),
1.1686 +    mMaxRecords(1000, 1000000),
1.1687 +    mMode(Normal)
1.1688 +{
1.1689 +  char* e = mDMDEnvVar;
1.1690 +  if (strcmp(e, "1") != 0) {
1.1691 +    bool isEnd = false;
1.1692 +    while (!isEnd) {
1.1693 +      // Consume leading whitespace.
1.1694 +      while (isspace(*e)) {
1.1695 +        e++;
1.1696 +      }
1.1697 +
1.1698 +      // Save the start of the arg.
1.1699 +      const char* arg = e;
1.1700 +
1.1701 +      // Find the first char after the arg, and temporarily change it to '\0'
1.1702 +      // to isolate the arg.
1.1703 +      while (!isspace(*e) && *e != '\0') {
1.1704 +        e++;
1.1705 +      }
1.1706 +      char replacedChar = *e;
1.1707 +      isEnd = replacedChar == '\0';
1.1708 +      *e = '\0';
1.1709 +
1.1710 +      // Handle the arg.
1.1711 +      long myLong;
1.1712 +      if (GetLong(arg, "--sample-below", 1, mSampleBelowSize.mMax, &myLong)) {
1.1713 +        mSampleBelowSize.mActual = myLong;
1.1714 +
1.1715 +      } else if (GetLong(arg, "--max-frames", 1, mMaxFrames.mMax, &myLong)) {
1.1716 +        mMaxFrames.mActual = myLong;
1.1717 +
1.1718 +      } else if (GetLong(arg, "--max-records", 1, mMaxRecords.mMax, &myLong)) {
1.1719 +        mMaxRecords.mActual = myLong;
1.1720 +
1.1721 +      } else if (strcmp(arg, "--mode=normal") == 0) {
1.1722 +        mMode = Options::Normal;
1.1723 +      } else if (strcmp(arg, "--mode=test") == 0) {
1.1724 +        mMode = Options::Test;
1.1725 +      } else if (strcmp(arg, "--mode=stress") == 0) {
1.1726 +        mMode = Options::Stress;
1.1727 +
1.1728 +      } else if (strcmp(arg, "") == 0) {
1.1729 +        // This can only happen if there is trailing whitespace. Ignore.
1.1730 +        MOZ_ASSERT(isEnd);
1.1731 +
1.1732 +      } else {
1.1733 +        BadArg(arg);
1.1734 +      }
1.1735 +
1.1736 +      // Undo the temporary isolation.
1.1737 + *e = replacedChar; 1.1738 + } 1.1739 + } 1.1740 +} 1.1741 + 1.1742 +void 1.1743 +Options::BadArg(const char* aArg) 1.1744 +{ 1.1745 + StatusMsg("\n"); 1.1746 + StatusMsg("Bad entry in the $DMD environment variable: '%s'.\n", aArg); 1.1747 + StatusMsg("\n"); 1.1748 + StatusMsg("Valid values of $DMD are:\n"); 1.1749 + StatusMsg("- undefined or \"\" or \"0\", which disables DMD, or\n"); 1.1750 + StatusMsg("- \"1\", which enables it with the default options, or\n"); 1.1751 + StatusMsg("- a whitespace-separated list of |--option=val| entries, which\n"); 1.1752 + StatusMsg(" enables it with non-default options.\n"); 1.1753 + StatusMsg("\n"); 1.1754 + StatusMsg("The following options are allowed; defaults are shown in [].\n"); 1.1755 + StatusMsg(" --sample-below=<1..%d> Sample blocks smaller than this [%d]\n", 1.1756 + int(mSampleBelowSize.mMax), 1.1757 + int(mSampleBelowSize.mDefault)); 1.1758 + StatusMsg(" (prime numbers are recommended)\n"); 1.1759 + StatusMsg(" --max-frames=<1..%d> Max. depth of stack traces [%d]\n", 1.1760 + int(mMaxFrames.mMax), 1.1761 + int(mMaxFrames.mDefault)); 1.1762 + StatusMsg(" --max-records=<1..%u> Max. number of records printed [%u]\n", 1.1763 + mMaxRecords.mMax, 1.1764 + mMaxRecords.mDefault); 1.1765 + StatusMsg(" --mode=<normal|test|stress> Mode of operation [normal]\n"); 1.1766 + StatusMsg("\n"); 1.1767 + exit(1); 1.1768 +} 1.1769 + 1.1770 +//--------------------------------------------------------------------------- 1.1771 +// DMD start-up 1.1772 +//--------------------------------------------------------------------------- 1.1773 + 1.1774 +#ifdef XP_MACOSX 1.1775 +static void 1.1776 +NopStackWalkCallback(void* aPc, void* aSp, void* aClosure) 1.1777 +{ 1.1778 +} 1.1779 +#endif 1.1780 + 1.1781 +// Note that fopen() can allocate. 1.1782 +static FILE* 1.1783 +OpenOutputFile(const char* aFilename) 1.1784 +{ 1.1785 + FILE* fp = fopen(aFilename, "w"); 1.1786 + if (!fp) { 1.1787 + StatusMsg("can't create %s file: %s\n", aFilename, strerror(errno)); 1.1788 + exit(1); 1.1789 + } 1.1790 + return fp; 1.1791 +} 1.1792 + 1.1793 +static void RunTestMode(FILE* fp); 1.1794 +static void RunStressMode(FILE* fp); 1.1795 + 1.1796 +// WARNING: this function runs *very* early -- before all static initializers 1.1797 +// have run. For this reason, non-scalar globals such as gStateLock and 1.1798 +// gStackTraceTable are allocated dynamically (so we can guarantee their 1.1799 +// construction in this function) rather than statically. 1.1800 +static void 1.1801 +Init(const malloc_table_t* aMallocTable) 1.1802 +{ 1.1803 + MOZ_ASSERT(!gIsDMDRunning); 1.1804 + 1.1805 + gMallocTable = aMallocTable; 1.1806 + 1.1807 + // DMD is controlled by the |DMD| environment variable. 1.1808 + // - If it's unset or empty or "0", DMD doesn't run. 1.1809 + // - Otherwise, the contents dictate DMD's behaviour. 1.1810 + 1.1811 + char* e = getenv("DMD"); 1.1812 + StatusMsg("$DMD = '%s'\n", e); 1.1813 + 1.1814 + if (!e || strcmp(e, "") == 0 || strcmp(e, "0") == 0) { 1.1815 + StatusMsg("DMD is not enabled\n"); 1.1816 + return; 1.1817 + } 1.1818 + 1.1819 + // Parse $DMD env var. 1.1820 + gOptions = InfallibleAllocPolicy::new_<Options>(e); 1.1821 + 1.1822 + StatusMsg("DMD is enabled\n"); 1.1823 + 1.1824 +#ifdef XP_MACOSX 1.1825 + // On Mac OS X we need to call StackWalkInitCriticalAddress() very early 1.1826 + // (prior to the creation of any mutexes, apparently) otherwise we can get 1.1827 + // hangs when getting stack traces (bug 821577). 
But 1.1828 + // StackWalkInitCriticalAddress() isn't exported from xpcom/, so instead we 1.1829 + // just call NS_StackWalk, because that calls StackWalkInitCriticalAddress(). 1.1830 + // See the comment above StackWalkInitCriticalAddress() for more details. 1.1831 + (void)NS_StackWalk(NopStackWalkCallback, /* skipFrames */ 0, 1.1832 + /* maxFrames */ 1, nullptr, 0, nullptr); 1.1833 +#endif 1.1834 + 1.1835 + gStateLock = InfallibleAllocPolicy::new_<Mutex>(); 1.1836 + 1.1837 + gSmallBlockActualSizeCounter = 0; 1.1838 + 1.1839 + DMD_CREATE_TLS_INDEX(gTlsIndex); 1.1840 + 1.1841 + { 1.1842 + AutoLockState lock; 1.1843 + 1.1844 + gStackTraceTable = InfallibleAllocPolicy::new_<StackTraceTable>(); 1.1845 + gStackTraceTable->init(8192); 1.1846 + 1.1847 + gBlockTable = InfallibleAllocPolicy::new_<BlockTable>(); 1.1848 + gBlockTable->init(8192); 1.1849 + } 1.1850 + 1.1851 + if (gOptions->IsTestMode()) { 1.1852 + // OpenOutputFile() can allocate. So do this before setting 1.1853 + // gIsDMDRunning so those allocations don't show up in our results. Once 1.1854 + // gIsDMDRunning is set we are intercepting malloc et al. in earnest. 1.1855 + FILE* fp = OpenOutputFile("test.dmd"); 1.1856 + gIsDMDRunning = true; 1.1857 + 1.1858 + StatusMsg("running test mode...\n"); 1.1859 + RunTestMode(fp); 1.1860 + StatusMsg("finished test mode\n"); 1.1861 + fclose(fp); 1.1862 + exit(0); 1.1863 + } 1.1864 + 1.1865 + if (gOptions->IsStressMode()) { 1.1866 + FILE* fp = OpenOutputFile("stress.dmd"); 1.1867 + gIsDMDRunning = true; 1.1868 + 1.1869 + StatusMsg("running stress mode...\n"); 1.1870 + RunStressMode(fp); 1.1871 + StatusMsg("finished stress mode\n"); 1.1872 + fclose(fp); 1.1873 + exit(0); 1.1874 + } 1.1875 + 1.1876 + gIsDMDRunning = true; 1.1877 +} 1.1878 + 1.1879 +//--------------------------------------------------------------------------- 1.1880 +// DMD reporting and unreporting 1.1881 +//--------------------------------------------------------------------------- 1.1882 + 1.1883 +static void 1.1884 +ReportHelper(const void* aPtr, bool aReportedOnAlloc) 1.1885 +{ 1.1886 + if (!gIsDMDRunning || !aPtr) { 1.1887 + return; 1.1888 + } 1.1889 + 1.1890 + Thread* t = Thread::Fetch(); 1.1891 + 1.1892 + AutoBlockIntercepts block(t); 1.1893 + AutoLockState lock; 1.1894 + 1.1895 + if (BlockTable::Ptr p = gBlockTable->lookup(aPtr)) { 1.1896 + p->Report(t, aReportedOnAlloc); 1.1897 + } else { 1.1898 + // We have no record of the block. Do nothing. Either: 1.1899 + // - We're sampling and we skipped this block. This is likely. 1.1900 + // - It's a bogus pointer. This is unlikely because Report() is almost 1.1901 + // always called in conjunction with a malloc_size_of-style function. 1.1902 + } 1.1903 +} 1.1904 + 1.1905 +MOZ_EXPORT void 1.1906 +Report(const void* aPtr) 1.1907 +{ 1.1908 + ReportHelper(aPtr, /* onAlloc */ false); 1.1909 +} 1.1910 + 1.1911 +MOZ_EXPORT void 1.1912 +ReportOnAlloc(const void* aPtr) 1.1913 +{ 1.1914 + ReportHelper(aPtr, /* onAlloc */ true); 1.1915 +} 1.1916 + 1.1917 +//--------------------------------------------------------------------------- 1.1918 +// DMD output 1.1919 +//--------------------------------------------------------------------------- 1.1920 + 1.1921 +// This works for both TraceRecords and StackFrameRecords. 
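// |Record| must provide kRecordKind, recordsOverlap(), QsortCmp(),
// GetRecordSize() and Print(), as both TraceRecord and FrameRecord do.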
1.1922 +template <class Record> 1.1923 +static void 1.1924 +PrintSortedRecords(const Writer& aWriter, LocationService* aLocService, 1.1925 + const char* aStr, const char* astr, 1.1926 + const js::HashSet<Record, Record, InfallibleAllocPolicy>& 1.1927 + aRecordTable, 1.1928 + size_t aCategoryUsableSize, size_t aTotalUsableSize) 1.1929 +{ 1.1930 + const char* kind = Record::kRecordKind; 1.1931 + StatusMsg(" creating and sorting %s stack %s record array...\n", astr, kind); 1.1932 + 1.1933 + // Convert the table into a sorted array. 1.1934 + js::Vector<const Record*, 0, InfallibleAllocPolicy> recordArray; 1.1935 + recordArray.reserve(aRecordTable.count()); 1.1936 + typedef js::HashSet<Record, Record, InfallibleAllocPolicy> RecordTable; 1.1937 + for (typename RecordTable::Range r = aRecordTable.all(); 1.1938 + !r.empty(); 1.1939 + r.popFront()) { 1.1940 + recordArray.infallibleAppend(&r.front()); 1.1941 + } 1.1942 + qsort(recordArray.begin(), recordArray.length(), sizeof(recordArray[0]), 1.1943 + Record::QsortCmp); 1.1944 + 1.1945 + WriteTitle("%s stack %s records\n", aStr, kind); 1.1946 + 1.1947 + if (recordArray.length() == 0) { 1.1948 + W("(none)\n\n"); 1.1949 + return; 1.1950 + } 1.1951 + 1.1952 + StatusMsg(" printing %s stack %s record array...\n", astr, kind); 1.1953 + size_t cumulativeUsableSize = 0; 1.1954 + 1.1955 + // Limit the number of records printed, because fix-linux-stack.pl is too 1.1956 + // damn slow. Note that we don't break out of this loop because we need to 1.1957 + // keep adding to |cumulativeUsableSize|. 1.1958 + uint32_t numRecords = recordArray.length(); 1.1959 + uint32_t maxRecords = gOptions->MaxRecords(); 1.1960 + for (uint32_t i = 0; i < numRecords; i++) { 1.1961 + const Record* r = recordArray[i]; 1.1962 + cumulativeUsableSize += r->GetRecordSize().Usable(); 1.1963 + if (i < maxRecords) { 1.1964 + r->Print(aWriter, aLocService, i+1, numRecords, aStr, astr, 1.1965 + aCategoryUsableSize, cumulativeUsableSize, aTotalUsableSize); 1.1966 + } else if (i == maxRecords) { 1.1967 + W("%s: stopping after %s stack %s records\n\n", aStr, 1.1968 + Show(maxRecords, gBuf1, kBufLen), kind); 1.1969 + } 1.1970 + } 1.1971 + 1.1972 + // This holds for TraceRecords, but not for FrameRecords. 1.1973 + MOZ_ASSERT_IF(!Record::recordsOverlap(), 1.1974 + aCategoryUsableSize == cumulativeUsableSize); 1.1975 +} 1.1976 + 1.1977 +static void 1.1978 +PrintSortedTraceAndFrameRecords(const Writer& aWriter, 1.1979 + LocationService* aLocService, 1.1980 + const char* aStr, const char* astr, 1.1981 + const TraceRecordTable& aTraceRecordTable, 1.1982 + size_t aCategoryUsableSize, 1.1983 + size_t aTotalUsableSize) 1.1984 +{ 1.1985 + PrintSortedRecords(aWriter, aLocService, aStr, astr, aTraceRecordTable, 1.1986 + aCategoryUsableSize, aTotalUsableSize); 1.1987 + 1.1988 + FrameRecordTable frameRecordTable; 1.1989 + (void)frameRecordTable.init(2048); 1.1990 + for (TraceRecordTable::Range r = aTraceRecordTable.all(); 1.1991 + !r.empty(); 1.1992 + r.popFront()) { 1.1993 + const TraceRecord& tr = r.front(); 1.1994 + const StackTrace* st = tr.mAllocStackTrace; 1.1995 + 1.1996 + // A single PC can appear multiple times in a stack trace. We ignore 1.1997 + // duplicates by first sorting and then ignoring adjacent duplicates. 
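    // (Without the de-duplication, a PC that appears twice in one trace, e.g.
    // under recursion, would have the trace record's blocks and bytes added
    // to its frame record twice.)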
1.1998 + StackTrace sorted(*st); 1.1999 + sorted.Sort(); // sorts the copy, not the original 1.2000 + void* prevPc = (void*)intptr_t(-1); 1.2001 + for (uint32_t i = 0; i < sorted.Length(); i++) { 1.2002 + void* pc = sorted.Pc(i); 1.2003 + if (pc == prevPc) { 1.2004 + continue; // ignore duplicate 1.2005 + } 1.2006 + prevPc = pc; 1.2007 + 1.2008 + FrameRecordTable::AddPtr p = frameRecordTable.lookupForAdd(pc); 1.2009 + if (!p) { 1.2010 + FrameRecord fr(pc); 1.2011 + (void)frameRecordTable.add(p, fr); 1.2012 + } 1.2013 + p->Add(tr); 1.2014 + } 1.2015 + } 1.2016 + 1.2017 + PrintSortedRecords(aWriter, aLocService, aStr, astr, frameRecordTable, 1.2018 + aCategoryUsableSize, aTotalUsableSize); 1.2019 +} 1.2020 + 1.2021 +// Note that, unlike most SizeOf* functions, this function does not take a 1.2022 +// |mozilla::MallocSizeOf| argument. That's because those arguments are 1.2023 +// primarily to aid DMD track heap blocks... but DMD deliberately doesn't track 1.2024 +// heap blocks it allocated for itself! 1.2025 +// 1.2026 +// SizeOfInternal should be called while you're holding the state lock and 1.2027 +// while intercepts are blocked; SizeOf acquires the lock and blocks 1.2028 +// intercepts. 1.2029 + 1.2030 +static void 1.2031 +SizeOfInternal(Sizes* aSizes) 1.2032 +{ 1.2033 + MOZ_ASSERT(gStateLock->IsLocked()); 1.2034 + MOZ_ASSERT(Thread::Fetch()->InterceptsAreBlocked()); 1.2035 + 1.2036 + aSizes->Clear(); 1.2037 + 1.2038 + if (!gIsDMDRunning) { 1.2039 + return; 1.2040 + } 1.2041 + 1.2042 + StackTraceSet usedStackTraces; 1.2043 + GatherUsedStackTraces(usedStackTraces); 1.2044 + 1.2045 + for (StackTraceTable::Range r = gStackTraceTable->all(); 1.2046 + !r.empty(); 1.2047 + r.popFront()) { 1.2048 + StackTrace* const& st = r.front(); 1.2049 + 1.2050 + if (usedStackTraces.has(st)) { 1.2051 + aSizes->mStackTracesUsed += MallocSizeOf(st); 1.2052 + } else { 1.2053 + aSizes->mStackTracesUnused += MallocSizeOf(st); 1.2054 + } 1.2055 + } 1.2056 + 1.2057 + aSizes->mStackTraceTable = 1.2058 + gStackTraceTable->sizeOfIncludingThis(MallocSizeOf); 1.2059 + 1.2060 + aSizes->mBlockTable = gBlockTable->sizeOfIncludingThis(MallocSizeOf); 1.2061 +} 1.2062 + 1.2063 +MOZ_EXPORT void 1.2064 +SizeOf(Sizes* aSizes) 1.2065 +{ 1.2066 + aSizes->Clear(); 1.2067 + 1.2068 + if (!gIsDMDRunning) { 1.2069 + return; 1.2070 + } 1.2071 + 1.2072 + AutoBlockIntercepts block(Thread::Fetch()); 1.2073 + AutoLockState lock; 1.2074 + SizeOfInternal(aSizes); 1.2075 +} 1.2076 + 1.2077 +void 1.2078 +ClearReportsInternal() 1.2079 +{ 1.2080 + MOZ_ASSERT(gStateLock->IsLocked()); 1.2081 + 1.2082 + // Unreport all blocks that were marked reported by a memory reporter. This 1.2083 + // excludes those that were reported on allocation, because they need to keep 1.2084 + // their reported marking. 
1.2085 + for (BlockTable::Range r = gBlockTable->all(); !r.empty(); r.popFront()) { 1.2086 + r.front().UnreportIfNotReportedOnAlloc(); 1.2087 + } 1.2088 +} 1.2089 + 1.2090 +MOZ_EXPORT void 1.2091 +ClearReports() 1.2092 +{ 1.2093 + if (!gIsDMDRunning) { 1.2094 + return; 1.2095 + } 1.2096 + 1.2097 + AutoLockState lock; 1.2098 + ClearReportsInternal(); 1.2099 +} 1.2100 + 1.2101 +MOZ_EXPORT bool 1.2102 +IsEnabled() 1.2103 +{ 1.2104 + return gIsDMDRunning; 1.2105 +} 1.2106 + 1.2107 +MOZ_EXPORT void 1.2108 +Dump(Writer aWriter) 1.2109 +{ 1.2110 + if (!gIsDMDRunning) { 1.2111 + const char* msg = "cannot Dump(); DMD was not enabled at startup\n"; 1.2112 + StatusMsg("%s", msg); 1.2113 + W("%s", msg); 1.2114 + return; 1.2115 + } 1.2116 + 1.2117 + AutoBlockIntercepts block(Thread::Fetch()); 1.2118 + AutoLockState lock; 1.2119 + 1.2120 + static int dumpCount = 1; 1.2121 + StatusMsg("Dump %d {\n", dumpCount++); 1.2122 + 1.2123 + StatusMsg(" gathering stack trace records...\n"); 1.2124 + 1.2125 + TraceRecordTable unreportedTraceRecordTable; 1.2126 + (void)unreportedTraceRecordTable.init(1024); 1.2127 + size_t unreportedUsableSize = 0; 1.2128 + size_t unreportedNumBlocks = 0; 1.2129 + 1.2130 + TraceRecordTable onceReportedTraceRecordTable; 1.2131 + (void)onceReportedTraceRecordTable.init(1024); 1.2132 + size_t onceReportedUsableSize = 0; 1.2133 + size_t onceReportedNumBlocks = 0; 1.2134 + 1.2135 + TraceRecordTable twiceReportedTraceRecordTable; 1.2136 + (void)twiceReportedTraceRecordTable.init(0); 1.2137 + size_t twiceReportedUsableSize = 0; 1.2138 + size_t twiceReportedNumBlocks = 0; 1.2139 + 1.2140 + bool anyBlocksSampled = false; 1.2141 + 1.2142 + for (BlockTable::Range r = gBlockTable->all(); !r.empty(); r.popFront()) { 1.2143 + const Block& b = r.front(); 1.2144 + 1.2145 + TraceRecordTable* table; 1.2146 + uint32_t numReports = b.NumReports(); 1.2147 + if (numReports == 0) { 1.2148 + unreportedUsableSize += b.UsableSize(); 1.2149 + unreportedNumBlocks++; 1.2150 + table = &unreportedTraceRecordTable; 1.2151 + } else if (numReports == 1) { 1.2152 + onceReportedUsableSize += b.UsableSize(); 1.2153 + onceReportedNumBlocks++; 1.2154 + table = &onceReportedTraceRecordTable; 1.2155 + } else { 1.2156 + MOZ_ASSERT(numReports == 2); 1.2157 + twiceReportedUsableSize += b.UsableSize(); 1.2158 + twiceReportedNumBlocks++; 1.2159 + table = &twiceReportedTraceRecordTable; 1.2160 + } 1.2161 + TraceRecordKey key(b); 1.2162 + TraceRecordTable::AddPtr p = table->lookupForAdd(key); 1.2163 + if (!p) { 1.2164 + TraceRecord tr(b); 1.2165 + (void)table->add(p, tr); 1.2166 + } 1.2167 + p->Add(b); 1.2168 + 1.2169 + anyBlocksSampled = anyBlocksSampled || b.IsSampled(); 1.2170 + } 1.2171 + size_t totalUsableSize = 1.2172 + unreportedUsableSize + onceReportedUsableSize + twiceReportedUsableSize; 1.2173 + size_t totalNumBlocks = 1.2174 + unreportedNumBlocks + onceReportedNumBlocks + twiceReportedNumBlocks; 1.2175 + 1.2176 + WriteTitle("Invocation\n"); 1.2177 + W("$DMD = '%s'\n", gOptions->DMDEnvVar()); 1.2178 + W("Sample-below size = %lld\n\n", 1.2179 + (long long)(gOptions->SampleBelowSize())); 1.2180 + 1.2181 + // Allocate this on the heap instead of the stack because it's fairly large. 
1.2182 + LocationService* locService = InfallibleAllocPolicy::new_<LocationService>(); 1.2183 + 1.2184 + PrintSortedRecords(aWriter, locService, "Twice-reported", "twice-reported", 1.2185 + twiceReportedTraceRecordTable, twiceReportedUsableSize, 1.2186 + totalUsableSize); 1.2187 + 1.2188 + PrintSortedTraceAndFrameRecords(aWriter, locService, 1.2189 + "Unreported", "unreported", 1.2190 + unreportedTraceRecordTable, 1.2191 + unreportedUsableSize, totalUsableSize); 1.2192 + 1.2193 + PrintSortedTraceAndFrameRecords(aWriter, locService, 1.2194 + "Once-reported", "once-reported", 1.2195 + onceReportedTraceRecordTable, 1.2196 + onceReportedUsableSize, totalUsableSize); 1.2197 + 1.2198 + bool showTilde = anyBlocksSampled; 1.2199 + WriteTitle("Summary\n"); 1.2200 + 1.2201 + W("Total: %12s bytes (%6.2f%%) in %7s blocks (%6.2f%%)\n", 1.2202 + Show(totalUsableSize, gBuf1, kBufLen, showTilde), 1.2203 + 100.0, 1.2204 + Show(totalNumBlocks, gBuf2, kBufLen, showTilde), 1.2205 + 100.0); 1.2206 + 1.2207 + W("Unreported: %12s bytes (%6.2f%%) in %7s blocks (%6.2f%%)\n", 1.2208 + Show(unreportedUsableSize, gBuf1, kBufLen, showTilde), 1.2209 + Percent(unreportedUsableSize, totalUsableSize), 1.2210 + Show(unreportedNumBlocks, gBuf2, kBufLen, showTilde), 1.2211 + Percent(unreportedNumBlocks, totalNumBlocks)); 1.2212 + 1.2213 + W("Once-reported: %12s bytes (%6.2f%%) in %7s blocks (%6.2f%%)\n", 1.2214 + Show(onceReportedUsableSize, gBuf1, kBufLen, showTilde), 1.2215 + Percent(onceReportedUsableSize, totalUsableSize), 1.2216 + Show(onceReportedNumBlocks, gBuf2, kBufLen, showTilde), 1.2217 + Percent(onceReportedNumBlocks, totalNumBlocks)); 1.2218 + 1.2219 + W("Twice-reported: %12s bytes (%6.2f%%) in %7s blocks (%6.2f%%)\n", 1.2220 + Show(twiceReportedUsableSize, gBuf1, kBufLen, showTilde), 1.2221 + Percent(twiceReportedUsableSize, totalUsableSize), 1.2222 + Show(twiceReportedNumBlocks, gBuf2, kBufLen, showTilde), 1.2223 + Percent(twiceReportedNumBlocks, totalNumBlocks)); 1.2224 + 1.2225 + W("\n"); 1.2226 + 1.2227 + // Stats are non-deterministic, so don't show them in test mode. 
1.2228 + if (!gOptions->IsTestMode()) { 1.2229 + Sizes sizes; 1.2230 + SizeOfInternal(&sizes); 1.2231 + 1.2232 + WriteTitle("Execution measurements\n"); 1.2233 + 1.2234 + W("Data structures that persist after Dump() ends:\n"); 1.2235 + 1.2236 + W(" Used stack traces: %10s bytes\n", 1.2237 + Show(sizes.mStackTracesUsed, gBuf1, kBufLen)); 1.2238 + 1.2239 + W(" Unused stack traces: %10s bytes\n", 1.2240 + Show(sizes.mStackTracesUnused, gBuf1, kBufLen)); 1.2241 + 1.2242 + W(" Stack trace table: %10s bytes (%s entries, %s used)\n", 1.2243 + Show(sizes.mStackTraceTable, gBuf1, kBufLen), 1.2244 + Show(gStackTraceTable->capacity(), gBuf2, kBufLen), 1.2245 + Show(gStackTraceTable->count(), gBuf3, kBufLen)); 1.2246 + 1.2247 + W(" Block table: %10s bytes (%s entries, %s used)\n", 1.2248 + Show(sizes.mBlockTable, gBuf1, kBufLen), 1.2249 + Show(gBlockTable->capacity(), gBuf2, kBufLen), 1.2250 + Show(gBlockTable->count(), gBuf3, kBufLen)); 1.2251 + 1.2252 + W("\nData structures that are destroyed after Dump() ends:\n"); 1.2253 + 1.2254 + size_t unreportedSize = 1.2255 + unreportedTraceRecordTable.sizeOfIncludingThis(MallocSizeOf); 1.2256 + W(" Unreported table: %10s bytes (%s entries, %s used)\n", 1.2257 + Show(unreportedSize, gBuf1, kBufLen), 1.2258 + Show(unreportedTraceRecordTable.capacity(), gBuf2, kBufLen), 1.2259 + Show(unreportedTraceRecordTable.count(), gBuf3, kBufLen)); 1.2260 + 1.2261 + size_t onceReportedSize = 1.2262 + onceReportedTraceRecordTable.sizeOfIncludingThis(MallocSizeOf); 1.2263 + W(" Once-reported table: %10s bytes (%s entries, %s used)\n", 1.2264 + Show(onceReportedSize, gBuf1, kBufLen), 1.2265 + Show(onceReportedTraceRecordTable.capacity(), gBuf2, kBufLen), 1.2266 + Show(onceReportedTraceRecordTable.count(), gBuf3, kBufLen)); 1.2267 + 1.2268 + size_t twiceReportedSize = 1.2269 + twiceReportedTraceRecordTable.sizeOfIncludingThis(MallocSizeOf); 1.2270 + W(" Twice-reported table: %10s bytes (%s entries, %s used)\n", 1.2271 + Show(twiceReportedSize, gBuf1, kBufLen), 1.2272 + Show(twiceReportedTraceRecordTable.capacity(), gBuf2, kBufLen), 1.2273 + Show(twiceReportedTraceRecordTable.count(), gBuf3, kBufLen)); 1.2274 + 1.2275 + W(" Location service: %10s bytes\n", 1.2276 + Show(locService->SizeOfIncludingThis(), gBuf1, kBufLen)); 1.2277 + 1.2278 + W("\nCounts:\n"); 1.2279 + 1.2280 + size_t hits = locService->NumCacheHits(); 1.2281 + size_t misses = locService->NumCacheMisses(); 1.2282 + size_t requests = hits + misses; 1.2283 + W(" Location service: %10s requests\n", 1.2284 + Show(requests, gBuf1, kBufLen)); 1.2285 + 1.2286 + size_t count = locService->CacheCount(); 1.2287 + size_t capacity = locService->CacheCapacity(); 1.2288 + W(" Location service cache: %4.1f%% hit rate, %.1f%% occupancy at end\n", 1.2289 + Percent(hits, requests), Percent(count, capacity)); 1.2290 + 1.2291 + W("\n"); 1.2292 + } 1.2293 + 1.2294 + InfallibleAllocPolicy::delete_(locService); 1.2295 + 1.2296 + ClearReportsInternal(); // Use internal version, we already have the lock. 1.2297 + 1.2298 + StatusMsg("}\n"); 1.2299 +} 1.2300 + 1.2301 +//--------------------------------------------------------------------------- 1.2302 +// Testing 1.2303 +//--------------------------------------------------------------------------- 1.2304 + 1.2305 +// This function checks that heap blocks that have the same stack trace but 1.2306 +// different (or no) reporters get aggregated separately. 
1.2307 +void foo() 1.2308 +{ 1.2309 + char* a[6]; 1.2310 + for (int i = 0; i < 6; i++) { 1.2311 + a[i] = (char*) malloc(128 - 16*i); 1.2312 + } 1.2313 + 1.2314 + for (int i = 0; i <= 1; i++) 1.2315 + Report(a[i]); // reported 1.2316 + Report(a[2]); // reported 1.2317 + Report(a[3]); // reported 1.2318 + // a[4], a[5] unreported 1.2319 +} 1.2320 + 1.2321 +// This stops otherwise-unused variables from being optimized away. 1.2322 +static void 1.2323 +UseItOrLoseIt(void* a) 1.2324 +{ 1.2325 + char buf[64]; 1.2326 + sprintf(buf, "%p\n", a); 1.2327 + fwrite(buf, 1, strlen(buf) + 1, stderr); 1.2328 +} 1.2329 + 1.2330 +// The output from this should be compared against test-expected.dmd. It's 1.2331 +// been tested on Linux64, and probably will give different results on other 1.2332 +// platforms. 1.2333 +static void 1.2334 +RunTestMode(FILE* fp) 1.2335 +{ 1.2336 + Writer writer(FpWrite, fp); 1.2337 + 1.2338 + // The first part of this test requires sampling to be disabled. 1.2339 + gOptions->SetSampleBelowSize(1); 1.2340 + 1.2341 + // Dump 1. Zero for everything. 1.2342 + Dump(writer); 1.2343 + 1.2344 + // Dump 2: 1 freed, 9 out of 10 unreported. 1.2345 + // Dump 3: still present and unreported. 1.2346 + int i; 1.2347 + char* a; 1.2348 + for (i = 0; i < 10; i++) { 1.2349 + a = (char*) malloc(100); 1.2350 + UseItOrLoseIt(a); 1.2351 + } 1.2352 + free(a); 1.2353 + 1.2354 + // Min-sized block. 1.2355 + // Dump 2: reported. 1.2356 + // Dump 3: thrice-reported. 1.2357 + char* a2 = (char*) malloc(0); 1.2358 + Report(a2); 1.2359 + 1.2360 + // Operator new[]. 1.2361 + // Dump 2: reported. 1.2362 + // Dump 3: reportedness carries over, due to ReportOnAlloc. 1.2363 + char* b = new char[10]; 1.2364 + ReportOnAlloc(b); 1.2365 + 1.2366 + // ReportOnAlloc, then freed. 1.2367 + // Dump 2: freed, irrelevant. 1.2368 + // Dump 3: freed, irrelevant. 1.2369 + char* b2 = new char; 1.2370 + ReportOnAlloc(b2); 1.2371 + free(b2); 1.2372 + 1.2373 + // Dump 2: reported 4 times. 1.2374 + // Dump 3: freed, irrelevant. 1.2375 + char* c = (char*) calloc(10, 3); 1.2376 + Report(c); 1.2377 + for (int i = 0; i < 3; i++) { 1.2378 + Report(c); 1.2379 + } 1.2380 + 1.2381 + // Dump 2: ignored. 1.2382 + // Dump 3: irrelevant. 1.2383 + Report((void*)(intptr_t)i); 1.2384 + 1.2385 + // jemalloc rounds this up to 8192. 1.2386 + // Dump 2: reported. 1.2387 + // Dump 3: freed. 1.2388 + char* e = (char*) malloc(4096); 1.2389 + e = (char*) realloc(e, 4097); 1.2390 + Report(e); 1.2391 + 1.2392 + // First realloc is like malloc; second realloc is shrinking. 1.2393 + // Dump 2: reported. 1.2394 + // Dump 3: re-reported. 1.2395 + char* e2 = (char*) realloc(nullptr, 1024); 1.2396 + e2 = (char*) realloc(e2, 512); 1.2397 + Report(e2); 1.2398 + 1.2399 + // First realloc is like malloc; second realloc creates a min-sized block. 1.2400 + // XXX: on Windows, second realloc frees the block. 1.2401 + // Dump 2: reported. 1.2402 + // Dump 3: freed, irrelevant. 1.2403 + char* e3 = (char*) realloc(nullptr, 1023); 1.2404 +//e3 = (char*) realloc(e3, 0); 1.2405 + MOZ_ASSERT(e3); 1.2406 + Report(e3); 1.2407 + 1.2408 + // Dump 2: freed, irrelevant. 1.2409 + // Dump 3: freed, irrelevant. 1.2410 + char* f = (char*) malloc(64); 1.2411 + free(f); 1.2412 + 1.2413 + // Dump 2: ignored. 1.2414 + // Dump 3: irrelevant. 1.2415 + Report((void*)(intptr_t)0x0); 1.2416 + 1.2417 + // Dump 2: mixture of reported and unreported. 1.2418 + // Dump 3: all unreported. 1.2419 + foo(); 1.2420 + foo(); 1.2421 + 1.2422 + // Dump 2: twice-reported. 1.2423 + // Dump 3: twice-reported. 
1.2424 + char* g1 = (char*) malloc(77); 1.2425 + ReportOnAlloc(g1); 1.2426 + ReportOnAlloc(g1); 1.2427 + 1.2428 + // Dump 2: twice-reported. 1.2429 + // Dump 3: once-reported. 1.2430 + char* g2 = (char*) malloc(78); 1.2431 + Report(g2); 1.2432 + ReportOnAlloc(g2); 1.2433 + 1.2434 + // Dump 2: twice-reported. 1.2435 + // Dump 3: once-reported. 1.2436 + char* g3 = (char*) malloc(79); 1.2437 + ReportOnAlloc(g3); 1.2438 + Report(g3); 1.2439 + 1.2440 + // All the odd-ball ones. 1.2441 + // Dump 2: all unreported. 1.2442 + // Dump 3: all freed, irrelevant. 1.2443 + // XXX: no memalign on Mac 1.2444 +//void* x = memalign(64, 65); // rounds up to 128 1.2445 +//UseItOrLoseIt(x); 1.2446 + // XXX: posix_memalign doesn't work on B2G 1.2447 +//void* y; 1.2448 +//posix_memalign(&y, 128, 129); // rounds up to 256 1.2449 +//UseItOrLoseIt(y); 1.2450 + // XXX: valloc doesn't work on Windows. 1.2451 +//void* z = valloc(1); // rounds up to 4096 1.2452 +//UseItOrLoseIt(z); 1.2453 +//aligned_alloc(64, 256); // XXX: C11 only 1.2454 + 1.2455 + // Dump 2. 1.2456 + Dump(writer); 1.2457 + 1.2458 + //--------- 1.2459 + 1.2460 + Report(a2); 1.2461 + Report(a2); 1.2462 + free(c); 1.2463 + free(e); 1.2464 + Report(e2); 1.2465 + free(e3); 1.2466 +//free(x); 1.2467 +//free(y); 1.2468 +//free(z); 1.2469 + 1.2470 + // Dump 3. 1.2471 + Dump(writer); 1.2472 + 1.2473 + //--------- 1.2474 + 1.2475 + // Clear all knowledge of existing blocks to give us a clean slate. 1.2476 + gBlockTable->clear(); 1.2477 + 1.2478 + gOptions->SetSampleBelowSize(128); 1.2479 + 1.2480 + char* s; 1.2481 + 1.2482 + // This equals the sample size, and so is reported exactly. It should be 1.2483 + // listed before records of the same size that are sampled. 1.2484 + s = (char*) malloc(128); 1.2485 + UseItOrLoseIt(s); 1.2486 + 1.2487 + // This exceeds the sample size, and so is reported exactly. 1.2488 + s = (char*) malloc(144); 1.2489 + UseItOrLoseIt(s); 1.2490 + 1.2491 + // These together constitute exactly one sample. 1.2492 + for (int i = 0; i < 16; i++) { 1.2493 + s = (char*) malloc(8); 1.2494 + UseItOrLoseIt(s); 1.2495 + } 1.2496 + MOZ_ASSERT(gSmallBlockActualSizeCounter == 0); 1.2497 + 1.2498 + // These fall 8 bytes short of a full sample. 1.2499 + for (int i = 0; i < 15; i++) { 1.2500 + s = (char*) malloc(8); 1.2501 + UseItOrLoseIt(s); 1.2502 + } 1.2503 + MOZ_ASSERT(gSmallBlockActualSizeCounter == 120); 1.2504 + 1.2505 + // This exceeds the sample size, and so is recorded exactly. 1.2506 + s = (char*) malloc(256); 1.2507 + UseItOrLoseIt(s); 1.2508 + MOZ_ASSERT(gSmallBlockActualSizeCounter == 120); 1.2509 + 1.2510 + // This gets more than to a full sample from the |i < 15| loop above. 1.2511 + s = (char*) malloc(96); 1.2512 + UseItOrLoseIt(s); 1.2513 + MOZ_ASSERT(gSmallBlockActualSizeCounter == 88); 1.2514 + 1.2515 + // This gets to another full sample. 1.2516 + for (int i = 0; i < 5; i++) { 1.2517 + s = (char*) malloc(8); 1.2518 + UseItOrLoseIt(s); 1.2519 + } 1.2520 + MOZ_ASSERT(gSmallBlockActualSizeCounter == 0); 1.2521 + 1.2522 + // This allocates 16, 32, ..., 128 bytes, which results in a stack trace 1.2523 + // record that contains a mix of sample and non-sampled blocks, and so should 1.2524 + // be printed with '~' signs. 
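  // A worked check, given the SetSampleBelowSize(128) call above: the seven
  // blocks of 16..112 bytes are all sampled, totalling 448 bytes, i.e. three
  // full 128-byte samples plus 64 bytes left in the counter (which is what
  // the assertion below expects), while the 128-byte block is recorded
  // exactly.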
1.2525 + for (int i = 1; i <= 8; i++) { 1.2526 + s = (char*) malloc(i * 16); 1.2527 + UseItOrLoseIt(s); 1.2528 + } 1.2529 + MOZ_ASSERT(gSmallBlockActualSizeCounter == 64); 1.2530 + 1.2531 + // At the end we're 64 bytes into the current sample so we report ~1,424 1.2532 + // bytes of allocation overall, which is 64 less than the real value 1,488. 1.2533 + 1.2534 + // Dump 4. 1.2535 + Dump(writer); 1.2536 +} 1.2537 + 1.2538 +//--------------------------------------------------------------------------- 1.2539 +// Stress testing microbenchmark 1.2540 +//--------------------------------------------------------------------------- 1.2541 + 1.2542 +// This stops otherwise-unused variables from being optimized away. 1.2543 +static void 1.2544 +UseItOrLoseIt2(void* a) 1.2545 +{ 1.2546 + if (a == (void*)0x42) { 1.2547 + printf("UseItOrLoseIt2\n"); 1.2548 + } 1.2549 +} 1.2550 + 1.2551 +MOZ_NEVER_INLINE static void 1.2552 +stress5() 1.2553 +{ 1.2554 + for (int i = 0; i < 10; i++) { 1.2555 + void* x = malloc(64); 1.2556 + UseItOrLoseIt2(x); 1.2557 + if (i & 1) { 1.2558 + free(x); 1.2559 + } 1.2560 + } 1.2561 +} 1.2562 + 1.2563 +MOZ_NEVER_INLINE static void 1.2564 +stress4() 1.2565 +{ 1.2566 + stress5(); stress5(); stress5(); stress5(); stress5(); 1.2567 + stress5(); stress5(); stress5(); stress5(); stress5(); 1.2568 +} 1.2569 + 1.2570 +MOZ_NEVER_INLINE static void 1.2571 +stress3() 1.2572 +{ 1.2573 + for (int i = 0; i < 10; i++) { 1.2574 + stress4(); 1.2575 + } 1.2576 +} 1.2577 + 1.2578 +MOZ_NEVER_INLINE static void 1.2579 +stress2() 1.2580 +{ 1.2581 + stress3(); stress3(); stress3(); stress3(); stress3(); 1.2582 + stress3(); stress3(); stress3(); stress3(); stress3(); 1.2583 +} 1.2584 + 1.2585 +MOZ_NEVER_INLINE static void 1.2586 +stress1() 1.2587 +{ 1.2588 + for (int i = 0; i < 10; i++) { 1.2589 + stress2(); 1.2590 + } 1.2591 +} 1.2592 + 1.2593 +// This stress test does lots of allocations and frees, which is where most of 1.2594 +// DMD's overhead occurs. It allocates 1,000,000 64-byte blocks, spread evenly 1.2595 +// across 1,000 distinct stack traces. It frees every second one immediately 1.2596 +// after allocating it. 1.2597 +// 1.2598 +// It's highly artificial, but it's deterministic and easy to run. It can be 1.2599 +// timed under different conditions to glean performance data. 1.2600 +static void 1.2601 +RunStressMode(FILE* fp) 1.2602 +{ 1.2603 + Writer writer(FpWrite, fp); 1.2604 + 1.2605 + // Disable sampling for maximum stress. 1.2606 + gOptions->SetSampleBelowSize(1); 1.2607 + 1.2608 + stress1(); stress1(); stress1(); stress1(); stress1(); 1.2609 + stress1(); stress1(); stress1(); stress1(); stress1(); 1.2610 + 1.2611 + Dump(writer); 1.2612 +} 1.2613 + 1.2614 +} // namespace dmd 1.2615 +} // namespace mozilla
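Taken together, the exported entry points in this file (Report(), ReportOnAlloc(), IsEnabled(), SizeOf(), Dump() and ClearReports()) are all that an embedder needs in order to drive DMD. The sketch below is illustrative only and is not part of the patch: it assumes that the Writer/FpWrite pairing used by RunTestMode() and RunStressMode() above is also visible to callers via DMD.h, and the helper names MeasureAndReport() and DumpIfEnabled() are hypothetical.

#include "DMD.h"

#include <stdio.h>

#include "mozilla/MemoryReporting.h"

// Hypothetical reporter-side helper: measure a block the way a memory
// reporter would and tell DMD that the block has been accounted for.
// Reporting the same live block a second time would move it into the
// twice-reported records that Dump() prints first.
static size_t
MeasureAndReport(const void* aPtr, mozilla::MallocSizeOf aMallocSizeOf)
{
  size_t n = aMallocSizeOf(aPtr);
  mozilla::dmd::Report(aPtr);
  return n;
}

// Hypothetical dump helper, mirroring what the test and stress modes above
// do: write one dump to a file.  Dump() clears the reporter marks (but not
// the reported-on-alloc ones), so the next dump starts fresh.
static void
DumpIfEnabled(const char* aFilename)
{
  if (!mozilla::dmd::IsEnabled()) {
    return;  // $DMD was unset, "" or "0", so there is nothing to dump.
  }
  FILE* fp = fopen(aFilename, "w");
  if (!fp) {
    return;
  }
  mozilla::dmd::Writer writer(FpWrite, fp);  // the same pairing as RunTestMode()
  mozilla::dmd::Dump(writer);
  fclose(fp);
}

Because Report() is a no-op when DMD is disabled, a reporter written this way returns the same byte counts whether or not the $DMD environment variable was set.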