/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "DMD.h"

#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#ifdef XP_WIN
#if defined(MOZ_OPTIMIZE) && !defined(MOZ_PROFILING)
#error "Optimized, DMD-enabled builds on Windows must be built with --enable-profiling"
#endif
#include <windows.h>
#include <process.h>
#else
#include <unistd.h>
#endif

#ifdef ANDROID
#include <android/log.h>
#endif

#include "nscore.h"
#include "nsStackWalk.h"

#include "js/HashTable.h"
#include "js/Vector.h"

#include "mozilla/Assertions.h"
#include "mozilla/HashFunctions.h"
#include "mozilla/Likely.h"
#include "mozilla/MemoryReporting.h"

// MOZ_REPLACE_ONLY_MEMALIGN saves us from having to define
// replace_{posix_memalign,aligned_alloc,valloc}. It requires defining
// PAGE_SIZE. Nb: sysconf() is expensive, but it's only used for (the obsolete
// and rarely used) valloc.
#define MOZ_REPLACE_ONLY_MEMALIGN 1
#ifdef XP_WIN
#define PAGE_SIZE GetPageSize()
static long GetPageSize()
{
  SYSTEM_INFO si;
  GetSystemInfo(&si);
  return si.dwPageSize;
}
#else
#define PAGE_SIZE sysconf(_SC_PAGESIZE)
#endif
#include "replace_malloc.h"
#undef MOZ_REPLACE_ONLY_MEMALIGN
#undef PAGE_SIZE

namespace mozilla {
namespace dmd {

//---------------------------------------------------------------------------
// Utilities
//---------------------------------------------------------------------------

#ifndef DISALLOW_COPY_AND_ASSIGN
#define DISALLOW_COPY_AND_ASSIGN(T) \
  T(const T&);                      \
  void operator=(const T&)
#endif

static const malloc_table_t* gMallocTable = nullptr;

// This enables/disables DMD.
static bool gIsDMDRunning = false;

// This provides infallible allocations (they abort on OOM). We use it for all
// of DMD's own allocations, which fall into the following three cases.
// - Direct allocations (the easy case).
// - Indirect allocations in js::{Vector,HashSet,HashMap} -- this class serves
//   as their AllocPolicy.
// - Other indirect allocations (e.g. NS_StackWalk) -- see the comments on
//   Thread::mBlockIntercepts and in replace_malloc for how these work.
//
class InfallibleAllocPolicy
{
  static void ExitOnFailure(const void* aP);

public:
  static void* malloc_(size_t aSize)
  {
    void* p = gMallocTable->malloc(aSize);
    ExitOnFailure(p);
    return p;
  }

  static void* calloc_(size_t aSize)
  {
    void* p = gMallocTable->calloc(1, aSize);
    ExitOnFailure(p);
    return p;
  }

  // This realloc_ is the one we use for direct reallocs within DMD.
  static void* realloc_(void* aPtr, size_t aNewSize)
  {
    void* p = gMallocTable->realloc(aPtr, aNewSize);
    ExitOnFailure(p);
    return p;
  }

  // This realloc_ is required for this to be a JS container AllocPolicy.
  static void* realloc_(void* aPtr, size_t aOldSize, size_t aNewSize)
  {
    return InfallibleAllocPolicy::realloc_(aPtr, aNewSize);
  }

  static void* memalign_(size_t aAlignment, size_t aSize)
  {
    void* p = gMallocTable->memalign(aAlignment, aSize);
    ExitOnFailure(p);
    return p;
  }

  static void free_(void* aPtr) { gMallocTable->free(aPtr); }

  static char* strdup_(const char* aStr)
  {
    char* s = (char*) InfallibleAllocPolicy::malloc_(strlen(aStr) + 1);
    strcpy(s, aStr);
    return s;
  }

  template <typename T>
  static T* new_()
  {
    void* mem = malloc_(sizeof(T));
    ExitOnFailure(mem);
    return new (mem) T;
  }

  template <typename T, typename P1>
  static T* new_(P1 p1)
  {
    void* mem = malloc_(sizeof(T));
    ExitOnFailure(mem);
    return new (mem) T(p1);
  }

  template <typename T>
  static void delete_(T *p)
  {
    if (p) {
      p->~T();
      InfallibleAllocPolicy::free_(p);
    }
  }

  static void reportAllocOverflow() { ExitOnFailure(nullptr); }
};

// This is only needed because of the |const void*| vs |void*| arg mismatch.
static size_t
MallocSizeOf(const void* aPtr)
{
  return gMallocTable->malloc_usable_size(const_cast<void*>(aPtr));
}

static void
StatusMsg(const char* aFmt, ...)
{
  va_list ap;
  va_start(ap, aFmt);
#ifdef ANDROID
  __android_log_vprint(ANDROID_LOG_INFO, "DMD", aFmt, ap);
#else
  // The +64 is easily enough for the "DMD[<pid>] " prefix and the NUL.
  char* fmt = (char*) InfallibleAllocPolicy::malloc_(strlen(aFmt) + 64);
  sprintf(fmt, "DMD[%d] %s", getpid(), aFmt);
  vfprintf(stderr, fmt, ap);
  InfallibleAllocPolicy::free_(fmt);
#endif
  va_end(ap);
}

/* static */ void
InfallibleAllocPolicy::ExitOnFailure(const void* aP)
{
  if (!aP) {
    StatusMsg("out of memory; aborting\n");
    MOZ_CRASH();
  }
}
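
// Illustrative note (added commentary, not from the original source): because
// InfallibleAllocPolicy provides malloc_/calloc_/realloc_/free_ and
// reportAllocOverflow, it can be passed as the AllocPolicy parameter of the
// JS container types used later in this file, e.g.:
//
//   typedef js::HashSet<StackTrace*, StackTrace, InfallibleAllocPolicy>
//           StackTraceTable;
//
// so that allocation failures inside those containers abort via
// ExitOnFailure() rather than returning null.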

void
Writer::Write(const char* aFmt, ...) const
{
  va_list ap;
  va_start(ap, aFmt);
  mWriterFun(mWriteState, aFmt, ap);
  va_end(ap);
}

#define W(...) aWriter.Write(__VA_ARGS__);

#define WriteTitle(...)                                                       \
  W("------------------------------------------------------------------\n"); \
  W(__VA_ARGS__);                                                             \
  W("------------------------------------------------------------------\n\n");

MOZ_EXPORT void
FpWrite(void* aWriteState, const char* aFmt, va_list aAp)
{
  FILE* fp = static_cast<FILE*>(aWriteState);
  vfprintf(fp, aFmt, aAp);
}

static double
Percent(size_t part, size_t whole)
{
  return (whole == 0) ? 0 : 100 * (double)part / whole;
}

// Commifies the number and prepends a '~' if requested. Best used with
// |kBufLen| and |gBuf[1234]|, because they should be big enough for any number
// we'll see.
static char*
Show(size_t n, char* buf, size_t buflen, bool addTilde = false)
{
  int nc = 0, i = 0, lasti = buflen - 2;
  buf[lasti + 1] = '\0';
  if (n == 0) {
    buf[lasti - i] = '0';
    i++;
  } else {
    while (n > 0) {
      if (((i - nc) % 3) == 0 && i != 0) {
        buf[lasti - i] = ',';
        i++;
        nc++;
      }
      buf[lasti - i] = static_cast<char>((n % 10) + '0');
      i++;
      n /= 10;
    }
  }
  int firstCharIndex = lasti - i + 1;

  if (addTilde) {
    firstCharIndex--;
    buf[firstCharIndex] = '~';
  }

  MOZ_ASSERT(firstCharIndex >= 0);
  return &buf[firstCharIndex];
}

static const char*
Plural(size_t aN)
{
  return aN == 1 ? "" : "s";
}
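
// Illustrative example (added commentary, not from the original source) of
// the Show() helper above: with a kBufLen-sized buffer,
//
//   Show(1234567, gBuf1, kBufLen)        returns "1,234,567"
//   Show(1234567, gBuf1, kBufLen, true)  returns "~1,234,567"
//
// i.e. digits are grouped with commas and a leading '~' marks an approximate
// (sampled) value.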

// Used by calls to Show().
static const size_t kBufLen = 64;
static char gBuf1[kBufLen];
static char gBuf2[kBufLen];
static char gBuf3[kBufLen];
static char gBuf4[kBufLen];

//---------------------------------------------------------------------------
// Options (Part 1)
//---------------------------------------------------------------------------

class Options
{
  template <typename T>
  struct NumOption
  {
    const T mDefault;
    const T mMax;
    T       mActual;
    NumOption(T aDefault, T aMax)
      : mDefault(aDefault), mMax(aMax), mActual(aDefault)
    {}
  };

  enum Mode {
    Normal,   // run normally
    Test,     // do some basic correctness tests
    Stress    // do some performance stress tests
  };

  char* mDMDEnvVar;   // a saved copy, for printing during Dump()

  NumOption<size_t>   mSampleBelowSize;
  NumOption<uint32_t> mMaxFrames;
  NumOption<uint32_t> mMaxRecords;
  Mode mMode;

  void BadArg(const char* aArg);
  static const char* ValueIfMatch(const char* aArg, const char* aOptionName);
  static bool GetLong(const char* aArg, const char* aOptionName,
                      long aMin, long aMax, long* aN);

public:
  Options(const char* aDMDEnvVar);

  const char* DMDEnvVar() const { return mDMDEnvVar; }

  size_t SampleBelowSize() const { return mSampleBelowSize.mActual; }
  size_t MaxFrames()       const { return mMaxFrames.mActual; }
  size_t MaxRecords()      const { return mMaxRecords.mActual; }

  void SetSampleBelowSize(size_t aN) { mSampleBelowSize.mActual = aN; }

  bool IsTestMode()   const { return mMode == Test; }
  bool IsStressMode() const { return mMode == Stress; }
};

static Options *gOptions;
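
// Illustrative example (added commentary, not from the original source): a
// $DMD value exercising the options above might look like
//
//   DMD="--sample-below=4093 --max-frames=16 --mode=test"
//
// while DMD="1" enables DMD with the default options. Parsing happens in
// Options::Options() further below.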

//---------------------------------------------------------------------------
// The global lock
//---------------------------------------------------------------------------

// MutexBase implements the platform-specific parts of a mutex.

#ifdef XP_WIN

class MutexBase
{
  CRITICAL_SECTION mCS;

  DISALLOW_COPY_AND_ASSIGN(MutexBase);

public:
  MutexBase()
  {
    InitializeCriticalSection(&mCS);
  }

  ~MutexBase()
  {
    DeleteCriticalSection(&mCS);
  }

  void Lock()
  {
    EnterCriticalSection(&mCS);
  }

  void Unlock()
  {
    LeaveCriticalSection(&mCS);
  }
};

#else

#include <pthread.h>
#include <sys/types.h>

class MutexBase
{
  pthread_mutex_t mMutex;

  DISALLOW_COPY_AND_ASSIGN(MutexBase);

public:
  MutexBase()
  {
    pthread_mutex_init(&mMutex, nullptr);
  }

  void Lock()
  {
    pthread_mutex_lock(&mMutex);
  }

  void Unlock()
  {
    pthread_mutex_unlock(&mMutex);
  }
};

#endif

class Mutex : private MutexBase
{
  bool mIsLocked;

  DISALLOW_COPY_AND_ASSIGN(Mutex);

public:
  Mutex()
    : mIsLocked(false)
  {}

  void Lock()
  {
    MutexBase::Lock();
    MOZ_ASSERT(!mIsLocked);
    mIsLocked = true;
  }

  void Unlock()
  {
    MOZ_ASSERT(mIsLocked);
    mIsLocked = false;
    MutexBase::Unlock();
  }

  bool IsLocked()
  {
    return mIsLocked;
  }
};

// This lock must be held while manipulating global state, such as
// gStackTraceTable, gBlockTable, etc.
static Mutex* gStateLock = nullptr;

class AutoLockState
{
  DISALLOW_COPY_AND_ASSIGN(AutoLockState);

public:
  AutoLockState()
  {
    gStateLock->Lock();
  }
  ~AutoLockState()
  {
    gStateLock->Unlock();
  }
};

class AutoUnlockState
{
  DISALLOW_COPY_AND_ASSIGN(AutoUnlockState);

public:
  AutoUnlockState()
  {
    gStateLock->Unlock();
  }
  ~AutoUnlockState()
  {
    gStateLock->Lock();
  }
};

//---------------------------------------------------------------------------
// Thread-local storage and blocking of intercepts
//---------------------------------------------------------------------------

#ifdef XP_WIN

#define DMD_TLS_INDEX_TYPE        DWORD
#define DMD_CREATE_TLS_INDEX(i_)  do {                \
                                    (i_) = TlsAlloc(); \
                                  } while (0)
#define DMD_DESTROY_TLS_INDEX(i_) TlsFree((i_))
#define DMD_GET_TLS_DATA(i_)      TlsGetValue((i_))
#define DMD_SET_TLS_DATA(i_, v_)  TlsSetValue((i_), (v_))

#else

#include <pthread.h>

#define DMD_TLS_INDEX_TYPE        pthread_key_t
#define DMD_CREATE_TLS_INDEX(i_)  pthread_key_create(&(i_), nullptr)
#define DMD_DESTROY_TLS_INDEX(i_) pthread_key_delete((i_))
#define DMD_GET_TLS_DATA(i_)      pthread_getspecific((i_))
#define DMD_SET_TLS_DATA(i_, v_)  pthread_setspecific((i_), (v_))

#endif

static DMD_TLS_INDEX_TYPE gTlsIndex;

class Thread
{
  // Required for allocation via InfallibleAllocPolicy::new_<Thread>.
  friend class InfallibleAllocPolicy;

  // When true, this blocks intercepts, which allows malloc interception
  // functions to themselves call malloc. (Nb: for direct calls to malloc we
  // can just use InfallibleAllocPolicy::{malloc_,new_}, but we sometimes
  // indirectly call vanilla malloc via functions like NS_StackWalk.)
  bool mBlockIntercepts;

  Thread()
    : mBlockIntercepts(false)
  {}

  DISALLOW_COPY_AND_ASSIGN(Thread);

public:
  static Thread* Fetch();

  bool BlockIntercepts()
  {
    MOZ_ASSERT(!mBlockIntercepts);
    return mBlockIntercepts = true;
  }

  bool UnblockIntercepts()
  {
    MOZ_ASSERT(mBlockIntercepts);
    return mBlockIntercepts = false;
  }

  bool InterceptsAreBlocked() const
  {
    return mBlockIntercepts;
  }
};

/* static */ Thread*
Thread::Fetch()
{
  Thread* t = static_cast<Thread*>(DMD_GET_TLS_DATA(gTlsIndex));

  if (MOZ_UNLIKELY(!t)) {
    // This memory is never freed, even if the thread dies. It's a leak, but
    // only a tiny one.
michael@0: t = InfallibleAllocPolicy::new_(); michael@0: DMD_SET_TLS_DATA(gTlsIndex, t); michael@0: } michael@0: michael@0: return t; michael@0: } michael@0: michael@0: // An object of this class must be created (on the stack) before running any michael@0: // code that might allocate. michael@0: class AutoBlockIntercepts michael@0: { michael@0: Thread* const mT; michael@0: michael@0: DISALLOW_COPY_AND_ASSIGN(AutoBlockIntercepts); michael@0: michael@0: public: michael@0: AutoBlockIntercepts(Thread* aT) michael@0: : mT(aT) michael@0: { michael@0: mT->BlockIntercepts(); michael@0: } michael@0: ~AutoBlockIntercepts() michael@0: { michael@0: MOZ_ASSERT(mT->InterceptsAreBlocked()); michael@0: mT->UnblockIntercepts(); michael@0: } michael@0: }; michael@0: michael@0: //--------------------------------------------------------------------------- michael@0: // Location service michael@0: //--------------------------------------------------------------------------- michael@0: michael@0: // This class is used to print details about code locations. michael@0: class LocationService michael@0: { michael@0: // WriteLocation() is the key function in this class. It's basically a michael@0: // wrapper around NS_DescribeCodeAddress. michael@0: // michael@0: // However, NS_DescribeCodeAddress is very slow on some platforms, and we michael@0: // have lots of repeated (i.e. same PC) calls to it. So we do some caching michael@0: // of results. Each cached result includes two strings (|mFunction| and michael@0: // |mLibrary|), so we also optimize them for space in the following ways. michael@0: // michael@0: // - The number of distinct library names is small, e.g. a few dozen. There michael@0: // is lots of repetition, especially of libxul. So we intern them in their michael@0: // own table, which saves space over duplicating them for each cache entry. michael@0: // michael@0: // - The number of distinct function names is much higher, so we duplicate michael@0: // them in each cache entry. That's more space-efficient than interning michael@0: // because entries containing single-occurrence function names are quickly michael@0: // overwritten, and their copies released. In addition, empty function michael@0: // names are common, so we use nullptr to represent them compactly. michael@0: michael@0: struct StringHasher michael@0: { michael@0: typedef const char* Lookup; michael@0: michael@0: static uint32_t hash(const char* const& aS) michael@0: { michael@0: return HashString(aS); michael@0: } michael@0: michael@0: static bool match(const char* const& aA, const char* const& aB) michael@0: { michael@0: return strcmp(aA, aB) == 0; michael@0: } michael@0: }; michael@0: michael@0: typedef js::HashSet michael@0: StringTable; michael@0: michael@0: StringTable mLibraryStrings; michael@0: michael@0: struct Entry michael@0: { michael@0: const void* mPc; michael@0: char* mFunction; // owned by the Entry; may be null michael@0: const char* mLibrary; // owned by mLibraryStrings; never null michael@0: // in a non-empty entry is in use michael@0: ptrdiff_t mLOffset; michael@0: char* mFileName; // owned by the Entry; may be null michael@0: uint32_t mLineNo:31; michael@0: uint32_t mInUse:1; // is the entry used? michael@0: michael@0: Entry() michael@0: : mPc(0), mFunction(nullptr), mLibrary(nullptr), mLOffset(0), mFileName(nullptr), mLineNo(0), mInUse(0) michael@0: {} michael@0: michael@0: ~Entry() michael@0: { michael@0: // We don't free mLibrary because it's externally owned. 
michael@0: InfallibleAllocPolicy::free_(mFunction); michael@0: InfallibleAllocPolicy::free_(mFileName); michael@0: } michael@0: michael@0: void Replace(const void* aPc, const char* aFunction, michael@0: const char* aLibrary, ptrdiff_t aLOffset, michael@0: const char* aFileName, unsigned long aLineNo) michael@0: { michael@0: mPc = aPc; michael@0: michael@0: // Convert "" to nullptr. Otherwise, make a copy of the name. michael@0: InfallibleAllocPolicy::free_(mFunction); michael@0: mFunction = michael@0: !aFunction[0] ? nullptr : InfallibleAllocPolicy::strdup_(aFunction); michael@0: InfallibleAllocPolicy::free_(mFileName); michael@0: mFileName = michael@0: !aFileName[0] ? nullptr : InfallibleAllocPolicy::strdup_(aFileName); michael@0: michael@0: michael@0: mLibrary = aLibrary; michael@0: mLOffset = aLOffset; michael@0: mLineNo = aLineNo; michael@0: michael@0: mInUse = 1; michael@0: } michael@0: michael@0: size_t SizeOfExcludingThis() { michael@0: // Don't measure mLibrary because it's externally owned. michael@0: return MallocSizeOf(mFunction) + MallocSizeOf(mFileName); michael@0: } michael@0: }; michael@0: michael@0: // A direct-mapped cache. When doing a dump just after starting desktop michael@0: // Firefox (which is similar to dumping after a longer-running session, michael@0: // thanks to the limit on how many records we dump), a cache with 2^24 michael@0: // entries (which approximates an infinite-entry cache) has a ~91% hit rate. michael@0: // A cache with 2^12 entries has a ~83% hit rate, and takes up ~85 KiB (on michael@0: // 32-bit platforms) or ~150 KiB (on 64-bit platforms). michael@0: static const size_t kNumEntries = 1 << 12; michael@0: static const size_t kMask = kNumEntries - 1; michael@0: Entry mEntries[kNumEntries]; michael@0: michael@0: size_t mNumCacheHits; michael@0: size_t mNumCacheMisses; michael@0: michael@0: public: michael@0: LocationService() michael@0: : mEntries(), mNumCacheHits(0), mNumCacheMisses(0) michael@0: { michael@0: (void)mLibraryStrings.init(64); michael@0: } michael@0: michael@0: void WriteLocation(const Writer& aWriter, const void* aPc) michael@0: { michael@0: MOZ_ASSERT(gStateLock->IsLocked()); michael@0: michael@0: uint32_t index = HashGeneric(aPc) & kMask; michael@0: MOZ_ASSERT(index < kNumEntries); michael@0: Entry& entry = mEntries[index]; michael@0: michael@0: if (!entry.mInUse || entry.mPc != aPc) { michael@0: mNumCacheMisses++; michael@0: michael@0: // NS_DescribeCodeAddress can (on Linux) acquire a lock inside michael@0: // the shared library loader. Another thread might call malloc michael@0: // while holding that lock (when loading a shared library). So michael@0: // we have to exit gStateLock around this call. For details, see michael@0: // https://bugzilla.mozilla.org/show_bug.cgi?id=363334#c3 michael@0: nsCodeAddressDetails details; michael@0: { michael@0: AutoUnlockState unlock; michael@0: (void)NS_DescribeCodeAddress(const_cast(aPc), &details); michael@0: } michael@0: michael@0: // Intern the library name. 
michael@0: const char* library = nullptr; michael@0: StringTable::AddPtr p = mLibraryStrings.lookupForAdd(details.library); michael@0: if (!p) { michael@0: library = InfallibleAllocPolicy::strdup_(details.library); michael@0: (void)mLibraryStrings.add(p, library); michael@0: } else { michael@0: library = *p; michael@0: } michael@0: michael@0: entry.Replace(aPc, details.function, library, details.loffset, details.filename, details.lineno); michael@0: michael@0: } else { michael@0: mNumCacheHits++; michael@0: } michael@0: michael@0: MOZ_ASSERT(entry.mPc == aPc); michael@0: michael@0: uintptr_t entryPc = (uintptr_t)(entry.mPc); michael@0: // Sometimes we get nothing useful. Just print "???" for the entire entry michael@0: // so that fix-linux-stack.pl doesn't complain about an empty filename. michael@0: if (!entry.mFunction && !entry.mLibrary[0] && entry.mLOffset == 0) { michael@0: W(" ??? 0x%x\n", entryPc); michael@0: } else { michael@0: // Use "???" for unknown functions. michael@0: const char* entryFunction = entry.mFunction ? entry.mFunction : "???"; michael@0: if (entry.mFileName) { michael@0: // On Windows we can get the filename and line number at runtime. michael@0: W(" %s (%s:%lu) 0x%x\n", michael@0: entryFunction, entry.mFileName, entry.mLineNo, entryPc); michael@0: } else { michael@0: // On Linux and Mac we cannot get the filename and line number at michael@0: // runtime, so we print the offset in a form that fix-linux-stack.pl and michael@0: // fix_macosx_stack.py can post-process. michael@0: W(" %s[%s +0x%X] 0x%x\n", michael@0: entryFunction, entry.mLibrary, entry.mLOffset, entryPc); michael@0: } michael@0: } michael@0: } michael@0: michael@0: size_t SizeOfIncludingThis() michael@0: { michael@0: size_t n = MallocSizeOf(this); michael@0: for (uint32_t i = 0; i < kNumEntries; i++) { michael@0: n += mEntries[i].SizeOfExcludingThis(); michael@0: } michael@0: michael@0: n += mLibraryStrings.sizeOfExcludingThis(MallocSizeOf); michael@0: for (StringTable::Range r = mLibraryStrings.all(); michael@0: !r.empty(); michael@0: r.popFront()) { michael@0: n += MallocSizeOf(r.front()); michael@0: } michael@0: michael@0: return n; michael@0: } michael@0: michael@0: size_t CacheCapacity() const { return kNumEntries; } michael@0: michael@0: size_t CacheCount() const michael@0: { michael@0: size_t n = 0; michael@0: for (size_t i = 0; i < kNumEntries; i++) { michael@0: if (mEntries[i].mInUse) { michael@0: n++; michael@0: } michael@0: } michael@0: return n; michael@0: } michael@0: michael@0: size_t NumCacheHits() const { return mNumCacheHits; } michael@0: size_t NumCacheMisses() const { return mNumCacheMisses; } michael@0: }; michael@0: michael@0: //--------------------------------------------------------------------------- michael@0: // Stack traces michael@0: //--------------------------------------------------------------------------- michael@0: michael@0: class StackTrace michael@0: { michael@0: public: michael@0: static const uint32_t MaxFrames = 24; michael@0: michael@0: private: michael@0: uint32_t mLength; // The number of PCs. michael@0: void* mPcs[MaxFrames]; // The PCs themselves. If --max-frames is less michael@0: // than 24, this array is bigger than necessary, michael@0: // but that case is unusual. 
michael@0: michael@0: public: michael@0: StackTrace() : mLength(0) {} michael@0: michael@0: uint32_t Length() const { return mLength; } michael@0: void* Pc(uint32_t i) const { MOZ_ASSERT(i < mLength); return mPcs[i]; } michael@0: michael@0: uint32_t Size() const { return mLength * sizeof(mPcs[0]); } michael@0: michael@0: // The stack trace returned by this function is interned in gStackTraceTable, michael@0: // and so is immortal and unmovable. michael@0: static const StackTrace* Get(Thread* aT); michael@0: michael@0: void Sort() michael@0: { michael@0: qsort(mPcs, mLength, sizeof(mPcs[0]), StackTrace::QsortCmp); michael@0: } michael@0: michael@0: void Print(const Writer& aWriter, LocationService* aLocService) const; michael@0: michael@0: // Hash policy. michael@0: michael@0: typedef StackTrace* Lookup; michael@0: michael@0: static uint32_t hash(const StackTrace* const& aSt) michael@0: { michael@0: return mozilla::HashBytes(aSt->mPcs, aSt->Size()); michael@0: } michael@0: michael@0: static bool match(const StackTrace* const& aA, michael@0: const StackTrace* const& aB) michael@0: { michael@0: return aA->mLength == aB->mLength && michael@0: memcmp(aA->mPcs, aB->mPcs, aA->Size()) == 0; michael@0: } michael@0: michael@0: private: michael@0: static void StackWalkCallback(void* aPc, void* aSp, void* aClosure) michael@0: { michael@0: StackTrace* st = (StackTrace*) aClosure; michael@0: MOZ_ASSERT(st->mLength < MaxFrames); michael@0: st->mPcs[st->mLength] = aPc; michael@0: st->mLength++; michael@0: } michael@0: michael@0: static int QsortCmp(const void* aA, const void* aB) michael@0: { michael@0: const void* const a = *static_cast(aA); michael@0: const void* const b = *static_cast(aB); michael@0: if (a < b) return -1; michael@0: if (a > b) return 1; michael@0: return 0; michael@0: } michael@0: }; michael@0: michael@0: typedef js::HashSet michael@0: StackTraceTable; michael@0: static StackTraceTable* gStackTraceTable = nullptr; michael@0: michael@0: // We won't GC the stack trace table until it this many elements. michael@0: static uint32_t gGCStackTraceTableWhenSizeExceeds = 4 * 1024; michael@0: michael@0: void michael@0: StackTrace::Print(const Writer& aWriter, LocationService* aLocService) const michael@0: { michael@0: if (mLength == 0) { michael@0: W(" (empty)\n"); // StackTrace::Get() must have failed michael@0: return; michael@0: } michael@0: michael@0: for (uint32_t i = 0; i < mLength; i++) { michael@0: aLocService->WriteLocation(aWriter, Pc(i)); michael@0: } michael@0: } michael@0: michael@0: /* static */ const StackTrace* michael@0: StackTrace::Get(Thread* aT) michael@0: { michael@0: MOZ_ASSERT(gStateLock->IsLocked()); michael@0: MOZ_ASSERT(aT->InterceptsAreBlocked()); michael@0: michael@0: // On Windows, NS_StackWalk can acquire a lock from the shared library michael@0: // loader. Another thread might call malloc while holding that lock (when michael@0: // loading a shared library). So we can't be in gStateLock during the call michael@0: // to NS_StackWalk. For details, see michael@0: // https://bugzilla.mozilla.org/show_bug.cgi?id=374829#c8 michael@0: // On Linux, something similar can happen; see bug 824340. michael@0: // So let's just release it on all platforms. 
michael@0: nsresult rv; michael@0: StackTrace tmp; michael@0: { michael@0: AutoUnlockState unlock; michael@0: uint32_t skipFrames = 2; michael@0: rv = NS_StackWalk(StackWalkCallback, skipFrames, michael@0: gOptions->MaxFrames(), &tmp, 0, nullptr); michael@0: } michael@0: michael@0: if (rv == NS_OK) { michael@0: // Handle the common case first. All is ok. Nothing to do. michael@0: } else if (rv == NS_ERROR_NOT_IMPLEMENTED || rv == NS_ERROR_FAILURE) { michael@0: tmp.mLength = 0; michael@0: } else if (rv == NS_ERROR_UNEXPECTED) { michael@0: // XXX: This |rv| only happens on Mac, and it indicates that we're handling michael@0: // a call to malloc that happened inside a mutex-handling function. Any michael@0: // attempt to create a semaphore (which can happen in printf) could michael@0: // deadlock. michael@0: // michael@0: // However, the most complex thing DMD does after Get() returns is to put michael@0: // something in a hash table, which might call michael@0: // InfallibleAllocPolicy::malloc_. I'm not yet sure if this needs special michael@0: // handling, hence the forced abort. Sorry. If you hit this, please file michael@0: // a bug and CC nnethercote. michael@0: MOZ_CRASH(); michael@0: } else { michael@0: MOZ_CRASH(); // should be impossible michael@0: } michael@0: michael@0: StackTraceTable::AddPtr p = gStackTraceTable->lookupForAdd(&tmp); michael@0: if (!p) { michael@0: StackTrace* stnew = InfallibleAllocPolicy::new_(tmp); michael@0: (void)gStackTraceTable->add(p, stnew); michael@0: } michael@0: return *p; michael@0: } michael@0: michael@0: //--------------------------------------------------------------------------- michael@0: // Heap blocks michael@0: //--------------------------------------------------------------------------- michael@0: michael@0: // This class combines a 2-byte-aligned pointer (i.e. one whose bottom bit michael@0: // is zero) with a 1-bit tag. michael@0: // michael@0: // |T| is the pointer type, e.g. |int*|, not the pointed-to type. This makes michael@0: // is easier to have const pointers, e.g. |TaggedPtr|. michael@0: template michael@0: class TaggedPtr michael@0: { michael@0: union michael@0: { michael@0: T mPtr; michael@0: uintptr_t mUint; michael@0: }; michael@0: michael@0: static const uintptr_t kTagMask = uintptr_t(0x1); michael@0: static const uintptr_t kPtrMask = ~kTagMask; michael@0: michael@0: static bool IsTwoByteAligned(T aPtr) michael@0: { michael@0: return (uintptr_t(aPtr) & kTagMask) == 0; michael@0: } michael@0: michael@0: public: michael@0: TaggedPtr() michael@0: : mPtr(nullptr) michael@0: {} michael@0: michael@0: TaggedPtr(T aPtr, bool aBool) michael@0: : mPtr(aPtr) michael@0: { michael@0: MOZ_ASSERT(IsTwoByteAligned(aPtr)); michael@0: uintptr_t tag = uintptr_t(aBool); michael@0: MOZ_ASSERT(tag <= kTagMask); michael@0: mUint |= (tag & kTagMask); michael@0: } michael@0: michael@0: void Set(T aPtr, bool aBool) michael@0: { michael@0: MOZ_ASSERT(IsTwoByteAligned(aPtr)); michael@0: mPtr = aPtr; michael@0: uintptr_t tag = uintptr_t(aBool); michael@0: MOZ_ASSERT(tag <= kTagMask); michael@0: mUint |= (tag & kTagMask); michael@0: } michael@0: michael@0: T Ptr() const { return reinterpret_cast(mUint & kPtrMask); } michael@0: michael@0: bool Tag() const { return bool(mUint & kTagMask); } michael@0: }; michael@0: michael@0: // A live heap block. michael@0: class Block michael@0: { michael@0: const void* mPtr; michael@0: const size_t mReqSize; // size requested michael@0: michael@0: // Ptr: |mAllocStackTrace| - stack trace where this block was allocated. 
michael@0: // Tag bit 0: |mSampled| - was this block sampled? (if so, slop == 0). michael@0: TaggedPtr michael@0: mAllocStackTrace_mSampled; michael@0: michael@0: // This array has two elements because we record at most two reports of a michael@0: // block. michael@0: // - Ptr: |mReportStackTrace| - stack trace where this block was reported. michael@0: // nullptr if not reported. michael@0: // - Tag bit 0: |mReportedOnAlloc| - was the block reported immediately on michael@0: // allocation? If so, DMD must not clear the report at the end of Dump(). michael@0: // Only relevant if |mReportStackTrace| is non-nullptr. michael@0: // michael@0: // |mPtr| is used as the key in BlockTable, so it's ok for this member michael@0: // to be |mutable|. michael@0: mutable TaggedPtr mReportStackTrace_mReportedOnAlloc[2]; michael@0: michael@0: public: michael@0: Block(const void* aPtr, size_t aReqSize, const StackTrace* aAllocStackTrace, michael@0: bool aSampled) michael@0: : mPtr(aPtr), michael@0: mReqSize(aReqSize), michael@0: mAllocStackTrace_mSampled(aAllocStackTrace, aSampled), michael@0: mReportStackTrace_mReportedOnAlloc() // all fields get zeroed michael@0: { michael@0: MOZ_ASSERT(aAllocStackTrace); michael@0: } michael@0: michael@0: size_t ReqSize() const { return mReqSize; } michael@0: michael@0: // Sampled blocks always have zero slop. michael@0: size_t SlopSize() const michael@0: { michael@0: return IsSampled() ? 0 : MallocSizeOf(mPtr) - mReqSize; michael@0: } michael@0: michael@0: size_t UsableSize() const michael@0: { michael@0: return IsSampled() ? mReqSize : MallocSizeOf(mPtr); michael@0: } michael@0: michael@0: bool IsSampled() const michael@0: { michael@0: return mAllocStackTrace_mSampled.Tag(); michael@0: } michael@0: michael@0: const StackTrace* AllocStackTrace() const michael@0: { michael@0: return mAllocStackTrace_mSampled.Ptr(); michael@0: } michael@0: michael@0: const StackTrace* ReportStackTrace1() const { michael@0: return mReportStackTrace_mReportedOnAlloc[0].Ptr(); michael@0: } michael@0: michael@0: const StackTrace* ReportStackTrace2() const { michael@0: return mReportStackTrace_mReportedOnAlloc[1].Ptr(); michael@0: } michael@0: michael@0: bool ReportedOnAlloc1() const { michael@0: return mReportStackTrace_mReportedOnAlloc[0].Tag(); michael@0: } michael@0: michael@0: bool ReportedOnAlloc2() const { michael@0: return mReportStackTrace_mReportedOnAlloc[1].Tag(); michael@0: } michael@0: michael@0: uint32_t NumReports() const { michael@0: if (ReportStackTrace2()) { michael@0: MOZ_ASSERT(ReportStackTrace1()); michael@0: return 2; michael@0: } michael@0: if (ReportStackTrace1()) { michael@0: return 1; michael@0: } michael@0: return 0; michael@0: } michael@0: michael@0: // This is |const| thanks to the |mutable| fields above. michael@0: void Report(Thread* aT, bool aReportedOnAlloc) const michael@0: { michael@0: // We don't bother recording reports after the 2nd one. michael@0: uint32_t numReports = NumReports(); michael@0: if (numReports < 2) { michael@0: mReportStackTrace_mReportedOnAlloc[numReports].Set(StackTrace::Get(aT), michael@0: aReportedOnAlloc); michael@0: } michael@0: } michael@0: michael@0: void UnreportIfNotReportedOnAlloc() const michael@0: { michael@0: if (!ReportedOnAlloc1() && !ReportedOnAlloc2()) { michael@0: mReportStackTrace_mReportedOnAlloc[0].Set(nullptr, 0); michael@0: mReportStackTrace_mReportedOnAlloc[1].Set(nullptr, 0); michael@0: michael@0: } else if (!ReportedOnAlloc1() && ReportedOnAlloc2()) { michael@0: // Shift the 2nd report down to the 1st one. 
michael@0: mReportStackTrace_mReportedOnAlloc[0] = michael@0: mReportStackTrace_mReportedOnAlloc[1]; michael@0: mReportStackTrace_mReportedOnAlloc[1].Set(nullptr, 0); michael@0: michael@0: } else if (ReportedOnAlloc1() && !ReportedOnAlloc2()) { michael@0: mReportStackTrace_mReportedOnAlloc[1].Set(nullptr, 0); michael@0: } michael@0: } michael@0: michael@0: // Hash policy. michael@0: michael@0: typedef const void* Lookup; michael@0: michael@0: static uint32_t hash(const void* const& aPtr) michael@0: { michael@0: return mozilla::HashGeneric(aPtr); michael@0: } michael@0: michael@0: static bool match(const Block& aB, const void* const& aPtr) michael@0: { michael@0: return aB.mPtr == aPtr; michael@0: } michael@0: }; michael@0: michael@0: typedef js::HashSet BlockTable; michael@0: static BlockTable* gBlockTable = nullptr; michael@0: michael@0: typedef js::HashSet, michael@0: InfallibleAllocPolicy> michael@0: StackTraceSet; michael@0: michael@0: // Add a pointer to each live stack trace into the given StackTraceSet. (A michael@0: // stack trace is live if it's used by one of the live blocks.) michael@0: static void michael@0: GatherUsedStackTraces(StackTraceSet& aStackTraces) michael@0: { michael@0: MOZ_ASSERT(gStateLock->IsLocked()); michael@0: MOZ_ASSERT(Thread::Fetch()->InterceptsAreBlocked()); michael@0: michael@0: aStackTraces.finish(); michael@0: aStackTraces.init(1024); michael@0: michael@0: for (BlockTable::Range r = gBlockTable->all(); !r.empty(); r.popFront()) { michael@0: const Block& b = r.front(); michael@0: aStackTraces.put(b.AllocStackTrace()); michael@0: aStackTraces.put(b.ReportStackTrace1()); michael@0: aStackTraces.put(b.ReportStackTrace2()); michael@0: } michael@0: michael@0: // Any of the stack traces added above may have been null. For the sake of michael@0: // cleanliness, don't leave the null pointer in the set. michael@0: aStackTraces.remove(nullptr); michael@0: } michael@0: michael@0: // Delete stack traces that we aren't using, and compact our hashtable. michael@0: static void michael@0: GCStackTraces() michael@0: { michael@0: MOZ_ASSERT(gStateLock->IsLocked()); michael@0: MOZ_ASSERT(Thread::Fetch()->InterceptsAreBlocked()); michael@0: michael@0: StackTraceSet usedStackTraces; michael@0: GatherUsedStackTraces(usedStackTraces); michael@0: michael@0: // Delete all unused stack traces from gStackTraceTable. The Enum destructor michael@0: // will automatically rehash and compact the table. michael@0: for (StackTraceTable::Enum e(*gStackTraceTable); michael@0: !e.empty(); michael@0: e.popFront()) { michael@0: StackTrace* const& st = e.front(); michael@0: michael@0: if (!usedStackTraces.has(st)) { michael@0: e.removeFront(); michael@0: InfallibleAllocPolicy::delete_(st); michael@0: } michael@0: } michael@0: michael@0: // Schedule a GC when we have twice as many stack traces as we had right after michael@0: // this GC finished. 
michael@0: gGCStackTraceTableWhenSizeExceeds = 2 * gStackTraceTable->count(); michael@0: } michael@0: michael@0: //--------------------------------------------------------------------------- michael@0: // malloc/free callbacks michael@0: //--------------------------------------------------------------------------- michael@0: michael@0: static size_t gSmallBlockActualSizeCounter = 0; michael@0: michael@0: static void michael@0: AllocCallback(void* aPtr, size_t aReqSize, Thread* aT) michael@0: { michael@0: MOZ_ASSERT(gIsDMDRunning); michael@0: michael@0: if (!aPtr) { michael@0: return; michael@0: } michael@0: michael@0: AutoLockState lock; michael@0: AutoBlockIntercepts block(aT); michael@0: michael@0: size_t actualSize = gMallocTable->malloc_usable_size(aPtr); michael@0: size_t sampleBelowSize = gOptions->SampleBelowSize(); michael@0: michael@0: if (actualSize < sampleBelowSize) { michael@0: // If this allocation is smaller than the sample-below size, increment the michael@0: // cumulative counter. Then, if that counter now exceeds the sample size, michael@0: // blame this allocation for |sampleBelowSize| bytes. This precludes the michael@0: // measurement of slop. michael@0: gSmallBlockActualSizeCounter += actualSize; michael@0: if (gSmallBlockActualSizeCounter >= sampleBelowSize) { michael@0: gSmallBlockActualSizeCounter -= sampleBelowSize; michael@0: michael@0: Block b(aPtr, sampleBelowSize, StackTrace::Get(aT), /* sampled */ true); michael@0: (void)gBlockTable->putNew(aPtr, b); michael@0: } michael@0: } else { michael@0: // If this block size is larger than the sample size, record it exactly. michael@0: Block b(aPtr, aReqSize, StackTrace::Get(aT), /* sampled */ false); michael@0: (void)gBlockTable->putNew(aPtr, b); michael@0: } michael@0: } michael@0: michael@0: static void michael@0: FreeCallback(void* aPtr, Thread* aT) michael@0: { michael@0: MOZ_ASSERT(gIsDMDRunning); michael@0: michael@0: if (!aPtr) { michael@0: return; michael@0: } michael@0: michael@0: AutoLockState lock; michael@0: AutoBlockIntercepts block(aT); michael@0: michael@0: gBlockTable->remove(aPtr); michael@0: michael@0: if (gStackTraceTable->count() > gGCStackTraceTableWhenSizeExceeds) { michael@0: GCStackTraces(); michael@0: } michael@0: } michael@0: michael@0: //--------------------------------------------------------------------------- michael@0: // malloc/free interception michael@0: //--------------------------------------------------------------------------- michael@0: michael@0: static void Init(const malloc_table_t* aMallocTable); michael@0: michael@0: } // namespace dmd michael@0: } // namespace mozilla michael@0: michael@0: void michael@0: replace_init(const malloc_table_t* aMallocTable) michael@0: { michael@0: mozilla::dmd::Init(aMallocTable); michael@0: } michael@0: michael@0: void* michael@0: replace_malloc(size_t aSize) michael@0: { michael@0: using namespace mozilla::dmd; michael@0: michael@0: if (!gIsDMDRunning) { michael@0: // DMD hasn't started up, either because it wasn't enabled by the user, or michael@0: // we're still in Init() and something has indirectly called malloc. Do a michael@0: // vanilla malloc. (In the latter case, if it fails we'll crash. But michael@0: // OOM is highly unlikely so early on.) michael@0: return gMallocTable->malloc(aSize); michael@0: } michael@0: michael@0: Thread* t = Thread::Fetch(); michael@0: if (t->InterceptsAreBlocked()) { michael@0: // Intercepts are blocked, which means this must be a call to malloc michael@0: // triggered indirectly by DMD (e.g. 
via NS_StackWalk). Be infallible. michael@0: return InfallibleAllocPolicy::malloc_(aSize); michael@0: } michael@0: michael@0: // This must be a call to malloc from outside DMD. Intercept it. michael@0: void* ptr = gMallocTable->malloc(aSize); michael@0: AllocCallback(ptr, aSize, t); michael@0: return ptr; michael@0: } michael@0: michael@0: void* michael@0: replace_calloc(size_t aCount, size_t aSize) michael@0: { michael@0: using namespace mozilla::dmd; michael@0: michael@0: if (!gIsDMDRunning) { michael@0: return gMallocTable->calloc(aCount, aSize); michael@0: } michael@0: michael@0: Thread* t = Thread::Fetch(); michael@0: if (t->InterceptsAreBlocked()) { michael@0: return InfallibleAllocPolicy::calloc_(aCount * aSize); michael@0: } michael@0: michael@0: void* ptr = gMallocTable->calloc(aCount, aSize); michael@0: AllocCallback(ptr, aCount * aSize, t); michael@0: return ptr; michael@0: } michael@0: michael@0: void* michael@0: replace_realloc(void* aOldPtr, size_t aSize) michael@0: { michael@0: using namespace mozilla::dmd; michael@0: michael@0: if (!gIsDMDRunning) { michael@0: return gMallocTable->realloc(aOldPtr, aSize); michael@0: } michael@0: michael@0: Thread* t = Thread::Fetch(); michael@0: if (t->InterceptsAreBlocked()) { michael@0: return InfallibleAllocPolicy::realloc_(aOldPtr, aSize); michael@0: } michael@0: michael@0: // If |aOldPtr| is nullptr, the call is equivalent to |malloc(aSize)|. michael@0: if (!aOldPtr) { michael@0: return replace_malloc(aSize); michael@0: } michael@0: michael@0: // Be very careful here! Must remove the block from the table before doing michael@0: // the realloc to avoid races, just like in replace_free(). michael@0: // Nb: This does an unnecessary hashtable remove+add if the block doesn't michael@0: // move, but doing better isn't worth the effort. michael@0: FreeCallback(aOldPtr, t); michael@0: void* ptr = gMallocTable->realloc(aOldPtr, aSize); michael@0: if (ptr) { michael@0: AllocCallback(ptr, aSize, t); michael@0: } else { michael@0: // If realloc fails, we re-insert the old pointer. It will look like it michael@0: // was allocated for the first time here, which is untrue, and the slop michael@0: // bytes will be zero, which may be untrue. But this case is rare and michael@0: // doing better isn't worth the effort. michael@0: AllocCallback(aOldPtr, gMallocTable->malloc_usable_size(aOldPtr), t); michael@0: } michael@0: return ptr; michael@0: } michael@0: michael@0: void* michael@0: replace_memalign(size_t aAlignment, size_t aSize) michael@0: { michael@0: using namespace mozilla::dmd; michael@0: michael@0: if (!gIsDMDRunning) { michael@0: return gMallocTable->memalign(aAlignment, aSize); michael@0: } michael@0: michael@0: Thread* t = Thread::Fetch(); michael@0: if (t->InterceptsAreBlocked()) { michael@0: return InfallibleAllocPolicy::memalign_(aAlignment, aSize); michael@0: } michael@0: michael@0: void* ptr = gMallocTable->memalign(aAlignment, aSize); michael@0: AllocCallback(ptr, aSize, t); michael@0: return ptr; michael@0: } michael@0: michael@0: void michael@0: replace_free(void* aPtr) michael@0: { michael@0: using namespace mozilla::dmd; michael@0: michael@0: if (!gIsDMDRunning) { michael@0: gMallocTable->free(aPtr); michael@0: return; michael@0: } michael@0: michael@0: Thread* t = Thread::Fetch(); michael@0: if (t->InterceptsAreBlocked()) { michael@0: return InfallibleAllocPolicy::free_(aPtr); michael@0: } michael@0: michael@0: // Do the actual free after updating the table. 
Otherwise, another thread michael@0: // could call malloc and get the freed block and update the table, and then michael@0: // our update here would remove the newly-malloc'd block. michael@0: FreeCallback(aPtr, t); michael@0: gMallocTable->free(aPtr); michael@0: } michael@0: michael@0: namespace mozilla { michael@0: namespace dmd { michael@0: michael@0: //--------------------------------------------------------------------------- michael@0: // Stack trace records michael@0: //--------------------------------------------------------------------------- michael@0: michael@0: class TraceRecordKey michael@0: { michael@0: public: michael@0: const StackTrace* const mAllocStackTrace; // never null michael@0: protected: michael@0: const StackTrace* const mReportStackTrace1; // nullptr if unreported michael@0: const StackTrace* const mReportStackTrace2; // nullptr if not 2x-reported michael@0: michael@0: public: michael@0: TraceRecordKey(const Block& aB) michael@0: : mAllocStackTrace(aB.AllocStackTrace()), michael@0: mReportStackTrace1(aB.ReportStackTrace1()), michael@0: mReportStackTrace2(aB.ReportStackTrace2()) michael@0: { michael@0: MOZ_ASSERT(mAllocStackTrace); michael@0: } michael@0: michael@0: // Hash policy. michael@0: michael@0: typedef TraceRecordKey Lookup; michael@0: michael@0: static uint32_t hash(const TraceRecordKey& aKey) michael@0: { michael@0: return mozilla::HashGeneric(aKey.mAllocStackTrace, michael@0: aKey.mReportStackTrace1, michael@0: aKey.mReportStackTrace2); michael@0: } michael@0: michael@0: static bool match(const TraceRecordKey& aA, const TraceRecordKey& aB) michael@0: { michael@0: return aA.mAllocStackTrace == aB.mAllocStackTrace && michael@0: aA.mReportStackTrace1 == aB.mReportStackTrace1 && michael@0: aA.mReportStackTrace2 == aB.mReportStackTrace2; michael@0: } michael@0: }; michael@0: michael@0: class RecordSize michael@0: { michael@0: static const size_t kReqBits = sizeof(size_t) * 8 - 1; // 31 or 63 michael@0: michael@0: size_t mReq; // size requested michael@0: size_t mSlop:kReqBits; // slop bytes michael@0: size_t mSampled:1; // were one or more blocks contributing to this michael@0: // RecordSize sampled? michael@0: public: michael@0: RecordSize() michael@0: : mReq(0), michael@0: mSlop(0), michael@0: mSampled(false) michael@0: {} michael@0: michael@0: size_t Req() const { return mReq; } michael@0: size_t Slop() const { return mSlop; } michael@0: size_t Usable() const { return mReq + mSlop; } michael@0: michael@0: bool IsSampled() const { return mSampled; } michael@0: michael@0: void Add(const Block& aB) michael@0: { michael@0: mReq += aB.ReqSize(); michael@0: mSlop += aB.SlopSize(); michael@0: mSampled = mSampled || aB.IsSampled(); michael@0: } michael@0: michael@0: void Add(const RecordSize& aRecordSize) michael@0: { michael@0: mReq += aRecordSize.Req(); michael@0: mSlop += aRecordSize.Slop(); michael@0: mSampled = mSampled || aRecordSize.IsSampled(); michael@0: } michael@0: michael@0: static int Cmp(const RecordSize& aA, const RecordSize& aB) michael@0: { michael@0: // Primary sort: put bigger usable sizes first. michael@0: if (aA.Usable() > aB.Usable()) return -1; michael@0: if (aA.Usable() < aB.Usable()) return 1; michael@0: michael@0: // Secondary sort: put bigger requested sizes first. michael@0: if (aA.Req() > aB.Req()) return -1; michael@0: if (aA.Req() < aB.Req()) return 1; michael@0: michael@0: // Tertiary sort: put non-sampled records before sampled records. 
michael@0: if (!aA.mSampled && aB.mSampled) return -1; michael@0: if ( aA.mSampled && !aB.mSampled) return 1; michael@0: michael@0: return 0; michael@0: } michael@0: }; michael@0: michael@0: // A collection of one or more heap blocks with a common TraceRecordKey. michael@0: class TraceRecord : public TraceRecordKey michael@0: { michael@0: // The TraceRecordKey base class serves as the key in TraceRecordTables. michael@0: // These two fields constitute the value, so it's ok for them to be michael@0: // |mutable|. michael@0: mutable uint32_t mNumBlocks; // number of blocks with this TraceRecordKey michael@0: mutable RecordSize mRecordSize; // combined size of those blocks michael@0: michael@0: public: michael@0: explicit TraceRecord(const TraceRecordKey& aKey) michael@0: : TraceRecordKey(aKey), michael@0: mNumBlocks(0), michael@0: mRecordSize() michael@0: {} michael@0: michael@0: uint32_t NumBlocks() const { return mNumBlocks; } michael@0: michael@0: const RecordSize& GetRecordSize() const { return mRecordSize; } michael@0: michael@0: // This is |const| thanks to the |mutable| fields above. michael@0: void Add(const Block& aB) const michael@0: { michael@0: mNumBlocks++; michael@0: mRecordSize.Add(aB); michael@0: } michael@0: michael@0: // For PrintSortedRecords. michael@0: static const char* const kRecordKind; michael@0: static bool recordsOverlap() { return false; } michael@0: michael@0: void Print(const Writer& aWriter, LocationService* aLocService, michael@0: uint32_t aM, uint32_t aN, const char* aStr, const char* astr, michael@0: size_t aCategoryUsableSize, size_t aCumulativeUsableSize, michael@0: size_t aTotalUsableSize) const; michael@0: michael@0: static int QsortCmp(const void* aA, const void* aB) michael@0: { michael@0: const TraceRecord* const a = *static_cast(aA); michael@0: const TraceRecord* const b = *static_cast(aB); michael@0: michael@0: return RecordSize::Cmp(a->mRecordSize, b->mRecordSize); michael@0: } michael@0: }; michael@0: michael@0: const char* const TraceRecord::kRecordKind = "trace"; michael@0: michael@0: typedef js::HashSet michael@0: TraceRecordTable; michael@0: michael@0: void michael@0: TraceRecord::Print(const Writer& aWriter, LocationService* aLocService, michael@0: uint32_t aM, uint32_t aN, const char* aStr, const char* astr, michael@0: size_t aCategoryUsableSize, size_t aCumulativeUsableSize, michael@0: size_t aTotalUsableSize) const michael@0: { michael@0: bool showTilde = mRecordSize.IsSampled(); michael@0: michael@0: W("%s: %s block%s in stack trace record %s of %s\n", michael@0: aStr, michael@0: Show(mNumBlocks, gBuf1, kBufLen, showTilde), Plural(mNumBlocks), michael@0: Show(aM, gBuf2, kBufLen), michael@0: Show(aN, gBuf3, kBufLen)); michael@0: michael@0: W(" %s bytes (%s requested / %s slop)\n", michael@0: Show(mRecordSize.Usable(), gBuf1, kBufLen, showTilde), michael@0: Show(mRecordSize.Req(), gBuf2, kBufLen, showTilde), michael@0: Show(mRecordSize.Slop(), gBuf3, kBufLen, showTilde)); michael@0: michael@0: W(" %4.2f%% of the heap (%4.2f%% cumulative); " michael@0: " %4.2f%% of %s (%4.2f%% cumulative)\n", michael@0: Percent(mRecordSize.Usable(), aTotalUsableSize), michael@0: Percent(aCumulativeUsableSize, aTotalUsableSize), michael@0: Percent(mRecordSize.Usable(), aCategoryUsableSize), michael@0: astr, michael@0: Percent(aCumulativeUsableSize, aCategoryUsableSize)); michael@0: michael@0: W(" Allocated at\n"); michael@0: mAllocStackTrace->Print(aWriter, aLocService); michael@0: michael@0: if (mReportStackTrace1) { michael@0: W("\n Reported at\n"); 
michael@0: mReportStackTrace1->Print(aWriter, aLocService); michael@0: } michael@0: if (mReportStackTrace2) { michael@0: W("\n Reported again at\n"); michael@0: mReportStackTrace2->Print(aWriter, aLocService); michael@0: } michael@0: michael@0: W("\n"); michael@0: } michael@0: michael@0: //--------------------------------------------------------------------------- michael@0: // Stack frame records michael@0: //--------------------------------------------------------------------------- michael@0: michael@0: // A collection of one or more stack frames (from heap block allocation stack michael@0: // traces) with a common PC. michael@0: class FrameRecord michael@0: { michael@0: // mPc is used as the key in FrameRecordTable, and the other members michael@0: // constitute the value, so it's ok for them to be |mutable|. michael@0: const void* const mPc; michael@0: mutable size_t mNumBlocks; michael@0: mutable size_t mNumTraceRecords; michael@0: mutable RecordSize mRecordSize; michael@0: michael@0: public: michael@0: explicit FrameRecord(const void* aPc) michael@0: : mPc(aPc), michael@0: mNumBlocks(0), michael@0: mNumTraceRecords(0), michael@0: mRecordSize() michael@0: {} michael@0: michael@0: const RecordSize& GetRecordSize() const { return mRecordSize; } michael@0: michael@0: // This is |const| thanks to the |mutable| fields above. michael@0: void Add(const TraceRecord& aTr) const michael@0: { michael@0: mNumBlocks += aTr.NumBlocks(); michael@0: mNumTraceRecords++; michael@0: mRecordSize.Add(aTr.GetRecordSize()); michael@0: } michael@0: michael@0: void Print(const Writer& aWriter, LocationService* aLocService, michael@0: uint32_t aM, uint32_t aN, const char* aStr, const char* astr, michael@0: size_t aCategoryUsableSize, size_t aCumulativeUsableSize, michael@0: size_t aTotalUsableSize) const; michael@0: michael@0: static int QsortCmp(const void* aA, const void* aB) michael@0: { michael@0: const FrameRecord* const a = *static_cast(aA); michael@0: const FrameRecord* const b = *static_cast(aB); michael@0: michael@0: return RecordSize::Cmp(a->mRecordSize, b->mRecordSize); michael@0: } michael@0: michael@0: // For PrintSortedRecords. michael@0: static const char* const kRecordKind; michael@0: static bool recordsOverlap() { return true; } michael@0: michael@0: // Hash policy. 
michael@0: michael@0: typedef const void* Lookup; michael@0: michael@0: static uint32_t hash(const void* const& aPc) michael@0: { michael@0: return mozilla::HashGeneric(aPc); michael@0: } michael@0: michael@0: static bool match(const FrameRecord& aFr, const void* const& aPc) michael@0: { michael@0: return aFr.mPc == aPc; michael@0: } michael@0: }; michael@0: michael@0: const char* const FrameRecord::kRecordKind = "frame"; michael@0: michael@0: typedef js::HashSet michael@0: FrameRecordTable; michael@0: michael@0: void michael@0: FrameRecord::Print(const Writer& aWriter, LocationService* aLocService, michael@0: uint32_t aM, uint32_t aN, const char* aStr, const char* astr, michael@0: size_t aCategoryUsableSize, size_t aCumulativeUsableSize, michael@0: size_t aTotalUsableSize) const michael@0: { michael@0: (void)aCumulativeUsableSize; michael@0: michael@0: bool showTilde = mRecordSize.IsSampled(); michael@0: michael@0: W("%s: %s block%s from %s stack trace record%s in stack frame record %s of %s\n", michael@0: aStr, michael@0: Show(mNumBlocks, gBuf1, kBufLen, showTilde), Plural(mNumBlocks), michael@0: Show(mNumTraceRecords, gBuf2, kBufLen, showTilde), Plural(mNumTraceRecords), michael@0: Show(aM, gBuf3, kBufLen), michael@0: Show(aN, gBuf4, kBufLen)); michael@0: michael@0: W(" %s bytes (%s requested / %s slop)\n", michael@0: Show(mRecordSize.Usable(), gBuf1, kBufLen, showTilde), michael@0: Show(mRecordSize.Req(), gBuf2, kBufLen, showTilde), michael@0: Show(mRecordSize.Slop(), gBuf3, kBufLen, showTilde)); michael@0: michael@0: W(" %4.2f%% of the heap; %4.2f%% of %s\n", michael@0: Percent(mRecordSize.Usable(), aTotalUsableSize), michael@0: Percent(mRecordSize.Usable(), aCategoryUsableSize), michael@0: astr); michael@0: michael@0: W(" PC is\n"); michael@0: aLocService->WriteLocation(aWriter, mPc); michael@0: W("\n"); michael@0: } michael@0: michael@0: //--------------------------------------------------------------------------- michael@0: // Options (Part 2) michael@0: //--------------------------------------------------------------------------- michael@0: michael@0: // Given an |aOptionName| like "foo", succeed if |aArg| has the form "foo=blah" michael@0: // (where "blah" is non-empty) and return the pointer to "blah". |aArg| can michael@0: // have leading space chars (but not other whitespace). michael@0: const char* michael@0: Options::ValueIfMatch(const char* aArg, const char* aOptionName) michael@0: { michael@0: MOZ_ASSERT(!isspace(*aArg)); // any leading whitespace should not remain michael@0: size_t optionLen = strlen(aOptionName); michael@0: if (strncmp(aArg, aOptionName, optionLen) == 0 && aArg[optionLen] == '=' && michael@0: aArg[optionLen + 1]) { michael@0: return aArg + optionLen + 1; michael@0: } michael@0: return nullptr; michael@0: } michael@0: michael@0: // Extracts a |long| value for an option from an argument. It must be within michael@0: // the range |aMin..aMax| (inclusive). michael@0: bool michael@0: Options::GetLong(const char* aArg, const char* aOptionName, michael@0: long aMin, long aMax, long* aN) michael@0: { michael@0: if (const char* optionValue = ValueIfMatch(aArg, aOptionName)) { michael@0: char* endPtr; michael@0: *aN = strtol(optionValue, &endPtr, /* base */ 10); michael@0: if (!*endPtr && aMin <= *aN && *aN <= aMax && michael@0: *aN != LONG_MIN && *aN != LONG_MAX) { michael@0: return true; michael@0: } michael@0: } michael@0: return false; michael@0: } michael@0: michael@0: // The sample-below default is a prime number close to 4096. michael@0: // - Why that size? 
Because it's *much* faster but only moderately less precise michael@0: // than a size of 1. michael@0: // - Why prime? Because it makes our sampling more random. If we used a size michael@0: // of 4096, for example, then our alloc counter would only take on even michael@0: // values, because jemalloc always rounds up requests sizes. In contrast, a michael@0: // prime size will explore all possible values of the alloc counter. michael@0: // michael@0: Options::Options(const char* aDMDEnvVar) michael@0: : mDMDEnvVar(InfallibleAllocPolicy::strdup_(aDMDEnvVar)), michael@0: mSampleBelowSize(4093, 100 * 100 * 1000), michael@0: mMaxFrames(StackTrace::MaxFrames, StackTrace::MaxFrames), michael@0: mMaxRecords(1000, 1000000), michael@0: mMode(Normal) michael@0: { michael@0: char* e = mDMDEnvVar; michael@0: if (strcmp(e, "1") != 0) { michael@0: bool isEnd = false; michael@0: while (!isEnd) { michael@0: // Consume leading whitespace. michael@0: while (isspace(*e)) { michael@0: e++; michael@0: } michael@0: michael@0: // Save the start of the arg. michael@0: const char* arg = e; michael@0: michael@0: // Find the first char after the arg, and temporarily change it to '\0' michael@0: // to isolate the arg. michael@0: while (!isspace(*e) && *e != '\0') { michael@0: e++; michael@0: } michael@0: char replacedChar = *e; michael@0: isEnd = replacedChar == '\0'; michael@0: *e = '\0'; michael@0: michael@0: // Handle arg michael@0: long myLong; michael@0: if (GetLong(arg, "--sample-below", 1, mSampleBelowSize.mMax, &myLong)) { michael@0: mSampleBelowSize.mActual = myLong; michael@0: michael@0: } else if (GetLong(arg, "--max-frames", 1, mMaxFrames.mMax, &myLong)) { michael@0: mMaxFrames.mActual = myLong; michael@0: michael@0: } else if (GetLong(arg, "--max-records", 1, mMaxRecords.mMax, &myLong)) { michael@0: mMaxRecords.mActual = myLong; michael@0: michael@0: } else if (strcmp(arg, "--mode=normal") == 0) { michael@0: mMode = Options::Normal; michael@0: } else if (strcmp(arg, "--mode=test") == 0) { michael@0: mMode = Options::Test; michael@0: } else if (strcmp(arg, "--mode=stress") == 0) { michael@0: mMode = Options::Stress; michael@0: michael@0: } else if (strcmp(arg, "") == 0) { michael@0: // This can only happen if there is trailing whitespace. Ignore. michael@0: MOZ_ASSERT(isEnd); michael@0: michael@0: } else { michael@0: BadArg(arg); michael@0: } michael@0: michael@0: // Undo the temporary isolation. michael@0: *e = replacedChar; michael@0: } michael@0: } michael@0: } michael@0: michael@0: void michael@0: Options::BadArg(const char* aArg) michael@0: { michael@0: StatusMsg("\n"); michael@0: StatusMsg("Bad entry in the $DMD environment variable: '%s'.\n", aArg); michael@0: StatusMsg("\n"); michael@0: StatusMsg("Valid values of $DMD are:\n"); michael@0: StatusMsg("- undefined or \"\" or \"0\", which disables DMD, or\n"); michael@0: StatusMsg("- \"1\", which enables it with the default options, or\n"); michael@0: StatusMsg("- a whitespace-separated list of |--option=val| entries, which\n"); michael@0: StatusMsg(" enables it with non-default options.\n"); michael@0: StatusMsg("\n"); michael@0: StatusMsg("The following options are allowed; defaults are shown in [].\n"); michael@0: StatusMsg(" --sample-below=<1..%d> Sample blocks smaller than this [%d]\n", michael@0: int(mSampleBelowSize.mMax), michael@0: int(mSampleBelowSize.mDefault)); michael@0: StatusMsg(" (prime numbers are recommended)\n"); michael@0: StatusMsg(" --max-frames=<1..%d> Max. 
depth of stack traces [%d]\n", michael@0: int(mMaxFrames.mMax), michael@0: int(mMaxFrames.mDefault)); michael@0: StatusMsg(" --max-records=<1..%u> Max. number of records printed [%u]\n", michael@0: mMaxRecords.mMax, michael@0: mMaxRecords.mDefault); michael@0: StatusMsg(" --mode= Mode of operation [normal]\n"); michael@0: StatusMsg("\n"); michael@0: exit(1); michael@0: } michael@0: michael@0: //--------------------------------------------------------------------------- michael@0: // DMD start-up michael@0: //--------------------------------------------------------------------------- michael@0: michael@0: #ifdef XP_MACOSX michael@0: static void michael@0: NopStackWalkCallback(void* aPc, void* aSp, void* aClosure) michael@0: { michael@0: } michael@0: #endif michael@0: michael@0: // Note that fopen() can allocate. michael@0: static FILE* michael@0: OpenOutputFile(const char* aFilename) michael@0: { michael@0: FILE* fp = fopen(aFilename, "w"); michael@0: if (!fp) { michael@0: StatusMsg("can't create %s file: %s\n", aFilename, strerror(errno)); michael@0: exit(1); michael@0: } michael@0: return fp; michael@0: } michael@0: michael@0: static void RunTestMode(FILE* fp); michael@0: static void RunStressMode(FILE* fp); michael@0: michael@0: // WARNING: this function runs *very* early -- before all static initializers michael@0: // have run. For this reason, non-scalar globals such as gStateLock and michael@0: // gStackTraceTable are allocated dynamically (so we can guarantee their michael@0: // construction in this function) rather than statically. michael@0: static void michael@0: Init(const malloc_table_t* aMallocTable) michael@0: { michael@0: MOZ_ASSERT(!gIsDMDRunning); michael@0: michael@0: gMallocTable = aMallocTable; michael@0: michael@0: // DMD is controlled by the |DMD| environment variable. michael@0: // - If it's unset or empty or "0", DMD doesn't run. michael@0: // - Otherwise, the contents dictate DMD's behaviour. michael@0: michael@0: char* e = getenv("DMD"); michael@0: StatusMsg("$DMD = '%s'\n", e); michael@0: michael@0: if (!e || strcmp(e, "") == 0 || strcmp(e, "0") == 0) { michael@0: StatusMsg("DMD is not enabled\n"); michael@0: return; michael@0: } michael@0: michael@0: // Parse $DMD env var. michael@0: gOptions = InfallibleAllocPolicy::new_(e); michael@0: michael@0: StatusMsg("DMD is enabled\n"); michael@0: michael@0: #ifdef XP_MACOSX michael@0: // On Mac OS X we need to call StackWalkInitCriticalAddress() very early michael@0: // (prior to the creation of any mutexes, apparently) otherwise we can get michael@0: // hangs when getting stack traces (bug 821577). But michael@0: // StackWalkInitCriticalAddress() isn't exported from xpcom/, so instead we michael@0: // just call NS_StackWalk, because that calls StackWalkInitCriticalAddress(). michael@0: // See the comment above StackWalkInitCriticalAddress() for more details. 
michael@0: (void)NS_StackWalk(NopStackWalkCallback, /* skipFrames */ 0, michael@0: /* maxFrames */ 1, nullptr, 0, nullptr); michael@0: #endif michael@0: michael@0: gStateLock = InfallibleAllocPolicy::new_(); michael@0: michael@0: gSmallBlockActualSizeCounter = 0; michael@0: michael@0: DMD_CREATE_TLS_INDEX(gTlsIndex); michael@0: michael@0: { michael@0: AutoLockState lock; michael@0: michael@0: gStackTraceTable = InfallibleAllocPolicy::new_(); michael@0: gStackTraceTable->init(8192); michael@0: michael@0: gBlockTable = InfallibleAllocPolicy::new_(); michael@0: gBlockTable->init(8192); michael@0: } michael@0: michael@0: if (gOptions->IsTestMode()) { michael@0: // OpenOutputFile() can allocate. So do this before setting michael@0: // gIsDMDRunning so those allocations don't show up in our results. Once michael@0: // gIsDMDRunning is set we are intercepting malloc et al. in earnest. michael@0: FILE* fp = OpenOutputFile("test.dmd"); michael@0: gIsDMDRunning = true; michael@0: michael@0: StatusMsg("running test mode...\n"); michael@0: RunTestMode(fp); michael@0: StatusMsg("finished test mode\n"); michael@0: fclose(fp); michael@0: exit(0); michael@0: } michael@0: michael@0: if (gOptions->IsStressMode()) { michael@0: FILE* fp = OpenOutputFile("stress.dmd"); michael@0: gIsDMDRunning = true; michael@0: michael@0: StatusMsg("running stress mode...\n"); michael@0: RunStressMode(fp); michael@0: StatusMsg("finished stress mode\n"); michael@0: fclose(fp); michael@0: exit(0); michael@0: } michael@0: michael@0: gIsDMDRunning = true; michael@0: } michael@0: michael@0: //--------------------------------------------------------------------------- michael@0: // DMD reporting and unreporting michael@0: //--------------------------------------------------------------------------- michael@0: michael@0: static void michael@0: ReportHelper(const void* aPtr, bool aReportedOnAlloc) michael@0: { michael@0: if (!gIsDMDRunning || !aPtr) { michael@0: return; michael@0: } michael@0: michael@0: Thread* t = Thread::Fetch(); michael@0: michael@0: AutoBlockIntercepts block(t); michael@0: AutoLockState lock; michael@0: michael@0: if (BlockTable::Ptr p = gBlockTable->lookup(aPtr)) { michael@0: p->Report(t, aReportedOnAlloc); michael@0: } else { michael@0: // We have no record of the block. Do nothing. Either: michael@0: // - We're sampling and we skipped this block. This is likely. michael@0: // - It's a bogus pointer. This is unlikely because Report() is almost michael@0: // always called in conjunction with a malloc_size_of-style function. michael@0: } michael@0: } michael@0: michael@0: MOZ_EXPORT void michael@0: Report(const void* aPtr) michael@0: { michael@0: ReportHelper(aPtr, /* onAlloc */ false); michael@0: } michael@0: michael@0: MOZ_EXPORT void michael@0: ReportOnAlloc(const void* aPtr) michael@0: { michael@0: ReportHelper(aPtr, /* onAlloc */ true); michael@0: } michael@0: michael@0: //--------------------------------------------------------------------------- michael@0: // DMD output michael@0: //--------------------------------------------------------------------------- michael@0: michael@0: // This works for both TraceRecords and StackFrameRecords. 
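// In other words, the Record type parameter must provide everything
// PrintSortedRecords() uses below:
//   - a static kRecordKind string (e.g. FrameRecord's is "frame");
//   - a static QsortCmp() suitable for qsort();
//   - GetRecordSize(), whose result has a Usable() accessor;
//   - Print(), with the signature seen in FrameRecord::Print() above;
//   - a static recordsOverlap() predicate (frame records overlap because one
//     block can contribute to several stack frames; trace records do not).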
michael@0: template michael@0: static void michael@0: PrintSortedRecords(const Writer& aWriter, LocationService* aLocService, michael@0: const char* aStr, const char* astr, michael@0: const js::HashSet& michael@0: aRecordTable, michael@0: size_t aCategoryUsableSize, size_t aTotalUsableSize) michael@0: { michael@0: const char* kind = Record::kRecordKind; michael@0: StatusMsg(" creating and sorting %s stack %s record array...\n", astr, kind); michael@0: michael@0: // Convert the table into a sorted array. michael@0: js::Vector recordArray; michael@0: recordArray.reserve(aRecordTable.count()); michael@0: typedef js::HashSet RecordTable; michael@0: for (typename RecordTable::Range r = aRecordTable.all(); michael@0: !r.empty(); michael@0: r.popFront()) { michael@0: recordArray.infallibleAppend(&r.front()); michael@0: } michael@0: qsort(recordArray.begin(), recordArray.length(), sizeof(recordArray[0]), michael@0: Record::QsortCmp); michael@0: michael@0: WriteTitle("%s stack %s records\n", aStr, kind); michael@0: michael@0: if (recordArray.length() == 0) { michael@0: W("(none)\n\n"); michael@0: return; michael@0: } michael@0: michael@0: StatusMsg(" printing %s stack %s record array...\n", astr, kind); michael@0: size_t cumulativeUsableSize = 0; michael@0: michael@0: // Limit the number of records printed, because fix-linux-stack.pl is too michael@0: // damn slow. Note that we don't break out of this loop because we need to michael@0: // keep adding to |cumulativeUsableSize|. michael@0: uint32_t numRecords = recordArray.length(); michael@0: uint32_t maxRecords = gOptions->MaxRecords(); michael@0: for (uint32_t i = 0; i < numRecords; i++) { michael@0: const Record* r = recordArray[i]; michael@0: cumulativeUsableSize += r->GetRecordSize().Usable(); michael@0: if (i < maxRecords) { michael@0: r->Print(aWriter, aLocService, i+1, numRecords, aStr, astr, michael@0: aCategoryUsableSize, cumulativeUsableSize, aTotalUsableSize); michael@0: } else if (i == maxRecords) { michael@0: W("%s: stopping after %s stack %s records\n\n", aStr, michael@0: Show(maxRecords, gBuf1, kBufLen), kind); michael@0: } michael@0: } michael@0: michael@0: // This holds for TraceRecords, but not for FrameRecords. michael@0: MOZ_ASSERT_IF(!Record::recordsOverlap(), michael@0: aCategoryUsableSize == cumulativeUsableSize); michael@0: } michael@0: michael@0: static void michael@0: PrintSortedTraceAndFrameRecords(const Writer& aWriter, michael@0: LocationService* aLocService, michael@0: const char* aStr, const char* astr, michael@0: const TraceRecordTable& aTraceRecordTable, michael@0: size_t aCategoryUsableSize, michael@0: size_t aTotalUsableSize) michael@0: { michael@0: PrintSortedRecords(aWriter, aLocService, aStr, astr, aTraceRecordTable, michael@0: aCategoryUsableSize, aTotalUsableSize); michael@0: michael@0: FrameRecordTable frameRecordTable; michael@0: (void)frameRecordTable.init(2048); michael@0: for (TraceRecordTable::Range r = aTraceRecordTable.all(); michael@0: !r.empty(); michael@0: r.popFront()) { michael@0: const TraceRecord& tr = r.front(); michael@0: const StackTrace* st = tr.mAllocStackTrace; michael@0: michael@0: // A single PC can appear multiple times in a stack trace. We ignore michael@0: // duplicates by first sorting and then ignoring adjacent duplicates. 
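// For example, a trace with PCs [A, B, A, C] is copied and sorted to
// [A, A, B, C]; the second A compares equal to its predecessor and is
// skipped, so the trace record is added exactly once to each of the frame
// records for A, B and C.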
michael@0: StackTrace sorted(*st); michael@0: sorted.Sort(); // sorts the copy, not the original michael@0: void* prevPc = (void*)intptr_t(-1); michael@0: for (uint32_t i = 0; i < sorted.Length(); i++) { michael@0: void* pc = sorted.Pc(i); michael@0: if (pc == prevPc) { michael@0: continue; // ignore duplicate michael@0: } michael@0: prevPc = pc; michael@0: michael@0: FrameRecordTable::AddPtr p = frameRecordTable.lookupForAdd(pc); michael@0: if (!p) { michael@0: FrameRecord fr(pc); michael@0: (void)frameRecordTable.add(p, fr); michael@0: } michael@0: p->Add(tr); michael@0: } michael@0: } michael@0: michael@0: PrintSortedRecords(aWriter, aLocService, aStr, astr, frameRecordTable, michael@0: aCategoryUsableSize, aTotalUsableSize); michael@0: } michael@0: michael@0: // Note that, unlike most SizeOf* functions, this function does not take a michael@0: // |mozilla::MallocSizeOf| argument. That's because those arguments are michael@0: // primarily to aid DMD track heap blocks... but DMD deliberately doesn't track michael@0: // heap blocks it allocated for itself! michael@0: // michael@0: // SizeOfInternal should be called while you're holding the state lock and michael@0: // while intercepts are blocked; SizeOf acquires the lock and blocks michael@0: // intercepts. michael@0: michael@0: static void michael@0: SizeOfInternal(Sizes* aSizes) michael@0: { michael@0: MOZ_ASSERT(gStateLock->IsLocked()); michael@0: MOZ_ASSERT(Thread::Fetch()->InterceptsAreBlocked()); michael@0: michael@0: aSizes->Clear(); michael@0: michael@0: if (!gIsDMDRunning) { michael@0: return; michael@0: } michael@0: michael@0: StackTraceSet usedStackTraces; michael@0: GatherUsedStackTraces(usedStackTraces); michael@0: michael@0: for (StackTraceTable::Range r = gStackTraceTable->all(); michael@0: !r.empty(); michael@0: r.popFront()) { michael@0: StackTrace* const& st = r.front(); michael@0: michael@0: if (usedStackTraces.has(st)) { michael@0: aSizes->mStackTracesUsed += MallocSizeOf(st); michael@0: } else { michael@0: aSizes->mStackTracesUnused += MallocSizeOf(st); michael@0: } michael@0: } michael@0: michael@0: aSizes->mStackTraceTable = michael@0: gStackTraceTable->sizeOfIncludingThis(MallocSizeOf); michael@0: michael@0: aSizes->mBlockTable = gBlockTable->sizeOfIncludingThis(MallocSizeOf); michael@0: } michael@0: michael@0: MOZ_EXPORT void michael@0: SizeOf(Sizes* aSizes) michael@0: { michael@0: aSizes->Clear(); michael@0: michael@0: if (!gIsDMDRunning) { michael@0: return; michael@0: } michael@0: michael@0: AutoBlockIntercepts block(Thread::Fetch()); michael@0: AutoLockState lock; michael@0: SizeOfInternal(aSizes); michael@0: } michael@0: michael@0: void michael@0: ClearReportsInternal() michael@0: { michael@0: MOZ_ASSERT(gStateLock->IsLocked()); michael@0: michael@0: // Unreport all blocks that were marked reported by a memory reporter. This michael@0: // excludes those that were reported on allocation, because they need to keep michael@0: // their reported marking. 
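// E.g. a block passed to ReportOnAlloc() stays reported across calls to this
// function, whereas a block passed to Report() is unreported here and must be
// reported again by the next memory-reporter run to stay out of the
// "unreported" records.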
michael@0: for (BlockTable::Range r = gBlockTable->all(); !r.empty(); r.popFront()) { michael@0: r.front().UnreportIfNotReportedOnAlloc(); michael@0: } michael@0: } michael@0: michael@0: MOZ_EXPORT void michael@0: ClearReports() michael@0: { michael@0: if (!gIsDMDRunning) { michael@0: return; michael@0: } michael@0: michael@0: AutoLockState lock; michael@0: ClearReportsInternal(); michael@0: } michael@0: michael@0: MOZ_EXPORT bool michael@0: IsEnabled() michael@0: { michael@0: return gIsDMDRunning; michael@0: } michael@0: michael@0: MOZ_EXPORT void michael@0: Dump(Writer aWriter) michael@0: { michael@0: if (!gIsDMDRunning) { michael@0: const char* msg = "cannot Dump(); DMD was not enabled at startup\n"; michael@0: StatusMsg("%s", msg); michael@0: W("%s", msg); michael@0: return; michael@0: } michael@0: michael@0: AutoBlockIntercepts block(Thread::Fetch()); michael@0: AutoLockState lock; michael@0: michael@0: static int dumpCount = 1; michael@0: StatusMsg("Dump %d {\n", dumpCount++); michael@0: michael@0: StatusMsg(" gathering stack trace records...\n"); michael@0: michael@0: TraceRecordTable unreportedTraceRecordTable; michael@0: (void)unreportedTraceRecordTable.init(1024); michael@0: size_t unreportedUsableSize = 0; michael@0: size_t unreportedNumBlocks = 0; michael@0: michael@0: TraceRecordTable onceReportedTraceRecordTable; michael@0: (void)onceReportedTraceRecordTable.init(1024); michael@0: size_t onceReportedUsableSize = 0; michael@0: size_t onceReportedNumBlocks = 0; michael@0: michael@0: TraceRecordTable twiceReportedTraceRecordTable; michael@0: (void)twiceReportedTraceRecordTable.init(0); michael@0: size_t twiceReportedUsableSize = 0; michael@0: size_t twiceReportedNumBlocks = 0; michael@0: michael@0: bool anyBlocksSampled = false; michael@0: michael@0: for (BlockTable::Range r = gBlockTable->all(); !r.empty(); r.popFront()) { michael@0: const Block& b = r.front(); michael@0: michael@0: TraceRecordTable* table; michael@0: uint32_t numReports = b.NumReports(); michael@0: if (numReports == 0) { michael@0: unreportedUsableSize += b.UsableSize(); michael@0: unreportedNumBlocks++; michael@0: table = &unreportedTraceRecordTable; michael@0: } else if (numReports == 1) { michael@0: onceReportedUsableSize += b.UsableSize(); michael@0: onceReportedNumBlocks++; michael@0: table = &onceReportedTraceRecordTable; michael@0: } else { michael@0: MOZ_ASSERT(numReports == 2); michael@0: twiceReportedUsableSize += b.UsableSize(); michael@0: twiceReportedNumBlocks++; michael@0: table = &twiceReportedTraceRecordTable; michael@0: } michael@0: TraceRecordKey key(b); michael@0: TraceRecordTable::AddPtr p = table->lookupForAdd(key); michael@0: if (!p) { michael@0: TraceRecord tr(b); michael@0: (void)table->add(p, tr); michael@0: } michael@0: p->Add(b); michael@0: michael@0: anyBlocksSampled = anyBlocksSampled || b.IsSampled(); michael@0: } michael@0: size_t totalUsableSize = michael@0: unreportedUsableSize + onceReportedUsableSize + twiceReportedUsableSize; michael@0: size_t totalNumBlocks = michael@0: unreportedNumBlocks + onceReportedNumBlocks + twiceReportedNumBlocks; michael@0: michael@0: WriteTitle("Invocation\n"); michael@0: W("$DMD = '%s'\n", gOptions->DMDEnvVar()); michael@0: W("Sample-below size = %lld\n\n", michael@0: (long long)(gOptions->SampleBelowSize())); michael@0: michael@0: // Allocate this on the heap instead of the stack because it's fairly large. 
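// It is measured under "Data structures that are destroyed after Dump()
// ends" below, and freed with InfallibleAllocPolicy::delete_() near the end
// of this function.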
michael@0: LocationService* locService = InfallibleAllocPolicy::new_(); michael@0: michael@0: PrintSortedRecords(aWriter, locService, "Twice-reported", "twice-reported", michael@0: twiceReportedTraceRecordTable, twiceReportedUsableSize, michael@0: totalUsableSize); michael@0: michael@0: PrintSortedTraceAndFrameRecords(aWriter, locService, michael@0: "Unreported", "unreported", michael@0: unreportedTraceRecordTable, michael@0: unreportedUsableSize, totalUsableSize); michael@0: michael@0: PrintSortedTraceAndFrameRecords(aWriter, locService, michael@0: "Once-reported", "once-reported", michael@0: onceReportedTraceRecordTable, michael@0: onceReportedUsableSize, totalUsableSize); michael@0: michael@0: bool showTilde = anyBlocksSampled; michael@0: WriteTitle("Summary\n"); michael@0: michael@0: W("Total: %12s bytes (%6.2f%%) in %7s blocks (%6.2f%%)\n", michael@0: Show(totalUsableSize, gBuf1, kBufLen, showTilde), michael@0: 100.0, michael@0: Show(totalNumBlocks, gBuf2, kBufLen, showTilde), michael@0: 100.0); michael@0: michael@0: W("Unreported: %12s bytes (%6.2f%%) in %7s blocks (%6.2f%%)\n", michael@0: Show(unreportedUsableSize, gBuf1, kBufLen, showTilde), michael@0: Percent(unreportedUsableSize, totalUsableSize), michael@0: Show(unreportedNumBlocks, gBuf2, kBufLen, showTilde), michael@0: Percent(unreportedNumBlocks, totalNumBlocks)); michael@0: michael@0: W("Once-reported: %12s bytes (%6.2f%%) in %7s blocks (%6.2f%%)\n", michael@0: Show(onceReportedUsableSize, gBuf1, kBufLen, showTilde), michael@0: Percent(onceReportedUsableSize, totalUsableSize), michael@0: Show(onceReportedNumBlocks, gBuf2, kBufLen, showTilde), michael@0: Percent(onceReportedNumBlocks, totalNumBlocks)); michael@0: michael@0: W("Twice-reported: %12s bytes (%6.2f%%) in %7s blocks (%6.2f%%)\n", michael@0: Show(twiceReportedUsableSize, gBuf1, kBufLen, showTilde), michael@0: Percent(twiceReportedUsableSize, totalUsableSize), michael@0: Show(twiceReportedNumBlocks, gBuf2, kBufLen, showTilde), michael@0: Percent(twiceReportedNumBlocks, totalNumBlocks)); michael@0: michael@0: W("\n"); michael@0: michael@0: // Stats are non-deterministic, so don't show them in test mode. 
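// (They depend on table capacities, cache hit rates and the sizes of DMD's
// own data structures, all of which vary from run to run, so including them
// would make comparison against test-expected.dmd unreliable.)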
michael@0: if (!gOptions->IsTestMode()) { michael@0: Sizes sizes; michael@0: SizeOfInternal(&sizes); michael@0: michael@0: WriteTitle("Execution measurements\n"); michael@0: michael@0: W("Data structures that persist after Dump() ends:\n"); michael@0: michael@0: W(" Used stack traces: %10s bytes\n", michael@0: Show(sizes.mStackTracesUsed, gBuf1, kBufLen)); michael@0: michael@0: W(" Unused stack traces: %10s bytes\n", michael@0: Show(sizes.mStackTracesUnused, gBuf1, kBufLen)); michael@0: michael@0: W(" Stack trace table: %10s bytes (%s entries, %s used)\n", michael@0: Show(sizes.mStackTraceTable, gBuf1, kBufLen), michael@0: Show(gStackTraceTable->capacity(), gBuf2, kBufLen), michael@0: Show(gStackTraceTable->count(), gBuf3, kBufLen)); michael@0: michael@0: W(" Block table: %10s bytes (%s entries, %s used)\n", michael@0: Show(sizes.mBlockTable, gBuf1, kBufLen), michael@0: Show(gBlockTable->capacity(), gBuf2, kBufLen), michael@0: Show(gBlockTable->count(), gBuf3, kBufLen)); michael@0: michael@0: W("\nData structures that are destroyed after Dump() ends:\n"); michael@0: michael@0: size_t unreportedSize = michael@0: unreportedTraceRecordTable.sizeOfIncludingThis(MallocSizeOf); michael@0: W(" Unreported table: %10s bytes (%s entries, %s used)\n", michael@0: Show(unreportedSize, gBuf1, kBufLen), michael@0: Show(unreportedTraceRecordTable.capacity(), gBuf2, kBufLen), michael@0: Show(unreportedTraceRecordTable.count(), gBuf3, kBufLen)); michael@0: michael@0: size_t onceReportedSize = michael@0: onceReportedTraceRecordTable.sizeOfIncludingThis(MallocSizeOf); michael@0: W(" Once-reported table: %10s bytes (%s entries, %s used)\n", michael@0: Show(onceReportedSize, gBuf1, kBufLen), michael@0: Show(onceReportedTraceRecordTable.capacity(), gBuf2, kBufLen), michael@0: Show(onceReportedTraceRecordTable.count(), gBuf3, kBufLen)); michael@0: michael@0: size_t twiceReportedSize = michael@0: twiceReportedTraceRecordTable.sizeOfIncludingThis(MallocSizeOf); michael@0: W(" Twice-reported table: %10s bytes (%s entries, %s used)\n", michael@0: Show(twiceReportedSize, gBuf1, kBufLen), michael@0: Show(twiceReportedTraceRecordTable.capacity(), gBuf2, kBufLen), michael@0: Show(twiceReportedTraceRecordTable.count(), gBuf3, kBufLen)); michael@0: michael@0: W(" Location service: %10s bytes\n", michael@0: Show(locService->SizeOfIncludingThis(), gBuf1, kBufLen)); michael@0: michael@0: W("\nCounts:\n"); michael@0: michael@0: size_t hits = locService->NumCacheHits(); michael@0: size_t misses = locService->NumCacheMisses(); michael@0: size_t requests = hits + misses; michael@0: W(" Location service: %10s requests\n", michael@0: Show(requests, gBuf1, kBufLen)); michael@0: michael@0: size_t count = locService->CacheCount(); michael@0: size_t capacity = locService->CacheCapacity(); michael@0: W(" Location service cache: %4.1f%% hit rate, %.1f%% occupancy at end\n", michael@0: Percent(hits, requests), Percent(count, capacity)); michael@0: michael@0: W("\n"); michael@0: } michael@0: michael@0: InfallibleAllocPolicy::delete_(locService); michael@0: michael@0: ClearReportsInternal(); // Use internal version, we already have the lock. 
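// Clearing here means each Dump() reflects only the reports made since the
// previous one (apart from blocks reported with ReportOnAlloc(), which keep
// their marking).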
michael@0: michael@0: StatusMsg("}\n"); michael@0: } michael@0: michael@0: //--------------------------------------------------------------------------- michael@0: // Testing michael@0: //--------------------------------------------------------------------------- michael@0: michael@0: // This function checks that heap blocks that have the same stack trace but michael@0: // different (or no) reporters get aggregated separately. michael@0: void foo() michael@0: { michael@0: char* a[6]; michael@0: for (int i = 0; i < 6; i++) { michael@0: a[i] = (char*) malloc(128 - 16*i); michael@0: } michael@0: michael@0: for (int i = 0; i <= 1; i++) michael@0: Report(a[i]); // reported michael@0: Report(a[2]); // reported michael@0: Report(a[3]); // reported michael@0: // a[4], a[5] unreported michael@0: } michael@0: michael@0: // This stops otherwise-unused variables from being optimized away. michael@0: static void michael@0: UseItOrLoseIt(void* a) michael@0: { michael@0: char buf[64]; michael@0: sprintf(buf, "%p\n", a); michael@0: fwrite(buf, 1, strlen(buf) + 1, stderr); michael@0: } michael@0: michael@0: // The output from this should be compared against test-expected.dmd. It's michael@0: // been tested on Linux64, and probably will give different results on other michael@0: // platforms. michael@0: static void michael@0: RunTestMode(FILE* fp) michael@0: { michael@0: Writer writer(FpWrite, fp); michael@0: michael@0: // The first part of this test requires sampling to be disabled. michael@0: gOptions->SetSampleBelowSize(1); michael@0: michael@0: // Dump 1. Zero for everything. michael@0: Dump(writer); michael@0: michael@0: // Dump 2: 1 freed, 9 out of 10 unreported. michael@0: // Dump 3: still present and unreported. michael@0: int i; michael@0: char* a; michael@0: for (i = 0; i < 10; i++) { michael@0: a = (char*) malloc(100); michael@0: UseItOrLoseIt(a); michael@0: } michael@0: free(a); michael@0: michael@0: // Min-sized block. michael@0: // Dump 2: reported. michael@0: // Dump 3: thrice-reported. michael@0: char* a2 = (char*) malloc(0); michael@0: Report(a2); michael@0: michael@0: // Operator new[]. michael@0: // Dump 2: reported. michael@0: // Dump 3: reportedness carries over, due to ReportOnAlloc. michael@0: char* b = new char[10]; michael@0: ReportOnAlloc(b); michael@0: michael@0: // ReportOnAlloc, then freed. michael@0: // Dump 2: freed, irrelevant. michael@0: // Dump 3: freed, irrelevant. michael@0: char* b2 = new char; michael@0: ReportOnAlloc(b2); michael@0: free(b2); michael@0: michael@0: // Dump 2: reported 4 times. michael@0: // Dump 3: freed, irrelevant. michael@0: char* c = (char*) calloc(10, 3); michael@0: Report(c); michael@0: for (int i = 0; i < 3; i++) { michael@0: Report(c); michael@0: } michael@0: michael@0: // Dump 2: ignored. michael@0: // Dump 3: irrelevant. michael@0: Report((void*)(intptr_t)i); michael@0: michael@0: // jemalloc rounds this up to 8192. michael@0: // Dump 2: reported. michael@0: // Dump 3: freed. michael@0: char* e = (char*) malloc(4096); michael@0: e = (char*) realloc(e, 4097); michael@0: Report(e); michael@0: michael@0: // First realloc is like malloc; second realloc is shrinking. michael@0: // Dump 2: reported. michael@0: // Dump 3: re-reported. michael@0: char* e2 = (char*) realloc(nullptr, 1024); michael@0: e2 = (char*) realloc(e2, 512); michael@0: Report(e2); michael@0: michael@0: // First realloc is like malloc; second realloc creates a min-sized block. michael@0: // XXX: on Windows, second realloc frees the block. michael@0: // Dump 2: reported. 
michael@0: // Dump 3: freed, irrelevant. michael@0: char* e3 = (char*) realloc(nullptr, 1023); michael@0: //e3 = (char*) realloc(e3, 0); michael@0: MOZ_ASSERT(e3); michael@0: Report(e3); michael@0: michael@0: // Dump 2: freed, irrelevant. michael@0: // Dump 3: freed, irrelevant. michael@0: char* f = (char*) malloc(64); michael@0: free(f); michael@0: michael@0: // Dump 2: ignored. michael@0: // Dump 3: irrelevant. michael@0: Report((void*)(intptr_t)0x0); michael@0: michael@0: // Dump 2: mixture of reported and unreported. michael@0: // Dump 3: all unreported. michael@0: foo(); michael@0: foo(); michael@0: michael@0: // Dump 2: twice-reported. michael@0: // Dump 3: twice-reported. michael@0: char* g1 = (char*) malloc(77); michael@0: ReportOnAlloc(g1); michael@0: ReportOnAlloc(g1); michael@0: michael@0: // Dump 2: twice-reported. michael@0: // Dump 3: once-reported. michael@0: char* g2 = (char*) malloc(78); michael@0: Report(g2); michael@0: ReportOnAlloc(g2); michael@0: michael@0: // Dump 2: twice-reported. michael@0: // Dump 3: once-reported. michael@0: char* g3 = (char*) malloc(79); michael@0: ReportOnAlloc(g3); michael@0: Report(g3); michael@0: michael@0: // All the odd-ball ones. michael@0: // Dump 2: all unreported. michael@0: // Dump 3: all freed, irrelevant. michael@0: // XXX: no memalign on Mac michael@0: //void* x = memalign(64, 65); // rounds up to 128 michael@0: //UseItOrLoseIt(x); michael@0: // XXX: posix_memalign doesn't work on B2G michael@0: //void* y; michael@0: //posix_memalign(&y, 128, 129); // rounds up to 256 michael@0: //UseItOrLoseIt(y); michael@0: // XXX: valloc doesn't work on Windows. michael@0: //void* z = valloc(1); // rounds up to 4096 michael@0: //UseItOrLoseIt(z); michael@0: //aligned_alloc(64, 256); // XXX: C11 only michael@0: michael@0: // Dump 2. michael@0: Dump(writer); michael@0: michael@0: //--------- michael@0: michael@0: Report(a2); michael@0: Report(a2); michael@0: free(c); michael@0: free(e); michael@0: Report(e2); michael@0: free(e3); michael@0: //free(x); michael@0: //free(y); michael@0: //free(z); michael@0: michael@0: // Dump 3. michael@0: Dump(writer); michael@0: michael@0: //--------- michael@0: michael@0: // Clear all knowledge of existing blocks to give us a clean slate. michael@0: gBlockTable->clear(); michael@0: michael@0: gOptions->SetSampleBelowSize(128); michael@0: michael@0: char* s; michael@0: michael@0: // This equals the sample size, and so is reported exactly. It should be michael@0: // listed before records of the same size that are sampled. michael@0: s = (char*) malloc(128); michael@0: UseItOrLoseIt(s); michael@0: michael@0: // This exceeds the sample size, and so is reported exactly. michael@0: s = (char*) malloc(144); michael@0: UseItOrLoseIt(s); michael@0: michael@0: // These together constitute exactly one sample. michael@0: for (int i = 0; i < 16; i++) { michael@0: s = (char*) malloc(8); michael@0: UseItOrLoseIt(s); michael@0: } michael@0: MOZ_ASSERT(gSmallBlockActualSizeCounter == 0); michael@0: michael@0: // These fall 8 bytes short of a full sample. michael@0: for (int i = 0; i < 15; i++) { michael@0: s = (char*) malloc(8); michael@0: UseItOrLoseIt(s); michael@0: } michael@0: MOZ_ASSERT(gSmallBlockActualSizeCounter == 120); michael@0: michael@0: // This exceeds the sample size, and so is recorded exactly. michael@0: s = (char*) malloc(256); michael@0: UseItOrLoseIt(s); michael@0: MOZ_ASSERT(gSmallBlockActualSizeCounter == 120); michael@0: michael@0: // This gets more than to a full sample from the |i < 15| loop above. 
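// Arithmetic: the counter is at 120 and the sample size is 128, so
// 120 + 96 = 216 = 128 + 88, i.e. one full sample is recorded and the
// counter is left at 88, as asserted below.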
michael@0: s = (char*) malloc(96); michael@0: UseItOrLoseIt(s); michael@0: MOZ_ASSERT(gSmallBlockActualSizeCounter == 88); michael@0: michael@0: // This gets to another full sample. michael@0: for (int i = 0; i < 5; i++) { michael@0: s = (char*) malloc(8); michael@0: UseItOrLoseIt(s); michael@0: } michael@0: MOZ_ASSERT(gSmallBlockActualSizeCounter == 0); michael@0: michael@0: // This allocates 16, 32, ..., 128 bytes, which results in a stack trace michael@0: // record that contains a mix of sample and non-sampled blocks, and so should michael@0: // be printed with '~' signs. michael@0: for (int i = 1; i <= 8; i++) { michael@0: s = (char*) malloc(i * 16); michael@0: UseItOrLoseIt(s); michael@0: } michael@0: MOZ_ASSERT(gSmallBlockActualSizeCounter == 64); michael@0: michael@0: // At the end we're 64 bytes into the current sample so we report ~1,424 michael@0: // bytes of allocation overall, which is 64 less than the real value 1,488. michael@0: michael@0: // Dump 4. michael@0: Dump(writer); michael@0: } michael@0: michael@0: //--------------------------------------------------------------------------- michael@0: // Stress testing microbenchmark michael@0: //--------------------------------------------------------------------------- michael@0: michael@0: // This stops otherwise-unused variables from being optimized away. michael@0: static void michael@0: UseItOrLoseIt2(void* a) michael@0: { michael@0: if (a == (void*)0x42) { michael@0: printf("UseItOrLoseIt2\n"); michael@0: } michael@0: } michael@0: michael@0: MOZ_NEVER_INLINE static void michael@0: stress5() michael@0: { michael@0: for (int i = 0; i < 10; i++) { michael@0: void* x = malloc(64); michael@0: UseItOrLoseIt2(x); michael@0: if (i & 1) { michael@0: free(x); michael@0: } michael@0: } michael@0: } michael@0: michael@0: MOZ_NEVER_INLINE static void michael@0: stress4() michael@0: { michael@0: stress5(); stress5(); stress5(); stress5(); stress5(); michael@0: stress5(); stress5(); stress5(); stress5(); stress5(); michael@0: } michael@0: michael@0: MOZ_NEVER_INLINE static void michael@0: stress3() michael@0: { michael@0: for (int i = 0; i < 10; i++) { michael@0: stress4(); michael@0: } michael@0: } michael@0: michael@0: MOZ_NEVER_INLINE static void michael@0: stress2() michael@0: { michael@0: stress3(); stress3(); stress3(); stress3(); stress3(); michael@0: stress3(); stress3(); stress3(); stress3(); stress3(); michael@0: } michael@0: michael@0: MOZ_NEVER_INLINE static void michael@0: stress1() michael@0: { michael@0: for (int i = 0; i < 10; i++) { michael@0: stress2(); michael@0: } michael@0: } michael@0: michael@0: // This stress test does lots of allocations and frees, which is where most of michael@0: // DMD's overhead occurs. It allocates 1,000,000 64-byte blocks, spread evenly michael@0: // across 1,000 distinct stack traces. It frees every second one immediately michael@0: // after allocating it. michael@0: // michael@0: // It's highly artificial, but it's deterministic and easy to run. It can be michael@0: // timed under different conditions to glean performance data. michael@0: static void michael@0: RunStressMode(FILE* fp) michael@0: { michael@0: Writer writer(FpWrite, fp); michael@0: michael@0: // Disable sampling for maximum stress. 
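// A sample-below size of 1 means no block is small enough to be sampled
// ("sample blocks smaller than this"), so every allocation below is recorded
// exactly -- the maximum amount of work DMD can be asked to do per
// allocation, which is the point of a stress test.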
michael@0:   gOptions->SetSampleBelowSize(1);
michael@0: 
michael@0:   stress1(); stress1(); stress1(); stress1(); stress1();
michael@0:   stress1(); stress1(); stress1(); stress1(); stress1();
michael@0: 
michael@0:   Dump(writer);
michael@0: }
michael@0: 
michael@0: } // namespace dmd
michael@0: } // namespace mozilla
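// A rough usage sketch, for orientation only.  Names not defined in this file
// (MyMallocSizeOf, SizeOfBlock) are hypothetical, and real callers may differ.
//
//   // 1. Enable DMD at startup via the environment, e.g.
//   //      DMD="1"   or   DMD="--sample-below=4093 --max-records=1000"
//   //    so that Init() runs and malloc et al. are intercepted.
//
//   // 2. Have each memory reporter's size-of function mark what it measures:
//   static size_t MyMallocSizeOf(const void* aPtr)
//   {
//     mozilla::dmd::Report(aPtr);   // mark this heap block as reported
//     return SizeOfBlock(aPtr);     // hypothetical usable-size lookup
//   }
//
//   // 3. After the reporters have run, construct a Writer (as RunTestMode()
//   //    does) and call mozilla::dmd::Dump(writer).  Dump() prints the
//   //    twice-reported, unreported and once-reported records and then
//   //    clears the Report() markings for the next round.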