Wed, 31 Dec 2014 06:09:35 +0100
Cloned the upstream tor-browser origin at tor-browser-31.3.0esr-4.5-1-build1,
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f, for hacking purposes.
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 #include "DMD.h"
9 #include <ctype.h>
10 #include <errno.h>
11 #include <limits.h>
12 #include <stdarg.h>
13 #include <stdio.h>
14 #include <stdlib.h>
15 #include <string.h>
17 #ifdef XP_WIN
18 #if defined(MOZ_OPTIMIZE) && !defined(MOZ_PROFILING)
19 #error "Optimized, DMD-enabled builds on Windows must be built with --enable-profiling"
20 #endif
21 #include <windows.h>
22 #include <process.h>
23 #else
24 #include <unistd.h>
25 #endif
27 #ifdef ANDROID
28 #include <android/log.h>
29 #endif
31 #include "nscore.h"
32 #include "nsStackWalk.h"
34 #include "js/HashTable.h"
35 #include "js/Vector.h"
37 #include "mozilla/Assertions.h"
38 #include "mozilla/HashFunctions.h"
39 #include "mozilla/Likely.h"
40 #include "mozilla/MemoryReporting.h"
42 // MOZ_REPLACE_ONLY_MEMALIGN saves us from having to define
43 // replace_{posix_memalign,aligned_alloc,valloc}. It requires defining
44 // PAGE_SIZE. Nb: sysconf() is expensive, but it's only used for (the obsolete
45 // and rarely used) valloc.
46 #define MOZ_REPLACE_ONLY_MEMALIGN 1
47 #ifdef XP_WIN
48 #define PAGE_SIZE GetPageSize()
49 static long GetPageSize()
50 {
51 SYSTEM_INFO si;
52 GetSystemInfo(&si);
53 return si.dwPageSize;
54 }
55 #else
56 #define PAGE_SIZE sysconf(_SC_PAGESIZE)
57 #endif
58 #include "replace_malloc.h"
59 #undef MOZ_REPLACE_ONLY_MEMALIGN
60 #undef PAGE_SIZE
62 namespace mozilla {
63 namespace dmd {
65 //---------------------------------------------------------------------------
66 // Utilities
67 //---------------------------------------------------------------------------
69 #ifndef DISALLOW_COPY_AND_ASSIGN
70 #define DISALLOW_COPY_AND_ASSIGN(T) \
71 T(const T&); \
72 void operator=(const T&)
73 #endif
75 static const malloc_table_t* gMallocTable = nullptr;
77 // This enables/disables DMD.
78 static bool gIsDMDRunning = false;
80 // This provides infallible allocations (they abort on OOM). We use it for all
81 // of DMD's own allocations, which fall into the following three cases.
82 // - Direct allocations (the easy case).
83 // - Indirect allocations in js::{Vector,HashSet,HashMap} -- this class serves
84 // as their AllocPolicy.
85 // - Other indirect allocations (e.g. NS_StackWalk) -- see the comments on
86 // Thread::mBlockIntercepts and in replace_malloc for how these work.
87 //
88 class InfallibleAllocPolicy
89 {
90 static void ExitOnFailure(const void* aP);
92 public:
93 static void* malloc_(size_t aSize)
94 {
95 void* p = gMallocTable->malloc(aSize);
96 ExitOnFailure(p);
97 return p;
98 }
100 static void* calloc_(size_t aSize)
101 {
102 void* p = gMallocTable->calloc(1, aSize);
103 ExitOnFailure(p);
104 return p;
105 }
107 // This realloc_ is the one we use for direct reallocs within DMD.
108 static void* realloc_(void* aPtr, size_t aNewSize)
109 {
110 void* p = gMallocTable->realloc(aPtr, aNewSize);
111 ExitOnFailure(p);
112 return p;
113 }
115 // This realloc_ is required for this to be a JS container AllocPolicy.
116 static void* realloc_(void* aPtr, size_t aOldSize, size_t aNewSize)
117 {
118 return InfallibleAllocPolicy::realloc_(aPtr, aNewSize);
119 }
121 static void* memalign_(size_t aAlignment, size_t aSize)
122 {
123 void* p = gMallocTable->memalign(aAlignment, aSize);
124 ExitOnFailure(p);
125 return p;
126 }
128 static void free_(void* aPtr) { gMallocTable->free(aPtr); }
130 static char* strdup_(const char* aStr)
131 {
132 char* s = (char*) InfallibleAllocPolicy::malloc_(strlen(aStr) + 1);
133 strcpy(s, aStr);
134 return s;
135 }
137 template <class T>
138 static T* new_()
139 {
140 void* mem = malloc_(sizeof(T));
141 ExitOnFailure(mem);
142 return new (mem) T;
143 }
145 template <class T, typename P1>
146 static T* new_(P1 p1)
147 {
148 void* mem = malloc_(sizeof(T));
149 ExitOnFailure(mem);
150 return new (mem) T(p1);
151 }
153 template <class T>
154 static void delete_(T *p)
155 {
156 if (p) {
157 p->~T();
158 InfallibleAllocPolicy::free_(p);
159 }
160 }
162 static void reportAllocOverflow() { ExitOnFailure(nullptr); }
163 };
165 // This is only needed because of the |const void*| vs |void*| arg mismatch.
166 static size_t
167 MallocSizeOf(const void* aPtr)
168 {
169 return gMallocTable->malloc_usable_size(const_cast<void*>(aPtr));
170 }
172 static void
173 StatusMsg(const char* aFmt, ...)
174 {
175 va_list ap;
176 va_start(ap, aFmt);
177 #ifdef ANDROID
178 __android_log_vprint(ANDROID_LOG_INFO, "DMD", aFmt, ap);
179 #else
180 // The +64 is easily enough for the "DMD[<pid>] " prefix and the NUL.
181 char* fmt = (char*) InfallibleAllocPolicy::malloc_(strlen(aFmt) + 64);
182 sprintf(fmt, "DMD[%d] %s", getpid(), aFmt);
183 vfprintf(stderr, fmt, ap);
184 InfallibleAllocPolicy::free_(fmt);
185 #endif
186 va_end(ap);
187 }
189 /* static */ void
190 InfallibleAllocPolicy::ExitOnFailure(const void* aP)
191 {
192 if (!aP) {
193 StatusMsg("out of memory; aborting\n");
194 MOZ_CRASH();
195 }
196 }
198 void
199 Writer::Write(const char* aFmt, ...) const
200 {
201 va_list ap;
202 va_start(ap, aFmt);
203 mWriterFun(mWriteState, aFmt, ap);
204 va_end(ap);
205 }
207 #define W(...) aWriter.Write(__VA_ARGS__);
209 #define WriteTitle(...) \
210 W("------------------------------------------------------------------\n"); \
211 W(__VA_ARGS__); \
212 W("------------------------------------------------------------------\n\n");
214 MOZ_EXPORT void
215 FpWrite(void* aWriteState, const char* aFmt, va_list aAp)
216 {
217 FILE* fp = static_cast<FILE*>(aWriteState);
218 vfprintf(fp, aFmt, aAp);
219 }
221 static double
222 Percent(size_t part, size_t whole)
223 {
224 return (whole == 0) ? 0 : 100 * (double)part / whole;
225 }
227 // Commifies the number and prepends a '~' if requested. Best used with
228 // |kBufLen| and |gBuf[1234]|, because they should be big enough for any number
229 // we'll see.
230 static char*
231 Show(size_t n, char* buf, size_t buflen, bool addTilde = false)
232 {
233 int nc = 0, i = 0, lasti = buflen - 2;
234 buf[lasti + 1] = '\0';
235 if (n == 0) {
236 buf[lasti - i] = '0';
237 i++;
238 } else {
239 while (n > 0) {
240 if (((i - nc) % 3) == 0 && i != 0) {
241 buf[lasti - i] = ',';
242 i++;
243 nc++;
244 }
245 buf[lasti - i] = static_cast<char>((n % 10) + '0');
246 i++;
247 n /= 10;
248 }
249 }
250 int firstCharIndex = lasti - i + 1;
252 if (addTilde) {
253 firstCharIndex--;
254 buf[firstCharIndex] = '~';
255 }
257 MOZ_ASSERT(firstCharIndex >= 0);
258 return &buf[firstCharIndex];
259 }
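// Illustrative usage sketch (not part of the original source): Show() fills
// the buffer right-to-left and returns a pointer into it, as the callers
// below do with gBuf1..gBuf4 and kBufLen, e.g.
//
//   const char* s1 = Show(1234567, gBuf1, kBufLen);        // "1,234,567"
//   const char* s2 = Show(1234567, gBuf2, kBufLen, true);  // "~1,234,567"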
261 static const char*
262 Plural(size_t aN)
263 {
264 return aN == 1 ? "" : "s";
265 }
267 // Used by calls to Show().
268 static const size_t kBufLen = 64;
269 static char gBuf1[kBufLen];
270 static char gBuf2[kBufLen];
271 static char gBuf3[kBufLen];
272 static char gBuf4[kBufLen];
274 //---------------------------------------------------------------------------
275 // Options (Part 1)
276 //---------------------------------------------------------------------------
278 class Options
279 {
280 template <typename T>
281 struct NumOption
282 {
283 const T mDefault;
284 const T mMax;
285 T mActual;
286 NumOption(T aDefault, T aMax)
287 : mDefault(aDefault), mMax(aMax), mActual(aDefault)
288 {}
289 };
291 enum Mode {
292 Normal, // run normally
293 Test, // do some basic correctness tests
294 Stress // do some performance stress tests
295 };
297 char* mDMDEnvVar; // a saved copy, for printing during Dump()
299 NumOption<size_t> mSampleBelowSize;
300 NumOption<uint32_t> mMaxFrames;
301 NumOption<uint32_t> mMaxRecords;
302 Mode mMode;
304 void BadArg(const char* aArg);
305 static const char* ValueIfMatch(const char* aArg, const char* aOptionName);
306 static bool GetLong(const char* aArg, const char* aOptionName,
307 long aMin, long aMax, long* aN);
309 public:
310 Options(const char* aDMDEnvVar);
312 const char* DMDEnvVar() const { return mDMDEnvVar; }
314 size_t SampleBelowSize() const { return mSampleBelowSize.mActual; }
315 size_t MaxFrames() const { return mMaxFrames.mActual; }
316 size_t MaxRecords() const { return mMaxRecords.mActual; }
318 void SetSampleBelowSize(size_t aN) { mSampleBelowSize.mActual = aN; }
320 bool IsTestMode() const { return mMode == Test; }
321 bool IsStressMode() const { return mMode == Stress; }
322 };
324 static Options *gOptions;
326 //---------------------------------------------------------------------------
327 // The global lock
328 //---------------------------------------------------------------------------
330 // MutexBase implements the platform-specific parts of a mutex.
332 #ifdef XP_WIN
334 class MutexBase
335 {
336 CRITICAL_SECTION mCS;
338 DISALLOW_COPY_AND_ASSIGN(MutexBase);
340 public:
341 MutexBase()
342 {
343 InitializeCriticalSection(&mCS);
344 }
346 ~MutexBase()
347 {
348 DeleteCriticalSection(&mCS);
349 }
351 void Lock()
352 {
353 EnterCriticalSection(&mCS);
354 }
356 void Unlock()
357 {
358 LeaveCriticalSection(&mCS);
359 }
360 };
362 #else
364 #include <pthread.h>
365 #include <sys/types.h>
367 class MutexBase
368 {
369 pthread_mutex_t mMutex;
371 DISALLOW_COPY_AND_ASSIGN(MutexBase);
373 public:
374 MutexBase()
375 {
376 pthread_mutex_init(&mMutex, nullptr);
377 }
379 void Lock()
380 {
381 pthread_mutex_lock(&mMutex);
382 }
384 void Unlock()
385 {
386 pthread_mutex_unlock(&mMutex);
387 }
388 };
390 #endif
392 class Mutex : private MutexBase
393 {
394 bool mIsLocked;
396 DISALLOW_COPY_AND_ASSIGN(Mutex);
398 public:
399 Mutex()
400 : mIsLocked(false)
401 {}
403 void Lock()
404 {
405 MutexBase::Lock();
406 MOZ_ASSERT(!mIsLocked);
407 mIsLocked = true;
408 }
410 void Unlock()
411 {
412 MOZ_ASSERT(mIsLocked);
413 mIsLocked = false;
414 MutexBase::Unlock();
415 }
417 bool IsLocked()
418 {
419 return mIsLocked;
420 }
421 };
423 // This lock must be held while manipulating global state, such as
424 // gStackTraceTable, gBlockTable, etc.
425 static Mutex* gStateLock = nullptr;
427 class AutoLockState
428 {
429 DISALLOW_COPY_AND_ASSIGN(AutoLockState);
431 public:
432 AutoLockState()
433 {
434 gStateLock->Lock();
435 }
436 ~AutoLockState()
437 {
438 gStateLock->Unlock();
439 }
440 };
442 class AutoUnlockState
443 {
444 DISALLOW_COPY_AND_ASSIGN(AutoUnlockState);
446 public:
447 AutoUnlockState()
448 {
449 gStateLock->Unlock();
450 }
451 ~AutoUnlockState()
452 {
453 gStateLock->Lock();
454 }
455 };
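// Illustrative sketch (not part of the original source): these two guards are
// used in a scoped fashion, e.g.
//
//   {
//     AutoLockState lock;          // acquire gStateLock
//     ...
//     {
//       AutoUnlockState unlock;    // temporarily drop gStateLock around a
//       DescribeSomething();       // (hypothetical) call that may take other locks
//     }                            // gStateLock is re-acquired here
//   }                              // gStateLock is released here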
457 //---------------------------------------------------------------------------
458 // Thread-local storage and blocking of intercepts
459 //---------------------------------------------------------------------------
461 #ifdef XP_WIN
463 #define DMD_TLS_INDEX_TYPE DWORD
464 #define DMD_CREATE_TLS_INDEX(i_) do { \
465 (i_) = TlsAlloc(); \
466 } while (0)
467 #define DMD_DESTROY_TLS_INDEX(i_) TlsFree((i_))
468 #define DMD_GET_TLS_DATA(i_) TlsGetValue((i_))
469 #define DMD_SET_TLS_DATA(i_, v_) TlsSetValue((i_), (v_))
471 #else
473 #include <pthread.h>
475 #define DMD_TLS_INDEX_TYPE pthread_key_t
476 #define DMD_CREATE_TLS_INDEX(i_) pthread_key_create(&(i_), nullptr)
477 #define DMD_DESTROY_TLS_INDEX(i_) pthread_key_delete((i_))
478 #define DMD_GET_TLS_DATA(i_) pthread_getspecific((i_))
479 #define DMD_SET_TLS_DATA(i_, v_) pthread_setspecific((i_), (v_))
481 #endif
483 static DMD_TLS_INDEX_TYPE gTlsIndex;
485 class Thread
486 {
487 // Required for allocation via InfallibleAllocPolicy::new_.
488 friend class InfallibleAllocPolicy;
490 // When true, this blocks intercepts, which allows malloc interception
491 // functions to themselves call malloc. (Nb: for direct calls to malloc we
492 // can just use InfallibleAllocPolicy::{malloc_,new_}, but we sometimes
493 // indirectly call vanilla malloc via functions like NS_StackWalk.)
494 bool mBlockIntercepts;
496 Thread()
497 : mBlockIntercepts(false)
498 {}
500 DISALLOW_COPY_AND_ASSIGN(Thread);
502 public:
503 static Thread* Fetch();
505 bool BlockIntercepts()
506 {
507 MOZ_ASSERT(!mBlockIntercepts);
508 return mBlockIntercepts = true;
509 }
511 bool UnblockIntercepts()
512 {
513 MOZ_ASSERT(mBlockIntercepts);
514 return mBlockIntercepts = false;
515 }
517 bool InterceptsAreBlocked() const
518 {
519 return mBlockIntercepts;
520 }
521 };
523 /* static */ Thread*
524 Thread::Fetch()
525 {
526 Thread* t = static_cast<Thread*>(DMD_GET_TLS_DATA(gTlsIndex));
528 if (MOZ_UNLIKELY(!t)) {
529 // This memory is never freed, even if the thread dies. It's a leak, but
530 // only a tiny one.
531 t = InfallibleAllocPolicy::new_<Thread>();
532 DMD_SET_TLS_DATA(gTlsIndex, t);
533 }
535 return t;
536 }
538 // An object of this class must be created (on the stack) before running any
539 // code that might allocate.
540 class AutoBlockIntercepts
541 {
542 Thread* const mT;
544 DISALLOW_COPY_AND_ASSIGN(AutoBlockIntercepts);
546 public:
547 AutoBlockIntercepts(Thread* aT)
548 : mT(aT)
549 {
550 mT->BlockIntercepts();
551 }
552 ~AutoBlockIntercepts()
553 {
554 MOZ_ASSERT(mT->InterceptsAreBlocked());
555 mT->UnblockIntercepts();
556 }
557 };
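// Illustrative sketch (not part of the original source): the malloc/free
// callbacks below pair this guard with the state lock, e.g.
//
//   AutoLockState lock;            // protect the global tables
//   AutoBlockIntercepts block(aT); // so DMD's own allocations aren't recorded
//
// See AllocCallback() and FreeCallback() for the real uses.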
559 //---------------------------------------------------------------------------
560 // Location service
561 //---------------------------------------------------------------------------
563 // This class is used to print details about code locations.
564 class LocationService
565 {
566 // WriteLocation() is the key function in this class. It's basically a
567 // wrapper around NS_DescribeCodeAddress.
568 //
569 // However, NS_DescribeCodeAddress is very slow on some platforms, and we
570 // have lots of repeated (i.e. same PC) calls to it. So we do some caching
571 // of results. Each cached result includes two strings (|mFunction| and
572 // |mLibrary|), so we also optimize them for space in the following ways.
573 //
574 // - The number of distinct library names is small, e.g. a few dozen. There
575 // is lots of repetition, especially of libxul. So we intern them in their
576 // own table, which saves space over duplicating them for each cache entry.
577 //
578 // - The number of distinct function names is much higher, so we duplicate
579 // them in each cache entry. That's more space-efficient than interning
580 // because entries containing single-occurrence function names are quickly
581 // overwritten, and their copies released. In addition, empty function
582 // names are common, so we use nullptr to represent them compactly.
584 struct StringHasher
585 {
586 typedef const char* Lookup;
588 static uint32_t hash(const char* const& aS)
589 {
590 return HashString(aS);
591 }
593 static bool match(const char* const& aA, const char* const& aB)
594 {
595 return strcmp(aA, aB) == 0;
596 }
597 };
599 typedef js::HashSet<const char*, StringHasher, InfallibleAllocPolicy>
600 StringTable;
602 StringTable mLibraryStrings;
604 struct Entry
605 {
606 const void* mPc;
607 char* mFunction; // owned by the Entry; may be null
608 const char* mLibrary; // owned by mLibraryStrings; never null
609                       // in a non-empty (in-use) entry
610 ptrdiff_t mLOffset;
611 char* mFileName; // owned by the Entry; may be null
612 uint32_t mLineNo:31;
613 uint32_t mInUse:1; // is the entry used?
615 Entry()
616 : mPc(0), mFunction(nullptr), mLibrary(nullptr), mLOffset(0), mFileName(nullptr), mLineNo(0), mInUse(0)
617 {}
619 ~Entry()
620 {
621 // We don't free mLibrary because it's externally owned.
622 InfallibleAllocPolicy::free_(mFunction);
623 InfallibleAllocPolicy::free_(mFileName);
624 }
626 void Replace(const void* aPc, const char* aFunction,
627 const char* aLibrary, ptrdiff_t aLOffset,
628 const char* aFileName, unsigned long aLineNo)
629 {
630 mPc = aPc;
632 // Convert "" to nullptr. Otherwise, make a copy of the name.
633 InfallibleAllocPolicy::free_(mFunction);
634 mFunction =
635 !aFunction[0] ? nullptr : InfallibleAllocPolicy::strdup_(aFunction);
636 InfallibleAllocPolicy::free_(mFileName);
637 mFileName =
638 !aFileName[0] ? nullptr : InfallibleAllocPolicy::strdup_(aFileName);
641 mLibrary = aLibrary;
642 mLOffset = aLOffset;
643 mLineNo = aLineNo;
645 mInUse = 1;
646 }
648 size_t SizeOfExcludingThis() {
649 // Don't measure mLibrary because it's externally owned.
650 return MallocSizeOf(mFunction) + MallocSizeOf(mFileName);
651 }
652 };
654 // A direct-mapped cache. When doing a dump just after starting desktop
655 // Firefox (which is similar to dumping after a longer-running session,
656 // thanks to the limit on how many records we dump), a cache with 2^24
657 // entries (which approximates an infinite-entry cache) has a ~91% hit rate.
658 // A cache with 2^12 entries has a ~83% hit rate, and takes up ~85 KiB (on
659 // 32-bit platforms) or ~150 KiB (on 64-bit platforms).
660 static const size_t kNumEntries = 1 << 12;
661 static const size_t kMask = kNumEntries - 1;
662 Entry mEntries[kNumEntries];
664 size_t mNumCacheHits;
665 size_t mNumCacheMisses;
667 public:
668 LocationService()
669 : mEntries(), mNumCacheHits(0), mNumCacheMisses(0)
670 {
671 (void)mLibraryStrings.init(64);
672 }
674 void WriteLocation(const Writer& aWriter, const void* aPc)
675 {
676 MOZ_ASSERT(gStateLock->IsLocked());
678 uint32_t index = HashGeneric(aPc) & kMask;
679 MOZ_ASSERT(index < kNumEntries);
680 Entry& entry = mEntries[index];
682 if (!entry.mInUse || entry.mPc != aPc) {
683 mNumCacheMisses++;
685 // NS_DescribeCodeAddress can (on Linux) acquire a lock inside
686 // the shared library loader. Another thread might call malloc
687 // while holding that lock (when loading a shared library). So
688 // we have to exit gStateLock around this call. For details, see
689 // https://bugzilla.mozilla.org/show_bug.cgi?id=363334#c3
690 nsCodeAddressDetails details;
691 {
692 AutoUnlockState unlock;
693 (void)NS_DescribeCodeAddress(const_cast<void*>(aPc), &details);
694 }
696 // Intern the library name.
697 const char* library = nullptr;
698 StringTable::AddPtr p = mLibraryStrings.lookupForAdd(details.library);
699 if (!p) {
700 library = InfallibleAllocPolicy::strdup_(details.library);
701 (void)mLibraryStrings.add(p, library);
702 } else {
703 library = *p;
704 }
706 entry.Replace(aPc, details.function, library, details.loffset, details.filename, details.lineno);
708 } else {
709 mNumCacheHits++;
710 }
712 MOZ_ASSERT(entry.mPc == aPc);
714 uintptr_t entryPc = (uintptr_t)(entry.mPc);
715 // Sometimes we get nothing useful. Just print "???" for the entire entry
716 // so that fix-linux-stack.pl doesn't complain about an empty filename.
717 if (!entry.mFunction && !entry.mLibrary[0] && entry.mLOffset == 0) {
718 W(" ??? 0x%x\n", entryPc);
719 } else {
720 // Use "???" for unknown functions.
721 const char* entryFunction = entry.mFunction ? entry.mFunction : "???";
722 if (entry.mFileName) {
723 // On Windows we can get the filename and line number at runtime.
724 W(" %s (%s:%lu) 0x%x\n",
725 entryFunction, entry.mFileName, entry.mLineNo, entryPc);
726 } else {
727 // On Linux and Mac we cannot get the filename and line number at
728 // runtime, so we print the offset in a form that fix-linux-stack.pl and
729 // fix_macosx_stack.py can post-process.
730 W(" %s[%s +0x%X] 0x%x\n",
731 entryFunction, entry.mLibrary, entry.mLOffset, entryPc);
732 }
733 }
734 }
736 size_t SizeOfIncludingThis()
737 {
738 size_t n = MallocSizeOf(this);
739 for (uint32_t i = 0; i < kNumEntries; i++) {
740 n += mEntries[i].SizeOfExcludingThis();
741 }
743 n += mLibraryStrings.sizeOfExcludingThis(MallocSizeOf);
744 for (StringTable::Range r = mLibraryStrings.all();
745 !r.empty();
746 r.popFront()) {
747 n += MallocSizeOf(r.front());
748 }
750 return n;
751 }
753 size_t CacheCapacity() const { return kNumEntries; }
755 size_t CacheCount() const
756 {
757 size_t n = 0;
758 for (size_t i = 0; i < kNumEntries; i++) {
759 if (mEntries[i].mInUse) {
760 n++;
761 }
762 }
763 return n;
764 }
766 size_t NumCacheHits() const { return mNumCacheHits; }
767 size_t NumCacheMisses() const { return mNumCacheMisses; }
768 };
770 //---------------------------------------------------------------------------
771 // Stack traces
772 //---------------------------------------------------------------------------
774 class StackTrace
775 {
776 public:
777 static const uint32_t MaxFrames = 24;
779 private:
780 uint32_t mLength; // The number of PCs.
781 void* mPcs[MaxFrames]; // The PCs themselves. If --max-frames is less
782 // than 24, this array is bigger than necessary,
783 // but that case is unusual.
785 public:
786 StackTrace() : mLength(0) {}
788 uint32_t Length() const { return mLength; }
789 void* Pc(uint32_t i) const { MOZ_ASSERT(i < mLength); return mPcs[i]; }
791 uint32_t Size() const { return mLength * sizeof(mPcs[0]); }
793 // The stack trace returned by this function is interned in gStackTraceTable,
794 // and so is immortal and unmovable.
795 static const StackTrace* Get(Thread* aT);
797 void Sort()
798 {
799 qsort(mPcs, mLength, sizeof(mPcs[0]), StackTrace::QsortCmp);
800 }
802 void Print(const Writer& aWriter, LocationService* aLocService) const;
804 // Hash policy.
806 typedef StackTrace* Lookup;
808 static uint32_t hash(const StackTrace* const& aSt)
809 {
810 return mozilla::HashBytes(aSt->mPcs, aSt->Size());
811 }
813 static bool match(const StackTrace* const& aA,
814 const StackTrace* const& aB)
815 {
816 return aA->mLength == aB->mLength &&
817 memcmp(aA->mPcs, aB->mPcs, aA->Size()) == 0;
818 }
820 private:
821 static void StackWalkCallback(void* aPc, void* aSp, void* aClosure)
822 {
823 StackTrace* st = (StackTrace*) aClosure;
824 MOZ_ASSERT(st->mLength < MaxFrames);
825 st->mPcs[st->mLength] = aPc;
826 st->mLength++;
827 }
829 static int QsortCmp(const void* aA, const void* aB)
830 {
831 const void* const a = *static_cast<const void* const*>(aA);
832 const void* const b = *static_cast<const void* const*>(aB);
833 if (a < b) return -1;
834 if (a > b) return 1;
835 return 0;
836 }
837 };
839 typedef js::HashSet<StackTrace*, StackTrace, InfallibleAllocPolicy>
840 StackTraceTable;
841 static StackTraceTable* gStackTraceTable = nullptr;
843 // We won't GC the stack trace table until it exceeds this many elements.
844 static uint32_t gGCStackTraceTableWhenSizeExceeds = 4 * 1024;
846 void
847 StackTrace::Print(const Writer& aWriter, LocationService* aLocService) const
848 {
849 if (mLength == 0) {
850 W(" (empty)\n"); // StackTrace::Get() must have failed
851 return;
852 }
854 for (uint32_t i = 0; i < mLength; i++) {
855 aLocService->WriteLocation(aWriter, Pc(i));
856 }
857 }
859 /* static */ const StackTrace*
860 StackTrace::Get(Thread* aT)
861 {
862 MOZ_ASSERT(gStateLock->IsLocked());
863 MOZ_ASSERT(aT->InterceptsAreBlocked());
865 // On Windows, NS_StackWalk can acquire a lock from the shared library
866 // loader. Another thread might call malloc while holding that lock (when
867 // loading a shared library). So we can't be in gStateLock during the call
868 // to NS_StackWalk. For details, see
869 // https://bugzilla.mozilla.org/show_bug.cgi?id=374829#c8
870 // On Linux, something similar can happen; see bug 824340.
871 // So let's just release it on all platforms.
872 nsresult rv;
873 StackTrace tmp;
874 {
875 AutoUnlockState unlock;
876 uint32_t skipFrames = 2;
877 rv = NS_StackWalk(StackWalkCallback, skipFrames,
878 gOptions->MaxFrames(), &tmp, 0, nullptr);
879 }
881 if (rv == NS_OK) {
882 // Handle the common case first. All is ok. Nothing to do.
883 } else if (rv == NS_ERROR_NOT_IMPLEMENTED || rv == NS_ERROR_FAILURE) {
884 tmp.mLength = 0;
885 } else if (rv == NS_ERROR_UNEXPECTED) {
886 // XXX: This |rv| only happens on Mac, and it indicates that we're handling
887 // a call to malloc that happened inside a mutex-handling function. Any
888 // attempt to create a semaphore (which can happen in printf) could
889 // deadlock.
890 //
891 // However, the most complex thing DMD does after Get() returns is to put
892 // something in a hash table, which might call
893 // InfallibleAllocPolicy::malloc_. I'm not yet sure if this needs special
894 // handling, hence the forced abort. Sorry. If you hit this, please file
895 // a bug and CC nnethercote.
896 MOZ_CRASH();
897 } else {
898 MOZ_CRASH(); // should be impossible
899 }
901 StackTraceTable::AddPtr p = gStackTraceTable->lookupForAdd(&tmp);
902 if (!p) {
903 StackTrace* stnew = InfallibleAllocPolicy::new_<StackTrace>(tmp);
904 (void)gStackTraceTable->add(p, stnew);
905 }
906 return *p;
907 }
909 //---------------------------------------------------------------------------
910 // Heap blocks
911 //---------------------------------------------------------------------------
913 // This class combines a 2-byte-aligned pointer (i.e. one whose bottom bit
914 // is zero) with a 1-bit tag.
915 //
916 // |T| is the pointer type, e.g. |int*|, not the pointed-to type. This makes
917 // it easier to have const pointers, e.g. |TaggedPtr<const int*>|.
918 template <typename T>
919 class TaggedPtr
920 {
921 union
922 {
923 T mPtr;
924 uintptr_t mUint;
925 };
927 static const uintptr_t kTagMask = uintptr_t(0x1);
928 static const uintptr_t kPtrMask = ~kTagMask;
930 static bool IsTwoByteAligned(T aPtr)
931 {
932 return (uintptr_t(aPtr) & kTagMask) == 0;
933 }
935 public:
936 TaggedPtr()
937 : mPtr(nullptr)
938 {}
940 TaggedPtr(T aPtr, bool aBool)
941 : mPtr(aPtr)
942 {
943 MOZ_ASSERT(IsTwoByteAligned(aPtr));
944 uintptr_t tag = uintptr_t(aBool);
945 MOZ_ASSERT(tag <= kTagMask);
946 mUint |= (tag & kTagMask);
947 }
949 void Set(T aPtr, bool aBool)
950 {
951 MOZ_ASSERT(IsTwoByteAligned(aPtr));
952 mPtr = aPtr;
953 uintptr_t tag = uintptr_t(aBool);
954 MOZ_ASSERT(tag <= kTagMask);
955 mUint |= (tag & kTagMask);
956 }
958 T Ptr() const { return reinterpret_cast<T>(mUint & kPtrMask); }
960 bool Tag() const { return bool(mUint & kTagMask); }
961 };
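// Illustrative usage sketch (not part of the original source): a TaggedPtr
// packs a 2-byte-aligned pointer and a bool into a single word, e.g.
//
//   TaggedPtr<const StackTrace*> tp(someTrace, /* tag */ true);
//   const StackTrace* p = tp.Ptr();  // the original pointer
//   bool sampled = tp.Tag();         // true
//
// |someTrace| is a placeholder name, not something defined in this file.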
963 // A live heap block.
964 class Block
965 {
966 const void* mPtr;
967 const size_t mReqSize; // size requested
969 // Ptr: |mAllocStackTrace| - stack trace where this block was allocated.
970 // Tag bit 0: |mSampled| - was this block sampled? (if so, slop == 0).
971 TaggedPtr<const StackTrace* const>
972 mAllocStackTrace_mSampled;
974 // This array has two elements because we record at most two reports of a
975 // block.
976 // - Ptr: |mReportStackTrace| - stack trace where this block was reported.
977 // nullptr if not reported.
978 // - Tag bit 0: |mReportedOnAlloc| - was the block reported immediately on
979 // allocation? If so, DMD must not clear the report at the end of Dump().
980 // Only relevant if |mReportStackTrace| is non-nullptr.
981 //
982 // |mPtr| is used as the key in BlockTable, so it's ok for this member
983 // to be |mutable|.
984 mutable TaggedPtr<const StackTrace*> mReportStackTrace_mReportedOnAlloc[2];
986 public:
987 Block(const void* aPtr, size_t aReqSize, const StackTrace* aAllocStackTrace,
988 bool aSampled)
989 : mPtr(aPtr),
990 mReqSize(aReqSize),
991 mAllocStackTrace_mSampled(aAllocStackTrace, aSampled),
992 mReportStackTrace_mReportedOnAlloc() // all fields get zeroed
993 {
994 MOZ_ASSERT(aAllocStackTrace);
995 }
997 size_t ReqSize() const { return mReqSize; }
999 // Sampled blocks always have zero slop.
1000 size_t SlopSize() const
1001 {
1002 return IsSampled() ? 0 : MallocSizeOf(mPtr) - mReqSize;
1003 }
1005 size_t UsableSize() const
1006 {
1007 return IsSampled() ? mReqSize : MallocSizeOf(mPtr);
1008 }
1010 bool IsSampled() const
1011 {
1012 return mAllocStackTrace_mSampled.Tag();
1013 }
1015 const StackTrace* AllocStackTrace() const
1016 {
1017 return mAllocStackTrace_mSampled.Ptr();
1018 }
1020 const StackTrace* ReportStackTrace1() const {
1021 return mReportStackTrace_mReportedOnAlloc[0].Ptr();
1022 }
1024 const StackTrace* ReportStackTrace2() const {
1025 return mReportStackTrace_mReportedOnAlloc[1].Ptr();
1026 }
1028 bool ReportedOnAlloc1() const {
1029 return mReportStackTrace_mReportedOnAlloc[0].Tag();
1030 }
1032 bool ReportedOnAlloc2() const {
1033 return mReportStackTrace_mReportedOnAlloc[1].Tag();
1034 }
1036 uint32_t NumReports() const {
1037 if (ReportStackTrace2()) {
1038 MOZ_ASSERT(ReportStackTrace1());
1039 return 2;
1040 }
1041 if (ReportStackTrace1()) {
1042 return 1;
1043 }
1044 return 0;
1045 }
1047 // This is |const| thanks to the |mutable| fields above.
1048 void Report(Thread* aT, bool aReportedOnAlloc) const
1049 {
1050 // We don't bother recording reports after the 2nd one.
1051 uint32_t numReports = NumReports();
1052 if (numReports < 2) {
1053 mReportStackTrace_mReportedOnAlloc[numReports].Set(StackTrace::Get(aT),
1054 aReportedOnAlloc);
1055 }
1056 }
1058 void UnreportIfNotReportedOnAlloc() const
1059 {
1060 if (!ReportedOnAlloc1() && !ReportedOnAlloc2()) {
1061 mReportStackTrace_mReportedOnAlloc[0].Set(nullptr, 0);
1062 mReportStackTrace_mReportedOnAlloc[1].Set(nullptr, 0);
1064 } else if (!ReportedOnAlloc1() && ReportedOnAlloc2()) {
1065 // Shift the 2nd report down to the 1st one.
1066 mReportStackTrace_mReportedOnAlloc[0] =
1067 mReportStackTrace_mReportedOnAlloc[1];
1068 mReportStackTrace_mReportedOnAlloc[1].Set(nullptr, 0);
1070 } else if (ReportedOnAlloc1() && !ReportedOnAlloc2()) {
1071 mReportStackTrace_mReportedOnAlloc[1].Set(nullptr, 0);
1072 }
1073 }
1075 // Hash policy.
1077 typedef const void* Lookup;
1079 static uint32_t hash(const void* const& aPtr)
1080 {
1081 return mozilla::HashGeneric(aPtr);
1082 }
1084 static bool match(const Block& aB, const void* const& aPtr)
1085 {
1086 return aB.mPtr == aPtr;
1087 }
1088 };
1090 typedef js::HashSet<Block, Block, InfallibleAllocPolicy> BlockTable;
1091 static BlockTable* gBlockTable = nullptr;
1093 typedef js::HashSet<const StackTrace*, js::DefaultHasher<const StackTrace*>,
1094 InfallibleAllocPolicy>
1095 StackTraceSet;
1097 // Add a pointer to each live stack trace into the given StackTraceSet. (A
1098 // stack trace is live if it's used by one of the live blocks.)
1099 static void
1100 GatherUsedStackTraces(StackTraceSet& aStackTraces)
1101 {
1102 MOZ_ASSERT(gStateLock->IsLocked());
1103 MOZ_ASSERT(Thread::Fetch()->InterceptsAreBlocked());
1105 aStackTraces.finish();
1106 aStackTraces.init(1024);
1108 for (BlockTable::Range r = gBlockTable->all(); !r.empty(); r.popFront()) {
1109 const Block& b = r.front();
1110 aStackTraces.put(b.AllocStackTrace());
1111 aStackTraces.put(b.ReportStackTrace1());
1112 aStackTraces.put(b.ReportStackTrace2());
1113 }
1115 // Any of the stack traces added above may have been null. For the sake of
1116 // cleanliness, don't leave the null pointer in the set.
1117 aStackTraces.remove(nullptr);
1118 }
1120 // Delete stack traces that we aren't using, and compact our hashtable.
1121 static void
1122 GCStackTraces()
1123 {
1124 MOZ_ASSERT(gStateLock->IsLocked());
1125 MOZ_ASSERT(Thread::Fetch()->InterceptsAreBlocked());
1127 StackTraceSet usedStackTraces;
1128 GatherUsedStackTraces(usedStackTraces);
1130 // Delete all unused stack traces from gStackTraceTable. The Enum destructor
1131 // will automatically rehash and compact the table.
1132 for (StackTraceTable::Enum e(*gStackTraceTable);
1133 !e.empty();
1134 e.popFront()) {
1135 StackTrace* const& st = e.front();
1137 if (!usedStackTraces.has(st)) {
1138 e.removeFront();
1139 InfallibleAllocPolicy::delete_(st);
1140 }
1141 }
1143 // Schedule a GC when we have twice as many stack traces as we had right after
1144 // this GC finished.
1145 gGCStackTraceTableWhenSizeExceeds = 2 * gStackTraceTable->count();
1146 }
1148 //---------------------------------------------------------------------------
1149 // malloc/free callbacks
1150 //---------------------------------------------------------------------------
1152 static size_t gSmallBlockActualSizeCounter = 0;
1154 static void
1155 AllocCallback(void* aPtr, size_t aReqSize, Thread* aT)
1156 {
1157 MOZ_ASSERT(gIsDMDRunning);
1159 if (!aPtr) {
1160 return;
1161 }
1163 AutoLockState lock;
1164 AutoBlockIntercepts block(aT);
1166 size_t actualSize = gMallocTable->malloc_usable_size(aPtr);
1167 size_t sampleBelowSize = gOptions->SampleBelowSize();
1169 if (actualSize < sampleBelowSize) {
1170 // If this allocation is smaller than the sample-below size, increment the
1171 // cumulative counter. Then, if that counter now exceeds the sample size,
1172 // blame this allocation for |sampleBelowSize| bytes. This precludes the
1173 // measurement of slop.
1174 gSmallBlockActualSizeCounter += actualSize;
1175 if (gSmallBlockActualSizeCounter >= sampleBelowSize) {
1176 gSmallBlockActualSizeCounter -= sampleBelowSize;
1178 Block b(aPtr, sampleBelowSize, StackTrace::Get(aT), /* sampled */ true);
1179 (void)gBlockTable->putNew(aPtr, b);
1180 }
1181 } else {
1182 // If this block size is larger than the sample size, record it exactly.
1183 Block b(aPtr, aReqSize, StackTrace::Get(aT), /* sampled */ false);
1184 (void)gBlockTable->putNew(aPtr, b);
1185 }
1186 }
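// Worked example of the sampling arithmetic above (illustrative numbers, not
// part of the original source): with --sample-below=4093, five 1000-byte
// allocations advance gSmallBlockActualSizeCounter through 1000, 2000, 3000,
// 4000, 5000; the fifth crosses the threshold, so one sampled Block of 4093
// bytes is recorded and the counter drops to 5000 - 4093 = 907. Over time the
// sampled bytes approximate the actual bytes in small blocks.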
1188 static void
1189 FreeCallback(void* aPtr, Thread* aT)
1190 {
1191 MOZ_ASSERT(gIsDMDRunning);
1193 if (!aPtr) {
1194 return;
1195 }
1197 AutoLockState lock;
1198 AutoBlockIntercepts block(aT);
1200 gBlockTable->remove(aPtr);
1202 if (gStackTraceTable->count() > gGCStackTraceTableWhenSizeExceeds) {
1203 GCStackTraces();
1204 }
1205 }
1207 //---------------------------------------------------------------------------
1208 // malloc/free interception
1209 //---------------------------------------------------------------------------
1211 static void Init(const malloc_table_t* aMallocTable);
1213 } // namespace dmd
1214 } // namespace mozilla
1216 void
1217 replace_init(const malloc_table_t* aMallocTable)
1218 {
1219 mozilla::dmd::Init(aMallocTable);
1220 }
1222 void*
1223 replace_malloc(size_t aSize)
1224 {
1225 using namespace mozilla::dmd;
1227 if (!gIsDMDRunning) {
1228 // DMD hasn't started up, either because it wasn't enabled by the user, or
1229 // we're still in Init() and something has indirectly called malloc. Do a
1230 // vanilla malloc. (In the latter case, if it fails we'll crash. But
1231 // OOM is highly unlikely so early on.)
1232 return gMallocTable->malloc(aSize);
1233 }
1235 Thread* t = Thread::Fetch();
1236 if (t->InterceptsAreBlocked()) {
1237 // Intercepts are blocked, which means this must be a call to malloc
1238 // triggered indirectly by DMD (e.g. via NS_StackWalk). Be infallible.
1239 return InfallibleAllocPolicy::malloc_(aSize);
1240 }
1242 // This must be a call to malloc from outside DMD. Intercept it.
1243 void* ptr = gMallocTable->malloc(aSize);
1244 AllocCallback(ptr, aSize, t);
1245 return ptr;
1246 }
1248 void*
1249 replace_calloc(size_t aCount, size_t aSize)
1250 {
1251 using namespace mozilla::dmd;
1253 if (!gIsDMDRunning) {
1254 return gMallocTable->calloc(aCount, aSize);
1255 }
1257 Thread* t = Thread::Fetch();
1258 if (t->InterceptsAreBlocked()) {
1259 return InfallibleAllocPolicy::calloc_(aCount * aSize);
1260 }
1262 void* ptr = gMallocTable->calloc(aCount, aSize);
1263 AllocCallback(ptr, aCount * aSize, t);
1264 return ptr;
1265 }
1267 void*
1268 replace_realloc(void* aOldPtr, size_t aSize)
1269 {
1270 using namespace mozilla::dmd;
1272 if (!gIsDMDRunning) {
1273 return gMallocTable->realloc(aOldPtr, aSize);
1274 }
1276 Thread* t = Thread::Fetch();
1277 if (t->InterceptsAreBlocked()) {
1278 return InfallibleAllocPolicy::realloc_(aOldPtr, aSize);
1279 }
1281 // If |aOldPtr| is nullptr, the call is equivalent to |malloc(aSize)|.
1282 if (!aOldPtr) {
1283 return replace_malloc(aSize);
1284 }
1286 // Be very careful here! Must remove the block from the table before doing
1287 // the realloc to avoid races, just like in replace_free().
1288 // Nb: This does an unnecessary hashtable remove+add if the block doesn't
1289 // move, but doing better isn't worth the effort.
1290 FreeCallback(aOldPtr, t);
1291 void* ptr = gMallocTable->realloc(aOldPtr, aSize);
1292 if (ptr) {
1293 AllocCallback(ptr, aSize, t);
1294 } else {
1295 // If realloc fails, we re-insert the old pointer. It will look like it
1296 // was allocated for the first time here, which is untrue, and the slop
1297 // bytes will be zero, which may be untrue. But this case is rare and
1298 // doing better isn't worth the effort.
1299 AllocCallback(aOldPtr, gMallocTable->malloc_usable_size(aOldPtr), t);
1300 }
1301 return ptr;
1302 }
1304 void*
1305 replace_memalign(size_t aAlignment, size_t aSize)
1306 {
1307 using namespace mozilla::dmd;
1309 if (!gIsDMDRunning) {
1310 return gMallocTable->memalign(aAlignment, aSize);
1311 }
1313 Thread* t = Thread::Fetch();
1314 if (t->InterceptsAreBlocked()) {
1315 return InfallibleAllocPolicy::memalign_(aAlignment, aSize);
1316 }
1318 void* ptr = gMallocTable->memalign(aAlignment, aSize);
1319 AllocCallback(ptr, aSize, t);
1320 return ptr;
1321 }
1323 void
1324 replace_free(void* aPtr)
1325 {
1326 using namespace mozilla::dmd;
1328 if (!gIsDMDRunning) {
1329 gMallocTable->free(aPtr);
1330 return;
1331 }
1333 Thread* t = Thread::Fetch();
1334 if (t->InterceptsAreBlocked()) {
1335 return InfallibleAllocPolicy::free_(aPtr);
1336 }
1338 // Do the actual free after updating the table. Otherwise, another thread
1339 // could call malloc and get the freed block and update the table, and then
1340 // our update here would remove the newly-malloc'd block.
1341 FreeCallback(aPtr, t);
1342 gMallocTable->free(aPtr);
1343 }
1345 namespace mozilla {
1346 namespace dmd {
1348 //---------------------------------------------------------------------------
1349 // Stack trace records
1350 //---------------------------------------------------------------------------
1352 class TraceRecordKey
1353 {
1354 public:
1355 const StackTrace* const mAllocStackTrace; // never null
1356 protected:
1357 const StackTrace* const mReportStackTrace1; // nullptr if unreported
1358 const StackTrace* const mReportStackTrace2; // nullptr if not 2x-reported
1360 public:
1361 TraceRecordKey(const Block& aB)
1362 : mAllocStackTrace(aB.AllocStackTrace()),
1363 mReportStackTrace1(aB.ReportStackTrace1()),
1364 mReportStackTrace2(aB.ReportStackTrace2())
1365 {
1366 MOZ_ASSERT(mAllocStackTrace);
1367 }
1369 // Hash policy.
1371 typedef TraceRecordKey Lookup;
1373 static uint32_t hash(const TraceRecordKey& aKey)
1374 {
1375 return mozilla::HashGeneric(aKey.mAllocStackTrace,
1376 aKey.mReportStackTrace1,
1377 aKey.mReportStackTrace2);
1378 }
1380 static bool match(const TraceRecordKey& aA, const TraceRecordKey& aB)
1381 {
1382 return aA.mAllocStackTrace == aB.mAllocStackTrace &&
1383 aA.mReportStackTrace1 == aB.mReportStackTrace1 &&
1384 aA.mReportStackTrace2 == aB.mReportStackTrace2;
1385 }
1386 };
1388 class RecordSize
1389 {
1390 static const size_t kReqBits = sizeof(size_t) * 8 - 1; // 31 or 63
1392 size_t mReq; // size requested
1393 size_t mSlop:kReqBits; // slop bytes
1394 size_t mSampled:1; // were one or more blocks contributing to this
1395 // RecordSize sampled?
1396 public:
1397 RecordSize()
1398 : mReq(0),
1399 mSlop(0),
1400 mSampled(false)
1401 {}
1403 size_t Req() const { return mReq; }
1404 size_t Slop() const { return mSlop; }
1405 size_t Usable() const { return mReq + mSlop; }
1407 bool IsSampled() const { return mSampled; }
1409 void Add(const Block& aB)
1410 {
1411 mReq += aB.ReqSize();
1412 mSlop += aB.SlopSize();
1413 mSampled = mSampled || aB.IsSampled();
1414 }
1416 void Add(const RecordSize& aRecordSize)
1417 {
1418 mReq += aRecordSize.Req();
1419 mSlop += aRecordSize.Slop();
1420 mSampled = mSampled || aRecordSize.IsSampled();
1421 }
1423 static int Cmp(const RecordSize& aA, const RecordSize& aB)
1424 {
1425 // Primary sort: put bigger usable sizes first.
1426 if (aA.Usable() > aB.Usable()) return -1;
1427 if (aA.Usable() < aB.Usable()) return 1;
1429 // Secondary sort: put bigger requested sizes first.
1430 if (aA.Req() > aB.Req()) return -1;
1431 if (aA.Req() < aB.Req()) return 1;
1433 // Tertiary sort: put non-sampled records before sampled records.
1434 if (!aA.mSampled && aB.mSampled) return -1;
1435 if ( aA.mSampled && !aB.mSampled) return 1;
1437 return 0;
1438 }
1439 };
1441 // A collection of one or more heap blocks with a common TraceRecordKey.
1442 class TraceRecord : public TraceRecordKey
1443 {
1444 // The TraceRecordKey base class serves as the key in TraceRecordTables.
1445 // These two fields constitute the value, so it's ok for them to be
1446 // |mutable|.
1447 mutable uint32_t mNumBlocks; // number of blocks with this TraceRecordKey
1448 mutable RecordSize mRecordSize; // combined size of those blocks
1450 public:
1451 explicit TraceRecord(const TraceRecordKey& aKey)
1452 : TraceRecordKey(aKey),
1453 mNumBlocks(0),
1454 mRecordSize()
1455 {}
1457 uint32_t NumBlocks() const { return mNumBlocks; }
1459 const RecordSize& GetRecordSize() const { return mRecordSize; }
1461 // This is |const| thanks to the |mutable| fields above.
1462 void Add(const Block& aB) const
1463 {
1464 mNumBlocks++;
1465 mRecordSize.Add(aB);
1466 }
1468 // For PrintSortedRecords.
1469 static const char* const kRecordKind;
1470 static bool recordsOverlap() { return false; }
1472 void Print(const Writer& aWriter, LocationService* aLocService,
1473 uint32_t aM, uint32_t aN, const char* aStr, const char* astr,
1474 size_t aCategoryUsableSize, size_t aCumulativeUsableSize,
1475 size_t aTotalUsableSize) const;
1477 static int QsortCmp(const void* aA, const void* aB)
1478 {
1479 const TraceRecord* const a = *static_cast<const TraceRecord* const*>(aA);
1480 const TraceRecord* const b = *static_cast<const TraceRecord* const*>(aB);
1482 return RecordSize::Cmp(a->mRecordSize, b->mRecordSize);
1483 }
1484 };
1486 const char* const TraceRecord::kRecordKind = "trace";
1488 typedef js::HashSet<TraceRecord, TraceRecord, InfallibleAllocPolicy>
1489 TraceRecordTable;
1491 void
1492 TraceRecord::Print(const Writer& aWriter, LocationService* aLocService,
1493 uint32_t aM, uint32_t aN, const char* aStr, const char* astr,
1494 size_t aCategoryUsableSize, size_t aCumulativeUsableSize,
1495 size_t aTotalUsableSize) const
1496 {
1497 bool showTilde = mRecordSize.IsSampled();
1499 W("%s: %s block%s in stack trace record %s of %s\n",
1500 aStr,
1501 Show(mNumBlocks, gBuf1, kBufLen, showTilde), Plural(mNumBlocks),
1502 Show(aM, gBuf2, kBufLen),
1503 Show(aN, gBuf3, kBufLen));
1505 W(" %s bytes (%s requested / %s slop)\n",
1506 Show(mRecordSize.Usable(), gBuf1, kBufLen, showTilde),
1507 Show(mRecordSize.Req(), gBuf2, kBufLen, showTilde),
1508 Show(mRecordSize.Slop(), gBuf3, kBufLen, showTilde));
1510 W(" %4.2f%% of the heap (%4.2f%% cumulative); "
1511 " %4.2f%% of %s (%4.2f%% cumulative)\n",
1512 Percent(mRecordSize.Usable(), aTotalUsableSize),
1513 Percent(aCumulativeUsableSize, aTotalUsableSize),
1514 Percent(mRecordSize.Usable(), aCategoryUsableSize),
1515 astr,
1516 Percent(aCumulativeUsableSize, aCategoryUsableSize));
1518 W(" Allocated at\n");
1519 mAllocStackTrace->Print(aWriter, aLocService);
1521 if (mReportStackTrace1) {
1522 W("\n Reported at\n");
1523 mReportStackTrace1->Print(aWriter, aLocService);
1524 }
1525 if (mReportStackTrace2) {
1526 W("\n Reported again at\n");
1527 mReportStackTrace2->Print(aWriter, aLocService);
1528 }
1530 W("\n");
1531 }
1533 //---------------------------------------------------------------------------
1534 // Stack frame records
1535 //---------------------------------------------------------------------------
1537 // A collection of one or more stack frames (from heap block allocation stack
1538 // traces) with a common PC.
1539 class FrameRecord
1540 {
1541 // mPc is used as the key in FrameRecordTable, and the other members
1542 // constitute the value, so it's ok for them to be |mutable|.
1543 const void* const mPc;
1544 mutable size_t mNumBlocks;
1545 mutable size_t mNumTraceRecords;
1546 mutable RecordSize mRecordSize;
1548 public:
1549 explicit FrameRecord(const void* aPc)
1550 : mPc(aPc),
1551 mNumBlocks(0),
1552 mNumTraceRecords(0),
1553 mRecordSize()
1554 {}
1556 const RecordSize& GetRecordSize() const { return mRecordSize; }
1558 // This is |const| thanks to the |mutable| fields above.
1559 void Add(const TraceRecord& aTr) const
1560 {
1561 mNumBlocks += aTr.NumBlocks();
1562 mNumTraceRecords++;
1563 mRecordSize.Add(aTr.GetRecordSize());
1564 }
1566 void Print(const Writer& aWriter, LocationService* aLocService,
1567 uint32_t aM, uint32_t aN, const char* aStr, const char* astr,
1568 size_t aCategoryUsableSize, size_t aCumulativeUsableSize,
1569 size_t aTotalUsableSize) const;
1571 static int QsortCmp(const void* aA, const void* aB)
1572 {
1573 const FrameRecord* const a = *static_cast<const FrameRecord* const*>(aA);
1574 const FrameRecord* const b = *static_cast<const FrameRecord* const*>(aB);
1576 return RecordSize::Cmp(a->mRecordSize, b->mRecordSize);
1577 }
1579 // For PrintSortedRecords.
1580 static const char* const kRecordKind;
1581 static bool recordsOverlap() { return true; }
1583 // Hash policy.
1585 typedef const void* Lookup;
1587 static uint32_t hash(const void* const& aPc)
1588 {
1589 return mozilla::HashGeneric(aPc);
1590 }
1592 static bool match(const FrameRecord& aFr, const void* const& aPc)
1593 {
1594 return aFr.mPc == aPc;
1595 }
1596 };
1598 const char* const FrameRecord::kRecordKind = "frame";
1600 typedef js::HashSet<FrameRecord, FrameRecord, InfallibleAllocPolicy>
1601 FrameRecordTable;
1603 void
1604 FrameRecord::Print(const Writer& aWriter, LocationService* aLocService,
1605 uint32_t aM, uint32_t aN, const char* aStr, const char* astr,
1606 size_t aCategoryUsableSize, size_t aCumulativeUsableSize,
1607 size_t aTotalUsableSize) const
1608 {
1609 (void)aCumulativeUsableSize;
1611 bool showTilde = mRecordSize.IsSampled();
1613 W("%s: %s block%s from %s stack trace record%s in stack frame record %s of %s\n",
1614 aStr,
1615 Show(mNumBlocks, gBuf1, kBufLen, showTilde), Plural(mNumBlocks),
1616 Show(mNumTraceRecords, gBuf2, kBufLen, showTilde), Plural(mNumTraceRecords),
1617 Show(aM, gBuf3, kBufLen),
1618 Show(aN, gBuf4, kBufLen));
1620 W(" %s bytes (%s requested / %s slop)\n",
1621 Show(mRecordSize.Usable(), gBuf1, kBufLen, showTilde),
1622 Show(mRecordSize.Req(), gBuf2, kBufLen, showTilde),
1623 Show(mRecordSize.Slop(), gBuf3, kBufLen, showTilde));
1625 W(" %4.2f%% of the heap; %4.2f%% of %s\n",
1626 Percent(mRecordSize.Usable(), aTotalUsableSize),
1627 Percent(mRecordSize.Usable(), aCategoryUsableSize),
1628 astr);
1630 W(" PC is\n");
1631 aLocService->WriteLocation(aWriter, mPc);
1632 W("\n");
1633 }
1635 //---------------------------------------------------------------------------
1636 // Options (Part 2)
1637 //---------------------------------------------------------------------------
1639 // Given an |aOptionName| like "foo", succeed if |aArg| has the form "foo=blah"
1640 // (where "blah" is non-empty) and return the pointer to "blah". |aArg| can
1641 // have leading space chars (but not other whitespace).
1642 const char*
1643 Options::ValueIfMatch(const char* aArg, const char* aOptionName)
1644 {
1645 MOZ_ASSERT(!isspace(*aArg)); // any leading whitespace should not remain
1646 size_t optionLen = strlen(aOptionName);
1647 if (strncmp(aArg, aOptionName, optionLen) == 0 && aArg[optionLen] == '=' &&
1648 aArg[optionLen + 1]) {
1649 return aArg + optionLen + 1;
1650 }
1651 return nullptr;
1652 }
1654 // Extracts a |long| value for an option from an argument. It must be within
1655 // the range |aMin..aMax| (inclusive).
1656 bool
1657 Options::GetLong(const char* aArg, const char* aOptionName,
1658 long aMin, long aMax, long* aN)
1659 {
1660 if (const char* optionValue = ValueIfMatch(aArg, aOptionName)) {
1661 char* endPtr;
1662 *aN = strtol(optionValue, &endPtr, /* base */ 10);
1663 if (!*endPtr && aMin <= *aN && *aN <= aMax &&
1664 *aN != LONG_MIN && *aN != LONG_MAX) {
1665 return true;
1666 }
1667 }
1668 return false;
1669 }
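// Illustrative examples (not part of the original source), using the
// --max-frames option whose range is 1..StackTrace::MaxFrames (24):
//
//   long n;
//   GetLong("--max-frames=12", "--max-frames", 1, 24, &n);  // true, n == 12
//   GetLong("--max-frames=0",  "--max-frames", 1, 24, &n);  // false: below aMin
//   GetLong("--max-frames=",   "--max-frames", 1, 24, &n);  // false: empty value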
1671 // The sample-below default is a prime number close to 4096.
1672 // - Why that size? Because it's *much* faster but only moderately less precise
1673 // than a size of 1.
1674 // - Why prime? Because it makes our sampling more random. If we used a size
1675 // of 4096, for example, then our alloc counter would only take on even
1676 // values, because jemalloc always rounds up request sizes. In contrast, a
1677 // prime size will explore all possible values of the alloc counter.
1678 //
1679 Options::Options(const char* aDMDEnvVar)
1680 : mDMDEnvVar(InfallibleAllocPolicy::strdup_(aDMDEnvVar)),
1681 mSampleBelowSize(4093, 100 * 100 * 1000),
1682 mMaxFrames(StackTrace::MaxFrames, StackTrace::MaxFrames),
1683 mMaxRecords(1000, 1000000),
1684 mMode(Normal)
1685 {
1686 char* e = mDMDEnvVar;
1687 if (strcmp(e, "1") != 0) {
1688 bool isEnd = false;
1689 while (!isEnd) {
1690 // Consume leading whitespace.
1691 while (isspace(*e)) {
1692 e++;
1693 }
1695 // Save the start of the arg.
1696 const char* arg = e;
1698 // Find the first char after the arg, and temporarily change it to '\0'
1699 // to isolate the arg.
1700 while (!isspace(*e) && *e != '\0') {
1701 e++;
1702 }
1703 char replacedChar = *e;
1704 isEnd = replacedChar == '\0';
1705 *e = '\0';
1707 // Handle arg
1708 long myLong;
1709 if (GetLong(arg, "--sample-below", 1, mSampleBelowSize.mMax, &myLong)) {
1710 mSampleBelowSize.mActual = myLong;
1712 } else if (GetLong(arg, "--max-frames", 1, mMaxFrames.mMax, &myLong)) {
1713 mMaxFrames.mActual = myLong;
1715 } else if (GetLong(arg, "--max-records", 1, mMaxRecords.mMax, &myLong)) {
1716 mMaxRecords.mActual = myLong;
1718 } else if (strcmp(arg, "--mode=normal") == 0) {
1719 mMode = Options::Normal;
1720 } else if (strcmp(arg, "--mode=test") == 0) {
1721 mMode = Options::Test;
1722 } else if (strcmp(arg, "--mode=stress") == 0) {
1723 mMode = Options::Stress;
1725 } else if (strcmp(arg, "") == 0) {
1726 // This can only happen if there is trailing whitespace. Ignore.
1727 MOZ_ASSERT(isEnd);
1729 } else {
1730 BadArg(arg);
1731 }
1733 // Undo the temporary isolation.
1734 *e = replacedChar;
1735 }
1736 }
1737 }
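// Illustrative example (not part of the original source): a $DMD value such as
//
//   DMD="--sample-below=4093 --max-frames=16 --mode=test"
//
// is parsed by the loop above into mSampleBelowSize.mActual == 4093,
// mMaxFrames.mActual == 16 and mMode == Test, while DMD="1" keeps the
// defaults.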
1739 void
1740 Options::BadArg(const char* aArg)
1741 {
1742 StatusMsg("\n");
1743 StatusMsg("Bad entry in the $DMD environment variable: '%s'.\n", aArg);
1744 StatusMsg("\n");
1745 StatusMsg("Valid values of $DMD are:\n");
1746 StatusMsg("- undefined or \"\" or \"0\", which disables DMD, or\n");
1747 StatusMsg("- \"1\", which enables it with the default options, or\n");
1748 StatusMsg("- a whitespace-separated list of |--option=val| entries, which\n");
1749 StatusMsg(" enables it with non-default options.\n");
1750 StatusMsg("\n");
1751 StatusMsg("The following options are allowed; defaults are shown in [].\n");
1752 StatusMsg(" --sample-below=<1..%d> Sample blocks smaller than this [%d]\n",
1753 int(mSampleBelowSize.mMax),
1754 int(mSampleBelowSize.mDefault));
1755 StatusMsg(" (prime numbers are recommended)\n");
1756 StatusMsg(" --max-frames=<1..%d> Max. depth of stack traces [%d]\n",
1757 int(mMaxFrames.mMax),
1758 int(mMaxFrames.mDefault));
1759 StatusMsg(" --max-records=<1..%u> Max. number of records printed [%u]\n",
1760 mMaxRecords.mMax,
1761 mMaxRecords.mDefault);
1762 StatusMsg(" --mode=<normal|test|stress> Mode of operation [normal]\n");
1763 StatusMsg("\n");
1764 exit(1);
1765 }
1767 //---------------------------------------------------------------------------
1768 // DMD start-up
1769 //---------------------------------------------------------------------------
1771 #ifdef XP_MACOSX
1772 static void
1773 NopStackWalkCallback(void* aPc, void* aSp, void* aClosure)
1774 {
1775 }
1776 #endif
1778 // Note that fopen() can allocate.
1779 static FILE*
1780 OpenOutputFile(const char* aFilename)
1781 {
1782 FILE* fp = fopen(aFilename, "w");
1783 if (!fp) {
1784 StatusMsg("can't create %s file: %s\n", aFilename, strerror(errno));
1785 exit(1);
1786 }
1787 return fp;
1788 }
1790 static void RunTestMode(FILE* fp);
1791 static void RunStressMode(FILE* fp);
1793 // WARNING: this function runs *very* early -- before all static initializers
1794 // have run. For this reason, non-scalar globals such as gStateLock and
1795 // gStackTraceTable are allocated dynamically (so we can guarantee their
1796 // construction in this function) rather than statically.
1797 static void
1798 Init(const malloc_table_t* aMallocTable)
1799 {
1800 MOZ_ASSERT(!gIsDMDRunning);
1802 gMallocTable = aMallocTable;
1804 // DMD is controlled by the |DMD| environment variable.
1805 // - If it's unset or empty or "0", DMD doesn't run.
1806 // - Otherwise, the contents dictate DMD's behaviour.
1808 char* e = getenv("DMD");
1809 StatusMsg("$DMD = '%s'\n", e);
1811 if (!e || strcmp(e, "") == 0 || strcmp(e, "0") == 0) {
1812 StatusMsg("DMD is not enabled\n");
1813 return;
1814 }
1816 // Parse $DMD env var.
1817 gOptions = InfallibleAllocPolicy::new_<Options>(e);
1819 StatusMsg("DMD is enabled\n");
1821 #ifdef XP_MACOSX
1822 // On Mac OS X we need to call StackWalkInitCriticalAddress() very early
1823 // (prior to the creation of any mutexes, apparently) otherwise we can get
1824 // hangs when getting stack traces (bug 821577). But
1825 // StackWalkInitCriticalAddress() isn't exported from xpcom/, so instead we
1826 // just call NS_StackWalk, because that calls StackWalkInitCriticalAddress().
1827 // See the comment above StackWalkInitCriticalAddress() for more details.
1828 (void)NS_StackWalk(NopStackWalkCallback, /* skipFrames */ 0,
1829 /* maxFrames */ 1, nullptr, 0, nullptr);
1830 #endif
1832 gStateLock = InfallibleAllocPolicy::new_<Mutex>();
1834 gSmallBlockActualSizeCounter = 0;
1836 DMD_CREATE_TLS_INDEX(gTlsIndex);
1838 {
1839 AutoLockState lock;
1841 gStackTraceTable = InfallibleAllocPolicy::new_<StackTraceTable>();
1842 gStackTraceTable->init(8192);
1844 gBlockTable = InfallibleAllocPolicy::new_<BlockTable>();
1845 gBlockTable->init(8192);
1846 }
1848 if (gOptions->IsTestMode()) {
1849 // OpenOutputFile() can allocate. So do this before setting
1850 // gIsDMDRunning so those allocations don't show up in our results. Once
1851 // gIsDMDRunning is set we are intercepting malloc et al. in earnest.
1852 FILE* fp = OpenOutputFile("test.dmd");
1853 gIsDMDRunning = true;
1855 StatusMsg("running test mode...\n");
1856 RunTestMode(fp);
1857 StatusMsg("finished test mode\n");
1858 fclose(fp);
1859 exit(0);
1860 }
1862 if (gOptions->IsStressMode()) {
1863 FILE* fp = OpenOutputFile("stress.dmd");
1864 gIsDMDRunning = true;
1866 StatusMsg("running stress mode...\n");
1867 RunStressMode(fp);
1868 StatusMsg("finished stress mode\n");
1869 fclose(fp);
1870 exit(0);
1871 }
1873 gIsDMDRunning = true;
1874 }
1876 //---------------------------------------------------------------------------
1877 // DMD reporting and unreporting
1878 //---------------------------------------------------------------------------
1880 static void
1881 ReportHelper(const void* aPtr, bool aReportedOnAlloc)
1882 {
1883 if (!gIsDMDRunning || !aPtr) {
1884 return;
1885 }
1887 Thread* t = Thread::Fetch();
1889 AutoBlockIntercepts block(t);
1890 AutoLockState lock;
1892 if (BlockTable::Ptr p = gBlockTable->lookup(aPtr)) {
1893 p->Report(t, aReportedOnAlloc);
1894 } else {
1895 // We have no record of the block. Do nothing. Either:
1896 // - We're sampling and we skipped this block. This is likely.
1897 // - It's a bogus pointer. This is unlikely because Report() is almost
1898 // always called in conjunction with a malloc_size_of-style function.
1899 }
1900 }
1902 MOZ_EXPORT void
1903 Report(const void* aPtr)
1904 {
1905 ReportHelper(aPtr, /* onAlloc */ false);
1906 }
1908 MOZ_EXPORT void
1909 ReportOnAlloc(const void* aPtr)
1910 {
1911 ReportHelper(aPtr, /* onAlloc */ true);
1912 }
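// Illustrative sketch (not part of the original source): a caller typically
// measures a block with a malloc_size_of-style function and reports it to DMD
// in the same place, e.g.
//
//   size_t n = SomeMallocSizeOf(aPtr);  // hypothetical measuring function
//   mozilla::dmd::Report(aPtr);         // mark the block as accounted for
//
// ReportOnAlloc() is the variant used at allocation time; such reports are not
// cleared by DMD when it dumps (see Block::UnreportIfNotReportedOnAlloc()).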
1914 //---------------------------------------------------------------------------
1915 // DMD output
1916 //---------------------------------------------------------------------------
1918 // This works for both TraceRecords and StackFrameRecords.
1919 template <class Record>
1920 static void
1921 PrintSortedRecords(const Writer& aWriter, LocationService* aLocService,
1922 const char* aStr, const char* astr,
1923 const js::HashSet<Record, Record, InfallibleAllocPolicy>&
1924 aRecordTable,
1925 size_t aCategoryUsableSize, size_t aTotalUsableSize)
1926 {
1927 const char* kind = Record::kRecordKind;
1928 StatusMsg(" creating and sorting %s stack %s record array...\n", astr, kind);
1930 // Convert the table into a sorted array.
1931 js::Vector<const Record*, 0, InfallibleAllocPolicy> recordArray;
1932 recordArray.reserve(aRecordTable.count());
1933 typedef js::HashSet<Record, Record, InfallibleAllocPolicy> RecordTable;
1934 for (typename RecordTable::Range r = aRecordTable.all();
1935 !r.empty();
1936 r.popFront()) {
1937 recordArray.infallibleAppend(&r.front());
1938 }
1939 qsort(recordArray.begin(), recordArray.length(), sizeof(recordArray[0]),
1940 Record::QsortCmp);
1942 WriteTitle("%s stack %s records\n", aStr, kind);
1944 if (recordArray.length() == 0) {
1945 W("(none)\n\n");
1946 return;
1947 }
1949 StatusMsg(" printing %s stack %s record array...\n", astr, kind);
1950 size_t cumulativeUsableSize = 0;
1952 // Limit the number of records printed, because fix-linux-stack.pl is too
1953 // damn slow. Note that we don't break out of this loop because we need to
1954 // keep adding to |cumulativeUsableSize|.
1955 uint32_t numRecords = recordArray.length();
1956 uint32_t maxRecords = gOptions->MaxRecords();
1957 for (uint32_t i = 0; i < numRecords; i++) {
1958 const Record* r = recordArray[i];
1959 cumulativeUsableSize += r->GetRecordSize().Usable();
1960 if (i < maxRecords) {
1961 r->Print(aWriter, aLocService, i+1, numRecords, aStr, astr,
1962 aCategoryUsableSize, cumulativeUsableSize, aTotalUsableSize);
1963 } else if (i == maxRecords) {
1964 W("%s: stopping after %s stack %s records\n\n", aStr,
1965 Show(maxRecords, gBuf1, kBufLen), kind);
1966 }
1967 }
1969 // This holds for TraceRecords, but not for FrameRecords, because a block can contribute to several FrameRecords and so their sizes overlap.
1970 MOZ_ASSERT_IF(!Record::recordsOverlap(),
1971 aCategoryUsableSize == cumulativeUsableSize);
1972 }
1974 static void
1975 PrintSortedTraceAndFrameRecords(const Writer& aWriter,
1976 LocationService* aLocService,
1977 const char* aStr, const char* astr,
1978 const TraceRecordTable& aTraceRecordTable,
1979 size_t aCategoryUsableSize,
1980 size_t aTotalUsableSize)
1981 {
1982 PrintSortedRecords(aWriter, aLocService, aStr, astr, aTraceRecordTable,
1983 aCategoryUsableSize, aTotalUsableSize);
1985 FrameRecordTable frameRecordTable;
1986 (void)frameRecordTable.init(2048);
1987 for (TraceRecordTable::Range r = aTraceRecordTable.all();
1988 !r.empty();
1989 r.popFront()) {
1990 const TraceRecord& tr = r.front();
1991 const StackTrace* st = tr.mAllocStackTrace;
1993 // A single PC can appear multiple times in a stack trace. We ignore
1994 // duplicates by first sorting and then ignoring adjacent duplicates.
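// For example, a hypothetical trace [A, B, A, C] sorts to [A, A, B, C]; the
// second A is adjacent to the first and gets skipped, so each frame is counted
// at most once per trace.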
1995 StackTrace sorted(*st);
1996 sorted.Sort(); // sorts the copy, not the original
1997 void* prevPc = (void*)intptr_t(-1);
1998 for (uint32_t i = 0; i < sorted.Length(); i++) {
1999 void* pc = sorted.Pc(i);
2000 if (pc == prevPc) {
2001 continue; // ignore duplicate
2002 }
2003 prevPc = pc;
2005 FrameRecordTable::AddPtr p = frameRecordTable.lookupForAdd(pc);
2006 if (!p) {
2007 FrameRecord fr(pc);
2008 (void)frameRecordTable.add(p, fr);
2009 }
2010 p->Add(tr);
2011 }
2012 }
2014 PrintSortedRecords(aWriter, aLocService, aStr, astr, frameRecordTable,
2015 aCategoryUsableSize, aTotalUsableSize);
2016 }
2018 // Note that, unlike most SizeOf* functions, this function does not take a
2019 // |mozilla::MallocSizeOf| argument. That's because those arguments are
2020 // primarily there to help DMD track heap blocks... but DMD deliberately
2021 // doesn't track the heap blocks it allocates for itself!
2022 //
2023 // SizeOfInternal should be called while you're holding the state lock and
2024 // while intercepts are blocked; SizeOf acquires the lock and blocks
2025 // intercepts.
2027 static void
2028 SizeOfInternal(Sizes* aSizes)
2029 {
2030 MOZ_ASSERT(gStateLock->IsLocked());
2031 MOZ_ASSERT(Thread::Fetch()->InterceptsAreBlocked());
2033 aSizes->Clear();
2035 if (!gIsDMDRunning) {
2036 return;
2037 }
2039 StackTraceSet usedStackTraces;
2040 GatherUsedStackTraces(usedStackTraces);
2042 for (StackTraceTable::Range r = gStackTraceTable->all();
2043 !r.empty();
2044 r.popFront()) {
2045 StackTrace* const& st = r.front();
2047 if (usedStackTraces.has(st)) {
2048 aSizes->mStackTracesUsed += MallocSizeOf(st);
2049 } else {
2050 aSizes->mStackTracesUnused += MallocSizeOf(st);
2051 }
2052 }
2054 aSizes->mStackTraceTable =
2055 gStackTraceTable->sizeOfIncludingThis(MallocSizeOf);
2057 aSizes->mBlockTable = gBlockTable->sizeOfIncludingThis(MallocSizeOf);
2058 }
2060 MOZ_EXPORT void
2061 SizeOf(Sizes* aSizes)
2062 {
2063 aSizes->Clear();
2065 if (!gIsDMDRunning) {
2066 return;
2067 }
2069 AutoBlockIntercepts block(Thread::Fetch());
2070 AutoLockState lock;
2071 SizeOfInternal(aSizes);
2072 }
2074 void
2075 ClearReportsInternal()
2076 {
2077 MOZ_ASSERT(gStateLock->IsLocked());
2079 // Unreport all blocks that were marked reported by a memory reporter. This
2080 // excludes those that were reported on allocation, because they need to keep
2081 // their reported marking.
2082 for (BlockTable::Range r = gBlockTable->all(); !r.empty(); r.popFront()) {
2083 r.front().UnreportIfNotReportedOnAlloc();
2084 }
2085 }
2087 MOZ_EXPORT void
2088 ClearReports()
2089 {
2090 if (!gIsDMDRunning) {
2091 return;
2092 }
2094 AutoLockState lock;
2095 ClearReportsInternal();
2096 }
2098 MOZ_EXPORT bool
2099 IsEnabled()
2100 {
2101 return gIsDMDRunning;
2102 }
2104 MOZ_EXPORT void
2105 Dump(Writer aWriter)
2106 {
2107 if (!gIsDMDRunning) {
2108 const char* msg = "cannot Dump(); DMD was not enabled at startup\n";
2109 StatusMsg("%s", msg);
2110 W("%s", msg);
2111 return;
2112 }
2114 AutoBlockIntercepts block(Thread::Fetch());
2115 AutoLockState lock;
2117 static int dumpCount = 1;
2118 StatusMsg("Dump %d {\n", dumpCount++);
2120 StatusMsg(" gathering stack trace records...\n");
2122 TraceRecordTable unreportedTraceRecordTable;
2123 (void)unreportedTraceRecordTable.init(1024);
2124 size_t unreportedUsableSize = 0;
2125 size_t unreportedNumBlocks = 0;
2127 TraceRecordTable onceReportedTraceRecordTable;
2128 (void)onceReportedTraceRecordTable.init(1024);
2129 size_t onceReportedUsableSize = 0;
2130 size_t onceReportedNumBlocks = 0;
2132 TraceRecordTable twiceReportedTraceRecordTable;
2133 (void)twiceReportedTraceRecordTable.init(0);
2134 size_t twiceReportedUsableSize = 0;
2135 size_t twiceReportedNumBlocks = 0;
2137 bool anyBlocksSampled = false;
2139 for (BlockTable::Range r = gBlockTable->all(); !r.empty(); r.popFront()) {
2140 const Block& b = r.front();
2142 TraceRecordTable* table;
2143 uint32_t numReports = b.NumReports();
2144 if (numReports == 0) {
2145 unreportedUsableSize += b.UsableSize();
2146 unreportedNumBlocks++;
2147 table = &unreportedTraceRecordTable;
2148 } else if (numReports == 1) {
2149 onceReportedUsableSize += b.UsableSize();
2150 onceReportedNumBlocks++;
2151 table = &onceReportedTraceRecordTable;
2152 } else {
2153 MOZ_ASSERT(numReports == 2);
2154 twiceReportedUsableSize += b.UsableSize();
2155 twiceReportedNumBlocks++;
2156 table = &twiceReportedTraceRecordTable;
2157 }
2158 TraceRecordKey key(b);
2159 TraceRecordTable::AddPtr p = table->lookupForAdd(key);
2160 if (!p) {
2161 TraceRecord tr(b);
2162 (void)table->add(p, tr);
2163 }
2164 p->Add(b);
2166 anyBlocksSampled = anyBlocksSampled || b.IsSampled();
2167 }
2168 size_t totalUsableSize =
2169 unreportedUsableSize + onceReportedUsableSize + twiceReportedUsableSize;
2170 size_t totalNumBlocks =
2171 unreportedNumBlocks + onceReportedNumBlocks + twiceReportedNumBlocks;
2173 WriteTitle("Invocation\n");
2174 W("$DMD = '%s'\n", gOptions->DMDEnvVar());
2175 W("Sample-below size = %lld\n\n",
2176 (long long)(gOptions->SampleBelowSize()));
2178 // Allocate this on the heap instead of the stack because it's fairly large.
2179 LocationService* locService = InfallibleAllocPolicy::new_<LocationService>();
2181 PrintSortedRecords(aWriter, locService, "Twice-reported", "twice-reported",
2182 twiceReportedTraceRecordTable, twiceReportedUsableSize,
2183 totalUsableSize);
2185 PrintSortedTraceAndFrameRecords(aWriter, locService,
2186 "Unreported", "unreported",
2187 unreportedTraceRecordTable,
2188 unreportedUsableSize, totalUsableSize);
2190 PrintSortedTraceAndFrameRecords(aWriter, locService,
2191 "Once-reported", "once-reported",
2192 onceReportedTraceRecordTable,
2193 onceReportedUsableSize, totalUsableSize);
2195 bool showTilde = anyBlocksSampled;
2196 WriteTitle("Summary\n");
2198 W("Total: %12s bytes (%6.2f%%) in %7s blocks (%6.2f%%)\n",
2199 Show(totalUsableSize, gBuf1, kBufLen, showTilde),
2200 100.0,
2201 Show(totalNumBlocks, gBuf2, kBufLen, showTilde),
2202 100.0);
2204 W("Unreported: %12s bytes (%6.2f%%) in %7s blocks (%6.2f%%)\n",
2205 Show(unreportedUsableSize, gBuf1, kBufLen, showTilde),
2206 Percent(unreportedUsableSize, totalUsableSize),
2207 Show(unreportedNumBlocks, gBuf2, kBufLen, showTilde),
2208 Percent(unreportedNumBlocks, totalNumBlocks));
2210 W("Once-reported: %12s bytes (%6.2f%%) in %7s blocks (%6.2f%%)\n",
2211 Show(onceReportedUsableSize, gBuf1, kBufLen, showTilde),
2212 Percent(onceReportedUsableSize, totalUsableSize),
2213 Show(onceReportedNumBlocks, gBuf2, kBufLen, showTilde),
2214 Percent(onceReportedNumBlocks, totalNumBlocks));
2216 W("Twice-reported: %12s bytes (%6.2f%%) in %7s blocks (%6.2f%%)\n",
2217 Show(twiceReportedUsableSize, gBuf1, kBufLen, showTilde),
2218 Percent(twiceReportedUsableSize, totalUsableSize),
2219 Show(twiceReportedNumBlocks, gBuf2, kBufLen, showTilde),
2220 Percent(twiceReportedNumBlocks, totalNumBlocks));
2222 W("\n");
2224 // Stats are non-deterministic, so don't show them in test mode.
2225 if (!gOptions->IsTestMode()) {
2226 Sizes sizes;
2227 SizeOfInternal(&sizes);
2229 WriteTitle("Execution measurements\n");
2231 W("Data structures that persist after Dump() ends:\n");
2233 W(" Used stack traces: %10s bytes\n",
2234 Show(sizes.mStackTracesUsed, gBuf1, kBufLen));
2236 W(" Unused stack traces: %10s bytes\n",
2237 Show(sizes.mStackTracesUnused, gBuf1, kBufLen));
2239 W(" Stack trace table: %10s bytes (%s entries, %s used)\n",
2240 Show(sizes.mStackTraceTable, gBuf1, kBufLen),
2241 Show(gStackTraceTable->capacity(), gBuf2, kBufLen),
2242 Show(gStackTraceTable->count(), gBuf3, kBufLen));
2244 W(" Block table: %10s bytes (%s entries, %s used)\n",
2245 Show(sizes.mBlockTable, gBuf1, kBufLen),
2246 Show(gBlockTable->capacity(), gBuf2, kBufLen),
2247 Show(gBlockTable->count(), gBuf3, kBufLen));
2249 W("\nData structures that are destroyed after Dump() ends:\n");
2251 size_t unreportedSize =
2252 unreportedTraceRecordTable.sizeOfIncludingThis(MallocSizeOf);
2253 W(" Unreported table: %10s bytes (%s entries, %s used)\n",
2254 Show(unreportedSize, gBuf1, kBufLen),
2255 Show(unreportedTraceRecordTable.capacity(), gBuf2, kBufLen),
2256 Show(unreportedTraceRecordTable.count(), gBuf3, kBufLen));
2258 size_t onceReportedSize =
2259 onceReportedTraceRecordTable.sizeOfIncludingThis(MallocSizeOf);
2260 W(" Once-reported table: %10s bytes (%s entries, %s used)\n",
2261 Show(onceReportedSize, gBuf1, kBufLen),
2262 Show(onceReportedTraceRecordTable.capacity(), gBuf2, kBufLen),
2263 Show(onceReportedTraceRecordTable.count(), gBuf3, kBufLen));
2265 size_t twiceReportedSize =
2266 twiceReportedTraceRecordTable.sizeOfIncludingThis(MallocSizeOf);
2267 W(" Twice-reported table: %10s bytes (%s entries, %s used)\n",
2268 Show(twiceReportedSize, gBuf1, kBufLen),
2269 Show(twiceReportedTraceRecordTable.capacity(), gBuf2, kBufLen),
2270 Show(twiceReportedTraceRecordTable.count(), gBuf3, kBufLen));
2272 W(" Location service: %10s bytes\n",
2273 Show(locService->SizeOfIncludingThis(), gBuf1, kBufLen));
2275 W("\nCounts:\n");
2277 size_t hits = locService->NumCacheHits();
2278 size_t misses = locService->NumCacheMisses();
2279 size_t requests = hits + misses;
2280 W(" Location service: %10s requests\n",
2281 Show(requests, gBuf1, kBufLen));
2283 size_t count = locService->CacheCount();
2284 size_t capacity = locService->CacheCapacity();
2285 W(" Location service cache: %4.1f%% hit rate, %.1f%% occupancy at end\n",
2286 Percent(hits, requests), Percent(count, capacity));
2288 W("\n");
2289 }
2291 InfallibleAllocPolicy::delete_(locService);
2293 ClearReportsInternal(); // Use the internal version; we already hold the lock.
2295 StatusMsg("}\n");
2296 }
2298 //---------------------------------------------------------------------------
2299 // Testing
2300 //---------------------------------------------------------------------------
2302 // This function checks that heap blocks that have the same stack trace but
2303 // different (or no) reporters get aggregated separately.
2304 void foo()
2305 {
2306 char* a[6];
2307 for (int i = 0; i < 6; i++) {
2308 a[i] = (char*) malloc(128 - 16*i);
2309 }
2311 for (int i = 0; i <= 1; i++)
2312 Report(a[i]); // reported
2313 Report(a[2]); // reported
2314 Report(a[3]); // reported
2315 // a[4], a[5] unreported
2316 }
2318 // This stops otherwise-unused variables from being optimized away.
2319 static void
2320 UseItOrLoseIt(void* a)
2321 {
2322 char buf[64];
2323 sprintf(buf, "%p\n", a);
2324 fwrite(buf, 1, strlen(buf), stderr); // write just the string, not its NUL terminator
2325 }
2327 // The output from this should be compared against test-expected.dmd. It's
2328 // been tested on Linux64, and probably will give different results on other
2329 // platforms.
2330 static void
2331 RunTestMode(FILE* fp)
2332 {
2333 Writer writer(FpWrite, fp);
2335 // The first part of this test requires sampling to be disabled.
2336 gOptions->SetSampleBelowSize(1);
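// (A sample-below size of 1 effectively disables sampling: no block's usable
// size is below 1 byte, so every block is recorded exactly. RunStressMode()
// below uses the same value for the same reason.)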
2338 // Dump 1. Zero for everything.
2339 Dump(writer);
2341 // Dump 2: 1 freed, 9 out of 10 unreported.
2342 // Dump 3: still present and unreported.
2343 int i;
2344 char* a;
2345 for (i = 0; i < 10; i++) {
2346 a = (char*) malloc(100);
2347 UseItOrLoseIt(a);
2348 }
2349 free(a);
2351 // Min-sized block.
2352 // Dump 2: reported.
2353 // Dump 3: thrice-reported.
2354 char* a2 = (char*) malloc(0);
2355 Report(a2);
2357 // Operator new[].
2358 // Dump 2: reported.
2359 // Dump 3: reportedness carries over, due to ReportOnAlloc.
2360 char* b = new char[10];
2361 ReportOnAlloc(b);
2363 // ReportOnAlloc, then freed.
2364 // Dump 2: freed, irrelevant.
2365 // Dump 3: freed, irrelevant.
2366 char* b2 = new char;
2367 ReportOnAlloc(b2);
2368 free(b2);
2370 // Dump 2: reported 4 times.
2371 // Dump 3: freed, irrelevant.
2372 char* c = (char*) calloc(10, 3);
2373 Report(c);
2374 for (int i = 0; i < 3; i++) {
2375 Report(c);
2376 }
2378 // Dump 2: ignored.
2379 // Dump 3: irrelevant.
2380 Report((void*)(intptr_t)i);
2382 // jemalloc rounds this up to 8192.
2383 // Dump 2: reported.
2384 // Dump 3: freed.
2385 char* e = (char*) malloc(4096);
2386 e = (char*) realloc(e, 4097);
2387 Report(e);
2389 // First realloc is like malloc; second realloc is shrinking.
2390 // Dump 2: reported.
2391 // Dump 3: re-reported.
2392 char* e2 = (char*) realloc(nullptr, 1024);
2393 e2 = (char*) realloc(e2, 512);
2394 Report(e2);
2396 // First realloc is like malloc; second realloc creates a min-sized block.
2397 // XXX: on Windows, second realloc frees the block.
2398 // Dump 2: reported.
2399 // Dump 3: freed, irrelevant.
2400 char* e3 = (char*) realloc(nullptr, 1023);
2401 //e3 = (char*) realloc(e3, 0);
2402 MOZ_ASSERT(e3);
2403 Report(e3);
2405 // Dump 2: freed, irrelevant.
2406 // Dump 3: freed, irrelevant.
2407 char* f = (char*) malloc(64);
2408 free(f);
2410 // Dump 2: ignored.
2411 // Dump 3: irrelevant.
2412 Report((void*)(intptr_t)0x0);
2414 // Dump 2: mixture of reported and unreported.
2415 // Dump 3: all unreported.
2416 foo();
2417 foo();
2419 // Dump 2: twice-reported.
2420 // Dump 3: twice-reported.
2421 char* g1 = (char*) malloc(77);
2422 ReportOnAlloc(g1);
2423 ReportOnAlloc(g1);
2425 // Dump 2: twice-reported.
2426 // Dump 3: once-reported.
2427 char* g2 = (char*) malloc(78);
2428 Report(g2);
2429 ReportOnAlloc(g2);
2431 // Dump 2: twice-reported.
2432 // Dump 3: once-reported.
2433 char* g3 = (char*) malloc(79);
2434 ReportOnAlloc(g3);
2435 Report(g3);
2437 // All the odd-ball ones.
2438 // Dump 2: all unreported.
2439 // Dump 3: all freed, irrelevant.
2440 // XXX: no memalign on Mac
2441 //void* x = memalign(64, 65); // rounds up to 128
2442 //UseItOrLoseIt(x);
2443 // XXX: posix_memalign doesn't work on B2G
2444 //void* y;
2445 //posix_memalign(&y, 128, 129); // rounds up to 256
2446 //UseItOrLoseIt(y);
2447 // XXX: valloc doesn't work on Windows.
2448 //void* z = valloc(1); // rounds up to 4096
2449 //UseItOrLoseIt(z);
2450 //aligned_alloc(64, 256); // XXX: C11 only
2452 // Dump 2.
2453 Dump(writer);
2455 //---------
2457 Report(a2);
2458 Report(a2);
2459 free(c);
2460 free(e);
2461 Report(e2);
2462 free(e3);
2463 //free(x);
2464 //free(y);
2465 //free(z);
2467 // Dump 3.
2468 Dump(writer);
2470 //---------
2472 // Clear all knowledge of existing blocks to give us a clean slate.
2473 gBlockTable->clear();
2475 gOptions->SetSampleBelowSize(128);
2477 char* s;
2479 // This equals the sample size, and so is reported exactly. It should be
2480 // listed before records of the same size that are sampled.
2481 s = (char*) malloc(128);
2482 UseItOrLoseIt(s);
2484 // This exceeds the sample size, and so is reported exactly.
2485 s = (char*) malloc(144);
2486 UseItOrLoseIt(s);
2488 // These together constitute exactly one sample.
2489 for (int i = 0; i < 16; i++) {
2490 s = (char*) malloc(8);
2491 UseItOrLoseIt(s);
2492 }
2493 MOZ_ASSERT(gSmallBlockActualSizeCounter == 0);
2495 // These fall 8 bytes short of a full sample.
2496 for (int i = 0; i < 15; i++) {
2497 s = (char*) malloc(8);
2498 UseItOrLoseIt(s);
2499 }
2500 MOZ_ASSERT(gSmallBlockActualSizeCounter == 120);
2502 // This exceeds the sample size, and so is recorded exactly.
2503 s = (char*) malloc(256);
2504 UseItOrLoseIt(s);
2505 MOZ_ASSERT(gSmallBlockActualSizeCounter == 120);
2507 // Together with the 120 bytes left over from the |i < 15| loop above, this gets to more than a full sample (120 + 96 = 128 + 88).
2508 s = (char*) malloc(96);
2509 UseItOrLoseIt(s);
2510 MOZ_ASSERT(gSmallBlockActualSizeCounter == 88);
2512 // This gets to another full sample.
2513 for (int i = 0; i < 5; i++) {
2514 s = (char*) malloc(8);
2515 UseItOrLoseIt(s);
2516 }
2517 MOZ_ASSERT(gSmallBlockActualSizeCounter == 0);
2519 // This allocates 16, 32, ..., 128 bytes, which results in a stack trace
2520 // record that contains a mix of sampled and non-sampled blocks, and so should
2521 // be printed with '~' signs.
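// (The 16..112 byte blocks fall below the 128-byte sample-below size and so
// are sampled; the final 128-byte block is recorded exactly, hence the mix.)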
2522 for (int i = 1; i <= 8; i++) {
2523 s = (char*) malloc(i * 16);
2524 UseItOrLoseIt(s);
2525 }
2526 MOZ_ASSERT(gSmallBlockActualSizeCounter == 64);
2528 // At the end we're 64 bytes into the current sample so we report ~1,424
2529 // bytes of allocation overall, which is 64 less than the real value 1,488.
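// A worked check of those figures, treating each completed sample as one
// recorded block of the 128-byte sample size (cf. the assertions on
// gSmallBlockActualSizeCounter above):
//   exactly-recorded blocks:  128 + 144 + 256 + 128                 =   656
//   sampled allocations:      16*8 + 15*8 + 96 + 5*8 + (16+...+112) =   832
//   real total:               656 + 832                             = 1,488
//   reported total:           656 + 6 completed samples * 128       = 1,424
// which leaves the 64 bytes of the in-progress sample unreported.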
2531 // Dump 4.
2532 Dump(writer);
2533 }
2535 //---------------------------------------------------------------------------
2536 // Stress testing microbenchmark
2537 //---------------------------------------------------------------------------
2539 // This stops otherwise-unused variables from being optimized away.
2540 static void
2541 UseItOrLoseIt2(void* a)
2542 {
2543 if (a == (void*)0x42) {
2544 printf("UseItOrLoseIt2\n");
2545 }
2546 }
2548 MOZ_NEVER_INLINE static void
2549 stress5()
2550 {
2551 for (int i = 0; i < 10; i++) {
2552 void* x = malloc(64);
2553 UseItOrLoseIt2(x);
2554 if (i & 1) {
2555 free(x);
2556 }
2557 }
2558 }
2560 MOZ_NEVER_INLINE static void
2561 stress4()
2562 {
2563 stress5(); stress5(); stress5(); stress5(); stress5();
2564 stress5(); stress5(); stress5(); stress5(); stress5();
2565 }
2567 MOZ_NEVER_INLINE static void
2568 stress3()
2569 {
2570 for (int i = 0; i < 10; i++) {
2571 stress4();
2572 }
2573 }
2575 MOZ_NEVER_INLINE static void
2576 stress2()
2577 {
2578 stress3(); stress3(); stress3(); stress3(); stress3();
2579 stress3(); stress3(); stress3(); stress3(); stress3();
2580 }
2582 MOZ_NEVER_INLINE static void
2583 stress1()
2584 {
2585 for (int i = 0; i < 10; i++) {
2586 stress2();
2587 }
2588 }
2590 // This stress test does lots of allocations and frees, which is where most of
2591 // DMD's overhead occurs. It allocates 1,000,000 64-byte blocks, spread evenly
2592 // across 1,000 distinct stack traces. It frees every second one immediately
2593 // after allocating it.
2594 //
2595 // It's highly artificial, but it's deterministic and easy to run. It can be
2596 // timed under different conditions to glean performance data.
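// A rough check of those numbers, assuming each textually separate call below
// compiles to its own return address (MOZ_NEVER_INLINE helps keep the frames
// distinct):
//   blocks: 10 mallocs per stress5() call * 10 * 10 * 10 * 10 * 10 callers
//           = 1,000,000
//   traces: only stress4(), stress2() and RunStressMode() fan out over 10
//           distinct call sites; the other levels loop over a single site, so
//           10 * 10 * 10 = 1,000 distinct allocation stacks.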
2597 static void
2598 RunStressMode(FILE* fp)
2599 {
2600 Writer writer(FpWrite, fp);
2602 // Disable sampling for maximum stress.
2603 gOptions->SetSampleBelowSize(1);
2605 stress1(); stress1(); stress1(); stress1(); stress1();
2606 stress1(); stress1(); stress1(); stress1(); stress1();
2608 Dump(writer);
2609 }
2611 } // namespace dmd
2612 } // namespace mozilla