|
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
|
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */ |
|
3 /* This Source Code Form is subject to the terms of the Mozilla Public |
|
4 * License, v. 2.0. If a copy of the MPL was not distributed with this |
|
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
|
6 |
|
7 #include "DMD.h" |
|
8 |
|
9 #include <ctype.h> |
|
10 #include <errno.h> |
|
11 #include <limits.h> |
|
12 #include <stdarg.h> |
|
13 #include <stdio.h> |
|
14 #include <stdlib.h> |
|
15 #include <string.h> |
|
16 |
|
17 #ifdef XP_WIN |
|
18 #if defined(MOZ_OPTIMIZE) && !defined(MOZ_PROFILING) |
|
19 #error "Optimized, DMD-enabled builds on Windows must be built with --enable-profiling" |
|
20 #endif |
|
21 #include <windows.h> |
|
22 #include <process.h> |
|
23 #else |
|
24 #include <unistd.h> |
|
25 #endif |
|
26 |
|
27 #ifdef ANDROID |
|
28 #include <android/log.h> |
|
29 #endif |
|
30 |
|
31 #include "nscore.h" |
|
32 #include "nsStackWalk.h" |
|
33 |
|
34 #include "js/HashTable.h" |
|
35 #include "js/Vector.h" |
|
36 |
|
37 #include "mozilla/Assertions.h" |
|
38 #include "mozilla/HashFunctions.h" |
|
39 #include "mozilla/Likely.h" |
|
40 #include "mozilla/MemoryReporting.h" |
|
41 |
|
42 // MOZ_REPLACE_ONLY_MEMALIGN saves us from having to define |
|
43 // replace_{posix_memalign,aligned_alloc,valloc}. It requires defining |
|
44 // PAGE_SIZE. Nb: sysconf() is expensive, but it's only used for (the obsolete |
|
45 // and rarely used) valloc. |
|
46 #define MOZ_REPLACE_ONLY_MEMALIGN 1 |
|
47 #ifdef XP_WIN |
|
48 #define PAGE_SIZE GetPageSize() |
|
49 static long GetPageSize() |
|
50 { |
|
51 SYSTEM_INFO si; |
|
52 GetSystemInfo(&si); |
|
53 return si.dwPageSize; |
|
54 } |
|
55 #else |
|
56 #define PAGE_SIZE sysconf(_SC_PAGESIZE) |
|
57 #endif |
|
58 #include "replace_malloc.h" |
|
59 #undef MOZ_REPLACE_ONLY_MEMALIGN |
|
60 #undef PAGE_SIZE |
|
61 |
|
62 namespace mozilla { |
|
63 namespace dmd { |
|
64 |
|
65 //--------------------------------------------------------------------------- |
|
66 // Utilities |
|
67 //--------------------------------------------------------------------------- |
|
68 |
|
69 #ifndef DISALLOW_COPY_AND_ASSIGN |
|
70 #define DISALLOW_COPY_AND_ASSIGN(T) \ |
|
71 T(const T&); \ |
|
72 void operator=(const T&) |
|
73 #endif |
|
74 |
|
75 static const malloc_table_t* gMallocTable = nullptr; |
|
76 |
|
77 // This enables/disables DMD. |
|
78 static bool gIsDMDRunning = false; |
|
79 |
|
80 // This provides infallible allocations (they abort on OOM). We use it for all |
|
81 // of DMD's own allocations, which fall into the following three cases. |
|
82 // - Direct allocations (the easy case). |
|
83 // - Indirect allocations in js::{Vector,HashSet,HashMap} -- this class serves |
|
84 // as their AllocPolicy. |
|
85 // - Other indirect allocations (e.g. NS_StackWalk) -- see the comments on |
|
86 // Thread::mBlockIntercepts and in replace_malloc for how these work. |
|
87 // |
|
88 class InfallibleAllocPolicy |
|
89 { |
|
90 static void ExitOnFailure(const void* aP); |
|
91 |
|
92 public: |
|
93 static void* malloc_(size_t aSize) |
|
94 { |
|
95 void* p = gMallocTable->malloc(aSize); |
|
96 ExitOnFailure(p); |
|
97 return p; |
|
98 } |
|
99 |
|
100 static void* calloc_(size_t aSize) |
|
101 { |
|
102 void* p = gMallocTable->calloc(1, aSize); |
|
103 ExitOnFailure(p); |
|
104 return p; |
|
105 } |
|
106 |
|
107 // This realloc_ is the one we use for direct reallocs within DMD. |
|
108 static void* realloc_(void* aPtr, size_t aNewSize) |
|
109 { |
|
110 void* p = gMallocTable->realloc(aPtr, aNewSize); |
|
111 ExitOnFailure(p); |
|
112 return p; |
|
113 } |
|
114 |
|
115 // This realloc_ is required for this to be a JS container AllocPolicy. |
|
116 static void* realloc_(void* aPtr, size_t aOldSize, size_t aNewSize) |
|
117 { |
|
118 return InfallibleAllocPolicy::realloc_(aPtr, aNewSize); |
|
119 } |
|
120 |
|
121 static void* memalign_(size_t aAlignment, size_t aSize) |
|
122 { |
|
123 void* p = gMallocTable->memalign(aAlignment, aSize); |
|
124 ExitOnFailure(p); |
|
125 return p; |
|
126 } |
|
127 |
|
128 static void free_(void* aPtr) { gMallocTable->free(aPtr); } |
|
129 |
|
130 static char* strdup_(const char* aStr) |
|
131 { |
|
132 char* s = (char*) InfallibleAllocPolicy::malloc_(strlen(aStr) + 1); |
|
133 strcpy(s, aStr); |
|
134 return s; |
|
135 } |
|
136 |
|
137 template <class T> |
|
138 static T* new_() |
|
139 { |
|
140 void* mem = malloc_(sizeof(T)); |
|
141 ExitOnFailure(mem); |
|
142 return new (mem) T; |
|
143 } |
|
144 |
|
145 template <class T, typename P1> |
|
146 static T* new_(P1 p1) |
|
147 { |
|
148 void* mem = malloc_(sizeof(T)); |
|
149 ExitOnFailure(mem); |
|
150 return new (mem) T(p1); |
|
151 } |
|
152 |
|
153 template <class T> |
|
154 static void delete_(T *p) |
|
155 { |
|
156 if (p) { |
|
157 p->~T(); |
|
158 InfallibleAllocPolicy::free_(p); |
|
159 } |
|
160 } |
|
161 |
|
162 static void reportAllocOverflow() { ExitOnFailure(nullptr); } |
|
163 }; |
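// A minimal usage sketch of InfallibleAllocPolicy (illustrative only; the
// types and values here are hypothetical):
//
//   struct Foo { int mX; explicit Foo(int aX) : mX(aX) {} };
//   Foo* foo = InfallibleAllocPolicy::new_<Foo>(42);  // aborts on OOM
//   InfallibleAllocPolicy::delete_(foo);
//
//   // As the AllocPolicy of a JS container:
//   js::Vector<int, 0, InfallibleAllocPolicy> v;
//   (void)v.append(1);  // indirect allocations also abort on OOM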
|
164 |
|
165 // This is only needed because of the |const void*| vs |void*| arg mismatch. |
|
166 static size_t |
|
167 MallocSizeOf(const void* aPtr) |
|
168 { |
|
169 return gMallocTable->malloc_usable_size(const_cast<void*>(aPtr)); |
|
170 } |
|
171 |
|
172 static void |
|
173 StatusMsg(const char* aFmt, ...) |
|
174 { |
|
175 va_list ap; |
|
176 va_start(ap, aFmt); |
|
177 #ifdef ANDROID |
|
178 __android_log_vprint(ANDROID_LOG_INFO, "DMD", aFmt, ap); |
|
179 #else |
|
180 // The +64 is easily enough for the "DMD[<pid>] " prefix and the NUL. |
|
181 char* fmt = (char*) InfallibleAllocPolicy::malloc_(strlen(aFmt) + 64); |
|
182 sprintf(fmt, "DMD[%d] %s", getpid(), aFmt); |
|
183 vfprintf(stderr, fmt, ap); |
|
184 InfallibleAllocPolicy::free_(fmt); |
|
185 #endif |
|
186 va_end(ap); |
|
187 } |
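// For example (illustrative; the pid is hypothetical), on non-Android
// platforms StatusMsg("opened %s\n", "dmd.txt") writes a line like
// "DMD[12345] opened dmd.txt" to stderr.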
|
188 |
|
189 /* static */ void |
|
190 InfallibleAllocPolicy::ExitOnFailure(const void* aP) |
|
191 { |
|
192 if (!aP) { |
|
193 StatusMsg("out of memory; aborting\n"); |
|
194 MOZ_CRASH(); |
|
195 } |
|
196 } |
|
197 |
|
198 void |
|
199 Writer::Write(const char* aFmt, ...) const |
|
200 { |
|
201 va_list ap; |
|
202 va_start(ap, aFmt); |
|
203 mWriterFun(mWriteState, aFmt, ap); |
|
204 va_end(ap); |
|
205 } |
|
206 |
|
207 #define W(...) aWriter.Write(__VA_ARGS__); |
|
208 |
|
209 #define WriteTitle(...) \ |
|
210 W("------------------------------------------------------------------\n"); \ |
|
211 W(__VA_ARGS__); \ |
|
212 W("------------------------------------------------------------------\n\n"); |
|
213 |
|
214 MOZ_EXPORT void |
|
215 FpWrite(void* aWriteState, const char* aFmt, va_list aAp) |
|
216 { |
|
217 FILE* fp = static_cast<FILE*>(aWriteState); |
|
218 vfprintf(fp, aFmt, aAp); |
|
219 } |
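// A sketch of how a Writer is paired with FpWrite (illustrative; assumes the
// Writer(WriterFun, void*) constructor declared in DMD.h):
//
//   FILE* fp = fopen("dmd-output.txt", "w");
//   Writer writer(FpWrite, fp);
//   writer.Write("%d heap blocks\n", 42);  // forwards to vfprintf(fp, ...)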
|
220 |
|
221 static double |
|
222 Percent(size_t part, size_t whole) |
|
223 { |
|
224 return (whole == 0) ? 0 : 100 * (double)part / whole; |
|
225 } |
|
226 |
|
227 // Commifies the number and prepends a '~' if requested. Best used with |
|
228 // |kBufLen| and |gBuf[1234]|, because they should be big enough for any number |
|
229 // we'll see. |
|
230 static char* |
|
231 Show(size_t n, char* buf, size_t buflen, bool addTilde = false) |
|
232 { |
|
233 int nc = 0, i = 0, lasti = buflen - 2; |
|
234 buf[lasti + 1] = '\0'; |
|
235 if (n == 0) { |
|
236 buf[lasti - i] = '0'; |
|
237 i++; |
|
238 } else { |
|
239 while (n > 0) { |
|
240 if (((i - nc) % 3) == 0 && i != 0) { |
|
241 buf[lasti - i] = ','; |
|
242 i++; |
|
243 nc++; |
|
244 } |
|
245 buf[lasti - i] = static_cast<char>((n % 10) + '0'); |
|
246 i++; |
|
247 n /= 10; |
|
248 } |
|
249 } |
|
250 int firstCharIndex = lasti - i + 1; |
|
251 |
|
252 if (addTilde) { |
|
253 firstCharIndex--; |
|
254 buf[firstCharIndex] = '~'; |
|
255 } |
|
256 |
|
257 MOZ_ASSERT(firstCharIndex >= 0); |
|
258 return &buf[firstCharIndex]; |
|
259 } |
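// For example (illustrative): Show(1234567, gBuf1, kBufLen) returns a pointer
// to "1,234,567" within gBuf1, and passing addTilde=true yields "~1,234,567".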
|
260 |
|
261 static const char* |
|
262 Plural(size_t aN) |
|
263 { |
|
264 return aN == 1 ? "" : "s"; |
|
265 } |
|
266 |
|
267 // Used by calls to Show(). |
|
268 static const size_t kBufLen = 64; |
|
269 static char gBuf1[kBufLen]; |
|
270 static char gBuf2[kBufLen]; |
|
271 static char gBuf3[kBufLen]; |
|
272 static char gBuf4[kBufLen]; |
|
273 |
|
274 //--------------------------------------------------------------------------- |
|
275 // Options (Part 1) |
|
276 //--------------------------------------------------------------------------- |
|
277 |
|
278 class Options |
|
279 { |
|
280 template <typename T> |
|
281 struct NumOption |
|
282 { |
|
283 const T mDefault; |
|
284 const T mMax; |
|
285 T mActual; |
|
286 NumOption(T aDefault, T aMax) |
|
287 : mDefault(aDefault), mMax(aMax), mActual(aDefault) |
|
288 {} |
|
289 }; |
|
290 |
|
291 enum Mode { |
|
292 Normal, // run normally |
|
293 Test, // do some basic correctness tests |
|
294 Stress // do some performance stress tests |
|
295 }; |
|
296 |
|
297 char* mDMDEnvVar; // a saved copy, for printing during Dump() |
|
298 |
|
299 NumOption<size_t> mSampleBelowSize; |
|
300 NumOption<uint32_t> mMaxFrames; |
|
301 NumOption<uint32_t> mMaxRecords; |
|
302 Mode mMode; |
|
303 |
|
304 void BadArg(const char* aArg); |
|
305 static const char* ValueIfMatch(const char* aArg, const char* aOptionName); |
|
306 static bool GetLong(const char* aArg, const char* aOptionName, |
|
307 long aMin, long aMax, long* aN); |
|
308 |
|
309 public: |
|
310 Options(const char* aDMDEnvVar); |
|
311 |
|
312 const char* DMDEnvVar() const { return mDMDEnvVar; } |
|
313 |
|
314 size_t SampleBelowSize() const { return mSampleBelowSize.mActual; } |
|
315 size_t MaxFrames() const { return mMaxFrames.mActual; } |
|
316 size_t MaxRecords() const { return mMaxRecords.mActual; } |
|
317 |
|
318 void SetSampleBelowSize(size_t aN) { mSampleBelowSize.mActual = aN; } |
|
319 |
|
320 bool IsTestMode() const { return mMode == Test; } |
|
321 bool IsStressMode() const { return mMode == Stress; } |
|
322 }; |
|
323 |
|
324 static Options *gOptions; |
|
325 |
|
326 //--------------------------------------------------------------------------- |
|
327 // The global lock |
|
328 //--------------------------------------------------------------------------- |
|
329 |
|
330 // MutexBase implements the platform-specific parts of a mutex. |
|
331 |
|
332 #ifdef XP_WIN |
|
333 |
|
334 class MutexBase |
|
335 { |
|
336 CRITICAL_SECTION mCS; |
|
337 |
|
338 DISALLOW_COPY_AND_ASSIGN(MutexBase); |
|
339 |
|
340 public: |
|
341 MutexBase() |
|
342 { |
|
343 InitializeCriticalSection(&mCS); |
|
344 } |
|
345 |
|
346 ~MutexBase() |
|
347 { |
|
348 DeleteCriticalSection(&mCS); |
|
349 } |
|
350 |
|
351 void Lock() |
|
352 { |
|
353 EnterCriticalSection(&mCS); |
|
354 } |
|
355 |
|
356 void Unlock() |
|
357 { |
|
358 LeaveCriticalSection(&mCS); |
|
359 } |
|
360 }; |
|
361 |
|
362 #else |
|
363 |
|
364 #include <pthread.h> |
|
365 #include <sys/types.h> |
|
366 |
|
367 class MutexBase |
|
368 { |
|
369 pthread_mutex_t mMutex; |
|
370 |
|
371 DISALLOW_COPY_AND_ASSIGN(MutexBase); |
|
372 |
|
373 public: |
|
374 MutexBase() |
|
375 { |
|
376 pthread_mutex_init(&mMutex, nullptr); |
|
377 } |
|
378 |
|
379 void Lock() |
|
380 { |
|
381 pthread_mutex_lock(&mMutex); |
|
382 } |
|
383 |
|
384 void Unlock() |
|
385 { |
|
386 pthread_mutex_unlock(&mMutex); |
|
387 } |
|
388 }; |
|
389 |
|
390 #endif |
|
391 |
|
392 class Mutex : private MutexBase |
|
393 { |
|
394 bool mIsLocked; |
|
395 |
|
396 DISALLOW_COPY_AND_ASSIGN(Mutex); |
|
397 |
|
398 public: |
|
399 Mutex() |
|
400 : mIsLocked(false) |
|
401 {} |
|
402 |
|
403 void Lock() |
|
404 { |
|
405 MutexBase::Lock(); |
|
406 MOZ_ASSERT(!mIsLocked); |
|
407 mIsLocked = true; |
|
408 } |
|
409 |
|
410 void Unlock() |
|
411 { |
|
412 MOZ_ASSERT(mIsLocked); |
|
413 mIsLocked = false; |
|
414 MutexBase::Unlock(); |
|
415 } |
|
416 |
|
417 bool IsLocked() |
|
418 { |
|
419 return mIsLocked; |
|
420 } |
|
421 }; |
|
422 |
|
423 // This lock must be held while manipulating global state, such as |
|
424 // gStackTraceTable, gBlockTable, etc. |
|
425 static Mutex* gStateLock = nullptr; |
|
426 |
|
427 class AutoLockState |
|
428 { |
|
429 DISALLOW_COPY_AND_ASSIGN(AutoLockState); |
|
430 |
|
431 public: |
|
432 AutoLockState() |
|
433 { |
|
434 gStateLock->Lock(); |
|
435 } |
|
436 ~AutoLockState() |
|
437 { |
|
438 gStateLock->Unlock(); |
|
439 } |
|
440 }; |
|
441 |
|
442 class AutoUnlockState |
|
443 { |
|
444 DISALLOW_COPY_AND_ASSIGN(AutoUnlockState); |
|
445 |
|
446 public: |
|
447 AutoUnlockState() |
|
448 { |
|
449 gStateLock->Unlock(); |
|
450 } |
|
451 ~AutoUnlockState() |
|
452 { |
|
453 gStateLock->Lock(); |
|
454 } |
|
455 }; |
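// An illustrative sketch of how these two RAII classes nest (not part of DMD):
//
//   {
//     AutoLockState lock;          // acquires gStateLock
//     // ... manipulate gBlockTable, gStackTraceTable, etc. ...
//     {
//       AutoUnlockState unlock;    // temporarily releases gStateLock, e.g.
//                                  // around NS_DescribeCodeAddress
//     }                            // gStateLock re-acquired here
//   }                              // gStateLock released here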
|
456 |
|
457 //--------------------------------------------------------------------------- |
|
458 // Thread-local storage and blocking of intercepts |
|
459 //--------------------------------------------------------------------------- |
|
460 |
|
461 #ifdef XP_WIN |
|
462 |
|
463 #define DMD_TLS_INDEX_TYPE DWORD |
|
464 #define DMD_CREATE_TLS_INDEX(i_) do { \ |
|
465 (i_) = TlsAlloc(); \ |
|
466 } while (0) |
|
467 #define DMD_DESTROY_TLS_INDEX(i_) TlsFree((i_)) |
|
468 #define DMD_GET_TLS_DATA(i_) TlsGetValue((i_)) |
|
469 #define DMD_SET_TLS_DATA(i_, v_) TlsSetValue((i_), (v_)) |
|
470 |
|
471 #else |
|
472 |
|
473 #include <pthread.h> |
|
474 |
|
475 #define DMD_TLS_INDEX_TYPE pthread_key_t |
|
476 #define DMD_CREATE_TLS_INDEX(i_) pthread_key_create(&(i_), nullptr) |
|
477 #define DMD_DESTROY_TLS_INDEX(i_) pthread_key_delete((i_)) |
|
478 #define DMD_GET_TLS_DATA(i_) pthread_getspecific((i_)) |
|
479 #define DMD_SET_TLS_DATA(i_, v_) pthread_setspecific((i_), (v_)) |
|
480 |
|
481 #endif |
|
482 |
|
483 static DMD_TLS_INDEX_TYPE gTlsIndex; |
|
484 |
|
485 class Thread |
|
486 { |
|
487 // Required for allocation via InfallibleAllocPolicy::new_. |
|
488 friend class InfallibleAllocPolicy; |
|
489 |
|
490 // When true, this blocks intercepts, which allows malloc interception |
|
491 // functions to themselves call malloc. (Nb: for direct calls to malloc we |
|
492 // can just use InfallibleAllocPolicy::{malloc_,new_}, but we sometimes |
|
493 // indirectly call vanilla malloc via functions like NS_StackWalk.) |
|
494 bool mBlockIntercepts; |
|
495 |
|
496 Thread() |
|
497 : mBlockIntercepts(false) |
|
498 {} |
|
499 |
|
500 DISALLOW_COPY_AND_ASSIGN(Thread); |
|
501 |
|
502 public: |
|
503 static Thread* Fetch(); |
|
504 |
|
505 bool BlockIntercepts() |
|
506 { |
|
507 MOZ_ASSERT(!mBlockIntercepts); |
|
508 return mBlockIntercepts = true; |
|
509 } |
|
510 |
|
511 bool UnblockIntercepts() |
|
512 { |
|
513 MOZ_ASSERT(mBlockIntercepts); |
|
514 return mBlockIntercepts = false; |
|
515 } |
|
516 |
|
517 bool InterceptsAreBlocked() const |
|
518 { |
|
519 return mBlockIntercepts; |
|
520 } |
|
521 }; |
|
522 |
|
523 /* static */ Thread* |
|
524 Thread::Fetch() |
|
525 { |
|
526 Thread* t = static_cast<Thread*>(DMD_GET_TLS_DATA(gTlsIndex)); |
|
527 |
|
528 if (MOZ_UNLIKELY(!t)) { |
|
529 // This memory is never freed, even if the thread dies. It's a leak, but |
|
530 // only a tiny one. |
|
531 t = InfallibleAllocPolicy::new_<Thread>(); |
|
532 DMD_SET_TLS_DATA(gTlsIndex, t); |
|
533 } |
|
534 |
|
535 return t; |
|
536 } |
|
537 |
|
538 // An object of this class must be created (on the stack) before running any |
|
539 // code that might allocate. |
|
540 class AutoBlockIntercepts |
|
541 { |
|
542 Thread* const mT; |
|
543 |
|
544 DISALLOW_COPY_AND_ASSIGN(AutoBlockIntercepts); |
|
545 |
|
546 public: |
|
547 AutoBlockIntercepts(Thread* aT) |
|
548 : mT(aT) |
|
549 { |
|
550 mT->BlockIntercepts(); |
|
551 } |
|
552 ~AutoBlockIntercepts() |
|
553 { |
|
554 MOZ_ASSERT(mT->InterceptsAreBlocked()); |
|
555 mT->UnblockIntercepts(); |
|
556 } |
|
557 }; |
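// An illustrative sketch of the intercept-blocking pattern (the real uses are
// AllocCallback() and FreeCallback() below; ExampleHook is hypothetical):
//
//   void ExampleHook(Thread* aT)
//   {
//     AutoLockState lock;
//     AutoBlockIntercepts block(aT);  // mallocs made below (e.g. inside
//                                     // NS_StackWalk) won't be intercepted
//     // ... code that might allocate ...
//   }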
|
558 |
|
559 //--------------------------------------------------------------------------- |
|
560 // Location service |
|
561 //--------------------------------------------------------------------------- |
|
562 |
|
563 // This class is used to print details about code locations. |
|
564 class LocationService |
|
565 { |
|
566 // WriteLocation() is the key function in this class. It's basically a |
|
567 // wrapper around NS_DescribeCodeAddress. |
|
568 // |
|
569 // However, NS_DescribeCodeAddress is very slow on some platforms, and we |
|
570 // have lots of repeated (i.e. same PC) calls to it. So we do some caching |
|
571 // of results. Each cached result includes two strings (|mFunction| and |
|
572 // |mLibrary|), so we also optimize them for space in the following ways. |
|
573 // |
|
574 // - The number of distinct library names is small, e.g. a few dozen. There |
|
575 // is lots of repetition, especially of libxul. So we intern them in their |
|
576 // own table, which saves space over duplicating them for each cache entry. |
|
577 // |
|
578 // - The number of distinct function names is much higher, so we duplicate |
|
579 // them in each cache entry. That's more space-efficient than interning |
|
580 // because entries containing single-occurrence function names are quickly |
|
581 // overwritten, and their copies released. In addition, empty function |
|
582 // names are common, so we use nullptr to represent them compactly. |
|
583 |
|
584 struct StringHasher |
|
585 { |
|
586 typedef const char* Lookup; |
|
587 |
|
588 static uint32_t hash(const char* const& aS) |
|
589 { |
|
590 return HashString(aS); |
|
591 } |
|
592 |
|
593 static bool match(const char* const& aA, const char* const& aB) |
|
594 { |
|
595 return strcmp(aA, aB) == 0; |
|
596 } |
|
597 }; |
|
598 |
|
599 typedef js::HashSet<const char*, StringHasher, InfallibleAllocPolicy> |
|
600 StringTable; |
|
601 |
|
602 StringTable mLibraryStrings; |
|
603 |
|
604 struct Entry |
|
605 { |
|
606 const void* mPc; |
|
607 char* mFunction; // owned by the Entry; may be null |
|
    const char* mLibrary;  // owned by mLibraryStrings; never null in an
                           //   entry that is in use
|
610 ptrdiff_t mLOffset; |
|
611 char* mFileName; // owned by the Entry; may be null |
|
612 uint32_t mLineNo:31; |
|
613 uint32_t mInUse:1; // is the entry used? |
|
614 |
|
615 Entry() |
|
      : mPc(0), mFunction(nullptr), mLibrary(nullptr), mLOffset(0),
        mFileName(nullptr), mLineNo(0), mInUse(0)
|
617 {} |
|
618 |
|
619 ~Entry() |
|
620 { |
|
621 // We don't free mLibrary because it's externally owned. |
|
622 InfallibleAllocPolicy::free_(mFunction); |
|
623 InfallibleAllocPolicy::free_(mFileName); |
|
624 } |
|
625 |
|
626 void Replace(const void* aPc, const char* aFunction, |
|
627 const char* aLibrary, ptrdiff_t aLOffset, |
|
628 const char* aFileName, unsigned long aLineNo) |
|
629 { |
|
630 mPc = aPc; |
|
631 |
|
632 // Convert "" to nullptr. Otherwise, make a copy of the name. |
|
633 InfallibleAllocPolicy::free_(mFunction); |
|
634 mFunction = |
|
635 !aFunction[0] ? nullptr : InfallibleAllocPolicy::strdup_(aFunction); |
|
636 InfallibleAllocPolicy::free_(mFileName); |
|
637 mFileName = |
|
638 !aFileName[0] ? nullptr : InfallibleAllocPolicy::strdup_(aFileName); |
|
639 |
|
640 |
|
641 mLibrary = aLibrary; |
|
642 mLOffset = aLOffset; |
|
643 mLineNo = aLineNo; |
|
644 |
|
645 mInUse = 1; |
|
646 } |
|
647 |
|
648 size_t SizeOfExcludingThis() { |
|
649 // Don't measure mLibrary because it's externally owned. |
|
650 return MallocSizeOf(mFunction) + MallocSizeOf(mFileName); |
|
651 } |
|
652 }; |
|
653 |
|
654 // A direct-mapped cache. When doing a dump just after starting desktop |
|
655 // Firefox (which is similar to dumping after a longer-running session, |
|
656 // thanks to the limit on how many records we dump), a cache with 2^24 |
|
657 // entries (which approximates an infinite-entry cache) has a ~91% hit rate. |
|
658 // A cache with 2^12 entries has a ~83% hit rate, and takes up ~85 KiB (on |
|
659 // 32-bit platforms) or ~150 KiB (on 64-bit platforms). |
|
660 static const size_t kNumEntries = 1 << 12; |
|
661 static const size_t kMask = kNumEntries - 1; |
|
662 Entry mEntries[kNumEntries]; |
|
663 |
|
664 size_t mNumCacheHits; |
|
665 size_t mNumCacheMisses; |
|
666 |
|
667 public: |
|
668 LocationService() |
|
669 : mEntries(), mNumCacheHits(0), mNumCacheMisses(0) |
|
670 { |
|
671 (void)mLibraryStrings.init(64); |
|
672 } |
|
673 |
|
674 void WriteLocation(const Writer& aWriter, const void* aPc) |
|
675 { |
|
676 MOZ_ASSERT(gStateLock->IsLocked()); |
|
677 |
|
678 uint32_t index = HashGeneric(aPc) & kMask; |
|
679 MOZ_ASSERT(index < kNumEntries); |
|
680 Entry& entry = mEntries[index]; |
|
681 |
|
682 if (!entry.mInUse || entry.mPc != aPc) { |
|
683 mNumCacheMisses++; |
|
684 |
|
685 // NS_DescribeCodeAddress can (on Linux) acquire a lock inside |
|
686 // the shared library loader. Another thread might call malloc |
|
687 // while holding that lock (when loading a shared library). So |
|
688 // we have to exit gStateLock around this call. For details, see |
|
689 // https://bugzilla.mozilla.org/show_bug.cgi?id=363334#c3 |
|
690 nsCodeAddressDetails details; |
|
691 { |
|
692 AutoUnlockState unlock; |
|
693 (void)NS_DescribeCodeAddress(const_cast<void*>(aPc), &details); |
|
694 } |
|
695 |
|
696 // Intern the library name. |
|
697 const char* library = nullptr; |
|
698 StringTable::AddPtr p = mLibraryStrings.lookupForAdd(details.library); |
|
699 if (!p) { |
|
700 library = InfallibleAllocPolicy::strdup_(details.library); |
|
701 (void)mLibraryStrings.add(p, library); |
|
702 } else { |
|
703 library = *p; |
|
704 } |
|
705 |
|
      entry.Replace(aPc, details.function, library, details.loffset,
                    details.filename, details.lineno);
|
707 |
|
708 } else { |
|
709 mNumCacheHits++; |
|
710 } |
|
711 |
|
712 MOZ_ASSERT(entry.mPc == aPc); |
|
713 |
|
714 uintptr_t entryPc = (uintptr_t)(entry.mPc); |
|
715 // Sometimes we get nothing useful. Just print "???" for the entire entry |
|
716 // so that fix-linux-stack.pl doesn't complain about an empty filename. |
|
717 if (!entry.mFunction && !entry.mLibrary[0] && entry.mLOffset == 0) { |
|
718 W(" ??? 0x%x\n", entryPc); |
|
719 } else { |
|
720 // Use "???" for unknown functions. |
|
721 const char* entryFunction = entry.mFunction ? entry.mFunction : "???"; |
|
722 if (entry.mFileName) { |
|
723 // On Windows we can get the filename and line number at runtime. |
|
724 W(" %s (%s:%lu) 0x%x\n", |
|
725 entryFunction, entry.mFileName, entry.mLineNo, entryPc); |
|
726 } else { |
|
727 // On Linux and Mac we cannot get the filename and line number at |
|
728 // runtime, so we print the offset in a form that fix-linux-stack.pl and |
|
729 // fix_macosx_stack.py can post-process. |
|
730 W(" %s[%s +0x%X] 0x%x\n", |
|
731 entryFunction, entry.mLibrary, entry.mLOffset, entryPc); |
|
732 } |
|
733 } |
|
734 } |
|
735 |
|
736 size_t SizeOfIncludingThis() |
|
737 { |
|
738 size_t n = MallocSizeOf(this); |
|
739 for (uint32_t i = 0; i < kNumEntries; i++) { |
|
740 n += mEntries[i].SizeOfExcludingThis(); |
|
741 } |
|
742 |
|
743 n += mLibraryStrings.sizeOfExcludingThis(MallocSizeOf); |
|
744 for (StringTable::Range r = mLibraryStrings.all(); |
|
745 !r.empty(); |
|
746 r.popFront()) { |
|
747 n += MallocSizeOf(r.front()); |
|
748 } |
|
749 |
|
750 return n; |
|
751 } |
|
752 |
|
753 size_t CacheCapacity() const { return kNumEntries; } |
|
754 |
|
755 size_t CacheCount() const |
|
756 { |
|
757 size_t n = 0; |
|
758 for (size_t i = 0; i < kNumEntries; i++) { |
|
759 if (mEntries[i].mInUse) { |
|
760 n++; |
|
761 } |
|
762 } |
|
763 return n; |
|
764 } |
|
765 |
|
766 size_t NumCacheHits() const { return mNumCacheHits; } |
|
767 size_t NumCacheMisses() const { return mNumCacheMisses; } |
|
768 }; |
|
769 |
|
770 //--------------------------------------------------------------------------- |
|
771 // Stack traces |
|
772 //--------------------------------------------------------------------------- |
|
773 |
|
774 class StackTrace |
|
775 { |
|
776 public: |
|
777 static const uint32_t MaxFrames = 24; |
|
778 |
|
779 private: |
|
780 uint32_t mLength; // The number of PCs. |
|
781 void* mPcs[MaxFrames]; // The PCs themselves. If --max-frames is less |
|
782 // than 24, this array is bigger than necessary, |
|
783 // but that case is unusual. |
|
784 |
|
785 public: |
|
786 StackTrace() : mLength(0) {} |
|
787 |
|
788 uint32_t Length() const { return mLength; } |
|
789 void* Pc(uint32_t i) const { MOZ_ASSERT(i < mLength); return mPcs[i]; } |
|
790 |
|
791 uint32_t Size() const { return mLength * sizeof(mPcs[0]); } |
|
792 |
|
793 // The stack trace returned by this function is interned in gStackTraceTable, |
|
794 // and so is immortal and unmovable. |
|
795 static const StackTrace* Get(Thread* aT); |
|
796 |
|
797 void Sort() |
|
798 { |
|
799 qsort(mPcs, mLength, sizeof(mPcs[0]), StackTrace::QsortCmp); |
|
800 } |
|
801 |
|
802 void Print(const Writer& aWriter, LocationService* aLocService) const; |
|
803 |
|
804 // Hash policy. |
|
805 |
|
806 typedef StackTrace* Lookup; |
|
807 |
|
808 static uint32_t hash(const StackTrace* const& aSt) |
|
809 { |
|
810 return mozilla::HashBytes(aSt->mPcs, aSt->Size()); |
|
811 } |
|
812 |
|
813 static bool match(const StackTrace* const& aA, |
|
814 const StackTrace* const& aB) |
|
815 { |
|
816 return aA->mLength == aB->mLength && |
|
817 memcmp(aA->mPcs, aB->mPcs, aA->Size()) == 0; |
|
818 } |
|
819 |
|
820 private: |
|
821 static void StackWalkCallback(void* aPc, void* aSp, void* aClosure) |
|
822 { |
|
823 StackTrace* st = (StackTrace*) aClosure; |
|
824 MOZ_ASSERT(st->mLength < MaxFrames); |
|
825 st->mPcs[st->mLength] = aPc; |
|
826 st->mLength++; |
|
827 } |
|
828 |
|
829 static int QsortCmp(const void* aA, const void* aB) |
|
830 { |
|
831 const void* const a = *static_cast<const void* const*>(aA); |
|
832 const void* const b = *static_cast<const void* const*>(aB); |
|
833 if (a < b) return -1; |
|
834 if (a > b) return 1; |
|
835 return 0; |
|
836 } |
|
837 }; |
|
838 |
|
839 typedef js::HashSet<StackTrace*, StackTrace, InfallibleAllocPolicy> |
|
840 StackTraceTable; |
|
841 static StackTraceTable* gStackTraceTable = nullptr; |
|
842 |
|
// We won't GC the stack trace table until it exceeds this many elements.
|
844 static uint32_t gGCStackTraceTableWhenSizeExceeds = 4 * 1024; |
|
845 |
|
846 void |
|
847 StackTrace::Print(const Writer& aWriter, LocationService* aLocService) const |
|
848 { |
|
849 if (mLength == 0) { |
|
850 W(" (empty)\n"); // StackTrace::Get() must have failed |
|
851 return; |
|
852 } |
|
853 |
|
854 for (uint32_t i = 0; i < mLength; i++) { |
|
855 aLocService->WriteLocation(aWriter, Pc(i)); |
|
856 } |
|
857 } |
|
858 |
|
859 /* static */ const StackTrace* |
|
860 StackTrace::Get(Thread* aT) |
|
861 { |
|
862 MOZ_ASSERT(gStateLock->IsLocked()); |
|
863 MOZ_ASSERT(aT->InterceptsAreBlocked()); |
|
864 |
|
865 // On Windows, NS_StackWalk can acquire a lock from the shared library |
|
866 // loader. Another thread might call malloc while holding that lock (when |
|
867 // loading a shared library). So we can't be in gStateLock during the call |
|
868 // to NS_StackWalk. For details, see |
|
869 // https://bugzilla.mozilla.org/show_bug.cgi?id=374829#c8 |
|
870 // On Linux, something similar can happen; see bug 824340. |
|
871 // So let's just release it on all platforms. |
|
872 nsresult rv; |
|
873 StackTrace tmp; |
|
874 { |
|
875 AutoUnlockState unlock; |
|
876 uint32_t skipFrames = 2; |
|
877 rv = NS_StackWalk(StackWalkCallback, skipFrames, |
|
878 gOptions->MaxFrames(), &tmp, 0, nullptr); |
|
879 } |
|
880 |
|
881 if (rv == NS_OK) { |
|
882 // Handle the common case first. All is ok. Nothing to do. |
|
883 } else if (rv == NS_ERROR_NOT_IMPLEMENTED || rv == NS_ERROR_FAILURE) { |
|
884 tmp.mLength = 0; |
|
885 } else if (rv == NS_ERROR_UNEXPECTED) { |
|
886 // XXX: This |rv| only happens on Mac, and it indicates that we're handling |
|
887 // a call to malloc that happened inside a mutex-handling function. Any |
|
888 // attempt to create a semaphore (which can happen in printf) could |
|
889 // deadlock. |
|
890 // |
|
891 // However, the most complex thing DMD does after Get() returns is to put |
|
892 // something in a hash table, which might call |
|
893 // InfallibleAllocPolicy::malloc_. I'm not yet sure if this needs special |
|
894 // handling, hence the forced abort. Sorry. If you hit this, please file |
|
895 // a bug and CC nnethercote. |
|
896 MOZ_CRASH(); |
|
897 } else { |
|
898 MOZ_CRASH(); // should be impossible |
|
899 } |
|
900 |
|
901 StackTraceTable::AddPtr p = gStackTraceTable->lookupForAdd(&tmp); |
|
902 if (!p) { |
|
903 StackTrace* stnew = InfallibleAllocPolicy::new_<StackTrace>(tmp); |
|
904 (void)gStackTraceTable->add(p, stnew); |
|
905 } |
|
906 return *p; |
|
907 } |
|
908 |
|
909 //--------------------------------------------------------------------------- |
|
910 // Heap blocks |
|
911 //--------------------------------------------------------------------------- |
|
912 |
|
913 // This class combines a 2-byte-aligned pointer (i.e. one whose bottom bit |
|
914 // is zero) with a 1-bit tag. |
|
915 // |
|
// |T| is the pointer type, e.g. |int*|, not the pointed-to type. This makes
// it easier to have const pointers, e.g. |TaggedPtr<const int*>|.
|
918 template <typename T> |
|
919 class TaggedPtr |
|
920 { |
|
921 union |
|
922 { |
|
923 T mPtr; |
|
924 uintptr_t mUint; |
|
925 }; |
|
926 |
|
927 static const uintptr_t kTagMask = uintptr_t(0x1); |
|
928 static const uintptr_t kPtrMask = ~kTagMask; |
|
929 |
|
930 static bool IsTwoByteAligned(T aPtr) |
|
931 { |
|
932 return (uintptr_t(aPtr) & kTagMask) == 0; |
|
933 } |
|
934 |
|
935 public: |
|
936 TaggedPtr() |
|
937 : mPtr(nullptr) |
|
938 {} |
|
939 |
|
940 TaggedPtr(T aPtr, bool aBool) |
|
941 : mPtr(aPtr) |
|
942 { |
|
943 MOZ_ASSERT(IsTwoByteAligned(aPtr)); |
|
944 uintptr_t tag = uintptr_t(aBool); |
|
945 MOZ_ASSERT(tag <= kTagMask); |
|
946 mUint |= (tag & kTagMask); |
|
947 } |
|
948 |
|
949 void Set(T aPtr, bool aBool) |
|
950 { |
|
951 MOZ_ASSERT(IsTwoByteAligned(aPtr)); |
|
952 mPtr = aPtr; |
|
953 uintptr_t tag = uintptr_t(aBool); |
|
954 MOZ_ASSERT(tag <= kTagMask); |
|
955 mUint |= (tag & kTagMask); |
|
956 } |
|
957 |
|
958 T Ptr() const { return reinterpret_cast<T>(mUint & kPtrMask); } |
|
959 |
|
960 bool Tag() const { return bool(mUint & kTagMask); } |
|
961 }; |
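// An illustrative sketch of the tagging (not part of DMD): for a
// 2-byte-aligned pointer |p|,
//
//   TaggedPtr<const int*> t(p, /* tag */ true);
//   MOZ_ASSERT(t.Ptr() == p);      // bottom bit is masked off
//   MOZ_ASSERT(t.Tag() == true);   // bottom bit holds the boolean tag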
|
962 |
|
963 // A live heap block. |
|
964 class Block |
|
965 { |
|
966 const void* mPtr; |
|
967 const size_t mReqSize; // size requested |
|
968 |
|
969 // Ptr: |mAllocStackTrace| - stack trace where this block was allocated. |
|
970 // Tag bit 0: |mSampled| - was this block sampled? (if so, slop == 0). |
|
971 TaggedPtr<const StackTrace* const> |
|
972 mAllocStackTrace_mSampled; |
|
973 |
|
974 // This array has two elements because we record at most two reports of a |
|
975 // block. |
|
976 // - Ptr: |mReportStackTrace| - stack trace where this block was reported. |
|
977 // nullptr if not reported. |
|
978 // - Tag bit 0: |mReportedOnAlloc| - was the block reported immediately on |
|
979 // allocation? If so, DMD must not clear the report at the end of Dump(). |
|
980 // Only relevant if |mReportStackTrace| is non-nullptr. |
|
981 // |
|
982 // |mPtr| is used as the key in BlockTable, so it's ok for this member |
|
983 // to be |mutable|. |
|
984 mutable TaggedPtr<const StackTrace*> mReportStackTrace_mReportedOnAlloc[2]; |
|
985 |
|
986 public: |
|
987 Block(const void* aPtr, size_t aReqSize, const StackTrace* aAllocStackTrace, |
|
988 bool aSampled) |
|
989 : mPtr(aPtr), |
|
990 mReqSize(aReqSize), |
|
991 mAllocStackTrace_mSampled(aAllocStackTrace, aSampled), |
|
992 mReportStackTrace_mReportedOnAlloc() // all fields get zeroed |
|
993 { |
|
994 MOZ_ASSERT(aAllocStackTrace); |
|
995 } |
|
996 |
|
997 size_t ReqSize() const { return mReqSize; } |
|
998 |
|
999 // Sampled blocks always have zero slop. |
|
1000 size_t SlopSize() const |
|
1001 { |
|
1002 return IsSampled() ? 0 : MallocSizeOf(mPtr) - mReqSize; |
|
1003 } |
|
1004 |
|
1005 size_t UsableSize() const |
|
1006 { |
|
1007 return IsSampled() ? mReqSize : MallocSizeOf(mPtr); |
|
1008 } |
|
1009 |
|
1010 bool IsSampled() const |
|
1011 { |
|
1012 return mAllocStackTrace_mSampled.Tag(); |
|
1013 } |
|
1014 |
|
1015 const StackTrace* AllocStackTrace() const |
|
1016 { |
|
1017 return mAllocStackTrace_mSampled.Ptr(); |
|
1018 } |
|
1019 |
|
1020 const StackTrace* ReportStackTrace1() const { |
|
1021 return mReportStackTrace_mReportedOnAlloc[0].Ptr(); |
|
1022 } |
|
1023 |
|
1024 const StackTrace* ReportStackTrace2() const { |
|
1025 return mReportStackTrace_mReportedOnAlloc[1].Ptr(); |
|
1026 } |
|
1027 |
|
1028 bool ReportedOnAlloc1() const { |
|
1029 return mReportStackTrace_mReportedOnAlloc[0].Tag(); |
|
1030 } |
|
1031 |
|
1032 bool ReportedOnAlloc2() const { |
|
1033 return mReportStackTrace_mReportedOnAlloc[1].Tag(); |
|
1034 } |
|
1035 |
|
1036 uint32_t NumReports() const { |
|
1037 if (ReportStackTrace2()) { |
|
1038 MOZ_ASSERT(ReportStackTrace1()); |
|
1039 return 2; |
|
1040 } |
|
1041 if (ReportStackTrace1()) { |
|
1042 return 1; |
|
1043 } |
|
1044 return 0; |
|
1045 } |
|
1046 |
|
1047 // This is |const| thanks to the |mutable| fields above. |
|
1048 void Report(Thread* aT, bool aReportedOnAlloc) const |
|
1049 { |
|
1050 // We don't bother recording reports after the 2nd one. |
|
1051 uint32_t numReports = NumReports(); |
|
1052 if (numReports < 2) { |
|
1053 mReportStackTrace_mReportedOnAlloc[numReports].Set(StackTrace::Get(aT), |
|
1054 aReportedOnAlloc); |
|
1055 } |
|
1056 } |
|
1057 |
|
1058 void UnreportIfNotReportedOnAlloc() const |
|
1059 { |
|
1060 if (!ReportedOnAlloc1() && !ReportedOnAlloc2()) { |
|
1061 mReportStackTrace_mReportedOnAlloc[0].Set(nullptr, 0); |
|
1062 mReportStackTrace_mReportedOnAlloc[1].Set(nullptr, 0); |
|
1063 |
|
1064 } else if (!ReportedOnAlloc1() && ReportedOnAlloc2()) { |
|
1065 // Shift the 2nd report down to the 1st one. |
|
1066 mReportStackTrace_mReportedOnAlloc[0] = |
|
1067 mReportStackTrace_mReportedOnAlloc[1]; |
|
1068 mReportStackTrace_mReportedOnAlloc[1].Set(nullptr, 0); |
|
1069 |
|
1070 } else if (ReportedOnAlloc1() && !ReportedOnAlloc2()) { |
|
1071 mReportStackTrace_mReportedOnAlloc[1].Set(nullptr, 0); |
|
1072 } |
|
1073 } |
|
1074 |
|
1075 // Hash policy. |
|
1076 |
|
1077 typedef const void* Lookup; |
|
1078 |
|
1079 static uint32_t hash(const void* const& aPtr) |
|
1080 { |
|
1081 return mozilla::HashGeneric(aPtr); |
|
1082 } |
|
1083 |
|
1084 static bool match(const Block& aB, const void* const& aPtr) |
|
1085 { |
|
1086 return aB.mPtr == aPtr; |
|
1087 } |
|
1088 }; |
|
1089 |
|
1090 typedef js::HashSet<Block, Block, InfallibleAllocPolicy> BlockTable; |
|
1091 static BlockTable* gBlockTable = nullptr; |
|
1092 |
|
1093 typedef js::HashSet<const StackTrace*, js::DefaultHasher<const StackTrace*>, |
|
1094 InfallibleAllocPolicy> |
|
1095 StackTraceSet; |
|
1096 |
|
1097 // Add a pointer to each live stack trace into the given StackTraceSet. (A |
|
1098 // stack trace is live if it's used by one of the live blocks.) |
|
1099 static void |
|
1100 GatherUsedStackTraces(StackTraceSet& aStackTraces) |
|
1101 { |
|
1102 MOZ_ASSERT(gStateLock->IsLocked()); |
|
1103 MOZ_ASSERT(Thread::Fetch()->InterceptsAreBlocked()); |
|
1104 |
|
1105 aStackTraces.finish(); |
|
1106 aStackTraces.init(1024); |
|
1107 |
|
1108 for (BlockTable::Range r = gBlockTable->all(); !r.empty(); r.popFront()) { |
|
1109 const Block& b = r.front(); |
|
1110 aStackTraces.put(b.AllocStackTrace()); |
|
1111 aStackTraces.put(b.ReportStackTrace1()); |
|
1112 aStackTraces.put(b.ReportStackTrace2()); |
|
1113 } |
|
1114 |
|
1115 // Any of the stack traces added above may have been null. For the sake of |
|
1116 // cleanliness, don't leave the null pointer in the set. |
|
1117 aStackTraces.remove(nullptr); |
|
1118 } |
|
1119 |
|
1120 // Delete stack traces that we aren't using, and compact our hashtable. |
|
1121 static void |
|
1122 GCStackTraces() |
|
1123 { |
|
1124 MOZ_ASSERT(gStateLock->IsLocked()); |
|
1125 MOZ_ASSERT(Thread::Fetch()->InterceptsAreBlocked()); |
|
1126 |
|
1127 StackTraceSet usedStackTraces; |
|
1128 GatherUsedStackTraces(usedStackTraces); |
|
1129 |
|
1130 // Delete all unused stack traces from gStackTraceTable. The Enum destructor |
|
1131 // will automatically rehash and compact the table. |
|
1132 for (StackTraceTable::Enum e(*gStackTraceTable); |
|
1133 !e.empty(); |
|
1134 e.popFront()) { |
|
1135 StackTrace* const& st = e.front(); |
|
1136 |
|
1137 if (!usedStackTraces.has(st)) { |
|
1138 e.removeFront(); |
|
1139 InfallibleAllocPolicy::delete_(st); |
|
1140 } |
|
1141 } |
|
1142 |
|
1143 // Schedule a GC when we have twice as many stack traces as we had right after |
|
1144 // this GC finished. |
|
1145 gGCStackTraceTableWhenSizeExceeds = 2 * gStackTraceTable->count(); |
|
1146 } |
|
1147 |
|
1148 //--------------------------------------------------------------------------- |
|
1149 // malloc/free callbacks |
|
1150 //--------------------------------------------------------------------------- |
|
1151 |
|
1152 static size_t gSmallBlockActualSizeCounter = 0; |
|
1153 |
|
1154 static void |
|
1155 AllocCallback(void* aPtr, size_t aReqSize, Thread* aT) |
|
1156 { |
|
1157 MOZ_ASSERT(gIsDMDRunning); |
|
1158 |
|
1159 if (!aPtr) { |
|
1160 return; |
|
1161 } |
|
1162 |
|
1163 AutoLockState lock; |
|
1164 AutoBlockIntercepts block(aT); |
|
1165 |
|
1166 size_t actualSize = gMallocTable->malloc_usable_size(aPtr); |
|
1167 size_t sampleBelowSize = gOptions->SampleBelowSize(); |
|
1168 |
|
1169 if (actualSize < sampleBelowSize) { |
|
1170 // If this allocation is smaller than the sample-below size, increment the |
|
    // cumulative counter. Then, if that counter now exceeds the sample-below
    // size, blame this allocation for |sampleBelowSize| bytes. This precludes
    // the measurement of slop.
|
1174 gSmallBlockActualSizeCounter += actualSize; |
|
1175 if (gSmallBlockActualSizeCounter >= sampleBelowSize) { |
|
1176 gSmallBlockActualSizeCounter -= sampleBelowSize; |
|
1177 |
|
1178 Block b(aPtr, sampleBelowSize, StackTrace::Get(aT), /* sampled */ true); |
|
1179 (void)gBlockTable->putNew(aPtr, b); |
|
1180 } |
|
1181 } else { |
|
    // If this block is at least the sample-below size, record it exactly.
|
1183 Block b(aPtr, aReqSize, StackTrace::Get(aT), /* sampled */ false); |
|
1184 (void)gBlockTable->putNew(aPtr, b); |
|
1185 } |
|
1186 } |
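// A worked example of the sampling arithmetic above (illustrative): with
// --sample-below=4093 and a zero counter, three allocations whose usable size
// is 2,000 bytes each give counter values 2,000 and 4,000 (both below 4,093,
// so nothing is recorded) and then 6,000. Since 6,000 >= 4,093, the third
// allocation is recorded as a sampled block of 4,093 bytes and the counter
// becomes 6,000 - 4,093 = 1,907.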
|
1187 |
|
1188 static void |
|
1189 FreeCallback(void* aPtr, Thread* aT) |
|
1190 { |
|
1191 MOZ_ASSERT(gIsDMDRunning); |
|
1192 |
|
1193 if (!aPtr) { |
|
1194 return; |
|
1195 } |
|
1196 |
|
1197 AutoLockState lock; |
|
1198 AutoBlockIntercepts block(aT); |
|
1199 |
|
1200 gBlockTable->remove(aPtr); |
|
1201 |
|
1202 if (gStackTraceTable->count() > gGCStackTraceTableWhenSizeExceeds) { |
|
1203 GCStackTraces(); |
|
1204 } |
|
1205 } |
|
1206 |
|
1207 //--------------------------------------------------------------------------- |
|
1208 // malloc/free interception |
|
1209 //--------------------------------------------------------------------------- |
|
1210 |
|
1211 static void Init(const malloc_table_t* aMallocTable); |
|
1212 |
|
1213 } // namespace dmd |
|
1214 } // namespace mozilla |
|
1215 |
|
1216 void |
|
1217 replace_init(const malloc_table_t* aMallocTable) |
|
1218 { |
|
1219 mozilla::dmd::Init(aMallocTable); |
|
1220 } |
|
1221 |
|
1222 void* |
|
1223 replace_malloc(size_t aSize) |
|
1224 { |
|
1225 using namespace mozilla::dmd; |
|
1226 |
|
1227 if (!gIsDMDRunning) { |
|
1228 // DMD hasn't started up, either because it wasn't enabled by the user, or |
|
1229 // we're still in Init() and something has indirectly called malloc. Do a |
|
1230 // vanilla malloc. (In the latter case, if it fails we'll crash. But |
|
1231 // OOM is highly unlikely so early on.) |
|
1232 return gMallocTable->malloc(aSize); |
|
1233 } |
|
1234 |
|
1235 Thread* t = Thread::Fetch(); |
|
1236 if (t->InterceptsAreBlocked()) { |
|
1237 // Intercepts are blocked, which means this must be a call to malloc |
|
1238 // triggered indirectly by DMD (e.g. via NS_StackWalk). Be infallible. |
|
1239 return InfallibleAllocPolicy::malloc_(aSize); |
|
1240 } |
|
1241 |
|
1242 // This must be a call to malloc from outside DMD. Intercept it. |
|
1243 void* ptr = gMallocTable->malloc(aSize); |
|
1244 AllocCallback(ptr, aSize, t); |
|
1245 return ptr; |
|
1246 } |
|
1247 |
|
1248 void* |
|
1249 replace_calloc(size_t aCount, size_t aSize) |
|
1250 { |
|
1251 using namespace mozilla::dmd; |
|
1252 |
|
1253 if (!gIsDMDRunning) { |
|
1254 return gMallocTable->calloc(aCount, aSize); |
|
1255 } |
|
1256 |
|
1257 Thread* t = Thread::Fetch(); |
|
1258 if (t->InterceptsAreBlocked()) { |
|
1259 return InfallibleAllocPolicy::calloc_(aCount * aSize); |
|
1260 } |
|
1261 |
|
1262 void* ptr = gMallocTable->calloc(aCount, aSize); |
|
1263 AllocCallback(ptr, aCount * aSize, t); |
|
1264 return ptr; |
|
1265 } |
|
1266 |
|
1267 void* |
|
1268 replace_realloc(void* aOldPtr, size_t aSize) |
|
1269 { |
|
1270 using namespace mozilla::dmd; |
|
1271 |
|
1272 if (!gIsDMDRunning) { |
|
1273 return gMallocTable->realloc(aOldPtr, aSize); |
|
1274 } |
|
1275 |
|
1276 Thread* t = Thread::Fetch(); |
|
1277 if (t->InterceptsAreBlocked()) { |
|
1278 return InfallibleAllocPolicy::realloc_(aOldPtr, aSize); |
|
1279 } |
|
1280 |
|
1281 // If |aOldPtr| is nullptr, the call is equivalent to |malloc(aSize)|. |
|
1282 if (!aOldPtr) { |
|
1283 return replace_malloc(aSize); |
|
1284 } |
|
1285 |
|
1286 // Be very careful here! Must remove the block from the table before doing |
|
1287 // the realloc to avoid races, just like in replace_free(). |
|
1288 // Nb: This does an unnecessary hashtable remove+add if the block doesn't |
|
1289 // move, but doing better isn't worth the effort. |
|
1290 FreeCallback(aOldPtr, t); |
|
1291 void* ptr = gMallocTable->realloc(aOldPtr, aSize); |
|
1292 if (ptr) { |
|
1293 AllocCallback(ptr, aSize, t); |
|
1294 } else { |
|
1295 // If realloc fails, we re-insert the old pointer. It will look like it |
|
1296 // was allocated for the first time here, which is untrue, and the slop |
|
1297 // bytes will be zero, which may be untrue. But this case is rare and |
|
1298 // doing better isn't worth the effort. |
|
1299 AllocCallback(aOldPtr, gMallocTable->malloc_usable_size(aOldPtr), t); |
|
1300 } |
|
1301 return ptr; |
|
1302 } |
|
1303 |
|
1304 void* |
|
1305 replace_memalign(size_t aAlignment, size_t aSize) |
|
1306 { |
|
1307 using namespace mozilla::dmd; |
|
1308 |
|
1309 if (!gIsDMDRunning) { |
|
1310 return gMallocTable->memalign(aAlignment, aSize); |
|
1311 } |
|
1312 |
|
1313 Thread* t = Thread::Fetch(); |
|
1314 if (t->InterceptsAreBlocked()) { |
|
1315 return InfallibleAllocPolicy::memalign_(aAlignment, aSize); |
|
1316 } |
|
1317 |
|
1318 void* ptr = gMallocTable->memalign(aAlignment, aSize); |
|
1319 AllocCallback(ptr, aSize, t); |
|
1320 return ptr; |
|
1321 } |
|
1322 |
|
1323 void |
|
1324 replace_free(void* aPtr) |
|
1325 { |
|
1326 using namespace mozilla::dmd; |
|
1327 |
|
1328 if (!gIsDMDRunning) { |
|
1329 gMallocTable->free(aPtr); |
|
1330 return; |
|
1331 } |
|
1332 |
|
1333 Thread* t = Thread::Fetch(); |
|
1334 if (t->InterceptsAreBlocked()) { |
|
1335 return InfallibleAllocPolicy::free_(aPtr); |
|
1336 } |
|
1337 |
|
1338 // Do the actual free after updating the table. Otherwise, another thread |
|
1339 // could call malloc and get the freed block and update the table, and then |
|
1340 // our update here would remove the newly-malloc'd block. |
|
1341 FreeCallback(aPtr, t); |
|
1342 gMallocTable->free(aPtr); |
|
1343 } |
|
1344 |
|
1345 namespace mozilla { |
|
1346 namespace dmd { |
|
1347 |
|
1348 //--------------------------------------------------------------------------- |
|
1349 // Stack trace records |
|
1350 //--------------------------------------------------------------------------- |
|
1351 |
|
1352 class TraceRecordKey |
|
1353 { |
|
1354 public: |
|
1355 const StackTrace* const mAllocStackTrace; // never null |
|
1356 protected: |
|
1357 const StackTrace* const mReportStackTrace1; // nullptr if unreported |
|
1358 const StackTrace* const mReportStackTrace2; // nullptr if not 2x-reported |
|
1359 |
|
1360 public: |
|
1361 TraceRecordKey(const Block& aB) |
|
1362 : mAllocStackTrace(aB.AllocStackTrace()), |
|
1363 mReportStackTrace1(aB.ReportStackTrace1()), |
|
1364 mReportStackTrace2(aB.ReportStackTrace2()) |
|
1365 { |
|
1366 MOZ_ASSERT(mAllocStackTrace); |
|
1367 } |
|
1368 |
|
1369 // Hash policy. |
|
1370 |
|
1371 typedef TraceRecordKey Lookup; |
|
1372 |
|
1373 static uint32_t hash(const TraceRecordKey& aKey) |
|
1374 { |
|
1375 return mozilla::HashGeneric(aKey.mAllocStackTrace, |
|
1376 aKey.mReportStackTrace1, |
|
1377 aKey.mReportStackTrace2); |
|
1378 } |
|
1379 |
|
1380 static bool match(const TraceRecordKey& aA, const TraceRecordKey& aB) |
|
1381 { |
|
1382 return aA.mAllocStackTrace == aB.mAllocStackTrace && |
|
1383 aA.mReportStackTrace1 == aB.mReportStackTrace1 && |
|
1384 aA.mReportStackTrace2 == aB.mReportStackTrace2; |
|
1385 } |
|
1386 }; |
|
1387 |
|
1388 class RecordSize |
|
1389 { |
|
1390 static const size_t kReqBits = sizeof(size_t) * 8 - 1; // 31 or 63 |
|
1391 |
|
1392 size_t mReq; // size requested |
|
1393 size_t mSlop:kReqBits; // slop bytes |
|
1394 size_t mSampled:1; // were one or more blocks contributing to this |
|
1395 // RecordSize sampled? |
|
1396 public: |
|
1397 RecordSize() |
|
1398 : mReq(0), |
|
1399 mSlop(0), |
|
1400 mSampled(false) |
|
1401 {} |
|
1402 |
|
1403 size_t Req() const { return mReq; } |
|
1404 size_t Slop() const { return mSlop; } |
|
1405 size_t Usable() const { return mReq + mSlop; } |
|
1406 |
|
1407 bool IsSampled() const { return mSampled; } |
|
1408 |
|
1409 void Add(const Block& aB) |
|
1410 { |
|
1411 mReq += aB.ReqSize(); |
|
1412 mSlop += aB.SlopSize(); |
|
1413 mSampled = mSampled || aB.IsSampled(); |
|
1414 } |
|
1415 |
|
1416 void Add(const RecordSize& aRecordSize) |
|
1417 { |
|
1418 mReq += aRecordSize.Req(); |
|
1419 mSlop += aRecordSize.Slop(); |
|
1420 mSampled = mSampled || aRecordSize.IsSampled(); |
|
1421 } |
|
1422 |
|
1423 static int Cmp(const RecordSize& aA, const RecordSize& aB) |
|
1424 { |
|
1425 // Primary sort: put bigger usable sizes first. |
|
1426 if (aA.Usable() > aB.Usable()) return -1; |
|
1427 if (aA.Usable() < aB.Usable()) return 1; |
|
1428 |
|
1429 // Secondary sort: put bigger requested sizes first. |
|
1430 if (aA.Req() > aB.Req()) return -1; |
|
1431 if (aA.Req() < aB.Req()) return 1; |
|
1432 |
|
1433 // Tertiary sort: put non-sampled records before sampled records. |
|
1434 if (!aA.mSampled && aB.mSampled) return -1; |
|
1435 if ( aA.mSampled && !aB.mSampled) return 1; |
|
1436 |
|
1437 return 0; |
|
1438 } |
|
1439 }; |
|
1440 |
|
1441 // A collection of one or more heap blocks with a common TraceRecordKey. |
|
1442 class TraceRecord : public TraceRecordKey |
|
1443 { |
|
1444 // The TraceRecordKey base class serves as the key in TraceRecordTables. |
|
1445 // These two fields constitute the value, so it's ok for them to be |
|
1446 // |mutable|. |
|
1447 mutable uint32_t mNumBlocks; // number of blocks with this TraceRecordKey |
|
1448 mutable RecordSize mRecordSize; // combined size of those blocks |
|
1449 |
|
1450 public: |
|
1451 explicit TraceRecord(const TraceRecordKey& aKey) |
|
1452 : TraceRecordKey(aKey), |
|
1453 mNumBlocks(0), |
|
1454 mRecordSize() |
|
1455 {} |
|
1456 |
|
1457 uint32_t NumBlocks() const { return mNumBlocks; } |
|
1458 |
|
1459 const RecordSize& GetRecordSize() const { return mRecordSize; } |
|
1460 |
|
1461 // This is |const| thanks to the |mutable| fields above. |
|
1462 void Add(const Block& aB) const |
|
1463 { |
|
1464 mNumBlocks++; |
|
1465 mRecordSize.Add(aB); |
|
1466 } |
|
1467 |
|
1468 // For PrintSortedRecords. |
|
1469 static const char* const kRecordKind; |
|
1470 static bool recordsOverlap() { return false; } |
|
1471 |
|
1472 void Print(const Writer& aWriter, LocationService* aLocService, |
|
1473 uint32_t aM, uint32_t aN, const char* aStr, const char* astr, |
|
1474 size_t aCategoryUsableSize, size_t aCumulativeUsableSize, |
|
1475 size_t aTotalUsableSize) const; |
|
1476 |
|
1477 static int QsortCmp(const void* aA, const void* aB) |
|
1478 { |
|
1479 const TraceRecord* const a = *static_cast<const TraceRecord* const*>(aA); |
|
1480 const TraceRecord* const b = *static_cast<const TraceRecord* const*>(aB); |
|
1481 |
|
1482 return RecordSize::Cmp(a->mRecordSize, b->mRecordSize); |
|
1483 } |
|
1484 }; |
|
1485 |
|
1486 const char* const TraceRecord::kRecordKind = "trace"; |
|
1487 |
|
1488 typedef js::HashSet<TraceRecord, TraceRecord, InfallibleAllocPolicy> |
|
1489 TraceRecordTable; |
|
1490 |
|
1491 void |
|
1492 TraceRecord::Print(const Writer& aWriter, LocationService* aLocService, |
|
1493 uint32_t aM, uint32_t aN, const char* aStr, const char* astr, |
|
1494 size_t aCategoryUsableSize, size_t aCumulativeUsableSize, |
|
1495 size_t aTotalUsableSize) const |
|
1496 { |
|
1497 bool showTilde = mRecordSize.IsSampled(); |
|
1498 |
|
1499 W("%s: %s block%s in stack trace record %s of %s\n", |
|
1500 aStr, |
|
1501 Show(mNumBlocks, gBuf1, kBufLen, showTilde), Plural(mNumBlocks), |
|
1502 Show(aM, gBuf2, kBufLen), |
|
1503 Show(aN, gBuf3, kBufLen)); |
|
1504 |
|
1505 W(" %s bytes (%s requested / %s slop)\n", |
|
1506 Show(mRecordSize.Usable(), gBuf1, kBufLen, showTilde), |
|
1507 Show(mRecordSize.Req(), gBuf2, kBufLen, showTilde), |
|
1508 Show(mRecordSize.Slop(), gBuf3, kBufLen, showTilde)); |
|
1509 |
|
1510 W(" %4.2f%% of the heap (%4.2f%% cumulative); " |
|
1511 " %4.2f%% of %s (%4.2f%% cumulative)\n", |
|
1512 Percent(mRecordSize.Usable(), aTotalUsableSize), |
|
1513 Percent(aCumulativeUsableSize, aTotalUsableSize), |
|
1514 Percent(mRecordSize.Usable(), aCategoryUsableSize), |
|
1515 astr, |
|
1516 Percent(aCumulativeUsableSize, aCategoryUsableSize)); |
|
1517 |
|
1518 W(" Allocated at\n"); |
|
1519 mAllocStackTrace->Print(aWriter, aLocService); |
|
1520 |
|
1521 if (mReportStackTrace1) { |
|
1522 W("\n Reported at\n"); |
|
1523 mReportStackTrace1->Print(aWriter, aLocService); |
|
1524 } |
|
1525 if (mReportStackTrace2) { |
|
1526 W("\n Reported again at\n"); |
|
1527 mReportStackTrace2->Print(aWriter, aLocService); |
|
1528 } |
|
1529 |
|
1530 W("\n"); |
|
1531 } |
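// For example (illustrative; assumes a caller that passes aStr = "Unreported"
// and astr = "unreported"), the start of a printed trace record looks like:
//
//   Unreported: 4 blocks in stack trace record 2 of 1,000
//    16,384 bytes (16,000 requested / 384 slop)
//    0.05% of the heap (1.23% cumulative);  0.08% of unreported (2.34% cumulative)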
|
1532 |
|
1533 //--------------------------------------------------------------------------- |
|
1534 // Stack frame records |
|
1535 //--------------------------------------------------------------------------- |
|
1536 |
|
1537 // A collection of one or more stack frames (from heap block allocation stack |
|
1538 // traces) with a common PC. |
|
1539 class FrameRecord |
|
1540 { |
|
1541 // mPc is used as the key in FrameRecordTable, and the other members |
|
1542 // constitute the value, so it's ok for them to be |mutable|. |
|
1543 const void* const mPc; |
|
1544 mutable size_t mNumBlocks; |
|
1545 mutable size_t mNumTraceRecords; |
|
1546 mutable RecordSize mRecordSize; |
|
1547 |
|
1548 public: |
|
1549 explicit FrameRecord(const void* aPc) |
|
1550 : mPc(aPc), |
|
1551 mNumBlocks(0), |
|
1552 mNumTraceRecords(0), |
|
1553 mRecordSize() |
|
1554 {} |
|
1555 |
|
1556 const RecordSize& GetRecordSize() const { return mRecordSize; } |
|
1557 |
|
1558 // This is |const| thanks to the |mutable| fields above. |
|
1559 void Add(const TraceRecord& aTr) const |
|
1560 { |
|
1561 mNumBlocks += aTr.NumBlocks(); |
|
1562 mNumTraceRecords++; |
|
1563 mRecordSize.Add(aTr.GetRecordSize()); |
|
1564 } |
|
1565 |
|
1566 void Print(const Writer& aWriter, LocationService* aLocService, |
|
1567 uint32_t aM, uint32_t aN, const char* aStr, const char* astr, |
|
1568 size_t aCategoryUsableSize, size_t aCumulativeUsableSize, |
|
1569 size_t aTotalUsableSize) const; |
|
1570 |
|
1571 static int QsortCmp(const void* aA, const void* aB) |
|
1572 { |
|
1573 const FrameRecord* const a = *static_cast<const FrameRecord* const*>(aA); |
|
1574 const FrameRecord* const b = *static_cast<const FrameRecord* const*>(aB); |
|
1575 |
|
1576 return RecordSize::Cmp(a->mRecordSize, b->mRecordSize); |
|
1577 } |
|
1578 |
|
1579 // For PrintSortedRecords. |
|
1580 static const char* const kRecordKind; |
|
1581 static bool recordsOverlap() { return true; } |
|
1582 |
|
1583 // Hash policy. |
|
1584 |
|
1585 typedef const void* Lookup; |
|
1586 |
|
1587 static uint32_t hash(const void* const& aPc) |
|
1588 { |
|
1589 return mozilla::HashGeneric(aPc); |
|
1590 } |
|
1591 |
|
1592 static bool match(const FrameRecord& aFr, const void* const& aPc) |
|
1593 { |
|
1594 return aFr.mPc == aPc; |
|
1595 } |
|
1596 }; |
|
1597 |
|
1598 const char* const FrameRecord::kRecordKind = "frame"; |
|
1599 |
|
1600 typedef js::HashSet<FrameRecord, FrameRecord, InfallibleAllocPolicy> |
|
1601 FrameRecordTable; |
|
1602 |
|
1603 void |
|
1604 FrameRecord::Print(const Writer& aWriter, LocationService* aLocService, |
|
1605 uint32_t aM, uint32_t aN, const char* aStr, const char* astr, |
|
1606 size_t aCategoryUsableSize, size_t aCumulativeUsableSize, |
|
1607 size_t aTotalUsableSize) const |
|
1608 { |
|
1609 (void)aCumulativeUsableSize; |
|
1610 |
|
1611 bool showTilde = mRecordSize.IsSampled(); |
|
1612 |
|
1613 W("%s: %s block%s from %s stack trace record%s in stack frame record %s of %s\n", |
|
1614 aStr, |
|
1615 Show(mNumBlocks, gBuf1, kBufLen, showTilde), Plural(mNumBlocks), |
|
1616 Show(mNumTraceRecords, gBuf2, kBufLen, showTilde), Plural(mNumTraceRecords), |
|
1617 Show(aM, gBuf3, kBufLen), |
|
1618 Show(aN, gBuf4, kBufLen)); |
|
1619 |
|
1620 W(" %s bytes (%s requested / %s slop)\n", |
|
1621 Show(mRecordSize.Usable(), gBuf1, kBufLen, showTilde), |
|
1622 Show(mRecordSize.Req(), gBuf2, kBufLen, showTilde), |
|
1623 Show(mRecordSize.Slop(), gBuf3, kBufLen, showTilde)); |
|
1624 |
|
1625 W(" %4.2f%% of the heap; %4.2f%% of %s\n", |
|
1626 Percent(mRecordSize.Usable(), aTotalUsableSize), |
|
1627 Percent(mRecordSize.Usable(), aCategoryUsableSize), |
|
1628 astr); |
|
1629 |
|
1630 W(" PC is\n"); |
|
1631 aLocService->WriteLocation(aWriter, mPc); |
|
1632 W("\n"); |
|
1633 } |
|
1634 |
|
1635 //--------------------------------------------------------------------------- |
|
1636 // Options (Part 2) |
|
1637 //--------------------------------------------------------------------------- |
|
1638 |
|
1639 // Given an |aOptionName| like "foo", succeed if |aArg| has the form "foo=blah" |
|
// (where "blah" is non-empty) and return the pointer to "blah". |aArg| must
// not have any leading whitespace; the caller strips it before calling.
|
1642 const char* |
|
1643 Options::ValueIfMatch(const char* aArg, const char* aOptionName) |
|
1644 { |
|
1645 MOZ_ASSERT(!isspace(*aArg)); // any leading whitespace should not remain |
|
1646 size_t optionLen = strlen(aOptionName); |
|
1647 if (strncmp(aArg, aOptionName, optionLen) == 0 && aArg[optionLen] == '=' && |
|
1648 aArg[optionLen + 1]) { |
|
1649 return aArg + optionLen + 1; |
|
1650 } |
|
1651 return nullptr; |
|
1652 } |
|
1653 |
|
1654 // Extracts a |long| value for an option from an argument. It must be within |
|
1655 // the range |aMin..aMax| (inclusive). |
|
1656 bool |
|
1657 Options::GetLong(const char* aArg, const char* aOptionName, |
|
1658 long aMin, long aMax, long* aN) |
|
1659 { |
|
1660 if (const char* optionValue = ValueIfMatch(aArg, aOptionName)) { |
|
1661 char* endPtr; |
|
1662 *aN = strtol(optionValue, &endPtr, /* base */ 10); |
|
1663 if (!*endPtr && aMin <= *aN && *aN <= aMax && |
|
1664 *aN != LONG_MIN && *aN != LONG_MAX) { |
|
1665 return true; |
|
1666 } |
|
1667 } |
|
1668 return false; |
|
1669 } |
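// For example (illustrative): given aArg == "--max-frames=16",
// GetLong(aArg, "--max-frames", 1, 24, &n) stores 16 in |n| and returns true,
// whereas "--max-frames=", "--max-frames=abc" and "--max-frames=99" all cause
// it to return false.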
|
1670 |
|
1671 // The sample-below default is a prime number close to 4096. |
|
1672 // - Why that size? Because it's *much* faster but only moderately less precise |
|
1673 // than a size of 1. |
|
1674 // - Why prime? Because it makes our sampling more random. If we used a size |
|
1675 // of 4096, for example, then our alloc counter would only take on even |
|
// values, because jemalloc always rounds up request sizes. In contrast, a
|
1677 // prime size will explore all possible values of the alloc counter. |
|
1678 // |
|
Options::Options(const char* aDMDEnvVar)
  : mDMDEnvVar(InfallibleAllocPolicy::strdup_(aDMDEnvVar)),
    mSampleBelowSize(4093, 100 * 100 * 1000),
    mMaxFrames(StackTrace::MaxFrames, StackTrace::MaxFrames),
    mMaxRecords(1000, 1000000),
    mMode(Normal)
{
  char* e = mDMDEnvVar;
  if (strcmp(e, "1") != 0) {
    bool isEnd = false;
    while (!isEnd) {
      // Consume leading whitespace.
      while (isspace(*e)) {
        e++;
      }

      // Save the start of the arg.
      const char* arg = e;

      // Find the first char after the arg, and temporarily change it to '\0'
      // to isolate the arg.
      while (!isspace(*e) && *e != '\0') {
        e++;
      }
      char replacedChar = *e;
      isEnd = replacedChar == '\0';
      *e = '\0';

      // Handle arg
      long myLong;
      if (GetLong(arg, "--sample-below", 1, mSampleBelowSize.mMax, &myLong)) {
        mSampleBelowSize.mActual = myLong;

      } else if (GetLong(arg, "--max-frames", 1, mMaxFrames.mMax, &myLong)) {
        mMaxFrames.mActual = myLong;

      } else if (GetLong(arg, "--max-records", 1, mMaxRecords.mMax, &myLong)) {
        mMaxRecords.mActual = myLong;

      } else if (strcmp(arg, "--mode=normal") == 0) {
        mMode = Options::Normal;
      } else if (strcmp(arg, "--mode=test") == 0) {
        mMode = Options::Test;
      } else if (strcmp(arg, "--mode=stress") == 0) {
        mMode = Options::Stress;

      } else if (strcmp(arg, "") == 0) {
        // This can only happen if there is trailing whitespace.  Ignore.
        MOZ_ASSERT(isEnd);

      } else {
        BadArg(arg);
      }

      // Undo the temporary isolation.
      *e = replacedChar;
    }
  }
}
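
// For example (illustrative only), a setting such as
//
//   DMD="--sample-below=16381 --max-frames=8"
//
// is split into two args by the loop above, assuming both values fall within
// the ranges printed by BadArg() below.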

void
Options::BadArg(const char* aArg)
{
  StatusMsg("\n");
  StatusMsg("Bad entry in the $DMD environment variable: '%s'.\n", aArg);
  StatusMsg("\n");
  StatusMsg("Valid values of $DMD are:\n");
  StatusMsg("- undefined or \"\" or \"0\", which disables DMD, or\n");
  StatusMsg("- \"1\", which enables it with the default options, or\n");
  StatusMsg("- a whitespace-separated list of |--option=val| entries, which\n");
  StatusMsg("  enables it with non-default options.\n");
  StatusMsg("\n");
  StatusMsg("The following options are allowed; defaults are shown in [].\n");
  StatusMsg("  --sample-below=<1..%d> Sample blocks smaller than this [%d]\n",
            int(mSampleBelowSize.mMax),
            int(mSampleBelowSize.mDefault));
  StatusMsg("    (prime numbers are recommended)\n");
  StatusMsg("  --max-frames=<1..%d> Max. depth of stack traces [%d]\n",
            int(mMaxFrames.mMax),
            int(mMaxFrames.mDefault));
  StatusMsg("  --max-records=<1..%u> Max. number of records printed [%u]\n",
            mMaxRecords.mMax,
            mMaxRecords.mDefault);
  StatusMsg("  --mode=<normal|test|stress> Mode of operation [normal]\n");
  StatusMsg("\n");
  exit(1);
}

//---------------------------------------------------------------------------
// DMD start-up
//---------------------------------------------------------------------------

#ifdef XP_MACOSX
static void
NopStackWalkCallback(void* aPc, void* aSp, void* aClosure)
{
}
#endif

// Note that fopen() can allocate.
static FILE*
OpenOutputFile(const char* aFilename)
{
  FILE* fp = fopen(aFilename, "w");
  if (!fp) {
    StatusMsg("can't create %s file: %s\n", aFilename, strerror(errno));
    exit(1);
  }
  return fp;
}

static void RunTestMode(FILE* fp);
static void RunStressMode(FILE* fp);

// WARNING: this function runs *very* early -- before all static initializers
// have run.  For this reason, non-scalar globals such as gStateLock and
// gStackTraceTable are allocated dynamically (so we can guarantee their
// construction in this function) rather than statically.
static void
Init(const malloc_table_t* aMallocTable)
{
  MOZ_ASSERT(!gIsDMDRunning);

  gMallocTable = aMallocTable;

  // DMD is controlled by the |DMD| environment variable.
  // - If it's unset or empty or "0", DMD doesn't run.
  // - Otherwise, the contents dictate DMD's behaviour.
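  //
  // For example (illustrative only): launching the process with DMD="1" in
  // its environment enables DMD with the default options, while
  // DMD="--mode=test" selects test mode.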

  char* e = getenv("DMD");
  StatusMsg("$DMD = '%s'\n", e);

  if (!e || strcmp(e, "") == 0 || strcmp(e, "0") == 0) {
    StatusMsg("DMD is not enabled\n");
    return;
  }

  // Parse $DMD env var.
  gOptions = InfallibleAllocPolicy::new_<Options>(e);

  StatusMsg("DMD is enabled\n");

#ifdef XP_MACOSX
  // On Mac OS X we need to call StackWalkInitCriticalAddress() very early
  // (prior to the creation of any mutexes, apparently) otherwise we can get
  // hangs when getting stack traces (bug 821577).  But
  // StackWalkInitCriticalAddress() isn't exported from xpcom/, so instead we
  // just call NS_StackWalk, because that calls StackWalkInitCriticalAddress().
  // See the comment above StackWalkInitCriticalAddress() for more details.
  (void)NS_StackWalk(NopStackWalkCallback, /* skipFrames */ 0,
                     /* maxFrames */ 1, nullptr, 0, nullptr);
#endif

  gStateLock = InfallibleAllocPolicy::new_<Mutex>();

  gSmallBlockActualSizeCounter = 0;

  DMD_CREATE_TLS_INDEX(gTlsIndex);

  {
    AutoLockState lock;

    gStackTraceTable = InfallibleAllocPolicy::new_<StackTraceTable>();
    gStackTraceTable->init(8192);

    gBlockTable = InfallibleAllocPolicy::new_<BlockTable>();
    gBlockTable->init(8192);
  }

  if (gOptions->IsTestMode()) {
    // OpenOutputFile() can allocate.  So do this before setting
    // gIsDMDRunning so those allocations don't show up in our results.  Once
    // gIsDMDRunning is set we are intercepting malloc et al. in earnest.
    FILE* fp = OpenOutputFile("test.dmd");
    gIsDMDRunning = true;

    StatusMsg("running test mode...\n");
    RunTestMode(fp);
    StatusMsg("finished test mode\n");
    fclose(fp);
    exit(0);
  }

  if (gOptions->IsStressMode()) {
    FILE* fp = OpenOutputFile("stress.dmd");
    gIsDMDRunning = true;

    StatusMsg("running stress mode...\n");
    RunStressMode(fp);
    StatusMsg("finished stress mode\n");
    fclose(fp);
    exit(0);
  }

  gIsDMDRunning = true;
}

//---------------------------------------------------------------------------
// DMD reporting and unreporting
//---------------------------------------------------------------------------

static void
ReportHelper(const void* aPtr, bool aReportedOnAlloc)
{
  if (!gIsDMDRunning || !aPtr) {
    return;
  }

  Thread* t = Thread::Fetch();

  AutoBlockIntercepts block(t);
  AutoLockState lock;

  if (BlockTable::Ptr p = gBlockTable->lookup(aPtr)) {
    p->Report(t, aReportedOnAlloc);
  } else {
    // We have no record of the block.  Do nothing.  Either:
    // - We're sampling and we skipped this block.  This is likely.
    // - It's a bogus pointer.  This is unlikely because Report() is almost
    //   always called in conjunction with a malloc_size_of-style function.
  }
}

MOZ_EXPORT void
Report(const void* aPtr)
{
  ReportHelper(aPtr, /* onAlloc */ false);
}

MOZ_EXPORT void
ReportOnAlloc(const void* aPtr)
{
  ReportHelper(aPtr, /* onAlloc */ true);
}

//---------------------------------------------------------------------------
// DMD output
//---------------------------------------------------------------------------

// This works for both TraceRecords and StackFrameRecords.
template <class Record>
static void
PrintSortedRecords(const Writer& aWriter, LocationService* aLocService,
                   const char* aStr, const char* astr,
                   const js::HashSet<Record, Record, InfallibleAllocPolicy>&
                     aRecordTable,
                   size_t aCategoryUsableSize, size_t aTotalUsableSize)
{
  const char* kind = Record::kRecordKind;
  StatusMsg(" creating and sorting %s stack %s record array...\n", astr, kind);

  // Convert the table into a sorted array.
  js::Vector<const Record*, 0, InfallibleAllocPolicy> recordArray;
  recordArray.reserve(aRecordTable.count());
  typedef js::HashSet<Record, Record, InfallibleAllocPolicy> RecordTable;
  for (typename RecordTable::Range r = aRecordTable.all();
       !r.empty();
       r.popFront()) {
    recordArray.infallibleAppend(&r.front());
  }
  qsort(recordArray.begin(), recordArray.length(), sizeof(recordArray[0]),
        Record::QsortCmp);

  WriteTitle("%s stack %s records\n", aStr, kind);

  if (recordArray.length() == 0) {
    W("(none)\n\n");
    return;
  }

  StatusMsg(" printing %s stack %s record array...\n", astr, kind);
  size_t cumulativeUsableSize = 0;

  // Limit the number of records printed, because fix-linux-stack.pl is too
  // damn slow.  Note that we don't break out of this loop because we need to
  // keep adding to |cumulativeUsableSize|.
  uint32_t numRecords = recordArray.length();
  uint32_t maxRecords = gOptions->MaxRecords();
  for (uint32_t i = 0; i < numRecords; i++) {
    const Record* r = recordArray[i];
    cumulativeUsableSize += r->GetRecordSize().Usable();
    if (i < maxRecords) {
      r->Print(aWriter, aLocService, i+1, numRecords, aStr, astr,
               aCategoryUsableSize, cumulativeUsableSize, aTotalUsableSize);
    } else if (i == maxRecords) {
      W("%s: stopping after %s stack %s records\n\n", aStr,
        Show(maxRecords, gBuf1, kBufLen), kind);
    }
  }

  // This holds for TraceRecords, but not for FrameRecords.
  MOZ_ASSERT_IF(!Record::recordsOverlap(),
                aCategoryUsableSize == cumulativeUsableSize);
}

static void
PrintSortedTraceAndFrameRecords(const Writer& aWriter,
                                LocationService* aLocService,
                                const char* aStr, const char* astr,
                                const TraceRecordTable& aTraceRecordTable,
                                size_t aCategoryUsableSize,
                                size_t aTotalUsableSize)
{
  PrintSortedRecords(aWriter, aLocService, aStr, astr, aTraceRecordTable,
                     aCategoryUsableSize, aTotalUsableSize);

  FrameRecordTable frameRecordTable;
  (void)frameRecordTable.init(2048);
  for (TraceRecordTable::Range r = aTraceRecordTable.all();
       !r.empty();
       r.popFront()) {
    const TraceRecord& tr = r.front();
    const StackTrace* st = tr.mAllocStackTrace;

    // A single PC can appear multiple times in a stack trace.  We ignore
    // duplicates by first sorting and then ignoring adjacent duplicates.
    StackTrace sorted(*st);
    sorted.Sort();              // sorts the copy, not the original
    void* prevPc = (void*)intptr_t(-1);
    for (uint32_t i = 0; i < sorted.Length(); i++) {
      void* pc = sorted.Pc(i);
      if (pc == prevPc) {
        continue;               // ignore duplicate
      }
      prevPc = pc;

      FrameRecordTable::AddPtr p = frameRecordTable.lookupForAdd(pc);
      if (!p) {
        FrameRecord fr(pc);
        (void)frameRecordTable.add(p, fr);
      }
      p->Add(tr);
    }
  }

  PrintSortedRecords(aWriter, aLocService, aStr, astr, frameRecordTable,
                     aCategoryUsableSize, aTotalUsableSize);
}

// Note that, unlike most SizeOf* functions, this function does not take a
// |mozilla::MallocSizeOf| argument.  That's because those arguments are
// primarily there to help DMD track heap blocks... but DMD deliberately
// doesn't track heap blocks it allocated for itself!
//
// SizeOfInternal should be called while you're holding the state lock and
// while intercepts are blocked; SizeOf acquires the lock and blocks
// intercepts.

static void
SizeOfInternal(Sizes* aSizes)
{
  MOZ_ASSERT(gStateLock->IsLocked());
  MOZ_ASSERT(Thread::Fetch()->InterceptsAreBlocked());

  aSizes->Clear();

  if (!gIsDMDRunning) {
    return;
  }

  StackTraceSet usedStackTraces;
  GatherUsedStackTraces(usedStackTraces);

  for (StackTraceTable::Range r = gStackTraceTable->all();
       !r.empty();
       r.popFront()) {
    StackTrace* const& st = r.front();

    if (usedStackTraces.has(st)) {
      aSizes->mStackTracesUsed += MallocSizeOf(st);
    } else {
      aSizes->mStackTracesUnused += MallocSizeOf(st);
    }
  }

  aSizes->mStackTraceTable =
    gStackTraceTable->sizeOfIncludingThis(MallocSizeOf);

  aSizes->mBlockTable = gBlockTable->sizeOfIncludingThis(MallocSizeOf);
}

MOZ_EXPORT void
SizeOf(Sizes* aSizes)
{
  aSizes->Clear();

  if (!gIsDMDRunning) {
    return;
  }

  AutoBlockIntercepts block(Thread::Fetch());
  AutoLockState lock;
  SizeOfInternal(aSizes);
}

void
ClearReportsInternal()
{
  MOZ_ASSERT(gStateLock->IsLocked());

  // Unreport all blocks that were marked reported by a memory reporter.  This
  // excludes those that were reported on allocation, because they need to keep
  // their reported marking.
  for (BlockTable::Range r = gBlockTable->all(); !r.empty(); r.popFront()) {
    r.front().UnreportIfNotReportedOnAlloc();
  }
}

MOZ_EXPORT void
ClearReports()
{
  if (!gIsDMDRunning) {
    return;
  }

  AutoLockState lock;
  ClearReportsInternal();
}

MOZ_EXPORT bool
IsEnabled()
{
  return gIsDMDRunning;
}

MOZ_EXPORT void
Dump(Writer aWriter)
{
  if (!gIsDMDRunning) {
    const char* msg = "cannot Dump(); DMD was not enabled at startup\n";
    StatusMsg("%s", msg);
    W("%s", msg);
    return;
  }

  AutoBlockIntercepts block(Thread::Fetch());
  AutoLockState lock;

  static int dumpCount = 1;
  StatusMsg("Dump %d {\n", dumpCount++);

  StatusMsg(" gathering stack trace records...\n");

  TraceRecordTable unreportedTraceRecordTable;
  (void)unreportedTraceRecordTable.init(1024);
  size_t unreportedUsableSize = 0;
  size_t unreportedNumBlocks = 0;

  TraceRecordTable onceReportedTraceRecordTable;
  (void)onceReportedTraceRecordTable.init(1024);
  size_t onceReportedUsableSize = 0;
  size_t onceReportedNumBlocks = 0;

  TraceRecordTable twiceReportedTraceRecordTable;
  (void)twiceReportedTraceRecordTable.init(0);
  size_t twiceReportedUsableSize = 0;
  size_t twiceReportedNumBlocks = 0;

  bool anyBlocksSampled = false;

  for (BlockTable::Range r = gBlockTable->all(); !r.empty(); r.popFront()) {
    const Block& b = r.front();

    TraceRecordTable* table;
    uint32_t numReports = b.NumReports();
    if (numReports == 0) {
      unreportedUsableSize += b.UsableSize();
      unreportedNumBlocks++;
      table = &unreportedTraceRecordTable;
    } else if (numReports == 1) {
      onceReportedUsableSize += b.UsableSize();
      onceReportedNumBlocks++;
      table = &onceReportedTraceRecordTable;
    } else {
      MOZ_ASSERT(numReports == 2);
      twiceReportedUsableSize += b.UsableSize();
      twiceReportedNumBlocks++;
      table = &twiceReportedTraceRecordTable;
    }
    TraceRecordKey key(b);
    TraceRecordTable::AddPtr p = table->lookupForAdd(key);
    if (!p) {
      TraceRecord tr(b);
      (void)table->add(p, tr);
    }
    p->Add(b);

    anyBlocksSampled = anyBlocksSampled || b.IsSampled();
  }
  size_t totalUsableSize =
    unreportedUsableSize + onceReportedUsableSize + twiceReportedUsableSize;
  size_t totalNumBlocks =
    unreportedNumBlocks + onceReportedNumBlocks + twiceReportedNumBlocks;

  WriteTitle("Invocation\n");
  W("$DMD = '%s'\n", gOptions->DMDEnvVar());
  W("Sample-below size = %lld\n\n",
    (long long)(gOptions->SampleBelowSize()));

  // Allocate this on the heap instead of the stack because it's fairly large.
  LocationService* locService = InfallibleAllocPolicy::new_<LocationService>();

  PrintSortedRecords(aWriter, locService, "Twice-reported", "twice-reported",
                     twiceReportedTraceRecordTable, twiceReportedUsableSize,
                     totalUsableSize);

  PrintSortedTraceAndFrameRecords(aWriter, locService,
                                  "Unreported", "unreported",
                                  unreportedTraceRecordTable,
                                  unreportedUsableSize, totalUsableSize);

  PrintSortedTraceAndFrameRecords(aWriter, locService,
                                  "Once-reported", "once-reported",
                                  onceReportedTraceRecordTable,
                                  onceReportedUsableSize, totalUsableSize);

  bool showTilde = anyBlocksSampled;
  WriteTitle("Summary\n");

  W("Total: %12s bytes (%6.2f%%) in %7s blocks (%6.2f%%)\n",
    Show(totalUsableSize, gBuf1, kBufLen, showTilde),
    100.0,
    Show(totalNumBlocks, gBuf2, kBufLen, showTilde),
    100.0);

  W("Unreported: %12s bytes (%6.2f%%) in %7s blocks (%6.2f%%)\n",
    Show(unreportedUsableSize, gBuf1, kBufLen, showTilde),
    Percent(unreportedUsableSize, totalUsableSize),
    Show(unreportedNumBlocks, gBuf2, kBufLen, showTilde),
    Percent(unreportedNumBlocks, totalNumBlocks));

  W("Once-reported: %12s bytes (%6.2f%%) in %7s blocks (%6.2f%%)\n",
    Show(onceReportedUsableSize, gBuf1, kBufLen, showTilde),
    Percent(onceReportedUsableSize, totalUsableSize),
    Show(onceReportedNumBlocks, gBuf2, kBufLen, showTilde),
    Percent(onceReportedNumBlocks, totalNumBlocks));

  W("Twice-reported: %12s bytes (%6.2f%%) in %7s blocks (%6.2f%%)\n",
    Show(twiceReportedUsableSize, gBuf1, kBufLen, showTilde),
    Percent(twiceReportedUsableSize, totalUsableSize),
    Show(twiceReportedNumBlocks, gBuf2, kBufLen, showTilde),
    Percent(twiceReportedNumBlocks, totalNumBlocks));

  W("\n");


  // Stats are non-deterministic, so don't show them in test mode.
  if (!gOptions->IsTestMode()) {
    Sizes sizes;
    SizeOfInternal(&sizes);

    WriteTitle("Execution measurements\n");

    W("Data structures that persist after Dump() ends:\n");

    W(" Used stack traces: %10s bytes\n",
      Show(sizes.mStackTracesUsed, gBuf1, kBufLen));

    W(" Unused stack traces: %10s bytes\n",
      Show(sizes.mStackTracesUnused, gBuf1, kBufLen));

    W(" Stack trace table: %10s bytes (%s entries, %s used)\n",
      Show(sizes.mStackTraceTable, gBuf1, kBufLen),
      Show(gStackTraceTable->capacity(), gBuf2, kBufLen),
      Show(gStackTraceTable->count(), gBuf3, kBufLen));

    W(" Block table: %10s bytes (%s entries, %s used)\n",
      Show(sizes.mBlockTable, gBuf1, kBufLen),
      Show(gBlockTable->capacity(), gBuf2, kBufLen),
      Show(gBlockTable->count(), gBuf3, kBufLen));

    W("\nData structures that are destroyed after Dump() ends:\n");

    size_t unreportedSize =
      unreportedTraceRecordTable.sizeOfIncludingThis(MallocSizeOf);
    W(" Unreported table: %10s bytes (%s entries, %s used)\n",
      Show(unreportedSize, gBuf1, kBufLen),
      Show(unreportedTraceRecordTable.capacity(), gBuf2, kBufLen),
      Show(unreportedTraceRecordTable.count(), gBuf3, kBufLen));

    size_t onceReportedSize =
      onceReportedTraceRecordTable.sizeOfIncludingThis(MallocSizeOf);
    W(" Once-reported table: %10s bytes (%s entries, %s used)\n",
      Show(onceReportedSize, gBuf1, kBufLen),
      Show(onceReportedTraceRecordTable.capacity(), gBuf2, kBufLen),
      Show(onceReportedTraceRecordTable.count(), gBuf3, kBufLen));

    size_t twiceReportedSize =
      twiceReportedTraceRecordTable.sizeOfIncludingThis(MallocSizeOf);
    W(" Twice-reported table: %10s bytes (%s entries, %s used)\n",
      Show(twiceReportedSize, gBuf1, kBufLen),
      Show(twiceReportedTraceRecordTable.capacity(), gBuf2, kBufLen),
      Show(twiceReportedTraceRecordTable.count(), gBuf3, kBufLen));

    W(" Location service: %10s bytes\n",
      Show(locService->SizeOfIncludingThis(), gBuf1, kBufLen));

    W("\nCounts:\n");

    size_t hits = locService->NumCacheHits();
    size_t misses = locService->NumCacheMisses();
    size_t requests = hits + misses;
    W(" Location service: %10s requests\n",
      Show(requests, gBuf1, kBufLen));

    size_t count = locService->CacheCount();
    size_t capacity = locService->CacheCapacity();
    W(" Location service cache: %4.1f%% hit rate, %.1f%% occupancy at end\n",
      Percent(hits, requests), Percent(count, capacity));

    W("\n");
  }

  InfallibleAllocPolicy::delete_(locService);

  ClearReportsInternal(); // Use internal version, we already have the lock.

  StatusMsg("}\n");
}

//---------------------------------------------------------------------------
// Testing
//---------------------------------------------------------------------------

// This function checks that heap blocks that have the same stack trace but
// different (or no) reporters get aggregated separately.
void foo()
{
  char* a[6];
  for (int i = 0; i < 6; i++) {
    a[i] = (char*) malloc(128 - 16*i);
  }

  for (int i = 0; i <= 1; i++)
    Report(a[i]);                     // reported
  Report(a[2]);                       // reported
  Report(a[3]);                       // reported
  // a[4], a[5] unreported
}

// This stops otherwise-unused variables from being optimized away.
static void
UseItOrLoseIt(void* a)
{
  char buf[64];
  sprintf(buf, "%p\n", a);
  fwrite(buf, 1, strlen(buf) + 1, stderr);
}

// The output from this should be compared against test-expected.dmd.  It's
// been tested on Linux64, and probably will give different results on other
// platforms.
static void
RunTestMode(FILE* fp)
{
  Writer writer(FpWrite, fp);

  // The first part of this test requires sampling to be disabled.
  gOptions->SetSampleBelowSize(1);

  // Dump 1.  Zero for everything.
  Dump(writer);

  // Dump 2: 1 freed, 9 out of 10 unreported.
  // Dump 3: still present and unreported.
  int i;
  char* a;
  for (i = 0; i < 10; i++) {
    a = (char*) malloc(100);
    UseItOrLoseIt(a);
  }
  free(a);

  // Min-sized block.
  // Dump 2: reported.
  // Dump 3: thrice-reported.
  char* a2 = (char*) malloc(0);
  Report(a2);

  // Operator new[].
  // Dump 2: reported.
  // Dump 3: reportedness carries over, due to ReportOnAlloc.
  char* b = new char[10];
  ReportOnAlloc(b);

  // ReportOnAlloc, then freed.
  // Dump 2: freed, irrelevant.
  // Dump 3: freed, irrelevant.
  char* b2 = new char;
  ReportOnAlloc(b2);
  free(b2);

  // Dump 2: reported 4 times.
  // Dump 3: freed, irrelevant.
  char* c = (char*) calloc(10, 3);
  Report(c);
  for (int i = 0; i < 3; i++) {
    Report(c);
  }

  // Dump 2: ignored.
  // Dump 3: irrelevant.
  Report((void*)(intptr_t)i);

  // jemalloc rounds this up to 8192.
  // Dump 2: reported.
  // Dump 3: freed.
  char* e = (char*) malloc(4096);
  e = (char*) realloc(e, 4097);
  Report(e);

  // First realloc is like malloc; second realloc is shrinking.
  // Dump 2: reported.
  // Dump 3: re-reported.
  char* e2 = (char*) realloc(nullptr, 1024);
  e2 = (char*) realloc(e2, 512);
  Report(e2);

  // First realloc is like malloc; second realloc creates a min-sized block.
  // XXX: on Windows, second realloc frees the block.
  // Dump 2: reported.
  // Dump 3: freed, irrelevant.
  char* e3 = (char*) realloc(nullptr, 1023);
  //e3 = (char*) realloc(e3, 0);
  MOZ_ASSERT(e3);
  Report(e3);

  // Dump 2: freed, irrelevant.
  // Dump 3: freed, irrelevant.
  char* f = (char*) malloc(64);
  free(f);

  // Dump 2: ignored.
  // Dump 3: irrelevant.
  Report((void*)(intptr_t)0x0);

  // Dump 2: mixture of reported and unreported.
  // Dump 3: all unreported.
  foo();
  foo();

  // Dump 2: twice-reported.
  // Dump 3: twice-reported.
  char* g1 = (char*) malloc(77);
  ReportOnAlloc(g1);
  ReportOnAlloc(g1);

  // Dump 2: twice-reported.
  // Dump 3: once-reported.
  char* g2 = (char*) malloc(78);
  Report(g2);
  ReportOnAlloc(g2);

  // Dump 2: twice-reported.
  // Dump 3: once-reported.
  char* g3 = (char*) malloc(79);
  ReportOnAlloc(g3);
  Report(g3);

  // All the odd-ball ones.
  // Dump 2: all unreported.
  // Dump 3: all freed, irrelevant.
  // XXX: no memalign on Mac
  //void* x = memalign(64, 65);           // rounds up to 128
  //UseItOrLoseIt(x);
  // XXX: posix_memalign doesn't work on B2G
  //void* y;
  //posix_memalign(&y, 128, 129);         // rounds up to 256
  //UseItOrLoseIt(y);
  // XXX: valloc doesn't work on Windows.
  //void* z = valloc(1);                  // rounds up to 4096
  //UseItOrLoseIt(z);
  //aligned_alloc(64, 256);               // XXX: C11 only

  // Dump 2.
  Dump(writer);

  //---------

  Report(a2);
  Report(a2);
  free(c);
  free(e);
  Report(e2);
  free(e3);
  //free(x);
  //free(y);
  //free(z);

  // Dump 3.
  Dump(writer);

  //---------

  // Clear all knowledge of existing blocks to give us a clean slate.
  gBlockTable->clear();

  gOptions->SetSampleBelowSize(128);

  char* s;

  // This equals the sample size, and so is reported exactly.  It should be
  // listed before records of the same size that are sampled.
  s = (char*) malloc(128);
  UseItOrLoseIt(s);

  // This exceeds the sample size, and so is reported exactly.
  s = (char*) malloc(144);
  UseItOrLoseIt(s);

  // These together constitute exactly one sample.
  for (int i = 0; i < 16; i++) {
    s = (char*) malloc(8);
    UseItOrLoseIt(s);
  }
  MOZ_ASSERT(gSmallBlockActualSizeCounter == 0);
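  // (16 allocations of 8 bytes is 128 bytes, exactly one sample-below unit,
  // so the counter has wrapped around to 0.)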

  // These fall 8 bytes short of a full sample.
  for (int i = 0; i < 15; i++) {
    s = (char*) malloc(8);
    UseItOrLoseIt(s);
  }
  MOZ_ASSERT(gSmallBlockActualSizeCounter == 120);
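  // (15 * 8 == 120 bytes accumulated, 8 bytes short of the 128-byte sample.)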

  // This exceeds the sample size, and so is recorded exactly.
  s = (char*) malloc(256);
  UseItOrLoseIt(s);
  MOZ_ASSERT(gSmallBlockActualSizeCounter == 120);

  // Together with the |i < 15| loop above, this gets us past a full sample.
  s = (char*) malloc(96);
  UseItOrLoseIt(s);
  MOZ_ASSERT(gSmallBlockActualSizeCounter == 88);
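  // (120 + 96 == 216 == 128 + 88: one sample is taken and 88 bytes carry over.)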

  // This gets to another full sample.
  for (int i = 0; i < 5; i++) {
    s = (char*) malloc(8);
    UseItOrLoseIt(s);
  }
  MOZ_ASSERT(gSmallBlockActualSizeCounter == 0);
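  // (88 + 5 * 8 == 128: another full sample, so the counter wraps to 0 again.)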

  // This allocates 16, 32, ..., 128 bytes, which results in a stack trace
  // record that contains a mix of sampled and non-sampled blocks, and so
  // should be printed with '~' signs.
  for (int i = 1; i <= 8; i++) {
    s = (char*) malloc(i * 16);
    UseItOrLoseIt(s);
  }
  MOZ_ASSERT(gSmallBlockActualSizeCounter == 64);
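  // (The 128-byte block reaches the sample size and is recorded exactly; the
  // sampled ones total 16 + 32 + ... + 112 == 448 == 3 * 128 + 64, leaving 64
  // bytes in the counter.)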

  // At the end we're 64 bytes into the current sample so we report ~1,424
  // bytes of allocation overall, which is 64 less than the real value 1,488.
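  // (Real total: 128 + 144 + 16*8 + 15*8 + 256 + 96 + 5*8 + (16+32+...+128)
  //  == 128 + 144 + 128 + 120 + 256 + 96 + 40 + 576 == 1,488 bytes.)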

  // Dump 4.
  Dump(writer);
}

//---------------------------------------------------------------------------
// Stress testing microbenchmark
//---------------------------------------------------------------------------

// This stops otherwise-unused variables from being optimized away.
static void
UseItOrLoseIt2(void* a)
{
  if (a == (void*)0x42) {
    printf("UseItOrLoseIt2\n");
  }
}

MOZ_NEVER_INLINE static void
stress5()
{
  for (int i = 0; i < 10; i++) {
    void* x = malloc(64);
    UseItOrLoseIt2(x);
    if (i & 1) {
      free(x);
    }
  }
}

MOZ_NEVER_INLINE static void
stress4()
{
  stress5(); stress5(); stress5(); stress5(); stress5();
  stress5(); stress5(); stress5(); stress5(); stress5();
}

MOZ_NEVER_INLINE static void
stress3()
{
  for (int i = 0; i < 10; i++) {
    stress4();
  }
}

MOZ_NEVER_INLINE static void
stress2()
{
  stress3(); stress3(); stress3(); stress3(); stress3();
  stress3(); stress3(); stress3(); stress3(); stress3();
}

MOZ_NEVER_INLINE static void
stress1()
{
  for (int i = 0; i < 10; i++) {
    stress2();
  }
}

// This stress test does lots of allocations and frees, which is where most of
// DMD's overhead occurs.  It allocates 1,000,000 64-byte blocks, spread evenly
// across 1,000 distinct stack traces.  It frees every second one immediately
// after allocating it.
//
// It's highly artificial, but it's deterministic and easy to run.  It can be
// timed under different conditions to glean performance data.
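//
// (Arithmetic: RunStressMode below calls stress1 ten times, stress1 runs
// stress2 ten times, and so on down to stress5's ten mallocs, giving 10^6
// allocations in all.  The 1,000 distinct stack traces come from the ten call
// sites in RunStressMode x ten in stress2 x ten in stress4; stress1, stress3
// and stress5 use loops, so each contributes only one call site.)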
static void
RunStressMode(FILE* fp)
{
  Writer writer(FpWrite, fp);

  // Disable sampling for maximum stress.
  gOptions->SetSampleBelowSize(1);

  stress1(); stress1(); stress1(); stress1(); stress1();
  stress1(); stress1(); stress1(); stress1(); stress1();

  Dump(writer);
}

} // namespace dmd
} // namespace mozilla