/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include <stdio.h>
#include <signal.h>
#include <string.h>
#include <stdlib.h>
#include <time.h>

#ifdef MOZ_VALGRIND
# include <valgrind/valgrind.h>
# include <valgrind/helgrind.h>
#else
# define VALGRIND_HG_MUTEX_LOCK_PRE(_mx,_istry)  /* */
# define VALGRIND_HG_MUTEX_LOCK_POST(_mx)        /* */
# define VALGRIND_HG_MUTEX_UNLOCK_PRE(_mx)       /* */
# define VALGRIND_HG_MUTEX_UNLOCK_POST(_mx)      /* */
# define VALGRIND_MAKE_MEM_DEFINED(_addr,_len)   ((void)0)
# define VALGRIND_MAKE_MEM_UNDEFINED(_addr,_len) ((void)0)
#endif

#include "prenv.h"
#include "mozilla/arm.h"
#include "mozilla/DebugOnly.h"
#include <stdint.h>
#include "PlatformMacros.h"

#include "platform.h"
#include <unistd.h>
#include <pthread.h>

#include "ProfileEntry.h"
#include "SyncProfile.h"
#include "AutoObjectMapper.h"
#include "UnwinderThread2.h"

#if !defined(SPS_OS_windows)
# include <sys/mman.h>
#endif

#if defined(SPS_OS_android) || defined(SPS_OS_linux)
# include <ucontext.h>
# include "LulMain.h"
#endif

#include "shared-libraries.h"


// Verbosity of this module, for debugging:
//   0  silent
//   1  adds info about debuginfo load success/failure
//   2  adds slow-summary stats for buffer fills/misses (RECOMMENDED)
//   3  adds per-sample summary lines
//   4  adds per-sample frame listing
// Note that levels 3 and above produce a risk of deadlock, and
// are not recommended for extended use.
#define LOGLEVEL 2

// The maximum number of frames that the native unwinder will
// produce.  Setting it too high gives a risk of it wasting a
// lot of time looping on corrupted stacks.
#define MAX_NATIVE_FRAMES 256


// The 'else' of this covers the entire rest of the file
#if defined(SPS_OS_windows) || defined(SPS_OS_darwin)

//////////////////////////////////////////////////////////
//// BEGIN externally visible functions (WINDOWS and OSX STUBS)

// On Windows and OSX this will all need reworking.
// GeckoProfilerImpl.h will ensure these functions are never actually
// called, so just provide no-op stubs for now.

void uwt__init()
{
}

void uwt__stop()
{
}

void uwt__deinit()
{
}

void uwt__register_thread_for_profiling ( void* stackTop )
{
}

void uwt__unregister_thread_for_profiling()
{
}

LinkedUWTBuffer* utb__acquire_sync_buffer(void* stackTop)
{
  return nullptr;
}

// RUNS IN SIGHANDLER CONTEXT
UnwinderThreadBuffer* uwt__acquire_empty_buffer()
{
  return nullptr;
}

void
utb__finish_sync_buffer(ThreadProfile* aProfile,
                        UnwinderThreadBuffer* utb,
                        void* /* ucontext_t*, really */ ucV)
{
}

void
utb__release_sync_buffer(LinkedUWTBuffer* utb)
{
}

// RUNS IN SIGHANDLER CONTEXT
void
uwt__release_full_buffer(ThreadProfile* aProfile,
                         UnwinderThreadBuffer* utb,
                         void* /* ucontext_t*, really */ ucV )
{
}

// RUNS IN SIGHANDLER CONTEXT
void
utb__addEntry(/*MODIFIED*/UnwinderThreadBuffer* utb, ProfileEntry ent)
{
}

//// END externally visible functions (WINDOWS and OSX STUBS)
//////////////////////////////////////////////////////////

#else // a supported target

//////////////////////////////////////////////////////////
//// BEGIN externally visible functions

// Forward references
// the unwinder thread ID, its fn, and a stop-now flag
static void* unwind_thr_fn ( void* exit_nowV );
static pthread_t unwind_thr;
static int unwind_thr_exit_now = 0; // RACED ON

// Threads must be registered with this file before they can be
// sampled.  So that we know the max safe stack address for each
// registered thread.
static void thread_register_for_profiling ( void* stackTop );

// Unregister a thread.
static void thread_unregister_for_profiling();

// Empties out the buffer queue.  Used when the unwinder thread is
// shut down.
static void empty_buffer_queue();

// Allocate a buffer for synchronous unwinding
static LinkedUWTBuffer* acquire_sync_buffer(void* stackTop);

// RUNS IN SIGHANDLER CONTEXT
// Acquire an empty buffer and mark it as FILLING
static UnwinderThreadBuffer* acquire_empty_buffer();

static void finish_sync_buffer(ThreadProfile* aProfile,
                               UnwinderThreadBuffer* utb,
                               void* /* ucontext_t*, really */ ucV);

// Release an empty synchronous unwind buffer.
static void release_sync_buffer(LinkedUWTBuffer* utb);

// RUNS IN SIGHANDLER CONTEXT
// Put this buffer in the queue of stuff going to the unwinder
// thread, and mark it as FULL.  Before doing that, fill in stack
// chunk and register fields if a native unwind is requested.
// APROFILE is where the profile data should be added to.  UTB
// is the partially-filled-in buffer, containing ProfileEntries.
// UCV is the ucontext_t* from the signal handler.  If non-nullptr,
// is taken as a cue to request native unwind.
static void release_full_buffer(ThreadProfile* aProfile,
                                UnwinderThreadBuffer* utb,
                                void* /* ucontext_t*, really */ ucV );

// RUNS IN SIGHANDLER CONTEXT
static void utb_add_prof_ent(UnwinderThreadBuffer* utb, ProfileEntry ent);

// Do a full memory barrier.
static void do_MBAR();


// This is the single instance of the LUL unwind library that we will
// use.  Currently the library is operated with multiple sampling
// threads but only one unwinder thread.  It should also be possible
// to use the library with multiple unwinder threads, to improve
// throughput.  The setup here makes it possible to use multiple
// unwinder threads, although that is as-yet untested.
//
// |sLULmutex| protects |sLUL| and |sLULcount| and also is used to
// ensure that only the first unwinder thread requests |sLUL| to read
// debug info.  |sLUL| may only be assigned to (and the object it
// points at may only be created/destroyed) when |sLULcount| is zero.
// |sLULcount| holds the number of unwinder threads currently in
// existence.
static pthread_mutex_t sLULmutex = PTHREAD_MUTEX_INITIALIZER;
static lul::LUL* sLUL = nullptr;
static int sLULcount = 0;


void uwt__init()
{
  // Create the unwinder thread.
  MOZ_ASSERT(unwind_thr_exit_now == 0);
  int r = pthread_create( &unwind_thr, nullptr,
                          unwind_thr_fn, (void*)&unwind_thr_exit_now );
  MOZ_ALWAYS_TRUE(r == 0);
}
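
// As an informal usage sketch: the expected call ordering for the
// uwt__ entry points above and below appears to be as follows.  The
// caller shown here is hypothetical; the real call sites are
// elsewhere in the profiler.
//
//   void profiler_session()      // hypothetical caller
//   {
//     uwt__init();               // create the unwinder thread
//     // ... sampling runs; the per-sample signal handlers call
//     // uwt__acquire_empty_buffer() / uwt__release_full_buffer() ...
//     uwt__stop();               // tell the unwinder thread to exit, and join it
//     uwt__deinit();             // free the buffer queue
//   }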

void uwt__stop()
{
  // Shut down the unwinder thread.
  MOZ_ASSERT(unwind_thr_exit_now == 0);
  unwind_thr_exit_now = 1;
  do_MBAR();
  int r = pthread_join(unwind_thr, nullptr);
  MOZ_ALWAYS_TRUE(r == 0);
}

void uwt__deinit()
{
  empty_buffer_queue();
}

void uwt__register_thread_for_profiling(void* stackTop)
{
  thread_register_for_profiling(stackTop);
}

void uwt__unregister_thread_for_profiling()
{
  thread_unregister_for_profiling();
}

LinkedUWTBuffer* utb__acquire_sync_buffer(void* stackTop)
{
  return acquire_sync_buffer(stackTop);
}

void utb__finish_sync_buffer(ThreadProfile* profile,
                             UnwinderThreadBuffer* buff,
                             void* /* ucontext_t*, really */ ucV)
{
  finish_sync_buffer(profile, buff, ucV);
}

void utb__release_sync_buffer(LinkedUWTBuffer* buff)
{
  release_sync_buffer(buff);
}

// RUNS IN SIGHANDLER CONTEXT
UnwinderThreadBuffer* uwt__acquire_empty_buffer()
{
  return acquire_empty_buffer();
}

// RUNS IN SIGHANDLER CONTEXT
void
uwt__release_full_buffer(ThreadProfile* aProfile,
                         UnwinderThreadBuffer* utb,
                         void* /* ucontext_t*, really */ ucV )
{
  release_full_buffer( aProfile, utb, ucV );
}

// RUNS IN SIGHANDLER CONTEXT
void
utb__addEntry(/*MODIFIED*/UnwinderThreadBuffer* utb, ProfileEntry ent)
{
  utb_add_prof_ent(utb, ent);
}

//// END externally visible functions
//////////////////////////////////////////////////////////


//////////////////////////////////////////////////////////
//// BEGIN type UnwindThreadBuffer

static_assert(sizeof(uint32_t) == 4, "uint32_t size incorrect");
static_assert(sizeof(uint64_t) == 8, "uint64_t size incorrect");
static_assert(sizeof(uintptr_t) == sizeof(void*),
              "uintptr_t size incorrect");

typedef
  struct {
    uint64_t rsp;
    uint64_t rbp;
    uint64_t rip;
  }
  AMD64Regs;

typedef
  struct {
    uint32_t r15;
    uint32_t r14;
    uint32_t r13;
    uint32_t r12;
    uint32_t r11;
    uint32_t r7;
  }
  ARMRegs;

typedef
  struct {
    uint32_t esp;
    uint32_t ebp;
    uint32_t eip;
  }
  X86Regs;

#if defined(SPS_ARCH_amd64)
typedef  AMD64Regs  ArchRegs;
#elif defined(SPS_ARCH_arm)
typedef  ARMRegs  ArchRegs;
#elif defined(SPS_ARCH_x86)
typedef  X86Regs  ArchRegs;
#else
# error "Unknown plat"
#endif

#if defined(SPS_ARCH_amd64) || defined(SPS_ARCH_arm) || defined(SPS_ARCH_x86)
# define SPS_PAGE_SIZE 4096
#else
# error "Unknown plat"
#endif

typedef  enum { S_EMPTY, S_FILLING, S_EMPTYING, S_FULL }  State;
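
// A sketch of the buffer lifecycle implied by the code below; the
// thread names are informal, and the transitions appear to be made
// with |g_spinLock| held:
//
//   S_EMPTY    --(sampling thread: acquire_empty_buffer)-->  S_FILLING
//   S_FILLING  --(sampling thread: release_full_buffer)-->   S_FULL
//   S_FULL     --(unwinder thread: claims the buffer)-->     S_EMPTYING
//   S_EMPTYING --(unwinder thread: after process_buffer)-->  S_EMPTY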

typedef  struct { uintptr_t val; }  SpinLock;

/* CONFIGURABLE */
/* The number of fixed ProfileEntry slots.  If more are required, they
   are placed in mmap'd pages. */
#define N_FIXED_PROF_ENTS 20

/* CONFIGURABLE */
/* The number of extra pages of ProfileEntries.  If (on arm) each
   ProfileEntry is 8 bytes, then a page holds 512, and so 100 pages
   is enough to hold 51200. */
#define N_PROF_ENT_PAGES 100

/* DERIVATIVE */
#define N_PROF_ENTS_PER_PAGE (SPS_PAGE_SIZE / sizeof(ProfileEntry))

/* A page of ProfileEntrys.  This might actually be slightly smaller
   than a page if SPS_PAGE_SIZE is not an exact multiple of
   sizeof(ProfileEntry). */
typedef
  struct { ProfileEntry ents[N_PROF_ENTS_PER_PAGE]; }
  ProfEntsPage;

#define ProfEntsPage_INVALID ((ProfEntsPage*)1)


/* Fields protected by the spinlock are marked SL */

struct _UnwinderThreadBuffer {
  /*SL*/ State  state;
  /* The rest of these are protected, in some sense, by ::state.  If
     ::state is S_FILLING, they are 'owned' by the sampler thread
     that set the state to S_FILLING.  If ::state is S_EMPTYING,
     they are 'owned' by the unwinder thread that set the state to
     S_EMPTYING.  If ::state is S_EMPTY or S_FULL, the buffer isn't
     owned by any thread, and so no thread may access these
     fields. */
  /* Sample number, needed to process samples in order */
  uint64_t       seqNo;
  /* The ThreadProfile into which the results are eventually to be
     dumped. */
  ThreadProfile* aProfile;
  /* Pseudostack and other info, always present */
  ProfileEntry   entsFixed[N_FIXED_PROF_ENTS];
  ProfEntsPage*  entsPages[N_PROF_ENT_PAGES];
  uintptr_t      entsUsed;
  /* Do we also have data to do a native unwind? */
  bool           haveNativeInfo;
  /* If so, here is the register state and stack.  Unset if
     .haveNativeInfo is false. */
  lul::UnwindRegs startRegs;
  lul::StackImage stackImg;
  void*           stackMaxSafe; /* Address for max safe stack reading. */
};
/* Indexing scheme for ents:
     0 <= i < N_FIXED_PROF_ENTS
       is at entsFixed[i]

     i >= N_FIXED_PROF_ENTS
       is at let j = i - N_FIXED_PROF_ENTS
             in   entsPages[j / N_PROF_ENTS_PER_PAGE]
                    ->ents[j % N_PROF_ENTS_PER_PAGE]

   entsPages[] are allocated on demand.  Because zero can
   theoretically be a valid page pointer, use
   ProfEntsPage_INVALID == (ProfEntsPage*)1 to mark invalid pages.

   It follows that the max entsUsed value is N_FIXED_PROF_ENTS +
   N_PROF_ENTS_PER_PAGE * N_PROF_ENT_PAGES, and at that point no more
   ProfileEntries can be stored.
*/
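/* A worked example of the indexing scheme above, with
   N_FIXED_PROF_ENTS = 20 and assuming (purely for illustration) that
   sizeof(ProfileEntry) == 8, so that N_PROF_ENTS_PER_PAGE == 512:

     i = 7    ->  entsFixed[7]
     i = 20   ->  j = 0    ->  entsPages[0]->ents[0]
     i = 531  ->  j = 511  ->  entsPages[0]->ents[511]
     i = 532  ->  j = 512  ->  entsPages[1]->ents[0]
*/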


typedef
  struct {
    pthread_t thrId;
    void*     stackTop;
    uint64_t  nSamples;
  }
  StackLimit;

/* Globals -- the buffer array */
#define N_UNW_THR_BUFFERS 10
/*SL*/ static UnwinderThreadBuffer** g_buffers  = nullptr;
/*SL*/ static uint64_t               g_seqNo    = 0;
/*SL*/ static SpinLock               g_spinLock = { 0 };

/* Globals -- the thread array.  The array is dynamically expanded on
   demand.  The spinlock must be held when accessing g_stackLimits,
   g_stackLimits[some index], g_stackLimitsUsed and g_stackLimitsSize.
   However, the spinlock must not be held when calling malloc to
   allocate or expand the array, as that would risk deadlock against a
   sampling thread that holds the malloc lock and is trying to acquire
   the spinlock. */
/*SL*/ static StackLimit* g_stackLimits     = nullptr;
/*SL*/ static size_t      g_stackLimitsUsed = 0;
/*SL*/ static size_t      g_stackLimitsSize = 0;

/* Stats -- atomically incremented, no lock needed */
static uintptr_t g_stats_totalSamples = 0; // total # sample attempts
static uintptr_t g_stats_noBuffAvail  = 0; // # failed due to no buffer avail
static uintptr_t g_stats_thrUnregd    = 0; // # failed due to unregistered thr

/* We must be VERY CAREFUL what we do with the spinlock held.  The
   only thing it is safe to do with it held is modify (viz, read or
   write) g_buffers, g_buffers[], g_seqNo, g_buffers[]->state,
   g_stackLimits, g_stackLimits[], g_stackLimitsUsed and
   g_stackLimitsSize.  No arbitrary computations, no syscalls, no
   printfs, no file IO, and absolutely no dynamic memory allocation
   (else we WILL eventually deadlock).

   This applies both to the signal handler and to the unwinder thread.
*/

//// END type UnwindThreadBuffer
//////////////////////////////////////////////////////////

// This is the interface to LUL.
typedef  struct { u_int64_t pc; u_int64_t sp; }  PCandSP;

// Forward declaration.  Implementation is below.
static
void do_lul_unwind_Buffer(/*OUT*/PCandSP** pairs,
                          /*OUT*/unsigned int* nPairs,
                          UnwinderThreadBuffer* buff,
                          int buffNo /* for debug printing only */);

static bool is_page_aligned(void* v)
{
  uintptr_t w = (uintptr_t) v;
  return (w & (SPS_PAGE_SIZE-1)) == 0  ? true  : false;
}


/* Implement machine-word sized atomic compare-and-swap.  Returns true
   if success, false if failure. */
static bool do_CASW(uintptr_t* addr, uintptr_t expected, uintptr_t nyu)
{
#if defined(__GNUC__)
  return __sync_bool_compare_and_swap(addr, expected, nyu);
#else
# error "Unhandled compiler"
#endif
}
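
/* Informally, do_CASW(addr, expected, nyu) behaves as if the
   following ran as a single indivisible step:

     if (*addr == expected) { *addr = nyu; return true; }
     else                   { return false; }

   spinLock_acquire/spinLock_release and atomic_INC below are built
   entirely out of this primitive. */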

/* Hint to the CPU core that we are in a spin-wait loop, and that
   other processors/cores/threads-running-on-the-same-core should be
   given priority on execute resources, if that is possible.  Not
   critical if this is a no-op on some targets. */
static void do_SPINLOOP_RELAX()
{
#if (defined(SPS_ARCH_amd64) || defined(SPS_ARCH_x86)) && defined(__GNUC__)
  __asm__ __volatile__("rep; nop");
#elif defined(SPS_PLAT_arm_android) && MOZILLA_ARM_ARCH >= 7
  __asm__ __volatile__("wfe");
#endif
}

/* Tell any cores snoozing in spin loops to wake up. */
static void do_SPINLOOP_NUDGE()
{
#if (defined(SPS_ARCH_amd64) || defined(SPS_ARCH_x86)) && defined(__GNUC__)
  /* this is a no-op */
#elif defined(SPS_PLAT_arm_android) && MOZILLA_ARM_ARCH >= 7
  __asm__ __volatile__("sev");
#endif
}

/* Perform a full memory barrier. */
static void do_MBAR()
{
#if defined(__GNUC__)
  __sync_synchronize();
#else
# error "Unhandled compiler"
#endif
}

static void spinLock_acquire(SpinLock* sl)
{
  uintptr_t* val = &sl->val;
  VALGRIND_HG_MUTEX_LOCK_PRE(sl, 0/*!isTryLock*/);
  while (1) {
    bool ok = do_CASW( val, 0, 1 );
    if (ok) break;
    do_SPINLOOP_RELAX();
  }
  do_MBAR();
  VALGRIND_HG_MUTEX_LOCK_POST(sl);
}

static void spinLock_release(SpinLock* sl)
{
  uintptr_t* val = &sl->val;
  VALGRIND_HG_MUTEX_UNLOCK_PRE(sl);
  do_MBAR();
  bool ok = do_CASW( val, 1, 0 );
  /* This must succeed at the first try.  To fail would imply that
     the lock was unheld. */
  MOZ_ALWAYS_TRUE(ok);
  do_SPINLOOP_NUDGE();
  VALGRIND_HG_MUTEX_UNLOCK_POST(sl);
}

static void sleep_ms(unsigned int ms)
{
  struct timespec req;
  req.tv_sec = ((time_t)ms) / 1000;
  req.tv_nsec = 1000 * 1000 * (((unsigned long)ms) % 1000);
  nanosleep(&req, nullptr);
}

/* Use CAS to implement standalone atomic increment. */
static void atomic_INC(uintptr_t* loc)
{
  while (1) {
    uintptr_t old = *loc;
    uintptr_t nyu = old + 1;
    bool ok = do_CASW( loc, old, nyu );
    if (ok) break;
  }
}

// Empties out the buffer queue.
static void empty_buffer_queue()
{
  spinLock_acquire(&g_spinLock);

  UnwinderThreadBuffer** tmp_g_buffers = g_buffers;
  g_stackLimitsUsed = 0;
  g_seqNo = 0;
  g_buffers = nullptr;

  spinLock_release(&g_spinLock);

  // Can't do any malloc/free when holding the spinlock.
  free(tmp_g_buffers);

  // We could potentially free up g_stackLimits; but given the
  // complications above involved in resizing it, it's probably
  // safer just to leave it in place.
}
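
/* Informal usage sketch for the registration functions below: a
   thread that wants to be sampled registers the highest stack
   address that is safe to read, typically the address of a local
   variable in its outermost frame.  The caller shown here is
   hypothetical:

     void MyThreadEntry()       // hypothetical thread entry point
     {
       int stackDummy;
       uwt__register_thread_for_profiling(&stackDummy);
       // ... run, and possibly get sampled ...
       uwt__unregister_thread_for_profiling();
     }
*/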


// Registers a thread for profiling.  Detects and ignores duplicate
// registration.
static void thread_register_for_profiling(void* stackTop)
{
  pthread_t me = pthread_self();

  spinLock_acquire(&g_spinLock);

  // tmp copy of g_stackLimitsUsed, to avoid racing in message printing
  int n_used;

  // Ignore spurious calls which aren't really registering anything.
  if (stackTop == nullptr) {
    n_used = g_stackLimitsUsed;
    spinLock_release(&g_spinLock);
    LOGF("BPUnw: [%d total] thread_register_for_profiling"
         "(me=%p, stacktop=NULL) (IGNORED)", n_used, (void*)me);
    return;
  }

  /* Minimal sanity check on stackTop */
  MOZ_ASSERT((void*)&n_used/*any auto var will do*/ < stackTop);

  bool is_dup = false;
  for (size_t i = 0; i < g_stackLimitsUsed; i++) {
    if (g_stackLimits[i].thrId == me) {
      is_dup = true;
      break;
    }
  }

  if (is_dup) {
    /* It's a duplicate registration.  Ignore it: drop the lock and
       return. */
    n_used = g_stackLimitsUsed;
    spinLock_release(&g_spinLock);

    LOGF("BPUnw: [%d total] thread_register_for_profiling"
         "(me=%p, stacktop=%p) (DUPLICATE)", n_used, (void*)me, stackTop);
    return;
  }

  /* Make sure the g_stackLimits array is large enough to accommodate
     this new entry.  This is tricky.  If it isn't large enough, we
     can malloc a larger version, but we have to do that without
     holding the spinlock, else we risk deadlock.  The deadlock
     scenario is:

     Some other thread that is being sampled
                                        This thread

     call malloc                        call this function
     acquire malloc lock                acquire the spinlock
     (sampling signal)                  discover thread array not big enough,
     call uwt__acquire_empty_buffer     call malloc to make it larger
     acquire the spinlock               acquire malloc lock

     This gives an inconsistent lock acquisition order on the malloc
     lock and spinlock, hence risk of deadlock.

     Allocating more space for the array without holding the spinlock
     implies tolerating races against other thread(s) who are also
     trying to expand the array.  How can we detect if we have been
     out-raced?  Every successful expansion of g_stackLimits[] results
     in an increase in g_stackLimitsSize.  Hence we can detect if we
     got out-raced by remembering g_stackLimitsSize before we dropped
     the spinlock and checking if it has changed after the spinlock is
     reacquired. */

  MOZ_ASSERT(g_stackLimitsUsed <= g_stackLimitsSize);

  if (g_stackLimitsUsed == g_stackLimitsSize) {
    /* g_stackLimits[] is full; resize it. */

    size_t old_size = g_stackLimitsSize;
    size_t new_size = old_size == 0 ? 4 : (2 * old_size);

    spinLock_release(&g_spinLock);
    StackLimit* new_arr = (StackLimit*)malloc(new_size * sizeof(StackLimit));
    if (!new_arr)
      return;

    spinLock_acquire(&g_spinLock);

    if (old_size != g_stackLimitsSize) {
      /* We've been outraced.
         Instead of trying to deal in-line with
         this extremely rare case, just start all over again by
         tail-calling this routine. */
      spinLock_release(&g_spinLock);
      free(new_arr);
      thread_register_for_profiling(stackTop);
      return;
    }

    memcpy(new_arr, g_stackLimits, old_size * sizeof(StackLimit));
    if (g_stackLimits)
      free(g_stackLimits);

    g_stackLimits = new_arr;

    MOZ_ASSERT(g_stackLimitsSize < new_size);
    g_stackLimitsSize = new_size;
  }

  MOZ_ASSERT(g_stackLimitsUsed < g_stackLimitsSize);

  /* Finally, we have a safe place to put the new entry. */

  // Round |stackTop| up to the end of the containing page.  We may
  // as well do this -- there's no danger of a fault, and we might
  // get a few more base-of-the-stack frames as a result.  This
  // assumes that no target has a page size smaller than 4096.
  uintptr_t stackTopR = (uintptr_t)stackTop;
  stackTopR = (stackTopR & ~(uintptr_t)4095) + (uintptr_t)4095;

  g_stackLimits[g_stackLimitsUsed].thrId    = me;
  g_stackLimits[g_stackLimitsUsed].stackTop = (void*)stackTopR;
  g_stackLimits[g_stackLimitsUsed].nSamples = 0;
  g_stackLimitsUsed++;

  n_used = g_stackLimitsUsed;
  spinLock_release(&g_spinLock);

  LOGF("BPUnw: [%d total] thread_register_for_profiling"
       "(me=%p, stacktop=%p)", n_used, (void*)me, stackTop);
}

// Deregisters a thread from profiling.  Detects and ignores attempts
// to deregister a not-registered thread.
static void thread_unregister_for_profiling()
{
  spinLock_acquire(&g_spinLock);

  // tmp copy of g_stackLimitsUsed, to avoid racing in message printing
  size_t n_used;

  size_t i;
  bool found = false;
  pthread_t me = pthread_self();
  for (i = 0; i < g_stackLimitsUsed; i++) {
    if (g_stackLimits[i].thrId == me)
      break;
  }
  if (i < g_stackLimitsUsed) {
    // found this entry.  Slide the remaining ones down one place.
    for (; i+1 < g_stackLimitsUsed; i++) {
      g_stackLimits[i] = g_stackLimits[i+1];
    }
    g_stackLimitsUsed--;
    found = true;
  }

  n_used = g_stackLimitsUsed;

  spinLock_release(&g_spinLock);
  LOGF("BPUnw: [%d total] thread_unregister_for_profiling(me=%p) %s",
       (int)n_used, (void*)me, found ? "" : " (NOT REGISTERED) ");
}


__attribute__((unused))
static void show_registered_threads()
{
  size_t i;
  spinLock_acquire(&g_spinLock);
  for (i = 0; i < g_stackLimitsUsed; i++) {
    LOGF("[%d] pthread_t=%p nSamples=%lld",
         (int)i, (void*)g_stackLimits[i].thrId,
         (unsigned long long int)g_stackLimits[i].nSamples);
  }
  spinLock_release(&g_spinLock);
}

// RUNS IN SIGHANDLER CONTEXT
/* The calling thread owns the buffer, as denoted by its state being
   S_FILLING.  So we can mess with it without further locking. */
static void init_empty_buffer(UnwinderThreadBuffer* buff, void* stackTop)
{
  /* Now we own the buffer, initialise it. */
  buff->aProfile       = nullptr;
  buff->entsUsed       = 0;
  buff->haveNativeInfo = false;
  buff->stackImg.mLen       = 0;
  buff->stackImg.mStartAvma = 0;
  buff->stackMaxSafe   = stackTop; /* We will need this in
                                      release_full_buffer() */
  for (size_t i = 0; i < N_PROF_ENT_PAGES; i++)
    buff->entsPages[i] = ProfEntsPage_INVALID;
}

struct SyncUnwinderThreadBuffer : public LinkedUWTBuffer
{
  UnwinderThreadBuffer* GetBuffer()
  {
    return &mBuff;
  }

  UnwinderThreadBuffer mBuff;
};

static LinkedUWTBuffer* acquire_sync_buffer(void* stackTop)
{
  MOZ_ASSERT(stackTop);
  SyncUnwinderThreadBuffer* buff = new SyncUnwinderThreadBuffer();
  // We can set state without locking here because this thread owns the buffer
  // and it is going to fill it itself.
  buff->GetBuffer()->state = S_FILLING;
  init_empty_buffer(buff->GetBuffer(), stackTop);
  return buff;
}

// RUNS IN SIGHANDLER CONTEXT
static UnwinderThreadBuffer* acquire_empty_buffer()
{
  /* acq lock
     if buffers == nullptr { rel lock; exit }
     scan to find a free buff; if none { rel lock; exit }
     set buff state to S_FILLING
     fillseqno++; and remember it
     rel lock
  */
  size_t i;

  atomic_INC( &g_stats_totalSamples );

  /* This code is critical.  We are in a signal handler and possibly
     with the malloc lock held.  So we can't allocate any heap, and
     can't safely call any C library functions, not even the pthread_
     functions.  And we certainly can't do any syscalls.  In short,
     this function needs to be self contained, not do any allocation,
     and not hold on to the spinlock for any significant length of
     time. */

  spinLock_acquire(&g_spinLock);

  /* First of all, look for this thread's entry in g_stackLimits[].
     We need to find it in order to figure out how much stack we can
     safely copy into the sample.  This assumes that pthread_self()
     is safe to call in a signal handler, which strikes me as highly
     likely. */
  pthread_t me = pthread_self();
  MOZ_ASSERT(g_stackLimitsUsed <= g_stackLimitsSize);
  for (i = 0; i < g_stackLimitsUsed; i++) {
    if (g_stackLimits[i].thrId == me)
      break;
  }

  /* If the thread isn't registered for profiling, just ignore the call
     and return nullptr. */
  if (i == g_stackLimitsUsed) {
    spinLock_release(&g_spinLock);
    atomic_INC( &g_stats_thrUnregd );
    return nullptr;
  }

  /* "this thread is registered for profiling" */
  MOZ_ASSERT(i < g_stackLimitsUsed);

  /* The furthest point that we can safely scan back up the stack. */
  void* myStackTop = g_stackLimits[i].stackTop;
  g_stackLimits[i].nSamples++;

  /* Try to find a free buffer to use. */
  if (g_buffers == nullptr) {
    /* The unwinder thread hasn't allocated any buffers yet.
       Nothing we can do. */
    spinLock_release(&g_spinLock);
    atomic_INC( &g_stats_noBuffAvail );
    return nullptr;
  }

  for (i = 0; i < N_UNW_THR_BUFFERS; i++) {
    if (g_buffers[i]->state == S_EMPTY)
      break;
  }
  MOZ_ASSERT(i <= N_UNW_THR_BUFFERS);

  if (i == N_UNW_THR_BUFFERS) {
    /* Again, no free buffers .. give up. */
    spinLock_release(&g_spinLock);
    atomic_INC( &g_stats_noBuffAvail );
    if (LOGLEVEL >= 3)
      LOG("BPUnw: handler: no free buffers");
    return nullptr;
  }

  /* So we can use this one safely.  Whilst still holding the lock,
     mark the buffer as belonging to us, and increment the sequence
     number. */
  UnwinderThreadBuffer* buff = g_buffers[i];
  MOZ_ASSERT(buff->state == S_EMPTY);
  buff->state = S_FILLING;
  buff->seqNo = g_seqNo;
  g_seqNo++;

  /* And drop the lock.  We own the buffer, so go on and fill it. */
  spinLock_release(&g_spinLock);

  /* Now we own the buffer, initialise it. */
  init_empty_buffer(buff, myStackTop);
  return buff;
}

// RUNS IN SIGHANDLER CONTEXT
/* The calling thread owns the buffer, as denoted by its state being
   S_FILLING.  So we can mess with it without further locking. */
static void fill_buffer(ThreadProfile* aProfile,
                        UnwinderThreadBuffer* buff,
                        void* /* ucontext_t*, really */ ucV)
{
  MOZ_ASSERT(buff->state == S_FILLING);

  ////////////////////////////////////////////////////
  // BEGIN fill

  /* The buffer already will have some of its ProfileEntries filled
     in, but everything else needs to be filled in at this point. */
  //LOGF("Release full buffer: %lu ents", buff->entsUsed);
  /* Where the resulting info is to be dumped */
  buff->aProfile = aProfile;

  /* And, if we have register state, that and the stack top */
  buff->haveNativeInfo = ucV != nullptr;
  if (buff->haveNativeInfo) {
# if defined(SPS_PLAT_amd64_linux)
    ucontext_t* uc = (ucontext_t*)ucV;
    mcontext_t* mc = &(uc->uc_mcontext);
    buff->startRegs.xip = lul::TaggedUWord(mc->gregs[REG_RIP]);
    buff->startRegs.xsp = lul::TaggedUWord(mc->gregs[REG_RSP]);
    buff->startRegs.xbp = lul::TaggedUWord(mc->gregs[REG_RBP]);
# elif defined(SPS_PLAT_amd64_darwin)
    ucontext_t* uc = (ucontext_t*)ucV;
    struct __darwin_mcontext64* mc = uc->uc_mcontext;
    struct __darwin_x86_thread_state64* ss = &mc->__ss;
    buff->regs.rip = ss->__rip;
    buff->regs.rsp = ss->__rsp;
    buff->regs.rbp = ss->__rbp;
# elif defined(SPS_PLAT_arm_android)
    ucontext_t* uc = (ucontext_t*)ucV;
    mcontext_t* mc = &(uc->uc_mcontext);
    buff->startRegs.r15 = lul::TaggedUWord(mc->arm_pc);
    buff->startRegs.r14 = lul::TaggedUWord(mc->arm_lr);
    buff->startRegs.r13 = lul::TaggedUWord(mc->arm_sp);
    buff->startRegs.r12 = lul::TaggedUWord(mc->arm_ip);
    buff->startRegs.r11 = lul::TaggedUWord(mc->arm_fp);
    buff->startRegs.r7  = lul::TaggedUWord(mc->arm_r7);
# elif defined(SPS_PLAT_x86_linux) || defined(SPS_PLAT_x86_android)
    ucontext_t* uc = (ucontext_t*)ucV;
    mcontext_t* mc = &(uc->uc_mcontext);
    buff->startRegs.xip = lul::TaggedUWord(mc->gregs[REG_EIP]);
    buff->startRegs.xsp = lul::TaggedUWord(mc->gregs[REG_ESP]);
    buff->startRegs.xbp = lul::TaggedUWord(mc->gregs[REG_EBP]);
# elif defined(SPS_PLAT_x86_darwin)
    ucontext_t* uc = (ucontext_t*)ucV;
    struct __darwin_mcontext32* mc = uc->uc_mcontext;
    struct __darwin_i386_thread_state* ss = &mc->__ss;
    buff->regs.eip = ss->__eip;
    buff->regs.esp = ss->__esp;
    buff->regs.ebp = ss->__ebp;
# else
#   error "Unknown plat"
# endif

    /* Copy up to N_STACK_BYTES from rsp-REDZONE upwards, but not
       going past the stack's registered top point.  Do some basic
       sanity checks too.  This assumes that the TaggedUWord holding
       the stack pointer value is valid, but it should be, since it
       was constructed that way in the code just above. */
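    /* For example (illustrative numbers only): on x86, with
       xsp = 0x00801004 and a registered stackMaxSafe of 0x00808000,
       the word-aligned range [0x00801004, 0x00808000) is 0x6FFC
       bytes long, so min(0x6FFC, lul::N_STACK_BYTES) bytes are
       copied into stackImg, whose mStartAvma becomes 0x00801004. */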
    {
# if defined(SPS_PLAT_amd64_linux) || defined(SPS_PLAT_amd64_darwin)
      uintptr_t rEDZONE_SIZE = 128;
      uintptr_t start = buff->startRegs.xsp.Value() - rEDZONE_SIZE;
# elif defined(SPS_PLAT_arm_android)
      uintptr_t rEDZONE_SIZE = 0;
      uintptr_t start = buff->startRegs.r13.Value() - rEDZONE_SIZE;
# elif defined(SPS_PLAT_x86_linux) || defined(SPS_PLAT_x86_darwin) \
       || defined(SPS_PLAT_x86_android)
      uintptr_t rEDZONE_SIZE = 0;
      uintptr_t start = buff->startRegs.xsp.Value() - rEDZONE_SIZE;
# else
#   error "Unknown plat"
# endif
      uintptr_t end = (uintptr_t)buff->stackMaxSafe;
      uintptr_t ws  = sizeof(void*);
      start &= ~(ws-1);
      end   &= ~(ws-1);
      uintptr_t nToCopy = 0;
      if (start < end) {
        nToCopy = end - start;
        if (nToCopy > lul::N_STACK_BYTES)
          nToCopy = lul::N_STACK_BYTES;
      }
      MOZ_ASSERT(nToCopy <= lul::N_STACK_BYTES);
      buff->stackImg.mLen       = nToCopy;
      buff->stackImg.mStartAvma = start;
      if (nToCopy > 0) {
        memcpy(&buff->stackImg.mContents[0], (void*)start, nToCopy);
        (void)VALGRIND_MAKE_MEM_DEFINED(&buff->stackImg.mContents[0], nToCopy);
      }
    }
  } /* if (buff->haveNativeInfo) */
  // END fill
  ////////////////////////////////////////////////////
}

// RUNS IN SIGHANDLER CONTEXT
/* The calling thread owns the buffer, as denoted by its state being
   S_FILLING.  So we can mess with it without further locking. */
static void release_full_buffer(ThreadProfile* aProfile,
                                UnwinderThreadBuffer* buff,
                                void* /* ucontext_t*, really */ ucV )
{
  fill_buffer(aProfile, buff, ucV);
  /* And now relinquish ownership of the buff, so that an unwinder
     thread can pick it up. */
  spinLock_acquire(&g_spinLock);
  buff->state = S_FULL;
  spinLock_release(&g_spinLock);
}

// RUNS IN SIGHANDLER CONTEXT
// Allocate a ProfEntsPage, without using malloc, or return
// ProfEntsPage_INVALID if we can't for some reason.
static ProfEntsPage* mmap_anon_ProfEntsPage()
{
# if defined(SPS_OS_darwin)
  void* v = ::mmap(nullptr, sizeof(ProfEntsPage), PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANON, -1, 0);
# else
  void* v = ::mmap(nullptr, sizeof(ProfEntsPage), PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
# endif
  if (v == MAP_FAILED) {
    return ProfEntsPage_INVALID;
  } else {
    return (ProfEntsPage*)v;
  }
}

// Runs in the unwinder thread
// Free a ProfEntsPage as allocated by mmap_anon_ProfEntsPage
static void munmap_ProfEntsPage(ProfEntsPage* pep)
{
  MOZ_ALWAYS_TRUE(is_page_aligned(pep));
  ::munmap(pep, sizeof(ProfEntsPage));
}


// RUNS IN SIGHANDLER CONTEXT
void
utb_add_prof_ent(/*MODIFIED*/UnwinderThreadBuffer* utb, ProfileEntry ent)
{
  uintptr_t limit
    = N_FIXED_PROF_ENTS + (N_PROF_ENTS_PER_PAGE * N_PROF_ENT_PAGES);
  if (utb->entsUsed == limit) {
    /* We're full.  Now what? */
    LOG("BPUnw: utb__addEntry: NO SPACE for ProfileEntry; ignoring.");
    return;
  }
  MOZ_ASSERT(utb->entsUsed < limit);

  /* Will it fit in the fixed array? */
  if (utb->entsUsed < N_FIXED_PROF_ENTS) {
    utb->entsFixed[utb->entsUsed] = ent;
    utb->entsUsed++;
    return;
  }

  /* No.  Put it in the extras. */
  uintptr_t i     = utb->entsUsed;
  uintptr_t j     = i - N_FIXED_PROF_ENTS;
  uintptr_t j_div = j / N_PROF_ENTS_PER_PAGE; /* page number */
  uintptr_t j_mod = j % N_PROF_ENTS_PER_PAGE; /* page offset */
  ProfEntsPage* pep = utb->entsPages[j_div];
  if (pep == ProfEntsPage_INVALID) {
    pep = mmap_anon_ProfEntsPage();
    if (pep == ProfEntsPage_INVALID) {
      /* Urr, we ran out of memory.  Now what? */
      LOG("BPUnw: utb__addEntry: MMAP FAILED for ProfileEntry; ignoring.");
      return;
    }
    utb->entsPages[j_div] = pep;
  }
  pep->ents[j_mod] = ent;
  utb->entsUsed++;
}


// misc helper
static ProfileEntry utb_get_profent(UnwinderThreadBuffer* buff, uintptr_t i)
{
  MOZ_ASSERT(i < buff->entsUsed);
  if (i < N_FIXED_PROF_ENTS) {
    return buff->entsFixed[i];
  } else {
    uintptr_t j     = i - N_FIXED_PROF_ENTS;
    uintptr_t j_div = j / N_PROF_ENTS_PER_PAGE; /* page number */
    uintptr_t j_mod = j % N_PROF_ENTS_PER_PAGE; /* page offset */
    MOZ_ASSERT(buff->entsPages[j_div] != ProfEntsPage_INVALID);
    return buff->entsPages[j_div]->ents[j_mod];
  }
}

/* Copy ProfileEntries presented to us by the sampling thread.
   Most of them are copied verbatim into |buff->aProfile|,
   except for 'hint' tags, which direct us to do something
   different. */
static void process_buffer(UnwinderThreadBuffer* buff, int oldest_ix)
{
  /* Need to lock |aProfile| so nobody tries to copy out entries
     whilst we are putting them in. */
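  /* To make the entry grammar described in the comment below
     concrete: a buffer for one sample might, hypothetically, look
     like this, writing h'X' for a hint entry:

       ...pre-tags...  h'N'                  (native unwind requested)
       h'P' <frame entries> 'S' sp1 h'Q'     (outermost pseudostack frame)
       h'P' <frame entries> 'S' sp2 h'Q'     (next pseudostack frame)
       ...post-tags...

     Only the non-hint, non-'S' entries are copied into |aProfile|;
     the h'P'/h'Q' pairs merely bracket each pseudostack frame, and
     each 'S' entry carries that frame's stack pointer, used for
     merging with the native stack. */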
  buff->aProfile->BeginUnwind();

  /* The buff is a sequence of ProfileEntries (ents).  It has
     this grammar:

     | --pre-tags-- | (h 'P' .. h 'Q')* | --post-tags-- |
                      ^               ^
                      ix_first_hP     ix_last_hQ

     Each (h 'P' .. h 'Q') subsequence represents one pseudostack
     entry.  These, if present, are in the order
     outermost-frame-first, and that is the order that they should
     be copied into aProfile.  The --pre-tags-- and --post-tags--
     are to be copied into the aProfile verbatim, except that they
     may contain the hints "h 'F'" for a flush and "h 'N'" to
     indicate that a native unwind is also required, and must be
     interleaved with the pseudostack entries.

     The hint tags that bound each pseudostack entry, "h 'P'" and "h
     'Q'", are not to be copied into the aProfile -- they are
     present only to make parsing easy here.  Also, the pseudostack
     entries may contain an "'S' (void*)" entry, which is the stack
     pointer value for that entry, and these are also not to be
     copied.
  */
  /* The first thing to do is therefore to find the pseudostack
     entries, if any, and to find out also whether a native unwind
     has been requested. */
  const uintptr_t infUW = ~(uintptr_t)0; // infinity
  bool  need_native_unw = false;
  uintptr_t ix_first_hP = infUW; // "not found"
  uintptr_t ix_last_hQ  = infUW; // "not found"

  uintptr_t k;
  for (k = 0; k < buff->entsUsed; k++) {
    ProfileEntry ent = utb_get_profent(buff, k);
    if (ent.is_ent_hint('N')) {
      need_native_unw = true;
    }
    else if (ent.is_ent_hint('P') && ix_first_hP == ~(uintptr_t)0) {
      ix_first_hP = k;
    }
    else if (ent.is_ent_hint('Q')) {
      ix_last_hQ = k;
    }
  }

  if (0) LOGF("BPUnw: ix_first_hP %llu ix_last_hQ %llu need_native_unw %llu",
              (unsigned long long int)ix_first_hP,
              (unsigned long long int)ix_last_hQ,
              (unsigned long long int)need_native_unw);

  /* There are four possibilities: native-only, pseudostack-only,
     combined (both), and neither.  We handle all four cases. */

  MOZ_ASSERT( (ix_first_hP == infUW && ix_last_hQ == infUW) ||
              (ix_first_hP != infUW && ix_last_hQ != infUW) );
  bool have_P = ix_first_hP != infUW;
  if (have_P) {
    MOZ_ASSERT(ix_first_hP < ix_last_hQ);
    MOZ_ASSERT(ix_last_hQ <= buff->entsUsed);
  }

  /* Neither N nor P.  This is very unusual but has been observed to happen.
     Just copy to the output. */
  if (!need_native_unw && !have_P) {
    for (k = 0; k < buff->entsUsed; k++) {
      ProfileEntry ent = utb_get_profent(buff, k);
      // action flush-hints
      if (ent.is_ent_hint('F')) { buff->aProfile->flush(); continue; }
      // skip ones we can't copy
      if (ent.is_ent_hint() || ent.is_ent('S')) { continue; }
      // handle GetBacktrace()
      if (ent.is_ent('B')) {
        UnwinderThreadBuffer* buff = (UnwinderThreadBuffer*)ent.get_tagPtr();
        process_buffer(buff, -1);
        continue;
      }
      // and copy everything else
      buff->aProfile->addTag( ent );
    }
  }
  else /* Native-only case. */
  if (need_native_unw && !have_P) {
    for (k = 0; k < buff->entsUsed; k++) {
      ProfileEntry ent = utb_get_profent(buff, k);
      // action a native-unwind-now hint
      if (ent.is_ent_hint('N')) {
        MOZ_ASSERT(buff->haveNativeInfo);
        PCandSP* pairs = nullptr;
        unsigned int nPairs = 0;
        do_lul_unwind_Buffer(&pairs, &nPairs, buff, oldest_ix);
        buff->aProfile->addTag( ProfileEntry('s', "(root)") );
        for (unsigned int i = 0; i < nPairs; i++) {
          /* Skip any outermost frames that
             do_lul_unwind_Buffer didn't give us.  See comments
             on that function for details. */
          if (pairs[i].pc == 0 && pairs[i].sp == 0)
            continue;
          buff->aProfile
              ->addTag( ProfileEntry('l', reinterpret_cast<void*>(pairs[i].pc)) );
        }
        if (pairs)
          free(pairs);
        continue;
      }
      // action flush-hints
      if (ent.is_ent_hint('F')) { buff->aProfile->flush(); continue; }
      // skip ones we can't copy
      if (ent.is_ent_hint() || ent.is_ent('S')) { continue; }
      // handle GetBacktrace()
      if (ent.is_ent('B')) {
        UnwinderThreadBuffer* buff = (UnwinderThreadBuffer*)ent.get_tagPtr();
        process_buffer(buff, -1);
        continue;
      }
      // and copy everything else
      buff->aProfile->addTag( ent );
    }
  }
  else /* Pseudostack-only case */
  if (!need_native_unw && have_P) {
    /* If there's no request for a native stack, it's easy: just
       copy the tags verbatim into aProfile, skipping the ones that
       can't be copied -- 'h' (hint) tags, and "'S' (void*)"
       stack-pointer tags.  Except, insert a sample-start tag when
       we see the start of the first pseudostack frame. */
    for (k = 0; k < buff->entsUsed; k++) {
      ProfileEntry ent = utb_get_profent(buff, k);
      // We need to insert a sample-start tag before the first frame
      if (k == ix_first_hP) {
        buff->aProfile->addTag( ProfileEntry('s', "(root)") );
      }
      // action flush-hints
      if (ent.is_ent_hint('F')) { buff->aProfile->flush(); continue; }
      // skip ones we can't copy
      if (ent.is_ent_hint() || ent.is_ent('S')) { continue; }
      // handle GetBacktrace()
      if (ent.is_ent('B')) {
        UnwinderThreadBuffer* buff = (UnwinderThreadBuffer*)ent.get_tagPtr();
        process_buffer(buff, -1);
        continue;
      }
      // and copy everything else
      buff->aProfile->addTag( ent );
    }
  }
  else /* Combined case */
  if (need_native_unw && have_P)
  {
    /* We need to get a native stacktrace and merge it with the
       pseudostack entries.  This isn't too simple.  First, copy all
       the tags up to the start of the pseudostack tags.  Then
       generate a combined set of tags by native unwind and
       pseudostack.  Then, copy all the stuff after the pseudostack
       tags. */
    MOZ_ASSERT(buff->haveNativeInfo);

    // Get native unwind info
    PCandSP* pairs = nullptr;
    unsigned int n_pairs = 0;
    do_lul_unwind_Buffer(&pairs, &n_pairs, buff, oldest_ix);

    // Entries before the pseudostack frames
    for (k = 0; k < ix_first_hP; k++) {
      ProfileEntry ent = utb_get_profent(buff, k);
      // action flush-hints
      if (ent.is_ent_hint('F')) { buff->aProfile->flush(); continue; }
      // skip ones we can't copy
      if (ent.is_ent_hint() || ent.is_ent('S')) { continue; }
      // handle GetBacktrace()
      if (ent.is_ent('B')) {
        UnwinderThreadBuffer* buff = (UnwinderThreadBuffer*)ent.get_tagPtr();
        process_buffer(buff, -1);
        continue;
      }
      // and copy everything else
      buff->aProfile->addTag( ent );
    }

    // BEGIN merge
    buff->aProfile->addTag( ProfileEntry('s', "(root)") );
    unsigned int next_N = 0; // index in pairs[]
    unsigned int next_P = ix_first_hP; // index in buff profent array
    bool last_was_P = false;
    if (0) LOGF("at mergeloop: n_pairs %llu ix_last_hQ %llu",
                (unsigned long long int)n_pairs,
                (unsigned long long int)ix_last_hQ);
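    /* A small illustration of the merge rule used below, with
       hypothetical SP values: given pseudostack frames P1 (sp=0x9000)
       and P2 (sp=0x8000), and native frames N1 (sp=0x8800) and
       N2 (sp=0x7000), the frames are emitted outermost-first by
       decreasing SP: P1, N1, P2, N2.  A P frame with no recorded SP
       is taken immediately whenever the previously taken frame was
       also a P. */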
    /* Skip any outermost frames that do_lul_unwind_Buffer
       didn't give us.  See comments on that function for
       details. */
    while (next_N < n_pairs && pairs[next_N].pc == 0 && pairs[next_N].sp == 0)
      next_N++;

    while (true) {
      if (next_P <= ix_last_hQ) {
        // Assert that next_P points at the start of an P entry
        MOZ_ASSERT(utb_get_profent(buff, next_P).is_ent_hint('P'));
      }
      if (next_N >= n_pairs && next_P > ix_last_hQ) {
        // both stacks empty
        break;
      }
      /* Decide which entry to use next:
         If N is empty, must use P, and vice versa
         else
         If the last was P and current P has zero SP, use P
         else
         we assume that both P and N have valid SP, in which case
         use the one with the larger value
      */
      bool use_P = true;
      if (next_N >= n_pairs) {
        // N empty, use P
        use_P = true;
        if (0) LOG(" P <= no remaining N entries");
      }
      else if (next_P > ix_last_hQ) {
        // P empty, use N
        use_P = false;
        if (0) LOG(" N <= no remaining P entries");
      }
      else {
        // We have at least one N and one P entry available.
        // Scan forwards to find the SP of the current P entry
        u_int64_t sp_cur_P = 0;
        unsigned int m = next_P + 1;
        while (1) {
          /* This assertion should hold because in a well formed
             input, we must eventually find the hint-Q that marks
             the end of this frame's entries. */
          MOZ_ASSERT(m < buff->entsUsed);
          ProfileEntry ent = utb_get_profent(buff, m);
          if (ent.is_ent_hint('Q'))
            break;
          if (ent.is_ent('S')) {
            sp_cur_P = reinterpret_cast<uintptr_t>(ent.get_tagPtr());
            break;
          }
          m++;
        }
        if (last_was_P && sp_cur_P == 0) {
          if (0) LOG(" P <= last_was_P && sp_cur_P == 0");
          use_P = true;
        } else {
          u_int64_t sp_cur_N = pairs[next_N].sp;
          use_P = (sp_cur_P > sp_cur_N);
          if (0) LOGF(" %s <= sps P %p N %p",
                      use_P ? "P" : "N", (void*)(intptr_t)sp_cur_P,
                      (void*)(intptr_t)sp_cur_N);
        }
      }
      /* So, we know which we are going to use. */
      if (use_P) {
        unsigned int m = next_P + 1;
        while (true) {
          MOZ_ASSERT(m < buff->entsUsed);
          ProfileEntry ent = utb_get_profent(buff, m);
          if (ent.is_ent_hint('Q')) {
            next_P = m + 1;
            break;
          }
          // we don't expect a flush-hint here
          MOZ_ASSERT(!ent.is_ent_hint('F'));
          // skip ones we can't copy
          if (ent.is_ent_hint() || ent.is_ent('S')) { m++; continue; }
          // and copy everything else
          buff->aProfile->addTag( ent );
          m++;
        }
      } else {
        buff->aProfile
            ->addTag( ProfileEntry('l', reinterpret_cast<void*>(pairs[next_N].pc)) );
        next_N++;
      }
      /* Remember what we chose, for next time. */
      last_was_P = use_P;
    }

    MOZ_ASSERT(next_P == ix_last_hQ + 1);
    MOZ_ASSERT(next_N == n_pairs);
    // END merge

    // Entries after the pseudostack frames
    for (k = ix_last_hQ+1; k < buff->entsUsed; k++) {
      ProfileEntry ent = utb_get_profent(buff, k);
      // action flush-hints
      if (ent.is_ent_hint('F')) { buff->aProfile->flush(); continue; }
      // skip ones we can't copy
      if (ent.is_ent_hint() || ent.is_ent('S')) { continue; }
      // and copy everything else
      buff->aProfile->addTag( ent );
    }

    // free native unwind info
    if (pairs)
      free(pairs);
  }

#if 0
  bool show = true;
  if (show) LOG("----------------");
  for (k = 0; k < buff->entsUsed; k++) {
    ProfileEntry ent = utb_get_profent(buff, k);
    if (show) ent.log();
    if (ent.is_ent_hint('F')) {
      /* This is a flush-hint */
      buff->aProfile->flush();
    }
    else if (ent.is_ent_hint('N')) {
      /* This is a do-a-native-unwind-right-now hint */
      MOZ_ASSERT(buff->haveNativeInfo);
      PCandSP* pairs = nullptr;
      unsigned int nPairs = 0;
      do_lul_unwind_Buffer(&pairs, &nPairs, buff, oldest_ix);
      buff->aProfile->addTag( ProfileEntry('s', "(root)") );
      for (unsigned int i = 0; i < nPairs; i++) {
        buff->aProfile
            ->addTag( ProfileEntry('l', reinterpret_cast<void*>(pairs[i].pc)) );
      }
      if (pairs)
        free(pairs);
    } else {
      /* Copy in verbatim */
      buff->aProfile->addTag( ent );
    }
  }
#endif

  buff->aProfile->EndUnwind();
}


// Find out, in a platform-dependent way, where the code modules got
// mapped in the process' virtual address space, and get |aLUL| to
// load unwind info for them.
void
read_procmaps(lul::LUL* aLUL)
{
  MOZ_ASSERT(aLUL->CountMappings() == 0);

# if defined(SPS_OS_linux) || defined(SPS_OS_android) || defined(SPS_OS_darwin)
  SharedLibraryInfo info = SharedLibraryInfo::GetInfoForSelf();

  for (size_t i = 0; i < info.GetSize(); i++) {
    const SharedLibrary& lib = info.GetEntry(i);

#if defined(SPS_OS_android) && !defined(MOZ_WIDGET_GONK)
    // We're using faulty.lib.  Use a special-case object mapper.
    AutoObjectMapperFaultyLib mapper(aLUL->mLog);
#else
    // We can use the standard POSIX-based mapper.
    AutoObjectMapperPOSIX mapper(aLUL->mLog);
#endif

    // Ask |mapper| to map the object.  Then hand its mapped address
    // to NotifyAfterMap().
    void*  image = nullptr;
    size_t size  = 0;
    bool ok = mapper.Map(&image, &size, lib.GetName());
    if (ok && image && size > 0) {
      aLUL->NotifyAfterMap(lib.GetStart(), lib.GetEnd()-lib.GetStart(),
                           lib.GetName().c_str(), image);
    } else if (!ok && lib.GetName() == "") {
      // The object has no name and (as a consequence) the mapper
      // failed to map it.
      // This happens on Linux, where
      // GetInfoForSelf() produces two such mappings: one for the
      // executable and one for the VDSO.  The executable one isn't a
      // big deal since there's not much interesting code in there,
      // but the VDSO one is a problem on x86-{linux,android} because
      // lack of knowledge about the mapped area inhibits LUL's
      // special __kernel_syscall handling.  Hence notify |aLUL| at
      // least of the mapping, even though it can't read any unwind
      // information for the area.
      aLUL->NotifyExecutableArea(lib.GetStart(), lib.GetEnd()-lib.GetStart());
    }

    // |mapper| goes out of scope at this point and so its destructor
    // unmaps the object.
  }

# else
# error "Unknown platform"
# endif
}

// LUL needs a callback for its logging sink.
static void
logging_sink_for_LUL(const char* str) {
  // Ignore any trailing \n, since LOG will add one anyway.
  size_t n = strlen(str);
  if (n > 0 && str[n-1] == '\n') {
    char* tmp = strdup(str);
    tmp[n-1] = 0;
    LOG(tmp);
    free(tmp);
  } else {
    LOG(str);
  }
}

// Runs in the unwinder thread -- well, this _is_ the unwinder thread.
static void* unwind_thr_fn(void* exit_nowV)
{
  // This is the unwinder thread function.  The first thread in must
  // create the unwinder library and request it to read the debug
  // info.  The last thread out must deallocate the library.  These
  // three tasks (create library, read debuginfo, destroy library) are
  // sequentialised by |sLULmutex|.  |sLUL| and |sLULcount| may only
  // be modified whilst |sLULmutex| is held.
  //
  // Once the threads are up and running, |sLUL| (the pointer itself,
  // that is) stays constant, and the multiple threads may make
  // concurrent calls into |sLUL| to do concurrent unwinding.
  LOG("unwind_thr_fn: START");

  // A hook for testing LUL: at the first entrance here, check env var
  // MOZ_PROFILER_LUL_TEST, and if set, run tests on LUL.  Note that
  // it is preferable to run the LUL tests via gtest, but gtest is not
  // currently supported on all targets that LUL runs on.  Hence the
  // auxiliary mechanism here is also needed.
  bool doLulTest = false;

  mozilla::DebugOnly<int> r = pthread_mutex_lock(&sLULmutex);
  MOZ_ASSERT(!r);

  if (!sLUL) {
    // sLUL hasn't been allocated, so we must be the first thread in.
    sLUL = new lul::LUL(logging_sink_for_LUL);
    MOZ_ASSERT(sLUL);
    MOZ_ASSERT(sLULcount == 0);
    // Register this thread so it can read unwind info and do unwinding.
    sLUL->RegisterUnwinderThread();
    // Read all the unwind info currently available.
    read_procmaps(sLUL);
    // Has a test been requested?
    if (PR_GetEnv("MOZ_PROFILER_LUL_TEST")) {
      doLulTest = true;
    }
  } else {
    // sLUL has already been allocated, so we can't be the first
    // thread in.
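
    // [Editorial aside.]  The create-on-first-entry / destroy-on-last-exit
    // discipline described above is essentially a mutex-protected refcount
    // around a lazily created singleton.  A minimal self-contained sketch of
    // the same pattern follows (hypothetical Lib and sketch_* names, not this
    // file's API); the assertion just below corresponds to sketch_exit()'s
    // precondition that the count is still positive.
#if 0
    #include <pthread.h>
    #include <assert.h>

    struct Lib { /* expensive-to-build shared state */ };
    static pthread_mutex_t sketch_mutex = PTHREAD_MUTEX_INITIALIZER;
    static Lib* sketch_lib   = nullptr;
    static int  sketch_count = 0;

    static void sketch_enter() {
      pthread_mutex_lock(&sketch_mutex);
      if (!sketch_lib) {
        assert(sketch_count == 0);
        sketch_lib = new Lib();    // first thread in: create and initialise
      }
      sketch_count++;
      pthread_mutex_unlock(&sketch_mutex);
    }

    static void sketch_exit() {
      pthread_mutex_lock(&sketch_mutex);
      assert(sketch_count > 0);
      if (sketch_count == 1) {     // last thread out: tear everything down
        delete sketch_lib;
        sketch_lib = nullptr;
      }
      sketch_count--;
      pthread_mutex_unlock(&sketch_mutex);
    }
#endif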
michael@0: MOZ_ASSERT(sLULcount > 0); michael@0: // Register this thread so it can do unwinding. michael@0: sLUL->RegisterUnwinderThread(); michael@0: } michael@0: michael@0: sLULcount++; michael@0: michael@0: r = pthread_mutex_unlock(&sLULmutex); michael@0: MOZ_ASSERT(!r); michael@0: michael@0: // If a test has been requested for LUL, run it. Summary results michael@0: // are sent to sLUL's logging sink. Note that this happens after michael@0: // read_procmaps has read unwind information into sLUL, so that the michael@0: // tests have something to unwind against. Without that they'd be michael@0: // pretty meaningless. michael@0: if (doLulTest) { michael@0: int nTests = 0, nTestsPassed = 0; michael@0: RunLulUnitTests(&nTests, &nTestsPassed, sLUL); michael@0: } michael@0: michael@0: // At this point, sLUL -- the single instance of the library -- is michael@0: // allocated and has read the required unwind info. All running michael@0: // threads can now make Unwind() requests of it concurrently, if michael@0: // they wish. michael@0: michael@0: // Now go on to allocate the array of buffers used for communication michael@0: // between the sampling threads and the unwinder threads. michael@0: michael@0: // If we're the first thread in, we'll need to allocate the buffer michael@0: // array g_buffers plus the Buffer structs that it points at. */ michael@0: spinLock_acquire(&g_spinLock); michael@0: if (g_buffers == nullptr) { michael@0: // Drop the lock, make a complete copy in memory, reacquire the michael@0: // lock, and try to install it -- which might fail, if someone michael@0: // else beat us to it. */ michael@0: spinLock_release(&g_spinLock); michael@0: UnwinderThreadBuffer** buffers michael@0: = (UnwinderThreadBuffer**)malloc(N_UNW_THR_BUFFERS michael@0: * sizeof(UnwinderThreadBuffer*)); michael@0: MOZ_ASSERT(buffers); michael@0: int i; michael@0: for (i = 0; i < N_UNW_THR_BUFFERS; i++) { michael@0: /* These calloc-ations are shared between the sampling and michael@0: unwinding threads. They must be free after all such threads michael@0: have terminated. */ michael@0: buffers[i] = (UnwinderThreadBuffer*) michael@0: calloc(sizeof(UnwinderThreadBuffer), 1); michael@0: MOZ_ASSERT(buffers[i]); michael@0: buffers[i]->state = S_EMPTY; michael@0: } michael@0: /* Try to install it */ michael@0: spinLock_acquire(&g_spinLock); michael@0: if (g_buffers == nullptr) { michael@0: g_buffers = buffers; michael@0: spinLock_release(&g_spinLock); michael@0: } else { michael@0: /* Someone else beat us to it. Release what we just allocated michael@0: so as to avoid a leak. */ michael@0: spinLock_release(&g_spinLock); michael@0: for (i = 0; i < N_UNW_THR_BUFFERS; i++) { michael@0: free(buffers[i]); michael@0: } michael@0: free(buffers); michael@0: } michael@0: } else { michael@0: /* They are already allocated, so just drop the lock and continue. 

  /*
    while (1) {
      acq lock
      scan to find oldest full
        if none { rel lock; sleep; continue }
      set buff state to emptying
      rel lock
      acq MLock // implicitly
      process buffer
      rel MLock // implicitly
      acq lock
      set buff state to S_EMPTY
      rel lock
    }
  */
  int* exit_now = (int*)exit_nowV;
  int ms_to_sleep_if_empty = 1;

  const int longest_sleep_ms = 1000;
  bool show_sleep_message = true;

  while (1) {

    if (*exit_now != 0) {
      *exit_now = 0;
      break;
    }

    spinLock_acquire(&g_spinLock);

    /* Find the oldest filled buffer, if any. */
    uint64_t oldest_seqNo = ~0ULL; /* infinity */
    int oldest_ix = -1;
    int i;
    for (i = 0; i < N_UNW_THR_BUFFERS; i++) {
      UnwinderThreadBuffer* buff = g_buffers[i];
      if (buff->state != S_FULL) continue;
      if (buff->seqNo < oldest_seqNo) {
        oldest_seqNo = buff->seqNo;
        oldest_ix = i;
      }
    }
    if (oldest_ix == -1) {
      /* We didn't find a full buffer.  Snooze and try again later. */
      MOZ_ASSERT(oldest_seqNo == ~0ULL);
      spinLock_release(&g_spinLock);
      if (ms_to_sleep_if_empty > 100 && LOGLEVEL >= 2) {
        if (show_sleep_message)
          LOGF("BPUnw: unwinder: sleep for %d ms", ms_to_sleep_if_empty);
        /* If we've already shown the message for the longest sleep,
           don't show it again, until the next round of sleeping
           starts. */
        if (ms_to_sleep_if_empty == longest_sleep_ms)
          show_sleep_message = false;
      }
      sleep_ms(ms_to_sleep_if_empty);
      if (ms_to_sleep_if_empty < 20) {
        ms_to_sleep_if_empty += 2;
      } else {
        ms_to_sleep_if_empty = (15 * ms_to_sleep_if_empty) / 10;
        if (ms_to_sleep_if_empty > longest_sleep_ms)
          ms_to_sleep_if_empty = longest_sleep_ms;
      }
      continue;
    }

    /* We found a full buffer.  Mark it as 'ours' and drop the
       lock; then we can safely hand it to LUL for unwinding. */
    UnwinderThreadBuffer* buff = g_buffers[oldest_ix];
    MOZ_ASSERT(buff->state == S_FULL);
    buff->state = S_EMPTYING;
    spinLock_release(&g_spinLock);

    /* unwind .. in which we can do anything we like, since any
       resource stalls that we may encounter (eg malloc locks) in
       competition with signal handler instances, will be short
       lived since the signal handler is guaranteed nonblocking. */
    if (0) LOGF("BPUnw: unwinder: seqNo %llu: emptying buf %d\n",
                (unsigned long long int)oldest_seqNo, oldest_ix);

    process_buffer(buff, oldest_ix);
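
    // [Editorial aside.]  At this point the buffer has travelled through the
    // state sequence the whole scheme relies on: a sampling thread takes it
    // from empty to filling to full, and this thread has just taken it from
    // full to emptying (above) and will put it back to empty (below).  A
    // sketch of that lifecycle as a transition check; the state names mirror
    // the ones used in this file, but the helper itself is hypothetical.
#if 0
    enum SketchState { SK_EMPTY, SK_FILLING, SK_FULL, SK_EMPTYING };

    // Returns true iff |from| -> |to| is one of the legal transitions.
    static bool sketch_legal_transition(SketchState from, SketchState to) {
      return (from == SK_EMPTY    && to == SK_FILLING)   // sampler grabs it
          || (from == SK_FILLING  && to == SK_FULL)      // sampler finishes
          || (from == SK_FULL     && to == SK_EMPTYING)  // unwinder claims it
          || (from == SK_EMPTYING && to == SK_EMPTY);    // unwinder releases it
    }
#endif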
    /* And .. we're done.  Mark the buffer as empty so it can be
       reused.  First though, unmap any of the entsPages that got
       mapped during filling. */
    for (i = 0; i < N_PROF_ENT_PAGES; i++) {
      if (buff->entsPages[i] == ProfEntsPage_INVALID)
        continue;
      munmap_ProfEntsPage(buff->entsPages[i]);
      buff->entsPages[i] = ProfEntsPage_INVALID;
    }

    (void)VALGRIND_MAKE_MEM_UNDEFINED(&buff->stackImg.mContents[0],
                                      lul::N_STACK_BYTES);
    spinLock_acquire(&g_spinLock);
    MOZ_ASSERT(buff->state == S_EMPTYING);
    buff->state = S_EMPTY;
    spinLock_release(&g_spinLock);
    ms_to_sleep_if_empty = 1;
    show_sleep_message = true;
  }

  // This unwinder thread is exiting.  If it's the last one out,
  // shut down and deallocate the unwinder library.
  r = pthread_mutex_lock(&sLULmutex);
  MOZ_ASSERT(!r);

  MOZ_ASSERT(sLULcount > 0);
  if (sLULcount == 1) {
    // Tell the library to discard unwind info for the entire address
    // space.
    sLUL->NotifyBeforeUnmapAll();

    delete sLUL;
    sLUL = nullptr;
  }

  sLULcount--;

  r = pthread_mutex_unlock(&sLULmutex);
  MOZ_ASSERT(!r);

  LOG("unwind_thr_fn: STOP");
  return nullptr;
}

static void finish_sync_buffer(ThreadProfile* profile,
                               UnwinderThreadBuffer* buff,
                               void* /* ucontext_t*, really */ ucV)
{
  SyncProfile* syncProfile = profile->AsSyncProfile();
  MOZ_ASSERT(syncProfile);
  SyncUnwinderThreadBuffer* utb = static_cast<SyncUnwinderThreadBuffer*>(
    syncProfile->GetUWTBuffer());
  fill_buffer(profile, utb->GetBuffer(), ucV);
  utb->GetBuffer()->state = S_FULL;
  PseudoStack* stack = profile->GetPseudoStack();
  stack->addLinkedUWTBuffer(utb);
}

static void release_sync_buffer(LinkedUWTBuffer* buff)
{
  SyncUnwinderThreadBuffer* data = static_cast<SyncUnwinderThreadBuffer*>(buff);
  MOZ_ASSERT(data->GetBuffer()->state == S_EMPTY);
  delete data;
}

////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////

// Keeps count of how frames are recovered (from the sample context,
// from CFI data, or by stack scanning), which is useful for
// diagnostic purposes.
static void stats_notify_frame(int n_context, int n_cfi, int n_scanned)
{
  // Gather stats in intervals.
  static unsigned int nf_total = 0; // total frames since last printout
  static unsigned int nf_CONTEXT = 0;
  static unsigned int nf_CFI = 0;
  static unsigned int nf_SCANNED = 0;

  nf_CONTEXT += n_context;
  nf_CFI += n_cfi;
  nf_SCANNED += n_scanned;
  nf_total += (n_context + n_cfi + n_scanned);

  if (nf_total >= 5000) {
    LOGF("BPUnw frame stats: TOTAL %5u"
         " CTX %4u CFI %4u SCAN %4u",
         nf_total, nf_CONTEXT, nf_CFI, nf_SCANNED);
    nf_total = 0;
    nf_CONTEXT = 0;
    nf_CFI = 0;
    nf_SCANNED = 0;
  }
}

static
void do_lul_unwind_Buffer(/*OUT*/PCandSP** pairs,
                          /*OUT*/unsigned int* nPairs,
                          UnwinderThreadBuffer* buff,
                          int buffNo /* for debug printing only */)
{
# if defined(SPS_ARCH_amd64) || defined(SPS_ARCH_x86)
  lul::UnwindRegs startRegs = buff->startRegs;
  if (0) {
    LOGF("Initial RIP = 0x%llx", (unsigned long long int)startRegs.xip.Value());
    LOGF("Initial RSP = 0x%llx", (unsigned long long int)startRegs.xsp.Value());
    LOGF("Initial RBP = 0x%llx", (unsigned long long int)startRegs.xbp.Value());
  }

# elif defined(SPS_ARCH_arm)
  lul::UnwindRegs startRegs = buff->startRegs;
  if (0) {
    LOGF("Initial R15 = 0x%llx", (unsigned long long int)startRegs.r15.Value());
    LOGF("Initial R13 = 0x%llx", (unsigned long long int)startRegs.r13.Value());
  }

# else
# error "Unknown plat"
# endif

  // FIXME: should we reinstate the ability to use separate debug objects?
  // /* Make up a list of places where the debug objects might be. */
  // std::vector<std::string> debug_dirs;
# if defined(SPS_OS_linux)
  // debug_dirs.push_back("/usr/lib/debug/lib");
  // debug_dirs.push_back("/usr/lib/debug/usr/lib");
  // debug_dirs.push_back("/usr/lib/debug/lib/x86_64-linux-gnu");
  // debug_dirs.push_back("/usr/lib/debug/usr/lib/x86_64-linux-gnu");
# elif defined(SPS_OS_android)
  // debug_dirs.push_back("/sdcard/symbols/system/lib");
  // debug_dirs.push_back("/sdcard/symbols/system/bin");
# elif defined(SPS_OS_darwin)
  // /* Nothing */
# else
# error "Unknown plat"
# endif

  // Set the max number of scanned or otherwise dubious frames
  // to the user-specified limit.
  size_t scannedFramesAllowed
    = std::min(std::max(0, sUnwindStackScan), MAX_NATIVE_FRAMES);
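
  // [Editorial aside.]  The clamp above just forces the user preference into
  // the range [0, MAX_NATIVE_FRAMES]: negative values turn scanning off, and
  // values larger than MAX_NATIVE_FRAMES are capped at it.  A self-contained
  // restatement, for a hypothetical |pref| value:
#if 0
  static size_t sketch_clamp_scanned(int pref) {
    if (pref < 0)                 return 0;                  // scanning off
    if (pref > MAX_NATIVE_FRAMES) return MAX_NATIVE_FRAMES;  // cap at the max
    return (size_t)pref;
  }
#endif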
  // The max number of frames is MAX_NATIVE_FRAMES, so as to avoid
  // the unwinder wasting a lot of time looping on corrupted stacks.
  uintptr_t framePCs[MAX_NATIVE_FRAMES];
  uintptr_t frameSPs[MAX_NATIVE_FRAMES];
  size_t framesAvail = mozilla::ArrayLength(framePCs);
  size_t framesUsed = 0;
  size_t scannedFramesAcquired = 0;
  sLUL->Unwind( &framePCs[0], &frameSPs[0],
                &framesUsed, &scannedFramesAcquired,
                framesAvail, scannedFramesAllowed,
                &startRegs, &buff->stackImg );

  if (LOGLEVEL >= 2)
    stats_notify_frame(/* context */ 1,
                       /* cfi */ framesUsed - 1 - scannedFramesAcquired,
                       /* scanned */ scannedFramesAcquired);

  // PC values are now in framePCs[0 .. framesUsed-1], with [0] being
  // the innermost frame.  SP values are likewise in frameSPs[].
  *pairs = (PCandSP*)calloc(framesUsed, sizeof(PCandSP));
  *nPairs = framesUsed;
  if (*pairs == nullptr) {
    *nPairs = 0;
    return;
  }

  if (framesUsed > 0) {
    for (unsigned int frame_index = 0;
         frame_index < framesUsed; ++frame_index) {
      (*pairs)[framesUsed-1-frame_index].pc = framePCs[frame_index];
      (*pairs)[framesUsed-1-frame_index].sp = frameSPs[frame_index];
    }
  }

  if (LOGLEVEL >= 3) {
    LOGF("BPUnw: unwinder: seqNo %llu, buf %d: got %u frames",
         (unsigned long long int)buff->seqNo, buffNo,
         (unsigned int)framesUsed);
  }

  if (LOGLEVEL >= 2) {
    if (0 == (g_stats_totalSamples % 1000))
      LOGF("BPUnw: %llu total samples, %llu failed (buffer unavail), "
           "%llu failed (thread unreg'd), ",
           (unsigned long long int)g_stats_totalSamples,
           (unsigned long long int)g_stats_noBuffAvail,
           (unsigned long long int)g_stats_thrUnregd);
  }
}

#endif /* defined(SPS_OS_windows) || defined(SPS_OS_darwin) */
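
// ----------------------------------------------------------------------------
// [Editorial aside.]  For reference, do_lul_unwind_Buffer's output contract as
// consumed by the merge logic earlier in this file: on success, *pairs holds
// *nPairs PCandSP entries ordered outermost-first (hence the reversal loop
// above), and the caller owns the calloc'd array.  A hedged usage sketch,
// modelled on the retired #if 0 consumer kept earlier in this file; the
// sketch_consume name is hypothetical.
#if 0
static void sketch_consume(UnwinderThreadBuffer* buff, int buffNo,
                           ThreadProfile* profile)
{
  PCandSP* pairs = nullptr;
  unsigned int nPairs = 0;
  do_lul_unwind_Buffer(&pairs, &nPairs, buff, buffNo);
  profile->addTag( ProfileEntry('s', "(root)") );
  // pairs[0] is the outermost frame, pairs[nPairs-1] the innermost one.
  for (unsigned int i = 0; i < nPairs; i++) {
    profile->addTag( ProfileEntry('l', reinterpret_cast<void*>(pairs[i].pc)) );
  }
  if (pairs)
    free(pairs);
}
#endif
// ----------------------------------------------------------------------------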