michael@0 | 1 | /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
michael@0 | 2 | /* This Source Code Form is subject to the terms of the Mozilla Public |
michael@0 | 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
michael@0 | 4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
michael@0 | 5 | |
michael@0 | 6 | #include <stdio.h> |
michael@0 | 7 | #include <signal.h> |
michael@0 | 8 | #include <string.h> |
michael@0 | 9 | #include <stdlib.h> |
michael@0 | 10 | #include <time.h> |
michael@0 | 11 | |
michael@0 | 12 | #ifdef MOZ_VALGRIND |
michael@0 | 13 | # include <valgrind/helgrind.h> |
michael@0 | 14 | # include <valgrind/memcheck.h> |
michael@0 | 15 | #else |
michael@0 | 16 | # define VALGRIND_HG_MUTEX_LOCK_PRE(_mx,_istry) /* */ |
michael@0 | 17 | # define VALGRIND_HG_MUTEX_LOCK_POST(_mx) /* */ |
michael@0 | 18 | # define VALGRIND_HG_MUTEX_UNLOCK_PRE(_mx) /* */ |
michael@0 | 19 | # define VALGRIND_HG_MUTEX_UNLOCK_POST(_mx) /* */ |
michael@0 | 20 | # define VALGRIND_MAKE_MEM_DEFINED(_addr,_len) ((void)0) |
michael@0 | 21 | # define VALGRIND_MAKE_MEM_UNDEFINED(_addr,_len) ((void)0) |
michael@0 | 22 | #endif |
michael@0 | 23 | |
michael@0 | 24 | #include "prenv.h" |
michael@0 | 25 | #include "mozilla/arm.h" |
michael@0 | 26 | #include "mozilla/DebugOnly.h" |
michael@0 | 27 | #include <stdint.h> |
michael@0 | 28 | #include "PlatformMacros.h" |
michael@0 | 29 | |
michael@0 | 30 | #include "platform.h" |
michael@0 | 31 | #include <ostream> |
michael@0 | 32 | #include <string> |
michael@0 | 33 | |
michael@0 | 34 | #include "ProfileEntry.h" |
michael@0 | 35 | #include "SyncProfile.h" |
michael@0 | 36 | #include "AutoObjectMapper.h" |
michael@0 | 37 | #include "UnwinderThread2.h" |
michael@0 | 38 | |
michael@0 | 39 | #if !defined(SPS_OS_windows) |
michael@0 | 40 | # include <sys/mman.h> |
michael@0 | 41 | #endif |
michael@0 | 42 | |
michael@0 | 43 | #if defined(SPS_OS_android) || defined(SPS_OS_linux) |
michael@0 | 44 | # include <ucontext.h> |
michael@0 | 45 | # include "LulMain.h" |
michael@0 | 46 | #endif |
michael@0 | 47 | |
michael@0 | 48 | #include "shared-libraries.h" |
michael@0 | 49 | |
michael@0 | 50 | |
michael@0 | 51 | // Verbosity of this module, for debugging: |
michael@0 | 52 | // 0 silent |
michael@0 | 53 | // 1 adds info about debuginfo load success/failure |
michael@0 | 54 | // 2 adds slow-summary stats for buffer fills/misses (RECOMMENDED) |
michael@0 | 55 | // 3 adds per-sample summary lines |
michael@0 | 56 | // 4 adds per-sample frame listing |
michael@0 | 57 | // Note that levels 3 and above produce a risk of deadlock, and |
michael@0 | 58 | // are not recommended for extended use. |
michael@0 | 59 | #define LOGLEVEL 2 |
michael@0 | 60 | |
michael@0 | 61 | // The maximum number of frames that the native unwinder will |
michael@0 | 62 | // produce. Setting it too high risks wasting a |
michael@0 | 63 | // lot of time looping on corrupted stacks. |
michael@0 | 64 | #define MAX_NATIVE_FRAMES 256 |
michael@0 | 65 | |
michael@0 | 66 | |
michael@0 | 67 | // The 'else' of this covers the entire rest of the file |
michael@0 | 68 | #if defined(SPS_OS_windows) || defined(SPS_OS_darwin) |
michael@0 | 69 | |
michael@0 | 70 | ////////////////////////////////////////////////////////// |
michael@0 | 71 | //// BEGIN externally visible functions (WINDOWS and OSX STUBS) |
michael@0 | 72 | |
michael@0 | 73 | // On Windows and OSX this will all need reworking. |
michael@0 | 74 | // GeckoProfilerImpl.h will ensure these functions are never actually |
michael@0 | 75 | // called, so just provide no-op stubs for now. |
michael@0 | 76 | |
michael@0 | 77 | void uwt__init() |
michael@0 | 78 | { |
michael@0 | 79 | } |
michael@0 | 80 | |
michael@0 | 81 | void uwt__stop() |
michael@0 | 82 | { |
michael@0 | 83 | } |
michael@0 | 84 | |
michael@0 | 85 | void uwt__deinit() |
michael@0 | 86 | { |
michael@0 | 87 | } |
michael@0 | 88 | |
michael@0 | 89 | void uwt__register_thread_for_profiling ( void* stackTop ) |
michael@0 | 90 | { |
michael@0 | 91 | } |
michael@0 | 92 | |
michael@0 | 93 | void uwt__unregister_thread_for_profiling() |
michael@0 | 94 | { |
michael@0 | 95 | } |
michael@0 | 96 | |
michael@0 | 97 | LinkedUWTBuffer* utb__acquire_sync_buffer(void* stackTop) |
michael@0 | 98 | { |
michael@0 | 99 | return nullptr; |
michael@0 | 100 | } |
michael@0 | 101 | |
michael@0 | 102 | // RUNS IN SIGHANDLER CONTEXT |
michael@0 | 103 | UnwinderThreadBuffer* uwt__acquire_empty_buffer() |
michael@0 | 104 | { |
michael@0 | 105 | return nullptr; |
michael@0 | 106 | } |
michael@0 | 107 | |
michael@0 | 108 | void |
michael@0 | 109 | utb__finish_sync_buffer(ThreadProfile* aProfile, |
michael@0 | 110 | UnwinderThreadBuffer* utb, |
michael@0 | 111 | void* /* ucontext_t*, really */ ucV) |
michael@0 | 112 | { |
michael@0 | 113 | } |
michael@0 | 114 | |
michael@0 | 115 | void |
michael@0 | 116 | utb__release_sync_buffer(LinkedUWTBuffer* utb) |
michael@0 | 117 | { |
michael@0 | 118 | } |
michael@0 | 119 | |
michael@0 | 120 | // RUNS IN SIGHANDLER CONTEXT |
michael@0 | 121 | void |
michael@0 | 122 | uwt__release_full_buffer(ThreadProfile* aProfile, |
michael@0 | 123 | UnwinderThreadBuffer* utb, |
michael@0 | 124 | void* /* ucontext_t*, really */ ucV ) |
michael@0 | 125 | { |
michael@0 | 126 | } |
michael@0 | 127 | |
michael@0 | 128 | // RUNS IN SIGHANDLER CONTEXT |
michael@0 | 129 | void |
michael@0 | 130 | utb__addEntry(/*MODIFIED*/UnwinderThreadBuffer* utb, ProfileEntry ent) |
michael@0 | 131 | { |
michael@0 | 132 | } |
michael@0 | 133 | |
michael@0 | 134 | //// END externally visible functions (WINDOWS and OSX STUBS) |
michael@0 | 135 | ////////////////////////////////////////////////////////// |
michael@0 | 136 | |
michael@0 | 137 | #else // a supported target |
michael@0 | 138 | |
michael@0 | 139 | ////////////////////////////////////////////////////////// |
michael@0 | 140 | //// BEGIN externally visible functions |
michael@0 | 141 | |
michael@0 | 142 | // Forward references: |
michael@0 | 143 | // the unwinder thread ID, its function, and a stop-now flag |
michael@0 | 144 | static void* unwind_thr_fn ( void* exit_nowV ); |
michael@0 | 145 | static pthread_t unwind_thr; |
michael@0 | 146 | static int unwind_thr_exit_now = 0; // RACED ON |
michael@0 | 147 | |
michael@0 | 148 | // Threads must be registered with this file before they can be |
michael@0 | 149 | // sampled, so that we know the max safe stack address for each |
michael@0 | 150 | // registered thread. |
michael@0 | 151 | static void thread_register_for_profiling ( void* stackTop ); |
michael@0 | 152 | |
michael@0 | 153 | // Unregister a thread. |
michael@0 | 154 | static void thread_unregister_for_profiling(); |
michael@0 | 155 | |
michael@0 | 156 | // Empties out the buffer queue. Used when the unwinder thread is |
michael@0 | 157 | // shut down. |
michael@0 | 158 | static void empty_buffer_queue(); |
michael@0 | 159 | |
michael@0 | 160 | // Allocate a buffer for synchronous unwinding |
michael@0 | 161 | static LinkedUWTBuffer* acquire_sync_buffer(void* stackTop); |
michael@0 | 162 | |
michael@0 | 163 | // RUNS IN SIGHANDLER CONTEXT |
michael@0 | 164 | // Acquire an empty buffer and mark it as FILLING |
michael@0 | 165 | static UnwinderThreadBuffer* acquire_empty_buffer(); |
michael@0 | 166 | |
michael@0 | 167 | static void finish_sync_buffer(ThreadProfile* aProfile, |
michael@0 | 168 | UnwinderThreadBuffer* utb, |
michael@0 | 169 | void* /* ucontext_t*, really */ ucV); |
michael@0 | 170 | |
michael@0 | 171 | // Release an empty synchronous unwind buffer. |
michael@0 | 172 | static void release_sync_buffer(LinkedUWTBuffer* utb); |
michael@0 | 173 | |
michael@0 | 174 | // RUNS IN SIGHANDLER CONTEXT |
michael@0 | 175 | // Put this buffer in the queue of stuff going to the unwinder |
michael@0 | 176 | // thread, and mark it as FULL. Before doing that, fill in stack |
michael@0 | 177 | // chunk and register fields if a native unwind is requested. |
michael@0 | 178 | // APROFILE is where the profile data should be added. UTB |
michael@0 | 179 | // is the partially-filled-in buffer, containing ProfileEntries. |
michael@0 | 180 | // UCV is the ucontext_t* from the signal handler. If non-nullptr, |
michael@0 | 181 | // it is taken as a cue to request a native unwind. |
michael@0 | 182 | static void release_full_buffer(ThreadProfile* aProfile, |
michael@0 | 183 | UnwinderThreadBuffer* utb, |
michael@0 | 184 | void* /* ucontext_t*, really */ ucV ); |
michael@0 | 185 | |
michael@0 | 186 | // RUNS IN SIGHANDLER CONTEXT |
michael@0 | 187 | static void utb_add_prof_ent(UnwinderThreadBuffer* utb, ProfileEntry ent); |
michael@0 | 188 | |
michael@0 | 189 | // Do a store memory barrier. |
michael@0 | 190 | static void do_MBAR(); |
michael@0 | 191 | |
michael@0 | 192 | |
michael@0 | 193 | // This is the single instance of the LUL unwind library that we will |
michael@0 | 194 | // use. Currently the library is operated with multiple sampling |
michael@0 | 195 | // threads but only one unwinder thread. The setup here would also |
michael@0 | 196 | // allow multiple unwinder threads to be used, which should improve |
michael@0 | 197 | // throughput, although that configuration is as yet |
michael@0 | 198 | // untested. |
michael@0 | 199 | // |
michael@0 | 200 | // |sLULmutex| protects |sLUL| and |sLULcount| and also is used to |
michael@0 | 201 | // ensure that only the first unwinder thread requests |sLUL| to read |
michael@0 | 202 | // debug info. |sLUL| may only be assigned to (and the object it |
michael@0 | 203 | // points at may only be created/destroyed) when |sLULcount| is zero. |
michael@0 | 204 | // |sLULcount| holds the number of unwinder threads currently in |
michael@0 | 205 | // existence. |
michael@0 | 206 | static pthread_mutex_t sLULmutex = PTHREAD_MUTEX_INITIALIZER; |
michael@0 | 207 | static lul::LUL* sLUL = nullptr; |
michael@0 | 208 | static int sLULcount = 0; |
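// Illustrative sketch only -- not the actual implementation, which is
// presumably handled in the unwinder-thread code further down this
// file (not shown in this excerpt).  The protocol described above
// amounts to, on unwinder-thread startup:
//
//   pthread_mutex_lock(&sLULmutex);
//   if (sLULcount == 0) {
//     // First unwinder thread: create the singleton and ask it to
//     // read debug info.  (Constructor arguments omitted here.)
//     sLUL = new lul::LUL(...);
//   }
//   sLULcount++;
//   pthread_mutex_unlock(&sLULmutex);
//
// with the mirror-image decrement on unwinder-thread exit, and sLUL
// only torn down once sLULcount has dropped back to zero.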
michael@0 | 209 | |
michael@0 | 210 | |
michael@0 | 211 | void uwt__init() |
michael@0 | 212 | { |
michael@0 | 213 | // Create the unwinder thread. |
michael@0 | 214 | MOZ_ASSERT(unwind_thr_exit_now == 0); |
michael@0 | 215 | int r = pthread_create( &unwind_thr, nullptr, |
michael@0 | 216 | unwind_thr_fn, (void*)&unwind_thr_exit_now ); |
michael@0 | 217 | MOZ_ALWAYS_TRUE(r == 0); |
michael@0 | 218 | } |
michael@0 | 219 | |
michael@0 | 220 | void uwt__stop() |
michael@0 | 221 | { |
michael@0 | 222 | // Shut down the unwinder thread. |
michael@0 | 223 | MOZ_ASSERT(unwind_thr_exit_now == 0); |
michael@0 | 224 | unwind_thr_exit_now = 1; |
michael@0 | 225 | do_MBAR(); |
michael@0 | 226 | int r = pthread_join(unwind_thr, nullptr); |
michael@0 | 227 | MOZ_ALWAYS_TRUE(r == 0); |
michael@0 | 228 | } |
michael@0 | 229 | |
michael@0 | 230 | void uwt__deinit() |
michael@0 | 231 | { |
michael@0 | 232 | empty_buffer_queue(); |
michael@0 | 233 | } |
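// Expected call sequence for the API above, as implied by the
// comments and forward declarations (illustrative; the callers live
// in the profiler proper, not in this file):
//
//   uwt__init();                                   // start the unwinder thread
//   uwt__register_thread_for_profiling(stackTop);  // once per sampled thread
//   ... uwt__acquire_empty_buffer() and            // repeatedly, from the
//       uwt__release_full_buffer() ...             //   sampling signal handler
//   uwt__unregister_thread_for_profiling();        // per thread, on shutdown
//   uwt__stop();                                   // join the unwinder thread
//   uwt__deinit();                                 // then drop queued buffers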
michael@0 | 234 | |
michael@0 | 235 | void uwt__register_thread_for_profiling(void* stackTop) |
michael@0 | 236 | { |
michael@0 | 237 | thread_register_for_profiling(stackTop); |
michael@0 | 238 | } |
michael@0 | 239 | |
michael@0 | 240 | void uwt__unregister_thread_for_profiling() |
michael@0 | 241 | { |
michael@0 | 242 | thread_unregister_for_profiling(); |
michael@0 | 243 | } |
michael@0 | 244 | |
michael@0 | 245 | LinkedUWTBuffer* utb__acquire_sync_buffer(void* stackTop) |
michael@0 | 246 | { |
michael@0 | 247 | return acquire_sync_buffer(stackTop); |
michael@0 | 248 | } |
michael@0 | 249 | |
michael@0 | 250 | void utb__finish_sync_buffer(ThreadProfile* profile, |
michael@0 | 251 | UnwinderThreadBuffer* buff, |
michael@0 | 252 | void* /* ucontext_t*, really */ ucV) |
michael@0 | 253 | { |
michael@0 | 254 | finish_sync_buffer(profile, buff, ucV); |
michael@0 | 255 | } |
michael@0 | 256 | |
michael@0 | 257 | void utb__release_sync_buffer(LinkedUWTBuffer* buff) |
michael@0 | 258 | { |
michael@0 | 259 | release_sync_buffer(buff); |
michael@0 | 260 | } |
michael@0 | 261 | |
michael@0 | 262 | // RUNS IN SIGHANDLER CONTEXT |
michael@0 | 263 | UnwinderThreadBuffer* uwt__acquire_empty_buffer() |
michael@0 | 264 | { |
michael@0 | 265 | return acquire_empty_buffer(); |
michael@0 | 266 | } |
michael@0 | 267 | |
michael@0 | 268 | // RUNS IN SIGHANDLER CONTEXT |
michael@0 | 269 | void |
michael@0 | 270 | uwt__release_full_buffer(ThreadProfile* aProfile, |
michael@0 | 271 | UnwinderThreadBuffer* utb, |
michael@0 | 272 | void* /* ucontext_t*, really */ ucV ) |
michael@0 | 273 | { |
michael@0 | 274 | release_full_buffer( aProfile, utb, ucV ); |
michael@0 | 275 | } |
michael@0 | 276 | |
michael@0 | 277 | // RUNS IN SIGHANDLER CONTEXT |
michael@0 | 278 | void |
michael@0 | 279 | utb__addEntry(/*MODIFIED*/UnwinderThreadBuffer* utb, ProfileEntry ent) |
michael@0 | 280 | { |
michael@0 | 281 | utb_add_prof_ent(utb, ent); |
michael@0 | 282 | } |
michael@0 | 283 | |
michael@0 | 284 | //// END externally visible functions |
michael@0 | 285 | ////////////////////////////////////////////////////////// |
michael@0 | 286 | |
michael@0 | 287 | |
michael@0 | 288 | ////////////////////////////////////////////////////////// |
michael@0 | 289 | //// BEGIN type UnwinderThreadBuffer |
michael@0 | 290 | |
michael@0 | 291 | static_assert(sizeof(uint32_t) == 4, "uint32_t size incorrect"); |
michael@0 | 292 | static_assert(sizeof(uint64_t) == 8, "uint64_t size incorrect"); |
michael@0 | 293 | static_assert(sizeof(uintptr_t) == sizeof(void*), |
michael@0 | 294 | "uintptr_t size incorrect"); |
michael@0 | 295 | |
michael@0 | 296 | typedef |
michael@0 | 297 | struct { |
michael@0 | 298 | uint64_t rsp; |
michael@0 | 299 | uint64_t rbp; |
michael@0 | 300 | uint64_t rip; |
michael@0 | 301 | } |
michael@0 | 302 | AMD64Regs; |
michael@0 | 303 | |
michael@0 | 304 | typedef |
michael@0 | 305 | struct { |
michael@0 | 306 | uint32_t r15; |
michael@0 | 307 | uint32_t r14; |
michael@0 | 308 | uint32_t r13; |
michael@0 | 309 | uint32_t r12; |
michael@0 | 310 | uint32_t r11; |
michael@0 | 311 | uint32_t r7; |
michael@0 | 312 | } |
michael@0 | 313 | ARMRegs; |
michael@0 | 314 | |
michael@0 | 315 | typedef |
michael@0 | 316 | struct { |
michael@0 | 317 | uint32_t esp; |
michael@0 | 318 | uint32_t ebp; |
michael@0 | 319 | uint32_t eip; |
michael@0 | 320 | } |
michael@0 | 321 | X86Regs; |
michael@0 | 322 | |
michael@0 | 323 | #if defined(SPS_ARCH_amd64) |
michael@0 | 324 | typedef AMD64Regs ArchRegs; |
michael@0 | 325 | #elif defined(SPS_ARCH_arm) |
michael@0 | 326 | typedef ARMRegs ArchRegs; |
michael@0 | 327 | #elif defined(SPS_ARCH_x86) |
michael@0 | 328 | typedef X86Regs ArchRegs; |
michael@0 | 329 | #else |
michael@0 | 330 | # error "Unknown plat" |
michael@0 | 331 | #endif |
michael@0 | 332 | |
michael@0 | 333 | #if defined(SPS_ARCH_amd64) || defined(SPS_ARCH_arm) || defined(SPS_ARCH_x86) |
michael@0 | 334 | # define SPS_PAGE_SIZE 4096 |
michael@0 | 335 | #else |
michael@0 | 336 | # error "Unknown plat" |
michael@0 | 337 | #endif |
michael@0 | 338 | |
michael@0 | 339 | typedef enum { S_EMPTY, S_FILLING, S_EMPTYING, S_FULL } State; |
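// Buffer life cycle implied by the code below (sketch): a sampling
// thread takes a buffer S_EMPTY -> S_FILLING in acquire_empty_buffer(),
// fills it, and marks it S_FULL in release_full_buffer(); the unwinder
// thread then takes it S_FULL -> S_EMPTYING while draining it, and back
// to S_EMPTY when done (that last step is in code beyond this excerpt).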
michael@0 | 340 | |
michael@0 | 341 | typedef struct { uintptr_t val; } SpinLock; |
michael@0 | 342 | |
michael@0 | 343 | /* CONFIGURABLE */ |
michael@0 | 344 | /* The number of fixed ProfileEntry slots. If more are required, they |
michael@0 | 345 | are placed in mmap'd pages. */ |
michael@0 | 346 | #define N_FIXED_PROF_ENTS 20 |
michael@0 | 347 | |
michael@0 | 348 | /* CONFIGURABLE */ |
michael@0 | 349 | /* The number of extra pages of ProfileEntries. If (on arm) each |
michael@0 | 350 | ProfileEntry is 8 bytes, then a page holds 512, and so 100 pages |
michael@0 | 351 | is enough to hold 51200. */ |
michael@0 | 352 | #define N_PROF_ENT_PAGES 100 |
michael@0 | 353 | |
michael@0 | 354 | /* DERIVATIVE */ |
michael@0 | 355 | #define N_PROF_ENTS_PER_PAGE (SPS_PAGE_SIZE / sizeof(ProfileEntry)) |
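/* Worked example (illustrative only): with sizeof(ProfileEntry) == 8
   and SPS_PAGE_SIZE == 4096, N_PROF_ENTS_PER_PAGE is 512, so one
   buffer can hold N_FIXED_PROF_ENTS + N_PROF_ENT_PAGES * 512
   = 20 + 51200 = 51220 ProfileEntries in total. */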
michael@0 | 356 | |
michael@0 | 357 | /* A page of ProfileEntries. This might actually be slightly smaller |
michael@0 | 358 | than a page if SPS_PAGE_SIZE is not an exact multiple of |
michael@0 | 359 | sizeof(ProfileEntry). */ |
michael@0 | 360 | typedef |
michael@0 | 361 | struct { ProfileEntry ents[N_PROF_ENTS_PER_PAGE]; } |
michael@0 | 362 | ProfEntsPage; |
michael@0 | 363 | |
michael@0 | 364 | #define ProfEntsPage_INVALID ((ProfEntsPage*)1) |
michael@0 | 365 | |
michael@0 | 366 | |
michael@0 | 367 | /* Fields protected by the spinlock are marked SL */ |
michael@0 | 368 | |
michael@0 | 369 | struct _UnwinderThreadBuffer { |
michael@0 | 370 | /*SL*/ State state; |
michael@0 | 371 | /* The rest of these are protected, in some sense, by ::state. If |
michael@0 | 372 | ::state is S_FILLING, they are 'owned' by the sampler thread |
michael@0 | 373 | that set the state to S_FILLING. If ::state is S_EMPTYING, |
michael@0 | 374 | they are 'owned' by the unwinder thread that set the state to |
michael@0 | 375 | S_EMPTYING. If ::state is S_EMPTY or S_FULL, the buffer isn't |
michael@0 | 376 | owned by any thread, and so no thread may access these |
michael@0 | 377 | fields. */ |
michael@0 | 378 | /* Sample number, needed to process samples in order */ |
michael@0 | 379 | uint64_t seqNo; |
michael@0 | 380 | /* The ThreadProfile into which the results are eventually to be |
michael@0 | 381 | dumped. */ |
michael@0 | 382 | ThreadProfile* aProfile; |
michael@0 | 383 | /* Pseudostack and other info, always present */ |
michael@0 | 384 | ProfileEntry entsFixed[N_FIXED_PROF_ENTS]; |
michael@0 | 385 | ProfEntsPage* entsPages[N_PROF_ENT_PAGES]; |
michael@0 | 386 | uintptr_t entsUsed; |
michael@0 | 387 | /* Do we also have data to do a native unwind? */ |
michael@0 | 388 | bool haveNativeInfo; |
michael@0 | 389 | /* If so, here is the register state and stack. Unset if |
michael@0 | 390 | .haveNativeInfo is false. */ |
michael@0 | 391 | lul::UnwindRegs startRegs; |
michael@0 | 392 | lul::StackImage stackImg; |
michael@0 | 393 | void* stackMaxSafe; /* Address for max safe stack reading. */ |
michael@0 | 394 | }; |
michael@0 | 395 | /* Indexing scheme for ents: |
michael@0 | 396 | 0 <= i < N_FIXED_PROF_ENTS |
michael@0 | 397 | is at entsFixed[i] |
michael@0 | 398 | |
michael@0 | 399 | i >= N_FIXED_PROF_ENTS |
michael@0 | 400 | is at let j = i - N_FIXED_PROF_ENTS |
michael@0 | 401 | in entsPages[j / N_PROF_ENTS_PER_PAGE] |
michael@0 | 402 | ->ents[j % N_PROF_ENTS_PER_PAGE] |
michael@0 | 403 | |
michael@0 | 404 | entsPages[] are allocated on demand. Because zero can |
michael@0 | 405 | theoretically be a valid page pointer, use |
michael@0 | 406 | ProfEntsPage_INVALID == (ProfEntsPage*)1 to mark invalid pages. |
michael@0 | 407 | |
michael@0 | 408 | It follows that the max entsUsed value is N_FIXED_PROF_ENTS + |
michael@0 | 409 | N_PROF_ENTS_PER_PAGE * N_PROF_ENT_PAGES, and at that point no more |
michael@0 | 410 | ProfileEntries can be stored. |
michael@0 | 411 | */ |
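/* Worked example of the indexing scheme (illustrative): with
   N_FIXED_PROF_ENTS == 20 and 512 entries per page, overall index
   i == 1000 gives j == 980, which lives at
   entsPages[980 / 512]->ents[980 % 512], that is, page 1, slot 468.
   utb_add_prof_ent() and utb_get_profent() below implement exactly
   this mapping. */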
michael@0 | 412 | |
michael@0 | 413 | |
michael@0 | 414 | typedef |
michael@0 | 415 | struct { |
michael@0 | 416 | pthread_t thrId; |
michael@0 | 417 | void* stackTop; |
michael@0 | 418 | uint64_t nSamples; |
michael@0 | 419 | } |
michael@0 | 420 | StackLimit; |
michael@0 | 421 | |
michael@0 | 422 | /* Globals -- the buffer array */ |
michael@0 | 423 | #define N_UNW_THR_BUFFERS 10 |
michael@0 | 424 | /*SL*/ static UnwinderThreadBuffer** g_buffers = nullptr; |
michael@0 | 425 | /*SL*/ static uint64_t g_seqNo = 0; |
michael@0 | 426 | /*SL*/ static SpinLock g_spinLock = { 0 }; |
michael@0 | 427 | |
michael@0 | 428 | /* Globals -- the thread array. The array is dynamically expanded on |
michael@0 | 429 | demand. The spinlock must be held when accessing g_stackLimits, |
michael@0 | 430 | g_stackLimits[some index], g_stackLimitsUsed and g_stackLimitsSize. |
michael@0 | 431 | However, the spinlock must not be held when calling malloc to |
michael@0 | 432 | allocate or expand the array, as that would risk deadlock against a |
michael@0 | 433 | sampling thread that holds the malloc lock and is trying to acquire |
michael@0 | 434 | the spinlock. */ |
michael@0 | 435 | /*SL*/ static StackLimit* g_stackLimits = nullptr; |
michael@0 | 436 | /*SL*/ static size_t g_stackLimitsUsed = 0; |
michael@0 | 437 | /*SL*/ static size_t g_stackLimitsSize = 0; |
michael@0 | 438 | |
michael@0 | 439 | /* Stats -- atomically incremented, no lock needed */ |
michael@0 | 440 | static uintptr_t g_stats_totalSamples = 0; // total # sample attempts |
michael@0 | 441 | static uintptr_t g_stats_noBuffAvail = 0; // # failed due to no buffer avail |
michael@0 | 442 | static uintptr_t g_stats_thrUnregd = 0; // # failed due to unregistered thr |
michael@0 | 443 | |
michael@0 | 444 | /* We must be VERY CAREFUL what we do with the spinlock held. The |
michael@0 | 445 | only thing it is safe to do with it held is access (that is, read or |
michael@0 | 446 | write) g_buffers, g_buffers[], g_seqNo, g_buffers[]->state, |
michael@0 | 447 | g_stackLimits, g_stackLimits[], g_stackLimitsUsed and |
michael@0 | 448 | g_stackLimitsSize. No arbitrary computations, no syscalls, no |
michael@0 | 449 | printfs, no file IO, and absolutely no dynamic memory allocation |
michael@0 | 450 | (else we WILL eventually deadlock). |
michael@0 | 451 | |
michael@0 | 452 | This applies both to the signal handler and to the unwinder thread. |
michael@0 | 453 | */ |
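/* The resulting pattern, used by the routines below (sketch):

     spinLock_acquire(&g_spinLock);
     ... touch only the SL-marked globals; copy anything needed
         later into locals ...
     spinLock_release(&g_spinLock);
     ... only now do logging, malloc/free, syscalls, etc ...

   empty_buffer_queue() and thread_register_for_profiling() are
   concrete instances of this discipline. */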
michael@0 | 454 | |
michael@0 | 455 | //// END type UnwinderThreadBuffer |
michael@0 | 456 | ////////////////////////////////////////////////////////// |
michael@0 | 457 | |
michael@0 | 458 | // This is the interface to LUL. |
michael@0 | 459 | typedef struct { u_int64_t pc; u_int64_t sp; } PCandSP; |
michael@0 | 460 | |
michael@0 | 461 | // Forward declaration. Implementation is below. |
michael@0 | 462 | static |
michael@0 | 463 | void do_lul_unwind_Buffer(/*OUT*/PCandSP** pairs, |
michael@0 | 464 | /*OUT*/unsigned int* nPairs, |
michael@0 | 465 | UnwinderThreadBuffer* buff, |
michael@0 | 466 | int buffNo /* for debug printing only */); |
michael@0 | 467 | |
michael@0 | 468 | static bool is_page_aligned(void* v) |
michael@0 | 469 | { |
michael@0 | 470 | uintptr_t w = (uintptr_t) v; |
michael@0 | 471 | return (w & (SPS_PAGE_SIZE-1)) == 0 ? true : false; |
michael@0 | 472 | } |
michael@0 | 473 | |
michael@0 | 474 | |
michael@0 | 475 | /* Implement machine-word sized atomic compare-and-swap. Returns true |
michael@0 | 476 | if success, false if failure. */ |
michael@0 | 477 | static bool do_CASW(uintptr_t* addr, uintptr_t expected, uintptr_t nyu) |
michael@0 | 478 | { |
michael@0 | 479 | #if defined(__GNUC__) |
michael@0 | 480 | return __sync_bool_compare_and_swap(addr, expected, nyu); |
michael@0 | 481 | #else |
michael@0 | 482 | # error "Unhandled compiler" |
michael@0 | 483 | #endif |
michael@0 | 484 | } |
michael@0 | 485 | |
michael@0 | 486 | /* Hint to the CPU core that we are in a spin-wait loop, and that |
michael@0 | 487 | other processors/cores/threads-running-on-the-same-core should be |
michael@0 | 488 | given priority on execution resources, if that is possible. Not |
michael@0 | 489 | critical if this is a no-op on some targets. */ |
michael@0 | 490 | static void do_SPINLOOP_RELAX() |
michael@0 | 491 | { |
michael@0 | 492 | #if (defined(SPS_ARCH_amd64) || defined(SPS_ARCH_x86)) && defined(__GNUC__) |
michael@0 | 493 | __asm__ __volatile__("rep; nop"); |
michael@0 | 494 | #elif defined(SPS_PLAT_arm_android) && MOZILLA_ARM_ARCH >= 7 |
michael@0 | 495 | __asm__ __volatile__("wfe"); |
michael@0 | 496 | #endif |
michael@0 | 497 | } |
michael@0 | 498 | |
michael@0 | 499 | /* Tell any cores snoozing in spin loops to wake up. */ |
michael@0 | 500 | static void do_SPINLOOP_NUDGE() |
michael@0 | 501 | { |
michael@0 | 502 | #if (defined(SPS_ARCH_amd64) || defined(SPS_ARCH_x86)) && defined(__GNUC__) |
michael@0 | 503 | /* this is a no-op */ |
michael@0 | 504 | #elif defined(SPS_PLAT_arm_android) && MOZILLA_ARM_ARCH >= 7 |
michael@0 | 505 | __asm__ __volatile__("sev"); |
michael@0 | 506 | #endif |
michael@0 | 507 | } |
michael@0 | 508 | |
michael@0 | 509 | /* Perform a full memory barrier. */ |
michael@0 | 510 | static void do_MBAR() |
michael@0 | 511 | { |
michael@0 | 512 | #if defined(__GNUC__) |
michael@0 | 513 | __sync_synchronize(); |
michael@0 | 514 | #else |
michael@0 | 515 | # error "Unhandled compiler" |
michael@0 | 516 | #endif |
michael@0 | 517 | } |
michael@0 | 518 | |
michael@0 | 519 | static void spinLock_acquire(SpinLock* sl) |
michael@0 | 520 | { |
michael@0 | 521 | uintptr_t* val = &sl->val; |
michael@0 | 522 | VALGRIND_HG_MUTEX_LOCK_PRE(sl, 0/*!isTryLock*/); |
michael@0 | 523 | while (1) { |
michael@0 | 524 | bool ok = do_CASW( val, 0, 1 ); |
michael@0 | 525 | if (ok) break; |
michael@0 | 526 | do_SPINLOOP_RELAX(); |
michael@0 | 527 | } |
michael@0 | 528 | do_MBAR(); |
michael@0 | 529 | VALGRIND_HG_MUTEX_LOCK_POST(sl); |
michael@0 | 530 | } |
michael@0 | 531 | |
michael@0 | 532 | static void spinLock_release(SpinLock* sl) |
michael@0 | 533 | { |
michael@0 | 534 | uintptr_t* val = &sl->val; |
michael@0 | 535 | VALGRIND_HG_MUTEX_UNLOCK_PRE(sl); |
michael@0 | 536 | do_MBAR(); |
michael@0 | 537 | bool ok = do_CASW( val, 1, 0 ); |
michael@0 | 538 | /* This must succeed on the first try. Failure would imply that |
michael@0 | 539 | the lock was not held. */ |
michael@0 | 540 | MOZ_ALWAYS_TRUE(ok); |
michael@0 | 541 | do_SPINLOOP_NUDGE(); |
michael@0 | 542 | VALGRIND_HG_MUTEX_UNLOCK_POST(sl); |
michael@0 | 543 | } |
michael@0 | 544 | |
michael@0 | 545 | static void sleep_ms(unsigned int ms) |
michael@0 | 546 | { |
michael@0 | 547 | struct timespec req; |
michael@0 | 548 | req.tv_sec = ((time_t)ms) / 1000; |
michael@0 | 549 | req.tv_nsec = 1000 * 1000 * (((unsigned long)ms) % 1000); |
michael@0 | 550 | nanosleep(&req, nullptr); |
michael@0 | 551 | } |
michael@0 | 552 | |
michael@0 | 553 | /* Use CAS to implement standalone atomic increment. */ |
michael@0 | 554 | static void atomic_INC(uintptr_t* loc) |
michael@0 | 555 | { |
michael@0 | 556 | while (1) { |
michael@0 | 557 | uintptr_t old = *loc; |
michael@0 | 558 | uintptr_t nyu = old + 1; |
michael@0 | 559 | bool ok = do_CASW( loc, old, nyu ); |
michael@0 | 560 | if (ok) break; |
michael@0 | 561 | } |
michael@0 | 562 | } |
michael@0 | 563 | |
michael@0 | 564 | // Empties out the buffer queue. |
michael@0 | 565 | static void empty_buffer_queue() |
michael@0 | 566 | { |
michael@0 | 567 | spinLock_acquire(&g_spinLock); |
michael@0 | 568 | |
michael@0 | 569 | UnwinderThreadBuffer** tmp_g_buffers = g_buffers; |
michael@0 | 570 | g_stackLimitsUsed = 0; |
michael@0 | 571 | g_seqNo = 0; |
michael@0 | 572 | g_buffers = nullptr; |
michael@0 | 573 | |
michael@0 | 574 | spinLock_release(&g_spinLock); |
michael@0 | 575 | |
michael@0 | 576 | // Can't do any malloc/free when holding the spinlock. |
michael@0 | 577 | free(tmp_g_buffers); |
michael@0 | 578 | |
michael@0 | 579 | // We could potentially free up g_stackLimits too, but given the |
michael@0 | 580 | // complications involved in resizing it (see |
michael@0 | 581 | // thread_register_for_profiling below), it's probably safer to leave it in place. |
michael@0 | 582 | } |
michael@0 | 583 | |
michael@0 | 584 | |
michael@0 | 585 | // Registers a thread for profiling. Detects and ignores duplicate |
michael@0 | 586 | // registration. |
michael@0 | 587 | static void thread_register_for_profiling(void* stackTop) |
michael@0 | 588 | { |
michael@0 | 589 | pthread_t me = pthread_self(); |
michael@0 | 590 | |
michael@0 | 591 | spinLock_acquire(&g_spinLock); |
michael@0 | 592 | |
michael@0 | 593 | // tmp copy of g_stackLimitsUsed, to avoid racing in message printing |
michael@0 | 594 | int n_used; |
michael@0 | 595 | |
michael@0 | 596 | // Ignore spurious calls which aren't really registering anything. |
michael@0 | 597 | if (stackTop == nullptr) { |
michael@0 | 598 | n_used = g_stackLimitsUsed; |
michael@0 | 599 | spinLock_release(&g_spinLock); |
michael@0 | 600 | LOGF("BPUnw: [%d total] thread_register_for_profiling" |
michael@0 | 601 | "(me=%p, stacktop=NULL) (IGNORED)", n_used, (void*)me); |
michael@0 | 602 | return; |
michael@0 | 603 | } |
michael@0 | 604 | |
michael@0 | 605 | /* Minimal sanity check on stackTop */ |
michael@0 | 606 | MOZ_ASSERT((void*)&n_used/*any auto var will do*/ < stackTop); |
michael@0 | 607 | |
michael@0 | 608 | bool is_dup = false; |
michael@0 | 609 | for (size_t i = 0; i < g_stackLimitsUsed; i++) { |
michael@0 | 610 | if (g_stackLimits[i].thrId == me) { |
michael@0 | 611 | is_dup = true; |
michael@0 | 612 | break; |
michael@0 | 613 | } |
michael@0 | 614 | } |
michael@0 | 615 | |
michael@0 | 616 | if (is_dup) { |
michael@0 | 617 | /* It's a duplicate registration. Ignore it: drop the lock and |
michael@0 | 618 | return. */ |
michael@0 | 619 | n_used = g_stackLimitsUsed; |
michael@0 | 620 | spinLock_release(&g_spinLock); |
michael@0 | 621 | |
michael@0 | 622 | LOGF("BPUnw: [%d total] thread_register_for_profiling" |
michael@0 | 623 | "(me=%p, stacktop=%p) (DUPLICATE)", n_used, (void*)me, stackTop); |
michael@0 | 624 | return; |
michael@0 | 625 | } |
michael@0 | 626 | |
michael@0 | 627 | /* Make sure the g_stackLimits array is large enough to accommodate |
michael@0 | 628 | this new entry. This is tricky. If it isn't large enough, we |
michael@0 | 629 | can malloc a larger version, but we have to do that without |
michael@0 | 630 | holding the spinlock, else we risk deadlock. The deadlock |
michael@0 | 631 | scenario is: |
michael@0 | 632 | |
michael@0 | 633 |      Some other thread that is being sampled |
michael@0 | 634 |                                                  This thread |
michael@0 | 635 | |
michael@0 | 636 |      call malloc                                 call this function |
michael@0 | 637 |      acquire malloc lock                         acquire the spinlock |
michael@0 | 638 |      (sampling signal)                           discover thread array not big enough, |
michael@0 | 639 |      call uwt__acquire_empty_buffer              call malloc to make it larger |
michael@0 | 640 |      acquire the spinlock                        acquire malloc lock |
michael@0 | 641 | |
michael@0 | 642 | This gives an inconsistent lock acquisition order on the malloc |
michael@0 | 643 | lock and spinlock, hence risk of deadlock. |
michael@0 | 644 | |
michael@0 | 645 | Allocating more space for the array without holding the spinlock |
michael@0 | 646 | implies tolerating races against other thread(s) who are also |
michael@0 | 647 | trying to expand the array. How can we detect if we have been |
michael@0 | 648 | out-raced? Every successful expansion of g_stackLimits[] results |
michael@0 | 649 | in an increase in g_stackLimitsSize. Hence we can detect if we |
michael@0 | 650 | got out-raced by remembering g_stackLimitsSize before we dropped |
michael@0 | 651 | the spinlock and checking if it has changed after the spinlock is |
michael@0 | 652 | reacquired. */ |
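/* In outline, the resize path below is therefore: (1) note
   g_stackLimitsSize, (2) drop the spinlock, (3) malloc the larger
   array, (4) reacquire the spinlock, (5) if g_stackLimitsSize changed
   in the meantime, free the new array and retry from scratch;
   otherwise copy the old entries across and install the new array. */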
michael@0 | 653 | |
michael@0 | 654 | MOZ_ASSERT(g_stackLimitsUsed <= g_stackLimitsSize); |
michael@0 | 655 | |
michael@0 | 656 | if (g_stackLimitsUsed == g_stackLimitsSize) { |
michael@0 | 657 | /* g_stackLimits[] is full; resize it. */ |
michael@0 | 658 | |
michael@0 | 659 | size_t old_size = g_stackLimitsSize; |
michael@0 | 660 | size_t new_size = old_size == 0 ? 4 : (2 * old_size); |
michael@0 | 661 | |
michael@0 | 662 | spinLock_release(&g_spinLock); |
michael@0 | 663 | StackLimit* new_arr = (StackLimit*)malloc(new_size * sizeof(StackLimit)); |
michael@0 | 664 | if (!new_arr) |
michael@0 | 665 | return; |
michael@0 | 666 | |
michael@0 | 667 | spinLock_acquire(&g_spinLock); |
michael@0 | 668 | |
michael@0 | 669 | if (old_size != g_stackLimitsSize) { |
michael@0 | 670 | /* We've been outraced. Instead of trying to deal in-line with |
michael@0 | 671 | this extremely rare case, just start all over again by |
michael@0 | 672 | tail-calling this routine. */ |
michael@0 | 673 | spinLock_release(&g_spinLock); |
michael@0 | 674 | free(new_arr); |
michael@0 | 675 | thread_register_for_profiling(stackTop); |
michael@0 | 676 | return; |
michael@0 | 677 | } |
michael@0 | 678 | |
michael@0 | 679 | memcpy(new_arr, g_stackLimits, old_size * sizeof(StackLimit)); |
michael@0 | 680 | if (g_stackLimits) |
michael@0 | 681 | free(g_stackLimits); |
michael@0 | 682 | |
michael@0 | 683 | g_stackLimits = new_arr; |
michael@0 | 684 | |
michael@0 | 685 | MOZ_ASSERT(g_stackLimitsSize < new_size); |
michael@0 | 686 | g_stackLimitsSize = new_size; |
michael@0 | 687 | } |
michael@0 | 688 | |
michael@0 | 689 | MOZ_ASSERT(g_stackLimitsUsed < g_stackLimitsSize); |
michael@0 | 690 | |
michael@0 | 691 | /* Finally, we have a safe place to put the new entry. */ |
michael@0 | 692 | |
michael@0 | 693 | // Round |stackTop| up to the end of the containing page. We may |
michael@0 | 694 | // as well do this -- there's no danger of a fault, and we might |
michael@0 | 695 | // get a few more base-of-the-stack frames as a result. This |
michael@0 | 696 | // assumes that no target has a page size smaller than 4096. |
michael@0 | 697 | uintptr_t stackTopR = (uintptr_t)stackTop; |
michael@0 | 698 | stackTopR = (stackTopR & ~(uintptr_t)4095) + (uintptr_t)4095; |
michael@0 | 699 | |
michael@0 | 700 | g_stackLimits[g_stackLimitsUsed].thrId = me; |
michael@0 | 701 | g_stackLimits[g_stackLimitsUsed].stackTop = (void*)stackTopR; |
michael@0 | 702 | g_stackLimits[g_stackLimitsUsed].nSamples = 0; |
michael@0 | 703 | g_stackLimitsUsed++; |
michael@0 | 704 | |
michael@0 | 705 | n_used = g_stackLimitsUsed; |
michael@0 | 706 | spinLock_release(&g_spinLock); |
michael@0 | 707 | |
michael@0 | 708 | LOGF("BPUnw: [%d total] thread_register_for_profiling" |
michael@0 | 709 | "(me=%p, stacktop=%p)", n_used, (void*)me, stackTop); |
michael@0 | 710 | } |
michael@0 | 711 | |
michael@0 | 712 | // Deregisters a thread from profiling. Detects and ignores attempts |
michael@0 | 713 | // to deregister a not-registered thread. |
michael@0 | 714 | static void thread_unregister_for_profiling() |
michael@0 | 715 | { |
michael@0 | 716 | spinLock_acquire(&g_spinLock); |
michael@0 | 717 | |
michael@0 | 718 | // tmp copy of g_stackLimitsUsed, to avoid racing in message printing |
michael@0 | 719 | size_t n_used; |
michael@0 | 720 | |
michael@0 | 721 | size_t i; |
michael@0 | 722 | bool found = false; |
michael@0 | 723 | pthread_t me = pthread_self(); |
michael@0 | 724 | for (i = 0; i < g_stackLimitsUsed; i++) { |
michael@0 | 725 | if (g_stackLimits[i].thrId == me) |
michael@0 | 726 | break; |
michael@0 | 727 | } |
michael@0 | 728 | if (i < g_stackLimitsUsed) { |
michael@0 | 729 | // found this entry. Slide the remaining ones down one place. |
michael@0 | 730 | for (; i+1 < g_stackLimitsUsed; i++) { |
michael@0 | 731 | g_stackLimits[i] = g_stackLimits[i+1]; |
michael@0 | 732 | } |
michael@0 | 733 | g_stackLimitsUsed--; |
michael@0 | 734 | found = true; |
michael@0 | 735 | } |
michael@0 | 736 | |
michael@0 | 737 | n_used = g_stackLimitsUsed; |
michael@0 | 738 | |
michael@0 | 739 | spinLock_release(&g_spinLock); |
michael@0 | 740 | LOGF("BPUnw: [%d total] thread_unregister_for_profiling(me=%p) %s", |
michael@0 | 741 | (int)n_used, (void*)me, found ? "" : " (NOT REGISTERED) "); |
michael@0 | 742 | } |
michael@0 | 743 | |
michael@0 | 744 | |
michael@0 | 745 | __attribute__((unused)) |
michael@0 | 746 | static void show_registered_threads() |
michael@0 | 747 | { |
michael@0 | 748 | size_t i; |
michael@0 | 749 | spinLock_acquire(&g_spinLock); |
michael@0 | 750 | for (i = 0; i < g_stackLimitsUsed; i++) { |
michael@0 | 751 | LOGF("[%d] pthread_t=%p nSamples=%llu", |
michael@0 | 752 | (int)i, (void*)g_stackLimits[i].thrId, |
michael@0 | 753 | (unsigned long long int)g_stackLimits[i].nSamples); |
michael@0 | 754 | } |
michael@0 | 755 | spinLock_release(&g_spinLock); |
michael@0 | 756 | } |
michael@0 | 757 | |
michael@0 | 758 | // RUNS IN SIGHANDLER CONTEXT |
michael@0 | 759 | /* The calling thread owns the buffer, as denoted by its state being |
michael@0 | 760 | S_FILLING. So we can mess with it without further locking. */ |
michael@0 | 761 | static void init_empty_buffer(UnwinderThreadBuffer* buff, void* stackTop) |
michael@0 | 762 | { |
michael@0 | 763 | /* Now we own the buffer, initialise it. */ |
michael@0 | 764 | buff->aProfile = nullptr; |
michael@0 | 765 | buff->entsUsed = 0; |
michael@0 | 766 | buff->haveNativeInfo = false; |
michael@0 | 767 | buff->stackImg.mLen = 0; |
michael@0 | 768 | buff->stackImg.mStartAvma = 0; |
michael@0 | 769 | buff->stackMaxSafe = stackTop; /* We will need this in |
michael@0 | 770 | release_full_buffer() */ |
michael@0 | 771 | for (size_t i = 0; i < N_PROF_ENT_PAGES; i++) |
michael@0 | 772 | buff->entsPages[i] = ProfEntsPage_INVALID; |
michael@0 | 773 | } |
michael@0 | 774 | |
michael@0 | 775 | struct SyncUnwinderThreadBuffer : public LinkedUWTBuffer |
michael@0 | 776 | { |
michael@0 | 777 | UnwinderThreadBuffer* GetBuffer() |
michael@0 | 778 | { |
michael@0 | 779 | return &mBuff; |
michael@0 | 780 | } |
michael@0 | 781 | |
michael@0 | 782 | UnwinderThreadBuffer mBuff; |
michael@0 | 783 | }; |
michael@0 | 784 | |
michael@0 | 785 | static LinkedUWTBuffer* acquire_sync_buffer(void* stackTop) |
michael@0 | 786 | { |
michael@0 | 787 | MOZ_ASSERT(stackTop); |
michael@0 | 788 | SyncUnwinderThreadBuffer* buff = new SyncUnwinderThreadBuffer(); |
michael@0 | 789 | // We can set state without locking here because this thread owns the buffer |
michael@0 | 790 | // and it is going to fill it itself. |
michael@0 | 791 | buff->GetBuffer()->state = S_FILLING; |
michael@0 | 792 | init_empty_buffer(buff->GetBuffer(), stackTop); |
michael@0 | 793 | return buff; |
michael@0 | 794 | } |
michael@0 | 795 | |
michael@0 | 796 | // RUNS IN SIGHANDLER CONTEXT |
michael@0 | 797 | static UnwinderThreadBuffer* acquire_empty_buffer() |
michael@0 | 798 | { |
michael@0 | 799 | /* acq lock |
michael@0 | 800 | if buffers == nullptr { rel lock; exit } |
michael@0 | 801 | scan to find a free buff; if none { rel lock; exit } |
michael@0 | 802 | set buff state to S_FILLING |
michael@0 | 803 | fillseqno++; and remember it |
michael@0 | 804 | rel lock |
michael@0 | 805 | */ |
michael@0 | 806 | size_t i; |
michael@0 | 807 | |
michael@0 | 808 | atomic_INC( &g_stats_totalSamples ); |
michael@0 | 809 | |
michael@0 | 810 | /* This code is critical. We are in a signal handler and possibly |
michael@0 | 811 | with the malloc lock held. So we can't allocate any heap, and |
michael@0 | 812 | can't safely call any C library functions, not even the pthread_ |
michael@0 | 813 | functions. And we certainly can't do any syscalls. In short, |
michael@0 | 814 | this function needs to be self contained, not do any allocation, |
michael@0 | 815 | and not hold on to the spinlock for any significant length of |
michael@0 | 816 | time. */ |
michael@0 | 817 | |
michael@0 | 818 | spinLock_acquire(&g_spinLock); |
michael@0 | 819 | |
michael@0 | 820 | /* First of all, look for this thread's entry in g_stackLimits[]. |
michael@0 | 821 | We need to find it in order to figure out how much stack we can |
michael@0 | 822 | safely copy into the sample. This assumes that pthread_self() |
michael@0 | 823 | is safe to call in a signal handler, which strikes me as highly |
michael@0 | 824 | likely. */ |
michael@0 | 825 | pthread_t me = pthread_self(); |
michael@0 | 826 | MOZ_ASSERT(g_stackLimitsUsed <= g_stackLimitsSize); |
michael@0 | 827 | for (i = 0; i < g_stackLimitsUsed; i++) { |
michael@0 | 828 | if (g_stackLimits[i].thrId == me) |
michael@0 | 829 | break; |
michael@0 | 830 | } |
michael@0 | 831 | |
michael@0 | 832 | /* If the thread isn't registered for profiling, just ignore the call |
michael@0 | 833 | and return nullptr. */ |
michael@0 | 834 | if (i == g_stackLimitsUsed) { |
michael@0 | 835 | spinLock_release(&g_spinLock); |
michael@0 | 836 | atomic_INC( &g_stats_thrUnregd ); |
michael@0 | 837 | return nullptr; |
michael@0 | 838 | } |
michael@0 | 839 | |
michael@0 | 840 | /* "this thread is registered for profiling" */ |
michael@0 | 841 | MOZ_ASSERT(i < g_stackLimitsUsed); |
michael@0 | 842 | |
michael@0 | 843 | /* The furthest point that we can safely scan back up the stack. */ |
michael@0 | 844 | void* myStackTop = g_stackLimits[i].stackTop; |
michael@0 | 845 | g_stackLimits[i].nSamples++; |
michael@0 | 846 | |
michael@0 | 847 | /* Try to find a free buffer to use. */ |
michael@0 | 848 | if (g_buffers == nullptr) { |
michael@0 | 849 | /* The unwinder thread hasn't allocated any buffers yet. |
michael@0 | 850 | Nothing we can do. */ |
michael@0 | 851 | spinLock_release(&g_spinLock); |
michael@0 | 852 | atomic_INC( &g_stats_noBuffAvail ); |
michael@0 | 853 | return nullptr; |
michael@0 | 854 | } |
michael@0 | 855 | |
michael@0 | 856 | for (i = 0; i < N_UNW_THR_BUFFERS; i++) { |
michael@0 | 857 | if (g_buffers[i]->state == S_EMPTY) |
michael@0 | 858 | break; |
michael@0 | 859 | } |
michael@0 | 860 | MOZ_ASSERT(i <= N_UNW_THR_BUFFERS); |
michael@0 | 861 | |
michael@0 | 862 | if (i == N_UNW_THR_BUFFERS) { |
michael@0 | 863 | /* Again, no free buffers .. give up. */ |
michael@0 | 864 | spinLock_release(&g_spinLock); |
michael@0 | 865 | atomic_INC( &g_stats_noBuffAvail ); |
michael@0 | 866 | if (LOGLEVEL >= 3) |
michael@0 | 867 | LOG("BPUnw: handler: no free buffers"); |
michael@0 | 868 | return nullptr; |
michael@0 | 869 | } |
michael@0 | 870 | |
michael@0 | 871 | /* So we can use this one safely. Whilst still holding the lock, |
michael@0 | 872 | mark the buffer as belonging to us, and increment the sequence |
michael@0 | 873 | number. */ |
michael@0 | 874 | UnwinderThreadBuffer* buff = g_buffers[i]; |
michael@0 | 875 | MOZ_ASSERT(buff->state == S_EMPTY); |
michael@0 | 876 | buff->state = S_FILLING; |
michael@0 | 877 | buff->seqNo = g_seqNo; |
michael@0 | 878 | g_seqNo++; |
michael@0 | 879 | |
michael@0 | 880 | /* And drop the lock. We own the buffer, so go on and fill it. */ |
michael@0 | 881 | spinLock_release(&g_spinLock); |
michael@0 | 882 | |
michael@0 | 883 | /* Now we own the buffer, initialise it. */ |
michael@0 | 884 | init_empty_buffer(buff, myStackTop); |
michael@0 | 885 | return buff; |
michael@0 | 886 | } |
michael@0 | 887 | |
michael@0 | 888 | // RUNS IN SIGHANDLER CONTEXT |
michael@0 | 889 | /* The calling thread owns the buffer, as denoted by its state being |
michael@0 | 890 | S_FILLING. So we can mess with it without further locking. */ |
michael@0 | 891 | static void fill_buffer(ThreadProfile* aProfile, |
michael@0 | 892 | UnwinderThreadBuffer* buff, |
michael@0 | 893 | void* /* ucontext_t*, really */ ucV) |
michael@0 | 894 | { |
michael@0 | 895 | MOZ_ASSERT(buff->state == S_FILLING); |
michael@0 | 896 | |
michael@0 | 897 | //////////////////////////////////////////////////// |
michael@0 | 898 | // BEGIN fill |
michael@0 | 899 | |
michael@0 | 900 | /* The buffer already will have some of its ProfileEntries filled |
michael@0 | 901 | in, but everything else needs to be filled in at this point. */ |
michael@0 | 902 | //LOGF("Release full buffer: %lu ents", buff->entsUsed); |
michael@0 | 903 | /* Where the resulting info is to be dumped */ |
michael@0 | 904 | buff->aProfile = aProfile; |
michael@0 | 905 | |
michael@0 | 906 | /* And, if we have register state, that and the stack top */ |
michael@0 | 907 | buff->haveNativeInfo = ucV != nullptr; |
michael@0 | 908 | if (buff->haveNativeInfo) { |
michael@0 | 909 | # if defined(SPS_PLAT_amd64_linux) |
michael@0 | 910 | ucontext_t* uc = (ucontext_t*)ucV; |
michael@0 | 911 | mcontext_t* mc = &(uc->uc_mcontext); |
michael@0 | 912 | buff->startRegs.xip = lul::TaggedUWord(mc->gregs[REG_RIP]); |
michael@0 | 913 | buff->startRegs.xsp = lul::TaggedUWord(mc->gregs[REG_RSP]); |
michael@0 | 914 | buff->startRegs.xbp = lul::TaggedUWord(mc->gregs[REG_RBP]); |
michael@0 | 915 | # elif defined(SPS_PLAT_amd64_darwin) |
michael@0 | 916 | ucontext_t* uc = (ucontext_t*)ucV; |
michael@0 | 917 | struct __darwin_mcontext64* mc = uc->uc_mcontext; |
michael@0 | 918 | struct __darwin_x86_thread_state64* ss = &mc->__ss; |
michael@0 | 919 | buff->startRegs.xip = lul::TaggedUWord(ss->__rip); |
michael@0 | 920 | buff->startRegs.xsp = lul::TaggedUWord(ss->__rsp); |
michael@0 | 921 | buff->startRegs.xbp = lul::TaggedUWord(ss->__rbp); |
michael@0 | 922 | # elif defined(SPS_PLAT_arm_android) |
michael@0 | 923 | ucontext_t* uc = (ucontext_t*)ucV; |
michael@0 | 924 | mcontext_t* mc = &(uc->uc_mcontext); |
michael@0 | 925 | buff->startRegs.r15 = lul::TaggedUWord(mc->arm_pc); |
michael@0 | 926 | buff->startRegs.r14 = lul::TaggedUWord(mc->arm_lr); |
michael@0 | 927 | buff->startRegs.r13 = lul::TaggedUWord(mc->arm_sp); |
michael@0 | 928 | buff->startRegs.r12 = lul::TaggedUWord(mc->arm_ip); |
michael@0 | 929 | buff->startRegs.r11 = lul::TaggedUWord(mc->arm_fp); |
michael@0 | 930 | buff->startRegs.r7 = lul::TaggedUWord(mc->arm_r7); |
michael@0 | 931 | # elif defined(SPS_PLAT_x86_linux) || defined(SPS_PLAT_x86_android) |
michael@0 | 932 | ucontext_t* uc = (ucontext_t*)ucV; |
michael@0 | 933 | mcontext_t* mc = &(uc->uc_mcontext); |
michael@0 | 934 | buff->startRegs.xip = lul::TaggedUWord(mc->gregs[REG_EIP]); |
michael@0 | 935 | buff->startRegs.xsp = lul::TaggedUWord(mc->gregs[REG_ESP]); |
michael@0 | 936 | buff->startRegs.xbp = lul::TaggedUWord(mc->gregs[REG_EBP]); |
michael@0 | 937 | # elif defined(SPS_PLAT_x86_darwin) |
michael@0 | 938 | ucontext_t* uc = (ucontext_t*)ucV; |
michael@0 | 939 | struct __darwin_mcontext32* mc = uc->uc_mcontext; |
michael@0 | 940 | struct __darwin_i386_thread_state* ss = &mc->__ss; |
michael@0 | 941 | buff->startRegs.xip = lul::TaggedUWord(ss->__eip); |
michael@0 | 942 | buff->startRegs.xsp = lul::TaggedUWord(ss->__esp); |
michael@0 | 943 | buff->startRegs.xbp = lul::TaggedUWord(ss->__ebp); |
michael@0 | 944 | # else |
michael@0 | 945 | # error "Unknown plat" |
michael@0 | 946 | # endif |
michael@0 | 947 | |
michael@0 | 948 | /* Copy up to N_STACK_BYTES from rsp-REDZONE upwards, but not |
michael@0 | 949 | going past the stack's registered top point. Do some basic |
michael@0 | 950 | sanity checks too. This assumes that the TaggedUWord holding |
michael@0 | 951 | the stack pointer value is valid, but it should be, since it |
michael@0 | 952 | was constructed that way in the code just above. */ |
michael@0 | 953 | { |
michael@0 | 954 | # if defined(SPS_PLAT_amd64_linux) || defined(SPS_PLAT_amd64_darwin) |
michael@0 | 955 | uintptr_t rEDZONE_SIZE = 128; |
michael@0 | 956 | uintptr_t start = buff->startRegs.xsp.Value() - rEDZONE_SIZE; |
michael@0 | 957 | # elif defined(SPS_PLAT_arm_android) |
michael@0 | 958 | uintptr_t rEDZONE_SIZE = 0; |
michael@0 | 959 | uintptr_t start = buff->startRegs.r13.Value() - rEDZONE_SIZE; |
michael@0 | 960 | # elif defined(SPS_PLAT_x86_linux) || defined(SPS_PLAT_x86_darwin) \ |
michael@0 | 961 | || defined(SPS_PLAT_x86_android) |
michael@0 | 962 | uintptr_t rEDZONE_SIZE = 0; |
michael@0 | 963 | uintptr_t start = buff->startRegs.xsp.Value() - rEDZONE_SIZE; |
michael@0 | 964 | # else |
michael@0 | 965 | # error "Unknown plat" |
michael@0 | 966 | # endif |
michael@0 | 967 | uintptr_t end = (uintptr_t)buff->stackMaxSafe; |
michael@0 | 968 | uintptr_t ws = sizeof(void*); |
michael@0 | 969 | start &= ~(ws-1); |
michael@0 | 970 | end &= ~(ws-1); |
michael@0 | 971 | uintptr_t nToCopy = 0; |
michael@0 | 972 | if (start < end) { |
michael@0 | 973 | nToCopy = end - start; |
michael@0 | 974 | if (nToCopy > lul::N_STACK_BYTES) |
michael@0 | 975 | nToCopy = lul::N_STACK_BYTES; |
michael@0 | 976 | } |
michael@0 | 977 | MOZ_ASSERT(nToCopy <= lul::N_STACK_BYTES); |
michael@0 | 978 | buff->stackImg.mLen = nToCopy; |
michael@0 | 979 | buff->stackImg.mStartAvma = start; |
michael@0 | 980 | if (nToCopy > 0) { |
michael@0 | 981 | memcpy(&buff->stackImg.mContents[0], (void*)start, nToCopy); |
michael@0 | 982 | (void)VALGRIND_MAKE_MEM_DEFINED(&buff->stackImg.mContents[0], nToCopy); |
michael@0 | 983 | } |
michael@0 | 984 | } |
michael@0 | 985 | } /* if (buff->haveNativeInfo) */ |
michael@0 | 986 | // END fill |
michael@0 | 987 | //////////////////////////////////////////////////// |
michael@0 | 988 | } |
michael@0 | 989 | |
michael@0 | 990 | // RUNS IN SIGHANDLER CONTEXT |
michael@0 | 991 | /* The calling thread owns the buffer, as denoted by its state being |
michael@0 | 992 | S_FILLING. So we can mess with it without further locking. */ |
michael@0 | 993 | static void release_full_buffer(ThreadProfile* aProfile, |
michael@0 | 994 | UnwinderThreadBuffer* buff, |
michael@0 | 995 | void* /* ucontext_t*, really */ ucV ) |
michael@0 | 996 | { |
michael@0 | 997 | fill_buffer(aProfile, buff, ucV); |
michael@0 | 998 | /* And now relinquish ownership of the buff, so that an unwinder |
michael@0 | 999 | thread can pick it up. */ |
michael@0 | 1000 | spinLock_acquire(&g_spinLock); |
michael@0 | 1001 | buff->state = S_FULL; |
michael@0 | 1002 | spinLock_release(&g_spinLock); |
michael@0 | 1003 | } |
michael@0 | 1004 | |
michael@0 | 1005 | // RUNS IN SIGHANDLER CONTEXT |
michael@0 | 1006 | // Allocate a ProfEntsPage, without using malloc, or return |
michael@0 | 1007 | // ProfEntsPage_INVALID if we can't for some reason. |
michael@0 | 1008 | static ProfEntsPage* mmap_anon_ProfEntsPage() |
michael@0 | 1009 | { |
michael@0 | 1010 | # if defined(SPS_OS_darwin) |
michael@0 | 1011 | void* v = ::mmap(nullptr, sizeof(ProfEntsPage), PROT_READ | PROT_WRITE, |
michael@0 | 1012 | MAP_PRIVATE | MAP_ANON, -1, 0); |
michael@0 | 1013 | # else |
michael@0 | 1014 | void* v = ::mmap(nullptr, sizeof(ProfEntsPage), PROT_READ | PROT_WRITE, |
michael@0 | 1015 | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); |
michael@0 | 1016 | # endif |
michael@0 | 1017 | if (v == MAP_FAILED) { |
michael@0 | 1018 | return ProfEntsPage_INVALID; |
michael@0 | 1019 | } else { |
michael@0 | 1020 | return (ProfEntsPage*)v; |
michael@0 | 1021 | } |
michael@0 | 1022 | } |
michael@0 | 1023 | |
michael@0 | 1024 | // Runs in the unwinder thread |
michael@0 | 1025 | // Free a ProfEntsPage as allocated by mmap_anon_ProfEntsPage |
michael@0 | 1026 | static void munmap_ProfEntsPage(ProfEntsPage* pep) |
michael@0 | 1027 | { |
michael@0 | 1028 | MOZ_ALWAYS_TRUE(is_page_aligned(pep)); |
michael@0 | 1029 | ::munmap(pep, sizeof(ProfEntsPage)); |
michael@0 | 1030 | } |
michael@0 | 1031 | |
michael@0 | 1032 | |
michael@0 | 1033 | // RUNS IN SIGHANDLER CONTEXT |
michael@0 | 1034 | void |
michael@0 | 1035 | utb_add_prof_ent(/*MODIFIED*/UnwinderThreadBuffer* utb, ProfileEntry ent) |
michael@0 | 1036 | { |
michael@0 | 1037 | uintptr_t limit |
michael@0 | 1038 | = N_FIXED_PROF_ENTS + (N_PROF_ENTS_PER_PAGE * N_PROF_ENT_PAGES); |
michael@0 | 1039 | if (utb->entsUsed == limit) { |
michael@0 | 1040 | /* We're full. Now what? */ |
michael@0 | 1041 | LOG("BPUnw: utb__addEntry: NO SPACE for ProfileEntry; ignoring."); |
michael@0 | 1042 | return; |
michael@0 | 1043 | } |
michael@0 | 1044 | MOZ_ASSERT(utb->entsUsed < limit); |
michael@0 | 1045 | |
michael@0 | 1046 | /* Will it fit in the fixed array? */ |
michael@0 | 1047 | if (utb->entsUsed < N_FIXED_PROF_ENTS) { |
michael@0 | 1048 | utb->entsFixed[utb->entsUsed] = ent; |
michael@0 | 1049 | utb->entsUsed++; |
michael@0 | 1050 | return; |
michael@0 | 1051 | } |
michael@0 | 1052 | |
michael@0 | 1053 | /* No. Put it in the extras. */ |
michael@0 | 1054 | uintptr_t i = utb->entsUsed; |
michael@0 | 1055 | uintptr_t j = i - N_FIXED_PROF_ENTS; |
michael@0 | 1056 | uintptr_t j_div = j / N_PROF_ENTS_PER_PAGE; /* page number */ |
michael@0 | 1057 | uintptr_t j_mod = j % N_PROF_ENTS_PER_PAGE; /* page offset */ |
michael@0 | 1058 | ProfEntsPage* pep = utb->entsPages[j_div]; |
michael@0 | 1059 | if (pep == ProfEntsPage_INVALID) { |
michael@0 | 1060 | pep = mmap_anon_ProfEntsPage(); |
michael@0 | 1061 | if (pep == ProfEntsPage_INVALID) { |
michael@0 | 1062 | /* Urr, we ran out of memory. Now what? */ |
michael@0 | 1063 | LOG("BPUnw: utb__addEntry: MMAP FAILED for ProfileEntry; ignoring."); |
michael@0 | 1064 | return; |
michael@0 | 1065 | } |
michael@0 | 1066 | utb->entsPages[j_div] = pep; |
michael@0 | 1067 | } |
michael@0 | 1068 | pep->ents[j_mod] = ent; |
michael@0 | 1069 | utb->entsUsed++; |
michael@0 | 1070 | } |
michael@0 | 1071 | |
michael@0 | 1072 | |
michael@0 | 1073 | // misc helper |
michael@0 | 1074 | static ProfileEntry utb_get_profent(UnwinderThreadBuffer* buff, uintptr_t i) |
michael@0 | 1075 | { |
michael@0 | 1076 | MOZ_ASSERT(i < buff->entsUsed); |
michael@0 | 1077 | if (i < N_FIXED_PROF_ENTS) { |
michael@0 | 1078 | return buff->entsFixed[i]; |
michael@0 | 1079 | } else { |
michael@0 | 1080 | uintptr_t j = i - N_FIXED_PROF_ENTS; |
michael@0 | 1081 | uintptr_t j_div = j / N_PROF_ENTS_PER_PAGE; /* page number */ |
michael@0 | 1082 | uintptr_t j_mod = j % N_PROF_ENTS_PER_PAGE; /* page offset */ |
michael@0 | 1083 | MOZ_ASSERT(buff->entsPages[j_div] != ProfEntsPage_INVALID); |
michael@0 | 1084 | return buff->entsPages[j_div]->ents[j_mod]; |
michael@0 | 1085 | } |
michael@0 | 1086 | } |
michael@0 | 1087 | |
michael@0 | 1088 | /* Copy ProfileEntries presented to us by the sampling thread. |
michael@0 | 1089 | Most of them are copied verbatim into |buff->aProfile|, |
michael@0 | 1090 | except for 'hint' tags, which direct us to do something |
michael@0 | 1091 | different. */ |
michael@0 | 1092 | static void process_buffer(UnwinderThreadBuffer* buff, int oldest_ix) |
michael@0 | 1093 | { |
michael@0 | 1094 | /* Need to lock |aProfile| so nobody tries to copy out entries |
michael@0 | 1095 | whilst we are putting them in. */ |
michael@0 | 1096 | buff->aProfile->BeginUnwind(); |
michael@0 | 1097 | |
michael@0 | 1098 | /* The buff is a sequence of ProfileEntries (ents). It has |
michael@0 | 1099 | this grammar: |
michael@0 | 1100 | |
michael@0 | 1101 |      | --pre-tags-- | (h 'P' .. h 'Q')* | --post-tags-- | |
michael@0 | 1102 |                        ^             ^ |
michael@0 | 1103 |                        ix_first_hP   ix_last_hQ |
michael@0 | 1104 | |
michael@0 | 1105 | Each (h 'P' .. h 'Q') subsequence represents one pseudostack |
michael@0 | 1106 | entry. These, if present, are in the order |
michael@0 | 1107 | outermost-frame-first, and that is the order that they should |
michael@0 | 1108 | be copied into aProfile. The --pre-tags-- and --post-tags-- |
michael@0 | 1109 | are to be copied into the aProfile verbatim, except that they |
michael@0 | 1110 | may contain the hints "h 'F'" for a flush and "h 'N'" to |
michael@0 | 1111 | indicate that a native unwind is also required, and must be |
michael@0 | 1112 | interleaved with the pseudostack entries. |
michael@0 | 1113 | |
michael@0 | 1114 | The hint tags that bound each pseudostack entry, "h 'P'" and "h |
michael@0 | 1115 | 'Q'", are not to be copied into the aProfile -- they are |
michael@0 | 1116 | present only to make parsing easy here. Also, the pseudostack |
michael@0 | 1117 | entries may contain an "'S' (void*)" entry, which is the stack |
michael@0 | 1118 | pointer value for that entry, and these are also not to be |
michael@0 | 1119 | copied. |
michael@0 | 1120 | */ |
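/* For example (illustrative only), a combined pseudostack-plus-native
   sample might arrive as:

     <pre-tags, including h 'N'>
     h 'P'  <ents for the outermost frame, incl. an 'S' (void*) ent>  h 'Q'
     h 'P'  <ents for the next frame inward>                          h 'Q'
     <post-tags, possibly including h 'F'>

   where ix_first_hP indexes the first h 'P' and ix_last_hQ the last
   h 'Q'; the h 'N' requests an interleaved native unwind and h 'F'
   requests a flush. */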
michael@0 | 1121 | /* The first thing to do is therefore to find the pseudostack |
michael@0 | 1122 | entries, if any, and to find out also whether a native unwind |
michael@0 | 1123 | has been requested. */ |
michael@0 | 1124 | const uintptr_t infUW = ~(uintptr_t)0; // infinity |
michael@0 | 1125 | bool need_native_unw = false; |
michael@0 | 1126 | uintptr_t ix_first_hP = infUW; // "not found" |
michael@0 | 1127 | uintptr_t ix_last_hQ = infUW; // "not found" |
michael@0 | 1128 | |
michael@0 | 1129 | uintptr_t k; |
michael@0 | 1130 | for (k = 0; k < buff->entsUsed; k++) { |
michael@0 | 1131 | ProfileEntry ent = utb_get_profent(buff, k); |
michael@0 | 1132 | if (ent.is_ent_hint('N')) { |
michael@0 | 1133 | need_native_unw = true; |
michael@0 | 1134 | } |
michael@0 | 1135 | else if (ent.is_ent_hint('P') && ix_first_hP == ~(uintptr_t)0) { |
michael@0 | 1136 | ix_first_hP = k; |
michael@0 | 1137 | } |
michael@0 | 1138 | else if (ent.is_ent_hint('Q')) { |
michael@0 | 1139 | ix_last_hQ = k; |
michael@0 | 1140 | } |
michael@0 | 1141 | } |
michael@0 | 1142 | |
michael@0 | 1143 | if (0) LOGF("BPUnw: ix_first_hP %llu ix_last_hQ %llu need_native_unw %llu", |
michael@0 | 1144 | (unsigned long long int)ix_first_hP, |
michael@0 | 1145 | (unsigned long long int)ix_last_hQ, |
michael@0 | 1146 | (unsigned long long int)need_native_unw); |
michael@0 | 1147 | |
michael@0 | 1148 | /* There are four possibilities: native-only, pseudostack-only, |
michael@0 | 1149 | combined (both), and neither. We handle all four cases. */ |
michael@0 | 1150 | |
michael@0 | 1151 | MOZ_ASSERT( (ix_first_hP == infUW && ix_last_hQ == infUW) || |
michael@0 | 1152 | (ix_first_hP != infUW && ix_last_hQ != infUW) ); |
michael@0 | 1153 | bool have_P = ix_first_hP != infUW; |
michael@0 | 1154 | if (have_P) { |
michael@0 | 1155 | MOZ_ASSERT(ix_first_hP < ix_last_hQ); |
michael@0 | 1156 | MOZ_ASSERT(ix_last_hQ <= buff->entsUsed); |
michael@0 | 1157 | } |
michael@0 | 1158 | |
michael@0 | 1159 | /* Neither N nor P. This is very unusual but has been observed to happen. |
michael@0 | 1160 | Just copy to the output. */ |
michael@0 | 1161 | if (!need_native_unw && !have_P) { |
michael@0 | 1162 | for (k = 0; k < buff->entsUsed; k++) { |
michael@0 | 1163 | ProfileEntry ent = utb_get_profent(buff, k); |
michael@0 | 1164 | // action flush-hints |
michael@0 | 1165 | if (ent.is_ent_hint('F')) { buff->aProfile->flush(); continue; } |
michael@0 | 1166 | // skip ones we can't copy |
michael@0 | 1167 | if (ent.is_ent_hint() || ent.is_ent('S')) { continue; } |
michael@0 | 1168 | // handle GetBacktrace() |
michael@0 | 1169 | if (ent.is_ent('B')) { |
michael@0 | 1170 | UnwinderThreadBuffer* buff = (UnwinderThreadBuffer*)ent.get_tagPtr(); |
michael@0 | 1171 | process_buffer(buff, -1); |
michael@0 | 1172 | continue; |
michael@0 | 1173 | } |
michael@0 | 1174 | // and copy everything else |
michael@0 | 1175 | buff->aProfile->addTag( ent ); |
michael@0 | 1176 | } |
michael@0 | 1177 | } |
michael@0 | 1178 | else /* Native-only case. */ |
michael@0 | 1179 | if (need_native_unw && !have_P) { |
michael@0 | 1180 | for (k = 0; k < buff->entsUsed; k++) { |
michael@0 | 1181 | ProfileEntry ent = utb_get_profent(buff, k); |
michael@0 | 1182 | // action a native-unwind-now hint |
michael@0 | 1183 | if (ent.is_ent_hint('N')) { |
michael@0 | 1184 | MOZ_ASSERT(buff->haveNativeInfo); |
michael@0 | 1185 | PCandSP* pairs = nullptr; |
michael@0 | 1186 | unsigned int nPairs = 0; |
michael@0 | 1187 | do_lul_unwind_Buffer(&pairs, &nPairs, buff, oldest_ix); |
michael@0 | 1188 | buff->aProfile->addTag( ProfileEntry('s', "(root)") ); |
michael@0 | 1189 | for (unsigned int i = 0; i < nPairs; i++) { |
michael@0 | 1190 | /* Skip any outermost frames that |
michael@0 | 1191 | do_lul_unwind_Buffer didn't give us. See comments |
michael@0 | 1192 | on that function for details. */ |
michael@0 | 1193 | if (pairs[i].pc == 0 && pairs[i].sp == 0) |
michael@0 | 1194 | continue; |
michael@0 | 1195 | buff->aProfile |
michael@0 | 1196 | ->addTag( ProfileEntry('l', reinterpret_cast<void*>(pairs[i].pc)) ); |
michael@0 | 1197 | } |
michael@0 | 1198 | if (pairs) |
michael@0 | 1199 | free(pairs); |
michael@0 | 1200 | continue; |
michael@0 | 1201 | } |
michael@0 | 1202 | // action flush-hints |
michael@0 | 1203 | if (ent.is_ent_hint('F')) { buff->aProfile->flush(); continue; } |
michael@0 | 1204 | // skip ones we can't copy |
michael@0 | 1205 | if (ent.is_ent_hint() || ent.is_ent('S')) { continue; } |
michael@0 | 1206 | // handle GetBacktrace() |
michael@0 | 1207 | if (ent.is_ent('B')) { |
michael@0 | 1208 | UnwinderThreadBuffer* buff = (UnwinderThreadBuffer*)ent.get_tagPtr(); |
michael@0 | 1209 | process_buffer(buff, -1); |
michael@0 | 1210 | continue; |
michael@0 | 1211 | } |
michael@0 | 1212 | // and copy everything else |
michael@0 | 1213 | buff->aProfile->addTag( ent ); |
michael@0 | 1214 | } |
michael@0 | 1215 | } |
michael@0 | 1216 | else /* Pseudostack-only case */ |
michael@0 | 1217 | if (!need_native_unw && have_P) { |
michael@0 | 1218 | /* If there's no request for a native stack, it's easy: just |
michael@0 | 1219 | copy the tags verbatim into aProfile, skipping the ones that |
michael@0 | 1220 | can't be copied -- 'h' (hint) tags, and "'S' (void*)" |
michael@0 | 1221 | stack-pointer tags. Except, insert a sample-start tag when |
michael@0 | 1222 | we see the start of the first pseudostack frame. */ |
michael@0 | 1223 | for (k = 0; k < buff->entsUsed; k++) { |
michael@0 | 1224 | ProfileEntry ent = utb_get_profent(buff, k); |
michael@0 | 1225 | // We need to insert a sample-start tag before the first frame |
michael@0 | 1226 | if (k == ix_first_hP) { |
michael@0 | 1227 | buff->aProfile->addTag( ProfileEntry('s', "(root)") ); |
michael@0 | 1228 | } |
michael@0 | 1229 | // action flush-hints |
michael@0 | 1230 | if (ent.is_ent_hint('F')) { buff->aProfile->flush(); continue; } |
michael@0 | 1231 | // skip ones we can't copy |
michael@0 | 1232 | if (ent.is_ent_hint() || ent.is_ent('S')) { continue; } |
michael@0 | 1233 | // handle GetBacktrace() |
michael@0 | 1234 | if (ent.is_ent('B')) { |
michael@0 | 1235 | UnwinderThreadBuffer* buff = (UnwinderThreadBuffer*)ent.get_tagPtr(); |
michael@0 | 1236 | process_buffer(buff, -1); |
michael@0 | 1237 | continue; |
michael@0 | 1238 | } |
michael@0 | 1239 | // and copy everything else |
michael@0 | 1240 | buff->aProfile->addTag( ent ); |
michael@0 | 1241 | } |
michael@0 | 1242 | } |
michael@0 | 1243 | else /* Combined case */ |
michael@0 | 1244 | if (need_native_unw && have_P) |
michael@0 | 1245 | { |
michael@0 | 1246 | /* We need to get a native stacktrace and merge it with the |
michael@0 | 1247 | pseudostack entries. This isn't too simple. First, copy all |
michael@0 | 1248 | the tags up to the start of the pseudostack tags. Then |
michael@0 | 1249 | generate a combined set of tags by native unwind and |
michael@0 | 1250 | pseudostack. Then, copy all the stuff after the pseudostack |
michael@0 | 1251 | tags. */ |
michael@0 | 1252 | MOZ_ASSERT(buff->haveNativeInfo); |
michael@0 | 1253 | |
michael@0 | 1254 | // Get native unwind info |
michael@0 | 1255 | PCandSP* pairs = nullptr; |
michael@0 | 1256 | unsigned int n_pairs = 0; |
michael@0 | 1257 | do_lul_unwind_Buffer(&pairs, &n_pairs, buff, oldest_ix); |
michael@0 | 1258 | |
michael@0 | 1259 | // Entries before the pseudostack frames |
michael@0 | 1260 | for (k = 0; k < ix_first_hP; k++) { |
michael@0 | 1261 | ProfileEntry ent = utb_get_profent(buff, k); |
michael@0 | 1262 | // action flush-hints |
michael@0 | 1263 | if (ent.is_ent_hint('F')) { buff->aProfile->flush(); continue; } |
michael@0 | 1264 | // skip ones we can't copy |
michael@0 | 1265 | if (ent.is_ent_hint() || ent.is_ent('S')) { continue; } |
michael@0 | 1266 | // handle GetBacktrace() |
michael@0 | 1267 | if (ent.is_ent('B')) { |
michael@0 | 1268 | UnwinderThreadBuffer* buff = (UnwinderThreadBuffer*)ent.get_tagPtr(); |
michael@0 | 1269 | process_buffer(buff, -1); |
michael@0 | 1270 | continue; |
michael@0 | 1271 | } |
michael@0 | 1272 | // and copy everything else |
michael@0 | 1273 | buff->aProfile->addTag( ent ); |
michael@0 | 1274 | } |
michael@0 | 1275 | |
michael@0 | 1276 | // BEGIN merge |
michael@0 | 1277 | buff->aProfile->addTag( ProfileEntry('s', "(root)") ); |
michael@0 | 1278 | unsigned int next_N = 0; // index in pairs[] |
michael@0 | 1279 | unsigned int next_P = ix_first_hP; // index in buff profent array |
michael@0 | 1280 | bool last_was_P = false; |
michael@0 | 1281 | if (0) LOGF("at mergeloop: n_pairs %llu ix_last_hQ %llu", |
michael@0 | 1282 | (unsigned long long int)n_pairs, |
michael@0 | 1283 | (unsigned long long int)ix_last_hQ); |
michael@0 | 1284 | /* Skip any outermost frames that do_lul_unwind_Buffer |
michael@0 | 1285 | didn't give us. See comments on that function for |
michael@0 | 1286 | details. */ |
michael@0 | 1287 | while (next_N < n_pairs && pairs[next_N].pc == 0 && pairs[next_N].sp == 0) |
michael@0 | 1288 | next_N++; |
michael@0 | 1289 | |
michael@0 | 1290 | while (true) { |
michael@0 | 1291 | if (next_P <= ix_last_hQ) { |
michael@0 | 1292 | // Assert that next_P points at the start of a P entry |
michael@0 | 1293 | MOZ_ASSERT(utb_get_profent(buff, next_P).is_ent_hint('P')); |
michael@0 | 1294 | } |
michael@0 | 1295 | if (next_N >= n_pairs && next_P > ix_last_hQ) { |
michael@0 | 1296 | // both stacks empty |
michael@0 | 1297 | break; |
michael@0 | 1298 | } |
michael@0 | 1299 | /* Decide which entry to use next: |
michael@0 | 1300 | If N is empty, must use P, and vice versa |
michael@0 | 1301 | else |
michael@0 | 1302 | If the last was P and current P has zero SP, use P |
michael@0 | 1303 | else |
michael@0 | 1304 | we assume that both P and N have valid SP, in which case |
michael@0 | 1305 | use the one with the larger value |
michael@0 | 1306 | */ |
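/* Worked example with made-up stack-pointer values: stacks grow
   downwards on the supported targets, so the entry with the numerically
   larger SP is the outermore frame and should be emitted first.  If the
   current P entry carries 'S' 0x7fff2000 and pairs[next_N].sp is
   0x7fff1800, then sp_cur_P > sp_cur_N and we take the P entry; with
   the values swapped we would take the native entry instead. */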
michael@0 | 1307 | bool use_P = true; |
michael@0 | 1308 | if (next_N >= n_pairs) { |
michael@0 | 1309 | // N empty, use P |
michael@0 | 1310 | use_P = true; |
michael@0 | 1311 | if (0) LOG(" P <= no remaining N entries"); |
michael@0 | 1312 | } |
michael@0 | 1313 | else if (next_P > ix_last_hQ) { |
michael@0 | 1314 | // P empty, use N |
michael@0 | 1315 | use_P = false; |
michael@0 | 1316 | if (0) LOG(" N <= no remaining P entries"); |
michael@0 | 1317 | } |
michael@0 | 1318 | else { |
michael@0 | 1319 | // We have at least one N and one P entry available. |
michael@0 | 1320 | // Scan forwards to find the SP of the current P entry |
michael@0 | 1321 | u_int64_t sp_cur_P = 0; |
michael@0 | 1322 | unsigned int m = next_P + 1; |
michael@0 | 1323 | while (1) { |
michael@0 | 1324 | /* This assertion should hold because in a well formed |
michael@0 | 1325 | input, we must eventually find the hint-Q that marks |
michael@0 | 1326 | the end of this frame's entries. */ |
michael@0 | 1327 | MOZ_ASSERT(m < buff->entsUsed); |
michael@0 | 1328 | ProfileEntry ent = utb_get_profent(buff, m); |
michael@0 | 1329 | if (ent.is_ent_hint('Q')) |
michael@0 | 1330 | break; |
michael@0 | 1331 | if (ent.is_ent('S')) { |
michael@0 | 1332 | sp_cur_P = reinterpret_cast<u_int64_t>(ent.get_tagPtr()); |
michael@0 | 1333 | break; |
michael@0 | 1334 | } |
michael@0 | 1335 | m++; |
michael@0 | 1336 | } |
michael@0 | 1337 | if (last_was_P && sp_cur_P == 0) { |
michael@0 | 1338 | if (0) LOG(" P <= last_was_P && sp_cur_P == 0"); |
michael@0 | 1339 | use_P = true; |
michael@0 | 1340 | } else { |
michael@0 | 1341 | u_int64_t sp_cur_N = pairs[next_N].sp; |
michael@0 | 1342 | use_P = (sp_cur_P > sp_cur_N); |
michael@0 | 1343 | if (0) LOGF(" %s <= sps P %p N %p", |
michael@0 | 1344 | use_P ? "P" : "N", (void*)(intptr_t)sp_cur_P, |
michael@0 | 1345 | (void*)(intptr_t)sp_cur_N); |
michael@0 | 1346 | } |
michael@0 | 1347 | } |
michael@0 | 1348 | /* So, we know which we are going to use. */ |
michael@0 | 1349 | if (use_P) { |
michael@0 | 1350 | unsigned int m = next_P + 1; |
michael@0 | 1351 | while (true) { |
michael@0 | 1352 | MOZ_ASSERT(m < buff->entsUsed); |
michael@0 | 1353 | ProfileEntry ent = utb_get_profent(buff, m); |
michael@0 | 1354 | if (ent.is_ent_hint('Q')) { |
michael@0 | 1355 | next_P = m + 1; |
michael@0 | 1356 | break; |
michael@0 | 1357 | } |
michael@0 | 1358 | // we don't expect a flush-hint here |
michael@0 | 1359 | MOZ_ASSERT(!ent.is_ent_hint('F')); |
michael@0 | 1360 | // skip ones we can't copy |
michael@0 | 1361 | if (ent.is_ent_hint() || ent.is_ent('S')) { m++; continue; } |
michael@0 | 1362 | // and copy everything else |
michael@0 | 1363 | buff->aProfile->addTag( ent ); |
michael@0 | 1364 | m++; |
michael@0 | 1365 | } |
michael@0 | 1366 | } else { |
michael@0 | 1367 | buff->aProfile |
michael@0 | 1368 | ->addTag( ProfileEntry('l', reinterpret_cast<void*>(pairs[next_N].pc)) ); |
michael@0 | 1369 | next_N++; |
michael@0 | 1370 | } |
michael@0 | 1371 | /* Remember what we chose, for next time. */ |
michael@0 | 1372 | last_was_P = use_P; |
michael@0 | 1373 | } |
michael@0 | 1374 | |
michael@0 | 1375 | MOZ_ASSERT(next_P == ix_last_hQ + 1); |
michael@0 | 1376 | MOZ_ASSERT(next_N == n_pairs); |
michael@0 | 1377 | // END merge |
michael@0 | 1378 | |
michael@0 | 1379 | // Entries after the pseudostack frames |
michael@0 | 1380 | for (k = ix_last_hQ+1; k < buff->entsUsed; k++) { |
michael@0 | 1381 | ProfileEntry ent = utb_get_profent(buff, k); |
michael@0 | 1382 | // action flush-hints |
michael@0 | 1383 | if (ent.is_ent_hint('F')) { buff->aProfile->flush(); continue; } |
michael@0 | 1384 | // skip ones we can't copy |
michael@0 | 1385 | if (ent.is_ent_hint() || ent.is_ent('S')) { continue; } |
michael@0 | 1386 | // and copy everything else |
michael@0 | 1387 | buff->aProfile->addTag( ent ); |
michael@0 | 1388 | } |
michael@0 | 1389 | |
michael@0 | 1390 | // free native unwind info |
michael@0 | 1391 | if (pairs) |
michael@0 | 1392 | free(pairs); |
michael@0 | 1393 | } |
michael@0 | 1394 | |
michael@0 | 1395 | #if 0 |
michael@0 | 1396 | bool show = true; |
michael@0 | 1397 | if (show) LOG("----------------"); |
michael@0 | 1398 | for (k = 0; k < buff->entsUsed; k++) { |
michael@0 | 1399 | ProfileEntry ent = utb_get_profent(buff, k); |
michael@0 | 1400 | if (show) ent.log(); |
michael@0 | 1401 | if (ent.is_ent_hint('F')) { |
michael@0 | 1402 | /* This is a flush-hint */ |
michael@0 | 1403 | buff->aProfile->flush(); |
michael@0 | 1404 | } |
michael@0 | 1405 | else if (ent.is_ent_hint('N')) { |
michael@0 | 1406 | /* This is a do-a-native-unwind-right-now hint */ |
michael@0 | 1407 | MOZ_ASSERT(buff->haveNativeInfo); |
michael@0 | 1408 | PCandSP* pairs = nullptr; |
michael@0 | 1409 | unsigned int nPairs = 0; |
michael@0 | 1410 | do_lul_unwind_Buffer(&pairs, &nPairs, buff, oldest_ix); |
michael@0 | 1411 | buff->aProfile->addTag( ProfileEntry('s', "(root)") ); |
michael@0 | 1412 | for (unsigned int i = 0; i < nPairs; i++) { |
michael@0 | 1413 | buff->aProfile |
michael@0 | 1414 | ->addTag( ProfileEntry('l', reinterpret_cast<void*>(pairs[i].pc)) ); |
michael@0 | 1415 | } |
michael@0 | 1416 | if (pairs) |
michael@0 | 1417 | free(pairs); |
michael@0 | 1418 | } else { |
michael@0 | 1419 | /* Copy in verbatim */ |
michael@0 | 1420 | buff->aProfile->addTag( ent ); |
michael@0 | 1421 | } |
michael@0 | 1422 | } |
michael@0 | 1423 | #endif |
michael@0 | 1424 | |
michael@0 | 1425 | buff->aProfile->EndUnwind(); |
michael@0 | 1426 | } |
michael@0 | 1427 | |
michael@0 | 1428 | |
michael@0 | 1429 | // Find out, in a platform-dependent way, where the code modules got |
michael@0 | 1430 | // mapped in the process' virtual address space, and get |aLUL| to |
michael@0 | 1431 | // load unwind info for them. |
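// For example (addresses purely illustrative), a successfully mapped
// library on Linux ends up being reported to LUL roughly as
//   aLUL->NotifyAfterMap(0x7f3a40000000, 0x6400000, ".../libxul.so", image);
// whereas a nameless entry such as the VDSO only gets
//   aLUL->NotifyExecutableArea(start, length);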
michael@0 | 1432 | void |
michael@0 | 1433 | read_procmaps(lul::LUL* aLUL) |
michael@0 | 1434 | { |
michael@0 | 1435 | MOZ_ASSERT(aLUL->CountMappings() == 0); |
michael@0 | 1436 | |
michael@0 | 1437 | # if defined(SPS_OS_linux) || defined(SPS_OS_android) || defined(SPS_OS_darwin) |
michael@0 | 1438 | SharedLibraryInfo info = SharedLibraryInfo::GetInfoForSelf(); |
michael@0 | 1439 | |
michael@0 | 1440 | for (size_t i = 0; i < info.GetSize(); i++) { |
michael@0 | 1441 | const SharedLibrary& lib = info.GetEntry(i); |
michael@0 | 1442 | |
michael@0 | 1443 | #if defined(SPS_OS_android) && !defined(MOZ_WIDGET_GONK) |
michael@0 | 1444 | // We're using faulty.lib. Use a special-case object mapper. |
michael@0 | 1445 | AutoObjectMapperFaultyLib mapper(aLUL->mLog); |
michael@0 | 1446 | #else |
michael@0 | 1447 | // We can use the standard POSIX-based mapper. |
michael@0 | 1448 | AutoObjectMapperPOSIX mapper(aLUL->mLog); |
michael@0 | 1449 | #endif |
michael@0 | 1450 | |
michael@0 | 1451 | // Ask |mapper| to map the object. Then hand its mapped address |
michael@0 | 1452 | // to NotifyAfterMap(). |
michael@0 | 1453 | void* image = nullptr; |
michael@0 | 1454 | size_t size = 0; |
michael@0 | 1455 | bool ok = mapper.Map(&image, &size, lib.GetName()); |
michael@0 | 1456 | if (ok && image && size > 0) { |
michael@0 | 1457 | aLUL->NotifyAfterMap(lib.GetStart(), lib.GetEnd()-lib.GetStart(), |
michael@0 | 1458 | lib.GetName().c_str(), image); |
michael@0 | 1459 | } else if (!ok && lib.GetName() == "") { |
michael@0 | 1460 | // The object has no name and (as a consequence) the mapper |
michael@0 | 1461 | // failed to map it. This happens on Linux, where |
michael@0 | 1462 | // GetInfoForSelf() produces two such mappings: one for the |
michael@0 | 1463 | // executable and one for the VDSO. The executable one isn't a |
michael@0 | 1464 | // big deal since there's not much interesting code in there, |
michael@0 | 1465 | // but the VDSO one is a problem on x86-{linux,android} because |
michael@0 | 1466 | // lack of knowledge about the mapped area inhibits LUL's |
michael@0 | 1467 | // special __kernel_syscall handling. Hence notify |aLUL| at |
michael@0 | 1468 | // least of the mapping, even though it can't read any unwind |
michael@0 | 1469 | // information for the area. |
michael@0 | 1470 | aLUL->NotifyExecutableArea(lib.GetStart(), lib.GetEnd()-lib.GetStart()); |
michael@0 | 1471 | } |
michael@0 | 1472 | |
michael@0 | 1473 | // |mapper| goes out of scope at this point and so its destructor |
michael@0 | 1474 | // unmaps the object. |
michael@0 | 1475 | } |
michael@0 | 1476 | |
michael@0 | 1477 | # else |
michael@0 | 1478 | # error "Unknown platform" |
michael@0 | 1479 | # endif |
michael@0 | 1480 | } |
michael@0 | 1481 | |
michael@0 | 1482 | // LUL needs a callback for its logging sink. |
michael@0 | 1483 | static void |
michael@0 | 1484 | logging_sink_for_LUL(const char* str) { |
michael@0 | 1485 | // Ignore any trailing \n, since LOG will add one anyway. |
michael@0 | 1486 | size_t n = strlen(str); |
michael@0 | 1487 | if (n > 0 && str[n-1] == '\n') { |
michael@0 | 1488 | char* tmp = strdup(str); |
michael@0 | 1489 | tmp[n-1] = 0; |
michael@0 | 1490 | LOG(tmp); |
michael@0 | 1491 | free(tmp); |
michael@0 | 1492 | } else { |
michael@0 | 1493 | LOG(str); |
michael@0 | 1494 | } |
michael@0 | 1495 | } |
michael@0 | 1496 | |
michael@0 | 1497 | // Runs in the unwinder thread -- well, this _is_ the unwinder thread. |
michael@0 | 1498 | static void* unwind_thr_fn(void* exit_nowV) |
michael@0 | 1499 | { |
michael@0 | 1500 | // This is the unwinder thread function. The first thread in must |
michael@0 | 1501 | // create the unwinder library and request it to read the debug |
michael@0 | 1502 | // info. The last thread out must deallocate the library. These |
michael@0 | 1503 | // three tasks (create library, read debuginfo, destroy library) are |
michael@0 | 1504 | // sequentialised by |sLULmutex|. |sLUL| and |sLULcount| may only |
michael@0 | 1505 | // be modified whilst |sLULmutex| is held. |
michael@0 | 1506 | // |
michael@0 | 1507 | // Once the threads are up and running, |sLUL| (the pointer itself, |
michael@0 | 1508 | // that is) stays constant, and the multiple threads may make |
michael@0 | 1509 | // concurrent calls into |sLUL| to do concurrent unwinding. |
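// Illustrative lifecycle with two unwinder threads A and B: A enters
// first, creates sLUL, calls read_procmaps(), and bumps sLULcount
// 0 -> 1; B enters later, merely registers itself, 1 -> 2.  On
// shutdown the last thread out (sLULcount == 1) calls
// NotifyBeforeUnmapAll() and deletes sLUL.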
michael@0 | 1510 | LOG("unwind_thr_fn: START"); |
michael@0 | 1511 | |
michael@0 | 1512 | // A hook for testing LUL: at the first entrance here, check env var |
michael@0 | 1513 | // MOZ_PROFILER_LUL_TEST, and if set, run tests on LUL. Note that |
michael@0 | 1514 | // it is preferable to run the LUL tests via gtest, but gtest is not |
michael@0 | 1515 | // currently supported on all targets that LUL runs on. Hence the |
michael@0 | 1516 | // auxiliary mechanism here is also needed. |
michael@0 | 1517 | bool doLulTest = false; |
michael@0 | 1518 | |
michael@0 | 1519 | mozilla::DebugOnly<int> r = pthread_mutex_lock(&sLULmutex); |
michael@0 | 1520 | MOZ_ASSERT(!r); |
michael@0 | 1521 | |
michael@0 | 1522 | if (!sLUL) { |
michael@0 | 1523 | // sLUL hasn't been allocated, so we must be the first thread in. |
michael@0 | 1524 | sLUL = new lul::LUL(logging_sink_for_LUL); |
michael@0 | 1525 | MOZ_ASSERT(sLUL); |
michael@0 | 1526 | MOZ_ASSERT(sLULcount == 0); |
michael@0 | 1527 | // Register this thread so it can read unwind info and do unwinding. |
michael@0 | 1528 | sLUL->RegisterUnwinderThread(); |
michael@0 | 1529 | // Read all the unwind info currently available. |
michael@0 | 1530 | read_procmaps(sLUL); |
michael@0 | 1531 | // Has a test been requested? |
michael@0 | 1532 | if (PR_GetEnv("MOZ_PROFILER_LUL_TEST")) { |
michael@0 | 1533 | doLulTest = true; |
michael@0 | 1534 | } |
michael@0 | 1535 | } else { |
michael@0 | 1536 | // sLUL has already been allocated, so we can't be the first |
michael@0 | 1537 | // thread in. |
michael@0 | 1538 | MOZ_ASSERT(sLULcount > 0); |
michael@0 | 1539 | // Register this thread so it can do unwinding. |
michael@0 | 1540 | sLUL->RegisterUnwinderThread(); |
michael@0 | 1541 | } |
michael@0 | 1542 | |
michael@0 | 1543 | sLULcount++; |
michael@0 | 1544 | |
michael@0 | 1545 | r = pthread_mutex_unlock(&sLULmutex); |
michael@0 | 1546 | MOZ_ASSERT(!r); |
michael@0 | 1547 | |
michael@0 | 1548 | // If a test has been requested for LUL, run it. Summary results |
michael@0 | 1549 | // are sent to sLUL's logging sink. Note that this happens after |
michael@0 | 1550 | // read_procmaps has read unwind information into sLUL, so that the |
michael@0 | 1551 | // tests have something to unwind against. Without that they'd be |
michael@0 | 1552 | // pretty meaningless. |
michael@0 | 1553 | if (doLulTest) { |
michael@0 | 1554 | int nTests = 0, nTestsPassed = 0; |
michael@0 | 1555 | RunLulUnitTests(&nTests, &nTestsPassed, sLUL); |
michael@0 | 1556 | } |
michael@0 | 1557 | |
michael@0 | 1558 | // At this point, sLUL -- the single instance of the library -- is |
michael@0 | 1559 | // allocated and has read the required unwind info. All running |
michael@0 | 1560 | // threads can now make Unwind() requests of it concurrently, if |
michael@0 | 1561 | // they wish. |
michael@0 | 1562 | |
michael@0 | 1563 | // Now go on to allocate the array of buffers used for communication |
michael@0 | 1564 | // between the sampling threads and the unwinder threads. |
michael@0 | 1565 | |
michael@0 | 1566 | // If we're the first thread in, we'll need to allocate the buffer |
michael@0 | 1567 | // array g_buffers plus the Buffer structs that it points at. |
michael@0 | 1568 | spinLock_acquire(&g_spinLock); |
michael@0 | 1569 | if (g_buffers == nullptr) { |
michael@0 | 1570 | // Drop the lock, allocate the complete structure off-lock, |
michael@0 | 1571 | // reacquire the lock, and try to install it -- which might |
michael@0 | 1572 | // fail if someone else beat us to it. |
michael@0 | 1573 | spinLock_release(&g_spinLock); |
michael@0 | 1574 | UnwinderThreadBuffer** buffers |
michael@0 | 1575 | = (UnwinderThreadBuffer**)malloc(N_UNW_THR_BUFFERS |
michael@0 | 1576 | * sizeof(UnwinderThreadBuffer*)); |
michael@0 | 1577 | MOZ_ASSERT(buffers); |
michael@0 | 1578 | int i; |
michael@0 | 1579 | for (i = 0; i < N_UNW_THR_BUFFERS; i++) { |
michael@0 | 1580 | /* These calloc-ations are shared between the sampling and |
michael@0 | 1581 | unwinding threads. They must be freed after all such threads |
michael@0 | 1582 | have terminated. */ |
michael@0 | 1583 | buffers[i] = (UnwinderThreadBuffer*) |
michael@0 | 1584 | calloc(1, sizeof(UnwinderThreadBuffer)); |
michael@0 | 1585 | MOZ_ASSERT(buffers[i]); |
michael@0 | 1586 | buffers[i]->state = S_EMPTY; |
michael@0 | 1587 | } |
michael@0 | 1588 | /* Try to install it */ |
michael@0 | 1589 | spinLock_acquire(&g_spinLock); |
michael@0 | 1590 | if (g_buffers == nullptr) { |
michael@0 | 1591 | g_buffers = buffers; |
michael@0 | 1592 | spinLock_release(&g_spinLock); |
michael@0 | 1593 | } else { |
michael@0 | 1594 | /* Someone else beat us to it. Release what we just allocated |
michael@0 | 1595 | so as to avoid a leak. */ |
michael@0 | 1596 | spinLock_release(&g_spinLock); |
michael@0 | 1597 | for (i = 0; i < N_UNW_THR_BUFFERS; i++) { |
michael@0 | 1598 | free(buffers[i]); |
michael@0 | 1599 | } |
michael@0 | 1600 | free(buffers); |
michael@0 | 1601 | } |
michael@0 | 1602 | } else { |
michael@0 | 1603 | /* They are already allocated, so just drop the lock and continue. */ |
michael@0 | 1604 | spinLock_release(&g_spinLock); |
michael@0 | 1605 | } |
michael@0 | 1606 | |
michael@0 | 1607 | /* |
michael@0 | 1608 | while (1) { |
michael@0 | 1609 | acq lock |
michael@0 | 1610 | scan to find oldest full |
michael@0 | 1611 | if none { rel lock; sleep; continue } |
michael@0 | 1612 | set buff state to emptying |
michael@0 | 1613 | rel lock |
michael@0 | 1614 | acq MLock // implicitly |
michael@0 | 1615 | process buffer |
michael@0 | 1616 | rel MLock // implicitly |
michael@0 | 1617 | acq lock |
michael@0 | 1618 | set buff state to S_EMPTY |
michael@0 | 1619 | rel lock |
michael@0 | 1620 | } |
michael@0 | 1621 | */ |
michael@0 | 1622 | int* exit_now = (int*)exit_nowV; |
michael@0 | 1623 | int ms_to_sleep_if_empty = 1; |
michael@0 | 1624 | |
michael@0 | 1625 | const int longest_sleep_ms = 1000; |
michael@0 | 1626 | bool show_sleep_message = true; |
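// The empty-poll backoff below therefore sleeps for 1, 3, 5, ..., 19,
// 21 ms, then grows by roughly 1.5x per empty poll (31, 46, 69, ...)
// up to the 1000 ms cap, and resets to 1 ms as soon as a full buffer
// is processed.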
michael@0 | 1627 | |
michael@0 | 1628 | while (1) { |
michael@0 | 1629 | |
michael@0 | 1630 | if (*exit_now != 0) { |
michael@0 | 1631 | *exit_now = 0; |
michael@0 | 1632 | break; |
michael@0 | 1633 | } |
michael@0 | 1634 | |
michael@0 | 1635 | spinLock_acquire(&g_spinLock); |
michael@0 | 1636 | |
michael@0 | 1637 | /* Find the oldest filled buffer, if any. */ |
michael@0 | 1638 | uint64_t oldest_seqNo = ~0ULL; /* infinity */ |
michael@0 | 1639 | int oldest_ix = -1; |
michael@0 | 1640 | int i; |
michael@0 | 1641 | for (i = 0; i < N_UNW_THR_BUFFERS; i++) { |
michael@0 | 1642 | UnwinderThreadBuffer* buff = g_buffers[i]; |
michael@0 | 1643 | if (buff->state != S_FULL) continue; |
michael@0 | 1644 | if (buff->seqNo < oldest_seqNo) { |
michael@0 | 1645 | oldest_seqNo = buff->seqNo; |
michael@0 | 1646 | oldest_ix = i; |
michael@0 | 1647 | } |
michael@0 | 1648 | } |
michael@0 | 1649 | if (oldest_ix == -1) { |
michael@0 | 1650 | /* We didn't find a full buffer. Snooze and try again later. */ |
michael@0 | 1651 | MOZ_ASSERT(oldest_seqNo == ~0ULL); |
michael@0 | 1652 | spinLock_release(&g_spinLock); |
michael@0 | 1653 | if (ms_to_sleep_if_empty > 100 && LOGLEVEL >= 2) { |
michael@0 | 1654 | if (show_sleep_message) |
michael@0 | 1655 | LOGF("BPUnw: unwinder: sleep for %d ms", ms_to_sleep_if_empty); |
michael@0 | 1656 | /* If we've already shown the message for the longest sleep, |
michael@0 | 1657 | don't show it again, until the next round of sleeping |
michael@0 | 1658 | starts. */ |
michael@0 | 1659 | if (ms_to_sleep_if_empty == longest_sleep_ms) |
michael@0 | 1660 | show_sleep_message = false; |
michael@0 | 1661 | } |
michael@0 | 1662 | sleep_ms(ms_to_sleep_if_empty); |
michael@0 | 1663 | if (ms_to_sleep_if_empty < 20) { |
michael@0 | 1664 | ms_to_sleep_if_empty += 2; |
michael@0 | 1665 | } else { |
michael@0 | 1666 | ms_to_sleep_if_empty = (15 * ms_to_sleep_if_empty) / 10; |
michael@0 | 1667 | if (ms_to_sleep_if_empty > longest_sleep_ms) |
michael@0 | 1668 | ms_to_sleep_if_empty = longest_sleep_ms; |
michael@0 | 1669 | } |
michael@0 | 1670 | continue; |
michael@0 | 1671 | } |
michael@0 | 1672 | |
michael@0 | 1673 | /* We found a full buffer. Mark it as 'ours' and drop the |
michael@0 | 1674 | lock; then we can safely run the unwinder on it. */ |
michael@0 | 1675 | UnwinderThreadBuffer* buff = g_buffers[oldest_ix]; |
michael@0 | 1676 | MOZ_ASSERT(buff->state == S_FULL); |
michael@0 | 1677 | buff->state = S_EMPTYING; |
michael@0 | 1678 | spinLock_release(&g_spinLock); |
michael@0 | 1679 | |
michael@0 | 1680 | /* Unwind.  Here we can do anything we like, since any resource |
michael@0 | 1681 | stalls we may encounter (e.g. malloc locks) in competition |
michael@0 | 1682 | with signal handler instances will be short-lived, because |
michael@0 | 1683 | the signal handler is guaranteed nonblocking. */ |
michael@0 | 1684 | if (0) LOGF("BPUnw: unwinder: seqNo %llu: emptying buf %d\n", |
michael@0 | 1685 | (unsigned long long int)oldest_seqNo, oldest_ix); |
michael@0 | 1686 | |
michael@0 | 1687 | process_buffer(buff, oldest_ix); |
michael@0 | 1688 | |
michael@0 | 1689 | /* And .. we're done. Mark the buffer as empty so it can be |
michael@0 | 1690 | reused. First though, unmap any of the entsPages that got |
michael@0 | 1691 | mapped during filling. */ |
michael@0 | 1692 | for (i = 0; i < N_PROF_ENT_PAGES; i++) { |
michael@0 | 1693 | if (buff->entsPages[i] == ProfEntsPage_INVALID) |
michael@0 | 1694 | continue; |
michael@0 | 1695 | munmap_ProfEntsPage(buff->entsPages[i]); |
michael@0 | 1696 | buff->entsPages[i] = ProfEntsPage_INVALID; |
michael@0 | 1697 | } |
michael@0 | 1698 | |
michael@0 | 1699 | (void)VALGRIND_MAKE_MEM_UNDEFINED(&buff->stackImg.mContents[0], |
michael@0 | 1700 | lul::N_STACK_BYTES); |
michael@0 | 1701 | spinLock_acquire(&g_spinLock); |
michael@0 | 1702 | MOZ_ASSERT(buff->state == S_EMPTYING); |
michael@0 | 1703 | buff->state = S_EMPTY; |
michael@0 | 1704 | spinLock_release(&g_spinLock); |
michael@0 | 1705 | ms_to_sleep_if_empty = 1; |
michael@0 | 1706 | show_sleep_message = true; |
michael@0 | 1707 | } |
michael@0 | 1708 | |
michael@0 | 1709 | // This unwinder thread is exiting. If it's the last one out, |
michael@0 | 1710 | // shut down and deallocate the unwinder library. |
michael@0 | 1711 | r = pthread_mutex_lock(&sLULmutex); |
michael@0 | 1712 | MOZ_ASSERT(!r); |
michael@0 | 1713 | |
michael@0 | 1714 | MOZ_ASSERT(sLULcount > 0); |
michael@0 | 1715 | if (sLULcount == 1) { |
michael@0 | 1716 | // Tell the library to discard unwind info for the entire address |
michael@0 | 1717 | // space. |
michael@0 | 1718 | sLUL->NotifyBeforeUnmapAll(); |
michael@0 | 1719 | |
michael@0 | 1720 | delete sLUL; |
michael@0 | 1721 | sLUL = nullptr; |
michael@0 | 1722 | } |
michael@0 | 1723 | |
michael@0 | 1724 | sLULcount--; |
michael@0 | 1725 | |
michael@0 | 1726 | r = pthread_mutex_unlock(&sLULmutex); |
michael@0 | 1727 | MOZ_ASSERT(!r); |
michael@0 | 1728 | |
michael@0 | 1729 | LOG("unwind_thr_fn: STOP"); |
michael@0 | 1730 | return nullptr; |
michael@0 | 1731 | } |
michael@0 | 1732 | |
michael@0 | 1733 | static void finish_sync_buffer(ThreadProfile* profile, |
michael@0 | 1734 | UnwinderThreadBuffer* buff, |
michael@0 | 1735 | void* /* ucontext_t*, really */ ucV) |
michael@0 | 1736 | { |
michael@0 | 1737 | SyncProfile* syncProfile = profile->AsSyncProfile(); |
michael@0 | 1738 | MOZ_ASSERT(syncProfile); |
michael@0 | 1739 | SyncUnwinderThreadBuffer* utb = static_cast<SyncUnwinderThreadBuffer*>( |
michael@0 | 1740 | syncProfile->GetUWTBuffer()); |
michael@0 | 1741 | fill_buffer(profile, utb->GetBuffer(), ucV); |
michael@0 | 1742 | utb->GetBuffer()->state = S_FULL; |
michael@0 | 1743 | PseudoStack* stack = profile->GetPseudoStack(); |
michael@0 | 1744 | stack->addLinkedUWTBuffer(utb); |
michael@0 | 1745 | } |
michael@0 | 1746 | |
michael@0 | 1747 | static void release_sync_buffer(LinkedUWTBuffer* buff) |
michael@0 | 1748 | { |
michael@0 | 1749 | SyncUnwinderThreadBuffer* data = static_cast<SyncUnwinderThreadBuffer*>(buff); |
michael@0 | 1750 | MOZ_ASSERT(data->GetBuffer()->state == S_EMPTY); |
michael@0 | 1751 | delete data; |
michael@0 | 1752 | } |
michael@0 | 1753 | |
michael@0 | 1754 | //////////////////////////////////////////////////////////////// |
michael@0 | 1755 | //////////////////////////////////////////////////////////////// |
michael@0 | 1756 | //////////////////////////////////////////////////////////////// |
michael@0 | 1757 | //////////////////////////////////////////////////////////////// |
michael@0 | 1758 | //////////////////////////////////////////////////////////////// |
michael@0 | 1759 | //////////////////////////////////////////////////////////////// |
michael@0 | 1760 | |
michael@0 | 1761 | // Keeps count of how frames were recovered (context, CFI or stack |
michael@0 | 1762 | // scanning), which is useful for diagnostic purposes. |
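// For example (numbers invented), a printout of
//   "BPUnw frame stats: TOTAL  5120 CTX  960 CFI 3980 SCAN  180"
// would mean that of the ~5120 frames counted since the previous
// printout, 960 were context (innermost) frames, 3980 were recovered
// via CFI and 180 via stack scanning.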
michael@0 | 1763 | static void stats_notify_frame(int n_context, int n_cfi, int n_scanned) |
michael@0 | 1764 | { |
michael@0 | 1765 | // Gather stats in intervals. |
michael@0 | 1766 | static unsigned int nf_total = 0; // total frames since last printout |
michael@0 | 1767 | static unsigned int nf_CONTEXT = 0; |
michael@0 | 1768 | static unsigned int nf_CFI = 0; |
michael@0 | 1769 | static unsigned int nf_SCANNED = 0; |
michael@0 | 1770 | |
michael@0 | 1771 | nf_CONTEXT += n_context; |
michael@0 | 1772 | nf_CFI += n_cfi; |
michael@0 | 1773 | nf_SCANNED += n_scanned; |
michael@0 | 1774 | nf_total += (n_context + n_cfi + n_scanned); |
michael@0 | 1775 | |
michael@0 | 1776 | if (nf_total >= 5000) { |
michael@0 | 1777 | LOGF("BPUnw frame stats: TOTAL %5u" |
michael@0 | 1778 | " CTX %4u CFI %4u SCAN %4u", |
michael@0 | 1779 | nf_total, nf_CONTEXT, nf_CFI, nf_SCANNED); |
michael@0 | 1780 | nf_total = 0; |
michael@0 | 1781 | nf_CONTEXT = 0; |
michael@0 | 1782 | nf_CFI = 0; |
michael@0 | 1783 | nf_SCANNED = 0; |
michael@0 | 1784 | } |
michael@0 | 1785 | } |
michael@0 | 1786 | |
michael@0 | 1787 | static |
michael@0 | 1788 | void do_lul_unwind_Buffer(/*OUT*/PCandSP** pairs, |
michael@0 | 1789 | /*OUT*/unsigned int* nPairs, |
michael@0 | 1790 | UnwinderThreadBuffer* buff, |
michael@0 | 1791 | int buffNo /* for debug printing only */) |
michael@0 | 1792 | { |
michael@0 | 1793 | # if defined(SPS_ARCH_amd64) || defined(SPS_ARCH_x86) |
michael@0 | 1794 | lul::UnwindRegs startRegs = buff->startRegs; |
michael@0 | 1795 | if (0) { |
michael@0 | 1796 | LOGF("Initial RIP = 0x%llx", (unsigned long long int)startRegs.xip.Value()); |
michael@0 | 1797 | LOGF("Initial RSP = 0x%llx", (unsigned long long int)startRegs.xsp.Value()); |
michael@0 | 1798 | LOGF("Initial RBP = 0x%llx", (unsigned long long int)startRegs.xbp.Value()); |
michael@0 | 1799 | } |
michael@0 | 1800 | |
michael@0 | 1801 | # elif defined(SPS_ARCH_arm) |
michael@0 | 1802 | lul::UnwindRegs startRegs = buff->startRegs; |
michael@0 | 1803 | if (0) { |
michael@0 | 1804 | LOGF("Initial R15 = 0x%llx", (unsigned long long int)startRegs.r15.Value()); |
michael@0 | 1805 | LOGF("Initial R13 = 0x%llx", (unsigned long long int)startRegs.r13.Value()); |
michael@0 | 1806 | } |
michael@0 | 1807 | |
michael@0 | 1808 | # else |
michael@0 | 1809 | # error "Unknown plat" |
michael@0 | 1810 | # endif |
michael@0 | 1811 | |
michael@0 | 1812 | // FIXME: should we reinstate the ability to use separate debug objects? |
michael@0 | 1813 | // /* Make up a list of places where the debug objects might be. */ |
michael@0 | 1814 | // std::vector<std::string> debug_dirs; |
michael@0 | 1815 | # if defined(SPS_OS_linux) |
michael@0 | 1816 | // debug_dirs.push_back("/usr/lib/debug/lib"); |
michael@0 | 1817 | // debug_dirs.push_back("/usr/lib/debug/usr/lib"); |
michael@0 | 1818 | // debug_dirs.push_back("/usr/lib/debug/lib/x86_64-linux-gnu"); |
michael@0 | 1819 | // debug_dirs.push_back("/usr/lib/debug/usr/lib/x86_64-linux-gnu"); |
michael@0 | 1820 | # elif defined(SPS_OS_android) |
michael@0 | 1821 | // debug_dirs.push_back("/sdcard/symbols/system/lib"); |
michael@0 | 1822 | // debug_dirs.push_back("/sdcard/symbols/system/bin"); |
michael@0 | 1823 | # elif defined(SPS_OS_darwin) |
michael@0 | 1824 | // /* Nothing */ |
michael@0 | 1825 | # else |
michael@0 | 1826 | # error "Unknown plat" |
michael@0 | 1827 | # endif |
michael@0 | 1828 | |
michael@0 | 1829 | // Set the max number of scanned or otherwise dubious frames |
michael@0 | 1830 | // to the user-specified limit. |
michael@0 | 1831 | size_t scannedFramesAllowed |
michael@0 | 1832 | = std::min(std::max(0, sUnwindStackScan), MAX_NATIVE_FRAMES); |
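// E.g. a negative sUnwindStackScan clamps to 0 (no scanned frames
// allowed), and anything above MAX_NATIVE_FRAMES (256) clamps to 256.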
michael@0 | 1833 | |
michael@0 | 1834 | // The max number of frames is MAX_NATIVE_FRAMES, so as to avoid |
michael@0 | 1835 | // the unwinder wasting a lot of time looping on corrupted stacks. |
michael@0 | 1836 | uintptr_t framePCs[MAX_NATIVE_FRAMES]; |
michael@0 | 1837 | uintptr_t frameSPs[MAX_NATIVE_FRAMES]; |
michael@0 | 1838 | size_t framesAvail = mozilla::ArrayLength(framePCs); |
michael@0 | 1839 | size_t framesUsed = 0; |
michael@0 | 1840 | size_t scannedFramesAcquired = 0; |
michael@0 | 1841 | sLUL->Unwind( &framePCs[0], &frameSPs[0], |
michael@0 | 1842 | &framesUsed, &scannedFramesAcquired, |
michael@0 | 1843 | framesAvail, scannedFramesAllowed, |
michael@0 | 1844 | &startRegs, &buff->stackImg ); |
michael@0 | 1845 | |
michael@0 | 1846 | if (LOGLEVEL >= 2) |
michael@0 | 1847 | stats_notify_frame(/* context */ 1, |
michael@0 | 1848 | /* cfi */ framesUsed - 1 - scannedFramesAcquired, |
michael@0 | 1849 | /* scanned */ scannedFramesAcquired); |
michael@0 | 1850 | |
michael@0 | 1851 | // PC values are now in framePCs[0 .. framesUsed-1], with [0] being |
michael@0 | 1852 | // the innermost frame. SP values are likewise in frameSPs[]. |
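// The copy loop below reverses that order.  For instance, with
// framesUsed == 3 and framePCs = {pcInner, pcMid, pcOuter}, pairs[] ends
// up as {pcOuter, pcMid, pcInner}, i.e. outermost first, which is the
// order the merge loop in process_buffer() expects.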
michael@0 | 1853 | *pairs = (PCandSP*)calloc(framesUsed, sizeof(PCandSP)); |
michael@0 | 1854 | *nPairs = framesUsed; |
michael@0 | 1855 | if (*pairs == nullptr) { |
michael@0 | 1856 | *nPairs = 0; |
michael@0 | 1857 | return; |
michael@0 | 1858 | } |
michael@0 | 1859 | |
michael@0 | 1860 | if (framesUsed > 0) { |
michael@0 | 1861 | for (unsigned int frame_index = 0; |
michael@0 | 1862 | frame_index < framesUsed; ++frame_index) { |
michael@0 | 1863 | (*pairs)[framesUsed-1-frame_index].pc = framePCs[frame_index]; |
michael@0 | 1864 | (*pairs)[framesUsed-1-frame_index].sp = frameSPs[frame_index]; |
michael@0 | 1865 | } |
michael@0 | 1866 | } |
michael@0 | 1867 | |
michael@0 | 1868 | if (LOGLEVEL >= 3) { |
michael@0 | 1869 | LOGF("BPUnw: unwinder: seqNo %llu, buf %d: got %u frames", |
michael@0 | 1870 | (unsigned long long int)buff->seqNo, buffNo, |
michael@0 | 1871 | (unsigned int)framesUsed); |
michael@0 | 1872 | } |
michael@0 | 1873 | |
michael@0 | 1874 | if (LOGLEVEL >= 2) { |
michael@0 | 1875 | if (0 == (g_stats_totalSamples % 1000)) |
michael@0 | 1876 | LOGF("BPUnw: %llu total samples, %llu failed (buffer unavail), " |
michael@0 | 1877 | "%llu failed (thread unreg'd), ", |
michael@0 | 1878 | (unsigned long long int)g_stats_totalSamples, |
michael@0 | 1879 | (unsigned long long int)g_stats_noBuffAvail, |
michael@0 | 1880 | (unsigned long long int)g_stats_thrUnregd); |
michael@0 | 1881 | } |
michael@0 | 1882 | } |
michael@0 | 1883 | |
michael@0 | 1884 | #endif /* defined(SPS_OS_windows) || defined(SPS_OS_darwin) */ |