/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include <stdio.h>
#include <signal.h>
#include <string.h>
#include <stdlib.h>
#include <time.h>

#ifdef MOZ_VALGRIND
# include <valgrind/helgrind.h>
# include <valgrind/memcheck.h>
#else
# define VALGRIND_HG_MUTEX_LOCK_PRE(_mx,_istry)  /* */
# define VALGRIND_HG_MUTEX_LOCK_POST(_mx)        /* */
# define VALGRIND_HG_MUTEX_UNLOCK_PRE(_mx)       /* */
# define VALGRIND_HG_MUTEX_UNLOCK_POST(_mx)      /* */
# define VALGRIND_MAKE_MEM_DEFINED(_addr,_len)   ((void)0)
# define VALGRIND_MAKE_MEM_UNDEFINED(_addr,_len) ((void)0)
#endif

#include "prenv.h"
#include "mozilla/arm.h"
#include "mozilla/DebugOnly.h"
#include <stdint.h>
#include "PlatformMacros.h"

#include "platform.h"
#include <ostream>
#include <string>

#include "ProfileEntry.h"
#include "SyncProfile.h"
#include "AutoObjectMapper.h"
#include "UnwinderThread2.h"

#if !defined(SPS_OS_windows)
# include <sys/mman.h>
#endif

#if defined(SPS_OS_android) || defined(SPS_OS_linux)
# include <ucontext.h>
# include "LulMain.h"
#endif

#include "shared-libraries.h"


// Verbosity of this module, for debugging:
//   0  silent
//   1  adds info about debuginfo load success/failure
//   2  adds slow-summary stats for buffer fills/misses (RECOMMENDED)
//   3  adds per-sample summary lines
//   4  adds per-sample frame listing
// Note that levels 3 and above produce a risk of deadlock and are
// not recommended for extended use.
#define LOGLEVEL 2

// The maximum number of frames that the native unwinder will
// produce.  Setting it too high risks wasting a lot of time
// looping on corrupted stacks.
#define MAX_NATIVE_FRAMES 256

// The 'else' of this covers the entire rest of the file
#if defined(SPS_OS_windows) || defined(SPS_OS_darwin)

//////////////////////////////////////////////////////////
//// BEGIN externally visible functions (WINDOWS and OSX STUBS)

// On Windows and OSX this will all need reworking.
// GeckoProfilerImpl.h will ensure these functions are never actually
// called, so just provide no-op stubs for now.

void uwt__init()
{
}

void uwt__stop()
{
}

void uwt__deinit()
{
}

void uwt__register_thread_for_profiling ( void* stackTop )
{
}

void uwt__unregister_thread_for_profiling()
{
}

LinkedUWTBuffer* utb__acquire_sync_buffer(void* stackTop)
{
  return nullptr;
}

// RUNS IN SIGHANDLER CONTEXT
UnwinderThreadBuffer* uwt__acquire_empty_buffer()
{
  return nullptr;
}

void
utb__finish_sync_buffer(ThreadProfile* aProfile,
                        UnwinderThreadBuffer* utb,
                        void* /* ucontext_t*, really */ ucV)
{
}

void
utb__release_sync_buffer(LinkedUWTBuffer* utb)
{
}

// RUNS IN SIGHANDLER CONTEXT
void
uwt__release_full_buffer(ThreadProfile* aProfile,
                         UnwinderThreadBuffer* utb,
                         void* /* ucontext_t*, really */ ucV )
{
}

// RUNS IN SIGHANDLER CONTEXT
void
utb__addEntry(/*MODIFIED*/UnwinderThreadBuffer* utb, ProfileEntry ent)
{
}

//// END externally visible functions (WINDOWS and OSX STUBS)
//////////////////////////////////////////////////////////

#else // a supported target

//////////////////////////////////////////////////////////
//// BEGIN externally visible functions

// Forward references
// the unwinder thread ID, its fn, and a stop-now flag
static void* unwind_thr_fn ( void* exit_nowV );
static pthread_t unwind_thr;
static int unwind_thr_exit_now = 0; // RACED ON

// Threads must be registered with this file before they can be
// sampled, so that we know the max safe stack address for each
// registered thread.
static void thread_register_for_profiling ( void* stackTop );

// Unregister a thread.
static void thread_unregister_for_profiling();

// Empties out the buffer queue.  Used when the unwinder thread is
// shut down.
static void empty_buffer_queue();

// Allocate a buffer for synchronous unwinding
static LinkedUWTBuffer* acquire_sync_buffer(void* stackTop);

// RUNS IN SIGHANDLER CONTEXT
// Acquire an empty buffer and mark it as FILLING
static UnwinderThreadBuffer* acquire_empty_buffer();

static void finish_sync_buffer(ThreadProfile* aProfile,
                               UnwinderThreadBuffer* utb,
                               void* /* ucontext_t*, really */ ucV);

// Release an empty synchronous unwind buffer.
static void release_sync_buffer(LinkedUWTBuffer* utb);

// RUNS IN SIGHANDLER CONTEXT
// Put this buffer in the queue of stuff going to the unwinder
// thread, and mark it as FULL.  Before doing that, fill in stack
// chunk and register fields if a native unwind is requested.
// APROFILE is where the profile data should be added.  UTB
// is the partially-filled-in buffer, containing ProfileEntries.
// UCV is the ucontext_t* from the signal handler.  If non-nullptr,
// it is taken as a cue to request a native unwind.
static void release_full_buffer(ThreadProfile* aProfile,
                                UnwinderThreadBuffer* utb,
                                void* /* ucontext_t*, really */ ucV );

// RUNS IN SIGHANDLER CONTEXT
static void utb_add_prof_ent(UnwinderThreadBuffer* utb, ProfileEntry ent);

// Do a full memory barrier.
static void do_MBAR();


// This is the single instance of the LUL unwind library that we will
// use.  Currently the library is operated with multiple sampling
// threads but only one unwinder thread.  It should also be possible
// to use the library with multiple unwinder threads, to improve
// throughput.  The setup here makes it possible to use multiple
// unwinder threads, although that is as-yet untested.
//
// |sLULmutex| protects |sLUL| and |sLULcount| and also is used to
// ensure that only the first unwinder thread requests |sLUL| to read
// debug info.  |sLUL| may only be assigned to (and the object it
// points at may only be created/destroyed) when |sLULcount| is zero.
// |sLULcount| holds the number of unwinder threads currently in
// existence.
static pthread_mutex_t sLULmutex = PTHREAD_MUTEX_INITIALIZER;
static lul::LUL* sLUL = nullptr;
static int sLULcount = 0;



void uwt__init()
{
  // Create the unwinder thread.
  MOZ_ASSERT(unwind_thr_exit_now == 0);
  int r = pthread_create( &unwind_thr, nullptr,
                          unwind_thr_fn, (void*)&unwind_thr_exit_now );
  MOZ_ALWAYS_TRUE(r == 0);
}

void uwt__stop()
{
  // Shut down the unwinder thread.
  MOZ_ASSERT(unwind_thr_exit_now == 0);
  unwind_thr_exit_now = 1;
  do_MBAR();
  int r = pthread_join(unwind_thr, nullptr);
  MOZ_ALWAYS_TRUE(r == 0);
}

void uwt__deinit()
{
  empty_buffer_queue();
}

void uwt__register_thread_for_profiling(void* stackTop)
{
  thread_register_for_profiling(stackTop);
}

void uwt__unregister_thread_for_profiling()
{
  thread_unregister_for_profiling();
}

LinkedUWTBuffer* utb__acquire_sync_buffer(void* stackTop)
{
  return acquire_sync_buffer(stackTop);
}

void utb__finish_sync_buffer(ThreadProfile* profile,
                             UnwinderThreadBuffer* buff,
                             void* /* ucontext_t*, really */ ucV)
{
  finish_sync_buffer(profile, buff, ucV);
}

void utb__release_sync_buffer(LinkedUWTBuffer* buff)
{
  release_sync_buffer(buff);
}

// RUNS IN SIGHANDLER CONTEXT
UnwinderThreadBuffer* uwt__acquire_empty_buffer()
{
  return acquire_empty_buffer();
}

// RUNS IN SIGHANDLER CONTEXT
void
uwt__release_full_buffer(ThreadProfile* aProfile,
                         UnwinderThreadBuffer* utb,
                         void* /* ucontext_t*, really */ ucV )
{
  release_full_buffer( aProfile, utb, ucV );
}

// RUNS IN SIGHANDLER CONTEXT
void
utb__addEntry(/*MODIFIED*/UnwinderThreadBuffer* utb, ProfileEntry ent)
{
  utb_add_prof_ent(utb, ent);
}

//// END externally visible functions
//////////////////////////////////////////////////////////


//////////////////////////////////////////////////////////
//// BEGIN type UnwindThreadBuffer

static_assert(sizeof(uint32_t) == 4, "uint32_t size incorrect");
static_assert(sizeof(uint64_t) == 8, "uint64_t size incorrect");
static_assert(sizeof(uintptr_t) == sizeof(void*),
              "uintptr_t size incorrect");

typedef
  struct {
    uint64_t rsp;
    uint64_t rbp;
    uint64_t rip;
  }
  AMD64Regs;

typedef
  struct {
    uint32_t r15;
    uint32_t r14;
    uint32_t r13;
    uint32_t r12;
    uint32_t r11;
    uint32_t r7;
  }
  ARMRegs;

typedef
  struct {
    uint32_t esp;
    uint32_t ebp;
    uint32_t eip;
  }
  X86Regs;

#if defined(SPS_ARCH_amd64)
typedef AMD64Regs ArchRegs;
#elif defined(SPS_ARCH_arm)
typedef ARMRegs ArchRegs;
#elif defined(SPS_ARCH_x86)
typedef X86Regs ArchRegs;
#else
# error "Unknown plat"
#endif

#if defined(SPS_ARCH_amd64) || defined(SPS_ARCH_arm) || defined(SPS_ARCH_x86)
# define SPS_PAGE_SIZE 4096
#else
# error "Unknown plat"
#endif

typedef enum { S_EMPTY, S_FILLING, S_EMPTYING, S_FULL } State;

typedef struct { uintptr_t val; } SpinLock;

/* CONFIGURABLE */
/* The number of fixed ProfileEntry slots.  If more are required, they
   are placed in mmap'd pages. */
#define N_FIXED_PROF_ENTS 20

/* CONFIGURABLE */
/* The number of extra pages of ProfileEntries.  If (on arm) each
   ProfileEntry is 8 bytes, then a page holds 512, and so 100 pages
   is enough to hold 51200. */
#define N_PROF_ENT_PAGES 100

/* DERIVATIVE */
#define N_PROF_ENTS_PER_PAGE (SPS_PAGE_SIZE / sizeof(ProfileEntry))

/* A page of ProfileEntries.  This might actually be slightly smaller
   than a page if SPS_PAGE_SIZE is not an exact multiple of
   sizeof(ProfileEntry). */
typedef
  struct { ProfileEntry ents[N_PROF_ENTS_PER_PAGE]; }
  ProfEntsPage;

#define ProfEntsPage_INVALID ((ProfEntsPage*)1)


/* Fields protected by the spinlock are marked SL */

struct _UnwinderThreadBuffer {
  /*SL*/ State state;
  /* The rest of these are protected, in some sense, by ::state.  If
     ::state is S_FILLING, they are 'owned' by the sampler thread
     that set the state to S_FILLING.  If ::state is S_EMPTYING,
     they are 'owned' by the unwinder thread that set the state to
     S_EMPTYING.  If ::state is S_EMPTY or S_FULL, the buffer isn't
     owned by any thread, and so no thread may access these
     fields. */
  /* Sample number, needed to process samples in order */
  uint64_t seqNo;
  /* The ThreadProfile into which the results are eventually to be
     dumped. */
  ThreadProfile* aProfile;
  /* Pseudostack and other info, always present */
  ProfileEntry entsFixed[N_FIXED_PROF_ENTS];
  ProfEntsPage* entsPages[N_PROF_ENT_PAGES];
  uintptr_t entsUsed;
  /* Do we also have data to do a native unwind? */
  bool haveNativeInfo;
  /* If so, here is the register state and stack.  Unset if
     .haveNativeInfo is false. */
  lul::UnwindRegs startRegs;
  lul::StackImage stackImg;
  void* stackMaxSafe; /* Address for max safe stack reading. */
};
/* Indexing scheme for ents:
     0 <= i < N_FIXED_PROF_ENTS
       is at entsFixed[i]

     i >= N_FIXED_PROF_ENTS
       is at let j = i - N_FIXED_PROF_ENTS
             in   entsPages[j / N_PROF_ENTS_PER_PAGE]
                    ->ents[j % N_PROF_ENTS_PER_PAGE]

   entsPages[] are allocated on demand.  Because zero can
   theoretically be a valid page pointer, use
   ProfEntsPage_INVALID == (ProfEntsPage*)1 to mark invalid pages.

   It follows that the max entsUsed value is N_FIXED_PROF_ENTS +
   N_PROF_ENTS_PER_PAGE * N_PROF_ENT_PAGES, and at that point no more
   ProfileEntries can be stored.
*/
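/* Worked example (illustrative only): with N_FIXED_PROF_ENTS == 20 and,
   as on arm, an 8-byte ProfileEntry giving N_PROF_ENTS_PER_PAGE == 512,
   entry i == 700 has j == 680 and so lives at entsPages[1]->ents[168]. */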


typedef
  struct {
    pthread_t thrId;
    void*     stackTop;
    uint64_t  nSamples;
  }
  StackLimit;

/* Globals -- the buffer array */
#define N_UNW_THR_BUFFERS 10
/*SL*/ static UnwinderThreadBuffer** g_buffers = nullptr;
/*SL*/ static uint64_t g_seqNo = 0;
/*SL*/ static SpinLock g_spinLock = { 0 };

/* Globals -- the thread array.  The array is dynamically expanded on
   demand.  The spinlock must be held when accessing g_stackLimits,
   g_stackLimits[some index], g_stackLimitsUsed and g_stackLimitsSize.
   However, the spinlock must not be held when calling malloc to
   allocate or expand the array, as that would risk deadlock against a
   sampling thread that holds the malloc lock and is trying to acquire
   the spinlock. */
/*SL*/ static StackLimit* g_stackLimits = nullptr;
/*SL*/ static size_t g_stackLimitsUsed = 0;
/*SL*/ static size_t g_stackLimitsSize = 0;

/* Stats -- atomically incremented, no lock needed */
static uintptr_t g_stats_totalSamples = 0; // total # sample attempts
static uintptr_t g_stats_noBuffAvail = 0;  // # failed due to no buffer avail
static uintptr_t g_stats_thrUnregd = 0;    // # failed due to unregistered thr

/* We must be VERY CAREFUL what we do with the spinlock held.  The
   only thing it is safe to do with it held is modify (viz, read or
   write) g_buffers, g_buffers[], g_seqNo, g_buffers[]->state,
   g_stackLimits, g_stackLimits[], g_stackLimitsUsed and
   g_stackLimitsSize.  No arbitrary computations, no syscalls, no
   printfs, no file IO, and absolutely no dynamic memory allocation
   (else we WILL eventually deadlock).

   This applies both to the signal handler and to the unwinder thread.
*/

//// END type UnwindThreadBuffer
//////////////////////////////////////////////////////////

// This is the interface to LUL.
typedef struct { u_int64_t pc; u_int64_t sp; } PCandSP;

// Forward declaration.  Implementation is below.
static
void do_lul_unwind_Buffer(/*OUT*/PCandSP** pairs,
                          /*OUT*/unsigned int* nPairs,
                          UnwinderThreadBuffer* buff,
                          int buffNo /* for debug printing only */);

static bool is_page_aligned(void* v)
{
  uintptr_t w = (uintptr_t) v;
  return (w & (SPS_PAGE_SIZE-1)) == 0;
}


/* Implement machine-word sized atomic compare-and-swap.  Returns true
   on success, false on failure. */
static bool do_CASW(uintptr_t* addr, uintptr_t expected, uintptr_t nyu)
{
#if defined(__GNUC__)
  return __sync_bool_compare_and_swap(addr, expected, nyu);
#else
# error "Unhandled compiler"
#endif
}

/* Hint to the CPU core that we are in a spin-wait loop, and that
   other processors/cores/threads-running-on-the-same-core should be
   given priority on execute resources, if that is possible.  Not
   critical if this is a no-op on some targets. */
static void do_SPINLOOP_RELAX()
{
#if (defined(SPS_ARCH_amd64) || defined(SPS_ARCH_x86)) && defined(__GNUC__)
  __asm__ __volatile__("rep; nop");
#elif defined(SPS_PLAT_arm_android) && MOZILLA_ARM_ARCH >= 7
  __asm__ __volatile__("wfe");
#endif
}

/* Tell any cores snoozing in spin loops to wake up. */
static void do_SPINLOOP_NUDGE()
{
#if (defined(SPS_ARCH_amd64) || defined(SPS_ARCH_x86)) && defined(__GNUC__)
  /* this is a no-op */
#elif defined(SPS_PLAT_arm_android) && MOZILLA_ARM_ARCH >= 7
  __asm__ __volatile__("sev");
#endif
}

/* Perform a full memory barrier. */
static void do_MBAR()
{
#if defined(__GNUC__)
  __sync_synchronize();
#else
# error "Unhandled compiler"
#endif
}

static void spinLock_acquire(SpinLock* sl)
{
  uintptr_t* val = &sl->val;
  VALGRIND_HG_MUTEX_LOCK_PRE(sl, 0/*!isTryLock*/);
  while (1) {
    bool ok = do_CASW( val, 0, 1 );
    if (ok) break;
    do_SPINLOOP_RELAX();
  }
  do_MBAR();
  VALGRIND_HG_MUTEX_LOCK_POST(sl);
}

static void spinLock_release(SpinLock* sl)
{
  uintptr_t* val = &sl->val;
  VALGRIND_HG_MUTEX_UNLOCK_PRE(sl);
  do_MBAR();
  bool ok = do_CASW( val, 1, 0 );
  /* This must succeed at the first try.  To fail would imply that
     the lock was unheld. */
  MOZ_ALWAYS_TRUE(ok);
  do_SPINLOOP_NUDGE();
  VALGRIND_HG_MUTEX_UNLOCK_POST(sl);
}

static void sleep_ms(unsigned int ms)
{
  struct timespec req;
  req.tv_sec = ((time_t)ms) / 1000;
  req.tv_nsec = 1000 * 1000 * (((unsigned long)ms) % 1000);
  nanosleep(&req, nullptr);
}

/* Use CAS to implement standalone atomic increment. */
static void atomic_INC(uintptr_t* loc)
{
  while (1) {
    uintptr_t old = *loc;
    uintptr_t nyu = old + 1;
    bool ok = do_CASW( loc, old, nyu );
    if (ok) break;
  }
}

// Empties out the buffer queue.
static void empty_buffer_queue()
{
  spinLock_acquire(&g_spinLock);

  UnwinderThreadBuffer** tmp_g_buffers = g_buffers;
  g_stackLimitsUsed = 0;
  g_seqNo = 0;
  g_buffers = nullptr;

  spinLock_release(&g_spinLock);

  // Can't do any malloc/free when holding the spinlock.
  free(tmp_g_buffers);

  // We could potentially free up g_stackLimits; but given the
  // complications above involved in resizing it, it's probably
  // safer just to leave it in place.
}


// Registers a thread for profiling.  Detects and ignores duplicate
// registration.
static void thread_register_for_profiling(void* stackTop)
{
  pthread_t me = pthread_self();

  spinLock_acquire(&g_spinLock);

  // tmp copy of g_stackLimitsUsed, to avoid racing in message printing
  int n_used;

  // Ignore spurious calls which aren't really registering anything.
  if (stackTop == nullptr) {
    n_used = g_stackLimitsUsed;
    spinLock_release(&g_spinLock);
    LOGF("BPUnw: [%d total] thread_register_for_profiling"
         "(me=%p, stacktop=NULL) (IGNORED)", n_used, (void*)me);
    return;
  }

  /* Minimal sanity check on stackTop */
  MOZ_ASSERT((void*)&n_used/*any auto var will do*/ < stackTop);

  bool is_dup = false;
  for (size_t i = 0; i < g_stackLimitsUsed; i++) {
    if (g_stackLimits[i].thrId == me) {
      is_dup = true;
      break;
    }
  }

  if (is_dup) {
    /* It's a duplicate registration.  Ignore it: drop the lock and
       return. */
    n_used = g_stackLimitsUsed;
    spinLock_release(&g_spinLock);

    LOGF("BPUnw: [%d total] thread_register_for_profiling"
         "(me=%p, stacktop=%p) (DUPLICATE)", n_used, (void*)me, stackTop);
    return;
  }

  /* Make sure the g_stackLimits array is large enough to accommodate
     this new entry.  This is tricky.  If it isn't large enough, we
     can malloc a larger version, but we have to do that without
     holding the spinlock, else we risk deadlock.  The deadlock
     scenario is:

       Some other thread that is being sampled
                                        This thread

       call malloc                      call this function
       acquire malloc lock              acquire the spinlock
       (sampling signal)                discover thread array not big enough,
       call uwt__acquire_empty_buffer   call malloc to make it larger
       acquire the spinlock             acquire malloc lock

     This gives an inconsistent lock acquisition order on the malloc
     lock and spinlock, hence risk of deadlock.

     Allocating more space for the array without holding the spinlock
     implies tolerating races against other thread(s) who are also
     trying to expand the array.  How can we detect if we have been
     out-raced?  Every successful expansion of g_stackLimits[] results
     in an increase in g_stackLimitsSize.  Hence we can detect if we
     got out-raced by remembering g_stackLimitsSize before we dropped
     the spinlock and checking if it has changed after the spinlock is
     reacquired. */
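  /* In outline, the protocol implemented below: remember
     g_stackLimitsSize, drop the spinlock, malloc a larger array, and
     retake the spinlock.  If the size changed in the meantime we were
     out-raced, so free the new array and restart; otherwise copy the
     old entries across and install the new array. */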

  MOZ_ASSERT(g_stackLimitsUsed <= g_stackLimitsSize);

  if (g_stackLimitsUsed == g_stackLimitsSize) {
    /* g_stackLimits[] is full; resize it. */

    size_t old_size = g_stackLimitsSize;
    size_t new_size = old_size == 0 ? 4 : (2 * old_size);

    spinLock_release(&g_spinLock);
    StackLimit* new_arr = (StackLimit*)malloc(new_size * sizeof(StackLimit));
    if (!new_arr)
      return;

    spinLock_acquire(&g_spinLock);

    if (old_size != g_stackLimitsSize) {
      /* We've been out-raced.  Instead of trying to deal in-line with
         this extremely rare case, just start all over again by
         tail-calling this routine. */
      spinLock_release(&g_spinLock);
      free(new_arr);
      thread_register_for_profiling(stackTop);
      return;
    }

    memcpy(new_arr, g_stackLimits, old_size * sizeof(StackLimit));
    if (g_stackLimits)
      free(g_stackLimits);

    g_stackLimits = new_arr;

    MOZ_ASSERT(g_stackLimitsSize < new_size);
    g_stackLimitsSize = new_size;
  }

  MOZ_ASSERT(g_stackLimitsUsed < g_stackLimitsSize);

  /* Finally, we have a safe place to put the new entry. */

  // Round |stackTop| up to the end of the containing page.  We may
  // as well do this -- there's no danger of a fault, and we might
  // get a few more base-of-the-stack frames as a result.  This
  // assumes that no target has a page size smaller than 4096.
  uintptr_t stackTopR = (uintptr_t)stackTop;
  stackTopR = (stackTopR & ~(uintptr_t)4095) + (uintptr_t)4095;
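  // Illustration only: a stackTop of 0x7fff1234 rounds up to
  // 0x7fff1fff, the last byte of its containing 4096-byte page.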

  g_stackLimits[g_stackLimitsUsed].thrId = me;
  g_stackLimits[g_stackLimitsUsed].stackTop = (void*)stackTopR;
  g_stackLimits[g_stackLimitsUsed].nSamples = 0;
  g_stackLimitsUsed++;

  n_used = g_stackLimitsUsed;
  spinLock_release(&g_spinLock);

  LOGF("BPUnw: [%d total] thread_register_for_profiling"
       "(me=%p, stacktop=%p)", n_used, (void*)me, stackTop);
}

// Deregisters a thread from profiling.  Detects and ignores attempts
// to deregister a not-registered thread.
static void thread_unregister_for_profiling()
{
  spinLock_acquire(&g_spinLock);

  // tmp copy of g_stackLimitsUsed, to avoid racing in message printing
  size_t n_used;

  size_t i;
  bool found = false;
  pthread_t me = pthread_self();
  for (i = 0; i < g_stackLimitsUsed; i++) {
    if (g_stackLimits[i].thrId == me)
      break;
  }
  if (i < g_stackLimitsUsed) {
    // Found this entry.  Slide the remaining ones down one place.
    for (; i+1 < g_stackLimitsUsed; i++) {
      g_stackLimits[i] = g_stackLimits[i+1];
    }
    g_stackLimitsUsed--;
    found = true;
  }

  n_used = g_stackLimitsUsed;

  spinLock_release(&g_spinLock);
  LOGF("BPUnw: [%d total] thread_unregister_for_profiling(me=%p) %s",
       (int)n_used, (void*)me, found ? "" : " (NOT REGISTERED) ");
}


__attribute__((unused))
static void show_registered_threads()
{
  size_t i;
  spinLock_acquire(&g_spinLock);
  for (i = 0; i < g_stackLimitsUsed; i++) {
    LOGF("[%d]  pthread_t=%p  nSamples=%llu",
         (int)i, (void*)g_stackLimits[i].thrId,
         (unsigned long long int)g_stackLimits[i].nSamples);
  }
  spinLock_release(&g_spinLock);
}

// RUNS IN SIGHANDLER CONTEXT
/* The calling thread owns the buffer, as denoted by its state being
   S_FILLING.  So we can mess with it without further locking. */
static void init_empty_buffer(UnwinderThreadBuffer* buff, void* stackTop)
{
  /* Now we own the buffer, initialise it. */
  buff->aProfile = nullptr;
  buff->entsUsed = 0;
  buff->haveNativeInfo = false;
  buff->stackImg.mLen = 0;
  buff->stackImg.mStartAvma = 0;
  buff->stackMaxSafe = stackTop; /* We will need this in
                                    release_full_buffer() */
  for (size_t i = 0; i < N_PROF_ENT_PAGES; i++)
    buff->entsPages[i] = ProfEntsPage_INVALID;
}

struct SyncUnwinderThreadBuffer : public LinkedUWTBuffer
{
  UnwinderThreadBuffer* GetBuffer()
  {
    return &mBuff;
  }

  UnwinderThreadBuffer mBuff;
};

static LinkedUWTBuffer* acquire_sync_buffer(void* stackTop)
{
  MOZ_ASSERT(stackTop);
  SyncUnwinderThreadBuffer* buff = new SyncUnwinderThreadBuffer();
  // We can set state without locking here because this thread owns the buffer
  // and it is going to fill it itself.
  buff->GetBuffer()->state = S_FILLING;
  init_empty_buffer(buff->GetBuffer(), stackTop);
  return buff;
}

// RUNS IN SIGHANDLER CONTEXT
static UnwinderThreadBuffer* acquire_empty_buffer()
{
  /* acq lock
     if buffers == nullptr { rel lock; exit }
     scan to find a free buff; if none { rel lock; exit }
     set buff state to S_FILLING
     fillseqno++; and remember it
     rel lock
  */
  size_t i;

  atomic_INC( &g_stats_totalSamples );

  /* This code is critical.  We are in a signal handler and possibly
     with the malloc lock held.  So we can't allocate any heap, and
     can't safely call any C library functions, not even the pthread_
     functions.  And we certainly can't do any syscalls.  In short,
     this function needs to be self contained, not do any allocation,
     and not hold on to the spinlock for any significant length of
     time. */

  spinLock_acquire(&g_spinLock);

  /* First of all, look for this thread's entry in g_stackLimits[].
     We need to find it in order to figure out how much stack we can
     safely copy into the sample.  This assumes that pthread_self()
     is safe to call in a signal handler, which strikes me as highly
     likely. */
  pthread_t me = pthread_self();
  MOZ_ASSERT(g_stackLimitsUsed <= g_stackLimitsSize);
  for (i = 0; i < g_stackLimitsUsed; i++) {
    if (g_stackLimits[i].thrId == me)
      break;
  }

  /* If the thread isn't registered for profiling, just ignore the call
     and return nullptr. */
  if (i == g_stackLimitsUsed) {
    spinLock_release(&g_spinLock);
    atomic_INC( &g_stats_thrUnregd );
    return nullptr;
  }

  /* "this thread is registered for profiling" */
  MOZ_ASSERT(i < g_stackLimitsUsed);

  /* The furthest point that we can safely scan back up the stack. */
  void* myStackTop = g_stackLimits[i].stackTop;
  g_stackLimits[i].nSamples++;

  /* Try to find a free buffer to use. */
  if (g_buffers == nullptr) {
    /* The unwinder thread hasn't allocated any buffers yet.
       Nothing we can do. */
    spinLock_release(&g_spinLock);
    atomic_INC( &g_stats_noBuffAvail );
    return nullptr;
  }

  for (i = 0; i < N_UNW_THR_BUFFERS; i++) {
    if (g_buffers[i]->state == S_EMPTY)
      break;
  }
  MOZ_ASSERT(i <= N_UNW_THR_BUFFERS);

  if (i == N_UNW_THR_BUFFERS) {
    /* Again, no free buffers .. give up. */
    spinLock_release(&g_spinLock);
    atomic_INC( &g_stats_noBuffAvail );
    if (LOGLEVEL >= 3)
      LOG("BPUnw: handler: no free buffers");
    return nullptr;
  }

  /* So we can use this one safely.  Whilst still holding the lock,
     mark the buffer as belonging to us, and increment the sequence
     number. */
  UnwinderThreadBuffer* buff = g_buffers[i];
  MOZ_ASSERT(buff->state == S_EMPTY);
  buff->state = S_FILLING;
  buff->seqNo = g_seqNo;
  g_seqNo++;

  /* And drop the lock.  We own the buffer, so go on and fill it. */
  spinLock_release(&g_spinLock);

  /* Now we own the buffer, initialise it. */
  init_empty_buffer(buff, myStackTop);
  return buff;
}

// RUNS IN SIGHANDLER CONTEXT
/* The calling thread owns the buffer, as denoted by its state being
   S_FILLING.  So we can mess with it without further locking. */
static void fill_buffer(ThreadProfile* aProfile,
                        UnwinderThreadBuffer* buff,
                        void* /* ucontext_t*, really */ ucV)
{
  MOZ_ASSERT(buff->state == S_FILLING);

  ////////////////////////////////////////////////////
  // BEGIN fill

  /* The buffer already will have some of its ProfileEntries filled
     in, but everything else needs to be filled in at this point. */
  //LOGF("Release full buffer: %lu ents", buff->entsUsed);
  /* Where the resulting info is to be dumped */
  buff->aProfile = aProfile;

  /* And, if we have register state, that and the stack top */
  buff->haveNativeInfo = ucV != nullptr;
  if (buff->haveNativeInfo) {
# if defined(SPS_PLAT_amd64_linux)
    ucontext_t* uc = (ucontext_t*)ucV;
    mcontext_t* mc = &(uc->uc_mcontext);
    buff->startRegs.xip = lul::TaggedUWord(mc->gregs[REG_RIP]);
    buff->startRegs.xsp = lul::TaggedUWord(mc->gregs[REG_RSP]);
    buff->startRegs.xbp = lul::TaggedUWord(mc->gregs[REG_RBP]);
# elif defined(SPS_PLAT_amd64_darwin)
    ucontext_t* uc = (ucontext_t*)ucV;
    struct __darwin_mcontext64* mc = uc->uc_mcontext;
    struct __darwin_x86_thread_state64* ss = &mc->__ss;
    buff->regs.rip = ss->__rip;
    buff->regs.rsp = ss->__rsp;
    buff->regs.rbp = ss->__rbp;
# elif defined(SPS_PLAT_arm_android)
    ucontext_t* uc = (ucontext_t*)ucV;
    mcontext_t* mc = &(uc->uc_mcontext);
    buff->startRegs.r15 = lul::TaggedUWord(mc->arm_pc);
    buff->startRegs.r14 = lul::TaggedUWord(mc->arm_lr);
    buff->startRegs.r13 = lul::TaggedUWord(mc->arm_sp);
    buff->startRegs.r12 = lul::TaggedUWord(mc->arm_ip);
    buff->startRegs.r11 = lul::TaggedUWord(mc->arm_fp);
    buff->startRegs.r7  = lul::TaggedUWord(mc->arm_r7);
# elif defined(SPS_PLAT_x86_linux) || defined(SPS_PLAT_x86_android)
    ucontext_t* uc = (ucontext_t*)ucV;
    mcontext_t* mc = &(uc->uc_mcontext);
    buff->startRegs.xip = lul::TaggedUWord(mc->gregs[REG_EIP]);
    buff->startRegs.xsp = lul::TaggedUWord(mc->gregs[REG_ESP]);
    buff->startRegs.xbp = lul::TaggedUWord(mc->gregs[REG_EBP]);
# elif defined(SPS_PLAT_x86_darwin)
    ucontext_t* uc = (ucontext_t*)ucV;
    struct __darwin_mcontext32* mc = uc->uc_mcontext;
    struct __darwin_i386_thread_state* ss = &mc->__ss;
    buff->regs.eip = ss->__eip;
    buff->regs.esp = ss->__esp;
    buff->regs.ebp = ss->__ebp;
# else
#   error "Unknown plat"
# endif

    /* Copy up to N_STACK_BYTES from rsp-REDZONE upwards, but not
       going past the stack's registered top point.  Do some basic
       sanity checks too.  This assumes that the TaggedUWord holding
       the stack pointer value is valid, but it should be, since it
       was constructed that way in the code just above. */
    {
# if defined(SPS_PLAT_amd64_linux) || defined(SPS_PLAT_amd64_darwin)
      uintptr_t rEDZONE_SIZE = 128;
      uintptr_t start = buff->startRegs.xsp.Value() - rEDZONE_SIZE;
# elif defined(SPS_PLAT_arm_android)
      uintptr_t rEDZONE_SIZE = 0;
      uintptr_t start = buff->startRegs.r13.Value() - rEDZONE_SIZE;
# elif defined(SPS_PLAT_x86_linux) || defined(SPS_PLAT_x86_darwin) \
       || defined(SPS_PLAT_x86_android)
      uintptr_t rEDZONE_SIZE = 0;
      uintptr_t start = buff->startRegs.xsp.Value() - rEDZONE_SIZE;
# else
#   error "Unknown plat"
# endif
      uintptr_t end = (uintptr_t)buff->stackMaxSafe;
      uintptr_t ws  = sizeof(void*);
      start &= ~(ws-1);
      end   &= ~(ws-1);
      uintptr_t nToCopy = 0;
      if (start < end) {
        nToCopy = end - start;
        if (nToCopy > lul::N_STACK_BYTES)
          nToCopy = lul::N_STACK_BYTES;
      }
      MOZ_ASSERT(nToCopy <= lul::N_STACK_BYTES);
      buff->stackImg.mLen       = nToCopy;
      buff->stackImg.mStartAvma = start;
      if (nToCopy > 0) {
        memcpy(&buff->stackImg.mContents[0], (void*)start, nToCopy);
        (void)VALGRIND_MAKE_MEM_DEFINED(&buff->stackImg.mContents[0], nToCopy);
      }
    }
  } /* if (buff->haveNativeInfo) */
  // END fill
  ////////////////////////////////////////////////////
}

// RUNS IN SIGHANDLER CONTEXT
/* The calling thread owns the buffer, as denoted by its state being
   S_FILLING.  So we can mess with it without further locking. */
static void release_full_buffer(ThreadProfile* aProfile,
                                UnwinderThreadBuffer* buff,
                                void* /* ucontext_t*, really */ ucV )
{
  fill_buffer(aProfile, buff, ucV);
  /* And now relinquish ownership of the buff, so that an unwinder
     thread can pick it up. */
  spinLock_acquire(&g_spinLock);
  buff->state = S_FULL;
  spinLock_release(&g_spinLock);
}

// RUNS IN SIGHANDLER CONTEXT
// Allocate a ProfEntsPage, without using malloc, or return
// ProfEntsPage_INVALID if we can't for some reason.
static ProfEntsPage* mmap_anon_ProfEntsPage()
{
# if defined(SPS_OS_darwin)
  void* v = ::mmap(nullptr, sizeof(ProfEntsPage), PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANON, -1, 0);
# else
  void* v = ::mmap(nullptr, sizeof(ProfEntsPage), PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
# endif
  if (v == MAP_FAILED) {
    return ProfEntsPage_INVALID;
  } else {
    return (ProfEntsPage*)v;
  }
}

// Runs in the unwinder thread
// Free a ProfEntsPage as allocated by mmap_anon_ProfEntsPage
static void munmap_ProfEntsPage(ProfEntsPage* pep)
{
  MOZ_ALWAYS_TRUE(is_page_aligned(pep));
  ::munmap(pep, sizeof(ProfEntsPage));
}


// RUNS IN SIGHANDLER CONTEXT
void
utb_add_prof_ent(/*MODIFIED*/UnwinderThreadBuffer* utb, ProfileEntry ent)
{
  uintptr_t limit
    = N_FIXED_PROF_ENTS + (N_PROF_ENTS_PER_PAGE * N_PROF_ENT_PAGES);
  if (utb->entsUsed == limit) {
    /* We're full.  Now what? */
    LOG("BPUnw: utb__addEntry: NO SPACE for ProfileEntry; ignoring.");
    return;
  }
  MOZ_ASSERT(utb->entsUsed < limit);

  /* Will it fit in the fixed array? */
  if (utb->entsUsed < N_FIXED_PROF_ENTS) {
    utb->entsFixed[utb->entsUsed] = ent;
    utb->entsUsed++;
    return;
  }

  /* No.  Put it in the extras. */
  uintptr_t i     = utb->entsUsed;
  uintptr_t j     = i - N_FIXED_PROF_ENTS;
  uintptr_t j_div = j / N_PROF_ENTS_PER_PAGE; /* page number */
  uintptr_t j_mod = j % N_PROF_ENTS_PER_PAGE; /* page offset */
  ProfEntsPage* pep = utb->entsPages[j_div];
  if (pep == ProfEntsPage_INVALID) {
    pep = mmap_anon_ProfEntsPage();
    if (pep == ProfEntsPage_INVALID) {
      /* Urr, we ran out of memory.  Now what? */
      LOG("BPUnw: utb__addEntry: MMAP FAILED for ProfileEntry; ignoring.");
      return;
    }
    utb->entsPages[j_div] = pep;
  }
  pep->ents[j_mod] = ent;
  utb->entsUsed++;
}


// misc helper
static ProfileEntry utb_get_profent(UnwinderThreadBuffer* buff, uintptr_t i)
{
  MOZ_ASSERT(i < buff->entsUsed);
  if (i < N_FIXED_PROF_ENTS) {
    return buff->entsFixed[i];
  } else {
    uintptr_t j     = i - N_FIXED_PROF_ENTS;
    uintptr_t j_div = j / N_PROF_ENTS_PER_PAGE; /* page number */
    uintptr_t j_mod = j % N_PROF_ENTS_PER_PAGE; /* page offset */
    MOZ_ASSERT(buff->entsPages[j_div] != ProfEntsPage_INVALID);
    return buff->entsPages[j_div]->ents[j_mod];
  }
}

/* Copy ProfileEntries presented to us by the sampling thread.
   Most of them are copied verbatim into |buff->aProfile|,
   except for 'hint' tags, which direct us to do something
   different. */
static void process_buffer(UnwinderThreadBuffer* buff, int oldest_ix)
{
  /* Need to lock |aProfile| so nobody tries to copy out entries
     whilst we are putting them in. */
  buff->aProfile->BeginUnwind();

  /* The buff is a sequence of ProfileEntries (ents).  It has
     this grammar:

     | --pre-tags-- | (h 'P' .. h 'Q')* | --post-tags-- |
                      ^              ^
                      ix_first_hP    ix_last_hQ

     Each (h 'P' .. h 'Q') subsequence represents one pseudostack
     entry.  These, if present, are in the order
     outermost-frame-first, and that is the order that they should
     be copied into aProfile.  The --pre-tags-- and --post-tags--
     are to be copied into the aProfile verbatim, except that they
     may contain the hints "h 'F'" for a flush and "h 'N'" to
     indicate that a native unwind is also required, and must be
     interleaved with the pseudostack entries.

     The hint tags that bound each pseudostack entry, "h 'P'" and "h
     'Q'", are not to be copied into the aProfile -- they are
     present only to make parsing easy here.  Also, the pseudostack
     entries may contain an "'S' (void*)" entry, which is the stack
     pointer value for that entry, and these are also not to be
     copied.
  */
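  /* Purely illustrative instance of that grammar, with frame contents
     elided as "...":

       --pre-tags--  h'N'  h'P' ... 'S' <sp> ... h'Q'  h'P' ... h'Q'  --post-tags--

     that is, two pseudostack frames, outermost first, with an h'N'
     hint requesting that a native unwind be merged in with them. */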
  /* The first thing to do is therefore to find the pseudostack
     entries, if any, and to find out also whether a native unwind
     has been requested. */
  const uintptr_t infUW = ~(uintptr_t)0; // infinity
  bool need_native_unw = false;
  uintptr_t ix_first_hP = infUW; // "not found"
  uintptr_t ix_last_hQ  = infUW; // "not found"

  uintptr_t k;
  for (k = 0; k < buff->entsUsed; k++) {
    ProfileEntry ent = utb_get_profent(buff, k);
    if (ent.is_ent_hint('N')) {
      need_native_unw = true;
    }
    else if (ent.is_ent_hint('P') && ix_first_hP == ~(uintptr_t)0) {
      ix_first_hP = k;
    }
    else if (ent.is_ent_hint('Q')) {
      ix_last_hQ = k;
    }
  }

  if (0) LOGF("BPUnw: ix_first_hP %llu  ix_last_hQ %llu  need_native_unw %llu",
              (unsigned long long int)ix_first_hP,
              (unsigned long long int)ix_last_hQ,
              (unsigned long long int)need_native_unw);

  /* There are four possibilities: native-only, pseudostack-only,
     combined (both), and neither.  We handle all four cases. */

  MOZ_ASSERT( (ix_first_hP == infUW && ix_last_hQ == infUW) ||
              (ix_first_hP != infUW && ix_last_hQ != infUW) );
  bool have_P = ix_first_hP != infUW;
  if (have_P) {
    MOZ_ASSERT(ix_first_hP < ix_last_hQ);
    MOZ_ASSERT(ix_last_hQ <= buff->entsUsed);
  }

  /* Neither N nor P.  This is very unusual but has been observed to happen.
     Just copy to the output. */
  if (!need_native_unw && !have_P) {
    for (k = 0; k < buff->entsUsed; k++) {
      ProfileEntry ent = utb_get_profent(buff, k);
      // action flush-hints
      if (ent.is_ent_hint('F')) { buff->aProfile->flush(); continue; }
      // skip ones we can't copy
      if (ent.is_ent_hint() || ent.is_ent('S')) { continue; }
      // handle GetBacktrace()
      if (ent.is_ent('B')) {
        UnwinderThreadBuffer* buff = (UnwinderThreadBuffer*)ent.get_tagPtr();
        process_buffer(buff, -1);
        continue;
      }
      // and copy everything else
      buff->aProfile->addTag( ent );
    }
  }
  else /* Native-only case. */
  if (need_native_unw && !have_P) {
    for (k = 0; k < buff->entsUsed; k++) {
      ProfileEntry ent = utb_get_profent(buff, k);
      // action a native-unwind-now hint
      if (ent.is_ent_hint('N')) {
        MOZ_ASSERT(buff->haveNativeInfo);
        PCandSP* pairs = nullptr;
        unsigned int nPairs = 0;
        do_lul_unwind_Buffer(&pairs, &nPairs, buff, oldest_ix);
        buff->aProfile->addTag( ProfileEntry('s', "(root)") );
        for (unsigned int i = 0; i < nPairs; i++) {
          /* Skip any outermost frames that
             do_lul_unwind_Buffer didn't give us.  See comments
             on that function for details. */
          if (pairs[i].pc == 0 && pairs[i].sp == 0)
            continue;
          buff->aProfile
            ->addTag( ProfileEntry('l', reinterpret_cast<void*>(pairs[i].pc)) );
        }
        if (pairs)
          free(pairs);
        continue;
      }
      // action flush-hints
      if (ent.is_ent_hint('F')) { buff->aProfile->flush(); continue; }
      // skip ones we can't copy
      if (ent.is_ent_hint() || ent.is_ent('S')) { continue; }
      // handle GetBacktrace()
      if (ent.is_ent('B')) {
        UnwinderThreadBuffer* buff = (UnwinderThreadBuffer*)ent.get_tagPtr();
        process_buffer(buff, -1);
        continue;
      }
      // and copy everything else
      buff->aProfile->addTag( ent );
    }
  }
  else /* Pseudostack-only case */
  if (!need_native_unw && have_P) {
    /* If there's no request for a native stack, it's easy: just
       copy the tags verbatim into aProfile, skipping the ones that
       can't be copied -- 'h' (hint) tags, and "'S' (void*)"
       stack-pointer tags.  Except, insert a sample-start tag when
       we see the start of the first pseudostack frame. */
    for (k = 0; k < buff->entsUsed; k++) {
      ProfileEntry ent = utb_get_profent(buff, k);
      // We need to insert a sample-start tag before the first frame
      if (k == ix_first_hP) {
        buff->aProfile->addTag( ProfileEntry('s', "(root)") );
      }
      // action flush-hints
      if (ent.is_ent_hint('F')) { buff->aProfile->flush(); continue; }
      // skip ones we can't copy
      if (ent.is_ent_hint() || ent.is_ent('S')) { continue; }
      // handle GetBacktrace()
      if (ent.is_ent('B')) {
        UnwinderThreadBuffer* buff = (UnwinderThreadBuffer*)ent.get_tagPtr();
        process_buffer(buff, -1);
        continue;
      }
      // and copy everything else
      buff->aProfile->addTag( ent );
    }
  }
  else /* Combined case */
  if (need_native_unw && have_P)
  {
    /* We need to get a native stacktrace and merge it with the
       pseudostack entries.  This isn't too simple.  First, copy all
       the tags up to the start of the pseudostack tags.  Then
       generate a combined set of tags by native unwind and
       pseudostack.  Then, copy all the stuff after the pseudostack
       tags. */
    MOZ_ASSERT(buff->haveNativeInfo);

    // Get native unwind info
    PCandSP* pairs = nullptr;
    unsigned int n_pairs = 0;
    do_lul_unwind_Buffer(&pairs, &n_pairs, buff, oldest_ix);

    // Entries before the pseudostack frames
    for (k = 0; k < ix_first_hP; k++) {
      ProfileEntry ent = utb_get_profent(buff, k);
      // action flush-hints
      if (ent.is_ent_hint('F')) { buff->aProfile->flush(); continue; }
      // skip ones we can't copy
      if (ent.is_ent_hint() || ent.is_ent('S')) { continue; }
      // handle GetBacktrace()
      if (ent.is_ent('B')) {
        UnwinderThreadBuffer* buff = (UnwinderThreadBuffer*)ent.get_tagPtr();
        process_buffer(buff, -1);
        continue;
      }
      // and copy everything else
      buff->aProfile->addTag( ent );
    }

    // BEGIN merge
    buff->aProfile->addTag( ProfileEntry('s', "(root)") );
    unsigned int next_N = 0; // index in pairs[]
    unsigned int next_P = ix_first_hP; // index in buff profent array
    bool last_was_P = false;
    if (0) LOGF("at mergeloop: n_pairs %llu ix_last_hQ %llu",
                (unsigned long long int)n_pairs,
                (unsigned long long int)ix_last_hQ);
    /* Skip any outermost frames that do_lul_unwind_Buffer
       didn't give us.  See comments on that function for
       details. */
    while (next_N < n_pairs && pairs[next_N].pc == 0 && pairs[next_N].sp == 0)
      next_N++;

    while (true) {
      if (next_P <= ix_last_hQ) {
        // Assert that next_P points at the start of a P entry
        MOZ_ASSERT(utb_get_profent(buff, next_P).is_ent_hint('P'));
      }
      if (next_N >= n_pairs && next_P > ix_last_hQ) {
        // both stacks empty
        break;
      }
      /* Decide which entry to use next:
         If N is empty, must use P, and vice versa
         else
           If the last was P and current P has zero SP, use P
           else
             we assume that both P and N have valid SP, in which case
             use the one with the larger value
      */
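      /* Illustration only: on a downward-growing stack, if
         sp_cur_P == 0x7fff9000 and sp_cur_N == 0x7fff8f80, the P entry
         has the larger SP, hence is the outermore frame, and is
         emitted first. */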
      bool use_P = true;
      if (next_N >= n_pairs) {
        // N empty, use P
        use_P = true;
        if (0) LOG("  P <= no remaining N entries");
      }
      else if (next_P > ix_last_hQ) {
        // P empty, use N
        use_P = false;
        if (0) LOG("  N <= no remaining P entries");
      }
      else {
        // We have at least one N and one P entry available.
        // Scan forwards to find the SP of the current P entry
        u_int64_t sp_cur_P = 0;
        unsigned int m = next_P + 1;
        while (1) {
          /* This assertion should hold because in a well-formed
             input, we must eventually find the hint-Q that marks
             the end of this frame's entries. */
          MOZ_ASSERT(m < buff->entsUsed);
          ProfileEntry ent = utb_get_profent(buff, m);
          if (ent.is_ent_hint('Q'))
            break;
          if (ent.is_ent('S')) {
            sp_cur_P = reinterpret_cast<u_int64_t>(ent.get_tagPtr());
            break;
          }
          m++;
        }
        if (last_was_P && sp_cur_P == 0) {
          if (0) LOG("  P <= last_was_P && sp_cur_P == 0");
          use_P = true;
        } else {
          u_int64_t sp_cur_N = pairs[next_N].sp;
          use_P = (sp_cur_P > sp_cur_N);
          if (0) LOGF("  %s  <=  sps P %p N %p",
                      use_P ? "P" : "N", (void*)(intptr_t)sp_cur_P,
                      (void*)(intptr_t)sp_cur_N);
        }
      }
      /* So, we know which we are going to use. */
      if (use_P) {
        unsigned int m = next_P + 1;
        while (true) {
          MOZ_ASSERT(m < buff->entsUsed);
          ProfileEntry ent = utb_get_profent(buff, m);
          if (ent.is_ent_hint('Q')) {
            next_P = m + 1;
            break;
          }
          // we don't expect a flush-hint here
          MOZ_ASSERT(!ent.is_ent_hint('F'));
          // skip ones we can't copy
          if (ent.is_ent_hint() || ent.is_ent('S')) { m++; continue; }
          // and copy everything else
          buff->aProfile->addTag( ent );
          m++;
        }
      } else {
        buff->aProfile
          ->addTag( ProfileEntry('l', reinterpret_cast<void*>(pairs[next_N].pc)) );
        next_N++;
      }
      /* Remember what we chose, for next time. */
      last_was_P = use_P;
    }

    MOZ_ASSERT(next_P == ix_last_hQ + 1);
    MOZ_ASSERT(next_N == n_pairs);
    // END merge

    // Entries after the pseudostack frames
    for (k = ix_last_hQ+1; k < buff->entsUsed; k++) {
      ProfileEntry ent = utb_get_profent(buff, k);
      // action flush-hints
      if (ent.is_ent_hint('F')) { buff->aProfile->flush(); continue; }
      // skip ones we can't copy
      if (ent.is_ent_hint() || ent.is_ent('S')) { continue; }
      // and copy everything else
      buff->aProfile->addTag( ent );
    }

    // free native unwind info
    if (pairs)
      free(pairs);
  }

#if 0
  bool show = true;
  if (show) LOG("----------------");
  for (k = 0; k < buff->entsUsed; k++) {
    ProfileEntry ent = utb_get_profent(buff, k);
    if (show) ent.log();
    if (ent.is_ent_hint('F')) {
      /* This is a flush-hint */
      buff->aProfile->flush();
    }
    else if (ent.is_ent_hint('N')) {
      /* This is a do-a-native-unwind-right-now hint */
      MOZ_ASSERT(buff->haveNativeInfo);
      PCandSP* pairs = nullptr;
      unsigned int nPairs = 0;
      do_lul_unwind_Buffer(&pairs, &nPairs, buff, oldest_ix);
      buff->aProfile->addTag( ProfileEntry('s', "(root)") );
      for (unsigned int i = 0; i < nPairs; i++) {
        buff->aProfile
          ->addTag( ProfileEntry('l', reinterpret_cast<void*>(pairs[i].pc)) );
      }
      if (pairs)
        free(pairs);
    } else {
      /* Copy in verbatim */
      buff->aProfile->addTag( ent );
    }
  }
#endif

  buff->aProfile->EndUnwind();
}


// Find out, in a platform-dependent way, where the code modules got
// mapped in the process' virtual address space, and get |aLUL| to
// load unwind info for them.
void
read_procmaps(lul::LUL* aLUL)
{
  MOZ_ASSERT(aLUL->CountMappings() == 0);

# if defined(SPS_OS_linux) || defined(SPS_OS_android) || defined(SPS_OS_darwin)
  SharedLibraryInfo info = SharedLibraryInfo::GetInfoForSelf();

  for (size_t i = 0; i < info.GetSize(); i++) {
    const SharedLibrary& lib = info.GetEntry(i);

#if defined(SPS_OS_android) && !defined(MOZ_WIDGET_GONK)
    // We're using faulty.lib.  Use a special-case object mapper.
    AutoObjectMapperFaultyLib mapper(aLUL->mLog);
#else
    // We can use the standard POSIX-based mapper.
    AutoObjectMapperPOSIX mapper(aLUL->mLog);
#endif

    // Ask |mapper| to map the object.  Then hand its mapped address
    // to NotifyAfterMap().
    void*  image = nullptr;
    size_t size  = 0;
    bool ok = mapper.Map(&image, &size, lib.GetName());
    if (ok && image && size > 0) {
      aLUL->NotifyAfterMap(lib.GetStart(), lib.GetEnd()-lib.GetStart(),
                           lib.GetName().c_str(), image);
    } else if (!ok && lib.GetName() == "") {
      // The object has no name and (as a consequence) the mapper
      // failed to map it.  This happens on Linux, where
      // GetInfoForSelf() produces two such mappings: one for the
      // executable and one for the VDSO.  The executable one isn't a
      // big deal since there's not much interesting code in there,
      // but the VDSO one is a problem on x86-{linux,android} because
      // lack of knowledge about the mapped area inhibits LUL's
      // special __kernel_syscall handling.  Hence notify |aLUL| at
      // least of the mapping, even though it can't read any unwind
      // information for the area.
      aLUL->NotifyExecutableArea(lib.GetStart(), lib.GetEnd()-lib.GetStart());
    }

    // |mapper| goes out of scope at this point and so its destructor
    // unmaps the object.
  }

# else
#  error "Unknown platform"
# endif
}

// LUL needs a callback for its logging sink.
static void
logging_sink_for_LUL(const char* str) {
  // Ignore any trailing \n, since LOG will add one anyway.
  size_t n = strlen(str);
  if (n > 0 && str[n-1] == '\n') {
    char* tmp = strdup(str);
    tmp[n-1] = 0;
    LOG(tmp);
    free(tmp);
  } else {
    LOG(str);
  }
}

1497 // Runs in the unwinder thread -- well, this _is_ the unwinder thread. |
|
1498 static void* unwind_thr_fn(void* exit_nowV) |
|
1499 { |
|
1500 // This is the unwinder thread function. The first thread in must |
|
1501 // create the unwinder library and request it to read the debug |
|
1502 // info. The last thread out must deallocate the library. These |
|
1503 // three tasks (create library, read debuginfo, destroy library) are |
|
1504 // sequentialised by |sLULmutex|. |sLUL| and |sLULcount| may only |
|
1505 // be modified whilst |sLULmutex| is held. |
|
1506 // |
|
1507 // Once the threads are up and running, |sLUL| (the pointer itself, |
|
1508 // that is) stays constant, and the multiple threads may make |
|
1509 // concurrent calls into |sLUL| to do concurrent unwinding. |
|
  LOG("unwind_thr_fn: START");

  // A hook for testing LUL: at the first entrance here, check env var
  // MOZ_PROFILER_LUL_TEST, and if set, run tests on LUL.  Note that
  // it is preferable to run the LUL tests via gtest, but gtest is not
  // currently supported on all targets that LUL runs on.  Hence the
  // auxiliary mechanism here is also needed.
  bool doLulTest = false;

  mozilla::DebugOnly<int> r = pthread_mutex_lock(&sLULmutex);
  MOZ_ASSERT(!r);

  if (!sLUL) {
    // sLUL hasn't been allocated, so we must be the first thread in.
    sLUL = new lul::LUL(logging_sink_for_LUL);
    MOZ_ASSERT(sLUL);
    MOZ_ASSERT(sLULcount == 0);
    // Register this thread so it can read unwind info and do unwinding.
    sLUL->RegisterUnwinderThread();
    // Read all the unwind info currently available.
    read_procmaps(sLUL);
    // Has a test been requested?
    if (PR_GetEnv("MOZ_PROFILER_LUL_TEST")) {
      doLulTest = true;
    }
  } else {
    // sLUL has already been allocated, so we can't be the first
    // thread in.
    MOZ_ASSERT(sLULcount > 0);
    // Register this thread so it can do unwinding.
    sLUL->RegisterUnwinderThread();
  }

  sLULcount++;

  r = pthread_mutex_unlock(&sLULmutex);
  MOZ_ASSERT(!r);

  // If a test has been requested for LUL, run it.  Summary results
  // are sent to sLUL's logging sink.  Note that this happens after
  // read_procmaps has read unwind information into sLUL, so that the
  // tests have something to unwind against.  Without that they'd be
  // pretty meaningless.
  if (doLulTest) {
    int nTests = 0, nTestsPassed = 0;
    RunLulUnitTests(&nTests, &nTestsPassed, sLUL);
  }

  // At this point, sLUL -- the single instance of the library -- is
  // allocated and has read the required unwind info.  All running
  // threads can now make Unwind() requests of it concurrently, if
  // they wish.

  // Now go on to allocate the array of buffers used for communication
  // between the sampling threads and the unwinder threads.
|
  // If we're the first thread in, we'll need to allocate the buffer
  // array g_buffers plus the Buffer structs that it points at.
  spinLock_acquire(&g_spinLock);
  if (g_buffers == nullptr) {
    // Drop the lock, allocate the complete array and the buffers it
    // points at, reacquire the lock, and try to install it -- which
    // might fail, if someone else beat us to it.
    spinLock_release(&g_spinLock);
    UnwinderThreadBuffer** buffers
      = (UnwinderThreadBuffer**)malloc(N_UNW_THR_BUFFERS
                                       * sizeof(UnwinderThreadBuffer*));
    MOZ_ASSERT(buffers);
    int i;
    for (i = 0; i < N_UNW_THR_BUFFERS; i++) {
      /* These calloc-ations are shared between the sampling and
         unwinding threads.  They must be freed only after all such
         threads have terminated. */
      buffers[i] = (UnwinderThreadBuffer*)
                   calloc(sizeof(UnwinderThreadBuffer), 1);
      MOZ_ASSERT(buffers[i]);
      buffers[i]->state = S_EMPTY;
    }
    /* Try to install it */
    spinLock_acquire(&g_spinLock);
    if (g_buffers == nullptr) {
      g_buffers = buffers;
      spinLock_release(&g_spinLock);
    } else {
      /* Someone else beat us to it.  Release what we just allocated
         so as to avoid a leak. */
      spinLock_release(&g_spinLock);
      for (i = 0; i < N_UNW_THR_BUFFERS; i++) {
        free(buffers[i]);
      }
      free(buffers);
    }
  } else {
    /* They are already allocated, so just drop the lock and continue. */
    spinLock_release(&g_spinLock);
  }
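
  // Either way, |g_buffers| is non-null from this point on: either we
  // installed the array we just allocated, or another thread won the
  // race and we freed ours.  Note that the allocation itself is done
  // outside the spinlock, so the critical section stays short for the
  // other users of |g_spinLock|.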
|
  /*
    while (1) {
      acq lock
      scan to find oldest full
      if none { rel lock; sleep; continue }
      set buff state to emptying
      rel lock
      acq MLock // implicitly
      process buffer
      rel MLock // implicitly
      acq lock
      set buff state to S_EMPTY
      rel lock
    }
  */
  int* exit_now = (int*)exit_nowV;
  int ms_to_sleep_if_empty = 1;

  const int longest_sleep_ms = 1000;
  bool show_sleep_message = true;

  while (1) {

    if (*exit_now != 0) {
      *exit_now = 0;
      break;
    }

    spinLock_acquire(&g_spinLock);

    /* Find the oldest filled buffer, if any. */
    uint64_t oldest_seqNo = ~0ULL; /* infinity */
    int oldest_ix = -1;
    int i;
    for (i = 0; i < N_UNW_THR_BUFFERS; i++) {
      UnwinderThreadBuffer* buff = g_buffers[i];
      if (buff->state != S_FULL) continue;
      if (buff->seqNo < oldest_seqNo) {
        oldest_seqNo = buff->seqNo;
        oldest_ix = i;
      }
    }
    if (oldest_ix == -1) {
      /* We didn't find a full buffer.  Snooze and try again later. */
      MOZ_ASSERT(oldest_seqNo == ~0ULL);
      spinLock_release(&g_spinLock);
      if (ms_to_sleep_if_empty > 100 && LOGLEVEL >= 2) {
        if (show_sleep_message)
          LOGF("BPUnw: unwinder: sleep for %d ms", ms_to_sleep_if_empty);
        /* If we've already shown the message for the longest sleep,
           don't show it again, until the next round of sleeping
           starts. */
        if (ms_to_sleep_if_empty == longest_sleep_ms)
          show_sleep_message = false;
      }
      sleep_ms(ms_to_sleep_if_empty);
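      /* Back off before polling again: the interval below grows by
         2 ms per empty poll until it reaches about 20 ms, then by
         roughly 1.5x per poll, capped at longest_sleep_ms, so an idle
         unwinder ends up polling about once a second. */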
|
      if (ms_to_sleep_if_empty < 20) {
        ms_to_sleep_if_empty += 2;
      } else {
        ms_to_sleep_if_empty = (15 * ms_to_sleep_if_empty) / 10;
        if (ms_to_sleep_if_empty > longest_sleep_ms)
          ms_to_sleep_if_empty = longest_sleep_ms;
      }
      continue;
    }
|
    /* We found a full buffer.  Mark it as 'ours' and drop the
       lock; then we can safely hand it to LUL for unwinding. */
    UnwinderThreadBuffer* buff = g_buffers[oldest_ix];
    MOZ_ASSERT(buff->state == S_FULL);
    buff->state = S_EMPTYING;
    spinLock_release(&g_spinLock);

    /* unwind .. in which we can do anything we like, since any
       resource stalls that we may encounter (eg malloc locks) in
       competition with signal handler instances, will be short
       lived since the signal handler is guaranteed nonblocking. */
    if (0) LOGF("BPUnw: unwinder: seqNo %llu: emptying buf %d\n",
                (unsigned long long int)oldest_seqNo, oldest_ix);

    process_buffer(buff, oldest_ix);

    /* And .. we're done.  Mark the buffer as empty so it can be
       reused.  First though, unmap any of the entsPages that got
       mapped during filling. */
    for (i = 0; i < N_PROF_ENT_PAGES; i++) {
      if (buff->entsPages[i] == ProfEntsPage_INVALID)
        continue;
      munmap_ProfEntsPage(buff->entsPages[i]);
      buff->entsPages[i] = ProfEntsPage_INVALID;
    }

    (void)VALGRIND_MAKE_MEM_UNDEFINED(&buff->stackImg.mContents[0],
                                      lul::N_STACK_BYTES);
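    /* VALGRIND_MAKE_MEM_UNDEFINED above is a Memcheck annotation: it
       marks the entire stack image as uninitialised again, presumably
       so that bytes left over from this sample can't be mistaken for
       valid data the next time the buffer is filled. */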
|
    spinLock_acquire(&g_spinLock);
    MOZ_ASSERT(buff->state == S_EMPTYING);
    buff->state = S_EMPTY;
    spinLock_release(&g_spinLock);
    ms_to_sleep_if_empty = 1;
    show_sleep_message = true;
  }

  // This unwinder thread is exiting.  If it's the last one out,
  // shut down and deallocate the unwinder library.
  r = pthread_mutex_lock(&sLULmutex);
  MOZ_ASSERT(!r);

  MOZ_ASSERT(sLULcount > 0);
  if (sLULcount == 1) {
    // Tell the library to discard unwind info for the entire address
    // space.
    sLUL->NotifyBeforeUnmapAll();

    delete sLUL;
    sLUL = nullptr;
  }

  sLULcount--;

  r = pthread_mutex_unlock(&sLULmutex);
  MOZ_ASSERT(!r);

  LOG("unwind_thr_fn: STOP");
  return nullptr;
}
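
// The two helpers below serve synchronous unwind requests:
// finish_sync_buffer() fills the SyncProfile's buffer from the captured
// state in |ucV|, marks it S_FULL and queues it on the requesting
// thread's PseudoStack; release_sync_buffer() deletes it once it is
// back in the S_EMPTY state.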
|
static void finish_sync_buffer(ThreadProfile* profile,
                               UnwinderThreadBuffer* buff,
                               void* /* ucontext_t*, really */ ucV)
{
  SyncProfile* syncProfile = profile->AsSyncProfile();
  MOZ_ASSERT(syncProfile);
  SyncUnwinderThreadBuffer* utb = static_cast<SyncUnwinderThreadBuffer*>(
    syncProfile->GetUWTBuffer());
  fill_buffer(profile, utb->GetBuffer(), ucV);
  utb->GetBuffer()->state = S_FULL;
  PseudoStack* stack = profile->GetPseudoStack();
  stack->addLinkedUWTBuffer(utb);
}

static void release_sync_buffer(LinkedUWTBuffer* buff)
{
  SyncUnwinderThreadBuffer* data = static_cast<SyncUnwinderThreadBuffer*>(buff);
  MOZ_ASSERT(data->GetBuffer()->state == S_EMPTY);
  delete data;
}
|
////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////
|
// Keeps counts of how frames are recovered (native context, CFI, or
// stack scanning), which is useful for diagnostic purposes.
static void stats_notify_frame(int n_context, int n_cfi, int n_scanned)
{
  // Gather stats in intervals.
  static unsigned int nf_total = 0; // total frames since last printout
  static unsigned int nf_CONTEXT = 0;
  static unsigned int nf_CFI = 0;
  static unsigned int nf_SCANNED = 0;

  nf_CONTEXT += n_context;
  nf_CFI += n_cfi;
  nf_SCANNED += n_scanned;
  nf_total += (n_context + n_cfi + n_scanned);

  if (nf_total >= 5000) {
    LOGF("BPUnw frame stats: TOTAL %5u"
         " CTX %4u CFI %4u SCAN %4u",
         nf_total, nf_CONTEXT, nf_CFI, nf_SCANNED);
    nf_total = 0;
    nf_CONTEXT = 0;
    nf_CFI = 0;
    nf_SCANNED = 0;
  }
}
|
static
void do_lul_unwind_Buffer(/*OUT*/PCandSP** pairs,
                          /*OUT*/unsigned int* nPairs,
                          UnwinderThreadBuffer* buff,
                          int buffNo /* for debug printing only */)
{
# if defined(SPS_ARCH_amd64) || defined(SPS_ARCH_x86)
  lul::UnwindRegs startRegs = buff->startRegs;
  if (0) {
    LOGF("Initial RIP = 0x%llx", (unsigned long long int)startRegs.xip.Value());
    LOGF("Initial RSP = 0x%llx", (unsigned long long int)startRegs.xsp.Value());
    LOGF("Initial RBP = 0x%llx", (unsigned long long int)startRegs.xbp.Value());
  }

# elif defined(SPS_ARCH_arm)
  lul::UnwindRegs startRegs = buff->startRegs;
  if (0) {
    LOGF("Initial R15 = 0x%llx", (unsigned long long int)startRegs.r15.Value());
    LOGF("Initial R13 = 0x%llx", (unsigned long long int)startRegs.r13.Value());
  }

# else
#  error "Unknown plat"
# endif

  // FIXME: should we reinstate the ability to use separate debug objects?
  // /* Make up a list of places where the debug objects might be. */
  // std::vector<std::string> debug_dirs;
# if defined(SPS_OS_linux)
  // debug_dirs.push_back("/usr/lib/debug/lib");
  // debug_dirs.push_back("/usr/lib/debug/usr/lib");
  // debug_dirs.push_back("/usr/lib/debug/lib/x86_64-linux-gnu");
  // debug_dirs.push_back("/usr/lib/debug/usr/lib/x86_64-linux-gnu");
# elif defined(SPS_OS_android)
  // debug_dirs.push_back("/sdcard/symbols/system/lib");
  // debug_dirs.push_back("/sdcard/symbols/system/bin");
# elif defined(SPS_OS_darwin)
  // /* Nothing */
# else
#  error "Unknown plat"
# endif

  // Set the max number of scanned or otherwise dubious frames
  // to the user-specified limit.
  size_t scannedFramesAllowed
    = std::min(std::max(0, sUnwindStackScan), MAX_NATIVE_FRAMES);
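  // For example: sUnwindStackScan <= 0 allows no scanned frames at all,
  // 10 allows at most 10, and anything above MAX_NATIVE_FRAMES (256)
  // is clamped to MAX_NATIVE_FRAMES.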
|
  // The max number of frames is MAX_NATIVE_FRAMES, so as to avoid
  // the unwinder wasting a lot of time looping on corrupted stacks.
  uintptr_t framePCs[MAX_NATIVE_FRAMES];
  uintptr_t frameSPs[MAX_NATIVE_FRAMES];
  size_t framesAvail = mozilla::ArrayLength(framePCs);
  size_t framesUsed = 0;
  size_t scannedFramesAcquired = 0;
  sLUL->Unwind( &framePCs[0], &frameSPs[0],
                &framesUsed, &scannedFramesAcquired,
                framesAvail, scannedFramesAllowed,
                &startRegs, &buff->stackImg );

  if (LOGLEVEL >= 2)
    stats_notify_frame(/* context */ 1,
                       /* cfi */ framesUsed - 1 - scannedFramesAcquired,
                       /* scanned */ scannedFramesAcquired);
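  // (In the stats call above, the single 'context' frame is the one
  // taken directly from the captured register state; the remaining
  // framesUsed-1 frames are split between CFI-recovered and
  // stack-scanned frames.)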
|
  // PC values are now in framePCs[0 .. framesUsed-1], with [0] being
  // the innermost frame.  SP values are likewise in frameSPs[].
  *pairs = (PCandSP*)calloc(framesUsed, sizeof(PCandSP));
  *nPairs = framesUsed;
  if (*pairs == nullptr) {
    *nPairs = 0;
    return;
  }
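
  // Copy the frames into |*pairs| in reverse, so that (*pairs)[0] holds
  // the outermost frame and (*pairs)[framesUsed-1] the innermost, i.e.
  // the opposite order to framePCs[]/frameSPs[].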
|
  if (framesUsed > 0) {
    for (unsigned int frame_index = 0;
         frame_index < framesUsed; ++frame_index) {
      (*pairs)[framesUsed-1-frame_index].pc = framePCs[frame_index];
      (*pairs)[framesUsed-1-frame_index].sp = frameSPs[frame_index];
    }
  }

  if (LOGLEVEL >= 3) {
    LOGF("BPUnw: unwinder: seqNo %llu, buf %d: got %u frames",
         (unsigned long long int)buff->seqNo, buffNo,
         (unsigned int)framesUsed);
  }

  if (LOGLEVEL >= 2) {
    if (0 == (g_stats_totalSamples % 1000))
      LOGF("BPUnw: %llu total samples, %llu failed (buffer unavail), "
           "%llu failed (thread unreg'd)",
           (unsigned long long int)g_stats_totalSamples,
           (unsigned long long int)g_stats_noBuffAvail,
           (unsigned long long int)g_stats_thrUnregd);
  }
}

#endif /* defined(SPS_OS_windows) || defined(SPS_OS_darwin) */