tools/profiler/UnwinderThread2.cpp

changeset 0:6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/tools/profiler/UnwinderThread2.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,1884 @@
     1.4 +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public
     1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this
     1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     1.8 +
     1.9 +#include <stdio.h>
    1.10 +#include <signal.h>
    1.11 +#include <string.h>
    1.12 +#include <stdlib.h>
    1.13 +#include <time.h>
    1.14 +
    1.15 +#ifdef MOZ_VALGRIND
    1.16 +# include <valgrind/helgrind.h>
    1.17 +# include <valgrind/memcheck.h>
    1.18 +#else
    1.19 +# define VALGRIND_HG_MUTEX_LOCK_PRE(_mx,_istry)  /* */
    1.20 +# define VALGRIND_HG_MUTEX_LOCK_POST(_mx)        /* */
    1.21 +# define VALGRIND_HG_MUTEX_UNLOCK_PRE(_mx)       /* */
    1.22 +# define VALGRIND_HG_MUTEX_UNLOCK_POST(_mx)      /* */
    1.23 +# define VALGRIND_MAKE_MEM_DEFINED(_addr,_len)   ((void)0)
    1.24 +# define VALGRIND_MAKE_MEM_UNDEFINED(_addr,_len) ((void)0)
    1.25 +#endif
    1.26 +
    1.27 +#include "prenv.h"
    1.28 +#include "mozilla/arm.h"
    1.29 +#include "mozilla/DebugOnly.h"
    1.30 +#include <stdint.h>
    1.31 +#include "PlatformMacros.h"
    1.32 +
    1.33 +#include "platform.h"
    1.34 +#include <ostream>
    1.35 +#include <string>
    1.36 +
    1.37 +#include "ProfileEntry.h"
    1.38 +#include "SyncProfile.h"
    1.39 +#include "AutoObjectMapper.h"
    1.40 +#include "UnwinderThread2.h"
    1.41 +
    1.42 +#if !defined(SPS_OS_windows)
    1.43 +# include <sys/mman.h>
    1.44 +#endif
    1.45 +
    1.46 +#if defined(SPS_OS_android) || defined(SPS_OS_linux)
    1.47 +# include <ucontext.h>
    1.48 +# include "LulMain.h"
    1.49 +#endif
    1.50 +
    1.51 +#include "shared-libraries.h"
    1.52 +
    1.53 +
    1.54 +// Verbosity of this module, for debugging:
    1.55 +//   0  silent
    1.56 +//   1  adds info about debuginfo load success/failure
    1.57 +//   2  adds slow-summary stats for buffer fills/misses (RECOMMENDED)
    1.58 +//   3  adds per-sample summary lines
    1.59 +//   4  adds per-sample frame listing
     1.60 +// Note that levels 3 and above produce a risk of deadlock and
     1.61 +// are not recommended for extended use.
    1.62 +#define LOGLEVEL 2
    1.63 +
     1.64 +// The maximum number of frames that the native unwinder will
     1.65 +// produce.  Setting it too high risks wasting a lot of time
     1.66 +// looping on corrupted stacks.
    1.67 +#define MAX_NATIVE_FRAMES 256
    1.68 +
    1.69 +
    1.70 +// The 'else' of this covers the entire rest of the file
    1.71 +#if defined(SPS_OS_windows) || defined(SPS_OS_darwin)
    1.72 +
    1.73 +//////////////////////////////////////////////////////////
    1.74 +//// BEGIN externally visible functions (WINDOWS and OSX STUBS)
    1.75 +
    1.76 +// On Windows and OSX this will all need reworking.
    1.77 +// GeckoProfilerImpl.h will ensure these functions are never actually
    1.78 +// called, so just provide no-op stubs for now.
    1.79 +
    1.80 +void uwt__init()
    1.81 +{
    1.82 +}
    1.83 +
    1.84 +void uwt__stop()
    1.85 +{
    1.86 +}
    1.87 +
    1.88 +void uwt__deinit()
    1.89 +{
    1.90 +}
    1.91 +
    1.92 +void uwt__register_thread_for_profiling ( void* stackTop )
    1.93 +{
    1.94 +}
    1.95 +
    1.96 +void uwt__unregister_thread_for_profiling()
    1.97 +{
    1.98 +}
    1.99 +
   1.100 +LinkedUWTBuffer* utb__acquire_sync_buffer(void* stackTop)
   1.101 +{
   1.102 +  return nullptr;
   1.103 +}
   1.104 +
   1.105 +// RUNS IN SIGHANDLER CONTEXT
   1.106 +UnwinderThreadBuffer* uwt__acquire_empty_buffer()
   1.107 +{
   1.108 +  return nullptr;
   1.109 +}
   1.110 +
   1.111 +void
   1.112 +utb__finish_sync_buffer(ThreadProfile* aProfile,
   1.113 +                        UnwinderThreadBuffer* utb,
   1.114 +                        void* /* ucontext_t*, really */ ucV)
   1.115 +{
   1.116 +}
   1.117 +
   1.118 +void
   1.119 +utb__release_sync_buffer(LinkedUWTBuffer* utb)
   1.120 +{
   1.121 +}
   1.122 +
   1.123 +// RUNS IN SIGHANDLER CONTEXT
   1.124 +void
   1.125 +uwt__release_full_buffer(ThreadProfile* aProfile,
   1.126 +                         UnwinderThreadBuffer* utb,
   1.127 +                         void* /* ucontext_t*, really */ ucV )
   1.128 +{
   1.129 +}
   1.130 +
   1.131 +// RUNS IN SIGHANDLER CONTEXT
   1.132 +void
   1.133 +utb__addEntry(/*MODIFIED*/UnwinderThreadBuffer* utb, ProfileEntry ent)
   1.134 +{
   1.135 +}
   1.136 +
   1.137 +//// END externally visible functions (WINDOWS and OSX STUBS)
   1.138 +//////////////////////////////////////////////////////////
   1.139 +
   1.140 +#else // a supported target
   1.141 +
   1.142 +//////////////////////////////////////////////////////////
   1.143 +//// BEGIN externally visible functions
   1.144 +
   1.145 +// Forward references
    1.146 +// the unwinder thread's ID, its function, and a stop-now flag
   1.147 +static void* unwind_thr_fn ( void* exit_nowV );
   1.148 +static pthread_t unwind_thr;
   1.149 +static int       unwind_thr_exit_now = 0; // RACED ON
   1.150 +
    1.151 +// Threads must be registered with this module before they can be
    1.152 +// sampled, so that we know the maximum safe stack address for each
    1.153 +// registered thread.
   1.154 +static void thread_register_for_profiling ( void* stackTop );
   1.155 +
   1.156 +// Unregister a thread.
   1.157 +static void thread_unregister_for_profiling();
   1.158 +
   1.159 +// Empties out the buffer queue.  Used when the unwinder thread is
   1.160 +// shut down.
   1.161 +static void empty_buffer_queue();
   1.162 +
   1.163 +// Allocate a buffer for synchronous unwinding
   1.164 +static LinkedUWTBuffer* acquire_sync_buffer(void* stackTop);
   1.165 +
   1.166 +// RUNS IN SIGHANDLER CONTEXT
   1.167 +// Acquire an empty buffer and mark it as FILLING
   1.168 +static UnwinderThreadBuffer* acquire_empty_buffer();
   1.169 +
   1.170 +static void finish_sync_buffer(ThreadProfile* aProfile,
   1.171 +                               UnwinderThreadBuffer* utb,
   1.172 +                               void* /* ucontext_t*, really */ ucV);
   1.173 +
   1.174 +// Release an empty synchronous unwind buffer.
   1.175 +static void release_sync_buffer(LinkedUWTBuffer* utb);
   1.176 +
   1.177 +// RUNS IN SIGHANDLER CONTEXT
   1.178 +// Put this buffer in the queue of stuff going to the unwinder
   1.179 +// thread, and mark it as FULL.  Before doing that, fill in stack
   1.180 +// chunk and register fields if a native unwind is requested.
    1.181 +// APROFILE is where the profile data should be added.  UTB
    1.182 +// is the partially-filled-in buffer, containing ProfileEntries.
    1.183 +// UCV is the ucontext_t* from the signal handler.  If non-nullptr,
    1.184 +// it is taken as a cue to request a native unwind.
   1.185 +static void release_full_buffer(ThreadProfile* aProfile,
   1.186 +                                UnwinderThreadBuffer* utb,
   1.187 +                                void* /* ucontext_t*, really */ ucV );
   1.188 +
   1.189 +// RUNS IN SIGHANDLER CONTEXT
   1.190 +static void utb_add_prof_ent(UnwinderThreadBuffer* utb, ProfileEntry ent);
   1.191 +
    1.192 +// Do a full memory barrier.
   1.193 +static void do_MBAR();
   1.194 +
   1.195 +
   1.196 +// This is the single instance of the LUL unwind library that we will
   1.197 +// use.  Currently the library is operated with multiple sampling
   1.198 +// threads but only one unwinder thread.  It should also be possible
   1.199 +// to use the library with multiple unwinder threads, to improve
   1.200 +// throughput.  The setup here makes it possible to use multiple
   1.201 +// unwinder threads, although that is as-yet untested.
   1.202 +//
   1.203 +// |sLULmutex| protects |sLUL| and |sLULcount| and also is used to
   1.204 +// ensure that only the first unwinder thread requests |sLUL| to read
   1.205 +// debug info.  |sLUL| may only be assigned to (and the object it
   1.206 +// points at may only be created/destroyed) when |sLULcount| is zero.
   1.207 +// |sLULcount| holds the number of unwinder threads currently in
   1.208 +// existence.
   1.209 +static pthread_mutex_t sLULmutex = PTHREAD_MUTEX_INITIALIZER;
   1.210 +static lul::LUL*       sLUL      = nullptr;
   1.211 +static int             sLULcount = 0;
   1.212 +
   1.213 +
   1.214 +void uwt__init()
   1.215 +{
   1.216 +  // Create the unwinder thread.
   1.217 +  MOZ_ASSERT(unwind_thr_exit_now == 0);
   1.218 +  int r = pthread_create( &unwind_thr, nullptr,
   1.219 +                          unwind_thr_fn, (void*)&unwind_thr_exit_now );
   1.220 +  MOZ_ALWAYS_TRUE(r == 0);
   1.221 +}
   1.222 +
   1.223 +void uwt__stop()
   1.224 +{
   1.225 +  // Shut down the unwinder thread.
   1.226 +  MOZ_ASSERT(unwind_thr_exit_now == 0);
   1.227 +  unwind_thr_exit_now = 1;
   1.228 +  do_MBAR();
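          +  // unwind_thr_exit_now is read by the unwinder thread (see
          +  // unwind_thr_fn); the barrier above makes the store visible to it
          +  // before we block in pthread_join.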
   1.229 +  int r = pthread_join(unwind_thr, nullptr);
   1.230 +  MOZ_ALWAYS_TRUE(r == 0);
   1.231 +}
   1.232 +
   1.233 +void uwt__deinit()
   1.234 +{
   1.235 +  empty_buffer_queue();
   1.236 +}
   1.237 +
   1.238 +void uwt__register_thread_for_profiling(void* stackTop)
   1.239 +{
   1.240 +  thread_register_for_profiling(stackTop);
   1.241 +}
   1.242 +
   1.243 +void uwt__unregister_thread_for_profiling()
   1.244 +{
   1.245 +  thread_unregister_for_profiling();
   1.246 +}
   1.247 +
   1.248 +LinkedUWTBuffer* utb__acquire_sync_buffer(void* stackTop)
   1.249 +{
   1.250 +  return acquire_sync_buffer(stackTop);
   1.251 +}
   1.252 +
   1.253 +void utb__finish_sync_buffer(ThreadProfile* profile,
   1.254 +                             UnwinderThreadBuffer* buff,
   1.255 +                             void* /* ucontext_t*, really */ ucV)
   1.256 +{
   1.257 +  finish_sync_buffer(profile, buff, ucV);
   1.258 +}
   1.259 +
   1.260 +void utb__release_sync_buffer(LinkedUWTBuffer* buff)
   1.261 +{
   1.262 +  release_sync_buffer(buff);
   1.263 +}
   1.264 +
   1.265 +// RUNS IN SIGHANDLER CONTEXT
   1.266 +UnwinderThreadBuffer* uwt__acquire_empty_buffer()
   1.267 +{
   1.268 +  return acquire_empty_buffer();
   1.269 +}
   1.270 +
   1.271 +// RUNS IN SIGHANDLER CONTEXT
   1.272 +void
   1.273 +uwt__release_full_buffer(ThreadProfile* aProfile,
   1.274 +                         UnwinderThreadBuffer* utb,
   1.275 +                         void* /* ucontext_t*, really */ ucV )
   1.276 +{
   1.277 +  release_full_buffer( aProfile, utb, ucV );
   1.278 +}
   1.279 +
   1.280 +// RUNS IN SIGHANDLER CONTEXT
   1.281 +void
   1.282 +utb__addEntry(/*MODIFIED*/UnwinderThreadBuffer* utb, ProfileEntry ent)
   1.283 +{
   1.284 +  utb_add_prof_ent(utb, ent);
   1.285 +}
   1.286 +
   1.287 +//// END externally visible functions
   1.288 +//////////////////////////////////////////////////////////
   1.289 +
   1.290 +
   1.291 +//////////////////////////////////////////////////////////
   1.292 +//// BEGIN type UnwindThreadBuffer
   1.293 +
   1.294 +static_assert(sizeof(uint32_t) == 4, "uint32_t size incorrect");
   1.295 +static_assert(sizeof(uint64_t) == 8, "uint64_t size incorrect");
   1.296 +static_assert(sizeof(uintptr_t) == sizeof(void*),
   1.297 +              "uintptr_t size incorrect");
   1.298 +
   1.299 +typedef
   1.300 +  struct { 
   1.301 +    uint64_t rsp;
   1.302 +    uint64_t rbp;
   1.303 +    uint64_t rip; 
   1.304 +  }
   1.305 +  AMD64Regs;
   1.306 +
   1.307 +typedef
   1.308 +  struct {
   1.309 +    uint32_t r15;
   1.310 +    uint32_t r14;
   1.311 +    uint32_t r13;
   1.312 +    uint32_t r12;
   1.313 +    uint32_t r11;
   1.314 +    uint32_t r7;
   1.315 +  }
   1.316 +  ARMRegs;
   1.317 +
   1.318 +typedef
   1.319 +  struct {
   1.320 +    uint32_t esp;
   1.321 +    uint32_t ebp;
   1.322 +    uint32_t eip;
   1.323 +  }
   1.324 +  X86Regs;
   1.325 +
   1.326 +#if defined(SPS_ARCH_amd64)
   1.327 +typedef  AMD64Regs  ArchRegs;
   1.328 +#elif defined(SPS_ARCH_arm)
   1.329 +typedef  ARMRegs  ArchRegs;
   1.330 +#elif defined(SPS_ARCH_x86)
   1.331 +typedef  X86Regs  ArchRegs;
   1.332 +#else
   1.333 +# error "Unknown plat"
   1.334 +#endif
   1.335 +
   1.336 +#if defined(SPS_ARCH_amd64) || defined(SPS_ARCH_arm) || defined(SPS_ARCH_x86)
   1.337 +# define SPS_PAGE_SIZE 4096
   1.338 +#else
   1.339 +# error "Unknown plat"
   1.340 +#endif
   1.341 +
   1.342 +typedef  enum { S_EMPTY, S_FILLING, S_EMPTYING, S_FULL }  State;
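          +// Intended buffer lifecycle, per the ownership comments on
          +// _UnwinderThreadBuffer below: S_EMPTY -> S_FILLING (owned by the
          +// sampling thread) -> S_FULL -> S_EMPTYING (owned by the unwinder
          +// thread) -> S_EMPTY again.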
   1.343 +
   1.344 +typedef  struct { uintptr_t val; }  SpinLock;
   1.345 +
   1.346 +/* CONFIGURABLE */
   1.347 +/* The number of fixed ProfileEntry slots.  If more are required, they
   1.348 +   are placed in mmap'd pages. */
   1.349 +#define N_FIXED_PROF_ENTS 20
   1.350 +
   1.351 +/* CONFIGURABLE */
   1.352 +/* The number of extra pages of ProfileEntries.  If (on arm) each
   1.353 +   ProfileEntry is 8 bytes, then a page holds 512, and so 100 pages
   1.354 +   is enough to hold 51200. */
   1.355 +#define N_PROF_ENT_PAGES 100
   1.356 +
   1.357 +/* DERIVATIVE */
   1.358 +#define N_PROF_ENTS_PER_PAGE (SPS_PAGE_SIZE / sizeof(ProfileEntry))
   1.359 +
    1.360 +/* A page of ProfileEntries.  This might actually be slightly smaller
   1.361 +   than a page if SPS_PAGE_SIZE is not an exact multiple of
   1.362 +   sizeof(ProfileEntry). */
   1.363 +typedef
   1.364 +  struct { ProfileEntry ents[N_PROF_ENTS_PER_PAGE]; }
   1.365 +  ProfEntsPage;
   1.366 +
   1.367 +#define ProfEntsPage_INVALID ((ProfEntsPage*)1)
   1.368 +
   1.369 +
   1.370 +/* Fields protected by the spinlock are marked SL */
   1.371 +
   1.372 +struct _UnwinderThreadBuffer {
   1.373 +  /*SL*/ State  state;
   1.374 +  /* The rest of these are protected, in some sense, by ::state.  If
   1.375 +     ::state is S_FILLING, they are 'owned' by the sampler thread
   1.376 +     that set the state to S_FILLING.  If ::state is S_EMPTYING,
   1.377 +     they are 'owned' by the unwinder thread that set the state to
   1.378 +     S_EMPTYING.  If ::state is S_EMPTY or S_FULL, the buffer isn't
   1.379 +     owned by any thread, and so no thread may access these
   1.380 +     fields. */
   1.381 +  /* Sample number, needed to process samples in order */
   1.382 +  uint64_t       seqNo;
   1.383 +  /* The ThreadProfile into which the results are eventually to be
   1.384 +     dumped. */
   1.385 +  ThreadProfile* aProfile;
   1.386 +  /* Pseudostack and other info, always present */
   1.387 +  ProfileEntry   entsFixed[N_FIXED_PROF_ENTS];
   1.388 +  ProfEntsPage*  entsPages[N_PROF_ENT_PAGES];
   1.389 +  uintptr_t      entsUsed;
   1.390 +  /* Do we also have data to do a native unwind? */
   1.391 +  bool           haveNativeInfo;
   1.392 +  /* If so, here is the register state and stack.  Unset if
   1.393 +     .haveNativeInfo is false. */
   1.394 +  lul::UnwindRegs startRegs;
   1.395 +  lul::StackImage stackImg;
   1.396 +  void* stackMaxSafe; /* Address for max safe stack reading. */
   1.397 +};
    1.398 +/* Indexing scheme for ents:
    1.399 +     0 <= i < N_FIXED_PROF_ENTS
    1.400 +       is at entsFixed[i]
    1.401 +
    1.402 +     i >= N_FIXED_PROF_ENTS
    1.403 +       is at let j = i - N_FIXED_PROF_ENTS
    1.404 +             in  entsPages[j / N_PROF_ENTS_PER_PAGE]
    1.405 +                  ->ents[j % N_PROF_ENTS_PER_PAGE]
    1.406 +
    1.407 +   entsPages[] are allocated on demand.  Because zero can
    1.408 +   theoretically be a valid page pointer, use
    1.409 +   ProfEntsPage_INVALID == (ProfEntsPage*)1 to mark invalid pages.
    1.410 +
    1.411 +   It follows that the max entsUsed value is N_FIXED_PROF_ENTS +
    1.412 +   N_PROF_ENTS_PER_PAGE * N_PROF_ENT_PAGES, and at that point no more
    1.413 +   ProfileEntries can be stored.
    1.414 +*/
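          +/* Worked example, assuming (as the sizing comments above do) that
          +   sizeof(ProfileEntry) == 8, so N_PROF_ENTS_PER_PAGE == 512: entry
          +   i == 530 gives j == 510 and so lives at entsPages[0]->ents[510];
          +   the total capacity is 20 + 512 * 100 == 51220 entries per buffer. */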
   1.415 +
   1.416 +
   1.417 +typedef
   1.418 +  struct {
   1.419 +    pthread_t thrId;
   1.420 +    void*     stackTop;
   1.421 +    uint64_t  nSamples; 
   1.422 +  }
   1.423 +  StackLimit;
   1.424 +
   1.425 +/* Globals -- the buffer array */
   1.426 +#define N_UNW_THR_BUFFERS 10
   1.427 +/*SL*/ static UnwinderThreadBuffer** g_buffers     = nullptr;
   1.428 +/*SL*/ static uint64_t               g_seqNo       = 0;
   1.429 +/*SL*/ static SpinLock               g_spinLock    = { 0 };
   1.430 +
   1.431 +/* Globals -- the thread array.  The array is dynamically expanded on
   1.432 +   demand.  The spinlock must be held when accessing g_stackLimits,
   1.433 +   g_stackLimits[some index], g_stackLimitsUsed and g_stackLimitsSize.
   1.434 +   However, the spinlock must not be held when calling malloc to
   1.435 +   allocate or expand the array, as that would risk deadlock against a
   1.436 +   sampling thread that holds the malloc lock and is trying to acquire
   1.437 +   the spinlock. */
   1.438 +/*SL*/ static StackLimit* g_stackLimits     = nullptr;
   1.439 +/*SL*/ static size_t      g_stackLimitsUsed = 0;
   1.440 +/*SL*/ static size_t      g_stackLimitsSize = 0;
   1.441 +
   1.442 +/* Stats -- atomically incremented, no lock needed */
   1.443 +static uintptr_t g_stats_totalSamples = 0; // total # sample attempts
   1.444 +static uintptr_t g_stats_noBuffAvail  = 0; // # failed due to no buffer avail
   1.445 +static uintptr_t g_stats_thrUnregd    = 0; // # failed due to unregistered thr
   1.446 +
   1.447 +/* We must be VERY CAREFUL what we do with the spinlock held.  The
   1.448 +   only thing it is safe to do with it held is modify (viz, read or
   1.449 +   write) g_buffers, g_buffers[], g_seqNo, g_buffers[]->state,
   1.450 +   g_stackLimits, g_stackLimits[], g_stackLimitsUsed and
   1.451 +   g_stackLimitsSize.  No arbitrary computations, no syscalls, no
   1.452 +   printfs, no file IO, and absolutely no dynamic memory allocation
   1.453 +   (else we WILL eventually deadlock).
   1.454 +
   1.455 +   This applies both to the signal handler and to the unwinder thread.
   1.456 +*/
   1.457 +
   1.458 +//// END type UnwindThreadBuffer
   1.459 +//////////////////////////////////////////////////////////
   1.460 +
    1.461 +// This is the interface to LUL: unwound frames are returned as (PC, SP) pairs.
   1.462 +typedef  struct { u_int64_t pc; u_int64_t sp; }  PCandSP;
   1.463 +
   1.464 +// Forward declaration.  Implementation is below.
   1.465 +static
   1.466 +void do_lul_unwind_Buffer(/*OUT*/PCandSP** pairs,
   1.467 +                          /*OUT*/unsigned int* nPairs,
   1.468 +                          UnwinderThreadBuffer* buff,
   1.469 +                          int buffNo /* for debug printing only */);
   1.470 +
   1.471 +static bool is_page_aligned(void* v)
   1.472 +{
   1.473 +  uintptr_t w = (uintptr_t) v;
    1.474 +  return (w & (SPS_PAGE_SIZE-1)) == 0;
   1.475 +}
   1.476 +
   1.477 +
   1.478 +/* Implement machine-word sized atomic compare-and-swap.  Returns true
   1.479 +   if success, false if failure. */
   1.480 +static bool do_CASW(uintptr_t* addr, uintptr_t expected, uintptr_t nyu)
   1.481 +{
   1.482 +#if defined(__GNUC__)
   1.483 +  return __sync_bool_compare_and_swap(addr, expected, nyu);
   1.484 +#else
   1.485 +# error "Unhandled compiler"
   1.486 +#endif
   1.487 +}
   1.488 +
   1.489 +/* Hint to the CPU core that we are in a spin-wait loop, and that
   1.490 +   other processors/cores/threads-running-on-the-same-core should be
   1.491 +   given priority on execute resources, if that is possible.  Not
   1.492 +   critical if this is a no-op on some targets. */
   1.493 +static void do_SPINLOOP_RELAX()
   1.494 +{
   1.495 +#if (defined(SPS_ARCH_amd64) || defined(SPS_ARCH_x86)) && defined(__GNUC__)
   1.496 +  __asm__ __volatile__("rep; nop");
   1.497 +#elif defined(SPS_PLAT_arm_android) && MOZILLA_ARM_ARCH >= 7
   1.498 +  __asm__ __volatile__("wfe");
   1.499 +#endif
   1.500 +}
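          +// Note: "rep; nop" is the encoding of the x86 PAUSE instruction, and
          +// WFE/SEV are the ARM wait-for-event / send-event pair; the matching
          +// SEV is issued by do_SPINLOOP_NUDGE() below.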
   1.501 +
   1.502 +/* Tell any cores snoozing in spin loops to wake up. */
   1.503 +static void do_SPINLOOP_NUDGE()
   1.504 +{
   1.505 +#if (defined(SPS_ARCH_amd64) || defined(SPS_ARCH_x86)) && defined(__GNUC__)
   1.506 +  /* this is a no-op */
   1.507 +#elif defined(SPS_PLAT_arm_android) && MOZILLA_ARM_ARCH >= 7
   1.508 +  __asm__ __volatile__("sev");
   1.509 +#endif
   1.510 +}
   1.511 +
   1.512 +/* Perform a full memory barrier. */
   1.513 +static void do_MBAR()
   1.514 +{
   1.515 +#if defined(__GNUC__)
   1.516 +  __sync_synchronize();
   1.517 +#else
   1.518 +# error "Unhandled compiler"
   1.519 +#endif
   1.520 +}
   1.521 +
   1.522 +static void spinLock_acquire(SpinLock* sl)
   1.523 +{
   1.524 +  uintptr_t* val = &sl->val;
   1.525 +  VALGRIND_HG_MUTEX_LOCK_PRE(sl, 0/*!isTryLock*/);
   1.526 +  while (1) {
   1.527 +    bool ok = do_CASW( val, 0, 1 );
   1.528 +    if (ok) break;
   1.529 +    do_SPINLOOP_RELAX();
   1.530 +  }
   1.531 +  do_MBAR();
   1.532 +  VALGRIND_HG_MUTEX_LOCK_POST(sl);
   1.533 +}
   1.534 +
   1.535 +static void spinLock_release(SpinLock* sl)
   1.536 +{
   1.537 +  uintptr_t* val = &sl->val;
   1.538 +  VALGRIND_HG_MUTEX_UNLOCK_PRE(sl);
   1.539 +  do_MBAR();
   1.540 +  bool ok = do_CASW( val, 1, 0 );
   1.541 +  /* This must succeed at the first try.  To fail would imply that
   1.542 +     the lock was unheld. */
   1.543 +  MOZ_ALWAYS_TRUE(ok);
   1.544 +  do_SPINLOOP_NUDGE();
   1.545 +  VALGRIND_HG_MUTEX_UNLOCK_POST(sl);
   1.546 +}
   1.547 +
   1.548 +static void sleep_ms(unsigned int ms)
   1.549 +{
   1.550 +  struct timespec req;
   1.551 +  req.tv_sec = ((time_t)ms) / 1000;
   1.552 +  req.tv_nsec = 1000 * 1000 * (((unsigned long)ms) % 1000);
   1.553 +  nanosleep(&req, nullptr);
   1.554 +}
   1.555 +
   1.556 +/* Use CAS to implement standalone atomic increment. */
   1.557 +static void atomic_INC(uintptr_t* loc)
   1.558 +{
   1.559 +  while (1) {
   1.560 +    uintptr_t old = *loc;
   1.561 +    uintptr_t nyu = old + 1;
   1.562 +    bool ok = do_CASW( loc, old, nyu );
   1.563 +    if (ok) break;
   1.564 +  }
   1.565 +}
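          +// (Functionally an atomic increment: the CAS loop simply retries if
          +// another thread updates *loc between the read and the do_CASW.)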
   1.566 +
   1.567 +// Empties out the buffer queue.
   1.568 +static void empty_buffer_queue()
   1.569 +{
   1.570 +  spinLock_acquire(&g_spinLock);
   1.571 +
   1.572 +  UnwinderThreadBuffer** tmp_g_buffers = g_buffers;
   1.573 +  g_stackLimitsUsed = 0;
   1.574 +  g_seqNo = 0;
   1.575 +  g_buffers = nullptr;
   1.576 +
   1.577 +  spinLock_release(&g_spinLock);
   1.578 +
   1.579 +  // Can't do any malloc/free when holding the spinlock.
   1.580 +  free(tmp_g_buffers);
   1.581 +
    1.582 +  // We could potentially free up g_stackLimits; but given the
    1.583 +  // complications involved in resizing it (see the comments in
    1.584 +  // thread_register_for_profiling), it's safer to leave it in place.
   1.585 +}
   1.586 +
   1.587 +
   1.588 +// Registers a thread for profiling.  Detects and ignores duplicate
   1.589 +// registration.
   1.590 +static void thread_register_for_profiling(void* stackTop)
   1.591 +{
   1.592 +  pthread_t me = pthread_self();
   1.593 +
   1.594 +  spinLock_acquire(&g_spinLock);
   1.595 +
   1.596 +  // tmp copy of g_stackLimitsUsed, to avoid racing in message printing
   1.597 +  int n_used;
   1.598 +
   1.599 +  // Ignore spurious calls which aren't really registering anything.
   1.600 +  if (stackTop == nullptr) {
   1.601 +    n_used = g_stackLimitsUsed;
   1.602 +    spinLock_release(&g_spinLock);
   1.603 +    LOGF("BPUnw: [%d total] thread_register_for_profiling"
   1.604 +         "(me=%p, stacktop=NULL) (IGNORED)", n_used, (void*)me);
   1.605 +    return;
   1.606 +  }
   1.607 +
   1.608 +  /* Minimal sanity check on stackTop */
   1.609 +  MOZ_ASSERT((void*)&n_used/*any auto var will do*/ < stackTop);
   1.610 +
   1.611 +  bool is_dup = false;
   1.612 +  for (size_t i = 0; i < g_stackLimitsUsed; i++) {
   1.613 +    if (g_stackLimits[i].thrId == me) {
   1.614 +      is_dup = true;
   1.615 +      break;
   1.616 +    }
   1.617 +  }
   1.618 +
   1.619 +  if (is_dup) {
   1.620 +    /* It's a duplicate registration.  Ignore it: drop the lock and
   1.621 +       return. */
   1.622 +    n_used = g_stackLimitsUsed;
   1.623 +    spinLock_release(&g_spinLock);
   1.624 +
   1.625 +    LOGF("BPUnw: [%d total] thread_register_for_profiling"
   1.626 +         "(me=%p, stacktop=%p) (DUPLICATE)", n_used, (void*)me, stackTop);
   1.627 +    return;
   1.628 +  }
   1.629 +
   1.630 +  /* Make sure the g_stackLimits array is large enough to accommodate
   1.631 +     this new entry.  This is tricky.  If it isn't large enough, we
   1.632 +     can malloc a larger version, but we have to do that without
   1.633 +     holding the spinlock, else we risk deadlock.  The deadlock
   1.634 +     scenario is:
   1.635 +
   1.636 +     Some other thread that is being sampled
   1.637 +                                        This thread
   1.638 +
   1.639 +     call malloc                        call this function
   1.640 +     acquire malloc lock                acquire the spinlock
   1.641 +     (sampling signal)                  discover thread array not big enough,
   1.642 +     call uwt__acquire_empty_buffer       call malloc to make it larger
   1.643 +     acquire the spinlock               acquire malloc lock
   1.644 +
   1.645 +     This gives an inconsistent lock acquisition order on the malloc
   1.646 +     lock and spinlock, hence risk of deadlock.
   1.647 +
   1.648 +     Allocating more space for the array without holding the spinlock
   1.649 +     implies tolerating races against other thread(s) who are also
   1.650 +     trying to expand the array.  How can we detect if we have been
   1.651 +     out-raced?  Every successful expansion of g_stackLimits[] results
   1.652 +     in an increase in g_stackLimitsSize.  Hence we can detect if we
   1.653 +     got out-raced by remembering g_stackLimitsSize before we dropped
   1.654 +     the spinlock and checking if it has changed after the spinlock is
   1.655 +     reacquired. */
   1.656 +
   1.657 +  MOZ_ASSERT(g_stackLimitsUsed <= g_stackLimitsSize);
   1.658 +
   1.659 +  if (g_stackLimitsUsed == g_stackLimitsSize) {
   1.660 +    /* g_stackLimits[] is full; resize it. */
   1.661 +
   1.662 +    size_t old_size = g_stackLimitsSize;
   1.663 +    size_t new_size = old_size == 0 ? 4 : (2 * old_size);
   1.664 +
   1.665 +    spinLock_release(&g_spinLock);
   1.666 +    StackLimit* new_arr  = (StackLimit*)malloc(new_size * sizeof(StackLimit));
   1.667 +    if (!new_arr)
   1.668 +      return;
   1.669 +
   1.670 +    spinLock_acquire(&g_spinLock);
   1.671 +
   1.672 +    if (old_size != g_stackLimitsSize) {
   1.673 +      /* We've been outraced.  Instead of trying to deal in-line with
   1.674 +         this extremely rare case, just start all over again by
   1.675 +         tail-calling this routine. */
   1.676 +      spinLock_release(&g_spinLock);
   1.677 +      free(new_arr);
   1.678 +      thread_register_for_profiling(stackTop);
   1.679 +      return;
   1.680 +    }
   1.681 +
   1.682 +    memcpy(new_arr, g_stackLimits, old_size * sizeof(StackLimit));
   1.683 +    if (g_stackLimits)
   1.684 +      free(g_stackLimits);
   1.685 +
   1.686 +    g_stackLimits = new_arr;
   1.687 +
   1.688 +    MOZ_ASSERT(g_stackLimitsSize < new_size);
   1.689 +    g_stackLimitsSize = new_size;
   1.690 +  }
   1.691 +
   1.692 +  MOZ_ASSERT(g_stackLimitsUsed < g_stackLimitsSize);
   1.693 +
   1.694 +  /* Finally, we have a safe place to put the new entry. */
   1.695 +
   1.696 +  // Round |stackTop| up to the end of the containing page.  We may
   1.697 +  // as well do this -- there's no danger of a fault, and we might
   1.698 +  // get a few more base-of-the-stack frames as a result.  This
   1.699 +  // assumes that no target has a page size smaller than 4096.
   1.700 +  uintptr_t stackTopR = (uintptr_t)stackTop;
   1.701 +  stackTopR = (stackTopR & ~(uintptr_t)4095) + (uintptr_t)4095;
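          +  // For example (hypothetical address): stackTop == 0x7ffd12340008
          +  // rounds up to stackTopR == 0x7ffd12340fff, the last byte of its
          +  // containing 4096-byte page.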
   1.702 +
   1.703 +  g_stackLimits[g_stackLimitsUsed].thrId    = me;
   1.704 +  g_stackLimits[g_stackLimitsUsed].stackTop = (void*)stackTopR;
   1.705 +  g_stackLimits[g_stackLimitsUsed].nSamples = 0;
   1.706 +  g_stackLimitsUsed++;
   1.707 +
   1.708 +  n_used = g_stackLimitsUsed;
   1.709 +  spinLock_release(&g_spinLock);
   1.710 +
   1.711 +  LOGF("BPUnw: [%d total] thread_register_for_profiling"
   1.712 +       "(me=%p, stacktop=%p)", n_used, (void*)me, stackTop);
   1.713 +}
   1.714 +
   1.715 +// Deregisters a thread from profiling.  Detects and ignores attempts
   1.716 +// to deregister a not-registered thread.
   1.717 +static void thread_unregister_for_profiling()
   1.718 +{
   1.719 +  spinLock_acquire(&g_spinLock);
   1.720 +
   1.721 +  // tmp copy of g_stackLimitsUsed, to avoid racing in message printing
   1.722 +  size_t n_used;
   1.723 +
   1.724 +  size_t i;
   1.725 +  bool found = false;
   1.726 +  pthread_t me = pthread_self();
   1.727 +  for (i = 0; i < g_stackLimitsUsed; i++) {
   1.728 +    if (g_stackLimits[i].thrId == me)
   1.729 +      break;
   1.730 +  }
   1.731 +  if (i < g_stackLimitsUsed) {
    1.732 +    // Found this entry.  Slide the remaining ones down one place.
   1.733 +    for (; i+1 < g_stackLimitsUsed; i++) {
   1.734 +      g_stackLimits[i] = g_stackLimits[i+1];
   1.735 +    }
   1.736 +    g_stackLimitsUsed--;
   1.737 +    found = true;
   1.738 +  }
   1.739 +
   1.740 +  n_used = g_stackLimitsUsed;
   1.741 +
   1.742 +  spinLock_release(&g_spinLock);
   1.743 +  LOGF("BPUnw: [%d total] thread_unregister_for_profiling(me=%p) %s", 
   1.744 +       (int)n_used, (void*)me, found ? "" : " (NOT REGISTERED) ");
   1.745 +}
   1.746 +
   1.747 +
   1.748 +__attribute__((unused))
   1.749 +static void show_registered_threads()
   1.750 +{
   1.751 +  size_t i;
   1.752 +  spinLock_acquire(&g_spinLock);
   1.753 +  for (i = 0; i < g_stackLimitsUsed; i++) {
   1.754 +    LOGF("[%d]  pthread_t=%p  nSamples=%lld",
   1.755 +         (int)i, (void*)g_stackLimits[i].thrId, 
   1.756 +                 (unsigned long long int)g_stackLimits[i].nSamples);
   1.757 +  }
   1.758 +  spinLock_release(&g_spinLock);
   1.759 +}
   1.760 +
   1.761 +// RUNS IN SIGHANDLER CONTEXT
   1.762 +/* The calling thread owns the buffer, as denoted by its state being
   1.763 +   S_FILLING.  So we can mess with it without further locking. */
   1.764 +static void init_empty_buffer(UnwinderThreadBuffer* buff, void* stackTop)
   1.765 +{
   1.766 +  /* Now we own the buffer, initialise it. */
   1.767 +  buff->aProfile            = nullptr;
   1.768 +  buff->entsUsed            = 0;
   1.769 +  buff->haveNativeInfo      = false;
   1.770 +  buff->stackImg.mLen       = 0;
   1.771 +  buff->stackImg.mStartAvma = 0;
   1.772 +  buff->stackMaxSafe        = stackTop; /* We will need this in
   1.773 +                                           release_full_buffer() */
   1.774 +  for (size_t i = 0; i < N_PROF_ENT_PAGES; i++)
   1.775 +    buff->entsPages[i] = ProfEntsPage_INVALID;
   1.776 +}
   1.777 +
   1.778 +struct SyncUnwinderThreadBuffer : public LinkedUWTBuffer
   1.779 +{
   1.780 +  UnwinderThreadBuffer* GetBuffer()
   1.781 +  {
   1.782 +    return &mBuff;
   1.783 +  }
   1.784 +  
   1.785 +  UnwinderThreadBuffer  mBuff;
   1.786 +};
   1.787 +
   1.788 +static LinkedUWTBuffer* acquire_sync_buffer(void* stackTop)
   1.789 +{
   1.790 +  MOZ_ASSERT(stackTop);
   1.791 +  SyncUnwinderThreadBuffer* buff = new SyncUnwinderThreadBuffer();
   1.792 +  // We can set state without locking here because this thread owns the buffer
   1.793 +  // and it is going to fill it itself.
   1.794 +  buff->GetBuffer()->state = S_FILLING;
   1.795 +  init_empty_buffer(buff->GetBuffer(), stackTop);
   1.796 +  return buff;
   1.797 +}
   1.798 +
   1.799 +// RUNS IN SIGHANDLER CONTEXT
   1.800 +static UnwinderThreadBuffer* acquire_empty_buffer()
   1.801 +{
   1.802 +  /* acq lock
   1.803 +     if buffers == nullptr { rel lock; exit }
   1.804 +     scan to find a free buff; if none { rel lock; exit }
   1.805 +     set buff state to S_FILLING
   1.806 +     fillseqno++; and remember it
   1.807 +     rel lock
   1.808 +  */
   1.809 +  size_t i;
   1.810 +
   1.811 +  atomic_INC( &g_stats_totalSamples );
   1.812 +
   1.813 +  /* This code is critical.  We are in a signal handler and possibly
   1.814 +     with the malloc lock held.  So we can't allocate any heap, and
   1.815 +     can't safely call any C library functions, not even the pthread_
   1.816 +     functions.  And we certainly can't do any syscalls.  In short,
   1.817 +     this function needs to be self contained, not do any allocation,
   1.818 +     and not hold on to the spinlock for any significant length of
   1.819 +     time. */
   1.820 +
   1.821 +  spinLock_acquire(&g_spinLock);
   1.822 +
   1.823 +  /* First of all, look for this thread's entry in g_stackLimits[].
   1.824 +     We need to find it in order to figure out how much stack we can
   1.825 +     safely copy into the sample.  This assumes that pthread_self()
   1.826 +     is safe to call in a signal handler, which strikes me as highly
   1.827 +     likely. */
   1.828 +  pthread_t me = pthread_self();
   1.829 +  MOZ_ASSERT(g_stackLimitsUsed <= g_stackLimitsSize);
   1.830 +  for (i = 0; i < g_stackLimitsUsed; i++) {
   1.831 +    if (g_stackLimits[i].thrId == me)
   1.832 +      break;
   1.833 +  }
   1.834 +
   1.835 +  /* If the thread isn't registered for profiling, just ignore the call
   1.836 +     and return nullptr. */
   1.837 +  if (i == g_stackLimitsUsed) {
   1.838 +    spinLock_release(&g_spinLock);
   1.839 +    atomic_INC( &g_stats_thrUnregd );
   1.840 +    return nullptr;
   1.841 +  }
   1.842 +
   1.843 +  /* "this thread is registered for profiling" */
   1.844 +  MOZ_ASSERT(i < g_stackLimitsUsed);
   1.845 +
   1.846 +  /* The furthest point that we can safely scan back up the stack. */
   1.847 +  void* myStackTop = g_stackLimits[i].stackTop;
   1.848 +  g_stackLimits[i].nSamples++;
   1.849 +
   1.850 +  /* Try to find a free buffer to use. */
   1.851 +  if (g_buffers == nullptr) {
   1.852 +    /* The unwinder thread hasn't allocated any buffers yet.
   1.853 +       Nothing we can do. */
   1.854 +    spinLock_release(&g_spinLock);
   1.855 +    atomic_INC( &g_stats_noBuffAvail );
   1.856 +    return nullptr;
   1.857 +  }
   1.858 +
   1.859 +  for (i = 0; i < N_UNW_THR_BUFFERS; i++) {
   1.860 +    if (g_buffers[i]->state == S_EMPTY)
   1.861 +      break;
   1.862 +  }
   1.863 +  MOZ_ASSERT(i <= N_UNW_THR_BUFFERS);
   1.864 +
   1.865 +  if (i == N_UNW_THR_BUFFERS) {
   1.866 +    /* Again, no free buffers .. give up. */
   1.867 +    spinLock_release(&g_spinLock);
   1.868 +    atomic_INC( &g_stats_noBuffAvail );
   1.869 +    if (LOGLEVEL >= 3)
   1.870 +      LOG("BPUnw: handler:  no free buffers");
   1.871 +    return nullptr;
   1.872 +  }
   1.873 +
   1.874 +  /* So we can use this one safely.  Whilst still holding the lock,
   1.875 +     mark the buffer as belonging to us, and increment the sequence
   1.876 +     number. */
   1.877 +  UnwinderThreadBuffer* buff = g_buffers[i];
   1.878 +  MOZ_ASSERT(buff->state == S_EMPTY);
   1.879 +  buff->state = S_FILLING;
   1.880 +  buff->seqNo = g_seqNo;
   1.881 +  g_seqNo++;
   1.882 +
   1.883 +  /* And drop the lock.  We own the buffer, so go on and fill it. */
   1.884 +  spinLock_release(&g_spinLock);
   1.885 +
   1.886 +  /* Now we own the buffer, initialise it. */
   1.887 +  init_empty_buffer(buff, myStackTop);
   1.888 +  return buff;
   1.889 +}
   1.890 +
   1.891 +// RUNS IN SIGHANDLER CONTEXT
   1.892 +/* The calling thread owns the buffer, as denoted by its state being
   1.893 +   S_FILLING.  So we can mess with it without further locking. */
   1.894 +static void fill_buffer(ThreadProfile* aProfile,
   1.895 +                        UnwinderThreadBuffer* buff,
   1.896 +                        void* /* ucontext_t*, really */ ucV)
   1.897 +{
   1.898 +  MOZ_ASSERT(buff->state == S_FILLING);
   1.899 +
   1.900 +  ////////////////////////////////////////////////////
   1.901 +  // BEGIN fill
   1.902 +
   1.903 +  /* The buffer already will have some of its ProfileEntries filled
   1.904 +     in, but everything else needs to be filled in at this point. */
   1.905 +  //LOGF("Release full buffer: %lu ents", buff->entsUsed);
   1.906 +  /* Where the resulting info is to be dumped */
   1.907 +  buff->aProfile = aProfile;
   1.908 +
   1.909 +  /* And, if we have register state, that and the stack top */
   1.910 +  buff->haveNativeInfo = ucV != nullptr;
   1.911 +  if (buff->haveNativeInfo) {
   1.912 +#   if defined(SPS_PLAT_amd64_linux)
   1.913 +    ucontext_t* uc = (ucontext_t*)ucV;
   1.914 +    mcontext_t* mc = &(uc->uc_mcontext);
   1.915 +    buff->startRegs.xip = lul::TaggedUWord(mc->gregs[REG_RIP]);
   1.916 +    buff->startRegs.xsp = lul::TaggedUWord(mc->gregs[REG_RSP]);
   1.917 +    buff->startRegs.xbp = lul::TaggedUWord(mc->gregs[REG_RBP]);
   1.918 +#   elif defined(SPS_PLAT_amd64_darwin)
   1.919 +    ucontext_t* uc = (ucontext_t*)ucV;
   1.920 +    struct __darwin_mcontext64* mc = uc->uc_mcontext;
   1.921 +    struct __darwin_x86_thread_state64* ss = &mc->__ss;
   1.922 +    buff->regs.rip = ss->__rip;
   1.923 +    buff->regs.rsp = ss->__rsp;
   1.924 +    buff->regs.rbp = ss->__rbp;
   1.925 +#   elif defined(SPS_PLAT_arm_android)
   1.926 +    ucontext_t* uc = (ucontext_t*)ucV;
   1.927 +    mcontext_t* mc = &(uc->uc_mcontext);
   1.928 +    buff->startRegs.r15 = lul::TaggedUWord(mc->arm_pc);
   1.929 +    buff->startRegs.r14 = lul::TaggedUWord(mc->arm_lr);
   1.930 +    buff->startRegs.r13 = lul::TaggedUWord(mc->arm_sp);
   1.931 +    buff->startRegs.r12 = lul::TaggedUWord(mc->arm_ip);
   1.932 +    buff->startRegs.r11 = lul::TaggedUWord(mc->arm_fp);
   1.933 +    buff->startRegs.r7  = lul::TaggedUWord(mc->arm_r7);
   1.934 +#   elif defined(SPS_PLAT_x86_linux) || defined(SPS_PLAT_x86_android)
   1.935 +    ucontext_t* uc = (ucontext_t*)ucV;
   1.936 +    mcontext_t* mc = &(uc->uc_mcontext);
   1.937 +    buff->startRegs.xip = lul::TaggedUWord(mc->gregs[REG_EIP]);
   1.938 +    buff->startRegs.xsp = lul::TaggedUWord(mc->gregs[REG_ESP]);
   1.939 +    buff->startRegs.xbp = lul::TaggedUWord(mc->gregs[REG_EBP]);
   1.940 +#   elif defined(SPS_PLAT_x86_darwin)
   1.941 +    ucontext_t* uc = (ucontext_t*)ucV;
   1.942 +    struct __darwin_mcontext32* mc = uc->uc_mcontext;
   1.943 +    struct __darwin_i386_thread_state* ss = &mc->__ss;
   1.944 +    buff->regs.eip = ss->__eip;
   1.945 +    buff->regs.esp = ss->__esp;
   1.946 +    buff->regs.ebp = ss->__ebp;
   1.947 +#   else
   1.948 +#     error "Unknown plat"
   1.949 +#   endif
   1.950 +
   1.951 +    /* Copy up to N_STACK_BYTES from rsp-REDZONE upwards, but not
   1.952 +       going past the stack's registered top point.  Do some basic
   1.953 +       sanity checks too.  This assumes that the TaggedUWord holding
   1.954 +       the stack pointer value is valid, but it should be, since it
   1.955 +       was constructed that way in the code just above. */
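          +    /* Sketch with hypothetical numbers: on x86_64, if xsp is
          +       0x7ffd00001040, then start is 0x7ffd00000fc0 (xsp minus the
          +       128-byte red zone, word-aligned), and we copy from there up to
          +       stackMaxSafe, clamped to at most lul::N_STACK_BYTES bytes. */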
   1.956 +    { 
   1.957 +#     if defined(SPS_PLAT_amd64_linux) || defined(SPS_PLAT_amd64_darwin)
   1.958 +      uintptr_t rEDZONE_SIZE = 128;
   1.959 +      uintptr_t start = buff->startRegs.xsp.Value() - rEDZONE_SIZE;
   1.960 +#     elif defined(SPS_PLAT_arm_android)
   1.961 +      uintptr_t rEDZONE_SIZE = 0;
   1.962 +      uintptr_t start = buff->startRegs.r13.Value() - rEDZONE_SIZE;
   1.963 +#     elif defined(SPS_PLAT_x86_linux) || defined(SPS_PLAT_x86_darwin) \
   1.964 +           || defined(SPS_PLAT_x86_android)
   1.965 +      uintptr_t rEDZONE_SIZE = 0;
   1.966 +      uintptr_t start = buff->startRegs.xsp.Value() - rEDZONE_SIZE;
   1.967 +#     else
   1.968 +#       error "Unknown plat"
   1.969 +#     endif
   1.970 +      uintptr_t end   = (uintptr_t)buff->stackMaxSafe;
   1.971 +      uintptr_t ws    = sizeof(void*);
   1.972 +      start &= ~(ws-1);
   1.973 +      end   &= ~(ws-1);
   1.974 +      uintptr_t nToCopy = 0;
   1.975 +      if (start < end) {
   1.976 +        nToCopy = end - start;
   1.977 +        if (nToCopy > lul::N_STACK_BYTES)
   1.978 +          nToCopy = lul::N_STACK_BYTES;
   1.979 +      }
   1.980 +      MOZ_ASSERT(nToCopy <= lul::N_STACK_BYTES);
   1.981 +      buff->stackImg.mLen       = nToCopy;
   1.982 +      buff->stackImg.mStartAvma = start;
   1.983 +      if (nToCopy > 0) {
   1.984 +        memcpy(&buff->stackImg.mContents[0], (void*)start, nToCopy);
   1.985 +        (void)VALGRIND_MAKE_MEM_DEFINED(&buff->stackImg.mContents[0], nToCopy);
   1.986 +      }
   1.987 +    }
   1.988 +  } /* if (buff->haveNativeInfo) */
   1.989 +  // END fill
   1.990 +  ////////////////////////////////////////////////////
   1.991 +}
   1.992 +
   1.993 +// RUNS IN SIGHANDLER CONTEXT
   1.994 +/* The calling thread owns the buffer, as denoted by its state being
   1.995 +   S_FILLING.  So we can mess with it without further locking. */
   1.996 +static void release_full_buffer(ThreadProfile* aProfile,
   1.997 +                                UnwinderThreadBuffer* buff,
   1.998 +                                void* /* ucontext_t*, really */ ucV )
   1.999 +{
  1.1000 +  fill_buffer(aProfile, buff, ucV);
  1.1001 +  /* And now relinquish ownership of the buff, so that an unwinder
  1.1002 +     thread can pick it up. */
  1.1003 +  spinLock_acquire(&g_spinLock);
  1.1004 +  buff->state = S_FULL;
  1.1005 +  spinLock_release(&g_spinLock);
  1.1006 +}
  1.1007 +
  1.1008 +// RUNS IN SIGHANDLER CONTEXT
  1.1009 +// Allocate a ProfEntsPage, without using malloc, or return
  1.1010 +// ProfEntsPage_INVALID if we can't for some reason.
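          +// (malloc is off-limits here because the interrupted thread may itself
          +// be holding the malloc lock -- see the comments in acquire_empty_buffer.)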
  1.1011 +static ProfEntsPage* mmap_anon_ProfEntsPage()
  1.1012 +{
  1.1013 +# if defined(SPS_OS_darwin)
  1.1014 +  void* v = ::mmap(nullptr, sizeof(ProfEntsPage), PROT_READ | PROT_WRITE, 
  1.1015 +                   MAP_PRIVATE | MAP_ANON,      -1, 0);
  1.1016 +# else
  1.1017 +  void* v = ::mmap(nullptr, sizeof(ProfEntsPage), PROT_READ | PROT_WRITE, 
  1.1018 +                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  1.1019 +# endif
  1.1020 +  if (v == MAP_FAILED) {
  1.1021 +    return ProfEntsPage_INVALID;
  1.1022 +  } else {
  1.1023 +    return (ProfEntsPage*)v;
  1.1024 +  }
  1.1025 +}
  1.1026 +
  1.1027 +// Runs in the unwinder thread
  1.1028 +// Free a ProfEntsPage as allocated by mmap_anon_ProfEntsPage
  1.1029 +static void munmap_ProfEntsPage(ProfEntsPage* pep)
  1.1030 +{
  1.1031 +  MOZ_ALWAYS_TRUE(is_page_aligned(pep));
  1.1032 +  ::munmap(pep, sizeof(ProfEntsPage));
  1.1033 +}
  1.1034 +
  1.1035 +
  1.1036 +// RUNS IN SIGHANDLER CONTEXT
  1.1037 +void
  1.1038 +utb_add_prof_ent(/*MODIFIED*/UnwinderThreadBuffer* utb, ProfileEntry ent)
  1.1039 +{
  1.1040 +  uintptr_t limit
  1.1041 +    = N_FIXED_PROF_ENTS + (N_PROF_ENTS_PER_PAGE * N_PROF_ENT_PAGES);
  1.1042 +  if (utb->entsUsed == limit) {
  1.1043 +    /* We're full.  Now what? */
  1.1044 +    LOG("BPUnw: utb__addEntry: NO SPACE for ProfileEntry; ignoring.");
  1.1045 +    return;
  1.1046 +  }
  1.1047 +  MOZ_ASSERT(utb->entsUsed < limit);
  1.1048 +
  1.1049 +  /* Will it fit in the fixed array? */
  1.1050 +  if (utb->entsUsed < N_FIXED_PROF_ENTS) {
  1.1051 +    utb->entsFixed[utb->entsUsed] = ent;
  1.1052 +    utb->entsUsed++;
  1.1053 +    return;
  1.1054 +  }
  1.1055 +
  1.1056 +  /* No.  Put it in the extras. */
  1.1057 +  uintptr_t i     = utb->entsUsed;
  1.1058 +  uintptr_t j     = i - N_FIXED_PROF_ENTS;
  1.1059 +  uintptr_t j_div = j / N_PROF_ENTS_PER_PAGE; /* page number */
  1.1060 +  uintptr_t j_mod = j % N_PROF_ENTS_PER_PAGE; /* page offset */
  1.1061 +  ProfEntsPage* pep = utb->entsPages[j_div];
  1.1062 +  if (pep == ProfEntsPage_INVALID) {
  1.1063 +    pep = mmap_anon_ProfEntsPage();
  1.1064 +    if (pep == ProfEntsPage_INVALID) {
  1.1065 +      /* Urr, we ran out of memory.  Now what? */
  1.1066 +      LOG("BPUnw: utb__addEntry: MMAP FAILED for ProfileEntry; ignoring.");
  1.1067 +      return;
  1.1068 +    }
  1.1069 +    utb->entsPages[j_div] = pep;
  1.1070 +  }
  1.1071 +  pep->ents[j_mod] = ent;
  1.1072 +  utb->entsUsed++;
  1.1073 +}
  1.1074 +
  1.1075 +
  1.1076 +// misc helper
  1.1077 +static ProfileEntry utb_get_profent(UnwinderThreadBuffer* buff, uintptr_t i)
  1.1078 +{
  1.1079 +  MOZ_ASSERT(i < buff->entsUsed);
  1.1080 +  if (i < N_FIXED_PROF_ENTS) {
  1.1081 +    return buff->entsFixed[i];
  1.1082 +  } else {
  1.1083 +    uintptr_t j     = i - N_FIXED_PROF_ENTS;
  1.1084 +    uintptr_t j_div = j / N_PROF_ENTS_PER_PAGE; /* page number */
  1.1085 +    uintptr_t j_mod = j % N_PROF_ENTS_PER_PAGE; /* page offset */
  1.1086 +    MOZ_ASSERT(buff->entsPages[j_div] != ProfEntsPage_INVALID);
  1.1087 +    return buff->entsPages[j_div]->ents[j_mod];
  1.1088 +  }
  1.1089 +}
  1.1090 +
  1.1091 +/* Copy ProfileEntries presented to us by the sampling thread.
  1.1092 +   Most of them are copied verbatim into |buff->aProfile|,
  1.1093 +   except for 'hint' tags, which direct us to do something
  1.1094 +   different. */
  1.1095 +static void process_buffer(UnwinderThreadBuffer* buff, int oldest_ix)
  1.1096 +{
  1.1097 +  /* Need to lock |aProfile| so nobody tries to copy out entries
  1.1098 +     whilst we are putting them in. */
  1.1099 +  buff->aProfile->BeginUnwind();
  1.1100 +
  1.1101 +  /* The buff is a sequence of ProfileEntries (ents).  It has
  1.1102 +     this grammar:
  1.1103 +
  1.1104 +     | --pre-tags-- | (h 'P' .. h 'Q')* | --post-tags-- |
  1.1105 +                      ^               ^
  1.1106 +                      ix_first_hP     ix_last_hQ
  1.1107 +
  1.1108 +     Each (h 'P' .. h 'Q') subsequence represents one pseudostack
  1.1109 +     entry.  These, if present, are in the order
  1.1110 +     outermost-frame-first, and that is the order that they should
  1.1111 +     be copied into aProfile.  The --pre-tags-- and --post-tags--
  1.1112 +     are to be copied into the aProfile verbatim, except that they
  1.1113 +     may contain the hints "h 'F'" for a flush and "h 'N'" to
  1.1114 +     indicate that a native unwind is also required, and must be
  1.1115 +     interleaved with the pseudostack entries.
  1.1116 +
  1.1117 +     The hint tags that bound each pseudostack entry, "h 'P'" and "h
  1.1118 +     'Q'", are not to be copied into the aProfile -- they are
  1.1119 +     present only to make parsing easy here.  Also, the pseudostack
  1.1120 +     entries may contain an "'S' (void*)" entry, which is the stack
  1.1121 +     pointer value for that entry, and these are also not to be
  1.1122 +     copied.
  1.1123 +  */
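          +  /* Illustrative layout (hint tags only; "..." stands for whatever
          +     ordinary entries the sampler emitted):
          +
          +       ... h 'N'  h 'P' ... 'S' <sp> ... h 'Q'  h 'P' ... h 'Q'  ... h 'F' ...
          +                  ^                                       ^
          +                  ix_first_hP                             ix_last_hQ
          +
          +     i.e. two pseudostack frames, the first carrying a stack-pointer
          +     entry, preceded by a native-unwind hint and followed by a flush
          +     hint. */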
  1.1124 +  /* The first thing to do is therefore to find the pseudostack
  1.1125 +     entries, if any, and to find out also whether a native unwind
  1.1126 +     has been requested. */
  1.1127 +  const uintptr_t infUW = ~(uintptr_t)0; // infinity
  1.1128 +  bool  need_native_unw = false;
  1.1129 +  uintptr_t ix_first_hP = infUW; // "not found"
  1.1130 +  uintptr_t ix_last_hQ  = infUW; // "not found"
  1.1131 +
  1.1132 +  uintptr_t k;
  1.1133 +  for (k = 0; k < buff->entsUsed; k++) {
  1.1134 +    ProfileEntry ent = utb_get_profent(buff, k);
  1.1135 +    if (ent.is_ent_hint('N')) {
  1.1136 +      need_native_unw = true;
  1.1137 +    }
   1.1138 +    else if (ent.is_ent_hint('P') && ix_first_hP == infUW) {
  1.1139 +      ix_first_hP = k;
  1.1140 +    }
  1.1141 +    else if (ent.is_ent_hint('Q')) {
  1.1142 +      ix_last_hQ = k;
  1.1143 +    }
  1.1144 +  }
  1.1145 +
  1.1146 +  if (0) LOGF("BPUnw: ix_first_hP %llu  ix_last_hQ %llu  need_native_unw %llu",
  1.1147 +              (unsigned long long int)ix_first_hP,
  1.1148 +              (unsigned long long int)ix_last_hQ,
  1.1149 +              (unsigned long long int)need_native_unw);
  1.1150 +
  1.1151 +  /* There are four possibilities: native-only, pseudostack-only,
  1.1152 +     combined (both), and neither.  We handle all four cases. */
  1.1153 +
  1.1154 +  MOZ_ASSERT( (ix_first_hP == infUW && ix_last_hQ == infUW) ||
  1.1155 +              (ix_first_hP != infUW && ix_last_hQ != infUW) );
  1.1156 +  bool have_P = ix_first_hP != infUW;
  1.1157 +  if (have_P) {
  1.1158 +    MOZ_ASSERT(ix_first_hP < ix_last_hQ);
  1.1159 +    MOZ_ASSERT(ix_last_hQ <= buff->entsUsed);
  1.1160 +  }
  1.1161 +
  1.1162 +  /* Neither N nor P.  This is very unusual but has been observed to happen.
  1.1163 +     Just copy to the output. */
  1.1164 +  if (!need_native_unw && !have_P) {
  1.1165 +    for (k = 0; k < buff->entsUsed; k++) {
  1.1166 +      ProfileEntry ent = utb_get_profent(buff, k);
  1.1167 +      // action flush-hints
  1.1168 +      if (ent.is_ent_hint('F')) { buff->aProfile->flush(); continue; }
  1.1169 +      // skip ones we can't copy
  1.1170 +      if (ent.is_ent_hint() || ent.is_ent('S')) { continue; }
  1.1171 +      // handle GetBacktrace()
  1.1172 +      if (ent.is_ent('B')) {
  1.1173 +        UnwinderThreadBuffer* buff = (UnwinderThreadBuffer*)ent.get_tagPtr();
  1.1174 +        process_buffer(buff, -1);
  1.1175 +        continue;
  1.1176 +      }
  1.1177 +      // and copy everything else
  1.1178 +      buff->aProfile->addTag( ent );
  1.1179 +    }
  1.1180 +  }
  1.1181 +  else /* Native only-case. */
  1.1182 +  if (need_native_unw && !have_P) {
  1.1183 +    for (k = 0; k < buff->entsUsed; k++) {
  1.1184 +      ProfileEntry ent = utb_get_profent(buff, k);
  1.1185 +      // action a native-unwind-now hint
  1.1186 +      if (ent.is_ent_hint('N')) {
  1.1187 +        MOZ_ASSERT(buff->haveNativeInfo);
  1.1188 +        PCandSP* pairs = nullptr;
  1.1189 +        unsigned int nPairs = 0;
  1.1190 +        do_lul_unwind_Buffer(&pairs, &nPairs, buff, oldest_ix);
  1.1191 +        buff->aProfile->addTag( ProfileEntry('s', "(root)") );
  1.1192 +        for (unsigned int i = 0; i < nPairs; i++) {
  1.1193 +          /* Skip any outermost frames that
  1.1194 +             do_lul_unwind_Buffer didn't give us.  See comments
  1.1195 +             on that function for details. */
  1.1196 +          if (pairs[i].pc == 0 && pairs[i].sp == 0)
  1.1197 +            continue;
  1.1198 +          buff->aProfile
  1.1199 +              ->addTag( ProfileEntry('l', reinterpret_cast<void*>(pairs[i].pc)) );
  1.1200 +        }
  1.1201 +        if (pairs)
  1.1202 +          free(pairs);
  1.1203 +        continue;
  1.1204 +      }
  1.1205 +      // action flush-hints
  1.1206 +      if (ent.is_ent_hint('F')) { buff->aProfile->flush(); continue; }
  1.1207 +      // skip ones we can't copy
  1.1208 +      if (ent.is_ent_hint() || ent.is_ent('S')) { continue; }
  1.1209 +      // handle GetBacktrace()
  1.1210 +      if (ent.is_ent('B')) {
  1.1211 +        UnwinderThreadBuffer* buff = (UnwinderThreadBuffer*)ent.get_tagPtr();
  1.1212 +        process_buffer(buff, -1);
  1.1213 +        continue;
  1.1214 +      }
  1.1215 +      // and copy everything else
  1.1216 +      buff->aProfile->addTag( ent );
  1.1217 +    }
  1.1218 +  }
  1.1219 +  else /* Pseudostack-only case */
  1.1220 +  if (!need_native_unw && have_P) {
  1.1221 +    /* If there's no request for a native stack, it's easy: just
  1.1222 +       copy the tags verbatim into aProfile, skipping the ones that
  1.1223 +       can't be copied -- 'h' (hint) tags, and "'S' (void*)"
  1.1224 +       stack-pointer tags.  Except, insert a sample-start tag when
  1.1225 +       we see the start of the first pseudostack frame. */
  1.1226 +    for (k = 0; k < buff->entsUsed; k++) {
  1.1227 +      ProfileEntry ent = utb_get_profent(buff, k);
  1.1228 +      // We need to insert a sample-start tag before the first frame
  1.1229 +      if (k == ix_first_hP) {
  1.1230 +        buff->aProfile->addTag( ProfileEntry('s', "(root)") );
  1.1231 +      }
  1.1232 +      // action flush-hints
  1.1233 +      if (ent.is_ent_hint('F')) { buff->aProfile->flush(); continue; }
  1.1234 +      // skip ones we can't copy
  1.1235 +      if (ent.is_ent_hint() || ent.is_ent('S')) { continue; }
  1.1236 +      // handle GetBacktrace()
  1.1237 +      if (ent.is_ent('B')) {
  1.1238 +        UnwinderThreadBuffer* buff = (UnwinderThreadBuffer*)ent.get_tagPtr();
  1.1239 +        process_buffer(buff, -1);
  1.1240 +        continue;
  1.1241 +      }
  1.1242 +      // and copy everything else
  1.1243 +      buff->aProfile->addTag( ent );
  1.1244 +    }
  1.1245 +  }
  1.1246 +  else /* Combined case */
  1.1247 +  if (need_native_unw && have_P)
  1.1248 +  {
  1.1249 +    /* We need to get a native stacktrace and merge it with the
  1.1250 +       pseudostack entries.  This isn't too simple.  First, copy all
  1.1251 +       the tags up to the start of the pseudostack tags.  Then
  1.1252 +       generate a combined set of tags by native unwind and
  1.1253 +       pseudostack.  Then, copy all the stuff after the pseudostack
  1.1254 +       tags. */
  1.1255 +    MOZ_ASSERT(buff->haveNativeInfo);
  1.1256 +
  1.1257 +    // Get native unwind info
  1.1258 +    PCandSP* pairs = nullptr;
  1.1259 +    unsigned int n_pairs = 0;
  1.1260 +    do_lul_unwind_Buffer(&pairs, &n_pairs, buff, oldest_ix);
  1.1261 +
  1.1262 +    // Entries before the pseudostack frames
  1.1263 +    for (k = 0; k < ix_first_hP; k++) {
  1.1264 +      ProfileEntry ent = utb_get_profent(buff, k);
  1.1265 +      // action flush-hints
  1.1266 +      if (ent.is_ent_hint('F')) { buff->aProfile->flush(); continue; }
  1.1267 +      // skip ones we can't copy
  1.1268 +      if (ent.is_ent_hint() || ent.is_ent('S')) { continue; }
  1.1269 +      // handle GetBacktrace()
  1.1270 +      if (ent.is_ent('B')) {
  1.1271 +        UnwinderThreadBuffer* buff = (UnwinderThreadBuffer*)ent.get_tagPtr();
  1.1272 +        process_buffer(buff, -1);
  1.1273 +        continue;
  1.1274 +      }
  1.1275 +      // and copy everything else
  1.1276 +      buff->aProfile->addTag( ent );
  1.1277 +    }
  1.1278 +
  1.1279 +    // BEGIN merge
  1.1280 +    buff->aProfile->addTag( ProfileEntry('s', "(root)") );
  1.1281 +    unsigned int next_N = 0; // index in pairs[]
  1.1282 +    unsigned int next_P = ix_first_hP; // index in buff profent array
  1.1283 +    bool last_was_P = false;
  1.1284 +    if (0) LOGF("at mergeloop: n_pairs %llu ix_last_hQ %llu",
  1.1285 +                (unsigned long long int)n_pairs,
  1.1286 +                (unsigned long long int)ix_last_hQ);
  1.1287 +    /* Skip any outermost frames that do_lul_unwind_Buffer
  1.1288 +       didn't give us.  See comments on that function for
  1.1289 +       details. */
  1.1290 +    while (next_N < n_pairs && pairs[next_N].pc == 0 && pairs[next_N].sp == 0)
  1.1291 +      next_N++;
  1.1292 +
  1.1293 +    while (true) {
  1.1294 +      if (next_P <= ix_last_hQ) {
   1.1295 +        // Assert that next_P points at the start of a P entry
  1.1296 +        MOZ_ASSERT(utb_get_profent(buff, next_P).is_ent_hint('P'));
  1.1297 +      }
  1.1298 +      if (next_N >= n_pairs && next_P > ix_last_hQ) {
  1.1299 +        // both stacks empty
  1.1300 +        break;
  1.1301 +      }
   1.1302 +      /* Decide which entry to use next:
   1.1303 +
   1.1304 +         - if N is empty, we must use P, and vice versa;
   1.1305 +         - else, if the last entry used was P and the current P has a
   1.1306 +           zero SP, use P;
   1.1307 +         - else, both P and N are assumed to have valid SPs, and we
   1.1308 +           use the one with the larger value.
   1.1309 +      */
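          +      /* Hypothetical example: if the current P frame carries
          +         'S' 0xbeefe000 and the next N frame has sp == 0xbeefe800, the
          +         N frame is emitted first, since its larger SP means it is the
          +         outermore of the two on a downward-growing stack. */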
  1.1310 +      bool use_P = true;
  1.1311 +      if (next_N >= n_pairs) {
  1.1312 +        // N empty, use P
  1.1313 +        use_P = true;
  1.1314 +        if (0) LOG("  P  <=  no remaining N entries");
  1.1315 +      }
  1.1316 +      else if (next_P > ix_last_hQ) {
  1.1317 +        // P empty, use N
  1.1318 +        use_P = false;
  1.1319 +        if (0) LOG("  N  <=  no remaining P entries");
  1.1320 +      }
  1.1321 +      else {
  1.1322 +        // We have at least one N and one P entry available.
  1.1323 +        // Scan forwards to find the SP of the current P entry
  1.1324 +        u_int64_t sp_cur_P = 0;
  1.1325 +        unsigned int m = next_P + 1;
  1.1326 +        while (1) {
  1.1327 +          /* This assertion should hold because in a well formed
  1.1328 +             input, we must eventually find the hint-Q that marks
  1.1329 +             the end of this frame's entries. */
  1.1330 +          MOZ_ASSERT(m < buff->entsUsed);
  1.1331 +          ProfileEntry ent = utb_get_profent(buff, m);
  1.1332 +          if (ent.is_ent_hint('Q'))
  1.1333 +            break;
  1.1334 +          if (ent.is_ent('S')) {
  1.1335 +            sp_cur_P = reinterpret_cast<u_int64_t>(ent.get_tagPtr());
  1.1336 +            break;
  1.1337 +          }
  1.1338 +          m++;
  1.1339 +        }
  1.1340 +        if (last_was_P && sp_cur_P == 0) {
  1.1341 +          if (0) LOG("  P  <=  last_was_P && sp_cur_P == 0");
  1.1342 +          use_P = true;
  1.1343 +        } else {
  1.1344 +          u_int64_t sp_cur_N = pairs[next_N].sp;
  1.1345 +          use_P = (sp_cur_P > sp_cur_N);
  1.1346 +          if (0) LOGF("  %s  <=  sps P %p N %p",
  1.1347 +                      use_P ? "P" : "N", (void*)(intptr_t)sp_cur_P, 
  1.1348 +                                         (void*)(intptr_t)sp_cur_N);
  1.1349 +        }
  1.1350 +      }
  1.1351 +      /* So, we know which we are going to use. */
  1.1352 +      if (use_P) {
  1.1353 +        unsigned int m = next_P + 1;
  1.1354 +        while (true) {
  1.1355 +          MOZ_ASSERT(m < buff->entsUsed);
  1.1356 +          ProfileEntry ent = utb_get_profent(buff, m);
  1.1357 +          if (ent.is_ent_hint('Q')) {
  1.1358 +            next_P = m + 1;
  1.1359 +            break;
  1.1360 +          }
  1.1361 +          // we don't expect a flush-hint here
  1.1362 +          MOZ_ASSERT(!ent.is_ent_hint('F'));
  1.1363 +          // skip ones we can't copy
  1.1364 +          if (ent.is_ent_hint() || ent.is_ent('S')) { m++; continue; }
  1.1365 +          // and copy everything else
  1.1366 +          buff->aProfile->addTag( ent );
  1.1367 +          m++;
  1.1368 +        }
  1.1369 +      } else {
  1.1370 +        buff->aProfile
  1.1371 +            ->addTag( ProfileEntry('l', reinterpret_cast<void*>(pairs[next_N].pc)) );
  1.1372 +        next_N++;
  1.1373 +      }
  1.1374 +      /* Remember what we chose, for next time. */
  1.1375 +      last_was_P = use_P;
  1.1376 +    }
  1.1377 +
  1.1378 +    MOZ_ASSERT(next_P == ix_last_hQ + 1);
  1.1379 +    MOZ_ASSERT(next_N == n_pairs);
  1.1380 +    // END merge
  1.1381 +
  1.1382 +    // Entries after the pseudostack frames
  1.1383 +    for (k = ix_last_hQ+1; k < buff->entsUsed; k++) {
  1.1384 +      ProfileEntry ent = utb_get_profent(buff, k);
  1.1385 +      // action flush-hints
  1.1386 +      if (ent.is_ent_hint('F')) { buff->aProfile->flush(); continue; }
  1.1387 +      // skip ones we can't copy
  1.1388 +      if (ent.is_ent_hint() || ent.is_ent('S')) { continue; }
  1.1389 +      // and copy everything else
  1.1390 +      buff->aProfile->addTag( ent );
  1.1391 +    }
  1.1392 +
  1.1393 +    // free native unwind info
  1.1394 +    if (pairs)
  1.1395 +      free(pairs);
  1.1396 +  }
  1.1397 +
  1.1398 +#if 0
  1.1399 +  bool show = true;
  1.1400 +  if (show) LOG("----------------");
  1.1401 +  for (k = 0; k < buff->entsUsed; k++) {
  1.1402 +    ProfileEntry ent = utb_get_profent(buff, k);
  1.1403 +    if (show) ent.log();
  1.1404 +    if (ent.is_ent_hint('F')) {
  1.1405 +      /* This is a flush-hint */
  1.1406 +      buff->aProfile->flush();
  1.1407 +    } 
  1.1408 +    else if (ent.is_ent_hint('N')) {
  1.1409 +      /* This is a do-a-native-unwind-right-now hint */
  1.1410 +      MOZ_ASSERT(buff->haveNativeInfo);
  1.1411 +      PCandSP* pairs = nullptr;
  1.1412 +      unsigned int nPairs = 0;
  1.1413 +      do_lul_unwind_Buffer(&pairs, &nPairs, buff, oldest_ix);
  1.1414 +      buff->aProfile->addTag( ProfileEntry('s', "(root)") );
  1.1415 +      for (unsigned int i = 0; i < nPairs; i++) {
  1.1416 +        buff->aProfile
  1.1417 +            ->addTag( ProfileEntry('l', reinterpret_cast<void*>(pairs[i].pc)) );
  1.1418 +      }
  1.1419 +      if (pairs)
  1.1420 +        free(pairs);
  1.1421 +    } else {
  1.1422 +      /* Copy in verbatim */
  1.1423 +      buff->aProfile->addTag( ent );
  1.1424 +    }
  1.1425 +  }
  1.1426 +#endif
  1.1427 +
  1.1428 +  buff->aProfile->EndUnwind();
  1.1429 +}
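         +
         +// A minimal, self-contained sketch (illustrative only, never compiled
         +// or called) of the merge policy implemented in process_buffer() above,
         +// with the pseudostack reduced to a plain array and the zero-SP /
         +// last_was_P special case omitted.  Both inputs are outermost-first;
         +// the frame with the larger SP -- the older frame, since the stack
         +// grows downwards -- is emitted first.  The names used here are
         +// invented for the sketch and do not appear elsewhere in this file.
         +#if 0
         +struct SketchFrame { uintptr_t sp; const void* payload; };
         +
         +static void sketch_merge_by_sp(const SketchFrame* P, size_t nP,
         +                               const SketchFrame* N, size_t nN,
         +                               void (*emit)(const SketchFrame&))
         +{
         +  size_t iP = 0, iN = 0;
         +  while (iP < nP || iN < nN) {
         +    bool useP;
         +    if (iN >= nN)      useP = true;   // no native frames left
         +    else if (iP >= nP) useP = false;  // no pseudostack frames left
         +    else               useP = (P[iP].sp > N[iN].sp);
         +    emit(useP ? P[iP++] : N[iN++]);
         +  }
         +}
         +#endif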
  1.1430 +
  1.1431 +
  1.1432 +// Find out, in a platform-dependent way, where the code modules got
  1.1433 +// mapped in the process' virtual address space, and get |aLUL| to
  1.1434 +// load unwind info for them.
  1.1435 +void
  1.1436 +read_procmaps(lul::LUL* aLUL)
  1.1437 +{
  1.1438 +  MOZ_ASSERT(aLUL->CountMappings() == 0);
  1.1439 +
  1.1440 +# if defined(SPS_OS_linux) || defined(SPS_OS_android) || defined(SPS_OS_darwin)
  1.1441 +  SharedLibraryInfo info = SharedLibraryInfo::GetInfoForSelf();
  1.1442 +
  1.1443 +  for (size_t i = 0; i < info.GetSize(); i++) {
  1.1444 +    const SharedLibrary& lib = info.GetEntry(i);
  1.1445 +
  1.1446 +#if defined(SPS_OS_android) && !defined(MOZ_WIDGET_GONK)
  1.1447 +    // We're using faulty.lib.  Use a special-case object mapper.
  1.1448 +    AutoObjectMapperFaultyLib mapper(aLUL->mLog);
  1.1449 +#else
  1.1450 +    // We can use the standard POSIX-based mapper.
  1.1451 +    AutoObjectMapperPOSIX mapper(aLUL->mLog);
  1.1452 +#endif
  1.1453 +
  1.1454 +    // Ask |mapper| to map the object.  Then hand its mapped address
  1.1455 +    // to NotifyAfterMap().
  1.1456 +    void*  image = nullptr;
  1.1457 +    size_t size  = 0;
  1.1458 +    bool ok = mapper.Map(&image, &size, lib.GetName());
  1.1459 +    if (ok && image && size > 0) {
  1.1460 +      aLUL->NotifyAfterMap(lib.GetStart(), lib.GetEnd()-lib.GetStart(),
  1.1461 +                           lib.GetName().c_str(), image);
  1.1462 +    } else if (!ok && lib.GetName() == "") {
  1.1463 +      // The object has no name and (as a consequence) the mapper
  1.1464 +      // failed to map it.  This happens on Linux, where
  1.1465 +      // GetInfoForSelf() produces two such mappings: one for the
  1.1466 +      // executable and one for the VDSO.  The executable one isn't a
  1.1467 +      // big deal since there's not much interesting code in there,
  1.1468 +      // but the VDSO one is a problem on x86-{linux,android} because
  1.1469 +      // lack of knowledge about the mapped area inhibits LUL's
  1.1470 +      // special __kernel_syscall handling.  Hence notify |aLUL| at
  1.1471 +      // least of the mapping, even though it can't read any unwind
  1.1472 +      // information for the area.
  1.1473 +      aLUL->NotifyExecutableArea(lib.GetStart(), lib.GetEnd()-lib.GetStart());
  1.1474 +    }
  1.1475 +
  1.1476 +    // |mapper| goes out of scope at this point and so its destructor
  1.1477 +    // unmaps the object.
  1.1478 +  }
  1.1479 +
  1.1480 +# else
  1.1481 +#  error "Unknown platform"
  1.1482 +# endif
  1.1483 +}
  1.1484 +
  1.1485 +// LUL needs a callback for its logging sink.
  1.1486 +static void
  1.1487 +logging_sink_for_LUL(const char* str) {
  1.1488 +  // Ignore any trailing \n, since LOG will add one anyway.
  1.1489 +  size_t n = strlen(str);
  1.1490 +  if (n > 0 && str[n-1] == '\n') {
  1.1491 +    char* tmp = strdup(str);
         +    if (!tmp) { LOG(str); return; }  // fall back if strdup fails
  1.1492 +    tmp[n-1] = 0;
  1.1493 +    LOG(tmp);
  1.1494 +    free(tmp);
  1.1495 +  } else {
  1.1496 +    LOG(str);
  1.1497 +  }
  1.1498 +}
  1.1499 +
  1.1500 +// Runs in the unwinder thread -- well, this _is_ the unwinder thread.
  1.1501 +static void* unwind_thr_fn(void* exit_nowV)
  1.1502 +{
  1.1503 +  // This is the unwinder thread function.  The first thread in must
  1.1504 +  // create the unwinder library and request it to read the debug
  1.1505 +  // info.  The last thread out must deallocate the library.  These
  1.1506 +  // three tasks (create library, read debuginfo, destroy library) are
  1.1507 +  // sequentialised by |sLULmutex|.  |sLUL| and |sLULcount| may only
  1.1508 +  // be modified whilst |sLULmutex| is held.
  1.1509 +  //
  1.1510 +  // Once the threads are up and running, |sLUL| (the pointer itself,
  1.1511 +  // that is) stays constant, and the multiple threads may make
  1.1512 +  // concurrent calls into |sLUL| to do concurrent unwinding.
  1.1513 +  LOG("unwind_thr_fn: START");
  1.1514 +
  1.1515 +  // A hook for testing LUL: at the first entrance here, check env var
  1.1516 +  // MOZ_PROFILER_LUL_TEST, and if set, run tests on LUL.  Note that
  1.1517 +  // it is preferable to run the LUL tests via gtest, but gtest is not
  1.1518 +  // currently supported on all targets that LUL runs on.  Hence the
  1.1519 +  // auxiliary mechanism here is also needed.
  1.1520 +  bool doLulTest = false;
  1.1521 +
  1.1522 +  mozilla::DebugOnly<int> r = pthread_mutex_lock(&sLULmutex);
  1.1523 +  MOZ_ASSERT(!r);
  1.1524 +
  1.1525 +  if (!sLUL) {
  1.1526 +    // sLUL hasn't been allocated, so we must be the first thread in.
  1.1527 +    sLUL = new lul::LUL(logging_sink_for_LUL);
  1.1528 +    MOZ_ASSERT(sLUL);
  1.1529 +    MOZ_ASSERT(sLULcount == 0);
  1.1530 +    // Register this thread so it can read unwind info and do unwinding.
  1.1531 +    sLUL->RegisterUnwinderThread();
  1.1532 +    // Read all the unwind info currently available.
  1.1533 +    read_procmaps(sLUL);
  1.1534 +    // Has a test been requested?
  1.1535 +    if (PR_GetEnv("MOZ_PROFILER_LUL_TEST")) {
  1.1536 +      doLulTest = true;
  1.1537 +    }
  1.1538 +  } else {
  1.1539 +    // sLUL has already been allocated, so we can't be the first
  1.1540 +    // thread in.
  1.1541 +    MOZ_ASSERT(sLULcount > 0);
  1.1542 +    // Register this thread so it can do unwinding.
  1.1543 +    sLUL->RegisterUnwinderThread();
  1.1544 +  }
  1.1545 +
  1.1546 +  sLULcount++;
  1.1547 +
  1.1548 +  r = pthread_mutex_unlock(&sLULmutex);
  1.1549 +  MOZ_ASSERT(!r);
  1.1550 +
  1.1551 +  // If a test has been requested for LUL, run it.  Summary results
  1.1552 +  // are sent to sLUL's logging sink.  Note that this happens after
  1.1553 +  // read_procmaps has read unwind information into sLUL, so that the
  1.1554 +  // tests have something to unwind against.  Without that they'd be
  1.1555 +  // pretty meaningless.
  1.1556 +  if (doLulTest) {
  1.1557 +    int nTests = 0, nTestsPassed = 0;
  1.1558 +    RunLulUnitTests(&nTests, &nTestsPassed, sLUL);
  1.1559 +  }
  1.1560 +
  1.1561 +  // At this point, sLUL -- the single instance of the library -- is
  1.1562 +  // allocated and has read the required unwind info.  All running
  1.1563 +  // threads can now make Unwind() requests of it concurrently, if
  1.1564 +  // they wish.
  1.1565 +
  1.1566 +  // Now go on to allocate the array of buffers used for communication
  1.1567 +  // between the sampling threads and the unwinder threads.
  1.1568 +
  1.1569 +  // If we're the first thread in, we'll need to allocate the buffer
  1.1570 +  // array g_buffers plus the UnwinderThreadBuffer structs it points at.
  1.1571 +  spinLock_acquire(&g_spinLock);
  1.1572 +  if (g_buffers == nullptr) {
  1.1573 +    // Drop the lock, allocate and initialise a candidate buffer
  1.1574 +    // array, reacquire the lock, and try to install it -- which
  1.1575 +    // might fail if someone else beat us to it.
  1.1576 +    spinLock_release(&g_spinLock);
  1.1577 +    UnwinderThreadBuffer** buffers
  1.1578 +      = (UnwinderThreadBuffer**)malloc(N_UNW_THR_BUFFERS
  1.1579 +                                        * sizeof(UnwinderThreadBuffer*));
  1.1580 +    MOZ_ASSERT(buffers);
  1.1581 +    int i;
  1.1582 +    for (i = 0; i < N_UNW_THR_BUFFERS; i++) {
  1.1583 +      /* These calloc-ations are shared between the sampling and
  1.1584 +         unwinding threads.  They must not be freed until all such
  1.1585 +         threads have terminated. */
  1.1586 +      buffers[i] = (UnwinderThreadBuffer*)
  1.1587 +                   calloc(1, sizeof(UnwinderThreadBuffer));
  1.1588 +      MOZ_ASSERT(buffers[i]);
  1.1589 +      buffers[i]->state = S_EMPTY;
  1.1590 +    }
  1.1591 +    /* Try to install it */
  1.1592 +    spinLock_acquire(&g_spinLock);
  1.1593 +    if (g_buffers == nullptr) {
  1.1594 +      g_buffers = buffers;
  1.1595 +      spinLock_release(&g_spinLock);
  1.1596 +    } else {
  1.1597 +      /* Someone else beat us to it.  Release what we just allocated
  1.1598 +         so as to avoid a leak. */
  1.1599 +      spinLock_release(&g_spinLock);
  1.1600 +      for (i = 0; i < N_UNW_THR_BUFFERS; i++) {
  1.1601 +        free(buffers[i]);
  1.1602 +      }
  1.1603 +      free(buffers);
  1.1604 +    }
  1.1605 +  } else {
  1.1606 +    /* They are already allocated, so just drop the lock and continue. */
  1.1607 +    spinLock_release(&g_spinLock);
  1.1608 +  }
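         +  /* Note on the pattern above: the candidate array is allocated with
         +     the spinlock dropped and only installed while holding it, so
         +     malloc/calloc are never called with g_spinLock held; if several
         +     threads race, exactly one installation wins and the losers free
         +     their copies. */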
  1.1609 +
  1.1610 +  /* 
  1.1611 +    while (1) {
  1.1612 +      acq lock
  1.1613 +      scan to find oldest full
  1.1614 +         if none { rel lock; sleep; continue }
  1.1615 +      set buff state to emptying
  1.1616 +      rel lock
  1.1617 +      acq MLock // implicitly
  1.1618 +      process buffer
  1.1619 +      rel MLock // implicitly
  1.1620 +      acq lock
  1.1621 +      set buff state to S_EMPTY
  1.1622 +      rel lock
  1.1623 +    }
  1.1624 +  */
  1.1625 +  int* exit_now = (int*)exit_nowV;
  1.1626 +  int ms_to_sleep_if_empty = 1;
  1.1627 +
  1.1628 +  const int longest_sleep_ms = 1000;
  1.1629 +  bool show_sleep_message = true;
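         +  // Backoff policy for the polling loop below: when no full buffer is
         +  // found, ms_to_sleep_if_empty grows by 2 ms per empty scan up to
         +  // roughly 20 ms, then by about 1.5x per scan, capped at
         +  // longest_sleep_ms; it is reset to 1 ms once a buffer is processed.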
  1.1630 +
  1.1631 +  while (1) {
  1.1632 +
  1.1633 +    if (*exit_now != 0) {
  1.1634 +      *exit_now = 0;
  1.1635 +      break;
  1.1636 +    }
  1.1637 +
  1.1638 +    spinLock_acquire(&g_spinLock);
  1.1639 +
  1.1640 +    /* Find the oldest filled buffer, if any. */
  1.1641 +    uint64_t oldest_seqNo = ~0ULL; /* infinity */
  1.1642 +    int      oldest_ix    = -1;
  1.1643 +    int      i;
  1.1644 +    for (i = 0; i < N_UNW_THR_BUFFERS; i++) {
  1.1645 +      UnwinderThreadBuffer* buff = g_buffers[i];
  1.1646 +      if (buff->state != S_FULL) continue;
  1.1647 +      if (buff->seqNo < oldest_seqNo) {
  1.1648 +        oldest_seqNo = buff->seqNo;
  1.1649 +        oldest_ix    = i;
  1.1650 +      }
  1.1651 +    }
  1.1652 +    if (oldest_ix == -1) {
  1.1653 +      /* We didn't find a full buffer.  Snooze and try again later. */
  1.1654 +      MOZ_ASSERT(oldest_seqNo == ~0ULL);
  1.1655 +      spinLock_release(&g_spinLock);
  1.1656 +      if (ms_to_sleep_if_empty > 100 && LOGLEVEL >= 2) {
  1.1657 +        if (show_sleep_message)
  1.1658 +          LOGF("BPUnw: unwinder: sleep for %d ms", ms_to_sleep_if_empty);
  1.1659 +        /* If we've already shown the message for the longest sleep,
  1.1660 +           don't show it again, until the next round of sleeping
  1.1661 +           starts. */
  1.1662 +        if (ms_to_sleep_if_empty == longest_sleep_ms)
  1.1663 +          show_sleep_message = false;
  1.1664 +      }
  1.1665 +      sleep_ms(ms_to_sleep_if_empty);
  1.1666 +      if (ms_to_sleep_if_empty < 20) {
  1.1667 +        ms_to_sleep_if_empty += 2;
  1.1668 +      } else {
  1.1669 +        ms_to_sleep_if_empty = (15 * ms_to_sleep_if_empty) / 10;
  1.1670 +        if (ms_to_sleep_if_empty > longest_sleep_ms)
  1.1671 +          ms_to_sleep_if_empty = longest_sleep_ms;
  1.1672 +      }
  1.1673 +      continue;
  1.1674 +    }
  1.1675 +
  1.1676 +    /* We found a full buffer.  Mark it as 'ours' and drop the
  1.1677 +       lock; then we can safely hand it to the unwinder. */
  1.1678 +    UnwinderThreadBuffer* buff = g_buffers[oldest_ix];
  1.1679 +    MOZ_ASSERT(buff->state == S_FULL);
  1.1680 +    buff->state = S_EMPTYING;
  1.1681 +    spinLock_release(&g_spinLock);
  1.1682 +
  1.1683 +    /* Unwind.  Here we can do anything we like, since any resource
  1.1684 +       stalls we may encounter (e.g. on malloc locks) in competition
  1.1685 +       with signal-handler instances will be short-lived, because the
  1.1686 +       signal handler is guaranteed nonblocking. */
  1.1687 +    if (0) LOGF("BPUnw: unwinder: seqNo %llu: emptying buf %d\n",
  1.1688 +                (unsigned long long int)oldest_seqNo, oldest_ix);
  1.1689 +
  1.1690 +    process_buffer(buff, oldest_ix);
  1.1691 +
  1.1692 +    /* And .. we're done.  Mark the buffer as empty so it can be
  1.1693 +       reused.  First though, unmap any of the entsPages that got
  1.1694 +       mapped during filling. */
  1.1695 +    for (i = 0; i < N_PROF_ENT_PAGES; i++) {
  1.1696 +      if (buff->entsPages[i] == ProfEntsPage_INVALID)
  1.1697 +        continue;
  1.1698 +      munmap_ProfEntsPage(buff->entsPages[i]);
  1.1699 +      buff->entsPages[i] = ProfEntsPage_INVALID;
  1.1700 +    }
  1.1701 +
  1.1702 +    (void)VALGRIND_MAKE_MEM_UNDEFINED(&buff->stackImg.mContents[0],
  1.1703 +                                      lul::N_STACK_BYTES);
  1.1704 +    spinLock_acquire(&g_spinLock);
  1.1705 +    MOZ_ASSERT(buff->state == S_EMPTYING);
  1.1706 +    buff->state = S_EMPTY;
  1.1707 +    spinLock_release(&g_spinLock);
  1.1708 +    ms_to_sleep_if_empty = 1;
  1.1709 +    show_sleep_message = true;
  1.1710 +  }
  1.1711 +
  1.1712 +  // This unwinder thread is exiting.  If it's the last one out,
  1.1713 +  // shut down and deallocate the unwinder library.
  1.1714 +  r = pthread_mutex_lock(&sLULmutex);
  1.1715 +  MOZ_ASSERT(!r);
  1.1716 +
  1.1717 +  MOZ_ASSERT(sLULcount > 0);
  1.1718 +  if (sLULcount == 1) {
  1.1719 +    // Tell the library to discard unwind info for the entire address
  1.1720 +    // space.
  1.1721 +    sLUL->NotifyBeforeUnmapAll();
  1.1722 +
  1.1723 +    delete sLUL;
  1.1724 +    sLUL = nullptr;
  1.1725 +  }
  1.1726 +
  1.1727 +  sLULcount--;
  1.1728 +
  1.1729 +  r = pthread_mutex_unlock(&sLULmutex);
  1.1730 +  MOZ_ASSERT(!r);
  1.1731 +
  1.1732 +  LOG("unwind_thr_fn: STOP");
  1.1733 +  return nullptr;
  1.1734 +}
  1.1735 +
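         +// Used for synchronous sampling (cf. the GetBacktrace() handling in
         +// process_buffer): fills the thread's sync buffer from the supplied
         +// register context, marks it S_FULL, and links it onto the thread's
         +// pseudostack so it can be processed later.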
  1.1736 +static void finish_sync_buffer(ThreadProfile* profile,
  1.1737 +                               UnwinderThreadBuffer* buff,
  1.1738 +                               void* /* ucontext_t*, really */ ucV)
  1.1739 +{
  1.1740 +  SyncProfile* syncProfile = profile->AsSyncProfile();
  1.1741 +  MOZ_ASSERT(syncProfile);
  1.1742 +  SyncUnwinderThreadBuffer* utb = static_cast<SyncUnwinderThreadBuffer*>(
  1.1743 +                                                   syncProfile->GetUWTBuffer());
  1.1744 +  fill_buffer(profile, utb->GetBuffer(), ucV);
  1.1745 +  utb->GetBuffer()->state = S_FULL;
  1.1746 +  PseudoStack* stack = profile->GetPseudoStack();
  1.1747 +  stack->addLinkedUWTBuffer(utb);
  1.1748 +}
  1.1749 +
  1.1750 +static void release_sync_buffer(LinkedUWTBuffer* buff)
  1.1751 +{
  1.1752 +  SyncUnwinderThreadBuffer* data = static_cast<SyncUnwinderThreadBuffer*>(buff);
  1.1753 +  MOZ_ASSERT(data->GetBuffer()->state == S_EMPTY);
  1.1754 +  delete data;
  1.1755 +}
  1.1756 +
  1.1757 +////////////////////////////////////////////////////////////////
  1.1758 +////////////////////////////////////////////////////////////////
  1.1759 +////////////////////////////////////////////////////////////////
  1.1760 +////////////////////////////////////////////////////////////////
  1.1761 +////////////////////////////////////////////////////////////////
  1.1762 +////////////////////////////////////////////////////////////////
  1.1763 +
  1.1764 +// Keeps count of how frames are recovered (from the initial context,
  1.1765 +// via CFI, or by stack scanning), which is useful for diagnostics.
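         +// Note: the counters below are plain (non-atomic) statics, so if more
         +// than one unwinder thread is running, the totals may race slightly;
         +// that is acceptable for rough diagnostic output.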
  1.1766 +static void stats_notify_frame(int n_context, int n_cfi, int n_scanned)
  1.1767 +{
  1.1768 +  // Gather stats in intervals.
  1.1769 +  static unsigned int nf_total    = 0; // total frames since last printout
  1.1770 +  static unsigned int nf_CONTEXT  = 0;
  1.1771 +  static unsigned int nf_CFI      = 0;
  1.1772 +  static unsigned int nf_SCANNED  = 0;
  1.1773 +
  1.1774 +  nf_CONTEXT += n_context;
  1.1775 +  nf_CFI     += n_cfi;
  1.1776 +  nf_SCANNED += n_scanned;
  1.1777 +  nf_total   += (n_context + n_cfi + n_scanned);
  1.1778 +
  1.1779 +  if (nf_total >= 5000) {
  1.1780 +    LOGF("BPUnw frame stats: TOTAL %5u"
  1.1781 +         "    CTX %4u    CFI %4u    SCAN %4u",
  1.1782 +         nf_total, nf_CONTEXT, nf_CFI, nf_SCANNED);
  1.1783 +    nf_total    = 0;
  1.1784 +    nf_CONTEXT  = 0;
  1.1785 +    nf_CFI      = 0;
  1.1786 +    nf_SCANNED  = 0;
  1.1787 +  }
  1.1788 +}
  1.1789 +
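         +// Unwinds the stack captured in |buff| using LUL and returns the
         +// result in a freshly calloc'd array: on return, (*pairs)[0 ..
         +// *nPairs-1] hold PC/SP values ordered outermost frame first, and the
         +// caller owns (and must free) *pairs.  On allocation failure, *pairs
         +// is nullptr and *nPairs is 0.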
  1.1790 +static
  1.1791 +void do_lul_unwind_Buffer(/*OUT*/PCandSP** pairs,
  1.1792 +                          /*OUT*/unsigned int* nPairs,
  1.1793 +                          UnwinderThreadBuffer* buff,
  1.1794 +                          int buffNo /* for debug printing only */)
  1.1795 +{
  1.1796 +# if defined(SPS_ARCH_amd64) || defined(SPS_ARCH_x86)
  1.1797 +  lul::UnwindRegs startRegs = buff->startRegs;
  1.1798 +  if (0) {
  1.1799 +    LOGF("Initial RIP = 0x%llx", (unsigned long long int)startRegs.xip.Value());
  1.1800 +    LOGF("Initial RSP = 0x%llx", (unsigned long long int)startRegs.xsp.Value());
  1.1801 +    LOGF("Initial RBP = 0x%llx", (unsigned long long int)startRegs.xbp.Value());
  1.1802 +  }
  1.1803 +
  1.1804 +# elif defined(SPS_ARCH_arm)
  1.1805 +  lul::UnwindRegs startRegs = buff->startRegs;
  1.1806 +  if (0) {
  1.1807 +    LOGF("Initial R15 = 0x%llx", (unsigned long long int)startRegs.r15.Value());
  1.1808 +    LOGF("Initial R13 = 0x%llx", (unsigned long long int)startRegs.r13.Value());
  1.1809 +  }
  1.1810 +
  1.1811 +# else
  1.1812 +#   error "Unknown platform"
  1.1813 +# endif
  1.1814 +
  1.1815 +  // FIXME: should we reinstate the ability to use separate debug objects?
  1.1816 +  // /* Make up a list of places where the debug objects might be. */
  1.1817 +  // std::vector<std::string> debug_dirs;
  1.1818 +# if defined(SPS_OS_linux)
  1.1819 +  //  debug_dirs.push_back("/usr/lib/debug/lib");
  1.1820 +  //  debug_dirs.push_back("/usr/lib/debug/usr/lib");
  1.1821 +  //  debug_dirs.push_back("/usr/lib/debug/lib/x86_64-linux-gnu");
  1.1822 +  //  debug_dirs.push_back("/usr/lib/debug/usr/lib/x86_64-linux-gnu");
  1.1823 +# elif defined(SPS_OS_android)
  1.1824 +  //  debug_dirs.push_back("/sdcard/symbols/system/lib");
  1.1825 +  //  debug_dirs.push_back("/sdcard/symbols/system/bin");
  1.1826 +# elif defined(SPS_OS_darwin)
  1.1827 +  //  /* Nothing */
  1.1828 +# else
  1.1829 +#   error "Unknown platform"
  1.1830 +# endif
  1.1831 +
  1.1832 +  // Set the max number of scanned or otherwise dubious frames
  1.1833 +  // to the user-specified limit.
  1.1834 +  size_t scannedFramesAllowed
  1.1835 +    = std::min(std::max(0, sUnwindStackScan), MAX_NATIVE_FRAMES);
  1.1836 +
  1.1837 +  // The max number of frames is MAX_NATIVE_FRAMES, so as to avoid
  1.1838 +  // the unwinder wasting a lot of time looping on corrupted stacks.
  1.1839 +  uintptr_t framePCs[MAX_NATIVE_FRAMES];
  1.1840 +  uintptr_t frameSPs[MAX_NATIVE_FRAMES];
  1.1841 +  size_t framesAvail = mozilla::ArrayLength(framePCs);
  1.1842 +  size_t framesUsed  = 0;
  1.1843 +  size_t scannedFramesAcquired = 0;
  1.1844 +  sLUL->Unwind( &framePCs[0], &frameSPs[0], 
  1.1845 +                &framesUsed, &scannedFramesAcquired,
  1.1846 +                framesAvail, scannedFramesAllowed,
  1.1847 +                &startRegs, &buff->stackImg );
  1.1848 +
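         +  // The frame total is split as: one context frame (the initial
         +  // register state), scannedFramesAcquired frames found by stack
         +  // scanning, and the remainder recovered via CFI.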
  1.1849 +  if (LOGLEVEL >= 2)
  1.1850 +    stats_notify_frame(/* context */ 1,
  1.1851 +                       /* cfi     */ framesUsed - 1 - scannedFramesAcquired,
  1.1852 +                       /* scanned */ scannedFramesAcquired);
  1.1853 +
  1.1854 +  // PC values are now in framePCs[0 .. framesUsed-1], with [0] being
  1.1855 +  // the innermost frame.  SP values are likewise in frameSPs[].
  1.1856 +  *pairs  = (PCandSP*)calloc(framesUsed, sizeof(PCandSP));
  1.1857 +  *nPairs = framesUsed;
  1.1858 +  if (*pairs == nullptr) {
  1.1859 +    *nPairs = 0;
  1.1860 +    return;
  1.1861 +  }
  1.1862 +
  1.1863 +  if (framesUsed > 0) {
  1.1864 +    for (unsigned int frame_index = 0; 
  1.1865 +         frame_index < framesUsed; ++frame_index) {
  1.1866 +      (*pairs)[framesUsed-1-frame_index].pc = framePCs[frame_index];
  1.1867 +      (*pairs)[framesUsed-1-frame_index].sp = frameSPs[frame_index];
  1.1868 +    }
  1.1869 +  }
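         +  // (*pairs)[0] now holds the outermost frame and
         +  // (*pairs)[framesUsed-1] the innermost; the merge loop in
         +  // process_buffer() relies on this outermost-first ordering.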
  1.1870 +
  1.1871 +  if (LOGLEVEL >= 3) {
  1.1872 +    LOGF("BPUnw: unwinder: seqNo %llu, buf %d: got %u frames",
  1.1873 +         (unsigned long long int)buff->seqNo, buffNo,
  1.1874 +         (unsigned int)framesUsed);
  1.1875 +  }
  1.1876 +
  1.1877 +  if (LOGLEVEL >= 2) {
  1.1878 +    if (0 == (g_stats_totalSamples % 1000))
  1.1879 +      LOGF("BPUnw: %llu total samples, %llu failed (buffer unavail), "
  1.1880 +                   "%llu failed (thread unreg'd)",
  1.1881 +           (unsigned long long int)g_stats_totalSamples,
  1.1882 +           (unsigned long long int)g_stats_noBuffAvail,
  1.1883 +           (unsigned long long int)g_stats_thrUnregd);
  1.1884 +  }
  1.1885 +}
  1.1886 +
  1.1887 +#endif /* defined(SPS_OS_windows) || defined(SPS_OS_darwin) */
