tools/profiler/UnwinderThread2.cpp

author:      Michael Schloh von Bennewitz <michael@schloh.com>
date:        Tue, 06 Jan 2015 21:39:09 +0100
branch:      TOR_BUG_9701
changeset:   8:97036ab72558
permissions: -rw-r--r--

Conditionally force memory storage according to privacy.thirdparty.isolate;
this solves Tor bug #9701, complying with the disk-avoidance requirements
documented at
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.
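
Background sketch (not part of this changeset's diff): the pattern the
description above refers to is a boolean pref check that selects memory-only
storage before anything is written to disk.  A minimal illustration, assuming
Gecko's mozilla::Preferences API; the ChooseStorage() helper and StorageKind
enum are hypothetical names used only for this example.

#include "mozilla/Preferences.h"

enum class StorageKind { Disk, MemoryOnly };

// Decide where data may be stored.  With privacy.thirdparty.isolate
// enabled, force memory-only storage so that nothing touches the disk.
static StorageKind ChooseStorage()
{
  bool isolate =
    mozilla::Preferences::GetBool("privacy.thirdparty.isolate", false);
  return isolate ? StorageKind::MemoryOnly : StorageKind::Disk;
}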

     1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     2 /* This Source Code Form is subject to the terms of the Mozilla Public
     3  * License, v. 2.0. If a copy of the MPL was not distributed with this
     4  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     6 #include <stdio.h>
     7 #include <signal.h>
     8 #include <string.h>
     9 #include <stdlib.h>
    10 #include <time.h>
    12 #ifdef MOZ_VALGRIND
    13 # include <valgrind/helgrind.h>
    14 # include <valgrind/memcheck.h>
    15 #else
    16 # define VALGRIND_HG_MUTEX_LOCK_PRE(_mx,_istry)  /* */
    17 # define VALGRIND_HG_MUTEX_LOCK_POST(_mx)        /* */
    18 # define VALGRIND_HG_MUTEX_UNLOCK_PRE(_mx)       /* */
    19 # define VALGRIND_HG_MUTEX_UNLOCK_POST(_mx)      /* */
    20 # define VALGRIND_MAKE_MEM_DEFINED(_addr,_len)   ((void)0)
    21 # define VALGRIND_MAKE_MEM_UNDEFINED(_addr,_len) ((void)0)
    22 #endif
    24 #include "prenv.h"
    25 #include "mozilla/arm.h"
    26 #include "mozilla/DebugOnly.h"
    27 #include <stdint.h>
    28 #include "PlatformMacros.h"
    30 #include "platform.h"
    31 #include <ostream>
    32 #include <string>
    34 #include "ProfileEntry.h"
    35 #include "SyncProfile.h"
    36 #include "AutoObjectMapper.h"
    37 #include "UnwinderThread2.h"
    39 #if !defined(SPS_OS_windows)
    40 # include <sys/mman.h>
    41 #endif
    43 #if defined(SPS_OS_android) || defined(SPS_OS_linux)
    44 # include <ucontext.h>
    45 # include "LulMain.h"
    46 #endif
    48 #include "shared-libraries.h"
    51 // Verbosity of this module, for debugging:
    52 //   0  silent
    53 //   1  adds info about debuginfo load success/failure
    54 //   2  adds slow-summary stats for buffer fills/misses (RECOMMENDED)
    55 //   3  adds per-sample summary lines
    56 //   4  adds per-sample frame listing
    57 // Note that levels 3 and above carry a risk of deadlock and are
    58 // not recommended for extended use.
    59 #define LOGLEVEL 2
    61 // The maximum number of frames that the native unwinder will
    62 // produce.  Setting it too high risks wasting a lot of time
    63 // looping on corrupted stacks.
    64 #define MAX_NATIVE_FRAMES 256
    67 // The 'else' of this covers the entire rest of the file
    68 #if defined(SPS_OS_windows) || defined(SPS_OS_darwin)
    70 //////////////////////////////////////////////////////////
    71 //// BEGIN externally visible functions (WINDOWS and OSX STUBS)
    73 // On Windows and OSX this will all need reworking.
    74 // GeckoProfilerImpl.h will ensure these functions are never actually
    75 // called, so just provide no-op stubs for now.
    77 void uwt__init()
    78 {
    79 }
    81 void uwt__stop()
    82 {
    83 }
    85 void uwt__deinit()
    86 {
    87 }
    89 void uwt__register_thread_for_profiling ( void* stackTop )
    90 {
    91 }
    93 void uwt__unregister_thread_for_profiling()
    94 {
    95 }
    97 LinkedUWTBuffer* utb__acquire_sync_buffer(void* stackTop)
    98 {
    99   return nullptr;
   100 }
   102 // RUNS IN SIGHANDLER CONTEXT
   103 UnwinderThreadBuffer* uwt__acquire_empty_buffer()
   104 {
   105   return nullptr;
   106 }
   108 void
   109 utb__finish_sync_buffer(ThreadProfile* aProfile,
   110                         UnwinderThreadBuffer* utb,
   111                         void* /* ucontext_t*, really */ ucV)
   112 {
   113 }
   115 void
   116 utb__release_sync_buffer(LinkedUWTBuffer* utb)
   117 {
   118 }
   120 // RUNS IN SIGHANDLER CONTEXT
   121 void
   122 uwt__release_full_buffer(ThreadProfile* aProfile,
   123                          UnwinderThreadBuffer* utb,
   124                          void* /* ucontext_t*, really */ ucV )
   125 {
   126 }
   128 // RUNS IN SIGHANDLER CONTEXT
   129 void
   130 utb__addEntry(/*MODIFIED*/UnwinderThreadBuffer* utb, ProfileEntry ent)
   131 {
   132 }
   134 //// END externally visible functions (WINDOWS and OSX STUBS)
   135 //////////////////////////////////////////////////////////
   137 #else // a supported target
   139 //////////////////////////////////////////////////////////
   140 //// BEGIN externally visible functions
   142 // Forward references
   143 // the unwinder thread ID, its fn, and a stop-now flag
   144 static void* unwind_thr_fn ( void* exit_nowV );
   145 static pthread_t unwind_thr;
   146 static int       unwind_thr_exit_now = 0; // RACED ON
   148 // Threads must be registered with this file before they can be
   149 // sampled, so that we know the max safe stack address for each
   150 // registered thread.
   151 static void thread_register_for_profiling ( void* stackTop );
   153 // Unregister a thread.
   154 static void thread_unregister_for_profiling();
   156 // Empties out the buffer queue.  Used when the unwinder thread is
   157 // shut down.
   158 static void empty_buffer_queue();
   160 // Allocate a buffer for synchronous unwinding
   161 static LinkedUWTBuffer* acquire_sync_buffer(void* stackTop);
   163 // RUNS IN SIGHANDLER CONTEXT
   164 // Acquire an empty buffer and mark it as FILLING
   165 static UnwinderThreadBuffer* acquire_empty_buffer();
   167 static void finish_sync_buffer(ThreadProfile* aProfile,
   168                                UnwinderThreadBuffer* utb,
   169                                void* /* ucontext_t*, really */ ucV);
   171 // Release an empty synchronous unwind buffer.
   172 static void release_sync_buffer(LinkedUWTBuffer* utb);
   174 // RUNS IN SIGHANDLER CONTEXT
   175 // Put this buffer in the queue of stuff going to the unwinder
   176 // thread, and mark it as FULL.  Before doing that, fill in stack
   177 // chunk and register fields if a native unwind is requested.
   178 // APROFILE is where the profile data should be added to.  UTB
   179 // is the partially-filled-in buffer, containing ProfileEntries.
   180 // UCV is the ucontext_t* from the signal handler.  If non-nullptr,
   181 // is taken as a cue to request native unwind.
   182 static void release_full_buffer(ThreadProfile* aProfile,
   183                                 UnwinderThreadBuffer* utb,
   184                                 void* /* ucontext_t*, really */ ucV );
   186 // RUNS IN SIGHANDLER CONTEXT
   187 static void utb_add_prof_ent(UnwinderThreadBuffer* utb, ProfileEntry ent);
   189 // Do a store memory barrier.
   190 static void do_MBAR();
   193 // This is the single instance of the LUL unwind library that we will
   194 // use.  Currently the library is operated with multiple sampling
   195 // threads but only one unwinder thread.  It should also be possible
   196 // to use the library with multiple unwinder threads, to improve
   197 // throughput.  The setup here makes it possible to use multiple
   198 // unwinder threads, although that is as-yet untested.
   199 //
   200 // |sLULmutex| protects |sLUL| and |sLULcount| and also is used to
   201 // ensure that only the first unwinder thread requests |sLUL| to read
   202 // debug info.  |sLUL| may only be assigned to (and the object it
   203 // points at may only be created/destroyed) when |sLULcount| is zero.
   204 // |sLULcount| holds the number of unwinder threads currently in
   205 // existence.
   206 static pthread_mutex_t sLULmutex = PTHREAD_MUTEX_INITIALIZER;
   207 static lul::LUL*       sLUL      = nullptr;
   208 static int             sLULcount = 0;
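// Illustrative sketch, not part of the original file: the lifecycle
// described in the comment above, mirrored from what unwind_thr_fn()
// does further down.  The teardown half is not shown in this truncated
// listing, so the exit part below only follows the comment's statement
// that the last unwinder thread out deallocates |sLUL|.
//
//   // first unwinder thread in creates and populates the library
//   pthread_mutex_lock(&sLULmutex);
//   if (!sLUL) {
//     sLUL = new lul::LUL(logging_sink_for_LUL);
//     sLUL->RegisterUnwinderThread();
//     read_procmaps(sLUL);          // only the first thread reads debuginfo
//   } else {
//     sLUL->RegisterUnwinderThread();
//   }
//   sLULcount++;
//   pthread_mutex_unlock(&sLULmutex);
//
//   // last unwinder thread out destroys it
//   pthread_mutex_lock(&sLULmutex);
//   if (--sLULcount == 0) {
//     delete sLUL;
//     sLUL = nullptr;
//   }
//   pthread_mutex_unlock(&sLULmutex);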
   211 void uwt__init()
   212 {
   213   // Create the unwinder thread.
   214   MOZ_ASSERT(unwind_thr_exit_now == 0);
   215   int r = pthread_create( &unwind_thr, nullptr,
   216                           unwind_thr_fn, (void*)&unwind_thr_exit_now );
   217   MOZ_ALWAYS_TRUE(r == 0);
   218 }
   220 void uwt__stop()
   221 {
   222   // Shut down the unwinder thread.
   223   MOZ_ASSERT(unwind_thr_exit_now == 0);
   224   unwind_thr_exit_now = 1;
   225   do_MBAR();
   226   int r = pthread_join(unwind_thr, nullptr);
   227   MOZ_ALWAYS_TRUE(r == 0);
   228 }
   230 void uwt__deinit()
   231 {
   232   empty_buffer_queue();
   233 }
   235 void uwt__register_thread_for_profiling(void* stackTop)
   236 {
   237   thread_register_for_profiling(stackTop);
   238 }
   240 void uwt__unregister_thread_for_profiling()
   241 {
   242   thread_unregister_for_profiling();
   243 }
   245 LinkedUWTBuffer* utb__acquire_sync_buffer(void* stackTop)
   246 {
   247   return acquire_sync_buffer(stackTop);
   248 }
   250 void utb__finish_sync_buffer(ThreadProfile* profile,
   251                              UnwinderThreadBuffer* buff,
   252                              void* /* ucontext_t*, really */ ucV)
   253 {
   254   finish_sync_buffer(profile, buff, ucV);
   255 }
   257 void utb__release_sync_buffer(LinkedUWTBuffer* buff)
   258 {
   259   release_sync_buffer(buff);
   260 }
   262 // RUNS IN SIGHANDLER CONTEXT
   263 UnwinderThreadBuffer* uwt__acquire_empty_buffer()
   264 {
   265   return acquire_empty_buffer();
   266 }
   268 // RUNS IN SIGHANDLER CONTEXT
   269 void
   270 uwt__release_full_buffer(ThreadProfile* aProfile,
   271                          UnwinderThreadBuffer* utb,
   272                          void* /* ucontext_t*, really */ ucV )
   273 {
   274   release_full_buffer( aProfile, utb, ucV );
   275 }
   277 // RUNS IN SIGHANDLER CONTEXT
   278 void
   279 utb__addEntry(/*MODIFIED*/UnwinderThreadBuffer* utb, ProfileEntry ent)
   280 {
   281   utb_add_prof_ent(utb, ent);
   282 }
   284 //// END externally visible functions
   285 //////////////////////////////////////////////////////////
   288 //////////////////////////////////////////////////////////
   289 //// BEGIN type UnwindThreadBuffer
   291 static_assert(sizeof(uint32_t) == 4, "uint32_t size incorrect");
   292 static_assert(sizeof(uint64_t) == 8, "uint64_t size incorrect");
   293 static_assert(sizeof(uintptr_t) == sizeof(void*),
   294               "uintptr_t size incorrect");
   296 typedef
   297   struct { 
   298     uint64_t rsp;
   299     uint64_t rbp;
   300     uint64_t rip; 
   301   }
   302   AMD64Regs;
   304 typedef
   305   struct {
   306     uint32_t r15;
   307     uint32_t r14;
   308     uint32_t r13;
   309     uint32_t r12;
   310     uint32_t r11;
   311     uint32_t r7;
   312   }
   313   ARMRegs;
   315 typedef
   316   struct {
   317     uint32_t esp;
   318     uint32_t ebp;
   319     uint32_t eip;
   320   }
   321   X86Regs;
   323 #if defined(SPS_ARCH_amd64)
   324 typedef  AMD64Regs  ArchRegs;
   325 #elif defined(SPS_ARCH_arm)
   326 typedef  ARMRegs  ArchRegs;
   327 #elif defined(SPS_ARCH_x86)
   328 typedef  X86Regs  ArchRegs;
   329 #else
   330 # error "Unknown plat"
   331 #endif
   333 #if defined(SPS_ARCH_amd64) || defined(SPS_ARCH_arm) || defined(SPS_ARCH_x86)
   334 # define SPS_PAGE_SIZE 4096
   335 #else
   336 # error "Unknown plat"
   337 #endif
   339 typedef  enum { S_EMPTY, S_FILLING, S_EMPTYING, S_FULL }  State;
   341 typedef  struct { uintptr_t val; }  SpinLock;
   343 /* CONFIGURABLE */
   344 /* The number of fixed ProfileEntry slots.  If more are required, they
   345    are placed in mmap'd pages. */
   346 #define N_FIXED_PROF_ENTS 20
   348 /* CONFIGURABLE */
   349 /* The number of extra pages of ProfileEntries.  If (on arm) each
   350    ProfileEntry is 8 bytes, then a page holds 512, and so 100 pages
   351    is enough to hold 51200. */
   352 #define N_PROF_ENT_PAGES 100
   354 /* DERIVATIVE */
   355 #define N_PROF_ENTS_PER_PAGE (SPS_PAGE_SIZE / sizeof(ProfileEntry))
   357 /* A page of ProfileEntries.  This might actually be slightly smaller
   358    than a page if SPS_PAGE_SIZE is not an exact multiple of
   359    sizeof(ProfileEntry). */
   360 typedef
   361   struct { ProfileEntry ents[N_PROF_ENTS_PER_PAGE]; }
   362   ProfEntsPage;
   364 #define ProfEntsPage_INVALID ((ProfEntsPage*)1)
   367 /* Fields protected by the spinlock are marked SL */
   369 struct _UnwinderThreadBuffer {
   370   /*SL*/ State  state;
   371   /* The rest of these are protected, in some sense, by ::state.  If
   372      ::state is S_FILLING, they are 'owned' by the sampler thread
   373      that set the state to S_FILLING.  If ::state is S_EMPTYING,
   374      they are 'owned' by the unwinder thread that set the state to
   375      S_EMPTYING.  If ::state is S_EMPTY or S_FULL, the buffer isn't
   376      owned by any thread, and so no thread may access these
   377      fields. */
   378   /* Sample number, needed to process samples in order */
   379   uint64_t       seqNo;
   380   /* The ThreadProfile into which the results are eventually to be
   381      dumped. */
   382   ThreadProfile* aProfile;
   383   /* Pseudostack and other info, always present */
   384   ProfileEntry   entsFixed[N_FIXED_PROF_ENTS];
   385   ProfEntsPage*  entsPages[N_PROF_ENT_PAGES];
   386   uintptr_t      entsUsed;
   387   /* Do we also have data to do a native unwind? */
   388   bool           haveNativeInfo;
   389   /* If so, here is the register state and stack.  Unset if
   390      .haveNativeInfo is false. */
   391   lul::UnwindRegs startRegs;
   392   lul::StackImage stackImg;
   393   void* stackMaxSafe; /* Address for max safe stack reading. */
   394 };
   395 /* Indexing scheme for ents:
   396      0 <= i < N_FIXED_PROF_ENTS
   397        is at entsFixed[i]
   399      i >= N_FIXED_PROF_ENTS
   400        is at let j = i - N_FIXED_PROF_ENTS
   401              in  entsPages[j / N_PROF_ENTS_PER_PAGE]
   402                   ->ents[j % N_PROF_ENTS_PER_PAGE]
   404    entsPages[] are allocated on demand.  Because zero can
   405    theoretically be a valid page pointer, use
   406    ProfEntsPage_INVALID == (ProfEntsPage*)1 to mark invalid pages.
   408    It follows that the max entsUsed value is N_FIXED_PROF_ENTS +
   409    N_PROF_ENTS_PER_PAGE * N_PROF_ENT_PAGES, and at that point no more
   410    ProfileEntries can be stored.
   411 */
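/* Worked example of the scheme above (illustration only, not part of
   the original file): with N_FIXED_PROF_ENTS == 20 and 512 entries per
   page (4096 / 8 bytes on arm), logical index i == 1000 maps to
       j    = 1000 - 20  = 980
       page = 980 / 512  = 1     ->  entsPages[1]
       slot = 980 % 512  = 468   ->  entsPages[1]->ents[468]
   which is exactly the computation utb_get_profent() performs below. */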
   414 typedef
   415   struct {
   416     pthread_t thrId;
   417     void*     stackTop;
   418     uint64_t  nSamples; 
   419   }
   420   StackLimit;
   422 /* Globals -- the buffer array */
   423 #define N_UNW_THR_BUFFERS 10
   424 /*SL*/ static UnwinderThreadBuffer** g_buffers     = nullptr;
   425 /*SL*/ static uint64_t               g_seqNo       = 0;
   426 /*SL*/ static SpinLock               g_spinLock    = { 0 };
   428 /* Globals -- the thread array.  The array is dynamically expanded on
   429    demand.  The spinlock must be held when accessing g_stackLimits,
   430    g_stackLimits[some index], g_stackLimitsUsed and g_stackLimitsSize.
   431    However, the spinlock must not be held when calling malloc to
   432    allocate or expand the array, as that would risk deadlock against a
   433    sampling thread that holds the malloc lock and is trying to acquire
   434    the spinlock. */
   435 /*SL*/ static StackLimit* g_stackLimits     = nullptr;
   436 /*SL*/ static size_t      g_stackLimitsUsed = 0;
   437 /*SL*/ static size_t      g_stackLimitsSize = 0;
   439 /* Stats -- atomically incremented, no lock needed */
   440 static uintptr_t g_stats_totalSamples = 0; // total # sample attempts
   441 static uintptr_t g_stats_noBuffAvail  = 0; // # failed due to no buffer avail
   442 static uintptr_t g_stats_thrUnregd    = 0; // # failed due to unregistered thr
   444 /* We must be VERY CAREFUL what we do with the spinlock held.  The
   445    only thing it is safe to do with it held is modify (viz, read or
   446    write) g_buffers, g_buffers[], g_seqNo, g_buffers[]->state,
   447    g_stackLimits, g_stackLimits[], g_stackLimitsUsed and
   448    g_stackLimitsSize.  No arbitrary computations, no syscalls, no
   449    printfs, no file IO, and absolutely no dynamic memory allocation
   450    (else we WILL eventually deadlock).
   452    This applies both to the signal handler and to the unwinder thread.
   453 */
   455 //// END type UnwindThreadBuffer
   456 //////////////////////////////////////////////////////////
   458 // This is the interface to LUL.
   459 typedef  struct { u_int64_t pc; u_int64_t sp; }  PCandSP;
   461 // Forward declaration.  Implementation is below.
   462 static
   463 void do_lul_unwind_Buffer(/*OUT*/PCandSP** pairs,
   464                           /*OUT*/unsigned int* nPairs,
   465                           UnwinderThreadBuffer* buff,
   466                           int buffNo /* for debug printing only */);
   468 static bool is_page_aligned(void* v)
   469 {
   470   uintptr_t w = (uintptr_t) v;
   471   return (w & (SPS_PAGE_SIZE-1)) == 0;
   472 }
   475 /* Implement machine-word sized atomic compare-and-swap.  Returns true
   476    if success, false if failure. */
   477 static bool do_CASW(uintptr_t* addr, uintptr_t expected, uintptr_t nyu)
   478 {
   479 #if defined(__GNUC__)
   480   return __sync_bool_compare_and_swap(addr, expected, nyu);
   481 #else
   482 # error "Unhandled compiler"
   483 #endif
   484 }
   486 /* Hint to the CPU core that we are in a spin-wait loop, and that
   487    other processors/cores/threads-running-on-the-same-core should be
   488    given priority on execute resources, if that is possible.  Not
   489    critical if this is a no-op on some targets. */
   490 static void do_SPINLOOP_RELAX()
   491 {
   492 #if (defined(SPS_ARCH_amd64) || defined(SPS_ARCH_x86)) && defined(__GNUC__)
   493   __asm__ __volatile__("rep; nop");
   494 #elif defined(SPS_PLAT_arm_android) && MOZILLA_ARM_ARCH >= 7
   495   __asm__ __volatile__("wfe");
   496 #endif
   497 }
   499 /* Tell any cores snoozing in spin loops to wake up. */
   500 static void do_SPINLOOP_NUDGE()
   501 {
   502 #if (defined(SPS_ARCH_amd64) || defined(SPS_ARCH_x86)) && defined(__GNUC__)
   503   /* this is a no-op */
   504 #elif defined(SPS_PLAT_arm_android) && MOZILLA_ARM_ARCH >= 7
   505   __asm__ __volatile__("sev");
   506 #endif
   507 }
   509 /* Perform a full memory barrier. */
   510 static void do_MBAR()
   511 {
   512 #if defined(__GNUC__)
   513   __sync_synchronize();
   514 #else
   515 # error "Unhandled compiler"
   516 #endif
   517 }
   519 static void spinLock_acquire(SpinLock* sl)
   520 {
   521   uintptr_t* val = &sl->val;
   522   VALGRIND_HG_MUTEX_LOCK_PRE(sl, 0/*!isTryLock*/);
   523   while (1) {
   524     bool ok = do_CASW( val, 0, 1 );
   525     if (ok) break;
   526     do_SPINLOOP_RELAX();
   527   }
   528   do_MBAR();
   529   VALGRIND_HG_MUTEX_LOCK_POST(sl);
   530 }
   532 static void spinLock_release(SpinLock* sl)
   533 {
   534   uintptr_t* val = &sl->val;
   535   VALGRIND_HG_MUTEX_UNLOCK_PRE(sl);
   536   do_MBAR();
   537   bool ok = do_CASW( val, 1, 0 );
   538   /* This must succeed at the first try.  To fail would imply that
   539      the lock was unheld. */
   540   MOZ_ALWAYS_TRUE(ok);
   541   do_SPINLOOP_NUDGE();
   542   VALGRIND_HG_MUTEX_UNLOCK_POST(sl);
   543 }
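/* Usage sketch (illustration only, not part of the original file): all
   critical sections in this file follow the same narrow pattern --
   acquire, touch only the globals marked SL, release -- with no
   allocation, logging, I/O or syscalls in between, as required by the
   "VERY CAREFUL" comment above.

     spinLock_acquire(&g_spinLock);
     UnwinderThreadBuffer** bufs = g_buffers;   // SL-protected read
     uint64_t seq = g_seqNo++;                  // SL-protected update
     spinLock_release(&g_spinLock);
     // anything heavyweight (malloc, LOGF, ...) happens only here
*/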
   545 static void sleep_ms(unsigned int ms)
   546 {
   547   struct timespec req;
   548   req.tv_sec = ((time_t)ms) / 1000;
   549   req.tv_nsec = 1000 * 1000 * (((unsigned long)ms) % 1000);
   550   nanosleep(&req, nullptr);
   551 }
   553 /* Use CAS to implement standalone atomic increment. */
   554 static void atomic_INC(uintptr_t* loc)
   555 {
   556   while (1) {
   557     uintptr_t old = *loc;
   558     uintptr_t nyu = old + 1;
   559     bool ok = do_CASW( loc, old, nyu );
   560     if (ok) break;
   561   }
   562 }
   564 // Empties out the buffer queue.
   565 static void empty_buffer_queue()
   566 {
   567   spinLock_acquire(&g_spinLock);
   569   UnwinderThreadBuffer** tmp_g_buffers = g_buffers;
   570   g_stackLimitsUsed = 0;
   571   g_seqNo = 0;
   572   g_buffers = nullptr;
   574   spinLock_release(&g_spinLock);
   576   // Can't do any malloc/free when holding the spinlock.
   577   free(tmp_g_buffers);
   579   // We could potentially free up g_stackLimits; but given the
   580   // complications above involved in resizing it, it's probably
   581   // safer just to leave it in place.
   582 }
   585 // Registers a thread for profiling.  Detects and ignores duplicate
   586 // registration.
   587 static void thread_register_for_profiling(void* stackTop)
   588 {
   589   pthread_t me = pthread_self();
   591   spinLock_acquire(&g_spinLock);
   593   // tmp copy of g_stackLimitsUsed, to avoid racing in message printing
   594   int n_used;
   596   // Ignore spurious calls which aren't really registering anything.
   597   if (stackTop == nullptr) {
   598     n_used = g_stackLimitsUsed;
   599     spinLock_release(&g_spinLock);
   600     LOGF("BPUnw: [%d total] thread_register_for_profiling"
   601          "(me=%p, stacktop=NULL) (IGNORED)", n_used, (void*)me);
   602     return;
   603   }
   605   /* Minimal sanity check on stackTop */
   606   MOZ_ASSERT((void*)&n_used/*any auto var will do*/ < stackTop);
   608   bool is_dup = false;
   609   for (size_t i = 0; i < g_stackLimitsUsed; i++) {
   610     if (g_stackLimits[i].thrId == me) {
   611       is_dup = true;
   612       break;
   613     }
   614   }
   616   if (is_dup) {
   617     /* It's a duplicate registration.  Ignore it: drop the lock and
   618        return. */
   619     n_used = g_stackLimitsUsed;
   620     spinLock_release(&g_spinLock);
   622     LOGF("BPUnw: [%d total] thread_register_for_profiling"
   623          "(me=%p, stacktop=%p) (DUPLICATE)", n_used, (void*)me, stackTop);
   624     return;
   625   }
   627   /* Make sure the g_stackLimits array is large enough to accommodate
   628      this new entry.  This is tricky.  If it isn't large enough, we
   629      can malloc a larger version, but we have to do that without
   630      holding the spinlock, else we risk deadlock.  The deadlock
   631      scenario is:
   633      Some other thread that is being sampled
   634                                         This thread
   636      call malloc                        call this function
   637      acquire malloc lock                acquire the spinlock
   638      (sampling signal)                  discover thread array not big enough,
   639      call uwt__acquire_empty_buffer       call malloc to make it larger
   640      acquire the spinlock               acquire malloc lock
   642      This gives an inconsistent lock acquisition order on the malloc
   643      lock and spinlock, hence risk of deadlock.
   645      Allocating more space for the array without holding the spinlock
   646      implies tolerating races against other thread(s) who are also
   647      trying to expand the array.  How can we detect if we have been
   648      out-raced?  Every successful expansion of g_stackLimits[] results
   649      in an increase in g_stackLimitsSize.  Hence we can detect if we
   650      got out-raced by remembering g_stackLimitsSize before we dropped
   651      the spinlock and checking if it has changed after the spinlock is
   652      reacquired. */
   654   MOZ_ASSERT(g_stackLimitsUsed <= g_stackLimitsSize);
   656   if (g_stackLimitsUsed == g_stackLimitsSize) {
   657     /* g_stackLimits[] is full; resize it. */
   659     size_t old_size = g_stackLimitsSize;
   660     size_t new_size = old_size == 0 ? 4 : (2 * old_size);
   662     spinLock_release(&g_spinLock);
   663     StackLimit* new_arr  = (StackLimit*)malloc(new_size * sizeof(StackLimit));
   664     if (!new_arr)
   665       return;
   667     spinLock_acquire(&g_spinLock);
   669     if (old_size != g_stackLimitsSize) {
   670       /* We've been outraced.  Instead of trying to deal in-line with
   671          this extremely rare case, just start all over again by
   672          tail-calling this routine. */
   673       spinLock_release(&g_spinLock);
   674       free(new_arr);
   675       thread_register_for_profiling(stackTop);
   676       return;
   677     }
   679     memcpy(new_arr, g_stackLimits, old_size * sizeof(StackLimit));
   680     if (g_stackLimits)
   681       free(g_stackLimits);
   683     g_stackLimits = new_arr;
   685     MOZ_ASSERT(g_stackLimitsSize < new_size);
   686     g_stackLimitsSize = new_size;
   687   }
   689   MOZ_ASSERT(g_stackLimitsUsed < g_stackLimitsSize);
   691   /* Finally, we have a safe place to put the new entry. */
   693   // Round |stackTop| up to the end of the containing page.  We may
   694   // as well do this -- there's no danger of a fault, and we might
   695   // get a few more base-of-the-stack frames as a result.  This
   696   // assumes that no target has a page size smaller than 4096.
   697   uintptr_t stackTopR = (uintptr_t)stackTop;
   698   stackTopR = (stackTopR & ~(uintptr_t)4095) + (uintptr_t)4095;
   700   g_stackLimits[g_stackLimitsUsed].thrId    = me;
   701   g_stackLimits[g_stackLimitsUsed].stackTop = (void*)stackTopR;
   702   g_stackLimits[g_stackLimitsUsed].nSamples = 0;
   703   g_stackLimitsUsed++;
   705   n_used = g_stackLimitsUsed;
   706   spinLock_release(&g_spinLock);
   708   LOGF("BPUnw: [%d total] thread_register_for_profiling"
   709        "(me=%p, stacktop=%p)", n_used, (void*)me, stackTop);
   710 }
   712 // Deregisters a thread from profiling.  Detects and ignores attempts
   713 // to deregister a not-registered thread.
   714 static void thread_unregister_for_profiling()
   715 {
   716   spinLock_acquire(&g_spinLock);
   718   // tmp copy of g_stackLimitsUsed, to avoid racing in message printing
   719   size_t n_used;
   721   size_t i;
   722   bool found = false;
   723   pthread_t me = pthread_self();
   724   for (i = 0; i < g_stackLimitsUsed; i++) {
   725     if (g_stackLimits[i].thrId == me)
   726       break;
   727   }
   728   if (i < g_stackLimitsUsed) {
   729     // found this entry.  Slide the remaining ones down one place.
   730     for (; i+1 < g_stackLimitsUsed; i++) {
   731       g_stackLimits[i] = g_stackLimits[i+1];
   732     }
   733     g_stackLimitsUsed--;
   734     found = true;
   735   }
   737   n_used = g_stackLimitsUsed;
   739   spinLock_release(&g_spinLock);
   740   LOGF("BPUnw: [%d total] thread_unregister_for_profiling(me=%p) %s", 
   741        (int)n_used, (void*)me, found ? "" : " (NOT REGISTERED) ");
   742 }
   745 __attribute__((unused))
   746 static void show_registered_threads()
   747 {
   748   size_t i;
   749   spinLock_acquire(&g_spinLock);
   750   for (i = 0; i < g_stackLimitsUsed; i++) {
   751     LOGF("[%d]  pthread_t=%p  nSamples=%lld",
   752          (int)i, (void*)g_stackLimits[i].thrId, 
   753                  (unsigned long long int)g_stackLimits[i].nSamples);
   754   }
   755   spinLock_release(&g_spinLock);
   756 }
   758 // RUNS IN SIGHANDLER CONTEXT
   759 /* The calling thread owns the buffer, as denoted by its state being
   760    S_FILLING.  So we can mess with it without further locking. */
   761 static void init_empty_buffer(UnwinderThreadBuffer* buff, void* stackTop)
   762 {
   763   /* Now we own the buffer, initialise it. */
   764   buff->aProfile            = nullptr;
   765   buff->entsUsed            = 0;
   766   buff->haveNativeInfo      = false;
   767   buff->stackImg.mLen       = 0;
   768   buff->stackImg.mStartAvma = 0;
   769   buff->stackMaxSafe        = stackTop; /* We will need this in
   770                                            release_full_buffer() */
   771   for (size_t i = 0; i < N_PROF_ENT_PAGES; i++)
   772     buff->entsPages[i] = ProfEntsPage_INVALID;
   773 }
   775 struct SyncUnwinderThreadBuffer : public LinkedUWTBuffer
   776 {
   777   UnwinderThreadBuffer* GetBuffer()
   778   {
   779     return &mBuff;
   780   }
   782   UnwinderThreadBuffer  mBuff;
   783 };
   785 static LinkedUWTBuffer* acquire_sync_buffer(void* stackTop)
   786 {
   787   MOZ_ASSERT(stackTop);
   788   SyncUnwinderThreadBuffer* buff = new SyncUnwinderThreadBuffer();
   789   // We can set state without locking here because this thread owns the buffer
   790   // and it is going to fill it itself.
   791   buff->GetBuffer()->state = S_FILLING;
   792   init_empty_buffer(buff->GetBuffer(), stackTop);
   793   return buff;
   794 }
   796 // RUNS IN SIGHANDLER CONTEXT
   797 static UnwinderThreadBuffer* acquire_empty_buffer()
   798 {
   799   /* acq lock
   800      if buffers == nullptr { rel lock; exit }
   801      scan to find a free buff; if none { rel lock; exit }
   802      set buff state to S_FILLING
   803      fillseqno++; and remember it
   804      rel lock
   805   */
   806   size_t i;
   808   atomic_INC( &g_stats_totalSamples );
   810   /* This code is critical.  We are in a signal handler and possibly
   811      with the malloc lock held.  So we can't allocate any heap, and
   812      can't safely call any C library functions, not even the pthread_
   813      functions.  And we certainly can't do any syscalls.  In short,
   814      this function needs to be self contained, not do any allocation,
   815      and not hold on to the spinlock for any significant length of
   816      time. */
   818   spinLock_acquire(&g_spinLock);
   820   /* First of all, look for this thread's entry in g_stackLimits[].
   821      We need to find it in order to figure out how much stack we can
   822      safely copy into the sample.  This assumes that pthread_self()
   823      is safe to call in a signal handler, which strikes me as highly
   824      likely. */
   825   pthread_t me = pthread_self();
   826   MOZ_ASSERT(g_stackLimitsUsed <= g_stackLimitsSize);
   827   for (i = 0; i < g_stackLimitsUsed; i++) {
   828     if (g_stackLimits[i].thrId == me)
   829       break;
   830   }
   832   /* If the thread isn't registered for profiling, just ignore the call
   833      and return nullptr. */
   834   if (i == g_stackLimitsUsed) {
   835     spinLock_release(&g_spinLock);
   836     atomic_INC( &g_stats_thrUnregd );
   837     return nullptr;
   838   }
   840   /* "this thread is registered for profiling" */
   841   MOZ_ASSERT(i < g_stackLimitsUsed);
   843   /* The furthest point that we can safely scan back up the stack. */
   844   void* myStackTop = g_stackLimits[i].stackTop;
   845   g_stackLimits[i].nSamples++;
   847   /* Try to find a free buffer to use. */
   848   if (g_buffers == nullptr) {
   849     /* The unwinder thread hasn't allocated any buffers yet.
   850        Nothing we can do. */
   851     spinLock_release(&g_spinLock);
   852     atomic_INC( &g_stats_noBuffAvail );
   853     return nullptr;
   854   }
   856   for (i = 0; i < N_UNW_THR_BUFFERS; i++) {
   857     if (g_buffers[i]->state == S_EMPTY)
   858       break;
   859   }
   860   MOZ_ASSERT(i <= N_UNW_THR_BUFFERS);
   862   if (i == N_UNW_THR_BUFFERS) {
   863     /* Again, no free buffers .. give up. */
   864     spinLock_release(&g_spinLock);
   865     atomic_INC( &g_stats_noBuffAvail );
   866     if (LOGLEVEL >= 3)
   867       LOG("BPUnw: handler:  no free buffers");
   868     return nullptr;
   869   }
   871   /* So we can use this one safely.  Whilst still holding the lock,
   872      mark the buffer as belonging to us, and increment the sequence
   873      number. */
   874   UnwinderThreadBuffer* buff = g_buffers[i];
   875   MOZ_ASSERT(buff->state == S_EMPTY);
   876   buff->state = S_FILLING;
   877   buff->seqNo = g_seqNo;
   878   g_seqNo++;
   880   /* And drop the lock.  We own the buffer, so go on and fill it. */
   881   spinLock_release(&g_spinLock);
   883   /* Now we own the buffer, initialise it. */
   884   init_empty_buffer(buff, myStackTop);
   885   return buff;
   886 }
   888 // RUNS IN SIGHANDLER CONTEXT
   889 /* The calling thread owns the buffer, as denoted by its state being
   890    S_FILLING.  So we can mess with it without further locking. */
   891 static void fill_buffer(ThreadProfile* aProfile,
   892                         UnwinderThreadBuffer* buff,
   893                         void* /* ucontext_t*, really */ ucV)
   894 {
   895   MOZ_ASSERT(buff->state == S_FILLING);
   897   ////////////////////////////////////////////////////
   898   // BEGIN fill
   900   /* The buffer already will have some of its ProfileEntries filled
   901      in, but everything else needs to be filled in at this point. */
   902   //LOGF("Release full buffer: %lu ents", buff->entsUsed);
   903   /* Where the resulting info is to be dumped */
   904   buff->aProfile = aProfile;
   906   /* And, if we have register state, that and the stack top */
   907   buff->haveNativeInfo = ucV != nullptr;
   908   if (buff->haveNativeInfo) {
   909 #   if defined(SPS_PLAT_amd64_linux)
   910     ucontext_t* uc = (ucontext_t*)ucV;
   911     mcontext_t* mc = &(uc->uc_mcontext);
   912     buff->startRegs.xip = lul::TaggedUWord(mc->gregs[REG_RIP]);
   913     buff->startRegs.xsp = lul::TaggedUWord(mc->gregs[REG_RSP]);
   914     buff->startRegs.xbp = lul::TaggedUWord(mc->gregs[REG_RBP]);
   915 #   elif defined(SPS_PLAT_amd64_darwin)
   916     ucontext_t* uc = (ucontext_t*)ucV;
   917     struct __darwin_mcontext64* mc = uc->uc_mcontext;
   918     struct __darwin_x86_thread_state64* ss = &mc->__ss;
   919     buff->regs.rip = ss->__rip;
   920     buff->regs.rsp = ss->__rsp;
   921     buff->regs.rbp = ss->__rbp;
   922 #   elif defined(SPS_PLAT_arm_android)
   923     ucontext_t* uc = (ucontext_t*)ucV;
   924     mcontext_t* mc = &(uc->uc_mcontext);
   925     buff->startRegs.r15 = lul::TaggedUWord(mc->arm_pc);
   926     buff->startRegs.r14 = lul::TaggedUWord(mc->arm_lr);
   927     buff->startRegs.r13 = lul::TaggedUWord(mc->arm_sp);
   928     buff->startRegs.r12 = lul::TaggedUWord(mc->arm_ip);
   929     buff->startRegs.r11 = lul::TaggedUWord(mc->arm_fp);
   930     buff->startRegs.r7  = lul::TaggedUWord(mc->arm_r7);
   931 #   elif defined(SPS_PLAT_x86_linux) || defined(SPS_PLAT_x86_android)
   932     ucontext_t* uc = (ucontext_t*)ucV;
   933     mcontext_t* mc = &(uc->uc_mcontext);
   934     buff->startRegs.xip = lul::TaggedUWord(mc->gregs[REG_EIP]);
   935     buff->startRegs.xsp = lul::TaggedUWord(mc->gregs[REG_ESP]);
   936     buff->startRegs.xbp = lul::TaggedUWord(mc->gregs[REG_EBP]);
   937 #   elif defined(SPS_PLAT_x86_darwin)
   938     ucontext_t* uc = (ucontext_t*)ucV;
   939     struct __darwin_mcontext32* mc = uc->uc_mcontext;
   940     struct __darwin_i386_thread_state* ss = &mc->__ss;
   941     buff->regs.eip = ss->__eip;
   942     buff->regs.esp = ss->__esp;
   943     buff->regs.ebp = ss->__ebp;
   944 #   else
   945 #     error "Unknown plat"
   946 #   endif
   948     /* Copy up to N_STACK_BYTES from rsp-REDZONE upwards, but not
   949        going past the stack's registered top point.  Do some basic
   950        sanity checks too.  This assumes that the TaggedUWord holding
   951        the stack pointer value is valid, but it should be, since it
   952        was constructed that way in the code just above. */
   953     { 
   954 #     if defined(SPS_PLAT_amd64_linux) || defined(SPS_PLAT_amd64_darwin)
   955       uintptr_t rEDZONE_SIZE = 128;
   956       uintptr_t start = buff->startRegs.xsp.Value() - rEDZONE_SIZE;
   957 #     elif defined(SPS_PLAT_arm_android)
   958       uintptr_t rEDZONE_SIZE = 0;
   959       uintptr_t start = buff->startRegs.r13.Value() - rEDZONE_SIZE;
   960 #     elif defined(SPS_PLAT_x86_linux) || defined(SPS_PLAT_x86_darwin) \
   961            || defined(SPS_PLAT_x86_android)
   962       uintptr_t rEDZONE_SIZE = 0;
   963       uintptr_t start = buff->startRegs.xsp.Value() - rEDZONE_SIZE;
   964 #     else
   965 #       error "Unknown plat"
   966 #     endif
   967       uintptr_t end   = (uintptr_t)buff->stackMaxSafe;
   968       uintptr_t ws    = sizeof(void*);
   969       start &= ~(ws-1);
   970       end   &= ~(ws-1);
   971       uintptr_t nToCopy = 0;
   972       if (start < end) {
   973         nToCopy = end - start;
   974         if (nToCopy > lul::N_STACK_BYTES)
   975           nToCopy = lul::N_STACK_BYTES;
   976       }
   977       MOZ_ASSERT(nToCopy <= lul::N_STACK_BYTES);
   978       buff->stackImg.mLen       = nToCopy;
   979       buff->stackImg.mStartAvma = start;
   980       if (nToCopy > 0) {
   981         memcpy(&buff->stackImg.mContents[0], (void*)start, nToCopy);
   982         (void)VALGRIND_MAKE_MEM_DEFINED(&buff->stackImg.mContents[0], nToCopy);
   983       }
   984     }
   985   } /* if (buff->haveNativeInfo) */
   986   // END fill
   987   ////////////////////////////////////////////////////
   988 }
   990 // RUNS IN SIGHANDLER CONTEXT
   991 /* The calling thread owns the buffer, as denoted by its state being
   992    S_FILLING.  So we can mess with it without further locking. */
   993 static void release_full_buffer(ThreadProfile* aProfile,
   994                                 UnwinderThreadBuffer* buff,
   995                                 void* /* ucontext_t*, really */ ucV )
   996 {
   997   fill_buffer(aProfile, buff, ucV);
   998   /* And now relinquish ownership of the buff, so that an unwinder
   999      thread can pick it up. */
  1000   spinLock_acquire(&g_spinLock);
  1001   buff->state = S_FULL;
  1002   spinLock_release(&g_spinLock);
  1003 }
  1005 // RUNS IN SIGHANDLER CONTEXT
  1006 // Allocate a ProfEntsPage, without using malloc, or return
  1007 // ProfEntsPage_INVALID if we can't for some reason.
  1008 static ProfEntsPage* mmap_anon_ProfEntsPage()
  1009 {
  1010 # if defined(SPS_OS_darwin)
  1011   void* v = ::mmap(nullptr, sizeof(ProfEntsPage), PROT_READ | PROT_WRITE, 
  1012                    MAP_PRIVATE | MAP_ANON,      -1, 0);
  1013 # else
  1014   void* v = ::mmap(nullptr, sizeof(ProfEntsPage), PROT_READ | PROT_WRITE, 
  1015                    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  1016 # endif
  1017   if (v == MAP_FAILED) {
  1018     return ProfEntsPage_INVALID;
  1019   } else {
  1020     return (ProfEntsPage*)v;
  1021   }
  1022 }
  1024 // Runs in the unwinder thread
  1025 // Free a ProfEntsPage as allocated by mmap_anon_ProfEntsPage
  1026 static void munmap_ProfEntsPage(ProfEntsPage* pep)
  1027 {
  1028   MOZ_ALWAYS_TRUE(is_page_aligned(pep));
  1029   ::munmap(pep, sizeof(ProfEntsPage));
  1030 }
  1033 // RUNS IN SIGHANDLER CONTEXT
  1034 void
  1035 utb_add_prof_ent(/*MODIFIED*/UnwinderThreadBuffer* utb, ProfileEntry ent)
  1036 {
  1037   uintptr_t limit
  1038     = N_FIXED_PROF_ENTS + (N_PROF_ENTS_PER_PAGE * N_PROF_ENT_PAGES);
  1039   if (utb->entsUsed == limit) {
  1040     /* We're full.  Now what? */
  1041     LOG("BPUnw: utb__addEntry: NO SPACE for ProfileEntry; ignoring.");
  1042     return;
  1043   }
  1044   MOZ_ASSERT(utb->entsUsed < limit);
  1046   /* Will it fit in the fixed array? */
  1047   if (utb->entsUsed < N_FIXED_PROF_ENTS) {
  1048     utb->entsFixed[utb->entsUsed] = ent;
  1049     utb->entsUsed++;
  1050     return;
  1051   }
  1053   /* No.  Put it in the extras. */
  1054   uintptr_t i     = utb->entsUsed;
  1055   uintptr_t j     = i - N_FIXED_PROF_ENTS;
  1056   uintptr_t j_div = j / N_PROF_ENTS_PER_PAGE; /* page number */
  1057   uintptr_t j_mod = j % N_PROF_ENTS_PER_PAGE; /* page offset */
  1058   ProfEntsPage* pep = utb->entsPages[j_div];
  1059   if (pep == ProfEntsPage_INVALID) {
  1060     pep = mmap_anon_ProfEntsPage();
  1061     if (pep == ProfEntsPage_INVALID) {
  1062       /* Urr, we ran out of memory.  Now what? */
  1063       LOG("BPUnw: utb__addEntry: MMAP FAILED for ProfileEntry; ignoring.");
  1064       return;
  1065     }
  1066     utb->entsPages[j_div] = pep;
  1067   }
  1068   pep->ents[j_mod] = ent;
  1069   utb->entsUsed++;
  1070 }
  1073 // misc helper
  1074 static ProfileEntry utb_get_profent(UnwinderThreadBuffer* buff, uintptr_t i)
  1075 {
  1076   MOZ_ASSERT(i < buff->entsUsed);
  1077   if (i < N_FIXED_PROF_ENTS) {
  1078     return buff->entsFixed[i];
  1079   } else {
  1080     uintptr_t j     = i - N_FIXED_PROF_ENTS;
  1081     uintptr_t j_div = j / N_PROF_ENTS_PER_PAGE; /* page number */
  1082     uintptr_t j_mod = j % N_PROF_ENTS_PER_PAGE; /* page offset */
  1083     MOZ_ASSERT(buff->entsPages[j_div] != ProfEntsPage_INVALID);
  1084     return buff->entsPages[j_div]->ents[j_mod];
  1085   }
  1086 }
  1088 /* Copy ProfileEntries presented to us by the sampling thread.
  1089    Most of them are copied verbatim into |buff->aProfile|,
  1090    except for 'hint' tags, which direct us to do something
  1091    different. */
  1092 static void process_buffer(UnwinderThreadBuffer* buff, int oldest_ix)
  1093 {
  1094   /* Need to lock |aProfile| so nobody tries to copy out entries
  1095      whilst we are putting them in. */
  1096   buff->aProfile->BeginUnwind();
  1098   /* The buff is a sequence of ProfileEntries (ents).  It has
  1099      this grammar:
  1101      | --pre-tags-- | (h 'P' .. h 'Q')* | --post-tags-- |
  1102                       ^               ^
  1103                       ix_first_hP     ix_last_hQ
  1105      Each (h 'P' .. h 'Q') subsequence represents one pseudostack
  1106      entry.  These, if present, are in the order
  1107      outermost-frame-first, and that is the order that they should
  1108      be copied into aProfile.  The --pre-tags-- and --post-tags--
  1109      are to be copied into the aProfile verbatim, except that they
  1110      may contain the hints "h 'F'" for a flush and "h 'N'" to
  1111      indicate that a native unwind is also required, and must be
  1112      interleaved with the pseudostack entries.
  1114      The hint tags that bound each pseudostack entry, "h 'P'" and "h
  1115      'Q'", are not to be copied into the aProfile -- they are
  1116      present only to make parsing easy here.  Also, the pseudostack
  1117      entries may contain an "'S' (void*)" entry, which is the stack
  1118      pointer value for that entry, and these are also not to be
  1119      copied.
  1120   */
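  /* Example of a conforming sequence (illustration only, not part of
     the original file), for one pseudostack frame plus a native-unwind
     request:

       --pre-tags--  h'N'  h'P'  <frame tags>  'S' (sp)  h'Q'  --post-tags--

     The h'P'/h'Q' hints and the 'S' stack-pointer entry are consumed by
     the parsing below and never copied into aProfile; h'N' merely sets
     need_native_unw; everything else is copied through. */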
  1121   /* The first thing to do is therefore to find the pseudostack
  1122      entries, if any, and to find out also whether a native unwind
  1123      has been requested. */
  1124   const uintptr_t infUW = ~(uintptr_t)0; // infinity
  1125   bool  need_native_unw = false;
  1126   uintptr_t ix_first_hP = infUW; // "not found"
  1127   uintptr_t ix_last_hQ  = infUW; // "not found"
  1129   uintptr_t k;
  1130   for (k = 0; k < buff->entsUsed; k++) {
  1131     ProfileEntry ent = utb_get_profent(buff, k);
  1132     if (ent.is_ent_hint('N')) {
  1133       need_native_unw = true;
  1134     }
  1135     else if (ent.is_ent_hint('P') && ix_first_hP == ~(uintptr_t)0) {
  1136       ix_first_hP = k;
  1137     }
  1138     else if (ent.is_ent_hint('Q')) {
  1139       ix_last_hQ = k;
  1140     }
  1141   }
  1143   if (0) LOGF("BPUnw: ix_first_hP %llu  ix_last_hQ %llu  need_native_unw %llu",
  1144               (unsigned long long int)ix_first_hP,
  1145               (unsigned long long int)ix_last_hQ,
  1146               (unsigned long long int)need_native_unw);
  1148   /* There are four possibilities: native-only, pseudostack-only,
  1149      combined (both), and neither.  We handle all four cases. */
  1151   MOZ_ASSERT( (ix_first_hP == infUW && ix_last_hQ == infUW) ||
  1152               (ix_first_hP != infUW && ix_last_hQ != infUW) );
  1153   bool have_P = ix_first_hP != infUW;
  1154   if (have_P) {
  1155     MOZ_ASSERT(ix_first_hP < ix_last_hQ);
  1156     MOZ_ASSERT(ix_last_hQ <= buff->entsUsed);
  1157   }
  1159   /* Neither N nor P.  This is very unusual but has been observed to happen.
  1160      Just copy to the output. */
  1161   if (!need_native_unw && !have_P) {
  1162     for (k = 0; k < buff->entsUsed; k++) {
  1163       ProfileEntry ent = utb_get_profent(buff, k);
  1164       // action flush-hints
  1165       if (ent.is_ent_hint('F')) { buff->aProfile->flush(); continue; }
  1166       // skip ones we can't copy
  1167       if (ent.is_ent_hint() || ent.is_ent('S')) { continue; }
  1168       // handle GetBacktrace()
  1169       if (ent.is_ent('B')) {
  1170         UnwinderThreadBuffer* buff = (UnwinderThreadBuffer*)ent.get_tagPtr();
  1171         process_buffer(buff, -1);
  1172         continue;
  1173       }
  1174       // and copy everything else
  1175       buff->aProfile->addTag( ent );
  1176     }
  1177   }
  1178   else /* Native only-case. */
  1179   if (need_native_unw && !have_P) {
  1180     for (k = 0; k < buff->entsUsed; k++) {
  1181       ProfileEntry ent = utb_get_profent(buff, k);
  1182       // action a native-unwind-now hint
  1183       if (ent.is_ent_hint('N')) {
  1184         MOZ_ASSERT(buff->haveNativeInfo);
  1185         PCandSP* pairs = nullptr;
  1186         unsigned int nPairs = 0;
  1187         do_lul_unwind_Buffer(&pairs, &nPairs, buff, oldest_ix);
  1188         buff->aProfile->addTag( ProfileEntry('s', "(root)") );
  1189         for (unsigned int i = 0; i < nPairs; i++) {
  1190           /* Skip any outermost frames that
  1191              do_lul_unwind_Buffer didn't give us.  See comments
  1192              on that function for details. */
  1193           if (pairs[i].pc == 0 && pairs[i].sp == 0)
  1194             continue;
  1195           buff->aProfile
  1196               ->addTag( ProfileEntry('l', reinterpret_cast<void*>(pairs[i].pc)) );
  1197         }
  1198         if (pairs)
  1199           free(pairs);
  1200         continue;
  1201       }
  1202       // action flush-hints
  1203       if (ent.is_ent_hint('F')) { buff->aProfile->flush(); continue; }
  1204       // skip ones we can't copy
  1205       if (ent.is_ent_hint() || ent.is_ent('S')) { continue; }
  1206       // handle GetBacktrace()
  1207       if (ent.is_ent('B')) {
  1208         UnwinderThreadBuffer* buff = (UnwinderThreadBuffer*)ent.get_tagPtr();
  1209         process_buffer(buff, -1);
  1210         continue;
  1211       }
  1212       // and copy everything else
  1213       buff->aProfile->addTag( ent );
  1214     }
  1215   }
  1216   else /* Pseudostack-only case */
  1217   if (!need_native_unw && have_P) {
  1218     /* If there's no request for a native stack, it's easy: just
  1219        copy the tags verbatim into aProfile, skipping the ones that
  1220        can't be copied -- 'h' (hint) tags, and "'S' (void*)"
  1221        stack-pointer tags.  Except, insert a sample-start tag when
  1222        we see the start of the first pseudostack frame. */
  1223     for (k = 0; k < buff->entsUsed; k++) {
  1224       ProfileEntry ent = utb_get_profent(buff, k);
  1225       // We need to insert a sample-start tag before the first frame
  1226       if (k == ix_first_hP) {
  1227         buff->aProfile->addTag( ProfileEntry('s', "(root)") );
  1228       }
  1229       // action flush-hints
  1230       if (ent.is_ent_hint('F')) { buff->aProfile->flush(); continue; }
  1231       // skip ones we can't copy
  1232       if (ent.is_ent_hint() || ent.is_ent('S')) { continue; }
  1233       // handle GetBacktrace()
  1234       if (ent.is_ent('B')) {
  1235         UnwinderThreadBuffer* buff = (UnwinderThreadBuffer*)ent.get_tagPtr();
  1236         process_buffer(buff, -1);
  1237         continue;
  1238       }
  1239       // and copy everything else
  1240       buff->aProfile->addTag( ent );
  1241     }
  1242   }
  1243   else /* Combined case */
  1244   if (need_native_unw && have_P)
  1245   {
  1246     /* We need to get a native stacktrace and merge it with the
  1247        pseudostack entries.  This isn't too simple.  First, copy all
  1248        the tags up to the start of the pseudostack tags.  Then
  1249        generate a combined set of tags by native unwind and
  1250        pseudostack.  Then, copy all the stuff after the pseudostack
  1251        tags. */
  1252     MOZ_ASSERT(buff->haveNativeInfo);
  1254     // Get native unwind info
  1255     PCandSP* pairs = nullptr;
  1256     unsigned int n_pairs = 0;
  1257     do_lul_unwind_Buffer(&pairs, &n_pairs, buff, oldest_ix);
  1259     // Entries before the pseudostack frames
  1260     for (k = 0; k < ix_first_hP; k++) {
  1261       ProfileEntry ent = utb_get_profent(buff, k);
  1262       // action flush-hints
  1263       if (ent.is_ent_hint('F')) { buff->aProfile->flush(); continue; }
  1264       // skip ones we can't copy
  1265       if (ent.is_ent_hint() || ent.is_ent('S')) { continue; }
  1266       // handle GetBacktrace()
  1267       if (ent.is_ent('B')) {
  1268         UnwinderThreadBuffer* buff = (UnwinderThreadBuffer*)ent.get_tagPtr();
  1269         process_buffer(buff, -1);
  1270         continue;
  1271       }
  1272       // and copy everything else
  1273       buff->aProfile->addTag( ent );
  1274     }
  1276     // BEGIN merge
  1277     buff->aProfile->addTag( ProfileEntry('s', "(root)") );
  1278     unsigned int next_N = 0; // index in pairs[]
  1279     unsigned int next_P = ix_first_hP; // index in buff profent array
  1280     bool last_was_P = false;
  1281     if (0) LOGF("at mergeloop: n_pairs %llu ix_last_hQ %llu",
  1282                 (unsigned long long int)n_pairs,
  1283                 (unsigned long long int)ix_last_hQ);
  1284     /* Skip any outermost frames that do_lul_unwind_Buffer
  1285        didn't give us.  See comments on that function for
  1286        details. */
  1287     while (next_N < n_pairs && pairs[next_N].pc == 0 && pairs[next_N].sp == 0)
  1288       next_N++;
  1290     while (true) {
  1291       if (next_P <= ix_last_hQ) {
  1292         // Assert that next_P points at the start of an P entry
  1293         MOZ_ASSERT(utb_get_profent(buff, next_P).is_ent_hint('P'));
  1294       }
  1295       if (next_N >= n_pairs && next_P > ix_last_hQ) {
  1296         // both stacks empty
  1297         break;
  1298       }
  1299       /* Decide which entry to use next:
  1300          If N is empty, must use P, and vice versa
  1301          else
  1302          If the last was P and current P has zero SP, use P
  1303          else
  1304          we assume that both P and N have valid SP, in which case
  1305             use the one with the larger value
  1306       */
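      /* Concrete illustration (not part of the original file): with a
         downward-growing stack the outermost frames have the higher SP
         values, so if sp_cur_P == 0x7fff1000 and pairs[next_N].sp ==
         0x7fff0800, the P entry is emitted first (use_P == true). */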
  1307       bool use_P = true;
  1308       if (next_N >= n_pairs) {
  1309         // N empty, use P
  1310         use_P = true;
  1311         if (0) LOG("  P  <=  no remaining N entries");
  1312       }
  1313       else if (next_P > ix_last_hQ) {
  1314         // P empty, use N
  1315         use_P = false;
  1316         if (0) LOG("  N  <=  no remaining P entries");
  1317       }
  1318       else {
  1319         // We have at least one N and one P entry available.
  1320         // Scan forwards to find the SP of the current P entry
  1321         u_int64_t sp_cur_P = 0;
  1322         unsigned int m = next_P + 1;
  1323         while (1) {
  1324           /* This assertion should hold because in a well formed
  1325              input, we must eventually find the hint-Q that marks
  1326              the end of this frame's entries. */
  1327           MOZ_ASSERT(m < buff->entsUsed);
  1328           ProfileEntry ent = utb_get_profent(buff, m);
  1329           if (ent.is_ent_hint('Q'))
  1330             break;
  1331           if (ent.is_ent('S')) {
  1332             sp_cur_P = reinterpret_cast<u_int64_t>(ent.get_tagPtr());
  1333             break;
  1334           }
  1335           m++;
  1336         }
  1337         if (last_was_P && sp_cur_P == 0) {
  1338           if (0) LOG("  P  <=  last_was_P && sp_cur_P == 0");
  1339           use_P = true;
  1340         } else {
  1341           u_int64_t sp_cur_N = pairs[next_N].sp;
  1342           use_P = (sp_cur_P > sp_cur_N);
  1343           if (0) LOGF("  %s  <=  sps P %p N %p",
  1344                       use_P ? "P" : "N", (void*)(intptr_t)sp_cur_P, 
  1345                                          (void*)(intptr_t)sp_cur_N);
  1346         }
  1347       }
  1348       /* So, we know which we are going to use. */
  1349       if (use_P) {
  1350         unsigned int m = next_P + 1;
  1351         while (true) {
  1352           MOZ_ASSERT(m < buff->entsUsed);
  1353           ProfileEntry ent = utb_get_profent(buff, m);
  1354           if (ent.is_ent_hint('Q')) {
  1355             next_P = m + 1;
  1356             break;
  1357           }
  1358           // we don't expect a flush-hint here
  1359           MOZ_ASSERT(!ent.is_ent_hint('F'));
  1360           // skip ones we can't copy
  1361           if (ent.is_ent_hint() || ent.is_ent('S')) { m++; continue; }
  1362           // and copy everything else
  1363           buff->aProfile->addTag( ent );
  1364           m++;
  1365         }
  1366       } else {
  1367         buff->aProfile
  1368             ->addTag( ProfileEntry('l', reinterpret_cast<void*>(pairs[next_N].pc)) );
  1369         next_N++;
  1370       }
  1371       /* Remember what we chose, for next time. */
  1372       last_was_P = use_P;
  1373     }
  1375     MOZ_ASSERT(next_P == ix_last_hQ + 1);
  1376     MOZ_ASSERT(next_N == n_pairs);
  1377     // END merge
  1379     // Entries after the pseudostack frames
  1380     for (k = ix_last_hQ+1; k < buff->entsUsed; k++) {
  1381       ProfileEntry ent = utb_get_profent(buff, k);
  1382       // action flush-hints
  1383       if (ent.is_ent_hint('F')) { buff->aProfile->flush(); continue; }
  1384       // skip ones we can't copy
  1385       if (ent.is_ent_hint() || ent.is_ent('S')) { continue; }
  1386       // and copy everything else
  1387       buff->aProfile->addTag( ent );
  1388     }
  1390     // free native unwind info
  1391     if (pairs)
  1392       free(pairs);
  1393   }
  1395 #if 0
  1396   bool show = true;
  1397   if (show) LOG("----------------");
  1398   for (k = 0; k < buff->entsUsed; k++) {
  1399     ProfileEntry ent = utb_get_profent(buff, k);
  1400     if (show) ent.log();
  1401     if (ent.is_ent_hint('F')) {
  1402       /* This is a flush-hint */
  1403       buff->aProfile->flush();
  1404     }
  1405     else if (ent.is_ent_hint('N')) {
  1406       /* This is a do-a-native-unwind-right-now hint */
  1407       MOZ_ASSERT(buff->haveNativeInfo);
  1408       PCandSP* pairs = nullptr;
  1409       unsigned int nPairs = 0;
  1410       do_lul_unwind_Buffer(&pairs, &nPairs, buff, oldest_ix);
  1411       buff->aProfile->addTag( ProfileEntry('s', "(root)") );
  1412       for (unsigned int i = 0; i < nPairs; i++) {
  1413         buff->aProfile
  1414             ->addTag( ProfileEntry('l', reinterpret_cast<void*>(pairs[i].pc)) );
  1415       }
  1416       if (pairs)
  1417         free(pairs);
  1418     } else {
  1419       /* Copy in verbatim */
  1420       buff->aProfile->addTag( ent );
  1421     }
  1422   }
  1423 #endif
  1425   buff->aProfile->EndUnwind();
  1426 }
  1429 // Find out, in a platform-dependent way, where the code modules got
  1430 // mapped in the process' virtual address space, and get |aLUL| to
  1431 // load unwind info for them.
  1432 void
  1433 read_procmaps(lul::LUL* aLUL)
  1434 {
  1435   MOZ_ASSERT(aLUL->CountMappings() == 0);
  1437 # if defined(SPS_OS_linux) || defined(SPS_OS_android) || defined(SPS_OS_darwin)
  1438   SharedLibraryInfo info = SharedLibraryInfo::GetInfoForSelf();
  1440   for (size_t i = 0; i < info.GetSize(); i++) {
  1441     const SharedLibrary& lib = info.GetEntry(i);
  1443 #if defined(SPS_OS_android) && !defined(MOZ_WIDGET_GONK)
  1444     // We're using faulty.lib.  Use a special-case object mapper.
  1445     AutoObjectMapperFaultyLib mapper(aLUL->mLog);
  1446 #else
  1447     // We can use the standard POSIX-based mapper.
  1448     AutoObjectMapperPOSIX mapper(aLUL->mLog);
  1449 #endif
  1451     // Ask |mapper| to map the object.  Then hand its mapped address
  1452     // to NotifyAfterMap().
  1453     void*  image = nullptr;
  1454     size_t size  = 0;
  1455     bool ok = mapper.Map(&image, &size, lib.GetName());
  1456     if (ok && image && size > 0) {
  1457       aLUL->NotifyAfterMap(lib.GetStart(), lib.GetEnd()-lib.GetStart(),
  1458                            lib.GetName().c_str(), image);
  1459     } else if (!ok && lib.GetName() == "") {
  1460       // The object has no name and (as a consequence) the mapper
  1461       // failed to map it.  This happens on Linux, where
  1462       // GetInfoForSelf() produces two such mappings: one for the
  1463       // executable and one for the VDSO.  The executable one isn't a
  1464       // big deal since there's not much interesting code in there,
  1465       // but the VDSO one is a problem on x86-{linux,android} because
  1466       // lack of knowledge about the mapped area inhibits LUL's
  1467       // special __kernel_syscall handling.  Hence notify |aLUL| at
  1468       // least of the mapping, even though it can't read any unwind
  1469       // information for the area.
  1470       aLUL->NotifyExecutableArea(lib.GetStart(), lib.GetEnd()-lib.GetStart());
  1471     }
  1473     // |mapper| goes out of scope at this point and so its destructor
  1474     // unmaps the object.
  1475   }
  1477 # else
  1478 #  error "Unknown platform"
  1479 # endif
  1480 }
  1482 // LUL needs a callback for its logging sink.
  1483 static void
  1484 logging_sink_for_LUL(const char* str) {
  1485   // Ignore any trailing \n, since LOG will add one anyway.
  1486   size_t n = strlen(str);
  1487   if (n > 0 && str[n-1] == '\n') {
  1488     char* tmp = strdup(str);
  1489     tmp[n-1] = 0;
  1490     LOG(tmp);
  1491     free(tmp);
  1492   } else {
  1493     LOG(str);
  1494   }
  1495 }
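// Purely illustrative sketch, not used anywhere in this file: the same
// trailing-newline strip can be written without the strdup/free round
// trip by copying into a std::string (already available via <string>).
// The helper name below is made up for the sketch.
#if 0
static void logging_sink_for_LUL_nostrdup(const char* str) {
  std::string s(str);                      // local copy we can trim
  if (!s.empty() && s[s.size()-1] == '\n')
    s.erase(s.size()-1);                   // LOG() adds its own newline
  LOG(s.c_str());
}
#endif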
  1497 // Runs in the unwinder thread -- well, this _is_ the unwinder thread.
  1498 static void* unwind_thr_fn(void* exit_nowV)
  1499 {
  1500   // This is the unwinder thread function.  The first thread in must
  1501   // create the unwinder library and request it to read the debug
  1502   // info.  The last thread out must deallocate the library.  These
  1503   // three tasks (create library, read debuginfo, destroy library) are
  1504   // sequentialised by |sLULmutex|.  |sLUL| and |sLULcount| may only
  1505   // be modified whilst |sLULmutex| is held.
  1506   //
  1507   // Once the threads are up and running, |sLUL| (the pointer itself,
  1508   // that is) stays constant, and the multiple threads may make
  1509   // concurrent calls into |sLUL| to do concurrent unwinding.
  1510   LOG("unwind_thr_fn: START");
  1512   // A hook for testing LUL: at the first entrance here, check env var
  1513   // MOZ_PROFILER_LUL_TEST, and if set, run tests on LUL.  Note that
  1514   // it is preferable to run the LUL tests via gtest, but gtest is not
  1515   // currently supported on all targets that LUL runs on.  Hence the
  1516   // auxiliary mechanism here is also needed.
  1517   bool doLulTest = false;
  1519   mozilla::DebugOnly<int> r = pthread_mutex_lock(&sLULmutex);
  1520   MOZ_ASSERT(!r);
  1522   if (!sLUL) {
  1523     // sLUL hasn't been allocated, so we must be the first thread in.
  1524     sLUL = new lul::LUL(logging_sink_for_LUL);
  1525     MOZ_ASSERT(sLUL);
  1526     MOZ_ASSERT(sLULcount == 0);
  1527     // Register this thread so it can read unwind info and do unwinding.
  1528     sLUL->RegisterUnwinderThread();
  1529     // Read all the unwind info currently available.
  1530     read_procmaps(sLUL);
  1531     // Has a test been requested?
  1532     if (PR_GetEnv("MOZ_PROFILER_LUL_TEST")) {
  1533       doLulTest = true;
  1534     }
  1535   } else {
  1536     // sLUL has already been allocated, so we can't be the first
  1537     // thread in.
  1538     MOZ_ASSERT(sLULcount > 0);
  1539     // Register this thread so it can do unwinding.
  1540     sLUL->RegisterUnwinderThread();
  1541   }
  1543   sLULcount++;
  1545   r = pthread_mutex_unlock(&sLULmutex);
  1546   MOZ_ASSERT(!r);
  1548   // If a test has been requested for LUL, run it.  Summary results
  1549   // are sent to sLUL's logging sink.  Note that this happens after
  1550   // read_procmaps has read unwind information into sLUL, so that the
  1551   // tests have something to unwind against.  Without that they'd be
  1552   // pretty meaningless.
  1553   if (doLulTest) {
  1554     int nTests = 0, nTestsPassed = 0;
  1555     RunLulUnitTests(&nTests, &nTestsPassed, sLUL);
  1556   }
  1558   // At this point, sLUL -- the single instance of the library -- is
  1559   // allocated and has read the required unwind info.  All running
  1560   // threads can now make Unwind() requests of it concurrently, if
  1561   // they wish.
  1563   // Now go on to allocate the array of buffers used for communication
  1564   // between the sampling threads and the unwinder threads.
  1566   // If we're the first thread in, we'll need to allocate the buffer
  1567   // array g_buffers plus the Buffer structs that it points at.
  1568   spinLock_acquire(&g_spinLock);
  1569   if (g_buffers == nullptr) {
  1570     // Drop the lock, make a complete copy in memory, reacquire the
  1571     // lock, and try to install it -- which might fail, if someone
  1572     // else beat us to it.
  1573     spinLock_release(&g_spinLock);
  1574     UnwinderThreadBuffer** buffers
  1575       = (UnwinderThreadBuffer**)malloc(N_UNW_THR_BUFFERS
  1576                                         * sizeof(UnwinderThreadBuffer*));
  1577     MOZ_ASSERT(buffers);
  1578     int i;
  1579     for (i = 0; i < N_UNW_THR_BUFFERS; i++) {
  1580       /* These calloc-ations are shared between the sampling and
  1581          unwinding threads.  They must be freed after all such threads
  1582          have terminated. */
  1583       buffers[i] = (UnwinderThreadBuffer*)
  1584                    calloc(sizeof(UnwinderThreadBuffer), 1);
  1585       MOZ_ASSERT(buffers[i]);
  1586       buffers[i]->state = S_EMPTY;
  1587     }
  1588     /* Try to install it */
  1589     spinLock_acquire(&g_spinLock);
  1590     if (g_buffers == nullptr) {
  1591       g_buffers = buffers;
  1592       spinLock_release(&g_spinLock);
  1593     } else {
  1594       /* Someone else beat us to it.  Release what we just allocated
  1595          so as to avoid a leak. */
  1596       spinLock_release(&g_spinLock);
  1597       for (i = 0; i < N_UNW_THR_BUFFERS; i++) {
  1598         free(buffers[i]);
  1599       }
  1600       free(buffers);
  1601     }
  1602   } else {
  1603     /* They are already allocated, so just drop the lock and continue. */
  1604     spinLock_release(&g_spinLock);
  1605   }
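// Illustrative sketch only: the race-tolerant install above ("allocate
// outside the lock, install under the lock, free the copy if someone
// else won") could equally be expressed with a single compare-and-swap.
// This file does not do it that way; gBuffersAtomic and the helper below
// are hypothetical and would additionally need <atomic>.
#if 0
static std::atomic<UnwinderThreadBuffer**> gBuffersAtomic(nullptr);

static void install_buffers_sketch(UnwinderThreadBuffer** buffers) {
  UnwinderThreadBuffer** expected = nullptr;
  if (!gBuffersAtomic.compare_exchange_strong(expected, buffers)) {
    // Lost the race: free the freshly allocated copy, exactly as the
    // spinlock-based code above does.
    for (int i = 0; i < N_UNW_THR_BUFFERS; i++)
      free(buffers[i]);
    free(buffers);
  }
}
#endif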
  1607   /* 
  1608     while (1) {
  1609       acq lock
  1610       scan to find oldest full
  1611          if none { rel lock; sleep; continue }
  1612       set buff state to emptying
  1613       rel lock
  1614       acq MLock // implicitly
  1615       process buffer
  1616       rel MLock // implicitly
  1617       acq lock
  1618       set buff state to S_EMPTY
  1619       rel lock
  1620     }
  1621   */
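// For orientation, the buffer lifecycle this loop drives, as far as it
// is visible in this part of the file (the transition into S_FULL is
// performed by the sampling side elsewhere):
//
//   S_EMPTY    --sampler fills it-->                       S_FULL
//   S_FULL     --claimed below, oldest seqNo first-->      S_EMPTYING
//   S_EMPTYING --process_buffer() + entsPages unmapped-->  S_EMPTY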
  1622   int* exit_now = (int*)exit_nowV;
  1623   int ms_to_sleep_if_empty = 1;
  1625   const int longest_sleep_ms = 1000;
  1626   bool show_sleep_message = true;
  1628   while (1) {
  1630     if (*exit_now != 0) {
  1631       *exit_now = 0;
  1632       break;
  1633     }
  1635     spinLock_acquire(&g_spinLock);
  1637     /* Find the oldest filled buffer, if any. */
  1638     uint64_t oldest_seqNo = ~0ULL; /* infinity */
  1639     int      oldest_ix    = -1;
  1640     int      i;
  1641     for (i = 0; i < N_UNW_THR_BUFFERS; i++) {
  1642       UnwinderThreadBuffer* buff = g_buffers[i];
  1643       if (buff->state != S_FULL) continue;
  1644       if (buff->seqNo < oldest_seqNo) {
  1645         oldest_seqNo = buff->seqNo;
  1646         oldest_ix    = i;
  1647       }
  1648     }
  1649     if (oldest_ix == -1) {
  1650       /* We didn't find a full buffer.  Snooze and try again later. */
  1651       MOZ_ASSERT(oldest_seqNo == ~0ULL);
  1652       spinLock_release(&g_spinLock);
  1653       if (ms_to_sleep_if_empty > 100 && LOGLEVEL >= 2) {
  1654         if (show_sleep_message)
  1655           LOGF("BPUnw: unwinder: sleep for %d ms", ms_to_sleep_if_empty);
  1656         /* If we've already shown the message for the longest sleep,
  1657            don't show it again, until the next round of sleeping
  1658            starts. */
  1659         if (ms_to_sleep_if_empty == longest_sleep_ms)
  1660           show_sleep_message = false;
  1661       }
  1662       sleep_ms(ms_to_sleep_if_empty);
  1663       if (ms_to_sleep_if_empty < 20) {
  1664         ms_to_sleep_if_empty += 2;
  1665       } else {
  1666         ms_to_sleep_if_empty = (15 * ms_to_sleep_if_empty) / 10;
  1667         if (ms_to_sleep_if_empty > longest_sleep_ms)
  1668           ms_to_sleep_if_empty = longest_sleep_ms;
  1669       }
  1670       continue;
  1671     }
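// Worked example of the back-off above: after an empty scan the sleeps
// run 1, 3, 5, ..., 19, 21 ms (+2 ms while under 20 ms), then grow by
// roughly 1.5x -- 31, 46, 69, 103, 154, 231, 346, 519, 778 -- and are
// finally clamped at longest_sleep_ms (1000 ms).  The "sleep for %d ms"
// message only appears once the sleep exceeds 100 ms, and is silenced
// after the 1000 ms ceiling has been reported once.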
  1673     /* We found a full buffer.  Mark it as 'ours' and drop the
  1674        lock; then we can safely throw breakpad at it. */
  1675     UnwinderThreadBuffer* buff = g_buffers[oldest_ix];
  1676     MOZ_ASSERT(buff->state == S_FULL);
  1677     buff->state = S_EMPTYING;
  1678     spinLock_release(&g_spinLock);
  1680     /* unwind .. in which we can do anything we like, since any
  1681        resource stalls that we may encounter (eg malloc locks) in
  1682        competition with signal handler instances, will be short
  1683        lived since the signal handler is guaranteed nonblocking. */
  1684     if (0) LOGF("BPUnw: unwinder: seqNo %llu: emptying buf %d\n",
  1685                 (unsigned long long int)oldest_seqNo, oldest_ix);
  1687     process_buffer(buff, oldest_ix);
  1689     /* And .. we're done.  Mark the buffer as empty so it can be
  1690        reused.  First though, unmap any of the entsPages that got
  1691        mapped during filling. */
  1692     for (i = 0; i < N_PROF_ENT_PAGES; i++) {
  1693       if (buff->entsPages[i] == ProfEntsPage_INVALID)
  1694         continue;
  1695       munmap_ProfEntsPage(buff->entsPages[i]);
  1696       buff->entsPages[i] = ProfEntsPage_INVALID;
  1697     }
  1699     (void)VALGRIND_MAKE_MEM_UNDEFINED(&buff->stackImg.mContents[0],
  1700                                       lul::N_STACK_BYTES);
  1701     spinLock_acquire(&g_spinLock);
  1702     MOZ_ASSERT(buff->state == S_EMPTYING);
  1703     buff->state = S_EMPTY;
  1704     spinLock_release(&g_spinLock);
  1705     ms_to_sleep_if_empty = 1;
  1706     show_sleep_message = true;
  1707   }
  1709   // This unwinder thread is exiting.  If it's the last one out,
  1710   // shut down and deallocate the unwinder library.
  1711   r = pthread_mutex_lock(&sLULmutex);
  1712   MOZ_ASSERT(!r);
  1714   MOZ_ASSERT(sLULcount > 0);
  1715   if (sLULcount == 1) {
  1716     // Tell the library to discard unwind info for the entire address
  1717     // space.
  1718     sLUL->NotifyBeforeUnmapAll();
  1720     delete sLUL;
  1721     sLUL = nullptr;
  1722   }
  1724   sLULcount--;
  1726   r = pthread_mutex_unlock(&sLULmutex);
  1727   MOZ_ASSERT(!r);
  1729   LOG("unwind_thr_fn: STOP");
  1730   return nullptr;
  1731 }
  1733 static void finish_sync_buffer(ThreadProfile* profile,
  1734                                UnwinderThreadBuffer* buff,
  1735                                void* /* ucontext_t*, really */ ucV)
  1736 {
  1737   SyncProfile* syncProfile = profile->AsSyncProfile();
  1738   MOZ_ASSERT(syncProfile);
  1739   SyncUnwinderThreadBuffer* utb = static_cast<SyncUnwinderThreadBuffer*>(
  1740                                                    syncProfile->GetUWTBuffer());
  1741   fill_buffer(profile, utb->GetBuffer(), ucV);
  1742   utb->GetBuffer()->state = S_FULL;
  1743   PseudoStack* stack = profile->GetPseudoStack();
  1744   stack->addLinkedUWTBuffer(utb);
  1745 }
  1747 static void release_sync_buffer(LinkedUWTBuffer* buff)
  1748 {
  1749   SyncUnwinderThreadBuffer* data = static_cast<SyncUnwinderThreadBuffer*>(buff);
  1750   MOZ_ASSERT(data->GetBuffer()->state == S_EMPTY);
  1751   delete data;
  1752 }
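// Taken together, the two helpers above make up this file's half of the
// synchronous-unwind path: a buffer obtained through
// utb__acquire_sync_buffer() is filled from the caller's register state
// by finish_sync_buffer(), marked S_FULL and linked onto the thread's
// PseudoStack, and is finally returned to the allocator by
// release_sync_buffer() once the profile has consumed it.  The call
// sites themselves live outside this file, so this is only a summary of
// the intended flow.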
  1754 ////////////////////////////////////////////////////////////////
  1755 ////////////////////////////////////////////////////////////////
  1756 ////////////////////////////////////////////////////////////////
  1757 ////////////////////////////////////////////////////////////////
  1758 ////////////////////////////////////////////////////////////////
  1759 ////////////////////////////////////////////////////////////////
  1761 // Keeps count of how frames are recovered (from thread context, CFI,
  1762 // or stack scanning), which is useful for diagnostic purposes.
  1763 static void stats_notify_frame(int n_context, int n_cfi, int n_scanned)
  1764 {
  1765   // Gather stats in intervals.
  1766   static unsigned int nf_total    = 0; // total frames since last printout
  1767   static unsigned int nf_CONTEXT  = 0;
  1768   static unsigned int nf_CFI      = 0;
  1769   static unsigned int nf_SCANNED  = 0;
  1771   nf_CONTEXT += n_context;
  1772   nf_CFI     += n_cfi;
  1773   nf_SCANNED += n_scanned;
  1774   nf_total   += (n_context + n_cfi + n_scanned);
  1776   if (nf_total >= 5000) {
  1777     LOGF("BPUnw frame stats: TOTAL %5u"
  1778          "    CTX %4u    CFI %4u    SCAN %4u",
  1779          nf_total, nf_CONTEXT, nf_CFI, nf_SCANNED);
  1780     nf_total    = 0;
  1781     nf_CONTEXT  = 0;
  1782     nf_CFI      = 0;
  1783     nf_SCANNED  = 0;
  1784   }
  1785 }
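// Worked example (illustrative numbers): if, since the last printout,
// 5010 frames were recovered -- say 90 from thread context, 4100 via
// CFI and 820 by stack scanning -- then nf_total reaches 5010 >= 5000,
// one summary line is logged, and all four counters reset so the next
// interval starts from zero.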
  1787 static
  1788 void do_lul_unwind_Buffer(/*OUT*/PCandSP** pairs,
  1789                           /*OUT*/unsigned int* nPairs,
  1790                           UnwinderThreadBuffer* buff,
  1791                           int buffNo /* for debug printing only */)
  1792 {
  1793 # if defined(SPS_ARCH_amd64) || defined(SPS_ARCH_x86)
  1794   lul::UnwindRegs startRegs = buff->startRegs;
  1795   if (0) {
  1796     LOGF("Initial RIP = 0x%llx", (unsigned long long int)startRegs.xip.Value());
  1797     LOGF("Initial RSP = 0x%llx", (unsigned long long int)startRegs.xsp.Value());
  1798     LOGF("Initial RBP = 0x%llx", (unsigned long long int)startRegs.xbp.Value());
  1799   }
  1801 # elif defined(SPS_ARCH_arm)
  1802   lul::UnwindRegs startRegs = buff->startRegs;
  1803   if (0) {
  1804     LOGF("Initial R15 = 0x%llx", (unsigned long long int)startRegs.r15.Value());
  1805     LOGF("Initial R13 = 0x%llx", (unsigned long long int)startRegs.r13.Value());
  1806   }
  1808 # else
  1809 #   error "Unknown plat"
  1810 # endif
  1812   // FIXME: should we reinstate the ability to use separate debug objects?
  1813   // /* Make up a list of places where the debug objects might be. */
  1814   // std::vector<std::string> debug_dirs;
  1815 # if defined(SPS_OS_linux)
  1816   //  debug_dirs.push_back("/usr/lib/debug/lib");
  1817   //  debug_dirs.push_back("/usr/lib/debug/usr/lib");
  1818   //  debug_dirs.push_back("/usr/lib/debug/lib/x86_64-linux-gnu");
  1819   //  debug_dirs.push_back("/usr/lib/debug/usr/lib/x86_64-linux-gnu");
  1820 # elif defined(SPS_OS_android)
  1821   //  debug_dirs.push_back("/sdcard/symbols/system/lib");
  1822   //  debug_dirs.push_back("/sdcard/symbols/system/bin");
  1823 # elif defined(SPS_OS_darwin)
  1824   //  /* Nothing */
  1825 # else
  1826 #   error "Unknown plat"
  1827 # endif
  1829   // Set the max number of scanned or otherwise dubious frames
  1830   // to the user specified limit
  1831   size_t scannedFramesAllowed
  1832     = std::min(std::max(0, sUnwindStackScan), MAX_NATIVE_FRAMES);
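// For example (illustrative values): sUnwindStackScan = -3 clamps to 0
// (scanning disabled), 25 is used as-is, and 100000 clamps to
// MAX_NATIVE_FRAMES (256), since the scanned-frame allowance can never
// usefully exceed the total frame budget.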
  1834   // The max number of frames is MAX_NATIVE_FRAMES, so as to avoid
  1835   // the unwinder wasting a lot of time looping on corrupted stacks.
  1836   uintptr_t framePCs[MAX_NATIVE_FRAMES];
  1837   uintptr_t frameSPs[MAX_NATIVE_FRAMES];
  1838   size_t framesAvail = mozilla::ArrayLength(framePCs);
  1839   size_t framesUsed  = 0;
  1840   size_t scannedFramesAcquired = 0;
  1841   sLUL->Unwind( &framePCs[0], &frameSPs[0], 
  1842                 &framesUsed, &scannedFramesAcquired,
  1843                 framesAvail, scannedFramesAllowed,
  1844                 &startRegs, &buff->stackImg );
  1846   if (LOGLEVEL >= 2)
  1847     stats_notify_frame(/* context */ 1,
  1848                        /* cfi     */ framesUsed - 1 - scannedFramesAcquired,
  1849                        /* scanned */ scannedFramesAcquired);
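// Worked example of the classification above, assuming Unwind() counts
// the context frame in framesUsed (which the "- 1" implies): with
// framesUsed = 12 and scannedFramesAcquired = 3, the sample is reported
// as 1 context frame, 8 CFI frames and 3 scanned frames (1 + 8 + 3 = 12).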
  1851   // PC values are now in framePCs[0 .. framesUsed-1], with [0] being
  1852   // the innermost frame.  SP values are likewise in frameSPs[].
  1853   *pairs  = (PCandSP*)calloc(framesUsed, sizeof(PCandSP));
  1854   *nPairs = framesUsed;
  1855   if (*pairs == nullptr) {
  1856     *nPairs = 0;
  1857     return;
  1858   }
  1860   if (framesUsed > 0) {
  1861     for (unsigned int frame_index = 0; 
  1862          frame_index < framesUsed; ++frame_index) {
  1863       (*pairs)[framesUsed-1-frame_index].pc = framePCs[frame_index];
  1864       (*pairs)[framesUsed-1-frame_index].sp = frameSPs[frame_index];
  1865     }
  1866   }
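// Worked example of the reversal above: for framesUsed = 3 with
// framePCs = { PC_leaf, PC_mid, PC_root } (innermost first, as produced
// by Unwind()), pairs[] ends up { PC_root, PC_mid, PC_leaf }, i.e.
// outermost first -- the root-first order in which the (disabled)
// "#if 0" code earlier in this file appends frames after its "(root)"
// entry.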
  1868   if (LOGLEVEL >= 3) {
  1869     LOGF("BPUnw: unwinder: seqNo %llu, buf %d: got %u frames",
  1870          (unsigned long long int)buff->seqNo, buffNo,
  1871          (unsigned int)framesUsed);
  1872   }
  1874   if (LOGLEVEL >= 2) {
  1875     if (0 == (g_stats_totalSamples % 1000))
  1876       LOGF("BPUnw: %llu total samples, %llu failed (buffer unavail), "
  1877                    "%llu failed (thread unreg'd), ",
  1878            (unsigned long long int)g_stats_totalSamples,
  1879            (unsigned long long int)g_stats_noBuffAvail,
  1880            (unsigned long long int)g_stats_thrUnregd);
  1881   }
  1882 }
  1884 #endif /* defined(SPS_OS_windows) || defined(SPS_OS_darwin) */
