tools/profiler/LulMain.h

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

     1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
     3 /* This Source Code Form is subject to the terms of the Mozilla Public
     4  * License, v. 2.0. If a copy of the MPL was not distributed with this
     5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     7 #ifndef LulMain_h
     8 #define LulMain_h
    10 #include <pthread.h>   // pthread_t
    12 #include <map>
    14 #include "LulPlatformMacros.h"
    15 #include "LulRWLock.h"
    17 // LUL: A Lightweight Unwind Library.
    18 // This file provides the end-user (external) interface for LUL.
    20 // Some comments about naming in the implementation.  These are safe
    21 // to ignore if you are merely using LUL, but are important if you
    22 // hack on its internals.
    23 //
    24 // Debuginfo readers in general have tended to use the word "address"
    25 // to mean several different things.  This sometimes makes them
    26 // difficult to understand and maintain.  LUL tries hard to avoid
    27 // using the word "address" and instead uses the following more
    28 // precise terms:
    29 //
    30 // * SVMA ("Stated Virtual Memory Address"): this is an address of a
    31 //   symbol (etc) as it is stated in the symbol table, or other
    32 //   metadata, of an object.  Such values are typically small and
    33 //   start from zero or thereabouts, unless the object has been
    34 //   prelinked.
    35 //
    36 // * AVMA ("Actual Virtual Memory Address"): this is the address of a
    37 //   symbol (etc) in a running process, that is, once the associated
    38 //   object has been mapped into a process.  Such values are typically
    39 //   much larger than SVMAs, since objects can get mapped arbitrarily
    40 //   far along the address space.
    41 //
    42 // * "Bias": the difference between AVMA and SVMA for a given symbol
    43 //   (specifically, AVMA - SVMA).  The bias is always an integral
    44 //   number of pages.  Once we know the bias for a given object's
    45 //   text section (for example), we can compute the AVMAs of all of
    46 //   its text symbols by adding the bias to their SVMAs.
    47 //
    48 // * "Image address": typically, to read debuginfo from an object we
    49 //   will temporarily mmap in the file so as to read symbol tables
    50 //   etc.  Addresses in this temporary mapping are called "Image
    51 //   addresses".  Note that the temporary mapping is entirely
    52 //   unrelated to the mappings of the file that the dynamic linker
    53 //   must perform merely in order to get the program to run.  Hence
    54 //   image addresses are unrelated to either SVMAs or AVMAs.
    57 namespace lul {
    // A machine-word-sized unsigned value paired with a flag recording
    // whether that value is meaningful.  An invalid instance always holds
    // the value zero.
    class TaggedUWord {
    public:
      // Build a valid instance holding |w|.
      TaggedUWord(uintptr_t w) : mValue(w), mValid(true) {}

      // Build an invalid instance (value zero).
      TaggedUWord() : mValue(0), mValid(false) {}

      // Accumulate |other| into this instance.  The sum is kept only when
      // both operands are valid; in every other case this instance becomes
      // invalid and its value is reset to zero.
      void Add(TaggedUWord other) {
        if (!mValid || !other.Valid()) {
          mValue = 0;
          mValid = false;
          return;
        }
        mValue += other.Value();
      }

      // True iff this instance is valid and its value is word-aligned,
      // that is, none of the low (sizeof(uintptr_t) - 1) mask bits is set.
      bool IsAligned() const {
        const uintptr_t lowBitsMask = sizeof(uintptr_t) - 1;
        return mValid && (mValue & lowBitsMask) == 0;
      }

      // Plain accessors for the word and for its validity tag.
      uintptr_t Value() const { return mValue; }
      bool Valid() const { return mValid; }

    private:
      uintptr_t mValue;  // the word itself; zero whenever mValid is false
      bool mValid;       // whether mValue carries a meaningful value
    };
    98 // The registers, with validity tags, that will be unwound.
   100 struct UnwindRegs {
   101 #if defined(LUL_ARCH_arm)
   102   TaggedUWord r7;
   103   TaggedUWord r11;
   104   TaggedUWord r12;
   105   TaggedUWord r13;
   106   TaggedUWord r14;
   107   TaggedUWord r15;
   108 #elif defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
   109   TaggedUWord xbp;
   110   TaggedUWord xsp;
   111   TaggedUWord xip;
   112 #else
   113 # error "Unknown plat"
   114 #endif
   115 };
   // Upper bound, in bytes, on the size of one stack snapshot.  It may be
   // raised if needed, but bigger values cost performance because every
   // snapshot has to be copied between the sampling and worker threads.
   // In practice 32k seems to be enough to get good backtraces.
   static const size_t N_STACK_BYTES = 32 * 1024;

   // A copy of a chunk of stack, which is what actually gets unwound.
   struct StackImage {
     // The buffer represents the address range
     // [mStartAvma, mStartAvma + mLen).  mLen must never exceed
     // N_STACK_BYTES; only the first mLen bytes of mContents are
     // meaningful.
     uintptr_t mStartAvma;
     size_t    mLen;
     uint8_t   mContents[N_STACK_BYTES];
   };
   135 // The core unwinder library class.  Just one of these is needed, and
   136 // it can be shared by multiple unwinder threads.
   137 //
   138 // Access to the library is mediated by a single reader-writer lock.
   139 // All attempts to change the library's internal shared state -- that
   140 // is, loading or unloading unwind info -- are forced single-threaded
   141 // by causing the called routine to acquire a write-lock.  Unwind
   142 // requests do not change the library's internal shared state and
   143 // therefore require only a read-lock.  Hence multiple threads can
   144 // unwind in parallel.
   145 //
   146 // The library needs to maintain state which is private to each
   147 // unwinder thread -- the CFI (Dwarf Call Frame Information) fast
   148 // cache.  Hence unwinder threads first need to register with the
   149 // library, so their identities are known.  Also, for maximum
   150 // effectiveness of the CFI caching, it is preferable to have a small
   151 // number of very-busy unwinder threads rather than a large number of
   152 // mostly-idle unwinder threads.
   153 //
   154 // None of the methods may be safely called from within a signal
   155 // handler, since this risks deadlock.  In particular this means
   156 // a thread may not unwind itself from within a signal handler
   157 // frame.  It might be safe to call Unwind() on its own stack
   158 // from not-inside a signal frame, although even that cannot be
   159 // guaranteed deadlock free.
   161 class PriMap;
   162 class SegArray;
   163 class CFICache;
   165 class LUL {
   166 public:
   167   // Create; supply a logging sink.  Initialises the rw-lock.
   168   LUL(void (*aLog)(const char*));
   170   // Destroy.  This acquires mRWlock for writing.  By doing that, waits
   171   // for all unwinder threads to finish any Unwind() calls they may be
   172   // in.  All resources are freed and all registered unwinder threads
   173   // are deregistered.
   174   ~LUL();
   176   // Notify of a new r-x mapping, and load the associated unwind info.
   177   // The filename is strdup'd and used for debug printing.  If
   178   // aMappedImage is NULL, this function will mmap/munmap the file
   179   // itself, so as to be able to read the unwind info.  If
   180   // aMappedImage is non-NULL then it is assumed to point to a
   181   // called-supplied and caller-managed mapped image of the file.
   182   //
   183   // Acquires mRWlock for writing.  This must be called only after the
   184   // code area in question really has been mapped.
   185   void NotifyAfterMap(uintptr_t aRXavma, size_t aSize,
   186                       const char* aFileName, const void* aMappedImage);
   188   // In rare cases we know an executable area exists but don't know
   189   // what the associated file is.  This call notifies LUL of such
   190   // areas.  This is important for correct functioning of stack
   191   // scanning and of the x86-{linux,android} special-case
   192   // __kernel_syscall function handling.  Acquires mRWlock for
   193   // writing.  This must be called only after the code area in
   194   // question really has been mapped.
   195   void NotifyExecutableArea(uintptr_t aRXavma, size_t aSize);
   197   // Notify that a mapped area has been unmapped; discard any
   198   // associated unwind info.  Acquires mRWlock for writing.  Note that
   199   // to avoid segfaulting the stack-scan unwinder, which inspects code
   200   // areas, this must be called before the code area in question is
   201   // really unmapped.  Note that, unlike NotifyAfterMap(), this
   202   // function takes the start and end addresses of the range to be
   203   // unmapped, rather than a start and a length parameter.  This is so
   204   // as to make it possible to notify an unmap for the entire address
   205   // space using a single call.
   206   void NotifyBeforeUnmap(uintptr_t aAvmaMin, uintptr_t aAvmaMax);
   208   // Apply NotifyBeforeUnmap to the entire address space.  This causes
   209   // LUL to discard all unwind and executable-area information for the
   210   // entire address space.
   211   void NotifyBeforeUnmapAll() {
   212     NotifyBeforeUnmap(0, UINTPTR_MAX);
   213   }
   215   // Returns the number of mappings currently registered.  Acquires
   216   // mRWlock for writing.
   217   size_t CountMappings();
   219   // Register the calling thread for unwinding.  Acquires mRWlock for
   220   // writing.
   221   void RegisterUnwinderThread();
   223   // Unwind |aStackImg| starting with the context in |aStartRegs|.
   224   // Write the number of frames recovered in *aFramesUsed.  Put
   225   // the PC values in aFramePCs[0 .. *aFramesUsed-1] and
   226   // the SP values in aFrameSPs[0 .. *aFramesUsed-1].
   227   // |aFramesAvail| is the size of the two output arrays and hence the
   228   // largest possible value of *aFramesUsed.  PC values are always
   229   // valid, and the unwind will stop when the PC becomes invalid, but
   230   // the SP values might be invalid, in which case the value zero will
   231   // be written in the relevant frameSPs[] slot.
   232   //
   233   // Unwinding may optionally use stack scanning.  The maximum number
   234   // of frames that may be recovered by stack scanning is
   235   // |aScannedFramesAllowed| and the actual number recovered is
   236   // written into *aScannedFramesAcquired.  |aScannedFramesAllowed|
   237   // must be less than or equal to |aFramesAvail|.
   238   //
   239   // This function assumes that the SP values increase as it unwinds
   240   // away from the innermost frame -- that is, that the stack grows
   241   // down.  It monitors SP values as it unwinds to check they
   242   // decrease, so as to avoid looping on corrupted stacks.
   243   //
   244   // Acquires mRWlock for reading.  Hence multiple threads may unwind
   245   // at once, but no thread may be unwinding whilst the library loads
   246   // or discards unwind information.  Returns false if the calling
   247   // thread is not registered for unwinding.
   248   //
   249   // Up to aScannedFramesAllowed stack-scanned frames may be recovered.
   250   //
   251   // The calling thread must previously have registered itself via
   252   // RegisterUnwinderThread.
   253   void Unwind(/*OUT*/uintptr_t* aFramePCs,
   254               /*OUT*/uintptr_t* aFrameSPs,
   255               /*OUT*/size_t* aFramesUsed,
   256               /*OUT*/size_t* aScannedFramesAcquired,
   257               size_t aFramesAvail,
   258               size_t aScannedFramesAllowed,
   259               UnwindRegs* aStartRegs, StackImage* aStackImg);
   261   // The logging sink.  Call to send debug strings to the caller-
   262   // specified destination.
   263   void (*mLog)(const char*);
   265 private:
   266   // Invalidate the caches.  Requires mRWlock to be held for writing;
   267   // does not acquire it itself.
   268   void InvalidateCFICaches();
   270   // The one-and-only lock, a reader-writer lock, for the library.
   271   LulRWLock* mRWlock;
   273   // The top level mapping from code address ranges to postprocessed
   274   // unwind info.  Basically a sorted array of (addr, len, info)
   275   // records.  Threads wishing to query this field must hold mRWlock
   276   // for reading.  Threads wishing to modify this field must hold
   277   // mRWlock for writing.  This field is updated by NotifyAfterMap and
   278   // NotifyBeforeUnmap.
   279   PriMap* mPriMap;
   281   // An auxiliary structure that records which address ranges are
   282   // mapped r-x, for the benefit of the stack scanner.  Threads
   283   // wishing to query this field must hold mRWlock for reading.
   284   // Threads wishing to modify this field must hold mRWlock for
   285   // writing.
   286   SegArray* mSegArray;
   288   // The thread-local data: a mapping from threads to CFI-fast-caches.
   289   // Threads wishing to query this field must hold mRWlock for
   290   // reading.  Threads wishing to modify this field must hold mRWlock
   291   // for writing.
   292   //
   293   // The CFICaches themselves are thread-local and can be both read
   294   // and written when mRWlock is held for reading.  It would probably
   295   // be faster to use the pthread_{set,get}specific functions, but
   296   // also more difficult.  This map is queried once per unwind, in
   297   // order to get hold of the CFI cache for a given thread.
   298   std::map<pthread_t, CFICache*> mCaches;
   299 };
   302 // Run unit tests on an initialised, loaded-up LUL instance, and print
   303 // summary results on |aLUL|'s logging sink.  Also return the number
   304 // of tests run in *aNTests and the number that passed in
   305 // *aNTestsPassed.
   306 void
   307 RunLulUnitTests(/*OUT*/int* aNTests, /*OUT*/int*aNTestsPassed, LUL* aLUL);
   309 } // namespace lul
   311 #endif // LulMain_h

mercurial