tools/profiler/LulMain.h

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/tools/profiler/LulMain.h	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,311 @@
     1.4 +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     1.5 +/* vim: set ts=8 sts=2 et sw=2 tw=80: */
     1.6 +/* This Source Code Form is subject to the terms of the Mozilla Public
     1.7 + * License, v. 2.0. If a copy of the MPL was not distributed with this
     1.8 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     1.9 +
    1.10 +#ifndef LulMain_h
    1.11 +#define LulMain_h
    1.12 +
    1.13 +#include <pthread.h>   // pthread_t
    1.14 +
    1.15 +#include <map>
    1.16 +
    1.17 +#include "LulPlatformMacros.h"
    1.18 +#include "LulRWLock.h"
    1.19 +
    1.20 +// LUL: A Lightweight Unwind Library.
    1.21 +// This file provides the end-user (external) interface for LUL.
    1.22 +
    1.23 +// Some comments about naming in the implementation.  These are safe
    1.24 +// to ignore if you are merely using LUL, but are important if you
    1.25 +// hack on its internals.
    1.26 +//
    1.27 +// Debuginfo readers in general have tended to use the word "address"
    1.28 +// to mean several different things.  This sometimes makes them
    1.29 +// difficult to understand and maintain.  LUL tries hard to avoid
    1.30 +// using the word "address" and instead uses the following more
    1.31 +// precise terms:
    1.32 +//
    1.33 +// * SVMA ("Stated Virtual Memory Address"): this is an address of a
    1.34 +//   symbol (etc) as it is stated in the symbol table, or other
    1.35 +//   metadata, of an object.  Such values are typically small and
    1.36 +//   start from zero or thereabouts, unless the object has been
    1.37 +//   prelinked.
    1.38 +//
    1.39 +// * AVMA ("Actual Virtual Memory Address"): this is the address of a
    1.40 +//   symbol (etc) in a running process, that is, once the associated
    1.41 +//   object has been mapped into a process.  Such values are typically
    1.42 +//   much larger than SVMAs, since objects can get mapped arbitrarily
    1.43 +//   far along the address space.
    1.44 +//
    1.45 +// * "Bias": the difference between AVMA and SVMA for a given symbol
    1.46 +//   (specifically, AVMA - SVMA).  The bias is always an integral
    1.47 +//   number of pages.  Once we know the bias for a given object's
    1.48 +//   text section (for example), we can compute the AVMAs of all of
    1.49 +//   its text symbols by adding the bias to their SVMAs.
    1.50 +//
    1.51 +// * "Image address": typically, to read debuginfo from an object we
    1.52 +//   will temporarily mmap in the file so as to read symbol tables
    1.53 +//   etc.  Addresses in this temporary mapping are called "Image
    1.54 +//   addresses".  Note that the temporary mapping is entirely
    1.55 +//   unrelated to the mappings of the file that the dynamic linker
    1.56 +//   must perform merely in order to get the program to run.  Hence
    1.57 +//   image addresses are unrelated to either SVMAs or AVMAs.
    1.58 +
    1.59 +
    1.60 +namespace lul {
    1.61 +
    1.62 +// A machine word plus validity tag.
    1.63 +class TaggedUWord {
    1.64 +public:
    1.65 +  // Construct a valid one.
    1.66 +  TaggedUWord(uintptr_t w)
    1.67 +    : mValue(w)
    1.68 +    , mValid(true)
    1.69 +  {}
    1.70 +
    1.71 +  // Construct an invalid one.
    1.72 +  TaggedUWord()
    1.73 +    : mValue(0)
    1.74 +    , mValid(false)
    1.75 +  {}
    1.76 +
    1.77 +  // Add in a second one.
    1.78 +  void Add(TaggedUWord other) {
    1.79 +    if (mValid && other.Valid()) {
    1.80 +      mValue += other.Value();
    1.81 +    } else {
    1.82 +      mValue = 0;
    1.83 +      mValid = false;
    1.84 +    }
    1.85 +  }
    1.86 +
    1.87 +  // Is it word-aligned?
    1.88 +  bool IsAligned() const {
    1.89 +    return mValid && (mValue & (sizeof(uintptr_t)-1)) == 0;
    1.90 +  }
    1.91 +
    1.92 +  uintptr_t Value() const { return mValue; }
    1.93 +  bool      Valid() const { return mValid; }
    1.94 +
    1.95 +private:
    1.96 +  uintptr_t mValue;
    1.97 +  bool mValid;
    1.98 +};
    1.99 +
   1.100 +
   1.101 +// The registers, with validity tags, that will be unwound.
   1.102 +
   1.103 +struct UnwindRegs {
   1.104 +#if defined(LUL_ARCH_arm)
   1.105 +  TaggedUWord r7;
   1.106 +  TaggedUWord r11;
   1.107 +  TaggedUWord r12;
   1.108 +  TaggedUWord r13;
   1.109 +  TaggedUWord r14;
   1.110 +  TaggedUWord r15;
   1.111 +#elif defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
   1.112 +  TaggedUWord xbp;
   1.113 +  TaggedUWord xsp;
   1.114 +  TaggedUWord xip;
   1.115 +#else
   1.116 +# error "Unknown plat"
   1.117 +#endif
   1.118 +};
   1.119 +
   1.120 +
   1.121 +// The maximum number of bytes in a stack snapshot.  This can be
   1.122 +// increased if necessary, but larger values cost performance, since a
   1.123 +// stack snapshot needs to be copied between sampling and worker
   1.124 +// threads for each snapshot.  In practice 32k seems to be enough
   1.125 +// to get good backtraces.
   1.126 +static const size_t N_STACK_BYTES = 32768;
   1.127 +
   1.128 +// The stack chunk image that will be unwound.
   1.129 +struct StackImage {
    1.130 +  // [mStartAvma, +mLen) specify the address range in the buffer.
    1.131 +  // Obviously we require 0 <= mLen <= N_STACK_BYTES.
   1.132 +  uintptr_t mStartAvma;
   1.133 +  size_t    mLen;
   1.134 +  uint8_t   mContents[N_STACK_BYTES];
   1.135 +};
   1.136 +
   1.137 +
   1.138 +// The core unwinder library class.  Just one of these is needed, and
   1.139 +// it can be shared by multiple unwinder threads.
   1.140 +//
   1.141 +// Access to the library is mediated by a single reader-writer lock.
   1.142 +// All attempts to change the library's internal shared state -- that
   1.143 +// is, loading or unloading unwind info -- are forced single-threaded
   1.144 +// by causing the called routine to acquire a write-lock.  Unwind
   1.145 +// requests do not change the library's internal shared state and
   1.146 +// therefore require only a read-lock.  Hence multiple threads can
   1.147 +// unwind in parallel.
   1.148 +//
   1.149 +// The library needs to maintain state which is private to each
   1.150 +// unwinder thread -- the CFI (Dwarf Call Frame Information) fast
   1.151 +// cache.  Hence unwinder threads first need to register with the
   1.152 +// library, so their identities are known.  Also, for maximum
   1.153 +// effectiveness of the CFI caching, it is preferable to have a small
   1.154 +// number of very-busy unwinder threads rather than a large number of
   1.155 +// mostly-idle unwinder threads.
   1.156 +//
   1.157 +// None of the methods may be safely called from within a signal
   1.158 +// handler, since this risks deadlock.  In particular this means
   1.159 +// a thread may not unwind itself from within a signal handler
   1.160 +// frame.  It might be safe to call Unwind() on its own stack
    1.161 +// from outside a signal frame, although even that cannot be
    1.162 +// guaranteed deadlock-free.
   1.163 +
   1.164 +class PriMap;
   1.165 +class SegArray;
   1.166 +class CFICache;
   1.167 +
   1.168 +class LUL {
   1.169 +public:
   1.170 +  // Create; supply a logging sink.  Initialises the rw-lock.
   1.171 +  LUL(void (*aLog)(const char*));
   1.172 +
   1.173 +  // Destroy.  This acquires mRWlock for writing.  By doing that, waits
   1.174 +  // for all unwinder threads to finish any Unwind() calls they may be
   1.175 +  // in.  All resources are freed and all registered unwinder threads
   1.176 +  // are deregistered.
   1.177 +  ~LUL();
   1.178 +
   1.179 +  // Notify of a new r-x mapping, and load the associated unwind info.
   1.180 +  // The filename is strdup'd and used for debug printing.  If
   1.181 +  // aMappedImage is NULL, this function will mmap/munmap the file
   1.182 +  // itself, so as to be able to read the unwind info.  If
   1.183 +  // aMappedImage is non-NULL then it is assumed to point to a
    1.184 +  // caller-supplied and caller-managed mapped image of the file.
   1.185 +  //
   1.186 +  // Acquires mRWlock for writing.  This must be called only after the
   1.187 +  // code area in question really has been mapped.
   1.188 +  void NotifyAfterMap(uintptr_t aRXavma, size_t aSize,
   1.189 +                      const char* aFileName, const void* aMappedImage);
   1.190 +
   1.191 +  // In rare cases we know an executable area exists but don't know
   1.192 +  // what the associated file is.  This call notifies LUL of such
   1.193 +  // areas.  This is important for correct functioning of stack
   1.194 +  // scanning and of the x86-{linux,android} special-case
   1.195 +  // __kernel_syscall function handling.  Acquires mRWlock for
   1.196 +  // writing.  This must be called only after the code area in
   1.197 +  // question really has been mapped.
   1.198 +  void NotifyExecutableArea(uintptr_t aRXavma, size_t aSize);
   1.199 +
   1.200 +  // Notify that a mapped area has been unmapped; discard any
   1.201 +  // associated unwind info.  Acquires mRWlock for writing.  Note that
   1.202 +  // to avoid segfaulting the stack-scan unwinder, which inspects code
   1.203 +  // areas, this must be called before the code area in question is
   1.204 +  // really unmapped.  Note that, unlike NotifyAfterMap(), this
   1.205 +  // function takes the start and end addresses of the range to be
   1.206 +  // unmapped, rather than a start and a length parameter.  This is so
   1.207 +  // as to make it possible to notify an unmap for the entire address
   1.208 +  // space using a single call.
   1.209 +  void NotifyBeforeUnmap(uintptr_t aAvmaMin, uintptr_t aAvmaMax);
   1.210 +
   1.211 +  // Apply NotifyBeforeUnmap to the entire address space.  This causes
   1.212 +  // LUL to discard all unwind and executable-area information for the
   1.213 +  // entire address space.
   1.214 +  void NotifyBeforeUnmapAll() {
   1.215 +    NotifyBeforeUnmap(0, UINTPTR_MAX);
   1.216 +  }
   1.217 +
   1.218 +  // Returns the number of mappings currently registered.  Acquires
   1.219 +  // mRWlock for writing.
   1.220 +  size_t CountMappings();
   1.221 +
   1.222 +  // Register the calling thread for unwinding.  Acquires mRWlock for
   1.223 +  // writing.
   1.224 +  void RegisterUnwinderThread();
   1.225 +
   1.226 +  // Unwind |aStackImg| starting with the context in |aStartRegs|.
   1.227 +  // Write the number of frames recovered in *aFramesUsed.  Put
   1.228 +  // the PC values in aFramePCs[0 .. *aFramesUsed-1] and
   1.229 +  // the SP values in aFrameSPs[0 .. *aFramesUsed-1].
   1.230 +  // |aFramesAvail| is the size of the two output arrays and hence the
   1.231 +  // largest possible value of *aFramesUsed.  PC values are always
   1.232 +  // valid, and the unwind will stop when the PC becomes invalid, but
   1.233 +  // the SP values might be invalid, in which case the value zero will
    1.234 +  // be written in the relevant aFrameSPs[] slot.
   1.235 +  //
   1.236 +  // Unwinding may optionally use stack scanning.  The maximum number
   1.237 +  // of frames that may be recovered by stack scanning is
   1.238 +  // |aScannedFramesAllowed| and the actual number recovered is
   1.239 +  // written into *aScannedFramesAcquired.  |aScannedFramesAllowed|
   1.240 +  // must be less than or equal to |aFramesAvail|.
   1.241 +  //
    1.242 +  // This function assumes that the SP values increase as it unwinds
    1.243 +  // away from the innermost frame -- that is, that the stack grows
    1.244 +  // down.  It monitors SP values as it unwinds to check they
    1.245 +  // increase, so as to avoid looping on corrupted stacks.
   1.246 +  //
   1.247 +  // Acquires mRWlock for reading.  Hence multiple threads may unwind
   1.248 +  // at once, but no thread may be unwinding whilst the library loads
   1.249 +  // or discards unwind information.  Returns false if the calling
   1.250 +  // thread is not registered for unwinding.
   1.251 +  //
   1.252 +  // Up to aScannedFramesAllowed stack-scanned frames may be recovered.
   1.253 +  //
   1.254 +  // The calling thread must previously have registered itself via
   1.255 +  // RegisterUnwinderThread.
   1.256 +  void Unwind(/*OUT*/uintptr_t* aFramePCs,
   1.257 +              /*OUT*/uintptr_t* aFrameSPs,
   1.258 +              /*OUT*/size_t* aFramesUsed,
   1.259 +              /*OUT*/size_t* aScannedFramesAcquired,
   1.260 +              size_t aFramesAvail,
   1.261 +              size_t aScannedFramesAllowed,
   1.262 +              UnwindRegs* aStartRegs, StackImage* aStackImg);
   1.263 +
   1.264 +  // The logging sink.  Call to send debug strings to the caller-
   1.265 +  // specified destination.
   1.266 +  void (*mLog)(const char*);
   1.267 +
   1.268 +private:
   1.269 +  // Invalidate the caches.  Requires mRWlock to be held for writing;
   1.270 +  // does not acquire it itself.
   1.271 +  void InvalidateCFICaches();
   1.272 +
   1.273 +  // The one-and-only lock, a reader-writer lock, for the library.
   1.274 +  LulRWLock* mRWlock;
   1.275 +
   1.276 +  // The top level mapping from code address ranges to postprocessed
   1.277 +  // unwind info.  Basically a sorted array of (addr, len, info)
   1.278 +  // records.  Threads wishing to query this field must hold mRWlock
   1.279 +  // for reading.  Threads wishing to modify this field must hold
   1.280 +  // mRWlock for writing.  This field is updated by NotifyAfterMap and
   1.281 +  // NotifyBeforeUnmap.
   1.282 +  PriMap* mPriMap;
   1.283 +
   1.284 +  // An auxiliary structure that records which address ranges are
   1.285 +  // mapped r-x, for the benefit of the stack scanner.  Threads
   1.286 +  // wishing to query this field must hold mRWlock for reading.
   1.287 +  // Threads wishing to modify this field must hold mRWlock for
   1.288 +  // writing.
   1.289 +  SegArray* mSegArray;
   1.290 +
   1.291 +  // The thread-local data: a mapping from threads to CFI-fast-caches.
   1.292 +  // Threads wishing to query this field must hold mRWlock for
   1.293 +  // reading.  Threads wishing to modify this field must hold mRWlock
   1.294 +  // for writing.
   1.295 +  //
   1.296 +  // The CFICaches themselves are thread-local and can be both read
   1.297 +  // and written when mRWlock is held for reading.  It would probably
   1.298 +  // be faster to use the pthread_{set,get}specific functions, but
   1.299 +  // also more difficult.  This map is queried once per unwind, in
   1.300 +  // order to get hold of the CFI cache for a given thread.
   1.301 +  std::map<pthread_t, CFICache*> mCaches;
   1.302 +};
   1.303 +
   1.304 +
   1.305 +// Run unit tests on an initialised, loaded-up LUL instance, and print
   1.306 +// summary results on |aLUL|'s logging sink.  Also return the number
   1.307 +// of tests run in *aNTests and the number that passed in
   1.308 +// *aNTestsPassed.
   1.309 +void
   1.310 +RunLulUnitTests(/*OUT*/int* aNTests, /*OUT*/int*aNTestsPassed, LUL* aLUL);
   1.311 +
   1.312 +} // namespace lul
   1.313 +
   1.314 +#endif // LulMain_h

mercurial