1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/tools/profiler/LulMain.h Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,311 @@ 1.4 +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 1.5 +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ 1.6 +/* This Source Code Form is subject to the terms of the Mozilla Public 1.7 + * License, v. 2.0. If a copy of the MPL was not distributed with this 1.8 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.9 + 1.10 +#ifndef LulMain_h 1.11 +#define LulMain_h 1.12 + 1.13 +#include <pthread.h> // pthread_t 1.14 + 1.15 +#include <map> 1.16 + 1.17 +#include "LulPlatformMacros.h" 1.18 +#include "LulRWLock.h" 1.19 + 1.20 +// LUL: A Lightweight Unwind Library. 1.21 +// This file provides the end-user (external) interface for LUL. 1.22 + 1.23 +// Some comments about naming in the implementation. These are safe 1.24 +// to ignore if you are merely using LUL, but are important if you 1.25 +// hack on its internals. 1.26 +// 1.27 +// Debuginfo readers in general have tended to use the word "address" 1.28 +// to mean several different things. This sometimes makes them 1.29 +// difficult to understand and maintain. LUL tries hard to avoid 1.30 +// using the word "address" and instead uses the following more 1.31 +// precise terms: 1.32 +// 1.33 +// * SVMA ("Stated Virtual Memory Address"): this is an address of a 1.34 +// symbol (etc) as it is stated in the symbol table, or other 1.35 +// metadata, of an object. Such values are typically small and 1.36 +// start from zero or thereabouts, unless the object has been 1.37 +// prelinked. 1.38 +// 1.39 +// * AVMA ("Actual Virtual Memory Address"): this is the address of a 1.40 +// symbol (etc) in a running process, that is, once the associated 1.41 +// object has been mapped into a process. Such values are typically 1.42 +// much larger than SVMAs, since objects can get mapped arbitrarily 1.43 +// far along the address space. 1.44 +// 1.45 +// * "Bias": the difference between AVMA and SVMA for a given symbol 1.46 +// (specifically, AVMA - SVMA). The bias is always an integral 1.47 +// number of pages. Once we know the bias for a given object's 1.48 +// text section (for example), we can compute the AVMAs of all of 1.49 +// its text symbols by adding the bias to their SVMAs. 1.50 +// 1.51 +// * "Image address": typically, to read debuginfo from an object we 1.52 +// will temporarily mmap in the file so as to read symbol tables 1.53 +// etc. Addresses in this temporary mapping are called "Image 1.54 +// addresses". Note that the temporary mapping is entirely 1.55 +// unrelated to the mappings of the file that the dynamic linker 1.56 +// must perform merely in order to get the program to run. Hence 1.57 +// image addresses are unrelated to either SVMAs or AVMAs. 1.58 + 1.59 + 1.60 +namespace lul { 1.61 + 1.62 +// A machine word plus validity tag. 1.63 +class TaggedUWord { 1.64 +public: 1.65 + // Construct a valid one. 1.66 + TaggedUWord(uintptr_t w) 1.67 + : mValue(w) 1.68 + , mValid(true) 1.69 + {} 1.70 + 1.71 + // Construct an invalid one. 1.72 + TaggedUWord() 1.73 + : mValue(0) 1.74 + , mValid(false) 1.75 + {} 1.76 + 1.77 + // Add in a second one. 1.78 + void Add(TaggedUWord other) { 1.79 + if (mValid && other.Valid()) { 1.80 + mValue += other.Value(); 1.81 + } else { 1.82 + mValue = 0; 1.83 + mValid = false; 1.84 + } 1.85 + } 1.86 + 1.87 + // Is it word-aligned? 1.88 + bool IsAligned() const { 1.89 + return mValid && (mValue & (sizeof(uintptr_t)-1)) == 0; 1.90 + } 1.91 + 1.92 + uintptr_t Value() const { return mValue; } 1.93 + bool Valid() const { return mValid; } 1.94 + 1.95 +private: 1.96 + uintptr_t mValue; 1.97 + bool mValid; 1.98 +}; 1.99 + 1.100 + 1.101 +// The registers, with validity tags, that will be unwound. 1.102 + 1.103 +struct UnwindRegs { 1.104 +#if defined(LUL_ARCH_arm) 1.105 + TaggedUWord r7; 1.106 + TaggedUWord r11; 1.107 + TaggedUWord r12; 1.108 + TaggedUWord r13; 1.109 + TaggedUWord r14; 1.110 + TaggedUWord r15; 1.111 +#elif defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86) 1.112 + TaggedUWord xbp; 1.113 + TaggedUWord xsp; 1.114 + TaggedUWord xip; 1.115 +#else 1.116 +# error "Unknown plat" 1.117 +#endif 1.118 +}; 1.119 + 1.120 + 1.121 +// The maximum number of bytes in a stack snapshot. This can be 1.122 +// increased if necessary, but larger values cost performance, since a 1.123 +// stack snapshot needs to be copied between sampling and worker 1.124 +// threads for each snapshot. In practice 32k seems to be enough 1.125 +// to get good backtraces. 1.126 +static const size_t N_STACK_BYTES = 32768; 1.127 + 1.128 +// The stack chunk image that will be unwound. 1.129 +struct StackImage { 1.130 + // [start_avma, +len) specify the address range in the buffer. 1.131 + // Obviously we require 0 <= len <= N_STACK_BYTES. 1.132 + uintptr_t mStartAvma; 1.133 + size_t mLen; 1.134 + uint8_t mContents[N_STACK_BYTES]; 1.135 +}; 1.136 + 1.137 + 1.138 +// The core unwinder library class. Just one of these is needed, and 1.139 +// it can be shared by multiple unwinder threads. 1.140 +// 1.141 +// Access to the library is mediated by a single reader-writer lock. 1.142 +// All attempts to change the library's internal shared state -- that 1.143 +// is, loading or unloading unwind info -- are forced single-threaded 1.144 +// by causing the called routine to acquire a write-lock. Unwind 1.145 +// requests do not change the library's internal shared state and 1.146 +// therefore require only a read-lock. Hence multiple threads can 1.147 +// unwind in parallel. 1.148 +// 1.149 +// The library needs to maintain state which is private to each 1.150 +// unwinder thread -- the CFI (Dwarf Call Frame Information) fast 1.151 +// cache. Hence unwinder threads first need to register with the 1.152 +// library, so their identities are known. Also, for maximum 1.153 +// effectiveness of the CFI caching, it is preferable to have a small 1.154 +// number of very-busy unwinder threads rather than a large number of 1.155 +// mostly-idle unwinder threads. 1.156 +// 1.157 +// None of the methods may be safely called from within a signal 1.158 +// handler, since this risks deadlock. In particular this means 1.159 +// a thread may not unwind itself from within a signal handler 1.160 +// frame. It might be safe to call Unwind() on its own stack 1.161 +// from not-inside a signal frame, although even that cannot be 1.162 +// guaranteed deadlock free. 1.163 + 1.164 +class PriMap; 1.165 +class SegArray; 1.166 +class CFICache; 1.167 + 1.168 +class LUL { 1.169 +public: 1.170 + // Create; supply a logging sink. Initialises the rw-lock. 1.171 + LUL(void (*aLog)(const char*)); 1.172 + 1.173 + // Destroy. This acquires mRWlock for writing. By doing that, waits 1.174 + // for all unwinder threads to finish any Unwind() calls they may be 1.175 + // in. All resources are freed and all registered unwinder threads 1.176 + // are deregistered. 1.177 + ~LUL(); 1.178 + 1.179 + // Notify of a new r-x mapping, and load the associated unwind info. 1.180 + // The filename is strdup'd and used for debug printing. If 1.181 + // aMappedImage is NULL, this function will mmap/munmap the file 1.182 + // itself, so as to be able to read the unwind info. If 1.183 + // aMappedImage is non-NULL then it is assumed to point to a 1.184 + // called-supplied and caller-managed mapped image of the file. 1.185 + // 1.186 + // Acquires mRWlock for writing. This must be called only after the 1.187 + // code area in question really has been mapped. 1.188 + void NotifyAfterMap(uintptr_t aRXavma, size_t aSize, 1.189 + const char* aFileName, const void* aMappedImage); 1.190 + 1.191 + // In rare cases we know an executable area exists but don't know 1.192 + // what the associated file is. This call notifies LUL of such 1.193 + // areas. This is important for correct functioning of stack 1.194 + // scanning and of the x86-{linux,android} special-case 1.195 + // __kernel_syscall function handling. Acquires mRWlock for 1.196 + // writing. This must be called only after the code area in 1.197 + // question really has been mapped. 1.198 + void NotifyExecutableArea(uintptr_t aRXavma, size_t aSize); 1.199 + 1.200 + // Notify that a mapped area has been unmapped; discard any 1.201 + // associated unwind info. Acquires mRWlock for writing. Note that 1.202 + // to avoid segfaulting the stack-scan unwinder, which inspects code 1.203 + // areas, this must be called before the code area in question is 1.204 + // really unmapped. Note that, unlike NotifyAfterMap(), this 1.205 + // function takes the start and end addresses of the range to be 1.206 + // unmapped, rather than a start and a length parameter. This is so 1.207 + // as to make it possible to notify an unmap for the entire address 1.208 + // space using a single call. 1.209 + void NotifyBeforeUnmap(uintptr_t aAvmaMin, uintptr_t aAvmaMax); 1.210 + 1.211 + // Apply NotifyBeforeUnmap to the entire address space. This causes 1.212 + // LUL to discard all unwind and executable-area information for the 1.213 + // entire address space. 1.214 + void NotifyBeforeUnmapAll() { 1.215 + NotifyBeforeUnmap(0, UINTPTR_MAX); 1.216 + } 1.217 + 1.218 + // Returns the number of mappings currently registered. Acquires 1.219 + // mRWlock for writing. 1.220 + size_t CountMappings(); 1.221 + 1.222 + // Register the calling thread for unwinding. Acquires mRWlock for 1.223 + // writing. 1.224 + void RegisterUnwinderThread(); 1.225 + 1.226 + // Unwind |aStackImg| starting with the context in |aStartRegs|. 1.227 + // Write the number of frames recovered in *aFramesUsed. Put 1.228 + // the PC values in aFramePCs[0 .. *aFramesUsed-1] and 1.229 + // the SP values in aFrameSPs[0 .. *aFramesUsed-1]. 1.230 + // |aFramesAvail| is the size of the two output arrays and hence the 1.231 + // largest possible value of *aFramesUsed. PC values are always 1.232 + // valid, and the unwind will stop when the PC becomes invalid, but 1.233 + // the SP values might be invalid, in which case the value zero will 1.234 + // be written in the relevant frameSPs[] slot. 1.235 + // 1.236 + // Unwinding may optionally use stack scanning. The maximum number 1.237 + // of frames that may be recovered by stack scanning is 1.238 + // |aScannedFramesAllowed| and the actual number recovered is 1.239 + // written into *aScannedFramesAcquired. |aScannedFramesAllowed| 1.240 + // must be less than or equal to |aFramesAvail|. 1.241 + // 1.242 + // This function assumes that the SP values increase as it unwinds 1.243 + // away from the innermost frame -- that is, that the stack grows 1.244 + // down. It monitors SP values as it unwinds to check they 1.245 + // decrease, so as to avoid looping on corrupted stacks. 1.246 + // 1.247 + // Acquires mRWlock for reading. Hence multiple threads may unwind 1.248 + // at once, but no thread may be unwinding whilst the library loads 1.249 + // or discards unwind information. Returns false if the calling 1.250 + // thread is not registered for unwinding. 1.251 + // 1.252 + // Up to aScannedFramesAllowed stack-scanned frames may be recovered. 1.253 + // 1.254 + // The calling thread must previously have registered itself via 1.255 + // RegisterUnwinderThread. 1.256 + void Unwind(/*OUT*/uintptr_t* aFramePCs, 1.257 + /*OUT*/uintptr_t* aFrameSPs, 1.258 + /*OUT*/size_t* aFramesUsed, 1.259 + /*OUT*/size_t* aScannedFramesAcquired, 1.260 + size_t aFramesAvail, 1.261 + size_t aScannedFramesAllowed, 1.262 + UnwindRegs* aStartRegs, StackImage* aStackImg); 1.263 + 1.264 + // The logging sink. Call to send debug strings to the caller- 1.265 + // specified destination. 1.266 + void (*mLog)(const char*); 1.267 + 1.268 +private: 1.269 + // Invalidate the caches. Requires mRWlock to be held for writing; 1.270 + // does not acquire it itself. 1.271 + void InvalidateCFICaches(); 1.272 + 1.273 + // The one-and-only lock, a reader-writer lock, for the library. 1.274 + LulRWLock* mRWlock; 1.275 + 1.276 + // The top level mapping from code address ranges to postprocessed 1.277 + // unwind info. Basically a sorted array of (addr, len, info) 1.278 + // records. Threads wishing to query this field must hold mRWlock 1.279 + // for reading. Threads wishing to modify this field must hold 1.280 + // mRWlock for writing. This field is updated by NotifyAfterMap and 1.281 + // NotifyBeforeUnmap. 1.282 + PriMap* mPriMap; 1.283 + 1.284 + // An auxiliary structure that records which address ranges are 1.285 + // mapped r-x, for the benefit of the stack scanner. Threads 1.286 + // wishing to query this field must hold mRWlock for reading. 1.287 + // Threads wishing to modify this field must hold mRWlock for 1.288 + // writing. 1.289 + SegArray* mSegArray; 1.290 + 1.291 + // The thread-local data: a mapping from threads to CFI-fast-caches. 1.292 + // Threads wishing to query this field must hold mRWlock for 1.293 + // reading. Threads wishing to modify this field must hold mRWlock 1.294 + // for writing. 1.295 + // 1.296 + // The CFICaches themselves are thread-local and can be both read 1.297 + // and written when mRWlock is held for reading. It would probably 1.298 + // be faster to use the pthread_{set,get}specific functions, but 1.299 + // also more difficult. This map is queried once per unwind, in 1.300 + // order to get hold of the CFI cache for a given thread. 1.301 + std::map<pthread_t, CFICache*> mCaches; 1.302 +}; 1.303 + 1.304 + 1.305 +// Run unit tests on an initialised, loaded-up LUL instance, and print 1.306 +// summary results on |aLUL|'s logging sink. Also return the number 1.307 +// of tests run in *aNTests and the number that passed in 1.308 +// *aNTestsPassed. 1.309 +void 1.310 +RunLulUnitTests(/*OUT*/int* aNTests, /*OUT*/int*aNTestsPassed, LUL* aLUL); 1.311 + 1.312 +} // namespace lul 1.313 + 1.314 +#endif // LulMain_h