diff -r 000000000000 -r 6474c204b198 tools/profiler/LulMain.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/profiler/LulMain.h	Wed Dec 31 06:09:35 2014 +0100
@@ -0,0 +1,311 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef LulMain_h
+#define LulMain_h
+
+#include <pthread.h>   // pthread_t
+
+#include <map>
+
+#include "LulPlatformMacros.h"
+#include "LulRWLock.h"
+
+// LUL: A Lightweight Unwind Library.
+// This file provides the end-user (external) interface for LUL.
+
+// Some comments about naming in the implementation.  These are safe
+// to ignore if you are merely using LUL, but are important if you
+// hack on its internals.
+//
+// Debuginfo readers in general have tended to use the word "address"
+// to mean several different things.  This sometimes makes them
+// difficult to understand and maintain.  LUL tries hard to avoid
+// using the word "address" and instead uses the following more
+// precise terms:
+//
+// * SVMA ("Stated Virtual Memory Address"): this is an address of a
+//   symbol (etc) as it is stated in the symbol table, or other
+//   metadata, of an object.  Such values are typically small and
+//   start from zero or thereabouts, unless the object has been
+//   prelinked.
+//
+// * AVMA ("Actual Virtual Memory Address"): this is the address of a
+//   symbol (etc) in a running process, that is, once the associated
+//   object has been mapped into a process.  Such values are typically
+//   much larger than SVMAs, since objects can get mapped arbitrarily
+//   far along the address space.
+//
+// * "Bias": the difference between AVMA and SVMA for a given symbol
+//   (specifically, AVMA - SVMA).  The bias is always an integral
+//   number of pages.  Once we know the bias for a given object's
+//   text section (for example), we can compute the AVMAs of all of
+//   its text symbols by adding the bias to their SVMAs.
+//
+// * "Image address": typically, to read debuginfo from an object we
+//   will temporarily mmap in the file so as to read symbol tables
+//   etc.  Addresses in this temporary mapping are called "Image
+//   addresses".  Note that the temporary mapping is entirely
+//   unrelated to the mappings of the file that the dynamic linker
+//   must perform merely in order to get the program to run.  Hence
+//   image addresses are unrelated to either SVMAs or AVMAs.
+
+
+namespace lul {
+
+// A machine word plus validity tag.
+class TaggedUWord {
+public:
+  // Construct a valid one.
+  TaggedUWord(uintptr_t w)
+    : mValue(w)
+    , mValid(true)
+  {}
+
+  // Construct an invalid one.
+  TaggedUWord()
+    : mValue(0)
+    , mValid(false)
+  {}
+
+  // Add in a second one.
+  void Add(TaggedUWord other) {
+    if (mValid && other.Valid()) {
+      mValue += other.Value();
+    } else {
+      mValue = 0;
+      mValid = false;
+    }
+  }
+
+  // Is it word-aligned?
+  bool IsAligned() const {
+    return mValid && (mValue & (sizeof(uintptr_t)-1)) == 0;
+  }
+
+  uintptr_t Value() const { return mValue; }
+  bool      Valid() const { return mValid; }
+
+private:
+  uintptr_t mValue;
+  bool      mValid;
+};
+
+
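For illustration only (not part of the patch): the validity tag lets arithmetic on recovered register values fail soft instead of producing bogus addresses. A minimal sketch, assuming only this header:

#include "LulMain.h"

void TaggedUWordExample()
{
  lul::TaggedUWord sp(0x7ffc1000);   // a known-good stack pointer value
  lul::TaggedUWord offset(16);
  sp.Add(offset);                    // still valid; sp.Value() == 0x7ffc1010

  lul::TaggedUWord unknown;          // default-constructed, hence invalid
  unknown.Add(offset);               // remains invalid: any sum involving
                                     // an invalid word is itself invalid
  // At this point sp.Valid() is true and unknown.Valid() is false.
}
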
+// The registers, with validity tags, that will be unwound.
+struct UnwindRegs {
+#if defined(LUL_ARCH_arm)
+  TaggedUWord r7;
+  TaggedUWord r11;
+  TaggedUWord r12;
+  TaggedUWord r13;
+  TaggedUWord r14;
+  TaggedUWord r15;
+#elif defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
+  TaggedUWord xbp;
+  TaggedUWord xsp;
+  TaggedUWord xip;
+#else
+# error "Unknown plat"
+#endif
+};
+
+
+// The maximum number of bytes in a stack snapshot.  This can be
+// increased if necessary, but larger values cost performance, since
+// each stack snapshot must be copied between the sampling and worker
+// threads.  In practice 32k seems to be enough to get good backtraces.
+static const size_t N_STACK_BYTES = 32768;
+
+// The stack chunk image that will be unwound.
+struct StackImage {
+  // [mStartAvma, +mLen) specify the address range in the buffer.
+  // Obviously we require 0 <= mLen <= N_STACK_BYTES.
+  uintptr_t mStartAvma;
+  size_t    mLen;
+  uint8_t   mContents[N_STACK_BYTES];
+};
+
+
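For illustration only (not part of the patch): a caller is expected to snapshot the relevant stack range into a StackImage before asking LUL to unwind it. The sketch below uses a hypothetical helper, FillStackImage, and assumes the caller has already arranged a coherent view of the target thread's stack (for example, by sampling while that thread is suspended); how that is done is outside the scope of this header.

#include <string.h>   // memcpy
#include "LulMain.h"

// Hypothetical helper: copy the stack bytes in [aSP, aStackTop) into
// |aImg|, keeping only the innermost (lowest-addressed) N_STACK_BYTES
// if the live stack is larger than the buffer.
static void FillStackImage(lul::StackImage* aImg,
                           uintptr_t aSP, uintptr_t aStackTop)
{
  size_t len = aStackTop - aSP;
  if (len > lul::N_STACK_BYTES) {
    len = lul::N_STACK_BYTES;
  }
  memcpy(&aImg->mContents[0], (const void*)aSP, len);
  aImg->mStartAvma = aSP;
  aImg->mLen = len;
}
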
+// The core unwinder library class.  Just one of these is needed, and
+// it can be shared by multiple unwinder threads.
+//
+// Access to the library is mediated by a single reader-writer lock.
+// All attempts to change the library's internal shared state -- that
+// is, loading or unloading unwind info -- are forced single-threaded
+// by causing the called routine to acquire a write-lock.  Unwind
+// requests do not change the library's internal shared state and
+// therefore require only a read-lock.  Hence multiple threads can
+// unwind in parallel.
+//
+// The library needs to maintain state which is private to each
+// unwinder thread -- the CFI (Dwarf Call Frame Information) fast
+// cache.  Hence unwinder threads first need to register with the
+// library, so that their identities are known.  Also, for maximum
+// effectiveness of the CFI caching, it is preferable to have a small
+// number of very busy unwinder threads rather than a large number of
+// mostly idle unwinder threads.
+//
+// None of the methods may safely be called from within a signal
+// handler, since this risks deadlock.  In particular this means a
+// thread may not unwind itself from within a signal handler frame.
+// It might be safe to call Unwind() on its own stack from outside a
+// signal frame, although even that cannot be guaranteed deadlock
+// free.
+
+class PriMap;
+class SegArray;
+class CFICache;
+
+class LUL {
+public:
+  // Create; supply a logging sink.  Initialises the rw-lock.
+  LUL(void (*aLog)(const char*));
+
+  // Destroy.  This acquires mRWlock for writing and, by doing so,
+  // waits for all unwinder threads to finish any Unwind() calls they
+  // may be in.  All resources are freed and all registered unwinder
+  // threads are deregistered.
+  ~LUL();
+
+  // Notify of a new r-x mapping, and load the associated unwind info.
+  // The filename is strdup'd and used for debug printing.  If
+  // aMappedImage is NULL, this function will mmap/munmap the file
+  // itself, so as to be able to read the unwind info.  If
+  // aMappedImage is non-NULL then it is assumed to point to a
+  // caller-supplied and caller-managed mapped image of the file.
+  //
+  // Acquires mRWlock for writing.  This must be called only after the
+  // code area in question really has been mapped.
+  void NotifyAfterMap(uintptr_t aRXavma, size_t aSize,
+                      const char* aFileName, const void* aMappedImage);
+
+  // In rare cases we know an executable area exists but don't know
+  // what the associated file is.  This call notifies LUL of such
+  // areas.  This is important for the correct functioning of stack
+  // scanning and of the x86-{linux,android} special-case
+  // __kernel_syscall function handling.  Acquires mRWlock for
+  // writing.  This must be called only after the code area in
+  // question really has been mapped.
+  void NotifyExecutableArea(uintptr_t aRXavma, size_t aSize);
+
+  // Notify that a mapped area has been unmapped; discard any
+  // associated unwind info.  Acquires mRWlock for writing.  Note that
+  // to avoid segfaulting the stack-scan unwinder, which inspects code
+  // areas, this must be called before the code area in question is
+  // really unmapped.  Note that, unlike NotifyAfterMap(), this
+  // function takes the start and end addresses of the range to be
+  // unmapped, rather than a start address and a length.  This makes
+  // it possible to notify an unmap of the entire address space with a
+  // single call.
+  void NotifyBeforeUnmap(uintptr_t aAvmaMin, uintptr_t aAvmaMax);
+
+  // Apply NotifyBeforeUnmap to the entire address space.  This causes
+  // LUL to discard all unwind and executable-area information for the
+  // entire address space.
+  void NotifyBeforeUnmapAll() {
+    NotifyBeforeUnmap(0, UINTPTR_MAX);
+  }
+
+  // Returns the number of mappings currently registered.  Acquires
+  // mRWlock for writing.
+  size_t CountMappings();
+
+  // Register the calling thread for unwinding.  Acquires mRWlock for
+  // writing.
+  void RegisterUnwinderThread();
+
+  // Unwind |aStackImg| starting with the context in |aStartRegs|.
+  // Write the number of frames recovered into *aFramesUsed.  Put the
+  // PC values in aFramePCs[0 .. *aFramesUsed-1] and the SP values in
+  // aFrameSPs[0 .. *aFramesUsed-1].  |aFramesAvail| is the size of
+  // the two output arrays and hence the largest possible value of
+  // *aFramesUsed.  PC values are always valid, and the unwind will
+  // stop when the PC becomes invalid, but the SP values might be
+  // invalid, in which case zero is written to the relevant
+  // aFrameSPs[] slot.
+  //
+  // Unwinding may optionally use stack scanning.  The maximum number
+  // of frames that may be recovered by stack scanning is
+  // |aScannedFramesAllowed| and the actual number recovered is
+  // written into *aScannedFramesAcquired.  |aScannedFramesAllowed|
+  // must be less than or equal to |aFramesAvail|.
+  //
+  // This function assumes that the stack grows down, and hence that
+  // SP values increase as it unwinds away from the innermost frame.
+  // It monitors the SP values as it unwinds to check that they do not
+  // decrease, so as to avoid looping on corrupted stacks.
+  //
+  // Acquires mRWlock for reading.  Hence multiple threads may unwind
+  // at once, but no thread may be unwinding whilst the library loads
+  // or discards unwind information.
+  //
+  // The calling thread must previously have registered itself via
+  // RegisterUnwinderThread.
+  void Unwind(/*OUT*/uintptr_t* aFramePCs,
+              /*OUT*/uintptr_t* aFrameSPs,
+              /*OUT*/size_t* aFramesUsed,
+              /*OUT*/size_t* aScannedFramesAcquired,
+              size_t aFramesAvail,
+              size_t aScannedFramesAllowed,
+              UnwindRegs* aStartRegs, StackImage* aStackImg);
+
+  // The logging sink.  Call this to send debug strings to the
+  // caller-specified destination.
+  void (*mLog)(const char*);
+
+private:
+  // Invalidate the caches.  Requires mRWlock to be held for writing;
+  // does not acquire it itself.
+  void InvalidateCFICaches();
+
+  // The one-and-only lock, a reader-writer lock, for the library.
+  LulRWLock* mRWlock;
+
+  // The top level mapping from code address ranges to postprocessed
+  // unwind info.  Basically a sorted array of (addr, len, info)
+  // records.  Threads wishing to query this field must hold mRWlock
+  // for reading; threads wishing to modify it must hold mRWlock for
+  // writing.  This field is updated by NotifyAfterMap and
+  // NotifyBeforeUnmap.
+  PriMap* mPriMap;
+
+  // An auxiliary structure that records which address ranges are
+  // mapped r-x, for the benefit of the stack scanner.  Threads
+  // wishing to query this field must hold mRWlock for reading;
+  // threads wishing to modify it must hold mRWlock for writing.
+  SegArray* mSegArray;
+
+  // The thread-local data: a mapping from threads to CFI fast caches.
+  // Threads wishing to query this field must hold mRWlock for
+  // reading; threads wishing to modify it must hold mRWlock for
+  // writing.
+  //
+  // The CFICaches themselves are thread-local and can be both read
+  // and written when mRWlock is held for reading.  It would probably
+  // be faster to use the pthread_{set,get}specific functions, but
+  // also more difficult.  This map is queried once per unwind, in
+  // order to get hold of the CFI cache for a given thread.
+  std::map<pthread_t, CFICache*> mCaches;
+};
+
+
+// Run unit tests on an initialised, loaded-up LUL instance, and print
+// summary results on |aLUL|'s logging sink.  Also return the number
+// of tests run in *aNTests and the number that passed in
+// *aNTestsPassed.
+void
+RunLulUnitTests(/*OUT*/int* aNTests, /*OUT*/int* aNTestsPassed, LUL* aLUL);
+
+} // namespace lul
+
+#endif // LulMain_h
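For illustration only (not part of the patch): a minimal end-to-end use of the interface above, shown with the x86/x64 register names. GetSomeLibraryMapping() and CaptureContextAndStack() are hypothetical stand-ins for however the embedding profiler discovers r-x mappings and snapshots a paused thread's registers and stack; everything else uses only the API declared in this header.

#include <stdio.h>
#include "LulMain.h"

// Hypothetical helpers supplied by the embedder (declarations only).
void GetSomeLibraryMapping(uintptr_t* aRXavma, size_t* aSize,
                           const char** aFileName);
void CaptureContextAndStack(lul::UnwindRegs* aRegs, lul::StackImage* aImg);

static void LogToStderr(const char* aMsg)
{
  fprintf(stderr, "LUL: %s\n", aMsg);
}

void ExampleUnwind()
{
  lul::LUL lib(LogToStderr);

  // Tell LUL about one r-x mapping so it can load the unwind info.
  uintptr_t rxAvma = 0;
  size_t rxSize = 0;
  const char* fileName = NULL;
  GetSomeLibraryMapping(&rxAvma, &rxSize, &fileName);
  lib.NotifyAfterMap(rxAvma, rxSize, fileName, /*aMappedImage=*/NULL);

  // The calling thread will do the unwinding, so register it first.
  lib.RegisterUnwinderThread();

  // Starting register state and stack snapshot for the sampled thread.
  // On x86/x64 the context lives in regs.xip, regs.xsp and regs.xbp.
  lul::UnwindRegs regs;
  lul::StackImage img;
  CaptureContextAndStack(&regs, &img);

  const size_t kFramesAvail = 256;
  uintptr_t framePCs[kFramesAvail];
  uintptr_t frameSPs[kFramesAvail];
  size_t framesUsed = 0;
  size_t scannedAcquired = 0;
  lib.Unwind(framePCs, frameSPs, &framesUsed, &scannedAcquired,
             kFramesAvail, /*aScannedFramesAllowed=*/0, &regs, &img);

  for (size_t i = 0; i < framesUsed; i++) {
    fprintf(stderr, "frame %zu: pc=0x%lx sp=0x%lx\n",
            i, (unsigned long)framePCs[i], (unsigned long)frameSPs[i]);
  }

  // At shutdown, or when the mappings go away, discard the loaded info.
  lib.NotifyBeforeUnmapAll();
}

Passing 0 for aScannedFramesAllowed keeps the example to CFI-based unwinding only; per the comment on Unwind(), a nonzero value bounds how many frames stack scanning may contribute.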