|
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
|
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */ |
|
3 /* This Source Code Form is subject to the terms of the Mozilla Public |
|
4 * License, v. 2.0. If a copy of the MPL was not distributed with this |
|
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
|
6 |
|
7 #ifndef LulMain_h |
|
8 #define LulMain_h |
|
9 |
|
10 #include <pthread.h> // pthread_t |
|
11 |
|
12 #include <map> |
|
13 |
|
14 #include "LulPlatformMacros.h" |
|
15 #include "LulRWLock.h" |
|
16 |
|
// LUL: A Lightweight Unwind Library.
// This file provides the end-user (external) interface for LUL.
|
19 |
|
// Some comments about naming in the implementation.  These are safe
// to ignore if you are merely using LUL, but are important if you
// hack on its internals.
//
// Debuginfo readers in general have tended to use the word "address"
// to mean several different things.  This sometimes makes them
// difficult to understand and maintain.  LUL tries hard to avoid
// using the word "address" and instead uses the following more
// precise terms:
//
|
// * SVMA ("Stated Virtual Memory Address"): this is an address of a
//   symbol (etc.) as it is stated in the symbol table, or other
//   metadata, of an object.  Such values are typically small and
//   start from zero or thereabouts, unless the object has been
//   prelinked.
//
|
// * AVMA ("Actual Virtual Memory Address"): this is the address of a
//   symbol (etc.) in a running process, that is, once the associated
//   object has been mapped into a process.  Such values are typically
//   much larger than SVMAs, since objects can get mapped arbitrarily
//   far along the address space.
//
|
// * "Bias": the difference between AVMA and SVMA for a given symbol
//   (specifically, AVMA - SVMA).  The bias is always an integral
//   number of pages.  Once we know the bias for a given object's
//   text section (for example), we can compute the AVMAs of all of
//   its text symbols by adding the bias to their SVMAs.
//
|
// * "Image address": typically, to read debuginfo from an object we
//   will temporarily mmap in the file so as to read symbol tables
//   etc.  Addresses in this temporary mapping are called "Image
//   addresses".  Note that the temporary mapping is entirely
//   unrelated to the mappings of the file that the dynamic linker
//   must perform merely in order to get the program to run.  Hence
//   image addresses are unrelated to either SVMAs or AVMAs.
|
55 |
|
56 |
|
57 namespace lul { |
|
58 |
|
// A machine word (uintptr_t) bundled with a flag recording whether the
// word currently holds a meaningful value.
class TaggedUWord {
public:
  // Wrap |w| as a valid word.
  TaggedUWord(uintptr_t w) : mValue(w), mValid(true) {}

  // Default construction produces an invalid word whose value is zero.
  TaggedUWord() : mValue(0), mValid(false) {}

  // Accumulate |other| into this word.  The sum is only meaningful when
  // both operands are valid; if either is invalid the result collapses
  // to the canonical invalid state (value zero, not valid).
  void Add(TaggedUWord other) {
    if (!mValid || !other.mValid) {
      mValue = 0;
      mValid = false;
      return;
    }
    mValue += other.mValue;
  }

  // True iff the word is valid and its value is a multiple of the
  // machine word size.  An invalid word is never considered aligned.
  bool IsAligned() const {
    if (!mValid) {
      return false;
    }
    const uintptr_t lowBitsMask = sizeof(uintptr_t) - 1;
    return (mValue & lowBitsMask) == 0;
  }

  // Raw accessors.  Value() does not check validity; callers are
  // expected to consult Valid() themselves.
  bool Valid() const { return mValid; }
  uintptr_t Value() const { return mValue; }

private:
  uintptr_t mValue;  // the word itself; zero whenever the tag is invalid
  bool mValid;       // the validity tag
};
|
96 |
|
97 |
|
98 // The registers, with validity tags, that will be unwound. |
|
99 |
|
100 struct UnwindRegs { |
|
101 #if defined(LUL_ARCH_arm) |
|
102 TaggedUWord r7; |
|
103 TaggedUWord r11; |
|
104 TaggedUWord r12; |
|
105 TaggedUWord r13; |
|
106 TaggedUWord r14; |
|
107 TaggedUWord r15; |
|
108 #elif defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86) |
|
109 TaggedUWord xbp; |
|
110 TaggedUWord xsp; |
|
111 TaggedUWord xip; |
|
112 #else |
|
113 # error "Unknown plat" |
|
114 #endif |
|
115 }; |
|
116 |
|
117 |
|
// Upper bound, in bytes, on the size of one stack snapshot (32 KiB).
// Raising it is possible but not free: every snapshot has to be copied
// from the sampling thread to a worker thread, so a larger limit costs
// performance.  In practice 32k seems to be enough to get good
// backtraces.
static const size_t N_STACK_BYTES = 32 * 1024;
|
124 |
|
125 // The stack chunk image that will be unwound. |
|
126 struct StackImage { |
|
127 // [start_avma, +len) specify the address range in the buffer. |
|
128 // Obviously we require 0 <= len <= N_STACK_BYTES. |
|
129 uintptr_t mStartAvma; |
|
130 size_t mLen; |
|
131 uint8_t mContents[N_STACK_BYTES]; |
|
132 }; |
|
133 |
|
134 |
|
135 // The core unwinder library class. Just one of these is needed, and |
|
136 // it can be shared by multiple unwinder threads. |
|
137 // |
|
138 // Access to the library is mediated by a single reader-writer lock. |
|
139 // All attempts to change the library's internal shared state -- that |
|
140 // is, loading or unloading unwind info -- are forced single-threaded |
|
141 // by causing the called routine to acquire a write-lock. Unwind |
|
142 // requests do not change the library's internal shared state and |
|
143 // therefore require only a read-lock. Hence multiple threads can |
|
144 // unwind in parallel. |
|
145 // |
|
146 // The library needs to maintain state which is private to each |
|
147 // unwinder thread -- the CFI (Dwarf Call Frame Information) fast |
|
148 // cache. Hence unwinder threads first need to register with the |
|
149 // library, so their identities are known. Also, for maximum |
|
150 // effectiveness of the CFI caching, it is preferable to have a small |
|
151 // number of very-busy unwinder threads rather than a large number of |
|
152 // mostly-idle unwinder threads. |
|
153 // |
|
154 // None of the methods may be safely called from within a signal |
|
155 // handler, since this risks deadlock. In particular this means |
|
156 // a thread may not unwind itself from within a signal handler |
|
157 // frame. It might be safe to call Unwind() on its own stack |
|
158 // from not-inside a signal frame, although even that cannot be |
|
159 // guaranteed deadlock free. |
|
160 |
|
161 class PriMap; |
|
162 class SegArray; |
|
163 class CFICache; |
|
164 |
|
165 class LUL { |
|
166 public: |
|
167 // Create; supply a logging sink. Initialises the rw-lock. |
|
168 LUL(void (*aLog)(const char*)); |
|
169 |
|
170 // Destroy. This acquires mRWlock for writing. By doing that, waits |
|
171 // for all unwinder threads to finish any Unwind() calls they may be |
|
172 // in. All resources are freed and all registered unwinder threads |
|
173 // are deregistered. |
|
174 ~LUL(); |
|
175 |
|
176 // Notify of a new r-x mapping, and load the associated unwind info. |
|
177 // The filename is strdup'd and used for debug printing. If |
|
178 // aMappedImage is NULL, this function will mmap/munmap the file |
|
179 // itself, so as to be able to read the unwind info. If |
|
180 // aMappedImage is non-NULL then it is assumed to point to a |
|
181 // called-supplied and caller-managed mapped image of the file. |
|
182 // |
|
183 // Acquires mRWlock for writing. This must be called only after the |
|
184 // code area in question really has been mapped. |
|
185 void NotifyAfterMap(uintptr_t aRXavma, size_t aSize, |
|
186 const char* aFileName, const void* aMappedImage); |
|
187 |
|
188 // In rare cases we know an executable area exists but don't know |
|
189 // what the associated file is. This call notifies LUL of such |
|
190 // areas. This is important for correct functioning of stack |
|
191 // scanning and of the x86-{linux,android} special-case |
|
192 // __kernel_syscall function handling. Acquires mRWlock for |
|
193 // writing. This must be called only after the code area in |
|
194 // question really has been mapped. |
|
195 void NotifyExecutableArea(uintptr_t aRXavma, size_t aSize); |
|
196 |
|
197 // Notify that a mapped area has been unmapped; discard any |
|
198 // associated unwind info. Acquires mRWlock for writing. Note that |
|
199 // to avoid segfaulting the stack-scan unwinder, which inspects code |
|
200 // areas, this must be called before the code area in question is |
|
201 // really unmapped. Note that, unlike NotifyAfterMap(), this |
|
202 // function takes the start and end addresses of the range to be |
|
203 // unmapped, rather than a start and a length parameter. This is so |
|
204 // as to make it possible to notify an unmap for the entire address |
|
205 // space using a single call. |
|
206 void NotifyBeforeUnmap(uintptr_t aAvmaMin, uintptr_t aAvmaMax); |
|
207 |
|
208 // Apply NotifyBeforeUnmap to the entire address space. This causes |
|
209 // LUL to discard all unwind and executable-area information for the |
|
210 // entire address space. |
|
211 void NotifyBeforeUnmapAll() { |
|
212 NotifyBeforeUnmap(0, UINTPTR_MAX); |
|
213 } |
|
214 |
|
215 // Returns the number of mappings currently registered. Acquires |
|
216 // mRWlock for writing. |
|
217 size_t CountMappings(); |
|
218 |
|
219 // Register the calling thread for unwinding. Acquires mRWlock for |
|
220 // writing. |
|
221 void RegisterUnwinderThread(); |
|
222 |
|
223 // Unwind |aStackImg| starting with the context in |aStartRegs|. |
|
224 // Write the number of frames recovered in *aFramesUsed. Put |
|
225 // the PC values in aFramePCs[0 .. *aFramesUsed-1] and |
|
226 // the SP values in aFrameSPs[0 .. *aFramesUsed-1]. |
|
227 // |aFramesAvail| is the size of the two output arrays and hence the |
|
228 // largest possible value of *aFramesUsed. PC values are always |
|
229 // valid, and the unwind will stop when the PC becomes invalid, but |
|
230 // the SP values might be invalid, in which case the value zero will |
|
231 // be written in the relevant frameSPs[] slot. |
|
232 // |
|
233 // Unwinding may optionally use stack scanning. The maximum number |
|
234 // of frames that may be recovered by stack scanning is |
|
235 // |aScannedFramesAllowed| and the actual number recovered is |
|
236 // written into *aScannedFramesAcquired. |aScannedFramesAllowed| |
|
237 // must be less than or equal to |aFramesAvail|. |
|
238 // |
|
239 // This function assumes that the SP values increase as it unwinds |
|
240 // away from the innermost frame -- that is, that the stack grows |
|
241 // down. It monitors SP values as it unwinds to check they |
|
242 // decrease, so as to avoid looping on corrupted stacks. |
|
243 // |
|
244 // Acquires mRWlock for reading. Hence multiple threads may unwind |
|
245 // at once, but no thread may be unwinding whilst the library loads |
|
246 // or discards unwind information. Returns false if the calling |
|
247 // thread is not registered for unwinding. |
|
248 // |
|
249 // Up to aScannedFramesAllowed stack-scanned frames may be recovered. |
|
250 // |
|
251 // The calling thread must previously have registered itself via |
|
252 // RegisterUnwinderThread. |
|
253 void Unwind(/*OUT*/uintptr_t* aFramePCs, |
|
254 /*OUT*/uintptr_t* aFrameSPs, |
|
255 /*OUT*/size_t* aFramesUsed, |
|
256 /*OUT*/size_t* aScannedFramesAcquired, |
|
257 size_t aFramesAvail, |
|
258 size_t aScannedFramesAllowed, |
|
259 UnwindRegs* aStartRegs, StackImage* aStackImg); |
|
260 |
|
261 // The logging sink. Call to send debug strings to the caller- |
|
262 // specified destination. |
|
263 void (*mLog)(const char*); |
|
264 |
|
265 private: |
|
266 // Invalidate the caches. Requires mRWlock to be held for writing; |
|
267 // does not acquire it itself. |
|
268 void InvalidateCFICaches(); |
|
269 |
|
270 // The one-and-only lock, a reader-writer lock, for the library. |
|
271 LulRWLock* mRWlock; |
|
272 |
|
273 // The top level mapping from code address ranges to postprocessed |
|
274 // unwind info. Basically a sorted array of (addr, len, info) |
|
275 // records. Threads wishing to query this field must hold mRWlock |
|
276 // for reading. Threads wishing to modify this field must hold |
|
277 // mRWlock for writing. This field is updated by NotifyAfterMap and |
|
278 // NotifyBeforeUnmap. |
|
279 PriMap* mPriMap; |
|
280 |
|
281 // An auxiliary structure that records which address ranges are |
|
282 // mapped r-x, for the benefit of the stack scanner. Threads |
|
283 // wishing to query this field must hold mRWlock for reading. |
|
284 // Threads wishing to modify this field must hold mRWlock for |
|
285 // writing. |
|
286 SegArray* mSegArray; |
|
287 |
|
288 // The thread-local data: a mapping from threads to CFI-fast-caches. |
|
289 // Threads wishing to query this field must hold mRWlock for |
|
290 // reading. Threads wishing to modify this field must hold mRWlock |
|
291 // for writing. |
|
292 // |
|
293 // The CFICaches themselves are thread-local and can be both read |
|
294 // and written when mRWlock is held for reading. It would probably |
|
295 // be faster to use the pthread_{set,get}specific functions, but |
|
296 // also more difficult. This map is queried once per unwind, in |
|
297 // order to get hold of the CFI cache for a given thread. |
|
298 std::map<pthread_t, CFICache*> mCaches; |
|
299 }; |
|
300 |
|
301 |
|
302 // Run unit tests on an initialised, loaded-up LUL instance, and print |
|
303 // summary results on |aLUL|'s logging sink. Also return the number |
|
304 // of tests run in *aNTests and the number that passed in |
|
305 // *aNTestsPassed. |
|
306 void |
|
307 RunLulUnitTests(/*OUT*/int* aNTests, /*OUT*/int*aNTestsPassed, LUL* aLUL); |
|
308 |
|
309 } // namespace lul |
|
310 |
|
311 #endif // LulMain_h |