toolkit/crashreporter/google-breakpad/src/common/stabs_reader.h

changeset 0
6474c204b198
equal deleted inserted replaced
-1:000000000000 0:e0777ab7c1e6
1 // -*- mode: c++ -*-
2
3 // Copyright (c) 2010 Google Inc. All Rights Reserved.
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 // * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 // * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 // * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 // Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
32
33 // stabs_reader.h: Define StabsReader, a parser for STABS debugging
34 // information. A description of the STABS debugging format can be
35 // found at:
36 //
37 // http://sourceware.org/gdb/current/onlinedocs/stabs_toc.html
38 //
39 // The comments here assume you understand the format.
40 //
41 // This parser can handle big-endian and little-endian data, and the symbol
42 // values may be either 32 or 64 bits long. It handles both STABS in
43 // sections (as used on Linux) and STABS appearing directly in an
44 // a.out-like symbol table (as used in Darwin OS X Mach-O files).
45
46 #ifndef COMMON_STABS_READER_H__
47 #define COMMON_STABS_READER_H__
48
49 #include <stddef.h>
50 #include <stdint.h>
51
52 #ifdef HAVE_CONFIG_H
53 #include <config.h>
54 #endif
55
56 #ifdef HAVE_A_OUT_H
57 #include <a.out.h>
58 #endif
59 #ifdef HAVE_MACH_O_NLIST_H
60 #include <mach-o/nlist.h>
61 #endif
62
63 #include <string>
64 #include <vector>
65
66 #include "common/byte_cursor.h"
67 #include "common/using_std_string.h"
68
69 namespace google_breakpad {
70
71 class StabsHandler;
72
73 class StabsReader {
74 public:
75 // Create a reader for the STABS debug information whose .stab section is
76 // the STAB_SIZE bytes starting at STAB, and whose .stabstr section is the
77 // STABSTR_SIZE bytes starting at STABSTR. The reader will call the member
78 // functions of HANDLER to report the information it finds, when the
79 // reader's 'Process' member function is called.
80 //
81 // BIG_ENDIAN should be true if the entries in the .stab section are in
82 // big-endian form, or false if they are in little-endian form.
83 //
84 // VALUE_SIZE should be either 4 or 8, indicating the size of the 'value'
85 // field in each entry in bytes.
86 //
87 // UNITIZED should be true if the STABS data is stored in units with
88 // N_UNDF headers. This is usually the case for STABS stored in sections,
89 // like .stab/.stabstr, and usually not the case for STABS stored in the
90 // actual symbol table; UNITIZED should be true when parsing Linux stabs,
91 // false when parsing Mac OS X STABS. For details, see:
92 // http://sourceware.org/gdb/current/onlinedocs/stabs/Stab-Section-Basics.html
93 //
94 // Note that, in ELF, the .stabstr section should be found using the
95 // 'sh_link' field of the .stab section header, not by name.
96 StabsReader(const uint8_t *stab, size_t stab_size,
97 const uint8_t *stabstr, size_t stabstr_size,
98 bool big_endian, size_t value_size, bool unitized,
99 StabsHandler *handler);
100
101 // Process the STABS data, calling the handler's member functions to
102 // report what we find. While the handler functions return true,
103 // continue to process until we reach the end of the section. If we
104 // processed the entire section and all handlers returned true,
105 // return true. If any handler returned false, return false.
106 //
107 // This is only meant to be called once per StabsReader instance;
108 // resuming a prior processing pass that stopped abruptly isn't supported.
109 bool Process();
110
111 private:
112
113 // A class for walking arrays of STABS entries. This isolates the main
114 // STABS reader from the exact format (size; endianness) of the entries
115 // themselves.
116 class EntryIterator {
117 public:
118 // The contents of a STABS entry, adjusted for the host's endianness,
119 // word size, 'struct nlist' layout, and so on.
120 struct Entry {
121 // True if this iterator has reached the end of the entry array. When
122 // this is set, the other members of this structure are not valid.
123 bool at_end;
124
125 // The number of this entry within the list.
126 size_t index;
127
128 // The current entry's name offset. This is the offset within the
129 // current compilation unit's strings, as established by the N_UNDF entries.
130 size_t name_offset;
131
132 // The current entry's type, 'other' field, descriptor, and value.
133 unsigned char type;
134 unsigned char other;
135 short descriptor;
136 uint64_t value;
137 };
138
139 // Create an EntryIterator walking the entries in BUFFER. Treat the
140 // entries as big-endian if BIG_ENDIAN is true, as little-endian
141 // otherwise. Assume each entry has a 'value' field whose size is
142 // VALUE_SIZE.
143 //
144 // This would not be terribly clean to extend to other format variations,
145 // but it's enough to handle Linux and Mac, and we'd like STABS to die
146 // anyway.
147 //
148 // For the record: on Linux, STABS entry values are always 32 bits,
149 // regardless of the architecture address size (don't ask me why); on
150 // Mac, they are 32 or 64 bits long. Oddly, the section header's entry
151 // size for a Linux ELF .stab section varies according to the ELF class
152 // from 12 to 20 even as the actual entries remain unchanged.
153 EntryIterator(const ByteBuffer *buffer, bool big_endian, size_t value_size);
154
155 // Move to the next entry. This function's behavior is undefined if the
156 // current entry's 'at_end' member is true when it is called.
157 EntryIterator &operator++() { Fetch(); entry_.index++; return *this; }
158
159 // Dereferencing this iterator produces a reference to an Entry structure
160 // that holds the current entry's values. The entry is owned by this
161 // EntryIterator, and will be invalidated at the next call to operator++.
162 const Entry &operator*() const { return entry_; }
163 const Entry *operator->() const { return &entry_; }
164
165 private:
166 // Read the STABS entry at cursor_, and set entry_ appropriately.
167 void Fetch();
168
169 // The size of entries' value field, in bytes.
170 size_t value_size_;
171
172 // A byte cursor traversing the entry data (presumably the constructor's BUFFER).
173 ByteCursor cursor_;
174
175 // Values for the entry this iterator refers to.
176 Entry entry_;
177 };
178
179 // A source line, saved to be reported later.
180 struct Line {
181 uint64_t address;
182 const char *filename;
183 int number;
184 };
185
186 // Return the name of the current symbol.
187 const char *SymbolString();
188
189 // Process a compilation unit starting at the current entry (presumably
190 // *iterator_). Return true to continue processing, or false to abort.
191 bool ProcessCompilationUnit();
192
193 // Process a function in current_source_file_ starting at the current entry
194 // (presumably *iterator_). Return true to continue processing, or false to abort.
195 bool ProcessFunction();
196
197 // Process an exported function symbol.
198 // Return true to continue processing, or false to abort.
199 bool ProcessExtern();
200
201 // The STABS entries being parsed.
202 ByteBuffer entries_;
203
204 // The string section to which the entries refer.
205 ByteBuffer strings_;
206
207 // The iterator walking the STABS entries.
208 EntryIterator iterator_;
209
210 // True if the data is "unitized"; see the explanation in the comment for
211 // StabsReader::StabsReader.
212 bool unitized_;
213
214 StabsHandler *handler_; // Receives the reader's reports (presumably the constructor's HANDLER).
215
216 // The offset of the current compilation unit's strings within strings_.
217 size_t string_offset_;
218
219 // The value string_offset_ should have for the next compilation unit,
220 // as established by N_UNDF entries.
221 size_t next_cu_string_offset_;
222
223 // The current source file name.
224 const char *current_source_file_;
225
226 // Mac OS X STABS place SLINE records before functions; we accumulate a
227 // vector of these until we see the FUN record, and then report them
228 // after the StartFunction call.
229 std::vector<Line> queued_lines_;
230 };
231
232 // Consumer-provided callback structure for the STABS reader. Clients
233 // of the STABS reader provide an instance of this structure. The
234 // reader then invokes the member functions of that instance to report
235 // the information it finds.
236 //
237 // The default definitions of the member functions do nothing, and return
238 // true so processing will continue; Warning is pure virtual, so clients must define it.
239 class StabsHandler {
240 public:
241 StabsHandler() { }
242 virtual ~StabsHandler() { }
243
244 // Some general notes about the handler callback functions:
245
246 // Processing proceeds until the end of the .stab section, or until
247 // one of these functions returns false.
248
249 // The addresses given are as reported in the STABS info, without
250 // regard for whether the module may be loaded at different
251 // addresses at different times (a shared library, say). When
252 // processing STABS from an ELF shared library, the addresses given
253 // all assume the library is loaded at its nominal load address.
254 // They are *not* offsets from the nominal load address. If you
255 // want offsets, you must subtract off the library's nominal load
256 // address.
257
258 // The arguments to these functions named FILENAME are all
259 // references to strings stored in the .stabstr section. Because
260 // both the Linux and Solaris linkers factor out duplicate strings
261 // from the .stabstr section, the consumer can assume that if two
262 // FILENAME values are different addresses, they represent different
263 // file names.
264 //
265 // Thus, it's safe to use (say) std::map<char *, ...>, which does
266 // string address comparisons, not string content comparisons.
267 // Since all the strings are in the same array of characters --- the
268 // .stabstr section --- comparing their addresses produces
269 // predictable, if not lexicographically meaningful, results.
270
271 // Begin processing a compilation unit whose main source file is
272 // named FILENAME, and whose base address is ADDRESS. If
273 // BUILD_DIRECTORY is non-NULL, it is the name of the build
274 // directory in which the compilation occurred.
275 virtual bool StartCompilationUnit(const char *filename, uint64_t address,
276 const char *build_directory) {
277 return true;
278 }
279
280 // Finish processing the compilation unit. If ADDRESS is non-zero,
281 // it is the ending address of the compilation unit. If ADDRESS is
282 // zero, then the compilation unit's ending address is not
283 // available, and the consumer must infer it by other means.
284 virtual bool EndCompilationUnit(uint64_t address) { return true; }
285
286 // Begin processing a function named NAME, whose starting address is
287 // ADDRESS. This function belongs to the compilation unit that was
288 // most recently started but not ended.
289 //
290 // Note that, unlike filenames, NAME is not a pointer into the
291 // .stabstr section; this is because the name as it appears in the
292 // STABS data is followed by type information. The value passed to
293 // StartFunction is the function name alone.
294 //
295 // In languages that use name mangling, like C++, NAME is mangled.
296 virtual bool StartFunction(const string &name, uint64_t address) {
297 return true;
298 }
299
300 // Finish processing the function. If ADDRESS is non-zero, it is
301 // the ending address for the function. If ADDRESS is zero, then
302 // the function's ending address is not available, and the consumer
303 // must infer it by other means.
304 virtual bool EndFunction(uint64_t address) { return true; }
305
306 // Report that the code at ADDRESS is attributable to line NUMBER of
307 // the source file named FILENAME. The caller must infer the ending
308 // address of the line.
309 virtual bool Line(uint64_t address, const char *filename, int number) {
310 return true;
311 }
312
313 // Report that an exported function NAME is present at ADDRESS.
314 // The size of the function is unknown.
315 virtual bool Extern(const string &name, uint64_t address) {
316 return true;
317 }
318
319 // Report a warning. FORMAT is a printf-like format string,
320 // specifying how to format the subsequent arguments. Has no default definition.
321 virtual void Warning(const char *format, ...) = 0;
322 };
323
324 } // namespace google_breakpad
325
326 #endif // COMMON_STABS_READER_H__

mercurial