|
1 // -*- mode: C++ -*- |
|
2 |
|
3 // Copyright (c) 2010, Google Inc. |
|
4 // All rights reserved. |
|
5 // |
|
6 // Redistribution and use in source and binary forms, with or without |
|
7 // modification, are permitted provided that the following conditions are |
|
8 // met: |
|
9 // |
|
10 // * Redistributions of source code must retain the above copyright |
|
11 // notice, this list of conditions and the following disclaimer. |
|
12 // * Redistributions in binary form must reproduce the above |
|
13 // copyright notice, this list of conditions and the following disclaimer |
|
14 // in the documentation and/or other materials provided with the |
|
15 // distribution. |
|
16 // * Neither the name of Google Inc. nor the names of its |
|
17 // contributors may be used to endorse or promote products derived from |
|
18 // this software without specific prior written permission. |
|
19 // |
|
20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
|
21 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
|
22 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
|
23 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
|
24 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
|
25 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
|
26 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
|
27 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
|
28 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
|
29 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
|
30 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|
31 |
|
32 // Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> |
|
33 |
|
34 // test-assembler.h: interface to class for building complex binary streams. |
|
35 |
|
36 // To test the Breakpad symbol dumper and processor thoroughly, for |
|
37 // all combinations of host system and minidump processor |
|
38 // architecture, we need to be able to easily generate complex test |
|
39 // data like debugging information and minidump files. |
|
40 // |
|
41 // For example, if we want our unit tests to provide full code |
|
42 // coverage for stack walking, it may be difficult to persuade the |
|
43 // compiler to generate every possible sort of stack walking |
|
44 // information that we want to support; there are probably DWARF CFI |
|
45 // opcodes that GCC never emits. Similarly, if we want to test our |
|
46 // error handling, we will need to generate damaged minidumps or |
|
47 // debugging information that (we hope) the client or compiler will |
|
48 // never produce on its own. |
|
49 // |
|
50 // google_breakpad::TestAssembler provides a predictable and |
|
51 // (relatively) simple way to generate complex formatted data streams |
|
52 // like minidumps and CFI. Furthermore, because TestAssembler is |
|
53 // portable, developers without access to (say) Visual Studio or a |
|
54 // SPARC assembler can still work on test data for those targets. |
|
55 |
|
56 #ifndef PROCESSOR_TEST_ASSEMBLER_H_ |
|
57 #define PROCESSOR_TEST_ASSEMBLER_H_ |
|
58 |
|
59 #include <list> |
|
60 #include <vector> |
|
61 #include <string> |
|
62 |
|
63 #include "common/using_std_string.h" |
|
64 #include "google_breakpad/common/breakpad_types.h" |
|
65 |
|
66 namespace google_breakpad { |
|
67 |
|
68 using std::list; |
|
69 using std::vector; |
|
70 |
|
71 namespace test_assembler { |
|
72 |
|
73 // A Label represents a value not yet known that we need to store in a |
|
74 // section. As long as all the labels a section refers to are defined |
|
75 // by the time we retrieve its contents as bytes, we can use undefined |
|
76 // labels freely in that section's construction. |
|
77 // |
|
78 // A label can be in one of three states: |
|
79 // - undefined, |
|
80 // - defined as the sum of some other label and a constant, or |
|
81 // - a constant. |
|
82 // |
|
83 // A label's value never changes, but it can accumulate constraints. |
|
84 // Adding labels and integers is permitted, and yields a label. |
|
85 // Subtracting a constant from a label is permitted, and also yields a |
|
86 // label. Subtracting two labels that have some relationship to each |
|
87 // other is permitted, and yields a constant. |
|
88 // |
|
89 // For example: |
|
90 // |
|
91 // Label a; // a's value is undefined |
|
92 // Label b; // b's value is undefined |
|
93 // { |
|
94 // Label c = a + 4; // okay, even though a's value is unknown |
|
95 // b = c + 4; // also okay; b is now a+8 |
|
96 // } |
|
97 // Label d = b - 2; // okay; d == a+6, even though c is gone |
|
98 // d.Value(); // error: d's value is not yet known |
|
99 // d - a; // is 6, even though their values are not known |
|
100 // a = 12; // now b == 20, and d == 18 |
|
101 // d.Value(); // 18: no longer an error |
|
102 // b.Value(); // 20 |
|
103 // d = 10; // error: d is already defined. |
|
104 // |
|
105 // Label objects' lifetimes are unconstrained: notice that, in the |
|
106 // above example, even though a and b are only related through c, and |
|
107 // c goes out of scope, the assignment to a sets b's value as well. In |
|
108 // particular, it's not necessary to ensure that a Label lives beyond |
|
109 // Sections that refer to it. |
|
110 class Label { |
|
111 public: |
|
112 Label(); // An undefined label. |
|
113 Label(uint64_t value); // A label with a fixed value |
|
114 Label(const Label &value); // A label equal to another. |
|
115 ~Label(); |
|
116 |
|
117 // Return this label's value; it must be known. |
|
118 // |
|
119 // Providing this as a cast operator is nifty, but the conversions |
|
120 // happen in unexpected places. In particular, ISO C++ says that |
|
121 // Label + size_t becomes ambigious, because it can't decide whether |
|
122 // to convert the Label to a uint64_t and then to a size_t, or use |
|
123 // the overloaded operator that returns a new label, even though the |
|
124 // former could fail if the label is not yet defined and the latter won't. |
|
125 uint64_t Value() const; |
|
126 |
|
127 Label &operator=(uint64_t value); |
|
128 Label &operator=(const Label &value); |
|
129 Label operator+(uint64_t addend) const; |
|
130 Label operator-(uint64_t subtrahend) const; |
|
131 uint64_t operator-(const Label &subtrahend) const; |
|
132 |
|
133 // We could also provide == and != that work on undefined, but |
|
134 // related, labels. |
|
135 |
|
136 // Return true if this label's value is known. If VALUE_P is given, |
|
137 // set *VALUE_P to the known value if returning true. |
|
138 bool IsKnownConstant(uint64_t *value_p = NULL) const; |
|
139 |
|
140 // Return true if the offset from LABEL to this label is known. If |
|
141 // OFFSET_P is given, set *OFFSET_P to the offset when returning true. |
|
142 // |
|
143 // You can think of l.KnownOffsetFrom(m, &d) as being like 'd = l-m', |
|
144 // except that it also returns a value indicating whether the |
|
145 // subtraction is possible given what we currently know of l and m. |
|
146 // It can be possible even if we don't know l and m's values. For |
|
147 // example: |
|
148 // |
|
149 // Label l, m; |
|
150 // m = l + 10; |
|
151 // l.IsKnownConstant(); // false |
|
152 // m.IsKnownConstant(); // false |
|
153 // uint64_t d; |
|
154 // l.IsKnownOffsetFrom(m, &d); // true, and sets d to -10. |
|
155 // l-m // -10 |
|
156 // m-l // 10 |
|
157 // m.Value() // error: m's value is not known |
|
158 bool IsKnownOffsetFrom(const Label &label, uint64_t *offset_p = NULL) const; |
|
159 |
|
160 private: |
|
161 // A label's value, or if that is not yet known, how the value is |
|
162 // related to other labels' values. A binding may be: |
|
163 // - a known constant, |
|
164 // - constrained to be equal to some other binding plus a constant, or |
|
165 // - unconstrained, and free to take on any value. |
|
166 // |
|
167 // Many labels may point to a single binding, and each binding may |
|
168 // refer to another, so bindings and labels form trees whose leaves |
|
169 // are labels, whose interior nodes (and roots) are bindings, and |
|
170 // where links point from children to parents. Bindings are |
|
171 // reference counted, allowing labels to be lightweight, copyable, |
|
172 // assignable, placed in containers, and so on. |
|
173 class Binding { |
|
174 public: |
|
175 Binding(); |
|
176 Binding(uint64_t addend); |
|
177 ~Binding(); |
|
178 |
|
179 // Increment our reference count. |
|
180 void Acquire() { reference_count_++; }; |
|
181 // Decrement our reference count, and return true if it is zero. |
|
182 bool Release() { return --reference_count_ == 0; } |
|
183 |
|
184 // Set this binding to be equal to BINDING + ADDEND. If BINDING is |
|
185 // NULL, then set this binding to the known constant ADDEND. |
|
186 // Update every binding on this binding's chain to point directly |
|
187 // to BINDING, or to be a constant, with addends adjusted |
|
188 // appropriately. |
|
189 void Set(Binding *binding, uint64_t value); |
|
190 |
|
191 // Return what we know about the value of this binding. |
|
192 // - If this binding's value is a known constant, set BASE to |
|
193 // NULL, and set ADDEND to its value. |
|
194 // - If this binding is not a known constant but related to other |
|
195 // bindings, set BASE to the binding at the end of the relation |
|
196 // chain (which will always be unconstrained), and set ADDEND to the |
|
197 // value to add to that binding's value to get this binding's |
|
198 // value. |
|
199 // - If this binding is unconstrained, set BASE to this, and leave |
|
200 // ADDEND unchanged. |
|
201 void Get(Binding **base, uint64_t *addend); |
|
202 |
|
203 private: |
|
204 // There are three cases: |
|
205 // |
|
206 // - A binding representing a known constant value has base_ NULL, |
|
207 // and addend_ equal to the value. |
|
208 // |
|
209 // - A binding representing a completely unconstrained value has |
|
210 // base_ pointing to this; addend_ is unused. |
|
211 // |
|
212 // - A binding whose value is related to some other binding's |
|
213 // value has base_ pointing to that other binding, and addend_ |
|
214 // set to the amount to add to that binding's value to get this |
|
215 // binding's value. We only represent relationships of the form |
|
216 // x = y+c. |
|
217 // |
|
218 // Thus, the bind_ links form a chain terminating in either a |
|
219 // known constant value or a completely unconstrained value. Most |
|
220 // operations on bindings do path compression: they change every |
|
221 // binding on the chain to point directly to the final value, |
|
222 // adjusting addends as appropriate. |
|
223 Binding *base_; |
|
224 uint64_t addend_; |
|
225 |
|
226 // The number of Labels and Bindings pointing to this binding. |
|
227 // (When a binding points to itself, indicating a completely |
|
228 // unconstrained binding, that doesn't count as a reference.) |
|
229 int reference_count_; |
|
230 }; |
|
231 |
|
232 // This label's value. |
|
233 Binding *value_; |
|
234 }; |
|
235 |
|
236 inline Label operator+(uint64_t a, const Label &l) { return l + a; } |
|
237 // Note that int-Label isn't defined, as negating a Label is not an |
|
238 // operation we support. |
|
239 |
|
240 // Conventions for representing larger numbers as sequences of bytes. |
|
241 enum Endianness { |
|
242 kBigEndian, // Big-endian: the most significant byte comes first. |
|
243 kLittleEndian, // Little-endian: the least significant byte comes first. |
|
244 kUnsetEndian, // used internally |
|
245 }; |
|
246 |
|
247 // A section is a sequence of bytes, constructed by appending bytes |
|
248 // to the end. Sections have a convenient and flexible set of member |
|
249 // functions for appending data in various formats: big-endian and |
|
250 // little-endian signed and unsigned values of different sizes; |
|
251 // LEB128 and ULEB128 values (see below), and raw blocks of bytes. |
|
252 // |
|
253 // If you need to append a value to a section that is not convenient |
|
254 // to compute immediately, you can create a label, append the |
|
255 // label's value to the section, and then set the label's value |
|
256 // later, when it's convenient to do so. Once a label's value is |
|
257 // known, the section class takes care of updating all previously |
|
258 // appended references to it. |
|
259 // |
|
260 // Once all the labels to which a section refers have had their |
|
261 // values determined, you can get a copy of the section's contents |
|
262 // as a string. |
|
263 // |
|
264 // Note that there is no specified "start of section" label. This is |
|
265 // because there are typically several different meanings for "the |
|
266 // start of a section": the offset of the section within an object |
|
267 // file, the address in memory at which the section's content appear, |
|
268 // and so on. It's up to the code that uses the Section class to |
|
269 // keep track of these explicitly, as they depend on the application. |
|
270 class Section { |
|
271 public: |
|
272 Section(Endianness endianness = kUnsetEndian) |
|
273 : endianness_(endianness) { }; |
|
274 |
|
275 // A base class destructor should be either public and virtual, |
|
276 // or protected and nonvirtual. |
|
277 virtual ~Section() { }; |
|
278 |
|
279 // Set the default endianness of this section to ENDIANNESS. This |
|
280 // sets the behavior of the D<N> appending functions. If the |
|
281 // assembler's default endianness was set, this is the |
|
282 void set_endianness(Endianness endianness) { |
|
283 endianness_ = endianness; |
|
284 } |
|
285 |
|
286 // Return the default endianness of this section. |
|
287 Endianness endianness() const { return endianness_; } |
|
288 |
|
289 // Append the SIZE bytes at DATA or the contents of STRING to the |
|
290 // end of this section. Return a reference to this section. |
|
291 Section &Append(const uint8_t *data, size_t size) { |
|
292 contents_.append(reinterpret_cast<const char *>(data), size); |
|
293 return *this; |
|
294 }; |
|
295 Section &Append(const string &data) { |
|
296 contents_.append(data); |
|
297 return *this; |
|
298 }; |
|
299 |
|
300 // Append SIZE copies of BYTE to the end of this section. Return a |
|
301 // reference to this section. |
|
302 Section &Append(size_t size, uint8_t byte) { |
|
303 contents_.append(size, (char) byte); |
|
304 return *this; |
|
305 } |
|
306 |
|
307 // Append NUMBER to this section. ENDIANNESS is the endianness to |
|
308 // use to write the number. SIZE is the length of the number in |
|
309 // bytes. Return a reference to this section. |
|
310 Section &Append(Endianness endianness, size_t size, uint64_t number); |
|
311 Section &Append(Endianness endianness, size_t size, const Label &label); |
|
312 |
|
313 // Append SECTION to the end of this section. The labels SECTION |
|
314 // refers to need not be defined yet. |
|
315 // |
|
316 // Note that this has no effect on any Labels' values, or on |
|
317 // SECTION. If placing SECTION within 'this' provides new |
|
318 // constraints on existing labels' values, then it's up to the |
|
319 // caller to fiddle with those labels as needed. |
|
320 Section &Append(const Section §ion); |
|
321 |
|
322 // Append the contents of DATA as a series of bytes terminated by |
|
323 // a NULL character. |
|
324 Section &AppendCString(const string &data) { |
|
325 Append(data); |
|
326 contents_ += '\0'; |
|
327 return *this; |
|
328 } |
|
329 |
|
330 // Append at most SIZE bytes from DATA; if DATA is less than SIZE bytes |
|
331 // long, pad with '\0' characters. |
|
332 Section &AppendCString(const string &data, size_t size) { |
|
333 contents_.append(data, 0, size); |
|
334 if (data.size() < size) |
|
335 Append(size - data.size(), 0); |
|
336 return *this; |
|
337 } |
|
338 |
|
339 // Append VALUE or LABEL to this section, with the given bit width and |
|
340 // endianness. Return a reference to this section. |
|
341 // |
|
342 // The names of these functions have the form <ENDIANNESS><BITWIDTH>: |
|
343 // <ENDIANNESS> is either 'L' (little-endian, least significant byte first), |
|
344 // 'B' (big-endian, most significant byte first), or |
|
345 // 'D' (default, the section's default endianness) |
|
346 // <BITWIDTH> is 8, 16, 32, or 64. |
|
347 // |
|
348 // Since endianness doesn't matter for a single byte, all the |
|
349 // <BITWIDTH>=8 functions are equivalent. |
|
350 // |
|
351 // These can be used to write both signed and unsigned values, as |
|
352 // the compiler will properly sign-extend a signed value before |
|
353 // passing it to the function, at which point the function's |
|
354 // behavior is the same either way. |
|
355 Section &L8(uint8_t value) { contents_ += value; return *this; } |
|
356 Section &B8(uint8_t value) { contents_ += value; return *this; } |
|
357 Section &D8(uint8_t value) { contents_ += value; return *this; } |
|
358 Section &L16(uint16_t), &L32(uint32_t), &L64(uint64_t), |
|
359 &B16(uint16_t), &B32(uint32_t), &B64(uint64_t), |
|
360 &D16(uint16_t), &D32(uint32_t), &D64(uint64_t); |
|
361 Section &L8(const Label &label), &L16(const Label &label), |
|
362 &L32(const Label &label), &L64(const Label &label), |
|
363 &B8(const Label &label), &B16(const Label &label), |
|
364 &B32(const Label &label), &B64(const Label &label), |
|
365 &D8(const Label &label), &D16(const Label &label), |
|
366 &D32(const Label &label), &D64(const Label &label); |
|
367 |
|
368 // Append VALUE in a signed LEB128 (Little-Endian Base 128) form. |
|
369 // |
|
370 // The signed LEB128 representation of an integer N is a variable |
|
371 // number of bytes: |
|
372 // |
|
373 // - If N is between -0x40 and 0x3f, then its signed LEB128 |
|
374 // representation is a single byte whose value is N. |
|
375 // |
|
376 // - Otherwise, its signed LEB128 representation is (N & 0x7f) | |
|
377 // 0x80, followed by the signed LEB128 representation of N / 128, |
|
378 // rounded towards negative infinity. |
|
379 // |
|
380 // In other words, we break VALUE into groups of seven bits, put |
|
381 // them in little-endian order, and then write them as eight-bit |
|
382 // bytes with the high bit on all but the last. |
|
383 // |
|
384 // Note that VALUE cannot be a Label (we would have to implement |
|
385 // relaxation). |
|
386 Section &LEB128(long long value); |
|
387 |
|
388 // Append VALUE in unsigned LEB128 (Little-Endian Base 128) form. |
|
389 // |
|
390 // The unsigned LEB128 representation of an integer N is a variable |
|
391 // number of bytes: |
|
392 // |
|
393 // - If N is between 0 and 0x7f, then its unsigned LEB128 |
|
394 // representation is a single byte whose value is N. |
|
395 // |
|
396 // - Otherwise, its unsigned LEB128 representation is (N & 0x7f) | |
|
397 // 0x80, followed by the unsigned LEB128 representation of N / |
|
398 // 128, rounded towards negative infinity. |
|
399 // |
|
400 // Note that VALUE cannot be a Label (we would have to implement |
|
401 // relaxation). |
|
402 Section &ULEB128(uint64_t value); |
|
403 |
|
404 // Jump to the next location aligned on an ALIGNMENT-byte boundary, |
|
405 // relative to the start of the section. Fill the gap with PAD_BYTE. |
|
406 // ALIGNMENT must be a power of two. Return a reference to this |
|
407 // section. |
|
408 Section &Align(size_t alignment, uint8_t pad_byte = 0); |
|
409 |
|
410 // Clear the contents of this section. |
|
411 void Clear(); |
|
412 |
|
413 // Return the current size of the section. |
|
414 size_t Size() const { return contents_.size(); } |
|
415 |
|
416 // Return a label representing the start of the section. |
|
417 // |
|
418 // It is up to the user whether this label represents the section's |
|
419 // position in an object file, the section's address in memory, or |
|
420 // what have you; some applications may need both, in which case |
|
421 // this simple-minded interface won't be enough. This class only |
|
422 // provides a single start label, for use with the Here and Mark |
|
423 // member functions. |
|
424 // |
|
425 // Ideally, we'd provide this in a subclass that actually knows more |
|
426 // about the application at hand and can provide an appropriate |
|
427 // collection of start labels. But then the appending member |
|
428 // functions like Append and D32 would return a reference to the |
|
429 // base class, not the derived class, and the chaining won't work. |
|
430 // Since the only value here is in pretty notation, that's a fatal |
|
431 // flaw. |
|
432 Label start() const { return start_; } |
|
433 |
|
434 // Return a label representing the point at which the next Appended |
|
435 // item will appear in the section, relative to start(). |
|
436 Label Here() const { return start_ + Size(); } |
|
437 |
|
438 // Set *LABEL to Here, and return a reference to this section. |
|
439 Section &Mark(Label *label) { *label = Here(); return *this; } |
|
440 |
|
441 // If there are no undefined label references left in this |
|
442 // section, set CONTENTS to the contents of this section, as a |
|
443 // string, and clear this section. Return true on success, or false |
|
444 // if there were still undefined labels. |
|
445 bool GetContents(string *contents); |
|
446 |
|
447 private: |
|
448 // Used internally. A reference to a label's value. |
|
449 struct Reference { |
|
450 Reference(size_t set_offset, Endianness set_endianness, size_t set_size, |
|
451 const Label &set_label) |
|
452 : offset(set_offset), endianness(set_endianness), size(set_size), |
|
453 label(set_label) { } |
|
454 |
|
455 // The offset of the reference within the section. |
|
456 size_t offset; |
|
457 |
|
458 // The endianness of the reference. |
|
459 Endianness endianness; |
|
460 |
|
461 // The size of the reference. |
|
462 size_t size; |
|
463 |
|
464 // The label to which this is a reference. |
|
465 Label label; |
|
466 }; |
|
467 |
|
468 // The default endianness of this section. |
|
469 Endianness endianness_; |
|
470 |
|
471 // The contents of the section. |
|
472 string contents_; |
|
473 |
|
474 // References to labels within those contents. |
|
475 vector<Reference> references_; |
|
476 |
|
477 // A label referring to the beginning of the section. |
|
478 Label start_; |
|
479 }; |
|
480 |
|
481 } // namespace test_assembler |
|
482 } // namespace google_breakpad |
|
483 |
|
484 #endif // PROCESSOR_TEST_ASSEMBLER_H_ |