1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/toolkit/crashreporter/google-breakpad/src/common/test_assembler.h Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,484 @@ 1.4 +// -*- mode: C++ -*- 1.5 + 1.6 +// Copyright (c) 2010, Google Inc. 1.7 +// All rights reserved. 1.8 +// 1.9 +// Redistribution and use in source and binary forms, with or without 1.10 +// modification, are permitted provided that the following conditions are 1.11 +// met: 1.12 +// 1.13 +// * Redistributions of source code must retain the above copyright 1.14 +// notice, this list of conditions and the following disclaimer. 1.15 +// * Redistributions in binary form must reproduce the above 1.16 +// copyright notice, this list of conditions and the following disclaimer 1.17 +// in the documentation and/or other materials provided with the 1.18 +// distribution. 1.19 +// * Neither the name of Google Inc. nor the names of its 1.20 +// contributors may be used to endorse or promote products derived from 1.21 +// this software without specific prior written permission. 1.22 +// 1.23 +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 1.24 +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 1.25 +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 1.26 +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 1.27 +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 1.28 +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 1.29 +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 1.30 +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 1.31 +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 1.32 +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 1.33 +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
1.34 + 1.35 +// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> 1.36 + 1.37 +// test_assembler.h: interface to classes for building complex binary streams. 1.38 + 1.39 +// To test the Breakpad symbol dumper and processor thoroughly, for 1.40 +// all combinations of host system and minidump processor 1.41 +// architecture, we need to be able to easily generate complex test 1.42 +// data like debugging information and minidump files. 1.43 +// 1.44 +// For example, if we want our unit tests to provide full code 1.45 +// coverage for stack walking, it may be difficult to persuade the 1.46 +// compiler to generate every possible sort of stack walking 1.47 +// information that we want to support; there are probably DWARF CFI 1.48 +// opcodes that GCC never emits. Similarly, if we want to test our 1.49 +// error handling, we will need to generate damaged minidumps or 1.50 +// debugging information that (we hope) the client or compiler will 1.51 +// never produce on its own. 1.52 +// 1.53 +// google_breakpad::test_assembler provides a predictable and 1.54 +// (relatively) simple way to generate complex formatted data streams 1.55 +// like minidumps and CFI. Furthermore, because test_assembler is 1.56 +// portable, developers without access to (say) Visual Studio or a 1.57 +// SPARC assembler can still work on test data for those targets. 1.58 + 1.59 +#ifndef PROCESSOR_TEST_ASSEMBLER_H_ 1.60 +#define PROCESSOR_TEST_ASSEMBLER_H_ 1.61 + 1.62 +#include <list> 1.63 +#include <vector> 1.64 +#include <string> 1.65 + 1.66 +#include "common/using_std_string.h" 1.67 +#include "google_breakpad/common/breakpad_types.h" 1.68 + 1.69 +namespace google_breakpad { 1.70 + 1.71 +using std::list; 1.72 +using std::vector; 1.73 + 1.74 +namespace test_assembler { 1.75 + 1.76 +// A Label represents a value not yet known that we need to store in a 1.77 +// section. 
As long as all the labels a section refers to are defined 1.78 +// by the time we retrieve its contents as bytes, we can use undefined 1.79 +// labels freely in that section's construction. 1.80 +// 1.81 +// A label can be in one of three states: 1.82 +// - undefined, 1.83 +// - defined as the sum of some other label and a constant, or 1.84 +// - a constant. 1.85 +// 1.86 +// A label's value never changes, but it can accumulate constraints. 1.87 +// Adding labels and integers is permitted, and yields a label. 1.88 +// Subtracting a constant from a label is permitted, and also yields a 1.89 +// label. Subtracting two labels that have some relationship to each 1.90 +// other is permitted, and yields a constant. 1.91 +// 1.92 +// For example: 1.93 +// 1.94 +// Label a; // a's value is undefined 1.95 +// Label b; // b's value is undefined 1.96 +// { 1.97 +// Label c = a + 4; // okay, even though a's value is unknown 1.98 +// b = c + 4; // also okay; b is now a+8 1.99 +// } 1.100 +// Label d = b - 2; // okay; d == a+6, even though c is gone 1.101 +// d.Value(); // error: d's value is not yet known 1.102 +// d - a; // is 6, even though their values are not known 1.103 +// a = 12; // now b == 20, and d == 18 1.104 +// d.Value(); // 18: no longer an error 1.105 +// b.Value(); // 20 1.106 +// d = 10; // error: d is already defined. 1.107 +// 1.108 +// Label objects' lifetimes are unconstrained: notice that, in the 1.109 +// above example, even though a and b are only related through c, and 1.110 +// c goes out of scope, the assignment to a sets b's value as well. In 1.111 +// particular, it's not necessary to ensure that a Label lives beyond 1.112 +// Sections that refer to it. 1.113 +class Label { 1.114 + public: 1.115 + Label(); // An undefined label. 1.116 + Label(uint64_t value); // A label with a fixed value. 1.117 + Label(const Label &value); // A label equal to another. 1.118 + ~Label(); 1.119 + 1.120 + // Return this label's value; it must be known. 
1.121 + // 1.122 + // Providing this as a cast operator is nifty, but the conversions 1.123 + // happen in unexpected places. In particular, ISO C++ says that 1.124 + // Label + size_t becomes ambiguous, because it can't decide whether 1.125 + // to convert the Label to a uint64_t and then to a size_t, or use 1.126 + // the overloaded operator that returns a new label, even though the 1.127 + // former could fail if the label is not yet defined and the latter won't. 1.128 + uint64_t Value() const; 1.129 + 1.130 + Label &operator=(uint64_t value); 1.131 + Label &operator=(const Label &value); 1.132 + Label operator+(uint64_t addend) const; 1.133 + Label operator-(uint64_t subtrahend) const; 1.134 + uint64_t operator-(const Label &subtrahend) const; 1.135 + 1.136 + // We could also provide == and != that work on undefined, but 1.137 + // related, labels. 1.138 + 1.139 + // Return true if this label's value is known. If VALUE_P is given, 1.140 + // set *VALUE_P to the known value if returning true. 1.141 + bool IsKnownConstant(uint64_t *value_p = NULL) const; 1.142 + 1.143 + // Return true if the offset from LABEL to this label is known. If 1.144 + // OFFSET_P is given, set *OFFSET_P to the offset when returning true. 1.145 + // 1.146 + // You can think of l.IsKnownOffsetFrom(m, &d) as being like 'd = l-m', 1.147 + // except that it also returns a value indicating whether the 1.148 + // subtraction is possible given what we currently know of l and m. 1.149 + // It can be possible even if we don't know l and m's values. For 1.150 + // example: 1.151 + // 1.152 + // Label l, m; 1.153 + // m = l + 10; 1.154 + // l.IsKnownConstant(); // false 1.155 + // m.IsKnownConstant(); // false 1.156 + // uint64_t d; 1.157 + // l.IsKnownOffsetFrom(m, &d); // true, and sets d to -10 (mod 2^64). 
1.158 + // l-m // -10 1.159 + // m-l // 10 1.160 + // m.Value() // error: m's value is not known 1.161 + bool IsKnownOffsetFrom(const Label &label, uint64_t *offset_p = NULL) const; 1.162 + 1.163 + private: 1.164 + // A label's value, or if that is not yet known, how the value is 1.165 + // related to other labels' values. A binding may be: 1.166 + // - a known constant, 1.167 + // - constrained to be equal to some other binding plus a constant, or 1.168 + // - unconstrained, and free to take on any value. 1.169 + // 1.170 + // Many labels may point to a single binding, and each binding may 1.171 + // refer to another, so bindings and labels form trees whose leaves 1.172 + // are labels, whose interior nodes (and roots) are bindings, and 1.173 + // where links point from children to parents. Bindings are 1.174 + // reference counted, allowing labels to be lightweight, copyable, 1.175 + // assignable, placed in containers, and so on. 1.176 + class Binding { 1.177 + public: 1.178 + Binding(); 1.179 + Binding(uint64_t addend); 1.180 + ~Binding(); 1.181 + 1.182 + // Increment our reference count. 1.183 + void Acquire() { reference_count_++; }; 1.184 + // Decrement our reference count, and return true if it is zero. 1.185 + bool Release() { return --reference_count_ == 0; } 1.186 + 1.187 + // Set this binding to be equal to BINDING + VALUE. If BINDING is 1.188 + // NULL, then set this binding to the known constant VALUE. 1.189 + // Update every binding on this binding's chain to point directly 1.190 + // to BINDING, or to be a constant, with addends adjusted 1.191 + // appropriately. 1.192 + void Set(Binding *binding, uint64_t value); 1.193 + 1.194 + // Return what we know about the value of this binding. 1.195 + // - If this binding's value is a known constant, set BASE to 1.196 + // NULL, and set ADDEND to its value. 
1.197 + // - If this binding is not a known constant but related to other 1.198 + // bindings, set BASE to the binding at the end of the relation 1.199 + // chain (which will always be unconstrained), and set ADDEND to the 1.200 + // value to add to that binding's value to get this binding's 1.201 + // value. 1.202 + // - If this binding is unconstrained, set BASE to this, and leave 1.203 + // ADDEND unchanged. 1.204 + void Get(Binding **base, uint64_t *addend); 1.205 + 1.206 + private: 1.207 + // There are three cases: 1.208 + // 1.209 + // - A binding representing a known constant value has base_ NULL, 1.210 + // and addend_ equal to the value. 1.211 + // 1.212 + // - A binding representing a completely unconstrained value has 1.213 + // base_ pointing to this; addend_ is unused. 1.214 + // 1.215 + // - A binding whose value is related to some other binding's 1.216 + // value has base_ pointing to that other binding, and addend_ 1.217 + // set to the amount to add to that binding's value to get this 1.218 + // binding's value. We only represent relationships of the form 1.219 + // x = y+c. 1.220 + // 1.221 + // Thus, the base_ links form a chain terminating in either a 1.222 + // known constant value or a completely unconstrained value. Most 1.223 + // operations on bindings do path compression: they change every 1.224 + // binding on the chain to point directly to the final value, 1.225 + // adjusting addends as appropriate. 1.226 + Binding *base_; 1.227 + uint64_t addend_; 1.228 + 1.229 + // The number of Labels and Bindings pointing to this binding. 1.230 + // (When a binding points to itself, indicating a completely 1.231 + // unconstrained binding, that doesn't count as a reference.) 1.232 + int reference_count_; 1.233 + }; 1.234 + 1.235 + // This label's value. 
1.236 + Binding *value_; 1.237 +}; 1.238 + 1.239 +inline Label operator+(uint64_t a, const Label &l) { return l + a; } 1.240 +// Note that integer - Label isn't defined, as negating a Label is not an 1.241 +// operation we support. 1.242 + 1.243 +// Conventions for representing larger numbers as sequences of bytes. 1.244 +enum Endianness { 1.245 + kBigEndian, // Big-endian: the most significant byte comes first. 1.246 + kLittleEndian, // Little-endian: the least significant byte comes first. 1.247 + kUnsetEndian, // Used internally; the default for a newly constructed Section. 1.248 +}; 1.249 + 1.250 +// A section is a sequence of bytes, constructed by appending bytes 1.251 +// to the end. Sections have a convenient and flexible set of member 1.252 +// functions for appending data in various formats: big-endian and 1.253 +// little-endian signed and unsigned values of different sizes; 1.254 +// LEB128 and ULEB128 values (see below), and raw blocks of bytes. 1.255 +// 1.256 +// If you need to append a value to a section that is not convenient 1.257 +// to compute immediately, you can create a label, append the 1.258 +// label's value to the section, and then set the label's value 1.259 +// later, when it's convenient to do so. Once a label's value is 1.260 +// known, the section class takes care of updating all previously 1.261 +// appended references to it. 1.262 +// 1.263 +// Once all the labels to which a section refers have had their 1.264 +// values determined, you can get a copy of the section's contents 1.265 +// as a string. 1.266 +// 1.267 +// Note that the section's start label (see start()) has no fixed 1.268 +// meaning. This is because there are typically several meanings for "the 1.269 +// start of a section": the offset of the section within an object 1.270 +// file, the address in memory at which the section's contents appear, 1.271 +// and so on. It's up to the code that uses the Section class to 1.272 +// keep track of these explicitly, as they depend on the application. 
1.273 +class Section { 1.274 + public: 1.275 + Section(Endianness endianness = kUnsetEndian) 1.276 + : endianness_(endianness) { }; 1.277 + 1.278 + // A base class destructor should be either public and virtual, 1.279 + // or protected and nonvirtual. 1.280 + virtual ~Section() { }; 1.281 + 1.282 + // Set the default endianness of this section to ENDIANNESS. This 1.283 + // sets the behavior of the D<N> appending functions. Any default 1.284 + // endianness set previously is replaced. 1.285 + void set_endianness(Endianness endianness) { 1.286 + endianness_ = endianness; 1.287 + } 1.288 + 1.289 + // Return the default endianness of this section. 1.290 + Endianness endianness() const { return endianness_; } 1.291 + 1.292 + // Append the SIZE bytes at DATA or the contents of the string DATA to the 1.293 + // end of this section. Return a reference to this section. 1.294 + Section &Append(const uint8_t *data, size_t size) { 1.295 + contents_.append(reinterpret_cast<const char *>(data), size); 1.296 + return *this; 1.297 + }; 1.298 + Section &Append(const string &data) { 1.299 + contents_.append(data); 1.300 + return *this; 1.301 + }; 1.302 + 1.303 + // Append SIZE copies of BYTE to the end of this section. Return a 1.304 + // reference to this section. 1.305 + Section &Append(size_t size, uint8_t byte) { 1.306 + contents_.append(size, (char) byte); 1.307 + return *this; 1.308 + } 1.309 + 1.310 + // Append NUMBER to this section. ENDIANNESS is the endianness to 1.311 + // use to write the number. SIZE is the length of the number in 1.312 + // bytes. Return a reference to this section. 1.313 + Section &Append(Endianness endianness, size_t size, uint64_t number); 1.314 + Section &Append(Endianness endianness, size_t size, const Label &label); 1.315 + 1.316 + // Append SECTION to the end of this section. The labels SECTION 1.317 + // refers to need not be defined yet. 1.318 + // 1.319 + // Note that this has no effect on any Labels' values, or on 1.320 + // SECTION. 
If placing SECTION within 'this' provides new 1.321 + // constraints on existing labels' values, then it's up to the 1.322 + // caller to fiddle with those labels as needed. 1.323 + Section &Append(const Section &section); 1.324 + 1.325 + // Append the contents of DATA as a series of bytes terminated by 1.326 + // a NUL ('\0') character. 1.327 + Section &AppendCString(const string &data) { 1.328 + Append(data); 1.329 + contents_ += '\0'; 1.330 + return *this; 1.331 + } 1.332 + 1.333 + // Append at most SIZE bytes from DATA; if DATA is less than SIZE bytes 1.334 + // long, pad with '\0' characters (so exactly SIZE bytes are appended). 1.335 + Section &AppendCString(const string &data, size_t size) { 1.336 + contents_.append(data, 0, size); 1.337 + if (data.size() < size) 1.338 + Append(size - data.size(), 0); 1.339 + return *this; 1.340 + } 1.341 + 1.342 + // Append VALUE or LABEL to this section, with the given bit width and 1.343 + // endianness. Return a reference to this section. 1.344 + // 1.345 + // The names of these functions have the form <ENDIANNESS><BITWIDTH>: 1.346 + // <ENDIANNESS> is either 'L' (little-endian, least significant byte first), 1.347 + // 'B' (big-endian, most significant byte first), or 1.348 + // 'D' (default, the section's default endianness) 1.349 + // <BITWIDTH> is 8, 16, 32, or 64. 1.350 + // 1.351 + // Since endianness doesn't matter for a single byte, all the 1.352 + // <BITWIDTH>=8 functions are equivalent. 1.353 + // 1.354 + // These can be used to write both signed and unsigned values, as 1.355 + // the compiler will properly sign-extend a signed value before 1.356 + // passing it to the function, at which point the function's 1.357 + // behavior is the same either way. 
1.358 + Section &L8(uint8_t value) { contents_ += value; return *this; } 1.359 + Section &B8(uint8_t value) { contents_ += value; return *this; } 1.360 + Section &D8(uint8_t value) { contents_ += value; return *this; } 1.361 + Section &L16(uint16_t), &L32(uint32_t), &L64(uint64_t), 1.362 + &B16(uint16_t), &B32(uint32_t), &B64(uint64_t), 1.363 + &D16(uint16_t), &D32(uint32_t), &D64(uint64_t); 1.364 + Section &L8(const Label &label), &L16(const Label &label), 1.365 + &L32(const Label &label), &L64(const Label &label), 1.366 + &B8(const Label &label), &B16(const Label &label), 1.367 + &B32(const Label &label), &B64(const Label &label), 1.368 + &D8(const Label &label), &D16(const Label &label), 1.369 + &D32(const Label &label), &D64(const Label &label); 1.370 + 1.371 + // Append VALUE in a signed LEB128 (Little-Endian Base 128) form. 1.372 + // 1.373 + // The signed LEB128 representation of an integer N is a variable 1.374 + // number of bytes: 1.375 + // 1.376 + // - If N is between -0x40 and 0x3f, then its signed LEB128 1.377 + // representation is a single byte whose value is N & 0x7f. 1.378 + // 1.379 + // - Otherwise, its signed LEB128 representation is (N & 0x7f) | 1.380 + // 0x80, followed by the signed LEB128 representation of N / 128, 1.381 + // rounded towards negative infinity. 1.382 + // 1.383 + // In other words, we break VALUE into groups of seven bits, put 1.384 + // them in little-endian order, and then write them as eight-bit 1.385 + // bytes with the high bit on all but the last. 1.386 + // 1.387 + // Note that VALUE cannot be a Label (we would have to implement 1.388 + // relaxation). 1.389 + Section &LEB128(long long value); 1.390 + 1.391 + // Append VALUE in unsigned LEB128 (Little-Endian Base 128) form. 1.392 + // 1.393 + // The unsigned LEB128 representation of an integer N is a variable 1.394 + // number of bytes: 1.395 + // 1.396 + // - If N is between 0 and 0x7f, then its unsigned LEB128 1.397 + // representation is a single byte whose value is N. 
1.398 + // 1.399 + // - Otherwise, its unsigned LEB128 representation is (N & 0x7f) | 1.400 + // 0x80, followed by the unsigned LEB128 representation of N / 1.401 + // 128, rounded down. 1.402 + // 1.403 + // Note that VALUE cannot be a Label (we would have to implement 1.404 + // relaxation). 1.405 + Section &ULEB128(uint64_t value); 1.406 + 1.407 + // Jump to the next location aligned on an ALIGNMENT-byte boundary, 1.408 + // relative to the start of the section. Fill the gap with PAD_BYTE. 1.409 + // ALIGNMENT must be a power of two. Return a reference to this 1.410 + // section. 1.411 + Section &Align(size_t alignment, uint8_t pad_byte = 0); 1.412 + 1.413 + // Clear the contents of this section. 1.414 + void Clear(); 1.415 + 1.416 + // Return the current size of the section. 1.417 + size_t Size() const { return contents_.size(); } 1.418 + 1.419 + // Return a label representing the start of the section. 1.420 + // 1.421 + // It is up to the user whether this label represents the section's 1.422 + // position in an object file, the section's address in memory, or 1.423 + // what have you; some applications may need both, in which case 1.424 + // this simple-minded interface won't be enough. This class only 1.425 + // provides a single start label, for use with the Here and Mark 1.426 + // member functions. 1.427 + // 1.428 + // Ideally, we'd provide this in a subclass that actually knows more 1.429 + // about the application at hand and can provide an appropriate 1.430 + // collection of start labels. But then the appending member 1.431 + // functions like Append and D32 would return a reference to the 1.432 + // base class, not the derived class, and the chaining won't work. 1.433 + // Since the only value here is in pretty notation, that's a fatal 1.434 + // flaw. 
1.435 + Label start() const { return start_; } 1.436 + 1.437 + // Return a label representing the point at which the next Appended 1.438 + // item will appear in the section, relative to start(). 1.439 + Label Here() const { return start_ + Size(); } 1.440 + 1.441 + // Set *LABEL to Here, and return a reference to this section. 1.442 + Section &Mark(Label *label) { *label = Here(); return *this; } 1.443 + 1.444 + // If there are no undefined label references left in this 1.445 + // section, set *CONTENTS to the contents of this section, as a 1.446 + // string, and clear this section. Return true on success, or false 1.447 + // if there were still undefined labels. 1.448 + bool GetContents(string *contents); 1.449 + 1.450 + private: 1.451 + // Used internally. A reference to a label's value. 1.452 + struct Reference { 1.453 + Reference(size_t set_offset, Endianness set_endianness, size_t set_size, 1.454 + const Label &set_label) 1.455 + : offset(set_offset), endianness(set_endianness), size(set_size), 1.456 + label(set_label) { } 1.457 + 1.458 + // The offset of the reference within the section. 1.459 + size_t offset; 1.460 + 1.461 + // The endianness of the reference. 1.462 + Endianness endianness; 1.463 + 1.464 + // The size of the reference. 1.465 + size_t size; 1.466 + 1.467 + // The label to which this is a reference. 1.468 + Label label; 1.469 + }; 1.470 + 1.471 + // The default endianness of this section. 1.472 + Endianness endianness_; 1.473 + 1.474 + // The contents of the section. 1.475 + string contents_; 1.476 + 1.477 + // References to labels within those contents. 1.478 + vector<Reference> references_; 1.479 + 1.480 + // A label referring to the beginning of the section. 1.481 + Label start_; 1.482 +}; 1.483 + 1.484 +} // namespace test_assembler 1.485 +} // namespace google_breakpad 1.486 + 1.487 +#endif // PROCESSOR_TEST_ASSEMBLER_H_