michael@0: // -*- mode: C++ -*- michael@0: michael@0: // Copyright (c) 2010, Google Inc. michael@0: // All rights reserved. michael@0: // michael@0: // Redistribution and use in source and binary forms, with or without michael@0: // modification, are permitted provided that the following conditions are michael@0: // met: michael@0: // michael@0: // * Redistributions of source code must retain the above copyright michael@0: // notice, this list of conditions and the following disclaimer. michael@0: // * Redistributions in binary form must reproduce the above michael@0: // copyright notice, this list of conditions and the following disclaimer michael@0: // in the documentation and/or other materials provided with the michael@0: // distribution. michael@0: // * Neither the name of Google Inc. nor the names of its michael@0: // contributors may be used to endorse or promote products derived from michael@0: // this software without specific prior written permission. michael@0: // michael@0: // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS michael@0: // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT michael@0: // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR michael@0: // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT michael@0: // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, michael@0: // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT michael@0: // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, michael@0: // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY michael@0: // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT michael@0: // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE michael@0: // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. michael@0: michael@0: // Original author: Jim Blandy michael@0: michael@0: // test-assembler.h: interface to class for building complex binary streams. michael@0: michael@0: // To test the Breakpad symbol dumper and processor thoroughly, for michael@0: // all combinations of host system and minidump processor michael@0: // architecture, we need to be able to easily generate complex test michael@0: // data like debugging information and minidump files. michael@0: // michael@0: // For example, if we want our unit tests to provide full code michael@0: // coverage for stack walking, it may be difficult to persuade the michael@0: // compiler to generate every possible sort of stack walking michael@0: // information that we want to support; there are probably DWARF CFI michael@0: // opcodes that GCC never emits. Similarly, if we want to test our michael@0: // error handling, we will need to generate damaged minidumps or michael@0: // debugging information that (we hope) the client or compiler will michael@0: // never produce on its own. michael@0: // michael@0: // google_breakpad::TestAssembler provides a predictable and michael@0: // (relatively) simple way to generate complex formatted data streams michael@0: // like minidumps and CFI. Furthermore, because TestAssembler is michael@0: // portable, developers without access to (say) Visual Studio or a michael@0: // SPARC assembler can still work on test data for those targets. michael@0: michael@0: #ifndef PROCESSOR_TEST_ASSEMBLER_H_ michael@0: #define PROCESSOR_TEST_ASSEMBLER_H_ michael@0: michael@0: #include michael@0: #include michael@0: #include michael@0: michael@0: #include "common/using_std_string.h" michael@0: #include "google_breakpad/common/breakpad_types.h" michael@0: michael@0: namespace google_breakpad { michael@0: michael@0: using std::list; michael@0: using std::vector; michael@0: michael@0: namespace test_assembler { michael@0: michael@0: // A Label represents a value not yet known that we need to store in a michael@0: // section. As long as all the labels a section refers to are defined michael@0: // by the time we retrieve its contents as bytes, we can use undefined michael@0: // labels freely in that section's construction. michael@0: // michael@0: // A label can be in one of three states: michael@0: // - undefined, michael@0: // - defined as the sum of some other label and a constant, or michael@0: // - a constant. michael@0: // michael@0: // A label's value never changes, but it can accumulate constraints. michael@0: // Adding labels and integers is permitted, and yields a label. michael@0: // Subtracting a constant from a label is permitted, and also yields a michael@0: // label. Subtracting two labels that have some relationship to each michael@0: // other is permitted, and yields a constant. michael@0: // michael@0: // For example: michael@0: // michael@0: // Label a; // a's value is undefined michael@0: // Label b; // b's value is undefined michael@0: // { michael@0: // Label c = a + 4; // okay, even though a's value is unknown michael@0: // b = c + 4; // also okay; b is now a+8 michael@0: // } michael@0: // Label d = b - 2; // okay; d == a+6, even though c is gone michael@0: // d.Value(); // error: d's value is not yet known michael@0: // d - a; // is 6, even though their values are not known michael@0: // a = 12; // now b == 20, and d == 18 michael@0: // d.Value(); // 18: no longer an error michael@0: // b.Value(); // 20 michael@0: // d = 10; // error: d is already defined. michael@0: // michael@0: // Label objects' lifetimes are unconstrained: notice that, in the michael@0: // above example, even though a and b are only related through c, and michael@0: // c goes out of scope, the assignment to a sets b's value as well. In michael@0: // particular, it's not necessary to ensure that a Label lives beyond michael@0: // Sections that refer to it. michael@0: class Label { michael@0: public: michael@0: Label(); // An undefined label. michael@0: Label(uint64_t value); // A label with a fixed value michael@0: Label(const Label &value); // A label equal to another. michael@0: ~Label(); michael@0: michael@0: // Return this label's value; it must be known. michael@0: // michael@0: // Providing this as a cast operator is nifty, but the conversions michael@0: // happen in unexpected places. In particular, ISO C++ says that michael@0: // Label + size_t becomes ambigious, because it can't decide whether michael@0: // to convert the Label to a uint64_t and then to a size_t, or use michael@0: // the overloaded operator that returns a new label, even though the michael@0: // former could fail if the label is not yet defined and the latter won't. michael@0: uint64_t Value() const; michael@0: michael@0: Label &operator=(uint64_t value); michael@0: Label &operator=(const Label &value); michael@0: Label operator+(uint64_t addend) const; michael@0: Label operator-(uint64_t subtrahend) const; michael@0: uint64_t operator-(const Label &subtrahend) const; michael@0: michael@0: // We could also provide == and != that work on undefined, but michael@0: // related, labels. michael@0: michael@0: // Return true if this label's value is known. If VALUE_P is given, michael@0: // set *VALUE_P to the known value if returning true. michael@0: bool IsKnownConstant(uint64_t *value_p = NULL) const; michael@0: michael@0: // Return true if the offset from LABEL to this label is known. If michael@0: // OFFSET_P is given, set *OFFSET_P to the offset when returning true. michael@0: // michael@0: // You can think of l.KnownOffsetFrom(m, &d) as being like 'd = l-m', michael@0: // except that it also returns a value indicating whether the michael@0: // subtraction is possible given what we currently know of l and m. michael@0: // It can be possible even if we don't know l and m's values. For michael@0: // example: michael@0: // michael@0: // Label l, m; michael@0: // m = l + 10; michael@0: // l.IsKnownConstant(); // false michael@0: // m.IsKnownConstant(); // false michael@0: // uint64_t d; michael@0: // l.IsKnownOffsetFrom(m, &d); // true, and sets d to -10. michael@0: // l-m // -10 michael@0: // m-l // 10 michael@0: // m.Value() // error: m's value is not known michael@0: bool IsKnownOffsetFrom(const Label &label, uint64_t *offset_p = NULL) const; michael@0: michael@0: private: michael@0: // A label's value, or if that is not yet known, how the value is michael@0: // related to other labels' values. A binding may be: michael@0: // - a known constant, michael@0: // - constrained to be equal to some other binding plus a constant, or michael@0: // - unconstrained, and free to take on any value. michael@0: // michael@0: // Many labels may point to a single binding, and each binding may michael@0: // refer to another, so bindings and labels form trees whose leaves michael@0: // are labels, whose interior nodes (and roots) are bindings, and michael@0: // where links point from children to parents. Bindings are michael@0: // reference counted, allowing labels to be lightweight, copyable, michael@0: // assignable, placed in containers, and so on. michael@0: class Binding { michael@0: public: michael@0: Binding(); michael@0: Binding(uint64_t addend); michael@0: ~Binding(); michael@0: michael@0: // Increment our reference count. michael@0: void Acquire() { reference_count_++; }; michael@0: // Decrement our reference count, and return true if it is zero. michael@0: bool Release() { return --reference_count_ == 0; } michael@0: michael@0: // Set this binding to be equal to BINDING + ADDEND. If BINDING is michael@0: // NULL, then set this binding to the known constant ADDEND. michael@0: // Update every binding on this binding's chain to point directly michael@0: // to BINDING, or to be a constant, with addends adjusted michael@0: // appropriately. michael@0: void Set(Binding *binding, uint64_t value); michael@0: michael@0: // Return what we know about the value of this binding. michael@0: // - If this binding's value is a known constant, set BASE to michael@0: // NULL, and set ADDEND to its value. michael@0: // - If this binding is not a known constant but related to other michael@0: // bindings, set BASE to the binding at the end of the relation michael@0: // chain (which will always be unconstrained), and set ADDEND to the michael@0: // value to add to that binding's value to get this binding's michael@0: // value. michael@0: // - If this binding is unconstrained, set BASE to this, and leave michael@0: // ADDEND unchanged. michael@0: void Get(Binding **base, uint64_t *addend); michael@0: michael@0: private: michael@0: // There are three cases: michael@0: // michael@0: // - A binding representing a known constant value has base_ NULL, michael@0: // and addend_ equal to the value. michael@0: // michael@0: // - A binding representing a completely unconstrained value has michael@0: // base_ pointing to this; addend_ is unused. michael@0: // michael@0: // - A binding whose value is related to some other binding's michael@0: // value has base_ pointing to that other binding, and addend_ michael@0: // set to the amount to add to that binding's value to get this michael@0: // binding's value. We only represent relationships of the form michael@0: // x = y+c. michael@0: // michael@0: // Thus, the bind_ links form a chain terminating in either a michael@0: // known constant value or a completely unconstrained value. Most michael@0: // operations on bindings do path compression: they change every michael@0: // binding on the chain to point directly to the final value, michael@0: // adjusting addends as appropriate. michael@0: Binding *base_; michael@0: uint64_t addend_; michael@0: michael@0: // The number of Labels and Bindings pointing to this binding. michael@0: // (When a binding points to itself, indicating a completely michael@0: // unconstrained binding, that doesn't count as a reference.) michael@0: int reference_count_; michael@0: }; michael@0: michael@0: // This label's value. michael@0: Binding *value_; michael@0: }; michael@0: michael@0: inline Label operator+(uint64_t a, const Label &l) { return l + a; } michael@0: // Note that int-Label isn't defined, as negating a Label is not an michael@0: // operation we support. michael@0: michael@0: // Conventions for representing larger numbers as sequences of bytes. michael@0: enum Endianness { michael@0: kBigEndian, // Big-endian: the most significant byte comes first. michael@0: kLittleEndian, // Little-endian: the least significant byte comes first. michael@0: kUnsetEndian, // used internally michael@0: }; michael@0: michael@0: // A section is a sequence of bytes, constructed by appending bytes michael@0: // to the end. Sections have a convenient and flexible set of member michael@0: // functions for appending data in various formats: big-endian and michael@0: // little-endian signed and unsigned values of different sizes; michael@0: // LEB128 and ULEB128 values (see below), and raw blocks of bytes. michael@0: // michael@0: // If you need to append a value to a section that is not convenient michael@0: // to compute immediately, you can create a label, append the michael@0: // label's value to the section, and then set the label's value michael@0: // later, when it's convenient to do so. Once a label's value is michael@0: // known, the section class takes care of updating all previously michael@0: // appended references to it. michael@0: // michael@0: // Once all the labels to which a section refers have had their michael@0: // values determined, you can get a copy of the section's contents michael@0: // as a string. michael@0: // michael@0: // Note that there is no specified "start of section" label. This is michael@0: // because there are typically several different meanings for "the michael@0: // start of a section": the offset of the section within an object michael@0: // file, the address in memory at which the section's content appear, michael@0: // and so on. It's up to the code that uses the Section class to michael@0: // keep track of these explicitly, as they depend on the application. michael@0: class Section { michael@0: public: michael@0: Section(Endianness endianness = kUnsetEndian) michael@0: : endianness_(endianness) { }; michael@0: michael@0: // A base class destructor should be either public and virtual, michael@0: // or protected and nonvirtual. michael@0: virtual ~Section() { }; michael@0: michael@0: // Set the default endianness of this section to ENDIANNESS. This michael@0: // sets the behavior of the D appending functions. If the michael@0: // assembler's default endianness was set, this is the michael@0: void set_endianness(Endianness endianness) { michael@0: endianness_ = endianness; michael@0: } michael@0: michael@0: // Return the default endianness of this section. michael@0: Endianness endianness() const { return endianness_; } michael@0: michael@0: // Append the SIZE bytes at DATA or the contents of STRING to the michael@0: // end of this section. Return a reference to this section. michael@0: Section &Append(const uint8_t *data, size_t size) { michael@0: contents_.append(reinterpret_cast(data), size); michael@0: return *this; michael@0: }; michael@0: Section &Append(const string &data) { michael@0: contents_.append(data); michael@0: return *this; michael@0: }; michael@0: michael@0: // Append SIZE copies of BYTE to the end of this section. Return a michael@0: // reference to this section. michael@0: Section &Append(size_t size, uint8_t byte) { michael@0: contents_.append(size, (char) byte); michael@0: return *this; michael@0: } michael@0: michael@0: // Append NUMBER to this section. ENDIANNESS is the endianness to michael@0: // use to write the number. SIZE is the length of the number in michael@0: // bytes. Return a reference to this section. michael@0: Section &Append(Endianness endianness, size_t size, uint64_t number); michael@0: Section &Append(Endianness endianness, size_t size, const Label &label); michael@0: michael@0: // Append SECTION to the end of this section. The labels SECTION michael@0: // refers to need not be defined yet. michael@0: // michael@0: // Note that this has no effect on any Labels' values, or on michael@0: // SECTION. If placing SECTION within 'this' provides new michael@0: // constraints on existing labels' values, then it's up to the michael@0: // caller to fiddle with those labels as needed. michael@0: Section &Append(const Section §ion); michael@0: michael@0: // Append the contents of DATA as a series of bytes terminated by michael@0: // a NULL character. michael@0: Section &AppendCString(const string &data) { michael@0: Append(data); michael@0: contents_ += '\0'; michael@0: return *this; michael@0: } michael@0: michael@0: // Append at most SIZE bytes from DATA; if DATA is less than SIZE bytes michael@0: // long, pad with '\0' characters. michael@0: Section &AppendCString(const string &data, size_t size) { michael@0: contents_.append(data, 0, size); michael@0: if (data.size() < size) michael@0: Append(size - data.size(), 0); michael@0: return *this; michael@0: } michael@0: michael@0: // Append VALUE or LABEL to this section, with the given bit width and michael@0: // endianness. Return a reference to this section. michael@0: // michael@0: // The names of these functions have the form : michael@0: // is either 'L' (little-endian, least significant byte first), michael@0: // 'B' (big-endian, most significant byte first), or michael@0: // 'D' (default, the section's default endianness) michael@0: // is 8, 16, 32, or 64. michael@0: // michael@0: // Since endianness doesn't matter for a single byte, all the michael@0: // =8 functions are equivalent. michael@0: // michael@0: // These can be used to write both signed and unsigned values, as michael@0: // the compiler will properly sign-extend a signed value before michael@0: // passing it to the function, at which point the function's michael@0: // behavior is the same either way. michael@0: Section &L8(uint8_t value) { contents_ += value; return *this; } michael@0: Section &B8(uint8_t value) { contents_ += value; return *this; } michael@0: Section &D8(uint8_t value) { contents_ += value; return *this; } michael@0: Section &L16(uint16_t), &L32(uint32_t), &L64(uint64_t), michael@0: &B16(uint16_t), &B32(uint32_t), &B64(uint64_t), michael@0: &D16(uint16_t), &D32(uint32_t), &D64(uint64_t); michael@0: Section &L8(const Label &label), &L16(const Label &label), michael@0: &L32(const Label &label), &L64(const Label &label), michael@0: &B8(const Label &label), &B16(const Label &label), michael@0: &B32(const Label &label), &B64(const Label &label), michael@0: &D8(const Label &label), &D16(const Label &label), michael@0: &D32(const Label &label), &D64(const Label &label); michael@0: michael@0: // Append VALUE in a signed LEB128 (Little-Endian Base 128) form. michael@0: // michael@0: // The signed LEB128 representation of an integer N is a variable michael@0: // number of bytes: michael@0: // michael@0: // - If N is between -0x40 and 0x3f, then its signed LEB128 michael@0: // representation is a single byte whose value is N. michael@0: // michael@0: // - Otherwise, its signed LEB128 representation is (N & 0x7f) | michael@0: // 0x80, followed by the signed LEB128 representation of N / 128, michael@0: // rounded towards negative infinity. michael@0: // michael@0: // In other words, we break VALUE into groups of seven bits, put michael@0: // them in little-endian order, and then write them as eight-bit michael@0: // bytes with the high bit on all but the last. michael@0: // michael@0: // Note that VALUE cannot be a Label (we would have to implement michael@0: // relaxation). michael@0: Section &LEB128(long long value); michael@0: michael@0: // Append VALUE in unsigned LEB128 (Little-Endian Base 128) form. michael@0: // michael@0: // The unsigned LEB128 representation of an integer N is a variable michael@0: // number of bytes: michael@0: // michael@0: // - If N is between 0 and 0x7f, then its unsigned LEB128 michael@0: // representation is a single byte whose value is N. michael@0: // michael@0: // - Otherwise, its unsigned LEB128 representation is (N & 0x7f) | michael@0: // 0x80, followed by the unsigned LEB128 representation of N / michael@0: // 128, rounded towards negative infinity. michael@0: // michael@0: // Note that VALUE cannot be a Label (we would have to implement michael@0: // relaxation). michael@0: Section &ULEB128(uint64_t value); michael@0: michael@0: // Jump to the next location aligned on an ALIGNMENT-byte boundary, michael@0: // relative to the start of the section. Fill the gap with PAD_BYTE. michael@0: // ALIGNMENT must be a power of two. Return a reference to this michael@0: // section. michael@0: Section &Align(size_t alignment, uint8_t pad_byte = 0); michael@0: michael@0: // Clear the contents of this section. michael@0: void Clear(); michael@0: michael@0: // Return the current size of the section. michael@0: size_t Size() const { return contents_.size(); } michael@0: michael@0: // Return a label representing the start of the section. michael@0: // michael@0: // It is up to the user whether this label represents the section's michael@0: // position in an object file, the section's address in memory, or michael@0: // what have you; some applications may need both, in which case michael@0: // this simple-minded interface won't be enough. This class only michael@0: // provides a single start label, for use with the Here and Mark michael@0: // member functions. michael@0: // michael@0: // Ideally, we'd provide this in a subclass that actually knows more michael@0: // about the application at hand and can provide an appropriate michael@0: // collection of start labels. But then the appending member michael@0: // functions like Append and D32 would return a reference to the michael@0: // base class, not the derived class, and the chaining won't work. michael@0: // Since the only value here is in pretty notation, that's a fatal michael@0: // flaw. michael@0: Label start() const { return start_; } michael@0: michael@0: // Return a label representing the point at which the next Appended michael@0: // item will appear in the section, relative to start(). michael@0: Label Here() const { return start_ + Size(); } michael@0: michael@0: // Set *LABEL to Here, and return a reference to this section. michael@0: Section &Mark(Label *label) { *label = Here(); return *this; } michael@0: michael@0: // If there are no undefined label references left in this michael@0: // section, set CONTENTS to the contents of this section, as a michael@0: // string, and clear this section. Return true on success, or false michael@0: // if there were still undefined labels. michael@0: bool GetContents(string *contents); michael@0: michael@0: private: michael@0: // Used internally. A reference to a label's value. michael@0: struct Reference { michael@0: Reference(size_t set_offset, Endianness set_endianness, size_t set_size, michael@0: const Label &set_label) michael@0: : offset(set_offset), endianness(set_endianness), size(set_size), michael@0: label(set_label) { } michael@0: michael@0: // The offset of the reference within the section. michael@0: size_t offset; michael@0: michael@0: // The endianness of the reference. michael@0: Endianness endianness; michael@0: michael@0: // The size of the reference. michael@0: size_t size; michael@0: michael@0: // The label to which this is a reference. michael@0: Label label; michael@0: }; michael@0: michael@0: // The default endianness of this section. michael@0: Endianness endianness_; michael@0: michael@0: // The contents of the section. michael@0: string contents_; michael@0: michael@0: // References to labels within those contents. michael@0: vector references_; michael@0: michael@0: // A label referring to the beginning of the section. michael@0: Label start_; michael@0: }; michael@0: michael@0: } // namespace test_assembler michael@0: } // namespace google_breakpad michael@0: michael@0: #endif // PROCESSOR_TEST_ASSEMBLER_H_