michael@0: /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ michael@0: /* vim: set ts=8 sts=2 et sw=2 tw=80: */ michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this michael@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: michael@0: #ifndef LulMainInt_h michael@0: #define LulMainInt_h michael@0: michael@0: #include "LulPlatformMacros.h" michael@0: michael@0: #include michael@0: michael@0: #include "mozilla/Assertions.h" michael@0: michael@0: // This file is provides internal interface inside LUL. If you are an michael@0: // end-user of LUL, do not include it in your code. The end-user michael@0: // interface is in LulMain.h. michael@0: michael@0: michael@0: namespace lul { michael@0: michael@0: //////////////////////////////////////////////////////////////// michael@0: // DW_REG_ constants // michael@0: //////////////////////////////////////////////////////////////// michael@0: michael@0: // These are the Dwarf CFI register numbers, as (presumably) defined michael@0: // in the ELF ABI supplements for each architecture. michael@0: michael@0: enum DW_REG_NUMBER { michael@0: // No real register has this number. It's convenient to be able to michael@0: // treat the CFA (Canonical Frame Address) as "just another michael@0: // register", though. michael@0: DW_REG_CFA = -1, michael@0: #if defined(LUL_ARCH_arm) michael@0: // ARM registers michael@0: DW_REG_ARM_R7 = 7, michael@0: DW_REG_ARM_R11 = 11, michael@0: DW_REG_ARM_R12 = 12, michael@0: DW_REG_ARM_R13 = 13, michael@0: DW_REG_ARM_R14 = 14, michael@0: DW_REG_ARM_R15 = 15, michael@0: #elif defined(LUL_ARCH_x64) michael@0: // Because the X86 (32 bit) and AMD64 (64 bit) summarisers are michael@0: // combined, a merged set of register constants is needed. michael@0: DW_REG_INTEL_XBP = 6, michael@0: DW_REG_INTEL_XSP = 7, michael@0: DW_REG_INTEL_XIP = 16, michael@0: #elif defined(LUL_ARCH_x86) michael@0: DW_REG_INTEL_XBP = 5, michael@0: DW_REG_INTEL_XSP = 4, michael@0: DW_REG_INTEL_XIP = 8, michael@0: #else michael@0: # error "Unknown arch" michael@0: #endif michael@0: }; michael@0: michael@0: michael@0: //////////////////////////////////////////////////////////////// michael@0: // LExpr // michael@0: //////////////////////////////////////////////////////////////// michael@0: michael@0: // An expression -- very primitive. Denotes either "register + michael@0: // offset" or a dereferenced version of the same. So as to allow michael@0: // convenient handling of Dwarf-derived unwind info, the register may michael@0: // also denote the CFA. A large number of these need to be stored, so michael@0: // we ensure it fits into 8 bytes. See comment below on RuleSet to michael@0: // see how expressions fit into the bigger picture. michael@0: michael@0: struct LExpr { michael@0: // Denotes an expression with no value. michael@0: LExpr() michael@0: : mHow(UNKNOWN) michael@0: , mReg(0) michael@0: , mOffset(0) michael@0: {} michael@0: michael@0: // Denotes any expressible expression. michael@0: LExpr(uint8_t how, int16_t reg, int32_t offset) michael@0: : mHow(how) michael@0: , mReg(reg) michael@0: , mOffset(offset) michael@0: {} michael@0: michael@0: // Change the offset for an expression that references memory. michael@0: LExpr add_delta(long delta) michael@0: { michael@0: MOZ_ASSERT(mHow == NODEREF); michael@0: // If this is a non-debug build and the above assertion would have michael@0: // failed, at least return LExpr() so that the machinery that uses michael@0: // the resulting expression fails in a repeatable way. michael@0: return (mHow == NODEREF) ? LExpr(mHow, mReg, mOffset+delta) michael@0: : LExpr(); // Gone bad michael@0: } michael@0: michael@0: // Dereference an expression that denotes a memory address. michael@0: LExpr deref() michael@0: { michael@0: MOZ_ASSERT(mHow == NODEREF); michael@0: // Same rationale as for add_delta(). michael@0: return (mHow == NODEREF) ? LExpr(DEREF, mReg, mOffset) michael@0: : LExpr(); // Gone bad michael@0: } michael@0: michael@0: // Representation of expressions. If |mReg| is DW_REG_CFA (-1) then michael@0: // it denotes the CFA. All other allowed values for |mReg| are michael@0: // nonnegative and are DW_REG_ values. michael@0: michael@0: enum { UNKNOWN=0, // This LExpr denotes no value. michael@0: NODEREF, // Value is (mReg + mOffset). michael@0: DEREF }; // Value is *(mReg + mOffset). michael@0: michael@0: uint8_t mHow; // UNKNOWN, NODEREF or DEREF michael@0: int16_t mReg; // A DW_REG_ value michael@0: int32_t mOffset; // 32-bit signed offset should be more than enough. michael@0: }; michael@0: michael@0: static_assert(sizeof(LExpr) <= 8, "LExpr size changed unexpectedly"); michael@0: michael@0: michael@0: //////////////////////////////////////////////////////////////// michael@0: // RuleSet // michael@0: //////////////////////////////////////////////////////////////// michael@0: michael@0: // This is platform-dependent. For some address range, describes how michael@0: // to recover the CFA and then how to recover the registers for the michael@0: // previous frame. michael@0: // michael@0: // The set of LExprs contained in a given RuleSet describe a DAG which michael@0: // says how to compute the caller's registers ("new registers") from michael@0: // the callee's registers ("old registers"). The DAG can contain a michael@0: // single internal node, which is the value of the CFA for the callee. michael@0: // It would be possible to construct a DAG that omits the CFA, but michael@0: // including it makes the summarisers simpler, and the Dwarf CFI spec michael@0: // has the CFA as a central concept. michael@0: // michael@0: // For this to make sense, |mCfaExpr| can't have michael@0: // |mReg| == DW_REG_CFA since we have no previous value for the CFA. michael@0: // All of the other |Expr| fields can -- and usually do -- specify michael@0: // |mReg| == DW_REG_CFA. michael@0: // michael@0: // With that in place, the unwind algorithm proceeds as follows. michael@0: // michael@0: // (0) Initially: we have values for the old registers, and a memory michael@0: // image. michael@0: // michael@0: // (1) Compute the CFA by evaluating |mCfaExpr|. Add the computed michael@0: // value to the set of "old registers". michael@0: // michael@0: // (2) Compute values for the registers by evaluating all of the other michael@0: // |Expr| fields in the RuleSet. These can depend on both the old michael@0: // register values and the just-computed CFA. michael@0: // michael@0: // If we are unwinding without computing a CFA, perhaps because the michael@0: // RuleSets are derived from EXIDX instead of Dwarf, then michael@0: // |mCfaExpr.mHow| will be LExpr::UNKNOWN, so the computed value will michael@0: // be invalid -- that is, TaggedUWord() -- and so any attempt to use michael@0: // that will result in the same value. But that's OK because the michael@0: // RuleSet would make no sense if depended on the CFA but specified no michael@0: // way to compute it. michael@0: // michael@0: // A RuleSet is not allowed to cover zero address range. Having zero michael@0: // length would break binary searching in SecMaps and PriMaps. michael@0: michael@0: class RuleSet { michael@0: public: michael@0: RuleSet(); michael@0: void Print(void(*aLog)(const char*)); michael@0: michael@0: // Find the LExpr* for a given DW_REG_ value in this class. michael@0: LExpr* ExprForRegno(DW_REG_NUMBER aRegno); michael@0: michael@0: uintptr_t mAddr; michael@0: uintptr_t mLen; michael@0: // How to compute the CFA. michael@0: LExpr mCfaExpr; michael@0: // How to compute caller register values. These may reference the michael@0: // value defined by |mCfaExpr|. michael@0: #if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86) michael@0: LExpr mXipExpr; // return address michael@0: LExpr mXspExpr; michael@0: LExpr mXbpExpr; michael@0: #elif defined(LUL_ARCH_arm) michael@0: LExpr mR15expr; // return address michael@0: LExpr mR14expr; michael@0: LExpr mR13expr; michael@0: LExpr mR12expr; michael@0: LExpr mR11expr; michael@0: LExpr mR7expr; michael@0: #else michael@0: # error "Unknown arch" michael@0: #endif michael@0: }; michael@0: michael@0: michael@0: //////////////////////////////////////////////////////////////// michael@0: // SecMap // michael@0: //////////////////////////////////////////////////////////////// michael@0: michael@0: // A SecMap may have zero address range, temporarily, whilst RuleSets michael@0: // are being added to it. But adding a zero-range SecMap to a PriMap michael@0: // will make it impossible to maintain the total order of the PriMap michael@0: // entries, and so that can't be allowed to happen. michael@0: michael@0: class SecMap { michael@0: public: michael@0: // These summarise the contained mRuleSets, in that they give michael@0: // exactly the lowest and highest addresses that any of the entries michael@0: // in this SecMap cover. Hence invariants: michael@0: // michael@0: // mRuleSets is nonempty michael@0: // <=> mSummaryMinAddr <= mSummaryMaxAddr michael@0: // && mSummaryMinAddr == mRuleSets[0].mAddr michael@0: // && mSummaryMaxAddr == mRuleSets[#rulesets-1].mAddr michael@0: // + mRuleSets[#rulesets-1].mLen - 1; michael@0: // michael@0: // This requires that no RuleSet has zero length. michael@0: // michael@0: // mRuleSets is empty michael@0: // <=> mSummaryMinAddr > mSummaryMaxAddr michael@0: // michael@0: // This doesn't constrain mSummaryMinAddr and mSummaryMaxAddr uniquely, michael@0: // so let's use mSummaryMinAddr == 1 and mSummaryMaxAddr == 0 to denote michael@0: // this case. michael@0: michael@0: SecMap(void(*aLog)(const char*)); michael@0: ~SecMap(); michael@0: michael@0: // Binary search mRuleSets to find one that brackets |ia|, or nullptr michael@0: // if none is found. It's not allowable to do this until PrepareRuleSets michael@0: // has been called first. michael@0: RuleSet* FindRuleSet(uintptr_t ia); michael@0: michael@0: // Add a RuleSet to the collection. The rule is copied in. Calling michael@0: // this makes the map non-searchable. michael@0: void AddRuleSet(RuleSet* rs); michael@0: michael@0: // Prepare the map for searching. Also, remove any rules for code michael@0: // address ranges which don't fall inside [start, +len). |len| may michael@0: // not be zero. michael@0: void PrepareRuleSets(uintptr_t start, size_t len); michael@0: michael@0: bool IsEmpty(); michael@0: michael@0: size_t Size() { return mRuleSets.size(); } michael@0: michael@0: // The min and max addresses of the addresses in the contained michael@0: // RuleSets. See comment above for invariants. michael@0: uintptr_t mSummaryMinAddr; michael@0: uintptr_t mSummaryMaxAddr; michael@0: michael@0: private: michael@0: // False whilst adding entries; true once it is safe to call FindRuleSet. michael@0: // Transition (false->true) is caused by calling PrepareRuleSets(). michael@0: bool mUsable; michael@0: michael@0: // A vector of RuleSets, sorted, nonoverlapping (post Prepare()). michael@0: std::vector mRuleSets; michael@0: michael@0: // A logging sink, for debugging. michael@0: void (*mLog)(const char*); michael@0: }; michael@0: michael@0: } // namespace lul michael@0: michael@0: #endif // ndef LulMainInt_h