--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/profiler/LulMain.cpp	Wed Dec 31 06:09:35 2014 +0100
@@ -0,0 +1,1878 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "LulMain.h"
+
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#include <algorithm>  // std::sort
+#include <string>
+
+#include "mozilla/Assertions.h"
+#include "mozilla/ArrayUtils.h"
+#include "mozilla/MemoryChecking.h"
+
+#include "LulCommonExt.h"
+#include "LulElfExt.h"
+
+#include "LulMainInt.h"
+
+// Set this to 1 for verbose logging
+#define DEBUG_MAIN 0
+
+
+namespace lul {
+
+using std::string;
+using std::vector;
+
+
+////////////////////////////////////////////////////////////////
+// AutoLulRWLocker                                            //
+////////////////////////////////////////////////////////////////
+
+// This is a simple RAII class that manages acquisition and release of
+// LulRWLock reader-writer locks.
+
+class AutoLulRWLocker {
+public:
+  enum AcqMode { FOR_READING, FOR_WRITING };
+  AutoLulRWLocker(LulRWLock* aRWLock, AcqMode mode)
+    : mRWLock(aRWLock)
+  {
+    if (mode == FOR_WRITING) {
+      aRWLock->WrLock();
+    } else {
+      aRWLock->RdLock();
+    }
+  }
+  ~AutoLulRWLocker()
+  {
+    mRWLock->Unlock();
+  }
+
+private:
+  LulRWLock* mRWLock;
+};
+
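As a usage sketch, the locker is meant to be scoped to the critical section, so
the unlock runs on every exit path. |gRWLock| and the query body here are
hypothetical stand-ins, not part of this patch:

    static void QueryUnderReadLock(LulRWLock* gRWLock)
    {
      AutoLulRWLocker lock(gRWLock, AutoLulRWLocker::FOR_READING);
      // ... read-only accesses to the shared maps go here ...
    }  // lock is released here, even on an early return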
+
+////////////////////////////////////////////////////////////////
+// RuleSet                                                    //
+////////////////////////////////////////////////////////////////
+
+static const char*
+NameOf_DW_REG(int16_t aReg)
+{
+  switch (aReg) {
+    case DW_REG_CFA:       return "cfa";
+#if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
+    case DW_REG_INTEL_XBP: return "xbp";
+    case DW_REG_INTEL_XSP: return "xsp";
+    case DW_REG_INTEL_XIP: return "xip";
+#elif defined(LUL_ARCH_arm)
+    case DW_REG_ARM_R7:    return "r7";
+    case DW_REG_ARM_R11:   return "r11";
+    case DW_REG_ARM_R12:   return "r12";
+    case DW_REG_ARM_R13:   return "r13";
+    case DW_REG_ARM_R14:   return "r14";
+    case DW_REG_ARM_R15:   return "r15";
+#else
+# error "Unsupported arch"
+#endif
+    default: return "???";
+  }
+}
+
+static string
+ShowRule(const char* aNewReg, LExpr aExpr)
+{
+  char buf[64];
+  string res = string(aNewReg) + "=";
+  switch (aExpr.mHow) {
+    case LExpr::UNKNOWN:
+      res += "Unknown";
+      break;
+    case LExpr::NODEREF:
+      sprintf(buf, "%s+%d", NameOf_DW_REG(aExpr.mReg), (int)aExpr.mOffset);
+      res += buf;
+      break;
+    case LExpr::DEREF:
+      sprintf(buf, "*(%s+%d)", NameOf_DW_REG(aExpr.mReg), (int)aExpr.mOffset);
+      res += buf;
+      break;
+    default:
+      res += "???";
+      break;
+  }
+  return res;
+}
+
+void
+RuleSet::Print(void(*aLog)(const char*))
+{
+  char buf[96];
+  sprintf(buf, "[%llx .. %llx]: let ",
+          (unsigned long long int)mAddr,
+          (unsigned long long int)(mAddr + mLen - 1));
+  string res = string(buf);
+  res += ShowRule("cfa", mCfaExpr);
+  res += " in";
+  // For each reg we care about, print the recovery expression.
+#if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
+  res += ShowRule(" RA", mXipExpr);
+  res += ShowRule(" SP", mXspExpr);
+  res += ShowRule(" BP", mXbpExpr);
+#elif defined(LUL_ARCH_arm)
+  res += ShowRule(" R15", mR15expr);
+  res += ShowRule(" R7",  mR7expr);
+  res += ShowRule(" R11", mR11expr);
+  res += ShowRule(" R12", mR12expr);
+  res += ShowRule(" R13", mR13expr);
+  res += ShowRule(" R14", mR14expr);
+#else
+# error "Unsupported arch"
+#endif
+  aLog(res.c_str());
+}
+
+LExpr*
+RuleSet::ExprForRegno(DW_REG_NUMBER aRegno) {
+  switch (aRegno) {
+    case DW_REG_CFA: return &mCfaExpr;
+#   if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
+    case DW_REG_INTEL_XIP: return &mXipExpr;
+    case DW_REG_INTEL_XSP: return &mXspExpr;
+    case DW_REG_INTEL_XBP: return &mXbpExpr;
+#   elif defined(LUL_ARCH_arm)
+    case DW_REG_ARM_R15:   return &mR15expr;
+    case DW_REG_ARM_R14:   return &mR14expr;
+    case DW_REG_ARM_R13:   return &mR13expr;
+    case DW_REG_ARM_R12:   return &mR12expr;
+    case DW_REG_ARM_R11:   return &mR11expr;
+    case DW_REG_ARM_R7:    return &mR7expr;
+#   else
+#     error "Unknown arch"
+#   endif
+    default: return nullptr;
+  }
+}
+
+RuleSet::RuleSet()
+{
+  mAddr = 0;
+  mLen  = 0;
+  // The only other fields are of type LExpr and those are initialised
+  // by LExpr::LExpr().
+}
+
+
+////////////////////////////////////////////////////////////////
+// SecMap                                                     //
+////////////////////////////////////////////////////////////////
+
+// See header file LulMainInt.h for comments about invariants.
+
+SecMap::SecMap(void(*aLog)(const char*))
+  : mSummaryMinAddr(1)
+  , mSummaryMaxAddr(0)
+  , mUsable(true)
+  , mLog(aLog)
+{}
+
+SecMap::~SecMap() {
+  mRuleSets.clear();
+}
+
+RuleSet*
+SecMap::FindRuleSet(uintptr_t ia) {
+  // Binary search mRuleSets to find one that brackets |ia|.
+  // lo and hi need to be signed, else the loop termination tests
+  // don't work properly.  Note that this works correctly even when
+  // mRuleSets.size() == 0.
+
+  // Can't do this until the array has been sorted and preened.
+  MOZ_ASSERT(mUsable);
+
+  long int lo = 0;
+  long int hi = (long int)mRuleSets.size() - 1;
+  while (true) {
+    // current unsearched space is from lo to hi, inclusive.
+    if (lo > hi) {
+      // not found
+      return nullptr;
+    }
+    long int  mid         = lo + ((hi - lo) / 2);
+    RuleSet*  mid_ruleSet = &mRuleSets[mid];
+    uintptr_t mid_minAddr = mid_ruleSet->mAddr;
+    uintptr_t mid_maxAddr = mid_minAddr + mid_ruleSet->mLen - 1;
+    if (ia < mid_minAddr) { hi = mid-1; continue; }
+    if (ia > mid_maxAddr) { lo = mid+1; continue; }
+    MOZ_ASSERT(mid_minAddr <= ia && ia <= mid_maxAddr);
+    return mid_ruleSet;
+  }
+  // NOTREACHED
+}
+
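The same bracketing search, reduced to a standalone sketch over a sorted,
non-overlapping list of (addr, len) ranges. The names here are illustrative
only:

    #include <stdint.h>
    #include <vector>

    struct Range { uintptr_t addr; size_t len; };

    // Returns the index of the Range containing |ia|, or -1 if none.
    // Assumes |ranges| is sorted by addr, non-overlapping, with lens > 0.
    static long FindBracketing(const std::vector<Range>& ranges, uintptr_t ia)
    {
      long lo = 0, hi = (long)ranges.size() - 1;   // signed, as above
      while (lo <= hi) {
        long mid = lo + (hi - lo) / 2;
        uintptr_t lo_a = ranges[mid].addr;
        uintptr_t hi_a = lo_a + ranges[mid].len - 1;
        if (ia < lo_a)      { hi = mid - 1; }
        else if (ia > hi_a) { lo = mid + 1; }
        else                { return mid; }       // lo_a <= ia <= hi_a
      }
      return -1;   // not found
    }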
+// Add a RuleSet to the collection.  The rule is copied in.  Calling
+// this makes the map non-searchable.
+void
+SecMap::AddRuleSet(RuleSet* rs) {
+  mUsable = false;
+  mRuleSets.push_back(*rs);
+}
+
+
+static bool
+CmpRuleSetsByAddrLE(const RuleSet& rs1, const RuleSet& rs2) {
+  return rs1.mAddr < rs2.mAddr;
+}
+
+// Prepare the map for searching.  Completely remove any RuleSets that
+// don't fall inside the specified range [start, +len).
+void
+SecMap::PrepareRuleSets(uintptr_t aStart, size_t aLen)
+{
+  if (mRuleSets.empty()) {
+    return;
+  }
+
+  MOZ_ASSERT(aLen > 0);
+  if (aLen == 0) {
+    // This should never happen.
+    mRuleSets.clear();
+    return;
+  }
+
+  // Sort by start addresses.
+  std::sort(mRuleSets.begin(), mRuleSets.end(), CmpRuleSetsByAddrLE);
+
+  // Detect any entry not completely contained within [start, +len).
+  // Set its length to zero, so that the next pass will remove it.
+  for (size_t i = 0; i < mRuleSets.size(); ++i) {
+    RuleSet* rs = &mRuleSets[i];
+    if (rs->mLen > 0 &&
+        (rs->mAddr < aStart || rs->mAddr + rs->mLen > aStart + aLen)) {
+      rs->mLen = 0;
+    }
+  }
+
+  // Iteratively truncate any overlaps and remove any zero length
+  // entries that might result, or that may have been present
+  // initially.  Unless the input is seriously screwy, this is
+  // expected to iterate only once.
+  while (true) {
+    size_t i;
+    size_t n = mRuleSets.size();
+    size_t nZeroLen = 0;
+
+    if (n == 0) {
+      break;
+    }
+
+    for (i = 1; i < n; ++i) {
+      RuleSet* prev = &mRuleSets[i-1];
+      RuleSet* here = &mRuleSets[i];
+      MOZ_ASSERT(prev->mAddr <= here->mAddr);
+      if (prev->mAddr + prev->mLen > here->mAddr) {
+        prev->mLen = here->mAddr - prev->mAddr;
+      }
+      if (prev->mLen == 0)
+        nZeroLen++;
+    }
+
+    if (mRuleSets[n-1].mLen == 0) {
+      nZeroLen++;
+    }
+
+    // At this point, the entries are in-order and non-overlapping.
+    // If none of them are zero-length, we are done.
+    if (nZeroLen == 0) {
+      break;
+    }
+
+    // Slide back the entries to remove the zero length ones.
+    size_t j = 0;  // The write-point.
+    for (i = 0; i < n; ++i) {
+      if (mRuleSets[i].mLen == 0) {
+        continue;
+      }
+      if (j != i) mRuleSets[j] = mRuleSets[i];
+      ++j;
+    }
+    MOZ_ASSERT(i == n);
+    MOZ_ASSERT(nZeroLen <= n);
+    MOZ_ASSERT(j == n - nZeroLen);
+    while (nZeroLen > 0) {
+      mRuleSets.pop_back();
+      nZeroLen--;
+    }
+
+    MOZ_ASSERT(mRuleSets.size() == j);
+  }
+
+  size_t n = mRuleSets.size();
+
+#ifdef DEBUG
+  // Do a final check on the rules: their address ranges must be
+  // ascending, non overlapping, non zero sized.
+  if (n > 0) {
+    MOZ_ASSERT(mRuleSets[0].mLen > 0);
+    for (size_t i = 1; i < n; ++i) {
+      RuleSet* prev = &mRuleSets[i-1];
+      RuleSet* here = &mRuleSets[i];
+      MOZ_ASSERT(prev->mAddr < here->mAddr);
+      MOZ_ASSERT(here->mLen > 0);
+      MOZ_ASSERT(prev->mAddr + prev->mLen <= here->mAddr);
+    }
+  }
+#endif
+
+  // Set the summary min and max address values.
+  if (n == 0) {
+    // Use the values defined in comments in the class declaration.
+    mSummaryMinAddr = 1;
+    mSummaryMaxAddr = 0;
+  } else {
+    mSummaryMinAddr = mRuleSets[0].mAddr;
+    mSummaryMaxAddr = mRuleSets[n-1].mAddr + mRuleSets[n-1].mLen - 1;
+  }
+  char buf[150];
+  snprintf(buf, sizeof(buf),
+           "PrepareRuleSets: %d entries, smin/smax 0x%llx, 0x%llx\n",
+           (int)n, (unsigned long long int)mSummaryMinAddr,
+           (unsigned long long int)mSummaryMaxAddr);
+  buf[sizeof(buf)-1] = 0;
+  mLog(buf);
+
+  // Is now usable for binary search.
+  mUsable = true;
+
+  if (0) {
+    mLog("\nRulesets after preening\n");
+    for (size_t i = 0; i < mRuleSets.size(); ++i) {
+      mRuleSets[i].Print(mLog);
+      mLog("\n");
+    }
+    mLog("\n");
+  }
+}
+
+bool SecMap::IsEmpty() {
+  return mRuleSets.empty();
+}
+
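A worked example of the preening pass, with hypothetical (addr, len) entries.
The first pass truncates the first entry's overlap with the second; the
compaction pass then slides away the zero-length third entry, leaving ranges
that are ascending, non-overlapping and non-zero-sized, so the loop
terminates after one iteration:

    sorted input : (0x1000, 0x20) (0x1010, 0x10) (0x1020, 0x00) (0x1030, 0x10)
    truncate     : (0x1000, 0x10) (0x1010, 0x10) (0x1020, 0x00) (0x1030, 0x10)
    compact      : (0x1000, 0x10) (0x1010, 0x10) (0x1030, 0x10)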
+
+////////////////////////////////////////////////////////////////
+// SegArray                                                   //
+////////////////////////////////////////////////////////////////
+
+// A SegArray holds a set of address ranges that together exactly
+// cover an address range, with no overlaps or holes.  Each range has
+// an associated value, which in this case has been specialised to be
+// a simple boolean.  The representation is kept to minimal canonical
+// form in which adjacent ranges with the same associated value are
+// merged together.  Each range is represented by a |struct Seg|.
+//
+// SegArrays are used to keep track of which parts of the address
+// space are known to contain instructions.
+class SegArray {
+
+ public:
+  void add(uintptr_t lo, uintptr_t hi, bool val) {
+    if (lo > hi) {
+      return;
+    }
+    split_at(lo);
+    if (hi < UINTPTR_MAX) {
+      split_at(hi+1);
+    }
+    std::vector<Seg>::size_type iLo, iHi, i;
+    iLo = find(lo);
+    iHi = find(hi);
+    for (i = iLo; i <= iHi; ++i) {
+      mSegs[i].val = val;
+    }
+    preen();
+  }
+
+  bool getBoundingCodeSegment(/*OUT*/uintptr_t* rx_min,
+                              /*OUT*/uintptr_t* rx_max, uintptr_t addr) {
+    std::vector<Seg>::size_type i = find(addr);
+    if (!mSegs[i].val) {
+      return false;
+    }
+    *rx_min = mSegs[i].lo;
+    *rx_max = mSegs[i].hi;
+    return true;
+  }
+
+  SegArray() {
+    Seg s(0, UINTPTR_MAX, false);
+    mSegs.push_back(s);
+  }
+
+ private:
+  struct Seg {
+    Seg(uintptr_t lo, uintptr_t hi, bool val) : lo(lo), hi(hi), val(val) {}
+    uintptr_t lo;
+    uintptr_t hi;
+    bool val;
+  };
+
+  void preen() {
+    for (std::vector<Seg>::iterator iter = mSegs.begin();
+         iter < mSegs.end()-1;
+         ++iter) {
+      if (iter[0].val != iter[1].val) {
+        continue;
+      }
+      iter[0].hi = iter[1].hi;
+      mSegs.erase(iter+1);
+      // Back up one, so as not to miss an opportunity to merge
+      // with the entry after this one.
+      --iter;
+    }
+  }
+
+  std::vector<Seg>::size_type find(uintptr_t a) {
+    long int lo = 0;
+    long int hi = (long int)mSegs.size();
+    while (true) {
+      // The unsearched space is lo .. hi inclusive.
+      if (lo > hi) {
+        // Not found.  This can't happen, since the Segs cover the
+        // entire address space.
+        return (std::vector<Seg>::size_type)(-1);
+      }
+      long int  mid    = lo + ((hi - lo) / 2);
+      uintptr_t mid_lo = mSegs[mid].lo;
+      uintptr_t mid_hi = mSegs[mid].hi;
+      if (a < mid_lo) { hi = mid-1; continue; }
+      if (a > mid_hi) { lo = mid+1; continue; }
+      return (std::vector<Seg>::size_type)mid;
+    }
+  }
+
+  void split_at(uintptr_t a) {
+    std::vector<Seg>::size_type i = find(a);
+    if (mSegs[i].lo == a) {
+      return;
+    }
+    mSegs.insert( mSegs.begin()+i+1, mSegs[i] );
+    mSegs[i].hi = a-1;
+    mSegs[i+1].lo = a;
+  }
+
+  void show() {
+    printf("<< %d entries:\n", (int)mSegs.size());
+    for (std::vector<Seg>::iterator iter = mSegs.begin();
+         iter < mSegs.end();
+         ++iter) {
+      printf("  %016llx  %016llx  %s\n",
+             (unsigned long long int)(*iter).lo,
+             (unsigned long long int)(*iter).hi,
+             (*iter).val ? "true" : "false");
+    }
+    printf(">>\n");
+  }
+
+  std::vector<Seg> mSegs;
+};
+
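A usage sketch with hypothetical addresses, showing the canonicalising merge
of adjacent same-valued segments:

    static void SegArrayExample()
    {
      SegArray segs;                    // initially [0, UINTPTR_MAX] -> false
      segs.add(0x1000, 0x1fff, true);   // mark one page as code
      segs.add(0x2000, 0x2fff, true);   // adjacent same-valued page

      // preen() has merged the two ranges into a single Seg, so:
      uintptr_t lo, hi;
      bool isCode = segs.getBoundingCodeSegment(&lo, &hi, 0x1800);
      // isCode == true, lo == 0x1000, hi == 0x2fff
      (void)isCode;
    }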
+
+////////////////////////////////////////////////////////////////
+// PriMap                                                     //
+////////////////////////////////////////////////////////////////
+
+class PriMap {
+ public:
+  PriMap(void (*aLog)(const char*))
+    : mLog(aLog)
+  {}
+
+  ~PriMap() {
+    for (std::vector<SecMap*>::iterator iter = mSecMaps.begin();
+         iter != mSecMaps.end();
+         ++iter) {
+      delete *iter;
+    }
+    mSecMaps.clear();
+  }
+
+  // This can happen with the global lock held for reading.
+  RuleSet* Lookup(uintptr_t ia) {
+    SecMap* sm = FindSecMap(ia);
+    return sm ? sm->FindRuleSet(ia) : nullptr;
+  }
+
+  // Add a secondary map.  No overlaps allowed w.r.t. existing
+  // secondary maps.  Global lock must be held for writing.
+  void AddSecMap(SecMap* aSecMap) {
+    // We can't add an empty SecMap to the PriMap.  But that's OK
+    // since we'd never be able to find anything in it anyway.
+    if (aSecMap->IsEmpty()) {
+      return;
+    }
+
+    // Iterate through the SecMaps and find the right place for this
+    // one.  At the same time, ensure that the in-order
+    // non-overlapping invariant is preserved (and, generally, holds).
+    // FIXME: this gives a cost that is O(N^2) in the total number of
+    // shared objects in the system.  ToDo: better.
+    MOZ_ASSERT(aSecMap->mSummaryMinAddr <= aSecMap->mSummaryMaxAddr);
+
+    size_t num_secMaps = mSecMaps.size();
+    uintptr_t i;
+    for (i = 0; i < num_secMaps; ++i) {
+      SecMap* sm_i = mSecMaps[i];
+      MOZ_ASSERT(sm_i->mSummaryMinAddr <= sm_i->mSummaryMaxAddr);
+      if (aSecMap->mSummaryMinAddr < sm_i->mSummaryMaxAddr) {
+        // |aSecMap| needs to be inserted immediately before mSecMaps[i].
+        break;
+      }
+    }
+    MOZ_ASSERT(i <= num_secMaps);
+    if (i == num_secMaps) {
+      // It goes at the end.
+      mSecMaps.push_back(aSecMap);
+    } else {
+      std::vector<SecMap*>::iterator iter = mSecMaps.begin() + i;
+      mSecMaps.insert(iter, aSecMap);
+    }
+    char buf[100];
+    snprintf(buf, sizeof(buf), "AddSecMap: now have %d SecMaps\n",
+             (int)mSecMaps.size());
+    buf[sizeof(buf)-1] = 0;
+    mLog(buf);
+  }
+
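On the FIXME above: since mSecMaps is kept sorted, the linear scan for the
insertion point can be replaced by a binary search. A sketch, assuming the
non-overlap invariant holds so that ordering by mSummaryMinAddr is equivalent
to the comparison used above (the vector insertion itself is still O(N), but
the scan drops to O(log N)):

    std::vector<SecMap*>::iterator it =
      std::lower_bound(mSecMaps.begin(), mSecMaps.end(), aSecMap,
                       [](const SecMap* a, const SecMap* b) {
                         return a->mSummaryMinAddr < b->mSummaryMinAddr;
                       });
    mSecMaps.insert(it, aSecMap);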
+  // Remove and delete any SecMaps in the mapping, that intersect
+  // with the specified address range.
+  void RemoveSecMapsInRange(uintptr_t avma_min, uintptr_t avma_max) {
+    MOZ_ASSERT(avma_min <= avma_max);
+    size_t num_secMaps = mSecMaps.size();
+    if (num_secMaps > 0) {
+      intptr_t i;
+      // Iterate from end to start over the vector, so as to ensure
+      // that the special case where |avma_min| and |avma_max| denote
+      // the entire address space, can be completed in time proportional
+      // to the number of elements in the map.
+      for (i = (intptr_t)num_secMaps-1; i >= 0; i--) {
+        SecMap* sm_i = mSecMaps[i];
+        if (sm_i->mSummaryMaxAddr < avma_min ||
+            avma_max < sm_i->mSummaryMinAddr) {
+          // There's no overlap.  Move on.
+          continue;
+        }
+        // We need to remove mSecMaps[i] and slide all those above it
+        // downwards to cover the hole.
+        mSecMaps.erase(mSecMaps.begin() + i);
+        delete sm_i;
+      }
+    }
+  }
+
+  // Return the number of currently contained SecMaps.
+  size_t CountSecMaps() {
+    return mSecMaps.size();
+  }
+
+  // Assess heuristically whether the given address is an instruction
+  // immediately following a call instruction.  The caller is required
+  // to hold the global lock for reading.
+  bool MaybeIsReturnPoint(TaggedUWord aInstrAddr, SegArray* aSegArray) {
+    if (!aInstrAddr.Valid()) {
+      return false;
+    }
+
+    uintptr_t ia = aInstrAddr.Value();
+
+    // Assume that nobody would be crazy enough to put code in the
+    // first or last page.
+    if (ia < 4096 || ((uintptr_t)(-ia)) < 4096) {
+      return false;
+    }
+
+    // See if it falls inside a known r-x mapped area.  Poking around
+    // outside such places risks segfaulting.
+    uintptr_t insns_min, insns_max;
+    bool b = aSegArray->getBoundingCodeSegment(&insns_min, &insns_max, ia);
+    if (!b) {
+      // no code (that we know about) at this address
+      return false;
+    }
+
+    // |ia| falls within an r-x range.  So we can
+    // safely poke around in [insns_min, insns_max].
+
+#if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
+    // Is the previous instruction recognisably a CALL?  This is
+    // common for the 32- and 64-bit versions, except for the
+    // simm32(%rip) case, which is 64-bit only.
+    //
+    // For all other cases, the 64 bit versions are either identical
+    // to the 32 bit versions, or have an optional extra leading
+    // REX.B prefix byte (0x41).  Since the extra 0x41 is optional we
+    // have to ignore it, with the convenient result that the same
+    // matching logic works for both 32- and 64-bit cases.
+
+    uint8_t* p = (uint8_t*)ia;
+# if defined(LUL_ARCH_x64)
+    // CALL simm32(%rip) == FF15 simm32
+    if (ia - 6 >= insns_min && p[-6] == 0xFF && p[-5] == 0x15) {
+      return true;
+    }
+# endif
+    // CALL rel32 == E8 rel32 (both 32- and 64-bit)
+    if (ia - 5 >= insns_min && p[-5] == 0xE8) {
+      return true;
+    }
+    // CALL *%eax .. CALL *%edi ==   FFD0 ..   FFD7 (32-bit)
+    // CALL *%rax .. CALL *%rdi ==   FFD0 ..   FFD7 (64-bit)
+    // CALL *%r8  .. CALL *%r15 == 41FFD0 .. 41FFD7 (64-bit)
+    if (ia - 2 >= insns_min &&
+        p[-2] == 0xFF && p[-1] >= 0xD0 && p[-1] <= 0xD7) {
+      return true;
+    }
+    // Almost all of the remaining cases that occur in practice are
+    // of the form CALL *simm8(reg) or CALL *simm32(reg).
+    //
+    // 64 bit cases:
+    //
+    // call  *simm8(%rax)        FF50   simm8
+    // call  *simm8(%rcx)        FF51   simm8
+    // call  *simm8(%rdx)        FF52   simm8
+    // call  *simm8(%rbx)        FF53   simm8
+    // call  *simm8(%rsp)        FF5424 simm8
+    // call  *simm8(%rbp)        FF55   simm8
+    // call  *simm8(%rsi)        FF56   simm8
+    // call  *simm8(%rdi)        FF57   simm8
+    //
+    // call  *simm8(%r8)       41FF50   simm8
+    // call  *simm8(%r9)       41FF51   simm8
+    // call  *simm8(%r10)      41FF52   simm8
+    // call  *simm8(%r11)      41FF53   simm8
+    // call  *simm8(%r12)      41FF5424 simm8
+    // call  *simm8(%r13)      41FF55   simm8
+    // call  *simm8(%r14)      41FF56   simm8
+    // call  *simm8(%r15)      41FF57   simm8
+    //
+    // call  *simm32(%rax)       FF90   simm32
+    // call  *simm32(%rcx)       FF91   simm32
+    // call  *simm32(%rdx)       FF92   simm32
+    // call  *simm32(%rbx)       FF93   simm32
+    // call  *simm32(%rsp)       FF9424 simm32
+    // call  *simm32(%rbp)       FF95   simm32
+    // call  *simm32(%rsi)       FF96   simm32
+    // call  *simm32(%rdi)       FF97   simm32
+    //
+    // call  *simm32(%r8)      41FF90   simm32
+    // call  *simm32(%r9)      41FF91   simm32
+    // call  *simm32(%r10)     41FF92   simm32
+    // call  *simm32(%r11)     41FF93   simm32
+    // call  *simm32(%r12)     41FF9424 simm32
+    // call  *simm32(%r13)     41FF95   simm32
+    // call  *simm32(%r14)     41FF96   simm32
+    // call  *simm32(%r15)     41FF97   simm32
+    //
+    // 32 bit cases:
+    //
+    // call  *simm8(%eax)        FF50   simm8
+    // call  *simm8(%ecx)        FF51   simm8
+    // call  *simm8(%edx)        FF52   simm8
+    // call  *simm8(%ebx)        FF53   simm8
+    // call  *simm8(%esp)        FF5424 simm8
+    // call  *simm8(%ebp)        FF55   simm8
+    // call  *simm8(%esi)        FF56   simm8
+    // call  *simm8(%edi)        FF57   simm8
+    //
+    // call  *simm32(%eax)       FF90   simm32
+    // call  *simm32(%ecx)       FF91   simm32
+    // call  *simm32(%edx)       FF92   simm32
+    // call  *simm32(%ebx)       FF93   simm32
+    // call  *simm32(%esp)       FF9424 simm32
+    // call  *simm32(%ebp)       FF95   simm32
+    // call  *simm32(%esi)       FF96   simm32
+    // call  *simm32(%edi)       FF97   simm32
+    if (ia - 3 >= insns_min &&
+        p[-3] == 0xFF &&
+        (p[-2] >= 0x50 && p[-2] <= 0x57 && p[-2] != 0x54)) {
+      // imm8 case, not including %esp/%rsp
+      return true;
+    }
+    if (ia - 4 >= insns_min &&
+        p[-4] == 0xFF && p[-3] == 0x54 && p[-2] == 0x24) {
+      // imm8 case for %esp/%rsp
+      return true;
+    }
+    if (ia - 6 >= insns_min &&
+        p[-6] == 0xFF &&
+        (p[-5] >= 0x90 && p[-5] <= 0x97 && p[-5] != 0x94)) {
+      // imm32 case, not including %esp/%rsp
+      return true;
+    }
+    if (ia - 7 >= insns_min &&
+        p[-7] == 0xFF && p[-6] == 0x94 && p[-5] == 0x24) {
+      // imm32 case for %esp/%rsp
+      return true;
+    }
+
+#elif defined(LUL_ARCH_arm)
+    if (ia & 1) {
+      uint16_t w0 = 0, w1 = 0;
+      // The return address has its lowest bit set, indicating a return
+      // to Thumb code.
+      ia &= ~(uintptr_t)1;
+      if (ia - 2 >= insns_min && ia - 1 <= insns_max) {
+        w1 = *(uint16_t*)(ia - 2);
+      }
+      if (ia - 4 >= insns_min && ia - 1 <= insns_max) {
+        w0 = *(uint16_t*)(ia - 4);
+      }
+      // Is it a 32-bit Thumb call insn?
+      // BL simm26 (Encoding T1); second halfword is 11x1 xxxx xxxx xxxx
+      if ((w0 & 0xF800) == 0xF000 && (w1 & 0xD000) == 0xD000) {
+        return true;
+      }
+      // BLX simm26 (Encoding T2); second halfword is 11x0 xxxx xxxx xxxx
+      if ((w0 & 0xF800) == 0xF000 && (w1 & 0xD000) == 0xC000) {
+        return true;
+      }
+      // Other possible cases:
+      // (BLX Rm, Encoding T1).
+      // BLX Rm (encoding T1, 16 bit, inspect w1 and ignore w0.)
+      // 0100 0111 1 Rm 000
+    } else {
+      // Returning to ARM code.
+      uint32_t a0 = 0;
+      if ((ia & 3) == 0 && ia - 4 >= insns_min && ia - 1 <= insns_max) {
+        a0 = *(uint32_t*)(ia - 4);
+      }
+      // Leading E forces unconditional only -- fix.  It could be
+      // anything except F, which is the deprecated NV code.
+      // BL simm26 (Encoding A1)
+      if ((a0 & 0xFF000000) == 0xEB000000) {
+        return true;
+      }
+      // Other possible cases:
+      // BLX simm26 (Encoding A2)
+      //if ((a0 & 0xFE000000) == 0xFA000000)
+      //  return true;
+      // BLX (register) (A1): BLX <c> <Rm>
+      // cond 0001 0010 1111 1111 1111 0011 Rm
+      // again, cond can be anything except NV (0xF)
+    }
+
+#else
+# error "Unsupported arch"
+#endif
+
+    // Not an insn we recognise.
+    return false;
+  }
+
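The simplest of the x86 cases above (CALL rel32, opcode E8), reduced to a
self-contained check against a byte buffer. The buffer and names are
illustrative only:

    #include <stdint.h>
    #include <stddef.h>

    // Does |offset| into |buf| look like the return point of a
    // CALL rel32 (E8 xx xx xx xx, 5 bytes)?
    static bool LooksLikeCallRel32ReturnPoint(const uint8_t* buf,
                                              size_t len, size_t offset)
    {
      return offset >= 5 && offset <= len && buf[offset - 5] == 0xE8;
    }

    // e.g. buf = {0x90, 0xE8, 0x00, 0x00, 0x00, 0x00}, offset = 6 -> true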
+ private:
+  // FindSecMap's caller must hold the global lock for reading or writing.
+  SecMap* FindSecMap(uintptr_t ia) {
+    // Binary search mSecMaps to find one that brackets |ia|.
+    // lo and hi need to be signed, else the loop termination tests
+    // don't work properly.
+    long int lo = 0;
+    long int hi = (long int)mSecMaps.size() - 1;
+    while (true) {
+      // current unsearched space is from lo to hi, inclusive.
+      if (lo > hi) {
+        // not found
+        return nullptr;
+      }
+      long int  mid         = lo + ((hi - lo) / 2);
+      SecMap*   mid_secMap  = mSecMaps[mid];
+      uintptr_t mid_minAddr = mid_secMap->mSummaryMinAddr;
+      uintptr_t mid_maxAddr = mid_secMap->mSummaryMaxAddr;
+      if (ia < mid_minAddr) { hi = mid-1; continue; }
+      if (ia > mid_maxAddr) { lo = mid+1; continue; }
+      MOZ_ASSERT(mid_minAddr <= ia && ia <= mid_maxAddr);
+      return mid_secMap;
+    }
+    // NOTREACHED
+  }
+
+ private:
+  // sorted array of per-object ranges, non overlapping, non empty
+  std::vector<SecMap*> mSecMaps;
+
+  // a logging sink, for debugging.
+  void (*mLog)(const char*);
+};
+
+
+////////////////////////////////////////////////////////////////
+// CFICache                                                   //
+////////////////////////////////////////////////////////////////
+
+// This is the thread-local cache.  It maps individual insn AVMAs to
+// the associated CFI record, which lives in LUL::mPriMap.
+//
+// The cache is a direct map hash table indexed by address.
+// It has to distinguish 3 cases:
+//
+// (1) .mRSet == (RuleSet*)0  ==>  cache slot not in use
+// (2) .mRSet == (RuleSet*)1  ==>  slot in use, no RuleSet avail
+// (3) .mRSet >  (RuleSet*)1  ==>  slot in use, RuleSet* available
+//
+// Distinguishing between (2) and (3) is important, because if we look
+// up an address in LUL::mPriMap and find there is no RuleSet, then
+// that fact needs to be cached here, so as to avoid potentially
+// expensive repeat lookups.
+
+// A CFICacheEntry::mRSet value of zero indicates that the slot is not
+// in use, and a value of one indicates that the slot is in use but
+// there is no RuleSet available.
+#define ENTRY_NOT_IN_USE     ((RuleSet*)0)
+#define NO_RULESET_AVAILABLE ((RuleSet*)1)
+
+class CFICache {
+ public:
+
+  CFICache(PriMap* aPriMap) {
+    Invalidate();
+    mPriMap = aPriMap;
+  }
+
+  void Invalidate() {
+    for (int i = 0; i < N_ENTRIES; ++i) {
+      mCache[i].mAVMA = 0;
+      mCache[i].mRSet = ENTRY_NOT_IN_USE;
+    }
+  }
+
+  RuleSet* Lookup(uintptr_t ia) {
+    uintptr_t hash = ia % (uintptr_t)N_ENTRIES;
+    CFICacheEntry* ce = &mCache[hash];
+    if (ce->mAVMA == ia) {
+      // The cache has an entry for |ia|.  Interpret it.
+      if (ce->mRSet > NO_RULESET_AVAILABLE) {
+        // There's a RuleSet.  So return it.
+        return ce->mRSet;
+      }
+      if (ce->mRSet == NO_RULESET_AVAILABLE) {
+        // There's no RuleSet for this address.  Don't update
+        // the cache, since we might get queried again.
+        return nullptr;
+      }
+      // Otherwise, the slot is not in use.  Fall through to
+      // the 'miss' case.
+    }
+
+    // The cache entry is for some other address, or is not in use.
+    // Update it.  If it can be found in the priMap then install it
+    // as-is.  Else put NO_RULESET_AVAILABLE in, so as to indicate
+    // there's no info for this address.
+    RuleSet* fallback = mPriMap->Lookup(ia);
+    mCache[hash].mAVMA = ia;
+    mCache[hash].mRSet = fallback ? fallback : NO_RULESET_AVAILABLE;
+    return fallback;
+  }
+
+ private:
+  // This should be a prime number.
+  static const int N_ENTRIES = 509;
+
+  // See comment above for the meaning of these entries.
+  struct CFICacheEntry {
+    uintptr_t mAVMA;  // AVMA of the associated instruction
+    RuleSet*  mRSet;  // RuleSet* for the instruction
+  };
+  CFICacheEntry mCache[N_ENTRIES];
+
+  // Need to have a pointer to the PriMap, so as to be able
+  // to service misses.
+  PriMap* mPriMap;
+};
+
+#undef ENTRY_NOT_IN_USE
+#undef NO_RULESET_AVAILABLE
+
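A walkthrough of the three slot states during lookups (addresses
hypothetical, N_ENTRIES = 509 as above):

    Lookup(0x1234)   hash = 0x1234 % 509; slot.mAVMA != 0x1234
                     -> miss: consult mPriMap, fill the slot, return result
    Lookup(0x1234)   slot.mAVMA == 0x1234, slot.mRSet > (RuleSet*)1
                     -> hit: return the cached RuleSet*
    Lookup(0x5678)   mPriMap has no entry for it
                     -> slot filled with NO_RULESET_AVAILABLE and nullptr
                        returned; the absence of a RuleSet is cached too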
+
+////////////////////////////////////////////////////////////////
+// LUL                                                        //
+////////////////////////////////////////////////////////////////
+
+LUL::LUL(void (*aLog)(const char*))
+{
+  mRWlock = new LulRWLock();
+  AutoLulRWLocker lock(mRWlock, AutoLulRWLocker::FOR_WRITING);
+  mLog = aLog;
+  mPriMap = new PriMap(aLog);
+  mSegArray = new SegArray();
+}
+
+
+LUL::~LUL()
+{
+  // The auto-locked section must have its own scope, so that the
+  // unlock is performed before the mRWLock is deleted.
+  {
+    AutoLulRWLocker lock(mRWlock, AutoLulRWLocker::FOR_WRITING);
+    for (std::map<pthread_t,CFICache*>::iterator iter = mCaches.begin();
+         iter != mCaches.end();
+         ++iter) {
+      delete iter->second;
+    }
+    delete mPriMap;
+    delete mSegArray;
+    mLog = nullptr;
+  }
+  // Now we don't hold the lock.  Hence it is safe to delete it.
+  delete mRWlock;
+}
+
+
+void
+LUL::RegisterUnwinderThread()
+{
+  AutoLulRWLocker lock(mRWlock, AutoLulRWLocker::FOR_WRITING);
+
+  pthread_t me = pthread_self();
+  CFICache* cache = new CFICache(mPriMap);
+
+  std::pair<std::map<pthread_t,CFICache*>::iterator, bool> res
+    = mCaches.insert(std::pair<pthread_t,CFICache*>(me, cache));
+  // Assert that this thread is not already registered, that is, that
+  // a new element was inserted.  Using mozilla::DebugOnly to declare
+  // |res| leads to a compilation error, hence the cast to void below.
+  MOZ_ASSERT(res.second);
+  (void)res.second;
+}
+
+void
+LUL::NotifyAfterMap(uintptr_t aRXavma, size_t aSize,
+                    const char* aFileName, const void* aMappedImage)
+{
+  AutoLulRWLocker lock(mRWlock, AutoLulRWLocker::FOR_WRITING);
+
+  mLog(":\n");
+  char buf[200];
+  snprintf(buf, sizeof(buf), "NotifyMap %llx %llu %s\n",
+           (unsigned long long int)aRXavma, (unsigned long long int)aSize,
+           aFileName);
+  buf[sizeof(buf)-1] = 0;
+  mLog(buf);
+
+  InvalidateCFICaches();
+
+  // Ignore obviously-stupid notifications.
+  if (aSize > 0) {
+
+    // Here's a new mapping, for this object.
+    SecMap* smap = new SecMap(mLog);
+
+    // Read CFI or EXIDX unwind data into |smap|.
+    if (!aMappedImage) {
+      (void)lul::ReadSymbolData(
+              string(aFileName), std::vector<string>(), smap,
+              (void*)aRXavma, mLog);
+    } else {
+      (void)lul::ReadSymbolDataInternal(
+              (const uint8_t*)aMappedImage,
+              string(aFileName), std::vector<string>(), smap,
+              (void*)aRXavma, mLog);
+    }
+
+    mLog("NotifyMap .. preparing entries\n");
+
+    smap->PrepareRuleSets(aRXavma, aSize);
+
+    snprintf(buf, sizeof(buf),
+             "NotifyMap got %lld entries\n", (long long int)smap->Size());
+    buf[sizeof(buf)-1] = 0;
+    mLog(buf);
+
+    // Add it to the primary map (the top level set of mapped objects).
+    mPriMap->AddSecMap(smap);
+
+    // Tell the segment array about the mapping, so that the stack
+    // scan and __kernel_syscall mechanisms know where valid code is.
+    mSegArray->add(aRXavma, aRXavma + aSize - 1, true);
+  }
+}
+
+
+void
+LUL::NotifyExecutableArea(uintptr_t aRXavma, size_t aSize)
+{
+  AutoLulRWLocker lock(mRWlock, AutoLulRWLocker::FOR_WRITING);
+
+  mLog(":\n");
+  char buf[200];
+  snprintf(buf, sizeof(buf), "NotifyExecutableArea %llx %llu\n",
+           (unsigned long long int)aRXavma, (unsigned long long int)aSize);
+  buf[sizeof(buf)-1] = 0;
+  mLog(buf);
+
+  InvalidateCFICaches();
+
+  // Ignore obviously-stupid notifications.
+  if (aSize > 0) {
+    // Tell the segment array about the mapping, so that the stack
+    // scan and __kernel_syscall mechanisms know where valid code is.
+    mSegArray->add(aRXavma, aRXavma + aSize - 1, true);
+  }
+}
+
+
+void
+LUL::NotifyBeforeUnmap(uintptr_t aRXavmaMin, uintptr_t aRXavmaMax)
+{
+  AutoLulRWLocker lock(mRWlock, AutoLulRWLocker::FOR_WRITING);
+
+  mLog(":\n");
+  char buf[100];
+  snprintf(buf, sizeof(buf), "NotifyUnmap %016llx-%016llx\n",
+           (unsigned long long int)aRXavmaMin,
+           (unsigned long long int)aRXavmaMax);
+  buf[sizeof(buf)-1] = 0;
+  mLog(buf);
+
+  MOZ_ASSERT(aRXavmaMin <= aRXavmaMax);
+
+  InvalidateCFICaches();
+
+  // Remove from the primary map, any secondary maps that intersect
+  // with the address range.  Also delete the secondary maps.
+  mPriMap->RemoveSecMapsInRange(aRXavmaMin, aRXavmaMax);
+
+  // Tell the segment array that the address range no longer
+  // contains valid code.
+  mSegArray->add(aRXavmaMin, aRXavmaMax, false);
+
+  snprintf(buf, sizeof(buf), "NotifyUnmap: now have %d SecMaps\n",
+           (int)mPriMap->CountSecMaps());
+  buf[sizeof(buf)-1] = 0;
+  mLog(buf);
+}
+
+
+size_t
+LUL::CountMappings()
+{
+  AutoLulRWLocker lock(mRWlock, AutoLulRWLocker::FOR_WRITING);
+  return mPriMap->CountSecMaps();
+}
+
+
+static
+TaggedUWord DerefTUW(TaggedUWord aAddr, StackImage* aStackImg)
+{
+  if (!aAddr.Valid()) {
+    return TaggedUWord();
+  }
+  if (aAddr.Value() < aStackImg->mStartAvma) {
+    return TaggedUWord();
+  }
+  if (aAddr.Value() + sizeof(uintptr_t) > aStackImg->mStartAvma
+                                          + aStackImg->mLen) {
+    return TaggedUWord();
+  }
+  return TaggedUWord(*(uintptr_t*)(aStackImg->mContents + aAddr.Value()
+                                   - aStackImg->mStartAvma));
+}
+
+static
+TaggedUWord EvaluateReg(int16_t aReg, UnwindRegs* aOldRegs, TaggedUWord aCFA)
+{
+  switch (aReg) {
+    case DW_REG_CFA:       return aCFA;
+#if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
+    case DW_REG_INTEL_XBP: return aOldRegs->xbp;
+    case DW_REG_INTEL_XSP: return aOldRegs->xsp;
+    case DW_REG_INTEL_XIP: return aOldRegs->xip;
+#elif defined(LUL_ARCH_arm)
+    case DW_REG_ARM_R7:    return aOldRegs->r7;
+    case DW_REG_ARM_R11:   return aOldRegs->r11;
+    case DW_REG_ARM_R12:   return aOldRegs->r12;
+    case DW_REG_ARM_R13:   return aOldRegs->r13;
+    case DW_REG_ARM_R14:   return aOldRegs->r14;
+    case DW_REG_ARM_R15:   return aOldRegs->r15;
+#else
+# error "Unsupported arch"
+#endif
+    default: MOZ_ASSERT(0); return TaggedUWord();
+  }
+}
+
+static
+TaggedUWord EvaluateExpr(LExpr aExpr, UnwindRegs* aOldRegs,
+                         TaggedUWord aCFA, StackImage* aStackImg)
+{
+  switch (aExpr.mHow) {
+    case LExpr::UNKNOWN:
+      return TaggedUWord();
+    case LExpr::NODEREF: {
+      TaggedUWord tuw = EvaluateReg(aExpr.mReg, aOldRegs, aCFA);
+      tuw.Add(TaggedUWord((intptr_t)aExpr.mOffset));
+      return tuw;
+    }
+    case LExpr::DEREF: {
+      TaggedUWord tuw = EvaluateReg(aExpr.mReg, aOldRegs, aCFA);
+      tuw.Add(TaggedUWord((intptr_t)aExpr.mOffset));
+      return DerefTUW(tuw, aStackImg);
+    }
+    default:
+      MOZ_ASSERT(0);
+      return TaggedUWord();
+  }
+}
+
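To make the evaluation concrete, suppose (hypothetically) the CFA has already
been computed as 0x7fff0010 and the rule for the return address is
xip = *(cfa + -8), i.e. LExpr::DEREF with mReg = DW_REG_CFA and mOffset = -8.
EvaluateExpr then proceeds:

    EvaluateReg(DW_REG_CFA)   -> 0x7fff0010
    Add(-8)                   -> 0x7fff0008
    DerefTUW(0x7fff0008)      -> the word at stack-image offset
                                 0x7fff0008 - mStartAvma, provided the whole
                                 word lies within [mStartAvma, mStartAvma +
                                 mLen); otherwise an invalid TaggedUWord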
+static
+void UseRuleSet(/*MOD*/UnwindRegs* aRegs,
+                StackImage* aStackImg, RuleSet* aRS)
+{
+  // Take a copy of regs, since we'll need to refer to the old values
+  // whilst computing the new ones.
+  UnwindRegs old_regs = *aRegs;
+
+  // Mark all the current register values as invalid, so that the
+  // caller can see, on our return, which ones have been computed
+  // anew.  If we don't even manage to compute a new PC value, then
+  // the caller will have to abandon the unwind.
+  // FIXME: Create and use instead: aRegs->SetAllInvalid();
+#if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
+  aRegs->xbp = TaggedUWord();
+  aRegs->xsp = TaggedUWord();
+  aRegs->xip = TaggedUWord();
+#elif defined(LUL_ARCH_arm)
+  aRegs->r7  = TaggedUWord();
+  aRegs->r11 = TaggedUWord();
+  aRegs->r12 = TaggedUWord();
+  aRegs->r13 = TaggedUWord();
+  aRegs->r14 = TaggedUWord();
+  aRegs->r15 = TaggedUWord();
+#else
+# error "Unsupported arch"
+#endif
+
+  // This is generally useful.
+  const TaggedUWord inval = TaggedUWord();
+
+  // First, compute the CFA.
+  TaggedUWord cfa = EvaluateExpr(aRS->mCfaExpr, &old_regs,
+                                 inval/*old cfa*/, aStackImg);
+
+  // If we didn't manage to compute the CFA, well .. that's ungood,
+  // but keep going anyway.  It'll be OK provided none of the register
+  // value rules mention the CFA.  In any case, compute the new values
+  // for each register that we're tracking.
+
+#if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
+  aRegs->xbp = EvaluateExpr(aRS->mXbpExpr, &old_regs, cfa, aStackImg);
+  aRegs->xsp = EvaluateExpr(aRS->mXspExpr, &old_regs, cfa, aStackImg);
+  aRegs->xip = EvaluateExpr(aRS->mXipExpr, &old_regs, cfa, aStackImg);
+#elif defined(LUL_ARCH_arm)
+  aRegs->r7  = EvaluateExpr(aRS->mR7expr,  &old_regs, cfa, aStackImg);
+  aRegs->r11 = EvaluateExpr(aRS->mR11expr, &old_regs, cfa, aStackImg);
+  aRegs->r12 = EvaluateExpr(aRS->mR12expr, &old_regs, cfa, aStackImg);
+  aRegs->r13 = EvaluateExpr(aRS->mR13expr, &old_regs, cfa, aStackImg);
+  aRegs->r14 = EvaluateExpr(aRS->mR14expr, &old_regs, cfa, aStackImg);
+  aRegs->r15 = EvaluateExpr(aRS->mR15expr, &old_regs, cfa, aStackImg);
+#else
+# error "Unsupported arch"
+#endif
+
+  // We're done.  Any regs for which we didn't manage to compute a
+  // new value will now be marked as invalid.
+}
+
+void
+LUL::Unwind(/*OUT*/uintptr_t* aFramePCs,
+            /*OUT*/uintptr_t* aFrameSPs,
+            /*OUT*/size_t* aFramesUsed,
+            /*OUT*/size_t* aScannedFramesAcquired,
+            size_t aFramesAvail,
+            size_t aScannedFramesAllowed,
+            UnwindRegs* aStartRegs, StackImage* aStackImg)
+{
+  AutoLulRWLocker lock(mRWlock, AutoLulRWLocker::FOR_READING);
+
+  pthread_t me = pthread_self();
+  std::map<pthread_t, CFICache*>::iterator iter = mCaches.find(me);
+
+  if (iter == mCaches.end()) {
+    // The calling thread is not registered for unwinding.
+    MOZ_CRASH();
+    return;
+  }
+
+  CFICache* cache = iter->second;
+  MOZ_ASSERT(cache);
+
+  // Do unwindery, possibly modifying |cache|.
+
+  /////////////////////////////////////////////////////////
+  // BEGIN UNWIND
+
+  *aFramesUsed = 0;
+
+  UnwindRegs  regs          = *aStartRegs;
+  TaggedUWord last_valid_sp = TaggedUWord();
+
+  // Stack-scan control
+  unsigned int n_scanned_frames = 0;        // # s-s frames recovered so far
+  static const int NUM_SCANNED_WORDS = 50;  // max allowed scan length
+
+  while (true) {
+
+    if (DEBUG_MAIN) {
+      char buf[300];
+      mLog("\n");
+#if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
+      snprintf(buf, sizeof(buf),
+               "LoopTop: rip %d/%llx  rsp %d/%llx  rbp %d/%llx\n",
+               (int)regs.xip.Valid(), (unsigned long long int)regs.xip.Value(),
+               (int)regs.xsp.Valid(), (unsigned long long int)regs.xsp.Value(),
+               (int)regs.xbp.Valid(), (unsigned long long int)regs.xbp.Value());
+      buf[sizeof(buf)-1] = 0;
+      mLog(buf);
+#elif defined(LUL_ARCH_arm)
+      snprintf(buf, sizeof(buf),
+               "LoopTop: r15 %d/%llx  r7 %d/%llx  r11 %d/%llx"
+               "  r12 %d/%llx  r13 %d/%llx  r14 %d/%llx\n",
+               (int)regs.r15.Valid(), (unsigned long long int)regs.r15.Value(),
+               (int)regs.r7.Valid(),  (unsigned long long int)regs.r7.Value(),
+               (int)regs.r11.Valid(), (unsigned long long int)regs.r11.Value(),
+               (int)regs.r12.Valid(), (unsigned long long int)regs.r12.Value(),
+               (int)regs.r13.Valid(), (unsigned long long int)regs.r13.Value(),
+               (int)regs.r14.Valid(), (unsigned long long int)regs.r14.Value());
+      buf[sizeof(buf)-1] = 0;
+      mLog(buf);
+#else
+# error "Unsupported arch"
+#endif
+    }
+
+#if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
+    TaggedUWord ia = regs.xip;
+    TaggedUWord sp = regs.xsp;
+#elif defined(LUL_ARCH_arm)
+    TaggedUWord ia = (*aFramesUsed == 0 ? regs.r15 : regs.r14);
+    TaggedUWord sp = regs.r13;
+#else
+# error "Unsupported arch"
+#endif
+
+    if (*aFramesUsed >= aFramesAvail) {
+      break;
+    }
+
+    // If we don't have a valid value for the PC, give up.
+    if (!ia.Valid()) {
+      break;
+    }
+
+    // If this is the innermost frame, record the SP value, which
+    // presumably is valid.  If this isn't the innermost frame, and we
+    // have a valid SP value, check that it isn't less than the one
+    // we've seen so far, so as to catch potential SP value cycles.
+    if (*aFramesUsed == 0) {
+      last_valid_sp = sp;
+    } else {
+      MOZ_ASSERT(last_valid_sp.Valid());
+      if (sp.Valid()) {
+        if (sp.Value() < last_valid_sp.Value()) {
+          // Hmm, SP going in the wrong direction.  Let's stop.
+          break;
+        }
+        // Remember where we got to.
+        last_valid_sp = sp;
+      }
+    }
+
+    // For the innermost frame, the IA value is what we need.  For all
+    // other frames, it's actually the return address, so back up one
+    // byte so as to get it into the calling instruction.
+    aFramePCs[*aFramesUsed] = ia.Value() - (*aFramesUsed == 0 ? 0 : 1);
+    aFrameSPs[*aFramesUsed] = sp.Valid() ? sp.Value() : 0;
+    (*aFramesUsed)++;
+
+    // Find the RuleSet for the current IA, if any.  This will also
+    // query the backing (secondary) maps if it isn't found in the
+    // thread-local cache.
+
+    // If this isn't the innermost frame, back up into the calling insn.
+    if (*aFramesUsed > 1) {
+      ia.Add(TaggedUWord((uintptr_t)(-1)));
+    }
+
+    RuleSet* ruleset = cache->Lookup(ia.Value());
+    if (DEBUG_MAIN) {
+      char buf[100];
+      snprintf(buf, sizeof(buf), "ruleset for 0x%llx = %p\n",
+               (unsigned long long int)ia.Value(), ruleset);
+      buf[sizeof(buf)-1] = 0;
+      mLog(buf);
+    }
+
+    /////////////////////////////////////////////
+    ////
+    // On 32 bit x86-linux, syscalls are often done via the VDSO
+    // function __kernel_vsyscall, which doesn't have a corresponding
+    // object that we can read debuginfo from.  That effectively kills
+    // off all stack traces for threads blocked in syscalls.  Hence
+    // special-case by looking at the code surrounding the program
+    // counter.
+    //
+    // 0xf7757420 <__kernel_vsyscall+0>:   push   %ecx
+    // 0xf7757421 <__kernel_vsyscall+1>:   push   %edx
+    // 0xf7757422 <__kernel_vsyscall+2>:   push   %ebp
+    // 0xf7757423 <__kernel_vsyscall+3>:   mov    %esp,%ebp
+    // 0xf7757425 <__kernel_vsyscall+5>:   sysenter
+    // 0xf7757427 <__kernel_vsyscall+7>:   nop
+    // 0xf7757428 <__kernel_vsyscall+8>:   nop
+    // 0xf7757429 <__kernel_vsyscall+9>:   nop
+    // 0xf775742a <__kernel_vsyscall+10>:  nop
+    // 0xf775742b <__kernel_vsyscall+11>:  nop
+    // 0xf775742c <__kernel_vsyscall+12>:  nop
+    // 0xf775742d <__kernel_vsyscall+13>:  nop
+    // 0xf775742e <__kernel_vsyscall+14>:  int    $0x80
+    // 0xf7757430 <__kernel_vsyscall+16>:  pop    %ebp
+    // 0xf7757431 <__kernel_vsyscall+17>:  pop    %edx
+    // 0xf7757432 <__kernel_vsyscall+18>:  pop    %ecx
+    // 0xf7757433 <__kernel_vsyscall+19>:  ret
+    //
+    // In cases where the sampled thread is blocked in a syscall, its
+    // program counter will point at "pop %ebp".  Hence we look for
+    // the sequence "int $0x80; pop %ebp; pop %edx; pop %ecx; ret", and
+    // the corresponding register-recovery actions are:
+    //    new_ebp = *(old_esp + 0)
+    //    new_eip = *(old_esp + 12)
+    //    new_esp = old_esp + 16
+    //
+    // It may also be the case that the program counter points two
+    // nops before the "int $0x80", viz, is __kernel_vsyscall+12, in
+    // the case where the syscall has been restarted but the thread
+    // hasn't been rescheduled.  The code below doesn't handle that;
+    // it could easily be made to.
+    //
+#if defined(LUL_PLAT_x86_android) || defined(LUL_PLAT_x86_linux)
+    if (!ruleset && *aFramesUsed == 1 && ia.Valid() && sp.Valid()) {
+      uintptr_t insns_min, insns_max;
+      uintptr_t eip = ia.Value();
+      bool b = mSegArray->getBoundingCodeSegment(&insns_min, &insns_max, eip);
+      if (b && eip - 2 >= insns_min && eip + 3 <= insns_max) {
+        uint8_t* eipC = (uint8_t*)eip;
+        if (eipC[-2] == 0xCD && eipC[-1] == 0x80 && eipC[0] == 0x5D &&
+            eipC[1] == 0x5A && eipC[2] == 0x59 && eipC[3] == 0xC3) {
+          TaggedUWord sp_plus_0  = sp;
+          TaggedUWord sp_plus_12 = sp;
+          TaggedUWord sp_plus_16 = sp;
+          sp_plus_12.Add(TaggedUWord(12));
+          sp_plus_16.Add(TaggedUWord(16));
+          TaggedUWord new_ebp = DerefTUW(sp_plus_0,  aStackImg);
+          TaggedUWord new_eip = DerefTUW(sp_plus_12, aStackImg);
+          TaggedUWord new_esp = sp_plus_16;
+          if (new_ebp.Valid() && new_eip.Valid() && new_esp.Valid()) {
+            regs.xbp = new_ebp;
+            regs.xip = new_eip;
+            regs.xsp = new_esp;
+            continue;
+          }
+        }
+      }
+    }
+#endif
+    ////
+    /////////////////////////////////////////////
+
+    // So, do we have a ruleset for this address?  If so, use it now.
+    if (ruleset) {
+
+      if (DEBUG_MAIN) {
+        ruleset->Print(mLog); mLog("\n");
+      }
+      // Use the RuleSet to compute the registers for the previous
+      // frame.  |regs| is modified in-place.
+      UseRuleSet(&regs, aStackImg, ruleset);
+
+    } else {
+
+      // There's no RuleSet for the specified address, so see if
+      // it's possible to get anywhere by stack-scanning.
+
+      // Use stack scanning frugally.
+      if (n_scanned_frames++ >= aScannedFramesAllowed) {
+        break;
+      }
+
+      // We can't scan the stack without a valid, aligned stack pointer.
+      if (!sp.IsAligned()) {
+        break;
+      }
+
+      bool scan_succeeded = false;
+      for (int i = 0; i < NUM_SCANNED_WORDS; ++i) {
+        TaggedUWord aWord = DerefTUW(sp, aStackImg);
+        // aWord is something we fished off the stack.  It should be
+        // valid, unless we overran the stack bounds.
+        if (!aWord.Valid()) {
+          break;
+        }
+
+        // Now, does aWord point inside a text section and immediately
+        // after something that looks like a call instruction?
+        if (mPriMap->MaybeIsReturnPoint(aWord, mSegArray)) {
+          // Yes it does.  Update the unwound registers heuristically,
+          // using the same schemes as Breakpad does.
+          scan_succeeded = true;
+          (*aScannedFramesAcquired)++;
+
+#if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
+          // The same logic applies for the 32- and 64-bit cases.
+          // Register names of the form xsp etc refer to (eg) esp in
+          // the 32-bit case and rsp in the 64-bit case.
+# if defined(LUL_ARCH_x64)
+          const int wordSize = 8;
+# else
+          const int wordSize = 4;
+# endif
+          // The return address -- at XSP -- will have been pushed by
+          // the CALL instruction.  So the caller's XSP value
+          // immediately before and after that CALL instruction is the
+          // word above XSP.
+          regs.xsp = sp;
+          regs.xsp.Add(TaggedUWord(wordSize));
+
+          // aWord points at the return point, so back up one byte
+          // to put it in the calling instruction.
+          regs.xip = aWord;
+          regs.xip.Add(TaggedUWord((uintptr_t)(-1)));
+
+          // Computing a new value from the frame pointer is more tricky.
+          if (regs.xbp.Valid() &&
+              sp.Valid() && regs.xbp.Value() == sp.Value() - wordSize) {
+            // One possibility is that the callee begins with the standard
+            // preamble "push %xbp; mov %xsp, %xbp".  In which case, the
+            // (1) caller's XBP value will be at the word below XSP, and
+            // (2) the current (callee's) XBP will point at that word:
+            regs.xbp = DerefTUW(regs.xbp, aStackImg);
+          } else if (regs.xbp.Valid() &&
+                     sp.Valid() && regs.xbp.Value() >= sp.Value() + wordSize) {
+            // If that didn't work out, maybe the callee didn't change
+            // XBP, so it still holds the caller's value.  For that to
+            // be plausible, XBP will need to have a value at least
+            // higher than XSP since that holds the purported return
+            // address.  In which case do nothing, since XBP already
+            // holds the "right" value.
+          } else {
+            // Mark XBP as invalid, so that subsequent unwind iterations
+            // don't assume it holds valid data.
+            regs.xbp = TaggedUWord();
+          }
+
+          // Move on to the next word up the stack.
+          sp.Add(TaggedUWord(wordSize));
+
+#elif defined(LUL_ARCH_arm)
+          // Set all registers to be undefined, except for SP(R13) and
+          // PC(R15).
+
+          // aWord points either at the return point, if returning to
+          // ARM code, or one insn past the return point if returning
+          // to Thumb code.  In both cases, aWord-2 is guaranteed to
+          // fall within the calling instruction.
+          regs.r15 = aWord;
+          regs.r15.Add(TaggedUWord((uintptr_t)(-2)));
+
+          // Make SP be the word above the location where the return
+          // address was found.
+          regs.r13 = sp;
+          regs.r13.Add(TaggedUWord(4));
+
+          // All other regs are undefined.
+          regs.r7 = regs.r11 = regs.r12 = regs.r14 = TaggedUWord();
+
+          // Move on to the next word up the stack.
+          sp.Add(TaggedUWord(4));
+
+#else
+# error "Unknown plat"
+#endif
+
+          break;
+        }
+
+      } // for (int i = 0; i < NUM_SCANNED_WORDS; i++)
+
+      // We tried to make progress by scanning the stack, but failed.
+      // So give up -- fall out of the top level unwind loop.
+      if (!scan_succeeded) {
+        break;
+      }
+    }
+
+  } // top level unwind loop
+
+  // END UNWIND
+  /////////////////////////////////////////////////////////
+}
+
+
+void
+LUL::InvalidateCFICaches()
+{
+  // NB: the caller must hold mRWlock for writing.
+
+  // FIXME: this could get expensive.  Use a bool to remember when the
+  // caches have been invalidated and hence avoid duplicate invalidations.
+  for (std::map<pthread_t,CFICache*>::iterator iter = mCaches.begin();
+       iter != mCaches.end();
+       ++iter) {
+    iter->second->Invalidate();
+  }
+}
+
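One way to implement that FIXME is a dirty flag, set here and cleared again
the next time the caches are put back into use (say, at the top of
LUL::Unwind). |mCachesAreInvalid| is a hypothetical new member, not part of
this patch:

    void
    LUL::InvalidateCFICaches()
    {
      if (mCachesAreInvalid) {   // hypothetical bool member of LUL
        return;                  // nothing has refilled the caches since
      }                          // the last invalidation
      for (std::map<pthread_t,CFICache*>::iterator iter = mCaches.begin();
           iter != mCaches.end(); ++iter) {
        iter->second->Invalidate();
      }
      mCachesAreInvalid = true;  // cleared again when the caches are reused
    }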
+
+////////////////////////////////////////////////////////////////
+// LUL Unit Testing                                           //
+////////////////////////////////////////////////////////////////
+
+static const int LUL_UNIT_TEST_STACK_SIZE = 16384;
+
+// This function is innermost in the test call sequence.  It uses LUL
+// to unwind, and compares the result with the sequence specified in
+// the director string.  These need to agree in order for the test to
+// pass.  In order not to screw up the results, this function needs
+// to have a not-very-big stack frame, since we're only presenting
+// the innermost LUL_UNIT_TEST_STACK_SIZE bytes of stack to LUL, and
+// that chunk unavoidably includes the frame for this function.
+//
+// This function must not be inlined into its callers.  Doing so will
+// cause the expected-vs-actual backtrace consistency checking to
+// fail.  Prints summary results to |aLUL|'s logging sink and also
+// returns a boolean indicating whether or not the test passed.
+static __attribute__((noinline))
+bool GetAndCheckStackTrace(LUL* aLUL, const char* dstring)
+{
+  // Get hold of the current unwind-start registers.
+  UnwindRegs startRegs;
+  memset(&startRegs, 0, sizeof(startRegs));
+#if defined(LUL_PLAT_x64_linux)
+  volatile uintptr_t block[3];
+  MOZ_ASSERT(sizeof(block) == 24);
+  __asm__ __volatile__(
+    "leaq 0(%%rip), %%r15"   "\n\t"
+    "movq %%r15,  0(%0)"     "\n\t"
+    "movq %%rsp,  8(%0)"     "\n\t"
+    "movq %%rbp, 16(%0)"     "\n"
+    : : "r"(&block[0]) : "memory", "r15"
+  );
+  startRegs.xip = TaggedUWord(block[0]);
+  startRegs.xsp = TaggedUWord(block[1]);
+  startRegs.xbp = TaggedUWord(block[2]);
+  const uintptr_t REDZONE_SIZE = 128;
+  uintptr_t start = block[1] - REDZONE_SIZE;
+#elif defined(LUL_PLAT_x86_linux) || defined(LUL_PLAT_x86_android)
+  volatile uintptr_t block[3];
+  MOZ_ASSERT(sizeof(block) == 12);
+  __asm__ __volatile__(
+    ".byte 0xE8,0x00,0x00,0x00,0x00"/*call next insn*/  "\n\t"
+    "popl %%edi"             "\n\t"
+    "movl %%edi, 0(%0)"      "\n\t"
+    "movl %%esp, 4(%0)"      "\n\t"
+    "movl %%ebp, 8(%0)"      "\n"
+    : : "r"(&block[0]) : "memory", "edi"
+  );
+  startRegs.xip = TaggedUWord(block[0]);
+  startRegs.xsp = TaggedUWord(block[1]);
+  startRegs.xbp = TaggedUWord(block[2]);
+  const uintptr_t REDZONE_SIZE = 0;
+  uintptr_t start = block[1] - REDZONE_SIZE;
+#elif defined(LUL_PLAT_arm_android)
+  volatile uintptr_t block[6];
+  MOZ_ASSERT(sizeof(block) == 24);
+  __asm__ __volatile__(
+    "mov r0, r15"            "\n\t"
+    "str r0,  [%0, #0]"      "\n\t"
+    "str r14, [%0, #4]"      "\n\t"
+    "str r13, [%0, #8]"      "\n\t"
+    "str r12, [%0, #12]"     "\n\t"
+    "str r11, [%0, #16]"     "\n\t"
+    "str r7,  [%0, #20]"     "\n"
+    : : "r"(&block[0]) : "memory", "r0"
+  );
+  startRegs.r15 = TaggedUWord(block[0]);
+  startRegs.r14 = TaggedUWord(block[1]);
+  startRegs.r13 = TaggedUWord(block[2]);
+  startRegs.r12 = TaggedUWord(block[3]);
+  startRegs.r11 = TaggedUWord(block[4]);


////////////////////////////////////////////////////////////////
// LUL Unit Testing                                           //
////////////////////////////////////////////////////////////////

static const int LUL_UNIT_TEST_STACK_SIZE = 16384;

// This function is innermost in the test call sequence.  It uses LUL
// to unwind, and compares the result with the sequence specified in
// the director string.  These need to agree in order for the test to
// pass.  In order not to screw up the results, this function needs
// to have a not-very-big stack frame, since we're only presenting
// the innermost LUL_UNIT_TEST_STACK_SIZE bytes of stack to LUL, and
// that chunk unavoidably includes the frame for this function.
//
// This function must not be inlined into its callers.  Doing so will
// cause the expected-vs-actual backtrace consistency checking to
// fail.  Prints summary results to |aLUL|'s logging sink and also
// returns a boolean indicating whether or not the test passed.
static __attribute__((noinline))
bool GetAndCheckStackTrace(LUL* aLUL, const char* dstring)
{
  // Get hold of the current unwind-start registers.
  UnwindRegs startRegs;
  memset(&startRegs, 0, sizeof(startRegs));
#if defined(LUL_PLAT_x64_linux)
  volatile uintptr_t block[3];
  MOZ_ASSERT(sizeof(block) == 24);
  __asm__ __volatile__(
    "leaq 0(%%rip), %%r15"  "\n\t"
    "movq %%r15, 0(%0)"     "\n\t"
    "movq %%rsp, 8(%0)"     "\n\t"
    "movq %%rbp, 16(%0)"    "\n"
    : : "r"(&block[0]) : "memory", "r15"
  );
  startRegs.xip = TaggedUWord(block[0]);
  startRegs.xsp = TaggedUWord(block[1]);
  startRegs.xbp = TaggedUWord(block[2]);
  // The x86-64 SysV ABI reserves a 128-byte "red zone" below XSP that
  // leaf code may use, so include it in the captured image.
  const uintptr_t REDZONE_SIZE = 128;
  uintptr_t start = block[1] - REDZONE_SIZE;
#elif defined(LUL_PLAT_x86_linux) || defined(LUL_PLAT_x86_android)
  volatile uintptr_t block[3];
  MOZ_ASSERT(sizeof(block) == 12);
  __asm__ __volatile__(
    ".byte 0xE8,0x00,0x00,0x00,0x00" /*call next insn*/  "\n\t"
    "popl %%edi"          "\n\t"
    "movl %%edi, 0(%0)"   "\n\t"
    "movl %%esp, 4(%0)"   "\n\t"
    "movl %%ebp, 8(%0)"   "\n"
    : : "r"(&block[0]) : "memory", "edi"
  );
  startRegs.xip = TaggedUWord(block[0]);
  startRegs.xsp = TaggedUWord(block[1]);
  startRegs.xbp = TaggedUWord(block[2]);
  const uintptr_t REDZONE_SIZE = 0;
  uintptr_t start = block[1] - REDZONE_SIZE;
#elif defined(LUL_PLAT_arm_android)
  volatile uintptr_t block[6];
  MOZ_ASSERT(sizeof(block) == 24);
  __asm__ __volatile__(
    "mov r0, r15"         "\n\t"
    "str r0,  [%0, #0]"   "\n\t"
    "str r14, [%0, #4]"   "\n\t"
    "str r13, [%0, #8]"   "\n\t"
    "str r12, [%0, #12]"  "\n\t"
    "str r11, [%0, #16]"  "\n\t"
    "str r7,  [%0, #20]"  "\n"
    : : "r"(&block[0]) : "memory", "r0"
  );
  startRegs.r15 = TaggedUWord(block[0]);
  startRegs.r14 = TaggedUWord(block[1]);
  startRegs.r13 = TaggedUWord(block[2]);
  startRegs.r12 = TaggedUWord(block[3]);
  startRegs.r11 = TaggedUWord(block[4]);
  startRegs.r7  = TaggedUWord(block[5]);
  const uintptr_t REDZONE_SIZE = 0;
  // block[2] holds R13 (the stack pointer); block[1] is R14 (LR),
  // which is not a stack address.
  uintptr_t start = block[2] - REDZONE_SIZE;
#else
# error "Unsupported platform"
#endif

  // Get hold of the innermost LUL_UNIT_TEST_STACK_SIZE bytes of the
  // stack.
  uintptr_t end = start + LUL_UNIT_TEST_STACK_SIZE;
  uintptr_t ws  = sizeof(void*);
  start &= ~(ws-1);
  end   &= ~(ws-1);
  uintptr_t nToCopy = end - start;
  if (nToCopy > lul::N_STACK_BYTES) {
    nToCopy = lul::N_STACK_BYTES;
  }
  MOZ_ASSERT(nToCopy <= lul::N_STACK_BYTES);
  StackImage* stackImg = new StackImage();
  stackImg->mLen       = nToCopy;
  stackImg->mStartAvma = start;
  if (nToCopy > 0) {
    MOZ_MAKE_MEM_DEFINED((void*)start, nToCopy);
    memcpy(&stackImg->mContents[0], (void*)start, nToCopy);
  }

  // Unwind it.
  const int MAX_TEST_FRAMES = 64;
  uintptr_t framePCs[MAX_TEST_FRAMES];
  uintptr_t frameSPs[MAX_TEST_FRAMES];
  size_t framesAvail = mozilla::ArrayLength(framePCs);
  size_t framesUsed  = 0;
  size_t scannedFramesAllowed  = 0;
  size_t scannedFramesAcquired = 0;
  aLUL->Unwind( &framePCs[0], &frameSPs[0],
                &framesUsed, &scannedFramesAcquired,
                framesAvail, scannedFramesAllowed,
                &startRegs, stackImg );

  delete stackImg;

  //if (0) {
  //  // Show what we have.
  //  fprintf(stderr, "Got %d frames:\n", (int)framesUsed);
  //  for (size_t i = 0; i < framesUsed; i++) {
  //    fprintf(stderr, "  [%2d]   SP %p   PC %p\n",
  //            (int)i, (void*)frameSPs[i], (void*)framePCs[i]);
  //  }
  //  fprintf(stderr, "\n");
  //}
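
  // To make the consistency check below concrete, consider the
  // director string "11222211" (one of the cases RunLulUnitTests feeds
  // in).  Each digit causes one recursive call into the corresponding
  // TestFn, so -- ignoring the two innermost frames, as explained
  // below -- every recovered PC for a frame labelled '1' must be the
  // single call-site address inside TestFn1, and likewise for '2'.
  // The code below verifies exactly that digit-to-PC binding.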
  // Check to see if there's a consistent binding between digits in
  // the director string ('1' .. '8') and the PC values acquired by
  // the unwind.  If there isn't, the unwinding has failed somehow.
  uintptr_t binding[8];  // binding for '1' .. binding for '8'
  memset((void*)binding, 0, sizeof(binding));

  // The general plan is to work backwards along the director string
  // and forwards along the framePCs array.  Doing so corresponds to
  // working outwards from the innermost frame of the recursive test set.
  const char* cursor = dstring;

  // Find the end.  This leaves |cursor| two bytes past the first
  // character we want to look at -- see comment below.
  while (*cursor) cursor++;

  // Counts the number of consistent frames.
  size_t nConsistent = 0;

  // Iterate back to the start of the director string.  The starting
  // points are a bit complex.  We can't use framePCs[0] because that
  // contains the PC in this frame (above).  We can't use framePCs[1]
  // because that will contain the PC at the return point in the
  // recursive test group (TestFn[1-8]) for their call "out" to this
  // function, GetAndCheckStackTrace.  Although LUL will compute a
  // correct return address, that will not be the same return address
  // as for a recursive call out of the function to another function
  // in the group.  Hence we can only start consistency checking at
  // framePCs[2].
  //
  // To be consistent, then, we must ignore the last element in the
  // director string, as that corresponds to framePCs[1].  Hence the
  // start points are: framePCs[2] and the director string 2 bytes
  // before the terminating zero.
  //
  // Also as a result of this, the number of consistent frames counted
  // will always be one less than the length of the director string
  // (not including its terminating zero).
  size_t frameIx;
  for (cursor = cursor-2, frameIx = 2;
       cursor >= dstring && frameIx < framesUsed;
       cursor--, frameIx++) {
    char c = *cursor;
    uintptr_t pc = framePCs[frameIx];
    // If this doesn't hold, the director string is ill-formed.
    MOZ_ASSERT(c >= '1' && c <= '8');
    int n = ((int)c) - ((int)'1');
    if (binding[n] == 0) {
      // There's no binding for |c| yet, so install |pc| and carry on.
      binding[n] = pc;
      nConsistent++;
      continue;
    }
    // There's a pre-existing binding for |c|.  Check it's consistent.
    if (binding[n] != pc) {
      // Not consistent.  Give up now.
      break;
    }
    // Consistent.  Keep going.
    nConsistent++;
  }

  // So, did we succeed?
  bool passed = nConsistent+1 == strlen(dstring);

  // Show the results.
  char buf[200];
  snprintf(buf, sizeof(buf), "LULUnitTest:   dstring = %s\n", dstring);
  buf[sizeof(buf)-1] = 0;
  aLUL->mLog(buf);
  snprintf(buf, sizeof(buf),
           "LULUnitTest:     %d consistent, %d in dstring: %s\n",
           (int)nConsistent, (int)strlen(dstring),
           passed ? "PASS" : "FAIL");
  buf[sizeof(buf)-1] = 0;
  aLUL->mLog(buf);

  return passed;
}
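
// The digit-to-PC binding check above is easy to exercise in
// isolation.  Below is a minimal, self-contained sketch of the same
// algorithm; the function name CheckBinding and the fake PC values are
// invented for illustration, and this is not LUL code.
#if 0
#include <stdint.h>
#include <string.h>
#include <stdio.h>

// Returns the number of frames that bind consistently, walking the
// director string backwards (skipping its last char) and |pcs|
// forwards from index 2, exactly as GetAndCheckStackTrace does.  The
// director string must contain only '1' .. '8'.
static size_t CheckBinding(const char* dstring,
                           const uintptr_t* pcs, size_t nPcs) {
  uintptr_t binding[8];
  memset(binding, 0, sizeof(binding));
  const char* cursor = dstring;
  while (*cursor) cursor++;
  size_t nConsistent = 0;
  size_t frameIx;
  for (cursor = cursor - 2, frameIx = 2;
       cursor >= dstring && frameIx < nPcs;
       cursor--, frameIx++) {
    int n = *cursor - '1';
    if (binding[n] == 0) {
      binding[n] = pcs[frameIx];
    } else if (binding[n] != pcs[frameIx]) {
      break;  // inconsistent binding -- the unwind went wrong
    }
    nConsistent++;
  }
  return nConsistent;
}

int main() {
  // Fake PCs: 0x100 stands for the call site in TestFn1, 0x200 for
  // TestFn2.  pcs[0] and pcs[1] are ignored by the algorithm.
  const char* dstring = "1221";
  uintptr_t pcs[] = { 0xAAA, 0xBBB, /*'2'*/0x200, /*'2'*/0x200,
                      /*'1'*/0x100 };
  size_t n = CheckBinding(dstring, pcs, sizeof(pcs)/sizeof(pcs[0]));
  // Passes iff nConsistent + 1 == strlen(dstring), i.e. 3 + 1 == 4.
  printf("%s\n", n + 1 == strlen(dstring) ? "PASS" : "FAIL");
  return 0;
}
#endif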


// Macro magic to create a set of 8 mutually recursive functions with
// varying frame sizes.  These will recurse amongst themselves as
// specified by |strP|, the director string, and call
// GetAndCheckStackTrace when the string becomes empty, passing it the
// original value of the string.  This checks the result, printing
// results on |aLUL|'s logging sink, and also returns a boolean
// indicating whether or not the results are acceptable (correct).

#define DECL_TEST_FN(NAME) \
  bool NAME(LUL* aLUL, const char* strPorig, const char* strP);

#define GEN_TEST_FN(NAME, FRAMESIZE) \
  bool NAME(LUL* aLUL, const char* strPorig, const char* strP) { \
    volatile char space[FRAMESIZE]; \
    memset((char*)&space[0], 0, sizeof(space)); \
    if (*strP == '\0') { \
      /* We've come to the end of the director string. */ \
      /* Take a stack snapshot. */ \
      return GetAndCheckStackTrace(aLUL, strPorig); \
    } else { \
      /* Recurse onwards.  This is a bit subtle.  The obvious */ \
      /* thing to do here is call onwards directly, from within the */ \
      /* arms of the case statement.  That gives a problem in that */ \
      /* there will be multiple return points inside each function when */ \
      /* unwinding, so it will be difficult to check for consistency */ \
      /* against the director string.  Instead, we make an indirect */ \
      /* call, so as to guarantee that there is only one call site */ \
      /* within each function.  This does assume that the compiler */ \
      /* won't transform it back to the simple direct-call form. */ \
      /* To discourage it from doing so, the call is bracketed with */ \
      /* __asm__ __volatile__ sections so as to make it not-movable. */ \
      bool (*nextFn)(LUL*, const char*, const char*) = NULL; \
      switch (*strP) { \
        case '1': nextFn = TestFn1; break; \
        case '2': nextFn = TestFn2; break; \
        case '3': nextFn = TestFn3; break; \
        case '4': nextFn = TestFn4; break; \
        case '5': nextFn = TestFn5; break; \
        case '6': nextFn = TestFn6; break; \
        case '7': nextFn = TestFn7; break; \
        case '8': nextFn = TestFn8; break; \
        default:  nextFn = TestFn8; break; \
      } \
      __asm__ __volatile__("":::"cc","memory"); \
      bool passed = nextFn(aLUL, strPorig, strP+1); \
      __asm__ __volatile__("":::"cc","memory"); \
      return passed; \
    } \
  }

// The test functions are mutually recursive, so it is necessary to
// declare them before defining them.
DECL_TEST_FN(TestFn1)
DECL_TEST_FN(TestFn2)
DECL_TEST_FN(TestFn3)
DECL_TEST_FN(TestFn4)
DECL_TEST_FN(TestFn5)
DECL_TEST_FN(TestFn6)
DECL_TEST_FN(TestFn7)
DECL_TEST_FN(TestFn8)

GEN_TEST_FN(TestFn1, 123)
GEN_TEST_FN(TestFn2, 456)
GEN_TEST_FN(TestFn3, 789)
GEN_TEST_FN(TestFn4, 23)
GEN_TEST_FN(TestFn5, 47)
GEN_TEST_FN(TestFn6, 117)
GEN_TEST_FN(TestFn7, 1)
GEN_TEST_FN(TestFn8, 99)
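
// The single-call-site trick in GEN_TEST_FN is worth seeing without the
// macro noise.  This is an illustrative sketch only (Callee1, Callee2
// and Caller are invented names): the indirect call gives the function
// exactly one return address for all of its outgoing calls, and the
// empty asm statements act as compiler barriers so the call cannot be
// hoisted, duplicated, or turned back into direct calls per switch arm.
#if 0
bool Callee1(int);
bool Callee2(int);

bool Caller(int tag) {
  // Select the target indirectly instead of calling from each branch.
  bool (*nextFn)(int) = (tag == 1) ? Callee1 : Callee2;
  __asm__ __volatile__("" ::: "cc", "memory");  // barrier
  bool result = nextFn(tag);                    // the one call site
  __asm__ __volatile__("" ::: "cc", "memory");  // barrier
  return result;
}
#endif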


// This starts the test sequence going.  Call here to generate a
// sequence of calls as directed by the string |dstring|.  The call
// sequence will, from its innermost frame, finish by calling
// GetAndCheckStackTrace() and passing it |dstring|.
// GetAndCheckStackTrace() will unwind the stack, check consistency
// of those results against |dstring|, and print a pass/fail message
// to aLUL's logging sink.  It also updates the counters in *aNTests
// and *aNTestsPassed.
__attribute__((noinline)) void
TestUnw(/*OUT*/int* aNTests, /*OUT*/int* aNTestsPassed,
        LUL* aLUL, const char* dstring)
{
  // Ensure that the stack has at least this much space on it.  This
  // makes it safe to saw off the top LUL_UNIT_TEST_STACK_SIZE bytes
  // and hand it to LUL.  Safe in the sense that no segfault can
  // happen because the stack is at least this big.  This is all
  // somewhat dubious in the sense that a sufficiently clever compiler
  // (clang, for one) can figure out that space[] is unused and delete
  // it from the frame.  Hence the somewhat elaborate hoop jumping to
  // fill it up before the call and to at least appear to use the
  // value afterwards.
  int i;
  volatile char space[LUL_UNIT_TEST_STACK_SIZE];
  for (i = 0; i < LUL_UNIT_TEST_STACK_SIZE; i++) {
    space[i] = (char)(i & 0x7F);
  }

  // Really run the test.
  bool passed = TestFn1(aLUL, dstring, dstring);

  // Appear to use space[], by visiting the value to compute some kind
  // of checksum, and then (apparently) using the checksum.
  int sum = 0;
  for (i = 0; i < LUL_UNIT_TEST_STACK_SIZE; i++) {
    // If this doesn't fool LLVM, I don't know what will.
    sum += space[i] - 3*i;
  }
  __asm__ __volatile__("" : : "r"(sum));

  // Update the counters.
  (*aNTests)++;
  if (passed) {
    (*aNTestsPassed)++;
  }
}


void
RunLulUnitTests(/*OUT*/int* aNTests, /*OUT*/int* aNTestsPassed, LUL* aLUL)
{
  aLUL->mLog(":\n");
  aLUL->mLog("LULUnitTest: BEGIN\n");
  *aNTests = *aNTestsPassed = 0;
  TestUnw(aNTests, aNTestsPassed, aLUL, "11111111");
  TestUnw(aNTests, aNTestsPassed, aLUL, "11222211");
  TestUnw(aNTests, aNTestsPassed, aLUL, "111222333");
  TestUnw(aNTests, aNTestsPassed, aLUL, "1212121231212331212121212121212");
  TestUnw(aNTests, aNTestsPassed, aLUL, "31415827271828325332173258");
  TestUnw(aNTests, aNTestsPassed, aLUL,
          "123456781122334455667788777777777777777777777");
  aLUL->mLog("LULUnitTest: END\n");
  aLUL->mLog(":\n");
}


} // namespace lul
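
// How a host might drive the self-test above.  This is a hedged usage
// sketch, not part of the file: it assumes a LUL instance has already
// been constructed and populated per LulMain.h, and the names
// LogToStderr and RunSelfTest are invented for illustration.
#if 0
#include <stdio.h>

static void LogToStderr(const char* aMsg) {
  fputs(aMsg, stderr);
}

static void RunSelfTest(lul::LUL* lulInstance) {
  int nTests = 0, nTestsPassed = 0;
  lul::RunLulUnitTests(&nTests, &nTestsPassed, lulInstance);
  fprintf(stderr, "LUL self test: %d of %d passed\n",
          nTestsPassed, nTests);
}
#endif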