tools/profiler/LulMain.cpp

Tue, 06 Jan 2015 21:39:09 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Tue, 06 Jan 2015 21:39:09 +0100
branch
TOR_BUG_9701
changeset 8
97036ab72558
permissions
-rw-r--r--

Conditionally force memory storage according to privacy.thirdparty.isolate;
This solves Tor bug #9701, complying with disk avoidance documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

     1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
     3 /* This Source Code Form is subject to the terms of the Mozilla Public
     4  * License, v. 2.0. If a copy of the MPL was not distributed with this
     5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     7 #include "LulMain.h"
     9 #include <string.h>
    10 #include <stdlib.h>
    11 #include <stdio.h>
    13 #include <algorithm>  // std::sort
    14 #include <string>
    16 #include "mozilla/Assertions.h"
    17 #include "mozilla/ArrayUtils.h"
    18 #include "mozilla/MemoryChecking.h"
    20 #include "LulCommonExt.h"
    21 #include "LulElfExt.h"
    23 #include "LulMainInt.h"
    25 // Set this to 1 for verbose logging
    26 #define DEBUG_MAIN 0
    29 namespace lul {
    31 using std::string;
    32 using std::vector;
    35 ////////////////////////////////////////////////////////////////
    36 // AutoLulRWLocker                                            //
    37 ////////////////////////////////////////////////////////////////
    39 // This is a simple RAII class that manages acquisition and release of
    40 // LulRWLock reader-writer locks.
    42 class AutoLulRWLocker {
    43 public:
    44   enum AcqMode { FOR_READING, FOR_WRITING };
    45   AutoLulRWLocker(LulRWLock* aRWLock, AcqMode mode)
    46     : mRWLock(aRWLock)
    47   {
    48     if (mode == FOR_WRITING) {
    49       aRWLock->WrLock();
    50     } else {
    51       aRWLock->RdLock();
    52     }
    53   }
    54   ~AutoLulRWLocker()
    55   {
    56     mRWLock->Unlock();
    57   }
    59 private:
    60   LulRWLock* mRWLock;
    61 };
    64 ////////////////////////////////////////////////////////////////
    65 // RuleSet                                                    //
    66 ////////////////////////////////////////////////////////////////
    68 static const char* 
    69 NameOf_DW_REG(int16_t aReg)
    70 {
    71   switch (aReg) {
    72     case DW_REG_CFA:       return "cfa";
    73 #if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
    74     case DW_REG_INTEL_XBP: return "xbp";
    75     case DW_REG_INTEL_XSP: return "xsp";
    76     case DW_REG_INTEL_XIP: return "xip";
    77 #elif defined(LUL_ARCH_arm)
    78     case DW_REG_ARM_R7:    return "r7";
    79     case DW_REG_ARM_R11:   return "r11";
    80     case DW_REG_ARM_R12:   return "r12";
    81     case DW_REG_ARM_R13:   return "r13";
    82     case DW_REG_ARM_R14:   return "r14";
    83     case DW_REG_ARM_R15:   return "r15";
    84 #else
    85 # error "Unsupported arch"
    86 #endif
    87     default: return "???";
    88   }
    89 }
    91 static string
    92 ShowRule(const char* aNewReg, LExpr aExpr)
    93 {
    94   char buf[64];
    95   string res = string(aNewReg) + "=";
    96   switch (aExpr.mHow) {
    97     case LExpr::UNKNOWN:
    98       res += "Unknown";
    99       break;
   100     case LExpr::NODEREF:
   101       sprintf(buf, "%s+%d", NameOf_DW_REG(aExpr.mReg), (int)aExpr.mOffset);
   102       res += buf;
   103       break;
   104     case LExpr::DEREF:
   105       sprintf(buf, "*(%s+%d)", NameOf_DW_REG(aExpr.mReg), (int)aExpr.mOffset);
   106       res += buf;
   107       break;
   108     default:
   109       res += "???";
   110       break;
   111   }
   112   return res;
   113 }
   115 void
   116 RuleSet::Print(void(*aLog)(const char*))
   117 {
   118   char buf[96];
   119   sprintf(buf, "[%llx .. %llx]: let ",
   120           (unsigned long long int)mAddr,
   121           (unsigned long long int)(mAddr + mLen - 1));
   122   string res = string(buf);
   123   res += ShowRule("cfa", mCfaExpr);
   124   res += " in";
   125   // For each reg we care about, print the recovery expression.
   126 #if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
   127   res += ShowRule(" RA", mXipExpr);
   128   res += ShowRule(" SP", mXspExpr);
   129   res += ShowRule(" BP", mXbpExpr);
   130 #elif defined(LUL_ARCH_arm)
   131   res += ShowRule(" R15", mR15expr);
   132   res += ShowRule(" R7",  mR7expr);
   133   res += ShowRule(" R11", mR11expr);
   134   res += ShowRule(" R12", mR12expr);
   135   res += ShowRule(" R13", mR13expr);
   136   res += ShowRule(" R14", mR14expr);
   137 #else
   138 # error "Unsupported arch"
   139 #endif
   140   aLog(res.c_str());
   141 }
// Return a pointer to the LExpr that recovers register |aRegno|, or
// nullptr if this RuleSet has no field for that register on this
// architecture.
LExpr*
RuleSet::ExprForRegno(DW_REG_NUMBER aRegno) {
  switch (aRegno) {
    case DW_REG_CFA: return &mCfaExpr;
#   if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
    case DW_REG_INTEL_XIP: return &mXipExpr;
    case DW_REG_INTEL_XSP: return &mXspExpr;
    case DW_REG_INTEL_XBP: return &mXbpExpr;
#   elif defined(LUL_ARCH_arm)
    case DW_REG_ARM_R15:   return &mR15expr;
    case DW_REG_ARM_R14:   return &mR14expr;
    case DW_REG_ARM_R13:   return &mR13expr;
    case DW_REG_ARM_R12:   return &mR12expr;
    case DW_REG_ARM_R11:   return &mR11expr;
    case DW_REG_ARM_R7:    return &mR7expr;
#   else
#     error "Unknown arch"
#   endif
    default: return nullptr;
  }
}
   165 RuleSet::RuleSet()
   166 {
   167   mAddr = 0;
   168   mLen  = 0;
   169   // The only other fields are of type LExpr and those are initialised
   170   // by LExpr::LExpr().
   171 }
   174 ////////////////////////////////////////////////////////////////
   175 // SecMap                                                     //
   176 ////////////////////////////////////////////////////////////////
   178 // See header file LulMainInt.h for comments about invariants.
// Construct an empty SecMap.  mSummaryMinAddr > mSummaryMaxAddr
// (1 > 0) encodes "empty range"; the map starts out marked usable.
// |aLog| is the debug logging sink.
SecMap::SecMap(void(*aLog)(const char*))
  : mSummaryMinAddr(1)
  , mSummaryMaxAddr(0)
  , mUsable(true)
  , mLog(aLog)
{}
// Destructor.  The explicit clear() is redundant (the vector's own
// destructor releases everything) but harmless.
SecMap::~SecMap() {
  mRuleSets.clear();
}
   191 RuleSet*
   192 SecMap::FindRuleSet(uintptr_t ia) {
   193   // Binary search mRuleSets to find one that brackets |ia|.
   194   // lo and hi need to be signed, else the loop termination tests
   195   // don't work properly.  Note that this works correctly even when
   196   // mRuleSets.size() == 0.
   198   // Can't do this until the array has been sorted and preened.
   199   MOZ_ASSERT(mUsable);
   201   long int lo = 0;
   202   long int hi = (long int)mRuleSets.size() - 1;
   203   while (true) {
   204     // current unsearched space is from lo to hi, inclusive.
   205     if (lo > hi) {
   206       // not found
   207       return nullptr;
   208     }
   209     long int  mid         = lo + ((hi - lo) / 2);
   210     RuleSet*  mid_ruleSet = &mRuleSets[mid];
   211     uintptr_t mid_minAddr = mid_ruleSet->mAddr;
   212     uintptr_t mid_maxAddr = mid_minAddr + mid_ruleSet->mLen - 1;
   213     if (ia < mid_minAddr) { hi = mid-1; continue; }
   214     if (ia > mid_maxAddr) { lo = mid+1; continue; }
   215     MOZ_ASSERT(mid_minAddr <= ia && ia <= mid_maxAddr);
   216     return mid_ruleSet;
   217   }
   218   // NOTREACHED
   219 }
// Add a RuleSet to the collection.  The rule is copied in.  Calling
// this makes the map non-searchable.
void
SecMap::AddRuleSet(RuleSet* rs) {
  // Appending breaks the sorted/preened invariant, so mark the map
  // unusable before the push_back; PrepareRuleSets must be called
  // again before the next lookup.
  mUsable = false;
  mRuleSets.push_back(*rs);
}
// Comparator used by std::sort in PrepareRuleSets: orders RuleSets
// by ascending start address.
static bool
CmpRuleSetsByAddrLE(const RuleSet& rs1, const RuleSet& rs2) {
  return rs1.mAddr < rs2.mAddr;
}
// Prepare the map for searching.  Completely remove any which don't
// fall inside the specified range [start, +len).
//
// On return the RuleSets are sorted by address, non-overlapping and
// non-zero-length, the summary min/max addresses bracket them, and
// the map is marked usable for binary search.
void
SecMap::PrepareRuleSets(uintptr_t aStart, size_t aLen)
{
  if (mRuleSets.empty()) {
    return;
  }

  MOZ_ASSERT(aLen > 0);
  if (aLen == 0) {
    // This should never happen.
    mRuleSets.clear();
    return;
  }

  // Sort by start addresses.
  std::sort(mRuleSets.begin(), mRuleSets.end(), CmpRuleSetsByAddrLE);

  // Detect any entry not completely contained within [start, +len).
  // Set its length to zero, so that the next pass will remove it.
  for (size_t i = 0; i < mRuleSets.size(); ++i) {
    RuleSet* rs = &mRuleSets[i];
    if (rs->mLen > 0 &&
        (rs->mAddr < aStart || rs->mAddr + rs->mLen > aStart + aLen)) {
      rs->mLen = 0;
    }
  }

  // Iteratively truncate any overlaps and remove any zero length
  // entries that might result, or that may have been present
  // initially.  Unless the input is seriously screwy, this is
  // expected to iterate only once.
  while (true) {
    size_t i;
    size_t n = mRuleSets.size();
    size_t nZeroLen = 0;

    if (n == 0) {
      break;
    }

    // Pass 1: truncate each entry that overlaps its successor, and
    // count how many entries end up with zero length.
    for (i = 1; i < n; ++i) {
      RuleSet* prev = &mRuleSets[i-1];
      RuleSet* here = &mRuleSets[i];
      MOZ_ASSERT(prev->mAddr <= here->mAddr);
      if (prev->mAddr + prev->mLen > here->mAddr) {
        prev->mLen = here->mAddr - prev->mAddr;
      }
      if (prev->mLen == 0)
        nZeroLen++;
    }

    // The loop above never examines the length of the last entry.
    if (mRuleSets[n-1].mLen == 0) {
      nZeroLen++;
    }

    // At this point, the entries are in-order and non-overlapping.
    // If none of them are zero-length, we are done.
    if (nZeroLen == 0) {
      break;
    }

    // Slide back the entries to remove the zero length ones.
    size_t j = 0;  // The write-point.
    for (i = 0; i < n; ++i) {
      if (mRuleSets[i].mLen == 0) {
        continue;
      }
      if (j != i) mRuleSets[j] = mRuleSets[i];
      ++j;
    }
    MOZ_ASSERT(i == n);
    MOZ_ASSERT(nZeroLen <= n);
    MOZ_ASSERT(j == n - nZeroLen);
    // Trim the vector down to the |j| surviving entries.
    while (nZeroLen > 0) {
      mRuleSets.pop_back();
      nZeroLen--;
    }

    MOZ_ASSERT(mRuleSets.size() == j);
  }

  size_t n = mRuleSets.size();

#ifdef DEBUG
  // Do a final check on the rules: their address ranges must be
  // ascending, non overlapping, non zero sized.
  if (n > 0) {
    MOZ_ASSERT(mRuleSets[0].mLen > 0);
    for (size_t i = 1; i < n; ++i) {
      RuleSet* prev = &mRuleSets[i-1];
      RuleSet* here = &mRuleSets[i];
      MOZ_ASSERT(prev->mAddr < here->mAddr);
      MOZ_ASSERT(here->mLen > 0);
      MOZ_ASSERT(prev->mAddr + prev->mLen <= here->mAddr);
    }
  }
#endif

  // Set the summary min and max address values.
  if (n == 0) {
    // Use the values defined in comments in the class declaration.
    mSummaryMinAddr = 1;
    mSummaryMaxAddr = 0;
  } else {
    mSummaryMinAddr = mRuleSets[0].mAddr;
    mSummaryMaxAddr = mRuleSets[n-1].mAddr + mRuleSets[n-1].mLen - 1;
  }
  char buf[150];
  snprintf(buf, sizeof(buf),
           "PrepareRuleSets: %d entries, smin/smax 0x%llx, 0x%llx\n",
           (int)n, (unsigned long long int)mSummaryMinAddr,
                   (unsigned long long int)mSummaryMaxAddr);
  buf[sizeof(buf)-1] = 0;
  mLog(buf);

  // Is now usable for binary search.
  mUsable = true;

  // Debug-only dump of the final rulesets; flip the 0 to 1 to enable.
  if (0) {
    mLog("\nRulesets after preening\n");
    for (size_t i = 0; i < mRuleSets.size(); ++i) {
      mRuleSets[i].Print(mLog);
      mLog("\n");
    }
    mLog("\n");
  }
}
// True iff this SecMap currently holds no RuleSets.
bool SecMap::IsEmpty() {
  return mRuleSets.empty();
}
   370 ////////////////////////////////////////////////////////////////
   371 // SegArray                                                   //
   372 ////////////////////////////////////////////////////////////////
   374 // A SegArray holds a set of address ranges that together exactly
   375 // cover an address range, with no overlaps or holes.  Each range has
   376 // an associated value, which in this case has been specialised to be
   377 // a simple boolean.  The representation is kept to minimal canonical
   378 // form in which adjacent ranges with the same associated value are
   379 // merged together.  Each range is represented by a |struct Seg|.
   380 //
   381 // SegArrays are used to keep track of which parts of the address
   382 // space are known to contain instructions.
   383 class SegArray {
   385  public:
   386   void add(uintptr_t lo, uintptr_t hi, bool val) {
   387     if (lo > hi) {
   388       return;
   389     }
   390     split_at(lo);
   391     if (hi < UINTPTR_MAX) {
   392       split_at(hi+1);
   393     }
   394     std::vector<Seg>::size_type iLo, iHi, i;
   395     iLo = find(lo);
   396     iHi = find(hi);
   397     for (i = iLo; i <= iHi; ++i) {
   398       mSegs[i].val = val;
   399     }
   400     preen();
   401   }
   403   bool getBoundingCodeSegment(/*OUT*/uintptr_t* rx_min,
   404                               /*OUT*/uintptr_t* rx_max, uintptr_t addr) {
   405     std::vector<Seg>::size_type i = find(addr);
   406     if (!mSegs[i].val) {
   407       return false;
   408     }
   409     *rx_min = mSegs[i].lo;
   410     *rx_max = mSegs[i].hi;
   411     return true;
   412   }
   414   SegArray() {
   415     Seg s(0, UINTPTR_MAX, false);
   416     mSegs.push_back(s);
   417   }
   419  private:
   420   struct Seg {
   421     Seg(uintptr_t lo, uintptr_t hi, bool val) : lo(lo), hi(hi), val(val) {}
   422     uintptr_t lo;
   423     uintptr_t hi;
   424     bool val;
   425   };
   427   void preen() {
   428     for (std::vector<Seg>::iterator iter = mSegs.begin();
   429          iter < mSegs.end()-1;
   430          ++iter) {
   431       if (iter[0].val != iter[1].val) {
   432         continue;
   433       }
   434       iter[0].hi = iter[1].hi;
   435       mSegs.erase(iter+1);
   436       // Back up one, so as not to miss an opportunity to merge
   437       // with the entry after this one.
   438       --iter;
   439     }
   440   }
   442   std::vector<Seg>::size_type find(uintptr_t a) {
   443     long int lo = 0;
   444     long int hi = (long int)mSegs.size();
   445     while (true) {
   446       // The unsearched space is lo .. hi inclusive.
   447       if (lo > hi) {
   448         // Not found.  This can't happen.
   449         return (std::vector<Seg>::size_type)(-1);
   450       }
   451       long int  mid    = lo + ((hi - lo) / 2);
   452       uintptr_t mid_lo = mSegs[mid].lo;
   453       uintptr_t mid_hi = mSegs[mid].hi;
   454       if (a < mid_lo) { hi = mid-1; continue; }
   455       if (a > mid_hi) { lo = mid+1; continue; }
   456       return (std::vector<Seg>::size_type)mid;
   457     }
   458   }
   460   void split_at(uintptr_t a) {
   461     std::vector<Seg>::size_type i = find(a);
   462     if (mSegs[i].lo == a) {
   463       return;
   464     }
   465     mSegs.insert( mSegs.begin()+i+1, mSegs[i] );
   466     mSegs[i].hi = a-1;
   467     mSegs[i+1].lo = a;
   468   }
   470   void show() {
   471     printf("<< %d entries:\n", (int)mSegs.size());
   472     for (std::vector<Seg>::iterator iter = mSegs.begin();
   473          iter < mSegs.end();
   474          ++iter) {
   475       printf("  %016llx  %016llx  %s\n",
   476              (unsigned long long int)(*iter).lo,
   477              (unsigned long long int)(*iter).hi,
   478              (*iter).val ? "true" : "false");
   479     }
   480     printf(">>\n");
   481   }
   483   std::vector<Seg> mSegs;
   484 };
   487 ////////////////////////////////////////////////////////////////
   488 // PriMap                                                     //
   489 ////////////////////////////////////////////////////////////////
// PriMap: the primary map.  Owns the per-object SecMaps, kept sorted
// by address range and non-overlapping, and provides lookup of a
// RuleSet for an arbitrary instruction address.
class PriMap {
 public:
  // |aLog| is the debug logging sink.
  PriMap(void (*aLog)(const char*))
    : mLog(aLog)
  {}

  // Deletes all owned SecMaps.
  ~PriMap() {
    for (std::vector<SecMap*>::iterator iter = mSecMaps.begin();
         iter != mSecMaps.end();
         ++iter) {
      delete *iter;
    }
    mSecMaps.clear();
  }

  // This can happen with the global lock held for reading.
  RuleSet* Lookup(uintptr_t ia) {
    SecMap* sm = FindSecMap(ia);
    return sm ? sm->FindRuleSet(ia) : nullptr;
  }

  // Add a secondary map.  No overlaps allowed w.r.t. existing
  // secondary maps.  Global lock must be held for writing.
  void AddSecMap(SecMap* aSecMap) {
    // We can't add an empty SecMap to the PriMap.  But that's OK
    // since we'd never be able to find anything in it anyway.
    if (aSecMap->IsEmpty()) {
      return;
    }

    // Iterate through the SecMaps and find the right place for this
    // one.  At the same time, ensure that the in-order
    // non-overlapping invariant is preserved (and, generally, holds).
    // FIXME: this gives a cost that is O(N^2) in the total number of
    // shared objects in the system.  ToDo: better.
    MOZ_ASSERT(aSecMap->mSummaryMinAddr <= aSecMap->mSummaryMaxAddr);

    size_t num_secMaps = mSecMaps.size();
    uintptr_t i;
    for (i = 0; i < num_secMaps; ++i) {
      SecMap* sm_i = mSecMaps[i];
      MOZ_ASSERT(sm_i->mSummaryMinAddr <= sm_i->mSummaryMaxAddr);
      // NOTE(review): this compares the new map's min against the
      // existing map's *max* address.  Given the non-overlap
      // invariant this picks the first map not entirely below
      // |aSecMap| -- confirm this is the intended insertion test.
      if (aSecMap->mSummaryMinAddr < sm_i->mSummaryMaxAddr) {
        // |aSecMap| needs to be inserted immediately before mSecMaps[i].
        break;
      }
    }
    MOZ_ASSERT(i <= num_secMaps);
    if (i == num_secMaps) {
      // It goes at the end.
      mSecMaps.push_back(aSecMap);
    } else {
      std::vector<SecMap*>::iterator iter = mSecMaps.begin() + i;
      mSecMaps.insert(iter, aSecMap);
    }
    char buf[100];
    snprintf(buf, sizeof(buf), "AddSecMap: now have %d SecMaps\n",
             (int)mSecMaps.size());
    buf[sizeof(buf)-1] = 0;
    mLog(buf);
  }

  // Remove and delete any SecMaps in the mapping, that intersect
  // with the specified address range.
  void RemoveSecMapsInRange(uintptr_t avma_min, uintptr_t avma_max) {
    MOZ_ASSERT(avma_min <= avma_max);
    size_t num_secMaps = mSecMaps.size();
    if (num_secMaps > 0) {
      intptr_t i;
      // Iterate from end to start over the vector, so as to ensure
      // that the special case where |avma_min| and |avma_max| denote
      // the entire address space, can be completed in time proportional
      // to the number of elements in the map.
      for (i = (intptr_t)num_secMaps-1; i >= 0; i--) {
        SecMap* sm_i = mSecMaps[i];
        if (sm_i->mSummaryMaxAddr < avma_min ||
            avma_max < sm_i->mSummaryMinAddr) {
          // There's no overlap.  Move on.
          continue;
        }
        // We need to remove mSecMaps[i] and slide all those above it
        // downwards to cover the hole.
        mSecMaps.erase(mSecMaps.begin() + i);
        delete sm_i;
      }
    }
  }

  // Return the number of currently contained SecMaps.
  size_t CountSecMaps() {
    return mSecMaps.size();
  }

  // Assess heuristically whether the given address is an instruction
  // immediately following a call instruction.  The caller is required
  // to hold the global lock for reading.
  bool MaybeIsReturnPoint(TaggedUWord aInstrAddr, SegArray* aSegArray) {
    if (!aInstrAddr.Valid()) {
      return false;
    }

    uintptr_t ia = aInstrAddr.Value();

    // Assume that nobody would be crazy enough to put code in the
    // first or last page.
    if (ia < 4096 || ((uintptr_t)(-ia)) < 4096) {
      return false;
    }

    // See if it falls inside a known r-x mapped area.  Poking around
    // outside such places risks segfaulting.
    uintptr_t insns_min, insns_max;
    bool b = aSegArray->getBoundingCodeSegment(&insns_min, &insns_max, ia);
    if (!b) {
      // no code (that we know about) at this address
      return false;
    }

    // |ia| falls within an r-x range.  So we can
    // safely poke around in [insns_min, insns_max].

#if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
    // Is the previous instruction recognisably a CALL?  This is
    // common for the 32- and 64-bit versions, except for the
    // simm32(%rip) case, which is 64-bit only.
    //
    // For all other cases, the 64 bit versions are either identical
    // to the 32 bit versions, or have an optional extra leading REX.W
    // byte (0x41).  Since the extra 0x41 is optional we have to
    // ignore it, with the convenient result that the same matching
    // logic works for both 32- and 64-bit cases.

    uint8_t* p = (uint8_t*)ia;
#   if defined(LUL_ARCH_x64)
    // CALL simm32(%rip)  == FF15 simm32
    if (ia - 6 >= insns_min && p[-6] == 0xFF && p[-5] == 0x15) {
      return true;
    }
#   endif
    // CALL rel32  == E8 rel32  (both 32- and 64-bit)
    if (ia - 5 >= insns_min && p[-5] == 0xE8) {
      return true;
    }
    // CALL *%eax .. CALL *%edi  ==   FFD0 ..   FFD7  (32-bit)
    // CALL *%rax .. CALL *%rdi  ==   FFD0 ..   FFD7  (64-bit)
    // CALL *%r8  .. CALL *%r15  == 41FFD0 .. 41FFD7  (64-bit)
    if (ia - 2 >= insns_min &&
        p[-2] == 0xFF && p[-1] >= 0xD0 && p[-1] <= 0xD7) {
      return true;
    }
    // Almost all of the remaining cases that occur in practice are
    // of the form CALL *simm8(reg) or CALL *simm32(reg).
    //
    // 64 bit cases:
    //
    // call  *simm8(%rax)         FF50   simm8
    // call  *simm8(%rcx)         FF51   simm8
    // call  *simm8(%rdx)         FF52   simm8
    // call  *simm8(%rbx)         FF53   simm8
    // call  *simm8(%rsp)         FF5424 simm8
    // call  *simm8(%rbp)         FF55   simm8
    // call  *simm8(%rsi)         FF56   simm8
    // call  *simm8(%rdi)         FF57   simm8
    //
    // call  *simm8(%r8)        41FF50   simm8
    // call  *simm8(%r9)        41FF51   simm8
    // call  *simm8(%r10)       41FF52   simm8
    // call  *simm8(%r11)       41FF53   simm8
    // call  *simm8(%r12)       41FF5424 simm8
    // call  *simm8(%r13)       41FF55   simm8
    // call  *simm8(%r14)       41FF56   simm8
    // call  *simm8(%r15)       41FF57   simm8
    //
    // call  *simm32(%rax)        FF90   simm32
    // call  *simm32(%rcx)        FF91   simm32
    // call  *simm32(%rdx)        FF92   simm32
    // call  *simm32(%rbx)        FF93   simm32
    // call  *simm32(%rsp)        FF9424 simm32
    // call  *simm32(%rbp)        FF95   simm32
    // call  *simm32(%rsi)        FF96   simm32
    // call  *simm32(%rdi)        FF97   simm32
    //
    // call  *simm32(%r8)       41FF90   simm32
    // call  *simm32(%r9)       41FF91   simm32
    // call  *simm32(%r10)      41FF92   simm32
    // call  *simm32(%r11)      41FF93   simm32
    // call  *simm32(%r12)      41FF9424 simm32
    // call  *simm32(%r13)      41FF95   simm32
    // call  *simm32(%r14)      41FF96   simm32
    // call  *simm32(%r15)      41FF97   simm32
    //
    // 32 bit cases:
    //
    // call  *simm8(%eax)         FF50   simm8
    // call  *simm8(%ecx)         FF51   simm8
    // call  *simm8(%edx)         FF52   simm8
    // call  *simm8(%ebx)         FF53   simm8
    // call  *simm8(%esp)         FF5424 simm8
    // call  *simm8(%ebp)         FF55   simm8
    // call  *simm8(%esi)         FF56   simm8
    // call  *simm8(%edi)         FF57   simm8
    //
    // call  *simm32(%eax)        FF90   simm32
    // call  *simm32(%ecx)        FF91   simm32
    // call  *simm32(%edx)        FF92   simm32
    // call  *simm32(%ebx)        FF93   simm32
    // call  *simm32(%esp)        FF9424 simm32
    // call  *simm32(%ebp)        FF95   simm32
    // call  *simm32(%esi)        FF96   simm32
    // call  *simm32(%edi)        FF97   simm32
    if (ia - 3 >= insns_min &&
        p[-3] == 0xFF &&
        (p[-2] >= 0x50 && p[-2] <= 0x57 && p[-2] != 0x54)) {
      // imm8 case, not including %esp/%rsp
      return true;
    }
    if (ia - 4 >= insns_min &&
        p[-4] == 0xFF && p[-3] == 0x54 && p[-2] == 0x24) {
      // imm8 case for %esp/%rsp
      return true;
    }
    if (ia - 6 >= insns_min &&
        p[-6] == 0xFF &&
        (p[-5] >= 0x90 && p[-5] <= 0x97 && p[-5] != 0x94)) {
      // imm32 case, not including %esp/%rsp
      return true;
    }
    if (ia - 7 >= insns_min &&
        p[-7] == 0xFF && p[-6] == 0x94 && p[-5] == 0x24) {
      // imm32 case for %esp/%rsp
      return true;
    }

#elif defined(LUL_ARCH_arm)
    if (ia & 1) {
      uint16_t w0 = 0, w1 = 0;
      // The return address has its lowest bit set, indicating a return
      // to Thumb code.
      ia &= ~(uintptr_t)1;
      if (ia - 2 >= insns_min && ia - 1 <= insns_max) {
        w1 = *(uint16_t*)(ia - 2);
      }
      if (ia - 4 >= insns_min && ia - 1 <= insns_max) {
        w0 = *(uint16_t*)(ia - 4);
      }
      // Is it a 32-bit Thumb call insn?
      // BL  simm26 (Encoding T1)
      if ((w0 & 0xF800) == 0xF000 && (w1 & 0xC000) == 0xC000) {
        return true;
      }
      // BLX simm26 (Encoding T2)
      // NOTE(review): this test is bit-for-bit identical to the BL
      // test just above, so it is redundant as written; the actual
      // BLX (T2) encoding differs from BL in the low bits of the
      // second halfword -- confirm against the ARM ARM.
      if ((w0 & 0xF800) == 0xF000 && (w1 & 0xC000) == 0xC000) {
        return true;
      }
      // Other possible cases:
      // (BLX Rm, Encoding T1).
      // BLX Rm (encoding T1, 16 bit, inspect w1 and ignore w0.)
      // 0100 0111 1 Rm 000
    } else {
      // Returning to ARM code.
      uint32_t a0 = 0;
      if ((ia & 3) == 0 && ia - 4 >= insns_min && ia - 1 <= insns_max) {
        a0 = *(uint32_t*)(ia - 4);
      }
      // Leading E forces unconditional only -- fix.  It could be
      // anything except F, which is the deprecated NV code.
      // BL simm26 (Encoding A1)
      if ((a0 & 0xFF000000) == 0xEB000000) {
        return true;
      }
      // Other possible cases:
      // BLX simm26 (Encoding A2)
      //if ((a0 & 0xFE000000) == 0xFA000000)
      //  return true;
      // BLX (register) (A1): BLX <c> <Rm>
      // cond 0001 0010 1111 1111 1111 0011 Rm
      // again, cond can be anything except NV (0xF)
    }

#else
# error "Unsupported arch"
#endif

    // Not an insn we recognise.
    return false;
  }

 private:
  // FindSecMap's caller must hold the global lock for reading or writing.
  SecMap* FindSecMap(uintptr_t ia) {
    // Binary search mSecMaps to find one that brackets |ia|.
    // lo and hi need to be signed, else the loop termination tests
    // don't work properly.
    long int lo = 0;
    long int hi = (long int)mSecMaps.size() - 1;
    while (true) {
      // current unsearched space is from lo to hi, inclusive.
      if (lo > hi) {
        // not found
        return nullptr;
      }
      long int  mid         = lo + ((hi - lo) / 2);
      SecMap*   mid_secMap  = mSecMaps[mid];
      uintptr_t mid_minAddr = mid_secMap->mSummaryMinAddr;
      uintptr_t mid_maxAddr = mid_secMap->mSummaryMaxAddr;
      if (ia < mid_minAddr) { hi = mid-1; continue; }
      if (ia > mid_maxAddr) { lo = mid+1; continue; }
      MOZ_ASSERT(mid_minAddr <= ia && ia <= mid_maxAddr);
      return mid_secMap;
    }
    // NOTREACHED
  }

 private:
  // sorted array of per-object ranges, non overlapping, non empty
  std::vector<SecMap*> mSecMaps;

  // a logging sink, for debugging.
  void (*mLog)(const char*);
};
   813 ////////////////////////////////////////////////////////////////
   814 // CFICache                                                   //
   815 ////////////////////////////////////////////////////////////////
   817 // This is the thread-local cache.  It maps individual insn AVMAs to
   818 // the associated CFI record, which live in LUL::mPriMap.
   819 //
   820 // The cache is a direct map hash table indexed by address.
   821 // It has to distinguish 3 cases:
   822 //
   823 // (1) .mRSet == (RuleSet*)0  ==> cache slot not in use
   824 // (2) .mRSet == (RuleSet*)1  ==> slot in use, no RuleSet avail
   825 // (3) .mRSet >  (RuleSet*)1  ==> slot in use, RuleSet* available
   826 //
   827 // Distinguishing between (2) and (3) is important, because if we look
   828 // up an address in LUL::mPriMap and find there is no RuleSet, then
   829 // that fact needs to cached here, so as to avoid potentially
   830 // expensive repeat lookups.
   832 // A CFICacheEntry::mRSet value of zero indicates that the slot is not
   833 // in use, and a value of one indicates that the slot is in use but
   834 // there is no RuleSet available.
   835 #define ENTRY_NOT_IN_USE      ((RuleSet*)0)
   836 #define NO_RULESET_AVAILABLE  ((RuleSet*)1)
   838 class CFICache {
   839  public:
   841   CFICache(PriMap* aPriMap) {
   842     Invalidate();
   843     mPriMap = aPriMap;
   844   }
   846   void Invalidate() {
   847     for (int i = 0; i < N_ENTRIES; ++i) {
   848       mCache[i].mAVMA = 0;
   849       mCache[i].mRSet = ENTRY_NOT_IN_USE;
   850     }
   851   }
   853   RuleSet* Lookup(uintptr_t ia) {
   854     uintptr_t hash = ia % (uintptr_t)N_ENTRIES;
   855     CFICacheEntry* ce = &mCache[hash];
   856     if (ce->mAVMA == ia) {
   857       // The cache has an entry for |ia|.  Interpret it.
   858       if (ce->mRSet > NO_RULESET_AVAILABLE) {
   859         // There's a RuleSet.  So return it.
   860         return ce->mRSet;
   861       }
   862       if (ce->mRSet == NO_RULESET_AVAILABLE) {
   863         // There's no RuleSet for this address.  Don't update
   864         // the cache, since we might get queried again.
   865         return nullptr;
   866       }
   867       // Otherwise, the slot is not in use.  Fall through to
   868       // the 'miss' case.
   869     }
   871     // The cache entry is for some other address, or is not in use.
   872     // Update it.  If it can be found in the priMap then install it
   873     // as-is.  Else put NO_RULESET_AVAILABLE in, so as to indicate
   874     // there's no info for this address.
   875     RuleSet* fallback  = mPriMap->Lookup(ia);
   876     mCache[hash].mAVMA = ia;
   877     mCache[hash].mRSet = fallback ? fallback : NO_RULESET_AVAILABLE;
   878     return fallback;
   879   }
   881  private:
   882   // This should be a prime number.
   883   static const int N_ENTRIES = 509;
   885   // See comment above for the meaning of these entries.
   886   struct CFICacheEntry {
   887     uintptr_t mAVMA; // AVMA of the associated instruction
   888     RuleSet*  mRSet; // RuleSet* for the instruction
   889   };
   890   CFICacheEntry mCache[N_ENTRIES];
   892   // Need to have a pointer to the PriMap, so as to be able
   893   // to service misses.
   894   PriMap* mPriMap;
   895 };
   897 #undef ENTRY_NOT_IN_USE
   898 #undef NO_RULESET_AVAILABLE
   901 ////////////////////////////////////////////////////////////////
   902 // LUL                                                        //
   903 ////////////////////////////////////////////////////////////////
   905 LUL::LUL(void (*aLog)(const char*))
   906 {
   907   mRWlock = new LulRWLock();
   908   AutoLulRWLocker lock(mRWlock, AutoLulRWLocker::FOR_WRITING);
   909   mLog = aLog;
   910   mPriMap = new PriMap(aLog);
   911   mSegArray = new SegArray();
   912 }
   915 LUL::~LUL()
   916 {
   917   // The auto-locked section must have its own scope, so that the
   918   // unlock is performed before the mRWLock is deleted.
   919   {
   920     AutoLulRWLocker lock(mRWlock, AutoLulRWLocker::FOR_WRITING);
   921     for (std::map<pthread_t,CFICache*>::iterator iter = mCaches.begin();
   922          iter != mCaches.end();
   923          ++iter) {
   924       delete iter->second;
   925     }
   926     delete mPriMap;
   927     delete mSegArray;
   928     mLog = nullptr;
   929   }
   930   // Now we don't hold the lock.  Hence it is safe to delete it.
   931   delete mRWlock;
   932 }
   935 void
   936 LUL::RegisterUnwinderThread()
   937 {
   938   AutoLulRWLocker lock(mRWlock, AutoLulRWLocker::FOR_WRITING);
   940   pthread_t me = pthread_self();
   941   CFICache* cache = new CFICache(mPriMap);
   943   std::pair<std::map<pthread_t,CFICache*>::iterator, bool> res
   944     = mCaches.insert(std::pair<pthread_t,CFICache*>(me, cache));
   945   // "this thread is not already registered"
   946   MOZ_ASSERT(res.second); // "new element was inserted"
   947   // Using mozilla::DebugOnly to declare |res| leads to compilation error
   948   (void)res.second;
   949 }
   951 void
   952 LUL::NotifyAfterMap(uintptr_t aRXavma, size_t aSize,
   953                     const char* aFileName, const void* aMappedImage)
   954 {
   955   AutoLulRWLocker lock(mRWlock, AutoLulRWLocker::FOR_WRITING);
   957   mLog(":\n");
   958   char buf[200];
   959   snprintf(buf, sizeof(buf), "NotifyMap %llx %llu %s\n",
   960            (unsigned long long int)aRXavma, (unsigned long long int)aSize,
   961            aFileName);
   962   buf[sizeof(buf)-1] = 0;
   963   mLog(buf);
   965   InvalidateCFICaches();
   967   // Ignore obviously-stupid notifications.
   968   if (aSize > 0) {
   970     // Here's a new mapping, for this object.
   971     SecMap* smap = new SecMap(mLog);
   973     // Read CFI or EXIDX unwind data into |smap|.
   974     if (!aMappedImage) {
   975       (void)lul::ReadSymbolData(
   976               string(aFileName), std::vector<string>(), smap,
   977               (void*)aRXavma, mLog);
   978     } else {
   979       (void)lul::ReadSymbolDataInternal(
   980               (const uint8_t*)aMappedImage,
   981               string(aFileName), std::vector<string>(), smap,
   982               (void*)aRXavma, mLog);
   983     }
   985     mLog("NotifyMap .. preparing entries\n");
   987     smap->PrepareRuleSets(aRXavma, aSize);
   989     snprintf(buf, sizeof(buf),
   990              "NotifyMap got %lld entries\n", (long long int)smap->Size());
   991     buf[sizeof(buf)-1] = 0;
   992     mLog(buf);
   994     // Add it to the primary map (the top level set of mapped objects).
   995     mPriMap->AddSecMap(smap);
   997     // Tell the segment array about the mapping, so that the stack
   998     // scan and __kernel_syscall mechanisms know where valid code is.
   999     mSegArray->add(aRXavma, aRXavma + aSize - 1, true);
  1004 void
  1005 LUL::NotifyExecutableArea(uintptr_t aRXavma, size_t aSize)
  1007   AutoLulRWLocker lock(mRWlock, AutoLulRWLocker::FOR_WRITING);
  1009   mLog(":\n");
  1010   char buf[200];
  1011   snprintf(buf, sizeof(buf), "NotifyExecutableArea %llx %llu\n",
  1012            (unsigned long long int)aRXavma, (unsigned long long int)aSize);
  1013   buf[sizeof(buf)-1] = 0;
  1014   mLog(buf);
  1016   InvalidateCFICaches();
  1018   // Ignore obviously-stupid notifications.
  1019   if (aSize > 0) {
  1020     // Tell the segment array about the mapping, so that the stack
  1021     // scan and __kernel_syscall mechanisms know where valid code is.
  1022     mSegArray->add(aRXavma, aRXavma + aSize - 1, true);
  1027 void
  1028 LUL::NotifyBeforeUnmap(uintptr_t aRXavmaMin, uintptr_t aRXavmaMax)
  1030   AutoLulRWLocker lock(mRWlock, AutoLulRWLocker::FOR_WRITING);
  1032   mLog(":\n");
  1033   char buf[100];
  1034   snprintf(buf, sizeof(buf), "NotifyUnmap %016llx-%016llx\n",
  1035            (unsigned long long int)aRXavmaMin,
  1036            (unsigned long long int)aRXavmaMax);
  1037   buf[sizeof(buf)-1] = 0;
  1038   mLog(buf);
  1040   MOZ_ASSERT(aRXavmaMin <= aRXavmaMax);
  1042   InvalidateCFICaches();
  1044   // Remove from the primary map, any secondary maps that intersect
  1045   // with the address range.  Also delete the secondary maps.
  1046   mPriMap->RemoveSecMapsInRange(aRXavmaMin, aRXavmaMax);
  1048   // Tell the segment array that the address range no longer
  1049   // contains valid code.
  1050   mSegArray->add(aRXavmaMin, aRXavmaMax, false);
  1052   snprintf(buf, sizeof(buf), "NotifyUnmap: now have %d SecMaps\n",
  1053            (int)mPriMap->CountSecMaps());
  1054   buf[sizeof(buf)-1] = 0;
  1055   mLog(buf);
  1059 size_t
  1060 LUL::CountMappings()
  1062   AutoLulRWLocker lock(mRWlock, AutoLulRWLocker::FOR_WRITING);
  1063   return mPriMap->CountSecMaps();
  1067 static
  1068 TaggedUWord DerefTUW(TaggedUWord aAddr, StackImage* aStackImg)
  1070   if (!aAddr.Valid()) {
  1071     return TaggedUWord();
  1073   if (aAddr.Value() < aStackImg->mStartAvma) {
  1074     return TaggedUWord();
  1076   if (aAddr.Value() + sizeof(uintptr_t) > aStackImg->mStartAvma
  1077                                           + aStackImg->mLen) {
  1078     return TaggedUWord();
  1080   return TaggedUWord(*(uintptr_t*)(aStackImg->mContents + aAddr.Value()
  1081                                    - aStackImg->mStartAvma));
  1084 static
  1085 TaggedUWord EvaluateReg(int16_t aReg, UnwindRegs* aOldRegs, TaggedUWord aCFA)
  1087   switch (aReg) {
  1088     case DW_REG_CFA:       return aCFA;
  1089 #if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
  1090     case DW_REG_INTEL_XBP: return aOldRegs->xbp;
  1091     case DW_REG_INTEL_XSP: return aOldRegs->xsp;
  1092     case DW_REG_INTEL_XIP: return aOldRegs->xip;
  1093 #elif defined(LUL_ARCH_arm)
  1094     case DW_REG_ARM_R7:    return aOldRegs->r7;
  1095     case DW_REG_ARM_R11:   return aOldRegs->r11;
  1096     case DW_REG_ARM_R12:   return aOldRegs->r12;
  1097     case DW_REG_ARM_R13:   return aOldRegs->r13;
  1098     case DW_REG_ARM_R14:   return aOldRegs->r14;
  1099     case DW_REG_ARM_R15:   return aOldRegs->r15;
  1100 #else
  1101 # error "Unsupported arch"
  1102 #endif
  1103     default: MOZ_ASSERT(0); return TaggedUWord();
  1107 static
  1108 TaggedUWord EvaluateExpr(LExpr aExpr, UnwindRegs* aOldRegs,
  1109                          TaggedUWord aCFA, StackImage* aStackImg)
  1111   switch (aExpr.mHow) {
  1112     case LExpr::UNKNOWN:
  1113       return TaggedUWord();
  1114     case LExpr::NODEREF: {
  1115       TaggedUWord tuw = EvaluateReg(aExpr.mReg, aOldRegs, aCFA);
  1116       tuw.Add(TaggedUWord((intptr_t)aExpr.mOffset));
  1117       return tuw;
  1119     case LExpr::DEREF: {
  1120       TaggedUWord tuw = EvaluateReg(aExpr.mReg, aOldRegs, aCFA);
  1121       tuw.Add(TaggedUWord((intptr_t)aExpr.mOffset));
  1122       return DerefTUW(tuw, aStackImg);
  1124     default:
  1125       MOZ_ASSERT(0);
  1126       return TaggedUWord();
  1130 static
  1131 void UseRuleSet(/*MOD*/UnwindRegs* aRegs,
  1132                 StackImage* aStackImg, RuleSet* aRS)
  1134   // Take a copy of regs, since we'll need to refer to the old values
  1135   // whilst computing the new ones.
  1136   UnwindRegs old_regs = *aRegs;
  1138   // Mark all the current register values as invalid, so that the
  1139   // caller can see, on our return, which ones have been computed
  1140   // anew.  If we don't even manage to compute a new PC value, then
  1141   // the caller will have to abandon the unwind.
  1142   // FIXME: Create and use instead: aRegs->SetAllInvalid();
  1143 #if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
  1144   aRegs->xbp = TaggedUWord();
  1145   aRegs->xsp = TaggedUWord();
  1146   aRegs->xip = TaggedUWord();
  1147 #elif defined(LUL_ARCH_arm)
  1148   aRegs->r7  = TaggedUWord();
  1149   aRegs->r11 = TaggedUWord();
  1150   aRegs->r12 = TaggedUWord();
  1151   aRegs->r13 = TaggedUWord();
  1152   aRegs->r14 = TaggedUWord();
  1153   aRegs->r15 = TaggedUWord();
  1154 #else
  1155 #  error "Unsupported arch"
  1156 #endif
  1158   // This is generally useful.
  1159   const TaggedUWord inval = TaggedUWord();
  1161   // First, compute the CFA.
  1162   TaggedUWord cfa = EvaluateExpr(aRS->mCfaExpr, &old_regs,
  1163                                  inval/*old cfa*/, aStackImg);
  1165   // If we didn't manage to compute the CFA, well .. that's ungood,
  1166   // but keep going anyway.  It'll be OK provided none of the register
  1167   // value rules mention the CFA.  In any case, compute the new values
  1168   // for each register that we're tracking.
  1170 #if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
  1171   aRegs->xbp = EvaluateExpr(aRS->mXbpExpr, &old_regs, cfa, aStackImg);
  1172   aRegs->xsp = EvaluateExpr(aRS->mXspExpr, &old_regs, cfa, aStackImg);
  1173   aRegs->xip = EvaluateExpr(aRS->mXipExpr, &old_regs, cfa, aStackImg);
  1174 #elif defined(LUL_ARCH_arm)
  1175   aRegs->r7  = EvaluateExpr(aRS->mR7expr,  &old_regs, cfa, aStackImg);
  1176   aRegs->r11 = EvaluateExpr(aRS->mR11expr, &old_regs, cfa, aStackImg);
  1177   aRegs->r12 = EvaluateExpr(aRS->mR12expr, &old_regs, cfa, aStackImg);
  1178   aRegs->r13 = EvaluateExpr(aRS->mR13expr, &old_regs, cfa, aStackImg);
  1179   aRegs->r14 = EvaluateExpr(aRS->mR14expr, &old_regs, cfa, aStackImg);
  1180   aRegs->r15 = EvaluateExpr(aRS->mR15expr, &old_regs, cfa, aStackImg);
  1181 #else
  1182 # error "Unsupported arch"
  1183 #endif
  1185   // We're done.  Any regs for which we didn't manage to compute a
  1186   // new value will now be marked as invalid.
  1189 void
  1190 LUL::Unwind(/*OUT*/uintptr_t* aFramePCs,
  1191             /*OUT*/uintptr_t* aFrameSPs,
  1192             /*OUT*/size_t* aFramesUsed, 
  1193             /*OUT*/size_t* aScannedFramesAcquired,
  1194             size_t aFramesAvail,
  1195             size_t aScannedFramesAllowed,
  1196             UnwindRegs* aStartRegs, StackImage* aStackImg)
  1198   AutoLulRWLocker lock(mRWlock, AutoLulRWLocker::FOR_READING);
  1200   pthread_t me = pthread_self();
  1201   std::map<pthread_t, CFICache*>::iterator iter = mCaches.find(me);
  1203   if (iter == mCaches.end()) {
  1204     // The calling thread is not registered for unwinding.
  1205     MOZ_CRASH();
  1206     return;
  1209   CFICache* cache = iter->second;
  1210   MOZ_ASSERT(cache);
  1212   // Do unwindery, possibly modifying |cache|.
  1214   /////////////////////////////////////////////////////////
  1215   // BEGIN UNWIND
  1217   *aFramesUsed = 0;
  1219   UnwindRegs  regs          = *aStartRegs;
  1220   TaggedUWord last_valid_sp = TaggedUWord();
  1222   // Stack-scan control
  1223   unsigned int n_scanned_frames      = 0;  // # s-s frames recovered so far
  1224   static const int NUM_SCANNED_WORDS = 50; // max allowed scan length
  1226   while (true) {
  1228     if (DEBUG_MAIN) {
  1229       char buf[300];
  1230       mLog("\n");
  1231 #if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
  1232       snprintf(buf, sizeof(buf),
  1233                "LoopTop: rip %d/%llx  rsp %d/%llx  rbp %d/%llx\n",
  1234                (int)regs.xip.Valid(), (unsigned long long int)regs.xip.Value(),
  1235                (int)regs.xsp.Valid(), (unsigned long long int)regs.xsp.Value(),
  1236                (int)regs.xbp.Valid(), (unsigned long long int)regs.xbp.Value());
  1237       buf[sizeof(buf)-1] = 0;
  1238       mLog(buf);
  1239 #elif defined(LUL_ARCH_arm)
  1240       snprintf(buf, sizeof(buf),
  1241                "LoopTop: r15 %d/%llx  r7 %d/%llx  r11 %d/%llx"
  1242                "  r12 %d/%llx  r13 %d/%llx  r14 %d/%llx\n",
  1243                (int)regs.r15.Valid(), (unsigned long long int)regs.r15.Value(),
  1244                (int)regs.r7.Valid(),  (unsigned long long int)regs.r7.Value(),
  1245                (int)regs.r11.Valid(), (unsigned long long int)regs.r11.Value(),
  1246                (int)regs.r12.Valid(), (unsigned long long int)regs.r12.Value(),
  1247                (int)regs.r13.Valid(), (unsigned long long int)regs.r13.Value(),
  1248                (int)regs.r14.Valid(), (unsigned long long int)regs.r14.Value());
  1249       buf[sizeof(buf)-1] = 0;
  1250       mLog(buf);
  1251 #else
  1252 # error "Unsupported arch"
  1253 #endif
  1256 #if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
  1257     TaggedUWord ia = regs.xip;
  1258     TaggedUWord sp = regs.xsp;
  1259 #elif defined(LUL_ARCH_arm)
  1260     TaggedUWord ia = (*aFramesUsed == 0 ? regs.r15 : regs.r14);
  1261     TaggedUWord sp = regs.r13;
  1262 #else
  1263 # error "Unsupported arch"
  1264 #endif
  1266     if (*aFramesUsed >= aFramesAvail) {
  1267       break;
  1270     // If we don't have a valid value for the PC, give up.
  1271     if (!ia.Valid()) {
  1272       break;
  1275     // If this is the innermost frame, record the SP value, which
  1276     // presumably is valid.  If this isn't the innermost frame, and we
  1277     // have a valid SP value, check that its SP value isn't less that
  1278     // the one we've seen so far, so as to catch potential SP value
  1279     // cycles.
  1280     if (*aFramesUsed == 0) {
  1281       last_valid_sp = sp;
  1282     } else {
  1283       MOZ_ASSERT(last_valid_sp.Valid());
  1284       if (sp.Valid()) {
  1285         if (sp.Value() < last_valid_sp.Value()) {
  1286           // Hmm, SP going in the wrong direction.  Let's stop.
  1287           break;
  1289         // Remember where we got to.
  1290         last_valid_sp = sp;
  1294     // For the innermost frame, the IA value is what we need.  For all
  1295     // other frames, it's actually the return address, so back up one
  1296     // byte so as to get it into the calling instruction.
  1297     aFramePCs[*aFramesUsed] = ia.Value() - (*aFramesUsed == 0 ? 0 : 1);
  1298     aFrameSPs[*aFramesUsed] = sp.Valid() ? sp.Value() : 0;
  1299     (*aFramesUsed)++;
  1301     // Find the RuleSet for the current IA, if any.  This will also
  1302     // query the backing (secondary) maps if it isn't found in the
  1303     // thread-local cache.
  1305     // If this isn't the innermost frame, back up into the calling insn.
  1306     if (*aFramesUsed > 1) {
  1307       ia.Add(TaggedUWord((uintptr_t)(-1)));
  1310     RuleSet* ruleset = cache->Lookup(ia.Value());
  1311     if (DEBUG_MAIN) {
  1312       char buf[100];
  1313       snprintf(buf, sizeof(buf), "ruleset for 0x%llx = %p\n",
  1314                (unsigned long long int)ia.Value(), ruleset);
  1315       buf[sizeof(buf)-1] = 0;
  1316       mLog(buf);
  1319     /////////////////////////////////////////////
  1320     ////
  1321     // On 32 bit x86-linux, syscalls are often done via the VDSO
  1322     // function __kernel_vsyscall, which doesn't have a corresponding
  1323     // object that we can read debuginfo from.  That effectively kills
  1324     // off all stack traces for threads blocked in syscalls.  Hence
  1325     // special-case by looking at the code surrounding the program
  1326     // counter.
  1327     //
  1328     // 0xf7757420 <__kernel_vsyscall+0>:	push   %ecx
  1329     // 0xf7757421 <__kernel_vsyscall+1>:	push   %edx
  1330     // 0xf7757422 <__kernel_vsyscall+2>:	push   %ebp
  1331     // 0xf7757423 <__kernel_vsyscall+3>:	mov    %esp,%ebp
  1332     // 0xf7757425 <__kernel_vsyscall+5>:	sysenter
  1333     // 0xf7757427 <__kernel_vsyscall+7>:	nop
  1334     // 0xf7757428 <__kernel_vsyscall+8>:	nop
  1335     // 0xf7757429 <__kernel_vsyscall+9>:	nop
  1336     // 0xf775742a <__kernel_vsyscall+10>:	nop
  1337     // 0xf775742b <__kernel_vsyscall+11>:	nop
  1338     // 0xf775742c <__kernel_vsyscall+12>:	nop
  1339     // 0xf775742d <__kernel_vsyscall+13>:	nop
  1340     // 0xf775742e <__kernel_vsyscall+14>:	int    $0x80
  1341     // 0xf7757430 <__kernel_vsyscall+16>:	pop    %ebp
  1342     // 0xf7757431 <__kernel_vsyscall+17>:	pop    %edx
  1343     // 0xf7757432 <__kernel_vsyscall+18>:	pop    %ecx
  1344     // 0xf7757433 <__kernel_vsyscall+19>:	ret
  1345     //
  1346     // In cases where the sampled thread is blocked in a syscall, its
  1347     // program counter will point at "pop %ebp".  Hence we look for
  1348     // the sequence "int $0x80; pop %ebp; pop %edx; pop %ecx; ret", and
  1349     // the corresponding register-recovery actions are:
  1350     //    new_ebp = *(old_esp + 0)
  1351     //    new eip = *(old_esp + 12)
  1352     //    new_esp = old_esp + 16
  1353     //
  1354     // It may also be the case that the program counter points two
  1355     // nops before the "int $0x80", viz, is __kernel_vsyscall+12, in
  1356     // the case where the syscall has been restarted but the thread
  1357     // hasn't been rescheduled.  The code below doesn't handle that;
  1358     // it could easily be made to.
  1359     //
  1360 #if defined(LUL_PLAT_x86_android) || defined(LUL_PLAT_x86_linux)
  1361     if (!ruleset && *aFramesUsed == 1 && ia.Valid() && sp.Valid()) {
  1362       uintptr_t insns_min, insns_max;
  1363       uintptr_t eip = ia.Value();
  1364       bool b = mSegArray->getBoundingCodeSegment(&insns_min, &insns_max, eip);
  1365       if (b && eip - 2 >= insns_min && eip + 3 <= insns_max) {
  1366         uint8_t* eipC = (uint8_t*)eip;
  1367         if (eipC[-2] == 0xCD && eipC[-1] == 0x80 && eipC[0] == 0x5D &&
  1368             eipC[1] == 0x5A && eipC[2] == 0x59 && eipC[3] == 0xC3) {
  1369           TaggedUWord sp_plus_0  = sp;
  1370           TaggedUWord sp_plus_12 = sp;
  1371           TaggedUWord sp_plus_16 = sp;
  1372           sp_plus_12.Add(TaggedUWord(12));
  1373           sp_plus_16.Add(TaggedUWord(16));
  1374           TaggedUWord new_ebp = DerefTUW(sp_plus_0, aStackImg);
  1375           TaggedUWord new_eip = DerefTUW(sp_plus_12, aStackImg);
  1376           TaggedUWord new_esp = sp_plus_16;
  1377           if (new_ebp.Valid() && new_eip.Valid() && new_esp.Valid()) {
  1378             regs.xbp = new_ebp;
  1379             regs.xip = new_eip;
  1380             regs.xsp = new_esp;
  1381             continue;
  1386 #endif
  1387     ////
  1388     /////////////////////////////////////////////
  1390     // So, do we have a ruleset for this address?  If so, use it now.
  1391     if (ruleset) {
  1393       if (DEBUG_MAIN) {
  1394         ruleset->Print(mLog); mLog("\n");
  1396       // Use the RuleSet to compute the registers for the previous
  1397       // frame.  |regs| is modified in-place.
  1398       UseRuleSet(&regs, aStackImg, ruleset);
  1400     } else {
  1402       // There's no RuleSet for the specified address, so see if
  1403       // it's possible to get anywhere by stack-scanning.
  1405       // Use stack scanning frugally.
  1406       if (n_scanned_frames++ >= aScannedFramesAllowed) {
  1407         break;
  1410       // We can't scan the stack without a valid, aligned stack pointer.
  1411       if (!sp.IsAligned()) {
  1412         break;
  1415       bool scan_succeeded = false;
  1416       for (int i = 0; i < NUM_SCANNED_WORDS; ++i) {
  1417         TaggedUWord aWord = DerefTUW(sp, aStackImg);
  1418         // aWord is something we fished off the stack.  It should be
  1419         // valid, unless we overran the stack bounds.
  1420         if (!aWord.Valid()) {
  1421           break;
  1424         // Now, does aWord point inside a text section and immediately
  1425         // after something that looks like a call instruction?
  1426         if (mPriMap->MaybeIsReturnPoint(aWord, mSegArray)) {
  1427           // Yes it does.  Update the unwound registers heuristically,
  1428           // using the same schemes as Breakpad does.
  1429           scan_succeeded = true;
  1430           (*aScannedFramesAcquired)++;
  1432 #if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
  1433           // The same logic applies for the 32- and 64-bit cases.
  1434           // Register names of the form xsp etc refer to (eg) esp in
  1435           // the 32-bit case and rsp in the 64-bit case.
  1436 #         if defined(LUL_ARCH_x64)
  1437           const int wordSize = 8;
  1438 #         else
  1439           const int wordSize = 4;
  1440 #         endif
  1441           // The return address -- at XSP -- will have been pushed by
  1442           // the CALL instruction.  So the caller's XSP value
  1443           // immediately before and after that CALL instruction is the
  1444           // word above XSP.
  1445           regs.xsp = sp;
  1446           regs.xsp.Add(TaggedUWord(wordSize));
  1448           // aWord points at the return point, so back up one byte
  1449           // to put it in the calling instruction.
  1450           regs.xip = aWord;
  1451           regs.xip.Add(TaggedUWord((uintptr_t)(-1)));
  1453           // Computing a new value from the frame pointer is more tricky.
  1454           if (regs.xbp.Valid() &&
  1455               sp.Valid() && regs.xbp.Value() == sp.Value() - wordSize) {
  1456             // One possibility is that the callee begins with the standard
  1457             // preamble "push %xbp; mov %xsp, %xbp".  In which case, the
  1458             // (1) caller's XBP value will be at the word below XSP, and
  1459             // (2) the current (callee's) XBP will point at that word:
  1460             regs.xbp = DerefTUW(regs.xbp, aStackImg);
  1461           } else if (regs.xbp.Valid() &&
  1462                      sp.Valid() && regs.xbp.Value() >= sp.Value() + wordSize) {
  1463             // If that didn't work out, maybe the callee didn't change
  1464             // XBP, so it still holds the caller's value.  For that to
  1465             // be plausible, XBP will need to have a value at least
  1466             // higher than XSP since that holds the purported return
  1467             // address.  In which case do nothing, since XBP already
  1468             // holds the "right" value.
  1469           } else {
  1470             // Mark XBP as invalid, so that subsequent unwind iterations
  1471             // don't assume it holds valid data.
  1472             regs.xbp = TaggedUWord();
  1475           // Move on to the next word up the stack
  1476           sp.Add(TaggedUWord(wordSize));
  1478 #elif defined(LUL_ARCH_arm)
  1479           // Set all registers to be undefined, except for SP(R13) and
  1480           // PC(R15).
  1482           // aWord points either at the return point, if returning to
  1483           // ARM code, or one insn past the return point if returning
  1484           // to Thumb code.  In both cases, aWord-2 is guaranteed to
  1485           // fall within the calling instruction.
  1486           regs.r15 = aWord;
  1487           regs.r15.Add(TaggedUWord((uintptr_t)(-2)));
  1489           // Make SP be the word above the location where the return
  1490           // address was found.
  1491           regs.r13 = sp;
  1492           regs.r13.Add(TaggedUWord(4));
  1494           // All other regs are undefined.
  1495           regs.r7 = regs.r11 = regs.r12 = regs.r14 = TaggedUWord();
  1497           // Move on to the next word up the stack
  1498           sp.Add(TaggedUWord(4));
  1500 #else
  1501 # error "Unknown plat"
  1502 #endif
  1504           break;
  1507       } // for (int i = 0; i < NUM_SCANNED_WORDS; i++)
  1509       // We tried to make progress by scanning the stack, but failed.
  1510       // So give up -- fall out of the top level unwind loop.
  1511       if (!scan_succeeded) {
  1512         break;
  1516   } // top level unwind loop
  1518   // END UNWIND
  1519   /////////////////////////////////////////////////////////
  1523 void
  1524 LUL::InvalidateCFICaches()
  1526   // NB: the caller must hold m_rwl for writing.
  1528   // FIXME: this could get expensive.  Use a bool to remember when the
  1529   // caches have been invalidated and hence avoid duplicate invalidations.
  1530   for (std::map<pthread_t,CFICache*>::iterator iter = mCaches.begin();
  1531        iter != mCaches.end();
  1532        ++iter) {
  1533     iter->second->Invalidate();
  1538 ////////////////////////////////////////////////////////////////
  1539 // LUL Unit Testing                                           //
  1540 ////////////////////////////////////////////////////////////////
  1542 static const int LUL_UNIT_TEST_STACK_SIZE = 16384;
  1544 // This function is innermost in the test call sequence.  It uses LUL
  1545 // to unwind, and compares the result with the sequence specified in
  1546 // the director string.  These need to agree in order for the test to
  1547 // pass.  In order not to screw up the results, this function needs
  1548 // to have a not-very big stack frame, since we're only presenting
  1549 // the innermost LUL_UNIT_TEST_STACK_SIZE bytes of stack to LUL, and
  1550 // that chunk unavoidably includes the frame for this function.
  1551 //
  1552 // This function must not be inlined into its callers.  Doing so will
  1553 // cause the expected-vs-actual backtrace consistency checking to
  1554 // fail.  Prints summary results to |aLUL|'s logging sink and also
  1555 // returns a boolean indicating whether or not the test passed.
  1556 static __attribute__((noinline))
  1557 bool GetAndCheckStackTrace(LUL* aLUL, const char* dstring)
  1559   // Get hold of the current unwind-start registers.
  1560   UnwindRegs startRegs;
  1561   memset(&startRegs, 0, sizeof(startRegs));
  1562 #if defined(LUL_PLAT_x64_linux)
  1563   volatile uintptr_t block[3];
  1564   MOZ_ASSERT(sizeof(block) == 24);
  1565   __asm__ __volatile__(
  1566     "leaq 0(%%rip), %%r15"   "\n\t"
  1567     "movq %%r15, 0(%0)"      "\n\t"
  1568     "movq %%rsp, 8(%0)"      "\n\t"
  1569     "movq %%rbp, 16(%0)"     "\n"
  1570     : : "r"(&block[0]) : "memory", "r15"
  1571   );
  1572   startRegs.xip = TaggedUWord(block[0]);
  1573   startRegs.xsp = TaggedUWord(block[1]);
  1574   startRegs.xbp = TaggedUWord(block[2]);
  1575   const uintptr_t REDZONE_SIZE = 128;
  1576   uintptr_t start = block[1] - REDZONE_SIZE;
  1577 #elif defined(LUL_PLAT_x86_linux) || defined(LUL_PLAT_x86_android)
  1578   volatile uintptr_t block[3];
  1579   MOZ_ASSERT(sizeof(block) == 12);
  1580   __asm__ __volatile__(
  1581     ".byte 0xE8,0x00,0x00,0x00,0x00"/*call next insn*/  "\n\t"
  1582     "popl %%edi"             "\n\t"
  1583     "movl %%edi, 0(%0)"      "\n\t"
  1584     "movl %%esp, 4(%0)"      "\n\t"
  1585     "movl %%ebp, 8(%0)"      "\n"
  1586     : : "r"(&block[0]) : "memory", "edi"
  1587   );
  1588   startRegs.xip = TaggedUWord(block[0]);
  1589   startRegs.xsp = TaggedUWord(block[1]);
  1590   startRegs.xbp = TaggedUWord(block[2]);
  1591   const uintptr_t REDZONE_SIZE = 0;
  1592   uintptr_t start = block[1] - REDZONE_SIZE;
  1593 #elif defined(LUL_PLAT_arm_android)
  1594   volatile uintptr_t block[6];
  1595   MOZ_ASSERT(sizeof(block) == 24);
  1596   __asm__ __volatile__(
  1597     "mov r0, r15"            "\n\t"
  1598     "str r0,  [%0, #0]"      "\n\t"
  1599     "str r14, [%0, #4]"      "\n\t"
  1600     "str r13, [%0, #8]"      "\n\t"
  1601     "str r12, [%0, #12]"     "\n\t"
  1602     "str r11, [%0, #16]"     "\n\t"
  1603     "str r7,  [%0, #20]"     "\n"
  1604     : : "r"(&block[0]) : "memory", "r0"
  1605   );
  1606   startRegs.r15 = TaggedUWord(block[0]);
  1607   startRegs.r14 = TaggedUWord(block[1]);
  1608   startRegs.r13 = TaggedUWord(block[2]);
  1609   startRegs.r12 = TaggedUWord(block[3]);
  1610   startRegs.r11 = TaggedUWord(block[4]);
  1611   startRegs.r7  = TaggedUWord(block[5]);
  1612   const uintptr_t REDZONE_SIZE = 0;
  1613   uintptr_t start = block[1] - REDZONE_SIZE;
  1614 #else
  1615 # error "Unsupported platform"
  1616 #endif
  1618   // Get hold of the innermost LUL_UNIT_TEST_STACK_SIZE bytes of the
  1619   // stack.
  1620   uintptr_t end = start + LUL_UNIT_TEST_STACK_SIZE;
  1621   uintptr_t ws  = sizeof(void*);
  1622   start &= ~(ws-1);
  1623   end   &= ~(ws-1);
  1624   uintptr_t nToCopy = end - start;
  1625   if (nToCopy > lul::N_STACK_BYTES) {
  1626     nToCopy = lul::N_STACK_BYTES;
  1628   MOZ_ASSERT(nToCopy <= lul::N_STACK_BYTES);
  1629   StackImage* stackImg = new StackImage();
  1630   stackImg->mLen       = nToCopy;
  1631   stackImg->mStartAvma = start;
  1632   if (nToCopy > 0) {
  1633     MOZ_MAKE_MEM_DEFINED((void*)start, nToCopy);
  1634     memcpy(&stackImg->mContents[0], (void*)start, nToCopy);
  1637   // Unwind it.
  1638   const int MAX_TEST_FRAMES = 64;
  1639   uintptr_t framePCs[MAX_TEST_FRAMES];
  1640   uintptr_t frameSPs[MAX_TEST_FRAMES];
  1641   size_t framesAvail = mozilla::ArrayLength(framePCs);
  1642   size_t framesUsed  = 0;
  1643   size_t scannedFramesAllowed = 0;
  1644   size_t scannedFramesAcquired = 0;
  1645   aLUL->Unwind( &framePCs[0], &frameSPs[0],
  1646                 &framesUsed, &scannedFramesAcquired,
  1647                 framesAvail, scannedFramesAllowed,
  1648                 &startRegs, stackImg );
  1650   delete stackImg;
  1652   //if (0) {
  1653   //  // Show what we have.
  1654   //  fprintf(stderr, "Got %d frames:\n", (int)framesUsed);
  1655   //  for (size_t i = 0; i < framesUsed; i++) {
  1656   //    fprintf(stderr, "  [%2d]   SP %p   PC %p\n",
  1657   //            (int)i, (void*)frameSPs[i], (void*)framePCs[i]);
  1658   //  }
  1659   //  fprintf(stderr, "\n");
  1660   //}
  1662   // Check to see if there's a consistent binding between digits in
  1663   // the director string ('1' .. '8') and the PC values acquired by
  1664   // the unwind.  If there isn't, the unwinding has failed somehow.
  1665   uintptr_t binding[8];  // binding for '1' .. binding for '8'
  1666   memset((void*)binding, 0, sizeof(binding));
  1668   // The general plan is to work backwards along the director string
  1669   // and forwards along the framePCs array.  Doing so corresponds to
  1670   // working outwards from the innermost frame of the recursive test set.
  1671   const char* cursor = dstring;
  1673   // Find the end.  This leaves |cursor| two bytes past the first
  1674   // character we want to look at -- see comment below.
  1675   while (*cursor) cursor++;
  1677   // Counts the number of consistent frames.
  1678   size_t nConsistent = 0;
  1680   // Iterate back to the start of the director string.  The starting
  1681   // points are a bit complex.  We can't use framePCs[0] because that
  1682   // contains the PC in this frame (above).  We can't use framePCs[1]
  1683   // because that will contain the PC at return point in the recursive
  1684   // test group (TestFn[1-8]) for their call "out" to this function,
  1685   // GetAndCheckStackTrace.  Although LUL will compute a correct
  1686   // return address, that will not be the same return address as for a
  // recursive call out of the function to another function in the
  1688   // group.  Hence we can only start consistency checking at
  1689   // framePCs[2].
  1690   //
  1691   // To be consistent, then, we must ignore the last element in the
  1692   // director string as that corresponds to framePCs[1].  Hence the
  1693   // start points are: framePCs[2] and the director string 2 bytes
  1694   // before the terminating zero.
  1695   //
  1696   // Also as a result of this, the number of consistent frames counted
  1697   // will always be one less than the length of the director string
  1698   // (not including its terminating zero).
  1699   size_t frameIx;
  1700   for (cursor = cursor-2, frameIx = 2;
  1701        cursor >= dstring && frameIx < framesUsed;
  1702        cursor--, frameIx++) {
  1703     char      c  = *cursor;
  1704     uintptr_t pc = framePCs[frameIx];
  1705     // If this doesn't hold, the director string is ill-formed.
  1706     MOZ_ASSERT(c >= '1' && c <= '8');
  1707     int n = ((int)c) - ((int)'1');
  1708     if (binding[n] == 0) {
  1709       // There's no binding for |c| yet, so install |pc| and carry on.
  1710       binding[n] = pc;
  1711       nConsistent++;
  1712       continue;
  1714     // There's a pre-existing binding for |c|.  Check it's consistent.
  1715     if (binding[n] != pc) {
  1716       // Not consistent.  Give up now.
  1717       break;
  1719     // Consistent.  Keep going.
  1720     nConsistent++;
  1723   // So, did we succeed?
  1724   bool passed = nConsistent+1 == strlen(dstring);
  1726   // Show the results.
  1727   char buf[200];
  1728   snprintf(buf, sizeof(buf), "LULUnitTest:   dstring = %s\n", dstring);
  1729   buf[sizeof(buf)-1] = 0;
  1730   aLUL->mLog(buf);
  1731   snprintf(buf, sizeof(buf),
  1732            "LULUnitTest:     %d consistent, %d in dstring: %s\n",
  1733            (int)nConsistent, (int)strlen(dstring),
  1734            passed ? "PASS" : "FAIL");
  1735   buf[sizeof(buf)-1] = 0;
  1736   aLUL->mLog(buf);
  1738   return passed;
  1742 // Macro magic to create a set of 8 mutually recursive functions with
  1743 // varying frame sizes.  These will recurse amongst themselves as
// specified by |strP|, the director string, and call
  1745 // GetAndCheckStackTrace when the string becomes empty, passing it the
  1746 // original value of the string.  This checks the result, printing
  1747 // results on |aLUL|'s logging sink, and also returns a boolean
  1748 // indicating whether or not the results are acceptable (correct).
// DECL_TEST_FN(NAME) forward-declares one test function.  All eight
// test functions share this signature: |strPorig| is the complete
// director string (handed through unchanged for the final consistency
// check) and |strP| is the not-yet-consumed suffix of it.
#define DECL_TEST_FN(NAME) \
  bool NAME(LUL* aLUL, const char* strPorig, const char* strP);
  1753 #define GEN_TEST_FN(NAME, FRAMESIZE) \
  1754   bool NAME(LUL* aLUL, const char* strPorig, const char* strP) { \
  1755     volatile char space[FRAMESIZE]; \
  1756     memset((char*)&space[0], 0, sizeof(space)); \
  1757     if (*strP == '\0') { \
  1758       /* We've come to the end of the director string. */ \
  1759       /* Take a stack snapshot. */ \
  1760       return GetAndCheckStackTrace(aLUL, strPorig); \
  1761     } else { \
  1762       /* Recurse onwards.  This is a bit subtle.  The obvious */ \
  1763       /* thing to do here is call onwards directly, from within the */ \
  1764       /* arms of the case statement.  That gives a problem in that */ \
  1765       /* there will be multiple return points inside each function when */ \
  1766       /* unwinding, so it will be difficult to check for consistency */ \
  1767       /* against the director string.  Instead, we make an indirect */ \
  1768       /* call, so as to guarantee that there is only one call site */ \
  1769       /* within each function.  This does assume that the compiler */ \
  1770       /* won't transform it back to the simple direct-call form. */ \
  1771       /* To discourage it from doing so, the call is bracketed with */ \
  1772       /* __asm__ __volatile__ sections so as to make it not-movable. */ \
  1773       bool (*nextFn)(LUL*, const char*, const char*) = NULL; \
  1774       switch (*strP) { \
  1775         case '1': nextFn = TestFn1; break; \
  1776         case '2': nextFn = TestFn2; break; \
  1777         case '3': nextFn = TestFn3; break; \
  1778         case '4': nextFn = TestFn4; break; \
  1779         case '5': nextFn = TestFn5; break; \
  1780         case '6': nextFn = TestFn6; break; \
  1781         case '7': nextFn = TestFn7; break; \
  1782         case '8': nextFn = TestFn8; break; \
  1783         default:  nextFn = TestFn8; break; \
  1784       } \
  1785       __asm__ __volatile__("":::"cc","memory"); \
  1786       bool passed = nextFn(aLUL, strPorig, strP+1); \
  1787       __asm__ __volatile__("":::"cc","memory"); \
  1788       return passed; \
  1789     } \
// The test functions are mutually recursive (each may call any of the
// others, as directed by the director string), so it is necessary to
// declare them all before defining them.
DECL_TEST_FN(TestFn1)
DECL_TEST_FN(TestFn2)
DECL_TEST_FN(TestFn3)
DECL_TEST_FN(TestFn4)
DECL_TEST_FN(TestFn5)
DECL_TEST_FN(TestFn6)
DECL_TEST_FN(TestFn7)
DECL_TEST_FN(TestFn8)
// Define the eight test functions.  The second argument is the size
// of the local |space| array; the values are deliberately varied --
// presumably so the unwinder is exercised over a range of frame
// layouts (TODO confirm against GEN_TEST_FN's intent).
GEN_TEST_FN(TestFn1, 123)
GEN_TEST_FN(TestFn2, 456)
GEN_TEST_FN(TestFn3, 789)
GEN_TEST_FN(TestFn4, 23)
GEN_TEST_FN(TestFn5, 47)
GEN_TEST_FN(TestFn6, 117)
GEN_TEST_FN(TestFn7, 1)
GEN_TEST_FN(TestFn8, 99)
  1813 // This starts the test sequence going.  Call here to generate a
  1814 // sequence of calls as directed by the string |dstring|.  The call
  1815 // sequence will, from its innermost frame, finish by calling
  1816 // GetAndCheckStackTrace() and passing it |dstring|.
  1817 // GetAndCheckStackTrace() will unwind the stack, check consistency
  1818 // of those results against |dstring|, and print a pass/fail message
  1819 // to aLUL's logging sink.  It also updates the counters in *aNTests
  1820 // and aNTestsPassed.
  1821 __attribute__((noinline)) void
  1822 TestUnw(/*OUT*/int* aNTests, /*OUT*/int*aNTestsPassed,
  1823         LUL* aLUL, const char* dstring)
  1825   // Ensure that the stack has at least this much space on it.  This
  1826   // makes it safe to saw off the top LUL_UNIT_TEST_STACK_SIZE bytes
  1827   // and hand it to LUL.  Safe in the sense that no segfault can
  1828   // happen because the stack is at least this big.  This is all
  1829   // somewhat dubious in the sense that a sufficiently clever compiler
  1830   // (clang, for one) can figure out that space[] is unused and delete
  1831   // it from the frame.  Hence the somewhat elaborate hoop jumping to
  1832   // fill it up before the call and to at least appear to use the
  1833   // value afterwards.
  1834   int i;
  1835   volatile char space[LUL_UNIT_TEST_STACK_SIZE];
  1836   for (i = 0; i < LUL_UNIT_TEST_STACK_SIZE; i++) {
  1837     space[i] = (char)(i & 0x7F);
  1840   // Really run the test.
  1841   bool passed = TestFn1(aLUL, dstring, dstring);
  1843   // Appear to use space[], by visiting the value to compute some kind
  1844   // of checksum, and then (apparently) using the checksum.
  1845   int sum = 0;
  1846   for (i = 0; i < LUL_UNIT_TEST_STACK_SIZE; i++) {
  1847     // If this doesn't fool LLVM, I don't know what will.
  1848     sum += space[i] - 3*i;
  1850   __asm__ __volatile__("" : : "r"(sum));
  1852   // Update the counters.
  1853   (*aNTests)++;
  1854   if (passed) {
  1855     (*aNTestsPassed)++;
  1860 void
  1861 RunLulUnitTests(/*OUT*/int* aNTests, /*OUT*/int*aNTestsPassed, LUL* aLUL)
  1863   aLUL->mLog(":\n");
  1864   aLUL->mLog("LULUnitTest: BEGIN\n");
  1865   *aNTests = *aNTestsPassed = 0;
  1866   TestUnw(aNTests, aNTestsPassed, aLUL, "11111111");
  1867   TestUnw(aNTests, aNTestsPassed, aLUL, "11222211");
  1868   TestUnw(aNTests, aNTestsPassed, aLUL, "111222333");
  1869   TestUnw(aNTests, aNTestsPassed, aLUL, "1212121231212331212121212121212");
  1870   TestUnw(aNTests, aNTestsPassed, aLUL, "31415827271828325332173258");
  1871   TestUnw(aNTests, aNTestsPassed, aLUL,
  1872           "123456781122334455667788777777777777777777777");
  1873   aLUL->mLog("LULUnitTest: END\n");
  1874   aLUL->mLog(":\n");
  1878 } // namespace lul
