xpcom/build/nsWindowsDllInterceptor.h

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/xpcom/build/nsWindowsDllInterceptor.h	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,707 @@
     1.4 +/* -*- Mode: C++; tab-width: 40; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public
     1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this
     1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     1.8 +
     1.9 +#ifndef NS_WINDOWS_DLL_INTERCEPTOR_H_
    1.10 +#define NS_WINDOWS_DLL_INTERCEPTOR_H_
    1.11 +#include <windows.h>
    1.12 +#include <winternl.h>
    1.13 +
    1.14 +/*
    1.15 + * Simple function interception.
    1.16 + *
    1.17 + * We have two separate mechanisms for intercepting a function: We can use the
    1.18 + * built-in nop space, if it exists, or we can create a detour.
    1.19 + *
    1.20 + * Using the built-in nop space works as follows: On x86-32, DLL functions
    1.21 + * begin with a two-byte nop (mov edi, edi) and are preceded by five bytes of
    1.22 + * NOP instructions.
    1.23 + *
    1.24 + * When we detect a function with this prelude, we do the following:
    1.25 + *
    1.26 + * 1. Write a long jump to our interceptor function into the five bytes of NOPs
    1.27 + *    before the function.
    1.28 + *
    1.29 + * 2. Write a short jump -5 into the two-byte nop at the beginning of the function.
    1.30 + *
    1.31 + * This mechanism is nice because it's thread-safe.  It's even safe to do if
    1.32 + * another thread is currently running the function we're modifying!
    1.33 + *
    1.34 + * When the WindowsDllNopSpacePatcher is destroyed, we overwrite the short jump
    1.35 + * but not the long jump, so re-intercepting the same function won't work,
    1.36 + * because its prelude won't match.
    1.37 + *
    1.38 + *
    1.39 + * Unfortunately nop space patching doesn't work on functions which don't have
    1.40 + * this magic prelude (and in particular, x86-64 never has the prelude).  So
    1.41 + * when we can't use the built-in nop space, we fall back to using a detour,
    1.42 + * which works as follows:
    1.43 + *
    1.44 + * 1. Save first N bytes of OrigFunction to trampoline, where N is a
    1.45 + *    number of bytes >= 5 that are instruction aligned.
    1.46 + *
    1.47 + * 2. Replace first 5 bytes of OrigFunction with a jump to the Hook
    1.48 + *    function.
    1.49 + *
    1.50 + * 3. After N bytes of the trampoline, add a jump to OrigFunction+N to
    1.51 + *    continue original program flow.
    1.52 + *
    1.53 + * 4. Hook function needs to call the trampoline during its execution,
    1.54 + *    to invoke the original function (so address of trampoline is
    1.55 + *    returned).
    1.56 + *
    1.57 + * When the WindowsDllDetourPatcher object is destructed, OrigFunction is
    1.58 + * patched again to jump directly to the trampoline instead of going through
    1.59 + * the hook function. As such, re-intercepting the same function won't work, as
    1.60 + * jump instructions are not supported.
    1.61 + *
    1.62 + * Note that this is not thread-safe.  Sad day.
    1.63 + *
    1.64 + */
    1.65 +
    1.66 +#include <stdint.h>
    1.67 +
    1.68 +namespace mozilla {
    1.69 +namespace internal {
    1.70 +
    1.71 +class WindowsDllNopSpacePatcher // Hooks functions through the nop space around their entry point (see the file comment); AddHook is only implemented for x86-32.
    1.72 +{
    1.73 +  typedef unsigned char *byteptr_t;
    1.74 +  HMODULE mModule; // handle returned by LoadLibraryExA in Init(); null before Init() or if loading failed
    1.75 +
    1.76 +  // Dumb array for remembering the addresses of functions we've patched.
    1.77 +  // (This should be nsTArray, but non-XPCOM code uses this class.)
    1.78 +  static const size_t maxPatchedFns = 128;
    1.79 +  byteptr_t mPatchedFns[maxPatchedFns];
    1.80 +  int mPatchedFnsLen; // number of valid entries at the front of mPatchedFns
    1.81 +
    1.82 +public:
    1.83 +  WindowsDllNopSpacePatcher()
    1.84 +    : mModule(0)
    1.85 +    , mPatchedFnsLen(0)
    1.86 +  {}
    1.87 +  // Destructor: un-hooks every function recorded in mPatchedFns.
    1.88 +  ~WindowsDllNopSpacePatcher()
    1.89 +  {
    1.90 +    // Restore the mov edi, edi to the beginning of each function we patched.
    1.91 +    // Only the two-byte short jump is undone; the long jump written before the function is left in place.
    1.92 +    for (int i = 0; i < mPatchedFnsLen; i++) {
    1.93 +      byteptr_t fn = mPatchedFns[i];
    1.94 +
    1.95 +      // Ensure we can write to the code.  If that fails, this function stays hooked and we move on.
    1.96 +      DWORD op;
    1.97 +      if (!VirtualProtectEx(GetCurrentProcess(), fn, 2, PAGE_EXECUTE_READWRITE, &op)) {
    1.98 +        // printf("VirtualProtectEx failed! %d\n", GetLastError());
    1.99 +        continue;
   1.100 +      }
   1.101 +
   1.102 +      // mov edi, edi (0xff8b stored little-endian yields the bytes 8b ff)
   1.103 +      *((uint16_t*)fn) = 0xff8b;
   1.104 +
   1.105 +      // Restore the old protection.
   1.106 +      VirtualProtectEx(GetCurrentProcess(), fn, 2, op, &op);
   1.107 +
   1.108 +      // I don't think this is actually necessary, but it can't hurt.
   1.109 +      FlushInstructionCache(GetCurrentProcess(),
   1.110 +                            /* ignored */ nullptr,
   1.111 +                            /* ignored */ 0);
   1.112 +    }
   1.113 +  }
   1.114 +  // Init: loads the DLL named by modulename and keeps its handle in mModule.  On failure mModule is null, which makes AddHook() return false.
   1.115 +  void Init(const char *modulename)
   1.116 +  {
   1.117 +    mModule = LoadLibraryExA(modulename, nullptr, 0);
   1.118 +    if (!mModule) {
   1.119 +      //printf("LoadLibraryEx for '%s' failed\n", modulename);
   1.120 +      return;
   1.121 +    }
   1.122 +  }
   1.123 +  // AddHook: resolves pname in mModule, makes the 7 bytes [fn - 5, fn + 2) writable, calls WriteHook(), restores the protection and records fn on success.  Returns false on any failure.
   1.124 +#if defined(_M_IX86)
   1.125 +  bool AddHook(const char *pname, intptr_t hookDest, void **origFunc)
   1.126 +  {
   1.127 +    if (!mModule)
   1.128 +      return false;
   1.129 +
   1.130 +    if (mPatchedFnsLen == maxPatchedFns) {
   1.131 +      // printf ("No space for hook in mPatchedFns.\n");
   1.132 +      return false;
   1.133 +    }
   1.134 +
   1.135 +    byteptr_t fn = reinterpret_cast<byteptr_t>(GetProcAddress(mModule, pname));
   1.136 +    if (!fn) {
   1.137 +      //printf ("GetProcAddress failed\n");
   1.138 +      return false;
   1.139 +    }
   1.140 +  
   1.141 +    // Ensure we can read and write starting at fn - 5 (for the long jmp we're
   1.142 +    // going to write) and ending at fn + 2 (for the short jmp up to the long
   1.143 +    // jmp).
   1.144 +    DWORD op;
   1.145 +    if (!VirtualProtectEx(GetCurrentProcess(), fn - 5, 7, PAGE_EXECUTE_READWRITE, &op)) {
   1.146 +      //printf ("VirtualProtectEx failed! %d\n", GetLastError());
   1.147 +      return false;
   1.148 +    }
   1.149 +
   1.150 +    bool rv = WriteHook(fn, hookDest, origFunc);
   1.151 +    
   1.152 +    // Re-protect, and we're done.  The result of this call is not checked.
   1.153 +    VirtualProtectEx(GetCurrentProcess(), fn - 5, 7, op, &op);
   1.154 +
   1.155 +    if (rv) {
   1.156 +      mPatchedFns[mPatchedFnsLen] = fn;
   1.157 +      mPatchedFnsLen++;
   1.158 +    }
   1.159 +
   1.160 +    return rv;
   1.161 +  }
   1.162 +  // WriteHook: checks that fn has the expected prelude and, if so, installs the two jumps.  On success *origFunc is set to fn + 2 and true is returned; otherwise nothing is written and false is returned.
   1.163 +  bool WriteHook(byteptr_t fn, intptr_t hookDest, void **origFunc)
   1.164 +  {
   1.165 +    // Check that the 5 bytes before fn are NOP's or INT 3's,
   1.166 +    // and that the 2 bytes after fn are mov(edi, edi).
   1.167 +    //
   1.168 +    // It's safe to read fn[-5] because we set it to PAGE_EXECUTE_READWRITE
   1.169 +    // before calling WriteHook.
   1.170 +
   1.171 +    for (int i = -5; i <= -1; i++) {
   1.172 +      if (fn[i] != 0x90 && fn[i] != 0xcc) // nop or int 3
   1.173 +        return false;
   1.174 +    }
   1.175 +
   1.176 +    // mov edi, edi.  Yes, there are two ways to encode the same thing:
   1.177 +    //
   1.178 +    //   0x89ff == mov r/m, r
   1.179 +    //   0x8bff == mov r, r/m
   1.180 +    //
   1.181 +    // where "r" is register and "r/m" is register or memory.  Windows seems to
   1.182 +    // use 8bff; I include 89ff out of paranoia.
   1.183 +    if ((fn[0] != 0x8b && fn[0] != 0x89) || fn[1] != 0xff) {
   1.184 +      return false;
   1.185 +    }
   1.186 +
   1.187 +    // Write a long jump into the space above the function.
   1.188 +    fn[-5] = 0xe9; // jmp
   1.189 +    *((intptr_t*)(fn - 4)) = hookDest - (uintptr_t)(fn); // target displacement, relative to the end of the 5-byte jmp (which is fn itself)
   1.190 +
   1.191 +    // Set origFunc here, because after this point, hookDest might be called,
   1.192 +    // and hookDest might use the origFunc pointer.
   1.193 +    *origFunc = fn + 2;
   1.194 +
   1.195 +    // Short jump up into our long jump.
   1.196 +    *((uint16_t*)(fn)) = 0xf9eb; // jmp $-5 (bytes eb f9: rel8 of -7 from fn + 2 lands on fn - 5)
   1.197 +
   1.198 +    // I think this routine is safe without this, but it can't hurt.
   1.199 +    FlushInstructionCache(GetCurrentProcess(),
   1.200 +                          /* ignored */ nullptr,
   1.201 +                          /* ignored */ 0);
   1.202 +
   1.203 +    return true;
   1.204 +  }
   1.205 +#else
   1.206 +  bool AddHook(const char *pname, intptr_t hookDest, void **origFunc)
   1.207 +  {
   1.208 +    // Not implemented except on x86-32.  Always fails; none of the arguments are used.
   1.209 +    return false;
   1.210 +  }
   1.211 +#endif
   1.212 +};
   1.213 +
   1.214 +class WindowsDllDetourPatcher // Hooks functions by overwriting their first bytes with a jump to the hook and keeping a trampoline to the original code (see the file comment).
   1.215 +{
   1.216 +  typedef unsigned char *byteptr_t;
   1.217 +public:
   1.218 +  WindowsDllDetourPatcher() 
   1.219 +    : mModule(0), mHookPage(0), mMaxHooks(0), mCurHooks(0)
   1.220 +  {
   1.221 +  }
   1.222 +  // Destructor: re-targets the jump at the start of each recorded original function so that it goes to the trampoline body instead of the hook.  The trampoline buffer is not freed here.
   1.223 +  ~WindowsDllDetourPatcher()
   1.224 +  {
   1.225 +    int i;
   1.226 +    byteptr_t p;
   1.227 +    for (i = 0, p = mHookPage; i < mCurHooks; i++, p += kHookSize) {
   1.228 +#if defined(_M_IX86)
   1.229 +      size_t nBytes = 1 + sizeof(intptr_t);
   1.230 +#elif defined(_M_X64)
   1.231 +      size_t nBytes = 2 + sizeof(intptr_t);
   1.232 +#else
   1.233 +#error "Unknown processor type"
   1.234 +#endif
   1.235 +      byteptr_t origBytes = *((byteptr_t *)p); // first pointer-sized word of a slot: address of the original function, as stored by CreateTrampoline()
   1.236 +      // ensure we can modify the original code.  NOTE(review): a slot whose CreateTrampoline() call bailed out early never had this address stored; this loop then depends on VirtualProtectEx rejecting whatever the slot holds -- confirm.
   1.237 +      DWORD op;
   1.238 +      if (!VirtualProtectEx(GetCurrentProcess(), origBytes, nBytes, PAGE_EXECUTE_READWRITE, &op)) {
   1.239 +        //printf ("VirtualProtectEx failed! %d\n", GetLastError());
   1.240 +        continue;
   1.241 +      }
   1.242 +      // Remove the hook by making the original function jump directly
   1.243 +      // into the trampoline.  Only the jump operand is rewritten; the opcode bytes written at install time are reused.
   1.244 +      intptr_t dest = (intptr_t)(p + sizeof(void *)); // trampoline code begins right after the stored pointer
   1.245 +#if defined(_M_IX86)
   1.246 +      *((intptr_t*)(origBytes+1)) = dest - (intptr_t)(origBytes+5); // target displacement
   1.247 +#elif defined(_M_X64)
   1.248 +      *((intptr_t*)(origBytes+2)) = dest;
   1.249 +#else
   1.250 +#error "Unknown processor type"
   1.251 +#endif
   1.252 +      // restore protection; if this fails we can't really do anything about it
   1.253 +      VirtualProtectEx(GetCurrentProcess(), origBytes, nBytes, op, &op);
   1.254 +    }
   1.255 +  }
   1.256 +  // Init: loads modulename and allocates the executable buffer that holds the trampolines.  nhooks == 0 selects one page's worth of slots.  Does nothing if mModule is already set; if the allocation fails mModule is cleared again so Initialized() returns false.
   1.257 +  void Init(const char *modulename, int nhooks = 0)
   1.258 +  {
   1.259 +    if (mModule)
   1.260 +      return;
   1.261 +
   1.262 +    mModule = LoadLibraryExA(modulename, nullptr, 0);
   1.263 +    if (!mModule) {
   1.264 +      //printf("LoadLibraryEx for '%s' failed\n", modulename);
   1.265 +      return;
   1.266 +    }
   1.267 +
   1.268 +    int hooksPerPage = 4096 / kHookSize;
   1.269 +    if (nhooks == 0)
   1.270 +      nhooks = hooksPerPage;
   1.271 +    // NOTE(review): this adds hooksPerPage % nhooks to nhooks; it does not round nhooks up to a whole page -- confirm the intended slot count.
   1.272 +    mMaxHooks = nhooks + (hooksPerPage % nhooks);
   1.273 +
   1.274 +    mHookPage = (byteptr_t) VirtualAllocEx(GetCurrentProcess(), nullptr,
   1.275 +             mMaxHooks * kHookSize,
   1.276 +             MEM_COMMIT | MEM_RESERVE,
   1.277 +             PAGE_EXECUTE_READWRITE);
   1.278 +
   1.279 +    if (!mHookPage) {
   1.280 +      mModule = 0;
   1.281 +      return;
   1.282 +    }
   1.283 +  }
   1.284 +  // Initialized: true while mModule is set, i.e. after a successful Init() and before LockHooks().
   1.285 +  bool Initialized()
   1.286 +  {
   1.287 +    return !!mModule;
   1.288 +  }
   1.289 +  // LockHooks: switches the trampoline buffer to read+execute and stops this object from accepting further hooks.
   1.290 +  void LockHooks()
   1.291 +  {
   1.292 +    if (!mModule)
   1.293 +      return;
   1.294 +
   1.295 +    DWORD op;
   1.296 +    VirtualProtectEx(GetCurrentProcess(), mHookPage, mMaxHooks * kHookSize, PAGE_EXECUTE_READ, &op);
   1.297 +    // Clearing mModule makes AddHook() return false and Initialized() report false from here on.
   1.298 +    mModule = 0;
   1.299 +  }
   1.300 +  // AddHook: resolves pname in the module and installs a detour to hookDest.  When it returns true, *origFunc holds the trampoline to call for the original behaviour.
   1.301 +  bool AddHook(const char *pname, intptr_t hookDest, void **origFunc)
   1.302 +  {
   1.303 +    if (!mModule)
   1.304 +      return false;
   1.305 +
   1.306 +    void *pAddr = (void *) GetProcAddress(mModule, pname);
   1.307 +    if (!pAddr) {
   1.308 +      //printf ("GetProcAddress failed\n");
   1.309 +      return false;
   1.310 +    }
   1.311 +    // CreateTrampoline() signals failure by leaving *origFunc null.
   1.312 +    CreateTrampoline(pAddr, hookDest, origFunc);
   1.313 +    if (!*origFunc) {
   1.314 +      //printf ("CreateTrampoline failed\n");
   1.315 +      return false;
   1.316 +    }
   1.317 +
   1.318 +    return true;
   1.319 +  }
   1.320 +
   1.321 +protected:
   1.322 +  const static int kPageSize = 4096; // not referenced elsewhere in this class; Init() uses the literal 4096
   1.323 +  const static int kHookSize = 128; // bytes reserved per trampoline slot in mHookPage
   1.324 +
   1.325 +  HMODULE mModule; // target DLL; null when uninitialized or after LockHooks()
   1.326 +  byteptr_t mHookPage; // start of the buffer obtained from VirtualAllocEx in Init()
   1.327 +  int mMaxHooks; // number of slots allocated in mHookPage
   1.328 +  int mCurHooks; // number of slots handed out by FindTrampolineSpace() so far
   1.329 +  // CreateTrampoline: decodes enough whole instructions at origFunction to make room for the jump, copies them into a fresh slot followed by a jump back, then overwrites the start of origFunction with a jump to dest.  *outTramp stays null if no slot is free or an instruction is not recognised.
   1.330 +  void CreateTrampoline(void *origFunction,
   1.331 +                        intptr_t dest,
   1.332 +                        void **outTramp)
   1.333 +  {
   1.334 +    *outTramp = nullptr;
   1.335 +
   1.336 +    byteptr_t tramp = FindTrampolineSpace();
   1.337 +    if (!tramp)
   1.338 +      return;
   1.339 +
   1.340 +    byteptr_t origBytes = (byteptr_t) origFunction;
   1.341 +
   1.342 +    int nBytes = 0; // number of prologue bytes decoded so far
   1.343 +    int pJmp32 = -1; // offset within the prologue of a jmp that needs relocating, or -1 if none was seen
   1.344 +
   1.345 +#if defined(_M_IX86)
   1.346 +    while (nBytes < 5) { // need at least 5 bytes for the jmp rel32 written below
   1.347 +      // Understand some simple instructions that might be found in a
   1.348 +      // prologue; we might need to extend this as necessary.
   1.349 +      //
   1.350 +      // Note!  If we ever need to understand jump instructions, we'll
   1.351 +      // need to rewrite the displacement argument.
   1.352 +      if (origBytes[nBytes] >= 0x88 && origBytes[nBytes] <= 0x8B) {
   1.353 +        // various MOVs
   1.354 +        unsigned char b = origBytes[nBytes+1];
   1.355 +        if (((b & 0xc0) == 0xc0) ||
   1.356 +            (((b & 0xc0) == 0x00) &&
   1.357 +             ((b & 0x07) != 0x04) && ((b & 0x07) != 0x05)))
   1.358 +        {
   1.359 +          // REG=r, R/M=r or REG=r, R/M=[r]
   1.360 +          nBytes += 2;
   1.361 +        } else if (((b & 0xc0) == 0x40) && ((b & 0x38) != 0x20)) {
   1.362 +          // REG=r, R/M=[r + disp8]
   1.363 +          nBytes += 3;
   1.364 +        } else {
   1.365 +          // complex MOV, bail
   1.366 +          return;
   1.367 +        }
   1.368 +      } else if (origBytes[nBytes] == 0xB8) {
   1.369 +        // MOV 0xB8: http://ref.x86asm.net/coder32.html#xB8
   1.370 +        nBytes += 5;
   1.371 +      } else if (origBytes[nBytes] == 0x83) {
   1.372 +        // ADD|OR|ADC|SBB|AND|SUB|XOR|CMP r/m, imm8
   1.373 +        unsigned char b = origBytes[nBytes+1];
   1.374 +        if ((b & 0xc0) == 0xc0) {
   1.375 +          // ADD|OR|ADC|SBB|AND|SUB|XOR|CMP r, imm8
   1.376 +          nBytes += 3;
   1.377 +        } else {
   1.378 +          // bail
   1.379 +          return;
   1.380 +        }
   1.381 +      } else if (origBytes[nBytes] == 0x68) {
   1.382 +        // PUSH with 4-byte operand
   1.383 +        nBytes += 5;
   1.384 +      } else if ((origBytes[nBytes] & 0xf0) == 0x50) {
   1.385 +        // 1-byte PUSH/POP
   1.386 +        nBytes++;
   1.387 +      } else if (origBytes[nBytes] == 0x6A) {
   1.388 +        // PUSH imm8
   1.389 +        nBytes += 2;
   1.390 +      } else if (origBytes[nBytes] == 0xe9) {
   1.391 +        pJmp32 = nBytes;
   1.392 +        // jmp 32bit offset; its displacement is adjusted after the copy into the trampoline
   1.393 +        nBytes += 5;
   1.394 +      } else {
   1.395 +        //printf ("Unknown x86 instruction byte 0x%02x, aborting trampoline\n", origBytes[nBytes]);
   1.396 +        return;
   1.397 +      }
   1.398 +    }
   1.399 +#elif defined(_M_X64)
   1.400 +    byteptr_t directJmpAddr;
   1.401 +
   1.402 +    while (nBytes < 13) { // need 13 bytes for the mov r11, imm64 + jmp r11 sequence written below
   1.403 +
   1.404 +      // if found JMP 32bit offset, next bytes must be NOP
   1.405 +      if (pJmp32 >= 0) {
   1.406 +        if (origBytes[nBytes++] != 0x90)
   1.407 +          return;
   1.408 +
   1.409 +        continue;
   1.410 +      } 
   1.411 +      if (origBytes[nBytes] == 0x0f) {
   1.412 +        nBytes++;
   1.413 +        if (origBytes[nBytes] == 0x1f) {
   1.414 +          // nop (multibyte)
   1.415 +          nBytes++;
   1.416 +          if ((origBytes[nBytes] & 0xc0) == 0x40 &&
   1.417 +              (origBytes[nBytes] & 0x7) == 0x04) {
   1.418 +            nBytes += 3;
   1.419 +          } else {
   1.420 +            return;
   1.421 +          }
   1.422 +        } else if (origBytes[nBytes] == 0x05) {
   1.423 +          // syscall
   1.424 +          nBytes++;
   1.425 +        } else {
   1.426 +          return;
   1.427 +        }
   1.428 +      } else if (origBytes[nBytes] == 0x41) {
   1.429 +        // REX.B
   1.430 +        nBytes++;
   1.431 +
   1.432 +        if ((origBytes[nBytes] & 0xf0) == 0x50) {
   1.433 +          // push/pop with Rx register
   1.434 +          nBytes++;
   1.435 +        } else if (origBytes[nBytes] >= 0xb8 && origBytes[nBytes] <= 0xbf) {
   1.436 +          // mov r32, imm32
   1.437 +          nBytes += 5;
   1.438 +        } else {
   1.439 +          return;
   1.440 +        }
   1.441 +      } else if (origBytes[nBytes] == 0x45) {
   1.442 +        // REX.R & REX.B
   1.443 +        nBytes++;
   1.444 +
   1.445 +        if (origBytes[nBytes] == 0x33) {
   1.446 +          // xor r32, r32
   1.447 +          nBytes += 2;
   1.448 +        } else {
   1.449 +          return;
   1.450 +        }
   1.451 +      } else if ((origBytes[nBytes] & 0xfb) == 0x48) {
   1.452 +        // REX.W | REX.WR
   1.453 +        nBytes++;
   1.454 +
   1.455 +        if (origBytes[nBytes] == 0x81 && (origBytes[nBytes+1] & 0xf8) == 0xe8) {
   1.456 +          // sub r, dword
   1.457 +          nBytes += 6;
   1.458 +        } else if (origBytes[nBytes] == 0x83 &&
   1.459 +                  (origBytes[nBytes+1] & 0xf8) == 0xe8) {
   1.460 +          // sub r, byte
   1.461 +          nBytes += 3;
   1.462 +        } else if (origBytes[nBytes] == 0x83 &&
   1.463 +                  (origBytes[nBytes+1] & 0xf8) == 0x60) {
   1.464 +          // and [r+d], imm8
   1.465 +          nBytes += 5;
   1.466 +        } else if ((origBytes[nBytes] & 0xfd) == 0x89) {
   1.467 +          // MOV r/m64, r64 | MOV r64, r/m64
   1.468 +          if ((origBytes[nBytes+1] & 0xc0) == 0x40) {
   1.469 +            if ((origBytes[nBytes+1] & 0x7) == 0x04) {
   1.470 +              // R/M=[SIB+disp8], REG=r64
   1.471 +              nBytes += 4;
   1.472 +            } else {
   1.473 +              // R/M=[r64+disp8], REG=r64
   1.474 +              nBytes += 3;
   1.475 +            }
   1.476 +          } else if (((origBytes[nBytes+1] & 0xc0) == 0xc0) ||
   1.477 +                     (((origBytes[nBytes+1] & 0xc0) == 0x00) &&
   1.478 +                      ((origBytes[nBytes+1] & 0x07) != 0x04) && ((origBytes[nBytes+1] & 0x07) != 0x05))) {
   1.479 +            // REG=r64, R/M=r64 or REG=r64, R/M=[r64]
   1.480 +            nBytes += 2;
   1.481 +          } else {
   1.482 +            // complex MOV
   1.483 +            return;
   1.484 +          }
   1.485 +        } else if (origBytes[nBytes] == 0xc7) {
   1.486 +          // MOV r/m64, imm32
   1.487 +          if (origBytes[nBytes + 1] == 0x44) {
   1.488 +            // MOV [r64+disp8], imm32
   1.489 +            // ModR/M + SIB + disp8 + imm32
   1.490 +            nBytes += 8;
   1.491 +          } else {
   1.492 +            return;
   1.493 +          }
   1.494 +        } else if (origBytes[nBytes] == 0xff) {
   1.495 +          pJmp32 = nBytes - 1;
   1.496 +          // JMP /4
   1.497 +          if ((origBytes[nBytes+1] & 0xc0) == 0x0 &&
   1.498 +              (origBytes[nBytes+1] & 0x07) == 0x5) {
   1.499 +            // [rip+disp32]
   1.500 +            // convert JMP 32bit offset to JMP 64bit direct: read the 64-bit target stored at rip+disp32, where rip is the end of this instruction (origBytes + nBytes + 6)
   1.501 +            directJmpAddr = (byteptr_t)*((uint64_t*)(origBytes + nBytes + 6 + (*((int32_t*)(origBytes + nBytes + 2)))));
   1.502 +            nBytes += 6;
   1.503 +          } else {
   1.504 +            // not support yet!
   1.505 +            return;
   1.506 +          }
   1.507 +        } else {
   1.508 +          // not support yet!
   1.509 +          return;
   1.510 +        }
   1.511 +      } else if ((origBytes[nBytes] & 0xf0) == 0x50) {
   1.512 +        // 1-byte push/pop
   1.513 +        nBytes++;
   1.514 +      } else if (origBytes[nBytes] == 0x90) {
   1.515 +        // nop
   1.516 +        nBytes++;
   1.517 +      } else if (origBytes[nBytes] == 0xb8) {
   1.518 +        // MOV 0xB8: http://ref.x86asm.net/coder32.html#xB8
   1.519 +        nBytes += 5;
   1.520 +      } else if (origBytes[nBytes] == 0xc3) {
   1.521 +        // ret
   1.522 +        nBytes++;
   1.523 +      } else if (origBytes[nBytes] == 0xe9) {
   1.524 +        pJmp32 = nBytes;
   1.525 +        // convert JMP 32bit offset to JMP 64bit direct: absolute target = end of the 5-byte jmp + rel32
   1.526 +        directJmpAddr = origBytes + pJmp32 + 5 + (*((int32_t*)(origBytes + pJmp32 + 1)));
   1.527 +        // jmp 32bit offset
   1.528 +        nBytes += 5;
   1.529 +      } else if (origBytes[nBytes] == 0xff) {
   1.530 +        nBytes++;
   1.531 +        if ((origBytes[nBytes] & 0xf8) == 0xf0) {
   1.532 +          // push r64
   1.533 +          nBytes++;
   1.534 +        } else {
   1.535 +          return;
   1.536 +        }
   1.537 +      } else {
   1.538 +        return;
   1.539 +      }
   1.540 +    }
   1.541 +#else
   1.542 +#error "Unknown processor type"
   1.543 +#endif
   1.544 +
   1.545 +    if (nBytes > 100) {
   1.546 +      //printf ("Too big!");
   1.547 +      return;
   1.548 +    }
   1.549 +
   1.550 +    // We keep the address of the original function in the first bytes of
   1.551 +    // the trampoline buffer (the destructor reads it back from there)
   1.552 +    *((void **)tramp) = origFunction;
   1.553 +    tramp += sizeof(void *);
   1.554 +
   1.555 +    memcpy(tramp, origFunction, nBytes);
   1.556 +
   1.557 +    // OrigFunction+N, the target of the trampoline
   1.558 +    byteptr_t trampDest = origBytes + nBytes;
   1.559 +
   1.560 +#if defined(_M_IX86)
   1.561 +    if (pJmp32 >= 0) {
   1.562 +      // Jump directly to the original target of the jump instead of jumping to the
   1.563 +      // original function.
   1.564 +      // Adjust jump target displacement to jump location in the trampoline.
   1.565 +      *((intptr_t*)(tramp+pJmp32+1)) += origBytes - tramp;
   1.566 +    } else {
   1.567 +      tramp[nBytes] = 0xE9; // jmp
   1.568 +      *((intptr_t*)(tramp+nBytes+1)) = (intptr_t)trampDest - (intptr_t)(tramp+nBytes+5); // target displacement
   1.569 +    }
   1.570 +#elif defined(_M_X64)
   1.571 +    // If JMP32 opcode found, we don't insert to trampoline jump; the copied jmp is overwritten with an absolute jump to its target instead
   1.572 +    if (pJmp32 >= 0) {
   1.573 +      // mov r11, address
   1.574 +      tramp[pJmp32]   = 0x49;
   1.575 +      tramp[pJmp32+1] = 0xbb;
   1.576 +      *((intptr_t*)(tramp+pJmp32+2)) = (intptr_t)directJmpAddr;
   1.577 +
   1.578 +      // jmp r11
   1.579 +      tramp[pJmp32+10] = 0x41;
   1.580 +      tramp[pJmp32+11] = 0xff;
   1.581 +      tramp[pJmp32+12] = 0xe3;
   1.582 +    } else {
   1.583 +      // mov r11, address
   1.584 +      tramp[nBytes] = 0x49;
   1.585 +      tramp[nBytes+1] = 0xbb;
   1.586 +      *((intptr_t*)(tramp+nBytes+2)) = (intptr_t)trampDest;
   1.587 +
   1.588 +      // jmp r11
   1.589 +      tramp[nBytes+10] = 0x41;
   1.590 +      tramp[nBytes+11] = 0xff;
   1.591 +      tramp[nBytes+12] = 0xe3;
   1.592 +    }
   1.593 +#endif
   1.594 +
   1.595 +    // The trampoline is now valid.
   1.596 +    *outTramp = tramp;
   1.597 +    // NOTE(review): *outTramp is already non-null here, so if the VirtualProtectEx call below fails, AddHook() still returns true although the original function was not patched.
   1.598 +    // ensure we can modify the original code
   1.599 +    DWORD op;
   1.600 +    if (!VirtualProtectEx(GetCurrentProcess(), origFunction, nBytes, PAGE_EXECUTE_READWRITE, &op)) {
   1.601 +      //printf ("VirtualProtectEx failed! %d\n", GetLastError());
   1.602 +      return;
   1.603 +    }
   1.604 +
   1.605 +#if defined(_M_IX86)
   1.606 +    // now modify the original bytes
   1.607 +    origBytes[0] = 0xE9; // jmp
   1.608 +    *((intptr_t*)(origBytes+1)) = dest - (intptr_t)(origBytes+5); // target displacement
   1.609 +#elif defined(_M_X64)
   1.610 +    // mov r11, address
   1.611 +    origBytes[0] = 0x49;
   1.612 +    origBytes[1] = 0xbb;
   1.613 +
   1.614 +    *((intptr_t*)(origBytes+2)) = dest;
   1.615 +
   1.616 +    // jmp r11
   1.617 +    origBytes[10] = 0x41;
   1.618 +    origBytes[11] = 0xff;
   1.619 +    origBytes[12] = 0xe3;
   1.620 +#endif
   1.621 +
   1.622 +    // restore protection; if this fails we can't really do anything about it
   1.623 +    VirtualProtectEx(GetCurrentProcess(), origFunction, nBytes, op, &op);
   1.624 +  }
   1.625 +  // FindTrampolineSpace: returns the next unused kHookSize-byte slot of mHookPage, or 0 once mMaxHooks slots have been handed out.  The slot counts as used even if the caller bails out afterwards.
   1.626 +  byteptr_t FindTrampolineSpace()
   1.627 +  {
   1.628 +    if (mCurHooks >= mMaxHooks)
   1.629 +      return 0;
   1.630 +
   1.631 +    byteptr_t p = mHookPage + mCurHooks*kHookSize;
   1.632 +
   1.633 +    mCurHooks++;
   1.634 +
   1.635 +    return p;
   1.636 +  }
   1.637 +};
   1.638 +
   1.639 +} // namespace internal
   1.640 +
   1.641 +class WindowsDllInterceptor // Front end for hooking functions of one DLL: tries a nop-space patch first and falls back to a detour.
   1.642 +{
   1.643 +  internal::WindowsDllNopSpacePatcher mNopSpacePatcher;
   1.644 +  internal::WindowsDllDetourPatcher mDetourPatcher;
   1.645 +
   1.646 +  const char *mModuleName; // pointer handed to Init(); stored without copying and read again when the detour patcher is initialized
   1.647 +  int mNHooks; // nhooks value forwarded to the detour patcher's Init()
   1.648 +
   1.649 +public:
   1.650 +  WindowsDllInterceptor()
   1.651 +    : mModuleName(nullptr)
   1.652 +    , mNHooks(0)
   1.653 +  {}
   1.654 +  // Init: remembers the module name and hook count and initializes the nop-space patcher.  Calls made after the first one are ignored.
   1.655 +  void Init(const char *moduleName, int nhooks = 0)
   1.656 +  {
   1.657 +    if (mModuleName) {
   1.658 +      return;
   1.659 +    }
   1.660 +
   1.661 +    mModuleName = moduleName;
   1.662 +    mNHooks = nhooks;
   1.663 +    mNopSpacePatcher.Init(moduleName);
   1.664 +
   1.665 +    // Lazily initialize mDetourPatcher, since it allocates memory and we might
   1.666 +    // not need it.
   1.667 +  }
   1.668 +  // LockHooks: locks the detour patcher if it has been initialized; the nop-space patcher is not touched.
   1.669 +  void LockHooks()
   1.670 +  {
   1.671 +    if (mDetourPatcher.Initialized())
   1.672 +      mDetourPatcher.LockHooks();
   1.673 +  }
   1.674 +  // AddHook: hooks the export pname so that it calls hookDest.  Returns true on success, with *origFunc set by whichever patcher installed the hook; returns false if Init() was never called or both mechanisms fail.
   1.675 +  bool AddHook(const char *pname, intptr_t hookDest, void **origFunc)
   1.676 +  {
   1.677 +    // Use a nop space patch if possible, otherwise fall back to a detour.
   1.678 +    // This should be the preferred method for adding hooks.
   1.679 +
   1.680 +    if (!mModuleName) {
   1.681 +      return false;
   1.682 +    }
   1.683 +
   1.684 +    if (mNopSpacePatcher.AddHook(pname, hookDest, origFunc)) {
   1.685 +      return true;
   1.686 +    }
   1.687 +
   1.688 +    return AddDetour(pname, hookDest, origFunc);
   1.689 +  }
   1.690 +  // AddDetour: hooks pname with the detour patcher only, initializing that patcher on first use.  Returns false if Init() was never called or the detour could not be installed.
   1.691 +  bool AddDetour(const char *pname, intptr_t hookDest, void **origFunc)
   1.692 +  {
   1.693 +    // Generally, code should not call this method directly. Use AddHook unless
   1.694 +    // there is a specific need to avoid nop space patches.
   1.695 +
   1.696 +    if (!mModuleName) {
   1.697 +      return false;
   1.698 +    }
   1.699 +    // NOTE(review): Initialized() is also false after LockHooks(), so a call made after locking runs the detour patcher's Init() again -- confirm that this is intended.
   1.700 +    if (!mDetourPatcher.Initialized()) {
   1.701 +      mDetourPatcher.Init(mModuleName, mNHooks);
   1.702 +    }
   1.703 +
   1.704 +    return mDetourPatcher.AddHook(pname, hookDest, origFunc);
   1.705 +  }
   1.706 +};
   1.707 +
   1.708 +} // namespace mozilla
   1.709 +
   1.710 +#endif /* NS_WINDOWS_DLL_INTERCEPTOR_H_ */

mercurial