xpcom/build/nsWindowsDllInterceptor.h

branch
TOR_BUG_9701
changeset 15
b8a032363ba2
equal deleted inserted replaced
-1:000000000000 0:b95aac196755
1 /* -*- Mode: C++; tab-width: 40; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5
6 #ifndef NS_WINDOWS_DLL_INTERCEPTOR_H_
7 #define NS_WINDOWS_DLL_INTERCEPTOR_H_
8 #include <windows.h>
9 #include <winternl.h>
10
11 /*
12 * Simple function interception.
13 *
14 * We have two separate mechanisms for intercepting a function: We can use the
15 * built-in nop space, if it exists, or we can create a detour.
16 *
17 * Using the built-in nop space works as follows: On x86-32, DLL functions
18 * begin with a two-byte nop (mov edi, edi) and are preceded by five bytes of
19 * NOP instructions.
20 *
21 * When we detect a function with this prelude, we do the following:
22 *
23 * 1. Write a long jump to our interceptor function into the five bytes of NOPs
24 * before the function.
25 *
26 * 2. Write a short jump -5 into the two-byte nop at the beginning of the function.
27 *
28 * This mechanism is nice because it's thread-safe. It's even safe to do if
29 * another thread is currently running the function we're modifying!
30 *
31 * When the WindowsDllNopSpacePatcher is destroyed, we overwrite the short jump
32 * but not the long jump, so re-intercepting the same function won't work,
33 * because its prelude won't match.
34 *
35 *
36 * Unfortunately nop space patching doesn't work on functions which don't have
37 * this magic prelude (and in particular, x86-64 never has the prelude). So
38 * when we can't use the built-in nop space, we fall back to using a detour,
39 * which works as follows:
40 *
41 * 1. Save first N bytes of OrigFunction to trampoline, where N is a
42 * number of bytes >= 5 that are instruction aligned.
43 *
44 * 2. Replace first 5 bytes of OrigFunction with a jump to the Hook
45 * function.
46 *
47 * 3. After N bytes of the trampoline, add a jump to OrigFunction+N to
48 * continue original program flow.
49 *
50 * 4. Hook function needs to call the trampoline during its execution,
51 * to invoke the original function (so address of trampoline is
52 * returned).
53 *
54 * When the WindowsDllDetourPatcher object is destructed, OrigFunction is
55 * patched again to jump directly to the trampoline instead of going through
56 * the hook function. As such, re-intercepting the same function won't work, as
57 * jump instructions are not supported.
58 *
59 * Note that this is not thread-safe. Sad day.
60 *
61 */
62
63 #include <stdint.h>
64
65 namespace mozilla {
66 namespace internal {
67
68 class WindowsDllNopSpacePatcher
69 {
70 typedef unsigned char *byteptr_t;
71 HMODULE mModule;
72
73 // Dumb array for remembering the addresses of functions we've patched.
74 // (This should be nsTArray, but non-XPCOM code uses this class.)
75 static const size_t maxPatchedFns = 128;
76 byteptr_t mPatchedFns[maxPatchedFns];
77 int mPatchedFnsLen;
78
79 public:
80 WindowsDllNopSpacePatcher()
81 : mModule(0)
82 , mPatchedFnsLen(0)
83 {}
84
85 ~WindowsDllNopSpacePatcher()
86 {
87 // Restore the mov edi, edi to the beginning of each function we patched.
88
89 for (int i = 0; i < mPatchedFnsLen; i++) {
90 byteptr_t fn = mPatchedFns[i];
91
92 // Ensure we can write to the code.
93 DWORD op;
94 if (!VirtualProtectEx(GetCurrentProcess(), fn, 2, PAGE_EXECUTE_READWRITE, &op)) {
95 // printf("VirtualProtectEx failed! %d\n", GetLastError());
96 continue;
97 }
98
99 // mov edi, edi
100 *((uint16_t*)fn) = 0xff8b;
101
102 // Restore the old protection.
103 VirtualProtectEx(GetCurrentProcess(), fn, 2, op, &op);
104
105 // I don't think this is actually necessary, but it can't hurt.
106 FlushInstructionCache(GetCurrentProcess(),
107 /* ignored */ nullptr,
108 /* ignored */ 0);
109 }
110 }
111
112 void Init(const char *modulename)
113 {
114 mModule = LoadLibraryExA(modulename, nullptr, 0);
115 if (!mModule) {
116 //printf("LoadLibraryEx for '%s' failed\n", modulename);
117 return;
118 }
119 }
120
121 #if defined(_M_IX86)
122 bool AddHook(const char *pname, intptr_t hookDest, void **origFunc)
123 {
124 if (!mModule)
125 return false;
126
127 if (mPatchedFnsLen == maxPatchedFns) {
128 // printf ("No space for hook in mPatchedFns.\n");
129 return false;
130 }
131
132 byteptr_t fn = reinterpret_cast<byteptr_t>(GetProcAddress(mModule, pname));
133 if (!fn) {
134 //printf ("GetProcAddress failed\n");
135 return false;
136 }
137
138 // Ensure we can read and write starting at fn - 5 (for the long jmp we're
139 // going to write) and ending at fn + 2 (for the short jmp up to the long
140 // jmp).
141 DWORD op;
142 if (!VirtualProtectEx(GetCurrentProcess(), fn - 5, 7, PAGE_EXECUTE_READWRITE, &op)) {
143 //printf ("VirtualProtectEx failed! %d\n", GetLastError());
144 return false;
145 }
146
147 bool rv = WriteHook(fn, hookDest, origFunc);
148
149 // Re-protect, and we're done.
150 VirtualProtectEx(GetCurrentProcess(), fn - 5, 7, op, &op);
151
152 if (rv) {
153 mPatchedFns[mPatchedFnsLen] = fn;
154 mPatchedFnsLen++;
155 }
156
157 return rv;
158 }
159
160 bool WriteHook(byteptr_t fn, intptr_t hookDest, void **origFunc)
161 {
162 // Check that the 5 bytes before fn are NOP's or INT 3's,
163 // and that the 2 bytes after fn are mov(edi, edi).
164 //
165 // It's safe to read fn[-5] because we set it to PAGE_EXECUTE_READWRITE
166 // before calling WriteHook.
167
168 for (int i = -5; i <= -1; i++) {
169 if (fn[i] != 0x90 && fn[i] != 0xcc) // nop or int 3
170 return false;
171 }
172
173 // mov edi, edi. Yes, there are two ways to encode the same thing:
174 //
175 // 0x89ff == mov r/m, r
176 // 0x8bff == mov r, r/m
177 //
178 // where "r" is register and "r/m" is register or memory. Windows seems to
179 // use 8bff; I include 89ff out of paranoia.
180 if ((fn[0] != 0x8b && fn[0] != 0x89) || fn[1] != 0xff) {
181 return false;
182 }
183
184 // Write a long jump into the space above the function.
185 fn[-5] = 0xe9; // jmp
186 *((intptr_t*)(fn - 4)) = hookDest - (uintptr_t)(fn); // target displacement
187
188 // Set origFunc here, because after this point, hookDest might be called,
189 // and hookDest might use the origFunc pointer.
190 *origFunc = fn + 2;
191
192 // Short jump up into our long jump.
193 *((uint16_t*)(fn)) = 0xf9eb; // jmp $-5
194
195 // I think this routine is safe without this, but it can't hurt.
196 FlushInstructionCache(GetCurrentProcess(),
197 /* ignored */ nullptr,
198 /* ignored */ 0);
199
200 return true;
201 }
202 #else
203 bool AddHook(const char *pname, intptr_t hookDest, void **origFunc)
204 {
205 // Not implemented except on x86-32.
206 return false;
207 }
208 #endif
209 };
210
211 class WindowsDllDetourPatcher
212 {
213 typedef unsigned char *byteptr_t;
214 public:
215 WindowsDllDetourPatcher()
216 : mModule(0), mHookPage(0), mMaxHooks(0), mCurHooks(0)
217 {
218 }
219
220 ~WindowsDllDetourPatcher()
221 {
222 int i;
223 byteptr_t p;
224 for (i = 0, p = mHookPage; i < mCurHooks; i++, p += kHookSize) {
225 #if defined(_M_IX86)
226 size_t nBytes = 1 + sizeof(intptr_t);
227 #elif defined(_M_X64)
228 size_t nBytes = 2 + sizeof(intptr_t);
229 #else
230 #error "Unknown processor type"
231 #endif
232 byteptr_t origBytes = *((byteptr_t *)p);
233 // ensure we can modify the original code
234 DWORD op;
235 if (!VirtualProtectEx(GetCurrentProcess(), origBytes, nBytes, PAGE_EXECUTE_READWRITE, &op)) {
236 //printf ("VirtualProtectEx failed! %d\n", GetLastError());
237 continue;
238 }
239 // Remove the hook by making the original function jump directly
240 // in the trampoline.
241 intptr_t dest = (intptr_t)(p + sizeof(void *));
242 #if defined(_M_IX86)
243 *((intptr_t*)(origBytes+1)) = dest - (intptr_t)(origBytes+5); // target displacement
244 #elif defined(_M_X64)
245 *((intptr_t*)(origBytes+2)) = dest;
246 #else
247 #error "Unknown processor type"
248 #endif
249 // restore protection; if this fails we can't really do anything about it
250 VirtualProtectEx(GetCurrentProcess(), origBytes, nBytes, op, &op);
251 }
252 }
253
254 void Init(const char *modulename, int nhooks = 0)
255 {
256 if (mModule)
257 return;
258
259 mModule = LoadLibraryExA(modulename, nullptr, 0);
260 if (!mModule) {
261 //printf("LoadLibraryEx for '%s' failed\n", modulename);
262 return;
263 }
264
265 int hooksPerPage = 4096 / kHookSize;
266 if (nhooks == 0)
267 nhooks = hooksPerPage;
268
269 mMaxHooks = nhooks + (hooksPerPage % nhooks);
270
271 mHookPage = (byteptr_t) VirtualAllocEx(GetCurrentProcess(), nullptr,
272 mMaxHooks * kHookSize,
273 MEM_COMMIT | MEM_RESERVE,
274 PAGE_EXECUTE_READWRITE);
275
276 if (!mHookPage) {
277 mModule = 0;
278 return;
279 }
280 }
281
282 bool Initialized()
283 {
284 return !!mModule;
285 }
286
287 void LockHooks()
288 {
289 if (!mModule)
290 return;
291
292 DWORD op;
293 VirtualProtectEx(GetCurrentProcess(), mHookPage, mMaxHooks * kHookSize, PAGE_EXECUTE_READ, &op);
294
295 mModule = 0;
296 }
297
298 bool AddHook(const char *pname, intptr_t hookDest, void **origFunc)
299 {
300 if (!mModule)
301 return false;
302
303 void *pAddr = (void *) GetProcAddress(mModule, pname);
304 if (!pAddr) {
305 //printf ("GetProcAddress failed\n");
306 return false;
307 }
308
309 CreateTrampoline(pAddr, hookDest, origFunc);
310 if (!*origFunc) {
311 //printf ("CreateTrampoline failed\n");
312 return false;
313 }
314
315 return true;
316 }
317
318 protected:
319 const static int kPageSize = 4096;
320 const static int kHookSize = 128;
321
322 HMODULE mModule;
323 byteptr_t mHookPage;
324 int mMaxHooks;
325 int mCurHooks;
326
327 void CreateTrampoline(void *origFunction,
328 intptr_t dest,
329 void **outTramp)
330 {
331 *outTramp = nullptr;
332
333 byteptr_t tramp = FindTrampolineSpace();
334 if (!tramp)
335 return;
336
337 byteptr_t origBytes = (byteptr_t) origFunction;
338
339 int nBytes = 0;
340 int pJmp32 = -1;
341
342 #if defined(_M_IX86)
343 while (nBytes < 5) {
344 // Understand some simple instructions that might be found in a
345 // prologue; we might need to extend this as necessary.
346 //
347 // Note! If we ever need to understand jump instructions, we'll
348 // need to rewrite the displacement argument.
349 if (origBytes[nBytes] >= 0x88 && origBytes[nBytes] <= 0x8B) {
350 // various MOVs
351 unsigned char b = origBytes[nBytes+1];
352 if (((b & 0xc0) == 0xc0) ||
353 (((b & 0xc0) == 0x00) &&
354 ((b & 0x07) != 0x04) && ((b & 0x07) != 0x05)))
355 {
356 // REG=r, R/M=r or REG=r, R/M=[r]
357 nBytes += 2;
358 } else if (((b & 0xc0) == 0x40) && ((b & 0x38) != 0x20)) {
359 // REG=r, R/M=[r + disp8]
360 nBytes += 3;
361 } else {
362 // complex MOV, bail
363 return;
364 }
365 } else if (origBytes[nBytes] == 0xB8) {
366 // MOV 0xB8: http://ref.x86asm.net/coder32.html#xB8
367 nBytes += 5;
368 } else if (origBytes[nBytes] == 0x83) {
369 // ADD|ODR|ADC|SBB|AND|SUB|XOR|CMP r/m, imm8
370 unsigned char b = origBytes[nBytes+1];
371 if ((b & 0xc0) == 0xc0) {
372 // ADD|ODR|ADC|SBB|AND|SUB|XOR|CMP r, imm8
373 nBytes += 3;
374 } else {
375 // bail
376 return;
377 }
378 } else if (origBytes[nBytes] == 0x68) {
379 // PUSH with 4-byte operand
380 nBytes += 5;
381 } else if ((origBytes[nBytes] & 0xf0) == 0x50) {
382 // 1-byte PUSH/POP
383 nBytes++;
384 } else if (origBytes[nBytes] == 0x6A) {
385 // PUSH imm8
386 nBytes += 2;
387 } else if (origBytes[nBytes] == 0xe9) {
388 pJmp32 = nBytes;
389 // jmp 32bit offset
390 nBytes += 5;
391 } else {
392 //printf ("Unknown x86 instruction byte 0x%02x, aborting trampoline\n", origBytes[nBytes]);
393 return;
394 }
395 }
396 #elif defined(_M_X64)
397 byteptr_t directJmpAddr;
398
399 while (nBytes < 13) {
400
401 // if found JMP 32bit offset, next bytes must be NOP
402 if (pJmp32 >= 0) {
403 if (origBytes[nBytes++] != 0x90)
404 return;
405
406 continue;
407 }
408 if (origBytes[nBytes] == 0x0f) {
409 nBytes++;
410 if (origBytes[nBytes] == 0x1f) {
411 // nop (multibyte)
412 nBytes++;
413 if ((origBytes[nBytes] & 0xc0) == 0x40 &&
414 (origBytes[nBytes] & 0x7) == 0x04) {
415 nBytes += 3;
416 } else {
417 return;
418 }
419 } else if (origBytes[nBytes] == 0x05) {
420 // syscall
421 nBytes++;
422 } else {
423 return;
424 }
425 } else if (origBytes[nBytes] == 0x41) {
426 // REX.B
427 nBytes++;
428
429 if ((origBytes[nBytes] & 0xf0) == 0x50) {
430 // push/pop with Rx register
431 nBytes++;
432 } else if (origBytes[nBytes] >= 0xb8 && origBytes[nBytes] <= 0xbf) {
433 // mov r32, imm32
434 nBytes += 5;
435 } else {
436 return;
437 }
438 } else if (origBytes[nBytes] == 0x45) {
439 // REX.R & REX.B
440 nBytes++;
441
442 if (origBytes[nBytes] == 0x33) {
443 // xor r32, r32
444 nBytes += 2;
445 } else {
446 return;
447 }
448 } else if ((origBytes[nBytes] & 0xfb) == 0x48) {
449 // REX.W | REX.WR
450 nBytes++;
451
452 if (origBytes[nBytes] == 0x81 && (origBytes[nBytes+1] & 0xf8) == 0xe8) {
453 // sub r, dword
454 nBytes += 6;
455 } else if (origBytes[nBytes] == 0x83 &&
456 (origBytes[nBytes+1] & 0xf8) == 0xe8) {
457 // sub r, byte
458 nBytes += 3;
459 } else if (origBytes[nBytes] == 0x83 &&
460 (origBytes[nBytes+1] & 0xf8) == 0x60) {
461 // and [r+d], imm8
462 nBytes += 5;
463 } else if ((origBytes[nBytes] & 0xfd) == 0x89) {
464 // MOV r/m64, r64 | MOV r64, r/m64
465 if ((origBytes[nBytes+1] & 0xc0) == 0x40) {
466 if ((origBytes[nBytes+1] & 0x7) == 0x04) {
467 // R/M=[SIB+disp8], REG=r64
468 nBytes += 4;
469 } else {
470 // R/M=[r64+disp8], REG=r64
471 nBytes += 3;
472 }
473 } else if (((origBytes[nBytes+1] & 0xc0) == 0xc0) ||
474 (((origBytes[nBytes+1] & 0xc0) == 0x00) &&
475 ((origBytes[nBytes+1] & 0x07) != 0x04) && ((origBytes[nBytes+1] & 0x07) != 0x05))) {
476 // REG=r64, R/M=r64 or REG=r64, R/M=[r64]
477 nBytes += 2;
478 } else {
479 // complex MOV
480 return;
481 }
482 } else if (origBytes[nBytes] == 0xc7) {
483 // MOV r/m64, imm32
484 if (origBytes[nBytes + 1] == 0x44) {
485 // MOV [r64+disp8], imm32
486 // ModR/W + SIB + disp8 + imm32
487 nBytes += 8;
488 } else {
489 return;
490 }
491 } else if (origBytes[nBytes] == 0xff) {
492 pJmp32 = nBytes - 1;
493 // JMP /4
494 if ((origBytes[nBytes+1] & 0xc0) == 0x0 &&
495 (origBytes[nBytes+1] & 0x07) == 0x5) {
496 // [rip+disp32]
497 // convert JMP 32bit offset to JMP 64bit direct
498 directJmpAddr = (byteptr_t)*((uint64_t*)(origBytes + nBytes + 6 + (*((int32_t*)(origBytes + nBytes + 2)))));
499 nBytes += 6;
500 } else {
501 // not support yet!
502 return;
503 }
504 } else {
505 // not support yet!
506 return;
507 }
508 } else if ((origBytes[nBytes] & 0xf0) == 0x50) {
509 // 1-byte push/pop
510 nBytes++;
511 } else if (origBytes[nBytes] == 0x90) {
512 // nop
513 nBytes++;
514 } else if (origBytes[nBytes] == 0xb8) {
515 // MOV 0xB8: http://ref.x86asm.net/coder32.html#xB8
516 nBytes += 5;
517 } else if (origBytes[nBytes] == 0xc3) {
518 // ret
519 nBytes++;
520 } else if (origBytes[nBytes] == 0xe9) {
521 pJmp32 = nBytes;
522 // convert JMP 32bit offset to JMP 64bit direct
523 directJmpAddr = origBytes + pJmp32 + 5 + (*((int32_t*)(origBytes + pJmp32 + 1)));
524 // jmp 32bit offset
525 nBytes += 5;
526 } else if (origBytes[nBytes] == 0xff) {
527 nBytes++;
528 if ((origBytes[nBytes] & 0xf8) == 0xf0) {
529 // push r64
530 nBytes++;
531 } else {
532 return;
533 }
534 } else {
535 return;
536 }
537 }
538 #else
539 #error "Unknown processor type"
540 #endif
541
542 if (nBytes > 100) {
543 //printf ("Too big!");
544 return;
545 }
546
547 // We keep the address of the original function in the first bytes of
548 // the trampoline buffer
549 *((void **)tramp) = origFunction;
550 tramp += sizeof(void *);
551
552 memcpy(tramp, origFunction, nBytes);
553
554 // OrigFunction+N, the target of the trampoline
555 byteptr_t trampDest = origBytes + nBytes;
556
557 #if defined(_M_IX86)
558 if (pJmp32 >= 0) {
559 // Jump directly to the original target of the jump instead of jumping to the
560 // original function.
561 // Adjust jump target displacement to jump location in the trampoline.
562 *((intptr_t*)(tramp+pJmp32+1)) += origBytes - tramp;
563 } else {
564 tramp[nBytes] = 0xE9; // jmp
565 *((intptr_t*)(tramp+nBytes+1)) = (intptr_t)trampDest - (intptr_t)(tramp+nBytes+5); // target displacement
566 }
567 #elif defined(_M_X64)
568 // If JMP32 opcode found, we don't insert to trampoline jump
569 if (pJmp32 >= 0) {
570 // mov r11, address
571 tramp[pJmp32] = 0x49;
572 tramp[pJmp32+1] = 0xbb;
573 *((intptr_t*)(tramp+pJmp32+2)) = (intptr_t)directJmpAddr;
574
575 // jmp r11
576 tramp[pJmp32+10] = 0x41;
577 tramp[pJmp32+11] = 0xff;
578 tramp[pJmp32+12] = 0xe3;
579 } else {
580 // mov r11, address
581 tramp[nBytes] = 0x49;
582 tramp[nBytes+1] = 0xbb;
583 *((intptr_t*)(tramp+nBytes+2)) = (intptr_t)trampDest;
584
585 // jmp r11
586 tramp[nBytes+10] = 0x41;
587 tramp[nBytes+11] = 0xff;
588 tramp[nBytes+12] = 0xe3;
589 }
590 #endif
591
592 // The trampoline is now valid.
593 *outTramp = tramp;
594
595 // ensure we can modify the original code
596 DWORD op;
597 if (!VirtualProtectEx(GetCurrentProcess(), origFunction, nBytes, PAGE_EXECUTE_READWRITE, &op)) {
598 //printf ("VirtualProtectEx failed! %d\n", GetLastError());
599 return;
600 }
601
602 #if defined(_M_IX86)
603 // now modify the original bytes
604 origBytes[0] = 0xE9; // jmp
605 *((intptr_t*)(origBytes+1)) = dest - (intptr_t)(origBytes+5); // target displacement
606 #elif defined(_M_X64)
607 // mov r11, address
608 origBytes[0] = 0x49;
609 origBytes[1] = 0xbb;
610
611 *((intptr_t*)(origBytes+2)) = dest;
612
613 // jmp r11
614 origBytes[10] = 0x41;
615 origBytes[11] = 0xff;
616 origBytes[12] = 0xe3;
617 #endif
618
619 // restore protection; if this fails we can't really do anything about it
620 VirtualProtectEx(GetCurrentProcess(), origFunction, nBytes, op, &op);
621 }
622
623 byteptr_t FindTrampolineSpace()
624 {
625 if (mCurHooks >= mMaxHooks)
626 return 0;
627
628 byteptr_t p = mHookPage + mCurHooks*kHookSize;
629
630 mCurHooks++;
631
632 return p;
633 }
634 };
635
636 } // namespace internal
637
638 class WindowsDllInterceptor
639 {
640 internal::WindowsDllNopSpacePatcher mNopSpacePatcher;
641 internal::WindowsDllDetourPatcher mDetourPatcher;
642
643 const char *mModuleName;
644 int mNHooks;
645
646 public:
647 WindowsDllInterceptor()
648 : mModuleName(nullptr)
649 , mNHooks(0)
650 {}
651
652 void Init(const char *moduleName, int nhooks = 0)
653 {
654 if (mModuleName) {
655 return;
656 }
657
658 mModuleName = moduleName;
659 mNHooks = nhooks;
660 mNopSpacePatcher.Init(moduleName);
661
662 // Lazily initialize mDetourPatcher, since it allocates memory and we might
663 // not need it.
664 }
665
666 void LockHooks()
667 {
668 if (mDetourPatcher.Initialized())
669 mDetourPatcher.LockHooks();
670 }
671
672 bool AddHook(const char *pname, intptr_t hookDest, void **origFunc)
673 {
674 // Use a nop space patch if possible, otherwise fall back to a detour.
675 // This should be the preferred method for adding hooks.
676
677 if (!mModuleName) {
678 return false;
679 }
680
681 if (mNopSpacePatcher.AddHook(pname, hookDest, origFunc)) {
682 return true;
683 }
684
685 return AddDetour(pname, hookDest, origFunc);
686 }
687
688 bool AddDetour(const char *pname, intptr_t hookDest, void **origFunc)
689 {
690 // Generally, code should not call this method directly. Use AddHook unless
691 // there is a specific need to avoid nop space patches.
692
693 if (!mModuleName) {
694 return false;
695 }
696
697 if (!mDetourPatcher.Initialized()) {
698 mDetourPatcher.Init(mModuleName, mNHooks);
699 }
700
701 return mDetourPatcher.AddHook(pname, hookDest, origFunc);
702 }
703 };
704
705 } // namespace mozilla
706
707 #endif /* NS_WINDOWS_DLL_INTERCEPTOR_H_ */

mercurial