--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/js/src/jit/shared/MoveEmitter-x86-shared.cpp	Wed Dec 31 06:09:35 2014 +0100
@@ -0,0 +1,433 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
+ * vim: set ts=8 sts=4 et sw=4 tw=99:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "jit/shared/MoveEmitter-x86-shared.h"
+
+using namespace js;
+using namespace js::jit;
+
+MoveEmitterX86::MoveEmitterX86(MacroAssemblerSpecific &masm)
+  : inCycle_(false),
+    masm(masm),
+    pushedAtCycle_(-1)
+{
+    pushedAtStart_ = masm.framePushed();
+}
+
+// Examine the cycle in moves starting at position i. Determine if it's a
+// simple cycle consisting of all register-to-register moves in a single class,
+// and whether it can be implemented entirely by swaps.
+size_t
+MoveEmitterX86::characterizeCycle(const MoveResolver &moves, size_t i,
+                                  bool *allGeneralRegs, bool *allFloatRegs)
+{
+    size_t swapCount = 0;
+
+    for (size_t j = i; ; j++) {
+        const MoveOp &move = moves.getMove(j);
+
+        // If it isn't a cycle of registers of the same kind, we won't be able
+        // to optimize it.
+        if (!move.to().isGeneralReg())
+            *allGeneralRegs = false;
+        if (!move.to().isFloatReg())
+            *allFloatRegs = false;
+        if (!*allGeneralRegs && !*allFloatRegs)
+            return -1;
+
+        // Stop iterating when we see the last one.
+        if (j != i && move.isCycleEnd())
+            break;
+
+        // Check that this move is actually part of the cycle. This is
+        // over-conservative when there are multiple reads from the same
+        // source, but that's expected to be rare.
+        if (move.from() != moves.getMove(j + 1).to()) {
+            *allGeneralRegs = false;
+            *allFloatRegs = false;
+            return -1;
+        }
+
+        swapCount++;
+    }
+
+    // Check that the last move cycles back to the first move.
+    const MoveOp &move = moves.getMove(i + swapCount);
+    if (move.from() != moves.getMove(i).to()) {
+        *allGeneralRegs = false;
+        *allFloatRegs = false;
+        return -1;
+    }
+
+    return swapCount;
+}
+
+// If we can emit optimized code for the cycle in moves starting at position i,
+// do so, and return true.
+bool
+MoveEmitterX86::maybeEmitOptimizedCycle(const MoveResolver &moves, size_t i,
+                                        bool allGeneralRegs, bool allFloatRegs, size_t swapCount)
+{
+    if (allGeneralRegs && swapCount <= 2) {
+        // Use x86's swap-integer-registers instruction if we only have a few
+        // swaps. (x86 also has a swap between registers and memory but it's
+        // slow.)
+        for (size_t k = 0; k < swapCount; k++)
+            masm.xchg(moves.getMove(i + k).to().reg(), moves.getMove(i + k + 1).to().reg());
+        return true;
+    }
+
+    if (allFloatRegs && swapCount == 1) {
+        // There's no xchg for xmm registers, but if we only need a single
+        // swap, it's cheap to do an XOR swap.
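+        // (An XOR swap exchanges two values in place without a temporary:
+        // starting from x == X and y == Y,
+        //   x ^= y  leaves x == X^Y
+        //   y ^= x  leaves y == Y^(X^Y) == X
+        //   x ^= y  leaves x == (X^Y)^X == Y
+        // so after the three xorpd instructions the two registers hold each
+        // other's original contents.)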
+        FloatRegister a = moves.getMove(i).to().floatReg();
+        FloatRegister b = moves.getMove(i + 1).to().floatReg();
+        masm.xorpd(a, b);
+        masm.xorpd(b, a);
+        masm.xorpd(a, b);
+        return true;
+    }
+
+    return false;
+}
+
+void
+MoveEmitterX86::emit(const MoveResolver &moves)
+{
+    for (size_t i = 0; i < moves.numMoves(); i++) {
+        const MoveOp &move = moves.getMove(i);
+        const MoveOperand &from = move.from();
+        const MoveOperand &to = move.to();
+
+        if (move.isCycleEnd()) {
+            JS_ASSERT(inCycle_);
+            completeCycle(to, move.type());
+            inCycle_ = false;
+            continue;
+        }
+
+        if (move.isCycleBegin()) {
+            JS_ASSERT(!inCycle_);
+
+            // Characterize the cycle.
+            bool allGeneralRegs = true, allFloatRegs = true;
+            size_t swapCount = characterizeCycle(moves, i, &allGeneralRegs, &allFloatRegs);
+
+            // Attempt to optimize it to avoid using the stack.
+            if (maybeEmitOptimizedCycle(moves, i, allGeneralRegs, allFloatRegs, swapCount)) {
+                i += swapCount;
+                continue;
+            }
+
+            // Otherwise use the stack.
+            breakCycle(to, move.endCycleType());
+            inCycle_ = true;
+        }
+
+        // A normal move which is not part of a cycle.
+        switch (move.type()) {
+          case MoveOp::FLOAT32:
+            emitFloat32Move(from, to);
+            break;
+          case MoveOp::DOUBLE:
+            emitDoubleMove(from, to);
+            break;
+          case MoveOp::INT32:
+            emitInt32Move(from, to);
+            break;
+          case MoveOp::GENERAL:
+            emitGeneralMove(from, to);
+            break;
+          default:
+            MOZ_ASSUME_UNREACHABLE("Unexpected move type");
+        }
+    }
+}
+
+MoveEmitterX86::~MoveEmitterX86()
+{
+    assertDone();
+}
+
+Address
+MoveEmitterX86::cycleSlot()
+{
+    if (pushedAtCycle_ == -1) {
+        // Reserve stack for cycle resolution.
+        masm.reserveStack(sizeof(double));
+        pushedAtCycle_ = masm.framePushed();
+    }
+
+    return Address(StackPointer, masm.framePushed() - pushedAtCycle_);
+}
+
+Address
+MoveEmitterX86::toAddress(const MoveOperand &operand) const
+{
+    if (operand.base() != StackPointer)
+        return Address(operand.base(), operand.disp());
+
+    JS_ASSERT(operand.disp() >= 0);
+
+    // Otherwise, the stack offset may need to be adjusted.
+    return Address(StackPointer, operand.disp() + (masm.framePushed() - pushedAtStart_));
+}
+
+// Warning: do not use the resulting operand with pop instructions, since they
+// compute the effective destination address after altering the stack pointer.
+// Use toPopOperand if an Operand is needed for a pop.
+Operand
+MoveEmitterX86::toOperand(const MoveOperand &operand) const
+{
+    if (operand.isMemoryOrEffectiveAddress())
+        return Operand(toAddress(operand));
+    if (operand.isGeneralReg())
+        return Operand(operand.reg());
+
+    JS_ASSERT(operand.isFloatReg());
+    return Operand(operand.floatReg());
+}
+
+// This is the same as toOperand except that it computes an Operand suitable
+// for use in a pop.
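+//
+// For example, a stack slot that toOperand would describe as
+// (StackPointer + disp) must be described as
+// (StackPointer + disp - sizeof(void *)) here, because by the time the pop
+// writes its memory destination it has already incremented the stack pointer
+// by one word.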
+Operand
+MoveEmitterX86::toPopOperand(const MoveOperand &operand) const
+{
+    if (operand.isMemory()) {
+        if (operand.base() != StackPointer)
+            return Operand(operand.base(), operand.disp());
+
+        JS_ASSERT(operand.disp() >= 0);
+
+        // Otherwise, the stack offset may need to be adjusted.
+        // Note the adjustment by one stack slot here, to account for the fact
+        // that pop computes its effective address after incrementing the stack
+        // pointer.
+        return Operand(StackPointer,
+                       operand.disp() + (masm.framePushed() - sizeof(void *) - pushedAtStart_));
+    }
+    if (operand.isGeneralReg())
+        return Operand(operand.reg());
+
+    JS_ASSERT(operand.isFloatReg());
+    return Operand(operand.floatReg());
+}
+
+void
+MoveEmitterX86::breakCycle(const MoveOperand &to, MoveOp::Type type)
+{
+    // The cycle has the form:
+    //   (A -> B)
+    //   (B -> A)
+    //
+    // This case handles (A -> B), which we reach first. We save B, then allow
+    // the original move to continue.
+    switch (type) {
+      case MoveOp::FLOAT32:
+        if (to.isMemory()) {
+            masm.loadFloat32(toAddress(to), ScratchFloatReg);
+            masm.storeFloat32(ScratchFloatReg, cycleSlot());
+        } else {
+            masm.storeFloat32(to.floatReg(), cycleSlot());
+        }
+        break;
+      case MoveOp::DOUBLE:
+        if (to.isMemory()) {
+            masm.loadDouble(toAddress(to), ScratchFloatReg);
+            masm.storeDouble(ScratchFloatReg, cycleSlot());
+        } else {
+            masm.storeDouble(to.floatReg(), cycleSlot());
+        }
+        break;
+#ifdef JS_CODEGEN_X64
+      case MoveOp::INT32:
+        // x64 can't pop to a 32-bit destination, so don't push.
+        if (to.isMemory()) {
+            masm.load32(toAddress(to), ScratchReg);
+            masm.store32(ScratchReg, cycleSlot());
+        } else {
+            masm.store32(to.reg(), cycleSlot());
+        }
+        break;
+#endif
+#ifndef JS_CODEGEN_X64
+      case MoveOp::INT32:
+#endif
+      case MoveOp::GENERAL:
+        masm.Push(toOperand(to));
+        break;
+      default:
+        MOZ_ASSUME_UNREACHABLE("Unexpected move type");
+    }
+}
+
+void
+MoveEmitterX86::completeCycle(const MoveOperand &to, MoveOp::Type type)
+{
+    // The cycle has the form:
+    //   (A -> B)
+    //   (B -> A)
+    //
+    // This case handles (B -> A), which we reach last. We emit a move from the
+    // saved value of B, to A.
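+    //
+    // For a GENERAL-type cycle resolved through the stack, the overall code
+    // emitted by breakCycle, the ordinary move, and completeCycle is roughly:
+    //   push B        (breakCycle: save B before it is clobbered)
+    //   mov  A -> B   (the normal move for A -> B)
+    //   pop  A        (completeCycle: restore the saved value into A)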
+    switch (type) {
+      case MoveOp::FLOAT32:
+        JS_ASSERT(pushedAtCycle_ != -1);
+        JS_ASSERT(pushedAtCycle_ - pushedAtStart_ >= sizeof(float));
+        if (to.isMemory()) {
+            masm.loadFloat32(cycleSlot(), ScratchFloatReg);
+            masm.storeFloat32(ScratchFloatReg, toAddress(to));
+        } else {
+            masm.loadFloat32(cycleSlot(), to.floatReg());
+        }
+        break;
+      case MoveOp::DOUBLE:
+        JS_ASSERT(pushedAtCycle_ != -1);
+        JS_ASSERT(pushedAtCycle_ - pushedAtStart_ >= sizeof(double));
+        if (to.isMemory()) {
+            masm.loadDouble(cycleSlot(), ScratchFloatReg);
+            masm.storeDouble(ScratchFloatReg, toAddress(to));
+        } else {
+            masm.loadDouble(cycleSlot(), to.floatReg());
+        }
+        break;
+#ifdef JS_CODEGEN_X64
+      case MoveOp::INT32:
+        JS_ASSERT(pushedAtCycle_ != -1);
+        JS_ASSERT(pushedAtCycle_ - pushedAtStart_ >= sizeof(int32_t));
+        // x64 can't pop to a 32-bit destination.
+        if (to.isMemory()) {
+            masm.load32(cycleSlot(), ScratchReg);
+            masm.store32(ScratchReg, toAddress(to));
+        } else {
+            masm.load32(cycleSlot(), to.reg());
+        }
+        break;
+#endif
+#ifndef JS_CODEGEN_X64
+      case MoveOp::INT32:
+#endif
+      case MoveOp::GENERAL:
+        JS_ASSERT(masm.framePushed() - pushedAtStart_ >= sizeof(intptr_t));
+        masm.Pop(toPopOperand(to));
+        break;
+      default:
+        MOZ_ASSUME_UNREACHABLE("Unexpected move type");
+    }
+}
+
+void
+MoveEmitterX86::emitInt32Move(const MoveOperand &from, const MoveOperand &to)
+{
+    if (from.isGeneralReg()) {
+        masm.move32(from.reg(), toOperand(to));
+    } else if (to.isGeneralReg()) {
+        JS_ASSERT(from.isMemory());
+        masm.load32(toAddress(from), to.reg());
+    } else {
+        // Memory to memory gpr move.
+        JS_ASSERT(from.isMemory());
+#ifdef JS_CODEGEN_X64
+        // x64 has a ScratchReg. Use it.
+        masm.load32(toAddress(from), ScratchReg);
+        masm.move32(ScratchReg, toOperand(to));
+#else
+        // No ScratchReg; bounce it off the stack.
+        masm.Push(toOperand(from));
+        masm.Pop(toPopOperand(to));
+#endif
+    }
+}
+
+void
+MoveEmitterX86::emitGeneralMove(const MoveOperand &from, const MoveOperand &to)
+{
+    if (from.isGeneralReg()) {
+        masm.mov(from.reg(), toOperand(to));
+    } else if (to.isGeneralReg()) {
+        JS_ASSERT(from.isMemoryOrEffectiveAddress());
+        if (from.isMemory())
+            masm.loadPtr(toAddress(from), to.reg());
+        else
+            masm.lea(toOperand(from), to.reg());
+    } else if (from.isMemory()) {
+        // Memory to memory gpr move.
+#ifdef JS_CODEGEN_X64
+        // x64 has a ScratchReg. Use it.
+        masm.loadPtr(toAddress(from), ScratchReg);
+        masm.mov(ScratchReg, toOperand(to));
+#else
+        // No ScratchReg; bounce it off the stack.
+        masm.Push(toOperand(from));
+        masm.Pop(toPopOperand(to));
+#endif
+    } else {
+        // Effective address to memory move.
+        JS_ASSERT(from.isEffectiveAddress());
+#ifdef JS_CODEGEN_X64
+        // x64 has a ScratchReg. Use it.
+        masm.lea(toOperand(from), ScratchReg);
+        masm.mov(ScratchReg, toOperand(to));
+#else
+        // This is tricky without a ScratchReg. We can't do an lea. Bounce the
+        // base register off the stack, then add the offset in place. Note that
+        // this clobbers FLAGS!
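+        // In effect this emits:
+        //   push base          ; save the base register's value
+        //   pop  [to]          ; store it to the destination slot
+        //   add  [to], disp    ; fold the displacement in, in memory
+        // so the destination ends up holding base + disp.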
+        masm.Push(from.base());
+        masm.Pop(toPopOperand(to));
+        masm.addPtr(Imm32(from.disp()), toOperand(to));
+#endif
+    }
+}
+
+void
+MoveEmitterX86::emitFloat32Move(const MoveOperand &from, const MoveOperand &to)
+{
+    if (from.isFloatReg()) {
+        if (to.isFloatReg())
+            masm.moveFloat32(from.floatReg(), to.floatReg());
+        else
+            masm.storeFloat32(from.floatReg(), toAddress(to));
+    } else if (to.isFloatReg()) {
+        masm.loadFloat32(toAddress(from), to.floatReg());
+    } else {
+        // Memory to memory move.
+        JS_ASSERT(from.isMemory());
+        masm.loadFloat32(toAddress(from), ScratchFloatReg);
+        masm.storeFloat32(ScratchFloatReg, toAddress(to));
+    }
+}
+
+void
+MoveEmitterX86::emitDoubleMove(const MoveOperand &from, const MoveOperand &to)
+{
+    if (from.isFloatReg()) {
+        if (to.isFloatReg())
+            masm.moveDouble(from.floatReg(), to.floatReg());
+        else
+            masm.storeDouble(from.floatReg(), toAddress(to));
+    } else if (to.isFloatReg()) {
+        masm.loadDouble(toAddress(from), to.floatReg());
+    } else {
+        // Memory to memory move.
+        JS_ASSERT(from.isMemory());
+        masm.loadDouble(toAddress(from), ScratchFloatReg);
+        masm.storeDouble(ScratchFloatReg, toAddress(to));
+    }
+}
+
+void
+MoveEmitterX86::assertDone()
+{
+    JS_ASSERT(!inCycle_);
+}
+
+void
+MoveEmitterX86::finish()
+{
+    assertDone();
+
+    masm.freeStack(masm.framePushed() - pushedAtStart_);
+}
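+
+// A minimal usage sketch (hypothetical names: `resolver` stands for a
+// MoveResolver whose moves have already been resolved, `masm` for the macro
+// assembler being targeted):
+//
+//   MoveEmitterX86 emitter(masm);
+//   emitter.emit(resolver);
+//   emitter.finish();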