js/src/jit/shared/MoveEmitter-x86-shared.cpp

changeset 0:6474c204b198
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/js/src/jit/shared/MoveEmitter-x86-shared.cpp	Wed Dec 31 06:09:35 2014 +0100
@@ -0,0 +1,433 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
+ * vim: set ts=8 sts=4 et sw=4 tw=99:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "jit/shared/MoveEmitter-x86-shared.h"
+
+using namespace js;
+using namespace js::jit;
+
+MoveEmitterX86::MoveEmitterX86(MacroAssemblerSpecific &masm)
+  : inCycle_(false),
+    masm(masm),
+    pushedAtCycle_(-1)
+{
+    pushedAtStart_ = masm.framePushed();
+}
+
+// Examine the cycle in moves starting at position i. Determine if it's a
+// simple cycle consisting of all register-to-register moves in a single class,
+// and whether it can be implemented entirely by swaps.
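+// Returns the number of swaps needed to resolve the cycle, or (size_t)-1 when
+// it doesn't fit that pattern. The cycle occupies the consecutive moves
+// i..i+swapCount, with the final move flagged as the cycle end.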
+size_t
+MoveEmitterX86::characterizeCycle(const MoveResolver &moves, size_t i,
+                                  bool *allGeneralRegs, bool *allFloatRegs)
+{
+    size_t swapCount = 0;
+
+    for (size_t j = i; ; j++) {
+        const MoveOp &move = moves.getMove(j);
+
+        // If it isn't a cycle of registers of the same kind, we won't be able
+        // to optimize it.
+        if (!move.to().isGeneralReg())
+            *allGeneralRegs = false;
+        if (!move.to().isFloatReg())
+            *allFloatRegs = false;
+        if (!*allGeneralRegs && !*allFloatRegs)
+            return -1;
+
+        // Stop iterating when we see the last one.
+        if (j != i && move.isCycleEnd())
+            break;
+
+        // Check that this move is actually part of the cycle. This is
+        // over-conservative when there are multiple reads from the same source,
+        // but that's expected to be rare.
+        if (move.from() != moves.getMove(j + 1).to()) {
+            *allGeneralRegs = false;
+            *allFloatRegs = false;
+            return -1;
+        }
+
+        swapCount++;
+    }
+
+    // Check that the last move cycles back to the first move.
+    const MoveOp &move = moves.getMove(i + swapCount);
+    if (move.from() != moves.getMove(i).to()) {
+        *allGeneralRegs = false;
+        *allFloatRegs = false;
+        return -1;
+    }
+
+    return swapCount;
+}
+
+// If we can emit optimized code for the cycle in moves starting at position i,
+// do so, and return true.
+bool
+MoveEmitterX86::maybeEmitOptimizedCycle(const MoveResolver &moves, size_t i,
+                                        bool allGeneralRegs, bool allFloatRegs, size_t swapCount)
+{
+    if (allGeneralRegs && swapCount <= 2) {
+        // Use x86's swap-integer-registers instruction if we only have a few
+        // swaps. (x86 also has a swap between registers and memory but it's
+        // slow.)
+        for (size_t k = 0; k < swapCount; k++)
+            masm.xchg(moves.getMove(i + k).to().reg(), moves.getMove(i + k + 1).to().reg());
+        return true;
+    }
+
+    if (allFloatRegs && swapCount == 1) {
+        // There's no xchg for xmm registers, but if we only need a single swap,
+        // it's cheap to do an XOR swap.
+        FloatRegister a = moves.getMove(i).to().floatReg();
+        FloatRegister b = moves.getMove(i + 1).to().floatReg();
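+        // xorpd is a bit-wise XOR of the full xmm registers, so the classic
+        // three-XOR swap below is safe whether the registers hold floats or
+        // doubles.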
+        masm.xorpd(a, b);
+        masm.xorpd(b, a);
+        masm.xorpd(a, b);
+        return true;
+    }
+
+    return false;
+}
+
+void
+MoveEmitterX86::emit(const MoveResolver &moves)
+{
+    for (size_t i = 0; i < moves.numMoves(); i++) {
+        const MoveOp &move = moves.getMove(i);
+        const MoveOperand &from = move.from();
+        const MoveOperand &to = move.to();
+
+        if (move.isCycleEnd()) {
+            JS_ASSERT(inCycle_);
+            completeCycle(to, move.type());
+            inCycle_ = false;
+            continue;
+        }
+
+        if (move.isCycleBegin()) {
+            JS_ASSERT(!inCycle_);
+
+            // Characterize the cycle.
+            bool allGeneralRegs = true, allFloatRegs = true;
+            size_t swapCount = characterizeCycle(moves, i, &allGeneralRegs, &allFloatRegs);
+
+            // Attempt to optimize it to avoid using the stack.
+            if (maybeEmitOptimizedCycle(moves, i, allGeneralRegs, allFloatRegs, swapCount)) {
+                i += swapCount;
+                continue;
+            }
+
+            // Otherwise use the stack.
+            breakCycle(to, move.endCycleType());
+            inCycle_ = true;
+        }
+
+        // A normal move, or the first move of a cycle that couldn't be
+        // optimized (breakCycle has already saved this move's destination).
+        switch (move.type()) {
+          case MoveOp::FLOAT32:
+            emitFloat32Move(from, to);
+            break;
+          case MoveOp::DOUBLE:
+            emitDoubleMove(from, to);
+            break;
+          case MoveOp::INT32:
+            emitInt32Move(from, to);
+            break;
+          case MoveOp::GENERAL:
+            emitGeneralMove(from, to);
+            break;
+          default:
+            MOZ_ASSUME_UNREACHABLE("Unexpected move type");
+        }
+    }
+}
+
+MoveEmitterX86::~MoveEmitterX86()
+{
+    assertDone();
+}
+
+Address
+MoveEmitterX86::cycleSlot()
+{
+    if (pushedAtCycle_ == -1) {
+        // Reserve stack for cycle resolution
+        masm.reserveStack(sizeof(double));
+        pushedAtCycle_ = masm.framePushed();
+    }
+
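+    // pushedAtCycle_ records the frame depth at which the slot was reserved;
+    // the difference from the current framePushed() is the slot's offset from
+    // the stack pointer.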
+    return Address(StackPointer, masm.framePushed() - pushedAtCycle_);
+}
+
+Address
+MoveEmitterX86::toAddress(const MoveOperand &operand) const
+{
+    if (operand.base() != StackPointer)
+        return Address(operand.base(), operand.disp());
+
+    JS_ASSERT(operand.disp() >= 0);
+
+    // Otherwise, the stack offset may need to be adjusted.
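+    // The displacement is relative to the stack pointer as it was when this
+    // emitter was created (pushedAtStart_), so compensate for anything the
+    // emitter itself has pushed since then.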
+    return Address(StackPointer, operand.disp() + (masm.framePushed() - pushedAtStart_));
+}
+
+// Warning, do not use the resulting operand with pop instructions, since they
+// compute the effective destination address after altering the stack pointer.
+// Use toPopOperand if an Operand is needed for a pop.
+Operand
+MoveEmitterX86::toOperand(const MoveOperand &operand) const
+{
+    if (operand.isMemoryOrEffectiveAddress())
+        return Operand(toAddress(operand));
+    if (operand.isGeneralReg())
+        return Operand(operand.reg());
+
+    JS_ASSERT(operand.isFloatReg());
+    return Operand(operand.floatReg());
+}
+
+// This is the same as toOperand except that it computes an Operand suitable for
+// use in a pop.
+Operand
+MoveEmitterX86::toPopOperand(const MoveOperand &operand) const
+{
+    if (operand.isMemory()) {
+        if (operand.base() != StackPointer)
+            return Operand(operand.base(), operand.disp());
+
+        JS_ASSERT(operand.disp() >= 0);
+
+        // Otherwise, the stack offset may need to be adjusted.
+        // Note the adjustment by the stack slot here, to offset for the fact that pop
+        // computes its effective address after incrementing the stack pointer.
+        return Operand(StackPointer,
+                       operand.disp() + (masm.framePushed() - sizeof(void *) - pushedAtStart_));
+    }
+    if (operand.isGeneralReg())
+        return Operand(operand.reg());
+
+    JS_ASSERT(operand.isFloatReg());
+    return Operand(operand.floatReg());
+}
+
+void
+MoveEmitterX86::breakCycle(const MoveOperand &to, MoveOp::Type type)
+{
+    // We're handling a cycle of the form:
+    //   (A -> B)
+    //   (B -> A)
+    //
+    // This case handles (A -> B), which we reach first. We save B, then allow
+    // the original move to continue.
+    switch (type) {
+      case MoveOp::FLOAT32:
+        if (to.isMemory()) {
+            masm.loadFloat32(toAddress(to), ScratchFloatReg);
+            masm.storeFloat32(ScratchFloatReg, cycleSlot());
+        } else {
+            masm.storeFloat32(to.floatReg(), cycleSlot());
+        }
+        break;
+      case MoveOp::DOUBLE:
+        if (to.isMemory()) {
+            masm.loadDouble(toAddress(to), ScratchFloatReg);
+            masm.storeDouble(ScratchFloatReg, cycleSlot());
+        } else {
+            masm.storeDouble(to.floatReg(), cycleSlot());
+        }
+        break;
+#ifdef JS_CODEGEN_X64
+      case MoveOp::INT32:
+        // x64 can't pop to a 32-bit destination, so don't push.
+        if (to.isMemory()) {
+            masm.load32(toAddress(to), ScratchReg);
+            masm.store32(ScratchReg, cycleSlot());
+        } else {
+            masm.store32(to.reg(), cycleSlot());
+        }
+        break;
+#endif
+#ifndef JS_CODEGEN_X64
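+      // On 32-bit x86, an INT32 is pointer-sized, so it can share the
+      // Push/Pop path with GENERAL below.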
+      case MoveOp::INT32:
+#endif
+      case MoveOp::GENERAL:
+        masm.Push(toOperand(to));
+        break;
+      default:
+        MOZ_ASSUME_UNREACHABLE("Unexpected move type");
+    }
+}
+
+void
+MoveEmitterX86::completeCycle(const MoveOperand &to, MoveOp::Type type)
+{
+    // We're handling a cycle of the form:
+    //   (A -> B)
+    //   (B -> A)
+    //
+    // This case handles (B -> A), which we reach last. We emit a move from the
+    // saved value of B, to A.
+    switch (type) {
+      case MoveOp::FLOAT32:
+        JS_ASSERT(pushedAtCycle_ != -1);
+        JS_ASSERT(pushedAtCycle_ - pushedAtStart_ >= sizeof(float));
+        if (to.isMemory()) {
+            masm.loadFloat32(cycleSlot(), ScratchFloatReg);
+            masm.storeFloat32(ScratchFloatReg, toAddress(to));
+        } else {
+            masm.loadFloat32(cycleSlot(), to.floatReg());
+        }
+        break;
+      case MoveOp::DOUBLE:
+        JS_ASSERT(pushedAtCycle_ != -1);
+        JS_ASSERT(pushedAtCycle_ - pushedAtStart_ >= sizeof(double));
+        if (to.isMemory()) {
+            masm.loadDouble(cycleSlot(), ScratchFloatReg);
+            masm.storeDouble(ScratchFloatReg, toAddress(to));
+        } else {
+            masm.loadDouble(cycleSlot(), to.floatReg());
+        }
+        break;
+#ifdef JS_CODEGEN_X64
+      case MoveOp::INT32:
+        JS_ASSERT(pushedAtCycle_ != -1);
+        JS_ASSERT(pushedAtCycle_ - pushedAtStart_ >= sizeof(int32_t));
+        // x64 can't pop to a 32-bit destination.
+        if (to.isMemory()) {
+            masm.load32(cycleSlot(), ScratchReg);
+            masm.store32(ScratchReg, toAddress(to));
+        } else {
+            masm.load32(cycleSlot(), to.reg());
+        }
+        break;
+#endif
+#ifndef JS_CODEGEN_X64
+      case MoveOp::INT32:
+#endif
+      case MoveOp::GENERAL:
+        JS_ASSERT(masm.framePushed() - pushedAtStart_ >= sizeof(intptr_t));
+        masm.Pop(toPopOperand(to));
+        break;
+      default:
+        MOZ_ASSUME_UNREACHABLE("Unexpected move type");
+    }
+}
+
+void
+MoveEmitterX86::emitInt32Move(const MoveOperand &from, const MoveOperand &to)
+{
+    if (from.isGeneralReg()) {
+        masm.move32(from.reg(), toOperand(to));
+    } else if (to.isGeneralReg()) {
+        JS_ASSERT(from.isMemory());
+        masm.load32(toAddress(from), to.reg());
+    } else {
+        // Memory to memory gpr move.
+        JS_ASSERT(from.isMemory());
+#ifdef JS_CODEGEN_X64
+        // x64 has a ScratchReg. Use it.
+        masm.load32(toAddress(from), ScratchReg);
+        masm.move32(ScratchReg, toOperand(to));
+#else
+        // No ScratchReg; bounce it off the stack.
+        masm.Push(toOperand(from));
+        masm.Pop(toPopOperand(to));
+#endif
+    }
+}
+
+void
+MoveEmitterX86::emitGeneralMove(const MoveOperand &from, const MoveOperand &to)
+{
+    if (from.isGeneralReg()) {
+        masm.mov(from.reg(), toOperand(to));
+    } else if (to.isGeneralReg()) {
+        JS_ASSERT(from.isMemoryOrEffectiveAddress());
+        if (from.isMemory())
+            masm.loadPtr(toAddress(from), to.reg());
+        else
+            masm.lea(toOperand(from), to.reg());
+    } else if (from.isMemory()) {
+        // Memory to memory gpr move.
+#ifdef JS_CODEGEN_X64
+        // x64 has a ScratchReg. Use it.
+        masm.loadPtr(toAddress(from), ScratchReg);
+        masm.mov(ScratchReg, toOperand(to));
+#else
+        // No ScratchReg; bounce it off the stack.
+        masm.Push(toOperand(from));
+        masm.Pop(toPopOperand(to));
+#endif
+    } else {
+        // Effective address to memory move.
+        JS_ASSERT(from.isEffectiveAddress());
+#ifdef JS_CODEGEN_X64
+        // x64 has a ScratchReg. Use it.
+        masm.lea(toOperand(from), ScratchReg);
+        masm.mov(ScratchReg, toOperand(to));
+#else
+        // This is tricky without a ScratchReg. We can't do an lea. Bounce the
+        // base register off the stack, then add the offset in place. Note that
+        // this clobbers FLAGS!
+        masm.Push(from.base());
+        masm.Pop(toPopOperand(to));
+        masm.addPtr(Imm32(from.disp()), toOperand(to));
+#endif
+    }
+}
+
+void
+MoveEmitterX86::emitFloat32Move(const MoveOperand &from, const MoveOperand &to)
+{
+    if (from.isFloatReg()) {
+        if (to.isFloatReg())
+            masm.moveFloat32(from.floatReg(), to.floatReg());
+        else
+            masm.storeFloat32(from.floatReg(), toAddress(to));
+    } else if (to.isFloatReg()) {
+        masm.loadFloat32(toAddress(from), to.floatReg());
+    } else {
+        // Memory to memory move.
+        JS_ASSERT(from.isMemory());
+        masm.loadFloat32(toAddress(from), ScratchFloatReg);
+        masm.storeFloat32(ScratchFloatReg, toAddress(to));
+    }
+}
+
+void
+MoveEmitterX86::emitDoubleMove(const MoveOperand &from, const MoveOperand &to)
+{
+    if (from.isFloatReg()) {
+        if (to.isFloatReg())
+            masm.moveDouble(from.floatReg(), to.floatReg());
+        else
+            masm.storeDouble(from.floatReg(), toAddress(to));
+    } else if (to.isFloatReg()) {
+        masm.loadDouble(toAddress(from), to.floatReg());
+    } else {
+        // Memory to memory move.
+        JS_ASSERT(from.isMemory());
+        masm.loadDouble(toAddress(from), ScratchFloatReg);
+        masm.storeDouble(ScratchFloatReg, toAddress(to));
+    }
+}
+
+void
+MoveEmitterX86::assertDone()
+{
+    JS_ASSERT(!inCycle_);
+}
+
+void
+MoveEmitterX86::finish()
+{
+    assertDone();
+
+    masm.freeStack(masm.framePushed() - pushedAtStart_);
+}
+
