/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
 * vim: set ts=8 sts=4 et sw=4 tw=99:
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "jit/shared/MoveEmitter-x86-shared.h"

using namespace js;
using namespace js::jit;

MoveEmitterX86::MoveEmitterX86(MacroAssemblerSpecific &masm)
  : inCycle_(false),
    masm(masm),
    pushedAtCycle_(-1)
{
    pushedAtStart_ = masm.framePushed();
}

// Examine the cycle in moves starting at position i. Determine if it's a
// simple cycle consisting of all register-to-register moves in a single class,
// and whether it can be implemented entirely by swaps.
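// For example, the two-move cycle
//   (eax -> ebx)
//   (ebx -> eax)
// consists entirely of general registers and can be resolved with a single
// xchg, whereas a cycle that touches memory or mixes register classes must
// fall back to the stack-based path below.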
size_t
MoveEmitterX86::characterizeCycle(const MoveResolver &moves, size_t i,
                                  bool *allGeneralRegs, bool *allFloatRegs)
{
    size_t swapCount = 0;

    for (size_t j = i; ; j++) {
        const MoveOp &move = moves.getMove(j);

        // If it isn't a cycle of registers of the same kind, we won't be able
        // to optimize it.
        if (!move.to().isGeneralReg())
            *allGeneralRegs = false;
        if (!move.to().isFloatReg())
            *allFloatRegs = false;
        if (!*allGeneralRegs && !*allFloatRegs)
            return -1;

        // Stop iterating when we see the last one.
        if (j != i && move.isCycleEnd())
            break;

        // Check that this move is actually part of the cycle. This is
        // over-conservative when there are multiple reads from the same source,
        // but that's expected to be rare.
        if (move.from() != moves.getMove(j + 1).to()) {
            *allGeneralRegs = false;
            *allFloatRegs = false;
            return -1;
        }

        swapCount++;
    }

    // Check that the last move cycles back to the first move.
    const MoveOp &move = moves.getMove(i + swapCount);
    if (move.from() != moves.getMove(i).to()) {
        *allGeneralRegs = false;
        *allFloatRegs = false;
        return -1;
    }

    return swapCount;
}

// If we can emit optimized code for the cycle in moves starting at position i,
// do so, and return true.
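// If this isn't possible, the caller falls back to the stack-based
// breakCycle/completeCycle path.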
bool
MoveEmitterX86::maybeEmitOptimizedCycle(const MoveResolver &moves, size_t i,
                                        bool allGeneralRegs, bool allFloatRegs, size_t swapCount)
{
    if (allGeneralRegs && swapCount <= 2) {
        // Use x86's swap-integer-registers instruction if we only have a few
        // swaps. (x86 also has a swap between registers and memory but it's
        // slow.)
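        // Swapping each destination register with its successor rotates the
        // values around the cycle: after k swaps, the first k destinations
        // already hold their final values.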
        for (size_t k = 0; k < swapCount; k++)
            masm.xchg(moves.getMove(i + k).to().reg(), moves.getMove(i + k + 1).to().reg());
        return true;
    }

    if (allFloatRegs && swapCount == 1) {
        // There's no xchg for xmm registers, but if we only need a single swap,
        // it's cheap to do an XOR swap.
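        // The XOR swap exchanges the full xmm registers bit-for-bit, so the
        // same sequence works for both FLOAT32 and DOUBLE cycles.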
        FloatRegister a = moves.getMove(i).to().floatReg();
        FloatRegister b = moves.getMove(i + 1).to().floatReg();
        masm.xorpd(a, b);
        masm.xorpd(b, a);
        masm.xorpd(a, b);
        return true;
    }

    return false;
}

void
MoveEmitterX86::emit(const MoveResolver &moves)
{
    for (size_t i = 0; i < moves.numMoves(); i++) {
        const MoveOp &move = moves.getMove(i);
        const MoveOperand &from = move.from();
        const MoveOperand &to = move.to();

        if (move.isCycleEnd()) {
            JS_ASSERT(inCycle_);
            completeCycle(to, move.type());
            inCycle_ = false;
            continue;
        }

        if (move.isCycleBegin()) {
            JS_ASSERT(!inCycle_);

            // Characterize the cycle.
            bool allGeneralRegs = true, allFloatRegs = true;
            size_t swapCount = characterizeCycle(moves, i, &allGeneralRegs, &allFloatRegs);

            // Attempt to optimize it to avoid using the stack.
            if (maybeEmitOptimizedCycle(moves, i, allGeneralRegs, allFloatRegs, swapCount)) {
                i += swapCount;
                continue;
            }

            // Otherwise use the stack.
            breakCycle(to, move.endCycleType());
            inCycle_ = true;
        }

        // A normal move, or the first move of a cycle whose destination was
        // just saved by breakCycle above.
        switch (move.type()) {
          case MoveOp::FLOAT32:
            emitFloat32Move(from, to);
            break;
          case MoveOp::DOUBLE:
            emitDoubleMove(from, to);
            break;
          case MoveOp::INT32:
            emitInt32Move(from, to);
            break;
          case MoveOp::GENERAL:
            emitGeneralMove(from, to);
            break;
          default:
            MOZ_ASSUME_UNREACHABLE("Unexpected move type");
        }
    }
}

MoveEmitterX86::~MoveEmitterX86()
{
    assertDone();
}

Address
MoveEmitterX86::cycleSlot()
{
    if (pushedAtCycle_ == -1) {
        // Reserve stack for cycle resolution
        masm.reserveStack(sizeof(double));
        pushedAtCycle_ = masm.framePushed();
    }

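    // The slot was created when the frame was pushedAtCycle_ bytes deep, so
    // everything pushed since then lies between it and the current stack
    // pointer.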
    return Address(StackPointer, masm.framePushed() - pushedAtCycle_);
}

Address
MoveEmitterX86::toAddress(const MoveOperand &operand) const
{
    if (operand.base() != StackPointer)
        return Address(operand.base(), operand.disp());

    JS_ASSERT(operand.disp() >= 0);

    // Otherwise, the stack offset may need to be adjusted.
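    // The operand's displacement was recorded against the stack pointer as it
    // stood at pushedAtStart_; any bytes pushed since then must be added back
    // in.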
    return Address(StackPointer, operand.disp() + (masm.framePushed() - pushedAtStart_));
}

// Warning: do not use the resulting operand with pop instructions, since they
// compute the effective destination address after altering the stack pointer.
// Use toPopOperand if an Operand is needed for a pop.
Operand
MoveEmitterX86::toOperand(const MoveOperand &operand) const
{
    if (operand.isMemoryOrEffectiveAddress())
        return Operand(toAddress(operand));
    if (operand.isGeneralReg())
        return Operand(operand.reg());

    JS_ASSERT(operand.isFloatReg());
    return Operand(operand.floatReg());
}

// This is the same as toOperand except that it computes an Operand suitable for
// use in a pop.
Operand
MoveEmitterX86::toPopOperand(const MoveOperand &operand) const
{
    if (operand.isMemory()) {
        if (operand.base() != StackPointer)
            return Operand(operand.base(), operand.disp());

        JS_ASSERT(operand.disp() >= 0);

        // Otherwise, the stack offset may need to be adjusted. Note the extra
        // adjustment of one stack slot here, since pop computes its effective
        // address after incrementing the stack pointer.
        return Operand(StackPointer,
                       operand.disp() + (masm.framePushed() - sizeof(void *) - pushedAtStart_));
    }
    if (operand.isGeneralReg())
        return Operand(operand.reg());

    JS_ASSERT(operand.isFloatReg());
    return Operand(operand.floatReg());
}

void
MoveEmitterX86::breakCycle(const MoveOperand &to, MoveOp::Type type)
{
    // We're breaking a cycle of the form:
    //   (A -> B)
    //   (B -> A)
    //
    // This case handles (A -> B), which we reach first. We save B, then allow
    // the original move to continue.
    switch (type) {
      case MoveOp::FLOAT32:
        if (to.isMemory()) {
            masm.loadFloat32(toAddress(to), ScratchFloatReg);
            masm.storeFloat32(ScratchFloatReg, cycleSlot());
        } else {
            masm.storeFloat32(to.floatReg(), cycleSlot());
        }
        break;
      case MoveOp::DOUBLE:
        if (to.isMemory()) {
            masm.loadDouble(toAddress(to), ScratchFloatReg);
            masm.storeDouble(ScratchFloatReg, cycleSlot());
        } else {
            masm.storeDouble(to.floatReg(), cycleSlot());
        }
        break;
#ifdef JS_CODEGEN_X64
      case MoveOp::INT32:
        // x64 can't pop to a 32-bit destination, so don't push.
        if (to.isMemory()) {
            masm.load32(toAddress(to), ScratchReg);
            masm.store32(ScratchReg, cycleSlot());
        } else {
            masm.store32(to.reg(), cycleSlot());
        }
        break;
#endif
#ifndef JS_CODEGEN_X64
      case MoveOp::INT32:
#endif
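      // On 32-bit x86 an INT32 is pointer-sized, so it shares the push/pop
      // path with GENERAL moves.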
      case MoveOp::GENERAL:
        masm.Push(toOperand(to));
        break;
      default:
        MOZ_ASSUME_UNREACHABLE("Unexpected move type");
    }
}

void
MoveEmitterX86::completeCycle(const MoveOperand &to, MoveOp::Type type)
{
    // We're completing a cycle of the form:
    //   (A -> B)
    //   (B -> A)
    //
    // This case handles (B -> A), which we reach last. We emit a move from the
    // saved value of B, to A.
    switch (type) {
      case MoveOp::FLOAT32:
        JS_ASSERT(pushedAtCycle_ != -1);
        JS_ASSERT(pushedAtCycle_ - pushedAtStart_ >= sizeof(float));
        if (to.isMemory()) {
            masm.loadFloat32(cycleSlot(), ScratchFloatReg);
            masm.storeFloat32(ScratchFloatReg, toAddress(to));
        } else {
            masm.loadFloat32(cycleSlot(), to.floatReg());
        }
        break;
      case MoveOp::DOUBLE:
        JS_ASSERT(pushedAtCycle_ != -1);
        JS_ASSERT(pushedAtCycle_ - pushedAtStart_ >= sizeof(double));
        if (to.isMemory()) {
            masm.loadDouble(cycleSlot(), ScratchFloatReg);
            masm.storeDouble(ScratchFloatReg, toAddress(to));
        } else {
            masm.loadDouble(cycleSlot(), to.floatReg());
        }
        break;
#ifdef JS_CODEGEN_X64
      case MoveOp::INT32:
        JS_ASSERT(pushedAtCycle_ != -1);
        JS_ASSERT(pushedAtCycle_ - pushedAtStart_ >= sizeof(int32_t));
        // x64 can't pop to a 32-bit destination.
        if (to.isMemory()) {
            masm.load32(cycleSlot(), ScratchReg);
            masm.store32(ScratchReg, toAddress(to));
        } else {
            masm.load32(cycleSlot(), to.reg());
        }
        break;
#endif
#ifndef JS_CODEGEN_X64
      case MoveOp::INT32:
#endif
      case MoveOp::GENERAL:
        JS_ASSERT(masm.framePushed() - pushedAtStart_ >= sizeof(intptr_t));
        masm.Pop(toPopOperand(to));
        break;
      default:
        MOZ_ASSUME_UNREACHABLE("Unexpected move type");
    }
}

void
MoveEmitterX86::emitInt32Move(const MoveOperand &from, const MoveOperand &to)
{
    if (from.isGeneralReg()) {
        masm.move32(from.reg(), toOperand(to));
    } else if (to.isGeneralReg()) {
        JS_ASSERT(from.isMemory());
        masm.load32(toAddress(from), to.reg());
    } else {
        // Memory to memory gpr move.
        JS_ASSERT(from.isMemory());
#ifdef JS_CODEGEN_X64
        // x64 has a ScratchReg. Use it.
        masm.load32(toAddress(from), ScratchReg);
        masm.move32(ScratchReg, toOperand(to));
#else
        // No ScratchReg; bounce it off the stack.
        masm.Push(toOperand(from));
        masm.Pop(toPopOperand(to));
#endif
    }
}

void
MoveEmitterX86::emitGeneralMove(const MoveOperand &from, const MoveOperand &to)
{
    if (from.isGeneralReg()) {
        masm.mov(from.reg(), toOperand(to));
    } else if (to.isGeneralReg()) {
        JS_ASSERT(from.isMemoryOrEffectiveAddress());
        if (from.isMemory())
            masm.loadPtr(toAddress(from), to.reg());
        else
            masm.lea(toOperand(from), to.reg());
    } else if (from.isMemory()) {
        // Memory to memory gpr move.
#ifdef JS_CODEGEN_X64
        // x64 has a ScratchReg. Use it.
        masm.loadPtr(toAddress(from), ScratchReg);
        masm.mov(ScratchReg, toOperand(to));
#else
        // No ScratchReg; bounce it off the stack.
        masm.Push(toOperand(from));
        masm.Pop(toPopOperand(to));
#endif
    } else {
        // Effective address to memory move.
        JS_ASSERT(from.isEffectiveAddress());
#ifdef JS_CODEGEN_X64
        // x64 has a ScratchReg. Use it.
        masm.lea(toOperand(from), ScratchReg);
        masm.mov(ScratchReg, toOperand(to));
#else
        // This is tricky without a ScratchReg. We can't do an lea. Bounce the
        // base register off the stack, then add the offset in place. Note that
        // this clobbers FLAGS!
        masm.Push(from.base());
        masm.Pop(toPopOperand(to));
        masm.addPtr(Imm32(from.disp()), toOperand(to));
#endif
    }
}

void
MoveEmitterX86::emitFloat32Move(const MoveOperand &from, const MoveOperand &to)
{
    if (from.isFloatReg()) {
        if (to.isFloatReg())
            masm.moveFloat32(from.floatReg(), to.floatReg());
        else
            masm.storeFloat32(from.floatReg(), toAddress(to));
    } else if (to.isFloatReg()) {
        masm.loadFloat32(toAddress(from), to.floatReg());
    } else {
        // Memory to memory move.
        JS_ASSERT(from.isMemory());
        masm.loadFloat32(toAddress(from), ScratchFloatReg);
        masm.storeFloat32(ScratchFloatReg, toAddress(to));
    }
}

void
MoveEmitterX86::emitDoubleMove(const MoveOperand &from, const MoveOperand &to)
{
    if (from.isFloatReg()) {
        if (to.isFloatReg())
            masm.moveDouble(from.floatReg(), to.floatReg());
        else
            masm.storeDouble(from.floatReg(), toAddress(to));
    } else if (to.isFloatReg()) {
        masm.loadDouble(toAddress(from), to.floatReg());
    } else {
        // Memory to memory move.
        JS_ASSERT(from.isMemory());
        masm.loadDouble(toAddress(from), ScratchFloatReg);
        masm.storeDouble(ScratchFloatReg, toAddress(to));
    }
}

void
MoveEmitterX86::assertDone()
{
    JS_ASSERT(!inCycle_);
}

void
MoveEmitterX86::finish()
{
    assertDone();

    masm.freeStack(masm.framePushed() - pushedAtStart_);
}