/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
 * vim: set ts=8 sts=4 et sw=4 tw=99:
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "jit/StupidAllocator.h"

#include "jstypes.h"

using namespace js;
using namespace js::jit;

// Canonical stack offset for a virtual register: one Value-sized slot per
// vreg. Since liveness is not tracked, every vreg gets its own slot and
// slots are never shared.
static inline uint32_t
DefaultStackSlot(uint32_t vreg)
{
    return vreg * sizeof(Value);
}

// Return the canonical spill location for |vreg|. Definitions preset to an
// argument slot reuse that slot directly; everything else spills to its
// DefaultStackSlot.
LAllocation *
StupidAllocator::stackLocation(uint32_t vreg)
{
    LDefinition *def = virtualRegisters[vreg];
    if (def->policy() == LDefinition::PRESET && def->output()->isArgument())
        return def->output();

    return new(alloc()) LStackSlot(DefaultStackSlot(vreg));
}

// Map a physical register to its index in the registers[] table built by
// init(). The register must be allocatable (present in the table).
StupidAllocator::RegisterIndex
StupidAllocator::registerIndex(AnyRegister reg)
{
    for (size_t i = 0; i < registerCount; i++) {
        if (reg == registers[i].reg)
            return i;
    }
    MOZ_ASSUME_UNREACHABLE("Bad register");
}

// Record the defining LDefinition for every virtual register (definitions,
// non-bogus temps, and phis), and build the table of allocatable physical
// registers: general-purpose registers first, then float registers.
bool
StupidAllocator::init()
{
    if (!RegisterAllocator::init())
        return false;

    if (!virtualRegisters.appendN((LDefinition *)nullptr, graph.numVirtualRegisters()))
        return false;

    for (size_t i = 0; i < graph.numBlocks(); i++) {
        LBlock *block = graph.getBlock(i);
        for (LInstructionIterator ins = block->begin(); ins != block->end(); ins++) {
            for (size_t j = 0; j < ins->numDefs(); j++) {
                LDefinition *def = ins->getDef(j);
                // PASSTHROUGH defs alias another vreg's storage; do not let
                // them overwrite the real defining entry.
                if (def->policy() != LDefinition::PASSTHROUGH)
                    virtualRegisters[def->virtualRegister()] = def;
            }

            for (size_t j = 0; j < ins->numTemps(); j++) {
                LDefinition *def = ins->getTemp(j);
                if (def->isBogusTemp())
                    continue;
                virtualRegisters[def->virtualRegister()] = def;
            }
        }
        for (size_t j = 0; j < block->numPhis(); j++) {
            LPhi *phi = block->getPhi(j);
            LDefinition *def = phi->getDef(0);
            uint32_t vreg = def->virtualRegister();

            virtualRegisters[vreg] = def;
        }
    }

    // Assign physical registers to the tracked allocation.
    {
        registerCount = 0;
        RegisterSet remainingRegisters(allRegisters_);
        while (!remainingRegisters.empty(/* float = */ false))
            registers[registerCount++].reg = AnyRegister(remainingRegisters.takeGeneral());
        while (!remainingRegisters.empty(/* float = */ true))
            registers[registerCount++].reg = AnyRegister(remainingRegisters.takeFloat());
        JS_ASSERT(registerCount <= MAX_REGISTERS);
    }

    return true;
}

// Whether |alloc| pins the physical register |reg|: either it has already
// been assigned |reg| directly, or it is a FIXED-policy use whose fixed
// register is |reg|.
bool
StupidAllocator::allocationRequiresRegister(const LAllocation *alloc, AnyRegister reg)
{
    if (alloc->isRegister() && alloc->toRegister() == reg)
        return true;
    if (alloc->isUse()) {
        const LUse *use = alloc->toUse();
        if (use->policy() == LUse::FIXED) {
            AnyRegister usedReg = GetFixedRegister(virtualRegisters[use->virtualRegister()], use);
            if (usedReg == reg)
                return true;
        }
    }
    return false;
}

bool
StupidAllocator::registerIsReserved(LInstruction *ins, AnyRegister reg)
{
    // Whether reg is already reserved for an input or output of ins.
    for (LInstruction::InputIterator alloc(*ins); alloc.more(); alloc.next()) {
        if (allocationRequiresRegister(*alloc, reg))
            return true;
    }
    for (size_t i = 0; i < ins->numTemps(); i++) {
        if (allocationRequiresRegister(ins->getTemp(i)->output(), reg))
            return true;
    }
    for (size_t i = 0; i < ins->numDefs(); i++) {
        if (allocationRequiresRegister(ins->getDef(i)->output(), reg))
            return true;
    }
    return false;
}

AnyRegister
StupidAllocator::ensureHasRegister(LInstruction *ins, uint32_t vreg)
{
    // Ensure that vreg is held in a register before ins.

    // Check if the virtual register is already held in a physical register.
    RegisterIndex existing = findExistingRegister(vreg);
    if (existing != UINT32_MAX) {
        if (registerIsReserved(ins, registers[existing].reg)) {
            // The register it currently occupies is pinned by ins itself, so
            // spill it and fall through to pick a different register.
            evictRegister(ins, existing);
        } else {
            // Reuse the existing register; bump its age for LRU eviction.
            registers[existing].age = ins->id();
            return registers[existing].reg;
        }
    }

    RegisterIndex best = allocateRegister(ins, vreg);
    loadRegister(ins, vreg, best, virtualRegisters[vreg]->type());

    return registers[best].reg;
}

StupidAllocator::RegisterIndex
StupidAllocator::allocateRegister(LInstruction *ins, uint32_t vreg)
{
    // Pick a register for vreg, evicting an existing register if necessary.
    // Spill code will be placed before ins, and no existing allocated input
    // for ins will be touched.
    JS_ASSERT(ins);

    LDefinition *def = virtualRegisters[vreg];
    JS_ASSERT(def);

    RegisterIndex best = UINT32_MAX;

    for (size_t i = 0; i < registerCount; i++) {
        AnyRegister reg = registers[i].reg;

        // Only consider registers of the same class (float vs. general).
        if (reg.isFloat() != def->isFloatReg())
            continue;

        // Skip the register if it is in use for an allocated input or output.
        if (registerIsReserved(ins, reg))
            continue;

        // Prefer a free register; otherwise take the least recently used one.
        if (registers[i].vreg == MISSING_ALLOCATION ||
            best == UINT32_MAX ||
            registers[best].age > registers[i].age)
        {
            best = i;
        }
    }

    // NOTE(review): if every register of the class were reserved by ins,
    // |best| would still be UINT32_MAX here and evictRegister would index out
    // of bounds — presumably an instruction can never reserve all registers
    // of a class; confirm against the LIR constraints.
    evictRegister(ins, best);
    return best;
}

// If the register holds a value newer than its vreg's stack slot (dirty),
// emit a move before |ins| writing it back to the canonical stack location,
// then mark it clean.
void
StupidAllocator::syncRegister(LInstruction *ins, RegisterIndex index)
{
    if (registers[index].dirty) {
        LMoveGroup *input = getInputMoveGroup(ins->id());
        LAllocation *source = new(alloc()) LAllocation(registers[index].reg);

        uint32_t existing = registers[index].vreg;
        LAllocation *dest = stackLocation(existing);
        input->addAfter(source, dest, registers[index].type);

        registers[index].dirty = false;
    }
}

// Sync the register's contents to the stack if needed, then mark it free.
void
StupidAllocator::evictRegister(LInstruction *ins, RegisterIndex index)
{
    syncRegister(ins, index);
    registers[index].set(MISSING_ALLOCATION);
}

void
StupidAllocator::loadRegister(LInstruction *ins, uint32_t vreg, RegisterIndex index, LDefinition::Type type)
{
    // Load a vreg from its stack location to a register.
    LMoveGroup *input = getInputMoveGroup(ins->id());
    LAllocation *source = stackLocation(vreg);
    LAllocation *dest = new(alloc()) LAllocation(registers[index].reg);
    input->addAfter(source, dest, type);
    // Freshly loaded from the stack, so the register starts out clean
    // (set() without the dirty flag).
    registers[index].set(vreg, ins);
    registers[index].type = type;
}

// Linear scan for the register currently holding |vreg|; UINT32_MAX if none.
StupidAllocator::RegisterIndex
StupidAllocator::findExistingRegister(uint32_t vreg)
{
    for (size_t i = 0; i < registerCount; i++) {
        if (registers[i].vreg == vreg)
            return i;
    }
    return UINT32_MAX;
}

bool
StupidAllocator::go()
{
    // This register allocator is intended to be as simple as possible, while
    // still being complicated enough to share properties with more complicated
    // allocators. Namely, physical registers may be used to carry virtual
    // registers across LIR instructions, but not across basic blocks.
    //
    // This algorithm does not pay any attention to liveness. It is performed
    // as a single forward pass through the basic blocks in the program. As
    // virtual registers and temporaries are defined they are assigned physical
    // registers, evicting existing allocations in an LRU fashion.

    // For virtual registers not carried in a register, a canonical spill
    // location is used. Each vreg has a different spill location; since we do
    // not track liveness we cannot determine that two vregs have disjoint
    // lifetimes. Thus, the maximum stack height is the number of vregs (scaled
    // by two on 32 bit platforms to allow storing double values).
    graph.setLocalSlotCount(DefaultStackSlot(graph.numVirtualRegisters()));

    if (!init())
        return false;

    for (size_t blockIndex = 0; blockIndex < graph.numBlocks(); blockIndex++) {
        LBlock *block = graph.getBlock(blockIndex);
        JS_ASSERT(block->mir()->id() == blockIndex);

        // Registers do not carry values across blocks: start each block with
        // every physical register free.
        for (size_t i = 0; i < registerCount; i++)
            registers[i].set(MISSING_ALLOCATION);

        for (LInstructionIterator iter = block->begin(); iter != block->end(); iter++) {
            LInstruction *ins = *iter;

            // Before the block's final instruction, flush dirty registers and
            // emit moves feeding phis in the successor.
            if (ins == *block->rbegin())
                syncForBlockEnd(block, ins);

            allocateForInstruction(ins);
        }
    }

    return true;
}

void
StupidAllocator::syncForBlockEnd(LBlock *block, LInstruction *ins)
{
    // Sync any dirty registers, and update the synced state for phi nodes at
    // each successor of a block. We cannot conflate the storage for phis with
    // that of their inputs, as we cannot prove the live ranges of the phi and
    // its input do not overlap. The values for the two may additionally be
    // different, as the phi could be for the value of the input in a previous
    // loop iteration.

    for (size_t i = 0; i < registerCount; i++)
        syncRegister(ins, i);

    LMoveGroup *group = nullptr;

    MBasicBlock *successor = block->mir()->successorWithPhis();
    if (successor) {
        uint32_t position = block->mir()->positionInPhiSuccessor();
        LBlock *lirsuccessor = graph.getBlock(successor->id());
        for (size_t i = 0; i < lirsuccessor->numPhis(); i++) {
            LPhi *phi = lirsuccessor->getPhi(i);

            uint32_t sourcevreg = phi->getOperand(position)->toUse()->virtualRegister();
            uint32_t destvreg = phi->getDef(0)->virtualRegister();

            // Stack slot already holds the right value; no move needed.
            if (sourcevreg == destvreg)
                continue;

            // Phi moves are stack-to-stack: all registers were synced above.
            LAllocation *source = stackLocation(sourcevreg);
            LAllocation *dest = stackLocation(destvreg);

            if (!group) {
                // The moves we insert here need to happen simultaneously with
                // each other, yet after any existing moves before the instruction.
                LMoveGroup *input = getInputMoveGroup(ins->id());
                if (input->numMoves() == 0) {
                    group = input;
                } else {
                    group = LMoveGroup::New(alloc());
                    block->insertAfter(input, group);
                }
            }

            group->add(source, dest, phi->getDef(0)->type());
        }
    }
}

void
StupidAllocator::allocateForInstruction(LInstruction *ins)
{
    // Sync all registers before making a call.
    if (ins->isCall()) {
        for (size_t i = 0; i < registerCount; i++)
            syncRegister(ins, i);
    }

    // Allocate for inputs which are required to be in registers.
    for (LInstruction::InputIterator alloc(*ins); alloc.more(); alloc.next()) {
        if (!alloc->isUse())
            continue;
        LUse *use = alloc->toUse();
        uint32_t vreg = use->virtualRegister();
        if (use->policy() == LUse::REGISTER) {
            AnyRegister reg = ensureHasRegister(ins, vreg);
            alloc.replace(LAllocation(reg));
        } else if (use->policy() == LUse::FIXED) {
            AnyRegister reg = GetFixedRegister(virtualRegisters[vreg], use);
            RegisterIndex index = registerIndex(reg);
            if (registers[index].vreg != vreg) {
                // Free the required register, drop any stale copy of vreg in
                // another register, and reload vreg from its stack slot.
                evictRegister(ins, index);
                RegisterIndex existing = findExistingRegister(vreg);
                if (existing != UINT32_MAX)
                    evictRegister(ins, existing);
                loadRegister(ins, vreg, index, virtualRegisters[vreg]->type());
            }
            alloc.replace(LAllocation(reg));
        } else {
            // Inputs which are not required to be in a register are not
            // allocated until after temps/definitions, as the latter may need
            // to evict registers which hold these inputs.
        }
    }

    // Find registers to hold all temporaries and outputs of the instruction.
    for (size_t i = 0; i < ins->numTemps(); i++) {
        LDefinition *def = ins->getTemp(i);
        if (!def->isBogusTemp())
            allocateForDefinition(ins, def);
    }
    for (size_t i = 0; i < ins->numDefs(); i++) {
        LDefinition *def = ins->getDef(i);
        if (def->policy() != LDefinition::PASSTHROUGH)
            allocateForDefinition(ins, def);
    }

    // Allocate for remaining inputs which do not need to be in registers.
    for (LInstruction::InputIterator alloc(*ins); alloc.more(); alloc.next()) {
        if (!alloc->isUse())
            continue;
        LUse *use = alloc->toUse();
        uint32_t vreg = use->virtualRegister();
        JS_ASSERT(use->policy() != LUse::REGISTER && use->policy() != LUse::FIXED);

        // Prefer a register copy if one exists; otherwise use the stack slot.
        RegisterIndex index = findExistingRegister(vreg);
        if (index == UINT32_MAX) {
            LAllocation *stack = stackLocation(use->virtualRegister());
            alloc.replace(*stack);
        } else {
            registers[index].age = ins->id();
            alloc.replace(LAllocation(registers[index].reg));
        }
    }

    // If this is a call, evict all registers except for those holding outputs.
    if (ins->isCall()) {
        for (size_t i = 0; i < registerCount; i++) {
            // Outputs of the call were just set dirty by allocateForDefinition;
            // everything else is clobbered by the call.
            if (!registers[i].dirty)
                registers[i].set(MISSING_ALLOCATION);
        }
    }
}

void
StupidAllocator::allocateForDefinition(LInstruction *ins, LDefinition *def)
{
    uint32_t vreg = def->virtualRegister();

    // NOTE(review): |from| is never used below — looks like dead code left
    // over from another allocator; confirm before removing.
    CodePosition from;
    if ((def->output()->isRegister() && def->policy() == LDefinition::PRESET) ||
        def->policy() == LDefinition::MUST_REUSE_INPUT)
    {
        // Result will be in a specific register, spill any vreg held in
        // that register before the instruction.
        RegisterIndex index =
            registerIndex(def->policy() == LDefinition::PRESET
                          ? def->output()->toRegister()
                          : ins->getOperand(def->getReusedInput())->toRegister());
        evictRegister(ins, index);
        // Mark dirty: the register will hold a value newer than the stack slot.
        registers[index].set(vreg, ins, true);
        registers[index].type = virtualRegisters[vreg]->type();
        def->setOutput(LAllocation(registers[index].reg));
    } else if (def->policy() == LDefinition::PRESET) {
        // The result must be a stack location.
        def->setOutput(*stackLocation(vreg));
    } else {
        // Find a register to hold the result of the instruction.
        RegisterIndex best = allocateRegister(ins, vreg);
        registers[best].set(vreg, ins, true);
        registers[best].type = virtualRegisters[vreg]->type();
        def->setOutput(LAllocation(registers[best].reg));
    }
}