reg_alloc: Explicitly specify GPR and XMM order

This allows each backend to modify what registers they want to use and their preferred orderings
This commit is contained in:
MerryMage 2020-04-08 13:19:26 +01:00
parent c232ad7971
commit 49fcfe040c
7 changed files with 62 additions and 42 deletions

View file

@ -85,7 +85,7 @@ A32EmitX64::BlockDescriptor A32EmitX64::Emit(IR::Block& block) {
// Start emitting.
EmitCondPrelude(block);
RegAlloc reg_alloc{code, A32JitState::SpillCount, SpillToOpArg<A32JitState>};
RegAlloc reg_alloc{code, A32JitState::SpillCount, SpillToOpArg<A32JitState>, any_gpr, any_xmm};
A32EmitContext ctx{reg_alloc, block};
for (auto iter = block.begin(); iter != block.end(); ++iter) {
@ -841,7 +841,7 @@ void A32EmitX64::ReadMemory(A32EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.UseScratch(args[0], ABI_PARAM2);
const Xbyak::Reg64 vaddr = code.ABI_PARAM2;
const Xbyak::Reg64 value = ctx.reg_alloc.ScratchGpr({ABI_RETURN});
const Xbyak::Reg64 value = ctx.reg_alloc.ScratchGpr(ABI_RETURN);
const auto src_ptr = EmitVAddrLookup(code, ctx.reg_alloc, config, abort, vaddr, value);
switch (bitsize) {
@ -912,7 +912,7 @@ void A32EmitX64::WriteMemory(A32EmitContext& ctx, IR::Inst* inst) {
Xbyak::Label abort, end;
ctx.reg_alloc.ScratchGpr({ABI_RETURN});
ctx.reg_alloc.ScratchGpr(ABI_RETURN);
ctx.reg_alloc.UseScratch(args[0], ABI_PARAM2);
ctx.reg_alloc.UseScratch(args[1], ABI_PARAM3);

View file

@ -70,7 +70,7 @@ A64EmitX64::BlockDescriptor A64EmitX64::Emit(IR::Block& block) {
// Start emitting.
EmitCondPrelude(block);
RegAlloc reg_alloc{code, A64JitState::SpillCount, SpillToOpArg<A64JitState>};
RegAlloc reg_alloc{code, A64JitState::SpillCount, SpillToOpArg<A64JitState>, any_gpr, any_xmm};
A64EmitContext ctx{conf, reg_alloc, block};
for (auto iter = block.begin(); iter != block.end(); ++iter) {

View file

@ -89,7 +89,7 @@ void EmitX64::EmitPushRSB(EmitContext& ctx, IR::Inst* inst) {
ASSERT(args[0].IsImmediate());
const u64 unique_hash_of_target = args[0].GetImmediateU64();
ctx.reg_alloc.ScratchGpr({HostLoc::RCX});
ctx.reg_alloc.ScratchGpr(HostLoc::RCX);
const Xbyak::Reg64 loc_desc_reg = ctx.reg_alloc.ScratchGpr();
const Xbyak::Reg64 index_reg = ctx.reg_alloc.ScratchGpr();
@ -135,7 +135,7 @@ void EmitX64::EmitGetNZCVFromOp(EmitContext& ctx, IR::Inst* inst) {
}
}();
const Xbyak::Reg64 nzcv = ctx.reg_alloc.ScratchGpr({HostLoc::RAX});
const Xbyak::Reg64 nzcv = ctx.reg_alloc.ScratchGpr(HostLoc::RAX);
const Xbyak::Reg value = ctx.reg_alloc.UseGpr(args[0]).changeBit(bitsize);
code.cmp(value, 0);
code.lahf();

View file

@ -122,7 +122,7 @@ void EmitX64::EmitTestBit(EmitContext& ctx, IR::Inst* inst) {
static void EmitConditionalSelect(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, int bitsize) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Reg32 nzcv = ctx.reg_alloc.ScratchGpr({HostLoc::RAX}).cvt32();
const Xbyak::Reg32 nzcv = ctx.reg_alloc.ScratchGpr(HostLoc::RAX).cvt32();
const Xbyak::Reg then_ = ctx.reg_alloc.UseGpr(args[1]).changeBit(bitsize);
const Xbyak::Reg else_ = ctx.reg_alloc.UseScratchGpr(args[2]).changeBit(bitsize);
@ -847,7 +847,7 @@ static Xbyak::Reg64 DoNZCV(BlockOfCode& code, RegAlloc& reg_alloc, IR::Inst* nzc
return Xbyak::Reg64{-1};
}
const Xbyak::Reg64 nzcv = reg_alloc.ScratchGpr({HostLoc::RAX});
const Xbyak::Reg64 nzcv = reg_alloc.ScratchGpr(HostLoc::RAX);
code.xor_(nzcv.cvt32(), nzcv.cvt32());
return nzcv;
}
@ -1030,7 +1030,7 @@ void EmitX64::EmitMul64(EmitContext& ctx, IR::Inst* inst) {
void EmitX64::EmitUnsignedMultiplyHigh64(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.ScratchGpr({HostLoc::RDX});
ctx.reg_alloc.ScratchGpr(HostLoc::RDX);
ctx.reg_alloc.UseScratch(args[0], HostLoc::RAX);
OpArg op_arg = ctx.reg_alloc.UseOpArg(args[1]);
code.mul(*op_arg);
@ -1041,7 +1041,7 @@ void EmitX64::EmitUnsignedMultiplyHigh64(EmitContext& ctx, IR::Inst* inst) {
void EmitX64::EmitSignedMultiplyHigh64(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.ScratchGpr({HostLoc::RDX});
ctx.reg_alloc.ScratchGpr(HostLoc::RDX);
ctx.reg_alloc.UseScratch(args[0], HostLoc::RAX);
OpArg op_arg = ctx.reg_alloc.UseOpArg(args[1]);
code.imul(*op_arg);
@ -1052,8 +1052,8 @@ void EmitX64::EmitSignedMultiplyHigh64(EmitContext& ctx, IR::Inst* inst) {
void EmitX64::EmitUnsignedDiv32(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.ScratchGpr({HostLoc::RAX});
ctx.reg_alloc.ScratchGpr({HostLoc::RDX});
ctx.reg_alloc.ScratchGpr(HostLoc::RAX);
ctx.reg_alloc.ScratchGpr(HostLoc::RDX);
const Xbyak::Reg32 dividend = ctx.reg_alloc.UseGpr(args[0]).cvt32();
const Xbyak::Reg32 divisor = ctx.reg_alloc.UseGpr(args[1]).cvt32();
@ -1073,8 +1073,8 @@ void EmitX64::EmitUnsignedDiv32(EmitContext& ctx, IR::Inst* inst) {
void EmitX64::EmitUnsignedDiv64(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.ScratchGpr({HostLoc::RAX});
ctx.reg_alloc.ScratchGpr({HostLoc::RDX});
ctx.reg_alloc.ScratchGpr(HostLoc::RAX);
ctx.reg_alloc.ScratchGpr(HostLoc::RDX);
const Xbyak::Reg64 dividend = ctx.reg_alloc.UseGpr(args[0]);
const Xbyak::Reg64 divisor = ctx.reg_alloc.UseGpr(args[1]);
@ -1094,8 +1094,8 @@ void EmitX64::EmitUnsignedDiv64(EmitContext& ctx, IR::Inst* inst) {
void EmitX64::EmitSignedDiv32(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.ScratchGpr({HostLoc::RAX});
ctx.reg_alloc.ScratchGpr({HostLoc::RDX});
ctx.reg_alloc.ScratchGpr(HostLoc::RAX);
ctx.reg_alloc.ScratchGpr(HostLoc::RDX);
const Xbyak::Reg32 dividend = ctx.reg_alloc.UseGpr(args[0]).cvt32();
const Xbyak::Reg32 divisor = ctx.reg_alloc.UseGpr(args[1]).cvt32();
@ -1115,8 +1115,8 @@ void EmitX64::EmitSignedDiv32(EmitContext& ctx, IR::Inst* inst) {
void EmitX64::EmitSignedDiv64(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.ScratchGpr({HostLoc::RAX});
ctx.reg_alloc.ScratchGpr({HostLoc::RDX});
ctx.reg_alloc.ScratchGpr(HostLoc::RAX);
ctx.reg_alloc.ScratchGpr(HostLoc::RDX);
const Xbyak::Reg64 dividend = ctx.reg_alloc.UseGpr(args[0]);
const Xbyak::Reg64 divisor = ctx.reg_alloc.UseGpr(args[1]);

View file

@ -1008,7 +1008,7 @@ void EmitX64::EmitFPSub64(EmitContext& ctx, IR::Inst* inst) {
}
static Xbyak::Reg64 SetFpscrNzcvFromFlags(BlockOfCode& code, EmitContext& ctx) {
ctx.reg_alloc.ScratchGpr({HostLoc::RCX}); // shifting requires use of cl
ctx.reg_alloc.ScratchGpr(HostLoc::RCX); // shifting requires use of cl
const Xbyak::Reg64 nzcv = ctx.reg_alloc.ScratchGpr();
// x64 flags ARM flags

View file

@ -227,6 +227,14 @@ bool Argument::IsInMemory() const {
return HostLocIsSpill(*reg_alloc.ValueLocation(value.GetInst()));
}
RegAlloc::RegAlloc(BlockOfCode& code, size_t num_spills, std::function<Xbyak::Address(HostLoc)> spill_to_addr, std::vector<HostLoc> gpr_order, std::vector<HostLoc> xmm_order)
: gpr_order(gpr_order)
, xmm_order(xmm_order)
, hostloc_info(NonSpillHostLocCount + num_spills)
, code(code)
, spill_to_addr(std::move(spill_to_addr))
{}
RegAlloc::ArgumentInfo RegAlloc::GetArgumentInfo(IR::Inst* inst) {
ArgumentInfo ret = {Argument{*this}, Argument{*this}, Argument{*this}, Argument{*this}};
for (size_t i = 0; i < inst->NumArgs(); i++) {
@ -243,13 +251,13 @@ RegAlloc::ArgumentInfo RegAlloc::GetArgumentInfo(IR::Inst* inst) {
Xbyak::Reg64 RegAlloc::UseGpr(Argument& arg) {
ASSERT(!arg.allocated);
arg.allocated = true;
return HostLocToReg64(UseImpl(arg.value, any_gpr));
return HostLocToReg64(UseImpl(arg.value, gpr_order));
}
Xbyak::Xmm RegAlloc::UseXmm(Argument& arg) {
ASSERT(!arg.allocated);
arg.allocated = true;
return HostLocToXmm(UseImpl(arg.value, any_xmm));
return HostLocToXmm(UseImpl(arg.value, xmm_order));
}
OpArg RegAlloc::UseOpArg(Argument& arg) {
@ -265,13 +273,13 @@ void RegAlloc::Use(Argument& arg, HostLoc host_loc) {
Xbyak::Reg64 RegAlloc::UseScratchGpr(Argument& arg) {
ASSERT(!arg.allocated);
arg.allocated = true;
return HostLocToReg64(UseScratchImpl(arg.value, any_gpr));
return HostLocToReg64(UseScratchImpl(arg.value, gpr_order));
}
Xbyak::Xmm RegAlloc::UseScratchXmm(Argument& arg) {
ASSERT(!arg.allocated);
arg.allocated = true;
return HostLocToXmm(UseScratchImpl(arg.value, any_xmm));
return HostLocToXmm(UseScratchImpl(arg.value, xmm_order));
}
void RegAlloc::UseScratch(Argument& arg, HostLoc host_loc) {
@ -298,15 +306,23 @@ void RegAlloc::Release(const Xbyak::Reg& reg) {
LocInfo(hostloc).ReleaseOne();
}
Xbyak::Reg64 RegAlloc::ScratchGpr(HostLocList desired_locations) {
return HostLocToReg64(ScratchImpl(desired_locations));
Xbyak::Reg64 RegAlloc::ScratchGpr() {
return HostLocToReg64(ScratchImpl(gpr_order));
}
Xbyak::Xmm RegAlloc::ScratchXmm(HostLocList desired_locations) {
return HostLocToXmm(ScratchImpl(desired_locations));
Xbyak::Reg64 RegAlloc::ScratchGpr(HostLoc desired_location) {
return HostLocToReg64(ScratchImpl({desired_location}));
}
HostLoc RegAlloc::UseImpl(IR::Value use_value, HostLocList desired_locations) {
Xbyak::Xmm RegAlloc::ScratchXmm() {
return HostLocToXmm(ScratchImpl(xmm_order));
}
Xbyak::Xmm RegAlloc::ScratchXmm(HostLoc desired_location) {
return HostLocToXmm(ScratchImpl({desired_location}));
}
HostLoc RegAlloc::UseImpl(IR::Value use_value, const std::vector<HostLoc>& desired_locations) {
if (use_value.IsImmediate()) {
return LoadImmediate(use_value, ScratchImpl(desired_locations));
}
@ -338,7 +354,7 @@ HostLoc RegAlloc::UseImpl(IR::Value use_value, HostLocList desired_locations) {
return destination_location;
}
HostLoc RegAlloc::UseScratchImpl(IR::Value use_value, HostLocList desired_locations) {
HostLoc RegAlloc::UseScratchImpl(IR::Value use_value, const std::vector<HostLoc>& desired_locations) {
if (use_value.IsImmediate()) {
return LoadImmediate(use_value, ScratchImpl(desired_locations));
}
@ -363,7 +379,7 @@ HostLoc RegAlloc::UseScratchImpl(IR::Value use_value, HostLocList desired_locati
return destination_location;
}
HostLoc RegAlloc::ScratchImpl(HostLocList desired_locations) {
HostLoc RegAlloc::ScratchImpl(const std::vector<HostLoc>& desired_locations) {
const HostLoc location = SelectARegister(desired_locations);
MoveOutOfTheWay(location);
LocInfo(location).WriteLock();
@ -389,7 +405,7 @@ void RegAlloc::HostCall(IR::Inst* result_def, std::optional<Argument::copyable_r
return ret;
}();
ScratchGpr({ABI_RETURN});
ScratchGpr(ABI_RETURN);
if (result_def) {
DefineValueImpl(result_def, ABI_RETURN);
}
@ -417,7 +433,7 @@ void RegAlloc::HostCall(IR::Inst* result_def, std::optional<Argument::copyable_r
for (size_t i = 0; i < args_count; i++) {
if (!args[i]) {
// TODO: Force spill
ScratchGpr({args_hostloc[i]});
ScratchGpr(args_hostloc[i]);
}
}
@ -436,7 +452,7 @@ void RegAlloc::AssertNoMoreUses() {
ASSERT(std::all_of(hostloc_info.begin(), hostloc_info.end(), [](const auto& i) { return i.IsEmpty(); }));
}
HostLoc RegAlloc::SelectARegister(HostLocList desired_locations) const {
HostLoc RegAlloc::SelectARegister(const std::vector<HostLoc>& desired_locations) const {
std::vector<HostLoc> candidates = desired_locations;
// Find all locations that have not been allocated..
@ -475,7 +491,7 @@ void RegAlloc::DefineValueImpl(IR::Inst* def_inst, const IR::Value& use_inst) {
ASSERT_MSG(!ValueLocation(def_inst), "def_inst has already been defined");
if (use_inst.IsImmediate()) {
const HostLoc location = ScratchImpl(any_gpr);
const HostLoc location = ScratchImpl(gpr_order);
DefineValueImpl(def_inst, location);
LoadImmediate(use_inst, location);
return;

View file

@ -97,8 +97,7 @@ class RegAlloc final {
public:
using ArgumentInfo = std::array<Argument, IR::max_arg_count>;
explicit RegAlloc(BlockOfCode& code, size_t num_spills, std::function<Xbyak::Address(HostLoc)> spill_to_addr)
: hostloc_info(NonSpillHostLocCount + num_spills), code(code), spill_to_addr(std::move(spill_to_addr)) {}
explicit RegAlloc(BlockOfCode& code, size_t num_spills, std::function<Xbyak::Address(HostLoc)> spill_to_addr, std::vector<HostLoc> gpr_order, std::vector<HostLoc> xmm_order);
ArgumentInfo GetArgumentInfo(IR::Inst* inst);
@ -116,8 +115,10 @@ public:
void Release(const Xbyak::Reg& reg);
Xbyak::Reg64 ScratchGpr(HostLocList desired_locations = any_gpr);
Xbyak::Xmm ScratchXmm(HostLocList desired_locations = any_xmm);
Xbyak::Reg64 ScratchGpr();
Xbyak::Reg64 ScratchGpr(HostLoc desired_location);
Xbyak::Xmm ScratchXmm();
Xbyak::Xmm ScratchXmm(HostLoc desired_location);
void HostCall(IR::Inst* result_def = nullptr,
std::optional<Argument::copyable_reference> arg0 = {},
@ -134,12 +135,15 @@ public:
private:
friend struct Argument;
HostLoc SelectARegister(HostLocList desired_locations) const;
std::vector<HostLoc> gpr_order;
std::vector<HostLoc> xmm_order;
HostLoc SelectARegister(const std::vector<HostLoc>& desired_locations) const;
std::optional<HostLoc> ValueLocation(const IR::Inst* value) const;
HostLoc UseImpl(IR::Value use_value, HostLocList desired_locations);
HostLoc UseScratchImpl(IR::Value use_value, HostLocList desired_locations);
HostLoc ScratchImpl(HostLocList desired_locations);
HostLoc UseImpl(IR::Value use_value, const std::vector<HostLoc>& desired_locations);
HostLoc UseScratchImpl(IR::Value use_value, const std::vector<HostLoc>& desired_locations);
HostLoc ScratchImpl(const std::vector<HostLoc>& desired_locations);
void DefineValueImpl(IR::Inst* def_inst, HostLoc host_loc);
void DefineValueImpl(IR::Inst* def_inst, const IR::Value& use_inst);