From 49fcfe040c4afa9fc17d1e94b3f7750ad3e9b1ac Mon Sep 17 00:00:00 2001
From: MerryMage
Date: Wed, 8 Apr 2020 13:19:26 +0100
Subject: [PATCH] reg_alloc: Explicitly specify GPR and XMM order

This allows each backend to modify what registers they want to use and
their preferred orderings.
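
As an illustration of the intent, a backend can now pass its own
ordering in place of the shared any_gpr/any_xmm defaults. A
hypothetical sketch (the particular ordering and the reason for it
are invented for this example; the remaining constructor arguments
are the ones A32EmitX64::Emit passes below):

    // Hypothetical ordering: hand out volatile GPRs first so fewer
    // callee-saved registers need to be preserved, and omit R15
    // entirely (e.g. for a backend that reserves it for its JIT
    // state pointer).
    const std::vector<HostLoc> my_gpr_order{
        HostLoc::RAX, HostLoc::RCX, HostLoc::RDX,
        HostLoc::R8,  HostLoc::R9,  HostLoc::R10, HostLoc::R11,
        HostLoc::RBX, HostLoc::RSI, HostLoc::RDI,
        HostLoc::R12, HostLoc::R13, HostLoc::R14,
    };

    RegAlloc reg_alloc{code, A32JitState::SpillCount,
                       SpillToOpArg<A32JitState>, my_gpr_order, any_xmm};

Because SelectARegister only considers candidates drawn from the list
it is given, the order of gpr_order/xmm_order expresses allocation
preference, and registers left out of the list are never allocated.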
---
 src/backend/x64/a32_emit_x64.cpp             |  6 +--
 src/backend/x64/a64_emit_x64.cpp             |  2 +-
 src/backend/x64/emit_x64.cpp                 |  4 +-
 src/backend/x64/emit_x64_data_processing.cpp | 24 +++++-----
 src/backend/x64/emit_x64_floating_point.cpp  |  2 +-
 src/backend/x64/reg_alloc.cpp                | 46 +++++++++++++-------
 src/backend/x64/reg_alloc.h                  | 20 +++++----
 7 files changed, 62 insertions(+), 42 deletions(-)

diff --git a/src/backend/x64/a32_emit_x64.cpp b/src/backend/x64/a32_emit_x64.cpp
index 61ec3c57..fd00fcc0 100644
--- a/src/backend/x64/a32_emit_x64.cpp
+++ b/src/backend/x64/a32_emit_x64.cpp
@@ -85,7 +85,7 @@ A32EmitX64::BlockDescriptor A32EmitX64::Emit(IR::Block& block) {
     // Start emitting.
     EmitCondPrelude(block);
 
-    RegAlloc reg_alloc{code, A32JitState::SpillCount, SpillToOpArg<A32JitState>};
+    RegAlloc reg_alloc{code, A32JitState::SpillCount, SpillToOpArg<A32JitState>, any_gpr, any_xmm};
     A32EmitContext ctx{reg_alloc, block};
 
     for (auto iter = block.begin(); iter != block.end(); ++iter) {
@@ -841,7 +841,7 @@ void A32EmitX64::ReadMemory(A32EmitContext& ctx, IR::Inst* inst) {
     ctx.reg_alloc.UseScratch(args[0], ABI_PARAM2);
 
     const Xbyak::Reg64 vaddr = code.ABI_PARAM2;
-    const Xbyak::Reg64 value = ctx.reg_alloc.ScratchGpr({ABI_RETURN});
+    const Xbyak::Reg64 value = ctx.reg_alloc.ScratchGpr(ABI_RETURN);
 
     const auto src_ptr = EmitVAddrLookup(code, ctx.reg_alloc, config, abort, vaddr, value);
     switch (bitsize) {
@@ -912,7 +912,7 @@ void A32EmitX64::WriteMemory(A32EmitContext& ctx, IR::Inst* inst) {
 
     Xbyak::Label abort, end;
 
-    ctx.reg_alloc.ScratchGpr({ABI_RETURN});
+    ctx.reg_alloc.ScratchGpr(ABI_RETURN);
     ctx.reg_alloc.UseScratch(args[0], ABI_PARAM2);
     ctx.reg_alloc.UseScratch(args[1], ABI_PARAM3);
diff --git a/src/backend/x64/a64_emit_x64.cpp b/src/backend/x64/a64_emit_x64.cpp
index e0c06cb2..e4ff3bad 100644
--- a/src/backend/x64/a64_emit_x64.cpp
+++ b/src/backend/x64/a64_emit_x64.cpp
@@ -70,7 +70,7 @@ A64EmitX64::BlockDescriptor A64EmitX64::Emit(IR::Block& block) {
     // Start emitting.
     EmitCondPrelude(block);
 
-    RegAlloc reg_alloc{code, A64JitState::SpillCount, SpillToOpArg<A64JitState>};
+    RegAlloc reg_alloc{code, A64JitState::SpillCount, SpillToOpArg<A64JitState>, any_gpr, any_xmm};
     A64EmitContext ctx{conf, reg_alloc, block};
 
     for (auto iter = block.begin(); iter != block.end(); ++iter) {
diff --git a/src/backend/x64/emit_x64.cpp b/src/backend/x64/emit_x64.cpp
index 2bc269c3..82ae3672 100644
--- a/src/backend/x64/emit_x64.cpp
+++ b/src/backend/x64/emit_x64.cpp
@@ -89,7 +89,7 @@ void EmitX64::EmitPushRSB(EmitContext& ctx, IR::Inst* inst) {
     ASSERT(args[0].IsImmediate());
     const u64 unique_hash_of_target = args[0].GetImmediateU64();
 
-    ctx.reg_alloc.ScratchGpr({HostLoc::RCX});
+    ctx.reg_alloc.ScratchGpr(HostLoc::RCX);
     const Xbyak::Reg64 loc_desc_reg = ctx.reg_alloc.ScratchGpr();
     const Xbyak::Reg64 index_reg = ctx.reg_alloc.ScratchGpr();
 
@@ -135,7 +135,7 @@ void EmitX64::EmitGetNZCVFromOp(EmitContext& ctx, IR::Inst* inst) {
         }
     }();
 
-    const Xbyak::Reg64 nzcv = ctx.reg_alloc.ScratchGpr({HostLoc::RAX});
+    const Xbyak::Reg64 nzcv = ctx.reg_alloc.ScratchGpr(HostLoc::RAX);
     const Xbyak::Reg value = ctx.reg_alloc.UseGpr(args[0]).changeBit(bitsize);
     code.cmp(value, 0);
     code.lahf();
diff --git a/src/backend/x64/emit_x64_data_processing.cpp b/src/backend/x64/emit_x64_data_processing.cpp
index 64bc3c45..8a640ff8 100644
--- a/src/backend/x64/emit_x64_data_processing.cpp
+++ b/src/backend/x64/emit_x64_data_processing.cpp
@@ -122,7 +122,7 @@ void EmitX64::EmitTestBit(EmitContext& ctx, IR::Inst* inst) {
 static void EmitConditionalSelect(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, int bitsize) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
-    const Xbyak::Reg32 nzcv = ctx.reg_alloc.ScratchGpr({HostLoc::RAX}).cvt32();
+    const Xbyak::Reg32 nzcv = ctx.reg_alloc.ScratchGpr(HostLoc::RAX).cvt32();
     const Xbyak::Reg then_ = ctx.reg_alloc.UseGpr(args[1]).changeBit(bitsize);
     const Xbyak::Reg else_ = ctx.reg_alloc.UseScratchGpr(args[2]).changeBit(bitsize);
 
@@ -847,7 +847,7 @@ static Xbyak::Reg64 DoNZCV(BlockOfCode& code, RegAlloc& reg_alloc, IR::Inst* nzcv_out) {
         return Xbyak::Reg64{-1};
     }
 
-    const Xbyak::Reg64 nzcv = reg_alloc.ScratchGpr({HostLoc::RAX});
+    const Xbyak::Reg64 nzcv = reg_alloc.ScratchGpr(HostLoc::RAX);
     code.xor_(nzcv.cvt32(), nzcv.cvt32());
     return nzcv;
 }
@@ -1030,7 +1030,7 @@ void EmitX64::EmitMul64(EmitContext& ctx, IR::Inst* inst) {
 void EmitX64::EmitUnsignedMultiplyHigh64(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
-    ctx.reg_alloc.ScratchGpr({HostLoc::RDX});
+    ctx.reg_alloc.ScratchGpr(HostLoc::RDX);
     ctx.reg_alloc.UseScratch(args[0], HostLoc::RAX);
     OpArg op_arg = ctx.reg_alloc.UseOpArg(args[1]);
     code.mul(*op_arg);
@@ -1041,7 +1041,7 @@ void EmitX64::EmitUnsignedMultiplyHigh64(EmitContext& ctx, IR::Inst* inst) {
 void EmitX64::EmitSignedMultiplyHigh64(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
-    ctx.reg_alloc.ScratchGpr({HostLoc::RDX});
+    ctx.reg_alloc.ScratchGpr(HostLoc::RDX);
     ctx.reg_alloc.UseScratch(args[0], HostLoc::RAX);
     OpArg op_arg = ctx.reg_alloc.UseOpArg(args[1]);
     code.imul(*op_arg);
@@ -1052,8 +1052,8 @@ void EmitX64::EmitSignedMultiplyHigh64(EmitContext& ctx, IR::Inst* inst) {
 void EmitX64::EmitUnsignedDiv32(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
-    ctx.reg_alloc.ScratchGpr({HostLoc::RAX});
-    ctx.reg_alloc.ScratchGpr({HostLoc::RDX});
+    ctx.reg_alloc.ScratchGpr(HostLoc::RAX);
+    ctx.reg_alloc.ScratchGpr(HostLoc::RDX);
     const Xbyak::Reg32 dividend = ctx.reg_alloc.UseGpr(args[0]).cvt32();
     const Xbyak::Reg32 divisor = ctx.reg_alloc.UseGpr(args[1]).cvt32();
@@ -1073,8 +1073,8 @@ void EmitX64::EmitUnsignedDiv32(EmitContext& ctx, IR::Inst* inst) {
 void EmitX64::EmitUnsignedDiv64(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
-    ctx.reg_alloc.ScratchGpr({HostLoc::RAX});
-    ctx.reg_alloc.ScratchGpr({HostLoc::RDX});
+    ctx.reg_alloc.ScratchGpr(HostLoc::RAX);
+    ctx.reg_alloc.ScratchGpr(HostLoc::RDX);
     const Xbyak::Reg64 dividend = ctx.reg_alloc.UseGpr(args[0]);
     const Xbyak::Reg64 divisor = ctx.reg_alloc.UseGpr(args[1]);
@@ -1094,8 +1094,8 @@ void EmitX64::EmitUnsignedDiv64(EmitContext& ctx, IR::Inst* inst) {
 void EmitX64::EmitSignedDiv32(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
-    ctx.reg_alloc.ScratchGpr({HostLoc::RAX});
-    ctx.reg_alloc.ScratchGpr({HostLoc::RDX});
+    ctx.reg_alloc.ScratchGpr(HostLoc::RAX);
+    ctx.reg_alloc.ScratchGpr(HostLoc::RDX);
     const Xbyak::Reg32 dividend = ctx.reg_alloc.UseGpr(args[0]).cvt32();
     const Xbyak::Reg32 divisor = ctx.reg_alloc.UseGpr(args[1]).cvt32();
@@ -1115,8 +1115,8 @@ void EmitX64::EmitSignedDiv32(EmitContext& ctx, IR::Inst* inst) {
 void EmitX64::EmitSignedDiv64(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
-    ctx.reg_alloc.ScratchGpr({HostLoc::RAX});
-    ctx.reg_alloc.ScratchGpr({HostLoc::RDX});
+    ctx.reg_alloc.ScratchGpr(HostLoc::RAX);
+    ctx.reg_alloc.ScratchGpr(HostLoc::RDX);
     const Xbyak::Reg64 dividend = ctx.reg_alloc.UseGpr(args[0]);
     const Xbyak::Reg64 divisor = ctx.reg_alloc.UseGpr(args[1]);
diff --git a/src/backend/x64/emit_x64_floating_point.cpp b/src/backend/x64/emit_x64_floating_point.cpp
index 8edbad21..2ffb346b 100644
--- a/src/backend/x64/emit_x64_floating_point.cpp
+++ b/src/backend/x64/emit_x64_floating_point.cpp
@@ -1008,7 +1008,7 @@ void EmitX64::EmitFPSub64(EmitContext& ctx, IR::Inst* inst) {
 }
 
 static Xbyak::Reg64 SetFpscrNzcvFromFlags(BlockOfCode& code, EmitContext& ctx) {
-    ctx.reg_alloc.ScratchGpr({HostLoc::RCX}); // shifting requires use of cl
+    ctx.reg_alloc.ScratchGpr(HostLoc::RCX); // shifting requires use of cl
     const Xbyak::Reg64 nzcv = ctx.reg_alloc.ScratchGpr();
 
     //               x64 flags    ARM flags
diff --git a/src/backend/x64/reg_alloc.cpp b/src/backend/x64/reg_alloc.cpp
index 3c042f5a..73c7b813 100644
--- a/src/backend/x64/reg_alloc.cpp
+++ b/src/backend/x64/reg_alloc.cpp
@@ -227,6 +227,14 @@ bool Argument::IsInMemory() const {
     return HostLocIsSpill(*reg_alloc.ValueLocation(value.GetInst()));
 }
 
+RegAlloc::RegAlloc(BlockOfCode& code, size_t num_spills, std::function<Xbyak::Address(HostLoc)> spill_to_addr, std::vector<HostLoc> gpr_order, std::vector<HostLoc> xmm_order)
+    : gpr_order(gpr_order)
+    , xmm_order(xmm_order)
+    , hostloc_info(NonSpillHostLocCount + num_spills)
+    , code(code)
+    , spill_to_addr(std::move(spill_to_addr))
+{}
+
 RegAlloc::ArgumentInfo RegAlloc::GetArgumentInfo(IR::Inst* inst) {
     ArgumentInfo ret = {Argument{*this}, Argument{*this}, Argument{*this}, Argument{*this}};
     for (size_t i = 0; i < inst->NumArgs(); i++) {
@@ -243,13 +251,13 @@ RegAlloc::ArgumentInfo RegAlloc::GetArgumentInfo(IR::Inst* inst) {
 Xbyak::Reg64 RegAlloc::UseGpr(Argument& arg) {
     ASSERT(!arg.allocated);
     arg.allocated = true;
-    return HostLocToReg64(UseImpl(arg.value, any_gpr));
+    return HostLocToReg64(UseImpl(arg.value, gpr_order));
 }
 
 Xbyak::Xmm RegAlloc::UseXmm(Argument& arg) {
     ASSERT(!arg.allocated);
     arg.allocated = true;
-    return HostLocToXmm(UseImpl(arg.value, any_xmm));
+    return HostLocToXmm(UseImpl(arg.value, xmm_order));
 }
 
 OpArg RegAlloc::UseOpArg(Argument& arg) {
@@ -265,13 +273,13 @@ void RegAlloc::Use(Argument& arg, HostLoc host_loc) {
 Xbyak::Reg64 RegAlloc::UseScratchGpr(Argument& arg) {
     ASSERT(!arg.allocated);
     arg.allocated = true;
-    return HostLocToReg64(UseScratchImpl(arg.value, any_gpr));
+    return HostLocToReg64(UseScratchImpl(arg.value, gpr_order));
 }
 
 Xbyak::Xmm RegAlloc::UseScratchXmm(Argument& arg) {
     ASSERT(!arg.allocated);
     arg.allocated = true;
-    return HostLocToXmm(UseScratchImpl(arg.value, any_xmm));
+    return HostLocToXmm(UseScratchImpl(arg.value, xmm_order));
 }
 
 void RegAlloc::UseScratch(Argument& arg, HostLoc host_loc) {
@@ -298,15 +306,23 @@ void RegAlloc::Release(const Xbyak::Reg& reg) {
     LocInfo(hostloc).ReleaseOne();
 }
 
-Xbyak::Reg64 RegAlloc::ScratchGpr(HostLocList desired_locations) {
-    return HostLocToReg64(ScratchImpl(desired_locations));
+Xbyak::Reg64 RegAlloc::ScratchGpr() {
+    return HostLocToReg64(ScratchImpl(gpr_order));
 }
 
-Xbyak::Xmm RegAlloc::ScratchXmm(HostLocList desired_locations) {
-    return HostLocToXmm(ScratchImpl(desired_locations));
+Xbyak::Reg64 RegAlloc::ScratchGpr(HostLoc desired_location) {
+    return HostLocToReg64(ScratchImpl({desired_location}));
 }
 
-HostLoc RegAlloc::UseImpl(IR::Value use_value, HostLocList desired_locations) {
+Xbyak::Xmm RegAlloc::ScratchXmm() {
+    return HostLocToXmm(ScratchImpl(xmm_order));
+}
+
+Xbyak::Xmm RegAlloc::ScratchXmm(HostLoc desired_location) {
+    return HostLocToXmm(ScratchImpl({desired_location}));
+}
+
+HostLoc RegAlloc::UseImpl(IR::Value use_value, const std::vector<HostLoc>& desired_locations) {
     if (use_value.IsImmediate()) {
         return LoadImmediate(use_value, ScratchImpl(desired_locations));
     }
@@ -338,7 +354,7 @@ HostLoc RegAlloc::UseImpl(IR::Value use_value, HostLocList desired_locations) {
     return destination_location;
 }
 
-HostLoc RegAlloc::UseScratchImpl(IR::Value use_value, HostLocList desired_locations) {
+HostLoc RegAlloc::UseScratchImpl(IR::Value use_value, const std::vector<HostLoc>& desired_locations) {
     if (use_value.IsImmediate()) {
         return LoadImmediate(use_value, ScratchImpl(desired_locations));
     }
@@ -363,7 +379,7 @@ HostLoc RegAlloc::UseScratchImpl(IR::Value use_value, HostLocList desired_locations) {
     return destination_location;
 }
 
-HostLoc RegAlloc::ScratchImpl(HostLocList desired_locations) {
+HostLoc RegAlloc::ScratchImpl(const std::vector<HostLoc>& desired_locations) {
     const HostLoc location = SelectARegister(desired_locations);
     MoveOutOfTheWay(location);
     LocInfo(location).WriteLock();
@@ -389,7 +405,7 @@ void RegAlloc::HostCall(IR::Inst* result_def, std::optional<Argument::copyable_reference> arg0,
 }
 
-HostLoc RegAlloc::SelectARegister(HostLocList desired_locations) const {
+HostLoc RegAlloc::SelectARegister(const std::vector<HostLoc>& desired_locations) const {
     std::vector<HostLoc> candidates = desired_locations;
 
     // Find all locations that have not been allocated..
@@ -475,7 +491,7 @@ void RegAlloc::DefineValueImpl(IR::Inst* def_inst, const IR::Value& use_inst) {
     ASSERT_MSG(!ValueLocation(def_inst), "def_inst has already been defined");
 
     if (use_inst.IsImmediate()) {
-        const HostLoc location = ScratchImpl(any_gpr);
+        const HostLoc location = ScratchImpl(gpr_order);
         DefineValueImpl(def_inst, location);
         LoadImmediate(use_inst, location);
         return;
diff --git a/src/backend/x64/reg_alloc.h b/src/backend/x64/reg_alloc.h
index 3edfa263..341dfe66 100644
--- a/src/backend/x64/reg_alloc.h
+++ b/src/backend/x64/reg_alloc.h
@@ -97,8 +97,7 @@ class RegAlloc final {
 public:
     using ArgumentInfo = std::array<Argument, 4>;
 
-    explicit RegAlloc(BlockOfCode& code, size_t num_spills, std::function<Xbyak::Address(HostLoc)> spill_to_addr)
-        : hostloc_info(NonSpillHostLocCount + num_spills), code(code), spill_to_addr(std::move(spill_to_addr)) {}
+    explicit RegAlloc(BlockOfCode& code, size_t num_spills, std::function<Xbyak::Address(HostLoc)> spill_to_addr, std::vector<HostLoc> gpr_order, std::vector<HostLoc> xmm_order);
 
     ArgumentInfo GetArgumentInfo(IR::Inst* inst);
 
@@ -116,8 +115,10 @@ public:
 
     void Release(const Xbyak::Reg& reg);
 
-    Xbyak::Reg64 ScratchGpr(HostLocList desired_locations = any_gpr);
-    Xbyak::Xmm ScratchXmm(HostLocList desired_locations = any_xmm);
+    Xbyak::Reg64 ScratchGpr();
+    Xbyak::Reg64 ScratchGpr(HostLoc desired_location);
+    Xbyak::Xmm ScratchXmm();
+    Xbyak::Xmm ScratchXmm(HostLoc desired_location);
 
     void HostCall(IR::Inst* result_def = nullptr,
                   std::optional<Argument::copyable_reference> arg0 = {},
@@ -134,12 +135,15 @@ public:
 private:
     friend struct Argument;
 
-    HostLoc SelectARegister(HostLocList desired_locations) const;
+    std::vector<HostLoc> gpr_order;
+    std::vector<HostLoc> xmm_order;
+
+    HostLoc SelectARegister(const std::vector<HostLoc>& desired_locations) const;
     std::optional<HostLoc> ValueLocation(const IR::Inst* value) const;
-    HostLoc UseImpl(IR::Value use_value, HostLocList desired_locations);
-    HostLoc UseScratchImpl(IR::Value use_value, HostLocList desired_locations);
-    HostLoc ScratchImpl(HostLocList desired_locations);
+    HostLoc UseImpl(IR::Value use_value, const std::vector<HostLoc>& desired_locations);
+    HostLoc UseScratchImpl(IR::Value use_value, const std::vector<HostLoc>& desired_locations);
+    HostLoc ScratchImpl(const std::vector<HostLoc>& desired_locations);
     void DefineValueImpl(IR::Inst* def_inst, HostLoc host_loc);
     void DefineValueImpl(IR::Inst* def_inst, const IR::Value& use_inst);