From 5ebe11c329e5f6d378edb8c70dcc69205d1d8b2f Mon Sep 17 00:00:00 2001 From: MerryMage Date: Fri, 7 May 2021 11:29:57 +0100 Subject: [PATCH] reg_alloc: Inform RegAlloc about rsp changes --- src/backend/x64/a64_emit_x64.cpp | 8 +++---- src/backend/x64/emit_x64_aes.cpp | 5 +++-- src/backend/x64/emit_x64_floating_point.cpp | 4 ++-- src/backend/x64/emit_x64_vector.cpp | 24 ++++++++++----------- src/backend/x64/hostloc.cpp | 10 --------- src/backend/x64/hostloc.h | 1 - src/backend/x64/reg_alloc.cpp | 24 +++++++++++++++++++++ src/backend/x64/reg_alloc.h | 6 ++++++ 8 files changed, 51 insertions(+), 31 deletions(-) diff --git a/src/backend/x64/a64_emit_x64.cpp b/src/backend/x64/a64_emit_x64.cpp index a7cef22e..6ce98f89 100644 --- a/src/backend/x64/a64_emit_x64.cpp +++ b/src/backend/x64/a64_emit_x64.cpp @@ -1101,7 +1101,7 @@ void A64EmitX64::EmitExclusiveReadMemory(A64EmitContext& ctx, IR::Inst* inst) { code.mov(code.byte[r15 + offsetof(A64JitState, exclusive_state)], u8(1)); code.mov(code.ABI_PARAM1, reinterpret_cast(&conf)); - code.sub(rsp, 16 + ABI_SHADOW_SPACE); + ctx.reg_alloc.AllocStackSpace(16 + ABI_SHADOW_SPACE); code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE]); code.CallLambda( [](A64::UserConfig& conf, u64 vaddr, A64::Vector& ret) { @@ -1111,7 +1111,7 @@ void A64EmitX64::EmitExclusiveReadMemory(A64EmitContext& ctx, IR::Inst* inst) { } ); code.movups(result, xword[rsp + ABI_SHADOW_SPACE]); - code.add(rsp, 16 + ABI_SHADOW_SPACE); + ctx.reg_alloc.ReleaseStackSpace(16 + ABI_SHADOW_SPACE); ctx.reg_alloc.DefineValue(inst, result); } @@ -1170,7 +1170,7 @@ void A64EmitX64::EmitExclusiveWriteMemory(A64EmitContext& ctx, IR::Inst* inst) { } ); } else { - code.sub(rsp, 16 + ABI_SHADOW_SPACE); + ctx.reg_alloc.AllocStackSpace(16 + ABI_SHADOW_SPACE); code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE]); code.movaps(xword[code.ABI_PARAM3], xmm1); code.CallLambda( @@ -1181,7 +1181,7 @@ void A64EmitX64::EmitExclusiveWriteMemory(A64EmitContext& ctx, IR::Inst* inst) { }) ? 0 : 1; } ); - code.add(rsp, 16 + ABI_SHADOW_SPACE); + ctx.reg_alloc.ReleaseStackSpace(16 + ABI_SHADOW_SPACE); } code.L(end); } diff --git a/src/backend/x64/emit_x64_aes.cpp b/src/backend/x64/emit_x64_aes.cpp index 96540cf1..8bf06e48 100644 --- a/src/backend/x64/emit_x64_aes.cpp +++ b/src/backend/x64/emit_x64_aes.cpp @@ -24,7 +24,8 @@ static void EmitAESFunction(RegAlloc::ArgumentInfo args, EmitContext& ctx, Block ctx.reg_alloc.EndOfAllocScope(); ctx.reg_alloc.HostCall(nullptr); - code.sub(rsp, stack_space + ABI_SHADOW_SPACE); + + ctx.reg_alloc.AllocStackSpace(stack_space + ABI_SHADOW_SPACE); code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE]); code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + sizeof(AES::State)]); @@ -32,7 +33,7 @@ static void EmitAESFunction(RegAlloc::ArgumentInfo args, EmitContext& ctx, Block code.CallFunction(fn); code.movaps(result, xword[rsp + ABI_SHADOW_SPACE]); - code.add(rsp, stack_space + ABI_SHADOW_SPACE); + ctx.reg_alloc.ReleaseStackSpace(stack_space + ABI_SHADOW_SPACE); ctx.reg_alloc.DefineValue(inst, result); } diff --git a/src/backend/x64/emit_x64_floating_point.cpp b/src/backend/x64/emit_x64_floating_point.cpp index 000a0477..07e647c0 100644 --- a/src/backend/x64/emit_x64_floating_point.cpp +++ b/src/backend/x64/emit_x64_floating_point.cpp @@ -671,11 +671,11 @@ static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.HostCall(inst, args[0], args[1], args[2]); code.mov(code.ABI_PARAM4.cvt32(), ctx.FPCR().Value()); #ifdef _WIN32 - code.sub(rsp, 16 + ABI_SHADOW_SPACE); + ctx.reg_alloc.AllocStackSpace(16 + ABI_SHADOW_SPACE); code.lea(rax, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); code.mov(qword[rsp + ABI_SHADOW_SPACE], rax); code.CallFunction(&FP::FPMulAdd); - code.add(rsp, 16 + ABI_SHADOW_SPACE); + ctx.reg_alloc.ReleaseStackSpace(16 + ABI_SHADOW_SPACE); #else code.lea(code.ABI_PARAM5, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); code.CallFunction(&FP::FPMulAdd); diff --git a/src/backend/x64/emit_x64_vector.cpp b/src/backend/x64/emit_x64_vector.cpp index bc27209c..09aeb1fe 100644 --- a/src/backend/x64/emit_x64_vector.cpp +++ b/src/backend/x64/emit_x64_vector.cpp @@ -59,7 +59,7 @@ static void EmitOneArgumentFallback(BlockOfCode& code, EmitContext& ctx, IR::Ins ctx.reg_alloc.EndOfAllocScope(); ctx.reg_alloc.HostCall(nullptr); - code.sub(rsp, stack_space + ABI_SHADOW_SPACE); + ctx.reg_alloc.AllocStackSpace(stack_space + ABI_SHADOW_SPACE); code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE + 0 * 16]); code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 1 * 16]); @@ -67,7 +67,7 @@ static void EmitOneArgumentFallback(BlockOfCode& code, EmitContext& ctx, IR::Ins code.CallFunction(fn); code.movaps(result, xword[rsp + ABI_SHADOW_SPACE + 0 * 16]); - code.add(rsp, stack_space + ABI_SHADOW_SPACE); + ctx.reg_alloc.ReleaseStackSpace(stack_space + ABI_SHADOW_SPACE); ctx.reg_alloc.DefineValue(inst, result); } @@ -82,7 +82,7 @@ static void EmitOneArgumentFallbackWithSaturation(BlockOfCode& code, EmitContext ctx.reg_alloc.EndOfAllocScope(); ctx.reg_alloc.HostCall(nullptr); - code.sub(rsp, stack_space + ABI_SHADOW_SPACE); + ctx.reg_alloc.AllocStackSpace(stack_space + ABI_SHADOW_SPACE); code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE + 0 * 16]); code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 1 * 16]); @@ -90,7 +90,7 @@ static void EmitOneArgumentFallbackWithSaturation(BlockOfCode& code, EmitContext code.CallFunction(fn); code.movaps(result, xword[rsp + ABI_SHADOW_SPACE + 0 * 16]); - code.add(rsp, stack_space + ABI_SHADOW_SPACE); + ctx.reg_alloc.ReleaseStackSpace(stack_space + ABI_SHADOW_SPACE); code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], code.ABI_RETURN.cvt8()); @@ -108,7 +108,7 @@ static void EmitTwoArgumentFallbackWithSaturation(BlockOfCode& code, EmitContext ctx.reg_alloc.EndOfAllocScope(); ctx.reg_alloc.HostCall(nullptr); - code.sub(rsp, stack_space + ABI_SHADOW_SPACE); + ctx.reg_alloc.AllocStackSpace(stack_space + ABI_SHADOW_SPACE); code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE + 0 * 16]); code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 1 * 16]); code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE + 2 * 16]); @@ -118,7 +118,7 @@ static void EmitTwoArgumentFallbackWithSaturation(BlockOfCode& code, EmitContext code.CallFunction(fn); code.movaps(result, xword[rsp + ABI_SHADOW_SPACE + 0 * 16]); - code.add(rsp, stack_space + ABI_SHADOW_SPACE); + ctx.reg_alloc.ReleaseStackSpace(stack_space + ABI_SHADOW_SPACE); code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], code.ABI_RETURN.cvt8()); @@ -136,7 +136,7 @@ static void EmitTwoArgumentFallback(BlockOfCode& code, EmitContext& ctx, IR::Ins ctx.reg_alloc.EndOfAllocScope(); ctx.reg_alloc.HostCall(nullptr); - code.sub(rsp, stack_space + ABI_SHADOW_SPACE); + ctx.reg_alloc.AllocStackSpace(stack_space + ABI_SHADOW_SPACE); code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE + 0 * 16]); code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 1 * 16]); code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE + 2 * 16]); @@ -146,7 +146,7 @@ static void EmitTwoArgumentFallback(BlockOfCode& code, EmitContext& ctx, IR::Ins code.CallFunction(fn); code.movaps(result, xword[rsp + ABI_SHADOW_SPACE + 0 * 16]); - code.add(rsp, stack_space + ABI_SHADOW_SPACE); + ctx.reg_alloc.ReleaseStackSpace(stack_space + ABI_SHADOW_SPACE); ctx.reg_alloc.DefineValue(inst, result); } @@ -4272,7 +4272,7 @@ void EmitX64::EmitVectorTableLookup64(EmitContext& ctx, IR::Inst* inst) { } const u32 stack_space = static_cast(6 * 8); - code.sub(rsp, stack_space + ABI_SHADOW_SPACE); + ctx.reg_alloc.AllocStackSpace(stack_space + ABI_SHADOW_SPACE); for (size_t i = 0; i < table_size; ++i) { const Xbyak::Xmm table_value = ctx.reg_alloc.UseXmm(table[i]); code.movq(qword[rsp + ABI_SHADOW_SPACE + i * 8], table_value); @@ -4304,7 +4304,7 @@ void EmitX64::EmitVectorTableLookup64(EmitContext& ctx, IR::Inst* inst) { ); code.movq(result, qword[rsp + ABI_SHADOW_SPACE + 4 * 8]); - code.add(rsp, stack_space + ABI_SHADOW_SPACE); + ctx.reg_alloc.ReleaseStackSpace(stack_space + ABI_SHADOW_SPACE); ctx.reg_alloc.DefineValue(inst, result); } @@ -4402,7 +4402,7 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) { } const u32 stack_space = static_cast((table_size + 2) * 16); - code.sub(rsp, stack_space + ABI_SHADOW_SPACE); + ctx.reg_alloc.AllocStackSpace(stack_space + ABI_SHADOW_SPACE); for (size_t i = 0; i < table_size; ++i) { const Xbyak::Xmm table_value = ctx.reg_alloc.UseXmm(table[i]); code.movaps(xword[rsp + ABI_SHADOW_SPACE + i * 16], table_value); @@ -4434,7 +4434,7 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) { ); code.movaps(result, xword[rsp + ABI_SHADOW_SPACE + (table_size + 0) * 16]); - code.add(rsp, stack_space + ABI_SHADOW_SPACE); + ctx.reg_alloc.ReleaseStackSpace(stack_space + ABI_SHADOW_SPACE); ctx.reg_alloc.DefineValue(inst, result); } diff --git a/src/backend/x64/hostloc.cpp b/src/backend/x64/hostloc.cpp index dae8bf29..fd214e36 100644 --- a/src/backend/x64/hostloc.cpp +++ b/src/backend/x64/hostloc.cpp @@ -21,14 +21,4 @@ Xbyak::Xmm HostLocToXmm(HostLoc loc) { return Xbyak::Xmm(static_cast(loc) - static_cast(HostLoc::XMM0)); } -Xbyak::Address SpillToOpArg(HostLoc loc) { - ASSERT(HostLocIsSpill(loc)); - - size_t i = static_cast(loc) - static_cast(HostLoc::FirstSpill); - ASSERT_MSG(i < SpillCount, "Spill index greater than number of available spill locations"); - - using namespace Xbyak::util; - return xword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, spill) + i * sizeof(u64) * 2]; -} - } // namespace Dynarmic::Backend::X64 diff --git a/src/backend/x64/hostloc.h b/src/backend/x64/hostloc.h index fe8169ac..0170e626 100644 --- a/src/backend/x64/hostloc.h +++ b/src/backend/x64/hostloc.h @@ -110,6 +110,5 @@ const HostLocList any_xmm = { Xbyak::Reg64 HostLocToReg64(HostLoc loc); Xbyak::Xmm HostLocToXmm(HostLoc loc); -Xbyak::Address SpillToOpArg(HostLoc loc); } // namespace Dynarmic::Backend::X64 diff --git a/src/backend/x64/reg_alloc.cpp b/src/backend/x64/reg_alloc.cpp index 0ab77a8c..b66131a6 100644 --- a/src/backend/x64/reg_alloc.cpp +++ b/src/backend/x64/reg_alloc.cpp @@ -438,6 +438,20 @@ void RegAlloc::HostCall(IR::Inst* result_def, std::optional(std::numeric_limits::max())); + ASSERT(reserved_stack_space == 0); + reserved_stack_space = stack_space; + code.sub(code.rsp, static_cast(stack_space)); +} + +void RegAlloc::ReleaseStackSpace(size_t stack_space) { + ASSERT(stack_space < static_cast(std::numeric_limits::max())); + ASSERT(reserved_stack_space == stack_space); + reserved_stack_space = 0; + code.add(code.rsp, static_cast(stack_space)); +} + void RegAlloc::EndOfAllocScope() { for (auto& iter : hostloc_info) { iter.ReleaseAll(); @@ -693,4 +707,14 @@ void RegAlloc::EmitExchange(HostLoc a, HostLoc b) { } } +Xbyak::Address RegAlloc::SpillToOpArg(HostLoc loc) { + ASSERT(HostLocIsSpill(loc)); + + size_t i = static_cast(loc) - static_cast(HostLoc::FirstSpill); + ASSERT_MSG(i < SpillCount, "Spill index greater than number of available spill locations"); + + using namespace Xbyak::util; + return xword[rsp + reserved_stack_space + ABI_SHADOW_SPACE + offsetof(StackLayout, spill) + i * sizeof(u64) * 2]; +} + } // namespace Dynarmic::Backend::X64 diff --git a/src/backend/x64/reg_alloc.h b/src/backend/x64/reg_alloc.h index 04f6987b..ba0bdaa1 100644 --- a/src/backend/x64/reg_alloc.h +++ b/src/backend/x64/reg_alloc.h @@ -127,6 +127,9 @@ public: // TODO: Values in host flags + void AllocStackSpace(size_t stack_space); + void ReleaseStackSpace(size_t stack_space); + void EndOfAllocScope(); void AssertNoMoreUses(); @@ -160,8 +163,11 @@ private: const HostLocInfo& LocInfo(HostLoc loc) const; BlockOfCode& code; + size_t reserved_stack_space = 0; void EmitMove(size_t bit_width, HostLoc to, HostLoc from); void EmitExchange(HostLoc a, HostLoc b); + + Xbyak::Address SpillToOpArg(HostLoc loc); }; } // namespace Dynarmic::Backend::X64