From 476c0f15daec23b2f8f4c1fbb9f19b76dedf0787 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Tue, 31 Jul 2018 20:53:33 +0100 Subject: [PATCH] backend_x64: Remove all use of xmm0 --- src/backend_x64/a64_emit_x64.cpp | 40 +++++++++---------- src/backend_x64/emit_x64_aes.cpp | 5 ++- src/backend_x64/emit_x64_vector.cpp | 15 ++++--- .../emit_x64_vector_floating_point.cpp | 17 ++++---- src/backend_x64/hostloc.h | 2 +- 5 files changed, 43 insertions(+), 36 deletions(-) diff --git a/src/backend_x64/a64_emit_x64.cpp b/src/backend_x64/a64_emit_x64.cpp index 45e41cd5..287dfc41 100644 --- a/src/backend_x64/a64_emit_x64.cpp +++ b/src/backend_x64/a64_emit_x64.cpp @@ -145,18 +145,18 @@ void A64EmitX64::GenMemory128Accessors() { code.sub(rsp, 8 + 16 + ABI_SHADOW_SPACE); code.lea(return_value_ptr, ptr[rsp + ABI_SHADOW_SPACE]); }); - code.movups(xmm0, xword[code.ABI_RETURN]); + code.movups(xmm1, xword[code.ABI_RETURN]); code.add(rsp, 8 + 16 + ABI_SHADOW_SPACE); #else code.sub(rsp, 8); DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryRead128).EmitCall(code); if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) { - code.movq(xmm0, code.ABI_RETURN); - code.pinsrq(xmm0, code.ABI_RETURN2, 1); + code.movq(xmm1, code.ABI_RETURN); + code.pinsrq(xmm1, code.ABI_RETURN2, 1); } else { - code.movq(xmm0, code.ABI_RETURN); - code.movq(xmm1, code.ABI_RETURN2); - code.punpcklqdq(xmm0, xmm1); + code.movq(xmm1, code.ABI_RETURN); + code.movq(xmm2, code.ABI_RETURN2); + code.punpcklqdq(xmm1, xmm2); } code.add(rsp, 8); #endif @@ -167,18 +167,18 @@ void A64EmitX64::GenMemory128Accessors() { #ifdef _WIN32 code.sub(rsp, 8 + 16 + ABI_SHADOW_SPACE); code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE]); - code.movaps(xword[code.ABI_PARAM3], xmm0); + code.movaps(xword[code.ABI_PARAM3], xmm1); DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryWrite128).EmitCall(code); code.add(rsp, 8 + 16 + ABI_SHADOW_SPACE); #else code.sub(rsp, 8); if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) { - code.movq(code.ABI_PARAM3, xmm0); - code.pextrq(code.ABI_PARAM4, xmm0, 1); + code.movq(code.ABI_PARAM3, xmm1); + code.pextrq(code.ABI_PARAM4, xmm1, 1); } else { - code.movq(code.ABI_PARAM3, xmm0); - code.punpckhqdq(xmm0, xmm0); - code.movq(code.ABI_PARAM4, xmm0); + code.movq(code.ABI_PARAM3, xmm1); + code.punpckhqdq(xmm1, xmm1); + code.movq(code.ABI_PARAM4, xmm1); } DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryWrite128).EmitCall(code); code.add(rsp, 8); @@ -214,8 +214,8 @@ void A64EmitX64::GenFastmemFallbacks() { code.mov(code.ABI_PARAM2, Xbyak::Reg64{vaddr_idx}); } code.call(memory_read_128); - if (value_idx != 0) { - code.movaps(Xbyak::Xmm{value_idx}, xmm0); + if (value_idx != 1) { + code.movaps(Xbyak::Xmm{value_idx}, xmm1); } ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(value_idx)); code.ret(); @@ -226,8 +226,8 @@ void A64EmitX64::GenFastmemFallbacks() { if (vaddr_idx != code.ABI_PARAM2.getIdx()) { code.mov(code.ABI_PARAM2, Xbyak::Reg64{vaddr_idx}); } - if (value_idx != 0) { - code.movaps(xmm0, Xbyak::Xmm{value_idx}); + if (value_idx != 1) { + code.movaps(xmm1, Xbyak::Xmm{value_idx}); } code.call(memory_write_128); ABI_PopCallerSaveRegistersAndAdjustStack(code); @@ -780,7 +780,7 @@ void A64EmitX64::EmitA64ReadMemory128(A64EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); ctx.reg_alloc.HostCall(nullptr, {}, args[0]); code.CallFunction(memory_read_128); - ctx.reg_alloc.DefineValue(inst, xmm0); + ctx.reg_alloc.DefineValue(inst, xmm1); } void A64EmitX64::EmitA64WriteMemory8(A64EmitContext& ctx, IR::Inst* inst) { @@ -849,7 +849,7 @@ void A64EmitX64::EmitA64WriteMemory128(A64EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); ctx.reg_alloc.Use(args[0], ABI_PARAM2); - ctx.reg_alloc.Use(args[1], HostLoc::XMM0); + ctx.reg_alloc.Use(args[1], HostLoc::XMM1); ctx.reg_alloc.EndOfAllocScope(); ctx.reg_alloc.HostCall(nullptr); code.CallFunction(memory_write_128); @@ -863,7 +863,7 @@ void A64EmitX64::EmitExclusiveWrite(A64EmitContext& ctx, IR::Inst* inst, size_t ctx.reg_alloc.HostCall(inst, {}, args[0], args[1]); } else { ctx.reg_alloc.Use(args[0], ABI_PARAM2); - ctx.reg_alloc.Use(args[1], HostLoc::XMM0); + ctx.reg_alloc.Use(args[1], HostLoc::XMM1); ctx.reg_alloc.EndOfAllocScope(); ctx.reg_alloc.HostCall(inst); } @@ -914,7 +914,7 @@ void A64EmitX64::EmitExclusiveWrite(A64EmitContext& ctx, IR::Inst* inst, size_t case 128: code.sub(rsp, 16 + ABI_SHADOW_SPACE); code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE]); - code.movaps(xword[code.ABI_PARAM3], xmm0); + code.movaps(xword[code.ABI_PARAM3], xmm1); code.CallFunction(static_cast( [](A64::UserConfig& conf, u64 vaddr, A64::Vector& value) -> u32 { return conf.global_monitor->DoExclusiveOperation(conf.processor_id, vaddr, 16, [&]{ diff --git a/src/backend_x64/emit_x64_aes.cpp b/src/backend_x64/emit_x64_aes.cpp index 175ab111..f1675271 100644 --- a/src/backend_x64/emit_x64_aes.cpp +++ b/src/backend_x64/emit_x64_aes.cpp @@ -23,6 +23,7 @@ static void EmitAESFunction(std::array args, EmitContext& ctx, Bloc IR::Inst* inst, AESFn fn) { constexpr u32 stack_space = static_cast(sizeof(AES::State)) * 2; const Xbyak::Xmm input = ctx.reg_alloc.UseXmm(args[0]); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); ctx.reg_alloc.EndOfAllocScope(); ctx.reg_alloc.HostCall(nullptr); @@ -34,12 +35,12 @@ static void EmitAESFunction(std::array args, EmitContext& ctx, Bloc code.CallFunction(fn); - code.movaps(xmm0, xword[rsp + ABI_SHADOW_SPACE]); + code.movaps(result, xword[rsp + ABI_SHADOW_SPACE]); // Free memory code.add(rsp, stack_space + ABI_SHADOW_SPACE); - ctx.reg_alloc.DefineValue(inst, xmm0); + ctx.reg_alloc.DefineValue(inst, result); } void EmitX64::EmitAESDecryptSingleRound(EmitContext& ctx, IR::Inst* inst) { diff --git a/src/backend_x64/emit_x64_vector.cpp b/src/backend_x64/emit_x64_vector.cpp index aa099831..086cebfe 100644 --- a/src/backend_x64/emit_x64_vector.cpp +++ b/src/backend_x64/emit_x64_vector.cpp @@ -55,6 +55,7 @@ static void EmitOneArgumentFallback(BlockOfCode& code, EmitContext& ctx, IR::Ins constexpr u32 stack_space = 2 * 16; auto args = ctx.reg_alloc.GetArgumentInfo(inst); const Xbyak::Xmm arg1 = ctx.reg_alloc.UseXmm(args[0]); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); ctx.reg_alloc.EndOfAllocScope(); ctx.reg_alloc.HostCall(nullptr); @@ -64,11 +65,11 @@ static void EmitOneArgumentFallback(BlockOfCode& code, EmitContext& ctx, IR::Ins code.movaps(xword[code.ABI_PARAM2], arg1); code.CallFunction(fn); - code.movaps(xmm0, xword[rsp + ABI_SHADOW_SPACE + 0 * 16]); + code.movaps(result, xword[rsp + ABI_SHADOW_SPACE + 0 * 16]); code.add(rsp, stack_space + ABI_SHADOW_SPACE); - ctx.reg_alloc.DefineValue(inst, xmm0); + ctx.reg_alloc.DefineValue(inst, result); } template @@ -77,6 +78,7 @@ static void EmitOneArgumentFallbackWithSaturation(BlockOfCode& code, EmitContext constexpr u32 stack_space = 2 * 16; auto args = ctx.reg_alloc.GetArgumentInfo(inst); const Xbyak::Xmm arg1 = ctx.reg_alloc.UseXmm(args[0]); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); ctx.reg_alloc.EndOfAllocScope(); ctx.reg_alloc.HostCall(nullptr); @@ -86,13 +88,13 @@ static void EmitOneArgumentFallbackWithSaturation(BlockOfCode& code, EmitContext code.movaps(xword[code.ABI_PARAM2], arg1); code.CallFunction(fn); - code.movaps(xmm0, xword[rsp + ABI_SHADOW_SPACE + 0 * 16]); + code.movaps(result, xword[rsp + ABI_SHADOW_SPACE + 0 * 16]); code.add(rsp, stack_space + ABI_SHADOW_SPACE); code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], code.ABI_RETURN.cvt8()); - ctx.reg_alloc.DefineValue(inst, xmm0); + ctx.reg_alloc.DefineValue(inst, result); } template @@ -102,6 +104,7 @@ static void EmitTwoArgumentFallback(BlockOfCode& code, EmitContext& ctx, IR::Ins auto args = ctx.reg_alloc.GetArgumentInfo(inst); const Xbyak::Xmm arg1 = ctx.reg_alloc.UseXmm(args[0]); const Xbyak::Xmm arg2 = ctx.reg_alloc.UseXmm(args[1]); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); ctx.reg_alloc.EndOfAllocScope(); ctx.reg_alloc.HostCall(nullptr); @@ -113,11 +116,11 @@ static void EmitTwoArgumentFallback(BlockOfCode& code, EmitContext& ctx, IR::Ins code.movaps(xword[code.ABI_PARAM2], arg1); code.movaps(xword[code.ABI_PARAM3], arg2); code.CallFunction(fn); - code.movaps(xmm0, xword[rsp + ABI_SHADOW_SPACE + 0 * 16]); + code.movaps(result, xword[rsp + ABI_SHADOW_SPACE + 0 * 16]); code.add(rsp, stack_space + ABI_SHADOW_SPACE); - ctx.reg_alloc.DefineValue(inst, xmm0); + ctx.reg_alloc.DefineValue(inst, result); } void EmitX64::EmitVectorGetElement8(EmitContext& ctx, IR::Inst* inst) { diff --git a/src/backend_x64/emit_x64_vector_floating_point.cpp b/src/backend_x64/emit_x64_vector_floating_point.cpp index 538d5c40..220a083b 100644 --- a/src/backend_x64/emit_x64_vector_floating_point.cpp +++ b/src/backend_x64/emit_x64_vector_floating_point.cpp @@ -335,6 +335,7 @@ void EmitTwoOpFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Lamb auto args = ctx.reg_alloc.GetArgumentInfo(inst); const Xbyak::Xmm arg1 = ctx.reg_alloc.UseXmm(args[0]); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); ctx.reg_alloc.EndOfAllocScope(); ctx.reg_alloc.HostCall(nullptr); @@ -347,11 +348,11 @@ void EmitTwoOpFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Lamb code.movaps(xword[code.ABI_PARAM2], arg1); code.CallFunction(fn); - code.movaps(xmm0, xword[rsp + ABI_SHADOW_SPACE + 0 * 16]); + code.movaps(result, xword[rsp + ABI_SHADOW_SPACE + 0 * 16]); code.add(rsp, stack_space + ABI_SHADOW_SPACE); - ctx.reg_alloc.DefineValue(inst, xmm0); + ctx.reg_alloc.DefineValue(inst, result); } template @@ -361,6 +362,7 @@ void EmitThreeOpFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, La auto args = ctx.reg_alloc.GetArgumentInfo(inst); const Xbyak::Xmm arg1 = ctx.reg_alloc.UseXmm(args[0]); const Xbyak::Xmm arg2 = ctx.reg_alloc.UseXmm(args[1]); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); ctx.reg_alloc.EndOfAllocScope(); ctx.reg_alloc.HostCall(nullptr); @@ -388,14 +390,14 @@ void EmitThreeOpFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, La code.CallFunction(fn); #ifdef _WIN32 - code.movaps(xmm0, xword[rsp + ABI_SHADOW_SPACE + 1 * 16]); + code.movaps(result, xword[rsp + ABI_SHADOW_SPACE + 1 * 16]); #else - code.movaps(xmm0, xword[rsp + ABI_SHADOW_SPACE + 0 * 16]); + code.movaps(result, xword[rsp + ABI_SHADOW_SPACE + 0 * 16]); #endif code.add(rsp, stack_space + ABI_SHADOW_SPACE); - ctx.reg_alloc.DefineValue(inst, xmm0); + ctx.reg_alloc.DefineValue(inst, result); } template @@ -443,12 +445,13 @@ void EmitFourOpFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Lam const Xbyak::Xmm arg1 = ctx.reg_alloc.UseXmm(args[0]); const Xbyak::Xmm arg2 = ctx.reg_alloc.UseXmm(args[1]); const Xbyak::Xmm arg3 = ctx.reg_alloc.UseXmm(args[2]); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); ctx.reg_alloc.EndOfAllocScope(); ctx.reg_alloc.HostCall(nullptr); - EmitFourOpFallbackWithoutRegAlloc(code, ctx, xmm0, arg1, arg2, arg3, lambda); + EmitFourOpFallbackWithoutRegAlloc(code, ctx, result, arg1, arg2, arg3, lambda); - ctx.reg_alloc.DefineValue(inst, xmm0); + ctx.reg_alloc.DefineValue(inst, result); } } // anonymous namespace diff --git a/src/backend_x64/hostloc.h b/src/backend_x64/hostloc.h index 59f48a23..177e71c6 100644 --- a/src/backend_x64/hostloc.h +++ b/src/backend_x64/hostloc.h @@ -90,8 +90,8 @@ const HostLocList any_gpr = { HostLoc::R14, }; +// XMM0 is reserved for use by instructions that implicitly use it as an argument const HostLocList any_xmm = { - HostLoc::XMM0, HostLoc::XMM1, HostLoc::XMM2, HostLoc::XMM3,