diff --git a/src/backend_x64/a64_emit_x64.cpp b/src/backend_x64/a64_emit_x64.cpp index fefb4140..412943b3 100644 --- a/src/backend_x64/a64_emit_x64.cpp +++ b/src/backend_x64/a64_emit_x64.cpp @@ -4,9 +4,6 @@ * General Public License version 2 or any later version. */ -#include -#include - #include #include "backend_x64/a64_emit_x64.h" @@ -55,6 +52,7 @@ bool A64EmitContext::FPSCR_DN() const { A64EmitX64::A64EmitX64(BlockOfCode& code, A64::UserConfig conf) : EmitX64(code), conf(conf) { + GenMemory128Accessors(); code.PreludeComplete(); } @@ -126,6 +124,56 @@ void A64EmitX64::InvalidateCacheRanges(const boost::icl::interval_set& rang InvalidateBasicBlocks(block_ranges.InvalidateRanges(ranges)); } +void A64EmitX64::GenMemory128Accessors() { + code.align(); + memory_read_128 = code.getCurr(); +#ifdef _WIN32 + DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryRead128).EmitCallWithReturnPointer(code, [&](Xbyak::Reg64 return_value_ptr, RegList args) { + code.mov(code.ABI_PARAM3, code.ABI_PARAM2); + code.sub(rsp, 8 + 16 + ABI_SHADOW_SPACE); + code.lea(return_value_ptr, ptr[rsp + ABI_SHADOW_SPACE]); + }); + code.movups(xmm0, xword[code.ABI_RETURN]); + code.add(rsp, 8 + 16 + ABI_SHADOW_SPACE); +#else + code.sub(rsp, 8); + DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryRead128).EmitCall(code); + if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) { + code.movq(xmm0, code.ABI_RETURN); + code.pinsrq(xmm0, code.ABI_RETURN2, 1); + } else { + code.movq(xmm0, code.ABI_RETURN); + code.movq(xmm1, code.ABI_RETURN2); + code.punpcklqdq(xmm0, xmm1); + } + code.add(rsp, 8); +#endif + code.ret(); + + code.align(); + memory_write_128 = code.getCurr(); +#ifdef _WIN32 + code.sub(rsp, 8 + 16 + ABI_SHADOW_SPACE); + code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE]); + code.movaps(xword[code.ABI_PARAM3], xmm0); + DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryWrite128).EmitCall(code); + code.add(rsp, 8 + 16 + ABI_SHADOW_SPACE); +#else + code.sub(rsp, 8); + if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) { + code.movq(code.ABI_PARAM3, xmm0); + code.pextrq(code.ABI_PARAM4, xmm0, 1); + } else { + code.movq(code.ABI_PARAM3, xmm0); + code.punpckhqdq(xmm0, xmm0); + code.movq(code.ABI_PARAM4, xmm0); + } + DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryWrite128).EmitCall(code); + code.add(rsp, 8); +#endif + code.ret(); +} + void A64EmitX64::EmitA64SetCheckBit(A64EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Reg8 to_store = ctx.reg_alloc.UseGpr(args[0]).cvt8(); @@ -366,39 +414,10 @@ void A64EmitX64::EmitA64ReadMemory64(A64EmitContext& ctx, IR::Inst* inst) { } void A64EmitX64::EmitA64ReadMemory128(A64EmitContext& ctx, IR::Inst* inst) { -#ifdef _WIN32 - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - - static_assert(ABI_SHADOW_SPACE >= 16); - ctx.reg_alloc.HostCall(nullptr, {}, {}, args[0]); - - DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryRead128).EmitCallWithReturnPointer(code, [&](Xbyak::Reg64 return_value_ptr, RegList) { - code.lea(return_value_ptr, ptr[rsp]); - code.sub(rsp, ABI_SHADOW_SPACE); - }); - - Xbyak::Xmm result = xmm0; - code.movups(result, xword[code.ABI_RETURN]); - code.add(rsp, ABI_SHADOW_SPACE); - - ctx.reg_alloc.DefineValue(inst, result); -#else auto args = ctx.reg_alloc.GetArgumentInfo(inst); ctx.reg_alloc.HostCall(nullptr, {}, args[0]); - DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryRead128).EmitCall(code); - - Xbyak::Xmm result = xmm0; - if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) { - code.movq(result, code.ABI_RETURN); - code.pinsrq(result, code.ABI_RETURN2, 1); - } else { - Xbyak::Xmm tmp = xmm1; - code.movq(result, code.ABI_RETURN); - code.movq(tmp, code.ABI_RETURN2); - code.punpcklqdq(result, tmp); - } - ctx.reg_alloc.DefineValue(inst, result); -#endif + code.CallFunction(memory_read_128); + ctx.reg_alloc.DefineValue(inst, xmm0); } void A64EmitX64::EmitA64WriteMemory8(A64EmitContext& ctx, IR::Inst* inst) { @@ -426,40 +445,12 @@ void A64EmitX64::EmitA64WriteMemory64(A64EmitContext& ctx, IR::Inst* inst) { } void A64EmitX64::EmitA64WriteMemory128(A64EmitContext& ctx, IR::Inst* inst) { -#ifdef _WIN32 - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - - static_assert(ABI_SHADOW_SPACE >= 16); - ctx.reg_alloc.Use(args[0], ABI_PARAM2); - Xbyak::Xmm xmm_value = ctx.reg_alloc.UseXmm(args[1]); - ctx.reg_alloc.EndOfAllocScope(); - ctx.reg_alloc.HostCall(nullptr); - code.lea(code.ABI_PARAM3, ptr[rsp]); - code.sub(rsp, ABI_SHADOW_SPACE); - code.movaps(xword[code.ABI_PARAM3], xmm_value); - - DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryWrite128).EmitCall(code); - - code.add(rsp, ABI_SHADOW_SPACE); -#else auto args = ctx.reg_alloc.GetArgumentInfo(inst); ctx.reg_alloc.Use(args[0], ABI_PARAM2); - ctx.reg_alloc.ScratchGpr({ABI_PARAM3}); - ctx.reg_alloc.ScratchGpr({ABI_PARAM4}); - if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) { - Xbyak::Xmm xmm_value = ctx.reg_alloc.UseXmm(args[1]); - code.movq(code.ABI_PARAM3, xmm_value); - code.pextrq(code.ABI_PARAM4, xmm_value, 1); - } else { - Xbyak::Xmm xmm_value = ctx.reg_alloc.UseScratchXmm(args[1]); - code.movq(code.ABI_PARAM3, xmm_value); - code.punpckhqdq(xmm_value, xmm_value); - code.movq(code.ABI_PARAM4, xmm_value); - } + ctx.reg_alloc.Use(args[1], HostLoc::XMM0); ctx.reg_alloc.EndOfAllocScope(); ctx.reg_alloc.HostCall(nullptr); - DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryWrite128).EmitCall(code); -#endif + code.CallFunction(memory_write_128); } void A64EmitX64::EmitTerminalImpl(IR::Term::Interpret terminal, IR::LocationDescriptor) { diff --git a/src/backend_x64/a64_emit_x64.h b/src/backend_x64/a64_emit_x64.h index 7094d412..1ac71438 100644 --- a/src/backend_x64/a64_emit_x64.h +++ b/src/backend_x64/a64_emit_x64.h @@ -44,6 +44,10 @@ protected: const A64::UserConfig conf; BlockRangeInformation block_ranges; + void (*memory_read_128)(); + void (*memory_write_128)(); + void GenMemory128Accessors(); + // Microinstruction emitters #define OPCODE(...) #define A32OPC(...)