a64_emit_x64: Implement {Read,Write}Memory128 in terms of a function call

This commit is contained in:
MerryMage 2018-02-12 18:18:47 +00:00
parent 6c4773e85b
commit 9f2f08db8d
2 changed files with 59 additions and 64 deletions

View file

@ -4,9 +4,6 @@
* General Public License version 2 or any later version.
*/
#include <unordered_map>
#include <unordered_set>
#include <fmt/ostream.h>
#include "backend_x64/a64_emit_x64.h"
@ -55,6 +52,7 @@ bool A64EmitContext::FPSCR_DN() const {
// Constructs the A64 emitter: passes `code` to the EmitX64 base, stores the
// user configuration in `conf`, then emits the shared 128-bit memory accessor
// stubs (GenMemory128Accessors) before calling code.PreludeComplete().
A64EmitX64::A64EmitX64(BlockOfCode& code, A64::UserConfig conf)
: EmitX64(code), conf(conf)
{
GenMemory128Accessors();
code.PreludeComplete();
}
@ -126,6 +124,56 @@ void A64EmitX64::InvalidateCacheRanges(const boost::icl::interval_set<u64>& rang
InvalidateBasicBlocks(block_ranges.InvalidateRanges(ranges));
}
// Emits two stub routines into `code` and records their entry points in
// memory_read_128 and memory_write_128. Each stub wraps a call to the user's
// MemoryRead128 / MemoryWrite128 callback and converts between the 128-bit
// value held in xmm0 and whatever form the host calling convention uses.
//
// Register contract as visible from the emitted instructions:
//   read stub : result is left in xmm0; on the non-SSE4.1 path xmm1 is
//               overwritten as a temporary.
//   write stub: the value to store is taken from xmm0; on the non-SSE4.1 path
//               xmm0 is modified by punpckhqdq.
// NOTE(review): the guest address is presumably expected in ABI_PARAM2 for
// both stubs (the emitters in this file place it there) - confirm against
// what DEVIRT(...).EmitCall loads into ABI_PARAM1.
void A64EmitX64::GenMemory128Accessors() {
code.align();
// Entry point of the read stub is the current position in the code buffer.
memory_read_128 = code.getCurr<void(*)()>();
#ifdef _WIN32
// Windows path: the callback's 128-bit result is read back through a pointer.
// `args` is not used inside this lambda.
DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryRead128).EmitCallWithReturnPointer(code, [&](Xbyak::Reg64 return_value_ptr, RegList args) {
// Move the incoming ABI_PARAM2 value into ABI_PARAM3.
// NOTE(review): presumably because the return-value pointer takes an earlier
// argument slot and shifts the address argument along - confirm.
code.mov(code.ABI_PARAM3, code.ABI_PARAM2);
// Reserve shadow space plus 16 bytes for the result; the extra 8 bytes are
// presumably there to keep rsp 16-byte aligned at the call - TODO confirm.
code.sub(rsp, 8 + 16 + ABI_SHADOW_SPACE);
// The result buffer sits directly above the shadow space.
code.lea(return_value_ptr, ptr[rsp + ABI_SHADOW_SPACE]);
});
// Load the 128-bit result from the address held in ABI_RETURN (unaligned load).
code.movups(xmm0, xword[code.ABI_RETURN]);
code.add(rsp, 8 + 16 + ABI_SHADOW_SPACE);
#else
// Non-Windows path: the result arrives as two 64-bit halves in
// ABI_RETURN (low) and ABI_RETURN2 (high).
// The 8-byte adjustment is presumably for 16-byte stack alignment at the
// call - TODO confirm.
code.sub(rsp, 8);
DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryRead128).EmitCall(code);
if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
// SSE4.1: insert the high half directly into lane 1 of xmm0.
code.movq(xmm0, code.ABI_RETURN);
code.pinsrq(xmm0, code.ABI_RETURN2, 1);
} else {
// Fallback: stage the high half in xmm1, then interleave the low quadwords.
code.movq(xmm0, code.ABI_RETURN);
code.movq(xmm1, code.ABI_RETURN2);
code.punpcklqdq(xmm0, xmm1);
}
code.add(rsp, 8);
#endif
code.ret();
code.align();
// Entry point of the write stub.
memory_write_128 = code.getCurr<void(*)()>();
#ifdef _WIN32
// Windows path: spill xmm0 to a 16-byte stack slot above the shadow space
// and pass that slot's address in ABI_PARAM3.
code.sub(rsp, 8 + 16 + ABI_SHADOW_SPACE);
code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE]);
// movaps requires the slot to be 16-byte aligned.
// NOTE(review): this relies on rsp's alignment at stub entry - confirm.
code.movaps(xword[code.ABI_PARAM3], xmm0);
DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryWrite128).EmitCall(code);
code.add(rsp, 8 + 16 + ABI_SHADOW_SPACE);
#else
// Non-Windows path: split xmm0 into two 64-bit halves passed in
// ABI_PARAM3 (low) and ABI_PARAM4 (high).
code.sub(rsp, 8);
if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
// SSE4.1: extract lane 1 directly.
code.movq(code.ABI_PARAM3, xmm0);
code.pextrq(code.ABI_PARAM4, xmm0, 1);
} else {
// Fallback: copy the high quadword of xmm0 into its low quadword, then
// move it out. This overwrites xmm0's low half.
code.movq(code.ABI_PARAM3, xmm0);
code.punpckhqdq(xmm0, xmm0);
code.movq(code.ABI_PARAM4, xmm0);
}
DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryWrite128).EmitCall(code);
code.add(rsp, 8);
#endif
code.ret();
}
void A64EmitX64::EmitA64SetCheckBit(A64EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
Xbyak::Reg8 to_store = ctx.reg_alloc.UseGpr(args[0]).cvt8();
@ -366,39 +414,10 @@ void A64EmitX64::EmitA64ReadMemory64(A64EmitContext& ctx, IR::Inst* inst) {
}
// Emits code for a 128-bit guest memory read.
//
// The platform-specific call into the user's MemoryRead128 callback lives in
// the shared stub emitted by GenMemory128Accessors() (entry point:
// memory_read_128), which leaves the 128-bit result in xmm0. This emitter
// therefore only sets up the argument and calls the stub. It must not also
// emit the inline callback sequence: doing both would perform the read twice
// and define the value of `inst` more than once.
void A64EmitX64::EmitA64ReadMemory128(A64EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    // Prepare for a host call with args[0] as the second call argument and no
    // result register bound by the allocator (nullptr).
    // NOTE(review): presumably this places the guest address in ABI_PARAM2,
    // which is where the read stub picks it up - confirm against
    // RegAlloc::HostCall.
    ctx.reg_alloc.HostCall(nullptr, {}, args[0]);

    code.CallFunction(memory_read_128);

    // The stub returns the loaded value in xmm0 on every platform path.
    ctx.reg_alloc.DefineValue(inst, xmm0);
}
void A64EmitX64::EmitA64WriteMemory8(A64EmitContext& ctx, IR::Inst* inst) {
@ -426,40 +445,12 @@ void A64EmitX64::EmitA64WriteMemory64(A64EmitContext& ctx, IR::Inst* inst) {
}
// Emits code for a 128-bit guest memory write.
//
// The platform-specific call into the user's MemoryWrite128 callback lives in
// the shared stub emitted by GenMemory128Accessors() (entry point:
// memory_write_128), which reads the value to store from xmm0. This emitter
// therefore only pins the operands to the registers the stub expects and
// calls it. It must not also emit the inline callback sequence: doing both
// would perform the write twice and claim args[1] in two different ways.
void A64EmitX64::EmitA64WriteMemory128(A64EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    // args[0] goes in ABI_PARAM2 and args[1] (the 128-bit value) in xmm0.
    // NOTE(review): args[0] is presumably the guest address - confirm against
    // the IR definition of A64WriteMemory128.
    ctx.reg_alloc.Use(args[0], ABI_PARAM2);
    ctx.reg_alloc.Use(args[1], HostLoc::XMM0);

    // Close the allocation scope before preparing the host call, in the same
    // order the original code used.
    ctx.reg_alloc.EndOfAllocScope();
    ctx.reg_alloc.HostCall(nullptr);

    code.CallFunction(memory_write_128);
}
void A64EmitX64::EmitTerminalImpl(IR::Term::Interpret terminal, IR::LocationDescriptor) {

View file

@ -44,6 +44,10 @@ protected:
const A64::UserConfig conf;
BlockRangeInformation<u64> block_ranges;
// Entry points of the emitted 128-bit memory read / write stubs.
// Both are assigned in GenMemory128Accessors().
void (*memory_read_128)();
void (*memory_write_128)();
// Emits the two stubs above into the code buffer and records their addresses.
void GenMemory128Accessors();
// Microinstruction emitters
#define OPCODE(...)
#define A32OPC(...)