From e1df7ae6219c0b19576646969d76f72567caccdd Mon Sep 17 00:00:00 2001 From: MerryMage Date: Wed, 24 Jan 2018 15:55:59 +0000 Subject: [PATCH] IR: Add IR instructions A64Memory{Read,Write}128 This implementation only works on macOS and Linux. --- include/dynarmic/A64/config.h | 2 ++ src/backend_x64/a64_emit_x64.cpp | 41 ++++++++++++++++++++++++ src/frontend/A64/ir_emitter.cpp | 8 +++++ src/frontend/A64/ir_emitter.h | 2 ++ src/frontend/A64/translate/impl/impl.cpp | 9 ++++-- src/frontend/A64/translate/impl/impl.h | 4 +-- src/frontend/ir/opcodes.inc | 2 ++ src/frontend/ir/value.h | 1 + tests/A64/testenv.h | 7 ++++ 9 files changed, 72 insertions(+), 4 deletions(-) diff --git a/include/dynarmic/A64/config.h b/include/dynarmic/A64/config.h index a2ab289b..6e30bf3d 100644 --- a/include/dynarmic/A64/config.h +++ b/include/dynarmic/A64/config.h @@ -42,12 +42,14 @@ struct UserCallbacks { virtual std::uint16_t MemoryRead16(VAddr vaddr) = 0; virtual std::uint32_t MemoryRead32(VAddr vaddr) = 0; virtual std::uint64_t MemoryRead64(VAddr vaddr) = 0; + virtual Vector MemoryRead128(VAddr vaddr) = 0; // Writes through these callbacks may not be aligned. virtual void MemoryWrite8(VAddr vaddr, std::uint8_t value) = 0; virtual void MemoryWrite16(VAddr vaddr, std::uint16_t value) = 0; virtual void MemoryWrite32(VAddr vaddr, std::uint32_t value) = 0; virtual void MemoryWrite64(VAddr vaddr, std::uint64_t value) = 0; + virtual void MemoryWrite128(VAddr vaddr, Vector value) = 0; // If this callback returns true, the JIT will assume MemoryRead* callbacks will always // return the same value at any point in time for this vaddr. 
The JIT may use this information diff --git a/src/backend_x64/a64_emit_x64.cpp b/src/backend_x64/a64_emit_x64.cpp index 8c529c92..541c650f 100644 --- a/src/backend_x64/a64_emit_x64.cpp +++ b/src/backend_x64/a64_emit_x64.cpp @@ -319,6 +319,25 @@ void A64EmitX64::EmitA64ReadMemory64(A64EmitContext& ctx, IR::Inst* inst) { }); } +void A64EmitX64::EmitA64ReadMemory128(A64EmitContext& ctx, IR::Inst* inst) { + DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryRead128).EmitCall(code, [&](Xbyak::Reg64 vaddr) { + ASSERT(vaddr == code->ABI_PARAM2); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + ctx.reg_alloc.HostCall(nullptr, {}, args[0]); + }); + Xbyak::Xmm result = xmm0; + if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) { + code->movq(result, code->ABI_RETURN); + code->pinsrq(result, code->ABI_RETURN2, 1); + } else { + Xbyak::Xmm tmp = xmm1; + code->movq(result, code->ABI_RETURN); + code->movq(tmp, code->ABI_RETURN2); + code->punpcklqdq(result, tmp); + } + ctx.reg_alloc.DefineValue(inst, result); +} + void A64EmitX64::EmitA64WriteMemory8(A64EmitContext& ctx, IR::Inst* inst) { DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryWrite8).EmitCall(code, [&](Xbyak::Reg64 vaddr, Xbyak::Reg64 value) { ASSERT(vaddr == code->ABI_PARAM2 && value == code->ABI_PARAM3); @@ -351,6 +370,28 @@ void A64EmitX64::EmitA64WriteMemory64(A64EmitContext& ctx, IR::Inst* inst) { }); } +void A64EmitX64::EmitA64WriteMemory128(A64EmitContext& ctx, IR::Inst* inst) { + DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryWrite128).EmitCall(code, [&](Xbyak::Reg64 vaddr, Xbyak::Reg64 value0, Xbyak::Reg64 value1) { + ASSERT(vaddr == code->ABI_PARAM2 && value0 == code->ABI_PARAM3 && value1 == code->ABI_PARAM4); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + ctx.reg_alloc.Use(args[0], ABI_PARAM2); + ctx.reg_alloc.ScratchGpr({ABI_PARAM3}); + ctx.reg_alloc.ScratchGpr({ABI_PARAM4}); + if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) { + Xbyak::Xmm xmm_value = ctx.reg_alloc.UseXmm(args[1]); + 
code->movq(code->ABI_PARAM3, xmm_value); + code->pextrq(code->ABI_PARAM4, xmm_value, 1); + } else { + Xbyak::Xmm xmm_value = ctx.reg_alloc.UseScratchXmm(args[1]); + code->movq(code->ABI_PARAM3, xmm_value); + code->punpckhqdq(xmm_value, xmm_value); + code->movq(code->ABI_PARAM4, xmm_value); + } + ctx.reg_alloc.EndOfAllocScope(); + ctx.reg_alloc.HostCall(nullptr); + }); +} + void A64EmitX64::EmitTerminalImpl(IR::Term::Interpret terminal, IR::LocationDescriptor) { code->SwitchMxcsrOnExit(); DEVIRT(conf.callbacks, &A64::UserCallbacks::InterpreterFallback).EmitCall(code, [&](Xbyak::Reg64 param1, Xbyak::Reg64 param2) { diff --git a/src/frontend/A64/ir_emitter.cpp b/src/frontend/A64/ir_emitter.cpp index ee65cff0..e2bf70e8 100644 --- a/src/frontend/A64/ir_emitter.cpp +++ b/src/frontend/A64/ir_emitter.cpp @@ -58,6 +58,10 @@ IR::U64 IREmitter::ReadMemory64(const IR::U64& vaddr) { return Inst(Opcode::A64ReadMemory64, vaddr); } +IR::U128 IREmitter::ReadMemory128(const IR::U64& vaddr) { + return Inst(Opcode::A64ReadMemory128, vaddr); +} + void IREmitter::WriteMemory8(const IR::U64& vaddr, const IR::U8& value) { Inst(Opcode::A64WriteMemory8, vaddr, value); } @@ -74,6 +78,10 @@ void IREmitter::WriteMemory64(const IR::U64& vaddr, const IR::U64& value) { Inst(Opcode::A64WriteMemory64, vaddr, value); } +void IREmitter::WriteMemory128(const IR::U64& vaddr, const IR::U128& value) { + Inst(Opcode::A64WriteMemory128, vaddr, value); +} + IR::U32 IREmitter::GetW(Reg reg) { if (reg == Reg::ZR) return Imm32(0); diff --git a/src/frontend/A64/ir_emitter.h b/src/frontend/A64/ir_emitter.h index 24eb7a57..4bf6c9b7 100644 --- a/src/frontend/A64/ir_emitter.h +++ b/src/frontend/A64/ir_emitter.h @@ -44,10 +44,12 @@ public: IR::U16 ReadMemory16(const IR::U64& vaddr); IR::U32 ReadMemory32(const IR::U64& vaddr); IR::U64 ReadMemory64(const IR::U64& vaddr); + IR::U128 ReadMemory128(const IR::U64& vaddr); void WriteMemory8(const IR::U64& vaddr, const IR::U8& value); void WriteMemory16(const IR::U64& 
vaddr, const IR::U16& value); void WriteMemory32(const IR::U64& vaddr, const IR::U32& value); void WriteMemory64(const IR::U64& vaddr, const IR::U64& value); + void WriteMemory128(const IR::U64& vaddr, const IR::U128& value); IR::U32 GetW(Reg source_reg); IR::U64 GetX(Reg source_reg); diff --git a/src/frontend/A64/translate/impl/impl.cpp b/src/frontend/A64/translate/impl/impl.cpp index 1fd5d8c2..1fd54a49 100644 --- a/src/frontend/A64/translate/impl/impl.cpp +++ b/src/frontend/A64/translate/impl/impl.cpp @@ -147,7 +147,7 @@ void TranslatorVisitor::V(size_t bitsize, Vec vec, IR::U128 value) { } } -IR::UAny TranslatorVisitor::Mem(IR::U64 address, size_t bytesize, AccType /*acctype*/) { +IR::UAnyU128 TranslatorVisitor::Mem(IR::U64 address, size_t bytesize, AccType /*acctype*/) { switch (bytesize) { case 1: return ir.ReadMemory8(address); @@ -157,13 +157,15 @@ IR::UAny TranslatorVisitor::Mem(IR::U64 address, size_t bytesize, AccType /*acct return ir.ReadMemory32(address); case 8: return ir.ReadMemory64(address); + case 16: + return ir.ReadMemory128(address); default: ASSERT_MSG(false, "Invalid bytesize parameter %zu", bytesize); return {}; } } -void TranslatorVisitor::Mem(IR::U64 address, size_t bytesize, AccType /*acctype*/, IR::UAny value) { +void TranslatorVisitor::Mem(IR::U64 address, size_t bytesize, AccType /*acctype*/, IR::UAnyU128 value) { switch (bytesize) { case 1: ir.WriteMemory8(address, value); @@ -177,6 +179,9 @@ void TranslatorVisitor::Mem(IR::U64 address, size_t bytesize, AccType /*acctype* case 8: ir.WriteMemory64(address, value); return; + case 16: + ir.WriteMemory128(address, value); + return; default: ASSERT_MSG(false, "Invalid bytesize parameter %zu", bytesize); return; diff --git a/src/frontend/A64/translate/impl/impl.h b/src/frontend/A64/translate/impl/impl.h index b838e423..7e3426f5 100644 --- a/src/frontend/A64/translate/impl/impl.h +++ b/src/frontend/A64/translate/impl/impl.h @@ -51,8 +51,8 @@ struct TranslatorVisitor final { IR::U128 V(size_t 
bitsize, Vec vec); void V(size_t bitsize, Vec vec, IR::U128 value); - IR::UAny Mem(IR::U64 address, size_t size, AccType acctype); - void Mem(IR::U64 address, size_t size, AccType acctype, IR::UAny value); + IR::UAnyU128 Mem(IR::U64 address, size_t size, AccType acctype); + void Mem(IR::U64 address, size_t size, AccType acctype, IR::UAnyU128 value); IR::U32U64 SignExtend(IR::UAny value, size_t to_size); IR::U32U64 ZeroExtend(IR::UAny value, size_t to_size); diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc index f08b8c4e..da922bfc 100644 --- a/src/frontend/ir/opcodes.inc +++ b/src/frontend/ir/opcodes.inc @@ -237,10 +237,12 @@ A64OPC(ReadMemory8, T::U8, T::U64 A64OPC(ReadMemory16, T::U16, T::U64 ) A64OPC(ReadMemory32, T::U32, T::U64 ) A64OPC(ReadMemory64, T::U64, T::U64 ) +A64OPC(ReadMemory128, T::U128, T::U64 ) A64OPC(WriteMemory8, T::Void, T::U64, T::U8 ) A64OPC(WriteMemory16, T::Void, T::U64, T::U16 ) A64OPC(WriteMemory32, T::Void, T::U64, T::U32 ) A64OPC(WriteMemory64, T::Void, T::U64, T::U64 ) +A64OPC(WriteMemory128, T::Void, T::U64, T::U128 ) // Coprocessor A32OPC(CoprocInternalOperation, T::Void, T::CoprocInfo ) diff --git a/src/frontend/ir/value.h b/src/frontend/ir/value.h index 52809d57..f83a7fb3 100644 --- a/src/frontend/ir/value.h +++ b/src/frontend/ir/value.h @@ -100,6 +100,7 @@ using U64 = TypedValue; using U128 = TypedValue; using U32U64 = TypedValue; using UAny = TypedValue; +using UAnyU128 = TypedValue; using NZCV = TypedValue; } // namespace IR diff --git a/tests/A64/testenv.h b/tests/A64/testenv.h index e5f67c82..7ff86e8d 100644 --- a/tests/A64/testenv.h +++ b/tests/A64/testenv.h @@ -50,6 +50,9 @@ public: std::uint64_t MemoryRead64(u64 vaddr) override { return u64(MemoryRead32(vaddr)) | u64(MemoryRead32(vaddr + 4)) << 32; } + Vector MemoryRead128(u64 vaddr) override { + return {MemoryRead64(vaddr), MemoryRead64(vaddr + 8)}; + } void MemoryWrite8(u64 vaddr, std::uint8_t value) override { if (vaddr < code_mem.size() * 
sizeof(u32)) { @@ -69,6 +72,10 @@ public: MemoryWrite32(vaddr, static_cast(value)); MemoryWrite32(vaddr + 4, static_cast(value >> 32)); } + void MemoryWrite128(u64 vaddr, Vector value) override { + MemoryWrite64(vaddr, value[0]); + MemoryWrite64(vaddr + 8, value[1]); + } void InterpreterFallback(u64 pc, size_t num_instructions) override { ASSERT_MSG(false, "InterpreterFallback(%" PRIx64 ", %zu)", pc, num_instructions); }