diff --git a/include/dynarmic/A64/a64.h b/include/dynarmic/A64/a64.h
index 26238583..0e3808ff 100644
--- a/include/dynarmic/A64/a64.h
+++ b/include/dynarmic/A64/a64.h
@@ -90,6 +90,11 @@ public:
     /// Modify FPCR.
     void SetFpcr(std::uint32_t value);
 
+    /// View FPSR.
+    std::uint32_t GetFpsr() const;
+    /// Modify FPSR.
+    void SetFpsr(std::uint32_t value);
+
     /// View PSTATE
     std::uint32_t GetPstate() const;
     /// Modify PSTATE
diff --git a/src/backend_x64/a64_emit_x64.cpp b/src/backend_x64/a64_emit_x64.cpp
index b229a188..4df1f142 100644
--- a/src/backend_x64/a64_emit_x64.cpp
+++ b/src/backend_x64/a64_emit_x64.cpp
@@ -347,6 +347,24 @@ void A64EmitX64::EmitA64GetSP(A64EmitContext& ctx, IR::Inst* inst) {
     ctx.reg_alloc.DefineValue(inst, result);
 }
 
+void A64EmitX64::EmitA64GetFPCR(A64EmitContext& ctx, IR::Inst* inst) {
+    // A64JitState::fpcr is a u32: load exactly 32 bits so bytes past the field are never read.
+    Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
+    code.mov(result, dword[r15 + offsetof(A64JitState, fpcr)]);
+    ctx.reg_alloc.DefineValue(inst, result);
+}
+
+static u32 GetFPSRImpl(A64JitState* jit_state) {
+    return jit_state->GetFpsr();
+}
+
+void A64EmitX64::EmitA64GetFPSR(A64EmitContext& ctx, IR::Inst* inst) {
+    ctx.reg_alloc.HostCall(inst);
+    code.mov(code.ABI_PARAM1, code.r15);
+    code.stmxcsr(code.dword[code.r15 + offsetof(A64JitState, guest_MXCSR)]);
+    code.CallFunction(GetFPSRImpl);
+}
+
 void A64EmitX64::EmitA64SetW(A64EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     A64::Reg reg = inst->GetArg(0).GetA64RegRef();
@@ -422,6 +440,30 @@ void A64EmitX64::EmitA64SetSP(A64EmitContext& ctx, IR::Inst* inst) {
     }
 }
 
+static void SetFPCRImpl(A64JitState* jit_state, u32 value) {
+    jit_state->SetFpcr(value);
+}
+
+void A64EmitX64::EmitA64SetFPCR(A64EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    ctx.reg_alloc.HostCall(nullptr, {}, args[0]);
+    code.mov(code.ABI_PARAM1, code.r15);
+    code.CallFunction(SetFPCRImpl);
+    code.ldmxcsr(code.dword[code.r15 + offsetof(A64JitState, guest_MXCSR)]);
+}
+
+static void SetFPSRImpl(A64JitState* jit_state, u32 value) {
+    jit_state->SetFpsr(value);
+}
+
+void A64EmitX64::EmitA64SetFPSR(A64EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    ctx.reg_alloc.HostCall(nullptr, {}, args[0]);
+    code.mov(code.ABI_PARAM1, code.r15);
+    code.CallFunction(SetFPSRImpl);
+    code.ldmxcsr(code.dword[code.r15 + offsetof(A64JitState, guest_MXCSR)]);
+}
+
 void A64EmitX64::EmitA64SetPC(A64EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     auto addr = qword[r15 + offsetof(A64JitState, pc)];
diff --git a/src/backend_x64/a64_interface.cpp b/src/backend_x64/a64_interface.cpp
index 46e5e470..0a464cac 100644
--- a/src/backend_x64/a64_interface.cpp
+++ b/src/backend_x64/a64_interface.cpp
@@ -151,6 +151,14 @@ public:
         jit_state.SetFpcr(value);
     }
 
+    u32 GetFpsr() const {
+        return jit_state.GetFpsr();
+    }
+
+    void SetFpsr(u32 value) {
+        jit_state.SetFpsr(value);
+    }
+
     u32 GetPstate() const {
         return jit_state.GetPstate();
     }
@@ -314,6 +322,14 @@ void Jit::SetFpcr(u32 value) {
     impl->SetFpcr(value);
 }
 
+u32 Jit::GetFpsr() const {
+    return impl->GetFpsr();
+}
+
+void Jit::SetFpsr(u32 value) {
+    impl->SetFpsr(value);
+}
+
 u32 Jit::GetPstate() const {
     return impl->GetPstate();
 }
diff --git a/src/backend_x64/a64_jitstate.cpp b/src/backend_x64/a64_jitstate.cpp
index a9b40c33..d9106e0f 100644
--- a/src/backend_x64/a64_jitstate.cpp
+++ b/src/backend_x64/a64_jitstate.cpp
@@ -5,6 +5,7 @@
  */
 
 #include "backend_x64/a64_jitstate.h"
+#include "common/bit_util.h"
 #include "frontend/A64/location_descriptor.h"
 
 namespace Dynarmic::BackendX64 {
@@ -15,4 +16,105 @@ u64 A64JitState::GetUniqueHash() const {
     return pc_u64 | fpcr_u64;
 }
 
+/**
+ * Comparing MXCSR and FPCR
+ * ========================
+ *
+ * SSE MXCSR exception masks
+ * -------------------------
+ * PM   bit 12      Precision Mask
+ * UM   bit 11      Underflow Mask
+ * OM   bit 10      Overflow Mask
+ * ZM   bit 9       Divide By Zero Mask
+ * DM   bit 8       Denormal Mask
+ * IM   bit 7       Invalid Operation Mask
+ *
+ * A64 FPCR exception trap enables
+ * -------------------------------
+ * IDE  bit 15      Input Denormal exception trap enable
+ * IXE  bit 12      Inexact exception trap enable
+ * UFE  bit 11      Underflow exception trap enable
+ * OFE  bit 10      Overflow exception trap enable
+ * DZE  bit 9       Division by Zero exception trap enable
+ * IOE  bit 8       Invalid Operation exception trap enable
+ *
+ * SSE MXCSR mode bits
+ * -------------------
+ * FZ   bit 15      Flush To Zero
+ * DAZ  bit 6       Denormals Are Zero
+ * RN   bits 13-14  Round to {0 = Nearest, 1 = Negative, 2 = Positive, 3 = Zero}
+ *
+ * A64 FPCR mode bits
+ * ------------------
+ * AHP   bit 26     Alternative half-precision
+ * DN    bit 25     Default NaN
+ * FZ    bit 24     Flush to Zero
+ * RMode bits 22-23 Round to {0 = Nearest, 1 = Positive, 2 = Negative, 3 = Zero}
+ * FZ16  bit 19     Flush to Zero for half-precision
+ */
+
+constexpr u32 FPCR_MASK = 0x07C89F00;
+
+u32 A64JitState::GetFpcr() const {
+    return fpcr;
+}
+
+void A64JitState::SetFpcr(u32 value) {
+    fpcr = value & FPCR_MASK;
+
+    guest_MXCSR = 0x00001f80; // Mask all exceptions
+
+    // RMode
+    const std::array MXCSR_RMode {0x0, 0x4000, 0x2000, 0x6000};
+    guest_MXCSR |= MXCSR_RMode[(value >> 22) & 0x3];
+
+    if (Common::Bit<24>(value)) {
+        guest_MXCSR |= (1 << 15); // SSE Flush to Zero
+        guest_MXCSR |= (1 << 6);  // SSE Denormals are Zero
+    }
+}
+
+/**
+ * Comparing MXCSR and FPSR
+ * ========================
+ *
+ * SSE MXCSR exception flags
+ * -------------------------
+ * PE   bit 5       Precision Flag
+ * UE   bit 4       Underflow Flag
+ * OE   bit 3       Overflow Flag
+ * ZE   bit 2       Divide By Zero Flag
+ * DE   bit 1       Denormal Flag                                 // Appears to only be set when MXCSR.DAZ = 0
+ * IE   bit 0       Invalid Operation Flag
+ *
+ * A64 FPSR cumulative exception bits
+ * ----------------------------------
+ * QC   bit 27      Cumulative saturation bit
+ * IDC  bit 7       Input Denormal cumulative exception bit       // Only ever set when FPCR.FZ = 1
+ * IXC  bit 4       Inexact cumulative exception bit
+ * UFC  bit 3       Underflow cumulative exception bit
+ * OFC  bit 2       Overflow cumulative exception bit
+ * DZC  bit 1       Division by Zero cumulative exception bit
+ * IOC  bit 0       Invalid Operation cumulative exception bit
+ */
+
+u32 A64JitState::GetFpsr() const {
+    u32 fpsr = 0;
+    fpsr |= (guest_MXCSR & 0b0000000000001);       // IOC = IE
+    fpsr |= (guest_MXCSR & 0b0000000111100) >> 1;  // IXC, UFC, OFC, DZC = PE, UE, OE, ZE
+    fpsr |= FPSCR_IDC;
+    fpsr |= FPSCR_UFC;
+    return fpsr;
+}
+
+void A64JitState::SetFpsr(u32 value) {
+    // FPSR is written, not OR-ed: drop the stale sticky flags so that 0 bits in value clear them.
+    guest_MXCSR &= ~0x0000003D; // IE, ZE, OE, UE, PE (DE is not mapped from FPSR below)
+    guest_MXCSR |= ( value     ) & 0b0000000000001; // IE = IOC
+    guest_MXCSR |= ( value << 1) & 0b0000000111100; // PE, UE, OE, ZE = IXC, UFC, OFC, DZC
+
+    FPSCR_IDC = value & (1 << 7);
+    FPSCR_UFC = value & (1 << 3);
+}
+
 } // namespace Dynarmic::BackendX64
diff --git a/src/backend_x64/a64_jitstate.h b/src/backend_x64/a64_jitstate.h
index 8f08a0bd..25e42932 100644
--- a/src/backend_x64/a64_jitstate.h
+++ b/src/backend_x64/a64_jitstate.h
@@ -74,8 +74,10 @@ struct A64JitState {
     u32 FPSCR_IDC = 0;
     u32 FPSCR_UFC = 0;
     u32 fpcr = 0;
-    u32 GetFpcr() const { return fpcr; }
-    void SetFpcr(u32 new_fpcr) { fpcr = new_fpcr; }
+    u32 GetFpcr() const;
+    u32 GetFpsr() const;
+    void SetFpcr(u32 new_fpcr);
+    void SetFpsr(u32 new_fpsr);
 
     u64 GetUniqueHash() const;
 };
diff --git a/src/frontend/A64/decoder/a64.inc b/src/frontend/A64/decoder/a64.inc
index 92ea0460..b020dd5a 100644
--- a/src/frontend/A64/decoder/a64.inc
+++ b/src/frontend/A64/decoder/a64.inc
@@ -66,7 +66,7 @@ INST(DSB, "DSB", "11010
 INST(DMB,                    "DMB",                                                    "11010101000000110011MMMM10111111")
 //INST(ISB,                    "ISB",                                                    "11010101000000110011MMMM11011111")
 //INST(SYS,                    "SYS",                                                    "1101010100001oooNNNNMMMMooottttt")
-//INST(MSR_reg,                "MSR (register)",                                         "110101010001poooNNNNMMMMooottttt")
+INST(MSR_reg,                "MSR (register)",                                         "110101010001poooNNNNMMMMooottttt")
 //INST(SYSL,                   "SYSL",                                                   "1101010100101oooNNNNMMMMooottttt")
 INST(MRS,                    "MRS",                                                    "110101010011poooNNNNMMMMooottttt")
 
diff --git a/src/frontend/A64/ir_emitter.cpp b/src/frontend/A64/ir_emitter.cpp
index 8f094d39..64176b25 100644
--- a/src/frontend/A64/ir_emitter.cpp
+++ b/src/frontend/A64/ir_emitter.cpp
@@ -166,6 +166,14 @@ IR::U64 IREmitter::GetSP() {
     return Inst(Opcode::A64GetSP);
 }
 
+IR::U32 IREmitter::GetFPCR() {
+    return Inst(Opcode::A64GetFPCR);
+}
+
+IR::U32 IREmitter::GetFPSR() {
+    return Inst(Opcode::A64GetFPSR);
+}
+
 void IREmitter::SetW(const Reg reg, const IR::U32& value) {
     if (reg == Reg::ZR)
         return;
@@ -194,6 +202,14 @@ void IREmitter::SetSP(const IR::U64& value) {
     Inst(Opcode::A64SetSP, value);
 }
 
+void IREmitter::SetFPCR(const IR::U32& value) {
+    Inst(Opcode::A64SetFPCR, value);
+}
+
+void IREmitter::SetFPSR(const IR::U32& value) {
+    Inst(Opcode::A64SetFPSR, value);
+}
+
 void IREmitter::SetPC(const IR::U64& value) {
     Inst(Opcode::A64SetPC, value);
 }
diff --git a/src/frontend/A64/ir_emitter.h b/src/frontend/A64/ir_emitter.h
index 867b4ba7..2fb8e525 100644
--- a/src/frontend/A64/ir_emitter.h
+++ b/src/frontend/A64/ir_emitter.h
@@ -73,12 +73,16 @@ public:
     IR::U128 GetD(Vec source_vec);
     IR::U128 GetQ(Vec source_vec);
     IR::U64 GetSP();
+    IR::U32 GetFPCR();
+    IR::U32 GetFPSR();
     void SetW(Reg dest_reg, const IR::U32& value);
     void SetX(Reg dest_reg, const IR::U64& value);
     void SetS(Vec dest_vec, const IR::U128& value);
     void SetD(Vec dest_vec, const IR::U128& value);
     void SetQ(Vec dest_vec, const IR::U128& value);
     void SetSP(const IR::U64& value);
+    void SetFPCR(const IR::U32& value);
+    void SetFPSR(const IR::U32& value);
     void SetPC(const IR::U64& value);
 };
 
diff --git a/src/frontend/A64/translate/impl/impl.h b/src/frontend/A64/translate/impl/impl.h
index c9085ffe..0ea9baeb 100644
--- a/src/frontend/A64/translate/impl/impl.h
+++ b/src/frontend/A64/translate/impl/impl.h
@@ -138,7 +138,7 @@ struct TranslatorVisitor final {
     bool DMB(Imm<4> CRm);
     bool ISB(Imm<4> CRm);
     bool SYS(Imm<3> op1, Imm<4> CRn, Imm<4> CRm, Imm<3> op2, Reg Rt);
-    bool MSR_reg(bool o0, Imm<3> op1, Imm<4> CRn, Imm<4> CRm, Imm<3> op2, Reg Rt);
+    bool MSR_reg(Imm<1> o0, Imm<3> op1, Imm<4> CRn, Imm<4> CRm, Imm<3> op2, Reg Rt);
     bool SYSL(Imm<3> op1, Imm<4> CRn, Imm<4> CRm, Imm<3> op2, Reg Rt);
     bool MRS(Imm<1> o0, Imm<3> op1, Imm<4> CRn, Imm<4> CRm, Imm<3> op2, Reg Rt);
 
diff --git a/src/frontend/A64/translate/impl/system.cpp b/src/frontend/A64/translate/impl/system.cpp
index cd6fc2dc..f88829e4 100644
--- a/src/frontend/A64/translate/impl/system.cpp
+++ b/src/frontend/A64/translate/impl/system.cpp
@@ -51,6 +51,22 @@ bool TranslatorVisitor::DMB(Imm<4> /*CRm*/) {
     return true;
 }
 
+bool TranslatorVisitor::MSR_reg(Imm<1> o0, Imm<3> op1, Imm<4> CRn, Imm<4> CRm, Imm<3> op2, Reg Rt) {
+    const size_t sys_reg = concatenate(Imm<1>{1}, o0, op1, CRn, CRm, op2).ZeroExtend();
+    switch (sys_reg) {
+    case 0b11'011'0100'0100'000: // FPCR
+        ir.SetFPCR(X(32, Rt));
+        // The block is terminated here, so PC must be set to the following instruction explicitly.
+        ir.SetPC(ir.Imm64(ir.PC() + 4));
+        ir.SetTerm(IR::Term::ReturnToDispatch{});
+        return false;
+    case 0b11'011'0100'0100'001: // FPSR
+        ir.SetFPSR(X(32, Rt));
+        return true;
+    }
+    return InterpretThisInstruction();
+}
+
 bool TranslatorVisitor::MRS(Imm<1> o0, Imm<3> op1, Imm<4> CRn, Imm<4> CRm, Imm<3> op2, Reg Rt) {
     const size_t sys_reg = concatenate(Imm<1>{1}, o0, op1, CRn, CRm, op2).ZeroExtend();
     switch (sys_reg) {
@@ -66,6 +82,12 @@ bool TranslatorVisitor::MRS(Imm<1> o0, Imm<3> op1, Imm<4> CRn, Imm<4> CRm, Imm<3
     case 0b11'011'1110'0000'001: // CNTPCT_EL0
         X(64, Rt, ir.GetCNTPCT());
         return true;
+    case 0b11'011'0100'0100'000: // FPCR
+        X(32, Rt, ir.GetFPCR());
+        return true;
+    case 0b11'011'0100'0100'001: // FPSR
+        X(32, Rt, ir.GetFPSR());
+        return true;
     }
     return InterpretThisInstruction();
 }
diff --git a/src/frontend/ir/microinstruction.cpp b/src/frontend/ir/microinstruction.cpp
index 7ccd2711..3a1e8f81 100644
--- a/src/frontend/ir/microinstruction.cpp
+++ b/src/frontend/ir/microinstruction.cpp
@@ -194,6 +194,8 @@ bool Inst::ReadsFromFPSCR() const {
     switch (op) {
     case Opcode::A32GetFpscr:
     case Opcode::A32GetFpscrNZCV:
+    case Opcode::A64GetFPCR:
+    case Opcode::A64GetFPSR:
     case Opcode::FPAbs32:
     case Opcode::FPAbs64:
     case Opcode::FPAdd32:
@@ -221,6 +223,8 @@ bool Inst::WritesToFPSCR() const {
     switch (op) {
     case Opcode::A32SetFpscr:
     case Opcode::A32SetFpscrNZCV:
+    case Opcode::A64SetFPCR:
+    case Opcode::A64SetFPSR:
     case Opcode::FPAbs32:
     case Opcode::FPAbs64:
     case Opcode::FPAdd32:
diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc
index 2edd8a7c..630464db 100644
--- a/src/frontend/ir/opcodes.inc
+++ b/src/frontend/ir/opcodes.inc
@@ -47,6 +47,8 @@ A64OPC(GetS, T::U128, T::A64Vec
 A64OPC(GetD,                T::U128,        T::A64Vec                                       )
 A64OPC(GetQ,                T::U128,        T::A64Vec                                       )
 A64OPC(GetSP,               T::U64,                                                         )
+A64OPC(GetFPCR,             T::U32,                                                         )
+A64OPC(GetFPSR,             T::U32,                                                         )
 A64OPC(SetW,                T::Void,        T::A64Reg,      T::U32                          )
 A64OPC(SetX,                T::Void,        T::A64Reg,      T::U64                          )
 //A64OPC(SetB,                T::Void,        T::A64Vec,      T::U8                           )
@@ -55,6 +57,8 @@ A64OPC(SetS, T::Void, T::A64Vec, T::U128
 A64OPC(SetD,                T::Void,        T::A64Vec,      T::U128                         )
 A64OPC(SetQ,                T::Void,        T::A64Vec,      T::U128                         )
 A64OPC(SetSP,               T::Void,        T::U64                                          )
+A64OPC(SetFPCR,             T::Void,        T::U32                                          )
+A64OPC(SetFPSR,             T::Void,        T::U32                                          )
 A64OPC(SetPC,               T::Void,        T::U64                                          )
 A64OPC(CallSupervisor,      T::Void,        T::U32                                          )
 A64OPC(ExceptionRaised,     T::Void,        T::U64,         T::U64                          )