diff --git a/src/dynarmic/backend/arm64/a32_address_space.cpp b/src/dynarmic/backend/arm64/a32_address_space.cpp index 3bd5b54a..b17a54b9 100644 --- a/src/dynarmic/backend/arm64/a32_address_space.cpp +++ b/src/dynarmic/backend/arm64/a32_address_space.cpp @@ -5,10 +5,12 @@ #include "dynarmic/backend/arm64/a32_address_space.h" +#include "dynarmic/backend/arm64/a32_jitstate.h" #include "dynarmic/backend/arm64/abi.h" #include "dynarmic/backend/arm64/devirtualize.h" #include "dynarmic/backend/arm64/emit_arm64.h" #include "dynarmic/backend/arm64/stack_layout.h" +#include "dynarmic/common/fp/fpcr.h" #include "dynarmic/frontend/A32/a32_location_descriptor.h" #include "dynarmic/frontend/A32/translate/a32_translate.h" #include "dynarmic/ir/opt/passes.h" @@ -95,11 +97,23 @@ void A32AddressSpace::EmitPrelude() { prelude_info.run_code = code.ptr(); ABI_PushRegisters(code, ABI_CALLEE_SAVE | (1 << 30), sizeof(StackLayout)); + code.MOV(Xstate, X1); code.MOV(Xhalt, X2); + + code.LDR(Wscratch0, Xstate, offsetof(A32JitState, upper_location_descriptor)); + code.AND(Wscratch0, Wscratch0, 0xffff0000); + code.MRS(Xscratch1, oaknut::SystemReg::FPCR); + code.STR(Wscratch1, SP, offsetof(StackLayout, save_host_fpcr)); + code.MSR(oaknut::SystemReg::FPCR, Xscratch0); + code.BR(X0); prelude_info.return_from_run_code = code.ptr(); + + code.LDR(Wscratch0, SP, offsetof(StackLayout, save_host_fpcr)); + code.MSR(oaknut::SystemReg::FPCR, Xscratch0); + ABI_PopRegisters(code, ABI_CALLEE_SAVE | (1 << 30), sizeof(StackLayout)); code.RET(); @@ -129,15 +143,19 @@ EmittedBlockInfo A32AddressSpace::Emit(IR::Block block) { mem.unprotect(); - EmittedBlockInfo block_info = EmitArm64(code, std::move(block), { - .hook_isb = conf.hook_isb, - .enable_cycle_counting = conf.enable_cycle_counting, - .always_little_endian = conf.always_little_endian, - }); + const EmitConfig emit_conf{ + .hook_isb = conf.hook_isb, + .enable_cycle_counting = conf.enable_cycle_counting, + .always_little_endian = conf.always_little_endian, + .descriptor_to_fpcr = [](const IR::LocationDescriptor& location) { return FP::FPCR{A32::LocationDescriptor{location}.FPSCR().Value()}; }, + .state_nzcv_offset = offsetof(A32JitState, cpsr_nzcv), + .state_fpsr_offset = offsetof(A32JitState, fpsr), + }; + EmittedBlockInfo block_info = EmitArm64(code, std::move(block), emit_conf); + Link(block_info); mem.invalidate(reinterpret_cast(block_info.entry_point), block_info.size); - mem.protect(); return block_info; diff --git a/src/dynarmic/backend/arm64/a32_jitstate.cpp b/src/dynarmic/backend/arm64/a32_jitstate.cpp index 27fd4e89..e24654c7 100644 --- a/src/dynarmic/backend/arm64/a32_jitstate.cpp +++ b/src/dynarmic/backend/arm64/a32_jitstate.cpp @@ -59,15 +59,16 @@ void A32JitState::SetCpsr(u32 cpsr) { } constexpr u32 FPCR_MASK = A32::LocationDescriptor::FPSCR_MODE_MASK; -constexpr u32 FPSR_MASK = 0xF800009F; +constexpr u32 FPSR_MASK = 0x0800'009f; u32 A32JitState::Fpscr() const { - return (upper_location_descriptor & 0xffff0000) | fpsr; + return (upper_location_descriptor & 0xffff'0000) | fpsr | fpsr_nzcv; } void A32JitState::SetFpscr(u32 fpscr) { + fpsr_nzcv = fpscr & 0xf000'0000; fpsr = fpscr & FPSR_MASK; - upper_location_descriptor = (upper_location_descriptor & 0x0000ffff) | (fpscr & FPCR_MASK); + upper_location_descriptor = (upper_location_descriptor & 0x0000'ffff) | (fpscr & FPCR_MASK); } } // namespace Dynarmic::Backend::Arm64 diff --git a/src/dynarmic/backend/arm64/a32_jitstate.h b/src/dynarmic/backend/arm64/a32_jitstate.h index 2c240ab1..978bf84a 100644 --- a/src/dynarmic/backend/arm64/a32_jitstate.h +++ b/src/dynarmic/backend/arm64/a32_jitstate.h @@ -20,14 +20,15 @@ struct A32JitState { u32 cpsr_jaifm = 0; u32 cpsr_ge = 0; + u32 fpsr = 0; + u32 fpsr_nzcv = 0; + std::array regs{}; u32 upper_location_descriptor; alignas(16) std::array ext_regs{}; - u32 fpsr = 0; - u32 exclusive_state = 0; u32 Cpsr() const; diff --git a/src/dynarmic/backend/arm64/emit_arm64.cpp b/src/dynarmic/backend/arm64/emit_arm64.cpp index 771e0f28..d44b92d7 100644 --- a/src/dynarmic/backend/arm64/emit_arm64.cpp +++ b/src/dynarmic/backend/arm64/emit_arm64.cpp @@ -11,6 +11,7 @@ #include "dynarmic/backend/arm64/a32_jitstate.h" #include "dynarmic/backend/arm64/abi.h" #include "dynarmic/backend/arm64/emit_context.h" +#include "dynarmic/backend/arm64/fpsr_manager.h" #include "dynarmic/backend/arm64/reg_alloc.h" #include "dynarmic/ir/basic_block.h" #include "dynarmic/ir/microinstruction.h" @@ -147,8 +148,9 @@ static void EmitAddCycles(oaknut::CodeGenerator& code, EmitContext&, size_t cycl EmittedBlockInfo EmitArm64(oaknut::CodeGenerator& code, IR::Block block, const EmitConfig& conf) { EmittedBlockInfo ebi; - RegAlloc reg_alloc{code, GPR_ORDER, FPR_ORDER}; - EmitContext ctx{block, reg_alloc, conf, ebi, {}}; + FpsrManager fpsr_manager{code, conf.state_fpsr_offset}; + RegAlloc reg_alloc{code, fpsr_manager, GPR_ORDER, FPR_ORDER}; + EmitContext ctx{block, reg_alloc, conf, ebi, fpsr_manager}; ebi.entry_point = code.ptr(); @@ -193,6 +195,8 @@ EmittedBlockInfo EmitArm64(oaknut::CodeGenerator& code, IR::Block block, const E reg_alloc.AssertAllUnlocked(); } + fpsr_manager.Spill(); + reg_alloc.AssertNoMoreUses(); if (ctx.conf.enable_cycle_counting) { diff --git a/src/dynarmic/backend/arm64/emit_arm64.h b/src/dynarmic/backend/arm64/emit_arm64.h index b2176778..27f9618e 100644 --- a/src/dynarmic/backend/arm64/emit_arm64.h +++ b/src/dynarmic/backend/arm64/emit_arm64.h @@ -18,9 +18,14 @@ using CodeGenerator = BasicCodeGenerator; struct Label; } // namespace oaknut +namespace Dynarmic::FP { +class FPCR; +} // namespace Dynarmic::FP + namespace Dynarmic::IR { class Block; class Inst; +class LocationDescriptor; enum class Cond; enum class Opcode; } // namespace Dynarmic::IR @@ -57,6 +62,11 @@ struct EmitConfig { bool hook_isb; bool enable_cycle_counting; bool always_little_endian; + + FP::FPCR (*descriptor_to_fpcr)(const IR::LocationDescriptor& descriptor); + + size_t state_nzcv_offset; + size_t state_fpsr_offset; }; struct EmitContext; diff --git a/src/dynarmic/backend/arm64/emit_arm64_a32.cpp b/src/dynarmic/backend/arm64/emit_arm64_a32.cpp index 392ad4d2..07dd7158 100644 --- a/src/dynarmic/backend/arm64/emit_arm64_a32.cpp +++ b/src/dynarmic/backend/arm64/emit_arm64_a32.cpp @@ -11,6 +11,7 @@ #include "dynarmic/backend/arm64/abi.h" #include "dynarmic/backend/arm64/emit_arm64.h" #include "dynarmic/backend/arm64/emit_context.h" +#include "dynarmic/backend/arm64/fpsr_manager.h" #include "dynarmic/backend/arm64/reg_alloc.h" #include "dynarmic/frontend/A32/a32_types.h" #include "dynarmic/ir/basic_block.h" @@ -505,34 +506,56 @@ void EmitIR(oaknut::CodeGenera template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + auto Wfpscr = ctx.reg_alloc.WriteW(inst); + RegAlloc::Realize(Wfpscr); + ctx.fpsr.Spill(); + + static_assert(offsetof(A32JitState, fpsr) + sizeof(u32) == offsetof(A32JitState, fpsr_nzcv)); + + code.LDR(Wfpscr, Xstate, offsetof(A32JitState, upper_location_descriptor)); + code.LDP(Wscratch0, Wscratch1, Xstate, offsetof(A32JitState, fpsr)); + code.AND(Wfpscr, Wfpscr, 0xffff'0000); + code.ORR(Wscratch0, Wscratch0, Wscratch1); + code.ORR(Wfpscr, Wfpscr, Wscratch0); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto Wfpscr = ctx.reg_alloc.ReadW(args[0]); + RegAlloc::Realize(Wfpscr); + ctx.fpsr.Overwrite(); + + static_assert(offsetof(A32JitState, fpsr) + sizeof(u32) == offsetof(A32JitState, fpsr_nzcv)); + + code.LDR(Wscratch0, Xstate, offsetof(A32JitState, upper_location_descriptor)); + code.MOV(Wscratch1, 0x07f7'0000); + code.AND(Wscratch1, Wfpscr, Wscratch1); + code.AND(Wscratch0, Wscratch0, 0x0000'ffff); + code.ORR(Wscratch0, Wscratch0, Wscratch1); + code.STR(Wscratch0, Xstate, offsetof(A32JitState, upper_location_descriptor)); + + code.MOV(Wscratch0, 0x0800'009f); + code.AND(Wscratch0, Wfpscr, Wscratch0); + code.AND(Wscratch1, Wfpscr, 0xf000'0000); + code.STP(Wscratch0, Wscratch1, Xstate, offsetof(A32JitState, fpsr)); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + auto Wnzcv = ctx.reg_alloc.WriteW(inst); + RegAlloc::Realize(Wnzcv); + + code.LDR(Wnzcv, Xstate, offsetof(A32JitState, fpsr_nzcv)); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto Wnzcv = ctx.reg_alloc.ReadW(args[0]); + RegAlloc::Realize(Wnzcv); + + code.STR(Wnzcv, Xstate, offsetof(A32JitState, fpsr_nzcv)); } } // namespace Dynarmic::Backend::Arm64 diff --git a/src/dynarmic/backend/arm64/emit_arm64_packed.cpp b/src/dynarmic/backend/arm64/emit_arm64_packed.cpp index b1700d96..b9f13ac6 100644 --- a/src/dynarmic/backend/arm64/emit_arm64_packed.cpp +++ b/src/dynarmic/backend/arm64/emit_arm64_packed.cpp @@ -9,6 +9,7 @@ #include "dynarmic/backend/arm64/abi.h" #include "dynarmic/backend/arm64/emit_arm64.h" #include "dynarmic/backend/arm64/emit_context.h" +#include "dynarmic/backend/arm64/fpsr_manager.h" #include "dynarmic/backend/arm64/reg_alloc.h" #include "dynarmic/ir/basic_block.h" #include "dynarmic/ir/microinstruction.h" diff --git a/src/dynarmic/backend/arm64/emit_context.h b/src/dynarmic/backend/arm64/emit_context.h index 9fd232c5..108e0a42 100644 --- a/src/dynarmic/backend/arm64/emit_context.h +++ b/src/dynarmic/backend/arm64/emit_context.h @@ -7,6 +7,8 @@ #include "dynarmic/backend/arm64/emit_arm64.h" #include "dynarmic/backend/arm64/reg_alloc.h" +#include "dynarmic/common/fp/fpcr.h" +#include "dynarmic/ir/basic_block.h" namespace Dynarmic::IR { class Block; @@ -15,18 +17,19 @@ class Block; namespace Dynarmic::Backend::Arm64 { struct EmitConfig; - -struct FpsrManager { - void Spill() {} // TODO - void Load() {} // TODO -}; +class FpsrManager; struct EmitContext { IR::Block& block; RegAlloc& reg_alloc; const EmitConfig& conf; EmittedBlockInfo& ebi; - FpsrManager fpsr; + FpsrManager& fpsr; + + FP::FPCR FPCR(bool fpcr_controlled = true) const { + const FP::FPCR fpcr = conf.descriptor_to_fpcr(block.Location()); + return fpcr_controlled ? fpcr : fpcr.ASIMDStandardValue(); + } }; } // namespace Dynarmic::Backend::Arm64 diff --git a/src/dynarmic/backend/arm64/reg_alloc.cpp b/src/dynarmic/backend/arm64/reg_alloc.cpp index 3355418e..28be2192 100644 --- a/src/dynarmic/backend/arm64/reg_alloc.cpp +++ b/src/dynarmic/backend/arm64/reg_alloc.cpp @@ -13,6 +13,7 @@ #include #include "dynarmic/backend/arm64/abi.h" +#include "dynarmic/backend/arm64/fpsr_manager.h" namespace Dynarmic::Backend::Arm64 { @@ -148,6 +149,7 @@ bool RegAlloc::IsValueLive(IR::Inst* inst) const { } void RegAlloc::PrepareForCall(IR::Inst* result, std::optional arg0, std::optional arg1, std::optional arg2, std::optional arg3) { + fpsr_manager.Spill(); SpillFlags(); // TODO: Spill into callee-save registers diff --git a/src/dynarmic/backend/arm64/reg_alloc.h b/src/dynarmic/backend/arm64/reg_alloc.h index aa77e9ed..7be78c36 100644 --- a/src/dynarmic/backend/arm64/reg_alloc.h +++ b/src/dynarmic/backend/arm64/reg_alloc.h @@ -23,6 +23,7 @@ namespace Dynarmic::Backend::Arm64 { +class FpsrManager; class RegAlloc; struct HostLoc { @@ -138,8 +139,8 @@ class RegAlloc { public: using ArgumentInfo = std::array; - explicit RegAlloc(oaknut::CodeGenerator& code, std::vector gpr_order, std::vector fpr_order) - : code{code}, gpr_order{gpr_order}, fpr_order{fpr_order}, rand_gen{std::random_device{}()} {} + explicit RegAlloc(oaknut::CodeGenerator& code, FpsrManager& fpsr_manager, std::vector gpr_order, std::vector fpr_order) + : code{code}, fpsr_manager{fpsr_manager}, gpr_order{gpr_order}, fpr_order{fpr_order}, rand_gen{std::random_device{}()} {} ArgumentInfo GetArgumentInfo(IR::Inst* inst); bool IsValueLive(IR::Inst* inst) const; @@ -266,6 +267,7 @@ private: HostLocInfo& ValueInfo(const IR::Inst* value); oaknut::CodeGenerator& code; + FpsrManager& fpsr_manager; std::vector gpr_order; std::vector fpr_order; diff --git a/tests/test_generator.cpp b/tests/test_generator.cpp index c098e627..16b9d7a6 100644 --- a/tests/test_generator.cpp +++ b/tests/test_generator.cpp @@ -31,6 +31,8 @@ #include #include +constexpr bool mask_fpsr_cum_bits = true; + namespace { using namespace Dynarmic; @@ -308,7 +310,7 @@ static void RunTestInstance(Dynarmic::A32::Jit& jit, } fmt::print("\n"); fmt::print("final_cpsr: {:08x}\n", jit.Cpsr()); - fmt::print("final_fpsr: {:08x}\n", jit.Fpscr()); + fmt::print("final_fpsr: {:08x}\n", mask_fpsr_cum_bits ? jit.Fpscr() & 0xffffff00 : jit.Fpscr()); fmt::print("mod_mem: "); for (auto [addr, value] : jit_env.modified_memory) {