backend/arm64: FPCR/FPSR handling
This commit is contained in:
parent
60a119da6a
commit
208b19b89a
11 changed files with 105 additions and 38 deletions
|
@ -5,10 +5,12 @@
|
|||
|
||||
#include "dynarmic/backend/arm64/a32_address_space.h"
|
||||
|
||||
#include "dynarmic/backend/arm64/a32_jitstate.h"
|
||||
#include "dynarmic/backend/arm64/abi.h"
|
||||
#include "dynarmic/backend/arm64/devirtualize.h"
|
||||
#include "dynarmic/backend/arm64/emit_arm64.h"
|
||||
#include "dynarmic/backend/arm64/stack_layout.h"
|
||||
#include "dynarmic/common/fp/fpcr.h"
|
||||
#include "dynarmic/frontend/A32/a32_location_descriptor.h"
|
||||
#include "dynarmic/frontend/A32/translate/a32_translate.h"
|
||||
#include "dynarmic/ir/opt/passes.h"
|
||||
|
@ -95,11 +97,23 @@ void A32AddressSpace::EmitPrelude() {
|
|||
|
||||
prelude_info.run_code = code.ptr<PreludeInfo::RunCodeFuncType>();
|
||||
ABI_PushRegisters(code, ABI_CALLEE_SAVE | (1 << 30), sizeof(StackLayout));
|
||||
|
||||
code.MOV(Xstate, X1);
|
||||
code.MOV(Xhalt, X2);
|
||||
|
||||
code.LDR(Wscratch0, Xstate, offsetof(A32JitState, upper_location_descriptor));
|
||||
code.AND(Wscratch0, Wscratch0, 0xffff0000);
|
||||
code.MRS(Xscratch1, oaknut::SystemReg::FPCR);
|
||||
code.STR(Wscratch1, SP, offsetof(StackLayout, save_host_fpcr));
|
||||
code.MSR(oaknut::SystemReg::FPCR, Xscratch0);
|
||||
|
||||
code.BR(X0);
|
||||
|
||||
prelude_info.return_from_run_code = code.ptr<void*>();
|
||||
|
||||
code.LDR(Wscratch0, SP, offsetof(StackLayout, save_host_fpcr));
|
||||
code.MSR(oaknut::SystemReg::FPCR, Xscratch0);
|
||||
|
||||
ABI_PopRegisters(code, ABI_CALLEE_SAVE | (1 << 30), sizeof(StackLayout));
|
||||
code.RET();
|
||||
|
||||
|
@ -129,15 +143,19 @@ EmittedBlockInfo A32AddressSpace::Emit(IR::Block block) {
|
|||
|
||||
mem.unprotect();
|
||||
|
||||
EmittedBlockInfo block_info = EmitArm64(code, std::move(block), {
|
||||
const EmitConfig emit_conf{
|
||||
.hook_isb = conf.hook_isb,
|
||||
.enable_cycle_counting = conf.enable_cycle_counting,
|
||||
.always_little_endian = conf.always_little_endian,
|
||||
});
|
||||
.descriptor_to_fpcr = [](const IR::LocationDescriptor& location) { return FP::FPCR{A32::LocationDescriptor{location}.FPSCR().Value()}; },
|
||||
.state_nzcv_offset = offsetof(A32JitState, cpsr_nzcv),
|
||||
.state_fpsr_offset = offsetof(A32JitState, fpsr),
|
||||
};
|
||||
EmittedBlockInfo block_info = EmitArm64(code, std::move(block), emit_conf);
|
||||
|
||||
Link(block_info);
|
||||
|
||||
mem.invalidate(reinterpret_cast<u32*>(block_info.entry_point), block_info.size);
|
||||
|
||||
mem.protect();
|
||||
|
||||
return block_info;
|
||||
|
|
|
@ -59,15 +59,16 @@ void A32JitState::SetCpsr(u32 cpsr) {
|
|||
}
|
||||
|
||||
constexpr u32 FPCR_MASK = A32::LocationDescriptor::FPSCR_MODE_MASK;
|
||||
constexpr u32 FPSR_MASK = 0xF800009F;
|
||||
constexpr u32 FPSR_MASK = 0x0800'009f;
|
||||
|
||||
u32 A32JitState::Fpscr() const {
|
||||
return (upper_location_descriptor & 0xffff0000) | fpsr;
|
||||
return (upper_location_descriptor & 0xffff'0000) | fpsr | fpsr_nzcv;
|
||||
}
|
||||
|
||||
void A32JitState::SetFpscr(u32 fpscr) {
|
||||
fpsr_nzcv = fpscr & 0xf000'0000;
|
||||
fpsr = fpscr & FPSR_MASK;
|
||||
upper_location_descriptor = (upper_location_descriptor & 0x0000ffff) | (fpscr & FPCR_MASK);
|
||||
upper_location_descriptor = (upper_location_descriptor & 0x0000'ffff) | (fpscr & FPCR_MASK);
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::Backend::Arm64
|
||||
|
|
|
@ -20,14 +20,15 @@ struct A32JitState {
|
|||
u32 cpsr_jaifm = 0;
|
||||
u32 cpsr_ge = 0;
|
||||
|
||||
u32 fpsr = 0;
|
||||
u32 fpsr_nzcv = 0;
|
||||
|
||||
std::array<u32, 16> regs{};
|
||||
|
||||
u32 upper_location_descriptor;
|
||||
|
||||
alignas(16) std::array<u32, 64> ext_regs{};
|
||||
|
||||
u32 fpsr = 0;
|
||||
|
||||
u32 exclusive_state = 0;
|
||||
|
||||
u32 Cpsr() const;
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
#include "dynarmic/backend/arm64/a32_jitstate.h"
|
||||
#include "dynarmic/backend/arm64/abi.h"
|
||||
#include "dynarmic/backend/arm64/emit_context.h"
|
||||
#include "dynarmic/backend/arm64/fpsr_manager.h"
|
||||
#include "dynarmic/backend/arm64/reg_alloc.h"
|
||||
#include "dynarmic/ir/basic_block.h"
|
||||
#include "dynarmic/ir/microinstruction.h"
|
||||
|
@ -147,8 +148,9 @@ static void EmitAddCycles(oaknut::CodeGenerator& code, EmitContext&, size_t cycl
|
|||
EmittedBlockInfo EmitArm64(oaknut::CodeGenerator& code, IR::Block block, const EmitConfig& conf) {
|
||||
EmittedBlockInfo ebi;
|
||||
|
||||
RegAlloc reg_alloc{code, GPR_ORDER, FPR_ORDER};
|
||||
EmitContext ctx{block, reg_alloc, conf, ebi, {}};
|
||||
FpsrManager fpsr_manager{code, conf.state_fpsr_offset};
|
||||
RegAlloc reg_alloc{code, fpsr_manager, GPR_ORDER, FPR_ORDER};
|
||||
EmitContext ctx{block, reg_alloc, conf, ebi, fpsr_manager};
|
||||
|
||||
ebi.entry_point = code.ptr<CodePtr>();
|
||||
|
||||
|
@ -193,6 +195,8 @@ EmittedBlockInfo EmitArm64(oaknut::CodeGenerator& code, IR::Block block, const E
|
|||
reg_alloc.AssertAllUnlocked();
|
||||
}
|
||||
|
||||
fpsr_manager.Spill();
|
||||
|
||||
reg_alloc.AssertNoMoreUses();
|
||||
|
||||
if (ctx.conf.enable_cycle_counting) {
|
||||
|
|
|
@ -18,9 +18,14 @@ using CodeGenerator = BasicCodeGenerator<PointerCodeGeneratorPolicy>;
|
|||
struct Label;
|
||||
} // namespace oaknut
|
||||
|
||||
namespace Dynarmic::FP {
|
||||
class FPCR;
|
||||
} // namespace Dynarmic::FP
|
||||
|
||||
namespace Dynarmic::IR {
|
||||
class Block;
|
||||
class Inst;
|
||||
class LocationDescriptor;
|
||||
enum class Cond;
|
||||
enum class Opcode;
|
||||
} // namespace Dynarmic::IR
|
||||
|
@ -57,6 +62,11 @@ struct EmitConfig {
|
|||
bool hook_isb;
|
||||
bool enable_cycle_counting;
|
||||
bool always_little_endian;
|
||||
|
||||
FP::FPCR (*descriptor_to_fpcr)(const IR::LocationDescriptor& descriptor);
|
||||
|
||||
size_t state_nzcv_offset;
|
||||
size_t state_fpsr_offset;
|
||||
};
|
||||
|
||||
struct EmitContext;
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
#include "dynarmic/backend/arm64/abi.h"
|
||||
#include "dynarmic/backend/arm64/emit_arm64.h"
|
||||
#include "dynarmic/backend/arm64/emit_context.h"
|
||||
#include "dynarmic/backend/arm64/fpsr_manager.h"
|
||||
#include "dynarmic/backend/arm64/reg_alloc.h"
|
||||
#include "dynarmic/frontend/A32/a32_types.h"
|
||||
#include "dynarmic/ir/basic_block.h"
|
||||
|
@ -505,34 +506,56 @@ void EmitIR<IR::Opcode::A32InstructionSynchronizationBarrier>(oaknut::CodeGenera
|
|||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32GetFpscr>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
auto Wfpscr = ctx.reg_alloc.WriteW(inst);
|
||||
RegAlloc::Realize(Wfpscr);
|
||||
ctx.fpsr.Spill();
|
||||
|
||||
static_assert(offsetof(A32JitState, fpsr) + sizeof(u32) == offsetof(A32JitState, fpsr_nzcv));
|
||||
|
||||
code.LDR(Wfpscr, Xstate, offsetof(A32JitState, upper_location_descriptor));
|
||||
code.LDP(Wscratch0, Wscratch1, Xstate, offsetof(A32JitState, fpsr));
|
||||
code.AND(Wfpscr, Wfpscr, 0xffff'0000);
|
||||
code.ORR(Wscratch0, Wscratch0, Wscratch1);
|
||||
code.ORR(Wfpscr, Wfpscr, Wscratch0);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32SetFpscr>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto Wfpscr = ctx.reg_alloc.ReadW(args[0]);
|
||||
RegAlloc::Realize(Wfpscr);
|
||||
ctx.fpsr.Overwrite();
|
||||
|
||||
static_assert(offsetof(A32JitState, fpsr) + sizeof(u32) == offsetof(A32JitState, fpsr_nzcv));
|
||||
|
||||
code.LDR(Wscratch0, Xstate, offsetof(A32JitState, upper_location_descriptor));
|
||||
code.MOV(Wscratch1, 0x07f7'0000);
|
||||
code.AND(Wscratch1, Wfpscr, Wscratch1);
|
||||
code.AND(Wscratch0, Wscratch0, 0x0000'ffff);
|
||||
code.ORR(Wscratch0, Wscratch0, Wscratch1);
|
||||
code.STR(Wscratch0, Xstate, offsetof(A32JitState, upper_location_descriptor));
|
||||
|
||||
code.MOV(Wscratch0, 0x0800'009f);
|
||||
code.AND(Wscratch0, Wfpscr, Wscratch0);
|
||||
code.AND(Wscratch1, Wfpscr, 0xf000'0000);
|
||||
code.STP(Wscratch0, Wscratch1, Xstate, offsetof(A32JitState, fpsr));
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32GetFpscrNZCV>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
auto Wnzcv = ctx.reg_alloc.WriteW(inst);
|
||||
RegAlloc::Realize(Wnzcv);
|
||||
|
||||
code.LDR(Wnzcv, Xstate, offsetof(A32JitState, fpsr_nzcv));
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32SetFpscrNZCV>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto Wnzcv = ctx.reg_alloc.ReadW(args[0]);
|
||||
RegAlloc::Realize(Wnzcv);
|
||||
|
||||
code.STR(Wnzcv, Xstate, offsetof(A32JitState, fpsr_nzcv));
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::Backend::Arm64
|
||||
|
|
|
@ -9,6 +9,7 @@
|
|||
#include "dynarmic/backend/arm64/abi.h"
|
||||
#include "dynarmic/backend/arm64/emit_arm64.h"
|
||||
#include "dynarmic/backend/arm64/emit_context.h"
|
||||
#include "dynarmic/backend/arm64/fpsr_manager.h"
|
||||
#include "dynarmic/backend/arm64/reg_alloc.h"
|
||||
#include "dynarmic/ir/basic_block.h"
|
||||
#include "dynarmic/ir/microinstruction.h"
|
||||
|
|
|
@ -7,6 +7,8 @@
|
|||
|
||||
#include "dynarmic/backend/arm64/emit_arm64.h"
|
||||
#include "dynarmic/backend/arm64/reg_alloc.h"
|
||||
#include "dynarmic/common/fp/fpcr.h"
|
||||
#include "dynarmic/ir/basic_block.h"
|
||||
|
||||
namespace Dynarmic::IR {
|
||||
class Block;
|
||||
|
@ -15,18 +17,19 @@ class Block;
|
|||
namespace Dynarmic::Backend::Arm64 {
|
||||
|
||||
struct EmitConfig;
|
||||
|
||||
struct FpsrManager {
|
||||
void Spill() {} // TODO
|
||||
void Load() {} // TODO
|
||||
};
|
||||
class FpsrManager;
|
||||
|
||||
struct EmitContext {
|
||||
IR::Block& block;
|
||||
RegAlloc& reg_alloc;
|
||||
const EmitConfig& conf;
|
||||
EmittedBlockInfo& ebi;
|
||||
FpsrManager fpsr;
|
||||
FpsrManager& fpsr;
|
||||
|
||||
FP::FPCR FPCR(bool fpcr_controlled = true) const {
|
||||
const FP::FPCR fpcr = conf.descriptor_to_fpcr(block.Location());
|
||||
return fpcr_controlled ? fpcr : fpcr.ASIMDStandardValue();
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace Dynarmic::Backend::Arm64
|
||||
|
|
|
@ -13,6 +13,7 @@
|
|||
#include <mcl/stdint.hpp>
|
||||
|
||||
#include "dynarmic/backend/arm64/abi.h"
|
||||
#include "dynarmic/backend/arm64/fpsr_manager.h"
|
||||
|
||||
namespace Dynarmic::Backend::Arm64 {
|
||||
|
||||
|
@ -148,6 +149,7 @@ bool RegAlloc::IsValueLive(IR::Inst* inst) const {
|
|||
}
|
||||
|
||||
void RegAlloc::PrepareForCall(IR::Inst* result, std::optional<Argument::copyable_reference> arg0, std::optional<Argument::copyable_reference> arg1, std::optional<Argument::copyable_reference> arg2, std::optional<Argument::copyable_reference> arg3) {
|
||||
fpsr_manager.Spill();
|
||||
SpillFlags();
|
||||
|
||||
// TODO: Spill into callee-save registers
|
||||
|
|
|
@ -23,6 +23,7 @@
|
|||
|
||||
namespace Dynarmic::Backend::Arm64 {
|
||||
|
||||
class FpsrManager;
|
||||
class RegAlloc;
|
||||
|
||||
struct HostLoc {
|
||||
|
@ -138,8 +139,8 @@ class RegAlloc {
|
|||
public:
|
||||
using ArgumentInfo = std::array<Argument, IR::max_arg_count>;
|
||||
|
||||
explicit RegAlloc(oaknut::CodeGenerator& code, std::vector<int> gpr_order, std::vector<int> fpr_order)
|
||||
: code{code}, gpr_order{gpr_order}, fpr_order{fpr_order}, rand_gen{std::random_device{}()} {}
|
||||
explicit RegAlloc(oaknut::CodeGenerator& code, FpsrManager& fpsr_manager, std::vector<int> gpr_order, std::vector<int> fpr_order)
|
||||
: code{code}, fpsr_manager{fpsr_manager}, gpr_order{gpr_order}, fpr_order{fpr_order}, rand_gen{std::random_device{}()} {}
|
||||
|
||||
ArgumentInfo GetArgumentInfo(IR::Inst* inst);
|
||||
bool IsValueLive(IR::Inst* inst) const;
|
||||
|
@ -266,6 +267,7 @@ private:
|
|||
HostLocInfo& ValueInfo(const IR::Inst* value);
|
||||
|
||||
oaknut::CodeGenerator& code;
|
||||
FpsrManager& fpsr_manager;
|
||||
std::vector<int> gpr_order;
|
||||
std::vector<int> fpr_order;
|
||||
|
||||
|
|
|
@ -31,6 +31,8 @@
|
|||
#include <fmt/format.h>
|
||||
#include <fmt/ostream.h>
|
||||
|
||||
// When true, RunTestInstance prints the final FPSCR with its low 8 bits cleared (`& 0xffffff00`).
// NOTE(review): going by the name, this is presumably to hide the cumulative exception flags — confirm.
constexpr bool mask_fpsr_cum_bits = true;
|
||||
|
||||
namespace {
|
||||
using namespace Dynarmic;
|
||||
|
||||
|
@ -308,7 +310,7 @@ static void RunTestInstance(Dynarmic::A32::Jit& jit,
|
|||
}
|
||||
fmt::print("\n");
|
||||
fmt::print("final_cpsr: {:08x}\n", jit.Cpsr());
|
||||
fmt::print("final_fpsr: {:08x}\n", jit.Fpscr());
|
||||
fmt::print("final_fpsr: {:08x}\n", mask_fpsr_cum_bits ? jit.Fpscr() & 0xffffff00 : jit.Fpscr());
|
||||
|
||||
fmt::print("mod_mem: ");
|
||||
for (auto [addr, value] : jit_env.modified_memory) {
|
||||
|
|
Loading…
Reference in a new issue