backend/arm64: FPCR/FPSR handling

This commit is contained in:
Merry 2022-08-02 00:36:42 +01:00 committed by merry
parent 60a119da6a
commit 208b19b89a
11 changed files with 105 additions and 38 deletions

View file

@ -5,10 +5,12 @@
#include "dynarmic/backend/arm64/a32_address_space.h"
#include "dynarmic/backend/arm64/a32_jitstate.h"
#include "dynarmic/backend/arm64/abi.h"
#include "dynarmic/backend/arm64/devirtualize.h"
#include "dynarmic/backend/arm64/emit_arm64.h"
#include "dynarmic/backend/arm64/stack_layout.h"
#include "dynarmic/common/fp/fpcr.h"
#include "dynarmic/frontend/A32/a32_location_descriptor.h"
#include "dynarmic/frontend/A32/translate/a32_translate.h"
#include "dynarmic/ir/opt/passes.h"
@ -95,11 +97,23 @@ void A32AddressSpace::EmitPrelude() {
prelude_info.run_code = code.ptr<PreludeInfo::RunCodeFuncType>();
ABI_PushRegisters(code, ABI_CALLEE_SAVE | (1 << 30), sizeof(StackLayout));
code.MOV(Xstate, X1);
code.MOV(Xhalt, X2);
code.LDR(Wscratch0, Xstate, offsetof(A32JitState, upper_location_descriptor));
code.AND(Wscratch0, Wscratch0, 0xffff0000);
code.MRS(Xscratch1, oaknut::SystemReg::FPCR);
code.STR(Wscratch1, SP, offsetof(StackLayout, save_host_fpcr));
code.MSR(oaknut::SystemReg::FPCR, Xscratch0);
code.BR(X0);
prelude_info.return_from_run_code = code.ptr<void*>();
code.LDR(Wscratch0, SP, offsetof(StackLayout, save_host_fpcr));
code.MSR(oaknut::SystemReg::FPCR, Xscratch0);
ABI_PopRegisters(code, ABI_CALLEE_SAVE | (1 << 30), sizeof(StackLayout));
code.RET();
@ -129,15 +143,19 @@ EmittedBlockInfo A32AddressSpace::Emit(IR::Block block) {
mem.unprotect();
EmittedBlockInfo block_info = EmitArm64(code, std::move(block), {
const EmitConfig emit_conf{
.hook_isb = conf.hook_isb,
.enable_cycle_counting = conf.enable_cycle_counting,
.always_little_endian = conf.always_little_endian,
});
.descriptor_to_fpcr = [](const IR::LocationDescriptor& location) { return FP::FPCR{A32::LocationDescriptor{location}.FPSCR().Value()}; },
.state_nzcv_offset = offsetof(A32JitState, cpsr_nzcv),
.state_fpsr_offset = offsetof(A32JitState, fpsr),
};
EmittedBlockInfo block_info = EmitArm64(code, std::move(block), emit_conf);
Link(block_info);
mem.invalidate(reinterpret_cast<u32*>(block_info.entry_point), block_info.size);
mem.protect();
return block_info;

View file

@ -59,15 +59,16 @@ void A32JitState::SetCpsr(u32 cpsr) {
}
// Bits of an FPSCR value that SetFpscr copies into upper_location_descriptor.
constexpr u32 FPCR_MASK = A32::LocationDescriptor::FPSCR_MODE_MASK;
// Bits of an FPSCR value that SetFpscr copies into the fpsr field.
// NOTE(review): FPSR_MASK is defined twice below; this text looks like a rendered
// diff showing the old (0xF800009F) and new (0x0800'009f) values together —
// confirm which definition the real file keeps.
constexpr u32 FPSR_MASK = 0xF800009F;
constexpr u32 FPSR_MASK = 0x0800'009f;
// Returns an FPSCR value assembled from fields of the jit state.
// NOTE(review): two return statements appear back to back; this text looks like a
// rendered diff in which the first is the pre-change line and the second is its
// replacement — confirm against the real file.
u32 A32JitState::Fpscr() const {
return (upper_location_descriptor & 0xffff0000) | fpsr;
// Upper 16 bits of upper_location_descriptor, ORed with fpsr and fpsr_nzcv.
return (upper_location_descriptor & 0xffff'0000) | fpsr | fpsr_nzcv;
}
// Splits an FPSCR value across three jit state fields: the top four bits go to
// fpsr_nzcv, the FPSR_MASK bits go to fpsr, and the FPCR_MASK bits replace the
// upper half of upper_location_descriptor.
void A32JitState::SetFpscr(u32 fpscr) {
fpsr_nzcv = fpscr & 0xf000'0000;
fpsr = fpscr & FPSR_MASK;
// Keep the low 16 bits of upper_location_descriptor and OR in the FPCR_MASK bits.
// NOTE(review): the two assignments below differ only in the digit separator;
// this looks like a rendered diff showing the old and new line — confirm.
upper_location_descriptor = (upper_location_descriptor & 0x0000ffff) | (fpscr & FPCR_MASK);
upper_location_descriptor = (upper_location_descriptor & 0x0000'ffff) | (fpscr & FPCR_MASK);
}
} // namespace Dynarmic::Backend::Arm64

View file

@ -20,14 +20,15 @@ struct A32JitState {
u32 cpsr_jaifm = 0;
u32 cpsr_ge = 0;
u32 fpsr = 0;
u32 fpsr_nzcv = 0;
std::array<u32, 16> regs{};
u32 upper_location_descriptor;
alignas(16) std::array<u32, 64> ext_regs{};
u32 fpsr = 0;
u32 exclusive_state = 0;
u32 Cpsr() const;

View file

@ -11,6 +11,7 @@
#include "dynarmic/backend/arm64/a32_jitstate.h"
#include "dynarmic/backend/arm64/abi.h"
#include "dynarmic/backend/arm64/emit_context.h"
#include "dynarmic/backend/arm64/fpsr_manager.h"
#include "dynarmic/backend/arm64/reg_alloc.h"
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/microinstruction.h"
@ -147,8 +148,9 @@ static void EmitAddCycles(oaknut::CodeGenerator& code, EmitContext&, size_t cycl
EmittedBlockInfo EmitArm64(oaknut::CodeGenerator& code, IR::Block block, const EmitConfig& conf) {
EmittedBlockInfo ebi;
RegAlloc reg_alloc{code, GPR_ORDER, FPR_ORDER};
EmitContext ctx{block, reg_alloc, conf, ebi, {}};
FpsrManager fpsr_manager{code, conf.state_fpsr_offset};
RegAlloc reg_alloc{code, fpsr_manager, GPR_ORDER, FPR_ORDER};
EmitContext ctx{block, reg_alloc, conf, ebi, fpsr_manager};
ebi.entry_point = code.ptr<CodePtr>();
@ -193,6 +195,8 @@ EmittedBlockInfo EmitArm64(oaknut::CodeGenerator& code, IR::Block block, const E
reg_alloc.AssertAllUnlocked();
}
fpsr_manager.Spill();
reg_alloc.AssertNoMoreUses();
if (ctx.conf.enable_cycle_counting) {

View file

@ -18,9 +18,14 @@ using CodeGenerator = BasicCodeGenerator<PointerCodeGeneratorPolicy>;
struct Label;
} // namespace oaknut
namespace Dynarmic::FP {
class FPCR;
} // namespace Dynarmic::FP
namespace Dynarmic::IR {
class Block;
class Inst;
class LocationDescriptor;
enum class Cond;
enum class Opcode;
} // namespace Dynarmic::IR
@ -57,6 +62,11 @@ struct EmitConfig {
bool hook_isb;
bool enable_cycle_counting;
bool always_little_endian;
FP::FPCR (*descriptor_to_fpcr)(const IR::LocationDescriptor& descriptor);
size_t state_nzcv_offset;
size_t state_fpsr_offset;
};
struct EmitContext;

View file

@ -11,6 +11,7 @@
#include "dynarmic/backend/arm64/abi.h"
#include "dynarmic/backend/arm64/emit_arm64.h"
#include "dynarmic/backend/arm64/emit_context.h"
#include "dynarmic/backend/arm64/fpsr_manager.h"
#include "dynarmic/backend/arm64/reg_alloc.h"
#include "dynarmic/frontend/A32/a32_types.h"
#include "dynarmic/ir/basic_block.h"
@ -505,34 +506,56 @@ void EmitIR<IR::Opcode::A32InstructionSynchronizationBarrier>(oaknut::CodeGenera
// Emits code for the A32GetFpscr IR opcode: the register allocated for `inst`
// receives (upper_location_descriptor & 0xffff0000) | fpsr | fpsr_nzcv, the same
// combination A32JitState::Fpscr() computes on the host side.
// NOTE(review): the (void) casts and ASSERT_FALSE at the top look like the
// pre-change stub shown by a rendered diff, not live code — confirm.
template<>
void EmitIR<IR::Opcode::A32GetFpscr>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
auto Wfpscr = ctx.reg_alloc.WriteW(inst);
RegAlloc::Realize(Wfpscr);
// Spill is requested before the fpsr field is loaded from the state below.
// presumably this writes any host-held FPSR bits back to A32JitState::fpsr — confirm in fpsr_manager.h
ctx.fpsr.Spill();
// The LDP below loads fpsr and fpsr_nzcv as a pair, so they must be adjacent.
static_assert(offsetof(A32JitState, fpsr) + sizeof(u32) == offsetof(A32JitState, fpsr_nzcv));
code.LDR(Wfpscr, Xstate, offsetof(A32JitState, upper_location_descriptor));
code.LDP(Wscratch0, Wscratch1, Xstate, offsetof(A32JitState, fpsr));
// Keep only the upper 16 bits of upper_location_descriptor.
code.AND(Wfpscr, Wfpscr, 0xffff'0000);
// Merge fpsr and fpsr_nzcv, then merge that into the result.
code.ORR(Wscratch0, Wscratch0, Wscratch1);
code.ORR(Wfpscr, Wfpscr, Wscratch0);
}
// Emits code for the A32SetFpscr IR opcode: splits the value in argument 0 across
// upper_location_descriptor, fpsr and fpsr_nzcv in the jit state.
// NOTE(review): the (void) casts and ASSERT_FALSE at the top look like the
// pre-change stub shown by a rendered diff, not live code — confirm.
template<>
void EmitIR<IR::Opcode::A32SetFpscr>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
auto Wfpscr = ctx.reg_alloc.ReadW(args[0]);
RegAlloc::Realize(Wfpscr);
// presumably tells the FPSR manager that the stored fpsr is about to be replaced — confirm in fpsr_manager.h
ctx.fpsr.Overwrite();
// The STP at the end stores fpsr and fpsr_nzcv as a pair, so they must be adjacent.
static_assert(offsetof(A32JitState, fpsr) + sizeof(u32) == offsetof(A32JitState, fpsr_nzcv));
// upper_location_descriptor: keep its low 16 bits, replace the rest with
// (fpscr & 0x07f7'0000).
// NOTE(review): 0x07f7'0000 presumably equals A32::LocationDescriptor::FPSCR_MODE_MASK — confirm.
code.LDR(Wscratch0, Xstate, offsetof(A32JitState, upper_location_descriptor));
code.MOV(Wscratch1, 0x07f7'0000);
code.AND(Wscratch1, Wfpscr, Wscratch1);
code.AND(Wscratch0, Wscratch0, 0x0000'ffff);
code.ORR(Wscratch0, Wscratch0, Wscratch1);
code.STR(Wscratch0, Xstate, offsetof(A32JitState, upper_location_descriptor));
// fpsr = fpscr & 0x0800'009f; fpsr_nzcv = fpscr & 0xf000'0000.
code.MOV(Wscratch0, 0x0800'009f);
code.AND(Wscratch0, Wfpscr, Wscratch0);
code.AND(Wscratch1, Wfpscr, 0xf000'0000);
code.STP(Wscratch0, Wscratch1, Xstate, offsetof(A32JitState, fpsr));
}
// Emits code for the A32GetFpscrNZCV IR opcode: loads the jit state's fpsr_nzcv
// field into the register allocated for `inst`.
// NOTE(review): the (void) casts and ASSERT_FALSE at the top look like the
// pre-change stub shown by a rendered diff, not live code — confirm.
template<>
void EmitIR<IR::Opcode::A32GetFpscrNZCV>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
auto Wnzcv = ctx.reg_alloc.WriteW(inst);
RegAlloc::Realize(Wnzcv);
code.LDR(Wnzcv, Xstate, offsetof(A32JitState, fpsr_nzcv));
}
// Emits code for the A32SetFpscrNZCV IR opcode: stores argument 0 into the jit
// state's fpsr_nzcv field. No masking is applied in this function.
// NOTE(review): the (void) casts and ASSERT_FALSE at the top look like the
// pre-change stub shown by a rendered diff, not live code — confirm.
template<>
void EmitIR<IR::Opcode::A32SetFpscrNZCV>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
auto Wnzcv = ctx.reg_alloc.ReadW(args[0]);
RegAlloc::Realize(Wnzcv);
code.STR(Wnzcv, Xstate, offsetof(A32JitState, fpsr_nzcv));
}
} // namespace Dynarmic::Backend::Arm64

View file

@ -9,6 +9,7 @@
#include "dynarmic/backend/arm64/abi.h"
#include "dynarmic/backend/arm64/emit_arm64.h"
#include "dynarmic/backend/arm64/emit_context.h"
#include "dynarmic/backend/arm64/fpsr_manager.h"
#include "dynarmic/backend/arm64/reg_alloc.h"
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/microinstruction.h"

View file

@ -7,6 +7,8 @@
#include "dynarmic/backend/arm64/emit_arm64.h"
#include "dynarmic/backend/arm64/reg_alloc.h"
#include "dynarmic/common/fp/fpcr.h"
#include "dynarmic/ir/basic_block.h"
namespace Dynarmic::IR {
class Block;
@ -15,18 +17,19 @@ class Block;
namespace Dynarmic::Backend::Arm64 {
struct EmitConfig;
// Placeholder FpsrManager whose Spill and Load do nothing.
// NOTE(review): a forward declaration `class FpsrManager;` follows immediately;
// this text looks like a rendered diff in which this placeholder is the removed
// version — confirm against the real file.
struct FpsrManager {
void Spill() {} // TODO
void Load() {} // TODO
};
class FpsrManager;
// Bundle of references handed to the emitters for one block: the IR block, the
// register allocator, the emit configuration, the output block info and the
// FPSR manager.
struct EmitContext {
IR::Block& block;
RegAlloc& reg_alloc;
const EmitConfig& conf;
EmittedBlockInfo& ebi;
// NOTE(review): `fpsr` is declared twice (by value, then by reference); this
// looks like a rendered diff showing the old and new member — confirm.
FpsrManager fpsr;
FpsrManager& fpsr;

// Returns the FPCR obtained by passing the block's location to
// conf.descriptor_to_fpcr. When fpcr_controlled is false, returns that
// value's ASIMDStandardValue() instead.
FP::FPCR FPCR(bool fpcr_controlled = true) const {
const FP::FPCR fpcr = conf.descriptor_to_fpcr(block.Location());
return fpcr_controlled ? fpcr : fpcr.ASIMDStandardValue();
}
};
} // namespace Dynarmic::Backend::Arm64

View file

@ -13,6 +13,7 @@
#include <mcl/stdint.hpp>
#include "dynarmic/backend/arm64/abi.h"
#include "dynarmic/backend/arm64/fpsr_manager.h"
namespace Dynarmic::Backend::Arm64 {
@ -148,6 +149,7 @@ bool RegAlloc::IsValueLive(IR::Inst* inst) const {
}
void RegAlloc::PrepareForCall(IR::Inst* result, std::optional<Argument::copyable_reference> arg0, std::optional<Argument::copyable_reference> arg1, std::optional<Argument::copyable_reference> arg2, std::optional<Argument::copyable_reference> arg3) {
fpsr_manager.Spill();
SpillFlags();
// TODO: Spill into callee-save registers

View file

@ -23,6 +23,7 @@
namespace Dynarmic::Backend::Arm64 {
class FpsrManager;
class RegAlloc;
struct HostLoc {
@ -138,8 +139,8 @@ class RegAlloc {
public:
using ArgumentInfo = std::array<Argument, IR::max_arg_count>;
// Constructs a register allocator: binds the code generator reference, copies
// the GPR and FPR allocation orders, and seeds rand_gen from std::random_device.
// NOTE(review): two constructors are shown; this looks like a rendered diff in
// which the first is the old signature and the second (which also binds an
// FpsrManager reference) is its replacement — confirm.
explicit RegAlloc(oaknut::CodeGenerator& code, std::vector<int> gpr_order, std::vector<int> fpr_order)
: code{code}, gpr_order{gpr_order}, fpr_order{fpr_order}, rand_gen{std::random_device{}()} {}
explicit RegAlloc(oaknut::CodeGenerator& code, FpsrManager& fpsr_manager, std::vector<int> gpr_order, std::vector<int> fpr_order)
: code{code}, fpsr_manager{fpsr_manager}, gpr_order{gpr_order}, fpr_order{fpr_order}, rand_gen{std::random_device{}()} {}
ArgumentInfo GetArgumentInfo(IR::Inst* inst);
bool IsValueLive(IR::Inst* inst) const;
@ -266,6 +267,7 @@ private:
HostLocInfo& ValueInfo(const IR::Inst* value);
oaknut::CodeGenerator& code;
FpsrManager& fpsr_manager;
std::vector<int> gpr_order;
std::vector<int> fpr_order;

View file

@ -31,6 +31,8 @@
#include <fmt/format.h>
#include <fmt/ostream.h>
// When true, the "final_fpsr" line printed by RunTestInstance shows
// jit.Fpscr() & 0xffffff00, i.e. with the low 8 bits cleared.
// presumably those are the cumulative exception flags the name refers to — confirm
constexpr bool mask_fpsr_cum_bits = true;
namespace {
using namespace Dynarmic;
@ -308,7 +310,7 @@ static void RunTestInstance(Dynarmic::A32::Jit& jit,
}
fmt::print("\n");
fmt::print("final_cpsr: {:08x}\n", jit.Cpsr());
fmt::print("final_fpsr: {:08x}\n", jit.Fpscr());
fmt::print("final_fpsr: {:08x}\n", mask_fpsr_cum_bits ? jit.Fpscr() & 0xffffff00 : jit.Fpscr());
fmt::print("mod_mem: ");
for (auto [addr, value] : jit_env.modified_memory) {