emit_arm64: Remove is_a64
This commit is contained in:
parent
167ba85ce8
commit
0707aa3a04
6 changed files with 63 additions and 52 deletions
|
@ -303,21 +303,30 @@ EmittedBlockInfo A32AddressSpace::Emit(IR::Block block) {
|
||||||
mem.unprotect();
|
mem.unprotect();
|
||||||
|
|
||||||
const EmitConfig emit_conf{
|
const EmitConfig emit_conf{
|
||||||
.tpidr_el0{},
|
.optimizations = conf.unsafe_optimizations ? conf.optimizations : conf.optimizations & all_safe_optimizations,
|
||||||
.tpidrro_el0{},
|
|
||||||
.cntfreq_el0{},
|
|
||||||
.dczid_el0{},
|
|
||||||
.ctr_el0{},
|
|
||||||
.is_a64 = false,
|
|
||||||
.hook_isb = conf.hook_isb,
|
.hook_isb = conf.hook_isb,
|
||||||
.enable_cycle_counting = conf.enable_cycle_counting,
|
|
||||||
|
.cntfreq_el0{},
|
||||||
|
.ctr_el0{},
|
||||||
|
.dczid_el0{},
|
||||||
|
.tpidrro_el0{},
|
||||||
|
.tpidr_el0{},
|
||||||
|
|
||||||
.wall_clock_cntpct = conf.wall_clock_cntpct,
|
.wall_clock_cntpct = conf.wall_clock_cntpct,
|
||||||
|
.enable_cycle_counting = conf.enable_cycle_counting,
|
||||||
|
|
||||||
.always_little_endian = conf.always_little_endian,
|
.always_little_endian = conf.always_little_endian,
|
||||||
|
|
||||||
.descriptor_to_fpcr = [](const IR::LocationDescriptor& location) { return FP::FPCR{A32::LocationDescriptor{location}.FPSCR().Value()}; },
|
.descriptor_to_fpcr = [](const IR::LocationDescriptor& location) { return FP::FPCR{A32::LocationDescriptor{location}.FPSCR().Value()}; },
|
||||||
|
.emit_cond = EmitA32Cond,
|
||||||
|
.emit_condition_failed_terminal = EmitA32ConditionFailedTerminal,
|
||||||
|
.emit_terminal = EmitA32Terminal,
|
||||||
|
|
||||||
.state_nzcv_offset = offsetof(A32JitState, cpsr_nzcv),
|
.state_nzcv_offset = offsetof(A32JitState, cpsr_nzcv),
|
||||||
.state_fpsr_offset = offsetof(A32JitState, fpsr),
|
.state_fpsr_offset = offsetof(A32JitState, fpsr),
|
||||||
|
|
||||||
.coprocessors = conf.coprocessors,
|
.coprocessors = conf.coprocessors,
|
||||||
.optimizations = conf.unsafe_optimizations ? conf.optimizations : conf.optimizations & all_safe_optimizations,
|
|
||||||
};
|
};
|
||||||
EmittedBlockInfo block_info = EmitArm64(code, std::move(block), emit_conf);
|
EmittedBlockInfo block_info = EmitArm64(code, std::move(block), emit_conf);
|
||||||
|
|
||||||
|
|
|
@ -433,21 +433,30 @@ EmittedBlockInfo A64AddressSpace::Emit(IR::Block block) {
|
||||||
mem.unprotect();
|
mem.unprotect();
|
||||||
|
|
||||||
const EmitConfig emit_conf{
|
const EmitConfig emit_conf{
|
||||||
|
.optimizations = conf.unsafe_optimizations ? conf.optimizations : conf.optimizations & all_safe_optimizations,
|
||||||
|
|
||||||
|
.hook_isb = conf.hook_isb,
|
||||||
|
|
||||||
.tpidr_el0 = conf.tpidr_el0,
|
.tpidr_el0 = conf.tpidr_el0,
|
||||||
.tpidrro_el0 = conf.tpidrro_el0,
|
.tpidrro_el0 = conf.tpidrro_el0,
|
||||||
.cntfreq_el0 = conf.cntfrq_el0,
|
.cntfreq_el0 = conf.cntfrq_el0,
|
||||||
.dczid_el0 = conf.dczid_el0,
|
.dczid_el0 = conf.dczid_el0,
|
||||||
.ctr_el0 = conf.ctr_el0,
|
.ctr_el0 = conf.ctr_el0,
|
||||||
.is_a64 = true,
|
|
||||||
.hook_isb = conf.hook_isb,
|
|
||||||
.enable_cycle_counting = conf.enable_cycle_counting,
|
|
||||||
.wall_clock_cntpct = conf.wall_clock_cntpct,
|
.wall_clock_cntpct = conf.wall_clock_cntpct,
|
||||||
|
.enable_cycle_counting = conf.enable_cycle_counting,
|
||||||
|
|
||||||
.always_little_endian = true,
|
.always_little_endian = true,
|
||||||
|
|
||||||
.descriptor_to_fpcr = [](const IR::LocationDescriptor& location) { return A64::LocationDescriptor{location}.FPCR(); },
|
.descriptor_to_fpcr = [](const IR::LocationDescriptor& location) { return A64::LocationDescriptor{location}.FPCR(); },
|
||||||
|
.emit_cond = EmitA64Cond,
|
||||||
|
.emit_condition_failed_terminal = EmitA64ConditionFailedTerminal,
|
||||||
|
.emit_terminal = EmitA64Terminal,
|
||||||
|
|
||||||
.state_nzcv_offset = offsetof(A64JitState, cpsr_nzcv),
|
.state_nzcv_offset = offsetof(A64JitState, cpsr_nzcv),
|
||||||
.state_fpsr_offset = offsetof(A64JitState, fpsr),
|
.state_fpsr_offset = offsetof(A64JitState, fpsr),
|
||||||
|
|
||||||
.coprocessors{},
|
.coprocessors{},
|
||||||
.optimizations = conf.unsafe_optimizations ? conf.optimizations : conf.optimizations & all_safe_optimizations,
|
|
||||||
};
|
};
|
||||||
EmittedBlockInfo block_info = EmitArm64(code, std::move(block), emit_conf);
|
EmittedBlockInfo block_info = EmitArm64(code, std::move(block), emit_conf);
|
||||||
|
|
||||||
|
|
|
@ -8,7 +8,6 @@
|
||||||
#include <fmt/ostream.h>
|
#include <fmt/ostream.h>
|
||||||
#include <oaknut/oaknut.hpp>
|
#include <oaknut/oaknut.hpp>
|
||||||
|
|
||||||
#include "dynarmic/backend/arm64/a32_jitstate.h"
|
|
||||||
#include "dynarmic/backend/arm64/abi.h"
|
#include "dynarmic/backend/arm64/abi.h"
|
||||||
#include "dynarmic/backend/arm64/emit_context.h"
|
#include "dynarmic/backend/arm64/emit_context.h"
|
||||||
#include "dynarmic/backend/arm64/fpsr_manager.h"
|
#include "dynarmic/backend/arm64/fpsr_manager.h"
|
||||||
|
@ -191,15 +190,9 @@ EmittedBlockInfo EmitArm64(oaknut::CodeGenerator& code, IR::Block block, const E
|
||||||
ASSERT(ctx.block.HasConditionFailedLocation());
|
ASSERT(ctx.block.HasConditionFailedLocation());
|
||||||
oaknut::Label pass;
|
oaknut::Label pass;
|
||||||
|
|
||||||
if (conf.is_a64) {
|
pass = conf.emit_cond(code, ctx, ctx.block.GetCondition());
|
||||||
pass = EmitA64Cond(code, ctx, ctx.block.GetCondition());
|
|
||||||
EmitAddCycles(code, ctx, ctx.block.ConditionFailedCycleCount());
|
EmitAddCycles(code, ctx, ctx.block.ConditionFailedCycleCount());
|
||||||
EmitA64ConditionFailedTerminal(code, ctx);
|
conf.emit_condition_failed_terminal(code, ctx);
|
||||||
} else {
|
|
||||||
pass = EmitA32Cond(code, ctx, ctx.block.GetCondition());
|
|
||||||
EmitAddCycles(code, ctx, ctx.block.ConditionFailedCycleCount());
|
|
||||||
EmitA32ConditionFailedTerminal(code, ctx);
|
|
||||||
}
|
|
||||||
|
|
||||||
code.l(pass);
|
code.l(pass);
|
||||||
}
|
}
|
||||||
|
@ -238,11 +231,7 @@ EmittedBlockInfo EmitArm64(oaknut::CodeGenerator& code, IR::Block block, const E
|
||||||
reg_alloc.AssertNoMoreUses();
|
reg_alloc.AssertNoMoreUses();
|
||||||
|
|
||||||
EmitAddCycles(code, ctx, block.CycleCount());
|
EmitAddCycles(code, ctx, block.CycleCount());
|
||||||
if (conf.is_a64) {
|
conf.emit_terminal(code, ctx);
|
||||||
EmitA64Terminal(code, ctx);
|
|
||||||
} else {
|
|
||||||
EmitA32Terminal(code, ctx);
|
|
||||||
}
|
|
||||||
|
|
||||||
ebi.size = code.ptr<CodePtr>() - ebi.entry_point;
|
ebi.size = code.ptr<CodePtr>() - ebi.entry_point;
|
||||||
return ebi;
|
return ebi;
|
||||||
|
|
|
@ -38,6 +38,8 @@ enum class Opcode;
|
||||||
|
|
||||||
namespace Dynarmic::Backend::Arm64 {
|
namespace Dynarmic::Backend::Arm64 {
|
||||||
|
|
||||||
|
struct EmitContext;
|
||||||
|
|
||||||
using CodePtr = std::byte*;
|
using CodePtr = std::byte*;
|
||||||
|
|
||||||
enum class LinkTarget {
|
enum class LinkTarget {
|
||||||
|
@ -90,31 +92,39 @@ struct EmittedBlockInfo {
|
||||||
};
|
};
|
||||||
|
|
||||||
struct EmitConfig {
|
struct EmitConfig {
|
||||||
u64* tpidr_el0;
|
OptimizationFlag optimizations;
|
||||||
const u64* tpidrro_el0;
|
bool HasOptimization(OptimizationFlag f) const { return (f & optimizations) != no_optimizations; }
|
||||||
u64 cntfreq_el0;
|
|
||||||
u32 dczid_el0;
|
|
||||||
u32 ctr_el0;
|
|
||||||
bool is_a64;
|
|
||||||
bool hook_isb;
|
bool hook_isb;
|
||||||
bool enable_cycle_counting;
|
|
||||||
|
// System registers
|
||||||
|
u64 cntfreq_el0;
|
||||||
|
u32 ctr_el0;
|
||||||
|
u32 dczid_el0;
|
||||||
|
const u64* tpidrro_el0;
|
||||||
|
u64* tpidr_el0;
|
||||||
|
|
||||||
|
// Timing
|
||||||
bool wall_clock_cntpct;
|
bool wall_clock_cntpct;
|
||||||
|
bool enable_cycle_counting;
|
||||||
|
|
||||||
|
// Endianness
|
||||||
bool always_little_endian;
|
bool always_little_endian;
|
||||||
|
|
||||||
|
// Frontend specific callbacks
|
||||||
FP::FPCR (*descriptor_to_fpcr)(const IR::LocationDescriptor& descriptor);
|
FP::FPCR (*descriptor_to_fpcr)(const IR::LocationDescriptor& descriptor);
|
||||||
|
oaknut::Label (*emit_cond)(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Cond cond);
|
||||||
|
void (*emit_condition_failed_terminal)(oaknut::CodeGenerator& code, EmitContext& ctx);
|
||||||
|
void (*emit_terminal)(oaknut::CodeGenerator& code, EmitContext& ctx);
|
||||||
|
|
||||||
|
// State offsets
|
||||||
size_t state_nzcv_offset;
|
size_t state_nzcv_offset;
|
||||||
size_t state_fpsr_offset;
|
size_t state_fpsr_offset;
|
||||||
|
|
||||||
|
// A32 specific
|
||||||
std::array<std::shared_ptr<A32::Coprocessor>, 16> coprocessors{};
|
std::array<std::shared_ptr<A32::Coprocessor>, 16> coprocessors{};
|
||||||
|
|
||||||
OptimizationFlag optimizations;
|
|
||||||
|
|
||||||
bool HasOptimization(OptimizationFlag f) const { return (f & optimizations) != no_optimizations; }
|
|
||||||
};
|
};
|
||||||
|
|
||||||
struct EmitContext;
|
|
||||||
|
|
||||||
EmittedBlockInfo EmitArm64(oaknut::CodeGenerator& code, IR::Block block, const EmitConfig& emit_conf);
|
EmittedBlockInfo EmitArm64(oaknut::CodeGenerator& code, IR::Block block, const EmitConfig& emit_conf);
|
||||||
|
|
||||||
template<IR::Opcode op>
|
template<IR::Opcode op>
|
||||||
|
|
|
@ -19,8 +19,6 @@ namespace Dynarmic::Backend::Arm64 {
|
||||||
|
|
||||||
using namespace oaknut::util;
|
using namespace oaknut::util;
|
||||||
|
|
||||||
static constexpr int nzcv_c_flag_shift = 29;
|
|
||||||
|
|
||||||
oaknut::Label EmitA64Cond(oaknut::CodeGenerator& code, EmitContext&, IR::Cond cond) {
|
oaknut::Label EmitA64Cond(oaknut::CodeGenerator& code, EmitContext&, IR::Cond cond) {
|
||||||
oaknut::Label pass;
|
oaknut::Label pass;
|
||||||
// TODO: Flags in host flags
|
// TODO: Flags in host flags
|
||||||
|
@ -145,7 +143,7 @@ void EmitIR<IR::Opcode::A64GetCFlag>(oaknut::CodeGenerator& code, EmitContext& c
|
||||||
auto Wflag = ctx.reg_alloc.WriteW(inst);
|
auto Wflag = ctx.reg_alloc.WriteW(inst);
|
||||||
RegAlloc::Realize(Wflag);
|
RegAlloc::Realize(Wflag);
|
||||||
code.LDR(Wflag, Xstate, offsetof(A64JitState, cpsr_nzcv));
|
code.LDR(Wflag, Xstate, offsetof(A64JitState, cpsr_nzcv));
|
||||||
code.AND(Wflag, Wflag, 1 << nzcv_c_flag_shift);
|
code.AND(Wflag, Wflag, 1 << 29);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<>
|
template<>
|
||||||
|
|
|
@ -258,7 +258,7 @@ void EmitToFixed(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst)
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename JitState, typename Lambda>
|
template<typename Lambda>
|
||||||
static void EmitTwoOpFallbackWithoutRegAlloc(oaknut::CodeGenerator& code, EmitContext& ctx, oaknut::QReg Qresult, oaknut::QReg Qarg1, Lambda lambda, bool fpcr_controlled) {
|
static void EmitTwoOpFallbackWithoutRegAlloc(oaknut::CodeGenerator& code, EmitContext& ctx, oaknut::QReg Qresult, oaknut::QReg Qarg1, Lambda lambda, bool fpcr_controlled) {
|
||||||
const auto fn = static_cast<mcl::equivalent_function_type<Lambda>*>(lambda);
|
const auto fn = static_cast<mcl::equivalent_function_type<Lambda>*>(lambda);
|
||||||
|
|
||||||
|
@ -274,7 +274,7 @@ static void EmitTwoOpFallbackWithoutRegAlloc(oaknut::CodeGenerator& code, EmitCo
|
||||||
code.ADD(X0, Xscratch0, 0 * 16);
|
code.ADD(X0, Xscratch0, 0 * 16);
|
||||||
code.ADD(X1, Xscratch0, 1 * 16);
|
code.ADD(X1, Xscratch0, 1 * 16);
|
||||||
code.MOV(X2, fpcr);
|
code.MOV(X2, fpcr);
|
||||||
code.ADD(X3, Xstate, offsetof(JitState, fpsr));
|
code.ADD(X3, Xstate, ctx.conf.state_fpsr_offset);
|
||||||
code.STR(Qarg1, X1);
|
code.STR(Qarg1, X1);
|
||||||
code.BLR(Xscratch1);
|
code.BLR(Xscratch1);
|
||||||
|
|
||||||
|
@ -289,7 +289,7 @@ static void EmitTwoOpFallbackWithoutRegAlloc(oaknut::CodeGenerator& code, EmitCo
|
||||||
code.l(end);
|
code.l(end);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<size_t fpcr_controlled_arg_index = 1, typename JitState, typename Lambda>
|
template<size_t fpcr_controlled_arg_index = 1, typename Lambda>
|
||||||
static void EmitTwoOpFallback(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, Lambda lambda) {
|
static void EmitTwoOpFallback(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, Lambda lambda) {
|
||||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
auto Qarg1 = ctx.reg_alloc.ReadQ(args[0]);
|
auto Qarg1 = ctx.reg_alloc.ReadQ(args[0]);
|
||||||
|
@ -300,7 +300,7 @@ static void EmitTwoOpFallback(oaknut::CodeGenerator& code, EmitContext& ctx, IR:
|
||||||
ctx.fpsr.Spill();
|
ctx.fpsr.Spill();
|
||||||
|
|
||||||
const bool fpcr_controlled = args[fpcr_controlled_arg_index].GetImmediateU1();
|
const bool fpcr_controlled = args[fpcr_controlled_arg_index].GetImmediateU1();
|
||||||
EmitTwoOpFallbackWithoutRegAlloc<JitState>(code, ctx, Qresult, Qarg1, lambda, fpcr_controlled);
|
EmitTwoOpFallbackWithoutRegAlloc(code, ctx, Qresult, Qarg1, lambda, fpcr_controlled);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<>
|
template<>
|
||||||
|
@ -592,11 +592,7 @@ void EmitIR<IR::Opcode::FPVectorRoundInt16>(oaknut::CodeGenerator& code, EmitCon
|
||||||
},
|
},
|
||||||
mp::cartesian_product<rounding_list, exact_list>{});
|
mp::cartesian_product<rounding_list, exact_list>{});
|
||||||
|
|
||||||
if (ctx.conf.is_a64) {
|
EmitTwoOpFallback<3>(code, ctx, inst, lut.at(std::make_tuple(rounding, exact)));
|
||||||
EmitTwoOpFallback<3, A64JitState>(code, ctx, inst, lut.at(std::make_tuple(rounding, exact)));
|
|
||||||
} else {
|
|
||||||
EmitTwoOpFallback<3, A32JitState>(code, ctx, inst, lut.at(std::make_tuple(rounding, exact)));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<>
|
template<>
|
||||||
|
|
Loading…
Reference in a new issue