Merge branch 'misc'

These commits introduce context save and restore, and a small number of
optimizations that depend on their use for performance.
This commit is contained in:
MerryMage 2017-12-12 22:07:39 +00:00
commit a98821da41
22 changed files with 587 additions and 258 deletions

View file

@ -26,10 +26,10 @@ computing a 64-bit `UniqueHash` that is guaranteed to uniquely identify a block.
u64 LocationDescriptor::UniqueHash() const {
// This value MUST BE UNIQUE.
// This calculation has to match up with EmitX64::EmitTerminalPopRSBHint
u64 pc_u64 = u64(arm_pc);
u64 fpscr_u64 = u64(fpscr.Value()) << 32;
u64 t_u64 = cpsr.T() ? (1ull << 35) : 0;
u64 e_u64 = cpsr.E() ? (1ull << 39) : 0;
u64 pc_u64 = u64(arm_pc) << 32;
u64 fpscr_u64 = u64(fpscr.Value());
u64 t_u64 = cpsr.T() ? 1 : 0;
u64 e_u64 = cpsr.E() ? 2 : 0;
return pc_u64 | fpscr_u64 | t_u64 | e_u64;
}
@ -120,12 +120,10 @@ To check if a predicition is in the RSB, we linearly scan the RSB.
using namespace Xbyak::util;
// This calculation has to match up with IREmitter::PushRSB
code->mov(ebx, MJitStateCpsr());
code->mov(ecx, MJitStateReg(Arm::Reg::PC));
code->and_(ebx, u32((1 << 5) | (1 << 9)));
code->shr(ebx, 2);
code->or_(ebx, dword[r15 + offsetof(JitState, FPSCR_mode)]);
code->shl(rbx, 32);
code->shl(rcx, 32);
code->mov(ebx, dword[r15 + offsetof(JitState, FPSCR_mode)]);
code->or_(ebx, dword[r15 + offsetof(JitState, CPSR_et)]);
code->or_(rbx, rcx);
code->mov(rax, u64(code->GetReturnFromRunCodeAddress()));

View file

@ -0,0 +1,44 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include <array>
#include <cstdint>
#include <memory>
namespace Dynarmic {
/// A snapshot of guest CPU state that can be captured from and restored into
/// a Jit instance (see Jit::SaveContext / Jit::LoadContext). Implemented with
/// the pimpl idiom, hence the out-of-line rule-of-five declarations below.
struct Context {
public:
Context();
~Context();
// Copyable and movable; definitions live in the .cpp alongside struct Impl.
Context(const Context&);
Context(Context&&);
Context& operator=(const Context&);
Context& operator=(Context&&);
/// View and modify registers.
std::array<std::uint32_t, 16>& Regs();
const std::array<std::uint32_t, 16>& Regs() const;
std::array<std::uint32_t, 64>& ExtRegs();
const std::array<std::uint32_t, 64>& ExtRegs() const;
/// View and modify CPSR.
std::uint32_t Cpsr() const;
void SetCpsr(std::uint32_t value);
/// View and modify FPSCR.
std::uint32_t Fpscr() const;
void SetFpscr(std::uint32_t value);
private:
// Jit needs direct access to impl in order to transfer the underlying
// JitState during SaveContext/LoadContext.
friend class Jit;
struct Impl;
std::unique_ptr<Impl> impl;
};
} // namespace Dynarmic

View file

@ -15,6 +15,8 @@
namespace Dynarmic {
struct Context;
namespace IR {
class LocationDescriptor;
}
@ -63,12 +65,16 @@ public:
const std::array<std::uint32_t, 64>& ExtRegs() const;
/// View and modify CPSR.
std::uint32_t& Cpsr();
std::uint32_t Cpsr() const;
void SetCpsr(std::uint32_t value);
/// View and modify FPSCR.
std::uint32_t Fpscr() const;
void SetFpscr(std::uint32_t value) const;
void SetFpscr(std::uint32_t value);
Context SaveContext() const;
void SaveContext(Context&) const;
void LoadContext(const Context&);
/**
* Returns true if Jit::Run was called but hasn't returned yet.

View file

@ -82,7 +82,14 @@ void BlockOfCode::RunCode(JitState* jit_state, size_t cycles_to_run) const {
jit_state->cycles_to_run = cycles_to_run;
jit_state->cycles_remaining = cycles_to_run;
run_code(jit_state);
u32 new_rsb_ptr = (jit_state->rsb_ptr - 1) & JitState::RSBPtrMask;
if (jit_state->GetUniqueHash() == jit_state->rsb_location_descriptors[new_rsb_ptr]) {
jit_state->rsb_ptr = new_rsb_ptr;
run_code_from(jit_state, jit_state->rsb_codeptrs[new_rsb_ptr]);
} else {
run_code(jit_state);
}
}
void BlockOfCode::ReturnFromRunCode(bool mxcsr_already_exited) {
@ -102,6 +109,14 @@ void BlockOfCode::ForceReturnFromRunCode(bool mxcsr_already_exited) {
void BlockOfCode::GenRunCode() {
Xbyak::Label loop, enter_mxcsr_then_loop;
align();
run_code_from = getCurr<RunCodeFromFuncType>();
ABI_PushCalleeSaveRegistersAndAdjustStack(this);
mov(r15, ABI_PARAM1);
SwitchMxcsrOnEntry();
jmp(ABI_PARAM2);
align();
run_code = getCurr<RunCodeFuncType>();

View file

@ -138,7 +138,9 @@ private:
CodePtr far_code_ptr;
using RunCodeFuncType = void(*)(JitState*);
using RunCodeFromFuncType = void(*)(JitState*, u64);
RunCodeFuncType run_code = nullptr;
RunCodeFromFuncType run_code_from = nullptr;
static constexpr size_t MXCSR_ALREADY_EXITED = 1 << 0;
static constexpr size_t FORCE_RETURN = 1 << 1;
std::array<const void*, 4> return_from_run_code;

View file

@ -29,6 +29,8 @@
namespace Dynarmic {
namespace BackendX64 {
using namespace Xbyak::util;
constexpr u64 f32_negative_zero = 0x80000000u;
constexpr u64 f32_nan = 0x7fc00000u;
constexpr u64 f32_non_sign_mask = 0x7fffffffu;
@ -43,12 +45,10 @@ constexpr u64 f64_max_s32 = 0x41dfffffffc00000u; // 2147483647 as a double
constexpr u64 f64_min_u32 = 0x0000000000000000u; // 0 as a double
static Xbyak::Address MJitStateReg(Arm::Reg reg) {
using namespace Xbyak::util;
return dword[r15 + offsetof(JitState, Reg) + sizeof(u32) * static_cast<size_t>(reg)];
}
static Xbyak::Address MJitStateExtReg(Arm::ExtReg reg) {
using namespace Xbyak::util;
if (Arm::IsSingleExtReg(reg)) {
size_t index = static_cast<size_t>(reg) - static_cast<size_t>(Arm::ExtReg::S0);
return dword[r15 + offsetof(JitState, ExtReg) + sizeof(u32) * index];
@ -60,11 +60,6 @@ static Xbyak::Address MJitStateExtReg(Arm::ExtReg reg) {
ASSERT_MSG(false, "Should never happen.");
}
static Xbyak::Address MJitStateCpsr() {
using namespace Xbyak::util;
return dword[r15 + offsetof(JitState, Cpsr)];
}
static void EraseInstruction(IR::Block& block, IR::Inst* inst) {
block.Instructions().erase(inst);
inst->Invalidate();
@ -209,21 +204,87 @@ void EmitX64::EmitSetExtendedRegister64(RegAlloc& reg_alloc, IR::Block&, IR::Ins
}
}
// Host-call thunk invoked from JIT-emitted code: reassembles the full guest
// CPSR value from the JitState's split CPSR_* fields.
static u32 GetCpsrImpl(JitState* jit_state) {
return jit_state->Cpsr();
}
void EmitX64::EmitGetCpsr(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
Xbyak::Reg32 result = reg_alloc.ScratchGpr().cvt32();
code->mov(result, MJitStateCpsr());
reg_alloc.DefineValue(inst, result);
if (code->DoesCpuSupport(Xbyak::util::Cpu::tBMI2)) {
Xbyak::Reg32 result = reg_alloc.ScratchGpr().cvt32();
Xbyak::Reg32 b = reg_alloc.ScratchGpr().cvt32();
Xbyak::Reg32 c = reg_alloc.ScratchGpr().cvt32();
code->mov(c, dword[r15 + offsetof(JitState, CPSR_ge)]);
// Here we observe that CPSR_q and CPSR_nzcv are right next to each other in memory,
// so we load them both at the same time with one 64-bit read. This allows us to
// extract all of their bits together at once with one pext.
code->mov(result.cvt64(), qword[r15 + offsetof(JitState, CPSR_q)]);
code->mov(b.cvt64(), 0xF000000000000001ull);
code->pext(result.cvt64(), result.cvt64(), b.cvt64());
code->mov(b, 0x80808080);
code->pext(c.cvt64(), c.cvt64(), b.cvt64());
code->shl(result, 27);
code->shl(c, 16);
code->or_(result, c);
code->mov(b, 0x00000220);
code->mov(c, dword[r15 + offsetof(JitState, CPSR_et)]);
code->pdep(c.cvt64(), c.cvt64(), b.cvt64());
code->or_(result, dword[r15 + offsetof(JitState, CPSR_jaifm)]);
code->or_(result, c);
reg_alloc.DefineValue(inst, result);
} else {
reg_alloc.HostCall(inst);
code->mov(code->ABI_PARAM1, code->r15);
code->CallFunction(&GetCpsrImpl);
}
}
// Host-call thunk invoked from JIT-emitted code: splits a full guest CPSR
// value into the JitState's separate CPSR_* fields.
static void SetCpsrImpl(u32 value, JitState* jit_state) {
jit_state->SetCpsr(value);
}
void EmitX64::EmitSetCpsr(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
auto args = reg_alloc.GetArgumentInfo(inst);
Xbyak::Reg32 arg = reg_alloc.UseGpr(args[0]).cvt32();
code->mov(MJitStateCpsr(), arg);
reg_alloc.HostCall(nullptr, args[0]);
code->mov(code->ABI_PARAM2, code->r15);
code->CallFunction(&SetCpsrImpl);
}
// Emits a store of the NZCV condition flags into JitState::CPSR_nzcv.
// Only bits 31..28 of the argument are meaningful; all other bits are
// masked off before the store.
void EmitX64::EmitSetCpsrNZCV(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
auto args = reg_alloc.GetArgumentInfo(inst);
if (args[0].IsImmediate()) {
u32 imm = args[0].GetImmediateU32();
// Immediate operand: fold the masking into the constant at compile time.
code->mov(dword[r15 + offsetof(JitState, CPSR_nzcv)], u32(imm & 0xF0000000));
} else {
Xbyak::Reg32 a = reg_alloc.UseScratchGpr(args[0]).cvt32();
code->and_(a, 0xF0000000);
code->mov(dword[r15 + offsetof(JitState, CPSR_nzcv)], a);
}
}
// Emits a store of the NZCV flags (bits 31..28) plus the sticky Q flag
// (bit 27), which is kept in its own one-byte JitState::CPSR_q field.
void EmitX64::EmitSetCpsrNZCVQ(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
auto args = reg_alloc.GetArgumentInfo(inst);
if (args[0].IsImmediate()) {
u32 imm = args[0].GetImmediateU32();
// Immediate operand: both stores use compile-time constants.
code->mov(dword[r15 + offsetof(JitState, CPSR_nzcv)], u32(imm & 0xF0000000));
code->mov(code->byte[r15 + offsetof(JitState, CPSR_q)], u8((imm & 0x08000000) != 0 ? 1 : 0));
} else {
Xbyak::Reg32 a = reg_alloc.UseScratchGpr(args[0]).cvt32();
// bt+setc extracts bit 27 (Q) into a byte before NZCV masking clears it.
code->bt(a, 27);
code->setc(code->byte[r15 + offsetof(JitState, CPSR_q)]);
code->and_(a, 0xF0000000);
code->mov(dword[r15 + offsetof(JitState, CPSR_nzcv)], a);
}
}
void EmitX64::EmitGetNFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
Xbyak::Reg32 result = reg_alloc.ScratchGpr().cvt32();
code->mov(result, MJitStateCpsr());
code->mov(result, dword[r15 + offsetof(JitState, CPSR_nzcv)]);
code->shr(result, 31);
reg_alloc.DefineValue(inst, result);
}
@ -234,22 +295,22 @@ void EmitX64::EmitSetNFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
auto args = reg_alloc.GetArgumentInfo(inst);
if (args[0].IsImmediate()) {
if (args[0].GetImmediateU1()) {
code->or_(MJitStateCpsr(), flag_mask);
code->or_(dword[r15 + offsetof(JitState, CPSR_nzcv)], flag_mask);
} else {
code->and_(MJitStateCpsr(), ~flag_mask);
code->and_(dword[r15 + offsetof(JitState, CPSR_nzcv)], ~flag_mask);
}
} else {
Xbyak::Reg32 to_store = reg_alloc.UseScratchGpr(args[0]).cvt32();
code->shl(to_store, flag_bit);
code->and_(MJitStateCpsr(), ~flag_mask);
code->or_(MJitStateCpsr(), to_store);
code->and_(dword[r15 + offsetof(JitState, CPSR_nzcv)], ~flag_mask);
code->or_(dword[r15 + offsetof(JitState, CPSR_nzcv)], to_store);
}
}
void EmitX64::EmitGetZFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
Xbyak::Reg32 result = reg_alloc.ScratchGpr().cvt32();
code->mov(result, MJitStateCpsr());
code->mov(result, dword[r15 + offsetof(JitState, CPSR_nzcv)]);
code->shr(result, 30);
code->and_(result, 1);
reg_alloc.DefineValue(inst, result);
@ -261,22 +322,22 @@ void EmitX64::EmitSetZFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
auto args = reg_alloc.GetArgumentInfo(inst);
if (args[0].IsImmediate()) {
if (args[0].GetImmediateU1()) {
code->or_(MJitStateCpsr(), flag_mask);
code->or_(dword[r15 + offsetof(JitState, CPSR_nzcv)], flag_mask);
} else {
code->and_(MJitStateCpsr(), ~flag_mask);
code->and_(dword[r15 + offsetof(JitState, CPSR_nzcv)], ~flag_mask);
}
} else {
Xbyak::Reg32 to_store = reg_alloc.UseScratchGpr(args[0]).cvt32();
code->shl(to_store, flag_bit);
code->and_(MJitStateCpsr(), ~flag_mask);
code->or_(MJitStateCpsr(), to_store);
code->and_(dword[r15 + offsetof(JitState, CPSR_nzcv)], ~flag_mask);
code->or_(dword[r15 + offsetof(JitState, CPSR_nzcv)], to_store);
}
}
void EmitX64::EmitGetCFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
Xbyak::Reg32 result = reg_alloc.ScratchGpr().cvt32();
code->mov(result, MJitStateCpsr());
code->mov(result, dword[r15 + offsetof(JitState, CPSR_nzcv)]);
code->shr(result, 29);
code->and_(result, 1);
reg_alloc.DefineValue(inst, result);
@ -288,22 +349,22 @@ void EmitX64::EmitSetCFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
auto args = reg_alloc.GetArgumentInfo(inst);
if (args[0].IsImmediate()) {
if (args[0].GetImmediateU1()) {
code->or_(MJitStateCpsr(), flag_mask);
code->or_(dword[r15 + offsetof(JitState, CPSR_nzcv)], flag_mask);
} else {
code->and_(MJitStateCpsr(), ~flag_mask);
code->and_(dword[r15 + offsetof(JitState, CPSR_nzcv)], ~flag_mask);
}
} else {
Xbyak::Reg32 to_store = reg_alloc.UseScratchGpr(args[0]).cvt32();
code->shl(to_store, flag_bit);
code->and_(MJitStateCpsr(), ~flag_mask);
code->or_(MJitStateCpsr(), to_store);
code->and_(dword[r15 + offsetof(JitState, CPSR_nzcv)], ~flag_mask);
code->or_(dword[r15 + offsetof(JitState, CPSR_nzcv)], to_store);
}
}
void EmitX64::EmitGetVFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
Xbyak::Reg32 result = reg_alloc.ScratchGpr().cvt32();
code->mov(result, MJitStateCpsr());
code->mov(result, dword[r15 + offsetof(JitState, CPSR_nzcv)]);
code->shr(result, 28);
code->and_(result, 1);
reg_alloc.DefineValue(inst, result);
@ -315,85 +376,86 @@ void EmitX64::EmitSetVFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
auto args = reg_alloc.GetArgumentInfo(inst);
if (args[0].IsImmediate()) {
if (args[0].GetImmediateU1()) {
code->or_(MJitStateCpsr(), flag_mask);
code->or_(dword[r15 + offsetof(JitState, CPSR_nzcv)], flag_mask);
} else {
code->and_(MJitStateCpsr(), ~flag_mask);
code->and_(dword[r15 + offsetof(JitState, CPSR_nzcv)], ~flag_mask);
}
} else {
Xbyak::Reg32 to_store = reg_alloc.UseScratchGpr(args[0]).cvt32();
code->shl(to_store, flag_bit);
code->and_(MJitStateCpsr(), ~flag_mask);
code->or_(MJitStateCpsr(), to_store);
code->and_(dword[r15 + offsetof(JitState, CPSR_nzcv)], ~flag_mask);
code->or_(dword[r15 + offsetof(JitState, CPSR_nzcv)], to_store);
}
}
void EmitX64::EmitOrQFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
constexpr size_t flag_bit = 27;
constexpr u32 flag_mask = 1u << flag_bit;
auto args = reg_alloc.GetArgumentInfo(inst);
if (args[0].IsImmediate()) {
if (args[0].GetImmediateU1())
code->or_(MJitStateCpsr(), flag_mask);
code->mov(dword[r15 + offsetof(JitState, CPSR_q)], 1);
} else {
Xbyak::Reg32 to_store = reg_alloc.UseScratchGpr(args[0]).cvt32();
Xbyak::Reg8 to_store = reg_alloc.UseGpr(args[0]).cvt8();
code->shl(to_store, flag_bit);
code->or_(MJitStateCpsr(), to_store);
code->or_(code->byte[r15 + offsetof(JitState, CPSR_q)], to_store);
}
}
void EmitX64::EmitGetGEFlags(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
Xbyak::Reg32 result = reg_alloc.ScratchGpr().cvt32();
Xbyak::Reg32 tmp;
if (code->DoesCpuSupport(Xbyak::util::Cpu::tBMI2)) {
tmp = reg_alloc.ScratchGpr().cvt32();
code->mov(tmp, 0x01010101);
}
code->mov(result, MJitStateCpsr());
code->shr(result, 16);
if (code->DoesCpuSupport(Xbyak::util::Cpu::tBMI2)) {
code->pdep(result, result, tmp);
} else {
code->and_(result, 0xF);
code->imul(result, result, 0x00204081);
code->and_(result, 0x01010101);
}
code->imul(result, result, 0xFF);
Xbyak::Xmm result = reg_alloc.ScratchXmm();
code->movd(result, dword[r15 + offsetof(JitState, CPSR_ge)]);
reg_alloc.DefineValue(inst, result);
}
void EmitX64::EmitSetGEFlags(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
constexpr size_t flag_bit = 16;
constexpr u32 flag_mask = 0xFu << flag_bit;
auto args = reg_alloc.GetArgumentInfo(inst);
ASSERT(!args[0].IsImmediate());
Xbyak::Reg32 to_store = reg_alloc.UseScratchGpr(args[0]).cvt32();
if (code->DoesCpuSupport(Xbyak::util::Cpu::tBMI2)) {
Xbyak::Reg32 tmp = reg_alloc.ScratchGpr().cvt32();
code->mov(tmp, 0x80808080);
code->pext(to_store, to_store, tmp);
if (args[0].IsInXmm()) {
Xbyak::Xmm to_store = reg_alloc.UseXmm(args[0]);
code->movd(dword[r15 + offsetof(JitState, CPSR_ge)], to_store);
} else {
code->and_(to_store, 0x80808080);
code->imul(to_store, to_store, 0x00204081);
code->shr(to_store, 28);
Xbyak::Reg32 to_store = reg_alloc.UseGpr(args[0]).cvt32();
code->mov(dword[r15 + offsetof(JitState, CPSR_ge)], to_store);
}
code->shl(to_store, flag_bit);
code->and_(MJitStateCpsr(), ~flag_mask);
code->or_(MJitStateCpsr(), to_store);
}
void EmitX64::EmitBXWritePC(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
// Expands the packed GE flags (argument bits 19..16) into the byte-per-flag
// representation used by JitState::CPSR_ge: each set GE bit becomes a 0xFF
// byte, each clear bit a 0x00 byte.
void EmitX64::EmitSetGEFlagsCompressed(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
auto args = reg_alloc.GetArgumentInfo(inst);
if (args[0].IsImmediate()) {
// Immediate operand: compute the expanded 4-byte value at compile time.
u32 imm = args[0].GetImmediateU32();
u32 ge = 0;
ge |= Common::Bit<19>(imm) ? 0xFF000000 : 0;
ge |= Common::Bit<18>(imm) ? 0x00FF0000 : 0;
ge |= Common::Bit<17>(imm) ? 0x0000FF00 : 0;
ge |= Common::Bit<16>(imm) ? 0x000000FF : 0;
code->mov(dword[r15 + offsetof(JitState, CPSR_ge)], ge);
} else if (code->DoesCpuSupport(Xbyak::util::Cpu::tBMI2)) {
Xbyak::Reg32 a = reg_alloc.UseScratchGpr(args[0]).cvt32();
Xbyak::Reg32 b = reg_alloc.ScratchGpr().cvt32();
// pdep scatters the four GE bits into bit 0 of each byte; multiplying
// by 0xFF then smears each bit across its whole byte.
code->mov(b, 0x01010101);
code->shr(a, 16);
code->pdep(a, a, b);
code->imul(a, a, 0xFF);
code->mov(dword[r15 + offsetof(JitState, CPSR_ge)], a);
} else {
Xbyak::Reg32 a = reg_alloc.UseScratchGpr(args[0]).cvt32();
// Non-BMI2 fallback: multiplying the 4-bit nibble by 0x00204081
// replicates it at 7-bit intervals; the AND isolates bit 0 of each
// byte, and the final multiply by 0xFF smears each bit into a byte.
code->shr(a, 16);
code->and_(a, 0xF);
code->imul(a, a, 0x00204081);
code->and_(a, 0x01010101);
code->imul(a, a, 0xFF);
code->mov(dword[r15 + offsetof(JitState, CPSR_ge)], a);
}
}
void EmitX64::EmitBXWritePC(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
auto args = reg_alloc.GetArgumentInfo(inst);
auto& arg = args[0];
const u32 T_bit = 1 << 5;
// Pseudocode:
// if (new_pc & 1) {
// new_pc &= 0xFFFFFFFE;
@ -402,42 +464,45 @@ void EmitX64::EmitBXWritePC(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
// new_pc &= 0xFFFFFFFC;
// cpsr.T = false;
// }
// We rely on the fact we disallow EFlag from changing within a block.
if (arg.IsImmediate()) {
u32 new_pc = arg.GetImmediateU32();
if (Common::Bit<0>(new_pc)) {
new_pc &= 0xFFFFFFFE;
code->mov(MJitStateReg(Arm::Reg::PC), new_pc);
code->or_(MJitStateCpsr(), T_bit);
} else {
new_pc &= 0xFFFFFFFC;
code->mov(MJitStateReg(Arm::Reg::PC), new_pc);
code->and_(MJitStateCpsr(), ~T_bit);
}
u32 mask = Common::Bit<0>(new_pc) ? 0xFFFFFFFE : 0xFFFFFFFC;
u32 et = 0;
et |= block.Location().EFlag() ? 2 : 0;
et |= Common::Bit<0>(new_pc) ? 1 : 0;
code->mov(MJitStateReg(Arm::Reg::PC), new_pc & mask);
code->mov(dword[r15 + offsetof(JitState, CPSR_et)], et);
} else {
using Xbyak::util::ptr;
if (block.Location().EFlag()) {
Xbyak::Reg32 new_pc = reg_alloc.UseScratchGpr(arg).cvt32();
Xbyak::Reg32 mask = reg_alloc.ScratchGpr().cvt32();
Xbyak::Reg32 et = reg_alloc.ScratchGpr().cvt32();
Xbyak::Reg64 new_pc = reg_alloc.UseScratchGpr(arg);
Xbyak::Reg64 tmp1 = reg_alloc.ScratchGpr();
Xbyak::Reg64 tmp2 = reg_alloc.ScratchGpr();
code->mov(mask, new_pc);
code->and_(mask, 1);
code->lea(et, ptr[mask.cvt64() + 2]);
code->mov(dword[r15 + offsetof(JitState, CPSR_et)], et);
code->lea(mask, ptr[mask.cvt64() + mask.cvt64() * 1 - 4]); // mask = pc & 1 ? 0xFFFFFFFE : 0xFFFFFFFC
code->and_(new_pc, mask);
code->mov(MJitStateReg(Arm::Reg::PC), new_pc);
} else {
Xbyak::Reg32 new_pc = reg_alloc.UseScratchGpr(arg).cvt32();
Xbyak::Reg32 mask = reg_alloc.ScratchGpr().cvt32();
code->mov(tmp1, MJitStateCpsr());
code->mov(tmp2, tmp1);
code->and_(tmp2, u32(~T_bit)); // CPSR.T = 0
code->or_(tmp1, u32(T_bit)); // CPSR.T = 1
code->test(new_pc, u32(1));
code->cmove(tmp1, tmp2); // CPSR.T = pc & 1
code->mov(MJitStateCpsr(), tmp1);
code->lea(tmp2, ptr[new_pc + new_pc * 1]);
code->or_(tmp2, u32(0xFFFFFFFC)); // tmp2 = pc & 1 ? 0xFFFFFFFE : 0xFFFFFFFC
code->and_(new_pc, tmp2);
code->mov(MJitStateReg(Arm::Reg::PC), new_pc);
code->mov(mask, new_pc);
code->and_(mask, 1);
code->mov(dword[r15 + offsetof(JitState, CPSR_et)], mask);
code->lea(mask, ptr[mask.cvt64() + mask.cvt64() * 1 - 4]); // mask = pc & 1 ? 0xFFFFFFFE : 0xFFFFFFFC
code->and_(new_pc, mask);
code->mov(MJitStateReg(Arm::Reg::PC), new_pc);
}
}
}
void EmitX64::EmitCallSupervisor(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
using namespace Xbyak::util;
reg_alloc.HostCall(nullptr);
code->SwitchMxcsrOnExit();
@ -480,57 +545,51 @@ void EmitX64::EmitSetFpscr(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
}
void EmitX64::EmitGetFpscrNZCV(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
using namespace Xbyak::util;
Xbyak::Reg32 result = reg_alloc.ScratchGpr().cvt32();
code->mov(result, dword[r15 + offsetof(JitState, FPSCR_nzcv)]);
reg_alloc.DefineValue(inst, result);
}
void EmitX64::EmitSetFpscrNZCV(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
using namespace Xbyak::util;
auto args = reg_alloc.GetArgumentInfo(inst);
Xbyak::Reg32 value = reg_alloc.UseGpr(args[0]).cvt32();
code->mov(dword[r15 + offsetof(JitState, FPSCR_nzcv)], value);
}
void EmitX64::EmitPushRSB(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
void EmitX64::PushRSBHelper(Xbyak::Reg64 loc_desc_reg, Xbyak::Reg64 index_reg, u64 target_hash) {
using namespace Xbyak::util;
auto args = reg_alloc.GetArgumentInfo(inst);
ASSERT(args[0].IsImmediate());
u64 unique_hash_of_target = args[0].GetImmediateU64();
auto iter = block_descriptors.find(unique_hash_of_target);
auto iter = block_descriptors.find(target_hash);
CodePtr target_code_ptr = iter != block_descriptors.end()
? iter->second.entrypoint
: code->GetReturnFromRunCodeAddress();
Xbyak::Reg64 code_ptr_reg = reg_alloc.ScratchGpr({HostLoc::RCX});
Xbyak::Reg64 loc_desc_reg = reg_alloc.ScratchGpr();
Xbyak::Reg32 index_reg = reg_alloc.ScratchGpr().cvt32();
code->mov(index_reg.cvt32(), dword[r15 + offsetof(JitState, rsb_ptr)]);
code->mov(index_reg, dword[r15 + offsetof(JitState, rsb_ptr)]);
code->add(index_reg, 1);
code->and_(index_reg, u32(JitState::RSBSize - 1));
code->mov(loc_desc_reg, target_hash);
code->mov(loc_desc_reg, unique_hash_of_target);
patch_information[unique_hash_of_target].mov_rcx.emplace_back(code->getCurr());
patch_information[target_hash].mov_rcx.emplace_back(code->getCurr());
EmitPatchMovRcx(target_code_ptr);
Xbyak::Label label;
for (size_t i = 0; i < JitState::RSBSize; ++i) {
code->cmp(loc_desc_reg, qword[r15 + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]);
code->je(label, code->T_SHORT);
}
code->mov(qword[r15 + index_reg * 8 + offsetof(JitState, rsb_location_descriptors)], loc_desc_reg);
code->mov(qword[r15 + index_reg * 8 + offsetof(JitState, rsb_codeptrs)], rcx);
code->mov(dword[r15 + offsetof(JitState, rsb_ptr)], index_reg);
code->mov(qword[r15 + index_reg.cvt64() * 8 + offsetof(JitState, rsb_location_descriptors)], loc_desc_reg);
code->mov(qword[r15 + index_reg.cvt64() * 8 + offsetof(JitState, rsb_codeptrs)], code_ptr_reg);
code->L(label);
code->add(index_reg.cvt32(), 1);
code->and_(index_reg.cvt32(), u32(JitState::RSBPtrMask));
code->mov(dword[r15 + offsetof(JitState, rsb_ptr)], index_reg.cvt32());
}
// Emits code that pushes the target block's (location hash, code pointer)
// pair onto the return stack buffer in JitState.
void EmitX64::EmitPushRSB(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
auto args = reg_alloc.GetArgumentInfo(inst);
ASSERT(args[0].IsImmediate());
u64 unique_hash_of_target = args[0].GetImmediateU64();
// Reserve RCX: PushRSBHelper records a patchable mov-to-rcx (mov_rcx patch
// information) that holds the target's entry point.
reg_alloc.ScratchGpr({HostLoc::RCX});
Xbyak::Reg64 loc_desc_reg = reg_alloc.ScratchGpr();
Xbyak::Reg64 index_reg = reg_alloc.ScratchGpr();
PushRSBHelper(loc_desc_reg, index_reg, unique_hash_of_target);
}
void EmitX64::EmitGetCarryFromOp(RegAlloc&, IR::Block&, IR::Inst*) {
@ -2189,7 +2248,6 @@ void EmitX64::EmitPackedSelect(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst)
}
static void DenormalsAreZero32(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::Reg32 gpr_scratch) {
using namespace Xbyak::util;
Xbyak::Label end;
// We need to report back whether we've found a denormal on input.
@ -2206,7 +2264,6 @@ static void DenormalsAreZero32(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::R
}
static void DenormalsAreZero64(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::Reg64 gpr_scratch) {
using namespace Xbyak::util;
Xbyak::Label end;
auto mask = code->MConst(f64_non_sign_mask);
@ -2225,7 +2282,6 @@ static void DenormalsAreZero64(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::R
}
static void FlushToZero32(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::Reg32 gpr_scratch) {
using namespace Xbyak::util;
Xbyak::Label end;
code->movd(gpr_scratch, xmm_value);
@ -2239,7 +2295,6 @@ static void FlushToZero32(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::Reg32
}
static void FlushToZero64(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::Reg64 gpr_scratch) {
using namespace Xbyak::util;
Xbyak::Label end;
auto mask = code->MConst(f64_non_sign_mask);
@ -2479,7 +2534,6 @@ static void SetFpscrNzcvFromFlags(BlockOfCode* code, RegAlloc& reg_alloc) {
reg_alloc.ScratchGpr({HostLoc::RCX}); // shifting requires use of cl
Xbyak::Reg32 nzcv = reg_alloc.ScratchGpr().cvt32();
using namespace Xbyak::util;
code->mov(nzcv, 0x28630000);
code->sete(cl);
@ -2798,14 +2852,10 @@ void EmitX64::EmitFPU32ToDouble(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst)
void EmitX64::EmitClearExclusive(RegAlloc&, IR::Block&, IR::Inst*) {
using namespace Xbyak::util;
code->mov(code->byte[r15 + offsetof(JitState, exclusive_state)], u8(0));
}
void EmitX64::EmitSetExclusive(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
using namespace Xbyak::util;
auto args = reg_alloc.GetArgumentInfo(inst);
ASSERT(args[1].IsImmediate());
Xbyak::Reg32 address = reg_alloc.UseGpr(args[0]).cvt32();
@ -2824,7 +2874,6 @@ static void ReadMemory(BlockOfCode* code, RegAlloc& reg_alloc, IR::Inst* inst, U
return;
}
using namespace Xbyak::util;
reg_alloc.UseScratch(args[0], ABI_PARAM1);
@ -2878,7 +2927,6 @@ static void WriteMemory(BlockOfCode* code, RegAlloc& reg_alloc, IR::Inst* inst,
return;
}
using namespace Xbyak::util;
reg_alloc.ScratchGpr({ABI_RETURN});
reg_alloc.UseScratch(args[0], ABI_PARAM1);
@ -2965,7 +3013,6 @@ static void ExclusiveWrite(BlockOfCode* code, RegAlloc& reg_alloc, IR::Inst* ins
Xbyak::Reg32 passed = reg_alloc.ScratchGpr().cvt32();
Xbyak::Reg32 tmp = code->ABI_RETURN.cvt32(); // Use one of the unusued HostCall registers.
using namespace Xbyak::util;
Xbyak::Label end;
code->mov(passed, u32(1));
@ -3268,18 +3315,15 @@ void EmitX64::EmitCoprocStoreWords(RegAlloc& reg_alloc, IR::Block&, IR::Inst* in
}
void EmitX64::EmitAddCycles(size_t cycles) {
using namespace Xbyak::util;
ASSERT(cycles < std::numeric_limits<u32>::max());
code->sub(qword[r15 + offsetof(JitState, cycles_remaining)], static_cast<u32>(cycles));
}
static Xbyak::Label EmitCond(BlockOfCode* code, Arm::Cond cond) {
using namespace Xbyak::util;
Xbyak::Label label;
const Xbyak::Reg32 cpsr = eax;
code->mov(cpsr, MJitStateCpsr());
code->mov(cpsr, dword[r15 + offsetof(JitState, CPSR_nzcv)]);
constexpr size_t n_shift = 31;
constexpr size_t z_shift = 30;
@ -3424,22 +3468,16 @@ void EmitX64::EmitTerminal(IR::Term::ReturnToDispatch, IR::LocationDescriptor) {
code->ReturnFromRunCode();
}
void EmitX64::EmitTerminal(IR::Term::LinkBlock terminal, IR::LocationDescriptor initial_location) {
using namespace Xbyak::util;
// Packs a location descriptor's E and T flags into the two-bit encoding
// stored in JitState::CPSR_et: bit 1 = E (endianness), bit 0 = T (Thumb).
static u32 CalculateCpsr_et(const IR::LocationDescriptor& desc) {
u32 et = 0;
et |= desc.EFlag() ? 2 : 0;
et |= desc.TFlag() ? 1 : 0;
return et;
}
if (terminal.next.TFlag() != initial_location.TFlag()) {
if (terminal.next.TFlag()) {
code->or_(MJitStateCpsr(), u32(1 << 5));
} else {
code->and_(MJitStateCpsr(), u32(~(1 << 5)));
}
}
if (terminal.next.EFlag() != initial_location.EFlag()) {
if (terminal.next.EFlag()) {
code->or_(MJitStateCpsr(), u32(1 << 9));
} else {
code->and_(MJitStateCpsr(), u32(~(1 << 9)));
}
void EmitX64::EmitTerminal(IR::Term::LinkBlock terminal, IR::LocationDescriptor initial_location) {
if (CalculateCpsr_et(terminal.next) != CalculateCpsr_et(initial_location)) {
code->mov(dword[r15 + offsetof(JitState, CPSR_et)], CalculateCpsr_et(terminal.next));
}
code->cmp(qword[r15 + offsetof(JitState, cycles_remaining)], 0);
@ -3450,27 +3488,21 @@ void EmitX64::EmitTerminal(IR::Term::LinkBlock terminal, IR::LocationDescriptor
} else {
EmitPatchJg(terminal.next);
}
Xbyak::Label dest;
code->jmp(dest, Xbyak::CodeGenerator::T_NEAR);
code->SwitchToFarCode();
code->align(16);
code->L(dest);
code->mov(MJitStateReg(Arm::Reg::PC), terminal.next.PC());
code->ForceReturnFromRunCode(); // TODO: Check cycles, Properly do a link
PushRSBHelper(rax, rbx, terminal.next.UniqueHash());
code->ForceReturnFromRunCode();
code->SwitchToNearCode();
}
void EmitX64::EmitTerminal(IR::Term::LinkBlockFast terminal, IR::LocationDescriptor initial_location) {
using namespace Xbyak::util;
if (terminal.next.TFlag() != initial_location.TFlag()) {
if (terminal.next.TFlag()) {
code->or_(MJitStateCpsr(), u32(1 << 5));
} else {
code->and_(MJitStateCpsr(), u32(~(1 << 5)));
}
}
if (terminal.next.EFlag() != initial_location.EFlag()) {
if (terminal.next.EFlag()) {
code->or_(MJitStateCpsr(), u32(1 << 9));
} else {
code->and_(MJitStateCpsr(), u32(~(1 << 9)));
}
if (CalculateCpsr_et(terminal.next) != CalculateCpsr_et(initial_location)) {
code->mov(dword[r15 + offsetof(JitState, CPSR_et)], CalculateCpsr_et(terminal.next));
}
patch_information[terminal.next.UniqueHash()].jmp.emplace_back(code->getCurr());
@ -3482,23 +3514,21 @@ void EmitX64::EmitTerminal(IR::Term::LinkBlockFast terminal, IR::LocationDescrip
}
void EmitX64::EmitTerminal(IR::Term::PopRSBHint, IR::LocationDescriptor) {
using namespace Xbyak::util;
// This calculation has to match up with IREmitter::PushRSB
code->mov(ebx, MJitStateCpsr());
// TODO: Optimization is available here based on known state of FPSCR_mode and CPSR_et.
code->mov(ecx, MJitStateReg(Arm::Reg::PC));
code->and_(ebx, u32((1 << 5) | (1 << 9)));
code->shr(ebx, 2);
code->or_(ebx, dword[r15 + offsetof(JitState, FPSCR_mode)]);
code->shl(rbx, 32);
code->shl(rcx, 32);
code->mov(ebx, dword[r15 + offsetof(JitState, FPSCR_mode)]);
code->or_(ebx, dword[r15 + offsetof(JitState, CPSR_et)]);
code->or_(rbx, rcx);
code->mov(rax, reinterpret_cast<u64>(code->GetReturnFromRunCodeAddress()));
for (size_t i = 0; i < JitState::RSBSize; ++i) {
code->cmp(rbx, qword[r15 + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]);
code->cmove(rax, qword[r15 + offsetof(JitState, rsb_codeptrs) + i * sizeof(u64)]);
}
code->mov(eax, dword[r15 + offsetof(JitState, rsb_ptr)]);
code->sub(eax, 1);
code->and_(eax, u32(JitState::RSBPtrMask));
code->mov(dword[r15 + offsetof(JitState, rsb_ptr)], eax);
code->cmp(rbx, qword[r15 + offsetof(JitState, rsb_location_descriptors) + rax * sizeof(u64)]);
code->jne(code->GetReturnFromRunCodeAddress());
code->mov(rax, qword[r15 + offsetof(JitState, rsb_codeptrs) + rax * sizeof(u64)]);
code->jmp(rax);
}
@ -3510,8 +3540,6 @@ void EmitX64::EmitTerminal(IR::Term::If terminal, IR::LocationDescriptor initial
}
void EmitX64::EmitTerminal(IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location) {
using namespace Xbyak::util;
code->cmp(code->byte[r15 + offsetof(JitState, halt_requested)], u8(0));
code->jne(code->GetForceReturnFromRunCodeAddress());
EmitTerminal(terminal.else_, initial_location);

View file

@ -73,6 +73,7 @@ private:
// Helpers
void EmitAddCycles(size_t cycles);
void EmitCondPrelude(const IR::Block& block);
void PushRSBHelper(Xbyak::Reg64 loc_desc_reg, Xbyak::Reg64 index_reg, u64 target_hash);
// Terminal instruction emitters
void EmitTerminal(IR::Terminal terminal, IR::LocationDescriptor initial_location);

View file

@ -20,6 +20,7 @@
#include "common/assert.h"
#include "common/common_types.h"
#include "common/scope_exit.h"
#include "dynarmic/context.h"
#include "dynarmic/dynarmic.h"
#include "frontend/ir/basic_block.h"
#include "frontend/ir/location_descriptor.h"
@ -45,6 +46,7 @@ struct Jit::Impl {
const UserCallbacks callbacks;
// Requests made during execution to invalidate the cache are queued up here.
size_t invalid_cache_generation = 0;
boost::icl::interval_set<u32> invalid_cache_ranges;
bool invalidate_entire_cache = false;
@ -98,6 +100,7 @@ struct Jit::Impl {
invalid_cache_ranges.clear();
invalidate_entire_cache = false;
invalid_cache_generation++;
return;
}
@ -108,6 +111,7 @@ struct Jit::Impl {
jit_state.ResetRSB();
emitter.InvalidateCacheRanges(invalid_cache_ranges);
invalid_cache_ranges.clear();
invalid_cache_generation++;
}
void RequestCacheInvalidation() {
@ -127,7 +131,7 @@ private:
JitState& jit_state = this_.jit_state;
u32 pc = jit_state.Reg[15];
Arm::PSR cpsr{jit_state.Cpsr};
Arm::PSR cpsr{jit_state.Cpsr()};
Arm::FPSCR fpscr{jit_state.FPSCR_mode};
IR::LocationDescriptor descriptor{pc, cpsr, fpscr};
@ -205,22 +209,107 @@ const std::array<u32, 64>& Jit::ExtRegs() const {
return impl->jit_state.ExtReg;
}
u32& Jit::Cpsr() {
return impl->jit_state.Cpsr;
u32 Jit::Cpsr() const {
return impl->jit_state.Cpsr();
}
u32 Jit::Cpsr() const {
return impl->jit_state.Cpsr;
void Jit::SetCpsr(u32 value) {
return impl->jit_state.SetCpsr(value);
}
u32 Jit::Fpscr() const {
return impl->jit_state.Fpscr();
}
void Jit::SetFpscr(u32 value) const {
void Jit::SetFpscr(u32 value) {
return impl->jit_state.SetFpscr(value);
}
// Convenience overload: returns a freshly constructed Context holding the
// current guest state, delegating to SaveContext(Context&).
Context Jit::SaveContext() const {
Context ctx;
SaveContext(ctx);
return ctx;
}
// Private implementation of Context: a full JitState snapshot plus the cache
// generation at save time (used by Jit::LoadContext to decide whether the
// saved RSB may be restored or must be reset).
struct Context::Impl {
JitState jit_state;
size_t invalid_cache_generation;
};
// Default-construct with a clean JitState; the RSB must be explicitly reset so
// that zero-initialized entries are never mistaken for valid predictions.
Context::Context() : impl(std::make_unique<Context::Impl>()) { impl->jit_state.ResetRSB(); }
Context::~Context() = default;
// Copy operations deep-copy the pimpl state.
Context::Context(const Context& ctx) : impl(std::make_unique<Context::Impl>(*ctx.impl)) {}
// NOTE(review): after a move the source Context holds a null impl; any use of
// it other than assignment or destruction would dereference null — confirm
// callers never reuse a moved-from Context.
Context::Context(Context&& ctx) : impl(std::move(ctx.impl)) {}
Context& Context::operator=(const Context& ctx) {
    *impl = *ctx.impl;
    return *this;
}
Context& Context::operator=(Context&& ctx) {
    impl = std::move(ctx.impl);
    return *this;
}
/// View and modify the 16-entry guest register file (R0-R15).
std::array<std::uint32_t, 16>& Context::Regs() {
    return impl->jit_state.Reg;
}
const std::array<std::uint32_t, 16>& Context::Regs() const {
    return impl->jit_state.Reg;
}
/// View and modify the 64 extension registers.
std::array<std::uint32_t, 64>& Context::ExtRegs() {
    return impl->jit_state.ExtReg;
}
const std::array<std::uint32_t, 64>& Context::ExtRegs() const {
    return impl->jit_state.ExtReg;
}
/// Read the guest CPSR, reassembled from the JIT's split flag fields
/// (see JitState::Cpsr).
std::uint32_t Context::Cpsr() const {
    return impl->jit_state.Cpsr();
}

/// Write the guest CPSR, splitting it into the JIT's internal fields
/// (see JitState::SetCpsr).
void Context::SetCpsr(std::uint32_t value) {
    impl->jit_state.SetCpsr(value);
}

/// Read the guest FPSCR.
std::uint32_t Context::Fpscr() const {
    return impl->jit_state.Fpscr();
}

/// Write the guest FPSCR.
void Context::SetFpscr(std::uint32_t value) {
    // Fixed: previously written as `return impl->...;` — returning a void
    // expression from a void function is legal but misleading.
    impl->jit_state.SetFpscr(value);
}
// Copies all saved/restored guest state from src into dest.
// CPSR is held split across five fields (see JitState::Cpsr/SetCpsr) and
// FPSCR across several; every piece is transferred explicitly.
// Scratch state (Spill) and the exclusive-monitor fields are NOT copied —
// NOTE(review): presumably intentional since they are transient, but confirm
// that saving a context mid-exclusive-sequence is not expected to round-trip.
void TransferJitState(JitState& dest, const JitState& src, bool reset_rsb) {
    dest.CPSR_ge = src.CPSR_ge;
    dest.CPSR_et = src.CPSR_et;
    dest.CPSR_q = src.CPSR_q;
    dest.CPSR_nzcv = src.CPSR_nzcv;
    dest.CPSR_jaifm = src.CPSR_jaifm;
    dest.Reg = src.Reg;
    dest.ExtReg = src.ExtReg;
    dest.guest_MXCSR = src.guest_MXCSR;
    dest.FPSCR_IDC = src.FPSCR_IDC;
    dest.FPSCR_UFC = src.FPSCR_UFC;
    dest.FPSCR_mode = src.FPSCR_mode;
    dest.FPSCR_nzcv = src.FPSCR_nzcv;
    if (reset_rsb) {
        // Caller has determined src's RSB code pointers may be stale; fill
        // the RSB with sentinel values instead of importing them.
        dest.ResetRSB();
    } else {
        dest.rsb_ptr = src.rsb_ptr;
        dest.rsb_location_descriptors = src.rsb_location_descriptors;
        dest.rsb_codeptrs = src.rsb_codeptrs;
    }
}
// Snapshot the current guest state into ctx. The RSB is copied as-is and
// tagged with the current cache generation so LoadContext can later tell
// whether its code pointers are still trustworthy.
void Jit::SaveContext(Context& ctx) const {
    TransferJitState(ctx.impl->jit_state, impl->jit_state, false);
    ctx.impl->invalid_cache_generation = impl->invalid_cache_generation;
}

// Restore guest state from ctx. If any cache invalidation has occurred since
// the context was saved (generation mismatch), the saved RSB may reference
// freed or recompiled code, so it is reset rather than restored.
void Jit::LoadContext(const Context& ctx) {
    bool reset_rsb = ctx.impl->invalid_cache_generation != impl->invalid_cache_generation;
    TransferJitState(impl->jit_state, ctx.impl->jit_state, reset_rsb);
}
// Forwards to the backend: returns a disassembly of the compiled host code
// for the block identified by descriptor.
std::string Jit::Disassemble(const IR::LocationDescriptor& descriptor) {
    return impl->Disassemble(descriptor);
}

View file

@ -14,6 +14,81 @@
namespace Dynarmic {
namespace BackendX64 {
/**
* CPSR Bits
* =========
*
* ARM CPSR flags
* --------------
* N bit 31 Negative flag
* Z bit 30 Zero flag
* C bit 29 Carry flag
* V bit 28 oVerflow flag
* Q bit 27 Saturation flag
* J bit 24 Jazelle instruction set flag
* GE bits 16-19 Greater than or Equal flags
* E bit 9 Data Endianness flag
* A bit 8 Disable imprecise Aborts
* I bit 7 Disable IRQ interrupts
* F bit 6 Disable FIQ interrupts
* T bit 5 Thumb instruction set flag
* M bits 0-4 Processor Mode bits
*
* x64 LAHF+SETO flags
* -------------------
* SF bit 15 Sign flag
* ZF bit 14 Zero flag
* AF bit 12 Auxiliary flag
* PF bit 10 Parity flag
* CF bit 8 Carry flag
* OF bit 0 Overflow flag
*/
// Reassemble a full ARM-format CPSR word from the split internal fields.
// Bit positions are documented in the comment block above; this must be the
// exact inverse of SetCpsr for every value SetCpsr can produce.
u32 JitState::Cpsr() const {
    // Invariants maintained by SetCpsr and the emitted code.
    ASSERT((CPSR_nzcv & ~0xF0000000) == 0);
    ASSERT((CPSR_q & ~1) == 0);
    ASSERT((CPSR_et & ~3) == 0);
    ASSERT((CPSR_jaifm & ~0x010001DF) == 0);

    u32 cpsr = 0;

    // NZCV flags: already stored in ARM bit positions (28-31).
    cpsr |= CPSR_nzcv;
    // Q flag (bit 27): stored as 0/1.
    cpsr |= CPSR_q ? 1 << 27 : 0;
    // GE flags (bits 16-19): each flag is held as a whole byte
    // (0x00 or 0xFF — see SetCpsr); the top bit of each byte carries it.
    cpsr |= Common::Bit<31>(CPSR_ge) ? 1 << 19 : 0;
    cpsr |= Common::Bit<23>(CPSR_ge) ? 1 << 18 : 0;
    cpsr |= Common::Bit<15>(CPSR_ge) ? 1 << 17 : 0;
    cpsr |= Common::Bit<7>(CPSR_ge) ? 1 << 16 : 0;
    // E flag (bit 9) and T flag (bit 5): packed as bits 1 and 0 of CPSR_et.
    cpsr |= Common::Bit<1>(CPSR_et) ? 1 << 9 : 0;
    cpsr |= Common::Bit<0>(CPSR_et) ? 1 << 5 : 0;
    // Remaining flags (J, A, I, F, M): stored in ARM bit positions already.
    cpsr |= CPSR_jaifm;

    return cpsr;
}
// Split an ARM-format CPSR word into the JIT's internal fields.
// Must be the exact inverse of Cpsr() above.
void JitState::SetCpsr(u32 cpsr) {
    // NZCV flags: kept in ARM bit positions (28-31).
    CPSR_nzcv = cpsr & 0xF0000000;
    // Q flag (bit 27) -> 0/1.
    CPSR_q = Common::Bit<27>(cpsr) ? 1 : 0;
    // GE flags (bits 16-19): expand each flag to a full byte (0xFF / 0x00).
    CPSR_ge = 0;
    CPSR_ge |= Common::Bit<19>(cpsr) ? 0xFF000000 : 0;
    CPSR_ge |= Common::Bit<18>(cpsr) ? 0x00FF0000 : 0;
    CPSR_ge |= Common::Bit<17>(cpsr) ? 0x0000FF00 : 0;
    CPSR_ge |= Common::Bit<16>(cpsr) ? 0x000000FF : 0;
    // E flag (bit 9) -> bit 1, T flag (bit 5) -> bit 0 of CPSR_et.
    CPSR_et = 0;
    CPSR_et |= Common::Bit<9>(cpsr) ? 2 : 0;
    CPSR_et |= Common::Bit<5>(cpsr) ? 1 : 0;
    // Other flags (J, A, I, F, M) kept in place.
    // NOTE(review): this mask (0x07F0FDDF) admits bits 10-15 and 20-26, which
    // the ASSERT in Cpsr() (mask 0x010001DF) would reject. Harmless for valid
    // CPSR values where those bits are reserved-zero, but confirm intent.
    CPSR_jaifm = cpsr & 0x07F0FDDF;
}
void JitState::ResetRSB() {
rsb_location_descriptors.fill(0xFFFFFFFFFFFFFFFFull);
rsb_codeptrs.fill(0);
@ -124,5 +199,9 @@ void JitState::SetFpscr(u32 FPSCR) {
}
}
// This calculation has to match up with IR::LocationDescriptor::UniqueHash:
// PC in the upper 32 bits; FPSCR mode bits plus the E/T bits (CPSR_et, in
// bits 1 and 0) in the lower 32 bits.
u64 JitState::GetUniqueHash() const {
    return CPSR_et | FPSCR_mode | (static_cast<u64>(Reg[15]) << 32);
}
} // namespace BackendX64
} // namespace Dynarmic

View file

@ -25,10 +25,18 @@ constexpr size_t SpillCount = 64;
struct JitState {
JitState() { ResetRSB(); }
u32 Cpsr = 0;
std::array<u32, 16> Reg{}; // Current register file.
// TODO: Mode-specific register sets unimplemented.
u32 CPSR_ge = 0;
u32 CPSR_et = 0;
u32 CPSR_q = 0;
u32 CPSR_nzcv = 0;
u32 CPSR_jaifm = 0;
u32 Cpsr() const;
void SetCpsr(u32 cpsr);
alignas(u64) std::array<u32, 64> ExtReg{}; // Extension registers.
std::array<u64, SpillCount> Spill{}; // Spill.
@ -46,6 +54,7 @@ struct JitState {
u32 exclusive_address = 0;
static constexpr size_t RSBSize = 8; // MUST be a power of 2.
static constexpr size_t RSBPtrMask = RSBSize - 1;
u32 rsb_ptr = 0;
std::array<u64, RSBSize> rsb_location_descriptors;
std::array<u64, RSBSize> rsb_codeptrs;
@ -58,6 +67,8 @@ struct JitState {
u32 old_FPSCR = 0;
u32 Fpscr() const;
void SetFpscr(u32 FPSCR);
u64 GetUniqueHash() const;
};
#ifdef _MSC_VER

View file

@ -117,6 +117,14 @@ void IREmitter::SetCpsr(const Value& value) {
Inst(Opcode::SetCpsr, {value});
}
// Emit an IR instruction that overwrites only the NZCV bits (28-31) of CPSR.
void IREmitter::SetCpsrNZCV(const Value& value) {
    Inst(Opcode::SetCpsrNZCV, {value});
}

// Emit an IR instruction that overwrites the NZCV and Q bits (27-31) of CPSR.
void IREmitter::SetCpsrNZCVQ(const Value& value) {
    Inst(Opcode::SetCpsrNZCVQ, {value});
}
Value IREmitter::GetCFlag() {
return Inst(Opcode::GetCFlag, {});
}
@ -149,6 +157,10 @@ void IREmitter::SetGEFlags(const Value& value) {
Inst(Opcode::SetGEFlags, {value});
}
// Emit an IR instruction that sets the GE flags from their packed CPSR form
// (one flag per bit in bits 16-19), as opposed to SetGEFlags above.
void IREmitter::SetGEFlagsCompressed(const Value& value) {
    Inst(Opcode::SetGEFlagsCompressed, {value});
}
Value IREmitter::GetFpscr() {
return Inst(Opcode::GetFpscr, {});
}

View file

@ -84,6 +84,8 @@ public:
Value GetCpsr();
void SetCpsr(const Value& value);
void SetCpsrNZCV(const Value& value);
void SetCpsrNZCVQ(const Value& value);
Value GetCFlag();
void SetNFlag(const Value& value);
void SetZFlag(const Value& value);
@ -92,6 +94,7 @@ public:
void OrQFlag(const Value& value);
Value GetGEFlags();
void SetGEFlags(const Value& value);
void SetGEFlagsCompressed(const Value& value);
Value GetFpscr();
void SetFpscr(const Value& new_fpscr);

View file

@ -75,10 +75,10 @@ public:
u64 UniqueHash() const {
// This value MUST BE UNIQUE.
// This calculation has to match up with EmitX64::EmitTerminalPopRSBHint
u64 pc_u64 = u64(arm_pc);
u64 fpscr_u64 = u64(fpscr.Value()) << 32;
u64 t_u64 = cpsr.T() ? (1ull << 35) : 0;
u64 e_u64 = cpsr.E() ? (1ull << 39) : 0;
u64 pc_u64 = u64(arm_pc) << 32;
u64 fpscr_u64 = u64(fpscr.Value());
u64 t_u64 = cpsr.T() ? 1 : 0;
u64 e_u64 = cpsr.E() ? 2 : 0;
return pc_u64 | fpscr_u64 | t_u64 | e_u64;
}

View file

@ -112,12 +112,15 @@ bool Inst::ReadsFromCPSR() const {
bool Inst::WritesToCPSR() const {
switch (op) {
case Opcode::SetCpsr:
case Opcode::SetCpsrNZCV:
case Opcode::SetCpsrNZCVQ:
case Opcode::SetNFlag:
case Opcode::SetZFlag:
case Opcode::SetCFlag:
case Opcode::SetVFlag:
case Opcode::OrQFlag:
case Opcode::SetGEFlags:
case Opcode::SetGEFlagsCompressed:
return true;
default:

View file

@ -13,6 +13,8 @@ OPCODE(SetExtendedRegister32, T::Void, T::ExtRegRef, T::F32
OPCODE(SetExtendedRegister64, T::Void, T::ExtRegRef, T::F64 )
OPCODE(GetCpsr, T::U32, )
OPCODE(SetCpsr, T::Void, T::U32 )
OPCODE(SetCpsrNZCV, T::Void, T::U32 )
OPCODE(SetCpsrNZCVQ, T::Void, T::U32 )
OPCODE(GetNFlag, T::U1, )
OPCODE(SetNFlag, T::Void, T::U1 )
OPCODE(GetZFlag, T::U1, )
@ -24,6 +26,7 @@ OPCODE(SetVFlag, T::Void, T::U1
OPCODE(OrQFlag, T::Void, T::U1 )
OPCODE(GetGEFlags, T::U32, )
OPCODE(SetGEFlags, T::Void, T::U32 )
OPCODE(SetGEFlagsCompressed, T::Void, T::U32 )
OPCODE(BXWritePC, T::Void, T::U32 )
OPCODE(CallSupervisor, T::Void, T::U32 )
OPCODE(GetFpscr, T::U32, )

View file

@ -92,9 +92,8 @@ bool ArmTranslatorVisitor::arm_MRC(Cond cond, size_t opc1, CoprocReg CRn, Reg t,
if (t != Reg::PC) {
ir.SetRegister(t, word);
} else {
auto old_cpsr = ir.And(ir.GetCpsr(), ir.Imm32(0x0FFFFFFF));
auto new_cpsr_nzcv = ir.And(word, ir.Imm32(0xF0000000));
ir.SetCpsr(ir.Or(old_cpsr, new_cpsr_nzcv));
ir.SetCpsrNZCV(new_cpsr_nzcv);
}
}
return true;

View file

@ -6,6 +6,8 @@
#include "translate_arm.h"
#include "common/bit_util.h"
namespace Dynarmic {
namespace Arm {
@ -30,14 +32,12 @@ bool ArmTranslatorVisitor::arm_MSR_imm(Cond cond, int mask, int rotate, Imm8 imm
ASSERT_MSG(write_nzcvq || write_g, "Decode error");
// MSR <spec_reg>, #<imm32>
if (ConditionPassed(cond)) {
u32 cpsr_mask = 0;
if (write_nzcvq)
cpsr_mask |= 0xF8000000;
if (write_g)
cpsr_mask |= 0x000F0000;
auto old_cpsr = ir.And(ir.GetCpsr(), ir.Imm32(~cpsr_mask));
auto new_cpsr = ir.Imm32(imm32 & cpsr_mask);
ir.SetCpsr(ir.Or(old_cpsr, new_cpsr));
if (write_nzcvq) {
ir.SetCpsrNZCVQ(ir.Imm32(imm32 & 0xF8000000));
}
if (write_g) {
ir.SetGEFlagsCompressed(ir.Imm32(imm32 & 0x000F0000));
}
}
return true;
}
@ -51,14 +51,13 @@ bool ArmTranslatorVisitor::arm_MSR_reg(Cond cond, int mask, Reg n) {
return UnpredictableInstruction();
// MSR <spec_reg>, #<imm32>
if (ConditionPassed(cond)) {
u32 cpsr_mask = 0;
if (write_nzcvq)
cpsr_mask |= 0xF8000000;
if (write_g)
cpsr_mask |= 0x000F0000;
auto old_cpsr = ir.And(ir.GetCpsr(), ir.Imm32(~cpsr_mask));
auto new_cpsr = ir.And(ir.GetRegister(n), ir.Imm32(cpsr_mask));
ir.SetCpsr(ir.Or(old_cpsr, new_cpsr));
auto value = ir.GetRegister(n);
if (write_nzcvq){
ir.SetCpsrNZCVQ(ir.And(value, ir.Imm32(0xF8000000)));
}
if (write_g){
ir.SetGEFlagsCompressed(ir.And(value, ir.Imm32(0x000F0000)));
}
}
return true;
}

View file

@ -543,8 +543,7 @@ bool ArmTranslatorVisitor::vfp2_VMRS(Cond cond, Reg t) {
if (t == Reg::R15) {
// This encodes ASPR_nzcv access
auto nzcv = ir.GetFpscrNZCV();
auto old_cpsr = ir.And(ir.GetCpsr(), ir.Imm32(0x0FFFFFFF));
ir.SetCpsr(ir.Or(nzcv, old_cpsr));
ir.SetCpsrNZCV(nzcv);
} else {
ir.SetRegister(t, ir.GetFpscr());
}

View file

@ -682,7 +682,7 @@ struct ThumbTranslatorVisitor final {
ir.LoadWritePC(data);
address = ir.Add(address, ir.Imm32(4));
ir.SetRegister(Reg::SP, address);
ir.SetTerm(IR::Term::ReturnToDispatch{});
ir.SetTerm(IR::Term::PopRSBHint{});
return false;
} else {
ir.SetRegister(Reg::SP, address);

View file

@ -115,7 +115,7 @@ static void InterpreterFallback(u32 pc, Dynarmic::Jit* jit, void*) {
jit->Regs() = interp_state.Reg;
jit->ExtRegs() = interp_state.ExtReg;
jit->Cpsr() = interp_state.Cpsr;
jit->SetCpsr(interp_state.Cpsr);
jit->SetFpscr(interp_state.VFP[VFP_FPSCR]);
}
@ -196,7 +196,7 @@ static bool DoesBehaviorMatch(const ARMul_State& interp, const Dynarmic::Jit& ji
return interp.Reg == jit.Regs()
&& interp.ExtReg == jit.ExtRegs()
&& interp.Cpsr == jit.Cpsr()
&& interp.VFP[VFP_FPSCR] == jit.Fpscr()
//&& interp.VFP[VFP_FPSCR] == jit.Fpscr()
&& interp_write_records == jit_write_records;
}
@ -233,7 +233,7 @@ void FuzzJitArm(const size_t instruction_count, const size_t instructions_to_exe
interp.ExtReg = initial_extregs;
interp.VFP[VFP_FPSCR] = initial_fpscr;
jit.Reset();
jit.Cpsr() = initial_cpsr;
jit.SetCpsr(initial_cpsr);
jit.Regs() = initial_regs;
jit.ExtRegs() = initial_extregs;
jit.SetFpscr(initial_fpscr);
@ -369,7 +369,7 @@ TEST_CASE( "arm: Optimization Failure (Randomized test case)", "[arm]" ) {
0x6973b6bb, 0x267ea626, 0x69debf49, 0x8f976895, 0x4ecd2d0d, 0xcf89b8c7, 0xb6713f85, 0x15e2aa5,
0xcd14336a, 0xafca0f3e, 0xace2efd9, 0x68fb82cd, 0x775447c0, 0xc9e1f8cd, 0xebe0e626, 0x0
};
jit.Cpsr() = 0x000001d0; // User-mode
jit.SetCpsr(0x000001d0); // User-mode
jit.Run(6);
@ -407,7 +407,7 @@ TEST_CASE( "arm: shsax r11, sp, r9 (Edge-case)", "[arm]" ) {
0x3a3b8b18, 0x96156555, 0xffef039f, 0xafb946f2, 0x2030a69a, 0xafe09b2a, 0x896823c8, 0xabde0ded,
0x9825d6a6, 0x17498000, 0x999d2c95, 0x8b812a59, 0x209bdb58, 0x2f7fb1d4, 0x0f378107, 0x00000000
};
jit.Cpsr() = 0x000001d0; // User-mode
jit.SetCpsr(0x000001d0); // User-mode
jit.Run(2);
@ -443,7 +443,7 @@ TEST_CASE( "arm: uasx (Edge-case)", "[arm]" ) {
jit.Regs()[4] = 0x8ed38f4c;
jit.Regs()[5] = 0x0000261d;
jit.Regs()[15] = 0x00000000;
jit.Cpsr() = 0x000001d0; // User-mode
jit.SetCpsr(0x000001d0); // User-mode
jit.Run(2);
@ -472,7 +472,7 @@ static void RunVfpTests(u32 instr, std::vector<VfpTest> tests) {
for (const auto& test : tests) {
jit.Regs()[15] = 0;
jit.Cpsr() = 0x000001d0;
jit.SetCpsr(0x000001d0);
jit.ExtRegs()[4] = test.a;
jit.ExtRegs()[6] = test.b;
jit.SetFpscr(test.initial_fpscr);
@ -1106,7 +1106,7 @@ TEST_CASE( "SMUAD", "[JitX64]" ) {
0, 0, 0, 0,
0, 0, 0, 0,
};
jit.Cpsr() = 0x000001d0; // User-mode
jit.SetCpsr(0x000001d0); // User-mode
jit.Run(6);
@ -1155,6 +1155,38 @@ TEST_CASE("Test ARM misc instructions", "[JitX64]") {
}
}
TEST_CASE("Test ARM MSR instructions", "[JitX64]") {
    // MSR requires a nonzero field mask (the `mm` bits, 18-19, selecting the
    // nzcvq/g fields); a zero mask is excluded.
    const auto is_msr_valid = [](u32 instr) -> bool {
        return Bits<18, 19>(instr) != 0;
    };

    // MSR (reg): additionally exclude Rn == R15 (bits 0-3).
    const auto is_msr_reg_valid = [&is_msr_valid](u32 instr) -> bool {
        return is_msr_valid(instr) && Bits<0, 3>(instr) != 15;
    };

    // MRS: exclude Rd == R15 (bits 12-15).
    const auto is_mrs_valid = [&](u32 inst) -> bool {
        return Bits<12, 15>(inst) != 15;
    };

    const std::array<InstructionGenerator, 3> instructions = {{
        InstructionGenerator("cccc00110010mm001111rrrrvvvvvvvv", is_msr_valid), // MSR (imm)
        InstructionGenerator("cccc00010010mm00111100000000nnnn", is_msr_reg_valid), // MSR (reg)
        InstructionGenerator("cccc000100001111dddd000000000000", is_mrs_valid), // MRS
    }};

    // Fuzz single-instruction blocks...
    SECTION("Ones") {
        FuzzJitArm(1, 2, 10000, [&instructions]() -> u32 {
            return instructions[RandInt<size_t>(0, instructions.size() - 1)].Generate();
        });
    }

    // ...and five-instruction blocks.
    SECTION("Fives") {
        FuzzJitArm(5, 6, 10000, [&instructions]() -> u32 {
            return instructions[RandInt<size_t>(0, instructions.size() - 1)].Generate();
        });
    }
}
TEST_CASE("Fuzz ARM saturated add/sub instructions", "[JitX64]") {
auto is_valid = [](u32 inst) -> bool {
// R15 as Rd, Rn, or Rm is UNPREDICTABLE
@ -1225,7 +1257,7 @@ TEST_CASE("arm: Test InvalidateCacheRange", "[arm]") {
code_mem[3] = 0xeafffffe; // b +#0 (infinite loop)
jit.Regs() = {};
jit.Cpsr() = 0x000001d0; // User-mode
jit.SetCpsr(0x000001d0); // User-mode
jit.Run(4);

View file

@ -107,7 +107,7 @@ static void InterpreterFallback(u32 pc, Dynarmic::Jit* jit, void*) {
interp_state.Reg[15] &= T ? 0xFFFFFFFE : 0xFFFFFFFC;
jit->Regs() = interp_state.Reg;
jit->Cpsr() = interp_state.Cpsr;
jit->SetCpsr(interp_state.Cpsr);
}
static void Fail() {
@ -204,7 +204,7 @@ void FuzzJitThumb(const size_t instruction_count, const size_t instructions_to_e
interp.Cpsr = 0x000001F0;
interp.Reg = initial_regs;
jit.Cpsr() = 0x000001F0;
jit.SetCpsr(0x000001F0);
jit.Regs() = initial_regs;
std::generate_n(code_mem.begin(), instruction_count, instruction_generator);
@ -258,11 +258,17 @@ void FuzzJitThumb(const size_t instruction_count, const size_t instructions_to_e
Dynarmic::Arm::PSR cpsr;
cpsr.T(true);
Dynarmic::IR::Block ir_block = Dynarmic::Arm::Translate({0, cpsr, Dynarmic::Arm::FPSCR{}}, MemoryReadCode);
Dynarmic::Optimization::GetSetElimination(ir_block);
Dynarmic::Optimization::DeadCodeElimination(ir_block);
Dynarmic::Optimization::VerificationPass(ir_block);
printf("\n\nIR:\n%s", Dynarmic::IR::DumpBlock(ir_block).c_str());
size_t num_insts = 0;
while (num_insts < instructions_to_execute_count) {
Dynarmic::IR::LocationDescriptor descriptor = {u32(num_insts * 4), cpsr, Dynarmic::Arm::FPSCR{}};
Dynarmic::IR::Block ir_block = Dynarmic::Arm::Translate(descriptor, &MemoryReadCode);
Dynarmic::Optimization::GetSetElimination(ir_block);
Dynarmic::Optimization::DeadCodeElimination(ir_block);
Dynarmic::Optimization::VerificationPass(ir_block);
printf("\n\nIR:\n%s", Dynarmic::IR::DumpBlock(ir_block).c_str());
printf("\n\nx86_64:\n%s", jit.Disassemble(descriptor).c_str());
num_insts += ir_block.CycleCount();
}
#ifdef _MSC_VER
__debugbreak();

View file

@ -43,7 +43,7 @@ static void InterpreterFallback(u32 pc, Dynarmic::Jit* jit, void*) {
InterpreterMainLoop(&interp_state);
jit->Regs() = interp_state.Reg;
jit->Cpsr() = interp_state.Cpsr;
jit->SetCpsr(interp_state.Cpsr);
}
static void AddTicks(u64) {}
@ -66,7 +66,7 @@ TEST_CASE( "thumb: lsls r0, r1, #2", "[thumb]" ) {
jit.Regs()[0] = 1;
jit.Regs()[1] = 2;
jit.Regs()[15] = 0; // PC = 0
jit.Cpsr() = 0x00000030; // Thumb, User-mode
jit.SetCpsr(0x00000030); // Thumb, User-mode
jit.Run(1);
@ -85,7 +85,7 @@ TEST_CASE( "thumb: lsls r0, r1, #31", "[thumb]" ) {
jit.Regs()[0] = 1;
jit.Regs()[1] = 0xFFFFFFFF;
jit.Regs()[15] = 0; // PC = 0
jit.Cpsr() = 0x00000030; // Thumb, User-mode
jit.SetCpsr(0x00000030); // Thumb, User-mode
jit.Run(1);
@ -103,7 +103,7 @@ TEST_CASE( "thumb: revsh r4, r3", "[thumb]" ) {
jit.Regs()[3] = 0x12345678;
jit.Regs()[15] = 0; // PC = 0
jit.Cpsr() = 0x00000030; // Thumb, User-mode
jit.SetCpsr(0x00000030); // Thumb, User-mode
jit.Run(1);
@ -121,7 +121,7 @@ TEST_CASE( "thumb: ldr r3, [r3, #28]", "[thumb]" ) {
jit.Regs()[3] = 0x12345678;
jit.Regs()[15] = 0; // PC = 0
jit.Cpsr() = 0x00000030; // Thumb, User-mode
jit.SetCpsr(0x00000030); // Thumb, User-mode
jit.Run(1);
@ -137,7 +137,7 @@ TEST_CASE( "thumb: blx +#67712", "[thumb]" ) {
code_mem[2] = 0xE7FE; // b +#0
jit.Regs()[15] = 0; // PC = 0
jit.Cpsr() = 0x00000030; // Thumb, User-mode
jit.SetCpsr(0x00000030); // Thumb, User-mode
jit.Run(1);
@ -153,7 +153,7 @@ TEST_CASE( "thumb: bl +#234584", "[thumb]" ) {
code_mem[2] = 0xE7FE; // b +#0
jit.Regs()[15] = 0; // PC = 0
jit.Cpsr() = 0x00000030; // Thumb, User-mode
jit.SetCpsr(0x00000030); // Thumb, User-mode
jit.Run(1);
@ -169,7 +169,7 @@ TEST_CASE( "thumb: bl -#42", "[thumb]" ) {
code_mem[2] = 0xE7FE; // b +#0
jit.Regs()[15] = 0; // PC = 0
jit.Cpsr() = 0x00000030; // Thumb, User-mode
jit.SetCpsr(0x00000030); // Thumb, User-mode
jit.Run(1);