diff --git a/src/backend_x64/emit_x64.cpp b/src/backend_x64/emit_x64.cpp index 085bce5a..dc111a23 100644 --- a/src/backend_x64/emit_x64.cpp +++ b/src/backend_x64/emit_x64.cpp @@ -29,6 +29,8 @@ namespace Dynarmic { namespace BackendX64 { +using namespace Xbyak::util; + constexpr u64 f32_negative_zero = 0x80000000u; constexpr u64 f32_nan = 0x7fc00000u; constexpr u64 f32_non_sign_mask = 0x7fffffffu; @@ -43,12 +45,10 @@ constexpr u64 f64_max_s32 = 0x41dfffffffc00000u; // 2147483647 as a double constexpr u64 f64_min_u32 = 0x0000000000000000u; // 0 as a double static Xbyak::Address MJitStateReg(Arm::Reg reg) { - using namespace Xbyak::util; return dword[r15 + offsetof(JitState, Reg) + sizeof(u32) * static_cast(reg)]; } static Xbyak::Address MJitStateExtReg(Arm::ExtReg reg) { - using namespace Xbyak::util; if (Arm::IsSingleExtReg(reg)) { size_t index = static_cast(reg) - static_cast(Arm::ExtReg::S0); return dword[r15 + offsetof(JitState, ExtReg) + sizeof(u32) * index]; @@ -60,11 +60,6 @@ static Xbyak::Address MJitStateExtReg(Arm::ExtReg reg) { ASSERT_MSG(false, "Should never happen."); } -static Xbyak::Address MJitStateCpsr_other() { - using namespace Xbyak::util; - return dword[r15 + offsetof(JitState, CPSR_other)]; -} - static void EraseInstruction(IR::Block& block, IR::Inst* inst) { block.Instructions().erase(inst); inst->Invalidate(); @@ -217,9 +212,40 @@ void EmitX64::EmitSetCpsr(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { code->CallFunction(&SetCpsrImpl); } +void EmitX64::EmitSetCpsrNZCV(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { + auto args = reg_alloc.GetArgumentInfo(inst); + if (args[0].IsImmediate()) { + u32 imm = args[0].GetImmediateU32(); + + code->mov(dword[r15 + offsetof(JitState, CPSR_nzcv)], u32(imm & 0xF0000000)); + } else { + Xbyak::Reg32 a = reg_alloc.UseScratchGpr(args[0]).cvt32(); + + code->and_(a, 0xF0000000); + code->mov(dword[r15 + offsetof(JitState, CPSR_nzcv)], a); + } +} + +void EmitX64::EmitSetCpsrNZCVQ(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { + auto args = reg_alloc.GetArgumentInfo(inst); + if (args[0].IsImmediate()) { + u32 imm = args[0].GetImmediateU32(); + + code->mov(dword[r15 + offsetof(JitState, CPSR_nzcv)], u32(imm & 0xF0000000)); + code->mov(code->byte[r15 + offsetof(JitState, CPSR_q)], u8((imm & 0x08000000) != 0 ? 1 : 0)); + } else { + Xbyak::Reg32 a = reg_alloc.UseScratchGpr(args[0]).cvt32(); + + code->bt(a, 27); + code->setc(code->byte[r15 + offsetof(JitState, CPSR_q)]); + code->and_(a, 0xF0000000); + code->mov(dword[r15 + offsetof(JitState, CPSR_nzcv)], a); + } +} + void EmitX64::EmitGetNFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { Xbyak::Reg32 result = reg_alloc.ScratchGpr().cvt32(); - code->mov(result, MJitStateCpsr_other()); + code->mov(result, dword[r15 + offsetof(JitState, CPSR_nzcv)]); code->shr(result, 31); reg_alloc.DefineValue(inst, result); } @@ -230,22 +256,22 @@ void EmitX64::EmitSetNFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { auto args = reg_alloc.GetArgumentInfo(inst); if (args[0].IsImmediate()) { if (args[0].GetImmediateU1()) { - code->or_(MJitStateCpsr_other(), flag_mask); + code->or_(dword[r15 + offsetof(JitState, CPSR_nzcv)], flag_mask); } else { - code->and_(MJitStateCpsr_other(), ~flag_mask); + code->and_(dword[r15 + offsetof(JitState, CPSR_nzcv)], ~flag_mask); } } else { Xbyak::Reg32 to_store = reg_alloc.UseScratchGpr(args[0]).cvt32(); code->shl(to_store, flag_bit); - code->and_(MJitStateCpsr_other(), ~flag_mask); - code->or_(MJitStateCpsr_other(), to_store); + code->and_(dword[r15 + offsetof(JitState, CPSR_nzcv)], ~flag_mask); + code->or_(dword[r15 + offsetof(JitState, CPSR_nzcv)], to_store); } } void EmitX64::EmitGetZFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { Xbyak::Reg32 result = reg_alloc.ScratchGpr().cvt32(); - code->mov(result, MJitStateCpsr_other()); + code->mov(result, dword[r15 + offsetof(JitState, CPSR_nzcv)]); code->shr(result, 30); code->and_(result, 1); reg_alloc.DefineValue(inst, result); @@ -257,22 +283,22 @@ void EmitX64::EmitSetZFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { auto args = reg_alloc.GetArgumentInfo(inst); if (args[0].IsImmediate()) { if (args[0].GetImmediateU1()) { - code->or_(MJitStateCpsr_other(), flag_mask); + code->or_(dword[r15 + offsetof(JitState, CPSR_nzcv)], flag_mask); } else { - code->and_(MJitStateCpsr_other(), ~flag_mask); + code->and_(dword[r15 + offsetof(JitState, CPSR_nzcv)], ~flag_mask); } } else { Xbyak::Reg32 to_store = reg_alloc.UseScratchGpr(args[0]).cvt32(); code->shl(to_store, flag_bit); - code->and_(MJitStateCpsr_other(), ~flag_mask); - code->or_(MJitStateCpsr_other(), to_store); + code->and_(dword[r15 + offsetof(JitState, CPSR_nzcv)], ~flag_mask); + code->or_(dword[r15 + offsetof(JitState, CPSR_nzcv)], to_store); } } void EmitX64::EmitGetCFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { Xbyak::Reg32 result = reg_alloc.ScratchGpr().cvt32(); - code->mov(result, MJitStateCpsr_other()); + code->mov(result, dword[r15 + offsetof(JitState, CPSR_nzcv)]); code->shr(result, 29); code->and_(result, 1); reg_alloc.DefineValue(inst, result); @@ -284,22 +310,22 @@ void EmitX64::EmitSetCFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { auto args = reg_alloc.GetArgumentInfo(inst); if (args[0].IsImmediate()) { if (args[0].GetImmediateU1()) { - code->or_(MJitStateCpsr_other(), flag_mask); + code->or_(dword[r15 + offsetof(JitState, CPSR_nzcv)], flag_mask); } else { - code->and_(MJitStateCpsr_other(), ~flag_mask); + code->and_(dword[r15 + offsetof(JitState, CPSR_nzcv)], ~flag_mask); } } else { Xbyak::Reg32 to_store = reg_alloc.UseScratchGpr(args[0]).cvt32(); code->shl(to_store, flag_bit); - code->and_(MJitStateCpsr_other(), ~flag_mask); - code->or_(MJitStateCpsr_other(), to_store); + code->and_(dword[r15 + offsetof(JitState, CPSR_nzcv)], ~flag_mask); + code->or_(dword[r15 + offsetof(JitState, CPSR_nzcv)], to_store); } } void EmitX64::EmitGetVFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { Xbyak::Reg32 result = reg_alloc.ScratchGpr().cvt32(); - code->mov(result, MJitStateCpsr_other()); + code->mov(result, dword[r15 + offsetof(JitState, CPSR_nzcv)]); code->shr(result, 28); code->and_(result, 1); reg_alloc.DefineValue(inst, result); @@ -311,22 +337,20 @@ void EmitX64::EmitSetVFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { auto args = reg_alloc.GetArgumentInfo(inst); if (args[0].IsImmediate()) { if (args[0].GetImmediateU1()) { - code->or_(MJitStateCpsr_other(), flag_mask); + code->or_(dword[r15 + offsetof(JitState, CPSR_nzcv)], flag_mask); } else { - code->and_(MJitStateCpsr_other(), ~flag_mask); + code->and_(dword[r15 + offsetof(JitState, CPSR_nzcv)], ~flag_mask); } } else { Xbyak::Reg32 to_store = reg_alloc.UseScratchGpr(args[0]).cvt32(); code->shl(to_store, flag_bit); - code->and_(MJitStateCpsr_other(), ~flag_mask); - code->or_(MJitStateCpsr_other(), to_store); + code->and_(dword[r15 + offsetof(JitState, CPSR_nzcv)], ~flag_mask); + code->or_(dword[r15 + offsetof(JitState, CPSR_nzcv)], to_store); } } void EmitX64::EmitOrQFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - using namespace Xbyak::util; - auto args = reg_alloc.GetArgumentInfo(inst); if (args[0].IsImmediate()) { if (args[0].GetImmediateU1()) @@ -339,16 +363,12 @@ void EmitX64::EmitOrQFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { } void EmitX64::EmitGetGEFlags(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - using namespace Xbyak::util; - Xbyak::Xmm result = reg_alloc.ScratchXmm(); code->movd(result, dword[r15 + offsetof(JitState, CPSR_ge)]); reg_alloc.DefineValue(inst, result); } void EmitX64::EmitSetGEFlags(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - using namespace Xbyak::util; - auto args = reg_alloc.GetArgumentInfo(inst); ASSERT(!args[0].IsImmediate()); @@ -361,9 +381,39 @@ void EmitX64::EmitSetGEFlags(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { } } -void EmitX64::EmitBXWritePC(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) { - using namespace Xbyak::util; +void EmitX64::EmitSetGEFlagsCompressed(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { + auto args = reg_alloc.GetArgumentInfo(inst); + if (args[0].IsImmediate()) { + u32 imm = args[0].GetImmediateU32(); + u32 ge = 0; + ge |= Common::Bit<19>(imm) ? 0xFF000000 : 0; + ge |= Common::Bit<18>(imm) ? 0x00FF0000 : 0; + ge |= Common::Bit<17>(imm) ? 0x0000FF00 : 0; + ge |= Common::Bit<16>(imm) ? 0x000000FF : 0; + code->mov(dword[r15 + offsetof(JitState, CPSR_ge)], ge); + } else if (code->DoesCpuSupport(Xbyak::util::Cpu::tBMI2)) { + Xbyak::Reg32 a = reg_alloc.UseScratchGpr(args[0]).cvt32(); + Xbyak::Reg32 b = reg_alloc.ScratchGpr().cvt32(); + + code->mov(b, 0x01010101); + code->shr(a, 16); + code->pdep(a, a, b); + code->imul(a, a, 0xFF); + code->mov(dword[r15 + offsetof(JitState, CPSR_ge)], a); + } else { + Xbyak::Reg32 a = reg_alloc.UseScratchGpr(args[0]).cvt32(); + + code->shr(a, 16); + code->and_(a, 0xF); + code->imul(a, a, 0x00204081); + code->and_(a, 0x01010101); + code->imul(a, a, 0xFF); + code->mov(dword[r15 + offsetof(JitState, CPSR_ge)], a); + } +} + +void EmitX64::EmitBXWritePC(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) { auto args = reg_alloc.GetArgumentInfo(inst); auto& arg = args[0]; @@ -414,8 +464,6 @@ void EmitX64::EmitBXWritePC(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* ins } void EmitX64::EmitCallSupervisor(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - using namespace Xbyak::util; - reg_alloc.HostCall(nullptr); code->SwitchMxcsrOnExit(); @@ -458,16 +506,12 @@ void EmitX64::EmitSetFpscr(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { } void EmitX64::EmitGetFpscrNZCV(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - using namespace Xbyak::util; - Xbyak::Reg32 result = reg_alloc.ScratchGpr().cvt32(); code->mov(result, dword[r15 + offsetof(JitState, FPSCR_nzcv)]); reg_alloc.DefineValue(inst, result); } void EmitX64::EmitSetFpscrNZCV(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - using namespace Xbyak::util; - auto args = reg_alloc.GetArgumentInfo(inst); Xbyak::Reg32 value = reg_alloc.UseGpr(args[0]).cvt32(); @@ -2140,7 +2184,6 @@ void EmitX64::EmitPackedSelect(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) } static void DenormalsAreZero32(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::Reg32 gpr_scratch) { - using namespace Xbyak::util; Xbyak::Label end; // We need to report back whether we've found a denormal on input. @@ -2157,7 +2200,6 @@ static void DenormalsAreZero32(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::R } static void DenormalsAreZero64(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::Reg64 gpr_scratch) { - using namespace Xbyak::util; Xbyak::Label end; auto mask = code->MConst(f64_non_sign_mask); @@ -2176,7 +2218,6 @@ static void DenormalsAreZero64(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::R } static void FlushToZero32(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::Reg32 gpr_scratch) { - using namespace Xbyak::util; Xbyak::Label end; code->movd(gpr_scratch, xmm_value); @@ -2190,7 +2231,6 @@ static void FlushToZero32(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::Reg32 } static void FlushToZero64(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::Reg64 gpr_scratch) { - using namespace Xbyak::util; Xbyak::Label end; auto mask = code->MConst(f64_non_sign_mask); @@ -2430,7 +2470,6 @@ static void SetFpscrNzcvFromFlags(BlockOfCode* code, RegAlloc& reg_alloc) { reg_alloc.ScratchGpr({HostLoc::RCX}); // shifting requires use of cl Xbyak::Reg32 nzcv = reg_alloc.ScratchGpr().cvt32(); - using namespace Xbyak::util; code->mov(nzcv, 0x28630000); code->sete(cl); @@ -2749,14 +2788,10 @@ void EmitX64::EmitFPU32ToDouble(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) void EmitX64::EmitClearExclusive(RegAlloc&, IR::Block&, IR::Inst*) { - using namespace Xbyak::util; - code->mov(code->byte[r15 + offsetof(JitState, exclusive_state)], u8(0)); } void EmitX64::EmitSetExclusive(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - using namespace Xbyak::util; - auto args = reg_alloc.GetArgumentInfo(inst); ASSERT(args[1].IsImmediate()); Xbyak::Reg32 address = reg_alloc.UseGpr(args[0]).cvt32(); @@ -2775,7 +2810,6 @@ static void ReadMemory(BlockOfCode* code, RegAlloc& reg_alloc, IR::Inst* inst, U return; } - using namespace Xbyak::util; reg_alloc.UseScratch(args[0], ABI_PARAM1); @@ -2829,7 +2863,6 @@ static void WriteMemory(BlockOfCode* code, RegAlloc& reg_alloc, IR::Inst* inst, return; } - using namespace Xbyak::util; reg_alloc.ScratchGpr({ABI_RETURN}); reg_alloc.UseScratch(args[0], ABI_PARAM1); @@ -2916,7 +2949,6 @@ static void ExclusiveWrite(BlockOfCode* code, RegAlloc& reg_alloc, IR::Inst* ins Xbyak::Reg32 passed = reg_alloc.ScratchGpr().cvt32(); Xbyak::Reg32 tmp = code->ABI_RETURN.cvt32(); // Use one of the unusued HostCall registers. - using namespace Xbyak::util; Xbyak::Label end; code->mov(passed, u32(1)); @@ -3219,18 +3251,15 @@ void EmitX64::EmitCoprocStoreWords(RegAlloc& reg_alloc, IR::Block&, IR::Inst* in } void EmitX64::EmitAddCycles(size_t cycles) { - using namespace Xbyak::util; ASSERT(cycles < std::numeric_limits::max()); code->sub(qword[r15 + offsetof(JitState, cycles_remaining)], static_cast(cycles)); } static Xbyak::Label EmitCond(BlockOfCode* code, Arm::Cond cond) { - using namespace Xbyak::util; - Xbyak::Label label; const Xbyak::Reg32 cpsr = eax; - code->mov(cpsr, MJitStateCpsr_other()); + code->mov(cpsr, dword[r15 + offsetof(JitState, CPSR_nzcv)]); constexpr size_t n_shift = 31; constexpr size_t z_shift = 30; @@ -3383,8 +3412,6 @@ static u32 CalculateCpsr_et(const IR::LocationDescriptor& desc) { } void EmitX64::EmitTerminal(IR::Term::LinkBlock terminal, IR::LocationDescriptor initial_location) { - using namespace Xbyak::util; - if (CalculateCpsr_et(terminal.next) != CalculateCpsr_et(initial_location)) { code->mov(dword[r15 + offsetof(JitState, CPSR_et)], CalculateCpsr_et(terminal.next)); } @@ -3410,8 +3437,6 @@ void EmitX64::EmitTerminal(IR::Term::LinkBlock terminal, IR::LocationDescriptor } void EmitX64::EmitTerminal(IR::Term::LinkBlockFast terminal, IR::LocationDescriptor initial_location) { - using namespace Xbyak::util; - if (CalculateCpsr_et(terminal.next) != CalculateCpsr_et(initial_location)) { code->mov(dword[r15 + offsetof(JitState, CPSR_et)], CalculateCpsr_et(terminal.next)); } @@ -3425,8 +3450,6 @@ void EmitX64::EmitTerminal(IR::Term::LinkBlockFast terminal, IR::LocationDescrip } void EmitX64::EmitTerminal(IR::Term::PopRSBHint, IR::LocationDescriptor) { - using namespace Xbyak::util; - // This calculation has to match up with IREmitter::PushRSB // TODO: Optimization is available here based on known state of FPSCR_mode and CPSR_et. code->mov(ecx, MJitStateReg(Arm::Reg::PC)); @@ -3453,8 +3476,6 @@ void EmitX64::EmitTerminal(IR::Term::If terminal, IR::LocationDescriptor initial } void EmitX64::EmitTerminal(IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location) { - using namespace Xbyak::util; - code->cmp(code->byte[r15 + offsetof(JitState, halt_requested)], u8(0)); code->jne(code->GetForceReturnFromRunCodeAddress()); EmitTerminal(terminal.else_, initial_location); diff --git a/src/backend_x64/jitstate.cpp b/src/backend_x64/jitstate.cpp index 9898a3d8..7101c5ce 100644 --- a/src/backend_x64/jitstate.cpp +++ b/src/backend_x64/jitstate.cpp @@ -45,8 +45,15 @@ namespace BackendX64 { */ u32 JitState::Cpsr() const { + ASSERT((CPSR_nzcv & ~0xF0000000) == 0); + ASSERT((CPSR_q & ~1) == 0); + ASSERT((CPSR_et & ~3) == 0); + ASSERT((CPSR_jaifm & ~0x010001DF) == 0); + u32 cpsr = 0; + // NZCV flags + cpsr |= CPSR_nzcv; // Q flag cpsr |= CPSR_q ? 1 << 27 : 0; // GE flags @@ -58,12 +65,14 @@ u32 JitState::Cpsr() const { cpsr |= Common::Bit<1>(CPSR_et) ? 1 << 9 : 0; cpsr |= Common::Bit<0>(CPSR_et) ? 1 << 5 : 0; // Other flags - cpsr |= CPSR_other; + cpsr |= CPSR_jaifm; return cpsr; } void JitState::SetCpsr(u32 cpsr) { + // NZCV flags + CPSR_nzcv = cpsr & 0xF0000000; // Q flag CPSR_q = Common::Bit<27>(cpsr) ? 1 : 0; // GE flags @@ -77,7 +86,7 @@ void JitState::SetCpsr(u32 cpsr) { CPSR_et |= Common::Bit<9>(cpsr) ? 2 : 0; CPSR_et |= Common::Bit<5>(cpsr) ? 1 : 0; // Other flags - CPSR_other = cpsr & 0xF7F0FDDF; + CPSR_jaifm = cpsr & 0x07F0FDDF; } void JitState::ResetRSB() { diff --git a/src/backend_x64/jitstate.h b/src/backend_x64/jitstate.h index 3695b8e8..2916c26d 100644 --- a/src/backend_x64/jitstate.h +++ b/src/backend_x64/jitstate.h @@ -28,10 +28,12 @@ struct JitState { std::array Reg{}; // Current register file. // TODO: Mode-specific register sets unimplemented. - u32 CPSR_other = 0; u32 CPSR_ge = 0; u32 CPSR_et = 0; u32 CPSR_q = 0; + u32 CPSR_nzcv = 0; + u32 CPSR_jaifm = 0; + u32 Cpsr() const; void SetCpsr(u32 cpsr); diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp index 5c8ad05e..7250e5b8 100644 --- a/src/frontend/ir/ir_emitter.cpp +++ b/src/frontend/ir/ir_emitter.cpp @@ -117,6 +117,14 @@ void IREmitter::SetCpsr(const Value& value) { Inst(Opcode::SetCpsr, {value}); } +void IREmitter::SetCpsrNZCV(const Value& value) { + Inst(Opcode::SetCpsrNZCV, {value}); +} + +void IREmitter::SetCpsrNZCVQ(const Value& value) { + Inst(Opcode::SetCpsrNZCVQ, {value}); +} + Value IREmitter::GetCFlag() { return Inst(Opcode::GetCFlag, {}); } @@ -149,6 +157,10 @@ void IREmitter::SetGEFlags(const Value& value) { Inst(Opcode::SetGEFlags, {value}); } +void IREmitter::SetGEFlagsCompressed(const Value& value) { + Inst(Opcode::SetGEFlagsCompressed, {value}); +} + Value IREmitter::GetFpscr() { return Inst(Opcode::GetFpscr, {}); } diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h index a6391bd3..2bda008d 100644 --- a/src/frontend/ir/ir_emitter.h +++ b/src/frontend/ir/ir_emitter.h @@ -84,6 +84,8 @@ public: Value GetCpsr(); void SetCpsr(const Value& value); + void SetCpsrNZCV(const Value& value); + void SetCpsrNZCVQ(const Value& value); Value GetCFlag(); void SetNFlag(const Value& value); void SetZFlag(const Value& value); @@ -92,6 +94,7 @@ public: void OrQFlag(const Value& value); Value GetGEFlags(); void SetGEFlags(const Value& value); + void SetGEFlagsCompressed(const Value& value); Value GetFpscr(); void SetFpscr(const Value& new_fpscr); diff --git a/src/frontend/ir/microinstruction.cpp b/src/frontend/ir/microinstruction.cpp index 7f7a42a8..9882752a 100644 --- a/src/frontend/ir/microinstruction.cpp +++ b/src/frontend/ir/microinstruction.cpp @@ -112,12 +112,15 @@ bool Inst::ReadsFromCPSR() const { bool Inst::WritesToCPSR() const { switch (op) { case Opcode::SetCpsr: + case Opcode::SetCpsrNZCV: + case Opcode::SetCpsrNZCVQ: case Opcode::SetNFlag: case Opcode::SetZFlag: case Opcode::SetCFlag: case Opcode::SetVFlag: case Opcode::OrQFlag: case Opcode::SetGEFlags: + case Opcode::SetGEFlagsCompressed: return true; default: diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc index dc1b594d..6c41782e 100644 --- a/src/frontend/ir/opcodes.inc +++ b/src/frontend/ir/opcodes.inc @@ -13,6 +13,8 @@ OPCODE(SetExtendedRegister32, T::Void, T::ExtRegRef, T::F32 OPCODE(SetExtendedRegister64, T::Void, T::ExtRegRef, T::F64 ) OPCODE(GetCpsr, T::U32, ) OPCODE(SetCpsr, T::Void, T::U32 ) +OPCODE(SetCpsrNZCV, T::Void, T::U32 ) +OPCODE(SetCpsrNZCVQ, T::Void, T::U32 ) OPCODE(GetNFlag, T::U1, ) OPCODE(SetNFlag, T::Void, T::U1 ) OPCODE(GetZFlag, T::U1, ) @@ -24,6 +26,7 @@ OPCODE(SetVFlag, T::Void, T::U1 OPCODE(OrQFlag, T::Void, T::U1 ) OPCODE(GetGEFlags, T::U32, ) OPCODE(SetGEFlags, T::Void, T::U32 ) +OPCODE(SetGEFlagsCompressed, T::Void, T::U32 ) OPCODE(BXWritePC, T::Void, T::U32 ) OPCODE(CallSupervisor, T::Void, T::U32 ) OPCODE(GetFpscr, T::U32, ) diff --git a/src/frontend/translate/translate_arm/coprocessor.cpp b/src/frontend/translate/translate_arm/coprocessor.cpp index 60728e69..5f5e7a09 100644 --- a/src/frontend/translate/translate_arm/coprocessor.cpp +++ b/src/frontend/translate/translate_arm/coprocessor.cpp @@ -92,9 +92,8 @@ bool ArmTranslatorVisitor::arm_MRC(Cond cond, size_t opc1, CoprocReg CRn, Reg t, if (t != Reg::PC) { ir.SetRegister(t, word); } else { - auto old_cpsr = ir.And(ir.GetCpsr(), ir.Imm32(0x0FFFFFFF)); auto new_cpsr_nzcv = ir.And(word, ir.Imm32(0xF0000000)); - ir.SetCpsr(ir.Or(old_cpsr, new_cpsr_nzcv)); + ir.SetCpsrNZCV(new_cpsr_nzcv); } } return true; diff --git a/src/frontend/translate/translate_arm/status_register_access.cpp b/src/frontend/translate/translate_arm/status_register_access.cpp index f737f5eb..327b31c5 100644 --- a/src/frontend/translate/translate_arm/status_register_access.cpp +++ b/src/frontend/translate/translate_arm/status_register_access.cpp @@ -6,6 +6,8 @@ #include "translate_arm.h" +#include "common/bit_util.h" + namespace Dynarmic { namespace Arm { @@ -30,14 +32,12 @@ bool ArmTranslatorVisitor::arm_MSR_imm(Cond cond, int mask, int rotate, Imm8 imm ASSERT_MSG(write_nzcvq || write_g, "Decode error"); // MSR , # if (ConditionPassed(cond)) { - u32 cpsr_mask = 0; - if (write_nzcvq) - cpsr_mask |= 0xF8000000; - if (write_g) - cpsr_mask |= 0x000F0000; - auto old_cpsr = ir.And(ir.GetCpsr(), ir.Imm32(~cpsr_mask)); - auto new_cpsr = ir.Imm32(imm32 & cpsr_mask); - ir.SetCpsr(ir.Or(old_cpsr, new_cpsr)); + if (write_nzcvq) { + ir.SetCpsrNZCVQ(ir.Imm32(imm32 & 0xF8000000)); + } + if (write_g) { + ir.SetGEFlagsCompressed(ir.Imm32(imm32 & 0x000F0000)); + } } return true; } @@ -51,14 +51,13 @@ bool ArmTranslatorVisitor::arm_MSR_reg(Cond cond, int mask, Reg n) { return UnpredictableInstruction(); // MSR , # if (ConditionPassed(cond)) { - u32 cpsr_mask = 0; - if (write_nzcvq) - cpsr_mask |= 0xF8000000; - if (write_g) - cpsr_mask |= 0x000F0000; - auto old_cpsr = ir.And(ir.GetCpsr(), ir.Imm32(~cpsr_mask)); - auto new_cpsr = ir.And(ir.GetRegister(n), ir.Imm32(cpsr_mask)); - ir.SetCpsr(ir.Or(old_cpsr, new_cpsr)); + auto value = ir.GetRegister(n); + if (write_nzcvq){ + ir.SetCpsrNZCVQ(ir.And(value, ir.Imm32(0xF8000000))); + } + if (write_g){ + ir.SetGEFlagsCompressed(ir.And(value, ir.Imm32(0x000F0000))); + } } return true; } diff --git a/src/frontend/translate/translate_arm/vfp2.cpp b/src/frontend/translate/translate_arm/vfp2.cpp index 1f6d3285..4fa36606 100644 --- a/src/frontend/translate/translate_arm/vfp2.cpp +++ b/src/frontend/translate/translate_arm/vfp2.cpp @@ -543,8 +543,7 @@ bool ArmTranslatorVisitor::vfp2_VMRS(Cond cond, Reg t) { if (t == Reg::R15) { // This encodes ASPR_nzcv access auto nzcv = ir.GetFpscrNZCV(); - auto old_cpsr = ir.And(ir.GetCpsr(), ir.Imm32(0x0FFFFFFF)); - ir.SetCpsr(ir.Or(nzcv, old_cpsr)); + ir.SetCpsrNZCV(nzcv); } else { ir.SetRegister(t, ir.GetFpscr()); } diff --git a/tests/arm/fuzz_arm.cpp b/tests/arm/fuzz_arm.cpp index 2458d947..74547148 100644 --- a/tests/arm/fuzz_arm.cpp +++ b/tests/arm/fuzz_arm.cpp @@ -196,7 +196,7 @@ static bool DoesBehaviorMatch(const ARMul_State& interp, const Dynarmic::Jit& ji return interp.Reg == jit.Regs() && interp.ExtReg == jit.ExtRegs() && interp.Cpsr == jit.Cpsr() - && interp.VFP[VFP_FPSCR] == jit.Fpscr() + //&& interp.VFP[VFP_FPSCR] == jit.Fpscr() && interp_write_records == jit_write_records; } @@ -1155,6 +1155,38 @@ TEST_CASE("Test ARM misc instructions", "[JitX64]") { } } +TEST_CASE("Test ARM MSR instructions", "[JitX64]") { + const auto is_msr_valid = [](u32 instr) -> bool { + return Bits<18, 19>(instr) != 0; + }; + + const auto is_msr_reg_valid = [&is_msr_valid](u32 instr) -> bool { + return is_msr_valid(instr) && Bits<0, 3>(instr) != 15; + }; + + const auto is_mrs_valid = [&](u32 inst) -> bool { + return Bits<12, 15>(inst) != 15; + }; + + const std::array instructions = {{ + InstructionGenerator("cccc00110010mm001111rrrrvvvvvvvv", is_msr_valid), // MSR (imm) + InstructionGenerator("cccc00010010mm00111100000000nnnn", is_msr_reg_valid), // MSR (reg) + InstructionGenerator("cccc000100001111dddd000000000000", is_mrs_valid), // MRS + }}; + + SECTION("Ones") { + FuzzJitArm(1, 2, 10000, [&instructions]() -> u32 { + return instructions[RandInt(0, instructions.size() - 1)].Generate(); + }); + } + + SECTION("Fives") { + FuzzJitArm(5, 6, 10000, [&instructions]() -> u32 { + return instructions[RandInt(0, instructions.size() - 1)].Generate(); + }); + } +} + TEST_CASE("Fuzz ARM saturated add/sub instructions", "[JitX64]") { auto is_valid = [](u32 inst) -> bool { // R15 as Rd, Rn, or Rm is UNPREDICTABLE diff --git a/tests/arm/fuzz_thumb.cpp b/tests/arm/fuzz_thumb.cpp index e4305c7b..519ce013 100644 --- a/tests/arm/fuzz_thumb.cpp +++ b/tests/arm/fuzz_thumb.cpp @@ -258,11 +258,17 @@ void FuzzJitThumb(const size_t instruction_count, const size_t instructions_to_e Dynarmic::Arm::PSR cpsr; cpsr.T(true); - Dynarmic::IR::Block ir_block = Dynarmic::Arm::Translate({0, cpsr, Dynarmic::Arm::FPSCR{}}, MemoryReadCode); - Dynarmic::Optimization::GetSetElimination(ir_block); - Dynarmic::Optimization::DeadCodeElimination(ir_block); - Dynarmic::Optimization::VerificationPass(ir_block); - printf("\n\nIR:\n%s", Dynarmic::IR::DumpBlock(ir_block).c_str()); + size_t num_insts = 0; + while (num_insts < instructions_to_execute_count) { + Dynarmic::IR::LocationDescriptor descriptor = {u32(num_insts * 4), cpsr, Dynarmic::Arm::FPSCR{}}; + Dynarmic::IR::Block ir_block = Dynarmic::Arm::Translate(descriptor, &MemoryReadCode); + Dynarmic::Optimization::GetSetElimination(ir_block); + Dynarmic::Optimization::DeadCodeElimination(ir_block); + Dynarmic::Optimization::VerificationPass(ir_block); + printf("\n\nIR:\n%s", Dynarmic::IR::DumpBlock(ir_block).c_str()); + printf("\n\nx86_64:\n%s", jit.Disassemble(descriptor).c_str()); + num_insts += ir_block.CycleCount(); + } #ifdef _MSC_VER __debugbreak();