diff --git a/src/backend/x64/a32_emit_x64.cpp b/src/backend/x64/a32_emit_x64.cpp
index 64dc5f9c..6010a798 100644
--- a/src/backend/x64/a32_emit_x64.cpp
+++ b/src/backend/x64/a32_emit_x64.cpp
@@ -19,6 +19,7 @@
 #include "backend/x64/block_of_code.h"
 #include "backend/x64/devirtualize.h"
 #include "backend/x64/emit_x64.h"
+#include "backend/x64/nzcv_util.h"
 #include "backend/x64/perf_map.h"
 #include "common/assert.h"
 #include "common/bit_util.h"
@@ -307,6 +308,12 @@ void A32EmitX64::GenTerminalHandlers() {
     }
 }
 
+void A32EmitX64::EmitA32SetCheckBit(A32EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    const Xbyak::Reg8 to_store = ctx.reg_alloc.UseGpr(args[0]).cvt8();
+    code.mov(code.byte[r15 + offsetof(A32JitState, check_bit)], to_store);
+}
+
 void A32EmitX64::EmitA32GetRegister(A32EmitContext& ctx, IR::Inst* inst) {
     const A32::Reg reg = inst->GetArg(0).GetA32RegRef();
     const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
@@ -384,6 +391,7 @@ void A32EmitX64::EmitA32GetCpsr(A32EmitContext& ctx, IR::Inst* inst) {
     if (code.DoesCpuSupport(Xbyak::util::Cpu::tBMI2)) {
         const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
         const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32();
+        const Xbyak::Reg32 tmp2 = ctx.reg_alloc.ScratchGpr().cvt32();
 
         // Here we observe that cpsr_et and cpsr_ge are right next to each other in memory,
         // so we load them both at the same time with one 64-bit read. This allows us to
@@ -397,7 +405,11 @@
         code.mov(tmp, dword[r15 + offsetof(A32JitState, cpsr_q)]);
         code.shl(tmp, 27);
         code.or_(result, tmp);
-        code.or_(result, dword[r15 + offsetof(A32JitState, cpsr_nzcv)]);
+        code.mov(tmp2, dword[r15 + offsetof(A32JitState, cpsr_nzcv)]);
+        code.mov(tmp, NZCV::x64_mask);
+        code.pext(tmp2, tmp2, tmp);
+        code.shl(tmp2, 28);
+        code.or_(result, tmp2);
         code.or_(result, dword[r15 + offsetof(A32JitState, cpsr_jaifm)]);
 
         ctx.reg_alloc.DefineValue(inst, result);
@@ -430,7 +442,9 @@ void A32EmitX64::EmitA32SetCpsr(A32EmitContext& ctx, IR::Inst* inst) {
 
         // cpsr_nzcv
         code.mov(tmp, cpsr);
-        code.and_(tmp, 0xF0000000);
+        code.shr(tmp, 28);
+        code.mov(tmp2, NZCV::x64_mask);
+        code.pdep(tmp, tmp, tmp2);
         code.mov(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], tmp);
 
         // cpsr_jaifm
@@ -470,11 +484,21 @@ void A32EmitX64::EmitA32SetCpsrNZCV(A32EmitContext& ctx, IR::Inst* inst) {
     if (args[0].IsImmediate()) {
         const u32 imm = args[0].GetImmediateU32();
 
-        code.mov(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], u32(imm & 0xF0000000));
+        code.mov(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], NZCV::ToX64(imm));
+    } else if (code.DoesCpuSupport(Xbyak::util::Cpu::tBMI2)) {
+        const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
+        const Xbyak::Reg32 b = ctx.reg_alloc.ScratchGpr().cvt32();
+
+        code.shr(a, 28);
+        code.mov(b, NZCV::x64_mask);
+        code.pdep(a, a, b);
+        code.mov(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], a);
     } else {
         const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
 
-        code.and_(a, 0xF0000000);
+        code.shr(a, 28);
+        code.imul(a, a, NZCV::to_x64_multiplier);
+        code.and_(a, NZCV::x64_mask);
         code.mov(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], a);
     }
 }
@@ -484,129 +508,90 @@ void A32EmitX64::EmitA32SetCpsrNZCVQ(A32EmitContext& ctx, IR::Inst* inst) {
     if (args[0].IsImmediate()) {
         const u32 imm = args[0].GetImmediateU32();
 
-        code.mov(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], u32(imm & 0xF0000000));
+        code.mov(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], NZCV::ToX64(imm));
         code.mov(code.byte[r15 + offsetof(A32JitState, cpsr_q)], u8((imm & 0x08000000) != 0 ? 1 : 0));
+    } else if (code.DoesCpuSupport(Xbyak::util::Cpu::tBMI2)) {
+        const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
+        const Xbyak::Reg32 b = ctx.reg_alloc.ScratchGpr().cvt32();
+
+        code.shr(a, 28);
+        code.setc(code.byte[r15 + offsetof(A32JitState, cpsr_q)]);
+        code.mov(b, NZCV::x64_mask);
+        code.pdep(a, a, b);
+        code.mov(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], a);
     } else {
         const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
 
-        code.bt(a, 27);
+        code.shr(a, 28);
         code.setc(code.byte[r15 + offsetof(A32JitState, cpsr_q)]);
-        code.and_(a, 0xF0000000);
+        code.imul(a, a, NZCV::to_x64_multiplier);
+        code.and_(a, NZCV::x64_mask);
         code.mov(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], a);
     }
 }
 
-void A32EmitX64::EmitA32GetNFlag(A32EmitContext& ctx, IR::Inst* inst) {
+static void EmitGetFlag(BlockOfCode& code, A32EmitContext& ctx, IR::Inst* inst, size_t flag_bit) {
     const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
     code.mov(result, dword[r15 + offsetof(A32JitState, cpsr_nzcv)]);
-    code.shr(result, 31);
+    if (flag_bit != 0) {
+        code.shr(result, static_cast<int>(flag_bit));
+    }
+    code.and_(result, 1);
     ctx.reg_alloc.DefineValue(inst, result);
 }
 
+static void EmitSetFlag(BlockOfCode& code, A32EmitContext& ctx, IR::Inst* inst, size_t flag_bit) {
+    const u32 flag_mask = 1u << flag_bit;
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    if (args[0].IsImmediate()) {
+        if (args[0].GetImmediateU1()) {
+            code.or_(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], flag_mask);
+        } else {
+            code.and_(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], ~flag_mask);
+        }
+    } else {
+        const Xbyak::Reg32 to_store = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
+
+        if (flag_bit != 0) {
+            code.shl(to_store, static_cast<int>(flag_bit));
+            code.and_(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], ~flag_mask);
+            code.or_(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], to_store);
+        } else {
+            code.mov(code.byte[r15 + offsetof(A32JitState, cpsr_nzcv)], to_store.cvt8());
+        }
+    }
+}
+
+void A32EmitX64::EmitA32GetNFlag(A32EmitContext& ctx, IR::Inst* inst) {
+    EmitGetFlag(code, ctx, inst, NZCV::x64_n_flag_bit);
+}
+
 void A32EmitX64::EmitA32SetNFlag(A32EmitContext& ctx, IR::Inst* inst) {
-    constexpr size_t flag_bit = 31;
-    constexpr u32 flag_mask = 1u << flag_bit;
-    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    if (args[0].IsImmediate()) {
-        if (args[0].GetImmediateU1()) {
-            code.or_(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], flag_mask);
-        } else {
-            code.and_(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], ~flag_mask);
-        }
-    } else {
-        const Xbyak::Reg32 to_store = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
-
-        code.shl(to_store, flag_bit);
-        code.and_(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], ~flag_mask);
-        code.or_(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], to_store);
-    }
+    EmitSetFlag(code, ctx, inst, NZCV::x64_n_flag_bit);
 }
 
 void A32EmitX64::EmitA32GetZFlag(A32EmitContext& ctx, IR::Inst* inst) {
-    const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
-    code.mov(result, dword[r15 + offsetof(A32JitState, cpsr_nzcv)]);
-    code.shr(result, 30);
-    code.and_(result, 1);
-    ctx.reg_alloc.DefineValue(inst, result);
+    EmitGetFlag(code, ctx, inst, NZCV::x64_z_flag_bit);
 }
 
 void A32EmitX64::EmitA32SetZFlag(A32EmitContext& ctx, IR::Inst* inst) {
-    constexpr size_t flag_bit = 30;
-    constexpr u32 flag_mask = 1u << flag_bit;
-    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    if (args[0].IsImmediate()) {
-        if (args[0].GetImmediateU1()) {
-            code.or_(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], flag_mask);
-        } else {
-            code.and_(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], ~flag_mask);
-        }
-    } else {
-        const Xbyak::Reg32 to_store = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
-
-        code.shl(to_store, flag_bit);
-        code.and_(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], ~flag_mask);
-        code.or_(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], to_store);
-    }
-}
-
-void A32EmitX64::EmitA32SetCheckBit(A32EmitContext& ctx, IR::Inst* inst) {
-    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    const Xbyak::Reg8 to_store = ctx.reg_alloc.UseGpr(args[0]).cvt8();
-    code.mov(code.byte[r15 + offsetof(A32JitState, check_bit)], to_store);
+    EmitSetFlag(code, ctx, inst, NZCV::x64_z_flag_bit);
 }
 
 void A32EmitX64::EmitA32GetCFlag(A32EmitContext& ctx, IR::Inst* inst) {
-    const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
-    code.mov(result, dword[r15 + offsetof(A32JitState, cpsr_nzcv)]);
-    code.shr(result, 29);
-    code.and_(result, 1);
-    ctx.reg_alloc.DefineValue(inst, result);
+    EmitGetFlag(code, ctx, inst, NZCV::x64_c_flag_bit);
 }
 
 void A32EmitX64::EmitA32SetCFlag(A32EmitContext& ctx, IR::Inst* inst) {
-    constexpr size_t flag_bit = 29;
-    constexpr u32 flag_mask = 1u << flag_bit;
-    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    if (args[0].IsImmediate()) {
-        if (args[0].GetImmediateU1()) {
-            code.or_(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], flag_mask);
-        } else {
-            code.and_(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], ~flag_mask);
-        }
-    } else {
-        const Xbyak::Reg32 to_store = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
-
-        code.shl(to_store, flag_bit);
-        code.and_(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], ~flag_mask);
-        code.or_(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], to_store);
-    }
+    EmitSetFlag(code, ctx, inst, NZCV::x64_c_flag_bit);
 }
 
 void A32EmitX64::EmitA32GetVFlag(A32EmitContext& ctx, IR::Inst* inst) {
-    const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
-    code.mov(result, dword[r15 + offsetof(A32JitState, cpsr_nzcv)]);
-    code.shr(result, 28);
-    code.and_(result, 1);
-    ctx.reg_alloc.DefineValue(inst, result);
+    EmitGetFlag(code, ctx, inst, NZCV::x64_v_flag_bit);
 }
 
 void A32EmitX64::EmitA32SetVFlag(A32EmitContext& ctx, IR::Inst* inst) {
-    constexpr size_t flag_bit = 28;
-    constexpr u32 flag_mask = 1u << flag_bit;
-    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    if (args[0].IsImmediate()) {
-        if (args[0].GetImmediateU1()) {
-            code.or_(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], flag_mask);
-        } else {
-            code.and_(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], ~flag_mask);
-        }
-    } else {
-        const Xbyak::Reg32 to_store = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
-
-        code.shl(to_store, flag_bit);
-        code.and_(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], ~flag_mask);
-        code.or_(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], to_store);
-    }
+    EmitSetFlag(code, ctx, inst, NZCV::x64_v_flag_bit);
 }
 
 void A32EmitX64::EmitA32OrQFlag(A32EmitContext& ctx, IR::Inst* inst) {
diff --git a/src/backend/x64/a32_jitstate.cpp b/src/backend/x64/a32_jitstate.cpp
index c996b4fd..27db4b48 100644
--- a/src/backend/x64/a32_jitstate.cpp
+++ b/src/backend/x64/a32_jitstate.cpp
@@ -5,6 +5,7 @@
 
 #include "backend/x64/a32_jitstate.h"
 #include "backend/x64/block_of_code.h"
+#include "backend/x64/nzcv_util.h"
 #include "common/assert.h"
 #include "common/bit_util.h"
 #include "common/common_types.h"
@@ -45,14 +46,14 @@ namespace Dynarmic::Backend::X64 {
  */
 
 u32 A32JitState::Cpsr() const {
-    DEBUG_ASSERT((cpsr_nzcv & ~0xF0000000) == 0);
+    DEBUG_ASSERT((cpsr_nzcv & ~NZCV::x64_mask) == 0);
     DEBUG_ASSERT((cpsr_q & ~1) == 0);
     DEBUG_ASSERT((cpsr_jaifm & ~0x010001DF) == 0);
 
     u32 cpsr = 0;
 
     // NZCV flags
-    cpsr |= cpsr_nzcv;
+    cpsr |= NZCV::FromX64(cpsr_nzcv);
     // Q flag
     cpsr |= cpsr_q ? 1 << 27 : 0;
     // GE flags
@@ -74,7 +75,7 @@ u32 A32JitState::Cpsr() const {
 
 void A32JitState::SetCpsr(u32 cpsr) {
     // NZCV flags
-    cpsr_nzcv = cpsr & 0xF0000000;
+    cpsr_nzcv = NZCV::ToX64(cpsr);
     // Q flag
     cpsr_q = Common::Bit<27>(cpsr) ? 1 : 0;
     // GE flags
diff --git a/src/backend/x64/a64_emit_x64.cpp b/src/backend/x64/a64_emit_x64.cpp
index 34fa7e54..a535ffbe 100644
--- a/src/backend/x64/a64_emit_x64.cpp
+++ b/src/backend/x64/a64_emit_x64.cpp
@@ -15,6 +15,7 @@
 #include "backend/x64/block_of_code.h"
 #include "backend/x64/devirtualize.h"
 #include "backend/x64/emit_x64.h"
+#include "backend/x64/nzcv_util.h"
 #include "backend/x64/perf_map.h"
 #include "common/assert.h"
 #include "common/bit_util.h"
@@ -371,7 +372,7 @@ void A64EmitX64::EmitA64SetCheckBit(A64EmitContext& ctx, IR::Inst* inst) {
 void A64EmitX64::EmitA64GetCFlag(A64EmitContext& ctx, IR::Inst* inst) {
     const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
     code.mov(result, dword[r15 + offsetof(A64JitState, cpsr_nzcv)]);
-    code.shr(result, 29);
+    code.shr(result, NZCV::x64_c_flag_bit);
     code.and_(result, 1);
     ctx.reg_alloc.DefineValue(inst, result);
 }
@@ -380,6 +381,9 @@ void A64EmitX64::EmitA64GetCFlag(A64EmitContext& ctx, IR::Inst* inst) {
 void A64EmitX64::EmitA64GetNZCVRaw(A64EmitContext& ctx, IR::Inst* inst) {
     const Xbyak::Reg32 nzcv_raw = ctx.reg_alloc.ScratchGpr().cvt32();
     code.mov(nzcv_raw, dword[r15 + offsetof(A64JitState, cpsr_nzcv)]);
+    code.and_(nzcv_raw, NZCV::x64_mask);
+    code.imul(nzcv_raw, nzcv_raw, NZCV::from_x64_multiplier);
+    code.and_(nzcv_raw, NZCV::arm_mask);
 
     ctx.reg_alloc.DefineValue(inst, nzcv_raw);
 }
@@ -387,17 +391,15 @@ void A64EmitX64::EmitA64GetNZCVRaw(A64EmitContext& ctx, IR::Inst* inst) {
 void A64EmitX64::EmitA64SetNZCVRaw(A64EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     const Xbyak::Reg32 nzcv_raw = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
 
-    code.and_(nzcv_raw, 0xF0000000);
+    code.shr(nzcv_raw, 28);
+    code.imul(nzcv_raw, nzcv_raw, NZCV::to_x64_multiplier);
+    code.and_(nzcv_raw, NZCV::x64_mask);
     code.mov(dword[r15 + offsetof(A64JitState, cpsr_nzcv)], nzcv_raw);
 }
 
 void A64EmitX64::EmitA64SetNZCV(A64EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     const Xbyak::Reg32 to_store = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
-    code.and_(to_store, 0b11000001'00000001);
-    code.imul(to_store, to_store, 0b00010000'00100001);
-    code.shl(to_store, 16);
-    code.and_(to_store, 0xF0000000);
     code.mov(dword[r15 + offsetof(A64JitState, cpsr_nzcv)], to_store);
 }
diff --git a/src/backend/x64/a64_jitstate.h b/src/backend/x64/a64_jitstate.h
index eb5574b6..e9ee6969 100644
--- a/src/backend/x64/a64_jitstate.h
+++ b/src/backend/x64/a64_jitstate.h
@@ -9,6 +9,7 @@
 
 #include <xbyak.h>
 
+#include "backend/x64/nzcv_util.h"
 #include "common/common_types.h"
 #include "frontend/A64/location_descriptor.h"
 
@@ -33,10 +34,10 @@ struct A64JitState {
     u32 cpsr_nzcv = 0;
 
     u32 GetPstate() const {
-        return cpsr_nzcv;
+        return NZCV::FromX64(cpsr_nzcv);
     }
     void SetPstate(u32 new_pstate) {
-        cpsr_nzcv = new_pstate & 0xF0000000;
+        cpsr_nzcv = NZCV::ToX64(new_pstate);
     }
 
     alignas(16) std::array<u64, 64> vec{}; // Extension registers.
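Aside, for illustration (not part of the patch): the hunks above move the stored cpsr_nzcv value from the ARM layout (NZCV in bits 31..28) to the layout used by the host flag instructions, with N in bit 15 (SF), Z in bit 14 (ZF), C in bit 8 (CF) and V in bit 0, from which OF can be recreated with add al, 0x7F. The sketch below is a scalar model of the two conversion strategies the emitters select between, pdep on BMI2-capable hosts and imul-plus-mask otherwise; it assumes a BMI2 host and a compiler flag such as -mbmi2, and exists only to sanity-check that both strategies agree.

#include <cassert>
#include <cstdint>
#include <immintrin.h>

// Scalar model of the ARM -> x64 NZCV conversion emitted above (illustrative only).
constexpr std::uint32_t x64_mask = 0xC101;          // N = bit 15, Z = bit 14, C = bit 8, V = bit 0
constexpr std::uint32_t to_x64_multiplier = 0x1081; // copies NZCV into bits 15..12, 10..7 and 3..0

// What the BMI2 path computes: shr 28 followed by pdep with x64_mask.
std::uint32_t ToX64_bmi2(std::uint32_t cpsr) {
    return _pdep_u32(cpsr >> 28, x64_mask);
}

// What the fallback path computes: shr 28, imul by the multiplier, mask the wanted bits.
std::uint32_t ToX64_imul(std::uint32_t cpsr) {
    return ((cpsr >> 28) * to_x64_multiplier) & x64_mask;
}

int main() {
    for (std::uint32_t nzcv = 0; nzcv < 16; ++nzcv) {
        assert(ToX64_bmi2(nzcv << 28) == ToX64_imul(nzcv << 28));
    }
    return 0;
}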
diff --git a/src/backend/x64/emit_x64.cpp b/src/backend/x64/emit_x64.cpp
index 41a2b285..8a335a31 100644
--- a/src/backend/x64/emit_x64.cpp
+++ b/src/backend/x64/emit_x64.cpp
@@ -174,115 +174,82 @@ void EmitX64::EmitAddCycles(size_t cycles) {
 }
 
 Xbyak::Label EmitX64::EmitCond(IR::Cond cond) {
-    Xbyak::Label label;
+    Xbyak::Label pass;
 
-    const Xbyak::Reg32 cpsr = eax;
-    code.mov(cpsr, dword[r15 + code.GetJitStateInfo().offsetof_cpsr_nzcv]);
+    code.mov(eax, dword[r15 + code.GetJitStateInfo().offsetof_cpsr_nzcv]);
 
-    constexpr size_t n_shift = 31;
-    constexpr size_t z_shift = 30;
-    constexpr size_t c_shift = 29;
-    constexpr size_t v_shift = 28;
-    constexpr u32 n_mask = 1u << n_shift;
-    constexpr u32 z_mask = 1u << z_shift;
-    constexpr u32 c_mask = 1u << c_shift;
-    constexpr u32 v_mask = 1u << v_shift;
+    // sahf restores SF, ZF, CF
+    // add al, 0x7F restores OF
 
     switch (cond) {
     case IR::Cond::EQ: //z
-        code.test(cpsr, z_mask);
-        code.jnz(label);
+        code.sahf();
+        code.jz(pass);
        break;
     case IR::Cond::NE: //!z
-        code.test(cpsr, z_mask);
-        code.jz(label);
+        code.sahf();
+        code.jnz(pass);
         break;
     case IR::Cond::CS: //c
-        code.test(cpsr, c_mask);
-        code.jnz(label);
+        code.sahf();
+        code.jc(pass);
         break;
     case IR::Cond::CC: //!c
-        code.test(cpsr, c_mask);
-        code.jz(label);
+        code.sahf();
+        code.jnc(pass);
         break;
     case IR::Cond::MI: //n
-        code.test(cpsr, n_mask);
-        code.jnz(label);
+        code.sahf();
+        code.js(pass);
         break;
     case IR::Cond::PL: //!n
-        code.test(cpsr, n_mask);
-        code.jz(label);
+        code.sahf();
+        code.jns(pass);
         break;
     case IR::Cond::VS: //v
-        code.test(cpsr, v_mask);
-        code.jnz(label);
+        code.add(al, 0x7F);
+        code.jo(pass);
         break;
     case IR::Cond::VC: //!v
-        code.test(cpsr, v_mask);
-        code.jz(label);
+        code.add(al, 0x7F);
+        code.jno(pass);
         break;
-    case IR::Cond::HI: { //c & !z
-        code.and_(cpsr, z_mask | c_mask);
-        code.cmp(cpsr, c_mask);
-        code.je(label);
+    case IR::Cond::HI: //c & !z
+        code.sahf();
+        code.cmc();
+        code.ja(pass);
         break;
-    }
-    case IR::Cond::LS: { //!c | z
-        code.and_(cpsr, z_mask | c_mask);
-        code.cmp(cpsr, c_mask);
-        code.jne(label);
+    case IR::Cond::LS: //!c | z
+        code.sahf();
+        code.cmc();
+        code.jna(pass);
         break;
-    }
-    case IR::Cond::GE: { // n == v
-        code.and_(cpsr, n_mask | v_mask);
-        code.jz(label);
-        code.cmp(cpsr, n_mask | v_mask);
-        code.je(label);
+    case IR::Cond::GE: // n == v
+        code.add(al, 0x7F);
+        code.sahf();
+        code.jge(pass);
         break;
-    }
-    case IR::Cond::LT: { // n != v
-        Xbyak::Label fail;
-        code.and_(cpsr, n_mask | v_mask);
-        code.jz(fail);
-        code.cmp(cpsr, n_mask | v_mask);
-        code.jne(label);
-        code.L(fail);
+    case IR::Cond::LT: // n != v
+        code.add(al, 0x7F);
+        code.sahf();
+        code.jl(pass);
         break;
-    }
-    case IR::Cond::GT: { // !z & (n == v)
-        const Xbyak::Reg32 tmp1 = ebx;
-        const Xbyak::Reg32 tmp2 = esi;
-        code.mov(tmp1, cpsr);
-        code.mov(tmp2, cpsr);
-        code.shr(tmp1, n_shift);
-        code.shr(tmp2, v_shift);
-        code.shr(cpsr, z_shift);
-        code.xor_(tmp1, tmp2);
-        code.or_(tmp1, cpsr);
-        code.test(tmp1, 1);
-        code.jz(label);
+    case IR::Cond::GT: // !z & (n == v)
+        code.add(al, 0x7F);
+        code.sahf();
+        code.jg(pass);
         break;
-    }
-    case IR::Cond::LE: { // z | (n != v)
-        const Xbyak::Reg32 tmp1 = ebx;
-        const Xbyak::Reg32 tmp2 = esi;
-        code.mov(tmp1, cpsr);
-        code.mov(tmp2, cpsr);
-        code.shr(tmp1, n_shift);
-        code.shr(tmp2, v_shift);
-        code.shr(cpsr, z_shift);
-        code.xor_(tmp1, tmp2);
-        code.or_(tmp1, cpsr);
-        code.test(tmp1, 1);
-        code.jnz(label);
+    case IR::Cond::LE: // z | (n != v)
+        code.add(al, 0x7F);
+        code.sahf();
+        code.jle(pass);
         break;
-    }
     default:
         ASSERT_MSG(false, "Unknown cond {}", static_cast<size_t>(cond));
         break;
     }
 
-    return label;
+    return pass;
 }
 
 EmitX64::BlockDescriptor EmitX64::RegisterBlock(const IR::LocationDescriptor& descriptor, CodePtr entrypoint, size_t size) {
diff --git a/src/backend/x64/emit_x64_data_processing.cpp b/src/backend/x64/emit_x64_data_processing.cpp
index 06bd289b..bb19483f 100644
--- a/src/backend/x64/emit_x64_data_processing.cpp
+++ b/src/backend/x64/emit_x64_data_processing.cpp
@@ -126,56 +126,71 @@ static void EmitConditionalSelect(BlockOfCode& code, EmitContext& ctx, IR::Inst*
     const Xbyak::Reg else_ = ctx.reg_alloc.UseScratchGpr(args[2]).changeBit(bitsize);
 
     code.mov(nzcv, dword[r15 + code.GetJitStateInfo().offsetof_cpsr_nzcv]);
-    // TODO: Flag optimization
-    code.shr(nzcv, 28);
-    code.imul(nzcv, nzcv, 0b00010000'10000001);
-    code.and_(nzcv.cvt8(), 1);
-    code.add(nzcv.cvt8(), 0x7F); // restore OF
-    code.sahf(); // restore SF, ZF, CF
+
+    // sahf restores SF, ZF, CF
+    // add al, 0x7F restores OF
 
     switch (args[0].GetImmediateCond()) {
     case IR::Cond::EQ: //z
+        code.sahf();
         code.cmovz(else_, then_);
         break;
     case IR::Cond::NE: //!z
+        code.sahf();
         code.cmovnz(else_, then_);
         break;
     case IR::Cond::CS: //c
+        code.sahf();
         code.cmovc(else_, then_);
         break;
     case IR::Cond::CC: //!c
+        code.sahf();
         code.cmovnc(else_, then_);
         break;
     case IR::Cond::MI: //n
+        code.sahf();
         code.cmovs(else_, then_);
         break;
     case IR::Cond::PL: //!n
+        code.sahf();
         code.cmovns(else_, then_);
         break;
     case IR::Cond::VS: //v
+        code.add(nzcv.cvt8(), 0x7F);
         code.cmovo(else_, then_);
         break;
     case IR::Cond::VC: //!v
+        code.add(nzcv.cvt8(), 0x7F);
         code.cmovno(else_, then_);
         break;
     case IR::Cond::HI: //c & !z
+        code.sahf();
         code.cmc();
         code.cmova(else_, then_);
         break;
     case IR::Cond::LS: //!c | z
+        code.sahf();
         code.cmc();
         code.cmovna(else_, then_);
         break;
     case IR::Cond::GE: // n == v
+        code.add(nzcv.cvt8(), 0x7F);
+        code.sahf();
         code.cmovge(else_, then_);
         break;
     case IR::Cond::LT: // n != v
+        code.add(nzcv.cvt8(), 0x7F);
+        code.sahf();
         code.cmovl(else_, then_);
         break;
     case IR::Cond::GT: // !z & (n == v)
+        code.add(nzcv.cvt8(), 0x7F);
+        code.sahf();
         code.cmovg(else_, then_);
         break;
     case IR::Cond::LE: // z | (n != v)
+        code.add(nzcv.cvt8(), 0x7F);
+        code.sahf();
         code.cmovle(else_, then_);
         break;
     case IR::Cond::AL:
diff --git a/src/backend/x64/nzcv_util.h b/src/backend/x64/nzcv_util.h
new file mode 100644
index 00000000..3715daa0
--- /dev/null
+++ b/src/backend/x64/nzcv_util.h
@@ -0,0 +1,53 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2016 MerryMage
+ * SPDX-License-Identifier: 0BSD
+ */
+
+#pragma once
+
+#include "common/common_types.h"
+#include "common/bit_util.h"
+
+namespace Dynarmic::Backend::X64::NZCV {
+
+constexpr u32 arm_mask = 0xF000'0000;
+constexpr u32 x64_mask = 0xC101;
+
+constexpr size_t x64_n_flag_bit = 15;
+constexpr size_t x64_z_flag_bit = 14;
+constexpr size_t x64_c_flag_bit = 8;
+constexpr size_t x64_v_flag_bit = 0;
+
+/// This is a constant used to create the x64 flags format from the ARM format.
+/// NZCV * multiplier: NZCV0NZCV000NZCV
+/// x64_flags format:  NZ-----C-------V
+constexpr u32 to_x64_multiplier = 0x1081;
+
+/// This is a constant used to create the ARM format from the x64 flags format.
+constexpr u32 from_x64_multiplier = 0x1021'0000;
+
+inline u32 ToX64(u32 nzcv) {
+    /* Naive implementation:
+    u32 x64_flags = 0;
+    x64_flags |= Common::Bit<31>(cpsr) ? 1 << 15 : 0;
+    x64_flags |= Common::Bit<30>(cpsr) ? 1 << 14 : 0;
+    x64_flags |= Common::Bit<29>(cpsr) ? 1 << 8 : 0;
+    x64_flags |= Common::Bit<28>(cpsr) ? 1 : 0;
+    return x64_flags;
+    */
+    return ((nzcv >> 28) * to_x64_multiplier) & x64_mask;
+}
+
+inline u32 FromX64(u32 x64_flags) {
+    /* Naive implementation:
+    u32 nzcv = 0;
+    nzcv |= Common::Bit<15>(x64_flags) ? 1 << 31 : 0;
+    nzcv |= Common::Bit<14>(x64_flags) ? 1 << 30 : 0;
+    nzcv |= Common::Bit<8>(x64_flags) ? 1 << 29 : 0;
+    nzcv |= Common::Bit<0>(x64_flags) ? 1 << 28 : 0;
+    return nzcv;
+    */
+    return ((x64_flags & x64_mask) * from_x64_multiplier) & arm_mask;
+}
+
+} // namespace Dynarmic::Backend::X64::NZCV
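Aside, for illustration (not part of the patch): the multiplier constants in nzcv_util.h can be checked against the naive per-bit conversions quoted in its comments. A small self-contained harness, assuming nothing beyond the constants defined above:

#include <cassert>
#include <cstdint>

// Round-trip check of the ToX64/FromX64 multiply-and-mask trick (illustrative only).
constexpr std::uint32_t arm_mask = 0xF0000000;
constexpr std::uint32_t x64_mask = 0xC101;

std::uint32_t ToX64_naive(std::uint32_t nzcv) {
    std::uint32_t x64_flags = 0;
    x64_flags |= (nzcv & (1u << 31)) ? (1u << 15) : 0;  // N -> SF position
    x64_flags |= (nzcv & (1u << 30)) ? (1u << 14) : 0;  // Z -> ZF position
    x64_flags |= (nzcv & (1u << 29)) ? (1u << 8) : 0;   // C -> CF position
    x64_flags |= (nzcv & (1u << 28)) ? 1u : 0;          // V -> bit 0
    return x64_flags;
}

std::uint32_t FromX64_naive(std::uint32_t x64_flags) {
    std::uint32_t nzcv = 0;
    nzcv |= (x64_flags & (1u << 15)) ? (1u << 31) : 0;
    nzcv |= (x64_flags & (1u << 14)) ? (1u << 30) : 0;
    nzcv |= (x64_flags & (1u << 8)) ? (1u << 29) : 0;
    nzcv |= (x64_flags & 1u) ? (1u << 28) : 0;
    return nzcv;
}

int main() {
    for (std::uint32_t i = 0; i < 16; ++i) {
        const std::uint32_t arm = i << 28;
        const std::uint32_t x64 = ((arm >> 28) * 0x1081) & x64_mask;                  // ToX64
        assert(x64 == ToX64_naive(arm));
        assert((((x64 & x64_mask) * 0x10210000u) & arm_mask) == FromX64_naive(x64));  // FromX64
    }
    return 0;
}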