diff --git a/src/backend_x64/a32_emit_x64.cpp b/src/backend_x64/a32_emit_x64.cpp index 5017f78f..40235912 100644 --- a/src/backend_x64/a32_emit_x64.cpp +++ b/src/backend_x64/a32_emit_x64.cpp @@ -629,7 +629,12 @@ void A32EmitX64::EmitA32GetFpscrNZCV(A32EmitContext& ctx, IR::Inst* inst) { void A32EmitX64::EmitA32SetFpscrNZCV(A32EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - Xbyak::Reg32 value = ctx.reg_alloc.UseGpr(args[0]).cvt32(); + Xbyak::Reg32 value = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); + + code.and_(value, 0b11000001'00000001); + code.imul(value, value, 0b00010000'00100001); + code.shl(value, 16); + code.and_(value, 0xF0000000); code.mov(dword[r15 + offsetof(A32JitState, FPSCR_nzcv)], value); } diff --git a/src/backend_x64/emit_x64_floating_point.cpp b/src/backend_x64/emit_x64_floating_point.cpp index be823703..c890c625 100644 --- a/src/backend_x64/emit_x64_floating_point.cpp +++ b/src/backend_x64/emit_x64_floating_point.cpp @@ -281,16 +281,31 @@ void EmitX64::EmitFPSub64(EmitContext& ctx, IR::Inst* inst) { FPThreeOp64(code, ctx, inst, &Xbyak::CodeGenerator::subsd); } -static void SetFpscrNzcvFromFlags(BlockOfCode& code, EmitContext& ctx) { +static Xbyak::Reg64 SetFpscrNzcvFromFlags(BlockOfCode& code, EmitContext& ctx) { ctx.reg_alloc.ScratchGpr({HostLoc::RCX}); // shifting requires use of cl - Xbyak::Reg32 nzcv = ctx.reg_alloc.ScratchGpr().cvt32(); + Xbyak::Reg64 nzcv = ctx.reg_alloc.ScratchGpr(); - code.mov(nzcv, 0x28630000); + // x64 flags ARM flags + // ZF PF CF NZCV + // Unordered 1 1 1 0011 + // Greater than 0 0 0 0010 + // Less than 0 0 1 1000 + // Equal 1 0 0 0110 + // + // Thus we can use ZF:CF as an index into an array like so: + // x64 ARM ARM as x64 + // ZF:CF NZCV NZ-----C-------V + // 0 0010 0000000100000000 = 0x0100 + // 1 1000 1000000000000000 = 0x8000 + // 2 0110 0100000100000000 = 0x4100 + // 3 0011 0000000100000001 = 0x0101 + + code.mov(nzcv, 0x0101'4100'8000'0100); code.sete(cl); - 
code.rcl(cl, 3); - code.shl(nzcv, cl); - code.and_(nzcv, 0xF0000000); - code.mov(dword[r15 + code.GetJitStateInfo().offsetof_FPSCR_nzcv], nzcv); + code.rcl(cl, 5); // cl = ZF:CF:0000, i.e. (ZF:CF) * 16 = bit offset of the selected 16-bit table entry + code.shr(nzcv, cl); + + return nzcv; } void EmitX64::EmitFPCompare32(EmitContext& ctx, IR::Inst* inst) { @@ -305,7 +320,8 @@ void EmitX64::EmitFPCompare32(EmitContext& ctx, IR::Inst* inst) { code.ucomiss(reg_a, reg_b); } - SetFpscrNzcvFromFlags(code, ctx); + Xbyak::Reg64 nzcv = SetFpscrNzcvFromFlags(code, ctx); + ctx.reg_alloc.DefineValue(inst, nzcv); } void EmitX64::EmitFPCompare64(EmitContext& ctx, IR::Inst* inst) { @@ -320,7 +336,8 @@ void EmitX64::EmitFPCompare64(EmitContext& ctx, IR::Inst* inst) { code.ucomisd(reg_a, reg_b); } - SetFpscrNzcvFromFlags(code, ctx); + Xbyak::Reg64 nzcv = SetFpscrNzcvFromFlags(code, ctx); + ctx.reg_alloc.DefineValue(inst, nzcv); } void EmitX64::EmitFPSingleToDouble(EmitContext& ctx, IR::Inst* inst) { diff --git a/src/backend_x64/jitstate_info.h b/src/backend_x64/jitstate_info.h index 19b714ea..2c267bb6 100644 --- a/src/backend_x64/jitstate_info.h +++ b/src/backend_x64/jitstate_info.h @@ -24,7 +24,6 @@ struct JitStateInfo { , offsetof_rsb_location_descriptors(offsetof(JitStateType, rsb_location_descriptors)) , offsetof_rsb_codeptrs(offsetof(JitStateType, rsb_codeptrs)) , offsetof_CPSR_nzcv(offsetof(JitStateType, CPSR_nzcv)) - , offsetof_FPSCR_nzcv(offsetof(JitStateType, FPSCR_nzcv)) , offsetof_FPSCR_IDC(offsetof(JitStateType, FPSCR_IDC)) , offsetof_FPSCR_UFC(offsetof(JitStateType, FPSCR_UFC)) {} @@ -38,7 +37,6 @@ struct JitStateInfo { const size_t offsetof_rsb_location_descriptors; const size_t offsetof_rsb_codeptrs; const size_t offsetof_CPSR_nzcv; - const size_t offsetof_FPSCR_nzcv; const size_t offsetof_FPSCR_IDC; const size_t offsetof_FPSCR_UFC; }; diff --git a/src/frontend/A32/ir_emitter.cpp b/src/frontend/A32/ir_emitter.cpp index e1ac917f..5d435948 100644 --- a/src/frontend/A32/ir_emitter.cpp +++ b/src/frontend/A32/ir_emitter.cpp @@ -154,7 
+154,7 @@ IR::U32 IREmitter::GetFpscrNZCV() { return Inst(Opcode::A32GetFpscrNZCV); } -void IREmitter::SetFpscrNZCV(const IR::U32& new_fpscr_nzcv) { +void IREmitter::SetFpscrNZCV(const IR::NZCV& new_fpscr_nzcv) { Inst(Opcode::A32SetFpscrNZCV, new_fpscr_nzcv); } diff --git a/src/frontend/A32/ir_emitter.h b/src/frontend/A32/ir_emitter.h index 88793372..e9ccc248 100644 --- a/src/frontend/A32/ir_emitter.h +++ b/src/frontend/A32/ir_emitter.h @@ -64,7 +64,7 @@ public: IR::U32 GetFpscr(); void SetFpscr(const IR::U32& new_fpscr); IR::U32 GetFpscrNZCV(); - void SetFpscrNZCV(const IR::U32& new_fpscr_nzcv); + void SetFpscrNZCV(const IR::NZCV& new_fpscr_nzcv); void ClearExclusive(); void SetExclusive(const IR::U32& vaddr, size_t byte_size); diff --git a/src/frontend/A32/translate/translate_arm/vfp2.cpp b/src/frontend/A32/translate/translate_arm/vfp2.cpp index 1e6fd2f1..fb1fd266 100644 --- a/src/frontend/A32/translate/translate_arm/vfp2.cpp +++ b/src/frontend/A32/translate/translate_arm/vfp2.cpp @@ -473,7 +473,8 @@ bool ArmTranslatorVisitor::vfp2_VCMP(Cond cond, bool D, size_t Vd, bool sz, bool if (ConditionPassed(cond)) { auto reg_d = ir.GetExtendedRegister(d); auto reg_m = ir.GetExtendedRegister(m); - ir.FPCompare(reg_d, reg_m, exc_on_qnan, true); + auto nzcv = ir.FPCompare(reg_d, reg_m, exc_on_qnan, true); + ir.SetFpscrNZCV(nzcv); } return true; } @@ -486,9 +487,11 @@ bool ArmTranslatorVisitor::vfp2_VCMP_zero(Cond cond, bool D, size_t Vd, bool sz, if (ConditionPassed(cond)) { auto reg_d = ir.GetExtendedRegister(d); if (sz) { - ir.FPCompare(reg_d, ir.Imm64(0), exc_on_qnan, true); + auto nzcv = ir.FPCompare(reg_d, ir.Imm64(0), exc_on_qnan, true); + ir.SetFpscrNZCV(nzcv); } else { - ir.FPCompare(reg_d, ir.Imm32(0), exc_on_qnan, true); + auto nzcv = ir.FPCompare(reg_d, ir.Imm32(0), exc_on_qnan, true); + ir.SetFpscrNZCV(nzcv); } } return true; diff --git a/src/frontend/A64/translate/impl/floating_point_compare.cpp b/src/frontend/A64/translate/impl/floating_point_compare.cpp index 
6921d7a9..f8a73260 100644 --- a/src/frontend/A64/translate/impl/floating_point_compare.cpp +++ b/src/frontend/A64/translate/impl/floating_point_compare.cpp @@ -37,7 +37,8 @@ bool TranslatorVisitor::FCMP_float(Imm<2> type, Vec Vm, Vec Vn, bool cmp_with_ze operand2 = V_scalar(*datasize, Vm); } - ir.FPCompare(operand1, operand2, false, true); + auto nzcv = ir.FPCompare(operand1, operand2, false, true); + ir.SetNZCV(nzcv); return true; } @@ -55,7 +56,8 @@ bool TranslatorVisitor::FCMPE_float(Imm<2> type, Vec Vm, Vec Vn, bool cmp_with_z operand2 = V_scalar(*datasize, Vm); } - ir.FPCompare(operand1, operand2, true, true); + auto nzcv = ir.FPCompare(operand1, operand2, true, true); + ir.SetNZCV(nzcv); return true; } diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp index 22a81ee1..8db9736f 100644 --- a/src/frontend/ir/ir_emitter.cpp +++ b/src/frontend/ir/ir_emitter.cpp @@ -907,13 +907,13 @@ U32U64 IREmitter::FPAdd(const U32U64& a, const U32U64& b, bool fpscr_controlled) } } -void IREmitter::FPCompare(const U32U64& a, const U32U64& b, bool exc_on_qnan, bool fpscr_controlled) { +NZCV IREmitter::FPCompare(const U32U64& a, const U32U64& b, bool exc_on_qnan, bool fpscr_controlled) { ASSERT(fpscr_controlled); ASSERT(a.GetType() == b.GetType()); if (a.GetType() == Type::U32) { - Inst(Opcode::FPCompare32, a, b, Imm1(exc_on_qnan)); + return Inst(Opcode::FPCompare32, a, b, Imm1(exc_on_qnan)); } else { - Inst(Opcode::FPCompare64, a, b, Imm1(exc_on_qnan)); + return Inst(Opcode::FPCompare64, a, b, Imm1(exc_on_qnan)); } } diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h index d35903f8..ecfd9c1b 100644 --- a/src/frontend/ir/ir_emitter.h +++ b/src/frontend/ir/ir_emitter.h @@ -236,7 +236,7 @@ public: U32U64 FPAbs(const U32U64& a); U32U64 FPAdd(const U32U64& a, const U32U64& b, bool fpscr_controlled); - void FPCompare(const U32U64& a, const U32U64& b, bool exc_on_qnan, bool fpscr_controlled); + NZCV FPCompare(const U32U64& a, const U32U64& 
b, bool exc_on_qnan, bool fpscr_controlled); U32U64 FPDiv(const U32U64& a, const U32U64& b, bool fpscr_controlled); U32U64 FPMul(const U32U64& a, const U32U64& b, bool fpscr_controlled); U32U64 FPNeg(const U32U64& a); diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc index c397d411..538f76e9 100644 --- a/src/frontend/ir/opcodes.inc +++ b/src/frontend/ir/opcodes.inc @@ -33,7 +33,7 @@ A32OPC(ExceptionRaised, T::Void, T::U32, T::U64 A32OPC(GetFpscr, T::U32, ) A32OPC(SetFpscr, T::Void, T::U32, ) A32OPC(GetFpscrNZCV, T::U32, ) -A32OPC(SetFpscrNZCV, T::Void, T::U32, ) +A32OPC(SetFpscrNZCV, T::Void, T::NZCVFlags ) // A64 Context getters/setters A64OPC(SetCheckBit, T::Void, T::U1 ) @@ -230,8 +230,8 @@ OPCODE(FPAbs32, T::U32, T::U32 OPCODE(FPAbs64, T::U64, T::U64 ) OPCODE(FPAdd32, T::U32, T::U32, T::U32 ) OPCODE(FPAdd64, T::U64, T::U64, T::U64 ) -OPCODE(FPCompare32, T::Void, T::U32, T::U32, T::U1 ) -OPCODE(FPCompare64, T::Void, T::U64, T::U64, T::U1 ) +OPCODE(FPCompare32, T::NZCVFlags, T::U32, T::U32, T::U1 ) +OPCODE(FPCompare64, T::NZCVFlags, T::U64, T::U64, T::U1 ) OPCODE(FPDiv32, T::U32, T::U32, T::U32 ) OPCODE(FPDiv64, T::U64, T::U64, T::U64 ) OPCODE(FPMul32, T::U32, T::U32, T::U32 )