IR: FPCompare{32,64} now return NZCV flags instead of implicitly setting them

This commit is contained in:
MerryMage 2018-02-05 12:16:01 +00:00
parent 2ee39d6b36
commit aac5af50e2
10 changed files with 51 additions and 26 deletions

View file

@ -629,7 +629,12 @@ void A32EmitX64::EmitA32GetFpscrNZCV(A32EmitContext& ctx, IR::Inst* inst) {
void A32EmitX64::EmitA32SetFpscrNZCV(A32EmitContext& ctx, IR::Inst* inst) { void A32EmitX64::EmitA32SetFpscrNZCV(A32EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
Xbyak::Reg32 value = ctx.reg_alloc.UseGpr(args[0]).cvt32(); Xbyak::Reg32 value = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
code.and_(value, 0b11000001'00000001);
code.imul(value, value, 0b00010000'00100001);
code.shl(value, 16);
code.and_(value, 0xF0000000);
code.mov(dword[r15 + offsetof(A32JitState, FPSCR_nzcv)], value); code.mov(dword[r15 + offsetof(A32JitState, FPSCR_nzcv)], value);
} }

View file

@ -281,16 +281,31 @@ void EmitX64::EmitFPSub64(EmitContext& ctx, IR::Inst* inst) {
FPThreeOp64(code, ctx, inst, &Xbyak::CodeGenerator::subsd); FPThreeOp64(code, ctx, inst, &Xbyak::CodeGenerator::subsd);
} }
static void SetFpscrNzcvFromFlags(BlockOfCode& code, EmitContext& ctx) { static Xbyak::Reg64 SetFpscrNzcvFromFlags(BlockOfCode& code, EmitContext& ctx) {
ctx.reg_alloc.ScratchGpr({HostLoc::RCX}); // shifting requires use of cl ctx.reg_alloc.ScratchGpr({HostLoc::RCX}); // shifting requires use of cl
Xbyak::Reg32 nzcv = ctx.reg_alloc.ScratchGpr().cvt32(); Xbyak::Reg64 nzcv = ctx.reg_alloc.ScratchGpr();
code.mov(nzcv, 0x28630000); // x64 flags ARM flags
// ZF PF CF NZCV
// Unordered 1 1 1 0011
// Greater than 0 0 0 0010
// Less than 0 0 1 1000
// Equal 1 0 0 0110
//
// Thus we can use ZF:CF as an index into an array like so:
// x64 ARM ARM as x64
// ZF:CF NZCV NZ-----C-------V
// 0 0010 0000000100000000 = 0x0100
// 1 1000 1000000000000000 = 0x8000
// 2 0110 0100000100000000 = 0x4100
// 3 0011 0000000100000001 = 0x0101
code.mov(nzcv, 0x0101'4100'8000'0100);
code.sete(cl); code.sete(cl);
code.rcl(cl, 3); code.rcl(cl, 5); // cl = ZF:CF:0000
code.shl(nzcv, cl); code.shr(nzcv, cl);
code.and_(nzcv, 0xF0000000);
code.mov(dword[r15 + code.GetJitStateInfo().offsetof_FPSCR_nzcv], nzcv); return nzcv;
} }
void EmitX64::EmitFPCompare32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitFPCompare32(EmitContext& ctx, IR::Inst* inst) {
@ -305,7 +320,8 @@ void EmitX64::EmitFPCompare32(EmitContext& ctx, IR::Inst* inst) {
code.ucomiss(reg_a, reg_b); code.ucomiss(reg_a, reg_b);
} }
SetFpscrNzcvFromFlags(code, ctx); Xbyak::Reg64 nzcv = SetFpscrNzcvFromFlags(code, ctx);
ctx.reg_alloc.DefineValue(inst, nzcv);
} }
void EmitX64::EmitFPCompare64(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitFPCompare64(EmitContext& ctx, IR::Inst* inst) {
@ -320,7 +336,8 @@ void EmitX64::EmitFPCompare64(EmitContext& ctx, IR::Inst* inst) {
code.ucomisd(reg_a, reg_b); code.ucomisd(reg_a, reg_b);
} }
SetFpscrNzcvFromFlags(code, ctx); Xbyak::Reg64 nzcv = SetFpscrNzcvFromFlags(code, ctx);
ctx.reg_alloc.DefineValue(inst, nzcv);
} }
void EmitX64::EmitFPSingleToDouble(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitFPSingleToDouble(EmitContext& ctx, IR::Inst* inst) {

View file

@ -24,7 +24,6 @@ struct JitStateInfo {
, offsetof_rsb_location_descriptors(offsetof(JitStateType, rsb_location_descriptors)) , offsetof_rsb_location_descriptors(offsetof(JitStateType, rsb_location_descriptors))
, offsetof_rsb_codeptrs(offsetof(JitStateType, rsb_codeptrs)) , offsetof_rsb_codeptrs(offsetof(JitStateType, rsb_codeptrs))
, offsetof_CPSR_nzcv(offsetof(JitStateType, CPSR_nzcv)) , offsetof_CPSR_nzcv(offsetof(JitStateType, CPSR_nzcv))
, offsetof_FPSCR_nzcv(offsetof(JitStateType, FPSCR_nzcv))
, offsetof_FPSCR_IDC(offsetof(JitStateType, FPSCR_IDC)) , offsetof_FPSCR_IDC(offsetof(JitStateType, FPSCR_IDC))
, offsetof_FPSCR_UFC(offsetof(JitStateType, FPSCR_UFC)) , offsetof_FPSCR_UFC(offsetof(JitStateType, FPSCR_UFC))
{} {}
@ -38,7 +37,6 @@ struct JitStateInfo {
const size_t offsetof_rsb_location_descriptors; const size_t offsetof_rsb_location_descriptors;
const size_t offsetof_rsb_codeptrs; const size_t offsetof_rsb_codeptrs;
const size_t offsetof_CPSR_nzcv; const size_t offsetof_CPSR_nzcv;
const size_t offsetof_FPSCR_nzcv;
const size_t offsetof_FPSCR_IDC; const size_t offsetof_FPSCR_IDC;
const size_t offsetof_FPSCR_UFC; const size_t offsetof_FPSCR_UFC;
}; };

View file

@ -154,7 +154,7 @@ IR::U32 IREmitter::GetFpscrNZCV() {
return Inst<IR::U32>(Opcode::A32GetFpscrNZCV); return Inst<IR::U32>(Opcode::A32GetFpscrNZCV);
} }
void IREmitter::SetFpscrNZCV(const IR::U32& new_fpscr_nzcv) { void IREmitter::SetFpscrNZCV(const IR::NZCV& new_fpscr_nzcv) {
Inst(Opcode::A32SetFpscrNZCV, new_fpscr_nzcv); Inst(Opcode::A32SetFpscrNZCV, new_fpscr_nzcv);
} }

View file

@ -64,7 +64,7 @@ public:
IR::U32 GetFpscr(); IR::U32 GetFpscr();
void SetFpscr(const IR::U32& new_fpscr); void SetFpscr(const IR::U32& new_fpscr);
IR::U32 GetFpscrNZCV(); IR::U32 GetFpscrNZCV();
void SetFpscrNZCV(const IR::U32& new_fpscr_nzcv); void SetFpscrNZCV(const IR::NZCV& new_fpscr_nzcv);
void ClearExclusive(); void ClearExclusive();
void SetExclusive(const IR::U32& vaddr, size_t byte_size); void SetExclusive(const IR::U32& vaddr, size_t byte_size);

View file

@ -473,7 +473,8 @@ bool ArmTranslatorVisitor::vfp2_VCMP(Cond cond, bool D, size_t Vd, bool sz, bool
if (ConditionPassed(cond)) { if (ConditionPassed(cond)) {
auto reg_d = ir.GetExtendedRegister(d); auto reg_d = ir.GetExtendedRegister(d);
auto reg_m = ir.GetExtendedRegister(m); auto reg_m = ir.GetExtendedRegister(m);
ir.FPCompare(reg_d, reg_m, exc_on_qnan, true); auto nzcv = ir.FPCompare(reg_d, reg_m, exc_on_qnan, true);
ir.SetFpscrNZCV(nzcv);
} }
return true; return true;
} }
@ -486,9 +487,11 @@ bool ArmTranslatorVisitor::vfp2_VCMP_zero(Cond cond, bool D, size_t Vd, bool sz,
if (ConditionPassed(cond)) { if (ConditionPassed(cond)) {
auto reg_d = ir.GetExtendedRegister(d); auto reg_d = ir.GetExtendedRegister(d);
if (sz) { if (sz) {
ir.FPCompare(reg_d, ir.Imm64(0), exc_on_qnan, true); auto nzcv = ir.FPCompare(reg_d, ir.Imm64(0), exc_on_qnan, true);
ir.SetFpscrNZCV(nzcv);
} else { } else {
ir.FPCompare(reg_d, ir.Imm32(0), exc_on_qnan, true); auto nzcv = ir.FPCompare(reg_d, ir.Imm32(0), exc_on_qnan, true);
ir.SetFpscrNZCV(nzcv);
} }
} }
return true; return true;

View file

@ -37,7 +37,8 @@ bool TranslatorVisitor::FCMP_float(Imm<2> type, Vec Vm, Vec Vn, bool cmp_with_ze
operand2 = V_scalar(*datasize, Vm); operand2 = V_scalar(*datasize, Vm);
} }
ir.FPCompare(operand1, operand2, false, true); auto nzcv = ir.FPCompare(operand1, operand2, false, true);
ir.SetNZCV(nzcv);
return true; return true;
} }
@ -55,7 +56,8 @@ bool TranslatorVisitor::FCMPE_float(Imm<2> type, Vec Vm, Vec Vn, bool cmp_with_z
operand2 = V_scalar(*datasize, Vm); operand2 = V_scalar(*datasize, Vm);
} }
ir.FPCompare(operand1, operand2, true, true); auto nzcv = ir.FPCompare(operand1, operand2, true, true);
ir.SetNZCV(nzcv);
return true; return true;
} }

View file

@ -907,13 +907,13 @@ U32U64 IREmitter::FPAdd(const U32U64& a, const U32U64& b, bool fpscr_controlled)
} }
} }
void IREmitter::FPCompare(const U32U64& a, const U32U64& b, bool exc_on_qnan, bool fpscr_controlled) { NZCV IREmitter::FPCompare(const U32U64& a, const U32U64& b, bool exc_on_qnan, bool fpscr_controlled) {
ASSERT(fpscr_controlled); ASSERT(fpscr_controlled);
ASSERT(a.GetType() == b.GetType()); ASSERT(a.GetType() == b.GetType());
if (a.GetType() == Type::U32) { if (a.GetType() == Type::U32) {
Inst(Opcode::FPCompare32, a, b, Imm1(exc_on_qnan)); return Inst<NZCV>(Opcode::FPCompare32, a, b, Imm1(exc_on_qnan));
} else { } else {
Inst(Opcode::FPCompare64, a, b, Imm1(exc_on_qnan)); return Inst<NZCV>(Opcode::FPCompare64, a, b, Imm1(exc_on_qnan));
} }
} }

View file

@ -236,7 +236,7 @@ public:
U32U64 FPAbs(const U32U64& a); U32U64 FPAbs(const U32U64& a);
U32U64 FPAdd(const U32U64& a, const U32U64& b, bool fpscr_controlled); U32U64 FPAdd(const U32U64& a, const U32U64& b, bool fpscr_controlled);
void FPCompare(const U32U64& a, const U32U64& b, bool exc_on_qnan, bool fpscr_controlled); NZCV FPCompare(const U32U64& a, const U32U64& b, bool exc_on_qnan, bool fpscr_controlled);
U32U64 FPDiv(const U32U64& a, const U32U64& b, bool fpscr_controlled); U32U64 FPDiv(const U32U64& a, const U32U64& b, bool fpscr_controlled);
U32U64 FPMul(const U32U64& a, const U32U64& b, bool fpscr_controlled); U32U64 FPMul(const U32U64& a, const U32U64& b, bool fpscr_controlled);
U32U64 FPNeg(const U32U64& a); U32U64 FPNeg(const U32U64& a);

View file

@ -33,7 +33,7 @@ A32OPC(ExceptionRaised, T::Void, T::U32, T::U64
A32OPC(GetFpscr, T::U32, ) A32OPC(GetFpscr, T::U32, )
A32OPC(SetFpscr, T::Void, T::U32, ) A32OPC(SetFpscr, T::Void, T::U32, )
A32OPC(GetFpscrNZCV, T::U32, ) A32OPC(GetFpscrNZCV, T::U32, )
A32OPC(SetFpscrNZCV, T::Void, T::U32, ) A32OPC(SetFpscrNZCV, T::Void, T::NZCVFlags )
// A64 Context getters/setters // A64 Context getters/setters
A64OPC(SetCheckBit, T::Void, T::U1 ) A64OPC(SetCheckBit, T::Void, T::U1 )
@ -230,8 +230,8 @@ OPCODE(FPAbs32, T::U32, T::U32
OPCODE(FPAbs64, T::U64, T::U64 ) OPCODE(FPAbs64, T::U64, T::U64 )
OPCODE(FPAdd32, T::U32, T::U32, T::U32 ) OPCODE(FPAdd32, T::U32, T::U32, T::U32 )
OPCODE(FPAdd64, T::U64, T::U64, T::U64 ) OPCODE(FPAdd64, T::U64, T::U64, T::U64 )
OPCODE(FPCompare32, T::Void, T::U32, T::U32, T::U1 ) OPCODE(FPCompare32, T::NZCVFlags, T::U32, T::U32, T::U1 )
OPCODE(FPCompare64, T::Void, T::U64, T::U64, T::U1 ) OPCODE(FPCompare64, T::NZCVFlags, T::U64, T::U64, T::U1 )
OPCODE(FPDiv32, T::U32, T::U32, T::U32 ) OPCODE(FPDiv32, T::U32, T::U32, T::U32 )
OPCODE(FPDiv64, T::U64, T::U64, T::U64 ) OPCODE(FPDiv64, T::U64, T::U64, T::U64 )
OPCODE(FPMul32, T::U32, T::U32, T::U32 ) OPCODE(FPMul32, T::U32, T::U32, T::U32 )