IR: FPCompare{32,64} now return NZCV flags instead of implicitly setting them
This commit is contained in:
parent
2ee39d6b36
commit
aac5af50e2
10 changed files with 51 additions and 26 deletions
|
@ -629,7 +629,12 @@ void A32EmitX64::EmitA32GetFpscrNZCV(A32EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
|
||||||
void A32EmitX64::EmitA32SetFpscrNZCV(A32EmitContext& ctx, IR::Inst* inst) {
|
void A32EmitX64::EmitA32SetFpscrNZCV(A32EmitContext& ctx, IR::Inst* inst) {
|
||||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
Xbyak::Reg32 value = ctx.reg_alloc.UseGpr(args[0]).cvt32();
|
Xbyak::Reg32 value = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
|
||||||
|
|
||||||
|
code.and_(value, 0b11000001'00000001);
|
||||||
|
code.imul(value, value, 0b00010000'00100001);
|
||||||
|
code.shl(value, 16);
|
||||||
|
code.and_(value, 0xF0000000);
|
||||||
|
|
||||||
code.mov(dword[r15 + offsetof(A32JitState, FPSCR_nzcv)], value);
|
code.mov(dword[r15 + offsetof(A32JitState, FPSCR_nzcv)], value);
|
||||||
}
|
}
|
||||||
|
|
|
@ -281,16 +281,31 @@ void EmitX64::EmitFPSub64(EmitContext& ctx, IR::Inst* inst) {
|
||||||
FPThreeOp64(code, ctx, inst, &Xbyak::CodeGenerator::subsd);
|
FPThreeOp64(code, ctx, inst, &Xbyak::CodeGenerator::subsd);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void SetFpscrNzcvFromFlags(BlockOfCode& code, EmitContext& ctx) {
|
static Xbyak::Reg64 SetFpscrNzcvFromFlags(BlockOfCode& code, EmitContext& ctx) {
|
||||||
ctx.reg_alloc.ScratchGpr({HostLoc::RCX}); // shifting requires use of cl
|
ctx.reg_alloc.ScratchGpr({HostLoc::RCX}); // shifting requires use of cl
|
||||||
Xbyak::Reg32 nzcv = ctx.reg_alloc.ScratchGpr().cvt32();
|
Xbyak::Reg64 nzcv = ctx.reg_alloc.ScratchGpr();
|
||||||
|
|
||||||
code.mov(nzcv, 0x28630000);
|
// x64 flags ARM flags
|
||||||
|
// ZF PF CF NZCV
|
||||||
|
// Unordered 1 1 1 0011
|
||||||
|
// Greater than 0 0 0 0010
|
||||||
|
// Less than 0 0 1 1000
|
||||||
|
// Equal 1 0 0 0110
|
||||||
|
//
|
||||||
|
// Thus we can use ZF:CF as an index into an array like so:
|
||||||
|
// x64 ARM ARM as x64
|
||||||
|
// ZF:CF NZCV NZ-----C-------V
|
||||||
|
// 0 0010 0000000100000000 = 0x0100
|
||||||
|
// 1 1000 1000000000000000 = 0x8000
|
||||||
|
// 2 0110 0100000100000000 = 0x4100
|
||||||
|
// 3 0011 0000000100000001 = 0x0101
|
||||||
|
|
||||||
|
code.mov(nzcv, 0x0101'4100'8000'0100);
|
||||||
code.sete(cl);
|
code.sete(cl);
|
||||||
code.rcl(cl, 3);
|
code.rcl(cl, 5); // cl = ZF:CF:0000
|
||||||
code.shl(nzcv, cl);
|
code.shr(nzcv, cl);
|
||||||
code.and_(nzcv, 0xF0000000);
|
|
||||||
code.mov(dword[r15 + code.GetJitStateInfo().offsetof_FPSCR_nzcv], nzcv);
|
return nzcv;
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitX64::EmitFPCompare32(EmitContext& ctx, IR::Inst* inst) {
|
void EmitX64::EmitFPCompare32(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
@ -305,7 +320,8 @@ void EmitX64::EmitFPCompare32(EmitContext& ctx, IR::Inst* inst) {
|
||||||
code.ucomiss(reg_a, reg_b);
|
code.ucomiss(reg_a, reg_b);
|
||||||
}
|
}
|
||||||
|
|
||||||
SetFpscrNzcvFromFlags(code, ctx);
|
Xbyak::Reg64 nzcv = SetFpscrNzcvFromFlags(code, ctx);
|
||||||
|
ctx.reg_alloc.DefineValue(inst, nzcv);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitX64::EmitFPCompare64(EmitContext& ctx, IR::Inst* inst) {
|
void EmitX64::EmitFPCompare64(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
@ -320,7 +336,8 @@ void EmitX64::EmitFPCompare64(EmitContext& ctx, IR::Inst* inst) {
|
||||||
code.ucomisd(reg_a, reg_b);
|
code.ucomisd(reg_a, reg_b);
|
||||||
}
|
}
|
||||||
|
|
||||||
SetFpscrNzcvFromFlags(code, ctx);
|
Xbyak::Reg64 nzcv = SetFpscrNzcvFromFlags(code, ctx);
|
||||||
|
ctx.reg_alloc.DefineValue(inst, nzcv);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitX64::EmitFPSingleToDouble(EmitContext& ctx, IR::Inst* inst) {
|
void EmitX64::EmitFPSingleToDouble(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
|
|
@ -24,7 +24,6 @@ struct JitStateInfo {
|
||||||
, offsetof_rsb_location_descriptors(offsetof(JitStateType, rsb_location_descriptors))
|
, offsetof_rsb_location_descriptors(offsetof(JitStateType, rsb_location_descriptors))
|
||||||
, offsetof_rsb_codeptrs(offsetof(JitStateType, rsb_codeptrs))
|
, offsetof_rsb_codeptrs(offsetof(JitStateType, rsb_codeptrs))
|
||||||
, offsetof_CPSR_nzcv(offsetof(JitStateType, CPSR_nzcv))
|
, offsetof_CPSR_nzcv(offsetof(JitStateType, CPSR_nzcv))
|
||||||
, offsetof_FPSCR_nzcv(offsetof(JitStateType, FPSCR_nzcv))
|
|
||||||
, offsetof_FPSCR_IDC(offsetof(JitStateType, FPSCR_IDC))
|
, offsetof_FPSCR_IDC(offsetof(JitStateType, FPSCR_IDC))
|
||||||
, offsetof_FPSCR_UFC(offsetof(JitStateType, FPSCR_UFC))
|
, offsetof_FPSCR_UFC(offsetof(JitStateType, FPSCR_UFC))
|
||||||
{}
|
{}
|
||||||
|
@ -38,7 +37,6 @@ struct JitStateInfo {
|
||||||
const size_t offsetof_rsb_location_descriptors;
|
const size_t offsetof_rsb_location_descriptors;
|
||||||
const size_t offsetof_rsb_codeptrs;
|
const size_t offsetof_rsb_codeptrs;
|
||||||
const size_t offsetof_CPSR_nzcv;
|
const size_t offsetof_CPSR_nzcv;
|
||||||
const size_t offsetof_FPSCR_nzcv;
|
|
||||||
const size_t offsetof_FPSCR_IDC;
|
const size_t offsetof_FPSCR_IDC;
|
||||||
const size_t offsetof_FPSCR_UFC;
|
const size_t offsetof_FPSCR_UFC;
|
||||||
};
|
};
|
||||||
|
|
|
@ -154,7 +154,7 @@ IR::U32 IREmitter::GetFpscrNZCV() {
|
||||||
return Inst<IR::U32>(Opcode::A32GetFpscrNZCV);
|
return Inst<IR::U32>(Opcode::A32GetFpscrNZCV);
|
||||||
}
|
}
|
||||||
|
|
||||||
void IREmitter::SetFpscrNZCV(const IR::U32& new_fpscr_nzcv) {
|
void IREmitter::SetFpscrNZCV(const IR::NZCV& new_fpscr_nzcv) {
|
||||||
Inst(Opcode::A32SetFpscrNZCV, new_fpscr_nzcv);
|
Inst(Opcode::A32SetFpscrNZCV, new_fpscr_nzcv);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -64,7 +64,7 @@ public:
|
||||||
IR::U32 GetFpscr();
|
IR::U32 GetFpscr();
|
||||||
void SetFpscr(const IR::U32& new_fpscr);
|
void SetFpscr(const IR::U32& new_fpscr);
|
||||||
IR::U32 GetFpscrNZCV();
|
IR::U32 GetFpscrNZCV();
|
||||||
void SetFpscrNZCV(const IR::U32& new_fpscr_nzcv);
|
void SetFpscrNZCV(const IR::NZCV& new_fpscr_nzcv);
|
||||||
|
|
||||||
void ClearExclusive();
|
void ClearExclusive();
|
||||||
void SetExclusive(const IR::U32& vaddr, size_t byte_size);
|
void SetExclusive(const IR::U32& vaddr, size_t byte_size);
|
||||||
|
|
|
@ -473,7 +473,8 @@ bool ArmTranslatorVisitor::vfp2_VCMP(Cond cond, bool D, size_t Vd, bool sz, bool
|
||||||
if (ConditionPassed(cond)) {
|
if (ConditionPassed(cond)) {
|
||||||
auto reg_d = ir.GetExtendedRegister(d);
|
auto reg_d = ir.GetExtendedRegister(d);
|
||||||
auto reg_m = ir.GetExtendedRegister(m);
|
auto reg_m = ir.GetExtendedRegister(m);
|
||||||
ir.FPCompare(reg_d, reg_m, exc_on_qnan, true);
|
auto nzcv = ir.FPCompare(reg_d, reg_m, exc_on_qnan, true);
|
||||||
|
ir.SetFpscrNZCV(nzcv);
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -486,9 +487,11 @@ bool ArmTranslatorVisitor::vfp2_VCMP_zero(Cond cond, bool D, size_t Vd, bool sz,
|
||||||
if (ConditionPassed(cond)) {
|
if (ConditionPassed(cond)) {
|
||||||
auto reg_d = ir.GetExtendedRegister(d);
|
auto reg_d = ir.GetExtendedRegister(d);
|
||||||
if (sz) {
|
if (sz) {
|
||||||
ir.FPCompare(reg_d, ir.Imm64(0), exc_on_qnan, true);
|
auto nzcv = ir.FPCompare(reg_d, ir.Imm64(0), exc_on_qnan, true);
|
||||||
|
ir.SetFpscrNZCV(nzcv);
|
||||||
} else {
|
} else {
|
||||||
ir.FPCompare(reg_d, ir.Imm32(0), exc_on_qnan, true);
|
auto nzcv = ir.FPCompare(reg_d, ir.Imm32(0), exc_on_qnan, true);
|
||||||
|
ir.SetFpscrNZCV(nzcv);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
|
|
|
@ -37,7 +37,8 @@ bool TranslatorVisitor::FCMP_float(Imm<2> type, Vec Vm, Vec Vn, bool cmp_with_ze
|
||||||
operand2 = V_scalar(*datasize, Vm);
|
operand2 = V_scalar(*datasize, Vm);
|
||||||
}
|
}
|
||||||
|
|
||||||
ir.FPCompare(operand1, operand2, false, true);
|
auto nzcv = ir.FPCompare(operand1, operand2, false, true);
|
||||||
|
ir.SetNZCV(nzcv);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -55,7 +56,8 @@ bool TranslatorVisitor::FCMPE_float(Imm<2> type, Vec Vm, Vec Vn, bool cmp_with_z
|
||||||
operand2 = V_scalar(*datasize, Vm);
|
operand2 = V_scalar(*datasize, Vm);
|
||||||
}
|
}
|
||||||
|
|
||||||
ir.FPCompare(operand1, operand2, true, true);
|
auto nzcv = ir.FPCompare(operand1, operand2, true, true);
|
||||||
|
ir.SetNZCV(nzcv);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -907,13 +907,13 @@ U32U64 IREmitter::FPAdd(const U32U64& a, const U32U64& b, bool fpscr_controlled)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void IREmitter::FPCompare(const U32U64& a, const U32U64& b, bool exc_on_qnan, bool fpscr_controlled) {
|
NZCV IREmitter::FPCompare(const U32U64& a, const U32U64& b, bool exc_on_qnan, bool fpscr_controlled) {
|
||||||
ASSERT(fpscr_controlled);
|
ASSERT(fpscr_controlled);
|
||||||
ASSERT(a.GetType() == b.GetType());
|
ASSERT(a.GetType() == b.GetType());
|
||||||
if (a.GetType() == Type::U32) {
|
if (a.GetType() == Type::U32) {
|
||||||
Inst(Opcode::FPCompare32, a, b, Imm1(exc_on_qnan));
|
return Inst<NZCV>(Opcode::FPCompare32, a, b, Imm1(exc_on_qnan));
|
||||||
} else {
|
} else {
|
||||||
Inst(Opcode::FPCompare64, a, b, Imm1(exc_on_qnan));
|
return Inst<NZCV>(Opcode::FPCompare64, a, b, Imm1(exc_on_qnan));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -236,7 +236,7 @@ public:
|
||||||
|
|
||||||
U32U64 FPAbs(const U32U64& a);
|
U32U64 FPAbs(const U32U64& a);
|
||||||
U32U64 FPAdd(const U32U64& a, const U32U64& b, bool fpscr_controlled);
|
U32U64 FPAdd(const U32U64& a, const U32U64& b, bool fpscr_controlled);
|
||||||
void FPCompare(const U32U64& a, const U32U64& b, bool exc_on_qnan, bool fpscr_controlled);
|
NZCV FPCompare(const U32U64& a, const U32U64& b, bool exc_on_qnan, bool fpscr_controlled);
|
||||||
U32U64 FPDiv(const U32U64& a, const U32U64& b, bool fpscr_controlled);
|
U32U64 FPDiv(const U32U64& a, const U32U64& b, bool fpscr_controlled);
|
||||||
U32U64 FPMul(const U32U64& a, const U32U64& b, bool fpscr_controlled);
|
U32U64 FPMul(const U32U64& a, const U32U64& b, bool fpscr_controlled);
|
||||||
U32U64 FPNeg(const U32U64& a);
|
U32U64 FPNeg(const U32U64& a);
|
||||||
|
|
|
@ -33,7 +33,7 @@ A32OPC(ExceptionRaised, T::Void, T::U32, T::U64
|
||||||
A32OPC(GetFpscr, T::U32, )
|
A32OPC(GetFpscr, T::U32, )
|
||||||
A32OPC(SetFpscr, T::Void, T::U32, )
|
A32OPC(SetFpscr, T::Void, T::U32, )
|
||||||
A32OPC(GetFpscrNZCV, T::U32, )
|
A32OPC(GetFpscrNZCV, T::U32, )
|
||||||
A32OPC(SetFpscrNZCV, T::Void, T::U32, )
|
A32OPC(SetFpscrNZCV, T::Void, T::NZCVFlags )
|
||||||
|
|
||||||
// A64 Context getters/setters
|
// A64 Context getters/setters
|
||||||
A64OPC(SetCheckBit, T::Void, T::U1 )
|
A64OPC(SetCheckBit, T::Void, T::U1 )
|
||||||
|
@ -230,8 +230,8 @@ OPCODE(FPAbs32, T::U32, T::U32
|
||||||
OPCODE(FPAbs64, T::U64, T::U64 )
|
OPCODE(FPAbs64, T::U64, T::U64 )
|
||||||
OPCODE(FPAdd32, T::U32, T::U32, T::U32 )
|
OPCODE(FPAdd32, T::U32, T::U32, T::U32 )
|
||||||
OPCODE(FPAdd64, T::U64, T::U64, T::U64 )
|
OPCODE(FPAdd64, T::U64, T::U64, T::U64 )
|
||||||
OPCODE(FPCompare32, T::Void, T::U32, T::U32, T::U1 )
|
OPCODE(FPCompare32, T::NZCVFlags, T::U32, T::U32, T::U1 )
|
||||||
OPCODE(FPCompare64, T::Void, T::U64, T::U64, T::U1 )
|
OPCODE(FPCompare64, T::NZCVFlags, T::U64, T::U64, T::U1 )
|
||||||
OPCODE(FPDiv32, T::U32, T::U32, T::U32 )
|
OPCODE(FPDiv32, T::U32, T::U32, T::U32 )
|
||||||
OPCODE(FPDiv64, T::U64, T::U64, T::U64 )
|
OPCODE(FPDiv64, T::U64, T::U64, T::U64 )
|
||||||
OPCODE(FPMul32, T::U32, T::U32, T::U32 )
|
OPCODE(FPMul32, T::U32, T::U32, T::U32 )
|
||||||
|
|
Loading…
Reference in a new issue