a32_jitstate: Remove fpsr_idc

We do not really have accurate FPSR state in any case.
This commit is contained in:
MerryMage 2019-05-05 21:12:43 +01:00
parent 6f49c0ef8e
commit 0de3993373
7 changed files with 16 additions and 46 deletions

View file

@ -270,7 +270,6 @@ void TransferJitState(A32JitState& dest, const A32JitState& src, bool reset_rsb)
dest.Reg = src.Reg;
dest.ExtReg = src.ExtReg;
dest.guest_MXCSR = src.guest_MXCSR;
dest.fpsr_idc = src.fpsr_idc;
dest.fpcr_mode = src.fpcr_mode;
dest.fpsr_nzcv = src.fpsr_nzcv;
if (reset_rsb) {

View file

@ -156,12 +156,10 @@ constexpr u32 FPSCR_NZCV_MASK = 0xF0000000;
u32 A32JitState::Fpscr() const {
ASSERT((fpcr_mode & ~FPSCR_MODE_MASK) == 0);
ASSERT((fpsr_nzcv & ~FPSCR_NZCV_MASK) == 0);
ASSERT((fpsr_idc & ~(1 << 7)) == 0);
u32 FPSCR = fpcr_mode | fpsr_nzcv;
FPSCR |= (guest_MXCSR & 0b0000000000001); // IOC = IE
FPSCR |= (guest_MXCSR & 0b0000000111100) >> 1; // IXC, UFC, OFC, DZC = PE, UE, OE, ZE
FPSCR |= fpsr_idc;
FPSCR |= fpsr_exc;
return FPSCR;
@ -180,7 +178,6 @@ void A32JitState::SetFpscr(u32 FPSCR) {
guest_MXCSR |= MXCSR_RMode[(FPSCR >> 22) & 0x3];
// Cumulative flags IDC, IOC, IXC, UFC, OFC, DZC
fpsr_idc = 0;
fpsr_exc = FPSCR & 0x9F;
if (Common::Bit<24>(FPSCR)) {

View file

@ -69,7 +69,6 @@ struct A32JitState {
u32 fpsr_exc = 0;
u32 fpsr_qc = 0; // Dummy value
u32 fpsr_idc = 0;
u32 fpcr_mode = 0;
u32 fpsr_nzcv = 0;
u32 Fpscr() const;

View file

@ -103,7 +103,6 @@ u32 A64JitState::GetFpsr() const {
u32 fpsr = 0;
fpsr |= (guest_MXCSR & 0b0000000000001); // IOC = IE
fpsr |= (guest_MXCSR & 0b0000000111100) >> 1; // IXC, UFC, OFC, DZC = PE, UE, OE, ZE
fpsr |= fpsr_idc;
fpsr |= fpsr_exc;
fpsr |= (fpsr_qc == 0 ? 0 : 1) << 27;
return fpsr;
@ -111,7 +110,6 @@ u32 A64JitState::GetFpsr() const {
// Stores a guest FPSR value into the JIT state.
// Only bit 27 of `value` and the bits selected by the mask 0x9F are retained;
// every other bit of `value` is discarded.
void A64JitState::SetFpsr(u32 value) {
// Clear bits 0, 2, 3, 4 and 5 of guest_MXCSR (mask 0x3D). These are the same
// MXCSR bits that GetFpsr reads back when it assembles the FPSR value.
guest_MXCSR &= ~0x0000003D;
// Reset fpsr_idc unconditionally; bit 7 of `value` is still kept through
// fpsr_exc below, because 0x9F includes 0x80.
// NOTE(review): the commit title is "Remove fpsr_idc" and this hunk shrinks by
// one line, so this assignment appears to be the line being deleted — confirm
// against the raw diff.
fpsr_idc = 0;
// Keep bit 27 of `value` as a 0/1 flag; GetFpsr places it back at bit 27.
fpsr_qc = (value >> 27) & 1;
// Keep bits 0-4 and bit 7 of `value` (mask 0x9F).
fpsr_exc = value & 0x9F;
}

View file

@ -73,7 +73,6 @@ struct A64JitState {
u32 fpsr_exc = 0;
u32 fpsr_qc = 0;
u32 fpsr_idc = 0;
u32 fpcr = 0;
u32 GetFpcr() const;
u32 GetFpsr() const;

View file

@ -52,7 +52,6 @@ constexpr u64 f64_nan = 0x7ff8000000000000u;
constexpr u64 f64_non_sign_mask = 0x7fffffffffffffffu;
constexpr u64 f64_smallest_normal = 0x0010000000000000u;
constexpr u64 f64_penultimate_positive_denormal = 0x000ffffffffffffeu;
constexpr u64 f64_max_s32 = 0x41dfffffffc00000u; // 2147483647 as a double
constexpr u64 f64_min_u32 = 0x0000000000000000u; // 0 as a double
constexpr u64 f64_max_u32 = 0x41efffffffe00000u; // 4294967295 as a double
@ -89,33 +88,21 @@ std::optional<int> ConvertRoundingModeToX64Immediate(FP::RoundingMode rounding_m
}
template<size_t fsize>
void DenormalsAreZero(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Reg64 gpr_scratch) {
Xbyak::Label end;
if constexpr (fsize == 32) {
code.movd(gpr_scratch.cvt32(), xmm_value);
code.and_(gpr_scratch.cvt32(), u32(0x7FFFFFFF));
code.sub(gpr_scratch.cvt32(), u32(1));
code.cmp(gpr_scratch.cvt32(), u32(0x007FFFFE));
} else {
auto mask = code.MConst(xword, f64_non_sign_mask);
mask.setBit(64);
auto penult_denormal = code.MConst(xword, f64_penultimate_positive_denormal);
penult_denormal.setBit(64);
code.movq(gpr_scratch, xmm_value);
code.and_(gpr_scratch, mask);
code.sub(gpr_scratch, u32(1));
code.cmp(gpr_scratch, penult_denormal);
void DenormalsAreZero(BlockOfCode& code, EmitContext& ctx, std::initializer_list<Xbyak::Xmm> to_daz, Xbyak::Xmm tmp) {
if (ctx.FPCR().FZ()) {
for (const Xbyak::Xmm& xmm : to_daz) {
// TODO: Optimize
code.movaps(tmp, code.MConst(xword, fsize == 32 ? f32_non_sign_mask : f64_non_sign_mask));
code.andps(tmp, xmm);
if constexpr (fsize == 32) {
code.pcmpgtd(tmp, code.MConst(xword, f32_smallest_normal - 1));
} else {
code.pcmpgtq(tmp, code.MConst(xword, f64_smallest_normal - 1));
}
code.orps(tmp, code.MConst(xword, fsize == 32 ? f32_negative_zero : f64_negative_zero));
code.andps(xmm, tmp);
}
}
// We need to report back whether we've found a denormal on input.
// SSE doesn't do this for us when SSE's DAZ is enabled.
code.ja(end);
code.andps(xmm_value, code.MConst(xword, fsize == 32 ? f32_negative_zero : f64_negative_zero));
code.mov(dword[r15 + code.GetJitStateInfo().offsetof_fpsr_idc], u32(1 << 7));
code.L(end);
}
template<size_t fsize>
@ -421,10 +408,7 @@ static void EmitFPMinMax(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
const Xbyak::Reg64 gpr_scratch = ctx.reg_alloc.ScratchGpr();
if (ctx.FPCR().FZ()) {
DenormalsAreZero<fsize>(code, result, gpr_scratch);
DenormalsAreZero<fsize>(code, operand, gpr_scratch);
}
DenormalsAreZero<fsize>(code, ctx, {result, operand}, tmp);
Xbyak::Label equal, end, nan;
@ -483,13 +467,9 @@ static void EmitFPMinMaxNumeric(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
}
};
if (ctx.FPCR().FZ()) {
DenormalsAreZero<fsize>(code, op1, tmp.cvt64());
DenormalsAreZero<fsize>(code, op2, tmp.cvt64());
}
Xbyak::Label end, z, nan, op2_is_nan, snan, maybe_both_nan, normal;
DenormalsAreZero<fsize>(code, ctx, {op1, op2}, xmm0);
FCODE(ucomis)(op1, op2);
code.jz(z, code.T_NEAR);
code.L(normal);

View file

@ -24,7 +24,6 @@ struct JitStateInfo {
, offsetof_rsb_location_descriptors(offsetof(JitStateType, rsb_location_descriptors))
, offsetof_rsb_codeptrs(offsetof(JitStateType, rsb_codeptrs))
, offsetof_cpsr_nzcv(offsetof(JitStateType, cpsr_nzcv))
, offsetof_fpsr_idc(offsetof(JitStateType, fpsr_idc))
, offsetof_fpsr_exc(offsetof(JitStateType, fpsr_exc))
, offsetof_fpsr_qc(offsetof(JitStateType, fpsr_qc))
{}
@ -38,7 +37,6 @@ struct JitStateInfo {
const size_t offsetof_rsb_location_descriptors;
const size_t offsetof_rsb_codeptrs;
const size_t offsetof_cpsr_nzcv;
const size_t offsetof_fpsr_idc;
const size_t offsetof_fpsr_exc;
const size_t offsetof_fpsr_qc;
};