a32_jitstate: Remove fpsr_idc
We do not track FPSR state accurately in any case, so the separately stored input-denormal cumulative flag (IDC, kept in fpsr_idc) is removed.
This commit is contained in:
parent
6f49c0ef8e
commit
0de3993373
7 changed files with 16 additions and 46 deletions
|
@ -270,7 +270,6 @@ void TransferJitState(A32JitState& dest, const A32JitState& src, bool reset_rsb)
|
|||
dest.Reg = src.Reg;
|
||||
dest.ExtReg = src.ExtReg;
|
||||
dest.guest_MXCSR = src.guest_MXCSR;
|
||||
dest.fpsr_idc = src.fpsr_idc;
|
||||
dest.fpcr_mode = src.fpcr_mode;
|
||||
dest.fpsr_nzcv = src.fpsr_nzcv;
|
||||
if (reset_rsb) {
|
||||
|
|
|
@ -156,12 +156,10 @@ constexpr u32 FPSCR_NZCV_MASK = 0xF0000000;
|
|||
u32 A32JitState::Fpscr() const {
|
||||
ASSERT((fpcr_mode & ~FPSCR_MODE_MASK) == 0);
|
||||
ASSERT((fpsr_nzcv & ~FPSCR_NZCV_MASK) == 0);
|
||||
ASSERT((fpsr_idc & ~(1 << 7)) == 0);
|
||||
|
||||
u32 FPSCR = fpcr_mode | fpsr_nzcv;
|
||||
FPSCR |= (guest_MXCSR & 0b0000000000001); // IOC = IE
|
||||
FPSCR |= (guest_MXCSR & 0b0000000111100) >> 1; // IXC, UFC, OFC, DZC = PE, UE, OE, ZE
|
||||
FPSCR |= fpsr_idc;
|
||||
FPSCR |= fpsr_exc;
|
||||
|
||||
return FPSCR;
|
||||
|
@ -180,7 +178,6 @@ void A32JitState::SetFpscr(u32 FPSCR) {
|
|||
guest_MXCSR |= MXCSR_RMode[(FPSCR >> 22) & 0x3];
|
||||
|
||||
// Cumulative flags IDC, IOC, IXC, UFC, OFC, DZC
|
||||
fpsr_idc = 0;
|
||||
fpsr_exc = FPSCR & 0x9F;
|
||||
|
||||
if (Common::Bit<24>(FPSCR)) {
|
||||
|
|
|
@ -69,7 +69,6 @@ struct A32JitState {
|
|||
|
||||
u32 fpsr_exc = 0;
|
||||
u32 fpsr_qc = 0; // Dummy value
|
||||
u32 fpsr_idc = 0;
|
||||
u32 fpcr_mode = 0;
|
||||
u32 fpsr_nzcv = 0;
|
||||
u32 Fpscr() const;
|
||||
|
|
|
@ -103,7 +103,6 @@ u32 A64JitState::GetFpsr() const {
|
|||
u32 fpsr = 0;
|
||||
fpsr |= (guest_MXCSR & 0b0000000000001); // IOC = IE
|
||||
fpsr |= (guest_MXCSR & 0b0000000111100) >> 1; // IXC, UFC, OFC, DZC = PE, UE, OE, ZE
|
||||
fpsr |= fpsr_idc;
|
||||
fpsr |= fpsr_exc;
|
||||
fpsr |= (fpsr_qc == 0 ? 0 : 1) << 27;
|
||||
return fpsr;
|
||||
|
@ -111,7 +110,6 @@ u32 A64JitState::GetFpsr() const {
|
|||
|
||||
void A64JitState::SetFpsr(u32 value) {
|
||||
guest_MXCSR &= ~0x0000003D;
|
||||
fpsr_idc = 0;
|
||||
fpsr_qc = (value >> 27) & 1;
|
||||
fpsr_exc = value & 0x9F;
|
||||
}
|
||||
|
|
|
@ -73,7 +73,6 @@ struct A64JitState {
|
|||
|
||||
u32 fpsr_exc = 0;
|
||||
u32 fpsr_qc = 0;
|
||||
u32 fpsr_idc = 0;
|
||||
u32 fpcr = 0;
|
||||
u32 GetFpcr() const;
|
||||
u32 GetFpsr() const;
|
||||
|
|
|
@ -52,7 +52,6 @@ constexpr u64 f64_nan = 0x7ff8000000000000u;
|
|||
constexpr u64 f64_non_sign_mask = 0x7fffffffffffffffu;
|
||||
constexpr u64 f64_smallest_normal = 0x0010000000000000u;
|
||||
|
||||
constexpr u64 f64_penultimate_positive_denormal = 0x000ffffffffffffeu;
|
||||
constexpr u64 f64_max_s32 = 0x41dfffffffc00000u; // 2147483647 as a double
|
||||
constexpr u64 f64_min_u32 = 0x0000000000000000u; // 0 as a double
|
||||
constexpr u64 f64_max_u32 = 0x41efffffffe00000u; // 4294967295 as a double
|
||||
|
@ -89,33 +88,21 @@ std::optional<int> ConvertRoundingModeToX64Immediate(FP::RoundingMode rounding_m
|
|||
}
|
||||
|
||||
template<size_t fsize>
|
||||
void DenormalsAreZero(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Reg64 gpr_scratch) {
|
||||
Xbyak::Label end;
|
||||
|
||||
if constexpr (fsize == 32) {
|
||||
code.movd(gpr_scratch.cvt32(), xmm_value);
|
||||
code.and_(gpr_scratch.cvt32(), u32(0x7FFFFFFF));
|
||||
code.sub(gpr_scratch.cvt32(), u32(1));
|
||||
code.cmp(gpr_scratch.cvt32(), u32(0x007FFFFE));
|
||||
} else {
|
||||
auto mask = code.MConst(xword, f64_non_sign_mask);
|
||||
mask.setBit(64);
|
||||
auto penult_denormal = code.MConst(xword, f64_penultimate_positive_denormal);
|
||||
penult_denormal.setBit(64);
|
||||
|
||||
code.movq(gpr_scratch, xmm_value);
|
||||
code.and_(gpr_scratch, mask);
|
||||
code.sub(gpr_scratch, u32(1));
|
||||
code.cmp(gpr_scratch, penult_denormal);
|
||||
void DenormalsAreZero(BlockOfCode& code, EmitContext& ctx, std::initializer_list<Xbyak::Xmm> to_daz, Xbyak::Xmm tmp) {
|
||||
if (ctx.FPCR().FZ()) {
|
||||
for (const Xbyak::Xmm& xmm : to_daz) {
|
||||
// TODO: Optimize
|
||||
code.movaps(tmp, code.MConst(xword, fsize == 32 ? f32_non_sign_mask : f64_non_sign_mask));
|
||||
code.andps(tmp, xmm);
|
||||
if constexpr (fsize == 32) {
|
||||
code.pcmpgtd(tmp, code.MConst(xword, f32_smallest_normal - 1));
|
||||
} else {
|
||||
code.pcmpgtq(tmp, code.MConst(xword, f64_smallest_normal - 1));
|
||||
}
|
||||
code.orps(tmp, code.MConst(xword, fsize == 32 ? f32_negative_zero : f64_negative_zero));
|
||||
code.andps(xmm, tmp);
|
||||
}
|
||||
}
|
||||
|
||||
// We need to report back whether we've found a denormal on input.
|
||||
// SSE doesn't do this for us when SSE's DAZ is enabled.
|
||||
|
||||
code.ja(end);
|
||||
code.andps(xmm_value, code.MConst(xword, fsize == 32 ? f32_negative_zero : f64_negative_zero));
|
||||
code.mov(dword[r15 + code.GetJitStateInfo().offsetof_fpsr_idc], u32(1 << 7));
|
||||
code.L(end);
|
||||
}
|
||||
|
||||
template<size_t fsize>
|
||||
|
@ -421,10 +408,7 @@ static void EmitFPMinMax(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
|||
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
|
||||
const Xbyak::Reg64 gpr_scratch = ctx.reg_alloc.ScratchGpr();
|
||||
|
||||
if (ctx.FPCR().FZ()) {
|
||||
DenormalsAreZero<fsize>(code, result, gpr_scratch);
|
||||
DenormalsAreZero<fsize>(code, operand, gpr_scratch);
|
||||
}
|
||||
DenormalsAreZero<fsize>(code, ctx, {result, operand}, tmp);
|
||||
|
||||
Xbyak::Label equal, end, nan;
|
||||
|
||||
|
@ -483,13 +467,9 @@ static void EmitFPMinMaxNumeric(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
|
|||
}
|
||||
};
|
||||
|
||||
if (ctx.FPCR().FZ()) {
|
||||
DenormalsAreZero<fsize>(code, op1, tmp.cvt64());
|
||||
DenormalsAreZero<fsize>(code, op2, tmp.cvt64());
|
||||
}
|
||||
|
||||
Xbyak::Label end, z, nan, op2_is_nan, snan, maybe_both_nan, normal;
|
||||
|
||||
DenormalsAreZero<fsize>(code, ctx, {op1, op2}, xmm0);
|
||||
FCODE(ucomis)(op1, op2);
|
||||
code.jz(z, code.T_NEAR);
|
||||
code.L(normal);
|
||||
|
|
|
@ -24,7 +24,6 @@ struct JitStateInfo {
|
|||
, offsetof_rsb_location_descriptors(offsetof(JitStateType, rsb_location_descriptors))
|
||||
, offsetof_rsb_codeptrs(offsetof(JitStateType, rsb_codeptrs))
|
||||
, offsetof_cpsr_nzcv(offsetof(JitStateType, cpsr_nzcv))
|
||||
, offsetof_fpsr_idc(offsetof(JitStateType, fpsr_idc))
|
||||
, offsetof_fpsr_exc(offsetof(JitStateType, fpsr_exc))
|
||||
, offsetof_fpsr_qc(offsetof(JitStateType, fpsr_qc))
|
||||
{}
|
||||
|
@ -38,7 +37,6 @@ struct JitStateInfo {
|
|||
const size_t offsetof_rsb_location_descriptors;
|
||||
const size_t offsetof_rsb_codeptrs;
|
||||
const size_t offsetof_cpsr_nzcv;
|
||||
const size_t offsetof_fpsr_idc;
|
||||
const size_t offsetof_fpsr_exc;
|
||||
const size_t offsetof_fpsr_qc;
|
||||
};
|
||||
|
|
Loading…
Reference in a new issue