emit_x64_floating_point: FlushToZero is redundant as hardware already does FTZ
This commit is contained in:
parent
822fd4a875
commit
de9d8c461c
1 changed file with 0 additions and 42 deletions
|
@@ -97,33 +97,6 @@ void DenormalsAreZero(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Reg64 gpr_
|
||||||
code.L(end);
|
code.L(end);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Emits a sequence that inspects the scalar held in xmm_value and, when its
// non-sign bits fall inside the range tested below, replaces the register
// contents with zero and stores (1 << 3) into the FPSCR_UFC slot of the JIT
// state. For fsize == 32 the tested range is 0x00000001..0x007FFFFF, i.e.
// non-zero values whose exponent field is zero (binary32 denormals). The
// 64-bit path uses named constants whose values are not visible in this
// excerpt -- presumably the binary64 equivalents; confirm at their definition.
//
// code        - emitter that receives the generated instructions.
// xmm_value   - register holding the value to test; overwritten with zero
//               when the test matches, otherwise left untouched.
// gpr_scratch - general-purpose register clobbered by the test.
template<size_t fsize>
|
|
||||||
void FlushToZero(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Reg64 gpr_scratch) {
|
|
||||||
// Jump target used to skip the flush when the value is outside the tested range.
Xbyak::Label end;
|
|
||||||
|
|
||||||
if constexpr (fsize == 32) {
|
|
||||||
// Copy the low 32 bits of the value into the scratch register.
code.movd(gpr_scratch.cvt32(), xmm_value);
|
|
||||||
// Clear bit 31 (the binary32 sign bit) so only the magnitude bits remain.
code.and_(gpr_scratch.cvt32(), u32(0x7FFFFFFF));
|
|
||||||
// Subtract one so that a magnitude of zero wraps to 0xFFFFFFFF and lands
// above the upper bound, letting a single unsigned compare reject both
// zero and everything from 0x00800000 upwards.
code.sub(gpr_scratch.cvt32(), u32(1));
|
|
||||||
// Magnitudes 0x00000001..0x007FFFFF are now 0x00000000..0x007FFFFE.
code.cmp(gpr_scratch.cvt32(), u32(0x007FFFFE));
|
|
||||||
} else {
|
|
||||||
// Memory-operand constants for the 64-bit variant of the same test.
// NOTE(review): setBit(64) presumably marks the operand as 64 bits wide so
// it can be paired with a 64-bit GPR -- confirm against Xbyak's Operand API.
auto mask = code.MConst(xword, f64_non_sign_mask);
|
|
||||||
mask.setBit(64);
|
|
||||||
auto penult_denormal = code.MConst(xword, f64_penultimate_positive_denormal);
|
|
||||||
penult_denormal.setBit(64);
|
|
||||||
|
|
||||||
// Same mask / subtract-one / compare sequence as the 32-bit path, on 64 bits.
code.movq(gpr_scratch, xmm_value);
|
|
||||||
code.and_(gpr_scratch, mask);
|
|
||||||
code.sub(gpr_scratch, u32(1));
|
|
||||||
code.cmp(gpr_scratch, penult_denormal);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Unsigned "above": taken when the adjusted magnitude exceeds the bound, so
// the flush below is skipped.
code.ja(end);
|
|
||||||
// XOR-with-self clears the entire register, so the original sign bit is not kept.
code.pxor(xmm_value, xmm_value);
|
|
||||||
// Record the flush in the JIT state.
// NOTE(review): r15 presumably holds the JIT state base pointer and bit 3 the
// cumulative underflow flag -- confirm against the JitState layout.
code.mov(dword[r15 + code.GetJitStateInfo().offsetof_FPSCR_UFC], u32(1 << 3));
|
|
||||||
code.L(end);
|
|
||||||
}
|
|
||||||
|
|
||||||
template<size_t fsize>
|
template<size_t fsize>
|
||||||
void ZeroIfNaN(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Xmm xmm_scratch) {
|
void ZeroIfNaN(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Xmm xmm_scratch) {
|
||||||
code.pxor(xmm_scratch, xmm_scratch);
|
code.pxor(xmm_scratch, xmm_scratch);
|
||||||
|
@@ -245,9 +218,6 @@ void FPTwoOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
|
||||||
} else {
|
} else {
|
||||||
fn(result);
|
fn(result);
|
||||||
}
|
}
|
||||||
if (ctx.FPSCR_FTZ()) {
|
|
||||||
FlushToZero<fsize>(code, result, gpr_scratch);
|
|
||||||
}
|
|
||||||
if (ctx.FPSCR_DN()) {
|
if (ctx.FPSCR_DN()) {
|
||||||
DefaultNaN<fsize>(code, result);
|
DefaultNaN<fsize>(code, result);
|
||||||
} else if (ctx.AccurateNaN()) {
|
} else if (ctx.AccurateNaN()) {
|
||||||
|
@@ -283,9 +253,6 @@ void FPThreeOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, [[maybe_unus
|
||||||
} else {
|
} else {
|
||||||
fn(result, operand);
|
fn(result, operand);
|
||||||
}
|
}
|
||||||
if (ctx.FPSCR_FTZ()) {
|
|
||||||
FlushToZero<fsize>(code, result, gpr_scratch);
|
|
||||||
}
|
|
||||||
if (ctx.FPSCR_DN()) {
|
if (ctx.FPSCR_DN()) {
|
||||||
DefaultNaN<fsize>(code, result);
|
DefaultNaN<fsize>(code, result);
|
||||||
} else if (ctx.AccurateNaN()) {
|
} else if (ctx.AccurateNaN()) {
|
||||||
|
@@ -321,9 +288,6 @@ void FPFourOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn,
|
||||||
PreProcessNaNs<fsize>(code, ctx, result, operand2, operand3, end, nan_handler);
|
PreProcessNaNs<fsize>(code, ctx, result, operand2, operand3, end, nan_handler);
|
||||||
}
|
}
|
||||||
fn(result, operand2, operand3);
|
fn(result, operand2, operand3);
|
||||||
if (ctx.FPSCR_FTZ()) {
|
|
||||||
FlushToZero<fsize>(code, result, gpr_scratch);
|
|
||||||
}
|
|
||||||
if (ctx.FPSCR_DN()) {
|
if (ctx.FPSCR_DN()) {
|
||||||
DefaultNaN<fsize>(code, result);
|
DefaultNaN<fsize>(code, result);
|
||||||
} else if (ctx.AccurateNaN()) {
|
} else if (ctx.AccurateNaN()) {
|
||||||
|
@@ -923,9 +887,6 @@ void EmitX64::EmitFPSingleToDouble(EmitContext& ctx, IR::Inst* inst) {
|
||||||
DenormalsAreZero<32>(code, result, gpr_scratch);
|
DenormalsAreZero<32>(code, result, gpr_scratch);
|
||||||
}
|
}
|
||||||
code.cvtss2sd(result, result);
|
code.cvtss2sd(result, result);
|
||||||
if (ctx.FPSCR_FTZ()) {
|
|
||||||
FlushToZero<64>(code, result, gpr_scratch);
|
|
||||||
}
|
|
||||||
if (ctx.FPSCR_DN()) {
|
if (ctx.FPSCR_DN()) {
|
||||||
DefaultNaN<64>(code, result);
|
DefaultNaN<64>(code, result);
|
||||||
}
|
}
|
||||||
|
@@ -942,9 +903,6 @@ void EmitX64::EmitFPDoubleToSingle(EmitContext& ctx, IR::Inst* inst) {
|
||||||
DenormalsAreZero<64>(code, result, gpr_scratch);
|
DenormalsAreZero<64>(code, result, gpr_scratch);
|
||||||
}
|
}
|
||||||
code.cvtsd2ss(result, result);
|
code.cvtsd2ss(result, result);
|
||||||
if (ctx.FPSCR_FTZ()) {
|
|
||||||
FlushToZero<32>(code, result, gpr_scratch);
|
|
||||||
}
|
|
||||||
if (ctx.FPSCR_DN()) {
|
if (ctx.FPSCR_DN()) {
|
||||||
DefaultNaN<32>(code, result);
|
DefaultNaN<32>(code, result);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue