emit_x64_floating_point: EmitFPToFixed: No need to round if rounding_mode == TowardsZero

cvttsd2si truncates during operation
This commit is contained in:
Merry 2022-02-23 20:43:50 +00:00
parent b34214f953
commit ea08a389b4

View file

@ -1470,7 +1470,10 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
if constexpr (fsize != 16) { if constexpr (fsize != 16) {
const auto round_imm = ConvertRoundingModeToX64Immediate(rounding_mode); const auto round_imm = ConvertRoundingModeToX64Immediate(rounding_mode);
if (code.HasHostFeature(HostFeature::SSE41) && round_imm) { // cvttsd2si truncates during operation so rounding (and thus SSE4.1) not required
const bool truncating = rounding_mode == FP::RoundingMode::TowardsZero;
if (round_imm && (truncating || code.HasHostFeature(HostFeature::SSE41))) {
const Xbyak::Xmm src = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm src = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr().cvt64(); const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr().cvt64();
@ -1480,14 +1483,19 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
code.mulsd(src, code.MConst(xword, scale_factor)); code.mulsd(src, code.MConst(xword, scale_factor));
} }
if (!truncating) {
code.roundsd(src, src, *round_imm); code.roundsd(src, src, *round_imm);
}
} else { } else {
if (fbits != 0) { if (fbits != 0) {
const u32 scale_factor = static_cast<u32>((fbits + 127) << 23); const u32 scale_factor = static_cast<u32>((fbits + 127) << 23);
code.mulss(src, code.MConst(xword, scale_factor)); code.mulss(src, code.MConst(xword, scale_factor));
} }
if (!truncating) {
code.roundss(src, src, *round_imm); code.roundss(src, src, *round_imm);
}
code.cvtss2sd(src, src); code.cvtss2sd(src, src);
} }