From ea08a389b49cebff4bd3f3248adf619d09a4d484 Mon Sep 17 00:00:00 2001 From: Merry Date: Wed, 23 Feb 2022 20:43:50 +0000 Subject: [PATCH] emit_x64_floating_point: EmitFPToFixed: No need to round if rounding_mode == TowardsZero cvttsd2si truncates during operation --- .../backend/x64/emit_x64_floating_point.cpp | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/dynarmic/backend/x64/emit_x64_floating_point.cpp b/src/dynarmic/backend/x64/emit_x64_floating_point.cpp index 671a261a..512db1b2 100644 --- a/src/dynarmic/backend/x64/emit_x64_floating_point.cpp +++ b/src/dynarmic/backend/x64/emit_x64_floating_point.cpp @@ -1470,7 +1470,10 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { if constexpr (fsize != 16) { const auto round_imm = ConvertRoundingModeToX64Immediate(rounding_mode); - if (code.HasHostFeature(HostFeature::SSE41) && round_imm) { + // cvttsd2si truncates during operation so rounding (and thus SSE4.1) not required + const bool truncating = rounding_mode == FP::RoundingMode::TowardsZero; + + if (round_imm && (truncating || code.HasHostFeature(HostFeature::SSE41))) { const Xbyak::Xmm src = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr().cvt64(); @@ -1480,14 +1483,19 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { code.mulsd(src, code.MConst(xword, scale_factor)); } - code.roundsd(src, src, *round_imm); + if (!truncating) { + code.roundsd(src, src, *round_imm); + } } else { if (fbits != 0) { const u32 scale_factor = static_cast<u32>((fbits + 127) << 23); code.mulss(src, code.MConst(xword, scale_factor)); } - code.roundss(src, src, *round_imm); + if (!truncating) { + code.roundss(src, src, *round_imm); + } + code.cvtss2sd(src, src); }