From e9c5c01edaf650c1e83f9199d5b51b527a14ba3b Mon Sep 17 00:00:00 2001 From: Wunkolo Date: Fri, 28 May 2021 17:26:33 -0700 Subject: [PATCH] emit_x64{_vector}_floating_point: AVX512 implementation of ZeroIfNaN Using a single `vfixupimm` to turn `QNaN`/`SNaN` to `+0` --- src/dynarmic/backend/x64/emit_x64_floating_point.cpp | 7 +++++++ .../backend/x64/emit_x64_vector_floating_point.cpp | 7 ++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/src/dynarmic/backend/x64/emit_x64_floating_point.cpp b/src/dynarmic/backend/x64/emit_x64_floating_point.cpp index 9d908a08..89e9711e 100644 --- a/src/dynarmic/backend/x64/emit_x64_floating_point.cpp +++ b/src/dynarmic/backend/x64/emit_x64_floating_point.cpp @@ -16,6 +16,7 @@ #include "dynarmic/backend/x64/abi.h" #include "dynarmic/backend/x64/block_of_code.h" +#include "dynarmic/backend/x64/constants.h" #include "dynarmic/backend/x64/emit_x64.h" #include "dynarmic/common/assert.h" #include "dynarmic/common/cast_util.h" @@ -116,6 +117,12 @@ void DenormalsAreZero(BlockOfCode& code, EmitContext& ctx, std::initializer_list template void ZeroIfNaN(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Xmm xmm_scratch) { + if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) { + constexpr u32 nan_to_zero = FixupLUT(FpFixup::PosZero, + FpFixup::PosZero); + FCODE(vfixupimms)(xmm_value, xmm_value, code.MConst(ptr, u64(nan_to_zero)), u8(0)); + return; + } code.xorps(xmm_scratch, xmm_scratch); FCODE(cmpords)(xmm_scratch, xmm_value); // true mask when ordered (i.e.: when not an NaN) code.pand(xmm_value, xmm_scratch); diff --git a/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp b/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp index e4f77d6d..dfbd8da0 100644 --- a/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp +++ b/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp @@ -19,6 +19,7 @@ #include "dynarmic/backend/x64/abi.h" #include "dynarmic/backend/x64/block_of_code.h" +#include
"dynarmic/backend/x64/constants.h" #include "dynarmic/backend/x64/emit_x64.h" #include "dynarmic/common/assert.h" #include "dynarmic/common/fp/fpcr.h" @@ -203,7 +204,11 @@ void ForceToDefaultNaN(BlockOfCode& code, FP::FPCR fpcr, Xbyak::Xmm result) { template void ZeroIfNaN(BlockOfCode& code, Xbyak::Xmm result) { const Xbyak::Xmm nan_mask = xmm0; - if (code.HasHostFeature(HostFeature::AVX)) { + if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) { + constexpr u32 nan_to_zero = FixupLUT(FpFixup::PosZero, + FpFixup::PosZero); + FCODE(vfixupimmp)(result, result, code.MConst(ptr_b, u64(nan_to_zero)), u8(0)); + } else if (code.HasHostFeature(HostFeature::AVX)) { FCODE(vcmpordp)(nan_mask, result, result); FCODE(vandp)(result, result, nan_mask); } else {