emit_x64{_vector}_floating_point: AVX512 implementation of ZeroIfNaN

Using a single `vfixupimm` to turn `QNaN`/`SNan` to `+0`
This commit is contained in:
Wunkolo 2021-05-28 17:26:33 -07:00 committed by merry
parent fe5abdb3e1
commit e9c5c01eda
2 changed files with 13 additions and 1 deletions

View file

@ -16,6 +16,7 @@
#include "dynarmic/backend/x64/abi.h" #include "dynarmic/backend/x64/abi.h"
#include "dynarmic/backend/x64/block_of_code.h" #include "dynarmic/backend/x64/block_of_code.h"
#include "dynarmic/backend/x64/constants.h"
#include "dynarmic/backend/x64/emit_x64.h" #include "dynarmic/backend/x64/emit_x64.h"
#include "dynarmic/common/assert.h" #include "dynarmic/common/assert.h"
#include "dynarmic/common/cast_util.h" #include "dynarmic/common/cast_util.h"
@ -116,6 +117,12 @@ void DenormalsAreZero(BlockOfCode& code, EmitContext& ctx, std::initializer_list
template<size_t fsize> template<size_t fsize>
void ZeroIfNaN(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Xmm xmm_scratch) { void ZeroIfNaN(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Xmm xmm_scratch) {
if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) {
constexpr u32 nan_to_zero = FixupLUT(FpFixup::PosZero,
FpFixup::PosZero);
FCODE(vfixupimms)(xmm_value, xmm_value, code.MConst(ptr, u64(nan_to_zero)), u8(0));
return;
}
code.xorps(xmm_scratch, xmm_scratch); code.xorps(xmm_scratch, xmm_scratch);
FCODE(cmpords)(xmm_scratch, xmm_value); // true mask when ordered (i.e.: when not an NaN) FCODE(cmpords)(xmm_scratch, xmm_value); // true mask when ordered (i.e.: when not an NaN)
code.pand(xmm_value, xmm_scratch); code.pand(xmm_value, xmm_scratch);

View file

@ -19,6 +19,7 @@
#include "dynarmic/backend/x64/abi.h" #include "dynarmic/backend/x64/abi.h"
#include "dynarmic/backend/x64/block_of_code.h" #include "dynarmic/backend/x64/block_of_code.h"
#include "dynarmic/backend/x64/constants.h"
#include "dynarmic/backend/x64/emit_x64.h" #include "dynarmic/backend/x64/emit_x64.h"
#include "dynarmic/common/assert.h" #include "dynarmic/common/assert.h"
#include "dynarmic/common/fp/fpcr.h" #include "dynarmic/common/fp/fpcr.h"
@ -203,7 +204,11 @@ void ForceToDefaultNaN(BlockOfCode& code, FP::FPCR fpcr, Xbyak::Xmm result) {
template<size_t fsize> template<size_t fsize>
void ZeroIfNaN(BlockOfCode& code, Xbyak::Xmm result) { void ZeroIfNaN(BlockOfCode& code, Xbyak::Xmm result) {
const Xbyak::Xmm nan_mask = xmm0; const Xbyak::Xmm nan_mask = xmm0;
if (code.HasHostFeature(HostFeature::AVX)) { if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) {
constexpr u32 nan_to_zero = FixupLUT(FpFixup::PosZero,
FpFixup::PosZero);
FCODE(vfixupimmp)(result, result, code.MConst(ptr_b, u64(nan_to_zero)), u8(0));
} else if (code.HasHostFeature(HostFeature::AVX)) {
FCODE(vcmpordp)(nan_mask, result, result); FCODE(vcmpordp)(nan_mask, result, result);
FCODE(vandp)(result, result, nan_mask); FCODE(vandp)(result, result, nan_mask);
} else { } else {