emit_x64{_vector}_floating_point: AVX512 implementation of ZeroIfNaN
Using a single `vfixupimm` to turn `QNaN`/`SNan` to `+0`
This commit is contained in:
parent
fe5abdb3e1
commit
e9c5c01eda
2 changed files with 13 additions and 1 deletions
|
@ -16,6 +16,7 @@
|
|||
|
||||
#include "dynarmic/backend/x64/abi.h"
|
||||
#include "dynarmic/backend/x64/block_of_code.h"
|
||||
#include "dynarmic/backend/x64/constants.h"
|
||||
#include "dynarmic/backend/x64/emit_x64.h"
|
||||
#include "dynarmic/common/assert.h"
|
||||
#include "dynarmic/common/cast_util.h"
|
||||
|
@ -116,6 +117,12 @@ void DenormalsAreZero(BlockOfCode& code, EmitContext& ctx, std::initializer_list
|
|||
|
||||
template<size_t fsize>
|
||||
void ZeroIfNaN(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Xmm xmm_scratch) {
|
||||
if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) {
|
||||
constexpr u32 nan_to_zero = FixupLUT(FpFixup::PosZero,
|
||||
FpFixup::PosZero);
|
||||
FCODE(vfixupimms)(xmm_value, xmm_value, code.MConst(ptr, u64(nan_to_zero)), u8(0));
|
||||
return;
|
||||
}
|
||||
code.xorps(xmm_scratch, xmm_scratch);
|
||||
FCODE(cmpords)(xmm_scratch, xmm_value); // true mask when ordered (i.e.: when not an NaN)
|
||||
code.pand(xmm_value, xmm_scratch);
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
|
||||
#include "dynarmic/backend/x64/abi.h"
|
||||
#include "dynarmic/backend/x64/block_of_code.h"
|
||||
#include "dynarmic/backend/x64/constants.h"
|
||||
#include "dynarmic/backend/x64/emit_x64.h"
|
||||
#include "dynarmic/common/assert.h"
|
||||
#include "dynarmic/common/fp/fpcr.h"
|
||||
|
@ -203,7 +204,11 @@ void ForceToDefaultNaN(BlockOfCode& code, FP::FPCR fpcr, Xbyak::Xmm result) {
|
|||
template<size_t fsize>
|
||||
void ZeroIfNaN(BlockOfCode& code, Xbyak::Xmm result) {
|
||||
const Xbyak::Xmm nan_mask = xmm0;
|
||||
if (code.HasHostFeature(HostFeature::AVX)) {
|
||||
if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) {
|
||||
constexpr u32 nan_to_zero = FixupLUT(FpFixup::PosZero,
|
||||
FpFixup::PosZero);
|
||||
FCODE(vfixupimmp)(result, result, code.MConst(ptr_b, u64(nan_to_zero)), u8(0));
|
||||
} else if (code.HasHostFeature(HostFeature::AVX)) {
|
||||
FCODE(vcmpordp)(nan_mask, result, result);
|
||||
FCODE(vandp)(result, result, nan_mask);
|
||||
} else {
|
||||
|
|
Loading…
Reference in a new issue