emit_x64_floating_point: AVX implementation of ZeroIfNaN

This commit is contained in:
Wunkolo 2021-05-28 17:47:37 -07:00 committed by MerryMage
parent e9c5c01eda
commit 9a23c09c3b
2 changed files with 15 additions and 13 deletions

View file

@ -64,15 +64,14 @@ enum class FpFixup : u8 {
}; };
// Generates 32-bit LUT for vfixupimm instruction // Generates 32-bit LUT for vfixupimm instruction
constexpr u32 FixupLUT( constexpr u32 FixupLUT(FpFixup src_qnan = FpFixup::A,
FpFixup src_qnan = FpFixup::A, FpFixup src_snan = FpFixup::A,
FpFixup src_snan = FpFixup::A, FpFixup src_zero = FpFixup::A,
FpFixup src_zero = FpFixup::A, FpFixup src_posone = FpFixup::A,
FpFixup src_posone = FpFixup::A, FpFixup src_neginf = FpFixup::A,
FpFixup src_neginf = FpFixup::A, FpFixup src_posinf = FpFixup::A,
FpFixup src_posinf = FpFixup::A, FpFixup src_pos = FpFixup::A,
FpFixup src_pos = FpFixup::A, FpFixup src_neg = FpFixup::A) {
FpFixup src_neg = FpFixup::A) {
u32 fixup_lut = 0; u32 fixup_lut = 0;
fixup_lut = Common::ModifyBits<0, 3, u32>(fixup_lut, static_cast<u32>(src_qnan)); fixup_lut = Common::ModifyBits<0, 3, u32>(fixup_lut, static_cast<u32>(src_qnan));
fixup_lut = Common::ModifyBits<4, 7, u32>(fixup_lut, static_cast<u32>(src_snan)); fixup_lut = Common::ModifyBits<4, 7, u32>(fixup_lut, static_cast<u32>(src_snan));

View file

@ -121,11 +121,14 @@ void ZeroIfNaN(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Xmm xmm_scratch)
constexpr u32 nan_to_zero = FixupLUT(FpFixup::PosZero, constexpr u32 nan_to_zero = FixupLUT(FpFixup::PosZero,
FpFixup::PosZero); FpFixup::PosZero);
FCODE(vfixupimms)(xmm_value, xmm_value, code.MConst(ptr, u64(nan_to_zero)), u8(0)); FCODE(vfixupimms)(xmm_value, xmm_value, code.MConst(ptr, u64(nan_to_zero)), u8(0));
return; } else if (code.HasHostFeature(HostFeature::AVX)) {
FCODE(vcmpords)(xmm_scratch, xmm_value, xmm_value);
FCODE(vandp)(xmm_value, xmm_value, xmm_scratch);
} else {
code.xorps(xmm_scratch, xmm_scratch);
FCODE(cmpords)(xmm_scratch, xmm_value); // true mask when ordered (i.e.: when not an NaN)
code.pand(xmm_value, xmm_scratch);
} }
code.xorps(xmm_scratch, xmm_scratch);
FCODE(cmpords)(xmm_scratch, xmm_value); // true mask when ordered (i.e.: when not an NaN)
code.pand(xmm_value, xmm_scratch);
} }
template<size_t fsize> template<size_t fsize>