emit_x64_floating_point: AVX implementation of ZeroIfNaN
This commit is contained in:
parent
e9c5c01eda
commit
9a23c09c3b
2 changed files with 15 additions and 13 deletions
|
@ -64,15 +64,14 @@ enum class FpFixup : u8 {
|
||||||
};
|
};
|
||||||
|
|
||||||
// Generates 32-bit LUT for vfixupimm instruction
|
// Generates 32-bit LUT for vfixupimm instruction
|
||||||
constexpr u32 FixupLUT(
|
constexpr u32 FixupLUT(FpFixup src_qnan = FpFixup::A,
|
||||||
FpFixup src_qnan = FpFixup::A,
|
FpFixup src_snan = FpFixup::A,
|
||||||
FpFixup src_snan = FpFixup::A,
|
FpFixup src_zero = FpFixup::A,
|
||||||
FpFixup src_zero = FpFixup::A,
|
FpFixup src_posone = FpFixup::A,
|
||||||
FpFixup src_posone = FpFixup::A,
|
FpFixup src_neginf = FpFixup::A,
|
||||||
FpFixup src_neginf = FpFixup::A,
|
FpFixup src_posinf = FpFixup::A,
|
||||||
FpFixup src_posinf = FpFixup::A,
|
FpFixup src_pos = FpFixup::A,
|
||||||
FpFixup src_pos = FpFixup::A,
|
FpFixup src_neg = FpFixup::A) {
|
||||||
FpFixup src_neg = FpFixup::A) {
|
|
||||||
u32 fixup_lut = 0;
|
u32 fixup_lut = 0;
|
||||||
fixup_lut = Common::ModifyBits<0, 3, u32>(fixup_lut, static_cast<u32>(src_qnan));
|
fixup_lut = Common::ModifyBits<0, 3, u32>(fixup_lut, static_cast<u32>(src_qnan));
|
||||||
fixup_lut = Common::ModifyBits<4, 7, u32>(fixup_lut, static_cast<u32>(src_snan));
|
fixup_lut = Common::ModifyBits<4, 7, u32>(fixup_lut, static_cast<u32>(src_snan));
|
||||||
|
|
|
@ -121,11 +121,14 @@ void ZeroIfNaN(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Xmm xmm_scratch)
|
||||||
constexpr u32 nan_to_zero = FixupLUT(FpFixup::PosZero,
|
constexpr u32 nan_to_zero = FixupLUT(FpFixup::PosZero,
|
||||||
FpFixup::PosZero);
|
FpFixup::PosZero);
|
||||||
FCODE(vfixupimms)(xmm_value, xmm_value, code.MConst(ptr, u64(nan_to_zero)), u8(0));
|
FCODE(vfixupimms)(xmm_value, xmm_value, code.MConst(ptr, u64(nan_to_zero)), u8(0));
|
||||||
return;
|
} else if (code.HasHostFeature(HostFeature::AVX)) {
|
||||||
|
FCODE(vcmpords)(xmm_scratch, xmm_value, xmm_value);
|
||||||
|
FCODE(vandp)(xmm_value, xmm_value, xmm_scratch);
|
||||||
|
} else {
|
||||||
|
code.xorps(xmm_scratch, xmm_scratch);
|
||||||
|
FCODE(cmpords)(xmm_scratch, xmm_value); // true mask when ordered (i.e.: when not an NaN)
|
||||||
|
code.pand(xmm_value, xmm_scratch);
|
||||||
}
|
}
|
||||||
code.xorps(xmm_scratch, xmm_scratch);
|
|
||||||
FCODE(cmpords)(xmm_scratch, xmm_value); // true mask when ordered (i.e.: when not an NaN)
|
|
||||||
code.pand(xmm_value, xmm_scratch);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<size_t fsize>
|
template<size_t fsize>
|
||||||
|
|
Loading…
Reference in a new issue