emit_x64_floating_point: AVX implementation of ZeroIfNaN
This commit is contained in:
parent
e9c5c01eda
commit
9a23c09c3b
2 changed files with 15 additions and 13 deletions
|
@ -64,15 +64,14 @@ enum class FpFixup : u8 {
|
|||
};
|
||||
|
||||
// Generates the 32-bit LUT for the vfixupimm instruction: one 4-bit FpFixup entry per source-value class
|
||||
constexpr u32 FixupLUT(
|
||||
FpFixup src_qnan = FpFixup::A,
|
||||
FpFixup src_snan = FpFixup::A,
|
||||
FpFixup src_zero = FpFixup::A,
|
||||
FpFixup src_posone = FpFixup::A,
|
||||
FpFixup src_neginf = FpFixup::A,
|
||||
FpFixup src_posinf = FpFixup::A,
|
||||
FpFixup src_pos = FpFixup::A,
|
||||
FpFixup src_neg = FpFixup::A) {
|
||||
constexpr u32 FixupLUT(FpFixup src_qnan = FpFixup::A,
|
||||
FpFixup src_snan = FpFixup::A,
|
||||
FpFixup src_zero = FpFixup::A,
|
||||
FpFixup src_posone = FpFixup::A,
|
||||
FpFixup src_neginf = FpFixup::A,
|
||||
FpFixup src_posinf = FpFixup::A,
|
||||
FpFixup src_pos = FpFixup::A,
|
||||
FpFixup src_neg = FpFixup::A) {
|
||||
u32 fixup_lut = 0;
|
||||
fixup_lut = Common::ModifyBits<0, 3, u32>(fixup_lut, static_cast<u32>(src_qnan));
|
||||
fixup_lut = Common::ModifyBits<4, 7, u32>(fixup_lut, static_cast<u32>(src_snan));
|
||||
|
|
|
@ -121,11 +121,14 @@ void ZeroIfNaN(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Xmm xmm_scratch)
|
|||
constexpr u32 nan_to_zero = FixupLUT(FpFixup::PosZero,
|
||||
FpFixup::PosZero);
|
||||
FCODE(vfixupimms)(xmm_value, xmm_value, code.MConst(ptr, u64(nan_to_zero)), u8(0));
|
||||
return;
|
||||
} else if (code.HasHostFeature(HostFeature::AVX)) {
|
||||
FCODE(vcmpords)(xmm_scratch, xmm_value, xmm_value);
|
||||
FCODE(vandp)(xmm_value, xmm_value, xmm_scratch);
|
||||
} else {
|
||||
code.xorps(xmm_scratch, xmm_scratch);
|
||||
FCODE(cmpords)(xmm_scratch, xmm_value); // true mask when ordered (i.e.: when not an NaN)
|
||||
code.pand(xmm_value, xmm_scratch);
|
||||
}
|
||||
code.xorps(xmm_scratch, xmm_scratch);
|
||||
FCODE(cmpords)(xmm_scratch, xmm_value); // true mask when ordered (i.e.: when not an NaN)
|
||||
code.pand(xmm_value, xmm_scratch);
|
||||
}
|
||||
|
||||
template<size_t fsize>
|
||||
|
|
Loading…
Reference in a new issue