emit_x64_vector_floating_point: AVX512 implementation of EmitFPVector{Min,Max}{32,64}

This commit is contained in:
Wunkolo 2021-06-01 10:49:21 -07:00 committed by merry
parent 87aac2a46b
commit 5385edcc66

View file

@ -943,7 +943,12 @@ static void EmitFPVectorMinMax(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] { MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
DenormalsAreZero<fsize>(code, ctx.FPCR(fpcr_controlled), {result, xmm_b}, mask); DenormalsAreZero<fsize>(code, ctx.FPCR(fpcr_controlled), {result, xmm_b}, mask);
if (code.HasHostFeature(HostFeature::AVX)) { if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) {
constexpr FpRangeSelect range_select = is_max ? FpRangeSelect::Max : FpRangeSelect::Min;
FCODE(vcmpp)(k1, result, xmm_b, Cmp::Unordered_Q);
FCODE(vrangep)(result, result, xmm_b, FpRangeLUT(range_select, FpRangeSign::Preserve));
FCODE(vblendmp)(result | k1, result, GetNaNVector<fsize>(code));
} else if (code.HasHostFeature(HostFeature::AVX)) {
FCODE(vcmpeqp)(mask, result, xmm_b); FCODE(vcmpeqp)(mask, result, xmm_b);
FCODE(vcmpunordp)(nan_mask, result, xmm_b); FCODE(vcmpunordp)(nan_mask, result, xmm_b);
if constexpr (is_max) { if constexpr (is_max) {
@ -1001,7 +1006,14 @@ static void EmitFPVectorMinMax(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
// x86-64 treats differently signed zeros as equal while ARM does not. // x86-64 treats differently signed zeros as equal while ARM does not.
// Thus if we AND together things that x86-64 thinks are equal we'll get the positive zero. // Thus if we AND together things that x86-64 thinks are equal we'll get the positive zero.
if (code.HasHostFeature(HostFeature::AVX)) { if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) {
// vrangep{s,d} will already correctly handle comparing
// signed zeros similar to ARM
// max(+0.0, -0.0) = +0.0.
// min(+0.0, -0.0) = -0.0
constexpr FpRangeSelect range_select = is_max ? FpRangeSelect::Max : FpRangeSelect::Min;
FCODE(vrangep)(result, result, xmm_b, FpRangeLUT(range_select, FpRangeSign::Preserve));
} else if (code.HasHostFeature(HostFeature::AVX)) {
FCODE(vcmpeqp)(mask, result, xmm_b); FCODE(vcmpeqp)(mask, result, xmm_b);
if constexpr (is_max) { if constexpr (is_max) {
FCODE(vandp)(eq, result, xmm_b); FCODE(vandp)(eq, result, xmm_b);