From 90f8dda966129e5b52787885641fb6375cdc7455 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Tue, 31 Jul 2018 21:22:01 +0100 Subject: [PATCH] emit_x64_floating_point: AVX implementation of ForceToDefaultNaN --- src/backend_x64/emit_x64_floating_point.cpp | 25 ++++++++++++--------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/src/backend_x64/emit_x64_floating_point.cpp b/src/backend_x64/emit_x64_floating_point.cpp index 82531ced..6cb46dd7 100644 --- a/src/backend_x64/emit_x64_floating_point.cpp +++ b/src/backend_x64/emit_x64_floating_point.cpp @@ -176,12 +176,17 @@ void PostProcessNaNs(BlockOfCode& code, Xbyak::Xmm result, Xbyak::Xmm tmp) { } template -void DefaultNaN(BlockOfCode& code, Xbyak::Xmm xmm_value) { - Xbyak::Label end; - FCODE(ucomis)(xmm_value, xmm_value); - code.jnp(end); - code.movaps(xmm_value, code.MConst(xword, fsize == 32 ? f32_nan : f64_nan)); - code.L(end); +void ForceToDefaultNaN(BlockOfCode& code, Xbyak::Xmm result) { + if (code.DoesCpuSupport(Xbyak::util::Cpu::tAVX)) { + FCODE(vcmpunords)(xmm0, result, result); + FCODE(blendvp)(result, code.MConst(xword, fsize == 32 ? f32_nan : f64_nan)); + } else { + Xbyak::Label end; + FCODE(ucomis)(result, result); + code.jnp(end); + code.movaps(result, code.MConst(xword, fsize == 32 ? f32_nan : f64_nan)); + code.L(end); + } } template @@ -217,7 +222,7 @@ void FPTwoOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) { fn(result); } if (ctx.FPSCR_DN()) { - DefaultNaN(code, result); + ForceToDefaultNaN(code, result); } else if (ctx.AccurateNaN()) { PostProcessNaNs(code, result, ctx.reg_alloc.ScratchXmm()); } @@ -257,7 +262,7 @@ void FPThreeOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, [[maybe_unus fn(result, operand); } if (ctx.FPSCR_DN()) { - DefaultNaN(code, result); + ForceToDefaultNaN(code, result); } else if (ctx.AccurateNaN()) { PostProcessNaNs(code, result, operand); } @@ -899,7 +904,7 @@ void EmitX64::EmitFPSingleToDouble(EmitContext& ctx, IR::Inst* inst) { } code.cvtss2sd(result, result); if (ctx.FPSCR_DN()) { - DefaultNaN<64>(code, result); + ForceToDefaultNaN<64>(code, result); } ctx.reg_alloc.DefineValue(inst, result); @@ -915,7 +920,7 @@ void EmitX64::EmitFPDoubleToSingle(EmitContext& ctx, IR::Inst* inst) { } code.cvtsd2ss(result, result); if (ctx.FPSCR_DN()) { - DefaultNaN<32>(code, result); + ForceToDefaultNaN<32>(code, result); } ctx.reg_alloc.DefineValue(inst, result);