From 90f8dda966129e5b52787885641fb6375cdc7455 Mon Sep 17 00:00:00 2001
From: MerryMage <MerryMage@users.noreply.github.com>
Date: Tue, 31 Jul 2018 21:22:01 +0100
Subject: [PATCH] emit_x64_floating_point: AVX implementation of
 ForceToDefaultNaN

---
 src/backend_x64/emit_x64_floating_point.cpp | 25 ++++++++++++---------
 1 file changed, 15 insertions(+), 10 deletions(-)
diff --git a/src/backend_x64/emit_x64_floating_point.cpp b/src/backend_x64/emit_x64_floating_point.cpp
index 82531ced..6cb46dd7 100644
--- a/src/backend_x64/emit_x64_floating_point.cpp
+++ b/src/backend_x64/emit_x64_floating_point.cpp
@@ -176,12 +176,17 @@ void PostProcessNaNs(BlockOfCode& code, Xbyak::Xmm result, Xbyak::Xmm tmp) {
 }
 
 template<size_t fsize>
-void DefaultNaN(BlockOfCode& code, Xbyak::Xmm xmm_value) {
-    Xbyak::Label end;
-    FCODE(ucomis)(xmm_value, xmm_value);
-    code.jnp(end);
-    code.movaps(xmm_value, code.MConst(xword, fsize == 32 ? f32_nan : f64_nan));
-    code.L(end);
+void ForceToDefaultNaN(BlockOfCode& code, Xbyak::Xmm result) { // Emits code that replaces a NaN held in `result` with the f32_nan / f64_nan constant selected by fsize.
+    if (code.DoesCpuSupport(Xbyak::util::Cpu::tAVX)) { // Branch-free sequence, used when the host CPU reports AVX support.
+        FCODE(vcmpunords)(xmm0, result, result); // A value is unordered with itself only when it is NaN, so xmm0 becomes the select mask. NOTE(review): overwrites xmm0 -- confirm it never holds a live value here.
+        FCODE(blendvp)(result, code.MConst(xword, fsize == 32 ? f32_nan : f64_nan)); // blendv takes its mask from xmm0 implicitly: masked lanes receive the constant, the others keep `result`.
+    } else { // Fallback without AVX: compare, then branch around the constant load.
+        Xbyak::Label end; // Jump target for the not-NaN case.
+        FCODE(ucomis)(result, result); // Self-compare; the parity flag is set only for an unordered (NaN) operand.
+        code.jnp(end); // Parity clear => not NaN, so `result` is left untouched.
+        code.movaps(result, code.MConst(xword, fsize == 32 ? f32_nan : f64_nan)); // NaN: overwrite the full 128-bit register with the default-NaN constant.
+        code.L(end);
+    }
 }
 
 template<size_t fsize>
@@ -217,7 +222,7 @@ void FPTwoOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
         fn(result);
     }
     if (ctx.FPSCR_DN()) {
-        DefaultNaN<fsize>(code, result);
+        ForceToDefaultNaN<fsize>(code, result);
     } else if (ctx.AccurateNaN()) {
         PostProcessNaNs<fsize>(code, result, ctx.reg_alloc.ScratchXmm());
     }
@@ -257,7 +262,7 @@ void FPThreeOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, [[maybe_unus
         fn(result, operand);
     }
     if (ctx.FPSCR_DN()) {
-        DefaultNaN<fsize>(code, result);
+        ForceToDefaultNaN<fsize>(code, result);
     } else if (ctx.AccurateNaN()) {
         PostProcessNaNs<fsize>(code, result, operand);
     }
@@ -899,7 +904,7 @@ void EmitX64::EmitFPSingleToDouble(EmitContext& ctx, IR::Inst* inst) {
     }
     code.cvtss2sd(result, result);
     if (ctx.FPSCR_DN()) {
-        DefaultNaN<64>(code, result);
+        ForceToDefaultNaN<64>(code, result);
     }
 
     ctx.reg_alloc.DefineValue(inst, result);
@@ -915,7 +920,7 @@ void EmitX64::EmitFPDoubleToSingle(EmitContext& ctx, IR::Inst* inst) {
     }
     code.cvtsd2ss(result, result);
     if (ctx.FPSCR_DN()) {
-        DefaultNaN<32>(code, result);
+        ForceToDefaultNaN<32>(code, result);
     }
 
     ctx.reg_alloc.DefineValue(inst, result);