A64: Implement FMINNM (scalar)
This commit is contained in:
parent
1c9804ea07
commit
0575e7421b
6 changed files with 98 additions and 1 deletions
|
@ -551,6 +551,75 @@ void EmitX64::EmitFPMin64(EmitContext& ctx, IR::Inst* inst) {
|
|||
});
|
||||
}
|
||||
|
||||
|
||||
void EmitX64::EmitFPMinNumeric32(EmitContext& ctx, IR::Inst* inst) {
    // minss alone does not implement FMINNM semantics: when either input is a
    // NaN it simply returns its second operand, whereas FMINNM ignores a quiet
    // NaN on one side in favour of the numeric operand. Emit a prelude that
    // resolves the mixed NaN/numeric cases before falling through to minss.
    FPThreeOp32(code, ctx, inst, [&](Xbyak::Xmm result, Xbyak::Xmm operand, Xbyak::Reg32 scratch, Xbyak::Label& end){
        Xbyak::Label do_min, ordered, keep_result;

        code.ucomiss(result, operand);
        code.jnp(ordered); // PF clear => neither input is a NaN

        // Unordered: at least one input is a NaN. Classify `operand` first.
        // Shifting the sign bit out lets one unsigned compare classify it:
        // after shl, QNaN >= 0xff800000, SNaN > 0xff000000, non-NaN <= 0xff000000.
        code.movd(scratch, operand);
        code.shl(scratch, 1);
        code.cmp(scratch, 0xff800000u);
        code.jae(keep_result);             // operand is a QNaN: keep result
        code.cmp(scratch, 0xff000000u);
        code.ja(do_min);                   // operand is an SNaN: usual NaN handling

        // operand is numeric here, so result must be the NaN.
        code.movd(scratch, result);
        code.shl(scratch, 1);
        code.cmp(scratch, 0xff800000u);
        code.jnae(keep_result);            // result is an SNaN: keep result
        // result is a QNaN and operand is numeric: the answer is operand.
        code.movaps(result, operand);
        code.jmp(end);

        code.L(keep_result);
        code.movaps(operand, result);      // min(result, result) == result
        code.jmp(do_min);

        code.L(ordered);
        code.jnz(do_min);
        // Operands compared equal (possibly +0 vs -0): OR the representations
        // so that min(+0.0, -0.0) yields -0.0.
        code.orps(operand, result);

        code.L(do_min);
    }, &Xbyak::CodeGenerator::minss);
}
|
||||
|
||||
void EmitX64::EmitFPMinNumeric64(EmitContext& ctx, IR::Inst* inst) {
    // Double-precision twin of EmitFPMinNumeric32: minsd returns its second
    // operand whenever either input is a NaN, so resolve FMINNM's quiet-NaN
    // cases up front before falling through to the plain minsd.
    FPThreeOp64(code, ctx, inst, [&](Xbyak::Xmm result, Xbyak::Xmm operand, Xbyak::Reg64 scratch, Xbyak::Label& end){
        Xbyak::Label do_min, ordered, keep_result;

        code.ucomisd(result, operand);
        code.jnp(ordered); // PF clear => neither input is a NaN

        // Unordered: at least one input is a NaN. Classify `operand` first.
        // Shifting the sign bit out lets unsigned compares classify it:
        // after shl, QNaN >= 0xfff0..., SNaN > 0xffe0..., non-NaN <= 0xffe0...
        code.movq(scratch, operand);
        code.shl(scratch, 1);
        code.cmp(scratch, code.MConst(qword, 0xfff0'0000'0000'0000u));
        code.jae(keep_result);             // operand is a QNaN: keep result
        code.cmp(scratch, code.MConst(qword, 0xffe0'0000'0000'0000u));
        code.ja(do_min);                   // operand is an SNaN: usual NaN handling

        // operand is numeric here, so result must be the NaN.
        code.movq(scratch, result);
        code.shl(scratch, 1);
        code.cmp(scratch, code.MConst(qword, 0xfff0'0000'0000'0000u));
        code.jnae(keep_result);            // result is an SNaN: keep result
        // result is a QNaN and operand is numeric: the answer is operand.
        code.movaps(result, operand);
        code.jmp(end);

        code.L(keep_result);
        code.movaps(operand, result);      // min(result, result) == result
        code.jmp(do_min);

        code.L(ordered);
        code.jnz(do_min);
        // Operands compared equal (possibly +0 vs -0): OR the representations
        // so that min(+0.0, -0.0) yields -0.0.
        code.orps(operand, result);

        code.L(do_min);
    }, &Xbyak::CodeGenerator::minsd);
}
|
||||
|
||||
void EmitX64::EmitFPMul32(EmitContext& ctx, IR::Inst* inst) {
    // Single-precision multiply maps directly onto mulss; the shared
    // three-operand helper supplies the operand setup and NaN handling.
    FPThreeOp32(code, ctx, inst, &Xbyak::CodeGenerator::mulss);
}
|
||||
|
|
|
@ -928,7 +928,7 @@ INST(FSUB_float, "FSUB (scalar)", "00011
|
|||
INST(FMAX_float, "FMAX (scalar)", "00011110yy1mmmmm010010nnnnnddddd")
|
||||
INST(FMIN_float, "FMIN (scalar)", "00011110yy1mmmmm010110nnnnnddddd")
|
||||
INST(FMAXNM_float, "FMAXNM (scalar)", "00011110yy1mmmmm011010nnnnnddddd")
|
||||
//INST(FMINNM_float, "FMINNM (scalar)", "00011110yy1mmmmm011110nnnnnddddd")
|
||||
INST(FMINNM_float, "FMINNM (scalar)", "00011110yy1mmmmm011110nnnnnddddd")
|
||||
INST(FNMUL_float, "FNMUL (scalar)", "00011110yy1mmmmm100010nnnnnddddd")
|
||||
|
||||
// Data Processing - FP and SIMD - Floating point conditional select
|
||||
|
|
|
@ -128,6 +128,21 @@ bool TranslatorVisitor::FMAXNM_float(Imm<2> type, Vec Vm, Vec Vn, Vec Vd) {
|
|||
return true;
|
||||
}
|
||||
|
||||
// FMINNM (scalar): writes the "numeric minimum" of Vn and Vm to Vd — when
// exactly one input is a quiet NaN, the numeric operand is the answer.
bool TranslatorVisitor::FMINNM_float(Imm<2> type, Vec Vm, Vec Vn, Vec Vd) {
    const auto datasize = GetDataSize(type);
    if (!datasize) {
        return UnallocatedEncoding();
    }

    const IR::U32U64 lhs = V_scalar(*datasize, Vn);
    const IR::U32U64 rhs = V_scalar(*datasize, Vm);

    V_scalar(*datasize, Vd, ir.FPMinNumeric(lhs, rhs, true));
    return true;
}
|
||||
|
||||
bool TranslatorVisitor::FNMUL_float(Imm<2> type, Vec Vm, Vec Vn, Vec Vd) {
|
||||
auto datasize = GetDataSize(type);
|
||||
if (!datasize) {
|
||||
|
|
|
@ -1205,6 +1205,16 @@ U32U64 IREmitter::FPMin(const U32U64& a, const U32U64& b, bool fpscr_controlled)
|
|||
}
|
||||
}
|
||||
|
||||
// Emits the width-appropriate FPMinNumeric opcode for two same-typed scalars.
// Both operands must share a type; only the FPSCR-controlled form is supported.
U32U64 IREmitter::FPMinNumeric(const U32U64& a, const U32U64& b, bool fpscr_controlled) {
    ASSERT(fpscr_controlled);
    ASSERT(a.GetType() == b.GetType());

    const bool is_single = a.GetType() == Type::U32;
    if (is_single) {
        return Inst<U32>(Opcode::FPMinNumeric32, a, b);
    }
    return Inst<U64>(Opcode::FPMinNumeric64, a, b);
}
|
||||
|
||||
U32U64 IREmitter::FPMul(const U32U64& a, const U32U64& b, bool fpscr_controlled) {
|
||||
ASSERT(fpscr_controlled);
|
||||
ASSERT(a.GetType() == b.GetType());
|
||||
|
|
|
@ -249,6 +249,7 @@ public:
|
|||
U32U64 FPMax(const U32U64& a, const U32U64& b, bool fpscr_controlled);
|
||||
U32U64 FPMaxNumeric(const U32U64& a, const U32U64& b, bool fpscr_controlled);
|
||||
U32U64 FPMin(const U32U64& a, const U32U64& b, bool fpscr_controlled);
|
||||
U32U64 FPMinNumeric(const U32U64& a, const U32U64& b, bool fpscr_controlled);
|
||||
U32U64 FPMul(const U32U64& a, const U32U64& b, bool fpscr_controlled);
|
||||
U32U64 FPNeg(const U32U64& a);
|
||||
U32U64 FPSqrt(const U32U64& a);
|
||||
|
|
|
@ -304,6 +304,8 @@ OPCODE(FPMaxNumeric32, T::U32, T::U32, T::U32
|
|||
OPCODE(FPMaxNumeric64, T::U64, T::U64, T::U64 )
|
||||
OPCODE(FPMin32, T::U32, T::U32, T::U32 )
|
||||
OPCODE(FPMin64, T::U64, T::U64, T::U64 )
|
||||
OPCODE(FPMinNumeric32, T::U32, T::U32, T::U32 )
|
||||
OPCODE(FPMinNumeric64, T::U64, T::U64, T::U64 )
|
||||
OPCODE(FPMul32, T::U32, T::U32, T::U32 )
|
||||
OPCODE(FPMul64, T::U64, T::U64, T::U64 )
|
||||
OPCODE(FPNeg32, T::U32, T::U32 )
|
||||
|
|
Loading…
Reference in a new issue