From 5b4673da4b6cfcdc9ecbeb8a46d51ed1f4c540df Mon Sep 17 00:00:00 2001
From: Lioncash <mathew1800@gmail.com>
Date: Sat, 13 Apr 2019 17:49:04 -0400
Subject: [PATCH] frontend/ir_emitter: Add half-precision variant of
 FPVectorRoundInt

---
 .../x64/emit_x64_vector_floating_point.cpp    | 46 +++++++++++--------
 src/frontend/ir/ir_emitter.cpp                |  9 +++-
 src/frontend/ir/opcodes.inc                   |  1 +
 3 files changed, 34 insertions(+), 22 deletions(-)
diff --git a/src/backend/x64/emit_x64_vector_floating_point.cpp b/src/backend/x64/emit_x64_vector_floating_point.cpp
index deb5ab1f..0bf4f619 100644
--- a/src/backend/x64/emit_x64_vector_floating_point.cpp
+++ b/src/backend/x64/emit_x64_vector_floating_point.cpp
@@ -1160,28 +1160,30 @@ void EmitFPVectorRoundInt(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
     const auto rounding = static_cast<FP::RoundingMode>(inst->GetArg(1).GetU8());
     const bool exact = inst->GetArg(2).GetU1();
 
-    if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41) && rounding != FP::RoundingMode::ToNearest_TieAwayFromZero && !exact) {
-        const u8 round_imm = [&]() -> u8 {
-            switch (rounding) {
-            case FP::RoundingMode::ToNearest_TieEven:
-                return 0b00;
-            case FP::RoundingMode::TowardsPlusInfinity:
-                return 0b10;
-            case FP::RoundingMode::TowardsMinusInfinity:
-                return 0b01;
-            case FP::RoundingMode::TowardsZero:
-                return 0b11;
-            default:
-                UNREACHABLE();
-            }
-            return 0;
-        }();
+    if constexpr (fsize != 16) {
+        if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41) && rounding != FP::RoundingMode::ToNearest_TieAwayFromZero && !exact) {
+            const u8 round_imm = [&]() -> u8 {
+                switch (rounding) {
+                case FP::RoundingMode::ToNearest_TieEven:
+                    return 0b00;
+                case FP::RoundingMode::TowardsPlusInfinity:
+                    return 0b10;
+                case FP::RoundingMode::TowardsMinusInfinity:
+                    return 0b01;
+                case FP::RoundingMode::TowardsZero:
+                    return 0b11;
+                default:
+                    UNREACHABLE();
+                }
+                return 0;
+            }();
 
-        EmitTwoOpVectorOperation<fsize, DefaultIndexer>(code, ctx, inst, [&](const Xbyak::Xmm& result, const Xbyak::Xmm& xmm_a){
-            FCODE(roundp)(result, xmm_a, round_imm);
-        });
+            EmitTwoOpVectorOperation<fsize, DefaultIndexer>(code, ctx, inst, [&](const Xbyak::Xmm& result, const Xbyak::Xmm& xmm_a){
+                FCODE(roundp)(result, xmm_a, round_imm);
+            });
 
-        return;
+            return;
+        }
     }
 
     using rounding_list = mp::list<
@@ -1218,6 +1220,10 @@ void EmitFPVectorRoundInt(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
     EmitTwoOpFallback(code, ctx, inst, lut.at(std::make_tuple(rounding, exact)));
 }
 
+void EmitX64::EmitFPVectorRoundInt16(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPVectorRoundInt<16>(code, ctx, inst);  // fsize == 16 is excluded from the SSE4.1 roundp path by the `if constexpr (fsize != 16)` guard in the template above
+}
+
 void EmitX64::EmitFPVectorRoundInt32(EmitContext& ctx, IR::Inst* inst) {
     EmitFPVectorRoundInt<32>(code, ctx, inst);
 }
diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp
index 61c8bf1c..27527f6c 100644
--- a/src/frontend/ir/ir_emitter.cpp
+++ b/src/frontend/ir/ir_emitter.cpp
@@ -2278,11 +2278,16 @@ U128 IREmitter::FPVectorRecipStepFused(size_t esize, const U128& a, const U128&
 }
 
 U128 IREmitter::FPVectorRoundInt(size_t esize, const U128& operand, FP::RoundingMode rounding, bool exact) {
+    const IR::U8 rounding_imm = Imm8(static_cast<u8>(rounding));
+    const IR::U1 exact_imm = Imm1(exact);
+
     switch (esize) {
+    case 16:
+        return Inst<U128>(Opcode::FPVectorRoundInt16, operand, rounding_imm, exact_imm);
     case 32:
-        return Inst<U128>(Opcode::FPVectorRoundInt32, operand, Imm8(static_cast<u8>(rounding)), Imm1(exact));
+        return Inst<U128>(Opcode::FPVectorRoundInt32, operand, rounding_imm, exact_imm);
     case 64:
-        return Inst<U128>(Opcode::FPVectorRoundInt64, operand, Imm8(static_cast<u8>(rounding)), Imm1(exact));
+        return Inst<U128>(Opcode::FPVectorRoundInt64, operand, rounding_imm, exact_imm);
     }
     UNREACHABLE();
     return {};
diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc
index 527aef5b..c7a4c227 100644
--- a/src/frontend/ir/opcodes.inc
+++ b/src/frontend/ir/opcodes.inc
@@ -574,6 +574,7 @@ OPCODE(FPVectorRecipEstimate32,                             U128,           U128
 OPCODE(FPVectorRecipEstimate64,                             U128,           U128                                                            )
 OPCODE(FPVectorRecipStepFused32,                            U128,           U128,           U128                                            )
 OPCODE(FPVectorRecipStepFused64,                            U128,           U128,           U128                                            )
+OPCODE(FPVectorRoundInt16,                                  U128,           U128,           U8,             U1                              )
 OPCODE(FPVectorRoundInt32,                                  U128,           U128,           U8,             U1                              )
 OPCODE(FPVectorRoundInt64,                                  U128,           U128,           U8,             U1                              )
 OPCODE(FPVectorRSqrtEstimate32,                             U128,           U128                                                            )