ir/frontend: Add half-precision opcode for FPVectorMulAdd

2019-04-13 01:42:35 -04:00 · 2019-04-13 01:42:35 -04:00 · ec6b3ae084
commit ec6b3ae084
parent 5f74d25bf7
4 changed files with 37 additions and 27 deletions
--- a/src/backend/x64/emit_x64_vector_floating_point.cpp
+++ b/src/backend/x64/emit_x64_vector_floating_point.cpp
@@ -908,6 +908,7 @@ void EmitFPVectorMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
        }
    };

+    if constexpr (fsize != 16) {
        if (code.DoesCpuSupport(Xbyak::util::Cpu::tFMA) && code.DoesCpuSupport(Xbyak::util::Cpu::tAVX)) {
            auto args = ctx.reg_alloc.GetArgumentInfo(inst);

@@ -942,10 +943,15 @@ void EmitFPVectorMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
            ctx.reg_alloc.DefineValue(inst, result);
            return;
        }
+    }

    EmitFourOpFallback(code, ctx, inst, fallback_fn);
 }

+void EmitX64::EmitFPVectorMulAdd16(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPVectorMulAdd<16>(code, ctx, inst);
+}
+
 void EmitX64::EmitFPVectorMulAdd32(EmitContext& ctx, IR::Inst* inst) {
    EmitFPVectorMulAdd<32>(code, ctx, inst);
 }
--- a/src/frontend/ir/ir_emitter.cpp
+++ b/src/frontend/ir/ir_emitter.cpp
@@ -2173,6 +2173,8 @@ U128 IREmitter::FPVectorMul(size_t esize, const U128& a, const U128& b) {

 U128 IREmitter::FPVectorMulAdd(size_t esize, const U128& a, const U128& b, const U128& c) {
    switch (esize) {
+    case 16:
+        return Inst<U128>(Opcode::FPVectorMulAdd16, a, b, c);
    case 32:
        return Inst<U128>(Opcode::FPVectorMulAdd32, a, b, c);
    case 64:
--- a/src/frontend/ir/microinstruction.cpp
+++ b/src/frontend/ir/microinstruction.cpp
@@ -327,6 +327,7 @@ bool Inst::ReadsFromAndWritesToFPSRCumulativeExceptionBits() const {
    case Opcode::FPVectorGreaterEqual64:
    case Opcode::FPVectorMul32:
    case Opcode::FPVectorMul64:
+    case Opcode::FPVectorMulAdd16:
    case Opcode::FPVectorMulAdd32:
    case Opcode::FPVectorMulAdd64:
    case Opcode::FPVectorPairedAddLower32:
--- a/src/frontend/ir/opcodes.inc
+++ b/src/frontend/ir/opcodes.inc
@@ -553,6 +553,7 @@ OPCODE(FPVectorMin32,                                       U128,           U128
 OPCODE(FPVectorMin64,                                       U128,           U128,           U128                                            )
 OPCODE(FPVectorMul32,                                       U128,           U128,           U128                                            )
 OPCODE(FPVectorMul64,                                       U128,           U128,           U128                                            )
+OPCODE(FPVectorMulAdd16,                                    U128,           U128,           U128,           U128                            )
 OPCODE(FPVectorMulAdd32,                                    U128,           U128,           U128,           U128                            )
 OPCODE(FPVectorMulAdd64,                                    U128,           U128,           U128,           U128                            )
 OPCODE(FPVectorMulX32,                                      U128,           U128,           U128                                            )