From 04f325a05eb34b04a1bfabd462042c836cd65909 Mon Sep 17 00:00:00 2001
From: MerryMage <MerryMage@users.noreply.github.com>
Date: Wed, 25 Jul 2018 13:25:35 +0100
Subject: [PATCH] IR: Implement FPVectorNeg

---
 .../emit_x64_vector_floating_point.cpp        | 33 +++++++++++++++++++
 src/frontend/ir/ir_emitter.cpp                | 13 ++++++++
 src/frontend/ir/ir_emitter.h                  |  1 +
 src/frontend/ir/opcodes.inc                   |  3 ++
 4 files changed, 50 insertions(+)
diff --git a/src/backend_x64/emit_x64_vector_floating_point.cpp b/src/backend_x64/emit_x64_vector_floating_point.cpp
index fa49da74..c3254faa 100644
--- a/src/backend_x64/emit_x64_vector_floating_point.cpp
+++ b/src/backend_x64/emit_x64_vector_floating_point.cpp
@@ -555,6 +555,39 @@ void EmitX64::EmitFPVectorMulAdd64(EmitContext& ctx, IR::Inst* inst) {
     EmitFPVectorMulAdd<64>(code, ctx, inst);
 }
 
+void EmitX64::EmitFPVectorNeg16(EmitContext& ctx, IR::Inst* inst) {
+    // Negates every 16-bit lane by toggling its top (sign) bit with one XOR.
+    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);
+    const Xbyak::Xmm value = ctx.reg_alloc.UseScratchXmm(operands[0]);
+    // 0x8000 in each 16-bit lane; the same 64-bit pattern is passed for both constant words.
+    const Xbyak::Address sign_bits = code.MConst(xword, 0x8000800080008000, 0x8000800080008000);
+    code.pxor(value, sign_bits);
+
+    ctx.reg_alloc.DefineValue(inst, value);
+}
+
+void EmitX64::EmitFPVectorNeg32(EmitContext& ctx, IR::Inst* inst) {
+    // Negates every 32-bit lane by toggling its top (sign) bit with one XOR.
+    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);
+    const Xbyak::Xmm value = ctx.reg_alloc.UseScratchXmm(operands[0]);
+    // 0x80000000 in each 32-bit lane; the same 64-bit pattern is passed for both constant words.
+    const Xbyak::Address sign_bits = code.MConst(xword, 0x8000000080000000, 0x8000000080000000);
+    code.pxor(value, sign_bits);
+
+    ctx.reg_alloc.DefineValue(inst, value);
+}
+
+void EmitX64::EmitFPVectorNeg64(EmitContext& ctx, IR::Inst* inst) {
+    // Negates every 64-bit lane by toggling its top (sign) bit with one XOR.
+    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);
+    const Xbyak::Xmm value = ctx.reg_alloc.UseScratchXmm(operands[0]);
+    // Only bit 63 of each 64-bit lane is set; the same pattern is passed for both constant words.
+    const Xbyak::Address sign_bits = code.MConst(xword, 0x8000000000000000, 0x8000000000000000);
+    code.pxor(value, sign_bits);
+
+    ctx.reg_alloc.DefineValue(inst, value);
+}
+
 void EmitX64::EmitFPVectorPairedAdd32(EmitContext& ctx, IR::Inst* inst) {
     EmitThreeOpVectorOperation<32, PairedIndexer>(code, ctx, inst, &Xbyak::CodeGenerator::haddps);
 }
diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp
index 9a94f744..8be07c93 100644
--- a/src/frontend/ir/ir_emitter.cpp
+++ b/src/frontend/ir/ir_emitter.cpp
@@ -1707,6 +1707,19 @@ U128 IREmitter::FPVectorMulAdd(size_t esize, const U128& a, const U128& b, const
     return {};
 }
 
+U128 IREmitter::FPVectorNeg(size_t esize, const U128& a) {
+    // Maps the requested element size onto the negate opcode of the same width.
+    if (esize == 16) {
+        return Inst<U128>(Opcode::FPVectorNeg16, a);
+    } else if (esize == 32) {
+        return Inst<U128>(Opcode::FPVectorNeg32, a);
+    } else if (esize == 64) {
+        return Inst<U128>(Opcode::FPVectorNeg64, a);
+    }
+    UNREACHABLE();  // only element sizes 16, 32 and 64 have an opcode
+    return {};
+}
+
 U128 IREmitter::FPVectorPairedAdd(size_t esize, const U128& a, const U128& b) {
     switch (esize) {
     case 32:
diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h
index 2904d8c2..94c23038 100644
--- a/src/frontend/ir/ir_emitter.h
+++ b/src/frontend/ir/ir_emitter.h
@@ -301,6 +301,7 @@ public:
     U128 FPVectorGreaterEqual(size_t esize, const U128& a, const U128& b);
     U128 FPVectorMul(size_t esize, const U128& a, const U128& b);
     U128 FPVectorMulAdd(size_t esize, const U128& addend, const U128& op1, const U128& op2);
+    U128 FPVectorNeg(size_t esize, const U128& a);
     U128 FPVectorPairedAdd(size_t esize, const U128& a, const U128& b);
     U128 FPVectorPairedAddLower(size_t esize, const U128& a, const U128& b);
     U128 FPVectorRSqrtEstimate(size_t esize, const U128& a);
diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc
index ace84304..409bb2f8 100644
--- a/src/frontend/ir/opcodes.inc
+++ b/src/frontend/ir/opcodes.inc
@@ -443,6 +443,9 @@ OPCODE(FPVectorMul32,                           T::U128,        T::U128,
 OPCODE(FPVectorMul64,                           T::U128,        T::U128,        T::U128                         )
 OPCODE(FPVectorMulAdd32,                        T::U128,        T::U128,        T::U128,        T::U128         )
 OPCODE(FPVectorMulAdd64,                        T::U128,        T::U128,        T::U128,        T::U128         )
+OPCODE(FPVectorNeg16,                           T::U128,        T::U128                                         )
+OPCODE(FPVectorNeg32,                           T::U128,        T::U128                                         )
+OPCODE(FPVectorNeg64,                           T::U128,        T::U128                                         )
 OPCODE(FPVectorPairedAddLower32,                T::U128,        T::U128,        T::U128                         )
 OPCODE(FPVectorPairedAddLower64,                T::U128,        T::U128,        T::U128                         )
 OPCODE(FPVectorPairedAdd32,                     T::U128,        T::U128,        T::U128                         )