A64: Add opcodes for signed saturating negations

2018-09-06 15:50:25 -04:00 · 2018-09-06 15:50:25 -04:00 · fca7eddb9e
commit fca7eddb9e
parent f1ebbcd7bc
5 changed files with 145 additions and 0 deletions
--- a/src/backend/x64/emit_x64_vector.cpp
+++ b/src/backend/x64/emit_x64_vector.cpp
@@ -2981,6 +2981,127 @@ void EmitX64::EmitVectorSignedSaturatedNarrowToUnsigned64(EmitContext& ctx, IR::
    });
 }

+static void EmitVectorSignedSaturatedNeg(size_t esize, BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {  // Emits a signed saturating negate of each esize-bit element.
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    // data is only read; zero ends up holding the negated result and tmp the per-element "== mask" compare.
+    const Xbyak::Xmm data = ctx.reg_alloc.UseXmm(args[0]);
+    const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm();
+    const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
+    const Xbyak::Address mask = [esize, &code] {  // Constant with only the sign bit of every esize-bit element set (the most negative value).
+        switch (esize) {
+        case 8:
+            return code.MConst(xword, 0x8080808080808080, 0x8080808080808080);
+        case 16:
+            return code.MConst(xword, 0x8000800080008000, 0x8000800080008000);
+        case 32:
+            return code.MConst(xword, 0x8000000080000000, 0x8000000080000000);
+        case 64:
+            return code.MConst(xword, 0x8000000000000000, 0x8000000000000000);
+        default:
+            UNREACHABLE();
+            return Xbyak::Address{0};
+        }
+    }();
+    // Bits of the 16-bit pmovmskb result to test: the one for the most significant byte of each element.
+    const u32 test_mask = [esize] {
+        switch (esize) {
+        case 8:
+            return 0b1111'1111'1111'1111;
+        case 16:
+            return 0b1010'1010'1010'1010;
+        case 32:
+            return 0b1000'1000'1000'1000;
+        case 64:
+            return 0b10000000'10000000;
+        default:
+            UNREACHABLE();
+            return 0;
+        }
+    }();
+    // Emits the packed integer equality compare (pcmpeqb/w/d/q) that matches esize.
+    const auto vector_equality = [esize, &code](const Xbyak::Xmm& x, const auto& y) {
+        switch (esize) {
+        case 8:
+            code.pcmpeqb(x, y);
+            break;
+        case 16:
+            code.pcmpeqw(x, y);
+            break;
+        case 32:
+            code.pcmpeqd(x, y);
+            break;
+        case 64:
+            code.pcmpeqq(x, y);
+            break;
+        }
+    };
+    // tmp = (data == mask): all-ones in every element that holds the most negative value, zero elsewhere.
+    code.movdqa(tmp, data);
+    vector_equality(tmp, mask);
+
+    // Perform negation by computing 0 - data for every element.
+    code.pxor(zero, zero);
+    switch (esize) {
+    case 8:
+        code.psubsb(zero, data);  // Saturating subtract: 0 - (-128) clamps to 127.
+        break;
+    case 16:
+        code.psubsw(zero, data);  // Saturating subtract: 0 - (-32768) clamps to 32767.
+        break;
+    case 32:
+        code.psubd(zero, data);  // Wrapping subtract: the most negative value negates to itself.
+        code.pxor(zero, tmp);  // Flip every bit of those elements, turning 0x80000000 into 0x7FFFFFFF.
+        break;
+    case 64:
+        code.psubq(zero, data);  // Wrapping subtract: the most negative value negates to itself.
+        code.pxor(zero, tmp);  // Flip every bit of those elements, turning 0x8000... into 0x7FFF....
+        break;
+    }
+
+    // Check if any elements matched the mask prior to performing saturation. If so, set the Q bit.
+    const Xbyak::Reg64 bit = ctx.reg_alloc.ScratchGpr();
+    code.pmovmskb(bit, tmp);
+    code.test(bit.cvt32(), test_mask);
+    code.setnz(bit.cvt8());  // bit = 1 if at least one element saturated, else 0.
+    code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit.cvt8());  // OR into the fpsr_qc byte so an already-set flag is kept.
+
+    ctx.reg_alloc.DefineValue(inst, zero);
+}
+
+void EmitX64::EmitVectorSignedSaturatedNeg8(EmitContext& ctx, IR::Inst* inst) {  // Signed saturating negate of 8-bit elements.
+    EmitVectorSignedSaturatedNeg(8, code, ctx, inst);
+}
+
+void EmitX64::EmitVectorSignedSaturatedNeg16(EmitContext& ctx, IR::Inst* inst) {  // Signed saturating negate of 16-bit elements.
+    EmitVectorSignedSaturatedNeg(16, code, ctx, inst);
+}
+
+void EmitX64::EmitVectorSignedSaturatedNeg32(EmitContext& ctx, IR::Inst* inst) {  // Signed saturating negate of 32-bit elements.
+    EmitVectorSignedSaturatedNeg(32, code, ctx, inst);
+}
+
+void EmitX64::EmitVectorSignedSaturatedNeg64(EmitContext& ctx, IR::Inst* inst) {  // Signed saturating negate of 64-bit elements.
+    if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {  // The vector path compares with pcmpeqq, an SSE4.1 instruction.
+        EmitVectorSignedSaturatedNeg(64, code, ctx, inst);
+        return;
+    }
+    // Without SSE4.1, hand a scalar per-element routine to the fallback helper.
+    EmitOneArgumentFallbackWithSaturation(code, ctx, inst, [](VectorArray<s64>& result, const VectorArray<s64>& data) {
+        bool qc_flag = false;
+
+        for (size_t i = 0; i < result.size(); i++) {
+            if (static_cast<u64>(data[i]) == 0x8000000000000000) {  // The most negative s64 has no positive counterpart.
+                result[i] = 0x7FFFFFFFFFFFFFFF;  // Clamp to the largest s64 instead.
+                qc_flag = true;
+            } else {
+                result[i] = -data[i];
+            }
+        }
+
+        return qc_flag;  // True if at least one element saturated.
+    });
+}
+
 void EmitX64::EmitVectorSub8(EmitContext& ctx, IR::Inst* inst) {
    EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::psubb);
 }
--- a/src/frontend/ir/ir_emitter.cpp
+++ b/src/frontend/ir/ir_emitter.cpp
@@ -1566,6 +1566,21 @@ U128 IREmitter::VectorSignedSaturatedNarrowToUnsigned(size_t original_esize, con
    return {};
 }

+U128 IREmitter::VectorSignedSaturatedNeg(size_t esize, const U128& a) {  // Builds the VectorSignedSaturatedNeg opcode that matches esize, with a as its operand.
+    switch (esize) {
+    case 8:
+        return Inst<U128>(Opcode::VectorSignedSaturatedNeg8, a);
+    case 16:
+        return Inst<U128>(Opcode::VectorSignedSaturatedNeg16, a);
+    case 32:
+        return Inst<U128>(Opcode::VectorSignedSaturatedNeg32, a);
+    case 64:
+        return Inst<U128>(Opcode::VectorSignedSaturatedNeg64, a);
+    }
+    UNREACHABLE();  // Reached only when esize is not 8, 16, 32 or 64.
+    return {};
+}
+
 U128 IREmitter::VectorSub(size_t esize, const U128& a, const U128& b) {
    switch (esize) {
    case 8:
--- a/src/frontend/ir/ir_emitter.h
+++ b/src/frontend/ir/ir_emitter.h
@@ -268,6 +268,7 @@ public:
    U128 VectorSignedSaturatedDoublingMultiplyReturnHigh(size_t esize, const U128& a, const U128& b);
    U128 VectorSignedSaturatedNarrowToSigned(size_t original_esize, const U128& a);
    U128 VectorSignedSaturatedNarrowToUnsigned(size_t original_esize, const U128& a);
+    U128 VectorSignedSaturatedNeg(size_t esize, const U128& a);
    U128 VectorSub(size_t esize, const U128& a, const U128& b);
    Table VectorTable(std::vector<U128> values);
    U128 VectorTableLookup(const U128& defaults, const Table& table, const U128& indices);
--- a/src/frontend/ir/microinstruction.cpp
+++ b/src/frontend/ir/microinstruction.cpp
@@ -359,6 +359,10 @@ bool Inst::WritesToFPSRCumulativeSaturationBit() const {
    case Opcode::VectorSignedSaturatedNarrowToUnsigned64:
    case Opcode::VectorSignedSaturatedDoublingMultiplyReturnHigh16:
    case Opcode::VectorSignedSaturatedDoublingMultiplyReturnHigh32:
+    case Opcode::VectorSignedSaturatedNeg8:
+    case Opcode::VectorSignedSaturatedNeg16:
+    case Opcode::VectorSignedSaturatedNeg32:
+    case Opcode::VectorSignedSaturatedNeg64:
    case Opcode::VectorUnsignedSaturatedNarrow16:
    case Opcode::VectorUnsignedSaturatedNarrow32:
    case Opcode::VectorUnsignedSaturatedNarrow64:
--- a/src/frontend/ir/opcodes.inc
+++ b/src/frontend/ir/opcodes.inc
@@ -405,6 +405,10 @@ OPCODE(VectorSignedSaturatedNarrowToSigned64,              U128,           U128
 OPCODE(VectorSignedSaturatedNarrowToUnsigned16,            U128,           U128                                                            )
 OPCODE(VectorSignedSaturatedNarrowToUnsigned32,            U128,           U128                                                            )
 OPCODE(VectorSignedSaturatedNarrowToUnsigned64,            U128,           U128                                                            )
+OPCODE(VectorSignedSaturatedNeg8,                          U128,           U128                                                            )
+OPCODE(VectorSignedSaturatedNeg16,                         U128,           U128                                                            )
+OPCODE(VectorSignedSaturatedNeg32,                         U128,           U128                                                            )
+OPCODE(VectorSignedSaturatedNeg64,                         U128,           U128                                                            )
 OPCODE(VectorSub8,                                         U128,           U128,           U128                                            )
 OPCODE(VectorSub16,                                        U128,           U128,           U128                                            )
 OPCODE(VectorSub32,                                        U128,           U128,           U128                                            )