ir: Add opcodes for scalar signed saturated doubling multiplies

2018-09-05 19:16:41 -04:00 · 2018-09-05 19:16:41 -04:00 · 7ebfd0f31c
commit 7ebfd0f31c
parent 9c03311fed
4 changed files with 81 additions and 0 deletions
--- a/src/backend/x64/emit_x64_saturation.cpp
+++ b/src/backend/x64/emit_x64_saturation.cpp
@ -134,6 +134,66 @@ void EmitX64::EmitSignedSaturatedAdd64(EmitContext& ctx, IR::Inst* inst) {
    EmitSignedSaturatedOp<Op::Add, 64>(code, ctx, inst);
 }

+// Emits x64 code for a 16-bit signed saturating doubling multiply that keeps the high half of
+// the doubled product. When a GetOverflowFromOp pseudo-operation is attached to inst, it is
+// defined from the same flags and then erased.
+void EmitX64::EmitSignedSaturatedDoublingMultiplyReturnHigh16(EmitContext& ctx, IR::Inst* inst) {
+    const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
+
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    // Registers are named for what they hold once the multiply has been emitted:
+    //   product - first operand, then the full 32-bit product
+    //   result  - second operand, then the value defined for inst
+    //   high    - product >> 15, later reused (low byte) for the overflow flag
+    const Xbyak::Reg32 product = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
+    const Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(args[1]).cvt32();
+    const Xbyak::Reg32 high = ctx.reg_alloc.ScratchGpr().cvt32();
+
+    // Sign-extend both 16-bit operands to 32 bits so that the product cannot wrap.
+    code.movsx(product, product.cvt16());
+    code.movsx(result, result.cvt16());
+    code.imul(product, result);
+
+    // result = 2 * product; high = product >> 15, whose low 16 bits equal (2 * product) >> 16.
+    code.lea(result, ptr[product.cvt64() + product.cvt64()]);
+    code.mov(high, product);
+    code.shr(high, 15);
+
+    // The XOR sets the sign flag iff doubling changed the sign bit, i.e. the doubled product
+    // overflowed. MOV leaves the flags untouched, so the CMOVNS and SETS below still observe
+    // them: keep `high` when there was no overflow, otherwise return the saturated 0x7FFF.
+    code.xor_(result, product);
+    code.mov(result, 0x7FFF);
+    code.cmovns(result, high);
+
+    if (overflow_inst) {
+        // Only the low byte of `high` is rewritten here; this must stay after the CMOVNS above.
+        code.sets(high.cvt8());
+
+        ctx.reg_alloc.DefineValue(overflow_inst, high);
+        ctx.EraseInstruction(overflow_inst);
+    }
+
+    ctx.reg_alloc.DefineValue(inst, result);
+}
+
+// Emits x64 code for a 32-bit signed saturating doubling multiply that keeps the high half of
+// the doubled product. When a GetOverflowFromOp pseudo-operation is attached to inst, it is
+// defined from the same flags and then erased.
+void EmitX64::EmitSignedSaturatedDoublingMultiplyReturnHigh32(EmitContext& ctx, IR::Inst* inst) {
+    const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
+
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    // Registers are named for what they hold once the multiply has been emitted:
+    //   product - first operand, then the full 64-bit product
+    //   result  - second operand, then the value defined for inst
+    //   high    - product >> 31, later reused (low byte) for the overflow flag
+    const Xbyak::Reg64 product = ctx.reg_alloc.UseScratchGpr(args[0]);
+    const Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(args[1]);
+    const Xbyak::Reg64 high = ctx.reg_alloc.ScratchGpr();
+
+    // Sign-extend both 32-bit operands to 64 bits so that the product cannot wrap.
+    code.movsxd(product, product.cvt32());
+    code.movsxd(result, result.cvt32());
+    code.imul(product, result);
+
+    // result = 2 * product; high = product >> 31, whose low 32 bits equal (2 * product) >> 32.
+    code.lea(result, ptr[product + product]);
+    code.mov(high, product);
+    code.shr(high, 31);
+
+    // The XOR sets the sign flag iff doubling changed the sign bit, i.e. the doubled product
+    // overflowed. MOV leaves the flags untouched, so the CMOVNS and SETS below still observe
+    // them: keep `high` when there was no overflow, otherwise return the saturated 0x7FFFFFFF.
+    code.xor_(result, product);
+    code.mov(result.cvt32(), 0x7FFFFFFF);
+    code.cmovns(result.cvt32(), high.cvt32());
+
+    if (overflow_inst) {
+        // Only the low byte of `high` is rewritten here; this must stay after the CMOVNS above.
+        code.sets(high.cvt8());
+
+        ctx.reg_alloc.DefineValue(overflow_inst, high);
+        ctx.EraseInstruction(overflow_inst);
+    }
+
+    ctx.reg_alloc.DefineValue(inst, result);
+}
+
 void EmitX64::EmitSignedSaturatedSub8(EmitContext& ctx, IR::Inst* inst) {
    EmitSignedSaturatedOp<Op::Sub, 8>(code, ctx, inst);
 }
--- a/src/frontend/ir/ir_emitter.cpp
+++ b/src/frontend/ir/ir_emitter.cpp
@ -501,6 +501,24 @@ ResultAndOverflow<UAny> IREmitter::SignedSaturatedAdd(const UAny& a, const UAny&
    return {result, overflow};
 }

+// Emits the SignedSaturatedDoublingMultiplyReturnHigh opcode matching the operand width (U16 or
+// U32) and pairs it with a GetOverflowFromOp pseudo-operation on that result.
+// Both operands must have the same type.
+ResultAndOverflow<UAny> IREmitter::SignedSaturatedDoublingMultiplyReturnHigh(const UAny& a, const UAny& b) {
+    ASSERT(a.GetType() == b.GetType());
+
+    const IR::UAny product = [&]() -> IR::UAny {
+        const IR::Type operand_type = a.GetType();
+        if (operand_type == IR::Type::U16) {
+            return Inst<U16>(Opcode::SignedSaturatedDoublingMultiplyReturnHigh16, a, b);
+        } else if (operand_type == IR::Type::U32) {
+            return Inst<U32>(Opcode::SignedSaturatedDoublingMultiplyReturnHigh32, a, b);
+        }
+
+        // No opcode exists for any other operand width.
+        UNREACHABLE();
+        return IR::UAny{};
+    }();
+
+    const auto saturated = Inst<U1>(Opcode::GetOverflowFromOp, product);
+    return {product, saturated};
+}
+
 ResultAndOverflow<UAny> IREmitter::SignedSaturatedSub(const UAny& a, const UAny& b) {
    ASSERT(a.GetType() == b.GetType());
    const auto result = [&]() -> IR::UAny {
--- a/src/frontend/ir/ir_emitter.h
+++ b/src/frontend/ir/ir_emitter.h
@ -143,6 +143,7 @@ public:
    U32U64 MinUnsigned(const U32U64& a, const U32U64& b);

    ResultAndOverflow<UAny> SignedSaturatedAdd(const UAny& a, const UAny& b);
+    ResultAndOverflow<UAny> SignedSaturatedDoublingMultiplyReturnHigh(const UAny& a, const UAny& b); // a and b must share a type; only U16 and U32 are handled
    ResultAndOverflow<UAny> SignedSaturatedSub(const UAny& a, const UAny& b);
    ResultAndOverflow<U32> SignedSaturation(const U32& a, size_t bit_size_to_saturate_to);
    ResultAndOverflow<UAny> UnsignedSaturatedAdd(const UAny& a, const UAny& b);
--- a/src/frontend/ir/opcodes.inc
+++ b/src/frontend/ir/opcodes.inc
@ -157,6 +157,8 @@ OPCODE(SignedSaturatedAdd8,                                U8,             U8,
 OPCODE(SignedSaturatedAdd16,                               U16,            U16,            U16                                             )
 OPCODE(SignedSaturatedAdd32,                               U32,            U32,            U32                                             )
 OPCODE(SignedSaturatedAdd64,                               U64,            U64,            U64                                             )
+OPCODE(SignedSaturatedDoublingMultiplyReturnHigh16,        U16,            U16,            U16                                             ) // emitted by IREmitter::SignedSaturatedDoublingMultiplyReturnHigh for U16 operands
+OPCODE(SignedSaturatedDoublingMultiplyReturnHigh32,        U32,            U32,            U32                                             ) // emitted by IREmitter::SignedSaturatedDoublingMultiplyReturnHigh for U32 operands
 OPCODE(SignedSaturatedSub8,                                U8,             U8,             U8                                              )
 OPCODE(SignedSaturatedSub16,                               U16,            U16,            U16                                             )
 OPCODE(SignedSaturatedSub32,                               U32,            U32,            U32                                             )