ir: Add opcodes for scalar signed saturated doubling multiplies

2018-09-05 19:16:41 -04:00 · 2018-09-05 19:16:41 -04:00 · 7ebfd0f31c
commit 7ebfd0f31c
parent 9c03311fed
4 changed files with 81 additions and 0 deletions
--- a/src/backend/x64/emit_x64_saturation.cpp
+++ b/src/backend/x64/emit_x64_saturation.cpp
@ -134,6 +134,66 @@ void EmitX64::EmitSignedSaturatedAdd64(EmitContext& ctx, IR::Inst* inst) {
    EmitSignedSaturatedOp<Op::Add, 64>(code, ctx, inst);
 }
 // Emits x64 code for the 16-bit signed saturated doubling multiply that
 // returns the high half: the low 16 bits of the result are the high 16 bits
 // of (2 * a * b), or 0x7FFF when the doubling overflows 32 bits. If a
 // GetOverflowFromOp pseudo-operation is attached to `inst`, it is defined
 // with the overflow flag and then erased.
 void EmitX64::EmitSignedSaturatedDoublingMultiplyReturnHigh16(EmitContext& ctx, IR::Inst* inst) {
    // May be null: the overflow output is only materialised when present.
    auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    // Both operands are taken as scratch registers because they are overwritten below.
    const Xbyak::Reg32 x = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
    const Xbyak::Reg32 y = ctx.reg_alloc.UseScratchGpr(args[1]).cvt32();
    const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32();
    // Sign-extend the 16-bit operands to 32 bits so the product below is exact.
    code.movsx(x, x.cvt16());
    code.movsx(y, y.cvt16());
    // x = a * b (32-bit signed product).
    code.imul(x, y);
    // y = 2 * x, i.e. the doubled product truncated to 32 bits.
    code.lea(y, ptr[x.cvt64() + x.cvt64()]);
    // tmp = x >> 15, whose low 16 bits equal bits 31:16 of the doubled product.
    // NOTE(review): bit 16 of tmp still holds the product's sign bit; presumably
    // consumers of a U16 value ignore bits above 15 - confirm.
    code.mov(tmp, x);
    code.shr(tmp, 15);
    // Sign flag := bit 31 of (2x ^ x), set exactly when doubling changed the
    // sign, i.e. when 2 * a * b overflowed 32 bits. This must come after the
    // shr above (which also writes flags) and before the flag consumers below.
    code.xor_(y, x);
    // Preload the saturated value; mov leaves the flags from the xor intact.
    code.mov(y, 0x7FFF);
    // No overflow: replace the saturated value with the computed high half.
    code.cmovns(y, tmp);
    if (overflow_inst) {
        // Flags are still those of the xor: write the overflow bit into tmp's low byte.
        code.sets(tmp.cvt8());
        ctx.reg_alloc.DefineValue(overflow_inst, tmp);
        ctx.EraseInstruction(overflow_inst);
    }
    ctx.reg_alloc.DefineValue(inst, y);
 }
 // Emits x64 code for the 32-bit signed saturated doubling multiply that
 // returns the high half: the result is the high 32 bits of (2 * a * b), or
 // 0x7FFFFFFF when the doubling overflows 64 bits. If a GetOverflowFromOp
 // pseudo-operation is attached to `inst`, it is defined with the overflow
 // flag and then erased.
 void EmitX64::EmitSignedSaturatedDoublingMultiplyReturnHigh32(EmitContext& ctx, IR::Inst* inst) {
    // May be null: the overflow output is only materialised when present.
    auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    // Both operands are taken as scratch registers because they are overwritten below.
    const Xbyak::Reg64 x = ctx.reg_alloc.UseScratchGpr(args[0]);
    const Xbyak::Reg64 y = ctx.reg_alloc.UseScratchGpr(args[1]);
    const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr();
    // Sign-extend the 32-bit operands to 64 bits so the product below is exact.
    code.movsxd(x, x.cvt32());
    code.movsxd(y, y.cvt32());
    // x = a * b (64-bit signed product).
    code.imul(x, y);
    // y = 2 * x, i.e. the doubled product truncated to 64 bits.
    code.lea(y, ptr[x + x]);
    // tmp = x >> 31, whose low 32 bits equal bits 63:32 of the doubled product.
    code.mov(tmp, x);
    code.shr(tmp, 31);
    // Sign flag := bit 63 of (2x ^ x), set exactly when doubling changed the
    // sign, i.e. when 2 * a * b overflowed 64 bits. This must come after the
    // shr above (which also writes flags) and before the flag consumers below.
    code.xor_(y, x);
    // Preload the saturated value; mov leaves the flags from the xor intact.
    code.mov(y.cvt32(), 0x7FFFFFFF);
    // No overflow: replace the saturated value with the computed high half.
    code.cmovns(y.cvt32(), tmp.cvt32());
    if (overflow_inst) {
        // Flags are still those of the xor: write the overflow bit into tmp's low byte.
        code.sets(tmp.cvt8());
        ctx.reg_alloc.DefineValue(overflow_inst, tmp);
        ctx.EraseInstruction(overflow_inst);
    }
    ctx.reg_alloc.DefineValue(inst, y);
 }
 // Emits the SignedSaturatedSub8 opcode by delegating to the shared
 // EmitSignedSaturatedOp helper, instantiated with Op::Sub and a size of 8.
 void EmitX64::EmitSignedSaturatedSub8(EmitContext& ctx, IR::Inst* inst) {
    EmitSignedSaturatedOp<Op::Sub, 8>(code, ctx, inst);
 }
--- a/src/frontend/ir/ir_emitter.cpp
+++ b/src/frontend/ir/ir_emitter.cpp
@ -501,6 +501,24 @@ ResultAndOverflow<UAny> IREmitter::SignedSaturatedAdd(const UAny& a, const UAny&
    return {result, overflow};
 }
 // Builds the IR for a signed saturated doubling multiply that returns the
 // high half of the product. Both operands must have the same type: U16 and
 // U32 select the matching width-specific opcode, and any other type is
 // treated as unreachable. The returned pair holds the result value and a
 // GetOverflowFromOp pseudo-operation attached to that result.
 ResultAndOverflow<UAny> IREmitter::SignedSaturatedDoublingMultiplyReturnHigh(const UAny& a, const UAny& b) {
    ASSERT(a.GetType() == b.GetType());
    // Picks the opcode variant from the (shared) operand type.
    const auto emit_multiply = [this, &a, &b]() -> IR::UAny {
        const IR::Type operand_type = a.GetType();
        if (operand_type == IR::Type::U16) {
            return Inst<U16>(Opcode::SignedSaturatedDoublingMultiplyReturnHigh16, a, b);
        }
        if (operand_type == IR::Type::U32) {
            return Inst<U32>(Opcode::SignedSaturatedDoublingMultiplyReturnHigh32, a, b);
        }
        UNREACHABLE();
        return IR::UAny{};
    };
    const auto product = emit_multiply();
    // The overflow flag is a pseudo-operation that reads from the multiply itself.
    const auto saturated = Inst<U1>(Opcode::GetOverflowFromOp, product);
    return {product, saturated};
 }
 ResultAndOverflow<UAny> IREmitter::SignedSaturatedSub(const UAny& a, const UAny& b) {
    ASSERT(a.GetType() == b.GetType());
    const auto result = [&]() -> IR::UAny {
--- a/src/frontend/ir/ir_emitter.h
+++ b/src/frontend/ir/ir_emitter.h
@ -143,6 +143,7 @@ public:
    U32U64 MinUnsigned(const U32U64& a, const U32U64& b);
    ResultAndOverflow<UAny> SignedSaturatedAdd(const UAny& a, const UAny& b);
    // Emits the 16- or 32-bit SignedSaturatedDoublingMultiplyReturnHigh opcode,
    // chosen by operand type (a and b must share a type), and returns its
    // result together with a GetOverflowFromOp pseudo-operation on that result.
    ResultAndOverflow<UAny> SignedSaturatedDoublingMultiplyReturnHigh(const UAny& a, const UAny& b);
    ResultAndOverflow<UAny> SignedSaturatedSub(const UAny& a, const UAny& b);
    ResultAndOverflow<U32> SignedSaturation(const U32& a, size_t bit_size_to_saturate_to);
    ResultAndOverflow<UAny> UnsignedSaturatedAdd(const UAny& a, const UAny& b);
--- a/src/frontend/ir/opcodes.inc
+++ b/src/frontend/ir/opcodes.inc
@ -157,6 +157,8 @@ OPCODE(SignedSaturatedAdd8,                                U8,             U8,
 OPCODE(SignedSaturatedAdd16,                               U16,            U16,            U16                                             )
 OPCODE(SignedSaturatedAdd32,                               U32,            U32,            U32                                             )
 OPCODE(SignedSaturatedAdd64,                               U64,            U64,            U64                                             )
 // Scalar signed saturated doubling multiply returning the high half, in
 // 16-bit and 32-bit variants (each takes two operands of the listed width).
 OPCODE(SignedSaturatedDoublingMultiplyReturnHigh16,        U16,            U16,            U16                                             )
 OPCODE(SignedSaturatedDoublingMultiplyReturnHigh32,        U32,            U32,            U32                                             )
 OPCODE(SignedSaturatedSub8,                                U8,             U8,             U8                                              )
 OPCODE(SignedSaturatedSub16,                               U16,            U16,            U16                                             )
 OPCODE(SignedSaturatedSub32,                               U32,            U32,            U32                                             )