ir: Add opcodes for performing vector unsigned absolute differences

2018-04-01 14:21:14 -04:00 · 2018-04-01 14:21:14 -04:00 · ad5cf584ce
commit ad5cf584ce
parent 7780af56e3
4 changed files with 439 additions and 368 deletions
--- a/src/backend_x64/emit_x64_vector.cpp
+++ b/src/backend_x64/emit_x64_vector.cpp
@ -1285,6 +1285,61 @@ void EmitX64::EmitVectorSub64(EmitContext& ctx, IR::Inst* inst) {
    EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::psubq);
 }
 static void EmitVectorUnsignedAbsoluteDifference(size_t esize, EmitContext& ctx, IR::Inst* inst, BlockOfCode& code) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const Xbyak::Xmm temp = ctx.reg_alloc.ScratchXmm();
    const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(args[0]);
    const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(args[1]);
    switch (esize) {
    case 8:
        code.movdqa(temp, x);
        code.psubusb(temp, y);
        code.psubusb(y, x);
        code.por(temp, y);
        break;
    case 16:
        code.movdqa(temp, x);
        code.psubusw(temp, y);
        code.psubusw(y, x);
        code.por(temp, y);
        break;
    case 32:
        if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
            code.movdqa(temp, x);
            code.pminud(x, y);
            code.pmaxud(temp, y);
            code.psubd(temp, x);
        } else {
            code.movdqa(temp, code.MConst(xword, 0x8000000080000000, 0x8000000080000000));
            code.pxor(x, temp);
            code.pxor(y, temp);
            code.movdqa(temp, x);
            code.psubd(temp, y);
            code.pcmpgtd(y, x);
            code.psrld(y, 1);
            code.pxor(temp, y);
            code.psubd(temp, y);
        }
        break;
    }
    ctx.reg_alloc.DefineValue(inst, temp);
 }
 // Forwards to EmitVectorUnsignedAbsoluteDifference with an element size of 8 bits.
 void EmitX64::EmitVectorUnsignedAbsoluteDifference8(EmitContext& ctx, IR::Inst* inst) {
    constexpr size_t element_bits = 8;
    EmitVectorUnsignedAbsoluteDifference(element_bits, ctx, inst, code);
 }
 // Forwards to EmitVectorUnsignedAbsoluteDifference with an element size of 16 bits.
 void EmitX64::EmitVectorUnsignedAbsoluteDifference16(EmitContext& ctx, IR::Inst* inst) {
    constexpr size_t element_bits = 16;
    EmitVectorUnsignedAbsoluteDifference(element_bits, ctx, inst, code);
 }
 // Forwards to EmitVectorUnsignedAbsoluteDifference with an element size of 32 bits.
 void EmitX64::EmitVectorUnsignedAbsoluteDifference32(EmitContext& ctx, IR::Inst* inst) {
    constexpr size_t element_bits = 32;
    EmitVectorUnsignedAbsoluteDifference(element_bits, ctx, inst, code);
 }
 void EmitX64::EmitVectorZeroExtend8(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
--- a/src/frontend/ir/ir_emitter.cpp
+++ b/src/frontend/ir/ir_emitter.cpp
@ -1160,6 +1160,19 @@ U128 IREmitter::VectorSub(size_t esize, const U128& a, const U128& b) {
    return {};
 }
 // Creates the VectorUnsignedAbsoluteDifference IR instruction matching the
 // requested element size.
 //
 // esize - element width in bits; only 8, 16 and 32 map to an opcode.
 // a, b  - the two 128-bit vector operands.
 //
 // Any other esize reaches UNREACHABLE(); the trailing empty return follows it.
 U128 IREmitter::VectorUnsignedAbsoluteDifference(size_t esize, const U128& a, const U128& b) {
    if (esize == 8) {
        return Inst<U128>(Opcode::VectorUnsignedAbsoluteDifference8, a, b);
    }
    if (esize == 16) {
        return Inst<U128>(Opcode::VectorUnsignedAbsoluteDifference16, a, b);
    }
    if (esize == 32) {
        return Inst<U128>(Opcode::VectorUnsignedAbsoluteDifference32, a, b);
    }

    UNREACHABLE();
    return {};
 }
 U128 IREmitter::VectorZeroExtend(size_t original_esize, const U128& a) {
    switch (original_esize) {
    case 8:
--- a/src/frontend/ir/ir_emitter.h
+++ b/src/frontend/ir/ir_emitter.h
@ -244,6 +244,7 @@ public:
    U128 VectorShuffleWords(const U128& a, u8 mask);
    U128 VectorSignExtend(size_t original_esize, const U128& a);
    U128 VectorSub(size_t esize, const U128& a, const U128& b);
    U128 VectorUnsignedAbsoluteDifference(size_t esize, const U128& a, const U128& b);
    U128 VectorZeroExtend(size_t original_esize, const U128& a);
    U128 VectorZeroUpper(const U128& a);
    U128 ZeroVector();
--- a/src/frontend/ir/opcodes.inc
+++ b/src/frontend/ir/opcodes.inc
@ -75,7 +75,7 @@ A64OPC(GetTPIDRRO,                  T::U64,
 // Hints
 OPCODE(PushRSB,                             T::Void,        T::U64                                          )
-// Pseudo-operation, handled specially at final emit
+// Pseudo-operation, handled specially at final emit
 OPCODE(GetCarryFromOp,                      T::U1,          T::U32                                          )
 OPCODE(GetOverflowFromOp,                   T::U1,          T::U32                                          )
 OPCODE(GetGEFromOp,                         T::U32,         T::U32                                          )
@ -202,7 +202,6 @@ OPCODE(AESDecryptSingleRound,       T::U128,        T::U128
 OPCODE(AESEncryptSingleRound,               T::U128,        T::U128                                         )
 OPCODE(AESInverseMixColumns,                T::U128,        T::U128                                         )
 OPCODE(AESMixColumns,                       T::U128,        T::U128                                         )
 // Vector instructions
 OPCODE(VectorGetElement8,                   T::U8,          T::U128,        T::U8                           )
 OPCODE(VectorGetElement16,                  T::U16,         T::U128,        T::U8                           )
@ -218,9 +217,9 @@ OPCODE(VectorAdd32,                 T::U128,        T::U128,        T::U128
 OPCODE(VectorAdd64,                         T::U128,        T::U128,        T::U128                         )
 OPCODE(VectorAnd,                           T::U128,        T::U128,        T::U128                         )
 OPCODE(VectorArithmeticShiftRight8,         T::U128,        T::U128,        T::U8                           )
-OPCODE(VectorArithmeticShiftRight16,T::U128,        T::U128,        T::U8                           )
+OPCODE(VectorArithmeticShiftRight16,        T::U128,        T::U128,        T::U8                           )
-OPCODE(VectorArithmeticShiftRight32,T::U128,        T::U128,        T::U8                           )
+OPCODE(VectorArithmeticShiftRight32,        T::U128,        T::U128,        T::U8                           )
-OPCODE(VectorArithmeticShiftRight64,T::U128,        T::U128,        T::U8                           )
+OPCODE(VectorArithmeticShiftRight64,        T::U128,        T::U128,        T::U8                           )
 OPCODE(VectorBroadcastLower8,               T::U128,        T::U8                                           )
 OPCODE(VectorBroadcastLower16,              T::U128,        T::U16                                          )
 OPCODE(VectorBroadcastLower32,              T::U128,        T::U32                                          )
@ -302,6 +301,9 @@ OPCODE(VectorSub8,                  T::U128,        T::U128,        T::U128
 OPCODE(VectorSub16,                         T::U128,        T::U128,        T::U128                         )
 OPCODE(VectorSub32,                         T::U128,        T::U128,        T::U128                         )
 OPCODE(VectorSub64,                         T::U128,        T::U128,        T::U128                         )
 // Vector unsigned absolute difference, one opcode per element size (8/16/32 bits);
 // each takes two U128 operands and yields a U128.
 OPCODE(VectorUnsignedAbsoluteDifference8,   T::U128,        T::U128,        T::U128                         )
 OPCODE(VectorUnsignedAbsoluteDifference16,  T::U128,        T::U128,        T::U128                         )
 OPCODE(VectorUnsignedAbsoluteDifference32,  T::U128,        T::U128,        T::U128                         )
 OPCODE(VectorZeroExtend8,                   T::U128,        T::U128                                         )
 OPCODE(VectorZeroExtend16,                  T::U128,        T::U128                                         )
 OPCODE(VectorZeroExtend32,                  T::U128,        T::U128                                         )