ir: Add opcodes for performing vector unsigned absolute differences

This commit is contained in:
Lioncash 2018-04-01 14:21:14 -04:00 committed by MerryMage
parent 7780af56e3
commit ad5cf584ce
4 changed files with 439 additions and 368 deletions

View file

@ -1285,6 +1285,61 @@ void EmitX64::EmitVectorSub64(EmitContext& ctx, IR::Inst* inst) {
EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::psubq); EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::psubq);
} }
static void EmitVectorUnsignedAbsoluteDifference(size_t esize, EmitContext& ctx, IR::Inst* inst, BlockOfCode& code) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm temp = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(args[1]);
switch (esize) {
case 8:
code.movdqa(temp, x);
code.psubusb(temp, y);
code.psubusb(y, x);
code.por(temp, y);
break;
case 16:
code.movdqa(temp, x);
code.psubusw(temp, y);
code.psubusw(y, x);
code.por(temp, y);
break;
case 32:
if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
code.movdqa(temp, x);
code.pminud(x, y);
code.pmaxud(temp, y);
code.psubd(temp, x);
} else {
code.movdqa(temp, code.MConst(xword, 0x8000000080000000, 0x8000000080000000));
code.pxor(x, temp);
code.pxor(y, temp);
code.movdqa(temp, x);
code.psubd(temp, y);
code.pcmpgtd(y, x);
code.psrld(y, 1);
code.pxor(temp, y);
code.psubd(temp, y);
}
break;
}
ctx.reg_alloc.DefineValue(inst, temp);
}
// 8-bit lane variant: forwards to the shared emitter with the element width.
void EmitX64::EmitVectorUnsignedAbsoluteDifference8(EmitContext& ctx, IR::Inst* inst) {
    constexpr size_t element_bits = 8;
    EmitVectorUnsignedAbsoluteDifference(element_bits, ctx, inst, code);
}
// 16-bit lane variant: forwards to the shared emitter with the element width.
void EmitX64::EmitVectorUnsignedAbsoluteDifference16(EmitContext& ctx, IR::Inst* inst) {
    constexpr size_t element_bits = 16;
    EmitVectorUnsignedAbsoluteDifference(element_bits, ctx, inst, code);
}
// 32-bit lane variant: forwards to the shared emitter with the element width.
void EmitX64::EmitVectorUnsignedAbsoluteDifference32(EmitContext& ctx, IR::Inst* inst) {
    constexpr size_t element_bits = 32;
    EmitVectorUnsignedAbsoluteDifference(element_bits, ctx, inst, code);
}
void EmitX64::EmitVectorZeroExtend8(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorZeroExtend8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);

View file

@ -1160,6 +1160,19 @@ U128 IREmitter::VectorSub(size_t esize, const U128& a, const U128& b) {
return {}; return {};
} }
// Builds the IR instruction for an element-wise unsigned absolute difference
// of `a` and `b`, picking the opcode that matches the element width in bits.
// Supported widths are 8, 16 and 32; any other width hits UNREACHABLE().
U128 IREmitter::VectorUnsignedAbsoluteDifference(size_t esize, const U128& a, const U128& b) {
    if (esize == 8) {
        return Inst<U128>(Opcode::VectorUnsignedAbsoluteDifference8, a, b);
    }
    if (esize == 16) {
        return Inst<U128>(Opcode::VectorUnsignedAbsoluteDifference16, a, b);
    }
    if (esize == 32) {
        return Inst<U128>(Opcode::VectorUnsignedAbsoluteDifference32, a, b);
    }

    // No opcode exists for this element width.
    UNREACHABLE();
    return {};
}
U128 IREmitter::VectorZeroExtend(size_t original_esize, const U128& a) { U128 IREmitter::VectorZeroExtend(size_t original_esize, const U128& a) {
switch (original_esize) { switch (original_esize) {
case 8: case 8:

View file

@ -244,6 +244,7 @@ public:
U128 VectorShuffleWords(const U128& a, u8 mask); U128 VectorShuffleWords(const U128& a, u8 mask);
U128 VectorSignExtend(size_t original_esize, const U128& a); U128 VectorSignExtend(size_t original_esize, const U128& a);
U128 VectorSub(size_t esize, const U128& a, const U128& b); U128 VectorSub(size_t esize, const U128& a, const U128& b);
// Element-wise unsigned absolute difference of a and b; esize is the element width in bits (8, 16 or 32).
U128 VectorUnsignedAbsoluteDifference(size_t esize, const U128& a, const U128& b);
U128 VectorZeroExtend(size_t original_esize, const U128& a); U128 VectorZeroExtend(size_t original_esize, const U128& a);
U128 VectorZeroUpper(const U128& a); U128 VectorZeroUpper(const U128& a);
U128 ZeroVector(); U128 ZeroVector();

View file

@ -75,7 +75,7 @@ A64OPC(GetTPIDRRO, T::U64,
// Hints // Hints
OPCODE(PushRSB, T::Void, T::U64 ) OPCODE(PushRSB, T::Void, T::U64 )
// Pseudo-operation, handled specially at final emit // Pseudo-operation, handled specially at final emit
OPCODE(GetCarryFromOp, T::U1, T::U32 ) OPCODE(GetCarryFromOp, T::U1, T::U32 )
OPCODE(GetOverflowFromOp, T::U1, T::U32 ) OPCODE(GetOverflowFromOp, T::U1, T::U32 )
OPCODE(GetGEFromOp, T::U32, T::U32 ) OPCODE(GetGEFromOp, T::U32, T::U32 )
@ -202,7 +202,6 @@ OPCODE(AESDecryptSingleRound, T::U128, T::U128
OPCODE(AESEncryptSingleRound, T::U128, T::U128 ) OPCODE(AESEncryptSingleRound, T::U128, T::U128 )
OPCODE(AESInverseMixColumns, T::U128, T::U128 ) OPCODE(AESInverseMixColumns, T::U128, T::U128 )
OPCODE(AESMixColumns, T::U128, T::U128 ) OPCODE(AESMixColumns, T::U128, T::U128 )
// Vector instructions // Vector instructions
OPCODE(VectorGetElement8, T::U8, T::U128, T::U8 ) OPCODE(VectorGetElement8, T::U8, T::U128, T::U8 )
OPCODE(VectorGetElement16, T::U16, T::U128, T::U8 ) OPCODE(VectorGetElement16, T::U16, T::U128, T::U8 )
@ -218,9 +217,9 @@ OPCODE(VectorAdd32, T::U128, T::U128, T::U128
OPCODE(VectorAdd64, T::U128, T::U128, T::U128 ) OPCODE(VectorAdd64, T::U128, T::U128, T::U128 )
OPCODE(VectorAnd, T::U128, T::U128, T::U128 ) OPCODE(VectorAnd, T::U128, T::U128, T::U128 )
OPCODE(VectorArithmeticShiftRight8, T::U128, T::U128, T::U8 ) OPCODE(VectorArithmeticShiftRight8, T::U128, T::U128, T::U8 )
OPCODE(VectorArithmeticShiftRight16,T::U128, T::U128, T::U8 ) OPCODE(VectorArithmeticShiftRight16, T::U128, T::U128, T::U8 )
OPCODE(VectorArithmeticShiftRight32,T::U128, T::U128, T::U8 ) OPCODE(VectorArithmeticShiftRight32, T::U128, T::U128, T::U8 )
OPCODE(VectorArithmeticShiftRight64,T::U128, T::U128, T::U8 ) OPCODE(VectorArithmeticShiftRight64, T::U128, T::U128, T::U8 )
OPCODE(VectorBroadcastLower8, T::U128, T::U8 ) OPCODE(VectorBroadcastLower8, T::U128, T::U8 )
OPCODE(VectorBroadcastLower16, T::U128, T::U16 ) OPCODE(VectorBroadcastLower16, T::U128, T::U16 )
OPCODE(VectorBroadcastLower32, T::U128, T::U32 ) OPCODE(VectorBroadcastLower32, T::U128, T::U32 )
@ -302,6 +301,9 @@ OPCODE(VectorSub8, T::U128, T::U128, T::U128
OPCODE(VectorSub16, T::U128, T::U128, T::U128 ) OPCODE(VectorSub16, T::U128, T::U128, T::U128 )
OPCODE(VectorSub32, T::U128, T::U128, T::U128 ) OPCODE(VectorSub32, T::U128, T::U128, T::U128 )
OPCODE(VectorSub64, T::U128, T::U128, T::U128 ) OPCODE(VectorSub64, T::U128, T::U128, T::U128 )
// Unsigned absolute difference of two vectors, one opcode per element width (8/16/32 bits).
OPCODE(VectorUnsignedAbsoluteDifference8, T::U128, T::U128, T::U128 )
OPCODE(VectorUnsignedAbsoluteDifference16, T::U128, T::U128, T::U128 )
OPCODE(VectorUnsignedAbsoluteDifference32, T::U128, T::U128, T::U128 )
OPCODE(VectorZeroExtend8, T::U128, T::U128 ) OPCODE(VectorZeroExtend8, T::U128, T::U128 )
OPCODE(VectorZeroExtend16, T::U128, T::U128 ) OPCODE(VectorZeroExtend16, T::U128, T::U128 )
OPCODE(VectorZeroExtend32, T::U128, T::U128 ) OPCODE(VectorZeroExtend32, T::U128, T::U128 )