ir: Add opcodes for performing vector unsigned absolute differences
This commit is contained in:
parent
7780af56e3
commit
ad5cf584ce
4 changed files with 439 additions and 368 deletions
|
@ -1285,6 +1285,61 @@ void EmitX64::EmitVectorSub64(EmitContext& ctx, IR::Inst* inst) {
|
||||||
EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::psubq);
|
EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::psubq);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void EmitVectorUnsignedAbsoluteDifference(size_t esize, EmitContext& ctx, IR::Inst* inst, BlockOfCode& code) {
|
||||||
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
|
||||||
|
const Xbyak::Xmm temp = ctx.reg_alloc.ScratchXmm();
|
||||||
|
const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||||
|
const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(args[1]);
|
||||||
|
|
||||||
|
switch (esize) {
|
||||||
|
case 8:
|
||||||
|
code.movdqa(temp, x);
|
||||||
|
code.psubusb(temp, y);
|
||||||
|
code.psubusb(y, x);
|
||||||
|
code.por(temp, y);
|
||||||
|
break;
|
||||||
|
case 16:
|
||||||
|
code.movdqa(temp, x);
|
||||||
|
code.psubusw(temp, y);
|
||||||
|
code.psubusw(y, x);
|
||||||
|
code.por(temp, y);
|
||||||
|
break;
|
||||||
|
case 32:
|
||||||
|
if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
|
||||||
|
code.movdqa(temp, x);
|
||||||
|
code.pminud(x, y);
|
||||||
|
code.pmaxud(temp, y);
|
||||||
|
code.psubd(temp, x);
|
||||||
|
} else {
|
||||||
|
code.movdqa(temp, code.MConst(xword, 0x8000000080000000, 0x8000000080000000));
|
||||||
|
code.pxor(x, temp);
|
||||||
|
code.pxor(y, temp);
|
||||||
|
code.movdqa(temp, x);
|
||||||
|
code.psubd(temp, y);
|
||||||
|
code.pcmpgtd(y, x);
|
||||||
|
code.psrld(y, 1);
|
||||||
|
code.pxor(temp, y);
|
||||||
|
code.psubd(temp, y);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
ctx.reg_alloc.DefineValue(inst, temp);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Lowers the 8-bit-element variant by forwarding to the shared
// EmitVectorUnsignedAbsoluteDifference helper with an element size of 8.
void EmitX64::EmitVectorUnsignedAbsoluteDifference8(EmitContext& ctx, IR::Inst* inst) {
    constexpr size_t element_size = 8;
    EmitVectorUnsignedAbsoluteDifference(element_size, ctx, inst, code);
}
|
||||||
|
|
||||||
|
// Lowers the 16-bit-element variant by forwarding to the shared
// EmitVectorUnsignedAbsoluteDifference helper with an element size of 16.
void EmitX64::EmitVectorUnsignedAbsoluteDifference16(EmitContext& ctx, IR::Inst* inst) {
    constexpr size_t element_size = 16;
    EmitVectorUnsignedAbsoluteDifference(element_size, ctx, inst, code);
}
|
||||||
|
|
||||||
|
// Lowers the 32-bit-element variant by forwarding to the shared
// EmitVectorUnsignedAbsoluteDifference helper with an element size of 32.
void EmitX64::EmitVectorUnsignedAbsoluteDifference32(EmitContext& ctx, IR::Inst* inst) {
    constexpr size_t element_size = 32;
    EmitVectorUnsignedAbsoluteDifference(element_size, ctx, inst, code);
}
|
||||||
|
|
||||||
void EmitX64::EmitVectorZeroExtend8(EmitContext& ctx, IR::Inst* inst) {
|
void EmitX64::EmitVectorZeroExtend8(EmitContext& ctx, IR::Inst* inst) {
|
||||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
|
const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||||
|
|
|
@ -1160,6 +1160,19 @@ U128 IREmitter::VectorSub(size_t esize, const U128& a, const U128& b) {
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emits the IR instruction for a vector unsigned absolute difference of `a` and
// `b`, choosing the opcode that corresponds to `esize` (8, 16 or 32).
// Any other `esize` falls through to UNREACHABLE().
U128 IREmitter::VectorUnsignedAbsoluteDifference(size_t esize, const U128& a, const U128& b) {
    if (esize == 8) {
        return Inst<U128>(Opcode::VectorUnsignedAbsoluteDifference8, a, b);
    }
    if (esize == 16) {
        return Inst<U128>(Opcode::VectorUnsignedAbsoluteDifference16, a, b);
    }
    if (esize == 32) {
        return Inst<U128>(Opcode::VectorUnsignedAbsoluteDifference32, a, b);
    }

    UNREACHABLE();
    return {};
}
|
||||||
|
|
||||||
U128 IREmitter::VectorZeroExtend(size_t original_esize, const U128& a) {
|
U128 IREmitter::VectorZeroExtend(size_t original_esize, const U128& a) {
|
||||||
switch (original_esize) {
|
switch (original_esize) {
|
||||||
case 8:
|
case 8:
|
||||||
|
|
|
@ -244,6 +244,7 @@ public:
|
||||||
U128 VectorShuffleWords(const U128& a, u8 mask);
|
U128 VectorShuffleWords(const U128& a, u8 mask);
|
||||||
U128 VectorSignExtend(size_t original_esize, const U128& a);
|
U128 VectorSignExtend(size_t original_esize, const U128& a);
|
||||||
U128 VectorSub(size_t esize, const U128& a, const U128& b);
|
U128 VectorSub(size_t esize, const U128& a, const U128& b);
|
||||||
|
U128 VectorUnsignedAbsoluteDifference(size_t esize, const U128& a, const U128& b);
|
||||||
U128 VectorZeroExtend(size_t original_esize, const U128& a);
|
U128 VectorZeroExtend(size_t original_esize, const U128& a);
|
||||||
U128 VectorZeroUpper(const U128& a);
|
U128 VectorZeroUpper(const U128& a);
|
||||||
U128 ZeroVector();
|
U128 ZeroVector();
|
||||||
|
|
|
@ -75,7 +75,7 @@ A64OPC(GetTPIDRRO, T::U64,
|
||||||
// Hints
|
// Hints
|
||||||
OPCODE(PushRSB, T::Void, T::U64 )
|
OPCODE(PushRSB, T::Void, T::U64 )
|
||||||
|
|
||||||
// Pseudo-operation, handled specially at final emit
|
// Pseudo-operation, handled specially at final emit
|
||||||
OPCODE(GetCarryFromOp, T::U1, T::U32 )
|
OPCODE(GetCarryFromOp, T::U1, T::U32 )
|
||||||
OPCODE(GetOverflowFromOp, T::U1, T::U32 )
|
OPCODE(GetOverflowFromOp, T::U1, T::U32 )
|
||||||
OPCODE(GetGEFromOp, T::U32, T::U32 )
|
OPCODE(GetGEFromOp, T::U32, T::U32 )
|
||||||
|
@ -202,7 +202,6 @@ OPCODE(AESDecryptSingleRound, T::U128, T::U128
|
||||||
OPCODE(AESEncryptSingleRound, T::U128, T::U128 )
|
OPCODE(AESEncryptSingleRound, T::U128, T::U128 )
|
||||||
OPCODE(AESInverseMixColumns, T::U128, T::U128 )
|
OPCODE(AESInverseMixColumns, T::U128, T::U128 )
|
||||||
OPCODE(AESMixColumns, T::U128, T::U128 )
|
OPCODE(AESMixColumns, T::U128, T::U128 )
|
||||||
|
|
||||||
// Vector instructions
|
// Vector instructions
|
||||||
OPCODE(VectorGetElement8, T::U8, T::U128, T::U8 )
|
OPCODE(VectorGetElement8, T::U8, T::U128, T::U8 )
|
||||||
OPCODE(VectorGetElement16, T::U16, T::U128, T::U8 )
|
OPCODE(VectorGetElement16, T::U16, T::U128, T::U8 )
|
||||||
|
@ -218,9 +217,9 @@ OPCODE(VectorAdd32, T::U128, T::U128, T::U128
|
||||||
OPCODE(VectorAdd64, T::U128, T::U128, T::U128 )
|
OPCODE(VectorAdd64, T::U128, T::U128, T::U128 )
|
||||||
OPCODE(VectorAnd, T::U128, T::U128, T::U128 )
|
OPCODE(VectorAnd, T::U128, T::U128, T::U128 )
|
||||||
OPCODE(VectorArithmeticShiftRight8, T::U128, T::U128, T::U8 )
|
OPCODE(VectorArithmeticShiftRight8, T::U128, T::U128, T::U8 )
|
||||||
OPCODE(VectorArithmeticShiftRight16,T::U128, T::U128, T::U8 )
|
OPCODE(VectorArithmeticShiftRight16, T::U128, T::U128, T::U8 )
|
||||||
OPCODE(VectorArithmeticShiftRight32,T::U128, T::U128, T::U8 )
|
OPCODE(VectorArithmeticShiftRight32, T::U128, T::U128, T::U8 )
|
||||||
OPCODE(VectorArithmeticShiftRight64,T::U128, T::U128, T::U8 )
|
OPCODE(VectorArithmeticShiftRight64, T::U128, T::U128, T::U8 )
|
||||||
OPCODE(VectorBroadcastLower8, T::U128, T::U8 )
|
OPCODE(VectorBroadcastLower8, T::U128, T::U8 )
|
||||||
OPCODE(VectorBroadcastLower16, T::U128, T::U16 )
|
OPCODE(VectorBroadcastLower16, T::U128, T::U16 )
|
||||||
OPCODE(VectorBroadcastLower32, T::U128, T::U32 )
|
OPCODE(VectorBroadcastLower32, T::U128, T::U32 )
|
||||||
|
@ -302,6 +301,9 @@ OPCODE(VectorSub8, T::U128, T::U128, T::U128
|
||||||
OPCODE(VectorSub16, T::U128, T::U128, T::U128 )
|
OPCODE(VectorSub16, T::U128, T::U128, T::U128 )
|
||||||
OPCODE(VectorSub32, T::U128, T::U128, T::U128 )
|
OPCODE(VectorSub32, T::U128, T::U128, T::U128 )
|
||||||
OPCODE(VectorSub64, T::U128, T::U128, T::U128 )
|
OPCODE(VectorSub64, T::U128, T::U128, T::U128 )
|
||||||
|
OPCODE(VectorUnsignedAbsoluteDifference8, T::U128, T::U128, T::U128 )
|
||||||
|
OPCODE(VectorUnsignedAbsoluteDifference16, T::U128, T::U128, T::U128 )
|
||||||
|
OPCODE(VectorUnsignedAbsoluteDifference32, T::U128, T::U128, T::U128 )
|
||||||
OPCODE(VectorZeroExtend8, T::U128, T::U128 )
|
OPCODE(VectorZeroExtend8, T::U128, T::U128 )
|
||||||
OPCODE(VectorZeroExtend16, T::U128, T::U128 )
|
OPCODE(VectorZeroExtend16, T::U128, T::U128 )
|
||||||
OPCODE(VectorZeroExtend32, T::U128, T::U128 )
|
OPCODE(VectorZeroExtend32, T::U128, T::U128 )
|
||||||
|
|
Loading…
Reference in a new issue