ir: Add opcodes for performing vector halving subtracts

This commit is contained in:
parent 4f37c0ec5a
commit 44a5f8095a
4 changed files with 120 additions and 0 deletions
@ -849,6 +849,92 @@ void EmitX64::EmitVectorHalvingAddU32(EmitContext& ctx, IR::Inst* inst) {
|
||||||
EmitVectorHalvingAddUnsigned(32, ctx, inst, code);
|
EmitVectorHalvingAddUnsigned(32, ctx, inst, code);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emits x64 code computing a per-lane signed halving subtract: (a - b) >> 1,
// with an arithmetic shift and no intermediate overflow.
// esize selects the lane width (8, 16 or 32 bits).
static void EmitVectorHalvingSubSigned(size_t esize, EmitContext& ctx, IR::Inst* inst, BlockOfCode& code) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
    const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]);

    switch (esize) {
    case 8: {
        // Flip the sign bits so both operands are biased into unsigned range,
        // then use PAVGB (unsigned rounded average, (a + b + 1) >> 1).
        // a - avg(a, b) == (a - b) >> 1, and the bias cancels in the subtract.
        const Xbyak::Xmm mask = ctx.reg_alloc.ScratchXmm();
        code.movdqa(mask, code.MConst(xword, 0x8080808080808080, 0x8080808080808080));
        code.pxor(a, mask);
        code.pxor(b, mask);
        code.pavgb(b, a);  // b := (a + b + 1) >> 1 (unsigned, rounds up)
        code.psubb(a, b);  // a := a - b == (a - b) >> 1
        break;
    }
    case 16: {
        // Same bias trick as the 8-bit case, with 16-bit lanes and PAVGW.
        const Xbyak::Xmm mask = ctx.reg_alloc.ScratchXmm();
        code.movdqa(mask, code.MConst(xword, 0x8000800080008000, 0x8000800080008000));
        code.pxor(a, mask);
        code.pxor(b, mask);
        code.pavgw(b, a);
        code.psubw(a, b);
        break;
    }
    case 32:
        // No 32-bit PAVG exists; use the identity
        // (a - b) >> 1 == ((a ^ b) >>s 1) - ((a ^ b) & b),
        // which follows from a - b == (a ^ b) - 2 * (~a & b).
        code.pxor(a, b);   // a := a ^ b
        code.pand(b, a);   // b := (a ^ b) & b == ~a & b
        code.psrad(a, 1);  // arithmetic shift preserves the sign
        code.psubd(a, b);
        break;
    }

    ctx.reg_alloc.DefineValue(inst, a);
}
|
// Signed halving subtract, 8-bit lanes.
void EmitX64::EmitVectorHalvingSubS8(EmitContext& ctx, IR::Inst* inst) {
    EmitVectorHalvingSubSigned(8, ctx, inst, code);
}
|
// Signed halving subtract, 16-bit lanes.
void EmitX64::EmitVectorHalvingSubS16(EmitContext& ctx, IR::Inst* inst) {
    EmitVectorHalvingSubSigned(16, ctx, inst, code);
}
|
// Signed halving subtract, 32-bit lanes.
void EmitX64::EmitVectorHalvingSubS32(EmitContext& ctx, IR::Inst* inst) {
    EmitVectorHalvingSubSigned(32, ctx, inst, code);
}
|
// Emits x64 code computing a per-lane unsigned halving subtract:
// (a - b) >> 1, where the difference is taken at full precision before the
// shift (so a < b yields an all-ones-prefixed pattern, matching a truncated
// arithmetic result). esize selects the lane width (8, 16 or 32 bits).
static void EmitVectorHalvingSubUnsigned(size_t esize, EmitContext& ctx, IR::Inst* inst, BlockOfCode& code) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
    const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]);

    switch (esize) {
    case 8:
        // a - ((a + b + 1) >> 1) == (a - b) >> 1; no sign bias needed since
        // PAVGB is already an unsigned average.
        code.pavgb(b, a);
        code.psubb(a, b);
        break;
    case 16:
        code.pavgw(b, a);
        code.psubw(a, b);
        break;
    case 32:
        // No 32-bit PAVG exists; same identity as the signed helper but with
        // a logical shift: (a - b) >> 1 == ((a ^ b) >> 1) - ((a ^ b) & b).
        code.pxor(a, b);
        code.pand(b, a);
        code.psrld(a, 1);
        code.psubd(a, b);
        break;
    }

    ctx.reg_alloc.DefineValue(inst, a);
}
|
// Unsigned halving subtract, 8-bit lanes.
void EmitX64::EmitVectorHalvingSubU8(EmitContext& ctx, IR::Inst* inst) {
    EmitVectorHalvingSubUnsigned(8, ctx, inst, code);
}
|
// Unsigned halving subtract, 16-bit lanes.
void EmitX64::EmitVectorHalvingSubU16(EmitContext& ctx, IR::Inst* inst) {
    EmitVectorHalvingSubUnsigned(16, ctx, inst, code);
}
|
// Unsigned halving subtract, 32-bit lanes.
void EmitX64::EmitVectorHalvingSubU32(EmitContext& ctx, IR::Inst* inst) {
    EmitVectorHalvingSubUnsigned(32, ctx, inst, code);
}
static void EmitVectorInterleaveLower(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, int size) {
|
static void EmitVectorInterleaveLower(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, int size) {
|
||||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
|
||||||
|
|
|
@ -912,6 +912,32 @@ U128 IREmitter::VectorHalvingAddUnsigned(size_t esize, const U128& a, const U128
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Builds the signed vector halving-subtract IR instruction for the given
// element size. esize must be 8, 16 or 32; any other value is a programmer
// error and hits UNREACHABLE().
U128 IREmitter::VectorHalvingSubSigned(size_t esize, const U128& a, const U128& b) {
    switch (esize) {
    case 8:
        return Inst<U128>(Opcode::VectorHalvingSubS8, a, b);
    case 16:
        return Inst<U128>(Opcode::VectorHalvingSubS16, a, b);
    case 32:
        return Inst<U128>(Opcode::VectorHalvingSubS32, a, b);
    }
    UNREACHABLE();
    return {};
}
|
// Builds the unsigned vector halving-subtract IR instruction for the given
// element size. esize must be 8, 16 or 32; any other value is a programmer
// error and hits UNREACHABLE().
U128 IREmitter::VectorHalvingSubUnsigned(size_t esize, const U128& a, const U128& b) {
    switch (esize) {
    case 8:
        return Inst<U128>(Opcode::VectorHalvingSubU8, a, b);
    case 16:
        return Inst<U128>(Opcode::VectorHalvingSubU16, a, b);
    case 32:
        return Inst<U128>(Opcode::VectorHalvingSubU32, a, b);
    }
    UNREACHABLE();
    return {};
}
U128 IREmitter::VectorInterleaveLower(size_t esize, const U128& a, const U128& b) {
|
U128 IREmitter::VectorInterleaveLower(size_t esize, const U128& a, const U128& b) {
|
||||||
switch (esize) {
|
switch (esize) {
|
||||||
case 8:
|
case 8:
|
||||||
|
|
|
@ -210,6 +210,8 @@ public:
|
||||||
U128 VectorGreaterUnsigned(size_t esize, const U128& a, const U128& b);
|
U128 VectorGreaterUnsigned(size_t esize, const U128& a, const U128& b);
|
||||||
U128 VectorHalvingAddSigned(size_t esize, const U128& a, const U128& b);
|
U128 VectorHalvingAddSigned(size_t esize, const U128& a, const U128& b);
|
||||||
U128 VectorHalvingAddUnsigned(size_t esize, const U128& a, const U128& b);
|
U128 VectorHalvingAddUnsigned(size_t esize, const U128& a, const U128& b);
|
||||||
|
U128 VectorHalvingSubSigned(size_t esize, const U128& a, const U128& b);
|
||||||
|
U128 VectorHalvingSubUnsigned(size_t esize, const U128& a, const U128& b);
|
||||||
U128 VectorInterleaveLower(size_t esize, const U128& a, const U128& b);
|
U128 VectorInterleaveLower(size_t esize, const U128& a, const U128& b);
|
||||||
U128 VectorInterleaveUpper(size_t esize, const U128& a, const U128& b);
|
U128 VectorInterleaveUpper(size_t esize, const U128& a, const U128& b);
|
||||||
U128 VectorLessEqualSigned(size_t esize, const U128& a, const U128& b);
|
U128 VectorLessEqualSigned(size_t esize, const U128& a, const U128& b);
|
||||||
|
|
|
@ -261,6 +261,12 @@ OPCODE(VectorHalvingAddS32, T::U128, T::U128, T::U
|
||||||
OPCODE(VectorHalvingAddU8, T::U128, T::U128, T::U128 )
|
OPCODE(VectorHalvingAddU8, T::U128, T::U128, T::U128 )
|
||||||
OPCODE(VectorHalvingAddU16, T::U128, T::U128, T::U128 )
|
OPCODE(VectorHalvingAddU16, T::U128, T::U128, T::U128 )
|
||||||
OPCODE(VectorHalvingAddU32, T::U128, T::U128, T::U128 )
|
OPCODE(VectorHalvingAddU32, T::U128, T::U128, T::U128 )
|
||||||
|
OPCODE(VectorHalvingSubS8, T::U128, T::U128, T::U128 )
|
||||||
|
OPCODE(VectorHalvingSubS16, T::U128, T::U128, T::U128 )
|
||||||
|
OPCODE(VectorHalvingSubS32, T::U128, T::U128, T::U128 )
|
||||||
|
OPCODE(VectorHalvingSubU8, T::U128, T::U128, T::U128 )
|
||||||
|
OPCODE(VectorHalvingSubU16, T::U128, T::U128, T::U128 )
|
||||||
|
OPCODE(VectorHalvingSubU32, T::U128, T::U128, T::U128 )
|
||||||
OPCODE(VectorInterleaveLower8, T::U128, T::U128, T::U128 )
|
OPCODE(VectorInterleaveLower8, T::U128, T::U128, T::U128 )
|
||||||
OPCODE(VectorInterleaveLower16, T::U128, T::U128, T::U128 )
|
OPCODE(VectorInterleaveLower16, T::U128, T::U128, T::U128 )
|
||||||
OPCODE(VectorInterleaveLower32, T::U128, T::U128, T::U128 )
|
OPCODE(VectorInterleaveLower32, T::U128, T::U128, T::U128 )
|
||||||
|
|
Loading…
Reference in a new issue