ir: Add opcodes for performing rounding halving adds
This commit is contained in:
parent
054549da35
commit
bc718c5b28
4 changed files with 135 additions and 0 deletions
|
@ -1789,6 +1789,105 @@ void EmitX64::EmitVectorReverseBits(EmitContext& ctx, IR::Inst* inst) {
|
||||||
ctx.reg_alloc.DefineValue(inst, data);
|
ctx.reg_alloc.DefineValue(inst, data);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void EmitVectorRoundingHalvingAddSigned(size_t esize, EmitContext& ctx, IR::Inst* inst, BlockOfCode& code) {
|
||||||
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
|
||||||
|
const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||||
|
const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]);
|
||||||
|
|
||||||
|
switch (esize) {
|
||||||
|
case 8: {
|
||||||
|
const Xbyak::Xmm vec_128 = ctx.reg_alloc.ScratchXmm();
|
||||||
|
code.movdqa(vec_128, code.MConst(xword, 0x8080808080808080, 0x8080808080808080));
|
||||||
|
|
||||||
|
code.paddb(a, vec_128);
|
||||||
|
code.paddb(b, vec_128);
|
||||||
|
code.pavgb(a, b);
|
||||||
|
code.paddb(a, vec_128);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case 16: {
|
||||||
|
const Xbyak::Xmm vec_32768 = ctx.reg_alloc.ScratchXmm();
|
||||||
|
code.movdqa(vec_32768, code.MConst(xword, 0x8000800080008000, 0x8000800080008000));
|
||||||
|
|
||||||
|
code.paddw(a, vec_32768);
|
||||||
|
code.paddw(b, vec_32768);
|
||||||
|
code.pavgw(a, b);
|
||||||
|
code.paddw(a, vec_32768);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case 32: {
|
||||||
|
const Xbyak::Xmm tmp1 = ctx.reg_alloc.ScratchXmm();
|
||||||
|
code.movdqa(tmp1, a);
|
||||||
|
|
||||||
|
code.por(a, b);
|
||||||
|
code.psrad(tmp1, 1);
|
||||||
|
code.psrad(b, 1);
|
||||||
|
code.pslld(a, 31);
|
||||||
|
code.paddd(b, tmp1);
|
||||||
|
code.psrld(a, 31);
|
||||||
|
code.paddd(a, b);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ctx.reg_alloc.DefineValue(inst, a);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Signed rounding halving add on 8-bit elements; forwards to the shared
// signed helper with the element width.
void EmitX64::EmitVectorRoundingHalvingAddS8(EmitContext& ctx, IR::Inst* inst) {
    constexpr size_t element_bits = 8;
    EmitVectorRoundingHalvingAddSigned(element_bits, ctx, inst, code);
}
|
||||||
|
|
||||||
|
// Signed rounding halving add on 16-bit elements; forwards to the shared
// signed helper with the element width.
void EmitX64::EmitVectorRoundingHalvingAddS16(EmitContext& ctx, IR::Inst* inst) {
    constexpr size_t element_bits = 16;
    EmitVectorRoundingHalvingAddSigned(element_bits, ctx, inst, code);
}
|
||||||
|
|
||||||
|
// Signed rounding halving add on 32-bit elements; forwards to the shared
// signed helper with the element width.
void EmitX64::EmitVectorRoundingHalvingAddS32(EmitContext& ctx, IR::Inst* inst) {
    constexpr size_t element_bits = 32;
    EmitVectorRoundingHalvingAddSigned(element_bits, ctx, inst, code);
}
|
||||||
|
|
||||||
|
static void EmitVectorRoundingHalvingAddUnsigned(size_t esize, EmitContext& ctx, IR::Inst* inst, BlockOfCode& code) {
|
||||||
|
switch (esize) {
|
||||||
|
case 8:
|
||||||
|
EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pavgb);
|
||||||
|
return;
|
||||||
|
case 16:
|
||||||
|
EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pavgw);
|
||||||
|
return;
|
||||||
|
case 32: {
|
||||||
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
|
||||||
|
const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||||
|
const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]);
|
||||||
|
const Xbyak::Xmm tmp1 = ctx.reg_alloc.ScratchXmm();
|
||||||
|
|
||||||
|
code.movdqa(tmp1, a);
|
||||||
|
|
||||||
|
code.por(a, b);
|
||||||
|
code.psrld(tmp1, 1);
|
||||||
|
code.psrld(b, 1);
|
||||||
|
code.pslld(a, 31);
|
||||||
|
code.paddd(b, tmp1);
|
||||||
|
code.psrld(a, 31);
|
||||||
|
code.paddd(a, b);
|
||||||
|
|
||||||
|
ctx.reg_alloc.DefineValue(inst, a);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Unsigned rounding halving add on 8-bit elements; forwards to the shared
// unsigned helper with the element width.
void EmitX64::EmitVectorRoundingHalvingAddU8(EmitContext& ctx, IR::Inst* inst) {
    constexpr size_t element_bits = 8;
    EmitVectorRoundingHalvingAddUnsigned(element_bits, ctx, inst, code);
}
|
||||||
|
|
||||||
|
// Unsigned rounding halving add on 16-bit elements; forwards to the shared
// unsigned helper with the element width.
void EmitX64::EmitVectorRoundingHalvingAddU16(EmitContext& ctx, IR::Inst* inst) {
    constexpr size_t element_bits = 16;
    EmitVectorRoundingHalvingAddUnsigned(element_bits, ctx, inst, code);
}
|
||||||
|
|
||||||
|
// Unsigned rounding halving add on 32-bit elements; forwards to the shared
// unsigned helper with the element width.
void EmitX64::EmitVectorRoundingHalvingAddU32(EmitContext& ctx, IR::Inst* inst) {
    constexpr size_t element_bits = 32;
    EmitVectorRoundingHalvingAddUnsigned(element_bits, ctx, inst, code);
}
|
||||||
|
|
||||||
enum class ShuffleType {
|
enum class ShuffleType {
|
||||||
LowHalfwords,
|
LowHalfwords,
|
||||||
HighHalfwords,
|
HighHalfwords,
|
||||||
|
|
|
@ -1198,6 +1198,34 @@ U128 IREmitter::VectorRotateRight(size_t esize, const U128& a, u8 amount) {
|
||||||
VectorLogicalShiftLeft(esize, a, static_cast<u8>(esize - amount)));
|
VectorLogicalShiftLeft(esize, a, static_cast<u8>(esize - amount)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Builds the signed rounding-halving-add IR instruction matching `esize`
// (8, 16 or 32 bits per element) with operands `a` and `b`.
// Any other element size hits UNREACHABLE().
U128 IREmitter::VectorRoundingHalvingAddSigned(size_t esize, const U128& a, const U128& b) {
    if (esize == 8) {
        return Inst<U128>(Opcode::VectorRoundingHalvingAddS8, a, b);
    }
    if (esize == 16) {
        return Inst<U128>(Opcode::VectorRoundingHalvingAddS16, a, b);
    }
    if (esize == 32) {
        return Inst<U128>(Opcode::VectorRoundingHalvingAddS32, a, b);
    }

    UNREACHABLE();
    return {};
}
|
||||||
|
|
||||||
|
// Builds the unsigned rounding-halving-add IR instruction matching `esize`
// (8, 16 or 32 bits per element) with operands `a` and `b`.
// Any other element size hits UNREACHABLE().
U128 IREmitter::VectorRoundingHalvingAddUnsigned(size_t esize, const U128& a, const U128& b) {
    if (esize == 8) {
        return Inst<U128>(Opcode::VectorRoundingHalvingAddU8, a, b);
    }
    if (esize == 16) {
        return Inst<U128>(Opcode::VectorRoundingHalvingAddU16, a, b);
    }
    if (esize == 32) {
        return Inst<U128>(Opcode::VectorRoundingHalvingAddU32, a, b);
    }

    UNREACHABLE();
    return {};
}
|
||||||
|
|
||||||
U128 IREmitter::VectorShuffleHighHalfwords(const U128& a, u8 mask) {
|
U128 IREmitter::VectorShuffleHighHalfwords(const U128& a, u8 mask) {
|
||||||
return Inst<U128>(Opcode::VectorShuffleHighHalfwords, a, mask);
|
return Inst<U128>(Opcode::VectorShuffleHighHalfwords, a, mask);
|
||||||
}
|
}
|
||||||
|
|
|
@ -236,6 +236,8 @@ public:
|
||||||
U128 VectorReverseBits(const U128& a);
|
U128 VectorReverseBits(const U128& a);
|
||||||
U128 VectorRotateLeft(size_t esize, const U128& a, u8 amount);
|
U128 VectorRotateLeft(size_t esize, const U128& a, u8 amount);
|
||||||
U128 VectorRotateRight(size_t esize, const U128& a, u8 amount);
|
U128 VectorRotateRight(size_t esize, const U128& a, u8 amount);
|
||||||
|
// Emits a VectorRoundingHalvingAddS{8,16,32} instruction on a and b, selected
// by esize (element width in bits; the definition accepts 8, 16 and 32 only).
U128 VectorRoundingHalvingAddSigned(size_t esize, const U128& a, const U128& b);
|
||||||
|
// Emits a VectorRoundingHalvingAddU{8,16,32} instruction on a and b, selected
// by esize (element width in bits; the definition accepts 8, 16 and 32 only).
U128 VectorRoundingHalvingAddUnsigned(size_t esize, const U128& a, const U128& b);
|
||||||
U128 VectorShuffleHighHalfwords(const U128& a, u8 mask);
|
U128 VectorShuffleHighHalfwords(const U128& a, u8 mask);
|
||||||
U128 VectorShuffleLowHalfwords(const U128& a, u8 mask);
|
U128 VectorShuffleLowHalfwords(const U128& a, u8 mask);
|
||||||
U128 VectorShuffleWords(const U128& a, u8 mask);
|
U128 VectorShuffleWords(const U128& a, u8 mask);
|
||||||
|
|
|
@ -325,6 +325,12 @@ OPCODE(VectorPairedAdd32, T::U128, T::U128, T::U
|
||||||
OPCODE(VectorPairedAdd64, T::U128, T::U128, T::U128 )
|
OPCODE(VectorPairedAdd64, T::U128, T::U128, T::U128 )
|
||||||
OPCODE(VectorPopulationCount, T::U128, T::U128 )
|
OPCODE(VectorPopulationCount, T::U128, T::U128 )
|
||||||
OPCODE(VectorReverseBits, T::U128, T::U128 )
|
OPCODE(VectorReverseBits, T::U128, T::U128 )
|
||||||
|
// Rounding halving adds: signed (S) and unsigned (U) variants for 8-, 16- and
// 32-bit elements. NOTE(review): the three T::U128 columns are presumably the
// result type followed by the two operand types - confirm against the OPCODE
// macro definition.
OPCODE(VectorRoundingHalvingAddS8, T::U128, T::U128, T::U128 )
|
||||||
|
OPCODE(VectorRoundingHalvingAddS16, T::U128, T::U128, T::U128 )
|
||||||
|
OPCODE(VectorRoundingHalvingAddS32, T::U128, T::U128, T::U128 )
|
||||||
|
OPCODE(VectorRoundingHalvingAddU8, T::U128, T::U128, T::U128 )
|
||||||
|
OPCODE(VectorRoundingHalvingAddU16, T::U128, T::U128, T::U128 )
|
||||||
|
OPCODE(VectorRoundingHalvingAddU32, T::U128, T::U128, T::U128 )
|
||||||
OPCODE(VectorShuffleHighHalfwords, T::U128, T::U128, T::U8 )
|
OPCODE(VectorShuffleHighHalfwords, T::U128, T::U128, T::U8 )
|
||||||
OPCODE(VectorShuffleLowHalfwords, T::U128, T::U128, T::U8 )
|
OPCODE(VectorShuffleLowHalfwords, T::U128, T::U128, T::U8 )
|
||||||
OPCODE(VectorShuffleWords, T::U128, T::U128, T::U8 )
|
OPCODE(VectorShuffleWords, T::U128, T::U128, T::U8 )
|
||||||
|
|
Loading…
Reference in a new issue