ir: Add opcodes for performing halving adds
commit 089096948a (parent 3d00dd63b4)
4 changed files with 131 additions and 6 deletions
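For context (not stated in the diff itself): a halving add computes the elementwise truncating average (a + b) >> 1 of two 128-bit vectors without ever forming a sum wider than the lane, for signed and unsigned 8-, 16- and 32-bit elements. A minimal scalar sketch of the intended per-lane semantics, using hypothetical helper names and the straightforward widen-add-shift formulation:

#include <cstdint>

// Hypothetical reference helpers, not code from this commit: widen, add,
// then drop the low bit. The backend code below reaches the same result
// without widening.
inline std::int8_t halving_add_s8(std::int8_t a, std::int8_t b) {
    return static_cast<std::int8_t>((static_cast<int>(a) + static_cast<int>(b)) >> 1);
}

inline std::uint8_t halving_add_u8(std::uint8_t a, std::uint8_t b) {
    return static_cast<std::uint8_t>((static_cast<unsigned>(a) + static_cast<unsigned>(b)) >> 1);
}

For example, halving_add_u8(255, 255) is 255, whereas an 8-bit add would wrap to 254 and then halve to 127.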
@@ -357,12 +357,8 @@ void EmitX64::EmitVectorAnd(EmitContext& ctx, IR::Inst* inst) {
     EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pand);
 }
 
-void EmitX64::EmitVectorArithmeticShiftRight8(EmitContext& ctx, IR::Inst* inst) {
-    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+static void ArithmeticShiftRightByte(EmitContext& ctx, BlockOfCode& code, const Xbyak::Xmm& result, u8 shift_amount) {
+    const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
 
-    Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
-    Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
-    const u8 shift_amount = args[1].GetImmediateU8();
-
     // TODO: Optimize
     code.movdqa(tmp, result);
@@ -372,6 +368,15 @@ void EmitX64::EmitVectorArithmeticShiftRight8(EmitContext& ctx, IR::Inst* inst)
     code.psllw(result, 8);
     code.psrlw(tmp, 8);
     code.por(result, tmp);
+}
+
+void EmitX64::EmitVectorArithmeticShiftRight8(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
+    const u8 shift_amount = args[1].GetImmediateU8();
+
+    ArithmeticShiftRightByte(ctx, code, result, shift_amount);
 
     ctx.reg_alloc.DefineValue(inst, result);
 }
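The per-byte arithmetic shift is split out into ArithmeticShiftRightByte because SSE2 provides word and doubleword arithmetic shifts (psraw, psrad) but none at byte granularity, and the signed 8-bit halving add introduced further down needs exactly this operation with a shift of 1. As a scalar picture of what the helper computes per lane (an illustrative sketch of the semantics, not the emitted instruction sequence):

#include <cstdint>

// Hypothetical reference: a sign-extending shift right applied to each of the
// 16 bytes of an XMM-sized value. Relies on the usual arithmetic right shift
// of negative values.
inline void arithmetic_shift_right_bytes(std::uint8_t (&lanes)[16], unsigned shift_amount) {
    for (std::uint8_t& lane : lanes) {
        lane = static_cast<std::uint8_t>(static_cast<std::int8_t>(lane) >> shift_amount);
    }
}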
@@ -758,6 +763,92 @@ void EmitX64::EmitVectorGreaterS64(EmitContext& ctx, IR::Inst* inst) {
     });
 }
 
+static void EmitVectorHalvingAddSigned(size_t esize, EmitContext& ctx, IR::Inst* inst, BlockOfCode& code) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
+    const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]);
+    const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
+
+    code.movdqa(tmp, b);
+    code.pand(tmp, a);
+    code.pxor(a, b);
+
+    switch (esize) {
+    case 8:
+        ArithmeticShiftRightByte(ctx, code, a, 1);
+        code.paddb(a, tmp);
+        break;
+    case 16:
+        code.psraw(a, 1);
+        code.paddw(a, tmp);
+        break;
+    case 32:
+        code.psrad(a, 1);
+        code.paddd(a, tmp);
+        break;
+    }
+
+    ctx.reg_alloc.DefineValue(inst, a);
+}
+
+void EmitX64::EmitVectorHalvingAddS8(EmitContext& ctx, IR::Inst* inst) {
+    EmitVectorHalvingAddSigned(8, ctx, inst, code);
+}
+
+void EmitX64::EmitVectorHalvingAddS16(EmitContext& ctx, IR::Inst* inst) {
+    EmitVectorHalvingAddSigned(16, ctx, inst, code);
+}
+
+void EmitX64::EmitVectorHalvingAddS32(EmitContext& ctx, IR::Inst* inst) {
+    EmitVectorHalvingAddSigned(32, ctx, inst, code);
+}
+
+static void EmitVectorHalvingAddUnsigned(size_t esize, EmitContext& ctx, IR::Inst* inst, BlockOfCode& code) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
+    const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]);
+    const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
+
+    code.movdqa(tmp, b);
+
+    switch (esize) {
+    case 8:
+        code.pavgb(tmp, a);
+        code.pxor(a, b);
+        code.pand(a, code.MConst(xword, 0x0101010101010101, 0x0101010101010101));
+        code.psubb(tmp, a);
+        break;
+    case 16:
+        code.pavgw(tmp, a);
+        code.pxor(a, b);
+        code.pand(a, code.MConst(xword, 0x0001000100010001, 0x0001000100010001));
+        code.psubw(tmp, a);
+        break;
+    case 32:
+        code.pand(tmp, a);
+        code.pxor(a, b);
+        code.psrld(a, 1);
+        code.paddd(tmp, a);
+        break;
+    }
+
+    ctx.reg_alloc.DefineValue(inst, tmp);
+}
+
+void EmitX64::EmitVectorHalvingAddU8(EmitContext& ctx, IR::Inst* inst) {
+    EmitVectorHalvingAddUnsigned(8, ctx, inst, code);
+}
+
+void EmitX64::EmitVectorHalvingAddU16(EmitContext& ctx, IR::Inst* inst) {
+    EmitVectorHalvingAddUnsigned(16, ctx, inst, code);
+}
+
+void EmitX64::EmitVectorHalvingAddU32(EmitContext& ctx, IR::Inst* inst) {
+    EmitVectorHalvingAddUnsigned(32, ctx, inst, code);
+}
+
 static void EmitVectorInterleaveLower(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, int size) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
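These emitters lean on two standard identities rather than widening each lane. For the signed cases (and the unsigned 32-bit case, which has no pavgd in SSE2), a + b == (a ^ b) + 2 * (a & b), so the halving add is (a & b) + ((a ^ b) >> 1) with an arithmetic shift for signed lanes and a logical shift for unsigned ones; that is exactly what the pand/pxor/shift/padd sequences compute. For unsigned 8- and 16-bit lanes, pavgb/pavgw already compute the rounding average (a + b + 1) >> 1, so only a correction of ((a ^ b) & 1) needs to be subtracted. A minimal scalar sketch of both identities (hypothetical function names, for illustration only):

#include <cassert>
#include <cstdint>

// Signed path: (a & b) keeps the bits that carry, ((a ^ b) >> 1) is half of
// the bits that differ; their sum is (a + b) >> 1 without forming the wider sum.
std::int8_t shadd8(std::int8_t a, std::int8_t b) {
    return static_cast<std::int8_t>((a & b) + ((a ^ b) >> 1));
}

// Unsigned 8/16-bit path: pavgb/pavgw give (a + b + 1) >> 1 (rounded up);
// subtracting ((a ^ b) & 1) removes the rounding bit, leaving (a + b) >> 1.
std::uint8_t uhadd8(std::uint8_t a, std::uint8_t b) {
    const unsigned avg_rounded_up = (static_cast<unsigned>(a) + b + 1u) >> 1;
    return static_cast<std::uint8_t>(avg_rounded_up - ((a ^ b) & 1u));
}

int main() {
    assert(shadd8(-128, 127) == -1);  // the 9-bit intermediate sum never materialises
    assert(uhadd8(255, 254) == 254);
    return 0;
}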
@@ -882,6 +882,32 @@ U128 IREmitter::VectorGreaterUnsigned(size_t esize, const U128& a, const U128& b) {
     return VectorNot(VectorEqual(esize, VectorMinUnsigned(esize, a, b), a));
 }
 
+U128 IREmitter::VectorHalvingAddSigned(size_t esize, const U128& a, const U128& b) {
+    switch (esize) {
+    case 8:
+        return Inst<U128>(Opcode::VectorHalvingAddS8, a, b);
+    case 16:
+        return Inst<U128>(Opcode::VectorHalvingAddS16, a, b);
+    case 32:
+        return Inst<U128>(Opcode::VectorHalvingAddS32, a, b);
+    }
+    UNREACHABLE();
+    return {};
+}
+
+U128 IREmitter::VectorHalvingAddUnsigned(size_t esize, const U128& a, const U128& b) {
+    switch (esize) {
+    case 8:
+        return Inst<U128>(Opcode::VectorHalvingAddU8, a, b);
+    case 16:
+        return Inst<U128>(Opcode::VectorHalvingAddU16, a, b);
+    case 32:
+        return Inst<U128>(Opcode::VectorHalvingAddU32, a, b);
+    }
+    UNREACHABLE();
+    return {};
+}
+
 U128 IREmitter::VectorInterleaveLower(size_t esize, const U128& a, const U128& b) {
     switch (esize) {
     case 8:
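VectorHalvingAddSigned and VectorHalvingAddUnsigned give frontends one esize-dispatched entry point each instead of six raw opcodes; any other element size falls through to UNREACHABLE(). A hypothetical call site (illustrative fragment only, not part of this commit; ir, esize, is_signed, operand1 and operand2 are assumed to exist in the surrounding translator):

// Illustrative only: pick the signed or unsigned halving add for the current
// element size and apply it to two 128-bit operands.
const IR::U128 result = is_signed
                            ? ir.VectorHalvingAddSigned(esize, operand1, operand2)
                            : ir.VectorHalvingAddUnsigned(esize, operand1, operand2);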
@@ -206,6 +206,8 @@ public:
     U128 VectorGreaterEqualUnsigned(size_t esize, const U128& a, const U128& b);
     U128 VectorGreaterSigned(size_t esize, const U128& a, const U128& b);
     U128 VectorGreaterUnsigned(size_t esize, const U128& a, const U128& b);
+    U128 VectorHalvingAddSigned(size_t esize, const U128& a, const U128& b);
+    U128 VectorHalvingAddUnsigned(size_t esize, const U128& a, const U128& b);
     U128 VectorInterleaveLower(size_t esize, const U128& a, const U128& b);
     U128 VectorInterleaveUpper(size_t esize, const U128& a, const U128& b);
     U128 VectorLessEqualSigned(size_t esize, const U128& a, const U128& b);
@@ -251,6 +251,12 @@ OPCODE(VectorGreaterS8, T::U128, T::U128, T::U128 )
 OPCODE(VectorGreaterS16,         T::U128, T::U128, T::U128 )
 OPCODE(VectorGreaterS32,         T::U128, T::U128, T::U128 )
 OPCODE(VectorGreaterS64,         T::U128, T::U128, T::U128 )
+OPCODE(VectorHalvingAddS8,       T::U128, T::U128, T::U128 )
+OPCODE(VectorHalvingAddS16,      T::U128, T::U128, T::U128 )
+OPCODE(VectorHalvingAddS32,      T::U128, T::U128, T::U128 )
+OPCODE(VectorHalvingAddU8,       T::U128, T::U128, T::U128 )
+OPCODE(VectorHalvingAddU16,      T::U128, T::U128, T::U128 )
+OPCODE(VectorHalvingAddU32,      T::U128, T::U128, T::U128 )
 OPCODE(VectorInterleaveLower8,   T::U128, T::U128, T::U128 )
 OPCODE(VectorInterleaveLower16,  T::U128, T::U128, T::U128 )
 OPCODE(VectorInterleaveLower32,  T::U128, T::U128, T::U128 )