IR: Implement VectorLogicalShiftRight
This commit is contained in:
parent
7ff280827b
commit
b22c5961f9
4 changed files with 73 additions and 0 deletions
|
@ -634,6 +634,59 @@ void EmitX64::EmitVectorLogicalShiftLeft64(EmitContext& ctx, IR::Inst* inst) {
|
||||||
ctx.reg_alloc.DefineValue(inst, result);
|
ctx.reg_alloc.DefineValue(inst, result);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emits x64 code for a logical right shift of each 8-bit element of a vector
// by an immediate amount.
//
// SSE2 has no per-byte shift instruction. Instead of the previous
// O(shift_amount) halving loop (pand + pavgb per step), emit a constant-length
// sequence: shift 16-bit lanes right, then mask off the bits that crossed over
// from each lane's high byte.
void EmitX64::EmitVectorLogicalShiftRight8(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
    Xbyak::Xmm mask = ctx.reg_alloc.ScratchXmm();
    const u8 shift_amount = args[1].GetImmediateU8();

    if (shift_amount >= 8) {
        // Every bit of every byte is shifted out; the result is all zeroes.
        // (Matches the old loop, which reached zero after 8 iterations.)
        code.pxor(result, result);
    } else {
        code.psrlw(result, shift_amount);

        // Build (0xFF >> shift_amount) in every byte without a memory constant:
        code.pcmpeqb(mask, mask);            // mask bytes = 0xFF
        code.psrlw(mask, 8 + shift_amount);  // mask words = 0x00FF >> shift_amount
        code.packuswb(mask, mask);           // mask bytes = 0xFF >> shift_amount

        // Clear the top shift_amount bits of each byte, which hold bits that
        // bled in from the neighbouring byte during the word-sized shift.
        code.pand(result, mask);
    }

    ctx.reg_alloc.DefineValue(inst, result);
}
|
||||||
|
|
||||||
|
// Emits x64 code for a logical right shift of each 16-bit element of a vector
// by an immediate amount.
void EmitX64::EmitVectorLogicalShiftRight16(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    const Xbyak::Xmm operand = ctx.reg_alloc.UseScratchXmm(args[0]);
    const u8 amount = args[1].GetImmediateU8();

    // PSRLW performs a zero-filling right shift on every word lane at once.
    code.psrlw(operand, amount);

    ctx.reg_alloc.DefineValue(inst, operand);
}
|
||||||
|
|
||||||
|
// Emits x64 code for a logical right shift of each 32-bit element of a vector
// by an immediate amount.
void EmitX64::EmitVectorLogicalShiftRight32(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    const Xbyak::Xmm operand = ctx.reg_alloc.UseScratchXmm(args[0]);
    const u8 amount = args[1].GetImmediateU8();

    // PSRLD performs a zero-filling right shift on every doubleword lane at once.
    code.psrld(operand, amount);

    ctx.reg_alloc.DefineValue(inst, operand);
}
|
||||||
|
|
||||||
|
// Emits x64 code for a logical right shift of each 64-bit element of a vector
// by an immediate amount.
void EmitX64::EmitVectorLogicalShiftRight64(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    const Xbyak::Xmm operand = ctx.reg_alloc.UseScratchXmm(args[0]);
    const u8 amount = args[1].GetImmediateU8();

    // PSRLQ performs a zero-filling right shift on every quadword lane at once.
    code.psrlq(operand, amount);

    ctx.reg_alloc.DefineValue(inst, operand);
}
|
||||||
|
|
||||||
static void EmitVectorZeroExtend(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, int size) {
|
static void EmitVectorZeroExtend(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, int size) {
|
||||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
|
||||||
|
|
|
@ -883,6 +883,21 @@ U128 IREmitter::VectorLogicalShiftLeft(size_t esize, const U128& a, u8 shift_amo
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Builds an IR instruction that logically right-shifts each element of `a` by
// the immediate `shift_amount`; `esize` selects the element width in bits and
// must be one of 8, 16, 32 or 64.
U128 IREmitter::VectorLogicalShiftRight(size_t esize, const U128& a, u8 shift_amount) {
    if (esize == 8) {
        return Inst<U128>(Opcode::VectorLogicalShiftRight8, a, Imm8(shift_amount));
    }
    if (esize == 16) {
        return Inst<U128>(Opcode::VectorLogicalShiftRight16, a, Imm8(shift_amount));
    }
    if (esize == 32) {
        return Inst<U128>(Opcode::VectorLogicalShiftRight32, a, Imm8(shift_amount));
    }
    if (esize == 64) {
        return Inst<U128>(Opcode::VectorLogicalShiftRight64, a, Imm8(shift_amount));
    }
    UNREACHABLE();
    return {};
}
|
||||||
|
|
||||||
// Builds an IR instruction that computes the bitwise complement of all
// 128 bits of `a`.
U128 IREmitter::VectorNot(const U128& a) {
    return Inst<U128>(Opcode::VectorNot, a);
}
|
||||||
|
|
|
@ -215,6 +215,7 @@ public:
|
||||||
U128 VectorEqual(size_t esize, const U128& a, const U128& b);
|
U128 VectorEqual(size_t esize, const U128& a, const U128& b);
|
||||||
U128 VectorInterleaveLower(size_t esize, const U128& a, const U128& b);
|
U128 VectorInterleaveLower(size_t esize, const U128& a, const U128& b);
|
||||||
U128 VectorLogicalShiftLeft(size_t esize, const U128& a, u8 shift_amount);
|
U128 VectorLogicalShiftLeft(size_t esize, const U128& a, u8 shift_amount);
|
||||||
|
U128 VectorLogicalShiftRight(size_t esize, const U128& a, u8 shift_amount);
|
||||||
U128 VectorNot(const U128& a);
|
U128 VectorNot(const U128& a);
|
||||||
U128 VectorOr(const U128& a, const U128& b);
|
U128 VectorOr(const U128& a, const U128& b);
|
||||||
U128 VectorPairedAdd(size_t esize, const U128& a, const U128& b);
|
U128 VectorPairedAdd(size_t esize, const U128& a, const U128& b);
|
||||||
|
|
|
@ -224,6 +224,10 @@ OPCODE(VectorLogicalShiftLeft8, T::U128, T::U128, T::U8
|
||||||
OPCODE(VectorLogicalShiftLeft16, T::U128, T::U128, T::U8 )
|
OPCODE(VectorLogicalShiftLeft16, T::U128, T::U128, T::U8 )
|
||||||
OPCODE(VectorLogicalShiftLeft32, T::U128, T::U128, T::U8 )
|
OPCODE(VectorLogicalShiftLeft32, T::U128, T::U128, T::U8 )
|
||||||
OPCODE(VectorLogicalShiftLeft64, T::U128, T::U128, T::U8 )
|
OPCODE(VectorLogicalShiftLeft64, T::U128, T::U128, T::U8 )
|
||||||
|
OPCODE(VectorLogicalShiftRight8, T::U128, T::U128, T::U8 )
|
||||||
|
OPCODE(VectorLogicalShiftRight16, T::U128, T::U128, T::U8 )
|
||||||
|
OPCODE(VectorLogicalShiftRight32, T::U128, T::U128, T::U8 )
|
||||||
|
OPCODE(VectorLogicalShiftRight64, T::U128, T::U128, T::U8 )
|
||||||
OPCODE(VectorNot, T::U128, T::U128 )
|
OPCODE(VectorNot, T::U128, T::U128 )
|
||||||
OPCODE(VectorOr, T::U128, T::U128, T::U128 )
|
OPCODE(VectorOr, T::U128, T::U128, T::U128 )
|
||||||
OPCODE(VectorPairedAddLower8, T::U128, T::U128, T::U128 )
|
OPCODE(VectorPairedAddLower8, T::U128, T::U128, T::U128 )
|
||||||
|
|
Loading…
Reference in a new issue