ir: Add IR opcodes for emitting vector shuffles
This uses the ARM terminology for sizes (Halfword -> 2 bytes, Word -> 4 bytes) as opposed to the x86 terminology (Word -> 2 bytes, Doubleword -> 4 bytes).
This commit is contained in:
parent
eb2d28d2b1
commit
6b0010c940
4 changed files with 54 additions and 0 deletions
|
@ -1138,6 +1138,42 @@ void EmitX64::EmitVectorPopulationCount(EmitContext& ctx, IR::Inst* inst) {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Selects which shuffle instruction VectorShuffleImpl emits.
// Element sizes follow ARM naming: a halfword is 2 bytes, a word is 4 bytes.
enum class ShuffleType {
    LowHalfwords,   // emitted as pshuflw
    HighHalfwords,  // emitted as pshufhw
    Words,          // any value that is not one of the two halfword kinds
};
|
||||||
|
|
||||||
|
// Emits one immediate-controlled shuffle instruction for the IR instruction `inst`.
//
// type - which shuffle to emit (sizes use ARM naming: halfword = 2 bytes, word = 4 bytes).
// ctx  - emit context; its register allocator provides the operand and result registers.
// inst - IR instruction; argument 0 is the vector operand, argument 1 is read as an
//        immediate u8 shuffle mask.
// code - code emitter that receives the generated instruction.
static void VectorShuffleImpl(ShuffleType type, EmitContext& ctx, IR::Inst* inst, BlockOfCode& code) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    const Xbyak::Xmm operand = ctx.reg_alloc.UseScratchXmm(args[0]);
    const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
    const u8 mask = args[1].GetImmediateU8();

    if (type == ShuffleType::LowHalfwords) {
        code.pshuflw(result, operand, mask);
    } else if (type == ShuffleType::HighHalfwords) {
        code.pshufhw(result, operand, mask);
    } else {
        // Words are 4 bytes here, so the XMM doubleword shuffle (PSHUFD) is required.
        // PSHUFW is the MMX-register shuffle of 2-byte elements and is not valid for
        // this purpose.
        code.pshufd(result, operand, mask);
    }

    ctx.reg_alloc.DefineValue(inst, result);
}
|
||||||
|
|
||||||
|
// Forwards to VectorShuffleImpl with ShuffleType::HighHalfwords.
void EmitX64::EmitVectorShuffleHighHalfwords(EmitContext& ctx, IR::Inst* inst) {
    constexpr ShuffleType shuffle_kind = ShuffleType::HighHalfwords;
    VectorShuffleImpl(shuffle_kind, ctx, inst, code);
}
|
||||||
|
|
||||||
|
// Forwards to VectorShuffleImpl with ShuffleType::LowHalfwords.
void EmitX64::EmitVectorShuffleLowHalfwords(EmitContext& ctx, IR::Inst* inst) {
    constexpr ShuffleType shuffle_kind = ShuffleType::LowHalfwords;
    VectorShuffleImpl(shuffle_kind, ctx, inst, code);
}
|
||||||
|
|
||||||
|
// Forwards to VectorShuffleImpl with ShuffleType::Words.
void EmitX64::EmitVectorShuffleWords(EmitContext& ctx, IR::Inst* inst) {
    constexpr ShuffleType shuffle_kind = ShuffleType::Words;
    VectorShuffleImpl(shuffle_kind, ctx, inst, code);
}
|
||||||
|
|
||||||
void EmitX64::EmitVectorSignExtend8(EmitContext& ctx, IR::Inst* inst) {
|
void EmitX64::EmitVectorSignExtend8(EmitContext& ctx, IR::Inst* inst) {
|
||||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
|
if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
|
||||||
|
|
|
@ -1103,6 +1103,18 @@ U128 IREmitter::VectorPopulationCount(const U128& a) {
|
||||||
return Inst<U128>(Opcode::VectorPopulationCount, a);
|
return Inst<U128>(Opcode::VectorPopulationCount, a);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Wraps Inst<U128> for Opcode::VectorShuffleHighHalfwords, passing the vector
// operand `a` and the 8-bit shuffle `mask` through unchanged.
U128 IREmitter::VectorShuffleHighHalfwords(const U128& a, u8 mask) {
    const auto opcode = Opcode::VectorShuffleHighHalfwords;
    return Inst<U128>(opcode, a, mask);
}
|
||||||
|
|
||||||
|
// Wraps Inst<U128> for Opcode::VectorShuffleLowHalfwords, passing the vector
// operand `a` and the 8-bit shuffle `mask` through unchanged.
U128 IREmitter::VectorShuffleLowHalfwords(const U128& a, u8 mask) {
    const auto opcode = Opcode::VectorShuffleLowHalfwords;
    return Inst<U128>(opcode, a, mask);
}
|
||||||
|
|
||||||
|
// Wraps Inst<U128> for Opcode::VectorShuffleWords, passing the vector
// operand `a` and the 8-bit shuffle `mask` through unchanged.
U128 IREmitter::VectorShuffleWords(const U128& a, u8 mask) {
    const auto opcode = Opcode::VectorShuffleWords;
    return Inst<U128>(opcode, a, mask);
}
|
||||||
|
|
||||||
U128 IREmitter::VectorSignExtend(size_t original_esize, const U128& a) {
|
U128 IREmitter::VectorSignExtend(size_t original_esize, const U128& a) {
|
||||||
switch (original_esize) {
|
switch (original_esize) {
|
||||||
case 8:
|
case 8:
|
||||||
|
|
|
@ -238,6 +238,9 @@ public:
|
||||||
U128 VectorPairedAdd(size_t esize, const U128& a, const U128& b);
|
U128 VectorPairedAdd(size_t esize, const U128& a, const U128& b);
|
||||||
U128 VectorPairedAddLower(size_t esize, const U128& a, const U128& b);
|
U128 VectorPairedAddLower(size_t esize, const U128& a, const U128& b);
|
||||||
U128 VectorPopulationCount(const U128& a);
|
U128 VectorPopulationCount(const U128& a);
|
||||||
|
// Builds a VectorShuffleHighHalfwords instruction from vector `a` and an 8-bit shuffle `mask` (halfword = 2 bytes).
U128 VectorShuffleHighHalfwords(const U128& a, u8 mask);
|
||||||
|
// Builds a VectorShuffleLowHalfwords instruction from vector `a` and an 8-bit shuffle `mask` (halfword = 2 bytes).
U128 VectorShuffleLowHalfwords(const U128& a, u8 mask);
|
||||||
|
// Builds a VectorShuffleWords instruction from vector `a` and an 8-bit shuffle `mask` (word = 4 bytes).
U128 VectorShuffleWords(const U128& a, u8 mask);
|
||||||
U128 VectorSignExtend(size_t original_esize, const U128& a);
|
U128 VectorSignExtend(size_t original_esize, const U128& a);
|
||||||
U128 VectorSub(size_t esize, const U128& a, const U128& b);
|
U128 VectorSub(size_t esize, const U128& a, const U128& b);
|
||||||
U128 VectorZeroExtend(size_t original_esize, const U128& a);
|
U128 VectorZeroExtend(size_t original_esize, const U128& a);
|
||||||
|
|
|
@ -287,6 +287,9 @@ OPCODE(VectorPairedAdd16, T::U128, T::U128, T::U128
|
||||||
OPCODE(VectorPairedAdd32, T::U128, T::U128, T::U128 )
|
OPCODE(VectorPairedAdd32, T::U128, T::U128, T::U128 )
|
||||||
OPCODE(VectorPairedAdd64, T::U128, T::U128, T::U128 )
|
OPCODE(VectorPairedAdd64, T::U128, T::U128, T::U128 )
|
||||||
OPCODE(VectorPopulationCount, T::U128, T::U128 )
|
OPCODE(VectorPopulationCount, T::U128, T::U128 )
|
||||||
|
// Shuffle of the high halfwords; types listed are presumably result (U128), then operands (U128 vector, U8 mask).
OPCODE(VectorShuffleHighHalfwords, T::U128, T::U128, T::U8 )
|
||||||
|
// Shuffle of the low halfwords; types listed are presumably result (U128), then operands (U128 vector, U8 mask).
OPCODE(VectorShuffleLowHalfwords, T::U128, T::U128, T::U8 )
|
||||||
|
// Shuffle of 4-byte words; types listed are presumably result (U128), then operands (U128 vector, U8 mask).
OPCODE(VectorShuffleWords, T::U128, T::U128, T::U8 )
|
||||||
OPCODE(VectorSignExtend8, T::U128, T::U128 )
|
OPCODE(VectorSignExtend8, T::U128, T::U128 )
|
||||||
OPCODE(VectorSignExtend16, T::U128, T::U128 )
|
OPCODE(VectorSignExtend16, T::U128, T::U128 )
|
||||||
OPCODE(VectorSignExtend32, T::U128, T::U128 )
|
OPCODE(VectorSignExtend32, T::U128, T::U128 )
|
||||||
|
|
Loading…
Reference in a new issue