IR: Add IR instruction VectorGetElement{8,16,32,64}
This commit is contained in:
parent
28ccd85e5c
commit
e00a522cba
4 changed files with 95 additions and 0 deletions
|
@ -17,6 +17,79 @@ namespace BackendX64 {
|
||||||
|
|
||||||
using namespace Xbyak::util;
|
using namespace Xbyak::util;
|
||||||
|
|
||||||
|
/// Emits x64 code that extracts a single 8-bit element from a 128-bit vector
/// into a general-purpose register.
///
/// args[0] is the source vector; args[1] must be an immediate holding the
/// byte index of the element to extract.
void EmitX64::EmitVectorGetElement8(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    ASSERT(args[1].IsImmediate());
    const u8 index = args[1].GetImmediateU8();

    const Xbyak::Xmm source = ctx.reg_alloc.UseXmm(args[0]);
    const Xbyak::Reg32 dest = ctx.reg_alloc.ScratchGpr().cvt32();

    if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
        code->pextrb(dest, source, index);
    } else {
        // Without SSE4.1 there is no byte extract. PEXTRW selects 16-bit
        // lanes, so fetch the word that contains the requested byte and then
        // isolate the correct half of it.
        code->pextrw(dest, source, index / 2);
        if (index % 2 == 1) {
            // Odd byte index: the wanted byte is the high half of the word.
            // PEXTRW zero-extends, so the shift leaves a clean 8-bit value.
            code->shr(dest, 8);
        } else {
            // Even byte index: drop the neighbouring high byte so the result
            // matches the zero-extended value PEXTRB would have produced.
            code->movzx(dest, dest.cvt8());
        }
    }

    ctx.reg_alloc.DefineValue(inst, dest);
}
|
||||||
|
|
||||||
|
/// Emits x64 code that extracts a single 16-bit element from a 128-bit
/// vector into a general-purpose register using PEXTRW.
///
/// args[0] is the source vector; args[1] must be an immediate holding the
/// element index.
void EmitX64::EmitVectorGetElement16(EmitContext& ctx, IR::Inst* inst) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);
    ASSERT(operands[1].IsImmediate());
    const u8 lane = operands[1].GetImmediateU8();

    const Xbyak::Xmm vec = ctx.reg_alloc.UseXmm(operands[0]);
    const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();

    // PEXTRW takes a word index directly, so no index adjustment is needed.
    code->pextrw(result, vec, lane);

    ctx.reg_alloc.DefineValue(inst, result);
}
|
||||||
|
|
||||||
|
/// Emits x64 code that extracts a single 32-bit element from a 128-bit
/// vector into a general-purpose register.
///
/// args[0] is the source vector; args[1] must be an immediate holding the
/// element index. Three strategies are used, in order of preference:
/// a plain MOVD for element 0, PEXTRD when SSE4.1 is available, and a
/// PSHUFD + MOVD sequence otherwise.
void EmitX64::EmitVectorGetElement32(EmitContext& ctx, IR::Inst* inst) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);
    ASSERT(operands[1].IsImmediate());
    const u8 lane = operands[1].GetImmediateU8();

    const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();

    if (lane == 0) {
        // Element 0 already sits in the low dword of the register.
        const Xbyak::Xmm vec = ctx.reg_alloc.UseXmm(operands[0]);
        code->movd(result, vec);
    } else if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
        const Xbyak::Xmm vec = ctx.reg_alloc.UseXmm(operands[0]);
        code->pextrd(result, vec, lane);
    } else {
        // Requested as a scratch register because PSHUFD writes to it.
        const Xbyak::Xmm shuffled = ctx.reg_alloc.UseScratchXmm(operands[0]);
        // Using the lane number as the shuffle immediate moves the requested
        // dword into lane 0 (assumes lane < 4, as the IR emitter asserts).
        code->pshufd(shuffled, shuffled, lane);
        code->movd(result, shuffled);
    }

    ctx.reg_alloc.DefineValue(inst, result);
}
|
||||||
|
|
||||||
|
/// Emits x64 code that extracts a single 64-bit element from a 128-bit
/// vector into a general-purpose register.
///
/// args[0] is the source vector; args[1] must be an immediate holding the
/// element index. Element 0 is read with MOVQ; any other index is treated
/// as the upper quadword and read with PEXTRQ (SSE4.1) or with
/// PUNPCKHQDQ + MOVQ as a fallback.
void EmitX64::EmitVectorGetElement64(EmitContext& ctx, IR::Inst* inst) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);
    ASSERT(operands[1].IsImmediate());
    const u8 lane = operands[1].GetImmediateU8();

    const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr().cvt64();

    if (lane == 0) {
        // The low quadword can be moved out directly.
        const Xbyak::Xmm vec = ctx.reg_alloc.UseXmm(operands[0]);
        code->movq(result, vec);
    } else if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
        const Xbyak::Xmm vec = ctx.reg_alloc.UseXmm(operands[0]);
        // The immediate is hard-coded to 1: every non-zero index is read as
        // the upper quadword.
        code->pextrq(result, vec, 1);
    } else {
        // Requested as a scratch register because PUNPCKHQDQ writes to it.
        const Xbyak::Xmm upper = ctx.reg_alloc.UseScratchXmm(operands[0]);
        // Duplicate the high quadword into the low half, then move it out.
        code->punpckhqdq(upper, upper);
        code->movq(result, upper);
    }

    ctx.reg_alloc.DefineValue(inst, result);
}
|
||||||
|
|
||||||
static void EmitVectorOperation(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Mmx& mmx, const Xbyak::Operand&)) {
|
static void EmitVectorOperation(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Mmx& mmx, const Xbyak::Operand&)) {
|
||||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
|
||||||
|
|
|
@ -636,6 +636,23 @@ U32 IREmitter::PackedSelect(const U32& ge, const U32& a, const U32& b) {
|
||||||
return Inst<U32>(Opcode::PackedSelect, ge, a, b);
|
return Inst<U32>(Opcode::PackedSelect, ge, a, b);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Emits the IR instruction that reads one element out of a 128-bit vector.
///
/// @param esize Element size in bits; must be 8, 16, 32 or 64.
/// @param a     Source vector.
/// @param index Zero-based element index; esize * index must be below 128.
/// @return The extracted element, typed according to esize.
UAny IREmitter::VectorGetElement(size_t esize, const U128& a, size_t index) {
    ASSERT_MSG(esize * index < 128, "Invalid index");

    // The opcodes carry the element index as an 8-bit immediate.
    const u8 lane = static_cast<u8>(index);

    if (esize == 8) {
        return Inst<U8>(Opcode::VectorGetElement8, a, Imm8(lane));
    }
    if (esize == 16) {
        return Inst<U16>(Opcode::VectorGetElement16, a, Imm8(lane));
    }
    if (esize == 32) {
        return Inst<U32>(Opcode::VectorGetElement32, a, Imm8(lane));
    }
    if (esize == 64) {
        return Inst<U64>(Opcode::VectorGetElement64, a, Imm8(lane));
    }

    // Any other element size is a caller error.
    ASSERT_MSG(false, "Unreachable");
    return {};
}
|
||||||
|
|
||||||
U128 IREmitter::VectorAdd8(const U128& a, const U128& b) {
|
U128 IREmitter::VectorAdd8(const U128& a, const U128& b) {
|
||||||
return Inst<U128>(Opcode::VectorAdd8, a, b);
|
return Inst<U128>(Opcode::VectorAdd8, a, b);
|
||||||
}
|
}
|
||||||
|
|
|
@ -178,6 +178,7 @@ public:
|
||||||
U32 PackedAbsDiffSumS8(const U32& a, const U32& b);
|
U32 PackedAbsDiffSumS8(const U32& a, const U32& b);
|
||||||
U32 PackedSelect(const U32& ge, const U32& a, const U32& b);
|
U32 PackedSelect(const U32& ge, const U32& a, const U32& b);
|
||||||
|
|
||||||
|
UAny VectorGetElement(size_t esize, const U128& a, size_t index);
|
||||||
U128 VectorAdd8(const U128& a, const U128& b);
|
U128 VectorAdd8(const U128& a, const U128& b);
|
||||||
U128 VectorAdd16(const U128& a, const U128& b);
|
U128 VectorAdd16(const U128& a, const U128& b);
|
||||||
U128 VectorAdd32(const U128& a, const U128& b);
|
U128 VectorAdd32(const U128& a, const U128& b);
|
||||||
|
|
|
@ -162,6 +162,10 @@ OPCODE(PackedAbsDiffSumS8, T::U32, T::U32, T::U32
|
||||||
OPCODE(PackedSelect, T::U32, T::U32, T::U32, T::U32 )
|
OPCODE(PackedSelect, T::U32, T::U32, T::U32, T::U32 )
|
||||||
|
|
||||||
// Vector instructions
|
// Vector instructions
|
||||||
|
OPCODE(VectorGetElement8, T::U8, T::U128, T::U8 )
|
||||||
|
OPCODE(VectorGetElement16, T::U16, T::U128, T::U8 )
|
||||||
|
OPCODE(VectorGetElement32, T::U32, T::U128, T::U8 )
|
||||||
|
OPCODE(VectorGetElement64, T::U64, T::U128, T::U8 )
|
||||||
OPCODE(VectorAdd8, T::U128, T::U128, T::U128 )
|
OPCODE(VectorAdd8, T::U128, T::U128, T::U128 )
|
||||||
OPCODE(VectorAdd16, T::U128, T::U128, T::U128 )
|
OPCODE(VectorAdd16, T::U128, T::U128, T::U128 )
|
||||||
OPCODE(VectorAdd32, T::U128, T::U128, T::U128 )
|
OPCODE(VectorAdd32, T::U128, T::U128, T::U128 )
|
||||||
|
|
Loading…
Reference in a new issue