IR: Add IR instruction VectorGetElement{8,16,32,64}

This commit is contained in:
MerryMage 2018-01-24 15:54:56 +00:00
parent 28ccd85e5c
commit e00a522cba
4 changed files with 95 additions and 0 deletions

View file

@ -17,6 +17,79 @@ namespace BackendX64 {
using namespace Xbyak::util; using namespace Xbyak::util;
// Emits x64 code that extracts a single 8-bit element from a 128-bit vector.
//
// args[0] is the source vector; args[1] is the element index and must be an
// immediate. The extracted element is defined as the result of `inst` in a
// scratch general-purpose register.
void EmitX64::EmitVectorGetElement8(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    ASSERT(args[1].IsImmediate());
    const u8 index = args[1].GetImmediateU8();

    const Xbyak::Xmm source = ctx.reg_alloc.UseXmm(args[0]);
    const Xbyak::Reg32 dest = ctx.reg_alloc.ScratchGpr().cvt32();

    if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
        // SSE4.1 can address individual bytes directly.
        code->pextrb(dest, source, index);
    } else {
        // PEXTRW addresses 16-bit words, not bytes: fetch the word that
        // contains the requested byte, then move the high byte down when the
        // byte index is odd. Passing the byte index unmodified would select
        // the wrong word.
        code->pextrw(dest, source, index / 2);
        if (index % 2 == 1) {
            code->shr(dest, 8);
        }
        // NOTE(review): for even indices bits 8-15 of dest still hold the
        // neighbouring byte, unlike the pextrb path which zero-extends.
        // Presumably consumers of a U8 value only read the low 8 bits - confirm.
    }

    ctx.reg_alloc.DefineValue(inst, dest);
}
// Emits x64 code that extracts a single 16-bit element from a 128-bit vector.
//
// The first IR argument is the vector, the second is the (immediate) element
// index. PEXTRW is used unconditionally; the result is defined for `inst` in
// a scratch general-purpose register.
void EmitX64::EmitVectorGetElement16(EmitContext& ctx, IR::Inst* inst) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);
    ASSERT(operands[1].IsImmediate());
    const u8 lane = operands[1].GetImmediateU8();

    const Xbyak::Xmm vector = ctx.reg_alloc.UseXmm(operands[0]);
    const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();

    code->pextrw(result, vector, lane);

    ctx.reg_alloc.DefineValue(inst, result);
}
// Emits x64 code that extracts a single 32-bit element from a 128-bit vector.
//
// The first IR argument is the vector, the second is the (immediate) element
// index. Three strategies are used:
//   - index 0:           MOVD straight out of the vector register;
//   - SSE4.1 available:  PEXTRD;
//   - otherwise:         PSHUFD the wanted element into lane 0 of a scratch
//                        copy of the vector, then MOVD.
void EmitX64::EmitVectorGetElement32(EmitContext& ctx, IR::Inst* inst) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);
    ASSERT(operands[1].IsImmediate());
    const u8 lane = operands[1].GetImmediateU8();

    const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
    const bool is_lowest_lane = (lane == 0);

    if (!is_lowest_lane && !code->DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
        // No PEXTRD: the vector register gets overwritten by the shuffle, so
        // it is requested as a scratch register.
        const Xbyak::Xmm shuffled = ctx.reg_alloc.UseScratchXmm(operands[0]);
        code->pshufd(shuffled, shuffled, lane);
        code->movd(result, shuffled);
    } else {
        // Both remaining strategies only read the vector register.
        const Xbyak::Xmm vector = ctx.reg_alloc.UseXmm(operands[0]);
        if (is_lowest_lane) {
            code->movd(result, vector);
        } else {
            code->pextrd(result, vector, lane);
        }
    }

    ctx.reg_alloc.DefineValue(inst, result);
}
// Emits x64 code that extracts a single 64-bit element from a 128-bit vector.
//
// The first IR argument is the vector, the second is the (immediate) element
// index. Three strategies are used:
//   - index 0:           MOVQ straight out of the vector register;
//   - SSE4.1 available:  PEXTRQ of the upper quadword;
//   - otherwise:         PUNPCKHQDQ the upper quadword into the lower half of
//                        a scratch copy of the vector, then MOVQ.
// Any non-zero index is treated as the upper quadword (presumably 1 is the
// only other index callers pass - confirm against the IR emitter).
void EmitX64::EmitVectorGetElement64(EmitContext& ctx, IR::Inst* inst) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);
    ASSERT(operands[1].IsImmediate());
    const u8 lane = operands[1].GetImmediateU8();

    const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr().cvt64();
    const bool is_lowest_lane = (lane == 0);

    if (!is_lowest_lane && !code->DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
        // No PEXTRQ: the vector register gets overwritten by the unpack, so
        // it is requested as a scratch register.
        const Xbyak::Xmm unpacked = ctx.reg_alloc.UseScratchXmm(operands[0]);
        code->punpckhqdq(unpacked, unpacked);
        code->movq(result, unpacked);
    } else {
        // Both remaining strategies only read the vector register.
        const Xbyak::Xmm vector = ctx.reg_alloc.UseXmm(operands[0]);
        if (is_lowest_lane) {
            code->movq(result, vector);
        } else {
            code->pextrq(result, vector, 1);
        }
    }

    ctx.reg_alloc.DefineValue(inst, result);
}
static void EmitVectorOperation(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Mmx& mmx, const Xbyak::Operand&)) { static void EmitVectorOperation(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Mmx& mmx, const Xbyak::Operand&)) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);

View file

@ -636,6 +636,23 @@ U32 IREmitter::PackedSelect(const U32& ge, const U32& a, const U32& b) {
return Inst<U32>(Opcode::PackedSelect, ge, a, b); return Inst<U32>(Opcode::PackedSelect, ge, a, b);
} }
// Builds the IR instruction that reads one element out of the 128-bit vector `a`.
//
// esize: element width in bits; 8, 16, 32 and 64 are handled.
// index: element number; asserted so that esize * index stays below 128.
// Returns the VectorGetElement{8,16,32,64} instruction result matching esize.
// Any other esize trips the "Unreachable" assertion and yields a
// default-constructed value.
UAny IREmitter::VectorGetElement(size_t esize, const U128& a, size_t index) {
    ASSERT_MSG(esize * index < 128, "Invalid index");

    // The element index travels as an 8-bit immediate operand; it is only
    // materialised for a supported element size.
    const auto lane = [&] { return Imm8(static_cast<u8>(index)); };

    if (esize == 8) {
        return Inst<U8>(Opcode::VectorGetElement8, a, lane());
    }
    if (esize == 16) {
        return Inst<U16>(Opcode::VectorGetElement16, a, lane());
    }
    if (esize == 32) {
        return Inst<U32>(Opcode::VectorGetElement32, a, lane());
    }
    if (esize == 64) {
        return Inst<U64>(Opcode::VectorGetElement64, a, lane());
    }

    ASSERT_MSG(false, "Unreachable");
    return {};
}
U128 IREmitter::VectorAdd8(const U128& a, const U128& b) { U128 IREmitter::VectorAdd8(const U128& a, const U128& b) {
return Inst<U128>(Opcode::VectorAdd8, a, b); return Inst<U128>(Opcode::VectorAdd8, a, b);
} }

View file

@ -178,6 +178,7 @@ public:
U32 PackedAbsDiffSumS8(const U32& a, const U32& b); U32 PackedAbsDiffSumS8(const U32& a, const U32& b);
U32 PackedSelect(const U32& ge, const U32& a, const U32& b); U32 PackedSelect(const U32& ge, const U32& a, const U32& b);
// Reads element number `index` of width `esize` bits (8, 16, 32 or 64) from the 128-bit vector `a`.
UAny VectorGetElement(size_t esize, const U128& a, size_t index);
U128 VectorAdd8(const U128& a, const U128& b); U128 VectorAdd8(const U128& a, const U128& b);
U128 VectorAdd16(const U128& a, const U128& b); U128 VectorAdd16(const U128& a, const U128& b);
U128 VectorAdd32(const U128& a, const U128& b); U128 VectorAdd32(const U128& a, const U128& b);

View file

@ -162,6 +162,10 @@ OPCODE(PackedAbsDiffSumS8, T::U32, T::U32, T::U32
OPCODE(PackedSelect, T::U32, T::U32, T::U32, T::U32 ) OPCODE(PackedSelect, T::U32, T::U32, T::U32, T::U32 )
// Vector instructions
// VectorGetElement{8,16,32,64}: result type, then argument types (U128 source vector, U8 element index).
OPCODE(VectorGetElement8, T::U8, T::U128, T::U8 )
OPCODE(VectorGetElement16, T::U16, T::U128, T::U8 )
OPCODE(VectorGetElement32, T::U32, T::U128, T::U8 )
OPCODE(VectorGetElement64, T::U64, T::U128, T::U8 )
OPCODE(VectorAdd8, T::U128, T::U128, T::U128 ) OPCODE(VectorAdd8, T::U128, T::U128, T::U128 )
OPCODE(VectorAdd16, T::U128, T::U128, T::U128 ) OPCODE(VectorAdd16, T::U128, T::U128, T::U128 )
OPCODE(VectorAdd32, T::U128, T::U128, T::U128 ) OPCODE(VectorAdd32, T::U128, T::U128, T::U128 )