IR: Implement VectorSignExtend

This commit is contained in:
MerryMage 2018-02-11 16:24:33 +00:00
parent a90e4955ab
commit eae518a338
4 changed files with 75 additions and 0 deletions

View file

@ -938,6 +938,61 @@ void EmitX64::EmitVectorPopulationCount(EmitContext& ctx, IR::Inst* inst) {
}); });
} }
// Emits host code for the IR VectorSignExtend8 operation: the low eight
// bytes of the operand are sign-extended into eight 16-bit lanes.
//
// ctx  - emit context; its register allocator supplies the operand register
//        and records where the result lives.
// inst - the IR instruction whose result is being defined.
void EmitX64::EmitVectorSignExtend8(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    if (!code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
        // Path for hosts without SSE4.1 (no pmovsxbw available).
        const Xbyak::Xmm source = ctx.reg_alloc.UseXmm(args[0]);
        const Xbyak::Xmm widened = ctx.reg_alloc.ScratchXmm();

        // Interleaving a zeroed register with the source leaves every source
        // byte in the upper half of a 16-bit lane ...
        code.pxor(widened, widened);
        code.punpcklbw(widened, source);
        // ... and the arithmetic right shift moves it back down while
        // replicating its sign bit across the upper byte.
        code.psraw(widened, 8);

        ctx.reg_alloc.DefineValue(inst, widened);
        return;
    }

    // SSE4.1: a single in-place packed sign-extending move does the job.
    const Xbyak::Xmm operand = ctx.reg_alloc.UseScratchXmm(args[0]);
    code.pmovsxbw(operand, operand);
    ctx.reg_alloc.DefineValue(inst, operand);
}
// Emits host code for the IR VectorSignExtend16 operation: the low four
// 16-bit elements of the operand are sign-extended into four 32-bit lanes.
//
// ctx  - emit context; its register allocator supplies the operand register
//        and records where the result lives.
// inst - the IR instruction whose result is being defined.
void EmitX64::EmitVectorSignExtend16(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
        const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);

        // Emit the computation first and only then publish the register as
        // the instruction's result, matching the order used by the fallback
        // path below and by the other sign-extend emitters.
        code.pmovsxwd(a, a);
        ctx.reg_alloc.DefineValue(inst, a);
    } else {
        const Xbyak::Xmm a = ctx.reg_alloc.UseXmm(args[0]);
        const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();

        // Interleave a zeroed register with the source so that every source
        // word lands in the upper half of a 32-bit lane, then shift it back
        // down arithmetically to replicate the sign bit.
        code.pxor(result, result);
        code.punpcklwd(result, a);
        code.psrad(result, 16);

        ctx.reg_alloc.DefineValue(inst, result);
    }
}
// Emits host code for the IR VectorSignExtend32 operation: the low two
// 32-bit elements of the operand are sign-extended into two 64-bit lanes.
//
// ctx  - emit context; its register allocator supplies the operand register
//        and records where the result lives.
// inst - the IR instruction whose result is being defined.
void EmitX64::EmitVectorSignExtend32(EmitContext& ctx, IR::Inst* inst) {
    if (!code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
        // Without SSE4.1, hand the work to the generic one-argument fallback,
        // which applies this lambda to the operand viewed as four u32 values.
        EmitOneArgumentFallback(code, ctx, inst, [](std::array<u64, 2>& out, const std::array<u32, 4>& in){
            out[0] = Common::SignExtend<32, u64>(in[0]);
            out[1] = Common::SignExtend<32, u64>(in[1]);
        });
        return;
    }

    // SSE4.1: widen in place with a single packed sign-extending move.
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const Xbyak::Xmm operand = ctx.reg_alloc.UseScratchXmm(args[0]);
    code.pmovsxdq(operand, operand);
    ctx.reg_alloc.DefineValue(inst, operand);
}
// Emits host code for the IR VectorSignExtend64 operation: the low 64-bit
// element is sign-extended to fill the whole 128-bit result. This always goes
// through the generic one-argument fallback.
//
// ctx  - emit context forwarded to the fallback helper.
// inst - the IR instruction whose result is being defined.
void EmitX64::EmitVectorSignExtend64(EmitContext& ctx, IR::Inst* inst) {
    EmitOneArgumentFallback(code, ctx, inst, [](std::array<u64, 2>& out, const std::array<u64, 2>& in){
        const u64 low = in[0];
        // The upper half is all ones when the low element is negative
        // (bit 63 set) and all zeros otherwise.
        const u64 sign_fill = (low >> 63) ? ~u64(0) : 0;
        out[1] = sign_fill;
        out[0] = low;
    });
}
// Emits the IR VectorSub8 operation by passing Xbyak's psubb (packed byte
// subtract) emitter to the shared EmitVectorOperation helper.
void EmitX64::EmitVectorSub8(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorSub8(EmitContext& ctx, IR::Inst* inst) {
EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::psubb); EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::psubb);
} }

View file

@ -981,6 +981,21 @@ U128 IREmitter::VectorPopulationCount(const U128& a) {
return Inst<U128>(Opcode::VectorPopulationCount, a); return Inst<U128>(Opcode::VectorPopulationCount, a);
} }
// Appends a vector sign-extend instruction to the IR.
//
// original_esize - element width in bits before extension; must be one of
//                  8, 16, 32 or 64. Any other value hits UNREACHABLE().
// a              - the 128-bit vector operand.
//
// Returns the U128 result of the VectorSignExtend{8,16,32,64} opcode that
// corresponds to original_esize.
U128 IREmitter::VectorSignExtend(size_t original_esize, const U128& a) {
    if (original_esize == 8) {
        return Inst<U128>(Opcode::VectorSignExtend8, a);
    }
    if (original_esize == 16) {
        return Inst<U128>(Opcode::VectorSignExtend16, a);
    }
    if (original_esize == 32) {
        return Inst<U128>(Opcode::VectorSignExtend32, a);
    }
    if (original_esize == 64) {
        return Inst<U128>(Opcode::VectorSignExtend64, a);
    }

    // No opcode exists for any other element size.
    UNREACHABLE();
    return {};
}
U128 IREmitter::VectorSub(size_t esize, const U128& a, const U128& b) { U128 IREmitter::VectorSub(size_t esize, const U128& a, const U128& b) {
switch (esize) { switch (esize) {
case 8: case 8:

View file

@ -224,6 +224,7 @@ public:
U128 VectorPairedAdd(size_t esize, const U128& a, const U128& b); U128 VectorPairedAdd(size_t esize, const U128& a, const U128& b);
U128 VectorPairedAddLower(size_t esize, const U128& a, const U128& b); U128 VectorPairedAddLower(size_t esize, const U128& a, const U128& b);
U128 VectorPopulationCount(const U128& a); U128 VectorPopulationCount(const U128& a);
// Sign-extends elements of `a`; `original_esize` is the element width in bits
// before extension and must be 8, 16, 32 or 64.
U128 VectorSignExtend(size_t original_esize, const U128& a);
U128 VectorSub(size_t esize, const U128& a, const U128& b); U128 VectorSub(size_t esize, const U128& a, const U128& b);
U128 VectorZeroExtend(size_t original_esize, const U128& a); U128 VectorZeroExtend(size_t original_esize, const U128& a);
U128 VectorZeroUpper(const U128& a); U128 VectorZeroUpper(const U128& a);

View file

@ -249,6 +249,10 @@ OPCODE(VectorPairedAdd16, T::U128, T::U128, T::U128
OPCODE(VectorPairedAdd32, T::U128, T::U128, T::U128 ) OPCODE(VectorPairedAdd32, T::U128, T::U128, T::U128 )
OPCODE(VectorPairedAdd64, T::U128, T::U128, T::U128 ) OPCODE(VectorPairedAdd64, T::U128, T::U128, T::U128 )
OPCODE(VectorPopulationCount, T::U128, T::U128 ) OPCODE(VectorPopulationCount, T::U128, T::U128 )
// Vector sign extension: one U128 operand, U128 result. The numeric suffix is
// the element width in bits that IREmitter::VectorSignExtend dispatches on.
OPCODE(VectorSignExtend8, T::U128, T::U128 )
OPCODE(VectorSignExtend16, T::U128, T::U128 )
OPCODE(VectorSignExtend32, T::U128, T::U128 )
OPCODE(VectorSignExtend64, T::U128, T::U128 )
OPCODE(VectorSub8, T::U128, T::U128, T::U128 ) OPCODE(VectorSub8, T::U128, T::U128, T::U128 )
OPCODE(VectorSub16, T::U128, T::U128, T::U128 ) OPCODE(VectorSub16, T::U128, T::U128, T::U128 )
OPCODE(VectorSub32, T::U128, T::U128, T::U128 ) OPCODE(VectorSub32, T::U128, T::U128, T::U128 )