IR: Implement VectorSignExtend
This commit is contained in:
parent
a90e4955ab
commit
eae518a338
4 changed files with 75 additions and 0 deletions
|
@ -938,6 +938,61 @@ void EmitX64::EmitVectorPopulationCount(EmitContext& ctx, IR::Inst* inst) {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emits x64 code that widens the low 8-bit lanes of the operand into
// sign-extended 16-bit lanes and binds the resulting register to `inst`.
void EmitX64::EmitVectorSignExtend8(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    if (!code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
        // Without SSE4.1: interleave a zero byte below each source byte so the
        // source byte occupies the upper half of every word, then shift it back
        // down arithmetically to replicate its sign bit.
        const Xbyak::Xmm source = ctx.reg_alloc.UseXmm(args[0]);
        const Xbyak::Xmm widened = ctx.reg_alloc.ScratchXmm();

        code.pxor(widened, widened);
        code.punpcklbw(widened, source);
        code.psraw(widened, 8);

        ctx.reg_alloc.DefineValue(inst, widened);
        return;
    }

    // SSE4.1 offers the widening move as a single instruction, done in place.
    const Xbyak::Xmm value = ctx.reg_alloc.UseScratchXmm(args[0]);
    code.pmovsxbw(value, value);
    ctx.reg_alloc.DefineValue(inst, value);
}
|
||||||
|
|
||||||
|
// Emits x64 code that widens the low 16-bit lanes of the operand into
// sign-extended 32-bit lanes and binds the resulting register to `inst`.
void EmitX64::EmitVectorSignExtend16(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
        // SSE4.1 offers the widening move as a single instruction, done in place.
        const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
        code.pmovsxwd(a, a);
        // Bind the result only after the instruction has been emitted, in the
        // same emit-then-define order used by the other SignExtend emitters.
        ctx.reg_alloc.DefineValue(inst, a);
    } else {
        // Without SSE4.1: interleave a zero word below each source word so the
        // source word occupies the upper half of every doubleword, then shift
        // it back down arithmetically to replicate its sign bit.
        const Xbyak::Xmm a = ctx.reg_alloc.UseXmm(args[0]);
        const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
        code.pxor(result, result);
        code.punpcklwd(result, a);
        code.psrad(result, 16);
        ctx.reg_alloc.DefineValue(inst, result);
    }
}
|
||||||
|
|
||||||
|
// Emits code that widens the low two 32-bit lanes of the operand into
// sign-extended 64-bit lanes. Uses pmovsxdq when SSE4.1 is available and the
// generic one-argument fallback otherwise.
void EmitX64::EmitVectorSignExtend32(EmitContext& ctx, IR::Inst* inst) {
    if (!code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
        // No SSE4.1: hand the work to the fallback helper, which applies the
        // lambda to the input lanes to fill the two output lanes.
        EmitOneArgumentFallback(code, ctx, inst, [](std::array<u64, 2>& out, const std::array<u32, 4>& in){
            out[0] = Common::SignExtend<32, u64>(in[0]);
            out[1] = Common::SignExtend<32, u64>(in[1]);
        });
        return;
    }

    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const Xbyak::Xmm value = ctx.reg_alloc.UseScratchXmm(args[0]);

    code.pmovsxdq(value, value);

    ctx.reg_alloc.DefineValue(inst, value);
}
|
||||||
|
|
||||||
|
// Emits code that sign-extends the low 64-bit lane of the operand across the
// full 128-bit result, always via the generic one-argument fallback.
void EmitX64::EmitVectorSignExtend64(EmitContext& ctx, IR::Inst* inst) {
    EmitOneArgumentFallback(code, ctx, inst, [](std::array<u64, 2>& out, const std::array<u64, 2>& in){
        const u64 all_ones = ~u64(0);
        const u64 all_zeros = 0;

        // The upper lane is filled with copies of the low lane's top (sign) bit.
        const bool is_negative = (in[0] >> 63) != 0;
        out[1] = is_negative ? all_ones : all_zeros;

        // The low lane passes through unchanged.
        out[0] = in[0];
    });
}
|
||||||
|
|
||||||
// Emits the vector subtract for 8-bit elements by delegating to
// EmitVectorOperation with Xbyak's psubb (packed byte subtract) emitter.
// NOTE(review): the signature, body and closing brace each appear twice below;
// this looks like duplicated diff-context rendering — confirm against the real file.
void EmitX64::EmitVectorSub8(EmitContext& ctx, IR::Inst* inst) {
|
void EmitX64::EmitVectorSub8(EmitContext& ctx, IR::Inst* inst) {
|
||||||
EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::psubb);
|
EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::psubb);
|
||||||
}
|
}
|
||||||
|
|
|
@ -981,6 +981,21 @@ U128 IREmitter::VectorPopulationCount(const U128& a) {
|
||||||
return Inst<U128>(Opcode::VectorPopulationCount, a);
|
return Inst<U128>(Opcode::VectorPopulationCount, a);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Builds the IR instruction that sign-extends vector `a`, picking the opcode
// that corresponds to the element width before extension.
//
// original_esize: source element size in bits; must be 8, 16, 32 or 64.
// Any other value reaches UNREACHABLE().
U128 IREmitter::VectorSignExtend(size_t original_esize, const U128& a) {
    if (original_esize == 8) {
        return Inst<U128>(Opcode::VectorSignExtend8, a);
    }
    if (original_esize == 16) {
        return Inst<U128>(Opcode::VectorSignExtend16, a);
    }
    if (original_esize == 32) {
        return Inst<U128>(Opcode::VectorSignExtend32, a);
    }
    if (original_esize == 64) {
        return Inst<U128>(Opcode::VectorSignExtend64, a);
    }

    // Unsupported element size.
    UNREACHABLE();
    return {};
}
|
||||||
|
|
||||||
U128 IREmitter::VectorSub(size_t esize, const U128& a, const U128& b) {
|
U128 IREmitter::VectorSub(size_t esize, const U128& a, const U128& b) {
|
||||||
switch (esize) {
|
switch (esize) {
|
||||||
case 8:
|
case 8:
|
||||||
|
|
|
@ -224,6 +224,7 @@ public:
|
||||||
U128 VectorPairedAdd(size_t esize, const U128& a, const U128& b);
|
U128 VectorPairedAdd(size_t esize, const U128& a, const U128& b);
|
||||||
U128 VectorPairedAddLower(size_t esize, const U128& a, const U128& b);
|
U128 VectorPairedAddLower(size_t esize, const U128& a, const U128& b);
|
||||||
U128 VectorPopulationCount(const U128& a);
|
U128 VectorPopulationCount(const U128& a);
|
||||||
|
U128 VectorSignExtend(size_t original_esize, const U128& a);
|
||||||
U128 VectorSub(size_t esize, const U128& a, const U128& b);
|
U128 VectorSub(size_t esize, const U128& a, const U128& b);
|
||||||
U128 VectorZeroExtend(size_t original_esize, const U128& a);
|
U128 VectorZeroExtend(size_t original_esize, const U128& a);
|
||||||
U128 VectorZeroUpper(const U128& a);
|
U128 VectorZeroUpper(const U128& a);
|
||||||
|
|
|
@ -249,6 +249,10 @@ OPCODE(VectorPairedAdd16, T::U128, T::U128, T::U128
|
||||||
OPCODE(VectorPairedAdd32, T::U128, T::U128, T::U128 )
|
OPCODE(VectorPairedAdd32, T::U128, T::U128, T::U128 )
|
||||||
OPCODE(VectorPairedAdd64, T::U128, T::U128, T::U128 )
|
OPCODE(VectorPairedAdd64, T::U128, T::U128, T::U128 )
|
||||||
OPCODE(VectorPopulationCount, T::U128, T::U128 )
|
OPCODE(VectorPopulationCount, T::U128, T::U128 )
|
||||||
|
OPCODE(VectorSignExtend8, T::U128, T::U128 )
|
||||||
|
OPCODE(VectorSignExtend16, T::U128, T::U128 )
|
||||||
|
OPCODE(VectorSignExtend32, T::U128, T::U128 )
|
||||||
|
OPCODE(VectorSignExtend64, T::U128, T::U128 )
|
||||||
OPCODE(VectorSub8, T::U128, T::U128, T::U128 )
|
OPCODE(VectorSub8, T::U128, T::U128, T::U128 )
|
||||||
OPCODE(VectorSub16, T::U128, T::U128, T::U128 )
|
OPCODE(VectorSub16, T::U128, T::U128, T::U128 )
|
||||||
OPCODE(VectorSub32, T::U128, T::U128, T::U128 )
|
OPCODE(VectorSub32, T::U128, T::U128, T::U128 )
|
||||||
|
|
Loading…
Reference in a new issue