A64: Implement SQXTN (vector)
This commit is contained in:
parent
8ef114d48f
commit
3874cb37e3
8 changed files with 139 additions and 38 deletions
|
@ -2193,6 +2193,73 @@ void EmitX64::EmitVectorSignedAbsoluteDifference32(EmitContext& ctx, IR::Inst* i
|
|||
EmitVectorSignedAbsoluteDifference(32, ctx, inst, code);
|
||||
}
|
||||
|
||||
static void EmitVectorSignedSaturatedNarrowToSigned(size_t original_esize, BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
const Xbyak::Xmm src = ctx.reg_alloc.UseXmm(args[0]);
|
||||
const Xbyak::Xmm dest = ctx.reg_alloc.ScratchXmm();
|
||||
const Xbyak::Xmm reconstructed = ctx.reg_alloc.ScratchXmm();
|
||||
const Xbyak::Xmm sign = ctx.reg_alloc.ScratchXmm();
|
||||
|
||||
code.movdqa(dest, src);
|
||||
|
||||
switch (original_esize) {
|
||||
case 16:
|
||||
code.packsswb(dest, dest);
|
||||
code.movdqa(sign, src);
|
||||
code.psraw(sign, 15);
|
||||
code.packsswb(sign, sign);
|
||||
code.movdqa(reconstructed, dest);
|
||||
code.punpcklbw(reconstructed, sign);
|
||||
break;
|
||||
case 32:
|
||||
code.packssdw(dest, dest);
|
||||
code.movdqa(reconstructed, dest);
|
||||
code.movdqa(sign, dest);
|
||||
code.psraw(sign, 15);
|
||||
code.punpcklwd(reconstructed, sign);
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
break;
|
||||
}
|
||||
|
||||
const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32();
|
||||
|
||||
if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
|
||||
code.pxor(reconstructed, src);
|
||||
code.ptest(reconstructed, reconstructed);
|
||||
} else {
|
||||
code.pcmpeqd(reconstructed, src);
|
||||
code.movmskps(bit, reconstructed);
|
||||
code.cmp(bit, 0);
|
||||
}
|
||||
|
||||
code.setnz(bit.cvt8());
|
||||
code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit.cvt8());
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, dest);
|
||||
}
|
||||
|
||||
// Handler for the 16-bit-source variant: forwards to the shared static emitter
// with the source element width spelled out.
void EmitX64::EmitVectorSignedSaturatedNarrowToSigned16(EmitContext& ctx, IR::Inst* inst) {
    constexpr size_t source_esize = 16;
    EmitVectorSignedSaturatedNarrowToSigned(source_esize, code, ctx, inst);
}
|
||||
|
||||
// Handler for the 32-bit-source variant: forwards to the shared static emitter
// with the source element width spelled out.
void EmitX64::EmitVectorSignedSaturatedNarrowToSigned32(EmitContext& ctx, IR::Inst* inst) {
    constexpr size_t source_esize = 32;
    EmitVectorSignedSaturatedNarrowToSigned(source_esize, code, ctx, inst);
}
|
||||
|
||||
// Handler for the 64-bit-source variant. The per-element work is supplied as a callback to
// EmitOneArgumentFallbackWithSaturation: each s64 input element is clamped to the s32 range
// and the callback returns true if any element had to be clamped.
void EmitX64::EmitVectorSignedSaturatedNarrowToSigned64(EmitContext& ctx, IR::Inst* inst) {
    EmitOneArgumentFallbackWithSaturation(code, ctx, inst, [](VectorArray<s32>& result, const VectorArray<s64>& a) {
        // The bounds must be 64-bit signed constants. A bare `-0x80000000` is an unsigned int
        // literal negated with wrap-around (i.e. +2147483648), which would put the lower bound
        // above the upper bound and make std::clamp's behaviour undefined.
        constexpr s64 lower_bound = -0x80000000LL;
        constexpr s64 upper_bound = 0x7FFFFFFFLL;

        bool qc_flag = false;
        // Only the first a.size() lanes of result are written; the remaining lanes are left as-is.
        for (size_t i = 0; i < a.size(); ++i) {
            const s64 saturated = std::clamp<s64>(a[i], lower_bound, upper_bound);
            result[i] = static_cast<s32>(saturated);
            qc_flag |= saturated != a[i];
        }
        return qc_flag;
    });
}
|
||||
|
||||
static void EmitVectorSignedSaturatedNarrowToUnsigned(size_t original_esize, BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
const Xbyak::Xmm src = ctx.reg_alloc.UseXmm(args[0]);
|
||||
|
|
|
@ -579,7 +579,7 @@ INST(CMEQ_zero_2, "CMEQ (zero)", "0Q001
|
|||
INST(CMLT_2, "CMLT (zero)", "0Q001110zz100000101010nnnnnddddd")
|
||||
INST(ABS_2, "ABS", "0Q001110zz100000101110nnnnnddddd")
|
||||
INST(XTN, "XTN, XTN2", "0Q001110zz100001001010nnnnnddddd")
|
||||
//INST(SQXTN_2, "SQXTN, SQXTN2", "0Q001110zz100001010010nnnnnddddd")
|
||||
INST(SQXTN_2, "SQXTN, SQXTN2", "0Q001110zz100001010010nnnnnddddd")
|
||||
//INST(FCVTN, "FCVTN, FCVTN2", "0Q0011100z100001011010nnnnnddddd")
|
||||
//INST(FCVTL, "FCVTL, FCVTL2", "0Q0011100z100001011110nnnnnddddd")
|
||||
//INST(FRINTN_1, "FRINTN (vector)", "0Q00111001111001100010nnnnnddddd")
|
||||
|
|
|
@ -515,35 +515,20 @@ struct TranslatorVisitor final {
|
|||
|
||||
// Data Processing - FP and SIMD - Scalar two-register misc
|
||||
bool SUQADD_1(Imm<2> size, Vec Vn, Vec Vd);
|
||||
bool SUQADD_2(bool Q, Imm<2> size, Vec Vn, Vec Vd);
|
||||
bool SQABS_1(Imm<2> size, Vec Vn, Vec Vd);
|
||||
bool SQABS_2(bool Q, Imm<2> size, Vec Vn, Vec Vd);
|
||||
bool CMGT_zero_1(Imm<2> size, Vec Vn, Vec Vd);
|
||||
bool CMGT_zero_2(bool Q, Imm<2> size, Vec Vn, Vec Vd);
|
||||
bool CMEQ_zero_1(Imm<2> size, Vec Vn, Vec Vd);
|
||||
bool CMEQ_zero_2(bool Q, Imm<2> size, Vec Vn, Vec Vd);
|
||||
bool CMLT_1(Imm<2> size, Vec Vn, Vec Vd);
|
||||
bool CMLT_2(bool Q, Imm<2> size, Vec Vn, Vec Vd);
|
||||
bool ABS_1(Imm<2> size, Vec Vn, Vec Vd);
|
||||
bool ABS_2(bool Q, Imm<2> size, Vec Vn, Vec Vd);
|
||||
bool SQXTN_1(Imm<2> size, Vec Vn, Reg Rd);
|
||||
bool SQXTN_2(bool Q, Imm<2> size, Vec Vn, Reg Rd);
|
||||
bool USQADD_1(Imm<2> size, Vec Vn, Vec Vd);
|
||||
bool USQADD_2(bool Q, Imm<2> size, Vec Vn, Vec Vd);
|
||||
bool SQNEG_1(Imm<2> size, Vec Vn, Vec Vd);
|
||||
bool SQNEG_2(bool Q, Imm<2> size, Vec Vn, Vec Vd);
|
||||
bool CMGE_zero_1(Imm<2> size, Vec Vn, Vec Vd);
|
||||
bool CMGE_zero_2(bool Q, Imm<2> size, Vec Vn, Vec Vd);
|
||||
bool CMLE_1(Imm<2> size, Vec Vn, Vec Vd);
|
||||
bool CMLE_2(bool Q, Imm<2> size, Vec Vn, Vec Vd);
|
||||
bool NEG_1(Imm<2> size, Vec Vn, Vec Vd);
|
||||
bool NEG_2(bool Q, Imm<2> size, Vec Vn, Vec Vd);
|
||||
bool SQXTUN_1(Imm<2> size, Vec Vn, Reg Rd);
|
||||
bool SQXTUN_2(bool Q, Imm<2> size, Vec Vn, Vec Vd);
|
||||
bool UQXTN_1(Imm<2> size, Vec Vn, Reg Rd);
|
||||
bool UQXTN_2(bool Q, Imm<2> size, Vec Vn, Reg Rd);
|
||||
bool FCVTXN_1(bool sz, Vec Vn, Reg Rd);
|
||||
bool FCVTXN_2(bool Q, bool sz, Vec Vn, Reg Rd);
|
||||
|
||||
// Data Processing - FP and SIMD - SIMD Scalar pairwise
|
||||
bool ADDP_pair(Imm<2> size, Vec Vn, Vec Vd);
|
||||
|
@ -704,28 +689,6 @@ struct TranslatorVisitor final {
|
|||
bool FMINNMP_vec_1(bool Q, Vec Vm, Vec Vn, Vec Vd);
|
||||
bool FMINP_vec_1(bool Q, Vec Vm, Vec Vn, Vec Vd);
|
||||
|
||||
// Data Processing - FP and SIMD - SIMD Two-register misc
|
||||
bool FRINTN_1(bool Q, Vec Vn, Vec Vd);
|
||||
bool FRINTN_2(bool Q, bool sz, Vec Vn, Vec Vd);
|
||||
bool FRINTM_1(bool Q, Vec Vn, Vec Vd);
|
||||
bool FRINTM_2(bool Q, bool sz, Vec Vn, Vec Vd);
|
||||
bool FABS_1(bool Q, Vec Vn, Vec Vd);
|
||||
bool FABS_2(bool Q, bool sz, Vec Vn, Vec Vd);
|
||||
bool FRINTP_1(bool Q, Vec Vn, Vec Vd);
|
||||
bool FRINTP_2(bool Q, bool sz, Vec Vn, Vec Vd);
|
||||
bool FRINTZ_1(bool Q, Vec Vn, Vec Vd);
|
||||
bool FRINTZ_2(bool Q, bool sz, Vec Vn, Vec Vd);
|
||||
bool FRINTA_1(bool Q, Vec Vn, Vec Vd);
|
||||
bool FRINTA_2(bool Q, bool sz, Vec Vn, Vec Vd);
|
||||
bool FRINTX_1(bool Q, Vec Vn, Vec Vd);
|
||||
bool FRINTX_2(bool Q, bool sz, Vec Vn, Vec Vd);
|
||||
bool FNEG_1(bool Q, Vec Vn, Vec Vd);
|
||||
bool FNEG_2(bool Q, bool sz, Vec Vn, Vec Vd);
|
||||
bool FRINTI_1(bool Q, Vec Vn, Vec Vd);
|
||||
bool FRINTI_2(bool Q, bool sz, Vec Vn, Vec Vd);
|
||||
bool FSQRT_1(bool Q, Vec Vn, Vec Vd);
|
||||
bool FSQRT_2(bool Q, bool sz, Vec Vn, Vec Vd);
|
||||
|
||||
// Data Processing - FP and SIMD - SIMD Three same extra
|
||||
bool SDOT_vec(bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec Vd);
|
||||
bool UDOT_vec(bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec Vd);
|
||||
|
@ -751,6 +714,41 @@ struct TranslatorVisitor final {
|
|||
bool NOT(bool Q, Vec Vn, Vec Vd);
|
||||
bool RBIT_asimd(bool Q, Vec Vn, Vec Vd);
|
||||
bool URSQRTE(bool Q, bool sz, Vec Vn, Vec Vd);
|
||||
bool SUQADD_2(bool Q, Imm<2> size, Vec Vn, Vec Vd);
|
||||
bool SQABS_2(bool Q, Imm<2> size, Vec Vn, Vec Vd);
|
||||
bool CMGT_zero_2(bool Q, Imm<2> size, Vec Vn, Vec Vd);
|
||||
bool CMEQ_zero_2(bool Q, Imm<2> size, Vec Vn, Vec Vd);
|
||||
bool CMLT_2(bool Q, Imm<2> size, Vec Vn, Vec Vd);
|
||||
bool ABS_2(bool Q, Imm<2> size, Vec Vn, Vec Vd);
|
||||
bool SQXTN_2(bool Q, Imm<2> size, Vec Vn, Vec Vd);
|
||||
bool USQADD_2(bool Q, Imm<2> size, Vec Vn, Vec Vd);
|
||||
bool SQNEG_2(bool Q, Imm<2> size, Vec Vn, Vec Vd);
|
||||
bool CMGE_zero_2(bool Q, Imm<2> size, Vec Vn, Vec Vd);
|
||||
bool CMLE_2(bool Q, Imm<2> size, Vec Vn, Vec Vd);
|
||||
bool NEG_2(bool Q, Imm<2> size, Vec Vn, Vec Vd);
|
||||
bool SQXTUN_2(bool Q, Imm<2> size, Vec Vn, Vec Vd);
|
||||
bool UQXTN_2(bool Q, Imm<2> size, Vec Vn, Reg Rd);
|
||||
bool FCVTXN_2(bool Q, bool sz, Vec Vn, Reg Rd);
|
||||
bool FRINTN_1(bool Q, Vec Vn, Vec Vd);
|
||||
bool FRINTN_2(bool Q, bool sz, Vec Vn, Vec Vd);
|
||||
bool FRINTM_1(bool Q, Vec Vn, Vec Vd);
|
||||
bool FRINTM_2(bool Q, bool sz, Vec Vn, Vec Vd);
|
||||
bool FABS_1(bool Q, Vec Vn, Vec Vd);
|
||||
bool FABS_2(bool Q, bool sz, Vec Vn, Vec Vd);
|
||||
bool FRINTP_1(bool Q, Vec Vn, Vec Vd);
|
||||
bool FRINTP_2(bool Q, bool sz, Vec Vn, Vec Vd);
|
||||
bool FRINTZ_1(bool Q, Vec Vn, Vec Vd);
|
||||
bool FRINTZ_2(bool Q, bool sz, Vec Vn, Vec Vd);
|
||||
bool FRINTA_1(bool Q, Vec Vn, Vec Vd);
|
||||
bool FRINTA_2(bool Q, bool sz, Vec Vn, Vec Vd);
|
||||
bool FRINTX_1(bool Q, Vec Vn, Vec Vd);
|
||||
bool FRINTX_2(bool Q, bool sz, Vec Vn, Vec Vd);
|
||||
bool FNEG_1(bool Q, Vec Vn, Vec Vd);
|
||||
bool FNEG_2(bool Q, bool sz, Vec Vn, Vec Vd);
|
||||
bool FRINTI_1(bool Q, Vec Vn, Vec Vd);
|
||||
bool FRINTI_2(bool Q, bool sz, Vec Vn, Vec Vd);
|
||||
bool FSQRT_1(bool Q, Vec Vn, Vec Vd);
|
||||
bool FSQRT_2(bool Q, bool sz, Vec Vn, Vec Vd);
|
||||
|
||||
// Data Processing - FP and SIMD - SIMD across lanes
|
||||
bool SADDLV(bool Q, Imm<2> size, Vec Vn, Vec Vd);
|
||||
|
|
|
@ -291,6 +291,22 @@ bool TranslatorVisitor::SQXTUN_2(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
|
|||
return true;
|
||||
}
|
||||
|
||||
bool TranslatorVisitor::SQXTN_2(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
|
||||
if (size == 0b11) {
|
||||
return ReservedValue();
|
||||
}
|
||||
|
||||
const size_t esize = 8 << size.ZeroExtend<size_t>();
|
||||
const size_t datasize = 64;
|
||||
const size_t part = Q ? 1 : 0;
|
||||
|
||||
const IR::U128 operand = V(2 * datasize, Vn);
|
||||
const IR::U128 result = ir.VectorSignedSaturatedNarrowToSigned(2 * esize, operand);
|
||||
|
||||
Vpart(datasize, Vd, part, result);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool TranslatorVisitor::NOT(bool Q, Vec Vn, Vec Vd) {
|
||||
const size_t datasize = Q ? 128 : 64;
|
||||
|
||||
|
|
|
@ -1292,6 +1292,19 @@ U128 IREmitter::VectorSignedAbsoluteDifference(size_t esize, const U128& a, cons
|
|||
return {};
|
||||
}
|
||||
|
||||
// Emits the IR instruction for a signed saturating narrow, picking the opcode that matches
// the width (in bits) of the source elements. Supported widths are 16, 32 and 64; anything
// else falls through to UNREACHABLE().
U128 IREmitter::VectorSignedSaturatedNarrowToSigned(size_t original_esize, const U128& a) {
    if (original_esize == 16) {
        return Inst<U128>(Opcode::VectorSignedSaturatedNarrowToSigned16, a);
    }
    if (original_esize == 32) {
        return Inst<U128>(Opcode::VectorSignedSaturatedNarrowToSigned32, a);
    }
    if (original_esize == 64) {
        return Inst<U128>(Opcode::VectorSignedSaturatedNarrowToSigned64, a);
    }

    UNREACHABLE();
    return {};
}
|
||||
|
||||
U128 IREmitter::VectorSignedSaturatedNarrowToUnsigned(size_t original_esize, const U128& a) {
|
||||
switch (original_esize) {
|
||||
case 16:
|
||||
|
|
|
@ -249,6 +249,7 @@ public:
|
|||
U128 VectorShuffleWords(const U128& a, u8 mask);
|
||||
U128 VectorSignExtend(size_t original_esize, const U128& a);
|
||||
U128 VectorSignedAbsoluteDifference(size_t esize, const U128& a, const U128& b);
|
||||
U128 VectorSignedSaturatedNarrowToSigned(size_t original_esize, const U128& a);
|
||||
U128 VectorSignedSaturatedNarrowToUnsigned(size_t original_esize, const U128& a);
|
||||
U128 VectorSub(size_t esize, const U128& a, const U128& b);
|
||||
U128 VectorUnsignedAbsoluteDifference(size_t esize, const U128& a, const U128& b);
|
||||
|
|
|
@ -341,6 +341,9 @@ bool Inst::ReadsFromFPSRCumulativeSaturationBit() const {
|
|||
|
||||
bool Inst::WritesToFPSRCumulativeSaturationBit() const {
|
||||
switch (op) {
|
||||
case Opcode::VectorSignedSaturatedNarrowToSigned16:
|
||||
case Opcode::VectorSignedSaturatedNarrowToSigned32:
|
||||
case Opcode::VectorSignedSaturatedNarrowToSigned64:
|
||||
case Opcode::VectorSignedSaturatedNarrowToUnsigned16:
|
||||
case Opcode::VectorSignedSaturatedNarrowToUnsigned32:
|
||||
case Opcode::VectorSignedSaturatedNarrowToUnsigned64:
|
||||
|
|
|
@ -347,6 +347,9 @@ OPCODE(VectorSignExtend64, T::U128, T::U128
|
|||
OPCODE(VectorSignedAbsoluteDifference8, T::U128, T::U128, T::U128 )
|
||||
OPCODE(VectorSignedAbsoluteDifference16, T::U128, T::U128, T::U128 )
|
||||
OPCODE(VectorSignedAbsoluteDifference32, T::U128, T::U128, T::U128 )
|
||||
OPCODE(VectorSignedSaturatedNarrowToSigned16, T::U128, T::U128 )
|
||||
OPCODE(VectorSignedSaturatedNarrowToSigned32, T::U128, T::U128 )
|
||||
OPCODE(VectorSignedSaturatedNarrowToSigned64, T::U128, T::U128 )
|
||||
OPCODE(VectorSignedSaturatedNarrowToUnsigned16, T::U128, T::U128 )
|
||||
OPCODE(VectorSignedSaturatedNarrowToUnsigned32, T::U128, T::U128 )
|
||||
OPCODE(VectorSignedSaturatedNarrowToUnsigned64, T::U128, T::U128 )
|
||||
|
|
Loading…
Add table
Reference in a new issue