A64: Implement UQXTN (vector)
This commit is contained in:
parent
e686a81612
commit
b455b566e7
8 changed files with 80 additions and 27 deletions
|
@ -2419,6 +2419,42 @@ void EmitX64::EmitVectorUnsignedAbsoluteDifference32(EmitContext& ctx, IR::Inst*
|
||||||
EmitVectorUnsignedAbsoluteDifference(32, ctx, inst, code);
|
EmitVectorUnsignedAbsoluteDifference(32, ctx, inst, code);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Fallback emitter: narrows unsigned 16-bit lanes to unsigned 8-bit lanes with saturation.
// Each source lane larger than 0xFF is replaced by 0xFF before truncation.
// The lambda returns true when at least one lane had to be saturated.
void EmitX64::EmitVectorUnsignedSaturatedNarrow16(EmitContext& ctx, IR::Inst* inst) {
    EmitOneArgumentFallbackWithSaturation(code, ctx, inst, [](VectorArray<u8>& result, const VectorArray<u16>& a) {
        constexpr u16 lane_max = 0xFF;
        bool any_lane_saturated = false;

        // Only indices [0, a.size()) of result are written here.
        for (size_t lane = 0; lane < a.size(); ++lane) {
            const u16 source = a[lane];
            // The source is unsigned, so only the upper bound can be exceeded.
            const u16 narrowed = std::min<u16>(source, lane_max);

            result[lane] = static_cast<u8>(narrowed);
            if (narrowed != source) {
                any_lane_saturated = true;
            }
        }

        return any_lane_saturated;
    });
}
|
||||||
|
|
||||||
|
// Fallback emitter: narrows unsigned 32-bit lanes to unsigned 16-bit lanes with saturation.
// Each source lane larger than 0xFFFF is replaced by 0xFFFF before truncation.
// The lambda returns true when at least one lane had to be saturated.
void EmitX64::EmitVectorUnsignedSaturatedNarrow32(EmitContext& ctx, IR::Inst* inst) {
    EmitOneArgumentFallbackWithSaturation(code, ctx, inst, [](VectorArray<u16>& result, const VectorArray<u32>& a) {
        constexpr u32 lane_max = 0xFFFF;
        bool any_lane_saturated = false;

        // Only indices [0, a.size()) of result are written here.
        for (size_t lane = 0; lane < a.size(); ++lane) {
            const u32 source = a[lane];
            // The source is unsigned, so only the upper bound can be exceeded.
            const u32 narrowed = std::min<u32>(source, lane_max);

            result[lane] = static_cast<u16>(narrowed);
            if (narrowed != source) {
                any_lane_saturated = true;
            }
        }

        return any_lane_saturated;
    });
}
|
||||||
|
|
||||||
|
// Fallback emitter: narrows unsigned 64-bit lanes to unsigned 32-bit lanes with saturation.
// Each source lane larger than 0xFFFFFFFF is replaced by 0xFFFFFFFF before truncation.
// The lambda returns true when at least one lane had to be saturated.
void EmitX64::EmitVectorUnsignedSaturatedNarrow64(EmitContext& ctx, IR::Inst* inst) {
    EmitOneArgumentFallbackWithSaturation(code, ctx, inst, [](VectorArray<u32>& result, const VectorArray<u64>& a) {
        constexpr u64 lane_max = 0xFFFFFFFF;
        bool any_lane_saturated = false;

        // Only indices [0, a.size()) of result are written here.
        for (size_t lane = 0; lane < a.size(); ++lane) {
            const u64 source = a[lane];
            // The source is unsigned, so only the upper bound can be exceeded.
            const u64 narrowed = std::min<u64>(source, lane_max);

            result[lane] = static_cast<u32>(narrowed);
            if (narrowed != source) {
                any_lane_saturated = true;
            }
        }

        return any_lane_saturated;
    });
}
|
||||||
|
|
||||||
void EmitX64::EmitVectorZeroExtend8(EmitContext& ctx, IR::Inst* inst) {
|
void EmitX64::EmitVectorZeroExtend8(EmitContext& ctx, IR::Inst* inst) {
|
||||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
|
const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||||
|
|
|
@ -624,7 +624,7 @@ INST(CMLE_2, "CMLE (zero)", "0Q101
|
||||||
INST(NEG_2, "NEG (vector)", "0Q101110zz100000101110nnnnnddddd")
|
INST(NEG_2, "NEG (vector)", "0Q101110zz100000101110nnnnnddddd")
|
||||||
INST(SQXTUN_2, "SQXTUN, SQXTUN2", "0Q101110zz100001001010nnnnnddddd")
|
INST(SQXTUN_2, "SQXTUN, SQXTUN2", "0Q101110zz100001001010nnnnnddddd")
|
||||||
INST(SHLL, "SHLL, SHLL2", "0Q101110zz100001001110nnnnnddddd")
|
INST(SHLL, "SHLL, SHLL2", "0Q101110zz100001001110nnnnnddddd")
|
||||||
//INST(UQXTN_2, "UQXTN, UQXTN2", "0Q101110zz100001010010nnnnnddddd")
|
INST(UQXTN_2, "UQXTN, UQXTN2", "0Q101110zz100001010010nnnnnddddd")
|
||||||
//INST(FCVTXN_2, "FCVTXN, FCVTXN2", "0Q1011100z100001011010nnnnnddddd")
|
//INST(FCVTXN_2, "FCVTXN, FCVTXN2", "0Q1011100z100001011010nnnnnddddd")
|
||||||
//INST(FRINTA_1, "FRINTA (vector)", "0Q10111001111001100010nnnnnddddd")
|
//INST(FRINTA_1, "FRINTA (vector)", "0Q10111001111001100010nnnnnddddd")
|
||||||
//INST(FRINTA_2, "FRINTA (vector)", "0Q1011100z100001100010nnnnnddddd")
|
//INST(FRINTA_2, "FRINTA (vector)", "0Q1011100z100001100010nnnnnddddd")
|
||||||
|
|
|
@ -727,7 +727,7 @@ struct TranslatorVisitor final {
|
||||||
bool CMLE_2(bool Q, Imm<2> size, Vec Vn, Vec Vd);
|
bool CMLE_2(bool Q, Imm<2> size, Vec Vn, Vec Vd);
|
||||||
bool NEG_2(bool Q, Imm<2> size, Vec Vn, Vec Vd);
|
bool NEG_2(bool Q, Imm<2> size, Vec Vn, Vec Vd);
|
||||||
bool SQXTUN_2(bool Q, Imm<2> size, Vec Vn, Vec Vd);
|
bool SQXTUN_2(bool Q, Imm<2> size, Vec Vn, Vec Vd);
|
||||||
bool UQXTN_2(bool Q, Imm<2> size, Vec Vn, Reg Rd);
|
bool UQXTN_2(bool Q, Imm<2> size, Vec Vn, Vec Vd);
|
||||||
bool FCVTXN_2(bool Q, bool sz, Vec Vn, Reg Rd);
|
bool FCVTXN_2(bool Q, bool sz, Vec Vn, Reg Rd);
|
||||||
bool FRINTN_1(bool Q, Vec Vn, Vec Vd);
|
bool FRINTN_1(bool Q, Vec Vn, Vec Vd);
|
||||||
bool FRINTN_2(bool Q, bool sz, Vec Vn, Vec Vd);
|
bool FRINTN_2(bool Q, bool sz, Vec Vn, Vec Vd);
|
||||||
|
|
|
@ -106,6 +106,23 @@ bool IntegerConvertToFloat(TranslatorVisitor& v, bool Q, bool sz, Vec Vn, Vec Vd
|
||||||
v.V(datasize, Vd, result);
|
v.V(datasize, Vd, result);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Common translation routine for the saturating-narrow vector instructions.
//
// v     - visitor used to read/write vector registers and to emit IR.
// Q     - selects which 64-bit part of Vd receives the result (part 0 when clear, part 1 when set).
// size  - 2-bit element size field; 0b11 is rejected via ReservedValue().
// Vn    - source vector register, read as a full 128-bit value.
// Vd    - destination vector register.
// fn    - IREmitter member that performs the narrowing; it is called with twice the
//         element size derived from `size` (presumably the width of the source elements).
//
// Returns the result of ReservedValue() for size == 0b11, otherwise true.
bool SaturatedNarrow(TranslatorVisitor& v, bool Q, Imm<2> size, Vec Vn, Vec Vd, IR::U128 (IR::IREmitter::*fn)(size_t, const IR::U128&)) {
    const bool is_reserved_size = size == 0b11;
    if (is_reserved_size) {
        return v.ReservedValue();
    }

    constexpr size_t datasize = 64;
    const size_t esize = size_t{8} << size.ZeroExtend<size_t>();
    const size_t part = static_cast<size_t>(Q);

    // Keep the emission order: read the source, narrow it, then write the destination part.
    const IR::U128 source = v.V(2 * datasize, Vn);
    const IR::U128 narrowed = (v.ir.*fn)(2 * esize, source);
    v.Vpart(datasize, Vd, part, narrowed);

    return true;
}
|
||||||
|
|
||||||
} // Anonymous namespace
|
} // Anonymous namespace
|
||||||
|
|
||||||
bool TranslatorVisitor::CNT(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
|
bool TranslatorVisitor::CNT(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
|
||||||
|
@ -276,35 +293,15 @@ bool TranslatorVisitor::NEG_2(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
|
||||||
}
|
}
|
||||||
|
|
||||||
bool TranslatorVisitor::SQXTUN_2(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
|
bool TranslatorVisitor::SQXTUN_2(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
|
||||||
if (size == 0b11) {
|
return SaturatedNarrow(*this, Q, size, Vn, Vd, &IR::IREmitter::VectorSignedSaturatedNarrowToUnsigned);
|
||||||
return ReservedValue();
|
|
||||||
}
|
|
||||||
|
|
||||||
const size_t esize = 8 << size.ZeroExtend<size_t>();
|
|
||||||
const size_t datasize = 64;
|
|
||||||
const size_t part = Q ? 1 : 0;
|
|
||||||
|
|
||||||
const IR::U128 operand = V(2 * datasize, Vn);
|
|
||||||
const IR::U128 result = ir.VectorSignedSaturatedNarrowToUnsigned(2 * esize, operand);
|
|
||||||
|
|
||||||
Vpart(datasize, Vd, part, result);
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool TranslatorVisitor::SQXTN_2(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
|
bool TranslatorVisitor::SQXTN_2(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
|
||||||
if (size == 0b11) {
|
return SaturatedNarrow(*this, Q, size, Vn, Vd, &IR::IREmitter::VectorSignedSaturatedNarrowToSigned);
|
||||||
return ReservedValue();
|
}
|
||||||
}
|
|
||||||
|
|
||||||
const size_t esize = 8 << size.ZeroExtend<size_t>();
|
bool TranslatorVisitor::UQXTN_2(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
|
||||||
const size_t datasize = 64;
|
return SaturatedNarrow(*this, Q, size, Vn, Vd, &IR::IREmitter::VectorUnsignedSaturatedNarrow);
|
||||||
const size_t part = Q ? 1 : 0;
|
|
||||||
|
|
||||||
const IR::U128 operand = V(2 * datasize, Vn);
|
|
||||||
const IR::U128 result = ir.VectorSignedSaturatedNarrowToSigned(2 * esize, operand);
|
|
||||||
|
|
||||||
Vpart(datasize, Vd, part, result);
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool TranslatorVisitor::NOT(bool Q, Vec Vn, Vec Vd) {
|
bool TranslatorVisitor::NOT(bool Q, Vec Vn, Vec Vd) {
|
||||||
|
|
|
@ -1346,6 +1346,19 @@ U128 IREmitter::VectorUnsignedAbsoluteDifference(size_t esize, const U128& a, co
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emits the unsigned saturating-narrow IR instruction matching the given element size.
// Supported values of esize are 16, 32 and 64; any other value hits UNREACHABLE().
U128 IREmitter::VectorUnsignedSaturatedNarrow(size_t esize, const U128& a) {
    if (esize == 16) {
        return Inst<U128>(Opcode::VectorUnsignedSaturatedNarrow16, a);
    }
    if (esize == 32) {
        return Inst<U128>(Opcode::VectorUnsignedSaturatedNarrow32, a);
    }
    if (esize == 64) {
        return Inst<U128>(Opcode::VectorUnsignedSaturatedNarrow64, a);
    }

    // No opcode exists for any other element size.
    UNREACHABLE();
    return {};
}
|
||||||
|
|
||||||
U128 IREmitter::VectorZeroExtend(size_t original_esize, const U128& a) {
|
U128 IREmitter::VectorZeroExtend(size_t original_esize, const U128& a) {
|
||||||
switch (original_esize) {
|
switch (original_esize) {
|
||||||
case 8:
|
case 8:
|
||||||
|
|
|
@ -253,6 +253,7 @@ public:
|
||||||
U128 VectorSignedSaturatedNarrowToUnsigned(size_t original_esize, const U128& a);
|
U128 VectorSignedSaturatedNarrowToUnsigned(size_t original_esize, const U128& a);
|
||||||
U128 VectorSub(size_t esize, const U128& a, const U128& b);
|
U128 VectorSub(size_t esize, const U128& a, const U128& b);
|
||||||
U128 VectorUnsignedAbsoluteDifference(size_t esize, const U128& a, const U128& b);
|
U128 VectorUnsignedAbsoluteDifference(size_t esize, const U128& a, const U128& b);
|
||||||
|
U128 VectorUnsignedSaturatedNarrow(size_t esize, const U128& a);
|
||||||
U128 VectorZeroExtend(size_t original_esize, const U128& a);
|
U128 VectorZeroExtend(size_t original_esize, const U128& a);
|
||||||
U128 VectorZeroUpper(const U128& a);
|
U128 VectorZeroUpper(const U128& a);
|
||||||
U128 ZeroVector();
|
U128 ZeroVector();
|
||||||
|
|
|
@ -347,6 +347,9 @@ bool Inst::WritesToFPSRCumulativeSaturationBit() const {
|
||||||
case Opcode::VectorSignedSaturatedNarrowToUnsigned16:
|
case Opcode::VectorSignedSaturatedNarrowToUnsigned16:
|
||||||
case Opcode::VectorSignedSaturatedNarrowToUnsigned32:
|
case Opcode::VectorSignedSaturatedNarrowToUnsigned32:
|
||||||
case Opcode::VectorSignedSaturatedNarrowToUnsigned64:
|
case Opcode::VectorSignedSaturatedNarrowToUnsigned64:
|
||||||
|
case Opcode::VectorUnsignedSaturatedNarrow16:
|
||||||
|
case Opcode::VectorUnsignedSaturatedNarrow32:
|
||||||
|
case Opcode::VectorUnsignedSaturatedNarrow64:
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
|
|
|
@ -360,6 +360,9 @@ OPCODE(VectorSub64, T::U128, T::U128,
|
||||||
OPCODE(VectorUnsignedAbsoluteDifference8, T::U128, T::U128, T::U128 )
|
OPCODE(VectorUnsignedAbsoluteDifference8, T::U128, T::U128, T::U128 )
|
||||||
OPCODE(VectorUnsignedAbsoluteDifference16, T::U128, T::U128, T::U128 )
|
OPCODE(VectorUnsignedAbsoluteDifference16, T::U128, T::U128, T::U128 )
|
||||||
OPCODE(VectorUnsignedAbsoluteDifference32, T::U128, T::U128, T::U128 )
|
OPCODE(VectorUnsignedAbsoluteDifference32, T::U128, T::U128, T::U128 )
|
||||||
|
OPCODE(VectorUnsignedSaturatedNarrow16, T::U128, T::U128 )
|
||||||
|
OPCODE(VectorUnsignedSaturatedNarrow32, T::U128, T::U128 )
|
||||||
|
OPCODE(VectorUnsignedSaturatedNarrow64, T::U128, T::U128 )
|
||||||
OPCODE(VectorZeroExtend8, T::U128, T::U128 )
|
OPCODE(VectorZeroExtend8, T::U128, T::U128 )
|
||||||
OPCODE(VectorZeroExtend16, T::U128, T::U128 )
|
OPCODE(VectorZeroExtend16, T::U128, T::U128 )
|
||||||
OPCODE(VectorZeroExtend32, T::U128, T::U128 )
|
OPCODE(VectorZeroExtend32, T::U128, T::U128 )
|
||||||
|
|
Loading…
Reference in a new issue