A64: Implement UQXTN (vector)

This commit is contained in:
MerryMage 2018-07-24 18:17:45 +01:00
parent e686a81612
commit b455b566e7
8 changed files with 80 additions and 27 deletions

View file

@ -2419,6 +2419,42 @@ void EmitX64::EmitVectorUnsignedAbsoluteDifference32(EmitContext& ctx, IR::Inst*
EmitVectorUnsignedAbsoluteDifference(32, ctx, inst, code); EmitVectorUnsignedAbsoluteDifference(32, ctx, inst, code);
} }
// Emits the unsigned saturating narrow of 16-bit lanes to 8-bit lanes through the
// one-argument fallback helper. The lambda writes one narrowed lane per source lane
// and returns true when at least one lane had to be saturated.
void EmitX64::EmitVectorUnsignedSaturatedNarrow16(EmitContext& ctx, IR::Inst* inst) {
    EmitOneArgumentFallbackWithSaturation(code, ctx, inst, [](VectorArray<u8>& result, const VectorArray<u16>& a) {
        // Largest source value that still fits in the narrowed lane.
        constexpr u16 max_value = 0xFF;
        bool any_saturated = false;
        // Only the first a.size() lanes of result are written here.
        for (size_t lane = 0; lane < a.size(); ++lane) {
            const u16 source = a[lane];
            const bool overflowed = source > max_value;
            result[lane] = static_cast<u8>(overflowed ? max_value : source);
            any_saturated |= overflowed;
        }
        return any_saturated;
    });
}
// Emits the unsigned saturating narrow of 32-bit lanes to 16-bit lanes through the
// one-argument fallback helper. The lambda writes one narrowed lane per source lane
// and returns true when at least one lane had to be saturated.
void EmitX64::EmitVectorUnsignedSaturatedNarrow32(EmitContext& ctx, IR::Inst* inst) {
    EmitOneArgumentFallbackWithSaturation(code, ctx, inst, [](VectorArray<u16>& result, const VectorArray<u32>& a) {
        // Largest source value that still fits in the narrowed lane.
        constexpr u32 max_value = 0xFFFF;
        bool any_saturated = false;
        // Only the first a.size() lanes of result are written here.
        for (size_t lane = 0; lane < a.size(); ++lane) {
            const u32 source = a[lane];
            const bool overflowed = source > max_value;
            result[lane] = static_cast<u16>(overflowed ? max_value : source);
            any_saturated |= overflowed;
        }
        return any_saturated;
    });
}
// Emits the unsigned saturating narrow of 64-bit lanes to 32-bit lanes through the
// one-argument fallback helper. The lambda writes one narrowed lane per source lane
// and returns true when at least one lane had to be saturated.
void EmitX64::EmitVectorUnsignedSaturatedNarrow64(EmitContext& ctx, IR::Inst* inst) {
    EmitOneArgumentFallbackWithSaturation(code, ctx, inst, [](VectorArray<u32>& result, const VectorArray<u64>& a) {
        // Largest source value that still fits in the narrowed lane.
        constexpr u64 max_value = 0xFFFFFFFF;
        bool any_saturated = false;
        // Only the first a.size() lanes of result are written here.
        for (size_t lane = 0; lane < a.size(); ++lane) {
            const u64 source = a[lane];
            const bool overflowed = source > max_value;
            result[lane] = static_cast<u32>(overflowed ? max_value : source);
            any_saturated |= overflowed;
        }
        return any_saturated;
    });
}
void EmitX64::EmitVectorZeroExtend8(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorZeroExtend8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);

View file

@ -624,7 +624,7 @@ INST(CMLE_2, "CMLE (zero)", "0Q101
INST(NEG_2, "NEG (vector)", "0Q101110zz100000101110nnnnnddddd") INST(NEG_2, "NEG (vector)", "0Q101110zz100000101110nnnnnddddd")
INST(SQXTUN_2, "SQXTUN, SQXTUN2", "0Q101110zz100001001010nnnnnddddd") INST(SQXTUN_2, "SQXTUN, SQXTUN2", "0Q101110zz100001001010nnnnnddddd")
INST(SHLL, "SHLL, SHLL2", "0Q101110zz100001001110nnnnnddddd") INST(SHLL, "SHLL, SHLL2", "0Q101110zz100001001110nnnnnddddd")
//INST(UQXTN_2, "UQXTN, UQXTN2", "0Q101110zz100001010010nnnnnddddd") INST(UQXTN_2, "UQXTN, UQXTN2", "0Q101110zz100001010010nnnnnddddd")
//INST(FCVTXN_2, "FCVTXN, FCVTXN2", "0Q1011100z100001011010nnnnnddddd") //INST(FCVTXN_2, "FCVTXN, FCVTXN2", "0Q1011100z100001011010nnnnnddddd")
//INST(FRINTA_1, "FRINTA (vector)", "0Q10111001111001100010nnnnnddddd") //INST(FRINTA_1, "FRINTA (vector)", "0Q10111001111001100010nnnnnddddd")
//INST(FRINTA_2, "FRINTA (vector)", "0Q1011100z100001100010nnnnnddddd") //INST(FRINTA_2, "FRINTA (vector)", "0Q1011100z100001100010nnnnnddddd")

View file

@ -727,7 +727,7 @@ struct TranslatorVisitor final {
bool CMLE_2(bool Q, Imm<2> size, Vec Vn, Vec Vd); bool CMLE_2(bool Q, Imm<2> size, Vec Vn, Vec Vd);
bool NEG_2(bool Q, Imm<2> size, Vec Vn, Vec Vd); bool NEG_2(bool Q, Imm<2> size, Vec Vn, Vec Vd);
bool SQXTUN_2(bool Q, Imm<2> size, Vec Vn, Vec Vd); bool SQXTUN_2(bool Q, Imm<2> size, Vec Vn, Vec Vd);
bool UQXTN_2(bool Q, Imm<2> size, Vec Vn, Reg Rd); bool UQXTN_2(bool Q, Imm<2> size, Vec Vn, Vec Vd);
bool FCVTXN_2(bool Q, bool sz, Vec Vn, Reg Rd); bool FCVTXN_2(bool Q, bool sz, Vec Vn, Reg Rd);
bool FRINTN_1(bool Q, Vec Vn, Vec Vd); bool FRINTN_1(bool Q, Vec Vn, Vec Vd);
bool FRINTN_2(bool Q, bool sz, Vec Vn, Vec Vd); bool FRINTN_2(bool Q, bool sz, Vec Vn, Vec Vd);

View file

@ -106,6 +106,23 @@ bool IntegerConvertToFloat(TranslatorVisitor& v, bool Q, bool sz, Vec Vn, Vec Vd
v.V(datasize, Vd, result); v.V(datasize, Vd, result);
return true; return true;
} }
// Shared translation for the saturating-narrow instructions.
//
// Reads the full 128-bit Vn, applies the IREmitter member function `fn` with the
// source (wide) element size, and stores the 64-bit result into the lower (Q == 0)
// or upper (Q == 1) part of Vd. A size field of 0b11 is treated as a reserved value.
bool SaturatedNarrow(TranslatorVisitor& v, bool Q, Imm<2> size, Vec Vn, Vec Vd, IR::U128 (IR::IREmitter::*fn)(size_t, const IR::U128&)) {
    if (size == 0b11) {
        return v.ReservedValue();
    }

    // Destination element width in bits; the source elements are twice as wide.
    const size_t narrow_esize = 8 << size.ZeroExtend<size_t>();
    const size_t wide_esize = 2 * narrow_esize;

    const size_t dest_bits = 64;
    const size_t half = Q ? 1 : 0;

    const IR::U128 source = v.V(2 * dest_bits, Vn);
    const IR::U128 narrowed = (v.ir.*fn)(wide_esize, source);

    v.Vpart(dest_bits, Vd, half, narrowed);
    return true;
}
} // Anonymous namespace } // Anonymous namespace
bool TranslatorVisitor::CNT(bool Q, Imm<2> size, Vec Vn, Vec Vd) { bool TranslatorVisitor::CNT(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
@ -276,35 +293,15 @@ bool TranslatorVisitor::NEG_2(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
} }
bool TranslatorVisitor::SQXTUN_2(bool Q, Imm<2> size, Vec Vn, Vec Vd) { bool TranslatorVisitor::SQXTUN_2(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
if (size == 0b11) { return SaturatedNarrow(*this, Q, size, Vn, Vd, &IR::IREmitter::VectorSignedSaturatedNarrowToUnsigned);
return ReservedValue();
}
const size_t esize = 8 << size.ZeroExtend<size_t>();
const size_t datasize = 64;
const size_t part = Q ? 1 : 0;
const IR::U128 operand = V(2 * datasize, Vn);
const IR::U128 result = ir.VectorSignedSaturatedNarrowToUnsigned(2 * esize, operand);
Vpart(datasize, Vd, part, result);
return true;
} }
bool TranslatorVisitor::SQXTN_2(bool Q, Imm<2> size, Vec Vn, Vec Vd) { bool TranslatorVisitor::SQXTN_2(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
if (size == 0b11) { return SaturatedNarrow(*this, Q, size, Vn, Vd, &IR::IREmitter::VectorSignedSaturatedNarrowToSigned);
return ReservedValue(); }
}
const size_t esize = 8 << size.ZeroExtend<size_t>(); bool TranslatorVisitor::UQXTN_2(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
const size_t datasize = 64; return SaturatedNarrow(*this, Q, size, Vn, Vd, &IR::IREmitter::VectorUnsignedSaturatedNarrow);
const size_t part = Q ? 1 : 0;
const IR::U128 operand = V(2 * datasize, Vn);
const IR::U128 result = ir.VectorSignedSaturatedNarrowToSigned(2 * esize, operand);
Vpart(datasize, Vd, part, result);
return true;
} }
bool TranslatorVisitor::NOT(bool Q, Vec Vn, Vec Vd) { bool TranslatorVisitor::NOT(bool Q, Vec Vn, Vec Vd) {

View file

@ -1346,6 +1346,19 @@ U128 IREmitter::VectorUnsignedAbsoluteDifference(size_t esize, const U128& a, co
return {}; return {};
} }
// Emits the VectorUnsignedSaturatedNarrow opcode matching `esize` (16, 32 or 64)
// with `a` as its single operand. Any other esize hits UNREACHABLE().
U128 IREmitter::VectorUnsignedSaturatedNarrow(size_t esize, const U128& a) {
    if (esize == 16) {
        return Inst<U128>(Opcode::VectorUnsignedSaturatedNarrow16, a);
    }
    if (esize == 32) {
        return Inst<U128>(Opcode::VectorUnsignedSaturatedNarrow32, a);
    }
    if (esize == 64) {
        return Inst<U128>(Opcode::VectorUnsignedSaturatedNarrow64, a);
    }

    UNREACHABLE();
    return {};
}
U128 IREmitter::VectorZeroExtend(size_t original_esize, const U128& a) { U128 IREmitter::VectorZeroExtend(size_t original_esize, const U128& a) {
switch (original_esize) { switch (original_esize) {
case 8: case 8:

View file

@ -253,6 +253,7 @@ public:
U128 VectorSignedSaturatedNarrowToUnsigned(size_t original_esize, const U128& a); U128 VectorSignedSaturatedNarrowToUnsigned(size_t original_esize, const U128& a);
U128 VectorSub(size_t esize, const U128& a, const U128& b); U128 VectorSub(size_t esize, const U128& a, const U128& b);
U128 VectorUnsignedAbsoluteDifference(size_t esize, const U128& a, const U128& b); U128 VectorUnsignedAbsoluteDifference(size_t esize, const U128& a, const U128& b);
// Emits a VectorUnsignedSaturatedNarrow{16,32,64} instruction selected by esize.
U128 VectorUnsignedSaturatedNarrow(size_t esize, const U128& a);
U128 VectorZeroExtend(size_t original_esize, const U128& a); U128 VectorZeroExtend(size_t original_esize, const U128& a);
U128 VectorZeroUpper(const U128& a); U128 VectorZeroUpper(const U128& a);
U128 ZeroVector(); U128 ZeroVector();

View file

@ -347,6 +347,9 @@ bool Inst::WritesToFPSRCumulativeSaturationBit() const {
case Opcode::VectorSignedSaturatedNarrowToUnsigned16: case Opcode::VectorSignedSaturatedNarrowToUnsigned16:
case Opcode::VectorSignedSaturatedNarrowToUnsigned32: case Opcode::VectorSignedSaturatedNarrowToUnsigned32:
case Opcode::VectorSignedSaturatedNarrowToUnsigned64: case Opcode::VectorSignedSaturatedNarrowToUnsigned64:
case Opcode::VectorUnsignedSaturatedNarrow16:
case Opcode::VectorUnsignedSaturatedNarrow32:
case Opcode::VectorUnsignedSaturatedNarrow64:
return true; return true;
default: default:

View file

@ -360,6 +360,9 @@ OPCODE(VectorSub64, T::U128, T::U128,
OPCODE(VectorUnsignedAbsoluteDifference8, T::U128, T::U128, T::U128 ) OPCODE(VectorUnsignedAbsoluteDifference8, T::U128, T::U128, T::U128 )
OPCODE(VectorUnsignedAbsoluteDifference16, T::U128, T::U128, T::U128 ) OPCODE(VectorUnsignedAbsoluteDifference16, T::U128, T::U128, T::U128 )
OPCODE(VectorUnsignedAbsoluteDifference32, T::U128, T::U128, T::U128 ) OPCODE(VectorUnsignedAbsoluteDifference32, T::U128, T::U128, T::U128 )
// Unsigned saturating narrow: one U128 operand, U128 result; one opcode per esize (16, 32, 64).
OPCODE(VectorUnsignedSaturatedNarrow16, T::U128, T::U128 )
OPCODE(VectorUnsignedSaturatedNarrow32, T::U128, T::U128 )
OPCODE(VectorUnsignedSaturatedNarrow64, T::U128, T::U128 )
OPCODE(VectorZeroExtend8, T::U128, T::U128 ) OPCODE(VectorZeroExtend8, T::U128, T::U128 )
OPCODE(VectorZeroExtend16, T::U128, T::U128 ) OPCODE(VectorZeroExtend16, T::U128, T::U128 )
OPCODE(VectorZeroExtend32, T::U128, T::U128 ) OPCODE(VectorZeroExtend32, T::U128, T::U128 )