Merge pull request #448 from lioncash/saturate
A64: Implement SQSHRN, SQSHRUN, and UQSHRN's scalar variants
This commit is contained in:
commit
37c4c39d62
3 changed files with 72 additions and 9 deletions
|
@ -513,7 +513,7 @@ INST(SRSHR_1, "SRSHR", "01011
|
||||||
INST(SRSRA_1, "SRSRA", "010111110IIIIiii001101nnnnnddddd")
|
INST(SRSRA_1, "SRSRA", "010111110IIIIiii001101nnnnnddddd")
|
||||||
INST(SHL_1, "SHL", "010111110IIIIiii010101nnnnnddddd")
|
INST(SHL_1, "SHL", "010111110IIIIiii010101nnnnnddddd")
|
||||||
//INST(SQSHL_imm_1, "SQSHL (immediate)", "010111110IIIIiii011101nnnnnddddd")
|
//INST(SQSHL_imm_1, "SQSHL (immediate)", "010111110IIIIiii011101nnnnnddddd")
|
||||||
//INST(SQSHRN_1, "SQSHRN, SQSHRN2", "010111110IIIIiii100101nnnnnddddd")
|
INST(SQSHRN_1, "SQSHRN, SQSHRN2", "010111110IIIIiii100101nnnnnddddd")
|
||||||
//INST(SQRSHRN_1, "SQRSHRN, SQRSHRN2", "010111110IIIIiii100111nnnnnddddd")
|
//INST(SQRSHRN_1, "SQRSHRN, SQRSHRN2", "010111110IIIIiii100111nnnnnddddd")
|
||||||
INST(SCVTF_fix_1, "SCVTF (vector, fixed-point)", "010111110IIIIiii111001nnnnnddddd")
|
INST(SCVTF_fix_1, "SCVTF (vector, fixed-point)", "010111110IIIIiii111001nnnnnddddd")
|
||||||
INST(FCVTZS_fix_1, "FCVTZS (vector, fixed-point)", "010111110IIIIiii111111nnnnnddddd")
|
INST(FCVTZS_fix_1, "FCVTZS (vector, fixed-point)", "010111110IIIIiii111111nnnnnddddd")
|
||||||
|
@ -525,9 +525,9 @@ INST(SRI_1, "SRI", "01111
|
||||||
INST(SLI_1, "SLI", "011111110IIIIiii010101nnnnnddddd")
|
INST(SLI_1, "SLI", "011111110IIIIiii010101nnnnnddddd")
|
||||||
//INST(SQSHLU_1, "SQSHLU", "011111110IIIIiii011001nnnnnddddd")
|
//INST(SQSHLU_1, "SQSHLU", "011111110IIIIiii011001nnnnnddddd")
|
||||||
//INST(UQSHL_imm_1, "UQSHL (immediate)", "011111110IIIIiii011101nnnnnddddd")
|
//INST(UQSHL_imm_1, "UQSHL (immediate)", "011111110IIIIiii011101nnnnnddddd")
|
||||||
//INST(SQSHRUN_1, "SQSHRUN, SQSHRUN2", "011111110IIIIiii100001nnnnnddddd")
|
INST(SQSHRUN_1, "SQSHRUN, SQSHRUN2", "011111110IIIIiii100001nnnnnddddd")
|
||||||
//INST(SQRSHRUN_1, "SQRSHRUN, SQRSHRUN2", "011111110IIIIiii100011nnnnnddddd")
|
//INST(SQRSHRUN_1, "SQRSHRUN, SQRSHRUN2", "011111110IIIIiii100011nnnnnddddd")
|
||||||
//INST(UQSHRN_1, "UQSHRN, UQSHRN2", "011111110IIIIiii100101nnnnnddddd")
|
INST(UQSHRN_1, "UQSHRN, UQSHRN2", "011111110IIIIiii100101nnnnnddddd")
|
||||||
//INST(UQRSHRN_1, "UQRSHRN, UQRSHRN2", "011111110IIIIiii100111nnnnnddddd")
|
//INST(UQRSHRN_1, "UQRSHRN, UQRSHRN2", "011111110IIIIiii100111nnnnnddddd")
|
||||||
INST(UCVTF_fix_1, "UCVTF (vector, fixed-point)", "011111110IIIIiii111001nnnnnddddd")
|
INST(UCVTF_fix_1, "UCVTF (vector, fixed-point)", "011111110IIIIiii111001nnnnnddddd")
|
||||||
INST(FCVTZU_fix_1, "FCVTZU (vector, fixed-point)", "011111110IIIIiii111111nnnnnddddd")
|
INST(FCVTZU_fix_1, "FCVTZU (vector, fixed-point)", "011111110IIIIiii111111nnnnnddddd")
|
||||||
|
|
|
@ -622,8 +622,8 @@ struct TranslatorVisitor final {
|
||||||
bool SRSRA_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd);
|
bool SRSRA_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd);
|
||||||
bool SHL_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd);
|
bool SHL_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd);
|
||||||
bool SQSHL_imm_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd);
|
bool SQSHL_imm_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd);
|
||||||
bool SQSHRN_1(Imm<4> immh, Imm<3> immb, Vec Vn, Reg Rd);
|
bool SQSHRN_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd);
|
||||||
bool SQRSHRN_1(Imm<4> immh, Imm<3> immb, Vec Vn, Reg Rd);
|
bool SQRSHRN_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd);
|
||||||
bool SCVTF_fix_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd);
|
bool SCVTF_fix_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd);
|
||||||
bool FCVTZS_fix_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd);
|
bool FCVTZS_fix_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd);
|
||||||
bool USHR_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd);
|
bool USHR_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd);
|
||||||
|
@ -634,10 +634,10 @@ struct TranslatorVisitor final {
|
||||||
bool SLI_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd);
|
bool SLI_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd);
|
||||||
bool SQSHLU_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd);
|
bool SQSHLU_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd);
|
||||||
bool UQSHL_imm_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd);
|
bool UQSHL_imm_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd);
|
||||||
bool SQSHRUN_1(Imm<4> immh, Imm<3> immb, Vec Vn, Reg Rd);
|
bool SQSHRUN_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd);
|
||||||
bool SQRSHRUN_1(Imm<4> immh, Imm<3> immb, Vec Vn, Reg Rd);
|
bool SQRSHRUN_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd);
|
||||||
bool UQSHRN_1(Imm<4> immh, Imm<3> immb, Vec Vn, Reg Rd);
|
bool UQSHRN_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd);
|
||||||
bool UQRSHRN_1(Imm<4> immh, Imm<3> immb, Vec Vn, Reg Rd);
|
bool UQRSHRN_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd);
|
||||||
bool UCVTF_fix_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd);
|
bool UCVTF_fix_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd);
|
||||||
bool FCVTZU_fix_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd);
|
bool FCVTZU_fix_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd);
|
||||||
|
|
||||||
|
|
|
@ -9,6 +9,12 @@
|
||||||
|
|
||||||
namespace Dynarmic::A64 {
|
namespace Dynarmic::A64 {
|
||||||
namespace {
|
namespace {
|
||||||
|
// Selects how ShiftRightNarrowing reduces the shifted double-width element
// to the destination element size.
enum class Narrowing {
    // Narrow via VectorNarrow.
    Truncation,
    // Narrow with saturation to an unsigned result; the source may be treated
    // as either signed or unsigned.
    SaturateToUnsigned,
    // Narrow with saturation to a signed result; only used with a signed source.
    SaturateToSigned,
};
|
||||||
|
|
||||||
enum class ShiftExtraBehavior {
|
enum class ShiftExtraBehavior {
|
||||||
None,
|
None,
|
||||||
Accumulate,
|
Accumulate,
|
||||||
|
@ -127,6 +133,51 @@ bool ShiftAndInsert(TranslatorVisitor& v, Imm<4> immh, Imm<3> immb, Vec Vn, Vec
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Shared translation for the scalar "shift right and narrow" instructions.
//
// Takes element 0 of Vn at twice the destination element size, shifts it right
// by the immediate-encoded amount (arithmetic shift when `signedness` is Signed,
// logical shift otherwise), narrows it as selected by `narrowing`, and writes
// the narrowed element to Vd as a scalar.
//
// Returns v.UnallocatedEncoding() when immh is zero or has bit 3 set;
// otherwise returns true.
bool ShiftRightNarrowing(TranslatorVisitor& v, Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd,
                         Narrowing narrowing, Signedness signedness) {
    // Both immh == 0 and immh<3> == 1 are rejected as unallocated.
    if (immh == 0b0000 || immh.Bit<3>()) {
        return v.UnallocatedEncoding();
    }

    // The destination element size is derived from the highest set bit of immh;
    // the source element is twice as wide.
    const size_t dest_esize = 8 << Common::HighestSetBit(immh.ZeroExtend());
    const size_t src_esize = 2 * dest_esize;
    const u8 shift = static_cast<u8>(src_esize - concatenate(immh, immb).ZeroExtend());

    // Only element 0 of the source participates; it is zero-extended to a full
    // quad before the vector shift is applied.
    const auto source_element = v.ir.VectorGetElement(src_esize, v.V(128, Vn), 0);
    const IR::U128 widened = v.ir.ZeroExtendToQuad(source_element);

    const IR::U128 shifted = signedness == Signedness::Signed
                                 ? v.ir.VectorArithmeticShiftRight(src_esize, widened, shift)
                                 : v.ir.VectorLogicalShiftRight(src_esize, widened, shift);

    const auto narrow = [&]() -> IR::U128 {
        switch (narrowing) {
        case Narrowing::Truncation:
            return v.ir.VectorNarrow(src_esize, shifted);
        case Narrowing::SaturateToUnsigned:
            return signedness == Signedness::Signed
                       ? v.ir.VectorSignedSaturatedNarrowToUnsigned(src_esize, shifted)
                       : v.ir.VectorUnsignedSaturatedNarrow(src_esize, shifted);
        case Narrowing::SaturateToSigned:
            // Saturating to a signed result is only defined here for a signed source.
            ASSERT(signedness == Signedness::Signed);
            return v.ir.VectorSignedSaturatedNarrowToSigned(src_esize, shifted);
        }
        UNREACHABLE();
        return IR::U128{};
    };
    const IR::U128 narrowed = narrow();

    // Element 0 of the narrowed vector is the scalar result.
    const IR::UAny lowest_element = v.ir.VectorGetElement(dest_esize, narrowed, 0);
    v.V_scalar(dest_esize, Vd, lowest_element);
    return true;
}
|
||||||
|
|
||||||
bool ScalarFPConvertWithRound(TranslatorVisitor& v, Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd, Signedness sign, FloatConversionDirection direction, FP::RoundingMode rounding_mode) {
|
bool ScalarFPConvertWithRound(TranslatorVisitor& v, Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd, Signedness sign, FloatConversionDirection direction, FP::RoundingMode rounding_mode) {
|
||||||
const u32 immh_value = immh.ZeroExtend();
|
const u32 immh_value = immh.ZeroExtend();
|
||||||
|
|
||||||
|
@ -202,6 +253,14 @@ bool TranslatorVisitor::SRI_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd) {
|
||||||
return ShiftAndInsert(*this, immh, immb, Vn, Vd, ShiftDirection::Right);
|
return ShiftAndInsert(*this, immh, immb, Vn, Vd, ShiftDirection::Right);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SQSHRN (scalar): delegates to ShiftRightNarrowing with a signed source and
// saturation to a signed result.
bool TranslatorVisitor::SQSHRN_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd) {
    constexpr Narrowing narrowing_mode = Narrowing::SaturateToSigned;
    constexpr Signedness source_sign = Signedness::Signed;
    return ShiftRightNarrowing(*this, immh, immb, Vn, Vd, narrowing_mode, source_sign);
}
|
||||||
|
|
||||||
|
// SQSHRUN (scalar): delegates to ShiftRightNarrowing with a signed source and
// saturation to an unsigned result.
bool TranslatorVisitor::SQSHRUN_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd) {
    constexpr Narrowing narrowing_mode = Narrowing::SaturateToUnsigned;
    constexpr Signedness source_sign = Signedness::Signed;
    return ShiftRightNarrowing(*this, immh, immb, Vn, Vd, narrowing_mode, source_sign);
}
|
||||||
|
|
||||||
// SRSHR (scalar): delegates to RoundingShiftRight with a signed source and no
// extra behavior applied to the result.
bool TranslatorVisitor::SRSHR_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd) {
    constexpr ShiftExtraBehavior extra = ShiftExtraBehavior::None;
    constexpr Signedness source_sign = Signedness::Signed;
    return RoundingShiftRight(*this, immh, immb, Vn, Vd, extra, source_sign);
}
|
||||||
|
@ -233,6 +292,10 @@ bool TranslatorVisitor::SHL_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// UQSHRN (scalar): delegates to ShiftRightNarrowing with an unsigned source and
// saturation to an unsigned result.
bool TranslatorVisitor::UQSHRN_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd) {
    constexpr Narrowing narrowing_mode = Narrowing::SaturateToUnsigned;
    constexpr Signedness source_sign = Signedness::Unsigned;
    return ShiftRightNarrowing(*this, immh, immb, Vn, Vd, narrowing_mode, source_sign);
}
|
||||||
|
|
||||||
// URSHR (scalar): delegates to RoundingShiftRight with an unsigned source and
// no extra behavior applied to the result.
bool TranslatorVisitor::URSHR_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd) {
    constexpr ShiftExtraBehavior extra = ShiftExtraBehavior::None;
    constexpr Signedness source_sign = Signedness::Unsigned;
    return RoundingShiftRight(*this, immh, immb, Vn, Vd, extra, source_sign);
}
|
||||||
|
|
Loading…
Reference in a new issue