Merge pull request #477 from lioncash/rsqrt

A64: Handle half-precision variants of FRSQRTE
This commit is contained in:
Merry 2019-04-14 11:21:05 +01:00 committed by MerryMage
commit 554c8c27c6
10 changed files with 53 additions and 10 deletions

View file

@ -920,6 +920,10 @@ static void EmitFPRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
code.CallFunction(&FP::FPRSqrtEstimate<FPT>); code.CallFunction(&FP::FPRSqrtEstimate<FPT>);
} }
// Emits the 16-bit (u16) variant of the FPRSqrtEstimate IR instruction by
// forwarding to the templated EmitFPRSqrtEstimate helper instantiated with u16.
void EmitX64::EmitFPRSqrtEstimate16(EmitContext& ctx, IR::Inst* inst) {
EmitFPRSqrtEstimate<u16>(code, ctx, inst);
}
void EmitX64::EmitFPRSqrtEstimate32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitFPRSqrtEstimate32(EmitContext& ctx, IR::Inst* inst) {
EmitFPRSqrtEstimate<u32>(code, ctx, inst); EmitFPRSqrtEstimate<u32>(code, ctx, inst);
} }

View file

@ -1241,6 +1241,10 @@ static void EmitRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins
}); });
} }
// Emits the 16-bit element variant of the FPVectorRSqrtEstimate IR instruction
// by forwarding to the templated EmitRSqrtEstimate helper instantiated with u16.
void EmitX64::EmitFPVectorRSqrtEstimate16(EmitContext& ctx, IR::Inst* inst) {
EmitRSqrtEstimate<u16>(code, ctx, inst);
}
void EmitX64::EmitFPVectorRSqrtEstimate32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitFPVectorRSqrtEstimate32(EmitContext& ctx, IR::Inst* inst) {
EmitRSqrtEstimate<u32>(code, ctx, inst); EmitRSqrtEstimate<u32>(code, ctx, inst);
} }

View file

@ -19,7 +19,7 @@ namespace Dynarmic::FP {
template<typename FPT> template<typename FPT>
FPT FPRSqrtEstimate(FPT op, FPCR fpcr, FPSR& fpsr) { FPT FPRSqrtEstimate(FPT op, FPCR fpcr, FPSR& fpsr) {
auto [type, sign, value] = FPUnpack<FPT>(op, fpcr, fpsr); const auto [type, sign, value] = FPUnpack<FPT>(op, fpcr, fpsr);
if (type == FPType::SNaN || type == FPType::QNaN) { if (type == FPType::SNaN || type == FPType::QNaN) {
return FPProcessNaN(type, op, fpcr, fpsr); return FPProcessNaN(type, op, fpcr, fpsr);
@ -27,16 +27,16 @@ FPT FPRSqrtEstimate(FPT op, FPCR fpcr, FPSR& fpsr) {
if (type == FPType::Zero) { if (type == FPType::Zero) {
FPProcessException(FPExc::DivideByZero, fpcr, fpsr); FPProcessException(FPExc::DivideByZero, fpcr, fpsr);
return FPInfo<FPT>::Infinity(sign); return FPT(FPInfo<FPT>::Infinity(sign));
} }
if (sign) { if (sign) {
FPProcessException(FPExc::InvalidOp, fpcr, fpsr); FPProcessException(FPExc::InvalidOp, fpcr, fpsr);
return FPInfo<FPT>::DefaultNaN(); return FPT(FPInfo<FPT>::DefaultNaN());
} }
if (type == FPType::Infinity) { if (type == FPType::Infinity) {
return FPInfo<FPT>::Zero(false); return FPT(FPInfo<FPT>::Zero(false));
} }
const int result_exponent = (-(value.exponent + 1)) >> 1; const int result_exponent = (-(value.exponent + 1)) >> 1;
@ -50,6 +50,7 @@ FPT FPRSqrtEstimate(FPT op, FPCR fpcr, FPSR& fpsr) {
return (bits_exponent << FPInfo<FPT>::explicit_mantissa_width) | (bits_mantissa & FPInfo<FPT>::mantissa_mask); return (bits_exponent << FPInfo<FPT>::explicit_mantissa_width) | (bits_mantissa & FPInfo<FPT>::mantissa_mask);
} }
template u16 FPRSqrtEstimate<u16>(u16 op, FPCR fpcr, FPSR& fpsr);
template u32 FPRSqrtEstimate<u32>(u32 op, FPCR fpcr, FPSR& fpsr); template u32 FPRSqrtEstimate<u32>(u32 op, FPCR fpcr, FPSR& fpsr);
template u64 FPRSqrtEstimate<u64>(u64 op, FPCR fpcr, FPSR& fpsr); template u64 FPRSqrtEstimate<u64>(u64 op, FPCR fpcr, FPSR& fpsr);

View file

@ -438,7 +438,7 @@ INST(FCMLE_2, "FCMLE (zero)", "01111
INST(FCVTPU_2, "FCVTPU (vector)", "011111101z100001101010nnnnnddddd") INST(FCVTPU_2, "FCVTPU (vector)", "011111101z100001101010nnnnnddddd")
//INST(FCVTZU_int_1, "FCVTZU (vector, integer)", "0111111011111001101110nnnnnddddd") //INST(FCVTZU_int_1, "FCVTZU (vector, integer)", "0111111011111001101110nnnnnddddd")
INST(FCVTZU_int_2, "FCVTZU (vector, integer)", "011111101z100001101110nnnnnddddd") INST(FCVTZU_int_2, "FCVTZU (vector, integer)", "011111101z100001101110nnnnnddddd")
//INST(FRSQRTE_1, "FRSQRTE", "0111111011111001110110nnnnnddddd") INST(FRSQRTE_1, "FRSQRTE", "0111111011111001110110nnnnnddddd")
INST(FRSQRTE_2, "FRSQRTE", "011111101z100001110110nnnnnddddd") INST(FRSQRTE_2, "FRSQRTE", "011111101z100001110110nnnnnddddd")
// Data Processing - FP and SIMD - Scalar three same extra // Data Processing - FP and SIMD - Scalar three same extra
@ -692,7 +692,7 @@ INST(FCVTPU_4, "FCVTPU (vector)", "0Q101
//INST(FCVTZU_int_3, "FCVTZU (vector, integer)", "0Q10111011111001101110nnnnnddddd") //INST(FCVTZU_int_3, "FCVTZU (vector, integer)", "0Q10111011111001101110nnnnnddddd")
INST(FCVTZU_int_4, "FCVTZU (vector, integer)", "0Q1011101z100001101110nnnnnddddd") INST(FCVTZU_int_4, "FCVTZU (vector, integer)", "0Q1011101z100001101110nnnnnddddd")
INST(URSQRTE, "URSQRTE", "0Q1011101z100001110010nnnnnddddd") INST(URSQRTE, "URSQRTE", "0Q1011101z100001110010nnnnnddddd")
//INST(FRSQRTE_3, "FRSQRTE", "0Q10111011111001110110nnnnnddddd") INST(FRSQRTE_3, "FRSQRTE", "0Q10111011111001110110nnnnnddddd")
INST(FRSQRTE_4, "FRSQRTE", "0Q1011101z100001110110nnnnnddddd") INST(FRSQRTE_4, "FRSQRTE", "0Q1011101z100001110110nnnnnddddd")
//INST(FSQRT_1, "FSQRT (vector)", "0Q10111011111001111110nnnnnddddd") //INST(FSQRT_1, "FSQRT (vector)", "0Q10111011111001111110nnnnnddddd")
INST(FSQRT_2, "FSQRT (vector)", "0Q1011101z100001111110nnnnnddddd") INST(FSQRT_2, "FSQRT (vector)", "0Q1011101z100001111110nnnnnddddd")

View file

@ -200,6 +200,16 @@ bool TranslatorVisitor::FRECPX_2(bool sz, Vec Vn, Vec Vd) {
return true; return true;
} }
// Translates the scalar FRSQRTE encoding whose element size is fixed at
// 16 bits: reads the scalar from Vn, emits an FPRSqrtEstimate IR operation on
// it, and writes the 16-bit result to Vd. Always returns true.
bool TranslatorVisitor::FRSQRTE_1(Vec Vn, Vec Vd) {
    // This encoding has no size field; the element width is always 16.
    constexpr size_t element_bits = 16;

    const IR::U16 source = V_scalar(element_bits, Vn);
    const IR::U16 estimate = ir.FPRSqrtEstimate(source);

    V_scalar(element_bits, Vd, estimate);
    return true;
}
bool TranslatorVisitor::FRSQRTE_2(bool sz, Vec Vn, Vec Vd) { bool TranslatorVisitor::FRSQRTE_2(bool sz, Vec Vn, Vec Vd) {
const size_t esize = sz ? 64 : 32; const size_t esize = sz ? 64 : 32;

View file

@ -548,6 +548,17 @@ bool TranslatorVisitor::FSQRT_2(bool Q, bool sz, Vec Vn, Vec Vd) {
return true; return true;
} }
bool TranslatorVisitor::FRSQRTE_3(bool Q, Vec Vn, Vec Vd) {
const size_t datasize = Q ? 128 : 64;
const size_t esize = 16;
const IR::U128 operand = V(datasize, Vn);
const IR::U128 result = ir.FPVectorRSqrtEstimate(esize, operand);
V(datasize, Vd, result);
return true;
}
bool TranslatorVisitor::FRSQRTE_4(bool Q, bool sz, Vec Vn, Vec Vd) { bool TranslatorVisitor::FRSQRTE_4(bool Q, bool sz, Vec Vn, Vec Vd) {
if (sz && !Q) { if (sz && !Q) {
return ReservedValue(); return ReservedValue();

View file

@ -1967,11 +1967,18 @@ U16U32U64 IREmitter::FPRoundInt(const U16U32U64& a, FP::RoundingMode rounding, b
} }
} }
U32U64 IREmitter::FPRSqrtEstimate(const U32U64& a) { U16U32U64 IREmitter::FPRSqrtEstimate(const U16U32U64& a) {
if (a.GetType() == Type::U32) { switch (a.GetType()) {
case Type::U16:
return Inst<U16>(Opcode::FPRSqrtEstimate16, a);
case Type::U32:
return Inst<U32>(Opcode::FPRSqrtEstimate32, a); return Inst<U32>(Opcode::FPRSqrtEstimate32, a);
} case Type::U64:
return Inst<U64>(Opcode::FPRSqrtEstimate64, a); return Inst<U64>(Opcode::FPRSqrtEstimate64, a);
default:
UNREACHABLE();
return U16U32U64{};
}
} }
U32U64 IREmitter::FPRSqrtStepFused(const U32U64& a, const U32U64& b) { U32U64 IREmitter::FPRSqrtStepFused(const U32U64& a, const U32U64& b) {
@ -2295,6 +2302,8 @@ U128 IREmitter::FPVectorRoundInt(size_t esize, const U128& operand, FP::Rounding
U128 IREmitter::FPVectorRSqrtEstimate(size_t esize, const U128& a) { U128 IREmitter::FPVectorRSqrtEstimate(size_t esize, const U128& a) {
switch (esize) { switch (esize) {
case 16:
return Inst<U128>(Opcode::FPVectorRSqrtEstimate16, a);
case 32: case 32:
return Inst<U128>(Opcode::FPVectorRSqrtEstimate32, a); return Inst<U128>(Opcode::FPVectorRSqrtEstimate32, a);
case 64: case 64:

View file

@ -309,7 +309,7 @@ public:
U16U32U64 FPRecipExponent(const U16U32U64& a); U16U32U64 FPRecipExponent(const U16U32U64& a);
U32U64 FPRecipStepFused(const U32U64& a, const U32U64& b); U32U64 FPRecipStepFused(const U32U64& a, const U32U64& b);
U16U32U64 FPRoundInt(const U16U32U64& a, FP::RoundingMode rounding, bool exact); U16U32U64 FPRoundInt(const U16U32U64& a, FP::RoundingMode rounding, bool exact);
U32U64 FPRSqrtEstimate(const U32U64& a); U16U32U64 FPRSqrtEstimate(const U16U32U64& a);
U32U64 FPRSqrtStepFused(const U32U64& a, const U32U64& b); U32U64 FPRSqrtStepFused(const U32U64& a, const U32U64& b);
U32U64 FPSqrt(const U32U64& a); U32U64 FPSqrt(const U32U64& a);
U32U64 FPSub(const U32U64& a, const U32U64& b, bool fpcr_controlled); U32U64 FPSub(const U32U64& a, const U32U64& b, bool fpcr_controlled);

View file

@ -282,6 +282,7 @@ bool Inst::ReadsFromAndWritesToFPSRCumulativeExceptionBits() const {
case Opcode::FPRoundInt16: case Opcode::FPRoundInt16:
case Opcode::FPRoundInt32: case Opcode::FPRoundInt32:
case Opcode::FPRoundInt64: case Opcode::FPRoundInt64:
case Opcode::FPRSqrtEstimate16:
case Opcode::FPRSqrtEstimate32: case Opcode::FPRSqrtEstimate32:
case Opcode::FPRSqrtEstimate64: case Opcode::FPRSqrtEstimate64:
case Opcode::FPRSqrtStepFused32: case Opcode::FPRSqrtStepFused32:
@ -342,6 +343,7 @@ bool Inst::ReadsFromAndWritesToFPSRCumulativeExceptionBits() const {
case Opcode::FPVectorRoundInt16: case Opcode::FPVectorRoundInt16:
case Opcode::FPVectorRoundInt32: case Opcode::FPVectorRoundInt32:
case Opcode::FPVectorRoundInt64: case Opcode::FPVectorRoundInt64:
case Opcode::FPVectorRSqrtEstimate16:
case Opcode::FPVectorRSqrtEstimate32: case Opcode::FPVectorRSqrtEstimate32:
case Opcode::FPVectorRSqrtEstimate64: case Opcode::FPVectorRSqrtEstimate64:
case Opcode::FPVectorRSqrtStepFused32: case Opcode::FPVectorRSqrtStepFused32:

View file

@ -501,6 +501,7 @@ OPCODE(FPRecipStepFused64, U64, U64,
OPCODE(FPRoundInt16, U16, U16, U8, U1 ) OPCODE(FPRoundInt16, U16, U16, U8, U1 )
OPCODE(FPRoundInt32, U32, U32, U8, U1 ) OPCODE(FPRoundInt32, U32, U32, U8, U1 )
OPCODE(FPRoundInt64, U64, U64, U8, U1 ) OPCODE(FPRoundInt64, U64, U64, U8, U1 )
OPCODE(FPRSqrtEstimate16, U16, U16 )
OPCODE(FPRSqrtEstimate32, U32, U32 ) OPCODE(FPRSqrtEstimate32, U32, U32 )
OPCODE(FPRSqrtEstimate64, U64, U64 ) OPCODE(FPRSqrtEstimate64, U64, U64 )
OPCODE(FPRSqrtStepFused32, U32, U32, U32 ) OPCODE(FPRSqrtStepFused32, U32, U32, U32 )
@ -577,6 +578,7 @@ OPCODE(FPVectorRecipStepFused64, U128, U128
OPCODE(FPVectorRoundInt16, U128, U128, U8, U1 ) OPCODE(FPVectorRoundInt16, U128, U128, U8, U1 )
OPCODE(FPVectorRoundInt32, U128, U128, U8, U1 ) OPCODE(FPVectorRoundInt32, U128, U128, U8, U1 )
OPCODE(FPVectorRoundInt64, U128, U128, U8, U1 ) OPCODE(FPVectorRoundInt64, U128, U128, U8, U1 )
OPCODE(FPVectorRSqrtEstimate16, U128, U128 )
OPCODE(FPVectorRSqrtEstimate32, U128, U128 ) OPCODE(FPVectorRSqrtEstimate32, U128, U128 )
OPCODE(FPVectorRSqrtEstimate64, U128, U128 ) OPCODE(FPVectorRSqrtEstimate64, U128, U128 )
OPCODE(FPVectorRSqrtStepFused32, U128, U128, U128 ) OPCODE(FPVectorRSqrtStepFused32, U128, U128, U128 )