A64: Implement FRSQRTE (vector), single/double variant

parent b74d5520f9
commit 45dc5f74f3

6 changed files with 75 additions and 1 deletion

@@ -10,7 +10,10 @@
#include "backend_x64/block_of_code.h"
#include "backend_x64/emit_x64.h"
#include "common/bit_util.h"
#include "common/fp/fpcr.h"
#include "common/fp/op.h"
#include "common/fp/util.h"
#include "common/mp.h"
#include "frontend/ir/basic_block.h"
#include "frontend/ir/microinstruction.h"

@@ -222,6 +225,31 @@ static void EmitVectorOperation64(BlockOfCode& code, EmitContext& ctx, IR::Inst*
    ctx.reg_alloc.DefineValue(inst, result);
}

template <typename Lambda>
inline void EmitOneArgumentFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Lambda lambda) {
    const auto fn = static_cast<mp::equivalent_function_type_t<Lambda>*>(lambda);

    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const Xbyak::Xmm arg1 = ctx.reg_alloc.UseXmm(args[0]);
    ctx.reg_alloc.EndOfAllocScope();
    ctx.reg_alloc.HostCall(nullptr);

    constexpr u32 stack_space = 2 * 16;
    code.sub(rsp, stack_space + ABI_SHADOW_SPACE);
    code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE + 0 * 16]);
    code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 1 * 16]);
    code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR());
    code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);

    code.movaps(xword[code.ABI_PARAM2], arg1);
    code.CallFunction(fn);
    code.movaps(xmm0, xword[rsp + ABI_SHADOW_SPACE + 0 * 16]);

    code.add(rsp, stack_space + ABI_SHADOW_SPACE);

    ctx.reg_alloc.DefineValue(inst, xmm0);
}

void EmitX64::EmitFPVectorAbs16(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
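
EmitOneArgumentFallback spills the operand vector to a scratch area on the host stack and calls a plain C++ routine instead of emitting inline SSE: ABI_PARAM1 points at a 16-byte result buffer, ABI_PARAM2 at the 16-byte operand, ABI_PARAM3 carries the current FPCR value, and ABI_PARAM4 points at the accumulated FPSR exception flags. A minimal sketch of what a callee with that shape looks like; the type stand-ins and the ExampleFallback name are illustrative only, not part of the commit:

#include <array>
#include <cstddef>
#include <cstdint>

using u32 = std::uint32_t;

// Stand-ins for dynarmic's real types (common/fp/fpcr.h, common/fp/fpsr.h,
// and the VectorArray alias); only the calling shape matters here.
namespace FP {
struct FPCR { u32 value; };
struct FPSR { u32 value; };
} // namespace FP

template <typename T>
using VectorArray = std::array<T, 16 / sizeof(T)>;

// A captureless lambda passed to EmitOneArgumentFallback converts to a function
// pointer with this signature; the emitted thunk supplies the arguments in
// exactly this order.
void ExampleFallback(VectorArray<u32>& result, const VectorArray<u32>& operand,
                     FP::FPCR fpcr, FP::FPSR& fpsr) {
    for (std::size_t i = 0; i < result.size(); i++) {
        result[i] = operand[i]; // a real fallback computes a per-lane FP result here
    }
}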

@@ -365,6 +393,23 @@ void EmitX64::EmitFPVectorPairedAddLower64(EmitContext& ctx, IR::Inst* inst) {
    });
}

template<typename FPT>
static void EmitRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
    EmitOneArgumentFallback(code, ctx, inst, [](VectorArray<FPT>& result, const VectorArray<FPT>& operand, FP::FPCR fpcr, FP::FPSR& fpsr) {
        for (size_t i = 0; i < result.size(); i++) {
            result[i] = FP::FPRSqrtEstimate<FPT>(operand[i], fpcr, fpsr);
        }
    });
}

void EmitX64::EmitFPVectorRSqrtEstimate32(EmitContext& ctx, IR::Inst* inst) {
    EmitRSqrtEstimate<u32>(code, ctx, inst);
}

void EmitX64::EmitFPVectorRSqrtEstimate64(EmitContext& ctx, IR::Inst* inst) {
    EmitRSqrtEstimate<u64>(code, ctx, inst);
}

void EmitX64::EmitFPVectorS32ToSingle(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const Xbyak::Xmm xmm = ctx.reg_alloc.UseScratchXmm(args[0]);
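
Each lane is handled by the soft-float routine FP::FPRSqrtEstimate, which follows the ARM pseudocode: it honours FPCR controls such as flush-to-zero, handles NaN, zero, negative and infinite inputs, and records exceptions in FPSR. Purely as an illustration of the per-lane result for ordinary positive inputs (deliberately ignoring FPCR/FPSR and the special cases), the value is roughly the reciprocal square root:

#include <cmath>
#include <cstdint>
#include <cstring>

// Crude stand-in for FP::FPRSqrtEstimate<u32> on well-behaved inputs:
// reinterpret the lane as a float, take 1/sqrt, reinterpret back.
// The real routine is table-driven and updates FPSR exception flags.
std::uint32_t ApproxRSqrtEstimate32(std::uint32_t lane) {
    float x;
    std::memcpy(&x, &lane, sizeof(x));
    const float estimate = 1.0f / std::sqrt(x);
    std::uint32_t out;
    std::memcpy(&out, &estimate, sizeof(out));
    return out;
}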

@@ -654,7 +654,7 @@ INST(FCMLE_4, "FCMLE (zero)", "0Q101
//INST(FCVTZU_int_4,     "FCVTZU (vector, integer)",     "0Q1011101z100001101110nnnnnddddd")
//INST(URSQRTE,          "URSQRTE",                      "0Q1011101z100001110010nnnnnddddd")
//INST(FRSQRTE_3,        "FRSQRTE",                      "0Q10111011111001110110nnnnnddddd")
//INST(FRSQRTE_4,        "FRSQRTE",                      "0Q1011101z100001110110nnnnnddddd")
INST(FRSQRTE_4,          "FRSQRTE",                      "0Q1011101z100001110110nnnnnddddd")
//INST(FSQRT_1,          "FSQRT (vector)",               "0Q10111011111001111110nnnnnddddd")
//INST(FSQRT_2,          "FSQRT (vector)",               "0Q1011101z100001111110nnnnnddddd")
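
In the decoder table the pattern string is read MSB-first: Q picks the 64-bit or 128-bit datasize, z is the sz bit selecting single or double precision, and nnnnn/ddddd are the Vn and Vd register indices; the commit simply uncomments the FRSQRTE_4 entry so the existing decoder routes it to the new translator. A small sketch that packs those fields into a 32-bit instruction word, purely for illustration (the EncodeFRSQRTE4 helper is not part of dynarmic):

#include <cstdint>

// Pack the fields of "0Q1011101z100001110110nnnnnddddd" (FRSQRTE_4).
// Fixed bits come straight from the pattern; Q, sz, Vn and Vd fill the holes.
std::uint32_t EncodeFRSQRTE4(bool Q, bool sz, unsigned Vn, unsigned Vd) {
    std::uint32_t word = 0;
    // bit 31 is fixed to 0 by the pattern
    word |= static_cast<std::uint32_t>(Q) << 30;   // Q
    word |= 0b101110u << 24;                       // bits 29..24
    word |= 1u << 23;                              // bit 23
    word |= static_cast<std::uint32_t>(sz) << 22;  // sz (z)
    word |= 0b100001u << 16;                       // bits 21..16
    word |= 0b110110u << 10;                       // bits 15..10
    word |= (Vn & 0x1Fu) << 5;                     // Vn -> nnnnn
    word |= (Vd & 0x1Fu);                          // Vd -> ddddd
    return word;
}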

@@ -217,6 +217,21 @@ bool TranslatorVisitor::FCMLT_4(bool Q, bool sz, Vec Vn, Vec Vd) {
    return FPCompareAgainstZero(*this, Q, sz, Vn, Vd, ComparisonType::LT);
}

bool TranslatorVisitor::FRSQRTE_4(bool Q, bool sz, Vec Vn, Vec Vd) {
    if (sz && !Q) {
        return ReservedValue();
    }

    const size_t datasize = Q ? 128 : 64;
    const size_t esize = sz ? 64 : 32;

    const IR::U128 operand = V(datasize, Vn);
    const IR::U128 result = ir.FPVectorRSqrtEstimate(esize, operand);

    V(datasize, Vd, result);
    return true;
}

bool TranslatorVisitor::FNEG_1(bool Q, Vec Vn, Vec Vd) {
    const size_t datasize = Q ? 128 : 64;
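
The translator rejects sz=1 with Q=0 because a 64-bit vector of 64-bit lanes (a "1D" arrangement) is a reserved encoding for this instruction; the remaining combinations map onto the usual 2S, 4S and 2D arrangements. A tiny enumeration of that mapping, for illustration only:

#include <cstdio>

// Arrangements reachable through FRSQRTE_4; sz && !Q is ReservedValue().
int main() {
    struct Case { bool Q, sz; const char* arrangement; };
    const Case cases[] = {
        {false, false, "2S  (datasize 64,  esize 32)"},
        {true,  false, "4S  (datasize 128, esize 32)"},
        {true,  true,  "2D  (datasize 128, esize 64)"},
        // {false, true, ...} would be 1D and is rejected as a reserved value.
    };
    for (const Case& c : cases) {
        std::printf("Q=%d sz=%d -> %s\n", c.Q, c.sz, c.arrangement);
    }
    return 0;
}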

@@ -1679,6 +1679,17 @@ U128 IREmitter::FPVectorPairedAddLower(size_t esize, const U128& a, const U128&
    return {};
}

U128 IREmitter::FPVectorRSqrtEstimate(size_t esize, const U128& a) {
    switch (esize) {
    case 32:
        return Inst<U128>(Opcode::FPVectorRSqrtEstimate32, a);
    case 64:
        return Inst<U128>(Opcode::FPVectorRSqrtEstimate64, a);
    }
    UNREACHABLE();
    return {};
}

U128 IREmitter::FPVectorSub(size_t esize, const U128& a, const U128& b) {
    switch (esize) {
    case 32:
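
The IR layer exposes a single FPVectorRSqrtEstimate helper and picks a width-specific opcode from esize, so front-ends never name the 32/64-bit opcodes directly; unsupported element sizes (such as 16-bit half precision, which this commit does not implement) fall through to UNREACHABLE(). A self-contained model of that dispatch pattern, with stand-in names, for illustration:

#include <cstddef>
#include <cstdio>
#include <cstdlib>

// Stand-ins for the real Opcode enumerators added by this commit.
enum class Opcode { FPVectorRSqrtEstimate32, FPVectorRSqrtEstimate64 };

// Mirrors the esize -> opcode selection done by IREmitter::FPVectorRSqrtEstimate.
Opcode SelectRSqrtEstimateOpcode(std::size_t esize) {
    switch (esize) {
    case 32:
        return Opcode::FPVectorRSqrtEstimate32;
    case 64:
        return Opcode::FPVectorRSqrtEstimate64;
    }
    std::fprintf(stderr, "unsupported esize %zu\n", esize); // UNREACHABLE() in dynarmic
    std::abort();
}

int main() {
    std::printf("esize 32 -> opcode %d\n", static_cast<int>(SelectRSqrtEstimateOpcode(32)));
    std::printf("esize 64 -> opcode %d\n", static_cast<int>(SelectRSqrtEstimateOpcode(64)));
}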

@@ -299,6 +299,7 @@ public:
    U128 FPVectorMul(size_t esize, const U128& a, const U128& b);
    U128 FPVectorPairedAdd(size_t esize, const U128& a, const U128& b);
    U128 FPVectorPairedAddLower(size_t esize, const U128& a, const U128& b);
    U128 FPVectorRSqrtEstimate(size_t esize, const U128& a);
    U128 FPVectorSub(size_t esize, const U128& a, const U128& b);
    U128 FPVectorS32ToSingle(const U128& a);
    U128 FPVectorS64ToDouble(const U128& a);

@@ -435,6 +435,8 @@ OPCODE(FPVectorPairedAddLower32, T::U128, T::U128, T::U
OPCODE(FPVectorPairedAddLower64,        T::U128,    T::U128,    T::U128 )
OPCODE(FPVectorPairedAdd32,             T::U128,    T::U128,    T::U128 )
OPCODE(FPVectorPairedAdd64,             T::U128,    T::U128,    T::U128 )
OPCODE(FPVectorRSqrtEstimate32,         T::U128,    T::U128             )
OPCODE(FPVectorRSqrtEstimate64,         T::U128,    T::U128             )
OPCODE(FPVectorS32ToSingle,             T::U128,    T::U128             )
OPCODE(FPVectorS64ToDouble,             T::U128,    T::U128             )
OPCODE(FPVectorSub32,                   T::U128,    T::U128,    T::U128 )
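
opcodes.inc is consumed as an X-macro table: the including code defines OPCODE before pulling the file in, so the two new entries automatically appear everywhere the opcode list is expanded (the opcode enum, its name strings, and the per-opcode type information). A self-contained sketch of that pattern, using an inline list macro instead of a separate .inc file and simplified macro arguments, purely for illustration:

#include <cstdio>

// Stand-in for #include "opcodes.inc": the same list expands several times.
#define OPCODE_LIST(X)              \
    X(FPVectorRSqrtEstimate32)      \
    X(FPVectorRSqrtEstimate64)

enum class Opcode {
#define MAKE_ENUM(name) name,
    OPCODE_LIST(MAKE_ENUM)
#undef MAKE_ENUM
};

const char* OpcodeName(Opcode op) {
    switch (op) {
#define MAKE_CASE(name) case Opcode::name: return #name;
    OPCODE_LIST(MAKE_CASE)
#undef MAKE_CASE
    }
    return "unknown";
}

int main() {
    std::printf("%s\n", OpcodeName(Opcode::FPVectorRSqrtEstimate64));
}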