IR: Implement FPVectorRecipEstimate
This commit is contained in:
parent
27c73dd56a
commit
939f5f5c7a
4 changed files with 31 additions and 0 deletions
|
@ -614,6 +614,23 @@ void EmitX64::EmitFPVectorPairedAddLower64(EmitContext& ctx, IR::Inst* inst) {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template<typename FPT>
|
||||||
|
static void EmitRecipEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
EmitTwoOpFallback(code, ctx, inst, [](VectorArray<FPT>& result, const VectorArray<FPT>& operand, FP::FPCR fpcr, FP::FPSR& fpsr) {
|
||||||
|
for (size_t i = 0; i < result.size(); i++) {
|
||||||
|
result[i] = FP::FPRecipEstimate<FPT>(operand[i], fpcr, fpsr);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
// Emitter for the FPVectorRecipEstimate32 opcode: forwards to the shared
// EmitRecipEstimate helper instantiated with u32 as the element type.
void EmitX64::EmitFPVectorRecipEstimate32(EmitContext& ctx, IR::Inst* inst) {
    EmitRecipEstimate<u32>(code, ctx, inst);
}
|
||||||
|
|
||||||
|
// Emitter for the FPVectorRecipEstimate64 opcode: forwards to the shared
// EmitRecipEstimate helper instantiated with u64 as the element type.
void EmitX64::EmitFPVectorRecipEstimate64(EmitContext& ctx, IR::Inst* inst) {
    EmitRecipEstimate<u64>(code, ctx, inst);
}
|
||||||
|
|
||||||
template<typename FPT>
|
template<typename FPT>
|
||||||
static void EmitRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
static void EmitRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
||||||
EmitTwoOpFallback(code, ctx, inst, [](VectorArray<FPT>& result, const VectorArray<FPT>& operand, FP::FPCR fpcr, FP::FPSR& fpsr) {
|
EmitTwoOpFallback(code, ctx, inst, [](VectorArray<FPT>& result, const VectorArray<FPT>& operand, FP::FPCR fpcr, FP::FPSR& fpsr) {
|
||||||
|
|
|
@ -1749,6 +1749,17 @@ U128 IREmitter::FPVectorPairedAddLower(size_t esize, const U128& a, const U128&
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emits the vector reciprocal-estimate IR instruction for operand `a`.
//
// esize picks the opcode:
//   32 -> Opcode::FPVectorRecipEstimate32
//   64 -> Opcode::FPVectorRecipEstimate64
// Any other value falls through to UNREACHABLE(); the empty return after it
// is kept so every path still ends in a return statement.
U128 IREmitter::FPVectorRecipEstimate(size_t esize, const U128& a) {
    if (esize == 32) {
        return Inst<U128>(Opcode::FPVectorRecipEstimate32, a);
    }
    if (esize == 64) {
        return Inst<U128>(Opcode::FPVectorRecipEstimate64, a);
    }

    UNREACHABLE();
    return {};
}
|
||||||
|
|
||||||
U128 IREmitter::FPVectorRSqrtEstimate(size_t esize, const U128& a) {
|
U128 IREmitter::FPVectorRSqrtEstimate(size_t esize, const U128& a) {
|
||||||
switch (esize) {
|
switch (esize) {
|
||||||
case 32:
|
case 32:
|
||||||
|
|
|
@ -305,6 +305,7 @@ public:
|
||||||
U128 FPVectorNeg(size_t esize, const U128& a);
|
U128 FPVectorNeg(size_t esize, const U128& a);
|
||||||
U128 FPVectorPairedAdd(size_t esize, const U128& a, const U128& b);
|
U128 FPVectorPairedAdd(size_t esize, const U128& a, const U128& b);
|
||||||
U128 FPVectorPairedAddLower(size_t esize, const U128& a, const U128& b);
|
U128 FPVectorPairedAddLower(size_t esize, const U128& a, const U128& b);
|
||||||
|
// Emits FPVectorRecipEstimate32 or FPVectorRecipEstimate64 on `a`, chosen by esize (32 or 64).
U128 FPVectorRecipEstimate(size_t esize, const U128& a);
|
||||||
U128 FPVectorRSqrtEstimate(size_t esize, const U128& a);
|
U128 FPVectorRSqrtEstimate(size_t esize, const U128& a);
|
||||||
U128 FPVectorRSqrtStepFused(size_t esize, const U128& a, const U128& b);
|
U128 FPVectorRSqrtStepFused(size_t esize, const U128& a, const U128& b);
|
||||||
U128 FPVectorSub(size_t esize, const U128& a, const U128& b);
|
U128 FPVectorSub(size_t esize, const U128& a, const U128& b);
|
||||||
|
|
|
@ -452,6 +452,8 @@ OPCODE(FPVectorPairedAddLower32, T::U128, T::U128,
|
||||||
OPCODE(FPVectorPairedAddLower64, T::U128, T::U128, T::U128 )
|
OPCODE(FPVectorPairedAddLower64, T::U128, T::U128, T::U128 )
|
||||||
OPCODE(FPVectorPairedAdd32, T::U128, T::U128, T::U128 )
|
OPCODE(FPVectorPairedAdd32, T::U128, T::U128, T::U128 )
|
||||||
OPCODE(FPVectorPairedAdd64, T::U128, T::U128, T::U128 )
|
OPCODE(FPVectorPairedAdd64, T::U128, T::U128, T::U128 )
|
||||||
|
// Vector reciprocal estimate, esize 32: one U128 operand, U128 result.
OPCODE(FPVectorRecipEstimate32, T::U128, T::U128 )
|
||||||
|
// Vector reciprocal estimate, esize 64: one U128 operand, U128 result.
OPCODE(FPVectorRecipEstimate64, T::U128, T::U128 )
|
||||||
OPCODE(FPVectorRSqrtEstimate32, T::U128, T::U128 )
|
OPCODE(FPVectorRSqrtEstimate32, T::U128, T::U128 )
|
||||||
OPCODE(FPVectorRSqrtEstimate64, T::U128, T::U128 )
|
OPCODE(FPVectorRSqrtEstimate64, T::U128, T::U128 )
|
||||||
OPCODE(FPVectorRSqrtStepFused32, T::U128, T::U128, T::U128 )
|
OPCODE(FPVectorRSqrtStepFused32, T::U128, T::U128, T::U128 )
|
||||||
|
|
Loading…
Reference in a new issue