IR: Implement FPVectorRecipEstimate

This commit is contained in:
MerryMage 2018-07-25 18:55:40 +01:00
parent 27c73dd56a
commit 939f5f5c7a
4 changed files with 31 additions and 0 deletions

View file

@ -614,6 +614,23 @@ void EmitX64::EmitFPVectorPairedAddLower64(EmitContext& ctx, IR::Inst* inst) {
}); });
} }
template<typename FPT>
static void EmitRecipEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
EmitTwoOpFallback(code, ctx, inst, [](VectorArray<FPT>& result, const VectorArray<FPT>& operand, FP::FPCR fpcr, FP::FPSR& fpsr) {
for (size_t i = 0; i < result.size(); i++) {
result[i] = FP::FPRecipEstimate<FPT>(operand[i], fpcr, fpsr);
}
});
}
void EmitX64::EmitFPVectorRecipEstimate32(EmitContext& ctx, IR::Inst* inst) {
EmitRecipEstimate<u32>(code, ctx, inst);
}
void EmitX64::EmitFPVectorRecipEstimate64(EmitContext& ctx, IR::Inst* inst) {
EmitRecipEstimate<u64>(code, ctx, inst);
}
template<typename FPT> template<typename FPT>
static void EmitRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { static void EmitRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
EmitTwoOpFallback(code, ctx, inst, [](VectorArray<FPT>& result, const VectorArray<FPT>& operand, FP::FPCR fpcr, FP::FPSR& fpsr) { EmitTwoOpFallback(code, ctx, inst, [](VectorArray<FPT>& result, const VectorArray<FPT>& operand, FP::FPCR fpcr, FP::FPSR& fpsr) {

View file

@ -1749,6 +1749,17 @@ U128 IREmitter::FPVectorPairedAddLower(size_t esize, const U128& a, const U128&
return {}; return {};
} }
U128 IREmitter::FPVectorRecipEstimate(size_t esize, const U128& a) {
switch (esize) {
case 32:
return Inst<U128>(Opcode::FPVectorRecipEstimate32, a);
case 64:
return Inst<U128>(Opcode::FPVectorRecipEstimate64, a);
}
UNREACHABLE();
return {};
}
U128 IREmitter::FPVectorRSqrtEstimate(size_t esize, const U128& a) { U128 IREmitter::FPVectorRSqrtEstimate(size_t esize, const U128& a) {
switch (esize) { switch (esize) {
case 32: case 32:

View file

@ -305,6 +305,7 @@ public:
U128 FPVectorNeg(size_t esize, const U128& a); U128 FPVectorNeg(size_t esize, const U128& a);
U128 FPVectorPairedAdd(size_t esize, const U128& a, const U128& b); U128 FPVectorPairedAdd(size_t esize, const U128& a, const U128& b);
U128 FPVectorPairedAddLower(size_t esize, const U128& a, const U128& b); U128 FPVectorPairedAddLower(size_t esize, const U128& a, const U128& b);
U128 FPVectorRecipEstimate(size_t esize, const U128& a);
U128 FPVectorRSqrtEstimate(size_t esize, const U128& a); U128 FPVectorRSqrtEstimate(size_t esize, const U128& a);
U128 FPVectorRSqrtStepFused(size_t esize, const U128& a, const U128& b); U128 FPVectorRSqrtStepFused(size_t esize, const U128& a, const U128& b);
U128 FPVectorSub(size_t esize, const U128& a, const U128& b); U128 FPVectorSub(size_t esize, const U128& a, const U128& b);

View file

@ -452,6 +452,8 @@ OPCODE(FPVectorPairedAddLower32, T::U128, T::U128,
OPCODE(FPVectorPairedAddLower64, T::U128, T::U128, T::U128 ) OPCODE(FPVectorPairedAddLower64, T::U128, T::U128, T::U128 )
OPCODE(FPVectorPairedAdd32, T::U128, T::U128, T::U128 ) OPCODE(FPVectorPairedAdd32, T::U128, T::U128, T::U128 )
OPCODE(FPVectorPairedAdd64, T::U128, T::U128, T::U128 ) OPCODE(FPVectorPairedAdd64, T::U128, T::U128, T::U128 )
OPCODE(FPVectorRecipEstimate32, T::U128, T::U128 )
OPCODE(FPVectorRecipEstimate64, T::U128, T::U128 )
OPCODE(FPVectorRSqrtEstimate32, T::U128, T::U128 ) OPCODE(FPVectorRSqrtEstimate32, T::U128, T::U128 )
OPCODE(FPVectorRSqrtEstimate64, T::U128, T::U128 ) OPCODE(FPVectorRSqrtEstimate64, T::U128, T::U128 )
OPCODE(FPVectorRSqrtStepFused32, T::U128, T::U128, T::U128 ) OPCODE(FPVectorRSqrtStepFused32, T::U128, T::U128, T::U128 )