Add FPVectorRecipEstimate{32,64} IR opcodes.

* opcodes.inc: declares both opcodes as taking one U128 and producing a U128.
* IREmitter::FPVectorRecipEstimate: picks the opcode from esize (32 or 64);
  any other esize reaches UNREACHABLE().
* x64 backend: both opcodes go through EmitTwoOpFallback, which is handed a
  lambda that calls FP::FPRecipEstimate on every element of the operand.

diff --git a/src/backend_x64/emit_x64_vector_floating_point.cpp b/src/backend_x64/emit_x64_vector_floating_point.cpp
index de24a168..a16f2d91 100644
--- a/src/backend_x64/emit_x64_vector_floating_point.cpp
+++ b/src/backend_x64/emit_x64_vector_floating_point.cpp
@@ -614,6 +614,23 @@ void EmitX64::EmitFPVectorPairedAddLower64(EmitContext& ctx, IR::Inst* inst) {
     });
 }
 
+template<typename FPT>
+static void EmitRecipEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
+    EmitTwoOpFallback(code, ctx, inst, [](VectorArray<FPT>& result, const VectorArray<FPT>& operand, FP::FPCR fpcr, FP::FPSR& fpsr) {
+        for (size_t i = 0; i < result.size(); i++) {
+            result[i] = FP::FPRecipEstimate<FPT>(operand[i], fpcr, fpsr);
+        }
+    });
+}
+
+void EmitX64::EmitFPVectorRecipEstimate32(EmitContext& ctx, IR::Inst* inst) {
+    EmitRecipEstimate<u32>(code, ctx, inst);
+}
+
+void EmitX64::EmitFPVectorRecipEstimate64(EmitContext& ctx, IR::Inst* inst) {
+    EmitRecipEstimate<u64>(code, ctx, inst);
+}
+
 template<typename FPT>
 static void EmitRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
     EmitTwoOpFallback(code, ctx, inst, [](VectorArray<FPT>& result, const VectorArray<FPT>& operand, FP::FPCR fpcr, FP::FPSR& fpsr) {
diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp
index fc69608e..9a060056 100644
--- a/src/frontend/ir/ir_emitter.cpp
+++ b/src/frontend/ir/ir_emitter.cpp
@@ -1749,6 +1749,17 @@ U128 IREmitter::FPVectorPairedAddLower(size_t esize, const U128& a, const U128&
     return {};
 }
 
+U128 IREmitter::FPVectorRecipEstimate(size_t esize, const U128& a) {
+    switch (esize) {
+    case 32:
+        return Inst<U128>(Opcode::FPVectorRecipEstimate32, a);
+    case 64:
+        return Inst<U128>(Opcode::FPVectorRecipEstimate64, a);
+    }
+    UNREACHABLE();
+    return {};
+}
+
 U128 IREmitter::FPVectorRSqrtEstimate(size_t esize, const U128& a) {
     switch (esize) {
     case 32:
diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h
index 85747d1b..da013cd2 100644
--- a/src/frontend/ir/ir_emitter.h
+++ b/src/frontend/ir/ir_emitter.h
@@ -305,6 +305,7 @@ public:
     U128 FPVectorNeg(size_t esize, const U128& a);
     U128 FPVectorPairedAdd(size_t esize, const U128& a, const U128& b);
     U128 FPVectorPairedAddLower(size_t esize, const U128& a, const U128& b);
+    U128 FPVectorRecipEstimate(size_t esize, const U128& a);
     U128 FPVectorRSqrtEstimate(size_t esize, const U128& a);
     U128 FPVectorRSqrtStepFused(size_t esize, const U128& a, const U128& b);
     U128 FPVectorSub(size_t esize, const U128& a, const U128& b);
diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc
index 1f115310..7ec26325 100644
--- a/src/frontend/ir/opcodes.inc
+++ b/src/frontend/ir/opcodes.inc
@@ -452,6 +452,8 @@ OPCODE(FPVectorPairedAddLower32, T::U128, T::U128,
 OPCODE(FPVectorPairedAddLower64, T::U128, T::U128, T::U128 )
 OPCODE(FPVectorPairedAdd32, T::U128, T::U128, T::U128 )
 OPCODE(FPVectorPairedAdd64, T::U128, T::U128, T::U128 )
+OPCODE(FPVectorRecipEstimate32, T::U128, T::U128 )
+OPCODE(FPVectorRecipEstimate64, T::U128, T::U128 )
 OPCODE(FPVectorRSqrtEstimate32, T::U128, T::U128 )
 OPCODE(FPVectorRSqrtEstimate64, T::U128, T::U128 )
 OPCODE(FPVectorRSqrtStepFused32, T::U128, T::U128, T::U128 )