IR: Initial implementation of FPVectorRoundInt
This commit is contained in:
parent
f2393488fe
commit
f976c47008
4 changed files with 64 additions and 0 deletions
|
@ -24,6 +24,7 @@
|
||||||
#include "common/mp/list.h"
|
#include "common/mp/list.h"
|
||||||
#include "common/mp/lut.h"
|
#include "common/mp/lut.h"
|
||||||
#include "common/mp/to_tuple.h"
|
#include "common/mp/to_tuple.h"
|
||||||
|
#include "common/mp/vlift.h"
|
||||||
#include "common/mp/vllift.h"
|
#include "common/mp/vllift.h"
|
||||||
#include "frontend/ir/basic_block.h"
|
#include "frontend/ir/basic_block.h"
|
||||||
#include "frontend/ir/microinstruction.h"
|
#include "frontend/ir/microinstruction.h"
|
||||||
|
@ -728,6 +729,55 @@ void EmitX64::EmitFPVectorRecipStepFused64(EmitContext& ctx, IR::Inst* inst) {
|
||||||
EmitRecipStepFused<u64>(code, ctx, inst);
|
EmitRecipStepFused<u64>(code, ctx, inst);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template<size_t fsize>
|
||||||
|
void EmitFPVectorRoundInt(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
using FPT = mp::unsigned_integer_of_size<fsize>;
|
||||||
|
|
||||||
|
const auto rounding = static_cast<FP::RoundingMode>(inst->GetArg(1).GetU8());
|
||||||
|
const bool exact = inst->GetArg(2).GetU1();
|
||||||
|
|
||||||
|
using rounding_list = mp::list<
|
||||||
|
std::integral_constant<FP::RoundingMode, FP::RoundingMode::ToNearest_TieEven>,
|
||||||
|
std::integral_constant<FP::RoundingMode, FP::RoundingMode::TowardsPlusInfinity>,
|
||||||
|
std::integral_constant<FP::RoundingMode, FP::RoundingMode::TowardsMinusInfinity>,
|
||||||
|
std::integral_constant<FP::RoundingMode, FP::RoundingMode::TowardsZero>,
|
||||||
|
std::integral_constant<FP::RoundingMode, FP::RoundingMode::ToNearest_TieAwayFromZero>
|
||||||
|
>;
|
||||||
|
using exact_list = mp::list<mp::vlift<true>, mp::vlift<false>>;
|
||||||
|
|
||||||
|
using key_type = std::tuple<FP::RoundingMode, bool>;
|
||||||
|
using value_type = void(*)(VectorArray<FPT>&, const VectorArray<FPT>&, FP::FPCR, FP::FPSR&);
|
||||||
|
|
||||||
|
static const auto lut = mp::GenerateLookupTableFromList<key_type, value_type>(
|
||||||
|
[](auto arg) {
|
||||||
|
return std::pair<key_type, value_type>{
|
||||||
|
mp::to_tuple<decltype(arg)>,
|
||||||
|
static_cast<value_type>(
|
||||||
|
[](VectorArray<FPT>& output, const VectorArray<FPT>& input, FP::FPCR fpcr, FP::FPSR& fpsr) {
|
||||||
|
constexpr FP::RoundingMode rounding_mode = std::get<0>(mp::to_tuple<decltype(arg)>);
|
||||||
|
constexpr bool exact = std::get<1>(mp::to_tuple<decltype(arg)>);
|
||||||
|
|
||||||
|
for (size_t i = 0; i < output.size(); ++i) {
|
||||||
|
output[i] = static_cast<FPT>(FP::FPRoundInt<FPT>(input[i], fpcr, rounding_mode, exact, fpsr));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
)
|
||||||
|
};
|
||||||
|
},
|
||||||
|
mp::cartesian_product<rounding_list, exact_list>{}
|
||||||
|
);
|
||||||
|
|
||||||
|
EmitTwoOpFallback(code, ctx, inst, lut.at(std::make_tuple(rounding, exact)));
|
||||||
|
}
|
||||||
|
|
||||||
|
// 32-bit variant of FPVectorRoundInt: forwards to the shared
// EmitFPVectorRoundInt template with fsize = 32.
void EmitX64::EmitFPVectorRoundInt32(EmitContext& ctx, IR::Inst* inst) {
    constexpr size_t element_bits = 32;
    EmitFPVectorRoundInt<element_bits>(code, ctx, inst);
}
|
||||||
|
|
||||||
|
// 64-bit variant of FPVectorRoundInt: forwards to the shared
// EmitFPVectorRoundInt template with fsize = 64.
void EmitX64::EmitFPVectorRoundInt64(EmitContext& ctx, IR::Inst* inst) {
    constexpr size_t element_bits = 64;
    EmitFPVectorRoundInt<element_bits>(code, ctx, inst);
}
|
||||||
|
|
||||||
template<typename FPT>
|
template<typename FPT>
|
||||||
static void EmitRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
static void EmitRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
||||||
EmitTwoOpFallback(code, ctx, inst, [](VectorArray<FPT>& result, const VectorArray<FPT>& operand, FP::FPCR fpcr, FP::FPSR& fpsr) {
|
EmitTwoOpFallback(code, ctx, inst, [](VectorArray<FPT>& result, const VectorArray<FPT>& operand, FP::FPCR fpcr, FP::FPSR& fpsr) {
|
||||||
|
|
|
@ -1932,6 +1932,17 @@ U128 IREmitter::FPVectorRecipStepFused(size_t esize, const U128& a, const U128&
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Appends an FPVectorRoundInt32 or FPVectorRoundInt64 instruction, chosen by
// `esize`, and returns its U128 result.
//
// The rounding mode is passed as an 8-bit immediate and `exact` as a 1-bit
// immediate. `esize` must be 32 or 64; any other value reaches UNREACHABLE().
U128 IREmitter::FPVectorRoundInt(size_t esize, const U128& operand, FP::RoundingMode rounding, bool exact) {
    const u8 rounding_bits = static_cast<u8>(rounding);

    if (esize == 32) {
        return Inst<U128>(Opcode::FPVectorRoundInt32, operand, Imm8(rounding_bits), Imm1(exact));
    }
    if (esize == 64) {
        return Inst<U128>(Opcode::FPVectorRoundInt64, operand, Imm8(rounding_bits), Imm1(exact));
    }

    UNREACHABLE();
    return {};
}
|
||||||
|
|
||||||
U128 IREmitter::FPVectorRSqrtEstimate(size_t esize, const U128& a) {
|
U128 IREmitter::FPVectorRSqrtEstimate(size_t esize, const U128& a) {
|
||||||
switch (esize) {
|
switch (esize) {
|
||||||
case 32:
|
case 32:
|
||||||
|
|
|
@ -320,6 +320,7 @@ public:
|
||||||
U128 FPVectorPairedAddLower(size_t esize, const U128& a, const U128& b);
|
U128 FPVectorPairedAddLower(size_t esize, const U128& a, const U128& b);
|
||||||
U128 FPVectorRecipEstimate(size_t esize, const U128& a);
|
U128 FPVectorRecipEstimate(size_t esize, const U128& a);
|
||||||
U128 FPVectorRecipStepFused(size_t esize, const U128& a, const U128& b);
|
U128 FPVectorRecipStepFused(size_t esize, const U128& a, const U128& b);
|
||||||
|
// Emits an FPVectorRoundInt32 or FPVectorRoundInt64 instruction, selected by esize
// (32 or 64; other values hit UNREACHABLE() in the definition). `rounding` is
// encoded as an 8-bit immediate and `exact` as a 1-bit immediate.
U128 FPVectorRoundInt(size_t esize, const U128& operand, FP::RoundingMode rounding, bool exact);
|
||||||
U128 FPVectorRSqrtEstimate(size_t esize, const U128& a);
|
U128 FPVectorRSqrtEstimate(size_t esize, const U128& a);
|
||||||
U128 FPVectorRSqrtStepFused(size_t esize, const U128& a, const U128& b);
|
U128 FPVectorRSqrtStepFused(size_t esize, const U128& a, const U128& b);
|
||||||
U128 FPVectorS32ToSingle(const U128& a);
|
U128 FPVectorS32ToSingle(const U128& a);
|
||||||
|
|
|
@ -491,6 +491,8 @@ OPCODE(FPVectorRecipEstimate32, T::U128, T::U128
|
||||||
OPCODE(FPVectorRecipEstimate64, T::U128, T::U128 )
|
OPCODE(FPVectorRecipEstimate64, T::U128, T::U128 )
|
||||||
OPCODE(FPVectorRecipStepFused32, T::U128, T::U128, T::U128 )
|
OPCODE(FPVectorRecipStepFused32, T::U128, T::U128, T::U128 )
|
||||||
OPCODE(FPVectorRecipStepFused64, T::U128, T::U128, T::U128 )
|
OPCODE(FPVectorRecipStepFused64, T::U128, T::U128, T::U128 )
|
||||||
|
// FPVectorRoundInt opcodes for 32-bit and 64-bit elements.
// NOTE(review): the type list is presumably the result type followed by the operand
// types (U128 vector, U8 rounding-mode immediate, U1 exact flag); this matches how
// IREmitter::FPVectorRoundInt builds the instruction, but confirm against the OPCODE macro.
OPCODE(FPVectorRoundInt32, T::U128, T::U128, T::U8, T::U1 )
|
||||||
|
OPCODE(FPVectorRoundInt64, T::U128, T::U128, T::U8, T::U1 )
|
||||||
OPCODE(FPVectorRSqrtEstimate32, T::U128, T::U128 )
|
OPCODE(FPVectorRSqrtEstimate32, T::U128, T::U128 )
|
||||||
OPCODE(FPVectorRSqrtEstimate64, T::U128, T::U128 )
|
OPCODE(FPVectorRSqrtEstimate64, T::U128, T::U128 )
|
||||||
OPCODE(FPVectorRSqrtStepFused32, T::U128, T::U128, T::U128 )
|
OPCODE(FPVectorRSqrtStepFused32, T::U128, T::U128, T::U128 )
|
||||||
|
|
Loading…
Reference in a new issue