IR: Implement FPVectorTo{Signed,Unsigned}Fixed
This commit is contained in:
parent
8f75a1fe04
commit
507bcd8b8b
4 changed files with 99 additions and 7 deletions
|
@ -18,8 +18,13 @@
|
|||
#include "common/fp/info.h"
|
||||
#include "common/fp/op.h"
|
||||
#include "common/fp/util.h"
|
||||
#include "common/mp/cartesian_product.h"
|
||||
#include "common/mp/function_info.h"
|
||||
#include "common/mp/integer.h"
|
||||
#include "common/mp/list.h"
|
||||
#include "common/mp/lut.h"
|
||||
#include "common/mp/to_tuple.h"
|
||||
#include "common/mp/vllift.h"
|
||||
#include "frontend/ir/basic_block.h"
|
||||
#include "frontend/ir/microinstruction.h"
|
||||
|
||||
|
@ -815,6 +820,63 @@ void EmitX64::EmitFPVectorSub64(EmitContext& ctx, IR::Inst* inst) {
|
|||
EmitThreeOpVectorOperation<64, DefaultIndexer>(code, ctx, inst, &Xbyak::CodeGenerator::subpd);
|
||||
}
|
||||
|
||||
template<size_t fsize, bool unsigned_>
|
||||
void EmitFPVectorToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
using FPT = mp::unsigned_integer_of_size<fsize>;
|
||||
|
||||
const size_t fbits = inst->GetArg(1).GetU8();
|
||||
const auto rounding = static_cast<FP::RoundingMode>(inst->GetArg(2).GetU8());
|
||||
|
||||
using fbits_list = mp::vllift<std::make_index_sequence<fsize>>;
|
||||
using rounding_list = mp::list<
|
||||
std::integral_constant<FP::RoundingMode, FP::RoundingMode::ToNearest_TieEven>,
|
||||
std::integral_constant<FP::RoundingMode, FP::RoundingMode::TowardsPlusInfinity>,
|
||||
std::integral_constant<FP::RoundingMode, FP::RoundingMode::TowardsMinusInfinity>,
|
||||
std::integral_constant<FP::RoundingMode, FP::RoundingMode::TowardsZero>,
|
||||
std::integral_constant<FP::RoundingMode, FP::RoundingMode::ToNearest_TieAwayFromZero>
|
||||
>;
|
||||
|
||||
using key_type = std::tuple<size_t, FP::RoundingMode>;
|
||||
using value_type = void(*)(VectorArray<FPT>&, const VectorArray<FPT>&, FP::FPCR, FP::FPSR&);
|
||||
|
||||
static const auto lut = mp::GenerateLookupTableFromList<key_type, value_type>(
|
||||
[](auto arg) {
|
||||
return std::pair<key_type, value_type>{
|
||||
mp::to_tuple<decltype(arg)>,
|
||||
static_cast<value_type>(
|
||||
[](VectorArray<FPT>& output, const VectorArray<FPT>& input, FP::FPCR fpcr, FP::FPSR& fpsr) {
|
||||
constexpr size_t fbits = std::get<0>(mp::to_tuple<decltype(arg)>);
|
||||
constexpr FP::RoundingMode rounding_mode = std::get<1>(mp::to_tuple<decltype(arg)>);
|
||||
|
||||
for (size_t i = 0; i < output.size(); ++i) {
|
||||
output[i] = static_cast<FPT>(FP::FPToFixed<FPT>(fsize, input[i], fbits, unsigned_, fpcr, rounding_mode, fpsr));
|
||||
}
|
||||
}
|
||||
)
|
||||
};
|
||||
},
|
||||
mp::cartesian_product<fbits_list, rounding_list>{}
|
||||
);
|
||||
|
||||
EmitTwoOpFallback(code, ctx, inst, lut.at(std::make_tuple(fbits, rounding)));
|
||||
}
|
||||
|
||||
// Emits FPVectorToSignedFixed32 by forwarding to EmitFPVectorToFixed with fsize = 32 and unsigned_ = false.
void EmitX64::EmitFPVectorToSignedFixed32(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitFPVectorToFixed<32, false>(code, ctx, inst);
|
||||
}
|
||||
|
||||
// Emits FPVectorToSignedFixed64 by forwarding to EmitFPVectorToFixed with fsize = 64 and unsigned_ = false.
void EmitX64::EmitFPVectorToSignedFixed64(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitFPVectorToFixed<64, false>(code, ctx, inst);
|
||||
}
|
||||
|
||||
// Emits FPVectorToUnsignedFixed32 by forwarding to EmitFPVectorToFixed with fsize = 32 and unsigned_ = true.
void EmitX64::EmitFPVectorToUnsignedFixed32(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitFPVectorToFixed<32, true>(code, ctx, inst);
|
||||
}
|
||||
|
||||
// Emits FPVectorToUnsignedFixed64 by forwarding to EmitFPVectorToFixed with fsize = 64 and unsigned_ = true.
void EmitX64::EmitFPVectorToUnsignedFixed64(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitFPVectorToFixed<64, true>(code, ctx, inst);
|
||||
}
|
||||
|
||||
void EmitX64::EmitFPVectorU32ToSingle(EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
const Xbyak::Xmm xmm = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||
|
|
|
@ -1822,6 +1822,14 @@ U128 IREmitter::FPVectorRSqrtStepFused(size_t esize, const U128& a, const U128&
|
|||
return {};
|
||||
}
|
||||
|
||||
// Creates an instruction with opcode FPVectorS32ToSingle via Inst<U128>, passing `a` as its
// only operand, and returns the resulting U128 value.
U128 IREmitter::FPVectorS32ToSingle(const U128& a) {
|
||||
return Inst<U128>(Opcode::FPVectorS32ToSingle, a);
|
||||
}
|
||||
|
||||
// Creates an instruction with opcode FPVectorS64ToDouble via Inst<U128>, passing `a` as its
// only operand, and returns the resulting U128 value.
U128 IREmitter::FPVectorS64ToDouble(const U128& a) {
|
||||
return Inst<U128>(Opcode::FPVectorS64ToDouble, a);
|
||||
}
|
||||
|
||||
U128 IREmitter::FPVectorSub(size_t esize, const U128& a, const U128& b) {
|
||||
switch (esize) {
|
||||
case 32:
|
||||
|
@ -1833,12 +1841,28 @@ U128 IREmitter::FPVectorSub(size_t esize, const U128& a, const U128& b) {
|
|||
return {};
|
||||
}
|
||||
|
||||
U128 IREmitter::FPVectorS32ToSingle(const U128& a) {
|
||||
return Inst<U128>(Opcode::FPVectorS32ToSingle, a);
|
||||
// Builds a vector float -> signed fixed-point conversion instruction.
//
// esize    : element size in bits; only 32 and 64 are handled, anything else hits UNREACHABLE().
// a        : vector operand.
// fbits    : number of fractional bits; asserted to be at most esize, then narrowed to a U8 immediate.
// rounding : rounding mode, encoded as a U8 immediate.
U128 IREmitter::FPVectorToSignedFixed(size_t esize, const U128& a, size_t fbits, FP::RoundingMode rounding) {
    ASSERT(fbits <= esize);

    // Both element sizes take the same operand list; only the opcode differs.
    const auto emit_with = [&](Opcode opcode) {
        return Inst<U128>(opcode, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
    };

    switch (esize) {
    case 32:
        return emit_with(Opcode::FPVectorToSignedFixed32);
    case 64:
        return emit_with(Opcode::FPVectorToSignedFixed64);
    }

    UNREACHABLE();
    return {};
}
|
||||
|
||||
U128 IREmitter::FPVectorS64ToDouble(const U128& a) {
|
||||
return Inst<U128>(Opcode::FPVectorS64ToDouble, a);
|
||||
// Builds a vector float -> unsigned fixed-point conversion instruction.
//
// esize    : element size in bits; only 32 and 64 are handled, anything else hits UNREACHABLE().
// a        : vector operand.
// fbits    : number of fractional bits; asserted to be at most esize, then narrowed to a U8 immediate.
// rounding : rounding mode, encoded as a U8 immediate.
U128 IREmitter::FPVectorToUnsignedFixed(size_t esize, const U128& a, size_t fbits, FP::RoundingMode rounding) {
    ASSERT(fbits <= esize);

    // Both element sizes take the same operand list; only the opcode differs.
    const auto emit_with = [&](Opcode opcode) {
        return Inst<U128>(opcode, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
    };

    switch (esize) {
    case 32:
        return emit_with(Opcode::FPVectorToUnsignedFixed32);
    case 64:
        return emit_with(Opcode::FPVectorToUnsignedFixed64);
    }

    UNREACHABLE();
    return {};
}
|
||||
|
||||
U128 IREmitter::FPVectorU32ToSingle(const U128& a) {
|
||||
|
|
|
@ -312,9 +312,11 @@ public:
|
|||
U128 FPVectorRecipStepFused(size_t esize, const U128& a, const U128& b);
|
||||
U128 FPVectorRSqrtEstimate(size_t esize, const U128& a);
|
||||
U128 FPVectorRSqrtStepFused(size_t esize, const U128& a, const U128& b);
|
||||
U128 FPVectorSub(size_t esize, const U128& a, const U128& b);
|
||||
U128 FPVectorS32ToSingle(const U128& a);
|
||||
U128 FPVectorS64ToDouble(const U128& a);
|
||||
U128 FPVectorSub(size_t esize, const U128& a, const U128& b);
|
||||
U128 FPVectorToSignedFixed(size_t esize, const U128& a, size_t fbits, FP::RoundingMode rounding);
|
||||
U128 FPVectorToUnsignedFixed(size_t esize, const U128& a, size_t fbits, FP::RoundingMode rounding);
|
||||
U128 FPVectorU32ToSingle(const U128& a);
|
||||
U128 FPVectorU64ToDouble(const U128& a);
|
||||
|
||||
|
|
|
@ -454,10 +454,10 @@ OPCODE(FPVectorMulAdd64, T::U128, T::U128,
|
|||
OPCODE(FPVectorNeg16, T::U128, T::U128 )
|
||||
OPCODE(FPVectorNeg32, T::U128, T::U128 )
|
||||
OPCODE(FPVectorNeg64, T::U128, T::U128 )
|
||||
OPCODE(FPVectorPairedAddLower32, T::U128, T::U128, T::U128 )
|
||||
OPCODE(FPVectorPairedAddLower64, T::U128, T::U128, T::U128 )
|
||||
OPCODE(FPVectorPairedAdd32, T::U128, T::U128, T::U128 )
|
||||
OPCODE(FPVectorPairedAdd64, T::U128, T::U128, T::U128 )
|
||||
OPCODE(FPVectorPairedAddLower32, T::U128, T::U128, T::U128 )
|
||||
OPCODE(FPVectorPairedAddLower64, T::U128, T::U128, T::U128 )
|
||||
OPCODE(FPVectorRecipEstimate32, T::U128, T::U128 )
|
||||
OPCODE(FPVectorRecipEstimate64, T::U128, T::U128 )
|
||||
OPCODE(FPVectorRecipStepFused32, T::U128, T::U128, T::U128 )
|
||||
|
@ -470,6 +470,10 @@ OPCODE(FPVectorS32ToSingle, T::U128, T::U128
|
|||
OPCODE(FPVectorS64ToDouble, T::U128, T::U128 )
|
||||
OPCODE(FPVectorSub32, T::U128, T::U128, T::U128 )
|
||||
OPCODE(FPVectorSub64, T::U128, T::U128, T::U128 )
|
||||
OPCODE(FPVectorToSignedFixed32, T::U128, T::U128, T::U8, T::U8 )
|
||||
OPCODE(FPVectorToSignedFixed64, T::U128, T::U128, T::U8, T::U8 )
|
||||
OPCODE(FPVectorToUnsignedFixed32, T::U128, T::U128, T::U8, T::U8 )
|
||||
OPCODE(FPVectorToUnsignedFixed64, T::U128, T::U128, T::U8, T::U8 )
|
||||
OPCODE(FPVectorU32ToSingle, T::U128, T::U128 )
|
||||
OPCODE(FPVectorU64ToDouble, T::U128, T::U128 )
|
||||
|
||||
|
|
Loading…
Reference in a new issue