frontend/ir_emitter: Add half-precision->fixed-point opcodes

This commit is contained in:
Lioncash 2019-04-15 00:20:37 -04:00 committed by MerryMage
parent 4ecfbc14de
commit 604f39f00a
6 changed files with 270 additions and 163 deletions

View file

@ -1222,6 +1222,8 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
const size_t fbits = args[1].GetImmediateU8();
const auto rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
if constexpr (fsize != 16) {
const auto round_imm = ConvertRoundingModeToX64Immediate(rounding_mode);
if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41) && round_imm){
@ -1291,6 +1293,7 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
return;
}
}
using fbits_list = mp::vllift<std::make_index_sequence<isize + 1>>;
using rounding_list = mp::list<
@ -1345,6 +1348,22 @@ void EmitX64::EmitFPDoubleToFixedU64(EmitContext& ctx, IR::Inst* inst) {
EmitFPToFixed<64, true, 64>(code, ctx, inst);
}
// Backend handler for the FPHalfToFixedS32 IR opcode.
// Forwards to EmitFPToFixed<16, false, 32>: 16-bit (half-precision) source,
// 32-bit integer result. The `false` argument presumably selects the signed
// conversion (the S*/U* wrappers differ only in this flag) -- confirm against
// the template's parameter list.
void EmitX64::EmitFPHalfToFixedS32(EmitContext& ctx, IR::Inst* inst) {
EmitFPToFixed<16, false, 32>(code, ctx, inst);
}
// Backend handler for the FPHalfToFixedS64 IR opcode.
// Forwards to EmitFPToFixed<16, false, 64>: 16-bit (half-precision) source,
// 64-bit integer result. The `false` argument presumably selects the signed
// conversion -- confirm against the template's parameter list.
void EmitX64::EmitFPHalfToFixedS64(EmitContext& ctx, IR::Inst* inst) {
EmitFPToFixed<16, false, 64>(code, ctx, inst);
}
// Backend handler for the FPHalfToFixedU32 IR opcode.
// Forwards to EmitFPToFixed<16, true, 32>: 16-bit (half-precision) source,
// 32-bit integer result. The `true` argument presumably selects the unsigned
// conversion -- confirm against the template's parameter list.
void EmitX64::EmitFPHalfToFixedU32(EmitContext& ctx, IR::Inst* inst) {
EmitFPToFixed<16, true, 32>(code, ctx, inst);
}
// Backend handler for the FPHalfToFixedU64 IR opcode.
// Forwards to EmitFPToFixed<16, true, 64>: 16-bit (half-precision) source,
// 64-bit integer result. The `true` argument presumably selects the unsigned
// conversion -- confirm against the template's parameter list.
void EmitX64::EmitFPHalfToFixedU64(EmitContext& ctx, IR::Inst* inst) {
EmitFPToFixed<16, true, 64>(code, ctx, inst);
}
// Backend handler for the FPSingleToFixedS32 IR opcode.
// Forwards to EmitFPToFixed<32, false, 32>: 32-bit (single-precision) source,
// 32-bit integer result. The `false` argument presumably selects the signed
// conversion -- confirm against the template's parameter list.
void EmitX64::EmitFPSingleToFixedS32(EmitContext& ctx, IR::Inst* inst) {
EmitFPToFixed<32, false, 32>(code, ctx, inst);
}

View file

@ -1355,6 +1355,7 @@ void EmitFPVectorToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
// TODO: AVX512 implementation
if constexpr (fsize != 16) {
if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41) && rounding != FP::RoundingMode::ToNearest_TieAwayFromZero) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
@ -1448,6 +1449,7 @@ void EmitFPVectorToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.DefineValue(inst, src);
return;
}
}
using fbits_list = mp::vllift<std::make_index_sequence<fsize + 1>>;
using rounding_list = mp::list<
@ -1483,6 +1485,10 @@ void EmitFPVectorToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
EmitTwoOpFallback(code, ctx, inst, lut.at(std::make_tuple(fbits, rounding)));
}
// Backend handler for the FPVectorToSignedFixed16 IR opcode.
// Forwards to EmitFPVectorToFixed<16, false>: 16-bit elements; the `false`
// argument presumably selects the signed conversion (the Signed/Unsigned
// wrappers differ only in this flag) -- confirm against the template.
void EmitX64::EmitFPVectorToSignedFixed16(EmitContext& ctx, IR::Inst* inst) {
EmitFPVectorToFixed<16, false>(code, ctx, inst);
}
// Backend handler for the FPVectorToSignedFixed32 IR opcode.
// Forwards to EmitFPVectorToFixed<32, false>: 32-bit elements; the `false`
// argument presumably selects the signed conversion -- confirm against the
// template.
void EmitX64::EmitFPVectorToSignedFixed32(EmitContext& ctx, IR::Inst* inst) {
EmitFPVectorToFixed<32, false>(code, ctx, inst);
}
@ -1491,6 +1497,10 @@ void EmitX64::EmitFPVectorToSignedFixed64(EmitContext& ctx, IR::Inst* inst) {
EmitFPVectorToFixed<64, false>(code, ctx, inst);
}
// Backend handler for the FPVectorToUnsignedFixed16 IR opcode.
// Forwards to EmitFPVectorToFixed<16, true>: 16-bit elements; the `true`
// argument presumably selects the unsigned conversion -- confirm against the
// template.
void EmitX64::EmitFPVectorToUnsignedFixed16(EmitContext& ctx, IR::Inst* inst) {
EmitFPVectorToFixed<16, true>(code, ctx, inst);
}
// Backend handler for the FPVectorToUnsignedFixed32 IR opcode.
// Forwards to EmitFPVectorToFixed<32, true>: 32-bit elements; the `true`
// argument presumably selects the unsigned conversion -- confirm against the
// template.
void EmitX64::EmitFPVectorToUnsignedFixed32(EmitContext& ctx, IR::Inst* inst) {
EmitFPVectorToFixed<32, true>(code, ctx, inst);
}

View file

@ -2046,28 +2046,80 @@ U16 IREmitter::FPSingleToHalf(const U32& a, FP::RoundingMode rounding) {
return Inst<U16>(Opcode::FPSingleToHalf, a, Imm8(static_cast<u8>(rounding)));
}
/// Emits a floating-point -> signed fixed-point conversion producing a U32.
///
/// The opcode is selected from the IR type of `a`:
///   U16 -> FPHalfToFixedS32, U32 -> FPSingleToFixedS32, U64 -> FPDoubleToFixedS32.
///
/// @param a        Source value; its IR type must be U16, U32 or U64.
/// @param fbits    Fixed-point `fbits` operand; asserted to be at most 32 and
///                 narrowed to an 8-bit immediate.
/// @param rounding Rounding mode, narrowed to an 8-bit immediate.
/// @return The U32 result of the emitted instruction.
U32 IREmitter::FPToFixedS32(const U16U32U64& a, size_t fbits, FP::RoundingMode rounding) {
    ASSERT(fbits <= 32);

    // Both immediates are identical for every source width, so build them once.
    const U8 fbits_imm = Imm8(static_cast<u8>(fbits));
    const U8 rounding_imm = Imm8(static_cast<u8>(rounding));

    switch (a.GetType()) {
    case Type::U16:
        return Inst<U32>(Opcode::FPHalfToFixedS32, a, fbits_imm, rounding_imm);
    case Type::U32:
        return Inst<U32>(Opcode::FPSingleToFixedS32, a, fbits_imm, rounding_imm);
    case Type::U64:
        return Inst<U32>(Opcode::FPDoubleToFixedS32, a, fbits_imm, rounding_imm);
    default:
        // Any other source type has no matching opcode.
        UNREACHABLE();
        return U32{};
    }
}
/// Emits a floating-point -> signed fixed-point conversion producing a U64.
///
/// The opcode is selected from the IR type of `a`:
///   U16 -> FPHalfToFixedS64, U32 -> FPSingleToFixedS64, U64 -> FPDoubleToFixedS64.
///
/// @param a        Source value; its IR type must be U16, U32 or U64.
/// @param fbits    Fixed-point `fbits` operand; asserted to be at most 64 and
///                 narrowed to an 8-bit immediate.
/// @param rounding Rounding mode, narrowed to an 8-bit immediate.
/// @return The U64 result of the emitted instruction.
U64 IREmitter::FPToFixedS64(const U16U32U64& a, size_t fbits, FP::RoundingMode rounding) {
    ASSERT(fbits <= 64);

    // Both immediates are identical for every source width, so build them once.
    const U8 fbits_imm = Imm8(static_cast<u8>(fbits));
    const U8 rounding_imm = Imm8(static_cast<u8>(rounding));

    switch (a.GetType()) {
    case Type::U16:
        return Inst<U64>(Opcode::FPHalfToFixedS64, a, fbits_imm, rounding_imm);
    case Type::U32:
        return Inst<U64>(Opcode::FPSingleToFixedS64, a, fbits_imm, rounding_imm);
    case Type::U64:
        return Inst<U64>(Opcode::FPDoubleToFixedS64, a, fbits_imm, rounding_imm);
    default:
        // Any other source type has no matching opcode.
        UNREACHABLE();
        return U64{};
    }
}
/// Emits a floating-point -> unsigned fixed-point conversion producing a U32.
///
/// The opcode is selected from the IR type of `a`:
///   U16 -> FPHalfToFixedU32, U32 -> FPSingleToFixedU32, U64 -> FPDoubleToFixedU32.
///
/// @param a        Source value; its IR type must be U16, U32 or U64.
/// @param fbits    Fixed-point `fbits` operand; asserted to be at most 32 and
///                 narrowed to an 8-bit immediate.
/// @param rounding Rounding mode, narrowed to an 8-bit immediate.
/// @return The U32 result of the emitted instruction.
U32 IREmitter::FPToFixedU32(const U16U32U64& a, size_t fbits, FP::RoundingMode rounding) {
    ASSERT(fbits <= 32);

    // Both immediates are identical for every source width, so build them once.
    const U8 fbits_imm = Imm8(static_cast<u8>(fbits));
    const U8 rounding_imm = Imm8(static_cast<u8>(rounding));

    switch (a.GetType()) {
    case Type::U16:
        return Inst<U32>(Opcode::FPHalfToFixedU32, a, fbits_imm, rounding_imm);
    case Type::U32:
        return Inst<U32>(Opcode::FPSingleToFixedU32, a, fbits_imm, rounding_imm);
    case Type::U64:
        return Inst<U32>(Opcode::FPDoubleToFixedU32, a, fbits_imm, rounding_imm);
    default:
        // Any other source type has no matching opcode.
        UNREACHABLE();
        return U32{};
    }
}
/// Emits a floating-point -> unsigned fixed-point conversion producing a U64.
///
/// The opcode is selected from the IR type of `a`:
///   U16 -> FPHalfToFixedU64, U32 -> FPSingleToFixedU64, U64 -> FPDoubleToFixedU64.
///
/// @param a        Source value; its IR type must be U16, U32 or U64.
/// @param fbits    Fixed-point `fbits` operand; asserted to be at most 64 and
///                 narrowed to an 8-bit immediate.
/// @param rounding Rounding mode, narrowed to an 8-bit immediate.
/// @return The U64 result of the emitted instruction.
U64 IREmitter::FPToFixedU64(const U16U32U64& a, size_t fbits, FP::RoundingMode rounding) {
    ASSERT(fbits <= 64);

    // Both immediates are identical for every source width, so build them once.
    const U8 fbits_imm = Imm8(static_cast<u8>(fbits));
    const U8 rounding_imm = Imm8(static_cast<u8>(rounding));

    switch (a.GetType()) {
    case Type::U16:
        return Inst<U64>(Opcode::FPHalfToFixedU64, a, fbits_imm, rounding_imm);
    case Type::U32:
        return Inst<U64>(Opcode::FPSingleToFixedU64, a, fbits_imm, rounding_imm);
    case Type::U64:
        return Inst<U64>(Opcode::FPDoubleToFixedU64, a, fbits_imm, rounding_imm);
    default:
        // Any other source type has no matching opcode.
        UNREACHABLE();
        return U64{};
    }
}
U32 IREmitter::FPSignedFixedToSingle(const U32U64& a, size_t fbits, FP::RoundingMode rounding) {
@ -2368,24 +2420,38 @@ U128 IREmitter::FPVectorSub(size_t esize, const U128& a, const U128& b) {
/// Emits a vector floating-point -> signed fixed-point conversion.
///
/// @param esize    Element size selector; 16, 32 and 64 map to
///                 FPVectorToSignedFixed16/32/64 respectively.
/// @param a        Source vector.
/// @param fbits    Fixed-point `fbits` operand; asserted to be at most `esize`
///                 and narrowed to an 8-bit immediate.
/// @param rounding Rounding mode, narrowed to an 8-bit immediate.
/// @return The U128 result of the emitted instruction.
U128 IREmitter::FPVectorToSignedFixed(size_t esize, const U128& a, size_t fbits, FP::RoundingMode rounding) {
    ASSERT(fbits <= esize);

    // Both immediates are identical for every element size, so build them once.
    const U8 fbits_imm = Imm8(static_cast<u8>(fbits));
    const U8 rounding_imm = Imm8(static_cast<u8>(rounding));

    switch (esize) {
    case 16:
        return Inst<U128>(Opcode::FPVectorToSignedFixed16, a, fbits_imm, rounding_imm);
    case 32:
        return Inst<U128>(Opcode::FPVectorToSignedFixed32, a, fbits_imm, rounding_imm);
    case 64:
        return Inst<U128>(Opcode::FPVectorToSignedFixed64, a, fbits_imm, rounding_imm);
    }

    // Reached only for an element size with no matching opcode.
    UNREACHABLE();
    return {};
}
/// Emits a vector floating-point -> unsigned fixed-point conversion.
///
/// @param esize    Element size selector; 16, 32 and 64 map to
///                 FPVectorToUnsignedFixed16/32/64 respectively.
/// @param a        Source vector.
/// @param fbits    Fixed-point `fbits` operand; asserted to be at most `esize`
///                 and narrowed to an 8-bit immediate.
/// @param rounding Rounding mode, narrowed to an 8-bit immediate.
/// @return The U128 result of the emitted instruction.
U128 IREmitter::FPVectorToUnsignedFixed(size_t esize, const U128& a, size_t fbits, FP::RoundingMode rounding) {
    ASSERT(fbits <= esize);

    // Both immediates are identical for every element size, so build them once.
    const U8 fbits_imm = Imm8(static_cast<u8>(fbits));
    const U8 rounding_imm = Imm8(static_cast<u8>(rounding));

    switch (esize) {
    case 16:
        return Inst<U128>(Opcode::FPVectorToUnsignedFixed16, a, fbits_imm, rounding_imm);
    case 32:
        return Inst<U128>(Opcode::FPVectorToUnsignedFixed32, a, fbits_imm, rounding_imm);
    case 64:
        return Inst<U128>(Opcode::FPVectorToUnsignedFixed64, a, fbits_imm, rounding_imm);
    }

    // Reached only for an element size with no matching opcode.
    UNREACHABLE();
    return {};
}

View file

@ -319,10 +319,10 @@ public:
U32 FPHalfToSingle(const U16& a, FP::RoundingMode rounding);
U16 FPSingleToHalf(const U32& a, FP::RoundingMode rounding);
U64 FPSingleToDouble(const U32& a, FP::RoundingMode rounding);
U32 FPToFixedS32(const U32U64& a, size_t fbits, FP::RoundingMode rounding);
U64 FPToFixedS64(const U32U64& a, size_t fbits, FP::RoundingMode rounding);
U32 FPToFixedU32(const U32U64& a, size_t fbits, FP::RoundingMode rounding);
U64 FPToFixedU64(const U32U64& a, size_t fbits, FP::RoundingMode rounding);
U32 FPToFixedS32(const U16U32U64& a, size_t fbits, FP::RoundingMode rounding);
U64 FPToFixedS64(const U16U32U64& a, size_t fbits, FP::RoundingMode rounding);
U32 FPToFixedU32(const U16U32U64& a, size_t fbits, FP::RoundingMode rounding);
U64 FPToFixedU64(const U16U32U64& a, size_t fbits, FP::RoundingMode rounding);
U32 FPSignedFixedToSingle(const U32U64& a, size_t fbits, FP::RoundingMode rounding);
U32 FPUnsignedFixedToSingle(const U32U64& a, size_t fbits, FP::RoundingMode rounding);
U64 FPSignedFixedToDouble(const U32U64& a, size_t fbits, FP::RoundingMode rounding);

View file

@ -303,6 +303,10 @@ bool Inst::ReadsFromAndWritesToFPSRCumulativeExceptionBits() const {
case Opcode::FPDoubleToFixedS64:
case Opcode::FPDoubleToFixedU32:
case Opcode::FPDoubleToFixedU64:
case Opcode::FPHalfToFixedS32:
case Opcode::FPHalfToFixedS64:
case Opcode::FPHalfToFixedU32:
case Opcode::FPHalfToFixedU64:
case Opcode::FPSingleToFixedS32:
case Opcode::FPSingleToFixedS64:
case Opcode::FPSingleToFixedU32:
@ -356,6 +360,8 @@ bool Inst::ReadsFromAndWritesToFPSRCumulativeExceptionBits() const {
case Opcode::FPVectorSqrt64:
case Opcode::FPVectorSub32:
case Opcode::FPVectorSub64:
case Opcode::FPVectorToSignedFixed16:
case Opcode::FPVectorToUnsignedFixed16:
return true;
default:

View file

@ -524,6 +524,10 @@ OPCODE(FPDoubleToFixedS32, U32, U64,
OPCODE(FPDoubleToFixedS64, U64, U64, U8, U8 )
OPCODE(FPDoubleToFixedU32, U32, U64, U8, U8 )
OPCODE(FPDoubleToFixedU64, U64, U64, U8, U8 )
OPCODE(FPHalfToFixedS32, U32, U16, U8, U8 )
OPCODE(FPHalfToFixedS64, U64, U16, U8, U8 )
OPCODE(FPHalfToFixedU32, U32, U16, U8, U8 )
OPCODE(FPHalfToFixedU64, U64, U16, U8, U8 )
OPCODE(FPSingleToFixedS32, U32, U32, U8, U8 )
OPCODE(FPSingleToFixedS64, U64, U32, U8, U8 )
OPCODE(FPSingleToFixedU32, U32, U32, U8, U8 )
@ -591,8 +595,10 @@ OPCODE(FPVectorSqrt32, U128, U128
OPCODE(FPVectorSqrt64, U128, U128 )
OPCODE(FPVectorSub32, U128, U128, U128 )
OPCODE(FPVectorSub64, U128, U128, U128 )
OPCODE(FPVectorToSignedFixed16, U128, U128, U8, U8 )
OPCODE(FPVectorToSignedFixed32, U128, U128, U8, U8 )
OPCODE(FPVectorToSignedFixed64, U128, U128, U8, U8 )
OPCODE(FPVectorToUnsignedFixed16, U128, U128, U8, U8 )
OPCODE(FPVectorToUnsignedFixed32, U128, U128, U8, U8 )
OPCODE(FPVectorToUnsignedFixed64, U128, U128, U8, U8 )