frontend/ir_emitter: Add half-precision->fixed-point opcodes
parent 4ecfbc14de
commit 604f39f00a
6 changed files with 270 additions and 163 deletions
@@ -1222,74 +1222,77 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
     const size_t fbits = args[1].GetImmediateU8();
     const auto rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
-    const auto round_imm = ConvertRoundingModeToX64Immediate(rounding_mode);
 
-    if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41) && round_imm){
-        const Xbyak::Xmm src = ctx.reg_alloc.UseScratchXmm(args[0]);
-        const Xbyak::Xmm scratch = ctx.reg_alloc.ScratchXmm();
-        const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr().cvt64();
-
-        if constexpr (fsize == 64) {
-            if (fbits != 0) {
-                const u64 scale_factor = static_cast<u64>((fbits + 1023) << 52);
-                code.mulsd(src, code.MConst(xword, scale_factor));
-            }
-
-            code.roundsd(src, src, *round_imm);
-        } else {
-            if (fbits != 0) {
-                const u32 scale_factor = static_cast<u32>((fbits + 127) << 23);
-                code.mulss(src, code.MConst(xword, scale_factor));
-            }
-
-            code.roundss(src, src, *round_imm);
-            code.cvtss2sd(src, src);
-        }
-
-        ZeroIfNaN<64>(code, src, scratch);
-
-        if constexpr (isize == 64) {
-            Xbyak::Label saturate_max, end;
-
-            if (unsigned_) {
-                code.maxsd(src, code.MConst(xword, f64_min_u64));
-            }
-            code.movsd(scratch, code.MConst(xword, unsigned_ ? f64_max_u64_lim : f64_max_s64_lim));
-            code.comisd(scratch, src);
-            code.jna(saturate_max, code.T_NEAR);
-            if (unsigned_) {
-                Xbyak::Label below_max;
-
-                code.movsd(scratch, code.MConst(xword, f64_max_s64_lim));
-                code.comisd(src, scratch);
-                code.jb(below_max);
-                code.subsd(src, scratch);
-                code.cvttsd2si(result, src);
-                code.btc(result, 63);
-                code.jmp(end);
-                code.L(below_max);
-            }
-            code.cvttsd2si(result, src); // 64 bit gpr
-            code.L(end);
-
-            code.SwitchToFarCode();
-            code.L(saturate_max);
-            code.mov(result, unsigned_ ? 0xFFFF'FFFF'FFFF'FFFF : 0x7FFF'FFFF'FFFF'FFFF);
-            code.jmp(end, code.T_NEAR);
-            code.SwitchToNearCode();
-        } else {
-            code.minsd(src, code.MConst(xword, unsigned_ ? f64_max_u32 : f64_max_s32));
-            if (unsigned_) {
-                code.maxsd(src, code.MConst(xword, f64_min_u32));
-                code.cvttsd2si(result, src); // 64 bit gpr
-            } else {
-                code.cvttsd2si(result.cvt32(), src);
-            }
-        }
-
-        ctx.reg_alloc.DefineValue(inst, result);
-
-        return;
-    }
+    if constexpr (fsize != 16) {
+        const auto round_imm = ConvertRoundingModeToX64Immediate(rounding_mode);
+
+        if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41) && round_imm){
+            const Xbyak::Xmm src = ctx.reg_alloc.UseScratchXmm(args[0]);
+            const Xbyak::Xmm scratch = ctx.reg_alloc.ScratchXmm();
+            const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr().cvt64();
+
+            if constexpr (fsize == 64) {
+                if (fbits != 0) {
+                    const u64 scale_factor = static_cast<u64>((fbits + 1023) << 52);
+                    code.mulsd(src, code.MConst(xword, scale_factor));
+                }
+
+                code.roundsd(src, src, *round_imm);
+            } else {
+                if (fbits != 0) {
+                    const u32 scale_factor = static_cast<u32>((fbits + 127) << 23);
+                    code.mulss(src, code.MConst(xword, scale_factor));
+                }
+
+                code.roundss(src, src, *round_imm);
+                code.cvtss2sd(src, src);
+            }
+
+            ZeroIfNaN<64>(code, src, scratch);
+
+            if constexpr (isize == 64) {
+                Xbyak::Label saturate_max, end;
+
+                if (unsigned_) {
+                    code.maxsd(src, code.MConst(xword, f64_min_u64));
+                }
+                code.movsd(scratch, code.MConst(xword, unsigned_ ? f64_max_u64_lim : f64_max_s64_lim));
+                code.comisd(scratch, src);
+                code.jna(saturate_max, code.T_NEAR);
+                if (unsigned_) {
+                    Xbyak::Label below_max;
+
+                    code.movsd(scratch, code.MConst(xword, f64_max_s64_lim));
+                    code.comisd(src, scratch);
+                    code.jb(below_max);
+                    code.subsd(src, scratch);
+                    code.cvttsd2si(result, src);
+                    code.btc(result, 63);
+                    code.jmp(end);
+                    code.L(below_max);
+                }
+                code.cvttsd2si(result, src); // 64 bit gpr
+                code.L(end);
+
+                code.SwitchToFarCode();
+                code.L(saturate_max);
+                code.mov(result, unsigned_ ? 0xFFFF'FFFF'FFFF'FFFF : 0x7FFF'FFFF'FFFF'FFFF);
+                code.jmp(end, code.T_NEAR);
+                code.SwitchToNearCode();
+            } else {
+                code.minsd(src, code.MConst(xword, unsigned_ ? f64_max_u32 : f64_max_s32));
+                if (unsigned_) {
+                    code.maxsd(src, code.MConst(xword, f64_min_u32));
+                    code.cvttsd2si(result, src); // 64 bit gpr
+                } else {
+                    code.cvttsd2si(result.cvt32(), src);
+                }
+            }
+
+            ctx.reg_alloc.DefineValue(inst, result);
+
+            return;
+        }
+    }
 
     using fbits_list = mp::vllift<std::make_index_sequence<isize + 1>>;
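Note: the scale factors in this hunk are raw IEEE-754 bit patterns for 2^fbits. Setting the exponent field to bias + fbits with a zero mantissa yields exactly 2^fbits, so a single mulsd/mulss applies the fixed-point scaling ahead of the rounding instruction. A standalone sketch of that identity (not part of the commit; C++20 for std::bit_cast):

    #include <bit>
    #include <cassert>
    #include <cmath>
    #include <cstdint>

    int main() {
        for (int fbits = 0; fbits <= 52; ++fbits) {
            // Exponent = bias + fbits, mantissa = 0  =>  value = 2^fbits.
            const auto f64_bits = static_cast<std::uint64_t>(fbits + 1023) << 52;
            const auto f32_bits = static_cast<std::uint32_t>(fbits + 127) << 23;
            assert(std::bit_cast<double>(f64_bits) == std::ldexp(1.0, fbits));
            assert(std::bit_cast<float>(f32_bits) == std::ldexp(1.0f, fbits));
        }
    }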
@@ -1345,6 +1348,22 @@ void EmitX64::EmitFPDoubleToFixedU64(EmitContext& ctx, IR::Inst* inst) {
     EmitFPToFixed<64, true, 64>(code, ctx, inst);
 }
 
+void EmitX64::EmitFPHalfToFixedS32(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPToFixed<16, false, 32>(code, ctx, inst);
+}
+
+void EmitX64::EmitFPHalfToFixedS64(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPToFixed<16, false, 64>(code, ctx, inst);
+}
+
+void EmitX64::EmitFPHalfToFixedU32(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPToFixed<16, true, 32>(code, ctx, inst);
+}
+
+void EmitX64::EmitFPHalfToFixedU64(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPToFixed<16, true, 64>(code, ctx, inst);
+}
+
 void EmitX64::EmitFPSingleToFixedS32(EmitContext& ctx, IR::Inst* inst) {
     EmitFPToFixed<32, false, 32>(code, ctx, inst);
 }
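With fsize fixed to 16, the `if constexpr (fsize != 16)` guard above means these four emitters never reach the SSE4.1 fast path; they always fall through to the table-driven soft-float fallback (the fbits_list line at the end of the first hunk, with the table keyed on (fbits, rounding) as the later lut.at(std::make_tuple(fbits, rounding)) call shows). The dispatch idea, sketched standalone with hypothetical names in place of dynarmic's mp:: machinery and soft-float conversion:

    #include <array>
    #include <cmath>
    #include <cstdint>
    #include <cstdio>
    #include <utility>

    // Hypothetical stand-in for the soft-float conversion: scale by 2^fbits,
    // then truncate. The real fallback also applies the rounding mode,
    // saturation, and FPSR exception accumulation.
    template <std::size_t fbits>
    std::int64_t to_fixed(double x) {
        return static_cast<std::int64_t>(x * std::ldexp(1.0, static_cast<int>(fbits)));
    }

    // One instantiation per possible fbits immediate, enumerated at compile
    // time -- the role played by mp::vllift<std::make_index_sequence<...>>.
    template <std::size_t... fb>
    constexpr auto make_lut(std::index_sequence<fb...>) {
        return std::array<std::int64_t (*)(double), sizeof...(fb)>{&to_fixed<fb>...};
    }

    int main() {
        constexpr auto lut = make_lut(std::make_index_sequence<65>{});
        const std::size_t fbits = 4;  // runtime immediate from the IR instruction
        std::printf("%lld\n", static_cast<long long>(lut[fbits](1.5)));  // 24
    }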
@@ -1355,98 +1355,100 @@ void EmitFPVectorToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
     // TODO: AVX512 implementation
 
-    if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41) && rounding != FP::RoundingMode::ToNearest_TieAwayFromZero) {
-        auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-
-        const Xbyak::Xmm src = ctx.reg_alloc.UseScratchXmm(args[0]);
-
-        const int round_imm = [&]{
-            switch (rounding) {
-            case FP::RoundingMode::ToNearest_TieEven:
-            default:
-                return 0b00;
-            case FP::RoundingMode::TowardsPlusInfinity:
-                return 0b10;
-            case FP::RoundingMode::TowardsMinusInfinity:
-                return 0b01;
-            case FP::RoundingMode::TowardsZero:
-                return 0b11;
-            }
-        }();
-
-        const auto perform_conversion = [&code, &ctx](const Xbyak::Xmm& src) {
-            // MSVC doesn't allow us to use a [&] capture, so we have to do this instead.
-            (void)ctx;
-
-            if constexpr (fsize == 32) {
-                code.cvttps2dq(src, src);
-            } else {
-                const Xbyak::Reg64 hi = ctx.reg_alloc.ScratchGpr();
-                const Xbyak::Reg64 lo = ctx.reg_alloc.ScratchGpr();
-
-                code.cvttsd2si(lo, src);
-                code.punpckhqdq(src, src);
-                code.cvttsd2si(hi, src);
-                code.movq(src, lo);
-                code.pinsrq(src, hi, 1);
-
-                ctx.reg_alloc.Release(hi);
-                ctx.reg_alloc.Release(lo);
-            }
-        };
-
-        if (fbits != 0) {
-            const u64 scale_factor = fsize == 32
-                                   ? static_cast<u64>(fbits + 127) << 23
-                                   : static_cast<u64>(fbits + 1023) << 52;
-            FCODE(mulp)(src, GetVectorOf<fsize>(code, scale_factor));
-        }
-
-        FCODE(roundp)(src, src, static_cast<u8>(round_imm));
-        ZeroIfNaN<fsize>(code, src);
-
-        constexpr u64 float_upper_limit_signed = fsize == 32 ? 0x4f000000 : 0x43e0000000000000;
-        [[maybe_unused]] constexpr u64 float_upper_limit_unsigned = fsize == 32 ? 0x4f800000 : 0x43f0000000000000;
-
-        if constexpr (unsigned_) {
-            // Zero is minimum
-            code.xorps(xmm0, xmm0);
-            FCODE(cmplep)(xmm0, src);
-            FCODE(andp)(src, xmm0);
-
-            // Will we exceed unsigned range?
-            const Xbyak::Xmm exceed_unsigned = ctx.reg_alloc.ScratchXmm();
-            code.movaps(exceed_unsigned, GetVectorOf<fsize, float_upper_limit_unsigned>(code));
-            FCODE(cmplep)(exceed_unsigned, src);
-
-            // Will be exceed signed range?
-            const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
-            code.movaps(tmp, GetVectorOf<fsize, float_upper_limit_signed>(code));
-            code.movaps(xmm0, tmp);
-            FCODE(cmplep)(xmm0, src);
-            FCODE(andp)(tmp, xmm0);
-            FCODE(subp)(src, tmp);
-            perform_conversion(src);
-            if constexpr (fsize == 32) {
-                code.pslld(xmm0, 31);
-            } else {
-                code.psllq(xmm0, 63);
-            }
-            FCODE(orp)(src, xmm0);
-
-            // Saturate to max
-            FCODE(orp)(src, exceed_unsigned);
-        } else {
-            constexpr u64 integer_max = static_cast<FPT>(std::numeric_limits<std::conditional_t<unsigned_, FPT, std::make_signed_t<FPT>>>::max());
-
-            code.movaps(xmm0, GetVectorOf<fsize, float_upper_limit_signed>(code));
-            FCODE(cmplep)(xmm0, src);
-            perform_conversion(src);
-            FCODE(blendvp)(src, GetVectorOf<fsize, integer_max>(code));
-        }
-
-        ctx.reg_alloc.DefineValue(inst, src);
-        return;
-    }
+    if constexpr (fsize != 16) {
+        if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41) && rounding != FP::RoundingMode::ToNearest_TieAwayFromZero) {
+            auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+            const Xbyak::Xmm src = ctx.reg_alloc.UseScratchXmm(args[0]);
+
+            const int round_imm = [&]{
+                switch (rounding) {
+                case FP::RoundingMode::ToNearest_TieEven:
+                default:
+                    return 0b00;
+                case FP::RoundingMode::TowardsPlusInfinity:
+                    return 0b10;
+                case FP::RoundingMode::TowardsMinusInfinity:
+                    return 0b01;
+                case FP::RoundingMode::TowardsZero:
+                    return 0b11;
+                }
+            }();
+
+            const auto perform_conversion = [&code, &ctx](const Xbyak::Xmm& src) {
+                // MSVC doesn't allow us to use a [&] capture, so we have to do this instead.
+                (void)ctx;
+
+                if constexpr (fsize == 32) {
+                    code.cvttps2dq(src, src);
+                } else {
+                    const Xbyak::Reg64 hi = ctx.reg_alloc.ScratchGpr();
+                    const Xbyak::Reg64 lo = ctx.reg_alloc.ScratchGpr();
+
+                    code.cvttsd2si(lo, src);
+                    code.punpckhqdq(src, src);
+                    code.cvttsd2si(hi, src);
+                    code.movq(src, lo);
+                    code.pinsrq(src, hi, 1);
+
+                    ctx.reg_alloc.Release(hi);
+                    ctx.reg_alloc.Release(lo);
+                }
+            };
+
+            if (fbits != 0) {
+                const u64 scale_factor = fsize == 32
+                                       ? static_cast<u64>(fbits + 127) << 23
+                                       : static_cast<u64>(fbits + 1023) << 52;
+                FCODE(mulp)(src, GetVectorOf<fsize>(code, scale_factor));
+            }
+
+            FCODE(roundp)(src, src, static_cast<u8>(round_imm));
+            ZeroIfNaN<fsize>(code, src);
+
+            constexpr u64 float_upper_limit_signed = fsize == 32 ? 0x4f000000 : 0x43e0000000000000;
+            [[maybe_unused]] constexpr u64 float_upper_limit_unsigned = fsize == 32 ? 0x4f800000 : 0x43f0000000000000;
+
+            if constexpr (unsigned_) {
+                // Zero is minimum
+                code.xorps(xmm0, xmm0);
+                FCODE(cmplep)(xmm0, src);
+                FCODE(andp)(src, xmm0);
+
+                // Will we exceed unsigned range?
+                const Xbyak::Xmm exceed_unsigned = ctx.reg_alloc.ScratchXmm();
+                code.movaps(exceed_unsigned, GetVectorOf<fsize, float_upper_limit_unsigned>(code));
+                FCODE(cmplep)(exceed_unsigned, src);
+
+                // Will be exceed signed range?
+                const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
+                code.movaps(tmp, GetVectorOf<fsize, float_upper_limit_signed>(code));
+                code.movaps(xmm0, tmp);
+                FCODE(cmplep)(xmm0, src);
+                FCODE(andp)(tmp, xmm0);
+                FCODE(subp)(src, tmp);
+                perform_conversion(src);
+                if constexpr (fsize == 32) {
+                    code.pslld(xmm0, 31);
+                } else {
+                    code.psllq(xmm0, 63);
+                }
+                FCODE(orp)(src, xmm0);
+
+                // Saturate to max
+                FCODE(orp)(src, exceed_unsigned);
+            } else {
+                constexpr u64 integer_max = static_cast<FPT>(std::numeric_limits<std::conditional_t<unsigned_, FPT, std::make_signed_t<FPT>>>::max());
+
+                code.movaps(xmm0, GetVectorOf<fsize, float_upper_limit_signed>(code));
+                FCODE(cmplep)(xmm0, src);
+                perform_conversion(src);
+                FCODE(blendvp)(src, GetVectorOf<fsize, integer_max>(code));
+            }
+
+            ctx.reg_alloc.DefineValue(inst, src);
+            return;
+        }
+    }
 
     using fbits_list = mp::vllift<std::make_index_sequence<fsize + 1>>;
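SSE4.1 has no unsigned float-to-integer conversion, which is what the subp/pslld/psllq/orp sequence in the unsigned_ branch works around: lanes at or above the signed limit are reduced into signed range, converted with the signed instruction, and the top bit is OR-ed back in; the exceed_unsigned mask then forces genuinely out-of-range lanes to all-ones. A scalar rendering of the same trick (illustrative only; input assumed already clamped to be non-negative, as the emitted code guarantees):

    #include <cstdint>
    #include <cstdio>

    // Reduce values that exceed the signed range by 2^63, convert as signed,
    // then restore the top bit -- the vector code does this per lane with
    // compare masks instead of a branch.
    std::uint64_t f64_to_u64(double x) {  // precondition: 0 <= x < 2^64
        const double two_63 = 9223372036854775808.0;  // 2^63
        if (x >= two_63) {
            const auto reduced = static_cast<std::int64_t>(x - two_63);
            return static_cast<std::uint64_t>(reduced) | (std::uint64_t{1} << 63);
        }
        return static_cast<std::uint64_t>(static_cast<std::int64_t>(x));
    }

    int main() {
        // 1.0e19 is above 2^63 (~9.22e18) but below 2^64.
        std::printf("%llu\n", static_cast<unsigned long long>(f64_to_u64(1.0e19)));
        // prints 10000000000000000000
    }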
@@ -2046,28 +2046,80 @@ U16 IREmitter::FPSingleToHalf(const U32& a, FP::RoundingMode rounding) {
     return Inst<U16>(Opcode::FPSingleToHalf, a, Imm8(static_cast<u8>(rounding)));
 }
 
-U32 IREmitter::FPToFixedS32(const U32U64& a, size_t fbits, FP::RoundingMode rounding) {
+U32 IREmitter::FPToFixedS32(const U16U32U64& a, size_t fbits, FP::RoundingMode rounding) {
     ASSERT(fbits <= 32);
-    const Opcode opcode = a.GetType() == Type::U32 ? Opcode::FPSingleToFixedS32 : Opcode::FPDoubleToFixedS32;
-    return Inst<U32>(opcode, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
+    const U8 fbits_imm = Imm8(static_cast<u8>(fbits));
+    const U8 rounding_imm = Imm8(static_cast<u8>(rounding));
+
+    switch (a.GetType()) {
+    case Type::U16:
+        return Inst<U32>(Opcode::FPHalfToFixedS32, a, fbits_imm, rounding_imm);
+    case Type::U32:
+        return Inst<U32>(Opcode::FPSingleToFixedS32, a, fbits_imm, rounding_imm);
+    case Type::U64:
+        return Inst<U32>(Opcode::FPDoubleToFixedS32, a, fbits_imm, rounding_imm);
+    default:
+        UNREACHABLE();
+        return U32{};
+    }
 }
 
-U64 IREmitter::FPToFixedS64(const U32U64& a, size_t fbits, FP::RoundingMode rounding) {
+U64 IREmitter::FPToFixedS64(const U16U32U64& a, size_t fbits, FP::RoundingMode rounding) {
     ASSERT(fbits <= 64);
-    const Opcode opcode = a.GetType() == Type::U32 ? Opcode::FPSingleToFixedS64 : Opcode::FPDoubleToFixedS64;
-    return Inst<U64>(opcode, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
+    const U8 fbits_imm = Imm8(static_cast<u8>(fbits));
+    const U8 rounding_imm = Imm8(static_cast<u8>(rounding));
+
+    switch (a.GetType()) {
+    case Type::U16:
+        return Inst<U64>(Opcode::FPHalfToFixedS64, a, fbits_imm, rounding_imm);
+    case Type::U32:
+        return Inst<U64>(Opcode::FPSingleToFixedS64, a, fbits_imm, rounding_imm);
+    case Type::U64:
+        return Inst<U64>(Opcode::FPDoubleToFixedS64, a, fbits_imm, rounding_imm);
+    default:
+        UNREACHABLE();
+        return U64{};
+    }
 }
 
-U32 IREmitter::FPToFixedU32(const U32U64& a, size_t fbits, FP::RoundingMode rounding) {
+U32 IREmitter::FPToFixedU32(const U16U32U64& a, size_t fbits, FP::RoundingMode rounding) {
     ASSERT(fbits <= 32);
-    const Opcode opcode = a.GetType() == Type::U32 ? Opcode::FPSingleToFixedU32 : Opcode::FPDoubleToFixedU32;
-    return Inst<U32>(opcode, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
+    const U8 fbits_imm = Imm8(static_cast<u8>(fbits));
+    const U8 rounding_imm = Imm8(static_cast<u8>(rounding));
+
+    switch (a.GetType()) {
+    case Type::U16:
+        return Inst<U32>(Opcode::FPHalfToFixedU32, a, fbits_imm, rounding_imm);
+    case Type::U32:
+        return Inst<U32>(Opcode::FPSingleToFixedU32, a, fbits_imm, rounding_imm);
+    case Type::U64:
+        return Inst<U32>(Opcode::FPDoubleToFixedU32, a, fbits_imm, rounding_imm);
+    default:
+        UNREACHABLE();
+        return U32{};
+    }
 }
 
-U64 IREmitter::FPToFixedU64(const U32U64& a, size_t fbits, FP::RoundingMode rounding) {
+U64 IREmitter::FPToFixedU64(const U16U32U64& a, size_t fbits, FP::RoundingMode rounding) {
     ASSERT(fbits <= 64);
-    const Opcode opcode = a.GetType() == Type::U32 ? Opcode::FPSingleToFixedU64 : Opcode::FPDoubleToFixedU64;
-    return Inst<U64>(opcode, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
+    const U8 fbits_imm = Imm8(static_cast<u8>(fbits));
+    const U8 rounding_imm = Imm8(static_cast<u8>(rounding));
+
+    switch (a.GetType()) {
+    case Type::U16:
+        return Inst<U64>(Opcode::FPHalfToFixedU64, a, fbits_imm, rounding_imm);
+    case Type::U32:
+        return Inst<U64>(Opcode::FPSingleToFixedU64, a, fbits_imm, rounding_imm);
+    case Type::U64:
+        return Inst<U64>(Opcode::FPDoubleToFixedU64, a, fbits_imm, rounding_imm);
+    default:
+        UNREACHABLE();
+        return U64{};
+    }
 }
 
 U32 IREmitter::FPSignedFixedToSingle(const U32U64& a, size_t fbits, FP::RoundingMode rounding) {
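The only caller-visible change is the widened parameter type; the opcode is now chosen by a switch on the operand's IR type instead of a two-way conditional. A hypothetical call site (the function name and include path are assumptions, not part of the commit):

    #include "frontend/ir/ir_emitter.h"  // assumed include path

    namespace Dynarmic::IR {

    // Illustrative only: passing a half-precision (U16) value selects
    // Opcode::FPHalfToFixedS32 via the Type::U16 case above.
    U32 TruncateHalfToS32(IREmitter& ir, const U16& half_val) {
        return ir.FPToFixedS32(half_val, /*fbits=*/0, FP::RoundingMode::TowardsZero);
    }

    } // namespace Dynarmic::IR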
@@ -2368,24 +2420,38 @@ U128 IREmitter::FPVectorSub(size_t esize, const U128& a, const U128& b) {
 
 U128 IREmitter::FPVectorToSignedFixed(size_t esize, const U128& a, size_t fbits, FP::RoundingMode rounding) {
     ASSERT(fbits <= esize);
 
+    const U8 fbits_imm = Imm8(static_cast<u8>(fbits));
+    const U8 rounding_imm = Imm8(static_cast<u8>(rounding));
+
     switch (esize) {
+    case 16:
+        return Inst<U128>(Opcode::FPVectorToSignedFixed16, a, fbits_imm, rounding_imm);
     case 32:
-        return Inst<U128>(Opcode::FPVectorToSignedFixed32, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
+        return Inst<U128>(Opcode::FPVectorToSignedFixed32, a, fbits_imm, rounding_imm);
     case 64:
-        return Inst<U128>(Opcode::FPVectorToSignedFixed64, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
+        return Inst<U128>(Opcode::FPVectorToSignedFixed64, a, fbits_imm, rounding_imm);
     }
 
     UNREACHABLE();
     return {};
 }
 
 U128 IREmitter::FPVectorToUnsignedFixed(size_t esize, const U128& a, size_t fbits, FP::RoundingMode rounding) {
     ASSERT(fbits <= esize);
 
+    const U8 fbits_imm = Imm8(static_cast<u8>(fbits));
+    const U8 rounding_imm = Imm8(static_cast<u8>(rounding));
+
     switch (esize) {
+    case 16:
+        return Inst<U128>(Opcode::FPVectorToUnsignedFixed16, a, fbits_imm, rounding_imm);
     case 32:
-        return Inst<U128>(Opcode::FPVectorToUnsignedFixed32, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
+        return Inst<U128>(Opcode::FPVectorToUnsignedFixed32, a, fbits_imm, rounding_imm);
     case 64:
-        return Inst<U128>(Opcode::FPVectorToUnsignedFixed64, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
+        return Inst<U128>(Opcode::FPVectorToUnsignedFixed64, a, fbits_imm, rounding_imm);
     }
 
     UNREACHABLE();
     return {};
 }
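The vector emitter gains the analogous esize == 16 cases. A hypothetical use, continuing the assumptions of the previous sketch:

    // Illustrative only: esize selects the opcode, so 16 now maps to the
    // new FPVectorToSignedFixed16; fbits may not exceed the element size.
    U128 VecHalfToSignedFixed(IREmitter& ir, const U128& vec) {
        return ir.FPVectorToSignedFixed(16, vec, /*fbits=*/8,
                                        FP::RoundingMode::ToNearest_TieEven);
    }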
@@ -319,10 +319,10 @@ public:
     U32 FPHalfToSingle(const U16& a, FP::RoundingMode rounding);
     U16 FPSingleToHalf(const U32& a, FP::RoundingMode rounding);
     U64 FPSingleToDouble(const U32& a, FP::RoundingMode rounding);
-    U32 FPToFixedS32(const U32U64& a, size_t fbits, FP::RoundingMode rounding);
-    U64 FPToFixedS64(const U32U64& a, size_t fbits, FP::RoundingMode rounding);
-    U32 FPToFixedU32(const U32U64& a, size_t fbits, FP::RoundingMode rounding);
-    U64 FPToFixedU64(const U32U64& a, size_t fbits, FP::RoundingMode rounding);
+    U32 FPToFixedS32(const U16U32U64& a, size_t fbits, FP::RoundingMode rounding);
+    U64 FPToFixedS64(const U16U32U64& a, size_t fbits, FP::RoundingMode rounding);
+    U32 FPToFixedU32(const U16U32U64& a, size_t fbits, FP::RoundingMode rounding);
+    U64 FPToFixedU64(const U16U32U64& a, size_t fbits, FP::RoundingMode rounding);
     U32 FPSignedFixedToSingle(const U32U64& a, size_t fbits, FP::RoundingMode rounding);
     U32 FPUnsignedFixedToSingle(const U32U64& a, size_t fbits, FP::RoundingMode rounding);
     U64 FPSignedFixedToDouble(const U32U64& a, size_t fbits, FP::RoundingMode rounding);
@@ -303,6 +303,10 @@ bool Inst::ReadsFromAndWritesToFPSRCumulativeExceptionBits() const {
     case Opcode::FPDoubleToFixedS64:
     case Opcode::FPDoubleToFixedU32:
     case Opcode::FPDoubleToFixedU64:
+    case Opcode::FPHalfToFixedS32:
+    case Opcode::FPHalfToFixedS64:
+    case Opcode::FPHalfToFixedU32:
+    case Opcode::FPHalfToFixedU64:
     case Opcode::FPSingleToFixedS32:
     case Opcode::FPSingleToFixedS64:
     case Opcode::FPSingleToFixedU32:
@@ -356,6 +360,8 @@ bool Inst::ReadsFromAndWritesToFPSRCumulativeExceptionBits() const {
     case Opcode::FPVectorSqrt64:
     case Opcode::FPVectorSub32:
     case Opcode::FPVectorSub64:
+    case Opcode::FPVectorToSignedFixed16:
+    case Opcode::FPVectorToUnsignedFixed16:
         return true;
 
     default:
@@ -524,6 +524,10 @@ OPCODE(FPDoubleToFixedS32, U32, U64, U8, U8 )
 OPCODE(FPDoubleToFixedS64, U64, U64, U8, U8 )
 OPCODE(FPDoubleToFixedU32, U32, U64, U8, U8 )
 OPCODE(FPDoubleToFixedU64, U64, U64, U8, U8 )
+OPCODE(FPHalfToFixedS32, U32, U16, U8, U8 )
+OPCODE(FPHalfToFixedS64, U64, U16, U8, U8 )
+OPCODE(FPHalfToFixedU32, U32, U16, U8, U8 )
+OPCODE(FPHalfToFixedU64, U64, U16, U8, U8 )
 OPCODE(FPSingleToFixedS32, U32, U32, U8, U8 )
 OPCODE(FPSingleToFixedS64, U64, U32, U8, U8 )
 OPCODE(FPSingleToFixedU32, U32, U32, U8, U8 )
@@ -591,8 +595,10 @@ OPCODE(FPVectorSqrt32, U128, U128 )
 OPCODE(FPVectorSqrt64, U128, U128 )
 OPCODE(FPVectorSub32, U128, U128, U128 )
 OPCODE(FPVectorSub64, U128, U128, U128 )
+OPCODE(FPVectorToSignedFixed16, U128, U128, U8, U8 )
 OPCODE(FPVectorToSignedFixed32, U128, U128, U8, U8 )
 OPCODE(FPVectorToSignedFixed64, U128, U128, U8, U8 )
+OPCODE(FPVectorToUnsignedFixed16, U128, U128, U8, U8 )
 OPCODE(FPVectorToUnsignedFixed32, U128, U128, U8, U8 )
 OPCODE(FPVectorToUnsignedFixed64, U128, U128, U8, U8 )
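opcodes.inc is an X-macro table: each row gives the opcode's name, result type, and argument types, so the new half entries consume a U16 plus two U8 immediates (fbits and rounding mode). A generic illustration of how such a table is consumed, not dynarmic's actual consumer code:

    #include <cstdio>

    // Each OPCODE row expands under whatever definition is in effect, so the
    // same table can generate the opcode enum, name strings, type checks, etc.
    #define OPCODE(name, ret, ...) std::printf(#name " -> " #ret "\n");

    int main() {
        OPCODE(FPHalfToFixedS32, U32, U16, U8, U8)
        OPCODE(FPVectorToSignedFixed16, U128, U128, U8, U8)
    }

    #undef OPCODE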