emit_x64_floating_point: SSE4.1 implementation of EmitFPRound
This commit is contained in:
parent
a40127a054
commit
83be491875
1 changed file with 32 additions and 3 deletions
|
@ -794,10 +794,38 @@ void EmitX64::EmitFPMulAdd64(EmitContext& ctx, IR::Inst* inst) {
|
||||||
}
|
}
|
||||||
|
|
||||||
static void EmitFPRound(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, size_t fsize) {
|
static void EmitFPRound(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, size_t fsize) {
|
||||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
const auto rounding = static_cast<FP::RoundingMode>(inst->GetArg(1).GetU8());
|
||||||
|
const bool exact = inst->GetArg(2).GetU1();
|
||||||
|
|
||||||
const auto rounding = static_cast<FP::RoundingMode>(args[1].GetImmediateU8());
|
if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41) && rounding != FP::RoundingMode::ToNearest_TieAwayFromZero && !exact) {
|
||||||
const bool exact = args[2].GetImmediateU1();
|
const int round_imm = [&]{
|
||||||
|
switch (rounding) {
|
||||||
|
case FP::RoundingMode::ToNearest_TieEven:
|
||||||
|
return 0b00;
|
||||||
|
case FP::RoundingMode::TowardsPlusInfinity:
|
||||||
|
return 0b10;
|
||||||
|
case FP::RoundingMode::TowardsMinusInfinity:
|
||||||
|
return 0b01;
|
||||||
|
case FP::RoundingMode::TowardsZero:
|
||||||
|
return 0b11;
|
||||||
|
default:
|
||||||
|
UNREACHABLE();
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}();
|
||||||
|
|
||||||
|
if (fsize == 64) {
|
||||||
|
FPTwoOp64(code, ctx, inst, [&](Xbyak::Xmm result) {
|
||||||
|
code.roundsd(result, result, round_imm);
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
FPTwoOp32(code, ctx, inst, [&](Xbyak::Xmm result) {
|
||||||
|
code.roundss(result, result, round_imm);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
using fsize_list = mp::list<mp::vlift<size_t(32)>, mp::vlift<size_t(64)>>;
|
using fsize_list = mp::list<mp::vlift<size_t(32)>, mp::vlift<size_t(64)>>;
|
||||||
using rounding_list = mp::list<
|
using rounding_list = mp::list<
|
||||||
|
@ -832,6 +860,7 @@ static void EmitFPRound(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, siz
|
||||||
mp::cartesian_product<fsize_list, rounding_list, exact_list>{}
|
mp::cartesian_product<fsize_list, rounding_list, exact_list>{}
|
||||||
);
|
);
|
||||||
|
|
||||||
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
ctx.reg_alloc.HostCall(inst, args[0]);
|
ctx.reg_alloc.HostCall(inst, args[0]);
|
||||||
code.lea(code.ABI_PARAM2, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
|
code.lea(code.ABI_PARAM2, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
|
||||||
code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR());
|
code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR());
|
||||||
|
|
Loading…
Reference in a new issue