IR: Add fbits argument to FPVectorFrom{Signed,Unsigned}Fixed
parent 027b0ef725
commit 02150bc0b7
6 changed files with 223 additions and 182 deletions
@@ -123,6 +123,15 @@ void HandleNaNs(BlockOfCode& code, EmitContext& ctx, std::array<Xbyak::Xmm, narg
     code.SwitchToNearCode();
 }
 
+template<size_t fsize>
+Xbyak::Address GetVectorOf(BlockOfCode& code, u64 value) {
+    if constexpr (fsize == 32) {
+        return code.MConst(xword, (value << 32) | value, (value << 32) | value);
+    } else {
+        return code.MConst(xword, value, value);
+    }
+}
+
 template<size_t fsize, u64 value>
 Xbyak::Address GetVectorOf(BlockOfCode& code) {
     if constexpr (fsize == 32) {
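The new runtime-value overload feeds MConst the same 128-bit constant the existing template version would produce: for fsize == 32 the 32-bit lane pattern is replicated into both halves of each quadword so one constant covers all four lanes. A minimal sketch of that broadcast (illustrative only, not dynarmic API):

#include <cstdint>

// Replicate a 32-bit lane pattern across a 64-bit half, as in
// GetVectorOf<32>(code, value): (value << 32) | value.
uint64_t broadcast32(uint64_t value) {
    return (value << 32) | value;  // e.g. 0x3F800000 -> 0x3F8000003F800000
}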
@@ -537,6 +546,181 @@ void EmitX64::EmitFPVectorEqual64(EmitContext& ctx, IR::Inst* inst) {
     ctx.reg_alloc.DefineValue(inst, a);
 }
 
+void EmitX64::EmitFPVectorFromSignedFixed32(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    const Xbyak::Xmm xmm = ctx.reg_alloc.UseScratchXmm(args[0]);
+    const int fbits = args[1].GetImmediateU8();
+    const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
+    ASSERT(rounding_mode == ctx.FPSCR_RMode());
+
+    code.cvtdq2ps(xmm, xmm);
+
+    if (fbits != 0) {
+        code.mulps(xmm, GetVectorOf<32>(code, static_cast<u32>(127 - fbits) << 23));
+    }
+
+    ctx.reg_alloc.DefineValue(inst, xmm);
+}
+
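The fbits scaling rests on a bit-level identity: a single-precision float with sign 0, biased exponent 127 - fbits, and a zero mantissa encodes exactly 2^-fbits, so one mulps rescales the converted integers from fixed-point to real values. A scalar check of that identity (illustrative only, not part of the commit):

#include <cassert>
#include <cstdint>
#include <cstring>

// (127 - fbits) << 23 is the IEEE-754 single-precision encoding of 2^-fbits
// (valid for 0 < fbits < 127): sign 0, biased exponent 127 - fbits, zero mantissa.
float fbits_scale(int fbits) {
    const uint32_t bits = static_cast<uint32_t>(127 - fbits) << 23;
    float f;
    std::memcpy(&f, &bits, sizeof(f));
    return f;
}

int main() {
    assert(fbits_scale(4) == 0.0625f);       // 2^-4
    assert(16.0f * fbits_scale(4) == 1.0f);  // fixed-point 16 with fbits = 4 is 1.0
}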
+void EmitX64::EmitFPVectorFromSignedFixed64(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    const Xbyak::Xmm xmm = ctx.reg_alloc.UseScratchXmm(args[0]);
+    const int fbits = args[1].GetImmediateU8();
+    const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
+    ASSERT(rounding_mode == ctx.FPSCR_RMode());
+
+    if (code.DoesCpuSupport(Xbyak::util::Cpu::tAVX512VL) && code.DoesCpuSupport(Xbyak::util::Cpu::tAVX512DQ)) {
+        code.vcvtqq2pd(xmm, xmm);
+    } else if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
+        const Xbyak::Xmm xmm_tmp = ctx.reg_alloc.ScratchXmm();
+        const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr();
+
+        // First quadword
+        code.movq(tmp, xmm);
+        code.cvtsi2sd(xmm, tmp);
+
+        // Second quadword
+        code.pextrq(tmp, xmm, 1);
+        code.cvtsi2sd(xmm_tmp, tmp);
+
+        // Combine
+        code.unpcklpd(xmm, xmm_tmp);
+    } else {
+        const Xbyak::Xmm high_xmm = ctx.reg_alloc.ScratchXmm();
+        const Xbyak::Xmm xmm_tmp = ctx.reg_alloc.ScratchXmm();
+        const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr();
+
+        // First quadword
+        code.movhlps(high_xmm, xmm);
+        code.movq(tmp, xmm);
+        code.cvtsi2sd(xmm, tmp);
+
+        // Second quadword
+        code.movq(tmp, high_xmm);
+        code.cvtsi2sd(xmm_tmp, tmp);
+
+        // Combine
+        code.unpcklpd(xmm, xmm_tmp);
+    }
+
+    if (fbits != 0) {
+        code.mulpd(xmm, GetVectorOf<64>(code, static_cast<u64>(1023 - fbits) << 52));
+    }
+
+    ctx.reg_alloc.DefineValue(inst, xmm);
+}
+
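For the 64-bit case the same identity holds in double precision: (1023 - fbits) << 52 encodes 2^-fbits. The two pre-AVX512 fallbacks differ only in how the high quadword reaches a general-purpose register (pextrq on SSE4.1, movhlps plus movq otherwise); both perform one scalar cvtsi2sd per lane. A scalar reference for what either path computes (illustrative only):

#include <cstdint>

// Each signed 64-bit lane is converted independently, one cvtsi2sd per
// quadword, then the two doubles are packed back together (unpcklpd).
void s64x2_to_f64x2(const int64_t in[2], double out[2]) {
    out[0] = static_cast<double>(in[0]);  // low quadword
    out[1] = static_cast<double>(in[1]);  // high quadword
}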
+void EmitX64::EmitFPVectorFromUnsignedFixed32(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    const Xbyak::Xmm xmm = ctx.reg_alloc.UseScratchXmm(args[0]);
+    const int fbits = args[1].GetImmediateU8();
+    const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
+    ASSERT(rounding_mode == ctx.FPSCR_RMode());
+
+    if (code.DoesCpuSupport(Xbyak::util::Cpu::tAVX512DQ) && code.DoesCpuSupport(Xbyak::util::Cpu::tAVX512VL)) {
+        code.vcvtudq2ps(xmm, xmm);
+    } else {
+        const Xbyak::Address mem_4B000000 = code.MConst(xword, 0x4B0000004B000000, 0x4B0000004B000000);
+        const Xbyak::Address mem_53000000 = code.MConst(xword, 0x5300000053000000, 0x5300000053000000);
+        const Xbyak::Address mem_D3000080 = code.MConst(xword, 0xD3000080D3000080, 0xD3000080D3000080);
+
+        const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
+
+        if (code.DoesCpuSupport(Xbyak::util::Cpu::tAVX)) {
+            code.vpblendw(tmp, xmm, mem_4B000000, 0b10101010);
+            code.vpsrld(xmm, xmm, 16);
+            code.vpblendw(xmm, xmm, mem_53000000, 0b10101010);
+            code.vaddps(xmm, xmm, mem_D3000080);
+            code.vaddps(xmm, tmp, xmm);
+        } else {
+            const Xbyak::Address mem_0xFFFF = code.MConst(xword, 0x0000FFFF0000FFFF, 0x0000FFFF0000FFFF);
+
+            code.movdqa(tmp, mem_0xFFFF);
+
+            code.pand(tmp, xmm);
+            code.por(tmp, mem_4B000000);
+            code.psrld(xmm, 16);
+            code.por(xmm, mem_53000000);
+            code.addps(xmm, mem_D3000080);
+            code.addps(xmm, tmp);
+        }
+    }
+
+    if (fbits != 0) {
+        code.mulps(xmm, GetVectorOf<32>(code, static_cast<u32>(127 - fbits) << 23));
+    }
+
+    if (ctx.FPSCR_RMode() == FP::RoundingMode::TowardsMinusInfinity) {
+        code.pand(xmm, code.MConst(xword, 0x7FFFFFFF7FFFFFFF, 0x7FFFFFFF7FFFFFFF));
+    }
+
+    ctx.reg_alloc.DefineValue(inst, xmm);
+}
+
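Without AVX512 there is no unsigned dword-to-float instruction, so the emitted code splits each 32-bit lane into 16-bit halves and embeds each half exactly in a float's mantissa: 0x4B000000 | lo encodes 2^23 + lo, and 0x53000000 | hi encodes 2^39 + hi * 2^16. Adding 0xD3000080, which encodes -(2^39 + 2^23), cancels both bias terms, and the final add incurs the only rounding. The trailing pand clears the sign bit because in round-towards-minus-infinity an exactly-zero sum rounds to -0.0, so an input of zero would otherwise come out negative. A scalar model of the trick (illustrative only, not part of the commit):

#include <cstdint>
#include <cstring>

static float bits_to_float(uint32_t b) {
    float f;
    std::memcpy(&f, &b, sizeof(f));
    return f;
}

// Scalar equivalent of the SSE sequence: both halves land exactly in the
// mantissas, the bias terms cancel, and only the final add rounds.
float u32_to_f32(uint32_t x) {
    const float lo = bits_to_float(0x4B000000 | (x & 0xFFFF));  // 2^23 + lo16
    const float hi = bits_to_float(0x53000000 | (x >> 16));     // 2^39 + hi16 * 2^16
    return (hi + bits_to_float(0xD3000080)) + lo;               // adds -(2^39 + 2^23)
}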
+void EmitX64::EmitFPVectorFromUnsignedFixed64(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    const Xbyak::Xmm xmm = ctx.reg_alloc.UseScratchXmm(args[0]);
+    const int fbits = args[1].GetImmediateU8();
+    const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
+    ASSERT(rounding_mode == ctx.FPSCR_RMode());
+
+    if (code.DoesCpuSupport(Xbyak::util::Cpu::tAVX512DQ) && code.DoesCpuSupport(Xbyak::util::Cpu::tAVX512VL)) {
+        code.vcvtuqq2pd(xmm, xmm);
+    } else {
+        const Xbyak::Address unpack = code.MConst(xword, 0x4530000043300000, 0);
+        const Xbyak::Address subtrahend = code.MConst(xword, 0x4330000000000000, 0x4530000000000000);
+
+        const Xbyak::Xmm unpack_reg = ctx.reg_alloc.ScratchXmm();
+        const Xbyak::Xmm subtrahend_reg = ctx.reg_alloc.ScratchXmm();
+        const Xbyak::Xmm tmp1 = ctx.reg_alloc.ScratchXmm();
+
+        if (code.DoesCpuSupport(Xbyak::util::Cpu::tAVX)) {
+            code.vmovapd(unpack_reg, unpack);
+            code.vmovapd(subtrahend_reg, subtrahend);
+
+            code.vunpcklps(tmp1, xmm, unpack_reg);
+            code.vsubpd(tmp1, tmp1, subtrahend_reg);
+
+            code.vpermilps(xmm, xmm, 0b01001110);
+
+            code.vunpcklps(xmm, xmm, unpack_reg);
+            code.vsubpd(xmm, xmm, subtrahend_reg);
+
+            code.vhaddpd(xmm, tmp1, xmm);
+        } else {
+            const Xbyak::Xmm tmp2 = ctx.reg_alloc.ScratchXmm();
+
+            code.movapd(unpack_reg, unpack);
+            code.movapd(subtrahend_reg, subtrahend);
+
+            code.pshufd(tmp1, xmm, 0b01001110);
+
+            code.punpckldq(xmm, unpack_reg);
+            code.subpd(xmm, subtrahend_reg);
+            code.pshufd(tmp2, xmm, 0b01001110);
+            code.addpd(xmm, tmp2);
+
+            code.punpckldq(tmp1, unpack_reg);
+            code.subpd(tmp1, subtrahend_reg);
+
+            code.pshufd(unpack_reg, tmp1, 0b01001110);
+            code.addpd(unpack_reg, tmp1);
+
+            code.unpcklpd(xmm, unpack_reg);
+        }
+    }
+
+    if (fbits != 0) {
+        code.mulpd(xmm, GetVectorOf<64>(code, static_cast<u64>(1023 - fbits) << 52));
+    }
+
+    if (ctx.FPSCR_RMode() == FP::RoundingMode::TowardsMinusInfinity) {
+        code.pand(xmm, code.MConst(xword, 0x7FFFFFFFFFFFFFFF, 0x7FFFFFFFFFFFFFFF));
+    }
+
+    ctx.reg_alloc.DefineValue(inst, xmm);
+}
+
 void EmitX64::EmitFPVectorGreater32(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     const Xbyak::Xmm a = ctx.reg_alloc.UseXmm(args[0]);
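The unsigned 64-bit path uses the double-precision analogue of the same idea: interleaving (punpckldq or vunpcklps) pairs each 32-bit half of the input with an exponent word from the unpack constant, producing 2^52 + lo32 and 2^84 + hi32 * 2^32 with the halves held exactly in the mantissas. Subtracting the subtrahend lanes recovers lo32 and hi32 * 2^32 exactly, and the horizontal add recombines them with a single rounding; the trailing pand again guards against a -0.0 result for a zero input under round-towards-minus-infinity. A scalar model (illustrative only, not part of the commit):

#include <cstdint>
#include <cstring>

static double bits_to_double(uint64_t b) {
    double d;
    std::memcpy(&d, &b, sizeof(d));
    return d;
}

// Scalar equivalent of the unpack/subtract/horizontal-add sequence.
double u64_to_f64(uint64_t x) {
    const double lo = bits_to_double(0x4330000000000000 | (x & 0xFFFFFFFF));  // 2^52 + lo32
    const double hi = bits_to_double(0x4530000000000000 | (x >> 32));         // 2^84 + hi32 * 2^32
    return (hi - bits_to_double(0x4530000000000000))    // exact: hi32 * 2^32
         + (lo - bits_to_double(0x4330000000000000));   // exact: lo32, one final rounding
}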
@@ -1042,56 +1226,6 @@ void EmitX64::EmitFPVectorRSqrtStepFused64(EmitContext& ctx, IR::Inst* inst) {
     EmitRSqrtStepFused<64>(code, ctx, inst);
 }
 
-void EmitX64::EmitFPVectorS32ToSingle(EmitContext& ctx, IR::Inst* inst) {
-    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    const Xbyak::Xmm xmm = ctx.reg_alloc.UseScratchXmm(args[0]);
-
-    code.cvtdq2ps(xmm, xmm);
-
-    ctx.reg_alloc.DefineValue(inst, xmm);
-}
-
-void EmitX64::EmitFPVectorS64ToDouble(EmitContext& ctx, IR::Inst* inst) {
-    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    const Xbyak::Xmm xmm = ctx.reg_alloc.UseScratchXmm(args[0]);
-
-    if (code.DoesCpuSupport(Xbyak::util::Cpu::tAVX512VL) && code.DoesCpuSupport(Xbyak::util::Cpu::tAVX512DQ)) {
-        code.vcvtqq2pd(xmm, xmm);
-    } else if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
-        const Xbyak::Xmm xmm_tmp = ctx.reg_alloc.ScratchXmm();
-        const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr();
-
-        // First quadword
-        code.movq(tmp, xmm);
-        code.cvtsi2sd(xmm, tmp);
-
-        // Second quadword
-        code.pextrq(tmp, xmm, 1);
-        code.cvtsi2sd(xmm_tmp, tmp);
-
-        // Combine
-        code.unpcklpd(xmm, xmm_tmp);
-    } else {
-        const Xbyak::Xmm high_xmm = ctx.reg_alloc.ScratchXmm();
-        const Xbyak::Xmm xmm_tmp = ctx.reg_alloc.ScratchXmm();
-        const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr();
-
-        // First quadword
-        code.movhlps(high_xmm, xmm);
-        code.movq(tmp, xmm);
-        code.cvtsi2sd(xmm, tmp);
-
-        // Second quadword
-        code.movq(tmp, high_xmm);
-        code.cvtsi2sd(xmm_tmp, tmp);
-
-        // Combine
-        code.unpcklpd(xmm, xmm_tmp);
-    }
-
-    ctx.reg_alloc.DefineValue(inst, xmm);
-}
-
 void EmitX64::EmitFPVectorSub32(EmitContext& ctx, IR::Inst* inst) {
     EmitThreeOpVectorOperation<32, DefaultIndexer>(code, ctx, inst, &Xbyak::CodeGenerator::subps);
 }
@@ -1157,101 +1291,4 @@ void EmitX64::EmitFPVectorToUnsignedFixed64(EmitContext& ctx, IR::Inst* inst) {
     EmitFPVectorToFixed<64, true>(code, ctx, inst);
 }
 
-void EmitX64::EmitFPVectorU32ToSingle(EmitContext& ctx, IR::Inst* inst) {
-    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    const Xbyak::Xmm xmm = ctx.reg_alloc.UseScratchXmm(args[0]);
-
-    if (code.DoesCpuSupport(Xbyak::util::Cpu::tAVX512DQ) && code.DoesCpuSupport(Xbyak::util::Cpu::tAVX512VL)) {
-        code.vcvtudq2ps(xmm, xmm);
-    } else {
-        const Xbyak::Address mem_4B000000 = code.MConst(xword, 0x4B0000004B000000, 0x4B0000004B000000);
-        const Xbyak::Address mem_53000000 = code.MConst(xword, 0x5300000053000000, 0x5300000053000000);
-        const Xbyak::Address mem_D3000080 = code.MConst(xword, 0xD3000080D3000080, 0xD3000080D3000080);
-
-        const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
-
-        if (code.DoesCpuSupport(Xbyak::util::Cpu::tAVX)) {
-            code.vpblendw(tmp, xmm, mem_4B000000, 0b10101010);
-            code.vpsrld(xmm, xmm, 16);
-            code.vpblendw(xmm, xmm, mem_53000000, 0b10101010);
-            code.vaddps(xmm, xmm, mem_D3000080);
-            code.vaddps(xmm, tmp, xmm);
-        } else {
-            const Xbyak::Address mem_0xFFFF = code.MConst(xword, 0x0000FFFF0000FFFF, 0x0000FFFF0000FFFF);
-
-            code.movdqa(tmp, mem_0xFFFF);
-
-            code.pand(tmp, xmm);
-            code.por(tmp, mem_4B000000);
-            code.psrld(xmm, 16);
-            code.por(xmm, mem_53000000);
-            code.addps(xmm, mem_D3000080);
-            code.addps(xmm, tmp);
-        }
-    }
-
-    if (ctx.FPSCR_RMode() == FP::RoundingMode::TowardsMinusInfinity) {
-        code.pand(xmm, code.MConst(xword, 0x7FFFFFFF7FFFFFFF, 0x7FFFFFFF7FFFFFFF));
-    }
-
-    ctx.reg_alloc.DefineValue(inst, xmm);
-}
-
-void EmitX64::EmitFPVectorU64ToDouble(EmitContext& ctx, IR::Inst* inst) {
-    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    const Xbyak::Xmm xmm = ctx.reg_alloc.UseScratchXmm(args[0]);
-
-    if (code.DoesCpuSupport(Xbyak::util::Cpu::tAVX512DQ) && code.DoesCpuSupport(Xbyak::util::Cpu::tAVX512VL)) {
-        code.vcvtuqq2pd(xmm, xmm);
-    } else {
-        const Xbyak::Address unpack = code.MConst(xword, 0x4530000043300000, 0);
-        const Xbyak::Address subtrahend = code.MConst(xword, 0x4330000000000000, 0x4530000000000000);
-
-        const Xbyak::Xmm unpack_reg = ctx.reg_alloc.ScratchXmm();
-        const Xbyak::Xmm subtrahend_reg = ctx.reg_alloc.ScratchXmm();
-        const Xbyak::Xmm tmp1 = ctx.reg_alloc.ScratchXmm();
-
-        if (code.DoesCpuSupport(Xbyak::util::Cpu::tAVX)) {
-            code.vmovapd(unpack_reg, unpack);
-            code.vmovapd(subtrahend_reg, subtrahend);
-
-            code.vunpcklps(tmp1, xmm, unpack_reg);
-            code.vsubpd(tmp1, tmp1, subtrahend_reg);
-
-            code.vpermilps(xmm, xmm, 0b01001110);
-
-            code.vunpcklps(xmm, xmm, unpack_reg);
-            code.vsubpd(xmm, xmm, subtrahend_reg);
-
-            code.vhaddpd(xmm, tmp1, xmm);
-        } else {
-            const Xbyak::Xmm tmp2 = ctx.reg_alloc.ScratchXmm();
-
-            code.movapd(unpack_reg, unpack);
-            code.movapd(subtrahend_reg, subtrahend);
-
-            code.pshufd(tmp1, xmm, 0b01001110);
-
-            code.punpckldq(xmm, unpack_reg);
-            code.subpd(xmm, subtrahend_reg);
-            code.pshufd(tmp2, xmm, 0b01001110);
-            code.addpd(xmm, tmp2);
-
-            code.punpckldq(tmp1, unpack_reg);
-            code.subpd(tmp1, subtrahend_reg);
-
-            code.pshufd(unpack_reg, tmp1, 0b01001110);
-            code.addpd(unpack_reg, tmp1);
-
-            code.unpcklpd(xmm, unpack_reg);
-        }
-    }
-
-    if (ctx.FPSCR_RMode() == FP::RoundingMode::TowardsMinusInfinity) {
-        code.pand(xmm, code.MConst(xword, 0x7FFFFFFFFFFFFFFF, 0x7FFFFFFFFFFFFFFF));
-    }
-
-    ctx.reg_alloc.DefineValue(inst, xmm);
-}
-
 } // namespace Dynarmic::BackendX64
@@ -94,15 +94,13 @@ bool IntegerConvertToFloat(TranslatorVisitor& v, bool Q, bool sz, Vec Vn, Vec Vd
     }
 
     const size_t datasize = Q ? 128 : 64;
+    const size_t esize = sz ? 64 : 32;
+    const FP::RoundingMode rounding_mode = v.ir.current_location->FPCR().RMode();
 
     const IR::U128 operand = v.V(datasize, Vn);
-    const IR::U128 result = [&] {
-        if (signedness == Signedness::Signed) {
-            return sz ? v.ir.FPVectorS64ToDouble(operand) : v.ir.FPVectorS32ToSingle(operand);
-        }
-
-        return sz ? v.ir.FPVectorU64ToDouble(operand) : v.ir.FPVectorU32ToSingle(operand);
-    }();
+    const IR::U128 result = signedness == Signedness::Signed
+                                ? v.ir.FPVectorFromSignedFixed(esize, operand, 0, rounding_mode)
+                                : v.ir.FPVectorFromUnsignedFixed(esize, operand, 0, rounding_mode);
 
     v.V(datasize, Vd, result);
     return true;
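With the widened interface the translator no longer needs per-width helper opcodes: the old FPVectorS32ToSingle(operand) call is now spelled FPVectorFromSignedFixed(32, operand, 0, rounding_mode), and likewise for the other three sign/width combinations, with fbits = 0 reducing the fixed-point conversion to a plain integer-to-float conversion.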
@@ -2030,6 +2030,30 @@ U128 IREmitter::FPVectorEqual(size_t esize, const U128& a, const U128& b) {
     return {};
 }
 
+U128 IREmitter::FPVectorFromSignedFixed(size_t esize, const U128& a, size_t fbits, FP::RoundingMode rounding) {
+    ASSERT(fbits <= esize);
+    switch (esize) {
+    case 32:
+        return Inst<U128>(Opcode::FPVectorFromSignedFixed32, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
+    case 64:
+        return Inst<U128>(Opcode::FPVectorFromSignedFixed64, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
+    }
+    UNREACHABLE();
+    return {};
+}
+
+U128 IREmitter::FPVectorFromUnsignedFixed(size_t esize, const U128& a, size_t fbits, FP::RoundingMode rounding) {
+    ASSERT(fbits <= esize);
+    switch (esize) {
+    case 32:
+        return Inst<U128>(Opcode::FPVectorFromUnsignedFixed32, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
+    case 64:
+        return Inst<U128>(Opcode::FPVectorFromUnsignedFixed64, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
+    }
+    UNREACHABLE();
+    return {};
+}
+
 U128 IREmitter::FPVectorGreater(size_t esize, const U128& a, const U128& b) {
     switch (esize) {
     case 32:
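Each new emitter method asserts fbits <= esize and dispatches on element size, packing both fbits and the rounding mode into U8 immediates. A hypothetical caller (illustrative sketch; the rounding-mode enumerator is assumed from dynarmic's FP namespace, and operand stands in for any U128 value):

// Convert four unsigned 24.8 fixed-point lanes to single precision:
const IR::U128 result = ir.FPVectorFromUnsignedFixed(32, operand, 8, FP::RoundingMode::ToNearest_TieEven);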
@@ -2186,14 +2210,6 @@ U128 IREmitter::FPVectorRSqrtStepFused(size_t esize, const U128& a, const U128&
     return {};
 }
 
-U128 IREmitter::FPVectorS32ToSingle(const U128& a) {
-    return Inst<U128>(Opcode::FPVectorS32ToSingle, a);
-}
-
-U128 IREmitter::FPVectorS64ToDouble(const U128& a) {
-    return Inst<U128>(Opcode::FPVectorS64ToDouble, a);
-}
-
 U128 IREmitter::FPVectorSub(size_t esize, const U128& a, const U128& b) {
     switch (esize) {
     case 32:
@@ -2229,14 +2245,6 @@ U128 IREmitter::FPVectorToUnsignedFixed(size_t esize, const U128& a, size_t fbit
     return {};
 }
 
-U128 IREmitter::FPVectorU32ToSingle(const U128& a) {
-    return Inst<U128>(Opcode::FPVectorU32ToSingle, a);
-}
-
-U128 IREmitter::FPVectorU64ToDouble(const U128& a) {
-    return Inst<U128>(Opcode::FPVectorU64ToDouble, a);
-}
-
 void IREmitter::Breakpoint() {
     Inst(Opcode::Breakpoint);
 }
@@ -325,6 +325,8 @@ public:
     U128 FPVectorAdd(size_t esize, const U128& a, const U128& b);
     U128 FPVectorDiv(size_t esize, const U128& a, const U128& b);
     U128 FPVectorEqual(size_t esize, const U128& a, const U128& b);
+    U128 FPVectorFromSignedFixed(size_t esize, const U128& a, size_t fbits, FP::RoundingMode rounding);
+    U128 FPVectorFromUnsignedFixed(size_t esize, const U128& a, size_t fbits, FP::RoundingMode rounding);
     U128 FPVectorGreater(size_t esize, const U128& a, const U128& b);
     U128 FPVectorGreaterEqual(size_t esize, const U128& a, const U128& b);
     U128 FPVectorMax(size_t esize, const U128& a, const U128& b);
@@ -339,13 +341,9 @@ public:
     U128 FPVectorRoundInt(size_t esize, const U128& operand, FP::RoundingMode rounding, bool exact);
     U128 FPVectorRSqrtEstimate(size_t esize, const U128& a);
     U128 FPVectorRSqrtStepFused(size_t esize, const U128& a, const U128& b);
-    U128 FPVectorS32ToSingle(const U128& a);
-    U128 FPVectorS64ToDouble(const U128& a);
     U128 FPVectorSub(size_t esize, const U128& a, const U128& b);
     U128 FPVectorToSignedFixed(size_t esize, const U128& a, size_t fbits, FP::RoundingMode rounding);
     U128 FPVectorToUnsignedFixed(size_t esize, const U128& a, size_t fbits, FP::RoundingMode rounding);
-    U128 FPVectorU32ToSingle(const U128& a);
-    U128 FPVectorU64ToDouble(const U128& a);
 
     void Breakpoint();
 
@@ -307,6 +307,10 @@ bool Inst::ReadsFromAndWritesToFPSRCumulativeExceptionBits() const {
     case Opcode::FPVectorDiv64:
     case Opcode::FPVectorEqual32:
     case Opcode::FPVectorEqual64:
+    case Opcode::FPVectorFromSignedFixed32:
+    case Opcode::FPVectorFromSignedFixed64:
+    case Opcode::FPVectorFromUnsignedFixed32:
+    case Opcode::FPVectorFromUnsignedFixed64:
    case Opcode::FPVectorGreater32:
     case Opcode::FPVectorGreater64:
     case Opcode::FPVectorGreaterEqual32:
@@ -327,12 +331,8 @@ bool Inst::ReadsFromAndWritesToFPSRCumulativeExceptionBits() const {
     case Opcode::FPVectorRSqrtEstimate64:
     case Opcode::FPVectorRSqrtStepFused32:
     case Opcode::FPVectorRSqrtStepFused64:
-    case Opcode::FPVectorS32ToSingle:
-    case Opcode::FPVectorS64ToDouble:
     case Opcode::FPVectorSub32:
     case Opcode::FPVectorSub64:
-    case Opcode::FPVectorU32ToSingle:
-    case Opcode::FPVectorU64ToDouble:
         return true;
 
     default:
@@ -523,6 +523,10 @@ OPCODE(FPVectorDiv32, U128, U128
 OPCODE(FPVectorDiv64,                 U128, U128, U128           )
 OPCODE(FPVectorEqual32,               U128, U128, U128           )
 OPCODE(FPVectorEqual64,               U128, U128, U128           )
+OPCODE(FPVectorFromSignedFixed32,     U128, U128, U8, U8         )
+OPCODE(FPVectorFromSignedFixed64,     U128, U128, U8, U8         )
+OPCODE(FPVectorFromUnsignedFixed32,   U128, U128, U8, U8         )
+OPCODE(FPVectorFromUnsignedFixed64,   U128, U128, U8, U8         )
 OPCODE(FPVectorGreater32,             U128, U128, U128           )
 OPCODE(FPVectorGreater64,             U128, U128, U128           )
 OPCODE(FPVectorGreaterEqual32,        U128, U128, U128           )
@@ -552,16 +556,12 @@ OPCODE(FPVectorRSqrtEstimate32, U128, U128
 OPCODE(FPVectorRSqrtEstimate64,       U128, U128                 )
 OPCODE(FPVectorRSqrtStepFused32,      U128, U128, U128           )
 OPCODE(FPVectorRSqrtStepFused64,      U128, U128, U128           )
-OPCODE(FPVectorS32ToSingle,           U128, U128                 )
-OPCODE(FPVectorS64ToDouble,           U128, U128                 )
 OPCODE(FPVectorSub32,                 U128, U128, U128           )
 OPCODE(FPVectorSub64,                 U128, U128, U128           )
 OPCODE(FPVectorToSignedFixed32,       U128, U128, U8, U8         )
 OPCODE(FPVectorToSignedFixed64,       U128, U128, U8, U8         )
 OPCODE(FPVectorToUnsignedFixed32,     U128, U128, U8, U8         )
 OPCODE(FPVectorToUnsignedFixed64,     U128, U128, U8, U8         )
-OPCODE(FPVectorU32ToSingle,           U128, U128                 )
-OPCODE(FPVectorU64ToDouble,           U128, U128                 )
 
 // A32 Memory access
 A32OPC(ClearExclusive,                Void,                      )