IR: Initial implementation of FP{Double,Single}ToFixed{S,U}{32,64}

This implementation just falls back to the software floating-point implementation.
This commit is contained in:
MerryMage 2018-06-30 10:49:47 +01:00
parent 760cc3ca89
commit caaf36dfd6
12 changed files with 159 additions and 173 deletions

View file

@ -163,6 +163,7 @@ u32 A32JitState::Fpscr() const {
FPSCR |= (guest_MXCSR & 0b0000000111100) >> 1; // IXC, UFC, OFC, DZC = PE, UE, OE, ZE FPSCR |= (guest_MXCSR & 0b0000000111100) >> 1; // IXC, UFC, OFC, DZC = PE, UE, OE, ZE
FPSCR |= FPSCR_IDC; FPSCR |= FPSCR_IDC;
FPSCR |= FPSCR_UFC; FPSCR |= FPSCR_UFC;
FPSCR |= fpsr_exc;
return FPSCR; return FPSCR;
} }
@ -183,13 +184,10 @@ void A32JitState::SetFpscr(u32 FPSCR) {
const std::array<u32, 4> MXCSR_RMode {0x0, 0x4000, 0x2000, 0x6000}; const std::array<u32, 4> MXCSR_RMode {0x0, 0x4000, 0x2000, 0x6000};
guest_MXCSR |= MXCSR_RMode[(FPSCR >> 22) & 0x3]; guest_MXCSR |= MXCSR_RMode[(FPSCR >> 22) & 0x3];
// Cumulative flags IOC, IXC, UFC, OFC, DZC // Cumulative flags IDC, IOC, IXC, UFC, OFC, DZC
guest_MXCSR |= ( FPSCR ) & 0b0000000000001; // IE = IOC FPSCR_IDC = 0;
guest_MXCSR |= ( FPSCR << 1) & 0b0000000111100; // PE, UE, OE, ZE = IXC, UFC, OFC, DZC FPSCR_UFC = 0;
fpsr_exc = FPSCR & 0x9F;
// Cumulative flag IDC, UFC
FPSCR_IDC = FPSCR & (1 << 7);
FPSCR_UFC = FPSCR & (1 << 3);
if (Common::Bit<24>(FPSCR)) { if (Common::Bit<24>(FPSCR)) {
// VFP Flush to Zero // VFP Flush to Zero

View file

@ -66,6 +66,7 @@ struct A32JitState {
std::array<u64, RSBSize> rsb_codeptrs; std::array<u64, RSBSize> rsb_codeptrs;
void ResetRSB(); void ResetRSB();
u32 fpsr_exc = 0;
u32 FPSCR_IDC = 0; u32 FPSCR_IDC = 0;
u32 FPSCR_UFC = 0; u32 FPSCR_UFC = 0;
u32 FPSCR_mode = 0; u32 FPSCR_mode = 0;

View file

@ -105,16 +105,15 @@ u32 A64JitState::GetFpsr() const {
fpsr |= (guest_MXCSR & 0b0000000111100) >> 1; // IXC, UFC, OFC, DZC = PE, UE, OE, ZE fpsr |= (guest_MXCSR & 0b0000000111100) >> 1; // IXC, UFC, OFC, DZC = PE, UE, OE, ZE
fpsr |= FPSCR_IDC; fpsr |= FPSCR_IDC;
fpsr |= FPSCR_UFC; fpsr |= FPSCR_UFC;
fpsr |= fpsr_exc;
return fpsr; return fpsr;
} }
void A64JitState::SetFpsr(u32 value) { void A64JitState::SetFpsr(u32 value) {
guest_MXCSR &= ~0x0000003D; guest_MXCSR &= ~0x0000003D;
guest_MXCSR |= ( value ) & 0b0000000000001; // IE = IOC FPSCR_IDC = 0;
guest_MXCSR |= ( value << 1) & 0b0000000111100; // PE, UE, OE, ZE = IXC, UFC, OFC, DZC FPSCR_UFC = 0;
fpsr_exc = value & 0x9F;
FPSCR_IDC = value & (1 << 7);
FPSCR_UFC = value & (1 << 3);
} }
} // namespace Dynarmic::BackendX64 } // namespace Dynarmic::BackendX64

View file

@ -71,6 +71,7 @@ struct A64JitState {
rsb_codeptrs.fill(0); rsb_codeptrs.fill(0);
} }
u32 fpsr_exc = 0;
u32 FPSCR_IDC = 0; u32 FPSCR_IDC = 0;
u32 FPSCR_UFC = 0; u32 FPSCR_UFC = 0;
u32 fpcr = 0; u32 fpcr = 0;

View file

@ -5,13 +5,22 @@
*/ */
#include <type_traits> #include <type_traits>
#include <utility>
#include "backend_x64/abi.h" #include "backend_x64/abi.h"
#include "backend_x64/block_of_code.h" #include "backend_x64/block_of_code.h"
#include "backend_x64/emit_x64.h" #include "backend_x64/emit_x64.h"
#include "common/assert.h" #include "common/assert.h"
#include "common/common_types.h" #include "common/common_types.h"
#include "common/fp/op.h"
#include "common/fp/util.h" #include "common/fp/util.h"
#include "common/mp/cartesian_product.h"
#include "common/mp/integer.h"
#include "common/mp/list.h"
#include "common/mp/lut.h"
#include "common/mp/to_tuple.h"
#include "common/mp/vlift.h"
#include "common/mp/vllift.h"
#include "frontend/ir/basic_block.h" #include "frontend/ir/basic_block.h"
#include "frontend/ir/microinstruction.h" #include "frontend/ir/microinstruction.h"
#include "frontend/ir/opcodes.h" #include "frontend/ir/opcodes.h"
@ -19,6 +28,7 @@
namespace Dynarmic::BackendX64 { namespace Dynarmic::BackendX64 {
using namespace Xbyak::util; using namespace Xbyak::util;
namespace mp = Dynarmic::Common::mp;
constexpr u64 f32_negative_zero = 0x80000000u; constexpr u64 f32_negative_zero = 0x80000000u;
constexpr u64 f32_nan = 0x7fc00000u; constexpr u64 f32_nan = 0x7fc00000u;
@ -29,10 +39,6 @@ constexpr u64 f64_nan = 0x7ff8000000000000u;
constexpr u64 f64_non_sign_mask = 0x7fffffffffffffffu; constexpr u64 f64_non_sign_mask = 0x7fffffffffffffffu;
constexpr u64 f64_penultimate_positive_denormal = 0x000ffffffffffffeu; constexpr u64 f64_penultimate_positive_denormal = 0x000ffffffffffffeu;
constexpr u64 f64_min_s32 = 0xc1e0000000000000u; // -2147483648 as a double
constexpr u64 f64_max_s32 = 0x41dfffffffc00000u; // 2147483647 as a double
constexpr u64 f64_min_u32 = 0x0000000000000000u; // 0 as a double
constexpr u64 f64_max_u32 = 0x41efffffffe00000u; // 4294967295 as a double
static void DenormalsAreZero32(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Reg32 gpr_scratch) { static void DenormalsAreZero32(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Reg32 gpr_scratch) {
Xbyak::Label end; Xbyak::Label end;
@ -99,12 +105,6 @@ static void FlushToZero64(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Reg64
code.L(end); code.L(end);
} }
static void ZeroIfNaN64(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Xmm xmm_scratch) {
code.pxor(xmm_scratch, xmm_scratch);
code.cmpordsd(xmm_scratch, xmm_value); // true mask when ordered (i.e.: when not an NaN)
code.pand(xmm_value, xmm_scratch);
}
static void PreProcessNaNs32(BlockOfCode& code, Xbyak::Xmm a, Xbyak::Xmm b, Xbyak::Label& end) { static void PreProcessNaNs32(BlockOfCode& code, Xbyak::Xmm a, Xbyak::Xmm b, Xbyak::Label& end) {
Xbyak::Label nan; Xbyak::Label nan;
@ -892,129 +892,82 @@ void EmitX64::EmitFPDoubleToSingle(EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.DefineValue(inst, result); ctx.reg_alloc.DefineValue(inst, result);
} }
void EmitX64::EmitFPSingleToS32(EmitContext& ctx, IR::Inst* inst) { static void EmitFPToFixedFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, size_t fsize, bool unsigned_, size_t isize) {
using fsize_list = mp::list<mp::vlift<size_t(32)>, mp::vlift<size_t(64)>>;
using unsigned_list = mp::list<mp::vlift<true>, mp::vlift<false>>;
using isize_list = mp::list<mp::vlift<size_t(32)>, mp::vlift<size_t(64)>>;
using rounding_list = mp::list<
std::integral_constant<FP::RoundingMode, FP::RoundingMode::ToNearest_TieEven>,
std::integral_constant<FP::RoundingMode, FP::RoundingMode::TowardsPlusInfinity>,
std::integral_constant<FP::RoundingMode, FP::RoundingMode::TowardsMinusInfinity>,
std::integral_constant<FP::RoundingMode, FP::RoundingMode::TowardsZero>,
std::integral_constant<FP::RoundingMode, FP::RoundingMode::ToNearest_TieAwayFromZero>
>;
using key_type = std::tuple<size_t, bool, size_t, FP::RoundingMode>;
using value_type = u64(*)(u64, u8, FP::FPSR&, A64::FPCR);
static const auto lut = mp::GenerateLookupTableFromList<key_type, value_type>(
[](auto args) {
return std::pair<key_type, value_type>{
mp::to_tuple<decltype(args)>,
static_cast<value_type>(
[](u64 input, u8 fbits, FP::FPSR& fpsr, A64::FPCR fpcr) {
constexpr auto t = mp::to_tuple<decltype(args)>;
constexpr size_t fsize = std::get<0>(t);
constexpr bool unsigned_ = std::get<1>(t);
constexpr size_t isize = std::get<2>(t);
constexpr FP::RoundingMode rounding_mode = std::get<3>(t);
using InputSize = mp::unsigned_integer_of_size<fsize>;
return FP::FPToFixed<InputSize>(isize, static_cast<InputSize>(input), fbits, unsigned_, fpcr, rounding_mode, fpsr);
}
)
};
},
mp::cartesian_product<fsize_list, unsigned_list, isize_list, rounding_list>{}
);
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
Xbyak::Xmm from = ctx.reg_alloc.UseScratchXmm(args[0]);
Xbyak::Reg32 to = ctx.reg_alloc.ScratchGpr().cvt32();
Xbyak::Xmm xmm_scratch = ctx.reg_alloc.ScratchXmm();
bool round_towards_zero = args[1].GetImmediateU1();
// ARM saturates on conversion; this differs from x64 which returns a sentinel value. const auto rounding = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
// Conversion to double is lossless, and allows for clamping.
if (ctx.FPSCR_FTZ()) { ctx.reg_alloc.HostCall(inst, args[0], args[1]);
DenormalsAreZero32(code, from, to); code.lea(code.ABI_PARAM3, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
} code.mov(code.ABI_PARAM4.cvt32(), ctx.FPCR());
code.cvtss2sd(from, from); code.CallFunction(lut.at(std::make_tuple(fsize, unsigned_, isize, rounding)));
// First time is to set flags
if (round_towards_zero) {
code.cvttsd2si(to, from); // 32 bit gpr
} else {
code.cvtsd2si(to, from); // 32 bit gpr
}
// Clamp to output range
ZeroIfNaN64(code, from, xmm_scratch);
code.minsd(from, code.MConst(xword, f64_max_s32));
code.maxsd(from, code.MConst(xword, f64_min_s32));
// Second time is for real
if (round_towards_zero) {
code.cvttsd2si(to, from); // 32 bit gpr
} else {
code.cvtsd2si(to, from); // 32 bit gpr
} }
ctx.reg_alloc.DefineValue(inst, to); void EmitX64::EmitFPDoubleToFixedS32(EmitContext& ctx, IR::Inst* inst) {
EmitFPToFixedFallback(code, ctx, inst, 64, false, 32);
} }
void EmitX64::EmitFPSingleToU32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitFPDoubleToFixedS64(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst); EmitFPToFixedFallback(code, ctx, inst, 64, false, 64);
Xbyak::Xmm from = ctx.reg_alloc.UseScratchXmm(args[0]);
Xbyak::Reg64 to = ctx.reg_alloc.ScratchGpr().cvt64();
Xbyak::Xmm xmm_scratch = ctx.reg_alloc.ScratchXmm();
bool round_towards_zero = args[1].GetImmediateU1();
// ARM saturates on conversion; this differs from x64 which returns a sentinel value.
// Conversion to double is lossless, and allows for accurate clamping.
//
// Since SSE2 doesn't provide an unsigned conversion, we use a 64-bit signed conversion.
//
// FIXME: None of the FPSR exception bits are correctly signalled with the below code
if (ctx.FPSCR_FTZ()) {
DenormalsAreZero64(code, from, to);
}
code.cvtss2sd(from, from);
// Clamp to output range
ZeroIfNaN64(code, from, xmm_scratch);
code.minsd(from, code.MConst(xword, f64_max_u32));
code.maxsd(from, code.MConst(xword, f64_min_u32));
if (round_towards_zero) {
code.cvttsd2si(to, from); // 64 bit gpr
} else {
code.cvtsd2si(to, from); // 64 bit gpr
} }
ctx.reg_alloc.DefineValue(inst, to); void EmitX64::EmitFPDoubleToFixedU32(EmitContext& ctx, IR::Inst* inst) {
EmitFPToFixedFallback(code, ctx, inst, 64, true, 32);
} }
void EmitX64::EmitFPDoubleToS32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitFPDoubleToFixedU64(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst); EmitFPToFixedFallback(code, ctx, inst, 64, true, 64);
Xbyak::Xmm from = ctx.reg_alloc.UseScratchXmm(args[0]);
Xbyak::Reg32 to = ctx.reg_alloc.ScratchGpr().cvt32();
Xbyak::Xmm xmm_scratch = ctx.reg_alloc.ScratchXmm();
Xbyak::Reg32 gpr_scratch = ctx.reg_alloc.ScratchGpr().cvt32();
bool round_towards_zero = args[1].GetImmediateU1();
// ARM saturates on conversion; this differs from x64 which returns a sentinel value.
if (ctx.FPSCR_FTZ()) {
DenormalsAreZero64(code, from, gpr_scratch.cvt64());
}
// First time is to set flags
if (round_towards_zero) {
code.cvttsd2si(gpr_scratch, from); // 32 bit gpr
} else {
code.cvtsd2si(gpr_scratch, from); // 32 bit gpr
}
// Clamp to output range
ZeroIfNaN64(code, from, xmm_scratch);
code.minsd(from, code.MConst(xword, f64_max_s32));
code.maxsd(from, code.MConst(xword, f64_min_s32));
// Second time is for real
if (round_towards_zero) {
code.cvttsd2si(to, from); // 32 bit gpr
} else {
code.cvtsd2si(to, from); // 32 bit gpr
} }
ctx.reg_alloc.DefineValue(inst, to); void EmitX64::EmitFPSingleToFixedS32(EmitContext& ctx, IR::Inst* inst) {
EmitFPToFixedFallback(code, ctx, inst, 32, false, 32);
} }
void EmitX64::EmitFPDoubleToU32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitFPSingleToFixedS64(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst); EmitFPToFixedFallback(code, ctx, inst, 32, false, 64);
Xbyak::Xmm from = ctx.reg_alloc.UseScratchXmm(args[0]);
Xbyak::Reg64 to = ctx.reg_alloc.ScratchGpr().cvt64();
Xbyak::Xmm xmm_scratch = ctx.reg_alloc.ScratchXmm();
bool round_towards_zero = args[1].GetImmediateU1();
// ARM saturates on conversion; this differs from x64 which returns a sentinel value.
// TODO: Use VCVTPD2UDQ when AVX512VL is available.
// FIXME: None of the FPSR exception bits are correctly signalled with the below code
if (ctx.FPSCR_FTZ()) {
DenormalsAreZero64(code, from, to);
}
// Clamp to output range
ZeroIfNaN64(code, from, xmm_scratch);
code.minsd(from, code.MConst(xword, f64_max_u32));
code.maxsd(from, code.MConst(xword, f64_min_u32));
if (round_towards_zero) {
code.cvttsd2si(to, from); // 64 bit gpr
} else {
code.cvtsd2si(to, from); // 64 bit gpr
} }
ctx.reg_alloc.DefineValue(inst, to); void EmitX64::EmitFPSingleToFixedU32(EmitContext& ctx, IR::Inst* inst) {
EmitFPToFixedFallback(code, ctx, inst, 32, true, 32);
}
void EmitX64::EmitFPSingleToFixedU64(EmitContext& ctx, IR::Inst* inst) {
EmitFPToFixedFallback(code, ctx, inst, 32, true, 64);
} }
void EmitX64::EmitFPS32ToSingle(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitFPS32ToSingle(EmitContext& ctx, IR::Inst* inst) {

View file

@ -26,6 +26,7 @@ struct JitStateInfo {
, offsetof_CPSR_nzcv(offsetof(JitStateType, CPSR_nzcv)) , offsetof_CPSR_nzcv(offsetof(JitStateType, CPSR_nzcv))
, offsetof_FPSCR_IDC(offsetof(JitStateType, FPSCR_IDC)) , offsetof_FPSCR_IDC(offsetof(JitStateType, FPSCR_IDC))
, offsetof_FPSCR_UFC(offsetof(JitStateType, FPSCR_UFC)) , offsetof_FPSCR_UFC(offsetof(JitStateType, FPSCR_UFC))
, offsetof_fpsr_exc(offsetof(JitStateType, fpsr_exc))
{} {}
const size_t offsetof_cycles_remaining; const size_t offsetof_cycles_remaining;
@ -39,6 +40,7 @@ struct JitStateInfo {
const size_t offsetof_CPSR_nzcv; const size_t offsetof_CPSR_nzcv;
const size_t offsetof_FPSCR_IDC; const size_t offsetof_FPSCR_IDC;
const size_t offsetof_FPSCR_UFC; const size_t offsetof_FPSCR_UFC;
const size_t offsetof_fpsr_exc;
}; };
} // namespace Dynarmic::BackendX64 } // namespace Dynarmic::BackendX64

View file

@ -442,8 +442,8 @@ bool ArmTranslatorVisitor::vfp2_VCVT_to_u32(Cond cond, bool D, size_t Vd, bool s
if (ConditionPassed(cond)) { if (ConditionPassed(cond)) {
auto reg_m = ir.GetExtendedRegister(m); auto reg_m = ir.GetExtendedRegister(m);
auto result = sz auto result = sz
? ir.FPDoubleToU32(reg_m, round_towards_zero, true) ? ir.FPDoubleToFixedU32(reg_m, 0, round_towards_zero ? FP::RoundingMode::TowardsZero : ir.current_location.FPSCR().RMode())
: ir.FPSingleToU32(reg_m, round_towards_zero, true); : ir.FPSingleToFixedU32(reg_m, 0, round_towards_zero ? FP::RoundingMode::TowardsZero : ir.current_location.FPSCR().RMode());
ir.SetExtendedRegister(d, result); ir.SetExtendedRegister(d, result);
} }
return true; return true;
@ -457,8 +457,8 @@ bool ArmTranslatorVisitor::vfp2_VCVT_to_s32(Cond cond, bool D, size_t Vd, bool s
if (ConditionPassed(cond)) { if (ConditionPassed(cond)) {
auto reg_m = ir.GetExtendedRegister(m); auto reg_m = ir.GetExtendedRegister(m);
auto result = sz auto result = sz
? ir.FPDoubleToS32(reg_m, round_towards_zero, true) ? ir.FPDoubleToFixedS32(reg_m, 0, round_towards_zero ? FP::RoundingMode::TowardsZero : ir.current_location.FPSCR().RMode())
: ir.FPSingleToS32(reg_m, round_towards_zero, true); : ir.FPSingleToFixedS32(reg_m, 0, round_towards_zero ? FP::RoundingMode::TowardsZero : ir.current_location.FPSCR().RMode());
ir.SetExtendedRegister(d, result); ir.SetExtendedRegister(d, result);
} }
return true; return true;

View file

@ -38,13 +38,13 @@ bool TranslatorVisitor::FCVTZS_float_fix(bool sf, Imm<2> type, Imm<6> scale, Vec
IR::U32U64 intval; IR::U32U64 intval;
if (intsize == 32 && *fltsize == 32) { if (intsize == 32 && *fltsize == 32) {
intval = ir.FPSingleToS32(fltval, true, true); intval = ir.FPSingleToFixedS32(fltval, 0, FP::RoundingMode::TowardsZero);
} else if (intsize == 32 && *fltsize == 64) { } else if (intsize == 32 && *fltsize == 64) {
intval = ir.FPDoubleToS32(fltval, true, true); intval = ir.FPDoubleToFixedS32(fltval, 0, FP::RoundingMode::TowardsZero);
} else if (intsize == 64 && *fltsize == 32) { } else if (intsize == 64 && *fltsize == 32) {
return InterpretThisInstruction(); intval = ir.FPSingleToFixedS64(fltval, 0, FP::RoundingMode::TowardsZero);
} else if (intsize == 64 && *fltsize == 64) { } else if (intsize == 64 && *fltsize == 64) {
return InterpretThisInstruction(); intval = ir.FPDoubleToFixedS64(fltval, 0, FP::RoundingMode::TowardsZero);
} else { } else {
UNREACHABLE(); UNREACHABLE();
} }
@ -69,13 +69,13 @@ bool TranslatorVisitor::FCVTZU_float_fix(bool sf, Imm<2> type, Imm<6> scale, Vec
IR::U32U64 intval; IR::U32U64 intval;
if (intsize == 32 && *fltsize == 32) { if (intsize == 32 && *fltsize == 32) {
intval = ir.FPSingleToU32(fltval, true, true); intval = ir.FPSingleToFixedU32(fltval, 0, FP::RoundingMode::TowardsZero);
} else if (intsize == 32 && *fltsize == 64) { } else if (intsize == 32 && *fltsize == 64) {
intval = ir.FPDoubleToU32(fltval, true, true); intval = ir.FPDoubleToFixedU32(fltval, 0, FP::RoundingMode::TowardsZero);
} else if (intsize == 64 && *fltsize == 32) { } else if (intsize == 64 && *fltsize == 32) {
return InterpretThisInstruction(); intval = ir.FPSingleToFixedU64(fltval, 0, FP::RoundingMode::TowardsZero);
} else if (intsize == 64 && *fltsize == 64) { } else if (intsize == 64 && *fltsize == 64) {
return InterpretThisInstruction(); intval = ir.FPDoubleToFixedU64(fltval, 0, FP::RoundingMode::TowardsZero);
} else { } else {
UNREACHABLE(); UNREACHABLE();
} }

View file

@ -146,13 +146,13 @@ bool TranslatorVisitor::FCVTZS_float_int(bool sf, Imm<2> type, Vec Vn, Reg Rd) {
IR::U32U64 intval; IR::U32U64 intval;
if (intsize == 32 && *fltsize == 32) { if (intsize == 32 && *fltsize == 32) {
intval = ir.FPSingleToS32(fltval, true, true); intval = ir.FPSingleToFixedS32(fltval, 0, FP::RoundingMode::TowardsZero);
} else if (intsize == 32 && *fltsize == 64) { } else if (intsize == 32 && *fltsize == 64) {
intval = ir.FPDoubleToS32(fltval, true, true); intval = ir.FPDoubleToFixedS32(fltval, 0, FP::RoundingMode::TowardsZero);
} else if (intsize == 64 && *fltsize == 32) { } else if (intsize == 64 && *fltsize == 32) {
return InterpretThisInstruction(); intval = ir.FPSingleToFixedS64(fltval, 0, FP::RoundingMode::TowardsZero);
} else if (intsize == 64 && *fltsize == 64) { } else if (intsize == 64 && *fltsize == 64) {
return InterpretThisInstruction(); intval = ir.FPDoubleToFixedS64(fltval, 0, FP::RoundingMode::TowardsZero);
} else { } else {
UNREACHABLE(); UNREACHABLE();
} }
@ -173,13 +173,13 @@ bool TranslatorVisitor::FCVTZU_float_int(bool sf, Imm<2> type, Vec Vn, Reg Rd) {
IR::U32U64 intval; IR::U32U64 intval;
if (intsize == 32 && *fltsize == 32) { if (intsize == 32 && *fltsize == 32) {
intval = ir.FPSingleToU32(fltval, true, true); intval = ir.FPSingleToFixedU32(fltval, 0, FP::RoundingMode::TowardsZero);
} else if (intsize == 32 && *fltsize == 64) { } else if (intsize == 32 && *fltsize == 64) {
intval = ir.FPDoubleToU32(fltval, true, true); intval = ir.FPDoubleToFixedU32(fltval, 0, FP::RoundingMode::TowardsZero);
} else if (intsize == 64 && *fltsize == 32) { } else if (intsize == 64 && *fltsize == 32) {
return InterpretThisInstruction(); intval = ir.FPSingleToFixedU64(fltval, 0, FP::RoundingMode::TowardsZero);
} else if (intsize == 64 && *fltsize == 64) { } else if (intsize == 64 && *fltsize == 64) {
return InterpretThisInstruction(); intval = ir.FPDoubleToFixedU64(fltval, 0, FP::RoundingMode::TowardsZero);
} else { } else {
UNREACHABLE(); UNREACHABLE();
} }

View file

@ -1451,24 +1451,44 @@ U64 IREmitter::FPSingleToDouble(const U32& a, bool fpscr_controlled) {
return Inst<U64>(Opcode::FPSingleToDouble, a); return Inst<U64>(Opcode::FPSingleToDouble, a);
} }
U32 IREmitter::FPSingleToS32(const U32& a, bool round_towards_zero, bool fpscr_controlled) { U32 IREmitter::FPDoubleToFixedS32(const U64& a, size_t fbits, FP::RoundingMode rounding) {
ASSERT(fpscr_controlled); ASSERT(fbits <= 32);
return Inst<U32>(Opcode::FPSingleToS32, a, Imm1(round_towards_zero)); return Inst<U32>(Opcode::FPDoubleToFixedS32, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
} }
U32 IREmitter::FPSingleToU32(const U32& a, bool round_towards_zero, bool fpscr_controlled) { U64 IREmitter::FPDoubleToFixedS64(const U64& a, size_t fbits, FP::RoundingMode rounding) {
ASSERT(fpscr_controlled); ASSERT(fbits <= 64);
return Inst<U32>(Opcode::FPSingleToU32, a, Imm1(round_towards_zero)); return Inst<U64>(Opcode::FPDoubleToFixedS64, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
} }
U32 IREmitter::FPDoubleToS32(const U64& a, bool round_towards_zero, bool fpscr_controlled) { U32 IREmitter::FPDoubleToFixedU32(const U64& a, size_t fbits, FP::RoundingMode rounding) {
ASSERT(fpscr_controlled); ASSERT(fbits <= 32);
return Inst<U32>(Opcode::FPDoubleToS32, a, Imm1(round_towards_zero)); return Inst<U32>(Opcode::FPDoubleToFixedU32, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
} }
U32 IREmitter::FPDoubleToU32(const U64& a, bool round_towards_zero, bool fpscr_controlled) { U64 IREmitter::FPDoubleToFixedU64(const U64& a, size_t fbits, FP::RoundingMode rounding) {
ASSERT(fpscr_controlled); ASSERT(fbits <= 64);
return Inst<U32>(Opcode::FPDoubleToU32, a, Imm1(round_towards_zero)); return Inst<U64>(Opcode::FPDoubleToFixedU64, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
}
U32 IREmitter::FPSingleToFixedS32(const U32& a, size_t fbits, FP::RoundingMode rounding) {
ASSERT(fbits <= 32);
return Inst<U32>(Opcode::FPSingleToFixedS32, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
}
U64 IREmitter::FPSingleToFixedS64(const U32& a, size_t fbits, FP::RoundingMode rounding) {
ASSERT(fbits <= 64);
return Inst<U64>(Opcode::FPSingleToFixedS64, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
}
U32 IREmitter::FPSingleToFixedU32(const U32& a, size_t fbits, FP::RoundingMode rounding) {
ASSERT(fbits <= 32);
return Inst<U32>(Opcode::FPSingleToFixedU32, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
}
U64 IREmitter::FPSingleToFixedU64(const U32& a, size_t fbits, FP::RoundingMode rounding) {
ASSERT(fbits <= 64);
return Inst<U64>(Opcode::FPSingleToFixedU64, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
} }
U32 IREmitter::FPS32ToSingle(const U32& a, bool round_to_nearest, bool fpscr_controlled) { U32 IREmitter::FPS32ToSingle(const U32& a, bool round_to_nearest, bool fpscr_controlled) {

View file

@ -12,6 +12,10 @@
#include "frontend/ir/terminal.h" #include "frontend/ir/terminal.h"
#include "frontend/ir/value.h" #include "frontend/ir/value.h"
namespace Dynarmic::FP {
enum class RoundingMode;
} // namespace Dynarmic::FP
// ARM JIT Microinstruction Intermediate Representation // ARM JIT Microinstruction Intermediate Representation
// //
// This intermediate representation is an SSA IR. It is designed primarily for analysis, // This intermediate representation is an SSA IR. It is designed primarily for analysis,
@ -264,10 +268,14 @@ public:
U32U64 FPSub(const U32U64& a, const U32U64& b, bool fpscr_controlled); U32U64 FPSub(const U32U64& a, const U32U64& b, bool fpscr_controlled);
U32 FPDoubleToSingle(const U64& a, bool fpscr_controlled); U32 FPDoubleToSingle(const U64& a, bool fpscr_controlled);
U64 FPSingleToDouble(const U32& a, bool fpscr_controlled); U64 FPSingleToDouble(const U32& a, bool fpscr_controlled);
U32 FPSingleToS32(const U32& a, bool round_towards_zero, bool fpscr_controlled); U32 FPDoubleToFixedS32(const U64& a, size_t fbits, FP::RoundingMode rounding);
U32 FPSingleToU32(const U32& a, bool round_towards_zero, bool fpscr_controlled); U64 FPDoubleToFixedS64(const U64& a, size_t fbits, FP::RoundingMode rounding);
U32 FPDoubleToS32(const U64& a, bool round_towards_zero, bool fpscr_controlled); U32 FPDoubleToFixedU32(const U64& a, size_t fbits, FP::RoundingMode rounding);
U32 FPDoubleToU32(const U64& a, bool round_towards_zero, bool fpscr_controlled); U64 FPDoubleToFixedU64(const U64& a, size_t fbits, FP::RoundingMode rounding);
U32 FPSingleToFixedS32(const U32& a, size_t fbits, FP::RoundingMode rounding);
U64 FPSingleToFixedS64(const U32& a, size_t fbits, FP::RoundingMode rounding);
U32 FPSingleToFixedU32(const U32& a, size_t fbits, FP::RoundingMode rounding);
U64 FPSingleToFixedU64(const U32& a, size_t fbits, FP::RoundingMode rounding);
U32 FPS32ToSingle(const U32& a, bool round_to_nearest, bool fpscr_controlled); U32 FPS32ToSingle(const U32& a, bool round_to_nearest, bool fpscr_controlled);
U32 FPU32ToSingle(const U32& a, bool round_to_nearest, bool fpscr_controlled); U32 FPU32ToSingle(const U32& a, bool round_to_nearest, bool fpscr_controlled);
U64 FPS32ToDouble(const U32& a, bool round_to_nearest, bool fpscr_controlled); U64 FPS32ToDouble(const U32& a, bool round_to_nearest, bool fpscr_controlled);

View file

@ -386,10 +386,14 @@ OPCODE(FPSub64, T::U64, T::U64, T::U
// Floating-point conversions // Floating-point conversions
OPCODE(FPSingleToDouble, T::U64, T::U32 ) OPCODE(FPSingleToDouble, T::U64, T::U32 )
OPCODE(FPDoubleToSingle, T::U32, T::U64 ) OPCODE(FPDoubleToSingle, T::U32, T::U64 )
OPCODE(FPSingleToU32, T::U32, T::U32, T::U1 ) OPCODE(FPDoubleToFixedS32, T::U32, T::U64, T::U8, T::U8 )
OPCODE(FPSingleToS32, T::U32, T::U32, T::U1 ) OPCODE(FPDoubleToFixedS64, T::U64, T::U64, T::U8, T::U8 )
OPCODE(FPDoubleToU32, T::U32, T::U64, T::U1 ) OPCODE(FPDoubleToFixedU32, T::U32, T::U64, T::U8, T::U8 )
OPCODE(FPDoubleToS32, T::U32, T::U64, T::U1 ) OPCODE(FPDoubleToFixedU64, T::U64, T::U64, T::U8, T::U8 )
OPCODE(FPSingleToFixedS32, T::U32, T::U32, T::U8, T::U8 )
OPCODE(FPSingleToFixedS64, T::U64, T::U32, T::U8, T::U8 )
OPCODE(FPSingleToFixedU32, T::U32, T::U32, T::U8, T::U8 )
OPCODE(FPSingleToFixedU64, T::U64, T::U32, T::U8, T::U8 )
OPCODE(FPU32ToSingle, T::U32, T::U32, T::U1 ) OPCODE(FPU32ToSingle, T::U32, T::U32, T::U1 )
OPCODE(FPS32ToSingle, T::U32, T::U32, T::U1 ) OPCODE(FPS32ToSingle, T::U32, T::U32, T::U1 )
OPCODE(FPU32ToDouble, T::U64, T::U32, T::U1 ) OPCODE(FPU32ToDouble, T::U64, T::U32, T::U1 )