IR: Initial implementation of FP{Double,Single}ToFixed{S,U}{32,64}
This implementation just falls back to the software floating-point implementation.
parent 760cc3ca89
commit caaf36dfd6
12 changed files with 159 additions and 173 deletions
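For orientation, the semantics being delegated to the software routine are: scale the input by 2^fbits, round in the requested mode, saturate to the target integer range, and accumulate the cumulative exception flags in the FPSR. Below is a minimal standalone sketch of those semantics for one case; FloatToFixedS32 is a hypothetical helper written for illustration, not the FP::FPToFixed entry point this commit calls:

// Hypothetical standalone sketch (not dynarmic's FP::FPToFixed): saturating
// float -> signed 32-bit fixed-point conversion with `fbits` fraction bits.
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <limits>

std::int32_t FloatToFixedS32(float value, unsigned fbits) {
    if (std::isnan(value)) {
        return 0;  // ARM returns 0 for NaN inputs (and sets IOC in the FPSR).
    }
    const double scaled = static_cast<double>(value) * std::ldexp(1.0, static_cast<int>(fbits));
    const double rounded = std::trunc(scaled);  // RoundingMode::TowardsZero
    if (rounded >= 2147483648.0) return std::numeric_limits<std::int32_t>::max();
    if (rounded < -2147483648.0) return std::numeric_limits<std::int32_t>::min();
    return static_cast<std::int32_t>(rounded);
}

int main() {
    std::printf("%d\n", static_cast<int>(FloatToFixedS32(1.5f, 0)));   // 1
    std::printf("%d\n", static_cast<int>(FloatToFixedS32(1.5f, 8)));   // 384 == 1.5 * 2^8
    std::printf("%d\n", static_cast<int>(FloatToFixedS32(1e20f, 0)));  // saturates to INT32_MAX
}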
@@ -163,6 +163,7 @@ u32 A32JitState::Fpscr() const {
     FPSCR |= (guest_MXCSR & 0b0000000111100) >> 1; // IXC, UFC, OFC, DZC = PE, UE, OE, ZE
     FPSCR |= FPSCR_IDC;
     FPSCR |= FPSCR_UFC;
+    FPSCR |= fpsr_exc;
     return FPSCR;
 }
@@ -183,13 +184,10 @@ void A32JitState::SetFpscr(u32 FPSCR) {
     const std::array<u32, 4> MXCSR_RMode {0x0, 0x4000, 0x2000, 0x6000};
     guest_MXCSR |= MXCSR_RMode[(FPSCR >> 22) & 0x3];

-    // Cumulative flags IOC, IXC, UFC, OFC, DZC
-    guest_MXCSR |= ( FPSCR       ) & 0b0000000000001; // IE = IOC
-    guest_MXCSR |= ( FPSCR <<  1) & 0b0000000111100; // PE, UE, OE, ZE = IXC, UFC, OFC, DZC
-
-    // Cumulative flag IDC, UFC
-    FPSCR_IDC = FPSCR & (1 << 7);
-    FPSCR_UFC = FPSCR & (1 << 3);
+    // Cumulative flags IDC, IOC, IXC, UFC, OFC, DZC
+    FPSCR_IDC = 0;
+    FPSCR_UFC = 0;
+    fpsr_exc = FPSCR & 0x9F;

     if (Common::Bit<24>(FPSCR)) {
         // VFP Flush to Zero
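A note on the new fpsr_exc field: the 0x9F mask selects exactly the FPSCR's six cumulative exception flags, which previously had to be split between guest_MXCSR and dedicated fields. A quick sketch of the bit layout (bit positions per the ARM FPSCR/FPSR definition; names match the diff's comments):

// Sketch of the cumulative-flag layout behind the 0x9F mask.
constexpr unsigned IOC = 1u << 0;  // Invalid Operation
constexpr unsigned DZC = 1u << 1;  // Division by Zero
constexpr unsigned OFC = 1u << 2;  // Overflow
constexpr unsigned UFC = 1u << 3;  // Underflow
constexpr unsigned IXC = 1u << 4;  // Inexact
constexpr unsigned IDC = 1u << 7;  // Input Denormal
static_assert((IOC | DZC | OFC | UFC | IXC | IDC) == 0x9F, "mask covers all six flags");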
@@ -66,6 +66,7 @@ struct A32JitState {
     std::array<u64, RSBSize> rsb_codeptrs;
     void ResetRSB();

+    u32 fpsr_exc = 0;
     u32 FPSCR_IDC = 0;
     u32 FPSCR_UFC = 0;
     u32 FPSCR_mode = 0;
@@ -105,16 +105,15 @@ u32 A64JitState::GetFpsr() const {
     fpsr |= (guest_MXCSR & 0b0000000111100) >> 1; // IXC, UFC, OFC, DZC = PE, UE, OE, ZE
     fpsr |= FPSCR_IDC;
     fpsr |= FPSCR_UFC;
+    fpsr |= fpsr_exc;
     return fpsr;
 }

 void A64JitState::SetFpsr(u32 value) {
     guest_MXCSR &= ~0x0000003D;
-    guest_MXCSR |= ( value       ) & 0b0000000000001; // IE = IOC
-    guest_MXCSR |= ( value <<  1) & 0b0000000111100; // PE, UE, OE, ZE = IXC, UFC, OFC, DZC
-    FPSCR_IDC = value & (1 << 7);
-    FPSCR_UFC = value & (1 << 3);
+    FPSCR_IDC = 0;
+    FPSCR_UFC = 0;
+    fpsr_exc = value & 0x9F;
 }

 } // namespace Dynarmic::BackendX64
@@ -71,6 +71,7 @@ struct A64JitState {
         rsb_codeptrs.fill(0);
     }

+    u32 fpsr_exc = 0;
     u32 FPSCR_IDC = 0;
     u32 FPSCR_UFC = 0;
     u32 fpcr = 0;
@@ -5,13 +5,22 @@
  */

 #include <type_traits>
+#include <utility>

 #include "backend_x64/abi.h"
 #include "backend_x64/block_of_code.h"
 #include "backend_x64/emit_x64.h"
 #include "common/assert.h"
 #include "common/common_types.h"
+#include "common/fp/op.h"
 #include "common/fp/util.h"
+#include "common/mp/cartesian_product.h"
+#include "common/mp/integer.h"
+#include "common/mp/list.h"
+#include "common/mp/lut.h"
+#include "common/mp/to_tuple.h"
+#include "common/mp/vlift.h"
+#include "common/mp/vllift.h"
 #include "frontend/ir/basic_block.h"
 #include "frontend/ir/microinstruction.h"
 #include "frontend/ir/opcodes.h"
@@ -19,6 +28,7 @@
 namespace Dynarmic::BackendX64 {

 using namespace Xbyak::util;
+namespace mp = Dynarmic::Common::mp;

 constexpr u64 f32_negative_zero = 0x80000000u;
 constexpr u64 f32_nan = 0x7fc00000u;
@@ -29,10 +39,6 @@ constexpr u64 f64_nan = 0x7ff8000000000000u;
 constexpr u64 f64_non_sign_mask = 0x7fffffffffffffffu;

 constexpr u64 f64_penultimate_positive_denormal = 0x000ffffffffffffeu;
-constexpr u64 f64_min_s32 = 0xc1e0000000000000u; // -2147483648 as a double
-constexpr u64 f64_max_s32 = 0x41dfffffffc00000u; // 2147483647 as a double
-constexpr u64 f64_min_u32 = 0x0000000000000000u; // 0 as a double
-constexpr u64 f64_max_u32 = 0x41efffffffe00000u; // 4294967295 as a double

 static void DenormalsAreZero32(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Reg32 gpr_scratch) {
     Xbyak::Label end;
@@ -99,12 +105,6 @@ static void FlushToZero64(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Reg64
     code.L(end);
 }

-static void ZeroIfNaN64(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Xmm xmm_scratch) {
-    code.pxor(xmm_scratch, xmm_scratch);
-    code.cmpordsd(xmm_scratch, xmm_value); // true mask when ordered (i.e.: when not an NaN)
-    code.pand(xmm_value, xmm_scratch);
-}
-
 static void PreProcessNaNs32(BlockOfCode& code, Xbyak::Xmm a, Xbyak::Xmm b, Xbyak::Label& end) {
     Xbyak::Label nan;
@@ -892,129 +892,82 @@ void EmitX64::EmitFPDoubleToSingle(EmitContext& ctx, IR::Inst* inst) {
     ctx.reg_alloc.DefineValue(inst, result);
 }

-void EmitX64::EmitFPSingleToS32(EmitContext& ctx, IR::Inst* inst) {
-    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    Xbyak::Xmm from = ctx.reg_alloc.UseScratchXmm(args[0]);
-    Xbyak::Reg32 to = ctx.reg_alloc.ScratchGpr().cvt32();
-    Xbyak::Xmm xmm_scratch = ctx.reg_alloc.ScratchXmm();
-    bool round_towards_zero = args[1].GetImmediateU1();
-
-    // ARM saturates on conversion; this differs from x64 which returns a sentinel value.
-    // Conversion to double is lossless, and allows for clamping.
-
-    if (ctx.FPSCR_FTZ()) {
-        DenormalsAreZero32(code, from, to);
-    }
-    code.cvtss2sd(from, from);
-    // First time is to set flags
-    if (round_towards_zero) {
-        code.cvttsd2si(to, from); // 32 bit gpr
-    } else {
-        code.cvtsd2si(to, from); // 32 bit gpr
-    }
-    // Clamp to output range
-    ZeroIfNaN64(code, from, xmm_scratch);
-    code.minsd(from, code.MConst(xword, f64_max_s32));
-    code.maxsd(from, code.MConst(xword, f64_min_s32));
-    // Second time is for real
-    if (round_towards_zero) {
-        code.cvttsd2si(to, from); // 32 bit gpr
-    } else {
-        code.cvtsd2si(to, from); // 32 bit gpr
-    }
-
-    ctx.reg_alloc.DefineValue(inst, to);
-}
-
-void EmitX64::EmitFPSingleToU32(EmitContext& ctx, IR::Inst* inst) {
-    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    Xbyak::Xmm from = ctx.reg_alloc.UseScratchXmm(args[0]);
-    Xbyak::Reg64 to = ctx.reg_alloc.ScratchGpr().cvt64();
-    Xbyak::Xmm xmm_scratch = ctx.reg_alloc.ScratchXmm();
-    bool round_towards_zero = args[1].GetImmediateU1();
-
-    // ARM saturates on conversion; this differs from x64 which returns a sentinel value.
-    // Conversion to double is lossless, and allows for accurate clamping.
-    //
-    // Since SSE2 doesn't provide an unsigned conversion, we use a 64-bit signed conversion.
-    //
-    // FIXME: None of the FPSR exception bits are correctly signalled with the below code
-
-    if (ctx.FPSCR_FTZ()) {
-        DenormalsAreZero64(code, from, to);
-    }
-    code.cvtss2sd(from, from);
-    // Clamp to output range
-    ZeroIfNaN64(code, from, xmm_scratch);
-    code.minsd(from, code.MConst(xword, f64_max_u32));
-    code.maxsd(from, code.MConst(xword, f64_min_u32));
-    if (round_towards_zero) {
-        code.cvttsd2si(to, from); // 64 bit gpr
-    } else {
-        code.cvtsd2si(to, from); // 64 bit gpr
-    }
-
-    ctx.reg_alloc.DefineValue(inst, to);
-}
-
-void EmitX64::EmitFPDoubleToS32(EmitContext& ctx, IR::Inst* inst) {
-    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    Xbyak::Xmm from = ctx.reg_alloc.UseScratchXmm(args[0]);
-    Xbyak::Reg32 to = ctx.reg_alloc.ScratchGpr().cvt32();
-    Xbyak::Xmm xmm_scratch = ctx.reg_alloc.ScratchXmm();
-    Xbyak::Reg32 gpr_scratch = ctx.reg_alloc.ScratchGpr().cvt32();
-    bool round_towards_zero = args[1].GetImmediateU1();
-
-    // ARM saturates on conversion; this differs from x64 which returns a sentinel value.
-
-    if (ctx.FPSCR_FTZ()) {
-        DenormalsAreZero64(code, from, gpr_scratch.cvt64());
-    }
-    // First time is to set flags
-    if (round_towards_zero) {
-        code.cvttsd2si(gpr_scratch, from); // 32 bit gpr
-    } else {
-        code.cvtsd2si(gpr_scratch, from); // 32 bit gpr
-    }
-    // Clamp to output range
-    ZeroIfNaN64(code, from, xmm_scratch);
-    code.minsd(from, code.MConst(xword, f64_max_s32));
-    code.maxsd(from, code.MConst(xword, f64_min_s32));
-    // Second time is for real
-    if (round_towards_zero) {
-        code.cvttsd2si(to, from); // 32 bit gpr
-    } else {
-        code.cvtsd2si(to, from); // 32 bit gpr
-    }
-
-    ctx.reg_alloc.DefineValue(inst, to);
-}
-
-void EmitX64::EmitFPDoubleToU32(EmitContext& ctx, IR::Inst* inst) {
-    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    Xbyak::Xmm from = ctx.reg_alloc.UseScratchXmm(args[0]);
-    Xbyak::Reg64 to = ctx.reg_alloc.ScratchGpr().cvt64();
-    Xbyak::Xmm xmm_scratch = ctx.reg_alloc.ScratchXmm();
-    bool round_towards_zero = args[1].GetImmediateU1();
-
-    // ARM saturates on conversion; this differs from x64 which returns a sentinel value.
-    // TODO: Use VCVTPD2UDQ when AVX512VL is available.
-    // FIXME: None of the FPSR exception bits are correctly signalled with the below code
-
-    if (ctx.FPSCR_FTZ()) {
-        DenormalsAreZero64(code, from, to);
-    }
-    // Clamp to output range
-    ZeroIfNaN64(code, from, xmm_scratch);
-    code.minsd(from, code.MConst(xword, f64_max_u32));
-    code.maxsd(from, code.MConst(xword, f64_min_u32));
-    if (round_towards_zero) {
-        code.cvttsd2si(to, from); // 64 bit gpr
-    } else {
-        code.cvtsd2si(to, from); // 64 bit gpr
-    }
-
-    ctx.reg_alloc.DefineValue(inst, to);
-}
+static void EmitFPToFixedFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, size_t fsize, bool unsigned_, size_t isize) {
+    using fsize_list = mp::list<mp::vlift<size_t(32)>, mp::vlift<size_t(64)>>;
+    using unsigned_list = mp::list<mp::vlift<true>, mp::vlift<false>>;
+    using isize_list = mp::list<mp::vlift<size_t(32)>, mp::vlift<size_t(64)>>;
+    using rounding_list = mp::list<
+        std::integral_constant<FP::RoundingMode, FP::RoundingMode::ToNearest_TieEven>,
+        std::integral_constant<FP::RoundingMode, FP::RoundingMode::TowardsPlusInfinity>,
+        std::integral_constant<FP::RoundingMode, FP::RoundingMode::TowardsMinusInfinity>,
+        std::integral_constant<FP::RoundingMode, FP::RoundingMode::TowardsZero>,
+        std::integral_constant<FP::RoundingMode, FP::RoundingMode::ToNearest_TieAwayFromZero>
+    >;
+
+    using key_type = std::tuple<size_t, bool, size_t, FP::RoundingMode>;
+    using value_type = u64(*)(u64, u8, FP::FPSR&, A64::FPCR);
+
+    static const auto lut = mp::GenerateLookupTableFromList<key_type, value_type>(
+        [](auto args) {
+            return std::pair<key_type, value_type>{
+                mp::to_tuple<decltype(args)>,
+                static_cast<value_type>(
+                    [](u64 input, u8 fbits, FP::FPSR& fpsr, A64::FPCR fpcr) {
+                        constexpr auto t = mp::to_tuple<decltype(args)>;
+                        constexpr size_t fsize = std::get<0>(t);
+                        constexpr bool unsigned_ = std::get<1>(t);
+                        constexpr size_t isize = std::get<2>(t);
+                        constexpr FP::RoundingMode rounding_mode = std::get<3>(t);
+                        using InputSize = mp::unsigned_integer_of_size<fsize>;
+
+                        return FP::FPToFixed<InputSize>(isize, static_cast<InputSize>(input), fbits, unsigned_, fpcr, rounding_mode, fpsr);
+                    }
+                )
+            };
+        },
+        mp::cartesian_product<fsize_list, unsigned_list, isize_list, rounding_list>{}
+    );
+
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    const auto rounding = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
+
+    ctx.reg_alloc.HostCall(inst, args[0], args[1]);
+    code.lea(code.ABI_PARAM3, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
+    code.mov(code.ABI_PARAM4.cvt32(), ctx.FPCR());
+    code.CallFunction(lut.at(std::make_tuple(fsize, unsigned_, isize, rounding)));
+}
+
+void EmitX64::EmitFPDoubleToFixedS32(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPToFixedFallback(code, ctx, inst, 64, false, 32);
+}
+
+void EmitX64::EmitFPDoubleToFixedS64(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPToFixedFallback(code, ctx, inst, 64, false, 64);
+}
+
+void EmitX64::EmitFPDoubleToFixedU32(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPToFixedFallback(code, ctx, inst, 64, true, 32);
+}
+
+void EmitX64::EmitFPDoubleToFixedU64(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPToFixedFallback(code, ctx, inst, 64, true, 64);
+}
+
+void EmitX64::EmitFPSingleToFixedS32(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPToFixedFallback(code, ctx, inst, 32, false, 32);
+}
+
+void EmitX64::EmitFPSingleToFixedS64(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPToFixedFallback(code, ctx, inst, 32, false, 64);
+}
+
+void EmitX64::EmitFPSingleToFixedU32(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPToFixedFallback(code, ctx, inst, 32, true, 32);
+}
+
+void EmitX64::EmitFPSingleToFixedU64(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPToFixedFallback(code, ctx, inst, 32, true, 64);
+}

 void EmitX64::EmitFPS32ToSingle(EmitContext& ctx, IR::Inst* inst) {
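The fallback's dispatch pattern can be shown in miniature: instantiate the templated conversion routine once for every parameter combination at compile time, then pick an instantiation at runtime. A simplified sketch, assuming only the standard library; std::map stands in for mp::GenerateLookupTableFromList and a stub replaces FP::FPToFixed:

// Minimal standalone sketch of the lookup-table dispatch used by
// EmitFPToFixedFallback: one function instantiation per (fsize, unsigned)
// combination, selected at runtime via a tuple key.
#include <cstdint>
#include <cstdio>
#include <map>
#include <tuple>

template<std::size_t fsize, bool unsigned_>
std::uint64_t Convert(std::uint64_t input) {
    // A real implementation would call FP::FPToFixed here; this stub only
    // demonstrates that fsize/unsigned_ are compile-time constants.
    return input + (fsize == 64 ? 1000 : 100) + (unsigned_ ? 10 : 0);
}

using Key = std::tuple<std::size_t, bool>;
using Fn = std::uint64_t (*)(std::uint64_t);

const std::map<Key, Fn>& Lut() {
    static const std::map<Key, Fn> lut {
        {Key{32, false}, &Convert<32, false>},
        {Key{32, true},  &Convert<32, true>},
        {Key{64, false}, &Convert<64, false>},
        {Key{64, true},  &Convert<64, true>},
    };
    return lut;
}

int main() {
    const std::size_t fsize = 64;  // runtime parameters, as in the fallback
    const bool unsigned_ = true;
    const Fn fn = Lut().at(std::make_tuple(fsize, unsigned_));
    std::printf("%llu\n", static_cast<unsigned long long>(fn(1)));  // prints 1011
}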
@@ -26,6 +26,7 @@ struct JitStateInfo {
         , offsetof_CPSR_nzcv(offsetof(JitStateType, CPSR_nzcv))
         , offsetof_FPSCR_IDC(offsetof(JitStateType, FPSCR_IDC))
         , offsetof_FPSCR_UFC(offsetof(JitStateType, FPSCR_UFC))
+        , offsetof_fpsr_exc(offsetof(JitStateType, fpsr_exc))
     {}

     const size_t offsetof_cycles_remaining;

@@ -39,6 +40,7 @@ struct JitStateInfo {
     const size_t offsetof_CPSR_nzcv;
     const size_t offsetof_FPSCR_IDC;
     const size_t offsetof_FPSCR_UFC;
+    const size_t offsetof_fpsr_exc;
 };

 } // namespace Dynarmic::BackendX64
@@ -442,8 +442,8 @@ bool ArmTranslatorVisitor::vfp2_VCVT_to_u32(Cond cond, bool D, size_t Vd, bool s
     if (ConditionPassed(cond)) {
         auto reg_m = ir.GetExtendedRegister(m);
         auto result = sz
-                      ? ir.FPDoubleToU32(reg_m, round_towards_zero, true)
-                      : ir.FPSingleToU32(reg_m, round_towards_zero, true);
+                      ? ir.FPDoubleToFixedU32(reg_m, 0, round_towards_zero ? FP::RoundingMode::TowardsZero : ir.current_location.FPSCR().RMode())
+                      : ir.FPSingleToFixedU32(reg_m, 0, round_towards_zero ? FP::RoundingMode::TowardsZero : ir.current_location.FPSCR().RMode());
         ir.SetExtendedRegister(d, result);
     }
     return true;

@@ -457,8 +457,8 @@ bool ArmTranslatorVisitor::vfp2_VCVT_to_s32(Cond cond, bool D, size_t Vd, bool s
     if (ConditionPassed(cond)) {
         auto reg_m = ir.GetExtendedRegister(m);
         auto result = sz
-                      ? ir.FPDoubleToS32(reg_m, round_towards_zero, true)
-                      : ir.FPSingleToS32(reg_m, round_towards_zero, true);
+                      ? ir.FPDoubleToFixedS32(reg_m, 0, round_towards_zero ? FP::RoundingMode::TowardsZero : ir.current_location.FPSCR().RMode())
+                      : ir.FPSingleToFixedS32(reg_m, 0, round_towards_zero ? FP::RoundingMode::TowardsZero : ir.current_location.FPSCR().RMode());
         ir.SetExtendedRegister(d, result);
     }
     return true;
@@ -38,13 +38,13 @@ bool TranslatorVisitor::FCVTZS_float_fix(bool sf, Imm<2> type, Imm<6> scale, Vec

     IR::U32U64 intval;
     if (intsize == 32 && *fltsize == 32) {
-        intval = ir.FPSingleToS32(fltval, true, true);
+        intval = ir.FPSingleToFixedS32(fltval, 0, FP::RoundingMode::TowardsZero);
     } else if (intsize == 32 && *fltsize == 64) {
-        intval = ir.FPDoubleToS32(fltval, true, true);
+        intval = ir.FPDoubleToFixedS32(fltval, 0, FP::RoundingMode::TowardsZero);
     } else if (intsize == 64 && *fltsize == 32) {
-        return InterpretThisInstruction();
+        intval = ir.FPSingleToFixedS64(fltval, 0, FP::RoundingMode::TowardsZero);
     } else if (intsize == 64 && *fltsize == 64) {
-        return InterpretThisInstruction();
+        intval = ir.FPDoubleToFixedS64(fltval, 0, FP::RoundingMode::TowardsZero);
     } else {
         UNREACHABLE();
     }

@@ -69,13 +69,13 @@ bool TranslatorVisitor::FCVTZU_float_fix(bool sf, Imm<2> type, Imm<6> scale, Vec

     IR::U32U64 intval;
     if (intsize == 32 && *fltsize == 32) {
-        intval = ir.FPSingleToU32(fltval, true, true);
+        intval = ir.FPSingleToFixedU32(fltval, 0, FP::RoundingMode::TowardsZero);
     } else if (intsize == 32 && *fltsize == 64) {
-        intval = ir.FPDoubleToU32(fltval, true, true);
+        intval = ir.FPDoubleToFixedU32(fltval, 0, FP::RoundingMode::TowardsZero);
     } else if (intsize == 64 && *fltsize == 32) {
-        return InterpretThisInstruction();
+        intval = ir.FPSingleToFixedU64(fltval, 0, FP::RoundingMode::TowardsZero);
     } else if (intsize == 64 && *fltsize == 64) {
-        return InterpretThisInstruction();
+        intval = ir.FPDoubleToFixedU64(fltval, 0, FP::RoundingMode::TowardsZero);
     } else {
         UNREACHABLE();
     }
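All of the FCVTZ translations here pass fbits = 0, i.e. a plain float-to-integer conversion; the fbits operand exists so fixed-point variants can scale by 2^fbits before rounding. A hypothetical illustration of that scaling (not dynarmic code):

// Hypothetical illustration of the fbits operand's meaning: converting 1.25
// to U32 fixed-point with 4 fraction bits.
#include <cassert>
#include <cmath>
#include <cstdint>

int main() {
    const double x = 1.25;
    const unsigned fbits = 4;
    // FPToFixed semantics: scale by 2^fbits, then round (here, towards zero).
    const auto fixed = static_cast<std::uint32_t>(std::trunc(x * std::pow(2.0, fbits)));
    assert(fixed == 20);  // 1.25 * 16 == 20, i.e. 1.0100b with 4 fraction bits
    return 0;
}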
@@ -146,13 +146,13 @@ bool TranslatorVisitor::FCVTZS_float_int(bool sf, Imm<2> type, Vec Vn, Reg Rd) {
     IR::U32U64 intval;

     if (intsize == 32 && *fltsize == 32) {
-        intval = ir.FPSingleToS32(fltval, true, true);
+        intval = ir.FPSingleToFixedS32(fltval, 0, FP::RoundingMode::TowardsZero);
     } else if (intsize == 32 && *fltsize == 64) {
-        intval = ir.FPDoubleToS32(fltval, true, true);
+        intval = ir.FPDoubleToFixedS32(fltval, 0, FP::RoundingMode::TowardsZero);
     } else if (intsize == 64 && *fltsize == 32) {
-        return InterpretThisInstruction();
+        intval = ir.FPSingleToFixedS64(fltval, 0, FP::RoundingMode::TowardsZero);
     } else if (intsize == 64 && *fltsize == 64) {
-        return InterpretThisInstruction();
+        intval = ir.FPDoubleToFixedS64(fltval, 0, FP::RoundingMode::TowardsZero);
     } else {
         UNREACHABLE();
     }

@@ -173,13 +173,13 @@ bool TranslatorVisitor::FCVTZU_float_int(bool sf, Imm<2> type, Vec Vn, Reg Rd) {
     IR::U32U64 intval;

     if (intsize == 32 && *fltsize == 32) {
-        intval = ir.FPSingleToU32(fltval, true, true);
+        intval = ir.FPSingleToFixedU32(fltval, 0, FP::RoundingMode::TowardsZero);
     } else if (intsize == 32 && *fltsize == 64) {
-        intval = ir.FPDoubleToU32(fltval, true, true);
+        intval = ir.FPDoubleToFixedU32(fltval, 0, FP::RoundingMode::TowardsZero);
     } else if (intsize == 64 && *fltsize == 32) {
-        return InterpretThisInstruction();
+        intval = ir.FPSingleToFixedU64(fltval, 0, FP::RoundingMode::TowardsZero);
     } else if (intsize == 64 && *fltsize == 64) {
-        return InterpretThisInstruction();
+        intval = ir.FPDoubleToFixedU64(fltval, 0, FP::RoundingMode::TowardsZero);
     } else {
         UNREACHABLE();
     }
@@ -1451,24 +1451,44 @@ U64 IREmitter::FPSingleToDouble(const U32& a, bool fpscr_controlled) {
     return Inst<U64>(Opcode::FPSingleToDouble, a);
 }

-U32 IREmitter::FPSingleToS32(const U32& a, bool round_towards_zero, bool fpscr_controlled) {
-    ASSERT(fpscr_controlled);
-    return Inst<U32>(Opcode::FPSingleToS32, a, Imm1(round_towards_zero));
-}
-
-U32 IREmitter::FPSingleToU32(const U32& a, bool round_towards_zero, bool fpscr_controlled) {
-    ASSERT(fpscr_controlled);
-    return Inst<U32>(Opcode::FPSingleToU32, a, Imm1(round_towards_zero));
-}
-
-U32 IREmitter::FPDoubleToS32(const U64& a, bool round_towards_zero, bool fpscr_controlled) {
-    ASSERT(fpscr_controlled);
-    return Inst<U32>(Opcode::FPDoubleToS32, a, Imm1(round_towards_zero));
-}
-
-U32 IREmitter::FPDoubleToU32(const U64& a, bool round_towards_zero, bool fpscr_controlled) {
-    ASSERT(fpscr_controlled);
-    return Inst<U32>(Opcode::FPDoubleToU32, a, Imm1(round_towards_zero));
-}
+U32 IREmitter::FPDoubleToFixedS32(const U64& a, size_t fbits, FP::RoundingMode rounding) {
+    ASSERT(fbits <= 32);
+    return Inst<U32>(Opcode::FPDoubleToFixedS32, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
+}
+
+U64 IREmitter::FPDoubleToFixedS64(const U64& a, size_t fbits, FP::RoundingMode rounding) {
+    ASSERT(fbits <= 64);
+    return Inst<U64>(Opcode::FPDoubleToFixedS64, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
+}
+
+U32 IREmitter::FPDoubleToFixedU32(const U64& a, size_t fbits, FP::RoundingMode rounding) {
+    ASSERT(fbits <= 32);
+    return Inst<U32>(Opcode::FPDoubleToFixedU32, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
+}
+
+U64 IREmitter::FPDoubleToFixedU64(const U64& a, size_t fbits, FP::RoundingMode rounding) {
+    ASSERT(fbits <= 64);
+    return Inst<U64>(Opcode::FPDoubleToFixedU64, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
+}
+
+U32 IREmitter::FPSingleToFixedS32(const U32& a, size_t fbits, FP::RoundingMode rounding) {
+    ASSERT(fbits <= 32);
+    return Inst<U32>(Opcode::FPSingleToFixedS32, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
+}
+
+U64 IREmitter::FPSingleToFixedS64(const U32& a, size_t fbits, FP::RoundingMode rounding) {
+    ASSERT(fbits <= 64);
+    return Inst<U64>(Opcode::FPSingleToFixedS64, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
+}
+
+U32 IREmitter::FPSingleToFixedU32(const U32& a, size_t fbits, FP::RoundingMode rounding) {
+    ASSERT(fbits <= 32);
+    return Inst<U32>(Opcode::FPSingleToFixedU32, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
+}
+
+U64 IREmitter::FPSingleToFixedU64(const U32& a, size_t fbits, FP::RoundingMode rounding) {
+    ASSERT(fbits <= 64);
+    return Inst<U64>(Opcode::FPSingleToFixedU64, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
+}

 U32 IREmitter::FPS32ToSingle(const U32& a, bool round_to_nearest, bool fpscr_controlled) {
@@ -12,6 +12,10 @@
 #include "frontend/ir/terminal.h"
 #include "frontend/ir/value.h"

+namespace Dynarmic::FP {
+enum class RoundingMode;
+} // namespace Dynarmic::FP
+
 // ARM JIT Microinstruction Intermediate Representation
 //
 // This intermediate representation is an SSA IR. It is designed primarily for analysis,
@@ -264,10 +268,14 @@ public:
     U32U64 FPSub(const U32U64& a, const U32U64& b, bool fpscr_controlled);
     U32 FPDoubleToSingle(const U64& a, bool fpscr_controlled);
     U64 FPSingleToDouble(const U32& a, bool fpscr_controlled);
-    U32 FPSingleToS32(const U32& a, bool round_towards_zero, bool fpscr_controlled);
-    U32 FPSingleToU32(const U32& a, bool round_towards_zero, bool fpscr_controlled);
-    U32 FPDoubleToS32(const U64& a, bool round_towards_zero, bool fpscr_controlled);
-    U32 FPDoubleToU32(const U64& a, bool round_towards_zero, bool fpscr_controlled);
+    U32 FPDoubleToFixedS32(const U64& a, size_t fbits, FP::RoundingMode rounding);
+    U64 FPDoubleToFixedS64(const U64& a, size_t fbits, FP::RoundingMode rounding);
+    U32 FPDoubleToFixedU32(const U64& a, size_t fbits, FP::RoundingMode rounding);
+    U64 FPDoubleToFixedU64(const U64& a, size_t fbits, FP::RoundingMode rounding);
+    U32 FPSingleToFixedS32(const U32& a, size_t fbits, FP::RoundingMode rounding);
+    U64 FPSingleToFixedS64(const U32& a, size_t fbits, FP::RoundingMode rounding);
+    U32 FPSingleToFixedU32(const U32& a, size_t fbits, FP::RoundingMode rounding);
+    U64 FPSingleToFixedU64(const U32& a, size_t fbits, FP::RoundingMode rounding);
     U32 FPS32ToSingle(const U32& a, bool round_to_nearest, bool fpscr_controlled);
     U32 FPU32ToSingle(const U32& a, bool round_to_nearest, bool fpscr_controlled);
     U64 FPS32ToDouble(const U32& a, bool round_to_nearest, bool fpscr_controlled);
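Worth noting how the FP::RoundingMode argument travels: IREmitter narrows it to an Imm8 operand, and EmitFPToFixedFallback widens it back before indexing the LUT. A standalone sketch of that round-trip, with a local enum standing in for FP::RoundingMode (values ordered as in the rounding_list above):

// Standalone sketch of how the rounding mode travels through the IR: narrowed
// to an Imm8 operand at emission time, widened back in the backend.
#include <cassert>
#include <cstdint>

enum class RoundingMode : std::uint8_t {
    ToNearest_TieEven,
    TowardsPlusInfinity,
    TowardsMinusInfinity,
    TowardsZero,
    ToNearest_TieAwayFromZero,
};

int main() {
    // Emission: IREmitter::FPDoubleToFixedS32 does Imm8(static_cast<u8>(rounding)).
    const auto imm8 = static_cast<std::uint8_t>(RoundingMode::TowardsZero);
    // Backend: EmitFPToFixedFallback does static_cast<FP::RoundingMode>(GetImmediateU8()).
    const auto rounding = static_cast<RoundingMode>(imm8);
    assert(rounding == RoundingMode::TowardsZero);
    return 0;
}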
@@ -386,10 +386,14 @@ OPCODE(FPSub64, T::U64, T::U64, T::U64 )
 // Floating-point conversions
 OPCODE(FPSingleToDouble,   T::U64, T::U32               )
 OPCODE(FPDoubleToSingle,   T::U32, T::U64               )
-OPCODE(FPSingleToU32,      T::U32, T::U32, T::U1        )
-OPCODE(FPSingleToS32,      T::U32, T::U32, T::U1        )
-OPCODE(FPDoubleToU32,      T::U32, T::U64, T::U1        )
-OPCODE(FPDoubleToS32,      T::U32, T::U64, T::U1        )
+OPCODE(FPDoubleToFixedS32, T::U32, T::U64, T::U8, T::U8 )
+OPCODE(FPDoubleToFixedS64, T::U64, T::U64, T::U8, T::U8 )
+OPCODE(FPDoubleToFixedU32, T::U32, T::U64, T::U8, T::U8 )
+OPCODE(FPDoubleToFixedU64, T::U64, T::U64, T::U8, T::U8 )
+OPCODE(FPSingleToFixedS32, T::U32, T::U32, T::U8, T::U8 )
+OPCODE(FPSingleToFixedS64, T::U64, T::U32, T::U8, T::U8 )
+OPCODE(FPSingleToFixedU32, T::U32, T::U32, T::U8, T::U8 )
+OPCODE(FPSingleToFixedU64, T::U64, T::U32, T::U8, T::U8 )
 OPCODE(FPU32ToSingle,      T::U32, T::U32, T::U1        )
 OPCODE(FPS32ToSingle,      T::U32, T::U32, T::U1        )
 OPCODE(FPU32ToDouble,      T::U64, T::U32, T::U1        )