Merge pull request #289 from MerryMage/fptofixed

Implement most of the scalar fp -> integer instructions
Authored by Merry on 2018-07-15 17:12:52 +01:00; committed by MerryMage.
commit d50eaedaa7
52 changed files with 1931 additions and 217 deletions

src/CMakeLists.txt

@@ -16,8 +16,17 @@ add_library(dynarmic
     common/common_types.h
     common/crc32.cpp
     common/crc32.h
-    common/fp_util.h
+    common/fp/fpsr.h
+    common/fp/info.h
+    common/fp/mantissa_util.h
+    common/fp/op.cpp
+    common/fp/op.h
+    common/fp/process_exception.cpp
+    common/fp/process_exception.h
     common/fp/rounding_mode.h
+    common/fp/unpacked.cpp
+    common/fp/unpacked.h
+    common/fp/util.h
     common/intrusive_list.h
     common/iterator_util.h
     common/llvm_disassemble.cpp
@@ -27,10 +36,24 @@ add_library(dynarmic
     common/memory_pool.cpp
     common/memory_pool.h
     common/mp.h
+    common/mp/append.h
+    common/mp/bind.h
+    common/mp/cartesian_product.h
+    common/mp/concat.h
+    common/mp/fapply.h
+    common/mp/fmap.h
+    common/mp/list.h
+    common/mp/lut.h
+    common/mp/to_tuple.h
+    common/mp/vlift.h
+    common/mp/vllift.h
+    common/safe_ops.h
     common/scope_exit.h
     common/sm4.cpp
     common/sm4.h
     common/string_util.h
+    common/u128.cpp
+    common/u128.h
     common/variant_util.h
     frontend/A32/decoder/arm.h
     frontend/A32/decoder/thumb16.h

src/backend_x64/a32_emit_x64.cpp

@@ -62,6 +62,10 @@ FP::RoundingMode A32EmitContext::FPSCR_RMode() const {
     return Location().FPSCR().RMode();
 }
 
+u32 A32EmitContext::FPCR() const {
+    return Location().FPSCR().Value();
+}
+
 bool A32EmitContext::FPSCR_RoundTowardsZero() const {
     return Location().FPSCR().RMode() != FP::RoundingMode::TowardsZero;
 }

src/backend_x64/a32_emit_x64.h

@@ -24,6 +24,7 @@ struct A32EmitContext final : public EmitContext {
     A32EmitContext(RegAlloc& reg_alloc, IR::Block& block);
     A32::LocationDescriptor Location() const;
     FP::RoundingMode FPSCR_RMode() const override;
+    u32 FPCR() const override;
     bool FPSCR_RoundTowardsZero() const override;
     bool FPSCR_FTZ() const override;
     bool FPSCR_DN() const override;

src/backend_x64/a32_jitstate.cpp

@@ -163,6 +163,7 @@ u32 A32JitState::Fpscr() const {
     FPSCR |= (guest_MXCSR & 0b0000000111100) >> 1; // IXC, UFC, OFC, DZC = PE, UE, OE, ZE
     FPSCR |= FPSCR_IDC;
     FPSCR |= FPSCR_UFC;
+    FPSCR |= fpsr_exc;
 
     return FPSCR;
 }
@@ -183,13 +184,10 @@ void A32JitState::SetFpscr(u32 FPSCR) {
     const std::array<u32, 4> MXCSR_RMode {0x0, 0x4000, 0x2000, 0x6000};
     guest_MXCSR |= MXCSR_RMode[(FPSCR >> 22) & 0x3];
 
-    // Cumulative flags IOC, IXC, UFC, OFC, DZC
-    guest_MXCSR |= ( FPSCR       ) & 0b0000000000001; // IE = IOC
-    guest_MXCSR |= ( FPSCR  << 1 ) & 0b0000000111100; // PE, UE, OE, ZE = IXC, UFC, OFC, DZC
-
-    // Cumulative flag IDC, UFC
-    FPSCR_IDC = FPSCR & (1 << 7);
-    FPSCR_UFC = FPSCR & (1 << 3);
+    // Cumulative flags IDC, IOC, IXC, UFC, OFC, DZC
+    FPSCR_IDC = 0;
+    FPSCR_UFC = 0;
+    fpsr_exc = FPSCR & 0x9F;
 
     if (Common::Bit<24>(FPSCR)) {
         // VFP Flush to Zero
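
Note (illustrative only, not part of this commit): the new fpsr_exc field stores the guest's cumulative exception flags directly instead of deriving them all from the host MXCSR. The 0x9F mask keeps exactly the six cumulative exception bits of the FPSCR:

// IOC bit 0, DZC bit 1, OFC bit 2, UFC bit 3, IXC bit 4, IDC bit 7
constexpr u32 IOC = 1u << 0, DZC = 1u << 1, OFC = 1u << 2,
              UFC = 1u << 3, IXC = 1u << 4, IDC = 1u << 7;
static_assert((IOC | DZC | OFC | UFC | IXC | IDC) == 0x9F);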

src/backend_x64/a32_jitstate.h

@@ -66,6 +66,7 @@ struct A32JitState {
     std::array<u64, RSBSize> rsb_codeptrs;
     void ResetRSB();
 
+    u32 fpsr_exc = 0;
     u32 FPSCR_IDC = 0;
     u32 FPSCR_UFC = 0;
     u32 FPSCR_mode = 0;

src/backend_x64/a64_emit_x64.cpp

@@ -44,6 +44,10 @@ FP::RoundingMode A64EmitContext::FPSCR_RMode() const {
     return Location().FPCR().RMode();
 }
 
+u32 A64EmitContext::FPCR() const {
+    return Location().FPCR().Value();
+}
+
 bool A64EmitContext::FPSCR_RoundTowardsZero() const {
     return Location().FPCR().RMode() != FP::RoundingMode::TowardsZero;
 }

src/backend_x64/a64_emit_x64.h

@@ -24,6 +24,7 @@ struct A64EmitContext final : public EmitContext {
     A64EmitContext(const A64::UserConfig& conf, RegAlloc& reg_alloc, IR::Block& block);
     A64::LocationDescriptor Location() const;
     FP::RoundingMode FPSCR_RMode() const override;
+    u32 FPCR() const override;
     bool FPSCR_RoundTowardsZero() const override;
     bool FPSCR_FTZ() const override;
     bool FPSCR_DN() const override;

src/backend_x64/a64_jitstate.cpp

@@ -105,16 +105,15 @@ u32 A64JitState::GetFpsr() const {
     fpsr |= (guest_MXCSR & 0b0000000111100) >> 1; // IXC, UFC, OFC, DZC = PE, UE, OE, ZE
     fpsr |= FPSCR_IDC;
     fpsr |= FPSCR_UFC;
+    fpsr |= fpsr_exc;
 
     return fpsr;
 }
 
 void A64JitState::SetFpsr(u32 value) {
     guest_MXCSR &= ~0x0000003D;
-    guest_MXCSR |= ( value       ) & 0b0000000000001; // IE = IOC
-    guest_MXCSR |= ( value  << 1 ) & 0b0000000111100; // PE, UE, OE, ZE = IXC, UFC, OFC, DZC
-
-    FPSCR_IDC = value & (1 << 7);
-    FPSCR_UFC = value & (1 << 3);
+    FPSCR_IDC = 0;
+    FPSCR_UFC = 0;
+    fpsr_exc = value & 0x9F;
 }
 
 } // namespace Dynarmic::BackendX64

src/backend_x64/a64_jitstate.h

@@ -71,6 +71,7 @@ struct A64JitState {
         rsb_codeptrs.fill(0);
     }
 
+    u32 fpsr_exc = 0;
     u32 FPSCR_IDC = 0;
     u32 FPSCR_UFC = 0;
     u32 fpcr = 0;

src/backend_x64/emit_x64.h

@@ -35,6 +35,7 @@ struct EmitContext {
     void EraseInstruction(IR::Inst* inst);
 
     virtual FP::RoundingMode FPSCR_RMode() const = 0;
+    virtual u32 FPCR() const = 0;
     virtual bool FPSCR_RoundTowardsZero() const = 0;
     virtual bool FPSCR_FTZ() const = 0;
     virtual bool FPSCR_DN() const = 0;

src/backend_x64/emit_x64_floating_point.cpp

@@ -5,13 +5,22 @@
  */
 
 #include <type_traits>
+#include <utility>
 
 #include "backend_x64/abi.h"
 #include "backend_x64/block_of_code.h"
 #include "backend_x64/emit_x64.h"
 #include "common/assert.h"
 #include "common/common_types.h"
-#include "common/fp_util.h"
+#include "common/fp/op.h"
+#include "common/fp/util.h"
+#include "common/mp/cartesian_product.h"
+#include "common/mp/integer.h"
+#include "common/mp/list.h"
+#include "common/mp/lut.h"
+#include "common/mp/to_tuple.h"
+#include "common/mp/vlift.h"
+#include "common/mp/vllift.h"
 #include "frontend/ir/basic_block.h"
 #include "frontend/ir/microinstruction.h"
 #include "frontend/ir/opcodes.h"
@@ -19,6 +28,7 @@
 namespace Dynarmic::BackendX64 {
 
 using namespace Xbyak::util;
+namespace mp = Dynarmic::Common::mp;
 
 constexpr u64 f32_negative_zero = 0x80000000u;
 constexpr u64 f32_nan = 0x7fc00000u;
@@ -33,6 +43,10 @@ constexpr u64 f64_min_s32 = 0xc1e0000000000000u; // -2147483648 as a double
 constexpr u64 f64_max_s32 = 0x41dfffffffc00000u; // 2147483647 as a double
 constexpr u64 f64_min_u32 = 0x0000000000000000u; // 0 as a double
 constexpr u64 f64_max_u32 = 0x41efffffffe00000u; // 4294967295 as a double
+constexpr u64 f64_min_s64 = 0xc3e0000000000000u; // -2^63 as a double
+constexpr u64 f64_max_s64_lim = 0x43e0000000000000u; // 2^63 as a double (actual maximum unrepresentable)
+constexpr u64 f64_min_u64 = 0x0000000000000000u; // 0 as a double
+constexpr u64 f64_max_u64_lim = 0x43f0000000000000u; // 2^64 as a double (actual maximum unrepresentable)
 
 static void DenormalsAreZero32(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Reg32 gpr_scratch) {
     Xbyak::Label end;
@@ -120,7 +134,7 @@ static void PreProcessNaNs32(BlockOfCode& code, Xbyak::Xmm a, Xbyak::Xmm b, Xbya
     code.movd(code.ABI_PARAM1.cvt32(), a);
     code.movd(code.ABI_PARAM2.cvt32(), b);
     code.CallFunction(static_cast<u32(*)(u32, u32)>([](u32 a, u32 b) -> u32 {
-        return *Common::ProcessNaNs(a, b);
+        return *FP::ProcessNaNs(a, b);
     }));
     code.movd(a, code.ABI_RETURN.cvt32());
     ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(a.getIdx()));
@@ -149,7 +163,7 @@ static void PreProcessNaNs32(BlockOfCode& code, Xbyak::Xmm a, Xbyak::Xmm b, Xbya
     code.movd(code.ABI_PARAM2.cvt32(), b);
     code.movd(code.ABI_PARAM3.cvt32(), c);
     code.CallFunction(static_cast<u32(*)(u32, u32, u32)>([](u32 a, u32 b, u32 c) -> u32 {
-        return *Common::ProcessNaNs(a, b, c);
+        return *FP::ProcessNaNs(a, b, c);
     }));
     code.movd(a, code.ABI_RETURN.cvt32());
     ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(a.getIdx()));
@@ -187,7 +201,7 @@ static void PreProcessNaNs64(BlockOfCode& code, Xbyak::Xmm a, Xbyak::Xmm b, Xbya
     code.movq(code.ABI_PARAM1, a);
     code.movq(code.ABI_PARAM2, b);
     code.CallFunction(static_cast<u64(*)(u64, u64)>([](u64 a, u64 b) -> u64 {
-        return *Common::ProcessNaNs(a, b);
+        return *FP::ProcessNaNs(a, b);
     }));
     code.movq(a, code.ABI_RETURN);
     ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(a.getIdx()));
@@ -213,7 +227,7 @@ static void PreProcessNaNs64(BlockOfCode& code, Xbyak::Xmm a, Xbyak::Xmm b, Xbya
     code.movq(code.ABI_PARAM2, b);
     code.movq(code.ABI_PARAM3, c);
     code.CallFunction(static_cast<u64(*)(u64, u64, u64)>([](u64 a, u64 b, u64 c) -> u64 {
-        return *Common::ProcessNaNs(a, b, c);
+        return *FP::ProcessNaNs(a, b, c);
     }));
     code.movq(a, code.ABI_RETURN);
     ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(a.getIdx()));
@@ -892,129 +906,160 @@ void EmitX64::EmitFPDoubleToSingle(EmitContext& ctx, IR::Inst* inst) {
     ctx.reg_alloc.DefineValue(inst, result);
 }
 
-void EmitX64::EmitFPSingleToS32(EmitContext& ctx, IR::Inst* inst) {
-    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    Xbyak::Xmm from = ctx.reg_alloc.UseScratchXmm(args[0]);
-    Xbyak::Reg32 to = ctx.reg_alloc.ScratchGpr().cvt32();
-    Xbyak::Xmm xmm_scratch = ctx.reg_alloc.ScratchXmm();
-    bool round_towards_zero = args[1].GetImmediateU1();
-
-    // ARM saturates on conversion; this differs from x64 which returns a sentinel value.
-    // Conversion to double is lossless, and allows for clamping.
-
-    if (ctx.FPSCR_FTZ()) {
-        DenormalsAreZero32(code, from, to);
-    }
-
-    code.cvtss2sd(from, from);
-    // First time is to set flags
-    if (round_towards_zero) {
-        code.cvttsd2si(to, from); // 32 bit gpr
-    } else {
-        code.cvtsd2si(to, from); // 32 bit gpr
-    }
-    // Clamp to output range
-    ZeroIfNaN64(code, from, xmm_scratch);
-    code.minsd(from, code.MConst(xword, f64_max_s32));
-    code.maxsd(from, code.MConst(xword, f64_min_s32));
-    // Second time is for real
-    if (round_towards_zero) {
-        code.cvttsd2si(to, from); // 32 bit gpr
-    } else {
-        code.cvtsd2si(to, from); // 32 bit gpr
-    }
-
-    ctx.reg_alloc.DefineValue(inst, to);
-}
-
-void EmitX64::EmitFPSingleToU32(EmitContext& ctx, IR::Inst* inst) {
-    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    Xbyak::Xmm from = ctx.reg_alloc.UseScratchXmm(args[0]);
-    Xbyak::Reg64 to = ctx.reg_alloc.ScratchGpr().cvt64();
-    Xbyak::Xmm xmm_scratch = ctx.reg_alloc.ScratchXmm();
-    bool round_towards_zero = args[1].GetImmediateU1();
-
-    // ARM saturates on conversion; this differs from x64 which returns a sentinel value.
-    // Conversion to double is lossless, and allows for accurate clamping.
-    //
-    // Since SSE2 doesn't provide an unsigned conversion, we use a 64-bit signed conversion.
-    //
-    // FIXME: None of the FPSR exception bits are correctly signalled with the below code
-
-    if (ctx.FPSCR_FTZ()) {
-        DenormalsAreZero64(code, from, to);
-    }
-
-    code.cvtss2sd(from, from);
-    // Clamp to output range
-    ZeroIfNaN64(code, from, xmm_scratch);
-    code.minsd(from, code.MConst(xword, f64_max_u32));
-    code.maxsd(from, code.MConst(xword, f64_min_u32));
-    if (round_towards_zero) {
-        code.cvttsd2si(to, from); // 64 bit gpr
-    } else {
-        code.cvtsd2si(to, from); // 64 bit gpr
-    }
-
-    ctx.reg_alloc.DefineValue(inst, to);
-}
-
-void EmitX64::EmitFPDoubleToS32(EmitContext& ctx, IR::Inst* inst) {
-    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    Xbyak::Xmm from = ctx.reg_alloc.UseScratchXmm(args[0]);
-    Xbyak::Reg32 to = ctx.reg_alloc.ScratchGpr().cvt32();
-    Xbyak::Xmm xmm_scratch = ctx.reg_alloc.ScratchXmm();
-    Xbyak::Reg32 gpr_scratch = ctx.reg_alloc.ScratchGpr().cvt32();
-    bool round_towards_zero = args[1].GetImmediateU1();
-
-    // ARM saturates on conversion; this differs from x64 which returns a sentinel value.
-
-    if (ctx.FPSCR_FTZ()) {
-        DenormalsAreZero64(code, from, gpr_scratch.cvt64());
-    }
-
-    // First time is to set flags
-    if (round_towards_zero) {
-        code.cvttsd2si(gpr_scratch, from); // 32 bit gpr
-    } else {
-        code.cvtsd2si(gpr_scratch, from); // 32 bit gpr
-    }
-    // Clamp to output range
-    ZeroIfNaN64(code, from, xmm_scratch);
-    code.minsd(from, code.MConst(xword, f64_max_s32));
-    code.maxsd(from, code.MConst(xword, f64_min_s32));
-    // Second time is for real
-    if (round_towards_zero) {
-        code.cvttsd2si(to, from); // 32 bit gpr
-    } else {
-        code.cvtsd2si(to, from); // 32 bit gpr
-    }
-
-    ctx.reg_alloc.DefineValue(inst, to);
-}
-
-void EmitX64::EmitFPDoubleToU32(EmitContext& ctx, IR::Inst* inst) {
-    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    Xbyak::Xmm from = ctx.reg_alloc.UseScratchXmm(args[0]);
-    Xbyak::Reg64 to = ctx.reg_alloc.ScratchGpr().cvt64();
-    Xbyak::Xmm xmm_scratch = ctx.reg_alloc.ScratchXmm();
-    bool round_towards_zero = args[1].GetImmediateU1();
-
-    // ARM saturates on conversion; this differs from x64 which returns a sentinel value.
-    // TODO: Use VCVTPD2UDQ when AVX512VL is available.
-    // FIXME: None of the FPSR exception bits are correctly signalled with the below code
-
-    if (ctx.FPSCR_FTZ()) {
-        DenormalsAreZero64(code, from, to);
-    }
-
-    // Clamp to output range
-    ZeroIfNaN64(code, from, xmm_scratch);
-    code.minsd(from, code.MConst(xword, f64_max_u32));
-    code.maxsd(from, code.MConst(xword, f64_min_u32));
-    if (round_towards_zero) {
-        code.cvttsd2si(to, from); // 64 bit gpr
-    } else {
-        code.cvtsd2si(to, from); // 64 bit gpr
-    }
-
-    ctx.reg_alloc.DefineValue(inst, to);
-}
+static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, size_t fsize, bool unsigned_, size_t isize) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    const size_t fbits = args[1].GetImmediateU8();
+    const auto rounding = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
+
+    if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41) && rounding != FP::RoundingMode::ToNearest_TieAwayFromZero) {
+        const Xbyak::Xmm src = ctx.reg_alloc.UseScratchXmm(args[0]);
+
+        const int round_imm = [&]{
+            switch (rounding) {
+            case FP::RoundingMode::ToNearest_TieEven:
+            default:
+                return 0b00;
+            case FP::RoundingMode::TowardsPlusInfinity:
+                return 0b10;
+            case FP::RoundingMode::TowardsMinusInfinity:
+                return 0b01;
+            case FP::RoundingMode::TowardsZero:
+                return 0b11;
+            }
+        }();
+
+        const Xbyak::Xmm scratch = ctx.reg_alloc.ScratchXmm();
+        const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr().cvt64();
+
+        if (fsize == 64) {
+            if (fbits != 0) {
+                const u64 scale_factor = static_cast<u64>((fbits + 1023) << 52);
+                code.mulsd(src, code.MConst(xword, scale_factor));
+            }
+
+            code.roundsd(src, src, round_imm);
+            ZeroIfNaN64(code, src, scratch);
+        } else {
+            if (fbits != 0) {
+                const u32 scale_factor = static_cast<u32>((fbits + 127) << 23);
+                code.mulss(src, code.MConst(xword, scale_factor));
+            }
+
+            code.roundss(src, src, round_imm);
+            code.cvtss2sd(src, src);
+            ZeroIfNaN64(code, src, scratch);
+        }
+
+        if (isize == 64) {
+            Xbyak::Label saturate_max, end;
+
+            code.maxsd(src, code.MConst(xword, unsigned_ ? f64_min_u64 : f64_min_s64));
+            code.movsd(scratch, code.MConst(xword, unsigned_ ? f64_max_u64_lim : f64_max_s64_lim));
+            code.comisd(scratch, src);
+            code.jna(saturate_max, code.T_NEAR);
+            if (unsigned_) {
+                Xbyak::Label below_max;
+                code.movsd(scratch, code.MConst(xword, f64_max_s64_lim));
+                code.comisd(src, scratch);
+                code.jb(below_max);
+                code.subsd(src, scratch);
+                code.cvttsd2si(result, src);
+                code.btc(result, 63);
+                code.jmp(end);
+                code.L(below_max);
+            }
+            code.cvttsd2si(result, src); // 64 bit gpr
+            code.L(end);
+
+            code.SwitchToFarCode();
+            code.L(saturate_max);
+            code.mov(result, unsigned_ ? 0xFFFF'FFFF'FFFF'FFFF : 0x7FFF'FFFF'FFFF'FFFF);
+            code.jmp(end, code.T_NEAR);
+            code.SwitchToNearCode();
+        } else {
+            code.minsd(src, code.MConst(xword, unsigned_ ? f64_max_u32 : f64_max_s32));
+            code.maxsd(src, code.MConst(xword, unsigned_ ? f64_min_u32 : f64_min_s32));
+            code.cvttsd2si(result, src); // 64 bit gpr
+        }
+
+        ctx.reg_alloc.DefineValue(inst, result);
+        return;
+    }
+
+    using fsize_list = mp::list<mp::vlift<size_t(32)>, mp::vlift<size_t(64)>>;
+    using unsigned_list = mp::list<mp::vlift<true>, mp::vlift<false>>;
+    using isize_list = mp::list<mp::vlift<size_t(32)>, mp::vlift<size_t(64)>>;
+    using rounding_list = mp::list<
+        std::integral_constant<FP::RoundingMode, FP::RoundingMode::ToNearest_TieEven>,
+        std::integral_constant<FP::RoundingMode, FP::RoundingMode::TowardsPlusInfinity>,
+        std::integral_constant<FP::RoundingMode, FP::RoundingMode::TowardsMinusInfinity>,
+        std::integral_constant<FP::RoundingMode, FP::RoundingMode::TowardsZero>,
+        std::integral_constant<FP::RoundingMode, FP::RoundingMode::ToNearest_TieAwayFromZero>
+    >;
+
+    using key_type = std::tuple<size_t, bool, size_t, FP::RoundingMode>;
+    using value_type = u64(*)(u64, u8, FP::FPSR&, A64::FPCR);
+
+    static const auto lut = mp::GenerateLookupTableFromList<key_type, value_type>(
+        [](auto args) {
+            return std::pair<key_type, value_type>{
+                mp::to_tuple<decltype(args)>,
+                static_cast<value_type>(
+                    [](u64 input, u8 fbits, FP::FPSR& fpsr, A64::FPCR fpcr) {
+                        constexpr auto t = mp::to_tuple<decltype(args)>;
+                        constexpr size_t fsize = std::get<0>(t);
+                        constexpr bool unsigned_ = std::get<1>(t);
+                        constexpr size_t isize = std::get<2>(t);
+                        constexpr FP::RoundingMode rounding_mode = std::get<3>(t);
+                        using InputSize = mp::unsigned_integer_of_size<fsize>;
+
+                        return FP::FPToFixed<InputSize>(isize, static_cast<InputSize>(input), fbits, unsigned_, fpcr, rounding_mode, fpsr);
+                    }
+                )
+            };
+        },
+        mp::cartesian_product<fsize_list, unsigned_list, isize_list, rounding_list>{}
+    );
+
+    ctx.reg_alloc.HostCall(inst, args[0], args[1]);
+    code.lea(code.ABI_PARAM3, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
+    code.mov(code.ABI_PARAM4.cvt32(), ctx.FPCR());
+    code.CallFunction(lut.at(std::make_tuple(fsize, unsigned_, isize, rounding)));
+}
+
+void EmitX64::EmitFPDoubleToFixedS32(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPToFixed(code, ctx, inst, 64, false, 32);
+}
+
+void EmitX64::EmitFPDoubleToFixedS64(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPToFixed(code, ctx, inst, 64, false, 64);
+}
+
+void EmitX64::EmitFPDoubleToFixedU32(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPToFixed(code, ctx, inst, 64, true, 32);
+}
+
+void EmitX64::EmitFPDoubleToFixedU64(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPToFixed(code, ctx, inst, 64, true, 64);
+}
+
+void EmitX64::EmitFPSingleToFixedS32(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPToFixed(code, ctx, inst, 32, false, 32);
+}
+
+void EmitX64::EmitFPSingleToFixedS64(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPToFixed(code, ctx, inst, 32, false, 64);
+}
+
+void EmitX64::EmitFPSingleToFixedU32(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPToFixed(code, ctx, inst, 32, true, 32);
+}
+
+void EmitX64::EmitFPSingleToFixedU64(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPToFixed(code, ctx, inst, 32, true, 64);
+}
 
 void EmitX64::EmitFPS32ToSingle(EmitContext& ctx, IR::Inst* inst) {
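
Note (illustrative only, not part of this commit): the unsigned 64-bit branch above works around cvttsd2si being a signed-only conversion; inputs in [2^63, 2^64) are biased down by 2^63 before conversion and the top bit is restored with btc. A scalar sketch of the same saturation behaviour, assuming the FPCR rounding has already been applied (the function name is hypothetical):

#include <cstdint>

uint64_t f64_to_u64_saturate(double x) {
    constexpr double two_63 = 9223372036854775808.0;   // 2^63 (f64_max_s64_lim)
    constexpr double two_64 = 18446744073709551616.0;  // 2^64 (f64_max_u64_lim)
    if (!(x >= 0.0))
        return 0;                 // NaN or negative input saturates to 0
    if (x >= two_64)
        return ~uint64_t{0};      // too large: saturate to all-ones
    if (x >= two_63)              // only representable after removing the top bit
        return static_cast<uint64_t>(x - two_63) | (uint64_t{1} << 63);
    return static_cast<uint64_t>(x);
}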

src/backend_x64/emit_x64_vector_floating_point.cpp

@@ -10,7 +10,7 @@
 #include "backend_x64/block_of_code.h"
 #include "backend_x64/emit_x64.h"
 #include "common/bit_util.h"
-#include "common/fp_util.h"
+#include "common/fp/util.h"
 #include "frontend/ir/basic_block.h"
 #include "frontend/ir/microinstruction.h"
@@ -69,9 +69,9 @@ static void HandleNaNs(BlockOfCode& code, EmitContext& ctx, const Xbyak::Xmm& xm
     code.CallFunction(static_cast<void(*)(RegArray&, const RegArray&, const RegArray&)>(
         [](RegArray& result, const RegArray& a, const RegArray& b) {
             for (size_t i = 0; i < result.size(); ++i) {
-                if (auto r = Common::ProcessNaNs(a[i], b[i])) {
+                if (auto r = FP::ProcessNaNs(a[i], b[i])) {
                     result[i] = *r;
-                } else if (Common::IsNaN(result[i])) {
+                } else if (FP::IsNaN(result[i])) {
                     result[i] = NaNWrapper<T>::value;
                 }
             }

src/backend_x64/jitstate_info.h

@@ -26,6 +26,7 @@ struct JitStateInfo {
         , offsetof_CPSR_nzcv(offsetof(JitStateType, CPSR_nzcv))
         , offsetof_FPSCR_IDC(offsetof(JitStateType, FPSCR_IDC))
         , offsetof_FPSCR_UFC(offsetof(JitStateType, FPSCR_UFC))
+        , offsetof_fpsr_exc(offsetof(JitStateType, fpsr_exc))
     {}
 
     const size_t offsetof_cycles_remaining;
@@ -39,6 +40,7 @@ struct JitStateInfo {
     const size_t offsetof_CPSR_nzcv;
     const size_t offsetof_FPSCR_IDC;
     const size_t offsetof_FPSCR_UFC;
+    const size_t offsetof_fpsr_exc;
 };
 
 } // namespace Dynarmic::BackendX64

src/common/bit_util.h

@@ -21,29 +21,29 @@ constexpr size_t BitSize() {
     return sizeof(T) * CHAR_BIT;
 }
 
+template <typename T>
+inline T Ones(size_t count) {
+    ASSERT_MSG(count <= BitSize<T>(), "count larger than bitsize of T");
+    if (count == BitSize<T>())
+        return static_cast<T>(~static_cast<T>(0));
+    return ~(static_cast<T>(~static_cast<T>(0)) << count);
+}
+
 /// Extract bits [begin_bit, end_bit] inclusive from value of type T.
 template<size_t begin_bit, size_t end_bit, typename T>
 constexpr T Bits(const T value) {
     static_assert(begin_bit <= end_bit,
                   "invalid bit range (position of beginning bit cannot be greater than that of end bit)");
     static_assert(begin_bit < BitSize<T>(), "begin_bit must be smaller than size of T");
-    static_assert(end_bit < BitSize<T>(), "begin_bit must be smaller than size of T");
+    static_assert(end_bit < BitSize<T>(), "end_bit must be smaller than size of T");
 
-    return (value >> begin_bit) & ((1 << (end_bit - begin_bit + 1)) - 1);
+    return (value >> begin_bit) & Ones<T>(end_bit - begin_bit + 1);
 }
 
 #ifdef _MSC_VER
 #pragma warning(push)
 #pragma warning(disable:4554)
 #endif
 
-/// Extracts a single bit at bit_position from value of type T.
-template<size_t bit_position, typename T>
-constexpr bool Bit(const T value) {
-    static_assert(bit_position < BitSize<T>(), "bit_position must be smaller than size of T");
-
-    return ((value >> bit_position) & 1) != 0;
-}
-
 /// Extracts a single bit at bit_position from value of type T.
 template<typename T>
 inline bool Bit(size_t bit_position, const T value) {
@@ -51,6 +51,46 @@ inline bool Bit(size_t bit_position, const T value) {
     return ((value >> bit_position) & 1) != 0;
 }
 
+/// Extracts a single bit at bit_position from value of type T.
+template<size_t bit_position, typename T>
+constexpr bool Bit(const T value) {
+    static_assert(bit_position < BitSize<T>(), "bit_position must be smaller than size of T");
+
+    return Bit<T>(bit_position, value);
+}
+
+/// Clears a single bit at bit_position from value of type T.
+template<typename T>
+inline T ClearBit(size_t bit_position, const T value) {
+    ASSERT_MSG(bit_position < BitSize<T>(), "bit_position must be smaller than size of T");
+
+    return value & ~(static_cast<T>(1) << bit_position);
+}
+
+/// Clears a single bit at bit_position from value of type T.
+template<size_t bit_position, typename T>
+constexpr T ClearBit(const T value) {
+    static_assert(bit_position < BitSize<T>(), "bit_position must be smaller than size of T");
+
+    return ClearBit<T>(bit_position, value);
+}
+
+/// Modifies a single bit at bit_position from value of type T.
+template<typename T>
+inline T ModifyBit(size_t bit_position, const T value, bool new_bit) {
+    ASSERT_MSG(bit_position < BitSize<T>(), "bit_position must be smaller than size of T");
+
+    return ClearBit<T>(bit_position, value) | (static_cast<T>(new_bit) << bit_position);
+}
+
+/// Modifies a single bit at bit_position from value of type T.
+template<size_t bit_position, typename T>
+constexpr T ModifyBit(const T value, bool new_bit) {
+    static_assert(bit_position < BitSize<T>(), "bit_position must be smaller than size of T");
+
+    return ModifyBit<T>(bit_position, value, new_bit);
+}
+
 #ifdef _MSC_VER
 #pragma warning(pop)
 #endif
@@ -112,11 +152,8 @@ inline size_t LowestSetBit(T value) {
 }
 
 template <typename T>
-inline T Ones(size_t count) {
-    ASSERT_MSG(count <= BitSize<T>(), "count larger than bitsize of T");
-    if (count == BitSize<T>())
-        return ~static_cast<T>(0);
-    return ~(~static_cast<T>(0) << count);
+inline bool MostSignificantBit(T value) {
+    return Bit<BitSize<T>() - 1, T>(value);
 }
 
 template <typename T>
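
A few expected results for the reworked helpers (illustrative only, not part of this commit; Ones is not constexpr in this revision, so these are runtime assertions):

#include <cassert>
#include "common/bit_util.h"

void bit_util_examples() {
    using namespace Dynarmic::Common;
    assert(Ones<u8>(3) == 0b111);                     // low three bits set
    assert(Bits<4, 7>(u32{0xAB}) == 0xA);             // extract bits [4, 7]
    assert(ModifyBit<0>(u32{0b100}, true) == 0b101);  // set bit 0
    assert(MostSignificantBit(u8{0x80}));             // top bit of a u8
}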

src/common/fp/fpsr.h (new file)

@@ -0,0 +1,162 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include <boost/optional.hpp>
#include "common/bit_util.h"
#include "common/common_types.h"
namespace Dynarmic::FP {
/**
* Representation of the Floating-Point Status Register.
*/
class FPSR final {
public:
FPSR() = default;
FPSR(const FPSR&) = default;
FPSR(FPSR&&) = default;
explicit FPSR(u32 data) : value{data & mask} {}
FPSR& operator=(const FPSR&) = default;
FPSR& operator=(FPSR&&) = default;
FPSR& operator=(u32 data) {
value = data & mask;
return *this;
}
/// Get negative condition flag
bool N() const {
return Common::Bit<31>(value);
}
/// Set negative condition flag
void N(bool N_) {
value = Common::ModifyBit<31>(value, N_);
}
/// Get zero condition flag
bool Z() const {
return Common::Bit<30>(value);
}
/// Set zero condition flag
void Z(bool Z_) {
value = Common::ModifyBit<30>(value, Z_);
}
/// Get carry condition flag
bool C() const {
return Common::Bit<29>(value);
}
/// Set carry condition flag
void C(bool C_) {
value = Common::ModifyBit<29>(value, C_);
}
/// Get overflow condition flag
bool V() const {
return Common::Bit<28>(value);
}
/// Set overflow condition flag
void V(bool V_) {
value = Common::ModifyBit<28>(value, V_);
}
/// Get cumulative saturation bit
bool QC() const {
return Common::Bit<27>(value);
}
/// Set cumulative saturation bit
void QC(bool QC_) {
value = Common::ModifyBit<27>(value, QC_);
}
/// Get input denormal floating-point exception bit
bool IDC() const {
return Common::Bit<7>(value);
}
/// Set input denormal floating-point exception bit
void IDC(bool IDC_) {
value = Common::ModifyBit<7>(value, IDC_);
}
/// Get inexact cumulative floating-point exception bit
bool IXC() const {
return Common::Bit<4>(value);
}
/// Set inexact cumulative floating-point exception bit
void IXC(bool IXC_) {
value = Common::ModifyBit<4>(value, IXC_);
}
/// Get underflow cumulative floating-point exception bit
bool UFC() const {
return Common::Bit<3>(value);
}
/// Set underflow cumulative floating-point exception bit
void UFC(bool UFC_) {
value = Common::ModifyBit<3>(value, UFC_);
}
/// Get overflow cumulative floating-point exception bit
bool OFC() const {
return Common::Bit<2>(value);
}
/// Set overflow cumulative floating-point exception bit
void OFC(bool OFC_) {
value = Common::ModifyBit<2>(value, OFC_);
}
/// Get divide by zero cumulative floating-point exception bit
bool DZC() const {
return Common::Bit<1>(value);
}
/// Set divide by zero cumulative floating-point exception bit
void DZC(bool DZC_) {
value = Common::ModifyBit<1>(value, DZC_);
}
/// Get invalid operation cumulative floating-point exception bit
bool IOC() const {
return Common::Bit<0>(value);
}
/// Set invalid operation cumulative floating-point exception bit
void IOC(bool IOC_) {
value = Common::ModifyBit<0>(value, IOC_);
}
/// Gets the underlying raw value within the FPSR.
u32 Value() const {
return value;
}
private:
// Bits 5-6 and 8-26 are reserved.
static constexpr u32 mask = 0xF800009F;
u32 value = 0;
};
inline bool operator==(FPSR lhs, FPSR rhs) {
return lhs.Value() == rhs.Value();
}
inline bool operator!=(FPSR lhs, FPSR rhs) {
return !operator==(lhs, rhs);
}
} // namespace Dynarmic::FP
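
A short usage sketch (illustrative only, not part of this commit):

Dynarmic::FP::FPSR fpsr;
fpsr.IOC(true);   // record an invalid-operation exception
fpsr.IXC(true);   // record an inexact exception
// fpsr.Value() == 0x11, since IOC is bit 0 and IXC is bit 4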

src/common/fp/info.h (new file)

@@ -0,0 +1,58 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include "common/common_types.h"
namespace Dynarmic::FP {
template<typename FPT>
struct FPInfo {};
template<>
struct FPInfo<u32> {
static constexpr size_t total_width = 32;
static constexpr size_t exponent_width = 8;
static constexpr size_t explicit_mantissa_width = 23;
static constexpr size_t mantissa_width = explicit_mantissa_width + 1;
static constexpr u32 implicit_leading_bit = u32(1) << explicit_mantissa_width;
static constexpr u32 sign_mask = 0x80000000;
static constexpr u32 exponent_mask = 0x7F800000;
static constexpr u32 mantissa_mask = 0x007FFFFF;
static constexpr int exponent_min = -126;
static constexpr int exponent_max = 127;
static constexpr int exponent_bias = 127;
static constexpr u32 Zero(bool sign) { return sign ? sign_mask : 0; }
static constexpr u32 Infinity(bool sign) { return exponent_mask | Zero(sign); }
static constexpr u32 MaxNormal(bool sign) { return (exponent_mask - 1) | Zero(sign); }
};
template<>
struct FPInfo<u64> {
static constexpr size_t total_width = 64;
static constexpr size_t exponent_width = 11;
static constexpr size_t explicit_mantissa_width = 52;
static constexpr size_t mantissa_width = explicit_mantissa_width + 1;
static constexpr u64 implicit_leading_bit = u64(1) << explicit_mantissa_width;
static constexpr u64 sign_mask = 0x8000'0000'0000'0000;
static constexpr u64 exponent_mask = 0x7FF0'0000'0000'0000;
static constexpr u64 mantissa_mask = 0x000F'FFFF'FFFF'FFFF;
static constexpr int exponent_min = -1022;
static constexpr int exponent_max = 1023;
static constexpr int exponent_bias = 1023;
static constexpr u64 Zero(bool sign) { return sign ? sign_mask : 0; }
static constexpr u64 Infinity(bool sign) { return exponent_mask | Zero(sign); }
static constexpr u64 MaxNormal(bool sign) { return (exponent_mask - 1) | Zero(sign); }
};
} // namespace Dynarmic::FP
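
The layout constants compose into the usual special values; a quick check (illustrative only, not part of this commit):

using F32 = Dynarmic::FP::FPInfo<u32>;
static_assert(F32::Infinity(false) == 0x7F800000);      // +infinity
static_assert(F32::MaxNormal(true) == 0xFF7FFFFF);      // largest-magnitude negative normal
static_assert(F32::implicit_leading_bit == 0x00800000); // bit 23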

src/common/fp/mantissa_util.h (new file)

@@ -0,0 +1,48 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include "common/bit_util.h"
#include "common/common_types.h"
namespace Dynarmic::FP {
enum class ResidualError {
Zero,
LessThanHalf,
Half,
GreaterThanHalf,
};
template<typename MantissaT>
ResidualError ResidualErrorOnRightShift(MantissaT mantissa, int shift_amount) {
if (shift_amount <= 0 || mantissa == 0) {
return ResidualError::Zero;
}
if (shift_amount > static_cast<int>(Common::BitSize<MantissaT>())) {
return Common::MostSignificantBit(mantissa) ? ResidualError::GreaterThanHalf : ResidualError::LessThanHalf;
}
const size_t half_bit_position = static_cast<size_t>(shift_amount - 1);
const MantissaT half = static_cast<MantissaT>(1) << half_bit_position;
const MantissaT error_mask = Common::Ones<MantissaT>(static_cast<size_t>(shift_amount));
const MantissaT error = mantissa & error_mask;
if (error == 0) {
return ResidualError::Zero;
}
if (error < half) {
return ResidualError::LessThanHalf;
}
if (error == half) {
return ResidualError::Half;
}
return ResidualError::GreaterThanHalf;
}
} // namespace Dynarmic::FP
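
For example, shifting right by 3 classifies the three discarded bits against the halfway point 0b100 (illustrative only, not part of this commit):

using Dynarmic::FP::ResidualError;
using Dynarmic::FP::ResidualErrorOnRightShift;
assert(ResidualErrorOnRightShift(u32{0b1011}, 3) == ResidualError::LessThanHalf);
assert(ResidualErrorOnRightShift(u32{0b1100}, 3) == ResidualError::Half);
assert(ResidualErrorOnRightShift(u32{0b1101}, 3) == ResidualError::GreaterThanHalf);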

src/common/fp/op.cpp (new file)

@@ -0,0 +1,101 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include "common/assert.h"
#include "common/bit_util.h"
#include "common/common_types.h"
#include "common/safe_ops.h"
#include "common/fp/fpsr.h"
#include "common/fp/mantissa_util.h"
#include "common/fp/op.h"
#include "common/fp/process_exception.h"
#include "common/fp/rounding_mode.h"
#include "common/fp/unpacked.h"
#include "frontend/A64/FPCR.h"
namespace Dynarmic::FP {
template<typename FPT>
u64 FPToFixed(size_t ibits, FPT op, size_t fbits, bool unsigned_, FPCR fpcr, RoundingMode rounding, FPSR& fpsr) {
ASSERT(rounding != RoundingMode::ToOdd);
ASSERT(ibits <= 64);
ASSERT(fbits <= ibits);
auto [type, sign, value] = FPUnpack<FPT>(op, fpcr, fpsr);
if (type == FPType::SNaN || type == FPType::QNaN) {
FPProcessException(FPExc::InvalidOp, fpcr, fpsr);
}
// Handle zero
if (value.mantissa == 0) {
return 0;
}
if (sign && unsigned_) {
FPProcessException(FPExc::InvalidOp, fpcr, fpsr);
return 0;
}
// value *= 2.0^fbits
value.exponent += static_cast<int>(fbits);
u64 int_result = sign ? Safe::Negate<u64>(value.mantissa) : static_cast<u64>(value.mantissa);
const ResidualError error = ResidualErrorOnRightShift(int_result, -value.exponent);
int_result = Safe::ArithmeticShiftLeft(int_result, value.exponent);
bool round_up = false;
switch (rounding) {
case RoundingMode::ToNearest_TieEven:
round_up = error > ResidualError::Half || (error == ResidualError::Half && Common::Bit<0>(int_result));
break;
case RoundingMode::TowardsPlusInfinity:
round_up = error != ResidualError::Zero;
break;
case RoundingMode::TowardsMinusInfinity:
round_up = false;
break;
case RoundingMode::TowardsZero:
round_up = error != ResidualError::Zero && Common::MostSignificantBit(int_result);
break;
case RoundingMode::ToNearest_TieAwayFromZero:
round_up = error > ResidualError::Half || (error == ResidualError::Half && !Common::MostSignificantBit(int_result));
break;
case RoundingMode::ToOdd:
UNREACHABLE();
}
if (round_up) {
int_result++;
}
// Detect Overflow
const int min_exponent_for_overflow = static_cast<int>(ibits) - static_cast<int>(Common::HighestSetBit(value.mantissa + (round_up ? 1 : 0))) - (unsigned_ ? 0 : 1);
if (value.exponent >= min_exponent_for_overflow) {
// Positive overflow
if (unsigned_ || !sign) {
FPProcessException(FPExc::InvalidOp, fpcr, fpsr);
return Common::Ones<u64>(ibits - (unsigned_ ? 0 : 1));
}
// Negative overflow
const u64 min_value = Safe::Negate<u64>(static_cast<u64>(1) << (ibits - 1));
if (!(value.exponent == min_exponent_for_overflow && int_result == min_value)) {
FPProcessException(FPExc::InvalidOp, fpcr, fpsr);
return static_cast<u64>(1) << (ibits - 1);
}
}
if (error != ResidualError::Zero) {
FPProcessException(FPExc::Inexact, fpcr, fpsr);
}
return int_result & Common::Ones<u64>(ibits);
}
template u64 FPToFixed<u32>(size_t ibits, u32 op, size_t fbits, bool unsigned_, FPCR fpcr, RoundingMode rounding, FPSR& fpsr);
template u64 FPToFixed<u64>(size_t ibits, u64 op, size_t fbits, bool unsigned_, FPCR fpcr, RoundingMode rounding, FPSR& fpsr);
} // namespace Dynarmic::FP
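
A worked call (illustrative only, not part of this commit; assumes a default-constructed FPCR with FZ and all traps clear): converting 2.5f, encoded as 0x40200000, to a signed 32-bit integer with round-to-nearest, ties-to-even:

Dynarmic::FP::FPSR fpsr;
Dynarmic::A64::FPCR fpcr;
const u64 result = Dynarmic::FP::FPToFixed<u32>(
    32, 0x40200000, 0, false, fpcr,
    Dynarmic::FP::RoundingMode::ToNearest_TieEven, fpsr);
// result == 2 (2.5 ties to the even neighbour), and fpsr.IXC() is now set
// because the conversion was inexact.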

src/common/fp/op.h (new file)

@@ -0,0 +1,21 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include "common/common_types.h"
#include "common/fp/fpsr.h"
#include "common/fp/rounding_mode.h"
#include "frontend/A64/FPCR.h"
namespace Dynarmic::FP {
using FPCR = A64::FPCR;
template<typename FPT>
u64 FPToFixed(size_t ibits, FPT op, size_t fbits, bool unsigned_, FPCR fpcr, RoundingMode rounding, FPSR& fpsr);
} // namespace Dynarmic::FP

src/common/fp/process_exception.cpp (new file)

@@ -0,0 +1,58 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include "common/assert.h"
#include "common/fp/fpsr.h"
#include "common/fp/process_exception.h"
#include "frontend/A64/FPCR.h"
namespace Dynarmic::FP {
void FPProcessException(FPExc exception, FPCR fpcr, FPSR& fpsr) {
switch (exception) {
case FPExc::InvalidOp:
if (fpcr.IOE()) {
UNIMPLEMENTED();
}
fpsr.IOC(true);
break;
case FPExc::DivideByZero:
if (fpcr.DZE()) {
UNIMPLEMENTED();
}
fpsr.DZC(true);
break;
case FPExc::Overflow:
if (fpcr.OFE()) {
UNIMPLEMENTED();
}
fpsr.OFC(true);
break;
case FPExc::Underflow:
if (fpcr.UFE()) {
UNIMPLEMENTED();
}
fpsr.UFC(true);
break;
case FPExc::Inexact:
if (fpcr.IXE()) {
UNIMPLEMENTED();
}
fpsr.IXC(true);
break;
case FPExc::InputDenorm:
if (fpcr.IDE()) {
UNIMPLEMENTED();
}
fpsr.IDC(true);
break;
default:
UNREACHABLE();
break;
}
}
} // namespace Dynarmic::FP
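
Illustrative only, not part of this commit: with the corresponding trap-enable bit clear in FPCR (assumed default), an exception just sets the cumulative FPSR flag:

Dynarmic::FP::FPSR fpsr;
Dynarmic::A64::FPCR fpcr;
Dynarmic::FP::FPProcessException(Dynarmic::FP::FPExc::Overflow, fpcr, fpsr);
// fpsr.OFC() is now true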

src/common/fp/process_exception.h (new file)

@@ -0,0 +1,27 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include "common/fp/fpsr.h"
#include "frontend/A64/FPCR.h"
namespace Dynarmic::FP {
using FPCR = A64::FPCR;
enum class FPExc {
InvalidOp,
DivideByZero,
Overflow,
Underflow,
Inexact,
InputDenorm,
};
void FPProcessException(FPExc exception, FPCR fpcr, FPSR& fpsr);
} // namespace Dynarmic::FP

src/common/fp/unpacked.cpp (new file)

@@ -0,0 +1,179 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include "common/fp/info.h"
#include "common/fp/process_exception.h"
#include "common/fp/unpacked.h"
#include "common/safe_ops.h"
namespace Dynarmic::FP {
template<typename FPT>
std::tuple<FPType, bool, FPUnpacked<u64>> FPUnpack(FPT op, FPCR fpcr, FPSR& fpsr) {
constexpr size_t sign_bit = FPInfo<FPT>::exponent_width + FPInfo<FPT>::explicit_mantissa_width;
constexpr size_t exponent_high_bit = FPInfo<FPT>::exponent_width + FPInfo<FPT>::explicit_mantissa_width - 1;
constexpr size_t exponent_low_bit = FPInfo<FPT>::explicit_mantissa_width;
constexpr size_t mantissa_high_bit = FPInfo<FPT>::explicit_mantissa_width - 1;
constexpr size_t mantissa_low_bit = 0;
constexpr int denormal_exponent = FPInfo<FPT>::exponent_min - int(FPInfo<FPT>::explicit_mantissa_width);
const bool sign = Common::Bit<sign_bit>(op);
const FPT exp_raw = Common::Bits<exponent_low_bit, exponent_high_bit>(op);
const FPT frac_raw = Common::Bits<mantissa_low_bit, mantissa_high_bit>(op);
if (exp_raw == 0) {
if (frac_raw == 0 || fpcr.FZ()) {
if (frac_raw != 0) {
FPProcessException(FPExc::InputDenorm, fpcr, fpsr);
}
return {FPType::Zero, sign, {sign, 0, 0}};
}
return {FPType::Nonzero, sign, {sign, denormal_exponent, frac_raw}};
}
if (exp_raw == Common::Ones<FPT>(FPInfo<FPT>::exponent_width)) {
if (frac_raw == 0) {
return {FPType::Infinity, sign, {sign, 1000000, 1}};
}
const bool is_quiet = Common::Bit<mantissa_high_bit>(frac_raw);
return {is_quiet ? FPType::QNaN : FPType::SNaN, sign, {sign, 0, 0}};
}
const int exp = static_cast<int>(exp_raw) - FPInfo<FPT>::exponent_bias - FPInfo<FPT>::explicit_mantissa_width;
const u64 frac = frac_raw | FPInfo<FPT>::implicit_leading_bit;
return {FPType::Nonzero, sign, {sign, exp, frac}};
}
template std::tuple<FPType, bool, FPUnpacked<u64>> FPUnpack<u32>(u32 op, FPCR fpcr, FPSR& fpsr);
template std::tuple<FPType, bool, FPUnpacked<u64>> FPUnpack<u64>(u64 op, FPCR fpcr, FPSR& fpsr);
template<size_t F, typename MantissaT>
std::tuple<bool, int, MantissaT, MantissaT> Normalize(FPUnpacked<MantissaT> op) {
const int highest_set_bit = Common::HighestSetBit(op.mantissa);
const int shift_amount = highest_set_bit - static_cast<int>(F);
const MantissaT mantissa = Safe::LogicalShiftRight(op.mantissa, shift_amount);
const MantissaT error = Safe::LogicalShiftRightDouble(op.mantissa, static_cast<MantissaT>(0), shift_amount);
const int exponent = op.exponent + highest_set_bit;
return std::make_tuple(op.sign, exponent, mantissa, error);
}
template<typename FPT, typename MantissaT>
FPT FPRoundBase(FPUnpacked<MantissaT> op, FPCR fpcr, RoundingMode rounding, FPSR& fpsr) {
ASSERT(op.mantissa != 0);
ASSERT(rounding != RoundingMode::ToNearest_TieAwayFromZero);
constexpr int minimum_exp = FPInfo<FPT>::exponent_min;
constexpr size_t E = FPInfo<FPT>::exponent_width;
constexpr size_t F = FPInfo<FPT>::explicit_mantissa_width;
constexpr bool isFP16 = FPInfo<FPT>::total_width == 16;
auto [sign, exponent, mantissa, error] = Normalize<F>(op);
if (((!isFP16 && fpcr.FZ()) || (isFP16 && fpcr.FZ16())) && exponent < minimum_exp) {
fpsr.UFC(true);
return FPInfo<FPT>::Zero(sign);
}
int biased_exp = std::max<int>(exponent - minimum_exp + 1, 0);
if (biased_exp == 0) {
error = Safe::LogicalShiftRightDouble(mantissa, error, minimum_exp - exponent);
mantissa = Safe::LogicalShiftRight(mantissa, minimum_exp - exponent);
}
if (biased_exp == 0 && (error != 0 || fpcr.UFE())) {
FPProcessException(FPExc::Underflow, fpcr, fpsr);
}
bool round_up = false, overflow_to_inf = false;
switch (rounding) {
case RoundingMode::ToNearest_TieEven: {
constexpr MantissaT half = static_cast<MantissaT>(1) << (Common::BitSize<MantissaT>() - 1);
round_up = (error > half) || (error == half && Common::Bit<0>(mantissa));
overflow_to_inf = true;
break;
}
case RoundingMode::TowardsPlusInfinity:
round_up = error != 0 && !sign;
overflow_to_inf = !sign;
break;
case RoundingMode::TowardsMinusInfinity:
round_up = error != 0 && sign;
overflow_to_inf = sign;
break;
default:
break;
}
if (round_up) {
if ((mantissa & FPInfo<FPT>::mantissa_mask) == FPInfo<FPT>::mantissa_mask) {
// Overflow on rounding up is going to happen
if (mantissa == FPInfo<FPT>::mantissa_mask) {
// Rounding up from denormal to normal
mantissa++;
biased_exp++;
} else {
// Rounding up to next exponent
mantissa = (mantissa + 1) / 2;
biased_exp++;
}
} else {
mantissa++;
}
}
if (error != 0 && rounding == RoundingMode::ToOdd) {
mantissa = Common::ModifyBit<0>(mantissa, true);
}
FPT result = 0;
#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable:4127) // C4127: conditional expression is constant
#endif
if (!isFP16 || !fpcr.AHP()) {
#ifdef _MSC_VER
#pragma warning(pop)
#endif
constexpr int max_biased_exp = (1 << E) - 1;
if (biased_exp >= max_biased_exp) {
result = overflow_to_inf ? FPInfo<FPT>::Infinity(sign) : FPInfo<FPT>::MaxNormal(sign);
FPProcessException(FPExc::Overflow, fpcr, fpsr);
FPProcessException(FPExc::Inexact, fpcr, fpsr);
} else {
result = sign ? 1 : 0;
result <<= E;
result += biased_exp;
result <<= F;
result |= static_cast<FPT>(mantissa) & FPInfo<FPT>::mantissa_mask;
if (error != 0) {
FPProcessException(FPExc::Inexact, fpcr, fpsr);
}
}
} else {
constexpr int max_biased_exp = (1 << E);
if (biased_exp >= max_biased_exp) {
result = sign ? 0xFFFF : 0x7FFF;
FPProcessException(FPExc::InvalidOp, fpcr, fpsr);
} else {
result = sign ? 1 : 0;
result <<= E;
result += biased_exp;
result <<= F;
result |= static_cast<FPT>(mantissa) & FPInfo<FPT>::mantissa_mask;
if (error != 0) {
FPProcessException(FPExc::Inexact, fpcr, fpsr);
}
}
}
return result;
}
template u32 FPRoundBase<u32, u64>(FPUnpacked<u64> op, FPCR fpcr, RoundingMode rounding, FPSR& fpsr);
template u64 FPRoundBase<u64, u64>(FPUnpacked<u64> op, FPCR fpcr, RoundingMode rounding, FPSR& fpsr);
} // namespace Dynarmic::FP
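
Illustrative only, not part of this commit: unpacking 1.0f (0x3F800000), assuming a default-constructed FPCR with FZ clear:

Dynarmic::FP::FPSR fpsr;
Dynarmic::A64::FPCR fpcr;
const auto [type, sign, value] = Dynarmic::FP::FPUnpack<u32>(0x3F800000, fpcr, fpsr);
// type == FPType::Nonzero, sign == false, and
// value == {false, -23, 0x800000}, since 0x800000 * 2^-23 == 1.0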

src/common/fp/unpacked.h (new file)

@@ -0,0 +1,57 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include <tuple>
#include "common/common_types.h"
#include "common/fp/fpsr.h"
#include "frontend/A64/FPCR.h"
namespace Dynarmic::FP {
using FPCR = A64::FPCR;
enum class FPType {
Nonzero,
Zero,
Infinity,
QNaN,
SNaN,
};
/// value = (sign ? -1 : +1) * mantissa * 2^exponent
template<typename MantissaT>
struct FPUnpacked {
bool sign;
int exponent;
MantissaT mantissa;
};
template<typename MantissaT>
inline bool operator==(const FPUnpacked<MantissaT>& a, const FPUnpacked<MantissaT>& b) {
return std::tie(a.sign, a.exponent, a.mantissa) == std::tie(b.sign, b.exponent, b.mantissa);
}
template<typename FPT>
std::tuple<FPType, bool, FPUnpacked<u64>> FPUnpack(FPT op, FPCR fpcr, FPSR& fpsr);
template<typename FPT, typename MantissaT>
FPT FPRoundBase(FPUnpacked<MantissaT> op, FPCR fpcr, RoundingMode rounding, FPSR& fpsr);
template<typename FPT, typename MantissaT>
FPT FPRound(FPUnpacked<MantissaT> op, FPCR fpcr, RoundingMode rounding, FPSR& fpsr) {
fpcr.AHP(false);
return FPRoundBase<FPT, MantissaT>(op, fpcr, rounding, fpsr);
}
template<typename FPT, typename MantissaT>
FPT FPRound(FPUnpacked<MantissaT> op, FPCR fpcr, FPSR& fpsr) {
return FPRound<FPT, MantissaT>(op, fpcr, fpcr.RMode(), fpsr);
}
} // namespace Dynarmic::FP

src/common/{fp_util.h → fp/util.h} (renamed)

@@ -8,8 +8,7 @@
 
 #include <boost/optional.hpp>
 
-namespace Dynarmic {
-namespace Common {
+namespace Dynarmic::FP {
 
 /// Is 32-bit floating point value a QNaN?
 constexpr bool IsQNaN(u32 value) {
@@ -110,5 +109,4 @@ inline boost::optional<u64> ProcessNaNs(u64 a, u64 b, u64 c) {
     return boost::none;
 }
 
-} // namespace Common
-} // namespace Dynarmic
+} // namespace Dynarmic::FP

src/common/mp/append.h (new file)

@@ -0,0 +1,27 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
namespace Dynarmic::Common::mp {
namespace detail {
template<class... L>
struct append_impl;
template<template<class...> class LT, class... T1, class... T2>
struct append_impl<LT<T1...>, T2...> {
using type = LT<T1..., T2...>;
};
} // namespace detail
/// Append items T to list L
template<class L, class... T>
using append = typename detail::append_impl<L, T...>::type;
} // namespace Dynarmic::Common::mp

src/common/mp/bind.h (new file)

@@ -0,0 +1,18 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
namespace Dynarmic::Common::mp {
/// Binds the first sizeof...(A) arguments of metafunction F with arguments A
template<template<class...> class F, class... A>
struct bind {
template<class... T>
using type = F<A..., T...>;
};
} // namespace Dynarmic::Common::mp

src/common/mp/cartesian_product.h (new file)

@@ -0,0 +1,51 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include "common/mp/append.h"
#include "common/mp/bind.h"
#include "common/mp/concat.h"
#include "common/mp/fmap.h"
#include "common/mp/list.h"
namespace Dynarmic::Common::mp {
namespace detail {
template<class... Ls>
struct cartesian_product_impl{};
template<class RL>
struct cartesian_product_impl<RL> {
using type = RL;
};
template<template<class...> class LT, class... RT, class... T1>
struct cartesian_product_impl<LT<RT...>, LT<T1...>> {
using type = concat<
fmap<bind<append, RT>::template type, list<T1...>>...
>;
};
template<class RL, class L1, class L2, class... Ls>
struct cartesian_product_impl<RL, L1, L2, Ls...> {
using type = typename cartesian_product_impl<
typename cartesian_product_impl<RL, L1>::type,
L2,
Ls...
>::type;
};
} // namespace detail
/// Produces the cartesian product of a set of lists
/// For example:
/// cartesian_product<list<A, B>, list<D, E>> == list<list<A, D>, list<A, E>, list<B, D>, list<B, E>>
template<typename L1, typename... Ls>
using cartesian_product = typename detail::cartesian_product_impl<fmap<list, L1>, Ls...>::type;
} // namespace Dynarmic::Common::mp
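
The example in the comment above can be checked directly (illustrative only, not part of this commit):

#include <type_traits>

using namespace Dynarmic::Common::mp;
static_assert(std::is_same_v<
    cartesian_product<list<int, bool>, list<float>>,
    list<list<int, float>, list<bool, float>>
>);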

src/common/mp/concat.h (new file)

@@ -0,0 +1,57 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include "common/mp/list.h"
namespace Dynarmic::Common::mp {
namespace detail {
template<class... L>
struct concat_impl;
template<>
struct concat_impl<> {
using type = list<>;
};
template<class L>
struct concat_impl<L> {
using type = L;
};
template<template<class...> class LT, class... T1, class... T2, class... Ls>
struct concat_impl<LT<T1...>, LT<T2...>, Ls...> {
using type = typename concat_impl<LT<T1..., T2...>, Ls...>::type;
};
template<template<class...> class LT,
class... T1, class... T2, class... T3, class... T4, class... T5, class... T6, class... T7, class... T8,
class... T9, class... T10, class... T11, class... T12, class... T13, class... T14, class... T15, class... T16,
class... Ls>
struct concat_impl<
LT<T1...>, LT<T2...>, LT<T3...>, LT<T4...>, LT<T5...>, LT<T6...>, LT<T7...>, LT<T8...>,
LT<T9...>, LT<T10...>, LT<T11...>, LT<T12...>, LT<T13...>, LT<T14...>, LT<T15...>, LT<T16...>,
Ls...>
{
using type = typename concat_impl<
LT<
T1..., T2..., T3..., T4..., T5..., T6..., T7..., T8...,
T9..., T10..., T11..., T12..., T13..., T14..., T15..., T16...
>,
Ls...
>::type;
};
} // namespace detail
/// Concatenate lists together
template<class... L>
using concat = typename detail::concat_impl<L...>::type;
} // namespace Dynarmic::Common::mp

src/common/mp/fapply.h (new file)

@@ -0,0 +1,27 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
namespace Dynarmic::Common::mp {
namespace detail {
template<template<class...> class F, class L>
struct fapply_impl;
template<template<class...> class F, template<class...> class LT, class... T>
struct fapply_impl<F, LT<T...>> {
using type = F<T...>;
};
} // namespace detail
/// Invokes metafunction F where the arguments are all the members of list L
template<template<class...> class F, class L>
using fapply = typename detail::fapply_impl<F, L>::type;
} // namespace Dynarmic::Common::mp
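
A quick check (illustrative only, not part of this commit):

#include <tuple>
#include <type_traits>

using namespace Dynarmic::Common::mp;
static_assert(std::is_same_v<fapply<std::tuple, list<int, bool>>, std::tuple<int, bool>>);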

src/common/mp/fmap.h (new file)

@@ -0,0 +1,27 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
namespace Dynarmic::Common::mp {
namespace detail {
template<template<class...> class F, class L>
struct fmap_impl;
template<template<class...> class F, template<class...> class LT, class... T>
struct fmap_impl<F, LT<T...>> {
using type = LT<F<T>...>;
};
} // namespace detail
/// Metafunction that applies each element of list L to metafunction F
template<template<class...> class F, class L>
using fmap = typename detail::fmap_impl<F, L>::type;
} // namespace Dynarmic::Common::mp

src/common/mp/integer.h (new file)

@@ -0,0 +1,51 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include <cstddef>
#include <cstdint>
namespace Dynarmic::Common::mp {
namespace detail {
template<std::size_t size>
struct integer_of_size_impl{};
template<>
struct integer_of_size_impl<8> {
using unsigned_type = std::uint8_t;
using signed_type = std::int8_t;
};
template<>
struct integer_of_size_impl<16> {
using unsigned_type = std::uint16_t;
using signed_type = std::int16_t;
};
template<>
struct integer_of_size_impl<32> {
using unsigned_type = std::uint32_t;
using signed_type = std::int32_t;
};
template<>
struct integer_of_size_impl<64> {
using unsigned_type = std::uint64_t;
using signed_type = std::int64_t;
};
} // namespace detail
template<std::size_t size>
using unsigned_integer_of_size = typename detail::integer_of_size_impl<size>::unsigned_type;
template<std::size_t size>
using signed_integer_of_size = typename detail::integer_of_size_impl<size>::signed_type;
} // namespace Dynarmic::Common::mp

src/common/mp/list.h (new file)

@@ -0,0 +1,15 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
namespace Dynarmic::Common::mp {
/// Contains a list of types
template<class... T>
struct list {};
} // namespace Dynarmic::Common::mp

src/common/mp/lut.h (new file)

@@ -0,0 +1,23 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include <array>
#include <map>
#include <type_traits>
#include "common/mp/list.h"
namespace Dynarmic::Common::mp {
template <typename KeyT, typename ValueT, typename Function, typename ...Values>
inline auto GenerateLookupTableFromList(Function f, list<Values...>) {
static const std::array<std::pair<KeyT, ValueT>, sizeof...(Values)> pair_array{f(Values{})...};
return std::map<KeyT, ValueT>(pair_array.begin(), pair_array.end());
}
} // namespace Dynarmic::Common::mp
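
A small usage sketch (illustrative only, not part of this commit; uses vlift.h as well, and the key list is hypothetical): building a runtime map from a compile-time list, here mapping a bit width to its size in bytes:

#include <cstddef>
#include "common/mp/lut.h"
#include "common/mp/vlift.h"

using namespace Dynarmic::Common::mp;
using widths = list<vlift<std::size_t(32)>, vlift<std::size_t(64)>>;
const auto table = GenerateLookupTableFromList<std::size_t, std::size_t>(
    [](auto v) { return std::pair<std::size_t, std::size_t>{decltype(v)::value, decltype(v)::value / 8}; },
    widths{});
// table.at(32) == 4, table.at(64) == 8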

src/common/mp/to_tuple.h (new file)

@@ -0,0 +1,29 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include <tuple>
namespace Dynarmic::Common::mp {
namespace detail {
template<class L>
struct to_tuple_impl;
template<template<class...> class LT, class... T>
struct to_tuple_impl<LT<T...>> {
static constexpr auto value = std::make_tuple(static_cast<typename T::value_type>(T::value)...);
};
} // namespace detail
/// Metafunction that converts a list of metavalues to a tuple value.
template<class L>
constexpr auto to_tuple = detail::to_tuple_impl<L>::value;
} // namespace Dynarmic::Common::mp
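For instance (a sketch, not in the commit), a list of integral_constants collapses to an ordinary constexpr tuple:
#include <tuple>
#include <type_traits>
#include "common/mp/list.h"
#include "common/mp/to_tuple.h"
using namespace Dynarmic::Common::mp;
using example = list<std::integral_constant<int, 3>, std::integral_constant<int, 7>>;
static_assert(to_tuple<example> == std::make_tuple(3, 7));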

17
src/common/mp/vlift.h Normal file
View file

@@ -0,0 +1,17 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include <type_traits>
namespace Dynarmic::Common::mp {
/// Lifts a value into a type
template<auto V>
using vlift = std::integral_constant<decltype(V), V>;
} // namespace Dynarmic::Common::mp
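Illustrative sketch (mine): vlift<42> is just std::integral_constant<int, 42>, which lets plain values travel through the type-level machinery above.
#include <type_traits>
#include "common/mp/vlift.h"
using namespace Dynarmic::Common::mp;
static_assert(std::is_same_v<vlift<42>, std::integral_constant<int, 42>>);
static_assert(vlift<42u>::value == 42u); // decltype(V) preserves the literal's type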

31
src/common/mp/vllift.h Normal file
View file

@@ -0,0 +1,31 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include <type_traits>
#include <utility>
#include "common/mp/list.h"
namespace Dynarmic::Common::mp {
namespace detail {
template<class VL>
struct vllift_impl{};
template<class T, T... values>
struct vllift_impl<std::integer_sequence<T, values...>> {
using type = list<std::integral_constant<T, values>...>;
};
} // namespace detail
/// Lifts values in value list VL to create a type list.
template<class VL>
using vllift = typename detail::vllift_impl<VL>::type;
} // namespace Dynarmic::Common::mp
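A sketch of intended use (mine), pairing vllift with a standard integer_sequence:
#include <type_traits>
#include <utility>
#include "common/mp/list.h"
#include "common/mp/vllift.h"
using namespace Dynarmic::Common::mp;
static_assert(std::is_same_v<
    vllift<std::integer_sequence<int, 0, 1>>,
    list<std::integral_constant<int, 0>, std::integral_constant<int, 1>>>);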

109
src/common/safe_ops.h Normal file
View file

@@ -0,0 +1,109 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include <type_traits>
#include "common/bit_util.h"
#include "common/common_types.h"
#include "common/u128.h"
namespace Dynarmic::Safe {
template<typename T> T LogicalShiftLeft(T value, int shift_amount);
template<typename T> T LogicalShiftRight(T value, int shift_amount);
template<typename T> T ArithmeticShiftLeft(T value, int shift_amount);
template<typename T> T ArithmeticShiftRight(T value, int shift_amount);
template<typename T>
T LogicalShiftLeft(T value, int shift_amount) {
static_assert(std::is_integral_v<T>);
if (shift_amount >= static_cast<int>(Common::BitSize<T>())) {
return 0;
}
if (shift_amount < 0) {
return LogicalShiftRight(value, -shift_amount);
}
auto unsigned_value = static_cast<std::make_unsigned_t<T>>(value);
return static_cast<T>(unsigned_value << shift_amount);
}
template<>
inline u128 LogicalShiftLeft(u128 value, int shift_amount) {
return value << shift_amount;
}
template<typename T>
T LogicalShiftRight(T value, int shift_amount) {
static_assert(std::is_integral_v<T>);
if (shift_amount >= static_cast<int>(Common::BitSize<T>())) {
return 0;
}
if (shift_amount < 0) {
return LogicalShiftLeft(value, -shift_amount);
}
auto unsigned_value = static_cast<std::make_unsigned_t<T>>(value);
return static_cast<T>(unsigned_value >> shift_amount);
}
template<>
inline u128 LogicalShiftRight(u128 value, int shift_amount) {
return value >> shift_amount;
}
template<typename T>
T LogicalShiftRightDouble(T top, T bottom, int shift_amount) {
return LogicalShiftLeft(top, int(Common::BitSize<T>()) - shift_amount) | LogicalShiftRight(bottom, shift_amount);
}
template<typename T>
T ArithmeticShiftLeft(T value, int shift_amount) {
static_assert(std::is_integral_v<T>);
if (shift_amount >= static_cast<int>(Common::BitSize<T>())) {
return 0;
}
if (shift_amount < 0) {
return ArithmeticShiftRight(value, -shift_amount);
}
auto signed_value = static_cast<std::make_signed_t<T>>(value);
return static_cast<T>(signed_value << shift_amount);
}
template<typename T>
T ArithmeticShiftRight(T value, int shift_amount) {
static_assert(std::is_integral_v<T>);
if (shift_amount >= static_cast<int>(Common::BitSize<T>())) {
return Common::MostSignificantBit(value) ? ~static_cast<T>(0) : 0;
}
if (shift_amount < 0) {
return ArithmeticShiftLeft(value, -shift_amount);
}
auto signed_value = static_cast<std::make_signed_t<T>>(value);
return static_cast<T>(signed_value >> shift_amount);
}
template<typename T>
T ArithmeticShiftRightDouble(T top, T bottom, int shift_amount) {
return ArithmeticShiftLeft(top, int(Common::BitSize<T>()) - shift_amount) | LogicalShiftRight(bottom, shift_amount);
}
template<typename T>
T Negate(T value) {
return static_cast<T>(-static_cast<std::make_signed_t<T>>(value));
}
} // namespace Dynarmic::Safe
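The point of these helpers is that shift amounts may be negative or exceed the bit width without invoking undefined behaviour; a small sketch (mine):
#include <cassert>
#include "common/common_types.h"
#include "common/safe_ops.h"
inline void SafeShiftExamples() {
    using namespace Dynarmic;
    assert(Safe::LogicalShiftLeft<u32>(1, 4) == 16);
    assert(Safe::LogicalShiftLeft<u32>(16, -4) == 1);      // negative amount shifts right
    assert(Safe::LogicalShiftLeft<u32>(1, 32) == 0);       // amount >= bit width yields 0
    assert(Safe::ArithmeticShiftRight<s32>(-8, 2) == -2);  // sign-extending
    assert(Safe::ArithmeticShiftRight<s32>(-8, 99) == -1); // saturates to the sign bit
}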

64
src/common/u128.cpp Normal file
View file

@@ -0,0 +1,64 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include <array>
#include "common/common_types.h"
#include "common/u128.h"
namespace Dynarmic {
u128 operator<<(u128 operand, int amount) {
if (amount < 0) {
return operand >> -amount;
}
if (amount == 0) {
return operand;
}
if (amount < 64) {
u128 result;
result.lower = (operand.lower << amount);
result.upper = (operand.upper << amount) | (operand.lower >> (64 - amount));
return result;
}
if (amount < 128) {
u128 result;
result.upper = operand.lower << (amount - 64);
return result;
}
return {};
}
u128 operator>>(u128 operand, int amount) {
if (amount < 0) {
return operand << -amount;
}
if (amount == 0) {
return operand;
}
if (amount < 64) {
u128 result;
result.lower = (operand.lower >> amount) | (operand.upper << (64 - amount));
result.upper = (operand.upper >> amount);
return result;
}
if (amount < 128) {
u128 result;
result.lower = operand.upper >> (amount - 64);
return result;
}
return {};
}
} // namespace Dynarmic

57
src/common/u128.h Normal file
View file

@@ -0,0 +1,57 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include <cstring>
#include <type_traits>
#include "common/bit_util.h"
#include "common/common_types.h"
namespace Dynarmic {
struct u128 {
u128() = default;
u128(const u128&) = default;
u128(u128&&) = default;
u128& operator=(const u128&) = default;
u128& operator=(u128&&) = default;
u128(u64 lower_, u64 upper_) : lower(lower_), upper(upper_) {}
template <typename T>
/* implicit */ u128(T value) : lower(value), upper(0) {
static_assert(std::is_integral_v<T>);
static_assert(Common::BitSize<T>() <= Common::BitSize<u64>());
}
u64 lower = 0;
u64 upper = 0;
};
static_assert(Common::BitSize<u128>() == 128);
static_assert(std::is_standard_layout_v<u128>);
static_assert(std::is_trivially_copyable_v<u128>);
inline u128 operator+(u128 a, u128 b) {
u128 result;
result.lower = a.lower + b.lower;
result.upper = a.upper + b.upper + (a.lower > result.lower);
return result;
}
inline u128 operator-(u128 a, u128 b) {
u128 result;
result.lower = a.lower - b.lower;
result.upper = a.upper - b.upper - (a.lower < result.lower);
return result;
}
u128 operator<<(u128 operand, int amount);
u128 operator>>(u128 operand, int amount);
} // namespace Dynarmic
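A sketch of the intended semantics (mine): shifts move bits across the 64-bit word boundary, and addition carries between words.
#include <cassert>
#include "common/u128.h"
inline void U128Examples() {
    using namespace Dynarmic;
    const u128 x = u128(1) << 70;             // bit 70 lands in the upper word
    assert(x.lower == 0 && x.upper == (u64(1) << 6));
    assert((x >> 70).lower == 1);
    const u128 sum = u128(~u64(0)) + u128(1); // carry propagates into the upper word
    assert(sum.lower == 0 && sum.upper == 1);
}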

View file

@@ -442,8 +442,8 @@ bool ArmTranslatorVisitor::vfp2_VCVT_to_u32(Cond cond, bool D, size_t Vd, bool s
     if (ConditionPassed(cond)) {
         auto reg_m = ir.GetExtendedRegister(m);
         auto result = sz
-            ? ir.FPDoubleToU32(reg_m, round_towards_zero, true)
-            : ir.FPSingleToU32(reg_m, round_towards_zero, true);
+            ? ir.FPDoubleToFixedU32(reg_m, 0, round_towards_zero ? FP::RoundingMode::TowardsZero : ir.current_location.FPSCR().RMode())
+            : ir.FPSingleToFixedU32(reg_m, 0, round_towards_zero ? FP::RoundingMode::TowardsZero : ir.current_location.FPSCR().RMode());
         ir.SetExtendedRegister(d, result);
     }
     return true;
@@ -457,8 +457,8 @@ bool ArmTranslatorVisitor::vfp2_VCVT_to_s32(Cond cond, bool D, size_t Vd, bool s
     if (ConditionPassed(cond)) {
         auto reg_m = ir.GetExtendedRegister(m);
         auto result = sz
-            ? ir.FPDoubleToS32(reg_m, round_towards_zero, true)
-            : ir.FPSingleToS32(reg_m, round_towards_zero, true);
+            ? ir.FPDoubleToFixedS32(reg_m, 0, round_towards_zero ? FP::RoundingMode::TowardsZero : ir.current_location.FPSCR().RMode())
+            : ir.FPSingleToFixedS32(reg_m, 0, round_towards_zero ? FP::RoundingMode::TowardsZero : ir.current_location.FPSCR().RMode());
         ir.SetExtendedRegister(d, result);
     }
     return true;

View file

@@ -37,6 +37,11 @@ public:
         return Common::Bit<26>(value);
     }
+
+    /// Alternate half-precision control flag.
+    void AHP(bool AHP_) {
+        value = Common::ModifyBit<26>(value, AHP_);
+    }
 
     /// Default NaN mode control bit.
     bool DN() const {
         return Common::Bit<25>(value);
@@ -52,6 +57,10 @@
         return static_cast<FP::RoundingMode>(Common::Bits<22, 23>(value));
     }
+
+    bool FZ16() const {
+        return Common::Bit<19>(value);
+    }
 
     /// Input denormal exception trap enable flag.
     bool IDE() const {
         return Common::Bit<15>(value);

View file

@@ -884,17 +884,17 @@ INST(FCVTZS_float_fix, "FCVTZS (scalar, fixed-point)", "z0011
 INST(FCVTZU_float_fix,  "FCVTZU (scalar, fixed-point)",  "z0011110yy011001ppppppnnnnnddddd")
 
 // Data Processing - FP and SIMD - Conversion between floating point and integer
-//INST(FCVTNS_float,    "FCVTNS (scalar)",               "z0011110yy100000000000nnnnnddddd")
-//INST(FCVTNU_float,    "FCVTNU (scalar)",               "z0011110yy100001000000nnnnnddddd")
+INST(FCVTNS_float,      "FCVTNS (scalar)",               "z0011110yy100000000000nnnnnddddd")
+INST(FCVTNU_float,      "FCVTNU (scalar)",               "z0011110yy100001000000nnnnnddddd")
 INST(SCVTF_float_int,   "SCVTF (scalar, integer)",       "z0011110yy100010000000nnnnnddddd")
 INST(UCVTF_float_int,   "UCVTF (scalar, integer)",       "z0011110yy100011000000nnnnnddddd")
-//INST(FCVTAS_float,    "FCVTAS (scalar)",               "z0011110yy100100000000nnnnnddddd")
-//INST(FCVTAU_float,    "FCVTAU (scalar)",               "z0011110yy100101000000nnnnnddddd")
+INST(FCVTAS_float,      "FCVTAS (scalar)",               "z0011110yy100100000000nnnnnddddd")
+INST(FCVTAU_float,      "FCVTAU (scalar)",               "z0011110yy100101000000nnnnnddddd")
 INST(FMOV_float_gen,    "FMOV (general)",                "z0011110yy10r11o000000nnnnnddddd")
-//INST(FCVTPS_float,    "FCVTPS (scalar)",               "z0011110yy101000000000nnnnnddddd")
-//INST(FCVTPU_float,    "FCVTPU (scalar)",               "z0011110yy101001000000nnnnnddddd")
-//INST(FCVTMS_float,    "FCVTMS (scalar)",               "z0011110yy110000000000nnnnnddddd")
-//INST(FCVTMU_float,    "FCVTMU (scalar)",               "z0011110yy110001000000nnnnnddddd")
+INST(FCVTPS_float,      "FCVTPS (scalar)",               "z0011110yy101000000000nnnnnddddd")
+INST(FCVTPU_float,      "FCVTPU (scalar)",               "z0011110yy101001000000nnnnnddddd")
+INST(FCVTMS_float,      "FCVTMS (scalar)",               "z0011110yy110000000000nnnnnddddd")
+INST(FCVTMU_float,      "FCVTMU (scalar)",               "z0011110yy110001000000nnnnnddddd")
 INST(FCVTZS_float_int,  "FCVTZS (scalar, integer)",      "z0011110yy111000000000nnnnnddddd")
 INST(FCVTZU_float_int,  "FCVTZU (scalar, integer)",      "z0011110yy111001000000nnnnnddddd")
 //INST(FJCVTZS,         "FJCVTZS",                       "0001111001111110000000nnnnnddddd")
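As a reading aid (my summary of the translator changes below, not part of the diff), the letter in each FCVT mnemonic selects the rounding mode passed to the IR:
// N -> FP::RoundingMode::ToNearest_TieEven          (FCVTNS/FCVTNU)
// A -> FP::RoundingMode::ToNearest_TieAwayFromZero  (FCVTAS/FCVTAU)
// P -> FP::RoundingMode::TowardsPlusInfinity        (FCVTPS/FCVTPU)
// M -> FP::RoundingMode::TowardsMinusInfinity       (FCVTMS/FCVTMU)
// Z -> FP::RoundingMode::TowardsZero                (FCVTZS/FCVTZU)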

View file

@@ -38,13 +38,13 @@ bool TranslatorVisitor::FCVTZS_float_fix(bool sf, Imm<2> type, Imm<6> scale, Vec
     IR::U32U64 intval;
 
     if (intsize == 32 && *fltsize == 32) {
-        intval = ir.FPSingleToS32(fltval, true, true);
+        intval = ir.FPSingleToFixedS32(fltval, 0, FP::RoundingMode::TowardsZero);
     } else if (intsize == 32 && *fltsize == 64) {
-        intval = ir.FPDoubleToS32(fltval, true, true);
+        intval = ir.FPDoubleToFixedS32(fltval, 0, FP::RoundingMode::TowardsZero);
     } else if (intsize == 64 && *fltsize == 32) {
-        return InterpretThisInstruction();
+        intval = ir.FPSingleToFixedS64(fltval, 0, FP::RoundingMode::TowardsZero);
     } else if (intsize == 64 && *fltsize == 64) {
-        return InterpretThisInstruction();
+        intval = ir.FPDoubleToFixedS64(fltval, 0, FP::RoundingMode::TowardsZero);
     } else {
         UNREACHABLE();
     }
@@ -69,13 +69,13 @@ bool TranslatorVisitor::FCVTZU_float_fix(bool sf, Imm<2> type, Imm<6> scale, Vec
     IR::U32U64 intval;
 
     if (intsize == 32 && *fltsize == 32) {
-        intval = ir.FPSingleToU32(fltval, true, true);
+        intval = ir.FPSingleToFixedU32(fltval, 0, FP::RoundingMode::TowardsZero);
     } else if (intsize == 32 && *fltsize == 64) {
-        intval = ir.FPDoubleToU32(fltval, true, true);
+        intval = ir.FPDoubleToFixedU32(fltval, 0, FP::RoundingMode::TowardsZero);
     } else if (intsize == 64 && *fltsize == 32) {
-        return InterpretThisInstruction();
+        intval = ir.FPSingleToFixedU64(fltval, 0, FP::RoundingMode::TowardsZero);
     } else if (intsize == 64 && *fltsize == 64) {
-        return InterpretThisInstruction();
+        intval = ir.FPDoubleToFixedU64(fltval, 0, FP::RoundingMode::TowardsZero);
     } else {
         UNREACHABLE();
     }

View file

@@ -6,6 +6,7 @@
 #include <boost/optional.hpp>
 
+#include "common/fp/rounding_mode.h"
 #include "frontend/A64/translate/impl/impl.h"
 
 namespace Dynarmic::A64 {
@@ -135,58 +136,98 @@ bool TranslatorVisitor::FMOV_float_gen(bool sf, Imm<2> type, Imm<1> rmode_0, Imm
     return true;
 }
 
-bool TranslatorVisitor::FCVTZS_float_int(bool sf, Imm<2> type, Vec Vn, Reg Rd) {
+static bool FloatingPointConvertSignedInteger(TranslatorVisitor& v, bool sf, Imm<2> type, Vec Vn, Reg Rd, FP::RoundingMode rounding_mode) {
     const size_t intsize = sf ? 64 : 32;
     const auto fltsize = GetDataSize(type);
     if (!fltsize || *fltsize == 16) {
-        return UnallocatedEncoding();
+        return v.UnallocatedEncoding();
     }
 
-    const IR::U32U64 fltval = V_scalar(*fltsize, Vn);
+    const IR::U32U64 fltval = v.V_scalar(*fltsize, Vn);
     IR::U32U64 intval;
 
     if (intsize == 32 && *fltsize == 32) {
-        intval = ir.FPSingleToS32(fltval, true, true);
+        intval = v.ir.FPSingleToFixedS32(fltval, 0, rounding_mode);
     } else if (intsize == 32 && *fltsize == 64) {
-        intval = ir.FPDoubleToS32(fltval, true, true);
+        intval = v.ir.FPDoubleToFixedS32(fltval, 0, rounding_mode);
     } else if (intsize == 64 && *fltsize == 32) {
-        return InterpretThisInstruction();
+        intval = v.ir.FPSingleToFixedS64(fltval, 0, rounding_mode);
     } else if (intsize == 64 && *fltsize == 64) {
-        return InterpretThisInstruction();
+        intval = v.ir.FPDoubleToFixedS64(fltval, 0, rounding_mode);
     } else {
         UNREACHABLE();
     }
 
-    X(intsize, Rd, intval);
+    v.X(intsize, Rd, intval);
 
     return true;
 }
 
-bool TranslatorVisitor::FCVTZU_float_int(bool sf, Imm<2> type, Vec Vn, Reg Rd) {
-    const size_t intsize = sf ? 64 : 32;
-    const auto fltsize = GetDataSize(type);
-    if (!fltsize || *fltsize == 16) {
-        return UnallocatedEncoding();
-    }
-
-    const IR::U32U64 fltval = V_scalar(*fltsize, Vn);
-    IR::U32U64 intval;
-
-    if (intsize == 32 && *fltsize == 32) {
-        intval = ir.FPSingleToU32(fltval, true, true);
-    } else if (intsize == 32 && *fltsize == 64) {
-        intval = ir.FPDoubleToU32(fltval, true, true);
-    } else if (intsize == 64 && *fltsize == 32) {
-        return InterpretThisInstruction();
-    } else if (intsize == 64 && *fltsize == 64) {
-        return InterpretThisInstruction();
-    } else {
-        UNREACHABLE();
-    }
-
-    X(intsize, Rd, intval);
-
-    return true;
+static bool FloatingPointConvertUnsignedInteger(TranslatorVisitor& v, bool sf, Imm<2> type, Vec Vn, Reg Rd, FP::RoundingMode rounding_mode) {
+    const size_t intsize = sf ? 64 : 32;
+    const auto fltsize = GetDataSize(type);
+    if (!fltsize || *fltsize == 16) {
+        return v.UnallocatedEncoding();
+    }
+
+    const IR::U32U64 fltval = v.V_scalar(*fltsize, Vn);
+    IR::U32U64 intval;
+
+    if (intsize == 32 && *fltsize == 32) {
+        intval = v.ir.FPSingleToFixedU32(fltval, 0, rounding_mode);
+    } else if (intsize == 32 && *fltsize == 64) {
+        intval = v.ir.FPDoubleToFixedU32(fltval, 0, rounding_mode);
+    } else if (intsize == 64 && *fltsize == 32) {
+        intval = v.ir.FPSingleToFixedU64(fltval, 0, rounding_mode);
+    } else if (intsize == 64 && *fltsize == 64) {
+        intval = v.ir.FPDoubleToFixedU64(fltval, 0, rounding_mode);
+    } else {
+        UNREACHABLE();
+    }
+
+    v.X(intsize, Rd, intval);
+    return true;
+}
+
+bool TranslatorVisitor::FCVTNS_float(bool sf, Imm<2> type, Vec Vn, Reg Rd) {
+    return FloatingPointConvertSignedInteger(*this, sf, type, Vn, Rd, FP::RoundingMode::ToNearest_TieEven);
+}
+
+bool TranslatorVisitor::FCVTNU_float(bool sf, Imm<2> type, Vec Vn, Reg Rd) {
+    return FloatingPointConvertUnsignedInteger(*this, sf, type, Vn, Rd, FP::RoundingMode::ToNearest_TieEven);
+}
+
+bool TranslatorVisitor::FCVTZS_float_int(bool sf, Imm<2> type, Vec Vn, Reg Rd) {
+    return FloatingPointConvertSignedInteger(*this, sf, type, Vn, Rd, FP::RoundingMode::TowardsZero);
+}
+
+bool TranslatorVisitor::FCVTZU_float_int(bool sf, Imm<2> type, Vec Vn, Reg Rd) {
+    return FloatingPointConvertUnsignedInteger(*this, sf, type, Vn, Rd, FP::RoundingMode::TowardsZero);
+}
+
+bool TranslatorVisitor::FCVTAS_float(bool sf, Imm<2> type, Vec Vn, Reg Rd) {
+    return FloatingPointConvertSignedInteger(*this, sf, type, Vn, Rd, FP::RoundingMode::ToNearest_TieAwayFromZero);
+}
+
+bool TranslatorVisitor::FCVTAU_float(bool sf, Imm<2> type, Vec Vn, Reg Rd) {
+    return FloatingPointConvertUnsignedInteger(*this, sf, type, Vn, Rd, FP::RoundingMode::ToNearest_TieAwayFromZero);
+}
+
+bool TranslatorVisitor::FCVTPS_float(bool sf, Imm<2> type, Vec Vn, Reg Rd) {
+    return FloatingPointConvertSignedInteger(*this, sf, type, Vn, Rd, FP::RoundingMode::TowardsPlusInfinity);
+}
+
+bool TranslatorVisitor::FCVTPU_float(bool sf, Imm<2> type, Vec Vn, Reg Rd) {
+    return FloatingPointConvertUnsignedInteger(*this, sf, type, Vn, Rd, FP::RoundingMode::TowardsPlusInfinity);
+}
+
+bool TranslatorVisitor::FCVTMS_float(bool sf, Imm<2> type, Vec Vn, Reg Rd) {
+    return FloatingPointConvertSignedInteger(*this, sf, type, Vn, Rd, FP::RoundingMode::TowardsMinusInfinity);
+}
+
+bool TranslatorVisitor::FCVTMU_float(bool sf, Imm<2> type, Vec Vn, Reg Rd) {
+    return FloatingPointConvertUnsignedInteger(*this, sf, type, Vn, Rd, FP::RoundingMode::TowardsMinusInfinity);
 }
 
 } // namespace Dynarmic::A64

View file

@@ -1451,24 +1451,44 @@ U64 IREmitter::FPSingleToDouble(const U32& a, bool fpscr_controlled) {
     return Inst<U64>(Opcode::FPSingleToDouble, a);
 }
 
-U32 IREmitter::FPSingleToS32(const U32& a, bool round_towards_zero, bool fpscr_controlled) {
-    ASSERT(fpscr_controlled);
-    return Inst<U32>(Opcode::FPSingleToS32, a, Imm1(round_towards_zero));
-}
-
-U32 IREmitter::FPSingleToU32(const U32& a, bool round_towards_zero, bool fpscr_controlled) {
-    ASSERT(fpscr_controlled);
-    return Inst<U32>(Opcode::FPSingleToU32, a, Imm1(round_towards_zero));
-}
-
-U32 IREmitter::FPDoubleToS32(const U64& a, bool round_towards_zero, bool fpscr_controlled) {
-    ASSERT(fpscr_controlled);
-    return Inst<U32>(Opcode::FPDoubleToS32, a, Imm1(round_towards_zero));
-}
-
-U32 IREmitter::FPDoubleToU32(const U64& a, bool round_towards_zero, bool fpscr_controlled) {
-    ASSERT(fpscr_controlled);
-    return Inst<U32>(Opcode::FPDoubleToU32, a, Imm1(round_towards_zero));
+U32 IREmitter::FPDoubleToFixedS32(const U64& a, size_t fbits, FP::RoundingMode rounding) {
+    ASSERT(fbits <= 32);
+    return Inst<U32>(Opcode::FPDoubleToFixedS32, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
+}
+
+U64 IREmitter::FPDoubleToFixedS64(const U64& a, size_t fbits, FP::RoundingMode rounding) {
+    ASSERT(fbits <= 64);
+    return Inst<U64>(Opcode::FPDoubleToFixedS64, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
+}
+
+U32 IREmitter::FPDoubleToFixedU32(const U64& a, size_t fbits, FP::RoundingMode rounding) {
+    ASSERT(fbits <= 32);
+    return Inst<U32>(Opcode::FPDoubleToFixedU32, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
+}
+
+U64 IREmitter::FPDoubleToFixedU64(const U64& a, size_t fbits, FP::RoundingMode rounding) {
+    ASSERT(fbits <= 64);
+    return Inst<U64>(Opcode::FPDoubleToFixedU64, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
+}
+
+U32 IREmitter::FPSingleToFixedS32(const U32& a, size_t fbits, FP::RoundingMode rounding) {
+    ASSERT(fbits <= 32);
+    return Inst<U32>(Opcode::FPSingleToFixedS32, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
+}
+
+U64 IREmitter::FPSingleToFixedS64(const U32& a, size_t fbits, FP::RoundingMode rounding) {
+    ASSERT(fbits <= 64);
+    return Inst<U64>(Opcode::FPSingleToFixedS64, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
+}
+
+U32 IREmitter::FPSingleToFixedU32(const U32& a, size_t fbits, FP::RoundingMode rounding) {
+    ASSERT(fbits <= 32);
+    return Inst<U32>(Opcode::FPSingleToFixedU32, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
+}
+
+U64 IREmitter::FPSingleToFixedU64(const U32& a, size_t fbits, FP::RoundingMode rounding) {
+    ASSERT(fbits <= 64);
+    return Inst<U64>(Opcode::FPSingleToFixedU64, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
 }
 
 U32 IREmitter::FPS32ToSingle(const U32& a, bool round_to_nearest, bool fpscr_controlled) {

View file

@@ -12,6 +12,10 @@
 #include "frontend/ir/terminal.h"
 #include "frontend/ir/value.h"
 
+namespace Dynarmic::FP {
+enum class RoundingMode;
+} // namespace Dynarmic::FP
+
 // ARM JIT Microinstruction Intermediate Representation
 //
 // This intermediate representation is an SSA IR. It is designed primarily for analysis,
@@ -264,10 +268,14 @@ public:
     U32U64 FPSub(const U32U64& a, const U32U64& b, bool fpscr_controlled);
     U32 FPDoubleToSingle(const U64& a, bool fpscr_controlled);
     U64 FPSingleToDouble(const U32& a, bool fpscr_controlled);
-    U32 FPSingleToS32(const U32& a, bool round_towards_zero, bool fpscr_controlled);
-    U32 FPSingleToU32(const U32& a, bool round_towards_zero, bool fpscr_controlled);
-    U32 FPDoubleToS32(const U64& a, bool round_towards_zero, bool fpscr_controlled);
-    U32 FPDoubleToU32(const U64& a, bool round_towards_zero, bool fpscr_controlled);
+    U32 FPDoubleToFixedS32(const U64& a, size_t fbits, FP::RoundingMode rounding);
+    U64 FPDoubleToFixedS64(const U64& a, size_t fbits, FP::RoundingMode rounding);
+    U32 FPDoubleToFixedU32(const U64& a, size_t fbits, FP::RoundingMode rounding);
+    U64 FPDoubleToFixedU64(const U64& a, size_t fbits, FP::RoundingMode rounding);
+    U32 FPSingleToFixedS32(const U32& a, size_t fbits, FP::RoundingMode rounding);
+    U64 FPSingleToFixedS64(const U32& a, size_t fbits, FP::RoundingMode rounding);
+    U32 FPSingleToFixedU32(const U32& a, size_t fbits, FP::RoundingMode rounding);
+    U64 FPSingleToFixedU64(const U32& a, size_t fbits, FP::RoundingMode rounding);
     U32 FPS32ToSingle(const U32& a, bool round_to_nearest, bool fpscr_controlled);
     U32 FPU32ToSingle(const U32& a, bool round_to_nearest, bool fpscr_controlled);
     U64 FPS32ToDouble(const U32& a, bool round_to_nearest, bool fpscr_controlled);

View file

@@ -386,10 +386,14 @@ OPCODE(FPSub64, T::U64, T::U64, T::U
 // Floating-point conversions
 OPCODE(FPSingleToDouble,     T::U64, T::U32)
 OPCODE(FPDoubleToSingle,     T::U32, T::U64)
-OPCODE(FPSingleToU32,        T::U32, T::U32, T::U1)
-OPCODE(FPSingleToS32,        T::U32, T::U32, T::U1)
-OPCODE(FPDoubleToU32,        T::U32, T::U64, T::U1)
-OPCODE(FPDoubleToS32,        T::U32, T::U64, T::U1)
+OPCODE(FPDoubleToFixedS32,   T::U32, T::U64, T::U8, T::U8)
+OPCODE(FPDoubleToFixedS64,   T::U64, T::U64, T::U8, T::U8)
+OPCODE(FPDoubleToFixedU32,   T::U32, T::U64, T::U8, T::U8)
+OPCODE(FPDoubleToFixedU64,   T::U64, T::U64, T::U8, T::U8)
+OPCODE(FPSingleToFixedS32,   T::U32, T::U32, T::U8, T::U8)
+OPCODE(FPSingleToFixedS64,   T::U64, T::U32, T::U8, T::U8)
+OPCODE(FPSingleToFixedU32,   T::U32, T::U32, T::U8, T::U8)
+OPCODE(FPSingleToFixedU64,   T::U64, T::U32, T::U8, T::U8)
 OPCODE(FPU32ToSingle,        T::U32, T::U32, T::U1)
 OPCODE(FPS32ToSingle,        T::U32, T::U32, T::U1)
 OPCODE(FPU32ToDouble,        T::U64, T::U32, T::U1)

View file

@@ -29,7 +29,11 @@ add_executable(dynarmic_tests
     A64/inst_gen.cpp
     A64/inst_gen.h
     A64/testenv.h
+    fp/FPToFixed.cpp
+    fp/mantissa_util_tests.cpp
+    fp/unpacked_tests.cpp
     main.cpp
+    mp.cpp
     rand_int.h
 )

38
tests/fp/FPToFixed.cpp Normal file
View file

@@ -0,0 +1,38 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include <tuple>
#include <vector>
#include <catch.hpp>
#include "common/fp/fpsr.h"
#include "common/fp/op.h"
#include "rand_int.h"
using namespace Dynarmic;
using namespace Dynarmic::FP;
TEST_CASE("FPToFixed", "[fp]") {
const std::vector<std::tuple<u32, size_t, u64, u32>> test_cases {
{0x447A0000, 64, 0x000003E8, 0x00},
{0xC47A0000, 32, 0xFFFFFC18, 0x00},
{0x4479E000, 64, 0x000003E8, 0x10},
{0x50800000, 32, 0x7FFFFFFF, 0x01},
{0xD0800000, 32, 0x80000000, 0x01},
{0xCF000000, 32, 0x80000000, 0x00},
{0x80002B94, 64, 0x00000000, 0x10},
{0x80636D24, 64, 0x00000000, 0x10},
};
const FPCR fpcr;
for (auto [input, ibits, expected_output, expected_fpsr] : test_cases) {
FPSR fpsr;
const u64 output = FPToFixed<u32>(ibits, input, 0, false, fpcr, RoundingMode::ToNearest_TieEven, fpsr);
REQUIRE(output == expected_output);
REQUIRE(fpsr.Value() == expected_fpsr);
}
}
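To read the table (my interpretation, assuming the standard ARM FPSR cumulative-flag layout where bit 0 is IOC/invalid and bit 4 is IXC/inexact): each input is a raw IEEE-754 single. For example, the fourth row converts 2^34, which overflows a signed 32-bit destination, so FPToFixed saturates and raises IOC:
#include <cstdint>
#include <cstring>
// 0x50800000 decodes to 2^34 = 17179869184.0f: sign 0, biased exponent
// 0xA1 (161 - 127 = 34), zero mantissa. Too large for a signed 32-bit
// result, hence the saturated output 0x7FFFFFFF and expected_fpsr 0x01 (IOC).
inline float DecodeExampleRow() {
    const std::uint32_t bits = 0x50800000;
    float value;
    std::memcpy(&value, &bits, sizeof(value));
    return value;
}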

View file

@@ -0,0 +1,63 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include <tuple>
#include <vector>
#include <catch.hpp>
#include "common/fp/mantissa_util.h"
#include "common/safe_ops.h"
#include "rand_int.h"
using namespace Dynarmic;
using namespace Dynarmic::FP;
TEST_CASE("ResidualErrorOnRightShift", "[fp]") {
const std::vector<std::tuple<u32, int, ResidualError>> test_cases {
{0x00000001, 1, ResidualError::Half},
{0x00000002, 1, ResidualError::Zero},
{0x00000001, 2, ResidualError::LessThanHalf},
{0x00000002, 2, ResidualError::Half},
{0x00000003, 2, ResidualError::GreaterThanHalf},
{0x00000004, 2, ResidualError::Zero},
{0x00000005, 2, ResidualError::LessThanHalf},
{0x00000006, 2, ResidualError::Half},
{0x00000007, 2, ResidualError::GreaterThanHalf},
};
for (auto [mantissa, shift, expected_result] : test_cases) {
const ResidualError result = ResidualErrorOnRightShift(mantissa, shift);
REQUIRE(result == expected_result);
}
}
TEST_CASE("ResidualErrorOnRightShift Randomized", "[fp]") {
for (size_t test = 0; test < 100000; test++) {
const u32 mantissa = RandInt<u32>(0, 0xFFFFFFFF);
const int shift = RandInt<int>(-60, 60);
const ResidualError result = ResidualErrorOnRightShift(mantissa, shift);
const u64 calculated_error = Safe::ArithmeticShiftRightDouble(Common::SignExtend<32, u64>(mantissa), u64(0), shift);
const ResidualError expected_result = [&]{
constexpr u64 half_error = 0x8000'0000'0000'0000ull;
if (calculated_error == 0) {
return ResidualError::Zero;
}
if (calculated_error < half_error) {
return ResidualError::LessThanHalf;
}
if (calculated_error == half_error) {
return ResidualError::Half;
}
return ResidualError::GreaterThanHalf;
}();
INFO(std::hex << "mantissa " << mantissa << " shift " << shift << " calculated_error " << calculated_error);
REQUIRE(result == expected_result);
}
}

View file

@@ -0,0 +1,71 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include <catch.hpp>
#include "common/fp/unpacked.h"
#include "rand_int.h"
using namespace Dynarmic;
using namespace Dynarmic::FP;
TEST_CASE("FPUnpack Tests", "[fp]") {
const static std::vector<std::tuple<u32, std::tuple<FPType, bool, FPUnpacked<u64>>, u32>> test_cases {
{0x00000000, {FPType::Zero, false, {false, 0, 0}}, 0},
{0x7F800000, {FPType::Infinity, false, {false, 1000000, 1}}, 0},
{0xFF800000, {FPType::Infinity, true, {true, 1000000, 1}}, 0},
{0x7F800001, {FPType::SNaN, false, {false, 0, 0}}, 0},
{0xFF800001, {FPType::SNaN, true, {true, 0, 0}}, 0},
{0x7FC00001, {FPType::QNaN, false, {false, 0, 0}}, 0},
{0xFFC00001, {FPType::QNaN, true, {true, 0, 0}}, 0},
{0x00000001, {FPType::Nonzero, false, {false, -149, 1}}, 0}, // Smallest single precision denormal is 2^-149.
{0x3F7FFFFF, {FPType::Nonzero, false, {false, -24, 0xFFFFFF}}, 0}, // 1.0 - epsilon
};
const FPCR fpcr;
for (const auto& [input, expected_output, expected_fpsr] : test_cases) {
FPSR fpsr;
const auto output = FPUnpack<u32>(input, fpcr, fpsr);
INFO("Input: " << std::hex << input);
REQUIRE(output == expected_output);
REQUIRE(fpsr.Value() == expected_fpsr);
}
}
TEST_CASE("FPRound Tests", "[fp]") {
const static std::vector<std::tuple<u32, std::tuple<FPType, bool, FPUnpacked<u64>>, u32>> test_cases {
{0x7F800000, {FPType::Infinity, false, {false, 1000000, 1}}, 0x14},
{0xFF800000, {FPType::Infinity, true, {true, 1000000, 1}}, 0x14},
{0x00000001, {FPType::Nonzero, false, {false, -149, 1}}, 0}, // Smallest single precision denormal is 2^-149.
{0x3F7FFFFF, {FPType::Nonzero, false, {false, -24, 0xFFFFFF}}, 0}, // 1.0 - epsilon
{0x3F800000, {FPType::Nonzero, false, {false, -28, 0xFFFFFFF}}, 0x10}, // rounds to 1.0
};
const FPCR fpcr;
for (const auto& [expected_output, input, expected_fpsr] : test_cases) {
FPSR fpsr;
const auto output = FPRound<u32>(std::get<2>(input), fpcr, fpsr);
INFO("Expected Output: " << std::hex << expected_output);
REQUIRE(output == expected_output);
REQUIRE(fpsr.Value() == expected_fpsr);
}
}
TEST_CASE("FPUnpack<->FPRound Round-trip Tests", "[fp]") {
const FPCR fpcr;
for (size_t count = 0; count < 100000; count++) {
FPSR fpsr;
const u32 input = RandInt(0, 1) == 0 ? RandInt<u32>(0x00000001, 0x7F800000) : RandInt<u32>(0x80000001, 0xFF800000);
const auto intermediate = std::get<2>(FPUnpack<u32>(input, fpcr, fpsr));
const u32 output = FPRound<u32>(intermediate, fpcr, fpsr);
INFO("Count: " << count);
INFO("Intermediate Values: " << std::hex << intermediate.sign << ';' << intermediate.exponent << ';' << intermediate.mantissa);
REQUIRE(input == output);
}
}

27
tests/mp.cpp Normal file
View file

@@ -0,0 +1,27 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include <type_traits>
#include "common/mp/cartesian_product.h"
using namespace Dynarmic::Common::mp;
static_assert(
std::is_same_v<
cartesian_product<list<int, bool>, list<double, float>, list<char, unsigned>>,
list<
list<int, double, char>,
list<int, double, unsigned>,
list<int, float, char>,
list<int, float, unsigned>,
list<bool, double, char>,
list<bool, double, unsigned>,
list<bool, float, char>,
list<bool, float, unsigned>
>
>
);