diff --git a/include/dynarmic/A32/config.h b/include/dynarmic/A32/config.h
index 0c0919fc..a0091841 100644
--- a/include/dynarmic/A32/config.h
+++ b/include/dynarmic/A32/config.h
@@ -109,12 +109,21 @@ struct UserConfig {
     /// - Block linking optimizations
     /// - RSB optimizations
     /// This is intended to be used for debugging.
-    OptimizationFlag optimizations = all_optimizations;
+    OptimizationFlag optimizations = all_safe_optimizations;
 
     bool HasOptimization(OptimizationFlag f) const {
+        if (!unsafe_optimizations) {
+            f &= all_safe_optimizations;
+        }
         return (f & optimizations) != no_optimizations;
     }
 
+    /// This enables unsafe optimizations that reduce emulation accuracy in favour of speed.
+    /// For safety, in order to enable unsafe optimizations you have to set BOTH this flag
+    /// AND the appropriate flag bits above.
+    /// The preferred and tested mode for this library is with unsafe optimizations disabled.
+    bool unsafe_optimizations = false;
+
     // Page Table
     // The page table is used for faster memory access. If an entry in the table is nullptr,
     // the JIT will fallback to calling the MemoryRead*/MemoryWrite* callbacks.
diff --git a/include/dynarmic/A64/config.h b/include/dynarmic/A64/config.h
index 3c413efb..6c607717 100644
--- a/include/dynarmic/A64/config.h
+++ b/include/dynarmic/A64/config.h
@@ -132,13 +132,19 @@ struct UserConfig {
     /// - Block linking optimizations
     /// - RSB optimizations
     /// This is intended to be used for debugging.
-    OptimizationFlag optimizations = all_optimizations;
+    OptimizationFlag optimizations = all_safe_optimizations;
 
     bool HasOptimization(OptimizationFlag f) const {
+        if (!unsafe_optimizations) {
+            f &= all_safe_optimizations;
+        }
         return (f & optimizations) != no_optimizations;
     }
 
     /// This enables unsafe optimizations that reduce emulation accuracy in favour of speed.
+    /// For safety, in order to enable unsafe optimizations you have to set BOTH this flag
+    /// AND the appropriate flag bits above.
+    /// The preferred and tested mode for this library is with unsafe optimizations disabled.
     bool unsafe_optimizations = false;
 
     /// When set to true, UserCallbacks::DataCacheOperationRaised will be called when any
diff --git a/include/dynarmic/optimization_flags.h b/include/dynarmic/optimization_flags.h
index 5b516b7f..29b35425 100644
--- a/include/dynarmic/optimization_flags.h
+++ b/include/dynarmic/optimization_flags.h
@@ -10,16 +10,39 @@ namespace Dynarmic {
 
 enum class OptimizationFlag : std::uint32_t {
-    BlockLinking = 0x01,
-    ReturnStackBuffer = 0x02,
-    FastDispatch = 0x04,
-    GetSetElimination = 0x08,
-    ConstProp = 0x10,
-    MiscIROpt = 0x20,
+    /// This optimization avoids dispatcher lookups by allowing emitted basic blocks to jump
+    /// directly to other basic blocks if the destination PC is predictable at JIT-time.
+    /// This is a safe optimization.
+    BlockLinking = 0x00000001,
+    /// This optimization avoids dispatcher lookups by emulating a return stack buffer. This
+    /// allows for function returns and syscall returns to be predicted at runtime.
+    /// This is a safe optimization.
+    ReturnStackBuffer = 0x00000002,
+    /// This optimization enables a two-tiered dispatch system.
+    /// A fast dispatcher (written in assembly) first does a look-up in a small MRU cache.
+    /// If this fails, it falls back to the usual slower dispatcher.
+    /// This is a safe optimization.
+    FastDispatch = 0x00000004,
+    /// This is an IR optimization. This optimization eliminates unnecessary emulated CPU state
+    /// context lookups.
+    /// This is a safe optimization.
+    GetSetElimination = 0x00000008,
+    /// This is an IR optimization. This optimization does constant propagation.
+    /// This is a safe optimization.
+    ConstProp = 0x00000010,
+    /// This enables miscellaneous safe IR optimizations.
+    MiscIROpt = 0x00000020,
+
+    /// This is an UNSAFE optimization that reduces accuracy of fused multiply-add operations.
+    /// This unfuses fused instructions to improve performance on host CPUs without FMA support.
+    Unsafe_UnfuseFMA = 0x00010000,
+    /// This is an UNSAFE optimization that reduces accuracy of certain floating-point instructions.
+    /// This allows results of FRECPE and FRSQRTE to have **less** error than spec allows.
+    Unsafe_ReducedErrorFP = 0x00020000,
 };
 
 constexpr OptimizationFlag no_optimizations = static_cast<OptimizationFlag>(0);
-constexpr OptimizationFlag all_optimizations = static_cast<OptimizationFlag>(~std::uint32_t(0));
+constexpr OptimizationFlag all_safe_optimizations = static_cast<OptimizationFlag>(0x0000FFFF);
 
 constexpr OptimizationFlag operator~(OptimizationFlag f) {
     return static_cast<OptimizationFlag>(~static_cast<std::uint32_t>(f));
diff --git a/src/backend/x64/a32_emit_x64.h b/src/backend/x64/a32_emit_x64.h
index db4bc665..a946e49f 100644
--- a/src/backend/x64/a32_emit_x64.h
+++ b/src/backend/x64/a32_emit_x64.h
@@ -32,6 +32,10 @@ struct A32EmitContext final : public EmitContext {
     bool IsSingleStep() const;
     FP::FPCR FPCR(bool fpcr_controlled = true) const override;
 
+    bool HasOptimization(OptimizationFlag flag) const override {
+        return conf.HasOptimization(flag);
+    }
+
     const A32::UserConfig& conf;
 };
 
diff --git a/src/backend/x64/a64_emit_x64.h b/src/backend/x64/a64_emit_x64.h
index 0730e6b8..f7db0243 100644
--- a/src/backend/x64/a64_emit_x64.h
+++ b/src/backend/x64/a64_emit_x64.h
@@ -29,7 +29,9 @@ struct A64EmitContext final : public EmitContext {
     bool IsSingleStep() const;
     FP::FPCR FPCR(bool fpcr_controlled = true) const override;
 
-    bool UnsafeOptimizations() const override { return conf.unsafe_optimizations; }
+    bool HasOptimization(OptimizationFlag flag) const override {
+        return conf.HasOptimization(flag);
+    }
 
     const A64::UserConfig& conf;
 };
diff --git a/src/backend/x64/emit_x64.h b/src/backend/x64/emit_x64.h
index 2266c032..7f784b47 100644
--- a/src/backend/x64/emit_x64.h
+++ b/src/backend/x64/emit_x64.h
@@ -28,6 +28,10 @@ class Block;
 class Inst;
 } // namespace Dynarmic::IR
 
+namespace Dynarmic {
+enum class OptimizationFlag : u32;
+} // namespace Dynarmic
+
 namespace Dynarmic::Backend::X64 {
 
 class BlockOfCode;
@@ -51,7 +55,7 @@ struct EmitContext {
 
     virtual FP::FPCR FPCR(bool fpcr_controlled = true) const = 0;
 
-    virtual bool UnsafeOptimizations() const { return false; }
+    virtual bool HasOptimization(OptimizationFlag flag) const = 0;
 
     RegAlloc& reg_alloc;
     IR::Block& block;
diff --git a/src/backend/x64/emit_x64_floating_point.cpp b/src/backend/x64/emit_x64_floating_point.cpp
index 5efd5258..67142359 100644
--- a/src/backend/x64/emit_x64_floating_point.cpp
+++ b/src/backend/x64/emit_x64_floating_point.cpp
@@ -14,6 +14,8 @@
 #include
 #include
 
+#include <dynarmic/optimization_flags.h>
+
 #include "backend/x64/abi.h"
 #include "backend/x64/block_of_code.h"
 #include "backend/x64/emit_x64.h"
@@ -638,7 +640,7 @@ static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
         return;
     }
 
-    if (ctx.UnsafeOptimizations()) {
+    if (ctx.HasOptimization(OptimizationFlag::Unsafe_UnfuseFMA)) {
         auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
         const Xbyak::Xmm operand1 = ctx.reg_alloc.UseScratchXmm(args[0]);
@@ -743,7 +745,7 @@ static void EmitFPRecipEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
     using FPT = mp::unsigned_integer_of_size<fsize>;
 
     if constexpr (fsize != 16) {
-        if (ctx.UnsafeOptimizations()) {
+        if (ctx.HasOptimization(OptimizationFlag::Unsafe_ReducedErrorFP)) {
             auto args = ctx.reg_alloc.GetArgumentInfo(inst);
             const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(args[0]);
             const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
@@ -939,7 +941,7 @@ static void EmitFPRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
     using FPT = mp::unsigned_integer_of_size<fsize>;
 
     if constexpr (fsize != 16) {
-        if (ctx.UnsafeOptimizations()) {
+        if (ctx.HasOptimization(OptimizationFlag::Unsafe_ReducedErrorFP)) {
             auto args = ctx.reg_alloc.GetArgumentInfo(inst);
             const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(args[0]);
             const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
diff --git a/src/backend/x64/emit_x64_vector_floating_point.cpp b/src/backend/x64/emit_x64_vector_floating_point.cpp
index f5fe2c84..d9b4f0a0 100644
--- a/src/backend/x64/emit_x64_vector_floating_point.cpp
+++ b/src/backend/x64/emit_x64_vector_floating_point.cpp
@@ -17,6 +17,8 @@
 #include
 #include
 
+#include <dynarmic/optimization_flags.h>
+
 #include "backend/x64/abi.h"
 #include "backend/x64/block_of_code.h"
 #include "backend/x64/emit_x64.h"
@@ -1022,7 +1024,7 @@ void EmitFPVectorMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
         return;
     }
 
-    if (ctx.UnsafeOptimizations()) {
+    if (ctx.HasOptimization(OptimizationFlag::Unsafe_UnfuseFMA)) {
        auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
         const Xbyak::Xmm operand1 = ctx.reg_alloc.UseScratchXmm(args[0]);
@@ -1183,7 +1185,7 @@ static void EmitRecipEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins
     using FPT = mp::unsigned_integer_of_size<fsize>;
 
     if constexpr (fsize != 16) {
-        if (ctx.UnsafeOptimizations()) {
+        if (ctx.HasOptimization(OptimizationFlag::Unsafe_ReducedErrorFP)) {
             auto args = ctx.reg_alloc.GetArgumentInfo(inst);
             const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(args[0]);
             const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
@@ -1363,7 +1365,7 @@ static void EmitRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins
     using FPT = mp::unsigned_integer_of_size<fsize>;
 
     if constexpr (fsize != 16) {
-        if (ctx.UnsafeOptimizations()) {
+        if (ctx.HasOptimization(OptimizationFlag::Unsafe_ReducedErrorFP)) {
             auto args = ctx.reg_alloc.GetArgumentInfo(inst);
             const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(args[0]);
             const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
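
A minimal usage sketch (not part of the patch): with the headers above, enabling the Unsafe_* behaviour is a double opt-in, since both the flag bits and the unsafe_optimizations switch must be set, and leaving either unset keeps HasOptimization() reporting safe flags only. The helper name MakeA64Config is hypothetical, and the snippet assumes optimization_flags.h also provides the usual operator| overload for OptimizationFlag:

    #include <dynarmic/A64/config.h>
    #include <dynarmic/optimization_flags.h>

    // Hypothetical embedder-side helper. Only the Dynarmic types and the two
    // Unsafe_* flags come from the patch; callbacks, page tables, etc. are
    // omitted and would still need to be filled in before constructing a Jit.
    Dynarmic::A64::UserConfig MakeA64Config() {
        Dynarmic::A64::UserConfig conf{};

        // Request the unsafe flag bits on top of the safe defaults...
        conf.optimizations = Dynarmic::all_safe_optimizations
                           | Dynarmic::OptimizationFlag::Unsafe_UnfuseFMA
                           | Dynarmic::OptimizationFlag::Unsafe_ReducedErrorFP;

        // ...and flip the master switch. Without it, HasOptimization() masks
        // the Unsafe_* bits out and the JIT behaves as if they were never set.
        conf.unsafe_optimizations = true;

        return conf;
    }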
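
As a concrete reading of the Unsafe_UnfuseFMA comment: a fused multiply-add rounds once, while the unfused replacement rounds twice, so results may differ in the low bits. A self-contained illustration, independent of dynarmic (and assuming the compiler does not itself contract a * b + c into an FMA):

    #include <cmath>
    #include <cstdio>

    int main() {
        // Chosen so the intermediate product a * b is not exactly representable
        // in double precision; any such pair shows the effect.
        const double a = 1.0 + 0x1p-30;
        const double b = 1.0 + 0x1p-30;
        const double c = -1.0;

        const double fused   = std::fma(a, b, c);  // round(a * b + c): one rounding
        const double unfused = a * b + c;          // round(round(a * b) + c): two roundings

        // fused keeps the 2^-60 term of the product; unfused rounds it away.
        std::printf("fused   = %.17g\nunfused = %.17g\n", fused, unfused);
    }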