Improve documentation of unsafe optimizations

parent 82417da780
commit d05d95c132

8 changed files with 69 additions and 17 deletions
@@ -109,12 +109,21 @@ struct UserConfig {
/// - Block linking optimizations
/// - RSB optimizations
/// This is intended to be used for debugging.
OptimizationFlag optimizations = all_optimizations;
OptimizationFlag optimizations = all_safe_optimizations;

bool HasOptimization(OptimizationFlag f) const {
    if (!unsafe_optimizations) {
        f &= all_safe_optimizations;
    }
    return (f & optimizations) != no_optimizations;
}

/// This enables unsafe optimizations that reduce emulation accuracy in favour of speed.
/// For safety, in order to enable unsafe optimizations you have to set BOTH this flag
/// AND the appropriate flag bits above.
/// The preferred and tested mode for this library is with unsafe optimizations disabled.
bool unsafe_optimizations = false;

// Page Table
// The page table is used for faster memory access. If an entry in the table is nullptr,
// the JIT will fall back to calling the MemoryRead*/MemoryWrite* callbacks.
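The gate above means a flag only takes effect when both the flag bit and the unsafe_optimizations switch are set. A minimal usage sketch, based on the UserConfig fields in this hunk; it assumes the usual <dynarmic/A32/config.h> include and an operator|= for OptimizationFlag analogous to the operator~/operator&= that appear later in this commit:

    #include <dynarmic/optimization_flags.h>
    // #include <dynarmic/A32/config.h>  // assumed include path for the UserConfig above

    Dynarmic::A32::UserConfig MakeConfigWithUnfusedFMA() {
        Dynarmic::A32::UserConfig config;

        // Opt in to one unsafe optimization: both the flag bit and the master
        // switch are required, as documented above. (operator|= assumed.)
        config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_UnfuseFMA;
        config.unsafe_optimizations = true;

        // With only one of the two set, this query returns false because
        // HasOptimization() masks f with all_safe_optimizations first.
        const bool active = config.HasOptimization(Dynarmic::OptimizationFlag::Unsafe_UnfuseFMA);
        (void)active;

        return config;
    }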
@@ -132,13 +132,19 @@ struct UserConfig {
/// - Block linking optimizations
/// - RSB optimizations
/// This is intended to be used for debugging.
OptimizationFlag optimizations = all_optimizations;
OptimizationFlag optimizations = all_safe_optimizations;

bool HasOptimization(OptimizationFlag f) const {
    if (!unsafe_optimizations) {
        f &= all_safe_optimizations;
    }
    return (f & optimizations) != no_optimizations;
}

/// This enables unsafe optimizations that reduce emulation accuracy in favour of speed.
/// For safety, in order to enable unsafe optimizations you have to set BOTH this flag
/// AND the appropriate flag bits above.
/// The preferred and tested mode for this library is with unsafe optimizations disabled.
bool unsafe_optimizations = false;

/// When set to true, UserCallbacks::DataCacheOperationRaised will be called when any
@@ -10,16 +10,39 @@
namespace Dynarmic {

enum class OptimizationFlag : std::uint32_t {
BlockLinking = 0x01,
ReturnStackBuffer = 0x02,
FastDispatch = 0x04,
GetSetElimination = 0x08,
ConstProp = 0x10,
MiscIROpt = 0x20,
/// This optimization avoids dispatcher lookups by allowing emitted basic blocks to jump
/// directly to other basic blocks if the destination PC is predictable at JIT-time.
/// This is a safe optimization.
BlockLinking = 0x00000001,
/// This optimization avoids dispatcher lookups by emulating a return stack buffer. This
/// allows for function returns and syscall returns to be predicted at runtime.
/// This is a safe optimization.
ReturnStackBuffer = 0x00000002,
/// This optimization enables a two-tiered dispatch system.
/// A fast dispatcher (written in assembly) first does a look-up in a small MRU cache.
/// If this fails, it falls back to the usual slower dispatcher.
/// This is a safe optimization.
FastDispatch = 0x00000004,
/// This is an IR optimization. This optimization eliminates unnecessary emulated CPU state
/// context lookups.
/// This is a safe optimization.
GetSetElimination = 0x00000008,
/// This is an IR optimization. This optimization does constant propagation.
/// This is a safe optimization.
ConstProp = 0x00000010,
/// This enables miscellaneous safe IR optimizations.
MiscIROpt = 0x00000020,

/// This is an UNSAFE optimization that reduces accuracy of fused multiply-add operations.
/// This unfuses fused instructions to improve performance on host CPUs without FMA support.
Unsafe_UnfuseFMA = 0x00010000,
/// This is an UNSAFE optimization that reduces accuracy of certain floating-point instructions.
/// This allows results of FRECPE and FRSQRTE to have **less** error than spec allows.
Unsafe_ReducedErrorFP = 0x00020000,
};

constexpr OptimizationFlag no_optimizations = static_cast<OptimizationFlag>(0);
constexpr OptimizationFlag all_optimizations = static_cast<OptimizationFlag>(~std::uint32_t(0));
constexpr OptimizationFlag all_safe_optimizations = static_cast<OptimizationFlag>(0x0000FFFF);

constexpr OptimizationFlag operator~(OptimizationFlag f) {
    return static_cast<OptimizationFlag>(~static_cast<std::uint32_t>(f));
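Because the unsafe flags start at bit 16 and all_safe_optimizations is 0x0000FFFF, masking with it strips every Unsafe_* bit. A small sketch of the gating this produces, mirroring UserConfig::HasOptimization from the config hunks above (the free function and its parameter names are illustrative only):

    #include <dynarmic/optimization_flags.h>

    // Illustrative re-statement of the masking done by UserConfig::HasOptimization:
    // unless unsafe_optimizations is set, any Unsafe_* bit (0x00010000 and above)
    // is cleared by all_safe_optimizations (0x0000FFFF) before the query.
    bool has_optimization(Dynarmic::OptimizationFlag enabled_flags,
                          Dynarmic::OptimizationFlag f,
                          bool unsafe_optimizations) {
        if (!unsafe_optimizations) {
            f &= Dynarmic::all_safe_optimizations;  // Unsafe_UnfuseFMA and Unsafe_ReducedErrorFP drop out here
        }
        return (f & enabled_flags) != Dynarmic::no_optimizations;
    }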
@@ -32,6 +32,10 @@ struct A32EmitContext final : public EmitContext {
bool IsSingleStep() const;
FP::FPCR FPCR(bool fpcr_controlled = true) const override;

bool HasOptimization(OptimizationFlag flag) const override {
    return conf.HasOptimization(flag);
}

const A32::UserConfig& conf;
};
@@ -29,7 +29,9 @@ struct A64EmitContext final : public EmitContext {
bool IsSingleStep() const;
FP::FPCR FPCR(bool fpcr_controlled = true) const override;

bool UnsafeOptimizations() const override { return conf.unsafe_optimizations; }
bool HasOptimization(OptimizationFlag flag) const override {
    return conf.HasOptimization(flag);
}

const A64::UserConfig& conf;
};
@@ -28,6 +28,10 @@ class Block;
class Inst;
} // namespace Dynarmic::IR

namespace Dynarmic {
enum class OptimizationFlag : u32;
} // namespace Dynarmic

namespace Dynarmic::Backend::X64 {

class BlockOfCode;

@@ -51,7 +55,7 @@ struct EmitContext {

virtual FP::FPCR FPCR(bool fpcr_controlled = true) const = 0;

virtual bool UnsafeOptimizations() const { return false; }
virtual bool HasOptimization(OptimizationFlag flag) const = 0;

RegAlloc& reg_alloc;
IR::Block& block;
@@ -14,6 +14,8 @@
#include <mp/typelist/list.h>
#include <mp/typelist/lower_to_tuple.h>

#include <dynarmic/optimization_flags.h>

#include "backend/x64/abi.h"
#include "backend/x64/block_of_code.h"
#include "backend/x64/emit_x64.h"
@@ -638,7 +640,7 @@ static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
return;
}

if (ctx.UnsafeOptimizations()) {
if (ctx.HasOptimization(OptimizationFlag::Unsafe_UnfuseFMA)) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);

const Xbyak::Xmm operand1 = ctx.reg_alloc.UseScratchXmm(args[0]);
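Unfusing means the single-rounding fused multiply-add a*b + c is replaced by a separate multiply and add, each rounded once. A self-contained sketch of the numerical difference this trades away (plain C++ for illustration, not the code the JIT emits; assumes the compiler does not re-fuse across statements):

    #include <cmath>
    #include <cstdio>

    int main() {
        const float a = 1.0f + 0x1.0p-12f;
        const float c = -(1.0f + 0x1.0p-11f);

        // Fused: a*a + c is rounded once, preserving the 2^-24 tail of the product.
        const float fused = std::fma(a, a, c);

        // Unfused: the product is rounded to float first (losing the 2^-24 tail),
        // then the add is rounded again, so the result collapses to zero.
        const float product = a * a;
        const float unfused = product + c;

        std::printf("fused   = %g\nunfused = %g\n", fused, unfused);  // ~5.96e-08 vs 0
        return 0;
    }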
@@ -743,7 +745,7 @@ static void EmitFPRecipEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
using FPT = mp::unsigned_integer_of_size<fsize>;

if constexpr (fsize != 16) {
if (ctx.UnsafeOptimizations()) {
if (ctx.HasOptimization(OptimizationFlag::Unsafe_ReducedErrorFP)) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
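FRECPE and FRSQRTE are architecturally specified low-precision estimates, so bit-exact emulation has to reproduce those exact estimates. With Unsafe_ReducedErrorFP the JIT may instead return a result with less error than the spec allows, which can differ bit-for-bit from real hardware. A hedged illustration of that idea using the host's own SSE estimate instruction; whether the JIT emits RCPSS here specifically is not shown in this hunk:

    #include <immintrin.h>
    #include <cstdio>

    int main() {
        const float x = 3.0f;

        // One plausible fast path: the host's own approximate reciprocal (RCPSS).
        float host_estimate;
        _mm_store_ss(&host_estimate, _mm_rcp_ss(_mm_set_ss(x)));

        // The correctly rounded reciprocal, for comparison.
        const float exact = 1.0f / x;

        // Both are close to 1/3, but neither reproduces ARM's FRECPE estimate
        // bit-for-bit, which is why this path is gated behind an UNSAFE flag.
        std::printf("host estimate = %.9g\nexact 1/x     = %.9g\n", host_estimate, exact);
        return 0;
    }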
@@ -939,7 +941,7 @@ static void EmitFPRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
using FPT = mp::unsigned_integer_of_size<fsize>;

if constexpr (fsize != 16) {
if (ctx.UnsafeOptimizations()) {
if (ctx.HasOptimization(OptimizationFlag::Unsafe_ReducedErrorFP)) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
@@ -17,6 +17,8 @@
#include <mp/typelist/list.h>
#include <mp/typelist/lower_to_tuple.h>

#include <dynarmic/optimization_flags.h>

#include "backend/x64/abi.h"
#include "backend/x64/block_of_code.h"
#include "backend/x64/emit_x64.h"
@@ -1022,7 +1024,7 @@ void EmitFPVectorMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
return;
}

if (ctx.UnsafeOptimizations()) {
if (ctx.HasOptimization(OptimizationFlag::Unsafe_UnfuseFMA)) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);

const Xbyak::Xmm operand1 = ctx.reg_alloc.UseScratchXmm(args[0]);
@@ -1183,7 +1185,7 @@ static void EmitRecipEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
using FPT = mp::unsigned_integer_of_size<fsize>;

if constexpr (fsize != 16) {
if (ctx.UnsafeOptimizations()) {
if (ctx.HasOptimization(OptimizationFlag::Unsafe_ReducedErrorFP)) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
@@ -1363,7 +1365,7 @@ static void EmitRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
using FPT = mp::unsigned_integer_of_size<fsize>;

if constexpr (fsize != 16) {
if (ctx.UnsafeOptimizations()) {
if (ctx.HasOptimization(OptimizationFlag::Unsafe_ReducedErrorFP)) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();