Improve documentation of unsafe optimizations

parent 82417da780
commit d05d95c132

8 changed files with 69 additions and 17 deletions
@@ -109,12 +109,21 @@ struct UserConfig {
     /// - Block linking optimizations
     /// - RSB optimizations
     /// This is intended to be used for debugging.
-    OptimizationFlag optimizations = all_optimizations;
+    OptimizationFlag optimizations = all_safe_optimizations;
 
     bool HasOptimization(OptimizationFlag f) const {
+        if (!unsafe_optimizations) {
+            f &= all_safe_optimizations;
+        }
         return (f & optimizations) != no_optimizations;
     }
 
+    /// This enables unsafe optimizations that reduce emulation accuracy in favour of speed.
+    /// For safety, in order to enable unsafe optimizations you have to set BOTH this flag
+    /// AND the appropriate flag bits above.
+    /// The preferred and tested mode for this library is with unsafe optimizations disabled.
+    bool unsafe_optimizations = false;
+
     // Page Table
     // The page table is used for faster memory access. If an entry in the table is nullptr,
     // the JIT will fallback to calling the MemoryRead*/MemoryWrite* callbacks.
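The effect of the new guard in HasOptimization above is that Unsafe_* bits in `optimizations` are ignored whenever `unsafe_optimizations` is left at its default of false. A minimal, self-contained sketch of that behaviour (it re-creates the relevant pieces with illustrative values instead of including the real headers, and replaces the library's `operator&=` with a plain `operator&`):

```cpp
#include <cassert>
#include <cstdint>

// Illustrative stand-ins for the real OptimizationFlag enum and config struct.
enum class OptimizationFlag : std::uint32_t {
    BlockLinking     = 0x00000001,  // a safe flag (low 16 bits)
    Unsafe_UnfuseFMA = 0x00010000,  // an unsafe flag (high 16 bits)
};

constexpr OptimizationFlag no_optimizations = static_cast<OptimizationFlag>(0);
constexpr OptimizationFlag all_safe_optimizations = static_cast<OptimizationFlag>(0x0000FFFF);

constexpr OptimizationFlag operator&(OptimizationFlag a, OptimizationFlag b) {
    return static_cast<OptimizationFlag>(static_cast<std::uint32_t>(a) & static_cast<std::uint32_t>(b));
}

struct UserConfig {
    OptimizationFlag optimizations = all_safe_optimizations;
    bool unsafe_optimizations = false;

    bool HasOptimization(OptimizationFlag f) const {
        if (!unsafe_optimizations) {
            f = f & all_safe_optimizations;  // strip unsafe bits from the query
        }
        return (f & optimizations) != no_optimizations;
    }
};

int main() {
    UserConfig conf;
    conf.optimizations = static_cast<OptimizationFlag>(0xFFFFFFFF);     // request everything

    assert(conf.HasOptimization(OptimizationFlag::BlockLinking));       // safe flag: honoured
    assert(!conf.HasOptimization(OptimizationFlag::Unsafe_UnfuseFMA));  // unsafe flag: masked out

    conf.unsafe_optimizations = true;                                   // second opt-in
    assert(conf.HasOptimization(OptimizationFlag::Unsafe_UnfuseFMA));   // now honoured
}
```

This double opt-in is exactly what the new doc comment on `unsafe_optimizations` describes.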
@@ -132,13 +132,19 @@ struct UserConfig {
     /// - Block linking optimizations
     /// - RSB optimizations
     /// This is intended to be used for debugging.
-    OptimizationFlag optimizations = all_optimizations;
+    OptimizationFlag optimizations = all_safe_optimizations;
 
     bool HasOptimization(OptimizationFlag f) const {
+        if (!unsafe_optimizations) {
+            f &= all_safe_optimizations;
+        }
         return (f & optimizations) != no_optimizations;
     }
 
     /// This enables unsafe optimizations that reduce emulation accuracy in favour of speed.
+    /// For safety, in order to enable unsafe optimizations you have to set BOTH this flag
+    /// AND the appropriate flag bits above.
+    /// The preferred and tested mode for this library is with unsafe optimizations disabled.
     bool unsafe_optimizations = false;
 
     /// When set to true, UserCallbacks::DataCacheOperationRaised will be called when any
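From the consumer side, the same double opt-in applies to the A64 frontend: setting an Unsafe_* bit does nothing unless `unsafe_optimizations` is also set. A hedged usage sketch (the header paths and the `operator|` overload are assumed from the rest of the public API, since only `operator~` appears in this diff, and mandatory config fields such as the callbacks are omitted):

```cpp
#include <dynarmic/A64/config.h>
#include <dynarmic/optimization_flags.h>

// Illustrative only: build a config that actually enables an unsafe optimization.
Dynarmic::A64::UserConfig MakeConfigWithUnfusedFMA() {
    Dynarmic::A64::UserConfig conf;
    // ... callbacks and other required fields would be set up here ...

    // Step 1: request the unsafe flag bit.
    conf.optimizations = conf.optimizations | Dynarmic::OptimizationFlag::Unsafe_UnfuseFMA;
    // Step 2: acknowledge that unsafe optimizations are wanted at all.
    // Without this, HasOptimization() masks the bit from step 1 back out.
    conf.unsafe_optimizations = true;

    return conf;
}
```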
@@ -10,16 +10,39 @@
 namespace Dynarmic {
 
 enum class OptimizationFlag : std::uint32_t {
-    BlockLinking = 0x01,
-    ReturnStackBuffer = 0x02,
-    FastDispatch = 0x04,
-    GetSetElimination = 0x08,
-    ConstProp = 0x10,
-    MiscIROpt = 0x20,
+    /// This optimization avoids dispatcher lookups by allowing emitted basic blocks to jump
+    /// directly to other basic blocks if the destination PC is predictable at JIT-time.
+    /// This is a safe optimization.
+    BlockLinking = 0x00000001,
+    /// This optimization avoids dispatcher lookups by emulating a return stack buffer. This
+    /// allows for function returns and syscall returns to be predicted at runtime.
+    /// This is a safe optimization.
+    ReturnStackBuffer = 0x00000002,
+    /// This optimization enables a two-tiered dispatch system.
+    /// A fast dispatcher (written in assembly) first does a look-up in a small MRU cache.
+    /// If this fails, it falls back to the usual slower dispatcher.
+    /// This is a safe optimization.
+    FastDispatch = 0x00000004,
+    /// This is an IR optimization. This optimization eliminates unnecessary emulated CPU state
+    /// context lookups.
+    /// This is a safe optimization.
+    GetSetElimination = 0x00000008,
+    /// This is an IR optimization. This optimization does constant propagation.
+    /// This is a safe optimization.
+    ConstProp = 0x00000010,
+    /// This enables miscellaneous safe IR optimizations.
+    MiscIROpt = 0x00000020,
+
+    /// This is an UNSAFE optimization that reduces accuracy of fused multiply-add operations.
+    /// This unfuses fused instructions to improve performance on host CPUs without FMA support.
+    Unsafe_UnfuseFMA = 0x00010000,
+    /// This is an UNSAFE optimization that reduces accuracy of certain floating-point instructions.
+    /// This allows results of FRECPE and FRSQRTE to have **less** error than spec allows.
+    Unsafe_ReducedErrorFP = 0x00020000,
 };
 
 constexpr OptimizationFlag no_optimizations = static_cast<OptimizationFlag>(0);
-constexpr OptimizationFlag all_optimizations = static_cast<OptimizationFlag>(~std::uint32_t(0));
+constexpr OptimizationFlag all_safe_optimizations = static_cast<OptimizationFlag>(0x0000FFFF);
 
 constexpr OptimizationFlag operator~(OptimizationFlag f) {
     return static_cast<OptimizationFlag>(~static_cast<std::uint32_t>(f));
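The new constant also encodes a layout convention: safe flags occupy the low 16 bits and Unsafe_* flags the high 16 bits, which is why `all_safe_optimizations = 0x0000FFFF` can double as the mask used by HasOptimization in the config changes above. A few self-contained compile-time checks of that convention (flag values copied from the enum above; the checks themselves are illustrative, not part of the commit):

```cpp
#include <cstdint>

// Flag values copied from the enum above.
constexpr std::uint32_t BlockLinking           = 0x00000001;
constexpr std::uint32_t MiscIROpt              = 0x00000020;
constexpr std::uint32_t Unsafe_UnfuseFMA       = 0x00010000;
constexpr std::uint32_t Unsafe_ReducedErrorFP  = 0x00020000;
constexpr std::uint32_t all_safe_optimizations = 0x0000FFFF;

static_assert((BlockLinking & all_safe_optimizations) != 0, "safe flags sit in the low 16 bits");
static_assert((MiscIROpt & all_safe_optimizations) != 0, "safe flags sit in the low 16 bits");
static_assert((Unsafe_UnfuseFMA & all_safe_optimizations) == 0, "unsafe flags sit in the high 16 bits");
static_assert((Unsafe_ReducedErrorFP & all_safe_optimizations) == 0, "unsafe flags sit in the high 16 bits");
```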
@@ -32,6 +32,10 @@ struct A32EmitContext final : public EmitContext {
     bool IsSingleStep() const;
     FP::FPCR FPCR(bool fpcr_controlled = true) const override;
 
+    bool HasOptimization(OptimizationFlag flag) const override {
+        return conf.HasOptimization(flag);
+    }
+
     const A32::UserConfig& conf;
 };
 
@@ -29,7 +29,9 @@ struct A64EmitContext final : public EmitContext {
     bool IsSingleStep() const;
     FP::FPCR FPCR(bool fpcr_controlled = true) const override;
 
-    bool UnsafeOptimizations() const override { return conf.unsafe_optimizations; }
+    bool HasOptimization(OptimizationFlag flag) const override {
+        return conf.HasOptimization(flag);
+    }
 
     const A64::UserConfig& conf;
 };
@@ -28,6 +28,10 @@ class Block;
 class Inst;
 } // namespace Dynarmic::IR
 
+namespace Dynarmic {
+enum class OptimizationFlag : u32;
+} // namespace Dynarmic
+
 namespace Dynarmic::Backend::X64 {
 
 class BlockOfCode;
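The added forward declaration avoids pulling the whole public header into this internal header; it is legal because `OptimizationFlag` is a scoped enum with a fixed underlying type (the `u32` alias used here must denote the same `std::uint32_t` used at the definition). A standalone illustration of that C++ rule, with made-up names:

```cpp
#include <cstdint>

// A scoped enum with an explicit underlying type may be forward-declared...
enum class Flag : std::uint32_t;

// ...and used by value in declarations before its enumerators are visible.
bool HasFlag(Flag f);

// The full definition can follow later (e.g. in another header).
enum class Flag : std::uint32_t {
    A = 0x1,
    B = 0x2,
};

bool HasFlag(Flag f) {
    return f == Flag::A || f == Flag::B;
}
```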
@@ -51,7 +55,7 @@ struct EmitContext {
 
     virtual FP::FPCR FPCR(bool fpcr_controlled = true) const = 0;
 
-    virtual bool UnsafeOptimizations() const { return false; }
+    virtual bool HasOptimization(OptimizationFlag flag) const = 0;
 
     RegAlloc& reg_alloc;
     IR::Block& block;
@@ -14,6 +14,8 @@
 #include <mp/typelist/list.h>
 #include <mp/typelist/lower_to_tuple.h>
 
+#include <dynarmic/optimization_flags.h>
+
 #include "backend/x64/abi.h"
 #include "backend/x64/block_of_code.h"
 #include "backend/x64/emit_x64.h"
@@ -638,7 +640,7 @@ static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
             return;
         }
 
-        if (ctx.UnsafeOptimizations()) {
+        if (ctx.HasOptimization(OptimizationFlag::Unsafe_UnfuseFMA)) {
             auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
             const Xbyak::Xmm operand1 = ctx.reg_alloc.UseScratchXmm(args[0]);
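For context on what `Unsafe_UnfuseFMA` trades away: a fused multiply-add rounds once, while the unfused `a * b + c` sequence rounds twice, so results can differ in the low bits. A standalone (non-dynarmic) illustration of that difference:

```cpp
#include <cmath>
#include <cstdio>

int main() {
    // Values chosen so that a*b is not exactly representable in double.
    const double a = 1.0 + 0x1.0p-30;
    const double b = 1.0 - 0x1.0p-30;
    const double c = -1.0;

    const double fused   = std::fma(a, b, c);  // one rounding step
    const double unfused = a * b + c;          // two rounding steps

    // fused is exactly -0x1.0p-60; unfused rounds a*b up to 1.0 first and yields 0.0.
    std::printf("fused   = %a\nunfused = %a\n", fused, unfused);
}
```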
@@ -743,7 +745,7 @@ static void EmitFPRecipEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
     using FPT = mp::unsigned_integer_of_size<fsize>;
 
     if constexpr (fsize != 16) {
-        if (ctx.UnsafeOptimizations()) {
+        if (ctx.HasOptimization(OptimizationFlag::Unsafe_ReducedErrorFP)) {
             auto args = ctx.reg_alloc.GetArgumentInfo(inst);
             const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(args[0]);
             const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
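`Unsafe_ReducedErrorFP` lets the emitter lean on the host's own estimate instructions instead of computing the architecturally defined lookup-table result for FRECPE/FRSQRTE; the host value is usually more accurate but not bit-identical. A standalone x86 sketch of that kind of divergence (assumes an SSE-capable host; this is not the emitter code itself):

```cpp
#include <cstdio>
#include <xmmintrin.h>

int main() {
    const float x = 3.0f;

    // Host approximation: RCPSS, accurate to roughly 12 bits,
    // but not a value pinned down by the ARM architecture.
    const __m128 approx = _mm_rcp_ss(_mm_set_ss(x));
    const float host_estimate = _mm_cvtss_f32(approx);

    // Reference reciprocal, correctly rounded.
    const float exact = 1.0f / x;

    std::printf("rcpss = %.9g\nexact = %.9g\n", host_estimate, exact);
}
```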
@@ -939,7 +941,7 @@ static void EmitFPRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
     using FPT = mp::unsigned_integer_of_size<fsize>;
 
     if constexpr (fsize != 16) {
-        if (ctx.UnsafeOptimizations()) {
+        if (ctx.HasOptimization(OptimizationFlag::Unsafe_ReducedErrorFP)) {
             auto args = ctx.reg_alloc.GetArgumentInfo(inst);
             const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(args[0]);
             const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
@@ -17,6 +17,8 @@
 #include <mp/typelist/list.h>
 #include <mp/typelist/lower_to_tuple.h>
 
+#include <dynarmic/optimization_flags.h>
+
 #include "backend/x64/abi.h"
 #include "backend/x64/block_of_code.h"
 #include "backend/x64/emit_x64.h"
@@ -1022,7 +1024,7 @@ void EmitFPVectorMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
             return;
         }
 
-        if (ctx.UnsafeOptimizations()) {
+        if (ctx.HasOptimization(OptimizationFlag::Unsafe_UnfuseFMA)) {
            auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
             const Xbyak::Xmm operand1 = ctx.reg_alloc.UseScratchXmm(args[0]);
@@ -1183,7 +1185,7 @@ static void EmitRecipEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
     using FPT = mp::unsigned_integer_of_size<fsize>;
 
     if constexpr (fsize != 16) {
-        if (ctx.UnsafeOptimizations()) {
+        if (ctx.HasOptimization(OptimizationFlag::Unsafe_ReducedErrorFP)) {
             auto args = ctx.reg_alloc.GetArgumentInfo(inst);
             const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(args[0]);
             const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
@@ -1363,7 +1365,7 @@ static void EmitRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
     using FPT = mp::unsigned_integer_of_size<fsize>;
 
     if constexpr (fsize != 16) {
-        if (ctx.UnsafeOptimizations()) {
+        if (ctx.HasOptimization(OptimizationFlag::Unsafe_ReducedErrorFP)) {
             auto args = ctx.reg_alloc.GetArgumentInfo(inst);
             const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(args[0]);
             const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();