Improve documentation of unsafe optimizations

MerryMage 2020-07-12 12:38:22 +01:00
parent 82417da780
commit d05d95c132
8 changed files with 69 additions and 17 deletions


@@ -109,12 +109,21 @@ struct UserConfig {
/// - Block linking optimizations
/// - RSB optimizations
/// This is intended to be used for debugging.
OptimizationFlag optimizations = all_optimizations;
OptimizationFlag optimizations = all_safe_optimizations;
bool HasOptimization(OptimizationFlag f) const {
if (!unsafe_optimizations) {
f &= all_safe_optimizations;
}
return (f & optimizations) != no_optimizations;
}
/// This enables unsafe optimizations that reduce emulation accuracy in favour of speed.
/// For safety, in order to enable unsafe optimizations you have to set BOTH this flag
/// AND the appropriate flag bits above.
/// The preferred and tested mode for this library is with unsafe optimizations disabled.
bool unsafe_optimizations = false;
// Page Table
// The page table is used for faster memory access. If an entry in the table is nullptr,
// the JIT will fall back to calling the MemoryRead*/MemoryWrite* callbacks.
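
As a usage sketch (not part of this diff; it uses only names visible above, with include paths assumed from the library's layout and remaining required fields such as the callbacks pointer omitted), enabling an unsafe optimization now requires setting both gates:

    #include <dynarmic/A32/config.h>
    #include <dynarmic/optimization_flags.h>

    Dynarmic::A32::UserConfig conf;
    conf.unsafe_optimizations = true;                  // gate 1: the opt-in bool
    conf.optimizations = Dynarmic::all_optimizations;  // gate 2: the flag bits
    // Only with both gates set does the following hold; clearing either makes it false:
    // conf.HasOptimization(Dynarmic::OptimizationFlag::Unsafe_UnfuseFMA) == true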


@@ -132,13 +132,19 @@ struct UserConfig {
/// - Block linking optimizations
/// - RSB optimizations
/// This is intended to be used for debugging.
OptimizationFlag optimizations = all_optimizations;
OptimizationFlag optimizations = all_safe_optimizations;
bool HasOptimization(OptimizationFlag f) const {
if (!unsafe_optimizations) {
f &= all_safe_optimizations;
}
return (f & optimizations) != no_optimizations;
}
/// This enables unsafe optimizations that reduce emulation accuracy in favour of speed.
/// For safety, in order to enable unsafe optimizations you have to set BOTH this flag
/// AND the appropriate flag bits above.
/// The preferred and tested mode for this library is with unsafe optimizations disabled.
bool unsafe_optimizations = false;
/// When set to true, UserCallbacks::DataCacheOperationRaised will be called when any
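
The masking in HasOptimization can be checked in isolation; here is a standalone sketch that restates the logic with plain integers (the constants mirror optimization_flags.h), so it runs without the library:

    #include <cassert>
    #include <cstdint>

    bool has_optimization(std::uint32_t optimizations, bool unsafe_ok, std::uint32_t f) {
        if (!unsafe_ok) {
            f &= 0x0000FFFF;  // all_safe_optimizations
        }
        return (f & optimizations) != 0;
    }

    int main() {
        const std::uint32_t defaults = 0x0000FFFF;               // all_safe_optimizations
        assert(has_optimization(defaults, false, 0x00000010));   // ConstProp: on by default
        assert(!has_optimization(defaults, false, 0x00020000));  // Unsafe_ReducedErrorFP: masked out
        assert(!has_optimization(defaults, true, 0x00020000));   // bool alone is not enough: flag bit unset
    }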


@@ -10,16 +10,39 @@
namespace Dynarmic {
enum class OptimizationFlag : std::uint32_t {
BlockLinking = 0x01,
ReturnStackBuffer = 0x02,
FastDispatch = 0x04,
GetSetElimination = 0x08,
ConstProp = 0x10,
MiscIROpt = 0x20,
/// This optimization avoids dispatcher lookups by allowing emitted basic blocks to jump
/// directly to other basic blocks if the destination PC is predictable at JIT-time.
/// This is a safe optimization.
BlockLinking = 0x00000001,
/// This optimization avoids dispatcher lookups by emulating a return stack buffer. This
/// allows for function returns and syscall returns to be predicted at runtime.
/// This is a safe optimization.
ReturnStackBuffer = 0x00000002,
/// This optimization enables a two-tiered dispatch system.
/// A fast dispatcher (written in assembly) first does a look-up in a small MRU cache.
/// If this fails, it falls back to the usual slower dispatcher.
/// This is a safe optimization.
FastDispatch = 0x00000004,
/// This is an IR optimization. This optimization eliminates unnecessary emulated CPU state
/// context lookups.
/// This is a safe optimization.
GetSetElimination = 0x00000008,
/// This is an IR optimization. This optimization does constant propagation.
/// This is a safe optimization.
ConstProp = 0x00000010,
/// This enables miscellaneous safe IR optimizations.
MiscIROpt = 0x00000020,
/// This is an UNSAFE optimization that reduces accuracy of fused multiply-add operations.
/// This unfuses fused instructions to improve performance on host CPUs without FMA support.
Unsafe_UnfuseFMA = 0x00010000,
/// This is an UNSAFE optimization that reduces accuracy of certain floating-point instructions.
/// This allows results of FRECPE and FRSQRTE to have **less** error than spec allows.
Unsafe_ReducedErrorFP = 0x00020000,
};
constexpr OptimizationFlag no_optimizations = static_cast<OptimizationFlag>(0);
constexpr OptimizationFlag all_optimizations = static_cast<OptimizationFlag>(~std::uint32_t(0));
constexpr OptimizationFlag all_safe_optimizations = static_cast<OptimizationFlag>(0x0000FFFF);
constexpr OptimizationFlag operator~(OptimizationFlag f) {
return static_cast<OptimizationFlag>(~static_cast<std::uint32_t>(f));
}
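
The constants fix a convention: safe flags occupy the low 16 bits, unsafe flags the high 16. A hypothetical helper (IsUnsafeFlag is not part of this header) can classify a flag using the operators defined here:

    bool IsUnsafeFlag(Dynarmic::OptimizationFlag f) {
        // Anything outside the low 16 bits reserved by all_safe_optimizations is unsafe.
        return (f & ~Dynarmic::all_safe_optimizations) != Dynarmic::no_optimizations;
    }
    // IsUnsafeFlag(OptimizationFlag::Unsafe_UnfuseFMA) -> true
    // IsUnsafeFlag(OptimizationFlag::FastDispatch)     -> false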


@@ -32,6 +32,10 @@ struct A32EmitContext final : public EmitContext {
bool IsSingleStep() const;
FP::FPCR FPCR(bool fpcr_controlled = true) const override;
bool HasOptimization(OptimizationFlag flag) const override {
return conf.HasOptimization(flag);
}
const A32::UserConfig& conf;
};


@@ -29,7 +29,9 @@ struct A64EmitContext final : public EmitContext {
bool IsSingleStep() const;
FP::FPCR FPCR(bool fpcr_controlled = true) const override;
bool UnsafeOptimizations() const override { return conf.unsafe_optimizations; }
bool HasOptimization(OptimizationFlag flag) const override {
return conf.HasOptimization(flag);
}
const A64::UserConfig& conf;
};


@@ -28,6 +28,10 @@ class Block;
class Inst;
} // namespace Dynarmic::IR
namespace Dynarmic {
enum class OptimizationFlag : u32;
} // namespace Dynarmic
namespace Dynarmic::Backend::X64 {
class BlockOfCode;
@@ -51,7 +55,7 @@ struct EmitContext {
virtual FP::FPCR FPCR(bool fpcr_controlled = true) const = 0;
virtual bool UnsafeOptimizations() const { return false; }
virtual bool HasOptimization(OptimizationFlag flag) const = 0;
RegAlloc& reg_alloc;
IR::Block& block;


@@ -14,6 +14,8 @@
#include <mp/typelist/list.h>
#include <mp/typelist/lower_to_tuple.h>
#include <dynarmic/optimization_flags.h>
#include "backend/x64/abi.h"
#include "backend/x64/block_of_code.h"
#include "backend/x64/emit_x64.h"
@@ -638,7 +640,7 @@ static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
return;
}
if (ctx.UnsafeOptimizations()) {
if (ctx.HasOptimization(OptimizationFlag::Unsafe_UnfuseFMA)) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseScratchXmm(args[0]);
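
What Unsafe_UnfuseFMA trades away, shown in scalar C++ (an illustration only, not the emitted x64): a fused multiply-add rounds once, while the unfused form rounds twice, so results can differ in the last bit.

    #include <cmath>

    double fused(double a, double b, double c) {
        return std::fma(a, b, c);  // single rounding, as the guest FMA instruction specifies
    }

    double unfused(double a, double b, double c) {
        return a * b + c;          // two roundings; cheaper on hosts without FMA units
    }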
@@ -743,7 +745,7 @@ static void EmitFPRecipEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
using FPT = mp::unsigned_integer_of_size<fsize>;
if constexpr (fsize != 16) {
if (ctx.UnsafeOptimizations()) {
if (ctx.HasOptimization(OptimizationFlag::Unsafe_ReducedErrorFP)) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
@@ -939,7 +941,7 @@ static void EmitFPRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
using FPT = mp::unsigned_integer_of_size<fsize>;
if constexpr (fsize != 16) {
if (ctx.UnsafeOptimizations()) {
if (ctx.HasOptimization(OptimizationFlag::Unsafe_ReducedErrorFP)) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
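
The Unsafe_ReducedErrorFP tradeoff, likewise illustrated in host terms (not this backend's emitted code): a spec-accurate FRECPE must reproduce the ARM pseudocode's table-based estimate exactly, whereas the host's fast approximation is cheap but not bit-identical to it.

    #include <immintrin.h>

    float recip_estimate_host(float x) {
        // SSE reciprocal approximation: |relative error| <= 1.5 * 2^-12,
        // tighter than the architectural estimate but not bit-identical to it.
        return _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ss(x)));
    }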


@@ -17,6 +17,8 @@
#include <mp/typelist/list.h>
#include <mp/typelist/lower_to_tuple.h>
#include <dynarmic/optimization_flags.h>
#include "backend/x64/abi.h"
#include "backend/x64/block_of_code.h"
#include "backend/x64/emit_x64.h"
@@ -1022,7 +1024,7 @@ void EmitFPVectorMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
return;
}
if (ctx.UnsafeOptimizations()) {
if (ctx.HasOptimization(OptimizationFlag::Unsafe_UnfuseFMA)) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseScratchXmm(args[0]);
@@ -1183,7 +1185,7 @@ static void EmitRecipEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
using FPT = mp::unsigned_integer_of_size<fsize>;
if constexpr (fsize != 16) {
if (ctx.UnsafeOptimizations()) {
if (ctx.HasOptimization(OptimizationFlag::Unsafe_ReducedErrorFP)) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
@@ -1363,7 +1365,7 @@ static void EmitRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
using FPT = mp::unsigned_integer_of_size<fsize>;
if constexpr (fsize != 16) {
if (ctx.UnsafeOptimizations()) {
if (ctx.HasOptimization(OptimizationFlag::Unsafe_ReducedErrorFP)) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
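
The vector hunks mirror the scalar ones; the corresponding host approximation is simply the packed form (again an illustration, not the emitted code):

    #include <immintrin.h>

    __m128 recip_estimate_host_x4(__m128 x) {
        return _mm_rcp_ps(x);  // packed SSE reciprocal estimate, same 1.5 * 2^-12 error bound
    }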