Add .clang-format file

Using clang-format version 12.0.0
This commit is contained in:
MerryMage 2021-05-22 14:51:20 +01:00
parent 51b155df92
commit 53493b2024
315 changed files with 3178 additions and 2660 deletions

218
.clang-format Normal file
View file

@ -0,0 +1,218 @@
---
# clang-format style for C++ sources.
# Every key appears exactly once: YAML mappings require unique keys.
Language: Cpp
AccessModifierOffset: -4

# Alignment of continuation lines, operands and trailing comments.
AlignAfterOpenBracket: Align
AlignConsecutiveMacros: None
AlignConsecutiveAssignments: None
AlignConsecutiveBitFields: None
AlignConsecutiveDeclarations: None
AlignEscapedNewlines: Right
AlignOperands: AlignAfterOperator
AlignTrailingComments: true

# What may be moved to, or kept on, a single line.
AllowAllArgumentsOnNextLine: true
AllowAllConstructorInitializersOnNextLine: true
AllowAllParametersOfDeclarationOnNextLine: true
AllowShortEnumsOnASingleLine: true
AllowShortBlocksOnASingleLine: Empty
AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: Inline
AllowShortLambdasOnASingleLine: All
AllowShortIfStatementsOnASingleLine: Never
AllowShortLoopsOnASingleLine: false

# Forced breaks after return types, before multiline strings and after
# template declarations.
AlwaysBreakAfterDefinitionReturnType: None
AlwaysBreakAfterReturnType: None
AlwaysBreakBeforeMultilineStrings: true
AlwaysBreakTemplateDeclarations: Yes

AttributeMacros:
  - __capability

# Call arguments may be packed; declaration parameters may not.
BinPackArguments: true
BinPackParameters: false
BitFieldColonSpacing: Both
# Per-construct brace placement. The entries below must be nested under
# BraceWrapping; at column 0 they would be parsed as unrelated top-level keys.
# This map is consulted when BreakBeforeBraces is set to Custom.
BraceWrapping:
  AfterCaseLabel: false
  AfterClass: false
  AfterControlStatement: Never
  AfterEnum: false
  AfterFunction: false
  AfterNamespace: false
  AfterObjCDeclaration: false
  AfterStruct: false
  AfterUnion: false
  AfterExternBlock: false
  BeforeCatch: false
  BeforeElse: false
  BeforeLambdaBody: false
  BeforeWhile: false
  IndentBraces: false
  SplitEmptyFunction: false
  SplitEmptyRecord: false
  SplitEmptyNamespace: false
# Line-breaking rules.
BreakBeforeBinaryOperators: All
# Custom defers brace placement to the BraceWrapping map.
BreakBeforeBraces: Custom
BreakBeforeConceptDeclarations: true
BreakBeforeTernaryOperators: true
# NOTE(review): BreakBeforeInheritanceComma and
# BreakConstructorInitializersBeforeComma look like legacy spellings of
# BreakInheritanceList / BreakConstructorInitializers below — confirm whether
# they are still needed before removing them.
BreakBeforeInheritanceComma: false
BreakConstructorInitializersBeforeComma: true
BreakConstructorInitializers: BeforeComma
BreakInheritanceList: BeforeComma
BreakAfterJavaFieldAnnotations: false
BreakStringLiterals: true
# 0 means no column limit is enforced.
ColumnLimit: 0
# Comments matching this pattern are left untouched by the formatter.
CommentPragmas: '^ IWYU pragma:'
CompactNamespaces: false
ConstructorInitializerAllOnOneLineOrOnePerLine: true
ConstructorInitializerIndentWidth: 8
ContinuationIndentWidth: 4
Cpp11BracedListStyle: true
DeriveLineEnding: true
# Pointer alignment is taken from PointerAlignment, not inferred per file.
DerivePointerAlignment: false
DisableFormat: false
# EmptyLineAfterAccessModifier: Leave
EmptyLineBeforeAccessModifier: Always
ExperimentalAutoDetectBinPacking: false
FixNamespaceComments: true
# Macros that are formatted like range-based for loops.
ForEachMacros:
  - foreach
  - Q_FOREACH
  - BOOST_FOREACH

# Include sorting: blocks are regrouped by the priorities assigned below.
# Each category is one sequence entry; Priority, SortPriority and
# CaseSensitive must be nested inside the entry that owns them.
IncludeBlocks: Regroup
IncludeCategories:
  # Priority 1: platform headers (<mach/...>, <windows.h>, signal/ucontext).
  - Regex: '^<mach/'
    Priority: 1
    SortPriority: 0
    CaseSensitive: false
  - Regex: '^<windows.h>'
    Priority: 1
    SortPriority: 0
    CaseSensitive: false
  - Regex: '(^<signal.h>)|(^<sys/ucontext.h>)|(^<ucontext.h>)'
    Priority: 1
    SortPriority: 0
    CaseSensitive: false
  # Priority 2: angle-bracket includes whose name contains no '.'.
  - Regex: '^<([^\.])*>$'
    Priority: 2
    SortPriority: 0
    CaseSensitive: false
  # Priority 3: remaining angle-bracket includes (name contains a '.').
  - Regex: '^<.*\.'
    Priority: 3
    SortPriority: 0
    CaseSensitive: false
  # Priority 4: everything else.
  - Regex: '.*'
    Priority: 4
    SortPriority: 0
    CaseSensitive: false
IncludeIsMainRegex: '([-_](test|unittest))?$'
IncludeIsMainSourceRegex: ''
# Indentation.
# IndentAccessModifiers: false
IndentCaseBlocks: false
IndentCaseLabels: false
IndentExternBlock: NoIndent
IndentGotoLabels: false
# Preprocessor directives nested in conditionals are indented after the '#'.
IndentPPDirectives: AfterHash
IndentRequires: false
IndentWidth: 4
IndentWrappedFunctionNames: false
# InsertTrailingCommas: None
JavaScriptQuotes: Leave
JavaScriptWrapImports: true
# Blank-line handling.
KeepEmptyLinesAtTheStartOfBlocks: false
MacroBlockBegin: ''
MacroBlockEnd: ''
MaxEmptyLinesToKeep: 1
NamespaceIndentation: None
# Intentionally left empty: no namespace macros are declared.
NamespaceMacros:
ObjCBinPackProtocolList: Never
ObjCBlockIndentWidth: 2
ObjCBreakBeforeNestedBlockParam: true
ObjCSpaceAfterProperty: false
ObjCSpaceBeforeProtocolList: true
# Penalties weighing the formatter's line-break choices.
PenaltyBreakAssignment: 2
PenaltyBreakBeforeFirstCallParameter: 1
PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 120
PenaltyBreakString: 1000
PenaltyBreakTemplateDeclaration: 10
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 200
PenaltyIndentedWhitespace: 0
# Pointer and reference markers attach to the type (e.g. `int* p`).
PointerAlignment: Left
# Formatting of code embedded in raw string literals. Each entry selects a
# language by raw-string delimiter (and, for TextProto, by enclosing function
# name); Delimiters and EnclosingFunctions are lists nested inside their entry.
RawStringFormats:
  - Language: Cpp
    Delimiters:
      - cc
      - CC
      - cpp
      - Cpp
      - CPP
      - 'c++'
      - 'C++'
    CanonicalDelimiter: ''
    BasedOnStyle: google
  - Language: TextProto
    Delimiters:
      - pb
      - PB
      - proto
      - PROTO
    EnclosingFunctions:
      - EqualsProto
      - EquivToProto
      - PARSE_PARTIAL_TEXT_PROTO
      - PARSE_TEST_PROTO
      - PARSE_TEXT_PROTO
      - ParseTextOrDie
      - ParseTextProtoOrDie
      - ParseTestProto
      - ParsePartialTestProto
    CanonicalDelimiter: ''
    BasedOnStyle: google
ReflowComments: true
# ShortNamespaceLines: 5

# Sorting of includes and using-declarations.
SortIncludes: true
SortJavaStaticImport: Before
SortUsingDeclarations: true

# Spacing rules. Every key is listed exactly once: YAML mappings require
# unique keys.
SpaceAfterCStyleCast: false
SpaceAfterLogicalNot: false
SpaceAfterTemplateKeyword: false
SpaceAroundPointerQualifiers: Default
SpaceBeforeAssignmentOperators: true
SpaceBeforeCaseColon: false
SpaceBeforeCpp11BracedList: false
SpaceBeforeCtorInitializerColon: true
SpaceBeforeInheritanceColon: true
# A space before '(' only after control-statement keywords (if/for/while...).
SpaceBeforeParens: ControlStatements
SpaceBeforeRangeBasedForLoopColon: true
SpaceBeforeSquareBrackets: false
SpaceInEmptyBlock: false
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 2
SpacesInAngles: false
SpacesInConditionalStatement: false
SpacesInCStyleCastParentheses: false
SpacesInContainerLiterals: false
# SpacesInLineCommentPrefix: -1
SpacesInParentheses: false
SpacesInSquareBrackets: false

Standard: Latest
# Macros that appear in front of a statement, like an attribute.
StatementAttributeLikeMacros:
  - Q_EMIT

# Macros that are treated as complete statements.
StatementMacros:
  - Q_UNUSED
  - QT_REQUIRE_VERSION

TabWidth: 4

# Intentionally left empty: no typename macros are declared.
TypenameMacros:

# Always emit LF line endings and indent with spaces.
UseCRLF: false
UseTab: Never

# Macros whose arguments are whitespace-sensitive and must not be reformatted.
WhitespaceSensitiveMacros:
  - STRINGIZE
  - PP_STRINGIZE
  - BOOST_PP_STRINGIZE
  - NS_SWIFT_NAME
  - CF_SWIFT_NAME
  - FCODE
  - ICODE
...

View file

@ -3,6 +3,8 @@
* SPDX-License-Identifier: 0BSD * SPDX-License-Identifier: 0BSD
*/ */
#include "dynarmic/backend/x64/a32_emit_x64.h"
#include <algorithm> #include <algorithm>
#include <optional> #include <optional>
#include <utility> #include <utility>
@ -11,7 +13,6 @@
#include <fmt/ostream.h> #include <fmt/ostream.h>
#include <mp/traits/integer_of_size.h> #include <mp/traits/integer_of_size.h>
#include "dynarmic/backend/x64/a32_emit_x64.h"
#include "dynarmic/backend/x64/a32_jitstate.h" #include "dynarmic/backend/x64/a32_jitstate.h"
#include "dynarmic/backend/x64/abi.h" #include "dynarmic/backend/x64/abi.h"
#include "dynarmic/backend/x64/block_of_code.h" #include "dynarmic/backend/x64/block_of_code.h"
@ -61,7 +62,7 @@ static Xbyak::Address MJitStateExtReg(A32::ExtReg reg) {
} }
A32EmitContext::A32EmitContext(const A32::UserConfig& conf, RegAlloc& reg_alloc, IR::Block& block) A32EmitContext::A32EmitContext(const A32::UserConfig& conf, RegAlloc& reg_alloc, IR::Block& block)
: EmitContext(reg_alloc, block), conf(conf) {} : EmitContext(reg_alloc, block), conf(conf) {}
A32::LocationDescriptor A32EmitContext::Location() const { A32::LocationDescriptor A32EmitContext::Location() const {
return A32::LocationDescriptor{block.Location()}; return A32::LocationDescriptor{block.Location()};
@ -87,7 +88,7 @@ A32EmitX64::A32EmitX64(BlockOfCode& code, A32::UserConfig conf, A32::Jit* jit_in
code.PreludeComplete(); code.PreludeComplete();
ClearFastDispatchTable(); ClearFastDispatchTable();
exception_handler.SetFastmemCallback([this](u64 rip_){ exception_handler.SetFastmemCallback([this](u64 rip_) {
return FastmemCallback(rip_); return FastmemCallback(rip_);
}); });
} }
@ -98,7 +99,7 @@ A32EmitX64::BlockDescriptor A32EmitX64::Emit(IR::Block& block) {
code.EnableWriting(); code.EnableWriting();
SCOPE_EXIT { code.DisableWriting(); }; SCOPE_EXIT { code.DisableWriting(); };
static const std::vector<HostLoc> gpr_order = [this]{ static const std::vector<HostLoc> gpr_order = [this] {
std::vector<HostLoc> gprs{any_gpr}; std::vector<HostLoc> gprs{any_gpr};
if (conf.page_table) { if (conf.page_table) {
gprs.erase(std::find(gprs.begin(), gprs.end(), HostLoc::R14)); gprs.erase(std::find(gprs.begin(), gprs.end(), HostLoc::R14));
@ -126,15 +127,14 @@ A32EmitX64::BlockDescriptor A32EmitX64::Emit(IR::Block& block) {
// Call the relevant Emit* member function. // Call the relevant Emit* member function.
switch (inst->GetOpcode()) { switch (inst->GetOpcode()) {
#define OPCODE(name, type, ...) \
#define OPCODE(name, type, ...) \ case IR::Opcode::name: \
case IR::Opcode::name: \ A32EmitX64::Emit##name(ctx, inst); \
A32EmitX64::Emit##name(ctx, inst); \ break;
break; #define A32OPC(name, type, ...) \
#define A32OPC(name, type, ...) \ case IR::Opcode::A32##name: \
case IR::Opcode::A32##name: \ A32EmitX64::EmitA32##name(ctx, inst); \
A32EmitX64::EmitA32##name(ctx, inst); \ break;
break;
#define A64OPC(...) #define A64OPC(...)
#include "dynarmic/ir/opcodes.inc" #include "dynarmic/ir/opcodes.inc"
#undef OPCODE #undef OPCODE
@ -216,7 +216,7 @@ void A32EmitX64::GenFastmemFallbacks() {
for (int value_idx : idxes) { for (int value_idx : idxes) {
for (const auto& [bitsize, callback] : read_callbacks) { for (const auto& [bitsize, callback] : read_callbacks) {
code.align(); code.align();
read_fallbacks[std::make_tuple(bitsize, vaddr_idx, value_idx)] = code.getCurr<void(*)()>(); read_fallbacks[std::make_tuple(bitsize, vaddr_idx, value_idx)] = code.getCurr<void (*)()>();
ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocRegIdx(value_idx)); ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocRegIdx(value_idx));
if (vaddr_idx != code.ABI_PARAM2.getIdx()) { if (vaddr_idx != code.ABI_PARAM2.getIdx()) {
code.mov(code.ABI_PARAM2, Xbyak::Reg64{vaddr_idx}); code.mov(code.ABI_PARAM2, Xbyak::Reg64{vaddr_idx});
@ -232,7 +232,7 @@ void A32EmitX64::GenFastmemFallbacks() {
for (const auto& [bitsize, callback] : write_callbacks) { for (const auto& [bitsize, callback] : write_callbacks) {
code.align(); code.align();
write_fallbacks[std::make_tuple(bitsize, vaddr_idx, value_idx)] = code.getCurr<void(*)()>(); write_fallbacks[std::make_tuple(bitsize, vaddr_idx, value_idx)] = code.getCurr<void (*)()>();
ABI_PushCallerSaveRegistersAndAdjustStack(code); ABI_PushCallerSaveRegistersAndAdjustStack(code);
if (vaddr_idx == code.ABI_PARAM3.getIdx() && value_idx == code.ABI_PARAM2.getIdx()) { if (vaddr_idx == code.ABI_PARAM3.getIdx() && value_idx == code.ABI_PARAM2.getIdx()) {
code.xchg(code.ABI_PARAM2, code.ABI_PARAM3); code.xchg(code.ABI_PARAM2, code.ABI_PARAM3);
@ -310,7 +310,7 @@ void A32EmitX64::GenTerminalHandlers() {
PerfMapRegister(terminal_handler_fast_dispatch_hint, code.getCurr(), "a32_terminal_handler_fast_dispatch_hint"); PerfMapRegister(terminal_handler_fast_dispatch_hint, code.getCurr(), "a32_terminal_handler_fast_dispatch_hint");
code.align(); code.align();
fast_dispatch_table_lookup = code.getCurr<FastDispatchEntry&(*)(u64)>(); fast_dispatch_table_lookup = code.getCurr<FastDispatchEntry& (*)(u64)>();
code.mov(code.ABI_PARAM2, reinterpret_cast<u64>(fast_dispatch_table.data())); code.mov(code.ABI_PARAM2, reinterpret_cast<u64>(fast_dispatch_table.data()));
if (code.HasHostFeature(HostFeature::SSE42)) { if (code.HasHostFeature(HostFeature::SSE42)) {
code.crc32(code.ABI_PARAM1.cvt32(), code.ABI_PARAM2.cvt32()); code.crc32(code.ABI_PARAM1.cvt32(), code.ABI_PARAM2.cvt32());
@ -728,7 +728,7 @@ void A32EmitX64::EmitA32DataMemoryBarrier(A32EmitContext&, IR::Inst*) {
void A32EmitX64::EmitA32InstructionSynchronizationBarrier(A32EmitContext& ctx, IR::Inst*) { void A32EmitX64::EmitA32InstructionSynchronizationBarrier(A32EmitContext& ctx, IR::Inst*) {
if (!conf.hook_isb) { if (!conf.hook_isb) {
return; return;
} }
ctx.reg_alloc.HostCall(nullptr); ctx.reg_alloc.HostCall(nullptr);
@ -766,7 +766,7 @@ void A32EmitX64::EmitA32BXWritePC(A32EmitContext& ctx, IR::Inst* inst) {
code.mov(mask, new_pc); code.mov(mask, new_pc);
code.and_(mask, 1); code.and_(mask, 1);
code.lea(new_upper, ptr[mask.cvt64() + upper_without_t]); code.lea(new_upper, ptr[mask.cvt64() + upper_without_t]);
code.lea(mask, ptr[mask.cvt64() + mask.cvt64() * 1 - 4]); // mask = pc & 1 ? 0xFFFFFFFE : 0xFFFFFFFC code.lea(mask, ptr[mask.cvt64() + mask.cvt64() * 1 - 4]); // mask = pc & 1 ? 0xFFFFFFFE : 0xFFFFFFFC
code.and_(new_pc, mask); code.and_(new_pc, mask);
code.mov(MJitStateReg(A32::Reg::PC), new_pc); code.mov(MJitStateReg(A32::Reg::PC), new_pc);
code.mov(dword[r15 + offsetof(A32JitState, upper_location_descriptor)], new_upper); code.mov(dword[r15 + offsetof(A32JitState, upper_location_descriptor)], new_upper);
@ -1021,7 +1021,7 @@ void EmitWriteMemoryMov(BlockOfCode& code, const Xbyak::RegExp& addr, const Xbya
} }
} }
} // anonymous namespace } // anonymous namespace
template<std::size_t bitsize, auto callback> template<std::size_t bitsize, auto callback>
void A32EmitX64::ReadMemory(A32EmitContext& ctx, IR::Inst* inst) { void A32EmitX64::ReadMemory(A32EmitContext& ctx, IR::Inst* inst) {
@ -1048,8 +1048,7 @@ void A32EmitX64::ReadMemory(A32EmitContext& ctx, IR::Inst* inst) {
Common::BitCast<u64>(code.getCurr()), Common::BitCast<u64>(code.getCurr()),
Common::BitCast<u64>(wrapped_fn), Common::BitCast<u64>(wrapped_fn),
*marker, *marker,
} });
);
ctx.reg_alloc.DefineValue(inst, value); ctx.reg_alloc.DefineValue(inst, value);
return; return;
@ -1095,8 +1094,7 @@ void A32EmitX64::WriteMemory(A32EmitContext& ctx, IR::Inst* inst) {
Common::BitCast<u64>(code.getCurr()), Common::BitCast<u64>(code.getCurr()),
Common::BitCast<u64>(wrapped_fn), Common::BitCast<u64>(wrapped_fn),
*marker, *marker,
} });
);
return; return;
} }
@ -1146,7 +1144,7 @@ void A32EmitX64::EmitA32WriteMemory64(A32EmitContext& ctx, IR::Inst* inst) {
WriteMemory<64, &A32::UserCallbacks::MemoryWrite64>(ctx, inst); WriteMemory<64, &A32::UserCallbacks::MemoryWrite64>(ctx, inst);
} }
template <size_t bitsize, auto callback> template<size_t bitsize, auto callback>
void A32EmitX64::ExclusiveReadMemory(A32EmitContext& ctx, IR::Inst* inst) { void A32EmitX64::ExclusiveReadMemory(A32EmitContext& ctx, IR::Inst* inst) {
using T = mp::unsigned_integer_of_size<bitsize>; using T = mp::unsigned_integer_of_size<bitsize>;
@ -1162,11 +1160,10 @@ void A32EmitX64::ExclusiveReadMemory(A32EmitContext& ctx, IR::Inst* inst) {
return conf.global_monitor->ReadAndMark<T>(conf.processor_id, vaddr, [&]() -> T { return conf.global_monitor->ReadAndMark<T>(conf.processor_id, vaddr, [&]() -> T {
return (conf.callbacks->*callback)(vaddr); return (conf.callbacks->*callback)(vaddr);
}); });
} });
);
} }
template <size_t bitsize, auto callback> template<size_t bitsize, auto callback>
void A32EmitX64::ExclusiveWriteMemory(A32EmitContext& ctx, IR::Inst* inst) { void A32EmitX64::ExclusiveWriteMemory(A32EmitContext& ctx, IR::Inst* inst) {
using T = mp::unsigned_integer_of_size<bitsize>; using T = mp::unsigned_integer_of_size<bitsize>;
@ -1185,11 +1182,12 @@ void A32EmitX64::ExclusiveWriteMemory(A32EmitContext& ctx, IR::Inst* inst) {
code.CallLambda( code.CallLambda(
[](A32::UserConfig& conf, u32 vaddr, T value) -> u32 { [](A32::UserConfig& conf, u32 vaddr, T value) -> u32 {
return conf.global_monitor->DoExclusiveOperation<T>(conf.processor_id, vaddr, return conf.global_monitor->DoExclusiveOperation<T>(conf.processor_id, vaddr,
[&](T expected) -> bool { [&](T expected) -> bool {
return (conf.callbacks->*callback)(vaddr, value, expected); return (conf.callbacks->*callback)(vaddr, value, expected);
}) ? 0 : 1; })
} ? 0
); : 1;
});
code.L(end); code.L(end);
} }
@ -1229,10 +1227,7 @@ static void EmitCoprocessorException() {
ASSERT_FALSE("Should raise coproc exception here"); ASSERT_FALSE("Should raise coproc exception here");
} }
static void CallCoprocCallback(BlockOfCode& code, RegAlloc& reg_alloc, A32::Jit* jit_interface, static void CallCoprocCallback(BlockOfCode& code, RegAlloc& reg_alloc, A32::Jit* jit_interface, A32::Coprocessor::Callback callback, IR::Inst* inst = nullptr, std::optional<Argument::copyable_reference> arg0 = {}, std::optional<Argument::copyable_reference> arg1 = {}) {
A32::Coprocessor::Callback callback, IR::Inst* inst = nullptr,
std::optional<Argument::copyable_reference> arg0 = {},
std::optional<Argument::copyable_reference> arg1 = {}) {
reg_alloc.HostCall(inst, {}, {}, arg0, arg1); reg_alloc.HostCall(inst, {}, {}, arg0, arg1);
code.mov(code.ABI_PARAM1, reinterpret_cast<u64>(jit_interface)); code.mov(code.ABI_PARAM1, reinterpret_cast<u64>(jit_interface));
@ -1519,7 +1514,7 @@ void A32EmitX64::EmitTerminalImpl(IR::Term::Interpret terminal, IR::LocationDesc
code.mov(MJitStateReg(A32::Reg::PC), code.ABI_PARAM2.cvt32()); code.mov(MJitStateReg(A32::Reg::PC), code.ABI_PARAM2.cvt32());
code.SwitchMxcsrOnExit(); code.SwitchMxcsrOnExit();
Devirtualize<&A32::UserCallbacks::InterpreterFallback>(conf.callbacks).EmitCall(code); Devirtualize<&A32::UserCallbacks::InterpreterFallback>(conf.callbacks).EmitCall(code);
code.ReturnFromRunCode(true); // TODO: Check cycles code.ReturnFromRunCode(true); // TODO: Check cycles
} }
void A32EmitX64::EmitTerminalImpl(IR::Term::ReturnToDispatch, IR::LocationDescriptor, bool) { void A32EmitX64::EmitTerminalImpl(IR::Term::ReturnToDispatch, IR::LocationDescriptor, bool) {
@ -1532,7 +1527,7 @@ void A32EmitX64::EmitSetUpperLocationDescriptor(IR::LocationDescriptor new_locat
}; };
const u32 old_upper = get_upper(old_location); const u32 old_upper = get_upper(old_location);
const u32 new_upper = [&]{ const u32 new_upper = [&] {
const u32 mask = ~u32(conf.always_little_endian ? 0x2 : 0); const u32 mask = ~u32(conf.always_little_endian ? 0x2 : 0);
return get_upper(new_location) & mask; return get_upper(new_location) & mask;
}(); }();
@ -1666,4 +1661,4 @@ void A32EmitX64::Unpatch(const IR::LocationDescriptor& location) {
} }
} }
} // namespace Dynarmic::Backend::X64 } // namespace Dynarmic::Backend::X64

View file

@ -71,8 +71,8 @@ protected:
std::array<FastDispatchEntry, fast_dispatch_table_size> fast_dispatch_table; std::array<FastDispatchEntry, fast_dispatch_table_size> fast_dispatch_table;
void ClearFastDispatchTable(); void ClearFastDispatchTable();
std::map<std::tuple<size_t, int, int>, void(*)()> read_fallbacks; std::map<std::tuple<size_t, int, int>, void (*)()> read_fallbacks;
std::map<std::tuple<size_t, int, int>, void(*)()> write_fallbacks; std::map<std::tuple<size_t, int, int>, void (*)()> write_fallbacks;
void GenFastmemFallbacks(); void GenFastmemFallbacks();
const void* terminal_handler_pop_rsb_hint; const void* terminal_handler_pop_rsb_hint;
@ -133,4 +133,4 @@ protected:
void EmitPatchMovRcx(CodePtr target_code_ptr = nullptr) override; void EmitPatchMovRcx(CodePtr target_code_ptr = nullptr) override;
}; };
} // namespace Dynarmic::Backend::X64 } // namespace Dynarmic::Backend::X64

View file

@ -55,8 +55,7 @@ struct Jit::Impl {
: block_of_code(GenRunCodeCallbacks(conf.callbacks, &GetCurrentBlockThunk, this), JitStateInfo{jit_state}, conf.code_cache_size, conf.far_code_offset, GenRCP(conf)) : block_of_code(GenRunCodeCallbacks(conf.callbacks, &GetCurrentBlockThunk, this), JitStateInfo{jit_state}, conf.code_cache_size, conf.far_code_offset, GenRCP(conf))
, emitter(block_of_code, conf, jit) , emitter(block_of_code, conf, jit)
, conf(std::move(conf)) , conf(std::move(conf))
, jit_interface(jit) , jit_interface(jit) {}
{}
A32JitState jit_state; A32JitState jit_state;
BlockOfCode block_of_code; BlockOfCode block_of_code;
@ -70,7 +69,7 @@ struct Jit::Impl {
bool invalidate_entire_cache = false; bool invalidate_entire_cache = false;
void Execute() { void Execute() {
const CodePtr current_codeptr = [this]{ const CodePtr current_codeptr = [this] {
// RSB optimization // RSB optimization
const u32 new_rsb_ptr = (jit_state.rsb_ptr - 1) & A32JitState::RSBPtrMask; const u32 new_rsb_ptr = (jit_state.rsb_ptr - 1) & A32JitState::RSBPtrMask;
if (jit_state.GetUniqueHash() == jit_state.rsb_location_descriptors[new_rsb_ptr]) { if (jit_state.GetUniqueHash() == jit_state.rsb_location_descriptors[new_rsb_ptr]) {
@ -176,7 +175,8 @@ private:
} }
}; };
Jit::Jit(UserConfig conf) : impl(std::make_unique<Impl>(this, std::move(conf))) {} Jit::Jit(UserConfig conf)
: impl(std::make_unique<Impl>(this, std::move(conf))) {}
Jit::~Jit() = default; Jit::~Jit() = default;
@ -269,10 +269,15 @@ struct Context::Impl {
size_t invalid_cache_generation; size_t invalid_cache_generation;
}; };
Context::Context() : impl(std::make_unique<Context::Impl>()) { impl->jit_state.ResetRSB(); } Context::Context()
: impl(std::make_unique<Context::Impl>()) {
impl->jit_state.ResetRSB();
}
Context::~Context() = default; Context::~Context() = default;
Context::Context(const Context& ctx) : impl(std::make_unique<Context::Impl>(*ctx.impl)) {} Context::Context(const Context& ctx)
Context::Context(Context&& ctx) noexcept : impl(std::move(ctx.impl)) {} : impl(std::make_unique<Context::Impl>(*ctx.impl)) {}
Context::Context(Context&& ctx) noexcept
: impl(std::move(ctx.impl)) {}
Context& Context::operator=(const Context& ctx) { Context& Context::operator=(const Context& ctx) {
*impl = *ctx.impl; *impl = *ctx.impl;
return *this; return *this;
@ -323,4 +328,4 @@ std::string Jit::Disassemble() const {
return Common::DisassembleX64(impl->block_of_code.GetCodeBegin(), impl->block_of_code.getCurr()); return Common::DisassembleX64(impl->block_of_code.GetCodeBegin(), impl->block_of_code.getCurr());
} }
} // namespace Dynarmic::A32 } // namespace Dynarmic::A32

View file

@ -4,6 +4,7 @@
*/ */
#include "dynarmic/backend/x64/a32_jitstate.h" #include "dynarmic/backend/x64/a32_jitstate.h"
#include "dynarmic/backend/x64/block_of_code.h" #include "dynarmic/backend/x64/block_of_code.h"
#include "dynarmic/backend/x64/nzcv_util.h" #include "dynarmic/backend/x64/nzcv_util.h"
#include "dynarmic/common/assert.h" #include "dynarmic/common/assert.h"
@ -89,7 +90,7 @@ void A32JitState::SetCpsr(u32 cpsr) {
upper_location_descriptor |= Common::Bit<9>(cpsr) ? 2 : 0; upper_location_descriptor |= Common::Bit<9>(cpsr) ? 2 : 0;
upper_location_descriptor |= Common::Bit<5>(cpsr) ? 1 : 0; upper_location_descriptor |= Common::Bit<5>(cpsr) ? 1 : 0;
// IT state // IT state
upper_location_descriptor |= (cpsr >> 0) & 0b11111100'00000000; upper_location_descriptor |= (cpsr >> 0) & 0b11111100'00000000;
upper_location_descriptor |= (cpsr >> 17) & 0b00000011'00000000; upper_location_descriptor |= (cpsr >> 17) & 0b00000011'00000000;
// Other flags // Other flags
@ -188,7 +189,7 @@ void A32JitState::SetFpscr(u32 FPSCR) {
asimd_MXCSR = 0x00009fc0; asimd_MXCSR = 0x00009fc0;
// RMode // RMode
const std::array<u32, 4> MXCSR_RMode {0x0, 0x4000, 0x2000, 0x6000}; const std::array<u32, 4> MXCSR_RMode{0x0, 0x4000, 0x2000, 0x6000};
guest_MXCSR |= MXCSR_RMode[(FPSCR >> 22) & 0x3]; guest_MXCSR |= MXCSR_RMode[(FPSCR >> 22) & 0x3];
// Cumulative flags IDC, IOC, IXC, UFC, OFC, DZC // Cumulative flags IDC, IOC, IXC, UFC, OFC, DZC
@ -196,9 +197,9 @@ void A32JitState::SetFpscr(u32 FPSCR) {
if (Common::Bit<24>(FPSCR)) { if (Common::Bit<24>(FPSCR)) {
// VFP Flush to Zero // VFP Flush to Zero
guest_MXCSR |= (1 << 15); // SSE Flush to Zero guest_MXCSR |= (1 << 15); // SSE Flush to Zero
guest_MXCSR |= (1 << 6); // SSE Denormals are Zero guest_MXCSR |= (1 << 6); // SSE Denormals are Zero
} }
} }
} // namespace Dynarmic::Backend::X64 } // namespace Dynarmic::Backend::X64

View file

@ -16,8 +16,8 @@ namespace Dynarmic::Backend::X64 {
class BlockOfCode; class BlockOfCode;
#ifdef _MSC_VER #ifdef _MSC_VER
#pragma warning(push) # pragma warning(push)
#pragma warning(disable:4324) // Structure was padded due to alignment specifier # pragma warning(disable : 4324) // Structure was padded due to alignment specifier
#endif #endif
struct A32JitState { struct A32JitState {
@ -25,7 +25,7 @@ struct A32JitState {
A32JitState() { ResetRSB(); } A32JitState() { ResetRSB(); }
std::array<u32, 16> Reg{}; // Current register file. std::array<u32, 16> Reg{}; // Current register file.
// TODO: Mode-specific register sets unimplemented. // TODO: Mode-specific register sets unimplemented.
u32 upper_location_descriptor = 0; u32 upper_location_descriptor = 0;
@ -37,7 +37,7 @@ struct A32JitState {
u32 Cpsr() const; u32 Cpsr() const;
void SetCpsr(u32 cpsr); void SetCpsr(u32 cpsr);
alignas(16) std::array<u32, 64> ExtReg{}; // Extension registers. alignas(16) std::array<u32, 64> ExtReg{}; // Extension registers.
// For internal use (See: BlockOfCode::RunCode) // For internal use (See: BlockOfCode::RunCode)
u32 guest_MXCSR = 0x00001f80; u32 guest_MXCSR = 0x00001f80;
@ -47,7 +47,7 @@ struct A32JitState {
// Exclusive state // Exclusive state
u32 exclusive_state = 0; u32 exclusive_state = 0;
static constexpr size_t RSBSize = 8; // MUST be a power of 2. static constexpr size_t RSBSize = 8; // MUST be a power of 2.
static constexpr size_t RSBPtrMask = RSBSize - 1; static constexpr size_t RSBPtrMask = RSBSize - 1;
u32 rsb_ptr = 0; u32 rsb_ptr = 0;
std::array<u64, RSBSize> rsb_location_descriptors; std::array<u64, RSBSize> rsb_location_descriptors;
@ -55,7 +55,7 @@ struct A32JitState {
void ResetRSB(); void ResetRSB();
u32 fpsr_exc = 0; u32 fpsr_exc = 0;
u32 fpsr_qc = 0; // Dummy value u32 fpsr_qc = 0; // Dummy value
u32 fpsr_nzcv = 0; u32 fpsr_nzcv = 0;
u32 Fpscr() const; u32 Fpscr() const;
void SetFpscr(u32 FPSCR); void SetFpscr(u32 FPSCR);
@ -91,9 +91,9 @@ struct A32JitState {
}; };
#ifdef _MSC_VER #ifdef _MSC_VER
#pragma warning(pop) # pragma warning(pop)
#endif #endif
using CodePtr = const void*; using CodePtr = const void*;
} // namespace Dynarmic::Backend::X64 } // namespace Dynarmic::Backend::X64

View file

@ -3,13 +3,14 @@
* SPDX-License-Identifier: 0BSD * SPDX-License-Identifier: 0BSD
*/ */
#include "dynarmic/backend/x64/a64_emit_x64.h"
#include <initializer_list> #include <initializer_list>
#include <fmt/format.h> #include <fmt/format.h>
#include <fmt/ostream.h> #include <fmt/ostream.h>
#include <mp/traits/integer_of_size.h> #include <mp/traits/integer_of_size.h>
#include "dynarmic/backend/x64/a64_emit_x64.h"
#include "dynarmic/backend/x64/a64_jitstate.h" #include "dynarmic/backend/x64/a64_jitstate.h"
#include "dynarmic/backend/x64/abi.h" #include "dynarmic/backend/x64/abi.h"
#include "dynarmic/backend/x64/block_of_code.h" #include "dynarmic/backend/x64/block_of_code.h"
@ -38,7 +39,7 @@ namespace Dynarmic::Backend::X64 {
using namespace Xbyak::util; using namespace Xbyak::util;
A64EmitContext::A64EmitContext(const A64::UserConfig& conf, RegAlloc& reg_alloc, IR::Block& block) A64EmitContext::A64EmitContext(const A64::UserConfig& conf, RegAlloc& reg_alloc, IR::Block& block)
: EmitContext(reg_alloc, block), conf(conf) {} : EmitContext(reg_alloc, block), conf(conf) {}
A64::LocationDescriptor A64EmitContext::Location() const { A64::LocationDescriptor A64EmitContext::Location() const {
return A64::LocationDescriptor{block.Location()}; return A64::LocationDescriptor{block.Location()};
@ -67,7 +68,7 @@ A64EmitX64::BlockDescriptor A64EmitX64::Emit(IR::Block& block) {
code.EnableWriting(); code.EnableWriting();
SCOPE_EXIT { code.DisableWriting(); }; SCOPE_EXIT { code.DisableWriting(); };
static const std::vector<HostLoc> gpr_order = [this]{ static const std::vector<HostLoc> gpr_order = [this] {
std::vector<HostLoc> gprs{any_gpr}; std::vector<HostLoc> gprs{any_gpr};
if (conf.page_table) { if (conf.page_table) {
gprs.erase(std::find(gprs.begin(), gprs.end(), HostLoc::R14)); gprs.erase(std::find(gprs.begin(), gprs.end(), HostLoc::R14));
@ -92,16 +93,15 @@ A64EmitX64::BlockDescriptor A64EmitX64::Emit(IR::Block& block) {
// Call the relevant Emit* member function. // Call the relevant Emit* member function.
switch (inst->GetOpcode()) { switch (inst->GetOpcode()) {
#define OPCODE(name, type, ...) \
#define OPCODE(name, type, ...) \ case IR::Opcode::name: \
case IR::Opcode::name: \ A64EmitX64::Emit##name(ctx, inst); \
A64EmitX64::Emit##name(ctx, inst); \ break;
break;
#define A32OPC(...) #define A32OPC(...)
#define A64OPC(name, type, ...) \ #define A64OPC(name, type, ...) \
case IR::Opcode::A64##name: \ case IR::Opcode::A64##name: \
A64EmitX64::EmitA64##name(ctx, inst); \ A64EmitX64::EmitA64##name(ctx, inst); \
break; break;
#include "dynarmic/ir/opcodes.inc" #include "dynarmic/ir/opcodes.inc"
#undef OPCODE #undef OPCODE
#undef A32OPC #undef A32OPC
@ -150,14 +150,13 @@ void A64EmitX64::ClearFastDispatchTable() {
void A64EmitX64::GenMemory128Accessors() { void A64EmitX64::GenMemory128Accessors() {
code.align(); code.align();
memory_read_128 = code.getCurr<void(*)()>(); memory_read_128 = code.getCurr<void (*)()>();
#ifdef _WIN32 #ifdef _WIN32
Devirtualize<&A64::UserCallbacks::MemoryRead128>(conf.callbacks).EmitCallWithReturnPointer(code, Devirtualize<&A64::UserCallbacks::MemoryRead128>(conf.callbacks).EmitCallWithReturnPointer(code, [&](Xbyak::Reg64 return_value_ptr, [[maybe_unused]] RegList args) {
[&](Xbyak::Reg64 return_value_ptr, [[maybe_unused]] RegList args) { code.mov(code.ABI_PARAM3, code.ABI_PARAM2);
code.mov(code.ABI_PARAM3, code.ABI_PARAM2); code.sub(rsp, 8 + 16 + ABI_SHADOW_SPACE);
code.sub(rsp, 8 + 16 + ABI_SHADOW_SPACE); code.lea(return_value_ptr, ptr[rsp + ABI_SHADOW_SPACE]);
code.lea(return_value_ptr, ptr[rsp + ABI_SHADOW_SPACE]); });
});
code.movups(xmm1, xword[code.ABI_RETURN]); code.movups(xmm1, xword[code.ABI_RETURN]);
code.add(rsp, 8 + 16 + ABI_SHADOW_SPACE); code.add(rsp, 8 + 16 + ABI_SHADOW_SPACE);
#else #else
@ -177,7 +176,7 @@ void A64EmitX64::GenMemory128Accessors() {
PerfMapRegister(memory_read_128, code.getCurr(), "a64_memory_read_128"); PerfMapRegister(memory_read_128, code.getCurr(), "a64_memory_read_128");
code.align(); code.align();
memory_write_128 = code.getCurr<void(*)()>(); memory_write_128 = code.getCurr<void (*)()>();
#ifdef _WIN32 #ifdef _WIN32
code.sub(rsp, 8 + 16 + ABI_SHADOW_SPACE); code.sub(rsp, 8 + 16 + ABI_SHADOW_SPACE);
code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE]); code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE]);
@ -223,7 +222,7 @@ void A64EmitX64::GenFastmemFallbacks() {
for (int value_idx : idxes) { for (int value_idx : idxes) {
code.align(); code.align();
read_fallbacks[std::make_tuple(128, vaddr_idx, value_idx)] = code.getCurr<void(*)()>(); read_fallbacks[std::make_tuple(128, vaddr_idx, value_idx)] = code.getCurr<void (*)()>();
ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(value_idx)); ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(value_idx));
if (vaddr_idx != code.ABI_PARAM2.getIdx()) { if (vaddr_idx != code.ABI_PARAM2.getIdx()) {
code.mov(code.ABI_PARAM2, Xbyak::Reg64{vaddr_idx}); code.mov(code.ABI_PARAM2, Xbyak::Reg64{vaddr_idx});
@ -237,7 +236,7 @@ void A64EmitX64::GenFastmemFallbacks() {
PerfMapRegister(read_fallbacks[std::make_tuple(128, vaddr_idx, value_idx)], code.getCurr(), "a64_read_fallback_128"); PerfMapRegister(read_fallbacks[std::make_tuple(128, vaddr_idx, value_idx)], code.getCurr(), "a64_read_fallback_128");
code.align(); code.align();
write_fallbacks[std::make_tuple(128, vaddr_idx, value_idx)] = code.getCurr<void(*)()>(); write_fallbacks[std::make_tuple(128, vaddr_idx, value_idx)] = code.getCurr<void (*)()>();
ABI_PushCallerSaveRegistersAndAdjustStack(code); ABI_PushCallerSaveRegistersAndAdjustStack(code);
if (vaddr_idx != code.ABI_PARAM2.getIdx()) { if (vaddr_idx != code.ABI_PARAM2.getIdx()) {
code.mov(code.ABI_PARAM2, Xbyak::Reg64{vaddr_idx}); code.mov(code.ABI_PARAM2, Xbyak::Reg64{vaddr_idx});
@ -256,7 +255,7 @@ void A64EmitX64::GenFastmemFallbacks() {
for (const auto& [bitsize, callback] : read_callbacks) { for (const auto& [bitsize, callback] : read_callbacks) {
code.align(); code.align();
read_fallbacks[std::make_tuple(bitsize, vaddr_idx, value_idx)] = code.getCurr<void(*)()>(); read_fallbacks[std::make_tuple(bitsize, vaddr_idx, value_idx)] = code.getCurr<void (*)()>();
ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocRegIdx(value_idx)); ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocRegIdx(value_idx));
if (vaddr_idx != code.ABI_PARAM2.getIdx()) { if (vaddr_idx != code.ABI_PARAM2.getIdx()) {
code.mov(code.ABI_PARAM2, Xbyak::Reg64{vaddr_idx}); code.mov(code.ABI_PARAM2, Xbyak::Reg64{vaddr_idx});
@ -272,7 +271,7 @@ void A64EmitX64::GenFastmemFallbacks() {
for (const auto& [bitsize, callback] : write_callbacks) { for (const auto& [bitsize, callback] : write_callbacks) {
code.align(); code.align();
write_fallbacks[std::make_tuple(bitsize, vaddr_idx, value_idx)] = code.getCurr<void(*)()>(); write_fallbacks[std::make_tuple(bitsize, vaddr_idx, value_idx)] = code.getCurr<void (*)()>();
ABI_PushCallerSaveRegistersAndAdjustStack(code); ABI_PushCallerSaveRegistersAndAdjustStack(code);
if (vaddr_idx == code.ABI_PARAM3.getIdx() && value_idx == code.ABI_PARAM2.getIdx()) { if (vaddr_idx == code.ABI_PARAM3.getIdx() && value_idx == code.ABI_PARAM2.getIdx()) {
code.xchg(code.ABI_PARAM2, code.ABI_PARAM3); code.xchg(code.ABI_PARAM2, code.ABI_PARAM3);
@ -353,7 +352,7 @@ void A64EmitX64::GenTerminalHandlers() {
PerfMapRegister(terminal_handler_fast_dispatch_hint, code.getCurr(), "a64_terminal_handler_fast_dispatch_hint"); PerfMapRegister(terminal_handler_fast_dispatch_hint, code.getCurr(), "a64_terminal_handler_fast_dispatch_hint");
code.align(); code.align();
fast_dispatch_table_lookup = code.getCurr<FastDispatchEntry&(*)(u64)>(); fast_dispatch_table_lookup = code.getCurr<FastDispatchEntry& (*)(u64)>();
code.mov(code.ABI_PARAM2, reinterpret_cast<u64>(fast_dispatch_table.data())); code.mov(code.ABI_PARAM2, reinterpret_cast<u64>(fast_dispatch_table.data()));
if (code.HasHostFeature(HostFeature::SSE42)) { if (code.HasHostFeature(HostFeature::SSE42)) {
code.crc32(code.ABI_PARAM1, code.ABI_PARAM2); code.crc32(code.ABI_PARAM1, code.ABI_PARAM2);
@ -542,7 +541,7 @@ void A64EmitX64::EmitA64SetD(A64EmitContext& ctx, IR::Inst* inst) {
const auto addr = xword[r15 + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)]; const auto addr = xword[r15 + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];
const Xbyak::Xmm to_store = ctx.reg_alloc.UseScratchXmm(args[1]); const Xbyak::Xmm to_store = ctx.reg_alloc.UseScratchXmm(args[1]);
code.movq(to_store, to_store); // TODO: Remove when able code.movq(to_store, to_store); // TODO: Remove when able
code.movaps(addr, to_store); code.movaps(addr, to_store);
} }
@ -628,10 +627,9 @@ void A64EmitX64::EmitA64CallSupervisor(A64EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ASSERT(args[0].IsImmediate()); ASSERT(args[0].IsImmediate());
const u32 imm = args[0].GetImmediateU32(); const u32 imm = args[0].GetImmediateU32();
Devirtualize<&A64::UserCallbacks::CallSVC>(conf.callbacks).EmitCall(code, Devirtualize<&A64::UserCallbacks::CallSVC>(conf.callbacks).EmitCall(code, [&](RegList param) {
[&](RegList param) { code.mov(param[0], imm);
code.mov(param[0], imm); });
});
// The kernel would have to execute ERET to get here, which would clear exclusive state. // The kernel would have to execute ERET to get here, which would clear exclusive state.
code.mov(code.byte[r15 + offsetof(A64JitState, exclusive_state)], u8(0)); code.mov(code.byte[r15 + offsetof(A64JitState, exclusive_state)], u8(0));
} }
@ -642,11 +640,10 @@ void A64EmitX64::EmitA64ExceptionRaised(A64EmitContext& ctx, IR::Inst* inst) {
ASSERT(args[0].IsImmediate() && args[1].IsImmediate()); ASSERT(args[0].IsImmediate() && args[1].IsImmediate());
const u64 pc = args[0].GetImmediateU64(); const u64 pc = args[0].GetImmediateU64();
const u64 exception = args[1].GetImmediateU64(); const u64 exception = args[1].GetImmediateU64();
Devirtualize<&A64::UserCallbacks::ExceptionRaised>(conf.callbacks).EmitCall(code, Devirtualize<&A64::UserCallbacks::ExceptionRaised>(conf.callbacks).EmitCall(code, [&](RegList param) {
[&](RegList param) { code.mov(param[0], pc);
code.mov(param[0], pc); code.mov(param[1], exception);
code.mov(param[1], exception); });
});
} }
void A64EmitX64::EmitA64DataCacheOperationRaised(A64EmitContext& ctx, IR::Inst* inst) { void A64EmitX64::EmitA64DataCacheOperationRaised(A64EmitContext& ctx, IR::Inst* inst) {
@ -881,7 +878,7 @@ void EmitWriteMemoryMov(BlockOfCode& code, const Xbyak::RegExp& addr, const Xbya
} }
} }
} // anonymous namepsace } // namespace
template<std::size_t bitsize> template<std::size_t bitsize>
void A64EmitX64::EmitDirectPageTableMemoryRead(A64EmitContext& ctx, IR::Inst* inst) { void A64EmitX64::EmitDirectPageTableMemoryRead(A64EmitContext& ctx, IR::Inst* inst) {
@ -1090,8 +1087,7 @@ void A64EmitX64::EmitExclusiveReadMemory(A64EmitContext& ctx, IR::Inst* inst) {
return conf.global_monitor->ReadAndMark<T>(conf.processor_id, vaddr, [&]() -> T { return conf.global_monitor->ReadAndMark<T>(conf.processor_id, vaddr, [&]() -> T {
return (conf.callbacks->*callback)(vaddr); return (conf.callbacks->*callback)(vaddr);
}); });
} });
);
} else { } else {
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
ctx.reg_alloc.Use(args[0], ABI_PARAM2); ctx.reg_alloc.Use(args[0], ABI_PARAM2);
@ -1107,8 +1103,7 @@ void A64EmitX64::EmitExclusiveReadMemory(A64EmitContext& ctx, IR::Inst* inst) {
ret = conf.global_monitor->ReadAndMark<A64::Vector>(conf.processor_id, vaddr, [&]() -> A64::Vector { ret = conf.global_monitor->ReadAndMark<A64::Vector>(conf.processor_id, vaddr, [&]() -> A64::Vector {
return (conf.callbacks->*callback)(vaddr); return (conf.callbacks->*callback)(vaddr);
}); });
} });
);
code.movups(result, xword[rsp + ABI_SHADOW_SPACE]); code.movups(result, xword[rsp + ABI_SHADOW_SPACE]);
ctx.reg_alloc.ReleaseStackSpace(16 + ABI_SHADOW_SPACE); ctx.reg_alloc.ReleaseStackSpace(16 + ABI_SHADOW_SPACE);
@ -1163,11 +1158,12 @@ void A64EmitX64::EmitExclusiveWriteMemory(A64EmitContext& ctx, IR::Inst* inst) {
code.CallLambda( code.CallLambda(
[](A64::UserConfig& conf, u64 vaddr, T value) -> u32 { [](A64::UserConfig& conf, u64 vaddr, T value) -> u32 {
return conf.global_monitor->DoExclusiveOperation<T>(conf.processor_id, vaddr, return conf.global_monitor->DoExclusiveOperation<T>(conf.processor_id, vaddr,
[&](T expected) -> bool { [&](T expected) -> bool {
return (conf.callbacks->*callback)(vaddr, value, expected); return (conf.callbacks->*callback)(vaddr, value, expected);
}) ? 0 : 1; })
} ? 0
); : 1;
});
} else { } else {
ctx.reg_alloc.AllocStackSpace(16 + ABI_SHADOW_SPACE); ctx.reg_alloc.AllocStackSpace(16 + ABI_SHADOW_SPACE);
code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE]); code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE]);
@ -1175,11 +1171,12 @@ void A64EmitX64::EmitExclusiveWriteMemory(A64EmitContext& ctx, IR::Inst* inst) {
code.CallLambda( code.CallLambda(
[](A64::UserConfig& conf, u64 vaddr, A64::Vector& value) -> u32 { [](A64::UserConfig& conf, u64 vaddr, A64::Vector& value) -> u32 {
return conf.global_monitor->DoExclusiveOperation<A64::Vector>(conf.processor_id, vaddr, return conf.global_monitor->DoExclusiveOperation<A64::Vector>(conf.processor_id, vaddr,
[&](A64::Vector expected) -> bool { [&](A64::Vector expected) -> bool {
return (conf.callbacks->*callback)(vaddr, value, expected); return (conf.callbacks->*callback)(vaddr, value, expected);
}) ? 0 : 1; })
} ? 0
); : 1;
});
ctx.reg_alloc.ReleaseStackSpace(16 + ABI_SHADOW_SPACE); ctx.reg_alloc.ReleaseStackSpace(16 + ABI_SHADOW_SPACE);
} }
code.L(end); code.L(end);
@ -1214,13 +1211,12 @@ std::string A64EmitX64::LocationDescriptorToFriendlyName(const IR::LocationDescr
void A64EmitX64::EmitTerminalImpl(IR::Term::Interpret terminal, IR::LocationDescriptor, bool) { void A64EmitX64::EmitTerminalImpl(IR::Term::Interpret terminal, IR::LocationDescriptor, bool) {
code.SwitchMxcsrOnExit(); code.SwitchMxcsrOnExit();
Devirtualize<&A64::UserCallbacks::InterpreterFallback>(conf.callbacks).EmitCall(code, Devirtualize<&A64::UserCallbacks::InterpreterFallback>(conf.callbacks).EmitCall(code, [&](RegList param) {
[&](RegList param) { code.mov(param[0], A64::LocationDescriptor{terminal.next}.PC());
code.mov(param[0], A64::LocationDescriptor{terminal.next}.PC()); code.mov(qword[r15 + offsetof(A64JitState, pc)], param[0]);
code.mov(qword[r15 + offsetof(A64JitState, pc)], param[0]); code.mov(param[1].cvt32(), terminal.num_instructions);
code.mov(param[1].cvt32(), terminal.num_instructions); });
}); code.ReturnFromRunCode(true); // TODO: Check cycles
code.ReturnFromRunCode(true); // TODO: Check cycles
} }
void A64EmitX64::EmitTerminalImpl(IR::Term::ReturnToDispatch, IR::LocationDescriptor, bool) { void A64EmitX64::EmitTerminalImpl(IR::Term::ReturnToDispatch, IR::LocationDescriptor, bool) {
@ -1352,4 +1348,4 @@ void A64EmitX64::Unpatch(const IR::LocationDescriptor& location) {
} }
} }
} // namespace Dynarmic::Backend::X64 } // namespace Dynarmic::Backend::X64

View file

@ -69,8 +69,8 @@ protected:
void (*memory_write_128)(); void (*memory_write_128)();
void GenMemory128Accessors(); void GenMemory128Accessors();
std::map<std::tuple<size_t, int, int>, void(*)()> read_fallbacks; std::map<std::tuple<size_t, int, int>, void (*)()> read_fallbacks;
std::map<std::tuple<size_t, int, int>, void(*)()> write_fallbacks; std::map<std::tuple<size_t, int, int>, void (*)()> write_fallbacks;
void GenFastmemFallbacks(); void GenFastmemFallbacks();
const void* terminal_handler_pop_rsb_hint; const void* terminal_handler_pop_rsb_hint;
@ -118,4 +118,4 @@ protected:
void EmitPatchMovRcx(CodePtr target_code_ptr = nullptr) override; void EmitPatchMovRcx(CodePtr target_code_ptr = nullptr) override;
}; };
} // namespace Dynarmic::Backend::X64 } // namespace Dynarmic::Backend::X64

View file

@ -44,10 +44,9 @@ static std::function<void(BlockOfCode&)> GenRCP(const A64::UserConfig& conf) {
struct Jit::Impl final { struct Jit::Impl final {
public: public:
Impl(Jit* jit, UserConfig conf) Impl(Jit* jit, UserConfig conf)
: conf(conf) : conf(conf)
, block_of_code(GenRunCodeCallbacks(conf.callbacks, &GetCurrentBlockThunk, this), JitStateInfo{jit_state}, conf.code_cache_size, conf.far_code_offset, GenRCP(conf)) , block_of_code(GenRunCodeCallbacks(conf.callbacks, &GetCurrentBlockThunk, this), JitStateInfo{jit_state}, conf.code_cache_size, conf.far_code_offset, GenRCP(conf))
, emitter(block_of_code, conf, jit) , emitter(block_of_code, conf, jit) {
{
ASSERT(conf.page_table_address_space_bits >= 12 && conf.page_table_address_space_bits <= 64); ASSERT(conf.page_table_address_space_bits >= 12 && conf.page_table_address_space_bits <= 64);
} }
@ -61,7 +60,7 @@ public:
// TODO: Check code alignment // TODO: Check code alignment
const CodePtr current_code_ptr = [this]{ const CodePtr current_code_ptr = [this] {
// RSB optimization // RSB optimization
const u32 new_rsb_ptr = (jit_state.rsb_ptr - 1) & A64JitState::RSBPtrMask; const u32 new_rsb_ptr = (jit_state.rsb_ptr - 1) & A64JitState::RSBPtrMask;
if (jit_state.GetUniqueHash() == jit_state.rsb_location_descriptors[new_rsb_ptr]) { if (jit_state.GetUniqueHash() == jit_state.rsb_location_descriptors[new_rsb_ptr]) {
@ -233,7 +232,7 @@ private:
// JIT Compile // JIT Compile
const auto get_code = [this](u64 vaddr) { return conf.callbacks->MemoryReadCode(vaddr); }; const auto get_code = [this](u64 vaddr) { return conf.callbacks->MemoryReadCode(vaddr); };
IR::Block ir_block = A64::Translate(A64::LocationDescriptor{current_location}, get_code, IR::Block ir_block = A64::Translate(A64::LocationDescriptor{current_location}, get_code,
{conf.define_unpredictable_behaviour, conf.wall_clock_cntpct}); {conf.define_unpredictable_behaviour, conf.wall_clock_cntpct});
Optimization::A64CallbackConfigPass(ir_block, conf); Optimization::A64CallbackConfigPass(ir_block, conf);
if (conf.HasOptimization(OptimizationFlag::GetSetElimination)) { if (conf.HasOptimization(OptimizationFlag::GetSetElimination)) {
Optimization::A64GetSetElimination(ir_block); Optimization::A64GetSetElimination(ir_block);
@ -287,7 +286,7 @@ private:
}; };
Jit::Jit(UserConfig conf) Jit::Jit(UserConfig conf)
: impl(std::make_unique<Jit::Impl>(this, conf)) {} : impl(std::make_unique<Jit::Impl>(this, conf)) {}
Jit::~Jit() = default; Jit::~Jit() = default;
@ -399,4 +398,4 @@ std::string Jit::Disassemble() const {
return impl->Disassemble(); return impl->Disassemble();
} }
} // namespace Dynarmic::A64 } // namespace Dynarmic::A64

View file

@ -4,6 +4,7 @@
*/ */
#include "dynarmic/backend/x64/a64_jitstate.h" #include "dynarmic/backend/x64/a64_jitstate.h"
#include "dynarmic/common/bit_util.h" #include "dynarmic/common/bit_util.h"
#include "dynarmic/frontend/A64/location_descriptor.h" #include "dynarmic/frontend/A64/location_descriptor.h"
@ -58,15 +59,15 @@ void A64JitState::SetFpcr(u32 value) {
asimd_MXCSR &= 0x0000003D; asimd_MXCSR &= 0x0000003D;
guest_MXCSR &= 0x0000003D; guest_MXCSR &= 0x0000003D;
asimd_MXCSR |= 0x00001f80; asimd_MXCSR |= 0x00001f80;
guest_MXCSR |= 0x00001f80; // Mask all exceptions guest_MXCSR |= 0x00001f80; // Mask all exceptions
// RMode // RMode
const std::array<u32, 4> MXCSR_RMode {0x0, 0x4000, 0x2000, 0x6000}; const std::array<u32, 4> MXCSR_RMode{0x0, 0x4000, 0x2000, 0x6000};
guest_MXCSR |= MXCSR_RMode[(value >> 22) & 0x3]; guest_MXCSR |= MXCSR_RMode[(value >> 22) & 0x3];
if (Common::Bit<24>(value)) { if (Common::Bit<24>(value)) {
guest_MXCSR |= (1 << 15); // SSE Flush to Zero guest_MXCSR |= (1 << 15); // SSE Flush to Zero
guest_MXCSR |= (1 << 6); // SSE Denormals are Zero guest_MXCSR |= (1 << 6); // SSE Denormals are Zero
} }
} }
@ -111,4 +112,4 @@ void A64JitState::SetFpsr(u32 value) {
fpsr_exc = value & 0x9F; fpsr_exc = value & 0x9F;
} }
} // namespace Dynarmic::Backend::X64 } // namespace Dynarmic::Backend::X64

View file

@ -18,8 +18,8 @@ namespace Dynarmic::Backend::X64 {
class BlockOfCode; class BlockOfCode;
#ifdef _MSC_VER #ifdef _MSC_VER
#pragma warning(push) # pragma warning(push)
#pragma warning(disable:4324) // Structure was padded due to alignment specifier # pragma warning(disable : 4324) // Structure was padded due to alignment specifier
#endif #endif
struct A64JitState { struct A64JitState {
@ -40,7 +40,7 @@ struct A64JitState {
cpsr_nzcv = NZCV::ToX64(new_pstate); cpsr_nzcv = NZCV::ToX64(new_pstate);
} }
alignas(16) std::array<u64, 64> vec{}; // Extension registers. alignas(16) std::array<u64, 64> vec{}; // Extension registers.
// For internal use (See: BlockOfCode::RunCode) // For internal use (See: BlockOfCode::RunCode)
u32 guest_MXCSR = 0x00001f80; u32 guest_MXCSR = 0x00001f80;
@ -51,7 +51,7 @@ struct A64JitState {
static constexpr u64 RESERVATION_GRANULE_MASK = 0xFFFF'FFFF'FFFF'FFF0ull; static constexpr u64 RESERVATION_GRANULE_MASK = 0xFFFF'FFFF'FFFF'FFF0ull;
u8 exclusive_state = 0; u8 exclusive_state = 0;
static constexpr size_t RSBSize = 8; // MUST be a power of 2. static constexpr size_t RSBSize = 8; // MUST be a power of 2.
static constexpr size_t RSBPtrMask = RSBSize - 1; static constexpr size_t RSBPtrMask = RSBSize - 1;
u32 rsb_ptr = 0; u32 rsb_ptr = 0;
std::array<u64, RSBSize> rsb_location_descriptors; std::array<u64, RSBSize> rsb_location_descriptors;
@ -77,9 +77,9 @@ struct A64JitState {
}; };
#ifdef _MSC_VER #ifdef _MSC_VER
#pragma warning(pop) # pragma warning(pop)
#endif #endif
using CodePtr = const void*; using CodePtr = const void*;
} // namespace Dynarmic::Backend::X64 } // namespace Dynarmic::Backend::X64

View file

@ -3,12 +3,13 @@
* SPDX-License-Identifier: 0BSD * SPDX-License-Identifier: 0BSD
*/ */
#include "dynarmic/backend/x64/abi.h"
#include <algorithm> #include <algorithm>
#include <vector> #include <vector>
#include <xbyak.h> #include <xbyak.h>
#include "dynarmic/backend/x64/abi.h"
#include "dynarmic/backend/x64/block_of_code.h" #include "dynarmic/backend/x64/block_of_code.h"
#include "dynarmic/common/common_types.h" #include "dynarmic/common/common_types.h"
#include "dynarmic/common/iterator_util.h" #include "dynarmic/common/iterator_util.h"
@ -131,4 +132,4 @@ void ABI_PopCallerSaveRegistersAndAdjustStackExcept(BlockOfCode& code, HostLoc e
ABI_PopRegistersAndAdjustStack(code, 0, regs); ABI_PopRegistersAndAdjustStack(code, 0, regs);
} }
} // namespace Dynarmic::Backend::X64 } // namespace Dynarmic::Backend::X64

View file

@ -61,7 +61,7 @@ constexpr std::array<HostLoc, 18> ABI_ALL_CALLEE_SAVE = {
HostLoc::XMM15, HostLoc::XMM15,
}; };
constexpr size_t ABI_SHADOW_SPACE = 32; // bytes constexpr size_t ABI_SHADOW_SPACE = 32; // bytes
#else #else
@ -114,7 +114,7 @@ constexpr std::array<HostLoc, 6> ABI_ALL_CALLEE_SAVE = {
HostLoc::R15, HostLoc::R15,
}; };
constexpr size_t ABI_SHADOW_SPACE = 0; // bytes constexpr size_t ABI_SHADOW_SPACE = 0; // bytes
#endif #endif
@ -128,4 +128,4 @@ void ABI_PopCallerSaveRegistersAndAdjustStack(BlockOfCode& code, size_t frame_si
void ABI_PushCallerSaveRegistersAndAdjustStackExcept(BlockOfCode& code, HostLoc exception); void ABI_PushCallerSaveRegistersAndAdjustStackExcept(BlockOfCode& code, HostLoc exception);
void ABI_PopCallerSaveRegistersAndAdjustStackExcept(BlockOfCode& code, HostLoc exception); void ABI_PopCallerSaveRegistersAndAdjustStackExcept(BlockOfCode& code, HostLoc exception);
} // namespace Dynarmic::Backend::X64 } // namespace Dynarmic::Backend::X64

View file

@ -3,6 +3,15 @@
* SPDX-License-Identifier: 0BSD * SPDX-License-Identifier: 0BSD
*/ */
#include "dynarmic/backend/x64/block_of_code.h"
#ifdef _WIN32
# define WIN32_LEAN_AND_MEAN
# include <windows.h>
#else
# include <sys/mman.h>
#endif
#include <array> #include <array>
#include <cstring> #include <cstring>
@ -10,19 +19,12 @@
#include "dynarmic/backend/x64/a32_jitstate.h" #include "dynarmic/backend/x64/a32_jitstate.h"
#include "dynarmic/backend/x64/abi.h" #include "dynarmic/backend/x64/abi.h"
#include "dynarmic/backend/x64/block_of_code.h"
#include "dynarmic/backend/x64/hostloc.h" #include "dynarmic/backend/x64/hostloc.h"
#include "dynarmic/backend/x64/perf_map.h" #include "dynarmic/backend/x64/perf_map.h"
#include "dynarmic/backend/x64/stack_layout.h" #include "dynarmic/backend/x64/stack_layout.h"
#include "dynarmic/common/assert.h" #include "dynarmic/common/assert.h"
#include "dynarmic/common/bit_util.h" #include "dynarmic/common/bit_util.h"
#ifdef _WIN32
#include <windows.h>
#else
#include <sys/mman.h>
#endif
namespace Dynarmic::Backend::X64 { namespace Dynarmic::Backend::X64 {
#ifdef _WIN32 #ifdef _WIN32
@ -60,47 +62,66 @@ CustomXbyakAllocator s_allocator;
#ifdef DYNARMIC_ENABLE_NO_EXECUTE_SUPPORT #ifdef DYNARMIC_ENABLE_NO_EXECUTE_SUPPORT
void ProtectMemory(const void* base, size_t size, bool is_executable) { void ProtectMemory(const void* base, size_t size, bool is_executable) {
#ifdef _WIN32 # ifdef _WIN32
DWORD oldProtect = 0; DWORD oldProtect = 0;
VirtualProtect(const_cast<void*>(base), size, is_executable ? PAGE_EXECUTE_READ : PAGE_READWRITE, &oldProtect); VirtualProtect(const_cast<void*>(base), size, is_executable ? PAGE_EXECUTE_READ : PAGE_READWRITE, &oldProtect);
#else # else
static const size_t pageSize = sysconf(_SC_PAGESIZE); static const size_t pageSize = sysconf(_SC_PAGESIZE);
const size_t iaddr = reinterpret_cast<size_t>(base); const size_t iaddr = reinterpret_cast<size_t>(base);
const size_t roundAddr = iaddr & ~(pageSize - static_cast<size_t>(1)); const size_t roundAddr = iaddr & ~(pageSize - static_cast<size_t>(1));
const int mode = is_executable ? (PROT_READ | PROT_EXEC) : (PROT_READ | PROT_WRITE); const int mode = is_executable ? (PROT_READ | PROT_EXEC) : (PROT_READ | PROT_WRITE);
mprotect(reinterpret_cast<void*>(roundAddr), size + (iaddr - roundAddr), mode); mprotect(reinterpret_cast<void*>(roundAddr), size + (iaddr - roundAddr), mode);
#endif # endif
} }
#endif #endif
HostFeature GetHostFeatures() HostFeature GetHostFeatures() {
{
HostFeature features = {}; HostFeature features = {};
#ifdef DYNARMIC_ENABLE_CPU_FEATURE_DETECTION #ifdef DYNARMIC_ENABLE_CPU_FEATURE_DETECTION
using Cpu = Xbyak::util::Cpu; using Cpu = Xbyak::util::Cpu;
Xbyak::util::Cpu cpu_info; Xbyak::util::Cpu cpu_info;
if (cpu_info.has(Cpu::tSSSE3)) features |= HostFeature::SSSE3; if (cpu_info.has(Cpu::tSSSE3))
if (cpu_info.has(Cpu::tSSE41)) features |= HostFeature::SSE41; features |= HostFeature::SSSE3;
if (cpu_info.has(Cpu::tSSE42)) features |= HostFeature::SSE42; if (cpu_info.has(Cpu::tSSE41))
if (cpu_info.has(Cpu::tAVX)) features |= HostFeature::AVX; features |= HostFeature::SSE41;
if (cpu_info.has(Cpu::tAVX2)) features |= HostFeature::AVX2; if (cpu_info.has(Cpu::tSSE42))
if (cpu_info.has(Cpu::tAVX512F)) features |= HostFeature::AVX512F; features |= HostFeature::SSE42;
if (cpu_info.has(Cpu::tAVX512CD)) features |= HostFeature::AVX512CD; if (cpu_info.has(Cpu::tAVX))
if (cpu_info.has(Cpu::tAVX512VL)) features |= HostFeature::AVX512VL; features |= HostFeature::AVX;
if (cpu_info.has(Cpu::tAVX512BW)) features |= HostFeature::AVX512BW; if (cpu_info.has(Cpu::tAVX2))
if (cpu_info.has(Cpu::tAVX512DQ)) features |= HostFeature::AVX512DQ; features |= HostFeature::AVX2;
if (cpu_info.has(Cpu::tAVX512_BITALG)) features |= HostFeature::AVX512BITALG; if (cpu_info.has(Cpu::tAVX512F))
if (cpu_info.has(Cpu::tPCLMULQDQ)) features |= HostFeature::PCLMULQDQ; features |= HostFeature::AVX512F;
if (cpu_info.has(Cpu::tF16C)) features |= HostFeature::F16C; if (cpu_info.has(Cpu::tAVX512CD))
if (cpu_info.has(Cpu::tFMA)) features |= HostFeature::FMA; features |= HostFeature::AVX512CD;
if (cpu_info.has(Cpu::tAESNI)) features |= HostFeature::AES; if (cpu_info.has(Cpu::tAVX512VL))
if (cpu_info.has(Cpu::tPOPCNT)) features |= HostFeature::POPCNT; features |= HostFeature::AVX512VL;
if (cpu_info.has(Cpu::tBMI1)) features |= HostFeature::BMI1; if (cpu_info.has(Cpu::tAVX512BW))
if (cpu_info.has(Cpu::tBMI2)) features |= HostFeature::BMI2; features |= HostFeature::AVX512BW;
if (cpu_info.has(Cpu::tLZCNT)) features |= HostFeature::LZCNT; if (cpu_info.has(Cpu::tAVX512DQ))
if (cpu_info.has(Cpu::tGFNI)) features |= HostFeature::GFNI; features |= HostFeature::AVX512DQ;
if (cpu_info.has(Cpu::tAVX512_BITALG))
features |= HostFeature::AVX512BITALG;
if (cpu_info.has(Cpu::tPCLMULQDQ))
features |= HostFeature::PCLMULQDQ;
if (cpu_info.has(Cpu::tF16C))
features |= HostFeature::F16C;
if (cpu_info.has(Cpu::tFMA))
features |= HostFeature::FMA;
if (cpu_info.has(Cpu::tAESNI))
features |= HostFeature::AES;
if (cpu_info.has(Cpu::tPOPCNT))
features |= HostFeature::POPCNT;
if (cpu_info.has(Cpu::tBMI1))
features |= HostFeature::BMI1;
if (cpu_info.has(Cpu::tBMI2))
features |= HostFeature::BMI2;
if (cpu_info.has(Cpu::tLZCNT))
features |= HostFeature::LZCNT;
if (cpu_info.has(Cpu::tGFNI))
features |= HostFeature::GFNI;
if (cpu_info.has(Cpu::tBMI2)) { if (cpu_info.has(Cpu::tBMI2)) {
// BMI2 instructions such as pdep and pext have been very slow up until Zen 3. // BMI2 instructions such as pdep and pext have been very slow up until Zen 3.
@ -109,7 +130,7 @@ HostFeature GetHostFeatures()
if (cpu_info.has(Cpu::tAMD)) { if (cpu_info.has(Cpu::tAMD)) {
std::array<u32, 4> data{}; std::array<u32, 4> data{};
cpu_info.getCpuid(1, data.data()); cpu_info.getCpuid(1, data.data());
const u32 family_base = Common::Bits< 8, 11>(data[0]); const u32 family_base = Common::Bits<8, 11>(data[0]);
const u32 family_extended = Common::Bits<20, 27>(data[0]); const u32 family_extended = Common::Bits<20, 27>(data[0]);
const u32 family = family_base + family_extended; const u32 family = family_base + family_extended;
if (family >= 0x19) if (family >= 0x19)
@ -123,7 +144,7 @@ HostFeature GetHostFeatures()
return features; return features;
} }
} // anonymous namespace } // anonymous namespace
BlockOfCode::BlockOfCode(RunCodeCallbacks cb, JitStateInfo jsi, size_t total_code_size, size_t far_code_offset, std::function<void(BlockOfCode&)> rcp) BlockOfCode::BlockOfCode(RunCodeCallbacks cb, JitStateInfo jsi, size_t total_code_size, size_t far_code_offset, std::function<void(BlockOfCode&)> rcp)
: Xbyak::CodeGenerator(total_code_size, nullptr, &s_allocator) : Xbyak::CodeGenerator(total_code_size, nullptr, &s_allocator)
@ -131,8 +152,7 @@ BlockOfCode::BlockOfCode(RunCodeCallbacks cb, JitStateInfo jsi, size_t total_cod
, jsi(jsi) , jsi(jsi)
, far_code_offset(far_code_offset) , far_code_offset(far_code_offset)
, constant_pool(*this, CONSTANT_POOL_SIZE) , constant_pool(*this, CONSTANT_POOL_SIZE)
, host_features(GetHostFeatures()) , host_features(GetHostFeatures()) {
{
ASSERT(total_code_size > far_code_offset); ASSERT(total_code_size > far_code_offset);
EnableWriting(); EnableWriting();
GenRunCode(rcp); GenRunCode(rcp);
@ -210,7 +230,7 @@ void BlockOfCode::GenRunCode(std::function<void(BlockOfCode&)> rcp) {
ABI_PushCalleeSaveRegistersAndAdjustStack(*this, sizeof(StackLayout)); ABI_PushCalleeSaveRegistersAndAdjustStack(*this, sizeof(StackLayout));
mov(r15, ABI_PARAM1); mov(r15, ABI_PARAM1);
mov(rbx, ABI_PARAM2); // save temporarily in non-volatile register mov(rbx, ABI_PARAM2); // save temporarily in non-volatile register
cb.GetTicksRemaining->EmitCall(*this); cb.GetTicksRemaining->EmitCall(*this);
mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)], ABI_RETURN); mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)], ABI_RETURN);
@ -368,4 +388,4 @@ void BlockOfCode::EnsurePatchLocationSize(CodePtr begin, size_t size) {
nop(size - current_size); nop(size - current_size);
} }
} // namespace Dynarmic::Backend::X64 } // namespace Dynarmic::Backend::X64

View file

@ -73,12 +73,12 @@ public:
void LookupBlock(); void LookupBlock();
/// Code emitter: Calls the function /// Code emitter: Calls the function
template <typename FunctionPointer> template<typename FunctionPointer>
void CallFunction(FunctionPointer fn) { void CallFunction(FunctionPointer fn) {
static_assert(std::is_pointer_v<FunctionPointer> && std::is_function_v<std::remove_pointer_t<FunctionPointer>>, static_assert(std::is_pointer_v<FunctionPointer> && std::is_function_v<std::remove_pointer_t<FunctionPointer>>,
"Supplied type must be a pointer to a function"); "Supplied type must be a pointer to a function");
const u64 address = reinterpret_cast<u64>(fn); const u64 address = reinterpret_cast<u64>(fn);
const u64 distance = address - (getCurr<u64>() + 5); const u64 distance = address - (getCurr<u64>() + 5);
if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) { if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
@ -91,7 +91,7 @@ public:
} }
/// Code emitter: Calls the lambda. Lambda must not have any captures. /// Code emitter: Calls the lambda. Lambda must not have any captures.
template <typename Lambda> template<typename Lambda>
void CallLambda(Lambda l) { void CallLambda(Lambda l) {
CallFunction(Common::FptrCast(l)); CallFunction(Common::FptrCast(l));
} }
@ -165,7 +165,7 @@ private:
CodePtr near_code_ptr; CodePtr near_code_ptr;
CodePtr far_code_ptr; CodePtr far_code_ptr;
using RunCodeFuncType = void(*)(void*, CodePtr); using RunCodeFuncType = void (*)(void*, CodePtr);
RunCodeFuncType run_code = nullptr; RunCodeFuncType run_code = nullptr;
RunCodeFuncType step_code = nullptr; RunCodeFuncType step_code = nullptr;
static constexpr size_t MXCSR_ALREADY_EXITED = 1 << 0; static constexpr size_t MXCSR_ALREADY_EXITED = 1 << 0;
@ -176,4 +176,4 @@ private:
const HostFeature host_features; const HostFeature host_features;
}; };
} // namespace Dynarmic::Backend::X64 } // namespace Dynarmic::Backend::X64

View file

@ -3,32 +3,33 @@
* SPDX-License-Identifier: 0BSD * SPDX-License-Identifier: 0BSD
*/ */
#include "dynarmic/backend/x64/block_range_information.h"
#include <boost/icl/interval_map.hpp> #include <boost/icl/interval_map.hpp>
#include <boost/icl/interval_set.hpp> #include <boost/icl/interval_set.hpp>
#include <tsl/robin_set.h> #include <tsl/robin_set.h>
#include "dynarmic/backend/x64/block_range_information.h"
#include "dynarmic/common/common_types.h" #include "dynarmic/common/common_types.h"
namespace Dynarmic::Backend::X64 { namespace Dynarmic::Backend::X64 {
template <typename ProgramCounterType> template<typename ProgramCounterType>
void BlockRangeInformation<ProgramCounterType>::AddRange(boost::icl::discrete_interval<ProgramCounterType> range, IR::LocationDescriptor location) { void BlockRangeInformation<ProgramCounterType>::AddRange(boost::icl::discrete_interval<ProgramCounterType> range, IR::LocationDescriptor location) {
block_ranges.add(std::make_pair(range, std::set<IR::LocationDescriptor>{location})); block_ranges.add(std::make_pair(range, std::set<IR::LocationDescriptor>{location}));
} }
template <typename ProgramCounterType> template<typename ProgramCounterType>
void BlockRangeInformation<ProgramCounterType>::ClearCache() { void BlockRangeInformation<ProgramCounterType>::ClearCache() {
block_ranges.clear(); block_ranges.clear();
} }
template <typename ProgramCounterType> template<typename ProgramCounterType>
tsl::robin_set<IR::LocationDescriptor> BlockRangeInformation<ProgramCounterType>::InvalidateRanges(const boost::icl::interval_set<ProgramCounterType>& ranges) { tsl::robin_set<IR::LocationDescriptor> BlockRangeInformation<ProgramCounterType>::InvalidateRanges(const boost::icl::interval_set<ProgramCounterType>& ranges) {
tsl::robin_set<IR::LocationDescriptor> erase_locations; tsl::robin_set<IR::LocationDescriptor> erase_locations;
for (auto invalidate_interval : ranges) { for (auto invalidate_interval : ranges) {
auto pair = block_ranges.equal_range(invalidate_interval); auto pair = block_ranges.equal_range(invalidate_interval);
for (auto it = pair.first; it != pair.second; ++it) { for (auto it = pair.first; it != pair.second; ++it) {
for (const auto &descriptor : it->second) { for (const auto& descriptor : it->second) {
erase_locations.insert(descriptor); erase_locations.insert(descriptor);
} }
} }
@ -40,4 +41,4 @@ tsl::robin_set<IR::LocationDescriptor> BlockRangeInformation<ProgramCounterType>
template class BlockRangeInformation<u32>; template class BlockRangeInformation<u32>;
template class BlockRangeInformation<u64>; template class BlockRangeInformation<u64>;
} // namespace Dynarmic::Backend::X64 } // namespace Dynarmic::Backend::X64

View file

@ -15,7 +15,7 @@
namespace Dynarmic::Backend::X64 { namespace Dynarmic::Backend::X64 {
template <typename ProgramCounterType> template<typename ProgramCounterType>
class BlockRangeInformation { class BlockRangeInformation {
public: public:
void AddRange(boost::icl::discrete_interval<ProgramCounterType> range, IR::LocationDescriptor location); void AddRange(boost::icl::discrete_interval<ProgramCounterType> range, IR::LocationDescriptor location);
@ -26,4 +26,4 @@ private:
boost::icl::interval_map<ProgramCounterType, std::set<IR::LocationDescriptor>> block_ranges; boost::icl::interval_map<ProgramCounterType, std::set<IR::LocationDescriptor>> block_ranges;
}; };
} // namespace Dynarmic::Backend::X64 } // namespace Dynarmic::Backend::X64

View file

@ -4,6 +4,7 @@
*/ */
#include "dynarmic/backend/x64/callback.h" #include "dynarmic/backend/x64/callback.h"
#include "dynarmic/backend/x64/block_of_code.h" #include "dynarmic/backend/x64/block_of_code.h"
namespace Dynarmic::Backend::X64 { namespace Dynarmic::Backend::X64 {
@ -37,4 +38,4 @@ void ArgCallback::EmitCallWithReturnPointer(BlockOfCode& code, std::function<voi
code.CallFunction(fn); code.CallFunction(fn);
} }
} // namespace Dynarmic::Backend::X64 } // namespace Dynarmic::Backend::X64

View file

@ -22,16 +22,23 @@ class Callback {
public: public:
virtual ~Callback(); virtual ~Callback();
virtual void EmitCall(BlockOfCode& code, std::function<void(RegList)> fn = [](RegList){}) const = 0; void EmitCall(BlockOfCode& code) const {
EmitCall(code, [](RegList) {});
}
virtual void EmitCall(BlockOfCode& code, std::function<void(RegList)> fn) const = 0;
virtual void EmitCallWithReturnPointer(BlockOfCode& code, std::function<void(Xbyak::Reg64, RegList)> fn) const = 0; virtual void EmitCallWithReturnPointer(BlockOfCode& code, std::function<void(Xbyak::Reg64, RegList)> fn) const = 0;
}; };
class SimpleCallback final : public Callback { class SimpleCallback final : public Callback {
public: public:
template <typename Function> template<typename Function>
SimpleCallback(Function fn) : fn(reinterpret_cast<void(*)()>(fn)) {} SimpleCallback(Function fn)
: fn(reinterpret_cast<void (*)()>(fn)) {}
void EmitCall(BlockOfCode& code, std::function<void(RegList)> fn = [](RegList){}) const override; using Callback::EmitCall;
void EmitCall(BlockOfCode& code, std::function<void(RegList)> fn) const override;
void EmitCallWithReturnPointer(BlockOfCode& code, std::function<void(Xbyak::Reg64, RegList)> fn) const override; void EmitCallWithReturnPointer(BlockOfCode& code, std::function<void(Xbyak::Reg64, RegList)> fn) const override;
private: private:
@ -40,10 +47,13 @@ private:
class ArgCallback final : public Callback { class ArgCallback final : public Callback {
public: public:
template <typename Function> template<typename Function>
ArgCallback(Function fn, u64 arg) : fn(reinterpret_cast<void(*)()>(fn)), arg(arg) {} ArgCallback(Function fn, u64 arg)
: fn(reinterpret_cast<void (*)()>(fn)), arg(arg) {}
void EmitCall(BlockOfCode& code, std::function<void(RegList)> fn = [](RegList){}) const override; using Callback::EmitCall;
void EmitCall(BlockOfCode& code, std::function<void(RegList)> fn) const override;
void EmitCallWithReturnPointer(BlockOfCode& code, std::function<void(Xbyak::Reg64, RegList)> fn) const override; void EmitCallWithReturnPointer(BlockOfCode& code, std::function<void(Xbyak::Reg64, RegList)> fn) const override;
private: private:
@ -51,4 +61,4 @@ private:
u64 arg; u64 arg;
}; };
} // namespace Dynarmic::Backend::X64 } // namespace Dynarmic::Backend::X64

View file

@ -3,15 +3,17 @@
* SPDX-License-Identifier: 0BSD * SPDX-License-Identifier: 0BSD
*/ */
#include "dynarmic/backend/x64/constant_pool.h"
#include <cstring> #include <cstring>
#include "dynarmic/backend/x64/block_of_code.h" #include "dynarmic/backend/x64/block_of_code.h"
#include "dynarmic/backend/x64/constant_pool.h"
#include "dynarmic/common/assert.h" #include "dynarmic/common/assert.h"
namespace Dynarmic::Backend::X64 { namespace Dynarmic::Backend::X64 {
ConstantPool::ConstantPool(BlockOfCode& code, size_t size) : code(code), pool_size(size) { ConstantPool::ConstantPool(BlockOfCode& code, size_t size)
: code(code), pool_size(size) {
code.int3(); code.int3();
code.align(align_size); code.align(align_size);
pool_begin = reinterpret_cast<u8*>(code.AllocateFromCodeSpace(size)); pool_begin = reinterpret_cast<u8*>(code.AllocateFromCodeSpace(size));
@ -31,4 +33,4 @@ Xbyak::Address ConstantPool::GetConstant(const Xbyak::AddressFrame& frame, u64 l
return frame[code.rip + iter->second]; return frame[code.rip + iter->second];
} }
} // namespace Dynarmic::Backend::X64 } // namespace Dynarmic::Backend::X64

View file

@ -27,7 +27,7 @@ public:
Xbyak::Address GetConstant(const Xbyak::AddressFrame& frame, u64 lower, u64 upper = 0); Xbyak::Address GetConstant(const Xbyak::AddressFrame& frame, u64 lower, u64 upper = 0);
private: private:
static constexpr size_t align_size = 16; // bytes static constexpr size_t align_size = 16; // bytes
std::map<std::tuple<u64, u64>, void*> constant_info; std::map<std::tuple<u64, u64>, void*> constant_info;
@ -37,4 +37,4 @@ private:
u8* current_pool_ptr; u8* current_pool_ptr;
}; };
} // namespace Dynarmic::Backend::X64 } // namespace Dynarmic::Backend::X64

View file

@ -19,17 +19,17 @@ namespace Backend::X64 {
namespace impl { namespace impl {
template <typename FunctionType, FunctionType mfp> template<typename FunctionType, FunctionType mfp>
struct ThunkBuilder; struct ThunkBuilder;
template <typename C, typename R, typename... Args, R(C::*mfp)(Args...)> template<typename C, typename R, typename... Args, R (C::*mfp)(Args...)>
struct ThunkBuilder<R(C::*)(Args...), mfp> { struct ThunkBuilder<R (C::*)(Args...), mfp> {
static R Thunk(C* this_, Args... args) { static R Thunk(C* this_, Args... args) {
return (this_->*mfp)(std::forward<Args>(args)...); return (this_->*mfp)(std::forward<Args>(args)...);
} }
}; };
} // namespace impl } // namespace impl
template<auto mfp> template<auto mfp>
ArgCallback DevirtualizeGeneric(mp::class_type<decltype(mfp)>* this_) { ArgCallback DevirtualizeGeneric(mp::class_type<decltype(mfp)>* this_) {
@ -77,5 +77,5 @@ ArgCallback Devirtualize(mp::class_type<decltype(mfp)>* this_) {
#endif #endif
} }
} // namespace Backend::X64 } // namespace Backend::X64
} // namespace Dynarmic } // namespace Dynarmic

View file

@ -3,12 +3,13 @@
* SPDX-License-Identifier: 0BSD * SPDX-License-Identifier: 0BSD
*/ */
#include "dynarmic/backend/x64/emit_x64.h"
#include <iterator> #include <iterator>
#include <tsl/robin_set.h> #include <tsl/robin_set.h>
#include "dynarmic/backend/x64/block_of_code.h" #include "dynarmic/backend/x64/block_of_code.h"
#include "dynarmic/backend/x64/emit_x64.h"
#include "dynarmic/backend/x64/nzcv_util.h" #include "dynarmic/backend/x64/nzcv_util.h"
#include "dynarmic/backend/x64/perf_map.h" #include "dynarmic/backend/x64/perf_map.h"
#include "dynarmic/backend/x64/stack_layout.h" #include "dynarmic/backend/x64/stack_layout.h"
@ -29,7 +30,7 @@ namespace Dynarmic::Backend::X64 {
using namespace Xbyak::util; using namespace Xbyak::util;
EmitContext::EmitContext(RegAlloc& reg_alloc, IR::Block& block) EmitContext::EmitContext(RegAlloc& reg_alloc, IR::Block& block)
: reg_alloc(reg_alloc), block(block) {} : reg_alloc(reg_alloc), block(block) {}
size_t EmitContext::GetInstOffset(IR::Inst* inst) const { size_t EmitContext::GetInstOffset(IR::Inst* inst) const {
return static_cast<size_t>(std::distance(block.begin(), IR::Block::iterator(inst))); return static_cast<size_t>(std::distance(block.begin(), IR::Block::iterator(inst)));
@ -40,7 +41,8 @@ void EmitContext::EraseInstruction(IR::Inst* inst) {
inst->ClearArgs(); inst->ClearArgs();
} }
EmitX64::EmitX64(BlockOfCode& code) : code(code) { EmitX64::EmitX64(BlockOfCode& code)
: code(code) {
exception_handler.Register(code); exception_handler.Register(code);
} }
@ -73,8 +75,8 @@ void EmitX64::PushRSBHelper(Xbyak::Reg64 loc_desc_reg, Xbyak::Reg64 index_reg, I
const auto iter = block_descriptors.find(target); const auto iter = block_descriptors.find(target);
CodePtr target_code_ptr = iter != block_descriptors.end() CodePtr target_code_ptr = iter != block_descriptors.end()
? iter->second.entrypoint ? iter->second.entrypoint
: code.GetReturnFromRunCodeAddress(); : code.GetReturnFromRunCodeAddress();
code.mov(index_reg.cvt32(), dword[r15 + code.GetJitStateInfo().offsetof_rsb_ptr]); code.mov(index_reg.cvt32(), dword[r15 + code.GetJitStateInfo().offsetof_rsb_ptr]);
@ -126,7 +128,7 @@ void EmitX64::EmitGetLowerFromOp(EmitContext&, IR::Inst*) {
void EmitX64::EmitGetNZCVFromOp(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitGetNZCVFromOp(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const int bitsize = [&]{ const int bitsize = [&] {
switch (args[0].GetType()) { switch (args[0].GetType()) {
case IR::Type::U8: case IR::Type::U8:
return 8; return 8;
@ -195,64 +197,64 @@ Xbyak::Label EmitX64::EmitCond(IR::Cond cond) {
// add al, 0x7F restores OF // add al, 0x7F restores OF
switch (cond) { switch (cond) {
case IR::Cond::EQ: //z case IR::Cond::EQ: //z
code.sahf(); code.sahf();
code.jz(pass); code.jz(pass);
break; break;
case IR::Cond::NE: //!z case IR::Cond::NE: //!z
code.sahf(); code.sahf();
code.jnz(pass); code.jnz(pass);
break; break;
case IR::Cond::CS: //c case IR::Cond::CS: //c
code.sahf(); code.sahf();
code.jc(pass); code.jc(pass);
break; break;
case IR::Cond::CC: //!c case IR::Cond::CC: //!c
code.sahf(); code.sahf();
code.jnc(pass); code.jnc(pass);
break; break;
case IR::Cond::MI: //n case IR::Cond::MI: //n
code.sahf(); code.sahf();
code.js(pass); code.js(pass);
break; break;
case IR::Cond::PL: //!n case IR::Cond::PL: //!n
code.sahf(); code.sahf();
code.jns(pass); code.jns(pass);
break; break;
case IR::Cond::VS: //v case IR::Cond::VS: //v
code.cmp(al, 0x81); code.cmp(al, 0x81);
code.jo(pass); code.jo(pass);
break; break;
case IR::Cond::VC: //!v case IR::Cond::VC: //!v
code.cmp(al, 0x81); code.cmp(al, 0x81);
code.jno(pass); code.jno(pass);
break; break;
case IR::Cond::HI: //c & !z case IR::Cond::HI: //c & !z
code.sahf(); code.sahf();
code.cmc(); code.cmc();
code.ja(pass); code.ja(pass);
break; break;
case IR::Cond::LS: //!c | z case IR::Cond::LS: //!c | z
code.sahf(); code.sahf();
code.cmc(); code.cmc();
code.jna(pass); code.jna(pass);
break; break;
case IR::Cond::GE: // n == v case IR::Cond::GE: // n == v
code.cmp(al, 0x81); code.cmp(al, 0x81);
code.sahf(); code.sahf();
code.jge(pass); code.jge(pass);
break; break;
case IR::Cond::LT: // n != v case IR::Cond::LT: // n != v
code.cmp(al, 0x81); code.cmp(al, 0x81);
code.sahf(); code.sahf();
code.jl(pass); code.jl(pass);
break; break;
case IR::Cond::GT: // !z & (n == v) case IR::Cond::GT: // !z & (n == v)
code.cmp(al, 0x81); code.cmp(al, 0x81);
code.sahf(); code.sahf();
code.jg(pass); code.jg(pass);
break; break;
case IR::Cond::LE: // z | (n != v) case IR::Cond::LE: // z | (n != v)
code.cmp(al, 0x81); code.cmp(al, 0x81);
code.sahf(); code.sahf();
code.jle(pass); code.jle(pass);
@ -325,7 +327,7 @@ void EmitX64::InvalidateBasicBlocks(const tsl::robin_set<IR::LocationDescriptor>
code.EnableWriting(); code.EnableWriting();
SCOPE_EXIT { code.DisableWriting(); }; SCOPE_EXIT { code.DisableWriting(); };
for (const auto &descriptor : locations) { for (const auto& descriptor : locations) {
const auto it = block_descriptors.find(descriptor); const auto it = block_descriptors.find(descriptor);
if (it == block_descriptors.end()) { if (it == block_descriptors.end()) {
continue; continue;
@ -338,4 +340,4 @@ void EmitX64::InvalidateBasicBlocks(const tsl::robin_set<IR::LocationDescriptor>
} }
} }
} // namespace Dynarmic::Backend::X64 } // namespace Dynarmic::Backend::X64

View file

@ -13,7 +13,6 @@
#include <tsl/robin_map.h> #include <tsl/robin_map.h>
#include <tsl/robin_set.h> #include <tsl/robin_set.h>
#include <xbyak_util.h> #include <xbyak_util.h>
#include "dynarmic/backend/x64/exception_handler.h" #include "dynarmic/backend/x64/exception_handler.h"
@ -26,11 +25,11 @@
namespace Dynarmic::IR { namespace Dynarmic::IR {
class Block; class Block;
class Inst; class Inst;
} // namespace Dynarmic::IR } // namespace Dynarmic::IR
namespace Dynarmic { namespace Dynarmic {
enum class OptimizationFlag : u32; enum class OptimizationFlag : u32;
} // namespace Dynarmic } // namespace Dynarmic
namespace Dynarmic::Backend::X64 { namespace Dynarmic::Backend::X64 {
@ -41,10 +40,10 @@ using A64FullVectorWidth = std::integral_constant<size_t, 128>;
// Array alias that always sizes itself according to the given type T // Array alias that always sizes itself according to the given type T
// relative to the size of a vector register. e.g. T = u32 would result // relative to the size of a vector register. e.g. T = u32 would result
// in a std::array<u32, 4>. // in a std::array<u32, 4>.
template <typename T> template<typename T>
using VectorArray = std::array<T, A64FullVectorWidth::value / Common::BitSize<T>()>; using VectorArray = std::array<T, A64FullVectorWidth::value / Common::BitSize<T>()>;
template <typename T> template<typename T>
using HalfVectorArray = std::array<T, A64FullVectorWidth::value / Common::BitSize<T>() / 2>; using HalfVectorArray = std::array<T, A64FullVectorWidth::value / Common::BitSize<T>() / 2>;
struct EmitContext { struct EmitContext {
@ -128,4 +127,4 @@ protected:
tsl::robin_map<IR::LocationDescriptor, PatchInformation> patch_information; tsl::robin_map<IR::LocationDescriptor, PatchInformation> patch_information;
}; };
} // namespace Dynarmic::Backend::X64 } // namespace Dynarmic::Backend::X64

View file

@ -73,7 +73,7 @@ void EmitX64::EmitAESEncryptSingleRound(EmitContext& ctx, IR::Inst* inst) {
} }
void EmitX64::EmitAESInverseMixColumns(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitAESInverseMixColumns(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
if (code.HasHostFeature(HostFeature::AES)) { if (code.HasHostFeature(HostFeature::AES)) {
const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]);
@ -105,4 +105,4 @@ void EmitX64::EmitAESMixColumns(EmitContext& ctx, IR::Inst* inst) {
EmitAESFunction(args, ctx, code, inst, AES::MixColumns); EmitAESFunction(args, ctx, code, inst, AES::MixColumns);
} }
} // namespace Dynarmic::Backend::X64 } // namespace Dynarmic::Backend::X64

View file

@ -145,4 +145,4 @@ void EmitX64::EmitCRC32ISO64(EmitContext& ctx, IR::Inst* inst) {
EmitCRC32ISO(code, ctx, inst, 64); EmitCRC32ISO(code, ctx, inst, 64);
} }
} // namespace Dynarmic::Backend::X64 } // namespace Dynarmic::Backend::X64

View file

@ -24,7 +24,7 @@ void EmitX64::EmitPack2x32To1x64(EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Reg64 hi = ctx.reg_alloc.UseScratchGpr(args[1]); const Xbyak::Reg64 hi = ctx.reg_alloc.UseScratchGpr(args[1]);
code.shl(hi, 32); code.shl(hi, 32);
code.mov(lo.cvt32(), lo.cvt32()); // Zero extend to 64-bits code.mov(lo.cvt32(), lo.cvt32()); // Zero extend to 64-bits
code.or_(lo, hi); code.or_(lo, hi);
ctx.reg_alloc.DefineValue(inst, lo); ctx.reg_alloc.DefineValue(inst, lo);
@ -146,64 +146,64 @@ static void EmitConditionalSelect(BlockOfCode& code, EmitContext& ctx, IR::Inst*
// add al, 0x7F restores OF // add al, 0x7F restores OF
switch (args[0].GetImmediateCond()) { switch (args[0].GetImmediateCond()) {
case IR::Cond::EQ: //z case IR::Cond::EQ: //z
code.sahf(); code.sahf();
code.cmovz(else_, then_); code.cmovz(else_, then_);
break; break;
case IR::Cond::NE: //!z case IR::Cond::NE: //!z
code.sahf(); code.sahf();
code.cmovnz(else_, then_); code.cmovnz(else_, then_);
break; break;
case IR::Cond::CS: //c case IR::Cond::CS: //c
code.sahf(); code.sahf();
code.cmovc(else_, then_); code.cmovc(else_, then_);
break; break;
case IR::Cond::CC: //!c case IR::Cond::CC: //!c
code.sahf(); code.sahf();
code.cmovnc(else_, then_); code.cmovnc(else_, then_);
break; break;
case IR::Cond::MI: //n case IR::Cond::MI: //n
code.sahf(); code.sahf();
code.cmovs(else_, then_); code.cmovs(else_, then_);
break; break;
case IR::Cond::PL: //!n case IR::Cond::PL: //!n
code.sahf(); code.sahf();
code.cmovns(else_, then_); code.cmovns(else_, then_);
break; break;
case IR::Cond::VS: //v case IR::Cond::VS: //v
code.cmp(nzcv.cvt8(), 0x81); code.cmp(nzcv.cvt8(), 0x81);
code.cmovo(else_, then_); code.cmovo(else_, then_);
break; break;
case IR::Cond::VC: //!v case IR::Cond::VC: //!v
code.cmp(nzcv.cvt8(), 0x81); code.cmp(nzcv.cvt8(), 0x81);
code.cmovno(else_, then_); code.cmovno(else_, then_);
break; break;
case IR::Cond::HI: //c & !z case IR::Cond::HI: //c & !z
code.sahf(); code.sahf();
code.cmc(); code.cmc();
code.cmova(else_, then_); code.cmova(else_, then_);
break; break;
case IR::Cond::LS: //!c | z case IR::Cond::LS: //!c | z
code.sahf(); code.sahf();
code.cmc(); code.cmc();
code.cmovna(else_, then_); code.cmovna(else_, then_);
break; break;
case IR::Cond::GE: // n == v case IR::Cond::GE: // n == v
code.cmp(nzcv.cvt8(), 0x81); code.cmp(nzcv.cvt8(), 0x81);
code.sahf(); code.sahf();
code.cmovge(else_, then_); code.cmovge(else_, then_);
break; break;
case IR::Cond::LT: // n != v case IR::Cond::LT: // n != v
code.cmp(nzcv.cvt8(), 0x81); code.cmp(nzcv.cvt8(), 0x81);
code.sahf(); code.sahf();
code.cmovl(else_, then_); code.cmovl(else_, then_);
break; break;
case IR::Cond::GT: // !z & (n == v) case IR::Cond::GT: // !z & (n == v)
code.cmp(nzcv.cvt8(), 0x81); code.cmp(nzcv.cvt8(), 0x81);
code.sahf(); code.sahf();
code.cmovg(else_, then_); code.cmovg(else_, then_);
break; break;
case IR::Cond::LE: // z | (n != v) case IR::Cond::LE: // z | (n != v)
code.cmp(nzcv.cvt8(), 0x81); code.cmp(nzcv.cvt8(), 0x81);
code.sahf(); code.sahf();
code.cmovle(else_, then_); code.cmovle(else_, then_);
@ -814,7 +814,7 @@ void EmitX64::EmitRotateRightExtended(EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.DefineValue(inst, result); ctx.reg_alloc.DefineValue(inst, result);
} }
template <typename ShfitFT, typename BMI2FT> template<typename ShfitFT, typename BMI2FT>
static void EmitMaskedShift32(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, ShfitFT shift_fn, [[maybe_unused]] BMI2FT bmi2_shift) { static void EmitMaskedShift32(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, ShfitFT shift_fn, [[maybe_unused]] BMI2FT bmi2_shift) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
auto& operand_arg = args[0]; auto& operand_arg = args[0];
@ -851,7 +851,7 @@ static void EmitMaskedShift32(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins
ctx.reg_alloc.DefineValue(inst, result); ctx.reg_alloc.DefineValue(inst, result);
} }
template <typename ShfitFT, typename BMI2FT> template<typename ShfitFT, typename BMI2FT>
static void EmitMaskedShift64(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, ShfitFT shift_fn, [[maybe_unused]] BMI2FT bmi2_shift) { static void EmitMaskedShift64(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, ShfitFT shift_fn, [[maybe_unused]] BMI2FT bmi2_shift) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
auto& operand_arg = args[0]; auto& operand_arg = args[0];
@ -889,35 +889,43 @@ static void EmitMaskedShift64(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins
} }
void EmitX64::EmitLogicalShiftLeftMasked32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitLogicalShiftLeftMasked32(EmitContext& ctx, IR::Inst* inst) {
EmitMaskedShift32(code, ctx, inst, [&](auto result, auto shift) { code.shl(result, shift); }, &Xbyak::CodeGenerator::shlx); EmitMaskedShift32(
code, ctx, inst, [&](auto result, auto shift) { code.shl(result, shift); }, &Xbyak::CodeGenerator::shlx);
} }
void EmitX64::EmitLogicalShiftLeftMasked64(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitLogicalShiftLeftMasked64(EmitContext& ctx, IR::Inst* inst) {
EmitMaskedShift64(code, ctx, inst, [&](auto result, auto shift) { code.shl(result, shift); }, &Xbyak::CodeGenerator::shlx); EmitMaskedShift64(
code, ctx, inst, [&](auto result, auto shift) { code.shl(result, shift); }, &Xbyak::CodeGenerator::shlx);
} }
void EmitX64::EmitLogicalShiftRightMasked32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitLogicalShiftRightMasked32(EmitContext& ctx, IR::Inst* inst) {
EmitMaskedShift32(code, ctx, inst, [&](auto result, auto shift) { code.shr(result, shift); }, &Xbyak::CodeGenerator::shrx); EmitMaskedShift32(
code, ctx, inst, [&](auto result, auto shift) { code.shr(result, shift); }, &Xbyak::CodeGenerator::shrx);
} }
void EmitX64::EmitLogicalShiftRightMasked64(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitLogicalShiftRightMasked64(EmitContext& ctx, IR::Inst* inst) {
EmitMaskedShift64(code, ctx, inst, [&](auto result, auto shift) { code.shr(result, shift); }, &Xbyak::CodeGenerator::shrx); EmitMaskedShift64(
code, ctx, inst, [&](auto result, auto shift) { code.shr(result, shift); }, &Xbyak::CodeGenerator::shrx);
} }
void EmitX64::EmitArithmeticShiftRightMasked32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitArithmeticShiftRightMasked32(EmitContext& ctx, IR::Inst* inst) {
EmitMaskedShift32(code, ctx, inst, [&](auto result, auto shift) { code.sar(result, shift); }, &Xbyak::CodeGenerator::sarx); EmitMaskedShift32(
code, ctx, inst, [&](auto result, auto shift) { code.sar(result, shift); }, &Xbyak::CodeGenerator::sarx);
} }
void EmitX64::EmitArithmeticShiftRightMasked64(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitArithmeticShiftRightMasked64(EmitContext& ctx, IR::Inst* inst) {
EmitMaskedShift64(code, ctx, inst, [&](auto result, auto shift) { code.sar(result, shift); }, &Xbyak::CodeGenerator::sarx); EmitMaskedShift64(
code, ctx, inst, [&](auto result, auto shift) { code.sar(result, shift); }, &Xbyak::CodeGenerator::sarx);
} }
void EmitX64::EmitRotateRightMasked32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitRotateRightMasked32(EmitContext& ctx, IR::Inst* inst) {
EmitMaskedShift32(code, ctx, inst, [&](auto result, auto shift) { code.ror(result, shift); }, nullptr); EmitMaskedShift32(
code, ctx, inst, [&](auto result, auto shift) { code.ror(result, shift); }, nullptr);
} }
void EmitX64::EmitRotateRightMasked64(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitRotateRightMasked64(EmitContext& ctx, IR::Inst* inst) {
EmitMaskedShift64(code, ctx, inst, [&](auto result, auto shift) { code.ror(result, shift); }, nullptr); EmitMaskedShift64(
code, ctx, inst, [&](auto result, auto shift) { code.ror(result, shift); }, nullptr);
} }
static Xbyak::Reg8 DoCarry(RegAlloc& reg_alloc, Argument& carry_in, IR::Inst* carry_out) { static Xbyak::Reg8 DoCarry(RegAlloc& reg_alloc, Argument& carry_in, IR::Inst* carry_out) {
@ -1132,25 +1140,25 @@ void EmitX64::EmitMul64(EmitContext& ctx, IR::Inst* inst) {
} }
void EmitX64::EmitUnsignedMultiplyHigh64(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitUnsignedMultiplyHigh64(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.ScratchGpr(HostLoc::RDX); ctx.reg_alloc.ScratchGpr(HostLoc::RDX);
ctx.reg_alloc.UseScratch(args[0], HostLoc::RAX); ctx.reg_alloc.UseScratch(args[0], HostLoc::RAX);
OpArg op_arg = ctx.reg_alloc.UseOpArg(args[1]); OpArg op_arg = ctx.reg_alloc.UseOpArg(args[1]);
code.mul(*op_arg); code.mul(*op_arg);
ctx.reg_alloc.DefineValue(inst, rdx); ctx.reg_alloc.DefineValue(inst, rdx);
} }
void EmitX64::EmitSignedMultiplyHigh64(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitSignedMultiplyHigh64(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.ScratchGpr(HostLoc::RDX); ctx.reg_alloc.ScratchGpr(HostLoc::RDX);
ctx.reg_alloc.UseScratch(args[0], HostLoc::RAX); ctx.reg_alloc.UseScratch(args[0], HostLoc::RAX);
OpArg op_arg = ctx.reg_alloc.UseOpArg(args[1]); OpArg op_arg = ctx.reg_alloc.UseOpArg(args[1]);
code.imul(*op_arg); code.imul(*op_arg);
ctx.reg_alloc.DefineValue(inst, rdx); ctx.reg_alloc.DefineValue(inst, rdx);
} }
void EmitX64::EmitUnsignedDiv32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitUnsignedDiv32(EmitContext& ctx, IR::Inst* inst) {
@ -1441,7 +1449,7 @@ void EmitX64::EmitZeroExtendHalfToLong(EmitContext& ctx, IR::Inst* inst) {
void EmitX64::EmitZeroExtendWordToLong(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitZeroExtendWordToLong(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(args[0]); const Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(args[0]);
code.mov(result.cvt32(), result.cvt32()); // x64 zeros upper 32 bits on a 32-bit move code.mov(result.cvt32(), result.cvt32()); // x64 zeros upper 32 bits on a 32-bit move
ctx.reg_alloc.DefineValue(inst, result); ctx.reg_alloc.DefineValue(inst, result);
} }
@ -1505,27 +1513,27 @@ void EmitX64::EmitCountLeadingZeros32(EmitContext& ctx, IR::Inst* inst) {
} }
void EmitX64::EmitCountLeadingZeros64(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitCountLeadingZeros64(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
if (code.HasHostFeature(HostFeature::LZCNT)) { if (code.HasHostFeature(HostFeature::LZCNT)) {
const Xbyak::Reg64 source = ctx.reg_alloc.UseGpr(args[0]).cvt64(); const Xbyak::Reg64 source = ctx.reg_alloc.UseGpr(args[0]).cvt64();
const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr().cvt64(); const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr().cvt64();
code.lzcnt(result, source); code.lzcnt(result, source);
ctx.reg_alloc.DefineValue(inst, result); ctx.reg_alloc.DefineValue(inst, result);
} else { } else {
const Xbyak::Reg64 source = ctx.reg_alloc.UseScratchGpr(args[0]).cvt64(); const Xbyak::Reg64 source = ctx.reg_alloc.UseScratchGpr(args[0]).cvt64();
const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr().cvt64(); const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr().cvt64();
// The result of a bsr of zero is undefined, but zf is set after it. // The result of a bsr of zero is undefined, but zf is set after it.
code.bsr(result, source); code.bsr(result, source);
code.mov(source.cvt32(), 0xFFFFFFFF); code.mov(source.cvt32(), 0xFFFFFFFF);
code.cmovz(result.cvt32(), source.cvt32()); code.cmovz(result.cvt32(), source.cvt32());
code.neg(result.cvt32()); code.neg(result.cvt32());
code.add(result.cvt32(), 63); code.add(result.cvt32(), 63);
ctx.reg_alloc.DefineValue(inst, result); ctx.reg_alloc.DefineValue(inst, result);
} }
} }
void EmitX64::EmitMaxSigned32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitMaxSigned32(EmitContext& ctx, IR::Inst* inst) {
@ -1624,4 +1632,4 @@ void EmitX64::EmitMinUnsigned64(EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.DefineValue(inst, y); ctx.reg_alloc.DefineValue(inst, y);
} }
} // namespace Dynarmic::Backend::X64 } // namespace Dynarmic::Backend::X64

View file

@ -51,32 +51,32 @@ constexpr u64 f64_nan = 0x7ff8000000000000u;
constexpr u64 f64_non_sign_mask = 0x7fffffffffffffffu; constexpr u64 f64_non_sign_mask = 0x7fffffffffffffffu;
constexpr u64 f64_smallest_normal = 0x0010000000000000u; constexpr u64 f64_smallest_normal = 0x0010000000000000u;
constexpr u64 f64_min_s16 = 0xc0e0000000000000u; // -32768 as a double constexpr u64 f64_min_s16 = 0xc0e0000000000000u; // -32768 as a double
constexpr u64 f64_max_s16 = 0x40dfffc000000000u; // 32767 as a double constexpr u64 f64_max_s16 = 0x40dfffc000000000u; // 32767 as a double
constexpr u64 f64_min_u16 = 0x0000000000000000u; // 0 as a double constexpr u64 f64_min_u16 = 0x0000000000000000u; // 0 as a double
constexpr u64 f64_max_u16 = 0x40efffe000000000u; // 65535 as a double constexpr u64 f64_max_u16 = 0x40efffe000000000u; // 65535 as a double
constexpr u64 f64_max_s32 = 0x41dfffffffc00000u; // 2147483647 as a double constexpr u64 f64_max_s32 = 0x41dfffffffc00000u; // 2147483647 as a double
constexpr u64 f64_min_u32 = 0x0000000000000000u; // 0 as a double constexpr u64 f64_min_u32 = 0x0000000000000000u; // 0 as a double
constexpr u64 f64_max_u32 = 0x41efffffffe00000u; // 4294967295 as a double constexpr u64 f64_max_u32 = 0x41efffffffe00000u; // 4294967295 as a double
constexpr u64 f64_max_s64_lim = 0x43e0000000000000u; // 2^63 as a double (actual maximum unrepresentable) constexpr u64 f64_max_s64_lim = 0x43e0000000000000u; // 2^63 as a double (actual maximum unrepresentable)
constexpr u64 f64_min_u64 = 0x0000000000000000u; // 0 as a double constexpr u64 f64_min_u64 = 0x0000000000000000u; // 0 as a double
constexpr u64 f64_max_u64_lim = 0x43f0000000000000u; // 2^64 as a double (actual maximum unrepresentable) constexpr u64 f64_max_u64_lim = 0x43f0000000000000u; // 2^64 as a double (actual maximum unrepresentable)
#define FCODE(NAME) \ #define FCODE(NAME) \
[&code](auto... args){ \ [&code](auto... args) { \
if constexpr (fsize == 32) { \ if constexpr (fsize == 32) { \
code.NAME##s(args...); \ code.NAME##s(args...); \
} else { \ } else { \
code.NAME##d(args...); \ code.NAME##d(args...); \
} \ } \
} }
#define ICODE(NAME) \ #define ICODE(NAME) \
[&code](auto... args){ \ [&code](auto... args) { \
if constexpr (fsize == 32) { \ if constexpr (fsize == 32) { \
code.NAME##d(args...); \ code.NAME##d(args...); \
} else { \ } else { \
code.NAME##q(args...); \ code.NAME##q(args...); \
} \ } \
} }
std::optional<int> ConvertRoundingModeToX64Immediate(FP::RoundingMode rounding_mode) { std::optional<int> ConvertRoundingModeToX64Immediate(FP::RoundingMode rounding_mode) {
@ -117,7 +117,7 @@ void DenormalsAreZero(BlockOfCode& code, EmitContext& ctx, std::initializer_list
template<size_t fsize> template<size_t fsize>
void ZeroIfNaN(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Xmm xmm_scratch) { void ZeroIfNaN(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Xmm xmm_scratch) {
code.xorps(xmm_scratch, xmm_scratch); code.xorps(xmm_scratch, xmm_scratch);
FCODE(cmpords)(xmm_scratch, xmm_value); // true mask when ordered (i.e.: when not an NaN) FCODE(cmpords)(xmm_scratch, xmm_value); // true mask when ordered (i.e.: when not an NaN)
code.pand(xmm_value, xmm_scratch); code.pand(xmm_value, xmm_scratch);
} }
@ -248,7 +248,7 @@ void EmitPostProcessNaNs(BlockOfCode& code, Xbyak::Xmm result, Xbyak::Xmm op1, X
code.jmp(end, code.T_NEAR); code.jmp(end, code.T_NEAR);
} }
template <size_t fsize, typename Function> template<size_t fsize, typename Function>
void FPTwoOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) { void FPTwoOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
@ -276,7 +276,7 @@ void FPTwoOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
ctx.reg_alloc.DefineValue(inst, result); ctx.reg_alloc.DefineValue(inst, result);
} }
template <size_t fsize, typename Function> template<size_t fsize, typename Function>
void FPThreeOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) { void FPThreeOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
using FPT = mp::unsigned_integer_of_size<fsize>; using FPT = mp::unsigned_integer_of_size<fsize>;
@ -331,7 +331,7 @@ void FPThreeOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn)
ctx.reg_alloc.DefineValue(inst, result); ctx.reg_alloc.DefineValue(inst, result);
} }
} // anonymous namespace } // anonymous namespace
void EmitX64::EmitFPAbs16(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitFPAbs16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
@ -459,7 +459,7 @@ static void EmitFPMinMaxNumeric(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm op1 = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm op1 = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm op2 = ctx.reg_alloc.UseScratchXmm(args[1]); // Result stored here! const Xbyak::Xmm op2 = ctx.reg_alloc.UseScratchXmm(args[1]); // Result stored here!
Xbyak::Reg tmp = ctx.reg_alloc.ScratchGpr(); Xbyak::Reg tmp = ctx.reg_alloc.ScratchGpr();
tmp.setBit(fsize); tmp.setBit(fsize);
@ -793,7 +793,7 @@ void EmitX64::EmitFPRecipEstimate64(EmitContext& ctx, IR::Inst* inst) {
EmitFPRecipEstimate<64>(code, ctx, inst); EmitFPRecipEstimate<64>(code, ctx, inst);
} }
template <size_t fsize> template<size_t fsize>
static void EmitFPRecipExponent(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { static void EmitFPRecipExponent(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
using FPT = mp::unsigned_integer_of_size<fsize>; using FPT = mp::unsigned_integer_of_size<fsize>;
@ -930,8 +930,7 @@ static void EmitFPRound(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, siz
mp::lift_value<FP::RoundingMode::TowardsPlusInfinity>, mp::lift_value<FP::RoundingMode::TowardsPlusInfinity>,
mp::lift_value<FP::RoundingMode::TowardsMinusInfinity>, mp::lift_value<FP::RoundingMode::TowardsMinusInfinity>,
mp::lift_value<FP::RoundingMode::TowardsZero>, mp::lift_value<FP::RoundingMode::TowardsZero>,
mp::lift_value<FP::RoundingMode::ToNearest_TieAwayFromZero> mp::lift_value<FP::RoundingMode::ToNearest_TieAwayFromZero>>;
>;
using exact_list = mp::list<std::true_type, std::false_type>; using exact_list = mp::list<std::true_type, std::false_type>;
static const auto lut = Common::GenerateLookupTableFromList( static const auto lut = Common::GenerateLookupTableFromList(
@ -947,12 +946,9 @@ static void EmitFPRound(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, siz
using InputSize = mp::unsigned_integer_of_size<fsize>; using InputSize = mp::unsigned_integer_of_size<fsize>;
return FP::FPRoundInt<InputSize>(static_cast<InputSize>(input), fpcr, rounding_mode, exact, fpsr); return FP::FPRoundInt<InputSize>(static_cast<InputSize>(input), fpcr, rounding_mode, exact, fpsr);
} })};
)
};
}, },
mp::cartesian_product<fsize_list, rounding_list, exact_list>{} mp::cartesian_product<fsize_list, rounding_list, exact_list>{});
);
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.HostCall(inst, args[0]); ctx.reg_alloc.HostCall(inst, args[0]);
@ -1267,7 +1263,7 @@ void EmitX64::EmitFPSub64(EmitContext& ctx, IR::Inst* inst) {
} }
static Xbyak::Reg64 SetFpscrNzcvFromFlags(BlockOfCode& code, EmitContext& ctx) { static Xbyak::Reg64 SetFpscrNzcvFromFlags(BlockOfCode& code, EmitContext& ctx) {
ctx.reg_alloc.ScratchGpr(HostLoc::RCX); // shifting requires use of cl ctx.reg_alloc.ScratchGpr(HostLoc::RCX); // shifting requires use of cl
const Xbyak::Reg64 nzcv = ctx.reg_alloc.ScratchGpr(); const Xbyak::Reg64 nzcv = ctx.reg_alloc.ScratchGpr();
// x64 flags ARM flags // x64 flags ARM flags
@ -1287,7 +1283,7 @@ static Xbyak::Reg64 SetFpscrNzcvFromFlags(BlockOfCode& code, EmitContext& ctx) {
code.mov(nzcv, 0x0101'4100'8000'0100); code.mov(nzcv, 0x0101'4100'8000'0100);
code.sete(cl); code.sete(cl);
code.rcl(cl, 5); // cl = ZF:CF:0000 code.rcl(cl, 5); // cl = ZF:CF:0000
code.shr(nzcv, cl); code.shr(nzcv, cl);
return nzcv; return nzcv;
@ -1467,7 +1463,7 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
if constexpr (fsize != 16) { if constexpr (fsize != 16) {
const auto round_imm = ConvertRoundingModeToX64Immediate(rounding_mode); const auto round_imm = ConvertRoundingModeToX64Immediate(rounding_mode);
if (code.HasHostFeature(HostFeature::SSE41) && round_imm){ if (code.HasHostFeature(HostFeature::SSE41) && round_imm) {
const Xbyak::Xmm src = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm src = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm scratch = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm scratch = ctx.reg_alloc.ScratchXmm();
const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr().cvt64(); const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr().cvt64();
@ -1512,7 +1508,7 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
code.jmp(end); code.jmp(end);
code.L(below_max); code.L(below_max);
} }
code.cvttsd2si(result, src); // 64 bit gpr code.cvttsd2si(result, src); // 64 bit gpr
code.L(end); code.L(end);
code.SwitchToFarCode(); code.SwitchToFarCode();
@ -1524,14 +1520,14 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
code.minsd(src, code.MConst(xword, unsigned_ ? f64_max_u32 : f64_max_s32)); code.minsd(src, code.MConst(xword, unsigned_ ? f64_max_u32 : f64_max_s32));
if (unsigned_) { if (unsigned_) {
code.maxsd(src, code.MConst(xword, f64_min_u32)); code.maxsd(src, code.MConst(xword, f64_min_u32));
code.cvttsd2si(result, src); // 64 bit gpr code.cvttsd2si(result, src); // 64 bit gpr
} else { } else {
code.cvttsd2si(result.cvt32(), src); code.cvttsd2si(result.cvt32(), src);
} }
} else { } else {
code.minsd(src, code.MConst(xword, unsigned_ ? f64_max_u16 : f64_max_s16)); code.minsd(src, code.MConst(xword, unsigned_ ? f64_max_u16 : f64_max_s16));
code.maxsd(src, code.MConst(xword, unsigned_ ? f64_min_u16 : f64_min_s16)); code.maxsd(src, code.MConst(xword, unsigned_ ? f64_min_u16 : f64_min_s16));
code.cvttsd2si(result, src); // 64 bit gpr code.cvttsd2si(result, src); // 64 bit gpr
} }
ctx.reg_alloc.DefineValue(inst, result); ctx.reg_alloc.DefineValue(inst, result);
@ -1546,8 +1542,7 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
mp::lift_value<FP::RoundingMode::TowardsPlusInfinity>, mp::lift_value<FP::RoundingMode::TowardsPlusInfinity>,
mp::lift_value<FP::RoundingMode::TowardsMinusInfinity>, mp::lift_value<FP::RoundingMode::TowardsMinusInfinity>,
mp::lift_value<FP::RoundingMode::TowardsZero>, mp::lift_value<FP::RoundingMode::TowardsZero>,
mp::lift_value<FP::RoundingMode::ToNearest_TieAwayFromZero> mp::lift_value<FP::RoundingMode::ToNearest_TieAwayFromZero>>;
>;
static const auto lut = Common::GenerateLookupTableFromList( static const auto lut = Common::GenerateLookupTableFromList(
[](auto args) { [](auto args) {
@ -1561,12 +1556,9 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
using FPT = mp::unsigned_integer_of_size<fsize>; using FPT = mp::unsigned_integer_of_size<fsize>;
return FP::FPToFixed<FPT>(isize, static_cast<FPT>(input), fbits, unsigned_, fpcr, rounding_mode, fpsr); return FP::FPToFixed<FPT>(isize, static_cast<FPT>(input), fbits, unsigned_, fpcr, rounding_mode, fpsr);
} })};
)
};
}, },
mp::cartesian_product<fbits_list, rounding_list>{} mp::cartesian_product<fbits_list, rounding_list>{});
);
ctx.reg_alloc.HostCall(inst, args[0]); ctx.reg_alloc.HostCall(inst, args[0]);
code.lea(code.ABI_PARAM2, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); code.lea(code.ABI_PARAM2, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
@ -1653,7 +1645,7 @@ void EmitX64::EmitFPFixedS16ToSingle(EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32(); const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
const size_t fbits = args[1].GetImmediateU8(); const size_t fbits = args[1].GetImmediateU8();
[[maybe_unused]] const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8()); // Not required [[maybe_unused]] const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8()); // Not required
code.movsx(tmp, from); code.movsx(tmp, from);
code.cvtsi2ss(result, tmp); code.cvtsi2ss(result, tmp);
@ -1673,7 +1665,7 @@ void EmitX64::EmitFPFixedU16ToSingle(EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32(); const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
const size_t fbits = args[1].GetImmediateU8(); const size_t fbits = args[1].GetImmediateU8();
[[maybe_unused]] const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8()); // Not required [[maybe_unused]] const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8()); // Not required
code.movzx(tmp, from); code.movzx(tmp, from);
code.cvtsi2ss(result, tmp); code.cvtsi2ss(result, tmp);
@ -1718,14 +1710,14 @@ void EmitX64::EmitFPFixedU32ToSingle(EmitContext& ctx, IR::Inst* inst) {
const size_t fbits = args[1].GetImmediateU8(); const size_t fbits = args[1].GetImmediateU8();
const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8()); const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
const auto op = [&]{ const auto op = [&] {
if (code.HasHostFeature(HostFeature::AVX512F)) { if (code.HasHostFeature(HostFeature::AVX512F)) {
const Xbyak::Reg64 from = ctx.reg_alloc.UseGpr(args[0]); const Xbyak::Reg64 from = ctx.reg_alloc.UseGpr(args[0]);
code.vcvtusi2ss(result, result, from.cvt32()); code.vcvtusi2ss(result, result, from.cvt32());
} else { } else {
// We are using a 64-bit GPR register to ensure we don't end up treating the input as signed // We are using a 64-bit GPR register to ensure we don't end up treating the input as signed
const Xbyak::Reg64 from = ctx.reg_alloc.UseScratchGpr(args[0]); const Xbyak::Reg64 from = ctx.reg_alloc.UseScratchGpr(args[0]);
code.mov(from.cvt32(), from.cvt32()); // TODO: Verify if this is necessary code.mov(from.cvt32(), from.cvt32()); // TODO: Verify if this is necessary
code.cvtsi2ss(result, from); code.cvtsi2ss(result, from);
} }
}; };
@ -1754,7 +1746,7 @@ void EmitX64::EmitFPFixedS16ToDouble(EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32(); const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
const size_t fbits = args[1].GetImmediateU8(); const size_t fbits = args[1].GetImmediateU8();
[[maybe_unused]] const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8()); // Not required [[maybe_unused]] const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8()); // Not required
code.movsx(tmp, from); code.movsx(tmp, from);
code.cvtsi2sd(result, tmp); code.cvtsi2sd(result, tmp);
@ -1774,7 +1766,7 @@ void EmitX64::EmitFPFixedU16ToDouble(EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32(); const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
const size_t fbits = args[1].GetImmediateU8(); const size_t fbits = args[1].GetImmediateU8();
[[maybe_unused]] const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8()); // Not required [[maybe_unused]] const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8()); // Not required
code.movzx(tmp, from); code.movzx(tmp, from);
code.cvtsi2sd(result, tmp); code.cvtsi2sd(result, tmp);
@ -1793,7 +1785,7 @@ void EmitX64::EmitFPFixedS32ToDouble(EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Reg32 from = ctx.reg_alloc.UseGpr(args[0]).cvt32(); const Xbyak::Reg32 from = ctx.reg_alloc.UseGpr(args[0]).cvt32();
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
const size_t fbits = args[1].GetImmediateU8(); const size_t fbits = args[1].GetImmediateU8();
[[maybe_unused]] const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8()); // Not required [[maybe_unused]] const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8()); // Not required
code.cvtsi2sd(result, from); code.cvtsi2sd(result, from);
@ -1810,7 +1802,7 @@ void EmitX64::EmitFPFixedU32ToDouble(EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Xmm to = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm to = ctx.reg_alloc.ScratchXmm();
const size_t fbits = args[1].GetImmediateU8(); const size_t fbits = args[1].GetImmediateU8();
[[maybe_unused]] const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8()); // Not required [[maybe_unused]] const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8()); // Not required
if (code.HasHostFeature(HostFeature::AVX512F)) { if (code.HasHostFeature(HostFeature::AVX512F)) {
const Xbyak::Reg64 from = ctx.reg_alloc.UseGpr(args[0]); const Xbyak::Reg64 from = ctx.reg_alloc.UseGpr(args[0]);
@ -1818,7 +1810,7 @@ void EmitX64::EmitFPFixedU32ToDouble(EmitContext& ctx, IR::Inst* inst) {
} else { } else {
// We are using a 64-bit GPR register to ensure we don't end up treating the input as signed // We are using a 64-bit GPR register to ensure we don't end up treating the input as signed
const Xbyak::Reg64 from = ctx.reg_alloc.UseScratchGpr(args[0]); const Xbyak::Reg64 from = ctx.reg_alloc.UseScratchGpr(args[0]);
code.mov(from.cvt32(), from.cvt32()); // TODO: Verify if this is necessary code.mov(from.cvt32(), from.cvt32()); // TODO: Verify if this is necessary
code.cvtsi2sd(to, from); code.cvtsi2sd(to, from);
} }
@ -1943,4 +1935,4 @@ void EmitX64::EmitFPFixedU64ToSingle(EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.DefineValue(inst, result); ctx.reg_alloc.DefineValue(inst, result);
} }
} // namespace Dynarmic::Backend::X64 } // namespace Dynarmic::Backend::X64

View file

@ -98,7 +98,7 @@ void EmitX64::EmitPackedAddU16(EmitContext& ctx, IR::Inst* inst) {
code.movdqa(tmp_b, xmm_b); code.movdqa(tmp_b, xmm_b);
code.paddw(tmp_a, code.MConst(xword, 0x80008000)); code.paddw(tmp_a, code.MConst(xword, 0x80008000));
code.paddw(tmp_b, code.MConst(xword, 0x80008000)); code.paddw(tmp_b, code.MConst(xword, 0x80008000));
code.pcmpgtw(tmp_b, tmp_a); // *Signed* comparison! code.pcmpgtw(tmp_b, tmp_a); // *Signed* comparison!
ctx.reg_alloc.DefineValue(ge_inst, tmp_b); ctx.reg_alloc.DefineValue(ge_inst, tmp_b);
ctx.EraseInstruction(ge_inst); ctx.EraseInstruction(ge_inst);
@ -205,7 +205,7 @@ void EmitX64::EmitPackedSubU16(EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
code.movdqa(xmm_ge, xmm_a); code.movdqa(xmm_ge, xmm_a);
code.pmaxuw(xmm_ge, xmm_b); // Requires SSE 4.1 code.pmaxuw(xmm_ge, xmm_b); // Requires SSE 4.1
code.pcmpeqw(xmm_ge, xmm_a); code.pcmpeqw(xmm_ge, xmm_a);
code.psubw(xmm_a, xmm_b); code.psubw(xmm_a, xmm_b);
@ -226,7 +226,7 @@ void EmitX64::EmitPackedSubU16(EmitContext& ctx, IR::Inst* inst) {
code.paddw(xmm_a, code.MConst(xword, 0x80008000)); code.paddw(xmm_a, code.MConst(xword, 0x80008000));
code.paddw(xmm_b, code.MConst(xword, 0x80008000)); code.paddw(xmm_b, code.MConst(xword, 0x80008000));
code.movdqa(xmm_ge, xmm_b); code.movdqa(xmm_ge, xmm_b);
code.pcmpgtw(xmm_ge, xmm_a); // *Signed* comparison! code.pcmpgtw(xmm_ge, xmm_a); // *Signed* comparison!
code.pxor(xmm_ge, ones); code.pxor(xmm_ge, ones);
code.psubw(xmm_a, xmm_b); code.psubw(xmm_a, xmm_b);
@ -709,4 +709,4 @@ void EmitX64::EmitPackedSelect(EmitContext& ctx, IR::Inst* inst) {
} }
} }
} // namespace Dynarmic::Backend::X64 } // namespace Dynarmic::Backend::X64

View file

@ -106,7 +106,7 @@ void EmitUnsignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst
ctx.reg_alloc.DefineValue(inst, addend); ctx.reg_alloc.DefineValue(inst, addend);
} }
} // anonymous namespace } // anonymous namespace
void EmitX64::EmitSignedSaturatedAdd8(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitSignedSaturatedAdd8(EmitContext& ctx, IR::Inst* inst) {
EmitSignedSaturatedOp<Op::Add, 8>(code, ctx, inst); EmitSignedSaturatedOp<Op::Add, 8>(code, ctx, inst);
@ -312,4 +312,4 @@ void EmitX64::EmitUnsignedSaturation(EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.DefineValue(inst, result); ctx.reg_alloc.DefineValue(inst, result);
} }
} // namespace Dynarmic::Backend::X64 } // namespace Dynarmic::Backend::X64

View file

@ -17,4 +17,4 @@ void EmitX64::EmitSM4AccessSubstitutionBox(EmitContext& ctx, IR::Inst* inst) {
code.CallFunction(&Common::Crypto::SM4::AccessSubstitutionBox); code.CallFunction(&Common::Crypto::SM4::AccessSubstitutionBox);
} }
} // namespace Dynarmic::Backend::X64 } // namespace Dynarmic::Backend::X64

View file

@ -25,7 +25,7 @@ namespace Dynarmic::Backend::X64 {
using namespace Xbyak::util; using namespace Xbyak::util;
template <typename Function> template<typename Function>
static void EmitVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) { static void EmitVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
@ -37,7 +37,7 @@ static void EmitVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
ctx.reg_alloc.DefineValue(inst, xmm_a); ctx.reg_alloc.DefineValue(inst, xmm_a);
} }
template <typename Function> template<typename Function>
static void EmitAVXVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) { static void EmitAVXVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
@ -49,7 +49,7 @@ static void EmitAVXVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst
ctx.reg_alloc.DefineValue(inst, xmm_a); ctx.reg_alloc.DefineValue(inst, xmm_a);
} }
template <typename Lambda> template<typename Lambda>
static void EmitOneArgumentFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Lambda lambda) { static void EmitOneArgumentFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Lambda lambda) {
const auto fn = static_cast<mp::equivalent_function_type<Lambda>*>(lambda); const auto fn = static_cast<mp::equivalent_function_type<Lambda>*>(lambda);
constexpr u32 stack_space = 2 * 16; constexpr u32 stack_space = 2 * 16;
@ -72,7 +72,7 @@ static void EmitOneArgumentFallback(BlockOfCode& code, EmitContext& ctx, IR::Ins
ctx.reg_alloc.DefineValue(inst, result); ctx.reg_alloc.DefineValue(inst, result);
} }
template <typename Lambda> template<typename Lambda>
static void EmitOneArgumentFallbackWithSaturation(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Lambda lambda) { static void EmitOneArgumentFallbackWithSaturation(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Lambda lambda) {
const auto fn = static_cast<mp::equivalent_function_type<Lambda>*>(lambda); const auto fn = static_cast<mp::equivalent_function_type<Lambda>*>(lambda);
constexpr u32 stack_space = 2 * 16; constexpr u32 stack_space = 2 * 16;
@ -97,7 +97,7 @@ static void EmitOneArgumentFallbackWithSaturation(BlockOfCode& code, EmitContext
ctx.reg_alloc.DefineValue(inst, result); ctx.reg_alloc.DefineValue(inst, result);
} }
template <typename Lambda> template<typename Lambda>
static void EmitTwoArgumentFallbackWithSaturation(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Lambda lambda) { static void EmitTwoArgumentFallbackWithSaturation(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Lambda lambda) {
const auto fn = static_cast<mp::equivalent_function_type<Lambda>*>(lambda); const auto fn = static_cast<mp::equivalent_function_type<Lambda>*>(lambda);
constexpr u32 stack_space = 3 * 16; constexpr u32 stack_space = 3 * 16;
@ -125,7 +125,7 @@ static void EmitTwoArgumentFallbackWithSaturation(BlockOfCode& code, EmitContext
ctx.reg_alloc.DefineValue(inst, result); ctx.reg_alloc.DefineValue(inst, result);
} }
template <typename Lambda> template<typename Lambda>
static void EmitTwoArgumentFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Lambda lambda) { static void EmitTwoArgumentFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Lambda lambda) {
const auto fn = static_cast<mp::equivalent_function_type<Lambda>*>(lambda); const auto fn = static_cast<mp::equivalent_function_type<Lambda>*>(lambda);
constexpr u32 stack_space = 3 * 16; constexpr u32 stack_space = 3 * 16;
@ -168,7 +168,7 @@ void EmitX64::EmitVectorGetElement8(EmitContext& ctx, IR::Inst* inst) {
if (index % 2 == 1) { if (index % 2 == 1) {
code.shr(dest, 8); code.shr(dest, 8);
} else { } else {
code.and_(dest, 0xFF); // TODO: Remove when zext handling is corrected code.and_(dest, 0xFF); // TODO: Remove when zext handling is corrected
} }
} }
@ -441,8 +441,8 @@ void EmitX64::EmitVectorAnd(EmitContext& ctx, IR::Inst* inst) {
static void ArithmeticShiftRightByte(EmitContext& ctx, BlockOfCode& code, const Xbyak::Xmm& result, u8 shift_amount) { static void ArithmeticShiftRightByte(EmitContext& ctx, BlockOfCode& code, const Xbyak::Xmm& result, u8 shift_amount) {
if (code.HasHostFeature(HostFeature::AVX512VL | HostFeature::GFNI)) { if (code.HasHostFeature(HostFeature::AVX512VL | HostFeature::GFNI)) {
const u64 shift_matrix = shift_amount < 8 const u64 shift_matrix = shift_amount < 8
? (0x0102040810204080 << (shift_amount * 8)) | (0x8080808080808080 >> (64 - shift_amount * 8)) ? (0x0102040810204080 << (shift_amount * 8)) | (0x8080808080808080 >> (64 - shift_amount * 8))
: 0x8080808080808080; : 0x8080808080808080;
code.vgf2p8affineqb(result, result, code.MConst(xword_b, shift_matrix), 0); code.vgf2p8affineqb(result, result, code.MConst(xword_b, shift_matrix), 0);
return; return;
} }
@ -513,7 +513,7 @@ void EmitX64::EmitVectorArithmeticShiftRight64(EmitContext& ctx, IR::Inst* inst)
ctx.reg_alloc.DefineValue(inst, result); ctx.reg_alloc.DefineValue(inst, result);
} }
template <typename T> template<typename T>
static constexpr T VShift(T x, T y) { static constexpr T VShift(T x, T y) {
const s8 shift_amount = static_cast<s8>(static_cast<u8>(y)); const s8 shift_amount = static_cast<s8>(static_cast<u8>(y));
const s64 bit_size = static_cast<s64>(Common::BitSize<T>()); const s64 bit_size = static_cast<s64>(Common::BitSize<T>());
@ -740,7 +740,7 @@ void EmitX64::EmitVectorBroadcast64(EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.DefineValue(inst, a); ctx.reg_alloc.DefineValue(inst, a);
} }
template <typename T> template<typename T>
static void EmitVectorCountLeadingZeros(VectorArray<T>& result, const VectorArray<T>& data) { static void EmitVectorCountLeadingZeros(VectorArray<T>& result, const VectorArray<T>& data) {
for (size_t i = 0; i < result.size(); i++) { for (size_t i = 0; i < result.size(); i++) {
T element = data[i]; T element = data[i];
@ -1875,7 +1875,7 @@ void EmitX64::EmitVectorMinS64(EmitContext& ctx, IR::Inst* inst) {
return; return;
} }
EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray<s64>& result, const VectorArray<s64>& a, const VectorArray<s64>& b){ EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray<s64>& result, const VectorArray<s64>& a, const VectorArray<s64>& b) {
std::transform(a.begin(), a.end(), b.begin(), result.begin(), [](auto x, auto y) { return std::min(x, y); }); std::transform(a.begin(), a.end(), b.begin(), result.begin(), [](auto x, auto y) { return std::min(x, y); });
}); });
} }
@ -1955,7 +1955,7 @@ void EmitX64::EmitVectorMinU64(EmitContext& ctx, IR::Inst* inst) {
return; return;
} }
EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray<u64>& result, const VectorArray<u64>& a, const VectorArray<u64>& b){ EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray<u64>& result, const VectorArray<u64>& a, const VectorArray<u64>& b) {
std::transform(a.begin(), a.end(), b.begin(), result.begin(), [](auto x, auto y) { return std::min(x, y); }); std::transform(a.begin(), a.end(), b.begin(), result.begin(), [](auto x, auto y) { return std::min(x, y); });
}); });
} }
@ -2184,7 +2184,7 @@ void EmitX64::EmitVectorPairedAddLower16(EmitContext& ctx, IR::Inst* inst) {
code.paddd(xmm_a, tmp); code.paddd(xmm_a, tmp);
code.pxor(tmp, tmp); code.pxor(tmp, tmp);
code.psrad(xmm_a, 16); code.psrad(xmm_a, 16);
code.packssdw(xmm_a, tmp); // Note: packusdw is SSE4.1, hence the arithmetic shift above. code.packssdw(xmm_a, tmp); // Note: packusdw is SSE4.1, hence the arithmetic shift above.
} }
ctx.reg_alloc.DefineValue(inst, xmm_a); ctx.reg_alloc.DefineValue(inst, xmm_a);
@ -2413,7 +2413,7 @@ void EmitX64::EmitVectorPairedAddUnsignedWiden32(EmitContext& ctx, IR::Inst* ins
ctx.reg_alloc.DefineValue(inst, a); ctx.reg_alloc.DefineValue(inst, a);
} }
template <typename T, typename Function> template<typename T, typename Function>
static void PairedOperation(VectorArray<T>& result, const VectorArray<T>& x, const VectorArray<T>& y, Function fn) { static void PairedOperation(VectorArray<T>& result, const VectorArray<T>& x, const VectorArray<T>& y, Function fn) {
const size_t range = x.size() / 2; const size_t range = x.size() / 2;
@ -2426,12 +2426,12 @@ static void PairedOperation(VectorArray<T>& result, const VectorArray<T>& x, con
} }
} }
template <typename T> template<typename T>
static void PairedMax(VectorArray<T>& result, const VectorArray<T>& x, const VectorArray<T>& y) { static void PairedMax(VectorArray<T>& result, const VectorArray<T>& x, const VectorArray<T>& y) {
PairedOperation(result, x, y, [](auto a, auto b) { return std::max(a, b); }); PairedOperation(result, x, y, [](auto a, auto b) { return std::max(a, b); });
} }
template <typename T> template<typename T>
static void PairedMin(VectorArray<T>& result, const VectorArray<T>& x, const VectorArray<T>& y) { static void PairedMin(VectorArray<T>& result, const VectorArray<T>& x, const VectorArray<T>& y) {
PairedOperation(result, x, y, [](auto a, auto b) { return std::min(a, b); }); PairedOperation(result, x, y, [](auto a, auto b) { return std::min(a, b); });
} }
@ -2606,7 +2606,7 @@ void EmitX64::EmitVectorPairedMinU32(EmitContext& ctx, IR::Inst* inst) {
} }
} }
template <typename D, typename T> template<typename D, typename T>
static D PolynomialMultiply(T lhs, T rhs) { static D PolynomialMultiply(T lhs, T rhs) {
constexpr size_t bit_size = Common::BitSize<T>(); constexpr size_t bit_size = Common::BitSize<T>();
const std::bitset<bit_size> operand(lhs); const std::bitset<bit_size> operand(lhs);
@ -2762,8 +2762,8 @@ void EmitX64::EmitVectorPopulationCount(EmitContext& ctx, IR::Inst* inst) {
code.movdqa(high_a, low_a); code.movdqa(high_a, low_a);
code.psrlw(high_a, 4); code.psrlw(high_a, 4);
code.movdqa(tmp1, code.MConst(xword, 0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F)); code.movdqa(tmp1, code.MConst(xword, 0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F));
code.pand(high_a, tmp1); // High nibbles code.pand(high_a, tmp1); // High nibbles
code.pand(low_a, tmp1); // Low nibbles code.pand(low_a, tmp1); // Low nibbles
code.movdqa(tmp1, code.MConst(xword, 0x0302020102010100, 0x0403030203020201)); code.movdqa(tmp1, code.MConst(xword, 0x0302020102010100, 0x0403030203020201));
code.movdqa(tmp2, tmp1); code.movdqa(tmp2, tmp1);
@ -2930,7 +2930,7 @@ void EmitX64::EmitVectorRoundingHalvingAddU32(EmitContext& ctx, IR::Inst* inst)
EmitVectorRoundingHalvingAddUnsigned(32, ctx, inst, code); EmitVectorRoundingHalvingAddUnsigned(32, ctx, inst, code);
} }
template <typename T, typename U> template<typename T, typename U>
static void RoundingShiftLeft(VectorArray<T>& out, const VectorArray<T>& lhs, const VectorArray<U>& rhs) { static void RoundingShiftLeft(VectorArray<T>& out, const VectorArray<T>& lhs, const VectorArray<U>& rhs) {
using signed_type = std::make_signed_t<T>; using signed_type = std::make_signed_t<T>;
using unsigned_type = std::make_unsigned_t<T>; using unsigned_type = std::make_unsigned_t<T>;
@ -2947,8 +2947,7 @@ static void RoundingShiftLeft(VectorArray<T>& out, const VectorArray<T>& lhs, co
out[i] = static_cast<T>(static_cast<unsigned_type>(lhs[i]) << extended_shift); out[i] = static_cast<T>(static_cast<unsigned_type>(lhs[i]) << extended_shift);
} }
} else { } else {
if ((std::is_unsigned_v<T> && extended_shift < -bit_size) || if ((std::is_unsigned_v<T> && extended_shift < -bit_size) || (std::is_signed_v<T> && extended_shift <= -bit_size)) {
(std::is_signed_v<T> && extended_shift <= -bit_size)) {
out[i] = 0; out[i] = 0;
} else { } else {
const s64 shift_value = -extended_shift - 1; const s64 shift_value = -extended_shift - 1;
@ -3350,7 +3349,6 @@ static void EmitVectorSignedSaturatedAbs(size_t esize, BlockOfCode& code, EmitCo
ctx.reg_alloc.DefineValue(inst, data); ctx.reg_alloc.DefineValue(inst, data);
} }
void EmitX64::EmitVectorSignedSaturatedAbs8(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorSignedSaturatedAbs8(EmitContext& ctx, IR::Inst* inst) {
EmitVectorSignedSaturatedAbs(8, code, ctx, inst); EmitVectorSignedSaturatedAbs(8, code, ctx, inst);
} }
@ -3869,7 +3867,7 @@ static void EmitVectorSignedSaturatedNarrowToUnsigned(size_t original_esize, Blo
break; break;
case 32: case 32:
ASSERT(code.HasHostFeature(HostFeature::SSE41)); ASSERT(code.HasHostFeature(HostFeature::SSE41));
code.packusdw(dest, dest); // SSE4.1 code.packusdw(dest, dest); // SSE4.1
code.movdqa(reconstructed, dest); code.movdqa(reconstructed, dest);
code.punpcklwd(reconstructed, zero); code.punpcklwd(reconstructed, zero);
break; break;
@ -4024,10 +4022,10 @@ void EmitX64::EmitVectorSignedSaturatedNeg64(EmitContext& ctx, IR::Inst* inst) {
// MSVC requires the capture within the saturate lambda, but it's // MSVC requires the capture within the saturate lambda, but it's
// determined to be unnecessary via clang and GCC. // determined to be unnecessary via clang and GCC.
#ifdef __clang__ #ifdef __clang__
#pragma clang diagnostic push # pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wunused-lambda-capture" # pragma clang diagnostic ignored "-Wunused-lambda-capture"
#endif #endif
template <typename T, typename U = std::make_unsigned_t<T>> template<typename T, typename U = std::make_unsigned_t<T>>
static bool VectorSignedSaturatedShiftLeft(VectorArray<T>& dst, const VectorArray<T>& data, const VectorArray<T>& shift_values) { static bool VectorSignedSaturatedShiftLeft(VectorArray<T>& dst, const VectorArray<T>& data, const VectorArray<T>& shift_values) {
static_assert(std::is_signed_v<T>, "T must be signed."); static_assert(std::is_signed_v<T>, "T must be signed.");
@ -4066,7 +4064,7 @@ static bool VectorSignedSaturatedShiftLeft(VectorArray<T>& dst, const VectorArra
return qc_flag; return qc_flag;
} }
#ifdef __clang__ #ifdef __clang__
#pragma clang diagnostic pop # pragma clang diagnostic pop
#endif #endif
void EmitX64::EmitVectorSignedSaturatedShiftLeft8(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorSignedSaturatedShiftLeft8(EmitContext& ctx, IR::Inst* inst) {
@ -4085,7 +4083,7 @@ void EmitX64::EmitVectorSignedSaturatedShiftLeft64(EmitContext& ctx, IR::Inst* i
EmitTwoArgumentFallbackWithSaturation(code, ctx, inst, VectorSignedSaturatedShiftLeft<s64>); EmitTwoArgumentFallbackWithSaturation(code, ctx, inst, VectorSignedSaturatedShiftLeft<s64>);
} }
template <typename T, typename U = std::make_unsigned_t<T>> template<typename T, typename U = std::make_unsigned_t<T>>
static bool VectorSignedSaturatedShiftLeftUnsigned(VectorArray<T>& dst, const VectorArray<T>& data, const VectorArray<T>& shift_values) { static bool VectorSignedSaturatedShiftLeftUnsigned(VectorArray<T>& dst, const VectorArray<T>& data, const VectorArray<T>& shift_values) {
static_assert(std::is_signed_v<T>, "T must be signed."); static_assert(std::is_signed_v<T>, "T must be signed.");
@ -4166,7 +4164,7 @@ void EmitX64::EmitVectorTableLookup64(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
auto table = ctx.reg_alloc.GetArgumentInfo(inst->GetArg(1).GetInst()); auto table = ctx.reg_alloc.GetArgumentInfo(inst->GetArg(1).GetInst());
const size_t table_size = std::count_if(table.begin(), table.end(), [](const auto& elem){ return !elem.IsVoid(); }); const size_t table_size = std::count_if(table.begin(), table.end(), [](const auto& elem) { return !elem.IsVoid(); });
const bool is_defaults_zero = inst->GetArg(0).IsZero(); const bool is_defaults_zero = inst->GetArg(0).IsZero();
// TODO: AVX512VL implementation when available (VPERMB / VPERMI2B / VPERMT2B) // TODO: AVX512VL implementation when available (VPERMB / VPERMI2B / VPERMT2B)
@ -4318,8 +4316,7 @@ void EmitX64::EmitVectorTableLookup64(EmitContext& ctx, IR::Inst* inst) {
result[i] = table[index][elem]; result[i] = table[index][elem];
} }
} }
} });
);
code.movq(result, qword[rsp + ABI_SHADOW_SPACE + 4 * 8]); code.movq(result, qword[rsp + ABI_SHADOW_SPACE + 4 * 8]);
ctx.reg_alloc.ReleaseStackSpace(stack_space + ABI_SHADOW_SPACE); ctx.reg_alloc.ReleaseStackSpace(stack_space + ABI_SHADOW_SPACE);
@ -4333,7 +4330,7 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
auto table = ctx.reg_alloc.GetArgumentInfo(inst->GetArg(1).GetInst()); auto table = ctx.reg_alloc.GetArgumentInfo(inst->GetArg(1).GetInst());
const size_t table_size = std::count_if(table.begin(), table.end(), [](const auto& elem){ return !elem.IsVoid(); }); const size_t table_size = std::count_if(table.begin(), table.end(), [](const auto& elem) { return !elem.IsVoid(); });
const bool is_defaults_zero = !inst->GetArg(0).IsImmediate() && inst->GetArg(0).GetInst()->GetOpcode() == IR::Opcode::ZeroVector; const bool is_defaults_zero = !inst->GetArg(0).IsImmediate() && inst->GetArg(0).GetInst()->GetOpcode() == IR::Opcode::ZeroVector;
// TODO: AVX512VL implementation when available (VPERMB / VPERMI2B / VPERMT2B) // TODO: AVX512VL implementation when available (VPERMB / VPERMI2B / VPERMT2B)
@ -4448,8 +4445,7 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) {
result[i] = table[index][elem]; result[i] = table[index][elem];
} }
} }
} });
);
code.movaps(result, xword[rsp + ABI_SHADOW_SPACE + (table_size + 0) * 16]); code.movaps(result, xword[rsp + ABI_SHADOW_SPACE + (table_size + 0) * 16]);
ctx.reg_alloc.ReleaseStackSpace(stack_space + ABI_SHADOW_SPACE); ctx.reg_alloc.ReleaseStackSpace(stack_space + ABI_SHADOW_SPACE);
@ -4732,7 +4728,7 @@ void EmitX64::EmitVectorUnsignedRecipSqrtEstimate(EmitContext& ctx, IR::Inst* in
// Simple generic case for 8, 16, and 32-bit values. 64-bit values // Simple generic case for 8, 16, and 32-bit values. 64-bit values
// will need to be special-cased as we can't simply use a larger integral size. // will need to be special-cased as we can't simply use a larger integral size.
template <typename T, typename U = std::make_unsigned_t<T>> template<typename T, typename U = std::make_unsigned_t<T>>
static bool EmitVectorUnsignedSaturatedAccumulateSigned(VectorArray<U>& result, const VectorArray<T>& lhs, const VectorArray<T>& rhs) { static bool EmitVectorUnsignedSaturatedAccumulateSigned(VectorArray<U>& result, const VectorArray<T>& lhs, const VectorArray<T>& rhs) {
static_assert(std::is_signed_v<T>, "T must be signed."); static_assert(std::is_signed_v<T>, "T must be signed.");
static_assert(Common::BitSize<T>() < 64, "T must be less than 64 bits in size."); static_assert(Common::BitSize<T>() < 64, "T must be less than 64 bits in size.");
@ -4833,7 +4829,7 @@ void EmitX64::EmitVectorUnsignedSaturatedNarrow64(EmitContext& ctx, IR::Inst* in
}); });
} }
template <typename T, typename S = std::make_signed_t<T>> template<typename T, typename S = std::make_signed_t<T>>
static bool VectorUnsignedSaturatedShiftLeft(VectorArray<T>& dst, const VectorArray<T>& data, const VectorArray<T>& shift_values) { static bool VectorUnsignedSaturatedShiftLeft(VectorArray<T>& dst, const VectorArray<T>& data, const VectorArray<T>& shift_values) {
static_assert(std::is_unsigned_v<T>, "T must be an unsigned type."); static_assert(std::is_unsigned_v<T>, "T must be an unsigned type.");
@ -4937,7 +4933,7 @@ void EmitX64::EmitVectorZeroUpper(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
code.movq(a, a); // TODO: !IsLastUse code.movq(a, a); // TODO: !IsLastUse
ctx.reg_alloc.DefineValue(inst, a); ctx.reg_alloc.DefineValue(inst, a);
} }
@ -4948,4 +4944,4 @@ void EmitX64::EmitZeroVector(EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.DefineValue(inst, a); ctx.reg_alloc.DefineValue(inst, a);
} }
} // namespace Dynarmic::Backend::X64 } // namespace Dynarmic::Backend::X64

View file

@ -36,21 +36,21 @@ using namespace Xbyak::util;
namespace { namespace {
#define FCODE(NAME) \ #define FCODE(NAME) \
[&code](auto... args){ \ [&code](auto... args) { \
if constexpr (fsize == 32) { \ if constexpr (fsize == 32) { \
code.NAME##s(args...); \ code.NAME##s(args...); \
} else { \ } else { \
code.NAME##d(args...); \ code.NAME##d(args...); \
} \ } \
} }
#define ICODE(NAME) \ #define ICODE(NAME) \
[&code](auto... args){ \ [&code](auto... args) { \
if constexpr (fsize == 32) { \ if constexpr (fsize == 32) { \
code.NAME##d(args...); \ code.NAME##d(args...); \
} else { \ } else { \
code.NAME##q(args...); \ code.NAME##q(args...); \
} \ } \
} }
template<typename Lambda> template<typename Lambda>
@ -71,7 +71,7 @@ struct NaNHandler {
public: public:
using FPT = mp::unsigned_integer_of_size<fsize>; using FPT = mp::unsigned_integer_of_size<fsize>;
using function_type = void(*)(std::array<VectorArray<FPT>, narg>&, FP::FPCR); using function_type = void (*)(std::array<VectorArray<FPT>, narg>&, FP::FPCR);
static function_type GetDefault() { static function_type GetDefault() {
return GetDefaultImpl(std::make_index_sequence<narg - 1>{}); return GetDefaultImpl(std::make_index_sequence<narg - 1>{});
@ -294,13 +294,13 @@ void EmitTwoOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins
if constexpr (std::is_member_function_pointer_v<Function>) { if constexpr (std::is_member_function_pointer_v<Function>) {
result = ctx.reg_alloc.UseScratchXmm(args[0]); result = ctx.reg_alloc.UseScratchXmm(args[0]);
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{ MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
(code.*fn)(result); (code.*fn)(result);
}); });
} else { } else {
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(args[0]); const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(args[0]);
result = ctx.reg_alloc.ScratchXmm(); result = ctx.reg_alloc.ScratchXmm();
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{ MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
fn(result, xmm_a); fn(result, xmm_a);
}); });
} }
@ -337,7 +337,8 @@ void EmitTwoOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins
} }
enum CheckInputNaN { enum CheckInputNaN {
Yes, No, Yes,
No,
}; };
template<size_t fsize, template<typename> class Indexer, typename Function> template<size_t fsize, template<typename> class Indexer, typename Function>
@ -352,11 +353,11 @@ void EmitThreeOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]); const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
if constexpr (std::is_member_function_pointer_v<Function>) { if constexpr (std::is_member_function_pointer_v<Function>) {
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{ MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
(code.*fn)(xmm_a, xmm_b); (code.*fn)(xmm_a, xmm_b);
}); });
} else { } else {
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{ MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
fn(xmm_a, xmm_b); fn(xmm_a, xmm_b);
}); });
} }
@ -549,7 +550,7 @@ void EmitFourOpFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Lam
ctx.reg_alloc.DefineValue(inst, result); ctx.reg_alloc.DefineValue(inst, result);
} }
} // anonymous namespace } // anonymous namespace
void EmitX64::EmitFPVectorAbs16(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitFPVectorAbs16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
@ -614,7 +615,7 @@ void EmitX64::EmitFPVectorEqual32(EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm b = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(args[1]) : ctx.reg_alloc.UseXmm(args[1]); const Xbyak::Xmm b = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(args[1]) : ctx.reg_alloc.UseXmm(args[1]);
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{ MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
DenormalsAreZero<32>(code, ctx.FPCR(fpcr_controlled), {a, b}, xmm0); DenormalsAreZero<32>(code, ctx.FPCR(fpcr_controlled), {a, b}, xmm0);
code.cmpeqps(a, b); code.cmpeqps(a, b);
}); });
@ -628,7 +629,7 @@ void EmitX64::EmitFPVectorEqual64(EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm b = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(args[1]) : ctx.reg_alloc.UseXmm(args[1]); const Xbyak::Xmm b = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(args[1]) : ctx.reg_alloc.UseXmm(args[1]);
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{ MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
DenormalsAreZero<64>(code, ctx.FPCR(fpcr_controlled), {a, b}, xmm0); DenormalsAreZero<64>(code, ctx.FPCR(fpcr_controlled), {a, b}, xmm0);
code.cmpeqpd(a, b); code.cmpeqpd(a, b);
}); });
@ -644,7 +645,7 @@ void EmitX64::EmitFPVectorFromSignedFixed32(EmitContext& ctx, IR::Inst* inst) {
const bool fpcr_controlled = args[3].GetImmediateU1(); const bool fpcr_controlled = args[3].GetImmediateU1();
ASSERT(rounding_mode == ctx.FPCR(fpcr_controlled).RMode()); ASSERT(rounding_mode == ctx.FPCR(fpcr_controlled).RMode());
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{ MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
code.cvtdq2ps(xmm, xmm); code.cvtdq2ps(xmm, xmm);
if (fbits != 0) { if (fbits != 0) {
code.mulps(xmm, GetVectorOf<32>(code, static_cast<u32>(127 - fbits) << 23)); code.mulps(xmm, GetVectorOf<32>(code, static_cast<u32>(127 - fbits) << 23));
@ -662,7 +663,7 @@ void EmitX64::EmitFPVectorFromSignedFixed64(EmitContext& ctx, IR::Inst* inst) {
const bool fpcr_controlled = args[3].GetImmediateU1(); const bool fpcr_controlled = args[3].GetImmediateU1();
ASSERT(rounding_mode == ctx.FPCR(fpcr_controlled).RMode()); ASSERT(rounding_mode == ctx.FPCR(fpcr_controlled).RMode());
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{ MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) { if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) {
code.vcvtqq2pd(xmm, xmm); code.vcvtqq2pd(xmm, xmm);
} else if (code.HasHostFeature(HostFeature::SSE41)) { } else if (code.HasHostFeature(HostFeature::SSE41)) {
@ -713,7 +714,7 @@ void EmitX64::EmitFPVectorFromUnsignedFixed32(EmitContext& ctx, IR::Inst* inst)
const bool fpcr_controlled = args[3].GetImmediateU1(); const bool fpcr_controlled = args[3].GetImmediateU1();
ASSERT(rounding_mode == ctx.FPCR(fpcr_controlled).RMode()); ASSERT(rounding_mode == ctx.FPCR(fpcr_controlled).RMode());
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{ MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
if (code.HasHostFeature(HostFeature::AVX512_Ortho)) { if (code.HasHostFeature(HostFeature::AVX512_Ortho)) {
code.vcvtudq2ps(xmm, xmm); code.vcvtudq2ps(xmm, xmm);
} else { } else {
@ -763,7 +764,7 @@ void EmitX64::EmitFPVectorFromUnsignedFixed64(EmitContext& ctx, IR::Inst* inst)
const bool fpcr_controlled = args[3].GetImmediateU1(); const bool fpcr_controlled = args[3].GetImmediateU1();
ASSERT(rounding_mode == ctx.FPCR(fpcr_controlled).RMode()); ASSERT(rounding_mode == ctx.FPCR(fpcr_controlled).RMode());
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{ MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) { if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) {
code.vcvtuqq2pd(xmm, xmm); code.vcvtuqq2pd(xmm, xmm);
} else { } else {
@ -828,7 +829,7 @@ void EmitX64::EmitFPVectorGreater32(EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Xmm a = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(args[0]) : ctx.reg_alloc.UseXmm(args[0]); const Xbyak::Xmm a = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(args[0]) : ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]); const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]);
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{ MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
DenormalsAreZero<32>(code, ctx.FPCR(fpcr_controlled), {a, b}, xmm0); DenormalsAreZero<32>(code, ctx.FPCR(fpcr_controlled), {a, b}, xmm0);
code.cmpltps(b, a); code.cmpltps(b, a);
}); });
@ -842,7 +843,7 @@ void EmitX64::EmitFPVectorGreater64(EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Xmm a = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(args[0]) : ctx.reg_alloc.UseXmm(args[0]); const Xbyak::Xmm a = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(args[0]) : ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]); const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]);
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{ MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
DenormalsAreZero<64>(code, ctx.FPCR(fpcr_controlled), {a, b}, xmm0); DenormalsAreZero<64>(code, ctx.FPCR(fpcr_controlled), {a, b}, xmm0);
code.cmpltpd(b, a); code.cmpltpd(b, a);
}); });
@ -856,7 +857,7 @@ void EmitX64::EmitFPVectorGreaterEqual32(EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Xmm a = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(args[0]) : ctx.reg_alloc.UseXmm(args[0]); const Xbyak::Xmm a = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(args[0]) : ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]); const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]);
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{ MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
DenormalsAreZero<32>(code, ctx.FPCR(fpcr_controlled), {a, b}, xmm0); DenormalsAreZero<32>(code, ctx.FPCR(fpcr_controlled), {a, b}, xmm0);
code.cmpleps(b, a); code.cmpleps(b, a);
}); });
@ -870,7 +871,7 @@ void EmitX64::EmitFPVectorGreaterEqual64(EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Xmm a = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(args[0]) : ctx.reg_alloc.UseXmm(args[0]); const Xbyak::Xmm a = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(args[0]) : ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]); const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]);
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{ MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
DenormalsAreZero<64>(code, ctx.FPCR(fpcr_controlled), {a, b}, xmm0); DenormalsAreZero<64>(code, ctx.FPCR(fpcr_controlled), {a, b}, xmm0);
code.cmplepd(b, a); code.cmplepd(b, a);
}); });
@ -891,7 +892,7 @@ static void EmitFPVectorMinMax(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
const Xbyak::Xmm eq = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm eq = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm nan_mask = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm nan_mask = ctx.reg_alloc.ScratchXmm();
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{ MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
DenormalsAreZero<fsize>(code, ctx.FPCR(fpcr_controlled), {result, xmm_b}, mask); DenormalsAreZero<fsize>(code, ctx.FPCR(fpcr_controlled), {result, xmm_b}, mask);
if (code.HasHostFeature(HostFeature::AVX)) { if (code.HasHostFeature(HostFeature::AVX)) {
@ -936,49 +937,51 @@ static void EmitFPVectorMinMax(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
return; return;
} }
EmitThreeOpVectorOperation<fsize, DefaultIndexer>(code, ctx, inst, [&](const Xbyak::Xmm& result, Xbyak::Xmm xmm_b){ EmitThreeOpVectorOperation<fsize, DefaultIndexer>(
const Xbyak::Xmm mask = xmm0; code, ctx, inst, [&](const Xbyak::Xmm& result, Xbyak::Xmm xmm_b) {
const Xbyak::Xmm eq = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm mask = xmm0;
const Xbyak::Xmm eq = ctx.reg_alloc.ScratchXmm();
if (ctx.FPCR(fpcr_controlled).FZ()) { if (ctx.FPCR(fpcr_controlled).FZ()) {
const Xbyak::Xmm prev_xmm_b = xmm_b; const Xbyak::Xmm prev_xmm_b = xmm_b;
xmm_b = ctx.reg_alloc.ScratchXmm(); xmm_b = ctx.reg_alloc.ScratchXmm();
code.movaps(xmm_b, prev_xmm_b); code.movaps(xmm_b, prev_xmm_b);
DenormalsAreZero<fsize>(code, ctx.FPCR(fpcr_controlled), {result, xmm_b}, mask); DenormalsAreZero<fsize>(code, ctx.FPCR(fpcr_controlled), {result, xmm_b}, mask);
}
// What we are doing here is handling the case when the inputs are differently signed zeros.
// x86-64 treats differently signed zeros as equal while ARM does not.
// Thus if we AND together things that x86-64 thinks are equal we'll get the positive zero.
if (code.HasHostFeature(HostFeature::AVX)) {
FCODE(vcmpeqp)(mask, result, xmm_b);
if constexpr (is_max) {
FCODE(vandp)(eq, result, xmm_b);
FCODE(vmaxp)(result, result, xmm_b);
} else {
FCODE(vorp)(eq, result, xmm_b);
FCODE(vminp)(result, result, xmm_b);
}
FCODE(blendvp)(result, eq);
} else {
code.movaps(mask, result);
code.movaps(eq, result);
FCODE(cmpneqp)(mask, xmm_b);
if constexpr (is_max) {
code.andps(eq, xmm_b);
FCODE(maxp)(result, xmm_b);
} else {
code.orps(eq, xmm_b);
FCODE(minp)(result, xmm_b);
} }
code.andps(result, mask); // What we are doing here is handling the case when the inputs are differently signed zeros.
code.andnps(mask, eq); // x86-64 treats differently signed zeros as equal while ARM does not.
code.orps(result, mask); // Thus if we AND together things that x86-64 thinks are equal we'll get the positive zero.
}
}, CheckInputNaN::Yes); if (code.HasHostFeature(HostFeature::AVX)) {
FCODE(vcmpeqp)(mask, result, xmm_b);
if constexpr (is_max) {
FCODE(vandp)(eq, result, xmm_b);
FCODE(vmaxp)(result, result, xmm_b);
} else {
FCODE(vorp)(eq, result, xmm_b);
FCODE(vminp)(result, result, xmm_b);
}
FCODE(blendvp)(result, eq);
} else {
code.movaps(mask, result);
code.movaps(eq, result);
FCODE(cmpneqp)(mask, xmm_b);
if constexpr (is_max) {
code.andps(eq, xmm_b);
FCODE(maxp)(result, xmm_b);
} else {
code.orps(eq, xmm_b);
FCODE(minp)(result, xmm_b);
}
code.andps(result, mask);
code.andnps(mask, eq);
code.orps(result, mask);
}
},
CheckInputNaN::Yes);
} }
void EmitX64::EmitFPVectorMax32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitFPVectorMax32(EmitContext& ctx, IR::Inst* inst) {
@ -1024,7 +1027,7 @@ void EmitFPVectorMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]); const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm xmm_c = ctx.reg_alloc.UseXmm(args[2]); const Xbyak::Xmm xmm_c = ctx.reg_alloc.UseXmm(args[2]);
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{ MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
FCODE(vfmadd231p)(result, xmm_b, xmm_c); FCODE(vfmadd231p)(result, xmm_b, xmm_c);
}); });
@ -1044,7 +1047,7 @@ void EmitFPVectorMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
Xbyak::Label end, fallback; Xbyak::Label end, fallback;
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{ MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
code.movaps(result, xmm_a); code.movaps(result, xmm_a);
FCODE(vfmadd231p)(result, xmm_b, xmm_c); FCODE(vfmadd231p)(result, xmm_b, xmm_c);
@ -1113,7 +1116,7 @@ static void EmitFPVectorMulX(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm twos = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm twos = ctx.reg_alloc.ScratchXmm();
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{ MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
FCODE(vcmpunordp)(xmm0, result, operand); FCODE(vcmpunordp)(xmm0, result, operand);
FCODE(vxorp)(twos, result, operand); FCODE(vxorp)(twos, result, operand);
FCODE(mulp)(result, operand); FCODE(mulp)(result, operand);
@ -1151,8 +1154,7 @@ static void EmitFPVectorMulX(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst
result[elementi] = sign | FP::FPValue<FPT, false, 0, 2>(); result[elementi] = sign | FP::FPValue<FPT, false, 0, 2>();
} }
} }
} });
);
HandleNaNs<fsize, 2>(code, ctx, fpcr_controlled, {result, xmm_a, xmm_b}, nan_mask, nan_handler); HandleNaNs<fsize, 2>(code, ctx, fpcr_controlled, {result, xmm_a, xmm_b}, nan_mask, nan_handler);
@ -1287,7 +1289,7 @@ static void EmitRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]); const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]); const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{ MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
code.movaps(result, GetVectorOf<fsize, false, 0, 2>(code)); code.movaps(result, GetVectorOf<fsize, false, 0, 2>(code));
FCODE(vfnmadd231p)(result, operand1, operand2); FCODE(vfnmadd231p)(result, operand1, operand2);
}); });
@ -1307,7 +1309,7 @@ static void EmitRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
Xbyak::Label end, fallback; Xbyak::Label end, fallback;
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{ MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
code.movaps(result, GetVectorOf<fsize, false, 0, 2>(code)); code.movaps(result, GetVectorOf<fsize, false, 0, 2>(code));
FCODE(vfnmadd231p)(result, operand1, operand2); FCODE(vfnmadd231p)(result, operand1, operand2);
@ -1386,7 +1388,7 @@ void EmitFPVectorRoundInt(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
} }
}(); }();
EmitTwoOpVectorOperation<fsize, DefaultIndexer, 3>(code, ctx, inst, [&](const Xbyak::Xmm& result, const Xbyak::Xmm& xmm_a){ EmitTwoOpVectorOperation<fsize, DefaultIndexer, 3>(code, ctx, inst, [&](const Xbyak::Xmm& result, const Xbyak::Xmm& xmm_a) {
FCODE(roundp)(result, xmm_a, round_imm); FCODE(roundp)(result, xmm_a, round_imm);
}); });
@ -1399,8 +1401,7 @@ void EmitFPVectorRoundInt(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
mp::lift_value<FP::RoundingMode::TowardsPlusInfinity>, mp::lift_value<FP::RoundingMode::TowardsPlusInfinity>,
mp::lift_value<FP::RoundingMode::TowardsMinusInfinity>, mp::lift_value<FP::RoundingMode::TowardsMinusInfinity>,
mp::lift_value<FP::RoundingMode::TowardsZero>, mp::lift_value<FP::RoundingMode::TowardsZero>,
mp::lift_value<FP::RoundingMode::ToNearest_TieAwayFromZero> mp::lift_value<FP::RoundingMode::ToNearest_TieAwayFromZero>>;
>;
using exact_list = mp::list<std::true_type, std::false_type>; using exact_list = mp::list<std::true_type, std::false_type>;
static const auto lut = Common::GenerateLookupTableFromList( static const auto lut = Common::GenerateLookupTableFromList(
@ -1416,12 +1417,9 @@ void EmitFPVectorRoundInt(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
for (size_t i = 0; i < output.size(); ++i) { for (size_t i = 0; i < output.size(); ++i) {
output[i] = static_cast<FPT>(FP::FPRoundInt<FPT>(input[i], fpcr, rounding_mode, exact, fpsr)); output[i] = static_cast<FPT>(FP::FPRoundInt<FPT>(input[i], fpcr, rounding_mode, exact, fpsr));
} }
} })};
)
};
}, },
mp::cartesian_product<rounding_list, exact_list>{} mp::cartesian_product<rounding_list, exact_list>{});
);
EmitTwoOpFallback<3>(code, ctx, inst, lut.at(std::make_tuple(rounding, exact))); EmitTwoOpFallback<3>(code, ctx, inst, lut.at(std::make_tuple(rounding, exact)));
} }
@ -1501,7 +1499,7 @@ static void EmitRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]); const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]); const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{ MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
code.vmovaps(result, GetVectorOf<fsize, false, 0, 3>(code)); code.vmovaps(result, GetVectorOf<fsize, false, 0, 3>(code));
FCODE(vfnmadd231p)(result, operand1, operand2); FCODE(vfnmadd231p)(result, operand1, operand2);
FCODE(vmulp)(result, result, GetVectorOf<fsize, false, -1, 1>(code)); FCODE(vmulp)(result, result, GetVectorOf<fsize, false, -1, 1>(code));
@ -1523,12 +1521,12 @@ static void EmitRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
Xbyak::Label end, fallback; Xbyak::Label end, fallback;
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{ MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
code.vmovaps(result, GetVectorOf<fsize, false, 0, 3>(code)); code.vmovaps(result, GetVectorOf<fsize, false, 0, 3>(code));
FCODE(vfnmadd231p)(result, operand1, operand2); FCODE(vfnmadd231p)(result, operand1, operand2);
// An explanation for this is given in EmitFPRSqrtStepFused. // An explanation for this is given in EmitFPRSqrtStepFused.
code.vmovaps(mask, GetVectorOf<fsize, fsize == 32 ? 0x7f000000 : 0x7fe0000000000000>(code)); code.vmovaps(mask, GetVectorOf<fsize, (fsize == 32 ? 0x7f000000 : 0x7fe0000000000000)>(code));
FCODE(vandp)(tmp, result, mask); FCODE(vandp)(tmp, result, mask);
ICODE(vpcmpeq)(tmp, tmp, mask); ICODE(vpcmpeq)(tmp, tmp, mask);
code.ptest(tmp, tmp); code.ptest(tmp, tmp);
@ -1620,9 +1618,8 @@ void EmitFPVectorToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Xmm src = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm src = ctx.reg_alloc.UseScratchXmm(args[0]);
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{ MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
const int round_imm = [&] {
const int round_imm = [&]{
switch (rounding) { switch (rounding) {
case FP::RoundingMode::ToNearest_TieEven: case FP::RoundingMode::ToNearest_TieEven:
default: default:
@ -1659,8 +1656,8 @@ void EmitFPVectorToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
if (fbits != 0) { if (fbits != 0) {
const u64 scale_factor = fsize == 32 const u64 scale_factor = fsize == 32
? static_cast<u64>(fbits + 127) << 23 ? static_cast<u64>(fbits + 127) << 23
: static_cast<u64>(fbits + 1023) << 52; : static_cast<u64>(fbits + 1023) << 52;
FCODE(mulp)(src, GetVectorOf<fsize>(code, scale_factor)); FCODE(mulp)(src, GetVectorOf<fsize>(code, scale_factor));
} }
@ -1702,7 +1699,6 @@ void EmitFPVectorToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
perform_conversion(src); perform_conversion(src);
FCODE(blendvp)(src, GetVectorOf<fsize, integer_max>(code)); FCODE(blendvp)(src, GetVectorOf<fsize, integer_max>(code));
} }
}); });
ctx.reg_alloc.DefineValue(inst, src); ctx.reg_alloc.DefineValue(inst, src);
@ -1716,8 +1712,7 @@ void EmitFPVectorToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
mp::lift_value<FP::RoundingMode::TowardsPlusInfinity>, mp::lift_value<FP::RoundingMode::TowardsPlusInfinity>,
mp::lift_value<FP::RoundingMode::TowardsMinusInfinity>, mp::lift_value<FP::RoundingMode::TowardsMinusInfinity>,
mp::lift_value<FP::RoundingMode::TowardsZero>, mp::lift_value<FP::RoundingMode::TowardsZero>,
mp::lift_value<FP::RoundingMode::ToNearest_TieAwayFromZero> mp::lift_value<FP::RoundingMode::ToNearest_TieAwayFromZero>>;
>;
static const auto lut = Common::GenerateLookupTableFromList( static const auto lut = Common::GenerateLookupTableFromList(
[](auto arg) { [](auto arg) {
@ -1732,12 +1727,9 @@ void EmitFPVectorToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
for (size_t i = 0; i < output.size(); ++i) { for (size_t i = 0; i < output.size(); ++i) {
output[i] = static_cast<FPT>(FP::FPToFixed<FPT>(fsize, input[i], fbits, unsigned_, fpcr, rounding_mode, fpsr)); output[i] = static_cast<FPT>(FP::FPToFixed<FPT>(fsize, input[i], fbits, unsigned_, fpcr, rounding_mode, fpsr));
} }
} })};
)
};
}, },
mp::cartesian_product<fbits_list, rounding_list>{} mp::cartesian_product<fbits_list, rounding_list>{});
);
EmitTwoOpFallback<3>(code, ctx, inst, lut.at(std::make_tuple(fbits, rounding))); EmitTwoOpFallback<3>(code, ctx, inst, lut.at(std::make_tuple(fbits, rounding)));
} }
@ -1766,4 +1758,4 @@ void EmitX64::EmitFPVectorToUnsignedFixed64(EmitContext& ctx, IR::Inst* inst) {
EmitFPVectorToFixed<64, true>(code, ctx, inst); EmitFPVectorToFixed<64, true>(code, ctx, inst);
} }
} // namespace Dynarmic::Backend::X64 } // namespace Dynarmic::Backend::X64

View file

@ -131,7 +131,7 @@ void EmitVectorSignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
} }
} }
} // anonymous namespace } // anonymous namespace
void EmitX64::EmitVectorSignedSaturatedAdd8(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorSignedSaturatedAdd8(EmitContext& ctx, IR::Inst* inst) {
EmitVectorSaturatedNative(code, ctx, inst, &Xbyak::CodeGenerator::paddsb, &Xbyak::CodeGenerator::paddb, &Xbyak::CodeGenerator::psubb); EmitVectorSaturatedNative(code, ctx, inst, &Xbyak::CodeGenerator::paddsb, &Xbyak::CodeGenerator::paddb, &Xbyak::CodeGenerator::psubb);
@ -321,4 +321,4 @@ void EmitX64::EmitVectorUnsignedSaturatedSub64(EmitContext& ctx, IR::Inst* inst)
ctx.reg_alloc.DefineValue(inst, tmp); ctx.reg_alloc.DefineValue(inst, tmp);
} }
} // namespace Dynarmic::Backend::X64 } // namespace Dynarmic::Backend::X64

View file

@ -34,4 +34,4 @@ private:
std::unique_ptr<Impl> impl; std::unique_ptr<Impl> impl;
}; };
} // namespace Dynarmic::Backend::X64 } // namespace Dynarmic::Backend::X64

View file

@ -25,4 +25,4 @@ void ExceptionHandler::SetFastmemCallback(std::function<FakeCall(u64)>) {
// Do nothing // Do nothing
} }
} // namespace Dynarmic::Backend::X64 } // namespace Dynarmic::Backend::X64

View file

@ -3,8 +3,6 @@
* SPDX-License-Identifier: 0BSD * SPDX-License-Identifier: 0BSD
*/ */
#include "dynarmic/backend/x64/exception_handler.h"
#include <mach/mach.h> #include <mach/mach.h>
#include <mach/message.h> #include <mach/message.h>
@ -18,6 +16,7 @@
#include <fmt/format.h> #include <fmt/format.h>
#include "dynarmic/backend/x64/block_of_code.h" #include "dynarmic/backend/x64/block_of_code.h"
#include "dynarmic/backend/x64/exception_handler.h"
#include "dynarmic/common/assert.h" #include "dynarmic/common/assert.h"
#include "dynarmic/common/cast_util.h" #include "dynarmic/common/cast_util.h"
#include "dynarmic/common/common_types.h" #include "dynarmic/common/common_types.h"
@ -36,7 +35,7 @@ struct CodeBlockInfo {
struct MachMessage { struct MachMessage {
mach_msg_header_t head; mach_msg_header_t head;
char data[2048]; ///< Arbitrary size char data[2048]; ///< Arbitrary size
}; };
class MachHandler final { class MachHandler final {
@ -64,7 +63,7 @@ private:
}; };
MachHandler::MachHandler() { MachHandler::MachHandler() {
#define KCHECK(x) ASSERT_MSG((x) == KERN_SUCCESS, "dynarmic: macOS MachHandler: init failure at {}", #x) #define KCHECK(x) ASSERT_MSG((x) == KERN_SUCCESS, "dynarmic: macOS MachHandler: init failure at {}", #x)
KCHECK(mach_port_allocate(mach_task_self(), MACH_PORT_RIGHT_RECEIVE, &server_port)); KCHECK(mach_port_allocate(mach_task_self(), MACH_PORT_RIGHT_RECEIVE, &server_port));
KCHECK(mach_port_insert_right(mach_task_self(), server_port, server_port, MACH_MSG_TYPE_MAKE_SEND)); KCHECK(mach_port_insert_right(mach_task_self(), server_port, server_port, MACH_MSG_TYPE_MAKE_SEND));
@ -74,7 +73,7 @@ MachHandler::MachHandler() {
mach_port_t prev; mach_port_t prev;
KCHECK(mach_port_request_notification(mach_task_self(), server_port, MACH_NOTIFY_PORT_DESTROYED, 0, server_port, MACH_MSG_TYPE_MAKE_SEND_ONCE, &prev)); KCHECK(mach_port_request_notification(mach_task_self(), server_port, MACH_NOTIFY_PORT_DESTROYED, 0, server_port, MACH_MSG_TYPE_MAKE_SEND_ONCE, &prev));
#undef KCHECK #undef KCHECK
thread = std::thread(&MachHandler::MessagePump, this); thread = std::thread(&MachHandler::MessagePump, this);
} }
@ -102,7 +101,7 @@ void MachHandler::MessagePump() {
} }
mr = mach_msg(&reply.head, MACH_SEND_MSG, reply.head.msgh_size, 0, MACH_PORT_NULL, MACH_MSG_TIMEOUT_NONE, MACH_PORT_NULL); mr = mach_msg(&reply.head, MACH_SEND_MSG, reply.head.msgh_size, 0, MACH_PORT_NULL, MACH_MSG_TIMEOUT_NONE, MACH_PORT_NULL);
if (mr != MACH_MSG_SUCCESS){ if (mr != MACH_MSG_SUCCESS) {
fmt::print(stderr, "dynarmic: macOS MachHandler: Failed to send mach message. error: {:#08x} ({})\n", mr, mach_error_string(mr)); fmt::print(stderr, "dynarmic: macOS MachHandler: Failed to send mach message. error: {:#08x} ({})\n", mr, mach_error_string(mr));
return; return;
} }
@ -146,7 +145,7 @@ void MachHandler::RemoveCodeBlock(u64 rip) {
MachHandler mach_handler; MachHandler mach_handler;
} // anonymous namespace } // anonymous namespace
mig_external kern_return_t catch_mach_exception_raise(mach_port_t, mach_port_t, mach_port_t, exception_type_t, mach_exception_data_t, mach_msg_type_number_t) { mig_external kern_return_t catch_mach_exception_raise(mach_port_t, mach_port_t, mach_port_t, exception_type_t, mach_exception_data_t, mach_msg_type_number_t) {
fmt::print(stderr, "dynarmic: Unexpected mach message: mach_exception_raise\n"); fmt::print(stderr, "dynarmic: Unexpected mach message: mach_exception_raise\n");
@ -161,14 +160,13 @@ mig_external kern_return_t catch_mach_exception_raise_state_identity(mach_port_t
mig_external kern_return_t catch_mach_exception_raise_state( mig_external kern_return_t catch_mach_exception_raise_state(
mach_port_t /*exception_port*/, mach_port_t /*exception_port*/,
exception_type_t exception, exception_type_t exception,
const mach_exception_data_t /*code*/, // code[0] is as per kern_return.h, code[1] is rip. const mach_exception_data_t /*code*/, // code[0] is as per kern_return.h, code[1] is rip.
mach_msg_type_number_t /*codeCnt*/, mach_msg_type_number_t /*codeCnt*/,
int* flavor, int* flavor,
const thread_state_t old_state, const thread_state_t old_state,
mach_msg_type_number_t old_stateCnt, mach_msg_type_number_t old_stateCnt,
thread_state_t new_state, thread_state_t new_state,
mach_msg_type_number_t* new_stateCnt mach_msg_type_number_t* new_stateCnt) {
) {
if (!flavor || !new_stateCnt) { if (!flavor || !new_stateCnt) {
fmt::print(stderr, "dynarmic: catch_mach_exception_raise_state: Invalid arguments.\n"); fmt::print(stderr, "dynarmic: catch_mach_exception_raise_state: Invalid arguments.\n");
return KERN_INVALID_ARGUMENT; return KERN_INVALID_ARGUMENT;
@ -191,9 +189,8 @@ mig_external kern_return_t catch_mach_exception_raise_state(
struct ExceptionHandler::Impl final { struct ExceptionHandler::Impl final {
Impl(BlockOfCode& code) Impl(BlockOfCode& code)
: code_begin(Common::BitCast<u64>(code.getCode())) : code_begin(Common::BitCast<u64>(code.getCode()))
, code_end(code_begin + code.GetTotalCodeSize()) , code_end(code_begin + code.GetTotalCodeSize()) {}
{}
void SetCallback(std::function<FakeCall(u64)> cb) { void SetCallback(std::function<FakeCall(u64)> cb) {
CodeBlockInfo cbi; CodeBlockInfo cbi;
@ -227,4 +224,4 @@ void ExceptionHandler::SetFastmemCallback(std::function<FakeCall(u64)> cb) {
impl->SetCallback(cb); impl->SetCallback(cb);
} }
} // namespace Dynarmic::Backend::X64 } // namespace Dynarmic::Backend::X64

View file

@ -5,19 +5,20 @@
#include "dynarmic/backend/x64/exception_handler.h" #include "dynarmic/backend/x64/exception_handler.h"
#ifdef __APPLE__
# include <signal.h>
# include <sys/ucontext.h>
#else
# include <signal.h>
# include <ucontext.h>
#endif
#include <cstring> #include <cstring>
#include <functional> #include <functional>
#include <memory> #include <memory>
#include <mutex> #include <mutex>
#include <vector> #include <vector>
#include <signal.h>
#ifdef __APPLE__
#include <sys/ucontext.h>
#else
#include <ucontext.h>
#endif
#include "dynarmic/backend/x64/block_of_code.h" #include "dynarmic/backend/x64/block_of_code.h"
#include "dynarmic/common/assert.h" #include "dynarmic/common/assert.h"
#include "dynarmic/common/cast_util.h" #include "dynarmic/common/cast_util.h"
@ -121,16 +122,16 @@ void SigHandler::SigAction(int sig, siginfo_t* info, void* raw_context) {
ASSERT(sig == SIGSEGV || sig == SIGBUS); ASSERT(sig == SIGSEGV || sig == SIGBUS);
#if defined(__APPLE__) #if defined(__APPLE__)
#define CTX_RIP (((ucontext_t*)raw_context)->uc_mcontext->__ss.__rip) # define CTX_RIP (((ucontext_t*)raw_context)->uc_mcontext->__ss.__rip)
#define CTX_RSP (((ucontext_t*)raw_context)->uc_mcontext->__ss.__rsp) # define CTX_RSP (((ucontext_t*)raw_context)->uc_mcontext->__ss.__rsp)
#elif defined(__linux__) #elif defined(__linux__)
#define CTX_RIP (((ucontext_t*)raw_context)->uc_mcontext.gregs[REG_RIP]) # define CTX_RIP (((ucontext_t*)raw_context)->uc_mcontext.gregs[REG_RIP])
#define CTX_RSP (((ucontext_t*)raw_context)->uc_mcontext.gregs[REG_RSP]) # define CTX_RSP (((ucontext_t*)raw_context)->uc_mcontext.gregs[REG_RSP])
#elif defined(__FreeBSD__) #elif defined(__FreeBSD__)
#define CTX_RIP (((ucontext_t*)raw_context)->uc_mcontext.mc_rip) # define CTX_RIP (((ucontext_t*)raw_context)->uc_mcontext.mc_rip)
#define CTX_RSP (((ucontext_t*)raw_context)->uc_mcontext.mc_rsp) # define CTX_RSP (((ucontext_t*)raw_context)->uc_mcontext.mc_rsp)
#else #else
#error "Unknown platform" # error "Unknown platform"
#endif #endif
{ {
@ -152,26 +153,25 @@ void SigHandler::SigAction(int sig, siginfo_t* info, void* raw_context) {
struct sigaction* retry_sa = sig == SIGSEGV ? &sig_handler.old_sa_segv : &sig_handler.old_sa_bus; struct sigaction* retry_sa = sig == SIGSEGV ? &sig_handler.old_sa_segv : &sig_handler.old_sa_bus;
if (retry_sa->sa_flags & SA_SIGINFO) { if (retry_sa->sa_flags & SA_SIGINFO) {
retry_sa->sa_sigaction(sig, info, raw_context); retry_sa->sa_sigaction(sig, info, raw_context);
return; return;
} }
if (retry_sa->sa_handler == SIG_DFL) { if (retry_sa->sa_handler == SIG_DFL) {
signal(sig, SIG_DFL); signal(sig, SIG_DFL);
return; return;
} }
if (retry_sa->sa_handler == SIG_IGN) { if (retry_sa->sa_handler == SIG_IGN) {
return; return;
} }
retry_sa->sa_handler(sig); retry_sa->sa_handler(sig);
} }
} // anonymous namespace } // anonymous namespace
struct ExceptionHandler::Impl final { struct ExceptionHandler::Impl final {
Impl(BlockOfCode& code) Impl(BlockOfCode& code)
: code_begin(Common::BitCast<u64>(code.getCode())) : code_begin(Common::BitCast<u64>(code.getCode()))
, code_end(code_begin + code.GetTotalCodeSize()) , code_end(code_begin + code.GetTotalCodeSize()) {}
{}
void SetCallback(std::function<FakeCall(u64)> cb) { void SetCallback(std::function<FakeCall(u64)> cb) {
CodeBlockInfo cbi; CodeBlockInfo cbi;
@ -204,4 +204,4 @@ void ExceptionHandler::SetFastmemCallback(std::function<FakeCall(u64)> cb) {
impl->SetCallback(cb); impl->SetCallback(cb);
} }
} // namespace Dynarmic::Backend::X64 } // namespace Dynarmic::Backend::X64

View file

@ -3,12 +3,12 @@
* SPDX-License-Identifier: 0BSD * SPDX-License-Identifier: 0BSD
*/ */
#include <cstring>
#include <vector>
#define WIN32_LEAN_AND_MEAN #define WIN32_LEAN_AND_MEAN
#include <windows.h> #include <windows.h>
#include <cstring>
#include <vector>
#include "dynarmic/backend/x64/block_of_code.h" #include "dynarmic/backend/x64/block_of_code.h"
#include "dynarmic/backend/x64/exception_handler.h" #include "dynarmic/backend/x64/exception_handler.h"
#include "dynarmic/common/assert.h" #include "dynarmic/common/assert.h"
@ -187,14 +187,13 @@ struct ExceptionHandler::Impl final {
code.mov(code.ABI_PARAM1, Common::BitCast<u64>(&cb)); code.mov(code.ABI_PARAM1, Common::BitCast<u64>(&cb));
code.mov(code.ABI_PARAM2, code.ABI_PARAM3); code.mov(code.ABI_PARAM2, code.ABI_PARAM3);
code.CallLambda( code.CallLambda(
[](const std::function<FakeCall(u64)>& cb_, PCONTEXT ctx){ [](const std::function<FakeCall(u64)>& cb_, PCONTEXT ctx) {
FakeCall fc = cb_(ctx->Rip); FakeCall fc = cb_(ctx->Rip);
ctx->Rsp -= sizeof(u64); ctx->Rsp -= sizeof(u64);
*Common::BitCast<u64*>(ctx->Rsp) = fc.ret_rip; *Common::BitCast<u64*>(ctx->Rsp) = fc.ret_rip;
ctx->Rip = fc.call_rip; ctx->Rip = fc.call_rip;
} });
);
code.add(code.rsp, 8); code.add(code.rsp, 8);
code.mov(code.eax, static_cast<u32>(ExceptionContinueExecution)); code.mov(code.eax, static_cast<u32>(ExceptionContinueExecution));
code.ret(); code.ret();
@ -208,8 +207,8 @@ struct ExceptionHandler::Impl final {
unwind_info->Flags = UNW_FLAG_EHANDLER; unwind_info->Flags = UNW_FLAG_EHANDLER;
unwind_info->SizeOfProlog = prolog_info.prolog_size; unwind_info->SizeOfProlog = prolog_info.prolog_size;
unwind_info->CountOfCodes = static_cast<UBYTE>(prolog_info.number_of_unwind_code_entries); unwind_info->CountOfCodes = static_cast<UBYTE>(prolog_info.number_of_unwind_code_entries);
unwind_info->FrameRegister = 0; // No frame register present unwind_info->FrameRegister = 0; // No frame register present
unwind_info->FrameOffset = 0; // Unused because FrameRegister == 0 unwind_info->FrameOffset = 0; // Unused because FrameRegister == 0
// UNWIND_INFO::UnwindCode field: // UNWIND_INFO::UnwindCode field:
const size_t size_of_unwind_code = sizeof(UNWIND_CODE) * prolog_info.unwind_code.size(); const size_t size_of_unwind_code = sizeof(UNWIND_CODE) * prolog_info.unwind_code.size();
UNWIND_CODE* unwind_code = static_cast<UNWIND_CODE*>(code.AllocateFromCodeSpace(size_of_unwind_code)); UNWIND_CODE* unwind_code = static_cast<UNWIND_CODE*>(code.AllocateFromCodeSpace(size_of_unwind_code));
@ -259,4 +258,4 @@ void ExceptionHandler::SetFastmemCallback(std::function<FakeCall(u64)> cb) {
impl->SetCallback(cb); impl->SetCallback(cb);
} }
} // namespace Dynarmic::Backend::X64 } // namespace Dynarmic::Backend::X64

View file

@ -3,15 +3,16 @@
* SPDX-License-Identifier: 0BSD * SPDX-License-Identifier: 0BSD
*/ */
#include "dynarmic/interface/exclusive_monitor.h"
#include <algorithm> #include <algorithm>
#include "dynarmic/common/assert.h" #include "dynarmic/common/assert.h"
#include "dynarmic/interface/exclusive_monitor.h"
namespace Dynarmic { namespace Dynarmic {
ExclusiveMonitor::ExclusiveMonitor(size_t processor_count) : ExclusiveMonitor::ExclusiveMonitor(size_t processor_count)
exclusive_addresses(processor_count, INVALID_EXCLUSIVE_ADDRESS), exclusive_values(processor_count) { : exclusive_addresses(processor_count, INVALID_EXCLUSIVE_ADDRESS), exclusive_values(processor_count) {
Unlock(); Unlock();
} }
@ -56,4 +57,4 @@ void ExclusiveMonitor::ClearProcessor(size_t processor_id) {
Unlock(); Unlock();
} }
} // namespace Dynarmic } // namespace Dynarmic

View file

@ -10,35 +10,35 @@
namespace Dynarmic::Backend::X64 { namespace Dynarmic::Backend::X64 {
enum class HostFeature : u64 { enum class HostFeature : u64 {
SSSE3 = 1ULL << 0, SSSE3 = 1ULL << 0,
SSE41 = 1ULL << 1, SSE41 = 1ULL << 1,
SSE42 = 1ULL << 2, SSE42 = 1ULL << 2,
AVX = 1ULL << 3, AVX = 1ULL << 3,
AVX2 = 1ULL << 4, AVX2 = 1ULL << 4,
AVX512F = 1ULL << 5, AVX512F = 1ULL << 5,
AVX512CD = 1ULL << 6, AVX512CD = 1ULL << 6,
AVX512VL = 1ULL << 7, AVX512VL = 1ULL << 7,
AVX512BW = 1ULL << 8, AVX512BW = 1ULL << 8,
AVX512DQ = 1ULL << 9, AVX512DQ = 1ULL << 9,
AVX512BITALG = 1ULL << 10, AVX512BITALG = 1ULL << 10,
PCLMULQDQ = 1ULL << 11, PCLMULQDQ = 1ULL << 11,
F16C = 1ULL << 12, F16C = 1ULL << 12,
FMA = 1ULL << 13, FMA = 1ULL << 13,
AES = 1ULL << 14, AES = 1ULL << 14,
POPCNT = 1ULL << 15, POPCNT = 1ULL << 15,
BMI1 = 1ULL << 16, BMI1 = 1ULL << 16,
BMI2 = 1ULL << 17, BMI2 = 1ULL << 17,
LZCNT = 1ULL << 18, LZCNT = 1ULL << 18,
GFNI = 1ULL << 19, GFNI = 1ULL << 19,
// Zen-based BMI2 // Zen-based BMI2
FastBMI2 = 1ULL << 20, FastBMI2 = 1ULL << 20,
// Orthographic AVX512 features on 128 and 256 vectors // Orthographic AVX512 features on 128 and 256 vectors
AVX512_Ortho = AVX512F | AVX512VL, AVX512_Ortho = AVX512F | AVX512VL,
// Orthographic AVX512 features for both 32-bit and 64-bit floats // Orthographic AVX512 features for both 32-bit and 64-bit floats
AVX512_OrthoFloat = AVX512_Ortho | AVX512DQ, AVX512_OrthoFloat = AVX512_Ortho | AVX512DQ,
}; };
constexpr HostFeature operator~(HostFeature f) { constexpr HostFeature operator~(HostFeature f) {
@ -61,4 +61,4 @@ constexpr HostFeature operator&=(HostFeature& result, HostFeature f) {
return result = (result & f); return result = (result & f);
} }
} } // namespace Dynarmic::Backend::X64

View file

@ -3,10 +3,11 @@
* SPDX-License-Identifier: 0BSD * SPDX-License-Identifier: 0BSD
*/ */
#include "dynarmic/backend/x64/hostloc.h"
#include <xbyak.h> #include <xbyak.h>
#include "dynarmic/backend/x64/abi.h" #include "dynarmic/backend/x64/abi.h"
#include "dynarmic/backend/x64/hostloc.h"
#include "dynarmic/backend/x64/stack_layout.h" #include "dynarmic/backend/x64/stack_layout.h"
namespace Dynarmic::Backend::X64 { namespace Dynarmic::Backend::X64 {
@ -21,4 +22,4 @@ Xbyak::Xmm HostLocToXmm(HostLoc loc) {
return Xbyak::Xmm(static_cast<int>(loc) - static_cast<int>(HostLoc::XMM0)); return Xbyak::Xmm(static_cast<int>(loc) - static_cast<int>(HostLoc::XMM0));
} }
} // namespace Dynarmic::Backend::X64 } // namespace Dynarmic::Backend::X64

View file

@ -13,10 +13,44 @@ namespace Dynarmic::Backend::X64 {
enum class HostLoc { enum class HostLoc {
// Ordering of the registers is intentional. See also: HostLocToX64. // Ordering of the registers is intentional. See also: HostLocToX64.
RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI, R8, R9, R10, R11, R12, R13, R14, R15, RAX,
XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, RCX,
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, RDX,
CF, PF, AF, ZF, SF, OF, RBX,
RSP,
RBP,
RSI,
RDI,
R8,
R9,
R10,
R11,
R12,
R13,
R14,
R15,
XMM0,
XMM1,
XMM2,
XMM3,
XMM4,
XMM5,
XMM6,
XMM7,
XMM8,
XMM9,
XMM10,
XMM11,
XMM12,
XMM13,
XMM14,
XMM15,
CF,
PF,
AF,
ZF,
SF,
OF,
FirstSpill, FirstSpill,
}; };
@ -111,4 +145,4 @@ const HostLocList any_xmm = {
Xbyak::Reg64 HostLocToReg64(HostLoc loc); Xbyak::Reg64 HostLocToReg64(HostLoc loc);
Xbyak::Xmm HostLocToXmm(HostLoc loc); Xbyak::Xmm HostLocToXmm(HostLoc loc);
} // namespace Dynarmic::Backend::X64 } // namespace Dynarmic::Backend::X64

View file

@ -10,18 +10,17 @@
namespace Dynarmic::Backend::X64 { namespace Dynarmic::Backend::X64 {
struct JitStateInfo { struct JitStateInfo {
template <typename JitStateType> template<typename JitStateType>
JitStateInfo(const JitStateType&) JitStateInfo(const JitStateType&)
: offsetof_guest_MXCSR(offsetof(JitStateType, guest_MXCSR)) : offsetof_guest_MXCSR(offsetof(JitStateType, guest_MXCSR))
, offsetof_asimd_MXCSR(offsetof(JitStateType, asimd_MXCSR)) , offsetof_asimd_MXCSR(offsetof(JitStateType, asimd_MXCSR))
, offsetof_rsb_ptr(offsetof(JitStateType, rsb_ptr)) , offsetof_rsb_ptr(offsetof(JitStateType, rsb_ptr))
, rsb_ptr_mask(JitStateType::RSBPtrMask) , rsb_ptr_mask(JitStateType::RSBPtrMask)
, offsetof_rsb_location_descriptors(offsetof(JitStateType, rsb_location_descriptors)) , offsetof_rsb_location_descriptors(offsetof(JitStateType, rsb_location_descriptors))
, offsetof_rsb_codeptrs(offsetof(JitStateType, rsb_codeptrs)) , offsetof_rsb_codeptrs(offsetof(JitStateType, rsb_codeptrs))
, offsetof_cpsr_nzcv(offsetof(JitStateType, cpsr_nzcv)) , offsetof_cpsr_nzcv(offsetof(JitStateType, cpsr_nzcv))
, offsetof_fpsr_exc(offsetof(JitStateType, fpsr_exc)) , offsetof_fpsr_exc(offsetof(JitStateType, fpsr_exc))
, offsetof_fpsr_qc(offsetof(JitStateType, fpsr_qc)) , offsetof_fpsr_qc(offsetof(JitStateType, fpsr_qc)) {}
{}
const size_t offsetof_guest_MXCSR; const size_t offsetof_guest_MXCSR;
const size_t offsetof_asimd_MXCSR; const size_t offsetof_asimd_MXCSR;
@ -34,4 +33,4 @@ struct JitStateInfo {
const size_t offsetof_fpsr_qc; const size_t offsetof_fpsr_qc;
}; };
} // namespace Dynarmic::Backend::X64 } // namespace Dynarmic::Backend::X64

View file

@ -5,8 +5,8 @@
#pragma once #pragma once
#include "dynarmic/common/common_types.h"
#include "dynarmic/common/bit_util.h" #include "dynarmic/common/bit_util.h"
#include "dynarmic/common/common_types.h"
namespace Dynarmic::Backend::X64::NZCV { namespace Dynarmic::Backend::X64::NZCV {
@ -50,4 +50,4 @@ inline u32 FromX64(u32 x64_flags) {
return ((x64_flags & x64_mask) * from_x64_multiplier) & arm_mask; return ((x64_flags & x64_mask) * from_x64_multiplier) & arm_mask;
} }
} // namespace Dynarmic::Backend::X64::NZCV } // namespace Dynarmic::Backend::X64::NZCV

View file

@ -12,9 +12,12 @@
namespace Dynarmic::Backend::X64 { namespace Dynarmic::Backend::X64 {
struct OpArg { struct OpArg {
OpArg() : type(Type::Operand), inner_operand() {} OpArg()
/* implicit */ OpArg(const Xbyak::Address& address) : type(Type::Address), inner_address(address) {} : type(Type::Operand), inner_operand() {}
/* implicit */ OpArg(const Xbyak::Reg& reg) : type(Type::Reg), inner_reg(reg) {} /* implicit */ OpArg(const Xbyak::Address& address)
: type(Type::Address), inner_address(address) {}
/* implicit */ OpArg(const Xbyak::Reg& reg)
: type(Type::Reg), inner_reg(reg) {}
Xbyak::Operand& operator*() { Xbyak::Operand& operator*() {
switch (type) { switch (type) {
@ -74,4 +77,4 @@ private:
}; };
}; };
} // namespace Dynarmic::Backend::X64 } // namespace Dynarmic::Backend::X64

View file

@ -3,22 +3,22 @@
* SPDX-License-Identifier: 0BSD * SPDX-License-Identifier: 0BSD
*/ */
#include "dynarmic/backend/x64/perf_map.h"
#include <cstddef> #include <cstddef>
#include <string> #include <string>
#include "dynarmic/backend/x64/perf_map.h"
#ifdef __linux__ #ifdef __linux__
#include <cstdio> # include <cstdio>
#include <cstdlib> # include <cstdlib>
#include <mutex> # include <mutex>
#include <sys/types.h>
#include <unistd.h>
#include <fmt/format.h> # include <fmt/format.h>
# include <sys/types.h>
# include <unistd.h>
#include "dynarmic/common/common_types.h" # include "dynarmic/common/common_types.h"
namespace Dynarmic::Backend::X64 { namespace Dynarmic::Backend::X64 {
@ -43,7 +43,7 @@ void OpenFile() {
std::setvbuf(file, nullptr, _IONBF, 0); std::setvbuf(file, nullptr, _IONBF, 0);
} }
} // anonymous namespace } // anonymous namespace
namespace detail { namespace detail {
void PerfMapRegister(const void* start, const void* end, std::string_view friendly_name) { void PerfMapRegister(const void* start, const void* end, std::string_view friendly_name) {
@ -64,7 +64,7 @@ void PerfMapRegister(const void* start, const void* end, std::string_view friend
const std::string line = fmt::format("{:016x} {:016x} {:s}\n", reinterpret_cast<u64>(start), reinterpret_cast<u64>(end) - reinterpret_cast<u64>(start), friendly_name); const std::string line = fmt::format("{:016x} {:016x} {:s}\n", reinterpret_cast<u64>(start), reinterpret_cast<u64>(end) - reinterpret_cast<u64>(start), friendly_name);
std::fwrite(line.data(), sizeof *line.data(), line.size(), file); std::fwrite(line.data(), sizeof *line.data(), line.size(), file);
} }
} // namespace detail } // namespace detail
void PerfMapClear() { void PerfMapClear() {
std::lock_guard guard{mutex}; std::lock_guard guard{mutex};
@ -78,7 +78,7 @@ void PerfMapClear() {
OpenFile(); OpenFile();
} }
} // namespace Dynarmic::Backend::X64 } // namespace Dynarmic::Backend::X64
#else #else
@ -86,10 +86,10 @@ namespace Dynarmic::Backend::X64 {
namespace detail { namespace detail {
void PerfMapRegister(const void*, const void*, std::string_view) {} void PerfMapRegister(const void*, const void*, std::string_view) {}
} // namespace detail } // namespace detail
void PerfMapClear() {} void PerfMapClear() {}
} // namespace Dynarmic::Backend::X64 } // namespace Dynarmic::Backend::X64
#endif #endif

View file

@ -13,7 +13,7 @@ namespace Dynarmic::Backend::X64 {
namespace detail { namespace detail {
void PerfMapRegister(const void* start, const void* end, std::string_view friendly_name); void PerfMapRegister(const void* start, const void* end, std::string_view friendly_name);
} // namespace detail } // namespace detail
template<typename T> template<typename T>
void PerfMapRegister(T start, const void* end, std::string_view friendly_name) { void PerfMapRegister(T start, const void* end, std::string_view friendly_name) {
@ -22,4 +22,4 @@ void PerfMapRegister(T start, const void* end, std::string_view friendly_name) {
void PerfMapClear(); void PerfMapClear();
} // namespace Dynarmic::Backend::X64 } // namespace Dynarmic::Backend::X64

View file

@ -3,6 +3,8 @@
* SPDX-License-Identifier: 0BSD * SPDX-License-Identifier: 0BSD
*/ */
#include "dynarmic/backend/x64/reg_alloc.h"
#include <algorithm> #include <algorithm>
#include <numeric> #include <numeric>
#include <utility> #include <utility>
@ -11,19 +13,18 @@
#include <xbyak.h> #include <xbyak.h>
#include "dynarmic/backend/x64/abi.h" #include "dynarmic/backend/x64/abi.h"
#include "dynarmic/backend/x64/reg_alloc.h"
#include "dynarmic/backend/x64/stack_layout.h" #include "dynarmic/backend/x64/stack_layout.h"
#include "dynarmic/common/assert.h" #include "dynarmic/common/assert.h"
namespace Dynarmic::Backend::X64 { namespace Dynarmic::Backend::X64 {
#define MAYBE_AVX(OPCODE, ...) \ #define MAYBE_AVX(OPCODE, ...) \
[&] { \ [&] { \
if (code.HasHostFeature(HostFeature::AVX)) { \ if (code.HasHostFeature(HostFeature::AVX)) { \
code.v##OPCODE(__VA_ARGS__); \ code.v##OPCODE(__VA_ARGS__); \
} else { \ } else { \
code.OPCODE(__VA_ARGS__); \ code.OPCODE(__VA_ARGS__); \
} \ } \
}() }()
static bool CanExchange(HostLoc a, HostLoc b) { static bool CanExchange(HostLoc a, HostLoc b) {
@ -57,7 +58,7 @@ static size_t GetBitWidth(IR::Type type) {
case IR::Type::U128: case IR::Type::U128:
return 128; return 128;
case IR::Type::NZCVFlags: case IR::Type::NZCVFlags:
return 32; // TODO: Update to 16 when flags optimization is done return 32; // TODO: Update to 16 when flags optimization is done
} }
UNREACHABLE(); UNREACHABLE();
} }
@ -225,11 +226,10 @@ bool Argument::IsInMemory() const {
} }
RegAlloc::RegAlloc(BlockOfCode& code, std::vector<HostLoc> gpr_order, std::vector<HostLoc> xmm_order) RegAlloc::RegAlloc(BlockOfCode& code, std::vector<HostLoc> gpr_order, std::vector<HostLoc> xmm_order)
: gpr_order(gpr_order) : gpr_order(gpr_order)
, xmm_order(xmm_order) , xmm_order(xmm_order)
, hostloc_info(NonSpillHostLocCount + SpillCount) , hostloc_info(NonSpillHostLocCount + SpillCount)
, code(code) , code(code) {}
{}
RegAlloc::ArgumentInfo RegAlloc::GetArgumentInfo(IR::Inst* inst) { RegAlloc::ArgumentInfo RegAlloc::GetArgumentInfo(IR::Inst* inst) {
ArgumentInfo ret = {Argument{*this}, Argument{*this}, Argument{*this}, Argument{*this}}; ArgumentInfo ret = {Argument{*this}, Argument{*this}, Argument{*this}, Argument{*this}};
@ -382,13 +382,14 @@ HostLoc RegAlloc::ScratchImpl(const std::vector<HostLoc>& desired_locations) {
return location; return location;
} }
void RegAlloc::HostCall(IR::Inst* result_def, std::optional<Argument::copyable_reference> arg0, void RegAlloc::HostCall(IR::Inst* result_def,
std::optional<Argument::copyable_reference> arg0,
std::optional<Argument::copyable_reference> arg1, std::optional<Argument::copyable_reference> arg1,
std::optional<Argument::copyable_reference> arg2, std::optional<Argument::copyable_reference> arg2,
std::optional<Argument::copyable_reference> arg3) { std::optional<Argument::copyable_reference> arg3) {
constexpr size_t args_count = 4; constexpr size_t args_count = 4;
constexpr std::array<HostLoc, args_count> args_hostloc = { ABI_PARAM1, ABI_PARAM2, ABI_PARAM3, ABI_PARAM4 }; constexpr std::array<HostLoc, args_count> args_hostloc = {ABI_PARAM1, ABI_PARAM2, ABI_PARAM3, ABI_PARAM4};
const std::array<std::optional<Argument::copyable_reference>, args_count> args = { arg0, arg1, arg2, arg3 }; const std::array<std::optional<Argument::copyable_reference>, args_count> args = {arg0, arg1, arg2, arg3};
static const std::vector<HostLoc> other_caller_save = [args_hostloc]() { static const std::vector<HostLoc> other_caller_save = [args_hostloc]() {
std::vector<HostLoc> ret(ABI_ALL_CALLER_SAVE.begin(), ABI_ALL_CALLER_SAVE.end()); std::vector<HostLoc> ret(ABI_ALL_CALLER_SAVE.begin(), ABI_ALL_CALLER_SAVE.end());
@ -420,7 +421,7 @@ void RegAlloc::HostCall(IR::Inst* result_def, std::optional<Argument::copyable_r
code.movzx(reg.cvt32(), reg.cvt16()); code.movzx(reg.cvt32(), reg.cvt16());
break; break;
default: default:
break; // Nothing needs to be done break; // Nothing needs to be done
} }
#endif #endif
} }
@ -717,4 +718,4 @@ Xbyak::Address RegAlloc::SpillToOpArg(HostLoc loc) {
return xword[rsp + reserved_stack_space + ABI_SHADOW_SPACE + offsetof(StackLayout, spill) + i * sizeof(StackLayout::spill[0])]; return xword[rsp + reserved_stack_space + ABI_SHADOW_SPACE + offsetof(StackLayout, spill) + i * sizeof(StackLayout::spill[0])];
} }
} // namespace Dynarmic::Backend::X64 } // namespace Dynarmic::Backend::X64

View file

@ -85,7 +85,8 @@ public:
private: private:
friend class RegAlloc; friend class RegAlloc;
explicit Argument(RegAlloc& reg_alloc) : reg_alloc(reg_alloc) {} explicit Argument(RegAlloc& reg_alloc)
: reg_alloc(reg_alloc) {}
bool allocated = false; bool allocated = false;
RegAlloc& reg_alloc; RegAlloc& reg_alloc;
@ -170,4 +171,4 @@ private:
Xbyak::Address SpillToOpArg(HostLoc loc); Xbyak::Address SpillToOpArg(HostLoc loc);
}; };
} // namespace Dynarmic::Backend::X64 } // namespace Dynarmic::Backend::X64

View file

@ -14,8 +14,8 @@ namespace Dynarmic::Backend::X64 {
constexpr size_t SpillCount = 64; constexpr size_t SpillCount = 64;
#ifdef _MSC_VER #ifdef _MSC_VER
#pragma warning(push) # pragma warning(push)
#pragma warning(disable:4324) // Structure was padded due to alignment specifier # pragma warning(disable : 4324) // Structure was padded due to alignment specifier
#endif #endif
struct alignas(16) StackLayout { struct alignas(16) StackLayout {
@ -31,9 +31,9 @@ struct alignas(16) StackLayout {
}; };
#ifdef _MSC_VER #ifdef _MSC_VER
#pragma warning(pop) # pragma warning(pop)
#endif #endif
static_assert(sizeof(StackLayout) % 16 == 0); static_assert(sizeof(StackLayout) % 16 == 0);
} // namespace Dynarmic::Backend::X64 } // namespace Dynarmic::Backend::X64

View file

@ -3,13 +3,13 @@
* SPDX-License-Identifier: 0BSD * SPDX-License-Identifier: 0BSD
*/ */
#include "dynarmic/common/assert.h"
#include <cstdio> #include <cstdio>
#include <exception> #include <exception>
#include <fmt/format.h> #include <fmt/format.h>
#include "dynarmic/common/assert.h"
namespace Dynarmic::Common { namespace Dynarmic::Common {
[[noreturn]] void Terminate(fmt::string_view msg, fmt::format_args args) { [[noreturn]] void Terminate(fmt::string_view msg, fmt::format_args args) {
@ -18,4 +18,4 @@ namespace Dynarmic::Common {
std::terminate(); std::terminate();
} }
} // namespace Dynarmic::Common } // namespace Dynarmic::Common

View file

@ -15,57 +15,57 @@ namespace Dynarmic::Common {
namespace detail { namespace detail {
template <typename... Ts> template<typename... Ts>
[[noreturn]] void TerminateHelper(fmt::string_view msg, Ts... args) { [[noreturn]] void TerminateHelper(fmt::string_view msg, Ts... args) {
Terminate(msg, fmt::make_format_args(args...)); Terminate(msg, fmt::make_format_args(args...));
} }
} // namespace detail } // namespace detail
} // namespace Dynarmic::Common } // namespace Dynarmic::Common
#if defined(__clang) || defined(__GNUC__) #if defined(__clang) || defined(__GNUC__)
#define ASSUME(expr) [&]{ if (!(expr)) __builtin_unreachable(); }() # define ASSUME(expr) [&] { if (!(expr)) __builtin_unreachable(); }()
#elif defined(_MSC_VER) #elif defined(_MSC_VER)
#define ASSUME(expr) __assume(expr) # define ASSUME(expr) __assume(expr)
#else #else
#define ASSUME(expr) # define ASSUME(expr)
#endif #endif
#ifdef DYNARMIC_IGNORE_ASSERTS #ifdef DYNARMIC_IGNORE_ASSERTS
#if defined(__clang) || defined(__GNUC__) # if defined(__clang) || defined(__GNUC__)
#define UNREACHABLE() __builtin_unreachable() # define UNREACHABLE() __builtin_unreachable()
#elif defined(_MSC_VER) # elif defined(_MSC_VER)
#define UNREACHABLE() __assume(0) # define UNREACHABLE() __assume(0)
#else # else
#define UNREACHABLE() # define UNREACHABLE()
#endif # endif
#define ASSERT(expr) ASSUME(expr) # define ASSERT(expr) ASSUME(expr)
#define ASSERT_MSG(expr, ...) ASSUME(expr) # define ASSERT_MSG(expr, ...) ASSUME(expr)
#define ASSERT_FALSE(...) UNREACHABLE() # define ASSERT_FALSE(...) UNREACHABLE()
#else #else
#define UNREACHABLE() ASSERT_FALSE("Unreachable code!") # define UNREACHABLE() ASSERT_FALSE("Unreachable code!")
#define ASSERT(expr) \ # define ASSERT(expr) \
[&]{ \ [&] { \
if (UNLIKELY(!(expr))) { \ if (UNLIKELY(!(expr))) { \
::Dynarmic::Common::detail::TerminateHelper(#expr); \ ::Dynarmic::Common::detail::TerminateHelper(#expr); \
} \ } \
}() }()
#define ASSERT_MSG(expr, ...) \ # define ASSERT_MSG(expr, ...) \
[&]{ \ [&] { \
if (UNLIKELY(!(expr))) { \ if (UNLIKELY(!(expr))) { \
::Dynarmic::Common::detail::TerminateHelper(#expr "\nMessage: " __VA_ARGS__); \ ::Dynarmic::Common::detail::TerminateHelper(#expr "\nMessage: " __VA_ARGS__); \
} \ } \
}() }()
#define ASSERT_FALSE(...) ::Dynarmic::Common::detail::TerminateHelper("false\nMessage: " __VA_ARGS__) # define ASSERT_FALSE(...) ::Dynarmic::Common::detail::TerminateHelper("false\nMessage: " __VA_ARGS__)
#endif #endif
#if defined(NDEBUG) || defined(DYNARMIC_IGNORE_ASSERTS) #if defined(NDEBUG) || defined(DYNARMIC_IGNORE_ASSERTS)
#define DEBUG_ASSERT(expr) ASSUME(expr) # define DEBUG_ASSERT(expr) ASSUME(expr)
#define DEBUG_ASSERT_MSG(expr, ...) ASSUME(expr) # define DEBUG_ASSERT_MSG(expr, ...) ASSUME(expr)
#else #else
#define DEBUG_ASSERT(expr) ASSERT(expr) # define DEBUG_ASSERT(expr) ASSERT(expr)
#define DEBUG_ASSERT_MSG(expr, ...) ASSERT_MSG(expr, __VA_ARGS__) # define DEBUG_ASSERT_MSG(expr, ...) ASSERT_MSG(expr, __VA_ARGS__)
#endif #endif

View file

@ -21,7 +21,7 @@ constexpr size_t BitSize() {
return sizeof(T) * CHAR_BIT; return sizeof(T) * CHAR_BIT;
} }
template <typename T> template<typename T>
constexpr T Ones(size_t count) { constexpr T Ones(size_t count) {
ASSERT_MSG(count <= BitSize<T>(), "count larger than bitsize of T"); ASSERT_MSG(count <= BitSize<T>(), "count larger than bitsize of T");
if (count == BitSize<T>()) if (count == BitSize<T>())
@ -72,8 +72,8 @@ constexpr T ModifyBits(const T value, const T new_bits) {
} }
#ifdef _MSC_VER #ifdef _MSC_VER
#pragma warning(push) # pragma warning(push)
#pragma warning(disable:4554) # pragma warning(disable : 4554)
#endif #endif
/// Extracts a single bit at bit_position from value of type T. /// Extracts a single bit at bit_position from value of type T.
template<typename T> template<typename T>
@ -123,7 +123,7 @@ constexpr T ModifyBit(const T value, bool new_bit) {
return ModifyBit<T>(bit_position, value, new_bit); return ModifyBit<T>(bit_position, value, new_bit);
} }
#ifdef _MSC_VER #ifdef _MSC_VER
#pragma warning(pop) # pragma warning(pop)
#endif #endif
/// Sign-extends a value that has bit_count bits to the full bitwidth of type T. /// Sign-extends a value that has bit_count bits to the full bitwidth of type T.
@ -152,12 +152,12 @@ inline T SignExtend(const size_t bit_count, const T value) {
return value; return value;
} }
template <typename Integral> template<typename Integral>
inline size_t BitCount(Integral value) { inline size_t BitCount(Integral value) {
return std::bitset<BitSize<Integral>()>(value).count(); return std::bitset<BitSize<Integral>()>(value).count();
} }
template <typename T> template<typename T>
constexpr size_t CountLeadingZeros(T value) { constexpr size_t CountLeadingZeros(T value) {
auto x = static_cast<std::make_unsigned_t<T>>(value); auto x = static_cast<std::make_unsigned_t<T>>(value);
size_t result = BitSize<T>(); size_t result = BitSize<T>();
@ -168,7 +168,7 @@ constexpr size_t CountLeadingZeros(T value) {
return result; return result;
} }
template <typename T> template<typename T>
constexpr int HighestSetBit(T value) { constexpr int HighestSetBit(T value) {
auto x = static_cast<std::make_unsigned_t<T>>(value); auto x = static_cast<std::make_unsigned_t<T>>(value);
int result = -1; int result = -1;
@ -179,7 +179,7 @@ constexpr int HighestSetBit(T value) {
return result; return result;
} }
template <typename T> template<typename T>
constexpr size_t LowestSetBit(T value) { constexpr size_t LowestSetBit(T value) {
auto x = static_cast<std::make_unsigned_t<T>>(value); auto x = static_cast<std::make_unsigned_t<T>>(value);
if (x == 0) if (x == 0)
@ -193,12 +193,12 @@ constexpr size_t LowestSetBit(T value) {
return result; return result;
} }
template <typename T> template<typename T>
constexpr bool MostSignificantBit(T value) { constexpr bool MostSignificantBit(T value) {
return Bit<BitSize<T>() - 1, T>(value); return Bit<BitSize<T>() - 1, T>(value);
} }
template <typename T> template<typename T>
inline T Replicate(T value, size_t element_size) { inline T Replicate(T value, size_t element_size) {
ASSERT_MSG(BitSize<T>() % element_size == 0, "bitsize of T not divisible by element_size"); ASSERT_MSG(BitSize<T>() % element_size == 0, "bitsize of T not divisible by element_size");
if (element_size == BitSize<T>()) if (element_size == BitSize<T>())
@ -206,7 +206,7 @@ inline T Replicate(T value, size_t element_size) {
return Replicate(value | (value << element_size), element_size * 2); return Replicate(value | (value << element_size), element_size * 2);
} }
template <typename T> template<typename T>
constexpr T RotateRight(T value, size_t amount) { constexpr T RotateRight(T value, size_t amount) {
amount %= BitSize<T>(); amount %= BitSize<T>();
@ -219,8 +219,8 @@ constexpr T RotateRight(T value, size_t amount) {
} }
constexpr u32 SwapHalves32(u32 value) { constexpr u32 SwapHalves32(u32 value) {
return ((value & 0xFFFF0000U) >> 16) | return ((value & 0xFFFF0000U) >> 16)
((value & 0x0000FFFFU) << 16); | ((value & 0x0000FFFFU) << 16);
} }
constexpr u16 SwapBytes16(u16 value) { constexpr u16 SwapBytes16(u16 value) {
@ -228,21 +228,21 @@ constexpr u16 SwapBytes16(u16 value) {
} }
constexpr u32 SwapBytes32(u32 value) { constexpr u32 SwapBytes32(u32 value) {
return ((value & 0xFF000000U) >> 24) | return ((value & 0xFF000000U) >> 24)
((value & 0x00FF0000U) >> 8) | | ((value & 0x00FF0000U) >> 8)
((value & 0x0000FF00U) << 8) | | ((value & 0x0000FF00U) << 8)
((value & 0x000000FFU) << 24); | ((value & 0x000000FFU) << 24);
} }
constexpr u64 SwapBytes64(u64 value) { constexpr u64 SwapBytes64(u64 value) {
return ((value & 0xFF00000000000000ULL) >> 56) | return ((value & 0xFF00000000000000ULL) >> 56)
((value & 0x00FF000000000000ULL) >> 40) | | ((value & 0x00FF000000000000ULL) >> 40)
((value & 0x0000FF0000000000ULL) >> 24) | | ((value & 0x0000FF0000000000ULL) >> 24)
((value & 0x000000FF00000000ULL) >> 8) | | ((value & 0x000000FF00000000ULL) >> 8)
((value & 0x00000000FF000000ULL) << 8) | | ((value & 0x00000000FF000000ULL) << 8)
((value & 0x0000000000FF0000ULL) << 24) | | ((value & 0x0000000000FF0000ULL) << 24)
((value & 0x000000000000FF00ULL) << 40) | | ((value & 0x000000000000FF00ULL) << 40)
((value & 0x00000000000000FFULL) << 56); | ((value & 0x00000000000000FFULL) << 56);
} }
} // namespace Dynarmic::Common } // namespace Dynarmic::Common

View file

@ -13,7 +13,7 @@
namespace Dynarmic::Common { namespace Dynarmic::Common {
/// Reinterpret objects of one type as another by bit-casting between object representations. /// Reinterpret objects of one type as another by bit-casting between object representations.
template <class Dest, class Source> template<class Dest, class Source>
inline Dest BitCast(const Source& source) noexcept { inline Dest BitCast(const Source& source) noexcept {
static_assert(sizeof(Dest) == sizeof(Source), "size of destination and source objects must be equal"); static_assert(sizeof(Dest) == sizeof(Source), "size of destination and source objects must be equal");
static_assert(std::is_trivially_copyable_v<Dest>, "destination type must be trivially copyable."); static_assert(std::is_trivially_copyable_v<Dest>, "destination type must be trivially copyable.");
@ -26,7 +26,7 @@ inline Dest BitCast(const Source& source) noexcept {
/// Reinterpret objects of any arbitrary type as another type by bit-casting between object representations. /// Reinterpret objects of any arbitrary type as another type by bit-casting between object representations.
/// Note that here we do not verify if source has enough bytes to read from. /// Note that here we do not verify if source has enough bytes to read from.
template <class Dest, class SourcePtr> template<class Dest, class SourcePtr>
inline Dest BitCastPointee(const SourcePtr source) noexcept { inline Dest BitCastPointee(const SourcePtr source) noexcept {
static_assert(sizeof(SourcePtr) == sizeof(void*), "source pointer must have size of a pointer"); static_assert(sizeof(SourcePtr) == sizeof(void*), "source pointer must have size of a pointer");
static_assert(std::is_trivially_copyable_v<Dest>, "destination type must be trivially copyable."); static_assert(std::is_trivially_copyable_v<Dest>, "destination type must be trivially copyable.");
@ -37,9 +37,9 @@ inline Dest BitCastPointee(const SourcePtr source) noexcept {
} }
/// Cast a lambda into an equivalent function pointer. /// Cast a lambda into an equivalent function pointer.
template <class Function> template<class Function>
inline auto FptrCast(Function f) noexcept { inline auto FptrCast(Function f) noexcept {
return static_cast<mp::equivalent_function_type<Function>*>(f); return static_cast<mp::equivalent_function_type<Function>*>(f);
} }
} // namespace Dynarmic::Common } // namespace Dynarmic::Common

View file

@ -3,56 +3,55 @@
* SPDX-License-Identifier: 0BSD * SPDX-License-Identifier: 0BSD
*/ */
#include "dynarmic/common/crypto/aes.h"
#include <array> #include <array>
#include "dynarmic/common/common_types.h" #include "dynarmic/common/common_types.h"
#include "dynarmic/common/crypto/aes.h"
namespace Dynarmic::Common::Crypto::AES { namespace Dynarmic::Common::Crypto::AES {
using SubstitutionTable = std::array<u8, 256>; using SubstitutionTable = std::array<u8, 256>;
// See section 5.1.1 Figure 7 in FIPS 197 // See section 5.1.1 Figure 7 in FIPS 197
constexpr SubstitutionTable substitution_box{{ constexpr SubstitutionTable substitution_box{
// 0 1 2 3 4 5 6 7 8 9 A B C D E F {// 0 1 2 3 4 5 6 7 8 9 A B C D E F
0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76, 0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76,
0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0, 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0, 0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0, 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0,
0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC, 0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15, 0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC, 0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15,
0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A, 0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75, 0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A, 0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75,
0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0, 0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84, 0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0, 0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84,
0x53, 0xD1, 0x00, 0xED, 0x20, 0xFC, 0xB1, 0x5B, 0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF, 0x53, 0xD1, 0x00, 0xED, 0x20, 0xFC, 0xB1, 0x5B, 0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF,
0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85, 0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8, 0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85, 0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8,
0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5, 0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2, 0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5, 0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2,
0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17, 0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73, 0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17, 0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73,
0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88, 0x46, 0xEE, 0xB8, 0x14, 0xDE, 0x5E, 0x0B, 0xDB, 0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88, 0x46, 0xEE, 0xB8, 0x14, 0xDE, 0x5E, 0x0B, 0xDB,
0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C, 0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79, 0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C, 0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79,
0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9, 0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08, 0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9, 0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08,
0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6, 0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A, 0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6, 0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A,
0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E, 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E, 0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E, 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E,
0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94, 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF, 0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94, 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF,
0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16 0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16}};
}};
// See section 5.3.2 Figure 14 in FIPS 197 // See section 5.3.2 Figure 14 in FIPS 197
constexpr SubstitutionTable inverse_substitution_box{{ constexpr SubstitutionTable inverse_substitution_box{
// 0 1 2 3 4 5 6 7 8 9 A B C D E F {// 0 1 2 3 4 5 6 7 8 9 A B C D E F
0x52, 0x09, 0x6A, 0xD5, 0x30, 0x36, 0xA5, 0x38, 0xBF, 0x40, 0xA3, 0x9E, 0x81, 0xF3, 0xD7, 0xFB, 0x52, 0x09, 0x6A, 0xD5, 0x30, 0x36, 0xA5, 0x38, 0xBF, 0x40, 0xA3, 0x9E, 0x81, 0xF3, 0xD7, 0xFB,
0x7C, 0xE3, 0x39, 0x82, 0x9B, 0x2F, 0xFF, 0x87, 0x34, 0x8E, 0x43, 0x44, 0xC4, 0xDE, 0xE9, 0xCB, 0x7C, 0xE3, 0x39, 0x82, 0x9B, 0x2F, 0xFF, 0x87, 0x34, 0x8E, 0x43, 0x44, 0xC4, 0xDE, 0xE9, 0xCB,
0x54, 0x7B, 0x94, 0x32, 0xA6, 0xC2, 0x23, 0x3D, 0xEE, 0x4C, 0x95, 0x0B, 0x42, 0xFA, 0xC3, 0x4E, 0x54, 0x7B, 0x94, 0x32, 0xA6, 0xC2, 0x23, 0x3D, 0xEE, 0x4C, 0x95, 0x0B, 0x42, 0xFA, 0xC3, 0x4E,
0x08, 0x2E, 0xA1, 0x66, 0x28, 0xD9, 0x24, 0xB2, 0x76, 0x5B, 0xA2, 0x49, 0x6D, 0x8B, 0xD1, 0x25, 0x08, 0x2E, 0xA1, 0x66, 0x28, 0xD9, 0x24, 0xB2, 0x76, 0x5B, 0xA2, 0x49, 0x6D, 0x8B, 0xD1, 0x25,
0x72, 0xF8, 0xF6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xD4, 0xA4, 0x5C, 0xCC, 0x5D, 0x65, 0xB6, 0x92, 0x72, 0xF8, 0xF6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xD4, 0xA4, 0x5C, 0xCC, 0x5D, 0x65, 0xB6, 0x92,
0x6C, 0x70, 0x48, 0x50, 0xFD, 0xED, 0xB9, 0xDA, 0x5E, 0x15, 0x46, 0x57, 0xA7, 0x8D, 0x9D, 0x84, 0x6C, 0x70, 0x48, 0x50, 0xFD, 0xED, 0xB9, 0xDA, 0x5E, 0x15, 0x46, 0x57, 0xA7, 0x8D, 0x9D, 0x84,
0x90, 0xD8, 0xAB, 0x00, 0x8C, 0xBC, 0xD3, 0x0A, 0xF7, 0xE4, 0x58, 0x05, 0xB8, 0xB3, 0x45, 0x06, 0x90, 0xD8, 0xAB, 0x00, 0x8C, 0xBC, 0xD3, 0x0A, 0xF7, 0xE4, 0x58, 0x05, 0xB8, 0xB3, 0x45, 0x06,
0xD0, 0x2C, 0x1E, 0x8F, 0xCA, 0x3F, 0x0F, 0x02, 0xC1, 0xAF, 0xBD, 0x03, 0x01, 0x13, 0x8A, 0x6B, 0xD0, 0x2C, 0x1E, 0x8F, 0xCA, 0x3F, 0x0F, 0x02, 0xC1, 0xAF, 0xBD, 0x03, 0x01, 0x13, 0x8A, 0x6B,
0x3A, 0x91, 0x11, 0x41, 0x4F, 0x67, 0xDC, 0xEA, 0x97, 0xF2, 0xCF, 0xCE, 0xF0, 0xB4, 0xE6, 0x73, 0x3A, 0x91, 0x11, 0x41, 0x4F, 0x67, 0xDC, 0xEA, 0x97, 0xF2, 0xCF, 0xCE, 0xF0, 0xB4, 0xE6, 0x73,
0x96, 0xAC, 0x74, 0x22, 0xE7, 0xAD, 0x35, 0x85, 0xE2, 0xF9, 0x37, 0xE8, 0x1C, 0x75, 0xDF, 0x6E, 0x96, 0xAC, 0x74, 0x22, 0xE7, 0xAD, 0x35, 0x85, 0xE2, 0xF9, 0x37, 0xE8, 0x1C, 0x75, 0xDF, 0x6E,
0x47, 0xF1, 0x1A, 0x71, 0x1D, 0x29, 0xC5, 0x89, 0x6F, 0xB7, 0x62, 0x0E, 0xAA, 0x18, 0xBE, 0x1B, 0x47, 0xF1, 0x1A, 0x71, 0x1D, 0x29, 0xC5, 0x89, 0x6F, 0xB7, 0x62, 0x0E, 0xAA, 0x18, 0xBE, 0x1B,
0xFC, 0x56, 0x3E, 0x4B, 0xC6, 0xD2, 0x79, 0x20, 0x9A, 0xDB, 0xC0, 0xFE, 0x78, 0xCD, 0x5A, 0xF4, 0xFC, 0x56, 0x3E, 0x4B, 0xC6, 0xD2, 0x79, 0x20, 0x9A, 0xDB, 0xC0, 0xFE, 0x78, 0xCD, 0x5A, 0xF4,
0x1F, 0xDD, 0xA8, 0x33, 0x88, 0x07, 0xC7, 0x31, 0xB1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xEC, 0x5F, 0x1F, 0xDD, 0xA8, 0x33, 0x88, 0x07, 0xC7, 0x31, 0xB1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xEC, 0x5F,
0x60, 0x51, 0x7F, 0xA9, 0x19, 0xB5, 0x4A, 0x0D, 0x2D, 0xE5, 0x7A, 0x9F, 0x93, 0xC9, 0x9C, 0xEF, 0x60, 0x51, 0x7F, 0xA9, 0x19, 0xB5, 0x4A, 0x0D, 0x2D, 0xE5, 0x7A, 0x9F, 0x93, 0xC9, 0x9C, 0xEF,
0xA0, 0xE0, 0x3B, 0x4D, 0xAE, 0x2A, 0xF5, 0xB0, 0xC8, 0xEB, 0xBB, 0x3C, 0x83, 0x53, 0x99, 0x61, 0xA0, 0xE0, 0x3B, 0x4D, 0xAE, 0x2A, 0xF5, 0xB0, 0xC8, 0xEB, 0xBB, 0x3C, 0x83, 0x53, 0x99, 0x61,
0x17, 0x2B, 0x04, 0x7E, 0xBA, 0x77, 0xD6, 0x26, 0xE1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0C, 0x7D 0x17, 0x2B, 0x04, 0x7E, 0xBA, 0x77, 0xD6, 0x26, 0xE1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0C, 0x7D}};
}};
// See section 4.2.1 in FIPS 197. // See section 4.2.1 in FIPS 197.
static constexpr u8 xtime(u8 x) { static constexpr u8 xtime(u8 x) {
@ -61,11 +60,11 @@ static constexpr u8 xtime(u8 x) {
// Galois Field multiplication. // Galois Field multiplication.
static constexpr u8 Multiply(u8 x, u8 y) { static constexpr u8 Multiply(u8 x, u8 y) {
return static_cast<u8>(((y & 1) * x) ^ return static_cast<u8>(((y & 1) * x)
((y >> 1 & 1) * xtime(x)) ^ ^ ((y >> 1 & 1) * xtime(x))
((y >> 2 & 1) * xtime(xtime(x))) ^ ^ ((y >> 2 & 1) * xtime(xtime(x)))
((y >> 3 & 1) * xtime(xtime(xtime(x)))) ^ ^ ((y >> 3 & 1) * xtime(xtime(xtime(x))))
((y >> 4 & 1) * xtime(xtime(xtime(xtime(x)))))); ^ ((y >> 4 & 1) * xtime(xtime(xtime(xtime(x))))));
} }
static void ShiftRows(State& out_state, const State& state) { static void ShiftRows(State& out_state, const State& state) {
@ -178,4 +177,4 @@ void InverseMixColumns(State& out_state, const State& state) {
} }
} }
} // namespace Dynarmic::Common::Crypto::AES } // namespace Dynarmic::Common::Crypto::AES

View file

@ -6,6 +6,7 @@
#pragma once #pragma once
#include <array> #include <array>
#include "dynarmic/common/common_types.h" #include "dynarmic/common/common_types.h"
namespace Dynarmic::Common::Crypto::AES { namespace Dynarmic::Common::Crypto::AES {
@ -19,4 +20,4 @@ void EncryptSingleRound(State& out_state, const State& state);
void MixColumns(State& out_state, const State& state); void MixColumns(State& out_state, const State& state);
void InverseMixColumns(State& out_state, const State& state); void InverseMixColumns(State& out_state, const State& state);
} // namespace Dynarmic::Common::Crypto::AES } // namespace Dynarmic::Common::Crypto::AES

View file

@ -3,150 +3,149 @@
* SPDX-License-Identifier: 0BSD * SPDX-License-Identifier: 0BSD
*/ */
#include "dynarmic/common/crypto/crc32.h"
#include <array> #include <array>
#include "dynarmic/common/common_types.h" #include "dynarmic/common/common_types.h"
#include "dynarmic/common/crypto/crc32.h"
namespace Dynarmic::Common::Crypto::CRC32 { namespace Dynarmic::Common::Crypto::CRC32 {
using CRC32Table = std::array<u32, 256>; using CRC32Table = std::array<u32, 256>;
// CRC32 algorithm that uses polynomial 0x1EDC6F41 // CRC32 algorithm that uses polynomial 0x1EDC6F41
constexpr CRC32Table castagnoli_table{{ constexpr CRC32Table castagnoli_table{
0x00000000, 0xF26B8303, 0xE13B70F7, 0x1350F3F4, {0x00000000, 0xF26B8303, 0xE13B70F7, 0x1350F3F4,
0xC79A971F, 0x35F1141C, 0x26A1E7E8, 0xD4CA64EB, 0xC79A971F, 0x35F1141C, 0x26A1E7E8, 0xD4CA64EB,
0x8AD958CF, 0x78B2DBCC, 0x6BE22838, 0x9989AB3B, 0x8AD958CF, 0x78B2DBCC, 0x6BE22838, 0x9989AB3B,
0x4D43CFD0, 0xBF284CD3, 0xAC78BF27, 0x5E133C24, 0x4D43CFD0, 0xBF284CD3, 0xAC78BF27, 0x5E133C24,
0x105EC76F, 0xE235446C, 0xF165B798, 0x030E349B, 0x105EC76F, 0xE235446C, 0xF165B798, 0x030E349B,
0xD7C45070, 0x25AFD373, 0x36FF2087, 0xC494A384, 0xD7C45070, 0x25AFD373, 0x36FF2087, 0xC494A384,
0x9A879FA0, 0x68EC1CA3, 0x7BBCEF57, 0x89D76C54, 0x9A879FA0, 0x68EC1CA3, 0x7BBCEF57, 0x89D76C54,
0x5D1D08BF, 0xAF768BBC, 0xBC267848, 0x4E4DFB4B, 0x5D1D08BF, 0xAF768BBC, 0xBC267848, 0x4E4DFB4B,
0x20BD8EDE, 0xD2D60DDD, 0xC186FE29, 0x33ED7D2A, 0x20BD8EDE, 0xD2D60DDD, 0xC186FE29, 0x33ED7D2A,
0xE72719C1, 0x154C9AC2, 0x061C6936, 0xF477EA35, 0xE72719C1, 0x154C9AC2, 0x061C6936, 0xF477EA35,
0xAA64D611, 0x580F5512, 0x4B5FA6E6, 0xB93425E5, 0xAA64D611, 0x580F5512, 0x4B5FA6E6, 0xB93425E5,
0x6DFE410E, 0x9F95C20D, 0x8CC531F9, 0x7EAEB2FA, 0x6DFE410E, 0x9F95C20D, 0x8CC531F9, 0x7EAEB2FA,
0x30E349B1, 0xC288CAB2, 0xD1D83946, 0x23B3BA45, 0x30E349B1, 0xC288CAB2, 0xD1D83946, 0x23B3BA45,
0xF779DEAE, 0x05125DAD, 0x1642AE59, 0xE4292D5A, 0xF779DEAE, 0x05125DAD, 0x1642AE59, 0xE4292D5A,
0xBA3A117E, 0x4851927D, 0x5B016189, 0xA96AE28A, 0xBA3A117E, 0x4851927D, 0x5B016189, 0xA96AE28A,
0x7DA08661, 0x8FCB0562, 0x9C9BF696, 0x6EF07595, 0x7DA08661, 0x8FCB0562, 0x9C9BF696, 0x6EF07595,
0x417B1DBC, 0xB3109EBF, 0xA0406D4B, 0x522BEE48, 0x417B1DBC, 0xB3109EBF, 0xA0406D4B, 0x522BEE48,
0x86E18AA3, 0x748A09A0, 0x67DAFA54, 0x95B17957, 0x86E18AA3, 0x748A09A0, 0x67DAFA54, 0x95B17957,
0xCBA24573, 0x39C9C670, 0x2A993584, 0xD8F2B687, 0xCBA24573, 0x39C9C670, 0x2A993584, 0xD8F2B687,
0x0C38D26C, 0xFE53516F, 0xED03A29B, 0x1F682198, 0x0C38D26C, 0xFE53516F, 0xED03A29B, 0x1F682198,
0x5125DAD3, 0xA34E59D0, 0xB01EAA24, 0x42752927, 0x5125DAD3, 0xA34E59D0, 0xB01EAA24, 0x42752927,
0x96BF4DCC, 0x64D4CECF, 0x77843D3B, 0x85EFBE38, 0x96BF4DCC, 0x64D4CECF, 0x77843D3B, 0x85EFBE38,
0xDBFC821C, 0x2997011F, 0x3AC7F2EB, 0xC8AC71E8, 0xDBFC821C, 0x2997011F, 0x3AC7F2EB, 0xC8AC71E8,
0x1C661503, 0xEE0D9600, 0xFD5D65F4, 0x0F36E6F7, 0x1C661503, 0xEE0D9600, 0xFD5D65F4, 0x0F36E6F7,
0x61C69362, 0x93AD1061, 0x80FDE395, 0x72966096, 0x61C69362, 0x93AD1061, 0x80FDE395, 0x72966096,
0xA65C047D, 0x5437877E, 0x4767748A, 0xB50CF789, 0xA65C047D, 0x5437877E, 0x4767748A, 0xB50CF789,
0xEB1FCBAD, 0x197448AE, 0x0A24BB5A, 0xF84F3859, 0xEB1FCBAD, 0x197448AE, 0x0A24BB5A, 0xF84F3859,
0x2C855CB2, 0xDEEEDFB1, 0xCDBE2C45, 0x3FD5AF46, 0x2C855CB2, 0xDEEEDFB1, 0xCDBE2C45, 0x3FD5AF46,
0x7198540D, 0x83F3D70E, 0x90A324FA, 0x62C8A7F9, 0x7198540D, 0x83F3D70E, 0x90A324FA, 0x62C8A7F9,
0xB602C312, 0x44694011, 0x5739B3E5, 0xA55230E6, 0xB602C312, 0x44694011, 0x5739B3E5, 0xA55230E6,
0xFB410CC2, 0x092A8FC1, 0x1A7A7C35, 0xE811FF36, 0xFB410CC2, 0x092A8FC1, 0x1A7A7C35, 0xE811FF36,
0x3CDB9BDD, 0xCEB018DE, 0xDDE0EB2A, 0x2F8B6829, 0x3CDB9BDD, 0xCEB018DE, 0xDDE0EB2A, 0x2F8B6829,
0x82F63B78, 0x709DB87B, 0x63CD4B8F, 0x91A6C88C, 0x82F63B78, 0x709DB87B, 0x63CD4B8F, 0x91A6C88C,
0x456CAC67, 0xB7072F64, 0xA457DC90, 0x563C5F93, 0x456CAC67, 0xB7072F64, 0xA457DC90, 0x563C5F93,
0x082F63B7, 0xFA44E0B4, 0xE9141340, 0x1B7F9043, 0x082F63B7, 0xFA44E0B4, 0xE9141340, 0x1B7F9043,
0xCFB5F4A8, 0x3DDE77AB, 0x2E8E845F, 0xDCE5075C, 0xCFB5F4A8, 0x3DDE77AB, 0x2E8E845F, 0xDCE5075C,
0x92A8FC17, 0x60C37F14, 0x73938CE0, 0x81F80FE3, 0x92A8FC17, 0x60C37F14, 0x73938CE0, 0x81F80FE3,
0x55326B08, 0xA759E80B, 0xB4091BFF, 0x466298FC, 0x55326B08, 0xA759E80B, 0xB4091BFF, 0x466298FC,
0x1871A4D8, 0xEA1A27DB, 0xF94AD42F, 0x0B21572C, 0x1871A4D8, 0xEA1A27DB, 0xF94AD42F, 0x0B21572C,
0xDFEB33C7, 0x2D80B0C4, 0x3ED04330, 0xCCBBC033, 0xDFEB33C7, 0x2D80B0C4, 0x3ED04330, 0xCCBBC033,
0xA24BB5A6, 0x502036A5, 0x4370C551, 0xB11B4652, 0xA24BB5A6, 0x502036A5, 0x4370C551, 0xB11B4652,
0x65D122B9, 0x97BAA1BA, 0x84EA524E, 0x7681D14D, 0x65D122B9, 0x97BAA1BA, 0x84EA524E, 0x7681D14D,
0x2892ED69, 0xDAF96E6A, 0xC9A99D9E, 0x3BC21E9D, 0x2892ED69, 0xDAF96E6A, 0xC9A99D9E, 0x3BC21E9D,
0xEF087A76, 0x1D63F975, 0x0E330A81, 0xFC588982, 0xEF087A76, 0x1D63F975, 0x0E330A81, 0xFC588982,
0xB21572C9, 0x407EF1CA, 0x532E023E, 0xA145813D, 0xB21572C9, 0x407EF1CA, 0x532E023E, 0xA145813D,
0x758FE5D6, 0x87E466D5, 0x94B49521, 0x66DF1622, 0x758FE5D6, 0x87E466D5, 0x94B49521, 0x66DF1622,
0x38CC2A06, 0xCAA7A905, 0xD9F75AF1, 0x2B9CD9F2, 0x38CC2A06, 0xCAA7A905, 0xD9F75AF1, 0x2B9CD9F2,
0xFF56BD19, 0x0D3D3E1A, 0x1E6DCDEE, 0xEC064EED, 0xFF56BD19, 0x0D3D3E1A, 0x1E6DCDEE, 0xEC064EED,
0xC38D26C4, 0x31E6A5C7, 0x22B65633, 0xD0DDD530, 0xC38D26C4, 0x31E6A5C7, 0x22B65633, 0xD0DDD530,
0x0417B1DB, 0xF67C32D8, 0xE52CC12C, 0x1747422F, 0x0417B1DB, 0xF67C32D8, 0xE52CC12C, 0x1747422F,
0x49547E0B, 0xBB3FFD08, 0xA86F0EFC, 0x5A048DFF, 0x49547E0B, 0xBB3FFD08, 0xA86F0EFC, 0x5A048DFF,
0x8ECEE914, 0x7CA56A17, 0x6FF599E3, 0x9D9E1AE0, 0x8ECEE914, 0x7CA56A17, 0x6FF599E3, 0x9D9E1AE0,
0xD3D3E1AB, 0x21B862A8, 0x32E8915C, 0xC083125F, 0xD3D3E1AB, 0x21B862A8, 0x32E8915C, 0xC083125F,
0x144976B4, 0xE622F5B7, 0xF5720643, 0x07198540, 0x144976B4, 0xE622F5B7, 0xF5720643, 0x07198540,
0x590AB964, 0xAB613A67, 0xB831C993, 0x4A5A4A90, 0x590AB964, 0xAB613A67, 0xB831C993, 0x4A5A4A90,
0x9E902E7B, 0x6CFBAD78, 0x7FAB5E8C, 0x8DC0DD8F, 0x9E902E7B, 0x6CFBAD78, 0x7FAB5E8C, 0x8DC0DD8F,
0xE330A81A, 0x115B2B19, 0x020BD8ED, 0xF0605BEE, 0xE330A81A, 0x115B2B19, 0x020BD8ED, 0xF0605BEE,
0x24AA3F05, 0xD6C1BC06, 0xC5914FF2, 0x37FACCF1, 0x24AA3F05, 0xD6C1BC06, 0xC5914FF2, 0x37FACCF1,
0x69E9F0D5, 0x9B8273D6, 0x88D28022, 0x7AB90321, 0x69E9F0D5, 0x9B8273D6, 0x88D28022, 0x7AB90321,
0xAE7367CA, 0x5C18E4C9, 0x4F48173D, 0xBD23943E, 0xAE7367CA, 0x5C18E4C9, 0x4F48173D, 0xBD23943E,
0xF36E6F75, 0x0105EC76, 0x12551F82, 0xE03E9C81, 0xF36E6F75, 0x0105EC76, 0x12551F82, 0xE03E9C81,
0x34F4F86A, 0xC69F7B69, 0xD5CF889D, 0x27A40B9E, 0x34F4F86A, 0xC69F7B69, 0xD5CF889D, 0x27A40B9E,
0x79B737BA, 0x8BDCB4B9, 0x988C474D, 0x6AE7C44E, 0x79B737BA, 0x8BDCB4B9, 0x988C474D, 0x6AE7C44E,
0xBE2DA0A5, 0x4C4623A6, 0x5F16D052, 0xAD7D5351 0xBE2DA0A5, 0x4C4623A6, 0x5F16D052, 0xAD7D5351}};
}};
// CRC32 algorithm that uses polynomial 0x04C11DB7 // CRC32 algorithm that uses polynomial 0x04C11DB7
constexpr CRC32Table iso_table{{ constexpr CRC32Table iso_table{
0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA, {0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA,
0x076DC419, 0x706AF48F, 0xE963A535, 0x9E6495A3, 0x076DC419, 0x706AF48F, 0xE963A535, 0x9E6495A3,
0x0EDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988, 0x0EDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988,
0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91, 0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91,
0x1DB71064, 0x6AB020F2, 0xF3B97148, 0x84BE41DE, 0x1DB71064, 0x6AB020F2, 0xF3B97148, 0x84BE41DE,
0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7, 0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7,
0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC, 0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC,
0x14015C4F, 0x63066CD9, 0xFA0F3D63, 0x8D080DF5, 0x14015C4F, 0x63066CD9, 0xFA0F3D63, 0x8D080DF5,
0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172, 0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172,
0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B, 0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B,
0x35B5A8FA, 0x42B2986C, 0xDBBBC9D6, 0xACBCF940, 0x35B5A8FA, 0x42B2986C, 0xDBBBC9D6, 0xACBCF940,
0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59, 0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59,
0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116, 0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116,
0x21B4F4B5, 0x56B3C423, 0xCFBA9599, 0xB8BDA50F, 0x21B4F4B5, 0x56B3C423, 0xCFBA9599, 0xB8BDA50F,
0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924, 0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924,
0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D, 0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D,
0x76DC4190, 0x01DB7106, 0x98D220BC, 0xEFD5102A, 0x76DC4190, 0x01DB7106, 0x98D220BC, 0xEFD5102A,
0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433, 0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433,
0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818, 0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818,
0x7F6A0DBB, 0x086D3D2D, 0x91646C97, 0xE6635C01, 0x7F6A0DBB, 0x086D3D2D, 0x91646C97, 0xE6635C01,
0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E, 0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E,
0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457, 0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457,
0x65B0D9C6, 0x12B7E950, 0x8BBEB8EA, 0xFCB9887C, 0x65B0D9C6, 0x12B7E950, 0x8BBEB8EA, 0xFCB9887C,
0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65, 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65,
0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2, 0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2,
0x4ADFA541, 0x3DD895D7, 0xA4D1C46D, 0xD3D6F4FB, 0x4ADFA541, 0x3DD895D7, 0xA4D1C46D, 0xD3D6F4FB,
0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0, 0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0,
0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9, 0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9,
0x5005713C, 0x270241AA, 0xBE0B1010, 0xC90C2086, 0x5005713C, 0x270241AA, 0xBE0B1010, 0xC90C2086,
0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F, 0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F,
0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4, 0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4,
0x59B33D17, 0x2EB40D81, 0xB7BD5C3B, 0xC0BA6CAD, 0x59B33D17, 0x2EB40D81, 0xB7BD5C3B, 0xC0BA6CAD,
0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A, 0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A,
0xEAD54739, 0x9DD277AF, 0x04DB2615, 0x73DC1683, 0xEAD54739, 0x9DD277AF, 0x04DB2615, 0x73DC1683,
0xE3630B12, 0x94643B84, 0x0D6D6A3E, 0x7A6A5AA8, 0xE3630B12, 0x94643B84, 0x0D6D6A3E, 0x7A6A5AA8,
0xE40ECF0B, 0x9309FF9D, 0x0A00AE27, 0x7D079EB1, 0xE40ECF0B, 0x9309FF9D, 0x0A00AE27, 0x7D079EB1,
0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE, 0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE,
0xF762575D, 0x806567CB, 0x196C3671, 0x6E6B06E7, 0xF762575D, 0x806567CB, 0x196C3671, 0x6E6B06E7,
0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC, 0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC,
0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5, 0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5,
0xD6D6A3E8, 0xA1D1937E, 0x38D8C2C4, 0x4FDFF252, 0xD6D6A3E8, 0xA1D1937E, 0x38D8C2C4, 0x4FDFF252,
0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B, 0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B,
0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60, 0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60,
0xDF60EFC3, 0xA867DF55, 0x316E8EEF, 0x4669BE79, 0xDF60EFC3, 0xA867DF55, 0x316E8EEF, 0x4669BE79,
0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236, 0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236,
0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F, 0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F,
0xC5BA3BBE, 0xB2BD0B28, 0x2BB45A92, 0x5CB36A04, 0xC5BA3BBE, 0xB2BD0B28, 0x2BB45A92, 0x5CB36A04,
0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D, 0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D,
0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A, 0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A,
0x9C0906A9, 0xEB0E363F, 0x72076785, 0x05005713, 0x9C0906A9, 0xEB0E363F, 0x72076785, 0x05005713,
0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38, 0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38,
0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21, 0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21,
0x86D3D2D4, 0xF1D4E242, 0x68DDB3F8, 0x1FDA836E, 0x86D3D2D4, 0xF1D4E242, 0x68DDB3F8, 0x1FDA836E,
0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777, 0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777,
0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C, 0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C,
0x8F659EFF, 0xF862AE69, 0x616BFFD3, 0x166CCF45, 0x8F659EFF, 0xF862AE69, 0x616BFFD3, 0x166CCF45,
0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2, 0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2,
0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB, 0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB,
0xAED16A4A, 0xD9D65ADC, 0x40DF0B66, 0x37D83BF0, 0xAED16A4A, 0xD9D65ADC, 0x40DF0B66, 0x37D83BF0,
0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9, 0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9,
0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6, 0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6,
0xBAD03605, 0xCDD70693, 0x54DE5729, 0x23D967BF, 0xBAD03605, 0xCDD70693, 0x54DE5729, 0x23D967BF,
0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94, 0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94,
0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D 0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D}};
}};
static u32 ComputeCRC32(const CRC32Table& table, u32 crc, const u64 value, int length) { static u32 ComputeCRC32(const CRC32Table& table, u32 crc, const u64 value, int length) {
const auto* data = reinterpret_cast<const unsigned char*>(&value); const auto* data = reinterpret_cast<const unsigned char*>(&value);
@ -166,4 +165,4 @@ u32 ComputeCRC32ISO(u32 crc, u64 value, int length) {
return ComputeCRC32(iso_table, crc, value, length); return ComputeCRC32(iso_table, crc, value, length);
} }
} // namespace Dynarmic::Common::Crypto::CRC32 } // namespace Dynarmic::Common::Crypto::CRC32

View file

@ -37,4 +37,4 @@ u32 ComputeCRC32Castagnoli(u32 crc, u64 value, int length);
*/ */
u32 ComputeCRC32ISO(u32 crc, u64 value, int length); u32 ComputeCRC32ISO(u32 crc, u64 value, int length);
} // namespace Dynarmic::Common::Crypto::CRC32 } // namespace Dynarmic::Common::Crypto::CRC32

View file

@ -3,52 +3,52 @@
* SPDX-License-Identifier: 0BSD * SPDX-License-Identifier: 0BSD
*/ */
#include "dynarmic/common/crypto/sm4.h"
#include <array> #include <array>
#include "dynarmic/common/common_types.h" #include "dynarmic/common/common_types.h"
#include "dynarmic/common/crypto/sm4.h"
namespace Dynarmic::Common::Crypto::SM4 { namespace Dynarmic::Common::Crypto::SM4 {
using SubstitutionTable = std::array<u8, 256>; using SubstitutionTable = std::array<u8, 256>;
constexpr SubstitutionTable substitution_box{{ constexpr SubstitutionTable substitution_box{
0xD6, 0x90, 0xE9, 0xFE, 0xCC, 0xE1, 0x3D, 0xB7, {0xD6, 0x90, 0xE9, 0xFE, 0xCC, 0xE1, 0x3D, 0xB7,
0x16, 0xB6, 0x14, 0xC2, 0x28, 0xFB, 0x2C, 0x05, 0x16, 0xB6, 0x14, 0xC2, 0x28, 0xFB, 0x2C, 0x05,
0x2B, 0x67, 0x9A, 0x76, 0x2A, 0xBE, 0x04, 0xC3, 0x2B, 0x67, 0x9A, 0x76, 0x2A, 0xBE, 0x04, 0xC3,
0xAA, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99, 0xAA, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99,
0x9C, 0x42, 0x50, 0xF4, 0x91, 0xEF, 0x98, 0x7A, 0x9C, 0x42, 0x50, 0xF4, 0x91, 0xEF, 0x98, 0x7A,
0x33, 0x54, 0x0B, 0x43, 0xED, 0xCF, 0xAC, 0x62, 0x33, 0x54, 0x0B, 0x43, 0xED, 0xCF, 0xAC, 0x62,
0xE4, 0xB3, 0x1C, 0xA9, 0xC9, 0x08, 0xE8, 0x95, 0xE4, 0xB3, 0x1C, 0xA9, 0xC9, 0x08, 0xE8, 0x95,
0x80, 0xDF, 0x94, 0xFA, 0x75, 0x8F, 0x3F, 0xA6, 0x80, 0xDF, 0x94, 0xFA, 0x75, 0x8F, 0x3F, 0xA6,
0x47, 0x07, 0xA7, 0xFC, 0xF3, 0x73, 0x17, 0xBA, 0x47, 0x07, 0xA7, 0xFC, 0xF3, 0x73, 0x17, 0xBA,
0x83, 0x59, 0x3C, 0x19, 0xE6, 0x85, 0x4F, 0xA8, 0x83, 0x59, 0x3C, 0x19, 0xE6, 0x85, 0x4F, 0xA8,
0x68, 0x6B, 0x81, 0xB2, 0x71, 0x64, 0xDA, 0x8B, 0x68, 0x6B, 0x81, 0xB2, 0x71, 0x64, 0xDA, 0x8B,
0xF8, 0xEB, 0x0F, 0x4B, 0x70, 0x56, 0x9D, 0x35, 0xF8, 0xEB, 0x0F, 0x4B, 0x70, 0x56, 0x9D, 0x35,
0x1E, 0x24, 0x0E, 0x5E, 0x63, 0x58, 0xD1, 0xA2, 0x1E, 0x24, 0x0E, 0x5E, 0x63, 0x58, 0xD1, 0xA2,
0x25, 0x22, 0x7C, 0x3B, 0x01, 0x21, 0x78, 0x87, 0x25, 0x22, 0x7C, 0x3B, 0x01, 0x21, 0x78, 0x87,
0xD4, 0x00, 0x46, 0x57, 0x9F, 0xD3, 0x27, 0x52, 0xD4, 0x00, 0x46, 0x57, 0x9F, 0xD3, 0x27, 0x52,
0x4C, 0x36, 0x02, 0xE7, 0xA0, 0xC4, 0xC8, 0x9E, 0x4C, 0x36, 0x02, 0xE7, 0xA0, 0xC4, 0xC8, 0x9E,
0xEA, 0xBF, 0x8A, 0xD2, 0x40, 0xC7, 0x38, 0xB5, 0xEA, 0xBF, 0x8A, 0xD2, 0x40, 0xC7, 0x38, 0xB5,
0xA3, 0xF7, 0xF2, 0xCE, 0xF9, 0x61, 0x15, 0xA1, 0xA3, 0xF7, 0xF2, 0xCE, 0xF9, 0x61, 0x15, 0xA1,
0xE0, 0xAE, 0x5D, 0xA4, 0x9B, 0x34, 0x1A, 0x55, 0xE0, 0xAE, 0x5D, 0xA4, 0x9B, 0x34, 0x1A, 0x55,
0xAD, 0x93, 0x32, 0x30, 0xF5, 0x8C, 0xB1, 0xE3, 0xAD, 0x93, 0x32, 0x30, 0xF5, 0x8C, 0xB1, 0xE3,
0x1D, 0xF6, 0xE2, 0x2E, 0x82, 0x66, 0xCA, 0x60, 0x1D, 0xF6, 0xE2, 0x2E, 0x82, 0x66, 0xCA, 0x60,
0xC0, 0x29, 0x23, 0xAB, 0x0D, 0x53, 0x4E, 0x6F, 0xC0, 0x29, 0x23, 0xAB, 0x0D, 0x53, 0x4E, 0x6F,
0xD5, 0xDB, 0x37, 0x45, 0xDE, 0xFD, 0x8E, 0x2F, 0xD5, 0xDB, 0x37, 0x45, 0xDE, 0xFD, 0x8E, 0x2F,
0x03, 0xFF, 0x6A, 0x72, 0x6D, 0x6C, 0x5B, 0x51, 0x03, 0xFF, 0x6A, 0x72, 0x6D, 0x6C, 0x5B, 0x51,
0x8D, 0x1B, 0xAF, 0x92, 0xBB, 0xDD, 0xBC, 0x7F, 0x8D, 0x1B, 0xAF, 0x92, 0xBB, 0xDD, 0xBC, 0x7F,
0x11, 0xD9, 0x5C, 0x41, 0x1F, 0x10, 0x5A, 0xD8, 0x11, 0xD9, 0x5C, 0x41, 0x1F, 0x10, 0x5A, 0xD8,
0x0A, 0xC1, 0x31, 0x88, 0xA5, 0xCD, 0x7B, 0xBD, 0x0A, 0xC1, 0x31, 0x88, 0xA5, 0xCD, 0x7B, 0xBD,
0x2D, 0x74, 0xD0, 0x12, 0xB8, 0xE5, 0xB4, 0xB0, 0x2D, 0x74, 0xD0, 0x12, 0xB8, 0xE5, 0xB4, 0xB0,
0x89, 0x69, 0x97, 0x4A, 0x0C, 0x96, 0x77, 0x7E, 0x89, 0x69, 0x97, 0x4A, 0x0C, 0x96, 0x77, 0x7E,
0x65, 0xB9, 0xF1, 0x09, 0xC5, 0x6E, 0xC6, 0x84, 0x65, 0xB9, 0xF1, 0x09, 0xC5, 0x6E, 0xC6, 0x84,
0x18, 0xF0, 0x7D, 0xEC, 0x3A, 0xDC, 0x4D, 0x20, 0x18, 0xF0, 0x7D, 0xEC, 0x3A, 0xDC, 0x4D, 0x20,
0x79, 0xEE, 0x5F, 0x3E, 0xD7, 0xCB, 0x39, 0x48 0x79, 0xEE, 0x5F, 0x3E, 0xD7, 0xCB, 0x39, 0x48}};
}};
u8 AccessSubstitutionBox(u8 index) { u8 AccessSubstitutionBox(u8 index) {
return substitution_box[index]; return substitution_box[index];
} }
} // namespace Dynarmic::Common::Crypto::SM4 } // namespace Dynarmic::Common::Crypto::SM4

View file

@ -11,4 +11,4 @@ namespace Dynarmic::Common::Crypto::SM4 {
u8 AccessSubstitutionBox(u8 index); u8 AccessSubstitutionBox(u8 index);
} // namespace Dynarmic::Common::Crypto::SM4 } // namespace Dynarmic::Common::Crypto::SM4

View file

@ -22,7 +22,8 @@ public:
FPCR() = default; FPCR() = default;
FPCR(const FPCR&) = default; FPCR(const FPCR&) = default;
FPCR(FPCR&&) = default; FPCR(FPCR&&) = default;
explicit FPCR(u32 data) : value{data & mask} {} explicit FPCR(u32 data)
: value{data & mask} {}
FPCR& operator=(const FPCR&) = default; FPCR& operator=(const FPCR&) = default;
FPCR& operator=(FPCR&&) = default; FPCR& operator=(FPCR&&) = default;
@ -204,4 +205,4 @@ inline bool operator!=(FPCR lhs, FPCR rhs) {
return !operator==(lhs, rhs); return !operator==(lhs, rhs);
} }
} // namespace Dynarmic::FP } // namespace Dynarmic::FP

View file

@ -18,7 +18,8 @@ public:
FPSR() = default; FPSR() = default;
FPSR(const FPSR&) = default; FPSR(const FPSR&) = default;
FPSR(FPSR&&) = default; FPSR(FPSR&&) = default;
explicit FPSR(u32 data) : value{data & mask} {} explicit FPSR(u32 data)
: value{data & mask} {}
FPSR& operator=(const FPSR&) = default; FPSR& operator=(const FPSR&) = default;
FPSR& operator=(FPSR&&) = default; FPSR& operator=(FPSR&&) = default;
@ -156,4 +157,4 @@ inline bool operator!=(FPSR lhs, FPSR rhs) {
return !operator==(lhs, rhs); return !operator==(lhs, rhs);
} }
} // namespace Dynarmic::FP } // namespace Dynarmic::FP

View file

@ -4,6 +4,7 @@
*/ */
#include "dynarmic/common/fp/fused.h" #include "dynarmic/common/fp/fused.h"
#include "dynarmic/common/fp/mantissa_util.h" #include "dynarmic/common/fp/mantissa_util.h"
#include "dynarmic/common/fp/unpacked.h" #include "dynarmic/common/fp/unpacked.h"
#include "dynarmic/common/u128.h" #include "dynarmic/common/u128.h"
@ -20,7 +21,7 @@ static FPUnpacked ReduceMantissa(bool sign, int exponent, const u128& mantissa)
FPUnpacked FusedMulAdd(FPUnpacked addend, FPUnpacked op1, FPUnpacked op2) { FPUnpacked FusedMulAdd(FPUnpacked addend, FPUnpacked op1, FPUnpacked op2) {
const bool product_sign = op1.sign != op2.sign; const bool product_sign = op1.sign != op2.sign;
const auto [product_exponent, product_value] = [op1, op2]{ const auto [product_exponent, product_value] = [op1, op2] {
int exponent = op1.exponent + op2.exponent; int exponent = op1.exponent + op2.exponent;
u128 value = Multiply64To128(op1.mantissa, op2.mantissa); u128 value = Multiply64To128(op1.mantissa, op2.mantissa);
if (value.Bit<product_point_position + 1>()) { if (value.Bit<product_point_position + 1>()) {
@ -86,4 +87,4 @@ FPUnpacked FusedMulAdd(FPUnpacked addend, FPUnpacked op1, FPUnpacked op2) {
return ReduceMantissa(result_sign, result_exponent, result); return ReduceMantissa(result_sign, result_exponent, result);
} }
} // namespace Dynarmic::FP } // namespace Dynarmic::FP

View file

@ -12,4 +12,4 @@ struct FPUnpacked;
/// This function assumes all arguments have been normalized. /// This function assumes all arguments have been normalized.
FPUnpacked FusedMulAdd(FPUnpacked addend, FPUnpacked op1, FPUnpacked op2); FPUnpacked FusedMulAdd(FPUnpacked addend, FPUnpacked op1, FPUnpacked op2);
} // namespace Dynarmic::FP } // namespace Dynarmic::FP

View file

@ -135,4 +135,4 @@ constexpr FPT FPValue() {
return FPT(FPInfo<FPT>::Zero(sign) | mantissa | (biased_exponent << FPInfo<FPT>::explicit_mantissa_width)); return FPT(FPInfo<FPT>::Zero(sign) | mantissa | (biased_exponent << FPInfo<FPT>::explicit_mantissa_width));
} }
} // namespace Dynarmic::FP } // namespace Dynarmic::FP

View file

@ -43,4 +43,4 @@ inline ResidualError ResidualErrorOnRightShift(u64 mantissa, int shift_amount) {
return ResidualError::GreaterThanHalf; return ResidualError::GreaterThanHalf;
} }
} // namespace Dynarmic::FP } // namespace Dynarmic::FP

View file

@ -8,10 +8,10 @@
#include "dynarmic/common/fp/op/FPCompare.h" #include "dynarmic/common/fp/op/FPCompare.h"
#include "dynarmic/common/fp/op/FPConvert.h" #include "dynarmic/common/fp/op/FPConvert.h"
#include "dynarmic/common/fp/op/FPMulAdd.h" #include "dynarmic/common/fp/op/FPMulAdd.h"
#include "dynarmic/common/fp/op/FPRSqrtEstimate.h"
#include "dynarmic/common/fp/op/FPRSqrtStepFused.h"
#include "dynarmic/common/fp/op/FPRecipEstimate.h" #include "dynarmic/common/fp/op/FPRecipEstimate.h"
#include "dynarmic/common/fp/op/FPRecipExponent.h" #include "dynarmic/common/fp/op/FPRecipExponent.h"
#include "dynarmic/common/fp/op/FPRecipStepFused.h" #include "dynarmic/common/fp/op/FPRecipStepFused.h"
#include "dynarmic/common/fp/op/FPRoundInt.h" #include "dynarmic/common/fp/op/FPRoundInt.h"
#include "dynarmic/common/fp/op/FPRSqrtEstimate.h"
#include "dynarmic/common/fp/op/FPRSqrtStepFused.h"
#include "dynarmic/common/fp/op/FPToFixed.h" #include "dynarmic/common/fp/op/FPToFixed.h"

View file

@ -3,15 +3,16 @@
* SPDX-License-Identifier: 0BSD * SPDX-License-Identifier: 0BSD
*/ */
#include "dynarmic/common/fp/op/FPCompare.h"
#include "dynarmic/common/fp/fpcr.h" #include "dynarmic/common/fp/fpcr.h"
#include "dynarmic/common/fp/fpsr.h" #include "dynarmic/common/fp/fpsr.h"
#include "dynarmic/common/fp/op/FPCompare.h"
#include "dynarmic/common/fp/process_exception.h" #include "dynarmic/common/fp/process_exception.h"
#include "dynarmic/common/fp/unpacked.h" #include "dynarmic/common/fp/unpacked.h"
namespace Dynarmic::FP { namespace Dynarmic::FP {
template <typename FPT> template<typename FPT>
bool FPCompareEQ(FPT lhs, FPT rhs, FPCR fpcr, FPSR& fpsr) { bool FPCompareEQ(FPT lhs, FPT rhs, FPCR fpcr, FPSR& fpsr) {
const auto unpacked1 = FPUnpack(lhs, fpcr, fpsr); const auto unpacked1 = FPUnpack(lhs, fpcr, fpsr);
const auto unpacked2 = FPUnpack(rhs, fpcr, fpsr); const auto unpacked2 = FPUnpack(rhs, fpcr, fpsr);
@ -20,8 +21,7 @@ bool FPCompareEQ(FPT lhs, FPT rhs, FPCR fpcr, FPSR& fpsr) {
const auto& value1 = std::get<FPUnpacked>(unpacked1); const auto& value1 = std::get<FPUnpacked>(unpacked1);
const auto& value2 = std::get<FPUnpacked>(unpacked2); const auto& value2 = std::get<FPUnpacked>(unpacked2);
if (type1 == FPType::QNaN || type1 == FPType::SNaN || if (type1 == FPType::QNaN || type1 == FPType::SNaN || type2 == FPType::QNaN || type2 == FPType::SNaN) {
type2 == FPType::QNaN || type2 == FPType::SNaN) {
if (type1 == FPType::SNaN || type2 == FPType::SNaN) { if (type1 == FPType::SNaN || type2 == FPType::SNaN) {
FPProcessException(FPExc::InvalidOp, fpcr, fpsr); FPProcessException(FPExc::InvalidOp, fpcr, fpsr);
} }
@ -37,4 +37,4 @@ template bool FPCompareEQ<u16>(u16 lhs, u16 rhs, FPCR fpcr, FPSR& fpsr);
template bool FPCompareEQ<u32>(u32 lhs, u32 rhs, FPCR fpcr, FPSR& fpsr); template bool FPCompareEQ<u32>(u32 lhs, u32 rhs, FPCR fpcr, FPSR& fpsr);
template bool FPCompareEQ<u64>(u64 lhs, u64 rhs, FPCR fpcr, FPSR& fpsr); template bool FPCompareEQ<u64>(u64 lhs, u64 rhs, FPCR fpcr, FPSR& fpsr);
} // namespace Dynarmic::FP } // namespace Dynarmic::FP

View file

@ -10,7 +10,7 @@ namespace Dynarmic::FP {
class FPCR; class FPCR;
class FPSR; class FPSR;
template <typename FPT> template<typename FPT>
bool FPCompareEQ(FPT lhs, FPT rhs, FPCR fpcr, FPSR& fpsr); bool FPCompareEQ(FPT lhs, FPT rhs, FPCR fpcr, FPSR& fpsr);
} // namespace Dynarmic::FP } // namespace Dynarmic::FP

View file

@ -3,17 +3,18 @@
* SPDX-License-Identifier: 0BSD * SPDX-License-Identifier: 0BSD
*/ */
#include "dynarmic/common/fp/op/FPConvert.h"
#include "dynarmic/common/common_types.h" #include "dynarmic/common/common_types.h"
#include "dynarmic/common/fp/fpcr.h" #include "dynarmic/common/fp/fpcr.h"
#include "dynarmic/common/fp/fpsr.h" #include "dynarmic/common/fp/fpsr.h"
#include "dynarmic/common/fp/info.h" #include "dynarmic/common/fp/info.h"
#include "dynarmic/common/fp/op/FPConvert.h"
#include "dynarmic/common/fp/process_exception.h" #include "dynarmic/common/fp/process_exception.h"
#include "dynarmic/common/fp/unpacked.h" #include "dynarmic/common/fp/unpacked.h"
namespace Dynarmic::FP { namespace Dynarmic::FP {
namespace { namespace {
template <typename FPT_TO, typename FPT_FROM> template<typename FPT_TO, typename FPT_FROM>
FPT_TO FPConvertNaN(FPT_FROM op) { FPT_TO FPConvertNaN(FPT_FROM op) {
const bool sign = Common::Bit<Common::BitSize<FPT_FROM>() - 1>(op); const bool sign = Common::Bit<Common::BitSize<FPT_FROM>() - 1>(op);
const u64 frac = [op] { const u64 frac = [op] {
@ -38,9 +39,9 @@ FPT_TO FPConvertNaN(FPT_FROM op) {
return FPT_TO(shifted_sign | exponent << 9 | Common::Bits<42, 50>(frac)); return FPT_TO(shifted_sign | exponent << 9 | Common::Bits<42, 50>(frac));
} }
} }
} // Anonymous namespace } // Anonymous namespace
template <typename FPT_TO, typename FPT_FROM> template<typename FPT_TO, typename FPT_FROM>
FPT_TO FPConvert(FPT_FROM op, FPCR fpcr, RoundingMode rounding_mode, FPSR& fpsr) { FPT_TO FPConvert(FPT_FROM op, FPCR fpcr, RoundingMode rounding_mode, FPSR& fpsr) {
const auto [type, sign, value] = FPUnpackCV<FPT_FROM>(op, fpcr, fpsr); const auto [type, sign, value] = FPUnpackCV<FPT_FROM>(op, fpcr, fpsr);
const bool is_althp = Common::BitSize<FPT_TO>() == 16 && fpcr.AHP(); const bool is_althp = Common::BitSize<FPT_TO>() == 16 && fpcr.AHP();
@ -86,4 +87,4 @@ template u32 FPConvert<u32, u64>(u64 op, FPCR fpcr, RoundingMode rounding_mode,
template u64 FPConvert<u64, u16>(u16 op, FPCR fpcr, RoundingMode rounding_mode, FPSR& fpsr); template u64 FPConvert<u64, u16>(u16 op, FPCR fpcr, RoundingMode rounding_mode, FPSR& fpsr);
template u64 FPConvert<u64, u32>(u32 op, FPCR fpcr, RoundingMode rounding_mode, FPSR& fpsr); template u64 FPConvert<u64, u32>(u32 op, FPCR fpcr, RoundingMode rounding_mode, FPSR& fpsr);
} // namespace Dynarmic::FP } // namespace Dynarmic::FP

View file

@ -11,7 +11,7 @@ class FPCR;
class FPSR; class FPSR;
enum class RoundingMode; enum class RoundingMode;
template <typename FPT_TO, typename FPT_FROM> template<typename FPT_TO, typename FPT_FROM>
FPT_TO FPConvert(FPT_FROM op, FPCR fpcr, RoundingMode rounding_mode, FPSR& fpsr); FPT_TO FPConvert(FPT_FROM op, FPCR fpcr, RoundingMode rounding_mode, FPSR& fpsr);
} // namespace Dynarmic::FP } // namespace Dynarmic::FP

View file

@ -3,12 +3,13 @@
* SPDX-License-Identifier: 0BSD * SPDX-License-Identifier: 0BSD
*/ */
#include "dynarmic/common/fp/op/FPMulAdd.h"
#include "dynarmic/common/common_types.h" #include "dynarmic/common/common_types.h"
#include "dynarmic/common/fp/fpcr.h" #include "dynarmic/common/fp/fpcr.h"
#include "dynarmic/common/fp/fpsr.h" #include "dynarmic/common/fp/fpsr.h"
#include "dynarmic/common/fp/info.h"
#include "dynarmic/common/fp/fused.h" #include "dynarmic/common/fp/fused.h"
#include "dynarmic/common/fp/op/FPMulAdd.h" #include "dynarmic/common/fp/info.h"
#include "dynarmic/common/fp/process_exception.h" #include "dynarmic/common/fp/process_exception.h"
#include "dynarmic/common/fp/process_nan.h" #include "dynarmic/common/fp/process_nan.h"
#include "dynarmic/common/fp/unpacked.h" #include "dynarmic/common/fp/unpacked.h"
@ -76,4 +77,4 @@ template u16 FPMulAdd<u16>(u16 addend, u16 op1, u16 op2, FPCR fpcr, FPSR& fpsr);
template u32 FPMulAdd<u32>(u32 addend, u32 op1, u32 op2, FPCR fpcr, FPSR& fpsr); template u32 FPMulAdd<u32>(u32 addend, u32 op1, u32 op2, FPCR fpcr, FPSR& fpsr);
template u64 FPMulAdd<u64>(u64 addend, u64 op1, u64 op2, FPCR fpcr, FPSR& fpsr); template u64 FPMulAdd<u64>(u64 addend, u64 op1, u64 op2, FPCR fpcr, FPSR& fpsr);
} // namespace Dynarmic::FP } // namespace Dynarmic::FP

View file

@ -13,4 +13,4 @@ class FPSR;
template<typename FPT> template<typename FPT>
FPT FPMulAdd(FPT addend, FPT op1, FPT op2, FPCR fpcr, FPSR& fpsr); FPT FPMulAdd(FPT addend, FPT op1, FPT op2, FPCR fpcr, FPSR& fpsr);
} // namespace Dynarmic::FP } // namespace Dynarmic::FP

View file

@ -14,4 +14,4 @@ constexpr FPT FPNeg(FPT op) {
return op ^ FPInfo<FPT>::sign_mask; return op ^ FPInfo<FPT>::sign_mask;
} }
} // namespace Dynarmic::FP } // namespace Dynarmic::FP

View file

@ -3,11 +3,12 @@
* SPDX-License-Identifier: 0BSD * SPDX-License-Identifier: 0BSD
*/ */
#include "dynarmic/common/fp/op/FPRSqrtEstimate.h"
#include "dynarmic/common/common_types.h" #include "dynarmic/common/common_types.h"
#include "dynarmic/common/fp/fpcr.h" #include "dynarmic/common/fp/fpcr.h"
#include "dynarmic/common/fp/fpsr.h" #include "dynarmic/common/fp/fpsr.h"
#include "dynarmic/common/fp/info.h" #include "dynarmic/common/fp/info.h"
#include "dynarmic/common/fp/op/FPRSqrtEstimate.h"
#include "dynarmic/common/fp/process_exception.h" #include "dynarmic/common/fp/process_exception.h"
#include "dynarmic/common/fp/process_nan.h" #include "dynarmic/common/fp/process_nan.h"
#include "dynarmic/common/fp/unpacked.h" #include "dynarmic/common/fp/unpacked.h"
@ -54,4 +55,4 @@ template u16 FPRSqrtEstimate<u16>(u16 op, FPCR fpcr, FPSR& fpsr);
template u32 FPRSqrtEstimate<u32>(u32 op, FPCR fpcr, FPSR& fpsr); template u32 FPRSqrtEstimate<u32>(u32 op, FPCR fpcr, FPSR& fpsr);
template u64 FPRSqrtEstimate<u64>(u64 op, FPCR fpcr, FPSR& fpsr); template u64 FPRSqrtEstimate<u64>(u64 op, FPCR fpcr, FPSR& fpsr);
} // namespace Dynarmic::FP } // namespace Dynarmic::FP

View file

@ -13,4 +13,4 @@ class FPSR;
template<typename FPT> template<typename FPT>
FPT FPRSqrtEstimate(FPT op, FPCR fpcr, FPSR& fpsr); FPT FPRSqrtEstimate(FPT op, FPCR fpcr, FPSR& fpsr);
} // namespace Dynarmic::FP } // namespace Dynarmic::FP

View file

@ -3,12 +3,13 @@
* SPDX-License-Identifier: 0BSD * SPDX-License-Identifier: 0BSD
*/ */
#include "dynarmic/common/fp/op/FPRSqrtStepFused.h"
#include "dynarmic/common/fp/fpcr.h" #include "dynarmic/common/fp/fpcr.h"
#include "dynarmic/common/fp/fpsr.h" #include "dynarmic/common/fp/fpsr.h"
#include "dynarmic/common/fp/fused.h" #include "dynarmic/common/fp/fused.h"
#include "dynarmic/common/fp/info.h" #include "dynarmic/common/fp/info.h"
#include "dynarmic/common/fp/op/FPNeg.h" #include "dynarmic/common/fp/op/FPNeg.h"
#include "dynarmic/common/fp/op/FPRSqrtStepFused.h"
#include "dynarmic/common/fp/process_nan.h" #include "dynarmic/common/fp/process_nan.h"
#include "dynarmic/common/fp/unpacked.h" #include "dynarmic/common/fp/unpacked.h"
@ -53,4 +54,4 @@ template u16 FPRSqrtStepFused<u16>(u16 op1, u16 op2, FPCR fpcr, FPSR& fpsr);
template u32 FPRSqrtStepFused<u32>(u32 op1, u32 op2, FPCR fpcr, FPSR& fpsr); template u32 FPRSqrtStepFused<u32>(u32 op1, u32 op2, FPCR fpcr, FPSR& fpsr);
template u64 FPRSqrtStepFused<u64>(u64 op1, u64 op2, FPCR fpcr, FPSR& fpsr); template u64 FPRSqrtStepFused<u64>(u64 op1, u64 op2, FPCR fpcr, FPSR& fpsr);
} // namespace Dynarmic::FP } // namespace Dynarmic::FP

View file

@ -13,4 +13,4 @@ class FPSR;
template<typename FPT> template<typename FPT>
FPT FPRSqrtStepFused(FPT op1, FPT op2, FPCR fpcr, FPSR& fpsr); FPT FPRSqrtStepFused(FPT op1, FPT op2, FPCR fpcr, FPSR& fpsr);
} // namespace Dynarmic::FP } // namespace Dynarmic::FP

View file

@ -3,6 +3,8 @@
* SPDX-License-Identifier: 0BSD * SPDX-License-Identifier: 0BSD
*/ */
#include "dynarmic/common/fp/op/FPRecipEstimate.h"
#include <tuple> #include <tuple>
#include "dynarmic/common/assert.h" #include "dynarmic/common/assert.h"
@ -10,7 +12,6 @@
#include "dynarmic/common/fp/fpcr.h" #include "dynarmic/common/fp/fpcr.h"
#include "dynarmic/common/fp/fpsr.h" #include "dynarmic/common/fp/fpsr.h"
#include "dynarmic/common/fp/info.h" #include "dynarmic/common/fp/info.h"
#include "dynarmic/common/fp/op/FPRecipEstimate.h"
#include "dynarmic/common/fp/process_exception.h" #include "dynarmic/common/fp/process_exception.h"
#include "dynarmic/common/fp/process_nan.h" #include "dynarmic/common/fp/process_nan.h"
#include "dynarmic/common/fp/unpacked.h" #include "dynarmic/common/fp/unpacked.h"
@ -39,7 +40,7 @@ FPT FPRecipEstimate(FPT op, FPCR fpcr, FPSR& fpsr) {
} }
if (value.exponent < FPInfo<FPT>::exponent_min - 2) { if (value.exponent < FPInfo<FPT>::exponent_min - 2) {
const bool overflow_to_inf = [&]{ const bool overflow_to_inf = [&] {
switch (fpcr.RMode()) { switch (fpcr.RMode()) {
case RoundingMode::ToNearest_TieEven: case RoundingMode::ToNearest_TieEven:
return true; return true;
@ -95,4 +96,4 @@ template u16 FPRecipEstimate<u16>(u16 op, FPCR fpcr, FPSR& fpsr);
template u32 FPRecipEstimate<u32>(u32 op, FPCR fpcr, FPSR& fpsr); template u32 FPRecipEstimate<u32>(u32 op, FPCR fpcr, FPSR& fpsr);
template u64 FPRecipEstimate<u64>(u64 op, FPCR fpcr, FPSR& fpsr); template u64 FPRecipEstimate<u64>(u64 op, FPCR fpcr, FPSR& fpsr);
} // namespace Dynarmic::FP } // namespace Dynarmic::FP

View file

@ -13,4 +13,4 @@ class FPSR;
template<typename FPT> template<typename FPT>
FPT FPRecipEstimate(FPT op, FPCR fpcr, FPSR& fpsr); FPT FPRecipEstimate(FPT op, FPCR fpcr, FPSR& fpsr);
} // namespace Dynarmic::FP } // namespace Dynarmic::FP

View file

@ -3,18 +3,19 @@
* SPDX-License-Identifier: 0BSD * SPDX-License-Identifier: 0BSD
*/ */
#include "dynarmic/common/common_types.h" #include "dynarmic/common/fp/op/FPRecipExponent.h"
#include "dynarmic/common/bit_util.h" #include "dynarmic/common/bit_util.h"
#include "dynarmic/common/common_types.h"
#include "dynarmic/common/fp/fpcr.h" #include "dynarmic/common/fp/fpcr.h"
#include "dynarmic/common/fp/fpsr.h" #include "dynarmic/common/fp/fpsr.h"
#include "dynarmic/common/fp/info.h" #include "dynarmic/common/fp/info.h"
#include "dynarmic/common/fp/op/FPRecipExponent.h"
#include "dynarmic/common/fp/process_nan.h" #include "dynarmic/common/fp/process_nan.h"
#include "dynarmic/common/fp/unpacked.h" #include "dynarmic/common/fp/unpacked.h"
namespace Dynarmic::FP { namespace Dynarmic::FP {
namespace { namespace {
template <typename FPT> template<typename FPT>
FPT DetermineExponentValue(size_t value) { FPT DetermineExponentValue(size_t value) {
if constexpr (sizeof(FPT) == sizeof(u32)) { if constexpr (sizeof(FPT) == sizeof(u32)) {
return static_cast<FPT>(Common::Bits<23, 30>(value)); return static_cast<FPT>(Common::Bits<23, 30>(value));
@ -24,9 +25,9 @@ FPT DetermineExponentValue(size_t value) {
return static_cast<FPT>(Common::Bits<10, 14>(value)); return static_cast<FPT>(Common::Bits<10, 14>(value));
} }
} }
} // Anonymous namespace } // Anonymous namespace
template <typename FPT> template<typename FPT>
FPT FPRecipExponent(FPT op, FPCR fpcr, FPSR& fpsr) { FPT FPRecipExponent(FPT op, FPCR fpcr, FPSR& fpsr) {
const auto [type, sign, value] = FPUnpack<FPT>(op, fpcr, fpsr); const auto [type, sign, value] = FPUnpack<FPT>(op, fpcr, fpsr);
(void)value; (void)value;
@ -54,4 +55,4 @@ template u16 FPRecipExponent<u16>(u16 op, FPCR fpcr, FPSR& fpsr);
template u32 FPRecipExponent<u32>(u32 op, FPCR fpcr, FPSR& fpsr); template u32 FPRecipExponent<u32>(u32 op, FPCR fpcr, FPSR& fpsr);
template u64 FPRecipExponent<u64>(u64 op, FPCR fpcr, FPSR& fpsr); template u64 FPRecipExponent<u64>(u64 op, FPCR fpcr, FPSR& fpsr);
} // namespace Dynarmic::FP } // namespace Dynarmic::FP

View file

@ -10,7 +10,7 @@ namespace Dynarmic::FP {
class FPCR; class FPCR;
class FPSR; class FPSR;
template <typename FPT> template<typename FPT>
FPT FPRecipExponent(FPT op, FPCR fpcr, FPSR& fpsr); FPT FPRecipExponent(FPT op, FPCR fpcr, FPSR& fpsr);
} // namespace Dynarmic::FP } // namespace Dynarmic::FP

View file

@ -3,12 +3,13 @@
* SPDX-License-Identifier: 0BSD * SPDX-License-Identifier: 0BSD
*/ */
#include "dynarmic/common/fp/op/FPRecipStepFused.h"
#include "dynarmic/common/fp/fpcr.h" #include "dynarmic/common/fp/fpcr.h"
#include "dynarmic/common/fp/fpsr.h" #include "dynarmic/common/fp/fpsr.h"
#include "dynarmic/common/fp/fused.h" #include "dynarmic/common/fp/fused.h"
#include "dynarmic/common/fp/info.h" #include "dynarmic/common/fp/info.h"
#include "dynarmic/common/fp/op/FPNeg.h" #include "dynarmic/common/fp/op/FPNeg.h"
#include "dynarmic/common/fp/op/FPRecipStepFused.h"
#include "dynarmic/common/fp/process_nan.h" #include "dynarmic/common/fp/process_nan.h"
#include "dynarmic/common/fp/unpacked.h" #include "dynarmic/common/fp/unpacked.h"
@ -52,4 +53,4 @@ template u16 FPRecipStepFused<u16>(u16 op1, u16 op2, FPCR fpcr, FPSR& fpsr);
template u32 FPRecipStepFused<u32>(u32 op1, u32 op2, FPCR fpcr, FPSR& fpsr); template u32 FPRecipStepFused<u32>(u32 op1, u32 op2, FPCR fpcr, FPSR& fpsr);
template u64 FPRecipStepFused<u64>(u64 op1, u64 op2, FPCR fpcr, FPSR& fpsr); template u64 FPRecipStepFused<u64>(u64 op1, u64 op2, FPCR fpcr, FPSR& fpsr);
} // namespace Dynarmic::FP } // namespace Dynarmic::FP

View file

@ -13,4 +13,4 @@ class FPSR;
template<typename FPT> template<typename FPT>
FPT FPRecipStepFused(FPT op1, FPT op2, FPCR fpcr, FPSR& fpsr); FPT FPRecipStepFused(FPT op1, FPT op2, FPCR fpcr, FPSR& fpsr);
} // namespace Dynarmic::FP } // namespace Dynarmic::FP

View file

@ -3,6 +3,8 @@
* SPDX-License-Identifier: 0BSD * SPDX-License-Identifier: 0BSD
*/ */
#include "dynarmic/common/fp/op/FPRoundInt.h"
#include "dynarmic/common/assert.h" #include "dynarmic/common/assert.h"
#include "dynarmic/common/bit_util.h" #include "dynarmic/common/bit_util.h"
#include "dynarmic/common/common_types.h" #include "dynarmic/common/common_types.h"
@ -10,7 +12,6 @@
#include "dynarmic/common/fp/fpsr.h" #include "dynarmic/common/fp/fpsr.h"
#include "dynarmic/common/fp/info.h" #include "dynarmic/common/fp/info.h"
#include "dynarmic/common/fp/mantissa_util.h" #include "dynarmic/common/fp/mantissa_util.h"
#include "dynarmic/common/fp/op/FPRoundInt.h"
#include "dynarmic/common/fp/process_exception.h" #include "dynarmic/common/fp/process_exception.h"
#include "dynarmic/common/fp/process_nan.h" #include "dynarmic/common/fp/process_nan.h"
#include "dynarmic/common/fp/rounding_mode.h" #include "dynarmic/common/fp/rounding_mode.h"
@ -78,8 +79,8 @@ u64 FPRoundInt(FPT op, FPCR fpcr, RoundingMode rounding, bool exact, FPSR& fpsr)
const u64 abs_int_result = new_sign ? Safe::Negate<u64>(int_result) : static_cast<u64>(int_result); const u64 abs_int_result = new_sign ? Safe::Negate<u64>(int_result) : static_cast<u64>(int_result);
const FPT result = int_result == 0 const FPT result = int_result == 0
? FPInfo<FPT>::Zero(sign) ? FPInfo<FPT>::Zero(sign)
: FPRound<FPT>(FPUnpacked{new_sign, normalized_point_position, abs_int_result}, fpcr, RoundingMode::TowardsZero, fpsr); : FPRound<FPT>(FPUnpacked{new_sign, normalized_point_position, abs_int_result}, fpcr, RoundingMode::TowardsZero, fpsr);
if (error != ResidualError::Zero && exact) { if (error != ResidualError::Zero && exact) {
FPProcessException(FPExc::Inexact, fpcr, fpsr); FPProcessException(FPExc::Inexact, fpcr, fpsr);
@ -92,4 +93,4 @@ template u64 FPRoundInt<u16>(u16 op, FPCR fpcr, RoundingMode rounding, bool exac
template u64 FPRoundInt<u32>(u32 op, FPCR fpcr, RoundingMode rounding, bool exact, FPSR& fpsr); template u64 FPRoundInt<u32>(u32 op, FPCR fpcr, RoundingMode rounding, bool exact, FPSR& fpsr);
template u64 FPRoundInt<u64>(u64 op, FPCR fpcr, RoundingMode rounding, bool exact, FPSR& fpsr); template u64 FPRoundInt<u64>(u64 op, FPCR fpcr, RoundingMode rounding, bool exact, FPSR& fpsr);
} // namespace Dynarmic::FP } // namespace Dynarmic::FP

View file

@ -16,4 +16,4 @@ enum class RoundingMode;
template<typename FPT> template<typename FPT>
u64 FPRoundInt(FPT op, FPCR fpcr, RoundingMode rounding, bool exact, FPSR& fpsr); u64 FPRoundInt(FPT op, FPCR fpcr, RoundingMode rounding, bool exact, FPSR& fpsr);
} // namespace Dynarmic::FP } // namespace Dynarmic::FP

View file

@ -3,13 +3,14 @@
* SPDX-License-Identifier: 0BSD * SPDX-License-Identifier: 0BSD
*/ */
#include "dynarmic/common/fp/op/FPToFixed.h"
#include "dynarmic/common/assert.h" #include "dynarmic/common/assert.h"
#include "dynarmic/common/bit_util.h" #include "dynarmic/common/bit_util.h"
#include "dynarmic/common/common_types.h" #include "dynarmic/common/common_types.h"
#include "dynarmic/common/fp/fpcr.h" #include "dynarmic/common/fp/fpcr.h"
#include "dynarmic/common/fp/fpsr.h" #include "dynarmic/common/fp/fpsr.h"
#include "dynarmic/common/fp/mantissa_util.h" #include "dynarmic/common/fp/mantissa_util.h"
#include "dynarmic/common/fp/op/FPToFixed.h"
#include "dynarmic/common/fp/process_exception.h" #include "dynarmic/common/fp/process_exception.h"
#include "dynarmic/common/fp/rounding_mode.h" #include "dynarmic/common/fp/rounding_mode.h"
#include "dynarmic/common/fp/unpacked.h" #include "dynarmic/common/fp/unpacked.h"
@ -98,4 +99,4 @@ template u64 FPToFixed<u16>(size_t ibits, u16 op, size_t fbits, bool unsigned_,
template u64 FPToFixed<u32>(size_t ibits, u32 op, size_t fbits, bool unsigned_, FPCR fpcr, RoundingMode rounding, FPSR& fpsr); template u64 FPToFixed<u32>(size_t ibits, u32 op, size_t fbits, bool unsigned_, FPCR fpcr, RoundingMode rounding, FPSR& fpsr);
template u64 FPToFixed<u64>(size_t ibits, u64 op, size_t fbits, bool unsigned_, FPCR fpcr, RoundingMode rounding, FPSR& fpsr); template u64 FPToFixed<u64>(size_t ibits, u64 op, size_t fbits, bool unsigned_, FPCR fpcr, RoundingMode rounding, FPSR& fpsr);
} // namespace Dynarmic::FP } // namespace Dynarmic::FP

View file

@ -16,4 +16,4 @@ enum class RoundingMode;
template<typename FPT> template<typename FPT>
u64 FPToFixed(size_t ibits, FPT op, size_t fbits, bool unsigned_, FPCR fpcr, RoundingMode rounding, FPSR& fpsr); u64 FPToFixed(size_t ibits, FPT op, size_t fbits, bool unsigned_, FPCR fpcr, RoundingMode rounding, FPSR& fpsr);
} // namespace Dynarmic::FP } // namespace Dynarmic::FP

View file

@ -3,10 +3,11 @@
* SPDX-License-Identifier: 0BSD * SPDX-License-Identifier: 0BSD
*/ */
#include "dynarmic/common/fp/process_exception.h"
#include "dynarmic/common/assert.h" #include "dynarmic/common/assert.h"
#include "dynarmic/common/fp/fpcr.h" #include "dynarmic/common/fp/fpcr.h"
#include "dynarmic/common/fp/fpsr.h" #include "dynarmic/common/fp/fpsr.h"
#include "dynarmic/common/fp/process_exception.h"
namespace Dynarmic::FP { namespace Dynarmic::FP {
@ -54,4 +55,4 @@ void FPProcessException(FPExc exception, FPCR fpcr, FPSR& fpsr) {
} }
} }
} // namespace Dynarmic::FP } // namespace Dynarmic::FP

View file

@ -21,4 +21,4 @@ enum class FPExc {
void FPProcessException(FPExc exception, FPCR fpcr, FPSR& fpsr); void FPProcessException(FPExc exception, FPCR fpcr, FPSR& fpsr);
} // namespace Dynarmic::FP } // namespace Dynarmic::FP

View file

@ -3,6 +3,8 @@
* SPDX-License-Identifier: 0BSD * SPDX-License-Identifier: 0BSD
*/ */
#include "dynarmic/common/fp/process_nan.h"
#include <optional> #include <optional>
#include "dynarmic/common/assert.h" #include "dynarmic/common/assert.h"
@ -11,7 +13,6 @@
#include "dynarmic/common/fp/fpsr.h" #include "dynarmic/common/fp/fpsr.h"
#include "dynarmic/common/fp/info.h" #include "dynarmic/common/fp/info.h"
#include "dynarmic/common/fp/process_exception.h" #include "dynarmic/common/fp/process_exception.h"
#include "dynarmic/common/fp/process_nan.h"
#include "dynarmic/common/fp/unpacked.h" #include "dynarmic/common/fp/unpacked.h"
namespace Dynarmic::FP { namespace Dynarmic::FP {
@ -88,4 +89,4 @@ template std::optional<u16> FPProcessNaNs3<u16>(FPType type1, FPType type2, FPTy
template std::optional<u32> FPProcessNaNs3<u32>(FPType type1, FPType type2, FPType type3, u32 op1, u32 op2, u32 op3, FPCR fpcr, FPSR& fpsr); template std::optional<u32> FPProcessNaNs3<u32>(FPType type1, FPType type2, FPType type3, u32 op1, u32 op2, u32 op3, FPCR fpcr, FPSR& fpsr);
template std::optional<u64> FPProcessNaNs3<u64>(FPType type1, FPType type2, FPType type3, u64 op1, u64 op2, u64 op3, FPCR fpcr, FPSR& fpsr); template std::optional<u64> FPProcessNaNs3<u64>(FPType type1, FPType type2, FPType type3, u64 op1, u64 op2, u64 op3, FPCR fpcr, FPSR& fpsr);
} // namespace Dynarmic::FP } // namespace Dynarmic::FP

View file

@ -22,4 +22,4 @@ std::optional<FPT> FPProcessNaNs(FPType type1, FPType type2, FPT op1, FPT op2, F
template<typename FPT> template<typename FPT>
std::optional<FPT> FPProcessNaNs3(FPType type1, FPType type2, FPType type3, FPT op1, FPT op2, FPT op3, FPCR fpcr, FPSR& fpsr); std::optional<FPT> FPProcessNaNs3(FPType type1, FPType type2, FPType type3, FPT op1, FPT op2, FPT op3, FPCR fpcr, FPSR& fpsr);
} // namespace Dynarmic::FP } // namespace Dynarmic::FP

View file

@ -24,4 +24,4 @@ enum class RoundingMode {
ToOdd, ToOdd,
}; };
} // namespace Dynarmic::FP } // namespace Dynarmic::FP

View file

@ -3,12 +3,13 @@
* SPDX-License-Identifier: 0BSD * SPDX-License-Identifier: 0BSD
*/ */
#include "dynarmic/common/fp/unpacked.h"
#include "dynarmic/common/fp/fpsr.h" #include "dynarmic/common/fp/fpsr.h"
#include "dynarmic/common/fp/info.h" #include "dynarmic/common/fp/info.h"
#include "dynarmic/common/fp/mantissa_util.h" #include "dynarmic/common/fp/mantissa_util.h"
#include "dynarmic/common/fp/process_exception.h" #include "dynarmic/common/fp/process_exception.h"
#include "dynarmic/common/fp/rounding_mode.h" #include "dynarmic/common/fp/rounding_mode.h"
#include "dynarmic/common/fp/unpacked.h"
#include "dynarmic/common/safe_ops.h" #include "dynarmic/common/safe_ops.h"
namespace Dynarmic::FP { namespace Dynarmic::FP {
@ -143,12 +144,12 @@ FPT FPRoundBase(FPUnpacked op, FPCR fpcr, RoundingMode rounding, FPSR& fpsr) {
FPT result = 0; FPT result = 0;
#ifdef _MSC_VER #ifdef _MSC_VER
#pragma warning(push) # pragma warning(push)
#pragma warning(disable:4127) // C4127: conditional expression is constant # pragma warning(disable : 4127) // C4127: conditional expression is constant
#endif #endif
if (!isFP16 || !fpcr.AHP()) { if (!isFP16 || !fpcr.AHP()) {
#ifdef _MSC_VER #ifdef _MSC_VER
#pragma warning(pop) # pragma warning(pop)
#endif #endif
constexpr int max_biased_exp = (1 << E) - 1; constexpr int max_biased_exp = (1 << E) - 1;
if (biased_exp >= max_biased_exp) { if (biased_exp >= max_biased_exp) {
@ -188,4 +189,4 @@ template u16 FPRoundBase<u16>(FPUnpacked op, FPCR fpcr, RoundingMode rounding, F
template u32 FPRoundBase<u32>(FPUnpacked op, FPCR fpcr, RoundingMode rounding, FPSR& fpsr); template u32 FPRoundBase<u32>(FPUnpacked op, FPCR fpcr, RoundingMode rounding, FPSR& fpsr);
template u64 FPRoundBase<u64>(FPUnpacked op, FPCR fpcr, RoundingMode rounding, FPSR& fpsr); template u64 FPRoundBase<u64>(FPUnpacked op, FPCR fpcr, RoundingMode rounding, FPSR& fpsr);
} // namespace Dynarmic::FP } // namespace Dynarmic::FP

View file

@ -85,4 +85,4 @@ FPT FPRound(FPUnpacked op, FPCR fpcr, FPSR& fpsr) {
return FPRound<FPT>(op, fpcr, fpcr.RMode(), fpsr); return FPRound<FPT>(op, fpcr, fpcr.RMode(), fpsr);
} }
} // namespace Dynarmic::FP } // namespace Dynarmic::FP

View file

@ -96,4 +96,4 @@ constexpr std::optional<FPT> ProcessNaNs(FPT a, FPT b, FPT c) {
return std::nullopt; return std::nullopt;
} }
} // namespace Dynarmic::FP } // namespace Dynarmic::FP

View file

@ -14,10 +14,12 @@
namespace Dynarmic::Common { namespace Dynarmic::Common {
template <typename T> class IntrusiveList; template<typename T>
template <typename T> class IntrusiveListIterator; class IntrusiveList;
template<typename T>
class IntrusiveListIterator;
template <typename T> template<typename T>
class IntrusiveListNode { class IntrusiveListNode {
public: public:
bool IsSentinel() const { bool IsSentinel() const {
@ -34,9 +36,8 @@ protected:
friend class IntrusiveListIterator<const T>; friend class IntrusiveListIterator<const T>;
}; };
template <typename T> template<typename T>
class IntrusiveListSentinel final : public IntrusiveListNode<T> class IntrusiveListSentinel final : public IntrusiveListNode<T> {
{
using IntrusiveListNode<T>::next; using IntrusiveListNode<T>::next;
using IntrusiveListNode<T>::prev; using IntrusiveListNode<T>::prev;
using IntrusiveListNode<T>::is_sentinel; using IntrusiveListNode<T>::is_sentinel;
@ -49,33 +50,36 @@ public:
} }
}; };
template <typename T> template<typename T>
class IntrusiveListIterator { class IntrusiveListIterator {
public: public:
using iterator_category = std::bidirectional_iterator_tag; using iterator_category = std::bidirectional_iterator_tag;
using difference_type = std::ptrdiff_t; using difference_type = std::ptrdiff_t;
using value_type = T; using value_type = T;
using pointer = value_type*; using pointer = value_type*;
using const_pointer = const value_type*; using const_pointer = const value_type*;
using reference = value_type&; using reference = value_type&;
using const_reference = const value_type&; using const_reference = const value_type&;
// If value_type is const, we want "const IntrusiveListNode<value_type>", not "const IntrusiveListNode<const value_type>" // If value_type is const, we want "const IntrusiveListNode<value_type>", not "const IntrusiveListNode<const value_type>"
using node_type = std::conditional_t<std::is_const<value_type>::value, using node_type = std::conditional_t<std::is_const<value_type>::value,
const IntrusiveListNode<std::remove_const_t<value_type>>, const IntrusiveListNode<std::remove_const_t<value_type>>,
IntrusiveListNode<value_type>>; IntrusiveListNode<value_type>>;
using node_pointer = node_type*; using node_pointer = node_type*;
using node_reference = node_type&; using node_reference = node_type&;
IntrusiveListIterator() = default; IntrusiveListIterator() = default;
IntrusiveListIterator(const IntrusiveListIterator& other) = default; IntrusiveListIterator(const IntrusiveListIterator& other) = default;
IntrusiveListIterator& operator=(const IntrusiveListIterator& other) = default; IntrusiveListIterator& operator=(const IntrusiveListIterator& other) = default;
explicit IntrusiveListIterator(node_pointer list_node) : node(list_node) { explicit IntrusiveListIterator(node_pointer list_node)
: node(list_node) {
} }
explicit IntrusiveListIterator(pointer data) : node(data) { explicit IntrusiveListIterator(pointer data)
: node(data) {
} }
explicit IntrusiveListIterator(reference data) : node(&data) { explicit IntrusiveListIterator(reference data)
: node(&data) {
} }
IntrusiveListIterator& operator++() { IntrusiveListIterator& operator++() {
@ -121,19 +125,19 @@ private:
node_pointer node = nullptr; node_pointer node = nullptr;
}; };
template <typename T> template<typename T>
class IntrusiveList { class IntrusiveList {
public: public:
using difference_type = std::ptrdiff_t; using difference_type = std::ptrdiff_t;
using size_type = std::size_t; using size_type = std::size_t;
using value_type = T; using value_type = T;
using pointer = value_type*; using pointer = value_type*;
using const_pointer = const value_type*; using const_pointer = const value_type*;
using reference = value_type&; using reference = value_type&;
using const_reference = const value_type&; using const_reference = const value_type&;
using iterator = IntrusiveListIterator<value_type>; using iterator = IntrusiveListIterator<value_type>;
using const_iterator = IntrusiveListIterator<const value_type>; using const_iterator = IntrusiveListIterator<const value_type>;
using reverse_iterator = std::reverse_iterator<iterator>; using reverse_iterator = std::reverse_iterator<iterator>;
using const_reverse_iterator = std::reverse_iterator<const_iterator>; using const_reverse_iterator = std::reverse_iterator<const_iterator>;
/** /**
@ -222,10 +226,10 @@ public:
node->prev->next = node->next; node->prev->next = node->next;
node->next->prev = node->prev; node->next->prev = node->prev;
#if !defined(NDEBUG) #if !defined(NDEBUG)
node->next = nullptr; node->next = nullptr;
node->prev = nullptr; node->prev = nullptr;
#endif #endif
return node; return node;
} }
@ -308,21 +312,21 @@ public:
} }
// Iterator interface // Iterator interface
iterator begin() { return iterator(root->next); } iterator begin() { return iterator(root->next); }
const_iterator begin() const { return const_iterator(root->next); } const_iterator begin() const { return const_iterator(root->next); }
const_iterator cbegin() const { return begin(); } const_iterator cbegin() const { return begin(); }
iterator end() { return iterator(root.get()); } iterator end() { return iterator(root.get()); }
const_iterator end() const { return const_iterator(root.get()); } const_iterator end() const { return const_iterator(root.get()); }
const_iterator cend() const { return end(); } const_iterator cend() const { return end(); }
reverse_iterator rbegin() { return reverse_iterator(end()); } reverse_iterator rbegin() { return reverse_iterator(end()); }
const_reverse_iterator rbegin() const { return const_reverse_iterator(end()); } const_reverse_iterator rbegin() const { return const_reverse_iterator(end()); }
const_reverse_iterator crbegin() const { return rbegin(); } const_reverse_iterator crbegin() const { return rbegin(); }
reverse_iterator rend() { return reverse_iterator(begin()); } reverse_iterator rend() { return reverse_iterator(begin()); }
const_reverse_iterator rend() const { return const_reverse_iterator(begin()); } const_reverse_iterator rend() const { return const_reverse_iterator(begin()); }
const_reverse_iterator crend() const { return rend(); } const_reverse_iterator crend() const { return rend(); }
/** /**
* Erases a node from the list, indicated by an iterator. * Erases a node from the list, indicated by an iterator.
@ -367,9 +371,9 @@ private:
* @param lhs The first list. * @param lhs The first list.
* @param rhs The second list. * @param rhs The second list.
*/ */
template <typename T> template<typename T>
void swap(IntrusiveList<T>& lhs, IntrusiveList<T>& rhs) noexcept { void swap(IntrusiveList<T>& lhs, IntrusiveList<T>& rhs) noexcept {
lhs.swap(rhs); lhs.swap(rhs);
} }
} // namespace Dynarmic::Common } // namespace Dynarmic::Common

View file

@ -25,11 +25,11 @@ struct ReverseAdapter {
} }
}; };
} // namespace detail } // namespace detail
template<typename T> template<typename T>
constexpr detail::ReverseAdapter<T> Reverse(T&& iterable) { constexpr detail::ReverseAdapter<T> Reverse(T&& iterable) {
return detail::ReverseAdapter<T>{iterable}; return detail::ReverseAdapter<T>{iterable};
} }
} // namespace Dynarmic::Common } // namespace Dynarmic::Common

View file

@ -8,8 +8,8 @@
#include <fmt/format.h> #include <fmt/format.h>
#ifdef DYNARMIC_USE_LLVM #ifdef DYNARMIC_USE_LLVM
#include <llvm-c/Disassembler.h> # include <llvm-c/Disassembler.h>
#include <llvm-c/Target.h> # include <llvm-c/Target.h>
#endif #endif
#include "dynarmic/common/assert.h" #include "dynarmic/common/assert.h"
@ -79,8 +79,10 @@ std::string DisassembleAArch32([[maybe_unused]] bool is_thumb, [[maybe_unused]]
result += inst_size > 0 ? buffer : "<invalid instruction>"; result += inst_size > 0 ? buffer : "<invalid instruction>";
result += '\n'; result += '\n';
if (inst_size == 0) inst_size = is_thumb ? 2 : 4; if (inst_size == 0)
if (length <= inst_size) break; inst_size = is_thumb ? 2 : 4;
if (length <= inst_size)
break;
pc += inst_size; pc += inst_size;
instructions += inst_size; instructions += inst_size;
@ -118,4 +120,4 @@ std::string DisassembleAArch64([[maybe_unused]] u32 instruction, [[maybe_unused]
return result; return result;
} }
} // namespace Dynarmic::Common } // namespace Dynarmic::Common

Some files were not shown because too many files have changed in this diff Show more