Add .clang-format file

Using clang-format version 12.0.0
This commit is contained in:
MerryMage 2021-05-22 14:51:20 +01:00
parent 51b155df92
commit 53493b2024
315 changed files with 3178 additions and 2660 deletions

218
.clang-format Normal file
View file

@ -0,0 +1,218 @@
---
Language: Cpp
AccessModifierOffset: -4
AlignAfterOpenBracket: Align
AlignConsecutiveMacros: None
AlignConsecutiveAssignments: None
AlignConsecutiveBitFields: None
AlignConsecutiveDeclarations: None
AlignEscapedNewlines: Right
AlignOperands: AlignAfterOperator
AlignTrailingComments: true
AllowAllArgumentsOnNextLine: true
AllowAllConstructorInitializersOnNextLine: true
AllowAllParametersOfDeclarationOnNextLine: true
AllowShortEnumsOnASingleLine: true
AllowShortBlocksOnASingleLine: Empty
AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: Inline
AllowShortLambdasOnASingleLine: All
AllowShortIfStatementsOnASingleLine: Never
AllowShortLoopsOnASingleLine: false
AlwaysBreakAfterDefinitionReturnType: None
AlwaysBreakAfterReturnType: None
AlwaysBreakBeforeMultilineStrings: true
AlwaysBreakTemplateDeclarations: Yes
AttributeMacros:
- __capability
BinPackArguments: true
BinPackParameters: false
BitFieldColonSpacing: Both
BraceWrapping:
AfterCaseLabel: false
AfterClass: false
AfterControlStatement: Never
AfterEnum: false
AfterFunction: false
AfterNamespace: false
AfterObjCDeclaration: false
AfterStruct: false
AfterUnion: false
AfterExternBlock: false
BeforeCatch: false
BeforeElse: false
BeforeLambdaBody: false
BeforeWhile: false
IndentBraces: false
SplitEmptyFunction: false
SplitEmptyRecord: false
SplitEmptyNamespace: false
BreakBeforeBinaryOperators: All
BreakBeforeBraces: Custom
BreakBeforeConceptDeclarations: true
BreakBeforeTernaryOperators: true
BreakBeforeInheritanceComma: false
BreakConstructorInitializersBeforeComma: true
BreakConstructorInitializers: BeforeComma
BreakInheritanceList: BeforeComma
BreakAfterJavaFieldAnnotations: false
BreakStringLiterals: true
ColumnLimit: 0
CommentPragmas: '^ IWYU pragma:'
CompactNamespaces: false
ConstructorInitializerAllOnOneLineOrOnePerLine: true
ConstructorInitializerIndentWidth: 8
ContinuationIndentWidth: 4
Cpp11BracedListStyle: true
DeriveLineEnding: true
DerivePointerAlignment: false
DisableFormat: false
# EmptyLineAfterAccessModifier: Leave
EmptyLineBeforeAccessModifier: Always
ExperimentalAutoDetectBinPacking: false
FixNamespaceComments: true
ForEachMacros:
- foreach
- Q_FOREACH
- BOOST_FOREACH
IncludeBlocks: Regroup
IncludeCategories:
- Regex: '^<mach/'
Priority: 1
SortPriority: 0
CaseSensitive: false
- Regex: '^<windows.h>'
Priority: 1
SortPriority: 0
CaseSensitive: false
- Regex: '(^<signal.h>)|(^<sys/ucontext.h>)|(^<ucontext.h>)'
Priority: 1
SortPriority: 0
CaseSensitive: false
- Regex: '^<([^\.])*>$'
Priority: 2
SortPriority: 0
CaseSensitive: false
- Regex: '^<.*\.'
Priority: 3
SortPriority: 0
CaseSensitive: false
- Regex: '.*'
Priority: 4
SortPriority: 0
CaseSensitive: false
IncludeIsMainRegex: '([-_](test|unittest))?$'
IncludeIsMainSourceRegex: ''
# IndentAccessModifiers: false
IndentCaseBlocks: false
IndentCaseLabels: false
IndentExternBlock: NoIndent
IndentGotoLabels: false
IndentPPDirectives: AfterHash
IndentRequires: false
IndentWidth: 4
IndentWrappedFunctionNames: false
# InsertTrailingCommas: None
JavaScriptQuotes: Leave
JavaScriptWrapImports: true
KeepEmptyLinesAtTheStartOfBlocks: false
MacroBlockBegin: ''
MacroBlockEnd: ''
MaxEmptyLinesToKeep: 1
NamespaceIndentation: None
NamespaceMacros:
ObjCBinPackProtocolList: Never
ObjCBlockIndentWidth: 2
ObjCBreakBeforeNestedBlockParam: true
ObjCSpaceAfterProperty: false
ObjCSpaceBeforeProtocolList: true
PenaltyBreakAssignment: 2
PenaltyBreakBeforeFirstCallParameter: 1
PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 120
PenaltyBreakString: 1000
PenaltyBreakTemplateDeclaration: 10
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 200
PenaltyIndentedWhitespace: 0
PointerAlignment: Left
RawStringFormats:
- Language: Cpp
Delimiters:
- cc
- CC
- cpp
- Cpp
- CPP
- 'c++'
- 'C++'
CanonicalDelimiter: ''
BasedOnStyle: google
- Language: TextProto
Delimiters:
- pb
- PB
- proto
- PROTO
EnclosingFunctions:
- EqualsProto
- EquivToProto
- PARSE_PARTIAL_TEXT_PROTO
- PARSE_TEST_PROTO
- PARSE_TEXT_PROTO
- ParseTextOrDie
- ParseTextProtoOrDie
- ParseTestProto
- ParsePartialTestProto
CanonicalDelimiter: ''
BasedOnStyle: google
ReflowComments: true
# ShortNamespaceLines: 5
SortIncludes: true
SortJavaStaticImport: Before
SortUsingDeclarations: true
SpaceAfterCStyleCast: false
SpaceAfterLogicalNot: false
SpaceAfterTemplateKeyword: false
SpaceAroundPointerQualifiers: Default
SpaceBeforeAssignmentOperators: true
SpaceBeforeCaseColon: false
SpaceBeforeCpp11BracedList: false
SpaceBeforeCtorInitializerColon: true
SpaceBeforeInheritanceColon: true
SpaceBeforeParens: ControlStatements
SpaceBeforeRangeBasedForLoopColon: true
SpaceBeforeSquareBrackets: false
SpaceInEmptyBlock: false
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 2
SpacesInAngles: false
SpacesInConditionalStatement: false
SpacesInCStyleCastParentheses: false
SpacesInContainerLiterals: false
# SpacesInLineCommentPrefix: -1
SpacesInParentheses: false
SpacesInSquareBrackets: false
Standard: Latest
StatementAttributeLikeMacros:
- Q_EMIT
StatementMacros:
- Q_UNUSED
- QT_REQUIRE_VERSION
TabWidth: 4
TypenameMacros:
UseCRLF: false
UseTab: Never
WhitespaceSensitiveMacros:
- STRINGIZE
- PP_STRINGIZE
- BOOST_PP_STRINGIZE
- NS_SWIFT_NAME
- CF_SWIFT_NAME
- FCODE
- ICODE
...

View file

@ -3,6 +3,8 @@
* SPDX-License-Identifier: 0BSD * SPDX-License-Identifier: 0BSD
*/ */
#include "dynarmic/backend/x64/a32_emit_x64.h"
#include <algorithm> #include <algorithm>
#include <optional> #include <optional>
#include <utility> #include <utility>
@ -11,7 +13,6 @@
#include <fmt/ostream.h> #include <fmt/ostream.h>
#include <mp/traits/integer_of_size.h> #include <mp/traits/integer_of_size.h>
#include "dynarmic/backend/x64/a32_emit_x64.h"
#include "dynarmic/backend/x64/a32_jitstate.h" #include "dynarmic/backend/x64/a32_jitstate.h"
#include "dynarmic/backend/x64/abi.h" #include "dynarmic/backend/x64/abi.h"
#include "dynarmic/backend/x64/block_of_code.h" #include "dynarmic/backend/x64/block_of_code.h"
@ -87,7 +88,7 @@ A32EmitX64::A32EmitX64(BlockOfCode& code, A32::UserConfig conf, A32::Jit* jit_in
code.PreludeComplete(); code.PreludeComplete();
ClearFastDispatchTable(); ClearFastDispatchTable();
exception_handler.SetFastmemCallback([this](u64 rip_){ exception_handler.SetFastmemCallback([this](u64 rip_) {
return FastmemCallback(rip_); return FastmemCallback(rip_);
}); });
} }
@ -98,7 +99,7 @@ A32EmitX64::BlockDescriptor A32EmitX64::Emit(IR::Block& block) {
code.EnableWriting(); code.EnableWriting();
SCOPE_EXIT { code.DisableWriting(); }; SCOPE_EXIT { code.DisableWriting(); };
static const std::vector<HostLoc> gpr_order = [this]{ static const std::vector<HostLoc> gpr_order = [this] {
std::vector<HostLoc> gprs{any_gpr}; std::vector<HostLoc> gprs{any_gpr};
if (conf.page_table) { if (conf.page_table) {
gprs.erase(std::find(gprs.begin(), gprs.end(), HostLoc::R14)); gprs.erase(std::find(gprs.begin(), gprs.end(), HostLoc::R14));
@ -126,7 +127,6 @@ A32EmitX64::BlockDescriptor A32EmitX64::Emit(IR::Block& block) {
// Call the relevant Emit* member function. // Call the relevant Emit* member function.
switch (inst->GetOpcode()) { switch (inst->GetOpcode()) {
#define OPCODE(name, type, ...) \ #define OPCODE(name, type, ...) \
case IR::Opcode::name: \ case IR::Opcode::name: \
A32EmitX64::Emit##name(ctx, inst); \ A32EmitX64::Emit##name(ctx, inst); \
@ -216,7 +216,7 @@ void A32EmitX64::GenFastmemFallbacks() {
for (int value_idx : idxes) { for (int value_idx : idxes) {
for (const auto& [bitsize, callback] : read_callbacks) { for (const auto& [bitsize, callback] : read_callbacks) {
code.align(); code.align();
read_fallbacks[std::make_tuple(bitsize, vaddr_idx, value_idx)] = code.getCurr<void(*)()>(); read_fallbacks[std::make_tuple(bitsize, vaddr_idx, value_idx)] = code.getCurr<void (*)()>();
ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocRegIdx(value_idx)); ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocRegIdx(value_idx));
if (vaddr_idx != code.ABI_PARAM2.getIdx()) { if (vaddr_idx != code.ABI_PARAM2.getIdx()) {
code.mov(code.ABI_PARAM2, Xbyak::Reg64{vaddr_idx}); code.mov(code.ABI_PARAM2, Xbyak::Reg64{vaddr_idx});
@ -232,7 +232,7 @@ void A32EmitX64::GenFastmemFallbacks() {
for (const auto& [bitsize, callback] : write_callbacks) { for (const auto& [bitsize, callback] : write_callbacks) {
code.align(); code.align();
write_fallbacks[std::make_tuple(bitsize, vaddr_idx, value_idx)] = code.getCurr<void(*)()>(); write_fallbacks[std::make_tuple(bitsize, vaddr_idx, value_idx)] = code.getCurr<void (*)()>();
ABI_PushCallerSaveRegistersAndAdjustStack(code); ABI_PushCallerSaveRegistersAndAdjustStack(code);
if (vaddr_idx == code.ABI_PARAM3.getIdx() && value_idx == code.ABI_PARAM2.getIdx()) { if (vaddr_idx == code.ABI_PARAM3.getIdx() && value_idx == code.ABI_PARAM2.getIdx()) {
code.xchg(code.ABI_PARAM2, code.ABI_PARAM3); code.xchg(code.ABI_PARAM2, code.ABI_PARAM3);
@ -310,7 +310,7 @@ void A32EmitX64::GenTerminalHandlers() {
PerfMapRegister(terminal_handler_fast_dispatch_hint, code.getCurr(), "a32_terminal_handler_fast_dispatch_hint"); PerfMapRegister(terminal_handler_fast_dispatch_hint, code.getCurr(), "a32_terminal_handler_fast_dispatch_hint");
code.align(); code.align();
fast_dispatch_table_lookup = code.getCurr<FastDispatchEntry&(*)(u64)>(); fast_dispatch_table_lookup = code.getCurr<FastDispatchEntry& (*)(u64)>();
code.mov(code.ABI_PARAM2, reinterpret_cast<u64>(fast_dispatch_table.data())); code.mov(code.ABI_PARAM2, reinterpret_cast<u64>(fast_dispatch_table.data()));
if (code.HasHostFeature(HostFeature::SSE42)) { if (code.HasHostFeature(HostFeature::SSE42)) {
code.crc32(code.ABI_PARAM1.cvt32(), code.ABI_PARAM2.cvt32()); code.crc32(code.ABI_PARAM1.cvt32(), code.ABI_PARAM2.cvt32());
@ -1048,8 +1048,7 @@ void A32EmitX64::ReadMemory(A32EmitContext& ctx, IR::Inst* inst) {
Common::BitCast<u64>(code.getCurr()), Common::BitCast<u64>(code.getCurr()),
Common::BitCast<u64>(wrapped_fn), Common::BitCast<u64>(wrapped_fn),
*marker, *marker,
} });
);
ctx.reg_alloc.DefineValue(inst, value); ctx.reg_alloc.DefineValue(inst, value);
return; return;
@ -1095,8 +1094,7 @@ void A32EmitX64::WriteMemory(A32EmitContext& ctx, IR::Inst* inst) {
Common::BitCast<u64>(code.getCurr()), Common::BitCast<u64>(code.getCurr()),
Common::BitCast<u64>(wrapped_fn), Common::BitCast<u64>(wrapped_fn),
*marker, *marker,
} });
);
return; return;
} }
@ -1146,7 +1144,7 @@ void A32EmitX64::EmitA32WriteMemory64(A32EmitContext& ctx, IR::Inst* inst) {
WriteMemory<64, &A32::UserCallbacks::MemoryWrite64>(ctx, inst); WriteMemory<64, &A32::UserCallbacks::MemoryWrite64>(ctx, inst);
} }
template <size_t bitsize, auto callback> template<size_t bitsize, auto callback>
void A32EmitX64::ExclusiveReadMemory(A32EmitContext& ctx, IR::Inst* inst) { void A32EmitX64::ExclusiveReadMemory(A32EmitContext& ctx, IR::Inst* inst) {
using T = mp::unsigned_integer_of_size<bitsize>; using T = mp::unsigned_integer_of_size<bitsize>;
@ -1162,11 +1160,10 @@ void A32EmitX64::ExclusiveReadMemory(A32EmitContext& ctx, IR::Inst* inst) {
return conf.global_monitor->ReadAndMark<T>(conf.processor_id, vaddr, [&]() -> T { return conf.global_monitor->ReadAndMark<T>(conf.processor_id, vaddr, [&]() -> T {
return (conf.callbacks->*callback)(vaddr); return (conf.callbacks->*callback)(vaddr);
}); });
} });
);
} }
template <size_t bitsize, auto callback> template<size_t bitsize, auto callback>
void A32EmitX64::ExclusiveWriteMemory(A32EmitContext& ctx, IR::Inst* inst) { void A32EmitX64::ExclusiveWriteMemory(A32EmitContext& ctx, IR::Inst* inst) {
using T = mp::unsigned_integer_of_size<bitsize>; using T = mp::unsigned_integer_of_size<bitsize>;
@ -1187,9 +1184,10 @@ void A32EmitX64::ExclusiveWriteMemory(A32EmitContext& ctx, IR::Inst* inst) {
return conf.global_monitor->DoExclusiveOperation<T>(conf.processor_id, vaddr, return conf.global_monitor->DoExclusiveOperation<T>(conf.processor_id, vaddr,
[&](T expected) -> bool { [&](T expected) -> bool {
return (conf.callbacks->*callback)(vaddr, value, expected); return (conf.callbacks->*callback)(vaddr, value, expected);
}) ? 0 : 1; })
} ? 0
); : 1;
});
code.L(end); code.L(end);
} }
@ -1229,10 +1227,7 @@ static void EmitCoprocessorException() {
ASSERT_FALSE("Should raise coproc exception here"); ASSERT_FALSE("Should raise coproc exception here");
} }
static void CallCoprocCallback(BlockOfCode& code, RegAlloc& reg_alloc, A32::Jit* jit_interface, static void CallCoprocCallback(BlockOfCode& code, RegAlloc& reg_alloc, A32::Jit* jit_interface, A32::Coprocessor::Callback callback, IR::Inst* inst = nullptr, std::optional<Argument::copyable_reference> arg0 = {}, std::optional<Argument::copyable_reference> arg1 = {}) {
A32::Coprocessor::Callback callback, IR::Inst* inst = nullptr,
std::optional<Argument::copyable_reference> arg0 = {},
std::optional<Argument::copyable_reference> arg1 = {}) {
reg_alloc.HostCall(inst, {}, {}, arg0, arg1); reg_alloc.HostCall(inst, {}, {}, arg0, arg1);
code.mov(code.ABI_PARAM1, reinterpret_cast<u64>(jit_interface)); code.mov(code.ABI_PARAM1, reinterpret_cast<u64>(jit_interface));
@ -1532,7 +1527,7 @@ void A32EmitX64::EmitSetUpperLocationDescriptor(IR::LocationDescriptor new_locat
}; };
const u32 old_upper = get_upper(old_location); const u32 old_upper = get_upper(old_location);
const u32 new_upper = [&]{ const u32 new_upper = [&] {
const u32 mask = ~u32(conf.always_little_endian ? 0x2 : 0); const u32 mask = ~u32(conf.always_little_endian ? 0x2 : 0);
return get_upper(new_location) & mask; return get_upper(new_location) & mask;
}(); }();

View file

@ -71,8 +71,8 @@ protected:
std::array<FastDispatchEntry, fast_dispatch_table_size> fast_dispatch_table; std::array<FastDispatchEntry, fast_dispatch_table_size> fast_dispatch_table;
void ClearFastDispatchTable(); void ClearFastDispatchTable();
std::map<std::tuple<size_t, int, int>, void(*)()> read_fallbacks; std::map<std::tuple<size_t, int, int>, void (*)()> read_fallbacks;
std::map<std::tuple<size_t, int, int>, void(*)()> write_fallbacks; std::map<std::tuple<size_t, int, int>, void (*)()> write_fallbacks;
void GenFastmemFallbacks(); void GenFastmemFallbacks();
const void* terminal_handler_pop_rsb_hint; const void* terminal_handler_pop_rsb_hint;

View file

@ -55,8 +55,7 @@ struct Jit::Impl {
: block_of_code(GenRunCodeCallbacks(conf.callbacks, &GetCurrentBlockThunk, this), JitStateInfo{jit_state}, conf.code_cache_size, conf.far_code_offset, GenRCP(conf)) : block_of_code(GenRunCodeCallbacks(conf.callbacks, &GetCurrentBlockThunk, this), JitStateInfo{jit_state}, conf.code_cache_size, conf.far_code_offset, GenRCP(conf))
, emitter(block_of_code, conf, jit) , emitter(block_of_code, conf, jit)
, conf(std::move(conf)) , conf(std::move(conf))
, jit_interface(jit) , jit_interface(jit) {}
{}
A32JitState jit_state; A32JitState jit_state;
BlockOfCode block_of_code; BlockOfCode block_of_code;
@ -70,7 +69,7 @@ struct Jit::Impl {
bool invalidate_entire_cache = false; bool invalidate_entire_cache = false;
void Execute() { void Execute() {
const CodePtr current_codeptr = [this]{ const CodePtr current_codeptr = [this] {
// RSB optimization // RSB optimization
const u32 new_rsb_ptr = (jit_state.rsb_ptr - 1) & A32JitState::RSBPtrMask; const u32 new_rsb_ptr = (jit_state.rsb_ptr - 1) & A32JitState::RSBPtrMask;
if (jit_state.GetUniqueHash() == jit_state.rsb_location_descriptors[new_rsb_ptr]) { if (jit_state.GetUniqueHash() == jit_state.rsb_location_descriptors[new_rsb_ptr]) {
@ -176,7 +175,8 @@ private:
} }
}; };
Jit::Jit(UserConfig conf) : impl(std::make_unique<Impl>(this, std::move(conf))) {} Jit::Jit(UserConfig conf)
: impl(std::make_unique<Impl>(this, std::move(conf))) {}
Jit::~Jit() = default; Jit::~Jit() = default;
@ -269,10 +269,15 @@ struct Context::Impl {
size_t invalid_cache_generation; size_t invalid_cache_generation;
}; };
Context::Context() : impl(std::make_unique<Context::Impl>()) { impl->jit_state.ResetRSB(); } Context::Context()
: impl(std::make_unique<Context::Impl>()) {
impl->jit_state.ResetRSB();
}
Context::~Context() = default; Context::~Context() = default;
Context::Context(const Context& ctx) : impl(std::make_unique<Context::Impl>(*ctx.impl)) {} Context::Context(const Context& ctx)
Context::Context(Context&& ctx) noexcept : impl(std::move(ctx.impl)) {} : impl(std::make_unique<Context::Impl>(*ctx.impl)) {}
Context::Context(Context&& ctx) noexcept
: impl(std::move(ctx.impl)) {}
Context& Context::operator=(const Context& ctx) { Context& Context::operator=(const Context& ctx) {
*impl = *ctx.impl; *impl = *ctx.impl;
return *this; return *this;

View file

@ -4,6 +4,7 @@
*/ */
#include "dynarmic/backend/x64/a32_jitstate.h" #include "dynarmic/backend/x64/a32_jitstate.h"
#include "dynarmic/backend/x64/block_of_code.h" #include "dynarmic/backend/x64/block_of_code.h"
#include "dynarmic/backend/x64/nzcv_util.h" #include "dynarmic/backend/x64/nzcv_util.h"
#include "dynarmic/common/assert.h" #include "dynarmic/common/assert.h"
@ -188,7 +189,7 @@ void A32JitState::SetFpscr(u32 FPSCR) {
asimd_MXCSR = 0x00009fc0; asimd_MXCSR = 0x00009fc0;
// RMode // RMode
const std::array<u32, 4> MXCSR_RMode {0x0, 0x4000, 0x2000, 0x6000}; const std::array<u32, 4> MXCSR_RMode{0x0, 0x4000, 0x2000, 0x6000};
guest_MXCSR |= MXCSR_RMode[(FPSCR >> 22) & 0x3]; guest_MXCSR |= MXCSR_RMode[(FPSCR >> 22) & 0x3];
// Cumulative flags IDC, IOC, IXC, UFC, OFC, DZC // Cumulative flags IDC, IOC, IXC, UFC, OFC, DZC

View file

@ -16,8 +16,8 @@ namespace Dynarmic::Backend::X64 {
class BlockOfCode; class BlockOfCode;
#ifdef _MSC_VER #ifdef _MSC_VER
#pragma warning(push) # pragma warning(push)
#pragma warning(disable:4324) // Structure was padded due to alignment specifier # pragma warning(disable : 4324) // Structure was padded due to alignment specifier
#endif #endif
struct A32JitState { struct A32JitState {
@ -91,7 +91,7 @@ struct A32JitState {
}; };
#ifdef _MSC_VER #ifdef _MSC_VER
#pragma warning(pop) # pragma warning(pop)
#endif #endif
using CodePtr = const void*; using CodePtr = const void*;

View file

@ -3,13 +3,14 @@
* SPDX-License-Identifier: 0BSD * SPDX-License-Identifier: 0BSD
*/ */
#include "dynarmic/backend/x64/a64_emit_x64.h"
#include <initializer_list> #include <initializer_list>
#include <fmt/format.h> #include <fmt/format.h>
#include <fmt/ostream.h> #include <fmt/ostream.h>
#include <mp/traits/integer_of_size.h> #include <mp/traits/integer_of_size.h>
#include "dynarmic/backend/x64/a64_emit_x64.h"
#include "dynarmic/backend/x64/a64_jitstate.h" #include "dynarmic/backend/x64/a64_jitstate.h"
#include "dynarmic/backend/x64/abi.h" #include "dynarmic/backend/x64/abi.h"
#include "dynarmic/backend/x64/block_of_code.h" #include "dynarmic/backend/x64/block_of_code.h"
@ -67,7 +68,7 @@ A64EmitX64::BlockDescriptor A64EmitX64::Emit(IR::Block& block) {
code.EnableWriting(); code.EnableWriting();
SCOPE_EXIT { code.DisableWriting(); }; SCOPE_EXIT { code.DisableWriting(); };
static const std::vector<HostLoc> gpr_order = [this]{ static const std::vector<HostLoc> gpr_order = [this] {
std::vector<HostLoc> gprs{any_gpr}; std::vector<HostLoc> gprs{any_gpr};
if (conf.page_table) { if (conf.page_table) {
gprs.erase(std::find(gprs.begin(), gprs.end(), HostLoc::R14)); gprs.erase(std::find(gprs.begin(), gprs.end(), HostLoc::R14));
@ -92,7 +93,6 @@ A64EmitX64::BlockDescriptor A64EmitX64::Emit(IR::Block& block) {
// Call the relevant Emit* member function. // Call the relevant Emit* member function.
switch (inst->GetOpcode()) { switch (inst->GetOpcode()) {
#define OPCODE(name, type, ...) \ #define OPCODE(name, type, ...) \
case IR::Opcode::name: \ case IR::Opcode::name: \
A64EmitX64::Emit##name(ctx, inst); \ A64EmitX64::Emit##name(ctx, inst); \
@ -150,10 +150,9 @@ void A64EmitX64::ClearFastDispatchTable() {
void A64EmitX64::GenMemory128Accessors() { void A64EmitX64::GenMemory128Accessors() {
code.align(); code.align();
memory_read_128 = code.getCurr<void(*)()>(); memory_read_128 = code.getCurr<void (*)()>();
#ifdef _WIN32 #ifdef _WIN32
Devirtualize<&A64::UserCallbacks::MemoryRead128>(conf.callbacks).EmitCallWithReturnPointer(code, Devirtualize<&A64::UserCallbacks::MemoryRead128>(conf.callbacks).EmitCallWithReturnPointer(code, [&](Xbyak::Reg64 return_value_ptr, [[maybe_unused]] RegList args) {
[&](Xbyak::Reg64 return_value_ptr, [[maybe_unused]] RegList args) {
code.mov(code.ABI_PARAM3, code.ABI_PARAM2); code.mov(code.ABI_PARAM3, code.ABI_PARAM2);
code.sub(rsp, 8 + 16 + ABI_SHADOW_SPACE); code.sub(rsp, 8 + 16 + ABI_SHADOW_SPACE);
code.lea(return_value_ptr, ptr[rsp + ABI_SHADOW_SPACE]); code.lea(return_value_ptr, ptr[rsp + ABI_SHADOW_SPACE]);
@ -177,7 +176,7 @@ void A64EmitX64::GenMemory128Accessors() {
PerfMapRegister(memory_read_128, code.getCurr(), "a64_memory_read_128"); PerfMapRegister(memory_read_128, code.getCurr(), "a64_memory_read_128");
code.align(); code.align();
memory_write_128 = code.getCurr<void(*)()>(); memory_write_128 = code.getCurr<void (*)()>();
#ifdef _WIN32 #ifdef _WIN32
code.sub(rsp, 8 + 16 + ABI_SHADOW_SPACE); code.sub(rsp, 8 + 16 + ABI_SHADOW_SPACE);
code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE]); code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE]);
@ -223,7 +222,7 @@ void A64EmitX64::GenFastmemFallbacks() {
for (int value_idx : idxes) { for (int value_idx : idxes) {
code.align(); code.align();
read_fallbacks[std::make_tuple(128, vaddr_idx, value_idx)] = code.getCurr<void(*)()>(); read_fallbacks[std::make_tuple(128, vaddr_idx, value_idx)] = code.getCurr<void (*)()>();
ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(value_idx)); ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(value_idx));
if (vaddr_idx != code.ABI_PARAM2.getIdx()) { if (vaddr_idx != code.ABI_PARAM2.getIdx()) {
code.mov(code.ABI_PARAM2, Xbyak::Reg64{vaddr_idx}); code.mov(code.ABI_PARAM2, Xbyak::Reg64{vaddr_idx});
@ -237,7 +236,7 @@ void A64EmitX64::GenFastmemFallbacks() {
PerfMapRegister(read_fallbacks[std::make_tuple(128, vaddr_idx, value_idx)], code.getCurr(), "a64_read_fallback_128"); PerfMapRegister(read_fallbacks[std::make_tuple(128, vaddr_idx, value_idx)], code.getCurr(), "a64_read_fallback_128");
code.align(); code.align();
write_fallbacks[std::make_tuple(128, vaddr_idx, value_idx)] = code.getCurr<void(*)()>(); write_fallbacks[std::make_tuple(128, vaddr_idx, value_idx)] = code.getCurr<void (*)()>();
ABI_PushCallerSaveRegistersAndAdjustStack(code); ABI_PushCallerSaveRegistersAndAdjustStack(code);
if (vaddr_idx != code.ABI_PARAM2.getIdx()) { if (vaddr_idx != code.ABI_PARAM2.getIdx()) {
code.mov(code.ABI_PARAM2, Xbyak::Reg64{vaddr_idx}); code.mov(code.ABI_PARAM2, Xbyak::Reg64{vaddr_idx});
@ -256,7 +255,7 @@ void A64EmitX64::GenFastmemFallbacks() {
for (const auto& [bitsize, callback] : read_callbacks) { for (const auto& [bitsize, callback] : read_callbacks) {
code.align(); code.align();
read_fallbacks[std::make_tuple(bitsize, vaddr_idx, value_idx)] = code.getCurr<void(*)()>(); read_fallbacks[std::make_tuple(bitsize, vaddr_idx, value_idx)] = code.getCurr<void (*)()>();
ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocRegIdx(value_idx)); ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocRegIdx(value_idx));
if (vaddr_idx != code.ABI_PARAM2.getIdx()) { if (vaddr_idx != code.ABI_PARAM2.getIdx()) {
code.mov(code.ABI_PARAM2, Xbyak::Reg64{vaddr_idx}); code.mov(code.ABI_PARAM2, Xbyak::Reg64{vaddr_idx});
@ -272,7 +271,7 @@ void A64EmitX64::GenFastmemFallbacks() {
for (const auto& [bitsize, callback] : write_callbacks) { for (const auto& [bitsize, callback] : write_callbacks) {
code.align(); code.align();
write_fallbacks[std::make_tuple(bitsize, vaddr_idx, value_idx)] = code.getCurr<void(*)()>(); write_fallbacks[std::make_tuple(bitsize, vaddr_idx, value_idx)] = code.getCurr<void (*)()>();
ABI_PushCallerSaveRegistersAndAdjustStack(code); ABI_PushCallerSaveRegistersAndAdjustStack(code);
if (vaddr_idx == code.ABI_PARAM3.getIdx() && value_idx == code.ABI_PARAM2.getIdx()) { if (vaddr_idx == code.ABI_PARAM3.getIdx() && value_idx == code.ABI_PARAM2.getIdx()) {
code.xchg(code.ABI_PARAM2, code.ABI_PARAM3); code.xchg(code.ABI_PARAM2, code.ABI_PARAM3);
@ -353,7 +352,7 @@ void A64EmitX64::GenTerminalHandlers() {
PerfMapRegister(terminal_handler_fast_dispatch_hint, code.getCurr(), "a64_terminal_handler_fast_dispatch_hint"); PerfMapRegister(terminal_handler_fast_dispatch_hint, code.getCurr(), "a64_terminal_handler_fast_dispatch_hint");
code.align(); code.align();
fast_dispatch_table_lookup = code.getCurr<FastDispatchEntry&(*)(u64)>(); fast_dispatch_table_lookup = code.getCurr<FastDispatchEntry& (*)(u64)>();
code.mov(code.ABI_PARAM2, reinterpret_cast<u64>(fast_dispatch_table.data())); code.mov(code.ABI_PARAM2, reinterpret_cast<u64>(fast_dispatch_table.data()));
if (code.HasHostFeature(HostFeature::SSE42)) { if (code.HasHostFeature(HostFeature::SSE42)) {
code.crc32(code.ABI_PARAM1, code.ABI_PARAM2); code.crc32(code.ABI_PARAM1, code.ABI_PARAM2);
@ -628,8 +627,7 @@ void A64EmitX64::EmitA64CallSupervisor(A64EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ASSERT(args[0].IsImmediate()); ASSERT(args[0].IsImmediate());
const u32 imm = args[0].GetImmediateU32(); const u32 imm = args[0].GetImmediateU32();
Devirtualize<&A64::UserCallbacks::CallSVC>(conf.callbacks).EmitCall(code, Devirtualize<&A64::UserCallbacks::CallSVC>(conf.callbacks).EmitCall(code, [&](RegList param) {
[&](RegList param) {
code.mov(param[0], imm); code.mov(param[0], imm);
}); });
// The kernel would have to execute ERET to get here, which would clear exclusive state. // The kernel would have to execute ERET to get here, which would clear exclusive state.
@ -642,8 +640,7 @@ void A64EmitX64::EmitA64ExceptionRaised(A64EmitContext& ctx, IR::Inst* inst) {
ASSERT(args[0].IsImmediate() && args[1].IsImmediate()); ASSERT(args[0].IsImmediate() && args[1].IsImmediate());
const u64 pc = args[0].GetImmediateU64(); const u64 pc = args[0].GetImmediateU64();
const u64 exception = args[1].GetImmediateU64(); const u64 exception = args[1].GetImmediateU64();
Devirtualize<&A64::UserCallbacks::ExceptionRaised>(conf.callbacks).EmitCall(code, Devirtualize<&A64::UserCallbacks::ExceptionRaised>(conf.callbacks).EmitCall(code, [&](RegList param) {
[&](RegList param) {
code.mov(param[0], pc); code.mov(param[0], pc);
code.mov(param[1], exception); code.mov(param[1], exception);
}); });
@ -881,7 +878,7 @@ void EmitWriteMemoryMov(BlockOfCode& code, const Xbyak::RegExp& addr, const Xbya
} }
} }
} // anonymous namepsace } // namespace
template<std::size_t bitsize> template<std::size_t bitsize>
void A64EmitX64::EmitDirectPageTableMemoryRead(A64EmitContext& ctx, IR::Inst* inst) { void A64EmitX64::EmitDirectPageTableMemoryRead(A64EmitContext& ctx, IR::Inst* inst) {
@ -1090,8 +1087,7 @@ void A64EmitX64::EmitExclusiveReadMemory(A64EmitContext& ctx, IR::Inst* inst) {
return conf.global_monitor->ReadAndMark<T>(conf.processor_id, vaddr, [&]() -> T { return conf.global_monitor->ReadAndMark<T>(conf.processor_id, vaddr, [&]() -> T {
return (conf.callbacks->*callback)(vaddr); return (conf.callbacks->*callback)(vaddr);
}); });
} });
);
} else { } else {
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
ctx.reg_alloc.Use(args[0], ABI_PARAM2); ctx.reg_alloc.Use(args[0], ABI_PARAM2);
@ -1107,8 +1103,7 @@ void A64EmitX64::EmitExclusiveReadMemory(A64EmitContext& ctx, IR::Inst* inst) {
ret = conf.global_monitor->ReadAndMark<A64::Vector>(conf.processor_id, vaddr, [&]() -> A64::Vector { ret = conf.global_monitor->ReadAndMark<A64::Vector>(conf.processor_id, vaddr, [&]() -> A64::Vector {
return (conf.callbacks->*callback)(vaddr); return (conf.callbacks->*callback)(vaddr);
}); });
} });
);
code.movups(result, xword[rsp + ABI_SHADOW_SPACE]); code.movups(result, xword[rsp + ABI_SHADOW_SPACE]);
ctx.reg_alloc.ReleaseStackSpace(16 + ABI_SHADOW_SPACE); ctx.reg_alloc.ReleaseStackSpace(16 + ABI_SHADOW_SPACE);
@ -1165,9 +1160,10 @@ void A64EmitX64::EmitExclusiveWriteMemory(A64EmitContext& ctx, IR::Inst* inst) {
return conf.global_monitor->DoExclusiveOperation<T>(conf.processor_id, vaddr, return conf.global_monitor->DoExclusiveOperation<T>(conf.processor_id, vaddr,
[&](T expected) -> bool { [&](T expected) -> bool {
return (conf.callbacks->*callback)(vaddr, value, expected); return (conf.callbacks->*callback)(vaddr, value, expected);
}) ? 0 : 1; })
} ? 0
); : 1;
});
} else { } else {
ctx.reg_alloc.AllocStackSpace(16 + ABI_SHADOW_SPACE); ctx.reg_alloc.AllocStackSpace(16 + ABI_SHADOW_SPACE);
code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE]); code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE]);
@ -1177,9 +1173,10 @@ void A64EmitX64::EmitExclusiveWriteMemory(A64EmitContext& ctx, IR::Inst* inst) {
return conf.global_monitor->DoExclusiveOperation<A64::Vector>(conf.processor_id, vaddr, return conf.global_monitor->DoExclusiveOperation<A64::Vector>(conf.processor_id, vaddr,
[&](A64::Vector expected) -> bool { [&](A64::Vector expected) -> bool {
return (conf.callbacks->*callback)(vaddr, value, expected); return (conf.callbacks->*callback)(vaddr, value, expected);
}) ? 0 : 1; })
} ? 0
); : 1;
});
ctx.reg_alloc.ReleaseStackSpace(16 + ABI_SHADOW_SPACE); ctx.reg_alloc.ReleaseStackSpace(16 + ABI_SHADOW_SPACE);
} }
code.L(end); code.L(end);
@ -1214,8 +1211,7 @@ std::string A64EmitX64::LocationDescriptorToFriendlyName(const IR::LocationDescr
void A64EmitX64::EmitTerminalImpl(IR::Term::Interpret terminal, IR::LocationDescriptor, bool) { void A64EmitX64::EmitTerminalImpl(IR::Term::Interpret terminal, IR::LocationDescriptor, bool) {
code.SwitchMxcsrOnExit(); code.SwitchMxcsrOnExit();
Devirtualize<&A64::UserCallbacks::InterpreterFallback>(conf.callbacks).EmitCall(code, Devirtualize<&A64::UserCallbacks::InterpreterFallback>(conf.callbacks).EmitCall(code, [&](RegList param) {
[&](RegList param) {
code.mov(param[0], A64::LocationDescriptor{terminal.next}.PC()); code.mov(param[0], A64::LocationDescriptor{terminal.next}.PC());
code.mov(qword[r15 + offsetof(A64JitState, pc)], param[0]); code.mov(qword[r15 + offsetof(A64JitState, pc)], param[0]);
code.mov(param[1].cvt32(), terminal.num_instructions); code.mov(param[1].cvt32(), terminal.num_instructions);

View file

@ -69,8 +69,8 @@ protected:
void (*memory_write_128)(); void (*memory_write_128)();
void GenMemory128Accessors(); void GenMemory128Accessors();
std::map<std::tuple<size_t, int, int>, void(*)()> read_fallbacks; std::map<std::tuple<size_t, int, int>, void (*)()> read_fallbacks;
std::map<std::tuple<size_t, int, int>, void(*)()> write_fallbacks; std::map<std::tuple<size_t, int, int>, void (*)()> write_fallbacks;
void GenFastmemFallbacks(); void GenFastmemFallbacks();
const void* terminal_handler_pop_rsb_hint; const void* terminal_handler_pop_rsb_hint;

View file

@ -46,8 +46,7 @@ public:
Impl(Jit* jit, UserConfig conf) Impl(Jit* jit, UserConfig conf)
: conf(conf) : conf(conf)
, block_of_code(GenRunCodeCallbacks(conf.callbacks, &GetCurrentBlockThunk, this), JitStateInfo{jit_state}, conf.code_cache_size, conf.far_code_offset, GenRCP(conf)) , block_of_code(GenRunCodeCallbacks(conf.callbacks, &GetCurrentBlockThunk, this), JitStateInfo{jit_state}, conf.code_cache_size, conf.far_code_offset, GenRCP(conf))
, emitter(block_of_code, conf, jit) , emitter(block_of_code, conf, jit) {
{
ASSERT(conf.page_table_address_space_bits >= 12 && conf.page_table_address_space_bits <= 64); ASSERT(conf.page_table_address_space_bits >= 12 && conf.page_table_address_space_bits <= 64);
} }
@ -61,7 +60,7 @@ public:
// TODO: Check code alignment // TODO: Check code alignment
const CodePtr current_code_ptr = [this]{ const CodePtr current_code_ptr = [this] {
// RSB optimization // RSB optimization
const u32 new_rsb_ptr = (jit_state.rsb_ptr - 1) & A64JitState::RSBPtrMask; const u32 new_rsb_ptr = (jit_state.rsb_ptr - 1) & A64JitState::RSBPtrMask;
if (jit_state.GetUniqueHash() == jit_state.rsb_location_descriptors[new_rsb_ptr]) { if (jit_state.GetUniqueHash() == jit_state.rsb_location_descriptors[new_rsb_ptr]) {

View file

@ -4,6 +4,7 @@
*/ */
#include "dynarmic/backend/x64/a64_jitstate.h" #include "dynarmic/backend/x64/a64_jitstate.h"
#include "dynarmic/common/bit_util.h" #include "dynarmic/common/bit_util.h"
#include "dynarmic/frontend/A64/location_descriptor.h" #include "dynarmic/frontend/A64/location_descriptor.h"
@ -61,7 +62,7 @@ void A64JitState::SetFpcr(u32 value) {
guest_MXCSR |= 0x00001f80; // Mask all exceptions guest_MXCSR |= 0x00001f80; // Mask all exceptions
// RMode // RMode
const std::array<u32, 4> MXCSR_RMode {0x0, 0x4000, 0x2000, 0x6000}; const std::array<u32, 4> MXCSR_RMode{0x0, 0x4000, 0x2000, 0x6000};
guest_MXCSR |= MXCSR_RMode[(value >> 22) & 0x3]; guest_MXCSR |= MXCSR_RMode[(value >> 22) & 0x3];
if (Common::Bit<24>(value)) { if (Common::Bit<24>(value)) {

View file

@ -18,8 +18,8 @@ namespace Dynarmic::Backend::X64 {
class BlockOfCode; class BlockOfCode;
#ifdef _MSC_VER #ifdef _MSC_VER
#pragma warning(push) # pragma warning(push)
#pragma warning(disable:4324) // Structure was padded due to alignment specifier # pragma warning(disable : 4324) // Structure was padded due to alignment specifier
#endif #endif
struct A64JitState { struct A64JitState {
@ -77,7 +77,7 @@ struct A64JitState {
}; };
#ifdef _MSC_VER #ifdef _MSC_VER
#pragma warning(pop) # pragma warning(pop)
#endif #endif
using CodePtr = const void*; using CodePtr = const void*;

View file

@ -3,12 +3,13 @@
* SPDX-License-Identifier: 0BSD * SPDX-License-Identifier: 0BSD
*/ */
#include "dynarmic/backend/x64/abi.h"
#include <algorithm> #include <algorithm>
#include <vector> #include <vector>
#include <xbyak.h> #include <xbyak.h>
#include "dynarmic/backend/x64/abi.h"
#include "dynarmic/backend/x64/block_of_code.h" #include "dynarmic/backend/x64/block_of_code.h"
#include "dynarmic/common/common_types.h" #include "dynarmic/common/common_types.h"
#include "dynarmic/common/iterator_util.h" #include "dynarmic/common/iterator_util.h"

View file

@ -3,6 +3,15 @@
* SPDX-License-Identifier: 0BSD * SPDX-License-Identifier: 0BSD
*/ */
#include "dynarmic/backend/x64/block_of_code.h"
#ifdef _WIN32
# define WIN32_LEAN_AND_MEAN
# include <windows.h>
#else
# include <sys/mman.h>
#endif
#include <array> #include <array>
#include <cstring> #include <cstring>
@ -10,19 +19,12 @@
#include "dynarmic/backend/x64/a32_jitstate.h" #include "dynarmic/backend/x64/a32_jitstate.h"
#include "dynarmic/backend/x64/abi.h" #include "dynarmic/backend/x64/abi.h"
#include "dynarmic/backend/x64/block_of_code.h"
#include "dynarmic/backend/x64/hostloc.h" #include "dynarmic/backend/x64/hostloc.h"
#include "dynarmic/backend/x64/perf_map.h" #include "dynarmic/backend/x64/perf_map.h"
#include "dynarmic/backend/x64/stack_layout.h" #include "dynarmic/backend/x64/stack_layout.h"
#include "dynarmic/common/assert.h" #include "dynarmic/common/assert.h"
#include "dynarmic/common/bit_util.h" #include "dynarmic/common/bit_util.h"
#ifdef _WIN32
#include <windows.h>
#else
#include <sys/mman.h>
#endif
namespace Dynarmic::Backend::X64 { namespace Dynarmic::Backend::X64 {
#ifdef _WIN32 #ifdef _WIN32
@ -60,47 +62,66 @@ CustomXbyakAllocator s_allocator;
#ifdef DYNARMIC_ENABLE_NO_EXECUTE_SUPPORT #ifdef DYNARMIC_ENABLE_NO_EXECUTE_SUPPORT
void ProtectMemory(const void* base, size_t size, bool is_executable) { void ProtectMemory(const void* base, size_t size, bool is_executable) {
#ifdef _WIN32 # ifdef _WIN32
DWORD oldProtect = 0; DWORD oldProtect = 0;
VirtualProtect(const_cast<void*>(base), size, is_executable ? PAGE_EXECUTE_READ : PAGE_READWRITE, &oldProtect); VirtualProtect(const_cast<void*>(base), size, is_executable ? PAGE_EXECUTE_READ : PAGE_READWRITE, &oldProtect);
#else # else
static const size_t pageSize = sysconf(_SC_PAGESIZE); static const size_t pageSize = sysconf(_SC_PAGESIZE);
const size_t iaddr = reinterpret_cast<size_t>(base); const size_t iaddr = reinterpret_cast<size_t>(base);
const size_t roundAddr = iaddr & ~(pageSize - static_cast<size_t>(1)); const size_t roundAddr = iaddr & ~(pageSize - static_cast<size_t>(1));
const int mode = is_executable ? (PROT_READ | PROT_EXEC) : (PROT_READ | PROT_WRITE); const int mode = is_executable ? (PROT_READ | PROT_EXEC) : (PROT_READ | PROT_WRITE);
mprotect(reinterpret_cast<void*>(roundAddr), size + (iaddr - roundAddr), mode); mprotect(reinterpret_cast<void*>(roundAddr), size + (iaddr - roundAddr), mode);
#endif # endif
} }
#endif #endif
HostFeature GetHostFeatures() HostFeature GetHostFeatures() {
{
HostFeature features = {}; HostFeature features = {};
#ifdef DYNARMIC_ENABLE_CPU_FEATURE_DETECTION #ifdef DYNARMIC_ENABLE_CPU_FEATURE_DETECTION
using Cpu = Xbyak::util::Cpu; using Cpu = Xbyak::util::Cpu;
Xbyak::util::Cpu cpu_info; Xbyak::util::Cpu cpu_info;
if (cpu_info.has(Cpu::tSSSE3)) features |= HostFeature::SSSE3; if (cpu_info.has(Cpu::tSSSE3))
if (cpu_info.has(Cpu::tSSE41)) features |= HostFeature::SSE41; features |= HostFeature::SSSE3;
if (cpu_info.has(Cpu::tSSE42)) features |= HostFeature::SSE42; if (cpu_info.has(Cpu::tSSE41))
if (cpu_info.has(Cpu::tAVX)) features |= HostFeature::AVX; features |= HostFeature::SSE41;
if (cpu_info.has(Cpu::tAVX2)) features |= HostFeature::AVX2; if (cpu_info.has(Cpu::tSSE42))
if (cpu_info.has(Cpu::tAVX512F)) features |= HostFeature::AVX512F; features |= HostFeature::SSE42;
if (cpu_info.has(Cpu::tAVX512CD)) features |= HostFeature::AVX512CD; if (cpu_info.has(Cpu::tAVX))
if (cpu_info.has(Cpu::tAVX512VL)) features |= HostFeature::AVX512VL; features |= HostFeature::AVX;
if (cpu_info.has(Cpu::tAVX512BW)) features |= HostFeature::AVX512BW; if (cpu_info.has(Cpu::tAVX2))
if (cpu_info.has(Cpu::tAVX512DQ)) features |= HostFeature::AVX512DQ; features |= HostFeature::AVX2;
if (cpu_info.has(Cpu::tAVX512_BITALG)) features |= HostFeature::AVX512BITALG; if (cpu_info.has(Cpu::tAVX512F))
if (cpu_info.has(Cpu::tPCLMULQDQ)) features |= HostFeature::PCLMULQDQ; features |= HostFeature::AVX512F;
if (cpu_info.has(Cpu::tF16C)) features |= HostFeature::F16C; if (cpu_info.has(Cpu::tAVX512CD))
if (cpu_info.has(Cpu::tFMA)) features |= HostFeature::FMA; features |= HostFeature::AVX512CD;
if (cpu_info.has(Cpu::tAESNI)) features |= HostFeature::AES; if (cpu_info.has(Cpu::tAVX512VL))
if (cpu_info.has(Cpu::tPOPCNT)) features |= HostFeature::POPCNT; features |= HostFeature::AVX512VL;
if (cpu_info.has(Cpu::tBMI1)) features |= HostFeature::BMI1; if (cpu_info.has(Cpu::tAVX512BW))
if (cpu_info.has(Cpu::tBMI2)) features |= HostFeature::BMI2; features |= HostFeature::AVX512BW;
if (cpu_info.has(Cpu::tLZCNT)) features |= HostFeature::LZCNT; if (cpu_info.has(Cpu::tAVX512DQ))
if (cpu_info.has(Cpu::tGFNI)) features |= HostFeature::GFNI; features |= HostFeature::AVX512DQ;
if (cpu_info.has(Cpu::tAVX512_BITALG))
features |= HostFeature::AVX512BITALG;
if (cpu_info.has(Cpu::tPCLMULQDQ))
features |= HostFeature::PCLMULQDQ;
if (cpu_info.has(Cpu::tF16C))
features |= HostFeature::F16C;
if (cpu_info.has(Cpu::tFMA))
features |= HostFeature::FMA;
if (cpu_info.has(Cpu::tAESNI))
features |= HostFeature::AES;
if (cpu_info.has(Cpu::tPOPCNT))
features |= HostFeature::POPCNT;
if (cpu_info.has(Cpu::tBMI1))
features |= HostFeature::BMI1;
if (cpu_info.has(Cpu::tBMI2))
features |= HostFeature::BMI2;
if (cpu_info.has(Cpu::tLZCNT))
features |= HostFeature::LZCNT;
if (cpu_info.has(Cpu::tGFNI))
features |= HostFeature::GFNI;
if (cpu_info.has(Cpu::tBMI2)) { if (cpu_info.has(Cpu::tBMI2)) {
// BMI2 instructions such as pdep and pext have been very slow up until Zen 3. // BMI2 instructions such as pdep and pext have been very slow up until Zen 3.
@ -109,7 +130,7 @@ HostFeature GetHostFeatures()
if (cpu_info.has(Cpu::tAMD)) { if (cpu_info.has(Cpu::tAMD)) {
std::array<u32, 4> data{}; std::array<u32, 4> data{};
cpu_info.getCpuid(1, data.data()); cpu_info.getCpuid(1, data.data());
const u32 family_base = Common::Bits< 8, 11>(data[0]); const u32 family_base = Common::Bits<8, 11>(data[0]);
const u32 family_extended = Common::Bits<20, 27>(data[0]); const u32 family_extended = Common::Bits<20, 27>(data[0]);
const u32 family = family_base + family_extended; const u32 family = family_base + family_extended;
if (family >= 0x19) if (family >= 0x19)
@ -131,8 +152,7 @@ BlockOfCode::BlockOfCode(RunCodeCallbacks cb, JitStateInfo jsi, size_t total_cod
, jsi(jsi) , jsi(jsi)
, far_code_offset(far_code_offset) , far_code_offset(far_code_offset)
, constant_pool(*this, CONSTANT_POOL_SIZE) , constant_pool(*this, CONSTANT_POOL_SIZE)
, host_features(GetHostFeatures()) , host_features(GetHostFeatures()) {
{
ASSERT(total_code_size > far_code_offset); ASSERT(total_code_size > far_code_offset);
EnableWriting(); EnableWriting();
GenRunCode(rcp); GenRunCode(rcp);

View file

@ -73,7 +73,7 @@ public:
void LookupBlock(); void LookupBlock();
/// Code emitter: Calls the function /// Code emitter: Calls the function
template <typename FunctionPointer> template<typename FunctionPointer>
void CallFunction(FunctionPointer fn) { void CallFunction(FunctionPointer fn) {
static_assert(std::is_pointer_v<FunctionPointer> && std::is_function_v<std::remove_pointer_t<FunctionPointer>>, static_assert(std::is_pointer_v<FunctionPointer> && std::is_function_v<std::remove_pointer_t<FunctionPointer>>,
"Supplied type must be a pointer to a function"); "Supplied type must be a pointer to a function");
@ -91,7 +91,7 @@ public:
} }
/// Code emitter: Calls the lambda. Lambda must not have any captures. /// Code emitter: Calls the lambda. Lambda must not have any captures.
template <typename Lambda> template<typename Lambda>
void CallLambda(Lambda l) { void CallLambda(Lambda l) {
CallFunction(Common::FptrCast(l)); CallFunction(Common::FptrCast(l));
} }
@ -165,7 +165,7 @@ private:
CodePtr near_code_ptr; CodePtr near_code_ptr;
CodePtr far_code_ptr; CodePtr far_code_ptr;
using RunCodeFuncType = void(*)(void*, CodePtr); using RunCodeFuncType = void (*)(void*, CodePtr);
RunCodeFuncType run_code = nullptr; RunCodeFuncType run_code = nullptr;
RunCodeFuncType step_code = nullptr; RunCodeFuncType step_code = nullptr;
static constexpr size_t MXCSR_ALREADY_EXITED = 1 << 0; static constexpr size_t MXCSR_ALREADY_EXITED = 1 << 0;

View file

@ -3,32 +3,33 @@
* SPDX-License-Identifier: 0BSD * SPDX-License-Identifier: 0BSD
*/ */
#include "dynarmic/backend/x64/block_range_information.h"
#include <boost/icl/interval_map.hpp> #include <boost/icl/interval_map.hpp>
#include <boost/icl/interval_set.hpp> #include <boost/icl/interval_set.hpp>
#include <tsl/robin_set.h> #include <tsl/robin_set.h>
#include "dynarmic/backend/x64/block_range_information.h"
#include "dynarmic/common/common_types.h" #include "dynarmic/common/common_types.h"
namespace Dynarmic::Backend::X64 { namespace Dynarmic::Backend::X64 {
template <typename ProgramCounterType> template<typename ProgramCounterType>
void BlockRangeInformation<ProgramCounterType>::AddRange(boost::icl::discrete_interval<ProgramCounterType> range, IR::LocationDescriptor location) { void BlockRangeInformation<ProgramCounterType>::AddRange(boost::icl::discrete_interval<ProgramCounterType> range, IR::LocationDescriptor location) {
block_ranges.add(std::make_pair(range, std::set<IR::LocationDescriptor>{location})); block_ranges.add(std::make_pair(range, std::set<IR::LocationDescriptor>{location}));
} }
template <typename ProgramCounterType> template<typename ProgramCounterType>
void BlockRangeInformation<ProgramCounterType>::ClearCache() { void BlockRangeInformation<ProgramCounterType>::ClearCache() {
block_ranges.clear(); block_ranges.clear();
} }
template <typename ProgramCounterType> template<typename ProgramCounterType>
tsl::robin_set<IR::LocationDescriptor> BlockRangeInformation<ProgramCounterType>::InvalidateRanges(const boost::icl::interval_set<ProgramCounterType>& ranges) { tsl::robin_set<IR::LocationDescriptor> BlockRangeInformation<ProgramCounterType>::InvalidateRanges(const boost::icl::interval_set<ProgramCounterType>& ranges) {
tsl::robin_set<IR::LocationDescriptor> erase_locations; tsl::robin_set<IR::LocationDescriptor> erase_locations;
for (auto invalidate_interval : ranges) { for (auto invalidate_interval : ranges) {
auto pair = block_ranges.equal_range(invalidate_interval); auto pair = block_ranges.equal_range(invalidate_interval);
for (auto it = pair.first; it != pair.second; ++it) { for (auto it = pair.first; it != pair.second; ++it) {
for (const auto &descriptor : it->second) { for (const auto& descriptor : it->second) {
erase_locations.insert(descriptor); erase_locations.insert(descriptor);
} }
} }

View file

@ -15,7 +15,7 @@
namespace Dynarmic::Backend::X64 { namespace Dynarmic::Backend::X64 {
template <typename ProgramCounterType> template<typename ProgramCounterType>
class BlockRangeInformation { class BlockRangeInformation {
public: public:
void AddRange(boost::icl::discrete_interval<ProgramCounterType> range, IR::LocationDescriptor location); void AddRange(boost::icl::discrete_interval<ProgramCounterType> range, IR::LocationDescriptor location);

View file

@ -4,6 +4,7 @@
*/ */
#include "dynarmic/backend/x64/callback.h" #include "dynarmic/backend/x64/callback.h"
#include "dynarmic/backend/x64/block_of_code.h" #include "dynarmic/backend/x64/block_of_code.h"
namespace Dynarmic::Backend::X64 { namespace Dynarmic::Backend::X64 {

View file

@ -22,16 +22,23 @@ class Callback {
public: public:
virtual ~Callback(); virtual ~Callback();
virtual void EmitCall(BlockOfCode& code, std::function<void(RegList)> fn = [](RegList){}) const = 0; void EmitCall(BlockOfCode& code) const {
EmitCall(code, [](RegList) {});
}
virtual void EmitCall(BlockOfCode& code, std::function<void(RegList)> fn) const = 0;
virtual void EmitCallWithReturnPointer(BlockOfCode& code, std::function<void(Xbyak::Reg64, RegList)> fn) const = 0; virtual void EmitCallWithReturnPointer(BlockOfCode& code, std::function<void(Xbyak::Reg64, RegList)> fn) const = 0;
}; };
class SimpleCallback final : public Callback { class SimpleCallback final : public Callback {
public: public:
template <typename Function> template<typename Function>
SimpleCallback(Function fn) : fn(reinterpret_cast<void(*)()>(fn)) {} SimpleCallback(Function fn)
: fn(reinterpret_cast<void (*)()>(fn)) {}
void EmitCall(BlockOfCode& code, std::function<void(RegList)> fn = [](RegList){}) const override; using Callback::EmitCall;
void EmitCall(BlockOfCode& code, std::function<void(RegList)> fn) const override;
void EmitCallWithReturnPointer(BlockOfCode& code, std::function<void(Xbyak::Reg64, RegList)> fn) const override; void EmitCallWithReturnPointer(BlockOfCode& code, std::function<void(Xbyak::Reg64, RegList)> fn) const override;
private: private:
@ -40,10 +47,13 @@ private:
class ArgCallback final : public Callback { class ArgCallback final : public Callback {
public: public:
template <typename Function> template<typename Function>
ArgCallback(Function fn, u64 arg) : fn(reinterpret_cast<void(*)()>(fn)), arg(arg) {} ArgCallback(Function fn, u64 arg)
: fn(reinterpret_cast<void (*)()>(fn)), arg(arg) {}
void EmitCall(BlockOfCode& code, std::function<void(RegList)> fn = [](RegList){}) const override; using Callback::EmitCall;
void EmitCall(BlockOfCode& code, std::function<void(RegList)> fn) const override;
void EmitCallWithReturnPointer(BlockOfCode& code, std::function<void(Xbyak::Reg64, RegList)> fn) const override; void EmitCallWithReturnPointer(BlockOfCode& code, std::function<void(Xbyak::Reg64, RegList)> fn) const override;
private: private:

View file

@ -3,15 +3,17 @@
* SPDX-License-Identifier: 0BSD * SPDX-License-Identifier: 0BSD
*/ */
#include "dynarmic/backend/x64/constant_pool.h"
#include <cstring> #include <cstring>
#include "dynarmic/backend/x64/block_of_code.h" #include "dynarmic/backend/x64/block_of_code.h"
#include "dynarmic/backend/x64/constant_pool.h"
#include "dynarmic/common/assert.h" #include "dynarmic/common/assert.h"
namespace Dynarmic::Backend::X64 { namespace Dynarmic::Backend::X64 {
ConstantPool::ConstantPool(BlockOfCode& code, size_t size) : code(code), pool_size(size) { ConstantPool::ConstantPool(BlockOfCode& code, size_t size)
: code(code), pool_size(size) {
code.int3(); code.int3();
code.align(align_size); code.align(align_size);
pool_begin = reinterpret_cast<u8*>(code.AllocateFromCodeSpace(size)); pool_begin = reinterpret_cast<u8*>(code.AllocateFromCodeSpace(size));

View file

@ -19,11 +19,11 @@ namespace Backend::X64 {
namespace impl { namespace impl {
template <typename FunctionType, FunctionType mfp> template<typename FunctionType, FunctionType mfp>
struct ThunkBuilder; struct ThunkBuilder;
template <typename C, typename R, typename... Args, R(C::*mfp)(Args...)> template<typename C, typename R, typename... Args, R (C::*mfp)(Args...)>
struct ThunkBuilder<R(C::*)(Args...), mfp> { struct ThunkBuilder<R (C::*)(Args...), mfp> {
static R Thunk(C* this_, Args... args) { static R Thunk(C* this_, Args... args) {
return (this_->*mfp)(std::forward<Args>(args)...); return (this_->*mfp)(std::forward<Args>(args)...);
} }

View file

@ -3,12 +3,13 @@
* SPDX-License-Identifier: 0BSD * SPDX-License-Identifier: 0BSD
*/ */
#include "dynarmic/backend/x64/emit_x64.h"
#include <iterator> #include <iterator>
#include <tsl/robin_set.h> #include <tsl/robin_set.h>
#include "dynarmic/backend/x64/block_of_code.h" #include "dynarmic/backend/x64/block_of_code.h"
#include "dynarmic/backend/x64/emit_x64.h"
#include "dynarmic/backend/x64/nzcv_util.h" #include "dynarmic/backend/x64/nzcv_util.h"
#include "dynarmic/backend/x64/perf_map.h" #include "dynarmic/backend/x64/perf_map.h"
#include "dynarmic/backend/x64/stack_layout.h" #include "dynarmic/backend/x64/stack_layout.h"
@ -40,7 +41,8 @@ void EmitContext::EraseInstruction(IR::Inst* inst) {
inst->ClearArgs(); inst->ClearArgs();
} }
EmitX64::EmitX64(BlockOfCode& code) : code(code) { EmitX64::EmitX64(BlockOfCode& code)
: code(code) {
exception_handler.Register(code); exception_handler.Register(code);
} }
@ -126,7 +128,7 @@ void EmitX64::EmitGetLowerFromOp(EmitContext&, IR::Inst*) {
void EmitX64::EmitGetNZCVFromOp(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitGetNZCVFromOp(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const int bitsize = [&]{ const int bitsize = [&] {
switch (args[0].GetType()) { switch (args[0].GetType()) {
case IR::Type::U8: case IR::Type::U8:
return 8; return 8;
@ -325,7 +327,7 @@ void EmitX64::InvalidateBasicBlocks(const tsl::robin_set<IR::LocationDescriptor>
code.EnableWriting(); code.EnableWriting();
SCOPE_EXIT { code.DisableWriting(); }; SCOPE_EXIT { code.DisableWriting(); };
for (const auto &descriptor : locations) { for (const auto& descriptor : locations) {
const auto it = block_descriptors.find(descriptor); const auto it = block_descriptors.find(descriptor);
if (it == block_descriptors.end()) { if (it == block_descriptors.end()) {
continue; continue;

View file

@ -13,7 +13,6 @@
#include <tsl/robin_map.h> #include <tsl/robin_map.h>
#include <tsl/robin_set.h> #include <tsl/robin_set.h>
#include <xbyak_util.h> #include <xbyak_util.h>
#include "dynarmic/backend/x64/exception_handler.h" #include "dynarmic/backend/x64/exception_handler.h"
@ -41,10 +40,10 @@ using A64FullVectorWidth = std::integral_constant<size_t, 128>;
// Array alias that always sizes itself according to the given type T // Array alias that always sizes itself according to the given type T
// relative to the size of a vector register. e.g. T = u32 would result // relative to the size of a vector register. e.g. T = u32 would result
// in a std::array<u32, 4>. // in a std::array<u32, 4>.
template <typename T> template<typename T>
using VectorArray = std::array<T, A64FullVectorWidth::value / Common::BitSize<T>()>; using VectorArray = std::array<T, A64FullVectorWidth::value / Common::BitSize<T>()>;
template <typename T> template<typename T>
using HalfVectorArray = std::array<T, A64FullVectorWidth::value / Common::BitSize<T>() / 2>; using HalfVectorArray = std::array<T, A64FullVectorWidth::value / Common::BitSize<T>() / 2>;
struct EmitContext { struct EmitContext {

View file

@ -814,7 +814,7 @@ void EmitX64::EmitRotateRightExtended(EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.DefineValue(inst, result); ctx.reg_alloc.DefineValue(inst, result);
} }
template <typename ShfitFT, typename BMI2FT> template<typename ShfitFT, typename BMI2FT>
static void EmitMaskedShift32(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, ShfitFT shift_fn, [[maybe_unused]] BMI2FT bmi2_shift) { static void EmitMaskedShift32(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, ShfitFT shift_fn, [[maybe_unused]] BMI2FT bmi2_shift) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
auto& operand_arg = args[0]; auto& operand_arg = args[0];
@ -851,7 +851,7 @@ static void EmitMaskedShift32(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins
ctx.reg_alloc.DefineValue(inst, result); ctx.reg_alloc.DefineValue(inst, result);
} }
template <typename ShfitFT, typename BMI2FT> template<typename ShfitFT, typename BMI2FT>
static void EmitMaskedShift64(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, ShfitFT shift_fn, [[maybe_unused]] BMI2FT bmi2_shift) { static void EmitMaskedShift64(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, ShfitFT shift_fn, [[maybe_unused]] BMI2FT bmi2_shift) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
auto& operand_arg = args[0]; auto& operand_arg = args[0];
@ -889,35 +889,43 @@ static void EmitMaskedShift64(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins
} }
void EmitX64::EmitLogicalShiftLeftMasked32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitLogicalShiftLeftMasked32(EmitContext& ctx, IR::Inst* inst) {
EmitMaskedShift32(code, ctx, inst, [&](auto result, auto shift) { code.shl(result, shift); }, &Xbyak::CodeGenerator::shlx); EmitMaskedShift32(
code, ctx, inst, [&](auto result, auto shift) { code.shl(result, shift); }, &Xbyak::CodeGenerator::shlx);
} }
void EmitX64::EmitLogicalShiftLeftMasked64(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitLogicalShiftLeftMasked64(EmitContext& ctx, IR::Inst* inst) {
EmitMaskedShift64(code, ctx, inst, [&](auto result, auto shift) { code.shl(result, shift); }, &Xbyak::CodeGenerator::shlx); EmitMaskedShift64(
code, ctx, inst, [&](auto result, auto shift) { code.shl(result, shift); }, &Xbyak::CodeGenerator::shlx);
} }
void EmitX64::EmitLogicalShiftRightMasked32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitLogicalShiftRightMasked32(EmitContext& ctx, IR::Inst* inst) {
EmitMaskedShift32(code, ctx, inst, [&](auto result, auto shift) { code.shr(result, shift); }, &Xbyak::CodeGenerator::shrx); EmitMaskedShift32(
code, ctx, inst, [&](auto result, auto shift) { code.shr(result, shift); }, &Xbyak::CodeGenerator::shrx);
} }
void EmitX64::EmitLogicalShiftRightMasked64(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitLogicalShiftRightMasked64(EmitContext& ctx, IR::Inst* inst) {
EmitMaskedShift64(code, ctx, inst, [&](auto result, auto shift) { code.shr(result, shift); }, &Xbyak::CodeGenerator::shrx); EmitMaskedShift64(
code, ctx, inst, [&](auto result, auto shift) { code.shr(result, shift); }, &Xbyak::CodeGenerator::shrx);
} }
void EmitX64::EmitArithmeticShiftRightMasked32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitArithmeticShiftRightMasked32(EmitContext& ctx, IR::Inst* inst) {
EmitMaskedShift32(code, ctx, inst, [&](auto result, auto shift) { code.sar(result, shift); }, &Xbyak::CodeGenerator::sarx); EmitMaskedShift32(
code, ctx, inst, [&](auto result, auto shift) { code.sar(result, shift); }, &Xbyak::CodeGenerator::sarx);
} }
void EmitX64::EmitArithmeticShiftRightMasked64(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitArithmeticShiftRightMasked64(EmitContext& ctx, IR::Inst* inst) {
EmitMaskedShift64(code, ctx, inst, [&](auto result, auto shift) { code.sar(result, shift); }, &Xbyak::CodeGenerator::sarx); EmitMaskedShift64(
code, ctx, inst, [&](auto result, auto shift) { code.sar(result, shift); }, &Xbyak::CodeGenerator::sarx);
} }
void EmitX64::EmitRotateRightMasked32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitRotateRightMasked32(EmitContext& ctx, IR::Inst* inst) {
EmitMaskedShift32(code, ctx, inst, [&](auto result, auto shift) { code.ror(result, shift); }, nullptr); EmitMaskedShift32(
code, ctx, inst, [&](auto result, auto shift) { code.ror(result, shift); }, nullptr);
} }
void EmitX64::EmitRotateRightMasked64(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitRotateRightMasked64(EmitContext& ctx, IR::Inst* inst) {
EmitMaskedShift64(code, ctx, inst, [&](auto result, auto shift) { code.ror(result, shift); }, nullptr); EmitMaskedShift64(
code, ctx, inst, [&](auto result, auto shift) { code.ror(result, shift); }, nullptr);
} }
static Xbyak::Reg8 DoCarry(RegAlloc& reg_alloc, Argument& carry_in, IR::Inst* carry_out) { static Xbyak::Reg8 DoCarry(RegAlloc& reg_alloc, Argument& carry_in, IR::Inst* carry_out) {

View file

@ -63,7 +63,7 @@ constexpr u64 f64_min_u64 = 0x0000000000000000u; // 0 as a double
constexpr u64 f64_max_u64_lim = 0x43f0000000000000u; // 2^64 as a double (actual maximum unrepresentable) constexpr u64 f64_max_u64_lim = 0x43f0000000000000u; // 2^64 as a double (actual maximum unrepresentable)
#define FCODE(NAME) \ #define FCODE(NAME) \
[&code](auto... args){ \ [&code](auto... args) { \
if constexpr (fsize == 32) { \ if constexpr (fsize == 32) { \
code.NAME##s(args...); \ code.NAME##s(args...); \
} else { \ } else { \
@ -71,7 +71,7 @@ constexpr u64 f64_max_u64_lim = 0x43f0000000000000u; // 2^64 as a double (actual
} \ } \
} }
#define ICODE(NAME) \ #define ICODE(NAME) \
[&code](auto... args){ \ [&code](auto... args) { \
if constexpr (fsize == 32) { \ if constexpr (fsize == 32) { \
code.NAME##d(args...); \ code.NAME##d(args...); \
} else { \ } else { \
@ -248,7 +248,7 @@ void EmitPostProcessNaNs(BlockOfCode& code, Xbyak::Xmm result, Xbyak::Xmm op1, X
code.jmp(end, code.T_NEAR); code.jmp(end, code.T_NEAR);
} }
template <size_t fsize, typename Function> template<size_t fsize, typename Function>
void FPTwoOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) { void FPTwoOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
@ -276,7 +276,7 @@ void FPTwoOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
ctx.reg_alloc.DefineValue(inst, result); ctx.reg_alloc.DefineValue(inst, result);
} }
template <size_t fsize, typename Function> template<size_t fsize, typename Function>
void FPThreeOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) { void FPThreeOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
using FPT = mp::unsigned_integer_of_size<fsize>; using FPT = mp::unsigned_integer_of_size<fsize>;
@ -793,7 +793,7 @@ void EmitX64::EmitFPRecipEstimate64(EmitContext& ctx, IR::Inst* inst) {
EmitFPRecipEstimate<64>(code, ctx, inst); EmitFPRecipEstimate<64>(code, ctx, inst);
} }
template <size_t fsize> template<size_t fsize>
static void EmitFPRecipExponent(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { static void EmitFPRecipExponent(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
using FPT = mp::unsigned_integer_of_size<fsize>; using FPT = mp::unsigned_integer_of_size<fsize>;
@ -930,8 +930,7 @@ static void EmitFPRound(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, siz
mp::lift_value<FP::RoundingMode::TowardsPlusInfinity>, mp::lift_value<FP::RoundingMode::TowardsPlusInfinity>,
mp::lift_value<FP::RoundingMode::TowardsMinusInfinity>, mp::lift_value<FP::RoundingMode::TowardsMinusInfinity>,
mp::lift_value<FP::RoundingMode::TowardsZero>, mp::lift_value<FP::RoundingMode::TowardsZero>,
mp::lift_value<FP::RoundingMode::ToNearest_TieAwayFromZero> mp::lift_value<FP::RoundingMode::ToNearest_TieAwayFromZero>>;
>;
using exact_list = mp::list<std::true_type, std::false_type>; using exact_list = mp::list<std::true_type, std::false_type>;
static const auto lut = Common::GenerateLookupTableFromList( static const auto lut = Common::GenerateLookupTableFromList(
@ -947,12 +946,9 @@ static void EmitFPRound(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, siz
using InputSize = mp::unsigned_integer_of_size<fsize>; using InputSize = mp::unsigned_integer_of_size<fsize>;
return FP::FPRoundInt<InputSize>(static_cast<InputSize>(input), fpcr, rounding_mode, exact, fpsr); return FP::FPRoundInt<InputSize>(static_cast<InputSize>(input), fpcr, rounding_mode, exact, fpsr);
} })};
)
};
}, },
mp::cartesian_product<fsize_list, rounding_list, exact_list>{} mp::cartesian_product<fsize_list, rounding_list, exact_list>{});
);
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.HostCall(inst, args[0]); ctx.reg_alloc.HostCall(inst, args[0]);
@ -1467,7 +1463,7 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
if constexpr (fsize != 16) { if constexpr (fsize != 16) {
const auto round_imm = ConvertRoundingModeToX64Immediate(rounding_mode); const auto round_imm = ConvertRoundingModeToX64Immediate(rounding_mode);
if (code.HasHostFeature(HostFeature::SSE41) && round_imm){ if (code.HasHostFeature(HostFeature::SSE41) && round_imm) {
const Xbyak::Xmm src = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm src = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm scratch = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm scratch = ctx.reg_alloc.ScratchXmm();
const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr().cvt64(); const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr().cvt64();
@ -1546,8 +1542,7 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
mp::lift_value<FP::RoundingMode::TowardsPlusInfinity>, mp::lift_value<FP::RoundingMode::TowardsPlusInfinity>,
mp::lift_value<FP::RoundingMode::TowardsMinusInfinity>, mp::lift_value<FP::RoundingMode::TowardsMinusInfinity>,
mp::lift_value<FP::RoundingMode::TowardsZero>, mp::lift_value<FP::RoundingMode::TowardsZero>,
mp::lift_value<FP::RoundingMode::ToNearest_TieAwayFromZero> mp::lift_value<FP::RoundingMode::ToNearest_TieAwayFromZero>>;
>;
static const auto lut = Common::GenerateLookupTableFromList( static const auto lut = Common::GenerateLookupTableFromList(
[](auto args) { [](auto args) {
@ -1561,12 +1556,9 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
using FPT = mp::unsigned_integer_of_size<fsize>; using FPT = mp::unsigned_integer_of_size<fsize>;
return FP::FPToFixed<FPT>(isize, static_cast<FPT>(input), fbits, unsigned_, fpcr, rounding_mode, fpsr); return FP::FPToFixed<FPT>(isize, static_cast<FPT>(input), fbits, unsigned_, fpcr, rounding_mode, fpsr);
} })};
)
};
}, },
mp::cartesian_product<fbits_list, rounding_list>{} mp::cartesian_product<fbits_list, rounding_list>{});
);
ctx.reg_alloc.HostCall(inst, args[0]); ctx.reg_alloc.HostCall(inst, args[0]);
code.lea(code.ABI_PARAM2, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); code.lea(code.ABI_PARAM2, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
@ -1718,7 +1710,7 @@ void EmitX64::EmitFPFixedU32ToSingle(EmitContext& ctx, IR::Inst* inst) {
const size_t fbits = args[1].GetImmediateU8(); const size_t fbits = args[1].GetImmediateU8();
const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8()); const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
const auto op = [&]{ const auto op = [&] {
if (code.HasHostFeature(HostFeature::AVX512F)) { if (code.HasHostFeature(HostFeature::AVX512F)) {
const Xbyak::Reg64 from = ctx.reg_alloc.UseGpr(args[0]); const Xbyak::Reg64 from = ctx.reg_alloc.UseGpr(args[0]);
code.vcvtusi2ss(result, result, from.cvt32()); code.vcvtusi2ss(result, result, from.cvt32());

View file

@ -25,7 +25,7 @@ namespace Dynarmic::Backend::X64 {
using namespace Xbyak::util; using namespace Xbyak::util;
template <typename Function> template<typename Function>
static void EmitVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) { static void EmitVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
@ -37,7 +37,7 @@ static void EmitVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
ctx.reg_alloc.DefineValue(inst, xmm_a); ctx.reg_alloc.DefineValue(inst, xmm_a);
} }
template <typename Function> template<typename Function>
static void EmitAVXVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) { static void EmitAVXVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
@ -49,7 +49,7 @@ static void EmitAVXVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst
ctx.reg_alloc.DefineValue(inst, xmm_a); ctx.reg_alloc.DefineValue(inst, xmm_a);
} }
template <typename Lambda> template<typename Lambda>
static void EmitOneArgumentFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Lambda lambda) { static void EmitOneArgumentFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Lambda lambda) {
const auto fn = static_cast<mp::equivalent_function_type<Lambda>*>(lambda); const auto fn = static_cast<mp::equivalent_function_type<Lambda>*>(lambda);
constexpr u32 stack_space = 2 * 16; constexpr u32 stack_space = 2 * 16;
@ -72,7 +72,7 @@ static void EmitOneArgumentFallback(BlockOfCode& code, EmitContext& ctx, IR::Ins
ctx.reg_alloc.DefineValue(inst, result); ctx.reg_alloc.DefineValue(inst, result);
} }
template <typename Lambda> template<typename Lambda>
static void EmitOneArgumentFallbackWithSaturation(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Lambda lambda) { static void EmitOneArgumentFallbackWithSaturation(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Lambda lambda) {
const auto fn = static_cast<mp::equivalent_function_type<Lambda>*>(lambda); const auto fn = static_cast<mp::equivalent_function_type<Lambda>*>(lambda);
constexpr u32 stack_space = 2 * 16; constexpr u32 stack_space = 2 * 16;
@ -97,7 +97,7 @@ static void EmitOneArgumentFallbackWithSaturation(BlockOfCode& code, EmitContext
ctx.reg_alloc.DefineValue(inst, result); ctx.reg_alloc.DefineValue(inst, result);
} }
template <typename Lambda> template<typename Lambda>
static void EmitTwoArgumentFallbackWithSaturation(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Lambda lambda) { static void EmitTwoArgumentFallbackWithSaturation(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Lambda lambda) {
const auto fn = static_cast<mp::equivalent_function_type<Lambda>*>(lambda); const auto fn = static_cast<mp::equivalent_function_type<Lambda>*>(lambda);
constexpr u32 stack_space = 3 * 16; constexpr u32 stack_space = 3 * 16;
@ -125,7 +125,7 @@ static void EmitTwoArgumentFallbackWithSaturation(BlockOfCode& code, EmitContext
ctx.reg_alloc.DefineValue(inst, result); ctx.reg_alloc.DefineValue(inst, result);
} }
template <typename Lambda> template<typename Lambda>
static void EmitTwoArgumentFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Lambda lambda) { static void EmitTwoArgumentFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Lambda lambda) {
const auto fn = static_cast<mp::equivalent_function_type<Lambda>*>(lambda); const auto fn = static_cast<mp::equivalent_function_type<Lambda>*>(lambda);
constexpr u32 stack_space = 3 * 16; constexpr u32 stack_space = 3 * 16;
@ -513,7 +513,7 @@ void EmitX64::EmitVectorArithmeticShiftRight64(EmitContext& ctx, IR::Inst* inst)
ctx.reg_alloc.DefineValue(inst, result); ctx.reg_alloc.DefineValue(inst, result);
} }
template <typename T> template<typename T>
static constexpr T VShift(T x, T y) { static constexpr T VShift(T x, T y) {
const s8 shift_amount = static_cast<s8>(static_cast<u8>(y)); const s8 shift_amount = static_cast<s8>(static_cast<u8>(y));
const s64 bit_size = static_cast<s64>(Common::BitSize<T>()); const s64 bit_size = static_cast<s64>(Common::BitSize<T>());
@ -740,7 +740,7 @@ void EmitX64::EmitVectorBroadcast64(EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.DefineValue(inst, a); ctx.reg_alloc.DefineValue(inst, a);
} }
template <typename T> template<typename T>
static void EmitVectorCountLeadingZeros(VectorArray<T>& result, const VectorArray<T>& data) { static void EmitVectorCountLeadingZeros(VectorArray<T>& result, const VectorArray<T>& data) {
for (size_t i = 0; i < result.size(); i++) { for (size_t i = 0; i < result.size(); i++) {
T element = data[i]; T element = data[i];
@ -1875,7 +1875,7 @@ void EmitX64::EmitVectorMinS64(EmitContext& ctx, IR::Inst* inst) {
return; return;
} }
EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray<s64>& result, const VectorArray<s64>& a, const VectorArray<s64>& b){ EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray<s64>& result, const VectorArray<s64>& a, const VectorArray<s64>& b) {
std::transform(a.begin(), a.end(), b.begin(), result.begin(), [](auto x, auto y) { return std::min(x, y); }); std::transform(a.begin(), a.end(), b.begin(), result.begin(), [](auto x, auto y) { return std::min(x, y); });
}); });
} }
@ -1955,7 +1955,7 @@ void EmitX64::EmitVectorMinU64(EmitContext& ctx, IR::Inst* inst) {
return; return;
} }
EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray<u64>& result, const VectorArray<u64>& a, const VectorArray<u64>& b){ EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray<u64>& result, const VectorArray<u64>& a, const VectorArray<u64>& b) {
std::transform(a.begin(), a.end(), b.begin(), result.begin(), [](auto x, auto y) { return std::min(x, y); }); std::transform(a.begin(), a.end(), b.begin(), result.begin(), [](auto x, auto y) { return std::min(x, y); });
}); });
} }
@ -2413,7 +2413,7 @@ void EmitX64::EmitVectorPairedAddUnsignedWiden32(EmitContext& ctx, IR::Inst* ins
ctx.reg_alloc.DefineValue(inst, a); ctx.reg_alloc.DefineValue(inst, a);
} }
template <typename T, typename Function> template<typename T, typename Function>
static void PairedOperation(VectorArray<T>& result, const VectorArray<T>& x, const VectorArray<T>& y, Function fn) { static void PairedOperation(VectorArray<T>& result, const VectorArray<T>& x, const VectorArray<T>& y, Function fn) {
const size_t range = x.size() / 2; const size_t range = x.size() / 2;
@ -2426,12 +2426,12 @@ static void PairedOperation(VectorArray<T>& result, const VectorArray<T>& x, con
} }
} }
template <typename T> template<typename T>
static void PairedMax(VectorArray<T>& result, const VectorArray<T>& x, const VectorArray<T>& y) { static void PairedMax(VectorArray<T>& result, const VectorArray<T>& x, const VectorArray<T>& y) {
PairedOperation(result, x, y, [](auto a, auto b) { return std::max(a, b); }); PairedOperation(result, x, y, [](auto a, auto b) { return std::max(a, b); });
} }
template <typename T> template<typename T>
static void PairedMin(VectorArray<T>& result, const VectorArray<T>& x, const VectorArray<T>& y) { static void PairedMin(VectorArray<T>& result, const VectorArray<T>& x, const VectorArray<T>& y) {
PairedOperation(result, x, y, [](auto a, auto b) { return std::min(a, b); }); PairedOperation(result, x, y, [](auto a, auto b) { return std::min(a, b); });
} }
@ -2606,7 +2606,7 @@ void EmitX64::EmitVectorPairedMinU32(EmitContext& ctx, IR::Inst* inst) {
} }
} }
template <typename D, typename T> template<typename D, typename T>
static D PolynomialMultiply(T lhs, T rhs) { static D PolynomialMultiply(T lhs, T rhs) {
constexpr size_t bit_size = Common::BitSize<T>(); constexpr size_t bit_size = Common::BitSize<T>();
const std::bitset<bit_size> operand(lhs); const std::bitset<bit_size> operand(lhs);
@ -2930,7 +2930,7 @@ void EmitX64::EmitVectorRoundingHalvingAddU32(EmitContext& ctx, IR::Inst* inst)
EmitVectorRoundingHalvingAddUnsigned(32, ctx, inst, code); EmitVectorRoundingHalvingAddUnsigned(32, ctx, inst, code);
} }
template <typename T, typename U> template<typename T, typename U>
static void RoundingShiftLeft(VectorArray<T>& out, const VectorArray<T>& lhs, const VectorArray<U>& rhs) { static void RoundingShiftLeft(VectorArray<T>& out, const VectorArray<T>& lhs, const VectorArray<U>& rhs) {
using signed_type = std::make_signed_t<T>; using signed_type = std::make_signed_t<T>;
using unsigned_type = std::make_unsigned_t<T>; using unsigned_type = std::make_unsigned_t<T>;
@ -2947,8 +2947,7 @@ static void RoundingShiftLeft(VectorArray<T>& out, const VectorArray<T>& lhs, co
out[i] = static_cast<T>(static_cast<unsigned_type>(lhs[i]) << extended_shift); out[i] = static_cast<T>(static_cast<unsigned_type>(lhs[i]) << extended_shift);
} }
} else { } else {
if ((std::is_unsigned_v<T> && extended_shift < -bit_size) || if ((std::is_unsigned_v<T> && extended_shift < -bit_size) || (std::is_signed_v<T> && extended_shift <= -bit_size)) {
(std::is_signed_v<T> && extended_shift <= -bit_size)) {
out[i] = 0; out[i] = 0;
} else { } else {
const s64 shift_value = -extended_shift - 1; const s64 shift_value = -extended_shift - 1;
@ -3350,7 +3349,6 @@ static void EmitVectorSignedSaturatedAbs(size_t esize, BlockOfCode& code, EmitCo
ctx.reg_alloc.DefineValue(inst, data); ctx.reg_alloc.DefineValue(inst, data);
} }
void EmitX64::EmitVectorSignedSaturatedAbs8(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorSignedSaturatedAbs8(EmitContext& ctx, IR::Inst* inst) {
EmitVectorSignedSaturatedAbs(8, code, ctx, inst); EmitVectorSignedSaturatedAbs(8, code, ctx, inst);
} }
@ -4024,10 +4022,10 @@ void EmitX64::EmitVectorSignedSaturatedNeg64(EmitContext& ctx, IR::Inst* inst) {
// MSVC requires the capture within the saturate lambda, but it's // MSVC requires the capture within the saturate lambda, but it's
// determined to be unnecessary via clang and GCC. // determined to be unnecessary via clang and GCC.
#ifdef __clang__ #ifdef __clang__
#pragma clang diagnostic push # pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wunused-lambda-capture" # pragma clang diagnostic ignored "-Wunused-lambda-capture"
#endif #endif
template <typename T, typename U = std::make_unsigned_t<T>> template<typename T, typename U = std::make_unsigned_t<T>>
static bool VectorSignedSaturatedShiftLeft(VectorArray<T>& dst, const VectorArray<T>& data, const VectorArray<T>& shift_values) { static bool VectorSignedSaturatedShiftLeft(VectorArray<T>& dst, const VectorArray<T>& data, const VectorArray<T>& shift_values) {
static_assert(std::is_signed_v<T>, "T must be signed."); static_assert(std::is_signed_v<T>, "T must be signed.");
@ -4066,7 +4064,7 @@ static bool VectorSignedSaturatedShiftLeft(VectorArray<T>& dst, const VectorArra
return qc_flag; return qc_flag;
} }
#ifdef __clang__ #ifdef __clang__
#pragma clang diagnostic pop # pragma clang diagnostic pop
#endif #endif
void EmitX64::EmitVectorSignedSaturatedShiftLeft8(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorSignedSaturatedShiftLeft8(EmitContext& ctx, IR::Inst* inst) {
@ -4085,7 +4083,7 @@ void EmitX64::EmitVectorSignedSaturatedShiftLeft64(EmitContext& ctx, IR::Inst* i
EmitTwoArgumentFallbackWithSaturation(code, ctx, inst, VectorSignedSaturatedShiftLeft<s64>); EmitTwoArgumentFallbackWithSaturation(code, ctx, inst, VectorSignedSaturatedShiftLeft<s64>);
} }
template <typename T, typename U = std::make_unsigned_t<T>> template<typename T, typename U = std::make_unsigned_t<T>>
static bool VectorSignedSaturatedShiftLeftUnsigned(VectorArray<T>& dst, const VectorArray<T>& data, const VectorArray<T>& shift_values) { static bool VectorSignedSaturatedShiftLeftUnsigned(VectorArray<T>& dst, const VectorArray<T>& data, const VectorArray<T>& shift_values) {
static_assert(std::is_signed_v<T>, "T must be signed."); static_assert(std::is_signed_v<T>, "T must be signed.");
@ -4166,7 +4164,7 @@ void EmitX64::EmitVectorTableLookup64(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
auto table = ctx.reg_alloc.GetArgumentInfo(inst->GetArg(1).GetInst()); auto table = ctx.reg_alloc.GetArgumentInfo(inst->GetArg(1).GetInst());
const size_t table_size = std::count_if(table.begin(), table.end(), [](const auto& elem){ return !elem.IsVoid(); }); const size_t table_size = std::count_if(table.begin(), table.end(), [](const auto& elem) { return !elem.IsVoid(); });
const bool is_defaults_zero = inst->GetArg(0).IsZero(); const bool is_defaults_zero = inst->GetArg(0).IsZero();
// TODO: AVX512VL implementation when available (VPERMB / VPERMI2B / VPERMT2B) // TODO: AVX512VL implementation when available (VPERMB / VPERMI2B / VPERMT2B)
@ -4318,8 +4316,7 @@ void EmitX64::EmitVectorTableLookup64(EmitContext& ctx, IR::Inst* inst) {
result[i] = table[index][elem]; result[i] = table[index][elem];
} }
} }
} });
);
code.movq(result, qword[rsp + ABI_SHADOW_SPACE + 4 * 8]); code.movq(result, qword[rsp + ABI_SHADOW_SPACE + 4 * 8]);
ctx.reg_alloc.ReleaseStackSpace(stack_space + ABI_SHADOW_SPACE); ctx.reg_alloc.ReleaseStackSpace(stack_space + ABI_SHADOW_SPACE);
@ -4333,7 +4330,7 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
auto table = ctx.reg_alloc.GetArgumentInfo(inst->GetArg(1).GetInst()); auto table = ctx.reg_alloc.GetArgumentInfo(inst->GetArg(1).GetInst());
const size_t table_size = std::count_if(table.begin(), table.end(), [](const auto& elem){ return !elem.IsVoid(); }); const size_t table_size = std::count_if(table.begin(), table.end(), [](const auto& elem) { return !elem.IsVoid(); });
const bool is_defaults_zero = !inst->GetArg(0).IsImmediate() && inst->GetArg(0).GetInst()->GetOpcode() == IR::Opcode::ZeroVector; const bool is_defaults_zero = !inst->GetArg(0).IsImmediate() && inst->GetArg(0).GetInst()->GetOpcode() == IR::Opcode::ZeroVector;
// TODO: AVX512VL implementation when available (VPERMB / VPERMI2B / VPERMT2B) // TODO: AVX512VL implementation when available (VPERMB / VPERMI2B / VPERMT2B)
@ -4448,8 +4445,7 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) {
result[i] = table[index][elem]; result[i] = table[index][elem];
} }
} }
} });
);
code.movaps(result, xword[rsp + ABI_SHADOW_SPACE + (table_size + 0) * 16]); code.movaps(result, xword[rsp + ABI_SHADOW_SPACE + (table_size + 0) * 16]);
ctx.reg_alloc.ReleaseStackSpace(stack_space + ABI_SHADOW_SPACE); ctx.reg_alloc.ReleaseStackSpace(stack_space + ABI_SHADOW_SPACE);
@ -4732,7 +4728,7 @@ void EmitX64::EmitVectorUnsignedRecipSqrtEstimate(EmitContext& ctx, IR::Inst* in
// Simple generic case for 8, 16, and 32-bit values. 64-bit values // Simple generic case for 8, 16, and 32-bit values. 64-bit values
// will need to be special-cased as we can't simply use a larger integral size. // will need to be special-cased as we can't simply use a larger integral size.
template <typename T, typename U = std::make_unsigned_t<T>> template<typename T, typename U = std::make_unsigned_t<T>>
static bool EmitVectorUnsignedSaturatedAccumulateSigned(VectorArray<U>& result, const VectorArray<T>& lhs, const VectorArray<T>& rhs) { static bool EmitVectorUnsignedSaturatedAccumulateSigned(VectorArray<U>& result, const VectorArray<T>& lhs, const VectorArray<T>& rhs) {
static_assert(std::is_signed_v<T>, "T must be signed."); static_assert(std::is_signed_v<T>, "T must be signed.");
static_assert(Common::BitSize<T>() < 64, "T must be less than 64 bits in size."); static_assert(Common::BitSize<T>() < 64, "T must be less than 64 bits in size.");
@ -4833,7 +4829,7 @@ void EmitX64::EmitVectorUnsignedSaturatedNarrow64(EmitContext& ctx, IR::Inst* in
}); });
} }
template <typename T, typename S = std::make_signed_t<T>> template<typename T, typename S = std::make_signed_t<T>>
static bool VectorUnsignedSaturatedShiftLeft(VectorArray<T>& dst, const VectorArray<T>& data, const VectorArray<T>& shift_values) { static bool VectorUnsignedSaturatedShiftLeft(VectorArray<T>& dst, const VectorArray<T>& data, const VectorArray<T>& shift_values) {
static_assert(std::is_unsigned_v<T>, "T must be an unsigned type."); static_assert(std::is_unsigned_v<T>, "T must be an unsigned type.");

View file

@ -37,7 +37,7 @@ using namespace Xbyak::util;
namespace { namespace {
#define FCODE(NAME) \ #define FCODE(NAME) \
[&code](auto... args){ \ [&code](auto... args) { \
if constexpr (fsize == 32) { \ if constexpr (fsize == 32) { \
code.NAME##s(args...); \ code.NAME##s(args...); \
} else { \ } else { \
@ -45,7 +45,7 @@ namespace {
} \ } \
} }
#define ICODE(NAME) \ #define ICODE(NAME) \
[&code](auto... args){ \ [&code](auto... args) { \
if constexpr (fsize == 32) { \ if constexpr (fsize == 32) { \
code.NAME##d(args...); \ code.NAME##d(args...); \
} else { \ } else { \
@ -71,7 +71,7 @@ struct NaNHandler {
public: public:
using FPT = mp::unsigned_integer_of_size<fsize>; using FPT = mp::unsigned_integer_of_size<fsize>;
using function_type = void(*)(std::array<VectorArray<FPT>, narg>&, FP::FPCR); using function_type = void (*)(std::array<VectorArray<FPT>, narg>&, FP::FPCR);
static function_type GetDefault() { static function_type GetDefault() {
return GetDefaultImpl(std::make_index_sequence<narg - 1>{}); return GetDefaultImpl(std::make_index_sequence<narg - 1>{});
@ -294,13 +294,13 @@ void EmitTwoOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins
if constexpr (std::is_member_function_pointer_v<Function>) { if constexpr (std::is_member_function_pointer_v<Function>) {
result = ctx.reg_alloc.UseScratchXmm(args[0]); result = ctx.reg_alloc.UseScratchXmm(args[0]);
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{ MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
(code.*fn)(result); (code.*fn)(result);
}); });
} else { } else {
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(args[0]); const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(args[0]);
result = ctx.reg_alloc.ScratchXmm(); result = ctx.reg_alloc.ScratchXmm();
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{ MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
fn(result, xmm_a); fn(result, xmm_a);
}); });
} }
@ -337,7 +337,8 @@ void EmitTwoOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins
} }
enum CheckInputNaN { enum CheckInputNaN {
Yes, No, Yes,
No,
}; };
template<size_t fsize, template<typename> class Indexer, typename Function> template<size_t fsize, template<typename> class Indexer, typename Function>
@ -352,11 +353,11 @@ void EmitThreeOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]); const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
if constexpr (std::is_member_function_pointer_v<Function>) { if constexpr (std::is_member_function_pointer_v<Function>) {
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{ MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
(code.*fn)(xmm_a, xmm_b); (code.*fn)(xmm_a, xmm_b);
}); });
} else { } else {
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{ MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
fn(xmm_a, xmm_b); fn(xmm_a, xmm_b);
}); });
} }
@ -614,7 +615,7 @@ void EmitX64::EmitFPVectorEqual32(EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm b = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(args[1]) : ctx.reg_alloc.UseXmm(args[1]); const Xbyak::Xmm b = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(args[1]) : ctx.reg_alloc.UseXmm(args[1]);
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{ MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
DenormalsAreZero<32>(code, ctx.FPCR(fpcr_controlled), {a, b}, xmm0); DenormalsAreZero<32>(code, ctx.FPCR(fpcr_controlled), {a, b}, xmm0);
code.cmpeqps(a, b); code.cmpeqps(a, b);
}); });
@ -628,7 +629,7 @@ void EmitX64::EmitFPVectorEqual64(EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm b = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(args[1]) : ctx.reg_alloc.UseXmm(args[1]); const Xbyak::Xmm b = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(args[1]) : ctx.reg_alloc.UseXmm(args[1]);
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{ MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
DenormalsAreZero<64>(code, ctx.FPCR(fpcr_controlled), {a, b}, xmm0); DenormalsAreZero<64>(code, ctx.FPCR(fpcr_controlled), {a, b}, xmm0);
code.cmpeqpd(a, b); code.cmpeqpd(a, b);
}); });
@ -644,7 +645,7 @@ void EmitX64::EmitFPVectorFromSignedFixed32(EmitContext& ctx, IR::Inst* inst) {
const bool fpcr_controlled = args[3].GetImmediateU1(); const bool fpcr_controlled = args[3].GetImmediateU1();
ASSERT(rounding_mode == ctx.FPCR(fpcr_controlled).RMode()); ASSERT(rounding_mode == ctx.FPCR(fpcr_controlled).RMode());
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{ MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
code.cvtdq2ps(xmm, xmm); code.cvtdq2ps(xmm, xmm);
if (fbits != 0) { if (fbits != 0) {
code.mulps(xmm, GetVectorOf<32>(code, static_cast<u32>(127 - fbits) << 23)); code.mulps(xmm, GetVectorOf<32>(code, static_cast<u32>(127 - fbits) << 23));
@ -662,7 +663,7 @@ void EmitX64::EmitFPVectorFromSignedFixed64(EmitContext& ctx, IR::Inst* inst) {
const bool fpcr_controlled = args[3].GetImmediateU1(); const bool fpcr_controlled = args[3].GetImmediateU1();
ASSERT(rounding_mode == ctx.FPCR(fpcr_controlled).RMode()); ASSERT(rounding_mode == ctx.FPCR(fpcr_controlled).RMode());
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{ MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) { if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) {
code.vcvtqq2pd(xmm, xmm); code.vcvtqq2pd(xmm, xmm);
} else if (code.HasHostFeature(HostFeature::SSE41)) { } else if (code.HasHostFeature(HostFeature::SSE41)) {
@ -713,7 +714,7 @@ void EmitX64::EmitFPVectorFromUnsignedFixed32(EmitContext& ctx, IR::Inst* inst)
const bool fpcr_controlled = args[3].GetImmediateU1(); const bool fpcr_controlled = args[3].GetImmediateU1();
ASSERT(rounding_mode == ctx.FPCR(fpcr_controlled).RMode()); ASSERT(rounding_mode == ctx.FPCR(fpcr_controlled).RMode());
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{ MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
if (code.HasHostFeature(HostFeature::AVX512_Ortho)) { if (code.HasHostFeature(HostFeature::AVX512_Ortho)) {
code.vcvtudq2ps(xmm, xmm); code.vcvtudq2ps(xmm, xmm);
} else { } else {
@ -763,7 +764,7 @@ void EmitX64::EmitFPVectorFromUnsignedFixed64(EmitContext& ctx, IR::Inst* inst)
const bool fpcr_controlled = args[3].GetImmediateU1(); const bool fpcr_controlled = args[3].GetImmediateU1();
ASSERT(rounding_mode == ctx.FPCR(fpcr_controlled).RMode()); ASSERT(rounding_mode == ctx.FPCR(fpcr_controlled).RMode());
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{ MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) { if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) {
code.vcvtuqq2pd(xmm, xmm); code.vcvtuqq2pd(xmm, xmm);
} else { } else {
@ -828,7 +829,7 @@ void EmitX64::EmitFPVectorGreater32(EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Xmm a = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(args[0]) : ctx.reg_alloc.UseXmm(args[0]); const Xbyak::Xmm a = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(args[0]) : ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]); const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]);
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{ MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
DenormalsAreZero<32>(code, ctx.FPCR(fpcr_controlled), {a, b}, xmm0); DenormalsAreZero<32>(code, ctx.FPCR(fpcr_controlled), {a, b}, xmm0);
code.cmpltps(b, a); code.cmpltps(b, a);
}); });
@ -842,7 +843,7 @@ void EmitX64::EmitFPVectorGreater64(EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Xmm a = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(args[0]) : ctx.reg_alloc.UseXmm(args[0]); const Xbyak::Xmm a = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(args[0]) : ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]); const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]);
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{ MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
DenormalsAreZero<64>(code, ctx.FPCR(fpcr_controlled), {a, b}, xmm0); DenormalsAreZero<64>(code, ctx.FPCR(fpcr_controlled), {a, b}, xmm0);
code.cmpltpd(b, a); code.cmpltpd(b, a);
}); });
@ -856,7 +857,7 @@ void EmitX64::EmitFPVectorGreaterEqual32(EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Xmm a = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(args[0]) : ctx.reg_alloc.UseXmm(args[0]); const Xbyak::Xmm a = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(args[0]) : ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]); const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]);
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{ MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
DenormalsAreZero<32>(code, ctx.FPCR(fpcr_controlled), {a, b}, xmm0); DenormalsAreZero<32>(code, ctx.FPCR(fpcr_controlled), {a, b}, xmm0);
code.cmpleps(b, a); code.cmpleps(b, a);
}); });
@ -870,7 +871,7 @@ void EmitX64::EmitFPVectorGreaterEqual64(EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Xmm a = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(args[0]) : ctx.reg_alloc.UseXmm(args[0]); const Xbyak::Xmm a = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(args[0]) : ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]); const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]);
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{ MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
DenormalsAreZero<64>(code, ctx.FPCR(fpcr_controlled), {a, b}, xmm0); DenormalsAreZero<64>(code, ctx.FPCR(fpcr_controlled), {a, b}, xmm0);
code.cmplepd(b, a); code.cmplepd(b, a);
}); });
@ -891,7 +892,7 @@ static void EmitFPVectorMinMax(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
const Xbyak::Xmm eq = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm eq = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm nan_mask = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm nan_mask = ctx.reg_alloc.ScratchXmm();
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{ MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
DenormalsAreZero<fsize>(code, ctx.FPCR(fpcr_controlled), {result, xmm_b}, mask); DenormalsAreZero<fsize>(code, ctx.FPCR(fpcr_controlled), {result, xmm_b}, mask);
if (code.HasHostFeature(HostFeature::AVX)) { if (code.HasHostFeature(HostFeature::AVX)) {
@ -936,7 +937,8 @@ static void EmitFPVectorMinMax(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
return; return;
} }
EmitThreeOpVectorOperation<fsize, DefaultIndexer>(code, ctx, inst, [&](const Xbyak::Xmm& result, Xbyak::Xmm xmm_b){ EmitThreeOpVectorOperation<fsize, DefaultIndexer>(
code, ctx, inst, [&](const Xbyak::Xmm& result, Xbyak::Xmm xmm_b) {
const Xbyak::Xmm mask = xmm0; const Xbyak::Xmm mask = xmm0;
const Xbyak::Xmm eq = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm eq = ctx.reg_alloc.ScratchXmm();
@ -978,7 +980,8 @@ static void EmitFPVectorMinMax(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
code.andnps(mask, eq); code.andnps(mask, eq);
code.orps(result, mask); code.orps(result, mask);
} }
}, CheckInputNaN::Yes); },
CheckInputNaN::Yes);
} }
void EmitX64::EmitFPVectorMax32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitFPVectorMax32(EmitContext& ctx, IR::Inst* inst) {
@ -1024,7 +1027,7 @@ void EmitFPVectorMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]); const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm xmm_c = ctx.reg_alloc.UseXmm(args[2]); const Xbyak::Xmm xmm_c = ctx.reg_alloc.UseXmm(args[2]);
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{ MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
FCODE(vfmadd231p)(result, xmm_b, xmm_c); FCODE(vfmadd231p)(result, xmm_b, xmm_c);
}); });
@ -1044,7 +1047,7 @@ void EmitFPVectorMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
Xbyak::Label end, fallback; Xbyak::Label end, fallback;
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{ MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
code.movaps(result, xmm_a); code.movaps(result, xmm_a);
FCODE(vfmadd231p)(result, xmm_b, xmm_c); FCODE(vfmadd231p)(result, xmm_b, xmm_c);
@ -1113,7 +1116,7 @@ static void EmitFPVectorMulX(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm twos = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm twos = ctx.reg_alloc.ScratchXmm();
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{ MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
FCODE(vcmpunordp)(xmm0, result, operand); FCODE(vcmpunordp)(xmm0, result, operand);
FCODE(vxorp)(twos, result, operand); FCODE(vxorp)(twos, result, operand);
FCODE(mulp)(result, operand); FCODE(mulp)(result, operand);
@ -1151,8 +1154,7 @@ static void EmitFPVectorMulX(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst
result[elementi] = sign | FP::FPValue<FPT, false, 0, 2>(); result[elementi] = sign | FP::FPValue<FPT, false, 0, 2>();
} }
} }
} });
);
HandleNaNs<fsize, 2>(code, ctx, fpcr_controlled, {result, xmm_a, xmm_b}, nan_mask, nan_handler); HandleNaNs<fsize, 2>(code, ctx, fpcr_controlled, {result, xmm_a, xmm_b}, nan_mask, nan_handler);
@ -1287,7 +1289,7 @@ static void EmitRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]); const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]); const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{ MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
code.movaps(result, GetVectorOf<fsize, false, 0, 2>(code)); code.movaps(result, GetVectorOf<fsize, false, 0, 2>(code));
FCODE(vfnmadd231p)(result, operand1, operand2); FCODE(vfnmadd231p)(result, operand1, operand2);
}); });
@ -1307,7 +1309,7 @@ static void EmitRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
Xbyak::Label end, fallback; Xbyak::Label end, fallback;
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{ MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
code.movaps(result, GetVectorOf<fsize, false, 0, 2>(code)); code.movaps(result, GetVectorOf<fsize, false, 0, 2>(code));
FCODE(vfnmadd231p)(result, operand1, operand2); FCODE(vfnmadd231p)(result, operand1, operand2);
@ -1386,7 +1388,7 @@ void EmitFPVectorRoundInt(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
} }
}(); }();
EmitTwoOpVectorOperation<fsize, DefaultIndexer, 3>(code, ctx, inst, [&](const Xbyak::Xmm& result, const Xbyak::Xmm& xmm_a){ EmitTwoOpVectorOperation<fsize, DefaultIndexer, 3>(code, ctx, inst, [&](const Xbyak::Xmm& result, const Xbyak::Xmm& xmm_a) {
FCODE(roundp)(result, xmm_a, round_imm); FCODE(roundp)(result, xmm_a, round_imm);
}); });
@ -1399,8 +1401,7 @@ void EmitFPVectorRoundInt(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
mp::lift_value<FP::RoundingMode::TowardsPlusInfinity>, mp::lift_value<FP::RoundingMode::TowardsPlusInfinity>,
mp::lift_value<FP::RoundingMode::TowardsMinusInfinity>, mp::lift_value<FP::RoundingMode::TowardsMinusInfinity>,
mp::lift_value<FP::RoundingMode::TowardsZero>, mp::lift_value<FP::RoundingMode::TowardsZero>,
mp::lift_value<FP::RoundingMode::ToNearest_TieAwayFromZero> mp::lift_value<FP::RoundingMode::ToNearest_TieAwayFromZero>>;
>;
using exact_list = mp::list<std::true_type, std::false_type>; using exact_list = mp::list<std::true_type, std::false_type>;
static const auto lut = Common::GenerateLookupTableFromList( static const auto lut = Common::GenerateLookupTableFromList(
@ -1416,12 +1417,9 @@ void EmitFPVectorRoundInt(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
for (size_t i = 0; i < output.size(); ++i) { for (size_t i = 0; i < output.size(); ++i) {
output[i] = static_cast<FPT>(FP::FPRoundInt<FPT>(input[i], fpcr, rounding_mode, exact, fpsr)); output[i] = static_cast<FPT>(FP::FPRoundInt<FPT>(input[i], fpcr, rounding_mode, exact, fpsr));
} }
} })};
)
};
}, },
mp::cartesian_product<rounding_list, exact_list>{} mp::cartesian_product<rounding_list, exact_list>{});
);
EmitTwoOpFallback<3>(code, ctx, inst, lut.at(std::make_tuple(rounding, exact))); EmitTwoOpFallback<3>(code, ctx, inst, lut.at(std::make_tuple(rounding, exact)));
} }
@ -1501,7 +1499,7 @@ static void EmitRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]); const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]); const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{ MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
code.vmovaps(result, GetVectorOf<fsize, false, 0, 3>(code)); code.vmovaps(result, GetVectorOf<fsize, false, 0, 3>(code));
FCODE(vfnmadd231p)(result, operand1, operand2); FCODE(vfnmadd231p)(result, operand1, operand2);
FCODE(vmulp)(result, result, GetVectorOf<fsize, false, -1, 1>(code)); FCODE(vmulp)(result, result, GetVectorOf<fsize, false, -1, 1>(code));
@ -1523,12 +1521,12 @@ static void EmitRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
Xbyak::Label end, fallback; Xbyak::Label end, fallback;
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{ MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
code.vmovaps(result, GetVectorOf<fsize, false, 0, 3>(code)); code.vmovaps(result, GetVectorOf<fsize, false, 0, 3>(code));
FCODE(vfnmadd231p)(result, operand1, operand2); FCODE(vfnmadd231p)(result, operand1, operand2);
// An explanation for this is given in EmitFPRSqrtStepFused. // An explanation for this is given in EmitFPRSqrtStepFused.
code.vmovaps(mask, GetVectorOf<fsize, fsize == 32 ? 0x7f000000 : 0x7fe0000000000000>(code)); code.vmovaps(mask, GetVectorOf<fsize, (fsize == 32 ? 0x7f000000 : 0x7fe0000000000000)>(code));
FCODE(vandp)(tmp, result, mask); FCODE(vandp)(tmp, result, mask);
ICODE(vpcmpeq)(tmp, tmp, mask); ICODE(vpcmpeq)(tmp, tmp, mask);
code.ptest(tmp, tmp); code.ptest(tmp, tmp);
@ -1620,9 +1618,8 @@ void EmitFPVectorToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Xmm src = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm src = ctx.reg_alloc.UseScratchXmm(args[0]);
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{ MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
const int round_imm = [&] {
const int round_imm = [&]{
switch (rounding) { switch (rounding) {
case FP::RoundingMode::ToNearest_TieEven: case FP::RoundingMode::ToNearest_TieEven:
default: default:
@ -1702,7 +1699,6 @@ void EmitFPVectorToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
perform_conversion(src); perform_conversion(src);
FCODE(blendvp)(src, GetVectorOf<fsize, integer_max>(code)); FCODE(blendvp)(src, GetVectorOf<fsize, integer_max>(code));
} }
}); });
ctx.reg_alloc.DefineValue(inst, src); ctx.reg_alloc.DefineValue(inst, src);
@ -1716,8 +1712,7 @@ void EmitFPVectorToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
mp::lift_value<FP::RoundingMode::TowardsPlusInfinity>, mp::lift_value<FP::RoundingMode::TowardsPlusInfinity>,
mp::lift_value<FP::RoundingMode::TowardsMinusInfinity>, mp::lift_value<FP::RoundingMode::TowardsMinusInfinity>,
mp::lift_value<FP::RoundingMode::TowardsZero>, mp::lift_value<FP::RoundingMode::TowardsZero>,
mp::lift_value<FP::RoundingMode::ToNearest_TieAwayFromZero> mp::lift_value<FP::RoundingMode::ToNearest_TieAwayFromZero>>;
>;
static const auto lut = Common::GenerateLookupTableFromList( static const auto lut = Common::GenerateLookupTableFromList(
[](auto arg) { [](auto arg) {
@ -1732,12 +1727,9 @@ void EmitFPVectorToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
for (size_t i = 0; i < output.size(); ++i) { for (size_t i = 0; i < output.size(); ++i) {
output[i] = static_cast<FPT>(FP::FPToFixed<FPT>(fsize, input[i], fbits, unsigned_, fpcr, rounding_mode, fpsr)); output[i] = static_cast<FPT>(FP::FPToFixed<FPT>(fsize, input[i], fbits, unsigned_, fpcr, rounding_mode, fpsr));
} }
} })};
)
};
}, },
mp::cartesian_product<fbits_list, rounding_list>{} mp::cartesian_product<fbits_list, rounding_list>{});
);
EmitTwoOpFallback<3>(code, ctx, inst, lut.at(std::make_tuple(fbits, rounding))); EmitTwoOpFallback<3>(code, ctx, inst, lut.at(std::make_tuple(fbits, rounding)));
} }

View file

@ -3,8 +3,6 @@
* SPDX-License-Identifier: 0BSD * SPDX-License-Identifier: 0BSD
*/ */
#include "dynarmic/backend/x64/exception_handler.h"
#include <mach/mach.h> #include <mach/mach.h>
#include <mach/message.h> #include <mach/message.h>
@ -18,6 +16,7 @@
#include <fmt/format.h> #include <fmt/format.h>
#include "dynarmic/backend/x64/block_of_code.h" #include "dynarmic/backend/x64/block_of_code.h"
#include "dynarmic/backend/x64/exception_handler.h"
#include "dynarmic/common/assert.h" #include "dynarmic/common/assert.h"
#include "dynarmic/common/cast_util.h" #include "dynarmic/common/cast_util.h"
#include "dynarmic/common/common_types.h" #include "dynarmic/common/common_types.h"
@ -64,7 +63,7 @@ private:
}; };
MachHandler::MachHandler() { MachHandler::MachHandler() {
#define KCHECK(x) ASSERT_MSG((x) == KERN_SUCCESS, "dynarmic: macOS MachHandler: init failure at {}", #x) #define KCHECK(x) ASSERT_MSG((x) == KERN_SUCCESS, "dynarmic: macOS MachHandler: init failure at {}", #x)
KCHECK(mach_port_allocate(mach_task_self(), MACH_PORT_RIGHT_RECEIVE, &server_port)); KCHECK(mach_port_allocate(mach_task_self(), MACH_PORT_RIGHT_RECEIVE, &server_port));
KCHECK(mach_port_insert_right(mach_task_self(), server_port, server_port, MACH_MSG_TYPE_MAKE_SEND)); KCHECK(mach_port_insert_right(mach_task_self(), server_port, server_port, MACH_MSG_TYPE_MAKE_SEND));
@ -74,7 +73,7 @@ MachHandler::MachHandler() {
mach_port_t prev; mach_port_t prev;
KCHECK(mach_port_request_notification(mach_task_self(), server_port, MACH_NOTIFY_PORT_DESTROYED, 0, server_port, MACH_MSG_TYPE_MAKE_SEND_ONCE, &prev)); KCHECK(mach_port_request_notification(mach_task_self(), server_port, MACH_NOTIFY_PORT_DESTROYED, 0, server_port, MACH_MSG_TYPE_MAKE_SEND_ONCE, &prev));
#undef KCHECK #undef KCHECK
thread = std::thread(&MachHandler::MessagePump, this); thread = std::thread(&MachHandler::MessagePump, this);
} }
@ -102,7 +101,7 @@ void MachHandler::MessagePump() {
} }
mr = mach_msg(&reply.head, MACH_SEND_MSG, reply.head.msgh_size, 0, MACH_PORT_NULL, MACH_MSG_TIMEOUT_NONE, MACH_PORT_NULL); mr = mach_msg(&reply.head, MACH_SEND_MSG, reply.head.msgh_size, 0, MACH_PORT_NULL, MACH_MSG_TIMEOUT_NONE, MACH_PORT_NULL);
if (mr != MACH_MSG_SUCCESS){ if (mr != MACH_MSG_SUCCESS) {
fmt::print(stderr, "dynarmic: macOS MachHandler: Failed to send mach message. error: {:#08x} ({})\n", mr, mach_error_string(mr)); fmt::print(stderr, "dynarmic: macOS MachHandler: Failed to send mach message. error: {:#08x} ({})\n", mr, mach_error_string(mr));
return; return;
} }
@ -167,8 +166,7 @@ mig_external kern_return_t catch_mach_exception_raise_state(
const thread_state_t old_state, const thread_state_t old_state,
mach_msg_type_number_t old_stateCnt, mach_msg_type_number_t old_stateCnt,
thread_state_t new_state, thread_state_t new_state,
mach_msg_type_number_t* new_stateCnt mach_msg_type_number_t* new_stateCnt) {
) {
if (!flavor || !new_stateCnt) { if (!flavor || !new_stateCnt) {
fmt::print(stderr, "dynarmic: catch_mach_exception_raise_state: Invalid arguments.\n"); fmt::print(stderr, "dynarmic: catch_mach_exception_raise_state: Invalid arguments.\n");
return KERN_INVALID_ARGUMENT; return KERN_INVALID_ARGUMENT;
@ -192,8 +190,7 @@ mig_external kern_return_t catch_mach_exception_raise_state(
struct ExceptionHandler::Impl final { struct ExceptionHandler::Impl final {
Impl(BlockOfCode& code) Impl(BlockOfCode& code)
: code_begin(Common::BitCast<u64>(code.getCode())) : code_begin(Common::BitCast<u64>(code.getCode()))
, code_end(code_begin + code.GetTotalCodeSize()) , code_end(code_begin + code.GetTotalCodeSize()) {}
{}
void SetCallback(std::function<FakeCall(u64)> cb) { void SetCallback(std::function<FakeCall(u64)> cb) {
CodeBlockInfo cbi; CodeBlockInfo cbi;

View file

@ -5,19 +5,20 @@
#include "dynarmic/backend/x64/exception_handler.h" #include "dynarmic/backend/x64/exception_handler.h"
#ifdef __APPLE__
# include <signal.h>
# include <sys/ucontext.h>
#else
# include <signal.h>
# include <ucontext.h>
#endif
#include <cstring> #include <cstring>
#include <functional> #include <functional>
#include <memory> #include <memory>
#include <mutex> #include <mutex>
#include <vector> #include <vector>
#include <signal.h>
#ifdef __APPLE__
#include <sys/ucontext.h>
#else
#include <ucontext.h>
#endif
#include "dynarmic/backend/x64/block_of_code.h" #include "dynarmic/backend/x64/block_of_code.h"
#include "dynarmic/common/assert.h" #include "dynarmic/common/assert.h"
#include "dynarmic/common/cast_util.h" #include "dynarmic/common/cast_util.h"
@ -121,16 +122,16 @@ void SigHandler::SigAction(int sig, siginfo_t* info, void* raw_context) {
ASSERT(sig == SIGSEGV || sig == SIGBUS); ASSERT(sig == SIGSEGV || sig == SIGBUS);
#if defined(__APPLE__) #if defined(__APPLE__)
#define CTX_RIP (((ucontext_t*)raw_context)->uc_mcontext->__ss.__rip) # define CTX_RIP (((ucontext_t*)raw_context)->uc_mcontext->__ss.__rip)
#define CTX_RSP (((ucontext_t*)raw_context)->uc_mcontext->__ss.__rsp) # define CTX_RSP (((ucontext_t*)raw_context)->uc_mcontext->__ss.__rsp)
#elif defined(__linux__) #elif defined(__linux__)
#define CTX_RIP (((ucontext_t*)raw_context)->uc_mcontext.gregs[REG_RIP]) # define CTX_RIP (((ucontext_t*)raw_context)->uc_mcontext.gregs[REG_RIP])
#define CTX_RSP (((ucontext_t*)raw_context)->uc_mcontext.gregs[REG_RSP]) # define CTX_RSP (((ucontext_t*)raw_context)->uc_mcontext.gregs[REG_RSP])
#elif defined(__FreeBSD__) #elif defined(__FreeBSD__)
#define CTX_RIP (((ucontext_t*)raw_context)->uc_mcontext.mc_rip) # define CTX_RIP (((ucontext_t*)raw_context)->uc_mcontext.mc_rip)
#define CTX_RSP (((ucontext_t*)raw_context)->uc_mcontext.mc_rsp) # define CTX_RSP (((ucontext_t*)raw_context)->uc_mcontext.mc_rsp)
#else #else
#error "Unknown platform" # error "Unknown platform"
#endif #endif
{ {
@ -170,8 +171,7 @@ void SigHandler::SigAction(int sig, siginfo_t* info, void* raw_context) {
struct ExceptionHandler::Impl final { struct ExceptionHandler::Impl final {
Impl(BlockOfCode& code) Impl(BlockOfCode& code)
: code_begin(Common::BitCast<u64>(code.getCode())) : code_begin(Common::BitCast<u64>(code.getCode()))
, code_end(code_begin + code.GetTotalCodeSize()) , code_end(code_begin + code.GetTotalCodeSize()) {}
{}
void SetCallback(std::function<FakeCall(u64)> cb) { void SetCallback(std::function<FakeCall(u64)> cb) {
CodeBlockInfo cbi; CodeBlockInfo cbi;

View file

@ -3,12 +3,12 @@
* SPDX-License-Identifier: 0BSD * SPDX-License-Identifier: 0BSD
*/ */
#include <cstring>
#include <vector>
#define WIN32_LEAN_AND_MEAN #define WIN32_LEAN_AND_MEAN
#include <windows.h> #include <windows.h>
#include <cstring>
#include <vector>
#include "dynarmic/backend/x64/block_of_code.h" #include "dynarmic/backend/x64/block_of_code.h"
#include "dynarmic/backend/x64/exception_handler.h" #include "dynarmic/backend/x64/exception_handler.h"
#include "dynarmic/common/assert.h" #include "dynarmic/common/assert.h"
@ -187,14 +187,13 @@ struct ExceptionHandler::Impl final {
code.mov(code.ABI_PARAM1, Common::BitCast<u64>(&cb)); code.mov(code.ABI_PARAM1, Common::BitCast<u64>(&cb));
code.mov(code.ABI_PARAM2, code.ABI_PARAM3); code.mov(code.ABI_PARAM2, code.ABI_PARAM3);
code.CallLambda( code.CallLambda(
[](const std::function<FakeCall(u64)>& cb_, PCONTEXT ctx){ [](const std::function<FakeCall(u64)>& cb_, PCONTEXT ctx) {
FakeCall fc = cb_(ctx->Rip); FakeCall fc = cb_(ctx->Rip);
ctx->Rsp -= sizeof(u64); ctx->Rsp -= sizeof(u64);
*Common::BitCast<u64*>(ctx->Rsp) = fc.ret_rip; *Common::BitCast<u64*>(ctx->Rsp) = fc.ret_rip;
ctx->Rip = fc.call_rip; ctx->Rip = fc.call_rip;
} });
);
code.add(code.rsp, 8); code.add(code.rsp, 8);
code.mov(code.eax, static_cast<u32>(ExceptionContinueExecution)); code.mov(code.eax, static_cast<u32>(ExceptionContinueExecution));
code.ret(); code.ret();

View file

@ -3,15 +3,16 @@
* SPDX-License-Identifier: 0BSD * SPDX-License-Identifier: 0BSD
*/ */
#include "dynarmic/interface/exclusive_monitor.h"
#include <algorithm> #include <algorithm>
#include "dynarmic/common/assert.h" #include "dynarmic/common/assert.h"
#include "dynarmic/interface/exclusive_monitor.h"
namespace Dynarmic { namespace Dynarmic {
ExclusiveMonitor::ExclusiveMonitor(size_t processor_count) : ExclusiveMonitor::ExclusiveMonitor(size_t processor_count)
exclusive_addresses(processor_count, INVALID_EXCLUSIVE_ADDRESS), exclusive_values(processor_count) { : exclusive_addresses(processor_count, INVALID_EXCLUSIVE_ADDRESS), exclusive_values(processor_count) {
Unlock(); Unlock();
} }

View file

@ -61,4 +61,4 @@ constexpr HostFeature operator&=(HostFeature& result, HostFeature f) {
return result = (result & f); return result = (result & f);
} }
} } // namespace Dynarmic::Backend::X64

View file

@ -3,10 +3,11 @@
* SPDX-License-Identifier: 0BSD * SPDX-License-Identifier: 0BSD
*/ */
#include "dynarmic/backend/x64/hostloc.h"
#include <xbyak.h> #include <xbyak.h>
#include "dynarmic/backend/x64/abi.h" #include "dynarmic/backend/x64/abi.h"
#include "dynarmic/backend/x64/hostloc.h"
#include "dynarmic/backend/x64/stack_layout.h" #include "dynarmic/backend/x64/stack_layout.h"
namespace Dynarmic::Backend::X64 { namespace Dynarmic::Backend::X64 {

View file

@ -13,10 +13,44 @@ namespace Dynarmic::Backend::X64 {
enum class HostLoc { enum class HostLoc {
// Ordering of the registers is intentional. See also: HostLocToX64. // Ordering of the registers is intentional. See also: HostLocToX64.
RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI, R8, R9, R10, R11, R12, R13, R14, R15, RAX,
XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, RCX,
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, RDX,
CF, PF, AF, ZF, SF, OF, RBX,
RSP,
RBP,
RSI,
RDI,
R8,
R9,
R10,
R11,
R12,
R13,
R14,
R15,
XMM0,
XMM1,
XMM2,
XMM3,
XMM4,
XMM5,
XMM6,
XMM7,
XMM8,
XMM9,
XMM10,
XMM11,
XMM12,
XMM13,
XMM14,
XMM15,
CF,
PF,
AF,
ZF,
SF,
OF,
FirstSpill, FirstSpill,
}; };

View file

@ -10,7 +10,7 @@
namespace Dynarmic::Backend::X64 { namespace Dynarmic::Backend::X64 {
struct JitStateInfo { struct JitStateInfo {
template <typename JitStateType> template<typename JitStateType>
JitStateInfo(const JitStateType&) JitStateInfo(const JitStateType&)
: offsetof_guest_MXCSR(offsetof(JitStateType, guest_MXCSR)) : offsetof_guest_MXCSR(offsetof(JitStateType, guest_MXCSR))
, offsetof_asimd_MXCSR(offsetof(JitStateType, asimd_MXCSR)) , offsetof_asimd_MXCSR(offsetof(JitStateType, asimd_MXCSR))
@ -20,8 +20,7 @@ struct JitStateInfo {
, offsetof_rsb_codeptrs(offsetof(JitStateType, rsb_codeptrs)) , offsetof_rsb_codeptrs(offsetof(JitStateType, rsb_codeptrs))
, offsetof_cpsr_nzcv(offsetof(JitStateType, cpsr_nzcv)) , offsetof_cpsr_nzcv(offsetof(JitStateType, cpsr_nzcv))
, offsetof_fpsr_exc(offsetof(JitStateType, fpsr_exc)) , offsetof_fpsr_exc(offsetof(JitStateType, fpsr_exc))
, offsetof_fpsr_qc(offsetof(JitStateType, fpsr_qc)) , offsetof_fpsr_qc(offsetof(JitStateType, fpsr_qc)) {}
{}
const size_t offsetof_guest_MXCSR; const size_t offsetof_guest_MXCSR;
const size_t offsetof_asimd_MXCSR; const size_t offsetof_asimd_MXCSR;

View file

@ -5,8 +5,8 @@
#pragma once #pragma once
#include "dynarmic/common/common_types.h"
#include "dynarmic/common/bit_util.h" #include "dynarmic/common/bit_util.h"
#include "dynarmic/common/common_types.h"
namespace Dynarmic::Backend::X64::NZCV { namespace Dynarmic::Backend::X64::NZCV {

View file

@ -12,9 +12,12 @@
namespace Dynarmic::Backend::X64 { namespace Dynarmic::Backend::X64 {
struct OpArg { struct OpArg {
OpArg() : type(Type::Operand), inner_operand() {} OpArg()
/* implicit */ OpArg(const Xbyak::Address& address) : type(Type::Address), inner_address(address) {} : type(Type::Operand), inner_operand() {}
/* implicit */ OpArg(const Xbyak::Reg& reg) : type(Type::Reg), inner_reg(reg) {} /* implicit */ OpArg(const Xbyak::Address& address)
: type(Type::Address), inner_address(address) {}
/* implicit */ OpArg(const Xbyak::Reg& reg)
: type(Type::Reg), inner_reg(reg) {}
Xbyak::Operand& operator*() { Xbyak::Operand& operator*() {
switch (type) { switch (type) {

View file

@ -3,22 +3,22 @@
* SPDX-License-Identifier: 0BSD * SPDX-License-Identifier: 0BSD
*/ */
#include "dynarmic/backend/x64/perf_map.h"
#include <cstddef> #include <cstddef>
#include <string> #include <string>
#include "dynarmic/backend/x64/perf_map.h"
#ifdef __linux__ #ifdef __linux__
#include <cstdio> # include <cstdio>
#include <cstdlib> # include <cstdlib>
#include <mutex> # include <mutex>
#include <sys/types.h>
#include <unistd.h>
#include <fmt/format.h> # include <fmt/format.h>
# include <sys/types.h>
# include <unistd.h>
#include "dynarmic/common/common_types.h" # include "dynarmic/common/common_types.h"
namespace Dynarmic::Backend::X64 { namespace Dynarmic::Backend::X64 {

View file

@ -3,6 +3,8 @@
* SPDX-License-Identifier: 0BSD * SPDX-License-Identifier: 0BSD
*/ */
#include "dynarmic/backend/x64/reg_alloc.h"
#include <algorithm> #include <algorithm>
#include <numeric> #include <numeric>
#include <utility> #include <utility>
@ -11,7 +13,6 @@
#include <xbyak.h> #include <xbyak.h>
#include "dynarmic/backend/x64/abi.h" #include "dynarmic/backend/x64/abi.h"
#include "dynarmic/backend/x64/reg_alloc.h"
#include "dynarmic/backend/x64/stack_layout.h" #include "dynarmic/backend/x64/stack_layout.h"
#include "dynarmic/common/assert.h" #include "dynarmic/common/assert.h"
@ -228,8 +229,7 @@ RegAlloc::RegAlloc(BlockOfCode& code, std::vector<HostLoc> gpr_order, std::vecto
: gpr_order(gpr_order) : gpr_order(gpr_order)
, xmm_order(xmm_order) , xmm_order(xmm_order)
, hostloc_info(NonSpillHostLocCount + SpillCount) , hostloc_info(NonSpillHostLocCount + SpillCount)
, code(code) , code(code) {}
{}
RegAlloc::ArgumentInfo RegAlloc::GetArgumentInfo(IR::Inst* inst) { RegAlloc::ArgumentInfo RegAlloc::GetArgumentInfo(IR::Inst* inst) {
ArgumentInfo ret = {Argument{*this}, Argument{*this}, Argument{*this}, Argument{*this}}; ArgumentInfo ret = {Argument{*this}, Argument{*this}, Argument{*this}, Argument{*this}};
@ -382,13 +382,14 @@ HostLoc RegAlloc::ScratchImpl(const std::vector<HostLoc>& desired_locations) {
return location; return location;
} }
void RegAlloc::HostCall(IR::Inst* result_def, std::optional<Argument::copyable_reference> arg0, void RegAlloc::HostCall(IR::Inst* result_def,
std::optional<Argument::copyable_reference> arg0,
std::optional<Argument::copyable_reference> arg1, std::optional<Argument::copyable_reference> arg1,
std::optional<Argument::copyable_reference> arg2, std::optional<Argument::copyable_reference> arg2,
std::optional<Argument::copyable_reference> arg3) { std::optional<Argument::copyable_reference> arg3) {
constexpr size_t args_count = 4; constexpr size_t args_count = 4;
constexpr std::array<HostLoc, args_count> args_hostloc = { ABI_PARAM1, ABI_PARAM2, ABI_PARAM3, ABI_PARAM4 }; constexpr std::array<HostLoc, args_count> args_hostloc = {ABI_PARAM1, ABI_PARAM2, ABI_PARAM3, ABI_PARAM4};
const std::array<std::optional<Argument::copyable_reference>, args_count> args = { arg0, arg1, arg2, arg3 }; const std::array<std::optional<Argument::copyable_reference>, args_count> args = {arg0, arg1, arg2, arg3};
static const std::vector<HostLoc> other_caller_save = [args_hostloc]() { static const std::vector<HostLoc> other_caller_save = [args_hostloc]() {
std::vector<HostLoc> ret(ABI_ALL_CALLER_SAVE.begin(), ABI_ALL_CALLER_SAVE.end()); std::vector<HostLoc> ret(ABI_ALL_CALLER_SAVE.begin(), ABI_ALL_CALLER_SAVE.end());

View file

@ -85,7 +85,8 @@ public:
private: private:
friend class RegAlloc; friend class RegAlloc;
explicit Argument(RegAlloc& reg_alloc) : reg_alloc(reg_alloc) {} explicit Argument(RegAlloc& reg_alloc)
: reg_alloc(reg_alloc) {}
bool allocated = false; bool allocated = false;
RegAlloc& reg_alloc; RegAlloc& reg_alloc;

View file

@ -14,8 +14,8 @@ namespace Dynarmic::Backend::X64 {
constexpr size_t SpillCount = 64; constexpr size_t SpillCount = 64;
#ifdef _MSC_VER #ifdef _MSC_VER
#pragma warning(push) # pragma warning(push)
#pragma warning(disable:4324) // Structure was padded due to alignment specifier # pragma warning(disable : 4324) // Structure was padded due to alignment specifier
#endif #endif
struct alignas(16) StackLayout { struct alignas(16) StackLayout {
@ -31,7 +31,7 @@ struct alignas(16) StackLayout {
}; };
#ifdef _MSC_VER #ifdef _MSC_VER
#pragma warning(pop) # pragma warning(pop)
#endif #endif
static_assert(sizeof(StackLayout) % 16 == 0); static_assert(sizeof(StackLayout) % 16 == 0);

View file

@ -3,13 +3,13 @@
* SPDX-License-Identifier: 0BSD * SPDX-License-Identifier: 0BSD
*/ */
#include "dynarmic/common/assert.h"
#include <cstdio> #include <cstdio>
#include <exception> #include <exception>
#include <fmt/format.h> #include <fmt/format.h>
#include "dynarmic/common/assert.h"
namespace Dynarmic::Common { namespace Dynarmic::Common {
[[noreturn]] void Terminate(fmt::string_view msg, fmt::format_args args) { [[noreturn]] void Terminate(fmt::string_view msg, fmt::format_args args) {

View file

@ -15,7 +15,7 @@ namespace Dynarmic::Common {
namespace detail { namespace detail {
template <typename... Ts> template<typename... Ts>
[[noreturn]] void TerminateHelper(fmt::string_view msg, Ts... args) { [[noreturn]] void TerminateHelper(fmt::string_view msg, Ts... args) {
Terminate(msg, fmt::make_format_args(args...)); Terminate(msg, fmt::make_format_args(args...));
} }
@ -25,47 +25,47 @@ template <typename... Ts>
} // namespace Dynarmic::Common } // namespace Dynarmic::Common
#if defined(__clang) || defined(__GNUC__) #if defined(__clang) || defined(__GNUC__)
#define ASSUME(expr) [&]{ if (!(expr)) __builtin_unreachable(); }() # define ASSUME(expr) [&] { if (!(expr)) __builtin_unreachable(); }()
#elif defined(_MSC_VER) #elif defined(_MSC_VER)
#define ASSUME(expr) __assume(expr) # define ASSUME(expr) __assume(expr)
#else #else
#define ASSUME(expr) # define ASSUME(expr)
#endif #endif
#ifdef DYNARMIC_IGNORE_ASSERTS #ifdef DYNARMIC_IGNORE_ASSERTS
#if defined(__clang) || defined(__GNUC__) # if defined(__clang) || defined(__GNUC__)
#define UNREACHABLE() __builtin_unreachable() # define UNREACHABLE() __builtin_unreachable()
#elif defined(_MSC_VER) # elif defined(_MSC_VER)
#define UNREACHABLE() __assume(0) # define UNREACHABLE() __assume(0)
#else # else
#define UNREACHABLE() # define UNREACHABLE()
#endif # endif
#define ASSERT(expr) ASSUME(expr) # define ASSERT(expr) ASSUME(expr)
#define ASSERT_MSG(expr, ...) ASSUME(expr) # define ASSERT_MSG(expr, ...) ASSUME(expr)
#define ASSERT_FALSE(...) UNREACHABLE() # define ASSERT_FALSE(...) UNREACHABLE()
#else #else
#define UNREACHABLE() ASSERT_FALSE("Unreachable code!") # define UNREACHABLE() ASSERT_FALSE("Unreachable code!")
#define ASSERT(expr) \ # define ASSERT(expr) \
[&]{ \ [&] { \
if (UNLIKELY(!(expr))) { \ if (UNLIKELY(!(expr))) { \
::Dynarmic::Common::detail::TerminateHelper(#expr); \ ::Dynarmic::Common::detail::TerminateHelper(#expr); \
} \ } \
}() }()
#define ASSERT_MSG(expr, ...) \ # define ASSERT_MSG(expr, ...) \
[&]{ \ [&] { \
if (UNLIKELY(!(expr))) { \ if (UNLIKELY(!(expr))) { \
::Dynarmic::Common::detail::TerminateHelper(#expr "\nMessage: " __VA_ARGS__); \ ::Dynarmic::Common::detail::TerminateHelper(#expr "\nMessage: " __VA_ARGS__); \
} \ } \
}() }()
#define ASSERT_FALSE(...) ::Dynarmic::Common::detail::TerminateHelper("false\nMessage: " __VA_ARGS__) # define ASSERT_FALSE(...) ::Dynarmic::Common::detail::TerminateHelper("false\nMessage: " __VA_ARGS__)
#endif #endif
#if defined(NDEBUG) || defined(DYNARMIC_IGNORE_ASSERTS) #if defined(NDEBUG) || defined(DYNARMIC_IGNORE_ASSERTS)
#define DEBUG_ASSERT(expr) ASSUME(expr) # define DEBUG_ASSERT(expr) ASSUME(expr)
#define DEBUG_ASSERT_MSG(expr, ...) ASSUME(expr) # define DEBUG_ASSERT_MSG(expr, ...) ASSUME(expr)
#else #else
#define DEBUG_ASSERT(expr) ASSERT(expr) # define DEBUG_ASSERT(expr) ASSERT(expr)
#define DEBUG_ASSERT_MSG(expr, ...) ASSERT_MSG(expr, __VA_ARGS__) # define DEBUG_ASSERT_MSG(expr, ...) ASSERT_MSG(expr, __VA_ARGS__)
#endif #endif

View file

@ -21,7 +21,7 @@ constexpr size_t BitSize() {
return sizeof(T) * CHAR_BIT; return sizeof(T) * CHAR_BIT;
} }
template <typename T> template<typename T>
constexpr T Ones(size_t count) { constexpr T Ones(size_t count) {
ASSERT_MSG(count <= BitSize<T>(), "count larger than bitsize of T"); ASSERT_MSG(count <= BitSize<T>(), "count larger than bitsize of T");
if (count == BitSize<T>()) if (count == BitSize<T>())
@ -72,8 +72,8 @@ constexpr T ModifyBits(const T value, const T new_bits) {
} }
#ifdef _MSC_VER #ifdef _MSC_VER
#pragma warning(push) # pragma warning(push)
#pragma warning(disable:4554) # pragma warning(disable : 4554)
#endif #endif
/// Extracts a single bit at bit_position from value of type T. /// Extracts a single bit at bit_position from value of type T.
template<typename T> template<typename T>
@ -123,7 +123,7 @@ constexpr T ModifyBit(const T value, bool new_bit) {
return ModifyBit<T>(bit_position, value, new_bit); return ModifyBit<T>(bit_position, value, new_bit);
} }
#ifdef _MSC_VER #ifdef _MSC_VER
#pragma warning(pop) # pragma warning(pop)
#endif #endif
/// Sign-extends a value that has bit_count bits to the full bitwidth of type T. /// Sign-extends a value that has bit_count bits to the full bitwidth of type T.
@ -152,12 +152,12 @@ inline T SignExtend(const size_t bit_count, const T value) {
return value; return value;
} }
template <typename Integral> template<typename Integral>
inline size_t BitCount(Integral value) { inline size_t BitCount(Integral value) {
return std::bitset<BitSize<Integral>()>(value).count(); return std::bitset<BitSize<Integral>()>(value).count();
} }
template <typename T> template<typename T>
constexpr size_t CountLeadingZeros(T value) { constexpr size_t CountLeadingZeros(T value) {
auto x = static_cast<std::make_unsigned_t<T>>(value); auto x = static_cast<std::make_unsigned_t<T>>(value);
size_t result = BitSize<T>(); size_t result = BitSize<T>();
@ -168,7 +168,7 @@ constexpr size_t CountLeadingZeros(T value) {
return result; return result;
} }
template <typename T> template<typename T>
constexpr int HighestSetBit(T value) { constexpr int HighestSetBit(T value) {
auto x = static_cast<std::make_unsigned_t<T>>(value); auto x = static_cast<std::make_unsigned_t<T>>(value);
int result = -1; int result = -1;
@ -179,7 +179,7 @@ constexpr int HighestSetBit(T value) {
return result; return result;
} }
template <typename T> template<typename T>
constexpr size_t LowestSetBit(T value) { constexpr size_t LowestSetBit(T value) {
auto x = static_cast<std::make_unsigned_t<T>>(value); auto x = static_cast<std::make_unsigned_t<T>>(value);
if (x == 0) if (x == 0)
@ -193,12 +193,12 @@ constexpr size_t LowestSetBit(T value) {
return result; return result;
} }
template <typename T> template<typename T>
constexpr bool MostSignificantBit(T value) { constexpr bool MostSignificantBit(T value) {
return Bit<BitSize<T>() - 1, T>(value); return Bit<BitSize<T>() - 1, T>(value);
} }
template <typename T> template<typename T>
inline T Replicate(T value, size_t element_size) { inline T Replicate(T value, size_t element_size) {
ASSERT_MSG(BitSize<T>() % element_size == 0, "bitsize of T not divisible by element_size"); ASSERT_MSG(BitSize<T>() % element_size == 0, "bitsize of T not divisible by element_size");
if (element_size == BitSize<T>()) if (element_size == BitSize<T>())
@ -206,7 +206,7 @@ inline T Replicate(T value, size_t element_size) {
return Replicate(value | (value << element_size), element_size * 2); return Replicate(value | (value << element_size), element_size * 2);
} }
template <typename T> template<typename T>
constexpr T RotateRight(T value, size_t amount) { constexpr T RotateRight(T value, size_t amount) {
amount %= BitSize<T>(); amount %= BitSize<T>();
@ -219,8 +219,8 @@ constexpr T RotateRight(T value, size_t amount) {
} }
constexpr u32 SwapHalves32(u32 value) { constexpr u32 SwapHalves32(u32 value) {
return ((value & 0xFFFF0000U) >> 16) | return ((value & 0xFFFF0000U) >> 16)
((value & 0x0000FFFFU) << 16); | ((value & 0x0000FFFFU) << 16);
} }
constexpr u16 SwapBytes16(u16 value) { constexpr u16 SwapBytes16(u16 value) {
@ -228,21 +228,21 @@ constexpr u16 SwapBytes16(u16 value) {
} }
constexpr u32 SwapBytes32(u32 value) { constexpr u32 SwapBytes32(u32 value) {
return ((value & 0xFF000000U) >> 24) | return ((value & 0xFF000000U) >> 24)
((value & 0x00FF0000U) >> 8) | | ((value & 0x00FF0000U) >> 8)
((value & 0x0000FF00U) << 8) | | ((value & 0x0000FF00U) << 8)
((value & 0x000000FFU) << 24); | ((value & 0x000000FFU) << 24);
} }
constexpr u64 SwapBytes64(u64 value) { constexpr u64 SwapBytes64(u64 value) {
return ((value & 0xFF00000000000000ULL) >> 56) | return ((value & 0xFF00000000000000ULL) >> 56)
((value & 0x00FF000000000000ULL) >> 40) | | ((value & 0x00FF000000000000ULL) >> 40)
((value & 0x0000FF0000000000ULL) >> 24) | | ((value & 0x0000FF0000000000ULL) >> 24)
((value & 0x000000FF00000000ULL) >> 8) | | ((value & 0x000000FF00000000ULL) >> 8)
((value & 0x00000000FF000000ULL) << 8) | | ((value & 0x00000000FF000000ULL) << 8)
((value & 0x0000000000FF0000ULL) << 24) | | ((value & 0x0000000000FF0000ULL) << 24)
((value & 0x000000000000FF00ULL) << 40) | | ((value & 0x000000000000FF00ULL) << 40)
((value & 0x00000000000000FFULL) << 56); | ((value & 0x00000000000000FFULL) << 56);
} }
} // namespace Dynarmic::Common } // namespace Dynarmic::Common

View file

@ -13,7 +13,7 @@
namespace Dynarmic::Common { namespace Dynarmic::Common {
/// Reinterpret objects of one type as another by bit-casting between object representations. /// Reinterpret objects of one type as another by bit-casting between object representations.
template <class Dest, class Source> template<class Dest, class Source>
inline Dest BitCast(const Source& source) noexcept { inline Dest BitCast(const Source& source) noexcept {
static_assert(sizeof(Dest) == sizeof(Source), "size of destination and source objects must be equal"); static_assert(sizeof(Dest) == sizeof(Source), "size of destination and source objects must be equal");
static_assert(std::is_trivially_copyable_v<Dest>, "destination type must be trivially copyable."); static_assert(std::is_trivially_copyable_v<Dest>, "destination type must be trivially copyable.");
@ -26,7 +26,7 @@ inline Dest BitCast(const Source& source) noexcept {
/// Reinterpret objects of any arbitrary type as another type by bit-casting between object representations. /// Reinterpret objects of any arbitrary type as another type by bit-casting between object representations.
/// Note that here we do not verify if source has enough bytes to read from. /// Note that here we do not verify if source has enough bytes to read from.
template <class Dest, class SourcePtr> template<class Dest, class SourcePtr>
inline Dest BitCastPointee(const SourcePtr source) noexcept { inline Dest BitCastPointee(const SourcePtr source) noexcept {
static_assert(sizeof(SourcePtr) == sizeof(void*), "source pointer must have size of a pointer"); static_assert(sizeof(SourcePtr) == sizeof(void*), "source pointer must have size of a pointer");
static_assert(std::is_trivially_copyable_v<Dest>, "destination type must be trivially copyable."); static_assert(std::is_trivially_copyable_v<Dest>, "destination type must be trivially copyable.");
@ -37,7 +37,7 @@ inline Dest BitCastPointee(const SourcePtr source) noexcept {
} }
/// Cast a lambda into an equivalent function pointer. /// Cast a lambda into an equivalent function pointer.
template <class Function> template<class Function>
inline auto FptrCast(Function f) noexcept { inline auto FptrCast(Function f) noexcept {
return static_cast<mp::equivalent_function_type<Function>*>(f); return static_cast<mp::equivalent_function_type<Function>*>(f);
} }

View file

@ -3,18 +3,19 @@
* SPDX-License-Identifier: 0BSD * SPDX-License-Identifier: 0BSD
*/ */
#include "dynarmic/common/crypto/aes.h"
#include <array> #include <array>
#include "dynarmic/common/common_types.h" #include "dynarmic/common/common_types.h"
#include "dynarmic/common/crypto/aes.h"
namespace Dynarmic::Common::Crypto::AES { namespace Dynarmic::Common::Crypto::AES {
using SubstitutionTable = std::array<u8, 256>; using SubstitutionTable = std::array<u8, 256>;
// See section 5.1.1 Figure 7 in FIPS 197 // See section 5.1.1 Figure 7 in FIPS 197
constexpr SubstitutionTable substitution_box{{ constexpr SubstitutionTable substitution_box{
// 0 1 2 3 4 5 6 7 8 9 A B C D E F {// 0 1 2 3 4 5 6 7 8 9 A B C D E F
0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76, 0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76,
0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0, 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0, 0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0, 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0,
0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC, 0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15, 0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC, 0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15,
@ -30,12 +31,11 @@ constexpr SubstitutionTable substitution_box{{
0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6, 0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A, 0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6, 0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A,
0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E, 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E, 0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E, 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E,
0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94, 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF, 0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94, 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF,
0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16 0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16}};
}};
// See section 5.3.2 Figure 14 in FIPS 197 // See section 5.3.2 Figure 14 in FIPS 197
constexpr SubstitutionTable inverse_substitution_box{{ constexpr SubstitutionTable inverse_substitution_box{
// 0 1 2 3 4 5 6 7 8 9 A B C D E F {// 0 1 2 3 4 5 6 7 8 9 A B C D E F
0x52, 0x09, 0x6A, 0xD5, 0x30, 0x36, 0xA5, 0x38, 0xBF, 0x40, 0xA3, 0x9E, 0x81, 0xF3, 0xD7, 0xFB, 0x52, 0x09, 0x6A, 0xD5, 0x30, 0x36, 0xA5, 0x38, 0xBF, 0x40, 0xA3, 0x9E, 0x81, 0xF3, 0xD7, 0xFB,
0x7C, 0xE3, 0x39, 0x82, 0x9B, 0x2F, 0xFF, 0x87, 0x34, 0x8E, 0x43, 0x44, 0xC4, 0xDE, 0xE9, 0xCB, 0x7C, 0xE3, 0x39, 0x82, 0x9B, 0x2F, 0xFF, 0x87, 0x34, 0x8E, 0x43, 0x44, 0xC4, 0xDE, 0xE9, 0xCB,
0x54, 0x7B, 0x94, 0x32, 0xA6, 0xC2, 0x23, 0x3D, 0xEE, 0x4C, 0x95, 0x0B, 0x42, 0xFA, 0xC3, 0x4E, 0x54, 0x7B, 0x94, 0x32, 0xA6, 0xC2, 0x23, 0x3D, 0xEE, 0x4C, 0x95, 0x0B, 0x42, 0xFA, 0xC3, 0x4E,
@ -51,8 +51,7 @@ constexpr SubstitutionTable inverse_substitution_box{{
0x1F, 0xDD, 0xA8, 0x33, 0x88, 0x07, 0xC7, 0x31, 0xB1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xEC, 0x5F, 0x1F, 0xDD, 0xA8, 0x33, 0x88, 0x07, 0xC7, 0x31, 0xB1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xEC, 0x5F,
0x60, 0x51, 0x7F, 0xA9, 0x19, 0xB5, 0x4A, 0x0D, 0x2D, 0xE5, 0x7A, 0x9F, 0x93, 0xC9, 0x9C, 0xEF, 0x60, 0x51, 0x7F, 0xA9, 0x19, 0xB5, 0x4A, 0x0D, 0x2D, 0xE5, 0x7A, 0x9F, 0x93, 0xC9, 0x9C, 0xEF,
0xA0, 0xE0, 0x3B, 0x4D, 0xAE, 0x2A, 0xF5, 0xB0, 0xC8, 0xEB, 0xBB, 0x3C, 0x83, 0x53, 0x99, 0x61, 0xA0, 0xE0, 0x3B, 0x4D, 0xAE, 0x2A, 0xF5, 0xB0, 0xC8, 0xEB, 0xBB, 0x3C, 0x83, 0x53, 0x99, 0x61,
0x17, 0x2B, 0x04, 0x7E, 0xBA, 0x77, 0xD6, 0x26, 0xE1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0C, 0x7D 0x17, 0x2B, 0x04, 0x7E, 0xBA, 0x77, 0xD6, 0x26, 0xE1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0C, 0x7D}};
}};
// See section 4.2.1 in FIPS 197. // See section 4.2.1 in FIPS 197.
static constexpr u8 xtime(u8 x) { static constexpr u8 xtime(u8 x) {
@ -61,11 +60,11 @@ static constexpr u8 xtime(u8 x) {
// Galois Field multiplication. // Galois Field multiplication.
static constexpr u8 Multiply(u8 x, u8 y) { static constexpr u8 Multiply(u8 x, u8 y) {
return static_cast<u8>(((y & 1) * x) ^ return static_cast<u8>(((y & 1) * x)
((y >> 1 & 1) * xtime(x)) ^ ^ ((y >> 1 & 1) * xtime(x))
((y >> 2 & 1) * xtime(xtime(x))) ^ ^ ((y >> 2 & 1) * xtime(xtime(x)))
((y >> 3 & 1) * xtime(xtime(xtime(x)))) ^ ^ ((y >> 3 & 1) * xtime(xtime(xtime(x))))
((y >> 4 & 1) * xtime(xtime(xtime(xtime(x)))))); ^ ((y >> 4 & 1) * xtime(xtime(xtime(xtime(x))))));
} }
static void ShiftRows(State& out_state, const State& state) { static void ShiftRows(State& out_state, const State& state) {

View file

@ -6,6 +6,7 @@
#pragma once #pragma once
#include <array> #include <array>
#include "dynarmic/common/common_types.h" #include "dynarmic/common/common_types.h"
namespace Dynarmic::Common::Crypto::AES { namespace Dynarmic::Common::Crypto::AES {

View file

@ -3,18 +3,19 @@
* SPDX-License-Identifier: 0BSD * SPDX-License-Identifier: 0BSD
*/ */
#include "dynarmic/common/crypto/crc32.h"
#include <array> #include <array>
#include "dynarmic/common/common_types.h" #include "dynarmic/common/common_types.h"
#include "dynarmic/common/crypto/crc32.h"
namespace Dynarmic::Common::Crypto::CRC32 { namespace Dynarmic::Common::Crypto::CRC32 {
using CRC32Table = std::array<u32, 256>; using CRC32Table = std::array<u32, 256>;
// CRC32 algorithm that uses polynomial 0x1EDC6F41 // CRC32 algorithm that uses polynomial 0x1EDC6F41
constexpr CRC32Table castagnoli_table{{ constexpr CRC32Table castagnoli_table{
0x00000000, 0xF26B8303, 0xE13B70F7, 0x1350F3F4, {0x00000000, 0xF26B8303, 0xE13B70F7, 0x1350F3F4,
0xC79A971F, 0x35F1141C, 0x26A1E7E8, 0xD4CA64EB, 0xC79A971F, 0x35F1141C, 0x26A1E7E8, 0xD4CA64EB,
0x8AD958CF, 0x78B2DBCC, 0x6BE22838, 0x9989AB3B, 0x8AD958CF, 0x78B2DBCC, 0x6BE22838, 0x9989AB3B,
0x4D43CFD0, 0xBF284CD3, 0xAC78BF27, 0x5E133C24, 0x4D43CFD0, 0xBF284CD3, 0xAC78BF27, 0x5E133C24,
@ -77,12 +78,11 @@ constexpr CRC32Table castagnoli_table{{
0xF36E6F75, 0x0105EC76, 0x12551F82, 0xE03E9C81, 0xF36E6F75, 0x0105EC76, 0x12551F82, 0xE03E9C81,
0x34F4F86A, 0xC69F7B69, 0xD5CF889D, 0x27A40B9E, 0x34F4F86A, 0xC69F7B69, 0xD5CF889D, 0x27A40B9E,
0x79B737BA, 0x8BDCB4B9, 0x988C474D, 0x6AE7C44E, 0x79B737BA, 0x8BDCB4B9, 0x988C474D, 0x6AE7C44E,
0xBE2DA0A5, 0x4C4623A6, 0x5F16D052, 0xAD7D5351 0xBE2DA0A5, 0x4C4623A6, 0x5F16D052, 0xAD7D5351}};
}};
// CRC32 algorithm that uses polynomial 0x04C11DB7 // CRC32 algorithm that uses polynomial 0x04C11DB7
constexpr CRC32Table iso_table{{ constexpr CRC32Table iso_table{
0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA, {0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA,
0x076DC419, 0x706AF48F, 0xE963A535, 0x9E6495A3, 0x076DC419, 0x706AF48F, 0xE963A535, 0x9E6495A3,
0x0EDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988, 0x0EDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988,
0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91, 0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91,
@ -145,8 +145,7 @@ constexpr CRC32Table iso_table{{
0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6, 0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6,
0xBAD03605, 0xCDD70693, 0x54DE5729, 0x23D967BF, 0xBAD03605, 0xCDD70693, 0x54DE5729, 0x23D967BF,
0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94, 0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94,
0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D 0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D}};
}};
static u32 ComputeCRC32(const CRC32Table& table, u32 crc, const u64 value, int length) { static u32 ComputeCRC32(const CRC32Table& table, u32 crc, const u64 value, int length) {
const auto* data = reinterpret_cast<const unsigned char*>(&value); const auto* data = reinterpret_cast<const unsigned char*>(&value);

View file

@ -3,17 +3,18 @@
* SPDX-License-Identifier: 0BSD * SPDX-License-Identifier: 0BSD
*/ */
#include "dynarmic/common/crypto/sm4.h"
#include <array> #include <array>
#include "dynarmic/common/common_types.h" #include "dynarmic/common/common_types.h"
#include "dynarmic/common/crypto/sm4.h"
namespace Dynarmic::Common::Crypto::SM4 { namespace Dynarmic::Common::Crypto::SM4 {
using SubstitutionTable = std::array<u8, 256>; using SubstitutionTable = std::array<u8, 256>;
constexpr SubstitutionTable substitution_box{{ constexpr SubstitutionTable substitution_box{
0xD6, 0x90, 0xE9, 0xFE, 0xCC, 0xE1, 0x3D, 0xB7, {0xD6, 0x90, 0xE9, 0xFE, 0xCC, 0xE1, 0x3D, 0xB7,
0x16, 0xB6, 0x14, 0xC2, 0x28, 0xFB, 0x2C, 0x05, 0x16, 0xB6, 0x14, 0xC2, 0x28, 0xFB, 0x2C, 0x05,
0x2B, 0x67, 0x9A, 0x76, 0x2A, 0xBE, 0x04, 0xC3, 0x2B, 0x67, 0x9A, 0x76, 0x2A, 0xBE, 0x04, 0xC3,
0xAA, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99, 0xAA, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99,
@ -44,8 +45,7 @@ constexpr SubstitutionTable substitution_box{{
0x89, 0x69, 0x97, 0x4A, 0x0C, 0x96, 0x77, 0x7E, 0x89, 0x69, 0x97, 0x4A, 0x0C, 0x96, 0x77, 0x7E,
0x65, 0xB9, 0xF1, 0x09, 0xC5, 0x6E, 0xC6, 0x84, 0x65, 0xB9, 0xF1, 0x09, 0xC5, 0x6E, 0xC6, 0x84,
0x18, 0xF0, 0x7D, 0xEC, 0x3A, 0xDC, 0x4D, 0x20, 0x18, 0xF0, 0x7D, 0xEC, 0x3A, 0xDC, 0x4D, 0x20,
0x79, 0xEE, 0x5F, 0x3E, 0xD7, 0xCB, 0x39, 0x48 0x79, 0xEE, 0x5F, 0x3E, 0xD7, 0xCB, 0x39, 0x48}};
}};
u8 AccessSubstitutionBox(u8 index) { u8 AccessSubstitutionBox(u8 index) {
return substitution_box[index]; return substitution_box[index];

View file

@ -22,7 +22,8 @@ public:
FPCR() = default; FPCR() = default;
FPCR(const FPCR&) = default; FPCR(const FPCR&) = default;
FPCR(FPCR&&) = default; FPCR(FPCR&&) = default;
explicit FPCR(u32 data) : value{data & mask} {} explicit FPCR(u32 data)
: value{data & mask} {}
FPCR& operator=(const FPCR&) = default; FPCR& operator=(const FPCR&) = default;
FPCR& operator=(FPCR&&) = default; FPCR& operator=(FPCR&&) = default;

View file

@ -18,7 +18,8 @@ public:
FPSR() = default; FPSR() = default;
FPSR(const FPSR&) = default; FPSR(const FPSR&) = default;
FPSR(FPSR&&) = default; FPSR(FPSR&&) = default;
explicit FPSR(u32 data) : value{data & mask} {} explicit FPSR(u32 data)
: value{data & mask} {}
FPSR& operator=(const FPSR&) = default; FPSR& operator=(const FPSR&) = default;
FPSR& operator=(FPSR&&) = default; FPSR& operator=(FPSR&&) = default;

View file

@ -4,6 +4,7 @@
*/ */
#include "dynarmic/common/fp/fused.h" #include "dynarmic/common/fp/fused.h"
#include "dynarmic/common/fp/mantissa_util.h" #include "dynarmic/common/fp/mantissa_util.h"
#include "dynarmic/common/fp/unpacked.h" #include "dynarmic/common/fp/unpacked.h"
#include "dynarmic/common/u128.h" #include "dynarmic/common/u128.h"
@ -20,7 +21,7 @@ static FPUnpacked ReduceMantissa(bool sign, int exponent, const u128& mantissa)
FPUnpacked FusedMulAdd(FPUnpacked addend, FPUnpacked op1, FPUnpacked op2) { FPUnpacked FusedMulAdd(FPUnpacked addend, FPUnpacked op1, FPUnpacked op2) {
const bool product_sign = op1.sign != op2.sign; const bool product_sign = op1.sign != op2.sign;
const auto [product_exponent, product_value] = [op1, op2]{ const auto [product_exponent, product_value] = [op1, op2] {
int exponent = op1.exponent + op2.exponent; int exponent = op1.exponent + op2.exponent;
u128 value = Multiply64To128(op1.mantissa, op2.mantissa); u128 value = Multiply64To128(op1.mantissa, op2.mantissa);
if (value.Bit<product_point_position + 1>()) { if (value.Bit<product_point_position + 1>()) {

View file

@ -8,10 +8,10 @@
#include "dynarmic/common/fp/op/FPCompare.h" #include "dynarmic/common/fp/op/FPCompare.h"
#include "dynarmic/common/fp/op/FPConvert.h" #include "dynarmic/common/fp/op/FPConvert.h"
#include "dynarmic/common/fp/op/FPMulAdd.h" #include "dynarmic/common/fp/op/FPMulAdd.h"
#include "dynarmic/common/fp/op/FPRSqrtEstimate.h"
#include "dynarmic/common/fp/op/FPRSqrtStepFused.h"
#include "dynarmic/common/fp/op/FPRecipEstimate.h" #include "dynarmic/common/fp/op/FPRecipEstimate.h"
#include "dynarmic/common/fp/op/FPRecipExponent.h" #include "dynarmic/common/fp/op/FPRecipExponent.h"
#include "dynarmic/common/fp/op/FPRecipStepFused.h" #include "dynarmic/common/fp/op/FPRecipStepFused.h"
#include "dynarmic/common/fp/op/FPRoundInt.h" #include "dynarmic/common/fp/op/FPRoundInt.h"
#include "dynarmic/common/fp/op/FPRSqrtEstimate.h"
#include "dynarmic/common/fp/op/FPRSqrtStepFused.h"
#include "dynarmic/common/fp/op/FPToFixed.h" #include "dynarmic/common/fp/op/FPToFixed.h"

View file

@ -3,15 +3,16 @@
* SPDX-License-Identifier: 0BSD * SPDX-License-Identifier: 0BSD
*/ */
#include "dynarmic/common/fp/op/FPCompare.h"
#include "dynarmic/common/fp/fpcr.h" #include "dynarmic/common/fp/fpcr.h"
#include "dynarmic/common/fp/fpsr.h" #include "dynarmic/common/fp/fpsr.h"
#include "dynarmic/common/fp/op/FPCompare.h"
#include "dynarmic/common/fp/process_exception.h" #include "dynarmic/common/fp/process_exception.h"
#include "dynarmic/common/fp/unpacked.h" #include "dynarmic/common/fp/unpacked.h"
namespace Dynarmic::FP { namespace Dynarmic::FP {
template <typename FPT> template<typename FPT>
bool FPCompareEQ(FPT lhs, FPT rhs, FPCR fpcr, FPSR& fpsr) { bool FPCompareEQ(FPT lhs, FPT rhs, FPCR fpcr, FPSR& fpsr) {
const auto unpacked1 = FPUnpack(lhs, fpcr, fpsr); const auto unpacked1 = FPUnpack(lhs, fpcr, fpsr);
const auto unpacked2 = FPUnpack(rhs, fpcr, fpsr); const auto unpacked2 = FPUnpack(rhs, fpcr, fpsr);
@ -20,8 +21,7 @@ bool FPCompareEQ(FPT lhs, FPT rhs, FPCR fpcr, FPSR& fpsr) {
const auto& value1 = std::get<FPUnpacked>(unpacked1); const auto& value1 = std::get<FPUnpacked>(unpacked1);
const auto& value2 = std::get<FPUnpacked>(unpacked2); const auto& value2 = std::get<FPUnpacked>(unpacked2);
if (type1 == FPType::QNaN || type1 == FPType::SNaN || if (type1 == FPType::QNaN || type1 == FPType::SNaN || type2 == FPType::QNaN || type2 == FPType::SNaN) {
type2 == FPType::QNaN || type2 == FPType::SNaN) {
if (type1 == FPType::SNaN || type2 == FPType::SNaN) { if (type1 == FPType::SNaN || type2 == FPType::SNaN) {
FPProcessException(FPExc::InvalidOp, fpcr, fpsr); FPProcessException(FPExc::InvalidOp, fpcr, fpsr);
} }

View file

@ -10,7 +10,7 @@ namespace Dynarmic::FP {
class FPCR; class FPCR;
class FPSR; class FPSR;
template <typename FPT> template<typename FPT>
bool FPCompareEQ(FPT lhs, FPT rhs, FPCR fpcr, FPSR& fpsr); bool FPCompareEQ(FPT lhs, FPT rhs, FPCR fpcr, FPSR& fpsr);
} // namespace Dynarmic::FP } // namespace Dynarmic::FP

View file

@ -3,17 +3,18 @@
* SPDX-License-Identifier: 0BSD * SPDX-License-Identifier: 0BSD
*/ */
#include "dynarmic/common/fp/op/FPConvert.h"
#include "dynarmic/common/common_types.h" #include "dynarmic/common/common_types.h"
#include "dynarmic/common/fp/fpcr.h" #include "dynarmic/common/fp/fpcr.h"
#include "dynarmic/common/fp/fpsr.h" #include "dynarmic/common/fp/fpsr.h"
#include "dynarmic/common/fp/info.h" #include "dynarmic/common/fp/info.h"
#include "dynarmic/common/fp/op/FPConvert.h"
#include "dynarmic/common/fp/process_exception.h" #include "dynarmic/common/fp/process_exception.h"
#include "dynarmic/common/fp/unpacked.h" #include "dynarmic/common/fp/unpacked.h"
namespace Dynarmic::FP { namespace Dynarmic::FP {
namespace { namespace {
template <typename FPT_TO, typename FPT_FROM> template<typename FPT_TO, typename FPT_FROM>
FPT_TO FPConvertNaN(FPT_FROM op) { FPT_TO FPConvertNaN(FPT_FROM op) {
const bool sign = Common::Bit<Common::BitSize<FPT_FROM>() - 1>(op); const bool sign = Common::Bit<Common::BitSize<FPT_FROM>() - 1>(op);
const u64 frac = [op] { const u64 frac = [op] {
@ -40,7 +41,7 @@ FPT_TO FPConvertNaN(FPT_FROM op) {
} }
} // Anonymous namespace } // Anonymous namespace
template <typename FPT_TO, typename FPT_FROM> template<typename FPT_TO, typename FPT_FROM>
FPT_TO FPConvert(FPT_FROM op, FPCR fpcr, RoundingMode rounding_mode, FPSR& fpsr) { FPT_TO FPConvert(FPT_FROM op, FPCR fpcr, RoundingMode rounding_mode, FPSR& fpsr) {
const auto [type, sign, value] = FPUnpackCV<FPT_FROM>(op, fpcr, fpsr); const auto [type, sign, value] = FPUnpackCV<FPT_FROM>(op, fpcr, fpsr);
const bool is_althp = Common::BitSize<FPT_TO>() == 16 && fpcr.AHP(); const bool is_althp = Common::BitSize<FPT_TO>() == 16 && fpcr.AHP();

View file

@ -11,7 +11,7 @@ class FPCR;
class FPSR; class FPSR;
enum class RoundingMode; enum class RoundingMode;
template <typename FPT_TO, typename FPT_FROM> template<typename FPT_TO, typename FPT_FROM>
FPT_TO FPConvert(FPT_FROM op, FPCR fpcr, RoundingMode rounding_mode, FPSR& fpsr); FPT_TO FPConvert(FPT_FROM op, FPCR fpcr, RoundingMode rounding_mode, FPSR& fpsr);
} // namespace Dynarmic::FP } // namespace Dynarmic::FP

View file

@ -3,12 +3,13 @@
* SPDX-License-Identifier: 0BSD * SPDX-License-Identifier: 0BSD
*/ */
#include "dynarmic/common/fp/op/FPMulAdd.h"
#include "dynarmic/common/common_types.h" #include "dynarmic/common/common_types.h"
#include "dynarmic/common/fp/fpcr.h" #include "dynarmic/common/fp/fpcr.h"
#include "dynarmic/common/fp/fpsr.h" #include "dynarmic/common/fp/fpsr.h"
#include "dynarmic/common/fp/info.h"
#include "dynarmic/common/fp/fused.h" #include "dynarmic/common/fp/fused.h"
#include "dynarmic/common/fp/op/FPMulAdd.h" #include "dynarmic/common/fp/info.h"
#include "dynarmic/common/fp/process_exception.h" #include "dynarmic/common/fp/process_exception.h"
#include "dynarmic/common/fp/process_nan.h" #include "dynarmic/common/fp/process_nan.h"
#include "dynarmic/common/fp/unpacked.h" #include "dynarmic/common/fp/unpacked.h"

View file

@ -3,11 +3,12 @@
* SPDX-License-Identifier: 0BSD * SPDX-License-Identifier: 0BSD
*/ */
#include "dynarmic/common/fp/op/FPRSqrtEstimate.h"
#include "dynarmic/common/common_types.h" #include "dynarmic/common/common_types.h"
#include "dynarmic/common/fp/fpcr.h" #include "dynarmic/common/fp/fpcr.h"
#include "dynarmic/common/fp/fpsr.h" #include "dynarmic/common/fp/fpsr.h"
#include "dynarmic/common/fp/info.h" #include "dynarmic/common/fp/info.h"
#include "dynarmic/common/fp/op/FPRSqrtEstimate.h"
#include "dynarmic/common/fp/process_exception.h" #include "dynarmic/common/fp/process_exception.h"
#include "dynarmic/common/fp/process_nan.h" #include "dynarmic/common/fp/process_nan.h"
#include "dynarmic/common/fp/unpacked.h" #include "dynarmic/common/fp/unpacked.h"

View file

@ -3,12 +3,13 @@
* SPDX-License-Identifier: 0BSD * SPDX-License-Identifier: 0BSD
*/ */
#include "dynarmic/common/fp/op/FPRSqrtStepFused.h"
#include "dynarmic/common/fp/fpcr.h" #include "dynarmic/common/fp/fpcr.h"
#include "dynarmic/common/fp/fpsr.h" #include "dynarmic/common/fp/fpsr.h"
#include "dynarmic/common/fp/fused.h" #include "dynarmic/common/fp/fused.h"
#include "dynarmic/common/fp/info.h" #include "dynarmic/common/fp/info.h"
#include "dynarmic/common/fp/op/FPNeg.h" #include "dynarmic/common/fp/op/FPNeg.h"
#include "dynarmic/common/fp/op/FPRSqrtStepFused.h"
#include "dynarmic/common/fp/process_nan.h" #include "dynarmic/common/fp/process_nan.h"
#include "dynarmic/common/fp/unpacked.h" #include "dynarmic/common/fp/unpacked.h"

View file

@ -3,6 +3,8 @@
* SPDX-License-Identifier: 0BSD * SPDX-License-Identifier: 0BSD
*/ */
#include "dynarmic/common/fp/op/FPRecipEstimate.h"
#include <tuple> #include <tuple>
#include "dynarmic/common/assert.h" #include "dynarmic/common/assert.h"
@ -10,7 +12,6 @@
#include "dynarmic/common/fp/fpcr.h" #include "dynarmic/common/fp/fpcr.h"
#include "dynarmic/common/fp/fpsr.h" #include "dynarmic/common/fp/fpsr.h"
#include "dynarmic/common/fp/info.h" #include "dynarmic/common/fp/info.h"
#include "dynarmic/common/fp/op/FPRecipEstimate.h"
#include "dynarmic/common/fp/process_exception.h" #include "dynarmic/common/fp/process_exception.h"
#include "dynarmic/common/fp/process_nan.h" #include "dynarmic/common/fp/process_nan.h"
#include "dynarmic/common/fp/unpacked.h" #include "dynarmic/common/fp/unpacked.h"
@ -39,7 +40,7 @@ FPT FPRecipEstimate(FPT op, FPCR fpcr, FPSR& fpsr) {
} }
if (value.exponent < FPInfo<FPT>::exponent_min - 2) { if (value.exponent < FPInfo<FPT>::exponent_min - 2) {
const bool overflow_to_inf = [&]{ const bool overflow_to_inf = [&] {
switch (fpcr.RMode()) { switch (fpcr.RMode()) {
case RoundingMode::ToNearest_TieEven: case RoundingMode::ToNearest_TieEven:
return true; return true;

View file

@ -3,18 +3,19 @@
* SPDX-License-Identifier: 0BSD * SPDX-License-Identifier: 0BSD
*/ */
#include "dynarmic/common/common_types.h" #include "dynarmic/common/fp/op/FPRecipExponent.h"
#include "dynarmic/common/bit_util.h" #include "dynarmic/common/bit_util.h"
#include "dynarmic/common/common_types.h"
#include "dynarmic/common/fp/fpcr.h" #include "dynarmic/common/fp/fpcr.h"
#include "dynarmic/common/fp/fpsr.h" #include "dynarmic/common/fp/fpsr.h"
#include "dynarmic/common/fp/info.h" #include "dynarmic/common/fp/info.h"
#include "dynarmic/common/fp/op/FPRecipExponent.h"
#include "dynarmic/common/fp/process_nan.h" #include "dynarmic/common/fp/process_nan.h"
#include "dynarmic/common/fp/unpacked.h" #include "dynarmic/common/fp/unpacked.h"
namespace Dynarmic::FP { namespace Dynarmic::FP {
namespace { namespace {
template <typename FPT> template<typename FPT>
FPT DetermineExponentValue(size_t value) { FPT DetermineExponentValue(size_t value) {
if constexpr (sizeof(FPT) == sizeof(u32)) { if constexpr (sizeof(FPT) == sizeof(u32)) {
return static_cast<FPT>(Common::Bits<23, 30>(value)); return static_cast<FPT>(Common::Bits<23, 30>(value));
@ -26,7 +27,7 @@ FPT DetermineExponentValue(size_t value) {
} }
} // Anonymous namespace } // Anonymous namespace
template <typename FPT> template<typename FPT>
FPT FPRecipExponent(FPT op, FPCR fpcr, FPSR& fpsr) { FPT FPRecipExponent(FPT op, FPCR fpcr, FPSR& fpsr) {
const auto [type, sign, value] = FPUnpack<FPT>(op, fpcr, fpsr); const auto [type, sign, value] = FPUnpack<FPT>(op, fpcr, fpsr);
(void)value; (void)value;

View file

@ -10,7 +10,7 @@ namespace Dynarmic::FP {
class FPCR; class FPCR;
class FPSR; class FPSR;
template <typename FPT> template<typename FPT>
FPT FPRecipExponent(FPT op, FPCR fpcr, FPSR& fpsr); FPT FPRecipExponent(FPT op, FPCR fpcr, FPSR& fpsr);
} // namespace Dynarmic::FP } // namespace Dynarmic::FP

View file

@ -3,12 +3,13 @@
* SPDX-License-Identifier: 0BSD * SPDX-License-Identifier: 0BSD
*/ */
#include "dynarmic/common/fp/op/FPRecipStepFused.h"
#include "dynarmic/common/fp/fpcr.h" #include "dynarmic/common/fp/fpcr.h"
#include "dynarmic/common/fp/fpsr.h" #include "dynarmic/common/fp/fpsr.h"
#include "dynarmic/common/fp/fused.h" #include "dynarmic/common/fp/fused.h"
#include "dynarmic/common/fp/info.h" #include "dynarmic/common/fp/info.h"
#include "dynarmic/common/fp/op/FPNeg.h" #include "dynarmic/common/fp/op/FPNeg.h"
#include "dynarmic/common/fp/op/FPRecipStepFused.h"
#include "dynarmic/common/fp/process_nan.h" #include "dynarmic/common/fp/process_nan.h"
#include "dynarmic/common/fp/unpacked.h" #include "dynarmic/common/fp/unpacked.h"

View file

@ -3,6 +3,8 @@
* SPDX-License-Identifier: 0BSD * SPDX-License-Identifier: 0BSD
*/ */
#include "dynarmic/common/fp/op/FPRoundInt.h"
#include "dynarmic/common/assert.h" #include "dynarmic/common/assert.h"
#include "dynarmic/common/bit_util.h" #include "dynarmic/common/bit_util.h"
#include "dynarmic/common/common_types.h" #include "dynarmic/common/common_types.h"
@ -10,7 +12,6 @@
#include "dynarmic/common/fp/fpsr.h" #include "dynarmic/common/fp/fpsr.h"
#include "dynarmic/common/fp/info.h" #include "dynarmic/common/fp/info.h"
#include "dynarmic/common/fp/mantissa_util.h" #include "dynarmic/common/fp/mantissa_util.h"
#include "dynarmic/common/fp/op/FPRoundInt.h"
#include "dynarmic/common/fp/process_exception.h" #include "dynarmic/common/fp/process_exception.h"
#include "dynarmic/common/fp/process_nan.h" #include "dynarmic/common/fp/process_nan.h"
#include "dynarmic/common/fp/rounding_mode.h" #include "dynarmic/common/fp/rounding_mode.h"

View file

@ -3,13 +3,14 @@
* SPDX-License-Identifier: 0BSD * SPDX-License-Identifier: 0BSD
*/ */
#include "dynarmic/common/fp/op/FPToFixed.h"
#include "dynarmic/common/assert.h" #include "dynarmic/common/assert.h"
#include "dynarmic/common/bit_util.h" #include "dynarmic/common/bit_util.h"
#include "dynarmic/common/common_types.h" #include "dynarmic/common/common_types.h"
#include "dynarmic/common/fp/fpcr.h" #include "dynarmic/common/fp/fpcr.h"
#include "dynarmic/common/fp/fpsr.h" #include "dynarmic/common/fp/fpsr.h"
#include "dynarmic/common/fp/mantissa_util.h" #include "dynarmic/common/fp/mantissa_util.h"
#include "dynarmic/common/fp/op/FPToFixed.h"
#include "dynarmic/common/fp/process_exception.h" #include "dynarmic/common/fp/process_exception.h"
#include "dynarmic/common/fp/rounding_mode.h" #include "dynarmic/common/fp/rounding_mode.h"
#include "dynarmic/common/fp/unpacked.h" #include "dynarmic/common/fp/unpacked.h"

View file

@ -3,10 +3,11 @@
* SPDX-License-Identifier: 0BSD * SPDX-License-Identifier: 0BSD
*/ */
#include "dynarmic/common/fp/process_exception.h"
#include "dynarmic/common/assert.h" #include "dynarmic/common/assert.h"
#include "dynarmic/common/fp/fpcr.h" #include "dynarmic/common/fp/fpcr.h"
#include "dynarmic/common/fp/fpsr.h" #include "dynarmic/common/fp/fpsr.h"
#include "dynarmic/common/fp/process_exception.h"
namespace Dynarmic::FP { namespace Dynarmic::FP {

View file

@ -3,6 +3,8 @@
* SPDX-License-Identifier: 0BSD * SPDX-License-Identifier: 0BSD
*/ */
#include "dynarmic/common/fp/process_nan.h"
#include <optional> #include <optional>
#include "dynarmic/common/assert.h" #include "dynarmic/common/assert.h"
@ -11,7 +13,6 @@
#include "dynarmic/common/fp/fpsr.h" #include "dynarmic/common/fp/fpsr.h"
#include "dynarmic/common/fp/info.h" #include "dynarmic/common/fp/info.h"
#include "dynarmic/common/fp/process_exception.h" #include "dynarmic/common/fp/process_exception.h"
#include "dynarmic/common/fp/process_nan.h"
#include "dynarmic/common/fp/unpacked.h" #include "dynarmic/common/fp/unpacked.h"
namespace Dynarmic::FP { namespace Dynarmic::FP {

View file

@ -3,12 +3,13 @@
* SPDX-License-Identifier: 0BSD * SPDX-License-Identifier: 0BSD
*/ */
#include "dynarmic/common/fp/unpacked.h"
#include "dynarmic/common/fp/fpsr.h" #include "dynarmic/common/fp/fpsr.h"
#include "dynarmic/common/fp/info.h" #include "dynarmic/common/fp/info.h"
#include "dynarmic/common/fp/mantissa_util.h" #include "dynarmic/common/fp/mantissa_util.h"
#include "dynarmic/common/fp/process_exception.h" #include "dynarmic/common/fp/process_exception.h"
#include "dynarmic/common/fp/rounding_mode.h" #include "dynarmic/common/fp/rounding_mode.h"
#include "dynarmic/common/fp/unpacked.h"
#include "dynarmic/common/safe_ops.h" #include "dynarmic/common/safe_ops.h"
namespace Dynarmic::FP { namespace Dynarmic::FP {
@ -143,12 +144,12 @@ FPT FPRoundBase(FPUnpacked op, FPCR fpcr, RoundingMode rounding, FPSR& fpsr) {
FPT result = 0; FPT result = 0;
#ifdef _MSC_VER #ifdef _MSC_VER
#pragma warning(push) # pragma warning(push)
#pragma warning(disable:4127) // C4127: conditional expression is constant # pragma warning(disable : 4127) // C4127: conditional expression is constant
#endif #endif
if (!isFP16 || !fpcr.AHP()) { if (!isFP16 || !fpcr.AHP()) {
#ifdef _MSC_VER #ifdef _MSC_VER
#pragma warning(pop) # pragma warning(pop)
#endif #endif
constexpr int max_biased_exp = (1 << E) - 1; constexpr int max_biased_exp = (1 << E) - 1;
if (biased_exp >= max_biased_exp) { if (biased_exp >= max_biased_exp) {

View file

@ -14,10 +14,12 @@
namespace Dynarmic::Common { namespace Dynarmic::Common {
template <typename T> class IntrusiveList; template<typename T>
template <typename T> class IntrusiveListIterator; class IntrusiveList;
template<typename T>
class IntrusiveListIterator;
template <typename T> template<typename T>
class IntrusiveListNode { class IntrusiveListNode {
public: public:
bool IsSentinel() const { bool IsSentinel() const {
@ -34,9 +36,8 @@ protected:
friend class IntrusiveListIterator<const T>; friend class IntrusiveListIterator<const T>;
}; };
template <typename T> template<typename T>
class IntrusiveListSentinel final : public IntrusiveListNode<T> class IntrusiveListSentinel final : public IntrusiveListNode<T> {
{
using IntrusiveListNode<T>::next; using IntrusiveListNode<T>::next;
using IntrusiveListNode<T>::prev; using IntrusiveListNode<T>::prev;
using IntrusiveListNode<T>::is_sentinel; using IntrusiveListNode<T>::is_sentinel;
@ -49,7 +50,7 @@ public:
} }
}; };
template <typename T> template<typename T>
class IntrusiveListIterator { class IntrusiveListIterator {
public: public:
using iterator_category = std::bidirectional_iterator_tag; using iterator_category = std::bidirectional_iterator_tag;
@ -71,11 +72,14 @@ public:
IntrusiveListIterator(const IntrusiveListIterator& other) = default; IntrusiveListIterator(const IntrusiveListIterator& other) = default;
IntrusiveListIterator& operator=(const IntrusiveListIterator& other) = default; IntrusiveListIterator& operator=(const IntrusiveListIterator& other) = default;
explicit IntrusiveListIterator(node_pointer list_node) : node(list_node) { explicit IntrusiveListIterator(node_pointer list_node)
: node(list_node) {
} }
explicit IntrusiveListIterator(pointer data) : node(data) { explicit IntrusiveListIterator(pointer data)
: node(data) {
} }
explicit IntrusiveListIterator(reference data) : node(&data) { explicit IntrusiveListIterator(reference data)
: node(&data) {
} }
IntrusiveListIterator& operator++() { IntrusiveListIterator& operator++() {
@ -121,7 +125,7 @@ private:
node_pointer node = nullptr; node_pointer node = nullptr;
}; };
template <typename T> template<typename T>
class IntrusiveList { class IntrusiveList {
public: public:
using difference_type = std::ptrdiff_t; using difference_type = std::ptrdiff_t;
@ -222,10 +226,10 @@ public:
node->prev->next = node->next; node->prev->next = node->next;
node->next->prev = node->prev; node->next->prev = node->prev;
#if !defined(NDEBUG) #if !defined(NDEBUG)
node->next = nullptr; node->next = nullptr;
node->prev = nullptr; node->prev = nullptr;
#endif #endif
return node; return node;
} }
@ -367,7 +371,7 @@ private:
* @param lhs The first list. * @param lhs The first list.
* @param rhs The second list. * @param rhs The second list.
*/ */
template <typename T> template<typename T>
void swap(IntrusiveList<T>& lhs, IntrusiveList<T>& rhs) noexcept { void swap(IntrusiveList<T>& lhs, IntrusiveList<T>& rhs) noexcept {
lhs.swap(rhs); lhs.swap(rhs);
} }

View file

@ -8,8 +8,8 @@
#include <fmt/format.h> #include <fmt/format.h>
#ifdef DYNARMIC_USE_LLVM #ifdef DYNARMIC_USE_LLVM
#include <llvm-c/Disassembler.h> # include <llvm-c/Disassembler.h>
#include <llvm-c/Target.h> # include <llvm-c/Target.h>
#endif #endif
#include "dynarmic/common/assert.h" #include "dynarmic/common/assert.h"
@ -79,8 +79,10 @@ std::string DisassembleAArch32([[maybe_unused]] bool is_thumb, [[maybe_unused]]
result += inst_size > 0 ? buffer : "<invalid instruction>"; result += inst_size > 0 ? buffer : "<invalid instruction>";
result += '\n'; result += '\n';
if (inst_size == 0) inst_size = is_thumb ? 2 : 4; if (inst_size == 0)
if (length <= inst_size) break; inst_size = is_thumb ? 2 : 4;
if (length <= inst_size)
break;
pc += inst_size; pc += inst_size;
instructions += inst_size; instructions += inst_size;

View file

@ -14,12 +14,12 @@
#include <mp/typelist/list.h> #include <mp/typelist/list.h>
#ifdef _MSC_VER #ifdef _MSC_VER
#include <mp/typelist/head.h> # include <mp/typelist/head.h>
#endif #endif
namespace Dynarmic::Common { namespace Dynarmic::Common {
template <typename Function, typename ...Values> template<typename Function, typename... Values>
inline auto GenerateLookupTableFromList(Function f, mp::list<Values...>) { inline auto GenerateLookupTableFromList(Function f, mp::list<Values...>) {
#ifdef _MSC_VER #ifdef _MSC_VER
using PairT = std::invoke_result_t<Function, mp::head<mp::list<Values...>>>; using PairT = std::invoke_result_t<Function, mp::head<mp::list<Values...>>>;

View file

@ -6,10 +6,10 @@
#pragma once #pragma once
#define CONCATENATE_TOKENS(x, y) CONCATENATE_TOKENS_IMPL(x, y) #define CONCATENATE_TOKENS(x, y) CONCATENATE_TOKENS_IMPL(x, y)
#define CONCATENATE_TOKENS_IMPL(x, y) x ## y #define CONCATENATE_TOKENS_IMPL(x, y) x##y
#ifdef __COUNTER__ #ifdef __COUNTER__
#define ANONYMOUS_VARIABLE(str) CONCATENATE_TOKENS(str, __COUNTER__) # define ANONYMOUS_VARIABLE(str) CONCATENATE_TOKENS(str, __COUNTER__)
#else #else
#define ANONYMOUS_VARIABLE(str) CONCATENATE_TOKENS(str, __LINE__) # define ANONYMOUS_VARIABLE(str) CONCATENATE_TOKENS(str, __LINE__)
#endif #endif

View file

@ -3,9 +3,10 @@
* SPDX-License-Identifier: 0BSD * SPDX-License-Identifier: 0BSD
*/ */
#include <array>
#include "dynarmic/common/math_util.h" #include "dynarmic/common/math_util.h"
#include <array>
namespace Dynarmic::Common { namespace Dynarmic::Common {
u8 RecipEstimate(u64 a) { u8 RecipEstimate(u64 a) {

View file

@ -3,13 +3,14 @@
* SPDX-License-Identifier: 0BSD * SPDX-License-Identifier: 0BSD
*/ */
#include <cstdlib>
#include "dynarmic/common/memory_pool.h" #include "dynarmic/common/memory_pool.h"
#include <cstdlib>
namespace Dynarmic::Common { namespace Dynarmic::Common {
Pool::Pool(size_t object_size, size_t initial_pool_size) : object_size(object_size), slab_size(initial_pool_size) { Pool::Pool(size_t object_size, size_t initial_pool_size)
: object_size(object_size), slab_size(initial_pool_size) {
AllocateNewSlab(); AllocateNewSlab();
} }

View file

@ -13,10 +13,14 @@
namespace Dynarmic::Safe { namespace Dynarmic::Safe {
template<typename T> T LogicalShiftLeft(T value, int shift_amount); template<typename T>
template<typename T> T LogicalShiftRight(T value, int shift_amount); T LogicalShiftLeft(T value, int shift_amount);
template<typename T> T ArithmeticShiftLeft(T value, int shift_amount); template<typename T>
template<typename T> T ArithmeticShiftRight(T value, int shift_amount); T LogicalShiftRight(T value, int shift_amount);
template<typename T>
T ArithmeticShiftLeft(T value, int shift_amount);
template<typename T>
T ArithmeticShiftRight(T value, int shift_amount);
template<typename T> template<typename T>
T LogicalShiftLeft(T value, int shift_amount) { T LogicalShiftLeft(T value, int shift_amount) {

View file

@ -17,40 +17,46 @@ struct ScopeExitTag {};
struct ScopeFailTag {}; struct ScopeFailTag {};
struct ScopeSuccessTag {}; struct ScopeSuccessTag {};
template <typename Function> template<typename Function>
class ScopeExit final { class ScopeExit final {
public: public:
explicit ScopeExit(Function&& fn) : function(std::move(fn)) {} explicit ScopeExit(Function&& fn)
: function(std::move(fn)) {}
~ScopeExit() noexcept { ~ScopeExit() noexcept {
function(); function();
} }
private: private:
Function function; Function function;
}; };
template <typename Function> template<typename Function>
class ScopeFail final { class ScopeFail final {
public: public:
explicit ScopeFail(Function&& fn) : function(std::move(fn)), exception_count(std::uncaught_exceptions()) {} explicit ScopeFail(Function&& fn)
: function(std::move(fn)), exception_count(std::uncaught_exceptions()) {}
~ScopeFail() noexcept { ~ScopeFail() noexcept {
if (std::uncaught_exceptions() > exception_count) { if (std::uncaught_exceptions() > exception_count) {
function(); function();
} }
} }
private: private:
Function function; Function function;
int exception_count; int exception_count;
}; };
template <typename Function> template<typename Function>
class ScopeSuccess final { class ScopeSuccess final {
public: public:
explicit ScopeSuccess(Function&& fn) : function(std::move(fn)), exception_count(std::uncaught_exceptions()) {} explicit ScopeSuccess(Function&& fn)
: function(std::move(fn)), exception_count(std::uncaught_exceptions()) {}
~ScopeSuccess() { ~ScopeSuccess() {
if (std::uncaught_exceptions() <= exception_count) { if (std::uncaught_exceptions() <= exception_count) {
function(); function();
} }
} }
private: private:
Function function; Function function;
int exception_count; int exception_count;
@ -58,23 +64,23 @@ private:
// We use ->* here as it has the highest precedence of the operators we can use. // We use ->* here as it has the highest precedence of the operators we can use.
template <typename Function> template<typename Function>
auto operator->*(ScopeExitTag, Function&& function) { auto operator->*(ScopeExitTag, Function&& function) {
return ScopeExit<std::decay_t<Function>>{std::forward<Function>(function)}; return ScopeExit<std::decay_t<Function>>{std::forward<Function>(function)};
} }
template <typename Function> template<typename Function>
auto operator->*(ScopeFailTag, Function&& function) { auto operator->*(ScopeFailTag, Function&& function) {
return ScopeFail<std::decay_t<Function>>{std::forward<Function>(function)}; return ScopeFail<std::decay_t<Function>>{std::forward<Function>(function)};
} }
template <typename Function> template<typename Function>
auto operator->*(ScopeSuccessTag, Function&& function) { auto operator->*(ScopeSuccessTag, Function&& function) {
return ScopeSuccess<std::decay_t<Function>>{std::forward<Function>(function)}; return ScopeSuccess<std::decay_t<Function>>{std::forward<Function>(function)};
} }
} // namespace Dynarmic::detail } // namespace Dynarmic::detail
#define SCOPE_EXIT auto ANONYMOUS_VARIABLE(_SCOPE_EXIT_) = ::Dynarmic::detail::ScopeExitTag{} ->* [&]() noexcept #define SCOPE_EXIT auto ANONYMOUS_VARIABLE(_SCOPE_EXIT_) = ::Dynarmic::detail::ScopeExitTag{}->*[&]() noexcept
#define SCOPE_FAIL auto ANONYMOUS_VARIABLE(_SCOPE_FAIL_) = ::Dynarmic::detail::ScopeFailTag{} ->* [&]() noexcept #define SCOPE_FAIL auto ANONYMOUS_VARIABLE(_SCOPE_FAIL_) = ::Dynarmic::detail::ScopeFailTag{}->*[&]() noexcept
#define SCOPE_SUCCESS auto ANONYMOUS_VARIABLE(_SCOPE_FAIL_) = ::Dynarmic::detail::ScopeSuccessTag{} ->* [&]() #define SCOPE_SUCCESS auto ANONYMOUS_VARIABLE(_SCOPE_FAIL_) = ::Dynarmic::detail::ScopeSuccessTag{}->*[&]()

View file

@ -7,7 +7,7 @@
namespace Dynarmic::Common { namespace Dynarmic::Common {
template <typename T> template<typename T>
constexpr char SignToChar(T value) { constexpr char SignToChar(T value) {
return value >= 0 ? '+' : '-'; return value >= 0 ? '+' : '-';
} }

View file

@ -3,9 +3,10 @@
* SPDX-License-Identifier: 0BSD * SPDX-License-Identifier: 0BSD
*/ */
#include "dynarmic/common/common_types.h"
#include "dynarmic/common/u128.h" #include "dynarmic/common/u128.h"
#include "dynarmic/common/common_types.h"
namespace Dynarmic { namespace Dynarmic {
u128 Multiply64To128(u64 a, u64 b) { u128 Multiply64To128(u64 a, u64 b) {

View file

@ -20,10 +20,12 @@ struct u128 {
u128& operator=(const u128&) = default; u128& operator=(const u128&) = default;
u128& operator=(u128&&) = default; u128& operator=(u128&&) = default;
u128(u64 lower_, u64 upper_) : lower(lower_), upper(upper_) {} u128(u64 lower_, u64 upper_)
: lower(lower_), upper(upper_) {}
template<typename T> template<typename T>
/* implicit */ u128(T value) : lower(value), upper(0) { /* implicit */ u128(T value)
: lower(value), upper(0) {
static_assert(std::is_integral_v<T>); static_assert(std::is_integral_v<T>);
static_assert(Common::BitSize<T>() <= Common::BitSize<u64>()); static_assert(Common::BitSize<T>() <= Common::BitSize<u64>());
} }

View file

@ -6,7 +6,7 @@
#pragma once #pragma once
#if defined(__clang__) || defined(__GNUC__) #if defined(__clang__) || defined(__GNUC__)
#define UNLIKELY(x) __builtin_expect(!!(x), 0) # define UNLIKELY(x) __builtin_expect(!!(x), 0)
#else #else
#define UNLIKELY(x) !!(x) # define UNLIKELY(x) !!(x)
#endif #endif

View file

@ -10,11 +10,11 @@
namespace Dynarmic::Common { namespace Dynarmic::Common {
namespace detail { namespace detail {
template <typename ReturnT, typename Lambda> template<typename ReturnT, typename Lambda>
struct VariantVisitor : boost::static_visitor<ReturnT>, Lambda { struct VariantVisitor : boost::static_visitor<ReturnT>
, Lambda {
VariantVisitor(Lambda&& lambda) VariantVisitor(Lambda&& lambda)
: Lambda(std::move(lambda)) : Lambda(std::move(lambda)) {}
{}
using Lambda::operator(); using Lambda::operator();
}; };

View file

@ -16,13 +16,13 @@ namespace Dynarmic::A32 {
/** /**
* Representation of the Floating-Point Status and Control Register. * Representation of the Floating-Point Status and Control Register.
*/ */
class FPSCR final class FPSCR final {
{
public: public:
FPSCR() = default; FPSCR() = default;
FPSCR(const FPSCR&) = default; FPSCR(const FPSCR&) = default;
FPSCR(FPSCR&&) = default; FPSCR(FPSCR&&) = default;
explicit FPSCR(u32 data) : value{data & mask} {} explicit FPSCR(u32 data)
: value{data & mask} {}
FPSCR& operator=(const FPSCR&) = default; FPSCR& operator=(const FPSCR&) = default;
FPSCR& operator=(FPSCR&&) = default; FPSCR& operator=(FPSCR&&) = default;

View file

@ -5,8 +5,8 @@
#pragma once #pragma once
#include "dynarmic/common/common_types.h"
#include "dynarmic/common/bit_util.h" #include "dynarmic/common/bit_util.h"
#include "dynarmic/common/common_types.h"
#include "dynarmic/ir/cond.h" #include "dynarmic/ir/cond.h"
namespace Dynarmic::A32 { namespace Dynarmic::A32 {
@ -14,7 +14,8 @@ namespace Dynarmic::A32 {
class ITState final { class ITState final {
public: public:
ITState() = default; ITState() = default;
explicit ITState(u8 data) : value(data) {} explicit ITState(u8 data)
: value(data) {}
ITState& operator=(u8 data) { ITState& operator=(u8 data) {
value = data; value = data;

View file

@ -56,7 +56,8 @@ public:
}; };
PSR() = default; PSR() = default;
explicit PSR(u32 data) : value{data & mask} {} explicit PSR(u32 data)
: value{data & mask} {}
PSR& operator=(u32 data) { PSR& operator=(u32 data) {
value = data & mask; value = data & mask;

View file

@ -19,10 +19,10 @@
namespace Dynarmic::A32 { namespace Dynarmic::A32 {
template <typename Visitor> template<typename Visitor>
using ArmMatcher = Decoder::Matcher<Visitor, u32>; using ArmMatcher = Decoder::Matcher<Visitor, u32>;
template <typename V> template<typename V>
std::vector<ArmMatcher<V>> GetArmDecodeTable() { std::vector<ArmMatcher<V>> GetArmDecodeTable() {
std::vector<ArmMatcher<V>> table = { std::vector<ArmMatcher<V>> table = {

View file

@ -18,10 +18,10 @@
namespace Dynarmic::A32 { namespace Dynarmic::A32 {
template <typename Visitor> template<typename Visitor>
using ASIMDMatcher = Decoder::Matcher<Visitor, u32>; using ASIMDMatcher = Decoder::Matcher<Visitor, u32>;
template <typename V> template<typename V>
std::vector<ASIMDMatcher<V>> GetASIMDDecodeTable() { std::vector<ASIMDMatcher<V>> GetASIMDDecodeTable() {
std::vector<ASIMDMatcher<V>> table = { std::vector<ASIMDMatcher<V>> table = {

View file

@ -16,7 +16,7 @@
namespace Dynarmic::A32 { namespace Dynarmic::A32 {
template <typename Visitor> template<typename Visitor>
using Thumb16Matcher = Decoder::Matcher<Visitor, u16>; using Thumb16Matcher = Decoder::Matcher<Visitor, u16>;
template<typename V> template<typename V>
@ -29,7 +29,7 @@ std::optional<std::reference_wrapper<const Thumb16Matcher<V>>> DecodeThumb16(u16
}; };
const auto matches_instruction = [instruction](const auto& matcher){ return matcher.Matches(instruction); }; const auto matches_instruction = [instruction](const auto& matcher) { return matcher.Matches(instruction); };
auto iter = std::find_if(table.begin(), table.end(), matches_instruction); auto iter = std::find_if(table.begin(), table.end(), matches_instruction);
return iter != table.end() ? std::optional<std::reference_wrapper<const Thumb16Matcher<V>>>(*iter) : std::nullopt; return iter != table.end() ? std::optional<std::reference_wrapper<const Thumb16Matcher<V>>>(*iter) : std::nullopt;

View file

@ -15,7 +15,7 @@
namespace Dynarmic::A32 { namespace Dynarmic::A32 {
template <typename Visitor> template<typename Visitor>
using Thumb32Matcher = Decoder::Matcher<Visitor, u32>; using Thumb32Matcher = Decoder::Matcher<Visitor, u32>;
template<typename V> template<typename V>
@ -28,7 +28,7 @@ std::optional<std::reference_wrapper<const Thumb32Matcher<V>>> DecodeThumb32(u32
}; };
const auto matches_instruction = [instruction](const auto& matcher){ return matcher.Matches(instruction); }; const auto matches_instruction = [instruction](const auto& matcher) { return matcher.Matches(instruction); };
auto iter = std::find_if(table.begin(), table.end(), matches_instruction); auto iter = std::find_if(table.begin(), table.end(), matches_instruction);
return iter != table.end() ? std::optional<std::reference_wrapper<const Thumb32Matcher<V>>>(*iter) : std::nullopt; return iter != table.end() ? std::optional<std::reference_wrapper<const Thumb32Matcher<V>>>(*iter) : std::nullopt;

View file

@ -10,14 +10,13 @@
#include <optional> #include <optional>
#include <vector> #include <vector>
#include "dynarmic/common/common_types.h" #include "dynarmic/common/common_types.h"
#include "dynarmic/frontend/decoder/decoder_detail.h" #include "dynarmic/frontend/decoder/decoder_detail.h"
#include "dynarmic/frontend/decoder/matcher.h" #include "dynarmic/frontend/decoder/matcher.h"
namespace Dynarmic::A32 { namespace Dynarmic::A32 {
template <typename Visitor> template<typename Visitor>
using VFPMatcher = Decoder::Matcher<Visitor, u32>; using VFPMatcher = Decoder::Matcher<Visitor, u32>;
template<typename V> template<typename V>
@ -27,7 +26,7 @@ std::optional<std::reference_wrapper<const VFPMatcher<V>>> DecodeVFP(u32 instruc
static const struct Tables { static const struct Tables {
Table unconditional; Table unconditional;
Table conditional; Table conditional;
} tables = []{ } tables = [] {
Table list = { Table list = {
#define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(VFPMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)), #define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(VFPMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)),
@ -49,7 +48,7 @@ std::optional<std::reference_wrapper<const VFPMatcher<V>>> DecodeVFP(u32 instruc
const bool is_unconditional = (instruction & 0xF0000000) == 0xF0000000; const bool is_unconditional = (instruction & 0xF0000000) == 0xF0000000;
const Table& table = is_unconditional ? tables.unconditional : tables.conditional; const Table& table = is_unconditional ? tables.unconditional : tables.conditional;
const auto matches_instruction = [instruction](const auto& matcher){ return matcher.Matches(instruction); }; const auto matches_instruction = [instruction](const auto& matcher) { return matcher.Matches(instruction); };
auto iter = std::find_if(table.begin(), table.end(), matches_instruction); auto iter = std::find_if(table.begin(), table.end(), matches_instruction);
return iter != table.end() ? std::optional<std::reference_wrapper<const VFPMatcher<V>>>(*iter) : std::nullopt; return iter != table.end() ? std::optional<std::reference_wrapper<const VFPMatcher<V>>>(*iter) : std::nullopt;

View file

@ -11,11 +11,11 @@
#include "dynarmic/common/bit_util.h" #include "dynarmic/common/bit_util.h"
#include "dynarmic/common/string_util.h" #include "dynarmic/common/string_util.h"
#include "dynarmic/frontend/imm.h"
#include "dynarmic/frontend/A32/decoder/arm.h" #include "dynarmic/frontend/A32/decoder/arm.h"
#include "dynarmic/frontend/A32/decoder/vfp.h" #include "dynarmic/frontend/A32/decoder/vfp.h"
#include "dynarmic/frontend/A32/disassembler/disassembler.h" #include "dynarmic/frontend/A32/disassembler/disassembler.h"
#include "dynarmic/frontend/A32/types.h" #include "dynarmic/frontend/A32/types.h"
#include "dynarmic/frontend/imm.h"
namespace Dynarmic::A32 { namespace Dynarmic::A32 {
@ -24,22 +24,26 @@ public:
using instruction_return_type = std::string; using instruction_return_type = std::string;
static u32 ArmExpandImm(int rotate, Imm<8> imm8) { static u32 ArmExpandImm(int rotate, Imm<8> imm8) {
return Common::RotateRight(static_cast<u32>(imm8.ZeroExtend()), rotate*2); return Common::RotateRight(static_cast<u32>(imm8.ZeroExtend()), rotate * 2);
} }
static std::string ShiftStr(ShiftType shift, Imm<5> imm5) { static std::string ShiftStr(ShiftType shift, Imm<5> imm5) {
switch (shift) { switch (shift) {
case ShiftType::LSL: case ShiftType::LSL:
if (imm5 == 0) return ""; if (imm5 == 0)
return "";
return fmt::format(", lsl #{}", imm5.ZeroExtend()); return fmt::format(", lsl #{}", imm5.ZeroExtend());
case ShiftType::LSR: case ShiftType::LSR:
if (imm5 == 0) return ", lsr #32"; if (imm5 == 0)
return ", lsr #32";
return fmt::format(", lsr #{}", imm5.ZeroExtend()); return fmt::format(", lsr #{}", imm5.ZeroExtend());
case ShiftType::ASR: case ShiftType::ASR:
if (imm5 == 0) return ", asr #32"; if (imm5 == 0)
return ", asr #32";
return fmt::format(", asr #{}", imm5.ZeroExtend()); return fmt::format(", asr #{}", imm5.ZeroExtend());
case ShiftType::ROR: case ShiftType::ROR:
if (imm5 == 0) return ", rrx"; if (imm5 == 0)
return ", rrx";
return fmt::format(", ror #{}", imm5.ZeroExtend()); return fmt::format(", ror #{}", imm5.ZeroExtend());
} }
ASSERT(false); ASSERT(false);
@ -47,7 +51,7 @@ public:
} }
static std::string RsrStr(Reg s, ShiftType shift, Reg m) { static std::string RsrStr(Reg s, ShiftType shift, Reg m) {
switch (shift){ switch (shift) {
case ShiftType::LSL: case ShiftType::LSL:
return fmt::format("{}, lsl {}", m, s); return fmt::format("{}, lsl {}", m, s);
case ShiftType::LSR: case ShiftType::LSR:
@ -234,14 +238,20 @@ public:
// CRC32 instructions // CRC32 instructions
std::string arm_CRC32([[maybe_unused]] Cond cond, Imm<2> sz, Reg n, Reg d, Reg m) { std::string arm_CRC32([[maybe_unused]] Cond cond, Imm<2> sz, Reg n, Reg d, Reg m) {
static constexpr std::array data_type{ static constexpr std::array data_type{
"b", "h", "w", "invalid", "b",
"h",
"w",
"invalid",
}; };
return fmt::format("crc32{} {}, {}, {}", data_type[sz.ZeroExtend()], d, n, m); return fmt::format("crc32{} {}, {}, {}", data_type[sz.ZeroExtend()], d, n, m);
} }
std::string arm_CRC32C([[maybe_unused]] Cond cond, Imm<2> sz, Reg n, Reg d, Reg m) { std::string arm_CRC32C([[maybe_unused]] Cond cond, Imm<2> sz, Reg n, Reg d, Reg m) {
static constexpr std::array data_type{ static constexpr std::array data_type{
"b", "h", "w", "invalid", "b",
"h",
"w",
"invalid",
}; };
return fmt::format("crc32c{} {}, {}, {}", data_type[sz.ZeroExtend()], d, n, m); return fmt::format("crc32c{} {}, {}, {}", data_type[sz.ZeroExtend()], d, n, m);
@ -548,11 +558,11 @@ public:
if (P) { if (P) {
return fmt::format("ldrd{} {}, {}, [{}, #{}{}]{}", return fmt::format("ldrd{} {}, {}, [{}, #{}{}]{}",
CondToString(cond), t, t+1, n, sign, imm32, CondToString(cond), t, t + 1, n, sign, imm32,
W ? "!" : ""); W ? "!" : "");
} else { } else {
return fmt::format("ldrd{} {}, {}, [{}], #{}{}{}", return fmt::format("ldrd{} {}, {}, [{}], #{}{}{}",
CondToString(cond), t, t+1, n, sign, imm32, CondToString(cond), t, t + 1, n, sign, imm32,
W ? " (err: W == 1!!!)" : ""); W ? " (err: W == 1!!!)" : "");
} }
} }
@ -561,11 +571,11 @@ public:
if (P) { if (P) {
return fmt::format("ldrd{} {}, {}, [{}, {}{}]{}", return fmt::format("ldrd{} {}, {}, [{}, {}{}]{}",
CondToString(cond), t, t+1, n, sign, m, CondToString(cond), t, t + 1, n, sign, m,
W ? "!" : ""); W ? "!" : "");
} else { } else {
return fmt::format("ldrd{} {}, {}, [{}], {}{}{}", return fmt::format("ldrd{} {}, {}, [{}], {}{}{}",
CondToString(cond), t, t+1, n, sign, m, CondToString(cond), t, t + 1, n, sign, m,
W ? " (err: W == 1!!!)" : ""); W ? " (err: W == 1!!!)" : "");
} }
} }
@ -728,11 +738,11 @@ public:
if (P) { if (P) {
return fmt::format("strd{} {}, {}, [{}, #{}{}]{}", return fmt::format("strd{} {}, {}, [{}, #{}{}]{}",
CondToString(cond), t, t+1, n, sign, imm32, CondToString(cond), t, t + 1, n, sign, imm32,
W ? "!" : ""); W ? "!" : "");
} else { } else {
return fmt::format("strd{} {}, {}, [{}], #{}{}{}", return fmt::format("strd{} {}, {}, [{}], #{}{}{}",
CondToString(cond), t, t+1, n, sign, imm32, CondToString(cond), t, t + 1, n, sign, imm32,
W ? " (err: W == 1!!!)" : ""); W ? " (err: W == 1!!!)" : "");
} }
} }
@ -741,11 +751,11 @@ public:
if (P) { if (P) {
return fmt::format("strd{} {}, {}, [{}, {}{}]{}", return fmt::format("strd{} {}, {}, [{}, {}{}]{}",
CondToString(cond), t, t+1, n, sign, m, CondToString(cond), t, t + 1, n, sign, m,
W ? "!" : ""); W ? "!" : "");
} else { } else {
return fmt::format("strd{} {}, {}, [{}], {}{}{}", return fmt::format("strd{} {}, {}, [{}], {}{}{}",
CondToString(cond), t, t+1, n, sign, m, CondToString(cond), t, t + 1, n, sign, m,
W ? " (err: W == 1!!!)" : ""); W ? " (err: W == 1!!!)" : "");
} }
} }
@ -1143,7 +1153,7 @@ public:
return fmt::format("ldaexb{} {}, [{}]", CondToString(cond), t, n); return fmt::format("ldaexb{} {}, [{}]", CondToString(cond), t, n);
} }
std::string arm_LDAEXD(Cond cond, Reg n, Reg t) { std::string arm_LDAEXD(Cond cond, Reg n, Reg t) {
return fmt::format("ldaexd{} {}, {}, [{}]", CondToString(cond), t, t+1, n); return fmt::format("ldaexd{} {}, {}, [{}]", CondToString(cond), t, t + 1, n);
} }
std::string arm_LDAEXH(Cond cond, Reg n, Reg t) { std::string arm_LDAEXH(Cond cond, Reg n, Reg t) {
return fmt::format("ldaexh{} {}, [{}]", CondToString(cond), t, n); return fmt::format("ldaexh{} {}, [{}]", CondToString(cond), t, n);
@ -1164,7 +1174,7 @@ public:
return fmt::format("stlexb{} {}, {}, [{}]", CondToString(cond), d, m, n); return fmt::format("stlexb{} {}, {}, [{}]", CondToString(cond), d, m, n);
} }
std::string arm_STLEXD(Cond cond, Reg n, Reg d, Reg m) { std::string arm_STLEXD(Cond cond, Reg n, Reg d, Reg m) {
return fmt::format("stlexd{} {}, {}, {}, [{}]", CondToString(cond), d, m, m+1, n); return fmt::format("stlexd{} {}, {}, {}, [{}]", CondToString(cond), d, m, m + 1, n);
} }
std::string arm_STLEXH(Cond cond, Reg n, Reg d, Reg m) { std::string arm_STLEXH(Cond cond, Reg n, Reg d, Reg m) {
return fmt::format("stlexh{} {}, {}, [{}]", CondToString(cond), d, m, n); return fmt::format("stlexh{} {}, {}, [{}]", CondToString(cond), d, m, n);
@ -1176,7 +1186,7 @@ public:
return fmt::format("ldrexb{} {}, [{}]", CondToString(cond), d, n); return fmt::format("ldrexb{} {}, [{}]", CondToString(cond), d, n);
} }
std::string arm_LDREXD(Cond cond, Reg n, Reg d) { std::string arm_LDREXD(Cond cond, Reg n, Reg d) {
return fmt::format("ldrexd{} {}, {}, [{}]", CondToString(cond), d, d+1, n); return fmt::format("ldrexd{} {}, {}, [{}]", CondToString(cond), d, d + 1, n);
} }
std::string arm_LDREXH(Cond cond, Reg n, Reg d) { std::string arm_LDREXH(Cond cond, Reg n, Reg d) {
return fmt::format("ldrexh{} {}, [{}]", CondToString(cond), d, n); return fmt::format("ldrexh{} {}, [{}]", CondToString(cond), d, n);
@ -1188,7 +1198,7 @@ public:
return fmt::format("strexb{} {}, {}, [{}]", CondToString(cond), d, m, n); return fmt::format("strexb{} {}, {}, [{}]", CondToString(cond), d, m, n);
} }
std::string arm_STREXD(Cond cond, Reg n, Reg d, Reg m) { std::string arm_STREXD(Cond cond, Reg n, Reg d, Reg m) {
return fmt::format("strexd{} {}, {}, {}, [{}]", CondToString(cond), d, m, m+1, n); return fmt::format("strexd{} {}, {}, {}, [{}]", CondToString(cond), d, m, m + 1, n);
} }
std::string arm_STREXH(Cond cond, Reg n, Reg d, Reg m) { std::string arm_STREXH(Cond cond, Reg n, Reg d, Reg m) {
return fmt::format("strexh{} {}, {}, [{}]", CondToString(cond), d, m, n); return fmt::format("strexh{} {}, {}, [{}]", CondToString(cond), d, m, n);
@ -1315,35 +1325,35 @@ public:
} }
} }
std::string vfp_VMOV_u32_f64(Cond cond, size_t Vd, Reg t, bool D){ std::string vfp_VMOV_u32_f64(Cond cond, size_t Vd, Reg t, bool D) {
return fmt::format("vmov{}.32 {}, {}", CondToString(cond), FPRegStr(true, Vd, D), t); return fmt::format("vmov{}.32 {}, {}", CondToString(cond), FPRegStr(true, Vd, D), t);
} }
std::string vfp_VMOV_f64_u32(Cond cond, size_t Vn, Reg t, bool N){ std::string vfp_VMOV_f64_u32(Cond cond, size_t Vn, Reg t, bool N) {
return fmt::format("vmov{}.32 {}, {}", CondToString(cond), t, FPRegStr(true, Vn, N)); return fmt::format("vmov{}.32 {}, {}", CondToString(cond), t, FPRegStr(true, Vn, N));
} }
std::string vfp_VMOV_u32_f32(Cond cond, size_t Vn, Reg t, bool N){ std::string vfp_VMOV_u32_f32(Cond cond, size_t Vn, Reg t, bool N) {
return fmt::format("vmov{}.32 {}, {}", CondToString(cond), FPRegStr(false, Vn, N), t); return fmt::format("vmov{}.32 {}, {}", CondToString(cond), FPRegStr(false, Vn, N), t);
} }
std::string vfp_VMOV_f32_u32(Cond cond, size_t Vn, Reg t, bool N){ std::string vfp_VMOV_f32_u32(Cond cond, size_t Vn, Reg t, bool N) {
return fmt::format("vmov{}.32 {}, {}", CondToString(cond), t, FPRegStr(false, Vn, N)); return fmt::format("vmov{}.32 {}, {}", CondToString(cond), t, FPRegStr(false, Vn, N));
} }
std::string vfp_VMOV_2u32_2f32(Cond cond, Reg t2, Reg t, bool M, size_t Vm){ std::string vfp_VMOV_2u32_2f32(Cond cond, Reg t2, Reg t, bool M, size_t Vm) {
return fmt::format("vmov{} {}, {}, {}, {}", CondToString(cond), FPRegStr(false, Vm, M), FPNextRegStr(false, Vm, M), t, t2); return fmt::format("vmov{} {}, {}, {}, {}", CondToString(cond), FPRegStr(false, Vm, M), FPNextRegStr(false, Vm, M), t, t2);
} }
std::string vfp_VMOV_2f32_2u32(Cond cond, Reg t2, Reg t, bool M, size_t Vm){ std::string vfp_VMOV_2f32_2u32(Cond cond, Reg t2, Reg t, bool M, size_t Vm) {
return fmt::format("vmov{} {}, {}, {}, {}", CondToString(cond), t, t2, FPRegStr(false, Vm, M), FPNextRegStr(false, Vm, M)); return fmt::format("vmov{} {}, {}, {}, {}", CondToString(cond), t, t2, FPRegStr(false, Vm, M), FPNextRegStr(false, Vm, M));
} }
std::string vfp_VMOV_2u32_f64(Cond cond, Reg t2, Reg t, bool M, size_t Vm){ std::string vfp_VMOV_2u32_f64(Cond cond, Reg t2, Reg t, bool M, size_t Vm) {
return fmt::format("vmov{} {}, {}, {}", CondToString(cond), FPRegStr(true, Vm, M), t, t2); return fmt::format("vmov{} {}, {}, {}", CondToString(cond), FPRegStr(true, Vm, M), t, t2);
} }
std::string vfp_VMOV_f64_2u32(Cond cond, Reg t2, Reg t, bool M, size_t Vm){ std::string vfp_VMOV_f64_2u32(Cond cond, Reg t2, Reg t, bool M, size_t Vm) {
return fmt::format("vmov{} {}, {}, {}", CondToString(cond), t, t2, FPRegStr(true, Vm, M)); return fmt::format("vmov{} {}, {}, {}", CondToString(cond), t, t2, FPRegStr(true, Vm, M));
} }
@ -1382,7 +1392,7 @@ public:
return fmt::format("vdup{}.{} {}, {}", CondToString(cond), esize, VectorStr(Q, Vd, D), t); return fmt::format("vdup{}.{} {}, {}", CondToString(cond), esize, VectorStr(Q, Vd, D), t);
} }
std::string vfp_VMOV_reg(Cond cond, bool D, size_t Vd, bool sz, bool M, size_t Vm){ std::string vfp_VMOV_reg(Cond cond, bool D, size_t Vd, bool sz, bool M, size_t Vm) {
return fmt::format("vmov{}.{} {}, {}", CondToString(cond), sz ? "f64" : "f32", FPRegStr(sz, Vd, D), FPRegStr(sz, Vm, M)); return fmt::format("vmov{}.{} {}, {}", CondToString(cond), sz ? "f64" : "f32", FPRegStr(sz, Vd, D), FPRegStr(sz, Vm, M));
} }

View file

@ -11,10 +11,10 @@
#include "dynarmic/common/bit_util.h" #include "dynarmic/common/bit_util.h"
#include "dynarmic/common/string_util.h" #include "dynarmic/common/string_util.h"
#include "dynarmic/frontend/imm.h"
#include "dynarmic/frontend/A32/decoder/thumb16.h" #include "dynarmic/frontend/A32/decoder/thumb16.h"
#include "dynarmic/frontend/A32/disassembler/disassembler.h" #include "dynarmic/frontend/A32/disassembler/disassembler.h"
#include "dynarmic/frontend/A32/types.h" #include "dynarmic/frontend/A32/types.h"
#include "dynarmic/frontend/imm.h"
namespace Dynarmic::A32 { namespace Dynarmic::A32 {
@ -272,7 +272,7 @@ public:
std::string thumb16_IT(Imm<8> imm8) { std::string thumb16_IT(Imm<8> imm8) {
const Cond firstcond = imm8.Bits<4, 7, Cond>(); const Cond firstcond = imm8.Bits<4, 7, Cond>();
const bool firstcond0 = imm8.Bit<4>(); const bool firstcond0 = imm8.Bit<4>();
const auto [x, y, z] = [&]{ const auto [x, y, z] = [&] {
if (imm8.Bits<0, 3>() == 0b1000) { if (imm8.Bits<0, 3>() == 0b1000) {
return std::make_tuple("", "", ""); return std::make_tuple("", "", "");
} }
@ -305,12 +305,14 @@ public:
} }
std::string thumb16_PUSH(bool M, RegList reg_list) { std::string thumb16_PUSH(bool M, RegList reg_list) {
if (M) reg_list |= 1 << 14; if (M)
reg_list |= 1 << 14;
return fmt::format("push {{{}}}", RegListToString(reg_list)); return fmt::format("push {{{}}}", RegListToString(reg_list));
} }
std::string thumb16_POP(bool P, RegList reg_list) { std::string thumb16_POP(bool P, RegList reg_list) {
if (P) reg_list |= 1 << 15; if (P)
reg_list |= 1 << 15;
return fmt::format("pop {{{}}}", RegListToString(reg_list)); return fmt::format("pop {{{}}}", RegListToString(reg_list));
} }

View file

@ -3,8 +3,9 @@
* SPDX-License-Identifier: 0BSD * SPDX-License-Identifier: 0BSD
*/ */
#include "dynarmic/common/assert.h"
#include "dynarmic/frontend/A32/ir_emitter.h" #include "dynarmic/frontend/A32/ir_emitter.h"
#include "dynarmic/common/assert.h"
#include "dynarmic/frontend/A32/types.h" #include "dynarmic/frontend/A32/types.h"
#include "dynarmic/interface/A32/arch_version.h" #include "dynarmic/interface/A32/arch_version.h"
#include "dynarmic/ir/opcodes.h" #include "dynarmic/ir/opcodes.h"

View file

@ -27,7 +27,8 @@ enum class Reg;
*/ */
class IREmitter : public IR::IREmitter { class IREmitter : public IR::IREmitter {
public: public:
IREmitter(IR::Block& block, LocationDescriptor descriptor, ArchVersion arch_version) : IR::IREmitter(block), current_location(descriptor), arch_version(arch_version) {} IREmitter(IR::Block& block, LocationDescriptor descriptor, ArchVersion arch_version)
: IR::IREmitter(block), current_location(descriptor), arch_version(arch_version) {}
LocationDescriptor current_location; LocationDescriptor current_location;

View file

@ -3,10 +3,12 @@
* SPDX-License-Identifier: 0BSD * SPDX-License-Identifier: 0BSD
*/ */
#include <ostream>
#include <fmt/format.h>
#include "dynarmic/frontend/A32/location_descriptor.h" #include "dynarmic/frontend/A32/location_descriptor.h"
#include <ostream>
#include <fmt/format.h>
namespace Dynarmic::A32 { namespace Dynarmic::A32 {
std::ostream& operator<<(std::ostream& o, const LocationDescriptor& descriptor) { std::ostream& operator<<(std::ostream& o, const LocationDescriptor& descriptor) {

View file

@ -8,10 +8,11 @@
#include <functional> #include <functional>
#include <iosfwd> #include <iosfwd>
#include <tuple> #include <tuple>
#include "dynarmic/common/common_types.h" #include "dynarmic/common/common_types.h"
#include "dynarmic/frontend/A32/FPSCR.h" #include "dynarmic/frontend/A32/FPSCR.h"
#include "dynarmic/frontend/A32/PSR.h"
#include "dynarmic/frontend/A32/ITState.h" #include "dynarmic/frontend/A32/ITState.h"
#include "dynarmic/frontend/A32/PSR.h"
#include "dynarmic/ir/location_descriptor.h" #include "dynarmic/ir/location_descriptor.h"
namespace Dynarmic::A32 { namespace Dynarmic::A32 {
@ -32,8 +33,7 @@ public:
: arm_pc(arm_pc) : arm_pc(arm_pc)
, cpsr(cpsr.Value() & CPSR_MODE_MASK) , cpsr(cpsr.Value() & CPSR_MODE_MASK)
, fpscr(fpscr.Value() & FPSCR_MODE_MASK) , fpscr(fpscr.Value() & FPSCR_MODE_MASK)
, single_stepping(single_stepping) , single_stepping(single_stepping) {}
{}
explicit LocationDescriptor(const IR::LocationDescriptor& o) { explicit LocationDescriptor(const IR::LocationDescriptor& o) {
arm_pc = static_cast<u32>(o.Value()); arm_pc = static_cast<u32>(o.Value());
@ -54,11 +54,11 @@ public:
bool SingleStepping() const { return single_stepping; } bool SingleStepping() const { return single_stepping; }
bool operator == (const LocationDescriptor& o) const { bool operator==(const LocationDescriptor& o) const {
return std::tie(arm_pc, cpsr, fpscr, single_stepping) == std::tie(o.arm_pc, o.cpsr, o.fpscr, o.single_stepping); return std::tie(arm_pc, cpsr, fpscr, single_stepping) == std::tie(o.arm_pc, o.cpsr, o.fpscr, o.single_stepping);
} }
bool operator != (const LocationDescriptor& o) const { bool operator!=(const LocationDescriptor& o) const {
return !operator==(o); return !operator==(o);
} }
@ -138,13 +138,13 @@ std::ostream& operator<<(std::ostream& o, const LocationDescriptor& descriptor);
} // namespace Dynarmic::A32 } // namespace Dynarmic::A32
namespace std { namespace std {
template <> template<>
struct less<Dynarmic::A32::LocationDescriptor> { struct less<Dynarmic::A32::LocationDescriptor> {
bool operator()(const Dynarmic::A32::LocationDescriptor& x, const Dynarmic::A32::LocationDescriptor& y) const noexcept { bool operator()(const Dynarmic::A32::LocationDescriptor& x, const Dynarmic::A32::LocationDescriptor& y) const noexcept {
return x.UniqueHash() < y.UniqueHash(); return x.UniqueHash() < y.UniqueHash();
} }
}; };
template <> template<>
struct hash<Dynarmic::A32::LocationDescriptor> { struct hash<Dynarmic::A32::LocationDescriptor> {
size_t operator()(const Dynarmic::A32::LocationDescriptor& x) const noexcept { size_t operator()(const Dynarmic::A32::LocationDescriptor& x) const noexcept {
return std::hash<u64>()(x.UniqueHash()); return std::hash<u64>()(x.UniqueHash());

View file

@ -3,12 +3,13 @@
* SPDX-License-Identifier: 0BSD * SPDX-License-Identifier: 0BSD
*/ */
#include "dynarmic/frontend/A32/translate/conditional_state.h"
#include <algorithm> #include <algorithm>
#include "dynarmic/common/assert.h" #include "dynarmic/common/assert.h"
#include "dynarmic/common/common_types.h" #include "dynarmic/common/common_types.h"
#include "dynarmic/frontend/A32/ir_emitter.h" #include "dynarmic/frontend/A32/ir_emitter.h"
#include "dynarmic/frontend/A32/translate/conditional_state.h"
#include "dynarmic/frontend/A32/translate/impl/translate.h" #include "dynarmic/frontend/A32/translate/impl/translate.h"
#include "dynarmic/interface/A32/config.h" #include "dynarmic/interface/A32/config.h"
#include "dynarmic/ir/cond.h" #include "dynarmic/ir/cond.h"

View file

@ -3,11 +3,11 @@
* SPDX-License-Identifier: 0BSD * SPDX-License-Identifier: 0BSD
*/ */
#include "dynarmic/frontend/A32/translate/impl/translate.h"
#include <optional> #include <optional>
#include <tuple> #include <tuple>
#include "dynarmic/common/bit_util.h" #include "dynarmic/common/bit_util.h"
#include "dynarmic/frontend/A32/translate/impl/translate.h"
namespace Dynarmic::A32 { namespace Dynarmic::A32 {
@ -70,7 +70,7 @@ std::optional<std::tuple<size_t, size_t, size_t>> DecodeType(Imm<4> type, size_t
} }
ASSERT_FALSE("Decode error"); ASSERT_FALSE("Decode error");
} }
} // anoynmous namespace } // namespace
bool TranslatorVisitor::v8_VST_multiple(bool D, Reg n, size_t Vd, Imm<4> type, size_t size, size_t align, Reg m) { bool TranslatorVisitor::v8_VST_multiple(bool D, Reg n, size_t Vd, Imm<4> type, size_t size, size_t align, Reg m) {
if (type == 0b1011 || type.Bits<2, 3>() == 0b11) { if (type == 0b1011 || type.Bits<2, 3>() == 0b11) {

View file

@ -3,10 +3,9 @@
* SPDX-License-Identifier: 0BSD * SPDX-License-Identifier: 0BSD
*/ */
#include "dynarmic/frontend/A32/translate/impl/translate.h"
#include "dynarmic/common/assert.h" #include "dynarmic/common/assert.h"
#include "dynarmic/common/bit_util.h" #include "dynarmic/common/bit_util.h"
#include "dynarmic/frontend/A32/translate/impl/translate.h"
namespace Dynarmic::A32 { namespace Dynarmic::A32 {
@ -20,7 +19,7 @@ static bool TableLookup(TranslatorVisitor& v, bool is_vtbl, bool D, size_t Vn, s
return v.UnpredictableInstruction(); return v.UnpredictableInstruction();
} }
const IR::Table table = v.ir.VectorTable([&]{ const IR::Table table = v.ir.VectorTable([&] {
std::vector<IR::U64> result; std::vector<IR::U64> result;
for (size_t i = 0; i < length; ++i) { for (size_t i = 0; i < length; ++i) {
result.emplace_back(v.ir.GetExtendedRegister(n + i)); result.emplace_back(v.ir.GetExtendedRegister(n + i));

View file

@ -3,15 +3,13 @@
* SPDX-License-Identifier: 0BSD * SPDX-License-Identifier: 0BSD
*/ */
#include "dynarmic/frontend/A32/translate/impl/translate.h"
#include "dynarmic/common/assert.h" #include "dynarmic/common/assert.h"
#include "dynarmic/common/bit_util.h" #include "dynarmic/common/bit_util.h"
#include "dynarmic/frontend/A32/translate/impl/translate.h"
namespace Dynarmic::A32 { namespace Dynarmic::A32 {
bool TranslatorVisitor::asimd_VMOV_imm(Imm<1> a, bool D, Imm<1> b, Imm<1> c, Imm<1> d, size_t Vd, bool TranslatorVisitor::asimd_VMOV_imm(Imm<1> a, bool D, Imm<1> b, Imm<1> c, Imm<1> d, size_t Vd, Imm<4> cmode, bool Q, bool op, Imm<1> e, Imm<1> f, Imm<1> g, Imm<1> h) {
Imm<4> cmode, bool Q, bool op, Imm<1> e, Imm<1> f, Imm<1> g, Imm<1> h) {
if (Q && Common::Bit<0>(Vd)) { if (Q && Common::Bit<0>(Vd)) {
return UndefinedInstruction(); return UndefinedInstruction();
} }
@ -68,27 +66,42 @@ bool TranslatorVisitor::asimd_VMOV_imm(Imm<1> a, bool D, Imm<1> b, Imm<1> c, Imm
}; };
switch (concatenate(cmode, Imm<1>{op}).ZeroExtend()) { switch (concatenate(cmode, Imm<1>{op}).ZeroExtend()) {
case 0b00000: case 0b00100: case 0b00000:
case 0b01000: case 0b01100: case 0b00100:
case 0b10000: case 0b10100: case 0b01000:
case 0b11000: case 0b11010: case 0b01100:
case 0b11100: case 0b11101: case 0b10000:
case 0b10100:
case 0b11000:
case 0b11010:
case 0b11100:
case 0b11101:
case 0b11110: case 0b11110:
return mov(); return mov();
case 0b11111: case 0b11111:
return UndefinedInstruction(); return UndefinedInstruction();
case 0b00001: case 0b00101: case 0b00001:
case 0b01001: case 0b01101: case 0b00101:
case 0b10001: case 0b10101: case 0b01001:
case 0b11001: case 0b11011: case 0b01101:
case 0b10001:
case 0b10101:
case 0b11001:
case 0b11011:
return mvn(); return mvn();
case 0b00010: case 0b00110: case 0b00010:
case 0b01010: case 0b01110: case 0b00110:
case 0b10010: case 0b10110: case 0b01010:
case 0b01110:
case 0b10010:
case 0b10110:
return orr(); return orr();
case 0b00011: case 0b00111: case 0b00011:
case 0b01011: case 0b01111: case 0b00111:
case 0b10011: case 0b10111: case 0b01011:
case 0b01111:
case 0b10011:
case 0b10111:
return bic(); return bic();
} }

View file

@ -3,9 +3,8 @@
* SPDX-License-Identifier: 0BSD * SPDX-License-Identifier: 0BSD
*/ */
#include "dynarmic/frontend/A32/translate/impl/translate.h"
#include "dynarmic/common/bit_util.h" #include "dynarmic/common/bit_util.h"
#include "dynarmic/frontend/A32/translate/impl/translate.h"
namespace Dynarmic::A32 { namespace Dynarmic::A32 {
namespace { namespace {
@ -27,7 +26,7 @@ enum class WidenBehaviour {
Both, Both,
}; };
template <bool WithDst, typename Callable> template<bool WithDst, typename Callable>
bool BitwiseInstruction(TranslatorVisitor& v, bool D, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm, Callable fn) { bool BitwiseInstruction(TranslatorVisitor& v, bool D, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm, Callable fn) {
if (Q && (Common::Bit<0>(Vd) || Common::Bit<0>(Vn) || Common::Bit<0>(Vm))) { if (Q && (Common::Bit<0>(Vd) || Common::Bit<0>(Vn) || Common::Bit<0>(Vm))) {
return v.UndefinedInstruction(); return v.UndefinedInstruction();
@ -53,7 +52,7 @@ bool BitwiseInstruction(TranslatorVisitor& v, bool D, size_t Vn, size_t Vd, bool
return true; return true;
} }
template <typename Callable> template<typename Callable>
bool FloatingPointInstruction(TranslatorVisitor& v, bool D, bool sz, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm, Callable fn) { bool FloatingPointInstruction(TranslatorVisitor& v, bool D, bool sz, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm, Callable fn) {
if (Q && (Common::Bit<0>(Vd) || Common::Bit<0>(Vn) || Common::Bit<0>(Vm))) { if (Q && (Common::Bit<0>(Vd) || Common::Bit<0>(Vn) || Common::Bit<0>(Vm))) {
return v.UndefinedInstruction(); return v.UndefinedInstruction();
@ -76,8 +75,7 @@ bool FloatingPointInstruction(TranslatorVisitor& v, bool D, bool sz, size_t Vn,
return true; return true;
} }
bool IntegerComparison(TranslatorVisitor& v, bool U, bool D, size_t sz, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm, bool IntegerComparison(TranslatorVisitor& v, bool U, bool D, size_t sz, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm, Comparison comparison) {
Comparison comparison) {
if (sz == 0b11) { if (sz == 0b11) {
return v.UndefinedInstruction(); return v.UndefinedInstruction();
} }
@ -112,8 +110,7 @@ bool IntegerComparison(TranslatorVisitor& v, bool U, bool D, size_t sz, size_t V
return true; return true;
} }
bool FloatComparison(TranslatorVisitor& v, bool D, bool sz, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm, bool FloatComparison(TranslatorVisitor& v, bool D, bool sz, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm, Comparison comparison) {
Comparison comparison) {
if (sz) { if (sz) {
return v.UndefinedInstruction(); return v.UndefinedInstruction();
} }
@ -149,8 +146,7 @@ bool FloatComparison(TranslatorVisitor& v, bool D, bool sz, size_t Vn, size_t Vd
return true; return true;
} }
bool AbsoluteDifference(TranslatorVisitor& v, bool U, bool D, size_t sz, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm, bool AbsoluteDifference(TranslatorVisitor& v, bool U, bool D, size_t sz, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm, AccumulateBehavior accumulate) {
AccumulateBehavior accumulate) {
if (sz == 0b11) { if (sz == 0b11) {
return v.UndefinedInstruction(); return v.UndefinedInstruction();
} }
@ -182,8 +178,7 @@ bool AbsoluteDifference(TranslatorVisitor& v, bool U, bool D, size_t sz, size_t
return true; return true;
} }
bool AbsoluteDifferenceLong(TranslatorVisitor& v, bool U, bool D, size_t sz, size_t Vn, size_t Vd, bool N, bool M, size_t Vm, bool AbsoluteDifferenceLong(TranslatorVisitor& v, bool U, bool D, size_t sz, size_t Vn, size_t Vd, bool N, bool M, size_t Vm, AccumulateBehavior accumulate) {
AccumulateBehavior accumulate) {
if (sz == 0b11) { if (sz == 0b11) {
return v.DecodeError(); return v.DecodeError();
} }
@ -217,7 +212,7 @@ bool AbsoluteDifferenceLong(TranslatorVisitor& v, bool U, bool D, size_t sz, siz
return true; return true;
} }
template <typename Callable> template<typename Callable>
bool WideInstruction(TranslatorVisitor& v, bool U, bool D, size_t sz, size_t Vn, size_t Vd, bool N, bool M, size_t Vm, WidenBehaviour widen_behaviour, Callable fn) { bool WideInstruction(TranslatorVisitor& v, bool U, bool D, size_t sz, size_t Vn, size_t Vd, bool N, bool M, size_t Vm, WidenBehaviour widen_behaviour, Callable fn) {
const size_t esize = 8U << sz; const size_t esize = 8U << sz;
const bool widen_first = widen_behaviour == WidenBehaviour::Both; const bool widen_first = widen_behaviour == WidenBehaviour::Both;

Some files were not shown because too many files have changed in this diff Show more