commit
a76a2fff53
29 changed files with 2322 additions and 711 deletions
23
.github/workflows/aarch64.yml
vendored
23
.github/workflows/aarch64.yml
vendored
|
@ -48,7 +48,6 @@ jobs:
|
|||
-DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}}
|
||||
-DDYNARMIC_TESTS_USE_UNICORN=0
|
||||
-DDYNARMIC_USE_LLVM=0
|
||||
-DDYNARMIC_FRONTENDS=A32
|
||||
-G Ninja
|
||||
|
||||
- name: Build AArch64
|
||||
|
@ -66,7 +65,6 @@ jobs:
|
|||
-DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}}
|
||||
-DCMAKE_C_COMPILER_LAUNCHER=ccache
|
||||
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache
|
||||
-DDYNARMIC_FRONTENDS=A32
|
||||
-DDYNARMIC_TESTS_USE_UNICORN=0
|
||||
-DDYNARMIC_USE_LLVM=0
|
||||
-G Ninja
|
||||
|
@ -79,6 +77,23 @@ jobs:
|
|||
working-directory: ${{github.workspace}}
|
||||
run: qemu-aarch64 -L /usr/aarch64-linux-gnu ./build-arm64/tests/dynarmic_tests -d yes
|
||||
|
||||
- name: Test against x86_64 implementation
|
||||
- name: Test against x86_64 implementation (A32, thumb)
|
||||
working-directory: ${{github.workspace}}
|
||||
run: diff <(qemu-aarch64 -L /usr/aarch64-linux-gnu ./build-arm64/tests/dynarmic_test_generator) <(./build-x64/tests/dynarmic_test_generator)
|
||||
run: |
|
||||
diff <(qemu-aarch64 -L /usr/aarch64-linux-gnu ./build-arm64/tests/dynarmic_test_generator thumb 42 1 100000) <(./build-x64/tests/dynarmic_test_generator thumb 42 1 100000)
|
||||
diff <(qemu-aarch64 -L /usr/aarch64-linux-gnu ./build-arm64/tests/dynarmic_test_generator thumb 42 10 10000) <(./build-x64/tests/dynarmic_test_generator thumb 42 10 10000)
|
||||
diff <(qemu-aarch64 -L /usr/aarch64-linux-gnu ./build-arm64/tests/dynarmic_test_generator thumb 42 100 1000) <(./build-x64/tests/dynarmic_test_generator thumb 42 100 1000)
|
||||
|
||||
- name: Test against x86_64 implementation (A32, arm)
|
||||
working-directory: ${{github.workspace}}
|
||||
run: |
|
||||
diff <(qemu-aarch64 -L /usr/aarch64-linux-gnu ./build-arm64/tests/dynarmic_test_generator arm 42 1 100000) <(./build-x64/tests/dynarmic_test_generator arm 42 1 100000)
|
||||
diff <(qemu-aarch64 -L /usr/aarch64-linux-gnu ./build-arm64/tests/dynarmic_test_generator arm 42 10 10000) <(./build-x64/tests/dynarmic_test_generator arm 42 10 10000)
|
||||
diff <(qemu-aarch64 -L /usr/aarch64-linux-gnu ./build-arm64/tests/dynarmic_test_generator arm 42 100 1000) <(./build-x64/tests/dynarmic_test_generator arm 42 100 1000)
|
||||
|
||||
- name: Test against x86_64 implementation (A64)
|
||||
working-directory: ${{github.workspace}}
|
||||
run: |
|
||||
diff <(qemu-aarch64 -L /usr/aarch64-linux-gnu ./build-arm64/tests/dynarmic_test_generator a64 42 1 100000) <(./build-x64/tests/dynarmic_test_generator a64 42 1 100000)
|
||||
diff <(qemu-aarch64 -L /usr/aarch64-linux-gnu ./build-arm64/tests/dynarmic_test_generator a64 42 10 10000) <(./build-x64/tests/dynarmic_test_generator a64 42 10 10000)
|
||||
diff <(qemu-aarch64 -L /usr/aarch64-linux-gnu ./build-arm64/tests/dynarmic_test_generator a64 42 100 1000) <(./build-x64/tests/dynarmic_test_generator a64 42 100 1000)
|
||||
|
|
|
@ -373,6 +373,8 @@ elseif(ARCHITECTURE STREQUAL "arm64")
|
|||
backend/arm64/a32_jitstate.h
|
||||
backend/arm64/abi.cpp
|
||||
backend/arm64/abi.h
|
||||
backend/arm64/address_space.cpp
|
||||
backend/arm64/address_space.h
|
||||
backend/arm64/devirtualize.h
|
||||
backend/arm64/emit_arm64.cpp
|
||||
backend/arm64/emit_arm64.h
|
||||
|
@ -406,14 +408,16 @@ elseif(ARCHITECTURE STREQUAL "arm64")
|
|||
backend/arm64/a32_address_space.h
|
||||
backend/arm64/a32_core.h
|
||||
backend/arm64/a32_interface.cpp
|
||||
|
||||
# Move this to the list below when implemented
|
||||
backend/arm64/a64_interface.cpp
|
||||
)
|
||||
endif()
|
||||
|
||||
if ("A64" IN_LIST DYNARMIC_FRONTENDS)
|
||||
message(FATAL_ERROR "TODO: Unimplemented frontend for this host architecture")
|
||||
target_sources(dynarmic PRIVATE
|
||||
backend/arm64/a64_address_space.cpp
|
||||
backend/arm64/a64_address_space.h
|
||||
backend/arm64/a64_core.h
|
||||
backend/arm64/a64_interface.cpp
|
||||
)
|
||||
endif()
|
||||
else()
|
||||
message(FATAL_ERROR "Unsupported architecture")
|
||||
|
|
|
@ -14,6 +14,7 @@
|
|||
#include "dynarmic/common/fp/fpcr.h"
|
||||
#include "dynarmic/frontend/A32/a32_location_descriptor.h"
|
||||
#include "dynarmic/frontend/A32/translate/a32_translate.h"
|
||||
#include "dynarmic/interface/A32/config.h"
|
||||
#include "dynarmic/interface/exclusive_monitor.h"
|
||||
#include "dynarmic/ir/opt/passes.h"
|
||||
|
||||
|
@ -97,9 +98,8 @@ static void* EmitExclusiveWriteCallTrampoline(oaknut::CodeGenerator& code, const
|
|||
}
|
||||
|
||||
A32AddressSpace::A32AddressSpace(const A32::UserConfig& conf)
|
||||
: conf(conf)
|
||||
, mem(conf.code_cache_size)
|
||||
, code(mem.ptr()) {
|
||||
: AddressSpace(conf.code_cache_size)
|
||||
, conf(conf) {
|
||||
EmitPrelude();
|
||||
}
|
||||
|
||||
|
@ -121,33 +121,6 @@ IR::Block A32AddressSpace::GenerateIR(IR::LocationDescriptor descriptor) const {
|
|||
return ir_block;
|
||||
}
|
||||
|
||||
// Looks up the entry point recorded in `block_entries` for `descriptor`.
// Returns nullptr when no entry is recorded for that descriptor value.
CodePtr A32AddressSpace::Get(IR::LocationDescriptor descriptor) {
    const auto found = block_entries.find(descriptor.Value());
    return found != block_entries.end() ? found->second : nullptr;
}
|
||||
|
||||
// Returns the entry point for `descriptor`, translating and emitting the
// block first if Get() does not already know about it. A freshly emitted
// block is recorded in both `block_infos` and `block_entries`.
CodePtr A32AddressSpace::GetOrEmit(IR::LocationDescriptor descriptor) {
    CodePtr cached_entry = Get(descriptor);
    if (cached_entry != nullptr) {
        return cached_entry;
    }

    const EmittedBlockInfo emitted = Emit(GenerateIR(descriptor));

    const auto key = descriptor.Value();
    block_infos.insert_or_assign(key, emitted);
    block_entries.insert_or_assign(key, emitted.entry_point);

    return emitted.entry_point;
}
|
||||
|
||||
// Forgets every emitted block and rewinds the code generator to the
// position recorded as the end of the prelude.
void A32AddressSpace::ClearCache() {
    // Drop all per-block bookkeeping.
    block_references.clear();
    block_infos.clear();
    block_entries.clear();

    // New code is written starting right after the prelude again.
    code.set_ptr(prelude_info.end_of_prelude);
}
|
||||
|
||||
void A32AddressSpace::EmitPrelude() {
|
||||
using namespace oaknut::util;
|
||||
|
||||
|
@ -291,153 +264,33 @@ void A32AddressSpace::EmitPrelude() {
|
|||
mem.protect();
|
||||
}
|
||||
|
||||
size_t A32AddressSpace::GetRemainingSize() {
|
||||
return conf.code_cache_size - (code.ptr<CodePtr>() - reinterpret_cast<CodePtr>(mem.ptr()));
|
||||
}
|
||||
EmitConfig A32AddressSpace::GetEmitConfig() {
|
||||
return EmitConfig{
|
||||
.optimizations = conf.unsafe_optimizations ? conf.optimizations : conf.optimizations & all_safe_optimizations,
|
||||
|
||||
EmittedBlockInfo A32AddressSpace::Emit(IR::Block block) {
|
||||
if (GetRemainingSize() < 1024 * 1024) {
|
||||
ClearCache();
|
||||
}
|
||||
|
||||
mem.unprotect();
|
||||
|
||||
const EmitConfig emit_conf{
|
||||
.hook_isb = conf.hook_isb,
|
||||
|
||||
.cntfreq_el0{},
|
||||
.ctr_el0{},
|
||||
.dczid_el0{},
|
||||
.tpidrro_el0{},
|
||||
.tpidr_el0{},
|
||||
|
||||
.wall_clock_cntpct = conf.wall_clock_cntpct,
|
||||
.enable_cycle_counting = conf.enable_cycle_counting,
|
||||
|
||||
.always_little_endian = conf.always_little_endian,
|
||||
|
||||
.descriptor_to_fpcr = [](const IR::LocationDescriptor& location) { return FP::FPCR{A32::LocationDescriptor{location}.FPSCR().Value()}; },
|
||||
.emit_cond = EmitA32Cond,
|
||||
.emit_condition_failed_terminal = EmitA32ConditionFailedTerminal,
|
||||
.emit_terminal = EmitA32Terminal,
|
||||
|
||||
.state_nzcv_offset = offsetof(A32JitState, cpsr_nzcv),
|
||||
.state_fpsr_offset = offsetof(A32JitState, fpsr),
|
||||
|
||||
.coprocessors = conf.coprocessors,
|
||||
.optimizations = conf.unsafe_optimizations ? conf.optimizations : conf.optimizations & all_safe_optimizations,
|
||||
};
|
||||
EmittedBlockInfo block_info = EmitArm64(code, std::move(block), emit_conf);
|
||||
|
||||
Link(block.Location(), block_info);
|
||||
|
||||
mem.invalidate(reinterpret_cast<u32*>(block_info.entry_point), block_info.size);
|
||||
|
||||
RelinkForDescriptor(block.Location());
|
||||
|
||||
mem.protect();
|
||||
|
||||
return block_info;
|
||||
}
|
||||
|
||||
static void LinkBlockLinks(const CodePtr entry_point, const CodePtr target_ptr, const std::vector<BlockRelocation>& block_relocations_list) {
|
||||
using namespace oaknut;
|
||||
using namespace oaknut::util;
|
||||
|
||||
for (auto [ptr_offset] : block_relocations_list) {
|
||||
CodeGenerator c{reinterpret_cast<u32*>(entry_point + ptr_offset)};
|
||||
|
||||
if (target_ptr) {
|
||||
c.B((void*)target_ptr);
|
||||
} else {
|
||||
c.NOP();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Resolves all relocations of a freshly emitted block.
//
// block_descriptor: location descriptor of the block that was just emitted.
// block_info:       emission result; supplies the entry point, the list of
//                   prelude relocations and the per-target block relocations.
//
// Step 1 patches every prelude relocation at `entry_point + ptr_offset` with
// a branch to the matching prelude routine: B for the two dispatcher
// returns, BL for every callback trampoline.
// Step 2 records, for each block this one jumps to, that `block_descriptor`
// references it (in `block_references`) and patches the jump sites via
// LinkBlockLinks using whatever Get() currently returns for the target.
void A32AddressSpace::Link(IR::LocationDescriptor block_descriptor, EmittedBlockInfo& block_info) {
    using namespace oaknut;
    using namespace oaknut::util;

    for (const auto& [ptr_offset, target] : block_info.relocations) {
        CodeGenerator c{reinterpret_cast<u32*>(block_info.entry_point + ptr_offset)};

        switch (target) {
        case LinkTarget::ReturnToDispatcher:
            c.B(prelude_info.return_to_dispatcher);
            break;
        case LinkTarget::ReturnFromRunCode:
            c.B(prelude_info.return_from_run_code);
            break;
        case LinkTarget::ReadMemory8:
            c.BL(prelude_info.read_memory_8);
            break;
        case LinkTarget::ReadMemory16:
            c.BL(prelude_info.read_memory_16);
            break;
        case LinkTarget::ReadMemory32:
            c.BL(prelude_info.read_memory_32);
            break;
        case LinkTarget::ReadMemory64:
            c.BL(prelude_info.read_memory_64);
            break;
        case LinkTarget::ExclusiveReadMemory8:
            c.BL(prelude_info.exclusive_read_memory_8);
            break;
        case LinkTarget::ExclusiveReadMemory16:
            c.BL(prelude_info.exclusive_read_memory_16);
            break;
        case LinkTarget::ExclusiveReadMemory32:
            c.BL(prelude_info.exclusive_read_memory_32);
            break;
        case LinkTarget::ExclusiveReadMemory64:
            c.BL(prelude_info.exclusive_read_memory_64);
            break;
        case LinkTarget::WriteMemory8:
            c.BL(prelude_info.write_memory_8);
            break;
        case LinkTarget::WriteMemory16:
            c.BL(prelude_info.write_memory_16);
            break;
        case LinkTarget::WriteMemory32:
            c.BL(prelude_info.write_memory_32);
            break;
        case LinkTarget::WriteMemory64:
            c.BL(prelude_info.write_memory_64);
            break;
        case LinkTarget::ExclusiveWriteMemory8:
            c.BL(prelude_info.exclusive_write_memory_8);
            break;
        case LinkTarget::ExclusiveWriteMemory16:
            c.BL(prelude_info.exclusive_write_memory_16);
            break;
        case LinkTarget::ExclusiveWriteMemory32:
            c.BL(prelude_info.exclusive_write_memory_32);
            break;
        case LinkTarget::ExclusiveWriteMemory64:
            c.BL(prelude_info.exclusive_write_memory_64);
            break;
        case LinkTarget::CallSVC:
            c.BL(prelude_info.call_svc);
            break;
        case LinkTarget::ExceptionRaised:
            c.BL(prelude_info.exception_raised);
            break;
        case LinkTarget::InstructionSynchronizationBarrierRaised:
            c.BL(prelude_info.isb_raised);
            break;
        case LinkTarget::AddTicks:
            c.BL(prelude_info.add_ticks);
            break;
        case LinkTarget::GetTicksRemaining:
            c.BL(prelude_info.get_ticks_remaining);
            break;
        default:
            ASSERT_FALSE("Invalid relocation target");
        }
    }

    // Bind by const reference: each mapped value is a relocation list, and
    // binding by value would copy that whole list on every iteration.
    for (const auto& [target_descriptor, list] : block_info.block_relocations) {
        block_references[target_descriptor.Value()].emplace(block_descriptor.Value());
        LinkBlockLinks(block_info.entry_point, Get(target_descriptor), list);
    }
}
|
||||
|
||||
// Re-patches every block recorded in `block_references` as referencing
// `target_descriptor`, so that their jump sites reflect what Get() currently
// returns for the target, then invalidates each patched block's code range.
// Referencing blocks without an entry in `block_infos` are ignored.
void A32AddressSpace::RelinkForDescriptor(IR::LocationDescriptor target_descriptor) {
    for (auto referrer : block_references[target_descriptor.Value()]) {
        const auto found = block_infos.find(referrer);
        if (found == block_infos.end()) {
            continue;
        }

        const EmittedBlockInfo& referrer_info = found->second;

        LinkBlockLinks(referrer_info.entry_point, Get(target_descriptor), block_infos[referrer].block_relocations[target_descriptor]);

        mem.invalidate(reinterpret_cast<u32*>(referrer_info.entry_point), referrer_info.size);
    }
}
|
||||
|
||||
} // namespace Dynarmic::Backend::Arm64
|
||||
|
|
|
@ -5,84 +5,24 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <mcl/stdint.hpp>
|
||||
#include <oaknut/code_block.hpp>
|
||||
#include <oaknut/oaknut.hpp>
|
||||
#include <tsl/robin_map.h>
|
||||
#include <tsl/robin_set.h>
|
||||
|
||||
#include "dynarmic/backend/arm64/emit_arm64.h"
|
||||
#include "dynarmic/backend/arm64/address_space.h"
|
||||
#include "dynarmic/interface/A32/config.h"
|
||||
#include "dynarmic/interface/halt_reason.h"
|
||||
#include "dynarmic/ir/basic_block.h"
|
||||
#include "dynarmic/ir/location_descriptor.h"
|
||||
|
||||
namespace Dynarmic::Backend::Arm64 {
|
||||
|
||||
struct A32JitState;
|
||||
|
||||
class A32AddressSpace final {
|
||||
class A32AddressSpace final : public AddressSpace {
|
||||
public:
|
||||
explicit A32AddressSpace(const A32::UserConfig& conf);
|
||||
|
||||
IR::Block GenerateIR(IR::LocationDescriptor) const;
|
||||
IR::Block GenerateIR(IR::LocationDescriptor) const override;
|
||||
|
||||
CodePtr Get(IR::LocationDescriptor descriptor);
|
||||
|
||||
CodePtr GetOrEmit(IR::LocationDescriptor descriptor);
|
||||
|
||||
void ClearCache();
|
||||
|
||||
private:
|
||||
protected:
|
||||
friend class A32Core;
|
||||
|
||||
void EmitPrelude();
|
||||
|
||||
size_t GetRemainingSize();
|
||||
EmittedBlockInfo Emit(IR::Block ir_block);
|
||||
void Link(IR::LocationDescriptor block_descriptor, EmittedBlockInfo& block);
|
||||
void RelinkForDescriptor(IR::LocationDescriptor target_descriptor);
|
||||
EmitConfig GetEmitConfig() override;
|
||||
|
||||
const A32::UserConfig conf;
|
||||
|
||||
oaknut::CodeBlock mem;
|
||||
oaknut::CodeGenerator code;
|
||||
|
||||
tsl::robin_map<u64, CodePtr> block_entries;
|
||||
tsl::robin_map<u64, EmittedBlockInfo> block_infos;
|
||||
tsl::robin_map<u64, tsl::robin_set<u64>> block_references;
|
||||
|
||||
struct PreludeInfo {
|
||||
u32* end_of_prelude;
|
||||
|
||||
using RunCodeFuncType = HaltReason (*)(CodePtr entry_point, A32JitState* context, volatile u32* halt_reason);
|
||||
RunCodeFuncType run_code;
|
||||
RunCodeFuncType step_code;
|
||||
void* return_to_dispatcher;
|
||||
void* return_from_run_code;
|
||||
|
||||
void* read_memory_8;
|
||||
void* read_memory_16;
|
||||
void* read_memory_32;
|
||||
void* read_memory_64;
|
||||
void* exclusive_read_memory_8;
|
||||
void* exclusive_read_memory_16;
|
||||
void* exclusive_read_memory_32;
|
||||
void* exclusive_read_memory_64;
|
||||
void* write_memory_8;
|
||||
void* write_memory_16;
|
||||
void* write_memory_32;
|
||||
void* write_memory_64;
|
||||
void* exclusive_write_memory_8;
|
||||
void* exclusive_write_memory_16;
|
||||
void* exclusive_write_memory_32;
|
||||
void* exclusive_write_memory_64;
|
||||
void* call_svc;
|
||||
void* exception_raised;
|
||||
void* isb_raised;
|
||||
void* add_ticks;
|
||||
void* get_ticks_remaining;
|
||||
} prelude_info;
|
||||
};
|
||||
|
||||
} // namespace Dynarmic::Backend::Arm64
|
||||
|
|
416
src/dynarmic/backend/arm64/a64_address_space.cpp
Normal file
416
src/dynarmic/backend/arm64/a64_address_space.cpp
Normal file
|
@ -0,0 +1,416 @@
|
|||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2022 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include "dynarmic/backend/arm64/a64_address_space.h"
|
||||
|
||||
#include "dynarmic/backend/arm64/a64_jitstate.h"
|
||||
#include "dynarmic/backend/arm64/abi.h"
|
||||
#include "dynarmic/backend/arm64/devirtualize.h"
|
||||
#include "dynarmic/backend/arm64/emit_arm64.h"
|
||||
#include "dynarmic/backend/arm64/stack_layout.h"
|
||||
#include "dynarmic/common/cast_util.h"
|
||||
#include "dynarmic/common/fp/fpcr.h"
|
||||
#include "dynarmic/frontend/A64/a64_location_descriptor.h"
|
||||
#include "dynarmic/frontend/A64/translate/a64_translate.h"
|
||||
#include "dynarmic/interface/A64/config.h"
|
||||
#include "dynarmic/interface/exclusive_monitor.h"
|
||||
#include "dynarmic/ir/opt/passes.h"
|
||||
|
||||
namespace Dynarmic::Backend::Arm64 {
|
||||
|
||||
template<auto mfp, typename T>
|
||||
static void* EmitCallTrampoline(oaknut::CodeGenerator& code, T* this_) {
|
||||
using namespace oaknut::util;
|
||||
|
||||
const auto info = Devirtualize<mfp>(this_);
|
||||
|
||||
oaknut::Label l_addr, l_this;
|
||||
|
||||
void* target = code.ptr<void*>();
|
||||
code.LDR(X0, l_this);
|
||||
code.LDR(Xscratch0, l_addr);
|
||||
code.BR(Xscratch0);
|
||||
|
||||
code.align(8);
|
||||
code.l(l_this);
|
||||
code.dx(info.this_ptr);
|
||||
code.l(l_addr);
|
||||
code.dx(info.fn_ptr);
|
||||
|
||||
return target;
|
||||
}
|
||||
|
||||
template<auto callback, typename T>
|
||||
static void* EmitExclusiveReadCallTrampoline(oaknut::CodeGenerator& code, const A64::UserConfig& conf) {
|
||||
using namespace oaknut::util;
|
||||
|
||||
oaknut::Label l_addr, l_this;
|
||||
|
||||
auto fn = [](const A64::UserConfig& conf, A64::VAddr vaddr) -> T {
|
||||
return conf.global_monitor->ReadAndMark<T>(conf.processor_id, vaddr, [&]() -> T {
|
||||
return (conf.callbacks->*callback)(vaddr);
|
||||
});
|
||||
};
|
||||
|
||||
void* target = code.ptr<void*>();
|
||||
code.LDR(X0, l_this);
|
||||
code.LDR(Xscratch0, l_addr);
|
||||
code.BR(Xscratch0);
|
||||
|
||||
code.align(8);
|
||||
code.l(l_this);
|
||||
code.dx(mcl::bit_cast<u64>(&conf));
|
||||
code.l(l_addr);
|
||||
code.dx(mcl::bit_cast<u64>(Common::FptrCast(fn)));
|
||||
|
||||
return target;
|
||||
}
|
||||
|
||||
template<auto callback, typename T>
|
||||
static void* EmitExclusiveWriteCallTrampoline(oaknut::CodeGenerator& code, const A64::UserConfig& conf) {
|
||||
using namespace oaknut::util;
|
||||
|
||||
oaknut::Label l_addr, l_this;
|
||||
|
||||
auto fn = [](const A64::UserConfig& conf, A64::VAddr vaddr, T value) -> u32 {
|
||||
return conf.global_monitor->DoExclusiveOperation<T>(conf.processor_id, vaddr,
|
||||
[&](T expected) -> bool {
|
||||
return (conf.callbacks->*callback)(vaddr, value, expected);
|
||||
})
|
||||
? 0
|
||||
: 1;
|
||||
};
|
||||
|
||||
void* target = code.ptr<void*>();
|
||||
code.LDR(X0, l_this);
|
||||
code.LDR(Xscratch0, l_addr);
|
||||
code.BR(Xscratch0);
|
||||
|
||||
code.align(8);
|
||||
code.l(l_this);
|
||||
code.dx(mcl::bit_cast<u64>(&conf));
|
||||
code.l(l_addr);
|
||||
code.dx(mcl::bit_cast<u64>(Common::FptrCast(fn)));
|
||||
|
||||
return target;
|
||||
}
|
||||
|
||||
// Emits a trampoline that calls UserCallbacks::MemoryRead128 on `this_` and
// returns the address of its first instruction.
//
// Unlike the plain call trampolines this one uses BLR and returns itself:
// after the call it stores X0:X1 at SP and reloads those 16 bytes into Q0,
// so the emitted caller receives the result in Q0.
// NOTE(review): the store at SP presumably targets the sizeof(Vector)
// scratch space requested from ABI_PushRegisters -- confirm in abi.cpp.
static void* EmitRead128CallTrampoline(oaknut::CodeGenerator& code, A64::UserCallbacks* this_) {
    using namespace oaknut::util;

    const auto devirtualized = Devirtualize<&A64::UserCallbacks::MemoryRead128>(this_);

    // Registers 29 and 30 are preserved around the call.
    constexpr auto preserved_regs = (1ull << 29) | (1ull << 30);

    oaknut::Label fn_literal, this_literal;

    void* const entry = code.ptr<void*>();
    ABI_PushRegisters(code, preserved_regs, sizeof(Vector));
    code.LDR(X0, this_literal);
    code.LDR(Xscratch0, fn_literal);
    code.BLR(Xscratch0);
    // Move the result from the X0:X1 pair into Q0 via the stack.
    code.STP(X0, X1, SP);
    code.LDR(Q0, SP);
    ABI_PopRegisters(code, preserved_regs, sizeof(Vector));
    code.RET();

    // Literal pool read by the two literal LDRs above.
    code.align(8);
    code.l(this_literal);
    code.dx(devirtualized.this_ptr);
    code.l(fn_literal);
    code.dx(devirtualized.fn_ptr);

    return entry;
}
|
||||
|
||||
// Emits a trampoline for a 128-bit exclusive read and returns the address of
// its first instruction.
//
// The trampoline calls (BLR) a captureless helper with the address of `conf`
// in X0; the helper performs ReadAndMark<Vector> on the global monitor using
// the MemoryRead128 callback. After the call, X0:X1 is stored at SP and
// reloaded into Q0 before returning.
static void* EmitExclusiveRead128CallTrampoline(oaknut::CodeGenerator& code, const A64::UserConfig& conf) {
    using namespace oaknut::util;

    // Registers 29 and 30 are preserved around the call.
    constexpr auto preserved_regs = (1ull << 29) | (1ull << 30);

    oaknut::Label fn_literal, conf_literal;

    auto read_and_mark = [](const A64::UserConfig& cfg, A64::VAddr vaddr) -> Vector {
        return cfg.global_monitor->ReadAndMark<Vector>(cfg.processor_id, vaddr, [&]() -> Vector {
            return cfg.callbacks->MemoryRead128(vaddr);
        });
    };

    void* const entry = code.ptr<void*>();
    ABI_PushRegisters(code, preserved_regs, sizeof(Vector));
    code.LDR(X0, conf_literal);
    code.LDR(Xscratch0, fn_literal);
    code.BLR(Xscratch0);
    // Move the result from the X0:X1 pair into Q0 via the stack.
    code.STP(X0, X1, SP);
    code.LDR(Q0, SP);
    ABI_PopRegisters(code, preserved_regs, sizeof(Vector));
    code.RET();

    // Literal pool read by the two literal LDRs above.
    code.align(8);
    code.l(conf_literal);
    code.dx(mcl::bit_cast<u64>(&conf));
    code.l(fn_literal);
    code.dx(mcl::bit_cast<u64>(Common::FptrCast(read_and_mark)));

    return entry;
}
|
||||
|
||||
// Emits a trampoline that calls UserCallbacks::MemoryWrite128 on `this_` and
// returns the address of its first instruction.
//
// Before branching (BR) to the callback, the two 64-bit halves of V0 are
// copied into X2 (low half) and X3 (high half); X0 is loaded with the
// devirtualized `this` pointer.
static void* EmitWrite128CallTrampoline(oaknut::CodeGenerator& code, A64::UserCallbacks* this_) {
    using namespace oaknut::util;

    const auto devirtualized = Devirtualize<&A64::UserCallbacks::MemoryWrite128>(this_);

    oaknut::Label fn_literal, this_literal;

    void* const entry = code.ptr<void*>();

    // Split the 128-bit value held in V0 across X2:X3.
    code.FMOV(X2, D0);
    code.FMOV(X3, V0.D()[1]);

    code.LDR(X0, this_literal);
    code.LDR(Xscratch0, fn_literal);
    code.BR(Xscratch0);

    // Literal pool read by the two LDRs above.
    code.align(8);
    code.l(this_literal);
    code.dx(devirtualized.this_ptr);
    code.l(fn_literal);
    code.dx(devirtualized.fn_ptr);

    return entry;
}
|
||||
|
||||
// Emits a trampoline for a 128-bit exclusive write and returns the address
// of its first instruction.
//
// The two 64-bit halves of V0 are copied into X2:X3, X0 receives the address
// of `conf`, and control branches (BR) to a captureless helper. The helper
// runs DoExclusiveOperation<Vector> on the global monitor with the
// MemoryWriteExclusive128 callback and yields 0 when the operation reports
// success and 1 otherwise.
static void* EmitExclusiveWrite128CallTrampoline(oaknut::CodeGenerator& code, const A64::UserConfig& conf) {
    using namespace oaknut::util;

    oaknut::Label fn_literal, conf_literal;

    auto exclusive_write = [](const A64::UserConfig& cfg, A64::VAddr vaddr, Vector value) -> u32 {
        return cfg.global_monitor->DoExclusiveOperation<Vector>(cfg.processor_id, vaddr,
                                                                [&](Vector expected) -> bool {
                                                                    return cfg.callbacks->MemoryWriteExclusive128(vaddr, value, expected);
                                                                })
                 ? 0
                 : 1;
    };

    void* const entry = code.ptr<void*>();

    // Split the 128-bit value held in V0 across X2:X3.
    code.FMOV(X2, D0);
    code.FMOV(X3, V0.D()[1]);

    code.LDR(X0, conf_literal);
    code.LDR(Xscratch0, fn_literal);
    code.BR(Xscratch0);

    // Literal pool read by the two LDRs above.
    code.align(8);
    code.l(conf_literal);
    code.dx(mcl::bit_cast<u64>(&conf));
    code.l(fn_literal);
    code.dx(mcl::bit_cast<u64>(Common::FptrCast(exclusive_write)));

    return entry;
}
|
||||
|
||||
// Builds the address space: the base class is constructed with the
// configured code cache size, the user configuration is copied into `conf`,
// and the prelude is emitted immediately.
A64AddressSpace::A64AddressSpace(const A64::UserConfig& conf)
        : AddressSpace(conf.code_cache_size), conf(conf) {
    EmitPrelude();
}
|
||||
|
||||
// Translates the guest code at `descriptor` into an IR block and runs the
// configured optimization passes over it.
//
// Pass order:
//   1. A64CallbackConfigPass (always);
//   2. get/set elimination + dead code elimination, when GetSetElimination is
//      enabled and check_halt_on_memory_access is off;
//   3. constant propagation + dead code elimination, when ConstProp is enabled;
//   4. interpret-block merging, when MiscIROpt is enabled;
//   5. VerificationPass (always).
IR::Block A64AddressSpace::GenerateIR(IR::LocationDescriptor descriptor) const {
    // Guest instruction fetches go through the user's MemoryReadCode callback.
    const auto fetch_code = [this](u64 vaddr) { return conf.callbacks->MemoryReadCode(vaddr); };

    IR::Block translated = A64::Translate(A64::LocationDescriptor{descriptor}, fetch_code,
                                          {conf.define_unpredictable_behaviour, conf.wall_clock_cntpct});

    Optimization::A64CallbackConfigPass(translated, conf);

    if (conf.HasOptimization(OptimizationFlag::GetSetElimination) && !conf.check_halt_on_memory_access) {
        Optimization::A64GetSetElimination(translated);
        Optimization::DeadCodeElimination(translated);
    }

    if (conf.HasOptimization(OptimizationFlag::ConstProp)) {
        Optimization::ConstantPropagation(translated);
        Optimization::DeadCodeElimination(translated);
    }

    if (conf.HasOptimization(OptimizationFlag::MiscIROpt)) {
        Optimization::A64MergeInterpretBlocksPass(translated, conf.callbacks);
    }

    Optimization::VerificationPass(translated);

    return translated;
}
|
||||
|
||||
void A64AddressSpace::EmitPrelude() {
|
||||
using namespace oaknut::util;
|
||||
|
||||
mem.unprotect();
|
||||
|
||||
prelude_info.read_memory_8 = EmitCallTrampoline<&A64::UserCallbacks::MemoryRead8>(code, conf.callbacks);
|
||||
prelude_info.read_memory_16 = EmitCallTrampoline<&A64::UserCallbacks::MemoryRead16>(code, conf.callbacks);
|
||||
prelude_info.read_memory_32 = EmitCallTrampoline<&A64::UserCallbacks::MemoryRead32>(code, conf.callbacks);
|
||||
prelude_info.read_memory_64 = EmitCallTrampoline<&A64::UserCallbacks::MemoryRead64>(code, conf.callbacks);
|
||||
prelude_info.read_memory_128 = EmitRead128CallTrampoline(code, conf.callbacks);
|
||||
prelude_info.exclusive_read_memory_8 = EmitExclusiveReadCallTrampoline<&A64::UserCallbacks::MemoryRead8, u8>(code, conf);
|
||||
prelude_info.exclusive_read_memory_16 = EmitExclusiveReadCallTrampoline<&A64::UserCallbacks::MemoryRead16, u16>(code, conf);
|
||||
prelude_info.exclusive_read_memory_32 = EmitExclusiveReadCallTrampoline<&A64::UserCallbacks::MemoryRead32, u32>(code, conf);
|
||||
prelude_info.exclusive_read_memory_64 = EmitExclusiveReadCallTrampoline<&A64::UserCallbacks::MemoryRead64, u64>(code, conf);
|
||||
prelude_info.exclusive_read_memory_128 = EmitExclusiveRead128CallTrampoline(code, conf);
|
||||
prelude_info.write_memory_8 = EmitCallTrampoline<&A64::UserCallbacks::MemoryWrite8>(code, conf.callbacks);
|
||||
prelude_info.write_memory_16 = EmitCallTrampoline<&A64::UserCallbacks::MemoryWrite16>(code, conf.callbacks);
|
||||
prelude_info.write_memory_32 = EmitCallTrampoline<&A64::UserCallbacks::MemoryWrite32>(code, conf.callbacks);
|
||||
prelude_info.write_memory_64 = EmitCallTrampoline<&A64::UserCallbacks::MemoryWrite64>(code, conf.callbacks);
|
||||
prelude_info.write_memory_128 = EmitWrite128CallTrampoline(code, conf.callbacks);
|
||||
prelude_info.exclusive_write_memory_8 = EmitExclusiveWriteCallTrampoline<&A64::UserCallbacks::MemoryWriteExclusive8, u8>(code, conf);
|
||||
prelude_info.exclusive_write_memory_16 = EmitExclusiveWriteCallTrampoline<&A64::UserCallbacks::MemoryWriteExclusive16, u16>(code, conf);
|
||||
prelude_info.exclusive_write_memory_32 = EmitExclusiveWriteCallTrampoline<&A64::UserCallbacks::MemoryWriteExclusive32, u32>(code, conf);
|
||||
prelude_info.exclusive_write_memory_64 = EmitExclusiveWriteCallTrampoline<&A64::UserCallbacks::MemoryWriteExclusive64, u64>(code, conf);
|
||||
prelude_info.exclusive_write_memory_128 = EmitExclusiveWrite128CallTrampoline(code, conf);
|
||||
prelude_info.call_svc = EmitCallTrampoline<&A64::UserCallbacks::CallSVC>(code, conf.callbacks);
|
||||
prelude_info.exception_raised = EmitCallTrampoline<&A64::UserCallbacks::ExceptionRaised>(code, conf.callbacks);
|
||||
prelude_info.isb_raised = EmitCallTrampoline<&A64::UserCallbacks::InstructionSynchronizationBarrierRaised>(code, conf.callbacks);
|
||||
prelude_info.ic_raised = EmitCallTrampoline<&A64::UserCallbacks::InstructionCacheOperationRaised>(code, conf.callbacks);
|
||||
prelude_info.dc_raised = EmitCallTrampoline<&A64::UserCallbacks::DataCacheOperationRaised>(code, conf.callbacks);
|
||||
prelude_info.get_cntpct = EmitCallTrampoline<&A64::UserCallbacks::GetCNTPCT>(code, conf.callbacks);
|
||||
prelude_info.add_ticks = EmitCallTrampoline<&A64::UserCallbacks::AddTicks>(code, conf.callbacks);
|
||||
prelude_info.get_ticks_remaining = EmitCallTrampoline<&A64::UserCallbacks::GetTicksRemaining>(code, conf.callbacks);
|
||||
|
||||
oaknut::Label return_from_run_code;
|
||||
|
||||
prelude_info.run_code = code.ptr<PreludeInfo::RunCodeFuncType>();
|
||||
{
|
||||
ABI_PushRegisters(code, ABI_CALLEE_SAVE | (1 << 30), sizeof(StackLayout));
|
||||
|
||||
code.MOV(X19, X0);
|
||||
code.MOV(Xstate, X1);
|
||||
code.MOV(Xhalt, X2);
|
||||
|
||||
if (conf.enable_cycle_counting) {
|
||||
code.BL(prelude_info.get_ticks_remaining);
|
||||
code.MOV(Xticks, X0);
|
||||
code.STR(Xticks, SP, offsetof(StackLayout, cycles_to_run));
|
||||
}
|
||||
|
||||
code.MRS(Xscratch1, oaknut::SystemReg::FPCR);
|
||||
code.STR(Wscratch1, SP, offsetof(StackLayout, save_host_fpcr));
|
||||
code.LDR(Wscratch0, Xstate, offsetof(A64JitState, fpcr));
|
||||
code.MSR(oaknut::SystemReg::FPCR, Xscratch0);
|
||||
|
||||
code.LDAR(Wscratch0, Xhalt);
|
||||
code.CBNZ(Wscratch0, return_from_run_code);
|
||||
|
||||
code.BR(X19);
|
||||
}
|
||||
|
||||
prelude_info.step_code = code.ptr<PreludeInfo::RunCodeFuncType>();
|
||||
{
|
||||
ABI_PushRegisters(code, ABI_CALLEE_SAVE | (1 << 30), sizeof(StackLayout));
|
||||
|
||||
code.MOV(X19, X0);
|
||||
code.MOV(Xstate, X1);
|
||||
code.MOV(Xhalt, X2);
|
||||
|
||||
if (conf.enable_cycle_counting) {
|
||||
code.MOV(Xticks, 1);
|
||||
code.STR(Xticks, SP, offsetof(StackLayout, cycles_to_run));
|
||||
}
|
||||
|
||||
code.MRS(Xscratch1, oaknut::SystemReg::FPCR);
|
||||
code.STR(Wscratch1, SP, offsetof(StackLayout, save_host_fpcr));
|
||||
code.LDR(Wscratch0, Xstate, offsetof(A64JitState, fpcr));
|
||||
code.MSR(oaknut::SystemReg::FPCR, Xscratch0);
|
||||
|
||||
oaknut::Label step_hr_loop;
|
||||
code.l(step_hr_loop);
|
||||
code.LDAXR(Wscratch0, Xhalt);
|
||||
code.CBNZ(Wscratch0, return_from_run_code);
|
||||
code.ORR(Wscratch0, Wscratch0, static_cast<u32>(HaltReason::Step));
|
||||
code.STLXR(Wscratch1, Wscratch0, Xhalt);
|
||||
code.CBNZ(Wscratch1, step_hr_loop);
|
||||
|
||||
code.BR(X19);
|
||||
}
|
||||
|
||||
prelude_info.return_to_dispatcher = code.ptr<void*>();
|
||||
{
|
||||
oaknut::Label l_this, l_addr;
|
||||
|
||||
code.LDAR(Wscratch0, Xhalt);
|
||||
code.CBNZ(Wscratch0, return_from_run_code);
|
||||
|
||||
if (conf.enable_cycle_counting) {
|
||||
code.CMP(Xticks, 0);
|
||||
code.B(LE, return_from_run_code);
|
||||
}
|
||||
|
||||
code.LDR(X0, l_this);
|
||||
code.MOV(X1, Xstate);
|
||||
code.LDR(Xscratch0, l_addr);
|
||||
code.BLR(Xscratch0);
|
||||
code.BR(X0);
|
||||
|
||||
const auto fn = [](A64AddressSpace& self, A64JitState& context) -> CodePtr {
|
||||
return self.GetOrEmit(context.GetLocationDescriptor());
|
||||
};
|
||||
|
||||
code.align(8);
|
||||
code.l(l_this);
|
||||
code.dx(mcl::bit_cast<u64>(this));
|
||||
code.l(l_addr);
|
||||
code.dx(mcl::bit_cast<u64>(Common::FptrCast(fn)));
|
||||
}
|
||||
|
||||
prelude_info.return_from_run_code = code.ptr<void*>();
|
||||
{
|
||||
code.l(return_from_run_code);
|
||||
|
||||
if (conf.enable_cycle_counting) {
|
||||
code.LDR(X1, SP, offsetof(StackLayout, cycles_to_run));
|
||||
code.SUB(X1, X1, Xticks);
|
||||
code.BL(prelude_info.add_ticks);
|
||||
}
|
||||
|
||||
code.LDR(Wscratch0, SP, offsetof(StackLayout, save_host_fpcr));
|
||||
code.MSR(oaknut::SystemReg::FPCR, Xscratch0);
|
||||
|
||||
oaknut::Label exit_hr_loop;
|
||||
code.l(exit_hr_loop);
|
||||
code.LDAXR(W0, Xhalt);
|
||||
code.STLXR(Wscratch0, WZR, Xhalt);
|
||||
code.CBNZ(Wscratch0, exit_hr_loop);
|
||||
|
||||
ABI_PopRegisters(code, ABI_CALLEE_SAVE | (1 << 30), sizeof(StackLayout));
|
||||
code.RET();
|
||||
}
|
||||
|
||||
prelude_info.end_of_prelude = code.ptr<u32*>();
|
||||
|
||||
mem.invalidate_all();
|
||||
mem.protect();
|
||||
}
|
||||
|
||||
// Builds the backend emitter configuration from the user configuration.
// A64-specific choices visible here: the system register values are copied
// from `conf`, data accesses are always little-endian, the A64 condition and
// terminal emitters are installed, and no coprocessors are supplied.
EmitConfig A64AddressSpace::GetEmitConfig() {
    // Without unsafe_optimizations, only the flags in all_safe_optimizations
    // are kept.
    const auto enabled_optimizations = conf.unsafe_optimizations
                                         ? conf.optimizations
                                         : conf.optimizations & all_safe_optimizations;

    return EmitConfig{
        .optimizations = enabled_optimizations,

        .hook_isb = conf.hook_isb,

        .cntfreq_el0 = conf.cntfrq_el0,
        .ctr_el0 = conf.ctr_el0,
        .dczid_el0 = conf.dczid_el0,
        .tpidrro_el0 = conf.tpidrro_el0,
        .tpidr_el0 = conf.tpidr_el0,

        .wall_clock_cntpct = conf.wall_clock_cntpct,
        .enable_cycle_counting = conf.enable_cycle_counting,

        .always_little_endian = true,

        // The FPCR is taken from the A64 view of the location descriptor.
        .descriptor_to_fpcr = [](const IR::LocationDescriptor& location) { return A64::LocationDescriptor{location}.FPCR(); },
        .emit_cond = EmitA64Cond,
        .emit_condition_failed_terminal = EmitA64ConditionFailedTerminal,
        .emit_terminal = EmitA64Terminal,

        .state_nzcv_offset = offsetof(A64JitState, cpsr_nzcv),
        .state_fpsr_offset = offsetof(A64JitState, fpsr),

        .coprocessors{},
    };
}
|
||||
|
||||
} // namespace Dynarmic::Backend::Arm64
|
28
src/dynarmic/backend/arm64/a64_address_space.h
Normal file
28
src/dynarmic/backend/arm64/a64_address_space.h
Normal file
|
@ -0,0 +1,28 @@
|
|||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2022 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "dynarmic/backend/arm64/address_space.h"
|
||||
#include "dynarmic/interface/A64/config.h"
|
||||
|
||||
namespace Dynarmic::Backend::Arm64 {
|
||||
|
||||
class A64AddressSpace final : public AddressSpace {
|
||||
public:
|
||||
explicit A64AddressSpace(const A64::UserConfig& conf);
|
||||
|
||||
IR::Block GenerateIR(IR::LocationDescriptor) const override;
|
||||
|
||||
protected:
|
||||
friend class A64Core;
|
||||
|
||||
void EmitPrelude();
|
||||
EmitConfig GetEmitConfig() override;
|
||||
|
||||
const A64::UserConfig conf;
|
||||
};
|
||||
|
||||
} // namespace Dynarmic::Backend::Arm64
|
30
src/dynarmic/backend/arm64/a64_core.h
Normal file
30
src/dynarmic/backend/arm64/a64_core.h
Normal file
|
@ -0,0 +1,30 @@
|
|||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2022 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "dynarmic/backend/arm64/a64_address_space.h"
|
||||
#include "dynarmic/backend/arm64/a64_jitstate.h"
|
||||
|
||||
namespace Dynarmic::Backend::Arm64 {
|
||||
|
||||
class A64Core final {
|
||||
public:
|
||||
explicit A64Core(const A64::UserConfig&) {}
|
||||
|
||||
HaltReason Run(A64AddressSpace& process, A64JitState& thread_ctx, volatile u32* halt_reason) {
|
||||
const auto location_descriptor = thread_ctx.GetLocationDescriptor();
|
||||
const auto entry_point = process.GetOrEmit(location_descriptor);
|
||||
return process.prelude_info.run_code(entry_point, &thread_ctx, halt_reason);
|
||||
}
|
||||
|
||||
HaltReason Step(A64AddressSpace& process, A64JitState& thread_ctx, volatile u32* halt_reason) {
|
||||
const auto location_descriptor = A64::LocationDescriptor{thread_ctx.GetLocationDescriptor()}.SetSingleStepping(true);
|
||||
const auto entry_point = process.GetOrEmit(location_descriptor);
|
||||
return process.prelude_info.step_code(entry_point, &thread_ctx, halt_reason);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace Dynarmic::Backend::Arm64
|
|
@ -1,5 +1,5 @@
|
|||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2021 MerryMage
|
||||
* Copyright (c) 2022 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
|
@ -11,136 +11,310 @@
|
|||
#include <mcl/scope_exit.hpp>
|
||||
#include <mcl/stdint.hpp>
|
||||
|
||||
#include "dynarmic/backend/arm64/a64_address_space.h"
|
||||
#include "dynarmic/backend/arm64/a64_core.h"
|
||||
#include "dynarmic/backend/arm64/a64_jitstate.h"
|
||||
#include "dynarmic/common/atomic.h"
|
||||
#include "dynarmic/interface/A64/a64.h"
|
||||
#include "dynarmic/interface/A64/config.h"
|
||||
|
||||
namespace Dynarmic::A64 {
|
||||
|
||||
struct Jit::Impl {};
|
||||
using namespace Backend::Arm64;
|
||||
|
||||
Jit::Jit(UserConfig conf) {
|
||||
(void)conf;
|
||||
struct Jit::Impl final {
|
||||
Impl(Jit*, A64::UserConfig conf)
|
||||
: conf(conf)
|
||||
, current_address_space(conf)
|
||||
, core(conf) {}
|
||||
|
||||
HaltReason Run() {
|
||||
ASSERT(!is_executing);
|
||||
PerformRequestedCacheInvalidation();
|
||||
|
||||
is_executing = true;
|
||||
SCOPE_EXIT {
|
||||
is_executing = false;
|
||||
};
|
||||
|
||||
HaltReason hr = core.Run(current_address_space, current_state, &halt_reason);
|
||||
|
||||
PerformRequestedCacheInvalidation();
|
||||
|
||||
return hr;
|
||||
}
|
||||
|
||||
HaltReason Step() {
|
||||
ASSERT(!is_executing);
|
||||
PerformRequestedCacheInvalidation();
|
||||
|
||||
is_executing = true;
|
||||
SCOPE_EXIT {
|
||||
is_executing = false;
|
||||
};
|
||||
|
||||
HaltReason hr = core.Step(current_address_space, current_state, &halt_reason);
|
||||
|
||||
PerformRequestedCacheInvalidation();
|
||||
|
||||
return hr;
|
||||
}
|
||||
|
||||
void ClearCache() {
|
||||
std::unique_lock lock{invalidation_mutex};
|
||||
invalidate_entire_cache = true;
|
||||
HaltExecution(HaltReason::CacheInvalidation);
|
||||
}
|
||||
|
||||
void InvalidateCacheRange(std::uint64_t start_address, std::size_t length) {
|
||||
std::unique_lock lock{invalidation_mutex};
|
||||
invalid_cache_ranges.add(boost::icl::discrete_interval<u64>::closed(start_address, start_address + length - 1));
|
||||
HaltExecution(HaltReason::CacheInvalidation);
|
||||
}
|
||||
|
||||
void Reset() {
|
||||
current_state = {};
|
||||
}
|
||||
|
||||
void HaltExecution(HaltReason hr) {
|
||||
Atomic::Or(&halt_reason, static_cast<u32>(hr));
|
||||
}
|
||||
|
||||
void ClearHalt(HaltReason hr) {
|
||||
Atomic::And(&halt_reason, ~static_cast<u32>(hr));
|
||||
}
|
||||
|
||||
std::uint64_t PC() const {
|
||||
return current_state.pc;
|
||||
}
|
||||
|
||||
void SetPC(std::uint64_t value) {
|
||||
current_state.pc = value;
|
||||
}
|
||||
|
||||
std::uint64_t SP() const {
|
||||
return current_state.sp;
|
||||
}
|
||||
|
||||
void SetSP(std::uint64_t value) {
|
||||
current_state.sp = value;
|
||||
}
|
||||
|
||||
std::array<std::uint64_t, 31>& Regs() {
|
||||
return current_state.reg;
|
||||
}
|
||||
|
||||
const std::array<std::uint64_t, 31>& Regs() const {
|
||||
return current_state.reg;
|
||||
}
|
||||
|
||||
std::array<std::uint64_t, 64>& VecRegs() {
|
||||
return current_state.vec;
|
||||
}
|
||||
|
||||
const std::array<std::uint64_t, 64>& VecRegs() const {
|
||||
return current_state.vec;
|
||||
}
|
||||
|
||||
std::uint32_t Fpcr() const {
|
||||
return current_state.fpcr;
|
||||
}
|
||||
|
||||
void SetFpcr(std::uint32_t value) {
|
||||
current_state.fpcr = value;
|
||||
}
|
||||
|
||||
std::uint32_t Fpsr() const {
|
||||
return current_state.fpsr;
|
||||
}
|
||||
|
||||
void SetFpsr(std::uint32_t value) {
|
||||
current_state.fpsr = value;
|
||||
}
|
||||
|
||||
std::uint32_t Pstate() const {
|
||||
return current_state.cpsr_nzcv;
|
||||
}
|
||||
|
||||
void SetPstate(std::uint32_t value) {
|
||||
current_state.cpsr_nzcv = value;
|
||||
}
|
||||
|
||||
void ClearExclusiveState() {
|
||||
current_state.exclusive_state = false;
|
||||
}
|
||||
|
||||
bool IsExecuting() const {
|
||||
return is_executing;
|
||||
}
|
||||
|
||||
void DumpDisassembly() const {
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
}
|
||||
|
||||
std::vector<std::string> Disassemble() const {
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
}
|
||||
|
||||
private:
|
||||
void PerformRequestedCacheInvalidation() {
|
||||
ClearHalt(HaltReason::CacheInvalidation);
|
||||
|
||||
if (invalidate_entire_cache) {
|
||||
current_address_space.ClearCache();
|
||||
|
||||
invalidate_entire_cache = false;
|
||||
invalid_cache_ranges.clear();
|
||||
return;
|
||||
}
|
||||
|
||||
if (!invalid_cache_ranges.empty()) {
|
||||
// TODO: Optimize
|
||||
current_address_space.ClearCache();
|
||||
|
||||
invalid_cache_ranges.clear();
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
A64::UserConfig conf;
|
||||
A64JitState current_state{};
|
||||
A64AddressSpace current_address_space;
|
||||
A64Core core;
|
||||
|
||||
volatile u32 halt_reason = 0;
|
||||
|
||||
std::mutex invalidation_mutex;
|
||||
boost::icl::interval_set<u64> invalid_cache_ranges;
|
||||
bool invalidate_entire_cache = false;
|
||||
bool is_executing = false;
|
||||
};
|
||||
|
||||
/// Creates the backend implementation object from the user configuration.
Jit::Jit(UserConfig conf)
        : impl{std::make_unique<Jit::Impl>(this, conf)} {}
|
||||
|
||||
// Defined in this translation unit, after the definition of Jit::Impl.
Jit::~Jit() = default;
|
||||
|
||||
/// Runs guest code; forwards to the implementation object.
HaltReason Jit::Run() {
    return impl->Run();
}
|
||||
|
||||
/// Single-steps guest code; forwards to the implementation object.
HaltReason Jit::Step() {
    return impl->Step();
}
|
||||
|
||||
/// Requests a full code-cache clear; forwards to the implementation object.
void Jit::ClearCache() {
    impl->ClearCache();
}
|
||||
|
||||
/// Requests invalidation of the given guest address range; forwards to the implementation object.
void Jit::InvalidateCacheRange(std::uint64_t start_address, std::size_t length) {
    impl->InvalidateCacheRange(start_address, length);
}
|
||||
|
||||
/// Resets the guest state; forwards to the implementation object.
void Jit::Reset() {
    impl->Reset();
}
|
||||
|
||||
/// Raises the given halt reason; forwards to the implementation object.
void Jit::HaltExecution(HaltReason hr) {
    impl->HaltExecution(hr);
}
|
||||
|
||||
/// Clears the given halt reason; forwards to the implementation object.
void Jit::ClearHalt(HaltReason hr) {
    impl->ClearHalt(hr);
}
|
||||
|
||||
std::uint64_t Jit::GetSP() const {
|
||||
return 0;
|
||||
return impl->SP();
|
||||
}
|
||||
|
||||
/// Sets the guest stack pointer.
void Jit::SetSP(std::uint64_t value) {
    impl->SetSP(value);
}
|
||||
|
||||
std::uint64_t Jit::GetPC() const {
|
||||
return 0;
|
||||
return impl->PC();
|
||||
}
|
||||
|
||||
/// Sets the guest program counter.
void Jit::SetPC(std::uint64_t value) {
    impl->SetPC(value);
}
|
||||
|
||||
std::uint64_t Jit::GetRegister(std::size_t index) const {
|
||||
(void)index;
|
||||
return 0;
|
||||
return impl->Regs()[index];
|
||||
}
|
||||
|
||||
/// Writes general-purpose register `index`. The index is not range-checked here.
void Jit::SetRegister(size_t index, std::uint64_t value) {
    impl->Regs()[index] = value;
}
|
||||
|
||||
std::array<std::uint64_t, 31> Jit::GetRegisters() const {
|
||||
return {};
|
||||
return impl->Regs();
|
||||
}
|
||||
|
||||
/// Overwrites all 31 general-purpose registers.
void Jit::SetRegisters(const std::array<std::uint64_t, 31>& value) {
    impl->Regs() = value;
}
|
||||
|
||||
/// Returns vector register `index`, assembled from its two consecutive 64-bit words.
/// The index is not range-checked here.
Vector Jit::GetVector(std::size_t index) const {
    auto& vec = impl->VecRegs();
    return {vec[index * 2], vec[index * 2 + 1]};
}
|
||||
|
||||
void Jit::SetVector(std::size_t index, Vector value) {
|
||||
(void)index;
|
||||
(void)value;
|
||||
auto& vec = impl->VecRegs();
|
||||
vec[index * 2] = value[0];
|
||||
vec[index * 2 + 1] = value[1];
|
||||
}
|
||||
|
||||
std::array<Vector, 32> Jit::GetVectors() const {
|
||||
return {};
|
||||
std::array<Vector, 32> ret;
|
||||
std::memcpy(ret.data(), impl->VecRegs().data(), sizeof(ret));
|
||||
return ret;
|
||||
}
|
||||
|
||||
/// Overwrites all 32 vector registers (byte-copied into the 64-word backing array).
void Jit::SetVectors(const std::array<Vector, 32>& value) {
    std::memcpy(impl->VecRegs().data(), value.data(), sizeof(value));
}
|
||||
|
||||
std::uint32_t Jit::GetFpcr() const {
|
||||
return 0;
|
||||
return impl->Fpcr();
|
||||
}
|
||||
|
||||
/// Sets the guest FPCR value.
void Jit::SetFpcr(std::uint32_t value) {
    impl->SetFpcr(value);
}
|
||||
|
||||
std::uint32_t Jit::GetFpsr() const {
|
||||
return 0;
|
||||
return impl->Fpsr();
|
||||
}
|
||||
|
||||
/// Sets the guest FPSR value.
void Jit::SetFpsr(std::uint32_t value) {
    impl->SetFpsr(value);
}
|
||||
|
||||
std::uint32_t Jit::GetPstate() const {
|
||||
return 0;
|
||||
return impl->Pstate();
|
||||
}
|
||||
|
||||
/// Sets the stored PSTATE word (backed by the state's cpsr_nzcv field).
void Jit::SetPstate(std::uint32_t value) {
    impl->SetPstate(value);
}
|
||||
|
||||
/// Clears the exclusive-access state; forwards to the implementation object.
void Jit::ClearExclusiveState() {
    impl->ClearExclusiveState();
}
|
||||
|
||||
bool Jit::IsExecuting() const {
|
||||
return false;
|
||||
return impl->IsExecuting();
|
||||
}
|
||||
|
||||
/// Forwards to the implementation object (which currently asserts as unimplemented).
void Jit::DumpDisassembly() const {
    impl->DumpDisassembly();
}
|
||||
|
||||
std::vector<std::string> Jit::Disassemble() const {
|
||||
ASSERT_FALSE("not implemented");
|
||||
return impl->Disassemble();
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::A64
|
||||
|
|
37
src/dynarmic/backend/arm64/a64_jitstate.h
Normal file
37
src/dynarmic/backend/arm64/a64_jitstate.h
Normal file
|
@ -0,0 +1,37 @@
|
|||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2022 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
|
||||
#include <mcl/stdint.hpp>
|
||||
|
||||
#include "dynarmic/frontend/A64/a64_location_descriptor.h"
|
||||
|
||||
namespace Dynarmic::Backend::Arm64 {
|
||||
|
||||
struct A64JitState {
|
||||
std::array<u64, 31> reg{};
|
||||
u64 sp = 0;
|
||||
u64 pc = 0;
|
||||
|
||||
u32 cpsr_nzcv = 0;
|
||||
|
||||
alignas(16) std::array<u64, 64> vec{};
|
||||
|
||||
u32 exclusive_state = 0;
|
||||
|
||||
u32 fpsr = 0;
|
||||
u32 fpcr = 0;
|
||||
|
||||
IR::LocationDescriptor GetLocationDescriptor() const {
|
||||
const u64 fpcr_u64 = static_cast<u64>(fpcr & A64::LocationDescriptor::fpcr_mask) << A64::LocationDescriptor::fpcr_shift;
|
||||
const u64 pc_u64 = pc & A64::LocationDescriptor::pc_mask;
|
||||
return IR::LocationDescriptor{pc_u64 | fpcr_u64};
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace Dynarmic::Backend::Arm64
|
|
@ -55,13 +55,15 @@ static FrameInfo CalculateFrameInfo(RegisterList rl, size_t frame_size) {
|
|||
};
|
||||
}
|
||||
|
||||
// Emits PAIR_OP for each consecutive pair of registers in frame_info.TYPE##s and
// SINGLE_OP for a trailing odd register, at OFFSET + i * TYPE##_size from SP.
// The outer size check is required: with an empty list, `size() - 1` would wrap
// around (size_t) and the pair loop would index far past the end of the list.
#define DO_IT(TYPE, REG_TYPE, PAIR_OP, SINGLE_OP, OFFSET)                                                                                         \
    if (frame_info.TYPE##s.size() > 0) {                                                                                                          \
        for (size_t i = 0; i < frame_info.TYPE##s.size() - 1; i += 2) {                                                                           \
            code.PAIR_OP(oaknut::REG_TYPE{frame_info.TYPE##s[i]}, oaknut::REG_TYPE{frame_info.TYPE##s[i + 1]}, SP, (OFFSET) + i * TYPE##_size);  \
        }                                                                                                                                         \
        if (frame_info.TYPE##s.size() % 2 == 1) {                                                                                                 \
            const size_t i = frame_info.TYPE##s.size() - 1;                                                                                       \
            code.SINGLE_OP(oaknut::REG_TYPE{frame_info.TYPE##s[i]}, SP, (OFFSET) + i * TYPE##_size);                                              \
        }                                                                                                                                         \
    }
|
||||
|
||||
void ABI_PushRegisters(oaknut::CodeGenerator& code, RegisterList rl, size_t frame_size) {
|
||||
|
|
213
src/dynarmic/backend/arm64/address_space.cpp
Normal file
213
src/dynarmic/backend/arm64/address_space.cpp
Normal file
|
@ -0,0 +1,213 @@
|
|||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2022 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include "dynarmic/backend/arm64/a64_address_space.h"
|
||||
#include "dynarmic/backend/arm64/a64_jitstate.h"
|
||||
#include "dynarmic/backend/arm64/abi.h"
|
||||
#include "dynarmic/backend/arm64/devirtualize.h"
|
||||
#include "dynarmic/backend/arm64/emit_arm64.h"
|
||||
#include "dynarmic/backend/arm64/stack_layout.h"
|
||||
#include "dynarmic/common/cast_util.h"
|
||||
#include "dynarmic/common/fp/fpcr.h"
|
||||
#include "dynarmic/interface/exclusive_monitor.h"
|
||||
#include "dynarmic/ir/opt/passes.h"
|
||||
|
||||
namespace Dynarmic::Backend::Arm64 {
|
||||
|
||||
/// Allocates a code block of `code_cache_size` and points the code generator at its start.
AddressSpace::AddressSpace(size_t code_cache_size)
        : code_cache_size(code_cache_size)
        , mem(code_cache_size)
        , code(mem.ptr()) {}
|
||||
|
||||
// Out-of-line definition of the defaulted destructor.
AddressSpace::~AddressSpace() = default;
|
||||
|
||||
/// Returns the entry point of an already-emitted block, or nullptr if none is cached.
CodePtr AddressSpace::Get(IR::LocationDescriptor descriptor) {
    const auto cached = block_entries.find(descriptor.Value());
    return cached != block_entries.end() ? cached->second : nullptr;
}
|
||||
|
||||
/// Returns the cached entry point for `descriptor`, generating IR and emitting
/// the block first when it is not in the cache yet.
CodePtr AddressSpace::GetOrEmit(IR::LocationDescriptor descriptor) {
    const CodePtr cached_entry = Get(descriptor);
    if (cached_entry != nullptr) {
        return cached_entry;
    }

    const EmittedBlockInfo emitted = Emit(GenerateIR(descriptor));

    // Record the new block only after emission has finished.
    const u64 key = descriptor.Value();
    block_infos.insert_or_assign(key, emitted);
    block_entries.insert_or_assign(key, emitted.entry_point);
    return emitted.entry_point;
}
|
||||
|
||||
/// Forgets every emitted block and rewinds the code generator to just after the prelude.
void AddressSpace::ClearCache() {
    block_references.clear();
    block_infos.clear();
    block_entries.clear();

    code.set_ptr(prelude_info.end_of_prelude);
}
|
||||
|
||||
size_t AddressSpace::GetRemainingSize() {
|
||||
return code_cache_size - (code.ptr<CodePtr>() - reinterpret_cast<CodePtr>(mem.ptr()));
|
||||
}
|
||||
|
||||
/// Emits host code for `block`, links it, and re-links existing blocks that reference it.
///
/// The whole cache is cleared first when less than 1 MiB of space remains.
EmittedBlockInfo AddressSpace::Emit(IR::Block block) {
    if (GetRemainingSize() < 1024 * 1024) {
        ClearCache();
    }

    // Capture the location before `block` is moved into the emitter below;
    // reading it from the moved-from block afterwards would be a use-after-move.
    const IR::LocationDescriptor descriptor = block.Location();

    mem.unprotect();

    EmittedBlockInfo block_info = EmitArm64(code, std::move(block), GetEmitConfig());

    Link(descriptor, block_info);

    mem.invalidate(reinterpret_cast<u32*>(block_info.entry_point), block_info.size);

    RelinkForDescriptor(descriptor);

    mem.protect();

    return block_info;
}
|
||||
|
||||
static void LinkBlockLinks(const CodePtr entry_point, const CodePtr target_ptr, const std::vector<BlockRelocation>& block_relocations_list) {
|
||||
using namespace oaknut;
|
||||
using namespace oaknut::util;
|
||||
|
||||
for (auto [ptr_offset] : block_relocations_list) {
|
||||
CodeGenerator c{reinterpret_cast<u32*>(entry_point + ptr_offset)};
|
||||
|
||||
if (target_ptr) {
|
||||
c.B((void*)target_ptr);
|
||||
} else {
|
||||
c.NOP();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Resolves the relocations of a freshly emitted block.
///
/// Prelude relocations are patched with a branch (B) or call (BL) to the matching
/// prelude routine. Block-to-block relocations are registered in `block_references`
/// and patched against whatever target is currently cached.
void AddressSpace::Link(IR::LocationDescriptor block_descriptor, EmittedBlockInfo& block_info) {
    using namespace oaknut;
    using namespace oaknut::util;

    for (auto [ptr_offset, target] : block_info.relocations) {
        CodeGenerator c{reinterpret_cast<u32*>(block_info.entry_point + ptr_offset)};

        switch (target) {
        case LinkTarget::ReturnToDispatcher:
            c.B(prelude_info.return_to_dispatcher);
            break;
        case LinkTarget::ReturnFromRunCode:
            c.B(prelude_info.return_from_run_code);
            break;
        case LinkTarget::ReadMemory8:
            c.BL(prelude_info.read_memory_8);
            break;
        case LinkTarget::ReadMemory16:
            c.BL(prelude_info.read_memory_16);
            break;
        case LinkTarget::ReadMemory32:
            c.BL(prelude_info.read_memory_32);
            break;
        case LinkTarget::ReadMemory64:
            c.BL(prelude_info.read_memory_64);
            break;
        case LinkTarget::ReadMemory128:
            c.BL(prelude_info.read_memory_128);
            break;
        case LinkTarget::ExclusiveReadMemory8:
            c.BL(prelude_info.exclusive_read_memory_8);
            break;
        case LinkTarget::ExclusiveReadMemory16:
            c.BL(prelude_info.exclusive_read_memory_16);
            break;
        case LinkTarget::ExclusiveReadMemory32:
            c.BL(prelude_info.exclusive_read_memory_32);
            break;
        case LinkTarget::ExclusiveReadMemory64:
            c.BL(prelude_info.exclusive_read_memory_64);
            break;
        case LinkTarget::ExclusiveReadMemory128:
            c.BL(prelude_info.exclusive_read_memory_128);
            break;
        case LinkTarget::WriteMemory8:
            c.BL(prelude_info.write_memory_8);
            break;
        case LinkTarget::WriteMemory16:
            c.BL(prelude_info.write_memory_16);
            break;
        case LinkTarget::WriteMemory32:
            c.BL(prelude_info.write_memory_32);
            break;
        case LinkTarget::WriteMemory64:
            c.BL(prelude_info.write_memory_64);
            break;
        case LinkTarget::WriteMemory128:
            c.BL(prelude_info.write_memory_128);
            break;
        case LinkTarget::ExclusiveWriteMemory8:
            c.BL(prelude_info.exclusive_write_memory_8);
            break;
        case LinkTarget::ExclusiveWriteMemory16:
            c.BL(prelude_info.exclusive_write_memory_16);
            break;
        case LinkTarget::ExclusiveWriteMemory32:
            c.BL(prelude_info.exclusive_write_memory_32);
            break;
        case LinkTarget::ExclusiveWriteMemory64:
            c.BL(prelude_info.exclusive_write_memory_64);
            break;
        case LinkTarget::ExclusiveWriteMemory128:
            c.BL(prelude_info.exclusive_write_memory_128);
            break;
        case LinkTarget::CallSVC:
            c.BL(prelude_info.call_svc);
            break;
        case LinkTarget::ExceptionRaised:
            c.BL(prelude_info.exception_raised);
            break;
        case LinkTarget::InstructionSynchronizationBarrierRaised:
            c.BL(prelude_info.isb_raised);
            break;
        case LinkTarget::InstructionCacheOperationRaised:
            c.BL(prelude_info.ic_raised);
            break;
        case LinkTarget::DataCacheOperationRaised:
            c.BL(prelude_info.dc_raised);
            break;
        case LinkTarget::GetCNTPCT:
            c.BL(prelude_info.get_cntpct);
            break;
        case LinkTarget::AddTicks:
            c.BL(prelude_info.add_ticks);
            break;
        case LinkTarget::GetTicksRemaining:
            c.BL(prelude_info.get_ticks_remaining);
            break;
        default:
            ASSERT_FALSE("Invalid relocation target");
        }
    }

    // Bind by reference: each entry carries a vector of relocation sites that
    // would otherwise be copied on every iteration.
    for (const auto& [target_descriptor, list] : block_info.block_relocations) {
        // Remember that this block jumps to the target so it can be re-linked
        // when the target is emitted later.
        block_references[target_descriptor.Value()].emplace(block_descriptor.Value());
        LinkBlockLinks(block_info.entry_point, Get(target_descriptor), list);
    }
}
|
||||
|
||||
/// Re-patches every already-emitted block that has a block link to `target_descriptor`,
/// using the target's current entry point (or a NOP when the target is not cached).
void AddressSpace::RelinkForDescriptor(IR::LocationDescriptor target_descriptor) {
    // Use find() rather than operator[] so that targets nobody references do not
    // leave an empty set behind in block_references.
    const auto references = block_references.find(target_descriptor.Value());
    if (references == block_references.end()) {
        return;
    }

    // The target entry point does not change while the referrers are patched.
    const CodePtr target_ptr = Get(target_descriptor);

    for (const u64 block_descriptor : references->second) {
        const auto info = block_infos.find(block_descriptor);
        if (info == block_infos.end()) {
            continue;
        }
        const EmittedBlockInfo& block_info = info->second;

        const auto relocations = block_info.block_relocations.find(target_descriptor);
        if (relocations == block_info.block_relocations.end()) {
            // Nothing to patch in this block, hence nothing to invalidate.
            continue;
        }

        LinkBlockLinks(block_info.entry_point, target_ptr, relocations->second);

        mem.invalidate(reinterpret_cast<u32*>(block_info.entry_point), block_info.size);
    }
}
|
||||
|
||||
} // namespace Dynarmic::Backend::Arm64
|
92
src/dynarmic/backend/arm64/address_space.h
Normal file
92
src/dynarmic/backend/arm64/address_space.h
Normal file
|
@ -0,0 +1,92 @@
|
|||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2022 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <mcl/stdint.hpp>
|
||||
#include <oaknut/code_block.hpp>
|
||||
#include <oaknut/oaknut.hpp>
|
||||
#include <tsl/robin_map.h>
|
||||
#include <tsl/robin_set.h>
|
||||
|
||||
#include "dynarmic/backend/arm64/emit_arm64.h"
|
||||
#include "dynarmic/interface/halt_reason.h"
|
||||
#include "dynarmic/ir/basic_block.h"
|
||||
#include "dynarmic/ir/location_descriptor.h"
|
||||
|
||||
namespace Dynarmic::Backend::Arm64 {
|
||||
|
||||
class AddressSpace {
|
||||
public:
|
||||
explicit AddressSpace(size_t code_cache_size);
|
||||
virtual ~AddressSpace();
|
||||
|
||||
virtual IR::Block GenerateIR(IR::LocationDescriptor) const = 0;
|
||||
|
||||
CodePtr Get(IR::LocationDescriptor descriptor);
|
||||
|
||||
CodePtr GetOrEmit(IR::LocationDescriptor descriptor);
|
||||
|
||||
void ClearCache();
|
||||
|
||||
protected:
|
||||
virtual EmitConfig GetEmitConfig() = 0;
|
||||
|
||||
size_t GetRemainingSize();
|
||||
EmittedBlockInfo Emit(IR::Block ir_block);
|
||||
void Link(IR::LocationDescriptor block_descriptor, EmittedBlockInfo& block);
|
||||
void RelinkForDescriptor(IR::LocationDescriptor target_descriptor);
|
||||
|
||||
const size_t code_cache_size;
|
||||
oaknut::CodeBlock mem;
|
||||
oaknut::CodeGenerator code;
|
||||
|
||||
tsl::robin_map<u64, CodePtr> block_entries;
|
||||
tsl::robin_map<u64, EmittedBlockInfo> block_infos;
|
||||
tsl::robin_map<u64, tsl::robin_set<u64>> block_references;
|
||||
|
||||
struct PreludeInfo {
|
||||
u32* end_of_prelude;
|
||||
|
||||
using RunCodeFuncType = HaltReason (*)(CodePtr entry_point, void* jit_state, volatile u32* halt_reason);
|
||||
RunCodeFuncType run_code;
|
||||
RunCodeFuncType step_code;
|
||||
void* return_to_dispatcher;
|
||||
void* return_from_run_code;
|
||||
|
||||
void* read_memory_8;
|
||||
void* read_memory_16;
|
||||
void* read_memory_32;
|
||||
void* read_memory_64;
|
||||
void* read_memory_128;
|
||||
void* exclusive_read_memory_8;
|
||||
void* exclusive_read_memory_16;
|
||||
void* exclusive_read_memory_32;
|
||||
void* exclusive_read_memory_64;
|
||||
void* exclusive_read_memory_128;
|
||||
void* write_memory_8;
|
||||
void* write_memory_16;
|
||||
void* write_memory_32;
|
||||
void* write_memory_64;
|
||||
void* write_memory_128;
|
||||
void* exclusive_write_memory_8;
|
||||
void* exclusive_write_memory_16;
|
||||
void* exclusive_write_memory_32;
|
||||
void* exclusive_write_memory_64;
|
||||
void* exclusive_write_memory_128;
|
||||
|
||||
void* call_svc;
|
||||
void* exception_raised;
|
||||
void* dc_raised;
|
||||
void* ic_raised;
|
||||
void* isb_raised;
|
||||
|
||||
void* get_cntpct;
|
||||
void* add_ticks;
|
||||
void* get_ticks_remaining;
|
||||
} prelude_info;
|
||||
};
|
||||
|
||||
} // namespace Dynarmic::Backend::Arm64
|
|
@ -8,7 +8,6 @@
|
|||
#include <fmt/ostream.h>
|
||||
#include <oaknut/oaknut.hpp>
|
||||
|
||||
#include "dynarmic/backend/arm64/a32_jitstate.h"
|
||||
#include "dynarmic/backend/arm64/abi.h"
|
||||
#include "dynarmic/backend/arm64/emit_context.h"
|
||||
#include "dynarmic/backend/arm64/fpsr_manager.h"
|
||||
|
@ -40,7 +39,7 @@ template<>
|
|||
// Calls the host function whose address is the immediate in args[0],
// passing args[1..3] as the call arguments.
void EmitIR<IR::Opcode::CallHostFunction>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    ctx.reg_alloc.PrepareForCall(args[1], args[2], args[3]);
    code.MOV(Xscratch0, args[0].GetImmediateU64());
    code.BLR(Xscratch0);
}
|
||||
|
@ -69,9 +68,34 @@ void EmitIR<IR::Opcode::GetGEFromOp>(oaknut::CodeGenerator&, EmitContext& ctx, I
|
|||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::GetNZCVFromOp>(oaknut::CodeGenerator&, EmitContext& ctx, IR::Inst* inst) {
|
||||
[[maybe_unused]] auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
ASSERT(ctx.reg_alloc.IsValueLive(inst));
|
||||
void EmitIR<IR::Opcode::GetNZCVFromOp>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
if (ctx.reg_alloc.IsValueLive(inst)) {
|
||||
return;
|
||||
}
|
||||
|
||||
switch (args[0].GetType()) {
|
||||
case IR::Type::U32: {
|
||||
auto Wvalue = ctx.reg_alloc.ReadW(args[0]);
|
||||
auto flags = ctx.reg_alloc.WriteFlags(inst);
|
||||
RegAlloc::Realize(Wvalue, flags);
|
||||
|
||||
code.TST(*Wvalue, Wvalue);
|
||||
break;
|
||||
}
|
||||
case IR::Type::U64: {
|
||||
auto Xvalue = ctx.reg_alloc.ReadX(args[0]);
|
||||
auto flags = ctx.reg_alloc.WriteFlags(inst);
|
||||
RegAlloc::Realize(Xvalue, flags);
|
||||
|
||||
code.TST(*Xvalue, Xvalue);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
ASSERT_FALSE("Invalid type for GetNZCVFromOp");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
template<>
|
||||
|
@ -164,10 +188,12 @@ EmittedBlockInfo EmitArm64(oaknut::CodeGenerator& code, IR::Block block, const E
|
|||
ASSERT(!ctx.block.HasConditionFailedLocation());
|
||||
} else {
|
||||
ASSERT(ctx.block.HasConditionFailedLocation());
|
||||
oaknut::Label pass;
|
||||
|
||||
oaknut::Label pass = EmitA32Cond(code, ctx, ctx.block.GetCondition());
|
||||
pass = conf.emit_cond(code, ctx, ctx.block.GetCondition());
|
||||
EmitAddCycles(code, ctx, ctx.block.ConditionFailedCycleCount());
|
||||
EmitA32ConditionFailedTerminal(code, ctx);
|
||||
conf.emit_condition_failed_terminal(code, ctx);
|
||||
|
||||
code.l(pass);
|
||||
}
|
||||
|
||||
|
@ -205,7 +231,7 @@ EmittedBlockInfo EmitArm64(oaknut::CodeGenerator& code, IR::Block block, const E
|
|||
reg_alloc.AssertNoMoreUses();
|
||||
|
||||
EmitAddCycles(code, ctx, block.CycleCount());
|
||||
EmitA32Terminal(code, ctx);
|
||||
conf.emit_terminal(code, ctx);
|
||||
|
||||
ebi.size = code.ptr<CodePtr>() - ebi.entry_point;
|
||||
return ebi;
|
||||
|
|
|
@ -38,6 +38,8 @@ enum class Opcode;
|
|||
|
||||
namespace Dynarmic::Backend::Arm64 {
|
||||
|
||||
struct EmitContext;
|
||||
|
||||
using CodePtr = std::byte*;
|
||||
|
||||
enum class LinkTarget {
|
||||
|
@ -47,21 +49,28 @@ enum class LinkTarget {
|
|||
ReadMemory16,
|
||||
ReadMemory32,
|
||||
ReadMemory64,
|
||||
ReadMemory128,
|
||||
ExclusiveReadMemory8,
|
||||
ExclusiveReadMemory16,
|
||||
ExclusiveReadMemory32,
|
||||
ExclusiveReadMemory64,
|
||||
ExclusiveReadMemory128,
|
||||
WriteMemory8,
|
||||
WriteMemory16,
|
||||
WriteMemory32,
|
||||
WriteMemory64,
|
||||
WriteMemory128,
|
||||
ExclusiveWriteMemory8,
|
||||
ExclusiveWriteMemory16,
|
||||
ExclusiveWriteMemory32,
|
||||
ExclusiveWriteMemory64,
|
||||
ExclusiveWriteMemory128,
|
||||
CallSVC,
|
||||
ExceptionRaised,
|
||||
InstructionSynchronizationBarrierRaised,
|
||||
InstructionCacheOperationRaised,
|
||||
DataCacheOperationRaised,
|
||||
GetCNTPCT,
|
||||
AddTicks,
|
||||
GetTicksRemaining,
|
||||
};
|
||||
|
@ -83,24 +92,39 @@ struct EmittedBlockInfo {
|
|||
};
|
||||
|
||||
struct EmitConfig {
|
||||
OptimizationFlag optimizations;
|
||||
bool HasOptimization(OptimizationFlag f) const { return (f & optimizations) != no_optimizations; }
|
||||
|
||||
bool hook_isb;
|
||||
|
||||
// System registers
|
||||
u64 cntfreq_el0;
|
||||
u32 ctr_el0;
|
||||
u32 dczid_el0;
|
||||
const u64* tpidrro_el0;
|
||||
u64* tpidr_el0;
|
||||
|
||||
// Timing
|
||||
bool wall_clock_cntpct;
|
||||
bool enable_cycle_counting;
|
||||
|
||||
// Endianness
|
||||
bool always_little_endian;
|
||||
|
||||
// Frontend specific callbacks
|
||||
FP::FPCR (*descriptor_to_fpcr)(const IR::LocationDescriptor& descriptor);
|
||||
oaknut::Label (*emit_cond)(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Cond cond);
|
||||
void (*emit_condition_failed_terminal)(oaknut::CodeGenerator& code, EmitContext& ctx);
|
||||
void (*emit_terminal)(oaknut::CodeGenerator& code, EmitContext& ctx);
|
||||
|
||||
// State offsets
|
||||
size_t state_nzcv_offset;
|
||||
size_t state_fpsr_offset;
|
||||
|
||||
// A32 specific
|
||||
std::array<std::shared_ptr<A32::Coprocessor>, 16> coprocessors{};
|
||||
|
||||
OptimizationFlag optimizations;
|
||||
|
||||
bool HasOptimization(OptimizationFlag f) const { return (f & optimizations) != no_optimizations; }
|
||||
};
|
||||
|
||||
struct EmitContext;
|
||||
|
||||
EmittedBlockInfo EmitArm64(oaknut::CodeGenerator& code, IR::Block block, const EmitConfig& emit_conf);
|
||||
|
||||
template<IR::Opcode op>
|
||||
|
@ -108,7 +132,10 @@ void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst);
|
|||
void EmitRelocation(oaknut::CodeGenerator& code, EmitContext& ctx, LinkTarget link_target);
|
||||
void EmitBlockLinkRelocation(oaknut::CodeGenerator& code, EmitContext& ctx, const IR::LocationDescriptor& descriptor);
|
||||
oaknut::Label EmitA32Cond(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Cond cond);
|
||||
oaknut::Label EmitA64Cond(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Cond cond);
|
||||
void EmitA32Terminal(oaknut::CodeGenerator& code, EmitContext& ctx);
|
||||
void EmitA64Terminal(oaknut::CodeGenerator& code, EmitContext& ctx);
|
||||
void EmitA32ConditionFailedTerminal(oaknut::CodeGenerator& code, EmitContext& ctx);
|
||||
void EmitA64ConditionFailedTerminal(oaknut::CodeGenerator& code, EmitContext& ctx);
|
||||
|
||||
} // namespace Dynarmic::Backend::Arm64
|
||||
|
|
|
@ -41,7 +41,7 @@ void EmitA32Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::Re
|
|||
EmitRelocation(code, ctx, LinkTarget::ReturnToDispatcher);
|
||||
}
|
||||
|
||||
void EmitSetUpperLocationDescriptor(oaknut::CodeGenerator& code, EmitContext& ctx, IR::LocationDescriptor new_location, IR::LocationDescriptor old_location) {
|
||||
static void EmitSetUpperLocationDescriptor(oaknut::CodeGenerator& code, EmitContext& ctx, IR::LocationDescriptor new_location, IR::LocationDescriptor old_location) {
|
||||
auto get_upper = [](const IR::LocationDescriptor& desc) -> u32 {
|
||||
return static_cast<u32>(A32::LocationDescriptor{desc}.SetSingleStepping(false).UniqueHash() >> 32);
|
||||
};
|
||||
|
@ -555,7 +555,7 @@ void EmitIR<IR::Opcode::A32UpdateUpperLocationDescriptor>(oaknut::CodeGenerator&
|
|||
template<>
|
||||
void EmitIR<IR::Opcode::A32CallSupervisor>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
ctx.reg_alloc.PrepareForCall(nullptr);
|
||||
ctx.reg_alloc.PrepareForCall();
|
||||
|
||||
if (ctx.conf.enable_cycle_counting) {
|
||||
code.LDR(Xscratch0, SP, offsetof(StackLayout, cycles_to_run));
|
||||
|
@ -576,7 +576,7 @@ void EmitIR<IR::Opcode::A32CallSupervisor>(oaknut::CodeGenerator& code, EmitCont
|
|||
template<>
|
||||
void EmitIR<IR::Opcode::A32ExceptionRaised>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
ctx.reg_alloc.PrepareForCall(nullptr);
|
||||
ctx.reg_alloc.PrepareForCall();
|
||||
|
||||
if (ctx.conf.enable_cycle_counting) {
|
||||
code.LDR(Xscratch0, SP, offsetof(StackLayout, cycles_to_run));
|
||||
|
@ -611,7 +611,7 @@ void EmitIR<IR::Opcode::A32InstructionSynchronizationBarrier>(oaknut::CodeGenera
|
|||
return;
|
||||
}
|
||||
|
||||
ctx.reg_alloc.PrepareForCall(nullptr);
|
||||
ctx.reg_alloc.PrepareForCall();
|
||||
EmitRelocation(code, ctx, LinkTarget::InstructionSynchronizationBarrierRaised);
|
||||
}
|
||||
|
||||
|
|
|
@ -24,7 +24,7 @@ static void EmitCoprocessorException() {
|
|||
}
|
||||
|
||||
static void CallCoprocCallback(oaknut::CodeGenerator& code, EmitContext& ctx, A32::Coprocessor::Callback callback, IR::Inst* inst = nullptr, std::optional<Argument::copyable_reference> arg0 = {}, std::optional<Argument::copyable_reference> arg1 = {}) {
|
||||
ctx.reg_alloc.PrepareForCall(inst, {}, arg0, arg1);
|
||||
ctx.reg_alloc.PrepareForCall({}, arg0, arg1);
|
||||
|
||||
if (callback.user_arg) {
|
||||
code.MOV(X0, reinterpret_cast<u64>(*callback.user_arg));
|
||||
|
@ -32,6 +32,10 @@ static void CallCoprocCallback(oaknut::CodeGenerator& code, EmitContext& ctx, A3
|
|||
|
||||
code.MOV(Xscratch0, reinterpret_cast<u64>(callback.function));
|
||||
code.BLR(Xscratch0);
|
||||
|
||||
if (inst) {
|
||||
ctx.reg_alloc.DefineAsRegister(inst, X0);
|
||||
}
|
||||
}
|
||||
|
||||
template<>
|
||||
|
|
|
@ -25,18 +25,19 @@ static bool IsOrdered(IR::AccType acctype) {
|
|||
|
||||
// Emits a call to the memory-read callback `fn` and binds the value returned in X0 to `inst`.
// args[1] is passed as the first real argument (the leading `{}` leaves the first
// argument slot to be filled elsewhere); args[2] is the access type.
static void EmitReadMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, LinkTarget fn) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    // Only the current PrepareForCall form is used; the result is defined explicitly below.
    ctx.reg_alloc.PrepareForCall({}, args[1]);
    const bool ordered = IsOrdered(args[2].GetImmediateAccType());

    EmitRelocation(code, ctx, fn);
    if (ordered) {
        // Ordered accesses are followed by a barrier.
        code.DMB(oaknut::BarrierOp::ISH);
    }
    ctx.reg_alloc.DefineAsRegister(inst, X0);
}
|
||||
|
||||
static void EmitExclusiveReadMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, LinkTarget fn) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
ctx.reg_alloc.PrepareForCall(inst, {}, args[1]);
|
||||
ctx.reg_alloc.PrepareForCall({}, args[1]);
|
||||
const bool ordered = IsOrdered(args[2].GetImmediateAccType());
|
||||
|
||||
code.MOV(Wscratch0, 1);
|
||||
|
@ -45,11 +46,12 @@ static void EmitExclusiveReadMemory(oaknut::CodeGenerator& code, EmitContext& ct
|
|||
if (ordered) {
|
||||
code.DMB(oaknut::BarrierOp::ISH);
|
||||
}
|
||||
ctx.reg_alloc.DefineAsRegister(inst, X0);
|
||||
}
|
||||
|
||||
static void EmitWriteMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, LinkTarget fn) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
ctx.reg_alloc.PrepareForCall(inst, {}, args[1], args[2]);
|
||||
ctx.reg_alloc.PrepareForCall({}, args[1], args[2]);
|
||||
const bool ordered = IsOrdered(args[3].GetImmediateAccType());
|
||||
|
||||
if (ordered) {
|
||||
|
@ -63,7 +65,7 @@ static void EmitWriteMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::I
|
|||
|
||||
static void EmitExclusiveWriteMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, LinkTarget fn) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
ctx.reg_alloc.PrepareForCall(inst, {}, args[1], args[2]);
|
||||
ctx.reg_alloc.PrepareForCall({}, args[1], args[2]);
|
||||
const bool ordered = IsOrdered(args[3].GetImmediateAccType());
|
||||
|
||||
oaknut::Label end;
|
||||
|
@ -79,6 +81,7 @@ static void EmitExclusiveWriteMemory(oaknut::CodeGenerator& code, EmitContext& c
|
|||
code.DMB(oaknut::BarrierOp::ISH);
|
||||
}
|
||||
code.l(end);
|
||||
ctx.reg_alloc.DefineAsRegister(inst, X0);
|
||||
}
|
||||
|
||||
template<>
|
||||
|
|
|
@ -3,9 +3,10 @@
|
|||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include <mcl/bit_cast.hpp>
|
||||
#include <oaknut/oaknut.hpp>
|
||||
|
||||
#include "dynarmic/backend/arm64/a32_jitstate.h"
|
||||
#include "dynarmic/backend/arm64/a64_jitstate.h"
|
||||
#include "dynarmic/backend/arm64/abi.h"
|
||||
#include "dynarmic/backend/arm64/emit_arm64.h"
|
||||
#include "dynarmic/backend/arm64/emit_context.h"
|
||||
|
@ -18,292 +19,460 @@ namespace Dynarmic::Backend::Arm64 {
|
|||
|
||||
using namespace oaknut::util;
|
||||
|
||||
// Loads the guest NZCV word from A64JitState into the host NZCV register and emits
// a conditional branch on `cond`. The returned label is the branch target; it is
// left unbound here so the caller can place it.
oaknut::Label EmitA64Cond(oaknut::CodeGenerator& code, EmitContext&, IR::Cond cond) {
    // TODO: Flags in host flags
    code.LDR(Wscratch0, Xstate, offsetof(A64JitState, cpsr_nzcv));
    code.MSR(oaknut::SystemReg::NZCV, Xscratch0);

    oaknut::Label cond_passed;
    const auto host_cond = static_cast<oaknut::Cond>(cond);
    code.B(host_cond, cond_passed);
    return cond_passed;
}
|
||||
|
||||
void EmitA64Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::Terminal terminal, IR::LocationDescriptor initial_location, bool is_single_step);
|
||||
|
||||
// Interpret terminal: emits nothing and asserts, since this terminal must not be emitted.
void EmitA64Terminal(oaknut::CodeGenerator&,
                     EmitContext&,
                     IR::Term::Interpret,
                     IR::LocationDescriptor,
                     bool) {
    ASSERT_FALSE("Interpret should never be emitted.");
}
|
||||
|
||||
// ReturnToDispatch terminal: emits a relocation to the ReturnToDispatcher link target.
void EmitA64Terminal(oaknut::CodeGenerator& code,
                     EmitContext& ctx,
                     IR::Term::ReturnToDispatch,
                     IR::LocationDescriptor,
                     bool) {
    EmitRelocation(code, ctx, LinkTarget::ReturnToDispatcher);
}
|
||||
|
||||
// LinkBlock terminal. When block linking is enabled and we are not single-stepping,
// emits a guarded block-link relocation to `terminal.next`; the guard is the remaining
// tick count (cycle counting on) or the halt word (cycle counting off). The fallback
// path stores the next PC into A64JitState and returns to the dispatcher.
void EmitA64Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::LinkBlock terminal, IR::LocationDescriptor, bool is_single_step) {
    oaknut::Label to_dispatcher;

    const bool may_link = ctx.conf.HasOptimization(OptimizationFlag::BlockLinking) && !is_single_step;
    if (may_link) {
        if (ctx.conf.enable_cycle_counting) {
            // Skip the link when Xticks <= 0.
            code.CMP(Xticks, 0);
            code.B(LE, to_dispatcher);
        } else {
            // Skip the link when the halt word is non-zero.
            code.LDAR(Wscratch0, Xhalt);
            code.CBNZ(Wscratch0, to_dispatcher);
        }
        EmitBlockLinkRelocation(code, ctx, terminal.next);
    }

    code.l(to_dispatcher);
    const auto next_pc = A64::LocationDescriptor{terminal.next}.PC();
    code.MOV(Xscratch0, next_pc);
    code.STR(Xscratch0, Xstate, offsetof(A64JitState, pc));
    EmitRelocation(code, ctx, LinkTarget::ReturnToDispatcher);
}
|
||||
|
||||
// LinkBlockFast terminal. When block linking is enabled and we are not single-stepping,
// emits an unguarded block-link relocation to `terminal.next`. The fallback path stores
// the next PC into A64JitState and returns to the dispatcher.
void EmitA64Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::LinkBlockFast terminal, IR::LocationDescriptor, bool is_single_step) {
    if (ctx.conf.HasOptimization(OptimizationFlag::BlockLinking) && !is_single_step) {
        EmitBlockLinkRelocation(code, ctx, terminal.next);
    }

    // Use the 64-bit scratch register, as the LinkBlock terminal does for the same pc field:
    // a W-register move/store would truncate the PC and leave the upper half of the
    // stored pc unchanged.
    code.MOV(Xscratch0, A64::LocationDescriptor{terminal.next}.PC());
    code.STR(Xscratch0, Xstate, offsetof(A64JitState, pc));
    EmitRelocation(code, ctx, LinkTarget::ReturnToDispatcher);
}
|
||||
|
||||
// PopRSBHint terminal: currently just returns to the dispatcher.
void EmitA64Terminal(oaknut::CodeGenerator& code,
                     EmitContext& ctx,
                     IR::Term::PopRSBHint,
                     IR::LocationDescriptor,
                     bool) {
    // TODO: Implement PopRSBHint optimization
    EmitRelocation(code, ctx, LinkTarget::ReturnToDispatcher);
}
|
||||
|
||||
// FastDispatchHint terminal: currently just returns to the dispatcher.
void EmitA64Terminal(oaknut::CodeGenerator& code,
                     EmitContext& ctx,
                     IR::Term::FastDispatchHint,
                     IR::LocationDescriptor,
                     bool) {
    // TODO: Implement FastDispatchHint optimization
    EmitRelocation(code, ctx, LinkTarget::ReturnToDispatcher);
}
|
||||
|
||||
// If terminal: branches on `terminal.if_`. The else_ terminal is emitted on the
// fall-through path and the then_ terminal at the branch target.
void EmitA64Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::If terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
    oaknut::Label then_path = EmitA64Cond(code, ctx, terminal.if_);

    // Condition failed: fall through.
    EmitA64Terminal(code, ctx, terminal.else_, initial_location, is_single_step);

    // Condition passed: branch target.
    code.l(then_path);
    EmitA64Terminal(code, ctx, terminal.then_, initial_location, is_single_step);
}
|
||||
|
||||
// CheckBit terminal: loads the check_bit byte from the stack layout; a zero value
// selects the else_ terminal, anything else falls through to the then_ terminal.
void EmitA64Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::CheckBit terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
    oaknut::Label bit_clear;

    code.LDRB(Wscratch0, SP, offsetof(StackLayout, check_bit));
    code.CBZ(Wscratch0, bit_clear);

    // check_bit != 0
    EmitA64Terminal(code, ctx, terminal.then_, initial_location, is_single_step);

    // check_bit == 0
    code.l(bit_clear);
    EmitA64Terminal(code, ctx, terminal.else_, initial_location, is_single_step);
}
|
||||
|
||||
// CheckHalt terminal: load-acquires the halt word; if it is non-zero control returns
// to the dispatcher, otherwise the else_ terminal runs.
void EmitA64Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
    oaknut::Label halt_requested;

    code.LDAR(Wscratch0, Xhalt);
    code.CBNZ(Wscratch0, halt_requested);

    // Halt word is zero: continue with the wrapped terminal.
    EmitA64Terminal(code, ctx, terminal.else_, initial_location, is_single_step);

    code.l(halt_requested);
    EmitRelocation(code, ctx, LinkTarget::ReturnToDispatcher);
}
|
||||
|
||||
// Dispatches on the concrete alternative held by the terminal variant and forwards
// to the matching EmitA64Terminal overload.
void EmitA64Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::Terminal terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
    boost::apply_visitor(
        [&](const auto& concrete_terminal) {
            EmitA64Terminal(code, ctx, concrete_terminal, initial_location, is_single_step);
        },
        terminal);
}
|
||||
|
||||
// Emits the terminal of the current block. The single-stepping flag is taken from
// the block's location and passed separately; the location itself is passed with
// that flag cleared.
void EmitA64Terminal(oaknut::CodeGenerator& code, EmitContext& ctx) {
    const A64::LocationDescriptor block_location{ctx.block.Location()};
    EmitA64Terminal(code,
                    ctx,
                    ctx.block.GetTerminal(),
                    block_location.SetSingleStepping(false),
                    block_location.SingleStepping());
}
|
||||
|
||||
// Emits the terminal used when the block's condition fails: a LinkBlock to the
// block's condition-failed location.
void EmitA64ConditionFailedTerminal(oaknut::CodeGenerator& code, EmitContext& ctx) {
    const A64::LocationDescriptor block_location{ctx.block.Location()};
    EmitA64Terminal(code,
                    ctx,
                    IR::Term::LinkBlock{ctx.block.ConditionFailedLocation()},
                    block_location.SetSingleStepping(false),
                    block_location.SingleStepping());
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64SetCheckBit>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
if (args[0].IsImmediate()) {
|
||||
if (args[0].GetImmediateU1()) {
|
||||
code.MOV(Wscratch0, 1);
|
||||
code.STRB(Wscratch0, SP, offsetof(StackLayout, check_bit));
|
||||
} else {
|
||||
code.STRB(WZR, SP, offsetof(StackLayout, check_bit));
|
||||
}
|
||||
} else {
|
||||
auto Wbit = ctx.reg_alloc.ReadW(args[0]);
|
||||
RegAlloc::Realize(Wbit);
|
||||
code.STRB(Wbit, SP, offsetof(StackLayout, check_bit));
|
||||
}
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64GetCFlag>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
auto Wflag = ctx.reg_alloc.WriteW(inst);
|
||||
RegAlloc::Realize(Wflag);
|
||||
code.LDR(Wflag, Xstate, offsetof(A64JitState, cpsr_nzcv));
|
||||
code.AND(Wflag, Wflag, 1 << 29);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64GetNZCVRaw>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
auto Wnzcv = ctx.reg_alloc.WriteW(inst);
|
||||
RegAlloc::Realize(Wnzcv);
|
||||
|
||||
code.LDR(Wnzcv, Xstate, offsetof(A64JitState, cpsr_nzcv));
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64SetNZCVRaw>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto Wnzcv = ctx.reg_alloc.ReadW(args[0]);
|
||||
RegAlloc::Realize(Wnzcv);
|
||||
|
||||
code.STR(Wnzcv, Xstate, offsetof(A64JitState, cpsr_nzcv));
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64SetNZCV>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto Wnzcv = ctx.reg_alloc.ReadW(args[0]);
|
||||
RegAlloc::Realize(Wnzcv);
|
||||
|
||||
code.STR(Wnzcv, Xstate, offsetof(A64JitState, cpsr_nzcv));
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64GetW>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
const A64::Reg reg = inst->GetArg(0).GetA64RegRef();
|
||||
|
||||
auto Wresult = ctx.reg_alloc.WriteW(inst);
|
||||
RegAlloc::Realize(Wresult);
|
||||
|
||||
// TODO: Detect if Gpr vs Fpr is more appropriate
|
||||
|
||||
code.LDR(Wresult, Xstate, offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg));
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64GetX>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
const A64::Reg reg = inst->GetArg(0).GetA64RegRef();
|
||||
|
||||
auto Xresult = ctx.reg_alloc.WriteX(inst);
|
||||
RegAlloc::Realize(Xresult);
|
||||
|
||||
// TODO: Detect if Gpr vs Fpr is more appropriate
|
||||
|
||||
code.LDR(Xresult, Xstate, offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg));
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64GetS>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
const A64::Vec vec = inst->GetArg(0).GetA64VecRef();
|
||||
auto Sresult = ctx.reg_alloc.WriteS(inst);
|
||||
RegAlloc::Realize(Sresult);
|
||||
code.LDR(Sresult, Xstate, offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec));
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64GetD>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
const A64::Vec vec = inst->GetArg(0).GetA64VecRef();
|
||||
auto Dresult = ctx.reg_alloc.WriteD(inst);
|
||||
RegAlloc::Realize(Dresult);
|
||||
code.LDR(Dresult, Xstate, offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec));
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64GetQ>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
const A64::Vec vec = inst->GetArg(0).GetA64VecRef();
|
||||
auto Qresult = ctx.reg_alloc.WriteQ(inst);
|
||||
RegAlloc::Realize(Qresult);
|
||||
code.LDR(Qresult, Xstate, offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec));
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64GetSP>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
auto Xresult = ctx.reg_alloc.WriteX(inst);
|
||||
RegAlloc::Realize(Xresult);
|
||||
|
||||
code.LDR(Xresult, Xstate, offsetof(A64JitState, sp));
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64GetFPCR>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
auto Wresult = ctx.reg_alloc.WriteW(inst);
|
||||
RegAlloc::Realize(Wresult);
|
||||
|
||||
code.LDR(Wresult, Xstate, offsetof(A64JitState, fpcr));
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64GetFPSR>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
auto Wresult = ctx.reg_alloc.WriteW(inst);
|
||||
RegAlloc::Realize(Wresult);
|
||||
|
||||
code.LDR(Wresult, Xstate, offsetof(A64JitState, fpsr));
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64SetW>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
const A64::Reg reg = inst->GetArg(0).GetA64RegRef();
|
||||
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
auto Wvalue = ctx.reg_alloc.ReadW(args[1]);
|
||||
RegAlloc::Realize(Wvalue);
|
||||
|
||||
// TODO: Detect if Gpr vs Fpr is more appropriate
|
||||
code.MOV(*Wvalue, Wvalue);
|
||||
code.STR(Wvalue->toX(), Xstate, offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg));
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64SetX>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
const A64::Reg reg = inst->GetArg(0).GetA64RegRef();
|
||||
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
auto Xvalue = ctx.reg_alloc.ReadX(args[1]);
|
||||
RegAlloc::Realize(Xvalue);
|
||||
|
||||
// TODO: Detect if Gpr vs Fpr is more appropriate
|
||||
|
||||
code.STR(Xvalue, Xstate, offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg));
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64SetS>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
const A64::Vec vec = inst->GetArg(0).GetA64VecRef();
|
||||
auto Svalue = ctx.reg_alloc.ReadS(args[1]);
|
||||
RegAlloc::Realize(Svalue);
|
||||
|
||||
code.FMOV(Svalue, Svalue);
|
||||
code.STR(Svalue->toQ(), Xstate, offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec));
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64SetD>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
const A64::Vec vec = inst->GetArg(0).GetA64VecRef();
|
||||
auto Dvalue = ctx.reg_alloc.ReadD(args[1]);
|
||||
RegAlloc::Realize(Dvalue);
|
||||
|
||||
code.FMOV(Dvalue, Dvalue);
|
||||
code.STR(Dvalue->toQ(), Xstate, offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec));
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64SetQ>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
const A64::Vec vec = inst->GetArg(0).GetA64VecRef();
|
||||
auto Qvalue = ctx.reg_alloc.ReadQ(args[1]);
|
||||
RegAlloc::Realize(Qvalue);
|
||||
code.STR(Qvalue, Xstate, offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec));
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64SetSP>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto Xvalue = ctx.reg_alloc.ReadX(args[0]);
|
||||
RegAlloc::Realize(Xvalue);
|
||||
code.STR(Xvalue, Xstate, offsetof(A64JitState, sp));
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64SetFPCR>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto Wvalue = ctx.reg_alloc.ReadW(args[0]);
|
||||
RegAlloc::Realize(Wvalue);
|
||||
code.STR(Wvalue, Xstate, offsetof(A64JitState, fpcr));
|
||||
code.MSR(oaknut::SystemReg::FPCR, Wvalue->toX());
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64SetFPSR>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto Wvalue = ctx.reg_alloc.ReadW(args[0]);
|
||||
RegAlloc::Realize(Wvalue);
|
||||
code.STR(Wvalue, Xstate, offsetof(A64JitState, fpsr));
|
||||
code.MSR(oaknut::SystemReg::FPSR, Wvalue->toX());
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64SetPC>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto Xvalue = ctx.reg_alloc.ReadX(args[0]);
|
||||
RegAlloc::Realize(Xvalue);
|
||||
code.STR(Xvalue, Xstate, offsetof(A64JitState, pc));
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64CallSupervisor>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
ctx.reg_alloc.PrepareForCall();
|
||||
|
||||
if (ctx.conf.enable_cycle_counting) {
|
||||
code.LDR(Xscratch0, SP, offsetof(StackLayout, cycles_to_run));
|
||||
code.SUB(Xscratch0, Xscratch0, Xticks);
|
||||
EmitRelocation(code, ctx, LinkTarget::AddTicks);
|
||||
}
|
||||
|
||||
code.MOV(W1, args[0].GetImmediateU32());
|
||||
EmitRelocation(code, ctx, LinkTarget::CallSVC);
|
||||
|
||||
if (ctx.conf.enable_cycle_counting) {
|
||||
EmitRelocation(code, ctx, LinkTarget::GetTicksRemaining);
|
||||
code.STR(X0, SP, offsetof(StackLayout, cycles_to_run));
|
||||
code.MOV(Xticks, X0);
|
||||
}
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64ExceptionRaised>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
ctx.reg_alloc.PrepareForCall();
|
||||
|
||||
if (ctx.conf.enable_cycle_counting) {
|
||||
code.LDR(Xscratch0, SP, offsetof(StackLayout, cycles_to_run));
|
||||
code.SUB(Xscratch0, Xscratch0, Xticks);
|
||||
EmitRelocation(code, ctx, LinkTarget::AddTicks);
|
||||
}
|
||||
|
||||
code.MOV(X1, args[0].GetImmediateU64());
|
||||
code.MOV(X2, args[1].GetImmediateU64());
|
||||
EmitRelocation(code, ctx, LinkTarget::ExceptionRaised);
|
||||
|
||||
if (ctx.conf.enable_cycle_counting) {
|
||||
EmitRelocation(code, ctx, LinkTarget::GetTicksRemaining);
|
||||
code.STR(X0, SP, offsetof(StackLayout, cycles_to_run));
|
||||
code.MOV(Xticks, X0);
|
||||
}
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64DataCacheOperationRaised>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
ctx.reg_alloc.PrepareForCall({}, args[1], args[2]);
|
||||
EmitRelocation(code, ctx, LinkTarget::DataCacheOperationRaised);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64InstructionCacheOperationRaised>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
ctx.reg_alloc.PrepareForCall({}, args[0], args[1]);
|
||||
EmitRelocation(code, ctx, LinkTarget::InstructionCacheOperationRaised);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64DataSynchronizationBarrier>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
void EmitIR<IR::Opcode::A64DataSynchronizationBarrier>(oaknut::CodeGenerator& code, EmitContext&, IR::Inst*) {
|
||||
code.DSB(oaknut::BarrierOp::SY);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64DataMemoryBarrier>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
void EmitIR<IR::Opcode::A64DataMemoryBarrier>(oaknut::CodeGenerator& code, EmitContext&, IR::Inst*) {
|
||||
code.DMB(oaknut::BarrierOp::SY);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64InstructionSynchronizationBarrier>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
void EmitIR<IR::Opcode::A64InstructionSynchronizationBarrier>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst*) {
|
||||
if (!ctx.conf.hook_isb) {
|
||||
return;
|
||||
}
|
||||
|
||||
ctx.reg_alloc.PrepareForCall();
|
||||
EmitRelocation(code, ctx, LinkTarget::InstructionSynchronizationBarrierRaised);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64GetCNTFRQ>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
auto Xvalue = ctx.reg_alloc.WriteX(inst);
|
||||
RegAlloc::Realize(Xvalue);
|
||||
code.MOV(Xvalue, ctx.conf.cntfreq_el0);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64GetCNTPCT>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
ctx.reg_alloc.PrepareForCall();
|
||||
if (!ctx.conf.wall_clock_cntpct && ctx.conf.enable_cycle_counting) {
|
||||
code.LDR(X1, SP, offsetof(StackLayout, cycles_to_run));
|
||||
code.SUB(X1, X1, Xticks);
|
||||
EmitRelocation(code, ctx, LinkTarget::AddTicks);
|
||||
EmitRelocation(code, ctx, LinkTarget::GetTicksRemaining);
|
||||
code.STR(X0, SP, offsetof(StackLayout, cycles_to_run));
|
||||
code.MOV(Xticks, X0);
|
||||
}
|
||||
EmitRelocation(code, ctx, LinkTarget::GetCNTPCT);
|
||||
ctx.reg_alloc.DefineAsRegister(inst, X0);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64GetCTR>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
auto Wvalue = ctx.reg_alloc.WriteW(inst);
|
||||
RegAlloc::Realize(Wvalue);
|
||||
code.MOV(Wvalue, ctx.conf.ctr_el0);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64GetDCZID>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
auto Wvalue = ctx.reg_alloc.WriteW(inst);
|
||||
RegAlloc::Realize(Wvalue);
|
||||
code.MOV(Wvalue, ctx.conf.dczid_el0);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64GetTPIDR>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
auto Xvalue = ctx.reg_alloc.WriteX(inst);
|
||||
RegAlloc::Realize(Xvalue);
|
||||
code.MOV(Xscratch0, mcl::bit_cast<u64>(ctx.conf.tpidr_el0));
|
||||
code.LDR(Xvalue, Xscratch0);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64GetTPIDRRO>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
auto Xvalue = ctx.reg_alloc.WriteX(inst);
|
||||
RegAlloc::Realize(Xvalue);
|
||||
code.MOV(Xscratch0, mcl::bit_cast<u64>(ctx.conf.tpidrro_el0));
|
||||
code.LDR(Xvalue, Xscratch0);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64SetTPIDR>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto Xvalue = ctx.reg_alloc.ReadX(args[0]);
|
||||
RegAlloc::Realize(Xvalue);
|
||||
code.MOV(Xscratch0, mcl::bit_cast<u64>(ctx.conf.tpidr_el0));
|
||||
code.STR(Xvalue, Xscratch0);
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::Backend::Arm64
|
||||
|
|
|
@ -5,11 +5,12 @@
|
|||
|
||||
#include <oaknut/oaknut.hpp>
|
||||
|
||||
#include "dynarmic/backend/arm64/a32_jitstate.h"
|
||||
#include "dynarmic/backend/arm64/a64_jitstate.h"
|
||||
#include "dynarmic/backend/arm64/abi.h"
|
||||
#include "dynarmic/backend/arm64/emit_arm64.h"
|
||||
#include "dynarmic/backend/arm64/emit_context.h"
|
||||
#include "dynarmic/backend/arm64/reg_alloc.h"
|
||||
#include "dynarmic/ir/acc_type.h"
|
||||
#include "dynarmic/ir/basic_block.h"
|
||||
#include "dynarmic/ir/microinstruction.h"
|
||||
#include "dynarmic/ir/opcodes.h"
|
||||
|
@ -18,172 +19,202 @@ namespace Dynarmic::Backend::Arm64 {
|
|||
|
||||
using namespace oaknut::util;
|
||||
|
||||
// Returns true for the access types treated as ordered by the memory emitters:
// ORDERED, ORDEREDRW and LIMITEDORDERED.
static bool IsOrdered(IR::AccType acctype) {
    switch (acctype) {
    case IR::AccType::ORDERED:
    case IR::AccType::ORDEREDRW:
    case IR::AccType::LIMITEDORDERED:
        return true;
    default:
        return false;
    }
}
|
||||
|
||||
// Emits a call to the memory-read callback `fn` with args[1] as its argument and binds
// the X0 result to `inst`. Ordered accesses (per args[2]) are followed by a DMB ISH.
static void EmitReadMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, LinkTarget fn) {
    auto call_args = ctx.reg_alloc.GetArgumentInfo(inst);
    ctx.reg_alloc.PrepareForCall({}, call_args[1]);
    const bool needs_barrier = IsOrdered(call_args[2].GetImmediateAccType());

    EmitRelocation(code, ctx, fn);

    if (needs_barrier) {
        code.DMB(oaknut::BarrierOp::ISH);
    }

    ctx.reg_alloc.DefineAsRegister(inst, X0);
}
|
||||
|
||||
// 128-bit variant of the memory read: calls `fn` with args[1], then copies the result
// from Q0 into Q8 and binds Q8 to `inst`. Ordered accesses are followed by a DMB ISH.
static void EmitReadMemory128(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, LinkTarget fn) {
    auto call_args = ctx.reg_alloc.GetArgumentInfo(inst);
    ctx.reg_alloc.PrepareForCall({}, call_args[1]);
    const bool needs_barrier = IsOrdered(call_args[2].GetImmediateAccType());

    EmitRelocation(code, ctx, fn);

    if (needs_barrier) {
        code.DMB(oaknut::BarrierOp::ISH);
    }

    // Move the returned vector out of Q0 before defining the result.
    code.MOV(Q8.B16(), Q0.B16());
    ctx.reg_alloc.DefineAsRegister(inst, Q8);
}
|
||||
|
||||
// Exclusive memory read: sets A64JitState::exclusive_state to 1, calls `fn` with
// args[1], and binds the X0 result to `inst`. Ordered accesses are followed by a DMB ISH.
static void EmitExclusiveReadMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, LinkTarget fn) {
    auto call_args = ctx.reg_alloc.GetArgumentInfo(inst);
    ctx.reg_alloc.PrepareForCall({}, call_args[1]);
    const bool needs_barrier = IsOrdered(call_args[2].GetImmediateAccType());

    // Mark the exclusive state as set before performing the read.
    code.MOV(Wscratch0, 1);
    code.STRB(Wscratch0, Xstate, offsetof(A64JitState, exclusive_state));

    EmitRelocation(code, ctx, fn);

    if (needs_barrier) {
        code.DMB(oaknut::BarrierOp::ISH);
    }

    ctx.reg_alloc.DefineAsRegister(inst, X0);
}
|
||||
|
||||
// 128-bit exclusive memory read: sets A64JitState::exclusive_state to 1, calls `fn`
// with args[1], then copies the result from Q0 into Q8 and binds Q8 to `inst`.
// Ordered accesses are followed by a DMB ISH.
static void EmitExclusiveReadMemory128(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, LinkTarget fn) {
    auto call_args = ctx.reg_alloc.GetArgumentInfo(inst);
    ctx.reg_alloc.PrepareForCall({}, call_args[1]);
    const bool needs_barrier = IsOrdered(call_args[2].GetImmediateAccType());

    // Mark the exclusive state as set before performing the read.
    code.MOV(Wscratch0, 1);
    code.STRB(Wscratch0, Xstate, offsetof(A64JitState, exclusive_state));

    EmitRelocation(code, ctx, fn);

    if (needs_barrier) {
        code.DMB(oaknut::BarrierOp::ISH);
    }

    // Move the returned vector out of Q0 before defining the result.
    code.MOV(Q8.B16(), Q0.B16());
    ctx.reg_alloc.DefineAsRegister(inst, Q8);
}
|
||||
|
||||
// Emits a call to the memory-write callback `fn` with args[1] and args[2].
// Ordered accesses (per args[3]) are bracketed by a DMB ISH before and after the call.
static void EmitWriteMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, LinkTarget fn) {
    auto call_args = ctx.reg_alloc.GetArgumentInfo(inst);
    ctx.reg_alloc.PrepareForCall({}, call_args[1], call_args[2]);
    const bool needs_barrier = IsOrdered(call_args[3].GetImmediateAccType());

    const auto emit_barrier_if_ordered = [&] {
        if (needs_barrier) {
            code.DMB(oaknut::BarrierOp::ISH);
        }
    };

    emit_barrier_if_ordered();
    EmitRelocation(code, ctx, fn);
    emit_barrier_if_ordered();
}
|
||||
|
||||
// Exclusive memory write. If A64JitState::exclusive_state is set, it is cleared and
// `fn` is called with args[1] and args[2]; the call's X0 value becomes the result of
// `inst`. If exclusive_state is not set, the call is skipped and the result is 1.
// Ordered accesses (per args[3]) are bracketed by DMB ISH barriers.
static void EmitExclusiveWriteMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, LinkTarget fn) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    ctx.reg_alloc.PrepareForCall({}, args[1], args[2]);
    const bool ordered = IsOrdered(args[3].GetImmediateAccType());

    oaknut::Label end;

    if (ordered) {
        code.DMB(oaknut::BarrierOp::ISH);
    }
    // Preload the failure status so that the skip path below yields a defined result;
    // without this, X0 is never written when the branch to `end` is taken.
    // NOTE(review): assumes the first call argument (the `{}` slot, X0) is populated
    // by the relocation target rather than by PrepareForCall — confirm.
    code.MOV(W0, 1);
    code.LDRB(Wscratch0, Xstate, offsetof(A64JitState, exclusive_state));
    code.CBZ(Wscratch0, end);
    code.STRB(WZR, Xstate, offsetof(A64JitState, exclusive_state));
    EmitRelocation(code, ctx, fn);
    if (ordered) {
        code.DMB(oaknut::BarrierOp::ISH);
    }
    code.l(end);
    ctx.reg_alloc.DefineAsRegister(inst, X0);
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64ClearExclusive>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
void EmitIR<IR::Opcode::A64ClearExclusive>(oaknut::CodeGenerator& code, EmitContext&, IR::Inst*) {
|
||||
code.STR(WZR, Xstate, offsetof(A64JitState, exclusive_state));
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64ReadMemory8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
EmitReadMemory(code, ctx, inst, LinkTarget::ReadMemory8);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64ReadMemory16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
EmitReadMemory(code, ctx, inst, LinkTarget::ReadMemory16);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64ReadMemory32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
EmitReadMemory(code, ctx, inst, LinkTarget::ReadMemory32);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64ReadMemory64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
EmitReadMemory(code, ctx, inst, LinkTarget::ReadMemory64);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64ReadMemory128>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
EmitReadMemory128(code, ctx, inst, LinkTarget::ReadMemory128);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64ExclusiveReadMemory8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
EmitExclusiveReadMemory(code, ctx, inst, LinkTarget::ExclusiveReadMemory8);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64ExclusiveReadMemory16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
EmitExclusiveReadMemory(code, ctx, inst, LinkTarget::ExclusiveReadMemory16);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64ExclusiveReadMemory32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
EmitExclusiveReadMemory(code, ctx, inst, LinkTarget::ExclusiveReadMemory32);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64ExclusiveReadMemory64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
EmitExclusiveReadMemory(code, ctx, inst, LinkTarget::ExclusiveReadMemory64);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64ExclusiveReadMemory128>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
EmitExclusiveReadMemory128(code, ctx, inst, LinkTarget::ExclusiveReadMemory128);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64WriteMemory8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
EmitWriteMemory(code, ctx, inst, LinkTarget::WriteMemory8);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64WriteMemory16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
EmitWriteMemory(code, ctx, inst, LinkTarget::WriteMemory16);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64WriteMemory32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
EmitWriteMemory(code, ctx, inst, LinkTarget::WriteMemory32);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64WriteMemory64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
EmitWriteMemory(code, ctx, inst, LinkTarget::WriteMemory64);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64WriteMemory128>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
EmitWriteMemory(code, ctx, inst, LinkTarget::WriteMemory128);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64ExclusiveWriteMemory8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
EmitExclusiveWriteMemory(code, ctx, inst, LinkTarget::ExclusiveWriteMemory8);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64ExclusiveWriteMemory16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
EmitExclusiveWriteMemory(code, ctx, inst, LinkTarget::ExclusiveWriteMemory16);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64ExclusiveWriteMemory32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
EmitExclusiveWriteMemory(code, ctx, inst, LinkTarget::ExclusiveWriteMemory32);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64ExclusiveWriteMemory64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
EmitExclusiveWriteMemory(code, ctx, inst, LinkTarget::ExclusiveWriteMemory64);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64ExclusiveWriteMemory128>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
EmitExclusiveWriteMemory(code, ctx, inst, LinkTarget::ExclusiveWriteMemory128);
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::Backend::Arm64
|
||||
|
|
|
@ -189,10 +189,14 @@ void EmitIR<IR::Opcode::IsZero64>(oaknut::CodeGenerator& code, EmitContext& ctx,
|
|||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::TestBit>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto Xresult = ctx.reg_alloc.WriteX(inst);
|
||||
auto Xoperand = ctx.reg_alloc.ReadX(args[0]);
|
||||
RegAlloc::Realize(Xresult, Xoperand);
|
||||
ASSERT(args[1].IsImmediate());
|
||||
ASSERT(args[1].GetImmediateU8() < 64);
|
||||
|
||||
code.UBFX(Xresult, Xoperand, args[1].GetImmediateU8(), 1);
|
||||
}
|
||||
|
||||
template<>
|
||||
|
@ -616,10 +620,23 @@ void EmitIR<IR::Opcode::ArithmeticShiftRight32>(oaknut::CodeGenerator& code, Emi
|
|||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::ArithmeticShiftRight64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto& operand_arg = args[0];
|
||||
auto& shift_arg = args[1];
|
||||
|
||||
if (shift_arg.IsImmediate()) {
|
||||
const u8 shift = shift_arg.GetImmediateU8();
|
||||
auto Xresult = ctx.reg_alloc.WriteX(inst);
|
||||
auto Xoperand = ctx.reg_alloc.ReadX(operand_arg);
|
||||
RegAlloc::Realize(Xresult, Xoperand);
|
||||
code.ASR(Xresult, Xoperand, shift <= 63 ? shift : 63);
|
||||
} else {
|
||||
auto Xresult = ctx.reg_alloc.WriteX(inst);
|
||||
auto Xoperand = ctx.reg_alloc.ReadX(operand_arg);
|
||||
auto Xshift = ctx.reg_alloc.ReadX(shift_arg);
|
||||
RegAlloc::Realize(Xresult, Xoperand, Xshift);
|
||||
code.ASR(Xresult, Xoperand, Xshift);
|
||||
}
|
||||
}
|
||||
|
||||
template<>
|
||||
|
@ -690,10 +707,23 @@ void EmitIR<IR::Opcode::RotateRight32>(oaknut::CodeGenerator& code, EmitContext&
|
|||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::RotateRight64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto& operand_arg = args[0];
|
||||
auto& shift_arg = args[1];
|
||||
|
||||
if (shift_arg.IsImmediate()) {
|
||||
const u8 shift = shift_arg.GetImmediateU8();
|
||||
auto Xresult = ctx.reg_alloc.WriteX(inst);
|
||||
auto Xoperand = ctx.reg_alloc.ReadX(operand_arg);
|
||||
RegAlloc::Realize(Xresult, Xoperand);
|
||||
code.ROR(Xresult, Xoperand, shift);
|
||||
} else {
|
||||
auto Xresult = ctx.reg_alloc.WriteX(inst);
|
||||
auto Xoperand = ctx.reg_alloc.ReadX(operand_arg);
|
||||
auto Xshift = ctx.reg_alloc.ReadX(shift_arg);
|
||||
RegAlloc::Realize(Xresult, Xoperand, Xshift);
|
||||
code.ROR(Xresult, Xoperand, Xshift);
|
||||
}
|
||||
}
|
||||
|
||||
template<>
|
||||
|
@ -726,68 +756,114 @@ void EmitIR<IR::Opcode::RotateRightExtended>(oaknut::CodeGenerator& code, EmitCo
|
|||
}
|
||||
}
|
||||
|
||||
template<typename ShiftI, typename ShiftR>
|
||||
static void EmitMaskedShift32(oaknut::CodeGenerator&, EmitContext& ctx, IR::Inst* inst, ShiftI si_fn, ShiftR sr_fn) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto& operand_arg = args[0];
|
||||
auto& shift_arg = args[1];
|
||||
|
||||
if (shift_arg.IsImmediate()) {
|
||||
auto Wresult = ctx.reg_alloc.WriteW(inst);
|
||||
auto Woperand = ctx.reg_alloc.ReadW(operand_arg);
|
||||
RegAlloc::Realize(Wresult, Woperand);
|
||||
const u32 shift = shift_arg.GetImmediateU32();
|
||||
|
||||
si_fn(Wresult, Woperand, static_cast<int>(shift & 0x1F));
|
||||
} else {
|
||||
auto Wresult = ctx.reg_alloc.WriteW(inst);
|
||||
auto Woperand = ctx.reg_alloc.ReadW(operand_arg);
|
||||
auto Wshift = ctx.reg_alloc.ReadW(shift_arg);
|
||||
RegAlloc::Realize(Wresult, Woperand, Wshift);
|
||||
|
||||
sr_fn(Wresult, Woperand, Wshift);
|
||||
}
|
||||
}
|
||||
|
||||
template<typename ShiftI, typename ShiftR>
|
||||
static void EmitMaskedShift64(oaknut::CodeGenerator&, EmitContext& ctx, IR::Inst* inst, ShiftI si_fn, ShiftR sr_fn) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto& operand_arg = args[0];
|
||||
auto& shift_arg = args[1];
|
||||
|
||||
if (shift_arg.IsImmediate()) {
|
||||
auto Xresult = ctx.reg_alloc.WriteX(inst);
|
||||
auto Xoperand = ctx.reg_alloc.ReadX(operand_arg);
|
||||
RegAlloc::Realize(Xresult, Xoperand);
|
||||
const u32 shift = shift_arg.GetImmediateU64();
|
||||
|
||||
si_fn(Xresult, Xoperand, static_cast<int>(shift & 0x3F));
|
||||
} else {
|
||||
auto Xresult = ctx.reg_alloc.WriteX(inst);
|
||||
auto Xoperand = ctx.reg_alloc.ReadX(operand_arg);
|
||||
auto Xshift = ctx.reg_alloc.ReadX(shift_arg);
|
||||
RegAlloc::Realize(Xresult, Xoperand, Xshift);
|
||||
|
||||
sr_fn(Xresult, Xoperand, Xshift);
|
||||
}
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::LogicalShiftLeftMasked32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
EmitMaskedShift32(
|
||||
code, ctx, inst,
|
||||
[&](auto& Wresult, auto& Woperand, auto shift) { code.LSL(Wresult, Woperand, shift); },
|
||||
[&](auto& Wresult, auto& Woperand, auto& Wshift) { code.LSL(Wresult, Woperand, Wshift); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::LogicalShiftLeftMasked64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
EmitMaskedShift64(
|
||||
code, ctx, inst,
|
||||
[&](auto& Xresult, auto& Xoperand, auto shift) { code.LSL(Xresult, Xoperand, shift); },
|
||||
[&](auto& Xresult, auto& Xoperand, auto& Xshift) { code.LSL(Xresult, Xoperand, Xshift); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::LogicalShiftRightMasked32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
EmitMaskedShift32(
|
||||
code, ctx, inst,
|
||||
[&](auto& Wresult, auto& Woperand, auto shift) { code.LSR(Wresult, Woperand, shift); },
|
||||
[&](auto& Wresult, auto& Woperand, auto& Wshift) { code.LSR(Wresult, Woperand, Wshift); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::LogicalShiftRightMasked64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
EmitMaskedShift64(
|
||||
code, ctx, inst,
|
||||
[&](auto& Xresult, auto& Xoperand, auto shift) { code.LSR(Xresult, Xoperand, shift); },
|
||||
[&](auto& Xresult, auto& Xoperand, auto& Xshift) { code.LSR(Xresult, Xoperand, Xshift); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::ArithmeticShiftRightMasked32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
EmitMaskedShift32(
|
||||
code, ctx, inst,
|
||||
[&](auto& Wresult, auto& Woperand, auto shift) { code.ASR(Wresult, Woperand, shift); },
|
||||
[&](auto& Wresult, auto& Woperand, auto& Wshift) { code.ASR(Wresult, Woperand, Wshift); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::ArithmeticShiftRightMasked64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
EmitMaskedShift64(
|
||||
code, ctx, inst,
|
||||
[&](auto& Xresult, auto& Xoperand, auto shift) { code.ASR(Xresult, Xoperand, shift); },
|
||||
[&](auto& Xresult, auto& Xoperand, auto& Xshift) { code.ASR(Xresult, Xoperand, Xshift); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::RotateRightMasked32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
EmitMaskedShift32(
|
||||
code, ctx, inst,
|
||||
[&](auto& Wresult, auto& Woperand, auto shift) { code.ROR(Wresult, Woperand, shift); },
|
||||
[&](auto& Wresult, auto& Woperand, auto& Wshift) { code.ROR(Wresult, Woperand, Wshift); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::RotateRightMasked64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
EmitMaskedShift64(
|
||||
code, ctx, inst,
|
||||
[&](auto& Xresult, auto& Xoperand, auto shift) { code.ROR(Xresult, Xoperand, shift); },
|
||||
[&](auto& Xresult, auto& Xoperand, auto& Xshift) { code.ROR(Xresult, Xoperand, Xshift); });
|
||||
}
|
||||
|
||||
template<size_t bitsize, typename EmitFn>
|
||||
|
@ -975,18 +1051,24 @@ void EmitIR<IR::Opcode::Mul64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR
|
|||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::SignedMultiplyHigh64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto Xresult = ctx.reg_alloc.WriteX(inst);
|
||||
auto Xop1 = ctx.reg_alloc.ReadX(args[0]);
|
||||
auto Xop2 = ctx.reg_alloc.ReadX(args[1]);
|
||||
RegAlloc::Realize(Xresult, Xop1, Xop2);
|
||||
|
||||
code.SMULH(Xresult, Xop1, Xop2);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::UnsignedMultiplyHigh64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto Xresult = ctx.reg_alloc.WriteX(inst);
|
||||
auto Xop1 = ctx.reg_alloc.ReadX(args[0]);
|
||||
auto Xop2 = ctx.reg_alloc.ReadX(args[1]);
|
||||
RegAlloc::Realize(Xresult, Xop1, Xop2);
|
||||
|
||||
code.UMULH(Xresult, Xop1, Xop2);
|
||||
}
|
||||
|
||||
template<>
|
||||
|
@ -1160,7 +1242,7 @@ void EmitIR<IR::Opcode::AndNot32>(oaknut::CodeGenerator& code, EmitContext& ctx,
|
|||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::AndNot64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitAndNot<32>(code, ctx, inst);
|
||||
EmitAndNot<64>(code, ctx, inst);
|
||||
}
|
||||
|
||||
template<>
|
||||
|
@ -1271,9 +1353,13 @@ void EmitIR<IR::Opcode::ZeroExtendWordToLong>(oaknut::CodeGenerator&, EmitContex
|
|||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::ZeroExtendLongToQuad>(oaknut::CodeGenerator&, EmitContext& ctx, IR::Inst* inst) {
|
||||
void EmitIR<IR::Opcode::ZeroExtendLongToQuad>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
ctx.reg_alloc.DefineAsExisting(inst, args[0]);
|
||||
auto Xvalue = ctx.reg_alloc.ReadX(args[0]);
|
||||
auto Qresult = ctx.reg_alloc.WriteQ(inst);
|
||||
RegAlloc::Realize(Xvalue, Qresult);
|
||||
|
||||
code.FMOV(Qresult->toD(), Xvalue);
|
||||
}
|
||||
|
||||
template<>
|
||||
|
@ -1313,98 +1399,124 @@ void EmitIR<IR::Opcode::CountLeadingZeros64>(oaknut::CodeGenerator& code, EmitCo
|
|||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::ExtractRegister32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
ASSERT(args[2].IsImmediate());
|
||||
|
||||
auto Wresult = ctx.reg_alloc.WriteW(inst);
|
||||
auto Wop1 = ctx.reg_alloc.ReadW(args[0]);
|
||||
auto Wop2 = ctx.reg_alloc.ReadW(args[1]);
|
||||
RegAlloc::Realize(Wresult, Wop1, Wop2);
|
||||
const u8 lsb = args[2].GetImmediateU8();
|
||||
|
||||
code.EXTR(Wresult, Wop2, Wop1, lsb); // NB: flipped
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::ExtractRegister64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
ASSERT(args[2].IsImmediate());
|
||||
|
||||
auto Xresult = ctx.reg_alloc.WriteX(inst);
|
||||
auto Xop1 = ctx.reg_alloc.ReadX(args[0]);
|
||||
auto Xop2 = ctx.reg_alloc.ReadX(args[1]);
|
||||
RegAlloc::Realize(Xresult, Xop1, Xop2);
|
||||
const u8 lsb = args[2].GetImmediateU8();
|
||||
|
||||
code.EXTR(Xresult, Xop2, Xop1, lsb); // NB: flipped
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::ReplicateBit32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
ASSERT(args[1].IsImmediate());
|
||||
|
||||
auto Wresult = ctx.reg_alloc.WriteW(inst);
|
||||
auto Wvalue = ctx.reg_alloc.ReadW(args[0]);
|
||||
const u8 bit = args[1].GetImmediateU8();
|
||||
RegAlloc::Realize(Wresult, Wvalue);
|
||||
|
||||
code.LSL(Wresult, Wvalue, 31 - bit);
|
||||
code.ASR(Wresult, Wresult, 31);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::ReplicateBit64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
ASSERT(args[1].IsImmediate());
|
||||
|
||||
auto Xresult = ctx.reg_alloc.WriteX(inst);
|
||||
auto Xvalue = ctx.reg_alloc.ReadX(args[0]);
|
||||
const u8 bit = args[1].GetImmediateU8();
|
||||
RegAlloc::Realize(Xresult, Xvalue);
|
||||
|
||||
code.LSL(Xresult, Xvalue, 63 - bit);
|
||||
code.ASR(Xresult, Xresult, 63);
|
||||
}
|
||||
|
||||
// Shared 32-bit max/min emitter: compares the two operands and selects the
// first one when `cond` holds, otherwise the second (CMP + CSEL).
// Flags are spilled beforehand because CMP overwrites them.
static void EmitMaxMin32(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, oaknut::Cond cond) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);

    auto Wout = ctx.reg_alloc.WriteW(inst);
    auto Wlhs = ctx.reg_alloc.ReadW(operands[0]);
    auto Wrhs = ctx.reg_alloc.ReadW(operands[1]);
    RegAlloc::Realize(Wout, Wlhs, Wrhs);
    ctx.reg_alloc.SpillFlags();

    code.CMP(Wlhs->toW(), Wrhs);
    code.CSEL(Wout, Wlhs, Wrhs, cond);
}
|
||||
|
||||
// Shared 64-bit max/min emitter: compares the two operands and selects the
// first one when `cond` holds, otherwise the second (CMP + CSEL).
// Flags are spilled beforehand because CMP overwrites them.
static void EmitMaxMin64(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, oaknut::Cond cond) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);

    auto Xout = ctx.reg_alloc.WriteX(inst);
    auto Xlhs = ctx.reg_alloc.ReadX(operands[0]);
    auto Xrhs = ctx.reg_alloc.ReadX(operands[1]);
    RegAlloc::Realize(Xout, Xlhs, Xrhs);
    ctx.reg_alloc.SpillFlags();

    code.CMP(Xlhs->toX(), Xrhs);
    code.CSEL(Xout, Xlhs, Xrhs, cond);
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::MaxSigned32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
EmitMaxMin32(code, ctx, inst, GT);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::MaxSigned64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
EmitMaxMin64(code, ctx, inst, GT);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::MaxUnsigned32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
EmitMaxMin32(code, ctx, inst, HI);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::MaxUnsigned64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
EmitMaxMin64(code, ctx, inst, HI);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::MinSigned32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
EmitMaxMin32(code, ctx, inst, LT);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::MinSigned64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
EmitMaxMin64(code, ctx, inst, LT);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::MinUnsigned32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
EmitMaxMin32(code, ctx, inst, LO);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::MinUnsigned64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
EmitMaxMin64(code, ctx, inst, LO);
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::Backend::Arm64
|
||||
|
|
|
@ -575,6 +575,20 @@ void EmitIR<IR::Opcode::FPDoubleToHalf>(oaknut::CodeGenerator& code, EmitContext
|
|||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPDoubleToSingle>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
const auto rounding_mode = static_cast<FP::RoundingMode>(inst->GetArg(1).GetU8());
|
||||
|
||||
if (rounding_mode == FP::RoundingMode::ToOdd) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto Sto = ctx.reg_alloc.WriteS(inst);
|
||||
auto Dfrom = ctx.reg_alloc.ReadD(args[0]);
|
||||
RegAlloc::Realize(Sto, Dfrom);
|
||||
ctx.fpsr.Load();
|
||||
|
||||
code.FCVTXN(Sto, Dfrom);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
EmitConvert<64, 32>(code, ctx, inst, [&](auto& Sto, auto& Dfrom) { code.FCVT(Sto, Dfrom); });
|
||||
}
|
||||
|
||||
|
|
|
@ -201,8 +201,8 @@ static void EmitThreeOpArrangedLower(oaknut::CodeGenerator& code, EmitContext& c
|
|||
template<size_t size, typename EmitFn>
|
||||
static void EmitSaturatedAccumulate(oaknut::CodeGenerator&, EmitContext& ctx, IR::Inst* inst, EmitFn emit) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto Qaccumulator = ctx.reg_alloc.ReadWriteQ(args[0], inst);
|
||||
auto Qoperand = ctx.reg_alloc.ReadQ(args[1]);
|
||||
auto Qaccumulator = ctx.reg_alloc.ReadWriteQ(args[1], inst); // NB: Swapped
|
||||
auto Qoperand = ctx.reg_alloc.ReadQ(args[0]); // NB: Swapped
|
||||
RegAlloc::Realize(Qaccumulator, Qoperand);
|
||||
ctx.fpsr.Load();
|
||||
|
||||
|
|
|
@ -3,14 +3,31 @@
|
|||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include <mcl/bit_cast.hpp>
|
||||
#include <mcl/mp/metavalue/lift_value.hpp>
|
||||
#include <mcl/mp/typelist/cartesian_product.hpp>
|
||||
#include <mcl/mp/typelist/get.hpp>
|
||||
#include <mcl/mp/typelist/lift_sequence.hpp>
|
||||
#include <mcl/mp/typelist/list.hpp>
|
||||
#include <mcl/mp/typelist/lower_to_tuple.hpp>
|
||||
#include <mcl/type_traits/function_info.hpp>
|
||||
#include <mcl/type_traits/integer_of_size.hpp>
|
||||
#include <oaknut/oaknut.hpp>
|
||||
|
||||
#include "dynarmic/backend/arm64/a32_jitstate.h"
|
||||
#include "dynarmic/backend/arm64/a64_jitstate.h"
|
||||
#include "dynarmic/backend/arm64/abi.h"
|
||||
#include "dynarmic/backend/arm64/emit_arm64.h"
|
||||
#include "dynarmic/backend/arm64/emit_context.h"
|
||||
#include "dynarmic/backend/arm64/fpsr_manager.h"
|
||||
#include "dynarmic/backend/arm64/reg_alloc.h"
|
||||
#include "dynarmic/common/cast_util.h"
|
||||
#include "dynarmic/common/fp/fpcr.h"
|
||||
#include "dynarmic/common/fp/fpsr.h"
|
||||
#include "dynarmic/common/fp/info.h"
|
||||
#include "dynarmic/common/fp/op.h"
|
||||
#include "dynarmic/common/fp/rounding_mode.h"
|
||||
#include "dynarmic/common/lut_from_list.h"
|
||||
#include "dynarmic/ir/basic_block.h"
|
||||
#include "dynarmic/ir/microinstruction.h"
|
||||
#include "dynarmic/ir/opcodes.h"
|
||||
|
@ -18,6 +35,15 @@
|
|||
namespace Dynarmic::Backend::Arm64 {
|
||||
|
||||
using namespace oaknut::util;
|
||||
namespace mp = mcl::mp;
|
||||
|
||||
using A64FullVectorWidth = std::integral_constant<size_t, 128>;
|
||||
|
||||
// Array alias that always sizes itself according to the given type T
|
||||
// relative to the size of a vector register. e.g. T = u32 would result
|
||||
// in a std::array<u32, 4>.
|
||||
template<typename T>
|
||||
using VectorArray = std::array<T, A64FullVectorWidth::value / mcl::bitsizeof<T>>;
|
||||
|
||||
template<typename EmitFn>
|
||||
static void MaybeStandardFPSCRValue(oaknut::CodeGenerator& code, EmitContext& ctx, bool fpcr_controlled, EmitFn emit) {
|
||||
|
@ -232,12 +258,47 @@ void EmitToFixed(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst)
|
|||
});
|
||||
}
|
||||
|
||||
template<typename Lambda>
|
||||
static void EmitTwoOpFallbackWithoutRegAlloc(oaknut::CodeGenerator& code, EmitContext& ctx, oaknut::QReg Qresult, oaknut::QReg Qarg1, Lambda lambda, bool fpcr_controlled) {
|
||||
const auto fn = static_cast<mcl::equivalent_function_type<Lambda>*>(lambda);
|
||||
|
||||
const u32 fpcr = ctx.FPCR(fpcr_controlled).Value();
|
||||
constexpr u64 stack_size = sizeof(u64) * 4; // sizeof(u128) * 2
|
||||
|
||||
ABI_PushRegisters(code, ABI_CALLER_SAVE & ~(1ull << Qresult.index()), stack_size);
|
||||
|
||||
code.MOV(Xscratch0, mcl::bit_cast<u64>(fn));
|
||||
code.ADD(X0, SP, 0 * 16);
|
||||
code.ADD(X1, SP, 1 * 16);
|
||||
code.MOV(X2, fpcr);
|
||||
code.ADD(X3, Xstate, ctx.conf.state_fpsr_offset);
|
||||
code.STR(Qarg1, X1);
|
||||
code.BLR(Xscratch0);
|
||||
code.LDR(Qresult, SP);
|
||||
|
||||
ABI_PopRegisters(code, ABI_CALLER_SAVE & ~(1ull << Qresult.index()), stack_size);
|
||||
}
|
||||
|
||||
template<size_t fpcr_controlled_arg_index = 1, typename Lambda>
|
||||
static void EmitTwoOpFallback(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, Lambda lambda) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto Qarg1 = ctx.reg_alloc.ReadQ(args[0]);
|
||||
auto Qresult = ctx.reg_alloc.WriteQ(inst);
|
||||
RegAlloc::Realize(Qarg1, Qresult);
|
||||
ctx.reg_alloc.SpillFlags();
|
||||
ctx.fpsr.Spill();
|
||||
|
||||
const bool fpcr_controlled = args[fpcr_controlled_arg_index].GetImmediateU1();
|
||||
EmitTwoOpFallbackWithoutRegAlloc(code, ctx, Qresult, Qarg1, lambda, fpcr_controlled);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorAbs16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto Qresult = ctx.reg_alloc.ReadWriteQ(args[0], inst);
|
||||
RegAlloc::Realize(Qresult);
|
||||
|
||||
code.BIC(Qresult->H8(), 0b10000000, LSL, 8);
|
||||
}
|
||||
|
||||
template<>
|
||||
|
@ -486,10 +547,35 @@ void EmitIR<IR::Opcode::FPVectorRecipStepFused64>(oaknut::CodeGenerator& code, E
|
|||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorRoundInt16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
const auto rounding = static_cast<FP::RoundingMode>(inst->GetArg(1).GetU8());
|
||||
const bool exact = inst->GetArg(2).GetU1();
|
||||
|
||||
using rounding_list = mp::list<
|
||||
mp::lift_value<FP::RoundingMode::ToNearest_TieEven>,
|
||||
mp::lift_value<FP::RoundingMode::TowardsPlusInfinity>,
|
||||
mp::lift_value<FP::RoundingMode::TowardsMinusInfinity>,
|
||||
mp::lift_value<FP::RoundingMode::TowardsZero>,
|
||||
mp::lift_value<FP::RoundingMode::ToNearest_TieAwayFromZero>>;
|
||||
using exact_list = mp::list<std::true_type, std::false_type>;
|
||||
|
||||
static const auto lut = Common::GenerateLookupTableFromList(
|
||||
[]<typename I>(I) {
|
||||
using FPT = u16;
|
||||
return std::pair{
|
||||
mp::lower_to_tuple_v<I>,
|
||||
Common::FptrCast(
|
||||
[](VectorArray<FPT>& output, const VectorArray<FPT>& input, FP::FPCR fpcr, FP::FPSR& fpsr) {
|
||||
constexpr FP::RoundingMode rounding_mode = mp::get<0, I>::value;
|
||||
constexpr bool exact = mp::get<1, I>::value;
|
||||
|
||||
for (size_t i = 0; i < output.size(); ++i) {
|
||||
output[i] = static_cast<FPT>(FP::FPRoundInt<FPT>(input[i], fpcr, rounding_mode, exact, fpsr));
|
||||
}
|
||||
})};
|
||||
},
|
||||
mp::cartesian_product<rounding_list, exact_list>{});
|
||||
|
||||
EmitTwoOpFallback<3>(code, ctx, inst, lut.at(std::make_tuple(rounding, exact)));
|
||||
}
|
||||
|
||||
template<>
|
||||
|
|
|
@ -138,7 +138,7 @@ bool RegAlloc::IsValueLive(IR::Inst* inst) const {
|
|||
return !!ValueLocation(inst);
|
||||
}
|
||||
|
||||
void RegAlloc::PrepareForCall(IR::Inst* result, std::optional<Argument::copyable_reference> arg0, std::optional<Argument::copyable_reference> arg1, std::optional<Argument::copyable_reference> arg2, std::optional<Argument::copyable_reference> arg3) {
|
||||
void RegAlloc::PrepareForCall(std::optional<Argument::copyable_reference> arg0, std::optional<Argument::copyable_reference> arg1, std::optional<Argument::copyable_reference> arg2, std::optional<Argument::copyable_reference> arg3) {
|
||||
fpsr_manager.Spill();
|
||||
SpillFlags();
|
||||
|
||||
|
@ -157,16 +157,29 @@ void RegAlloc::PrepareForCall(IR::Inst* result, std::optional<Argument::copyable
|
|||
}
|
||||
|
||||
const std::array<std::optional<Argument::copyable_reference>, 4> args{arg0, arg1, arg2, arg3};
|
||||
|
||||
// AAPCS64 Next General-purpose Register Number
|
||||
int ngrn = 0;
|
||||
// AAPCS64 Next SIMD and Floating-point Register Number
|
||||
int nsrn = 0;
|
||||
|
||||
for (int i = 0; i < 4; i++) {
|
||||
if (args[i]) {
|
||||
ASSERT(gprs[i].IsCompletelyEmpty());
|
||||
LoadCopyInto(args[i]->get().value, oaknut::XReg{i});
|
||||
if (args[i]->get().GetType() == IR::Type::U128) {
|
||||
ASSERT(fprs[nsrn].IsCompletelyEmpty());
|
||||
LoadCopyInto(args[i]->get().value, oaknut::QReg{nsrn});
|
||||
nsrn++;
|
||||
} else {
|
||||
ASSERT(gprs[ngrn].IsCompletelyEmpty());
|
||||
LoadCopyInto(args[i]->get().value, oaknut::XReg{ngrn});
|
||||
ngrn++;
|
||||
}
|
||||
} else {
|
||||
// Gaps are assumed to be in general-purpose registers
|
||||
// TODO: should there be a separate list passed for FPRs instead?
|
||||
ngrn++;
|
||||
}
|
||||
}
|
||||
|
||||
if (result) {
|
||||
DefineAsRegister(result, X0);
|
||||
}
|
||||
}
|
||||
|
||||
void RegAlloc::DefineAsExisting(IR::Inst* inst, Argument& arg) {
|
||||
|
|
|
@ -271,11 +271,7 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
void PrepareForCall(IR::Inst* result = nullptr,
|
||||
std::optional<Argument::copyable_reference> arg0 = {},
|
||||
std::optional<Argument::copyable_reference> arg1 = {},
|
||||
std::optional<Argument::copyable_reference> arg2 = {},
|
||||
std::optional<Argument::copyable_reference> arg3 = {});
|
||||
void PrepareForCall(std::optional<Argument::copyable_reference> arg0 = {}, std::optional<Argument::copyable_reference> arg1 = {}, std::optional<Argument::copyable_reference> arg2 = {}, std::optional<Argument::copyable_reference> arg3 = {});
|
||||
|
||||
void DefineAsExisting(IR::Inst* inst, Argument& arg);
|
||||
void DefineAsRegister(IR::Inst* inst, oaknut::Reg reg);
|
||||
|
|
|
@ -154,7 +154,7 @@ void EmitX64::EmitGetNZFromOp(EmitContext& ctx, IR::Inst* inst) {
|
|||
|
||||
const Xbyak::Reg64 nz = ctx.reg_alloc.ScratchGpr(HostLoc::RAX);
|
||||
const Xbyak::Reg value = ctx.reg_alloc.UseGpr(args[0]).changeBit(bitsize);
|
||||
code.cmp(value, 0);
|
||||
code.test(value, value);
|
||||
code.lahf();
|
||||
code.movzx(eax, ah);
|
||||
ctx.reg_alloc.DefineValue(inst, nz);
|
||||
|
@ -180,9 +180,9 @@ void EmitX64::EmitGetNZCVFromOp(EmitContext& ctx, IR::Inst* inst) {
|
|||
|
||||
const Xbyak::Reg64 nzcv = ctx.reg_alloc.ScratchGpr(HostLoc::RAX);
|
||||
const Xbyak::Reg value = ctx.reg_alloc.UseGpr(args[0]).changeBit(bitsize);
|
||||
code.cmp(value, 0);
|
||||
code.test(value, value);
|
||||
code.lahf();
|
||||
code.seto(code.al);
|
||||
code.mov(al, 0);
|
||||
ctx.reg_alloc.DefineValue(inst, nzcv);
|
||||
}
|
||||
|
||||
|
|
|
@ -198,7 +198,7 @@ struct UserConfig {
|
|||
|
||||
/// Pointer to where TPIDR_EL0 is stored. This pointer will be inserted into
|
||||
/// emitted code.
|
||||
const std::uint64_t* tpidr_el0 = nullptr;
|
||||
std::uint64_t* tpidr_el0 = nullptr;
|
||||
|
||||
/// Pointer to the page table which we can use for direct page table access.
|
||||
/// If an entry in page_table is null, the relevant memory callback will be called.
|
||||
|
|
|
@ -1024,6 +1024,54 @@ TEST_CASE("A64: This is an infinite loop if fast dispatch is enabled", "[a64]")
|
|||
jit.Run();
|
||||
}
|
||||
|
||||
// Runs a single EXTR with X23 = 0 and X24 = 1 and checks that X23 ends up as 0.
TEST_CASE("A64: EXTR", "[a64]") {
    A64TestEnv env;
    A64::Jit jit{A64::UserConfig{&env}};

    // Program under test, in execution order.
    for (const u32 word : {
             0x93d8fef7u,  // EXTR X23, X23, X24, #63
             0x14000000u,  // B .
         }) {
        env.code_mem.emplace_back(word);
    }

    jit.SetPC(0);
    jit.SetRegister(23, 0);
    jit.SetRegister(24, 1);

    env.ticks_left = 2;
    jit.Run();

    REQUIRE(jit.GetRegister(23) == 0);
}
|
||||
|
||||
// Executes a fixed instruction sequence for 20 ticks. The test has no explicit
// REQUIRE; it only needs the JIT to compile and run the sequence.
TEST_CASE("A64: Isolated GetNZCVFromOp", "[a64]") {
    A64TestEnv env;
    A64::Jit jit{A64::UserConfig{&env}};

    // Program under test, in execution order.
    for (const u32 word : {
             0xaa1f03f5u,  // MOV X21, XZR
             0x912a02dau,  // ADD X26, X22, #0xa80
             0x913662dcu,  // ADD X28, X22, #0xd98
             0x320003e8u,  // MOV W8, #1
             0xa9006bfcu,  // STP X28, X26, [SP]
             0x7200011fu,  // TST W8, #1
             0xf94007e8u,  // LDR X8, [SP, #8]
             0x321e03e3u,  // MOV W3, #4
             0xaa1303e2u,  // MOV X2, X19
             0x9a881357u,  // CSEL X23, X26, X8, NE
             0xf94003e8u,  // LDR X8, [SP]
             0xaa1703e0u,  // MOV X0, X23
             0x9a881396u,  // CSEL X22, X28, X8, NE
             0x92407ea8u,  // AND X8, X21, #0xffffffff
             0x1ac8269bu,  // LSR W27, W20, W8
             0x0b1b0768u,  // ADD W8, W27, W27, LSL #1
             0x937f7d01u,  // SBFIZ X1, X8, #1, #32
             0x2a1f03e4u,  // MOV W4, WZR
             0x531e7779u,  // LSL W25, W27, #2
             0x14000000u,  // B .
         }) {
        env.code_mem.emplace_back(word);
    }

    jit.SetPC(0);

    env.ticks_left = 20;
    jit.Run();
}
|
||||
|
||||
TEST_CASE("A64: Optimization failure when folding ADD", "[a64]") {
|
||||
A64TestEnv env;
|
||||
A64::Jit jit{A64::UserConfig{&env}};
|
||||
|
|
|
@ -6,7 +6,10 @@
|
|||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <functional>
|
||||
#include <limits>
|
||||
#include <optional>
|
||||
#include <tuple>
|
||||
#include <vector>
|
||||
|
||||
|
@ -14,6 +17,7 @@
|
|||
#include <mcl/stdint.hpp>
|
||||
|
||||
#include "./A32/testenv.h"
|
||||
#include "./A64/testenv.h"
|
||||
#include "./fuzz_util.h"
|
||||
#include "./rand_int.h"
|
||||
#include "dynarmic/common/fp/fpcr.h"
|
||||
|
@ -22,7 +26,11 @@
|
|||
#include "dynarmic/frontend/A32/a32_location_descriptor.h"
|
||||
#include "dynarmic/frontend/A32/a32_types.h"
|
||||
#include "dynarmic/frontend/A32/translate/a32_translate.h"
|
||||
#include "dynarmic/frontend/A64/a64_location_descriptor.h"
|
||||
#include "dynarmic/frontend/A64/a64_types.h"
|
||||
#include "dynarmic/frontend/A64/translate/a64_translate.h"
|
||||
#include "dynarmic/interface/A32/a32.h"
|
||||
#include "dynarmic/interface/A64/a64.h"
|
||||
#include "dynarmic/ir/basic_block.h"
|
||||
#include "dynarmic/ir/location_descriptor.h"
|
||||
#include "dynarmic/ir/opcodes.h"
|
||||
|
@ -36,21 +44,14 @@ constexpr bool mask_fpsr_cum_bits = true;
|
|||
namespace {
|
||||
using namespace Dynarmic;
|
||||
|
||||
bool ShouldTestInst(u32 instruction, u32 pc, bool is_thumb, bool is_last_inst, A32::ITState it_state = {}) {
|
||||
const A32::LocationDescriptor location = A32::LocationDescriptor{pc, {}, {}}.SetTFlag(is_thumb).SetIT(it_state);
|
||||
IR::Block block{location};
|
||||
const bool should_continue = A32::TranslateSingleInstruction(block, location, instruction);
|
||||
|
||||
if (!should_continue && !is_last_inst) {
|
||||
return false;
|
||||
}
|
||||
|
||||
bool ShouldTestInst(IR::Block& block) {
|
||||
if (auto terminal = block.GetTerminal(); boost::get<IR::Term::Interpret>(&terminal)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
for (const auto& ir_inst : block) {
|
||||
switch (ir_inst.GetOpcode()) {
|
||||
// A32
|
||||
case IR::Opcode::A32GetFpscr:
|
||||
case IR::Opcode::A32ExceptionRaised:
|
||||
case IR::Opcode::A32CallSupervisor:
|
||||
|
@ -61,7 +62,53 @@ bool ShouldTestInst(u32 instruction, u32 pc, bool is_thumb, bool is_last_inst, A
|
|||
case IR::Opcode::A32CoprocGetTwoWords:
|
||||
case IR::Opcode::A32CoprocLoadWords:
|
||||
case IR::Opcode::A32CoprocStoreWords:
|
||||
// A64
|
||||
case IR::Opcode::A64ExceptionRaised:
|
||||
case IR::Opcode::A64CallSupervisor:
|
||||
case IR::Opcode::A64DataCacheOperationRaised:
|
||||
case IR::Opcode::A64GetCNTPCT:
|
||||
// Unimplemented
|
||||
case IR::Opcode::SignedSaturatedAdd8:
|
||||
case IR::Opcode::SignedSaturatedAdd16:
|
||||
case IR::Opcode::SignedSaturatedAdd32:
|
||||
case IR::Opcode::SignedSaturatedAdd64:
|
||||
case IR::Opcode::SignedSaturatedDoublingMultiplyReturnHigh16:
|
||||
case IR::Opcode::SignedSaturatedDoublingMultiplyReturnHigh32:
|
||||
case IR::Opcode::SignedSaturatedSub8:
|
||||
case IR::Opcode::SignedSaturatedSub16:
|
||||
case IR::Opcode::SignedSaturatedSub32:
|
||||
case IR::Opcode::SignedSaturatedSub64:
|
||||
case IR::Opcode::UnsignedSaturatedAdd8:
|
||||
case IR::Opcode::UnsignedSaturatedAdd16:
|
||||
case IR::Opcode::UnsignedSaturatedAdd32:
|
||||
case IR::Opcode::UnsignedSaturatedAdd64:
|
||||
case IR::Opcode::UnsignedSaturatedSub8:
|
||||
case IR::Opcode::UnsignedSaturatedSub16:
|
||||
case IR::Opcode::UnsignedSaturatedSub32:
|
||||
case IR::Opcode::UnsignedSaturatedSub64:
|
||||
case IR::Opcode::VectorMaxS64:
|
||||
case IR::Opcode::VectorMaxU64:
|
||||
case IR::Opcode::VectorMinS64:
|
||||
case IR::Opcode::VectorMinU64:
|
||||
case IR::Opcode::VectorMultiply64:
|
||||
case IR::Opcode::SM4AccessSubstitutionBox:
|
||||
// Half-prec conversions
|
||||
case IR::Opcode::FPHalfToFixedS16:
|
||||
case IR::Opcode::FPHalfToFixedS32:
|
||||
case IR::Opcode::FPHalfToFixedS64:
|
||||
case IR::Opcode::FPHalfToFixedU16:
|
||||
case IR::Opcode::FPHalfToFixedU32:
|
||||
case IR::Opcode::FPHalfToFixedU64:
|
||||
// Half-precision
|
||||
case IR::Opcode::FPAbs16:
|
||||
case IR::Opcode::FPMulAdd16:
|
||||
case IR::Opcode::FPNeg16:
|
||||
case IR::Opcode::FPRecipEstimate16:
|
||||
case IR::Opcode::FPRecipExponent16:
|
||||
case IR::Opcode::FPRecipStepFused16:
|
||||
case IR::Opcode::FPRoundInt16:
|
||||
case IR::Opcode::FPRSqrtEstimate16:
|
||||
case IR::Opcode::FPRSqrtStepFused16:
|
||||
case IR::Opcode::FPVectorAbs16:
|
||||
case IR::Opcode::FPVectorEqual16:
|
||||
case IR::Opcode::FPVectorMulAdd16:
|
||||
|
@ -84,6 +131,30 @@ bool ShouldTestInst(u32 instruction, u32 pc, bool is_thumb, bool is_last_inst, A
|
|||
return true;
|
||||
}
|
||||
|
||||
/// Decides whether a single A32/Thumb instruction may be placed in a generated test program.
///
/// The instruction is translated on its own into a scratch IR block located at `pc`
/// (with the T flag and IT state applied), and the block is then checked by ShouldTestInst.
///
/// @param instruction  Instruction word to evaluate.
/// @param pc           Address used for the location descriptor.
/// @param is_thumb     Value of the T flag for the location descriptor.
/// @param is_last_inst Whether this instruction occupies the final slot of the program.
/// @param it_state     IT state for the location descriptor.
/// @return true if the instruction is acceptable, false otherwise.
bool ShouldTestA32Inst(u32 instruction, u32 pc, bool is_thumb, bool is_last_inst, A32::ITState it_state = {}) {
    const A32::LocationDescriptor descriptor = A32::LocationDescriptor{pc, {}, {}}.SetTFlag(is_thumb).SetIT(it_state);
    IR::Block scratch_block{descriptor};

    // A translation result of `false` is only tolerated for the last instruction.
    const bool translation_continues = A32::TranslateSingleInstruction(scratch_block, descriptor, instruction);
    const bool position_ok = translation_continues || is_last_inst;

    return position_ok && ShouldTestInst(scratch_block);
}
|
||||
|
||||
/// Decides whether a single A64 instruction may be placed in a generated test program.
///
/// The instruction is translated on its own into a scratch IR block located at `pc`,
/// and the block is then checked by ShouldTestInst.
///
/// @param instruction  Instruction word to evaluate.
/// @param pc           Address used for the location descriptor.
/// @param is_last_inst Whether this instruction occupies the final slot of the program.
/// @return true if the instruction is acceptable, false otherwise.
bool ShouldTestA64Inst(u32 instruction, u64 pc, bool is_last_inst) {
    const A64::LocationDescriptor descriptor = A64::LocationDescriptor{pc, {}};
    IR::Block scratch_block{descriptor};

    // A translation result of `false` is only tolerated for the last instruction.
    const bool translation_continues = A64::TranslateSingleInstruction(scratch_block, descriptor, instruction);
    const bool position_ok = translation_continues || is_last_inst;

    return position_ok && ShouldTestInst(scratch_block);
}
|
||||
|
||||
u32 GenRandomArmInst(u32 pc, bool is_last_inst) {
|
||||
static const struct InstructionGeneratorInfo {
|
||||
std::vector<InstructionGenerator> generators;
|
||||
|
@ -144,7 +215,7 @@ u32 GenRandomArmInst(u32 pc, bool is_last_inst) {
|
|||
continue;
|
||||
}
|
||||
|
||||
if (ShouldTestInst(inst, pc, false, is_last_inst)) {
|
||||
if (ShouldTestA32Inst(inst, pc, false, is_last_inst)) {
|
||||
return inst;
|
||||
}
|
||||
}
|
||||
|
@ -245,7 +316,7 @@ std::vector<u16> GenRandomThumbInst(u32 pc, bool is_last_inst, A32::ITState it_s
|
|||
const u32 inst = instructions.generators[index].Generate();
|
||||
const bool is_four_bytes = (inst >> 16) != 0;
|
||||
|
||||
if (ShouldTestInst(is_four_bytes ? mcl::bit::swap_halves_32(inst) : inst, pc, true, is_last_inst, it_state)) {
|
||||
if (ShouldTestA32Inst(is_four_bytes ? mcl::bit::swap_halves_32(inst) : inst, pc, true, is_last_inst, it_state)) {
|
||||
if (is_four_bytes)
|
||||
return {static_cast<u16>(inst >> 16), static_cast<u16>(inst)};
|
||||
return {static_cast<u16>(inst)};
|
||||
|
@ -253,8 +324,65 @@ std::vector<u16> GenRandomThumbInst(u32 pc, bool is_last_inst, A32::ITState it_s
|
|||
}
|
||||
}
|
||||
|
||||
/// Produces one random A64 instruction word suitable for a generated test program.
///
/// Candidate encodings come from the A64 decoder table. A word is re-rolled until it
/// neither matches an excluded encoding nor is rejected by ShouldTestA64Inst.
///
/// @param pc           Address the instruction will be placed at.
/// @param is_last_inst Whether the instruction occupies the final slot of the program.
/// @return The accepted instruction word.
u32 GenRandomA64Inst(u64 pc, bool is_last_inst) {
    static const struct InstructionGeneratorInfo {
        std::vector<InstructionGenerator> generators;
        std::vector<InstructionGenerator> invalid;
    } instructions = [] {
        // (decoder function name, bitstring) pairs taken from the decoder table.
        const std::vector<std::tuple<std::string, const char*>> decoder_entries{
#define INST(fn, name, bitstring) {#fn, bitstring},
#include "dynarmic/frontend/A64/decoder/a64.inc"
#undef INST
        };

        // Decoder entries that must never appear in generated programs.
        const std::vector<std::string> excluded_names{
            // Dynarmic and QEMU currently differ on how the exclusive monitor's address range works.
            "STXR",
            "STLXR",
            "STXP",
            "STLXP",
            "LDXR",
            "LDAXR",
            "LDXP",
            "LDAXP",
            // Behaviour differs from QEMU
            "MSR_reg",
            "MSR_imm",
            "MRS",
        };

        std::vector<InstructionGenerator> allowed;
        std::vector<InstructionGenerator> excluded;

        for (const auto& [fn_name, pattern] : decoder_entries) {
            if (fn_name == "UnallocatedEncoding") {
                continue;
            }

            const bool is_excluded = std::find(excluded_names.begin(), excluded_names.end(), fn_name) != excluded_names.end();
            auto& bucket = is_excluded ? excluded : allowed;
            bucket.emplace_back(InstructionGenerator{pattern});
        }

        return InstructionGeneratorInfo{allowed, excluded};
    }();

    for (;;) {
        const size_t pick = RandInt<size_t>(0, instructions.generators.size() - 1);
        const u32 candidate = instructions.generators[pick].Generate();

        // A word from an allowed generator may still match an excluded encoding.
        const bool hits_excluded = std::any_of(instructions.invalid.begin(), instructions.invalid.end(),
                                               [candidate](const auto& generator) { return generator.Match(candidate); });

        if (!hits_excluded && ShouldTestA64Inst(candidate, pc, is_last_inst)) {
            return candidate;
        }
    }
}
|
||||
|
||||
template<typename TestEnv>
|
||||
Dynarmic::A32::UserConfig GetUserConfig(TestEnv& testenv) {
|
||||
Dynarmic::A32::UserConfig GetA32UserConfig(TestEnv& testenv) {
|
||||
Dynarmic::A32::UserConfig user_config;
|
||||
user_config.optimizations &= ~OptimizationFlag::FastDispatch;
|
||||
user_config.callbacks = &testenv;
|
||||
|
@ -262,14 +390,14 @@ Dynarmic::A32::UserConfig GetUserConfig(TestEnv& testenv) {
|
|||
}
|
||||
|
||||
template<size_t num_jit_reruns = 1, typename TestEnv>
|
||||
static void RunTestInstance(Dynarmic::A32::Jit& jit,
|
||||
TestEnv& jit_env,
|
||||
const std::array<u32, 16>& regs,
|
||||
const std::array<u32, 64>& vecs,
|
||||
const std::vector<typename TestEnv::InstructionType>& instructions,
|
||||
const u32 cpsr,
|
||||
const u32 fpscr,
|
||||
const size_t ticks_left) {
|
||||
void RunTestInstance(Dynarmic::A32::Jit& jit,
|
||||
TestEnv& jit_env,
|
||||
const std::array<u32, 16>& regs,
|
||||
const std::array<u32, 64>& vecs,
|
||||
const std::vector<typename TestEnv::InstructionType>& instructions,
|
||||
const u32 cpsr,
|
||||
const u32 fpscr,
|
||||
const size_t ticks_left) {
|
||||
const u32 initial_pc = regs[15];
|
||||
const u32 num_words = initial_pc / sizeof(typename TestEnv::InstructionType);
|
||||
const u32 code_mem_size = num_words + static_cast<u32>(instructions.size());
|
||||
|
@ -294,37 +422,37 @@ static void RunTestInstance(Dynarmic::A32::Jit& jit,
|
|||
jit.Run();
|
||||
}
|
||||
|
||||
fmt::print("instructions: ");
|
||||
fmt::print("instructions:");
|
||||
for (auto instruction : instructions) {
|
||||
if constexpr (sizeof(decltype(instruction)) == 2) {
|
||||
fmt::print("{:04x} ", instruction);
|
||||
fmt::print(" {:04x}", instruction);
|
||||
} else {
|
||||
fmt::print("{:08x} ", instruction);
|
||||
fmt::print(" {:08x}", instruction);
|
||||
}
|
||||
}
|
||||
fmt::print("\n");
|
||||
|
||||
fmt::print("initial_regs: ");
|
||||
fmt::print("initial_regs:");
|
||||
for (u32 i : regs) {
|
||||
fmt::print("{:08x} ", i);
|
||||
fmt::print(" {:08x}", i);
|
||||
}
|
||||
fmt::print("\n");
|
||||
fmt::print("initial_vecs: ");
|
||||
fmt::print("initial_vecs:");
|
||||
for (u32 i : vecs) {
|
||||
fmt::print("{:08x} ", i);
|
||||
fmt::print(" {:08x}", i);
|
||||
}
|
||||
fmt::print("\n");
|
||||
fmt::print("initial_cpsr: {:08x}\n", cpsr);
|
||||
fmt::print("initial_fpcr: {:08x}\n", fpscr);
|
||||
|
||||
fmt::print("final_regs: ");
|
||||
fmt::print("final_regs:");
|
||||
for (u32 i : jit.Regs()) {
|
||||
fmt::print("{:08x} ", i);
|
||||
fmt::print(" {:08x}", i);
|
||||
}
|
||||
fmt::print("\n");
|
||||
fmt::print("final_vecs: ");
|
||||
fmt::print("final_vecs:");
|
||||
for (u32 i : jit.ExtRegs()) {
|
||||
fmt::print("{:08x} ", i);
|
||||
fmt::print(" {:08x}", i);
|
||||
}
|
||||
fmt::print("\n");
|
||||
fmt::print("final_cpsr: {:08x}\n", jit.Cpsr());
|
||||
|
@ -343,11 +471,104 @@ static void RunTestInstance(Dynarmic::A32::Jit& jit,
|
|||
|
||||
fmt::print("===\n");
|
||||
}
|
||||
|
||||
/// Builds the A64 JIT configuration used by the test generator.
///
/// The configuration is bound to `jit_env`, has the FastDispatch optimization
/// switched off, and carries fixed DCZID_EL0 / CTR_EL0 values.
Dynarmic::A64::UserConfig GetA64UserConfig(A64TestEnv& jit_env) {
    Dynarmic::A64::UserConfig config{&jit_env};

    config.optimizations &= ~OptimizationFlag::FastDispatch;

    // These values correspond to the settings for qemu's aarch64_max_initfn.
    config.dczid_el0 = 7;
    config.ctr_el0 = 0x80038003;

    return config;
}
|
||||
|
||||
template<size_t num_jit_reruns = 1>
|
||||
void RunTestInstance(Dynarmic::A64::Jit& jit,
|
||||
A64TestEnv& jit_env,
|
||||
const std::array<u64, 31>& regs,
|
||||
const std::array<std::array<u64, 2>, 32>& vecs,
|
||||
const std::vector<u32>& instructions,
|
||||
const u32 pstate,
|
||||
const u32 fpcr,
|
||||
const u64 initial_sp,
|
||||
const u64 start_address,
|
||||
const size_t ticks_left) {
|
||||
jit.ClearCache();
|
||||
|
||||
for (size_t jit_rerun_count = 0; jit_rerun_count < num_jit_reruns; ++jit_rerun_count) {
|
||||
jit_env.code_mem = instructions;
|
||||
jit_env.code_mem.emplace_back(0x14000000); // B .
|
||||
jit_env.code_mem_start_address = start_address;
|
||||
jit_env.modified_memory.clear();
|
||||
jit_env.interrupts.clear();
|
||||
|
||||
jit.SetRegisters(regs);
|
||||
jit.SetVectors(vecs);
|
||||
jit.SetPC(start_address);
|
||||
jit.SetSP(initial_sp);
|
||||
jit.SetFpcr(fpcr);
|
||||
jit.SetFpsr(0);
|
||||
jit.SetPstate(pstate);
|
||||
jit.ClearCache();
|
||||
|
||||
jit_env.ticks_left = ticks_left;
|
||||
jit.Run();
|
||||
}
|
||||
|
||||
fmt::print("instructions:");
|
||||
for (u32 instruction : instructions) {
|
||||
fmt::print(" {:08x}", instruction);
|
||||
}
|
||||
fmt::print("\n");
|
||||
|
||||
fmt::print("initial_regs:");
|
||||
for (u64 i : regs) {
|
||||
fmt::print(" {:016x}", i);
|
||||
}
|
||||
fmt::print("\n");
|
||||
fmt::print("initial_vecs:");
|
||||
for (auto i : vecs) {
|
||||
fmt::print(" {:016x}:{:016x}", i[0], i[1]);
|
||||
}
|
||||
fmt::print("\n");
|
||||
fmt::print("initial_sp: {:016x}\n", initial_sp);
|
||||
fmt::print("initial_pstate: {:08x}\n", pstate);
|
||||
fmt::print("initial_fpcr: {:08x}\n", fpcr);
|
||||
|
||||
fmt::print("final_regs:");
|
||||
for (u64 i : jit.GetRegisters()) {
|
||||
fmt::print(" {:016x}", i);
|
||||
}
|
||||
fmt::print("\n");
|
||||
fmt::print("final_vecs:");
|
||||
for (auto i : jit.GetVectors()) {
|
||||
fmt::print(" {:016x}:{:016x}", i[0], i[1]);
|
||||
}
|
||||
fmt::print("\n");
|
||||
fmt::print("final_sp: {:016x}\n", jit.GetSP());
|
||||
fmt::print("final_pc: {:016x}\n", jit.GetPC());
|
||||
fmt::print("final_pstate: {:08x}\n", jit.GetPstate());
|
||||
fmt::print("final_fpcr: {:08x}\n", jit.GetFpcr());
|
||||
fmt::print("final_qc : {}\n", FP::FPSR{jit.GetFpsr()}.QC());
|
||||
|
||||
fmt::print("mod_mem:");
|
||||
for (auto [addr, value] : jit_env.modified_memory) {
|
||||
fmt::print(" {:08x}:{:02x}", addr, value);
|
||||
}
|
||||
fmt::print("\n");
|
||||
|
||||
fmt::print("interrupts:\n");
|
||||
for (const auto& i : jit_env.interrupts) {
|
||||
std::puts(i.c_str());
|
||||
}
|
||||
|
||||
fmt::print("===\n");
|
||||
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
void TestThumb(size_t num_instructions, size_t num_iterations) {
|
||||
ThumbTestEnv jit_env{};
|
||||
Dynarmic::A32::Jit jit{GetUserConfig(jit_env)};
|
||||
Dynarmic::A32::Jit jit{GetA32UserConfig(jit_env)};
|
||||
|
||||
std::array<u32, 16> regs;
|
||||
std::array<u32, 64> ext_reg;
|
||||
|
@ -374,7 +595,7 @@ void TestThumb(size_t num_instructions, size_t num_iterations) {
|
|||
|
||||
void TestArm(size_t num_instructions, size_t num_iterations) {
|
||||
ArmTestEnv jit_env{};
|
||||
Dynarmic::A32::Jit jit{GetUserConfig(jit_env)};
|
||||
Dynarmic::A32::Jit jit{GetA32UserConfig(jit_env)};
|
||||
|
||||
std::array<u32, 16> regs;
|
||||
std::array<u32, 64> ext_reg;
|
||||
|
@ -394,19 +615,76 @@ void TestArm(size_t num_instructions, size_t num_iterations) {
|
|||
}
|
||||
|
||||
regs[15] = start_address;
|
||||
RunTestInstance(jit, jit_env, regs, ext_reg, instructions, cpsr, fpcr, 1);
|
||||
RunTestInstance(jit, jit_env, regs, ext_reg, instructions, cpsr, fpcr, num_instructions);
|
||||
}
|
||||
}
|
||||
|
||||
int main(int, char*[]) {
|
||||
detail::g_rand_int_generator.seed(42069);
|
||||
void TestA64(size_t num_instructions, size_t num_iterations) {
|
||||
A64TestEnv jit_env{};
|
||||
Dynarmic::A64::Jit jit{GetA64UserConfig(jit_env)};
|
||||
|
||||
TestThumb(1, 100000);
|
||||
TestArm(1, 100000);
|
||||
TestThumb(5, 100000);
|
||||
TestArm(5, 100000);
|
||||
TestThumb(1024, 10000);
|
||||
TestArm(1024, 10000);
|
||||
std::array<u64, 31> regs;
|
||||
std::array<std::array<u64, 2>, 32> vecs;
|
||||
std::vector<u32> instructions;
|
||||
|
||||
for (size_t iteration = 0; iteration < num_iterations; ++iteration) {
|
||||
std::generate(regs.begin(), regs.end(), [] { return RandInt<u64>(0, ~u64(0)); });
|
||||
std::generate(vecs.begin(), vecs.end(), RandomVector);
|
||||
|
||||
const u32 start_address = 100;
|
||||
const u32 pstate = (RandInt<u32>(0, 0xF) << 28);
|
||||
const u32 fpcr = RandomFpcr();
|
||||
const u64 initial_sp = RandInt<u64>(0x30'0000'0000, 0x40'0000'0000) * 4;
|
||||
|
||||
instructions.clear();
|
||||
for (size_t i = 0; i < num_instructions; ++i) {
|
||||
instructions.emplace_back(GenRandomA64Inst(static_cast<u32>(start_address + 4 * instructions.size()), i == num_instructions - 1));
|
||||
}
|
||||
|
||||
RunTestInstance(jit, jit_env, regs, vecs, instructions, pstate, fpcr, initial_sp, start_address, num_instructions);
|
||||
}
|
||||
}
|
||||
|
||||
/// Parses a non-negative base-10 integer from a C string.
///
/// @param s NUL-terminated string to parse.
/// @return The parsed value, or std::nullopt if the string is empty, has characters
///         left over after the number, is negative, or is reported out of range by strtol.
static std::optional<size_t> str2sz(char const* s) {
    // strtol only sets errno on failure, so clear any stale value first.
    errno = 0;

    char* stop = nullptr;
    const long parsed = std::strtol(s, &stop, 10);

    const bool out_of_range = errno == ERANGE;
    const bool fully_consumed = *s != '\0' && *stop == '\0';

    if (out_of_range || parsed < 0 || !fully_consumed) {
        return std::nullopt;
    }
    return static_cast<size_t>(parsed);
}
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
if (argc != 5) {
|
||||
fmt::print("Usage: {} <thumb|arm|a64> <seed> <instruction_count> <iteration_count>\n", argv[0]);
|
||||
}
|
||||
|
||||
const auto seed = str2sz(argv[2]);
|
||||
const auto instruction_count = str2sz(argv[3]);
|
||||
const auto iterator_count = str2sz(argv[4]);
|
||||
|
||||
if (!seed || !instruction_count || !iterator_count) {
|
||||
fmt::print("invalid numeric arguments\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
detail::g_rand_int_generator.seed(static_cast<std::mt19937::result_type>(*seed));
|
||||
|
||||
if (strcmp(argv[1], "thumb") == 0) {
|
||||
TestThumb(*instruction_count, *iterator_count);
|
||||
} else if (strcmp(argv[1], "arm") == 0) {
|
||||
TestArm(*instruction_count, *iterator_count);
|
||||
} else if (strcmp(argv[1], "a64") == 0) {
|
||||
TestA64(*instruction_count, *iterator_count);
|
||||
} else {
|
||||
fmt::print("unrecognized instruction class\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue