emit_x64: Fix bugs in fast dispatcher

* We failed to invalidate fast dispatch entries when no patches were required for a location descriptor.
* Bug in A64 hashing code (rbx instead of rbp).
* Bug in A32 and A64 lookup code (inconsistent choice of key: PC vs IR::LocationDescriptor).
* Test case added.
This commit is contained in:
Merry 2022-07-11 15:57:14 +01:00
parent 6243e5a90e
commit cd85b7fdaa
5 changed files with 127 additions and 9 deletions

View file

@ -235,8 +235,9 @@ void A32EmitX64::GenTerminalHandlers() {
calculate_location_descriptor(); calculate_location_descriptor();
code.L(rsb_cache_miss); code.L(rsb_cache_miss);
code.mov(r12, reinterpret_cast<u64>(fast_dispatch_table.data())); code.mov(r12, reinterpret_cast<u64>(fast_dispatch_table.data()));
code.mov(rbp, rbx);
if (code.HasHostFeature(HostFeature::SSE42)) { if (code.HasHostFeature(HostFeature::SSE42)) {
code.crc32(ebp, r12d); code.crc32(rbp, r12);
} }
code.and_(ebp, fast_dispatch_table_mask); code.and_(ebp, fast_dispatch_table_mask);
code.lea(rbp, ptr[r12 + rbp]); code.lea(rbp, ptr[r12 + rbp]);
@ -254,11 +255,12 @@ void A32EmitX64::GenTerminalHandlers() {
fast_dispatch_table_lookup = code.getCurr<FastDispatchEntry& (*)(u64)>(); fast_dispatch_table_lookup = code.getCurr<FastDispatchEntry& (*)(u64)>();
code.mov(code.ABI_PARAM2, reinterpret_cast<u64>(fast_dispatch_table.data())); code.mov(code.ABI_PARAM2, reinterpret_cast<u64>(fast_dispatch_table.data()));
if (code.HasHostFeature(HostFeature::SSE42)) { if (code.HasHostFeature(HostFeature::SSE42)) {
code.crc32(code.ABI_PARAM1.cvt32(), code.ABI_PARAM2.cvt32()); code.crc32(code.ABI_PARAM1, code.ABI_PARAM2);
} }
code.and_(code.ABI_PARAM1.cvt32(), fast_dispatch_table_mask); code.and_(code.ABI_PARAM1.cvt32(), fast_dispatch_table_mask);
code.lea(code.ABI_RETURN, code.ptr[code.ABI_PARAM1 + code.ABI_PARAM2]); code.lea(code.ABI_RETURN, code.ptr[code.ABI_PARAM1 + code.ABI_PARAM2]);
code.ret(); code.ret();
PerfMapRegister(fast_dispatch_table_lookup, code.getCurr(), "a32_fast_dispatch_table_lookup");
} }
} }

View file

@ -193,8 +193,9 @@ void A64EmitX64::GenTerminalHandlers() {
calculate_location_descriptor(); calculate_location_descriptor();
code.L(rsb_cache_miss); code.L(rsb_cache_miss);
code.mov(r12, reinterpret_cast<u64>(fast_dispatch_table.data())); code.mov(r12, reinterpret_cast<u64>(fast_dispatch_table.data()));
code.mov(rbp, rbx);
if (code.HasHostFeature(HostFeature::SSE42)) { if (code.HasHostFeature(HostFeature::SSE42)) {
code.crc32(rbx, r12d); code.crc32(rbp, r12);
} }
code.and_(ebp, fast_dispatch_table_mask); code.and_(ebp, fast_dispatch_table_mask);
code.lea(rbp, ptr[r12 + rbp]); code.lea(rbp, ptr[r12 + rbp]);
@ -215,7 +216,7 @@ void A64EmitX64::GenTerminalHandlers() {
code.crc32(code.ABI_PARAM1, code.ABI_PARAM2); code.crc32(code.ABI_PARAM1, code.ABI_PARAM2);
} }
code.and_(code.ABI_PARAM1.cvt32(), fast_dispatch_table_mask); code.and_(code.ABI_PARAM1.cvt32(), fast_dispatch_table_mask);
code.lea(code.ABI_RETURN, code.ptr[code.ABI_PARAM1 + code.ABI_PARAM2]); code.lea(code.ABI_RETURN, code.ptr[code.ABI_PARAM2 + code.ABI_PARAM1]);
code.ret(); code.ret();
PerfMapRegister(fast_dispatch_table_lookup, code.getCurr(), "a64_fast_dispatch_table_lookup"); PerfMapRegister(fast_dispatch_table_lookup, code.getCurr(), "a64_fast_dispatch_table_lookup");
} }

View file

@ -325,7 +325,9 @@ void EmitX64::Patch(const IR::LocationDescriptor& target_desc, CodePtr target_co
} }
void EmitX64::Unpatch(const IR::LocationDescriptor& target_desc) { void EmitX64::Unpatch(const IR::LocationDescriptor& target_desc) {
if (patch_information.count(target_desc)) {
Patch(target_desc, nullptr); Patch(target_desc, nullptr);
}
} }
void EmitX64::ClearCache() { void EmitX64::ClearCache() {
@ -345,9 +347,8 @@ void EmitX64::InvalidateBasicBlocks(const tsl::robin_set<IR::LocationDescriptor>
continue; continue;
} }
if (patch_information.count(descriptor)) {
Unpatch(descriptor); Unpatch(descriptor);
}
block_descriptors.erase(it); block_descriptors.erase(it);
} }
} }

View file

@ -0,0 +1,113 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include <catch2/catch.hpp>
#include "./testenv.h"
#include "dynarmic/interface/A64/a64.h"
using namespace Dynarmic;
// Regression test for cache invalidation with FastDispatch enabled.
//
// Guest program, loaded at address 100:
//   100: MOV X0, 108
//   104: BR  X0        ; register-indirect branch to 108
//   108: MOV X0, 42    ; later rewritten to MOV X0, 69
//   112: B   .
//
// The test checks that rewriting the instruction at 108 only becomes visible
// after InvalidateCacheRange(108, 4), and that it does become visible then.
TEST_CASE("ensure fast dispatch entry is cleared even when a block does not have any patching requirements", "[a64]") {
    A64TestEnv env;
    A64::UserConfig conf{&env};
    A64::Jit jit{conf};

    REQUIRE(conf.HasOptimization(OptimizationFlag::FastDispatch));

    env.code_mem_start_address = 100;
    env.code_mem.clear();
    env.code_mem.emplace_back(0xd2800d80);  // MOV X0, 108
    env.code_mem.emplace_back(0xd61f0000);  // BR X0
    env.code_mem.emplace_back(0xd2800540);  // MOV X0, 42
    env.code_mem.emplace_back(0x14000000);  // B .

    // Restarts execution at the entry point with a budget of 4 ticks and
    // returns the resulting value of X0.
    const auto run_and_read_x0 = [&] {
        jit.SetPC(100);
        env.ticks_left = 4;
        jit.Run();
        return jit.GetRegister(0);
    };

    // First run, then a second run over the same (unchanged) guest code.
    REQUIRE(run_and_read_x0() == 42);
    REQUIRE(run_and_read_x0() == 42);

    // Invalidating unchanged code must not alter the result.
    jit.InvalidateCacheRange(108, 4);
    REQUIRE(run_and_read_x0() == 42);

    // Rewrite the instruction at 108. No invalidation has been requested yet,
    // so the old result is still expected.
    env.code_mem[2] = 0xd28008a0;  // MOV X0, 69
    REQUIRE(run_and_read_x0() == 42);

    // After invalidation the rewritten instruction must be picked up, and the
    // new result must also hold on a subsequent run.
    jit.InvalidateCacheRange(108, 4);
    REQUIRE(run_and_read_x0() == 69);
    REQUIRE(run_and_read_x0() == 69);
}
// Variant of the fast dispatch invalidation regression test in which the
// guest program is entered at PC 0 and branches to address 8:
//   0:  MOV X0, 8
//   4:  BR  X0        ; register-indirect branch to 8
//   8:  MOV X0, 42    ; later rewritten to MOV X0, 69
//   12: B   .
//
// env.code_mem_start_address is left at its default here.
// NOTE(review): presumably that default is 0 - confirm in testenv.h.
TEST_CASE("ensure fast dispatch entry is cleared even when a block does not have any patching requirements 2", "[a64]") {
    A64TestEnv env;
    A64::UserConfig conf{&env};
    A64::Jit jit{conf};

    REQUIRE(conf.HasOptimization(OptimizationFlag::FastDispatch));

    env.code_mem.emplace_back(0xd2800100);  // MOV X0, 8
    env.code_mem.emplace_back(0xd61f0000);  // BR X0
    env.code_mem.emplace_back(0xd2800540);  // MOV X0, 42
    env.code_mem.emplace_back(0x14000000);  // B .

    // Restarts execution at PC 0 with a budget of 4 ticks and returns the
    // resulting value of X0.
    const auto run_and_read_x0 = [&] {
        jit.SetPC(0);
        env.ticks_left = 4;
        jit.Run();
        return jit.GetRegister(0);
    };

    // First run, then a second run over the same (unchanged) guest code.
    REQUIRE(run_and_read_x0() == 42);
    REQUIRE(run_and_read_x0() == 42);

    // Invalidating unchanged code must not alter the result.
    jit.InvalidateCacheRange(8, 4);
    REQUIRE(run_and_read_x0() == 42);

    // Rewrite the instruction at 8. No invalidation has been requested yet,
    // so the old result is still expected.
    env.code_mem[2] = 0xd28008a0;  // MOV X0, 69
    REQUIRE(run_and_read_x0() == 42);

    // After invalidation the rewritten instruction must be picked up, and the
    // new result must also hold on a subsequent run.
    jit.InvalidateCacheRange(8, 4);
    REQUIRE(run_and_read_x0() == 69);
    REQUIRE(run_and_read_x0() == 69);
}

View file

@ -20,6 +20,8 @@ endif()
if ("A64" IN_LIST DYNARMIC_FRONTENDS) if ("A64" IN_LIST DYNARMIC_FRONTENDS)
target_sources(dynarmic_tests PRIVATE target_sources(dynarmic_tests PRIVATE
A64/a64.cpp A64/a64.cpp
A64/misaligned_page_table.cpp
A64/test_invalidation.cpp
A64/testenv.h A64/testenv.h
) )
endif() endif()
@ -44,7 +46,6 @@ if (DYNARMIC_TESTS_USE_UNICORN)
if ("A64" IN_LIST DYNARMIC_FRONTENDS) if ("A64" IN_LIST DYNARMIC_FRONTENDS)
target_sources(dynarmic_tests PRIVATE target_sources(dynarmic_tests PRIVATE
A64/fuzz_with_unicorn.cpp A64/fuzz_with_unicorn.cpp
A64/misaligned_page_table.cpp
A64/verify_unicorn.cpp A64/verify_unicorn.cpp
unicorn_emu/a64_unicorn.cpp unicorn_emu/a64_unicorn.cpp
unicorn_emu/a64_unicorn.h unicorn_emu/a64_unicorn.h