Optimization: Implement Return Stack Buffer
parent 8e68e6fdd9
commit 960d14d18e
18 changed files with 167 additions and 31 deletions
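At a high level, this change gives the recompiler a small return stack buffer: call-type instructions (BL, BLX) record the hash of the return location together with its compiled host address in a four-entry ring buffer inside JitState, and return-type instructions (BX LR, and LDM/LDR forms that pop PC) end their block with a PopRSBHint terminal that looks the actual return location up in that buffer and jumps straight to the predicted host block, falling back to the dispatcher on a miss. A minimal C++ sketch of the scheme follows; it is illustrative only (the field names mirror the JitState members added below, but this is not the emitted code):

#include <array>
#include <cstddef>
#include <cstdint>

// Illustrative model of the return stack buffer this commit adds to JitState.
struct ReturnStackBuffer {
    static constexpr std::size_t Size = 4;  // JitState::RSBSize; MUST be a power of 2
    std::uint32_t ptr = 0;
    std::array<std::uint64_t, Size> location_descriptors{};  // UniqueHash() of the return location
    std::array<const void*, Size> codeptrs{};                 // host code for that location (or the dispatcher)

    // BL/BLX: remember where the call will return to and the host code compiled for it.
    void Push(std::uint64_t return_location_hash, const void* host_code) {
        const std::uint32_t index = ptr & (Size - 1);
        location_descriptors[index] = return_location_hash;
        codeptrs[index] = host_code;
        ++ptr;
    }

    // BX LR / POP {..., pc}: predict the host block for the location actually returned to.
    const void* Pop(std::uint64_t current_location_hash, const void* return_to_dispatcher) {
        const void* target = return_to_dispatcher;
        for (std::size_t i = 0; i < Size; ++i) {  // linear scan, like the emitted CMP/CMOVcc chain
            if (location_descriptors[i] == current_location_hash)
                target = codeptrs[i];
        }
        --ptr;
        return target;
    }
};

The emitted x64 performs the same steps inline: EmitPushRSB stores the pair, and EmitTerminalPopRSBHint does the lookup with a CMP/CMOVcc chain before jumping through RAX.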
@@ -15,7 +15,7 @@ using namespace Gen;
 namespace Dynarmic {
 namespace BackendX64 {
 
-BlockOfCode::BlockOfCode() {
+BlockOfCode::BlockOfCode() : Gen::XCodeBlock() {
     AllocCodeSpace(128 * 1024 * 1024);
     ClearCache(false);
 }
@@ -29,6 +29,7 @@ void BlockOfCode::ClearCache(bool poison_memory) {
 
     GenConstants();
     GenRunCode();
+    GenReturnFromRunCode();
 }
 
 size_t BlockOfCode::RunCode(JitState* jit_state, CodePtr basic_block, size_t cycles_to_run) const {
@@ -41,11 +42,7 @@ size_t BlockOfCode::RunCode(JitState* jit_state, CodePtr basic_block, size_t cyc
 }
 
 void BlockOfCode::ReturnFromRunCode(bool MXCSR_switch) {
-    if (MXCSR_switch)
-        SwitchMxcsrOnExit();
-
-    ABI_PopRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8);
-    RET();
+    JMP(MXCSR_switch ? return_from_run_code : return_from_run_code_without_mxcsr_switch, true);
 }
 
 void BlockOfCode::GenConstants() {
@@ -80,6 +77,17 @@ void BlockOfCode::GenRunCode() {
     JMPptr(R(ABI_PARAM2));
 }
 
+void BlockOfCode::GenReturnFromRunCode() {
+    return_from_run_code = GetCodePtr();
+
+    SwitchMxcsrOnExit();
+
+    return_from_run_code_without_mxcsr_switch = GetCodePtr();
+
+    ABI_PopRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8);
+    RET();
+}
+
 void BlockOfCode::SwitchMxcsrOnEntry() {
     STMXCSR(MDisp(R15, offsetof(JitState, save_host_MXCSR)));
     LDMXCSR(MDisp(R15, offsetof(JitState, guest_MXCSR)));

@@ -6,6 +6,8 @@
 
 #pragma once
 
+#include <array>
+
 #include "backend_x64/jitstate.h"
 #include "common/common_types.h"
 #include "common/x64/emitter.h"
@@ -51,6 +53,10 @@ public:
         return Gen::M(const_FloatPenultimatePositiveDenormal64);
     }
 
+    CodePtr GetReturnFromRunCodeAddress() const {
+        return return_from_run_code;
+    }
+
 private:
     const u8* const_FloatNegativeZero32;
     const u8* const_FloatNaN32;
@@ -64,6 +70,10 @@ private:
     using RunCodeFuncType = void(*)(JitState*, CodePtr);
     RunCodeFuncType run_code;
    void GenRunCode();
+
+    CodePtr return_from_run_code;
+    CodePtr return_from_run_code_without_mxcsr_switch;
+    void GenReturnFromRunCode();
 };
 
 } // namespace BackendX64

@@ -67,6 +67,7 @@ EmitX64::BlockDescriptor EmitX64::Emit(const Arm::LocationDescriptor descriptor,
     code->INT3();
     const CodePtr code_ptr = code->GetCodePtr();
     basic_blocks[descriptor].code_ptr = code_ptr;
+    unique_hash_to_code_ptr[descriptor.UniqueHash()] = code_ptr;
 
     EmitCondPrelude(block.cond, block.cond_failed, block.location);
 
@@ -328,6 +329,27 @@ void EmitX64::EmitCallSupervisor(IR::Block&, IR::Inst* inst) {
     code->SwitchMxcsrOnEntry();
 }
 
+void EmitX64::EmitPushRSB(IR::Block&, IR::Inst* inst) {
+    ASSERT(inst->GetArg(0).IsImmediate());
+    u64 imm64 = inst->GetArg(0).GetU64();
+
+    X64Reg tmp = reg_alloc.ScratchRegister({HostLoc::RCX});
+    X64Reg rsb_index = reg_alloc.ScratchRegister(any_gpr);
+    u64 code_ptr = unique_hash_to_code_ptr.find(imm64) != unique_hash_to_code_ptr.end()
+                   ? u64(unique_hash_to_code_ptr[imm64])
+                   : u64(code->GetReturnFromRunCodeAddress());
+
+    code->MOV(32, R(rsb_index), MDisp(R15, offsetof(JitState, rsb_ptr)));
+    code->AND(32, R(rsb_index), Imm32(u32(JitState::RSBSize - 1)));
+    code->MOV(64, R(tmp), Imm64(imm64));
+    code->MOV(64, MComplex(R15, rsb_index, SCALE_1, offsetof(JitState, rsb_location_descriptors)), R(tmp));
+    patch_unique_hash_locations[imm64].emplace_back(code->GetCodePtr());
+    code->MOV(64, R(tmp), Imm64(code_ptr)); // This line has to match up with EmitX64::Patch.
+    code->MOV(64, MComplex(R15, rsb_index, SCALE_1, offsetof(JitState, rsb_codeptrs)), R(tmp));
+    code->ADD(32, R(rsb_index), Imm32(1));
+    code->MOV(32, MDisp(R15, offsetof(JitState, rsb_ptr)), R(rsb_index));
+}
+
 void EmitX64::EmitGetCarryFromOp(IR::Block&, IR::Inst*) {
     ASSERT_MSG(0, "should never happen");
 }

@@ -1696,7 +1718,22 @@ void EmitX64::EmitTerminalLinkBlockFast(IR::Term::LinkBlockFast terminal, Arm::L
 }
 
 void EmitX64::EmitTerminalPopRSBHint(IR::Term::PopRSBHint, Arm::LocationDescriptor initial_location) {
-    EmitTerminalReturnToDispatch({}, initial_location); // TODO: Implement RSB
+    // This calculation has to match up with IREmitter::PushRSB
+    code->MOV(32, R(RBX), MJitStateCpsr());
+    code->MOV(32, R(RCX), MJitStateReg(Arm::Reg::PC));
+    code->AND(32, R(RBX), Imm32((1 << 5) | (1 << 9)));
+    code->SHL(32, R(RBX), Imm8(2));
+    code->OR(32, R(RBX), MDisp(R15, offsetof(JitState, guest_FPSCR_mode)));
+    code->SHR(64, R(RBX), Imm8(32));
+    code->OR(64, R(RBX), R(RCX));
+
+    code->MOV(64, R(RAX), Imm64(u64(code->GetReturnFromRunCodeAddress())));
+    for (size_t i = 0; i < JitState::RSBSize; ++i) {
+        code->CMP(64, R(RBX), MDisp(R15, int(offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64))));
+        code->CMOVcc(64, RAX, MDisp(R15, int(offsetof(JitState, rsb_codeptrs) + i * sizeof(u64))), CC_E);
+    }
+    code->SUB(32, MDisp(R15, offsetof(JitState, rsb_ptr)), Imm32(1));
+    code->JMPptr(R(RAX));
 }
 
 void EmitX64::EmitTerminalIf(IR::Term::If terminal, Arm::LocationDescriptor initial_location) {

@@ -1716,6 +1753,11 @@ void EmitX64::Patch(Arm::LocationDescriptor desc, CodePtr bb) {
         ASSERT(code->GetCodePtr() - location == 6);
     }
 
+    for (CodePtr location : patch_unique_hash_locations[desc.UniqueHash()]) {
+        code->SetCodePtr(const_cast<u8*>(location));
+        code->MOV(64, R(RCX), Imm64(u64(bb)));
+    }
+
     code->SetCodePtr(save_code_ptr);
 }
 

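Because a BL's return target may not have been compiled yet when PushRSB is emitted, the code-pointer MOV initially targets the return-from-run-code thunk, and its address is recorded in patch_unique_hash_locations; once the block for that hash is compiled, EmitX64::Patch re-emits the recorded MOV so it targets the new block (pinning the scratch register to RCX keeps the overwritten instruction the same size). A reduced sketch of that idea (hypothetical simplified types, not the project's API):

#include <cstdint>
#include <unordered_map>
#include <vector>

// Simplified illustration of deferred patching of RSB code pointers.
struct PatchableImm64 { std::uint64_t* imm_location; };  // where the MOV's 64-bit immediate lives

struct RsbPatcher {
    std::unordered_map<std::uint64_t, std::uint64_t> unique_hash_to_code_ptr;        // hash -> compiled block
    std::unordered_map<std::uint64_t, std::vector<PatchableImm64>> patch_locations;  // hash -> MOVs to fix up
    std::uint64_t return_thunk = 0;                                                  // fallback target

    // While emitting PushRSB: use the real block if known, otherwise the fallback, and remember the site.
    std::uint64_t EmitCodePtrImmediate(std::uint64_t hash, std::uint64_t* imm_location) {
        patch_locations[hash].push_back({imm_location});
        auto it = unique_hash_to_code_ptr.find(hash);
        return it != unique_hash_to_code_ptr.end() ? it->second : return_thunk;
    }

    // When the block for `hash` is compiled: retarget every recorded immediate.
    void Patch(std::uint64_t hash, std::uint64_t new_code_ptr) {
        unique_hash_to_code_ptr[hash] = new_code_ptr;
        for (PatchableImm64 site : patch_locations[hash])
            *site.imm_location = new_code_ptr;
    }
};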
@@ -74,6 +74,8 @@ private:
     BlockOfCode* code;
     UserCallbacks cb;
     Jit* jit_interface;
+    std::unordered_map<u64, CodePtr> unique_hash_to_code_ptr;
+    std::unordered_map<u64, std::vector<CodePtr>> patch_unique_hash_locations;
     std::unordered_map<Arm::LocationDescriptor, BlockDescriptor, Arm::LocationDescriptorHash> basic_blocks;
     std::unordered_map<Arm::LocationDescriptor, std::vector<CodePtr>, Arm::LocationDescriptorHash> patch_jg_locations;
 };

@@ -29,10 +29,15 @@ namespace Dynarmic {
 using namespace BackendX64;
 
 struct Jit::Impl {
-    Impl(Jit* jit, UserCallbacks callbacks) : emitter(&block_of_code, callbacks, jit), callbacks(callbacks) {}
+    Impl(Jit* jit, UserCallbacks callbacks)
+        : block_of_code()
+        , jit_state(&block_of_code)
+        , emitter(&block_of_code, callbacks, jit)
+        , callbacks(callbacks)
+    {}
 
-    JitState jit_state{};
-    BlockOfCode block_of_code{};
+    BlockOfCode block_of_code;
+    JitState jit_state;
     EmitX64 emitter;
     const UserCallbacks callbacks;
 
@@ -41,7 +46,7 @@ struct Jit::Impl {
         bool TFlag = Common::Bit<5>(jit_state.Cpsr);
         bool EFlag = Common::Bit<9>(jit_state.Cpsr);
 
-        Arm::LocationDescriptor descriptor{pc, TFlag, EFlag, jit_state.guest_FPSCR_flags};
+        Arm::LocationDescriptor descriptor{pc, TFlag, EFlag, jit_state.guest_FPSCR_mode};
 
         CodePtr code_ptr = GetBasicBlock(descriptor).code_ptr;
         return block_of_code.RunCode(&jit_state, code_ptr, cycle_count);
@@ -121,11 +126,12 @@ void Jit::ClearCache(bool poison_memory) {
     ASSERT(!is_executing);
     impl->block_of_code.ClearCache(poison_memory);
     impl->emitter.ClearCache();
+    impl->jit_state.ResetRSB(&impl->block_of_code);
 }
 
 void Jit::Reset() {
     ASSERT(!is_executing);
-    impl->jit_state = {};
+    impl->jit_state = JitState(&impl->block_of_code);
 }
 
 void Jit::HaltExecution() {

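Note the member reordering in Jit::Impl above: non-static data members are constructed in declaration order, so BlockOfCode must now be declared before JitState, whose constructor (added in this commit) calls ResetRSB and therefore needs the return-from-run-code thunk to already exist. A reduced sketch of the dependency (hypothetical stand-in types, not the real classes):

#include <cstdint>

// Reduced sketch of the construction-order dependency.
struct BlockOfCode {
    const void* GetReturnFromRunCodeAddress() const { return thunk; }
    const void* thunk = nullptr;  // stands in for the generated return thunk
};

struct JitState {
    explicit JitState(BlockOfCode* code) { ResetRSB(code); }  // needs a fully constructed BlockOfCode
    void ResetRSB(BlockOfCode* code) { fallback = code->GetReturnFromRunCodeAddress(); }
    const void* fallback = nullptr;
};

struct Impl {
    BlockOfCode block_of_code;            // declared first, so it is constructed first
    JitState jit_state{&block_of_code};   // safe: block_of_code already exists
};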
@@ -4,14 +4,22 @@
  * General Public License version 2 or any later version.
  */
 
+#include "backend_x64/block_of_code.h"
 #include "backend_x64/jitstate.h"
 #include "common/assert.h"
 #include "common/bit_util.h"
 #include "common/common_types.h"
+#include "frontend/arm_types.h"
 
 namespace Dynarmic {
 namespace BackendX64 {
 
+void JitState::ResetRSB(BlockOfCode* code) {
+    for (auto& value : rsb_codeptrs) {
+        value = u64(code->GetReturnFromRunCodeAddress());
+    }
+}
+
 /**
  * Comparing MXCSR and FPSCR
  * =========================
@@ -68,14 +76,16 @@ namespace BackendX64 {
  */
 
 // NZCV; QC (ASMID only), AHP; DN, FZ, RMode, Stride; SBZP; Len; trap enables; cumulative bits
-constexpr u32 FPSCR_MASK = 0b1111'00'111111'0'111'10011111'00000000;
+constexpr u32 FPSCR_MODE_MASK = Arm::LocationDescriptor::FPSCR_MODE_MASK;
+constexpr u32 FPSCR_NZCV_MASK = 0xF0000000;
 
 u32 JitState::Fpscr() const {
-    ASSERT((guest_FPSCR_flags & ~FPSCR_MASK) == 0);
+    ASSERT((guest_FPSCR_mode & ~FPSCR_MODE_MASK) == 0);
+    ASSERT((guest_FPSCR_nzcv & ~FPSCR_NZCV_MASK) == 0);
     ASSERT((FPSCR_IDC & ~(1 << 7)) == 0);
     ASSERT((FPSCR_UFC & ~(1 << 3)) == 0);
 
-    u32 FPSCR = guest_FPSCR_flags;
+    u32 FPSCR = guest_FPSCR_mode | guest_FPSCR_nzcv;
     FPSCR |= (guest_MXCSR & 0b0000000000001); // IOC = IE
     FPSCR |= (guest_MXCSR & 0b0000000111100) >> 1; // IXC, UFC, OFC, DZC = PE, UE, OE, ZE
     FPSCR |= FPSCR_IDC;
@@ -86,7 +96,8 @@ u32 JitState::Fpscr() const {
 
 void JitState::SetFpscr(u32 FPSCR) {
     old_FPSCR = FPSCR;
-    guest_FPSCR_flags = FPSCR & FPSCR_MASK;
+    guest_FPSCR_mode = FPSCR & FPSCR_MODE_MASK;
+    guest_FPSCR_nzcv = FPSCR & FPSCR_NZCV_MASK;
     guest_MXCSR = 0;
 
     // Exception masks / enables
@@ -114,6 +125,5 @@ void JitState::SetFpscr(u32 FPSCR) {
     }
 }
 
-
 } // namespace BackendX64
 } // namespace Dynarmic

@@ -13,9 +13,13 @@
 namespace Dynarmic {
 namespace BackendX64 {
 
+class BlockOfCode;
+
 constexpr size_t SpillCount = 32;
 
 struct JitState {
+    JitState(BlockOfCode* code) { ResetRSB(code); }
+
     u32 Cpsr = 0;
     std::array<u32, 16> Reg{}; // Current register file.
     // TODO: Mode-specific register sets unimplemented.
@@ -34,9 +38,16 @@ struct JitState {
     u32 exclusive_state = 0;
     u32 exclusive_address = 0;
 
+    static constexpr size_t RSBSize = 4; // MUST be a power of 2.
+    u32 rsb_ptr = 0;
+    std::array<u64, RSBSize> rsb_location_descriptors;
+    std::array<u64, RSBSize> rsb_codeptrs;
+    void ResetRSB(BlockOfCode* code);
+
     u32 FPSCR_IDC = 0;
     u32 FPSCR_UFC = 0;
-    u32 guest_FPSCR_flags = 0;
+    u32 guest_FPSCR_mode = 0;
+    u32 guest_FPSCR_nzcv = 0;
     u32 old_FPSCR = 0;
     u32 Fpscr() const;
     void SetFpscr(u32 FPSCR);

@@ -45,7 +45,7 @@ static Gen::X64Reg HostLocToX64(HostLoc loc) {
 }
 
 static Gen::OpArg SpillToOpArg(HostLoc loc) {
-    static_assert(std::is_same<decltype(JitState{}.Spill[0]), u64&>::value, "Spill must be u64");
+    static_assert(std::is_same<decltype(JitState{nullptr}.Spill[0]), u64&>::value, "Spill must be u64");
     DEBUG_ASSERT(HostLocIsSpill(loc));
 
     size_t i = static_cast<size_t>(loc) - static_cast<size_t>(HostLoc::FirstSpill);

@@ -71,10 +71,10 @@ enum class SignExtendRotation {
  * tells us if the processor is in Thumb or Arm mode.
  */
 struct LocationDescriptor {
-    static constexpr u32 FPSCR_MASK = 0x3F79F9F;
+    static constexpr u32 FPSCR_MODE_MASK = 0x03F79F00;
 
     LocationDescriptor(u32 arm_pc, bool tflag, bool eflag, u32 fpscr)
-        : arm_pc(arm_pc), tflag(tflag), eflag(eflag), fpscr(fpscr & FPSCR_MASK) {}
+        : arm_pc(arm_pc), tflag(tflag), eflag(eflag), fpscr(fpscr & FPSCR_MODE_MASK) {}
 
     u32 PC() const { return arm_pc; }
     bool TFlag() const { return tflag; }
@@ -106,7 +106,17 @@ struct LocationDescriptor {
     }
 
     LocationDescriptor SetFPSCR(u32 new_fpscr) const {
-        return LocationDescriptor(arm_pc, tflag, eflag, new_fpscr & FPSCR_MASK);
+        return LocationDescriptor(arm_pc, tflag, eflag, new_fpscr & FPSCR_MODE_MASK);
+    }
+
+    u64 UniqueHash() const {
+        // This value MUST BE UNIQUE.
+        // This calculation has to match up with EmitX64::EmitTerminalPopRSBHint
+        u64 pc_u64 = u64(arm_pc);
+        u64 fpscr_u64 = u64(fpscr) << 32;
+        u64 t_u64 = tflag ? (1ull << 35) : 0;
+        u64 e_u64 = eflag ? (1ull << 39) : 0;
+        return pc_u64 | fpscr_u64 | t_u64 | e_u64;
     }
 
 private:
@@ -118,10 +128,7 @@ private:
 
 struct LocationDescriptorHash {
     size_t operator()(const LocationDescriptor& x) const {
-        return std::hash<u64>()(static_cast<u64>(x.PC())
-                                ^ static_cast<u64>(x.TFlag())
-                                ^ (static_cast<u64>(x.EFlag()) << 1)
-                                ^ (static_cast<u64>(x.FPSCR()) << 32));
+        return std::hash<u64>()(x.UniqueHash());
     }
 };
 

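The 64-bit hash introduced above packs the PC into the low word and the FPSCR mode bits, T flag, and E flag into the high word; since the low byte of FPSCR_MODE_MASK is zero, bits 35 and 39 are free to hold T and E without colliding with the shifted FPSCR bits. A small worked example (the PC and FPSCR values are made up for illustration):

#include <cstdint>

int main() {
    // Hypothetical location: code at 0x08001234, Thumb bit set, little-endian data, one FPSCR Len bit set.
    std::uint32_t arm_pc = 0x08001234;
    std::uint32_t fpscr_mode = 0x00010000;  // already within FPSCR_MODE_MASK (0x03F79F00)
    bool tflag = true;
    bool eflag = false;

    std::uint64_t hash = std::uint64_t(arm_pc)               // bits 0-31:  PC
                       | (std::uint64_t(fpscr_mode) << 32)   // bits 40-57: FPSCR mode bits (low byte of mask is zero)
                       | (tflag ? (1ull << 35) : 0)          // bit 35:     T flag
                       | (eflag ? (1ull << 39) : 0);         // bit 39:     E flag

    return hash == 0x0001000808001234ull ? 0 : 1;            // sanity check of the packing
}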
@@ -41,6 +41,10 @@ Value::Value(u32 value) : type(Type::U32) {
     inner.imm_u32 = value;
 }
 
+Value::Value(u64 value) : type(Type::U64) {
+    inner.imm_u64 = value;
+}
+
 bool Value::IsImmediate() const {
     if (type == Type::Opaque)
         return inner.inst->GetOpcode() == Opcode::Identity ? inner.inst->GetArg(0).IsImmediate() : false;
@@ -98,6 +102,13 @@ u32 Value::GetU32() const {
     return inner.imm_u32;
 }
 
+u64 Value::GetU64() const {
+    if (type == Type::Opaque && inner.inst->GetOpcode() == Opcode::Identity)
+        return inner.inst->GetArg(0).GetU64();
+    DEBUG_ASSERT(type == Type::U64);
+    return inner.imm_u64;
+}
+
 // Inst class member definitions
 
 Value Inst::GetArg(size_t index) const {

@@ -50,6 +50,7 @@ public:
     explicit Value(bool value);
     explicit Value(u8 value);
     explicit Value(u32 value);
+    explicit Value(u64 value);
 
     bool IsEmpty() const;
     bool IsImmediate() const;
@@ -61,6 +62,7 @@ public:
     bool GetU1() const;
     u8 GetU8() const;
     u32 GetU32() const;
+    u64 GetU64() const;
 
 private:
     Type type;
@@ -72,6 +74,7 @@ private:
         bool imm_u1;
         u8 imm_u8;
         u32 imm_u32;
+        u64 imm_u64;
     } inner;
 };
 

@@ -98,6 +98,10 @@ void IREmitter::CallSupervisor(const IR::Value& value) {
     Inst(IR::Opcode::CallSupervisor, {value});
 }
 
+void IREmitter::PushRSB(const LocationDescriptor& return_location) {
+    Inst(IR::Opcode::PushRSB, {IR::Value(return_location.UniqueHash())});
+}
+
 IR::Value IREmitter::GetCFlag() {
     return Inst(IR::Opcode::GetCFlag, {});
 }

@@ -54,6 +54,7 @@ public:
     void BXWritePC(const IR::Value& value);
     void LoadWritePC(const IR::Value& value);
     void CallSupervisor(const IR::Value& value);
+    void PushRSB(const LocationDescriptor& return_location);
 
     IR::Value GetCFlag();
     void SetNFlag(const IR::Value& value);

@@ -22,6 +22,9 @@ OPCODE(OrQFlag, T::Void, T::U1
 OPCODE(BXWritePC, T::Void, T::U32 )
 OPCODE(CallSupervisor, T::Void, T::U32 )
 
+// Hints
+OPCODE(PushRSB, T::Void, T::U64 )
+
 // Pseudo-operation, handled specially at final emit
 OPCODE(GetCarryFromOp, T::U1, T::U32 )
 OPCODE(GetOverflowFromOp, T::U1, T::U32 )

@@ -26,6 +26,7 @@ bool ArmTranslatorVisitor::arm_BL(Cond cond, Imm24 imm24) {
     u32 imm32 = Common::SignExtend<26, u32>(imm24 << 2) + 8;
     // BL <label>
     if (ConditionPassed(cond)) {
+        ir.PushRSB(ir.current_location.AdvancePC(4));
         ir.SetRegister(Reg::LR, ir.Imm32(ir.current_location.PC() + 4));
         auto new_location = ir.current_location.AdvancePC(imm32);
         ir.SetTerm(IR::Term::LinkBlock{ new_location });
@@ -37,6 +38,7 @@ bool ArmTranslatorVisitor::arm_BL(Cond cond, Imm24 imm24) {
 bool ArmTranslatorVisitor::arm_BLX_imm(bool H, Imm24 imm24) {
     u32 imm32 = Common::SignExtend<26, u32>((imm24 << 2)) + (H ? 2 : 0) + 8;
     // BLX <label>
+    ir.PushRSB(ir.current_location.AdvancePC(4));
     ir.SetRegister(Reg::LR, ir.Imm32(ir.current_location.PC() + 4));
     auto new_location = ir.current_location.AdvancePC(imm32).SetTFlag(true);
     ir.SetTerm(IR::Term::LinkBlock{ new_location });
@@ -48,6 +50,7 @@ bool ArmTranslatorVisitor::arm_BLX_reg(Cond cond, Reg m) {
         return UnpredictableInstruction();
     // BLX <Rm>
     if (ConditionPassed(cond)) {
+        ir.PushRSB(ir.current_location.AdvancePC(4));
         ir.SetRegister(Reg::LR, ir.Imm32(ir.current_location.PC() + 4));
         ir.BXWritePC(ir.GetRegister(m));
         ir.SetTerm(IR::Term::ReturnToDispatch{});
@@ -60,7 +63,10 @@ bool ArmTranslatorVisitor::arm_BX(Cond cond, Reg m) {
     // BX <Rm>
     if (ConditionPassed(cond)) {
         ir.BXWritePC(ir.GetRegister(m));
-        ir.SetTerm(IR::Term::ReturnToDispatch{});
+        if (m == Reg::R14)
+            ir.SetTerm(IR::Term::PopRSBHint{});
+        else
+            ir.SetTerm(IR::Term::ReturnToDispatch{});
         return false;
     }
     return true;

@@ -44,7 +44,10 @@ bool ArmTranslatorVisitor::arm_LDR_imm(Cond cond, bool P, bool U, bool W, Reg n,
 
     if (d == Reg::PC) {
         ir.BXWritePC(data);
-        ir.SetTerm(IR::Term::ReturnToDispatch{});
+        if (!P && W && n == Reg::R13)
+            ir.SetTerm(IR::Term::PopRSBHint{});
+        else
+            ir.SetTerm(IR::Term::ReturnToDispatch{});
         return false;
     }
 
@@ -413,7 +416,10 @@ static bool LDMHelper(IREmitter& ir, bool W, Reg n, RegList list, IR::Value star
     }
     if (Common::Bit<15>(list)) {
         ir.LoadWritePC(ir.ReadMemory32(address));
-        ir.SetTerm(IR::Term::ReturnToDispatch{});
+        if (n == Reg::R13)
+            ir.SetTerm(IR::Term::PopRSBHint{});
+        else
+            ir.SetTerm(IR::Term::ReturnToDispatch{});
         return false;
     }
     return true;

@@ -753,15 +753,18 @@ struct ThumbTranslatorVisitor final {
     bool thumb16_BX(Reg m) {
         // BX <Rm>
         ir.BXWritePC(ir.GetRegister(m));
-        ir.SetTerm(IR::Term::ReturnToDispatch{});
+        if (m == Reg::R14)
+            ir.SetTerm(IR::Term::PopRSBHint{});
+        else
+            ir.SetTerm(IR::Term::ReturnToDispatch{});
         return false;
     }
 
     bool thumb16_BLX_reg(Reg m) {
         // BLX <Rm>
+        ir.PushRSB(ir.current_location.AdvancePC(2));
         ir.SetRegister(Reg::LR, ir.Imm32((ir.current_location.PC() + 2) | 1));
         ir.BXWritePC(ir.GetRegister(m));
-        // TODO(optimization): Possible push RSB location
         ir.SetTerm(IR::Term::ReturnToDispatch{});
         return false;
     }
@@ -798,6 +801,7 @@ struct ThumbTranslatorVisitor final {
     bool thumb32_BL_imm(Imm11 hi, Imm11 lo) {
         s32 imm32 = Common::SignExtend<23, s32>((hi << 12) | (lo << 1)) + 4;
         // BL <label>
+        ir.PushRSB(ir.current_location.AdvancePC(4));
         ir.SetRegister(Reg::LR, ir.Imm32((ir.current_location.PC() + 4) | 1));
         auto new_location = ir.current_location.AdvancePC(imm32);
         ir.SetTerm(IR::Term::LinkBlock{new_location});
@@ -810,6 +814,7 @@ struct ThumbTranslatorVisitor final {
             return UnpredictableInstruction();
         }
         // BLX <label>
+        ir.PushRSB(ir.current_location.AdvancePC(4));
         ir.SetRegister(Reg::LR, ir.Imm32((ir.current_location.PC() + 4) | 1));
         auto new_location = ir.current_location
                             .SetPC(ir.AlignPC(4) + imm32)

@@ -25,6 +25,7 @@ void DeadCodeElimination(IR::Block& block) {
     case IR::Opcode::OrQFlag:
     case IR::Opcode::BXWritePC:
     case IR::Opcode::CallSupervisor:
+    case IR::Opcode::PushRSB:
    case IR::Opcode::FPAbs32:
     case IR::Opcode::FPAbs64:
     case IR::Opcode::FPAdd32: