a32_get_set_elimination_pass: Fix bugs in A32 get/set algorithm

This commit is contained in:
Merry 2023-02-05 17:19:08 +00:00
parent 2636da8821
commit 7d3b4f913b
3 changed files with 234 additions and 155 deletions

View file

@ -17,8 +17,6 @@ using namespace oaknut::util;
void EmitVerboseDebuggingOutput(oaknut::CodeGenerator& code, EmitContext& ctx) { void EmitVerboseDebuggingOutput(oaknut::CodeGenerator& code, EmitContext& ctx) {
code.SUB(SP, SP, sizeof(RegisterData)); code.SUB(SP, SP, sizeof(RegisterData));
code.MRS(X0, oaknut::SystemReg::FPSR);
code.STR(X0, SP, offsetof(RegisterData, fpsr));
for (int i = 0; i < 30; i++) { for (int i = 0; i < 30; i++) {
if (i == 18) { if (i == 18) {
continue; // Platform register continue; // Platform register
@ -32,9 +30,13 @@ void EmitVerboseDebuggingOutput(oaknut::CodeGenerator& code, EmitContext& ctx) {
code.STR(X0, SP, offsetof(RegisterData, nzcv)); code.STR(X0, SP, offsetof(RegisterData, nzcv));
code.ADD(X0, SP, sizeof(RegisterData) + offsetof(StackLayout, spill)); code.ADD(X0, SP, sizeof(RegisterData) + offsetof(StackLayout, spill));
code.STR(X0, SP, offsetof(RegisterData, spill)); code.STR(X0, SP, offsetof(RegisterData, spill));
code.MRS(X0, oaknut::SystemReg::FPSR);
code.STR(X0, SP, offsetof(RegisterData, fpsr));
ctx.reg_alloc.EmitVerboseDebuggingOutput(); ctx.reg_alloc.EmitVerboseDebuggingOutput();
code.LDR(X0, SP, offsetof(RegisterData, fpsr));
code.MSR(oaknut::SystemReg::FPSR, X0);
code.LDR(X0, SP, offsetof(RegisterData, nzcv)); code.LDR(X0, SP, offsetof(RegisterData, nzcv));
code.MSR(oaknut::SystemReg::NZCV, X0); code.MSR(oaknut::SystemReg::NZCV, X0);
for (int i = 0; i < 32; i++) { for (int i = 0; i < 32; i++) {
@ -46,8 +48,6 @@ void EmitVerboseDebuggingOutput(oaknut::CodeGenerator& code, EmitContext& ctx) {
} }
code.LDR(oaknut::XReg{i}, SP, offsetof(RegisterData, x) + i * sizeof(u64)); code.LDR(oaknut::XReg{i}, SP, offsetof(RegisterData, x) + i * sizeof(u64));
} }
code.LDR(X0, SP, offsetof(RegisterData, fpsr));
code.MSR(oaknut::SystemReg::FPSR, X0);
code.ADD(SP, SP, sizeof(RegisterData)); code.ADD(SP, SP, sizeof(RegisterData));
} }

View file

@ -19,52 +19,45 @@
namespace Dynarmic::Optimization { namespace Dynarmic::Optimization {
void A32GetSetElimination(IR::Block& block, A32GetSetEliminationOptions) { namespace {
void FlagsPass(IR::Block& block) {
using Iterator = std::reverse_iterator<IR::Block::iterator>; using Iterator = std::reverse_iterator<IR::Block::iterator>;
struct RegisterInfo { struct FlagInfo {
bool set_not_required = false; bool set_not_required = false;
bool has_value_request = false; bool has_value_request = false;
Iterator value_request = {}; Iterator value_request = {};
}; };
struct ValuelessRegisterInfo { struct ValuelessFlagInfo {
bool set_not_required = false; bool set_not_required = false;
}; };
std::array<RegisterInfo, 15> reg_info; ValuelessFlagInfo nzcvq;
std::array<RegisterInfo, 64> ext_reg_singles_info; ValuelessFlagInfo nzcv;
std::array<RegisterInfo, 32> ext_reg_doubles_info; ValuelessFlagInfo nz;
std::array<RegisterInfo, 32> ext_reg_vector_double_info; FlagInfo c_flag;
std::array<RegisterInfo, 16> ext_reg_vector_quad_info; FlagInfo ge;
ValuelessRegisterInfo nzcvq;
ValuelessRegisterInfo nzcv;
ValuelessRegisterInfo nz;
RegisterInfo c_flag;
RegisterInfo ge;
auto do_set = [&](RegisterInfo& info, IR::Value value, Iterator inst, std::initializer_list<std::reference_wrapper<RegisterInfo>> dependants = {}) { auto do_set = [&](FlagInfo& info, IR::Value value, Iterator inst) {
if (info.has_value_request) { if (info.has_value_request) {
info.value_request->ReplaceUsesWith(value); info.value_request->ReplaceUsesWith(value);
} }
info.has_value_request = false; info.has_value_request = false;
if (info.set_not_required && std::all_of(dependants.begin(), dependants.end(), [](auto d) { return !d.get().has_value_request; })) {
inst->Invalidate();
}
info.set_not_required = true;
for (auto d : dependants) {
d.get() = {};
}
};
auto do_set_valueless = [&](ValuelessRegisterInfo& info, Iterator inst) {
if (info.set_not_required) { if (info.set_not_required) {
inst->Invalidate(); inst->Invalidate();
} }
info.set_not_required = true; info.set_not_required = true;
}; };
auto do_get = [](RegisterInfo& info, Iterator inst) { auto do_set_valueless = [&](ValuelessFlagInfo& info, Iterator inst) {
if (info.set_not_required) {
inst->Invalidate();
}
info.set_not_required = true;
};
auto do_get = [](FlagInfo& info, Iterator inst) {
if (info.has_value_request) { if (info.has_value_request) {
info.value_request->ReplaceUsesWith(IR::Value{&*inst}); info.value_request->ReplaceUsesWith(IR::Value{&*inst});
} }
@ -76,107 +69,6 @@ void A32GetSetElimination(IR::Block& block, A32GetSetEliminationOptions) {
for (auto inst = block.rbegin(); inst != block.rend(); ++inst) { for (auto inst = block.rbegin(); inst != block.rend(); ++inst) {
switch (inst->GetOpcode()) { switch (inst->GetOpcode()) {
case IR::Opcode::A32SetRegister: {
const A32::Reg reg = inst->GetArg(0).GetA32RegRef();
if (reg == A32::Reg::PC) {
break;
}
const auto reg_index = static_cast<size_t>(reg);
do_set(reg_info[reg_index], inst->GetArg(1), inst);
break;
}
case IR::Opcode::A32GetRegister: {
const A32::Reg reg = inst->GetArg(0).GetA32RegRef();
ASSERT(reg != A32::Reg::PC);
const size_t reg_index = static_cast<size_t>(reg);
do_get(reg_info[reg_index], inst);
break;
}
case IR::Opcode::A32SetExtendedRegister32: {
const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef();
const size_t reg_index = A32::RegNumber(reg);
do_set(ext_reg_singles_info[reg_index],
inst->GetArg(1),
inst,
{
ext_reg_doubles_info[reg_index / 2],
ext_reg_vector_double_info[reg_index / 2],
ext_reg_vector_quad_info[reg_index / 4],
});
break;
}
case IR::Opcode::A32GetExtendedRegister32: {
const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef();
const size_t reg_index = A32::RegNumber(reg);
do_get(ext_reg_singles_info[reg_index], inst);
break;
}
case IR::Opcode::A32SetExtendedRegister64: {
const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef();
const size_t reg_index = A32::RegNumber(reg);
do_set(ext_reg_doubles_info[reg_index],
inst->GetArg(1),
inst,
{
ext_reg_singles_info[reg_index * 2 + 0],
ext_reg_singles_info[reg_index * 2 + 1],
ext_reg_vector_double_info[reg_index],
ext_reg_vector_quad_info[reg_index / 2],
});
break;
}
case IR::Opcode::A32GetExtendedRegister64: {
const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef();
const size_t reg_index = A32::RegNumber(reg);
do_get(ext_reg_doubles_info[reg_index], inst);
break;
}
case IR::Opcode::A32SetVector: {
const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef();
const size_t reg_index = A32::RegNumber(reg);
if (A32::IsDoubleExtReg(reg)) {
ir.SetInsertionPointBefore(std::prev(inst.base()));
const IR::U128 stored_value = ir.VectorZeroUpper(IR::U128{inst->GetArg(1)});
do_set(ext_reg_vector_double_info[reg_index],
stored_value,
inst,
{
ext_reg_singles_info[reg_index * 2 + 0],
ext_reg_singles_info[reg_index * 2 + 1],
ext_reg_doubles_info[reg_index],
ext_reg_vector_quad_info[reg_index / 2],
});
} else {
DEBUG_ASSERT(A32::IsQuadExtReg(reg));
do_set(ext_reg_vector_quad_info[reg_index],
inst->GetArg(1),
inst,
{
ext_reg_singles_info[reg_index * 4 + 0],
ext_reg_singles_info[reg_index * 4 + 1],
ext_reg_singles_info[reg_index * 4 + 2],
ext_reg_singles_info[reg_index * 4 + 3],
ext_reg_doubles_info[reg_index * 2 + 0],
ext_reg_doubles_info[reg_index * 2 + 1],
ext_reg_vector_double_info[reg_index * 2 + 0],
ext_reg_vector_double_info[reg_index * 2 + 1],
});
}
break;
}
case IR::Opcode::A32GetVector: {
const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef();
const size_t reg_index = A32::RegNumber(reg);
if (A32::IsDoubleExtReg(reg)) {
do_get(ext_reg_vector_double_info[reg_index], inst);
} else {
DEBUG_ASSERT(A32::IsQuadExtReg(reg));
do_get(ext_reg_vector_quad_info[reg_index], inst);
}
break;
}
case IR::Opcode::A32GetCFlag: { case IR::Opcode::A32GetCFlag: {
do_get(c_flag, inst); do_get(c_flag, inst);
break; break;
@ -282,12 +174,192 @@ void A32GetSetElimination(IR::Block& block, A32GetSetEliminationOptions) {
c_flag = {}; c_flag = {};
ge = {}; ge = {};
} }
break;
}
}
}
}
void RegisterPass(IR::Block& block) {
using Iterator = IR::Block::iterator;
// Tracking state kept for one core register while the block is scanned.
struct RegInfo {
// Value currently associated with the register; empty (see IsEmpty() in do_get) until a get or set has been seen.
IR::Value register_value;
// Most recent set instruction recorded by do_set for this register, if any.
std::optional<Iterator> last_set_instruction;
};
std::array<RegInfo, 15> reg_info;
// Handles a core-register get.
// If a value is already tracked for the register, the get's uses are replaced
// with that value; otherwise the get itself becomes the tracked value.
const auto do_get = [](RegInfo& info, Iterator get_inst) {
    const bool value_is_tracked = !info.register_value.IsEmpty();
    if (value_is_tracked) {
        get_inst->ReplaceUsesWith(info.register_value);
    } else {
        // First access to this register: remember this get as the register's value.
        info.register_value = IR::Value(&*get_inst);
    }
};
const auto do_set = [](RegInfo& info, IR::Value value, Iterator set_inst) {
if (info.last_set_instruction) {
(*info.last_set_instruction)->Invalidate();
}
info = {
.register_value = value,
.last_set_instruction = set_inst,
};
};
// Kind of access that last touched an extended-register slot.
enum class ExtValueType {
// Default state of a slot (see ExtRegInfo::value_type).
Empty,
// Used for A32GetExtendedRegister32 / A32SetExtendedRegister32.
Single,
// Used for A32GetExtendedRegister64 / A32SetExtendedRegister64.
Double,
// Used for A32GetVector / A32SetVector when A32::IsDoubleExtReg(reg) holds.
VectorDouble,
// Used for A32GetVector / A32SetVector on the quad-register path.
VectorQuad,
};
// Tracking state for one entry of ext_reg_info. Single accesses use one entry,
// double and vector-double accesses use two (reg_index * 2 + 0..1), and
// vector-quad accesses use four (reg_index * 4 + 0..3).
struct ExtRegInfo {
// Access kind that last wrote this entry; Empty until a get or set is seen.
ExtValueType value_type = ExtValueType::Empty;
// Value recorded by the last get or set that covered this entry.
IR::Value register_value;
// Set instruction recorded by do_ext_set; do_ext_get resets it to std::nullopt.
std::optional<Iterator> last_set_instruction;
};
std::array<ExtRegInfo, 64> ext_reg_info;
const auto do_ext_get = [](ExtValueType type, std::initializer_list<std::reference_wrapper<ExtRegInfo>> infos, Iterator get_inst) {
if (!std::all_of(infos.begin(), infos.end(), [type](const auto& info) { return info.get().value_type == type; })) {
for (auto& info : infos) {
info.get() = {
.value_type = type,
.register_value = IR::Value(&*get_inst),
.last_set_instruction = std::nullopt,
};
}
return;
}
get_inst->ReplaceUsesWith(std::data(infos)[0].get().register_value);
};
const auto do_ext_set = [](ExtValueType type, std::initializer_list<std::reference_wrapper<ExtRegInfo>> infos, IR::Value value, Iterator set_inst) {
if (std::all_of(infos.begin(), infos.end(), [type](const auto& info) { return info.get().value_type == type; })) {
if (std::data(infos)[0].get().last_set_instruction) {
(*std::data(infos)[0].get().last_set_instruction)->Invalidate();
}
}
for (auto& info : infos) {
info.get() = {
.value_type = type,
.register_value = value,
.last_set_instruction = set_inst,
};
}
};
// Location and version don't matter here.
A32::IREmitter ir{block, A32::LocationDescriptor{block.Location()}, {}};
for (auto inst = block.begin(); inst != block.end(); ++inst) {
switch (inst->GetOpcode()) {
case IR::Opcode::A32GetRegister: {
const A32::Reg reg = inst->GetArg(0).GetA32RegRef();
ASSERT(reg != A32::Reg::PC);
const size_t reg_index = static_cast<size_t>(reg);
do_get(reg_info[reg_index], inst);
break;
}
case IR::Opcode::A32SetRegister: {
const A32::Reg reg = inst->GetArg(0).GetA32RegRef();
if (reg == A32::Reg::PC) {
break;
}
const auto reg_index = static_cast<size_t>(reg);
do_set(reg_info[reg_index], inst->GetArg(1), inst);
break;
}
case IR::Opcode::A32GetExtendedRegister32: {
const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef();
const size_t reg_index = A32::RegNumber(reg);
do_ext_get(ExtValueType::Single, {ext_reg_info[reg_index]}, inst);
break;
}
case IR::Opcode::A32SetExtendedRegister32: {
const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef();
const size_t reg_index = A32::RegNumber(reg);
do_ext_set(ExtValueType::Single, {ext_reg_info[reg_index]}, inst->GetArg(1), inst);
break;
}
case IR::Opcode::A32GetExtendedRegister64: {
const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef();
const size_t reg_index = A32::RegNumber(reg);
do_ext_get(ExtValueType::Double,
{
ext_reg_info[reg_index * 2 + 0],
ext_reg_info[reg_index * 2 + 1],
},
inst);
break;
}
case IR::Opcode::A32SetExtendedRegister64: {
const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef();
const size_t reg_index = A32::RegNumber(reg);
do_ext_set(ExtValueType::Double,
{
ext_reg_info[reg_index * 2 + 0],
ext_reg_info[reg_index * 2 + 1],
},
inst->GetArg(1),
inst);
break;
}
case IR::Opcode::A32GetVector: {
const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef();
const size_t reg_index = A32::RegNumber(reg);
if (A32::IsDoubleExtReg(reg)) {
do_ext_get(ExtValueType::VectorDouble,
{
ext_reg_info[reg_index * 2 + 0],
ext_reg_info[reg_index * 2 + 1],
},
inst);
} else {
DEBUG_ASSERT(A32::IsQuadExtReg(reg));
do_ext_get(ExtValueType::VectorQuad,
{
ext_reg_info[reg_index * 4 + 0],
ext_reg_info[reg_index * 4 + 1],
ext_reg_info[reg_index * 4 + 2],
ext_reg_info[reg_index * 4 + 3],
},
inst);
}
break;
}
case IR::Opcode::A32SetVector: {
const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef();
const size_t reg_index = A32::RegNumber(reg);
if (A32::IsDoubleExtReg(reg)) {
ir.SetInsertionPointAfter(inst);
const IR::U128 stored_value = ir.VectorZeroUpper(IR::U128{inst->GetArg(1)});
do_ext_set(ExtValueType::VectorDouble,
{
ext_reg_info[reg_index * 2 + 0],
ext_reg_info[reg_index * 2 + 1],
},
stored_value,
inst);
} else {
DEBUG_ASSERT(A32::IsQuadExtReg(reg));
do_ext_set(ExtValueType::VectorQuad,
{
ext_reg_info[reg_index * 4 + 0],
ext_reg_info[reg_index * 4 + 1],
ext_reg_info[reg_index * 4 + 2],
ext_reg_info[reg_index * 4 + 3],
},
inst->GetArg(1),
inst);
}
break;
}
default: {
if (inst->ReadsFromCoreRegister() || inst->WritesToCoreRegister()) { if (inst->ReadsFromCoreRegister() || inst->WritesToCoreRegister()) {
reg_info = {}; reg_info = {};
ext_reg_singles_info = {}; ext_reg_info = {};
ext_reg_doubles_info = {};
ext_reg_vector_double_info = {};
ext_reg_vector_quad_info = {};
} }
break; break;
} }
@ -295,4 +367,11 @@ void A32GetSetElimination(IR::Block& block, A32GetSetEliminationOptions) {
} }
} }
} // namespace
// Entry point of the A32 get/set elimination optimization.
// Runs FlagsPass over the block first, then RegisterPass over the same block.
// The options argument is accepted but not used here.
void A32GetSetElimination(IR::Block& block, A32GetSetEliminationOptions) {
FlagsPass(block);
RegisterPass(block);
}
} // namespace Dynarmic::Optimization } // namespace Dynarmic::Optimization

View file

@ -406,26 +406,6 @@ void RunTestInstance(Dynarmic::A32::Jit& jit,
const u32 num_words = initial_pc / sizeof(typename TestEnv::InstructionType); const u32 num_words = initial_pc / sizeof(typename TestEnv::InstructionType);
const u32 code_mem_size = num_words + static_cast<u32>(instructions.size()); const u32 code_mem_size = num_words + static_cast<u32>(instructions.size());
jit.ClearCache();
for (size_t jit_rerun_count = 0; jit_rerun_count < num_jit_reruns; ++jit_rerun_count) {
jit_env.code_mem.resize(code_mem_size);
std::fill(jit_env.code_mem.begin(), jit_env.code_mem.end(), TestEnv::infinite_loop);
std::copy(instructions.begin(), instructions.end(), jit_env.code_mem.begin() + num_words);
jit_env.PadCodeMem();
jit_env.modified_memory.clear();
jit_env.interrupts.clear();
jit.Regs() = regs;
jit.ExtRegs() = vecs;
jit.SetFpscr(fpscr);
jit.SetCpsr(cpsr);
jit_env.ticks_left = ticks_left;
jit.Run();
}
fmt::print("instructions:"); fmt::print("instructions:");
for (auto instruction : instructions) { for (auto instruction : instructions) {
if constexpr (sizeof(decltype(instruction)) == 2) { if constexpr (sizeof(decltype(instruction)) == 2) {
@ -449,6 +429,26 @@ void RunTestInstance(Dynarmic::A32::Jit& jit,
fmt::print("initial_cpsr: {:08x}\n", cpsr); fmt::print("initial_cpsr: {:08x}\n", cpsr);
fmt::print("initial_fpcr: {:08x}\n", fpscr); fmt::print("initial_fpcr: {:08x}\n", fpscr);
jit.ClearCache();
for (size_t jit_rerun_count = 0; jit_rerun_count < num_jit_reruns; ++jit_rerun_count) {
jit_env.code_mem.resize(code_mem_size);
std::fill(jit_env.code_mem.begin(), jit_env.code_mem.end(), TestEnv::infinite_loop);
std::copy(instructions.begin(), instructions.end(), jit_env.code_mem.begin() + num_words);
jit_env.PadCodeMem();
jit_env.modified_memory.clear();
jit_env.interrupts.clear();
jit.Regs() = regs;
jit.ExtRegs() = vecs;
jit.SetFpscr(fpscr);
jit.SetCpsr(cpsr);
jit_env.ticks_left = ticks_left;
jit.Run();
}
fmt::print("final_regs:"); fmt::print("final_regs:");
for (u32 i : jit.Regs()) { for (u32 i : jit.Regs()) {
fmt::print(" {:08x}", i); fmt::print(" {:08x}", i);