# Review notes (free text ahead of the first "diff --git" header; not part of any hunk).
#
# What the patch does, as far as the hunks below show:
#   * emit_arm64.cpp: the emitter requests a flags location via WriteFlags() and emits only
#     CMP <value>, WZR; the MRS that copied the flags into a GPR is removed from the emitter.
#   * emit_arm64_data_processing.cpp: two emitters call SpillFlags() right after Realize().
#   * reg_alloc.{cpp,h}: adds HostLoc::Kind::Flags, FlagsTag, ReadFlags/WriteFlags, a single
#     `flags` HostLocInfo and SpillFlags() (MRS into a newly allocated GPR), and replaces the
#     is_vector bool template parameter of RealizeReadImpl/RealizeWriteImpl with a HostLoc::Kind.
#
# Changed in review:
#   * RegAlloc::ValueInfo(const IR::Inst*): the added fprs and spills lookups tested their
#     iterator against gprs.end(). They now test against fprs.end() / spills.end(), matching
#     RegAlloc::ValueLocation. With the wrong end(), a failed search is treated as a hit and
#     the end iterator is dereferenced.
#   * Layout only: line breaks, indentation and blank context lines were restored so that each
#     hunk agrees with the line counts in its @@ header.
#
# NOTE(review): both Spill reload paths in RealizeReadImpl address the slot with
#   new_location_index, while the slot holding the value is current_location->index. Left
#   untouched here; confirm against the store emitted by SpillGpr/SpillFpr.
# NOTE(review): template argument lists look stripped in this copy of the patch (for example
#   "static_cast(" and "template int ..."). They are kept exactly as found, not guessed.
diff --git a/src/dynarmic/backend/arm64/emit_arm64.cpp b/src/dynarmic/backend/arm64/emit_arm64.cpp
index 7f138541..76ed3c72 100644
--- a/src/dynarmic/backend/arm64/emit_arm64.cpp
+++ b/src/dynarmic/backend/arm64/emit_arm64.cpp
@@ -48,11 +48,10 @@ void EmitIR(oaknut::CodeGenerator& code, EmitContext& c
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);

     auto Wvalue = ctx.reg_alloc.ReadW(args[0]);
-    auto Wnz = ctx.reg_alloc.WriteW(inst);
-    RegAlloc::Realize(Wvalue, Wnz);
+    auto flags = ctx.reg_alloc.WriteFlags(inst);
+    RegAlloc::Realize(Wvalue, flags);

-    code.CMP(*Wnz, WZR);
-    code.MRS(Wnz->toX(), static_cast(0b11'011'0100'0010'000));
+    code.CMP(*Wvalue, WZR);
 }

 EmittedBlockInfo EmitArm64(oaknut::CodeGenerator& code, IR::Block block, const EmitConfig& emit_conf) {
diff --git a/src/dynarmic/backend/arm64/emit_arm64_data_processing.cpp b/src/dynarmic/backend/arm64/emit_arm64_data_processing.cpp
index 4bda0d91..8bbdcee6 100644
--- a/src/dynarmic/backend/arm64/emit_arm64_data_processing.cpp
+++ b/src/dynarmic/backend/arm64/emit_arm64_data_processing.cpp
@@ -47,6 +47,7 @@ void EmitIR(oaknut::CodeGenerator& code, EmitCon
     auto Woperand = ctx.reg_alloc.ReadW(operand_arg);
     auto Wshift = ctx.reg_alloc.ReadW(shift_arg);
     RegAlloc::Realize(Wresult, Woperand, Wshift);
+    ctx.reg_alloc.SpillFlags();

     code.AND(Wscratch0, Wshift, 0xff);
     code.LSL(Wresult, Woperand, Wscratch0);
@@ -83,6 +84,7 @@ void EmitIR(oaknut::CodeGenerator& code, EmitCon
     auto Wshift = ctx.reg_alloc.ReadW(shift_arg);
     auto Wcarry_in = ctx.reg_alloc.ReadW(carry_arg);
     RegAlloc::Realize(Wresult, Wcarry_out, Woperand, Wshift, Wcarry_in);
+    ctx.reg_alloc.SpillFlags();

     // TODO: Use RMIF

diff --git a/src/dynarmic/backend/arm64/reg_alloc.cpp b/src/dynarmic/backend/arm64/reg_alloc.cpp
index feb8d4b4..55e2687d 100644
--- a/src/dynarmic/backend/arm64/reg_alloc.cpp
+++ b/src/dynarmic/backend/arm64/reg_alloc.cpp
@@ -88,10 +88,8 @@ bool RegAlloc::IsValueLive(IR::Inst* inst) const {
     return !!ValueLocation(inst);
 }

-template
+template
 int RegAlloc::RealizeReadImpl(const IR::Inst* value) {
-    constexpr HostLoc::Kind required_kind = is_vector ? HostLoc::Kind::Fpr : HostLoc::Kind::Gpr;
-
     const auto current_location = ValueLocation(value);
     ASSERT(current_location);

@@ -103,26 +101,7 @@ int RegAlloc::RealizeReadImpl(const IR::Inst* value) {
     ASSERT(!ValueInfo(*current_location).realized);
     ASSERT(ValueInfo(*current_location).locked);

-    if constexpr (is_vector) {
-        const int new_location_index = AllocateRegister(fprs, fpr_order);
-        SpillFpr(new_location_index);
-
-        switch (current_location->kind) {
-        case HostLoc::Kind::Gpr:
-            code.FMOV(oaknut::DReg{new_location_index}, oaknut::XReg{current_location->index});
-            break;
-        case HostLoc::Kind::Fpr:
-            ASSERT_FALSE("Logic error");
-            break;
-        case HostLoc::Kind::Spill:
-            code.LDR(oaknut::QReg{new_location_index}, SP, spill_offset + new_location_index * spill_slot_size);
-            break;
-        }
-
-        fprs[new_location_index] = std::exchange(ValueInfo(*current_location), {});
-        fprs[new_location_index].realized = true;
-        return new_location_index;
-    } else {
+    if constexpr (required_kind == HostLoc::Kind::Gpr) {
         const int new_location_index = AllocateRegister(gprs, gpr_order);
         SpillGpr(new_location_index);

@@ -137,15 +116,44 @@ int RegAlloc::RealizeReadImpl(const IR::Inst* value) {
         case HostLoc::Kind::Spill:
             code.LDR(oaknut::XReg{new_location_index}, SP, spill_offset + new_location_index * spill_slot_size);
             break;
+        case HostLoc::Kind::Flags:
+            code.MRS(oaknut::XReg{new_location_index}, static_cast(0b11'011'0100'0010'000));
+            break;
         }

         gprs[new_location_index] = std::exchange(ValueInfo(*current_location), {});
         gprs[new_location_index].realized = true;
         return new_location_index;
+    } else if constexpr (required_kind == HostLoc::Kind::Fpr) {
+        const int new_location_index = AllocateRegister(fprs, fpr_order);
+        SpillFpr(new_location_index);
+
+        switch (current_location->kind) {
+        case HostLoc::Kind::Gpr:
+            code.FMOV(oaknut::DReg{new_location_index}, oaknut::XReg{current_location->index});
+            break;
+        case HostLoc::Kind::Fpr:
+            ASSERT_FALSE("Logic error");
+            break;
+        case HostLoc::Kind::Spill:
+            code.LDR(oaknut::QReg{new_location_index}, SP, spill_offset + new_location_index * spill_slot_size);
+            break;
+        case HostLoc::Kind::Flags:
+            ASSERT_FALSE("Moving from flags into fprs is not currently supported");
+            break;
+        }
+
+        fprs[new_location_index] = std::exchange(ValueInfo(*current_location), {});
+        fprs[new_location_index].realized = true;
+        return new_location_index;
+    } else if constexpr (required_kind == HostLoc::Kind::Flags) {
+        ASSERT_FALSE("Loading flags back into NZCV is not currently supported");
+    } else {
+        static_assert(required_kind == HostLoc::Kind::Fpr || required_kind == HostLoc::Kind::Gpr || required_kind == HostLoc::Kind::Flags);
     }
 }

-template
+template
 int RegAlloc::RealizeWriteImpl(const IR::Inst* value) {
     ASSERT(!ValueLocation(value));

@@ -157,23 +165,31 @@ int RegAlloc::RealizeWriteImpl(const IR::Inst* value) {
         info.expected_uses += value->UseCount();
     };

-    if constexpr (is_vector) {
-        const int new_location_index = AllocateRegister(fprs, fpr_order);
-        SpillFpr(new_location_index);
-        setup_location(fprs[new_location_index]);
-        return new_location_index;
-    } else {
+    if constexpr (kind == HostLoc::Kind::Gpr) {
         const int new_location_index = AllocateRegister(gprs, gpr_order);
         SpillGpr(new_location_index);
         setup_location(gprs[new_location_index]);
         return new_location_index;
+    } else if constexpr (kind == HostLoc::Kind::Fpr) {
+        const int new_location_index = AllocateRegister(fprs, fpr_order);
+        SpillFpr(new_location_index);
+        setup_location(fprs[new_location_index]);
+        return new_location_index;
+    } else if constexpr (kind == HostLoc::Kind::Flags) {
+        ASSERT(flags.values.empty());
+        setup_location(flags);
+        return 0;
+    } else {
+        static_assert(kind == HostLoc::Kind::Fpr || kind == HostLoc::Kind::Gpr || kind == HostLoc::Kind::Flags);
     }
 }

-template int RegAlloc::RealizeReadImpl(const IR::Inst* value);
-template int RegAlloc::RealizeReadImpl(const IR::Inst* value);
-template int RegAlloc::RealizeWriteImpl(const IR::Inst* value);
-template int RegAlloc::RealizeWriteImpl(const IR::Inst* value);
+template int RegAlloc::RealizeReadImpl(const IR::Inst* value);
+template int RegAlloc::RealizeReadImpl(const IR::Inst* value);
+template int RegAlloc::RealizeReadImpl(const IR::Inst* value);
+template int RegAlloc::RealizeWriteImpl(const IR::Inst* value);
+template int RegAlloc::RealizeWriteImpl(const IR::Inst* value);
+template int RegAlloc::RealizeWriteImpl(const IR::Inst* value);

 void RegAlloc::Unlock(HostLoc host_loc) {
     HostLocInfo& info = ValueInfo(host_loc);
@@ -223,6 +239,17 @@ void RegAlloc::SpillFpr(int index) {
     spills[new_location_index] = std::exchange(fprs[index], {});
 }

+void RegAlloc::SpillFlags() {
+    ASSERT(!flags.locked && !flags.realized);
+    if (flags.values.empty()) {
+        return;
+    }
+    const int new_location_index = AllocateRegister(gprs, gpr_order);
+    SpillGpr(new_location_index);
+    code.MRS(oaknut::XReg{new_location_index}, static_cast(0b11'011'0100'0010'000));
+    gprs[new_location_index] = std::exchange(flags, {});
+}
+
 int RegAlloc::FindFreeSpill() const {
     const auto iter = std::find_if(spills.begin(), spills.end(), [](const HostLocInfo& info) { return info.values.empty(); });
     ASSERT_MSG(iter != spills.end(), "All spill locations are full");
@@ -240,6 +267,9 @@ std::optional RegAlloc::ValueLocation(const IR::Inst* value) const {
     if (const auto iter = std::find_if(fprs.begin(), fprs.end(), contains_value); iter != fprs.end()) {
         return HostLoc{HostLoc::Kind::Fpr, static_cast(iter - fprs.begin())};
     }
+    if (contains_value(flags)) {
+        return HostLoc{HostLoc::Kind::Flags, 0};
+    }
     if (const auto iter = std::find_if(spills.begin(), spills.end(), contains_value); iter != spills.end()) {
         return HostLoc{HostLoc::Kind::Spill, static_cast(iter - spills.begin())};
     }
@@ -252,6 +282,8 @@ HostLocInfo& RegAlloc::ValueInfo(HostLoc host_loc) {
         return gprs[static_cast(host_loc.index)];
     case HostLoc::Kind::Fpr:
         return fprs[static_cast(host_loc.index)];
+    case HostLoc::Kind::Flags:
+        return flags;
     case HostLoc::Kind::Spill:
         return spills[static_cast(host_loc.index)];
     }
@@ -263,13 +295,16 @@ HostLocInfo& RegAlloc::ValueInfo(const IR::Inst* value) {
         return info.values.contains(value);
     };

-    if (const auto iter = std::find_if(gprs.begin(), gprs.end(), contains_value)) {
+    if (const auto iter = std::find_if(gprs.begin(), gprs.end(), contains_value); iter != gprs.end()) {
         return *iter;
     }
-    if (const auto iter = std::find_if(fprs.begin(), fprs.end(), contains_value)) {
+    if (const auto iter = std::find_if(fprs.begin(), fprs.end(), contains_value); iter != fprs.end()) {
         return *iter;
     }
-    if (const auto iter = std::find_if(spills.begin(), spills.end(), contains_value)) {
+    if (contains_value(flags)) {
+        return flags;
+    }
+    if (const auto iter = std::find_if(spills.begin(), spills.end(), contains_value); iter != spills.end()) {
         return *iter;
     }
     ASSERT_FALSE("RegAlloc::ValueInfo: Value not found");
diff --git a/src/dynarmic/backend/arm64/reg_alloc.h b/src/dynarmic/backend/arm64/reg_alloc.h
index 35bf35fb..0ca9b7c2 100644
--- a/src/dynarmic/backend/arm64/reg_alloc.h
+++ b/src/dynarmic/backend/arm64/reg_alloc.h
@@ -30,6 +30,7 @@ struct HostLoc {
     enum class Kind {
         Gpr,
         Fpr,
+        Flags,
         Spill,
     } kind;
     int index;
@@ -60,10 +61,23 @@ private:
     IR::Value value;
 };

+struct FlagsTag {
+private:
+    template
+    friend struct RAReg;
+
+    explicit FlagsTag(int) {}
+    int index() const { return 0; }
+};
+
 template
 struct RAReg {
 public:
-    static constexpr bool is_vector = std::is_base_of_v;
+    static constexpr HostLoc::Kind kind = !std::is_same_v
+                                            ? std::is_base_of_v
+                                                ? HostLoc::Kind::Fpr
+                                                : HostLoc::Kind::Gpr
+                                            : HostLoc::Kind::Flags;

     operator T() const { return *reg; }

@@ -120,6 +134,8 @@ public:
     auto ReadH(Argument& arg) { return RAReg{*this, false, PreReadImpl(arg.value)}; }
     auto ReadB(Argument& arg) { return RAReg{*this, false, PreReadImpl(arg.value)}; }

+    auto ReadFlags(Argument& arg) { return RAReg{*this, false, PreReadImpl(arg.value)}; }
+
     template
     auto ReadReg(Argument& arg) {
         if constexpr (size == 64) {
@@ -157,6 +173,8 @@ public:
     auto WriteH(IR::Inst* inst) { return RAReg{*this, true, inst}; }
     auto WriteB(IR::Inst* inst) { return RAReg{*this, true, inst}; }

+    auto WriteFlags(IR::Inst* inst) { return RAReg{*this, true, inst}; }
+
     template
     auto WriteReg(IR::Inst* inst) {
         if constexpr (size == 64) {
@@ -185,6 +203,7 @@ public:
         }
     }

+    void SpillFlags();
     void SpillAll();

     template
@@ -198,13 +217,14 @@ private:
     friend struct RAReg;

     const IR::Inst* PreReadImpl(const IR::Value& value) {
-        ValueInfo(value.GetInst()).locked = true;
-        return value.GetInst();
+        const IR::Inst* inst = value.GetInst();
+        ValueInfo(inst).locked = true;
+        return inst;
     }

-    template
+    template
     int RealizeReadImpl(const IR::Inst* value);
-    template
+    template
     int RealizeWriteImpl(const IR::Inst* value);

     void Unlock(HostLoc host_loc);
@@ -223,6 +243,7 @@ private:

     std::array gprs;
     std::array fprs;
+    HostLocInfo flags;
     std::array spills;

     mutable std::mt19937 rand_gen;
@@ -231,13 +252,13 @@ private:
 template
 RAReg::~RAReg() {
     if (reg) {
-        reg_alloc.Unlock(HostLoc{is_vector ? HostLoc::Kind::Fpr : HostLoc::Kind::Gpr, reg->index()});
+        reg_alloc.Unlock(HostLoc{kind, reg->index()});
     }
 }

 template
 void RAReg::Realize() {
-    reg = T{write ? reg_alloc.RealizeWriteImpl(value) : reg_alloc.RealizeReadImpl(value)};
+    reg = T{write ? reg_alloc.RealizeWriteImpl(value) : reg_alloc.RealizeReadImpl(value)};
 }

 } // namespace Dynarmic::Backend::Arm64