backend/arm64/reg_alloc: Implement ReadWrite mode

This commit is contained in:
Merry 2022-08-02 00:37:40 +01:00 committed by merry
parent 208b19b89a
commit 0288540155
2 changed files with 147 additions and 51 deletions

View file

@ -93,7 +93,6 @@ bool HostLocInfo::Contains(const IR::Inst* value) const {
void HostLocInfo::SetupScratchLocation() {
ASSERT(IsCompletelyEmpty());
locked++;
realized = true;
}
@ -101,7 +100,6 @@ void HostLocInfo::SetupLocation(const IR::Inst* value) {
ASSERT(IsCompletelyEmpty());
values.clear();
values.emplace_back(value);
locked++;
realized = true;
uses_this_inst = 0;
accumulated_uses = 0;
@ -112,8 +110,8 @@ bool HostLocInfo::IsCompletelyEmpty() const {
return values.empty() && !locked && !realized && !accumulated_uses && !expected_uses && !uses_this_inst;
}
bool HostLocInfo::IsImmediatelyAllocatable() const {
return values.empty() && !locked;
bool HostLocInfo::MaybeAllocatable() const {
return !locked && !realized;
}
bool HostLocInfo::IsOneRemainingUse() const {
@ -169,6 +167,7 @@ void RegAlloc::PrepareForCall(IR::Inst* result, std::optional<Argument::copyable
const std::array<std::optional<Argument::copyable_reference>, 4> args{arg0, arg1, arg2, arg3};
for (int i = 0; i < 4; i++) {
if (args[i]) {
ASSERT(gprs[i].IsCompletelyEmpty());
LoadCopyInto(args[i]->get().value, oaknut::XReg{i});
}
}
@ -339,21 +338,43 @@ int RegAlloc::RealizeWriteImpl(const IR::Inst* value) {
}
}
// Realizes an operand that is both read and written.
//
// A destination register for `write_value` is obtained through RealizeWriteImpl,
// and the current contents of `read_value` are then copied into that register
// with LoadCopyInto. The index of the destination register is returned.
//
// Only the Gpr and Fpr kinds are supported; instantiating with Flags trips an
// assertion (after the write location has been realized).
template<HostLoc::Kind kind>
int RegAlloc::RealizeReadWriteImpl(const IR::Value& read_value, const IR::Inst* write_value) {
    static_assert(kind == HostLoc::Kind::Fpr || kind == HostLoc::Kind::Gpr || kind == HostLoc::Kind::Flags);

    // TODO: Move elimination

    // The destination is realized up front for every kind, before any kind-specific handling.
    const int dest_index = RealizeWriteImpl<kind>(write_value);

    if constexpr (kind == HostLoc::Kind::Flags) {
        ASSERT_FALSE("Incorrect function for ReadWrite of flags");
    } else if constexpr (kind == HostLoc::Kind::Fpr) {
        LoadCopyInto(read_value, oaknut::QReg{dest_index});
        return dest_index;
    } else {
        // Remaining kind is Gpr (guaranteed by the static_assert above).
        LoadCopyInto(read_value, oaknut::XReg{dest_index});
        return dest_index;
    }
}
template int RegAlloc::RealizeReadImpl<HostLoc::Kind::Gpr>(const IR::Value& value);
template int RegAlloc::RealizeReadImpl<HostLoc::Kind::Fpr>(const IR::Value& value);
template int RegAlloc::RealizeReadImpl<HostLoc::Kind::Flags>(const IR::Value& value);
template int RegAlloc::RealizeWriteImpl<HostLoc::Kind::Gpr>(const IR::Inst* value);
template int RegAlloc::RealizeWriteImpl<HostLoc::Kind::Fpr>(const IR::Inst* value);
template int RegAlloc::RealizeWriteImpl<HostLoc::Kind::Flags>(const IR::Inst* value);
template int RegAlloc::RealizeReadWriteImpl<HostLoc::Kind::Gpr>(const IR::Value&, const IR::Inst*);
template int RegAlloc::RealizeReadWriteImpl<HostLoc::Kind::Fpr>(const IR::Value&, const IR::Inst*);
template int RegAlloc::RealizeReadWriteImpl<HostLoc::Kind::Flags>(const IR::Value&, const IR::Inst*);
int RegAlloc::AllocateRegister(const std::array<HostLocInfo, 32>& regs, const std::vector<int>& order) const {
const auto empty = std::find_if(order.begin(), order.end(), [&](int i) { return regs[i].IsImmediatelyAllocatable(); });
const auto empty = std::find_if(order.begin(), order.end(), [&](int i) { return regs[i].IsCompletelyEmpty(); });
if (empty != order.end()) {
return *empty;
}
std::vector<int> candidates;
std::copy_if(order.begin(), order.end(), std::back_inserter(candidates), [&](int i) { return !regs[i].locked; });
std::copy_if(order.begin(), order.end(), std::back_inserter(candidates), [&](int i) { return regs[i].MaybeAllocatable(); });
// TODO: LRU
std::uniform_int_distribution<size_t> dis{0, candidates.size() - 1};
@ -405,7 +426,6 @@ void RegAlloc::ReadWriteFlags(Argument& read, IR::Inst* write) {
if (write) {
flags.SetupLocation(write);
flags.locked--;
flags.realized = false;
}
}
@ -435,7 +455,6 @@ void RegAlloc::LoadCopyInto(const IR::Value& value, oaknut::XReg reg) {
const auto current_location = ValueLocation(value.GetInst());
ASSERT(current_location);
ASSERT(gprs[reg.index()].IsCompletelyEmpty());
switch (current_location->kind) {
case HostLoc::Kind::Gpr:
code.MOV(reg, oaknut::XReg{current_location->index});
@ -453,6 +472,32 @@ void RegAlloc::LoadCopyInto(const IR::Value& value, oaknut::XReg reg) {
}
}
// Emits code that copies `value` into the vector register `reg`.
//
// Immediates are materialized in Xscratch0 and moved into the D view of `reg`.
// Otherwise the value's current host location (which must exist) determines the
// instruction used: FMOV from a GPR, a 16-byte MOV from another vector register,
// or an LDR from the value's spill slot. Copying out of the flags is unsupported.
void RegAlloc::LoadCopyInto(const IR::Value& value, oaknut::QReg reg) {
    if (!value.IsImmediate()) {
        const auto source = ValueLocation(value.GetInst());
        ASSERT(source);

        const HostLoc& loc = *source;
        switch (loc.kind) {
        case HostLoc::Kind::Fpr:
            code.MOV(reg.B16(), oaknut::QReg{loc.index}.B16());
            return;
        case HostLoc::Kind::Gpr:
            code.FMOV(reg.toD(), oaknut::XReg{loc.index});
            return;
        case HostLoc::Kind::Spill:
            // TODO: Minimize move size to max value width
            code.LDR(reg, SP, spill_offset + loc.index * spill_slot_size);
            return;
        case HostLoc::Kind::Flags:
            ASSERT_FALSE("Moving from flags into fprs is not currently supported");
            return;
        }
        return;
    }

    // Immediate: go through the scratch GPR, then transfer into the D view of the destination.
    code.MOV(Xscratch0, value.GetImmediateAsU64());
    code.FMOV(reg.toD(), Xscratch0);
}
std::optional<HostLoc> RegAlloc::ValueLocation(const IR::Inst* value) const {
const auto contains_value = [value](const HostLocInfo& info) { return info.Contains(value); };

View file

@ -26,7 +26,7 @@ namespace Dynarmic::Backend::Arm64 {
class FpsrManager;
class RegAlloc;
struct HostLoc {
struct HostLoc final {
enum class Kind {
Gpr,
Fpr,
@ -36,7 +36,13 @@ struct HostLoc {
int index;
};
struct Argument {
// Access mode of a register operand handed out by the register allocator.
// RAReg<T>::Realize() dispatches on this value to pick the realization routine.
enum RWType {
    Read = 0,       // realized via RealizeReadImpl using the operand's read value
    Write = 1,      // realized via RealizeWriteImpl using the operand's write value
    ReadWrite = 2,  // realized via RealizeReadWriteImpl using both the read and the write value
};
struct Argument final {
public:
using copyable_reference = std::reference_wrapper<Argument>;
@ -68,7 +74,7 @@ private:
IR::Value value;
};
struct FlagsTag {
struct FlagsTag final {
private:
template<typename>
friend struct RAReg;
@ -78,7 +84,7 @@ private:
};
template<typename T>
struct RAReg {
struct RAReg final {
public:
static constexpr HostLoc::Kind kind = !std::is_same_v<FlagsTag, T>
? std::is_base_of_v<oaknut::VReg, T>
@ -103,7 +109,7 @@ public:
private:
friend class RegAlloc;
explicit RAReg(RegAlloc& reg_alloc, bool write, const IR::Value& value);
explicit RAReg(RegAlloc& reg_alloc, RWType rw, const IR::Value& read_value, const IR::Inst* write_value);
RAReg(const RAReg&) = delete;
RAReg& operator=(const RAReg&) = delete;
@ -113,12 +119,13 @@ private:
void Realize();
RegAlloc& reg_alloc;
bool write;
const IR::Value value;
RWType rw;
const IR::Value read_value;
const IR::Inst* write_value;
std::optional<T> reg;
};
struct HostLocInfo {
struct HostLocInfo final {
std::vector<const IR::Inst*> values;
size_t locked = 0;
bool realized = false;
@ -130,12 +137,12 @@ struct HostLocInfo {
void SetupScratchLocation();
void SetupLocation(const IR::Inst*);
bool IsCompletelyEmpty() const;
bool IsImmediatelyAllocatable() const;
bool MaybeAllocatable() const;
bool IsOneRemainingUse() const;
void UpdateUses();
};
class RegAlloc {
class RegAlloc final {
public:
using ArgumentInfo = std::array<Argument, IR::max_arg_count>;
@ -145,14 +152,14 @@ public:
ArgumentInfo GetArgumentInfo(IR::Inst* inst);
bool IsValueLive(IR::Inst* inst) const;
auto ReadX(Argument& arg) { return RAReg<oaknut::XReg>{*this, false, arg.value}; }
auto ReadW(Argument& arg) { return RAReg<oaknut::WReg>{*this, false, arg.value}; }
auto ReadX(Argument& arg) { return RAReg<oaknut::XReg>{*this, RWType::Read, arg.value, nullptr}; }
auto ReadW(Argument& arg) { return RAReg<oaknut::WReg>{*this, RWType::Read, arg.value, nullptr}; }
auto ReadQ(Argument& arg) { return RAReg<oaknut::QReg>{*this, false, arg.value}; }
auto ReadD(Argument& arg) { return RAReg<oaknut::DReg>{*this, false, arg.value}; }
auto ReadS(Argument& arg) { return RAReg<oaknut::SReg>{*this, false, arg.value}; }
auto ReadH(Argument& arg) { return RAReg<oaknut::HReg>{*this, false, arg.value}; }
auto ReadB(Argument& arg) { return RAReg<oaknut::BReg>{*this, false, arg.value}; }
auto ReadQ(Argument& arg) { return RAReg<oaknut::QReg>{*this, RWType::Read, arg.value, nullptr}; }
auto ReadD(Argument& arg) { return RAReg<oaknut::DReg>{*this, RWType::Read, arg.value, nullptr}; }
auto ReadS(Argument& arg) { return RAReg<oaknut::SReg>{*this, RWType::Read, arg.value, nullptr}; }
auto ReadH(Argument& arg) { return RAReg<oaknut::HReg>{*this, RWType::Read, arg.value, nullptr}; }
auto ReadB(Argument& arg) { return RAReg<oaknut::BReg>{*this, RWType::Read, arg.value, nullptr}; }
template<size_t size>
auto ReadReg(Argument& arg) {
@ -182,16 +189,16 @@ public:
}
}
auto WriteX(IR::Inst* inst) { return RAReg<oaknut::XReg>{*this, true, IR::Value{inst}}; }
auto WriteW(IR::Inst* inst) { return RAReg<oaknut::WReg>{*this, true, IR::Value{inst}}; }
auto WriteX(IR::Inst* inst) { return RAReg<oaknut::XReg>{*this, RWType::Write, {}, inst}; }
auto WriteW(IR::Inst* inst) { return RAReg<oaknut::WReg>{*this, RWType::Write, {}, inst}; }
auto WriteQ(IR::Inst* inst) { return RAReg<oaknut::QReg>{*this, true, IR::Value{inst}}; }
auto WriteD(IR::Inst* inst) { return RAReg<oaknut::DReg>{*this, true, IR::Value{inst}}; }
auto WriteS(IR::Inst* inst) { return RAReg<oaknut::SReg>{*this, true, IR::Value{inst}}; }
auto WriteH(IR::Inst* inst) { return RAReg<oaknut::HReg>{*this, true, IR::Value{inst}}; }
auto WriteB(IR::Inst* inst) { return RAReg<oaknut::BReg>{*this, true, IR::Value{inst}}; }
auto WriteQ(IR::Inst* inst) { return RAReg<oaknut::QReg>{*this, RWType::Write, {}, inst}; }
auto WriteD(IR::Inst* inst) { return RAReg<oaknut::DReg>{*this, RWType::Write, {}, inst}; }
auto WriteS(IR::Inst* inst) { return RAReg<oaknut::SReg>{*this, RWType::Write, {}, inst}; }
auto WriteH(IR::Inst* inst) { return RAReg<oaknut::HReg>{*this, RWType::Write, {}, inst}; }
auto WriteB(IR::Inst* inst) { return RAReg<oaknut::BReg>{*this, RWType::Write, {}, inst}; }
auto WriteFlags(IR::Inst* inst) { return RAReg<FlagsTag>{*this, true, IR::Value{inst}}; }
auto WriteFlags(IR::Inst* inst) { return RAReg<FlagsTag>{*this, RWType::Write, {}, inst}; }
template<size_t size>
auto WriteReg(IR::Inst* inst) {
@ -221,6 +228,43 @@ public:
}
}
auto ReadWriteX(Argument& arg, const IR::Inst* inst) { return RAReg<oaknut::XReg>{*this, RWType::ReadWrite, arg.value, inst}; }
auto ReadWriteW(Argument& arg, const IR::Inst* inst) { return RAReg<oaknut::WReg>{*this, RWType::ReadWrite, arg.value, inst}; }
auto ReadWriteQ(Argument& arg, const IR::Inst* inst) { return RAReg<oaknut::QReg>{*this, RWType::ReadWrite, arg.value, inst}; }
auto ReadWriteD(Argument& arg, const IR::Inst* inst) { return RAReg<oaknut::DReg>{*this, RWType::ReadWrite, arg.value, inst}; }
auto ReadWriteS(Argument& arg, const IR::Inst* inst) { return RAReg<oaknut::SReg>{*this, RWType::ReadWrite, arg.value, inst}; }
auto ReadWriteH(Argument& arg, const IR::Inst* inst) { return RAReg<oaknut::HReg>{*this, RWType::ReadWrite, arg.value, inst}; }
auto ReadWriteB(Argument& arg, const IR::Inst* inst) { return RAReg<oaknut::BReg>{*this, RWType::ReadWrite, arg.value, inst}; }
// Size-dispatched ReadWrite factory for general-purpose registers.
// `size` is the register width in bits: 32 forwards to ReadWriteW and 64 to
// ReadWriteX. Any other width hits the assertion below.
template<size_t size>
auto ReadWriteReg(Argument& arg, const IR::Inst* inst) {
    if constexpr (size == 32) {
        return ReadWriteW(arg, inst);
    } else if constexpr (size == 64) {
        return ReadWriteX(arg, inst);
    } else {
        ASSERT_FALSE("Invalid size to ReadWriteReg {}", size);
    }
}
// Size-dispatched ReadWrite factory for vector registers.
// `size` is the register width in bits and selects the matching view:
// 8 -> B, 16 -> H, 32 -> S, 64 -> D, 128 -> Q. Any other width hits the
// assertion below.
template<size_t size>
auto ReadWriteVec(Argument& arg, const IR::Inst* inst) {
    if constexpr (size == 8) {
        return ReadWriteB(arg, inst);
    } else if constexpr (size == 16) {
        return ReadWriteH(arg, inst);
    } else if constexpr (size == 32) {
        return ReadWriteS(arg, inst);
    } else if constexpr (size == 64) {
        return ReadWriteD(arg, inst);
    } else if constexpr (size == 128) {
        return ReadWriteQ(arg, inst);
    } else {
        ASSERT_FALSE("Invalid size to ReadWriteVec {}", size);
    }
}
void PrepareForCall(IR::Inst* result = nullptr,
std::optional<Argument::copyable_reference> arg0 = {},
std::optional<Argument::copyable_reference> arg1 = {},
@ -254,6 +298,8 @@ private:
int RealizeReadImpl(const IR::Value& value);
template<HostLoc::Kind kind>
int RealizeWriteImpl(const IR::Inst* value);
template<HostLoc::Kind kind>
int RealizeReadWriteImpl(const IR::Value& read_value, const IR::Inst* write_value);
int AllocateRegister(const std::array<HostLocInfo, 32>& regs, const std::vector<int>& order) const;
void SpillGpr(int index);
@ -261,6 +307,7 @@ private:
int FindFreeSpill() const;
void LoadCopyInto(const IR::Value& value, oaknut::XReg reg);
void LoadCopyInto(const IR::Value& value, oaknut::QReg reg);
std::optional<HostLoc> ValueLocation(const IR::Inst* value) const;
HostLocInfo& ValueInfo(HostLoc host_loc);
@ -280,34 +327,38 @@ private:
};
template<typename T>
RAReg<T>::RAReg(RegAlloc& reg_alloc, bool write, const IR::Value& value)
: reg_alloc{reg_alloc}, write{write}, value{value} {
if (!write && !value.IsImmediate()) {
reg_alloc.ValueInfo(value.GetInst()).locked++;
RAReg<T>::RAReg(RegAlloc& reg_alloc, RWType rw, const IR::Value& read_value, const IR::Inst* write_value)
: reg_alloc{reg_alloc}, rw{rw}, read_value{read_value}, write_value{write_value} {
if (rw != RWType::Write && !read_value.IsImmediate()) {
reg_alloc.ValueInfo(read_value.GetInst()).locked++;
}
}
template<typename T>
RAReg<T>::~RAReg() {
if (value.IsImmediate()) {
if (reg) {
// Immediate in scratch register
HostLocInfo& info = reg_alloc.ValueInfo(HostLoc{kind, reg->index()});
info.locked--;
info.realized = false;
}
} else {
HostLocInfo& info = reg_alloc.ValueInfo(value.GetInst());
info.locked--;
if (reg) {
reg_alloc.ValueInfo(HostLoc{kind, reg->index()}).realized = false;
}
if (rw != RWType::Write && !read_value.IsImmediate()) {
reg_alloc.ValueInfo(read_value.GetInst()).locked--;
}
if (reg) {
reg_alloc.ValueInfo(HostLoc{kind, reg->index()}).realized = false;
}
}
template<typename T>
void RAReg<T>::Realize() {
reg = T{write ? reg_alloc.RealizeWriteImpl<kind>(value.GetInst()) : reg_alloc.RealizeReadImpl<kind>(value)};
switch (rw) {
case RWType::Read:
reg = T{reg_alloc.RealizeReadImpl<kind>(read_value)};
break;
case RWType::Write:
reg = T{reg_alloc.RealizeWriteImpl<kind>(write_value)};
break;
case RWType::ReadWrite:
reg = T{reg_alloc.RealizeReadWriteImpl<kind>(read_value, write_value)};
break;
default:
ASSERT_FALSE("Invalid RWType");
}
}
} // namespace Dynarmic::Backend::Arm64