From 65cccf070e39d7c7ded0965518331f6303e70a2b Mon Sep 17 00:00:00 2001
From: MerryMage
Date: Fri, 24 Feb 2017 18:42:59 +0000
Subject: [PATCH 01/16] reg_alloc: Properly encapsulate HostLocInfo

---
 src/backend_x64/reg_alloc.cpp | 74 +++++++++++++++----------------
 src/backend_x64/reg_alloc.h   | 79 +++++++++++++++++++++++++----------
 2 files changed, 91 insertions(+), 62 deletions(-)

diff --git a/src/backend_x64/reg_alloc.cpp b/src/backend_x64/reg_alloc.cpp
index 073d7be0..5d2c49ab 100644
--- a/src/backend_x64/reg_alloc.cpp
+++ b/src/backend_x64/reg_alloc.cpp
@@ -52,8 +52,8 @@ HostLoc RegAlloc::DefHostLocReg(IR::Inst* def_inst, HostLocList desired_location
         SpillRegister(location);
     }
 
-    LocInfo(location).is_being_used = true;
-    LocInfo(location).def = def_inst;
+    LocInfo(location).Lock();
+    LocInfo(location).Def(def_inst);
 
     DEBUG_ASSERT(LocInfo(location).IsDef());
     return location;
@@ -69,7 +69,7 @@ void RegAlloc::RegisterAddDef(IR::Inst* def_inst, const IR::Value& use_inst) {
 
     DEBUG_ASSERT_MSG(ValueLocation(use_inst.GetInst()), "use_inst must already be defined");
     HostLoc location = *ValueLocation(use_inst.GetInst());
-    LocInfo(location).values.emplace_back(def_inst);
+    LocInfo(location).AddValue(def_inst);
     use_inst.GetInst()->DecrementRemainingUses();
     DEBUG_ASSERT(LocInfo(location).IsIdle());
 }
@@ -91,8 +91,8 @@ HostLoc RegAlloc::UseDefHostLocReg(IR::Inst* use_inst, IR::Inst* def_inst, HostL
         HostLoc current_location = *ValueLocation(use_inst);
         auto& loc_info = LocInfo(current_location);
         if (loc_info.IsIdle()) {
-            loc_info.is_being_used = true;
-            loc_info.def = def_inst;
+            loc_info.Lock();
+            loc_info.Def(def_inst);
             DEBUG_ASSERT(loc_info.IsUseDef());
             if (HostLocIsSpill(current_location)) {
                 HostLoc new_location = SelectARegister(desired_locations);
@@ -136,12 +136,12 @@ std::tuple<OpArg, HostLoc> RegAlloc::UseDefOpArgHostLocReg(IR::Value use_value,
             auto& loc_info = LocInfo(current_location);
             if (!loc_info.IsIdle()) {
                 if (HostLocIsSpill(current_location)) {
-                    loc_info.is_being_used = true;
+                    loc_info.Lock();
                     DEBUG_ASSERT(loc_info.IsUse());
                     return std::make_tuple(SpillToOpArg(current_location), DefHostLocReg(def_inst, desired_locations));
                 } else {
-                    loc_info.is_being_used = true;
-                    loc_info.def = def_inst;
+                    loc_info.Lock();
+                    loc_info.Def(def_inst);
                     DEBUG_ASSERT(loc_info.IsUseDef());
                     return std::make_tuple(HostLocToX64(current_location), current_location);
                 }
@@ -180,7 +180,7 @@ HostLoc RegAlloc::UseHostLocReg(IR::Inst* use_inst, HostLocList desired_location
             LocInfo(current_location) = {};
             DEBUG_ASSERT(LocInfo(new_location).IsUse());
         } else {
-            LocInfo(new_location).is_being_used = true;
+            LocInfo(new_location).Lock();
             DEBUG_ASSERT(LocInfo(new_location).IsScratch());
         }
         return new_location;
@@ -231,7 +231,7 @@ HostLoc RegAlloc::UseScratchHostLocReg(IR::Inst* use_inst, HostLocList desired_l
 
     if (HostLocIsSpill(current_location)) {
         EmitMove(new_location, current_location);
-        LocInfo(new_location).is_being_used = true;
+        LocInfo(new_location).Lock();
         use_inst->DecrementRemainingUses();
         DEBUG_ASSERT(LocInfo(new_location).IsScratch());
         return new_location;
@@ -246,8 +246,8 @@
             ASSERT(LocInfo(current_location).IsIdle());
         }
 
-        LocInfo(new_location).is_being_used = true;
-        LocInfo(new_location).values.clear();
+        LocInfo(new_location) = {};
+        LocInfo(new_location).Lock();
         use_inst->DecrementRemainingUses();
         DEBUG_ASSERT(LocInfo(new_location).IsScratch());
         return new_location;
@@ -266,7 +266,7 @@ HostLoc RegAlloc::ScratchHostLocReg(HostLocList desired_locations) {
     }
 
     // Update state
-    LocInfo(location).is_being_used = true;
+    LocInfo(location).Lock();
 
     DEBUG_ASSERT(LocInfo(location).IsScratch());
     return location;
@@ -329,9 +329,8 @@ HostLoc RegAlloc::SelectARegister(HostLocList desired_locations) const {
 
 boost::optional<HostLoc> RegAlloc::ValueLocation(const IR::Inst* value) const {
     for (size_t i = 0; i < HostLocCount; i++)
-        for (const IR::Inst* v : hostloc_info[i].values)
-            if (v == value)
-                return boost::make_optional(static_cast<HostLoc>(i));
+        if (hostloc_info[i].ContainsValue(value))
+            return boost::make_optional(static_cast<HostLoc>(i));
 
     return boost::none;
 }
@@ -339,17 +338,18 @@ boost::optional<HostLoc> RegAlloc::ValueLocation(const IR::Inst* value) const {
 bool RegAlloc::IsRegisterOccupied(HostLoc loc) const {
     const auto& info = LocInfo(loc);
 
-    return !info.values.empty() || info.def;
+    return !info.IsEmpty();
 }
 
 bool RegAlloc::IsRegisterAllocated(HostLoc loc) const {
-    return LocInfo(loc).is_being_used;
+    return !LocInfo(loc).IsIdle();
 }
 
-bool RegAlloc::IsLastUse(const IR::Inst* inst) const {
-    if (inst->UseCount() > 1)
-        return false;
-    return LocInfo(*ValueLocation(inst)).values.size() == 1;
+bool RegAlloc::IsLastUse(const IR::Inst*) const {
+    //if (inst->UseCount() > 1)
+    //    return false;
+    //return LocInfo(*ValueLocation(inst)).values.size() == 1;
+    return false;
 }
 
 void RegAlloc::SpillRegister(HostLoc loc) {
@@ -375,22 +375,14 @@ HostLoc RegAlloc::FindFreeSpill() const {
 
 void RegAlloc::EndOfAllocScope() {
     for (auto& iter : hostloc_info) {
-        iter.is_being_used = false;
-        if (iter.def) {
-            iter.values.clear();
-            iter.values.emplace_back(iter.def);
-            iter.def = nullptr;
-        }
-        if (!iter.values.empty()) {
-            auto to_erase = std::remove_if(iter.values.begin(), iter.values.end(),
-                                           [](const auto& inst){ return !inst->HasUses(); });
-            iter.values.erase(to_erase, iter.values.end());
-        }
+        iter.EndOfAllocScope();
     }
 }
 
 void RegAlloc::AssertNoMoreUses() {
-    ASSERT(std::all_of(hostloc_info.begin(), hostloc_info.end(), [](const auto& i){ return i.values.empty(); }));
+    if (!std::all_of(hostloc_info.begin(), hostloc_info.end(), [](const auto& i){ return i.IsEmpty(); })) {
+        ASSERT_MSG(false, "bad");
+    }
 }
 
 void RegAlloc::Reset() {
@@ -438,15 +430,15 @@ std::tuple<HostLoc, bool> RegAlloc::UseHostLoc(IR::Inst* use_inst, HostLocList d
                 SpillRegister(new_location);
             }
             EmitMove(new_location, current_location);
-            LocInfo(new_location).is_being_used = true;
-            LocInfo(new_location).values.emplace_back(use_inst);
+            LocInfo(new_location).Lock();
+            LocInfo(new_location).AddValue(use_inst);
             use_inst->DecrementRemainingUses();
             DEBUG_ASSERT(LocInfo(new_location).IsUse());
             return std::make_tuple(new_location, false);
         } else {
-            bool was_being_used = LocInfo(current_location).is_being_used;
+            bool was_being_used = LocInfo(current_location).IsLocked();
             ASSERT(LocInfo(current_location).IsUse() || LocInfo(current_location).IsIdle());
-            LocInfo(current_location).is_being_used = true;
+            LocInfo(current_location).Lock();
             use_inst->DecrementRemainingUses();
             DEBUG_ASSERT(LocInfo(current_location).IsUse());
             return std::make_tuple(current_location, was_being_used);
         }
     }
 
     if (HostLocIsSpill(current_location)) {
-        bool was_being_used = LocInfo(current_location).is_being_used;
-        LocInfo(current_location).is_being_used = true;
+        bool was_being_used = LocInfo(current_location).IsLocked();
+        LocInfo(current_location).Lock();
         use_inst->DecrementRemainingUses();
         DEBUG_ASSERT(LocInfo(current_location).IsUse());
         return std::make_tuple(current_location, was_being_used);
@@ -464,7 +456,7 @@ std::tuple<HostLoc, bool> RegAlloc::UseHostLoc(IR::Inst* use_inst, HostLocList d
         ASSERT(LocInfo(current_location).IsIdle());
         EmitExchange(new_location, current_location);
         std::swap(LocInfo(new_location), LocInfo(current_location));
-        LocInfo(new_location).is_being_used = true;
+        LocInfo(new_location).Lock();
         use_inst->DecrementRemainingUses();
         DEBUG_ASSERT(LocInfo(new_location).IsUse());
         return std::make_tuple(new_location, false);
diff --git a/src/backend_x64/reg_alloc.h b/src/backend_x64/reg_alloc.h
index d2e69b62..0b1eea15 100644
--- a/src/backend_x64/reg_alloc.h
+++ b/src/backend_x64/reg_alloc.h
@@ -22,6 +22,64 @@ namespace Dynarmic {
 namespace BackendX64 {
 
+struct HostLocInfo {
+public:
+    bool IsIdle() const {
+        return !is_being_used;
+    }
+    bool IsLocked() const {
+        return is_being_used;
+    }
+    bool IsEmpty() const {
+        return !is_being_used && !def && values.empty();
+    }
+    bool IsScratch() const {
+        return is_being_used && !def && values.empty();
+    }
+    bool IsUse() const {
+        return is_being_used && !def && !values.empty();
+    }
+    bool IsDef() const {
+        return is_being_used && def && values.empty();
+    }
+    bool IsUseDef() const {
+        return is_being_used && def && !values.empty();
+    }
+
+    bool ContainsValue(const IR::Inst* inst) const {
+        return std::find(values.begin(), values.end(), inst) != values.end();
+    }
+
+    void Lock() {
+        is_being_used = true;
+    }
+    void AddValue(IR::Inst* inst) {
+        values.push_back(inst);
+    }
+    void Def(IR::Inst* inst) {
+        ASSERT(!def);
+        def = inst;
+    }
+
+    void EndOfAllocScope() {
+        const auto to_erase = std::remove_if(values.begin(), values.end(), [](const auto& inst){ return !inst->HasUses(); });
+        values.erase(to_erase, values.end());
+
+        if (def) {
+            ASSERT(values.empty());
+            AddValue(def);
+            def = nullptr;
+        }
+
+        is_being_used = false;
+    }
+
+private:
+    std::vector<IR::Inst*> values; // early value
+    IR::Inst* def = nullptr; // late value
+    bool is_being_used = false;
+};
+
 class RegAlloc final {
 public:
     explicit RegAlloc(BlockOfCode* code) : code(code) {}
@@ -114,27 +172,6 @@ private:
 
     BlockOfCode* code = nullptr;
 
-    struct HostLocInfo {
-        std::vector<IR::Inst*> values; // early value
-        IR::Inst* def = nullptr; // late value
-        bool is_being_used = false;
-
-        bool IsIdle() const {
-            return !is_being_used;
-        }
-        bool IsScratch() const {
-            return is_being_used && !def && values.empty();
-        }
-        bool IsUse() const {
-            return is_being_used && !def && !values.empty();
-        }
-        bool IsDef() const {
-            return is_being_used && def && values.empty();
-        }
-        bool IsUseDef() const {
-            return is_being_used && def && !values.empty();
-        }
-    };
+
     std::array<HostLocInfo, HostLocCount> hostloc_info;
     HostLocInfo& LocInfo(HostLoc loc) {
         DEBUG_ASSERT(loc != HostLoc::RSP && loc != HostLoc::R15);

From aefe5504280bfcfa73fdfc4c217d57537a484449 Mon Sep 17 00:00:00 2001
From: MerryMage
Date: Fri, 24 Feb 2017 19:08:58 +0000
Subject: [PATCH 02/16] reg_alloc: Remove the Def concept from register allocator internals

---
 src/backend_x64/reg_alloc.cpp | 120 +++++++---------------------------
 src/backend_x64/reg_alloc.h   |  46 +++++--------
 2 files changed, 42 insertions(+), 124 deletions(-)

diff --git a/src/backend_x64/reg_alloc.cpp b/src/backend_x64/reg_alloc.cpp
index 5d2c49ab..f2eff0ef 100644
--- a/src/backend_x64/reg_alloc.cpp
+++ b/src/backend_x64/reg_alloc.cpp
@@ -42,85 +42,20 @@ static Xbyak::Reg HostLocToX64(HostLoc hostloc) {
     ASSERT_MSG(false, "This should never happen.");
 }
 
-HostLoc RegAlloc::DefHostLocReg(IR::Inst* def_inst, HostLocList desired_locations) {
-    DEBUG_ASSERT(std::all_of(desired_locations.begin(), desired_locations.end(), HostLocIsRegister));
-    DEBUG_ASSERT_MSG(!ValueLocation(def_inst), "def_inst has already been defined");
-
-    HostLoc location = SelectARegister(desired_locations);
-
-    if (IsRegisterOccupied(location)) {
-        SpillRegister(location);
-    }
-
-    LocInfo(location).Lock();
-    LocInfo(location).Def(def_inst);
-
-    DEBUG_ASSERT(LocInfo(location).IsDef());
-    return location;
-}
-
 void RegAlloc::RegisterAddDef(IR::Inst* def_inst, const IR::Value& use_inst) {
     DEBUG_ASSERT_MSG(!ValueLocation(def_inst), "def_inst has already been defined");
 
     if (use_inst.IsImmediate()) {
-        LoadImmediateIntoHostLocReg(use_inst, DefHostLocReg(def_inst, any_gpr));
+        HostLoc location = ScratchHostLocReg(any_gpr);
+        DefineValue(def_inst, location);
+        LoadImmediateIntoHostLocReg(use_inst, location);
         return;
     }
 
+    use_inst.GetInst()->DecrementRemainingUses();
     DEBUG_ASSERT_MSG(ValueLocation(use_inst.GetInst()), "use_inst must already be defined");
     HostLoc location = *ValueLocation(use_inst.GetInst());
-    LocInfo(location).AddValue(def_inst);
-    use_inst.GetInst()->DecrementRemainingUses();
-    DEBUG_ASSERT(LocInfo(location).IsIdle());
-}
-
-HostLoc RegAlloc::UseDefHostLocReg(IR::Value use_value, IR::Inst* def_inst, HostLocList desired_locations) {
-    if (!use_value.IsImmediate()) {
-        return UseDefHostLocReg(use_value.GetInst(), def_inst, desired_locations);
-    }
-
-    return LoadImmediateIntoHostLocReg(use_value, DefHostLocReg(def_inst, desired_locations));
-}
-
-HostLoc RegAlloc::UseDefHostLocReg(IR::Inst* use_inst, IR::Inst* def_inst, HostLocList desired_locations) {
-    DEBUG_ASSERT(std::all_of(desired_locations.begin(), desired_locations.end(), HostLocIsRegister));
-    DEBUG_ASSERT_MSG(!ValueLocation(def_inst), "def_inst has already been defined");
-    DEBUG_ASSERT_MSG(ValueLocation(use_inst), "use_inst has not been defined");
-
-    if (IsLastUse(use_inst)) {
-        HostLoc current_location = *ValueLocation(use_inst);
-        auto& loc_info = LocInfo(current_location);
-        if (loc_info.IsIdle()) {
-            loc_info.Lock();
-            loc_info.Def(def_inst);
-            DEBUG_ASSERT(loc_info.IsUseDef());
-            if (HostLocIsSpill(current_location)) {
-                HostLoc new_location = SelectARegister(desired_locations);
-                if (IsRegisterOccupied(new_location)) {
-                    SpillRegister(new_location);
-                }
-                EmitMove(new_location, current_location);
-                LocInfo(new_location) = LocInfo(current_location);
-                LocInfo(current_location) = {};
-                return new_location;
-            } else {
-                return current_location;
-            }
-        }
-    }
-
-    bool is_floating_point = HostLocIsXMM(*desired_locations.begin());
-    if (is_floating_point) {
-        DEBUG_ASSERT(use_inst->GetType() == IR::Type::F32 || use_inst->GetType() == IR::Type::F64);
-    }
-    HostLoc use_reg = UseHostLocReg(use_inst, is_floating_point ? any_xmm : any_gpr);
-    HostLoc def_reg = DefHostLocReg(def_inst, desired_locations);
-    if (is_floating_point) {
-        code->movapd(HostLocToXmm(def_reg), HostLocToXmm(use_reg));
-    } else {
-        code->mov(HostLocToReg64(def_reg), HostLocToReg64(use_reg));
-    }
-    return def_reg;
+    DefineValue(def_inst, location);
 }
 
 std::tuple<OpArg, HostLoc> RegAlloc::UseDefOpArgHostLocReg(IR::Value use_value, IR::Inst* def_inst, HostLocList desired_locations) {
@@ -138,11 +73,12 @@ std::tuple<OpArg, HostLoc> RegAlloc::UseDefOpArgHostLocReg(IR::Value use_value,
                 if (HostLocIsSpill(current_location)) {
                     loc_info.Lock();
                     DEBUG_ASSERT(loc_info.IsUse());
-                    return std::make_tuple(SpillToOpArg(current_location), DefHostLocReg(def_inst, desired_locations));
+                    HostLoc location = ScratchHostLocReg(desired_locations);
+                    DefineValue(def_inst, location);
+                    return std::make_tuple(SpillToOpArg(current_location), location);
                 } else {
                     loc_info.Lock();
-                    loc_info.Def(def_inst);
-                    DEBUG_ASSERT(loc_info.IsUseDef());
+                    DefineValue(def_inst, current_location);
                     return std::make_tuple(HostLocToX64(current_location), current_location);
                 }
             }
@@ -150,7 +86,8 @@ std::tuple<OpArg, HostLoc> RegAlloc::UseDefOpArgHostLocReg(IR::Value use_value,
     }
 
     OpArg use_oparg = UseOpArg(use_value, any_gpr);
-    HostLoc def_reg = DefHostLocReg(def_inst, desired_locations);
+    HostLoc def_reg = ScratchHostLocReg(desired_locations);
+    DefineValue(def_inst, def_reg);
     return std::make_tuple(use_oparg, def_reg);
 }
 
@@ -237,8 +174,7 @@ HostLoc RegAlloc::UseScratchHostLocReg(IR::Inst* use_inst, HostLocList desired_l
         return new_location;
     } else if (HostLocIsRegister(current_location)) {
         ASSERT(LocInfo(current_location).IsIdle()
-               || LocInfo(current_location).IsUse()
-               || LocInfo(current_location).IsUseDef());
+               || LocInfo(current_location).IsUse());
 
         if (current_location != new_location) {
             EmitMove(new_location, current_location);
@@ -289,7 +225,7 @@ void RegAlloc::HostCall(IR::Inst* result_def, IR::Value arg0_use, IR::Value arg1
     // TODO: This works but almost certainly leads to suboptimal generated code.
     if (result_def) {
-        DefHostLocReg(result_def, {ABI_RETURN});
+        DefineValue(result_def, ScratchHostLocReg({ABI_RETURN}));
     } else {
         ScratchHostLocReg({ABI_RETURN});
     }
@@ -352,6 +288,11 @@ bool RegAlloc::IsLastUse(const IR::Inst*) const {
     return false;
 }
 
+void RegAlloc::DefineValue(IR::Inst* def_inst, HostLoc host_loc) {
+    DEBUG_ASSERT_MSG(!ValueLocation(def_inst), "def_inst has already been defined");
+    LocInfo(host_loc).AddValue(def_inst);
+}
+
 void RegAlloc::SpillRegister(HostLoc loc) {
     ASSERT_MSG(HostLocIsRegister(loc), "Only registers can be spilled");
     ASSERT_MSG(IsRegisterOccupied(loc), "There is no need to spill unoccupied registers");
@@ -424,25 +365,12 @@ std::tuple<HostLoc, bool> RegAlloc::UseHostLoc(IR::Inst* use_inst, HostLocList d
     HostLoc current_location = *ValueLocation(use_inst);
     auto iter = std::find(desired_locations.begin(), desired_locations.end(), current_location);
     if (iter != desired_locations.end()) {
-        if (LocInfo(current_location).IsDef()) {
-            HostLoc new_location = SelectARegister(desired_locations);
-            if (IsRegisterOccupied(new_location)) {
-                SpillRegister(new_location);
-            }
-            EmitMove(new_location, current_location);
-            LocInfo(new_location).Lock();
-            LocInfo(new_location).AddValue(use_inst);
-            use_inst->DecrementRemainingUses();
-            DEBUG_ASSERT(LocInfo(new_location).IsUse());
-            return std::make_tuple(new_location, false);
-        } else {
-            bool was_being_used = LocInfo(current_location).IsLocked();
-            ASSERT(LocInfo(current_location).IsUse() || LocInfo(current_location).IsIdle());
-            LocInfo(current_location).Lock();
-            use_inst->DecrementRemainingUses();
-            DEBUG_ASSERT(LocInfo(current_location).IsUse());
-            return std::make_tuple(current_location, was_being_used);
-        }
+        bool was_being_used = LocInfo(current_location).IsLocked();
+        ASSERT(LocInfo(current_location).IsUse() || LocInfo(current_location).IsIdle());
+        LocInfo(current_location).Lock();
+        use_inst->DecrementRemainingUses();
+        DEBUG_ASSERT(LocInfo(current_location).IsUse());
+        return std::make_tuple(current_location, was_being_used);
     }
 
     if (HostLocIsSpill(current_location)) {
diff --git a/src/backend_x64/reg_alloc.h b/src/backend_x64/reg_alloc.h
index 0b1eea15..b45280ec 100644
--- a/src/backend_x64/reg_alloc.h
+++ b/src/backend_x64/reg_alloc.h
@@ -31,19 +31,13 @@ public:
         return is_being_used;
     }
     bool IsEmpty() const {
-        return !is_being_used && !def && values.empty();
+        return !is_being_used && values.empty();
     }
     bool IsScratch() const {
-        return is_being_used && !def && values.empty();
+        return is_being_used && values.empty();
     }
     bool IsUse() const {
-        return is_being_used && !def && !values.empty();
-    }
-    bool IsDef() const {
-        return is_being_used && def && values.empty();
-    }
-    bool IsUseDef() const {
-        return is_being_used && def && !values.empty();
+        return is_being_used && !values.empty();
     }
 
     bool ContainsValue(const IR::Inst* inst) const {
@@ -56,27 +50,16 @@ public:
     void AddValue(IR::Inst* inst) {
         values.push_back(inst);
     }
-    void Def(IR::Inst* inst) {
-        ASSERT(!def);
-        def = inst;
-    }
 
     void EndOfAllocScope() {
         const auto to_erase = std::remove_if(values.begin(), values.end(), [](const auto& inst){ return !inst->HasUses(); });
         values.erase(to_erase, values.end());
 
-        if (def) {
-            ASSERT(values.empty());
-            AddValue(def);
-            def = nullptr;
-        }
-
         is_being_used = false;
     }
 
 private:
-    std::vector<IR::Inst*> values; // early value
-    IR::Inst* def = nullptr; // late value
+    std::vector<IR::Inst*> values;
     bool is_being_used = false;
 };
 
@@ -86,18 +69,26 @@ public:
     /// Late-def
     Xbyak::Reg64 DefGpr(IR::Inst* def_inst, HostLocList desired_locations = any_gpr) {
-        return HostLocToReg64(DefHostLocReg(def_inst, desired_locations));
+        HostLoc location = ScratchHostLocReg(desired_locations);
+        DefineValue(def_inst, location);
+        return HostLocToReg64(location);
     }
     Xbyak::Xmm DefXmm(IR::Inst* def_inst, HostLocList desired_locations = any_xmm) {
-        return HostLocToXmm(DefHostLocReg(def_inst, desired_locations));
+        HostLoc location = ScratchHostLocReg(desired_locations);
+        DefineValue(def_inst, location);
+        return HostLocToXmm(location);
     }
     void RegisterAddDef(IR::Inst* def_inst, const IR::Value& use_inst);
     /// Early-use, Late-def
     Xbyak::Reg64 UseDefGpr(IR::Value use_value, IR::Inst* def_inst, HostLocList desired_locations = any_gpr) {
-        return HostLocToReg64(UseDefHostLocReg(use_value, def_inst, desired_locations));
+        HostLoc location = UseScratchHostLocReg(use_value, desired_locations);
+        DefineValue(def_inst, location);
+        return HostLocToReg64(location);
     }
     Xbyak::Xmm UseDefXmm(IR::Value use_value, IR::Inst* def_inst, HostLocList desired_locations = any_xmm) {
-        return HostLocToXmm(UseDefHostLocReg(use_value, def_inst, desired_locations));
+        HostLoc location = UseScratchHostLocReg(use_value, desired_locations);
+        DefineValue(def_inst, location);
+        return HostLocToXmm(location);
     }
     std::tuple<OpArg, Xbyak::Reg64> UseDefOpArgGpr(IR::Value use_value, IR::Inst* def_inst, HostLocList desired_locations = any_gpr) {
         OpArg op;
@@ -152,9 +143,8 @@ private:
     bool IsRegisterAllocated(HostLoc loc) const;
     bool IsLastUse(const IR::Inst* inst) const;
 
-    HostLoc DefHostLocReg(IR::Inst* def_inst, HostLocList desired_locations);
-    HostLoc UseDefHostLocReg(IR::Value use_value, IR::Inst* def_inst, HostLocList desired_locations);
-    HostLoc UseDefHostLocReg(IR::Inst* use_inst, IR::Inst* def_inst, HostLocList desired_locations);
+    void DefineValue(IR::Inst* def_inst, HostLoc host_loc);
+
     std::tuple<OpArg, HostLoc> UseDefOpArgHostLocReg(IR::Value use_value, IR::Inst* def_inst, HostLocList desired_locations);
     HostLoc UseHostLocReg(IR::Value use_value, HostLocList desired_locations);
     HostLoc UseHostLocReg(IR::Inst* use_inst, HostLocList desired_locations);

From 2b078152e74bd6c6f4d282b54bdd4edfe03ce9bd Mon Sep 17 00:00:00 2001
From: MerryMage
Date: Fri, 24 Feb 2017 19:42:36 +0000
Subject: [PATCH 03/16] reg_alloc: Reimplement UseHostLocReg

---
 src/backend_x64/reg_alloc.cpp | 152 ++++++++++++++++++++++------------
 src/backend_x64/reg_alloc.h   |   6 +-
 2 files changed, 104 insertions(+), 54 deletions(-)

diff --git a/src/backend_x64/reg_alloc.cpp b/src/backend_x64/reg_alloc.cpp
index f2eff0ef..792e516f 100644
--- a/src/backend_x64/reg_alloc.cpp
+++ b/src/backend_x64/reg_alloc.cpp
@@ -42,6 +42,44 @@ static Xbyak::Reg HostLocToX64(HostLoc hostloc) {
     ASSERT_MSG(false, "This should never happen.");
 }
 
+static bool IsSameHostLocClass(HostLoc a, HostLoc b) {
+    return (HostLocIsGPR(a) && HostLocIsGPR(b))
+           || (HostLocIsXMM(a) && HostLocIsXMM(b))
+           || (HostLocIsSpill(a) && HostLocIsSpill(b));
+}
+
+static void EmitMove(BlockOfCode* code, HostLoc to, HostLoc from) {
+    if (HostLocIsXMM(to) && HostLocIsXMM(from)) {
+        code->movaps(HostLocToXmm(to), HostLocToXmm(from));
+    } else if (HostLocIsGPR(to) && HostLocIsGPR(from)) {
+        code->mov(HostLocToReg64(to), HostLocToReg64(from));
+    } else if (HostLocIsXMM(to) && HostLocIsGPR(from)) {
+        ASSERT_MSG(false, "TODO");
+    } else if (HostLocIsGPR(to) && HostLocIsXMM(from)) {
+        ASSERT_MSG(false, "TODO");
+    } else if (HostLocIsXMM(to) && HostLocIsSpill(from)) {
+        code->movsd(HostLocToXmm(to), SpillToOpArg(from));
+    } else if (HostLocIsSpill(to) && HostLocIsXMM(from)) {
+        code->movsd(SpillToOpArg(to), HostLocToXmm(from));
+    } else if (HostLocIsGPR(to) && HostLocIsSpill(from)) {
+        code->mov(HostLocToReg64(to), SpillToOpArg(from));
+    } else if (HostLocIsSpill(to) && HostLocIsGPR(from)) {
+        code->mov(SpillToOpArg(to), HostLocToReg64(from));
+    } else {
+        ASSERT_MSG(false, "Invalid RegAlloc::EmitMove");
+    }
+}
+
+static void EmitExchange(BlockOfCode* code, HostLoc a, HostLoc b) {
+    if (HostLocIsGPR(a) && HostLocIsGPR(b)) {
+        code->xchg(HostLocToReg64(a), HostLocToReg64(b));
+    } else if (HostLocIsXMM(a) && HostLocIsXMM(b)) {
+        ASSERT_MSG(false, "Check your code: Exchanging XMM registers is unnecessary");
+    } else {
+        ASSERT_MSG(false, "Invalid RegAlloc::EmitExchange");
+    }
+}
+
 void RegAlloc::RegisterAddDef(IR::Inst* def_inst, const IR::Value& use_inst) {
     DEBUG_ASSERT_MSG(!ValueLocation(def_inst), "def_inst has already been defined");
 
@@ -100,30 +138,29 @@ HostLoc RegAlloc::UseHostLocReg(IR::Value use_value, HostLocList desired_locatio
 }
 
 HostLoc RegAlloc::UseHostLocReg(IR::Inst* use_inst, HostLocList desired_locations) {
-    HostLoc current_location;
-    bool was_being_used;
-    std::tie(current_location, was_being_used) = UseHostLoc(use_inst, desired_locations);
+    use_inst->DecrementRemainingUses();
 
-    if (HostLocIsRegister(current_location)) {
+    const HostLoc current_location = *ValueLocation(use_inst);
+
+    const bool can_use_current_location = std::find(desired_locations.begin(), desired_locations.end(), current_location) != desired_locations.end();
+    if (can_use_current_location) {
+        LocInfo(current_location).Lock();
         return current_location;
-    } else if (HostLocIsSpill(current_location)) {
-        HostLoc new_location = SelectARegister(desired_locations);
-        if (IsRegisterOccupied(new_location)) {
-            SpillRegister(new_location);
-        }
-        EmitMove(new_location, current_location);
-        if (!was_being_used) {
-            LocInfo(new_location) = LocInfo(current_location);
-            LocInfo(current_location) = {};
-            DEBUG_ASSERT(LocInfo(new_location).IsUse());
-        } else {
-            LocInfo(new_location).Lock();
-            DEBUG_ASSERT(LocInfo(new_location).IsScratch());
-        }
-        return new_location;
     }
 
-    ASSERT_MSG(false, "Unknown current_location type");
+    if (LocInfo(current_location).IsLocked()) {
+        return UseScratchHostLocReg(use_inst, desired_locations);
+    }
+
+    const HostLoc destination_location = SelectARegister(desired_locations);
+    if (IsSameHostLocClass(destination_location, current_location)) {
+        Exchange(destination_location, current_location);
+    } else {
+        MoveOutOfTheWay(destination_location);
+        Move(destination_location, current_location);
+    }
+    LocInfo(destination_location).Lock();
+    return destination_location;
 }
 
 OpArg RegAlloc::UseOpArg(IR::Value use_value, HostLocList desired_locations) {
@@ -167,7 +204,7 @@ HostLoc RegAlloc::UseScratchHostLocReg(IR::Inst* use_inst, HostLocList desired_l
     }
 
     if (HostLocIsSpill(current_location)) {
-        EmitMove(new_location, current_location);
+        EmitMove(code, new_location, current_location);
         LocInfo(new_location).Lock();
         use_inst->DecrementRemainingUses();
         DEBUG_ASSERT(LocInfo(new_location).IsScratch());
@@ -177,7 +214,7 @@ HostLoc RegAlloc::UseScratchHostLocReg(IR::Inst* use_inst, HostLocList desired_l
             || LocInfo(current_location).IsUse());
 
         if (current_location != new_location) {
-            EmitMove(new_location, current_location);
+            EmitMove(code, new_location, current_location);
         } else {
             ASSERT(LocInfo(current_location).IsIdle());
         }
@@ -300,7 +337,7 @@ void RegAlloc::SpillRegister(HostLoc loc) {
 
     HostLoc new_loc = FindFreeSpill();
 
-    EmitMove(new_loc, loc);
+    EmitMove(code, new_loc, loc);
 
     LocInfo(new_loc) = LocInfo(loc);
     LocInfo(loc) = {};
@@ -330,34 +367,6 @@ void RegAlloc::Reset() {
     hostloc_info.fill({});
 }
 
-void RegAlloc::EmitMove(HostLoc to, HostLoc from) {
-    if (HostLocIsXMM(to) && HostLocIsSpill(from)) {
-        code->movsd(HostLocToXmm(to), SpillToOpArg(from));
-    } else if (HostLocIsSpill(to) && HostLocIsXMM(from)) {
-        code->movsd(SpillToOpArg(to), HostLocToXmm(from));
-    } else if (HostLocIsXMM(to) && HostLocIsXMM(from)) {
-        code->movaps(HostLocToXmm(to), HostLocToXmm(from));
-    } else if (HostLocIsGPR(to) && HostLocIsSpill(from)) {
-        code->mov(HostLocToReg64(to), SpillToOpArg(from));
-    } else if (HostLocIsSpill(to) && HostLocIsGPR(from)) {
-        code->mov(SpillToOpArg(to), HostLocToReg64(from));
-    } else if (HostLocIsGPR(to) && HostLocIsGPR(from)){
-        code->mov(HostLocToReg64(to), HostLocToReg64(from));
-    } else {
-        ASSERT_MSG(false, "Invalid RegAlloc::EmitMove");
-    }
-}
-
-void RegAlloc::EmitExchange(HostLoc a, HostLoc b) {
-    if (HostLocIsGPR(a) && HostLocIsGPR(b)) {
-        code->xchg(HostLocToReg64(a), HostLocToReg64(b));
-    } else if (HostLocIsXMM(a) && HostLocIsXMM(b)) {
-        ASSERT_MSG(false, "Exchange is unnecessary for XMM registers");
-    } else {
-        ASSERT_MSG(false, "Invalid RegAlloc::EmitExchange");
-    }
-}
-
 std::tuple<HostLoc, bool> RegAlloc::UseHostLoc(IR::Inst* use_inst, HostLocList desired_locations) {
     DEBUG_ASSERT(std::all_of(desired_locations.begin(), desired_locations.end(), HostLocIsRegister));
     DEBUG_ASSERT_MSG(ValueLocation(use_inst), "use_inst has not been defined");
@@ -382,7 +391,7 @@ std::tuple<HostLoc, bool> RegAlloc::UseHostLoc(IR::Inst* use_inst, HostLocList d
     } else if (HostLocIsRegister(current_location)) {
         HostLoc new_location = SelectARegister(desired_locations);
         ASSERT(LocInfo(current_location).IsIdle());
-        EmitExchange(new_location, current_location);
+        EmitExchange(code, new_location, current_location);
         std::swap(LocInfo(new_location), LocInfo(current_location));
         LocInfo(new_location).Lock();
         use_inst->DecrementRemainingUses();
@@ -407,5 +416,44 @@ HostLoc RegAlloc::LoadImmediateIntoHostLocReg(IR::Value imm, HostLoc host_loc) {
     return host_loc;
 }
 
+void RegAlloc::Move(HostLoc to, HostLoc from) {
+    ASSERT(LocInfo(to).IsEmpty() && !LocInfo(from).IsLocked());
+
+    if (LocInfo(from).IsEmpty()) {
+        return;
+    }
+
+    LocInfo(to) = LocInfo(from);
+    LocInfo(from) = {};
+
+    EmitMove(code, to, from);
+}
+
+void RegAlloc::Exchange(HostLoc a, HostLoc b) {
+    ASSERT(!LocInfo(a).IsLocked() && !LocInfo(b).IsLocked());
+
+    if (LocInfo(a).IsEmpty()) {
+        Move(a, b);
+        return;
+    }
+
+    if (LocInfo(b).IsEmpty()) {
+        Move(b, a);
+        return;
+    }
+
+    std::swap(LocInfo(a), LocInfo(b));
+
+    EmitExchange(code, a, b);
+}
+
+void RegAlloc::MoveOutOfTheWay(HostLoc reg) {
+    ASSERT(!LocInfo(reg).IsLocked());
+    if (IsRegisterOccupied(reg)) {
+        SpillRegister(reg);
+    }
+}
+
+
 } // namespace BackendX64
 } // namespace Dynarmic
diff --git a/src/backend_x64/reg_alloc.h b/src/backend_x64/reg_alloc.h
index b45280ec..7f381508 100644
--- a/src/backend_x64/reg_alloc.h
+++ b/src/backend_x64/reg_alloc.h
@@ -153,10 +153,12 @@ private:
     HostLoc UseScratchHostLocReg(IR::Inst* use_inst, HostLocList desired_locations);
     HostLoc ScratchHostLocReg(HostLocList desired_locations);
 
-    void EmitMove(HostLoc to, HostLoc from);
-    void EmitExchange(HostLoc a, HostLoc b);
     HostLoc LoadImmediateIntoHostLocReg(IR::Value imm, HostLoc reg);
 
+    void Move(HostLoc to, HostLoc from);
+    void Exchange(HostLoc a, HostLoc b);
+    void MoveOutOfTheWay(HostLoc reg);
+
     void SpillRegister(HostLoc loc);
     HostLoc FindFreeSpill() const;
 
From e1d8238c50bac2c7c00bba6d40a8e141910e83d0 Mon Sep 17 00:00:00 2001
From: MerryMage
Date: Fri, 24 Feb 2017 19:46:32 +0000
Subject: [PATCH 04/16] reg_alloc: Stub UseOpArg

---
 src/backend_x64/reg_alloc.cpp | 15 ++-------------
 1 file changed, 2 insertions(+), 13 deletions(-)

diff --git a/src/backend_x64/reg_alloc.cpp b/src/backend_x64/reg_alloc.cpp
index 792e516f..1d5f75eb 100644
--- a/src/backend_x64/reg_alloc.cpp
+++ b/src/backend_x64/reg_alloc.cpp
@@ -169,19 +169,8 @@ OpArg RegAlloc::UseOpArg(IR::Value use_value, HostLocList desired_locations) {
         return {}; // return a None
     }
 
-    IR::Inst* use_inst = use_value.GetInst();
-
-    HostLoc current_location;
-    bool was_being_used;
-    std::tie(current_location, was_being_used) = UseHostLoc(use_inst, desired_locations);
-
-    if (HostLocIsRegister(current_location)) {
-        return HostLocToX64(current_location);
-    } else if (HostLocIsSpill(current_location)) {
-        return SpillToOpArg(current_location);
-    }
-
-    ASSERT_MSG(false, "Unknown current_location type");
+    // TODO: Reimplement properly
+    return HostLocToX64(UseHostLocReg(use_value.GetInst(), desired_locations));
 }
 
From 9518bbe06ececdb08b94dcd4748965e22198a436 Mon Sep 17 00:00:00 2001
From: MerryMage
Date: Fri, 24 Feb 2017 19:58:16 +0000
Subject: [PATCH 05/16] reg_alloc: Reimplement UseScratchHostLocReg

---
 src/backend_x64/reg_alloc.cpp | 50 +++++++++++++----------------
 src/backend_x64/reg_alloc.h   |  1 +
 2 files changed, 20 insertions(+), 31 deletions(-)

diff --git a/src/backend_x64/reg_alloc.cpp b/src/backend_x64/reg_alloc.cpp
index 1d5f75eb..c5d4c199 100644
--- a/src/backend_x64/reg_alloc.cpp
+++ b/src/backend_x64/reg_alloc.cpp
@@ -182,40 +182,22 @@ HostLoc RegAlloc::UseScratchHostLocReg(IR::Value use_value, HostLocList desired_
 }
 
 HostLoc RegAlloc::UseScratchHostLocReg(IR::Inst* use_inst, HostLocList desired_locations) {
-    DEBUG_ASSERT(std::all_of(desired_locations.begin(), desired_locations.end(), HostLocIsRegister));
-    DEBUG_ASSERT_MSG(ValueLocation(use_inst), "use_inst has not been defined");
-    ASSERT_MSG(use_inst->HasUses(), "use_inst ran out of uses. (Use-d an IR::Inst* too many times)");
+    use_inst->DecrementRemainingUses();
 
-    HostLoc current_location = *ValueLocation(use_inst);
-    HostLoc new_location = SelectARegister(desired_locations);
-    if (IsRegisterOccupied(new_location)) {
-        SpillRegister(new_location);
+    const HostLoc current_location = *ValueLocation(use_inst);
+
+    const bool can_use_current_location = std::find(desired_locations.begin(), desired_locations.end(), current_location) != desired_locations.end();
+    if (can_use_current_location && !LocInfo(current_location).IsLocked()) {
+        MoveOutOfTheWay(current_location);
+        LocInfo(current_location).Lock();
+        return current_location;
     }
 
-    if (HostLocIsSpill(current_location)) {
-        EmitMove(code, new_location, current_location);
-        LocInfo(new_location).Lock();
-        use_inst->DecrementRemainingUses();
-        DEBUG_ASSERT(LocInfo(new_location).IsScratch());
-        return new_location;
-    } else if (HostLocIsRegister(current_location)) {
-        ASSERT(LocInfo(current_location).IsIdle()
-            || LocInfo(current_location).IsUse());
-
-        if (current_location != new_location) {
-            EmitMove(code, new_location, current_location);
-        } else {
-            ASSERT(LocInfo(current_location).IsIdle());
-        }
-
-        LocInfo(new_location) = {};
-        LocInfo(new_location).Lock();
-        use_inst->DecrementRemainingUses();
-        DEBUG_ASSERT(LocInfo(new_location).IsScratch());
-        return new_location;
-    }
-
-    ASSERT_MSG(false, "Invalid current_location");
+    const HostLoc destination_location = SelectARegister(desired_locations);
+    MoveOutOfTheWay(destination_location);
+    CopyToScratch(destination_location, current_location);
+    LocInfo(destination_location).Lock();
+    return destination_location;
 }
 
 HostLoc RegAlloc::ScratchHostLocReg(HostLocList desired_locations) {
@@ -418,6 +400,12 @@ void RegAlloc::Move(HostLoc to, HostLoc from) {
     EmitMove(code, to, from);
 }
 
+void RegAlloc::CopyToScratch(HostLoc to, HostLoc from) {
+    ASSERT(LocInfo(to).IsEmpty() && !LocInfo(from).IsEmpty());
+
+    EmitMove(code, to, from);
+}
+
 void RegAlloc::Exchange(HostLoc a, HostLoc b) {
     ASSERT(!LocInfo(a).IsLocked() && !LocInfo(b).IsLocked());
 
diff --git a/src/backend_x64/reg_alloc.h b/src/backend_x64/reg_alloc.h
index 7f381508..ab6ff8ec 100644
--- a/src/backend_x64/reg_alloc.h
+++ b/src/backend_x64/reg_alloc.h
@@ -156,6 +156,7 @@ private:
     HostLoc LoadImmediateIntoHostLocReg(IR::Value imm, HostLoc reg);
 
     void Move(HostLoc to, HostLoc from);
+    void CopyToScratch(HostLoc to, HostLoc from);
    void Exchange(HostLoc a, HostLoc b);
     void MoveOutOfTheWay(HostLoc reg);
 
From 640faab8a74ddb6347868b021621f978d5228024 Mon Sep 17 00:00:00 2001
From: MerryMage
Date: Fri, 24 Feb 2017 19:59:34 +0000
Subject: [PATCH 06/16] reg_alloc: UseHostLoc is no longer necessary

---
 src/backend_x64/reg_alloc.cpp | 36 -----------------------------------
 src/backend_x64/reg_alloc.h   |  1 -
 2 files changed, 37 deletions(-)

diff --git a/src/backend_x64/reg_alloc.cpp b/src/backend_x64/reg_alloc.cpp
index c5d4c199..987c9593 100644
--- a/src/backend_x64/reg_alloc.cpp
+++ b/src/backend_x64/reg_alloc.cpp
@@ -338,42 +338,6 @@ void RegAlloc::Reset() {
     hostloc_info.fill({});
 }
 
-std::tuple<HostLoc, bool> RegAlloc::UseHostLoc(IR::Inst* use_inst, HostLocList desired_locations) {
-    DEBUG_ASSERT(std::all_of(desired_locations.begin(), desired_locations.end(), HostLocIsRegister));
-    DEBUG_ASSERT_MSG(ValueLocation(use_inst), "use_inst has not been defined");
-
-    HostLoc current_location = *ValueLocation(use_inst);
-    auto iter = std::find(desired_locations.begin(), desired_locations.end(), current_location);
-    if (iter != desired_locations.end()) {
-        bool was_being_used = LocInfo(current_location).IsLocked();
-        ASSERT(LocInfo(current_location).IsUse() || LocInfo(current_location).IsIdle());
-        LocInfo(current_location).Lock();
-        use_inst->DecrementRemainingUses();
-        DEBUG_ASSERT(LocInfo(current_location).IsUse());
-        return std::make_tuple(current_location, was_being_used);
-    }
-
-    if (HostLocIsSpill(current_location)) {
-        bool was_being_used = LocInfo(current_location).IsLocked();
-        LocInfo(current_location).Lock();
-        use_inst->DecrementRemainingUses();
-        DEBUG_ASSERT(LocInfo(current_location).IsUse());
-        return std::make_tuple(current_location, was_being_used);
-    } else if (HostLocIsRegister(current_location)) {
-        HostLoc new_location = SelectARegister(desired_locations);
-        ASSERT(LocInfo(current_location).IsIdle());
-        EmitExchange(code, new_location, current_location);
-        std::swap(LocInfo(new_location), LocInfo(current_location));
-        LocInfo(new_location).Lock();
-        use_inst->DecrementRemainingUses();
-        DEBUG_ASSERT(LocInfo(new_location).IsUse());
-        return std::make_tuple(new_location, false);
-    }
-
-    ASSERT_MSG(false, "Invalid current_location");
-    return std::make_tuple(static_cast<HostLoc>(-1), false);
-}
-
 HostLoc RegAlloc::LoadImmediateIntoHostLocReg(IR::Value imm, HostLoc host_loc) {
     ASSERT_MSG(imm.IsImmediate(), "imm is not an immediate");
 
diff --git a/src/backend_x64/reg_alloc.h b/src/backend_x64/reg_alloc.h
index ab6ff8ec..88cb2bdf 100644
--- a/src/backend_x64/reg_alloc.h
+++ b/src/backend_x64/reg_alloc.h
@@ -148,7 +148,6 @@ private:
     std::tuple<OpArg, HostLoc> UseDefOpArgHostLocReg(IR::Value use_value, IR::Inst* def_inst, HostLocList desired_locations);
     HostLoc UseHostLocReg(IR::Value use_value, HostLocList desired_locations);
     HostLoc UseHostLocReg(IR::Inst* use_inst, HostLocList desired_locations);
-    std::tuple<HostLoc, bool> UseHostLoc(IR::Inst* use_inst, HostLocList desired_locations);
     HostLoc UseScratchHostLocReg(IR::Value use_value, HostLocList desired_locations);
     HostLoc UseScratchHostLocReg(IR::Inst* use_inst, HostLocList desired_locations);
     HostLoc ScratchHostLocReg(HostLocList desired_locations);

From 1ee4c07f1450bb2acf2cfbd99441fa9672a56c3b Mon Sep 17 00:00:00 2001
From: MerryMage
Date: Fri, 24 Feb 2017 20:01:41 +0000
Subject: [PATCH 07/16] reg_alloc: Reimplement ScratchHostLocReg

---
 src/backend_x64/reg_alloc.cpp | 11 +----------
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/src/backend_x64/reg_alloc.cpp b/src/backend_x64/reg_alloc.cpp
index 987c9593..590c1575 100644
--- a/src/backend_x64/reg_alloc.cpp
+++ b/src/backend_x64/reg_alloc.cpp
@@ -201,18 +201,9 @@ HostLoc RegAlloc::UseScratchHostLocReg(IR::Inst* use_inst, HostLocList desired_l
 }
 
 HostLoc RegAlloc::ScratchHostLocReg(HostLocList desired_locations) {
-    DEBUG_ASSERT(std::all_of(desired_locations.begin(), desired_locations.end(), HostLocIsRegister));
-
     HostLoc location = SelectARegister(desired_locations);
-
-    if (IsRegisterOccupied(location)) {
-        SpillRegister(location);
-    }
-
-    // Update state
+    MoveOutOfTheWay(location);
     LocInfo(location).Lock();
-
-    DEBUG_ASSERT(LocInfo(location).IsScratch());
     return location;
 }
 
From 6c3df057fade62a4f7d00a63657629a67eb3ad8f Mon Sep 17 00:00:00 2001
From: MerryMage
Date: Fri, 24 Feb 2017 20:14:02 +0000
Subject: [PATCH 08/16] reg_alloc: Remove unused functions

---
 src/backend_x64/reg_alloc.cpp | 52 +++++------------------------
 src/backend_x64/reg_alloc.h   | 13 ++-------
 2 files changed, 9 insertions(+), 56 deletions(-)

diff --git a/src/backend_x64/reg_alloc.cpp b/src/backend_x64/reg_alloc.cpp
index 590c1575..efb31e46 100644
--- a/src/backend_x64/reg_alloc.cpp
+++ b/src/backend_x64/reg_alloc.cpp
@@ -101,27 +101,7 @@ std::tuple<OpArg, HostLoc> RegAlloc::UseDefOpArgHostLocReg(IR::Value use_value,
     DEBUG_ASSERT_MSG(!ValueLocation(def_inst), "def_inst has already been defined");
     DEBUG_ASSERT_MSG(use_value.IsImmediate() || ValueLocation(use_value.GetInst()), "use_inst has not been defined");
 
-    if (!use_value.IsImmediate()) {
-        const IR::Inst* use_inst = use_value.GetInst();
-
-        if (IsLastUse(use_inst)) {
-            HostLoc current_location = *ValueLocation(use_inst);
-            auto& loc_info = LocInfo(current_location);
-            if (!loc_info.IsIdle()) {
-                if (HostLocIsSpill(current_location)) {
-                    loc_info.Lock();
-                    DEBUG_ASSERT(loc_info.IsUse());
-                    HostLoc location = ScratchHostLocReg(desired_locations);
-                    DefineValue(def_inst, location);
-                    return std::make_tuple(SpillToOpArg(current_location), location);
-                } else {
-                    loc_info.Lock();
-                    DefineValue(def_inst, current_location);
-                    return std::make_tuple(HostLocToX64(current_location), current_location);
-                }
-            }
-        }
-    }
+    // TODO: IsLastUse optimization
 
     OpArg use_oparg = UseOpArg(use_value, any_gpr);
     HostLoc def_reg = ScratchHostLocReg(desired_locations);
@@ -247,7 +227,7 @@ HostLoc RegAlloc::SelectARegister(HostLocList desired_locations) const {
 
     // Find all locations that have not been allocated..
     auto allocated_locs = std::partition(candidates.begin(), candidates.end(), [this](auto loc){
-        return !this->IsRegisterAllocated(loc);
+        return !this->LocInfo(loc).IsLocked();
     });
     candidates.erase(allocated_locs, candidates.end());
     ASSERT_MSG(!candidates.empty(), "All candidate registers have already been allocated");
@@ -256,7 +236,7 @@ HostLoc RegAlloc::SelectARegister(HostLocList desired_locations) const {
     // TODO: Actually do LRU or something. Currently we just try to pick something without a value if possible.
     std::partition(candidates.begin(), candidates.end(), [this](auto loc){
-        return !this->IsRegisterOccupied(loc);
+        return this->LocInfo(loc).IsEmpty();
     });
 
     return candidates.front();
@@ -270,23 +250,6 @@ boost::optional<HostLoc> RegAlloc::ValueLocation(const IR::Inst* value) const {
     return boost::none;
 }
 
-bool RegAlloc::IsRegisterOccupied(HostLoc loc) const {
-    const auto& info = LocInfo(loc);
-
-    return !info.IsEmpty();
-}
-
-bool RegAlloc::IsRegisterAllocated(HostLoc loc) const {
-    return !LocInfo(loc).IsIdle();
-}
-
-bool RegAlloc::IsLastUse(const IR::Inst*) const {
-    //if (inst->UseCount() > 1)
-    //    return false;
-    //return LocInfo(*ValueLocation(inst)).values.size() == 1;
-    return false;
-}
-
 void RegAlloc::DefineValue(IR::Inst* def_inst, HostLoc host_loc) {
     DEBUG_ASSERT_MSG(!ValueLocation(def_inst), "def_inst has already been defined");
     LocInfo(host_loc).AddValue(def_inst);
@@ -294,8 +257,8 @@ void RegAlloc::DefineValue(IR::Inst* def_inst, HostLoc host_loc) {
 
 void RegAlloc::SpillRegister(HostLoc loc) {
     ASSERT_MSG(HostLocIsRegister(loc), "Only registers can be spilled");
-    ASSERT_MSG(IsRegisterOccupied(loc), "There is no need to spill unoccupied registers");
-    ASSERT_MSG(!IsRegisterAllocated(loc), "Registers that have been allocated must not be spilt");
+    ASSERT_MSG(!LocInfo(loc).IsEmpty(), "There is no need to spill unoccupied registers");
+    ASSERT_MSG(!LocInfo(loc).IsLocked(), "Registers that have been allocated must not be spilt");
 
     HostLoc new_loc = FindFreeSpill();
 
@@ -307,7 +270,7 @@ HostLoc RegAlloc::FindFreeSpill() const {
     for (size_t i = 0; i < SpillCount; i++)
-        if (!IsRegisterOccupied(HostLocSpill(i)))
+        if (LocInfo(HostLocSpill(i)).IsEmpty())
             return HostLocSpill(i);
 
     ASSERT_MSG(false, "All spill locations are full");
@@ -381,11 +344,10 @@ void RegAlloc::Exchange(HostLoc a, HostLoc b) {
 
 void RegAlloc::MoveOutOfTheWay(HostLoc reg) {
     ASSERT(!LocInfo(reg).IsLocked());
-    if (IsRegisterOccupied(reg)) {
+    if (!LocInfo(reg).IsEmpty()) {
         SpillRegister(reg);
     }
 }
-
 } // namespace BackendX64
 } // namespace Dynarmic
diff --git a/src/backend_x64/reg_alloc.h b/src/backend_x64/reg_alloc.h
index 88cb2bdf..360f8a9f 100644
--- a/src/backend_x64/reg_alloc.h
+++ b/src/backend_x64/reg_alloc.h
@@ -24,20 +24,14 @@ namespace BackendX64 {
 
 struct HostLocInfo {
 public:
-    bool IsIdle() const {
-        return !is_being_used;
-    }
     bool IsLocked() const {
         return is_being_used;
     }
     bool IsEmpty() const {
         return !is_being_used && values.empty();
     }
-    bool IsScratch() const {
-        return is_being_used && values.empty();
-    }
-    bool IsUse() const {
-        return is_being_used && !values.empty();
+    bool IsLastUse() const {
+        return !is_being_used && std::all_of(values.begin(), values.end(), [](const auto& inst) { return !inst->HasUses(); });
     }
 
     bool ContainsValue(const IR::Inst* inst) const {
@@ -139,9 +133,6 @@ private:
     HostLoc SelectARegister(HostLocList desired_locations) const;
     boost::optional<HostLoc> ValueLocation(const IR::Inst* value) const;
-    bool IsRegisterOccupied(HostLoc loc) const;
-    bool IsRegisterAllocated(HostLoc loc) const;
-    bool IsLastUse(const IR::Inst* inst) const;
 
     void DefineValue(IR::Inst* def_inst, HostLoc host_loc);
 
From 13ac0c234e114241515f0d937060fb8283cf7e31 Mon Sep 17 00:00:00 2001
From: MerryMage
Date: Fri, 24 Feb 2017 20:19:50 +0000
Subject: [PATCH 09/16] reg_alloc: Differentiate between ReadLock and WriteLock

---
 src/backend_x64/reg_alloc.cpp | 16 ++++++----------
 src/backend_x64/reg_alloc.h   | 10 +++++++++-
 2 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/src/backend_x64/reg_alloc.cpp b/src/backend_x64/reg_alloc.cpp
index efb31e46..6f9a1f6d 100644
--- a/src/backend_x64/reg_alloc.cpp
+++ b/src/backend_x64/reg_alloc.cpp
@@ -124,7 +124,7 @@ HostLoc RegAlloc::UseHostLocReg(IR::Inst* use_inst, HostLocList desired_location
 
     const bool can_use_current_location = std::find(desired_locations.begin(), desired_locations.end(), current_location) != desired_locations.end();
     if (can_use_current_location) {
-        LocInfo(current_location).Lock();
+        LocInfo(current_location).ReadLock();
         return current_location;
     }
 
@@ -139,7 +139,7 @@ HostLoc RegAlloc::UseHostLocReg(IR::Inst* use_inst, HostLocList desired_location
         MoveOutOfTheWay(destination_location);
         Move(destination_location, current_location);
     }
-    LocInfo(destination_location).Lock();
+    LocInfo(destination_location).ReadLock();
     return destination_location;
 }
 
@@ -169,21 +169,21 @@ HostLoc RegAlloc::UseScratchHostLocReg(IR::Inst* use_inst, HostLocList desired_l
     const bool can_use_current_location = std::find(desired_locations.begin(), desired_locations.end(), current_location) != desired_locations.end();
     if (can_use_current_location && !LocInfo(current_location).IsLocked()) {
         MoveOutOfTheWay(current_location);
-        LocInfo(current_location).Lock();
+        LocInfo(current_location).WriteLock();
         return current_location;
     }
 
     const HostLoc destination_location = SelectARegister(desired_locations);
     MoveOutOfTheWay(destination_location);
     CopyToScratch(destination_location, current_location);
-    LocInfo(destination_location).Lock();
+    LocInfo(destination_location).WriteLock();
     return destination_location;
 }
 
 HostLoc RegAlloc::ScratchHostLocReg(HostLocList desired_locations) {
     HostLoc location = SelectARegister(desired_locations);
     MoveOutOfTheWay(location);
-    LocInfo(location).Lock();
+    LocInfo(location).WriteLock();
     return location;
 }
 
@@ -261,11 +261,7 @@ void RegAlloc::SpillRegister(HostLoc loc) {
     ASSERT_MSG(!LocInfo(loc).IsLocked(), "Registers that have been allocated must not be spilt");
 
     HostLoc new_loc = FindFreeSpill();
-
-    EmitMove(code, new_loc, loc);
-
-    LocInfo(new_loc) = LocInfo(loc);
-    LocInfo(loc) = {};
+    Move(new_loc, loc);
 }
 
 HostLoc RegAlloc::FindFreeSpill() const {
diff --git a/src/backend_x64/reg_alloc.h b/src/backend_x64/reg_alloc.h
index 360f8a9f..3bbc5a04 100644
--- a/src/backend_x64/reg_alloc.h
+++ b/src/backend_x64/reg_alloc.h
@@ -38,9 +38,15 @@ public:
         return std::find(values.begin(), values.end(), inst) != values.end();
     }
 
-    void Lock() {
+    void ReadLock() {
+        ASSERT(!is_scratch);
         is_being_used = true;
     }
+    void WriteLock() {
+        ASSERT(!is_being_used);
+        is_being_used = true;
+        is_scratch = true;
+    }
     void AddValue(IR::Inst* inst) {
         values.push_back(inst);
     }
@@ -50,11 +56,13 @@ public:
         values.erase(to_erase, values.end());
 
         is_being_used = false;
+        is_scratch = false;
     }
 
 private:
     std::vector<IR::Inst*> values;
     bool is_being_used = false;
+    bool is_scratch = false;
 };
 
From f883bad2cc9d97737e6cb422f0393a868474aab7 Mon Sep 17 00:00:00 2001
From: MerryMage
Date: Fri, 24 Feb 2017 21:09:12 +0000
Subject: [PATCH 10/16] reg_alloc: New register allocation interface

---
 src/backend_x64/reg_alloc.cpp | 45 +++++++++++++++++++
 src/backend_x64/reg_alloc.h   | 82 +++++++++++++++++++++++++++++++++++
 2 files changed, 127 insertions(+)

diff --git a/src/backend_x64/reg_alloc.cpp b/src/backend_x64/reg_alloc.cpp
index 6f9a1f6d..46e003d9 100644
--- a/src/backend_x64/reg_alloc.cpp
+++ b/src/backend_x64/reg_alloc.cpp
@@ -22,6 +22,8 @@ static u64 ImmediateToU64(const IR::Value& imm) {
         return u64(imm.GetU1());
     case
 IR::Type::U8:
         return u64(imm.GetU8());
+    case IR::Type::U16:
+        return u64(imm.GetU16());
     case IR::Type::U32:
         return u64(imm.GetU32());
     case IR::Type::U64:
@@ -80,6 +82,49 @@ static void EmitExchange(BlockOfCode* code, HostLoc a, HostLoc b) {
     }
 }
 
+u8 Argument::GetImmediateU8() const {
+    u64 imm = ImmediateToU64(value);
+    ASSERT(imm < 0x100);
+    return u8(imm);
+}
+
+u16 Argument::GetImmediateU16() const {
+    u64 imm = ImmediateToU64(value);
+    ASSERT(imm < 0x10000);
+    return u16(imm);
+}
+
+u32 Argument::GetImmediateU32() const {
+    u64 imm = ImmediateToU64(value);
+    ASSERT(imm < 0x100000000);
+    return u32(imm);
+}
+
+u64 Argument::GetImmediateU64() const {
+    return ImmediateToU64(value);
+}
+
+bool Argument::IsInGpr() const {
+    return HostLocIsGPR(*reg_alloc.ValueLocation(value.GetInst()));
+}
+
+bool Argument::IsInXmm() const {
+    return HostLocIsXMM(*reg_alloc.ValueLocation(value.GetInst()));
+}
+
+bool Argument::IsInMemory() const {
+    return HostLocIsSpill(*reg_alloc.ValueLocation(value.GetInst()));
+}
+
+std::array<Argument, 3> RegAlloc::GetArgumentInfo(IR::Inst* inst) {
+    std::array<Argument, 3> ret = { Argument{*this}, Argument{*this}, Argument{*this}};
+    for (size_t i = 0; i < inst->NumArgs(); i++) {
+        IR::Value arg = inst->GetArg(i);
+        ret[i].value = arg;
+    }
+    return ret;
+}
+
 void RegAlloc::RegisterAddDef(IR::Inst* def_inst, const IR::Value& use_inst) {
     DEBUG_ASSERT_MSG(!ValueLocation(def_inst), "def_inst has already been defined");
 
diff --git a/src/backend_x64/reg_alloc.h b/src/backend_x64/reg_alloc.h
index 3bbc5a04..cbc3e56c 100644
--- a/src/backend_x64/reg_alloc.h
+++ b/src/backend_x64/reg_alloc.h
@@ -22,6 +22,8 @@ namespace Dynarmic {
 namespace BackendX64 {
 
+class RegAlloc;
+
 struct HostLocInfo {
 public:
@@ -65,10 +67,88 @@ private:
     std::vector<IR::Inst*> values;
     bool is_being_used = false;
     bool is_scratch = false;
 };
 
+struct Argument {
+public:
+    IR::Type GetType() const {
+        return value.GetType();
+    }
+    bool IsImmediate() const {
+        return value.IsImmediate();
+    }
+
+    u8 GetImmediateU8() const;
+    u16 GetImmediateU16() const;
+    u32 GetImmediateU32() const;
+    u64 GetImmediateU64() const;
+
+    /// Is this value currently in a GPR?
+    bool IsInGpr() const;
+    /// Is this value currently in a XMM?
+    bool IsInXmm() const;
+    /// Is this value currently in memory?
+    bool IsInMemory() const;
+
+private:
+    friend class RegAlloc;
+    Argument(RegAlloc& reg_alloc) : reg_alloc(reg_alloc) {}
+
+    bool allocated = false;
+    RegAlloc& reg_alloc;
+    IR::Value value;
+};
+
 class RegAlloc final {
 public:
     explicit RegAlloc(BlockOfCode* code) : code(code) {}
 
+    std::array<Argument, 3> GetArgumentInfo(IR::Inst* inst);
+
+    Xbyak::Reg64 UseGpr(Argument& arg) {
+        ASSERT(!arg.allocated);
+        arg.allocated = true;
+        return HostLocToReg64(UseHostLocReg(arg.value, any_gpr));
+    }
+    Xbyak::Xmm UseXmm(Argument& arg) {
+        ASSERT(!arg.allocated);
+        arg.allocated = true;
+        return HostLocToXmm(UseHostLocReg(arg.value, any_xmm));
+    }
+    void Use(Argument& arg, HostLoc host_loc) {
+        ASSERT(!arg.allocated);
+        arg.allocated = true;
+        UseHostLocReg(arg.value, {host_loc});
+    }
+
+    Xbyak::Reg64 UseScratchGpr(Argument& arg) {
+        ASSERT(!arg.allocated);
+        arg.allocated = true;
+        return HostLocToReg64(UseScratchHostLocReg(arg.value, any_gpr));
+    }
+    Xbyak::Xmm UseScratchXmm(Argument& arg) {
+        ASSERT(!arg.allocated);
+        arg.allocated = true;
+        return HostLocToXmm(UseScratchHostLocReg(arg.value, any_xmm));
+    }
+    void UseScratch(Argument& arg, HostLoc host_loc) {
+        ASSERT(!arg.allocated);
+        arg.allocated = true;
+        UseScratchHostLocReg(arg.value, {host_loc});
+    }
+
+    void DefineValue(IR::Inst* inst, const Xbyak::Reg64& reg) {
+        HostLoc hostloc = static_cast<HostLoc>(reg.getIdx() + static_cast<size_t>(HostLoc::RAX));
+        DefineValue(inst, hostloc);
+    }
+    void DefineValue(IR::Inst* inst, const Xbyak::Xmm& reg) {
+        HostLoc hostloc = static_cast<HostLoc>(reg.getIdx() + static_cast<size_t>(HostLoc::XMM0));
+        DefineValue(inst, hostloc);
+    }
+    void DefineValue(IR::Inst* inst, Argument& arg) {
+        ASSERT(!arg.allocated);
+        arg.allocated = true;
+        RegisterAddDef(inst, arg.value);
+    }
+
     /// Late-def
     Xbyak::Reg64 DefGpr(IR::Inst* def_inst, HostLocList desired_locations = any_gpr) {
         HostLoc location = ScratchHostLocReg(desired_locations);
@@ -219,6 +299,8 @@ public:
     void Reset();
 
 private:
+    friend struct Argument;
+
     HostLoc SelectARegister(HostLocList desired_locations) const;
     boost::optional<HostLoc> ValueLocation(const IR::Inst* value) const;
 
From 08a467bf9af384925acd6b9e6733bed56430972a Mon Sep 17 00:00:00 2001
From: MerryMage
Date: Fri, 24 Feb 2017 21:25:31 +0000
Subject: [PATCH 11/16] emit_x64: Port to new register allocator interface

---
 src/backend_x64/emit_x64.cpp  | 1608 ++++++++++++++++-----------------
 src/backend_x64/reg_alloc.cpp |    8 +-
 src/backend_x64/reg_alloc.h   |   13 +-
 3 files changed, 796 insertions(+), 833 deletions(-)

diff --git a/src/backend_x64/emit_x64.cpp b/src/backend_x64/emit_x64.cpp
index 56149d43..b433391c 100644
--- a/src/backend_x64/emit_x64.cpp
+++ b/src/backend_x64/emit_x64.cpp
@@ -118,85 +118,96 @@ void EmitX64::EmitBreakpoint(RegAlloc&, IR::Block&, IR::Inst*) {
 }
 
 void EmitX64::EmitIdentity(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    if (!inst->GetArg(0).IsImmediate()) {
-        reg_alloc.RegisterAddDef(inst, inst->GetArg(0));
+    auto args = reg_alloc.GetArgumentInfo(inst);
+    if (!args[0].IsImmediate()) {
+        reg_alloc.DefineValue(inst, args[0]);
     }
 }
 
 void EmitX64::EmitGetRegister(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
     Arm::Reg reg = inst->GetArg(0).GetRegRef();
-    Xbyak::Reg32 result = reg_alloc.DefGpr(inst).cvt32();
+
+    Xbyak::Reg32 result = reg_alloc.ScratchGpr().cvt32();
     code->mov(result, MJitStateReg(reg));
+
+    reg_alloc.DefineValue(inst, result);
 }
 
 void EmitX64::EmitGetExtendedRegister32(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
     Arm::ExtReg reg = inst->GetArg(0).GetExtRegRef();
     ASSERT(Arm::IsSingleExtReg(reg));
-    Xbyak::Xmm result = reg_alloc.DefXmm(inst);
+
+    Xbyak::Xmm result = reg_alloc.ScratchXmm();
     code->movss(result, MJitStateExtReg(reg));
+
+    reg_alloc.DefineValue(inst, result);
 }
 
 void EmitX64::EmitGetExtendedRegister64(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
     Arm::ExtReg reg = inst->GetArg(0).GetExtRegRef();
     ASSERT(Arm::IsDoubleExtReg(reg));
-    Xbyak::Xmm result = reg_alloc.DefXmm(inst);
+
+    Xbyak::Xmm result = reg_alloc.ScratchXmm();
     code->movsd(result, MJitStateExtReg(reg));
+
+    reg_alloc.DefineValue(inst, result);
 }
 
 void EmitX64::EmitSetRegister(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
+    auto args = reg_alloc.GetArgumentInfo(inst);
     Arm::Reg reg = inst->GetArg(0).GetRegRef();
-    IR::Value arg = inst->GetArg(1);
-    if (arg.IsImmediate()) {
-        code->mov(MJitStateReg(reg), arg.GetU32());
+    if (args[1].IsImmediate()) {
+        code->mov(MJitStateReg(reg), args[1].GetImmediateU32());
     } else {
-        Xbyak::Reg32 to_store = reg_alloc.UseGpr(arg).cvt32();
+        Xbyak::Reg32 to_store = reg_alloc.UseGpr(args[1]).cvt32();
         code->mov(MJitStateReg(reg), to_store);
     }
 }
 
 void EmitX64::EmitSetExtendedRegister32(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
+    auto args = reg_alloc.GetArgumentInfo(inst);
     Arm::ExtReg reg = inst->GetArg(0).GetExtRegRef();
     ASSERT(Arm::IsSingleExtReg(reg));
-    Xbyak::Xmm source = reg_alloc.UseXmm(inst->GetArg(1));
+
+    Xbyak::Xmm source = reg_alloc.UseXmm(args[1]);
     code->movss(MJitStateExtReg(reg), source);
 }
 
 void EmitX64::EmitSetExtendedRegister64(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
+    auto args = reg_alloc.GetArgumentInfo(inst);
     Arm::ExtReg reg = inst->GetArg(0).GetExtRegRef();
     ASSERT(Arm::IsDoubleExtReg(reg));
-    Xbyak::Xmm source = reg_alloc.UseXmm(inst->GetArg(1));
+
+    Xbyak::Xmm source = reg_alloc.UseXmm(args[1]);
     code->movsd(MJitStateExtReg(reg), source);
 }
 
 void EmitX64::EmitGetCpsr(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    Xbyak::Reg32 result = reg_alloc.DefGpr(inst).cvt32();
+    Xbyak::Reg32 result = reg_alloc.ScratchGpr().cvt32();
     code->mov(result, MJitStateCpsr());
+    reg_alloc.DefineValue(inst, result);
 }
 
 void EmitX64::EmitSetCpsr(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    Xbyak::Reg32 arg = reg_alloc.UseGpr(inst->GetArg(0)).cvt32();
+    auto args = reg_alloc.GetArgumentInfo(inst);
+    Xbyak::Reg32 arg = reg_alloc.UseGpr(args[0]).cvt32();
     code->mov(MJitStateCpsr(), arg);
 }
 
 void EmitX64::EmitGetNFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    Xbyak::Reg32 result = reg_alloc.DefGpr(inst).cvt32();
+    Xbyak::Reg32 result = reg_alloc.ScratchGpr().cvt32();
     code->mov(result, MJitStateCpsr());
     code->shr(result, 31);
+    reg_alloc.DefineValue(inst, result);
 }
 
 void EmitX64::EmitSetNFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
     constexpr size_t flag_bit = 31;
     constexpr u32 flag_mask = 1u << flag_bit;
-    IR::Value arg = inst->GetArg(0);
-    if (arg.IsImmediate()) {
-        if (arg.GetU1()) {
+    auto args = reg_alloc.GetArgumentInfo(inst);
+    if (args[0].IsImmediate()) {
+        if (args[0].GetImmediateU1()) {
             code->or_(MJitStateCpsr(), flag_mask);
         } else {
             code->and_(MJitStateCpsr(), ~flag_mask);
         }
     } else {
-        Xbyak::Reg32 to_store = reg_alloc.UseScratchGpr(arg).cvt32();
+        Xbyak::Reg32 to_store = reg_alloc.UseScratchGpr(args[0]).cvt32();
 
         code->shl(to_store, flag_bit);
         code->and_(MJitStateCpsr(), ~flag_mask);
@@ -205,24 +216,25 @@ void EmitX64::EmitSetNFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
 }
 
 void EmitX64::EmitGetZFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    Xbyak::Reg32 result = reg_alloc.DefGpr(inst).cvt32();
+    Xbyak::Reg32 result = reg_alloc.ScratchGpr().cvt32();
     code->mov(result, MJitStateCpsr());
     code->shr(result, 30);
     code->and_(result, 1);
+    reg_alloc.DefineValue(inst, result);
 }
 
 void EmitX64::EmitSetZFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
     constexpr size_t flag_bit = 30;
     constexpr u32 flag_mask = 1u << flag_bit;
-    IR::Value arg = inst->GetArg(0);
-    if (arg.IsImmediate()) {
-        if (arg.GetU1()) {
+    auto args = reg_alloc.GetArgumentInfo(inst);
+    if (args[0].IsImmediate()) {
+        if (args[0].GetImmediateU1()) {
             code->or_(MJitStateCpsr(), flag_mask);
         } else {
             code->and_(MJitStateCpsr(), ~flag_mask);
         }
     } else {
-        Xbyak::Reg32 to_store = reg_alloc.UseScratchGpr(arg).cvt32();
+        Xbyak::Reg32 to_store = reg_alloc.UseScratchGpr(args[0]).cvt32();
 
         code->shl(to_store, flag_bit);
         code->and_(MJitStateCpsr(), ~flag_mask);
@@ -231,24 +243,25 @@ void EmitX64::EmitSetZFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
 }
 
 void EmitX64::EmitGetCFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    Xbyak::Reg32 result = reg_alloc.DefGpr(inst).cvt32();
+    Xbyak::Reg32 result = reg_alloc.ScratchGpr().cvt32();
     code->mov(result, MJitStateCpsr());
     code->shr(result, 29);
     code->and_(result, 1);
+    reg_alloc.DefineValue(inst, result);
 }
 
 void EmitX64::EmitSetCFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
     constexpr size_t flag_bit = 29;
     constexpr u32 flag_mask = 1u << flag_bit;
-    IR::Value arg = inst->GetArg(0);
-    if (arg.IsImmediate()) {
-        if (arg.GetU1()) {
+    auto args = reg_alloc.GetArgumentInfo(inst);
+    if (args[0].IsImmediate()) {
+        if (args[0].GetImmediateU1()) {
            code->or_(MJitStateCpsr(), flag_mask);
        } else {
             code->and_(MJitStateCpsr(), ~flag_mask);
         }
     } else {
-        Xbyak::Reg32 to_store = reg_alloc.UseScratchGpr(arg).cvt32();
+        Xbyak::Reg32 to_store = reg_alloc.UseScratchGpr(args[0]).cvt32();
 
         code->shl(to_store, flag_bit);
         code->and_(MJitStateCpsr(), ~flag_mask);
@@ -257,24 +270,25 @@ void EmitX64::EmitSetCFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
 }
 
 void EmitX64::EmitGetVFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    Xbyak::Reg32 result = reg_alloc.DefGpr(inst).cvt32();
+    Xbyak::Reg32 result = reg_alloc.ScratchGpr().cvt32();
     code->mov(result, MJitStateCpsr());
     code->shr(result, 28);
     code->and_(result, 1);
+    reg_alloc.DefineValue(inst, result);
 }
 
 void EmitX64::EmitSetVFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
     constexpr size_t flag_bit = 28;
     constexpr u32 flag_mask = 1u << flag_bit;
-    IR::Value arg = inst->GetArg(0);
-    if (arg.IsImmediate()) {
-        if (arg.GetU1()) {
+    auto args = reg_alloc.GetArgumentInfo(inst);
+    if (args[0].IsImmediate()) {
+        if (args[0].GetImmediateU1()) {
             code->or_(MJitStateCpsr(), flag_mask);
         } else {
             code->and_(MJitStateCpsr(), ~flag_mask);
         }
     } else {
-        Xbyak::Reg32 to_store = reg_alloc.UseScratchGpr(arg).cvt32();
+        Xbyak::Reg32 to_store = reg_alloc.UseScratchGpr(args[0]).cvt32();
 
         code->shl(to_store, flag_bit);
         code->and_(MJitStateCpsr(), ~flag_mask);
@@ -285,12 +299,12 @@ void EmitX64::EmitSetVFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
 void EmitX64::EmitOrQFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
     constexpr size_t flag_bit = 27;
     constexpr u32 flag_mask = 1u << flag_bit;
-    IR::Value arg = inst->GetArg(0);
-    if (arg.IsImmediate()) {
-        if (arg.GetU1())
+    auto args = reg_alloc.GetArgumentInfo(inst);
+    if (args[0].IsImmediate()) {
+        if (args[0].GetImmediateU1())
             code->or_(MJitStateCpsr(), flag_mask);
     } else {
-        Xbyak::Reg32 to_store = reg_alloc.UseScratchGpr(arg).cvt32();
+        Xbyak::Reg32 to_store = reg_alloc.UseScratchGpr(args[0]).cvt32();
 
         code->shl(to_store, flag_bit);
         code->or_(MJitStateCpsr(), to_store);
@@ -298,22 +312,23 @@ void EmitX64::EmitOrQFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
 }
 
 void EmitX64::EmitGetGEFlags(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    Xbyak::Reg32 result = reg_alloc.DefGpr(inst).cvt32();
+    Xbyak::Reg32 result = reg_alloc.ScratchGpr().cvt32();
     code->mov(result, MJitStateCpsr());
     code->shr(result, 16);
     code->and_(result, 0xF);
+    reg_alloc.DefineValue(inst, result);
 }
 
 void EmitX64::EmitSetGEFlags(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
     constexpr size_t flag_bit = 16;
     constexpr u32 flag_mask = 0xFu << flag_bit;
-    IR::Value arg = inst->GetArg(0);
-    if (arg.IsImmediate()) {
-        u32 imm = (arg.GetU32() << flag_bit) & flag_mask;
+    auto args = reg_alloc.GetArgumentInfo(inst);
+    if (args[0].IsImmediate()) {
+        u32 imm = (args[0].GetImmediateU32() << flag_bit) & flag_mask;
         code->and_(MJitStateCpsr(), ~flag_mask);
         code->or_(MJitStateCpsr(), imm);
     } else {
-        Xbyak::Reg32 to_store = reg_alloc.UseScratchGpr(arg).cvt32();
+        Xbyak::Reg32 to_store = reg_alloc.UseScratchGpr(args[0]).cvt32();
 
         code->shl(to_store, flag_bit);
         code->and_(to_store, flag_mask);
@@ -323,8 +338,10 @@ void EmitX64::EmitSetGEFlags(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
 }
 
 void EmitX64::EmitBXWritePC(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
+    auto args = reg_alloc.GetArgumentInfo(inst);
+    auto& arg = args[0];
+
     const u32 T_bit = 1 << 5;
-    auto arg = inst->GetArg(0);
 
     // Pseudocode:
     // if (new_pc & 1) {
@@ -336,7 +353,7 @@ void EmitX64::EmitBXWritePC(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
     // }
 
     if (arg.IsImmediate()) {
-        u32 new_pc = arg.GetU32();
+        u32 new_pc = arg.GetImmediateU32();
         if (Common::Bit<0>(new_pc)) {
             new_pc &= 0xFFFFFFFE;
             code->mov(MJitStateReg(Arm::Reg::PC), new_pc);
@@ -368,9 +385,8 @@ void EmitX64::EmitBXWritePC(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
 }
 
 void EmitX64::EmitCallSupervisor(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    auto imm32 = inst->GetArg(0);
-
-    reg_alloc.HostCall(nullptr, imm32);
+    auto args = reg_alloc.GetArgumentInfo(inst);
+    reg_alloc.HostCall(nullptr, args[0]);
 
     code->SwitchMxcsrOnExit();
     code->CallFunction(cb.CallSVC);
@@ -395,9 +411,8 @@ static void SetFpscrImpl(u32 value, JitState* jit_state) {
 }
 
 void EmitX64::EmitSetFpscr(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    auto a = inst->GetArg(0);
-
-    reg_alloc.HostCall(nullptr, a);
+    auto args = reg_alloc.GetArgumentInfo(inst);
+    reg_alloc.HostCall(nullptr, args[0]);
     code->mov(code->ABI_PARAM2, code->r15);
 
     code->SwitchMxcsrOnExit();
@@ -408,15 +423,16 @@ void EmitX64::EmitSetFpscr(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
 void EmitX64::EmitGetFpscrNZCV(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
     using namespace Xbyak::util;
 
-    Xbyak::Reg32 result = reg_alloc.DefGpr(inst).cvt32();
-
+    Xbyak::Reg32 result = reg_alloc.ScratchGpr().cvt32();
     code->mov(result, dword[r15 + offsetof(JitState, FPSCR_nzcv)]);
+    reg_alloc.DefineValue(inst, result);
 }
 
 void EmitX64::EmitSetFpscrNZCV(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
     using namespace Xbyak::util;
 
-    Xbyak::Reg32 value = reg_alloc.UseGpr(inst->GetArg(0)).cvt32();
+    auto args = reg_alloc.GetArgumentInfo(inst);
+    Xbyak::Reg32 value = reg_alloc.UseGpr(args[0]).cvt32();
 
     code->mov(dword[r15 + offsetof(JitState, FPSCR_nzcv)], value);
 }
@@ -424,8 +440,9 @@ void EmitX64::EmitSetFpscrNZCV(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst)
 void EmitX64::EmitPushRSB(RegAlloc&
reg_alloc, IR::Block&, IR::Inst* inst) { using namespace Xbyak::util; - ASSERT(inst->GetArg(0).IsImmediate()); - u64 unique_hash_of_target = inst->GetArg(0).GetU64(); + auto args = reg_alloc.GetArgumentInfo(inst); + ASSERT(args[0].IsImmediate()); + u64 unique_hash_of_target = args[0].GetImmediateU64(); auto iter = block_descriptors.find(unique_hash_of_target); CodePtr target_code_ptr = iter != block_descriptors.end() @@ -470,121 +487,123 @@ void EmitX64::EmitGetGEFromOp(RegAlloc&, IR::Block&, IR::Inst*) { } void EmitX64::EmitPack2x32To1x64(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - OpArg lo; - Xbyak::Reg64 result; - if (inst->GetArg(0).IsImmediate()) { - // TODO: Optimize - result = reg_alloc.UseDefGpr(inst->GetArg(0), inst); - lo = result.cvt32(); - } else { - std::tie(lo, result) = reg_alloc.UseDefOpArgGpr(inst->GetArg(0), inst); - } - lo.setBit(32); - Xbyak::Reg64 hi = reg_alloc.UseScratchGpr(inst->GetArg(1)); + auto args = reg_alloc.GetArgumentInfo(inst); + Xbyak::Reg64 lo = reg_alloc.UseScratchGpr(args[0]); + Xbyak::Reg64 hi = reg_alloc.UseScratchGpr(args[1]); code->shl(hi, 32); - code->mov(result.cvt32(), *lo); // Zero extend to 64-bits - code->or_(result, hi); + code->mov(lo.cvt32(), lo.cvt32()); // Zero extend to 64-bits + code->or_(lo, hi); + + reg_alloc.DefineValue(inst, lo); } void EmitX64::EmitLeastSignificantWord(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - reg_alloc.RegisterAddDef(inst, inst->GetArg(0)); + auto args = reg_alloc.GetArgumentInfo(inst); + reg_alloc.DefineValue(inst, args[0]); } void EmitX64::EmitMostSignificantWord(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) { - auto carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp); - Xbyak::Reg64 result = reg_alloc.UseDefGpr(inst->GetArg(0), inst); - + auto args = reg_alloc.GetArgumentInfo(inst); + Xbyak::Reg64 result = reg_alloc.UseScratchGpr(args[0]); code->shr(result, 32); + reg_alloc.DefineValue(inst, result); + auto carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp); if (carry_inst) { EraseInstruction(block, carry_inst); - - Xbyak::Reg64 carry = reg_alloc.DefGpr(carry_inst); - + Xbyak::Reg64 carry = reg_alloc.ScratchGpr(); code->setc(carry.cvt8()); + reg_alloc.DefineValue(carry_inst, carry); } } void EmitX64::EmitLeastSignificantHalf(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - reg_alloc.RegisterAddDef(inst, inst->GetArg(0)); + auto args = reg_alloc.GetArgumentInfo(inst); + reg_alloc.DefineValue(inst, args[0]); } void EmitX64::EmitLeastSignificantByte(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - reg_alloc.RegisterAddDef(inst, inst->GetArg(0)); + auto args = reg_alloc.GetArgumentInfo(inst); + reg_alloc.DefineValue(inst, args[0]); } void EmitX64::EmitMostSignificantBit(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - Xbyak::Reg32 result = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32(); - + auto args = reg_alloc.GetArgumentInfo(inst); + Xbyak::Reg64 result = reg_alloc.UseScratchGpr(args[0]); // TODO: Flag optimization - code->shr(result, 31); + reg_alloc.DefineValue(inst, result); } void EmitX64::EmitIsZero(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - Xbyak::Reg32 result = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32(); - + auto args = reg_alloc.GetArgumentInfo(inst); + Xbyak::Reg64 result = reg_alloc.UseScratchGpr(args[0]); // TODO: Flag optimization - code->test(result, result); code->sete(result.cvt8()); code->movzx(result, result.cvt8()); + reg_alloc.DefineValue(inst, result); } void 
EmitX64::EmitIsZero64(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - Xbyak::Reg64 result = reg_alloc.UseDefGpr(inst->GetArg(0), inst); - + auto args = reg_alloc.GetArgumentInfo(inst); + Xbyak::Reg64 result = reg_alloc.UseScratchGpr(args[0]); // TODO: Flag optimization - code->test(result, result); code->sete(result.cvt8()); code->movzx(result, result.cvt8()); + reg_alloc.DefineValue(inst, result); } void EmitX64::EmitLogicalShiftLeft(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) { auto carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp); + auto args = reg_alloc.GetArgumentInfo(inst); + auto& operand_arg = args[0]; + auto& shift_arg = args[1]; + auto& carry_arg = args[2]; + // TODO: Consider using BMI2 instructions like SHLX when arm-in-host flags is implemented. if (!carry_inst) { - if (!inst->GetArg(2).IsImmediate()) { + if (!carry_arg.IsImmediate()) { inst->GetArg(2).GetInst()->DecrementRemainingUses(); } - auto shift_arg = inst->GetArg(1); - if (shift_arg.IsImmediate()) { - Xbyak::Reg32 result = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32(); - u8 shift = shift_arg.GetU8(); + Xbyak::Reg32 result = reg_alloc.UseScratchGpr(operand_arg).cvt32(); + u8 shift = shift_arg.GetImmediateU8(); if (shift <= 31) { code->shl(result, shift); } else { code->xor_(result, result); } + + reg_alloc.DefineValue(inst, result); } else { - Xbyak::Reg8 shift = reg_alloc.UseGpr(shift_arg, {HostLoc::RCX}).cvt8(); - Xbyak::Reg32 result = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32(); + reg_alloc.Use(shift_arg, HostLoc::RCX); + Xbyak::Reg32 result = reg_alloc.UseScratchGpr(operand_arg).cvt32(); Xbyak::Reg32 zero = reg_alloc.ScratchGpr().cvt32(); // The 32-bit x64 SHL instruction masks the shift count by 0x1F before performing the shift. // ARM differs from the behaviour: It does not mask the count, so shifts above 31 result in zeros. - code->shl(result, shift); + code->shl(result, code->cl); code->xor_(zero, zero); - code->cmp(shift, 32); + code->cmp(code->cl, 32); code->cmovnb(result, zero); + + reg_alloc.DefineValue(inst, result); } } else { EraseInstruction(block, carry_inst); - auto shift_arg = inst->GetArg(1); - if (shift_arg.IsImmediate()) { - u8 shift = shift_arg.GetU8(); - Xbyak::Reg32 result = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32(); - Xbyak::Reg32 carry = reg_alloc.UseDefGpr(inst->GetArg(2), carry_inst).cvt32(); + u8 shift = shift_arg.GetImmediateU8(); + Xbyak::Reg32 result = reg_alloc.UseScratchGpr(operand_arg).cvt32(); + Xbyak::Reg32 carry = reg_alloc.UseScratchGpr(carry_arg).cvt32(); if (shift == 0) { // There is nothing more to do. @@ -600,21 +619,24 @@ void EmitX64::EmitLogicalShiftLeft(RegAlloc& reg_alloc, IR::Block& block, IR::In code->xor_(result, result); code->and_(carry, 1); } + + reg_alloc.DefineValue(inst, result); + reg_alloc.DefineValue(carry_inst, carry); } else { - Xbyak::Reg8 shift = reg_alloc.UseGpr(shift_arg, {HostLoc::RCX}).cvt8(); - Xbyak::Reg32 result = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32(); - Xbyak::Reg32 carry = reg_alloc.UseDefGpr(inst->GetArg(2), carry_inst).cvt32(); + reg_alloc.Use(shift_arg, HostLoc::RCX); + Xbyak::Reg32 result = reg_alloc.UseScratchGpr(operand_arg).cvt32(); + Xbyak::Reg32 carry = reg_alloc.UseScratchGpr(carry_arg).cvt32(); // TODO: Optimize this. 
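For reference, the semantics this carry-aware LSL path (and the label block just below) must reproduce are those of ARM's register-specified shift. A standalone sketch, assuming nothing from the codebase (the function name and types are illustrative only, not code from this patch):

    #include <cstdint>
    #include <utility>

    // ARM LSL by a register amount (Rs & 0xFF): returns {result, carry_out}.
    std::pair<std::uint32_t, bool> LogicalShiftLeft32(std::uint32_t value, std::uint8_t shift, bool carry_in) {
        if (shift == 0)
            return {value, carry_in};                                     // carry passes through unchanged
        if (shift < 32)
            return {value << shift, ((value >> (32 - shift)) & 1) != 0};  // carry = last bit shifted out
        if (shift == 32)
            return {0, (value & 1) != 0};                                 // carry = bit 0
        return {0, false};                                                // shifts > 32: zero result, zero carry
    }

The last three cases correspond to the branches of the emitted label block (.end via the in-range path, .Rs_eq32, and .Rs_gt32).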
code->inLocalLabel(); - code->cmp(shift, 32); + code->cmp(code->cl, 32); code->ja(".Rs_gt32"); code->je(".Rs_eq32"); // if (Rs & 0xFF < 32) { code->bt(carry.cvt32(), 0); // Set the carry flag for correct behaviour in the case when Rs & 0xFF == 0 - code->shl(result, shift); + code->shl(result, code->cl); code->setc(carry.cvt8()); code->jmp(".end"); // } else if (Rs & 0xFF > 32) { @@ -631,6 +653,9 @@ void EmitX64::EmitLogicalShiftLeft(RegAlloc& reg_alloc, IR::Block& block, IR::In code->L(".end"); code->outLocalLabel(); + + reg_alloc.DefineValue(inst, result); + reg_alloc.DefineValue(carry_inst, carry); } } } @@ -638,44 +663,49 @@ void EmitX64::EmitLogicalShiftLeft(RegAlloc& reg_alloc, IR::Block& block, IR::In void EmitX64::EmitLogicalShiftRight(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) { auto carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp); + auto args = reg_alloc.GetArgumentInfo(inst); + auto& operand_arg = args[0]; + auto& shift_arg = args[1]; + auto& carry_arg = args[2]; + if (!carry_inst) { - if (!inst->GetArg(2).IsImmediate()) { + if (!carry_arg.IsImmediate()) { inst->GetArg(2).GetInst()->DecrementRemainingUses(); } - auto shift_arg = inst->GetArg(1); - if (shift_arg.IsImmediate()) { - Xbyak::Reg32 result = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32(); - u8 shift = shift_arg.GetU8(); + Xbyak::Reg32 result = reg_alloc.UseScratchGpr(operand_arg).cvt32(); + u8 shift = shift_arg.GetImmediateU8(); if (shift <= 31) { code->shr(result, shift); } else { code->xor_(result, result); } + + reg_alloc.DefineValue(inst, result); } else { - Xbyak::Reg8 shift = reg_alloc.UseGpr(shift_arg, {HostLoc::RCX}).cvt8(); - Xbyak::Reg32 result = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32(); + reg_alloc.Use(shift_arg, HostLoc::RCX); + Xbyak::Reg32 result = reg_alloc.UseScratchGpr(operand_arg).cvt32(); Xbyak::Reg32 zero = reg_alloc.ScratchGpr().cvt32(); // The 32-bit x64 SHR instruction masks the shift count by 0x1F before performing the shift. // ARM differs from the behaviour: It does not mask the count, so shifts above 31 result in zeros. - code->shr(result, shift); + code->shr(result, code->cl); code->xor_(zero, zero); - code->cmp(shift, 32); + code->cmp(code->cl, 32); code->cmovnb(result, zero); + + reg_alloc.DefineValue(inst, result); } } else { EraseInstruction(block, carry_inst); - auto shift_arg = inst->GetArg(1); - if (shift_arg.IsImmediate()) { - u8 shift = shift_arg.GetU8(); - Xbyak::Reg32 result = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32(); - Xbyak::Reg32 carry = reg_alloc.UseDefGpr(inst->GetArg(2), carry_inst).cvt32(); + u8 shift = shift_arg.GetImmediateU8(); + Xbyak::Reg32 result = reg_alloc.UseScratchGpr(operand_arg).cvt32(); + Xbyak::Reg32 carry = reg_alloc.UseScratchGpr(carry_arg).cvt32(); if (shift == 0) { // There is nothing more to do. @@ -690,23 +720,26 @@ void EmitX64::EmitLogicalShiftRight(RegAlloc& reg_alloc, IR::Block& block, IR::I code->xor_(result, result); code->xor_(carry, carry); } + + reg_alloc.DefineValue(inst, result); + reg_alloc.DefineValue(carry_inst, carry); } else { - Xbyak::Reg8 shift = reg_alloc.UseGpr(shift_arg, {HostLoc::RCX}).cvt8(); - Xbyak::Reg32 result = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32(); - Xbyak::Reg32 carry = reg_alloc.UseDefGpr(inst->GetArg(2), carry_inst).cvt32(); + reg_alloc.Use(shift_arg, HostLoc::RCX); + Xbyak::Reg32 result = reg_alloc.UseScratchGpr(operand_arg).cvt32(); + Xbyak::Reg32 carry = reg_alloc.UseScratchGpr(carry_arg).cvt32(); // TODO: Optimize this. 
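The label block below performs the same dispatch for LSR, with the carry taken from the opposite end of the value. As an illustrative standalone model (again, names are not from the patch):

    #include <cstdint>
    #include <utility>

    // ARM LSR by a register amount (Rs & 0xFF): returns {result, carry_out}.
    std::pair<std::uint32_t, bool> LogicalShiftRight32(std::uint32_t value, std::uint8_t shift, bool carry_in) {
        if (shift == 0)
            return {value, carry_in};
        if (shift < 32)
            return {value >> shift, ((value >> (shift - 1)) & 1) != 0};  // carry = last bit shifted out
        if (shift == 32)
            return {0, (value >> 31) != 0};                              // carry = bit 31
        return {0, false};
    }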
code->inLocalLabel(); - code->cmp(shift, 32); + code->cmp(code->cl, 32); code->ja(".Rs_gt32"); code->je(".Rs_eq32"); // if (Rs & 0xFF == 0) goto end; - code->test(shift, shift); + code->test(code->cl, code->cl); code->jz(".end"); // if (Rs & 0xFF < 32) { - code->shr(result, shift); + code->shr(result, code->cl); code->setc(carry.cvt8()); code->jmp(".end"); // } else if (Rs & 0xFF > 32) { @@ -723,39 +756,52 @@ void EmitX64::EmitLogicalShiftRight(RegAlloc& reg_alloc, IR::Block& block, IR::I code->L(".end"); code->outLocalLabel(); + + reg_alloc.DefineValue(inst, result); + reg_alloc.DefineValue(carry_inst, carry); } } } void EmitX64::EmitLogicalShiftRight64(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - Xbyak::Reg64 result = reg_alloc.UseDefGpr(inst->GetArg(0), inst); + auto args = reg_alloc.GetArgumentInfo(inst); + auto& operand_arg = args[0]; + auto& shift_arg = args[1]; - auto shift_arg = inst->GetArg(1); ASSERT_MSG(shift_arg.IsImmediate(), "variable 64 bit shifts are not implemented"); - u8 shift = shift_arg.GetU8(); - ASSERT_MSG(shift < 64, "shift width clamping is not implemented"); + ASSERT_MSG(shift_arg.GetImmediateU8() < 64, "shift width clamping is not implemented"); + + Xbyak::Reg64 result = reg_alloc.UseScratchGpr(operand_arg); + u8 shift = shift_arg.GetImmediateU8(); code->shr(result.cvt64(), shift); + + reg_alloc.DefineValue(inst, result); } void EmitX64::EmitArithmeticShiftRight(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) { auto carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp); + auto args = reg_alloc.GetArgumentInfo(inst); + auto& operand_arg = args[0]; + auto& shift_arg = args[1]; + auto& carry_arg = args[2]; + if (!carry_inst) { - if (!inst->GetArg(2).IsImmediate()) { + if (!carry_arg.IsImmediate()) { inst->GetArg(2).GetInst()->DecrementRemainingUses(); } - auto shift_arg = inst->GetArg(1); - if (shift_arg.IsImmediate()) { - u8 shift = shift_arg.GetU8(); - Xbyak::Reg32 result = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32(); + u8 shift = shift_arg.GetImmediateU8(); + Xbyak::Reg32 result = reg_alloc.UseScratchGpr(operand_arg).cvt32(); code->sar(result, u8(shift < 31 ? shift : 31)); + + reg_alloc.DefineValue(inst, result); } else { - Xbyak::Reg32 shift = reg_alloc.UseScratchGpr(shift_arg, {HostLoc::RCX}).cvt32(); - Xbyak::Reg32 result = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32(); + reg_alloc.UseScratch(shift_arg, HostLoc::RCX); + Xbyak::Reg32 result = reg_alloc.UseScratchGpr(operand_arg).cvt32(); Xbyak::Reg32 const31 = reg_alloc.ScratchGpr().cvt32(); // The 32-bit x64 SAR instruction masks the shift count by 0x1F before performing the shift. @@ -763,20 +809,20 @@ void EmitX64::EmitArithmeticShiftRight(RegAlloc& reg_alloc, IR::Block& block, IR // We note that all shift values above 31 have the same behaviour as 31 does, so we saturate `shift` to 31. 
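The saturation is valid because ASR is sign-filling. A hedged reference model of the full semantics, including the carry cases handled in the block further below (standalone, illustrative C++; it assumes >> on a negative int32 is an arithmetic shift, which holds on mainstream compilers and is guaranteed from C++20):

    #include <cstdint>
    #include <utility>

    // ARM ASR by a register amount (Rs & 0xFF): returns {result, carry_out}.
    std::pair<std::uint32_t, bool> ArithmeticShiftRight32(std::uint32_t value, std::uint8_t shift, bool carry_in) {
        const std::int32_t signed_value = static_cast<std::int32_t>(value);
        if (shift == 0)
            return {value, carry_in};
        if (shift < 32)
            return {static_cast<std::uint32_t>(signed_value >> shift), ((value >> (shift - 1)) & 1) != 0};
        // 32 or more: every result bit and the carry are copies of the sign bit.
        return {static_cast<std::uint32_t>(signed_value >> 31), (value >> 31) != 0};
    }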
code->mov(const31, 31); - code->movzx(shift, shift.cvt8()); - code->cmp(shift, u32(31)); - code->cmovg(shift, const31); - code->sar(result, shift.cvt8()); + code->movzx(code->ecx, code->cl); + code->cmp(code->ecx, u32(31)); + code->cmovg(code->ecx, const31); + code->sar(result, code->cl); + + reg_alloc.DefineValue(inst, result); } } else { EraseInstruction(block, carry_inst); - auto shift_arg = inst->GetArg(1); - if (shift_arg.IsImmediate()) { - u8 shift = shift_arg.GetU8(); - Xbyak::Reg32 result = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32(); - Xbyak::Reg8 carry = reg_alloc.UseDefGpr(inst->GetArg(2), carry_inst).cvt8(); + u8 shift = shift_arg.GetImmediateU8(); + Xbyak::Reg32 result = reg_alloc.UseScratchGpr(operand_arg).cvt32(); + Xbyak::Reg8 carry = reg_alloc.UseScratchGpr(carry_arg).cvt8(); if (shift == 0) { // There is nothing more to do. @@ -788,22 +834,25 @@ void EmitX64::EmitArithmeticShiftRight(RegAlloc& reg_alloc, IR::Block& block, IR code->bt(result, 31); code->setc(carry); } + + reg_alloc.DefineValue(inst, result); + reg_alloc.DefineValue(carry_inst, carry); } else { - Xbyak::Reg8 shift = reg_alloc.UseGpr(shift_arg, {HostLoc::RCX}).cvt8(); - Xbyak::Reg32 result = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32(); - Xbyak::Reg8 carry = reg_alloc.UseDefGpr(inst->GetArg(2), carry_inst).cvt8(); + reg_alloc.Use(shift_arg, HostLoc::RCX); + Xbyak::Reg32 result = reg_alloc.UseScratchGpr(operand_arg).cvt32(); + Xbyak::Reg8 carry = reg_alloc.UseScratchGpr(carry_arg).cvt8(); // TODO: Optimize this. code->inLocalLabel(); - code->cmp(shift, u32(31)); + code->cmp(code->cl, u32(31)); code->ja(".Rs_gt31"); // if (Rs & 0xFF == 0) goto end; - code->test(shift, shift); + code->test(code->cl, code->cl); code->jz(".end"); // if (Rs & 0xFF <= 31) { - code->sar(result, shift); + code->sar(result, code->cl); code->setc(carry); code->jmp(".end"); // } else if (Rs & 0xFF > 31) { @@ -815,6 +864,9 @@ void EmitX64::EmitArithmeticShiftRight(RegAlloc& reg_alloc, IR::Block& block, IR code->L(".end"); code->outLocalLabel(); + + reg_alloc.DefineValue(inst, result); + reg_alloc.DefineValue(carry_inst, carry); } } } @@ -822,34 +874,39 @@ void EmitX64::EmitArithmeticShiftRight(RegAlloc& reg_alloc, IR::Block& block, IR void EmitX64::EmitRotateRight(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) { auto carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp); + auto args = reg_alloc.GetArgumentInfo(inst); + auto& operand_arg = args[0]; + auto& shift_arg = args[1]; + auto& carry_arg = args[2]; + if (!carry_inst) { - if (!inst->GetArg(2).IsImmediate()) { + if (!carry_arg.IsImmediate()) { inst->GetArg(2).GetInst()->DecrementRemainingUses(); } - auto shift_arg = inst->GetArg(1); - if (shift_arg.IsImmediate()) { - u8 shift = shift_arg.GetU8(); - Xbyak::Reg32 result = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32(); + u8 shift = shift_arg.GetImmediateU8(); + Xbyak::Reg32 result = reg_alloc.UseScratchGpr(operand_arg).cvt32(); code->ror(result, u8(shift & 0x1F)); + + reg_alloc.DefineValue(inst, result); } else { - Xbyak::Reg8 shift = reg_alloc.UseGpr(shift_arg, {HostLoc::RCX}).cvt8(); - Xbyak::Reg32 result = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32(); + reg_alloc.Use(shift_arg, HostLoc::RCX); + Xbyak::Reg32 result = reg_alloc.UseScratchGpr(operand_arg).cvt32(); // x64 ROR instruction does (shift & 0x1F) for us. 
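That masking matches ARM, which only distinguishes rotate amounts mod 32; the zero cases only matter once the carry becomes observable, and those are handled in the label block further below. An illustrative standalone model (names are not from the patch):

    #include <cstdint>
    #include <utility>

    // ARM ROR by a register amount (Rs & 0xFF): returns {result, carry_out}.
    std::pair<std::uint32_t, bool> RotateRight32(std::uint32_t value, std::uint8_t shift, bool carry_in) {
        if (shift == 0)
            return {value, carry_in};               // rotate by zero: carry unchanged
        const unsigned amount = shift % 32;
        const std::uint32_t result =
            amount == 0 ? value : (value >> amount) | (value << (32u - amount));
        return {result, (result >> 31) != 0};       // carry = bit 31 of the rotated result
    }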
- code->ror(result, shift); + code->ror(result, code->cl); + + reg_alloc.DefineValue(inst, result); } } else { EraseInstruction(block, carry_inst); - auto shift_arg = inst->GetArg(1); - if (shift_arg.IsImmediate()) { - u8 shift = shift_arg.GetU8(); - Xbyak::Reg32 result = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32(); - Xbyak::Reg8 carry = reg_alloc.UseDefGpr(inst->GetArg(2), carry_inst).cvt8(); + u8 shift = shift_arg.GetImmediateU8(); + Xbyak::Reg32 result = reg_alloc.UseScratchGpr(operand_arg).cvt32(); + Xbyak::Reg8 carry = reg_alloc.UseScratchGpr(carry_arg).cvt8(); if (shift == 0) { // There is nothing more to do. @@ -860,23 +917,26 @@ void EmitX64::EmitRotateRight(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* i code->ror(result, shift); code->setc(carry); } + + reg_alloc.DefineValue(inst, result); + reg_alloc.DefineValue(carry_inst, carry); } else { - Xbyak::Reg8 shift = reg_alloc.UseScratchGpr(shift_arg, {HostLoc::RCX}).cvt8(); - Xbyak::Reg32 result = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32(); - Xbyak::Reg8 carry = reg_alloc.UseDefGpr(inst->GetArg(2), carry_inst).cvt8(); + reg_alloc.UseScratch(shift_arg, HostLoc::RCX); + Xbyak::Reg32 result = reg_alloc.UseScratchGpr(operand_arg).cvt32(); + Xbyak::Reg8 carry = reg_alloc.UseScratchGpr(carry_arg).cvt8(); // TODO: Optimize code->inLocalLabel(); // if (Rs & 0xFF == 0) goto end; - code->test(shift, shift); + code->test(code->cl, code->cl); code->jz(".end"); - code->and_(shift.cvt32(), u32(0x1F)); + code->and_(code->ecx, u32(0x1F)); code->jz(".zero_1F"); // if (Rs & 0x1F != 0) { - code->ror(result, shift); + code->ror(result, code->cl); code->setc(carry); code->jmp(".end"); // } else { @@ -887,6 +947,9 @@ void EmitX64::EmitRotateRight(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* i code->L(".end"); code->outLocalLabel(); + + reg_alloc.DefineValue(inst, result); + reg_alloc.DefineValue(carry_inst, carry); } } } @@ -894,28 +957,32 @@ void EmitX64::EmitRotateRight(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* i void EmitX64::EmitRotateRightExtended(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) { auto carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp); - Xbyak::Reg32 result = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32(); - Xbyak::Reg8 carry = carry_inst - ? reg_alloc.UseDefGpr(inst->GetArg(1), carry_inst).cvt8() - : reg_alloc.UseGpr(inst->GetArg(1)).cvt8(); + auto args = reg_alloc.GetArgumentInfo(inst); + + Xbyak::Reg32 result = reg_alloc.UseScratchGpr(args[0]).cvt32(); + Xbyak::Reg8 carry = reg_alloc.UseScratchGpr(args[1]).cvt8(); code->bt(carry.cvt32(), 0); code->rcr(result, 1); + reg_alloc.DefineValue(inst, result); + if (carry_inst) { EraseInstruction(block, carry_inst); code->setc(carry); + + reg_alloc.DefineValue(carry_inst, carry); } } const Xbyak::Reg64 INVALID_REG = Xbyak::Reg64(-1); -static Xbyak::Reg8 DoCarry(RegAlloc& reg_alloc, const IR::Value& carry_in, IR::Inst* carry_out) { +static Xbyak::Reg8 DoCarry(RegAlloc& reg_alloc, Argument& carry_in, IR::Inst* carry_out) { if (carry_in.IsImmediate()) { - return carry_out ? reg_alloc.DefGpr(carry_out).cvt8() : INVALID_REG.cvt8(); + return carry_out ? reg_alloc.ScratchGpr().cvt8() : INVALID_REG.cvt8(); } else { - return carry_out ? reg_alloc.UseDefGpr(carry_in, carry_out).cvt8() : reg_alloc.UseGpr(carry_in).cvt8(); + return carry_out ? 
reg_alloc.UseScratchGpr(carry_in).cvt8() : reg_alloc.UseGpr(carry_in).cvt8(); } } @@ -923,20 +990,19 @@ void EmitX64::EmitAddWithCarry(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* auto carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp); auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp); - IR::Value a = inst->GetArg(0); - IR::Value b = inst->GetArg(1); - IR::Value carry_in = inst->GetArg(2); + auto args = reg_alloc.GetArgumentInfo(inst); + auto& carry_in = args[2]; - Xbyak::Reg32 result = reg_alloc.UseDefGpr(a, inst).cvt32(); + Xbyak::Reg32 result = reg_alloc.UseScratchGpr(args[0]).cvt32(); Xbyak::Reg8 carry = DoCarry(reg_alloc, carry_in, carry_inst); - Xbyak::Reg8 overflow = overflow_inst ? reg_alloc.DefGpr(overflow_inst).cvt8() : INVALID_REG.cvt8(); + Xbyak::Reg8 overflow = overflow_inst ? reg_alloc.ScratchGpr().cvt8() : INVALID_REG.cvt8(); // TODO: Consider using LEA. - if (b.IsImmediate()) { - u32 op_arg = b.GetU32(); + if (args[1].IsImmediate()) { + u32 op_arg = args[1].GetImmediateU32(); if (carry_in.IsImmediate()) { - if (carry_in.GetU1()) { + if (carry_in.GetImmediateU1()) { code->stc(); code->adc(result, op_arg); } else { @@ -947,10 +1013,10 @@ void EmitX64::EmitAddWithCarry(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* code->adc(result, op_arg); } } else { - OpArg op_arg = reg_alloc.UseOpArg(b, any_gpr); + OpArg op_arg = reg_alloc.UseOpArg(args[1]); op_arg.setBit(32); if (carry_in.IsImmediate()) { - if (carry_in.GetU1()) { + if (carry_in.GetImmediateU1()) { code->stc(); code->adc(result, *op_arg); } else { @@ -962,48 +1028,50 @@ void EmitX64::EmitAddWithCarry(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* } } + reg_alloc.DefineValue(inst, result); + if (carry_inst) { EraseInstruction(block, carry_inst); - code->setc(carry); + reg_alloc.DefineValue(carry_inst, carry); } if (overflow_inst) { EraseInstruction(block, overflow_inst); - code->seto(overflow); + reg_alloc.DefineValue(overflow_inst, overflow); } } void EmitX64::EmitAdd64(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - IR::Value a = inst->GetArg(0); - IR::Value b = inst->GetArg(1); + auto args = reg_alloc.GetArgumentInfo(inst); - Xbyak::Reg64 result = reg_alloc.UseDefGpr(a, inst); - Xbyak::Reg64 op_arg = reg_alloc.UseGpr(b); + Xbyak::Reg64 result = reg_alloc.UseScratchGpr(args[0]); + Xbyak::Reg64 op_arg = reg_alloc.UseGpr(args[1]); code->add(result, op_arg); + + reg_alloc.DefineValue(inst, result); } void EmitX64::EmitSubWithCarry(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) { auto carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp); auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp); - IR::Value a = inst->GetArg(0); - IR::Value b = inst->GetArg(1); - IR::Value carry_in = inst->GetArg(2); + auto args = reg_alloc.GetArgumentInfo(inst); + auto& carry_in = args[2]; - Xbyak::Reg32 result = reg_alloc.UseDefGpr(a, inst).cvt32(); + Xbyak::Reg32 result = reg_alloc.UseScratchGpr(args[0]).cvt32(); Xbyak::Reg8 carry = DoCarry(reg_alloc, carry_in, carry_inst); - Xbyak::Reg8 overflow = overflow_inst ? reg_alloc.DefGpr(overflow_inst).cvt8() : INVALID_REG.cvt8(); + Xbyak::Reg8 overflow = overflow_inst ? reg_alloc.ScratchGpr().cvt8() : INVALID_REG.cvt8(); // TODO: Consider using LEA. // TODO: Optimize CMP case. // Note that x64 CF is inverse of what the ARM carry flag is here. 
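Concretely: ARM's carry after a subtraction means "no borrow occurred", while x64's CF means "a borrow occurred", which is why the code forces CF with stc when the incoming carry is clear and reads the flag back with setnc. A standalone sketch of the convention (illustrative names, not code from this patch):

    #include <cstdint>

    // ARM-style subtract with carry: result = a - b - (1 - carry_in), computed
    // as a + ~b + carry_in. The ARM carry-out is bit 32 of that sum, i.e.
    // 1 means "no borrow", the complement of x64's CF after sub/sbb.
    std::uint32_t SubWithCarry32(std::uint32_t a, std::uint32_t b, bool carry_in, bool& carry_out) {
        const std::uint64_t wide = static_cast<std::uint64_t>(a)
                                 + static_cast<std::uint64_t>(static_cast<std::uint32_t>(~b))
                                 + (carry_in ? 1u : 0u);
        carry_out = (wide >> 32) != 0;
        return static_cast<std::uint32_t>(wide);
    }

For example, 5 - 3 with carry-in set yields 2 with carry_out true (no borrow), while 3 - 5 yields 0xFFFFFFFE with carry_out false.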
- if (b.IsImmediate()) { - u32 op_arg = b.GetU32(); + if (args[1].IsImmediate()) { + u32 op_arg = args[1].GetImmediateU32(); if (carry_in.IsImmediate()) { - if (carry_in.GetU1()) { + if (carry_in.GetImmediateU1()) { code->sub(result, op_arg); } else { code->stc(); @@ -1015,10 +1083,10 @@ void EmitX64::EmitSubWithCarry(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* code->sbb(result, op_arg); } } else { - OpArg op_arg = reg_alloc.UseOpArg(b, any_gpr); + OpArg op_arg = reg_alloc.UseOpArg(args[1]); op_arg.setBit(32); if (carry_in.IsImmediate()) { - if (carry_in.GetU1()) { + if (carry_in.GetImmediateU1()) { code->sub(result, *op_arg); } else { code->stc(); @@ -1031,242 +1099,203 @@ void EmitX64::EmitSubWithCarry(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* } } + reg_alloc.DefineValue(inst, result); + if (carry_inst) { EraseInstruction(block, carry_inst); - code->setnc(carry); + reg_alloc.DefineValue(carry_inst, carry); } if (overflow_inst) { EraseInstruction(block, overflow_inst); - code->seto(overflow); + reg_alloc.DefineValue(overflow_inst, overflow); } } void EmitX64::EmitSub64(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - IR::Value a = inst->GetArg(0); - IR::Value b = inst->GetArg(1); + auto args = reg_alloc.GetArgumentInfo(inst); - Xbyak::Reg64 result = reg_alloc.UseDefGpr(a, inst); - Xbyak::Reg64 op_arg = reg_alloc.UseGpr(b); + Xbyak::Reg64 result = reg_alloc.UseScratchGpr(args[0]); + Xbyak::Reg64 op_arg = reg_alloc.UseGpr(args[1]); code->sub(result, op_arg); + + reg_alloc.DefineValue(inst, result); } void EmitX64::EmitMul(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - IR::Value a = inst->GetArg(0); - IR::Value b = inst->GetArg(1); - if (a.IsImmediate()) - std::swap(a, b); + auto args = reg_alloc.GetArgumentInfo(inst); - Xbyak::Reg32 result = reg_alloc.UseDefGpr(a, inst).cvt32(); - if (b.IsImmediate()) { - code->imul(result, result, b.GetU32()); + Xbyak::Reg32 result = reg_alloc.UseScratchGpr(args[0]).cvt32(); + if (args[1].IsImmediate()) { + code->imul(result, result, args[1].GetImmediateU32()); } else { - OpArg op_arg = reg_alloc.UseOpArg(b, any_gpr); + OpArg op_arg = reg_alloc.UseOpArg(args[1]); op_arg.setBit(32); code->imul(result, *op_arg); } + reg_alloc.DefineValue(inst, result); } void EmitX64::EmitMul64(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - IR::Value a = inst->GetArg(0); - IR::Value b = inst->GetArg(1); + auto args = reg_alloc.GetArgumentInfo(inst); - Xbyak::Reg64 result = reg_alloc.UseDefGpr(a, inst); - OpArg op_arg = reg_alloc.UseOpArg(b, any_gpr); + Xbyak::Reg64 result = reg_alloc.UseScratchGpr(args[0]); + OpArg op_arg = reg_alloc.UseOpArg(args[1]); code->imul(result, *op_arg); + + reg_alloc.DefineValue(inst, result); } void EmitX64::EmitAnd(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - IR::Value a = inst->GetArg(0); - IR::Value b = inst->GetArg(1); + auto args = reg_alloc.GetArgumentInfo(inst); - Xbyak::Reg32 result = reg_alloc.UseDefGpr(a, inst).cvt32(); + Xbyak::Reg32 result = reg_alloc.UseScratchGpr(args[0]).cvt32(); - if (b.IsImmediate()) { - u32 op_arg = b.GetU32(); + if (args[1].IsImmediate()) { + u32 op_arg = args[1].GetImmediateU32(); code->and_(result, op_arg); } else { - OpArg op_arg = reg_alloc.UseOpArg(b, any_gpr); + OpArg op_arg = reg_alloc.UseOpArg(args[1]); op_arg.setBit(32); code->and_(result, *op_arg); } + + reg_alloc.DefineValue(inst, result); } void EmitX64::EmitEor(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - IR::Value a = inst->GetArg(0); - IR::Value b = inst->GetArg(1); + auto args = 
reg_alloc.GetArgumentInfo(inst); - Xbyak::Reg32 result = reg_alloc.UseDefGpr(a, inst).cvt32(); + Xbyak::Reg32 result = reg_alloc.UseScratchGpr(args[0]).cvt32(); - if (b.IsImmediate()) { - u32 op_arg = b.GetU32(); + if (args[1].IsImmediate()) { + u32 op_arg = args[1].GetImmediateU32(); code->xor_(result, op_arg); } else { - OpArg op_arg = reg_alloc.UseOpArg(b, any_gpr); + OpArg op_arg = reg_alloc.UseOpArg(args[1]); op_arg.setBit(32); code->xor_(result, *op_arg); } + + reg_alloc.DefineValue(inst, result); } void EmitX64::EmitOr(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - IR::Value a = inst->GetArg(0); - IR::Value b = inst->GetArg(1); + auto args = reg_alloc.GetArgumentInfo(inst); - Xbyak::Reg32 result = reg_alloc.UseDefGpr(a, inst).cvt32(); + Xbyak::Reg32 result = reg_alloc.UseScratchGpr(args[0]).cvt32(); - if (b.IsImmediate()) { - u32 op_arg = b.GetU32(); + if (args[1].IsImmediate()) { + u32 op_arg = args[1].GetImmediateU32(); code->or_(result, op_arg); } else { - OpArg op_arg = reg_alloc.UseOpArg(b, any_gpr); + OpArg op_arg = reg_alloc.UseOpArg(args[1]); op_arg.setBit(32); code->or_(result, *op_arg); } + + reg_alloc.DefineValue(inst, result); } void EmitX64::EmitNot(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - IR::Value a = inst->GetArg(0); + auto args = reg_alloc.GetArgumentInfo(inst); - if (a.IsImmediate()) { - Xbyak::Reg32 result = reg_alloc.DefGpr(inst).cvt32(); - - code->mov(result, u32(~a.GetU32())); + Xbyak::Reg32 result; + if (args[0].IsImmediate()) { + result = reg_alloc.ScratchGpr().cvt32(); + code->mov(result, u32(~args[0].GetImmediateU32())); } else { - Xbyak::Reg32 result = reg_alloc.UseDefGpr(a, inst).cvt32(); - + result = reg_alloc.UseScratchGpr(args[0]).cvt32(); code->not_(result); } + reg_alloc.DefineValue(inst, result); } void EmitX64::EmitSignExtendWordToLong(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - OpArg source; - Xbyak::Reg64 result; - if (inst->GetArg(0).IsImmediate()) { - // TODO: Optimize - result = reg_alloc.UseDefGpr(inst->GetArg(0), inst); - source = result; - } else { - std::tie(source, result) = reg_alloc.UseDefOpArgGpr(inst->GetArg(0), inst); - } - - source.setBit(32); - code->movsxd(result.cvt64(), *source); + auto args = reg_alloc.GetArgumentInfo(inst); + Xbyak::Reg64 result = reg_alloc.UseScratchGpr(args[0]); + code->movsxd(result.cvt64(), result.cvt32()); + reg_alloc.DefineValue(inst, result); } void EmitX64::EmitSignExtendHalfToWord(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - OpArg source; - Xbyak::Reg64 result; - if (inst->GetArg(0).IsImmediate()) { - // TODO: Optimize - result = reg_alloc.UseDefGpr(inst->GetArg(0), inst); - source = result; - } else { - std::tie(source, result) = reg_alloc.UseDefOpArgGpr(inst->GetArg(0), inst); - } - - source.setBit(16); - code->movsx(result.cvt32(), *source); + auto args = reg_alloc.GetArgumentInfo(inst); + Xbyak::Reg64 result = reg_alloc.UseScratchGpr(args[0]); + code->movsx(result.cvt32(), result.cvt16()); + reg_alloc.DefineValue(inst, result); } void EmitX64::EmitSignExtendByteToWord(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - OpArg source; - Xbyak::Reg64 result; - if (inst->GetArg(0).IsImmediate()) { - // TODO: Optimize - result = reg_alloc.UseDefGpr(inst->GetArg(0), inst); - source = result; - } else { - std::tie(source, result) = reg_alloc.UseDefOpArgGpr(inst->GetArg(0), inst); - } - - source.setBit(8); - code->movsx(result.cvt32(), *source); + auto args = reg_alloc.GetArgumentInfo(inst); + Xbyak::Reg64 result = reg_alloc.UseScratchGpr(args[0]); + 
code->movsx(result.cvt32(), result.cvt8()); + reg_alloc.DefineValue(inst, result); } void EmitX64::EmitZeroExtendWordToLong(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - OpArg source; - Xbyak::Reg64 result; - if (inst->GetArg(0).IsImmediate()) { - // TODO: Optimize - result = reg_alloc.UseDefGpr(inst->GetArg(0), inst); - source = result; - } else { - std::tie(source, result) = reg_alloc.UseDefOpArgGpr(inst->GetArg(0), inst); - } - - source.setBit(32); - code->mov(result.cvt32(), *source); // x64 zeros upper 32 bits on a 32-bit move + auto args = reg_alloc.GetArgumentInfo(inst); + Xbyak::Reg64 result = reg_alloc.UseScratchGpr(args[0]); + code->mov(result.cvt32(), result.cvt32()); // x64 zeros upper 32 bits on a 32-bit move + reg_alloc.DefineValue(inst, result); } void EmitX64::EmitZeroExtendHalfToWord(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - OpArg source; - Xbyak::Reg64 result; - if (inst->GetArg(0).IsImmediate()) { - // TODO: Optimize - result = reg_alloc.UseDefGpr(inst->GetArg(0), inst); - source = result; - } else { - std::tie(source, result) = reg_alloc.UseDefOpArgGpr(inst->GetArg(0), inst); - } - - source.setBit(16); - code->movzx(result.cvt32(), *source); + auto args = reg_alloc.GetArgumentInfo(inst); + Xbyak::Reg64 result = reg_alloc.UseScratchGpr(args[0]); + code->movzx(result.cvt32(), result.cvt16()); + reg_alloc.DefineValue(inst, result); } void EmitX64::EmitZeroExtendByteToWord(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - OpArg source; - Xbyak::Reg64 result; - if (inst->GetArg(0).IsImmediate()) { - // TODO: Optimize - result = reg_alloc.UseDefGpr(inst->GetArg(0), inst); - source = result; - } else { - std::tie(source, result) = reg_alloc.UseDefOpArgGpr(inst->GetArg(0), inst); - } - - source.setBit(8); - code->movzx(result.cvt32(), *source); + auto args = reg_alloc.GetArgumentInfo(inst); + Xbyak::Reg64 result = reg_alloc.UseScratchGpr(args[0]); + code->movzx(result.cvt32(), result.cvt8()); + reg_alloc.DefineValue(inst, result); } void EmitX64::EmitByteReverseWord(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - Xbyak::Reg32 result = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32(); - + auto args = reg_alloc.GetArgumentInfo(inst); + Xbyak::Reg32 result = reg_alloc.UseScratchGpr(args[0]).cvt32(); code->bswap(result); + reg_alloc.DefineValue(inst, result); } void EmitX64::EmitByteReverseHalf(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - Xbyak::Reg16 result = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt16(); - + auto args = reg_alloc.GetArgumentInfo(inst); + Xbyak::Reg16 result = reg_alloc.UseScratchGpr(args[0]).cvt16(); code->rol(result, 8); + reg_alloc.DefineValue(inst, result); } void EmitX64::EmitByteReverseDual(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - Xbyak::Reg64 result = reg_alloc.UseDefGpr(inst->GetArg(0), inst); - + auto args = reg_alloc.GetArgumentInfo(inst); + Xbyak::Reg64 result = reg_alloc.UseScratchGpr(args[0]); code->bswap(result); + reg_alloc.DefineValue(inst, result); } void EmitX64::EmitCountLeadingZeros(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - IR::Value a = inst->GetArg(0); - + auto args = reg_alloc.GetArgumentInfo(inst); if (cpu_info.has(Xbyak::util::Cpu::tLZCNT)) { - Xbyak::Reg32 source = reg_alloc.UseGpr(a).cvt32(); - Xbyak::Reg32 result = reg_alloc.DefGpr(inst).cvt32(); + Xbyak::Reg32 source = reg_alloc.UseGpr(args[0]).cvt32(); + Xbyak::Reg32 result = reg_alloc.ScratchGpr().cvt32(); code->lzcnt(result, source); + + reg_alloc.DefineValue(inst, result); } else { - Xbyak::Reg32 source = 
reg_alloc.UseScratchGpr(a).cvt32(); - Xbyak::Reg32 result = reg_alloc.DefGpr(inst).cvt32(); + Xbyak::Reg32 source = reg_alloc.UseScratchGpr(args[0]).cvt32(); + Xbyak::Reg32 result = reg_alloc.ScratchGpr().cvt32(); // The result of a bsr of zero is undefined, but zf is set after it. code->bsr(result, source); @@ -1274,18 +1303,19 @@ void EmitX64::EmitCountLeadingZeros(RegAlloc& reg_alloc, IR::Block&, IR::Inst* i code->cmovz(result, source); code->neg(result); code->add(result, 31); + + reg_alloc.DefineValue(inst, result); } } void EmitX64::EmitSignedSaturatedAdd(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) { auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp); - IR::Value a = inst->GetArg(0); - IR::Value b = inst->GetArg(1); + auto args = reg_alloc.GetArgumentInfo(inst); - Xbyak::Reg32 result = reg_alloc.UseDefGpr(a, inst).cvt32(); - Xbyak::Reg32 addend = reg_alloc.UseGpr(b).cvt32(); - Xbyak::Reg32 overflow = overflow_inst ? reg_alloc.DefGpr(overflow_inst).cvt32() : reg_alloc.ScratchGpr().cvt32(); + Xbyak::Reg32 result = reg_alloc.UseScratchGpr(args[0]).cvt32(); + Xbyak::Reg32 addend = reg_alloc.UseGpr(args[1]).cvt32(); + Xbyak::Reg32 overflow = reg_alloc.ScratchGpr().cvt32(); code->mov(overflow, result); code->shr(overflow, 31); @@ -1294,22 +1324,25 @@ void EmitX64::EmitSignedSaturatedAdd(RegAlloc& reg_alloc, IR::Block& block, IR:: code->add(result, addend); code->cmovo(result, overflow); + reg_alloc.DefineValue(inst, result); + if (overflow_inst) { EraseInstruction(block, overflow_inst); code->seto(overflow.cvt8()); + + reg_alloc.DefineValue(overflow_inst, overflow); } } void EmitX64::EmitSignedSaturatedSub(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) { auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp); - IR::Value a = inst->GetArg(0); - IR::Value b = inst->GetArg(1); + auto args = reg_alloc.GetArgumentInfo(inst); - Xbyak::Reg32 result = reg_alloc.UseDefGpr(a, inst).cvt32(); - Xbyak::Reg32 subend = reg_alloc.UseGpr(b).cvt32(); - Xbyak::Reg32 overflow = overflow_inst ? reg_alloc.DefGpr(overflow_inst).cvt32() : reg_alloc.ScratchGpr().cvt32(); + Xbyak::Reg32 result = reg_alloc.UseScratchGpr(args[0]).cvt32(); + Xbyak::Reg32 subend = reg_alloc.UseGpr(args[1]).cvt32(); + Xbyak::Reg32 overflow = reg_alloc.ScratchGpr().cvt32(); code->mov(overflow, result); code->shr(overflow, 31); @@ -1318,25 +1351,29 @@ void EmitX64::EmitSignedSaturatedSub(RegAlloc& reg_alloc, IR::Block& block, IR:: code->sub(result, subend); code->cmovo(result, overflow); + reg_alloc.DefineValue(inst, result); + if (overflow_inst) { EraseInstruction(block, overflow_inst); code->seto(overflow.cvt8()); + + reg_alloc.DefineValue(overflow_inst, overflow); } } void EmitX64::EmitUnsignedSaturation(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) { auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp); - IR::Value a = inst->GetArg(0); - size_t N = inst->GetArg(1).GetU8(); + auto args = reg_alloc.GetArgumentInfo(inst); + size_t N = args[1].GetImmediateU8(); ASSERT(N <= 31); u32 saturated_value = (1u << N) - 1; - Xbyak::Reg32 result = reg_alloc.DefGpr(inst).cvt32(); - Xbyak::Reg32 reg_a = reg_alloc.UseGpr(a).cvt32(); - Xbyak::Reg32 overflow = overflow_inst ? 
reg_alloc.DefGpr(overflow_inst).cvt32() : reg_alloc.ScratchGpr().cvt32(); + Xbyak::Reg32 result = reg_alloc.ScratchGpr().cvt32(); + Xbyak::Reg32 reg_a = reg_alloc.UseGpr(args[0]).cvt32(); + Xbyak::Reg32 overflow = reg_alloc.ScratchGpr().cvt32(); // Pseudocode: result = clamp(reg_a, 0, saturated_value); code->xor_(overflow, overflow); @@ -1345,22 +1382,26 @@ void EmitX64::EmitUnsignedSaturation(RegAlloc& reg_alloc, IR::Block& block, IR:: code->cmovle(result, overflow); code->cmovbe(result, reg_a); + reg_alloc.DefineValue(inst, result); + if (overflow_inst) { EraseInstruction(block, overflow_inst); code->seta(overflow.cvt8()); + + reg_alloc.DefineValue(overflow_inst, overflow); } } void EmitX64::EmitSignedSaturation(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) { auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp); - IR::Value a = inst->GetArg(0); - size_t N = inst->GetArg(1).GetU8(); + auto args = reg_alloc.GetArgumentInfo(inst); + size_t N = args[1].GetImmediateU8(); ASSERT(N >= 1 && N <= 32); if (N == 32) { - reg_alloc.RegisterAddDef(inst, a); + reg_alloc.DefineValue(inst, args[0]); if (overflow_inst) { auto no_overflow = IR::Value(false); overflow_inst->ReplaceUsesWith(no_overflow); @@ -1373,9 +1414,9 @@ void EmitX64::EmitSignedSaturation(RegAlloc& reg_alloc, IR::Block& block, IR::In u32 negative_saturated_value = 1u << (N - 1); u32 sext_negative_satured_value = Common::SignExtend(N, negative_saturated_value); - Xbyak::Reg32 result = reg_alloc.DefGpr(inst).cvt32(); - Xbyak::Reg32 reg_a = reg_alloc.UseGpr(a).cvt32(); - Xbyak::Reg32 overflow = overflow_inst ? reg_alloc.DefGpr(overflow_inst).cvt32() : reg_alloc.ScratchGpr().cvt32(); + Xbyak::Reg32 result = reg_alloc.ScratchGpr().cvt32(); + Xbyak::Reg32 reg_a = reg_alloc.UseGpr(args[0]).cvt32(); + Xbyak::Reg32 overflow = reg_alloc.ScratchGpr().cvt32(); Xbyak::Reg32 tmp = reg_alloc.ScratchGpr().cvt32(); // overflow now contains a value between 0 and mask if it was originally between {negative,positive}_saturated_value. 
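The trick is a biased range check: shifting the representable interval up by -negative_saturated_value maps it onto [0, mask], so a single unsigned comparison classifies the input. A scalar sketch of the intended behaviour (illustrative and standalone; the emitted code reaches the same result with cmovs rather than branches):

    #include <cstdint>

    // Signed saturation to an N-bit result, N in [1, 31]; N == 32 is
    // special-cased separately, as in the emitter above.
    std::uint32_t SignedSaturate(std::int32_t value, unsigned N, bool& overflow) {
        const std::int32_t positive_saturated = (1 << (N - 1)) - 1;
        const std::int32_t negative_saturated = -(1 << (N - 1));
        const std::uint32_t mask = (1u << N) - 1;
        const std::uint32_t biased = static_cast<std::uint32_t>(value) + (1u << (N - 1));
        overflow = biased > mask;
        if (!overflow)
            return static_cast<std::uint32_t>(value);
        return static_cast<std::uint32_t>(value < 0 ? negative_saturated : positive_saturated);
    }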
@@ -1391,10 +1432,14 @@ void EmitX64::EmitSignedSaturation(RegAlloc& reg_alloc, IR::Block& block, IR::In code->cmp(overflow, mask); code->cmovbe(result, reg_a); + reg_alloc.DefineValue(inst, result); + if (overflow_inst) { EraseInstruction(block, overflow_inst); code->seta(overflow.cvt8()); + + reg_alloc.DefineValue(overflow_inst, overflow); } } @@ -1435,330 +1480,269 @@ static void ExtractAndDuplicateMostSignificantBitFromPackedWords(BlockOfCode* co } void EmitX64::EmitPackedAddU8(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) { + auto args = reg_alloc.GetArgumentInfo(inst); auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp); - IR::Value a = inst->GetArg(0); - IR::Value b = inst->GetArg(1); + Xbyak::Xmm xmm_a = reg_alloc.UseScratchXmm(args[0]); + Xbyak::Xmm xmm_b = reg_alloc.UseXmm(args[1]); - Xbyak::Reg32 reg_a = reg_alloc.UseScratchGpr(a).cvt32(); - Xbyak::Reg32 reg_b = reg_alloc.UseScratchGpr(b).cvt32(); - Xbyak::Reg32 result = reg_alloc.DefGpr(inst).cvt32(); - Xbyak::Reg32 reg_ge, tmp; + code->paddb(xmm_a, xmm_b); if (ge_inst) { EraseInstruction(block, ge_inst); - reg_ge = reg_alloc.DefGpr(ge_inst).cvt32(); - tmp = reg_alloc.ScratchGpr().cvt32(); + Xbyak::Reg32 reg_ge = reg_alloc.ScratchGpr().cvt32(); + Xbyak::Xmm tmp = reg_alloc.ScratchXmm(); - code->mov(reg_ge, reg_a); - code->and_(reg_ge, reg_b); + code->movdqa(tmp, xmm_a); + code->pminub(tmp, xmm_b); + code->pcmpeqb(tmp, xmm_b); + code->movd(reg_ge, tmp); + code->not_(reg_ge); + + ExtractMostSignificantBitFromPackedBytes(cpu_info, code, reg_alloc, reg_ge); + reg_alloc.DefineValue(ge_inst, reg_ge); } - // SWAR Arithmetic - code->mov(result, reg_a); - code->xor_(result, reg_b); - code->and_(result, 0x80808080); - code->and_(reg_a, 0x7F7F7F7F); - code->and_(reg_b, 0x7F7F7F7F); - code->add(reg_a, reg_b); - if (ge_inst) { - code->mov(tmp, result); - code->and_(tmp, reg_a); - code->or_(reg_ge, tmp); - } - code->xor_(result, reg_a); - if (ge_inst) { - ExtractMostSignificantBitFromPackedBytes(cpu_info, code, reg_alloc, reg_ge, tmp); - } + reg_alloc.DefineValue(inst, xmm_a); } void EmitX64::EmitPackedAddS8(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) { + auto args = reg_alloc.GetArgumentInfo(inst); auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp); - IR::Value a = inst->GetArg(0); - IR::Value b = inst->GetArg(1); - - Xbyak::Reg32 reg_a = reg_alloc.UseDefGpr(a, inst).cvt32(); - Xbyak::Reg32 reg_b = reg_alloc.UseGpr(b).cvt32(); Xbyak::Reg32 reg_ge; - Xbyak::Xmm xmm_a = reg_alloc.ScratchXmm(); - Xbyak::Xmm xmm_b = reg_alloc.ScratchXmm(); + Xbyak::Xmm xmm_a = reg_alloc.UseScratchXmm(args[0]); + Xbyak::Xmm xmm_b = reg_alloc.UseXmm(args[1]); if (ge_inst) { EraseInstruction(block, ge_inst); - reg_ge = reg_alloc.DefGpr(ge_inst).cvt32(); - } - - code->movd(xmm_a, reg_a); - code->movd(xmm_b, reg_b); - if (ge_inst) { Xbyak::Xmm saturated_sum = reg_alloc.ScratchXmm(); + reg_ge = reg_alloc.ScratchGpr().cvt32(); + code->movdqa(saturated_sum, xmm_a); code->paddsb(saturated_sum, xmm_b); code->movd(reg_ge, saturated_sum); } + code->paddb(xmm_a, xmm_b); - code->movd(reg_a, xmm_a); + if (ge_inst) { code->not_(reg_ge); ExtractMostSignificantBitFromPackedBytes(cpu_info, code, reg_alloc, reg_ge); + reg_alloc.DefineValue(ge_inst, reg_ge); } + + reg_alloc.DefineValue(inst, xmm_a); } void EmitX64::EmitPackedAddU16(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) { + auto args = reg_alloc.GetArgumentInfo(inst); auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp); - 
IR::Value a = inst->GetArg(0); - IR::Value b = inst->GetArg(1); + Xbyak::Xmm xmm_a = reg_alloc.UseScratchXmm(args[0]); + Xbyak::Xmm xmm_b = reg_alloc.UseXmm(args[1]); - Xbyak::Reg32 reg_a = reg_alloc.UseScratchGpr(a).cvt32(); - Xbyak::Reg32 reg_b = reg_alloc.UseScratchGpr(b).cvt32(); - Xbyak::Reg32 result = reg_alloc.DefGpr(inst).cvt32(); - Xbyak::Reg32 reg_ge, tmp; + code->paddw(xmm_a, xmm_b); if (ge_inst) { EraseInstruction(block, ge_inst); - reg_ge = reg_alloc.DefGpr(ge_inst).cvt32(); + Xbyak::Reg32 reg_ge = reg_alloc.ScratchGpr().cvt32(); + Xbyak::Xmm tmp = reg_alloc.ScratchXmm(); - code->mov(reg_ge, reg_a); - code->and_(reg_ge, reg_b); + code->movdqa(tmp, xmm_a); + code->pminuw(tmp, xmm_b); + code->pcmpeqw(tmp, xmm_b); + code->movd(reg_ge, tmp); + code->not_(reg_ge); + + ExtractMostSignificantBitFromPackedBytes(cpu_info, code, reg_alloc, reg_ge); + reg_alloc.DefineValue(ge_inst, reg_ge); } - // SWAR Arithmetic - code->mov(result, reg_a); - code->xor_(result, reg_b); - code->and_(result, 0x80008000); - code->and_(reg_a, 0x7FFF7FFF); - code->and_(reg_b, 0x7FFF7FFF); - code->add(reg_a, reg_b); - if (ge_inst) { - tmp = reg_alloc.ScratchGpr().cvt32(); - code->mov(tmp, result); - code->and_(tmp, reg_a); - code->or_(reg_ge, tmp); - } - code->xor_(result, reg_a); - if (ge_inst) { - ExtractAndDuplicateMostSignificantBitFromPackedWords(code, reg_ge); - } + reg_alloc.DefineValue(inst, xmm_a); } void EmitX64::EmitPackedAddS16(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) { + auto args = reg_alloc.GetArgumentInfo(inst); auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp); - IR::Value a = inst->GetArg(0); - IR::Value b = inst->GetArg(1); - - Xbyak::Reg32 reg_a = reg_alloc.UseDefGpr(a, inst).cvt32(); - Xbyak::Reg32 reg_b = reg_alloc.UseGpr(b).cvt32(); + Xbyak::Xmm xmm_a = reg_alloc.UseScratchXmm(args[0]); + Xbyak::Xmm xmm_b = reg_alloc.UseXmm(args[1]); Xbyak::Reg32 reg_ge; - Xbyak::Xmm xmm_a = reg_alloc.ScratchXmm(); - Xbyak::Xmm xmm_b = reg_alloc.ScratchXmm(); - if (ge_inst) { EraseInstruction(block, ge_inst); - reg_ge = reg_alloc.DefGpr(ge_inst).cvt32(); - } - - code->movd(xmm_a, reg_a); - code->movd(xmm_b, reg_b); - if (ge_inst) { + reg_ge = reg_alloc.ScratchGpr().cvt32(); Xbyak::Xmm saturated_sum = reg_alloc.ScratchXmm(); + code->movdqa(saturated_sum, xmm_a); code->paddsw(saturated_sum, xmm_b); code->movd(reg_ge, saturated_sum); } + code->paddw(xmm_a, xmm_b); - code->movd(reg_a, xmm_a); + if (ge_inst) { code->not_(reg_ge); ExtractAndDuplicateMostSignificantBitFromPackedWords(code, reg_ge); + reg_alloc.DefineValue(ge_inst, reg_ge); } + + reg_alloc.DefineValue(inst, xmm_a); } void EmitX64::EmitPackedSubU8(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) { + auto args = reg_alloc.GetArgumentInfo(inst); auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp); - IR::Value a = inst->GetArg(0); - IR::Value b = inst->GetArg(1); - - Xbyak::Reg32 reg_a = reg_alloc.UseDefGpr(a, inst).cvt32(); - Xbyak::Reg32 reg_b = reg_alloc.UseGpr(b).cvt32(); + Xbyak::Xmm xmm_a = reg_alloc.UseScratchXmm(args[0]); + Xbyak::Xmm xmm_b = reg_alloc.UseXmm(args[1]); Xbyak::Reg32 reg_ge; - Xbyak::Xmm xmm_a = reg_alloc.ScratchXmm(); - Xbyak::Xmm xmm_b = reg_alloc.ScratchXmm(); - Xbyak::Xmm xmm_ge; - if (ge_inst) { EraseInstruction(block, ge_inst); - reg_ge = reg_alloc.DefGpr(ge_inst).cvt32(); - xmm_ge = reg_alloc.ScratchXmm(); - } + Xbyak::Xmm xmm_ge = reg_alloc.ScratchXmm(); + reg_ge = reg_alloc.ScratchGpr().cvt32(); - code->movd(xmm_a, reg_a); - code->movd(xmm_b, reg_b); 
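The GE computation below relies on the identity that max(a, b) == a exactly when a >= b, evaluated per byte by pmaxub followed by pcmpeqb. A scalar sketch of what each GE lane receives (illustrative only, not code from this patch):

    #include <cstdint>

    // One 32-bit word of packed bytes: for unsigned packed subtraction,
    // each GE lane is all-ones when a >= b in that byte, else all-zeros.
    std::uint32_t PackedSubU8GE(std::uint32_t a, std::uint32_t b) {
        std::uint32_t ge = 0;
        for (unsigned lane = 0; lane < 4; ++lane) {
            const std::uint8_t byte_a = static_cast<std::uint8_t>(a >> (lane * 8));
            const std::uint8_t byte_b = static_cast<std::uint8_t>(b >> (lane * 8));
            if (byte_a >= byte_b)
                ge |= 0xFFu << (lane * 8);
        }
        return ge;
    }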
- if (ge_inst) { code->movdqa(xmm_ge, xmm_a); code->pmaxub(xmm_ge, xmm_b); code->pcmpeqb(xmm_ge, xmm_a); code->movd(reg_ge, xmm_ge); } + code->psubb(xmm_a, xmm_b); - code->movd(reg_a, xmm_a); if (ge_inst) { ExtractMostSignificantBitFromPackedBytes(cpu_info, code, reg_alloc, reg_ge); + reg_alloc.DefineValue(ge_inst, reg_ge); } + + reg_alloc.DefineValue(inst, xmm_a); } void EmitX64::EmitPackedSubS8(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) { + auto args = reg_alloc.GetArgumentInfo(inst); auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp); - IR::Value a = inst->GetArg(0); - IR::Value b = inst->GetArg(1); - - Xbyak::Reg32 reg_a = reg_alloc.UseDefGpr(a, inst).cvt32(); - Xbyak::Reg32 reg_b = reg_alloc.UseGpr(b).cvt32(); + Xbyak::Xmm xmm_a = reg_alloc.UseScratchXmm(args[0]); + Xbyak::Xmm xmm_b = reg_alloc.UseXmm(args[1]); Xbyak::Reg32 reg_ge; - Xbyak::Xmm xmm_a = reg_alloc.ScratchXmm(); - Xbyak::Xmm xmm_b = reg_alloc.ScratchXmm(); - if (ge_inst) { EraseInstruction(block, ge_inst); - reg_ge = reg_alloc.DefGpr(ge_inst).cvt32(); - } - code->movd(xmm_b, reg_b); - code->movd(xmm_a, reg_a); - if (ge_inst) { Xbyak::Xmm xmm_ge = reg_alloc.ScratchXmm(); + reg_ge = reg_alloc.ScratchGpr().cvt32(); + code->movdqa(xmm_ge, xmm_a); code->psubsb(xmm_ge, xmm_b); code->movd(reg_ge, xmm_ge); } + code->psubb(xmm_a, xmm_b); - code->movd(reg_a, xmm_a); + if (ge_inst) { code->not_(reg_ge); ExtractMostSignificantBitFromPackedBytes(cpu_info, code, reg_alloc, reg_ge); + reg_alloc.DefineValue(ge_inst, reg_ge); } + + reg_alloc.DefineValue(inst, xmm_a); } void EmitX64::EmitPackedSubU16(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) { + auto args = reg_alloc.GetArgumentInfo(inst); auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp); - IR::Value a = inst->GetArg(0); - IR::Value b = inst->GetArg(1); - - Xbyak::Reg32 reg_a = reg_alloc.UseDefGpr(a, inst).cvt32(); - Xbyak::Reg32 reg_b = reg_alloc.UseGpr(b).cvt32(); + Xbyak::Xmm xmm_a = reg_alloc.UseScratchXmm(args[0]); + Xbyak::Xmm xmm_b = reg_alloc.UseXmm(args[1]); Xbyak::Reg32 reg_ge; - Xbyak::Xmm xmm_a = reg_alloc.ScratchXmm(); - Xbyak::Xmm xmm_b = reg_alloc.ScratchXmm(); - Xbyak::Xmm xmm_ge; - if (ge_inst) { EraseInstruction(block, ge_inst); - reg_ge = reg_alloc.DefGpr(ge_inst).cvt32(); - xmm_ge = reg_alloc.ScratchXmm(); - } + reg_ge = reg_alloc.ScratchGpr().cvt32(); + Xbyak::Xmm xmm_ge = reg_alloc.ScratchXmm(); - code->movd(xmm_a, reg_a); - code->movd(xmm_b, reg_b); - if (ge_inst) { code->movdqa(xmm_ge, xmm_a); code->pmaxuw(xmm_ge, xmm_b); code->pcmpeqw(xmm_ge, xmm_a); code->movd(reg_ge, xmm_ge); } + code->psubw(xmm_a, xmm_b); - code->movd(reg_a, xmm_a); + if (ge_inst) { ExtractAndDuplicateMostSignificantBitFromPackedWords(code, reg_ge); + reg_alloc.DefineValue(ge_inst, reg_ge); } + + reg_alloc.DefineValue(inst, xmm_a); } void EmitX64::EmitPackedSubS16(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) { + auto args = reg_alloc.GetArgumentInfo(inst); auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp); - IR::Value a = inst->GetArg(0); - IR::Value b = inst->GetArg(1); - - Xbyak::Reg32 reg_a = reg_alloc.UseDefGpr(a, inst).cvt32(); - Xbyak::Reg32 reg_b = reg_alloc.UseGpr(b).cvt32(); + Xbyak::Xmm xmm_a = reg_alloc.UseScratchXmm(args[0]); + Xbyak::Xmm xmm_b = reg_alloc.UseXmm(args[1]); Xbyak::Reg32 reg_ge; - Xbyak::Xmm xmm_a = reg_alloc.ScratchXmm(); - Xbyak::Xmm xmm_b = reg_alloc.ScratchXmm(); - if (ge_inst) { EraseInstruction(block, ge_inst); - reg_ge = 
reg_alloc.DefGpr(ge_inst).cvt32(); - } - - code->movd(xmm_b, reg_b); - code->movd(xmm_a, reg_a); - if (ge_inst) { Xbyak::Xmm xmm_ge = reg_alloc.ScratchXmm(); + reg_ge = reg_alloc.ScratchGpr().cvt32(); + code->movdqa(xmm_ge, xmm_a); code->psubsw(xmm_ge, xmm_b); code->movd(reg_ge, xmm_ge); } + code->psubw(xmm_a, xmm_b); - code->movd(reg_a, xmm_a); + if (ge_inst) { code->not_(reg_ge); ExtractAndDuplicateMostSignificantBitFromPackedWords(code, reg_ge); + reg_alloc.DefineValue(ge_inst, reg_ge); } + + reg_alloc.DefineValue(inst, xmm_a); } void EmitX64::EmitPackedHalvingAddU8(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - IR::Value a = inst->GetArg(0); - IR::Value b = inst->GetArg(1); + auto args = reg_alloc.GetArgumentInfo(inst); // This code path requires SSSE3 because of the PSHUFB instruction. // A fallback implementation is provided below. if (cpu_info.has(Xbyak::util::Cpu::tSSSE3)) { - Xbyak::Reg32 result = reg_alloc.UseDefGpr(a, inst).cvt32(); - Xbyak::Reg32 arg = reg_alloc.UseGpr(b).cvt32(); - - // Load the operands into Xmm registers - Xbyak::Xmm xmm_scratch_a = reg_alloc.ScratchXmm(); - Xbyak::Xmm xmm_scratch_b = reg_alloc.ScratchXmm(); + Xbyak::Xmm xmm_a = reg_alloc.UseScratchXmm(args[0]); + Xbyak::Xmm xmm_b = reg_alloc.UseScratchXmm(args[1]); Xbyak::Xmm xmm_mask = reg_alloc.ScratchXmm(); Xbyak::Reg64 mask = reg_alloc.ScratchGpr(); - code->movd(xmm_scratch_a, result); - code->movd(xmm_scratch_b, arg); - // Set the mask to expand the values // 0xAABBCCDD becomes 0x00AA00BB00CC00DD code->mov(mask, 0x8003800280018000); code->movq(xmm_mask, mask); // Expand each 8-bit value to 16-bit - code->pshufb(xmm_scratch_a, xmm_mask); - code->pshufb(xmm_scratch_b, xmm_mask); + code->pshufb(xmm_a, xmm_mask); + code->pshufb(xmm_b, xmm_mask); // Add the individual 16-bit values - code->paddw(xmm_scratch_a, xmm_scratch_b); + code->paddw(xmm_a, xmm_b); // Shift the 16-bit values to the right to halve them - code->psrlw(xmm_scratch_a, 1); + code->psrlw(xmm_a, 1); // Set the mask to pack the values again // 0x00AA00BB00CC00DD becomes 0xAABBCCDD @@ -1766,33 +1750,33 @@ void EmitX64::EmitPackedHalvingAddU8(RegAlloc& reg_alloc, IR::Block&, IR::Inst* code->movq(xmm_mask, mask); // Shuffle them back to 8-bit values - code->pshufb(xmm_scratch_a, xmm_mask); + code->pshufb(xmm_a, xmm_mask); - code->movd(result, xmm_scratch_a); - return; + reg_alloc.DefineValue(inst, xmm_a); + } else { + // Fallback implementation in case the CPU doesn't support SSSE3 + Xbyak::Reg32 reg_a = reg_alloc.UseScratchGpr(args[0]).cvt32(); + Xbyak::Reg32 reg_b = reg_alloc.UseGpr(args[1]).cvt32(); + Xbyak::Reg32 xor_a_b = reg_alloc.ScratchGpr().cvt32(); + Xbyak::Reg32 and_a_b = reg_a; + Xbyak::Reg32 result = reg_a; + + code->mov(xor_a_b, reg_a); + code->and(and_a_b, reg_b); + code->xor(xor_a_b, reg_b); + code->shr(xor_a_b, 1); + code->and(xor_a_b, 0x7F7F7F7F); + code->add(result, xor_a_b); + + reg_alloc.DefineValue(inst, result); } - - // Fallback implementation in case the CPU doesn't support SSSE3 - Xbyak::Reg32 reg_a = reg_alloc.UseDefGpr(a, inst).cvt32(); - Xbyak::Reg32 reg_b = reg_alloc.UseGpr(b).cvt32(); - Xbyak::Reg32 xor_a_b = reg_alloc.ScratchGpr().cvt32(); - Xbyak::Reg32 and_a_b = reg_a; - Xbyak::Reg32 result = reg_a; - - code->mov(xor_a_b, reg_a); - code->and(and_a_b, reg_b); - code->xor(xor_a_b, reg_b); - code->shr(xor_a_b, 1); - code->and(xor_a_b, 0x7F7F7F7F); - code->add(result, xor_a_b); } void EmitX64::EmitPackedHalvingAddU16(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - IR::Value a = inst->GetArg(0); - IR::Value 
b = inst->GetArg(1); + auto args = reg_alloc.GetArgumentInfo(inst); - Xbyak::Reg32 reg_a = reg_alloc.UseDefGpr(a, inst).cvt32(); - Xbyak::Reg32 reg_b = reg_alloc.UseGpr(b).cvt32(); + Xbyak::Reg32 reg_a = reg_alloc.UseScratchGpr(args[0]).cvt32(); + Xbyak::Reg32 reg_b = reg_alloc.UseGpr(args[1]).cvt32(); Xbyak::Reg32 xor_a_b = reg_alloc.ScratchGpr().cvt32(); Xbyak::Reg32 and_a_b = reg_a; Xbyak::Reg32 result = reg_a; @@ -1808,14 +1792,15 @@ void EmitX64::EmitPackedHalvingAddU16(RegAlloc& reg_alloc, IR::Block&, IR::Inst* code->shr(xor_a_b, 1); code->and(xor_a_b, 0x7FFF7FFF); code->add(result, xor_a_b); + + reg_alloc.DefineValue(inst, result); } void EmitX64::EmitPackedHalvingAddS8(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - IR::Value a = inst->GetArg(0); - IR::Value b = inst->GetArg(1); + auto args = reg_alloc.GetArgumentInfo(inst); - Xbyak::Reg32 reg_a = reg_alloc.UseDefGpr(a, inst).cvt32(); - Xbyak::Reg32 reg_b = reg_alloc.UseGpr(b).cvt32(); + Xbyak::Reg32 reg_a = reg_alloc.UseScratchGpr(args[0]).cvt32(); + Xbyak::Reg32 reg_b = reg_alloc.UseGpr(args[1]).cvt32(); Xbyak::Reg32 xor_a_b = reg_alloc.ScratchGpr().cvt32(); Xbyak::Reg32 and_a_b = reg_a; Xbyak::Reg32 result = reg_a; @@ -1836,14 +1821,15 @@ void EmitX64::EmitPackedHalvingAddS8(RegAlloc& reg_alloc, IR::Block&, IR::Inst* code->and(xor_a_b, 0x7F7F7F7F); code->add(result, xor_a_b); code->xor(result, carry); + + reg_alloc.DefineValue(inst, result); } void EmitX64::EmitPackedHalvingAddS16(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - IR::Value a = inst->GetArg(0); - IR::Value b = inst->GetArg(1); + auto args = reg_alloc.GetArgumentInfo(inst); - Xbyak::Reg32 reg_a = reg_alloc.UseDefGpr(a, inst).cvt32(); - Xbyak::Reg32 reg_b = reg_alloc.UseGpr(b).cvt32(); + Xbyak::Reg32 reg_a = reg_alloc.UseScratchGpr(args[0]).cvt32(); + Xbyak::Reg32 reg_b = reg_alloc.UseGpr(args[1]).cvt32(); Xbyak::Reg32 xor_a_b = reg_alloc.ScratchGpr().cvt32(); Xbyak::Reg32 and_a_b = reg_a; Xbyak::Reg32 result = reg_a; @@ -1864,14 +1850,15 @@ void EmitX64::EmitPackedHalvingAddS16(RegAlloc& reg_alloc, IR::Block&, IR::Inst* code->and(xor_a_b, 0x7FFF7FFF); code->add(result, xor_a_b); code->xor(result, carry); + + reg_alloc.DefineValue(inst, result); } void EmitX64::EmitPackedHalvingSubU8(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - IR::Value a = inst->GetArg(0); - IR::Value b = inst->GetArg(1); + auto args = reg_alloc.GetArgumentInfo(inst); - Xbyak::Reg32 minuend = reg_alloc.UseDefGpr(a, inst).cvt32(); - Xbyak::Reg32 subtrahend = reg_alloc.UseScratchGpr(b).cvt32(); + Xbyak::Reg32 minuend = reg_alloc.UseScratchGpr(args[0]).cvt32(); + Xbyak::Reg32 subtrahend = reg_alloc.UseScratchGpr(args[1]).cvt32(); // This relies on the equality x-y == (x^y) - (((x^y)&y) << 1). // Note that x^y always contains the LSB of the result. @@ -1894,14 +1881,14 @@ void EmitX64::EmitPackedHalvingSubU8(RegAlloc& reg_alloc, IR::Block&, IR::Inst* code->xor(minuend, 0x80808080); // minuend now contains the desired result. 
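The halving emitters in these hunks lean on two standard SWAR identities. The unsigned halving-add fallbacks compute the per-lane floor average as (x & y) + ((x ^ y) >> 1), with the shifted term masked so bits cannot leak across lane boundaries, and the signed variants then fix up the sign bits with the separately computed carry mask. The halving subtracts rely on x - y == (x ^ y) - (((x ^ y) & y) << 1), where (x ^ y) & y is exactly the borrow mask ~x & y; the per-lane versions bias each lane's top bit (the or/xor with 0x80808080 or 0x80008000) so borrows cannot propagate across lanes. A brute-force check of both identities (a standalone sketch, not patch code):

    #include <cassert>
    #include <cstdint>

    int main() {
        for (std::uint32_t x = 0; x < 256; ++x) {
            for (std::uint32_t y = 0; y < 256; ++y) {
                // Carry-free average, as used by the halving-add fallbacks.
                assert((x & y) + ((x ^ y) >> 1) == (x + y) / 2);
                // Subtraction via xor and doubled borrow, as used by the halving subs.
                assert(static_cast<std::uint32_t>(x - y) ==
                       static_cast<std::uint32_t>((x ^ y) - (((x ^ y) & y) << 1)));
            }
        }
        return 0;
    }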
+ reg_alloc.DefineValue(inst, minuend); } void EmitX64::EmitPackedHalvingSubS8(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - IR::Value a = inst->GetArg(0); - IR::Value b = inst->GetArg(1); + auto args = reg_alloc.GetArgumentInfo(inst); - Xbyak::Reg32 minuend = reg_alloc.UseDefGpr(a, inst).cvt32(); - Xbyak::Reg32 subtrahend = reg_alloc.UseScratchGpr(b).cvt32(); + Xbyak::Reg32 minuend = reg_alloc.UseScratchGpr(args[0]).cvt32(); + Xbyak::Reg32 subtrahend = reg_alloc.UseScratchGpr(args[1]).cvt32(); Xbyak::Reg32 carry = reg_alloc.ScratchGpr().cvt32(); @@ -1929,14 +1916,15 @@ void EmitX64::EmitPackedHalvingSubS8(RegAlloc& reg_alloc, IR::Block&, IR::Inst* code->sub(minuend, subtrahend); code->xor(minuend, 0x80808080); code->xor(minuend, carry); + + reg_alloc.DefineValue(inst, minuend); } void EmitX64::EmitPackedHalvingSubU16(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - IR::Value a = inst->GetArg(0); - IR::Value b = inst->GetArg(1); + auto args = reg_alloc.GetArgumentInfo(inst); - Xbyak::Reg32 minuend = reg_alloc.UseDefGpr(a, inst).cvt32(); - Xbyak::Reg32 subtrahend = reg_alloc.UseScratchGpr(b).cvt32(); + Xbyak::Reg32 minuend = reg_alloc.UseScratchGpr(args[0]).cvt32(); + Xbyak::Reg32 subtrahend = reg_alloc.UseScratchGpr(args[1]).cvt32(); // This relies on the equality x-y == (x^y) - (((x^y)&y) << 1). // Note that x^y always contains the LSB of the result. @@ -1957,14 +1945,15 @@ void EmitX64::EmitPackedHalvingSubU16(RegAlloc& reg_alloc, IR::Block&, IR::Inst* code->or(minuend, 0x80008000); code->sub(minuend, subtrahend); code->xor(minuend, 0x80008000); + + reg_alloc.DefineValue(inst, minuend); } void EmitX64::EmitPackedHalvingSubS16(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - IR::Value a = inst->GetArg(0); - IR::Value b = inst->GetArg(1); + auto args = reg_alloc.GetArgumentInfo(inst); - Xbyak::Reg32 minuend = reg_alloc.UseDefGpr(a, inst).cvt32(); - Xbyak::Reg32 subtrahend = reg_alloc.UseScratchGpr(b).cvt32(); + Xbyak::Reg32 minuend = reg_alloc.UseScratchGpr(args[0]).cvt32(); + Xbyak::Reg32 subtrahend = reg_alloc.UseScratchGpr(args[1]).cvt32(); Xbyak::Reg32 carry = reg_alloc.ScratchGpr().cvt32(); @@ -1992,106 +1981,76 @@ void EmitX64::EmitPackedHalvingSubS16(RegAlloc& reg_alloc, IR::Block&, IR::Inst* code->sub(minuend, subtrahend); code->xor(minuend, 0x80008000); code->xor(minuend, carry); + + reg_alloc.DefineValue(inst, minuend); +} + +void EmitPackedHalvingSubAdd(BlockOfCode* code, RegAlloc& reg_alloc, IR::Inst* inst, bool is_signed) { + auto args = reg_alloc.GetArgumentInfo(inst); + + Xbyak::Reg32 reg_a_hi = reg_alloc.UseScratchGpr(args[0]).cvt32(); + Xbyak::Reg32 reg_b_hi = reg_alloc.UseScratchGpr(args[1]).cvt32(); + Xbyak::Reg32 reg_a_lo = reg_alloc.ScratchGpr().cvt32(); + Xbyak::Reg32 reg_b_lo = reg_alloc.ScratchGpr().cvt32(); + + // If asx is true, the high word contains the sum and the low word the difference. + // If false, the high word contains the difference and the low word the sum. + bool asx = args[2].GetImmediateU1(); + + if (is_signed) { + code->movsx(reg_a_lo, reg_a_hi.cvt16()); + code->movsx(reg_b_lo, reg_b_hi.cvt16()); + code->sar(reg_a_hi, 16); + code->sar(reg_b_hi, 16); + } else { + code->movzx(reg_a_lo, reg_a_hi.cvt16()); + code->movzx(reg_b_lo, reg_b_hi.cvt16()); + code->shr(reg_a_hi, 16); + code->shr(reg_b_hi, 16); + } + + if (asx) { + // Calculate diff such that reg_a_lo<31:16> contains diff<16:1>. + code->sub(reg_a_lo, reg_b_hi); + code->shl(reg_a_lo, 15); + + // Calculate sum such that reg_a_hi<15:0> contains sum<16:1>. 
+ code->add(reg_a_hi, reg_b_lo); + code->shr(reg_a_hi, 1); + } else { + // Calculate sum such that reg_a_lo<31:16> contains sum<16:1>. + code->add(reg_a_lo, reg_b_hi); + code->shl(reg_a_lo, 15); + + // Calculate diff such that reg_a_hi<15:0> contains diff<16:1>. + code->sub(reg_a_hi, reg_b_lo); + code->shr(reg_a_hi, 1); + } + + // reg_a_lo now contains the low word and reg_a_hi now contains the high word. + // Merge them. + code->shld(reg_a_hi, reg_a_lo, 16); + + reg_alloc.DefineValue(inst, reg_a_hi); } void EmitX64::EmitPackedHalvingSubAddU16(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - IR::Value a = inst->GetArg(0); - IR::Value b = inst->GetArg(1); - - // If asx is true, the high word contains the sum and the low word the difference. - // If false, the high word contains the difference and the low word the sum. - bool asx = inst->GetArg(2).GetU1(); - - Xbyak::Reg32 reg_a_hi = reg_alloc.UseDefGpr(a, inst).cvt32(); - Xbyak::Reg32 reg_b_hi = reg_alloc.UseScratchGpr(b).cvt32(); - Xbyak::Reg32 reg_a_lo = reg_alloc.ScratchGpr().cvt32(); - Xbyak::Reg32 reg_b_lo = reg_alloc.ScratchGpr().cvt32(); - - code->movzx(reg_a_lo, reg_a_hi.cvt16()); - code->movzx(reg_b_lo, reg_b_hi.cvt16()); - code->shr(reg_a_hi, 16); - code->shr(reg_b_hi, 16); - - if (asx) { - // Calculate diff such that reg_a_lo<31:16> contains diff<16:1>. - code->sub(reg_a_lo, reg_b_hi); - code->shl(reg_a_lo, 15); - - // Calculate sum such that reg_a_hi<15:0> contains sum<16:1>. - code->add(reg_a_hi, reg_b_lo); - code->shr(reg_a_hi, 1); - } else { - // Calculate sum such that reg_a_lo<31:16> contains sum<16:1>. - code->add(reg_a_lo, reg_b_hi); - code->shl(reg_a_lo, 15); - - // Calculate diff such that reg_a_hi<15:0> contains diff<16:1>. - code->sub(reg_a_hi, reg_b_lo); - code->shr(reg_a_hi, 1); - } - - // reg_a_lo now contains the low word and reg_a_hi now contains the high word. - // Merge them. - code->shld(reg_a_hi, reg_a_lo, 16); + EmitPackedHalvingSubAdd(code, reg_alloc, inst, false); } void EmitX64::EmitPackedHalvingSubAddS16(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - IR::Value a = inst->GetArg(0); - IR::Value b = inst->GetArg(1); - - // If asx is true, the high word contains the sum and the low word the difference. - // If false, the high word contains the difference and the low word the sum. - bool asx = inst->GetArg(2).GetU1(); - - Xbyak::Reg32 reg_a_hi = reg_alloc.UseDefGpr(a, inst).cvt32(); - Xbyak::Reg32 reg_b_hi = reg_alloc.UseScratchGpr(b).cvt32(); - Xbyak::Reg32 reg_a_lo = reg_alloc.ScratchGpr().cvt32(); - Xbyak::Reg32 reg_b_lo = reg_alloc.ScratchGpr().cvt32(); - - code->movsx(reg_a_lo, reg_a_hi.cvt16()); - code->movsx(reg_b_lo, reg_b_hi.cvt16()); - code->sar(reg_a_hi, 16); - code->sar(reg_b_hi, 16); - - if (asx) { - // Calculate diff such that reg_a_lo<31:16> contains diff<16:1>. - code->sub(reg_a_lo, reg_b_hi); - code->shl(reg_a_lo, 15); - - // Calculate sum such that reg_a_hi<15:0> contains sum<16:1>. - code->add(reg_a_hi, reg_b_lo); - code->shr(reg_a_hi, 1); - } else { - // Calculate sum such that reg_a_lo<31:16> contains sum<16:1>. - code->add(reg_a_lo, reg_b_hi); - code->shl(reg_a_lo, 15); - - // Calculate diff such that reg_a_hi<15:0> contains diff<16:1>. - code->sub(reg_a_hi, reg_b_lo); - code->shr(reg_a_hi, 1); - } - - // reg_a_lo now contains the low word and reg_a_hi now contains the high word. - // Merge them. 
- code->shld(reg_a_hi, reg_a_lo, 16); + EmitPackedHalvingSubAdd(code, reg_alloc, inst, true); } static void EmitPackedOperation(BlockOfCode* code, RegAlloc& reg_alloc, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Mmx& mmx, const Xbyak::Operand&)) { - IR::Value a = inst->GetArg(0); - IR::Value b = inst->GetArg(1); + auto args = reg_alloc.GetArgumentInfo(inst); - Xbyak::Reg32 result = reg_alloc.UseDefGpr(a, inst).cvt32(); - Xbyak::Reg32 arg = reg_alloc.UseGpr(b).cvt32(); + Xbyak::Xmm xmm_a = reg_alloc.UseScratchXmm(args[0]); + Xbyak::Xmm xmm_b = reg_alloc.UseXmm(args[1]); - Xbyak::Xmm xmm_scratch_a = reg_alloc.ScratchXmm(); - Xbyak::Xmm xmm_scratch_b = reg_alloc.ScratchXmm(); + (code->*fn)(xmm_a, xmm_b); - code->movd(xmm_scratch_a, result); - code->movd(xmm_scratch_b, arg); - - (code->*fn)(xmm_scratch_a, xmm_scratch_b); - - code->movd(result, xmm_scratch_a); + reg_alloc.DefineValue(inst, xmm_a); } void EmitX64::EmitPackedSaturatedAddU8(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { @@ -2224,11 +2183,10 @@ static void ZeroIfNaN64(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::Xmm xmm_ } static void FPThreeOp32(BlockOfCode* code, RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) { - IR::Value a = inst->GetArg(0); - IR::Value b = inst->GetArg(1); + auto args = reg_alloc.GetArgumentInfo(inst); - Xbyak::Xmm result = reg_alloc.UseDefXmm(a, inst); - Xbyak::Xmm operand = reg_alloc.UseXmm(b); + Xbyak::Xmm result = reg_alloc.UseScratchXmm(args[0]); + Xbyak::Xmm operand = reg_alloc.UseXmm(args[1]); Xbyak::Reg32 gpr_scratch = reg_alloc.ScratchGpr().cvt32(); if (block.Location().FPSCR().FTZ()) { @@ -2242,14 +2200,15 @@ static void FPThreeOp32(BlockOfCode* code, RegAlloc& reg_alloc, IR::Block& block if (block.Location().FPSCR().DN()) { DefaultNaN32(code, result); } + + reg_alloc.DefineValue(inst, result); } static void FPThreeOp64(BlockOfCode* code, RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) { - IR::Value a = inst->GetArg(0); - IR::Value b = inst->GetArg(1); + auto args = reg_alloc.GetArgumentInfo(inst); - Xbyak::Xmm result = reg_alloc.UseDefXmm(a, inst); - Xbyak::Xmm operand = reg_alloc.UseXmm(b); + Xbyak::Xmm result = reg_alloc.UseScratchXmm(args[0]); + Xbyak::Xmm operand = reg_alloc.UseXmm(args[1]); Xbyak::Reg64 gpr_scratch = reg_alloc.ScratchGpr(); if (block.Location().FPSCR().FTZ()) { @@ -2263,12 +2222,14 @@ static void FPThreeOp64(BlockOfCode* code, RegAlloc& reg_alloc, IR::Block& block if (block.Location().FPSCR().DN()) { DefaultNaN64(code, result); } + + reg_alloc.DefineValue(inst, result); } static void FPTwoOp32(BlockOfCode* code, RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) { - IR::Value a = inst->GetArg(0); + auto args = reg_alloc.GetArgumentInfo(inst); - Xbyak::Xmm result = reg_alloc.UseDefXmm(a, inst); + Xbyak::Xmm result = reg_alloc.UseScratchXmm(args[0]); Xbyak::Reg32 gpr_scratch = reg_alloc.ScratchGpr().cvt32(); if (block.Location().FPSCR().FTZ()) { @@ -2282,12 +2243,14 @@ static void FPTwoOp32(BlockOfCode* code, RegAlloc& reg_alloc, IR::Block& block, if (block.Location().FPSCR().DN()) { DefaultNaN32(code, result); } + + reg_alloc.DefineValue(inst, result); } static void FPTwoOp64(BlockOfCode* code, RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const 
Xbyak::Operand&)) { - IR::Value a = inst->GetArg(0); + auto args = reg_alloc.GetArgumentInfo(inst); - Xbyak::Xmm result = reg_alloc.UseDefXmm(a, inst); + Xbyak::Xmm result = reg_alloc.UseScratchXmm(args[0]); Xbyak::Reg64 gpr_scratch = reg_alloc.ScratchGpr(); if (block.Location().FPSCR().FTZ()) { @@ -2301,76 +2264,76 @@ static void FPTwoOp64(BlockOfCode* code, RegAlloc& reg_alloc, IR::Block& block, if (block.Location().FPSCR().DN()) { DefaultNaN64(code, result); } + + reg_alloc.DefineValue(inst, result); } void EmitX64::EmitTransferFromFP32(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - Xbyak::Reg32 result = reg_alloc.DefGpr(inst).cvt32(); - Xbyak::Xmm source = reg_alloc.UseXmm(inst->GetArg(0)); - // TODO: Eliminate this. - code->movd(result, source); + auto args = reg_alloc.GetArgumentInfo(inst); + reg_alloc.DefineValue(inst, args[0]); } void EmitX64::EmitTransferFromFP64(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - Xbyak::Reg64 result = reg_alloc.DefGpr(inst); - Xbyak::Xmm source = reg_alloc.UseXmm(inst->GetArg(0)); - // TODO: Eliminate this. - code->movq(result, source); + auto args = reg_alloc.GetArgumentInfo(inst); + reg_alloc.DefineValue(inst, args[0]); } void EmitX64::EmitTransferToFP32(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - if (inst->GetArg(0).IsImmediate() && inst->GetArg(0).GetU32() == 0) { - Xbyak::Xmm result = reg_alloc.DefXmm(inst); + auto args = reg_alloc.GetArgumentInfo(inst); + if (args[0].IsImmediate() && args[0].GetImmediateU32() == 0) { + Xbyak::Xmm result = reg_alloc.ScratchXmm(); code->xorps(result, result); + reg_alloc.DefineValue(inst, result); } else { - Xbyak::Xmm result = reg_alloc.DefXmm(inst); - Xbyak::Reg32 source = reg_alloc.UseGpr(inst->GetArg(0)).cvt32(); - // TODO: Eliminate this. - code->movd(result, source); + reg_alloc.DefineValue(inst, args[0]); } } void EmitX64::EmitTransferToFP64(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - if (inst->GetArg(0).IsImmediate() && inst->GetArg(0).GetU64() == 0) { - Xbyak::Xmm result = reg_alloc.DefXmm(inst); - code->xorpd(result, result); + auto args = reg_alloc.GetArgumentInfo(inst); + if (args[0].IsImmediate() && args[0].GetImmediateU64() == 0) { + Xbyak::Xmm result = reg_alloc.ScratchXmm(); + code->xorps(result, result); + reg_alloc.DefineValue(inst, result); } else { - Xbyak::Xmm result = reg_alloc.DefXmm(inst); - Xbyak::Reg64 source = reg_alloc.UseGpr(inst->GetArg(0)); - // TODO: Eliminate this. 
- code->movq(result, source); + reg_alloc.DefineValue(inst, args[0]); } } void EmitX64::EmitFPAbs32(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - IR::Value a = inst->GetArg(0); - - Xbyak::Xmm result = reg_alloc.UseDefXmm(a, inst); + auto args = reg_alloc.GetArgumentInfo(inst); + Xbyak::Xmm result = reg_alloc.UseScratchXmm(args[0]); code->pand(result, code->MFloatNonSignMask32()); + + reg_alloc.DefineValue(inst, result); } void EmitX64::EmitFPAbs64(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - IR::Value a = inst->GetArg(0); - - Xbyak::Xmm result = reg_alloc.UseDefXmm(a, inst); + auto args = reg_alloc.GetArgumentInfo(inst); + Xbyak::Xmm result = reg_alloc.UseScratchXmm(args[0]); code->pand(result, code->MFloatNonSignMask64()); + + reg_alloc.DefineValue(inst, result); } void EmitX64::EmitFPNeg32(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - IR::Value a = inst->GetArg(0); - - Xbyak::Xmm result = reg_alloc.UseDefXmm(a, inst); + auto args = reg_alloc.GetArgumentInfo(inst); + Xbyak::Xmm result = reg_alloc.UseScratchXmm(args[0]); code->pxor(result, code->MFloatNegativeZero32()); + + reg_alloc.DefineValue(inst, result); } void EmitX64::EmitFPNeg64(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - IR::Value a = inst->GetArg(0); - - Xbyak::Xmm result = reg_alloc.UseDefXmm(a, inst); + auto args = reg_alloc.GetArgumentInfo(inst); + Xbyak::Xmm result = reg_alloc.UseScratchXmm(args[0]); code->pxor(result, code->MFloatNegativeZero64()); + + reg_alloc.DefineValue(inst, result); } void EmitX64::EmitFPAdd32(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) { @@ -2437,12 +2400,10 @@ static void SetFpscrNzcvFromFlags(BlockOfCode* code, RegAlloc& reg_alloc) { } void EmitX64::EmitFPCompare32(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - IR::Value a = inst->GetArg(0); - IR::Value b = inst->GetArg(1); - bool quiet = inst->GetArg(2).GetU1(); - - Xbyak::Xmm reg_a = reg_alloc.UseXmm(a); - Xbyak::Xmm reg_b = reg_alloc.UseXmm(b); + auto args = reg_alloc.GetArgumentInfo(inst); + Xbyak::Xmm reg_a = reg_alloc.UseXmm(args[0]); + Xbyak::Xmm reg_b = reg_alloc.UseXmm(args[1]); + bool quiet = args[2].GetImmediateU1(); if (quiet) { code->ucomiss(reg_a, reg_b); @@ -2454,12 +2415,10 @@ void EmitX64::EmitFPCompare32(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { } void EmitX64::EmitFPCompare64(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - IR::Value a = inst->GetArg(0); - IR::Value b = inst->GetArg(1); - bool quiet = inst->GetArg(2).GetU1(); - - Xbyak::Xmm reg_a = reg_alloc.UseXmm(a); - Xbyak::Xmm reg_b = reg_alloc.UseXmm(b); + auto args = reg_alloc.GetArgumentInfo(inst); + Xbyak::Xmm reg_a = reg_alloc.UseXmm(args[0]); + Xbyak::Xmm reg_b = reg_alloc.UseXmm(args[1]); + bool quiet = args[2].GetImmediateU1(); if (quiet) { code->ucomisd(reg_a, reg_b); @@ -2471,9 +2430,8 @@ void EmitX64::EmitFPCompare64(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { } void EmitX64::EmitFPSingleToDouble(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) { - IR::Value a = inst->GetArg(0); - - Xbyak::Xmm result = reg_alloc.UseDefXmm(a, inst); + auto args = reg_alloc.GetArgumentInfo(inst); + Xbyak::Xmm result = reg_alloc.UseScratchXmm(args[0]); Xbyak::Reg64 gpr_scratch = reg_alloc.ScratchGpr(); if (block.Location().FPSCR().FTZ()) { @@ -2486,12 +2444,13 @@ void EmitX64::EmitFPSingleToDouble(RegAlloc& reg_alloc, IR::Block& block, IR::In if (block.Location().FPSCR().DN()) { DefaultNaN64(code, result); } + + reg_alloc.DefineValue(inst, result); } void EmitX64::EmitFPDoubleToSingle(RegAlloc& reg_alloc, 
IR::Block& block, IR::Inst* inst) { - IR::Value a = inst->GetArg(0); - - Xbyak::Xmm result = reg_alloc.UseDefXmm(a, inst); + auto args = reg_alloc.GetArgumentInfo(inst); + Xbyak::Xmm result = reg_alloc.UseScratchXmm(args[0]); Xbyak::Reg64 gpr_scratch = reg_alloc.ScratchGpr(); if (block.Location().FPSCR().FTZ()) { @@ -2504,29 +2463,29 @@ void EmitX64::EmitFPDoubleToSingle(RegAlloc& reg_alloc, IR::Block& block, IR::In if (block.Location().FPSCR().DN()) { DefaultNaN32(code, result); } + + reg_alloc.DefineValue(inst, result); } void EmitX64::EmitFPSingleToS32(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) { - IR::Value a = inst->GetArg(0); - bool round_towards_zero = inst->GetArg(1).GetU1(); - - Xbyak::Xmm from = reg_alloc.UseScratchXmm(a); - Xbyak::Xmm to = reg_alloc.DefXmm(inst); - Xbyak::Reg32 gpr_scratch = reg_alloc.ScratchGpr().cvt32(); + auto args = reg_alloc.GetArgumentInfo(inst); + Xbyak::Xmm from = reg_alloc.UseScratchXmm(args[0]); + Xbyak::Reg32 to = reg_alloc.ScratchGpr().cvt32(); Xbyak::Xmm xmm_scratch = reg_alloc.ScratchXmm(); + bool round_towards_zero = args[1].GetImmediateU1(); // ARM saturates on conversion; this differs from x64 which returns a sentinel value. // Conversion to double is lossless, and allows for clamping. if (block.Location().FPSCR().FTZ()) { - DenormalsAreZero32(code, from, gpr_scratch); + DenormalsAreZero32(code, from, to); } code->cvtss2sd(from, from); // First time is to set flags if (round_towards_zero) { - code->cvttsd2si(gpr_scratch, from); // 32 bit gpr + code->cvttsd2si(to, from); // 32 bit gpr } else { - code->cvtsd2si(gpr_scratch, from); // 32 bit gpr + code->cvtsd2si(to, from); // 32 bit gpr } // Clamp to output range ZeroIfNaN64(code, from, xmm_scratch); @@ -2534,21 +2493,20 @@ void EmitX64::EmitFPSingleToS32(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* code->maxsd(from, code->MFloatMinS32()); // Second time is for real if (round_towards_zero) { - code->cvttsd2si(gpr_scratch, from); // 32 bit gpr + code->cvttsd2si(to, from); // 32 bit gpr } else { - code->cvtsd2si(gpr_scratch, from); // 32 bit gpr + code->cvtsd2si(to, from); // 32 bit gpr } - code->movd(to, gpr_scratch); + + reg_alloc.DefineValue(inst, to); } void EmitX64::EmitFPSingleToU32(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) { - IR::Value a = inst->GetArg(0); - bool round_towards_zero = inst->GetArg(1).GetU1(); - - Xbyak::Xmm from = reg_alloc.UseScratchXmm(a); - Xbyak::Xmm to = reg_alloc.DefXmm(inst); - Xbyak::Reg32 gpr_scratch = reg_alloc.ScratchGpr().cvt32(); + auto args = reg_alloc.GetArgumentInfo(inst); + Xbyak::Xmm from = reg_alloc.UseScratchXmm(args[0]); + Xbyak::Reg32 to = reg_alloc.ScratchGpr().cvt32(); Xbyak::Xmm xmm_scratch = reg_alloc.ScratchXmm(); + bool round_towards_zero = args[1].GetImmediateU1(); // ARM saturates on conversion; this differs from x64 which returns a sentinel value. // Conversion to double is lossless, and allows for accurate clamping. 
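All of these float-to-integer emitters share one shape: widen the single to a double (lossless), squash NaN to zero, clamp against the target range with scalar double min/max, and only then convert, because the cvtsd2si family returns the sentinel 0x80000000 on out-of-range inputs where the ARM semantics require saturation. A scalar model of the signed path (a sketch under those assumptions, not the emitted code):

    #include <cmath>
    #include <cstdint>

    static std::int32_t saturating_f32_to_s32(float value, bool round_towards_zero) {
        double d = static_cast<double>(value);  // cvtss2sd: lossless widening
        if (std::isnan(d))                      // ZeroIfNaN64
            d = 0.0;
        if (d > 2147483647.0)                   // minsd against MFloatMaxS32
            d = 2147483647.0;
        if (d < -2147483648.0)                  // maxsd against MFloatMinS32
            d = -2147483648.0;
        return round_towards_zero
                   ? static_cast<std::int32_t>(d)               // cvttsd2si truncates
                   : static_cast<std::int32_t>(std::llrint(d)); // cvtsd2si rounds per mode
    }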
@@ -2559,28 +2517,27 @@ void EmitX64::EmitFPSingleToU32(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* if (block.Location().FPSCR().RMode() != Arm::FPSCR::RoundingMode::TowardsZero && !round_towards_zero) { if (block.Location().FPSCR().FTZ()) { - DenormalsAreZero32(code, from, gpr_scratch); + DenormalsAreZero32(code, from, to); } code->cvtss2sd(from, from); ZeroIfNaN64(code, from, xmm_scratch); // Bring into SSE range code->addsd(from, code->MFloatMinS32()); // First time is to set flags - code->cvtsd2si(gpr_scratch, from); // 32 bit gpr + code->cvtsd2si(to, from); // 32 bit gpr // Clamp to output range code->minsd(from, code->MFloatMaxS32()); code->maxsd(from, code->MFloatMinS32()); // Actually convert - code->cvtsd2si(gpr_scratch, from); // 32 bit gpr + code->cvtsd2si(to, from); // 32 bit gpr // Bring back into original range - code->add(gpr_scratch, u32(2147483648u)); - code->movd(to, gpr_scratch); + code->add(to, u32(2147483648u)); } else { Xbyak::Xmm xmm_mask = reg_alloc.ScratchXmm(); Xbyak::Reg32 gpr_mask = reg_alloc.ScratchGpr().cvt32(); if (block.Location().FPSCR().FTZ()) { - DenormalsAreZero32(code, from, gpr_scratch); + DenormalsAreZero32(code, from, to); } code->cvtss2sd(from, from); ZeroIfNaN64(code, from, xmm_scratch); @@ -2593,26 +2550,26 @@ void EmitX64::EmitFPSingleToU32(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* // Bring into range if necessary code->addsd(from, xmm_mask); // First time is to set flags - code->cvttsd2si(gpr_scratch, from); // 32 bit gpr + code->cvttsd2si(to, from); // 32 bit gpr // Clamp to output range code->minsd(from, code->MFloatMaxS32()); code->maxsd(from, code->MFloatMinU32()); // Actually convert - code->cvttsd2si(gpr_scratch, from); // 32 bit gpr + code->cvttsd2si(to, from); // 32 bit gpr // Bring back into original range if necessary - code->add(gpr_scratch, gpr_mask); - code->movd(to, gpr_scratch); + code->add(to, gpr_mask); } + + reg_alloc.DefineValue(inst, to); } void EmitX64::EmitFPDoubleToS32(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) { - IR::Value a = inst->GetArg(0); - bool round_towards_zero = inst->GetArg(1).GetU1(); - - Xbyak::Xmm from = reg_alloc.UseScratchXmm(a); - Xbyak::Xmm to = reg_alloc.DefXmm(inst); - Xbyak::Reg32 gpr_scratch = reg_alloc.ScratchGpr().cvt32(); + auto args = reg_alloc.GetArgumentInfo(inst); + Xbyak::Xmm from = reg_alloc.UseScratchXmm(args[0]); + Xbyak::Reg32 to = reg_alloc.ScratchGpr().cvt32(); Xbyak::Xmm xmm_scratch = reg_alloc.ScratchXmm(); + Xbyak::Reg32 gpr_scratch = reg_alloc.ScratchGpr().cvt32(); + bool round_towards_zero = args[1].GetImmediateU1(); // ARM saturates on conversion; this differs from x64 which returns a sentinel value. 
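The unsigned conversions reuse the signed converter through a bias: shift the clamped value down by 2^31 in the floating-point domain, where the subtraction is exact for inputs below 2^32, convert as signed, then add 2147483648 back in the integer domain, where the wraparound is harmless. A scalar model of the round-to-nearest path shown above (illustrative; the towards-zero branch instead builds a conditional bias mask):

    #include <cmath>
    #include <cstdint>

    // Assumes d has already been NaN-squashed and clamped to [0, 2^32 - 1].
    static std::uint32_t biased_f64_to_u32(double d) {
        d -= 2147483648.0;                                  // addsd MFloatMinS32
        std::int64_t s = std::llrint(d);                    // cvtsd2si (signed convert)
        return static_cast<std::uint32_t>(s) + 2147483648u; // add(to, 2147483648u)
    }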
@@ -2631,21 +2588,21 @@ void EmitX64::EmitFPDoubleToS32(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* code->maxsd(from, code->MFloatMinS32()); // Second time is for real if (round_towards_zero) { - code->cvttsd2si(gpr_scratch, from); // 32 bit gpr + code->cvttsd2si(to, from); // 32 bit gpr } else { - code->cvtsd2si(gpr_scratch, from); // 32 bit gpr + code->cvtsd2si(to, from); // 32 bit gpr } - code->movd(to, gpr_scratch); + + reg_alloc.DefineValue(inst, to); } void EmitX64::EmitFPDoubleToU32(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) { - IR::Value a = inst->GetArg(0); - bool round_towards_zero = inst->GetArg(1).GetU1(); - - Xbyak::Xmm from = reg_alloc.UseScratchXmm(a); - Xbyak::Xmm to = reg_alloc.DefXmm(inst); - Xbyak::Reg32 gpr_scratch = reg_alloc.ScratchGpr().cvt32(); + auto args = reg_alloc.GetArgumentInfo(inst); + Xbyak::Xmm from = reg_alloc.UseScratchXmm(args[0]); + Xbyak::Reg32 to = reg_alloc.ScratchGpr().cvt32(); Xbyak::Xmm xmm_scratch = reg_alloc.ScratchXmm(); + Xbyak::Reg32 gpr_scratch = reg_alloc.ScratchGpr().cvt32(); + bool round_towards_zero = args[1].GetImmediateU1(); // ARM saturates on conversion; this differs from x64 which returns a sentinel value. // TODO: Use VCVTPD2UDQ when AVX512VL is available. @@ -2664,10 +2621,9 @@ void EmitX64::EmitFPDoubleToU32(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* code->minsd(from, code->MFloatMaxS32()); code->maxsd(from, code->MFloatMinS32()); // Actually convert - code->cvtsd2si(gpr_scratch, from); // 32 bit gpr + code->cvtsd2si(to, from); // 32 bit gpr // Bring back into original range - code->add(gpr_scratch, u32(2147483648u)); - code->movd(to, gpr_scratch); + code->add(to, u32(2147483648u)); } else { Xbyak::Xmm xmm_mask = reg_alloc.ScratchXmm(); Xbyak::Reg32 gpr_mask = reg_alloc.ScratchGpr().cvt32(); @@ -2690,65 +2646,64 @@ void EmitX64::EmitFPDoubleToU32(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* code->minsd(from, code->MFloatMaxS32()); code->maxsd(from, code->MFloatMinU32()); // Actually convert - code->cvttsd2si(gpr_scratch, from); // 32 bit gpr + code->cvttsd2si(to, from); // 32 bit gpr // Bring back into original range if necessary - code->add(gpr_scratch, gpr_mask); - code->movd(to, gpr_scratch); + code->add(to, gpr_mask); } + + reg_alloc.DefineValue(inst, to); } void EmitX64::EmitFPS32ToSingle(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - IR::Value a = inst->GetArg(0); - bool round_to_nearest = inst->GetArg(1).GetU1(); + auto args = reg_alloc.GetArgumentInfo(inst); + Xbyak::Reg32 from = reg_alloc.UseGpr(args[0]).cvt32(); + Xbyak::Xmm to = reg_alloc.ScratchXmm(); + bool round_to_nearest = args[1].GetImmediateU1(); ASSERT_MSG(!round_to_nearest, "round_to_nearest unimplemented"); - Xbyak::Xmm from = reg_alloc.UseXmm(a); - Xbyak::Xmm to = reg_alloc.DefXmm(inst); - Xbyak::Reg32 gpr_scratch = reg_alloc.ScratchGpr().cvt32(); + code->cvtsi2ss(to, from); - code->movd(gpr_scratch, from); - code->cvtsi2ss(to, gpr_scratch); + reg_alloc.DefineValue(inst, to); } void EmitX64::EmitFPU32ToSingle(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - IR::Value a = inst->GetArg(0); - bool round_to_nearest = inst->GetArg(1).GetU1(); + auto args = reg_alloc.GetArgumentInfo(inst); + Xbyak::Reg64 from = reg_alloc.UseGpr(args[0]); + Xbyak::Xmm to = reg_alloc.ScratchXmm(); + bool round_to_nearest = args[1].GetImmediateU1(); ASSERT_MSG(!round_to_nearest, "round_to_nearest unimplemented"); - Xbyak::Xmm from = reg_alloc.UseXmm(a); - Xbyak::Xmm to = reg_alloc.DefXmm(inst); - // Use a 64-bit register to ensure we don't end up 
treating the input as signed - Xbyak::Reg64 gpr_scratch = reg_alloc.ScratchGpr(); + // We are using a 64-bit GPR register to ensure we don't end up treating the input as signed + code->mov(from.cvt32(), from.cvt32()); // TODO: Verify if this is necessary + code->cvtsi2ss(to, from); - code->movq(gpr_scratch, from); - code->cvtsi2ss(to, gpr_scratch); + reg_alloc.DefineValue(inst, to); } void EmitX64::EmitFPS32ToDouble(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - IR::Value a = inst->GetArg(0); - bool round_to_nearest = inst->GetArg(1).GetU1(); + auto args = reg_alloc.GetArgumentInfo(inst); + Xbyak::Reg32 from = reg_alloc.UseGpr(args[0]).cvt32(); + Xbyak::Xmm to = reg_alloc.ScratchXmm(); + bool round_to_nearest = args[1].GetImmediateU1(); ASSERT_MSG(!round_to_nearest, "round_to_nearest unimplemented"); - Xbyak::Xmm from = reg_alloc.UseXmm(a); - Xbyak::Xmm to = reg_alloc.DefXmm(inst); - Xbyak::Reg32 gpr_scratch = reg_alloc.ScratchGpr().cvt32(); + code->cvtsi2sd(to, from); - code->movd(gpr_scratch, from); - code->cvtsi2sd(to, gpr_scratch); + reg_alloc.DefineValue(inst, to); } void EmitX64::EmitFPU32ToDouble(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - IR::Value a = inst->GetArg(0); - bool round_to_nearest = inst->GetArg(1).GetU1(); + auto args = reg_alloc.GetArgumentInfo(inst); + Xbyak::Reg64 from = reg_alloc.UseGpr(args[0]); + Xbyak::Xmm to = reg_alloc.ScratchXmm(); + bool round_to_nearest = args[1].GetImmediateU1(); ASSERT_MSG(!round_to_nearest, "round_to_nearest unimplemented"); - Xbyak::Xmm from = reg_alloc.UseXmm(a); - Xbyak::Xmm to = reg_alloc.DefXmm(inst); - // Use a 64-bit register to ensure we don't end up treating the input as signed - Xbyak::Reg64 gpr_scratch = reg_alloc.ScratchGpr(); + // We are using a 64-bit GPR register to ensure we don't end up treating the input as signed + code->mov(from.cvt32(), from.cvt32()); // TODO: Verify if this is necessary + code->cvtsi2sd(to, from); - code->movq(gpr_scratch, from); - code->cvtsi2sd(to, gpr_scratch); + reg_alloc.DefineValue(inst, to); } @@ -2761,8 +2716,9 @@ void EmitX64::EmitClearExclusive(RegAlloc&, IR::Block&, IR::Inst*) { void EmitX64::EmitSetExclusive(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { using namespace Xbyak::util; - ASSERT(inst->GetArg(1).IsImmediate()); - Xbyak::Reg32 address = reg_alloc.UseGpr(inst->GetArg(0)).cvt32(); + auto args = reg_alloc.GetArgumentInfo(inst); + ASSERT(args[1].IsImmediate()); + Xbyak::Reg32 address = reg_alloc.UseGpr(args[0]).cvt32(); code->mov(code->byte[r15 + offsetof(JitState, exclusive_state)], u8(1)); code->mov(dword[r15 + offsetof(JitState, exclusive_address)], address); @@ -2770,16 +2726,16 @@ template <typename FunctionPointer> static void ReadMemory(BlockOfCode* code, RegAlloc& reg_alloc, IR::Inst* inst, UserCallbacks& cb, size_t bit_size, FunctionPointer fn) { + auto args = reg_alloc.GetArgumentInfo(inst); + reg_alloc.HostCall(inst, args[0]); + if (!cb.page_table) { - reg_alloc.HostCall(inst, inst->GetArg(0)); code->CallFunction(fn); return; } using namespace Xbyak::util; - reg_alloc.HostCall(inst, inst->GetArg(0)); - Xbyak::Reg64 result = code->ABI_RETURN; Xbyak::Reg32 vaddr = code->ABI_PARAM1.cvt32(); Xbyak::Reg64 page_index = code->ABI_PARAM3; @@ -2787,26 +2743,26 @@ Xbyak::Label abort, end; - code->mov(rax, reinterpret_cast<u64>(cb.page_table)); + code->mov(result, reinterpret_cast<u64>(cb.page_table)); code->mov(page_index.cvt32(), vaddr);
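The fast path being emitted at this point is a flat one-level page table over 4 KiB pages: the top 20 bits of the virtual address index an array of host pointers, a null entry bails out to the memory callback, and the low 12 bits offset into the page. Roughly, in plain C++ (names are illustrative, not project code):

    #include <cstdint>
    #include <cstring>

    template <typename T>
    bool TryFastRead(std::uint8_t** page_table, std::uint32_t vaddr, T& out) {
        std::uint8_t* page = page_table[vaddr >> 12];        // shr page_index, 12
        if (!page)                                           // test / jz abort
            return false;                                    // slow path: the read callback
        std::memcpy(&out, page + (vaddr & 4095), sizeof(T)); // and_ page_offset, 4095
        return true;
    }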
code->shr(page_index.cvt32(), 12); - code->mov(rax, qword[rax + page_index * 8]); - code->test(rax, rax); + code->mov(result, qword[result + page_index * 8]); + code->test(result, result); code->jz(abort); code->mov(page_offset.cvt32(), vaddr); code->and_(page_offset.cvt32(), 4095); switch (bit_size) { case 8: - code->movzx(result, code->byte[rax + page_offset]); + code->movzx(result, code->byte[result + page_offset]); break; case 16: - code->movzx(result, word[rax + page_offset]); + code->movzx(result, word[result + page_offset]); break; case 32: - code->mov(result.cvt32(), dword[rax + page_offset]); + code->mov(result.cvt32(), dword[result + page_offset]); break; case 64: - code->mov(result.cvt64(), qword[rax + page_offset]); + code->mov(result.cvt64(), qword[result + page_offset]); break; default: ASSERT_MSG(false, "Invalid bit_size"); @@ -2820,16 +2776,16 @@ template <typename FunctionPointer> static void WriteMemory(BlockOfCode* code, RegAlloc& reg_alloc, IR::Inst* inst, UserCallbacks& cb, size_t bit_size, FunctionPointer fn) { + auto args = reg_alloc.GetArgumentInfo(inst); + reg_alloc.HostCall(nullptr, args[0], args[1]); + if (!cb.page_table) { - reg_alloc.HostCall(nullptr, inst->GetArg(0), inst->GetArg(1)); code->CallFunction(fn); return; } using namespace Xbyak::util; - reg_alloc.HostCall(nullptr, inst->GetArg(0), inst->GetArg(1)); - Xbyak::Reg32 vaddr = code->ABI_PARAM1.cvt32(); Xbyak::Reg64 value = code->ABI_PARAM2; Xbyak::Reg64 page_index = code->ABI_PARAM3; @@ -2902,17 +2858,18 @@ void EmitX64::EmitWriteMemory64(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) template <typename FunctionPointer> static void ExclusiveWrite(BlockOfCode* code, RegAlloc& reg_alloc, IR::Inst* inst, FunctionPointer fn, bool prepend_high_word) { + auto args = reg_alloc.GetArgumentInfo(inst); + if (prepend_high_word) { + reg_alloc.HostCall(nullptr, args[0], args[1], args[2]); + } else { + reg_alloc.HostCall(nullptr, args[0], args[1]); + } + Xbyak::Reg32 passed = reg_alloc.ScratchGpr().cvt32(); + Xbyak::Reg32 tmp = code->ABI_RETURN.cvt32(); // Use one of the unused HostCall registers. + using namespace Xbyak::util; Xbyak::Label end; - if (prepend_high_word) { - reg_alloc.HostCall(nullptr, inst->GetArg(0), inst->GetArg(1), inst->GetArg(2)); - } else { - reg_alloc.HostCall(nullptr, inst->GetArg(0), inst->GetArg(1)); - } - Xbyak::Reg32 passed = reg_alloc.DefGpr(inst).cvt32(); - Xbyak::Reg32 tmp = code->ABI_RETURN.cvt32(); // Use one of the unused HostCall registers.
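ExclusiveWrite pairs with EmitSetExclusive above: passed starts at 1 (store failed), the guarded write only happens while the exclusive_state byte is set and the recorded exclusive_address matches, and passed is cleared to 0 on success, which is ARM's store-exclusive status convention. A scalar model (illustrative; the exact address comparison is elided from this hunk):

    #include <cstdint>

    struct ExclusiveMonitor {
        bool active = false;        // JitState::exclusive_state
        std::uint32_t address = 0;  // JitState::exclusive_address
    };

    // Returns 0 if the store was performed, 1 otherwise (STREX convention).
    template <typename WriteFn>
    std::uint32_t ExclusiveWriteModel(ExclusiveMonitor& mon, std::uint32_t vaddr,
                                      std::uint32_t value, WriteFn write) {
        if (!mon.active || mon.address != vaddr) // monitor lost or wrong address
            return 1;
        mon.active = false;                      // one shot: clear the state
        write(vaddr, value);                     // the guarded memory write
        return 0;
    }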
- code->mov(passed, u32(1)); code->cmp(code->byte[r15 + offsetof(JitState, exclusive_state)], u8(0)); code->je(end); @@ -2929,6 +2886,8 @@ static void ExclusiveWrite(BlockOfCode* code, RegAlloc& reg_alloc, IR::Inst* ins code->CallFunction(fn); code->xor_(passed, passed); code->L(end); + + reg_alloc.DefineValue(inst, passed); } void EmitX64::EmitExclusiveWriteMemory8(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { @@ -2951,7 +2910,7 @@ static void EmitCoprocessorException() { ASSERT_MSG(false, "Should raise coproc exception here"); } -static void CallCoprocCallback(BlockOfCode* code, RegAlloc& reg_alloc, Jit* jit_interface, Coprocessor::Callback callback, IR::Inst* inst = nullptr, IR::Value arg0 = {}, IR::Value arg1 = {}) { +static void CallCoprocCallback(BlockOfCode* code, RegAlloc& reg_alloc, Jit* jit_interface, Coprocessor::Callback callback, IR::Inst* inst = nullptr, boost::optional arg0 = {}, boost::optional arg1 = {}) { reg_alloc.HostCall(inst, {}, {}, arg0, arg1); code->mov(code->ABI_PARAM1, reinterpret_cast(jit_interface)); @@ -2989,6 +2948,7 @@ void EmitX64::EmitCoprocInternalOperation(RegAlloc& reg_alloc, IR::Block&, IR::I } void EmitX64::EmitCoprocSendOneWord(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { + auto args = reg_alloc.GetArgumentInfo(inst); auto coproc_info = inst->GetArg(0).GetCoprocInfo(); size_t coproc_num = coproc_info[0]; @@ -2998,8 +2958,6 @@ void EmitX64::EmitCoprocSendOneWord(RegAlloc& reg_alloc, IR::Block&, IR::Inst* i Arm::CoprocReg CRm = static_cast(coproc_info[4]); unsigned opc2 = static_cast(coproc_info[5]); - IR::Value word = inst->GetArg(1); - std::shared_ptr coproc = cb.coprocessors[coproc_num]; if (!coproc) { EmitCoprocessorException(); @@ -3012,12 +2970,12 @@ void EmitX64::EmitCoprocSendOneWord(RegAlloc& reg_alloc, IR::Block&, IR::Inst* i EmitCoprocessorException(); return; case 1: - CallCoprocCallback(code, reg_alloc, jit_interface, boost::get(action), nullptr, word); + CallCoprocCallback(code, reg_alloc, jit_interface, boost::get(action), nullptr, args[1]); return; case 2: { u32* destination_ptr = boost::get(action); - Xbyak::Reg32 reg_word = reg_alloc.UseGpr(word).cvt32(); + Xbyak::Reg32 reg_word = reg_alloc.UseGpr(args[1]).cvt32(); Xbyak::Reg64 reg_destination_addr = reg_alloc.ScratchGpr(); code->mov(reg_destination_addr, reinterpret_cast(destination_ptr)); @@ -3031,6 +2989,7 @@ void EmitX64::EmitCoprocSendOneWord(RegAlloc& reg_alloc, IR::Block&, IR::Inst* i } void EmitX64::EmitCoprocSendTwoWords(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { + auto args = reg_alloc.GetArgumentInfo(inst); auto coproc_info = inst->GetArg(0).GetCoprocInfo(); size_t coproc_num = coproc_info[0]; @@ -3038,9 +2997,6 @@ void EmitX64::EmitCoprocSendTwoWords(RegAlloc& reg_alloc, IR::Block&, IR::Inst* unsigned opc = static_cast(coproc_info[2]); Arm::CoprocReg CRm = static_cast(coproc_info[3]); - IR::Value word1 = inst->GetArg(1); - IR::Value word2 = inst->GetArg(2); - std::shared_ptr coproc = cb.coprocessors[coproc_num]; if (!coproc) { EmitCoprocessorException(); @@ -3053,13 +3009,13 @@ void EmitX64::EmitCoprocSendTwoWords(RegAlloc& reg_alloc, IR::Block&, IR::Inst* EmitCoprocessorException(); return; case 1: - CallCoprocCallback(code, reg_alloc, jit_interface, boost::get(action), nullptr, word1, word2); + CallCoprocCallback(code, reg_alloc, jit_interface, boost::get(action), nullptr, args[1], args[2]); return; case 2: { auto destination_ptrs = boost::get>(action); - Xbyak::Reg32 reg_word1 = reg_alloc.UseGpr(word1).cvt32(); - Xbyak::Reg32 reg_word2 = 
reg_alloc.UseGpr(word2).cvt32(); + Xbyak::Reg32 reg_word1 = reg_alloc.UseGpr(args[1]).cvt32(); + Xbyak::Reg32 reg_word2 = reg_alloc.UseGpr(args[2]).cvt32(); Xbyak::Reg64 reg_destination_addr = reg_alloc.ScratchGpr(); code->mov(reg_destination_addr, reinterpret_cast(destination_ptrs[0])); @@ -3101,12 +3057,14 @@ void EmitX64::EmitCoprocGetOneWord(RegAlloc& reg_alloc, IR::Block&, IR::Inst* in case 2: { u32* source_ptr = boost::get(action); - Xbyak::Reg32 reg_word = reg_alloc.DefGpr(inst).cvt32(); + Xbyak::Reg32 reg_word = reg_alloc.ScratchGpr().cvt32(); Xbyak::Reg64 reg_source_addr = reg_alloc.ScratchGpr(); code->mov(reg_source_addr, reinterpret_cast(source_ptr)); code->mov(reg_word, code->dword[reg_source_addr]); + reg_alloc.DefineValue(inst, reg_word); + return; } default: @@ -3139,7 +3097,7 @@ void EmitX64::EmitCoprocGetTwoWords(RegAlloc& reg_alloc, IR::Block&, IR::Inst* i case 2: { auto source_ptrs = boost::get>(action); - Xbyak::Reg64 reg_result = reg_alloc.DefGpr(inst); + Xbyak::Reg64 reg_result = reg_alloc.ScratchGpr(); Xbyak::Reg64 reg_destination_addr = reg_alloc.ScratchGpr(); Xbyak::Reg64 reg_tmp = reg_alloc.ScratchGpr(); @@ -3150,6 +3108,8 @@ void EmitX64::EmitCoprocGetTwoWords(RegAlloc& reg_alloc, IR::Block&, IR::Inst* i code->mov(reg_tmp.cvt32(), code->dword[reg_destination_addr]); code->or_(reg_result, reg_tmp); + reg_alloc.DefineValue(inst, reg_result); + return; } default: @@ -3158,6 +3118,7 @@ void EmitX64::EmitCoprocGetTwoWords(RegAlloc& reg_alloc, IR::Block&, IR::Inst* i } void EmitX64::EmitCoprocLoadWords(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { + auto args = reg_alloc.GetArgumentInfo(inst); auto coproc_info = inst->GetArg(0).GetCoprocInfo(); size_t coproc_num = coproc_info[0]; @@ -3167,8 +3128,6 @@ void EmitX64::EmitCoprocLoadWords(RegAlloc& reg_alloc, IR::Block&, IR::Inst* ins bool has_option = coproc_info[4] != 0; boost::optional option{has_option, coproc_info[5]}; - IR::Value address = inst->GetArg(1); - std::shared_ptr coproc = cb.coprocessors[coproc_num]; if (!coproc) { EmitCoprocessorException(); @@ -3181,10 +3140,11 @@ void EmitX64::EmitCoprocLoadWords(RegAlloc& reg_alloc, IR::Block&, IR::Inst* ins return; } - CallCoprocCallback(code, reg_alloc, jit_interface, *action, nullptr, address); + CallCoprocCallback(code, reg_alloc, jit_interface, *action, nullptr, args[1]); } void EmitX64::EmitCoprocStoreWords(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { + auto args = reg_alloc.GetArgumentInfo(inst); auto coproc_info = inst->GetArg(0).GetCoprocInfo(); size_t coproc_num = coproc_info[0]; @@ -3194,8 +3154,6 @@ void EmitX64::EmitCoprocStoreWords(RegAlloc& reg_alloc, IR::Block&, IR::Inst* in bool has_option = coproc_info[4] != 0; boost::optional option{has_option, coproc_info[5]}; - IR::Value address = inst->GetArg(1); - std::shared_ptr coproc = cb.coprocessors[coproc_num]; if (!coproc) { EmitCoprocessorException(); @@ -3208,7 +3166,7 @@ void EmitX64::EmitCoprocStoreWords(RegAlloc& reg_alloc, IR::Block&, IR::Inst* in return; } - CallCoprocCallback(code, reg_alloc, jit_interface, *action, nullptr, address); + CallCoprocCallback(code, reg_alloc, jit_interface, *action, nullptr, args[1]); } void EmitX64::EmitAddCycles(size_t cycles) { diff --git a/src/backend_x64/reg_alloc.cpp b/src/backend_x64/reg_alloc.cpp index 46e003d9..b2dd8276 100644 --- a/src/backend_x64/reg_alloc.cpp +++ b/src/backend_x64/reg_alloc.cpp @@ -56,9 +56,9 @@ static void EmitMove(BlockOfCode* code, HostLoc to, HostLoc from) { } else if (HostLocIsGPR(to) && HostLocIsGPR(from)) { 
code->mov(HostLocToReg64(to), HostLocToReg64(from)); } else if (HostLocIsXMM(to) && HostLocIsGPR(from)) { - ASSERT_MSG(false, "TODO"); + code->movq(HostLocToXmm(to), HostLocToReg64(from)); } else if (HostLocIsGPR(to) && HostLocIsXMM(from)) { - ASSERT_MSG(false, "TODO"); + code->movq(HostLocToReg64(to), HostLocToXmm(from)); } else if (HostLocIsXMM(to) && HostLocIsSpill(from)) { code->movsd(HostLocToXmm(to), SpillToOpArg(from)); } else if (HostLocIsSpill(to) && HostLocIsXMM(from)) { @@ -82,6 +82,10 @@ static void EmitExchange(BlockOfCode* code, HostLoc a, HostLoc b) { } } +bool Argument::GetImmediateU1() const { + return value.GetU1(); +} + u8 Argument::GetImmediateU8() const { u64 imm = ImmediateToU64(value); ASSERT(imm < 0x100); diff --git a/src/backend_x64/reg_alloc.h b/src/backend_x64/reg_alloc.h index cbc3e56c..e854ddfe 100644 --- a/src/backend_x64/reg_alloc.h +++ b/src/backend_x64/reg_alloc.h @@ -76,6 +76,7 @@ public: return value.IsImmediate(); } + bool GetImmediateU1() const; u8 GetImmediateU8() const; u16 GetImmediateU16() const; u32 GetImmediateU32() const; @@ -113,6 +114,9 @@ public: arg.allocated = true; return HostLocToXmm(UseHostLocReg(arg.value, any_xmm)); } + OpArg UseOpArg(Argument& arg) { + return UseGpr(arg); + } void Use(Argument& arg, HostLoc host_loc) { ASSERT(!arg.allocated); arg.allocated = true; @@ -135,12 +139,9 @@ public: UseScratchHostLocReg(arg.value, {host_loc}); } - void DefineValue(IR::Inst* inst, const Xbyak::Reg64& reg) { - HostLoc hostloc = static_cast(reg.getIdx() + static_cast(HostLoc::RAX)); - DefineValue(inst, hostloc); - } - void DefineValue(IR::Inst* inst, const Xbyak::Xmm& reg) { - HostLoc hostloc = static_cast(reg.getIdx() + static_cast(HostLoc::XMM0)); + void DefineValue(IR::Inst* inst, const Xbyak::Reg& reg) { + ASSERT(reg.getKind() == Xbyak::Operand::XMM || reg.getKind() == Xbyak::Operand::REG); + HostLoc hostloc = static_cast(reg.getIdx() + static_cast(reg.getKind() == Xbyak::Operand::XMM ? 
HostLoc::XMM0 : HostLoc::RAX)); DefineValue(inst, hostloc); } void DefineValue(IR::Inst* inst, Argument& arg) { From cf93ab3d3120f89f5d2a706f9c4e6e6d52dc9f17 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Sat, 25 Feb 2017 17:16:23 +0000 Subject: [PATCH 12/16] reg_alloc: Remove old register allocator interface --- src/backend_x64/reg_alloc.cpp | 34 ---------------------- src/backend_x64/reg_alloc.h | 53 +---------------------------------- 2 files changed, 1 insertion(+), 86 deletions(-) diff --git a/src/backend_x64/reg_alloc.cpp b/src/backend_x64/reg_alloc.cpp index b2dd8276..8794ce3b 100644 --- a/src/backend_x64/reg_alloc.cpp +++ b/src/backend_x64/reg_alloc.cpp @@ -33,17 +33,6 @@ static u64 ImmediateToU64(const IR::Value& imm) { } } -static Xbyak::Reg HostLocToX64(HostLoc hostloc) { - if (HostLocIsGPR(hostloc)) { - DEBUG_ASSERT(hostloc != HostLoc::RSP && hostloc != HostLoc::R15); - return HostLocToReg64(hostloc); - } - if (HostLocIsXMM(hostloc)) { - return HostLocToXmm(hostloc); - } - ASSERT_MSG(false, "This should never happen."); -} - static bool IsSameHostLocClass(HostLoc a, HostLoc b) { return (HostLocIsGPR(a) && HostLocIsGPR(b)) || (HostLocIsXMM(a) && HostLocIsXMM(b)) @@ -145,19 +134,6 @@ void RegAlloc::RegisterAddDef(IR::Inst* def_inst, const IR::Value& use_inst) { DefineValue(def_inst, location); } -std::tuple RegAlloc::UseDefOpArgHostLocReg(IR::Value use_value, IR::Inst* def_inst, HostLocList desired_locations) { - DEBUG_ASSERT(std::all_of(desired_locations.begin(), desired_locations.end(), HostLocIsRegister)); - DEBUG_ASSERT_MSG(!ValueLocation(def_inst), "def_inst has already been defined"); - DEBUG_ASSERT_MSG(use_value.IsImmediate() || ValueLocation(use_value.GetInst()), "use_inst has not been defined"); - - // TODO: IsLastUse optimization - - OpArg use_oparg = UseOpArg(use_value, any_gpr); - HostLoc def_reg = ScratchHostLocReg(desired_locations); - DefineValue(def_inst, def_reg); - return std::make_tuple(use_oparg, def_reg); -} - HostLoc RegAlloc::UseHostLocReg(IR::Value use_value, HostLocList desired_locations) { if (!use_value.IsImmediate()) { return UseHostLocReg(use_value.GetInst(), desired_locations); @@ -192,16 +168,6 @@ HostLoc RegAlloc::UseHostLocReg(IR::Inst* use_inst, HostLocList desired_location return destination_location; } -OpArg RegAlloc::UseOpArg(IR::Value use_value, HostLocList desired_locations) { - if (use_value.IsImmediate()) { - ASSERT_MSG(false, "UseOpArg does not support immediates"); - return {}; // return a None - } - - // TODO: Reimplement properly - return HostLocToX64(UseHostLocReg(use_value.GetInst(), desired_locations)); -} - HostLoc RegAlloc::UseScratchHostLocReg(IR::Value use_value, HostLocList desired_locations) { if (!use_value.IsImmediate()) { return UseScratchHostLocReg(use_value.GetInst(), desired_locations); diff --git a/src/backend_x64/reg_alloc.h b/src/backend_x64/reg_alloc.h index e854ddfe..219342bb 100644 --- a/src/backend_x64/reg_alloc.h +++ b/src/backend_x64/reg_alloc.h @@ -150,57 +150,6 @@ public: RegisterAddDef(inst, arg.value); } - /// Late-def - Xbyak::Reg64 DefGpr(IR::Inst* def_inst, HostLocList desired_locations = any_gpr) { - HostLoc location = ScratchHostLocReg(desired_locations); - DefineValue(def_inst, location); - return HostLocToReg64(location); - } - Xbyak::Xmm DefXmm(IR::Inst* def_inst, HostLocList desired_locations = any_xmm) { - HostLoc location = ScratchHostLocReg(desired_locations); - DefineValue(def_inst, location); - return HostLocToXmm(location); - } - void RegisterAddDef(IR::Inst* def_inst, const IR::Value& 
use_inst); - /// Early-use, Late-def - Xbyak::Reg64 UseDefGpr(IR::Value use_value, IR::Inst* def_inst, HostLocList desired_locations = any_gpr) { - HostLoc location = UseScratchHostLocReg(use_value, desired_locations); - DefineValue(def_inst, location); - return HostLocToReg64(location); - } - Xbyak::Xmm UseDefXmm(IR::Value use_value, IR::Inst* def_inst, HostLocList desired_locations = any_xmm) { - HostLoc location = UseScratchHostLocReg(use_value, desired_locations); - DefineValue(def_inst, location); - return HostLocToXmm(location); - } - std::tuple UseDefOpArgGpr(IR::Value use_value, IR::Inst* def_inst, HostLocList desired_locations = any_gpr) { - OpArg op; - HostLoc host_loc; - std::tie(op, host_loc) = UseDefOpArgHostLocReg(use_value, def_inst, desired_locations); - return std::make_tuple(op, HostLocToReg64(host_loc)); - } - std::tuple UseDefOpArgXmm(IR::Value use_value, IR::Inst* def_inst, HostLocList desired_locations = any_xmm) { - OpArg op; - HostLoc host_loc; - std::tie(op, host_loc) = UseDefOpArgHostLocReg(use_value, def_inst, desired_locations); - return std::make_tuple(op, HostLocToXmm(host_loc)); - } - /// Early-use - Xbyak::Reg64 UseGpr(IR::Value use_value, HostLocList desired_locations = any_gpr) { - return HostLocToReg64(UseHostLocReg(use_value, desired_locations)); - } - Xbyak::Xmm UseXmm(IR::Value use_value, HostLocList desired_locations = any_xmm) { - return HostLocToXmm(UseHostLocReg(use_value, desired_locations)); - } - OpArg UseOpArg(IR::Value use_value, HostLocList desired_locations); - /// Early-use, Destroyed - Xbyak::Reg64 UseScratchGpr(IR::Value use_value, HostLocList desired_locations = any_gpr) { - return HostLocToReg64(UseScratchHostLocReg(use_value, desired_locations)); - } - Xbyak::Xmm UseScratchXmm(IR::Value use_value, HostLocList desired_locations = any_xmm) { - return HostLocToXmm(UseScratchHostLocReg(use_value, desired_locations)); - } - /// Early-def, Late-use, single-use Xbyak::Reg64 ScratchGpr(HostLocList desired_locations = any_gpr) { return HostLocToReg64(ScratchHostLocReg(desired_locations)); } @@ -226,8 +175,8 @@ private: boost::optional ValueLocation(const IR::Inst* value) const; void DefineValue(IR::Inst* def_inst, HostLoc host_loc); + void RegisterAddDef(IR::Inst* def_inst, const IR::Value& use_inst); - std::tuple UseDefOpArgHostLocReg(IR::Value use_value, IR::Inst* def_inst, HostLocList desired_locations); HostLoc UseHostLocReg(IR::Value use_value, HostLocList desired_locations); HostLoc UseHostLocReg(IR::Inst* use_inst, HostLocList desired_locations); HostLoc UseScratchHostLocReg(IR::Value use_value, HostLocList desired_locations); From 51fc9fec05ae18140f19e96fab77a286dba33119 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Sun, 26 Feb 2017 22:28:32 +0000 Subject: [PATCH 13/16] reg_alloc: Reorganize --- src/backend_x64/reg_alloc.cpp | 212 +++++++++++++++++++++------------- src/backend_x64/reg_alloc.h | 93 ++++----------- 2 files changed, 155 insertions(+), 150 deletions(-) diff --git a/src/backend_x64/reg_alloc.cpp b/src/backend_x64/reg_alloc.cpp index 8794ce3b..011803de 100644 --- a/src/backend_x64/reg_alloc.cpp +++ b/src/backend_x64/reg_alloc.cpp @@ -110,7 +110,7 @@ bool Argument::IsInMemory() const { } std::array RegAlloc::GetArgumentInfo(IR::Inst* inst) { - std::array ret = { Argument{*this}, Argument{*this}, Argument{*this}}; + std::array ret = { Argument{*this}, Argument{*this}, Argument{*this} }; for (size_t i = 0; i < inst->NumArgs(); i++) { IR::Value arg = inst->GetArg(i); ret[i].value = arg; @@ -118,33 +118,72 @@ std::array 
RegAlloc::GetArgumentInfo(IR::Inst* inst) { return ret; } -void RegAlloc::RegisterAddDef(IR::Inst* def_inst, const IR::Value& use_inst) { - DEBUG_ASSERT_MSG(!ValueLocation(def_inst), "def_inst has already been defined"); - - if (use_inst.IsImmediate()) { - HostLoc location = ScratchHostLocReg(any_gpr); - DefineValue(def_inst, location); - LoadImmediateIntoHostLocReg(use_inst, location); - return; - } - - use_inst.GetInst()->DecrementRemainingUses(); - DEBUG_ASSERT_MSG(ValueLocation(use_inst.GetInst()), "use_inst must already be defined"); - HostLoc location = *ValueLocation(use_inst.GetInst()); - DefineValue(def_inst, location); +Xbyak::Reg64 RegAlloc::UseGpr(Argument& arg) { + ASSERT(!arg.allocated); + arg.allocated = true; + return HostLocToReg64(UseImpl(arg.value, any_gpr)); } -HostLoc RegAlloc::UseHostLocReg(IR::Value use_value, HostLocList desired_locations) { - if (!use_value.IsImmediate()) { - return UseHostLocReg(use_value.GetInst(), desired_locations); - } - - return LoadImmediateIntoHostLocReg(use_value, ScratchHostLocReg(desired_locations)); +Xbyak::Xmm RegAlloc::UseXmm(Argument& arg) { + ASSERT(!arg.allocated); + arg.allocated = true; + return HostLocToXmm(UseImpl(arg.value, any_xmm)); } -HostLoc RegAlloc::UseHostLocReg(IR::Inst* use_inst, HostLocList desired_locations) { - use_inst->DecrementRemainingUses(); +OpArg RegAlloc::UseOpArg(Argument& arg) { + return UseGpr(arg); +} +void RegAlloc::Use(Argument& arg, HostLoc host_loc) { + ASSERT(!arg.allocated); + arg.allocated = true; + UseImpl(arg.value, {host_loc}); +} + +Xbyak::Reg64 RegAlloc::UseScratchGpr(Argument& arg) { + ASSERT(!arg.allocated); + arg.allocated = true; + return HostLocToReg64(UseScratchImpl(arg.value, any_gpr)); +} + +Xbyak::Xmm RegAlloc::UseScratchXmm(Argument& arg) { + ASSERT(!arg.allocated); + arg.allocated = true; + return HostLocToXmm(UseScratchImpl(arg.value, any_xmm)); +} + +void RegAlloc::UseScratch(Argument& arg, HostLoc host_loc) { + ASSERT(!arg.allocated); + arg.allocated = true; + UseScratchImpl(arg.value, {host_loc}); +} + +void RegAlloc::DefineValue(IR::Inst* inst, const Xbyak::Reg& reg) { + ASSERT(reg.getKind() == Xbyak::Operand::XMM || reg.getKind() == Xbyak::Operand::REG); + HostLoc hostloc = static_cast(reg.getIdx() + static_cast(reg.getKind() == Xbyak::Operand::XMM ? 
HostLoc::XMM0 : HostLoc::RAX)); + DefineValueImpl(inst, hostloc); +} + +void RegAlloc::DefineValue(IR::Inst* inst, Argument& arg) { + ASSERT(!arg.allocated); + arg.allocated = true; + DefineValueImpl(inst, arg.value); +} + +Xbyak::Reg64 RegAlloc::ScratchGpr(HostLocList desired_locations) { + return HostLocToReg64(ScratchImpl(desired_locations)); +} + +Xbyak::Xmm RegAlloc::ScratchXmm(HostLocList desired_locations) { + return HostLocToXmm(ScratchImpl(desired_locations)); +} + +HostLoc RegAlloc::UseImpl(IR::Value use_value, HostLocList desired_locations) { + if (use_value.IsImmediate()) { + return LoadImmediate(use_value, ScratchImpl(desired_locations)); + } + + IR::Inst* use_inst = use_value.GetInst(); const HostLoc current_location = *ValueLocation(use_inst); const bool can_use_current_location = std::find(desired_locations.begin(), desired_locations.end(), current_location) != desired_locations.end(); @@ -154,7 +193,7 @@ HostLoc RegAlloc::UseHostLocReg(IR::Inst* use_inst, HostLocList desired_location } if (LocInfo(current_location).IsLocked()) { - return UseScratchHostLocReg(use_inst, desired_locations); + return UseScratchImpl(use_value, desired_locations); } const HostLoc destination_location = SelectARegister(desired_locations); @@ -168,17 +207,12 @@ HostLoc RegAlloc::UseHostLocReg(IR::Inst* use_inst, HostLocList desired_location return destination_location; } -HostLoc RegAlloc::UseScratchHostLocReg(IR::Value use_value, HostLocList desired_locations) { - if (!use_value.IsImmediate()) { - return UseScratchHostLocReg(use_value.GetInst(), desired_locations); +HostLoc RegAlloc::UseScratchImpl(IR::Value use_value, HostLocList desired_locations) { + if (use_value.IsImmediate()) { + return LoadImmediate(use_value, ScratchImpl(desired_locations)); } - return LoadImmediateIntoHostLocReg(use_value, ScratchHostLocReg(desired_locations)); -} - -HostLoc RegAlloc::UseScratchHostLocReg(IR::Inst* use_inst, HostLocList desired_locations) { - use_inst->DecrementRemainingUses(); - + IR::Inst* use_inst = use_value.GetInst(); const HostLoc current_location = *ValueLocation(use_inst); const bool can_use_current_location = std::find(desired_locations.begin(), desired_locations.end(), current_location) != desired_locations.end(); @@ -195,19 +229,19 @@ HostLoc RegAlloc::UseScratchHostLocReg(IR::Inst* use_inst, HostLocList desired_l return destination_location; } -HostLoc RegAlloc::ScratchHostLocReg(HostLocList desired_locations) { +HostLoc RegAlloc::ScratchImpl(HostLocList desired_locations) { HostLoc location = SelectARegister(desired_locations); MoveOutOfTheWay(location); LocInfo(location).WriteLock(); return location; } -void RegAlloc::HostCall(IR::Inst* result_def, IR::Value arg0_use, IR::Value arg1_use, IR::Value arg2_use, IR::Value arg3_use) { +void RegAlloc::HostCall(IR::Inst* result_def, boost::optional arg0, boost::optional arg1, boost::optional arg2, boost::optional arg3) { constexpr size_t args_count = 4; constexpr std::array args_hostloc = { ABI_PARAM1, ABI_PARAM2, ABI_PARAM3, ABI_PARAM4 }; - const std::array args = {&arg0_use, &arg1_use, &arg2_use, &arg3_use}; + const std::array, args_count> args = { arg0, arg1, arg2, arg3 }; - const static std::vector other_caller_save = [args_hostloc](){ + const static std::vector other_caller_save = [args_hostloc]() { std::vector ret(ABI_ALL_CALLER_SAVE.begin(), ABI_ALL_CALLER_SAVE.end()); for (auto hostloc : args_hostloc) @@ -216,27 +250,39 @@ void RegAlloc::HostCall(IR::Inst* result_def, IR::Value arg0_use, IR::Value arg1 return ret; }(); - // TODO: This works 
@@ -195,19 +229,19 @@ HostLoc RegAlloc::UseScratchHostLocReg(IR::Inst* use_inst, HostLocList desired_l
     return destination_location;
 }
 
-HostLoc RegAlloc::ScratchHostLocReg(HostLocList desired_locations) {
+HostLoc RegAlloc::ScratchImpl(HostLocList desired_locations) {
     HostLoc location = SelectARegister(desired_locations);
     MoveOutOfTheWay(location);
     LocInfo(location).WriteLock();
     return location;
 }
 
-void RegAlloc::HostCall(IR::Inst* result_def, IR::Value arg0_use, IR::Value arg1_use, IR::Value arg2_use, IR::Value arg3_use) {
+void RegAlloc::HostCall(IR::Inst* result_def, boost::optional<Argument&> arg0, boost::optional<Argument&> arg1, boost::optional<Argument&> arg2, boost::optional<Argument&> arg3) {
     constexpr size_t args_count = 4;
     constexpr std::array<HostLoc, args_count> args_hostloc = { ABI_PARAM1, ABI_PARAM2, ABI_PARAM3, ABI_PARAM4 };
-    const std::array<IR::Value*, args_count> args = {&arg0_use, &arg1_use, &arg2_use, &arg3_use};
+    const std::array<boost::optional<Argument&>, args_count> args = { arg0, arg1, arg2, arg3 };
 
-    const static std::vector<HostLoc> other_caller_save = [args_hostloc](){
+    const static std::vector<HostLoc> other_caller_save = [args_hostloc]() {
         std::vector<HostLoc> ret(ABI_ALL_CALLER_SAVE.begin(), ABI_ALL_CALLER_SAVE.end());
 
         for (auto hostloc : args_hostloc)
@@ -216,27 +250,39 @@ void RegAlloc::HostCall(IR::Inst* result_def, IR::Value arg1
         return ret;
     }();
 
-    // TODO: This works but almost certainly leads to suboptimal generated code.
-
+    ScratchGpr({ABI_RETURN});
     if (result_def) {
-        DefineValue(result_def, ScratchHostLocReg({ABI_RETURN}));
-    } else {
-        ScratchHostLocReg({ABI_RETURN});
+        DefineValueImpl(result_def, ABI_RETURN);
     }
 
     for (size_t i = 0; i < args_count; i++) {
-        if (!args[i]->IsEmpty()) {
-            UseScratchHostLocReg(*args[i], {args_hostloc[i]});
-        } else {
-            ScratchHostLocReg({args_hostloc[i]});
+        if (args[i]) {
+            UseScratch(*args[i], args_hostloc[i]);
+        }
+    }
+
+    for (size_t i = 0; i < args_count; i++) {
+        if (!args[i]) {
+            // TODO: Force spill
+            ScratchGpr({args_hostloc[i]});
         }
     }
 
     for (HostLoc caller_saved : other_caller_save) {
-        ScratchHostLocReg({caller_saved});
+        ScratchImpl({caller_saved});
     }
 }
 
+void RegAlloc::EndOfAllocScope() {
+    for (auto& iter : hostloc_info) {
+        iter.EndOfAllocScope();
+    }
+}
+
+void RegAlloc::AssertNoMoreUses() {
+    ASSERT(std::all_of(hostloc_info.begin(), hostloc_info.end(), [](const auto& i) { return i.IsEmpty(); }));
+}
+
 HostLoc RegAlloc::SelectARegister(HostLocList desired_locations) const {
     std::vector<HostLoc> candidates = desired_locations;
 
@@ -265,45 +311,28 @@ boost::optional<HostLoc> RegAlloc::ValueLocation(const IR::Inst* value) const {
     return boost::none;
 }
 
-void RegAlloc::DefineValue(IR::Inst* def_inst, HostLoc host_loc) {
+void RegAlloc::DefineValueImpl(IR::Inst* def_inst, HostLoc host_loc) {
     DEBUG_ASSERT_MSG(!ValueLocation(def_inst), "def_inst has already been defined");
     LocInfo(host_loc).AddValue(def_inst);
 }
 
-void RegAlloc::SpillRegister(HostLoc loc) {
-    ASSERT_MSG(HostLocIsRegister(loc), "Only registers can be spilled");
-    ASSERT_MSG(!LocInfo(loc).IsEmpty(), "There is no need to spill unoccupied registers");
-    ASSERT_MSG(!LocInfo(loc).IsLocked(), "Registers that have been allocated must not be spilt");
+void RegAlloc::DefineValueImpl(IR::Inst* def_inst, const IR::Value& use_inst) {
+    DEBUG_ASSERT_MSG(!ValueLocation(def_inst), "def_inst has already been defined");
 
-    HostLoc new_loc = FindFreeSpill();
-    Move(new_loc, loc);
-}
-
-HostLoc RegAlloc::FindFreeSpill() const {
-    for (size_t i = 0; i < SpillCount; i++)
-        if (LocInfo(HostLocSpill(i)).IsEmpty())
-            return HostLocSpill(i);
-
-    ASSERT_MSG(false, "All spill locations are full");
-}
-
-void RegAlloc::EndOfAllocScope() {
-    for (auto& iter : hostloc_info) {
-        iter.EndOfAllocScope();
+    if (use_inst.IsImmediate()) {
+        HostLoc location = ScratchImpl(any_gpr);
+        DefineValueImpl(def_inst, location);
+        LoadImmediate(use_inst, location);
+        return;
     }
+
+    use_inst.GetInst()->DecrementRemainingUses();
+    DEBUG_ASSERT_MSG(ValueLocation(use_inst.GetInst()), "use_inst must already be defined");
+    HostLoc location = *ValueLocation(use_inst.GetInst());
+    DefineValueImpl(def_inst, location);
 }
 
-void RegAlloc::AssertNoMoreUses() {
-    if (!std::all_of(hostloc_info.begin(), hostloc_info.end(), [](const auto& i){ return i.IsEmpty(); })) {
-        ASSERT_MSG(false, "bad");
-    }
-}
-
-void RegAlloc::Reset() {
-    hostloc_info.fill({});
-}
-
-HostLoc RegAlloc::LoadImmediateIntoHostLocReg(IR::Value imm, HostLoc host_loc) {
+HostLoc RegAlloc::LoadImmediate(IR::Value imm, HostLoc host_loc) {
     ASSERT_MSG(imm.IsImmediate(), "imm is not an immediate");
 
     Xbyak::Reg64 reg = HostLocToReg64(host_loc);
@@ -360,5 +389,32 @@ void RegAlloc::MoveOutOfTheWay(HostLoc reg) {
     }
 }
 
+void RegAlloc::SpillRegister(HostLoc loc) {
+    ASSERT_MSG(HostLocIsRegister(loc), "Only registers can be spilled");
+    ASSERT_MSG(!LocInfo(loc).IsEmpty(), "There is no need to spill unoccupied registers");
+    ASSERT_MSG(!LocInfo(loc).IsLocked(), "Registers that have been allocated must not be spilt");
+
+    HostLoc new_loc = FindFreeSpill();
+    Move(new_loc, loc);
+}
+
+HostLoc RegAlloc::FindFreeSpill() const {
+    for (size_t i = 0; i < SpillCount; i++)
+        if (LocInfo(HostLocSpill(i)).IsEmpty())
+            return HostLocSpill(i);
+
+    ASSERT_MSG(false, "All spill locations are full");
+}
+
+HostLocInfo& RegAlloc::LocInfo(HostLoc loc) {
+    DEBUG_ASSERT(loc != HostLoc::RSP && loc != HostLoc::R15);
+    return hostloc_info[static_cast<size_t>(loc)];
+}
+
+const HostLocInfo& RegAlloc::LocInfo(HostLoc loc) const {
+    DEBUG_ASSERT(loc != HostLoc::RSP && loc != HostLoc::R15);
+    return hostloc_info[static_cast<size_t>(loc)];
+}
+
 } // namespace BackendX64
 } // namespace Dynarmic
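FindFreeSpill above is a first-fit scan over the spill slots, and exhaustion is treated as an unrecoverable error rather than a condition to handle. A standalone sketch of the same policy; Slot, kSpillCount and find_free_spill are invented stand-ins, not the real HostLoc machinery:

    #include <array>
    #include <cstddef>
    #include <cstdio>
    #include <cstdlib>
    #include <optional>

    // First-fit spill-slot search, in the spirit of FindFreeSpill above.
    struct Slot {
        bool empty = true;
    };

    constexpr std::size_t kSpillCount = 4;

    std::optional<std::size_t> find_free_spill(const std::array<Slot, kSpillCount>& spills) {
        for (std::size_t i = 0; i < spills.size(); i++)
            if (spills[i].empty)
                return i;     // first empty slot wins
        return std::nullopt;  // the real allocator treats this as a fatal error
    }

    int main() {
        std::array<Slot, kSpillCount> spills;
        spills[0].empty = false;  // pretend slot 0 already holds a spilled value

        if (auto slot = find_free_spill(spills)) {
            std::printf("spilling to slot %zu\n", *slot);  // prints: slot 1
        } else {
            std::fprintf(stderr, "all spill locations are full\n");
            std::abort();
        }
    }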
diff --git a/src/backend_x64/reg_alloc.h b/src/backend_x64/reg_alloc.h
index 219342bb..b4c7544c 100644
--- a/src/backend_x64/reg_alloc.h
+++ b/src/backend_x64/reg_alloc.h
@@ -104,61 +104,22 @@ public:
 
     std::array<Argument, 3> GetArgumentInfo(IR::Inst* inst);
 
-    Xbyak::Reg64 UseGpr(Argument& arg) {
-        ASSERT(!arg.allocated);
-        arg.allocated = true;
-        return HostLocToReg64(UseHostLocReg(arg.value, any_gpr));
-    }
-    Xbyak::Xmm UseXmm(Argument& arg) {
-        ASSERT(!arg.allocated);
-        arg.allocated = true;
-        return HostLocToXmm(UseHostLocReg(arg.value, any_xmm));
-    }
-    OpArg UseOpArg(Argument& arg) {
-        return UseGpr(arg);
-    }
-    void Use(Argument& arg, HostLoc host_loc) {
-        ASSERT(!arg.allocated);
-        arg.allocated = true;
-        UseHostLocReg(arg.value, {host_loc});
-    }
+    Xbyak::Reg64 UseGpr(Argument& arg);
+    Xbyak::Xmm UseXmm(Argument& arg);
+    OpArg UseOpArg(Argument& arg);
+    void Use(Argument& arg, HostLoc host_loc);
 
-    Xbyak::Reg64 UseScratchGpr(Argument& arg) {
-        ASSERT(!arg.allocated);
-        arg.allocated = true;
-        return HostLocToReg64(UseScratchHostLocReg(arg.value, any_gpr));
-    }
-    Xbyak::Xmm UseScratchXmm(Argument& arg) {
-        ASSERT(!arg.allocated);
-        arg.allocated = true;
-        return HostLocToXmm(UseScratchHostLocReg(arg.value, any_xmm));
-    }
-    void UseScratch(Argument& arg, HostLoc host_loc) {
-        ASSERT(!arg.allocated);
-        arg.allocated = true;
-        UseScratchHostLocReg(arg.value, {host_loc});
-    }
+    Xbyak::Reg64 UseScratchGpr(Argument& arg);
+    Xbyak::Xmm UseScratchXmm(Argument& arg);
+    void UseScratch(Argument& arg, HostLoc host_loc);
 
-    void DefineValue(IR::Inst* inst, const Xbyak::Reg& reg) {
-        ASSERT(reg.getKind() == Xbyak::Operand::XMM || reg.getKind() == Xbyak::Operand::REG);
-        HostLoc hostloc = static_cast<HostLoc>(reg.getIdx() + static_cast<size_t>(reg.getKind() == Xbyak::Operand::XMM ? HostLoc::XMM0 : HostLoc::RAX));
-        DefineValue(inst, hostloc);
-    }
-    void DefineValue(IR::Inst* inst, Argument& arg) {
-        ASSERT(!arg.allocated);
-        arg.allocated = true;
-        RegisterAddDef(inst, arg.value);
-    }
+    void DefineValue(IR::Inst* inst, const Xbyak::Reg& reg);
+    void DefineValue(IR::Inst* inst, Argument& arg);
 
-    Xbyak::Reg64 ScratchGpr(HostLocList desired_locations = any_gpr) {
-        return HostLocToReg64(ScratchHostLocReg(desired_locations));
-    }
-    Xbyak::Xmm ScratchXmm(HostLocList desired_locations = any_xmm) {
-        return HostLocToXmm(ScratchHostLocReg(desired_locations));
-    }
+    Xbyak::Reg64 ScratchGpr(HostLocList desired_locations = any_gpr);
+    Xbyak::Xmm ScratchXmm(HostLocList desired_locations = any_xmm);
 
-    /// Late-def for result register, Early-use for all arguments, Each value is placed into registers according to host ABI.
-    void HostCall(IR::Inst* result_def = nullptr, IR::Value arg0_use = {}, IR::Value arg1_use = {}, IR::Value arg2_use = {}, IR::Value arg3_use = {});
+    void HostCall(IR::Inst* result_def = nullptr, boost::optional<Argument&> arg0 = {}, boost::optional<Argument&> arg1 = {}, boost::optional<Argument&> arg2 = {}, boost::optional<Argument&> arg3 = {});
 
     // TODO: Values in host flags
 
@@ -166,25 +127,21 @@ public:
 
     void AssertNoMoreUses();
 
-    void Reset();
-
 private:
     friend struct Argument;
 
     HostLoc SelectARegister(HostLocList desired_locations) const;
     boost::optional<HostLoc> ValueLocation(const IR::Inst* value) const;
 
-    void DefineValue(IR::Inst* def_inst, HostLoc host_loc);
-    void RegisterAddDef(IR::Inst* def_inst, const IR::Value& use_inst);
+    HostLoc UseImpl(IR::Value use_value, HostLocList desired_locations);
+    HostLoc UseScratchImpl(IR::Value use_value, HostLocList desired_locations);
+    HostLoc ScratchImpl(HostLocList desired_locations);
+    void DefineValueImpl(IR::Inst* def_inst, HostLoc host_loc);
+    void DefineValueImpl(IR::Inst* def_inst, const IR::Value& use_inst);
 
-    HostLoc UseHostLocReg(IR::Value use_value, HostLocList desired_locations);
-    HostLoc UseHostLocReg(IR::Inst* use_inst, HostLocList desired_locations);
-    HostLoc UseScratchHostLocReg(IR::Value use_value, HostLocList desired_locations);
-    HostLoc UseScratchHostLocReg(IR::Inst* use_inst, HostLocList desired_locations);
-    HostLoc ScratchHostLocReg(HostLocList desired_locations);
-
-    HostLoc LoadImmediateIntoHostLocReg(IR::Value imm, HostLoc reg);
+    BlockOfCode* code = nullptr;
 
+    HostLoc LoadImmediate(IR::Value imm, HostLoc reg);
     void Move(HostLoc to, HostLoc from);
     void CopyToScratch(HostLoc to, HostLoc from);
     void Exchange(HostLoc a, HostLoc b);
@@ -193,17 +150,9 @@ private:
     void SpillRegister(HostLoc loc);
     HostLoc FindFreeSpill() const;
 
-    BlockOfCode* code = nullptr;
-
     std::array<HostLocInfo, HostLocCount> hostloc_info;
-    HostLocInfo& LocInfo(HostLoc loc) {
-        DEBUG_ASSERT(loc != HostLoc::RSP && loc != HostLoc::R15);
-        return hostloc_info[static_cast<size_t>(loc)];
-    }
-    const HostLocInfo& LocInfo(HostLoc loc) const {
-        DEBUG_ASSERT(loc != HostLoc::RSP && loc != HostLoc::R15);
-        return hostloc_info[static_cast<size_t>(loc)];
-    }
+    HostLocInfo& LocInfo(HostLoc loc);
+    const HostLocInfo& LocInfo(HostLoc loc) const;
 };
 
 } // namespace BackendX64
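The HostCall rework in this patch places arguments in two passes: every argument that is present is use-scratched directly into its ABI parameter register, then every remaining parameter register and the other caller-saved registers are claimed as scratch so no live value survives in them across the call. A sketch of that two-pass placement; the register set and helper names are assumptions for illustration (shown with Win64-style parameter registers):

    #include <array>
    #include <cstddef>
    #include <cstdio>
    #include <optional>
    #include <string>

    // Sketch of HostCall-style argument placement: present arguments are
    // pinned to their ABI registers; absent slots are still claimed as
    // scratch so nothing live is left in a caller-saved register.
    enum class Reg { RCX, RDX, R8, R9 };  // e.g. the Win64 integer parameter registers

    const char* name(Reg r) {
        switch (r) {
        case Reg::RCX: return "rcx";
        case Reg::RDX: return "rdx";
        case Reg::R8:  return "r8";
        case Reg::R9:  return "r9";
        }
        return "?";
    }

    void place_host_call_args(const std::array<std::optional<std::string>, 4>& args) {
        constexpr std::array<Reg, 4> abi_regs = { Reg::RCX, Reg::RDX, Reg::R8, Reg::R9 };

        // Pass 1: move each present argument into its parameter register.
        for (std::size_t i = 0; i < args.size(); i++)
            if (args[i])
                std::printf("use-scratch %-8s -> %s\n", args[i]->c_str(), name(abi_regs[i]));

        // Pass 2: claim the unused parameter registers as scratch.
        for (std::size_t i = 0; i < args.size(); i++)
            if (!args[i])
                std::printf("scratch     %s\n", name(abi_regs[i]));
    }

    int main() {
        place_host_call_args({ std::string("vaddr"), std::nullopt, std::string("value"), std::nullopt });
    }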
From 184db36caf748afb8a41787437947994922df760 Mon Sep 17 00:00:00 2001
From: MerryMage
Date: Sun, 26 Feb 2017 22:57:12 +0000
Subject: [PATCH 14/16] reg_alloc: Call DecrementRemainingUses in only one place

---
 src/backend_x64/emit_x64.cpp  | 16 ----------------
 src/backend_x64/reg_alloc.cpp |  4 +++-
 2 files changed, 3 insertions(+), 17 deletions(-)

diff --git a/src/backend_x64/emit_x64.cpp b/src/backend_x64/emit_x64.cpp
index b433391c..6fb70618 100644
--- a/src/backend_x64/emit_x64.cpp
+++ b/src/backend_x64/emit_x64.cpp
@@ -567,10 +567,6 @@ void EmitX64::EmitLogicalShiftLeft(RegAlloc& reg_alloc, IR::Block& block, IR::In
     // TODO: Consider using BMI2 instructions like SHLX when arm-in-host flags is implemented.
 
     if (!carry_inst) {
-        if (!carry_arg.IsImmediate()) {
-            inst->GetArg(2).GetInst()->DecrementRemainingUses();
-        }
-
         if (shift_arg.IsImmediate()) {
             Xbyak::Reg32 result = reg_alloc.UseScratchGpr(operand_arg).cvt32();
             u8 shift = shift_arg.GetImmediateU8();
@@ -669,10 +665,6 @@ void EmitX64::EmitLogicalShiftRight(RegAlloc& reg_alloc, IR::Block& block, IR::I
     auto& carry_arg = args[2];
 
     if (!carry_inst) {
-        if (!carry_arg.IsImmediate()) {
-            inst->GetArg(2).GetInst()->DecrementRemainingUses();
-        }
-
         if (shift_arg.IsImmediate()) {
             Xbyak::Reg32 result = reg_alloc.UseScratchGpr(operand_arg).cvt32();
             u8 shift = shift_arg.GetImmediateU8();
@@ -788,10 +780,6 @@ void EmitX64::EmitArithmeticShiftRight(RegAlloc& reg_alloc, IR::Block& block, IR
     auto& carry_arg = args[2];
 
     if (!carry_inst) {
-        if (!carry_arg.IsImmediate()) {
-            inst->GetArg(2).GetInst()->DecrementRemainingUses();
-        }
-
         if (shift_arg.IsImmediate()) {
             u8 shift = shift_arg.GetImmediateU8();
             Xbyak::Reg32 result = reg_alloc.UseScratchGpr(operand_arg).cvt32();
@@ -880,10 +868,6 @@ void EmitX64::EmitRotateRight(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* i
     auto& carry_arg = args[2];
 
     if (!carry_inst) {
-        if (!carry_arg.IsImmediate()) {
-            inst->GetArg(2).GetInst()->DecrementRemainingUses();
-        }
-
         if (shift_arg.IsImmediate()) {
             u8 shift = shift_arg.GetImmediateU8();
             Xbyak::Reg32 result = reg_alloc.UseScratchGpr(operand_arg).cvt32();
diff --git a/src/backend_x64/reg_alloc.cpp b/src/backend_x64/reg_alloc.cpp
index 011803de..a79d843b 100644
--- a/src/backend_x64/reg_alloc.cpp
+++ b/src/backend_x64/reg_alloc.cpp
@@ -114,6 +114,9 @@ std::array<Argument, 3> RegAlloc::GetArgumentInfo(IR::Inst* inst) {
     for (size_t i = 0; i < inst->NumArgs(); i++) {
         IR::Value arg = inst->GetArg(i);
         ret[i].value = arg;
+        if (!arg.IsImmediate()) {
+            arg.GetInst()->DecrementRemainingUses();
+        }
     }
     return ret;
 }
@@ -326,7 +329,6 @@ void RegAlloc::DefineValueImpl(IR::Inst* def_inst, const IR::Value& use_inst) {
         return;
     }
 
-    use_inst.GetInst()->DecrementRemainingUses();
     DEBUG_ASSERT_MSG(ValueLocation(use_inst.GetInst()), "use_inst must already be defined");
     HostLoc location = *ValueLocation(use_inst.GetInst());
     DefineValueImpl(def_inst, location);
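After this patch, the only place a use is consumed is GetArgumentInfo: fetching an instruction's arguments pays for one use of each non-immediate argument, so emitters no longer need the ad-hoc DecrementRemainingUses calls deleted above. A toy model of that bookkeeping, with UseCounter standing in for IR::Inst's use counts:

    #include <cassert>
    #include <cstdio>

    // Toy model of IR use counting: each argument fetch pays for one use
    // up front, and a value is dead once its remaining uses reach zero.
    struct UseCounter {
        int remaining_uses = 0;

        void DecrementRemainingUses() {
            assert(remaining_uses > 0 && "more uses consumed than were recorded");
            remaining_uses--;
        }
        bool HasUses() const { return remaining_uses > 0; }
    };

    int main() {
        UseCounter value;
        value.remaining_uses = 2;       // two instructions consume this value

        value.DecrementRemainingUses(); // first consumer fetches its arguments
        std::printf("after first use:  HasUses = %d\n", value.HasUses());

        value.DecrementRemainingUses(); // second consumer fetches its arguments
        std::printf("after second use: HasUses = %d\n", value.HasUses());
        // now dead: the allocator may reclaim its register at end of scope
    }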
From 135346eb2e24c0c29d19ba5489d58ea0c1d22dfe Mon Sep 17 00:00:00 2001
From: MerryMage
Date: Sun, 26 Feb 2017 23:16:41 +0000
Subject: [PATCH 15/16] reg_alloc: Move implementations out of header

---
 src/backend_x64/reg_alloc.cpp | 47 +++++++++++++++++++++++++++++++++++
 src/backend_x64/reg_alloc.h   | 46 ++++++++--------------------------
 2 files changed, 57 insertions(+), 36 deletions(-)

diff --git a/src/backend_x64/reg_alloc.cpp b/src/backend_x64/reg_alloc.cpp
index a79d843b..7f256555 100644
--- a/src/backend_x64/reg_alloc.cpp
+++ b/src/backend_x64/reg_alloc.cpp
@@ -71,6 +71,53 @@ static void EmitExchange(BlockOfCode* code, HostLoc a, HostLoc b) {
     }
 }
 
+bool HostLocInfo::IsLocked() const {
+    return is_being_used;
+}
+
+bool HostLocInfo::IsEmpty() const {
+    return !is_being_used && values.empty();
+}
+
+bool HostLocInfo::IsLastUse() const {
+    return !is_being_used && std::all_of(values.begin(), values.end(), [](const auto& inst) { return !inst->HasUses(); });
+}
+
+bool HostLocInfo::ContainsValue(const IR::Inst* inst) const {
+    return std::find(values.begin(), values.end(), inst) != values.end();
+}
+
+void HostLocInfo::ReadLock() {
+    ASSERT(!is_scratch);
+    is_being_used = true;
+}
+
+void HostLocInfo::WriteLock() {
+    ASSERT(!is_being_used);
+    is_being_used = true;
+    is_scratch = true;
+}
+
+void HostLocInfo::AddValue(IR::Inst* inst) {
+    values.push_back(inst);
+}
+
+void HostLocInfo::EndOfAllocScope() {
+    const auto to_erase = std::remove_if(values.begin(), values.end(), [](const auto& inst) { return !inst->HasUses(); });
+    values.erase(to_erase, values.end());
+
+    is_being_used = false;
+    is_scratch = false;
+}
+
+IR::Type Argument::GetType() const {
+    return value.GetType();
+}
+
+bool Argument::IsImmediate() const {
+    return value.IsImmediate();
+}
+
 bool Argument::GetImmediateU1() const {
     return value.GetU1();
 }
diff --git a/src/backend_x64/reg_alloc.h b/src/backend_x64/reg_alloc.h
index b4c7544c..6a54819f 100644
--- a/src/backend_x64/reg_alloc.h
+++ b/src/backend_x64/reg_alloc.h
@@ -26,40 +26,18 @@ class RegAlloc;
 
 struct HostLocInfo {
 public:
-    bool IsLocked() const {
-        return is_being_used;
-    }
-    bool IsEmpty() const {
-        return !is_being_used && values.empty();
-    }
-    bool IsLastUse() const {
-        return !is_being_used && std::all_of(values.begin(), values.end(), [](const auto& inst) { return !inst->HasUses(); });
-    }
+    bool IsLocked() const;
+    bool IsEmpty() const;
+    bool IsLastUse() const;
 
-    bool ContainsValue(const IR::Inst* inst) const {
-        return std::find(values.begin(), values.end(), inst) != values.end();
-    }
+    bool ContainsValue(const IR::Inst* inst) const;
 
-    void ReadLock() {
-        ASSERT(!is_scratch);
-        is_being_used = true;
-    }
-    void WriteLock() {
-        ASSERT(!is_being_used);
-        is_being_used = true;
-        is_scratch = true;
-    }
-    void AddValue(IR::Inst* inst) {
-        values.push_back(inst);
-    }
+    void ReadLock();
+    void WriteLock();
 
-    void EndOfAllocScope() {
-        const auto to_erase = std::remove_if(values.begin(), values.end(), [](const auto& inst){ return !inst->HasUses(); });
-        values.erase(to_erase, values.end());
+    void AddValue(IR::Inst* inst);
 
-        is_being_used = false;
-        is_scratch = false;
-    }
+    void EndOfAllocScope();
 
 private:
    std::vector<IR::Inst*> values;
@@ -69,12 +47,8 @@ private:
 
 struct Argument {
 public:
-    IR::Type GetType() const {
-        return value.GetType();
-    }
-    bool IsImmediate() const {
-        return value.IsImmediate();
-    }
+    IR::Type GetType() const;
+    bool IsImmediate() const;
 
     bool GetImmediateU1() const;
     u8 GetImmediateU8() const;
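EndOfAllocScope above relies on the erase-remove idiom: values whose remaining uses have hit zero are compacted away, live values stay put, and the lock flags reset for the next instruction. The same step in isolation, with plain integers standing in for IR::Inst pointers:

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    int main() {
        // Remaining-use counts standing in for the values a host location
        // may hold; zero means the value is dead.
        std::vector<int> remaining_uses = { 2, 0, 1, 0 };

        // Erase-remove idiom, as in EndOfAllocScope: compact the live values
        // to the front, then trim the dead tail in a single erase call.
        const auto to_erase = std::remove_if(remaining_uses.begin(), remaining_uses.end(),
                                             [](int uses) { return uses == 0; });
        remaining_uses.erase(to_erase, remaining_uses.end());

        for (int uses : remaining_uses)
            std::printf("live value with %d remaining use(s)\n", uses);  // prints 2, then 1
    }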
From 92a01b0cd88b13895ff8056b9c435bf0eb6d7bfc Mon Sep 17 00:00:00 2001
From: MerryMage
Date: Sun, 26 Feb 2017 23:27:41 +0000
Subject: [PATCH 16/16] Prefer ASSERT to DEBUG_ASSERT

---
 src/backend_x64/hostloc.cpp          |  6 +++---
 src/backend_x64/reg_alloc.cpp        | 10 +++++-----
 src/frontend/ir/microinstruction.cpp | 20 ++++++++++----------
 src/frontend/ir/value.cpp            | 18 +++++++++---------
 4 files changed, 27 insertions(+), 27 deletions(-)

diff --git a/src/backend_x64/hostloc.cpp b/src/backend_x64/hostloc.cpp
index 2093623a..6349e4a8 100644
--- a/src/backend_x64/hostloc.cpp
+++ b/src/backend_x64/hostloc.cpp
@@ -10,12 +10,12 @@ namespace Dynarmic {
 namespace BackendX64 {
 
 Xbyak::Reg64 HostLocToReg64(HostLoc loc) {
-    DEBUG_ASSERT(HostLocIsGPR(loc));
+    ASSERT(HostLocIsGPR(loc));
     return Xbyak::Reg64(static_cast<int>(loc));
 }
 
 Xbyak::Xmm HostLocToXmm(HostLoc loc) {
-    DEBUG_ASSERT(HostLocIsXMM(loc));
+    ASSERT(HostLocIsXMM(loc));
     return Xbyak::Xmm(static_cast<int>(loc) - static_cast<int>(HostLoc::XMM0));
 }
 
 Xbyak::Address SpillToOpArg(HostLoc loc) {
     using namespace Xbyak::util;
 
     static_assert(std::is_same<decltype(JitState{}.Spill[0]), u64&>::value, "Spill must be u64");
-    DEBUG_ASSERT(HostLocIsSpill(loc));
+    ASSERT(HostLocIsSpill(loc));
 
     size_t i = static_cast<size_t>(loc) - static_cast<size_t>(HostLoc::FirstSpill);
     return qword[r15 + offsetof(JitState, Spill) + i * sizeof(u64)];
diff --git a/src/backend_x64/reg_alloc.cpp b/src/backend_x64/reg_alloc.cpp
index 7f256555..3496f6cd 100644
--- a/src/backend_x64/reg_alloc.cpp
+++ b/src/backend_x64/reg_alloc.cpp
@@ -362,12 +362,12 @@ boost::optional<HostLoc> RegAlloc::ValueLocation(const IR::Inst* value) const {
 }
 
 void RegAlloc::DefineValueImpl(IR::Inst* def_inst, HostLoc host_loc) {
-    DEBUG_ASSERT_MSG(!ValueLocation(def_inst), "def_inst has already been defined");
+    ASSERT_MSG(!ValueLocation(def_inst), "def_inst has already been defined");
     LocInfo(host_loc).AddValue(def_inst);
 }
 
 void RegAlloc::DefineValueImpl(IR::Inst* def_inst, const IR::Value& use_inst) {
-    DEBUG_ASSERT_MSG(!ValueLocation(def_inst), "def_inst has already been defined");
+    ASSERT_MSG(!ValueLocation(def_inst), "def_inst has already been defined");
 
     if (use_inst.IsImmediate()) {
         HostLoc location = ScratchImpl(any_gpr);
@@ -376,7 +376,7 @@ void RegAlloc::DefineValueImpl(IR::Inst* def_inst, const IR::Value& use_inst) {
         return;
     }
 
-    DEBUG_ASSERT_MSG(ValueLocation(use_inst.GetInst()), "use_inst must already be defined");
+    ASSERT_MSG(ValueLocation(use_inst.GetInst()), "use_inst must already be defined");
     HostLoc location = *ValueLocation(use_inst.GetInst());
     DefineValueImpl(def_inst, location);
 }
@@ -456,12 +456,12 @@ HostLoc RegAlloc::FindFreeSpill() const {
 }
 
 HostLocInfo& RegAlloc::LocInfo(HostLoc loc) {
-    DEBUG_ASSERT(loc != HostLoc::RSP && loc != HostLoc::R15);
+    ASSERT(loc != HostLoc::RSP && loc != HostLoc::R15);
     return hostloc_info[static_cast<size_t>(loc)];
 }
 
 const HostLocInfo& RegAlloc::LocInfo(HostLoc loc) const {
-    DEBUG_ASSERT(loc != HostLoc::RSP && loc != HostLoc::R15);
+    ASSERT(loc != HostLoc::RSP && loc != HostLoc::R15);
     return hostloc_info[static_cast<size_t>(loc)];
 }
 
diff --git a/src/frontend/ir/microinstruction.cpp b/src/frontend/ir/microinstruction.cpp
index b5d89188..7ed7dc34 100644
--- a/src/frontend/ir/microinstruction.cpp
+++ b/src/frontend/ir/microinstruction.cpp
@@ -255,13 +255,13 @@ Inst* Inst::GetAssociatedPseudoOperation(Opcode opcode) {
     // This is faster than doing a search through the block.
     switch (opcode) {
     case IR::Opcode::GetCarryFromOp:
-        DEBUG_ASSERT(!carry_inst || carry_inst->GetOpcode() == Opcode::GetCarryFromOp);
+        ASSERT(!carry_inst || carry_inst->GetOpcode() == Opcode::GetCarryFromOp);
         return carry_inst;
     case IR::Opcode::GetOverflowFromOp:
-        DEBUG_ASSERT(!overflow_inst || overflow_inst->GetOpcode() == Opcode::GetOverflowFromOp);
+        ASSERT(!overflow_inst || overflow_inst->GetOpcode() == Opcode::GetOverflowFromOp);
         return overflow_inst;
     case IR::Opcode::GetGEFromOp:
-        DEBUG_ASSERT(!ge_inst || ge_inst->GetOpcode() == Opcode::GetGEFromOp);
+        ASSERT(!ge_inst || ge_inst->GetOpcode() == Opcode::GetGEFromOp);
         return ge_inst;
     default:
         break;
@@ -278,15 +278,15 @@ Type Inst::GetType() const {
 }
 
 Value Inst::GetArg(size_t index) const {
-    DEBUG_ASSERT(index < GetNumArgsOf(op));
-    DEBUG_ASSERT(!args[index].IsEmpty());
+    ASSERT(index < GetNumArgsOf(op));
+    ASSERT(!args[index].IsEmpty());
 
     return args[index];
 }
 
 void Inst::SetArg(size_t index, Value value) {
-    DEBUG_ASSERT(index < GetNumArgsOf(op));
-    DEBUG_ASSERT(AreTypesCompatible(value.GetType(), GetArgTypeOf(op, index)));
+    ASSERT(index < GetNumArgsOf(op));
+    ASSERT(AreTypesCompatible(value.GetType(), GetArgTypeOf(op, index)));
 
     if (!args[index].IsImmediate()) {
         UndoUse(args[index]);
@@ -346,15 +346,15 @@ void Inst::UndoUse(const Value& value) {
 
     switch (op){
     case Opcode::GetCarryFromOp:
-        DEBUG_ASSERT(value.GetInst()->carry_inst->GetOpcode() == Opcode::GetCarryFromOp);
+        ASSERT(value.GetInst()->carry_inst->GetOpcode() == Opcode::GetCarryFromOp);
         value.GetInst()->carry_inst = nullptr;
         break;
     case Opcode::GetOverflowFromOp:
-        DEBUG_ASSERT(value.GetInst()->overflow_inst->GetOpcode() == Opcode::GetOverflowFromOp);
+        ASSERT(value.GetInst()->overflow_inst->GetOpcode() == Opcode::GetOverflowFromOp);
         value.GetInst()->overflow_inst = nullptr;
         break;
     case Opcode::GetGEFromOp:
-        DEBUG_ASSERT(value.GetInst()->ge_inst->GetOpcode() == Opcode::GetGEFromOp);
+        ASSERT(value.GetInst()->ge_inst->GetOpcode() == Opcode::GetGEFromOp);
         value.GetInst()->ge_inst = nullptr;
         break;
     default:
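The substitution throughout this patch trades a compiled-out check for one that survives release builds. A sketch of the usual shape of such a macro pair; dynarmic's actual definitions live in its assertion header and may differ in detail:

    #include <cstdio>
    #include <cstdlib>

    // Typical shape of an always-on ASSERT versus a debug-only DEBUG_ASSERT.
    // These definitions are illustrative, not the project's real macros.
    #define MY_ASSERT(expr)                                             \
        do {                                                            \
            if (!(expr)) {                                              \
                std::fprintf(stderr, "assertion failed: %s\n", #expr);  \
                std::abort();                                           \
            }                                                           \
        } while (false)

    #ifdef NDEBUG
    #define MY_DEBUG_ASSERT(expr) ((void)0)   // release build: check vanishes entirely
    #else
    #define MY_DEBUG_ASSERT(expr) MY_ASSERT(expr)
    #endif

    int main() {
        MY_ASSERT(1 + 1 == 2);        // checked in every build
        MY_DEBUG_ASSERT(2 + 2 == 4);  // checked only when NDEBUG is not defined
        std::puts("all checks passed");
    }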
diff --git a/src/frontend/ir/value.cpp b/src/frontend/ir/value.cpp
index 8ade5af2..50ea6e25 100644
--- a/src/frontend/ir/value.cpp
+++ b/src/frontend/ir/value.cpp
@@ -69,59 +69,59 @@ Type Value::GetType() const {
 }
 
 Arm::Reg Value::GetRegRef() const {
-    DEBUG_ASSERT(type == Type::RegRef);
+    ASSERT(type == Type::RegRef);
     return inner.imm_regref;
 }
 
 Arm::ExtReg Value::GetExtRegRef() const {
-    DEBUG_ASSERT(type == Type::ExtRegRef);
+    ASSERT(type == Type::ExtRegRef);
     return inner.imm_extregref;
 }
 
 Inst* Value::GetInst() const {
-    DEBUG_ASSERT(type == Type::Opaque);
+    ASSERT(type == Type::Opaque);
     return inner.inst;
 }
 
 bool Value::GetU1() const {
     if (type == Type::Opaque && inner.inst->GetOpcode() == Opcode::Identity)
         return inner.inst->GetArg(0).GetU1();
-    DEBUG_ASSERT(type == Type::U1);
+    ASSERT(type == Type::U1);
     return inner.imm_u1;
 }
 
 u8 Value::GetU8() const {
     if (type == Type::Opaque && inner.inst->GetOpcode() == Opcode::Identity)
         return inner.inst->GetArg(0).GetU8();
-    DEBUG_ASSERT(type == Type::U8);
+    ASSERT(type == Type::U8);
     return inner.imm_u8;
 }
 
 u16 Value::GetU16() const {
     if (type == Type::Opaque && inner.inst->GetOpcode() == Opcode::Identity)
         return inner.inst->GetArg(0).GetU16();
-    DEBUG_ASSERT(type == Type::U16);
+    ASSERT(type == Type::U16);
     return inner.imm_u16;
 }
 
 u32 Value::GetU32() const {
     if (type == Type::Opaque && inner.inst->GetOpcode() == Opcode::Identity)
         return inner.inst->GetArg(0).GetU32();
-    DEBUG_ASSERT(type == Type::U32);
+    ASSERT(type == Type::U32);
     return inner.imm_u32;
 }
 
 u64 Value::GetU64() const {
     if (type == Type::Opaque && inner.inst->GetOpcode() == Opcode::Identity)
         return inner.inst->GetArg(0).GetU64();
-    DEBUG_ASSERT(type == Type::U64);
+    ASSERT(type == Type::U64);
     return inner.imm_u64;
 }
 
 std::array<u8, 8> Value::GetCoprocInfo() const {
     if (type == Type::Opaque && inner.inst->GetOpcode() == Opcode::Identity)
         return inner.inst->GetArg(0).GetCoprocInfo();
-    DEBUG_ASSERT(type == Type::CoprocInfo);
+    ASSERT(type == Type::CoprocInfo);
     return inner.imm_coproc;
 }
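Each Value::Get* accessor above looks through Identity operations before insisting on an immediate of the matching type, so a chain of Identities behaves like the underlying constant. A compact model of that look-through; this simplified Value is an invented stand-in for the IR type:

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    // Simplified model of Value::GetU32's Identity look-through: an opaque
    // value wrapping an Identity operation forwards to that operation's
    // argument; only genuine immediates yield a number directly.
    struct Value {
        bool is_immediate;
        std::uint32_t imm_u32;
        const Value* identity_of;  // non-null when this wraps an Identity op

        std::uint32_t GetU32() const {
            if (!is_immediate && identity_of)
                return identity_of->GetU32();  // chase through Identity chains
            assert(is_immediate && "GetU32 called on a non-immediate value");
            return imm_u32;
        }
    };

    int main() {
        Value imm{true, 42, nullptr};
        Value ident{false, 0, &imm};     // Identity(imm)
        Value ident2{false, 0, &ident};  // Identity(Identity(imm))
        std::printf("%u\n", ident2.GetU32());  // prints 42
    }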