diff --git a/src/backend/x64/a32_emit_x64.cpp b/src/backend/x64/a32_emit_x64.cpp index d90c637d..78bbecba 100644 --- a/src/backend/x64/a32_emit_x64.cpp +++ b/src/backend/x64/a32_emit_x64.cpp @@ -51,6 +51,10 @@ static Xbyak::Address MJitStateExtReg(A32::ExtReg reg) { const size_t index = static_cast(reg) - static_cast(A32::ExtReg::D0); return qword[r15 + offsetof(A32JitState, ExtReg) + sizeof(u64) * index]; } + if (A32::IsQuadExtReg(reg)) { + const size_t index = static_cast(reg) - static_cast(A32::ExtReg::Q0); + return xword[r15 + offsetof(A32JitState, ExtReg) + 2 * sizeof(u64) * index]; + } ASSERT_FALSE("Should never happen."); } @@ -339,6 +343,19 @@ void A32EmitX64::EmitA32GetExtendedRegister64(A32EmitContext& ctx, IR::Inst* ins ctx.reg_alloc.DefineValue(inst, result); } +void A32EmitX64::EmitA32GetVector(A32EmitContext& ctx, IR::Inst* inst) { + const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); + ASSERT(A32::IsDoubleExtReg(reg) || A32::IsQuadExtReg(reg)); + + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + if (A32::IsDoubleExtReg(reg)) { + code.movsd(result, MJitStateExtReg(reg)); + } else { + code.movaps(result, MJitStateExtReg(reg)); + } + ctx.reg_alloc.DefineValue(inst, result); +} + void A32EmitX64::EmitA32SetRegister(A32EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); const A32::Reg reg = inst->GetArg(0).GetA32RegRef(); @@ -382,6 +399,19 @@ void A32EmitX64::EmitA32SetExtendedRegister64(A32EmitContext& ctx, IR::Inst* ins } } +void A32EmitX64::EmitA32SetVector(A32EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); + ASSERT(A32::IsDoubleExtReg(reg) || A32::IsQuadExtReg(reg)); + + const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[1]); + if (A32::IsDoubleExtReg(reg)) { + code.movsd(MJitStateExtReg(reg), to_store); + } else { + code.movaps(MJitStateExtReg(reg), to_store); + } +} + static u32 GetCpsrImpl(A32JitState* jit_state) { return jit_state->Cpsr(); } diff --git a/src/backend/x64/a32_jitstate.h b/src/backend/x64/a32_jitstate.h index 645f509e..f1b0d233 100644 --- a/src/backend/x64/a32_jitstate.h +++ b/src/backend/x64/a32_jitstate.h @@ -37,13 +37,13 @@ struct A32JitState { u32 Cpsr() const; void SetCpsr(u32 cpsr); - alignas(u64) std::array ExtReg{}; // Extension registers. + alignas(16) std::array ExtReg{}; // Extension registers. static constexpr size_t SpillCount = 64; - std::array Spill{}; // Spill. + alignas(16) std::array, SpillCount> spill{}; // Spill. static Xbyak::Address GetSpillLocationFromIndex(size_t i) { using namespace Xbyak::util; - return qword[r15 + offsetof(A32JitState, Spill) + i * sizeof(u64)]; + return xword[r15 + offsetof(A32JitState, spill) + i * sizeof(u64) * 2]; } // For internal use (See: BlockOfCode::RunCode) diff --git a/src/frontend/A32/ir_emitter.cpp b/src/frontend/A32/ir_emitter.cpp index 83ddff34..cc753889 100644 --- a/src/frontend/A32/ir_emitter.cpp +++ b/src/frontend/A32/ir_emitter.cpp @@ -41,6 +41,11 @@ IR::U32U64 IREmitter::GetExtendedRegister(ExtReg reg) { ASSERT_FALSE("Invalid reg."); } +IR::U128 IREmitter::GetVector(ExtReg reg) { + ASSERT(A32::IsDoubleExtReg(reg) || A32::IsQuadExtReg(reg)); + return Inst(Opcode::A32GetVector, IR::Value(reg)); +} + void IREmitter::SetRegister(const Reg reg, const IR::U32& value) { ASSERT(reg != A32::Reg::PC); Inst(Opcode::A32SetRegister, IR::Value(reg), value); @@ -56,6 +61,11 @@ void IREmitter::SetExtendedRegister(const ExtReg reg, const IR::U32U64& value) { } } +void IREmitter::SetVector(ExtReg reg, const IR::U128& value) { + ASSERT(A32::IsDoubleExtReg(reg) || A32::IsQuadExtReg(reg)); + Inst(Opcode::A32SetVector, IR::Value(reg), value); +} + void IREmitter::ALUWritePC(const IR::U32& value) { // This behaviour is ARM version-dependent. // The below implementation is for ARMv6k diff --git a/src/frontend/A32/ir_emitter.h b/src/frontend/A32/ir_emitter.h index ff625842..8500a6e5 100644 --- a/src/frontend/A32/ir_emitter.h +++ b/src/frontend/A32/ir_emitter.h @@ -33,8 +33,10 @@ public: IR::U32 GetRegister(Reg source_reg); IR::U32U64 GetExtendedRegister(ExtReg source_reg); + IR::U128 GetVector(ExtReg source_reg); void SetRegister(Reg dest_reg, const IR::U32& value); void SetExtendedRegister(ExtReg dest_reg, const IR::U32U64& value); + void SetVector(ExtReg dest_reg, const IR::U128& value); void ALUWritePC(const IR::U32& value); void BranchWritePC(const IR::U32& value); diff --git a/src/frontend/A32/types.cpp b/src/frontend/A32/types.cpp index 6751e6c0..203ffa50 100644 --- a/src/frontend/A32/types.cpp +++ b/src/frontend/A32/types.cpp @@ -38,6 +38,9 @@ const char* ExtRegToString(ExtReg reg) { "d9", "d10", "d11", "d12", "d13", "d14", "d15", "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23", "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31", + + "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", + "q9", "q10", "q11", "q12", "q13", "q14", "q15", "q16", }; return reg_strs.at(static_cast(reg)); } diff --git a/src/frontend/A32/types.h b/src/frontend/A32/types.h index b47164a1..9437f6df 100644 --- a/src/frontend/A32/types.h +++ b/src/frontend/A32/types.h @@ -36,6 +36,8 @@ enum class ExtReg { D8, D9, D10, D11, D12, D13, D14, D15, D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26, D27, D28, D29, D30, D31, + Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, + Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15, }; using RegList = u16; @@ -73,6 +75,10 @@ constexpr bool IsDoubleExtReg(ExtReg reg) { return reg >= ExtReg::D0 && reg <= ExtReg::D31; } +constexpr bool IsQuadExtReg(ExtReg reg) { + return reg >= ExtReg::Q0 && reg <= ExtReg::Q15; +} + inline size_t RegNumber(Reg reg) { ASSERT(reg != Reg::INVALID_REG); return static_cast(reg); @@ -87,6 +93,10 @@ inline size_t RegNumber(ExtReg reg) { return static_cast(reg) - static_cast(ExtReg::D0); } + if (IsQuadExtReg(reg)) { + return static_cast(reg) - static_cast(ExtReg::Q0); + } + ASSERT_FALSE("Invalid extended register"); } @@ -101,11 +111,16 @@ inline ExtReg operator+(ExtReg reg, size_t number) { const auto new_reg = static_cast(static_cast(reg) + number); ASSERT((IsSingleExtReg(reg) && IsSingleExtReg(new_reg)) || - (IsDoubleExtReg(reg) && IsDoubleExtReg(new_reg))); + (IsDoubleExtReg(reg) && IsDoubleExtReg(new_reg)) || + (IsQuadExtReg(reg) && IsQuadExtReg(new_reg))); return new_reg; } +inline ExtReg ToExtRegQ(size_t base, bool bit) { + return ExtReg::Q0 + ((base >> 1) + (bit ? 8 : 0)); +} + inline ExtReg ToExtRegD(size_t base, bool bit) { return ExtReg::D0 + (base + (bit ? 16 : 0)); } @@ -115,11 +130,11 @@ inline ExtReg ToExtRegS(size_t base, bool bit) { } inline ExtReg ToExtReg(bool sz, size_t base, bool bit) { - if (sz) { - return ToExtRegD(base, bit); - } else { - return ToExtRegS(base, bit); - } + return sz ? ToExtRegD(base, bit) : ToExtRegS(base, bit); +} + +inline ExtReg ToVector(bool Q, size_t base, bool bit) { + return Q ? ToExtRegQ(base, bit) : ToExtRegD(base, bit); } } // namespace Dynarmic::A32 diff --git a/src/frontend/ir/microinstruction.cpp b/src/frontend/ir/microinstruction.cpp index 7aa002b0..40ac6cc6 100644 --- a/src/frontend/ir/microinstruction.cpp +++ b/src/frontend/ir/microinstruction.cpp @@ -198,6 +198,7 @@ bool Inst::ReadsFromCoreRegister() const { case Opcode::A32GetRegister: case Opcode::A32GetExtendedRegister32: case Opcode::A32GetExtendedRegister64: + case Opcode::A32GetVector: case Opcode::A64GetW: case Opcode::A64GetX: case Opcode::A64GetS: @@ -216,6 +217,7 @@ bool Inst::WritesToCoreRegister() const { case Opcode::A32SetRegister: case Opcode::A32SetExtendedRegister32: case Opcode::A32SetExtendedRegister64: + case Opcode::A32SetVector: case Opcode::A32BXWritePC: case Opcode::A64SetW: case Opcode::A64SetX: diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc index 4a5ca362..29288348 100644 --- a/src/frontend/ir/opcodes.inc +++ b/src/frontend/ir/opcodes.inc @@ -9,9 +9,11 @@ A32OPC(SetCheckBit, Void, U1 A32OPC(GetRegister, U32, A32Reg ) A32OPC(GetExtendedRegister32, U32, A32ExtReg ) A32OPC(GetExtendedRegister64, U64, A32ExtReg ) +A32OPC(GetVector, U128, A32ExtReg ) A32OPC(SetRegister, Void, A32Reg, U32 ) A32OPC(SetExtendedRegister32, Void, A32ExtReg, U32 ) A32OPC(SetExtendedRegister64, Void, A32ExtReg, U64 ) +A32OPC(SetVector, Void, A32ExtReg, U128 ) A32OPC(GetCpsr, U32, ) A32OPC(SetCpsr, Void, U32 ) A32OPC(SetCpsrNZCV, Void, U32 ) diff --git a/src/ir_opt/a32_get_set_elimination_pass.cpp b/src/ir_opt/a32_get_set_elimination_pass.cpp index c4cce036..aa875cc4 100644 --- a/src/ir_opt/a32_get_set_elimination_pass.cpp +++ b/src/ir_opt/a32_get_set_elimination_pass.cpp @@ -23,8 +23,10 @@ void A32GetSetElimination(IR::Block& block) { Iterator last_set_instruction; }; std::array reg_info; - std::array ext_reg_singles_info; + std::array ext_reg_singles_info; std::array ext_reg_doubles_info; + std::array ext_reg_vector_double_info; + std::array ext_reg_vector_quad_info; struct CpsrInfo { RegisterInfo n; RegisterInfo z; @@ -75,10 +77,9 @@ void A32GetSetElimination(IR::Block& block) { const size_t reg_index = A32::RegNumber(reg); do_set(ext_reg_singles_info[reg_index], inst->GetArg(1), inst); - const size_t doubles_reg_index = reg_index / 2; - if (doubles_reg_index < ext_reg_doubles_info.size()) { - ext_reg_doubles_info[doubles_reg_index] = {}; - } + ext_reg_doubles_info[reg_index / 2] = {}; + ext_reg_vector_double_info[reg_index / 2] = {}; + ext_reg_vector_quad_info[reg_index / 4] = {}; break; } case IR::Opcode::A32GetExtendedRegister32: { @@ -86,10 +87,9 @@ void A32GetSetElimination(IR::Block& block) { const size_t reg_index = A32::RegNumber(reg); do_get(ext_reg_singles_info[reg_index], inst); - const size_t doubles_reg_index = reg_index / 2; - if (doubles_reg_index < ext_reg_doubles_info.size()) { - ext_reg_doubles_info[doubles_reg_index] = {}; - } + ext_reg_doubles_info[reg_index / 2] = {}; + ext_reg_vector_double_info[reg_index / 2] = {}; + ext_reg_vector_quad_info[reg_index / 4] = {}; break; } case IR::Opcode::A32SetExtendedRegister64: { @@ -97,11 +97,10 @@ void A32GetSetElimination(IR::Block& block) { const size_t reg_index = A32::RegNumber(reg); do_set(ext_reg_doubles_info[reg_index], inst->GetArg(1), inst); - const size_t singles_reg_index = reg_index * 2; - if (singles_reg_index < ext_reg_singles_info.size()) { - ext_reg_singles_info[singles_reg_index] = {}; - ext_reg_singles_info[singles_reg_index+1] = {}; - } + ext_reg_singles_info[reg_index * 2 + 0] = {}; + ext_reg_singles_info[reg_index * 2 + 1] = {}; + ext_reg_vector_double_info[reg_index] = {}; + ext_reg_vector_quad_info[reg_index / 2] = {}; break; } case IR::Opcode::A32GetExtendedRegister64: { @@ -109,10 +108,61 @@ void A32GetSetElimination(IR::Block& block) { const size_t reg_index = A32::RegNumber(reg); do_get(ext_reg_doubles_info[reg_index], inst); - const size_t singles_reg_index = reg_index * 2; - if (singles_reg_index < ext_reg_singles_info.size()) { - ext_reg_singles_info[singles_reg_index] = {}; - ext_reg_singles_info[singles_reg_index+1] = {}; + ext_reg_singles_info[reg_index * 2 + 0] = {}; + ext_reg_singles_info[reg_index * 2 + 1] = {}; + ext_reg_vector_double_info[reg_index] = {}; + ext_reg_vector_quad_info[reg_index / 2] = {}; + break; + } + case IR::Opcode::A32SetVector: { + const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); + const size_t reg_index = A32::RegNumber(reg); + if (A32::IsDoubleExtReg(reg)) { + do_set(ext_reg_vector_double_info[reg_index], inst->GetArg(1), inst); + + ext_reg_singles_info[reg_index * 2 + 0] = {}; + ext_reg_singles_info[reg_index * 2 + 1] = {}; + ext_reg_doubles_info[reg_index] = {}; + ext_reg_vector_quad_info[reg_index / 2] = {}; + } else { + DEBUG_ASSERT(A32::IsQuadExtReg(reg)); + + do_set(ext_reg_vector_quad_info[reg_index], inst->GetArg(1), inst); + + ext_reg_singles_info[reg_index * 4 + 0] = {}; + ext_reg_singles_info[reg_index * 4 + 1] = {}; + ext_reg_singles_info[reg_index * 4 + 2] = {}; + ext_reg_singles_info[reg_index * 4 + 3] = {}; + ext_reg_doubles_info[reg_index * 2 + 0] = {}; + ext_reg_doubles_info[reg_index * 2 + 1] = {}; + ext_reg_vector_double_info[reg_index * 2 + 0] = {}; + ext_reg_vector_double_info[reg_index * 2 + 1] = {}; + } + break; + } + case IR::Opcode::A32GetVector: { + const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); + const size_t reg_index = A32::RegNumber(reg); + if (A32::IsDoubleExtReg(reg)) { + do_get(ext_reg_vector_double_info[reg_index], inst); + + ext_reg_singles_info[reg_index * 2 + 0] = {}; + ext_reg_singles_info[reg_index * 2 + 1] = {}; + ext_reg_doubles_info[reg_index] = {}; + ext_reg_vector_quad_info[reg_index / 2] = {}; + } else { + DEBUG_ASSERT(A32::IsQuadExtReg(reg)); + + do_get(ext_reg_vector_quad_info[reg_index], inst); + + ext_reg_singles_info[reg_index * 4 + 0] = {}; + ext_reg_singles_info[reg_index * 4 + 1] = {}; + ext_reg_singles_info[reg_index * 4 + 2] = {}; + ext_reg_singles_info[reg_index * 4 + 3] = {}; + ext_reg_doubles_info[reg_index * 2 + 0] = {}; + ext_reg_doubles_info[reg_index * 2 + 1] = {}; + ext_reg_vector_double_info[reg_index * 2 + 0] = {}; + ext_reg_vector_double_info[reg_index * 2 + 1] = {}; } break; }