From 4414ec5bc821d6bd702efd9cd451dc3bd02432b5 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Tue, 2 Aug 2016 13:46:12 +0100 Subject: [PATCH] RegAlloc: Allow allocation of XMM registers --- src/backend_x64/emit_x64.cpp | 136 +++++++++++++++++----------------- src/backend_x64/jitstate.h | 2 +- src/backend_x64/reg_alloc.cpp | 135 +++++++++++++++++++++------------ src/backend_x64/reg_alloc.h | 74 ++++++++++++++---- src/frontend/ir/ir.h | 2 + 5 files changed, 218 insertions(+), 131 deletions(-) diff --git a/src/backend_x64/emit_x64.cpp b/src/backend_x64/emit_x64.cpp index 9d260a7a..c50b8d30 100644 --- a/src/backend_x64/emit_x64.cpp +++ b/src/backend_x64/emit_x64.cpp @@ -89,12 +89,12 @@ CodePtr EmitX64::Emit(const Arm::LocationDescriptor descriptor, Dynarmic::IR::Bl void EmitX64::EmitIdentity(IR::Block& block, IR::Inst* inst) { // TODO: Possible unnecessary mov here. - reg_alloc.UseDefRegister(inst->GetArg(0), inst); + reg_alloc.UseDefRegister(inst->GetArg(0), inst, any_gpr); } void EmitX64::EmitGetRegister(IR::Block&, IR::Inst* inst) { Arm::Reg reg = inst->GetArg(0).GetRegRef(); - X64Reg result = reg_alloc.DefRegister(inst); + X64Reg result = reg_alloc.DefRegister(inst, any_gpr); code->MOV(32, R(result), MJitStateReg(reg)); } @@ -104,13 +104,13 @@ void EmitX64::EmitSetRegister(IR::Block&, IR::Inst* inst) { if (arg.IsImmediate()) { code->MOV(32, MJitStateReg(reg), Imm32(arg.GetU32())); } else { - X64Reg to_store = reg_alloc.UseRegister(arg.GetInst()); + X64Reg to_store = reg_alloc.UseRegister(arg.GetInst(), any_gpr); code->MOV(32, MJitStateReg(reg), R(to_store)); } } void EmitX64::EmitGetNFlag(IR::Block&, IR::Inst* inst) { - X64Reg result = reg_alloc.DefRegister(inst); + X64Reg result = reg_alloc.DefRegister(inst, any_gpr); code->MOV(32, R(result), MJitStateCpsr()); code->SHR(32, R(result), Imm8(31)); } @@ -126,7 +126,7 @@ void EmitX64::EmitSetNFlag(IR::Block&, IR::Inst* inst) { code->AND(32, MJitStateCpsr(), Imm32(~flag_mask)); } } else { - X64Reg to_store = reg_alloc.UseScratchRegister(arg.GetInst()); + X64Reg to_store = reg_alloc.UseScratchRegister(arg.GetInst(), any_gpr); code->SHL(32, R(to_store), Imm8(flag_bit)); code->AND(32, MJitStateCpsr(), Imm32(~flag_mask)); @@ -135,7 +135,7 @@ void EmitX64::EmitSetNFlag(IR::Block&, IR::Inst* inst) { } void EmitX64::EmitGetZFlag(IR::Block&, IR::Inst* inst) { - X64Reg result = reg_alloc.DefRegister(inst); + X64Reg result = reg_alloc.DefRegister(inst, any_gpr); code->MOV(32, R(result), MJitStateCpsr()); code->SHR(32, R(result), Imm8(30)); code->AND(32, R(result), Imm32(1)); @@ -152,7 +152,7 @@ void EmitX64::EmitSetZFlag(IR::Block&, IR::Inst* inst) { code->AND(32, MJitStateCpsr(), Imm32(~flag_mask)); } } else { - X64Reg to_store = reg_alloc.UseScratchRegister(arg.GetInst()); + X64Reg to_store = reg_alloc.UseScratchRegister(arg.GetInst(), any_gpr); code->SHL(32, R(to_store), Imm8(flag_bit)); code->AND(32, MJitStateCpsr(), Imm32(~flag_mask)); @@ -161,7 +161,7 @@ void EmitX64::EmitSetZFlag(IR::Block&, IR::Inst* inst) { } void EmitX64::EmitGetCFlag(IR::Block&, IR::Inst* inst) { - X64Reg result = reg_alloc.DefRegister(inst); + X64Reg result = reg_alloc.DefRegister(inst, any_gpr); code->MOV(32, R(result), MJitStateCpsr()); code->SHR(32, R(result), Imm8(29)); code->AND(32, R(result), Imm32(1)); @@ -178,7 +178,7 @@ void EmitX64::EmitSetCFlag(IR::Block&, IR::Inst* inst) { code->AND(32, MJitStateCpsr(), Imm32(~flag_mask)); } } else { - X64Reg to_store = reg_alloc.UseScratchRegister(arg.GetInst()); + X64Reg to_store = reg_alloc.UseScratchRegister(arg.GetInst(), any_gpr); code->SHL(32, R(to_store), Imm8(flag_bit)); code->AND(32, MJitStateCpsr(), Imm32(~flag_mask)); @@ -187,7 +187,7 @@ void EmitX64::EmitSetCFlag(IR::Block&, IR::Inst* inst) { } void EmitX64::EmitGetVFlag(IR::Block&, IR::Inst* inst) { - X64Reg result = reg_alloc.DefRegister(inst); + X64Reg result = reg_alloc.DefRegister(inst, any_gpr); code->MOV(32, R(result), MJitStateCpsr()); code->SHR(32, R(result), Imm8(28)); code->AND(32, R(result), Imm32(1)); @@ -204,7 +204,7 @@ void EmitX64::EmitSetVFlag(IR::Block&, IR::Inst* inst) { code->AND(32, MJitStateCpsr(), Imm32(~flag_mask)); } } else { - X64Reg to_store = reg_alloc.UseScratchRegister(arg.GetInst()); + X64Reg to_store = reg_alloc.UseScratchRegister(arg.GetInst(), any_gpr); code->SHL(32, R(to_store), Imm8(flag_bit)); code->AND(32, MJitStateCpsr(), Imm32(~flag_mask)); @@ -237,9 +237,9 @@ void EmitX64::EmitBXWritePC(IR::Block&, IR::Inst* inst) { code->AND(32, MJitStateCpsr(), Imm32(~T_bit)); } } else { - X64Reg new_pc = reg_alloc.UseScratchRegister(arg.GetInst()); - X64Reg tmp1 = reg_alloc.ScratchRegister(); - X64Reg tmp2 = reg_alloc.ScratchRegister(); + X64Reg new_pc = reg_alloc.UseScratchRegister(arg.GetInst(), any_gpr); + X64Reg tmp1 = reg_alloc.ScratchRegister(any_gpr); + X64Reg tmp2 = reg_alloc.ScratchRegister(any_gpr); code->MOV(32, R(tmp1), MJitStateCpsr()); code->MOV(32, R(tmp2), R(tmp1)); @@ -274,17 +274,17 @@ void EmitX64::EmitGetOverflowFromOp(IR::Block&, IR::Inst*) { void EmitX64::EmitLeastSignificantHalf(IR::Block&, IR::Inst* inst) { // TODO: Optimize - reg_alloc.UseDefRegister(inst->GetArg(0), inst); + reg_alloc.UseDefRegister(inst->GetArg(0), inst, any_gpr); } void EmitX64::EmitLeastSignificantByte(IR::Block&, IR::Inst* inst) { // TODO: Optimize - reg_alloc.UseDefRegister(inst->GetArg(0), inst); + reg_alloc.UseDefRegister(inst->GetArg(0), inst, any_gpr); } void EmitX64::EmitMostSignificantBit(IR::Block&, IR::Inst* inst) { - X64Reg result = reg_alloc.UseDefRegister(inst->GetArg(0), inst); + X64Reg result = reg_alloc.UseDefRegister(inst->GetArg(0), inst, any_gpr); // TODO: Flag optimization @@ -292,7 +292,7 @@ void EmitX64::EmitMostSignificantBit(IR::Block&, IR::Inst* inst) { } void EmitX64::EmitIsZero(IR::Block&, IR::Inst* inst) { - X64Reg result = reg_alloc.UseDefRegister(inst->GetArg(0), inst); + X64Reg result = reg_alloc.UseDefRegister(inst->GetArg(0), inst, any_gpr); // TODO: Flag optimization @@ -315,7 +315,7 @@ void EmitX64::EmitLogicalShiftLeft(IR::Block& block, IR::Inst* inst) { auto shift_arg = inst->GetArg(1); if (shift_arg.IsImmediate()) { - X64Reg result = reg_alloc.UseDefRegister(inst->GetArg(0), inst); + X64Reg result = reg_alloc.UseDefRegister(inst->GetArg(0), inst, any_gpr); u8 shift = shift_arg.GetU8(); if (shift <= 31) { @@ -325,8 +325,8 @@ void EmitX64::EmitLogicalShiftLeft(IR::Block& block, IR::Inst* inst) { } } else { X64Reg shift = reg_alloc.UseRegister(shift_arg.GetInst(), {HostLoc::RCX}); - X64Reg result = reg_alloc.UseDefRegister(inst->GetArg(0), inst); - X64Reg zero = reg_alloc.ScratchRegister(); + X64Reg result = reg_alloc.UseDefRegister(inst->GetArg(0), inst, any_gpr); + X64Reg zero = reg_alloc.ScratchRegister(any_gpr); // The 32-bit x64 SHL instruction masks the shift count by 0x1F before performing the shift. // ARM differs from the behaviour: It does not mask the count, so shifts above 31 result in zeros. @@ -344,8 +344,8 @@ void EmitX64::EmitLogicalShiftLeft(IR::Block& block, IR::Inst* inst) { if (shift_arg.IsImmediate()) { u8 shift = shift_arg.GetU8(); - X64Reg result = reg_alloc.UseDefRegister(inst->GetArg(0), inst); - X64Reg carry = reg_alloc.UseDefRegister(inst->GetArg(2), carry_inst); + X64Reg result = reg_alloc.UseDefRegister(inst->GetArg(0), inst, any_gpr); + X64Reg carry = reg_alloc.UseDefRegister(inst->GetArg(2), carry_inst, any_gpr); if (shift == 0) { // There is nothing more to do. @@ -363,8 +363,8 @@ void EmitX64::EmitLogicalShiftLeft(IR::Block& block, IR::Inst* inst) { } } else { X64Reg shift = reg_alloc.UseRegister(shift_arg.GetInst(), {HostLoc::RCX}); - X64Reg result = reg_alloc.UseDefRegister(inst->GetArg(0), inst); - X64Reg carry = reg_alloc.UseDefRegister(inst->GetArg(2), carry_inst); + X64Reg result = reg_alloc.UseDefRegister(inst->GetArg(0), inst, any_gpr); + X64Reg carry = reg_alloc.UseDefRegister(inst->GetArg(2), carry_inst, any_gpr); // TODO: Optimize this. @@ -405,7 +405,7 @@ void EmitX64::EmitLogicalShiftRight(IR::Block& block, IR::Inst* inst) { auto shift_arg = inst->GetArg(1); if (shift_arg.IsImmediate()) { - X64Reg result = reg_alloc.UseDefRegister(inst->GetArg(0), inst); + X64Reg result = reg_alloc.UseDefRegister(inst->GetArg(0), inst, any_gpr); u8 shift = shift_arg.GetU8(); if (shift <= 31) { @@ -415,8 +415,8 @@ void EmitX64::EmitLogicalShiftRight(IR::Block& block, IR::Inst* inst) { } } else { X64Reg shift = reg_alloc.UseRegister(shift_arg.GetInst(), {HostLoc::RCX}); - X64Reg result = reg_alloc.UseDefRegister(inst->GetArg(0), inst); - X64Reg zero = reg_alloc.ScratchRegister(); + X64Reg result = reg_alloc.UseDefRegister(inst->GetArg(0), inst, any_gpr); + X64Reg zero = reg_alloc.ScratchRegister(any_gpr); // The 32-bit x64 SHR instruction masks the shift count by 0x1F before performing the shift. // ARM differs from the behaviour: It does not mask the count, so shifts above 31 result in zeros. @@ -434,8 +434,8 @@ void EmitX64::EmitLogicalShiftRight(IR::Block& block, IR::Inst* inst) { if (shift_arg.IsImmediate()) { u8 shift = shift_arg.GetU8(); - X64Reg result = reg_alloc.UseDefRegister(inst->GetArg(0), inst); - X64Reg carry = reg_alloc.UseDefRegister(inst->GetArg(2), carry_inst); + X64Reg result = reg_alloc.UseDefRegister(inst->GetArg(0), inst, any_gpr); + X64Reg carry = reg_alloc.UseDefRegister(inst->GetArg(2), carry_inst, any_gpr); if (shift == 0) { // There is nothing more to do. @@ -452,8 +452,8 @@ void EmitX64::EmitLogicalShiftRight(IR::Block& block, IR::Inst* inst) { } } else { X64Reg shift = reg_alloc.UseRegister(shift_arg.GetInst(), {HostLoc::RCX}); - X64Reg result = reg_alloc.UseDefRegister(inst->GetArg(0), inst); - X64Reg carry = reg_alloc.UseDefRegister(inst->GetArg(2), carry_inst); + X64Reg result = reg_alloc.UseDefRegister(inst->GetArg(0), inst, any_gpr); + X64Reg carry = reg_alloc.UseDefRegister(inst->GetArg(2), carry_inst, any_gpr); // TODO: Optimize this. @@ -498,13 +498,13 @@ void EmitX64::EmitArithmeticShiftRight(IR::Block& block, IR::Inst* inst) { if (shift_arg.IsImmediate()) { u8 shift = shift_arg.GetU8(); - X64Reg result = reg_alloc.UseDefRegister(inst->GetArg(0), inst); + X64Reg result = reg_alloc.UseDefRegister(inst->GetArg(0), inst, any_gpr); code->SAR(32, R(result), Imm8(shift < 31 ? shift : 31)); } else { X64Reg shift = reg_alloc.UseScratchRegister(shift_arg.GetInst(), {HostLoc::RCX}); - X64Reg result = reg_alloc.UseDefRegister(inst->GetArg(0), inst); - X64Reg const31 = reg_alloc.ScratchRegister(); + X64Reg result = reg_alloc.UseDefRegister(inst->GetArg(0), inst, any_gpr); + X64Reg const31 = reg_alloc.ScratchRegister(any_gpr); // The 32-bit x64 SAR instruction masks the shift count by 0x1F before performing the shift. // ARM differs from the behaviour: It does not mask the count. @@ -524,8 +524,8 @@ void EmitX64::EmitArithmeticShiftRight(IR::Block& block, IR::Inst* inst) { if (shift_arg.IsImmediate()) { u8 shift = shift_arg.GetU8(); - X64Reg result = reg_alloc.UseDefRegister(inst->GetArg(0), inst); - X64Reg carry = reg_alloc.UseDefRegister(inst->GetArg(2), carry_inst); + X64Reg result = reg_alloc.UseDefRegister(inst->GetArg(0), inst, any_gpr); + X64Reg carry = reg_alloc.UseDefRegister(inst->GetArg(2), carry_inst, any_gpr); if (shift == 0) { // There is nothing more to do. @@ -539,8 +539,8 @@ void EmitX64::EmitArithmeticShiftRight(IR::Block& block, IR::Inst* inst) { } } else { X64Reg shift = reg_alloc.UseRegister(shift_arg.GetInst(), {HostLoc::RCX}); - X64Reg result = reg_alloc.UseDefRegister(inst->GetArg(0), inst); - X64Reg carry = reg_alloc.UseDefRegister(inst->GetArg(2), carry_inst); + X64Reg result = reg_alloc.UseDefRegister(inst->GetArg(0), inst, any_gpr); + X64Reg carry = reg_alloc.UseDefRegister(inst->GetArg(2), carry_inst, any_gpr); // TODO: Optimize this. @@ -578,12 +578,12 @@ void EmitX64::EmitRotateRight(IR::Block& block, IR::Inst* inst) { if (shift_arg.IsImmediate()) { u8 shift = shift_arg.GetU8(); - X64Reg result = reg_alloc.UseDefRegister(inst->GetArg(0), inst); + X64Reg result = reg_alloc.UseDefRegister(inst->GetArg(0), inst, any_gpr); code->ROR(32, R(result), Imm8(shift & 0x1F)); } else { X64Reg shift = reg_alloc.UseRegister(shift_arg.GetInst(), {HostLoc::RCX}); - X64Reg result = reg_alloc.UseDefRegister(inst->GetArg(0), inst); + X64Reg result = reg_alloc.UseDefRegister(inst->GetArg(0), inst, any_gpr); // x64 ROR instruction does (shift & 0x1F) for us. code->ROR(32, R(result), R(shift)); @@ -596,8 +596,8 @@ void EmitX64::EmitRotateRight(IR::Block& block, IR::Inst* inst) { if (shift_arg.IsImmediate()) { u8 shift = shift_arg.GetU8(); - X64Reg result = reg_alloc.UseDefRegister(inst->GetArg(0), inst); - X64Reg carry = reg_alloc.UseDefRegister(inst->GetArg(2), carry_inst); + X64Reg result = reg_alloc.UseDefRegister(inst->GetArg(0), inst, any_gpr); + X64Reg carry = reg_alloc.UseDefRegister(inst->GetArg(2), carry_inst, any_gpr); if (shift == 0) { // There is nothing more to do. @@ -610,8 +610,8 @@ void EmitX64::EmitRotateRight(IR::Block& block, IR::Inst* inst) { } } else { X64Reg shift = reg_alloc.UseScratchRegister(shift_arg.GetInst(), {HostLoc::RCX}); - X64Reg result = reg_alloc.UseDefRegister(inst->GetArg(0), inst); - X64Reg carry = reg_alloc.UseDefRegister(inst->GetArg(2), carry_inst); + X64Reg result = reg_alloc.UseDefRegister(inst->GetArg(0), inst, any_gpr); + X64Reg carry = reg_alloc.UseDefRegister(inst->GetArg(2), carry_inst, any_gpr); // TODO: Optimize @@ -638,10 +638,10 @@ void EmitX64::EmitRotateRight(IR::Block& block, IR::Inst* inst) { static X64Reg DoCarry(RegAlloc& reg_alloc, const IR::Value& carry_in, IR::Inst* carry_out) { if (carry_in.IsImmediate()) { - return carry_out ? reg_alloc.DefRegister(carry_out) : INVALID_REG; + return carry_out ? reg_alloc.DefRegister(carry_out, any_gpr) : INVALID_REG; } else { IR::Inst* in = carry_in.GetInst(); - return carry_out ? reg_alloc.UseDefRegister(in, carry_out) : reg_alloc.UseRegister(in); + return carry_out ? reg_alloc.UseDefRegister(in, carry_out, any_gpr) : reg_alloc.UseRegister(in, any_gpr); } } @@ -653,15 +653,15 @@ void EmitX64::EmitAddWithCarry(IR::Block& block, IR::Inst* inst) { IR::Value b = inst->GetArg(1); IR::Value carry_in = inst->GetArg(2); - X64Reg result = reg_alloc.UseDefRegister(a, inst); + X64Reg result = reg_alloc.UseDefRegister(a, inst, any_gpr); X64Reg carry = DoCarry(reg_alloc, carry_in, carry_inst); - X64Reg overflow = overflow_inst ? reg_alloc.DefRegister(overflow_inst) : INVALID_REG; + X64Reg overflow = overflow_inst ? reg_alloc.DefRegister(overflow_inst, any_gpr) : INVALID_REG; // TODO: Consider using LEA. OpArg op_arg = b.IsImmediate() ? Imm32(b.GetU32()) - : R(reg_alloc.UseRegister(b.GetInst())); + : R(reg_alloc.UseRegister(b.GetInst(), any_gpr)); if (carry_in.IsImmediate()) { if (carry_in.GetU1()) { @@ -695,9 +695,9 @@ void EmitX64::EmitSubWithCarry(IR::Block& block, IR::Inst* inst) { IR::Value b = inst->GetArg(1); IR::Value carry_in = inst->GetArg(2); - X64Reg result = reg_alloc.UseDefRegister(a, inst); + X64Reg result = reg_alloc.UseDefRegister(a, inst, any_gpr); X64Reg carry = DoCarry(reg_alloc, carry_in, carry_inst); - X64Reg overflow = overflow_inst ? reg_alloc.DefRegister(overflow_inst) : INVALID_REG; + X64Reg overflow = overflow_inst ? reg_alloc.DefRegister(overflow_inst, any_gpr) : INVALID_REG; // TODO: Consider using LEA. // TODO: Optimize CMP case. @@ -705,7 +705,7 @@ void EmitX64::EmitSubWithCarry(IR::Block& block, IR::Inst* inst) { OpArg op_arg = b.IsImmediate() ? Imm32(b.GetU32()) - : R(reg_alloc.UseRegister(b.GetInst())); + : R(reg_alloc.UseRegister(b.GetInst(), any_gpr)); if (carry_in.IsImmediate()) { if (carry_in.GetU1()) { @@ -736,10 +736,10 @@ void EmitX64::EmitAnd(IR::Block&, IR::Inst* inst) { IR::Value a = inst->GetArg(0); IR::Value b = inst->GetArg(1); - X64Reg result = reg_alloc.UseDefRegister(a, inst); + X64Reg result = reg_alloc.UseDefRegister(a, inst, any_gpr); OpArg op_arg = b.IsImmediate() ? Imm32(b.GetU32()) - : R(reg_alloc.UseRegister(b.GetInst())); + : R(reg_alloc.UseRegister(b.GetInst(), any_gpr)); code->AND(32, R(result), op_arg); } @@ -748,10 +748,10 @@ void EmitX64::EmitEor(IR::Block&, IR::Inst* inst) { IR::Value a = inst->GetArg(0); IR::Value b = inst->GetArg(1); - X64Reg result = reg_alloc.UseDefRegister(a, inst); + X64Reg result = reg_alloc.UseDefRegister(a, inst, any_gpr); OpArg op_arg = b.IsImmediate() ? Imm32(b.GetU32()) - : R(reg_alloc.UseRegister(b.GetInst())); + : R(reg_alloc.UseRegister(b.GetInst(), any_gpr)); code->XOR(32, R(result), op_arg); } @@ -760,10 +760,10 @@ void EmitX64::EmitOr(IR::Block&, IR::Inst* inst) { IR::Value a = inst->GetArg(0); IR::Value b = inst->GetArg(1); - X64Reg result = reg_alloc.UseDefRegister(a, inst); + X64Reg result = reg_alloc.UseDefRegister(a, inst, any_gpr); OpArg op_arg = b.IsImmediate() ? Imm32(b.GetU32()) - : R(reg_alloc.UseRegister(b.GetInst())); + : R(reg_alloc.UseRegister(b.GetInst(), any_gpr)); code->OR(32, R(result), op_arg); } @@ -772,11 +772,11 @@ void EmitX64::EmitNot(IR::Block&, IR::Inst* inst) { IR::Value a = inst->GetArg(0); if (a.IsImmediate()) { - X64Reg result = reg_alloc.DefRegister(inst); + X64Reg result = reg_alloc.DefRegister(inst, any_gpr); code->MOV(32, R(result), Imm32(~a.GetU32())); } else { - X64Reg result = reg_alloc.UseDefRegister(a.GetInst(), inst); + X64Reg result = reg_alloc.UseDefRegister(a.GetInst(), inst, any_gpr); code->NOT(32, R(result)); } @@ -784,46 +784,46 @@ void EmitX64::EmitNot(IR::Block&, IR::Inst* inst) { void EmitX64::EmitSignExtendHalfToWord(IR::Block&, IR::Inst* inst) { // TODO: Remove unnecessary mov that may occur here - X64Reg result = reg_alloc.UseDefRegister(inst->GetArg(0), inst); + X64Reg result = reg_alloc.UseDefRegister(inst->GetArg(0), inst, any_gpr); code->MOVSX(32, 16, result, R(result)); } void EmitX64::EmitSignExtendByteToWord(IR::Block&, IR::Inst* inst) { // TODO: Remove unnecessary mov that may occur here - X64Reg result = reg_alloc.UseDefRegister(inst->GetArg(0), inst); + X64Reg result = reg_alloc.UseDefRegister(inst->GetArg(0), inst, any_gpr); code->MOVSX(32, 8, result, R(result)); } void EmitX64::EmitZeroExtendHalfToWord(IR::Block&, IR::Inst* inst) { // TODO: Remove unnecessary mov that may occur here - X64Reg result = reg_alloc.UseDefRegister(inst->GetArg(0), inst); + X64Reg result = reg_alloc.UseDefRegister(inst->GetArg(0), inst, any_gpr); code->MOVZX(32, 16, result, R(result)); } void EmitX64::EmitZeroExtendByteToWord(IR::Block&, IR::Inst* inst) { // TODO: Remove unnecessary mov that may occur here - X64Reg result = reg_alloc.UseDefRegister(inst->GetArg(0), inst); + X64Reg result = reg_alloc.UseDefRegister(inst->GetArg(0), inst, any_gpr); code->MOVZX(32, 8, result, R(result)); } void EmitX64::EmitByteReverseWord(IR::Block&, IR::Inst* inst) { - X64Reg result = reg_alloc.UseDefRegister(inst->GetArg(0), inst); + X64Reg result = reg_alloc.UseDefRegister(inst->GetArg(0), inst, any_gpr); code->BSWAP(32, result); } void EmitX64::EmitByteReverseHalf(IR::Block&, IR::Inst* inst) { - X64Reg result = reg_alloc.UseDefRegister(inst->GetArg(0), inst); + X64Reg result = reg_alloc.UseDefRegister(inst->GetArg(0), inst, any_gpr); code->ROL(16, R(result), Imm8(8)); } void EmitX64::EmitByteReverseDual(IR::Block&, IR::Inst* inst) { - X64Reg result = reg_alloc.UseDefRegister(inst->GetArg(0), inst); + X64Reg result = reg_alloc.UseDefRegister(inst->GetArg(0), inst, any_gpr); code->BSWAP(64, result); } diff --git a/src/backend_x64/jitstate.h b/src/backend_x64/jitstate.h index 4f7cc893..89c1f8a9 100644 --- a/src/backend_x64/jitstate.h +++ b/src/backend_x64/jitstate.h @@ -21,7 +21,7 @@ struct JitState { u32 Fpscr = 0; - std::array Spill{}; // Spill. + std::array Spill{}; // Spill. // For internal use (See: Routines::RunCode) u64 save_host_RSP = 0; diff --git a/src/backend_x64/reg_alloc.cpp b/src/backend_x64/reg_alloc.cpp index 971d237f..90dfe6d7 100644 --- a/src/backend_x64/reg_alloc.cpp +++ b/src/backend_x64/reg_alloc.cpp @@ -21,13 +21,14 @@ static Gen::X64Reg HostLocToX64(HostLoc loc) { } static Gen::OpArg SpillToOpArg(HostLoc loc) { + static_assert(std::is_same::value, "Spill must be u64"); DEBUG_ASSERT(HostLocIsSpill(loc)); size_t i = static_cast(loc) - static_cast(HostLoc::FirstSpill); - return Gen::MDisp(Gen::R15, static_cast(offsetof(JitState, Spill) + i * sizeof(u32))); + return Gen::MDisp(Gen::R15, static_cast(offsetof(JitState, Spill) + i * sizeof(u64))); } -Gen::X64Reg RegAlloc::DefRegister(IR::Inst* def_inst, std::initializer_list desired_locations) { +Gen::X64Reg RegAlloc::DefRegister(IR::Inst* def_inst, HostLocList desired_locations) { DEBUG_ASSERT(std::all_of(desired_locations.begin(), desired_locations.end(), HostLocIsRegister)); DEBUG_ASSERT_MSG(ValueLocations(def_inst).empty(), "def_inst has already been defined"); @@ -38,13 +39,11 @@ Gen::X64Reg RegAlloc::DefRegister(IR::Inst* def_inst, std::initializer_list(location)] = HostLocState::Def; - hostloc_to_inst[static_cast(location)] = def_inst; - + LocInfo(location) = {def_inst, HostLocState::Def}; return HostLocToX64(location); } -Gen::X64Reg RegAlloc::UseDefRegister(IR::Value use_value, IR::Inst* def_inst, std::initializer_list desired_locations) { +Gen::X64Reg RegAlloc::UseDefRegister(IR::Value use_value, IR::Inst* def_inst, HostLocList desired_locations) { if (!use_value.IsImmediate()) { return UseDefRegister(use_value.GetInst(), def_inst, desired_locations); } @@ -52,19 +51,19 @@ Gen::X64Reg RegAlloc::UseDefRegister(IR::Value use_value, IR::Inst* def_inst, st return LoadImmediateIntoRegister(use_value, DefRegister(def_inst, desired_locations)); } -Gen::X64Reg RegAlloc::UseDefRegister(IR::Inst* use_inst, IR::Inst* def_inst, std::initializer_list desired_locations) { +Gen::X64Reg RegAlloc::UseDefRegister(IR::Inst* use_inst, IR::Inst* def_inst, HostLocList desired_locations) { DEBUG_ASSERT(std::all_of(desired_locations.begin(), desired_locations.end(), HostLocIsRegister)); DEBUG_ASSERT_MSG(ValueLocations(def_inst).empty(), "def_inst has already been defined"); DEBUG_ASSERT_MSG(!ValueLocations(use_inst).empty(), "use_inst has not been defined"); // TODO: Optimize the case when this is the last use_inst use. - Gen::X64Reg use_reg = UseRegister(use_inst); + Gen::X64Reg use_reg = UseRegister(use_inst, any_gpr); Gen::X64Reg def_reg = DefRegister(def_inst, desired_locations); - code->MOV(32, Gen::R(def_reg), Gen::R(use_reg)); + code->MOV(64, Gen::R(def_reg), Gen::R(use_reg)); return def_reg; } -Gen::X64Reg RegAlloc::UseRegister(IR::Value use_value, std::initializer_list desired_locations) { +Gen::X64Reg RegAlloc::UseRegister(IR::Value use_value, HostLocList desired_locations) { if (!use_value.IsImmediate()) { return UseRegister(use_value.GetInst(), desired_locations); } @@ -72,17 +71,18 @@ Gen::X64Reg RegAlloc::UseRegister(IR::Value use_value, std::initializer_list desired_locations) { +Gen::X64Reg RegAlloc::UseRegister(IR::Inst* use_inst, HostLocList desired_locations) { DEBUG_ASSERT(std::all_of(desired_locations.begin(), desired_locations.end(), HostLocIsRegister)); DEBUG_ASSERT_MSG(!ValueLocations(use_inst).empty(), "use_inst has not been defined"); HostLoc current_location = ValueLocations(use_inst).front(); auto iter = std::find(desired_locations.begin(), desired_locations.end(), current_location); if (iter != desired_locations.end()) { - ASSERT(hostloc_state[static_cast(current_location)] == HostLocState::Idle || hostloc_state[static_cast(current_location)] == HostLocState::Use); + ASSERT(LocInfo(current_location).state == HostLocState::Idle || + LocInfo(current_location).state == HostLocState::Use); // Update state - hostloc_state[static_cast(current_location)] = HostLocState::Use; + LocInfo(current_location).state = HostLocState::Use; DecrementRemainingUses(use_inst); return HostLocToX64(current_location); @@ -95,18 +95,19 @@ Gen::X64Reg RegAlloc::UseRegister(IR::Inst* use_inst, std::initializer_listMOV(32, Gen::R(HostLocToX64(new_location)), SpillToOpArg(current_location)); + EmitMove(new_location, current_location); - hostloc_state[static_cast(new_location)] = HostLocState::Use; - std::swap(hostloc_to_inst[static_cast(new_location)], hostloc_to_inst[static_cast(current_location)]); + LocInfo(new_location) = LocInfo(current_location); + LocInfo(new_location).state = HostLocState::Use; + LocInfo(current_location) = {}; DecrementRemainingUses(use_inst); } else if (HostLocIsRegister(current_location)) { - ASSERT(hostloc_state[static_cast(current_location)] == HostLocState::Idle); + ASSERT(LocInfo(current_location).state == HostLocState::Idle); - code->XCHG(32, Gen::R(HostLocToX64(new_location)), Gen::R(HostLocToX64(current_location))); + EmitExchange(new_location, current_location); - hostloc_state[static_cast(new_location)] = HostLocState::Use; - std::swap(hostloc_to_inst[static_cast(new_location)], hostloc_to_inst[static_cast(current_location)]); + std::swap(LocInfo(new_location), LocInfo(current_location)); + LocInfo(new_location).state = HostLocState::Use; DecrementRemainingUses(use_inst); } else { ASSERT_MSG(0, "Invalid current_location"); @@ -115,7 +116,7 @@ Gen::X64Reg RegAlloc::UseRegister(IR::Inst* use_inst, std::initializer_list desired_locations) { +Gen::X64Reg RegAlloc::UseScratchRegister(IR::Value use_value, HostLocList desired_locations) { if (!use_value.IsImmediate()) { return UseScratchRegister(use_value.GetInst(), desired_locations); } @@ -123,7 +124,7 @@ Gen::X64Reg RegAlloc::UseScratchRegister(IR::Value use_value, std::initializer_l return LoadImmediateIntoRegister(use_value, ScratchRegister(desired_locations)); } -Gen::X64Reg RegAlloc::UseScratchRegister(IR::Inst* use_inst, std::initializer_list desired_locations) { +Gen::X64Reg RegAlloc::UseScratchRegister(IR::Inst* use_inst, HostLocList desired_locations) { DEBUG_ASSERT(std::all_of(desired_locations.begin(), desired_locations.end(), HostLocIsRegister)); DEBUG_ASSERT_MSG(!ValueLocations(use_inst).empty(), "use_inst has not been defined"); ASSERT_MSG(use_inst->use_count != 0, "use_inst ran out of uses. (Use-d an IR::Inst* too many times)"); @@ -136,23 +137,23 @@ Gen::X64Reg RegAlloc::UseScratchRegister(IR::Inst* use_inst, std::initializer_li SpillRegister(new_location); } - code->MOV(32, Gen::R(HostLocToX64(new_location)), SpillToOpArg(current_location)); + EmitMove(new_location, current_location); - hostloc_state[static_cast(new_location)] = HostLocState::Scratch; + LocInfo(new_location).state = HostLocState::Scratch; DecrementRemainingUses(use_inst); } else if (HostLocIsRegister(current_location)) { - ASSERT(hostloc_state[static_cast(current_location)] == HostLocState::Idle); + ASSERT(LocInfo(current_location).state == HostLocState::Idle); if (IsRegisterOccupied(new_location)) { SpillRegister(new_location); if (current_location != new_location) { - code->MOV(32, Gen::R(HostLocToX64(new_location)), Gen::R(HostLocToX64(current_location))); + EmitMove(new_location, current_location); } } else { - code->MOV(32, Gen::R(HostLocToX64(new_location)), Gen::R(HostLocToX64(current_location))); + EmitMove(new_location, current_location); } - hostloc_state[static_cast(new_location)] = HostLocState::Scratch; + LocInfo(new_location).state = HostLocState::Scratch; DecrementRemainingUses(use_inst); } else { ASSERT_MSG(0, "Invalid current_location"); @@ -161,8 +162,7 @@ Gen::X64Reg RegAlloc::UseScratchRegister(IR::Inst* use_inst, std::initializer_li return HostLocToX64(new_location); } - -Gen::X64Reg RegAlloc::ScratchRegister(std::initializer_list desired_locations) { +Gen::X64Reg RegAlloc::ScratchRegister(HostLocList desired_locations) { DEBUG_ASSERT(std::all_of(desired_locations.begin(), desired_locations.end(), HostLocIsRegister)); HostLoc location = SelectARegister(desired_locations); @@ -172,7 +172,7 @@ Gen::X64Reg RegAlloc::ScratchRegister(std::initializer_list desired_loc } // Update state - hostloc_state[static_cast(location)] = HostLocState::Scratch; + LocInfo(location).state = HostLocState::Scratch; return HostLocToX64(location); } @@ -235,7 +235,7 @@ void RegAlloc::HostCall(IR::Inst* result_def, IR::Value arg0_use, IR::Value arg1 code->MOV(64, Gen::R(Gen::RSP), Gen::MDisp(Gen::R15, offsetof(JitState, save_host_RSP))); } -HostLoc RegAlloc::SelectARegister(std::initializer_list desired_locations) const { +HostLoc RegAlloc::SelectARegister(HostLocList desired_locations) const { std::vector candidates = desired_locations; // Find all locations that have not been allocated.. @@ -259,32 +259,32 @@ std::vector RegAlloc::ValueLocations(IR::Inst* value) const { std::vector locations; for (size_t i = 0; i < HostLocCount; i++) - if (hostloc_to_inst[i] == value) + if (hostloc_info[i].value == value) locations.emplace_back(static_cast(i)); return locations; } bool RegAlloc::IsRegisterOccupied(HostLoc loc) const { - return hostloc_to_inst.at(static_cast(loc)) != nullptr; + return GetLocInfo(loc).value != nullptr; } bool RegAlloc::IsRegisterAllocated(HostLoc loc) const { - return hostloc_state.at(static_cast(loc)) != HostLocState::Idle; + return GetLocInfo(loc).state != HostLocState::Idle; } void RegAlloc::SpillRegister(HostLoc loc) { ASSERT_MSG(HostLocIsRegister(loc), "Only registers can be spilled"); - ASSERT_MSG(hostloc_state[static_cast(loc)] == HostLocState::Idle, "Allocated registers cannot be spilled"); + ASSERT_MSG(LocInfo(loc).state == HostLocState::Idle, "Allocated registers cannot be spilled"); ASSERT_MSG(IsRegisterOccupied(loc), "There is no need to spill unoccupied registers"); ASSERT_MSG(!IsRegisterAllocated(loc), "Registers that have been allocated must not be spilt"); HostLoc new_loc = FindFreeSpill(); - code->MOV(32, SpillToOpArg(new_loc), Gen::R(HostLocToX64(loc))); + EmitMove(new_loc, loc); - hostloc_to_inst[static_cast(new_loc)] = hostloc_to_inst[static_cast(loc)]; - hostloc_to_inst[static_cast(loc)] = nullptr; + LocInfo(new_loc).value = LocInfo(loc).value; + LocInfo(loc).value = nullptr; } HostLoc RegAlloc::FindFreeSpill() const { @@ -296,11 +296,11 @@ HostLoc RegAlloc::FindFreeSpill() const { } void RegAlloc::EndOfAllocScope() { - hostloc_state.fill(HostLocState::Idle); - - for (auto& iter : hostloc_to_inst) - if (iter && iter->use_count == 0) - iter = nullptr; + for (auto& iter : hostloc_info) { + iter.state = HostLocState::Idle; + if (iter.value && iter.value->use_count == 0) + iter.value = nullptr; + } } void RegAlloc::DecrementRemainingUses(IR::Inst* value) { @@ -309,12 +309,53 @@ void RegAlloc::DecrementRemainingUses(IR::Inst* value) { } void RegAlloc::AssertNoMoreUses() { - ASSERT(std::all_of(hostloc_to_inst.begin(), hostloc_to_inst.end(), [](const auto& inst){ return !inst; })); + ASSERT(std::all_of(hostloc_info.begin(), hostloc_info.end(), [](const auto& i){ return !i.value; })); } void RegAlloc::Reset() { - hostloc_to_inst.fill(nullptr); - hostloc_state.fill(HostLocState::Idle); + hostloc_info.fill({}); +} + +void RegAlloc::EmitMove(HostLoc to, HostLoc from) { + const auto& from_info = LocInfo(from); + + if (HostLocIsXMM(to) && HostLocIsSpill(from)) { + if (from_info.GetType() == IR::Type::F64) { + code->MOVSD(HostLocToX64(to), SpillToOpArg(from)); + } else if (from_info.GetType() == IR::Type::F32) { + code->MOVSS(HostLocToX64(to), SpillToOpArg(from)); + } else { + ASSERT_MSG(false, "Tried to move a non-fp value into an XMM register"); + } + } else if (HostLocIsSpill(to) && HostLocIsXMM(from)) { + if (from_info.GetType() == IR::Type::F64) { + code->MOVSD(SpillToOpArg(to), HostLocToX64(from)); + } else if (from_info.GetType() == IR::Type::F32) { + code->MOVSS(SpillToOpArg(to), HostLocToX64(from)); + } else { + ASSERT_MSG(false, "Tried to move a non-fp value into an XMM register"); + } + } else if (HostLocIsXMM(to) && HostLocIsXMM(from)) { + code->MOVAPS(HostLocToX64(to), Gen::R(HostLocToX64(from))); + } else if (HostLocIsGPR(to) && HostLocIsSpill(from)) { + code->MOV(64, Gen::R(HostLocToX64(to)), SpillToOpArg(from)); + } else if (HostLocIsSpill(to) && HostLocIsGPR(from)) { + code->MOV(64, SpillToOpArg(to), Gen::R(HostLocToX64(from))); + } else if (HostLocIsGPR(to) && HostLocIsGPR(from)){ + code->MOV(64, Gen::R(HostLocToX64(to)), Gen::R(HostLocToX64(from))); + } else { + ASSERT_MSG(false, "Invalid RegAlloc::EmitMove"); + } +} + +void RegAlloc::EmitExchange(HostLoc a, HostLoc b) { + if (HostLocIsGPR(a) && HostLocIsGPR(b)) { + code->XCHG(64, Gen::R(HostLocToX64(a)), Gen::R(HostLocToX64(b))); + } else if (HostLocIsXMM(a) && HostLocIsXMM(b)) { + ASSERT_MSG(false, "Exchange is unnecessary for XMM registers"); + } else { + ASSERT_MSG(false, "Invalid RegAlloc::EmitExchange"); + } } } // namespace BackendX64 diff --git a/src/backend_x64/reg_alloc.h b/src/backend_x64/reg_alloc.h index bf4f58eb..1ed93aa5 100644 --- a/src/backend_x64/reg_alloc.h +++ b/src/backend_x64/reg_alloc.h @@ -19,6 +19,8 @@ namespace BackendX64 { enum class HostLoc { // Ordering of the registers is intentional. See also: HostLocToX64. RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI, R8, R9, R10, R11, R12, R13, R14, + XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, + XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, CF, PF, AF, ZF, SF, OF, FirstSpill, }; @@ -29,10 +31,18 @@ enum class HostLocState { Idle, Def, Use, Scratch }; -inline bool HostLocIsRegister(HostLoc reg) { +inline bool HostLocIsGPR(HostLoc reg) { return reg >= HostLoc::RAX && reg <= HostLoc::R14; } +inline bool HostLocIsXMM(HostLoc reg) { + return reg >= HostLoc::XMM0 && reg <= HostLoc::XMM15; +} + +inline bool HostLocIsRegister(HostLoc reg) { + return HostLocIsGPR(reg) || HostLocIsXMM(reg); +} + inline bool HostLocIsFlag(HostLoc reg) { return reg >= HostLoc::CF && reg <= HostLoc::OF; } @@ -46,7 +56,9 @@ inline bool HostLocIsSpill(HostLoc reg) { return reg >= HostLoc::FirstSpill && reg <= HostLocSpill(SpillCount - 1); } -const std::initializer_list hostloc_any_register = { +using HostLocList = std::initializer_list; + +const HostLocList any_gpr = { HostLoc::RAX, HostLoc::RBX, HostLoc::RCX, @@ -61,7 +73,26 @@ const std::initializer_list hostloc_any_register = { HostLoc::R11, HostLoc::R12, HostLoc::R13, - HostLoc::R14 + HostLoc::R14, +}; + +const HostLocList any_xmm = { + HostLoc::XMM0, + HostLoc::XMM1, + HostLoc::XMM2, + HostLoc::XMM3, + HostLoc::XMM4, + HostLoc::XMM5, + HostLoc::XMM6, + HostLoc::XMM7, + HostLoc::XMM8, + HostLoc::XMM9, + HostLoc::XMM10, + HostLoc::XMM11, + HostLoc::XMM12, + HostLoc::XMM13, + HostLoc::XMM14, + HostLoc::XMM15, }; class RegAlloc final { @@ -69,18 +100,18 @@ public: RegAlloc(Gen::XEmitter* code) : code(code) {} /// Late-def - Gen::X64Reg DefRegister(IR::Inst* def_inst, std::initializer_list desired_locations = hostloc_any_register); + Gen::X64Reg DefRegister(IR::Inst* def_inst, HostLocList desired_locations); /// Early-use, Late-def - Gen::X64Reg UseDefRegister(IR::Value use_value, IR::Inst* def_inst, std::initializer_list desired_locations = hostloc_any_register); - Gen::X64Reg UseDefRegister(IR::Inst* use_inst, IR::Inst* def_inst, std::initializer_list desired_locations = hostloc_any_register); + Gen::X64Reg UseDefRegister(IR::Value use_value, IR::Inst* def_inst, HostLocList desired_locations); + Gen::X64Reg UseDefRegister(IR::Inst* use_inst, IR::Inst* def_inst, HostLocList desired_locations); /// Early-use - Gen::X64Reg UseRegister(IR::Value use_value, std::initializer_list desired_locations = hostloc_any_register); - Gen::X64Reg UseRegister(IR::Inst* use_inst, std::initializer_list desired_locations = hostloc_any_register); + Gen::X64Reg UseRegister(IR::Value use_value, HostLocList desired_locations); + Gen::X64Reg UseRegister(IR::Inst* use_inst, HostLocList desired_locations); /// Early-use, Destroyed - Gen::X64Reg UseScratchRegister(IR::Value use_value, std::initializer_list desired_locations = hostloc_any_register); - Gen::X64Reg UseScratchRegister(IR::Inst* use_inst, std::initializer_list desired_locations = hostloc_any_register); + Gen::X64Reg UseScratchRegister(IR::Value use_value, HostLocList desired_locations); + Gen::X64Reg UseScratchRegister(IR::Inst* use_inst, HostLocList desired_locations); /// Early-def, Late-use, single-use - Gen::X64Reg ScratchRegister(std::initializer_list desired_locations = hostloc_any_register); + Gen::X64Reg ScratchRegister(HostLocList desired_locations); Gen::X64Reg LoadImmediateIntoRegister(IR::Value imm, Gen::X64Reg reg); /// Late-def for result register, Early-use for all arguments, Each value is placed into registers according to host ABI. @@ -97,19 +128,32 @@ public: void Reset(); private: - HostLoc SelectARegister(std::initializer_list desired_locations) const; + HostLoc SelectARegister(HostLocList desired_locations) const; std::vector ValueLocations(IR::Inst* value) const; bool IsRegisterOccupied(HostLoc loc) const; bool IsRegisterAllocated(HostLoc loc) const; + void EmitMove(HostLoc to, HostLoc from); + void EmitExchange(HostLoc a, HostLoc b); void SpillRegister(HostLoc loc); HostLoc FindFreeSpill() const; Gen::XEmitter* code = nullptr; - using mapping_map_t = std::array; - mapping_map_t hostloc_to_inst; - std::array hostloc_state; + struct HostLocInfo { + IR::Inst* value = nullptr; + HostLocState state = HostLocState::Idle; + IR::Type GetType() const { + return value ? value->GetType() : IR::Type::Void; + } + }; + std::array hostloc_info; + HostLocInfo& LocInfo(HostLoc loc) { + return hostloc_info[static_cast(loc)]; + } + HostLocInfo GetLocInfo(HostLoc loc) const { + return hostloc_info[static_cast(loc)]; + } }; } // namespace BackendX64 diff --git a/src/frontend/ir/ir.h b/src/frontend/ir/ir.h index 9c8670ab..f661abca 100644 --- a/src/frontend/ir/ir.h +++ b/src/frontend/ir/ir.h @@ -41,6 +41,8 @@ enum class Type { U16 = 1 << 5, U32 = 1 << 6, U64 = 1 << 7, + F32 = 1 << 8, + F64 = 1 << 9, }; Type GetTypeOf(Opcode op);