From babfb7d7b86944eb176934708a8c169cc92c7f15 Mon Sep 17 00:00:00 2001 From: Merry Date: Sat, 30 Jul 2022 14:23:55 +0100 Subject: [PATCH] IR/saturation: Revamp saturated add/sub IR instructions --- src/dynarmic/backend/arm64/emit_arm64_a64.cpp | 8 - .../backend/arm64/emit_arm64_saturation.cpp | 148 +++++----- src/dynarmic/backend/x64/a64_emit_x64.cpp | 16 -- .../backend/x64/emit_x64_saturation.cpp | 261 +++++++++--------- .../frontend/A32/translate/impl/saturated.cpp | 12 +- .../A32/translate/impl/thumb32_misc.cpp | 12 +- src/dynarmic/frontend/A64/a64_ir_emitter.cpp | 4 - src/dynarmic/frontend/A64/a64_ir_emitter.h | 1 - .../translate/impl/simd_scalar_three_same.cpp | 17 +- .../impl/simd_scalar_x_indexed_element.cpp | 5 +- src/dynarmic/ir/ir_emitter.cpp | 66 +++-- src/dynarmic/ir/ir_emitter.h | 13 +- src/dynarmic/ir/microinstruction.cpp | 19 +- src/dynarmic/ir/opcodes.inc | 7 +- 14 files changed, 292 insertions(+), 297 deletions(-) diff --git a/src/dynarmic/backend/arm64/emit_arm64_a64.cpp b/src/dynarmic/backend/arm64/emit_arm64_a64.cpp index cfac2a01..4bd2e270 100644 --- a/src/dynarmic/backend/arm64/emit_arm64_a64.cpp +++ b/src/dynarmic/backend/arm64/emit_arm64_a64.cpp @@ -186,14 +186,6 @@ void EmitIR(oaknut::CodeGenerator& code, EmitContext& ct ASSERT_FALSE("Unimplemented"); } -template<> -void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); -} - template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { (void)code; diff --git a/src/dynarmic/backend/arm64/emit_arm64_saturation.cpp b/src/dynarmic/backend/arm64/emit_arm64_saturation.cpp index feddc9a0..b55660b0 100644 --- a/src/dynarmic/backend/arm64/emit_arm64_saturation.cpp +++ b/src/dynarmic/backend/arm64/emit_arm64_saturation.cpp @@ -18,6 +18,88 @@ namespace Dynarmic::Backend::Arm64 { using namespace oaknut::util; +template<> +void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { + (void)code; + (void)ctx; + (void)inst; + ASSERT_FALSE("Unimplemented"); +} + +template<> +void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { + (void)code; + (void)ctx; + (void)inst; + ASSERT_FALSE("Unimplemented"); +} + +template<> +void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { + const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp); + + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + const size_t N = args[1].GetImmediateU8(); + ASSERT(N >= 1 && N <= 32); + + if (N == 32) { + ctx.reg_alloc.DefineAsExisting(inst, args[0]); + if (overflow_inst) { + auto Woverflow = ctx.reg_alloc.WriteW(overflow_inst); + RegAlloc::Realize(Woverflow); + code.MOV(*Woverflow, WZR); + } + return; + } + + const u32 positive_saturated_value = (1u << (N - 1)) - 1; + const u32 negative_saturated_value = ~u32{0} << (N - 1); + + auto Woperand = ctx.reg_alloc.ReadW(args[0]); + auto Wresult = ctx.reg_alloc.WriteW(inst); + RegAlloc::Realize(Woperand, Wresult); + ctx.reg_alloc.SpillFlags(); + + code.MOV(Wscratch0, negative_saturated_value); + code.MOV(Wscratch1, positive_saturated_value); + code.CMP(*Woperand, Wscratch0); + code.CSEL(Wresult, Woperand, Wscratch0, GT); + code.CMP(*Woperand, Wscratch1); + code.CSEL(Wresult, Wresult, Wscratch1, LT); + + if (overflow_inst) { + auto Woverflow = ctx.reg_alloc.WriteW(overflow_inst); + RegAlloc::Realize(Woverflow); + code.CMP(*Wresult, Woperand); + code.CSET(Woverflow, NE); + } +} + +template<> +void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { + const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp); + + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto Wresult = ctx.reg_alloc.WriteW(inst); + auto Woperand = ctx.reg_alloc.ReadW(args[0]); + RegAlloc::Realize(Wresult, Woperand); + ctx.reg_alloc.SpillFlags(); + + const size_t N = args[1].GetImmediateU8(); + ASSERT(N <= 31); + const u32 saturated_value = (1u << N) - 1; + + code.MOV(Wscratch0, saturated_value); + code.CMP(*Woperand, Wscratch0); + code.CSEL(Wresult, Woperand, Wscratch0, LS); + + if (overflow_inst) { + auto Woverflow = ctx.reg_alloc.WriteW(overflow_inst); + RegAlloc::Realize(Woverflow); + code.CSET(Woverflow, HI); + } +} + template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { (void)code; @@ -98,47 +180,6 @@ void EmitIR(oaknut::CodeGenerator& code, EmitC ASSERT_FALSE("Unimplemented"); } -template<> -void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp); - - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const size_t N = args[1].GetImmediateU8(); - ASSERT(N >= 1 && N <= 32); - - if (N == 32) { - ctx.reg_alloc.DefineAsExisting(inst, args[0]); - if (overflow_inst) { - auto Woverflow = ctx.reg_alloc.WriteW(overflow_inst); - RegAlloc::Realize(Woverflow); - code.MOV(*Woverflow, WZR); - } - return; - } - - const u32 positive_saturated_value = (1u << (N - 1)) - 1; - const u32 negative_saturated_value = ~u32{0} << (N - 1); - - auto Woperand = ctx.reg_alloc.ReadW(args[0]); - auto Wresult = ctx.reg_alloc.WriteW(inst); - RegAlloc::Realize(Woperand, Wresult); - ctx.reg_alloc.SpillFlags(); - - code.MOV(Wscratch0, negative_saturated_value); - code.MOV(Wscratch1, positive_saturated_value); - code.CMP(*Woperand, Wscratch0); - code.CSEL(Wresult, Woperand, Wscratch0, GT); - code.CMP(*Woperand, Wscratch1); - code.CSEL(Wresult, Wresult, Wscratch1, LT); - - if (overflow_inst) { - auto Woverflow = ctx.reg_alloc.WriteW(overflow_inst); - RegAlloc::Realize(Woverflow); - code.CMP(*Wresult, Woperand); - code.CSET(Woverflow, NE); - } -} - template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { (void)code; @@ -203,29 +244,4 @@ void EmitIR(oaknut::CodeGenerator& code, Emi ASSERT_FALSE("Unimplemented"); } -template<> -void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp); - - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - auto Wresult = ctx.reg_alloc.WriteW(inst); - auto Woperand = ctx.reg_alloc.ReadW(args[0]); - RegAlloc::Realize(Wresult, Woperand); - ctx.reg_alloc.SpillFlags(); - - const size_t N = args[1].GetImmediateU8(); - ASSERT(N <= 31); - const u32 saturated_value = (1u << N) - 1; - - code.MOV(Wscratch0, saturated_value); - code.CMP(*Woperand, Wscratch0); - code.CSEL(Wresult, Woperand, Wscratch0, LS); - - if (overflow_inst) { - auto Woverflow = ctx.reg_alloc.WriteW(overflow_inst); - RegAlloc::Realize(Woverflow); - code.CSET(Woverflow, HI); - } -} - } // namespace Dynarmic::Backend::Arm64 diff --git a/src/dynarmic/backend/x64/a64_emit_x64.cpp b/src/dynarmic/backend/x64/a64_emit_x64.cpp index 7cc4e140..aa336106 100644 --- a/src/dynarmic/backend/x64/a64_emit_x64.cpp +++ b/src/dynarmic/backend/x64/a64_emit_x64.cpp @@ -452,22 +452,6 @@ void A64EmitX64::EmitA64SetFPSR(A64EmitContext& ctx, IR::Inst* inst) { code.ldmxcsr(code.dword[code.r15 + offsetof(A64JitState, guest_MXCSR)]); } -void A64EmitX64::EmitA64OrQC(A64EmitContext& ctx, IR::Inst* inst) { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - - if (args[0].IsImmediate()) { - if (!args[0].GetImmediateU1()) { - return; - } - - code.mov(code.byte[code.r15 + offsetof(A64JitState, fpsr_qc)], u8(1)); - return; - } - - const Xbyak::Reg8 to_store = ctx.reg_alloc.UseGpr(args[0]).cvt8(); - code.or_(code.byte[code.r15 + offsetof(A64JitState, fpsr_qc)], to_store); -} - void A64EmitX64::EmitA64SetPC(A64EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); const auto addr = qword[r15 + offsetof(A64JitState, pc)]; diff --git a/src/dynarmic/backend/x64/emit_x64_saturation.cpp b/src/dynarmic/backend/x64/emit_x64_saturation.cpp index 7301c77b..707d99eb 100644 --- a/src/dynarmic/backend/x64/emit_x64_saturation.cpp +++ b/src/dynarmic/backend/x64/emit_x64_saturation.cpp @@ -27,10 +27,8 @@ enum class Op { Sub, }; -template +template void EmitSignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { - const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp); - auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Reg result = ctx.reg_alloc.UseScratchGpr(args[0]).changeBit(size); @@ -62,11 +60,14 @@ void EmitSignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) code.cmovo(result, overflow); } - if (overflow_inst) { - code.seto(overflow.cvt8()); - - ctx.reg_alloc.DefineValue(overflow_inst, overflow); - ctx.EraseInstruction(overflow_inst); + code.seto(overflow.cvt8()); + if constexpr (has_overflow_inst) { + if (const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp)) { + ctx.reg_alloc.DefineValue(overflow_inst, overflow); + ctx.EraseInstruction(overflow_inst); + } + } else { + code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], overflow.cvt8()); } ctx.reg_alloc.DefineValue(inst, result); @@ -74,8 +75,6 @@ void EmitSignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) template void EmitUnsignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { - const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp); - auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Reg op_result = ctx.reg_alloc.UseScratchGpr(args[0]).changeBit(size); @@ -95,109 +94,21 @@ void EmitUnsignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst code.cmovae(addend, op_result); } - if (overflow_inst) { - const Xbyak::Reg overflow = ctx.reg_alloc.ScratchGpr(); - code.setb(overflow.cvt8()); - - ctx.reg_alloc.DefineValue(overflow_inst, overflow); - ctx.EraseInstruction(overflow_inst); - } + const Xbyak::Reg overflow = ctx.reg_alloc.ScratchGpr(); + code.setb(overflow.cvt8()); + code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], overflow.cvt8()); ctx.reg_alloc.DefineValue(inst, addend); } } // anonymous namespace -void EmitX64::EmitSignedSaturatedAdd8(EmitContext& ctx, IR::Inst* inst) { - EmitSignedSaturatedOp(code, ctx, inst); +void EmitX64::EmitSignedSaturatedAddWithFlag32(EmitContext& ctx, IR::Inst* inst) { + EmitSignedSaturatedOp(code, ctx, inst); } -void EmitX64::EmitSignedSaturatedAdd16(EmitContext& ctx, IR::Inst* inst) { - EmitSignedSaturatedOp(code, ctx, inst); -} - -void EmitX64::EmitSignedSaturatedAdd32(EmitContext& ctx, IR::Inst* inst) { - EmitSignedSaturatedOp(code, ctx, inst); -} - -void EmitX64::EmitSignedSaturatedAdd64(EmitContext& ctx, IR::Inst* inst) { - EmitSignedSaturatedOp(code, ctx, inst); -} - -void EmitX64::EmitSignedSaturatedDoublingMultiplyReturnHigh16(EmitContext& ctx, IR::Inst* inst) { - const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp); - - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - - const Xbyak::Reg32 x = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); - const Xbyak::Reg32 y = ctx.reg_alloc.UseScratchGpr(args[1]).cvt32(); - const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32(); - - code.movsx(x, x.cvt16()); - code.movsx(y, y.cvt16()); - - code.imul(x, y); - code.lea(y, ptr[x.cvt64() + x.cvt64()]); - code.mov(tmp, x); - code.shr(tmp, 15); - code.xor_(y, x); - code.mov(y, 0x7FFF); - code.cmovns(y, tmp); - - if (overflow_inst) { - code.sets(tmp.cvt8()); - - ctx.reg_alloc.DefineValue(overflow_inst, tmp); - ctx.EraseInstruction(overflow_inst); - } - - ctx.reg_alloc.DefineValue(inst, y); -} - -void EmitX64::EmitSignedSaturatedDoublingMultiplyReturnHigh32(EmitContext& ctx, IR::Inst* inst) { - const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp); - - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - - const Xbyak::Reg64 x = ctx.reg_alloc.UseScratchGpr(args[0]); - const Xbyak::Reg64 y = ctx.reg_alloc.UseScratchGpr(args[1]); - const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr(); - - code.movsxd(x, x.cvt32()); - code.movsxd(y, y.cvt32()); - - code.imul(x, y); - code.lea(y, ptr[x + x]); - code.mov(tmp, x); - code.shr(tmp, 31); - code.xor_(y, x); - code.mov(y.cvt32(), 0x7FFFFFFF); - code.cmovns(y.cvt32(), tmp.cvt32()); - - if (overflow_inst) { - code.sets(tmp.cvt8()); - - ctx.reg_alloc.DefineValue(overflow_inst, tmp); - ctx.EraseInstruction(overflow_inst); - } - - ctx.reg_alloc.DefineValue(inst, y); -} - -void EmitX64::EmitSignedSaturatedSub8(EmitContext& ctx, IR::Inst* inst) { - EmitSignedSaturatedOp(code, ctx, inst); -} - -void EmitX64::EmitSignedSaturatedSub16(EmitContext& ctx, IR::Inst* inst) { - EmitSignedSaturatedOp(code, ctx, inst); -} - -void EmitX64::EmitSignedSaturatedSub32(EmitContext& ctx, IR::Inst* inst) { - EmitSignedSaturatedOp(code, ctx, inst); -} - -void EmitX64::EmitSignedSaturatedSub64(EmitContext& ctx, IR::Inst* inst) { - EmitSignedSaturatedOp(code, ctx, inst); +void EmitX64::EmitSignedSaturatedSubWithFlag32(EmitContext& ctx, IR::Inst* inst) { + EmitSignedSaturatedOp(code, ctx, inst); } void EmitX64::EmitSignedSaturation(EmitContext& ctx, IR::Inst* inst) { @@ -250,6 +161,116 @@ void EmitX64::EmitSignedSaturation(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(inst, result); } +void EmitX64::EmitUnsignedSaturation(EmitContext& ctx, IR::Inst* inst) { + const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp); + + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + const size_t N = args[1].GetImmediateU8(); + ASSERT(N <= 31); + + const u32 saturated_value = (1u << N) - 1; + + const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32(); + const Xbyak::Reg32 reg_a = ctx.reg_alloc.UseGpr(args[0]).cvt32(); + const Xbyak::Reg32 overflow = ctx.reg_alloc.ScratchGpr().cvt32(); + + // Pseudocode: result = clamp(reg_a, 0, saturated_value); + code.xor_(overflow, overflow); + code.cmp(reg_a, saturated_value); + code.mov(result, saturated_value); + code.cmovle(result, overflow); + code.cmovbe(result, reg_a); + + if (overflow_inst) { + code.seta(overflow.cvt8()); + + ctx.reg_alloc.DefineValue(overflow_inst, overflow); + ctx.EraseInstruction(overflow_inst); + } + + ctx.reg_alloc.DefineValue(inst, result); +} + +void EmitX64::EmitSignedSaturatedAdd8(EmitContext& ctx, IR::Inst* inst) { + EmitSignedSaturatedOp(code, ctx, inst); +} + +void EmitX64::EmitSignedSaturatedAdd16(EmitContext& ctx, IR::Inst* inst) { + EmitSignedSaturatedOp(code, ctx, inst); +} + +void EmitX64::EmitSignedSaturatedAdd32(EmitContext& ctx, IR::Inst* inst) { + EmitSignedSaturatedOp(code, ctx, inst); +} + +void EmitX64::EmitSignedSaturatedAdd64(EmitContext& ctx, IR::Inst* inst) { + EmitSignedSaturatedOp(code, ctx, inst); +} + +void EmitX64::EmitSignedSaturatedDoublingMultiplyReturnHigh16(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + + const Xbyak::Reg32 x = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); + const Xbyak::Reg32 y = ctx.reg_alloc.UseScratchGpr(args[1]).cvt32(); + const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32(); + + code.movsx(x, x.cvt16()); + code.movsx(y, y.cvt16()); + + code.imul(x, y); + code.lea(y, ptr[x.cvt64() + x.cvt64()]); + code.mov(tmp, x); + code.shr(tmp, 15); + code.xor_(y, x); + code.mov(y, 0x7FFF); + code.cmovns(y, tmp); + + code.sets(tmp.cvt8()); + code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], tmp.cvt8()); + + ctx.reg_alloc.DefineValue(inst, y); +} + +void EmitX64::EmitSignedSaturatedDoublingMultiplyReturnHigh32(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + + const Xbyak::Reg64 x = ctx.reg_alloc.UseScratchGpr(args[0]); + const Xbyak::Reg64 y = ctx.reg_alloc.UseScratchGpr(args[1]); + const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr(); + + code.movsxd(x, x.cvt32()); + code.movsxd(y, y.cvt32()); + + code.imul(x, y); + code.lea(y, ptr[x + x]); + code.mov(tmp, x); + code.shr(tmp, 31); + code.xor_(y, x); + code.mov(y.cvt32(), 0x7FFFFFFF); + code.cmovns(y.cvt32(), tmp.cvt32()); + + code.sets(tmp.cvt8()); + code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], tmp.cvt8()); + + ctx.reg_alloc.DefineValue(inst, y); +} + +void EmitX64::EmitSignedSaturatedSub8(EmitContext& ctx, IR::Inst* inst) { + EmitSignedSaturatedOp(code, ctx, inst); +} + +void EmitX64::EmitSignedSaturatedSub16(EmitContext& ctx, IR::Inst* inst) { + EmitSignedSaturatedOp(code, ctx, inst); +} + +void EmitX64::EmitSignedSaturatedSub32(EmitContext& ctx, IR::Inst* inst) { + EmitSignedSaturatedOp(code, ctx, inst); +} + +void EmitX64::EmitSignedSaturatedSub64(EmitContext& ctx, IR::Inst* inst) { + EmitSignedSaturatedOp(code, ctx, inst); +} + void EmitX64::EmitUnsignedSaturatedAdd8(EmitContext& ctx, IR::Inst* inst) { EmitUnsignedSaturatedOp(code, ctx, inst); } @@ -282,34 +303,4 @@ void EmitX64::EmitUnsignedSaturatedSub64(EmitContext& ctx, IR::Inst* inst) { EmitUnsignedSaturatedOp(code, ctx, inst); } -void EmitX64::EmitUnsignedSaturation(EmitContext& ctx, IR::Inst* inst) { - const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp); - - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const size_t N = args[1].GetImmediateU8(); - ASSERT(N <= 31); - - const u32 saturated_value = (1u << N) - 1; - - const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32(); - const Xbyak::Reg32 reg_a = ctx.reg_alloc.UseGpr(args[0]).cvt32(); - const Xbyak::Reg32 overflow = ctx.reg_alloc.ScratchGpr().cvt32(); - - // Pseudocode: result = clamp(reg_a, 0, saturated_value); - code.xor_(overflow, overflow); - code.cmp(reg_a, saturated_value); - code.mov(result, saturated_value); - code.cmovle(result, overflow); - code.cmovbe(result, reg_a); - - if (overflow_inst) { - code.seta(overflow.cvt8()); - - ctx.reg_alloc.DefineValue(overflow_inst, overflow); - ctx.EraseInstruction(overflow_inst); - } - - ctx.reg_alloc.DefineValue(inst, result); -} - } // namespace Dynarmic::Backend::X64 diff --git a/src/dynarmic/frontend/A32/translate/impl/saturated.cpp b/src/dynarmic/frontend/A32/translate/impl/saturated.cpp index 51e7e1b6..41db1150 100644 --- a/src/dynarmic/frontend/A32/translate/impl/saturated.cpp +++ b/src/dynarmic/frontend/A32/translate/impl/saturated.cpp @@ -116,7 +116,7 @@ bool TranslatorVisitor::arm_QADD(Cond cond, Reg n, Reg d, Reg m) { const auto a = ir.GetRegister(m); const auto b = ir.GetRegister(n); - const auto result = ir.SignedSaturatedAdd(a, b); + const auto result = ir.SignedSaturatedAddWithFlag(a, b); ir.SetRegister(d, result.result); ir.OrQFlag(result.overflow); @@ -135,7 +135,7 @@ bool TranslatorVisitor::arm_QSUB(Cond cond, Reg n, Reg d, Reg m) { const auto a = ir.GetRegister(m); const auto b = ir.GetRegister(n); - const auto result = ir.SignedSaturatedSub(a, b); + const auto result = ir.SignedSaturatedSubWithFlag(a, b); ir.SetRegister(d, result.result); ir.OrQFlag(result.overflow); @@ -154,10 +154,10 @@ bool TranslatorVisitor::arm_QDADD(Cond cond, Reg n, Reg d, Reg m) { const auto a = ir.GetRegister(m); const auto b = ir.GetRegister(n); - const auto doubled = ir.SignedSaturatedAdd(b, b); + const auto doubled = ir.SignedSaturatedAddWithFlag(b, b); ir.OrQFlag(doubled.overflow); - const auto result = ir.SignedSaturatedAdd(a, doubled.result); + const auto result = ir.SignedSaturatedAddWithFlag(a, doubled.result); ir.SetRegister(d, result.result); ir.OrQFlag(result.overflow); return true; @@ -175,10 +175,10 @@ bool TranslatorVisitor::arm_QDSUB(Cond cond, Reg n, Reg d, Reg m) { const auto a = ir.GetRegister(m); const auto b = ir.GetRegister(n); - const auto doubled = ir.SignedSaturatedAdd(b, b); + const auto doubled = ir.SignedSaturatedAddWithFlag(b, b); ir.OrQFlag(doubled.overflow); - const auto result = ir.SignedSaturatedSub(a, doubled.result); + const auto result = ir.SignedSaturatedSubWithFlag(a, doubled.result); ir.SetRegister(d, result.result); ir.OrQFlag(result.overflow); return true; diff --git a/src/dynarmic/frontend/A32/translate/impl/thumb32_misc.cpp b/src/dynarmic/frontend/A32/translate/impl/thumb32_misc.cpp index bab2d158..9edb2310 100644 --- a/src/dynarmic/frontend/A32/translate/impl/thumb32_misc.cpp +++ b/src/dynarmic/frontend/A32/translate/impl/thumb32_misc.cpp @@ -26,7 +26,7 @@ bool TranslatorVisitor::thumb32_QADD(Reg n, Reg d, Reg m) { const auto reg_m = ir.GetRegister(m); const auto reg_n = ir.GetRegister(n); - const auto result = ir.SignedSaturatedAdd(reg_m, reg_n); + const auto result = ir.SignedSaturatedAddWithFlag(reg_m, reg_n); ir.SetRegister(d, result.result); ir.OrQFlag(result.overflow); @@ -40,10 +40,10 @@ bool TranslatorVisitor::thumb32_QDADD(Reg n, Reg d, Reg m) { const auto reg_m = ir.GetRegister(m); const auto reg_n = ir.GetRegister(n); - const auto doubled_n = ir.SignedSaturatedAdd(reg_n, reg_n); + const auto doubled_n = ir.SignedSaturatedAddWithFlag(reg_n, reg_n); ir.OrQFlag(doubled_n.overflow); - const auto result = ir.SignedSaturatedAdd(reg_m, doubled_n.result); + const auto result = ir.SignedSaturatedAddWithFlag(reg_m, doubled_n.result); ir.SetRegister(d, result.result); ir.OrQFlag(result.overflow); return true; @@ -56,10 +56,10 @@ bool TranslatorVisitor::thumb32_QDSUB(Reg n, Reg d, Reg m) { const auto reg_m = ir.GetRegister(m); const auto reg_n = ir.GetRegister(n); - const auto doubled_n = ir.SignedSaturatedAdd(reg_n, reg_n); + const auto doubled_n = ir.SignedSaturatedAddWithFlag(reg_n, reg_n); ir.OrQFlag(doubled_n.overflow); - const auto result = ir.SignedSaturatedSub(reg_m, doubled_n.result); + const auto result = ir.SignedSaturatedSubWithFlag(reg_m, doubled_n.result); ir.SetRegister(d, result.result); ir.OrQFlag(result.overflow); return true; @@ -72,7 +72,7 @@ bool TranslatorVisitor::thumb32_QSUB(Reg n, Reg d, Reg m) { const auto reg_m = ir.GetRegister(m); const auto reg_n = ir.GetRegister(n); - const auto result = ir.SignedSaturatedSub(reg_m, reg_n); + const auto result = ir.SignedSaturatedSubWithFlag(reg_m, reg_n); ir.SetRegister(d, result.result); ir.OrQFlag(result.overflow); diff --git a/src/dynarmic/frontend/A64/a64_ir_emitter.cpp b/src/dynarmic/frontend/A64/a64_ir_emitter.cpp index 80d7e007..3f5a70bd 100644 --- a/src/dynarmic/frontend/A64/a64_ir_emitter.cpp +++ b/src/dynarmic/frontend/A64/a64_ir_emitter.cpp @@ -42,10 +42,6 @@ void IREmitter::SetNZCV(const IR::NZCV& nzcv) { Inst(Opcode::A64SetNZCV, nzcv); } -void IREmitter::OrQC(const IR::U1& value) { - Inst(Opcode::A64OrQC, value); -} - void IREmitter::CallSupervisor(u32 imm) { Inst(Opcode::A64CallSupervisor, Imm32(imm)); } diff --git a/src/dynarmic/frontend/A64/a64_ir_emitter.h b/src/dynarmic/frontend/A64/a64_ir_emitter.h index 2d3797a3..7fc8bea7 100644 --- a/src/dynarmic/frontend/A64/a64_ir_emitter.h +++ b/src/dynarmic/frontend/A64/a64_ir_emitter.h @@ -39,7 +39,6 @@ public: IR::U32 GetNZCVRaw(); void SetNZCVRaw(IR::U32 value); void SetNZCV(const IR::NZCV& nzcv); - void OrQC(const IR::U1& value); void CallSupervisor(u32 imm); void ExceptionRaised(Exception exception); diff --git a/src/dynarmic/frontend/A64/translate/impl/simd_scalar_three_same.cpp b/src/dynarmic/frontend/A64/translate/impl/simd_scalar_three_same.cpp index af1adbe3..7a56f2aa 100644 --- a/src/dynarmic/frontend/A64/translate/impl/simd_scalar_three_same.cpp +++ b/src/dynarmic/frontend/A64/translate/impl/simd_scalar_three_same.cpp @@ -131,8 +131,7 @@ bool TranslatorVisitor::SQADD_1(Imm<2> size, Vec Vm, Vec Vn, Vec Vd) { const IR::UAny operand1 = V_scalar(esize, Vn); const IR::UAny operand2 = V_scalar(esize, Vm); const auto result = ir.SignedSaturatedAdd(operand1, operand2); - ir.OrQC(result.overflow); - V_scalar(esize, Vd, result.result); + V_scalar(esize, Vd, result); return true; } @@ -146,10 +145,7 @@ bool TranslatorVisitor::SQDMULH_vec_1(Imm<2> size, Vec Vm, Vec Vn, Vec Vd) { const IR::UAny operand1 = V_scalar(esize, Vn); const IR::UAny operand2 = V_scalar(esize, Vm); const auto result = ir.SignedSaturatedDoublingMultiplyReturnHigh(operand1, operand2); - - ir.OrQC(result.overflow); - - V_scalar(esize, Vd, result.result); + V_scalar(esize, Vd, result); return true; } @@ -175,8 +171,7 @@ bool TranslatorVisitor::SQSUB_1(Imm<2> size, Vec Vm, Vec Vn, Vec Vd) { const IR::UAny operand1 = V_scalar(esize, Vn); const IR::UAny operand2 = V_scalar(esize, Vm); const auto result = ir.SignedSaturatedSub(operand1, operand2); - ir.OrQC(result.overflow); - V_scalar(esize, Vd, result.result); + V_scalar(esize, Vd, result); return true; } @@ -186,8 +181,7 @@ bool TranslatorVisitor::UQADD_1(Imm<2> size, Vec Vm, Vec Vn, Vec Vd) { const IR::UAny operand1 = V_scalar(esize, Vn); const IR::UAny operand2 = V_scalar(esize, Vm); const auto result = ir.UnsignedSaturatedAdd(operand1, operand2); - ir.OrQC(result.overflow); - V_scalar(esize, Vd, result.result); + V_scalar(esize, Vd, result); return true; } @@ -197,8 +191,7 @@ bool TranslatorVisitor::UQSUB_1(Imm<2> size, Vec Vm, Vec Vn, Vec Vd) { const IR::UAny operand1 = V_scalar(esize, Vn); const IR::UAny operand2 = V_scalar(esize, Vm); const auto result = ir.UnsignedSaturatedSub(operand1, operand2); - ir.OrQC(result.overflow); - V_scalar(esize, Vd, result.result); + V_scalar(esize, Vd, result); return true; } diff --git a/src/dynarmic/frontend/A64/translate/impl/simd_scalar_x_indexed_element.cpp b/src/dynarmic/frontend/A64/translate/impl/simd_scalar_x_indexed_element.cpp index 4c3d95e2..93347c50 100644 --- a/src/dynarmic/frontend/A64/translate/impl/simd_scalar_x_indexed_element.cpp +++ b/src/dynarmic/frontend/A64/translate/impl/simd_scalar_x_indexed_element.cpp @@ -127,10 +127,7 @@ bool TranslatorVisitor::SQDMULH_elt_1(Imm<2> size, Imm<1> L, Imm<1> M, Imm<4> Vm const IR::UAny operand1 = V_scalar(esize, Vn); const IR::UAny operand2 = ir.VectorGetElement(esize, V(128, Vm), index); const auto result = ir.SignedSaturatedDoublingMultiplyReturnHigh(operand1, operand2); - - ir.OrQC(result.overflow); - - V_scalar(esize, Vd, result.result); + V_scalar(esize, Vd, result); return true; } diff --git a/src/dynarmic/ir/ir_emitter.cpp b/src/dynarmic/ir/ir_emitter.cpp index 45165098..d3079ebe 100644 --- a/src/dynarmic/ir/ir_emitter.cpp +++ b/src/dynarmic/ir/ir_emitter.cpp @@ -525,7 +525,33 @@ U32U64 IREmitter::MinUnsigned(const U32U64& a, const U32U64& b) { return Inst(Opcode::MinUnsigned64, a, b); } -ResultAndOverflow IREmitter::SignedSaturatedAdd(const UAny& a, const UAny& b) { +ResultAndOverflow IREmitter::SignedSaturatedAddWithFlag(const U32& a, const U32& b) { + const auto result = Inst(Opcode::SignedSaturatedAddWithFlag32, a, b); + const auto overflow = Inst(Opcode::GetOverflowFromOp, result); + return {result, overflow}; +} + +ResultAndOverflow IREmitter::SignedSaturatedSubWithFlag(const U32& a, const U32& b) { + const auto result = Inst(Opcode::SignedSaturatedSubWithFlag32, a, b); + const auto overflow = Inst(Opcode::GetOverflowFromOp, result); + return {result, overflow}; +} + +ResultAndOverflow IREmitter::SignedSaturation(const U32& a, size_t bit_size_to_saturate_to) { + ASSERT(bit_size_to_saturate_to >= 1 && bit_size_to_saturate_to <= 32); + const auto result = Inst(Opcode::SignedSaturation, a, Imm8(static_cast(bit_size_to_saturate_to))); + const auto overflow = Inst(Opcode::GetOverflowFromOp, result); + return {result, overflow}; +} + +ResultAndOverflow IREmitter::UnsignedSaturation(const U32& a, size_t bit_size_to_saturate_to) { + ASSERT(bit_size_to_saturate_to <= 31); + const auto result = Inst(Opcode::UnsignedSaturation, a, Imm8(static_cast(bit_size_to_saturate_to))); + const auto overflow = Inst(Opcode::GetOverflowFromOp, result); + return {result, overflow}; +} + +UAny IREmitter::SignedSaturatedAdd(const UAny& a, const UAny& b) { ASSERT(a.GetType() == b.GetType()); const auto result = [&]() -> IR::UAny { switch (a.GetType()) { @@ -541,11 +567,10 @@ ResultAndOverflow IREmitter::SignedSaturatedAdd(const UAny& a, const UAny& return IR::UAny{}; } }(); - const auto overflow = Inst(Opcode::GetOverflowFromOp, result); - return {result, overflow}; + return result; } -ResultAndOverflow IREmitter::SignedSaturatedDoublingMultiplyReturnHigh(const UAny& a, const UAny& b) { +UAny IREmitter::SignedSaturatedDoublingMultiplyReturnHigh(const UAny& a, const UAny& b) { ASSERT(a.GetType() == b.GetType()); const auto result = [&]() -> IR::UAny { switch (a.GetType()) { @@ -557,12 +582,10 @@ ResultAndOverflow IREmitter::SignedSaturatedDoublingMultiplyReturnHigh(con UNREACHABLE(); } }(); - - const auto overflow = Inst(Opcode::GetOverflowFromOp, result); - return {result, overflow}; + return result; } -ResultAndOverflow IREmitter::SignedSaturatedSub(const UAny& a, const UAny& b) { +UAny IREmitter::SignedSaturatedSub(const UAny& a, const UAny& b) { ASSERT(a.GetType() == b.GetType()); const auto result = [&]() -> IR::UAny { switch (a.GetType()) { @@ -578,18 +601,10 @@ ResultAndOverflow IREmitter::SignedSaturatedSub(const UAny& a, const UAny& return IR::UAny{}; } }(); - const auto overflow = Inst(Opcode::GetOverflowFromOp, result); - return {result, overflow}; + return result; } -ResultAndOverflow IREmitter::SignedSaturation(const U32& a, size_t bit_size_to_saturate_to) { - ASSERT(bit_size_to_saturate_to >= 1 && bit_size_to_saturate_to <= 32); - const auto result = Inst(Opcode::SignedSaturation, a, Imm8(static_cast(bit_size_to_saturate_to))); - const auto overflow = Inst(Opcode::GetOverflowFromOp, result); - return {result, overflow}; -} - -ResultAndOverflow IREmitter::UnsignedSaturatedAdd(const UAny& a, const UAny& b) { +UAny IREmitter::UnsignedSaturatedAdd(const UAny& a, const UAny& b) { ASSERT(a.GetType() == b.GetType()); const auto result = [&]() -> IR::UAny { switch (a.GetType()) { @@ -605,11 +620,10 @@ ResultAndOverflow IREmitter::UnsignedSaturatedAdd(const UAny& a, const UAn return IR::UAny{}; } }(); - const auto overflow = Inst(Opcode::GetOverflowFromOp, result); - return {result, overflow}; + return result; } -ResultAndOverflow IREmitter::UnsignedSaturatedSub(const UAny& a, const UAny& b) { +UAny IREmitter::UnsignedSaturatedSub(const UAny& a, const UAny& b) { ASSERT(a.GetType() == b.GetType()); const auto result = [&]() -> IR::UAny { switch (a.GetType()) { @@ -625,15 +639,7 @@ ResultAndOverflow IREmitter::UnsignedSaturatedSub(const UAny& a, const UAn return IR::UAny{}; } }(); - const auto overflow = Inst(Opcode::GetOverflowFromOp, result); - return {result, overflow}; -} - -ResultAndOverflow IREmitter::UnsignedSaturation(const U32& a, size_t bit_size_to_saturate_to) { - ASSERT(bit_size_to_saturate_to <= 31); - const auto result = Inst(Opcode::UnsignedSaturation, a, Imm8(static_cast(bit_size_to_saturate_to))); - const auto overflow = Inst(Opcode::GetOverflowFromOp, result); - return {result, overflow}; + return result; } U128 IREmitter::VectorSignedSaturatedAdd(size_t esize, const U128& a, const U128& b) { diff --git a/src/dynarmic/ir/ir_emitter.h b/src/dynarmic/ir/ir_emitter.h index 3dc4cbb8..d19d5db5 100644 --- a/src/dynarmic/ir/ir_emitter.h +++ b/src/dynarmic/ir/ir_emitter.h @@ -150,14 +150,17 @@ public: U32U64 MinSigned(const U32U64& a, const U32U64& b); U32U64 MinUnsigned(const U32U64& a, const U32U64& b); - ResultAndOverflow SignedSaturatedAdd(const UAny& a, const UAny& b); - ResultAndOverflow SignedSaturatedDoublingMultiplyReturnHigh(const UAny& a, const UAny& b); - ResultAndOverflow SignedSaturatedSub(const UAny& a, const UAny& b); + ResultAndOverflow SignedSaturatedAddWithFlag(const U32& a, const U32& b); + ResultAndOverflow SignedSaturatedSubWithFlag(const U32& a, const U32& b); ResultAndOverflow SignedSaturation(const U32& a, size_t bit_size_to_saturate_to); - ResultAndOverflow UnsignedSaturatedAdd(const UAny& a, const UAny& b); - ResultAndOverflow UnsignedSaturatedSub(const UAny& a, const UAny& b); ResultAndOverflow UnsignedSaturation(const U32& a, size_t bit_size_to_saturate_to); + UAny SignedSaturatedAdd(const UAny& a, const UAny& b); + UAny SignedSaturatedDoublingMultiplyReturnHigh(const UAny& a, const UAny& b); + UAny SignedSaturatedSub(const UAny& a, const UAny& b); + UAny UnsignedSaturatedAdd(const UAny& a, const UAny& b); + UAny UnsignedSaturatedSub(const UAny& a, const UAny& b); + U128 VectorSignedSaturatedAdd(size_t esize, const U128& a, const U128& b); U128 VectorSignedSaturatedSub(size_t esize, const U128& a, const U128& b); U128 VectorUnsignedSaturatedAdd(size_t esize, const U128& a, const U128& b); diff --git a/src/dynarmic/ir/microinstruction.cpp b/src/dynarmic/ir/microinstruction.cpp index cf51363b..ba8c08da 100644 --- a/src/dynarmic/ir/microinstruction.cpp +++ b/src/dynarmic/ir/microinstruction.cpp @@ -418,7 +418,24 @@ bool Inst::ReadsFromFPSRCumulativeSaturationBit() const { bool Inst::WritesToFPSRCumulativeSaturationBit() const { switch (op) { - case Opcode::A64OrQC: + case Opcode::SignedSaturatedAdd8: + case Opcode::SignedSaturatedAdd16: + case Opcode::SignedSaturatedAdd32: + case Opcode::SignedSaturatedAdd64: + case Opcode::SignedSaturatedDoublingMultiplyReturnHigh16: + case Opcode::SignedSaturatedDoublingMultiplyReturnHigh32: + case Opcode::SignedSaturatedSub8: + case Opcode::SignedSaturatedSub16: + case Opcode::SignedSaturatedSub32: + case Opcode::SignedSaturatedSub64: + case Opcode::UnsignedSaturatedAdd8: + case Opcode::UnsignedSaturatedAdd16: + case Opcode::UnsignedSaturatedAdd32: + case Opcode::UnsignedSaturatedAdd64: + case Opcode::UnsignedSaturatedSub8: + case Opcode::UnsignedSaturatedSub16: + case Opcode::UnsignedSaturatedSub32: + case Opcode::UnsignedSaturatedSub64: case Opcode::VectorSignedSaturatedAbs8: case Opcode::VectorSignedSaturatedAbs16: case Opcode::VectorSignedSaturatedAbs32: diff --git a/src/dynarmic/ir/opcodes.inc b/src/dynarmic/ir/opcodes.inc index c7dd2fef..59391944 100644 --- a/src/dynarmic/ir/opcodes.inc +++ b/src/dynarmic/ir/opcodes.inc @@ -63,7 +63,6 @@ A64OPC(SetQ, Void, A64V A64OPC(SetSP, Void, U64 ) A64OPC(SetFPCR, Void, U32 ) A64OPC(SetFPSR, Void, U32 ) -A64OPC(OrQC, Void, U1 ) A64OPC(SetPC, Void, U64 ) A64OPC(CallSupervisor, Void, U32 ) A64OPC(ExceptionRaised, Void, U64, U64 ) @@ -178,6 +177,10 @@ OPCODE(MinUnsigned32, U32, U32, OPCODE(MinUnsigned64, U64, U64, U64 ) // Saturated instructions +OPCODE(SignedSaturatedAddWithFlag32, U32, U32, U32 ) +OPCODE(SignedSaturatedSubWithFlag32, U32, U32, U32 ) +OPCODE(SignedSaturation, U32, U32, U8 ) +OPCODE(UnsignedSaturation, U32, U32, U8 ) OPCODE(SignedSaturatedAdd8, U8, U8, U8 ) OPCODE(SignedSaturatedAdd16, U16, U16, U16 ) OPCODE(SignedSaturatedAdd32, U32, U32, U32 ) @@ -188,7 +191,6 @@ OPCODE(SignedSaturatedSub8, U8, U8, OPCODE(SignedSaturatedSub16, U16, U16, U16 ) OPCODE(SignedSaturatedSub32, U32, U32, U32 ) OPCODE(SignedSaturatedSub64, U64, U64, U64 ) -OPCODE(SignedSaturation, U32, U32, U8 ) OPCODE(UnsignedSaturatedAdd8, U8, U8, U8 ) OPCODE(UnsignedSaturatedAdd16, U16, U16, U16 ) OPCODE(UnsignedSaturatedAdd32, U32, U32, U32 ) @@ -197,7 +199,6 @@ OPCODE(UnsignedSaturatedSub8, U8, U8, OPCODE(UnsignedSaturatedSub16, U16, U16, U16 ) OPCODE(UnsignedSaturatedSub32, U32, U32, U32 ) OPCODE(UnsignedSaturatedSub64, U64, U64, U64 ) -OPCODE(UnsignedSaturation, U32, U32, U8 ) // Vector saturated instructions OPCODE(VectorSignedSaturatedAdd8, U128, U128, U128 )