diff --git a/src/backend_x64/emit_x64_saturation.cpp b/src/backend_x64/emit_x64_saturation.cpp
index 06986f57..fdbace17 100644
--- a/src/backend_x64/emit_x64_saturation.cpp
+++ b/src/backend_x64/emit_x64_saturation.cpp
@@ -77,6 +77,45 @@ void EmitSignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst)
     ctx.reg_alloc.DefineValue(inst, result);
 }
 
+template<Op op, size_t size>
+void EmitUnsignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
+    auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
+
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    Xbyak::Reg op_result = ctx.reg_alloc.UseScratchGpr(args[0]);
+    Xbyak::Reg addend = ctx.reg_alloc.UseScratchGpr(args[1]);
+
+    op_result.setBit(size);
+    addend.setBit(size);
+
+    if constexpr (op == Op::Add) {
+        code.add(op_result, addend);
+    } else {
+        code.sub(op_result, addend);
+    }
+
+    constexpr u64 boundary = op == Op::Add ? std::numeric_limits<mp::unsigned_integer_of_size<size>>::max()
+                                           : 0;
+    code.mov(addend, boundary);
+
+    if constexpr (size < 64) {
+        code.cmovae(addend.cvt32(), op_result.cvt32());
+    } else {
+        code.cmovae(addend, op_result);
+    }
+
+    if (overflow_inst) {
+        Xbyak::Reg overflow = ctx.reg_alloc.ScratchGpr();
+        code.setb(overflow.cvt8());
+
+        ctx.reg_alloc.DefineValue(overflow_inst, overflow);
+        ctx.EraseInstruction(overflow_inst);
+    }
+
+    ctx.reg_alloc.DefineValue(inst, addend);
+}
+
 } // anonymous namespace
 
 void EmitX64::EmitSignedSaturatedAdd8(EmitContext& ctx, IR::Inst* inst) {
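For reference, the helper above derives unsigned saturation entirely from the x86 carry flag: the wrapped add/sub sets CF exactly when the true result is out of range, the cmovae then overwrites the pre-loaded boundary (the type's maximum for Add, zero for Sub) with the wrapped result only when CF is clear, and setb captures the same flag as the overflow output. A minimal scalar model of that sequence in plain standard C++ (not dynarmic/Xbyak code, just an illustration of the logic):

// Scalar model of the sequence emitted by EmitUnsignedSaturatedOp<op, size>.
// T stands in for the unsigned integer type of the chosen operand width.
#include <limits>
#include <utility>

template<typename T>
std::pair<T, bool> unsigned_saturated_add(T a, T b) {
    const T wrapped = static_cast<T>(a + b);   // add op_result, addend
    const bool carry = wrapped < a;            // CF after the add
    // mov addend, max; cmovae addend, op_result -- keep max only when CF is set
    const T result = carry ? std::numeric_limits<T>::max() : wrapped;
    return {result, carry};                    // setb captures CF as the overflow bit
}

template<typename T>
std::pair<T, bool> unsigned_saturated_sub(T a, T b) {
    const T wrapped = static_cast<T>(a - b);   // sub op_result, addend
    const bool borrow = a < b;                 // CF after the sub
    // mov addend, 0; cmovae addend, op_result -- keep 0 only when CF is set
    const T result = borrow ? T{0} : wrapped;
    return {result, borrow};
}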
@@ -111,36 +150,6 @@ void EmitX64::EmitSignedSaturatedSub64(EmitContext& ctx, IR::Inst* inst) {
     EmitSignedSaturatedOp<Op::Sub, 64>(code, ctx, inst);
 }
 
-void EmitX64::EmitUnsignedSaturation(EmitContext& ctx, IR::Inst* inst) {
-    auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
-
-    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    size_t N = args[1].GetImmediateU8();
-    ASSERT(N <= 31);
-
-    u32 saturated_value = (1u << N) - 1;
-
-    Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
-    Xbyak::Reg32 reg_a = ctx.reg_alloc.UseGpr(args[0]).cvt32();
-    Xbyak::Reg32 overflow = ctx.reg_alloc.ScratchGpr().cvt32();
-
-    // Pseudocode: result = clamp(reg_a, 0, saturated_value);
-    code.xor_(overflow, overflow);
-    code.cmp(reg_a, saturated_value);
-    code.mov(result, saturated_value);
-    code.cmovle(result, overflow);
-    code.cmovbe(result, reg_a);
-
-    if (overflow_inst) {
-        code.seta(overflow.cvt8());
-
-        ctx.reg_alloc.DefineValue(overflow_inst, overflow);
-        ctx.EraseInstruction(overflow_inst);
-    }
-
-    ctx.reg_alloc.DefineValue(inst, result);
-}
-
 void EmitX64::EmitSignedSaturation(EmitContext& ctx, IR::Inst* inst) {
     auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
 
@@ -190,4 +199,66 @@
     ctx.reg_alloc.DefineValue(inst, result);
 }
 
+void EmitX64::EmitUnsignedSaturatedAdd8(EmitContext& ctx, IR::Inst* inst) {
+    EmitUnsignedSaturatedOp<Op::Add, 8>(code, ctx, inst);
+}
+
+void EmitX64::EmitUnsignedSaturatedAdd16(EmitContext& ctx, IR::Inst* inst) {
+    EmitUnsignedSaturatedOp<Op::Add, 16>(code, ctx, inst);
+}
+
+void EmitX64::EmitUnsignedSaturatedAdd32(EmitContext& ctx, IR::Inst* inst) {
+    EmitUnsignedSaturatedOp<Op::Add, 32>(code, ctx, inst);
+}
+
+void EmitX64::EmitUnsignedSaturatedAdd64(EmitContext& ctx, IR::Inst* inst) {
+    EmitUnsignedSaturatedOp<Op::Add, 64>(code, ctx, inst);
+}
+
+void EmitX64::EmitUnsignedSaturatedSub8(EmitContext& ctx, IR::Inst* inst) {
+    EmitUnsignedSaturatedOp<Op::Sub, 8>(code, ctx, inst);
+}
+
+void EmitX64::EmitUnsignedSaturatedSub16(EmitContext& ctx, IR::Inst* inst) {
+    EmitUnsignedSaturatedOp<Op::Sub, 16>(code, ctx, inst);
+}
+
+void EmitX64::EmitUnsignedSaturatedSub32(EmitContext& ctx, IR::Inst* inst) {
+    EmitUnsignedSaturatedOp<Op::Sub, 32>(code, ctx, inst);
+}
+
+void EmitX64::EmitUnsignedSaturatedSub64(EmitContext& ctx, IR::Inst* inst) {
+    EmitUnsignedSaturatedOp<Op::Sub, 64>(code, ctx, inst);
+}
+
+void EmitX64::EmitUnsignedSaturation(EmitContext& ctx, IR::Inst* inst) {
+    auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
+
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    size_t N = args[1].GetImmediateU8();
+    ASSERT(N <= 31);
+
+    u32 saturated_value = (1u << N) - 1;
+
+    Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
+    Xbyak::Reg32 reg_a = ctx.reg_alloc.UseGpr(args[0]).cvt32();
+    Xbyak::Reg32 overflow = ctx.reg_alloc.ScratchGpr().cvt32();
+
+    // Pseudocode: result = clamp(reg_a, 0, saturated_value);
+    code.xor_(overflow, overflow);
+    code.cmp(reg_a, saturated_value);
+    code.mov(result, saturated_value);
+    code.cmovle(result, overflow);
+    code.cmovbe(result, reg_a);
+
+    if (overflow_inst) {
+        code.seta(overflow.cvt8());
+
+        ctx.reg_alloc.DefineValue(overflow_inst, overflow);
+        ctx.EraseInstruction(overflow_inst);
+    }
+
+    ctx.reg_alloc.DefineValue(inst, result);
+}
+
 } // namespace Dynarmic::BackendX64
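The IR-level changes below pair each saturated result with a U1 produced by the GetOverflowFromOp pseudo-operation, which the backend code above materialises with setb only when that pseudo-op is actually used. A hypothetical caller on the frontend side might consume the new helpers as in the following sketch; the function name, the include path, and the idea of feeding the bit into a sticky Q/QC flag are illustrative assumptions, not part of this patch:

// Illustrative sketch only: how a translator might use the new IREmitter helper.
// EmitUQADD32Example is a made-up name; it is not introduced by this patch.
#include "frontend/ir/ir_emitter.h"

namespace Dynarmic {

// Returns the saturated 32-bit sum and hands back the "saturation happened" bit,
// which a caller would typically OR into the guest's sticky Q/QC flag.
inline IR::UAny EmitUQADD32Example(IR::IREmitter& ir, const IR::U32& a, const IR::U32& b,
                                   IR::U1& sat_out) {
    const auto [result, overflow] = ir.UnsignedSaturatedAdd(a, b);
    sat_out = overflow;
    return result;
}

} // namespace Dynarmic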
diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp
index ae763eb4..973d13f9 100644
--- a/src/frontend/ir/ir_emitter.cpp
+++ b/src/frontend/ir/ir_emitter.cpp
@@ -521,16 +521,56 @@ ResultAndOverflow<UAny> IREmitter::SignedSaturatedSub(const UAny& a, const UAny&
     return {result, overflow};
 }
 
-ResultAndOverflow<U32> IREmitter::UnsignedSaturation(const U32& a, size_t bit_size_to_saturate_to) {
-    ASSERT(bit_size_to_saturate_to <= 31);
-    auto result = Inst<U32>(Opcode::UnsignedSaturation, a, Imm8(static_cast<u8>(bit_size_to_saturate_to)));
+ResultAndOverflow<U32> IREmitter::SignedSaturation(const U32& a, size_t bit_size_to_saturate_to) {
+    ASSERT(bit_size_to_saturate_to >= 1 && bit_size_to_saturate_to <= 32);
+    auto result = Inst<U32>(Opcode::SignedSaturation, a, Imm8(static_cast<u8>(bit_size_to_saturate_to)));
     auto overflow = Inst<U1>(Opcode::GetOverflowFromOp, result);
     return {result, overflow};
 }
 
-ResultAndOverflow<U32> IREmitter::SignedSaturation(const U32& a, size_t bit_size_to_saturate_to) {
-    ASSERT(bit_size_to_saturate_to >= 1 && bit_size_to_saturate_to <= 32);
-    auto result = Inst<U32>(Opcode::SignedSaturation, a, Imm8(static_cast<u8>(bit_size_to_saturate_to)));
+ResultAndOverflow<UAny> IREmitter::UnsignedSaturatedAdd(const UAny& a, const UAny& b) {
+    ASSERT(a.GetType() == b.GetType());
+    const auto result = [&]() -> IR::UAny {
+        switch (a.GetType()) {
+        case IR::Type::U8:
+            return Inst<UAny>(Opcode::UnsignedSaturatedAdd8, a, b);
+        case IR::Type::U16:
+            return Inst<UAny>(Opcode::UnsignedSaturatedAdd16, a, b);
+        case IR::Type::U32:
+            return Inst<UAny>(Opcode::UnsignedSaturatedAdd32, a, b);
+        case IR::Type::U64:
+            return Inst<UAny>(Opcode::UnsignedSaturatedAdd64, a, b);
+        default:
+            return IR::UAny{};
+        }
+    }();
+    const auto overflow = Inst<U1>(Opcode::GetOverflowFromOp, result);
+    return {result, overflow};
+}
+
+ResultAndOverflow<UAny> IREmitter::UnsignedSaturatedSub(const UAny& a, const UAny& b) {
+    ASSERT(a.GetType() == b.GetType());
+    const auto result = [&]() -> IR::UAny {
+        switch (a.GetType()) {
+        case IR::Type::U8:
+            return Inst<UAny>(Opcode::UnsignedSaturatedSub8, a, b);
+        case IR::Type::U16:
+            return Inst<UAny>(Opcode::UnsignedSaturatedSub16, a, b);
+        case IR::Type::U32:
+            return Inst<UAny>(Opcode::UnsignedSaturatedSub32, a, b);
+        case IR::Type::U64:
+            return Inst<UAny>(Opcode::UnsignedSaturatedSub64, a, b);
+        default:
+            return IR::UAny{};
+        }
+    }();
+    const auto overflow = Inst<U1>(Opcode::GetOverflowFromOp, result);
+    return {result, overflow};
+}
+
+ResultAndOverflow<U32> IREmitter::UnsignedSaturation(const U32& a, size_t bit_size_to_saturate_to) {
+    ASSERT(bit_size_to_saturate_to <= 31);
+    auto result = Inst<U32>(Opcode::UnsignedSaturation, a, Imm8(static_cast<u8>(bit_size_to_saturate_to)));
     auto overflow = Inst<U1>(Opcode::GetOverflowFromOp, result);
     return {result, overflow};
 }
diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h
index 8bc631d6..ef87fe79 100644
--- a/src/frontend/ir/ir_emitter.h
+++ b/src/frontend/ir/ir_emitter.h
@@ -144,8 +144,10 @@ public:
 
     ResultAndOverflow<UAny> SignedSaturatedAdd(const UAny& a, const UAny& b);
     ResultAndOverflow<UAny> SignedSaturatedSub(const UAny& a, const UAny& b);
-    ResultAndOverflow<U32> UnsignedSaturation(const U32& a, size_t bit_size_to_saturate_to);
     ResultAndOverflow<U32> SignedSaturation(const U32& a, size_t bit_size_to_saturate_to);
+    ResultAndOverflow<UAny> UnsignedSaturatedAdd(const UAny& a, const UAny& b);
+    ResultAndOverflow<UAny> UnsignedSaturatedSub(const UAny& a, const UAny& b);
+    ResultAndOverflow<U32> UnsignedSaturation(const U32& a, size_t bit_size_to_saturate_to);
 
     ResultAndGE PackedAddU8(const U32& a, const U32& b);
     ResultAndGE PackedAddS8(const U32& a, const U32& b);
diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc
index c885adf8..dcadb703 100644
--- a/src/frontend/ir/opcodes.inc
+++ b/src/frontend/ir/opcodes.inc
@@ -164,6 +164,14 @@ OPCODE(SignedSaturatedSub16,                            T::U16,         T::U16,
 OPCODE(SignedSaturatedSub32,                            T::U32,         T::U32,         T::U32                          )
 OPCODE(SignedSaturatedSub64,                            T::U64,         T::U64,         T::U64                          )
 OPCODE(SignedSaturation,                                T::U32,         T::U32,         T::U8                           )
+OPCODE(UnsignedSaturatedAdd8,                           T::U8,          T::U8,          T::U8                           )
+OPCODE(UnsignedSaturatedAdd16,                          T::U16,         T::U16,         T::U16                          )
+OPCODE(UnsignedSaturatedAdd32,                          T::U32,         T::U32,         T::U32                          )
+OPCODE(UnsignedSaturatedAdd64,                          T::U64,         T::U64,         T::U64                          )
+OPCODE(UnsignedSaturatedSub8,                           T::U8,          T::U8,          T::U8                           )
+OPCODE(UnsignedSaturatedSub16,                          T::U16,         T::U16,         T::U16                          )
+OPCODE(UnsignedSaturatedSub32,                          T::U32,         T::U32,         T::U32                          )
+OPCODE(UnsignedSaturatedSub64,                          T::U64,         T::U64,         T::U64                          )
 OPCODE(UnsignedSaturation,                              T::U32,         T::U32,         T::U8                           )
 
 // Packed instructions
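A note on the boundary constant in EmitUnsignedSaturatedOp: for Add it is the all-ones value of the operand width, obtained through an alias template that maps a bit width to the matching unsigned integer type, while Sub always clamps towards zero. A self-contained stand-in for such an alias, shown with the resulting Add boundaries and using only the standard library (illustration only, not dynarmic's own helper):

// Width-to-unsigned-type mapping equivalent to what the boundary computation relies on.
#include <cstddef>
#include <cstdint>
#include <limits>
#include <type_traits>

template<std::size_t size>
using unsigned_integer_of_size =
    std::conditional_t<size == 8,  std::uint8_t,
    std::conditional_t<size == 16, std::uint16_t,
    std::conditional_t<size == 32, std::uint32_t,
    std::conditional_t<size == 64, std::uint64_t, void>>>>;

// Add saturates towards these maxima; Sub saturates towards 0.
static_assert(std::numeric_limits<unsigned_integer_of_size<8>>::max()  == 0xff);
static_assert(std::numeric_limits<unsigned_integer_of_size<16>>::max() == 0xffff);
static_assert(std::numeric_limits<unsigned_integer_of_size<32>>::max() == 0xffffffff);
static_assert(std::numeric_limits<unsigned_integer_of_size<64>>::max() == 0xffffffffffffffffull);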