diff --git a/src/backend_x64/emit_x64_saturation.cpp b/src/backend_x64/emit_x64_saturation.cpp index 5c4483ad..06986f57 100644 --- a/src/backend_x64/emit_x64_saturation.cpp +++ b/src/backend_x64/emit_x64_saturation.cpp @@ -4,11 +4,14 @@ * General Public License version 2 or any later version. */ +#include <limits> + #include "backend_x64/block_of_code.h" #include "backend_x64/emit_x64.h" #include "common/assert.h" #include "common/bit_util.h" #include "common/common_types.h" +#include "common/mp/integer.h" #include "frontend/ir/basic_block.h" #include "frontend/ir/microinstruction.h" #include "frontend/ir/opcodes.h" @@ -16,22 +19,53 @@ namespace Dynarmic::BackendX64 { using namespace Xbyak::util; +namespace mp = Dynarmic::Common::mp; -void EmitX64::EmitSignedSaturatedAdd(EmitContext& ctx, IR::Inst* inst) { +namespace { + +enum class Op { + Add, + Sub, +}; + +template <Op op, size_t size> +void EmitSignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp); auto args = ctx.reg_alloc.GetArgumentInfo(inst); - Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); - Xbyak::Reg32 addend = ctx.reg_alloc.UseGpr(args[1]).cvt32(); - Xbyak::Reg32 overflow = ctx.reg_alloc.ScratchGpr().cvt32(); + Xbyak::Reg result = ctx.reg_alloc.UseScratchGpr(args[0]); + Xbyak::Reg addend = ctx.reg_alloc.UseGpr(args[1]); + Xbyak::Reg overflow = ctx.reg_alloc.ScratchGpr(); - code.mov(overflow, result); - code.shr(overflow, 31); - code.add(overflow, 0x7FFFFFFF); - // overflow now contains 0x7FFFFFFF if a was positive, or 0x80000000 if a was negative - code.add(result, addend); - code.cmovo(result, overflow); + result.setBit(size); + addend.setBit(size); + overflow.setBit(size); + + constexpr u64 int_max = static_cast<u64>(std::numeric_limits<mp::signed_integer_of_size<size>>::max()); + if constexpr (size < 64) { + code.xor_(overflow.cvt32(), overflow.cvt32()); + code.bt(result.cvt32(), size - 1); + code.adc(overflow.cvt32(), int_max); + } else { + 
code.mov(overflow, int_max); + code.bt(result, 63); + code.adc(overflow, 0); + } + + // overflow now contains 0x7F... if a was positive, or 0x80... if a was negative + + if constexpr (op == Op::Add) { + code.add(result, addend); + } else { + code.sub(result, addend); + } + + if constexpr (size < 64) { + code.cmovo(result.cvt32(), overflow.cvt32()); + } else { + code.cmovo(result, overflow); + } if (overflow_inst) { code.seto(overflow.cvt8()); @@ -43,30 +77,38 @@ void EmitX64::EmitSignedSaturatedAdd(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(inst, result); } -void EmitX64::EmitSignedSaturatedSub(EmitContext& ctx, IR::Inst* inst) { - auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp); +} // anonymous namespace - auto args = ctx.reg_alloc.GetArgumentInfo(inst); +void EmitX64::EmitSignedSaturatedAdd8(EmitContext& ctx, IR::Inst* inst) { + EmitSignedSaturatedOp<Op::Add, 8>(code, ctx, inst); +} - Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); - Xbyak::Reg32 subend = ctx.reg_alloc.UseGpr(args[1]).cvt32(); - Xbyak::Reg32 overflow = ctx.reg_alloc.ScratchGpr().cvt32(); +void EmitX64::EmitSignedSaturatedAdd16(EmitContext& ctx, IR::Inst* inst) { + EmitSignedSaturatedOp<Op::Add, 16>(code, ctx, inst); +} - code.mov(overflow, result); - code.shr(overflow, 31); - code.add(overflow, 0x7FFFFFFF); - // overflow now contains 0x7FFFFFFF if a was positive, or 0x80000000 if a was negative - code.sub(result, subend); - code.cmovo(result, overflow); +void EmitX64::EmitSignedSaturatedAdd32(EmitContext& ctx, IR::Inst* inst) { + EmitSignedSaturatedOp<Op::Add, 32>(code, ctx, inst); +} - if (overflow_inst) { - code.seto(overflow.cvt8()); +void EmitX64::EmitSignedSaturatedAdd64(EmitContext& ctx, IR::Inst* inst) { + EmitSignedSaturatedOp<Op::Add, 64>(code, ctx, inst); +} - ctx.reg_alloc.DefineValue(overflow_inst, overflow); - ctx.EraseInstruction(overflow_inst); - } +void EmitX64::EmitSignedSaturatedSub8(EmitContext& ctx, IR::Inst* inst) { + EmitSignedSaturatedOp<Op::Sub, 8>(code, 
ctx, inst); +} - ctx.reg_alloc.DefineValue(inst, result); +void EmitX64::EmitSignedSaturatedSub16(EmitContext& ctx, IR::Inst* inst) { + EmitSignedSaturatedOp<Op::Sub, 16>(code, ctx, inst); +} + +void EmitX64::EmitSignedSaturatedSub32(EmitContext& ctx, IR::Inst* inst) { + EmitSignedSaturatedOp<Op::Sub, 32>(code, ctx, inst); +} + +void EmitX64::EmitSignedSaturatedSub64(EmitContext& ctx, IR::Inst* inst) { + EmitSignedSaturatedOp<Op::Sub, 64>(code, ctx, inst); } void EmitX64::EmitUnsignedSaturation(EmitContext& ctx, IR::Inst* inst) { diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp index 7322d57f..98aad4c3 100644 --- a/src/frontend/ir/ir_emitter.cpp +++ b/src/frontend/ir/ir_emitter.cpp @@ -481,15 +481,43 @@ U32U64 IREmitter::MinUnsigned(const U32U64& a, const U32U64& b) { return Inst<U64>(Opcode::MinUnsigned64, a, b); } -ResultAndOverflow<U32> IREmitter::SignedSaturatedAdd(const U32& a, const U32& b) { - auto result = Inst<U32>(Opcode::SignedSaturatedAdd, a, b); - auto overflow = Inst<U1>(Opcode::GetOverflowFromOp, result); +ResultAndOverflow<UAny> IREmitter::SignedSaturatedAdd(const UAny& a, const UAny& b) { + ASSERT(a.GetType() == b.GetType()); + const auto result = [&]() -> IR::UAny { + switch (a.GetType()) { + case IR::Type::U8: + return Inst<U8>(Opcode::SignedSaturatedAdd8, a, b); + case IR::Type::U16: + return Inst<U16>(Opcode::SignedSaturatedAdd16, a, b); + case IR::Type::U32: + return Inst<U32>(Opcode::SignedSaturatedAdd32, a, b); + case IR::Type::U64: + return Inst<U64>(Opcode::SignedSaturatedAdd64, a, b); + default: + return IR::UAny{}; + } + }(); + const auto overflow = Inst<U1>(Opcode::GetOverflowFromOp, result); return {result, overflow}; } -ResultAndOverflow<U32> IREmitter::SignedSaturatedSub(const U32& a, const U32& b) { - auto result = Inst<U32>(Opcode::SignedSaturatedSub, a, b); - auto overflow = Inst<U1>(Opcode::GetOverflowFromOp, result); +ResultAndOverflow<UAny> IREmitter::SignedSaturatedSub(const UAny& a, const UAny& b) { + ASSERT(a.GetType() == b.GetType()); + const auto result = [&]() -> IR::UAny { + switch (a.GetType()) { 
+ case IR::Type::U8: + return Inst<U8>(Opcode::SignedSaturatedSub8, a, b); + case IR::Type::U16: + return Inst<U16>(Opcode::SignedSaturatedSub16, a, b); + case IR::Type::U32: + return Inst<U32>(Opcode::SignedSaturatedSub32, a, b); + case IR::Type::U64: + return Inst<U64>(Opcode::SignedSaturatedSub64, a, b); + default: + return IR::UAny{}; + } + }(); + const auto overflow = Inst<U1>(Opcode::GetOverflowFromOp, result); return {result, overflow}; } diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h index 10a00f5d..47c005ee 100644 --- a/src/frontend/ir/ir_emitter.h +++ b/src/frontend/ir/ir_emitter.h @@ -142,8 +142,8 @@ public: U32U64 MinSigned(const U32U64& a, const U32U64& b); U32U64 MinUnsigned(const U32U64& a, const U32U64& b); - ResultAndOverflow<U32> SignedSaturatedAdd(const U32& a, const U32& b); - ResultAndOverflow<U32> SignedSaturatedSub(const U32& a, const U32& b); + ResultAndOverflow<UAny> SignedSaturatedAdd(const UAny& a, const UAny& b); + ResultAndOverflow<UAny> SignedSaturatedSub(const UAny& a, const UAny& b); ResultAndOverflow<U32> UnsignedSaturation(const U32& a, size_t bit_size_to_saturate_to); ResultAndOverflow<U32> SignedSaturation(const U32& a, size_t bit_size_to_saturate_to); diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc index 2c4f1b77..5890433e 100644 --- a/src/frontend/ir/opcodes.inc +++ b/src/frontend/ir/opcodes.inc @@ -76,10 +76,10 @@ A64OPC(GetTPIDRRO, T::U64, // Hints OPCODE(PushRSB, T::Void, T::U64 ) -// Pseudo-operation, handled special ly at final emit -OPCODE(GetCarryFromOp, T::U1, T::U32 ) -OPCODE(GetOverflowFromOp, T::U1, T::U32 ) -OPCODE(GetGEFromOp, T::U32, T::U32 ) +// Pseudo-operation, handled specially at final emit +OPCODE(GetCarryFromOp, T::U1, T::Opaque ) +OPCODE(GetOverflowFromOp, T::U1, T::Opaque ) +OPCODE(GetGEFromOp, T::U32, T::Opaque ) OPCODE(GetNZCVFromOp, T::NZCVFlags, T::Opaque ) OPCODE(NZCVFromPackedFlags, T::NZCVFlags, T::U32 ) @@ -155,10 +155,16 @@ OPCODE(MinUnsigned32, T::U32, T::U32, OPCODE(MinUnsigned64, T::U64, T::U64, 
T::U64 ) // Saturated instructions -OPCODE(SignedSaturatedAdd, T::U32, T::U32, T::U32 ) -OPCODE(SignedSaturatedSub, T::U32, T::U32, T::U32 ) -OPCODE(UnsignedSaturation, T::U32, T::U32, T::U8 ) +OPCODE(SignedSaturatedAdd8, T::U8, T::U8, T::U8 ) +OPCODE(SignedSaturatedAdd16, T::U16, T::U16, T::U16 ) +OPCODE(SignedSaturatedAdd32, T::U32, T::U32, T::U32 ) +OPCODE(SignedSaturatedAdd64, T::U64, T::U64, T::U64 ) +OPCODE(SignedSaturatedSub8, T::U8, T::U8, T::U8 ) +OPCODE(SignedSaturatedSub16, T::U16, T::U16, T::U16 ) +OPCODE(SignedSaturatedSub32, T::U32, T::U32, T::U32 ) +OPCODE(SignedSaturatedSub64, T::U64, T::U64, T::U64 ) OPCODE(SignedSaturation, T::U32, T::U32, T::U8 ) +OPCODE(UnsignedSaturation, T::U32, T::U32, T::U8 ) // Packed instructions OPCODE(PackedAddU8, T::U32, T::U32, T::U32 )