From 2501bfbfae620ae55c1bdc714a7b679bd21328a6 Mon Sep 17 00:00:00 2001
From: Lioncash
Date: Sun, 29 Jul 2018 17:52:27 -0400
Subject: [PATCH] ir: Add opcodes for performing scalar integral min/max

---
 src/backend_x64/emit_x64_data_processing.cpp | 112 ++++++++++++++++++++
 src/frontend/ir/ir_emitter.cpp               |  36 +++++++
 src/frontend/ir/ir_emitter.h                 |   5 +
 src/frontend/ir/opcodes.inc                  |   8 ++
 4 files changed, 161 insertions(+)

diff --git a/src/backend_x64/emit_x64_data_processing.cpp b/src/backend_x64/emit_x64_data_processing.cpp
index 97019a56..96509a68 100644
--- a/src/backend_x64/emit_x64_data_processing.cpp
+++ b/src/backend_x64/emit_x64_data_processing.cpp
@@ -1317,4 +1317,116 @@ void EmitX64::EmitCountLeadingZeros64(EmitContext& ctx, IR::Inst* inst) {
     }
 }
 
+// MaxSigned32: defines inst as the larger of args[0] and args[1], compared as signed 32-bit values.
+void EmitX64::EmitMaxSigned32(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    const Xbyak::Reg32 lhs = ctx.reg_alloc.UseGpr(args[0]).cvt32();
+    const Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(args[1]).cvt32();
+
+    // cmovge copies lhs into result only when the compare found lhs >= result (signed).
+    code.cmp(lhs, result);
+    code.cmovge(result, lhs);
+
+    ctx.reg_alloc.DefineValue(inst, result);
+}
+
+// MaxSigned64: defines inst as the larger of args[0] and args[1], compared as signed 64-bit values.
+void EmitX64::EmitMaxSigned64(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    const Xbyak::Reg64 lhs = ctx.reg_alloc.UseGpr(args[0]);
+    const Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(args[1]);
+
+    // cmovge copies lhs into result only when the compare found lhs >= result (signed).
+    code.cmp(lhs, result);
+    code.cmovge(result, lhs);
+
+    ctx.reg_alloc.DefineValue(inst, result);
+}
+
+// MaxUnsigned32: defines inst as the larger of args[0] and args[1], compared as unsigned 32-bit values.
+void EmitX64::EmitMaxUnsigned32(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    const Xbyak::Reg32 lhs = ctx.reg_alloc.UseGpr(args[0]).cvt32();
+    const Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(args[1]).cvt32();
+
+    // cmova copies lhs into result only when the compare found lhs > result (unsigned).
+    code.cmp(lhs, result);
+    code.cmova(result, lhs);
+
+    ctx.reg_alloc.DefineValue(inst, result);
+}
+
+// MaxUnsigned64: defines inst as the larger of args[0] and args[1], compared as unsigned 64-bit values.
+void EmitX64::EmitMaxUnsigned64(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    const Xbyak::Reg64 lhs = ctx.reg_alloc.UseGpr(args[0]);
+    const Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(args[1]);
+
+    // cmova copies lhs into result only when the compare found lhs > result (unsigned).
+    code.cmp(lhs, result);
+    code.cmova(result, lhs);
+
+    ctx.reg_alloc.DefineValue(inst, result);
+}
+
+// MinSigned32: defines inst as the smaller of args[0] and args[1], compared as signed 32-bit values.
+void EmitX64::EmitMinSigned32(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    const Xbyak::Reg32 lhs = ctx.reg_alloc.UseGpr(args[0]).cvt32();
+    const Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(args[1]).cvt32();
+
+    // cmovle copies lhs into result only when the compare found lhs <= result (signed).
+    code.cmp(lhs, result);
+    code.cmovle(result, lhs);
+
+    ctx.reg_alloc.DefineValue(inst, result);
+}
+
+// MinSigned64: defines inst as the smaller of args[0] and args[1], compared as signed 64-bit values.
+void EmitX64::EmitMinSigned64(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    const Xbyak::Reg64 lhs = ctx.reg_alloc.UseGpr(args[0]);
+    const Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(args[1]);
+
+    // cmovle copies lhs into result only when the compare found lhs <= result (signed).
+    code.cmp(lhs, result);
+    code.cmovle(result, lhs);
+
+    ctx.reg_alloc.DefineValue(inst, result);
+}
+
+// MinUnsigned32: defines inst as the smaller of args[0] and args[1], compared as unsigned 32-bit values.
+void EmitX64::EmitMinUnsigned32(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    const Xbyak::Reg32 lhs = ctx.reg_alloc.UseGpr(args[0]).cvt32();
+    const Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(args[1]).cvt32();
+
+    // cmovb copies lhs into result only when the compare found lhs < result (unsigned).
+    code.cmp(lhs, result);
+    code.cmovb(result, lhs);
+
+    ctx.reg_alloc.DefineValue(inst, result);
+}
+
+// MinUnsigned64: defines inst as the smaller of args[0] and args[1], compared as unsigned 64-bit values.
+void EmitX64::EmitMinUnsigned64(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    const Xbyak::Reg64 lhs = ctx.reg_alloc.UseGpr(args[0]);
+    const Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(args[1]);
+
+    // cmovb copies lhs into result only when the compare found lhs < result (unsigned).
+    code.cmp(lhs, result);
+    code.cmovb(result, lhs);
+
+    ctx.reg_alloc.DefineValue(inst, result);
+}
+
 } // namespace Dynarmic::BackendX64
diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp
index 11b9ae64..212ab255 100644
--- a/src/frontend/ir/ir_emitter.cpp
+++ b/src/frontend/ir/ir_emitter.cpp
@@ -449,6 +449,42 @@ U32U64 IREmitter::ExtractRegister(const U32U64& a, const U32U64& b, const U8& ls
     return Inst(Opcode::ExtractRegister64, a, b, lsb);
 }
 
+// Signed maximum of a and b; emits MaxSigned32 when a is U32, MaxSigned64 otherwise.
+U32U64 IREmitter::MaxSigned(const U32U64& a, const U32U64& b) {
+    if (a.GetType() == IR::Type::U32) {
+        return Inst(Opcode::MaxSigned32, a, b);
+    }
+
+    return Inst(Opcode::MaxSigned64, a, b);
+}
+
+// Unsigned maximum of a and b; emits MaxUnsigned32 when a is U32, MaxUnsigned64 otherwise.
+U32U64 IREmitter::MaxUnsigned(const U32U64& a, const U32U64& b) {
+    if (a.GetType() == IR::Type::U32) {
+        return Inst(Opcode::MaxUnsigned32, a, b);
+    }
+
+    return Inst(Opcode::MaxUnsigned64, a, b);
+}
+
+// Signed minimum of a and b; emits MinSigned32 when a is U32, MinSigned64 otherwise.
+U32U64 IREmitter::MinSigned(const U32U64& a, const U32U64& b) {
+    if (a.GetType() == IR::Type::U32) {
+        return Inst(Opcode::MinSigned32, a, b);
+    }
+
+    return Inst(Opcode::MinSigned64, a, b);
+}
+
+// Unsigned minimum of a and b; emits MinUnsigned32 when a is U32, MinUnsigned64 otherwise.
+U32U64 IREmitter::MinUnsigned(const U32U64& a, const U32U64& b) {
+    if (a.GetType() == IR::Type::U32) {
+        return Inst(Opcode::MinUnsigned32, a, b);
+    }
+
+    return Inst(Opcode::MinUnsigned64, a, b);
+}
+
 ResultAndOverflow IREmitter::SignedSaturatedAdd(const U32& a, const U32& b) {
     auto result = Inst(Opcode::SignedSaturatedAdd, a, b);
     auto overflow = Inst(Opcode::GetOverflowFromOp, result);
diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h
index 27480fa0..ffd83710 100644
--- a/src/frontend/ir/ir_emitter.h
+++ b/src/frontend/ir/ir_emitter.h
@@ -137,6 +137,11 @@ public:
     U64 ByteReverseDual(const U64& a);
     U32U64 CountLeadingZeros(const U32U64& a);
     U32U64 ExtractRegister(const U32U64& a, const U32U64& b, const U8& lsb);
+    // Scalar integral min/max; the 32- or 64-bit opcode is selected from the type of a.
+    U32U64 MaxSigned(const U32U64& a, const U32U64& b);
+    U32U64 MaxUnsigned(const U32U64& a, const U32U64& b);
+    U32U64 MinSigned(const U32U64& a, const U32U64& b);
+    U32U64 MinUnsigned(const U32U64& a, const U32U64& b);
 
     ResultAndOverflow SignedSaturatedAdd(const U32& a, const U32& b);
     ResultAndOverflow SignedSaturatedSub(const U32& a, const U32& b);
diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc
index c41cf7bd..3bb1400a 100644
--- a/src/frontend/ir/opcodes.inc
+++ b/src/frontend/ir/opcodes.inc
@@ -145,6 +145,14 @@ OPCODE(CountLeadingZeros32, T::U32, T::U32
 OPCODE(CountLeadingZeros64, T::U64, T::U64                 )
 OPCODE(ExtractRegister32,   T::U32, T::U32, T::U32, T::U8  )
 OPCODE(ExtractRegister64,   T::U64, T::U64, T::U64, T::U8  )
+OPCODE(MaxSigned32,         T::U32, T::U32, T::U32         )
+OPCODE(MaxSigned64,         T::U64, T::U64, T::U64         )
+OPCODE(MaxUnsigned32,       T::U32, T::U32, T::U32         )
+OPCODE(MaxUnsigned64,       T::U64, T::U64, T::U64         )
+OPCODE(MinSigned32,         T::U32, T::U32, T::U32         )
+OPCODE(MinSigned64,         T::U64, T::U64, T::U64         )
+OPCODE(MinUnsigned32,       T::U32, T::U32, T::U32         )
+OPCODE(MinUnsigned64,       T::U64, T::U64, T::U64         )
 
 // Saturated instructions
 OPCODE(SignedSaturatedAdd,  T::U32, T::U32, T::U32         )