From 2501bfbfae620ae55c1bdc714a7b679bd21328a6 Mon Sep 17 00:00:00 2001
From: Lioncash <mathew1800@gmail.com>
Date: Sun, 29 Jul 2018 17:52:27 -0400
Subject: [PATCH] ir: Add opcodes for performing scalar integral min/max

---
 src/backend_x64/emit_x64_data_processing.cpp | 96 ++++++++++++++++++++
 src/frontend/ir/ir_emitter.cpp               | 32 +++++++
 src/frontend/ir/ir_emitter.h                 |  4 +
 src/frontend/ir/opcodes.inc                  |  8 ++
 4 files changed, 140 insertions(+)
diff --git a/src/backend_x64/emit_x64_data_processing.cpp b/src/backend_x64/emit_x64_data_processing.cpp
index 97019a56..96509a68 100644
--- a/src/backend_x64/emit_x64_data_processing.cpp
+++ b/src/backend_x64/emit_x64_data_processing.cpp
@@ -1317,4 +1317,100 @@ void EmitX64::EmitCountLeadingZeros64(EmitContext& ctx, IR::Inst* inst) {
    }
 }
 
+void EmitX64::EmitMaxSigned32(EmitContext& ctx, IR::Inst* inst) { // Emits x64 code for the signed 32-bit maximum of inst's two arguments.
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    const Xbyak::Reg32 x = ctx.reg_alloc.UseGpr(args[0]).cvt32(); // first operand (32-bit view); only read below
+    const Xbyak::Reg32 y = ctx.reg_alloc.UseScratchGpr(args[1]).cvt32(); // second operand; may be overwritten by the cmov and doubles as the result
+
+    code.cmp(x, y); // set flags from x - y
+    code.cmovge(y, x); // signed compare: if x >= y then y = x, leaving max(x, y) in y
+
+    ctx.reg_alloc.DefineValue(inst, y); // y is the value produced by inst
+}
+
+void EmitX64::EmitMaxSigned64(EmitContext& ctx, IR::Inst* inst) { // Emits x64 code for the signed 64-bit maximum of inst's two arguments.
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    const Xbyak::Reg64 x = ctx.reg_alloc.UseGpr(args[0]); // first operand; only read below
+    const Xbyak::Reg64 y = ctx.reg_alloc.UseScratchGpr(args[1]); // second operand; may be overwritten by the cmov and doubles as the result
+
+    code.cmp(x, y); // set flags from x - y
+    code.cmovge(y, x); // signed compare: if x >= y then y = x, leaving max(x, y) in y
+
+    ctx.reg_alloc.DefineValue(inst, y); // y is the value produced by inst
+}
+
+void EmitX64::EmitMaxUnsigned32(EmitContext& ctx, IR::Inst* inst) { // Emits x64 code for the unsigned 32-bit maximum of inst's two arguments.
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    const Xbyak::Reg32 x = ctx.reg_alloc.UseGpr(args[0]).cvt32(); // first operand (32-bit view); only read below
+    const Xbyak::Reg32 y = ctx.reg_alloc.UseScratchGpr(args[1]).cvt32(); // second operand; may be overwritten by the cmov and doubles as the result
+
+    code.cmp(x, y); // set flags from x - y
+    code.cmova(y, x); // unsigned compare: if x > y then y = x, leaving max(x, y) in y
+
+    ctx.reg_alloc.DefineValue(inst, y); // y is the value produced by inst
+}
+
+void EmitX64::EmitMaxUnsigned64(EmitContext& ctx, IR::Inst* inst) { // Emits x64 code for the unsigned 64-bit maximum of inst's two arguments.
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    const Xbyak::Reg64 x = ctx.reg_alloc.UseGpr(args[0]); // first operand; only read below
+    const Xbyak::Reg64 y = ctx.reg_alloc.UseScratchGpr(args[1]); // second operand; may be overwritten by the cmov and doubles as the result
+
+    code.cmp(x, y); // set flags from x - y
+    code.cmova(y, x); // unsigned compare: if x > y then y = x, leaving max(x, y) in y
+
+    ctx.reg_alloc.DefineValue(inst, y); // y is the value produced by inst
+}
+
+void EmitX64::EmitMinSigned32(EmitContext& ctx, IR::Inst* inst) { // Emits x64 code for the signed 32-bit minimum of inst's two arguments.
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    const Xbyak::Reg32 x = ctx.reg_alloc.UseGpr(args[0]).cvt32(); // first operand (32-bit view); only read below
+    const Xbyak::Reg32 y = ctx.reg_alloc.UseScratchGpr(args[1]).cvt32(); // second operand; may be overwritten by the cmov and doubles as the result
+
+    code.cmp(x, y); // set flags from x - y
+    code.cmovle(y, x); // signed compare: if x <= y then y = x, leaving min(x, y) in y
+
+    ctx.reg_alloc.DefineValue(inst, y); // y is the value produced by inst
+}
+
+void EmitX64::EmitMinSigned64(EmitContext& ctx, IR::Inst* inst) { // Emits x64 code for the signed 64-bit minimum of inst's two arguments.
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    const Xbyak::Reg64 x = ctx.reg_alloc.UseGpr(args[0]); // first operand; only read below
+    const Xbyak::Reg64 y = ctx.reg_alloc.UseScratchGpr(args[1]); // second operand; may be overwritten by the cmov and doubles as the result
+
+    code.cmp(x, y); // set flags from x - y
+    code.cmovle(y, x); // signed compare: if x <= y then y = x, leaving min(x, y) in y
+
+    ctx.reg_alloc.DefineValue(inst, y); // y is the value produced by inst
+}
+
+void EmitX64::EmitMinUnsigned32(EmitContext& ctx, IR::Inst* inst) { // Emits x64 code for the unsigned 32-bit minimum of inst's two arguments.
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    const Xbyak::Reg32 x = ctx.reg_alloc.UseGpr(args[0]).cvt32(); // first operand (32-bit view); only read below
+    const Xbyak::Reg32 y = ctx.reg_alloc.UseScratchGpr(args[1]).cvt32(); // second operand; may be overwritten by the cmov and doubles as the result
+
+    code.cmp(x, y); // set flags from x - y
+    code.cmovb(y, x); // unsigned compare: if x < y then y = x, leaving min(x, y) in y
+
+    ctx.reg_alloc.DefineValue(inst, y); // y is the value produced by inst
+}
+
+void EmitX64::EmitMinUnsigned64(EmitContext& ctx, IR::Inst* inst) { // Emits x64 code for the unsigned 64-bit minimum of inst's two arguments.
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    const Xbyak::Reg64 x = ctx.reg_alloc.UseGpr(args[0]); // first operand; only read below
+    const Xbyak::Reg64 y = ctx.reg_alloc.UseScratchGpr(args[1]); // second operand; may be overwritten by the cmov and doubles as the result
+
+    code.cmp(x, y); // set flags from x - y
+    code.cmovb(y, x); // unsigned compare: if x < y then y = x, leaving min(x, y) in y
+
+    ctx.reg_alloc.DefineValue(inst, y); // y is the value produced by inst
+}
+
 } // namespace Dynarmic::BackendX64
diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp
index 11b9ae64..212ab255 100644
--- a/src/frontend/ir/ir_emitter.cpp
+++ b/src/frontend/ir/ir_emitter.cpp
@@ -449,6 +449,38 @@ U32U64 IREmitter::ExtractRegister(const U32U64& a, const U32U64& b, const U8& ls
     return Inst<U64>(Opcode::ExtractRegister64, a, b, lsb);
 }
 
+U32U64 IREmitter::MaxSigned(const U32U64& a, const U32U64& b) { // Builds a MaxSigned32 or MaxSigned64 IR instruction over a and b.
+    if (a.GetType() == IR::Type::U32) { // width is chosen from a's type alone; b's type is not inspected here
+        return Inst<U32>(Opcode::MaxSigned32, a, b);
+    }
+
+    return Inst<U64>(Opcode::MaxSigned64, a, b); // any non-U32 a falls through to the 64-bit opcode
+}
+
+U32U64 IREmitter::MaxUnsigned(const U32U64& a, const U32U64& b) { // Builds a MaxUnsigned32 or MaxUnsigned64 IR instruction over a and b.
+    if (a.GetType() == IR::Type::U32) { // width is chosen from a's type alone; b's type is not inspected here
+        return Inst<U32>(Opcode::MaxUnsigned32, a, b);
+    }
+
+    return Inst<U64>(Opcode::MaxUnsigned64, a, b); // any non-U32 a falls through to the 64-bit opcode
+}
+
+U32U64 IREmitter::MinSigned(const U32U64& a, const U32U64& b) { // Builds a MinSigned32 or MinSigned64 IR instruction over a and b.
+    if (a.GetType() == IR::Type::U32) { // width is chosen from a's type alone; b's type is not inspected here
+        return Inst<U32>(Opcode::MinSigned32, a, b);
+    }
+
+    return Inst<U64>(Opcode::MinSigned64, a, b); // any non-U32 a falls through to the 64-bit opcode
+}
+
+U32U64 IREmitter::MinUnsigned(const U32U64& a, const U32U64& b) { // Builds a MinUnsigned32 or MinUnsigned64 IR instruction over a and b.
+    if (a.GetType() == IR::Type::U32) { // width is chosen from a's type alone; b's type is not inspected here
+        return Inst<U32>(Opcode::MinUnsigned32, a, b);
+    }
+
+    return Inst<U64>(Opcode::MinUnsigned64, a, b); // any non-U32 a falls through to the 64-bit opcode
+}
+
 ResultAndOverflow<U32> IREmitter::SignedSaturatedAdd(const U32& a, const U32& b) {
     auto result = Inst<U32>(Opcode::SignedSaturatedAdd, a, b);
     auto overflow = Inst<U1>(Opcode::GetOverflowFromOp, result);
diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h
index 27480fa0..ffd83710 100644
--- a/src/frontend/ir/ir_emitter.h
+++ b/src/frontend/ir/ir_emitter.h
@@ -137,6 +137,10 @@ public:
     U64 ByteReverseDual(const U64& a);
     U32U64 CountLeadingZeros(const U32U64& a);
     U32U64 ExtractRegister(const U32U64& a, const U32U64& b, const U8& lsb);
+    U32U64 MaxSigned(const U32U64& a, const U32U64& b); // signed maximum; 32- or 64-bit opcode picked from a's type
+    U32U64 MaxUnsigned(const U32U64& a, const U32U64& b); // unsigned maximum; 32- or 64-bit opcode picked from a's type
+    U32U64 MinSigned(const U32U64& a, const U32U64& b); // signed minimum; 32- or 64-bit opcode picked from a's type
+    U32U64 MinUnsigned(const U32U64& a, const U32U64& b); // unsigned minimum; 32- or 64-bit opcode picked from a's type
 
     ResultAndOverflow<U32> SignedSaturatedAdd(const U32& a, const U32& b);
     ResultAndOverflow<U32> SignedSaturatedSub(const U32& a, const U32& b);
diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc
index c41cf7bd..3bb1400a 100644
--- a/src/frontend/ir/opcodes.inc
+++ b/src/frontend/ir/opcodes.inc
@@ -145,6 +145,14 @@ OPCODE(CountLeadingZeros32,                     T::U32,         T::U32
 OPCODE(CountLeadingZeros64,                     T::U64,         T::U64                                          )
 OPCODE(ExtractRegister32,                       T::U32,         T::U32,         T::U32,         T::U8           )
 OPCODE(ExtractRegister64,                       T::U64,         T::U64,         T::U64,         T::U8           )
+OPCODE(MaxSigned32,                             T::U32,         T::U32,         T::U32                          ) // signed maximum, 32-bit
+OPCODE(MaxSigned64,                             T::U64,         T::U64,         T::U64                          ) // signed maximum, 64-bit
+OPCODE(MaxUnsigned32,                           T::U32,         T::U32,         T::U32                          ) // unsigned maximum, 32-bit
+OPCODE(MaxUnsigned64,                           T::U64,         T::U64,         T::U64                          ) // unsigned maximum, 64-bit
+OPCODE(MinSigned32,                             T::U32,         T::U32,         T::U32                          ) // signed minimum, 32-bit
+OPCODE(MinSigned64,                             T::U64,         T::U64,         T::U64                          ) // signed minimum, 64-bit
+OPCODE(MinUnsigned32,                           T::U32,         T::U32,         T::U32                          ) // unsigned minimum, 32-bit
+OPCODE(MinUnsigned64,                           T::U64,         T::U64,         T::U64                          ) // unsigned minimum, 64-bit
 
 // Saturated instructions
 OPCODE(SignedSaturatedAdd,                      T::U32,         T::U32,         T::U32                          )