IR: Generalise SignedSaturated{Add,Sub} to support more bitwidths

This commit is contained in:
MerryMage 2018-07-30 10:59:52 +01:00
parent 71db0e67ae
commit 10e196480f
4 changed files with 119 additions and 43 deletions

View file

@ -4,11 +4,14 @@
* General Public License version 2 or any later version.
*/
#include <limits>
#include "backend_x64/block_of_code.h"
#include "backend_x64/emit_x64.h"
#include "common/assert.h"
#include "common/bit_util.h"
#include "common/common_types.h"
#include "common/mp/integer.h"
#include "frontend/ir/basic_block.h"
#include "frontend/ir/microinstruction.h"
#include "frontend/ir/opcodes.h"
@ -16,22 +19,53 @@
namespace Dynarmic::BackendX64 {
using namespace Xbyak::util;
namespace mp = Dynarmic::Common::mp;
void EmitX64::EmitSignedSaturatedAdd(EmitContext& ctx, IR::Inst* inst) {
namespace {
// Selects which arithmetic instruction EmitSignedSaturatedOp emits:
// Op::Add emits an add, Op::Sub emits a sub.
enum class Op {
Add,
Sub,
};
template<Op op, size_t size>
void EmitSignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
Xbyak::Reg32 addend = ctx.reg_alloc.UseGpr(args[1]).cvt32();
Xbyak::Reg32 overflow = ctx.reg_alloc.ScratchGpr().cvt32();
Xbyak::Reg result = ctx.reg_alloc.UseScratchGpr(args[0]);
Xbyak::Reg addend = ctx.reg_alloc.UseGpr(args[1]);
Xbyak::Reg overflow = ctx.reg_alloc.ScratchGpr();
code.mov(overflow, result);
code.shr(overflow, 31);
code.add(overflow, 0x7FFFFFFF);
// overflow now contains 0x7FFFFFFF if a was positive, or 0x80000000 if a was negative
result.setBit(size);
addend.setBit(size);
overflow.setBit(size);
constexpr u64 int_max = static_cast<u64>(std::numeric_limits<mp::signed_integer_of_size<size>>::max());
if constexpr (size < 64) {
code.xor_(overflow.cvt32(), overflow.cvt32());
code.bt(result.cvt32(), size - 1);
code.adc(overflow.cvt32(), int_max);
} else {
code.mov(overflow, int_max);
code.bt(result, 63);
code.adc(overflow, 0);
}
// overflow now contains 0x7F... if a was positive, or 0x80... if a was negative
if constexpr (op == Op::Add) {
code.add(result, addend);
} else {
code.sub(result, addend);
}
if constexpr (size < 64) {
code.cmovo(result.cvt32(), overflow.cvt32());
} else {
code.cmovo(result, overflow);
}
if (overflow_inst) {
code.seto(overflow.cvt8());
@ -43,30 +77,38 @@ void EmitX64::EmitSignedSaturatedAdd(EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.DefineValue(inst, result);
}
void EmitX64::EmitSignedSaturatedSub(EmitContext& ctx, IR::Inst* inst) {
auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
} // anonymous namespace
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
// Emitter for the 8-bit signed saturated add: forwards to the shared
// EmitSignedSaturatedOp template instantiated with Op::Add and size 8.
void EmitX64::EmitSignedSaturatedAdd8(EmitContext& ctx, IR::Inst* inst) {
EmitSignedSaturatedOp<Op::Add, 8>(code, ctx, inst);
}
Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
Xbyak::Reg32 subend = ctx.reg_alloc.UseGpr(args[1]).cvt32();
Xbyak::Reg32 overflow = ctx.reg_alloc.ScratchGpr().cvt32();
// Emitter for the 16-bit signed saturated add: forwards to the shared
// EmitSignedSaturatedOp template instantiated with Op::Add and size 16.
void EmitX64::EmitSignedSaturatedAdd16(EmitContext& ctx, IR::Inst* inst) {
EmitSignedSaturatedOp<Op::Add, 16>(code, ctx, inst);
}
code.mov(overflow, result);
code.shr(overflow, 31);
code.add(overflow, 0x7FFFFFFF);
// overflow now contains 0x7FFFFFFF if a was positive, or 0x80000000 if a was negative
code.sub(result, subend);
code.cmovo(result, overflow);
// Emitter for the 32-bit signed saturated add: forwards to the shared
// EmitSignedSaturatedOp template instantiated with Op::Add and size 32.
void EmitX64::EmitSignedSaturatedAdd32(EmitContext& ctx, IR::Inst* inst) {
EmitSignedSaturatedOp<Op::Add, 32>(code, ctx, inst);
}
if (overflow_inst) {
code.seto(overflow.cvt8());
// Emitter for the 64-bit signed saturated add: forwards to the shared
// EmitSignedSaturatedOp template instantiated with Op::Add and size 64.
void EmitX64::EmitSignedSaturatedAdd64(EmitContext& ctx, IR::Inst* inst) {
EmitSignedSaturatedOp<Op::Add, 64>(code, ctx, inst);
}
ctx.reg_alloc.DefineValue(overflow_inst, overflow);
ctx.EraseInstruction(overflow_inst);
}
// Emitter for the 8-bit signed saturated subtract: forwards to the shared
// EmitSignedSaturatedOp template instantiated with Op::Sub and size 8.
void EmitX64::EmitSignedSaturatedSub8(EmitContext& ctx, IR::Inst* inst) {
EmitSignedSaturatedOp<Op::Sub, 8>(code, ctx, inst);
}
ctx.reg_alloc.DefineValue(inst, result);
// Emitter for the 16-bit signed saturated subtract: forwards to the shared
// EmitSignedSaturatedOp template instantiated with Op::Sub and size 16.
void EmitX64::EmitSignedSaturatedSub16(EmitContext& ctx, IR::Inst* inst) {
EmitSignedSaturatedOp<Op::Sub, 16>(code, ctx, inst);
}
// Emitter for the 32-bit signed saturated subtract: forwards to the shared
// EmitSignedSaturatedOp template instantiated with Op::Sub and size 32.
void EmitX64::EmitSignedSaturatedSub32(EmitContext& ctx, IR::Inst* inst) {
EmitSignedSaturatedOp<Op::Sub, 32>(code, ctx, inst);
}
// Emitter for the 64-bit signed saturated subtract: forwards to the shared
// EmitSignedSaturatedOp template instantiated with Op::Sub and size 64.
void EmitX64::EmitSignedSaturatedSub64(EmitContext& ctx, IR::Inst* inst) {
EmitSignedSaturatedOp<Op::Sub, 64>(code, ctx, inst);
}
void EmitX64::EmitUnsignedSaturation(EmitContext& ctx, IR::Inst* inst) {

View file

@ -481,15 +481,43 @@ U32U64 IREmitter::MinUnsigned(const U32U64& a, const U32U64& b) {
return Inst<U64>(Opcode::MinUnsigned64, a, b);
}
ResultAndOverflow<U32> IREmitter::SignedSaturatedAdd(const U32& a, const U32& b) {
auto result = Inst<U32>(Opcode::SignedSaturatedAdd, a, b);
auto overflow = Inst<U1>(Opcode::GetOverflowFromOp, result);
// Emits a signed saturated add of `a` and `b`.
//
// The opcode is chosen from the type of `a` (U8, U16, U32 or U64); both
// operands are asserted to have the same type. Returns the result value
// together with a GetOverflowFromOp pseudo-operation that refers to it.
ResultAndOverflow<UAny> IREmitter::SignedSaturatedAdd(const UAny& a, const UAny& b) {
ASSERT(a.GetType() == b.GetType());
// Immediately-invoked lambda so `result` can be const while still being
// selected by a switch on the operand type.
const auto result = [&]() -> IR::UAny {
switch (a.GetType()) {
case IR::Type::U8:
return Inst<U8>(Opcode::SignedSaturatedAdd8, a, b);
case IR::Type::U16:
return Inst<U16>(Opcode::SignedSaturatedAdd16, a, b);
case IR::Type::U32:
return Inst<U32>(Opcode::SignedSaturatedAdd32, a, b);
case IR::Type::U64:
return Inst<U64>(Opcode::SignedSaturatedAdd64, a, b);
default:
// NOTE(review): any other type yields a default-constructed UAny, which is
// still passed to GetOverflowFromOp below — confirm whether this branch
// should assert instead of returning silently.
return IR::UAny{};
}
}();
const auto overflow = Inst<U1>(Opcode::GetOverflowFromOp, result);
return {result, overflow};
}
ResultAndOverflow<U32> IREmitter::SignedSaturatedSub(const U32& a, const U32& b) {
auto result = Inst<U32>(Opcode::SignedSaturatedSub, a, b);
auto overflow = Inst<U1>(Opcode::GetOverflowFromOp, result);
// Emits a signed saturated subtract of `b` from `a`.
//
// The opcode is chosen from the type of `a` (U8, U16, U32 or U64); both
// operands are asserted to have the same type. Returns the result value
// together with a GetOverflowFromOp pseudo-operation that refers to it.
ResultAndOverflow<UAny> IREmitter::SignedSaturatedSub(const UAny& a, const UAny& b) {
ASSERT(a.GetType() == b.GetType());
// Immediately-invoked lambda so `result` can be const while still being
// selected by a switch on the operand type.
const auto result = [&]() -> IR::UAny {
switch (a.GetType()) {
case IR::Type::U8:
return Inst<U8>(Opcode::SignedSaturatedSub8, a, b);
case IR::Type::U16:
return Inst<U16>(Opcode::SignedSaturatedSub16, a, b);
case IR::Type::U32:
return Inst<U32>(Opcode::SignedSaturatedSub32, a, b);
case IR::Type::U64:
return Inst<U64>(Opcode::SignedSaturatedSub64, a, b);
default:
// NOTE(review): any other type yields a default-constructed UAny, which is
// still passed to GetOverflowFromOp below — confirm whether this branch
// should assert instead of returning silently.
return IR::UAny{};
}
}();
const auto overflow = Inst<U1>(Opcode::GetOverflowFromOp, result);
return {result, overflow};
}

View file

@ -142,8 +142,8 @@ public:
U32U64 MinSigned(const U32U64& a, const U32U64& b);
U32U64 MinUnsigned(const U32U64& a, const U32U64& b);
ResultAndOverflow<U32> SignedSaturatedAdd(const U32& a, const U32& b);
ResultAndOverflow<U32> SignedSaturatedSub(const U32& a, const U32& b);
ResultAndOverflow<UAny> SignedSaturatedAdd(const UAny& a, const UAny& b);
ResultAndOverflow<UAny> SignedSaturatedSub(const UAny& a, const UAny& b);
ResultAndOverflow<U32> UnsignedSaturation(const U32& a, size_t bit_size_to_saturate_to);
ResultAndOverflow<U32> SignedSaturation(const U32& a, size_t bit_size_to_saturate_to);

View file

@ -76,10 +76,10 @@ A64OPC(GetTPIDRRO, T::U64,
// Hints
OPCODE(PushRSB, T::Void, T::U64 )
// Pseudo-operation, handled specially at final emit
OPCODE(GetCarryFromOp, T::U1, T::U32 )
OPCODE(GetOverflowFromOp, T::U1, T::U32 )
OPCODE(GetGEFromOp, T::U32, T::U32 )
// Pseudo-operation, handled specially at final emit
OPCODE(GetCarryFromOp, T::U1, T::Opaque )
OPCODE(GetOverflowFromOp, T::U1, T::Opaque )
OPCODE(GetGEFromOp, T::U32, T::Opaque )
OPCODE(GetNZCVFromOp, T::NZCVFlags, T::Opaque )
OPCODE(NZCVFromPackedFlags, T::NZCVFlags, T::U32 )
@ -155,10 +155,16 @@ OPCODE(MinUnsigned32, T::U32, T::U32,
OPCODE(MinUnsigned64, T::U64, T::U64, T::U64 )
// Saturated instructions
OPCODE(SignedSaturatedAdd, T::U32, T::U32, T::U32 )
OPCODE(SignedSaturatedSub, T::U32, T::U32, T::U32 )
OPCODE(UnsignedSaturation, T::U32, T::U32, T::U8 )
OPCODE(SignedSaturatedAdd8, T::U8, T::U8, T::U8 )
OPCODE(SignedSaturatedAdd16, T::U16, T::U16, T::U16 )
OPCODE(SignedSaturatedAdd32, T::U32, T::U32, T::U32 )
OPCODE(SignedSaturatedAdd64, T::U64, T::U64, T::U64 )
OPCODE(SignedSaturatedSub8, T::U8, T::U8, T::U8 )
OPCODE(SignedSaturatedSub16, T::U16, T::U16, T::U16 )
OPCODE(SignedSaturatedSub32, T::U32, T::U32, T::U32 )
OPCODE(SignedSaturatedSub64, T::U64, T::U64, T::U64 )
OPCODE(SignedSaturation, T::U32, T::U32, T::U8 )
OPCODE(UnsignedSaturation, T::U32, T::U32, T::U8 )
// Packed instructions
OPCODE(PackedAddU8, T::U32, T::U32, T::U32 )