IR/saturation: Revamp saturated add/sub IR instructions

This commit is contained in:
Merry 2022-07-30 14:23:55 +01:00 committed by merry
parent 2d0bf7ca9b
commit babfb7d7b8
14 changed files with 292 additions and 297 deletions

View file

@ -186,14 +186,6 @@ void EmitIR<IR::Opcode::A64SetFPSR>(oaknut::CodeGenerator& code, EmitContext& ct
ASSERT_FALSE("Unimplemented");
}
/// Placeholder emitter for the A64OrQC opcode on the Arm64 backend.
/// No code is generated; reaching this function trips ASSERT_FALSE.
template<>
void EmitIR<IR::Opcode::A64OrQC>([[maybe_unused]] oaknut::CodeGenerator& code,
                                 [[maybe_unused]] EmitContext& ctx,
                                 [[maybe_unused]] IR::Inst* inst) {
    ASSERT_FALSE("Unimplemented");
}
template<>
void EmitIR<IR::Opcode::A64SetPC>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;

View file

@ -18,6 +18,88 @@ namespace Dynarmic::Backend::Arm64 {
using namespace oaknut::util;
/// Placeholder emitter for the SignedSaturatedAddWithFlag32 opcode on the Arm64 backend.
/// No code is generated; reaching this function trips ASSERT_FALSE.
template<>
void EmitIR<IR::Opcode::SignedSaturatedAddWithFlag32>([[maybe_unused]] oaknut::CodeGenerator& code,
                                                      [[maybe_unused]] EmitContext& ctx,
                                                      [[maybe_unused]] IR::Inst* inst) {
    ASSERT_FALSE("Unimplemented");
}
/// Placeholder emitter for the SignedSaturatedSubWithFlag32 opcode on the Arm64 backend.
/// No code is generated; reaching this function trips ASSERT_FALSE.
template<>
void EmitIR<IR::Opcode::SignedSaturatedSubWithFlag32>([[maybe_unused]] oaknut::CodeGenerator& code,
                                                      [[maybe_unused]] EmitContext& ctx,
                                                      [[maybe_unused]] IR::Inst* inst) {
    ASSERT_FALSE("Unimplemented");
}
// Emits AArch64 code for the SignedSaturation opcode.
//
// args[0] is the 32-bit operand and args[1] is an immediate N in [1, 32].
// The result is the operand clamped to the signed N-bit range
// [-(2^(N-1)), 2^(N-1) - 1]. If a GetOverflowFromOp pseudo-operation is
// attached to `inst`, it receives 1 when clamping changed the value and 0
// otherwise.
template<>
void EmitIR<IR::Opcode::SignedSaturation>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const size_t N = args[1].GetImmediateU8();
ASSERT(N >= 1 && N <= 32);
// N == 32: every 32-bit value is already in range, so the result is the
// operand itself and the overflow output (if requested) is constant zero.
if (N == 32) {
ctx.reg_alloc.DefineAsExisting(inst, args[0]);
if (overflow_inst) {
auto Woverflow = ctx.reg_alloc.WriteW(overflow_inst);
RegAlloc::Realize(Woverflow);
code.MOV(*Woverflow, WZR);
}
return;
}
// Bounds of the signed N-bit range, expressed as 32-bit two's-complement bit patterns.
const u32 positive_saturated_value = (1u << (N - 1)) - 1;
const u32 negative_saturated_value = ~u32{0} << (N - 1);
auto Woperand = ctx.reg_alloc.ReadW(args[0]);
auto Wresult = ctx.reg_alloc.WriteW(inst);
RegAlloc::Realize(Woperand, Wresult);
// NOTE(review): presumably saves any live flag state before the CMPs below
// overwrite NZCV - confirm against RegAlloc::SpillFlags.
ctx.reg_alloc.SpillFlags();
code.MOV(Wscratch0, negative_saturated_value);
code.MOV(Wscratch1, positive_saturated_value);
// result = (operand > lower bound) ? operand : lower bound   (signed compare)
code.CMP(*Woperand, Wscratch0);
code.CSEL(Wresult, Woperand, Wscratch0, GT);
// result = (operand < upper bound) ? result : upper bound    (signed compare)
code.CMP(*Woperand, Wscratch1);
code.CSEL(Wresult, Wresult, Wscratch1, LT);
if (overflow_inst) {
auto Woverflow = ctx.reg_alloc.WriteW(overflow_inst);
RegAlloc::Realize(Woverflow);
// Saturation occurred exactly when the clamped result differs from the operand.
code.CMP(*Wresult, Woperand);
code.CSET(Woverflow, NE);
}
}
/// Emits AArch64 code for the UnsignedSaturation opcode.
///
/// args[0] is the 32-bit operand, interpreted as a signed integer, and args[1]
/// is an immediate N in [0, 31]. The result is the operand clamped to
/// [0, 2^N - 1]. If a GetOverflowFromOp pseudo-operation is attached to `inst`,
/// it receives 1 when the operand was outside that range and 0 otherwise.
///
/// The lower bound must be clamped as well as the upper one: a negative operand
/// saturates to 0, not to 2^N - 1. This matches the x64 backend, which
/// implements `clamp(reg_a, 0, saturated_value)`.
template<>
void EmitIR<IR::Opcode::UnsignedSaturation>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);

    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    auto Wresult = ctx.reg_alloc.WriteW(inst);
    auto Woperand = ctx.reg_alloc.ReadW(args[0]);
    RegAlloc::Realize(Wresult, Woperand);
    // NOTE(review): presumably saves any live flag state before the CMPs below
    // overwrite NZCV - confirm against RegAlloc::SpillFlags.
    ctx.reg_alloc.SpillFlags();

    const size_t N = args[1].GetImmediateU8();
    ASSERT(N <= 31);
    const u32 saturated_value = (1u << N) - 1;

    code.MOV(Wscratch0, saturated_value);

    // Lower bound: result = (operand > 0) ? operand : 0   (signed compare)
    code.CMP(*Woperand, 0);
    code.CSEL(Wresult, Woperand, WZR, GT);

    // Upper bound: result = (operand < max) ? result : max   (signed compare)
    code.CMP(*Woperand, Wscratch0);
    code.CSEL(Wresult, Wresult, Wscratch0, LT);

    if (overflow_inst) {
        auto Woverflow = ctx.reg_alloc.WriteW(overflow_inst);
        RegAlloc::Realize(Woverflow);
        // Flags still come from CMP operand, max. Unsigned-higher is true both
        // for operand > max and for negative operands (which are large when
        // viewed as unsigned), i.e. exactly the out-of-range cases.
        code.CSET(Woverflow, HI);
    }
}
template<>
void EmitIR<IR::Opcode::SignedSaturatedAdd8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
@ -98,47 +180,6 @@ void EmitIR<IR::Opcode::SignedSaturatedSub64>(oaknut::CodeGenerator& code, EmitC
ASSERT_FALSE("Unimplemented");
}
// Emits AArch64 code for the SignedSaturation opcode.
//
// args[0] is the 32-bit operand and args[1] is an immediate N in [1, 32].
// The result is the operand clamped to the signed N-bit range
// [-(2^(N-1)), 2^(N-1) - 1]. If a GetOverflowFromOp pseudo-operation is
// attached to `inst`, it receives 1 when clamping changed the value and 0
// otherwise.
template<>
void EmitIR<IR::Opcode::SignedSaturation>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const size_t N = args[1].GetImmediateU8();
ASSERT(N >= 1 && N <= 32);
// N == 32: every 32-bit value is already in range, so the result is the
// operand itself and the overflow output (if requested) is constant zero.
if (N == 32) {
ctx.reg_alloc.DefineAsExisting(inst, args[0]);
if (overflow_inst) {
auto Woverflow = ctx.reg_alloc.WriteW(overflow_inst);
RegAlloc::Realize(Woverflow);
code.MOV(*Woverflow, WZR);
}
return;
}
// Bounds of the signed N-bit range, expressed as 32-bit two's-complement bit patterns.
const u32 positive_saturated_value = (1u << (N - 1)) - 1;
const u32 negative_saturated_value = ~u32{0} << (N - 1);
auto Woperand = ctx.reg_alloc.ReadW(args[0]);
auto Wresult = ctx.reg_alloc.WriteW(inst);
RegAlloc::Realize(Woperand, Wresult);
// NOTE(review): presumably saves any live flag state before the CMPs below
// overwrite NZCV - confirm against RegAlloc::SpillFlags.
ctx.reg_alloc.SpillFlags();
code.MOV(Wscratch0, negative_saturated_value);
code.MOV(Wscratch1, positive_saturated_value);
// result = (operand > lower bound) ? operand : lower bound   (signed compare)
code.CMP(*Woperand, Wscratch0);
code.CSEL(Wresult, Woperand, Wscratch0, GT);
// result = (operand < upper bound) ? result : upper bound    (signed compare)
code.CMP(*Woperand, Wscratch1);
code.CSEL(Wresult, Wresult, Wscratch1, LT);
if (overflow_inst) {
auto Woverflow = ctx.reg_alloc.WriteW(overflow_inst);
RegAlloc::Realize(Woverflow);
// Saturation occurred exactly when the clamped result differs from the operand.
code.CMP(*Wresult, Woperand);
code.CSET(Woverflow, NE);
}
}
template<>
void EmitIR<IR::Opcode::UnsignedSaturatedAdd8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
@ -203,29 +244,4 @@ void EmitIR<IR::Opcode::UnsignedSaturatedSub64>(oaknut::CodeGenerator& code, Emi
ASSERT_FALSE("Unimplemented");
}
/// Emits AArch64 code for the UnsignedSaturation opcode.
///
/// args[0] is the 32-bit operand, interpreted as a signed integer, and args[1]
/// is an immediate N in [0, 31]. The result is the operand clamped to
/// [0, 2^N - 1]. If a GetOverflowFromOp pseudo-operation is attached to `inst`,
/// it receives 1 when the operand was outside that range and 0 otherwise.
///
/// The lower bound must be clamped as well as the upper one: a negative operand
/// saturates to 0, not to 2^N - 1. This matches the x64 backend, which
/// implements `clamp(reg_a, 0, saturated_value)`.
template<>
void EmitIR<IR::Opcode::UnsignedSaturation>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);

    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    auto Wresult = ctx.reg_alloc.WriteW(inst);
    auto Woperand = ctx.reg_alloc.ReadW(args[0]);
    RegAlloc::Realize(Wresult, Woperand);
    // NOTE(review): presumably saves any live flag state before the CMPs below
    // overwrite NZCV - confirm against RegAlloc::SpillFlags.
    ctx.reg_alloc.SpillFlags();

    const size_t N = args[1].GetImmediateU8();
    ASSERT(N <= 31);
    const u32 saturated_value = (1u << N) - 1;

    code.MOV(Wscratch0, saturated_value);

    // Lower bound: result = (operand > 0) ? operand : 0   (signed compare)
    code.CMP(*Woperand, 0);
    code.CSEL(Wresult, Woperand, WZR, GT);

    // Upper bound: result = (operand < max) ? result : max   (signed compare)
    code.CMP(*Woperand, Wscratch0);
    code.CSEL(Wresult, Wresult, Wscratch0, LT);

    if (overflow_inst) {
        auto Woverflow = ctx.reg_alloc.WriteW(overflow_inst);
        RegAlloc::Realize(Woverflow);
        // Flags still come from CMP operand, max. Unsigned-higher is true both
        // for operand > max and for negative operands (which are large when
        // viewed as unsigned), i.e. exactly the out-of-range cases.
        code.CSET(Woverflow, HI);
    }
}
} // namespace Dynarmic::Backend::Arm64

View file

@ -452,22 +452,6 @@ void A64EmitX64::EmitA64SetFPSR(A64EmitContext& ctx, IR::Inst* inst) {
code.ldmxcsr(code.dword[code.r15 + offsetof(A64JitState, guest_MXCSR)]);
}
// Emits x64 code that ORs a one-bit value into the fpsr_qc byte of A64JitState.
// An immediate `true` becomes a plain store of 1, an immediate `false` emits
// nothing, and a non-immediate value is OR-ed in from a register.
void A64EmitX64::EmitA64OrQC(A64EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    // Runtime value: OR its low byte into the sticky flag.
    if (!args[0].IsImmediate()) {
        const Xbyak::Reg8 to_store = ctx.reg_alloc.UseGpr(args[0]).cvt8();
        code.or_(code.byte[code.r15 + offsetof(A64JitState, fpsr_qc)], to_store);
        return;
    }

    // Compile-time value: only a set bit needs any code.
    if (args[0].GetImmediateU1()) {
        code.mov(code.byte[code.r15 + offsetof(A64JitState, fpsr_qc)], u8(1));
    }
}
void A64EmitX64::EmitA64SetPC(A64EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const auto addr = qword[r15 + offsetof(A64JitState, pc)];

View file

@ -27,10 +27,8 @@ enum class Op {
Sub,
};
template<Op op, size_t size>
template<Op op, size_t size, bool has_overflow_inst = false>
void EmitSignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
Xbyak::Reg result = ctx.reg_alloc.UseScratchGpr(args[0]).changeBit(size);
@ -62,11 +60,14 @@ void EmitSignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst)
code.cmovo(result, overflow);
}
if (overflow_inst) {
code.seto(overflow.cvt8());
ctx.reg_alloc.DefineValue(overflow_inst, overflow);
ctx.EraseInstruction(overflow_inst);
code.seto(overflow.cvt8());
if constexpr (has_overflow_inst) {
if (const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp)) {
ctx.reg_alloc.DefineValue(overflow_inst, overflow);
ctx.EraseInstruction(overflow_inst);
}
} else {
code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], overflow.cvt8());
}
ctx.reg_alloc.DefineValue(inst, result);
@ -74,8 +75,6 @@ void EmitSignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst)
template<Op op, size_t size>
void EmitUnsignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
Xbyak::Reg op_result = ctx.reg_alloc.UseScratchGpr(args[0]).changeBit(size);
@ -95,109 +94,21 @@ void EmitUnsignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst
code.cmovae(addend, op_result);
}
if (overflow_inst) {
const Xbyak::Reg overflow = ctx.reg_alloc.ScratchGpr();
code.setb(overflow.cvt8());
ctx.reg_alloc.DefineValue(overflow_inst, overflow);
ctx.EraseInstruction(overflow_inst);
}
const Xbyak::Reg overflow = ctx.reg_alloc.ScratchGpr();
code.setb(overflow.cvt8());
code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], overflow.cvt8());
ctx.reg_alloc.DefineValue(inst, addend);
}
} // anonymous namespace
void EmitX64::EmitSignedSaturatedAdd8(EmitContext& ctx, IR::Inst* inst) {
EmitSignedSaturatedOp<Op::Add, 8>(code, ctx, inst);
// Forwards to the shared EmitSignedSaturatedOp helper, instantiated for Op::Add
// at a width of 32 bits with has_overflow_inst = true.
void EmitX64::EmitSignedSaturatedAddWithFlag32(EmitContext& ctx, IR::Inst* inst) {
EmitSignedSaturatedOp<Op::Add, 32, true>(code, ctx, inst);
}
// Forwards to the shared EmitSignedSaturatedOp helper, instantiated for Op::Add at a width of 16 bits.
void EmitX64::EmitSignedSaturatedAdd16(EmitContext& ctx, IR::Inst* inst) {
EmitSignedSaturatedOp<Op::Add, 16>(code, ctx, inst);
}
// Forwards to the shared EmitSignedSaturatedOp helper, instantiated for Op::Add at a width of 32 bits.
void EmitX64::EmitSignedSaturatedAdd32(EmitContext& ctx, IR::Inst* inst) {
EmitSignedSaturatedOp<Op::Add, 32>(code, ctx, inst);
}
// Forwards to the shared EmitSignedSaturatedOp helper, instantiated for Op::Add at a width of 64 bits.
void EmitX64::EmitSignedSaturatedAdd64(EmitContext& ctx, IR::Inst* inst) {
EmitSignedSaturatedOp<Op::Add, 64>(code, ctx, inst);
}
// Emits x64 code for a 16-bit signed saturating doubling multiply that returns
// the high half: both operands are sign-extended from 16 bits, multiplied, and
// the result is bits [31:16] of twice the product, or 0x7FFF when the doubling
// overflows 32 bits. If a GetOverflowFromOp pseudo-operation is attached, it is
// defined from the overflow condition and then erased.
void EmitX64::EmitSignedSaturatedDoublingMultiplyReturnHigh16(EmitContext& ctx, IR::Inst* inst) {
const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Reg32 x = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg32 y = ctx.reg_alloc.UseScratchGpr(args[1]).cvt32();
const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32();
// Sign-extend the 16-bit operands and form the 32-bit product in x.
code.movsx(x, x.cvt16());
code.movsx(y, y.cvt16());
code.imul(x, y);
// y = 2 * product (truncated to 32 bits).
code.lea(y, ptr[x.cvt64() + x.cvt64()]);
// tmp = product >> 15; its low 16 bits are bits [31:16] of the doubled product.
code.mov(tmp, x);
code.shr(tmp, 15);
// Sign flag is set iff the doubled product and the product disagree in bit 31,
// i.e. the doubling overflowed.
code.xor_(y, x);
// The mov/cmov below leave the flags from the xor intact.
code.mov(y, 0x7FFF);
code.cmovns(y, tmp);
if (overflow_inst) {
code.sets(tmp.cvt8());
ctx.reg_alloc.DefineValue(overflow_inst, tmp);
ctx.EraseInstruction(overflow_inst);
}
ctx.reg_alloc.DefineValue(inst, y);
}
// Emits x64 code for a 32-bit signed saturating doubling multiply that returns
// the high half: both operands are sign-extended from 32 bits, multiplied, and
// the result is bits [63:32] of twice the product, or 0x7FFFFFFF when the
// doubling overflows 64 bits. If a GetOverflowFromOp pseudo-operation is
// attached, it is defined from the overflow condition and then erased.
void EmitX64::EmitSignedSaturatedDoublingMultiplyReturnHigh32(EmitContext& ctx, IR::Inst* inst) {
const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Reg64 x = ctx.reg_alloc.UseScratchGpr(args[0]);
const Xbyak::Reg64 y = ctx.reg_alloc.UseScratchGpr(args[1]);
const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr();
// Sign-extend the 32-bit operands and form the 64-bit product in x.
code.movsxd(x, x.cvt32());
code.movsxd(y, y.cvt32());
code.imul(x, y);
// y = 2 * product.
code.lea(y, ptr[x + x]);
// tmp = product >> 31; its low 32 bits are bits [63:32] of the doubled product.
code.mov(tmp, x);
code.shr(tmp, 31);
// Sign flag is set iff the doubled product and the product disagree in bit 63,
// i.e. the doubling overflowed.
code.xor_(y, x);
// The mov/cmov below leave the flags from the xor intact.
code.mov(y.cvt32(), 0x7FFFFFFF);
code.cmovns(y.cvt32(), tmp.cvt32());
if (overflow_inst) {
code.sets(tmp.cvt8());
ctx.reg_alloc.DefineValue(overflow_inst, tmp);
ctx.EraseInstruction(overflow_inst);
}
ctx.reg_alloc.DefineValue(inst, y);
}
// Forwards to the shared EmitSignedSaturatedOp helper, instantiated for Op::Sub at a width of 8 bits.
void EmitX64::EmitSignedSaturatedSub8(EmitContext& ctx, IR::Inst* inst) {
EmitSignedSaturatedOp<Op::Sub, 8>(code, ctx, inst);
}
// Forwards to the shared EmitSignedSaturatedOp helper, instantiated for Op::Sub at a width of 16 bits.
void EmitX64::EmitSignedSaturatedSub16(EmitContext& ctx, IR::Inst* inst) {
EmitSignedSaturatedOp<Op::Sub, 16>(code, ctx, inst);
}
// Forwards to the shared EmitSignedSaturatedOp helper, instantiated for Op::Sub at a width of 32 bits.
void EmitX64::EmitSignedSaturatedSub32(EmitContext& ctx, IR::Inst* inst) {
EmitSignedSaturatedOp<Op::Sub, 32>(code, ctx, inst);
}
void EmitX64::EmitSignedSaturatedSub64(EmitContext& ctx, IR::Inst* inst) {
EmitSignedSaturatedOp<Op::Sub, 64>(code, ctx, inst);
// Forwards to the shared EmitSignedSaturatedOp helper, instantiated for Op::Sub
// at a width of 32 bits with has_overflow_inst = true.
void EmitX64::EmitSignedSaturatedSubWithFlag32(EmitContext& ctx, IR::Inst* inst) {
EmitSignedSaturatedOp<Op::Sub, 32, true>(code, ctx, inst);
}
void EmitX64::EmitSignedSaturation(EmitContext& ctx, IR::Inst* inst) {
@ -250,6 +161,116 @@ void EmitX64::EmitSignedSaturation(EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.DefineValue(inst, result);
}
// Emits x64 code for the UnsignedSaturation opcode.
//
// args[0] is the 32-bit operand and args[1] is an immediate N in [0, 31].
// The result is the operand, treated as signed, clamped to [0, 2^N - 1].
// If a GetOverflowFromOp pseudo-operation is attached, it is defined as 1 when
// the operand was outside that range, and then erased.
void EmitX64::EmitUnsignedSaturation(EmitContext& ctx, IR::Inst* inst) {
const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const size_t N = args[1].GetImmediateU8();
ASSERT(N <= 31);
const u32 saturated_value = (1u << N) - 1;
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 reg_a = ctx.reg_alloc.UseGpr(args[0]).cvt32();
const Xbyak::Reg32 overflow = ctx.reg_alloc.ScratchGpr().cvt32();
// Pseudocode: result = clamp(reg_a, 0, saturated_value);
// `overflow` doubles as a zero register until the final seta.
code.xor_(overflow, overflow);
// All conditional instructions below consume the flags of this single cmp.
code.cmp(reg_a, saturated_value);
code.mov(result, saturated_value);
// Signed <= max: provisionally 0 (this is what negative operands keep).
code.cmovle(result, overflow);
// Unsigned <= max: operand is within [0, max], so pass it through.
code.cmovbe(result, reg_a);
if (overflow_inst) {
// Unsigned > max covers both too-large and negative operands.
code.seta(overflow.cvt8());
ctx.reg_alloc.DefineValue(overflow_inst, overflow);
ctx.EraseInstruction(overflow_inst);
}
ctx.reg_alloc.DefineValue(inst, result);
}
// Forwards to the shared EmitSignedSaturatedOp helper, instantiated for Op::Add at a width of 8 bits.
void EmitX64::EmitSignedSaturatedAdd8(EmitContext& ctx, IR::Inst* inst) {
EmitSignedSaturatedOp<Op::Add, 8>(code, ctx, inst);
}
// Forwards to the shared EmitSignedSaturatedOp helper, instantiated for Op::Add at a width of 16 bits.
void EmitX64::EmitSignedSaturatedAdd16(EmitContext& ctx, IR::Inst* inst) {
EmitSignedSaturatedOp<Op::Add, 16>(code, ctx, inst);
}
// Forwards to the shared EmitSignedSaturatedOp helper, instantiated for Op::Add at a width of 32 bits.
void EmitX64::EmitSignedSaturatedAdd32(EmitContext& ctx, IR::Inst* inst) {
EmitSignedSaturatedOp<Op::Add, 32>(code, ctx, inst);
}
// Forwards to the shared EmitSignedSaturatedOp helper, instantiated for Op::Add at a width of 64 bits.
void EmitX64::EmitSignedSaturatedAdd64(EmitContext& ctx, IR::Inst* inst) {
EmitSignedSaturatedOp<Op::Add, 64>(code, ctx, inst);
}
// Emits x64 code for a 16-bit signed saturating doubling multiply that returns
// the high half: both operands are sign-extended from 16 bits, multiplied, and
// the result is bits [31:16] of twice the product, or 0x7FFF when the doubling
// overflows 32 bits. The overflow condition is OR-ed into the fpsr_qc byte of
// the JIT state.
void EmitX64::EmitSignedSaturatedDoublingMultiplyReturnHigh16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Reg32 x = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg32 y = ctx.reg_alloc.UseScratchGpr(args[1]).cvt32();
const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32();
// Sign-extend the 16-bit operands and form the 32-bit product in x.
code.movsx(x, x.cvt16());
code.movsx(y, y.cvt16());
code.imul(x, y);
// y = 2 * product (truncated to 32 bits).
code.lea(y, ptr[x.cvt64() + x.cvt64()]);
// tmp = product >> 15; its low 16 bits are bits [31:16] of the doubled product.
code.mov(tmp, x);
code.shr(tmp, 15);
// Sign flag is set iff the doubled product and the product disagree in bit 31,
// i.e. the doubling overflowed.
code.xor_(y, x);
// The mov/cmov below leave the flags from the xor intact.
code.mov(y, 0x7FFF);
code.cmovns(y, tmp);
// Low byte of tmp becomes 1 on overflow, 0 otherwise, and is accumulated into fpsr_qc.
code.sets(tmp.cvt8());
code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], tmp.cvt8());
ctx.reg_alloc.DefineValue(inst, y);
}
// Emits x64 code for a 32-bit signed saturating doubling multiply that returns
// the high half: both operands are sign-extended from 32 bits, multiplied, and
// the result is bits [63:32] of twice the product, or 0x7FFFFFFF when the
// doubling overflows 64 bits. The overflow condition is OR-ed into the fpsr_qc
// byte of the JIT state.
void EmitX64::EmitSignedSaturatedDoublingMultiplyReturnHigh32(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Reg64 x = ctx.reg_alloc.UseScratchGpr(args[0]);
const Xbyak::Reg64 y = ctx.reg_alloc.UseScratchGpr(args[1]);
const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr();
// Sign-extend the 32-bit operands and form the 64-bit product in x.
code.movsxd(x, x.cvt32());
code.movsxd(y, y.cvt32());
code.imul(x, y);
// y = 2 * product.
code.lea(y, ptr[x + x]);
// tmp = product >> 31; its low 32 bits are bits [63:32] of the doubled product.
code.mov(tmp, x);
code.shr(tmp, 31);
// Sign flag is set iff the doubled product and the product disagree in bit 63,
// i.e. the doubling overflowed.
code.xor_(y, x);
// The mov/cmov below leave the flags from the xor intact.
code.mov(y.cvt32(), 0x7FFFFFFF);
code.cmovns(y.cvt32(), tmp.cvt32());
// Low byte of tmp becomes 1 on overflow, 0 otherwise, and is accumulated into fpsr_qc.
code.sets(tmp.cvt8());
code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], tmp.cvt8());
ctx.reg_alloc.DefineValue(inst, y);
}
// Forwards to the shared EmitSignedSaturatedOp helper, instantiated for Op::Sub at a width of 8 bits.
void EmitX64::EmitSignedSaturatedSub8(EmitContext& ctx, IR::Inst* inst) {
EmitSignedSaturatedOp<Op::Sub, 8>(code, ctx, inst);
}
// Forwards to the shared EmitSignedSaturatedOp helper, instantiated for Op::Sub at a width of 16 bits.
void EmitX64::EmitSignedSaturatedSub16(EmitContext& ctx, IR::Inst* inst) {
EmitSignedSaturatedOp<Op::Sub, 16>(code, ctx, inst);
}
// Forwards to the shared EmitSignedSaturatedOp helper, instantiated for Op::Sub at a width of 32 bits.
void EmitX64::EmitSignedSaturatedSub32(EmitContext& ctx, IR::Inst* inst) {
EmitSignedSaturatedOp<Op::Sub, 32>(code, ctx, inst);
}
// Forwards to the shared EmitSignedSaturatedOp helper, instantiated for Op::Sub at a width of 64 bits.
void EmitX64::EmitSignedSaturatedSub64(EmitContext& ctx, IR::Inst* inst) {
EmitSignedSaturatedOp<Op::Sub, 64>(code, ctx, inst);
}
// Forwards to the shared EmitUnsignedSaturatedOp helper, instantiated for Op::Add at a width of 8 bits.
void EmitX64::EmitUnsignedSaturatedAdd8(EmitContext& ctx, IR::Inst* inst) {
EmitUnsignedSaturatedOp<Op::Add, 8>(code, ctx, inst);
}
@ -282,34 +303,4 @@ void EmitX64::EmitUnsignedSaturatedSub64(EmitContext& ctx, IR::Inst* inst) {
EmitUnsignedSaturatedOp<Op::Sub, 64>(code, ctx, inst);
}
// Emits x64 code for the UnsignedSaturation opcode.
//
// args[0] is the 32-bit operand and args[1] is an immediate N in [0, 31].
// The result is the operand, treated as signed, clamped to [0, 2^N - 1].
// If a GetOverflowFromOp pseudo-operation is attached, it is defined as 1 when
// the operand was outside that range, and then erased.
void EmitX64::EmitUnsignedSaturation(EmitContext& ctx, IR::Inst* inst) {
const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const size_t N = args[1].GetImmediateU8();
ASSERT(N <= 31);
const u32 saturated_value = (1u << N) - 1;
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 reg_a = ctx.reg_alloc.UseGpr(args[0]).cvt32();
const Xbyak::Reg32 overflow = ctx.reg_alloc.ScratchGpr().cvt32();
// Pseudocode: result = clamp(reg_a, 0, saturated_value);
// `overflow` doubles as a zero register until the final seta.
code.xor_(overflow, overflow);
// All conditional instructions below consume the flags of this single cmp.
code.cmp(reg_a, saturated_value);
code.mov(result, saturated_value);
// Signed <= max: provisionally 0 (this is what negative operands keep).
code.cmovle(result, overflow);
// Unsigned <= max: operand is within [0, max], so pass it through.
code.cmovbe(result, reg_a);
if (overflow_inst) {
// Unsigned > max covers both too-large and negative operands.
code.seta(overflow.cvt8());
ctx.reg_alloc.DefineValue(overflow_inst, overflow);
ctx.EraseInstruction(overflow_inst);
}
ctx.reg_alloc.DefineValue(inst, result);
}
} // namespace Dynarmic::Backend::X64

View file

@ -116,7 +116,7 @@ bool TranslatorVisitor::arm_QADD(Cond cond, Reg n, Reg d, Reg m) {
const auto a = ir.GetRegister(m);
const auto b = ir.GetRegister(n);
const auto result = ir.SignedSaturatedAdd(a, b);
const auto result = ir.SignedSaturatedAddWithFlag(a, b);
ir.SetRegister(d, result.result);
ir.OrQFlag(result.overflow);
@ -135,7 +135,7 @@ bool TranslatorVisitor::arm_QSUB(Cond cond, Reg n, Reg d, Reg m) {
const auto a = ir.GetRegister(m);
const auto b = ir.GetRegister(n);
const auto result = ir.SignedSaturatedSub(a, b);
const auto result = ir.SignedSaturatedSubWithFlag(a, b);
ir.SetRegister(d, result.result);
ir.OrQFlag(result.overflow);
@ -154,10 +154,10 @@ bool TranslatorVisitor::arm_QDADD(Cond cond, Reg n, Reg d, Reg m) {
const auto a = ir.GetRegister(m);
const auto b = ir.GetRegister(n);
const auto doubled = ir.SignedSaturatedAdd(b, b);
const auto doubled = ir.SignedSaturatedAddWithFlag(b, b);
ir.OrQFlag(doubled.overflow);
const auto result = ir.SignedSaturatedAdd(a, doubled.result);
const auto result = ir.SignedSaturatedAddWithFlag(a, doubled.result);
ir.SetRegister(d, result.result);
ir.OrQFlag(result.overflow);
return true;
@ -175,10 +175,10 @@ bool TranslatorVisitor::arm_QDSUB(Cond cond, Reg n, Reg d, Reg m) {
const auto a = ir.GetRegister(m);
const auto b = ir.GetRegister(n);
const auto doubled = ir.SignedSaturatedAdd(b, b);
const auto doubled = ir.SignedSaturatedAddWithFlag(b, b);
ir.OrQFlag(doubled.overflow);
const auto result = ir.SignedSaturatedSub(a, doubled.result);
const auto result = ir.SignedSaturatedSubWithFlag(a, doubled.result);
ir.SetRegister(d, result.result);
ir.OrQFlag(result.overflow);
return true;

View file

@ -26,7 +26,7 @@ bool TranslatorVisitor::thumb32_QADD(Reg n, Reg d, Reg m) {
const auto reg_m = ir.GetRegister(m);
const auto reg_n = ir.GetRegister(n);
const auto result = ir.SignedSaturatedAdd(reg_m, reg_n);
const auto result = ir.SignedSaturatedAddWithFlag(reg_m, reg_n);
ir.SetRegister(d, result.result);
ir.OrQFlag(result.overflow);
@ -40,10 +40,10 @@ bool TranslatorVisitor::thumb32_QDADD(Reg n, Reg d, Reg m) {
const auto reg_m = ir.GetRegister(m);
const auto reg_n = ir.GetRegister(n);
const auto doubled_n = ir.SignedSaturatedAdd(reg_n, reg_n);
const auto doubled_n = ir.SignedSaturatedAddWithFlag(reg_n, reg_n);
ir.OrQFlag(doubled_n.overflow);
const auto result = ir.SignedSaturatedAdd(reg_m, doubled_n.result);
const auto result = ir.SignedSaturatedAddWithFlag(reg_m, doubled_n.result);
ir.SetRegister(d, result.result);
ir.OrQFlag(result.overflow);
return true;
@ -56,10 +56,10 @@ bool TranslatorVisitor::thumb32_QDSUB(Reg n, Reg d, Reg m) {
const auto reg_m = ir.GetRegister(m);
const auto reg_n = ir.GetRegister(n);
const auto doubled_n = ir.SignedSaturatedAdd(reg_n, reg_n);
const auto doubled_n = ir.SignedSaturatedAddWithFlag(reg_n, reg_n);
ir.OrQFlag(doubled_n.overflow);
const auto result = ir.SignedSaturatedSub(reg_m, doubled_n.result);
const auto result = ir.SignedSaturatedSubWithFlag(reg_m, doubled_n.result);
ir.SetRegister(d, result.result);
ir.OrQFlag(result.overflow);
return true;
@ -72,7 +72,7 @@ bool TranslatorVisitor::thumb32_QSUB(Reg n, Reg d, Reg m) {
const auto reg_m = ir.GetRegister(m);
const auto reg_n = ir.GetRegister(n);
const auto result = ir.SignedSaturatedSub(reg_m, reg_n);
const auto result = ir.SignedSaturatedSubWithFlag(reg_m, reg_n);
ir.SetRegister(d, result.result);
ir.OrQFlag(result.overflow);

View file

@ -42,10 +42,6 @@ void IREmitter::SetNZCV(const IR::NZCV& nzcv) {
Inst(Opcode::A64SetNZCV, nzcv);
}
// Emits an A64OrQC IR instruction with the one-bit `value` as its operand.
void IREmitter::OrQC(const IR::U1& value) {
Inst(Opcode::A64OrQC, value);
}
// Emits an A64CallSupervisor IR instruction carrying `imm` as a 32-bit immediate operand.
void IREmitter::CallSupervisor(u32 imm) {
Inst(Opcode::A64CallSupervisor, Imm32(imm));
}

View file

@ -39,7 +39,6 @@ public:
IR::U32 GetNZCVRaw();
void SetNZCVRaw(IR::U32 value);
void SetNZCV(const IR::NZCV& nzcv);
void OrQC(const IR::U1& value);
void CallSupervisor(u32 imm);
void ExceptionRaised(Exception exception);

View file

@ -131,8 +131,7 @@ bool TranslatorVisitor::SQADD_1(Imm<2> size, Vec Vm, Vec Vn, Vec Vd) {
const IR::UAny operand1 = V_scalar(esize, Vn);
const IR::UAny operand2 = V_scalar(esize, Vm);
const auto result = ir.SignedSaturatedAdd(operand1, operand2);
ir.OrQC(result.overflow);
V_scalar(esize, Vd, result.result);
V_scalar(esize, Vd, result);
return true;
}
@ -146,10 +145,7 @@ bool TranslatorVisitor::SQDMULH_vec_1(Imm<2> size, Vec Vm, Vec Vn, Vec Vd) {
const IR::UAny operand1 = V_scalar(esize, Vn);
const IR::UAny operand2 = V_scalar(esize, Vm);
const auto result = ir.SignedSaturatedDoublingMultiplyReturnHigh(operand1, operand2);
ir.OrQC(result.overflow);
V_scalar(esize, Vd, result.result);
V_scalar(esize, Vd, result);
return true;
}
@ -175,8 +171,7 @@ bool TranslatorVisitor::SQSUB_1(Imm<2> size, Vec Vm, Vec Vn, Vec Vd) {
const IR::UAny operand1 = V_scalar(esize, Vn);
const IR::UAny operand2 = V_scalar(esize, Vm);
const auto result = ir.SignedSaturatedSub(operand1, operand2);
ir.OrQC(result.overflow);
V_scalar(esize, Vd, result.result);
V_scalar(esize, Vd, result);
return true;
}
@ -186,8 +181,7 @@ bool TranslatorVisitor::UQADD_1(Imm<2> size, Vec Vm, Vec Vn, Vec Vd) {
const IR::UAny operand1 = V_scalar(esize, Vn);
const IR::UAny operand2 = V_scalar(esize, Vm);
const auto result = ir.UnsignedSaturatedAdd(operand1, operand2);
ir.OrQC(result.overflow);
V_scalar(esize, Vd, result.result);
V_scalar(esize, Vd, result);
return true;
}
@ -197,8 +191,7 @@ bool TranslatorVisitor::UQSUB_1(Imm<2> size, Vec Vm, Vec Vn, Vec Vd) {
const IR::UAny operand1 = V_scalar(esize, Vn);
const IR::UAny operand2 = V_scalar(esize, Vm);
const auto result = ir.UnsignedSaturatedSub(operand1, operand2);
ir.OrQC(result.overflow);
V_scalar(esize, Vd, result.result);
V_scalar(esize, Vd, result);
return true;
}

View file

@ -127,10 +127,7 @@ bool TranslatorVisitor::SQDMULH_elt_1(Imm<2> size, Imm<1> L, Imm<1> M, Imm<4> Vm
const IR::UAny operand1 = V_scalar(esize, Vn);
const IR::UAny operand2 = ir.VectorGetElement(esize, V(128, Vm), index);
const auto result = ir.SignedSaturatedDoublingMultiplyReturnHigh(operand1, operand2);
ir.OrQC(result.overflow);
V_scalar(esize, Vd, result.result);
V_scalar(esize, Vd, result);
return true;
}

View file

@ -525,7 +525,33 @@ U32U64 IREmitter::MinUnsigned(const U32U64& a, const U32U64& b) {
return Inst<U64>(Opcode::MinUnsigned64, a, b);
}
ResultAndOverflow<UAny> IREmitter::SignedSaturatedAdd(const UAny& a, const UAny& b) {
/// Emits a SignedSaturatedAddWithFlag32 instruction on `a` and `b`, followed by
/// a GetOverflowFromOp pseudo-operation attached to it.
/// @return The 32-bit result paired with its one-bit overflow value.
ResultAndOverflow<U32> IREmitter::SignedSaturatedAddWithFlag(const U32& a, const U32& b) {
    const auto sum = Inst<U32>(Opcode::SignedSaturatedAddWithFlag32, a, b);
    const auto saturated = Inst<U1>(Opcode::GetOverflowFromOp, sum);
    return ResultAndOverflow<U32>{sum, saturated};
}
/// Emits a SignedSaturatedSubWithFlag32 instruction on `a` and `b`, followed by
/// a GetOverflowFromOp pseudo-operation attached to it.
/// @return The 32-bit result paired with its one-bit overflow value.
ResultAndOverflow<U32> IREmitter::SignedSaturatedSubWithFlag(const U32& a, const U32& b) {
    const auto difference = Inst<U32>(Opcode::SignedSaturatedSubWithFlag32, a, b);
    const auto saturated = Inst<U1>(Opcode::GetOverflowFromOp, difference);
    return ResultAndOverflow<U32>{difference, saturated};
}
/// Emits a SignedSaturation instruction that saturates `a` to
/// `bit_size_to_saturate_to` bits (asserted to be in [1, 32]), followed by a
/// GetOverflowFromOp pseudo-operation attached to it.
/// @return The 32-bit result paired with its one-bit overflow value.
ResultAndOverflow<U32> IREmitter::SignedSaturation(const U32& a, size_t bit_size_to_saturate_to) {
    ASSERT(bit_size_to_saturate_to >= 1 && bit_size_to_saturate_to <= 32);

    // The width travels as an 8-bit immediate operand.
    const auto width = Imm8(static_cast<u8>(bit_size_to_saturate_to));
    const auto clamped = Inst<U32>(Opcode::SignedSaturation, a, width);
    const auto saturated = Inst<U1>(Opcode::GetOverflowFromOp, clamped);
    return ResultAndOverflow<U32>{clamped, saturated};
}
/// Emits an UnsignedSaturation instruction that saturates `a` to
/// `bit_size_to_saturate_to` bits (asserted to be at most 31), followed by a
/// GetOverflowFromOp pseudo-operation attached to it.
/// @return The 32-bit result paired with its one-bit overflow value.
ResultAndOverflow<U32> IREmitter::UnsignedSaturation(const U32& a, size_t bit_size_to_saturate_to) {
    ASSERT(bit_size_to_saturate_to <= 31);

    // The width travels as an 8-bit immediate operand.
    const auto width = Imm8(static_cast<u8>(bit_size_to_saturate_to));
    const auto clamped = Inst<U32>(Opcode::UnsignedSaturation, a, width);
    const auto saturated = Inst<U1>(Opcode::GetOverflowFromOp, clamped);
    return ResultAndOverflow<U32>{clamped, saturated};
}
UAny IREmitter::SignedSaturatedAdd(const UAny& a, const UAny& b) {
ASSERT(a.GetType() == b.GetType());
const auto result = [&]() -> IR::UAny {
switch (a.GetType()) {
@ -541,11 +567,10 @@ ResultAndOverflow<UAny> IREmitter::SignedSaturatedAdd(const UAny& a, const UAny&
return IR::UAny{};
}
}();
const auto overflow = Inst<U1>(Opcode::GetOverflowFromOp, result);
return {result, overflow};
return result;
}
ResultAndOverflow<UAny> IREmitter::SignedSaturatedDoublingMultiplyReturnHigh(const UAny& a, const UAny& b) {
UAny IREmitter::SignedSaturatedDoublingMultiplyReturnHigh(const UAny& a, const UAny& b) {
ASSERT(a.GetType() == b.GetType());
const auto result = [&]() -> IR::UAny {
switch (a.GetType()) {
@ -557,12 +582,10 @@ ResultAndOverflow<UAny> IREmitter::SignedSaturatedDoublingMultiplyReturnHigh(con
UNREACHABLE();
}
}();
const auto overflow = Inst<U1>(Opcode::GetOverflowFromOp, result);
return {result, overflow};
return result;
}
ResultAndOverflow<UAny> IREmitter::SignedSaturatedSub(const UAny& a, const UAny& b) {
UAny IREmitter::SignedSaturatedSub(const UAny& a, const UAny& b) {
ASSERT(a.GetType() == b.GetType());
const auto result = [&]() -> IR::UAny {
switch (a.GetType()) {
@ -578,18 +601,10 @@ ResultAndOverflow<UAny> IREmitter::SignedSaturatedSub(const UAny& a, const UAny&
return IR::UAny{};
}
}();
const auto overflow = Inst<U1>(Opcode::GetOverflowFromOp, result);
return {result, overflow};
return result;
}
/// Emits a SignedSaturation instruction that saturates `a` to
/// `bit_size_to_saturate_to` bits (asserted to be in [1, 32]), followed by a
/// GetOverflowFromOp pseudo-operation attached to it.
/// @return The 32-bit result paired with its one-bit overflow value.
ResultAndOverflow<U32> IREmitter::SignedSaturation(const U32& a, size_t bit_size_to_saturate_to) {
    ASSERT(bit_size_to_saturate_to >= 1 && bit_size_to_saturate_to <= 32);

    // The width travels as an 8-bit immediate operand.
    const auto width = Imm8(static_cast<u8>(bit_size_to_saturate_to));
    const auto clamped = Inst<U32>(Opcode::SignedSaturation, a, width);
    const auto saturated = Inst<U1>(Opcode::GetOverflowFromOp, clamped);
    return ResultAndOverflow<U32>{clamped, saturated};
}
ResultAndOverflow<UAny> IREmitter::UnsignedSaturatedAdd(const UAny& a, const UAny& b) {
UAny IREmitter::UnsignedSaturatedAdd(const UAny& a, const UAny& b) {
ASSERT(a.GetType() == b.GetType());
const auto result = [&]() -> IR::UAny {
switch (a.GetType()) {
@ -605,11 +620,10 @@ ResultAndOverflow<UAny> IREmitter::UnsignedSaturatedAdd(const UAny& a, const UAn
return IR::UAny{};
}
}();
const auto overflow = Inst<U1>(Opcode::GetOverflowFromOp, result);
return {result, overflow};
return result;
}
ResultAndOverflow<UAny> IREmitter::UnsignedSaturatedSub(const UAny& a, const UAny& b) {
UAny IREmitter::UnsignedSaturatedSub(const UAny& a, const UAny& b) {
ASSERT(a.GetType() == b.GetType());
const auto result = [&]() -> IR::UAny {
switch (a.GetType()) {
@ -625,15 +639,7 @@ ResultAndOverflow<UAny> IREmitter::UnsignedSaturatedSub(const UAny& a, const UAn
return IR::UAny{};
}
}();
const auto overflow = Inst<U1>(Opcode::GetOverflowFromOp, result);
return {result, overflow};
}
ResultAndOverflow<U32> IREmitter::UnsignedSaturation(const U32& a, size_t bit_size_to_saturate_to) {
ASSERT(bit_size_to_saturate_to <= 31);
const auto result = Inst<U32>(Opcode::UnsignedSaturation, a, Imm8(static_cast<u8>(bit_size_to_saturate_to)));
const auto overflow = Inst<U1>(Opcode::GetOverflowFromOp, result);
return {result, overflow};
return result;
}
U128 IREmitter::VectorSignedSaturatedAdd(size_t esize, const U128& a, const U128& b) {

View file

@ -150,14 +150,17 @@ public:
U32U64 MinSigned(const U32U64& a, const U32U64& b);
U32U64 MinUnsigned(const U32U64& a, const U32U64& b);
ResultAndOverflow<UAny> SignedSaturatedAdd(const UAny& a, const UAny& b);
ResultAndOverflow<UAny> SignedSaturatedDoublingMultiplyReturnHigh(const UAny& a, const UAny& b);
ResultAndOverflow<UAny> SignedSaturatedSub(const UAny& a, const UAny& b);
ResultAndOverflow<U32> SignedSaturatedAddWithFlag(const U32& a, const U32& b);
ResultAndOverflow<U32> SignedSaturatedSubWithFlag(const U32& a, const U32& b);
ResultAndOverflow<U32> SignedSaturation(const U32& a, size_t bit_size_to_saturate_to);
ResultAndOverflow<UAny> UnsignedSaturatedAdd(const UAny& a, const UAny& b);
ResultAndOverflow<UAny> UnsignedSaturatedSub(const UAny& a, const UAny& b);
ResultAndOverflow<U32> UnsignedSaturation(const U32& a, size_t bit_size_to_saturate_to);
UAny SignedSaturatedAdd(const UAny& a, const UAny& b);
UAny SignedSaturatedDoublingMultiplyReturnHigh(const UAny& a, const UAny& b);
UAny SignedSaturatedSub(const UAny& a, const UAny& b);
UAny UnsignedSaturatedAdd(const UAny& a, const UAny& b);
UAny UnsignedSaturatedSub(const UAny& a, const UAny& b);
U128 VectorSignedSaturatedAdd(size_t esize, const U128& a, const U128& b);
U128 VectorSignedSaturatedSub(size_t esize, const U128& a, const U128& b);
U128 VectorUnsignedSaturatedAdd(size_t esize, const U128& a, const U128& b);

View file

@ -418,7 +418,24 @@ bool Inst::ReadsFromFPSRCumulativeSaturationBit() const {
bool Inst::WritesToFPSRCumulativeSaturationBit() const {
switch (op) {
case Opcode::A64OrQC:
case Opcode::SignedSaturatedAdd8:
case Opcode::SignedSaturatedAdd16:
case Opcode::SignedSaturatedAdd32:
case Opcode::SignedSaturatedAdd64:
case Opcode::SignedSaturatedDoublingMultiplyReturnHigh16:
case Opcode::SignedSaturatedDoublingMultiplyReturnHigh32:
case Opcode::SignedSaturatedSub8:
case Opcode::SignedSaturatedSub16:
case Opcode::SignedSaturatedSub32:
case Opcode::SignedSaturatedSub64:
case Opcode::UnsignedSaturatedAdd8:
case Opcode::UnsignedSaturatedAdd16:
case Opcode::UnsignedSaturatedAdd32:
case Opcode::UnsignedSaturatedAdd64:
case Opcode::UnsignedSaturatedSub8:
case Opcode::UnsignedSaturatedSub16:
case Opcode::UnsignedSaturatedSub32:
case Opcode::UnsignedSaturatedSub64:
case Opcode::VectorSignedSaturatedAbs8:
case Opcode::VectorSignedSaturatedAbs16:
case Opcode::VectorSignedSaturatedAbs32:

View file

@ -63,7 +63,6 @@ A64OPC(SetQ, Void, A64V
A64OPC(SetSP, Void, U64 )
A64OPC(SetFPCR, Void, U32 )
A64OPC(SetFPSR, Void, U32 )
A64OPC(OrQC, Void, U1 )
A64OPC(SetPC, Void, U64 )
A64OPC(CallSupervisor, Void, U32 )
A64OPC(ExceptionRaised, Void, U64, U64 )
@ -178,6 +177,10 @@ OPCODE(MinUnsigned32, U32, U32,
OPCODE(MinUnsigned64, U64, U64, U64 )
// Saturated instructions
OPCODE(SignedSaturatedAddWithFlag32, U32, U32, U32 )
OPCODE(SignedSaturatedSubWithFlag32, U32, U32, U32 )
OPCODE(SignedSaturation, U32, U32, U8 )
OPCODE(UnsignedSaturation, U32, U32, U8 )
OPCODE(SignedSaturatedAdd8, U8, U8, U8 )
OPCODE(SignedSaturatedAdd16, U16, U16, U16 )
OPCODE(SignedSaturatedAdd32, U32, U32, U32 )
@ -188,7 +191,6 @@ OPCODE(SignedSaturatedSub8, U8, U8,
OPCODE(SignedSaturatedSub16, U16, U16, U16 )
OPCODE(SignedSaturatedSub32, U32, U32, U32 )
OPCODE(SignedSaturatedSub64, U64, U64, U64 )
OPCODE(SignedSaturation, U32, U32, U8 )
OPCODE(UnsignedSaturatedAdd8, U8, U8, U8 )
OPCODE(UnsignedSaturatedAdd16, U16, U16, U16 )
OPCODE(UnsignedSaturatedAdd32, U32, U32, U32 )
@ -197,7 +199,6 @@ OPCODE(UnsignedSaturatedSub8, U8, U8,
OPCODE(UnsignedSaturatedSub16, U16, U16, U16 )
OPCODE(UnsignedSaturatedSub32, U32, U32, U32 )
OPCODE(UnsignedSaturatedSub64, U64, U64, U64 )
OPCODE(UnsignedSaturation, U32, U32, U8 )
// Vector saturated instructions
OPCODE(VectorSignedSaturatedAdd8, U128, U128, U128 )