Implement QADD, QSUB, QDADD, QDSUB
This commit is contained in:
parent
b178ab3bec
commit
96e46ba6b5
8 changed files with 210 additions and 43 deletions
|
@ -21,6 +21,7 @@ set(SRCS
|
|||
frontend/translate/translate_arm/packing.cpp
|
||||
frontend/translate/translate_arm/parallel.cpp
|
||||
frontend/translate/translate_arm/reversal.cpp
|
||||
frontend/translate/translate_arm/saturated.cpp
|
||||
frontend/translate/translate_arm/status_register_access.cpp
|
||||
frontend/translate/translate_arm/synchronization.cpp
|
||||
frontend/translate/translate_arm/vfp2.cpp
|
||||
|
|
|
@ -1256,6 +1256,77 @@ void EmitX64::EmitByteReverseDual(IR::Block&, IR::Inst* inst) {
|
|||
code->bswap(result);
|
||||
}
|
||||
|
||||
// Emits x64 code for the CountLeadingZeros IR instruction.
// Argument 0 of `inst` is used as the 32-bit source; the result register is defined for `inst`.
// Uses LZCNT when the host CPU reports support for it, otherwise a BSR-based fallback.
void EmitX64::EmitCountLeadingZeros(IR::Block&, IR::Inst* inst) {
|
||||
IR::Value a = inst->GetArg(0);
|
||||
|
||||
if (cpu_info.has(Xbyak::util::Cpu::tLZCNT)) {
|
||||
Xbyak::Reg32 source = reg_alloc.UseGpr(a).cvt32();
|
||||
Xbyak::Reg32 result = reg_alloc.DefGpr(inst).cvt32();
|
||||
|
||||
code->lzcnt(result, source);
|
||||
} else {
|
||||
// The source is requested as a scratch register because it is overwritten below.
Xbyak::Reg32 source = reg_alloc.UseScratchGpr(a).cvt32();
|
||||
Xbyak::Reg32 result = reg_alloc.DefGpr(inst).cvt32();
|
||||
|
||||
// The result of a bsr of zero is undefined, but zf is set after it.
|
||||
code->bsr(result, source);
|
||||
// Reuse `source` to hold -1; it replaces the undefined bsr result when the input was zero.
code->mov(source, 0xFFFFFFFF);
|
||||
code->cmovz(result, source);
|
||||
// result = 31 - result: converts the highest-set-bit index into a leading-zero count,
// and maps the -1 substituted for a zero input to 32.
code->neg(result);
|
||||
code->add(result, 31);
|
||||
}
|
||||
}
|
||||
|
||||
// Emits x64 code for the SignedSaturatedAdd IR instruction (32-bit a + b with signed saturation).
// If a GetOverflowFromOp pseudo-operation is associated with `inst`, its value is produced here
// as well and the pseudo-operation is erased from `block`.
void EmitX64::EmitSignedSaturatedAdd(IR::Block& block, IR::Inst* inst) {
|
||||
auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
|
||||
|
||||
IR::Value a = inst->GetArg(0);
|
||||
IR::Value b = inst->GetArg(1);
|
||||
|
||||
Xbyak::Reg32 result = reg_alloc.UseDefGpr(a, inst).cvt32();
|
||||
Xbyak::Reg32 addend = reg_alloc.UseGpr(b).cvt32();
|
||||
// `overflow` first holds the saturation value; a scratch register is used when nobody
// consumes the overflow flag.
Xbyak::Reg32 overflow = overflow_inst ? reg_alloc.DefGpr(overflow_inst).cvt32() : reg_alloc.ScratchGpr().cvt32();
|
||||
|
||||
// Compute (a >> 31) + 0x7FFFFFFF from the sign bit of a, before a is overwritten by the add.
code->mov(overflow, result);
|
||||
code->shr(overflow, 31);
|
||||
code->add(overflow, 0x7FFFFFFF);
|
||||
// overflow now contains 0x7FFFFFFF if a was non-negative, or 0x80000000 if a was negative
|
||||
code->add(result, addend);
|
||||
// On signed overflow (OF set by the add) replace the wrapped sum with the saturation value.
code->cmovo(result, overflow);
|
||||
|
||||
if (overflow_inst) {
|
||||
EraseInstruction(block, overflow_inst);
|
||||
// Presumably accounts for the erased pseudo-operation no longer using `inst` - confirm.
inst->DecrementRemainingUses();
|
||||
|
||||
// OF from the add above is still intact here (cmovo does not modify flags).
// NOTE(review): seto writes only the low 8 bits, so the upper bits of `overflow` still
// hold the saturation value; this assumes consumers read only the low byte - confirm.
code->seto(overflow.cvt8());
|
||||
}
|
||||
}
|
||||
|
||||
// Emits x64 code for the SignedSaturatedSub IR instruction (32-bit a - b with signed saturation).
// If a GetOverflowFromOp pseudo-operation is associated with `inst`, its value is produced here
// as well and the pseudo-operation is erased from `block`.
void EmitX64::EmitSignedSaturatedSub(IR::Block& block, IR::Inst* inst) {
|
||||
auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
|
||||
|
||||
IR::Value a = inst->GetArg(0);
|
||||
IR::Value b = inst->GetArg(1);
|
||||
|
||||
Xbyak::Reg32 result = reg_alloc.UseDefGpr(a, inst).cvt32();
|
||||
Xbyak::Reg32 subend = reg_alloc.UseGpr(b).cvt32();
|
||||
// `overflow` first holds the saturation value; a scratch register is used when nobody
// consumes the overflow flag.
Xbyak::Reg32 overflow = overflow_inst ? reg_alloc.DefGpr(overflow_inst).cvt32() : reg_alloc.ScratchGpr().cvt32();
|
||||
|
||||
// Compute (a >> 31) + 0x7FFFFFFF from the sign bit of a, before a is overwritten by the sub.
code->mov(overflow, result);
|
||||
code->shr(overflow, 31);
|
||||
code->add(overflow, 0x7FFFFFFF);
|
||||
// overflow now contains 0x7FFFFFFF if a was non-negative, or 0x80000000 if a was negative
|
||||
code->sub(result, subend);
|
||||
// On signed overflow (OF set by the sub) replace the wrapped difference with the saturation value.
code->cmovo(result, overflow);
|
||||
|
||||
if (overflow_inst) {
|
||||
EraseInstruction(block, overflow_inst);
|
||||
// Presumably accounts for the erased pseudo-operation no longer using `inst` - confirm.
inst->DecrementRemainingUses();
|
||||
|
||||
// OF from the sub above is still intact here (cmovo does not modify flags).
// NOTE(review): seto writes only the low 8 bits, so the upper bits of `overflow` still
// hold the saturation value; this assumes consumers read only the low byte - confirm.
code->seto(overflow.cvt8());
|
||||
}
|
||||
}
|
||||
|
||||
void EmitX64::EmitPackedAddU8(IR::Block& block, IR::Inst* inst) {
|
||||
auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
|
||||
|
||||
|
@ -1600,27 +1671,6 @@ void EmitX64::EmitPackedSaturatedSubS16(IR::Block&, IR::Inst* inst) {
|
|||
EmitPackedOperation(code, reg_alloc, inst, &Xbyak::CodeGenerator::psubsw);
|
||||
}
|
||||
|
||||
// Emits x64 code for the CountLeadingZeros IR instruction.
// Argument 0 of `inst` is used as the 32-bit source; the result register is defined for `inst`.
// Uses LZCNT when the host CPU reports support for it, otherwise a BSR-based fallback.
void EmitX64::EmitCountLeadingZeros(IR::Block&, IR::Inst* inst) {
|
||||
IR::Value a = inst->GetArg(0);
|
||||
|
||||
if (cpu_info.has(Xbyak::util::Cpu::tLZCNT)) {
|
||||
Xbyak::Reg32 source = reg_alloc.UseGpr(a).cvt32();
|
||||
Xbyak::Reg32 result = reg_alloc.DefGpr(inst).cvt32();
|
||||
|
||||
code->lzcnt(result, source);
|
||||
} else {
|
||||
// The source is requested as a scratch register because it is overwritten below.
Xbyak::Reg32 source = reg_alloc.UseScratchGpr(a).cvt32();
|
||||
Xbyak::Reg32 result = reg_alloc.DefGpr(inst).cvt32();
|
||||
|
||||
// The result of a bsr of zero is undefined, but zf is set after it.
|
||||
code->bsr(result, source);
|
||||
// Reuse `source` to hold -1; it replaces the undefined bsr result when the input was zero.
code->mov(source, 0xFFFFFFFF);
|
||||
code->cmovz(result, source);
|
||||
// result = 31 - result: converts the highest-set-bit index into a leading-zero count,
// and maps the -1 substituted for a zero input to 32.
code->neg(result);
|
||||
code->add(result, 31);
|
||||
}
|
||||
}
|
||||
|
||||
static void DenormalsAreZero32(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::Reg32 gpr_scratch) {
|
||||
using namespace Xbyak::util;
|
||||
Xbyak::Label end;
|
||||
|
|
|
@ -324,6 +324,22 @@ Value IREmitter::ByteReverseDual(const Value& a) {
|
|||
return Inst(Opcode::ByteReverseDual, {a});
|
||||
}
|
||||
|
||||
// Appends a CountLeadingZeros IR instruction taking `a` as its only argument and returns its value.
Value IREmitter::CountLeadingZeros(const Value& a) {
|
||||
return Inst(Opcode::CountLeadingZeros, {a});
|
||||
}
|
||||
|
||||
// Appends a SignedSaturatedAdd IR instruction on (a, b) together with a GetOverflowFromOp
// pseudo-operation that takes the add as its argument.
// Returns both values as {result, overflow}.
IREmitter::ResultAndOverflow IREmitter::SignedSaturatedAdd(const Value& a, const Value& b) {
|
||||
auto result = Inst(Opcode::SignedSaturatedAdd, {a, b});
|
||||
// The overflow value is tied to `result` by passing it as the pseudo-operation's argument.
auto overflow = Inst(Opcode::GetOverflowFromOp, {result});
|
||||
return {result, overflow};
|
||||
}
|
||||
|
||||
// Appends a SignedSaturatedSub IR instruction on (a, b) together with a GetOverflowFromOp
// pseudo-operation that takes the subtraction as its argument.
// Returns both values as {result, overflow}.
IREmitter::ResultAndOverflow IREmitter::SignedSaturatedSub(const Value& a, const Value& b) {
|
||||
auto result = Inst(Opcode::SignedSaturatedSub, {a, b});
|
||||
// The overflow value is tied to `result` by passing it as the pseudo-operation's argument.
auto overflow = Inst(Opcode::GetOverflowFromOp, {result});
|
||||
return {result, overflow};
|
||||
}
|
||||
|
||||
IREmitter::ResultAndGE IREmitter::PackedAddU8(const Value& a, const Value& b) {
|
||||
auto result = Inst(Opcode::PackedAddU8, {a, b});
|
||||
auto ge = Inst(Opcode::GetGEFromOp, {result});
|
||||
|
@ -392,10 +408,6 @@ Value IREmitter::PackedSaturatedSubS16(const Value& a, const Value& b) {
|
|||
return Inst(Opcode::PackedSaturatedSubS16, {a, b});
|
||||
}
|
||||
|
||||
// Appends a CountLeadingZeros IR instruction taking `a` as its only argument and returns its value.
Value IREmitter::CountLeadingZeros(const Value& a) {
|
||||
return Inst(Opcode::CountLeadingZeros, {a});
|
||||
}
|
||||
|
||||
// Appends a TransferToFP32 IR instruction taking `a` as its only argument and returns its value.
Value IREmitter::TransferToFP32(const Value& a) {
|
||||
return Inst(Opcode::TransferToFP32, {a});
|
||||
}
|
||||
|
|
|
@ -43,6 +43,11 @@ public:
|
|||
Value carry;
|
||||
};
|
||||
|
||||
// Pairs the value of an operation with its associated overflow value.
struct ResultAndOverflow {
|
||||
// Value produced by the operation itself.
Value result;
|
||||
// Overflow indication belonging to `result`.
Value overflow;
|
||||
};
|
||||
|
||||
struct ResultAndCarryAndOverflow {
|
||||
Value result;
|
||||
Value carry;
|
||||
|
@ -127,6 +132,11 @@ public:
|
|||
Value ByteReverseWord(const Value& a);
|
||||
Value ByteReverseHalf(const Value& a);
|
||||
Value ByteReverseDual(const Value& a);
|
||||
Value CountLeadingZeros(const Value& a);
|
||||
|
||||
ResultAndOverflow SignedSaturatedAdd(const Value& a, const Value& b);
|
||||
ResultAndOverflow SignedSaturatedSub(const Value& a, const Value& b);
|
||||
|
||||
ResultAndGE PackedAddU8(const Value& a, const Value& b);
|
||||
ResultAndGE PackedSubU8(const Value& a, const Value& b);
|
||||
Value PackedHalvingAddU8(const Value& a, const Value& b);
|
||||
|
@ -143,7 +153,6 @@ public:
|
|||
Value PackedSaturatedAddS16(const Value& a, const Value& b);
|
||||
Value PackedSaturatedSubU16(const Value& a, const Value& b);
|
||||
Value PackedSaturatedSubS16(const Value& a, const Value& b);
|
||||
Value CountLeadingZeros(const Value& a);
|
||||
|
||||
Value TransferToFP32(const Value& a);
|
||||
Value TransferToFP64(const Value& a);
|
||||
|
|
|
@ -72,6 +72,13 @@ OPCODE(ZeroExtendByteToWord, T::U32, T::U8
|
|||
OPCODE(ByteReverseWord, T::U32, T::U32 )
|
||||
OPCODE(ByteReverseHalf, T::U16, T::U16 )
|
||||
OPCODE(ByteReverseDual, T::U64, T::U64 )
|
||||
OPCODE(CountLeadingZeros, T::U32, T::U32 )
|
||||
|
||||
// Saturated instructions
// NOTE(review): each entry appears to list the opcode name, then its result type, then its
// argument types (here: two U32 operands producing a U32) - confirm against the OPCODE macro.
|
||||
OPCODE(SignedSaturatedAdd, T::U32, T::U32, T::U32 )
|
||||
OPCODE(SignedSaturatedSub, T::U32, T::U32, T::U32 )
|
||||
|
||||
// Packed instructions
|
||||
OPCODE(PackedAddU8, T::U32, T::U32, T::U32 )
|
||||
OPCODE(PackedSubU8, T::U32, T::U32, T::U32 )
|
||||
OPCODE(PackedHalvingAddU8, T::U32, T::U32, T::U32 )
|
||||
|
@ -88,7 +95,6 @@ OPCODE(PackedSaturatedAddU16, T::U32, T::U32, T::U32
|
|||
OPCODE(PackedSaturatedAddS16, T::U32, T::U32, T::U32 )
|
||||
OPCODE(PackedSaturatedSubU16, T::U32, T::U32, T::U32 )
|
||||
OPCODE(PackedSaturatedSubS16, T::U32, T::U32, T::U32 )
|
||||
OPCODE(CountLeadingZeros, T::U32, T::U32 )
|
||||
|
||||
// Floating-point operations
|
||||
OPCODE(TransferToFP32, T::F32, T::U32 )
|
||||
|
|
78
src/frontend/translate/translate_arm/saturated.cpp
Normal file
78
src/frontend/translate/translate_arm/saturated.cpp
Normal file
|
@ -0,0 +1,78 @@
|
|||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2016 MerryMage
|
||||
* This software may be used and distributed according to the terms of the GNU
|
||||
* General Public License version 2 or any later version.
|
||||
*/
|
||||
|
||||
#include "translate_arm.h"
|
||||
|
||||
namespace Dynarmic {
|
||||
namespace Arm {
|
||||
|
||||
// Translates the ARM QADD instruction: Rd = saturating signed (Rm + Rn).
// Returns the result of UnpredictableInstruction() if any of Rd, Rn, Rm is the PC;
// otherwise returns true after emitting the IR (only emitted if the condition check passes).
bool ArmTranslatorVisitor::arm_QADD(Cond cond, Reg n, Reg d, Reg m) {
|
||||
if (d == Reg::PC || n == Reg::PC || m == Reg::PC)
|
||||
return UnpredictableInstruction();
|
||||
|
||||
// QADD <Rd>, <Rm>, <Rn>
|
||||
if (ConditionPassed(cond)) {
|
||||
auto a = ir.GetRegister(m);
|
||||
auto b = ir.GetRegister(n);
|
||||
auto result = ir.SignedSaturatedAdd(a, b);
|
||||
ir.SetRegister(d, result.result);
|
||||
// The overflow indication is OR-ed into the Q flag rather than assigned.
ir.OrQFlag(result.overflow);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Translates the ARM QSUB instruction: Rd = saturating signed (Rm - Rn).
// Returns the result of UnpredictableInstruction() if any of Rd, Rn, Rm is the PC;
// otherwise returns true after emitting the IR (only emitted if the condition check passes).
bool ArmTranslatorVisitor::arm_QSUB(Cond cond, Reg n, Reg d, Reg m) {
|
||||
if (d == Reg::PC || n == Reg::PC || m == Reg::PC)
|
||||
return UnpredictableInstruction();
|
||||
|
||||
// QSUB <Rd>, <Rm>, <Rn>
|
||||
if (ConditionPassed(cond)) {
|
||||
// Rm is the minuend and Rn the subtrahend.
auto a = ir.GetRegister(m);
|
||||
auto b = ir.GetRegister(n);
|
||||
auto result = ir.SignedSaturatedSub(a, b);
|
||||
ir.SetRegister(d, result.result);
|
||||
// The overflow indication is OR-ed into the Q flag rather than assigned.
ir.OrQFlag(result.overflow);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Translates the ARM QDADD instruction: Rd = saturating signed (Rm + saturating signed (Rn + Rn)).
// Returns the result of UnpredictableInstruction() if any of Rd, Rn, Rm is the PC;
// otherwise returns true after emitting the IR (only emitted if the condition check passes).
bool ArmTranslatorVisitor::arm_QDADD(Cond cond, Reg n, Reg d, Reg m) {
|
||||
if (d == Reg::PC || n == Reg::PC || m == Reg::PC)
|
||||
return UnpredictableInstruction();
|
||||
|
||||
// QDADD <Rd>, <Rm>, <Rn>
|
||||
if (ConditionPassed(cond)) {
|
||||
auto a = ir.GetRegister(m);
|
||||
auto b = ir.GetRegister(n);
|
||||
// Doubling Rn is expressed as a saturating add of Rn with itself.
auto doubled = ir.SignedSaturatedAdd(b, b);
|
||||
// Overflow from the doubling step is OR-ed into the Q flag as well.
ir.OrQFlag(doubled.overflow);
|
||||
auto result = ir.SignedSaturatedAdd(a, doubled.result);
|
||||
ir.SetRegister(d, result.result);
|
||||
ir.OrQFlag(result.overflow);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Translates the ARM QDSUB instruction: Rd = saturating signed (Rm - saturating signed (Rn + Rn)).
// Returns the result of UnpredictableInstruction() if any of Rd, Rn, Rm is the PC;
// otherwise returns true after emitting the IR (only emitted if the condition check passes).
bool ArmTranslatorVisitor::arm_QDSUB(Cond cond, Reg n, Reg d, Reg m) {
|
||||
if (d == Reg::PC || n == Reg::PC || m == Reg::PC)
|
||||
return UnpredictableInstruction();
|
||||
|
||||
// QDSUB <Rd>, <Rm>, <Rn>
|
||||
if (ConditionPassed(cond)) {
|
||||
auto a = ir.GetRegister(m);
|
||||
auto b = ir.GetRegister(n);
|
||||
// Doubling Rn is expressed as a saturating add of Rn with itself.
auto doubled = ir.SignedSaturatedAdd(b, b);
|
||||
// Overflow from the doubling step is OR-ed into the Q flag as well.
ir.OrQFlag(doubled.overflow);
|
||||
auto result = ir.SignedSaturatedSub(a, doubled.result);
|
||||
ir.SetRegister(d, result.result);
|
||||
ir.OrQFlag(result.overflow);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
} // namespace Arm
|
||||
} // namespace Dynarmic
|
|
@ -326,22 +326,10 @@ struct ArmTranslatorVisitor final {
|
|||
bool arm_UHSUB16(Cond cond, Reg n, Reg d, Reg m);
|
||||
|
||||
// Saturated Add/Subtract instructions
|
||||
// QADD placeholder: ignores its operands and returns the result of InterpretThisInstruction().
bool arm_QADD(Cond cond, Reg n, Reg d, Reg m) {
|
||||
UNUSED(cond, d, m, n);
|
||||
return InterpretThisInstruction();
|
||||
}
|
||||
// QSUB placeholder: ignores its operands and returns the result of InterpretThisInstruction().
bool arm_QSUB(Cond cond, Reg n, Reg d, Reg m) {
|
||||
UNUSED(cond, d, m, n);
|
||||
return InterpretThisInstruction();
|
||||
}
|
||||
// QDADD placeholder: ignores its operands and returns the result of InterpretThisInstruction().
bool arm_QDADD(Cond cond, Reg n, Reg d, Reg m) {
|
||||
UNUSED(cond, d, m, n);
|
||||
return InterpretThisInstruction();
|
||||
}
|
||||
// QDSUB placeholder: ignores its operands and returns the result of InterpretThisInstruction().
bool arm_QDSUB(Cond cond, Reg n, Reg d, Reg m) {
|
||||
UNUSED(cond, d, m, n);
|
||||
return InterpretThisInstruction();
|
||||
}
|
||||
bool arm_QADD(Cond cond, Reg n, Reg d, Reg m);
|
||||
bool arm_QSUB(Cond cond, Reg n, Reg d, Reg m);
|
||||
bool arm_QDADD(Cond cond, Reg n, Reg d, Reg m);
|
||||
bool arm_QDSUB(Cond cond, Reg n, Reg d, Reg m);
|
||||
|
||||
// Synchronization Primitive instructions
|
||||
bool arm_CLREX();
|
||||
|
|
|
@ -985,6 +985,29 @@ TEST_CASE("Test ARM misc instructions", "[JitX64]") {
|
|||
}
|
||||
}
|
||||
|
||||
// Fuzz test for the QADD / QSUB / QDADD / QDSUB encodings.
// Random instructions are drawn from the four generators below and passed to FuzzJitArm.
TEST_CASE("Fuzz ARM saturated instructions", "[JitX64]") {
|
||||
// Rejects encodings whose Rn (bits 16-19), Rd (bits 12-15) or Rm (bits 0-3) field is 0b1111.
auto is_valid = [](u32 inst) -> bool {
|
||||
// R15 as Rd, Rn, or Rm is UNPREDICTABLE
|
||||
return Bits<16, 19>(inst) != 0b1111 &&
|
||||
Bits<12, 15>(inst) != 0b1111 &&
|
||||
Bits<0, 3>(inst) != 0b1111;
|
||||
};
|
||||
|
||||
// Bit patterns: c = condition, n = Rn, d = Rd, m = Rm; the remaining bits are fixed per instruction.
const std::array<InstructionGenerator, 4> instructions = {{
|
||||
InstructionGenerator("cccc00010000nnnndddd00000101mmmm", is_valid), // QADD
|
||||
InstructionGenerator("cccc00010010nnnndddd00000101mmmm", is_valid), // QSUB
|
||||
InstructionGenerator("cccc00010100nnnndddd00000101mmmm", is_valid), // QDADD
|
||||
InstructionGenerator("cccc00010110nnnndddd00000101mmmm", is_valid), // QDSUB
|
||||
}};
|
||||
|
||||
SECTION("Saturated") {
|
||||
// NOTE(review): the meaning of the numeric arguments (4, 5, 10000) is defined by FuzzJitArm,
// which is not visible here - confirm before changing them.
FuzzJitArm(4, 5, 10000, [&instructions]() -> u32 {
|
||||
// Pick one of the four generators uniformly at random (inclusive index range).
return instructions[RandInt<size_t>(0, instructions.size() - 1)].Generate();
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
TEST_CASE("Fuzz ARM packing instructions", "[JitX64]") {
|
||||
auto is_pkh_valid = [](u32 inst) -> bool {
|
||||
// R15 as Rd, Rn, or Rm is UNPREDICTABLE
|
||||
|
|
Loading…
Reference in a new issue