Implement QADD, QSUB, QDADD, QDSUB
This commit is contained in:
parent
b178ab3bec
commit
96e46ba6b5
8 changed files with 210 additions and 43 deletions
|
@ -21,6 +21,7 @@ set(SRCS
|
||||||
frontend/translate/translate_arm/packing.cpp
|
frontend/translate/translate_arm/packing.cpp
|
||||||
frontend/translate/translate_arm/parallel.cpp
|
frontend/translate/translate_arm/parallel.cpp
|
||||||
frontend/translate/translate_arm/reversal.cpp
|
frontend/translate/translate_arm/reversal.cpp
|
||||||
|
frontend/translate/translate_arm/saturated.cpp
|
||||||
frontend/translate/translate_arm/status_register_access.cpp
|
frontend/translate/translate_arm/status_register_access.cpp
|
||||||
frontend/translate/translate_arm/synchronization.cpp
|
frontend/translate/translate_arm/synchronization.cpp
|
||||||
frontend/translate/translate_arm/vfp2.cpp
|
frontend/translate/translate_arm/vfp2.cpp
|
||||||
|
|
|
@ -1256,6 +1256,77 @@ void EmitX64::EmitByteReverseDual(IR::Block&, IR::Inst* inst) {
|
||||||
code->bswap(result);
|
code->bswap(result);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emits x64 code computing the number of leading zero bits of the 32-bit
// argument of `inst`.
// When cpu_info reports LZCNT support a single lzcnt is emitted; otherwise a
// BSR-based sequence is used.
void EmitX64::EmitCountLeadingZeros(IR::Block&, IR::Inst* inst) {
|
||||||
|
IR::Value a = inst->GetArg(0);
|
||||||
|
|
||||||
|
if (cpu_info.has(Xbyak::util::Cpu::tLZCNT)) {
|
||||||
|
Xbyak::Reg32 source = reg_alloc.UseGpr(a).cvt32();
|
||||||
|
Xbyak::Reg32 result = reg_alloc.DefGpr(inst).cvt32();
|
||||||
|
|
||||||
|
code->lzcnt(result, source);
|
||||||
|
} else {
|
||||||
|
// source is overwritten below (mov source, 0xFFFFFFFF), so it is requested
|
||||||
|
// through UseScratchGpr rather than UseGpr.
Xbyak::Reg32 source = reg_alloc.UseScratchGpr(a).cvt32();
|
||||||
|
Xbyak::Reg32 result = reg_alloc.DefGpr(inst).cvt32();
|
||||||
|
|
||||||
|
// The result of a bsr of zero is undefined, but zf is set after it.
|
||||||
|
code->bsr(result, source);
|
||||||
|
// mov does not modify flags, so zf from the bsr is still valid for the cmovz.
code->mov(source, 0xFFFFFFFF);
|
||||||
|
// For a zero input, replace the undefined bsr result with 0xFFFFFFFF (-1).
code->cmovz(result, source);
|
||||||
|
// result = 31 - result: 31 minus the index of the highest set bit,
// or 31 - (-1) = 32 for a zero input.
code->neg(result);
|
||||||
|
code->add(result, 31);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Emits x64 code for a 32-bit signed saturating addition of the two arguments
// of `inst`.
// If a GetOverflowFromOp pseudo-operation is associated with `inst`, its value
// is also produced here (via seto) and the pseudo-operation is erased from
// `block`.
void EmitX64::EmitSignedSaturatedAdd(IR::Block& block, IR::Inst* inst) {
|
||||||
|
// Null when nothing consumes the overflow output of this instruction.
auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
|
||||||
|
|
||||||
|
IR::Value a = inst->GetArg(0);
|
||||||
|
IR::Value b = inst->GetArg(1);
|
||||||
|
|
||||||
|
// NOTE(review): UseDefGpr presumably yields a register that holds `a` on entry
|
||||||
|
// and becomes the result of `inst` - confirm against the register allocator.
Xbyak::Reg32 result = reg_alloc.UseDefGpr(a, inst).cvt32();
|
||||||
|
Xbyak::Reg32 addend = reg_alloc.UseGpr(b).cvt32();
|
||||||
|
// The same register first holds the saturation constant and, when
|
||||||
|
// overflow_inst exists, later receives the overflow flag.
Xbyak::Reg32 overflow = overflow_inst ? reg_alloc.DefGpr(overflow_inst).cvt32() : reg_alloc.ScratchGpr().cvt32();
|
||||||
|
|
||||||
|
// Sign bit of a (0 or 1) plus 0x7FFFFFFF gives the saturation value.
code->mov(overflow, result);
|
||||||
|
code->shr(overflow, 31);
|
||||||
|
code->add(overflow, 0x7FFFFFFF);
|
||||||
|
// overflow now contains 0x7FFFFFFF if a was non-negative, or 0x80000000 if a was negative
|
||||||
|
code->add(result, addend);
|
||||||
|
// On signed overflow of the add, replace the wrapped sum with the saturation value.
code->cmovo(result, overflow);
|
||||||
|
|
||||||
|
if (overflow_inst) {
|
||||||
|
EraseInstruction(block, overflow_inst);
|
||||||
|
inst->DecrementRemainingUses();
|
||||||
|
|
||||||
|
// cmovo does not modify flags, so OF still reflects the add above.
// seto writes only the low byte; the upper bits of `overflow` keep the
// saturation constant.
// NOTE(review): presumably consumers read only the low byte - confirm.
code->seto(overflow.cvt8());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Emits x64 code for a 32-bit signed saturating subtraction (first argument
// of `inst` minus the second).
// If a GetOverflowFromOp pseudo-operation is associated with `inst`, its value
// is also produced here (via seto) and the pseudo-operation is erased from
// `block`.
void EmitX64::EmitSignedSaturatedSub(IR::Block& block, IR::Inst* inst) {
|
||||||
|
// Null when nothing consumes the overflow output of this instruction.
auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
|
||||||
|
|
||||||
|
IR::Value a = inst->GetArg(0);
|
||||||
|
IR::Value b = inst->GetArg(1);
|
||||||
|
|
||||||
|
// NOTE(review): UseDefGpr presumably yields a register that holds `a` on entry
|
||||||
|
// and becomes the result of `inst` - confirm against the register allocator.
Xbyak::Reg32 result = reg_alloc.UseDefGpr(a, inst).cvt32();
|
||||||
|
Xbyak::Reg32 subend = reg_alloc.UseGpr(b).cvt32();
|
||||||
|
// The same register first holds the saturation constant and, when
|
||||||
|
// overflow_inst exists, later receives the overflow flag.
Xbyak::Reg32 overflow = overflow_inst ? reg_alloc.DefGpr(overflow_inst).cvt32() : reg_alloc.ScratchGpr().cvt32();
|
||||||
|
|
||||||
|
// Sign bit of a (0 or 1) plus 0x7FFFFFFF gives the saturation value.
code->mov(overflow, result);
|
||||||
|
code->shr(overflow, 31);
|
||||||
|
code->add(overflow, 0x7FFFFFFF);
|
||||||
|
// overflow now contains 0x7FFFFFFF if a was non-negative, or 0x80000000 if a was negative
|
||||||
|
code->sub(result, subend);
|
||||||
|
// On signed overflow of the sub, replace the wrapped difference with the saturation value.
code->cmovo(result, overflow);
|
||||||
|
|
||||||
|
if (overflow_inst) {
|
||||||
|
EraseInstruction(block, overflow_inst);
|
||||||
|
inst->DecrementRemainingUses();
|
||||||
|
|
||||||
|
// cmovo does not modify flags, so OF still reflects the sub above.
// seto writes only the low byte; the upper bits of `overflow` keep the
// saturation constant.
// NOTE(review): presumably consumers read only the low byte - confirm.
code->seto(overflow.cvt8());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void EmitX64::EmitPackedAddU8(IR::Block& block, IR::Inst* inst) {
|
void EmitX64::EmitPackedAddU8(IR::Block& block, IR::Inst* inst) {
|
||||||
auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
|
auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
|
||||||
|
|
||||||
|
@ -1600,27 +1671,6 @@ void EmitX64::EmitPackedSaturatedSubS16(IR::Block&, IR::Inst* inst) {
|
||||||
EmitPackedOperation(code, reg_alloc, inst, &Xbyak::CodeGenerator::psubsw);
|
EmitPackedOperation(code, reg_alloc, inst, &Xbyak::CodeGenerator::psubsw);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitX64::EmitCountLeadingZeros(IR::Block&, IR::Inst* inst) {
|
|
||||||
IR::Value a = inst->GetArg(0);
|
|
||||||
|
|
||||||
if (cpu_info.has(Xbyak::util::Cpu::tLZCNT)) {
|
|
||||||
Xbyak::Reg32 source = reg_alloc.UseGpr(a).cvt32();
|
|
||||||
Xbyak::Reg32 result = reg_alloc.DefGpr(inst).cvt32();
|
|
||||||
|
|
||||||
code->lzcnt(result, source);
|
|
||||||
} else {
|
|
||||||
Xbyak::Reg32 source = reg_alloc.UseScratchGpr(a).cvt32();
|
|
||||||
Xbyak::Reg32 result = reg_alloc.DefGpr(inst).cvt32();
|
|
||||||
|
|
||||||
// The result of a bsr of zero is undefined, but zf is set after it.
|
|
||||||
code->bsr(result, source);
|
|
||||||
code->mov(source, 0xFFFFFFFF);
|
|
||||||
code->cmovz(result, source);
|
|
||||||
code->neg(result);
|
|
||||||
code->add(result, 31);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static void DenormalsAreZero32(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::Reg32 gpr_scratch) {
|
static void DenormalsAreZero32(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::Reg32 gpr_scratch) {
|
||||||
using namespace Xbyak::util;
|
using namespace Xbyak::util;
|
||||||
Xbyak::Label end;
|
Xbyak::Label end;
|
||||||
|
|
|
@ -324,6 +324,22 @@ Value IREmitter::ByteReverseDual(const Value& a) {
|
||||||
return Inst(Opcode::ByteReverseDual, {a});
|
return Inst(Opcode::ByteReverseDual, {a});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emits a CountLeadingZeros IR instruction taking `a` as its only argument and
// returns the resulting value. The opcode table declares it as U32 -> U32.
Value IREmitter::CountLeadingZeros(const Value& a) {
|
||||||
|
return Inst(Opcode::CountLeadingZeros, {a});
|
||||||
|
}
|
||||||
|
|
||||||
|
// Emits a SignedSaturatedAdd IR instruction on (a, b), followed by a
// GetOverflowFromOp pseudo-operation that refers to it.
// Returns both values bundled as a ResultAndOverflow.
IREmitter::ResultAndOverflow IREmitter::SignedSaturatedAdd(const Value& a, const Value& b) {
    const auto saturated_sum = Inst(Opcode::SignedSaturatedAdd, {a, b});
    // The pseudo-operation takes the arithmetic instruction as its sole argument.
    const auto did_overflow = Inst(Opcode::GetOverflowFromOp, {saturated_sum});
    return ResultAndOverflow{saturated_sum, did_overflow};
}
|
||||||
|
|
||||||
|
// Emits a SignedSaturatedSub IR instruction on (a, b), followed by a
// GetOverflowFromOp pseudo-operation that refers to it.
// Returns both values bundled as a ResultAndOverflow.
IREmitter::ResultAndOverflow IREmitter::SignedSaturatedSub(const Value& a, const Value& b) {
    const auto saturated_difference = Inst(Opcode::SignedSaturatedSub, {a, b});
    // The pseudo-operation takes the arithmetic instruction as its sole argument.
    const auto did_overflow = Inst(Opcode::GetOverflowFromOp, {saturated_difference});
    return ResultAndOverflow{saturated_difference, did_overflow};
}
|
||||||
|
|
||||||
IREmitter::ResultAndGE IREmitter::PackedAddU8(const Value& a, const Value& b) {
|
IREmitter::ResultAndGE IREmitter::PackedAddU8(const Value& a, const Value& b) {
|
||||||
auto result = Inst(Opcode::PackedAddU8, {a, b});
|
auto result = Inst(Opcode::PackedAddU8, {a, b});
|
||||||
auto ge = Inst(Opcode::GetGEFromOp, {result});
|
auto ge = Inst(Opcode::GetGEFromOp, {result});
|
||||||
|
@ -392,10 +408,6 @@ Value IREmitter::PackedSaturatedSubS16(const Value& a, const Value& b) {
|
||||||
return Inst(Opcode::PackedSaturatedSubS16, {a, b});
|
return Inst(Opcode::PackedSaturatedSubS16, {a, b});
|
||||||
}
|
}
|
||||||
|
|
||||||
Value IREmitter::CountLeadingZeros(const Value& a) {
|
|
||||||
return Inst(Opcode::CountLeadingZeros, {a});
|
|
||||||
}
|
|
||||||
|
|
||||||
Value IREmitter::TransferToFP32(const Value& a) {
|
Value IREmitter::TransferToFP32(const Value& a) {
|
||||||
return Inst(Opcode::TransferToFP32, {a});
|
return Inst(Opcode::TransferToFP32, {a});
|
||||||
}
|
}
|
||||||
|
|
|
@ -43,6 +43,11 @@ public:
|
||||||
Value carry;
|
Value carry;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Pairs the value produced by an operation with its overflow indication.
// Returned by SignedSaturatedAdd and SignedSaturatedSub.
struct ResultAndOverflow {
|
||||||
|
// Value produced by the operation itself.
Value result;
|
||||||
|
// Value of the GetOverflowFromOp pseudo-operation tied to `result`.
Value overflow;
|
||||||
|
};
|
||||||
|
|
||||||
struct ResultAndCarryAndOverflow {
|
struct ResultAndCarryAndOverflow {
|
||||||
Value result;
|
Value result;
|
||||||
Value carry;
|
Value carry;
|
||||||
|
@ -127,6 +132,11 @@ public:
|
||||||
Value ByteReverseWord(const Value& a);
|
Value ByteReverseWord(const Value& a);
|
||||||
Value ByteReverseHalf(const Value& a);
|
Value ByteReverseHalf(const Value& a);
|
||||||
Value ByteReverseDual(const Value& a);
|
Value ByteReverseDual(const Value& a);
|
||||||
|
Value CountLeadingZeros(const Value& a);
|
||||||
|
|
||||||
|
ResultAndOverflow SignedSaturatedAdd(const Value& a, const Value& b);
|
||||||
|
ResultAndOverflow SignedSaturatedSub(const Value& a, const Value& b);
|
||||||
|
|
||||||
ResultAndGE PackedAddU8(const Value& a, const Value& b);
|
ResultAndGE PackedAddU8(const Value& a, const Value& b);
|
||||||
ResultAndGE PackedSubU8(const Value& a, const Value& b);
|
ResultAndGE PackedSubU8(const Value& a, const Value& b);
|
||||||
Value PackedHalvingAddU8(const Value& a, const Value& b);
|
Value PackedHalvingAddU8(const Value& a, const Value& b);
|
||||||
|
@ -143,7 +153,6 @@ public:
|
||||||
Value PackedSaturatedAddS16(const Value& a, const Value& b);
|
Value PackedSaturatedAddS16(const Value& a, const Value& b);
|
||||||
Value PackedSaturatedSubU16(const Value& a, const Value& b);
|
Value PackedSaturatedSubU16(const Value& a, const Value& b);
|
||||||
Value PackedSaturatedSubS16(const Value& a, const Value& b);
|
Value PackedSaturatedSubS16(const Value& a, const Value& b);
|
||||||
Value CountLeadingZeros(const Value& a);
|
|
||||||
|
|
||||||
Value TransferToFP32(const Value& a);
|
Value TransferToFP32(const Value& a);
|
||||||
Value TransferToFP64(const Value& a);
|
Value TransferToFP64(const Value& a);
|
||||||
|
|
|
@ -72,6 +72,13 @@ OPCODE(ZeroExtendByteToWord, T::U32, T::U8
|
||||||
OPCODE(ByteReverseWord, T::U32, T::U32 )
|
OPCODE(ByteReverseWord, T::U32, T::U32 )
|
||||||
OPCODE(ByteReverseHalf, T::U16, T::U16 )
|
OPCODE(ByteReverseHalf, T::U16, T::U16 )
|
||||||
OPCODE(ByteReverseDual, T::U64, T::U64 )
|
OPCODE(ByteReverseDual, T::U64, T::U64 )
|
||||||
|
OPCODE(CountLeadingZeros, T::U32, T::U32 )
|
||||||
|
|
||||||
|
// Saturated instructions
|
||||||
|
OPCODE(SignedSaturatedAdd, T::U32, T::U32, T::U32 )
|
||||||
|
OPCODE(SignedSaturatedSub, T::U32, T::U32, T::U32 )
|
||||||
|
|
||||||
|
// Packed instructions
|
||||||
OPCODE(PackedAddU8, T::U32, T::U32, T::U32 )
|
OPCODE(PackedAddU8, T::U32, T::U32, T::U32 )
|
||||||
OPCODE(PackedSubU8, T::U32, T::U32, T::U32 )
|
OPCODE(PackedSubU8, T::U32, T::U32, T::U32 )
|
||||||
OPCODE(PackedHalvingAddU8, T::U32, T::U32, T::U32 )
|
OPCODE(PackedHalvingAddU8, T::U32, T::U32, T::U32 )
|
||||||
|
@ -88,7 +95,6 @@ OPCODE(PackedSaturatedAddU16, T::U32, T::U32, T::U32
|
||||||
OPCODE(PackedSaturatedAddS16, T::U32, T::U32, T::U32 )
|
OPCODE(PackedSaturatedAddS16, T::U32, T::U32, T::U32 )
|
||||||
OPCODE(PackedSaturatedSubU16, T::U32, T::U32, T::U32 )
|
OPCODE(PackedSaturatedSubU16, T::U32, T::U32, T::U32 )
|
||||||
OPCODE(PackedSaturatedSubS16, T::U32, T::U32, T::U32 )
|
OPCODE(PackedSaturatedSubS16, T::U32, T::U32, T::U32 )
|
||||||
OPCODE(CountLeadingZeros, T::U32, T::U32 )
|
|
||||||
|
|
||||||
// Floating-point operations
|
// Floating-point operations
|
||||||
OPCODE(TransferToFP32, T::F32, T::U32 )
|
OPCODE(TransferToFP32, T::F32, T::U32 )
|
||||||
|
|
78
src/frontend/translate/translate_arm/saturated.cpp
Normal file
78
src/frontend/translate/translate_arm/saturated.cpp
Normal file
|
@ -0,0 +1,78 @@
|
||||||
|
/* This file is part of the dynarmic project.
|
||||||
|
* Copyright (c) 2016 MerryMage
|
||||||
|
* This software may be used and distributed according to the terms of the GNU
|
||||||
|
* General Public License version 2 or any later version.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "translate_arm.h"
|
||||||
|
|
||||||
|
namespace Dynarmic {
|
||||||
|
namespace Arm {
|
||||||
|
|
||||||
|
// QADD <Rd>, <Rm>, <Rn>
// Writes the signed saturating sum Rm + Rn to Rd and ORs the overflow
// indication into the Q flag. Any of Rd, Rn or Rm being PC is handled through
// UnpredictableInstruction(). Otherwise returns true, whether or not the
// condition passed.
bool ArmTranslatorVisitor::arm_QADD(Cond cond, Reg n, Reg d, Reg m) {
    const bool touches_pc = (d == Reg::PC) || (n == Reg::PC) || (m == Reg::PC);
    if (touches_pc) {
        return UnpredictableInstruction();
    }

    if (!ConditionPassed(cond)) {
        return true;
    }

    const auto lhs = ir.GetRegister(m);
    const auto rhs = ir.GetRegister(n);
    const auto sum = ir.SignedSaturatedAdd(lhs, rhs);
    ir.SetRegister(d, sum.result);
    ir.OrQFlag(sum.overflow);
    return true;
}
|
||||||
|
|
||||||
|
// QSUB <Rd>, <Rm>, <Rn>
// Writes the signed saturating difference Rm - Rn to Rd and ORs the overflow
// indication into the Q flag. Any of Rd, Rn or Rm being PC is handled through
// UnpredictableInstruction(). Otherwise returns true, whether or not the
// condition passed.
bool ArmTranslatorVisitor::arm_QSUB(Cond cond, Reg n, Reg d, Reg m) {
    const bool touches_pc = (d == Reg::PC) || (n == Reg::PC) || (m == Reg::PC);
    if (touches_pc) {
        return UnpredictableInstruction();
    }

    if (!ConditionPassed(cond)) {
        return true;
    }

    const auto minuend = ir.GetRegister(m);
    const auto subtrahend = ir.GetRegister(n);
    const auto difference = ir.SignedSaturatedSub(minuend, subtrahend);
    ir.SetRegister(d, difference.result);
    ir.OrQFlag(difference.overflow);
    return true;
}
|
||||||
|
|
||||||
|
// QDADD <Rd>, <Rm>, <Rn>
// Doubles Rn with saturation (as a saturating Rn + Rn), adds that to Rm with
// saturation and writes the sum to Rd. The overflow indication of each of the
// two saturating steps is ORed into the Q flag. Any of Rd, Rn or Rm being PC
// is handled through UnpredictableInstruction(). Otherwise returns true,
// whether or not the condition passed.
bool ArmTranslatorVisitor::arm_QDADD(Cond cond, Reg n, Reg d, Reg m) {
    const bool touches_pc = (d == Reg::PC) || (n == Reg::PC) || (m == Reg::PC);
    if (touches_pc) {
        return UnpredictableInstruction();
    }

    if (!ConditionPassed(cond)) {
        return true;
    }

    const auto accumulator = ir.GetRegister(m);
    const auto operand = ir.GetRegister(n);

    // Step 1: saturating doubling of Rn.
    const auto doubled = ir.SignedSaturatedAdd(operand, operand);
    ir.OrQFlag(doubled.overflow);

    // Step 2: saturating accumulate into Rm.
    const auto sum = ir.SignedSaturatedAdd(accumulator, doubled.result);
    ir.SetRegister(d, sum.result);
    ir.OrQFlag(sum.overflow);
    return true;
}
|
||||||
|
|
||||||
|
// QDSUB <Rd>, <Rm>, <Rn>
// Doubles Rn with saturation (as a saturating Rn + Rn), subtracts that from Rm
// with saturation and writes the difference to Rd. The overflow indication of
// each of the two saturating steps is ORed into the Q flag. Any of Rd, Rn or
// Rm being PC is handled through UnpredictableInstruction(). Otherwise returns
// true, whether or not the condition passed.
bool ArmTranslatorVisitor::arm_QDSUB(Cond cond, Reg n, Reg d, Reg m) {
    const bool touches_pc = (d == Reg::PC) || (n == Reg::PC) || (m == Reg::PC);
    if (touches_pc) {
        return UnpredictableInstruction();
    }

    if (!ConditionPassed(cond)) {
        return true;
    }

    const auto minuend = ir.GetRegister(m);
    const auto operand = ir.GetRegister(n);

    // Step 1: saturating doubling of Rn.
    const auto doubled = ir.SignedSaturatedAdd(operand, operand);
    ir.OrQFlag(doubled.overflow);

    // Step 2: saturating subtract from Rm.
    const auto difference = ir.SignedSaturatedSub(minuend, doubled.result);
    ir.SetRegister(d, difference.result);
    ir.OrQFlag(difference.overflow);
    return true;
}
|
||||||
|
|
||||||
|
|
||||||
|
} // namespace Arm
|
||||||
|
} // namespace Dynarmic
|
|
@ -326,22 +326,10 @@ struct ArmTranslatorVisitor final {
|
||||||
bool arm_UHSUB16(Cond cond, Reg n, Reg d, Reg m);
|
bool arm_UHSUB16(Cond cond, Reg n, Reg d, Reg m);
|
||||||
|
|
||||||
// Saturated Add/Subtract instructions
|
// Saturated Add/Subtract instructions
|
||||||
bool arm_QADD(Cond cond, Reg n, Reg d, Reg m) {
|
bool arm_QADD(Cond cond, Reg n, Reg d, Reg m);
|
||||||
UNUSED(cond, d, m, n);
|
bool arm_QSUB(Cond cond, Reg n, Reg d, Reg m);
|
||||||
return InterpretThisInstruction();
|
bool arm_QDADD(Cond cond, Reg n, Reg d, Reg m);
|
||||||
}
|
bool arm_QDSUB(Cond cond, Reg n, Reg d, Reg m);
|
||||||
bool arm_QSUB(Cond cond, Reg n, Reg d, Reg m) {
|
|
||||||
UNUSED(cond, d, m, n);
|
|
||||||
return InterpretThisInstruction();
|
|
||||||
}
|
|
||||||
bool arm_QDADD(Cond cond, Reg n, Reg d, Reg m) {
|
|
||||||
UNUSED(cond, d, m, n);
|
|
||||||
return InterpretThisInstruction();
|
|
||||||
}
|
|
||||||
bool arm_QDSUB(Cond cond, Reg n, Reg d, Reg m) {
|
|
||||||
UNUSED(cond, d, m, n);
|
|
||||||
return InterpretThisInstruction();
|
|
||||||
}
|
|
||||||
|
|
||||||
// Synchronization Primitive instructions
|
// Synchronization Primitive instructions
|
||||||
bool arm_CLREX();
|
bool arm_CLREX();
|
||||||
|
|
|
@ -985,6 +985,29 @@ TEST_CASE("Test ARM misc instructions", "[JitX64]") {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Fuzzes the QADD, QSUB, QDADD and QDSUB encodings through FuzzJitArm,
// drawing one of the four generators at random for every instruction.
TEST_CASE("Fuzz ARM saturated instructions", "[JitX64]") {
    // R15 as Rd, Rn, or Rm is UNPREDICTABLE
    const auto no_pc_operand = [](u32 inst) -> bool {
        const bool rn_is_pc = Bits<16, 19>(inst) == 0b1111;
        const bool rd_is_pc = Bits<12, 15>(inst) == 0b1111;
        const bool rm_is_pc = Bits<0, 3>(inst) == 0b1111;
        return !(rn_is_pc || rd_is_pc || rm_is_pc);
    };

    const std::array<InstructionGenerator, 4> instructions = {{
        InstructionGenerator("cccc00010000nnnndddd00000101mmmm", no_pc_operand), // QADD
        InstructionGenerator("cccc00010010nnnndddd00000101mmmm", no_pc_operand), // QSUB
        InstructionGenerator("cccc00010100nnnndddd00000101mmmm", no_pc_operand), // QDADD
        InstructionGenerator("cccc00010110nnnndddd00000101mmmm", no_pc_operand), // QDSUB
    }};

    SECTION("Saturated") {
        FuzzJitArm(4, 5, 10000, [&instructions]() -> u32 {
            const size_t pick = RandInt<size_t>(0, instructions.size() - 1);
            return instructions[pick].Generate();
        });
    }
}
|
||||||
|
|
||||||
|
|
||||||
TEST_CASE("Fuzz ARM packing instructions", "[JitX64]") {
|
TEST_CASE("Fuzz ARM packing instructions", "[JitX64]") {
|
||||||
auto is_pkh_valid = [](u32 inst) -> bool {
|
auto is_pkh_valid = [](u32 inst) -> bool {
|
||||||
// R15 as Rd, Rn, or Rm is UNPREDICTABLE
|
// R15 as Rd, Rn, or Rm is UNPREDICTABLE
|
||||||
|
|
Loading…
Add table
Reference in a new issue