diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index d78c8d4e..a0fa1271 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -21,6 +21,7 @@ set(SRCS frontend/translate/translate_arm/packing.cpp frontend/translate/translate_arm/parallel.cpp frontend/translate/translate_arm/reversal.cpp + frontend/translate/translate_arm/saturated.cpp frontend/translate/translate_arm/status_register_access.cpp frontend/translate/translate_arm/synchronization.cpp frontend/translate/translate_arm/vfp2.cpp diff --git a/src/backend_x64/emit_x64.cpp b/src/backend_x64/emit_x64.cpp index bf9477dc..9af87e6e 100644 --- a/src/backend_x64/emit_x64.cpp +++ b/src/backend_x64/emit_x64.cpp @@ -1256,6 +1256,77 @@ void EmitX64::EmitByteReverseDual(IR::Block&, IR::Inst* inst) { code->bswap(result); } +void EmitX64::EmitCountLeadingZeros(IR::Block&, IR::Inst* inst) { + IR::Value a = inst->GetArg(0); + + if (cpu_info.has(Xbyak::util::Cpu::tLZCNT)) { + Xbyak::Reg32 source = reg_alloc.UseGpr(a).cvt32(); + Xbyak::Reg32 result = reg_alloc.DefGpr(inst).cvt32(); + + code->lzcnt(result, source); + } else { + Xbyak::Reg32 source = reg_alloc.UseScratchGpr(a).cvt32(); + Xbyak::Reg32 result = reg_alloc.DefGpr(inst).cvt32(); + + // The result of a bsr of zero is undefined, but zf is set after it. + code->bsr(result, source); + code->mov(source, 0xFFFFFFFF); + code->cmovz(result, source); + code->neg(result); + code->add(result, 31); + } +} + +void EmitX64::EmitSignedSaturatedAdd(IR::Block& block, IR::Inst* inst) { + auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp); + + IR::Value a = inst->GetArg(0); + IR::Value b = inst->GetArg(1); + + Xbyak::Reg32 result = reg_alloc.UseDefGpr(a, inst).cvt32(); + Xbyak::Reg32 addend = reg_alloc.UseGpr(b).cvt32(); + Xbyak::Reg32 overflow = overflow_inst ? 
reg_alloc.DefGpr(overflow_inst).cvt32() : reg_alloc.ScratchGpr().cvt32(); + + code->mov(overflow, result); + code->shr(overflow, 31); + code->add(overflow, 0x7FFFFFFF); + // overflow now contains 0x7FFFFFFF if a was positive, or 0x80000000 if a was negative + code->add(result, addend); + code->cmovo(result, overflow); + + if (overflow_inst) { + EraseInstruction(block, overflow_inst); + inst->DecrementRemainingUses(); + + code->seto(overflow.cvt8()); + } +} + +void EmitX64::EmitSignedSaturatedSub(IR::Block& block, IR::Inst* inst) { + auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp); + + IR::Value a = inst->GetArg(0); + IR::Value b = inst->GetArg(1); + + Xbyak::Reg32 result = reg_alloc.UseDefGpr(a, inst).cvt32(); + Xbyak::Reg32 subend = reg_alloc.UseGpr(b).cvt32(); + Xbyak::Reg32 overflow = overflow_inst ? reg_alloc.DefGpr(overflow_inst).cvt32() : reg_alloc.ScratchGpr().cvt32(); + + code->mov(overflow, result); + code->shr(overflow, 31); + code->add(overflow, 0x7FFFFFFF); + // overflow now contains 0x7FFFFFFF if a was positive, or 0x80000000 if a was negative + code->sub(result, subend); + code->cmovo(result, overflow); + + if (overflow_inst) { + EraseInstruction(block, overflow_inst); + inst->DecrementRemainingUses(); + + code->seto(overflow.cvt8()); + } +} + void EmitX64::EmitPackedAddU8(IR::Block& block, IR::Inst* inst) { auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp); @@ -1600,27 +1671,6 @@ void EmitX64::EmitPackedSaturatedSubS16(IR::Block&, IR::Inst* inst) { EmitPackedOperation(code, reg_alloc, inst, &Xbyak::CodeGenerator::psubsw); } -void EmitX64::EmitCountLeadingZeros(IR::Block&, IR::Inst* inst) { - IR::Value a = inst->GetArg(0); - - if (cpu_info.has(Xbyak::util::Cpu::tLZCNT)) { - Xbyak::Reg32 source = reg_alloc.UseGpr(a).cvt32(); - Xbyak::Reg32 result = reg_alloc.DefGpr(inst).cvt32(); - - code->lzcnt(result, source); - } else { - Xbyak::Reg32 source = reg_alloc.UseScratchGpr(a).cvt32(); - 
Xbyak::Reg32 result = reg_alloc.DefGpr(inst).cvt32(); - - // The result of a bsr of zero is undefined, but zf is set after it. - code->bsr(result, source); - code->mov(source, 0xFFFFFFFF); - code->cmovz(result, source); - code->neg(result); - code->add(result, 31); - } -} - static void DenormalsAreZero32(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::Reg32 gpr_scratch) { using namespace Xbyak::util; Xbyak::Label end; diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp index ad2ae6a3..58fe16ee 100644 --- a/src/frontend/ir/ir_emitter.cpp +++ b/src/frontend/ir/ir_emitter.cpp @@ -324,6 +324,22 @@ Value IREmitter::ByteReverseDual(const Value& a) { return Inst(Opcode::ByteReverseDual, {a}); } +Value IREmitter::CountLeadingZeros(const Value& a) { + return Inst(Opcode::CountLeadingZeros, {a}); +} + +IREmitter::ResultAndOverflow IREmitter::SignedSaturatedAdd(const Value& a, const Value& b) { + auto result = Inst(Opcode::SignedSaturatedAdd, {a, b}); + auto overflow = Inst(Opcode::GetOverflowFromOp, {result}); + return {result, overflow}; +} + +IREmitter::ResultAndOverflow IREmitter::SignedSaturatedSub(const Value& a, const Value& b) { + auto result = Inst(Opcode::SignedSaturatedSub, {a, b}); + auto overflow = Inst(Opcode::GetOverflowFromOp, {result}); + return {result, overflow}; +} + IREmitter::ResultAndGE IREmitter::PackedAddU8(const Value& a, const Value& b) { auto result = Inst(Opcode::PackedAddU8, {a, b}); auto ge = Inst(Opcode::GetGEFromOp, {result}); @@ -392,10 +408,6 @@ Value IREmitter::PackedSaturatedSubS16(const Value& a, const Value& b) { return Inst(Opcode::PackedSaturatedSubS16, {a, b}); } -Value IREmitter::CountLeadingZeros(const Value& a) { - return Inst(Opcode::CountLeadingZeros, {a}); -} - Value IREmitter::TransferToFP32(const Value& a) { return Inst(Opcode::TransferToFP32, {a}); } diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h index bfe7d7b1..40a740d1 100644 --- a/src/frontend/ir/ir_emitter.h +++ 
b/src/frontend/ir/ir_emitter.h @@ -43,6 +43,11 @@ public: Value carry; }; + struct ResultAndOverflow { + Value result; + Value overflow; + }; + struct ResultAndCarryAndOverflow { Value result; Value carry; @@ -127,6 +132,11 @@ public: Value ByteReverseWord(const Value& a); Value ByteReverseHalf(const Value& a); Value ByteReverseDual(const Value& a); + Value CountLeadingZeros(const Value& a); + + ResultAndOverflow SignedSaturatedAdd(const Value& a, const Value& b); + ResultAndOverflow SignedSaturatedSub(const Value& a, const Value& b); + ResultAndGE PackedAddU8(const Value& a, const Value& b); ResultAndGE PackedSubU8(const Value& a, const Value& b); Value PackedHalvingAddU8(const Value& a, const Value& b); @@ -143,7 +153,6 @@ public: Value PackedSaturatedAddS16(const Value& a, const Value& b); Value PackedSaturatedSubU16(const Value& a, const Value& b); Value PackedSaturatedSubS16(const Value& a, const Value& b); - Value CountLeadingZeros(const Value& a); Value TransferToFP32(const Value& a); Value TransferToFP64(const Value& a); diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc index b6a2f5d4..a563d621 100644 --- a/src/frontend/ir/opcodes.inc +++ b/src/frontend/ir/opcodes.inc @@ -72,6 +72,13 @@ OPCODE(ZeroExtendByteToWord, T::U32, T::U8 OPCODE(ByteReverseWord, T::U32, T::U32 ) OPCODE(ByteReverseHalf, T::U16, T::U16 ) OPCODE(ByteReverseDual, T::U64, T::U64 ) +OPCODE(CountLeadingZeros, T::U32, T::U32 ) + +// Saturated instructions +OPCODE(SignedSaturatedAdd, T::U32, T::U32, T::U32 ) +OPCODE(SignedSaturatedSub, T::U32, T::U32, T::U32 ) + +// Packed instructions OPCODE(PackedAddU8, T::U32, T::U32, T::U32 ) OPCODE(PackedSubU8, T::U32, T::U32, T::U32 ) OPCODE(PackedHalvingAddU8, T::U32, T::U32, T::U32 ) @@ -88,7 +95,6 @@ OPCODE(PackedSaturatedAddU16, T::U32, T::U32, T::U32 OPCODE(PackedSaturatedAddS16, T::U32, T::U32, T::U32 ) OPCODE(PackedSaturatedSubU16, T::U32, T::U32, T::U32 ) OPCODE(PackedSaturatedSubS16, T::U32, T::U32, T::U32 ) 
-OPCODE(CountLeadingZeros, T::U32, T::U32 ) // Floating-point operations OPCODE(TransferToFP32, T::F32, T::U32 ) diff --git a/src/frontend/translate/translate_arm/saturated.cpp b/src/frontend/translate/translate_arm/saturated.cpp new file mode 100644 index 00000000..80eb26e2 --- /dev/null +++ b/src/frontend/translate/translate_arm/saturated.cpp @@ -0,0 +1,78 @@ +/* This file is part of the dynarmic project. + * Copyright (c) 2016 MerryMage + * This software may be used and distributed according to the terms of the GNU + * General Public License version 2 or any later version. + */ + +#include "translate_arm.h" + +namespace Dynarmic { +namespace Arm { + +bool ArmTranslatorVisitor::arm_QADD(Cond cond, Reg n, Reg d, Reg m) { + if (d == Reg::PC || n == Reg::PC || m == Reg::PC) + return UnpredictableInstruction(); + + // QADD <Rd>, <Rm>, <Rn> + if (ConditionPassed(cond)) { + auto a = ir.GetRegister(m); + auto b = ir.GetRegister(n); + auto result = ir.SignedSaturatedAdd(a, b); + ir.SetRegister(d, result.result); + ir.OrQFlag(result.overflow); + } + return true; +} + +bool ArmTranslatorVisitor::arm_QSUB(Cond cond, Reg n, Reg d, Reg m) { + if (d == Reg::PC || n == Reg::PC || m == Reg::PC) + return UnpredictableInstruction(); + + // QSUB <Rd>, <Rm>, <Rn> + if (ConditionPassed(cond)) { + auto a = ir.GetRegister(m); + auto b = ir.GetRegister(n); + auto result = ir.SignedSaturatedSub(a, b); + ir.SetRegister(d, result.result); + ir.OrQFlag(result.overflow); + } + return true; +} + +bool ArmTranslatorVisitor::arm_QDADD(Cond cond, Reg n, Reg d, Reg m) { + if (d == Reg::PC || n == Reg::PC || m == Reg::PC) + return UnpredictableInstruction(); + + // QDADD <Rd>, <Rm>, <Rn> + if (ConditionPassed(cond)) { + auto a = ir.GetRegister(m); + auto b = ir.GetRegister(n); + auto doubled = ir.SignedSaturatedAdd(b, b); + ir.OrQFlag(doubled.overflow); + auto result = ir.SignedSaturatedAdd(a, doubled.result); + ir.SetRegister(d, result.result); + ir.OrQFlag(result.overflow); + } + return true; +} + +bool
ArmTranslatorVisitor::arm_QDSUB(Cond cond, Reg n, Reg d, Reg m) { + if (d == Reg::PC || n == Reg::PC || m == Reg::PC) + return UnpredictableInstruction(); + + // QDSUB <Rd>, <Rm>, <Rn> + if (ConditionPassed(cond)) { + auto a = ir.GetRegister(m); + auto b = ir.GetRegister(n); + auto doubled = ir.SignedSaturatedAdd(b, b); + ir.OrQFlag(doubled.overflow); + auto result = ir.SignedSaturatedSub(a, doubled.result); + ir.SetRegister(d, result.result); + ir.OrQFlag(result.overflow); + } + return true; +} + + +} // namespace Arm +} // namespace Dynarmic diff --git a/src/frontend/translate/translate_arm/translate_arm.h b/src/frontend/translate/translate_arm/translate_arm.h index 0ede9df4..997a20b8 100644 --- a/src/frontend/translate/translate_arm/translate_arm.h +++ b/src/frontend/translate/translate_arm/translate_arm.h @@ -326,22 +326,10 @@ struct ArmTranslatorVisitor final { bool arm_UHSUB16(Cond cond, Reg n, Reg d, Reg m); // Saturated Add/Subtract instructions - bool arm_QADD(Cond cond, Reg n, Reg d, Reg m) { - UNUSED(cond, d, m, n); - return InterpretThisInstruction(); - } - bool arm_QSUB(Cond cond, Reg n, Reg d, Reg m) { - UNUSED(cond, d, m, n); - return InterpretThisInstruction(); - } - bool arm_QDADD(Cond cond, Reg n, Reg d, Reg m) { - UNUSED(cond, d, m, n); - return InterpretThisInstruction(); - } - bool arm_QDSUB(Cond cond, Reg n, Reg d, Reg m) { - UNUSED(cond, d, m, n); - return InterpretThisInstruction(); - } + bool arm_QADD(Cond cond, Reg n, Reg d, Reg m); + bool arm_QSUB(Cond cond, Reg n, Reg d, Reg m); + bool arm_QDADD(Cond cond, Reg n, Reg d, Reg m); + bool arm_QDSUB(Cond cond, Reg n, Reg d, Reg m); // Synchronization Primitive instructions bool arm_CLREX(); diff --git a/tests/arm/fuzz_arm.cpp b/tests/arm/fuzz_arm.cpp index 70b2ad91..b876840a 100644 --- a/tests/arm/fuzz_arm.cpp +++ b/tests/arm/fuzz_arm.cpp @@ -985,6 +985,29 @@ TEST_CASE("Test ARM misc instructions", "[JitX64]") { } } +TEST_CASE("Fuzz ARM saturated instructions", "[JitX64]") { + auto is_valid = [](u32 inst)
-> bool { + // R15 as Rd, Rn, or Rm is UNPREDICTABLE + return Bits<16, 19>(inst) != 0b1111 && + Bits<12, 15>(inst) != 0b1111 && + Bits<0, 3>(inst) != 0b1111; + }; + + const std::array<InstructionGenerator, 4> instructions = {{ + InstructionGenerator("cccc00010000nnnndddd00000101mmmm", is_valid), // QADD + InstructionGenerator("cccc00010010nnnndddd00000101mmmm", is_valid), // QSUB + InstructionGenerator("cccc00010100nnnndddd00000101mmmm", is_valid), // QDADD + InstructionGenerator("cccc00010110nnnndddd00000101mmmm", is_valid), // QDSUB + }}; + + SECTION("Saturated") { + FuzzJitArm(4, 5, 10000, [&instructions]() -> u32 { + return instructions[RandInt<size_t>(0, instructions.size() - 1)].Generate(); + }); + } +} + + TEST_CASE("Fuzz ARM packing instructions", "[JitX64]") { auto is_pkh_valid = [](u32 inst) -> bool { // R15 as Rd, Rn, or Rm is UNPREDICTABLE