Implemented USAD8 and USADA8

This commit is contained in:
FernandoS27 2016-12-17 14:52:22 -05:00 committed by Merry
parent 975987f38e
commit 3f6ecfe245
7 changed files with 56 additions and 9 deletions

View file

@ -1672,6 +1672,10 @@ void EmitX64::EmitPackedSaturatedSubS16(IR::Block&, IR::Inst* inst) {
EmitPackedOperation(code, reg_alloc, inst, &Xbyak::CodeGenerator::psubsw); EmitPackedOperation(code, reg_alloc, inst, &Xbyak::CodeGenerator::psubsw);
} }
// Emits x64 code for the PackedAbsDiffSumS8 IR instruction by handing the
// PSADBW emitter to the shared EmitPackedOperation helper. The IR::Block
// argument is unnamed and unused.
// NOTE(review): PSADBW operates on unsigned bytes, so the "S8" suffix of the
// opcode name is misleading - the operation backs the unsigned USAD8/USADA8.
void EmitX64::EmitPackedAbsDiffSumS8(IR::Block&, IR::Inst* inst) {
EmitPackedOperation(code, reg_alloc, inst, &Xbyak::CodeGenerator::psadbw);
}
static void DenormalsAreZero32(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::Reg32 gpr_scratch) { static void DenormalsAreZero32(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::Reg32 gpr_scratch) {
using namespace Xbyak::util; using namespace Xbyak::util;
Xbyak::Label end; Xbyak::Label end;

View file

@ -408,6 +408,10 @@ Value IREmitter::PackedSaturatedSubS16(const Value& a, const Value& b) {
return Inst(Opcode::PackedSaturatedSubS16, {a, b}); return Inst(Opcode::PackedSaturatedSubS16, {a, b});
} }
// Builds a PackedAbsDiffSumS8 IR instruction with operands `a` and `b` and
// returns the Value produced by Inst().
Value IREmitter::PackedAbsDiffSumS8(const Value& a, const Value& b) {
return Inst(Opcode::PackedAbsDiffSumS8, {a, b});
}
Value IREmitter::TransferToFP32(const Value& a) { Value IREmitter::TransferToFP32(const Value& a) {
return Inst(Opcode::TransferToFP32, {a}); return Inst(Opcode::TransferToFP32, {a});
} }
@ -654,4 +658,3 @@ Value IREmitter::Inst(Opcode op, std::initializer_list<Value> args) {
} // namespace IR } // namespace IR
} // namespace Dynarmic } // namespace Dynarmic

View file

@ -153,6 +153,7 @@ public:
Value PackedSaturatedAddS16(const Value& a, const Value& b); Value PackedSaturatedAddS16(const Value& a, const Value& b);
Value PackedSaturatedSubU16(const Value& a, const Value& b); Value PackedSaturatedSubU16(const Value& a, const Value& b);
Value PackedSaturatedSubS16(const Value& a, const Value& b); Value PackedSaturatedSubS16(const Value& a, const Value& b);
// Emits Opcode::PackedAbsDiffSumS8 on `a` and `b`; used by the USAD8/USADA8 translators.
Value PackedAbsDiffSumS8(const Value& a, const Value& b);
Value TransferToFP32(const Value& a); Value TransferToFP32(const Value& a);
Value TransferToFP64(const Value& a); Value TransferToFP64(const Value& a);

View file

@ -95,6 +95,7 @@ OPCODE(PackedSaturatedAddU16, T::U32, T::U32, T::U32
OPCODE(PackedSaturatedAddS16, T::U32, T::U32, T::U32 ) OPCODE(PackedSaturatedAddS16, T::U32, T::U32, T::U32 )
OPCODE(PackedSaturatedSubU16, T::U32, T::U32, T::U32 ) OPCODE(PackedSaturatedSubU16, T::U32, T::U32, T::U32 )
OPCODE(PackedSaturatedSubS16, T::U32, T::U32, T::U32 ) OPCODE(PackedSaturatedSubS16, T::U32, T::U32, T::U32 )
// Sum of absolute differences of packed bytes (x64 backend lowers this to PSADBW).
// NOTE(review): columns presumably read result type, then operand types - confirm against the OPCODE macro.
OPCODE(PackedAbsDiffSumS8, T::U32, T::U32, T::U32 )
// Floating-point operations // Floating-point operations
OPCODE(TransferToFP32, T::F32, T::U32 ) OPCODE(TransferToFP32, T::F32, T::U32 )

View file

@ -66,6 +66,27 @@ bool ArmTranslatorVisitor::arm_USAX(Cond cond, Reg n, Reg d, Reg m) {
return InterpretThisInstruction(); return InterpretThisInstruction();
} }
// USAD8<c> <Rd>, <Rn>, <Rm>
// Stores in Rd the PackedAbsDiffSumS8 of Rn and Rm. Any operand naming the PC
// makes the instruction UNPREDICTABLE.
bool ArmTranslatorVisitor::arm_USAD8(Cond cond, Reg d, Reg m, Reg n) {
    const bool any_operand_is_pc = d == Reg::PC || n == Reg::PC || m == Reg::PC;
    if (any_operand_is_pc) {
        return UnpredictableInstruction();
    }

    // When the condition check fails nothing is emitted for this instruction.
    if (!ConditionPassed(cond)) {
        return true;
    }

    auto abs_diff_sum = ir.PackedAbsDiffSumS8(ir.GetRegister(n), ir.GetRegister(m));
    ir.SetRegister(d, abs_diff_sum);
    return true;
}
// USADA8<c> <Rd>, <Rn>, <Rm>, <Ra>
// Stores in Rd the value of Ra plus the PackedAbsDiffSumS8 of Rn and Rm.
// NOTE(review): `a` is deliberately not compared against the PC here; in the
// ARM encoding Ra == 0b1111 selects USAD8, so the decoder is presumably
// expected to route that case to arm_USAD8 - confirm against the decoder table.
bool ArmTranslatorVisitor::arm_USADA8(Cond cond, Reg d, Reg a, Reg m, Reg n) {
    const bool checked_operand_is_pc = d == Reg::PC || n == Reg::PC || m == Reg::PC;
    if (checked_operand_is_pc) {
        return UnpredictableInstruction();
    }

    // When the condition check fails nothing is emitted for this instruction.
    if (!ConditionPassed(cond)) {
        return true;
    }

    auto abs_diff_sum = ir.PackedAbsDiffSumS8(ir.GetRegister(n), ir.GetRegister(m));
    // Plain addition: carry-in is zero and only the sum (not the flags) is kept.
    auto accumulated = ir.AddWithCarry(ir.GetRegister(a), abs_diff_sum, ir.Imm1(0));
    ir.SetRegister(d, accumulated.result);
    return true;
}
bool ArmTranslatorVisitor::arm_USUB8(Cond cond, Reg n, Reg d, Reg m) { bool ArmTranslatorVisitor::arm_USUB8(Cond cond, Reg n, Reg d, Reg m) {
if (d == Reg::PC || n == Reg::PC || m == Reg::PC) if (d == Reg::PC || n == Reg::PC || m == Reg::PC)
return UnpredictableInstruction(); return UnpredictableInstruction();

View file

@ -214,14 +214,8 @@ struct ArmTranslatorVisitor final {
bool arm_SEL(Cond cond, Reg n, Reg d, Reg m); bool arm_SEL(Cond cond, Reg n, Reg d, Reg m);
// Unsigned sum of absolute difference functions // Unsigned sum of absolute difference functions
bool arm_USAD8(Cond cond, Reg d, Reg m, Reg n) { bool arm_USAD8(Cond cond, Reg d, Reg m, Reg n);
UNUSED(cond, d, m, n); bool arm_USADA8(Cond cond, Reg d, Reg a, Reg m, Reg n);
return InterpretThisInstruction();
}
bool arm_USADA8(Cond cond, Reg d, Reg a, Reg m, Reg n) {
UNUSED(cond, d, a, m, n);
return InterpretThisInstruction();
}
// Packing instructions // Packing instructions
bool arm_PKHBT(Cond cond, Reg n, Reg d, Imm5 imm5, Reg m); bool arm_PKHBT(Cond cond, Reg n, Reg d, Imm5 imm5, Reg m);

View file

@ -961,6 +961,29 @@ TEST_CASE("Fuzz ARM parallel instructions", "[JitX64]") {
} }
} }
// Fuzzes randomly generated USAD8 / USADA8 encodings through FuzzJitArm.
// Encodings whose register fields equal 15 (the PC) are rejected by the
// validators so only well-defined instructions are exercised.
TEST_CASE("Fuzz ARM sum of absolute differences", "[JitX64]") {
    // Field layout shared by both patterns: d = bits 16-19, m = bits 8-11, n = bits 0-3.
    const auto no_pc_in_d_m_n = [](u32 inst) -> bool {
        const bool d_ok = Bits<16, 19>(inst) != 15;
        const bool m_ok = Bits<8, 11>(inst) != 15;
        const bool n_ok = Bits<0, 3>(inst) != 15;
        return d_ok && m_ok && n_ok;
    };

    // USADA8 additionally carries the accumulator register a in bits 12-15.
    const auto no_pc_in_d_a_m_n = [&no_pc_in_d_m_n](u32 inst) -> bool {
        if (!no_pc_in_d_m_n(inst)) {
            return false;
        }
        return Bits<12, 15>(inst) != 15;
    };

    const std::array<InstructionGenerator, 2> generators = {{
        InstructionGenerator("cccc01111000dddd1111mmmm0001nnnn", no_pc_in_d_m_n),   // USAD8
        InstructionGenerator("cccc01111000ddddaaaammmm0001nnnn", no_pc_in_d_a_m_n), // USADA8
    }};

    SECTION("Sum of Absolute Differences (Differences)") {
        FuzzJitArm(1, 1, 10000, [&generators]() -> u32 {
            const size_t pick = RandInt<size_t>(0, generators.size() - 1);
            return generators[pick].Generate();
        });
    }
}
TEST_CASE( "SMUAD", "[JitX64]" ) { TEST_CASE( "SMUAD", "[JitX64]" ) {
Dynarmic::Jit jit{GetUserCallbacks()}; Dynarmic::Jit jit{GetUserCallbacks()};
code_mem.fill({}); code_mem.fill({});