Merge pull request #574 from lioncash/multiply2

thumb32: Implement long multiply and divide instructions
This commit is contained in:
merry 2021-02-09 20:37:16 +00:00 committed by GitHub
commit fe761b2c61
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 245 additions and 10 deletions

View file

@ -154,6 +154,7 @@ if ("A32" IN_LIST DYNARMIC_FRONTENDS)
frontend/A32/translate/impl/synchronization.cpp frontend/A32/translate/impl/synchronization.cpp
frontend/A32/translate/impl/thumb16.cpp frontend/A32/translate/impl/thumb16.cpp
frontend/A32/translate/impl/thumb32.cpp frontend/A32/translate/impl/thumb32.cpp
frontend/A32/translate/impl/thumb32_long_multiply.cpp
frontend/A32/translate/impl/thumb32_misc.cpp frontend/A32/translate/impl/thumb32_misc.cpp
frontend/A32/translate/impl/thumb32_multiply.cpp frontend/A32/translate/impl/thumb32_multiply.cpp
frontend/A32/translate/impl/thumb32_parallel.cpp frontend/A32/translate/impl/thumb32_parallel.cpp

View file

@ -279,16 +279,16 @@ INST(thumb32_USAD8, "USAD8", "111110110111nnnn1111dd
INST(thumb32_USADA8, "USADA8", "111110110111nnnnaaaadddd0000mmmm") INST(thumb32_USADA8, "USADA8", "111110110111nnnnaaaadddd0000mmmm")
// Long Multiply, Long Multiply Accumulate, and Divide // Long Multiply, Long Multiply Accumulate, and Divide
//INST(thumb32_SMULL, "SMULL", "111110111000------------0000----") INST(thumb32_SMULL, "SMULL", "111110111000nnnnllllhhhh0000mmmm")
//INST(thumb32_SDIV, "SDIV", "111110111001------------1111----") INST(thumb32_SDIV, "SDIV", "111110111001nnnn1111dddd1111mmmm")
//INST(thumb32_UMULL, "UMULL", "111110111010------------0000----") INST(thumb32_UMULL, "UMULL", "111110111010nnnnllllhhhh0000mmmm")
//INST(thumb32_UDIV, "UDIV", "111110111011------------1111----") INST(thumb32_UDIV, "UDIV", "111110111011nnnn1111dddd1111mmmm")
//INST(thumb32_SMLAL, "SMLAL", "111110111100------------0000----") INST(thumb32_SMLAL, "SMLAL", "111110111100nnnnllllhhhh0000mmmm")
//INST(thumb32_SMLALXY, "SMLALXY", "111110111100------------10------") INST(thumb32_SMLALXY, "SMLALXY", "111110111100nnnnllllhhhh10NMmmmm")
//INST(thumb32_SMLALD, "SMLALD", "111110111100------------110-----") INST(thumb32_SMLALD, "SMLALD", "111110111100nnnnllllhhhh110Mmmmm")
//INST(thumb32_SMLSLD, "SMLSLD", "111110111101------------110-----") INST(thumb32_SMLSLD, "SMLSLD", "111110111101nnnnllllhhhh110Mmmmm")
//INST(thumb32_UMLAL, "UMLAL", "111110111110------------0000----") INST(thumb32_UMLAL, "UMLAL", "111110111110nnnnllllhhhh0000mmmm")
//INST(thumb32_UMAAL, "UMAAL", "111110111110------------0110----") INST(thumb32_UMAAL, "UMAAL", "111110111110nnnnllllhhhh0110mmmm")
// Coprocessor // Coprocessor
//INST(thumb32_MCRR2, "MCRR2", "111111000100--------------------") //INST(thumb32_MCRR2, "MCRR2", "111111000100--------------------")

View file

@ -0,0 +1,222 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2021 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include "frontend/A32/translate/impl/translate_thumb.h"
namespace Dynarmic::A32 {
namespace {
// Pointer to an IREmitter member function taking two U32U64 operands and returning a U32U64.
using DivideFunction = IR::U32U64 (IREmitter::*)(const IR::U32U64&, const IR::U32U64&);

// Common translation used by the divide instructions in this file.
//
// Reads registers n and m, invokes `fn` on the emitter with (value of n, value of m),
// and stores the outcome in register d.
//
// Returns whatever UnpredictableInstruction() returns if d, m or n is the PC;
// otherwise returns true.
bool DivideOperation(ThumbTranslatorVisitor& v, Reg d, Reg m, Reg n, DivideFunction fn) {
    const bool touches_pc = d == Reg::PC || m == Reg::PC || n == Reg::PC;

    if (!touches_pc) {
        const IR::U32 lhs = v.ir.GetRegister(n);
        const IR::U32 rhs = v.ir.GetRegister(m);
        const IR::U32 outcome = (v.ir.*fn)(lhs, rhs);

        v.ir.SetRegister(d, outcome);
        return true;
    }

    return v.UnpredictableInstruction();
}
} // Anonymous namespace
// SDIV: delegates to DivideOperation, using the emitter's SignedDiv as the operation.
bool ThumbTranslatorVisitor::thumb32_SDIV(Reg n, Reg d, Reg m) {
    const DivideFunction signed_divide = &IREmitter::SignedDiv;
    return DivideOperation(*this, d, m, n, signed_divide);
}
// SMLAL: sign-extends registers n and m to 64 bits, multiplies them, and adds the
// 64-bit value packed from dLo and dHi. The least significant word of the sum is
// written to dLo and the most significant word to dHi.
//
// Returns the result of UnpredictableInstruction() if any register is the PC or if
// dHi and dLo are the same register; otherwise returns true.
bool ThumbTranslatorVisitor::thumb32_SMLAL(Reg n, Reg dLo, Reg dHi, Reg m) {
    const bool uses_pc = dLo == Reg::PC || dHi == Reg::PC || n == Reg::PC || m == Reg::PC;
    if (uses_pc || dHi == dLo) {
        return UnpredictableInstruction();
    }

    const IR::U32 reg_n = ir.GetRegister(n);
    const auto wide_n = ir.SignExtendWordToLong(reg_n);
    const IR::U32 reg_m = ir.GetRegister(m);
    const auto wide_m = ir.SignExtendWordToLong(reg_m);

    const auto multiplied = ir.Mul(wide_n, wide_m);
    const auto accumulator = ir.Pack2x32To1x64(ir.GetRegister(dLo), ir.GetRegister(dHi));
    const auto total = ir.Add(multiplied, accumulator);

    const auto low_word = ir.LeastSignificantWord(total);
    const auto high_word = ir.MostSignificantWord(total).result;

    ir.SetRegister(dLo, low_word);
    ir.SetRegister(dHi, high_word);
    return true;
}
// SMLALD: splits n and m into sign-extended 16-bit halves, multiplies them pairwise,
// sign-extends both 32-bit products to 64 bits and adds them, together, to the 64-bit
// value packed from dLo and dHi. When M is set the two halves of m are exchanged
// before multiplying. The sum's least significant word goes to dLo and its most
// significant word to dHi.
//
// Returns the result of UnpredictableInstruction() if any register is the PC or if
// dHi and dLo are the same register; otherwise returns true.
bool ThumbTranslatorVisitor::thumb32_SMLALD(Reg n, Reg dLo, Reg dHi, bool M, Reg m) {
    const bool uses_pc = dLo == Reg::PC || dHi == Reg::PC || n == Reg::PC || m == Reg::PC;
    if (uses_pc || dHi == dLo) {
        return UnpredictableInstruction();
    }

    const IR::U32 reg_n = ir.GetRegister(n);
    const IR::U32 reg_m = ir.GetRegister(m);

    // Bottom half: sign-extend the low 16 bits. Top half: arithmetic shift right by 16.
    const IR::U32 n_bottom = ir.SignExtendHalfToWord(ir.LeastSignificantHalf(reg_n));
    const IR::U32 n_top = ir.ArithmeticShiftRight(reg_n, ir.Imm8(16), ir.Imm1(0)).result;
    const IR::U32 m_bottom = ir.SignExtendHalfToWord(ir.LeastSignificantHalf(reg_m));
    const IR::U32 m_top = ir.ArithmeticShiftRight(reg_m, ir.Imm8(16), ir.Imm1(0)).result;

    // With M set, n's bottom half pairs with m's top half and vice versa.
    const IR::U32& m_paired_with_bottom = M ? m_top : m_bottom;
    const IR::U32& m_paired_with_top = M ? m_bottom : m_top;

    const IR::U64 first_product = ir.SignExtendWordToLong(ir.Mul(n_bottom, m_paired_with_bottom));
    const IR::U64 second_product = ir.SignExtendWordToLong(ir.Mul(n_top, m_paired_with_top));
    const auto accumulator = ir.Pack2x32To1x64(ir.GetRegister(dLo), ir.GetRegister(dHi));
    const auto product_sum = ir.Add(first_product, second_product);
    const auto total = ir.Add(product_sum, accumulator);

    const auto low_word = ir.LeastSignificantWord(total);
    ir.SetRegister(dLo, low_word);
    const auto high_word = ir.MostSignificantWord(total).result;
    ir.SetRegister(dHi, high_word);
    return true;
}
// SMLALXY: multiplies one signed 16-bit half of n by one signed 16-bit half of m,
// sign-extends the 32-bit product to 64 bits and adds the 64-bit value packed from
// dLo and dHi. N and M choose the half of n and m respectively: when set, the word is
// arithmetically shifted right by 16; when clear, its least significant half is
// sign-extended. The sum's least significant word goes to dLo and its most
// significant word to dHi.
//
// Returns the result of UnpredictableInstruction() if any register is the PC or if
// dHi and dLo are the same register; otherwise returns true.
bool ThumbTranslatorVisitor::thumb32_SMLALXY(Reg n, Reg dLo, Reg dHi, bool N, bool M, Reg m) {
    const bool uses_pc = dLo == Reg::PC || dHi == Reg::PC || n == Reg::PC || m == Reg::PC;
    if (uses_pc || dHi == dLo) {
        return UnpredictableInstruction();
    }

    // Produces the selected half of `word` as a sign-extended 32-bit value.
    const auto signed_half = [this](const IR::U32& word, bool top) -> IR::U32 {
        if (top) {
            return ir.ArithmeticShiftRight(word, ir.Imm8(16), ir.Imm1(0)).result;
        }
        return ir.SignExtendHalfToWord(ir.LeastSignificantHalf(word));
    };

    const IR::U32 reg_n = ir.GetRegister(n);
    const IR::U32 reg_m = ir.GetRegister(m);
    const IR::U32 half_n = signed_half(reg_n, N);
    const IR::U32 half_m = signed_half(reg_m, M);

    const IR::U64 wide_product = ir.SignExtendWordToLong(ir.Mul(half_n, half_m));
    const auto accumulator = ir.Pack2x32To1x64(ir.GetRegister(dLo), ir.GetRegister(dHi));
    const auto total = ir.Add(wide_product, accumulator);

    const auto low_word = ir.LeastSignificantWord(total);
    ir.SetRegister(dLo, low_word);
    const auto high_word = ir.MostSignificantWord(total).result;
    ir.SetRegister(dHi, high_word);
    return true;
}
// SMLSLD: splits n and m into sign-extended 16-bit halves, multiplies them pairwise,
// sign-extends both 32-bit products to 64 bits, subtracts the product involving n's
// top half from the product involving n's bottom half, and adds the 64-bit value
// packed from dLo and dHi. When M is set the two halves of m are exchanged before
// multiplying. The result's least significant word goes to dLo and its most
// significant word to dHi.
//
// Returns the result of UnpredictableInstruction() if any register is the PC or if
// dHi and dLo are the same register; otherwise returns true.
bool ThumbTranslatorVisitor::thumb32_SMLSLD(Reg n, Reg dLo, Reg dHi, bool M, Reg m) {
    const bool uses_pc = dLo == Reg::PC || dHi == Reg::PC || n == Reg::PC || m == Reg::PC;
    if (uses_pc || dHi == dLo) {
        return UnpredictableInstruction();
    }

    const IR::U32 reg_n = ir.GetRegister(n);
    const IR::U32 reg_m = ir.GetRegister(m);

    // Bottom half: sign-extend the low 16 bits. Top half: arithmetic shift right by 16.
    const IR::U32 n_bottom = ir.SignExtendHalfToWord(ir.LeastSignificantHalf(reg_n));
    const IR::U32 n_top = ir.ArithmeticShiftRight(reg_n, ir.Imm8(16), ir.Imm1(0)).result;
    const IR::U32 m_bottom = ir.SignExtendHalfToWord(ir.LeastSignificantHalf(reg_m));
    const IR::U32 m_top = ir.ArithmeticShiftRight(reg_m, ir.Imm8(16), ir.Imm1(0)).result;

    // With M set, n's bottom half pairs with m's top half and vice versa.
    const IR::U32& m_paired_with_bottom = M ? m_top : m_bottom;
    const IR::U32& m_paired_with_top = M ? m_bottom : m_top;

    const IR::U64 minuend = ir.SignExtendWordToLong(ir.Mul(n_bottom, m_paired_with_bottom));
    const IR::U64 subtrahend = ir.SignExtendWordToLong(ir.Mul(n_top, m_paired_with_top));
    const auto accumulator = ir.Pack2x32To1x64(ir.GetRegister(dLo), ir.GetRegister(dHi));
    const auto difference = ir.Sub(minuend, subtrahend);
    const auto total = ir.Add(difference, accumulator);

    const auto low_word = ir.LeastSignificantWord(total);
    ir.SetRegister(dLo, low_word);
    const auto high_word = ir.MostSignificantWord(total).result;
    ir.SetRegister(dHi, high_word);
    return true;
}
// SMULL: sign-extends registers n and m to 64 bits and multiplies them. The least
// significant word of the product is written to dLo and the most significant word
// to dHi.
//
// Returns the result of UnpredictableInstruction() if any register is the PC or if
// dHi and dLo are the same register; otherwise returns true.
bool ThumbTranslatorVisitor::thumb32_SMULL(Reg n, Reg dLo, Reg dHi, Reg m) {
    const bool uses_pc = dLo == Reg::PC || dHi == Reg::PC || n == Reg::PC || m == Reg::PC;
    if (uses_pc || dHi == dLo) {
        return UnpredictableInstruction();
    }

    const IR::U32 reg_n = ir.GetRegister(n);
    const auto wide_n = ir.SignExtendWordToLong(reg_n);
    const IR::U32 reg_m = ir.GetRegister(m);
    const auto wide_m = ir.SignExtendWordToLong(reg_m);

    const auto wide_product = ir.Mul(wide_n, wide_m);
    const auto low_word = ir.LeastSignificantWord(wide_product);
    const auto high_word = ir.MostSignificantWord(wide_product).result;

    ir.SetRegister(dLo, low_word);
    ir.SetRegister(dHi, high_word);
    return true;
}
// UDIV: delegates to DivideOperation, using the emitter's UnsignedDiv as the operation.
bool ThumbTranslatorVisitor::thumb32_UDIV(Reg n, Reg d, Reg m) {
    const DivideFunction unsigned_divide = &IREmitter::UnsignedDiv;
    return DivideOperation(*this, d, m, n, unsigned_divide);
}
// UMLAL: zero-extends registers n and m to 64 bits, multiplies them, and adds the
// 64-bit value packed from dLo and dHi. The least significant word of the sum is
// written to dLo and the most significant word to dHi.
//
// Returns the result of UnpredictableInstruction() if any register is the PC or if
// dHi and dLo are the same register; otherwise returns true.
bool ThumbTranslatorVisitor::thumb32_UMLAL(Reg n, Reg dLo, Reg dHi, Reg m) {
    const bool uses_pc = dLo == Reg::PC || dHi == Reg::PC || n == Reg::PC || m == Reg::PC;
    if (uses_pc || dHi == dLo) {
        return UnpredictableInstruction();
    }

    const IR::U32 reg_n = ir.GetRegister(n);
    const auto wide_n = ir.ZeroExtendWordToLong(reg_n);
    const IR::U32 reg_m = ir.GetRegister(m);
    const auto wide_m = ir.ZeroExtendWordToLong(reg_m);

    const auto multiplied = ir.Mul(wide_n, wide_m);
    const auto accumulator = ir.Pack2x32To1x64(ir.GetRegister(dLo), ir.GetRegister(dHi));
    const auto total = ir.Add(multiplied, accumulator);

    const auto low_word = ir.LeastSignificantWord(total);
    const auto high_word = ir.MostSignificantWord(total).result;

    ir.SetRegister(dLo, low_word);
    ir.SetRegister(dHi, high_word);
    return true;
}
// UMULL: zero-extends registers n and m to 64 bits and multiplies them. The least
// significant word of the product is written to dLo and the most significant word
// to dHi.
//
// Returns the result of UnpredictableInstruction() if any register is the PC or if
// dHi and dLo are the same register; otherwise returns true.
bool ThumbTranslatorVisitor::thumb32_UMULL(Reg n, Reg dLo, Reg dHi, Reg m) {
    const bool uses_pc = dLo == Reg::PC || dHi == Reg::PC || n == Reg::PC || m == Reg::PC;
    if (uses_pc || dHi == dLo) {
        return UnpredictableInstruction();
    }

    const IR::U32 reg_n = ir.GetRegister(n);
    const auto wide_n = ir.ZeroExtendWordToLong(reg_n);
    const IR::U32 reg_m = ir.GetRegister(m);
    const auto wide_m = ir.ZeroExtendWordToLong(reg_m);

    const auto wide_product = ir.Mul(wide_n, wide_m);
    const auto low_word = ir.LeastSignificantWord(wide_product);
    const auto high_word = ir.MostSignificantWord(wide_product).result;

    ir.SetRegister(dLo, low_word);
    ir.SetRegister(dHi, high_word);
    return true;
}
// UMAAL: zero-extends registers dLo, dHi, n and m to 64 bits, multiplies n by m, then
// adds the extended dHi followed by the extended dLo. The least significant word of
// the sum is written to dLo and the most significant word to dHi.
//
// Returns the result of UnpredictableInstruction() if any register is the PC or if
// dHi and dLo are the same register; otherwise returns true.
bool ThumbTranslatorVisitor::thumb32_UMAAL(Reg n, Reg dLo, Reg dHi, Reg m) {
    const bool uses_pc = dLo == Reg::PC || dHi == Reg::PC || n == Reg::PC || m == Reg::PC;
    if (uses_pc || dHi == dLo) {
        return UnpredictableInstruction();
    }

    const IR::U32 reg_lo = ir.GetRegister(dLo);
    const auto wide_lo = ir.ZeroExtendWordToLong(reg_lo);
    const IR::U32 reg_hi = ir.GetRegister(dHi);
    const auto wide_hi = ir.ZeroExtendWordToLong(reg_hi);
    const IR::U32 reg_n = ir.GetRegister(n);
    const auto wide_n = ir.ZeroExtendWordToLong(reg_n);
    const IR::U32 reg_m = ir.GetRegister(m);
    const auto wide_m = ir.ZeroExtendWordToLong(reg_m);

    // Accumulate in the same order as a nested Add(Add(Mul, hi), lo).
    const auto multiplied = ir.Mul(wide_n, wide_m);
    const auto with_hi = ir.Add(multiplied, wide_hi);
    const auto total = ir.Add(with_hi, wide_lo);

    const auto low_word = ir.LeastSignificantWord(total);
    ir.SetRegister(dLo, low_word);
    const auto high_word = ir.MostSignificantWord(total).result;
    ir.SetRegister(dHi, high_word);
    return true;
}
} // namespace Dynarmic::A32

View file

@ -121,6 +121,18 @@ struct ThumbTranslatorVisitor final {
bool thumb32_BLX_imm(Imm<11> hi, Imm<11> lo); bool thumb32_BLX_imm(Imm<11> hi, Imm<11> lo);
bool thumb32_UDF(); bool thumb32_UDF();
// thumb32 long multiply, long multiply accumulate, and divide instructions
bool thumb32_SDIV(Reg n, Reg d, Reg m);
bool thumb32_SMLAL(Reg n, Reg dLo, Reg dHi, Reg m);
bool thumb32_SMLALD(Reg n, Reg dLo, Reg dHi, bool M, Reg m);
bool thumb32_SMLALXY(Reg n, Reg dLo, Reg dHi, bool N, bool M, Reg m);
bool thumb32_SMLSLD(Reg n, Reg dLo, Reg dHi, bool M, Reg m);
bool thumb32_SMULL(Reg n, Reg dLo, Reg dHi, Reg m);
bool thumb32_UDIV(Reg n, Reg d, Reg m);
bool thumb32_UMAAL(Reg n, Reg dLo, Reg dHi, Reg m);
bool thumb32_UMLAL(Reg n, Reg dLo, Reg dHi, Reg m);
bool thumb32_UMULL(Reg n, Reg dLo, Reg dHi, Reg m);
// thumb32 miscellaneous instructions // thumb32 miscellaneous instructions
bool thumb32_CLZ(Reg n, Reg d, Reg m); bool thumb32_CLZ(Reg n, Reg d, Reg m);
bool thumb32_QADD(Reg n, Reg d, Reg m); bool thumb32_QADD(Reg n, Reg d, Reg m);