// Patch summary (annotation only; every byte of the diff text below is unchanged).
//
// src/frontend/A32/decoder/thumb32.inc
//   Replaces the eleven commented-out placeholder entries for SMULXY, SMLAXY,
//   SMUAD, SMLAD, SMULWY, SMLAWY, SMUSD, SMLSD, SMMUL, SMMLA and SMMLS with
//   active INST() entries. The new bit patterns name the register fields
//   (n, a, d, m) and the single-bit selectors (N, M, X, R) where the
//   placeholders used '-' wildcards. Each entry with a fixed 1111 in the
//   accumulator position is listed before its sibling that has aaaa there --
//   presumably so the more specific pattern is matched first; confirm against
//   the decoder's matching rules.
//
// src/frontend/A32/translate/impl/thumb32_multiply.cpp
//   * thumb32_MLA, thumb32_MLS and thumb32_USADA8 now also return
//     UnpredictableInstruction() when the accumulator register a is PC.
//   * Adds eleven visitors. Each returns UnpredictableInstruction() when any
//     of its register operands is PC; otherwise it emits IR, writes the result
//     to d with ir.SetRegister and returns true.
//       - SMUAD / SMLAD: split n and m into a sign-extended low half and an
//         arithmetic-shift-right-by-16 high half; swap the two m halves when
//         the M / X bit is set; multiply low*low and high*high and add the
//         products with AddWithCarry, OR-ing the overflow into Q. SMLAD then
//         adds register a with a second AddWithCarry and ORs that overflow
//         into Q as well.
//       - SMUSD / SMLSD: same operand split and swap, but the high product is
//         subtracted from the low product with ir.Sub. SMUSD writes that
//         difference directly and does not touch Q; SMLSD adds register a
//         with AddWithCarry and ORs the overflow into Q.
//       - SMULXY / SMLAXY: N and M pick, for n and m respectively, either the
//         arithmetic-shift-right-by-16 value (bit set) or the sign-extended
//         low half (bit clear); the two picks are multiplied. SMLAXY adds
//         register a with AddWithCarry and ORs the overflow into Q.
//       - SMULWY / SMLAWY: n is sign-extended to 64 bits; m is logically
//         shifted right by 16 first when M is set, then its low half is
//         sign-extended to 64 bits; the 64-bit product is logically shifted
//         right by 16 and its least significant word is taken. SMLAWY adds
//         register a with AddWithCarry and ORs the overflow into Q.
//       - SMMUL / SMMLA / SMMLS: n and m are sign-extended to 64 bits and
//         multiplied. SMMLA adds the product to, and SMMLS subtracts it from,
//         Pack2x32To1x64(Imm32(0), a) -- presumably register a placed in the
//         upper word; confirm against Pack2x32To1x64's argument order. The
//         result is MostSignificantWord(...).result; when R is set, the carry
//         returned by MostSignificantWord is added in via AddWithCarry with a
//         zero operand.
//
// src/frontend/A32/translate/impl/translate_thumb.h
//   Declares the eleven new visitors between thumb32_MUL and thumb32_USAD8.
//
// NOTE(review): thumb32_SMLAD ORs Q after the intermediate product_lo +
// product_hi addition as well as after adding register a. Confirm that this
// matches the architectural overflow rule for SMLAD, which may be defined on
// the single combined sum -- TODO verify against the ARM ARM pseudocode.
diff --git a/src/frontend/A32/decoder/thumb32.inc b/src/frontend/A32/decoder/thumb32.inc index 63b1942b..d7b8b953 100644 --- a/src/frontend/A32/decoder/thumb32.inc +++ b/src/frontend/A32/decoder/thumb32.inc @@ -264,17 +264,17 @@ INST(thumb32_CLZ, "CLZ", "111110101011nnnn1111dd INST(thumb32_MUL, "MUL", "111110110000nnnn1111dddd0000mmmm") INST(thumb32_MLA, "MLA", "111110110000nnnnaaaadddd0000mmmm") INST(thumb32_MLS, "MLS", "111110110000nnnnaaaadddd0001mmmm") -//INST(thumb32_SMULXY, "SMULXY", "111110110001----1111----00------") -//INST(thumb32_SMLAXY, "SMLAXY", "111110110001------------00------") -//INST(thumb32_SMUAD, "SMUAD", "111110110010----1111----000-----") -//INST(thumb32_SMLAD, "SMLAD", "111110110010------------000-----") -//INST(thumb32_SMULWY, "SMULWY", "111110110011----1111----000-----") -//INST(thumb32_SMLAWY, "SMLAWY", "111110110011------------000-----") -//INST(thumb32_SMUSD, "SMUSD", "111110110100----1111----000-----") -//INST(thumb32_SMLSD, "SMLSD", "111110110100------------000-----") -//INST(thumb32_SMMUL, "SMMUL", "111110110101----1111----000-----") -//INST(thumb32_SMMLA, "SMMLA", "111110110101------------000-----") -//INST(thumb32_SMMLS, "SMMLS", "111110110110------------000-----") +INST(thumb32_SMULXY, "SMULXY", "111110110001nnnn1111dddd00NMmmmm") +INST(thumb32_SMLAXY, "SMLAXY", "111110110001nnnnaaaadddd00NMmmmm") +INST(thumb32_SMUAD, "SMUAD", "111110110010nnnn1111dddd000Mmmmm") +INST(thumb32_SMLAD, "SMLAD", "111110110010nnnnaaaadddd000Xmmmm") +INST(thumb32_SMULWY, "SMULWY", "111110110011nnnn1111dddd000Mmmmm") +INST(thumb32_SMLAWY, "SMLAWY", "111110110011nnnnaaaadddd000Mmmmm") +INST(thumb32_SMUSD, "SMUSD", "111110110100nnnn1111dddd000Mmmmm") +INST(thumb32_SMLSD, "SMLSD", "111110110100nnnnaaaadddd000Xmmmm") +INST(thumb32_SMMUL, "SMMUL", "111110110101nnnn1111dddd000Rmmmm") +INST(thumb32_SMMLA, "SMMLA", "111110110101nnnnaaaadddd000Rmmmm") +INST(thumb32_SMMLS, "SMMLS", "111110110110nnnnaaaadddd000Rmmmm") INST(thumb32_USAD8, "USAD8", 
"111110110111nnnn1111dddd0000mmmm") INST(thumb32_USADA8, "USADA8", "111110110111nnnnaaaadddd0000mmmm") diff --git a/src/frontend/A32/translate/impl/thumb32_multiply.cpp b/src/frontend/A32/translate/impl/thumb32_multiply.cpp index ecaee5a7..13143949 100644 --- a/src/frontend/A32/translate/impl/thumb32_multiply.cpp +++ b/src/frontend/A32/translate/impl/thumb32_multiply.cpp @@ -8,7 +8,7 @@ namespace Dynarmic::A32 { bool ThumbTranslatorVisitor::thumb32_MLA(Reg n, Reg a, Reg d, Reg m) { - if (d == Reg::PC || n == Reg::PC || m == Reg::PC) { + if (d == Reg::PC || n == Reg::PC || m == Reg::PC || a == Reg::PC) { return UnpredictableInstruction(); } @@ -22,7 +22,7 @@ bool ThumbTranslatorVisitor::thumb32_MLA(Reg n, Reg a, Reg d, Reg m) { } bool ThumbTranslatorVisitor::thumb32_MLS(Reg n, Reg a, Reg d, Reg m) { - if (d == Reg::PC || n == Reg::PC || m == Reg::PC) { + if (d == Reg::PC || n == Reg::PC || m == Reg::PC || a == Reg::PC) { return UnpredictableInstruction(); } @@ -48,6 +48,239 @@ bool ThumbTranslatorVisitor::thumb32_MUL(Reg n, Reg d, Reg m) { return true; } +bool ThumbTranslatorVisitor::thumb32_SMLAD(Reg n, Reg a, Reg d, bool X, Reg m) { + if (d == Reg::PC || n == Reg::PC || m == Reg::PC || a == Reg::PC) { + return UnpredictableInstruction(); + } + + const IR::U32 n32 = ir.GetRegister(n); + const IR::U32 m32 = ir.GetRegister(m); + const IR::U32 n_lo = ir.SignExtendHalfToWord(ir.LeastSignificantHalf(n32)); + const IR::U32 n_hi = ir.ArithmeticShiftRight(n32, ir.Imm8(16), ir.Imm1(0)).result; + + IR::U32 m_lo = ir.SignExtendHalfToWord(ir.LeastSignificantHalf(m32)); + IR::U32 m_hi = ir.ArithmeticShiftRight(m32, ir.Imm8(16), ir.Imm1(0)).result; + if (X) { + std::swap(m_lo, m_hi); + } + + const IR::U32 product_lo = ir.Mul(n_lo, m_lo); + const IR::U32 product_hi = ir.Mul(n_hi, m_hi); + const IR::U32 addend = ir.GetRegister(a); + + auto result_overflow = ir.AddWithCarry(product_lo, product_hi, ir.Imm1(0)); + ir.OrQFlag(result_overflow.overflow); + result_overflow = 
ir.AddWithCarry(result_overflow.result, addend, ir.Imm1(0)); + + ir.SetRegister(d, result_overflow.result); + ir.OrQFlag(result_overflow.overflow); + return true; +} + +bool ThumbTranslatorVisitor::thumb32_SMLSD(Reg n, Reg a, Reg d, bool X, Reg m) { + if (d == Reg::PC || n == Reg::PC || m == Reg::PC || a == Reg::PC) { + return UnpredictableInstruction(); + } + + const IR::U32 n32 = ir.GetRegister(n); + const IR::U32 m32 = ir.GetRegister(m); + const IR::U32 n_lo = ir.SignExtendHalfToWord(ir.LeastSignificantHalf(n32)); + const IR::U32 n_hi = ir.ArithmeticShiftRight(n32, ir.Imm8(16), ir.Imm1(0)).result; + + IR::U32 m_lo = ir.SignExtendHalfToWord(ir.LeastSignificantHalf(m32)); + IR::U32 m_hi = ir.ArithmeticShiftRight(m32, ir.Imm8(16), ir.Imm1(0)).result; + if (X) { + std::swap(m_lo, m_hi); + } + + const IR::U32 product_lo = ir.Mul(n_lo, m_lo); + const IR::U32 product_hi = ir.Mul(n_hi, m_hi); + const IR::U32 addend = ir.GetRegister(a); + const IR::U32 product = ir.Sub(product_lo, product_hi); + auto result_overflow = ir.AddWithCarry(product, addend, ir.Imm1(0)); + + ir.SetRegister(d, result_overflow.result); + ir.OrQFlag(result_overflow.overflow); + return true; +} + +bool ThumbTranslatorVisitor::thumb32_SMLAXY(Reg n, Reg a, Reg d, bool N, bool M, Reg m) { + if (d == Reg::PC || n == Reg::PC || m == Reg::PC || a == Reg::PC) { + return UnpredictableInstruction(); + } + + const IR::U32 n32 = ir.GetRegister(n); + const IR::U32 m32 = ir.GetRegister(m); + const IR::U32 n16 = N ? ir.ArithmeticShiftRight(n32, ir.Imm8(16), ir.Imm1(0)).result + : ir.SignExtendHalfToWord(ir.LeastSignificantHalf(n32)); + const IR::U32 m16 = M ? 
ir.ArithmeticShiftRight(m32, ir.Imm8(16), ir.Imm1(0)).result + : ir.SignExtendHalfToWord(ir.LeastSignificantHalf(m32)); + const IR::U32 product = ir.Mul(n16, m16); + const auto result_overflow = ir.AddWithCarry(product, ir.GetRegister(a), ir.Imm1(0)); + + ir.SetRegister(d, result_overflow.result); + ir.OrQFlag(result_overflow.overflow); + return true; +} + +bool ThumbTranslatorVisitor::thumb32_SMMLA(Reg n, Reg a, Reg d, bool R, Reg m) { + if (d == Reg::PC || n == Reg::PC || m == Reg::PC || a == Reg::PC) { + return UnpredictableInstruction(); + } + + const auto n64 = ir.SignExtendWordToLong(ir.GetRegister(n)); + const auto m64 = ir.SignExtendWordToLong(ir.GetRegister(m)); + const auto a64 = ir.Pack2x32To1x64(ir.Imm32(0), ir.GetRegister(a)); + const auto temp = ir.Add(a64, ir.Mul(n64, m64)); + const auto result_carry = ir.MostSignificantWord(temp); + auto result = result_carry.result; + if (R) { + result = ir.AddWithCarry(result, ir.Imm32(0), result_carry.carry).result; + } + + ir.SetRegister(d, result); + return true; +} + +bool ThumbTranslatorVisitor::thumb32_SMMLS(Reg n, Reg a, Reg d, bool R, Reg m) { + if (d == Reg::PC || n == Reg::PC || m == Reg::PC || a == Reg::PC) { + return UnpredictableInstruction(); + } + + const auto n64 = ir.SignExtendWordToLong(ir.GetRegister(n)); + const auto m64 = ir.SignExtendWordToLong(ir.GetRegister(m)); + const auto a64 = ir.Pack2x32To1x64(ir.Imm32(0), ir.GetRegister(a)); + const auto temp = ir.Sub(a64, ir.Mul(n64, m64)); + const auto result_carry = ir.MostSignificantWord(temp); + auto result = result_carry.result; + if (R) { + result = ir.AddWithCarry(result, ir.Imm32(0), result_carry.carry).result; + } + + ir.SetRegister(d, result); + return true; +} + +bool ThumbTranslatorVisitor::thumb32_SMMUL(Reg n, Reg d, bool R, Reg m) { + if (d == Reg::PC || n == Reg::PC || m == Reg::PC) { + return UnpredictableInstruction(); + } + + const auto n64 = ir.SignExtendWordToLong(ir.GetRegister(n)); + const auto m64 = 
ir.SignExtendWordToLong(ir.GetRegister(m)); + const auto product = ir.Mul(n64, m64); + const auto result_carry = ir.MostSignificantWord(product); + auto result = result_carry.result; + if (R) { + result = ir.AddWithCarry(result, ir.Imm32(0), result_carry.carry).result; + } + + ir.SetRegister(d, result); + return true; +} + +bool ThumbTranslatorVisitor::thumb32_SMUAD(Reg n, Reg d, bool M, Reg m) { + if (d == Reg::PC || n == Reg::PC || m == Reg::PC) { + return UnpredictableInstruction(); + } + + const IR::U32 n32 = ir.GetRegister(n); + const IR::U32 m32 = ir.GetRegister(m); + const IR::U32 n_lo = ir.SignExtendHalfToWord(ir.LeastSignificantHalf(n32)); + const IR::U32 n_hi = ir.ArithmeticShiftRight(n32, ir.Imm8(16), ir.Imm1(0)).result; + + IR::U32 m_lo = ir.SignExtendHalfToWord(ir.LeastSignificantHalf(m32)); + IR::U32 m_hi = ir.ArithmeticShiftRight(m32, ir.Imm8(16), ir.Imm1(0)).result; + if (M) { + std::swap(m_lo, m_hi); + } + + const IR::U32 product_lo = ir.Mul(n_lo, m_lo); + const IR::U32 product_hi = ir.Mul(n_hi, m_hi); + const auto result_overflow = ir.AddWithCarry(product_lo, product_hi, ir.Imm1(0)); + + ir.SetRegister(d, result_overflow.result); + ir.OrQFlag(result_overflow.overflow); + return true; +} + +bool ThumbTranslatorVisitor::thumb32_SMUSD(Reg n, Reg d, bool M, Reg m) { + if (d == Reg::PC || n == Reg::PC || m == Reg::PC) { + return UnpredictableInstruction(); + } + + const IR::U32 n32 = ir.GetRegister(n); + const IR::U32 m32 = ir.GetRegister(m); + const IR::U32 n_lo = ir.SignExtendHalfToWord(ir.LeastSignificantHalf(n32)); + const IR::U32 n_hi = ir.ArithmeticShiftRight(n32, ir.Imm8(16), ir.Imm1(0)).result; + + IR::U32 m_lo = ir.SignExtendHalfToWord(ir.LeastSignificantHalf(m32)); + IR::U32 m_hi = ir.ArithmeticShiftRight(m32, ir.Imm8(16), ir.Imm1(0)).result; + if (M) { + std::swap(m_lo, m_hi); + } + + const IR::U32 product_lo = ir.Mul(n_lo, m_lo); + const IR::U32 product_hi = ir.Mul(n_hi, m_hi); + const IR::U32 result = ir.Sub(product_lo, product_hi); + + 
ir.SetRegister(d, result); + return true; +} + +bool ThumbTranslatorVisitor::thumb32_SMULXY(Reg n, Reg d, bool N, bool M, Reg m) { + if (d == Reg::PC || n == Reg::PC || m == Reg::PC) { + return UnpredictableInstruction(); + } + + const auto n32 = ir.GetRegister(n); + const auto m32 = ir.GetRegister(m); + const auto n16 = N ? ir.ArithmeticShiftRight(n32, ir.Imm8(16), ir.Imm1(0)).result + : ir.SignExtendHalfToWord(ir.LeastSignificantHalf(n32)); + const auto m16 = M ? ir.ArithmeticShiftRight(m32, ir.Imm8(16), ir.Imm1(0)).result + : ir.SignExtendHalfToWord(ir.LeastSignificantHalf(m32)); + const auto result = ir.Mul(n16, m16); + + ir.SetRegister(d, result); + return true; +} + +bool ThumbTranslatorVisitor::thumb32_SMLAWY(Reg n, Reg a, Reg d, bool M, Reg m) { + if (d == Reg::PC || n == Reg::PC || m == Reg::PC || a == Reg::PC) { + return UnpredictableInstruction(); + } + + const IR::U64 n32 = ir.SignExtendWordToLong(ir.GetRegister(n)); + IR::U32 m32 = ir.GetRegister(m); + if (M) { + m32 = ir.LogicalShiftRight(m32, ir.Imm8(16), ir.Imm1(0)).result; + } + const IR::U64 m16 = ir.SignExtendWordToLong(ir.SignExtendHalfToWord(ir.LeastSignificantHalf(m32))); + const auto product = ir.LeastSignificantWord(ir.LogicalShiftRight(ir.Mul(n32, m16), ir.Imm8(16))); + const auto result_overflow = ir.AddWithCarry(product, ir.GetRegister(a), ir.Imm1(0)); + + ir.SetRegister(d, result_overflow.result); + ir.OrQFlag(result_overflow.overflow); + return true; +} + +bool ThumbTranslatorVisitor::thumb32_SMULWY(Reg n, Reg d, bool M, Reg m) { + if (d == Reg::PC || n == Reg::PC || m == Reg::PC) { + return UnpredictableInstruction(); + } + + const IR::U64 n32 = ir.SignExtendWordToLong(ir.GetRegister(n)); + IR::U32 m32 = ir.GetRegister(m); + if (M) { + m32 = ir.LogicalShiftRight(m32, ir.Imm8(16), ir.Imm1(0)).result; + } + const IR::U64 m16 = ir.SignExtendWordToLong(ir.SignExtendHalfToWord(ir.LeastSignificantHalf(m32))); + const auto result = ir.LogicalShiftRight(ir.Mul(n32, m16), ir.Imm8(16)); + + 
ir.SetRegister(d, ir.LeastSignificantWord(result)); + return true; +} + bool ThumbTranslatorVisitor::thumb32_USAD8(Reg n, Reg d, Reg m) { if (d == Reg::PC || n == Reg::PC || m == Reg::PC) { return UnpredictableInstruction(); @@ -62,7 +295,7 @@ bool ThumbTranslatorVisitor::thumb32_USAD8(Reg n, Reg d, Reg m) { } bool ThumbTranslatorVisitor::thumb32_USADA8(Reg n, Reg a, Reg d, Reg m) { - if (d == Reg::PC || n == Reg::PC || m == Reg::PC) { + if (d == Reg::PC || n == Reg::PC || m == Reg::PC || a == Reg::PC) { return UnpredictableInstruction(); } diff --git a/src/frontend/A32/translate/impl/translate_thumb.h b/src/frontend/A32/translate/impl/translate_thumb.h index 49f4675e..0ddec662 100644 --- a/src/frontend/A32/translate/impl/translate_thumb.h +++ b/src/frontend/A32/translate/impl/translate_thumb.h @@ -137,6 +137,17 @@ struct ThumbTranslatorVisitor final { bool thumb32_MLA(Reg n, Reg a, Reg d, Reg m); bool thumb32_MLS(Reg n, Reg a, Reg d, Reg m); bool thumb32_MUL(Reg n, Reg d, Reg m); + bool thumb32_SMLAD(Reg n, Reg a, Reg d, bool X, Reg m); + bool thumb32_SMLAXY(Reg n, Reg a, Reg d, bool N, bool M, Reg m); + bool thumb32_SMLAWY(Reg n, Reg a, Reg d, bool M, Reg m); + bool thumb32_SMLSD(Reg n, Reg a, Reg d, bool X, Reg m); + bool thumb32_SMMLA(Reg n, Reg a, Reg d, bool R, Reg m); + bool thumb32_SMMLS(Reg n, Reg a, Reg d, bool R, Reg m); + bool thumb32_SMMUL(Reg n, Reg d, bool R, Reg m); + bool thumb32_SMUAD(Reg n, Reg d, bool M, Reg m); + bool thumb32_SMUSD(Reg n, Reg d, bool M, Reg m); + bool thumb32_SMULXY(Reg n, Reg d, bool N, bool M, Reg m); + bool thumb32_SMULWY(Reg n, Reg d, bool M, Reg m); bool thumb32_USAD8(Reg n, Reg d, Reg m); bool thumb32_USADA8(Reg n, Reg a, Reg d, Reg m);