diff --git a/src/frontend/A64/decoder/a64.inc b/src/frontend/A64/decoder/a64.inc index 1af9139a..96a672ff 100644 --- a/src/frontend/A64/decoder/a64.inc +++ b/src/frontend/A64/decoder/a64.inc @@ -331,7 +331,7 @@ INST(SHA1SU0, "SHA1SU0", "01011 //INST(SHA256SU1, "SHA256SU1", "01011110000mmmmm011000nnnnnddddd") INST(SHA1H, "SHA1H", "0101111000101000000010nnnnnddddd") INST(SHA1SU1, "SHA1SU1", "0101111000101000000110nnnnnddddd") -//INST(SHA256SU0, "SHA256SU0", "0101111000101000001010nnnnnddddd") +INST(SHA256SU0, "SHA256SU0", "0101111000101000001010nnnnnddddd") // Data Processing - FP and SIMD - Scalar copy INST(DUP_elt_1, "DUP (element)", "01011110000iiiii000001nnnnnddddd") diff --git a/src/frontend/A64/translate/impl/simd_sha.cpp b/src/frontend/A64/translate/impl/simd_sha.cpp index a973fd0d..81da4bdc 100644 --- a/src/frontend/A64/translate/impl/simd_sha.cpp +++ b/src/frontend/A64/translate/impl/simd_sha.cpp @@ -114,4 +114,34 @@ bool TranslatorVisitor::SHA1H(Vec Vn, Vec Vd) { return true; } +bool TranslatorVisitor::SHA256SU0(Vec Vn, Vec Vd) { + const IR::U128 d = ir.GetQ(Vd); + const IR::U128 n = ir.GetQ(Vn); + + const IR::U128 t = [&] { + // Shuffle the upper three elements down: [3, 2, 1, 0] -> [0, 3, 2, 1] + const IR::U128 shuffled = ir.VectorShuffleWords(d, 0b00111001); + + return ir.VectorSetElement(32, shuffled, 3, ir.VectorGetElement(32, n, 0)); + }(); + + IR::U128 result = ir.ZeroVector(); + for (size_t i = 0; i < 4; i++) { + const IR::U32 modified_element = [&] { + const IR::U32 element = ir.VectorGetElement(32, t, i); + const IR::U32 tmp1 = ir.RotateRight(element, ir.Imm8(7)); + const IR::U32 tmp2 = ir.RotateRight(element, ir.Imm8(18)); + const IR::U32 tmp3 = ir.LogicalShiftRight(element, ir.Imm8(3)); + + return ir.Eor(tmp1, ir.Eor(tmp2, tmp3)); + }(); + + const IR::U32 d_element = ir.VectorGetElement(32, d, i); + result = ir.VectorSetElement(32, result, i, ir.Add(modified_element, d_element)); + } + + ir.SetQ(Vd, result); + return true; +} + } // namespace Dynarmic::A64