diff --git a/src/frontend/A64/decoder/a64.inc b/src/frontend/A64/decoder/a64.inc index a2832c65..337f20a3 100644 --- a/src/frontend/A64/decoder/a64.inc +++ b/src/frontend/A64/decoder/a64.inc @@ -874,7 +874,7 @@ INST(SM3SS1, "SM3SS1", "11001 // Data Processing - FP and SIMD - SHA512 two register INST(SHA512SU0, "SHA512SU0", "1100111011000000100000nnnnnddddd") -//INST(SM4E, "SM4E", "1100111011000000100001nnnnnddddd") +INST(SM4E, "SM4E", "1100111011000000100001nnnnnddddd") // Data Processing - FP and SIMD - Conversion between floating point and fixed point //INST(SCVTF_float_fix, "SCVTF (scalar, fixed-point)", "z0011110yy000010ppppppnnnnnddddd") diff --git a/src/frontend/A64/translate/impl/simd_sha512.cpp b/src/frontend/A64/translate/impl/simd_sha512.cpp index 976f14c4..85040232 100644 --- a/src/frontend/A64/translate/impl/simd_sha512.cpp +++ b/src/frontend/A64/translate/impl/simd_sha512.cpp @@ -239,4 +239,41 @@ bool TranslatorVisitor::SM3PARTW2(Vec Vm, Vec Vn, Vec Vd) { return true; } +bool TranslatorVisitor::SM4E(Vec Vn, Vec Vd) { + const IR::U128 n = ir.GetQ(Vn); + IR::U128 roundresult = ir.GetQ(Vd); + + for (size_t i = 0; i < 4; i++) { + const IR::U32 round_key = ir.VectorGetElement(32, n, i); + + const IR::U32 upper_round = ir.VectorGetElement(32, roundresult, 3); + const IR::U32 before_upper_round = ir.VectorGetElement(32, roundresult, 2); + const IR::U32 after_lower_round = ir.VectorGetElement(32, roundresult, 1); + + IR::U128 intval_vec = ir.ZeroExtendToQuad(ir.Eor(upper_round, ir.Eor(before_upper_round, ir.Eor(after_lower_round, round_key)))); + + for (size_t i = 0; i < 4; i++) { + const IR::U8 byte_element = ir.VectorGetElement(8, intval_vec, i); + intval_vec = ir.VectorSetElement(8, intval_vec, i, ir.SM4AccessSubstitutionBox(byte_element)); + } + + const IR::U32 intval = [&] { + const IR::U32 low_word = ir.VectorGetElement(32, intval_vec, 0); + const IR::U32 tmp1 = ir.RotateRight(low_word, ir.Imm8(30)); + const IR::U32 tmp2 = ir.RotateRight(low_word, ir.Imm8(22)); + const IR::U32 tmp3 = ir.RotateRight(low_word, ir.Imm8(14)); + const IR::U32 tmp4 = ir.RotateRight(low_word, ir.Imm8(8)); + + const IR::U32 tmp5 = ir.Eor(low_word, ir.Eor(tmp1, ir.Eor(tmp2, ir.Eor(tmp3, tmp4)))); + return ir.Eor(tmp5, ir.VectorGetElement(32, roundresult, 0)); + }(); + + roundresult = ir.VectorShuffleWords(roundresult, 0b00111001); + roundresult = ir.VectorSetElement(32, roundresult, 3, intval); + } + + ir.SetQ(Vd, roundresult); + return true; +} + } // namespace Dynarmic::A64