diff --git a/src/backend/x64/emit_x64_vector.cpp b/src/backend/x64/emit_x64_vector.cpp index c835d3de..a50afe9e 100644 --- a/src/backend/x64/emit_x64_vector.cpp +++ b/src/backend/x64/emit_x64_vector.cpp @@ -3194,7 +3194,7 @@ void EmitX64::EmitVectorSignedSaturatedDoublingMultiply32(EmitContext& ctx, IR:: const Xbyak::Xmm lower_result = ctx.reg_alloc.ScratchXmm(); code.vpsllq(lower_result, even, 32); - code.vblendps(lower_result, lower_result, even, 0b0101); + code.vblendps(lower_result, lower_result, odds, 0b0101); ctx.reg_alloc.DefineValue(lower_inst, lower_result); ctx.EraseInstruction(lower_inst); diff --git a/src/frontend/A64/decoder/a64.inc b/src/frontend/A64/decoder/a64.inc index c31734f2..4dc3e619 100644 --- a/src/frontend/A64/decoder/a64.inc +++ b/src/frontend/A64/decoder/a64.inc @@ -763,7 +763,7 @@ INST(MLS_vec, "MLS (vector)", "0Q101 INST(PMUL, "PMUL", "0Q101110zz1mmmmm100111nnnnnddddd") INST(UMAXP, "UMAXP", "0Q101110zz1mmmmm101001nnnnnddddd") INST(UMINP, "UMINP", "0Q101110zz1mmmmm101011nnnnnddddd") -//INST(SQRDMULH_vec_2, "SQRDMULH (vector)", "0Q101110zz1mmmmm101101nnnnnddddd") +INST(SQRDMULH_vec_2, "SQRDMULH (vector)", "0Q101110zz1mmmmm101101nnnnnddddd") INST(FMAXNMP_vec_2, "FMAXNMP (vector)", "0Q1011100z1mmmmm110001nnnnnddddd") //INST(FMLAL_vec_2, "FMLAL, FMLAL2 (vector)", "0Q1011100z1mmmmm110011nnnnnddddd") INST(FADDP_vec_2, "FADDP (vector)", "0Q1011100z1mmmmm110101nnnnnddddd") diff --git a/src/frontend/A64/translate/impl/simd_three_same.cpp b/src/frontend/A64/translate/impl/simd_three_same.cpp index eb2594ca..f9c35262 100644 --- a/src/frontend/A64/translate/impl/simd_three_same.cpp +++ b/src/frontend/A64/translate/impl/simd_three_same.cpp @@ -438,6 +438,23 @@ bool TranslatorVisitor::SQDMULH_vec_2(bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec V return true; } +bool TranslatorVisitor::SQRDMULH_vec_2(bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec Vd) { + if (size == 0b00 || size == 0b11) { + return ReservedValue(); + } + + const size_t esize = 8 << size.ZeroExtend(); + const size_t datasize = Q ? 128 : 64; + + const IR::U128 operand1 = V(datasize, Vn); + const IR::U128 operand2 = V(datasize, Vm); + const IR::UpperAndLower multiply = ir.VectorSignedSaturatedDoublingMultiply(esize, operand1, operand2); + const IR::U128 result = ir.VectorAdd(esize, multiply.upper, ir.VectorLogicalShiftRight(esize, multiply.lower, static_cast(esize - 1))); + + V(datasize, Vd, result); + return true; +} + bool TranslatorVisitor::ADD_vector(bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec Vd) { if (size == 0b11 && !Q) return ReservedValue(); const size_t esize = 8 << size.ZeroExtend();