// Patch annotation (comment-only preamble; the patch text below is unchanged).
//
// NOTE(review): in this copy the patch's line breaks appear to have been
// collapsed into spaces -- the file headers, the hunk headers and the "-"/"+"
// markers all sit inline -- so the line structure presumably has to be restored
// before a patch tool can apply it.
//
// 1. src/backend_x64/emit_x64_vector.cpp -- EmitX64::EmitVectorExtractLower
//    - args[1] is now acquired with UseXmm instead of UseScratchXmm. After the
//      change xmm_b appears only as the second operand of punpcklqdq; the
//      deleted pslldq was the one instruction that took xmm_b as its first
//      operand.
//    - The deleted pslldq + por pair is replaced by a punpcklqdq(xmm_a, xmm_b)
//      emitted before the existing psrldq(xmm_a, position / 8), and a
//      movq(xmm_a, xmm_a) emitted after it.
//      Presumably: pack the low 64 bits of both inputs into xmm_a, shift right
//      by whole bytes, then clear the upper 64 bits of the result -- confirm
//      against the x86 instruction reference.
//    - ASSERT(position % 8 == 0) is untouched context. position is divided by 8
//      to form the psrldq immediate, so it looks like a bit count -- TODO
//      confirm against the IR definition.
//    - xmm_a is still the register passed to DefineValue for inst.
//
// 2. src/frontend/A64/decoder/a64.inc
//    - The INST(EXT, "EXT", ...) entry is uncommented; its bit pattern string is
//      the same on the removed and the added line.
//
// 3. src/frontend/A64/translate/impl/simd_extract.cpp -- TranslatorVisitor::EXT
//    - result now comes from ir.VectorExtractLower(lo, hi, position) when
//      datasize == 64, and from ir.VectorExtract(lo, hi, position) otherwise.
//      Only part of this function is visible in the hunk; how datasize and
//      position are derived is not shown here.
diff --git a/src/backend_x64/emit_x64_vector.cpp b/src/backend_x64/emit_x64_vector.cpp index f2c56455..ce686728 100644 --- a/src/backend_x64/emit_x64_vector.cpp +++ b/src/backend_x64/emit_x64_vector.cpp @@ -722,14 +722,14 @@ void EmitX64::EmitVectorExtractLower(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseScratchXmm(args[1]); + const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]); const u8 position = args[2].GetImmediateU8(); ASSERT(position % 8 == 0); + code.punpcklqdq(xmm_a, xmm_b); code.psrldq(xmm_a, position / 8); - code.pslldq(xmm_b, (64 - position) / 8); - code.por(xmm_a, xmm_b); + code.movq(xmm_a, xmm_a); ctx.reg_alloc.DefineValue(inst, xmm_a); } diff --git a/src/frontend/A64/decoder/a64.inc b/src/frontend/A64/decoder/a64.inc index 6dcd2a63..a75b4037 100644 --- a/src/frontend/A64/decoder/a64.inc +++ b/src/frontend/A64/decoder/a64.inc @@ -523,7 +523,7 @@ INST(TRN2, "TRN2", "0Q001 INST(ZIP2, "ZIP2", "0Q001110zz0mmmmm011110nnnnnddddd") // Data Processing - FP and SIMD - SIMD Extract -//INST(EXT, "EXT", "0Q101110000mmmmm0iiii0nnnnnddddd") +INST(EXT, "EXT", "0Q101110000mmmmm0iiii0nnnnnddddd") // Data Processing - FP and SIMD - SIMD Copy INST(DUP_elt_2, "DUP (element)", "0Q001110000iiiii000001nnnnnddddd") diff --git a/src/frontend/A64/translate/impl/simd_extract.cpp b/src/frontend/A64/translate/impl/simd_extract.cpp index 70c58de0..78719316 100644 --- a/src/frontend/A64/translate/impl/simd_extract.cpp +++ b/src/frontend/A64/translate/impl/simd_extract.cpp @@ -18,7 +18,7 @@ bool TranslatorVisitor::EXT(bool Q, Vec Vm, Imm<4> imm4, Vec Vn, Vec Vd) { const IR::U128 lo = V(datasize, Vn); const IR::U128 hi = V(datasize, Vm); - const IR::U128 result = ir.VectorExtract(lo, hi, position); + const IR::U128 result = datasize == 64 ? 
ir.VectorExtractLower(lo, hi, position) : ir.VectorExtract(lo, hi, position); V(datasize, Vd, result);