diff --git a/src/backend/x64/emit_x64_vector.cpp b/src/backend/x64/emit_x64_vector.cpp
index 27839e83..c835d3de 100644
--- a/src/backend/x64/emit_x64_vector.cpp
+++ b/src/backend/x64/emit_x64_vector.cpp
@@ -3262,6 +3262,85 @@ void EmitX64::EmitVectorSignedSaturatedDoublingMultiply32(EmitContext& ctx, IR::
     }
 }
 
+void EmitX64::EmitVectorSignedSaturatedDoublingMultiplyLong16(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(args[0]);
+    const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(args[1]);
+
+    code.punpcklwd(x, x);
+    code.punpcklwd(y, y);
+    code.pmaddwd(x, y);
+
+    if (code.DoesCpuSupport(Xbyak::util::Cpu::tAVX)) {
+        code.vpcmpeqd(y, x, code.MConst(xword, 0x8000000080000000, 0x8000000080000000));
+        code.vpxor(x, x, y);
+    } else {
+        code.movdqa(y, code.MConst(xword, 0x8000000080000000, 0x8000000080000000));
+        code.pcmpeqd(y, x);
+        code.pxor(x, y);
+    }
+
+    const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32();
+    code.pmovmskb(bit, y);
+    code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
+
+    ctx.reg_alloc.DefineValue(inst, x);
+}
+
+void EmitX64::EmitVectorSignedSaturatedDoublingMultiplyLong32(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(args[0]);
+    const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(args[1]);
+
+    if (code.DoesCpuSupport(Xbyak::util::Cpu::tAVX)) {
+        code.vpmovsxdq(x, x);
+        code.vpmovsxdq(y, y);
+        code.vpmuldq(x, x, y);
+        code.vpaddq(x, x, x);
+    } else {
+        const Xbyak::Reg64 a = ctx.reg_alloc.ScratchGpr();
+        const Xbyak::Reg64 b = ctx.reg_alloc.ScratchGpr();
+        const Xbyak::Reg64 c = ctx.reg_alloc.ScratchGpr();
+        const Xbyak::Reg64 d = ctx.reg_alloc.ScratchGpr();
+
+        code.movq(c, x);
+        code.movq(d, y);
+        code.movsxd(a, c.cvt32());
+        code.movsxd(b, d.cvt32());
+        code.sar(c, 32);
+        code.sar(d, 32);
+        code.imul(a, b);
+        code.imul(c, d);
+
+        code.movq(x, a);
+        code.movq(y, c);
+        code.punpcklqdq(x, y);
+        code.paddq(x, x);
+
+        ctx.reg_alloc.Release(a);
+        ctx.reg_alloc.Release(b);
+        ctx.reg_alloc.Release(c);
+        ctx.reg_alloc.Release(d);
+    }
+
+    const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32();
+    if (code.DoesCpuSupport(Xbyak::util::Cpu::tAVX)) {
+        code.vpcmpeqq(y, x, code.MConst(xword, 0x8000000000000000, 0x8000000000000000));
+        code.vpxor(x, x, y);
+        code.vpmovmskb(bit, y);
+    } else {
+        code.movdqa(y, code.MConst(xword, 0x8000000000000000, 0x8000000000000000));
+        code.pcmpeqd(y, x);
+        code.shufps(y, y, 0b11110101);
+        code.pxor(x, y);
+        code.pmovmskb(bit, y);
+    }
+    code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
+
+    ctx.reg_alloc.DefineValue(inst, x);
+}
+
 static void EmitVectorSignedSaturatedNarrowToSigned(size_t original_esize, BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     const Xbyak::Xmm src = ctx.reg_alloc.UseXmm(args[0]);
diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp
index 7b73efa2..e81b528b 100644
--- a/src/frontend/ir/ir_emitter.cpp
+++ b/src/frontend/ir/ir_emitter.cpp
@@ -1594,6 +1594,17 @@ UpperAndLower IREmitter::VectorSignedSaturatedDoublingMultiply(size_t esize, con
     };
 }
 
+U128 IREmitter::VectorSignedSaturatedDoublingMultiplyLong(size_t esize, const U128& a, const U128& b) {
+    switch (esize) {
+    case 16:
+        return Inst<U128>(Opcode::VectorSignedSaturatedDoublingMultiplyLong16, a, b);
+    case 32:
+        return Inst<U128>(Opcode::VectorSignedSaturatedDoublingMultiplyLong32, a, b);
+    }
+    UNREACHABLE();
+    return {};
+}
+
 U128 IREmitter::VectorSignedSaturatedNarrowToSigned(size_t original_esize, const U128& a) {
     switch (original_esize) {
     case 16:
diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h
index 9385f29c..016cda79 100644
--- a/src/frontend/ir/ir_emitter.h
+++ b/src/frontend/ir/ir_emitter.h
@@ -274,6 +274,7 @@ public:
     U128 VectorSignedSaturatedAbs(size_t esize, const U128& a);
     U128 VectorSignedSaturatedAccumulateUnsigned(size_t esize, const U128& a, const U128& b);
     UpperAndLower VectorSignedSaturatedDoublingMultiply(size_t esize, const U128& a, const U128& b);
+    U128 VectorSignedSaturatedDoublingMultiplyLong(size_t esize, const U128& a, const U128& b);
     U128 VectorSignedSaturatedNarrowToSigned(size_t original_esize, const U128& a);
     U128 VectorSignedSaturatedNarrowToUnsigned(size_t original_esize, const U128& a);
     U128 VectorSignedSaturatedNeg(size_t esize, const U128& a);
diff --git a/src/frontend/ir/microinstruction.cpp b/src/frontend/ir/microinstruction.cpp
index 299be55b..5a91ab92 100644
--- a/src/frontend/ir/microinstruction.cpp
+++ b/src/frontend/ir/microinstruction.cpp
@@ -355,6 +355,10 @@ bool Inst::WritesToFPSRCumulativeSaturationBit() const {
     case Opcode::VectorSignedSaturatedAccumulateUnsigned16:
     case Opcode::VectorSignedSaturatedAccumulateUnsigned32:
     case Opcode::VectorSignedSaturatedAccumulateUnsigned64:
+    case Opcode::VectorSignedSaturatedDoublingMultiply16:
+    case Opcode::VectorSignedSaturatedDoublingMultiply32:
+    case Opcode::VectorSignedSaturatedDoublingMultiplyLong16:
+    case Opcode::VectorSignedSaturatedDoublingMultiplyLong32:
     case Opcode::VectorSignedSaturatedNarrowToSigned16:
     case Opcode::VectorSignedSaturatedNarrowToSigned32:
     case Opcode::VectorSignedSaturatedNarrowToSigned64:
diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc
index 03279c31..504f5bd7 100644
--- a/src/frontend/ir/opcodes.inc
+++ b/src/frontend/ir/opcodes.inc
@@ -410,6 +410,8 @@ OPCODE(VectorSignedSaturatedAccumulateUnsigned32, U128, U128, U128 )
 OPCODE(VectorSignedSaturatedAccumulateUnsigned64, U128, U128, U128 )
 OPCODE(VectorSignedSaturatedDoublingMultiply16, Void, U128, U128 )
 OPCODE(VectorSignedSaturatedDoublingMultiply32, Void, U128, U128 )
+OPCODE(VectorSignedSaturatedDoublingMultiplyLong16, U128, U128, U128 )
+OPCODE(VectorSignedSaturatedDoublingMultiplyLong32, U128, U128, U128 )
 OPCODE(VectorSignedSaturatedNarrowToSigned16, U128, U128 )
 OPCODE(VectorSignedSaturatedNarrowToSigned32, U128, U128 )
 OPCODE(VectorSignedSaturatedNarrowToSigned64, U128, U128 )