IR: Split VectorSignedSaturatedDoublingMultiply into VectorSignedSaturatedDoublingMultiply{High,HighRounding}

This commit is contained in:
Merry 2022-08-03 23:07:42 +01:00 committed by merry
parent aaf7c41ab3
commit a97105c296
12 changed files with 131 additions and 125 deletions

View file

@ -1359,7 +1359,7 @@ void EmitIR<IR::Opcode::VectorSignedSaturatedAccumulateUnsigned64>(oaknut::CodeG
} }
template<> template<>
void EmitIR<IR::Opcode::VectorSignedSaturatedDoublingMultiply16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { void EmitIR<IR::Opcode::VectorSignedSaturatedDoublingMultiplyHigh16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code; (void)code;
(void)ctx; (void)ctx;
(void)inst; (void)inst;
@ -1367,7 +1367,23 @@ void EmitIR<IR::Opcode::VectorSignedSaturatedDoublingMultiply16>(oaknut::CodeGen
} }
template<> template<>
void EmitIR<IR::Opcode::VectorSignedSaturatedDoublingMultiply32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { void EmitIR<IR::Opcode::VectorSignedSaturatedDoublingMultiplyHigh32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
}
template<>
void EmitIR<IR::Opcode::VectorSignedSaturatedDoublingMultiplyHighRounding16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
}
template<>
void EmitIR<IR::Opcode::VectorSignedSaturatedDoublingMultiplyHighRounding32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code; (void)code;
(void)ctx; (void)ctx;
(void)inst; (void)inst;

View file

@ -3812,10 +3812,8 @@ void EmitX64::EmitVectorSignedSaturatedAccumulateUnsigned64(EmitContext& ctx, IR
EmitVectorSignedSaturatedAccumulateUnsigned<64>(code, ctx, inst); EmitVectorSignedSaturatedAccumulateUnsigned<64>(code, ctx, inst);
} }
void EmitX64::EmitVectorSignedSaturatedDoublingMultiply16(EmitContext& ctx, IR::Inst* inst) { template<bool is_rounding>
const auto upper_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetUpperFromOp); static void EmitVectorSignedSaturatedDoublingMultiply16(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
const auto lower_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetLowerFromOp);
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm x = ctx.reg_alloc.UseXmm(args[0]); const Xbyak::Xmm x = ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(args[1]); const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(args[1]);
@ -3839,52 +3837,53 @@ void EmitX64::EmitVectorSignedSaturatedDoublingMultiply16(EmitContext& ctx, IR::
ctx.reg_alloc.Release(x); ctx.reg_alloc.Release(x);
ctx.reg_alloc.Release(y); ctx.reg_alloc.Release(y);
if (lower_inst) { const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm lower_result = ctx.reg_alloc.ScratchXmm();
if (code.HasHostFeature(HostFeature::AVX)) { if (code.HasHostFeature(HostFeature::AVX)) {
code.vpaddw(lower_result, lower_tmp, lower_tmp); if constexpr (is_rounding) {
code.vpsrlw(lower_tmp, lower_tmp, 14);
code.vpaddw(lower_tmp, lower_tmp, code.MConst(xword, 0x0001000100010001, 0x0001000100010001));
code.vpsrlw(lower_tmp, lower_tmp, 1);
} else { } else {
code.movdqa(lower_result, lower_tmp);
code.paddw(lower_result, lower_result);
}
ctx.reg_alloc.DefineValue(lower_inst, lower_result);
ctx.EraseInstruction(lower_inst);
}
const Xbyak::Xmm upper_result = ctx.reg_alloc.ScratchXmm();
if (code.HasHostFeature(HostFeature::AVX)) {
code.vpsrlw(lower_tmp, lower_tmp, 15); code.vpsrlw(lower_tmp, lower_tmp, 15);
}
code.vpaddw(upper_tmp, upper_tmp, upper_tmp); code.vpaddw(upper_tmp, upper_tmp, upper_tmp);
code.vpor(upper_result, upper_tmp, lower_tmp); code.vpaddw(result, upper_tmp, lower_tmp);
code.vpcmpeqw(upper_tmp, upper_result, code.MConst(xword, 0x8000800080008000, 0x8000800080008000)); code.vpcmpeqw(upper_tmp, result, code.MConst(xword, 0x8000800080008000, 0x8000800080008000));
code.vpxor(upper_result, upper_result, upper_tmp); code.vpxor(result, result, upper_tmp);
} else { } else {
code.paddw(upper_tmp, upper_tmp); code.paddw(upper_tmp, upper_tmp);
if constexpr (is_rounding) {
code.psrlw(lower_tmp, 14);
code.paddw(lower_tmp, code.MConst(xword, 0x0001000100010001, 0x0001000100010001));
code.psrlw(lower_tmp, 1);
} else {
code.psrlw(lower_tmp, 15); code.psrlw(lower_tmp, 15);
code.movdqa(upper_result, upper_tmp); }
code.por(upper_result, lower_tmp); code.movdqa(result, upper_tmp);
code.paddw(result, lower_tmp);
code.movdqa(upper_tmp, code.MConst(xword, 0x8000800080008000, 0x8000800080008000)); code.movdqa(upper_tmp, code.MConst(xword, 0x8000800080008000, 0x8000800080008000));
code.pcmpeqw(upper_tmp, upper_result); code.pcmpeqw(upper_tmp, result);
code.pxor(upper_result, upper_tmp); code.pxor(result, upper_tmp);
} }
const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32(); const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32();
code.pmovmskb(bit, upper_tmp); code.pmovmskb(bit, upper_tmp);
code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit); code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
if (upper_inst) { ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(upper_inst, upper_result);
ctx.EraseInstruction(upper_inst);
}
} }
void EmitX64::EmitVectorSignedSaturatedDoublingMultiply32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorSignedSaturatedDoublingMultiplyHigh16(EmitContext& ctx, IR::Inst* inst) {
const auto upper_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetUpperFromOp); EmitVectorSignedSaturatedDoublingMultiply16<false>(code, ctx, inst);
const auto lower_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetLowerFromOp); }
void EmitX64::EmitVectorSignedSaturatedDoublingMultiplyHighRounding16(EmitContext& ctx, IR::Inst* inst) {
EmitVectorSignedSaturatedDoublingMultiply16<true>(code, ctx, inst);
}
template<bool is_rounding>
void EmitVectorSignedSaturatedDoublingMultiply32(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
if (code.HasHostFeature(HostFeature::AVX)) { if (code.HasHostFeature(HostFeature::AVX)) {
@ -3904,37 +3903,29 @@ void EmitX64::EmitVectorSignedSaturatedDoublingMultiply32(EmitContext& ctx, IR::
code.vpaddq(odds, odds, odds); code.vpaddq(odds, odds, odds);
code.vpaddq(even, even, even); code.vpaddq(even, even, even);
const Xbyak::Xmm upper_result = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
code.vpsrlq(upper_result, odds, 32); if constexpr (is_rounding) {
code.vblendps(upper_result, upper_result, even, 0b1010); code.vmovdqa(result, code.MConst(xword, 0x0000000080000000, 0x0000000080000000));
code.vpaddq(odds, odds, result);
code.vpaddq(even, even, result);
}
code.vpsrlq(result, odds, 32);
code.vblendps(result, result, even, 0b1010);
const Xbyak::Xmm mask = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm mask = ctx.reg_alloc.ScratchXmm();
const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32(); const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32();
code.vpcmpeqd(mask, upper_result, code.MConst(xword, 0x8000000080000000, 0x8000000080000000)); code.vpcmpeqd(mask, result, code.MConst(xword, 0x8000000080000000, 0x8000000080000000));
code.vpxor(upper_result, upper_result, mask); code.vpxor(result, result, mask);
code.pmovmskb(bit, mask); code.pmovmskb(bit, mask);
code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit); code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
ctx.reg_alloc.Release(mask); ctx.reg_alloc.Release(mask);
ctx.reg_alloc.Release(bit); ctx.reg_alloc.Release(bit);
if (upper_inst) { ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(upper_inst, upper_result);
ctx.EraseInstruction(upper_inst);
}
if (lower_inst) {
const Xbyak::Xmm lower_result = ctx.reg_alloc.ScratchXmm();
code.vpsllq(lower_result, even, 32);
code.vblendps(lower_result, lower_result, odds, 0b0101);
ctx.reg_alloc.DefineValue(lower_inst, lower_result);
ctx.EraseInstruction(lower_inst);
}
return; return;
} }
@ -3942,8 +3933,7 @@ void EmitX64::EmitVectorSignedSaturatedDoublingMultiply32(EmitContext& ctx, IR::
const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(args[1]); const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(args[1]);
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm sign_correction = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm sign_correction = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm upper_result = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm lower_result = ctx.reg_alloc.ScratchXmm();
// calculate sign correction // calculate sign correction
code.movdqa(tmp, x); code.movdqa(tmp, x);
@ -3966,35 +3956,37 @@ void EmitX64::EmitVectorSignedSaturatedDoublingMultiply32(EmitContext& ctx, IR::
code.paddq(tmp, tmp); code.paddq(tmp, tmp);
code.paddq(x, x); code.paddq(x, x);
if constexpr (is_rounding) {
code.movdqa(result, code.MConst(xword, 0x0000000080000000, 0x0000000080000000));
code.paddq(tmp, result);
code.paddq(x, result);
}
// put everything into place // put everything into place
code.pcmpeqw(upper_result, upper_result); code.pcmpeqw(result, result);
code.pcmpeqw(lower_result, lower_result); code.psllq(result, 32);
code.psllq(upper_result, 32); code.pand(result, x);
code.psrlq(lower_result, 32);
code.pand(upper_result, x);
code.pand(lower_result, tmp);
code.psrlq(tmp, 32); code.psrlq(tmp, 32);
code.psllq(x, 32); code.por(result, tmp);
code.por(upper_result, tmp); code.psubd(result, sign_correction);
code.por(lower_result, x);
code.psubd(upper_result, sign_correction);
const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32(); const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32();
code.movdqa(tmp, code.MConst(xword, 0x8000000080000000, 0x8000000080000000)); code.movdqa(tmp, code.MConst(xword, 0x8000000080000000, 0x8000000080000000));
code.pcmpeqd(tmp, upper_result); code.pcmpeqd(tmp, result);
code.pxor(upper_result, tmp); code.pxor(result, tmp);
code.pmovmskb(bit, tmp); code.pmovmskb(bit, tmp);
code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit); code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
if (upper_inst) { ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(upper_inst, upper_result);
ctx.EraseInstruction(upper_inst);
} }
if (lower_inst) {
ctx.reg_alloc.DefineValue(lower_inst, lower_result); void EmitX64::EmitVectorSignedSaturatedDoublingMultiplyHigh32(EmitContext& ctx, IR::Inst* inst) {
ctx.EraseInstruction(lower_inst); EmitVectorSignedSaturatedDoublingMultiply32<false>(code, ctx, inst);
} }
void EmitX64::EmitVectorSignedSaturatedDoublingMultiplyHighRounding32(EmitContext& ctx, IR::Inst* inst) {
EmitVectorSignedSaturatedDoublingMultiply32<true>(code, ctx, inst);
} }
void EmitX64::EmitVectorSignedSaturatedDoublingMultiplyLong16(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorSignedSaturatedDoublingMultiplyLong16(EmitContext& ctx, IR::Inst* inst) {

View file

@ -663,9 +663,9 @@ bool TranslatorVisitor::asimd_VQDMULH(bool D, size_t sz, size_t Vn, size_t Vd, b
const auto reg_n = ir.GetVector(n); const auto reg_n = ir.GetVector(n);
const auto reg_m = ir.GetVector(m); const auto reg_m = ir.GetVector(m);
const auto result = ir.VectorSignedSaturatedDoublingMultiply(esize, reg_n, reg_m); const auto result = ir.VectorSignedSaturatedDoublingMultiplyHigh(esize, reg_n, reg_m);
ir.SetVector(d, result.upper); ir.SetVector(d, result);
return true; return true;
} }
@ -685,8 +685,7 @@ bool TranslatorVisitor::asimd_VQRDMULH(bool D, size_t sz, size_t Vn, size_t Vd,
const auto reg_n = ir.GetVector(n); const auto reg_n = ir.GetVector(n);
const auto reg_m = ir.GetVector(m); const auto reg_m = ir.GetVector(m);
const auto multiply = ir.VectorSignedSaturatedDoublingMultiply(esize, reg_n, reg_m); const auto result = ir.VectorSignedSaturatedDoublingMultiplyHighRounding(esize, reg_n, reg_m);
const auto result = ir.VectorAdd(esize, multiply.upper, ir.VectorLogicalShiftRight(esize, multiply.lower, static_cast<u8>(esize - 1)));
ir.SetVector(d, result); ir.SetVector(d, result);
return true; return true;

View file

@ -106,7 +106,7 @@ bool ScalarMultiplyLong(TranslatorVisitor& v, bool U, bool D, size_t sz, size_t
return true; return true;
} }
bool ScalarMultiplyReturnHigh(TranslatorVisitor& v, bool Q, bool D, size_t sz, size_t Vn, size_t Vd, bool N, bool M, size_t Vm, Rounding round) { bool ScalarMultiplyDoublingReturnHigh(TranslatorVisitor& v, bool Q, bool D, size_t sz, size_t Vn, size_t Vd, bool N, bool M, size_t Vm, Rounding round) {
if (sz == 0b11) { if (sz == 0b11) {
return v.DecodeError(); return v.DecodeError();
} }
@ -126,15 +126,9 @@ bool ScalarMultiplyReturnHigh(TranslatorVisitor& v, bool Q, bool D, size_t sz, s
const auto reg_n = v.ir.GetVector(n); const auto reg_n = v.ir.GetVector(n);
const auto reg_m = v.ir.VectorBroadcastElement(esize, v.ir.GetVector(m), index); const auto reg_m = v.ir.VectorBroadcastElement(esize, v.ir.GetVector(m), index);
const auto result = [&] { const auto result = round == Rounding::None
const auto tmp = v.ir.VectorSignedSaturatedDoublingMultiply(esize, reg_n, reg_m); ? v.ir.VectorSignedSaturatedDoublingMultiplyHigh(esize, reg_n, reg_m)
: v.ir.VectorSignedSaturatedDoublingMultiplyHighRounding(esize, reg_n, reg_m);
if (round == Rounding::Round) {
return v.ir.VectorAdd(esize, tmp.upper, v.ir.VectorLogicalShiftRight(esize, tmp.lower, static_cast<u8>(esize - 1)));
}
return tmp.upper;
}();
v.ir.SetVector(d, result); v.ir.SetVector(d, result);
return true; return true;
@ -184,11 +178,11 @@ bool TranslatorVisitor::asimd_VQDMULL_scalar(bool D, size_t sz, size_t Vn, size_
} }
bool TranslatorVisitor::asimd_VQDMULH_scalar(bool Q, bool D, size_t sz, size_t Vn, size_t Vd, bool N, bool M, size_t Vm) { bool TranslatorVisitor::asimd_VQDMULH_scalar(bool Q, bool D, size_t sz, size_t Vn, size_t Vd, bool N, bool M, size_t Vm) {
return ScalarMultiplyReturnHigh(*this, Q, D, sz, Vn, Vd, N, M, Vm, Rounding::None); return ScalarMultiplyDoublingReturnHigh(*this, Q, D, sz, Vn, Vd, N, M, Vm, Rounding::None);
} }
bool TranslatorVisitor::asimd_VQRDMULH_scalar(bool Q, bool D, size_t sz, size_t Vn, size_t Vd, bool N, bool M, size_t Vm) { bool TranslatorVisitor::asimd_VQRDMULH_scalar(bool Q, bool D, size_t sz, size_t Vn, size_t Vd, bool N, bool M, size_t Vm) {
return ScalarMultiplyReturnHigh(*this, Q, D, sz, Vn, Vd, N, M, Vm, Rounding::Round); return ScalarMultiplyDoublingReturnHigh(*this, Q, D, sz, Vn, Vd, N, M, Vm, Rounding::Round);
} }
} // namespace Dynarmic::A32 } // namespace Dynarmic::A32

View file

@ -158,8 +158,7 @@ bool TranslatorVisitor::SQRDMULH_vec_1(Imm<2> size, Vec Vm, Vec Vn, Vec Vd) {
const IR::U128 operand1 = ir.ZeroExtendToQuad(ir.VectorGetElement(esize, V(128, Vn), 0)); const IR::U128 operand1 = ir.ZeroExtendToQuad(ir.VectorGetElement(esize, V(128, Vn), 0));
const IR::U128 operand2 = ir.ZeroExtendToQuad(ir.VectorGetElement(esize, V(128, Vm), 0)); const IR::U128 operand2 = ir.ZeroExtendToQuad(ir.VectorGetElement(esize, V(128, Vm), 0));
const IR::UpperAndLower multiply = ir.VectorSignedSaturatedDoublingMultiply(esize, operand1, operand2); const IR::U128 result = ir.VectorSignedSaturatedDoublingMultiplyHighRounding(esize, operand1, operand2);
const IR::U128 result = ir.VectorAdd(esize, multiply.upper, ir.VectorLogicalShiftRight(esize, multiply.lower, static_cast<u8>(esize - 1)));
V_scalar(esize, Vd, ir.VectorGetElement(esize, result, 0)); V_scalar(esize, Vd, ir.VectorGetElement(esize, result, 0));
return true; return true;

View file

@ -142,8 +142,7 @@ bool TranslatorVisitor::SQRDMULH_elt_1(Imm<2> size, Imm<1> L, Imm<1> M, Imm<4> V
const IR::U128 operand1 = ir.ZeroExtendToQuad(ir.VectorGetElement(esize, V(128, Vn), 0)); const IR::U128 operand1 = ir.ZeroExtendToQuad(ir.VectorGetElement(esize, V(128, Vn), 0));
const IR::U128 operand2 = V(128, Vm); const IR::U128 operand2 = V(128, Vm);
const IR::U128 broadcast = ir.VectorBroadcastElement(esize, operand2, index); const IR::U128 broadcast = ir.VectorBroadcastElement(esize, operand2, index);
const IR::UpperAndLower multiply = ir.VectorSignedSaturatedDoublingMultiply(esize, operand1, broadcast); const IR::U128 result = ir.VectorSignedSaturatedDoublingMultiplyHighRounding(esize, operand1, broadcast);
const IR::U128 result = ir.VectorAdd(esize, multiply.upper, ir.VectorLogicalShiftRight(esize, multiply.lower, static_cast<u8>(esize - 1)));
V(128, Vd, result); V(128, Vd, result);
return true; return true;

View file

@ -441,7 +441,7 @@ bool TranslatorVisitor::SQDMULH_vec_2(bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec V
const IR::U128 operand1 = V(datasize, Vn); const IR::U128 operand1 = V(datasize, Vn);
const IR::U128 operand2 = V(datasize, Vm); const IR::U128 operand2 = V(datasize, Vm);
const IR::U128 result = ir.VectorSignedSaturatedDoublingMultiply(esize, operand1, operand2).upper; const IR::U128 result = ir.VectorSignedSaturatedDoublingMultiplyHigh(esize, operand1, operand2);
V(datasize, Vd, result); V(datasize, Vd, result);
return true; return true;
@ -457,8 +457,7 @@ bool TranslatorVisitor::SQRDMULH_vec_2(bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec
const IR::U128 operand1 = V(datasize, Vn); const IR::U128 operand1 = V(datasize, Vn);
const IR::U128 operand2 = V(datasize, Vm); const IR::U128 operand2 = V(datasize, Vm);
const IR::UpperAndLower multiply = ir.VectorSignedSaturatedDoublingMultiply(esize, operand1, operand2); const IR::U128 result = ir.VectorSignedSaturatedDoublingMultiplyHighRounding(esize, operand1, operand2);
const IR::U128 result = ir.VectorAdd(esize, multiply.upper, ir.VectorLogicalShiftRight(esize, multiply.lower, static_cast<u8>(esize - 1)));
V(datasize, Vd, result); V(datasize, Vd, result);
return true; return true;

View file

@ -368,7 +368,7 @@ bool TranslatorVisitor::SQDMULH_elt_2(bool Q, Imm<2> size, Imm<1> L, Imm<1> M, I
const IR::U128 operand1 = V(datasize, Vn); const IR::U128 operand1 = V(datasize, Vn);
const IR::U128 operand2 = V(idxsize, Vm); const IR::U128 operand2 = V(idxsize, Vm);
const IR::U128 index_vector = ir.VectorBroadcastElement(esize, operand2, index); const IR::U128 index_vector = ir.VectorBroadcastElement(esize, operand2, index);
const IR::U128 result = ir.VectorSignedSaturatedDoublingMultiply(esize, operand1, index_vector).upper; const IR::U128 result = ir.VectorSignedSaturatedDoublingMultiplyHigh(esize, operand1, index_vector);
V(datasize, Vd, result); V(datasize, Vd, result);
return true; return true;
@ -387,8 +387,7 @@ bool TranslatorVisitor::SQRDMULH_elt_2(bool Q, Imm<2> size, Imm<1> L, Imm<1> M,
const IR::U128 operand1 = V(datasize, Vn); const IR::U128 operand1 = V(datasize, Vn);
const IR::U128 operand2 = V(idxsize, Vm); const IR::U128 operand2 = V(idxsize, Vm);
const IR::U128 index_vector = ir.VectorBroadcastElement(esize, operand2, index); const IR::U128 index_vector = ir.VectorBroadcastElement(esize, operand2, index);
const IR::UpperAndLower multiply = ir.VectorSignedSaturatedDoublingMultiply(esize, operand1, index_vector); const IR::U128 result = ir.VectorSignedSaturatedDoublingMultiplyHighRounding(esize, operand1, index_vector);
const IR::U128 result = ir.VectorAdd(esize, multiply.upper, ir.VectorLogicalShiftRight(esize, multiply.lower, static_cast<u8>(esize - 1)));
V(datasize, Vd, result); V(datasize, Vd, result);
return true; return true;

View file

@ -1749,22 +1749,26 @@ U128 IREmitter::VectorSignedSaturatedAccumulateUnsigned(size_t esize, const U128
UNREACHABLE(); UNREACHABLE();
} }
UpperAndLower IREmitter::VectorSignedSaturatedDoublingMultiply(size_t esize, const U128& a, const U128& b) { U128 IREmitter::VectorSignedSaturatedDoublingMultiplyHigh(size_t esize, const U128& a, const U128& b) {
const Value multiply = [&] {
switch (esize) { switch (esize) {
case 16: case 16:
return Inst(Opcode::VectorSignedSaturatedDoublingMultiply16, a, b); return Inst<U128>(Opcode::VectorSignedSaturatedDoublingMultiplyHigh16, a, b);
case 32: case 32:
return Inst(Opcode::VectorSignedSaturatedDoublingMultiply32, a, b); return Inst<U128>(Opcode::VectorSignedSaturatedDoublingMultiplyHigh32, a, b);
default: default:
UNREACHABLE(); UNREACHABLE();
} }
}(); }
return { U128 IREmitter::VectorSignedSaturatedDoublingMultiplyHighRounding(size_t esize, const U128& a, const U128& b) {
Inst<U128>(Opcode::GetUpperFromOp, multiply), switch (esize) {
Inst<U128>(Opcode::GetLowerFromOp, multiply), case 16:
}; return Inst<U128>(Opcode::VectorSignedSaturatedDoublingMultiplyHighRounding16, a, b);
case 32:
return Inst<U128>(Opcode::VectorSignedSaturatedDoublingMultiplyHighRounding32, a, b);
default:
UNREACHABLE();
}
} }
U128 IREmitter::VectorSignedSaturatedDoublingMultiplyLong(size_t esize, const U128& a, const U128& b) { U128 IREmitter::VectorSignedSaturatedDoublingMultiplyLong(size_t esize, const U128& a, const U128& b) {

View file

@ -296,7 +296,8 @@ public:
UpperAndLower VectorSignedMultiply(size_t esize, const U128& a, const U128& b); UpperAndLower VectorSignedMultiply(size_t esize, const U128& a, const U128& b);
U128 VectorSignedSaturatedAbs(size_t esize, const U128& a); U128 VectorSignedSaturatedAbs(size_t esize, const U128& a);
U128 VectorSignedSaturatedAccumulateUnsigned(size_t esize, const U128& a, const U128& b); U128 VectorSignedSaturatedAccumulateUnsigned(size_t esize, const U128& a, const U128& b);
UpperAndLower VectorSignedSaturatedDoublingMultiply(size_t esize, const U128& a, const U128& b); U128 VectorSignedSaturatedDoublingMultiplyHigh(size_t esize, const U128& a, const U128& b);
U128 VectorSignedSaturatedDoublingMultiplyHighRounding(size_t esize, const U128& a, const U128& b);
U128 VectorSignedSaturatedDoublingMultiplyLong(size_t esize, const U128& a, const U128& b); U128 VectorSignedSaturatedDoublingMultiplyLong(size_t esize, const U128& a, const U128& b);
U128 VectorSignedSaturatedNarrowToSigned(size_t original_esize, const U128& a); U128 VectorSignedSaturatedNarrowToSigned(size_t original_esize, const U128& a);
U128 VectorSignedSaturatedNarrowToUnsigned(size_t original_esize, const U128& a); U128 VectorSignedSaturatedNarrowToUnsigned(size_t original_esize, const U128& a);

View file

@ -448,8 +448,10 @@ bool Inst::WritesToFPSRCumulativeSaturationBit() const {
case Opcode::VectorSignedSaturatedAdd16: case Opcode::VectorSignedSaturatedAdd16:
case Opcode::VectorSignedSaturatedAdd32: case Opcode::VectorSignedSaturatedAdd32:
case Opcode::VectorSignedSaturatedAdd64: case Opcode::VectorSignedSaturatedAdd64:
case Opcode::VectorSignedSaturatedDoublingMultiply16: case Opcode::VectorSignedSaturatedDoublingMultiplyHigh16:
case Opcode::VectorSignedSaturatedDoublingMultiply32: case Opcode::VectorSignedSaturatedDoublingMultiplyHigh32:
case Opcode::VectorSignedSaturatedDoublingMultiplyHighRounding16:
case Opcode::VectorSignedSaturatedDoublingMultiplyHighRounding32:
case Opcode::VectorSignedSaturatedDoublingMultiplyLong16: case Opcode::VectorSignedSaturatedDoublingMultiplyLong16:
case Opcode::VectorSignedSaturatedDoublingMultiplyLong32: case Opcode::VectorSignedSaturatedDoublingMultiplyLong32:
case Opcode::VectorSignedSaturatedNarrowToSigned16: case Opcode::VectorSignedSaturatedNarrowToSigned16:

View file

@ -481,8 +481,10 @@ OPCODE(VectorSignedSaturatedAccumulateUnsigned8, U128, U128
OPCODE(VectorSignedSaturatedAccumulateUnsigned16, U128, U128, U128 ) OPCODE(VectorSignedSaturatedAccumulateUnsigned16, U128, U128, U128 )
OPCODE(VectorSignedSaturatedAccumulateUnsigned32, U128, U128, U128 ) OPCODE(VectorSignedSaturatedAccumulateUnsigned32, U128, U128, U128 )
OPCODE(VectorSignedSaturatedAccumulateUnsigned64, U128, U128, U128 ) OPCODE(VectorSignedSaturatedAccumulateUnsigned64, U128, U128, U128 )
OPCODE(VectorSignedSaturatedDoublingMultiply16, Void, U128, U128 ) OPCODE(VectorSignedSaturatedDoublingMultiplyHigh16, U128, U128, U128 )
OPCODE(VectorSignedSaturatedDoublingMultiply32, Void, U128, U128 ) OPCODE(VectorSignedSaturatedDoublingMultiplyHigh32, U128, U128, U128 )
OPCODE(VectorSignedSaturatedDoublingMultiplyHighRounding16, U128, U128, U128 )
OPCODE(VectorSignedSaturatedDoublingMultiplyHighRounding32, U128, U128, U128 )
OPCODE(VectorSignedSaturatedDoublingMultiplyLong16, U128, U128, U128 ) OPCODE(VectorSignedSaturatedDoublingMultiplyLong16, U128, U128, U128 )
OPCODE(VectorSignedSaturatedDoublingMultiplyLong32, U128, U128, U128 ) OPCODE(VectorSignedSaturatedDoublingMultiplyLong32, U128, U128, U128 )
OPCODE(VectorSignedSaturatedNarrowToSigned16, U128, U128 ) OPCODE(VectorSignedSaturatedNarrowToSigned16, U128, U128 )