IR: Add VectorMultiply{Signed,Unsigned}Widen instructions

Polyfill for x86-64 backend
This commit is contained in:
Merry 2022-08-02 11:03:54 +01:00 committed by merry
parent bbf0179d30
commit 61d509dda2
11 changed files with 180 additions and 15 deletions

View file

@ -1001,6 +1001,54 @@ void EmitIR<IR::Opcode::VectorMultiply64>(oaknut::CodeGenerator& code, EmitConte
ASSERT_FALSE("Unimplemented"); ASSERT_FALSE("Unimplemented");
} }
// Widening vector multiply opcodes are not yet implemented for this
// (oaknut-based) backend; hitting any of these emitters is a hard failure.
// Parameters are intentionally unnamed: the stubs use none of them.
template<>
void EmitIR<IR::Opcode::VectorMultiplySignedWiden8>(oaknut::CodeGenerator&, EmitContext&, IR::Inst*) {
    ASSERT_FALSE("Unimplemented");
}

template<>
void EmitIR<IR::Opcode::VectorMultiplySignedWiden16>(oaknut::CodeGenerator&, EmitContext&, IR::Inst*) {
    ASSERT_FALSE("Unimplemented");
}

template<>
void EmitIR<IR::Opcode::VectorMultiplySignedWiden32>(oaknut::CodeGenerator&, EmitContext&, IR::Inst*) {
    ASSERT_FALSE("Unimplemented");
}

template<>
void EmitIR<IR::Opcode::VectorMultiplyUnsignedWiden8>(oaknut::CodeGenerator&, EmitContext&, IR::Inst*) {
    ASSERT_FALSE("Unimplemented");
}

template<>
void EmitIR<IR::Opcode::VectorMultiplyUnsignedWiden16>(oaknut::CodeGenerator&, EmitContext&, IR::Inst*) {
    ASSERT_FALSE("Unimplemented");
}

template<>
void EmitIR<IR::Opcode::VectorMultiplyUnsignedWiden32>(oaknut::CodeGenerator&, EmitContext&, IR::Inst*) {
    ASSERT_FALSE("Unimplemented");
}
template<> template<>
void EmitIR<IR::Opcode::VectorNarrow16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { void EmitIR<IR::Opcode::VectorNarrow16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code; (void)code;

View file

@ -55,6 +55,7 @@ static std::function<void(BlockOfCode&)> GenRCP(const A32::UserConfig& conf) {
static Optimization::PolyfillOptions GenPolyfillOptions(const BlockOfCode& code) { static Optimization::PolyfillOptions GenPolyfillOptions(const BlockOfCode& code) {
return Optimization::PolyfillOptions{ return Optimization::PolyfillOptions{
.sha256 = !code.HasHostFeature(HostFeature::SHA), .sha256 = !code.HasHostFeature(HostFeature::SHA),
.vector_multiply_widen = true,
}; };
} }

View file

@ -51,6 +51,7 @@ static std::function<void(BlockOfCode&)> GenRCP(const A64::UserConfig& conf) {
static Optimization::PolyfillOptions GenPolyfillOptions(const BlockOfCode& code) { static Optimization::PolyfillOptions GenPolyfillOptions(const BlockOfCode& code) {
return Optimization::PolyfillOptions{ return Optimization::PolyfillOptions{
.sha256 = !code.HasHostFeature(HostFeature::SHA), .sha256 = !code.HasHostFeature(HostFeature::SHA),
.vector_multiply_widen = true,
}; };
} }

View file

@ -2221,6 +2221,30 @@ void EmitX64::EmitVectorMultiply64(EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.DefineValue(inst, tmp2); ctx.reg_alloc.DefineValue(inst, tmp2);
} }
// The VectorMultiply{Signed,Unsigned}Widen opcodes are always lowered by the
// IR polyfill pass before reaching the x64 emitter (see the
// vector_multiply_widen cases in PolyfillPass), so these handlers are
// unreachable; ASSERT_FALSE catches a missed polyfill.
void EmitX64::EmitVectorMultiplySignedWiden8(EmitContext&, IR::Inst*) {
ASSERT_FALSE("Unexpected VectorMultiplySignedWiden8");
}
void EmitX64::EmitVectorMultiplySignedWiden16(EmitContext&, IR::Inst*) {
ASSERT_FALSE("Unexpected VectorMultiplySignedWiden16");
}
void EmitX64::EmitVectorMultiplySignedWiden32(EmitContext&, IR::Inst*) {
ASSERT_FALSE("Unexpected VectorMultiplySignedWiden32");
}
void EmitX64::EmitVectorMultiplyUnsignedWiden8(EmitContext&, IR::Inst*) {
ASSERT_FALSE("Unexpected VectorMultiplyUnsignedWiden8");
}
void EmitX64::EmitVectorMultiplyUnsignedWiden16(EmitContext&, IR::Inst*) {
ASSERT_FALSE("Unexpected VectorMultiplyUnsignedWiden16");
}
void EmitX64::EmitVectorMultiplyUnsignedWiden32(EmitContext&, IR::Inst*) {
ASSERT_FALSE("Unexpected VectorMultiplyUnsignedWiden32");
}
void EmitX64::EmitVectorNarrow16(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorNarrow16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);

View file

@ -909,11 +909,30 @@ bool TranslatorVisitor::asimd_VABDL(bool U, bool D, size_t sz, size_t Vn, size_t
} }
bool TranslatorVisitor::asimd_VMLAL(bool U, bool D, size_t sz, size_t Vn, size_t Vd, bool op, bool N, bool M, size_t Vm) { bool TranslatorVisitor::asimd_VMLAL(bool U, bool D, size_t sz, size_t Vn, size_t Vd, bool op, bool N, bool M, size_t Vm) {
return WideInstruction(*this, U, D, sz, Vn, Vd, N, M, Vm, WidenBehaviour::Both, [this, op](size_t esize, const auto& reg_d, const auto& reg_n, const auto& reg_m) { const size_t esize = 8U << sz;
const auto multiply = ir.VectorMultiply(esize, reg_n, reg_m);
return op ? ir.VectorSub(esize, reg_d, multiply) if (sz == 0b11) {
: ir.VectorAdd(esize, reg_d, multiply); return DecodeError();
}); }
if (mcl::bit::get_bit<0>(Vd)) {
return UndefinedInstruction();
}
const auto d = ToVector(true, Vd, D);
const auto m = ToVector(false, Vm, M);
const auto n = ToVector(false, Vn, N);
const auto reg_d = ir.GetVector(d);
const auto reg_m = ir.GetVector(m);
const auto reg_n = ir.GetVector(n);
const auto multiply = U ? ir.VectorMultiplyUnsignedWiden(esize, reg_n, reg_m)
: ir.VectorMultiplySignedWiden(esize, reg_n, reg_m);
const auto result = op ? ir.VectorSub(esize * 2, reg_d, multiply)
: ir.VectorAdd(esize * 2, reg_d, multiply);
ir.SetVector(d, result);
return true;
} }
bool TranslatorVisitor::asimd_VMULL(bool U, bool D, size_t sz, size_t Vn, size_t Vd, bool P, bool N, bool M, size_t Vm) { bool TranslatorVisitor::asimd_VMULL(bool U, bool D, size_t sz, size_t Vn, size_t Vd, bool P, bool N, bool M, size_t Vm) {
@ -930,14 +949,11 @@ bool TranslatorVisitor::asimd_VMULL(bool U, bool D, size_t sz, size_t Vn, size_t
const auto m = ToVector(false, Vm, M); const auto m = ToVector(false, Vm, M);
const auto n = ToVector(false, Vn, N); const auto n = ToVector(false, Vn, N);
const auto extend_reg = [&](const auto& reg) {
return U ? ir.VectorZeroExtend(esize, reg) : ir.VectorSignExtend(esize, reg);
};
const auto reg_n = ir.GetVector(n); const auto reg_n = ir.GetVector(n);
const auto reg_m = ir.GetVector(m); const auto reg_m = ir.GetVector(m);
const auto result = P ? ir.VectorPolynomialMultiplyLong(esize, reg_n, reg_m) const auto result = P ? ir.VectorPolynomialMultiplyLong(esize, reg_n, reg_m)
: ir.VectorMultiply(2 * esize, extend_reg(reg_n), extend_reg(reg_m)); : U ? ir.VectorMultiplyUnsignedWiden(esize, reg_n, reg_m)
: ir.VectorMultiplySignedWiden(esize, reg_n, reg_m);
ir.SetVector(d, result); ir.SetVector(d, result);
return true; return true;

View file

@ -85,11 +85,10 @@ bool ScalarMultiplyLong(TranslatorVisitor& v, bool U, bool D, size_t sz, size_t
const auto [m, index] = GetScalarLocation(esize, M, Vm); const auto [m, index] = GetScalarLocation(esize, M, Vm);
const auto scalar = v.ir.VectorGetElement(esize, v.ir.GetVector(m), index); const auto scalar = v.ir.VectorGetElement(esize, v.ir.GetVector(m), index);
const auto ext_scalar = U ? (esize == 16 ? IR::U32U64{v.ir.ZeroExtendToWord(scalar)} : IR::U32U64{v.ir.ZeroExtendToLong(scalar)}) const auto reg_n = v.ir.GetVector(n);
: (esize == 16 ? IR::U32U64{v.ir.SignExtendToWord(scalar)} : IR::U32U64{v.ir.SignExtendToLong(scalar)}); const auto reg_m = v.ir.VectorBroadcast(esize, scalar);
const auto reg_n = U ? v.ir.VectorZeroExtend(esize, v.ir.GetVector(n)) : v.ir.VectorSignExtend(esize, v.ir.GetVector(n)); const auto addend = U ? v.ir.VectorMultiplyUnsignedWiden(esize, reg_n, reg_m)
const auto reg_m = v.ir.VectorBroadcast(esize * 2, ext_scalar); : v.ir.VectorMultiplySignedWiden(esize, reg_n, reg_m);
const auto addend = v.ir.VectorMultiply(esize * 2, reg_n, reg_m);
const auto result = [&] { const auto result = [&] {
switch (multiply) { switch (multiply) {
case MultiplyBehavior::Multiply: case MultiplyBehavior::Multiply:

View file

@ -1404,6 +1404,30 @@ U128 IREmitter::VectorMultiply(size_t esize, const U128& a, const U128& b) {
UNREACHABLE(); UNREACHABLE();
} }
/// Emits a signed widening vector multiply: esize-bit lanes of a and b are
/// sign-extended and multiplied into 2*esize-bit result lanes (see the
/// corresponding polyfill lowering). esize must be 8, 16, or 32.
U128 IREmitter::VectorMultiplySignedWiden(size_t esize, const U128& a, const U128& b) {
    if (esize == 8) {
        return Inst<U128>(Opcode::VectorMultiplySignedWiden8, a, b);
    }
    if (esize == 16) {
        return Inst<U128>(Opcode::VectorMultiplySignedWiden16, a, b);
    }
    if (esize == 32) {
        return Inst<U128>(Opcode::VectorMultiplySignedWiden32, a, b);
    }
    UNREACHABLE();
}
/// Emits an unsigned widening vector multiply: esize-bit lanes of a and b are
/// zero-extended and multiplied into 2*esize-bit result lanes (see the
/// corresponding polyfill lowering). esize must be 8, 16, or 32.
U128 IREmitter::VectorMultiplyUnsignedWiden(size_t esize, const U128& a, const U128& b) {
    if (esize == 8) {
        return Inst<U128>(Opcode::VectorMultiplyUnsignedWiden8, a, b);
    }
    if (esize == 16) {
        return Inst<U128>(Opcode::VectorMultiplyUnsignedWiden16, a, b);
    }
    if (esize == 32) {
        return Inst<U128>(Opcode::VectorMultiplyUnsignedWiden32, a, b);
    }
    UNREACHABLE();
}
U128 IREmitter::VectorNarrow(size_t original_esize, const U128& a) { U128 IREmitter::VectorNarrow(size_t original_esize, const U128& a) {
switch (original_esize) { switch (original_esize) {
case 16: case 16:

View file

@ -264,6 +264,8 @@ public:
U128 VectorMinSigned(size_t esize, const U128& a, const U128& b); U128 VectorMinSigned(size_t esize, const U128& a, const U128& b);
U128 VectorMinUnsigned(size_t esize, const U128& a, const U128& b); U128 VectorMinUnsigned(size_t esize, const U128& a, const U128& b);
U128 VectorMultiply(size_t esize, const U128& a, const U128& b); U128 VectorMultiply(size_t esize, const U128& a, const U128& b);
U128 VectorMultiplySignedWiden(size_t esize, const U128& a, const U128& b);
U128 VectorMultiplyUnsignedWiden(size_t esize, const U128& a, const U128& b);
U128 VectorNarrow(size_t original_esize, const U128& a); U128 VectorNarrow(size_t original_esize, const U128& a);
U128 VectorNot(const U128& a); U128 VectorNot(const U128& a);
U128 VectorOr(const U128& a, const U128& b); U128 VectorOr(const U128& a, const U128& b);

View file

@ -402,6 +402,12 @@ OPCODE(VectorMultiply8, U128, U128
OPCODE(VectorMultiply16, U128, U128, U128 ) OPCODE(VectorMultiply16, U128, U128, U128 )
OPCODE(VectorMultiply32, U128, U128, U128 ) OPCODE(VectorMultiply32, U128, U128, U128 )
OPCODE(VectorMultiply64, U128, U128, U128 ) OPCODE(VectorMultiply64, U128, U128, U128 )
OPCODE(VectorMultiplySignedWiden8, U128, U128, U128 )
OPCODE(VectorMultiplySignedWiden16, U128, U128, U128 )
OPCODE(VectorMultiplySignedWiden32, U128, U128, U128 )
OPCODE(VectorMultiplyUnsignedWiden8, U128, U128, U128 )
OPCODE(VectorMultiplyUnsignedWiden16, U128, U128, U128 )
OPCODE(VectorMultiplyUnsignedWiden32, U128, U128, U128 )
OPCODE(VectorNarrow16, U128, U128 ) OPCODE(VectorNarrow16, U128, U128 )
OPCODE(VectorNarrow32, U128, U128 ) OPCODE(VectorNarrow32, U128, U128 )
OPCODE(VectorNarrow64, U128, U128 ) OPCODE(VectorNarrow64, U128, U128 )

View file

@ -22,6 +22,7 @@ namespace Dynarmic::Optimization {
struct PolyfillOptions { struct PolyfillOptions {
bool sha256 = false; bool sha256 = false;
bool vector_multiply_widen = false;
bool operator==(const PolyfillOptions&) const = default; bool operator==(const PolyfillOptions&) const = default;
}; };

View file

@ -138,6 +138,19 @@ void PolyfillSHA256Hash(IR::IREmitter& ir, IR::Inst& inst) {
inst.ReplaceUsesWith(part1 ? x : y); inst.ReplaceUsesWith(part1 ? x : y);
} }
// Lowers VectorMultiply{Signed,Unsigned}Widen<esize> for backends without a
// native widening multiply: each esize-bit lane of both operands is
// sign-extended (is_signed) or zero-extended, then an ordinary 2*esize-bit
// element-wise multiply produces the widened result. All uses of `inst` are
// redirected to that result.
// Note: C-style casts replaced with braced initialization per project C++
// guidelines; locals made const — behavior is unchanged.
template<size_t esize, bool is_signed>
void PolyfillVectorMultiplyWiden(IR::IREmitter& ir, IR::Inst& inst) {
    const IR::U128 n{inst.GetArg(0)};
    const IR::U128 m{inst.GetArg(1)};

    const IR::U128 wide_n = is_signed ? ir.VectorSignExtend(esize, n) : ir.VectorZeroExtend(esize, n);
    const IR::U128 wide_m = is_signed ? ir.VectorSignExtend(esize, m) : ir.VectorZeroExtend(esize, m);

    const IR::U128 result = ir.VectorMultiply(esize * 2, wide_n, wide_m);

    inst.ReplaceUsesWith(result);
}
} // namespace } // namespace
void PolyfillPass(IR::Block& block, const PolyfillOptions& polyfill) { void PolyfillPass(IR::Block& block, const PolyfillOptions& polyfill) {
@ -166,6 +179,36 @@ void PolyfillPass(IR::Block& block, const PolyfillOptions& polyfill) {
PolyfillSHA256Hash(ir, inst); PolyfillSHA256Hash(ir, inst);
} }
break; break;
case IR::Opcode::VectorMultiplySignedWiden8:
if (polyfill.vector_multiply_widen) {
PolyfillVectorMultiplyWiden<8, true>(ir, inst);
}
break;
case IR::Opcode::VectorMultiplySignedWiden16:
if (polyfill.vector_multiply_widen) {
PolyfillVectorMultiplyWiden<16, true>(ir, inst);
}
break;
case IR::Opcode::VectorMultiplySignedWiden32:
if (polyfill.vector_multiply_widen) {
PolyfillVectorMultiplyWiden<32, true>(ir, inst);
}
break;
case IR::Opcode::VectorMultiplyUnsignedWiden8:
if (polyfill.vector_multiply_widen) {
PolyfillVectorMultiplyWiden<8, false>(ir, inst);
}
break;
case IR::Opcode::VectorMultiplyUnsignedWiden16:
if (polyfill.vector_multiply_widen) {
PolyfillVectorMultiplyWiden<16, false>(ir, inst);
}
break;
case IR::Opcode::VectorMultiplyUnsignedWiden32:
if (polyfill.vector_multiply_widen) {
PolyfillVectorMultiplyWiden<32, false>(ir, inst);
}
break;
default: default:
break; break;
} }