IR: Remove VectorShuffleHighHalfwords and VectorShuffleLowHalfwords

This commit is contained in:
Merry 2022-08-06 12:57:46 +01:00 committed by merry
parent c6667997bc
commit 9313f5ea88
7 changed files with 191 additions and 119 deletions

View file

@ -1146,6 +1146,54 @@ void EmitIR<IR::Opcode::VectorReverseBits>(oaknut::CodeGenerator& code, EmitCont
EmitTwoOpArranged<8>(code, ctx, inst, [&](auto Vresult, auto Voperand) { code.RBIT(Vresult, Voperand); }); EmitTwoOpArranged<8>(code, ctx, inst, [&](auto Vresult, auto Voperand) { code.RBIT(Vresult, Voperand); });
} }
// Reverses the order of the 8-bit elements inside every 16-bit group of the operand.
// AArch64 provides this directly as REV16 (vector), so the emitter follows the same
// EmitTwoOpArranged<8> pattern already used for RBIT in VectorReverseBits.
// NOTE(review): assumes EmitTwoOpArranged<8> hands the lambda byte-arranged registers — confirm.
template<>
void EmitIR<IR::Opcode::VectorReverseElementsInHalfGroups8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitTwoOpArranged<8>(code, ctx, inst, [&](auto Vresult, auto Voperand) { code.REV16(Vresult, Voperand); });
}
// Reverses the order of the 8-bit elements inside every 32-bit group of the operand.
// AArch64 provides this directly as REV32 (vector) on byte elements, so the emitter follows
// the same EmitTwoOpArranged<8> pattern already used for RBIT in VectorReverseBits.
// NOTE(review): assumes EmitTwoOpArranged<8> hands the lambda byte-arranged registers — confirm.
template<>
void EmitIR<IR::Opcode::VectorReverseElementsInWordGroups8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitTwoOpArranged<8>(code, ctx, inst, [&](auto Vresult, auto Voperand) { code.REV32(Vresult, Voperand); });
}
// Reverses the order of the 16-bit elements inside every 32-bit group of the operand.
// AArch64 provides this directly as REV32 (vector) on halfword elements.
// NOTE(review): assumes EmitTwoOpArranged<16> hands the lambda halfword-arranged registers,
// analogous to the <8> instantiation used for RBIT — confirm.
template<>
void EmitIR<IR::Opcode::VectorReverseElementsInWordGroups16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitTwoOpArranged<16>(code, ctx, inst, [&](auto Vresult, auto Voperand) { code.REV32(Vresult, Voperand); });
}
// Reverses the order of the 8-bit elements inside every 64-bit group of the operand.
// AArch64 provides this directly as REV64 (vector) on byte elements, so the emitter follows
// the same EmitTwoOpArranged<8> pattern already used for RBIT in VectorReverseBits.
// NOTE(review): assumes EmitTwoOpArranged<8> hands the lambda byte-arranged registers — confirm.
template<>
void EmitIR<IR::Opcode::VectorReverseElementsInLongGroups8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitTwoOpArranged<8>(code, ctx, inst, [&](auto Vresult, auto Voperand) { code.REV64(Vresult, Voperand); });
}
// Reverses the order of the 16-bit elements inside every 64-bit group of the operand.
// AArch64 provides this directly as REV64 (vector) on halfword elements.
// NOTE(review): assumes EmitTwoOpArranged<16> hands the lambda halfword-arranged registers,
// analogous to the <8> instantiation used for RBIT — confirm.
template<>
void EmitIR<IR::Opcode::VectorReverseElementsInLongGroups16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitTwoOpArranged<16>(code, ctx, inst, [&](auto Vresult, auto Voperand) { code.REV64(Vresult, Voperand); });
}
// Reverses the order of the 32-bit elements inside every 64-bit group of the operand.
// AArch64 provides this directly as REV64 (vector) on word elements.
// NOTE(review): assumes EmitTwoOpArranged<32> hands the lambda word-arranged registers,
// analogous to the <8> instantiation used for RBIT — confirm.
template<>
void EmitIR<IR::Opcode::VectorReverseElementsInLongGroups32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitTwoOpArranged<32>(code, ctx, inst, [&](auto Vresult, auto Voperand) { code.REV64(Vresult, Voperand); });
}
template<> template<>
void EmitIR<IR::Opcode::VectorReduceAdd8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { void EmitIR<IR::Opcode::VectorReduceAdd8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
EmitReduce<8>(code, ctx, inst, [&](auto& Bresult, auto Voperand) { code.ADDV(Bresult, Voperand); }); EmitReduce<8>(code, ctx, inst, [&](auto& Bresult, auto Voperand) { code.ADDV(Bresult, Voperand); });
@ -1236,22 +1284,6 @@ void EmitIR<IR::Opcode::VectorRoundingShiftLeftU64>(oaknut::CodeGenerator& code,
EmitThreeOpArranged<64>(code, ctx, inst, [&](auto Vresult, auto Va, auto Vb) { code.URSHL(Vresult, Va, Vb); }); EmitThreeOpArranged<64>(code, ctx, inst, [&](auto Vresult, auto Va, auto Vb) { code.URSHL(Vresult, Va, Vb); });
} }
// Placeholder: this backend has no lowering for VectorShuffleHighHalfwords.
// Reaching it trips the "Unimplemented" assertion.
template<>
void EmitIR<IR::Opcode::VectorShuffleHighHalfwords>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    // Parameters are intentionally unused until an implementation exists.
    static_cast<void>(code);
    static_cast<void>(ctx);
    static_cast<void>(inst);

    ASSERT_FALSE("Unimplemented");
}
// Placeholder: this backend has no lowering for VectorShuffleLowHalfwords.
// Reaching it trips the "Unimplemented" assertion.
template<>
void EmitIR<IR::Opcode::VectorShuffleLowHalfwords>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    // Parameters are intentionally unused until an implementation exists.
    static_cast<void>(code);
    static_cast<void>(ctx);
    static_cast<void>(inst);

    ASSERT_FALSE("Unimplemented");
}
template<> template<>
void EmitIR<IR::Opcode::VectorShuffleWords>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { void EmitIR<IR::Opcode::VectorShuffleWords>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code; (void)code;

View file

@ -3023,6 +3023,89 @@ void EmitX64::EmitVectorReverseBits(EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.DefineValue(inst, data); ctx.reg_alloc.DefineValue(inst, data);
} }
// Swaps the two bytes of every 16-bit lane: per halfword, result = (x >> 8) | (x << 8).
void EmitX64::EmitVectorReverseElementsInHalfGroups8(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
    const Xbyak::Xmm shifted_up = ctx.reg_alloc.ScratchXmm();

    code.movdqa(shifted_up, result);  // work on a copy so both shifts see the original value
    code.psllw(shifted_up, 8);        // low byte of each halfword moves into the high byte
    code.psrlw(result, 8);            // high byte of each halfword moves into the low byte
    code.por(result, shifted_up);     // combine the two halves

    ctx.reg_alloc.DefineValue(inst, result);
}
// Reverses the four bytes of every 32-bit lane in two stages:
//   1. swap the bytes inside each halfword (shift/or),
//   2. swap the two halfwords of each 32-bit word (PSHUFLW/PSHUFHW).
void EmitX64::EmitVectorReverseElementsInWordGroups8(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
    const Xbyak::Xmm shifted_up = ctx.reg_alloc.ScratchXmm();

    // TODO: PSHUFB

    // Stage 1: byte swap within each halfword.
    code.movdqa(shifted_up, result);
    code.psllw(shifted_up, 8);
    code.psrlw(result, 8);
    code.por(result, shifted_up);

    // Stage 2: halfword order [0, 1, 2, 3] -> [1, 0, 3, 2] in both 64-bit halves.
    code.pshuflw(result, result, 0b10110001);
    code.pshufhw(result, result, 0b10110001);

    ctx.reg_alloc.DefineValue(inst, result);
}
// Swaps the two 16-bit elements of every 32-bit lane.
void EmitX64::EmitVectorReverseElementsInWordGroups16(EmitContext& ctx, IR::Inst* inst) {
    // Halfword selector [1, 0, 3, 2]: exchanges each adjacent pair of halfwords.
    constexpr int swap_adjacent_halfwords = 0b10110001;

    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);

    // PSHUFLW covers the low 64 bits, PSHUFHW the high 64 bits.
    code.pshuflw(result, result, swap_adjacent_halfwords);
    code.pshufhw(result, result, swap_adjacent_halfwords);

    ctx.reg_alloc.DefineValue(inst, result);
}
// Reverses the eight bytes of every 64-bit lane in two stages:
//   1. swap the bytes inside each halfword (shift/or),
//   2. reverse the four halfwords of each 64-bit half (PSHUFLW/PSHUFHW).
void EmitX64::EmitVectorReverseElementsInLongGroups8(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
    const Xbyak::Xmm shifted_up = ctx.reg_alloc.ScratchXmm();

    // TODO: PSHUFB

    // Stage 1: byte swap within each halfword.
    code.movdqa(shifted_up, result);
    code.psllw(shifted_up, 8);
    code.psrlw(result, 8);
    code.por(result, shifted_up);

    // Stage 2: halfword order [0, 1, 2, 3] -> [3, 2, 1, 0] in both 64-bit halves.
    code.pshuflw(result, result, 0b00011011);
    code.pshufhw(result, result, 0b00011011);

    ctx.reg_alloc.DefineValue(inst, result);
}
// Reverses the four 16-bit elements of every 64-bit lane.
void EmitX64::EmitVectorReverseElementsInLongGroups16(EmitContext& ctx, IR::Inst* inst) {
    // Halfword selector [3, 2, 1, 0]: full reversal within a 64-bit half.
    constexpr int reverse_halfwords = 0b00011011;

    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);

    // PSHUFLW covers the low 64 bits, PSHUFHW the high 64 bits.
    code.pshuflw(result, result, reverse_halfwords);
    code.pshufhw(result, result, reverse_halfwords);

    ctx.reg_alloc.DefineValue(inst, result);
}
// Swaps the two 32-bit elements of every 64-bit lane.
//
// The previous PSHUFLW/PSHUFHW pair (imm 0b01001110) moved halfwords [2, 3, 0, 1] within each
// 64-bit half, which is exactly an exchange of adjacent doublewords. A single SSE2 PSHUFD with
// doubleword selector [1, 0, 3, 2] produces the same result with one instruction instead of two.
void EmitX64::EmitVectorReverseElementsInLongGroups32(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]);

    // imm8 fields, low to high: dst[0]=src[1], dst[1]=src[0], dst[2]=src[3], dst[3]=src[2].
    code.pshufd(data, data, 0b10110001);

    ctx.reg_alloc.DefineValue(inst, data);
}
void EmitX64::EmitVectorReduceAdd8(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorReduceAdd8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
@ -3308,14 +3391,6 @@ static void VectorShuffleImpl(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins
ctx.reg_alloc.DefineValue(inst, result); ctx.reg_alloc.DefineValue(inst, result);
} }
// Emits VectorShuffleHighHalfwords by delegating to VectorShuffleImpl with Xbyak's pshufhw emitter.
// NOTE(review): presumably the opcode's U8 immediate becomes the PSHUFHW imm8 — confirm in VectorShuffleImpl.
void EmitX64::EmitVectorShuffleHighHalfwords(EmitContext& ctx, IR::Inst* inst) {
VectorShuffleImpl(code, ctx, inst, &Xbyak::CodeGenerator::pshufhw);
}
// Emits VectorShuffleLowHalfwords by delegating to VectorShuffleImpl with Xbyak's pshuflw emitter.
// NOTE(review): presumably the opcode's U8 immediate becomes the PSHUFLW imm8 — confirm in VectorShuffleImpl.
void EmitX64::EmitVectorShuffleLowHalfwords(EmitContext& ctx, IR::Inst* inst) {
VectorShuffleImpl(code, ctx, inst, &Xbyak::CodeGenerator::pshuflw);
}
void EmitX64::EmitVectorShuffleWords(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorShuffleWords(EmitContext& ctx, IR::Inst* inst) {
VectorShuffleImpl(code, ctx, inst, &Xbyak::CodeGenerator::pshufd); VectorShuffleImpl(code, ctx, inst, &Xbyak::CodeGenerator::pshufd);
} }

View file

@ -117,47 +117,18 @@ bool TranslatorVisitor::asimd_VREV(bool D, size_t sz, size_t Vd, size_t op, bool
const auto m = ToVector(Q, Vm, M); const auto m = ToVector(Q, Vm, M);
const auto result = [this, m, op, sz] { const auto result = [this, m, op, sz] {
const auto reg_m = ir.GetVector(m); const auto reg_m = ir.GetVector(m);
const size_t esize = 16U << sz; const size_t esize = 8 << sz;
const auto shift = static_cast<u8>(8U << sz);
// 64-bit regions switch (op) {
if (op == 0b00) { case 0b00:
IR::U128 result = ir.VectorOr(ir.VectorLogicalShiftRight(esize, reg_m, shift), return ir.VectorReverseElementsInLongGroups(esize, reg_m);
ir.VectorLogicalShiftLeft(esize, reg_m, shift)); case 0b01:
return ir.VectorReverseElementsInWordGroups(esize, reg_m);
switch (sz) { case 0b10:
case 0: // 8-bit elements return ir.VectorReverseElementsInHalfGroups(esize, reg_m);
result = ir.VectorShuffleLowHalfwords(result, 0b00011011);
result = ir.VectorShuffleHighHalfwords(result, 0b00011011);
break;
case 1: // 16-bit elements
result = ir.VectorShuffleLowHalfwords(result, 0b01001110);
result = ir.VectorShuffleHighHalfwords(result, 0b01001110);
break;
}
return result;
} }
// 32-bit regions UNREACHABLE();
if (op == 0b01) {
IR::U128 result = ir.VectorOr(ir.VectorLogicalShiftRight(esize, reg_m, shift),
ir.VectorLogicalShiftLeft(esize, reg_m, shift));
// If dealing with 8-bit elements we'll need to shuffle the bytes in each halfword
// e.g. Assume the following numbers point out bytes in a 32-bit word, we're essentially
// changing [3, 2, 1, 0] to [2, 3, 0, 1]
if (sz == 0) {
result = ir.VectorShuffleLowHalfwords(result, 0b10110001);
result = ir.VectorShuffleHighHalfwords(result, 0b10110001);
}
return result;
}
// 16-bit regions
return ir.VectorOr(ir.VectorLogicalShiftRight(esize, reg_m, 8),
ir.VectorLogicalShiftLeft(esize, reg_m, 8));
}(); }();
ir.SetVector(d, result); ir.SetVector(d, result);

View file

@ -673,81 +673,45 @@ bool TranslatorVisitor::RBIT_asimd(bool Q, Vec Vn, Vec Vd) {
} }
bool TranslatorVisitor::REV16_asimd(bool Q, Imm<2> size, Vec Vn, Vec Vd) { bool TranslatorVisitor::REV16_asimd(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
if (size != 0) { if (size > 0) {
return UnallocatedEncoding(); return UnallocatedEncoding();
} }
const size_t datasize = Q ? 128 : 64; const size_t datasize = Q ? 128 : 64;
constexpr size_t esize = 16; constexpr size_t esize = 8;
const IR::U128 data = V(datasize, Vn); const IR::U128 data = V(datasize, Vn);
const IR::U128 result = ir.VectorOr(ir.VectorLogicalShiftRight(esize, data, 8), const IR::U128 result = ir.VectorReverseElementsInHalfGroups(esize, data);
ir.VectorLogicalShiftLeft(esize, data, 8));
V(datasize, Vd, result); V(datasize, Vd, result);
return true; return true;
} }
bool TranslatorVisitor::REV32_asimd(bool Q, Imm<2> size, Vec Vn, Vec Vd) { bool TranslatorVisitor::REV32_asimd(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
const u32 zext_size = size.ZeroExtend(); if (size > 1) {
if (zext_size > 1) {
return UnallocatedEncoding(); return UnallocatedEncoding();
} }
const size_t datasize = Q ? 128 : 64; const size_t datasize = Q ? 128 : 64;
const size_t esize = 16 << zext_size; const size_t esize = 8 << size.ZeroExtend();
const u8 shift = static_cast<u8>(8 << zext_size);
const IR::U128 data = V(datasize, Vn); const IR::U128 data = V(datasize, Vn);
const IR::U128 result = ir.VectorReverseElementsInWordGroups(esize, data);
// TODO: Consider factoring byte swapping code out into its own opcode.
// Technically the rest of the following code can be a PSHUFB
// in the presence of SSSE3.
IR::U128 result = ir.VectorOr(ir.VectorLogicalShiftRight(esize, data, shift),
ir.VectorLogicalShiftLeft(esize, data, shift));
// If dealing with 8-bit elements we'll need to shuffle the bytes in each halfword
// e.g. Assume the following numbers point out bytes in a 32-bit word, we're essentially
// changing [3, 2, 1, 0] to [2, 3, 0, 1]
if (zext_size == 0) {
result = ir.VectorShuffleLowHalfwords(result, 0b10110001);
result = ir.VectorShuffleHighHalfwords(result, 0b10110001);
}
V(datasize, Vd, result); V(datasize, Vd, result);
return true; return true;
} }
bool TranslatorVisitor::REV64_asimd(bool Q, Imm<2> size, Vec Vn, Vec Vd) { bool TranslatorVisitor::REV64_asimd(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
const u32 zext_size = size.ZeroExtend(); if (size > 2) {
if (zext_size >= 3) {
return UnallocatedEncoding(); return UnallocatedEncoding();
} }
const size_t datasize = Q ? 128 : 64; const size_t datasize = Q ? 128 : 64;
const size_t esize = 16 << zext_size; const size_t esize = 8 << size.ZeroExtend();
const u8 shift = static_cast<u8>(8 << zext_size);
const IR::U128 data = V(datasize, Vn); const IR::U128 data = V(datasize, Vn);
const IR::U128 result = ir.VectorReverseElementsInLongGroups(esize, data);
// TODO: Consider factoring byte swapping code out into its own opcode.
// Technically the rest of the following code can be a PSHUFB
// in the presence of SSSE3.
IR::U128 result = ir.VectorOr(ir.VectorLogicalShiftRight(esize, data, shift),
ir.VectorLogicalShiftLeft(esize, data, shift));
switch (zext_size) {
case 0: // 8-bit elements
result = ir.VectorShuffleLowHalfwords(result, 0b00011011);
result = ir.VectorShuffleHighHalfwords(result, 0b00011011);
break;
case 1: // 16-bit elements
result = ir.VectorShuffleLowHalfwords(result, 0b01001110);
result = ir.VectorShuffleHighHalfwords(result, 0b01001110);
break;
}
V(datasize, Vd, result); V(datasize, Vd, result);
return true; return true;

View file

@ -1573,6 +1573,39 @@ U128 IREmitter::VectorReverseBits(const U128& a) {
return Inst<U128>(Opcode::VectorReverseBits, a); return Inst<U128>(Opcode::VectorReverseBits, a);
} }
// Builds the IR instruction that reverses the esize-bit elements within each 16-bit group of `a`.
// Only esize == 8 has an opcode; any other element size hits UNREACHABLE().
U128 IREmitter::VectorReverseElementsInHalfGroups(size_t esize, const U128& a) {
    if (esize == 8) {
        return Inst<U128>(Opcode::VectorReverseElementsInHalfGroups8, a);
    }
    UNREACHABLE();
}
// Builds the IR instruction that reverses the esize-bit elements within each 32-bit group of `a`.
// Opcodes exist for esize 8 and 16; any other element size hits UNREACHABLE().
U128 IREmitter::VectorReverseElementsInWordGroups(size_t esize, const U128& a) {
    if (esize == 8) {
        return Inst<U128>(Opcode::VectorReverseElementsInWordGroups8, a);
    }
    if (esize == 16) {
        return Inst<U128>(Opcode::VectorReverseElementsInWordGroups16, a);
    }
    UNREACHABLE();
}
// Builds the IR instruction that reverses the esize-bit elements within each 64-bit group of `a`.
// Opcodes exist for esize 8, 16 and 32; any other element size hits UNREACHABLE().
U128 IREmitter::VectorReverseElementsInLongGroups(size_t esize, const U128& a) {
    if (esize == 8) {
        return Inst<U128>(Opcode::VectorReverseElementsInLongGroups8, a);
    }
    if (esize == 16) {
        return Inst<U128>(Opcode::VectorReverseElementsInLongGroups16, a);
    }
    if (esize == 32) {
        return Inst<U128>(Opcode::VectorReverseElementsInLongGroups32, a);
    }
    UNREACHABLE();
}
U128 IREmitter::VectorReduceAdd(size_t esize, const U128& a) { U128 IREmitter::VectorReduceAdd(size_t esize, const U128& a) {
switch (esize) { switch (esize) {
case 8: case 8:
@ -1666,14 +1699,6 @@ U128 IREmitter::VectorRoundingShiftLeftUnsigned(size_t esize, const U128& a, con
UNREACHABLE(); UNREACHABLE();
} }
// Builds a VectorShuffleHighHalfwords IR instruction from vector `a` and the 8-bit immediate `mask`.
// NOTE(review): mask presumably uses the x86 PSHUFHW selector layout, since the x64 backend lowers
// this opcode via pshufhw — confirm against the backend.
U128 IREmitter::VectorShuffleHighHalfwords(const U128& a, u8 mask) {
return Inst<U128>(Opcode::VectorShuffleHighHalfwords, a, mask);
}
// Builds a VectorShuffleLowHalfwords IR instruction from vector `a` and the 8-bit immediate `mask`.
// NOTE(review): mask presumably uses the x86 PSHUFLW selector layout, since the x64 backend lowers
// this opcode via pshuflw — confirm against the backend.
U128 IREmitter::VectorShuffleLowHalfwords(const U128& a, u8 mask) {
return Inst<U128>(Opcode::VectorShuffleLowHalfwords, a, mask);
}
U128 IREmitter::VectorShuffleWords(const U128& a, u8 mask) { U128 IREmitter::VectorShuffleWords(const U128& a, u8 mask) {
return Inst<U128>(Opcode::VectorShuffleWords, a, mask); return Inst<U128>(Opcode::VectorShuffleWords, a, mask);
} }

View file

@ -281,6 +281,9 @@ public:
U128 VectorPolynomialMultiplyLong(size_t esize, const U128& a, const U128& b); U128 VectorPolynomialMultiplyLong(size_t esize, const U128& a, const U128& b);
U128 VectorPopulationCount(const U128& a); U128 VectorPopulationCount(const U128& a);
U128 VectorReverseBits(const U128& a); U128 VectorReverseBits(const U128& a);
U128 VectorReverseElementsInHalfGroups(size_t esize, const U128& a);
U128 VectorReverseElementsInWordGroups(size_t esize, const U128& a);
U128 VectorReverseElementsInLongGroups(size_t esize, const U128& a);
U128 VectorReduceAdd(size_t esize, const U128& a); U128 VectorReduceAdd(size_t esize, const U128& a);
U128 VectorRotateLeft(size_t esize, const U128& a, u8 amount); U128 VectorRotateLeft(size_t esize, const U128& a, u8 amount);
U128 VectorRotateRight(size_t esize, const U128& a, u8 amount); U128 VectorRotateRight(size_t esize, const U128& a, u8 amount);
@ -288,8 +291,6 @@ public:
U128 VectorRoundingHalvingAddUnsigned(size_t esize, const U128& a, const U128& b); U128 VectorRoundingHalvingAddUnsigned(size_t esize, const U128& a, const U128& b);
U128 VectorRoundingShiftLeftSigned(size_t esize, const U128& a, const U128& b); U128 VectorRoundingShiftLeftSigned(size_t esize, const U128& a, const U128& b);
U128 VectorRoundingShiftLeftUnsigned(size_t esize, const U128& a, const U128& b); U128 VectorRoundingShiftLeftUnsigned(size_t esize, const U128& a, const U128& b);
U128 VectorShuffleHighHalfwords(const U128& a, u8 mask);
U128 VectorShuffleLowHalfwords(const U128& a, u8 mask);
U128 VectorShuffleWords(const U128& a, u8 mask); U128 VectorShuffleWords(const U128& a, u8 mask);
U128 VectorSignExtend(size_t original_esize, const U128& a); U128 VectorSignExtend(size_t original_esize, const U128& a);
U128 VectorSignedAbsoluteDifference(size_t esize, const U128& a, const U128& b); U128 VectorSignedAbsoluteDifference(size_t esize, const U128& a, const U128& b);

View file

@ -443,6 +443,12 @@ OPCODE(VectorPolynomialMultiplyLong8, U128, U128
OPCODE(VectorPolynomialMultiplyLong64, U128, U128, U128 ) OPCODE(VectorPolynomialMultiplyLong64, U128, U128, U128 )
OPCODE(VectorPopulationCount, U128, U128 ) OPCODE(VectorPopulationCount, U128, U128 )
OPCODE(VectorReverseBits, U128, U128 ) OPCODE(VectorReverseBits, U128, U128 )
OPCODE(VectorReverseElementsInHalfGroups8, U128, U128 )
OPCODE(VectorReverseElementsInWordGroups8, U128, U128 )
OPCODE(VectorReverseElementsInWordGroups16, U128, U128 )
OPCODE(VectorReverseElementsInLongGroups8, U128, U128 )
OPCODE(VectorReverseElementsInLongGroups16, U128, U128 )
OPCODE(VectorReverseElementsInLongGroups32, U128, U128 )
OPCODE(VectorReduceAdd8, U128, U128 ) OPCODE(VectorReduceAdd8, U128, U128 )
OPCODE(VectorReduceAdd16, U128, U128 ) OPCODE(VectorReduceAdd16, U128, U128 )
OPCODE(VectorReduceAdd32, U128, U128 ) OPCODE(VectorReduceAdd32, U128, U128 )
@ -461,8 +467,6 @@ OPCODE(VectorRoundingShiftLeftU8, U128, U128
OPCODE(VectorRoundingShiftLeftU16, U128, U128, U128 ) OPCODE(VectorRoundingShiftLeftU16, U128, U128, U128 )
OPCODE(VectorRoundingShiftLeftU32, U128, U128, U128 ) OPCODE(VectorRoundingShiftLeftU32, U128, U128, U128 )
OPCODE(VectorRoundingShiftLeftU64, U128, U128, U128 ) OPCODE(VectorRoundingShiftLeftU64, U128, U128, U128 )
OPCODE(VectorShuffleHighHalfwords, U128, U128, U8 )
OPCODE(VectorShuffleLowHalfwords, U128, U128, U8 )
OPCODE(VectorShuffleWords, U128, U128, U8 ) OPCODE(VectorShuffleWords, U128, U128, U8 )
OPCODE(VectorSignExtend8, U128, U128 ) OPCODE(VectorSignExtend8, U128, U128 )
OPCODE(VectorSignExtend16, U128, U128 ) OPCODE(VectorSignExtend16, U128, U128 )