IR: Remove VectorShuffleWords
Introduce VectorRotateWholeVectorRight
This commit is contained in:
parent
8fb37e0e4f
commit
f92cb5e66f
9 changed files with 41 additions and 40 deletions
|
@ -1321,6 +1321,15 @@ void EmitIR<IR::Opcode::VectorReduceAdd64>(oaknut::CodeGenerator& code, EmitCont
|
||||||
EmitReduce<64>(code, ctx, inst, [&](auto& Dresult, auto Voperand) { code.ADDP(Dresult, Voperand); });
|
EmitReduce<64>(code, ctx, inst, [&](auto& Dresult, auto Voperand) { code.ADDP(Dresult, Voperand); });
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emitter specialization for the VectorRotateWholeVectorRight IR opcode
// (oaknut code generator backend).
// Delegates to EmitImmShift<8>, supplying a lambda that receives the result
// register, the operand register and the immediate shift amount.
template<>
|
||||||
|
void EmitIR<IR::Opcode::VectorRotateWholeVectorRight>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
EmitImmShift<8>(code, ctx, inst, [&](auto Vresult, auto Voperand, u8 shift_amount) {
|
||||||
|
// Only multiples of 8 are accepted (presumably shift_amount is in bits, i.e. whole bytes only).
ASSERT(shift_amount % 8 == 0);
|
||||||
|
// Reduce modulo 128 and divide by 8: the resulting EXT immediate is in the range 0..15.
const u8 ext_imm = (shift_amount % 128) / 8;
|
||||||
|
// EXT is given the same operand register as both of its sources.
code.EXT(Vresult, Voperand, Voperand, ext_imm);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
template<>
|
template<>
|
||||||
void EmitIR<IR::Opcode::VectorRoundingHalvingAddS8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
void EmitIR<IR::Opcode::VectorRoundingHalvingAddS8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||||
EmitThreeOpArranged<8>(code, ctx, inst, [&](auto Vresult, auto Va, auto Vb) { code.SRHADD(Vresult, Va, Vb); });
|
EmitThreeOpArranged<8>(code, ctx, inst, [&](auto Vresult, auto Va, auto Vb) { code.SRHADD(Vresult, Va, Vb); });
|
||||||
|
@ -1391,14 +1400,6 @@ void EmitIR<IR::Opcode::VectorRoundingShiftLeftU64>(oaknut::CodeGenerator& code,
|
||||||
EmitThreeOpArranged<64>(code, ctx, inst, [&](auto Vresult, auto Va, auto Vb) { code.URSHL(Vresult, Va, Vb); });
|
EmitThreeOpArranged<64>(code, ctx, inst, [&](auto Vresult, auto Va, auto Vb) { code.URSHL(Vresult, Va, Vb); });
|
||||||
}
|
}
|
||||||
|
|
||||||
// Emitter specialization for the VectorShuffleWords IR opcode.
// Placeholder only: no code is emitted, and reaching it trips ASSERT_FALSE.
template<>
|
|
||||||
void EmitIR<IR::Opcode::VectorShuffleWords>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
|
||||||
// The void casts mark the parameters as intentionally unused.
(void)code;
|
|
||||||
(void)ctx;
|
|
||||||
(void)inst;
|
|
||||||
ASSERT_FALSE("Unimplemented");
|
|
||||||
}
|
|
||||||
|
|
||||||
template<>
|
template<>
|
||||||
void EmitIR<IR::Opcode::VectorSignExtend8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
void EmitIR<IR::Opcode::VectorSignExtend8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||||
EmitTwoOpArrangedWiden<8>(code, ctx, inst, [&](auto Vresult, auto Voperand) { code.SXTL(Vresult, Voperand); });
|
EmitTwoOpArrangedWiden<8>(code, ctx, inst, [&](auto Vresult, auto Voperand) { code.SXTL(Vresult, Voperand); });
|
||||||
|
|
|
@ -4,6 +4,7 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
#include <bit>
|
||||||
#include <bitset>
|
#include <bitset>
|
||||||
#include <cstdlib>
|
#include <cstdlib>
|
||||||
#include <type_traits>
|
#include <type_traits>
|
||||||
|
@ -3320,6 +3321,20 @@ void EmitX64::EmitVectorReduceAdd64(EmitContext& ctx, IR::Inst* inst) {
|
||||||
ctx.reg_alloc.DefineValue(inst, data);
|
ctx.reg_alloc.DefineValue(inst, data);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// x64 emitter for the VectorRotateWholeVectorRight IR opcode.
// Reads the vector operand (args[0]) and an immediate shift amount (args[1]),
// emits a single pshufd into a scratch register and defines that register as
// the instruction's result.
void EmitX64::EmitVectorRotateWholeVectorRight(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
|
||||||
|
const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(args[0]);
|
||||||
|
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
|
||||||
|
const u8 shift_amount = args[1].GetImmediateU8();
|
||||||
|
// Only multiples of 32 are supported, since pshufd moves whole 32-bit lanes.
ASSERT(shift_amount % 32 == 0);
|
||||||
|
// 0b11100100 is the identity pshufd selector (two bits per lane). Rotating it
// right by two bits per 32-bit step gives 0b00111001 for 32, 0b01001110 for 64
// and 0b10010011 for 96.
const u8 shuffle_imm = std::rotr<u8>(0b11100100, shift_amount / 32 * 2);
|
||||||
|
|
||||||
|
code.pshufd(result, operand, shuffle_imm);
|
||||||
|
|
||||||
|
ctx.reg_alloc.DefineValue(inst, result);
|
||||||
|
}
|
||||||
|
|
||||||
static void EmitVectorRoundingHalvingAddSigned(size_t esize, EmitContext& ctx, IR::Inst* inst, BlockOfCode& code) {
|
static void EmitVectorRoundingHalvingAddSigned(size_t esize, EmitContext& ctx, IR::Inst* inst, BlockOfCode& code) {
|
||||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
|
||||||
|
@ -3501,22 +3516,6 @@ void EmitX64::EmitVectorRoundingShiftLeftU64(EmitContext& ctx, IR::Inst* inst) {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
// Shared helper for immediate-controlled shuffles.
// `fn` is a pointer to an Xbyak::CodeGenerator member taking (destination,
// source, u8 immediate). It is invoked with a scratch result register, the
// operand from args[0] and the immediate mask from args[1]; the scratch
// register is then defined as the instruction's result.
static void VectorShuffleImpl(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Mmx&, const Xbyak::Operand&, u8)) {
|
|
||||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
|
||||||
|
|
||||||
const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(args[0]);
|
|
||||||
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
|
|
||||||
const u8 mask = args[1].GetImmediateU8();
|
|
||||||
|
|
||||||
// Call the supplied instruction emitter through the member-function pointer.
(code.*fn)(result, operand, mask);
|
|
||||||
|
|
||||||
ctx.reg_alloc.DefineValue(inst, result);
|
|
||||||
}
|
|
||||||
|
|
||||||
// x64 emitter for the VectorShuffleWords IR opcode: forwards to
// VectorShuffleImpl with pshufd as the shuffle instruction.
void EmitX64::EmitVectorShuffleWords(EmitContext& ctx, IR::Inst* inst) {
|
|
||||||
VectorShuffleImpl(code, ctx, inst, &Xbyak::CodeGenerator::pshufd);
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitX64::EmitVectorSignExtend8(EmitContext& ctx, IR::Inst* inst) {
|
void EmitX64::EmitVectorSignExtend8(EmitContext& ctx, IR::Inst* inst) {
|
||||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
if (code.HasHostFeature(HostFeature::SSE41)) {
|
if (code.HasHostFeature(HostFeature::SSE41)) {
|
||||||
|
|
|
@ -103,7 +103,7 @@ bool TranslatorVisitor::ZIP2(bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec Vd) {
|
||||||
|
|
||||||
// TODO: Urgh.
|
// TODO: Urgh.
|
||||||
const IR::U128 interleaved = ir.VectorInterleaveLower(esize, operand1, operand2);
|
const IR::U128 interleaved = ir.VectorInterleaveLower(esize, operand1, operand2);
|
||||||
return ir.VectorZeroUpper(ir.VectorShuffleWords(interleaved, 0b01001110));
|
return ir.VectorZeroUpper(ir.VectorRotateWholeVectorRight(interleaved, 64));
|
||||||
}();
|
}();
|
||||||
|
|
||||||
V(datasize, Vd, result);
|
V(datasize, Vd, result);
|
||||||
|
|
|
@ -39,7 +39,7 @@ IR::U128 SHA1HashUpdate(IREmitter& ir, Vec Vm, Vec Vn, Vec Vd, SHA1HashUpdateFun
|
||||||
|
|
||||||
// Move each 32-bit element to the left once
|
// Move each 32-bit element to the left once
|
||||||
// e.g. [3, 2, 1, 0], becomes [2, 1, 0, 3]
|
// e.g. [3, 2, 1, 0], becomes [2, 1, 0, 3]
|
||||||
const IR::U128 shuffled_x = ir.VectorShuffleWords(x, 0b10010011);
|
const IR::U128 shuffled_x = ir.VectorRotateWholeVectorRight(x, 96);
|
||||||
x = ir.VectorSetElement(32, shuffled_x, 0, y);
|
x = ir.VectorSetElement(32, shuffled_x, 0, y);
|
||||||
y = high_x;
|
y = high_x;
|
||||||
}
|
}
|
||||||
|
@ -91,7 +91,7 @@ bool TranslatorVisitor::SHA1SU1(Vec Vn, Vec Vd) {
|
||||||
const IR::U128 n = ir.GetQ(Vn);
|
const IR::U128 n = ir.GetQ(Vn);
|
||||||
|
|
||||||
// Shuffle down the whole vector and zero out the top 32 bits
|
// Shuffle down the whole vector and zero out the top 32 bits
|
||||||
const IR::U128 shuffled_n = ir.VectorSetElement(32, ir.VectorShuffleWords(n, 0b00111001), 3, ir.Imm32(0));
|
const IR::U128 shuffled_n = ir.VectorSetElement(32, ir.VectorRotateWholeVectorRight(n, 32), 3, ir.Imm32(0));
|
||||||
const IR::U128 t = ir.VectorEor(d, shuffled_n);
|
const IR::U128 t = ir.VectorEor(d, shuffled_n);
|
||||||
const IR::U128 rotated_t = ir.VectorRotateLeft(32, t, 1);
|
const IR::U128 rotated_t = ir.VectorRotateLeft(32, t, 1);
|
||||||
|
|
||||||
|
|
|
@ -141,7 +141,7 @@ IR::U128 SM4Hash(IREmitter& ir, Vec Vn, Vec Vd, SM4RotationType type) {
|
||||||
const IR::U32 intval_low_word = ir.VectorGetElement(32, intval_vec, 0);
|
const IR::U32 intval_low_word = ir.VectorGetElement(32, intval_vec, 0);
|
||||||
const IR::U32 round_result_low_word = ir.VectorGetElement(32, roundresult, 0);
|
const IR::U32 round_result_low_word = ir.VectorGetElement(32, roundresult, 0);
|
||||||
const IR::U32 intval = SM4Rotation(ir, intval_low_word, round_result_low_word, type);
|
const IR::U32 intval = SM4Rotation(ir, intval_low_word, round_result_low_word, type);
|
||||||
roundresult = ir.VectorShuffleWords(roundresult, 0b00111001);
|
roundresult = ir.VectorRotateWholeVectorRight(roundresult, 32);
|
||||||
roundresult = ir.VectorSetElement(32, roundresult, 3, intval);
|
roundresult = ir.VectorSetElement(32, roundresult, 3, intval);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -235,7 +235,7 @@ bool TranslatorVisitor::SM3PARTW1(Vec Vm, Vec Vn, Vec Vd) {
|
||||||
|
|
||||||
const IR::U128 result_low_three_words = [&] {
|
const IR::U128 result_low_three_words = [&] {
|
||||||
// Move the top-most 3 words down one element (i.e. [3, 2, 1, 0] -> [0, 3, 2, 1])
|
// Move the top-most 3 words down one element (i.e. [3, 2, 1, 0] -> [0, 3, 2, 1])
|
||||||
const IR::U128 shuffled_m = ir.VectorShuffleWords(m, 0b00111001);
|
const IR::U128 shuffled_m = ir.VectorRotateWholeVectorRight(m, 32);
|
||||||
|
|
||||||
// We treat the uppermost word as junk data and don't touch/use it explicitly for now.
|
// We treat the uppermost word as junk data and don't touch/use it explicitly for now.
|
||||||
// Given we don't do anything with it yet, the fact we EOR into it doesn't matter.
|
// Given we don't do anything with it yet, the fact we EOR into it doesn't matter.
|
||||||
|
|
|
@ -1695,6 +1695,11 @@ U128 IREmitter::VectorRotateRight(size_t esize, const U128& a, u8 amount) {
|
||||||
VectorLogicalShiftLeft(esize, a, static_cast<u8>(esize - amount)));
|
VectorLogicalShiftLeft(esize, a, static_cast<u8>(esize - amount)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Builds a VectorRotateWholeVectorRight IR instruction operating on `a`.
// `amount` must be a multiple of 32 (asserted) and is attached to the
// instruction as an 8-bit immediate.
U128 IREmitter::VectorRotateWholeVectorRight(const U128& a, u8 amount) {
|
||||||
|
// Reject amounts that are not a whole number of 32-bit words.
ASSERT(amount % 32 == 0);
|
||||||
|
return Inst<U128>(Opcode::VectorRotateWholeVectorRight, a, Imm8(amount));
|
||||||
|
}
|
||||||
|
|
||||||
U128 IREmitter::VectorRoundingHalvingAddSigned(size_t esize, const U128& a, const U128& b) {
|
U128 IREmitter::VectorRoundingHalvingAddSigned(size_t esize, const U128& a, const U128& b) {
|
||||||
switch (esize) {
|
switch (esize) {
|
||||||
case 8:
|
case 8:
|
||||||
|
@ -1751,10 +1756,6 @@ U128 IREmitter::VectorRoundingShiftLeftUnsigned(size_t esize, const U128& a, con
|
||||||
UNREACHABLE();
|
UNREACHABLE();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Builds a VectorShuffleWords IR instruction from `a` and the 8-bit `mask`.
// NOTE(review): the x64 emitter hands this mask to pshufd as its immediate, so it presumably uses pshufd's selector encoding.
U128 IREmitter::VectorShuffleWords(const U128& a, u8 mask) {
|
|
||||||
return Inst<U128>(Opcode::VectorShuffleWords, a, mask);
|
|
||||||
}
|
|
||||||
|
|
||||||
U128 IREmitter::VectorSignExtend(size_t original_esize, const U128& a) {
|
U128 IREmitter::VectorSignExtend(size_t original_esize, const U128& a) {
|
||||||
switch (original_esize) {
|
switch (original_esize) {
|
||||||
case 8:
|
case 8:
|
||||||
|
|
|
@ -291,11 +291,11 @@ public:
|
||||||
U128 VectorReduceAdd(size_t esize, const U128& a);
|
U128 VectorReduceAdd(size_t esize, const U128& a);
|
||||||
U128 VectorRotateLeft(size_t esize, const U128& a, u8 amount);
|
U128 VectorRotateLeft(size_t esize, const U128& a, u8 amount);
|
||||||
U128 VectorRotateRight(size_t esize, const U128& a, u8 amount);
|
U128 VectorRotateRight(size_t esize, const U128& a, u8 amount);
|
||||||
|
// Emits a VectorRotateWholeVectorRight instruction on `a`; the definition asserts that `amount` is a multiple of 32.
U128 VectorRotateWholeVectorRight(const U128& a, u8 amount);
|
||||||
U128 VectorRoundingHalvingAddSigned(size_t esize, const U128& a, const U128& b);
|
U128 VectorRoundingHalvingAddSigned(size_t esize, const U128& a, const U128& b);
|
||||||
U128 VectorRoundingHalvingAddUnsigned(size_t esize, const U128& a, const U128& b);
|
U128 VectorRoundingHalvingAddUnsigned(size_t esize, const U128& a, const U128& b);
|
||||||
U128 VectorRoundingShiftLeftSigned(size_t esize, const U128& a, const U128& b);
|
U128 VectorRoundingShiftLeftSigned(size_t esize, const U128& a, const U128& b);
|
||||||
U128 VectorRoundingShiftLeftUnsigned(size_t esize, const U128& a, const U128& b);
|
U128 VectorRoundingShiftLeftUnsigned(size_t esize, const U128& a, const U128& b);
|
||||||
U128 VectorShuffleWords(const U128& a, u8 mask);
|
|
||||||
U128 VectorSignExtend(size_t original_esize, const U128& a);
|
U128 VectorSignExtend(size_t original_esize, const U128& a);
|
||||||
U128 VectorSignedAbsoluteDifference(size_t esize, const U128& a, const U128& b);
|
U128 VectorSignedAbsoluteDifference(size_t esize, const U128& a, const U128& b);
|
||||||
UpperAndLower VectorSignedMultiply(size_t esize, const U128& a, const U128& b);
|
UpperAndLower VectorSignedMultiply(size_t esize, const U128& a, const U128& b);
|
||||||
|
|
|
@ -465,6 +465,7 @@ OPCODE(VectorReduceAdd8, U128, U128
|
||||||
OPCODE(VectorReduceAdd16, U128, U128 )
|
OPCODE(VectorReduceAdd16, U128, U128 )
|
||||||
OPCODE(VectorReduceAdd32, U128, U128 )
|
OPCODE(VectorReduceAdd32, U128, U128 )
|
||||||
OPCODE(VectorReduceAdd64, U128, U128 )
|
OPCODE(VectorReduceAdd64, U128, U128 )
|
||||||
|
OPCODE(VectorRotateWholeVectorRight, U128, U128, U8 )
|
||||||
OPCODE(VectorRoundingHalvingAddS8, U128, U128, U128 )
|
OPCODE(VectorRoundingHalvingAddS8, U128, U128, U128 )
|
||||||
OPCODE(VectorRoundingHalvingAddS16, U128, U128, U128 )
|
OPCODE(VectorRoundingHalvingAddS16, U128, U128, U128 )
|
||||||
OPCODE(VectorRoundingHalvingAddS32, U128, U128, U128 )
|
OPCODE(VectorRoundingHalvingAddS32, U128, U128, U128 )
|
||||||
|
@ -479,7 +480,6 @@ OPCODE(VectorRoundingShiftLeftU8, U128, U128
|
||||||
OPCODE(VectorRoundingShiftLeftU16, U128, U128, U128 )
|
OPCODE(VectorRoundingShiftLeftU16, U128, U128, U128 )
|
||||||
OPCODE(VectorRoundingShiftLeftU32, U128, U128, U128 )
|
OPCODE(VectorRoundingShiftLeftU32, U128, U128, U128 )
|
||||||
OPCODE(VectorRoundingShiftLeftU64, U128, U128, U128 )
|
OPCODE(VectorRoundingShiftLeftU64, U128, U128, U128 )
|
||||||
OPCODE(VectorShuffleWords, U128, U128, U8 )
|
|
||||||
OPCODE(VectorSignExtend8, U128, U128 )
|
OPCODE(VectorSignExtend8, U128, U128 )
|
||||||
OPCODE(VectorSignExtend16, U128, U128 )
|
OPCODE(VectorSignExtend16, U128, U128 )
|
||||||
OPCODE(VectorSignExtend32, U128, U128 )
|
OPCODE(VectorSignExtend32, U128, U128 )
|
||||||
|
|
|
@ -45,7 +45,7 @@ void PolyfillSHA256MessageSchedule1(IR::IREmitter& ir, IR::Inst& inst) {
|
||||||
const IR::U128 T0 = ir.VectorExtract(y, z, 32);
|
const IR::U128 T0 = ir.VectorExtract(y, z, 32);
|
||||||
|
|
||||||
const IR::U128 lower_half = [&] {
|
const IR::U128 lower_half = [&] {
|
||||||
const IR::U128 T = ir.VectorShuffleWords(z, 0b01001110);
|
const IR::U128 T = ir.VectorRotateWholeVectorRight(z, 64);
|
||||||
const IR::U128 tmp1 = ir.VectorRotateRight(32, T, 17);
|
const IR::U128 tmp1 = ir.VectorRotateRight(32, T, 17);
|
||||||
const IR::U128 tmp2 = ir.VectorRotateRight(32, T, 19);
|
const IR::U128 tmp2 = ir.VectorRotateRight(32, T, 19);
|
||||||
const IR::U128 tmp3 = ir.VectorLogicalShiftRight(32, T, 10);
|
const IR::U128 tmp3 = ir.VectorLogicalShiftRight(32, T, 10);
|
||||||
|
@ -61,8 +61,8 @@ void PolyfillSHA256MessageSchedule1(IR::IREmitter& ir, IR::Inst& inst) {
|
||||||
const IR::U128 tmp4 = ir.VectorEor(tmp1, ir.VectorEor(tmp2, tmp3));
|
const IR::U128 tmp4 = ir.VectorEor(tmp1, ir.VectorEor(tmp2, tmp3));
|
||||||
|
|
||||||
// Shuffle the top two 32-bit elements downwards [3, 2, 1, 0] -> [1, 0, 3, 2]
|
// Shuffle the top two 32-bit elements downwards [3, 2, 1, 0] -> [1, 0, 3, 2]
|
||||||
const IR::U128 shuffled_d = ir.VectorShuffleWords(x, 0b01001110);
|
const IR::U128 shuffled_d = ir.VectorRotateWholeVectorRight(x, 64);
|
||||||
const IR::U128 shuffled_T0 = ir.VectorShuffleWords(T0, 0b01001110);
|
const IR::U128 shuffled_T0 = ir.VectorRotateWholeVectorRight(T0, 64);
|
||||||
|
|
||||||
const IR::U128 tmp5 = ir.VectorAdd(32, tmp4, ir.VectorAdd(32, shuffled_d, shuffled_T0));
|
const IR::U128 tmp5 = ir.VectorAdd(32, tmp4, ir.VectorAdd(32, shuffled_d, shuffled_T0));
|
||||||
return ir.VectorGetElement(64, tmp5, 0);
|
return ir.VectorGetElement(64, tmp5, 0);
|
||||||
|
@ -128,8 +128,8 @@ void PolyfillSHA256Hash(IR::IREmitter& ir, IR::Inst& inst) {
|
||||||
const IR::U32 new_low_y = ir.Add(t, high_x);
|
const IR::U32 new_low_y = ir.Add(t, high_x);
|
||||||
|
|
||||||
// Shuffle all words left by 1 element: [3, 2, 1, 0] -> [2, 1, 0, 3]
|
// Shuffle all words left by 1 element: [3, 2, 1, 0] -> [2, 1, 0, 3]
|
||||||
const IR::U128 shuffled_x = ir.VectorShuffleWords(x, 0b10010011);
|
const IR::U128 shuffled_x = ir.VectorRotateWholeVectorRight(x, 96);
|
||||||
const IR::U128 shuffled_y = ir.VectorShuffleWords(y, 0b10010011);
|
const IR::U128 shuffled_y = ir.VectorRotateWholeVectorRight(y, 96);
|
||||||
|
|
||||||
x = ir.VectorSetElement(32, shuffled_x, 0, new_low_x);
|
x = ir.VectorSetElement(32, shuffled_x, 0, new_low_x);
|
||||||
y = ir.VectorSetElement(32, shuffled_y, 0, new_low_y);
|
y = ir.VectorSetElement(32, shuffled_y, 0, new_low_y);
|
||||||
|
|
Loading…
Reference in a new issue