IR: Vector instructions now take esize argument in emitter

MerryMage 2018-02-10 10:18:10 +00:00
parent 1d0cd95b23
commit d3a4e1efe2
7 changed files with 126 additions and 226 deletions
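
This change collapses the per-width IR emitter helpers (VectorAdd8/16/32/64, VectorBroadcast8/…/64, VectorEqual8/…/128, and so on) into single functions that take the element size as their first argument and dispatch to the per-width opcode internally, so the A64 translators no longer need a switch on esize at every call site. A minimal sketch of the calling pattern, assuming ir, esize, operand1 and operand2 are set up as in the ADD_vector translator shown below; the result_old/result_new names are only for illustration:

    // Before: each translator selected the width-specific emitter helper itself.
    const IR::U128 result_old = [&]{
        switch (esize) {
        case 8:  return ir.VectorAdd8(operand1, operand2);
        case 16: return ir.VectorAdd16(operand1, operand2);
        case 32: return ir.VectorAdd32(operand1, operand2);
        default: return ir.VectorAdd64(operand1, operand2);
        }
    }();

    // After: the emitter takes esize and picks the matching Opcode::VectorAdd{8,16,32,64}.
    const IR::U128 result_new = ir.VectorAdd(esize, operand1, operand2);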

View file

@@ -18,18 +18,7 @@ bool TranslatorVisitor::DUP_gen(bool Q, Imm<5> imm5, Reg Rn, Vec Vd) {
     const IR::UAny element = X(esize, Rn);
-    const IR::U128 result = [&]{
-        switch (esize) {
-        case 8:
-            return Q ? ir.VectorBroadcast8(element) : ir.VectorBroadcastLower8(element);
-        case 16:
-            return Q ? ir.VectorBroadcast16(element) : ir.VectorBroadcastLower16(element);
-        case 32:
-            return Q ? ir.VectorBroadcast32(element) : ir.VectorBroadcastLower32(element);
-        default:
-            return ir.VectorBroadcast64(element);
-        }
-    }();
+    const IR::U128 result = Q ? ir.VectorBroadcast(esize, element) : ir.VectorBroadcastLower(esize, element);
     V(datasize, Vd, result);

View file

@@ -16,7 +16,7 @@ bool TranslatorVisitor::MOVI(bool Q, bool op, Imm<1> a, Imm<1> b, Imm<1> c, Imm<
     // also FMOV (vector, immediate) when cmode == 0b1111
     const auto movi = [&]{
         u64 imm64 = AdvSIMDExpandImm(op, cmode, concatenate(a, b, c, d, e, f, g, h));
-        const IR::U128 imm = datasize == 64 ? ir.ZeroExtendToQuad(ir.Imm64(imm64)) : ir.VectorBroadcast64(ir.Imm64(imm64));
+        const IR::U128 imm = datasize == 64 ? ir.ZeroExtendToQuad(ir.Imm64(imm64)) : ir.VectorBroadcast(64, ir.Imm64(imm64));
         V(128, Vd, imm);
         return true;
     };
@@ -24,7 +24,7 @@ bool TranslatorVisitor::MOVI(bool Q, bool op, Imm<1> a, Imm<1> b, Imm<1> c, Imm<
     // MVNI
     const auto mvni = [&]{
         u64 imm64 = ~AdvSIMDExpandImm(op, cmode, concatenate(a, b, c, d, e, f, g, h));
-        const IR::U128 imm = datasize == 64 ? ir.ZeroExtendToQuad(ir.Imm64(imm64)) : ir.VectorBroadcast64(ir.Imm64(imm64));
+        const IR::U128 imm = datasize == 64 ? ir.ZeroExtendToQuad(ir.Imm64(imm64)) : ir.VectorBroadcast(64, ir.Imm64(imm64));
         V(128, Vd, imm);
         return true;
     };
@@ -32,7 +32,7 @@ bool TranslatorVisitor::MOVI(bool Q, bool op, Imm<1> a, Imm<1> b, Imm<1> c, Imm<
     // ORR (vector, immediate)
     const auto orr = [&]{
         u64 imm64 = AdvSIMDExpandImm(op, cmode, concatenate(a, b, c, d, e, f, g, h));
-        const IR::U128 imm = datasize == 64 ? ir.ZeroExtendToQuad(ir.Imm64(imm64)) : ir.VectorBroadcast64(ir.Imm64(imm64));
+        const IR::U128 imm = datasize == 64 ? ir.ZeroExtendToQuad(ir.Imm64(imm64)) : ir.VectorBroadcast(64, ir.Imm64(imm64));
         const IR::U128 operand = V(datasize, Vd);
         const IR::U128 result = ir.VectorOr(operand, imm);
         V(datasize, Vd, result);
@@ -42,7 +42,7 @@ bool TranslatorVisitor::MOVI(bool Q, bool op, Imm<1> a, Imm<1> b, Imm<1> c, Imm<
     // BIC (vector, immediate)
     const auto bic = [&]{
         u64 imm64 = ~AdvSIMDExpandImm(op, cmode, concatenate(a, b, c, d, e, f, g, h));
-        const IR::U128 imm = datasize == 64 ? ir.ZeroExtendToQuad(ir.Imm64(imm64)) : ir.VectorBroadcast64(ir.Imm64(imm64));
+        const IR::U128 imm = datasize == 64 ? ir.ZeroExtendToQuad(ir.Imm64(imm64)) : ir.VectorBroadcast(64, ir.Imm64(imm64));
         const IR::U128 operand = V(datasize, Vd);
         const IR::U128 result = ir.VectorAnd(operand, imm);
         V(datasize, Vd, result);

View file

@@ -13,24 +13,12 @@ bool TranslatorVisitor::ZIP1(bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec Vd) {
         return ReservedValue();
     }
+    const size_t esize = 8 << size.ZeroExtend<size_t>();
     const size_t datasize = Q ? 128 : 64;
-    const IR::U128 result = [&] {
-        const IR::U128 operand1 = V(datasize, Vn);
-        const IR::U128 operand2 = V(datasize, Vm);
-        switch (size.ZeroExtend()) {
-        case 0b00:
-            return ir.VectorInterleaveLower8(operand1, operand2);
-        case 0b01:
-            return ir.VectorInterleaveLower16(operand1, operand2);
-        case 0b10:
-            return ir.VectorInterleaveLower32(operand1, operand2);
-        case 0b11:
-        default:
-            return ir.VectorInterleaveLower64(operand1, operand2);
-        }
-    }();
+    const IR::U128 operand1 = V(datasize, Vn);
+    const IR::U128 operand2 = V(datasize, Vm);
+    const IR::U128 result = ir.VectorInterleaveLower(esize, operand1, operand2);
     V(datasize, Vd, result);
     return true;

View file

@@ -22,19 +22,7 @@ bool TranslatorVisitor::SHL_2(bool Q, Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd)
     const u8 shift_amount = concatenate(immh, immb).ZeroExtend<u8>() - static_cast<u8>(esize);
     const IR::U128 operand = V(datasize, Vn);
-    const IR::U128 result = [&]{
-        switch (esize) {
-        case 8:
-            return ir.VectorLogicalShiftLeft8(operand, shift_amount);
-        case 16:
-            return ir.VectorLogicalShiftLeft16(operand, shift_amount);
-        case 32:
-            return ir.VectorLogicalShiftLeft32(operand, shift_amount);
-        case 64:
-        default:
-            return ir.VectorLogicalShiftLeft64(operand, shift_amount);
-        }
-    }();
+    const IR::U128 result = ir.VectorLogicalShiftLeft(esize, operand, shift_amount);
     V(datasize, Vd, result);
     return true;

View file

@@ -16,18 +16,7 @@ bool TranslatorVisitor::ADD_vector(bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec Vd)
     auto operand1 = V(datasize, Vn);
     auto operand2 = V(datasize, Vm);
-    auto result = [&]{
-        switch (esize) {
-        case 8:
-            return ir.VectorAdd8(operand1, operand2);
-        case 16:
-            return ir.VectorAdd16(operand1, operand2);
-        case 32:
-            return ir.VectorAdd32(operand1, operand2);
-        default:
-            return ir.VectorAdd64(operand1, operand2);
-        }
-    }();
+    auto result = ir.VectorAdd(esize, operand1, operand2);
     V(datasize, Vd, result);
@@ -42,18 +31,7 @@ bool TranslatorVisitor::ADDP_vec(bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec Vd) {
     const IR::U128 operand1 = V(datasize, Vn);
     const IR::U128 operand2 = V(datasize, Vm);
-    const IR::U128 result = [&]{
-        switch (esize) {
-        case 8:
-            return Q ? ir.VectorPairedAdd8(operand1, operand2) : ir.VectorPairedAddLower8(operand1, operand2);
-        case 16:
-            return Q ? ir.VectorPairedAdd16(operand1, operand2) : ir.VectorPairedAddLower16(operand1, operand2);
-        case 32:
-            return Q ? ir.VectorPairedAdd32(operand1, operand2) : ir.VectorPairedAddLower32(operand1, operand2);
-        default:
-            return ir.VectorPairedAdd64(operand1, operand2);
-        }
-    }();
+    const IR::U128 result = Q ? ir.VectorPairedAdd(esize, operand1, operand2) : ir.VectorPairedAddLower(esize, operand1, operand2);
     V(datasize, Vd, result);
@@ -128,18 +106,7 @@ bool TranslatorVisitor::CMEQ_reg_2(bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec Vd)
     const IR::U128 operand1 = V(datasize, Vn);
     const IR::U128 operand2 = V(datasize, Vm);
-    IR::U128 result = [&]{
-        switch (esize) {
-        case 8:
-            return ir.VectorEqual8(operand1, operand2);
-        case 16:
-            return ir.VectorEqual16(operand1, operand2);
-        case 32:
-            return ir.VectorEqual32(operand1, operand2);
-        default:
-            return ir.VectorEqual64(operand1, operand2);
-        }
-    }();
+    IR::U128 result = ir.VectorEqual(esize, operand1, operand2);
     if (datasize == 64) {
         result = ir.VectorZeroUpper(result);
@@ -170,8 +137,7 @@ bool TranslatorVisitor::BIF(bool Q, Vec Vm, Vec Vn, Vec Vd) {
     auto operand4 = V(datasize, Vn);
     auto operand3 = ir.VectorNot(V(datasize, Vm));
-    auto result = ir.VectorEor(operand1,
-                               ir.VectorAnd(ir.VectorEor(operand1, operand4), operand3));
+    auto result = ir.VectorEor(operand1, ir.VectorAnd(ir.VectorEor(operand1, operand4), operand3));
     V(datasize, Vd, result);
@@ -185,8 +151,7 @@ bool TranslatorVisitor::BIT(bool Q, Vec Vm, Vec Vn, Vec Vd) {
     auto operand4 = V(datasize, Vn);
     auto operand3 = V(datasize, Vm);
-    auto result = ir.VectorEor(operand1,
-                               ir.VectorAnd(ir.VectorEor(operand1, operand4), operand3));
+    auto result = ir.VectorEor(operand1, ir.VectorAnd(ir.VectorEor(operand1, operand4), operand3));
     V(datasize, Vd, result);
@@ -200,8 +165,7 @@ bool TranslatorVisitor::BSL(bool Q, Vec Vm, Vec Vn, Vec Vd) {
     auto operand1 = V(datasize, Vm);
     auto operand3 = V(datasize, Vd);
-    auto result = ir.VectorEor(operand1,
-                               ir.VectorAnd(ir.VectorEor(operand1, operand4), operand3));
+    auto result = ir.VectorEor(operand1, ir.VectorAnd(ir.VectorEor(operand1, operand4), operand3));
     V(datasize, Vd, result);

View file

@@ -785,108 +785,102 @@ U128 IREmitter::VectorSetElement(size_t esize, const U128& a, size_t index, const UAny& elem) {
     }
 }
 
-U128 IREmitter::VectorAdd8(const U128& a, const U128& b) {
-    return Inst<U128>(Opcode::VectorAdd8, a, b);
-}
-
-U128 IREmitter::VectorAdd16(const U128& a, const U128& b) {
-    return Inst<U128>(Opcode::VectorAdd16, a, b);
-}
-
-U128 IREmitter::VectorAdd32(const U128& a, const U128& b) {
-    return Inst<U128>(Opcode::VectorAdd32, a, b);
-}
-
-U128 IREmitter::VectorAdd64(const U128& a, const U128& b) {
-    return Inst<U128>(Opcode::VectorAdd64, a, b);
+U128 IREmitter::VectorAdd(size_t esize, const U128& a, const U128& b) {
+    switch (esize) {
+    case 8:
+        return Inst<U128>(Opcode::VectorAdd8, a, b);
+    case 16:
+        return Inst<U128>(Opcode::VectorAdd16, a, b);
+    case 32:
+        return Inst<U128>(Opcode::VectorAdd32, a, b);
+    case 64:
+        return Inst<U128>(Opcode::VectorAdd64, a, b);
+    }
+    UNREACHABLE();
+    return {};
 }
 
 U128 IREmitter::VectorAnd(const U128& a, const U128& b) {
     return Inst<U128>(Opcode::VectorAnd, a, b);
 }
 
-U128 IREmitter::VectorBroadcastLower8(const U8& a) {
-    return Inst<U128>(Opcode::VectorBroadcastLower8, a);
-}
-
-U128 IREmitter::VectorBroadcastLower16(const U16& a) {
-    return Inst<U128>(Opcode::VectorBroadcastLower16, a);
-}
-
-U128 IREmitter::VectorBroadcastLower32(const U32& a) {
-    return Inst<U128>(Opcode::VectorBroadcastLower32, a);
-}
-
-U128 IREmitter::VectorBroadcast8(const U8& a) {
-    return Inst<U128>(Opcode::VectorBroadcast8, a);
-}
-
-U128 IREmitter::VectorBroadcast16(const U16& a) {
-    return Inst<U128>(Opcode::VectorBroadcast16, a);
-}
-
-U128 IREmitter::VectorBroadcast32(const U32& a) {
-    return Inst<U128>(Opcode::VectorBroadcast32, a);
-}
-
-U128 IREmitter::VectorBroadcast64(const U64& a) {
-    return Inst<U128>(Opcode::VectorBroadcast64, a);
+U128 IREmitter::VectorBroadcastLower(size_t esize, const UAny& a) {
+    switch (esize) {
+    case 8:
+        return Inst<U128>(Opcode::VectorBroadcastLower8, U8(a));
+    case 16:
+        return Inst<U128>(Opcode::VectorBroadcastLower16, U16(a));
+    case 32:
+        return Inst<U128>(Opcode::VectorBroadcastLower32, U32(a));
+    }
+    UNREACHABLE();
+    return {};
+}
+
+U128 IREmitter::VectorBroadcast(size_t esize, const UAny& a) {
+    switch (esize) {
+    case 8:
+        return Inst<U128>(Opcode::VectorBroadcast8, U8(a));
+    case 16:
+        return Inst<U128>(Opcode::VectorBroadcast16, U16(a));
+    case 32:
+        return Inst<U128>(Opcode::VectorBroadcast32, U32(a));
+    case 64:
+        return Inst<U128>(Opcode::VectorBroadcast64, U64(a));
+    }
+    UNREACHABLE();
+    return {};
 }
 
 U128 IREmitter::VectorEor(const U128& a, const U128& b) {
     return Inst<U128>(Opcode::VectorEor, a, b);
 }
 
-U128 IREmitter::VectorEqual8(const U128& a, const U128& b) {
-    return Inst<U128>(Opcode::VectorEqual8, a, b);
-}
-
-U128 IREmitter::VectorEqual16(const U128& a, const U128& b) {
-    return Inst<U128>(Opcode::VectorEqual16, a, b);
-}
-
-U128 IREmitter::VectorEqual32(const U128& a, const U128& b) {
-    return Inst<U128>(Opcode::VectorEqual32, a, b);
-}
-
-U128 IREmitter::VectorEqual64(const U128& a, const U128& b) {
-    return Inst<U128>(Opcode::VectorEqual64, a, b);
-}
-
-U128 IREmitter::VectorEqual128(const U128& a, const U128& b) {
-    return Inst<U128>(Opcode::VectorEqual128, a, b);
-}
-
-U128 IREmitter::VectorInterleaveLower8(const U128& a, const U128& b) {
-    return Inst<U128>(Opcode::VectorInterleaveLower8, a, b);
-}
-
-U128 IREmitter::VectorInterleaveLower16(const U128& a, const U128& b) {
-    return Inst<U128>(Opcode::VectorInterleaveLower16, a, b);
-}
-
-U128 IREmitter::VectorInterleaveLower32(const U128& a, const U128& b) {
-    return Inst<U128>(Opcode::VectorInterleaveLower32, a, b);
-}
-
-U128 IREmitter::VectorInterleaveLower64(const U128& a, const U128& b) {
-    return Inst<U128>(Opcode::VectorInterleaveLower64, a, b);
-}
-
-U128 IREmitter::VectorLogicalShiftLeft8(const U128& a, u8 shift_amount) {
-    return Inst<U128>(Opcode::VectorLogicalShiftLeft8, a, Imm8(shift_amount));
-}
-
-U128 IREmitter::VectorLogicalShiftLeft16(const U128& a, u8 shift_amount) {
-    return Inst<U128>(Opcode::VectorLogicalShiftLeft16, a, Imm8(shift_amount));
-}
-
-U128 IREmitter::VectorLogicalShiftLeft32(const U128& a, u8 shift_amount) {
-    return Inst<U128>(Opcode::VectorLogicalShiftLeft32, a, Imm8(shift_amount));
-}
-
-U128 IREmitter::VectorLogicalShiftLeft64(const U128& a, u8 shift_amount) {
-    return Inst<U128>(Opcode::VectorLogicalShiftLeft64, a, Imm8(shift_amount));
+U128 IREmitter::VectorEqual(size_t esize, const U128& a, const U128& b) {
+    switch (esize) {
+    case 8:
+        return Inst<U128>(Opcode::VectorEqual8, a, b);
+    case 16:
+        return Inst<U128>(Opcode::VectorEqual16, a, b);
+    case 32:
+        return Inst<U128>(Opcode::VectorEqual32, a, b);
+    case 64:
+        return Inst<U128>(Opcode::VectorEqual64, a, b);
+    case 128:
+        return Inst<U128>(Opcode::VectorEqual128, a, b);
+    }
+    UNREACHABLE();
+    return {};
+}
+
+U128 IREmitter::VectorInterleaveLower(size_t esize, const U128& a, const U128& b) {
+    switch (esize) {
+    case 8:
+        return Inst<U128>(Opcode::VectorInterleaveLower8, a, b);
+    case 16:
+        return Inst<U128>(Opcode::VectorInterleaveLower16, a, b);
+    case 32:
+        return Inst<U128>(Opcode::VectorInterleaveLower32, a, b);
+    case 64:
+        return Inst<U128>(Opcode::VectorInterleaveLower64, a, b);
+    }
+    UNREACHABLE();
+    return {};
+}
+
+U128 IREmitter::VectorLogicalShiftLeft(size_t esize, const U128& a, u8 shift_amount) {
+    switch (esize) {
+    case 8:
+        return Inst<U128>(Opcode::VectorLogicalShiftLeft8, a, Imm8(shift_amount));
+    case 16:
+        return Inst<U128>(Opcode::VectorLogicalShiftLeft16, a, Imm8(shift_amount));
+    case 32:
+        return Inst<U128>(Opcode::VectorLogicalShiftLeft32, a, Imm8(shift_amount));
+    case 64:
+        return Inst<U128>(Opcode::VectorLogicalShiftLeft64, a, Imm8(shift_amount));
+    }
+    UNREACHABLE();
+    return {};
 }
 
 U128 IREmitter::VectorNot(const U128& a) {
@@ -897,32 +891,32 @@ U128 IREmitter::VectorOr(const U128& a, const U128& b) {
     return Inst<U128>(Opcode::VectorOr, a, b);
 }
 
-U128 IREmitter::VectorPairedAddLower8(const U128& a, const U128& b) {
-    return Inst<U128>(Opcode::VectorPairedAddLower8, a, b);
-}
-
-U128 IREmitter::VectorPairedAddLower16(const U128& a, const U128& b) {
-    return Inst<U128>(Opcode::VectorPairedAddLower16, a, b);
-}
-
-U128 IREmitter::VectorPairedAddLower32(const U128& a, const U128& b) {
-    return Inst<U128>(Opcode::VectorPairedAddLower32, a, b);
+U128 IREmitter::VectorPairedAddLower(size_t esize, const U128& a, const U128& b) {
+    switch (esize) {
+    case 8:
+        return Inst<U128>(Opcode::VectorPairedAddLower8, a, b);
+    case 16:
+        return Inst<U128>(Opcode::VectorPairedAddLower16, a, b);
+    case 32:
+        return Inst<U128>(Opcode::VectorPairedAddLower32, a, b);
+    }
+    UNREACHABLE();
+    return {};
 }
 
-U128 IREmitter::VectorPairedAdd8(const U128& a, const U128& b) {
-    return Inst<U128>(Opcode::VectorPairedAdd8, a, b);
-}
-
-U128 IREmitter::VectorPairedAdd16(const U128& a, const U128& b) {
-    return Inst<U128>(Opcode::VectorPairedAdd16, a, b);
-}
-
-U128 IREmitter::VectorPairedAdd32(const U128& a, const U128& b) {
-    return Inst<U128>(Opcode::VectorPairedAdd32, a, b);
-}
-
-U128 IREmitter::VectorPairedAdd64(const U128& a, const U128& b) {
-    return Inst<U128>(Opcode::VectorPairedAdd64, a, b);
+U128 IREmitter::VectorPairedAdd(size_t esize, const U128& a, const U128& b) {
+    switch (esize) {
+    case 8:
+        return Inst<U128>(Opcode::VectorPairedAdd8, a, b);
+    case 16:
+        return Inst<U128>(Opcode::VectorPairedAdd16, a, b);
+    case 32:
+        return Inst<U128>(Opcode::VectorPairedAdd32, a, b);
+    case 64:
+        return Inst<U128>(Opcode::VectorPairedAdd64, a, b);
+    }
+    UNREACHABLE();
+    return {};
 }
 
 U128 IREmitter::VectorZeroUpper(const U128& a) {

View file

@@ -207,41 +207,18 @@ public:
     UAny VectorGetElement(size_t esize, const U128& a, size_t index);
     U128 VectorSetElement(size_t esize, const U128& a, size_t index, const UAny& elem);
-    U128 VectorAdd16(const U128& a, const U128& b);
-    U128 VectorAdd32(const U128& a, const U128& b);
-    U128 VectorAdd64(const U128& a, const U128& b);
-    U128 VectorAdd8(const U128& a, const U128& b);
+    U128 VectorAdd(size_t esize, const U128& a, const U128& b);
     U128 VectorAnd(const U128& a, const U128& b);
-    U128 VectorBroadcast8(const U8& a);
-    U128 VectorBroadcast16(const U16& a);
-    U128 VectorBroadcast32(const U32& a);
-    U128 VectorBroadcast64(const U64& a);
-    U128 VectorBroadcastLower8(const U8& a);
-    U128 VectorBroadcastLower16(const U16& a);
-    U128 VectorBroadcastLower32(const U32& a);
+    U128 VectorBroadcast(size_t esize, const UAny& a);
+    U128 VectorBroadcastLower(size_t esize, const UAny& a);
     U128 VectorEor(const U128& a, const U128& b);
-    U128 VectorEqual8(const U128& a, const U128& b);
-    U128 VectorEqual16(const U128& a, const U128& b);
-    U128 VectorEqual32(const U128& a, const U128& b);
-    U128 VectorEqual64(const U128& a, const U128& b);
-    U128 VectorEqual128(const U128& a, const U128& b);
-    U128 VectorInterleaveLower8(const U128& a, const U128& b);
-    U128 VectorInterleaveLower16(const U128& a, const U128& b);
-    U128 VectorInterleaveLower32(const U128& a, const U128& b);
-    U128 VectorInterleaveLower64(const U128& a, const U128& b);
-    U128 VectorLogicalShiftLeft8(const U128& a, u8 shift_amount);
-    U128 VectorLogicalShiftLeft16(const U128& a, u8 shift_amount);
-    U128 VectorLogicalShiftLeft32(const U128& a, u8 shift_amount);
-    U128 VectorLogicalShiftLeft64(const U128& a, u8 shift_amount);
+    U128 VectorEqual(size_t esize, const U128& a, const U128& b);
+    U128 VectorInterleaveLower(size_t esize, const U128& a, const U128& b);
+    U128 VectorLogicalShiftLeft(size_t esize, const U128& a, u8 shift_amount);
     U128 VectorNot(const U128& a);
     U128 VectorOr(const U128& a, const U128& b);
-    U128 VectorPairedAdd8(const U128& a, const U128& b);
-    U128 VectorPairedAdd16(const U128& a, const U128& b);
-    U128 VectorPairedAdd32(const U128& a, const U128& b);
-    U128 VectorPairedAdd64(const U128& a, const U128& b);
-    U128 VectorPairedAddLower8(const U128& a, const U128& b);
-    U128 VectorPairedAddLower16(const U128& a, const U128& b);
-    U128 VectorPairedAddLower32(const U128& a, const U128& b);
+    U128 VectorPairedAdd(size_t esize, const U128& a, const U128& b);
+    U128 VectorPairedAddLower(size_t esize, const U128& a, const U128& b);
     U128 VectorZeroUpper(const U128& a);
     U32U64 FPAbs(const U32U64& a);