IR: Add VectorAndNot IR instruction

And(a, Not(b)) is a common enough operation that this can
be fused into a single `AndNot` operation. On x64 this is also
a single `pandn` instruction rather than two.
This commit is contained in:
Wunkolo 2021-06-24 23:20:04 -07:00 committed by merry
parent 253713baf1
commit 49d00634f9
9 changed files with 27 additions and 10 deletions

View file

@ -439,6 +439,17 @@ void EmitX64::EmitVectorAnd(EmitContext& ctx, IR::Inst* inst) {
EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pand);
}
void EmitX64::EmitVectorAndNot(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseScratchXmm(args[1]);
code.pandn(xmm_b, xmm_a);
ctx.reg_alloc.DefineValue(inst, xmm_b);
}
static void ArithmeticShiftRightByte(EmitContext& ctx, BlockOfCode& code, const Xbyak::Xmm& result, u8 shift_amount) {
if (code.HasHostFeature(HostFeature::GFNI)) {
const u64 shift_matrix = shift_amount < 8

View file

@ -318,7 +318,7 @@ bool TranslatorVisitor::asimd_VAND_reg(bool D, size_t Vn, size_t Vd, bool N, boo
bool TranslatorVisitor::asimd_VBIC_reg(bool D, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm) {
return BitwiseInstruction<false>(*this, D, Vn, Vd, N, Q, M, Vm, [this](const auto& reg_n, const auto& reg_m) {
return ir.VectorAnd(reg_n, ir.VectorNot(reg_m));
return ir.VectorAndNot(reg_n, reg_m);
});
}
@ -342,19 +342,19 @@ bool TranslatorVisitor::asimd_VEOR_reg(bool D, size_t Vn, size_t Vd, bool N, boo
bool TranslatorVisitor::asimd_VBSL(bool D, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm) {
return BitwiseInstruction<true>(*this, D, Vn, Vd, N, Q, M, Vm, [this](const auto& reg_d, const auto& reg_n, const auto& reg_m) {
return ir.VectorOr(ir.VectorAnd(reg_n, reg_d), ir.VectorAnd(reg_m, ir.VectorNot(reg_d)));
return ir.VectorOr(ir.VectorAnd(reg_n, reg_d), ir.VectorAndNot(reg_m, reg_d));
});
}
bool TranslatorVisitor::asimd_VBIT(bool D, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm) {
return BitwiseInstruction<true>(*this, D, Vn, Vd, N, Q, M, Vm, [this](const auto& reg_d, const auto& reg_n, const auto& reg_m) {
return ir.VectorOr(ir.VectorAnd(reg_n, reg_m), ir.VectorAnd(reg_d, ir.VectorNot(reg_m)));
return ir.VectorOr(ir.VectorAnd(reg_n, reg_m), ir.VectorAndNot(reg_d, reg_m));
});
}
bool TranslatorVisitor::asimd_VBIF(bool D, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm) {
return BitwiseInstruction<true>(*this, D, Vn, Vd, N, Q, M, Vm, [this](const auto& reg_d, const auto& reg_n, const auto& reg_m) {
return ir.VectorOr(ir.VectorAnd(reg_d, reg_m), ir.VectorAnd(reg_n, ir.VectorNot(reg_m)));
return ir.VectorOr(ir.VectorAnd(reg_d, reg_m), ir.VectorAndNot(reg_n, reg_m));
});
}

View file

@ -177,7 +177,7 @@ bool TranslatorVisitor::asimd_VSRI(bool D, size_t imm6, size_t Vd, bool L, bool
const auto shifted = ir.VectorLogicalShiftRight(esize, reg_m, static_cast<u8>(shift_amount));
const auto mask_vec = ir.VectorBroadcast(esize, I(esize, mask));
const auto result = ir.VectorOr(ir.VectorAnd(reg_d, ir.VectorNot(mask_vec)), shifted);
const auto result = ir.VectorOr(ir.VectorAndNot(reg_d, mask_vec), shifted);
ir.SetVector(d, result);
return true;
@ -203,7 +203,7 @@ bool TranslatorVisitor::asimd_VSLI(bool D, size_t imm6, size_t Vd, bool L, bool
const auto shifted = ir.VectorLogicalShiftLeft(esize, reg_m, static_cast<u8>(shift_amount));
const auto mask_vec = ir.VectorBroadcast(esize, I(esize, mask));
const auto result = ir.VectorOr(ir.VectorAnd(reg_d, ir.VectorNot(mask_vec)), shifted);
const auto result = ir.VectorOr(ir.VectorAndNot(reg_d, mask_vec), shifted);
ir.SetVector(d, result);
return true;

View file

@ -23,7 +23,7 @@ bool TranslatorVisitor::BCAX(Vec Vm, Vec Va, Vec Vn, Vec Vd) {
const IR::U128 m = ir.GetQ(Vm);
const IR::U128 n = ir.GetQ(Vn);
const IR::U128 result = ir.VectorEor(n, ir.VectorAnd(m, ir.VectorNot(a)));
const IR::U128 result = ir.VectorEor(n, ir.VectorAndNot(m, a));
ir.SetQ(Vd, result);
return true;

View file

@ -350,7 +350,7 @@ bool TranslatorVisitor::SRI_2(bool Q, Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd)
const IR::U128 shifted = ir.VectorLogicalShiftRight(esize, operand1, shift_amount);
const IR::U128 mask_vec = ir.VectorBroadcast(esize, I(esize, mask));
const IR::U128 result = ir.VectorOr(ir.VectorAnd(operand2, ir.VectorNot(mask_vec)), shifted);
const IR::U128 result = ir.VectorOr(ir.VectorAndNot(operand2, mask_vec), shifted);
V(datasize, Vd, result);
return true;
@ -376,7 +376,7 @@ bool TranslatorVisitor::SLI_2(bool Q, Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd)
const IR::U128 shifted = ir.VectorLogicalShiftLeft(esize, operand1, shift_amount);
const IR::U128 mask_vec = ir.VectorBroadcast(esize, I(esize, mask));
const IR::U128 result = ir.VectorOr(ir.VectorAnd(operand2, ir.VectorNot(mask_vec)), shifted);
const IR::U128 result = ir.VectorOr(ir.VectorAndNot(operand2, mask_vec), shifted);
V(datasize, Vd, result);
return true;

View file

@ -773,7 +773,7 @@ bool TranslatorVisitor::BIC_asimd_reg(bool Q, Vec Vm, Vec Vn, Vec Vd) {
const IR::U128 operand1 = V(datasize, Vn);
const IR::U128 operand2 = V(datasize, Vm);
IR::U128 result = ir.VectorAnd(operand1, ir.VectorNot(operand2));
IR::U128 result = ir.VectorAndNot(operand1, operand2);
if (datasize == 64) {
result = ir.VectorZeroUpper(result);
}

View file

@ -958,6 +958,10 @@ U128 IREmitter::VectorAnd(const U128& a, const U128& b) {
return Inst<U128>(Opcode::VectorAnd, a, b);
}
U128 IREmitter::VectorAndNot(const U128& a, const U128& b) {
return Inst<U128>(Opcode::VectorAndNot, a, b);
}
U128 IREmitter::VectorArithmeticShiftRight(size_t esize, const U128& a, u8 shift_amount) {
switch (esize) {
case 8:

View file

@ -240,6 +240,7 @@ public:
U128 VectorAbs(size_t esize, const U128& a);
U128 VectorAdd(size_t esize, const U128& a, const U128& b);
U128 VectorAnd(const U128& a, const U128& b);
U128 VectorAndNot(const U128& a, const U128& b);
U128 VectorArithmeticShiftRight(size_t esize, const U128& a, u8 shift_amount);
U128 VectorArithmeticVShift(size_t esize, const U128& a, const U128& b);
U128 VectorBroadcast(size_t esize, const UAny& a);

View file

@ -291,6 +291,7 @@ OPCODE(VectorAdd16, U128, U128
OPCODE(VectorAdd32, U128, U128, U128 )
OPCODE(VectorAdd64, U128, U128, U128 )
OPCODE(VectorAnd, U128, U128, U128 )
OPCODE(VectorAndNot, U128, U128, U128 )
OPCODE(VectorArithmeticShiftRight8, U128, U128, U8 )
OPCODE(VectorArithmeticShiftRight16, U128, U128, U8 )
OPCODE(VectorArithmeticShiftRight32, U128, U128, U8 )