IR: Add VectorAndNot IR instruction
`And(a, Not(b))` is a common enough pattern that it is worth fusing into a single `VectorAndNot` IR operation. On x64 this also lowers to a single `pandn` instruction rather than two separate instructions.
This commit is contained in:
parent
253713baf1
commit
49d00634f9
9 changed files with 27 additions and 10 deletions
|
@ -439,6 +439,17 @@ void EmitX64::EmitVectorAnd(EmitContext& ctx, IR::Inst* inst) {
|
||||||
EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pand);
|
EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pand);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emits host code for the VectorAndNot IR instruction, i.e. the fused form of
// And(a, Not(b)), using a single `pandn` instead of a separate NOT and AND.
// args[0] is `a`, args[1] is `b`; the result is bound to `inst`.
void EmitX64::EmitVectorAndNot(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
|
||||||
|
// `a` is only read as the pandn source operand below.
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(args[0]);
|
||||||
|
// `b` is requested as a scratch register because pandn overwrites it and it
// is then handed out as the result register.
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseScratchXmm(args[1]);
|
||||||
|
|
||||||
|
// x86 PANDN inverts its destination operand, not its source:
// dest = (NOT dest) AND src. Hence b must be the destination: xmm_b = a & ~b.
code.pandn(xmm_b, xmm_a);
|
||||||
|
|
||||||
|
ctx.reg_alloc.DefineValue(inst, xmm_b);
|
||||||
|
}
|
||||||
|
|
||||||
static void ArithmeticShiftRightByte(EmitContext& ctx, BlockOfCode& code, const Xbyak::Xmm& result, u8 shift_amount) {
|
static void ArithmeticShiftRightByte(EmitContext& ctx, BlockOfCode& code, const Xbyak::Xmm& result, u8 shift_amount) {
|
||||||
if (code.HasHostFeature(HostFeature::GFNI)) {
|
if (code.HasHostFeature(HostFeature::GFNI)) {
|
||||||
const u64 shift_matrix = shift_amount < 8
|
const u64 shift_matrix = shift_amount < 8
|
||||||
|
|
|
@ -318,7 +318,7 @@ bool TranslatorVisitor::asimd_VAND_reg(bool D, size_t Vn, size_t Vd, bool N, boo
|
||||||
|
|
||||||
bool TranslatorVisitor::asimd_VBIC_reg(bool D, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm) {
|
bool TranslatorVisitor::asimd_VBIC_reg(bool D, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm) {
|
||||||
return BitwiseInstruction<false>(*this, D, Vn, Vd, N, Q, M, Vm, [this](const auto& reg_n, const auto& reg_m) {
|
return BitwiseInstruction<false>(*this, D, Vn, Vd, N, Q, M, Vm, [this](const auto& reg_n, const auto& reg_m) {
|
||||||
return ir.VectorAnd(reg_n, ir.VectorNot(reg_m));
|
return ir.VectorAndNot(reg_n, reg_m);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -342,19 +342,19 @@ bool TranslatorVisitor::asimd_VEOR_reg(bool D, size_t Vn, size_t Vd, bool N, boo
|
||||||
|
|
||||||
bool TranslatorVisitor::asimd_VBSL(bool D, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm) {
|
bool TranslatorVisitor::asimd_VBSL(bool D, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm) {
|
||||||
return BitwiseInstruction<true>(*this, D, Vn, Vd, N, Q, M, Vm, [this](const auto& reg_d, const auto& reg_n, const auto& reg_m) {
|
return BitwiseInstruction<true>(*this, D, Vn, Vd, N, Q, M, Vm, [this](const auto& reg_d, const auto& reg_n, const auto& reg_m) {
|
||||||
return ir.VectorOr(ir.VectorAnd(reg_n, reg_d), ir.VectorAnd(reg_m, ir.VectorNot(reg_d)));
|
return ir.VectorOr(ir.VectorAnd(reg_n, reg_d), ir.VectorAndNot(reg_m, reg_d));
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
bool TranslatorVisitor::asimd_VBIT(bool D, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm) {
|
bool TranslatorVisitor::asimd_VBIT(bool D, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm) {
|
||||||
return BitwiseInstruction<true>(*this, D, Vn, Vd, N, Q, M, Vm, [this](const auto& reg_d, const auto& reg_n, const auto& reg_m) {
|
return BitwiseInstruction<true>(*this, D, Vn, Vd, N, Q, M, Vm, [this](const auto& reg_d, const auto& reg_n, const auto& reg_m) {
|
||||||
return ir.VectorOr(ir.VectorAnd(reg_n, reg_m), ir.VectorAnd(reg_d, ir.VectorNot(reg_m)));
|
return ir.VectorOr(ir.VectorAnd(reg_n, reg_m), ir.VectorAndNot(reg_d, reg_m));
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
bool TranslatorVisitor::asimd_VBIF(bool D, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm) {
|
bool TranslatorVisitor::asimd_VBIF(bool D, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm) {
|
||||||
return BitwiseInstruction<true>(*this, D, Vn, Vd, N, Q, M, Vm, [this](const auto& reg_d, const auto& reg_n, const auto& reg_m) {
|
return BitwiseInstruction<true>(*this, D, Vn, Vd, N, Q, M, Vm, [this](const auto& reg_d, const auto& reg_n, const auto& reg_m) {
|
||||||
return ir.VectorOr(ir.VectorAnd(reg_d, reg_m), ir.VectorAnd(reg_n, ir.VectorNot(reg_m)));
|
return ir.VectorOr(ir.VectorAnd(reg_d, reg_m), ir.VectorAndNot(reg_n, reg_m));
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -177,7 +177,7 @@ bool TranslatorVisitor::asimd_VSRI(bool D, size_t imm6, size_t Vd, bool L, bool
|
||||||
|
|
||||||
const auto shifted = ir.VectorLogicalShiftRight(esize, reg_m, static_cast<u8>(shift_amount));
|
const auto shifted = ir.VectorLogicalShiftRight(esize, reg_m, static_cast<u8>(shift_amount));
|
||||||
const auto mask_vec = ir.VectorBroadcast(esize, I(esize, mask));
|
const auto mask_vec = ir.VectorBroadcast(esize, I(esize, mask));
|
||||||
const auto result = ir.VectorOr(ir.VectorAnd(reg_d, ir.VectorNot(mask_vec)), shifted);
|
const auto result = ir.VectorOr(ir.VectorAndNot(reg_d, mask_vec), shifted);
|
||||||
|
|
||||||
ir.SetVector(d, result);
|
ir.SetVector(d, result);
|
||||||
return true;
|
return true;
|
||||||
|
@ -203,7 +203,7 @@ bool TranslatorVisitor::asimd_VSLI(bool D, size_t imm6, size_t Vd, bool L, bool
|
||||||
|
|
||||||
const auto shifted = ir.VectorLogicalShiftLeft(esize, reg_m, static_cast<u8>(shift_amount));
|
const auto shifted = ir.VectorLogicalShiftLeft(esize, reg_m, static_cast<u8>(shift_amount));
|
||||||
const auto mask_vec = ir.VectorBroadcast(esize, I(esize, mask));
|
const auto mask_vec = ir.VectorBroadcast(esize, I(esize, mask));
|
||||||
const auto result = ir.VectorOr(ir.VectorAnd(reg_d, ir.VectorNot(mask_vec)), shifted);
|
const auto result = ir.VectorOr(ir.VectorAndNot(reg_d, mask_vec), shifted);
|
||||||
|
|
||||||
ir.SetVector(d, result);
|
ir.SetVector(d, result);
|
||||||
return true;
|
return true;
|
||||||
|
|
|
@ -23,7 +23,7 @@ bool TranslatorVisitor::BCAX(Vec Vm, Vec Va, Vec Vn, Vec Vd) {
|
||||||
const IR::U128 m = ir.GetQ(Vm);
|
const IR::U128 m = ir.GetQ(Vm);
|
||||||
const IR::U128 n = ir.GetQ(Vn);
|
const IR::U128 n = ir.GetQ(Vn);
|
||||||
|
|
||||||
const IR::U128 result = ir.VectorEor(n, ir.VectorAnd(m, ir.VectorNot(a)));
|
const IR::U128 result = ir.VectorEor(n, ir.VectorAndNot(m, a));
|
||||||
|
|
||||||
ir.SetQ(Vd, result);
|
ir.SetQ(Vd, result);
|
||||||
return true;
|
return true;
|
||||||
|
|
|
@ -350,7 +350,7 @@ bool TranslatorVisitor::SRI_2(bool Q, Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd)
|
||||||
|
|
||||||
const IR::U128 shifted = ir.VectorLogicalShiftRight(esize, operand1, shift_amount);
|
const IR::U128 shifted = ir.VectorLogicalShiftRight(esize, operand1, shift_amount);
|
||||||
const IR::U128 mask_vec = ir.VectorBroadcast(esize, I(esize, mask));
|
const IR::U128 mask_vec = ir.VectorBroadcast(esize, I(esize, mask));
|
||||||
const IR::U128 result = ir.VectorOr(ir.VectorAnd(operand2, ir.VectorNot(mask_vec)), shifted);
|
const IR::U128 result = ir.VectorOr(ir.VectorAndNot(operand2, mask_vec), shifted);
|
||||||
|
|
||||||
V(datasize, Vd, result);
|
V(datasize, Vd, result);
|
||||||
return true;
|
return true;
|
||||||
|
@ -376,7 +376,7 @@ bool TranslatorVisitor::SLI_2(bool Q, Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd)
|
||||||
|
|
||||||
const IR::U128 shifted = ir.VectorLogicalShiftLeft(esize, operand1, shift_amount);
|
const IR::U128 shifted = ir.VectorLogicalShiftLeft(esize, operand1, shift_amount);
|
||||||
const IR::U128 mask_vec = ir.VectorBroadcast(esize, I(esize, mask));
|
const IR::U128 mask_vec = ir.VectorBroadcast(esize, I(esize, mask));
|
||||||
const IR::U128 result = ir.VectorOr(ir.VectorAnd(operand2, ir.VectorNot(mask_vec)), shifted);
|
const IR::U128 result = ir.VectorOr(ir.VectorAndNot(operand2, mask_vec), shifted);
|
||||||
|
|
||||||
V(datasize, Vd, result);
|
V(datasize, Vd, result);
|
||||||
return true;
|
return true;
|
||||||
|
|
|
@ -773,7 +773,7 @@ bool TranslatorVisitor::BIC_asimd_reg(bool Q, Vec Vm, Vec Vn, Vec Vd) {
|
||||||
const IR::U128 operand1 = V(datasize, Vn);
|
const IR::U128 operand1 = V(datasize, Vn);
|
||||||
const IR::U128 operand2 = V(datasize, Vm);
|
const IR::U128 operand2 = V(datasize, Vm);
|
||||||
|
|
||||||
IR::U128 result = ir.VectorAnd(operand1, ir.VectorNot(operand2));
|
IR::U128 result = ir.VectorAndNot(operand1, operand2);
|
||||||
if (datasize == 64) {
|
if (datasize == 64) {
|
||||||
result = ir.VectorZeroUpper(result);
|
result = ir.VectorZeroUpper(result);
|
||||||
}
|
}
|
||||||
|
|
|
@ -958,6 +958,10 @@ U128 IREmitter::VectorAnd(const U128& a, const U128& b) {
|
||||||
return Inst<U128>(Opcode::VectorAnd, a, b);
|
return Inst<U128>(Opcode::VectorAnd, a, b);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emits a VectorAndNot IR instruction taking `a` and `b` as its operands, in that order.
// Per the commit description this is the fused form of And(a, Not(b)), so the
// second operand is the one that gets inverted.
U128 IREmitter::VectorAndNot(const U128& a, const U128& b) {
|
||||||
|
return Inst<U128>(Opcode::VectorAndNot, a, b);
|
||||||
|
}
|
||||||
|
|
||||||
U128 IREmitter::VectorArithmeticShiftRight(size_t esize, const U128& a, u8 shift_amount) {
|
U128 IREmitter::VectorArithmeticShiftRight(size_t esize, const U128& a, u8 shift_amount) {
|
||||||
switch (esize) {
|
switch (esize) {
|
||||||
case 8:
|
case 8:
|
||||||
|
|
|
@ -240,6 +240,7 @@ public:
|
||||||
U128 VectorAbs(size_t esize, const U128& a);
|
U128 VectorAbs(size_t esize, const U128& a);
|
||||||
U128 VectorAdd(size_t esize, const U128& a, const U128& b);
|
U128 VectorAdd(size_t esize, const U128& a, const U128& b);
|
||||||
U128 VectorAnd(const U128& a, const U128& b);
|
U128 VectorAnd(const U128& a, const U128& b);
|
||||||
|
U128 VectorAndNot(const U128& a, const U128& b);
|
||||||
U128 VectorArithmeticShiftRight(size_t esize, const U128& a, u8 shift_amount);
|
U128 VectorArithmeticShiftRight(size_t esize, const U128& a, u8 shift_amount);
|
||||||
U128 VectorArithmeticVShift(size_t esize, const U128& a, const U128& b);
|
U128 VectorArithmeticVShift(size_t esize, const U128& a, const U128& b);
|
||||||
U128 VectorBroadcast(size_t esize, const UAny& a);
|
U128 VectorBroadcast(size_t esize, const UAny& a);
|
||||||
|
|
|
@ -291,6 +291,7 @@ OPCODE(VectorAdd16, U128, U128
|
||||||
OPCODE(VectorAdd32, U128, U128, U128 )
|
OPCODE(VectorAdd32, U128, U128, U128 )
|
||||||
OPCODE(VectorAdd64, U128, U128, U128 )
|
OPCODE(VectorAdd64, U128, U128, U128 )
|
||||||
OPCODE(VectorAnd, U128, U128, U128 )
|
OPCODE(VectorAnd, U128, U128, U128 )
|
||||||
|
OPCODE(VectorAndNot, U128, U128, U128 )
|
||||||
OPCODE(VectorArithmeticShiftRight8, U128, U128, U8 )
|
OPCODE(VectorArithmeticShiftRight8, U128, U128, U8 )
|
||||||
OPCODE(VectorArithmeticShiftRight16, U128, U128, U8 )
|
OPCODE(VectorArithmeticShiftRight16, U128, U128, U8 )
|
||||||
OPCODE(VectorArithmeticShiftRight32, U128, U128, U8 )
|
OPCODE(VectorArithmeticShiftRight32, U128, U128, U8 )
|
||||||
|
|
Loading…
Reference in a new issue