IR: Add AndNot{32,64} IR instruction
Also adds a BMI1-accelerated (ANDN) code path to the x64 backend, used when the host CPU supports it.
This commit is contained in:
parent
49d00634f9
commit
5971361160
14 changed files with 96 additions and 16 deletions
|
@ -1283,6 +1283,72 @@ void EmitX64::EmitAnd64(EmitContext& ctx, IR::Inst* inst) {
|
||||||
ctx.reg_alloc.DefineValue(inst, result);
|
ctx.reg_alloc.DefineValue(inst, result);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emits host code for the 32-bit AndNot IR instruction: result = args[0] & ~args[1].
void EmitX64::EmitAndNot32(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    // Fast path: with both operands in registers and BMI1 available, a single
    // ANDN does the job (ANDN dest, src1, src2 computes dest = ~src1 & src2).
    if (!(args[0].IsImmediate() || args[1].IsImmediate()) && code.HasHostFeature(HostFeature::BMI1)) {
        Xbyak::Reg32 lhs = ctx.reg_alloc.UseGpr(args[0]).cvt32();
        Xbyak::Reg32 rhs = ctx.reg_alloc.UseGpr(args[1]).cvt32();
        Xbyak::Reg32 dest = ctx.reg_alloc.ScratchGpr().cvt32();
        code.andn(dest, rhs, lhs);
        ctx.reg_alloc.DefineValue(inst, dest);
        return;
    }

    // Generic path, step 1: materialise ~args[1] in the result register.
    Xbyak::Reg32 result;
    if (!args[1].IsImmediate()) {
        result = ctx.reg_alloc.UseScratchGpr(args[1]).cvt32();
        code.not_(result);
    } else {
        // The complement of an immediate is computed here rather than in emitted code.
        result = ctx.reg_alloc.ScratchGpr().cvt32();
        code.mov(result, u32(~args[1].GetImmediateU32()));
    }

    // Generic path, step 2: AND args[0] into the inverted value.
    if (!args[0].IsImmediate()) {
        OpArg lhs = ctx.reg_alloc.UseOpArg(args[0]);
        lhs.setBit(32);
        code.and_(result, *lhs);
    } else {
        const u32 lhs_imm = args[0].GetImmediateU32();
        code.and_(result, lhs_imm);
    }

    ctx.reg_alloc.DefineValue(inst, result);
}
|
||||||
|
|
||||||
|
// Emits host code for the 64-bit AndNot IR instruction: result = args[0] & ~args[1].
void EmitX64::EmitAndNot64(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    // Fast path: with both operands in registers and BMI1 available, a single
    // ANDN does the job (ANDN dest, src1, src2 computes dest = ~src1 & src2).
    if (!(args[0].IsImmediate() || args[1].IsImmediate()) && code.HasHostFeature(HostFeature::BMI1)) {
        Xbyak::Reg64 lhs = ctx.reg_alloc.UseGpr(args[0]);
        Xbyak::Reg64 rhs = ctx.reg_alloc.UseGpr(args[1]);
        Xbyak::Reg64 dest = ctx.reg_alloc.ScratchGpr();
        code.andn(dest, rhs, lhs);
        ctx.reg_alloc.DefineValue(inst, dest);
        return;
    }

    // Generic path, step 1: materialise ~args[1] in the result register.
    Xbyak::Reg64 result;
    if (!args[1].IsImmediate()) {
        result = ctx.reg_alloc.UseScratchGpr(args[1]);
        code.not_(result);
    } else {
        // The complement of an immediate is computed here rather than in emitted code.
        result = ctx.reg_alloc.ScratchGpr();
        code.mov(result, ~args[1].GetImmediateU64());
    }

    // Generic path, step 2: AND args[0] into the inverted value.
    // The immediate form is only taken when args[0] fits in a signed 32-bit
    // immediate; anything else goes through a 64-bit operand.
    if (!args[0].FitsInImmediateS32()) {
        OpArg lhs = ctx.reg_alloc.UseOpArg(args[0]);
        lhs.setBit(64);
        code.and_(result, *lhs);
    } else {
        const u32 lhs_imm = u32(args[0].GetImmediateS32());
        code.and_(result, lhs_imm);
    }

    ctx.reg_alloc.DefineValue(inst, result);
}
|
||||||
|
|
||||||
void EmitX64::EmitEor32(EmitContext& ctx, IR::Inst* inst) {
|
void EmitX64::EmitEor32(EmitContext& ctx, IR::Inst* inst) {
|
||||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
|
||||||
|
|
|
@ -250,7 +250,7 @@ bool TranslatorVisitor::arm_BIC_imm(Cond cond, bool S, Reg n, Reg d, int rotate,
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto imm_carry = ArmExpandImm_C(rotate, imm8, ir.GetCFlag());
|
const auto imm_carry = ArmExpandImm_C(rotate, imm8, ir.GetCFlag());
|
||||||
const auto result = ir.And(ir.GetRegister(n), ir.Not(ir.Imm32(imm_carry.imm32)));
|
const auto result = ir.AndNot(ir.GetRegister(n), ir.Imm32(imm_carry.imm32));
|
||||||
if (d == Reg::PC) {
|
if (d == Reg::PC) {
|
||||||
if (S) {
|
if (S) {
|
||||||
// This is UNPREDICTABLE when in user-mode.
|
// This is UNPREDICTABLE when in user-mode.
|
||||||
|
@ -280,7 +280,7 @@ bool TranslatorVisitor::arm_BIC_reg(Cond cond, bool S, Reg n, Reg d, Imm<5> imm5
|
||||||
|
|
||||||
const auto carry_in = ir.GetCFlag();
|
const auto carry_in = ir.GetCFlag();
|
||||||
const auto shifted = EmitImmShift(ir.GetRegister(m), shift, imm5, carry_in);
|
const auto shifted = EmitImmShift(ir.GetRegister(m), shift, imm5, carry_in);
|
||||||
const auto result = ir.And(ir.GetRegister(n), ir.Not(shifted.result));
|
const auto result = ir.AndNot(ir.GetRegister(n), shifted.result);
|
||||||
if (d == Reg::PC) {
|
if (d == Reg::PC) {
|
||||||
if (S) {
|
if (S) {
|
||||||
// This is UNPREDICTABLE when in user-mode.
|
// This is UNPREDICTABLE when in user-mode.
|
||||||
|
@ -315,7 +315,7 @@ bool TranslatorVisitor::arm_BIC_rsr(Cond cond, bool S, Reg n, Reg d, Reg s, Shif
|
||||||
const auto shift_n = ir.LeastSignificantByte(ir.GetRegister(s));
|
const auto shift_n = ir.LeastSignificantByte(ir.GetRegister(s));
|
||||||
const auto carry_in = ir.GetCFlag();
|
const auto carry_in = ir.GetCFlag();
|
||||||
const auto shifted = EmitRegShift(ir.GetRegister(m), shift, shift_n, carry_in);
|
const auto shifted = EmitRegShift(ir.GetRegister(m), shift, shift_n, carry_in);
|
||||||
const auto result = ir.And(ir.GetRegister(n), ir.Not(shifted.result));
|
const auto result = ir.AndNot(ir.GetRegister(n), shifted.result);
|
||||||
|
|
||||||
ir.SetRegister(d, result);
|
ir.SetRegister(d, result);
|
||||||
if (S) {
|
if (S) {
|
||||||
|
|
|
@ -356,7 +356,7 @@ bool TranslatorVisitor::thumb16_MUL_reg(Reg n, Reg d_m) {
|
||||||
bool TranslatorVisitor::thumb16_BIC_reg(Reg m, Reg d_n) {
|
bool TranslatorVisitor::thumb16_BIC_reg(Reg m, Reg d_n) {
|
||||||
const Reg d = d_n;
|
const Reg d = d_n;
|
||||||
const Reg n = d_n;
|
const Reg n = d_n;
|
||||||
const auto result = ir.And(ir.GetRegister(n), ir.Not(ir.GetRegister(m)));
|
const auto result = ir.AndNot(ir.GetRegister(n), ir.GetRegister(m));
|
||||||
|
|
||||||
ir.SetRegister(d, result);
|
ir.SetRegister(d, result);
|
||||||
if (!ir.current_location.IT().IsInITBlock()) {
|
if (!ir.current_location.IT().IsInITBlock()) {
|
||||||
|
|
|
@ -45,7 +45,7 @@ bool TranslatorVisitor::thumb32_BIC_imm(Imm<1> i, bool S, Reg n, Imm<3> imm3, Re
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto imm_carry = ThumbExpandImm_C(i, imm3, imm8, ir.GetCFlag());
|
const auto imm_carry = ThumbExpandImm_C(i, imm3, imm8, ir.GetCFlag());
|
||||||
const auto result = ir.And(ir.GetRegister(n), ir.Not(ir.Imm32(imm_carry.imm32)));
|
const auto result = ir.AndNot(ir.GetRegister(n), ir.Imm32(imm_carry.imm32));
|
||||||
|
|
||||||
ir.SetRegister(d, result);
|
ir.SetRegister(d, result);
|
||||||
if (S) {
|
if (S) {
|
||||||
|
|
|
@ -45,7 +45,7 @@ bool TranslatorVisitor::thumb32_BIC_reg(bool S, Reg n, Imm<3> imm3, Reg d, Imm<2
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto shifted = EmitImmShift(ir.GetRegister(m), type, imm3, imm2, ir.GetCFlag());
|
const auto shifted = EmitImmShift(ir.GetRegister(m), type, imm3, imm2, ir.GetCFlag());
|
||||||
const auto result = ir.And(ir.GetRegister(n), ir.Not(shifted.result));
|
const auto result = ir.AndNot(ir.GetRegister(n), shifted.result);
|
||||||
ir.SetRegister(d, result);
|
ir.SetRegister(d, result);
|
||||||
if (S) {
|
if (S) {
|
||||||
ir.SetNFlag(ir.MostSignificantBit(result));
|
ir.SetNFlag(ir.MostSignificantBit(result));
|
||||||
|
|
|
@ -128,8 +128,8 @@ bool TranslatorVisitor::BIC_shift(bool sf, Imm<2> shift, Reg Rm, Imm<6> imm6, Re
|
||||||
const u8 shift_amount = imm6.ZeroExtend<u8>();
|
const u8 shift_amount = imm6.ZeroExtend<u8>();
|
||||||
|
|
||||||
const auto operand1 = X(datasize, Rn);
|
const auto operand1 = X(datasize, Rn);
|
||||||
const auto operand2 = ir.Not(ShiftReg(datasize, Rm, shift, ir.Imm8(shift_amount)));
|
const auto operand2 = ShiftReg(datasize, Rm, shift, ir.Imm8(shift_amount));
|
||||||
const auto result = ir.And(operand1, operand2);
|
const auto result = ir.AndNot(operand1, operand2);
|
||||||
|
|
||||||
X(datasize, Rd, result);
|
X(datasize, Rd, result);
|
||||||
return true;
|
return true;
|
||||||
|
@ -225,8 +225,8 @@ bool TranslatorVisitor::BICS(bool sf, Imm<2> shift, Reg Rm, Imm<6> imm6, Reg Rn,
|
||||||
const u8 shift_amount = imm6.ZeroExtend<u8>();
|
const u8 shift_amount = imm6.ZeroExtend<u8>();
|
||||||
|
|
||||||
const auto operand1 = X(datasize, Rn);
|
const auto operand1 = X(datasize, Rn);
|
||||||
const auto operand2 = ir.Not(ShiftReg(datasize, Rm, shift, ir.Imm8(shift_amount)));
|
const auto operand2 = ShiftReg(datasize, Rm, shift, ir.Imm8(shift_amount));
|
||||||
const auto result = ir.And(operand1, operand2);
|
const auto result = ir.AndNot(operand1, operand2);
|
||||||
|
|
||||||
ir.SetNZCV(ir.NZCVFrom(result));
|
ir.SetNZCV(ir.NZCVFrom(result));
|
||||||
X(datasize, Rd, result);
|
X(datasize, Rd, result);
|
||||||
|
|
|
@ -65,7 +65,7 @@ bool SM3TT2(TranslatorVisitor& v, Vec Vm, Imm<2> imm2, Vec Vn, Vec Vd, SM3TTVari
|
||||||
return v.ir.Eor(after_low_d, v.ir.Eor(top_d, before_top_d));
|
return v.ir.Eor(after_low_d, v.ir.Eor(top_d, before_top_d));
|
||||||
}
|
}
|
||||||
const IR::U32 tmp1 = v.ir.And(top_d, before_top_d);
|
const IR::U32 tmp1 = v.ir.And(top_d, before_top_d);
|
||||||
const IR::U32 tmp2 = v.ir.And(v.ir.Not(top_d), after_low_d);
|
const IR::U32 tmp2 = v.ir.AndNot(after_low_d, top_d);
|
||||||
return v.ir.Or(tmp1, tmp2);
|
return v.ir.Or(tmp1, tmp2);
|
||||||
}();
|
}();
|
||||||
const IR::U32 final_tt2 = v.ir.Add(tt2, v.ir.Add(low_d, v.ir.Add(top_n, wj)));
|
const IR::U32 final_tt2 = v.ir.Add(tt2, v.ir.Add(low_d, v.ir.Add(top_n, wj)));
|
||||||
|
|
|
@ -156,7 +156,7 @@ bool ShiftAndInsert(TranslatorVisitor& v, Imm<4> immh, Imm<3> immb, Vec Vn, Vec
|
||||||
return v.ir.LogicalShiftLeft(operand1, v.ir.Imm8(shift_amount));
|
return v.ir.LogicalShiftLeft(operand1, v.ir.Imm8(shift_amount));
|
||||||
}();
|
}();
|
||||||
|
|
||||||
const IR::U64 result = v.ir.Or(v.ir.And(operand2, v.ir.Not(v.ir.Imm64(mask))), shifted);
|
const IR::U64 result = v.ir.Or(v.ir.AndNot(operand2, v.ir.Imm64(mask)), shifted);
|
||||||
v.V_scalar(esize, Vd, result);
|
v.V_scalar(esize, Vd, result);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
|
@ -50,7 +50,7 @@ IR::U128 SHA512Hash(IREmitter& ir, Vec Vm, Vec Vn, Vec Vd, SHA512HashPart part)
|
||||||
const IR::U64 tmp1 = ir.And(a, b);
|
const IR::U64 tmp1 = ir.And(a, b);
|
||||||
|
|
||||||
if (part == SHA512HashPart::Part1) {
|
if (part == SHA512HashPart::Part1) {
|
||||||
const IR::U64 tmp2 = ir.And(ir.Not(a), c);
|
const IR::U64 tmp2 = ir.AndNot(c, a);
|
||||||
return ir.Eor(tmp1, tmp2);
|
return ir.Eor(tmp1, tmp2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -15,7 +15,7 @@ bool TranslatorVisitor::AXFlag() {
|
||||||
const IR::U32 v = ir.And(nzcv, ir.Imm32(0x10000000));
|
const IR::U32 v = ir.And(nzcv, ir.Imm32(0x10000000));
|
||||||
|
|
||||||
const IR::U32 new_z = ir.Or(ir.LogicalShiftLeft(v, ir.Imm8(2)), z);
|
const IR::U32 new_z = ir.Or(ir.LogicalShiftLeft(v, ir.Imm8(2)), z);
|
||||||
const IR::U32 new_c = ir.And(ir.And(c, ir.Not(ir.LogicalShiftLeft(v, ir.Imm8(1)))), ir.Imm32(0x20000000));
|
const IR::U32 new_c = ir.And(ir.AndNot(c, ir.LogicalShiftLeft(v, ir.Imm8(1))), ir.Imm32(0x20000000));
|
||||||
|
|
||||||
ir.SetNZCVRaw(ir.Or(new_z, new_c));
|
ir.SetNZCVRaw(ir.Or(new_z, new_c));
|
||||||
return true;
|
return true;
|
||||||
|
@ -27,8 +27,8 @@ bool TranslatorVisitor::XAFlag() {
|
||||||
const IR::U32 z = ir.And(nzcv, ir.Imm32(0x40000000));
|
const IR::U32 z = ir.And(nzcv, ir.Imm32(0x40000000));
|
||||||
const IR::U32 c = ir.And(nzcv, ir.Imm32(0x20000000));
|
const IR::U32 c = ir.And(nzcv, ir.Imm32(0x20000000));
|
||||||
|
|
||||||
const IR::U32 not_z = ir.And(ir.Not(z), ir.Imm32(0x40000000));
|
const IR::U32 not_z = ir.AndNot(ir.Imm32(0x40000000), z);
|
||||||
const IR::U32 not_c = ir.And(ir.Not(c), ir.Imm32(0x20000000));
|
const IR::U32 not_c = ir.AndNot(ir.Imm32(0x20000000), c);
|
||||||
|
|
||||||
const IR::U32 new_n = ir.And(ir.LogicalShiftLeft(not_c, ir.Imm8(2)),
|
const IR::U32 new_n = ir.And(ir.LogicalShiftLeft(not_c, ir.Imm8(2)),
|
||||||
ir.LogicalShiftLeft(not_z, ir.Imm8(1)));
|
ir.LogicalShiftLeft(not_z, ir.Imm8(1)));
|
||||||
|
|
|
@ -317,6 +317,15 @@ U32U64 IREmitter::And(const U32U64& a, const U32U64& b) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Builds an AndNot IR instruction over two operands of the same type.
// Call sites in this change replace And(a, Not(b)) with AndNot(a, b), so the
// second operand is the one that gets inverted.
U32U64 IREmitter::AndNot(const U32U64& a, const U32U64& b) {
    ASSERT(a.GetType() == b.GetType());

    // Anything that is not U32 is emitted with the 64-bit opcode.
    if (a.GetType() != Type::U32) {
        return Inst<U64>(Opcode::AndNot64, a, b);
    }
    return Inst<U32>(Opcode::AndNot32, a, b);
}
|
||||||
|
|
||||||
U32U64 IREmitter::Eor(const U32U64& a, const U32U64& b) {
|
U32U64 IREmitter::Eor(const U32U64& a, const U32U64& b) {
|
||||||
ASSERT(a.GetType() == b.GetType());
|
ASSERT(a.GetType() == b.GetType());
|
||||||
if (a.GetType() == Type::U32) {
|
if (a.GetType() == Type::U32) {
|
||||||
|
|
|
@ -144,6 +144,7 @@ public:
|
||||||
U32U64 UnsignedDiv(const U32U64& a, const U32U64& b);
|
U32U64 UnsignedDiv(const U32U64& a, const U32U64& b);
|
||||||
U32U64 SignedDiv(const U32U64& a, const U32U64& b);
|
U32U64 SignedDiv(const U32U64& a, const U32U64& b);
|
||||||
U32U64 And(const U32U64& a, const U32U64& b);
|
U32U64 And(const U32U64& a, const U32U64& b);
|
||||||
|
U32U64 AndNot(const U32U64& a, const U32U64& b);
|
||||||
U32U64 Eor(const U32U64& a, const U32U64& b);
|
U32U64 Eor(const U32U64& a, const U32U64& b);
|
||||||
U32U64 Or(const U32U64& a, const U32U64& b);
|
U32U64 Or(const U32U64& a, const U32U64& b);
|
||||||
U32U64 Not(const U32U64& a);
|
U32U64 Not(const U32U64& a);
|
||||||
|
|
|
@ -566,6 +566,8 @@ bool Inst::MayGetNZCVFromOp() const {
|
||||||
case Opcode::Sub64:
|
case Opcode::Sub64:
|
||||||
case Opcode::And32:
|
case Opcode::And32:
|
||||||
case Opcode::And64:
|
case Opcode::And64:
|
||||||
|
case Opcode::AndNot32:
|
||||||
|
case Opcode::AndNot64:
|
||||||
case Opcode::Eor32:
|
case Opcode::Eor32:
|
||||||
case Opcode::Eor64:
|
case Opcode::Eor64:
|
||||||
case Opcode::Or32:
|
case Opcode::Or32:
|
||||||
|
|
|
@ -143,6 +143,8 @@ OPCODE(SignedDiv32, U32, U32,
|
||||||
OPCODE(SignedDiv64, U64, U64, U64 )
|
OPCODE(SignedDiv64, U64, U64, U64 )
|
||||||
OPCODE(And32, U32, U32, U32 )
|
OPCODE(And32, U32, U32, U32 )
|
||||||
OPCODE(And64, U64, U64, U64 )
|
OPCODE(And64, U64, U64, U64 )
|
||||||
|
OPCODE(AndNot32, U32, U32, U32 )
|
||||||
|
OPCODE(AndNot64, U64, U64, U64 )
|
||||||
OPCODE(Eor32, U32, U32, U32 )
|
OPCODE(Eor32, U32, U32, U32 )
|
||||||
OPCODE(Eor64, U64, U64, U64 )
|
OPCODE(Eor64, U64, U64, U64 )
|
||||||
OPCODE(Or32, U32, U32, U32 )
|
OPCODE(Or32, U32, U32, U32 )
|
||||||
|
|
Loading…
Reference in a new issue