emit_x64_vector: Reduce gf2p8affineqb requirement to GFNI
Currently, every use of `gf2p8affineqb` is guarded by the `AVX512F + AVX512VL + GFNI` requirement, when really we only need `GFNI` on its own. This allows `GFNI`-only chips to have GFNI code emitted for them without also needing AVX512. There _are_ chips currently in existence that ship with GFNI but have no implementation of AVX1/AVX2/AVX512 (and thus no VEX/EVEX encoding), such as Tremont (Lakefield) chips.
This commit is contained in:
parent e47d0d11c3
commit c157dfcc4c
1 changed file with 8 additions and 8 deletions
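For context on the feature check itself (not part of this commit): GFNI has its own CPUID bit, independent of the AVX-512 bits, so a GFNI-only detection can look roughly like the sketch below. This assumes GCC/Clang's <cpuid.h>; the HasGfni name is illustrative and not dynarmic's API.

#include <cpuid.h>

// Illustrative sketch: GFNI is reported in CPUID.(EAX=7, ECX=0):ECX bit 8,
// with no dependency on the AVX512F/AVX512VL bits in EBX.
static bool HasGfni() {
    unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0;
    if (!__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx)) {
        return false;  // CPUID leaf 7 not available
    }
    return (ecx & (1u << 8)) != 0;
}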
@@ -440,11 +440,11 @@ void EmitX64::EmitVectorAnd(EmitContext& ctx, IR::Inst* inst) {
 }
 
 static void ArithmeticShiftRightByte(EmitContext& ctx, BlockOfCode& code, const Xbyak::Xmm& result, u8 shift_amount) {
-    if (code.HasHostFeature(HostFeature::AVX512VL | HostFeature::GFNI)) {
+    if (code.HasHostFeature(HostFeature::GFNI)) {
         const u64 shift_matrix = shift_amount < 8
                                      ? (0x0102040810204080 << (shift_amount * 8)) | (0x8080808080808080 >> (64 - shift_amount * 8))
                                      : 0x8080808080808080;
-        code.vgf2p8affineqb(result, result, code.MConst(xword_b, shift_matrix), 0);
+        code.gf2p8affineqb(result, code.MConst(xword, shift_matrix, shift_matrix), 0);
         return;
     }
 
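As an aside, the per-byte arithmetic shift this hunk emits can also be expressed through the C intrinsic for the instruction. A minimal sketch follows, assuming GFNI support and a compiler flag such as -mgfni; the AsrBytes name is illustrative, not dynarmic code, and shift_amount is assumed to be at least 1:

#include <immintrin.h>
#include <cstdint>

// Sketch: arithmetic shift right of every byte using GF2P8AFFINEQB.
// The matrix is the identity matrix 0x0102040810204080 shifted so that each
// output bit reads the input bit shift_amount places higher, with the vacated
// top positions reading bit 7 (0x80 rows) to replicate the sign bit.
static __m128i AsrBytes(__m128i x, unsigned shift_amount) {  // assumes shift_amount >= 1
    const std::uint64_t matrix = shift_amount < 8
        ? (0x0102040810204080ull << (shift_amount * 8)) | (0x8080808080808080ull >> (64 - shift_amount * 8))
        : 0x8080808080808080ull;
    return _mm_gf2p8affine_epi64_epi8(x, _mm_set1_epi64x(static_cast<long long>(matrix)), 0);
}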
@@ -1472,9 +1472,9 @@ void EmitX64::EmitVectorLogicalShiftLeft8(EmitContext& ctx, IR::Inst* inst) {
         code.pxor(result, result);
     } else if (shift_amount == 1) {
         code.paddb(result, result);
-    } else if (code.HasHostFeature(HostFeature::AVX512VL | HostFeature::GFNI)) {
+    } else if (code.HasHostFeature(HostFeature::GFNI)) {
         const u64 shift_matrix = 0x0102040810204080 >> (shift_amount * 8);
-        code.vgf2p8affineqb(result, result, code.MConst(xword_b, shift_matrix), 0);
+        code.gf2p8affineqb(result, code.MConst(xword, shift_matrix, shift_matrix), 0);
     } else {
         const u64 replicand = (0xFFULL << shift_amount) & 0xFF;
         const u64 mask = Common::Replicate(replicand, Common::BitSize<u8>());
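Likewise, a hedged intrinsics sketch of this per-byte logical shift left (illustrative LslBytes name, shift_amount assumed to be in the 2 to 7 range handled by this branch):

#include <immintrin.h>
#include <cstdint>

// Sketch: logical shift left of every byte using GF2P8AFFINEQB.
// Shifting the identity matrix right by 8*shift_amount makes output bit i
// read input bit (i - shift_amount); the low output bits have all-zero
// matrix rows and therefore become zero.
static __m128i LslBytes(__m128i x, unsigned shift_amount) {  // assumes 2 <= shift_amount <= 7
    const std::uint64_t matrix = 0x0102040810204080ull >> (shift_amount * 8);
    return _mm_gf2p8affine_epi64_epi8(x, _mm_set1_epi64x(static_cast<long long>(matrix)), 0);
}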
@@ -1529,9 +1529,9 @@ void EmitX64::EmitVectorLogicalShiftRight8(EmitContext& ctx, IR::Inst* inst) {
         // Do nothing
     } else if (shift_amount >= 8) {
         code.pxor(result, result);
-    } else if (code.HasHostFeature(HostFeature::AVX512VL | HostFeature::GFNI)) {
+    } else if (code.HasHostFeature(HostFeature::GFNI)) {
         const u64 shift_matrix = 0x0102040810204080 << (shift_amount * 8);
-        code.vgf2p8affineqb(result, result, code.MConst(xword_b, shift_matrix), 0);
+        code.gf2p8affineqb(result, code.MConst(xword, shift_matrix, shift_matrix), 0);
     } else {
         const u64 replicand = 0xFEULL >> shift_amount;
         const u64 mask = Common::Replicate(replicand, Common::BitSize<u8>());
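These matrices are easy to sanity-check against a scalar model of the byte-wise affine transform. The sketch below follows the documented GF2P8AFFINEQB byte operation (output bit i is the parity of matrix byte 7-i ANDed with the input byte, with imm8 = 0); it is illustrative only and uses GCC/Clang's __builtin_parity:

#include <cstdint>

// Sketch: reference model of GF2P8AFFINEQB applied to a single byte (imm8 = 0).
// Output bit i = parity(matrix.byte[7 - i] & input).
static std::uint8_t AffineByte(std::uint64_t matrix, std::uint8_t input) {
    std::uint8_t result = 0;
    for (int i = 0; i < 8; ++i) {
        const std::uint8_t row = static_cast<std::uint8_t>(matrix >> (8 * (7 - i)));
        result |= static_cast<std::uint8_t>(__builtin_parity(row & input) << i);
    }
    return result;
}

// Example: the logical-shift-right-by-1 matrix maps 0x81 to 0x40, i.e.
// AffineByte(0x0102040810204080ull << 8, 0x81) == 0x40.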
@@ -2795,8 +2795,8 @@ void EmitX64::EmitVectorReverseBits(EmitContext& ctx, IR::Inst* inst) {
 
     const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]);
 
-    if (code.HasHostFeature(HostFeature::AVX512VL | HostFeature::GFNI)) {
-        code.vgf2p8affineqb(data, data, code.MConst(xword_b, 0x8040201008040201), 0);
+    if (code.HasHostFeature(HostFeature::GFNI)) {
+        code.gf2p8affineqb(data, code.MConst(xword, 0x8040201008040201, 0x8040201008040201), 0);
     } else {
         const Xbyak::Xmm high_nibble_reg = ctx.reg_alloc.ScratchXmm();
         code.movdqa(high_nibble_reg, code.MConst(xword, 0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0));
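As a closing aside, the bit-reversal matrix in this hunk also has a direct intrinsics equivalent; a minimal sketch with an illustrative ReverseBitsPerByte name, assuming GFNI support:

#include <immintrin.h>

// Sketch: reverse the bit order within each byte using GF2P8AFFINEQB.
// The matrix 0x8040201008040201 makes output bit i select input bit (7 - i).
static __m128i ReverseBitsPerByte(__m128i data) {
    const __m128i matrix = _mm_set1_epi64x(static_cast<long long>(0x8040201008040201ull));
    return _mm_gf2p8affine_epi64_epi8(data, matrix, 0);
}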