emit_x64_vector: Reduce gf2p8affineqb requirement to GFNI

Currently, every usage of `gf2p8affineqb` is guarded by the `AVX512F + AVX512VL + GFNI` requirement, when really we only need `GFNI` on its own. This will allow GFNI code to be emitted for `GFNI`-only chips without requiring AVX512 as well. There _are_ chips in existence today that ship with GFNI but have no implementation of AVX1/AVX2/AVX512 (and thus no VEX/EVEX encoding), such as Tremont (Lakefield) chips.
2021-06-07 16:48:52 -07:00 · 2021-06-07 16:48:52 -07:00 · c157dfcc4c
commit c157dfcc4c
parent e47d0d11c3
1 changed files with 8 additions and 8 deletions
--- a/src/dynarmic/backend/x64/emit_x64_vector.cpp
+++ b/src/dynarmic/backend/x64/emit_x64_vector.cpp
@ -440,11 +440,11 @@ void EmitX64::EmitVectorAnd(EmitContext& ctx, IR::Inst* inst) {
 }
 static void ArithmeticShiftRightByte(EmitContext& ctx, BlockOfCode& code, const Xbyak::Xmm& result, u8 shift_amount) {
-    if (code.HasHostFeature(HostFeature::AVX512VL | HostFeature::GFNI)) {
+    if (code.HasHostFeature(HostFeature::GFNI)) {
        const u64 shift_matrix = shift_amount < 8
                                   ? (0x0102040810204080 << (shift_amount * 8)) | (0x8080808080808080 >> (64 - shift_amount * 8))
                                   : 0x8080808080808080;
-        code.vgf2p8affineqb(result, result, code.MConst(xword_b, shift_matrix), 0);
+        code.gf2p8affineqb(result, code.MConst(xword, shift_matrix, shift_matrix), 0);
        return;
    }
@ -1472,9 +1472,9 @@ void EmitX64::EmitVectorLogicalShiftLeft8(EmitContext& ctx, IR::Inst* inst) {
        code.pxor(result, result);
    } else if (shift_amount == 1) {
        code.paddb(result, result);
-    } else if (code.HasHostFeature(HostFeature::AVX512VL | HostFeature::GFNI)) {
+    } else if (code.HasHostFeature(HostFeature::GFNI)) {
        const u64 shift_matrix = 0x0102040810204080 >> (shift_amount * 8);
-        code.vgf2p8affineqb(result, result, code.MConst(xword_b, shift_matrix), 0);
+        code.gf2p8affineqb(result, code.MConst(xword, shift_matrix, shift_matrix), 0);
    } else {
        const u64 replicand = (0xFFULL << shift_amount) & 0xFF;
        const u64 mask = Common::Replicate(replicand, Common::BitSize<u8>());
@ -1529,9 +1529,9 @@ void EmitX64::EmitVectorLogicalShiftRight8(EmitContext& ctx, IR::Inst* inst) {
        // Do nothing
    } else if (shift_amount >= 8) {
        code.pxor(result, result);
-    } else if (code.HasHostFeature(HostFeature::AVX512VL | HostFeature::GFNI)) {
+    } else if (code.HasHostFeature(HostFeature::GFNI)) {
        const u64 shift_matrix = 0x0102040810204080 << (shift_amount * 8);
-        code.vgf2p8affineqb(result, result, code.MConst(xword_b, shift_matrix), 0);
+        code.gf2p8affineqb(result, code.MConst(xword, shift_matrix, shift_matrix), 0);
    } else {
        const u64 replicand = 0xFEULL >> shift_amount;
        const u64 mask = Common::Replicate(replicand, Common::BitSize<u8>());
@ -2795,8 +2795,8 @@ void EmitX64::EmitVectorReverseBits(EmitContext& ctx, IR::Inst* inst) {
    const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]);
-    if (code.HasHostFeature(HostFeature::AVX512VL | HostFeature::GFNI)) {
+    if (code.HasHostFeature(HostFeature::GFNI)) {
-        code.vgf2p8affineqb(data, data, code.MConst(xword_b, 0x8040201008040201), 0);
+        code.gf2p8affineqb(data, code.MConst(xword, 0x8040201008040201, 0x8040201008040201), 0);
    } else {
        const Xbyak::Xmm high_nibble_reg = ctx.reg_alloc.ScratchXmm();
        code.movdqa(high_nibble_reg, code.MConst(xword, 0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0));