diff --git a/src/backend/x64/block_of_code.cpp b/src/backend/x64/block_of_code.cpp
index 4d15eb73..bc3e627c 100644
--- a/src/backend/x64/block_of_code.cpp
+++ b/src/backend/x64/block_of_code.cpp
@@ -13,6 +13,7 @@
 #include "backend/x64/block_of_code.h"
 #include "backend/x64/perf_map.h"
 #include "common/assert.h"
+#include "common/bit_util.h"
 
 #ifdef _WIN32
     #include <windows.h>
@@ -364,7 +365,21 @@ bool BlockOfCode::HasBMI2() const {
 }
 
 bool BlockOfCode::HasFastBMI2() const {
-    return DoesCpuSupport(Xbyak::util::Cpu::tBMI2) && !DoesCpuSupport(Xbyak::util::Cpu::tAMD);
+    if (DoesCpuSupport(Xbyak::util::Cpu::tBMI2)) {
+        // BMI2 instructions such as pdep and pext have been very slow up until Zen 3.
+        // Check for Zen 3 or newer by its family (0x19).
+        // See also: https://en.wikichip.org/wiki/amd/cpuid
+        if (DoesCpuSupport(Xbyak::util::Cpu::tAMD)) {
+            std::array<u32, 4> data{};
+            cpu_info.getCpuid(1, data.data());
+            const u32 family_base = Common::Bits< 8, 11>(data[0]);
+            const u32 family_extended = Common::Bits<20, 27>(data[0]);
+            const u32 family = family_base + family_extended;
+            return family >= 0x19;
+        }
+        return true;
+    }
+    return false;
 }
 
 bool BlockOfCode::HasFMA() const {