From e06933f123627e6c8eececc05c91f4f16f51c194 Mon Sep 17 00:00:00 2001 From: Wunk Date: Sat, 27 Mar 2021 14:36:51 -0700 Subject: [PATCH] block_of_code: Allow Fast BMI2 paths on Zen 3 (#593) BMI2 instructions such as `pdep` and `pext` have been known to be incredibly slow on AMD. On Zen 3 and newer, however, the performance of these instructions is now much better; earlier AMD architectures should still avoid BMI2. On Zen 2, pdep/pext took 300 cycles; on Zen 3 they take 3 cycles. This is a big enough improvement to allow BMI2 code to be dispatched if available. Zen 3 is detected by checking the family of the processor. --- src/backend/x64/block_of_code.cpp | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/src/backend/x64/block_of_code.cpp b/src/backend/x64/block_of_code.cpp index 4d15eb73..bc3e627c 100644 --- a/src/backend/x64/block_of_code.cpp +++ b/src/backend/x64/block_of_code.cpp @@ -13,6 +13,7 @@ #include "backend/x64/block_of_code.h" #include "backend/x64/perf_map.h" #include "common/assert.h" +#include "common/bit_util.h" #ifdef _WIN32 #include <windows.h> @@ -364,7 +365,21 @@ bool BlockOfCode::HasBMI2() const { } bool BlockOfCode::HasFastBMI2() const { - return DoesCpuSupport(Xbyak::util::Cpu::tBMI2) && !DoesCpuSupport(Xbyak::util::Cpu::tAMD); + if (DoesCpuSupport(Xbyak::util::Cpu::tBMI2)) { + // BMI2 instructions such as pdep and pext have been very slow up until Zen 3. + // Check for Zen 3 or newer by its family (0x19). + // See also: https://en.wikichip.org/wiki/amd/cpuid + if (DoesCpuSupport(Xbyak::util::Cpu::tAMD)) { + std::array<u32, 4> data{}; + cpu_info.getCpuid(1, data.data()); + const u32 family_base = Common::Bits< 8, 11>(data[0]); + const u32 family_extended = Common::Bits<20, 27>(data[0]); + const u32 family = family_base + family_extended; + return family >= 0x19; + } + return true; + } + return false; } bool BlockOfCode::HasFMA() const {