diff --git a/src/backend_x64/block_of_code.cpp b/src/backend_x64/block_of_code.cpp index b13f6984..6db3f3bf 100644 --- a/src/backend_x64/block_of_code.cpp +++ b/src/backend_x64/block_of_code.cpp @@ -86,6 +86,12 @@ void BlockOfCode::GenRunCode() { align(); run_code = getCurr(); + // As we currently do not emit AVX instructions, AVX-SSE transition may occur. + // We avoid the transition penalty by calling vzeroupper. + if (DoesCpuSupport(Xbyak::util::Cpu::tAVX)) { + vzeroupper(); + } + // This serves two purposes: // 1. It saves all the registers we as a callee need to save. // 2. It aligns the stack so that the code the JIT emits can assume diff --git a/src/backend_x64/block_of_code.h b/src/backend_x64/block_of_code.h index fd0cf02f..e264f215 100644 --- a/src/backend_x64/block_of_code.h +++ b/src/backend_x64/block_of_code.h @@ -49,6 +49,12 @@ public: const u64 address = reinterpret_cast(fn); const u64 distance = address - (getCurr() + 5); + // As we do not know if user-code is AVX or SSE, an AVX-SSE transition may occur. + // We avoid the transition penalty by calling vzeroupper. + if (DoesCpuSupport(Xbyak::util::Cpu::tAVX)) { + vzeroupper(); + } + if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) { // Far call mov(rax, address);