From e2358af5efac156704f49d3ad3610867af8c1a07 Mon Sep 17 00:00:00 2001
From: MerryMage <MerryMage@users.noreply.github.com>
Date: Fri, 28 Sep 2018 21:12:17 +0100
Subject: [PATCH] abi: Emit AVX instructions where able

Smaller codesize.
---
 src/backend/x64/abi.cpp | 29 +++++++++++++++++++----------
 src/backend/x64/abi.h   | 14 ++++++++------
 2 files changed, 27 insertions(+), 16 deletions(-)

diff --git a/src/backend/x64/abi.cpp b/src/backend/x64/abi.cpp
index 7684110d..cf0cd8a2 100644
--- a/src/backend/x64/abi.cpp
+++ b/src/backend/x64/abi.cpp
@@ -20,6 +20,7 @@
 #include <xbyak.h>
 
 #include "backend/x64/abi.h"
+#include "backend/x64/block_of_code.h"
 #include "common/common_types.h"
 #include "common/iterator_util.h"
 
@@ -58,7 +59,7 @@ static FrameInfo CalculateFrameInfo(size_t num_gprs, size_t num_xmms, size_t fra
 }
 
 template<typename RegisterArrayT>
-void ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, size_t frame_size, const RegisterArrayT& regs) {
+void ABI_PushRegistersAndAdjustStack(BlockOfCode& code, size_t frame_size, const RegisterArrayT& regs) {
     using namespace Xbyak::util;
 
     const size_t num_gprs = std::count_if(regs.begin(), regs.end(), HostLocIsGPR);
@@ -79,14 +80,18 @@ void ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, size_t frame_si
     size_t xmm_offset = frame_info.xmm_offset;
     for (HostLoc xmm : regs) {
         if (HostLocIsXMM(xmm)) {
-            code.movaps(code.xword[rsp + xmm_offset], HostLocToXmm(xmm));
+            if (code.DoesCpuSupport(Xbyak::util::Cpu::tAVX)) {
+                code.vmovaps(code.xword[rsp + xmm_offset], HostLocToXmm(xmm));
+            } else {
+                code.movaps(code.xword[rsp + xmm_offset], HostLocToXmm(xmm));
+            }
             xmm_offset += XMM_SIZE;
         }
     }
 }
 
 template<typename RegisterArrayT>
-void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, size_t frame_size, const RegisterArrayT& regs) {
+void ABI_PopRegistersAndAdjustStack(BlockOfCode& code, size_t frame_size, const RegisterArrayT& regs) {
     using namespace Xbyak::util;
 
     const size_t num_gprs = std::count_if(regs.begin(), regs.end(), HostLocIsGPR);
@@ -97,7 +102,11 @@ void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, size_t frame_siz
     size_t xmm_offset = frame_info.xmm_offset;
     for (HostLoc xmm : regs) {
         if (HostLocIsXMM(xmm)) {
-            code.movaps(HostLocToXmm(xmm), code.xword[rsp + xmm_offset]);
+            if (code.DoesCpuSupport(Xbyak::util::Cpu::tAVX)) {
+                code.vmovaps(HostLocToXmm(xmm), code.xword[rsp + xmm_offset]);
+            } else {
+                code.movaps(HostLocToXmm(xmm), code.xword[rsp + xmm_offset]);
+            }
             xmm_offset += XMM_SIZE;
         }
     }
@@ -113,29 +122,29 @@ void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, size_t frame_siz
     }
 }
 
-void ABI_PushCalleeSaveRegistersAndAdjustStack(Xbyak::CodeGenerator& code, size_t frame_size) {
+void ABI_PushCalleeSaveRegistersAndAdjustStack(BlockOfCode& code, size_t frame_size) {
     ABI_PushRegistersAndAdjustStack(code, frame_size, ABI_ALL_CALLEE_SAVE);
 }
 
-void ABI_PopCalleeSaveRegistersAndAdjustStack(Xbyak::CodeGenerator& code, size_t frame_size) {
+void ABI_PopCalleeSaveRegistersAndAdjustStack(BlockOfCode& code, size_t frame_size) {
     ABI_PopRegistersAndAdjustStack(code, frame_size, ABI_ALL_CALLEE_SAVE);
 }
 
-void ABI_PushCallerSaveRegistersAndAdjustStack(Xbyak::CodeGenerator& code, size_t frame_size) {
+void ABI_PushCallerSaveRegistersAndAdjustStack(BlockOfCode& code, size_t frame_size) {
     ABI_PushRegistersAndAdjustStack(code, frame_size, ABI_ALL_CALLER_SAVE);
 }
 
-void ABI_PopCallerSaveRegistersAndAdjustStack(Xbyak::CodeGenerator& code, size_t frame_size) {
+void ABI_PopCallerSaveRegistersAndAdjustStack(BlockOfCode& code, size_t frame_size) {
     ABI_PopRegistersAndAdjustStack(code, frame_size, ABI_ALL_CALLER_SAVE);
 }
 
-void ABI_PushCallerSaveRegistersAndAdjustStackExcept(Xbyak::CodeGenerator& code, HostLoc exception) {
+void ABI_PushCallerSaveRegistersAndAdjustStackExcept(BlockOfCode& code, HostLoc exception) {
     std::vector<HostLoc> regs;
     std::remove_copy(ABI_ALL_CALLER_SAVE.begin(), ABI_ALL_CALLER_SAVE.end(), std::back_inserter(regs), exception);
     ABI_PushRegistersAndAdjustStack(code, 0, regs);
 }
 
-void ABI_PopCallerSaveRegistersAndAdjustStackExcept(Xbyak::CodeGenerator& code, HostLoc exception) {
+void ABI_PopCallerSaveRegistersAndAdjustStackExcept(BlockOfCode& code, HostLoc exception) {
     std::vector<HostLoc> regs;
     std::remove_copy(ABI_ALL_CALLER_SAVE.begin(), ABI_ALL_CALLER_SAVE.end(), std::back_inserter(regs), exception);
     ABI_PopRegistersAndAdjustStack(code, 0, regs);
diff --git a/src/backend/x64/abi.h b/src/backend/x64/abi.h
index 5a3a6548..d53d8419 100644
--- a/src/backend/x64/abi.h
+++ b/src/backend/x64/abi.h
@@ -11,6 +11,8 @@
 
 namespace Dynarmic::BackendX64 {
 
+class BlockOfCode;
+
 #ifdef _WIN32
 
 constexpr HostLoc ABI_RETURN = HostLoc::RAX;
@@ -111,12 +113,12 @@ constexpr size_t ABI_SHADOW_SPACE = 0; // bytes
 
 static_assert(ABI_ALL_CALLER_SAVE.size() + ABI_ALL_CALLEE_SAVE.size() == 31, "Invalid total number of registers");
 
-void ABI_PushCalleeSaveRegistersAndAdjustStack(Xbyak::CodeGenerator& code, size_t frame_size = 0);
-void ABI_PopCalleeSaveRegistersAndAdjustStack(Xbyak::CodeGenerator& code, size_t frame_size = 0);
-void ABI_PushCallerSaveRegistersAndAdjustStack(Xbyak::CodeGenerator& code, size_t frame_size = 0);
-void ABI_PopCallerSaveRegistersAndAdjustStack(Xbyak::CodeGenerator& code, size_t frame_size = 0);
+void ABI_PushCalleeSaveRegistersAndAdjustStack(BlockOfCode& code, size_t frame_size = 0);
+void ABI_PopCalleeSaveRegistersAndAdjustStack(BlockOfCode& code, size_t frame_size = 0);
+void ABI_PushCallerSaveRegistersAndAdjustStack(BlockOfCode& code, size_t frame_size = 0);
+void ABI_PopCallerSaveRegistersAndAdjustStack(BlockOfCode& code, size_t frame_size = 0);
 
-void ABI_PushCallerSaveRegistersAndAdjustStackExcept(Xbyak::CodeGenerator& code, HostLoc exception);
-void ABI_PopCallerSaveRegistersAndAdjustStackExcept(Xbyak::CodeGenerator& code, HostLoc exception);
+void ABI_PushCallerSaveRegistersAndAdjustStackExcept(BlockOfCode& code, HostLoc exception);
+void ABI_PopCallerSaveRegistersAndAdjustStackExcept(BlockOfCode& code, HostLoc exception);
 
 } // namespace Dynarmic::BackendX64