From e2358af5efac156704f49d3ad3610867af8c1a07 Mon Sep 17 00:00:00 2001
From: MerryMage <MerryMage@users.noreply.github.com>
Date: Fri, 28 Sep 2018 21:12:17 +0100
Subject: [PATCH] abi: Emit AVX instructions where able

Smaller codesize.
---
 src/backend/x64/abi.cpp | 29 +++++++++++++++++++----------
 src/backend/x64/abi.h   | 14 ++++++++------
 2 files changed, 27 insertions(+), 16 deletions(-)

diff --git a/src/backend/x64/abi.cpp b/src/backend/x64/abi.cpp
index 7684110d..cf0cd8a2 100644
--- a/src/backend/x64/abi.cpp
+++ b/src/backend/x64/abi.cpp
@@ -20,6 +20,7 @@
 #include <xbyak.h>
 
 #include "backend/x64/abi.h"
+#include "backend/x64/block_of_code.h"
 #include "common/common_types.h"
 #include "common/iterator_util.h"
 
@@ -58,7 +59,7 @@ static FrameInfo CalculateFrameInfo(size_t num_gprs, size_t num_xmms, size_t fra
 }
 
 template<typename RegisterArrayT>
-void ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, size_t frame_size, const RegisterArrayT& regs) {
+void ABI_PushRegistersAndAdjustStack(BlockOfCode& code, size_t frame_size, const RegisterArrayT& regs) {
     using namespace Xbyak::util;
 
     const size_t num_gprs = std::count_if(regs.begin(), regs.end(), HostLocIsGPR);
@@ -79,14 +80,18 @@ void ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, size_t frame_si
     size_t xmm_offset = frame_info.xmm_offset;
     for (HostLoc xmm : regs) {
         if (HostLocIsXMM(xmm)) {
-            code.movaps(code.xword[rsp + xmm_offset], HostLocToXmm(xmm));
+            if (code.DoesCpuSupport(Xbyak::util::Cpu::tAVX)) {
+                code.vmovaps(code.xword[rsp + xmm_offset], HostLocToXmm(xmm));
+            } else {
+                code.movaps(code.xword[rsp + xmm_offset], HostLocToXmm(xmm));
+            }
             xmm_offset += XMM_SIZE;
         }
     }
 }
 
 template<typename RegisterArrayT>
-void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, size_t frame_size, const RegisterArrayT& regs) {
+void ABI_PopRegistersAndAdjustStack(BlockOfCode& code, size_t frame_size, const RegisterArrayT& regs) {
     using namespace Xbyak::util;
 
     const size_t num_gprs = std::count_if(regs.begin(), regs.end(), HostLocIsGPR);
@@ -97,7 +102,11 @@ void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, size_t frame_siz
     size_t xmm_offset = frame_info.xmm_offset;
     for (HostLoc xmm : regs) {
         if (HostLocIsXMM(xmm)) {
-            code.movaps(HostLocToXmm(xmm), code.xword[rsp + xmm_offset]);
+            if (code.DoesCpuSupport(Xbyak::util::Cpu::tAVX)) {
+                code.vmovaps(HostLocToXmm(xmm), code.xword[rsp + xmm_offset]);
+            } else {
+                code.movaps(HostLocToXmm(xmm), code.xword[rsp + xmm_offset]);
+            }
             xmm_offset += XMM_SIZE;
         }
     }
@@ -113,29 +122,29 @@ void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, size_t frame_siz
     }
 }
 
-void ABI_PushCalleeSaveRegistersAndAdjustStack(Xbyak::CodeGenerator& code, size_t frame_size) {
+void ABI_PushCalleeSaveRegistersAndAdjustStack(BlockOfCode& code, size_t frame_size) {
     ABI_PushRegistersAndAdjustStack(code, frame_size, ABI_ALL_CALLEE_SAVE);
 }
 
-void ABI_PopCalleeSaveRegistersAndAdjustStack(Xbyak::CodeGenerator& code, size_t frame_size) {
+void ABI_PopCalleeSaveRegistersAndAdjustStack(BlockOfCode& code, size_t frame_size) {
     ABI_PopRegistersAndAdjustStack(code, frame_size, ABI_ALL_CALLEE_SAVE);
 }
 
-void ABI_PushCallerSaveRegistersAndAdjustStack(Xbyak::CodeGenerator& code, size_t frame_size) {
+void ABI_PushCallerSaveRegistersAndAdjustStack(BlockOfCode& code, size_t frame_size) {
     ABI_PushRegistersAndAdjustStack(code, frame_size, ABI_ALL_CALLER_SAVE);
 }
 
-void ABI_PopCallerSaveRegistersAndAdjustStack(Xbyak::CodeGenerator& code, size_t frame_size) {
+void ABI_PopCallerSaveRegistersAndAdjustStack(BlockOfCode& code, size_t frame_size) {
     ABI_PopRegistersAndAdjustStack(code, frame_size, ABI_ALL_CALLER_SAVE);
 }
 
-void ABI_PushCallerSaveRegistersAndAdjustStackExcept(Xbyak::CodeGenerator& code, HostLoc exception) {
+void ABI_PushCallerSaveRegistersAndAdjustStackExcept(BlockOfCode& code, HostLoc exception) {
     std::vector<HostLoc> regs;
     std::remove_copy(ABI_ALL_CALLER_SAVE.begin(), ABI_ALL_CALLER_SAVE.end(), std::back_inserter(regs), exception);
     ABI_PushRegistersAndAdjustStack(code, 0, regs);
 }
 
-void ABI_PopCallerSaveRegistersAndAdjustStackExcept(Xbyak::CodeGenerator& code, HostLoc exception) {
+void ABI_PopCallerSaveRegistersAndAdjustStackExcept(BlockOfCode& code, HostLoc exception) {
     std::vector<HostLoc> regs;
     std::remove_copy(ABI_ALL_CALLER_SAVE.begin(), ABI_ALL_CALLER_SAVE.end(), std::back_inserter(regs), exception);
     ABI_PopRegistersAndAdjustStack(code, 0, regs);
diff --git a/src/backend/x64/abi.h b/src/backend/x64/abi.h
index 5a3a6548..d53d8419 100644
--- a/src/backend/x64/abi.h
+++ b/src/backend/x64/abi.h
@@ -11,6 +11,8 @@
 
 namespace Dynarmic::BackendX64 {
 
+class BlockOfCode;
+
 #ifdef _WIN32
 
 constexpr HostLoc ABI_RETURN = HostLoc::RAX;
@@ -111,12 +113,12 @@ constexpr size_t ABI_SHADOW_SPACE = 0; // bytes
 
 static_assert(ABI_ALL_CALLER_SAVE.size() + ABI_ALL_CALLEE_SAVE.size() == 31, "Invalid total number of registers");
 
-void ABI_PushCalleeSaveRegistersAndAdjustStack(Xbyak::CodeGenerator& code, size_t frame_size = 0);
-void ABI_PopCalleeSaveRegistersAndAdjustStack(Xbyak::CodeGenerator& code, size_t frame_size = 0);
-void ABI_PushCallerSaveRegistersAndAdjustStack(Xbyak::CodeGenerator& code, size_t frame_size = 0);
-void ABI_PopCallerSaveRegistersAndAdjustStack(Xbyak::CodeGenerator& code, size_t frame_size = 0);
+void ABI_PushCalleeSaveRegistersAndAdjustStack(BlockOfCode& code, size_t frame_size = 0);
+void ABI_PopCalleeSaveRegistersAndAdjustStack(BlockOfCode& code, size_t frame_size = 0);
+void ABI_PushCallerSaveRegistersAndAdjustStack(BlockOfCode& code, size_t frame_size = 0);
+void ABI_PopCallerSaveRegistersAndAdjustStack(BlockOfCode& code, size_t frame_size = 0);
 
-void ABI_PushCallerSaveRegistersAndAdjustStackExcept(Xbyak::CodeGenerator& code, HostLoc exception);
-void ABI_PopCallerSaveRegistersAndAdjustStackExcept(Xbyak::CodeGenerator& code, HostLoc exception);
+void ABI_PushCallerSaveRegistersAndAdjustStackExcept(BlockOfCode& code, HostLoc exception);
+void ABI_PopCallerSaveRegistersAndAdjustStackExcept(BlockOfCode& code, HostLoc exception);
 
 } // namespace Dynarmic::BackendX64