IR: Implement Vector{Max,Min}{Signed,Unsigned}

This commit is contained in:
MerryMage 2018-02-13 17:56:46 +00:00
parent adb7f5f86f
commit 47c0ad0fc8
4 changed files with 195 additions and 0 deletions

View file

@@ -4,6 +4,8 @@
* General Public License version 2 or any later version. * General Public License version 2 or any later version.
*/ */
#include <algorithm>
#include "backend_x64/abi.h" #include "backend_x64/abi.h"
#include "backend_x64/block_of_code.h" #include "backend_x64/block_of_code.h"
#include "backend_x64/emit_x64.h" #include "backend_x64/emit_x64.h"
@@ -650,6 +652,119 @@ void EmitX64::EmitVectorLogicalShiftRight64(EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.DefineValue(inst, result); ctx.reg_alloc.DefineValue(inst, result);
} }
// Emits the elementwise signed maximum of two vectors of sixteen 8-bit lanes.
// Prefers the single SSE4.1 pmaxsb instruction; on CPUs without SSE4.1 a
// scalar per-lane fallback routine is emitted instead.
void EmitX64::EmitVectorMaxS8(EmitContext& ctx, IR::Inst* inst) {
    if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
        EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pmaxsb);
        return;
    }

    EmitTwoArgumentFallback(code, ctx, inst, [](std::array<s8, 16>& result, const std::array<s8, 16>& lhs, const std::array<s8, 16>& rhs) {
        for (size_t i = 0; i < result.size(); ++i) {
            result[i] = std::max(lhs[i], rhs[i]);
        }
    });
}
// Emits the elementwise signed maximum of two vectors of eight 16-bit lanes.
// No CPUID guard is present, unlike the S8/S32 handlers — presumably pmaxsw
// is available at this backend's baseline ISA level; confirm against the
// x86 instruction tables.
void EmitX64::EmitVectorMaxS16(EmitContext& ctx, IR::Inst* inst) {
    EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pmaxsw);
}
// Emits the elementwise signed maximum of two vectors of four 32-bit lanes.
// Prefers the single SSE4.1 pmaxsd instruction; on CPUs without SSE4.1 a
// scalar per-lane fallback routine is emitted instead.
void EmitX64::EmitVectorMaxS32(EmitContext& ctx, IR::Inst* inst) {
    if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
        EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pmaxsd);
        return;
    }

    EmitTwoArgumentFallback(code, ctx, inst, [](std::array<s32, 4>& result, const std::array<s32, 4>& lhs, const std::array<s32, 4>& rhs) {
        for (size_t i = 0; i < result.size(); ++i) {
            result[i] = std::max(lhs[i], rhs[i]);
        }
    });
}
// Emits the elementwise signed maximum of two vectors of two 64-bit lanes.
// No packed 64-bit max instruction is used here, so a scalar per-lane
// fallback is always emitted.
void EmitX64::EmitVectorMaxS64(EmitContext& ctx, IR::Inst* inst) {
    EmitTwoArgumentFallback(code, ctx, inst, [](std::array<s64, 2>& result, const std::array<s64, 2>& lhs, const std::array<s64, 2>& rhs) {
        for (size_t i = 0; i < result.size(); ++i) {
            result[i] = std::max(lhs[i], rhs[i]);
        }
    });
}
// Emits the elementwise unsigned maximum of two vectors of sixteen 8-bit
// lanes. No CPUID guard is present — presumably pmaxub is available at this
// backend's baseline ISA level; confirm against the x86 instruction tables.
void EmitX64::EmitVectorMaxU8(EmitContext& ctx, IR::Inst* inst) {
    EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pmaxub);
}
// Emits the elementwise unsigned maximum of two vectors of eight 16-bit
// lanes. Prefers the single SSE4.1 pmaxuw instruction; on CPUs without
// SSE4.1 a scalar per-lane fallback routine is emitted instead.
void EmitX64::EmitVectorMaxU16(EmitContext& ctx, IR::Inst* inst) {
    if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
        EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pmaxuw);
        return;
    }

    EmitTwoArgumentFallback(code, ctx, inst, [](std::array<u16, 8>& result, const std::array<u16, 8>& lhs, const std::array<u16, 8>& rhs) {
        for (size_t i = 0; i < result.size(); ++i) {
            result[i] = std::max(lhs[i], rhs[i]);
        }
    });
}
// Emits the elementwise unsigned maximum of two vectors of four 32-bit lanes.
// SSE4.1 provides pmaxud for exactly this operation; use it when available
// (mirroring EmitVectorMaxU16/EmitVectorMaxS32) instead of always taking the
// scalar fallback as before.
void EmitX64::EmitVectorMaxU32(EmitContext& ctx, IR::Inst* inst) {
    if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
        EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pmaxud);
        return;
    }

    // Pre-SSE4.1 CPUs: compute each lane with scalar code.
    EmitTwoArgumentFallback(code, ctx, inst, [](std::array<u32, 4>& result, const std::array<u32, 4>& a, const std::array<u32, 4>& b){
        std::transform(a.begin(), a.end(), b.begin(), result.begin(), [](auto x, auto y) { return std::max(x, y); });
    });
}
// Emits the elementwise unsigned maximum of two vectors of two 64-bit lanes.
// No packed 64-bit max instruction is used here, so a scalar per-lane
// fallback is always emitted.
void EmitX64::EmitVectorMaxU64(EmitContext& ctx, IR::Inst* inst) {
    EmitTwoArgumentFallback(code, ctx, inst, [](std::array<u64, 2>& result, const std::array<u64, 2>& lhs, const std::array<u64, 2>& rhs) {
        for (size_t i = 0; i < result.size(); ++i) {
            result[i] = std::max(lhs[i], rhs[i]);
        }
    });
}
// Emits the elementwise signed minimum of two vectors of sixteen 8-bit lanes.
// Prefers the single SSE4.1 pminsb instruction; on CPUs without SSE4.1 a
// scalar per-lane fallback routine is emitted instead.
void EmitX64::EmitVectorMinS8(EmitContext& ctx, IR::Inst* inst) {
    if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
        EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pminsb);
        return;
    }

    EmitTwoArgumentFallback(code, ctx, inst, [](std::array<s8, 16>& result, const std::array<s8, 16>& lhs, const std::array<s8, 16>& rhs) {
        for (size_t i = 0; i < result.size(); ++i) {
            result[i] = std::min(lhs[i], rhs[i]);
        }
    });
}
// Emits the elementwise signed minimum of two vectors of eight 16-bit lanes.
// No CPUID guard is present, unlike the S8 handler — presumably pminsw is
// available at this backend's baseline ISA level; confirm against the x86
// instruction tables.
void EmitX64::EmitVectorMinS16(EmitContext& ctx, IR::Inst* inst) {
    EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pminsw);
}
// Emits the elementwise signed minimum of two vectors of four 32-bit lanes.
// SSE4.1 provides pminsd for exactly this operation; use it when available
// (mirroring EmitVectorMaxS32/EmitVectorMinS8) instead of always taking the
// scalar fallback as before.
void EmitX64::EmitVectorMinS32(EmitContext& ctx, IR::Inst* inst) {
    if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
        EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pminsd);
        return;
    }

    // Pre-SSE4.1 CPUs: compute each lane with scalar code.
    EmitTwoArgumentFallback(code, ctx, inst, [](std::array<s32, 4>& result, const std::array<s32, 4>& a, const std::array<s32, 4>& b){
        std::transform(a.begin(), a.end(), b.begin(), result.begin(), [](auto x, auto y) { return std::min(x, y); });
    });
}
// Emits the elementwise signed minimum of two vectors of two 64-bit lanes.
// No packed 64-bit min instruction is used here, so a scalar per-lane
// fallback is always emitted.
void EmitX64::EmitVectorMinS64(EmitContext& ctx, IR::Inst* inst) {
    EmitTwoArgumentFallback(code, ctx, inst, [](std::array<s64, 2>& result, const std::array<s64, 2>& lhs, const std::array<s64, 2>& rhs) {
        for (size_t i = 0; i < result.size(); ++i) {
            result[i] = std::min(lhs[i], rhs[i]);
        }
    });
}
// Emits the elementwise unsigned minimum of two vectors of sixteen 8-bit
// lanes. No CPUID guard is present — presumably pminub is available at this
// backend's baseline ISA level; confirm against the x86 instruction tables.
void EmitX64::EmitVectorMinU8(EmitContext& ctx, IR::Inst* inst) {
    EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pminub);
}
// Emits the elementwise unsigned minimum of two vectors of eight 16-bit
// lanes. Prefers the single SSE4.1 pminuw instruction; on CPUs without
// SSE4.1 a scalar per-lane fallback routine is emitted instead.
void EmitX64::EmitVectorMinU16(EmitContext& ctx, IR::Inst* inst) {
    if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
        EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pminuw);
        return;
    }

    EmitTwoArgumentFallback(code, ctx, inst, [](std::array<u16, 8>& result, const std::array<u16, 8>& lhs, const std::array<u16, 8>& rhs) {
        for (size_t i = 0; i < result.size(); ++i) {
            result[i] = std::min(lhs[i], rhs[i]);
        }
    });
}
// Emits the elementwise unsigned minimum of two vectors of four 32-bit lanes.
// SSE4.1 provides pminud for exactly this operation; use it when available
// (mirroring EmitVectorMinU16) instead of always taking the scalar fallback
// as before.
void EmitX64::EmitVectorMinU32(EmitContext& ctx, IR::Inst* inst) {
    if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
        EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pminud);
        return;
    }

    // Pre-SSE4.1 CPUs: compute each lane with scalar code.
    EmitTwoArgumentFallback(code, ctx, inst, [](std::array<u32, 4>& result, const std::array<u32, 4>& a, const std::array<u32, 4>& b){
        std::transform(a.begin(), a.end(), b.begin(), result.begin(), [](auto x, auto y) { return std::min(x, y); });
    });
}
// Emits the elementwise unsigned minimum of two vectors of two 64-bit lanes.
// No packed 64-bit min instruction is used here, so a scalar per-lane
// fallback is always emitted.
void EmitX64::EmitVectorMinU64(EmitContext& ctx, IR::Inst* inst) {
    EmitTwoArgumentFallback(code, ctx, inst, [](std::array<u64, 2>& result, const std::array<u64, 2>& lhs, const std::array<u64, 2>& rhs) {
        for (size_t i = 0; i < result.size(); ++i) {
            result[i] = std::min(lhs[i], rhs[i]);
        }
    });
}
void EmitX64::EmitVectorMultiply8(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorMultiply8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);

View file

@@ -932,6 +932,66 @@ U128 IREmitter::VectorLogicalShiftRight(size_t esize, const U128& a, u8 shift_am
return {}; return {};
} }
// Emits the IR instruction for an elementwise signed maximum of two vectors.
// esize selects the lane width in bits; any value other than 8/16/32/64 is a
// programmer error and trips UNREACHABLE().
U128 IREmitter::VectorMaxSigned(size_t esize, const U128& a, const U128& b) {
    if (esize == 8) {
        return Inst<U128>(Opcode::VectorMaxS8, a, b);
    }
    if (esize == 16) {
        return Inst<U128>(Opcode::VectorMaxS16, a, b);
    }
    if (esize == 32) {
        return Inst<U128>(Opcode::VectorMaxS32, a, b);
    }
    if (esize == 64) {
        return Inst<U128>(Opcode::VectorMaxS64, a, b);
    }
    UNREACHABLE();
    return {};
}
// Emits the IR instruction for an elementwise unsigned maximum of two
// vectors. esize selects the lane width in bits; any value other than
// 8/16/32/64 is a programmer error and trips UNREACHABLE().
U128 IREmitter::VectorMaxUnsigned(size_t esize, const U128& a, const U128& b) {
    if (esize == 8) {
        return Inst<U128>(Opcode::VectorMaxU8, a, b);
    }
    if (esize == 16) {
        return Inst<U128>(Opcode::VectorMaxU16, a, b);
    }
    if (esize == 32) {
        return Inst<U128>(Opcode::VectorMaxU32, a, b);
    }
    if (esize == 64) {
        return Inst<U128>(Opcode::VectorMaxU64, a, b);
    }
    UNREACHABLE();
    return {};
}
// Emits the IR instruction for an elementwise signed minimum of two vectors.
// esize selects the lane width in bits; any value other than 8/16/32/64 is a
// programmer error and trips UNREACHABLE().
U128 IREmitter::VectorMinSigned(size_t esize, const U128& a, const U128& b) {
    if (esize == 8) {
        return Inst<U128>(Opcode::VectorMinS8, a, b);
    }
    if (esize == 16) {
        return Inst<U128>(Opcode::VectorMinS16, a, b);
    }
    if (esize == 32) {
        return Inst<U128>(Opcode::VectorMinS32, a, b);
    }
    if (esize == 64) {
        return Inst<U128>(Opcode::VectorMinS64, a, b);
    }
    UNREACHABLE();
    return {};
}
// Emits the IR instruction for an elementwise unsigned minimum of two
// vectors. esize selects the lane width in bits; any value other than
// 8/16/32/64 is a programmer error and trips UNREACHABLE().
U128 IREmitter::VectorMinUnsigned(size_t esize, const U128& a, const U128& b) {
    if (esize == 8) {
        return Inst<U128>(Opcode::VectorMinU8, a, b);
    }
    if (esize == 16) {
        return Inst<U128>(Opcode::VectorMinU16, a, b);
    }
    if (esize == 32) {
        return Inst<U128>(Opcode::VectorMinU32, a, b);
    }
    if (esize == 64) {
        return Inst<U128>(Opcode::VectorMinU64, a, b);
    }
    UNREACHABLE();
    return {};
}
U128 IREmitter::VectorMultiply(size_t esize, const U128& a, const U128& b) { U128 IREmitter::VectorMultiply(size_t esize, const U128& a, const U128& b) {
switch (esize) { switch (esize) {
case 8: case 8:

View file

@@ -219,6 +219,10 @@ public:
U128 VectorInterleaveLower(size_t esize, const U128& a, const U128& b); U128 VectorInterleaveLower(size_t esize, const U128& a, const U128& b);
U128 VectorLogicalShiftLeft(size_t esize, const U128& a, u8 shift_amount); U128 VectorLogicalShiftLeft(size_t esize, const U128& a, u8 shift_amount);
U128 VectorLogicalShiftRight(size_t esize, const U128& a, u8 shift_amount); U128 VectorLogicalShiftRight(size_t esize, const U128& a, u8 shift_amount);
U128 VectorMaxSigned(size_t esize, const U128& a, const U128& b);
U128 VectorMaxUnsigned(size_t esize, const U128& a, const U128& b);
U128 VectorMinSigned(size_t esize, const U128& a, const U128& b);
U128 VectorMinUnsigned(size_t esize, const U128& a, const U128& b);
U128 VectorMultiply(size_t esize, const U128& a, const U128& b); U128 VectorMultiply(size_t esize, const U128& a, const U128& b);
U128 VectorNarrow(size_t original_esize, const U128& a); U128 VectorNarrow(size_t original_esize, const U128& a);
U128 VectorNot(const U128& a); U128 VectorNot(const U128& a);

View file

@@ -242,6 +242,22 @@ OPCODE(VectorLogicalShiftRight8, T::U128, T::U128, T::U8
OPCODE(VectorLogicalShiftRight16, T::U128, T::U128, T::U8 ) OPCODE(VectorLogicalShiftRight16, T::U128, T::U128, T::U8 )
OPCODE(VectorLogicalShiftRight32, T::U128, T::U128, T::U8 ) OPCODE(VectorLogicalShiftRight32, T::U128, T::U128, T::U8 )
OPCODE(VectorLogicalShiftRight64, T::U128, T::U128, T::U8 ) OPCODE(VectorLogicalShiftRight64, T::U128, T::U128, T::U8 )
OPCODE(VectorMaxS8, T::U128, T::U128, T::U128 )
OPCODE(VectorMaxS16, T::U128, T::U128, T::U128 )
OPCODE(VectorMaxS32, T::U128, T::U128, T::U128 )
OPCODE(VectorMaxS64, T::U128, T::U128, T::U128 )
OPCODE(VectorMaxU8, T::U128, T::U128, T::U128 )
OPCODE(VectorMaxU16, T::U128, T::U128, T::U128 )
OPCODE(VectorMaxU32, T::U128, T::U128, T::U128 )
OPCODE(VectorMaxU64, T::U128, T::U128, T::U128 )
OPCODE(VectorMinS8, T::U128, T::U128, T::U128 )
OPCODE(VectorMinS16, T::U128, T::U128, T::U128 )
OPCODE(VectorMinS32, T::U128, T::U128, T::U128 )
OPCODE(VectorMinS64, T::U128, T::U128, T::U128 )
OPCODE(VectorMinU8, T::U128, T::U128, T::U128 )
OPCODE(VectorMinU16, T::U128, T::U128, T::U128 )
OPCODE(VectorMinU32, T::U128, T::U128, T::U128 )
OPCODE(VectorMinU64, T::U128, T::U128, T::U128 )
OPCODE(VectorMultiply8, T::U128, T::U128, T::U128 ) OPCODE(VectorMultiply8, T::U128, T::U128, T::U128 )
OPCODE(VectorMultiply16, T::U128, T::U128, T::U128 ) OPCODE(VectorMultiply16, T::U128, T::U128, T::U128 )
OPCODE(VectorMultiply32, T::U128, T::U128, T::U128 ) OPCODE(VectorMultiply32, T::U128, T::U128, T::U128 )