From 47c0ad0fc87a5448a55d6fb3735393fe99ed158b Mon Sep 17 00:00:00 2001 From: MerryMage <MerryMage@users.noreply.github.com> Date: Tue, 13 Feb 2018 17:56:46 +0000 Subject: [PATCH] IR: Implement Vector{Max,Min}{Signed,Unsigned} --- src/backend_x64/emit_x64_vector.cpp | 115 ++++++++++++++++++++++++++++ src/frontend/ir/ir_emitter.cpp | 60 +++++++++++++++ src/frontend/ir/ir_emitter.h | 4 + src/frontend/ir/opcodes.inc | 16 ++++ 4 files changed, 195 insertions(+) diff --git a/src/backend_x64/emit_x64_vector.cpp b/src/backend_x64/emit_x64_vector.cpp index 3ddc6419..c32edb08 100644 --- a/src/backend_x64/emit_x64_vector.cpp +++ b/src/backend_x64/emit_x64_vector.cpp @@ -4,6 +4,8 @@ * General Public License version 2 or any later version. */ +#include <algorithm> + #include "backend_x64/abi.h" #include "backend_x64/block_of_code.h" #include "backend_x64/emit_x64.h" @@ -650,6 +652,119 @@ void EmitX64::EmitVectorLogicalShiftRight64(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(inst, result); } +void EmitX64::EmitVectorMaxS8(EmitContext& ctx, IR::Inst* inst) { + if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) { + EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pmaxsb); + return; + } + + EmitTwoArgumentFallback(code, ctx, inst, [](std::array<s8, 16>& result, const std::array<s8, 16>& a, const std::array<s8, 16>& b){ + std::transform(a.begin(), a.end(), b.begin(), result.begin(), [](auto x, auto y) { return std::max(x, y); }); + }); +} + +void EmitX64::EmitVectorMaxS16(EmitContext& ctx, IR::Inst* inst) { + EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pmaxsw); +} + +void EmitX64::EmitVectorMaxS32(EmitContext& ctx, IR::Inst* inst) { + if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) { + EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pmaxsd); + return; + } + + EmitTwoArgumentFallback(code, ctx, inst, [](std::array<s32, 4>& result, const std::array<s32, 4>& a, const std::array<s32, 4>& b){ + std::transform(a.begin(), a.end(), b.begin(), result.begin(), [](auto x, auto y) { return std::max(x, y); }); + }); +} + +void 
EmitX64::EmitVectorMaxS64(EmitContext& ctx, IR::Inst* inst) { + EmitTwoArgumentFallback(code, ctx, inst, [](std::array<s64, 2>& result, const std::array<s64, 2>& a, const std::array<s64, 2>& b){ + std::transform(a.begin(), a.end(), b.begin(), result.begin(), [](auto x, auto y) { return std::max(x, y); }); + }); +} + +void EmitX64::EmitVectorMaxU8(EmitContext& ctx, IR::Inst* inst) { + EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pmaxub); +} + +void EmitX64::EmitVectorMaxU16(EmitContext& ctx, IR::Inst* inst) { + if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) { + EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pmaxuw); + return; + } + + EmitTwoArgumentFallback(code, ctx, inst, [](std::array<u16, 8>& result, const std::array<u16, 8>& a, const std::array<u16, 8>& b){ + std::transform(a.begin(), a.end(), b.begin(), result.begin(), [](auto x, auto y) { return std::max(x, y); }); + }); +} + +void EmitX64::EmitVectorMaxU32(EmitContext& ctx, IR::Inst* inst) { + EmitTwoArgumentFallback(code, ctx, inst, [](std::array<u32, 4>& result, const std::array<u32, 4>& a, const std::array<u32, 4>& b){ + std::transform(a.begin(), a.end(), b.begin(), result.begin(), [](auto x, auto y) { return std::max(x, y); }); + }); +} + +void EmitX64::EmitVectorMaxU64(EmitContext& ctx, IR::Inst* inst) { + EmitTwoArgumentFallback(code, ctx, inst, [](std::array<u64, 2>& result, const std::array<u64, 2>& a, const std::array<u64, 2>& b){ + std::transform(a.begin(), a.end(), b.begin(), result.begin(), [](auto x, auto y) { return std::max(x, y); }); + }); +} + +void EmitX64::EmitVectorMinS8(EmitContext& ctx, IR::Inst* inst) { + if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) { + EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pminsb); + return; + } + + EmitTwoArgumentFallback(code, ctx, inst, [](std::array<s8, 16>& result, const std::array<s8, 16>& a, const std::array<s8, 16>& b){ + std::transform(a.begin(), a.end(), b.begin(), result.begin(), [](auto x, auto y) { return std::min(x, y); }); + }); +} + +void EmitX64::EmitVectorMinS16(EmitContext& ctx, IR::Inst* inst) { + 
EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pminsw); +} + +void EmitX64::EmitVectorMinS32(EmitContext& ctx, IR::Inst* inst) { + EmitTwoArgumentFallback(code, ctx, inst, [](std::array<s32, 4>& result, const std::array<s32, 4>& a, const std::array<s32, 4>& b){ + std::transform(a.begin(), a.end(), b.begin(), result.begin(), [](auto x, auto y) { return std::min(x, y); }); + }); +} + +void EmitX64::EmitVectorMinS64(EmitContext& ctx, IR::Inst* inst) { + EmitTwoArgumentFallback(code, ctx, inst, [](std::array<s64, 2>& result, const std::array<s64, 2>& a, const std::array<s64, 2>& b){ + std::transform(a.begin(), a.end(), b.begin(), result.begin(), [](auto x, auto y) { return std::min(x, y); }); + }); +} + +void EmitX64::EmitVectorMinU8(EmitContext& ctx, IR::Inst* inst) { + EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pminub); +} + +void EmitX64::EmitVectorMinU16(EmitContext& ctx, IR::Inst* inst) { + if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) { + EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pminuw); + return; + } + + EmitTwoArgumentFallback(code, ctx, inst, [](std::array<u16, 8>& result, const std::array<u16, 8>& a, const std::array<u16, 8>& b){ + std::transform(a.begin(), a.end(), b.begin(), result.begin(), [](auto x, auto y) { return std::min(x, y); }); + }); +} + +void EmitX64::EmitVectorMinU32(EmitContext& ctx, IR::Inst* inst) { + EmitTwoArgumentFallback(code, ctx, inst, [](std::array<u32, 4>& result, const std::array<u32, 4>& a, const std::array<u32, 4>& b){ + std::transform(a.begin(), a.end(), b.begin(), result.begin(), [](auto x, auto y) { return std::min(x, y); }); + }); +} + +void EmitX64::EmitVectorMinU64(EmitContext& ctx, IR::Inst* inst) { + EmitTwoArgumentFallback(code, ctx, inst, [](std::array<u64, 2>& result, const std::array<u64, 2>& a, const std::array<u64, 2>& b){ + std::transform(a.begin(), a.end(), b.begin(), result.begin(), [](auto x, auto y) { return std::min(x, y); }); + }); +} + void EmitX64::EmitVectorMultiply8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Xmm a = 
ctx.reg_alloc.UseScratchXmm(args[0]); diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp index 6d4f890b..dc03df0b 100644 --- a/src/frontend/ir/ir_emitter.cpp +++ b/src/frontend/ir/ir_emitter.cpp @@ -932,6 +932,66 @@ U128 IREmitter::VectorLogicalShiftRight(size_t esize, const U128& a, u8 shift_am return {}; } +U128 IREmitter::VectorMaxSigned(size_t esize, const U128& a, const U128& b) { + switch (esize) { + case 8: + return Inst<U128>(Opcode::VectorMaxS8, a, b); + case 16: + return Inst<U128>(Opcode::VectorMaxS16, a, b); + case 32: + return Inst<U128>(Opcode::VectorMaxS32, a, b); + case 64: + return Inst<U128>(Opcode::VectorMaxS64, a, b); + } + UNREACHABLE(); + return {}; +} + +U128 IREmitter::VectorMaxUnsigned(size_t esize, const U128& a, const U128& b) { + switch (esize) { + case 8: + return Inst<U128>(Opcode::VectorMaxU8, a, b); + case 16: + return Inst<U128>(Opcode::VectorMaxU16, a, b); + case 32: + return Inst<U128>(Opcode::VectorMaxU32, a, b); + case 64: + return Inst<U128>(Opcode::VectorMaxU64, a, b); + } + UNREACHABLE(); + return {}; +} + +U128 IREmitter::VectorMinSigned(size_t esize, const U128& a, const U128& b) { + switch (esize) { + case 8: + return Inst<U128>(Opcode::VectorMinS8, a, b); + case 16: + return Inst<U128>(Opcode::VectorMinS16, a, b); + case 32: + return Inst<U128>(Opcode::VectorMinS32, a, b); + case 64: + return Inst<U128>(Opcode::VectorMinS64, a, b); + } + UNREACHABLE(); + return {}; +} + +U128 IREmitter::VectorMinUnsigned(size_t esize, const U128& a, const U128& b) { + switch (esize) { + case 8: + return Inst<U128>(Opcode::VectorMinU8, a, b); + case 16: + return Inst<U128>(Opcode::VectorMinU16, a, b); + case 32: + return Inst<U128>(Opcode::VectorMinU32, a, b); + case 64: + return Inst<U128>(Opcode::VectorMinU64, a, b); + } + UNREACHABLE(); + return {}; +} + U128 IREmitter::VectorMultiply(size_t esize, const U128& a, const U128& b) { switch (esize) { case 8: diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h index 0f62ae7b..94d85173 100644 --- a/src/frontend/ir/ir_emitter.h +++ 
b/src/frontend/ir/ir_emitter.h @@ -219,6 +219,10 @@ public: U128 VectorInterleaveLower(size_t esize, const U128& a, const U128& b); U128 VectorLogicalShiftLeft(size_t esize, const U128& a, u8 shift_amount); U128 VectorLogicalShiftRight(size_t esize, const U128& a, u8 shift_amount); + U128 VectorMaxSigned(size_t esize, const U128& a, const U128& b); + U128 VectorMaxUnsigned(size_t esize, const U128& a, const U128& b); + U128 VectorMinSigned(size_t esize, const U128& a, const U128& b); + U128 VectorMinUnsigned(size_t esize, const U128& a, const U128& b); U128 VectorMultiply(size_t esize, const U128& a, const U128& b); U128 VectorNarrow(size_t original_esize, const U128& a); U128 VectorNot(const U128& a); diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc index a7a1d4c3..28192680 100644 --- a/src/frontend/ir/opcodes.inc +++ b/src/frontend/ir/opcodes.inc @@ -242,6 +242,22 @@ OPCODE(VectorLogicalShiftRight8, T::U128, T::U128, T::U8 OPCODE(VectorLogicalShiftRight16, T::U128, T::U128, T::U8 ) OPCODE(VectorLogicalShiftRight32, T::U128, T::U128, T::U8 ) OPCODE(VectorLogicalShiftRight64, T::U128, T::U128, T::U8 ) +OPCODE(VectorMaxS8, T::U128, T::U128, T::U128 ) +OPCODE(VectorMaxS16, T::U128, T::U128, T::U128 ) +OPCODE(VectorMaxS32, T::U128, T::U128, T::U128 ) +OPCODE(VectorMaxS64, T::U128, T::U128, T::U128 ) +OPCODE(VectorMaxU8, T::U128, T::U128, T::U128 ) +OPCODE(VectorMaxU16, T::U128, T::U128, T::U128 ) +OPCODE(VectorMaxU32, T::U128, T::U128, T::U128 ) +OPCODE(VectorMaxU64, T::U128, T::U128, T::U128 ) +OPCODE(VectorMinS8, T::U128, T::U128, T::U128 ) +OPCODE(VectorMinS16, T::U128, T::U128, T::U128 ) +OPCODE(VectorMinS32, T::U128, T::U128, T::U128 ) +OPCODE(VectorMinS64, T::U128, T::U128, T::U128 ) +OPCODE(VectorMinU8, T::U128, T::U128, T::U128 ) +OPCODE(VectorMinU16, T::U128, T::U128, T::U128 ) +OPCODE(VectorMinU32, T::U128, T::U128, T::U128 ) +OPCODE(VectorMinU64, T::U128, T::U128, T::U128 ) OPCODE(VectorMultiply8, T::U128, T::U128, T::U128 ) 
OPCODE(VectorMultiply16, T::U128, T::U128, T::U128 ) OPCODE(VectorMultiply32, T::U128, T::U128, T::U128 )