diff --git a/src/backend_x64/emit_x64_vector.cpp b/src/backend_x64/emit_x64_vector.cpp
index fc4d1ccf..3ddc6419 100644
--- a/src/backend_x64/emit_x64_vector.cpp
+++ b/src/backend_x64/emit_x64_vector.cpp
@@ -485,6 +485,38 @@ void EmitX64::EmitVectorEqual128(EmitContext& ctx, IR::Inst* inst) {
     }
 }
 
+// Signed greater-than on 8-bit elements, emitted as pcmpgtb through EmitVectorOperation.
+void EmitX64::EmitVectorGreaterS8(EmitContext& ctx, IR::Inst* inst) {
+    EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pcmpgtb);
+}
+
+// Signed greater-than on 16-bit elements, emitted as pcmpgtw through EmitVectorOperation.
+void EmitX64::EmitVectorGreaterS16(EmitContext& ctx, IR::Inst* inst) {
+    EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pcmpgtw);
+}
+
+// Signed greater-than on 32-bit elements, emitted as pcmpgtd through EmitVectorOperation.
+void EmitX64::EmitVectorGreaterS32(EmitContext& ctx, IR::Inst* inst) {
+    EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pcmpgtd);
+}
+
+// Signed greater-than on 64-bit elements. pcmpgtq is only emitted when the CPU
+// reports SSE4.2; otherwise the compare is done per element via EmitTwoArgumentFallback.
+void EmitX64::EmitVectorGreaterS64(EmitContext& ctx, IR::Inst* inst) {
+    if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE42)) {
+        EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pcmpgtq);
+        return;
+    }
+
+    // Operands are typed as signed elements so that `>` is a signed compare; each
+    // result element is an unsigned mask: all ones when a[i] > b[i], zero otherwise.
+    EmitTwoArgumentFallback(code, ctx, inst, [](std::array<u64, 2>& result, const std::array<s64, 2>& a, const std::array<s64, 2>& b){
+        for (size_t i = 0; i < 2; ++i) {
+            result[i] = (a[i] > b[i]) ? ~u64(0) : 0;
+        }
+    });
+}
+
 static void EmitVectorInterleaveLower(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, int size) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp
index 342815b1..6d4f890b 100644
--- a/src/frontend/ir/ir_emitter.cpp
+++ b/src/frontend/ir/ir_emitter.cpp
@@ -872,6 +872,24 @@ U128 IREmitter::VectorEqual(size_t esize, const U128& a, const U128& b) {
     return {};
 }
 
+// Emits the VectorGreaterS{8,16,32,64} opcode that matches esize.
+// Any other esize falls out of the switch and hits UNREACHABLE().
+// NOTE(review): if Inst takes an explicit result type, spell it out (Inst<U128>) - confirm.
+U128 IREmitter::VectorGreaterSigned(size_t esize, const U128& a, const U128& b) {
+    switch (esize) {
+    case 8:
+        return Inst(Opcode::VectorGreaterS8, a, b);
+    case 16:
+        return Inst(Opcode::VectorGreaterS16, a, b);
+    case 32:
+        return Inst(Opcode::VectorGreaterS32, a, b);
+    case 64:
+        return Inst(Opcode::VectorGreaterS64, a, b);
+    }
+    UNREACHABLE();
+    return {};
+}
+
 U128 IREmitter::VectorInterleaveLower(size_t esize, const U128& a, const U128& b) {
     switch (esize) {
     case 8:
diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h
index 1f94edff..0f62ae7b 100644
--- a/src/frontend/ir/ir_emitter.h
+++ b/src/frontend/ir/ir_emitter.h
@@ -215,6 +215,7 @@ public:
     U128 VectorBroadcastLower(size_t esize, const UAny& a);
     U128 VectorEor(const U128& a, const U128& b);
     U128 VectorEqual(size_t esize, const U128& a, const U128& b);
+    U128 VectorGreaterSigned(size_t esize, const U128& a, const U128& b);
     U128 VectorInterleaveLower(size_t esize, const U128& a, const U128& b);
     U128 VectorLogicalShiftLeft(size_t esize, const U128& a, u8 shift_amount);
     U128 VectorLogicalShiftRight(size_t esize, const U128& a, u8 shift_amount);
diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc
index 21a7003a..a7a1d4c3 100644
--- a/src/frontend/ir/opcodes.inc
+++ b/src/frontend/ir/opcodes.inc
@@ -226,6 +226,10 @@ OPCODE(VectorEqual16, T::U128, T::U128, T::U128
 OPCODE(VectorEqual32,           T::U128, T::U128, T::U128 )
 OPCODE(VectorEqual64,           T::U128, T::U128, T::U128 )
 OPCODE(VectorEqual128,          T::U128, T::U128, T::U128 )
+OPCODE(VectorGreaterS8,         T::U128, T::U128, T::U128 )
+OPCODE(VectorGreaterS16,        T::U128, T::U128, T::U128 )
+OPCODE(VectorGreaterS32,        T::U128, T::U128, T::U128 )
+OPCODE(VectorGreaterS64,        T::U128, T::U128, T::U128 )
 OPCODE(VectorInterleaveLower8,  T::U128, T::U128, T::U128 )
 OPCODE(VectorInterleaveLower16, T::U128, T::U128, T::U128 )
 OPCODE(VectorInterleaveLower32, T::U128, T::U128, T::U128 )