A64: Add opcodes for signed saturating negations

This commit is contained in:
Lioncash 2018-09-06 15:50:25 -04:00 committed by MerryMage
parent f1ebbcd7bc
commit fca7eddb9e
5 changed files with 145 additions and 0 deletions

View file

@ -2981,6 +2981,127 @@ void EmitX64::EmitVectorSignedSaturatedNarrowToUnsigned64(EmitContext& ctx, IR::
});
}
static void EmitVectorSignedSaturatedNeg(size_t esize, BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm data = ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
const Xbyak::Address mask = [esize, &code] {
switch (esize) {
case 8:
return code.MConst(xword, 0x8080808080808080, 0x8080808080808080);
case 16:
return code.MConst(xword, 0x8000800080008000, 0x8000800080008000);
case 32:
return code.MConst(xword, 0x8000000080000000, 0x8000000080000000);
case 64:
return code.MConst(xword, 0x8000000000000000, 0x8000000000000000);
default:
UNREACHABLE();
return Xbyak::Address{0};
}
}();
const u32 test_mask = [esize] {
switch (esize) {
case 8:
return 0b1111'1111'1111'1111;
case 16:
return 0b1010'1010'1010'1010;
case 32:
return 0b1000'1000'1000'1000;
case 64:
return 0b10000000'10000000;
default:
UNREACHABLE();
return 0;
}
}();
const auto vector_equality = [esize, &code](const Xbyak::Xmm& x, const auto& y) {
switch (esize) {
case 8:
code.pcmpeqb(x, y);
break;
case 16:
code.pcmpeqw(x, y);
break;
case 32:
code.pcmpeqd(x, y);
break;
case 64:
code.pcmpeqq(x, y);
break;
}
};
code.movdqa(tmp, data);
vector_equality(tmp, mask);
// Perform negation
code.pxor(zero, zero);
switch (esize) {
case 8:
code.psubsb(zero, data);
break;
case 16:
code.psubsw(zero, data);
break;
case 32:
code.psubd(zero, data);
code.pxor(zero, tmp);
break;
case 64:
code.psubq(zero, data);
code.pxor(zero, tmp);
break;
}
// Check if any elements matched the mask prior to performing saturation. If so, set the Q bit.
const Xbyak::Reg64 bit = ctx.reg_alloc.ScratchGpr();
code.pmovmskb(bit, tmp);
code.test(bit.cvt32(), test_mask);
code.setnz(bit.cvt8());
code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit.cvt8());
ctx.reg_alloc.DefineValue(inst, zero);
}
// Signed saturating negation of 8-bit lanes; delegates to the shared emitter.
void EmitX64::EmitVectorSignedSaturatedNeg8(EmitContext& ctx, IR::Inst* inst) {
    EmitVectorSignedSaturatedNeg(8, code, ctx, inst);
}
// Signed saturating negation of 16-bit lanes; delegates to the shared emitter.
void EmitX64::EmitVectorSignedSaturatedNeg16(EmitContext& ctx, IR::Inst* inst) {
    EmitVectorSignedSaturatedNeg(16, code, ctx, inst);
}
// Signed saturating negation of 32-bit lanes; delegates to the shared emitter.
void EmitX64::EmitVectorSignedSaturatedNeg32(EmitContext& ctx, IR::Inst* inst) {
    EmitVectorSignedSaturatedNeg(32, code, ctx, inst);
}
// Signed saturating negation of 64-bit lanes.
//
// The shared emitter uses pcmpeqq for 64-bit lanes, which is an SSE4.1 instruction, so
// hosts without SSE4.1 go through a C++ fallback instead.
void EmitX64::EmitVectorSignedSaturatedNeg64(EmitContext& ctx, IR::Inst* inst) {
    if (!code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
        EmitOneArgumentFallbackWithSaturation(code, ctx, inst, [](VectorArray<s64>& result, const VectorArray<s64>& data) {
            // Returns true when at least one lane had to be saturated.
            bool saturated = false;

            for (size_t i = 0; i < result.size(); ++i) {
                const s64 element = data[i];

                if (static_cast<u64>(element) != 0x8000000000000000) {
                    result[i] = -element;
                    continue;
                }

                // The minimum value has no positive counterpart: clamp to the maximum.
                result[i] = 0x7FFFFFFFFFFFFFFF;
                saturated = true;
            }

            return saturated;
        });
        return;
    }

    EmitVectorSignedSaturatedNeg(64, code, ctx, inst);
}
// Vector subtraction on 8-bit lanes: forwards to EmitVectorOperation with psubb
// (packed byte subtract) as the instruction to emit.
void EmitX64::EmitVectorSub8(EmitContext& ctx, IR::Inst* inst) {
    EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::psubb);
}

View file

@ -1566,6 +1566,21 @@ U128 IREmitter::VectorSignedSaturatedNarrowToUnsigned(size_t original_esize, con
return {};
}
// Emits the VectorSignedSaturatedNeg opcode matching the element size (8, 16, 32 or
// 64 bits) with `a` as its operand. Any other element size is treated as unreachable.
U128 IREmitter::VectorSignedSaturatedNeg(size_t esize, const U128& a) {
    if (esize == 8) {
        return Inst<U128>(Opcode::VectorSignedSaturatedNeg8, a);
    }
    if (esize == 16) {
        return Inst<U128>(Opcode::VectorSignedSaturatedNeg16, a);
    }
    if (esize == 32) {
        return Inst<U128>(Opcode::VectorSignedSaturatedNeg32, a);
    }
    if (esize == 64) {
        return Inst<U128>(Opcode::VectorSignedSaturatedNeg64, a);
    }

    UNREACHABLE();
    return {};
}
U128 IREmitter::VectorSub(size_t esize, const U128& a, const U128& b) {
switch (esize) {
case 8:

View file

@ -268,6 +268,7 @@ public:
U128 VectorSignedSaturatedDoublingMultiplyReturnHigh(size_t esize, const U128& a, const U128& b);
U128 VectorSignedSaturatedNarrowToSigned(size_t original_esize, const U128& a);
U128 VectorSignedSaturatedNarrowToUnsigned(size_t original_esize, const U128& a);
// Emits the VectorSignedSaturatedNeg{8,16,32,64} opcode selected by esize, with a as its operand.
// esize must be 8, 16, 32 or 64.
U128 VectorSignedSaturatedNeg(size_t esize, const U128& a);
U128 VectorSub(size_t esize, const U128& a, const U128& b);
Table VectorTable(std::vector<U128> values);
U128 VectorTableLookup(const U128& defaults, const Table& table, const U128& indices);

View file

@ -359,6 +359,10 @@ bool Inst::WritesToFPSRCumulativeSaturationBit() const {
case Opcode::VectorSignedSaturatedNarrowToUnsigned64:
case Opcode::VectorSignedSaturatedDoublingMultiplyReturnHigh16:
case Opcode::VectorSignedSaturatedDoublingMultiplyReturnHigh32:
case Opcode::VectorSignedSaturatedNeg8:
case Opcode::VectorSignedSaturatedNeg16:
case Opcode::VectorSignedSaturatedNeg32:
case Opcode::VectorSignedSaturatedNeg64:
case Opcode::VectorUnsignedSaturatedNarrow16:
case Opcode::VectorUnsignedSaturatedNarrow32:
case Opcode::VectorUnsignedSaturatedNarrow64:

View file

@ -405,6 +405,10 @@ OPCODE(VectorSignedSaturatedNarrowToSigned64, U128, U128
OPCODE(VectorSignedSaturatedNarrowToUnsigned16, U128, U128 )
OPCODE(VectorSignedSaturatedNarrowToUnsigned32, U128, U128 )
OPCODE(VectorSignedSaturatedNarrowToUnsigned64, U128, U128 )
// Signed saturating negation, one opcode per element size: takes one U128 and yields a U128.
// These opcodes are also listed in Inst::WritesToFPSRCumulativeSaturationBit().
OPCODE(VectorSignedSaturatedNeg8, U128, U128 )
OPCODE(VectorSignedSaturatedNeg16, U128, U128 )
OPCODE(VectorSignedSaturatedNeg32, U128, U128 )
OPCODE(VectorSignedSaturatedNeg64, U128, U128 )
OPCODE(VectorSub8, U128, U128, U128 )
OPCODE(VectorSub16, U128, U128, U128 )
OPCODE(VectorSub32, U128, U128, U128 )