emit_x64_vector: Simplify fpsr_qc related code
Move the bool conversion into A64JitState::GetFpsr so we don't have to continuously pay the cost of conversion for every saturation instruction.
This commit is contained in:
parent
112cff9ab9
commit
a7c66d2d28
2 changed files with 16 additions and 73 deletions
|
@ -106,7 +106,7 @@ u32 A64JitState::GetFpsr() const {
|
||||||
fpsr |= FPSCR_IDC;
|
fpsr |= FPSCR_IDC;
|
||||||
fpsr |= FPSCR_UFC;
|
fpsr |= FPSCR_UFC;
|
||||||
fpsr |= fpsr_exc;
|
fpsr |= fpsr_exc;
|
||||||
fpsr |= (fpsr_qc & 1) << 27;
|
fpsr |= (fpsr_qc == 0 ? 0 : 1) << 27;
|
||||||
return fpsr;
|
return fpsr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -2724,22 +2724,6 @@ static void EmitVectorSignedSaturatedAbs(size_t esize, BlockOfCode& code, EmitCo
|
||||||
}
|
}
|
||||||
}();
|
}();
|
||||||
|
|
||||||
const u32 test_mask = [esize] {
|
|
||||||
switch (esize) {
|
|
||||||
case 8:
|
|
||||||
return 0b1111'1111'1111'1111;
|
|
||||||
case 16:
|
|
||||||
return 0b1010'1010'1010'1010;
|
|
||||||
case 32:
|
|
||||||
return 0b1000'1000'1000'1000;
|
|
||||||
case 64:
|
|
||||||
return 0b10000000'10000000;
|
|
||||||
default:
|
|
||||||
UNREACHABLE();
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
}();
|
|
||||||
|
|
||||||
const auto vector_equality = [esize, &code](const Xbyak::Xmm& x, const Xbyak::Xmm& y) {
|
const auto vector_equality = [esize, &code](const Xbyak::Xmm& x, const Xbyak::Xmm& y) {
|
||||||
switch (esize) {
|
switch (esize) {
|
||||||
case 8:
|
case 8:
|
||||||
|
@ -2786,10 +2770,7 @@ static void EmitVectorSignedSaturatedAbs(size_t esize, BlockOfCode& code, EmitCo
|
||||||
code.movdqa(sign, mask);
|
code.movdqa(sign, mask);
|
||||||
vector_equality(data_test, sign);
|
vector_equality(data_test, sign);
|
||||||
code.pmovmskb(bit, data_test);
|
code.pmovmskb(bit, data_test);
|
||||||
code.test(bit, test_mask);
|
code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
|
||||||
code.setnz(bit.cvt8());
|
|
||||||
|
|
||||||
code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit.cvt8());
|
|
||||||
|
|
||||||
ctx.reg_alloc.DefineValue(inst, data);
|
ctx.reg_alloc.DefineValue(inst, data);
|
||||||
}
|
}
|
||||||
|
@ -2914,11 +2895,9 @@ void EmitX64::EmitVectorSignedSaturatedDoublingMultiplyReturnHigh16(EmitContext&
|
||||||
|
|
||||||
// Check if any saturation occurred (i.e. if any halfwords in x were
|
// Check if any saturation occurred (i.e. if any halfwords in x were
|
||||||
// 0x8000 before saturating
|
// 0x8000 before saturating
|
||||||
const Xbyak::Reg64 mask = ctx.reg_alloc.ScratchGpr();
|
const Xbyak::Reg32 mask = ctx.reg_alloc.ScratchGpr().cvt32();
|
||||||
code.pmovmskb(mask, tmp);
|
code.pmovmskb(mask, tmp);
|
||||||
code.test(mask.cvt32(), 0b1010'1010'1010'1010);
|
code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], mask);
|
||||||
code.setnz(mask.cvt8());
|
|
||||||
code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], mask.cvt8());
|
|
||||||
|
|
||||||
ctx.reg_alloc.DefineValue(inst, x);
|
ctx.reg_alloc.DefineValue(inst, x);
|
||||||
}
|
}
|
||||||
|
@ -2958,11 +2937,9 @@ void EmitX64::EmitVectorSignedSaturatedDoublingMultiplyReturnHigh32(EmitContext&
|
||||||
|
|
||||||
// Check if any saturation occurred (i.e. if any words in x were
|
// Check if any saturation occurred (i.e. if any words in x were
|
||||||
// 0x80000000 before saturating
|
// 0x80000000 before saturating
|
||||||
const Xbyak::Reg64 mask = ctx.reg_alloc.ScratchGpr();
|
const Xbyak::Reg32 mask = ctx.reg_alloc.ScratchGpr().cvt32();
|
||||||
code.pmovmskb(mask, tmp2);
|
code.pmovmskb(mask, tmp2);
|
||||||
code.test(mask.cvt32(), 0b1000'1000'1000'1000);
|
code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], mask);
|
||||||
code.setnz(mask.cvt8());
|
|
||||||
code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], mask.cvt8());
|
|
||||||
|
|
||||||
ctx.reg_alloc.DefineValue(inst, x);
|
ctx.reg_alloc.DefineValue(inst, x);
|
||||||
}
|
}
|
||||||
|
@ -2998,18 +2975,10 @@ static void EmitVectorSignedSaturatedNarrowToSigned(size_t original_esize, Block
|
||||||
}
|
}
|
||||||
|
|
||||||
const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32();
|
const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32();
|
||||||
|
code.pcmpeqd(reconstructed, src);
|
||||||
if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
|
code.movmskps(bit, reconstructed);
|
||||||
code.pxor(reconstructed, src);
|
code.xor_(bit, 0b1111);
|
||||||
code.ptest(reconstructed, reconstructed);
|
code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
|
||||||
} else {
|
|
||||||
code.pcmpeqd(reconstructed, src);
|
|
||||||
code.movmskps(bit, reconstructed);
|
|
||||||
code.cmp(bit, 0xF);
|
|
||||||
}
|
|
||||||
|
|
||||||
code.setnz(bit.cvt8());
|
|
||||||
code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit.cvt8());
|
|
||||||
|
|
||||||
ctx.reg_alloc.DefineValue(inst, dest);
|
ctx.reg_alloc.DefineValue(inst, dest);
|
||||||
}
|
}
|
||||||
|
@ -3062,18 +3031,10 @@ static void EmitVectorSignedSaturatedNarrowToUnsigned(size_t original_esize, Blo
|
||||||
}
|
}
|
||||||
|
|
||||||
const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32();
|
const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32();
|
||||||
|
code.pcmpeqd(reconstructed, src);
|
||||||
if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
|
code.movmskps(bit, reconstructed);
|
||||||
code.pxor(reconstructed, src);
|
code.xor_(bit, 0b1111);
|
||||||
code.ptest(reconstructed, reconstructed);
|
code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
|
||||||
} else {
|
|
||||||
code.pcmpeqd(reconstructed, src);
|
|
||||||
code.movmskps(bit, reconstructed);
|
|
||||||
code.cmp(bit, 0xF);
|
|
||||||
}
|
|
||||||
|
|
||||||
code.setnz(bit.cvt8());
|
|
||||||
code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit.cvt8());
|
|
||||||
|
|
||||||
ctx.reg_alloc.DefineValue(inst, dest);
|
ctx.reg_alloc.DefineValue(inst, dest);
|
||||||
}
|
}
|
||||||
|
@ -3133,22 +3094,6 @@ static void EmitVectorSignedSaturatedNeg(size_t esize, BlockOfCode& code, EmitCo
|
||||||
}
|
}
|
||||||
}();
|
}();
|
||||||
|
|
||||||
const u32 test_mask = [esize] {
|
|
||||||
switch (esize) {
|
|
||||||
case 8:
|
|
||||||
return 0b1111'1111'1111'1111;
|
|
||||||
case 16:
|
|
||||||
return 0b1010'1010'1010'1010;
|
|
||||||
case 32:
|
|
||||||
return 0b1000'1000'1000'1000;
|
|
||||||
case 64:
|
|
||||||
return 0b10000000'10000000;
|
|
||||||
default:
|
|
||||||
UNREACHABLE();
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
}();
|
|
||||||
|
|
||||||
const auto vector_equality = [esize, &code](const Xbyak::Xmm& x, const auto& y) {
|
const auto vector_equality = [esize, &code](const Xbyak::Xmm& x, const auto& y) {
|
||||||
switch (esize) {
|
switch (esize) {
|
||||||
case 8:
|
case 8:
|
||||||
|
@ -3189,11 +3134,9 @@ static void EmitVectorSignedSaturatedNeg(size_t esize, BlockOfCode& code, EmitCo
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check if any elements matched the mask prior to performing saturation. If so, set the Q bit.
|
// Check if any elements matched the mask prior to performing saturation. If so, set the Q bit.
|
||||||
const Xbyak::Reg64 bit = ctx.reg_alloc.ScratchGpr();
|
const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32();
|
||||||
code.pmovmskb(bit, tmp);
|
code.pmovmskb(bit, tmp);
|
||||||
code.test(bit.cvt32(), test_mask);
|
code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
|
||||||
code.setnz(bit.cvt8());
|
|
||||||
code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit.cvt8());
|
|
||||||
|
|
||||||
ctx.reg_alloc.DefineValue(inst, zero);
|
ctx.reg_alloc.DefineValue(inst, zero);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue