shader_jit_a64_compiler: Improve Compile_SwizzleSrc (#7136)
This commit is contained in:
parent
fa08df21a5
commit
80213bf88f
1 changed file with 28 additions and 16 deletions
|
@@ -257,28 +257,40 @@ void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRe
|
||||||
|
|
||||||
// Generate instructions for source register swizzling as needed
|
// Generate instructions for source register swizzling as needed
|
||||||
u8 sel = swiz.GetRawSelector(src_num);
|
u8 sel = swiz.GetRawSelector(src_num);
|
||||||
if (sel != NO_SRC_REG_SWIZZLE) {
|
switch (sel) {
|
||||||
|
case NO_SRC_REG_SWIZZLE:
|
||||||
|
// NOP
|
||||||
|
break;
|
||||||
|
case 0b00'00'00'00:
|
||||||
|
DUP(dest.S4(), dest.Selem()[0]);
|
||||||
|
break;
|
||||||
|
case 0b01'01'01'01:
|
||||||
|
DUP(dest.S4(), dest.Selem()[1]);
|
||||||
|
break;
|
||||||
|
case 0b10'10'10'10:
|
||||||
|
DUP(dest.S4(), dest.Selem()[2]);
|
||||||
|
break;
|
||||||
|
case 0b11'11'11'11:
|
||||||
|
DUP(dest.S4(), dest.Selem()[3]);
|
||||||
|
break;
|
||||||
|
default: {
|
||||||
const int table[] = {
|
const int table[] = {
|
||||||
((sel & 0b11'00'00'00) >> 6),
|
((sel & 0b11'00'00'00) >> 6),
|
||||||
((sel & 0b00'11'00'00) >> 4),
|
((sel & 0b00'11'00'00) >> 4),
|
||||||
((sel & 0b00'00'11'00) >> 2),
|
((sel & 0b00'00'11'00) >> 2),
|
||||||
((sel & 0b00'00'00'11) >> 0),
|
((sel & 0b00'00'00'11) >> 0),
|
||||||
};
|
};
|
||||||
|
MOV(VSCRATCH0.B16(), dest.B16());
|
||||||
// Generate table-vector
|
if (table[0] != 0)
|
||||||
MOV(XSCRATCH0.toW(), u32(0x03'02'01'00u + (table[0] * 0x04'04'04'04u)));
|
MOV(dest.Selem()[0], VSCRATCH0.Selem()[table[0]]);
|
||||||
MOV(VSCRATCH0.Selem()[0], XSCRATCH0.toW());
|
if (table[1] != 1)
|
||||||
|
MOV(dest.Selem()[1], VSCRATCH0.Selem()[table[1]]);
|
||||||
MOV(XSCRATCH0.toW(), u32(0x03'02'01'00u + (table[1] * 0x04'04'04'04u)));
|
if (table[2] != 2)
|
||||||
MOV(VSCRATCH0.Selem()[1], XSCRATCH0.toW());
|
MOV(dest.Selem()[2], VSCRATCH0.Selem()[table[2]]);
|
||||||
|
if (table[3] != 3)
|
||||||
MOV(XSCRATCH0.toW(), u32(0x03'02'01'00u + (table[2] * 0x04'04'04'04u)));
|
MOV(dest.Selem()[3], VSCRATCH0.Selem()[table[3]]);
|
||||||
MOV(VSCRATCH0.Selem()[2], XSCRATCH0.toW());
|
break;
|
||||||
|
}
|
||||||
MOV(XSCRATCH0.toW(), u32(0x03'02'01'00u + (table[3] * 0x04'04'04'04u)));
|
|
||||||
MOV(VSCRATCH0.Selem()[3], XSCRATCH0.toW());
|
|
||||||
|
|
||||||
TBL(dest.B16(), List{dest.B16()}, VSCRATCH0.B16());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// If the source register should be negated, flip the negative bit using XOR
|
// If the source register should be negated, flip the negative bit using XOR
|
||||||
|
|
Loading…
Reference in a new issue