IR: Implement Vector{Lower,}Broadcast{8,16,32,64}
This commit is contained in:
parent
8ee854232c
commit
793753bf63
4 changed files with 131 additions and 0 deletions
|
@ -205,5 +205,94 @@ void EmitX64::EmitVectorPairedAdd64(EmitContext& ctx, IR::Inst* inst) {
|
|||
ctx.reg_alloc.DefineValue(inst, a);
|
||||
}
|
||||
|
||||
// Emits x64 code for the VectorLowerBroadcast8 IR instruction: byte 0 of the
// operand register is replicated into all eight bytes of the low 64-bit half
// of the result register.
void EmitX64::EmitVectorLowerBroadcast8(EmitContext& ctx, IR::Inst* inst) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);
    Xbyak::Xmm vec = ctx.reg_alloc.UseScratchXmm(operands[0]);

    const bool has_ssse3 = code->DoesCpuSupport(Xbyak::util::Cpu::tSSSE3);
    if (!has_ssse3) {
        // SSE2 fallback: PUNPCKLBW with itself turns byte 0 into a doubled
        // 16-bit word, and PSHUFLW with selector 0 copies that word into all
        // four low words. PSHUFLW passes the high quadword through untouched.
        // NOTE(review): unlike the SSSE3 path, the upper half is not cleared
        // explicitly here; it ends up holding the interleaved operand bytes
        // 4..7 -- presumably those are already zero; confirm.
        code->punpcklbw(vec, vec);
        code->pshuflw(vec, vec, 0);
    } else {
        // An all-zero PSHUFB control makes every destination byte pick
        // source byte 0.
        Xbyak::Xmm zero_indices = ctx.reg_alloc.ScratchXmm();
        code->pxor(zero_indices, zero_indices);
        code->pshufb(vec, zero_indices);
        // MOVQ xmm, xmm keeps the low quadword and zeroes the high quadword.
        code->movq(vec, vec);
    }

    ctx.reg_alloc.DefineValue(inst, vec);
}
|
||||
|
||||
// Emits x64 code for the VectorLowerBroadcast16 IR instruction: word 0 of the
// operand register is replicated into the four 16-bit lanes of the low 64-bit
// half of the result register.
void EmitX64::EmitVectorLowerBroadcast16(EmitContext& ctx, IR::Inst* inst) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);
    Xbyak::Xmm vec = ctx.reg_alloc.UseScratchXmm(operands[0]);

    // Selector 0 makes each of the four low words a copy of word 0. PSHUFLW
    // leaves the high quadword exactly as it was in the operand.
    // NOTE(review): presumably the operand's upper 64 bits are already zero;
    // confirm.
    code->pshuflw(vec, vec, 0);

    ctx.reg_alloc.DefineValue(inst, vec);
}
|
||||
|
||||
// Emits x64 code for the VectorLowerBroadcast32 IR instruction: doubleword 0
// of the operand register is replicated into both 32-bit lanes of the low
// 64-bit half of the result register.
void EmitX64::EmitVectorLowerBroadcast32(EmitContext& ctx, IR::Inst* inst) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);
    Xbyak::Xmm vec = ctx.reg_alloc.UseScratchXmm(operands[0]);

    // Word selectors, lowest lane first: 0, 1, 0, 1. Words 0..1 (doubleword 0)
    // are therefore repeated in words 2..3. PSHUFLW leaves the high quadword
    // exactly as it was in the operand.
    // NOTE(review): presumably the operand's upper 64 bits are already zero;
    // confirm.
    code->pshuflw(vec, vec, 0b01'00'01'00);

    ctx.reg_alloc.DefineValue(inst, vec);
}
|
||||
|
||||
// Emits x64 code for the VectorBroadcast8 IR instruction: byte 0 of the
// operand register is replicated into all sixteen bytes of the result
// register.
void EmitX64::EmitVectorBroadcast8(EmitContext& ctx, IR::Inst* inst) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);
    Xbyak::Xmm vec = ctx.reg_alloc.UseScratchXmm(operands[0]);

    const bool has_ssse3 = code->DoesCpuSupport(Xbyak::util::Cpu::tSSSE3);
    if (!has_ssse3) {
        // SSE2 fallback, widening the replicated element step by step:
        // byte -> word (PUNPCKLBW), word -> low quadword (PSHUFLW, selector 0),
        // low quadword -> both quadwords (PUNPCKLQDQ).
        code->punpcklbw(vec, vec);
        code->pshuflw(vec, vec, 0);
        code->punpcklqdq(vec, vec);
    } else {
        // An all-zero PSHUFB control makes every destination byte pick
        // source byte 0.
        Xbyak::Xmm zero_indices = ctx.reg_alloc.ScratchXmm();
        code->pxor(zero_indices, zero_indices);
        code->pshufb(vec, zero_indices);
    }

    ctx.reg_alloc.DefineValue(inst, vec);
}
|
||||
|
||||
// Emits x64 code for the VectorBroadcast16 IR instruction: word 0 of the
// operand register is replicated into all eight 16-bit lanes of the result
// register.
void EmitX64::EmitVectorBroadcast16(EmitContext& ctx, IR::Inst* inst) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);
    Xbyak::Xmm vec = ctx.reg_alloc.UseScratchXmm(operands[0]);

    // Step 1: fill the four low words with word 0.
    code->pshuflw(vec, vec, 0);
    // Step 2: copy the low quadword into the high quadword as well.
    code->punpcklqdq(vec, vec);

    ctx.reg_alloc.DefineValue(inst, vec);
}
|
||||
|
||||
// Emits x64 code for the VectorBroadcast32 IR instruction: doubleword 0 of the
// operand register is replicated into all four 32-bit lanes of the result
// register.
void EmitX64::EmitVectorBroadcast32(EmitContext& ctx, IR::Inst* inst) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);
    Xbyak::Xmm vec = ctx.reg_alloc.UseScratchXmm(operands[0]);

    // Selector 0 makes every destination doubleword a copy of doubleword 0.
    code->pshufd(vec, vec, 0);

    ctx.reg_alloc.DefineValue(inst, vec);
}
|
||||
|
||||
// Emits x64 code for the VectorBroadcast64 IR instruction: the low quadword of
// the operand register is replicated into both 64-bit lanes of the result
// register.
void EmitX64::EmitVectorBroadcast64(EmitContext& ctx, IR::Inst* inst) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);
    Xbyak::Xmm vec = ctx.reg_alloc.UseScratchXmm(operands[0]);

    // Interleaving the low quadword with itself yields {low, low}.
    code->punpcklqdq(vec, vec);

    ctx.reg_alloc.DefineValue(inst, vec);
}
|
||||
|
||||
} // namespace BackendX64
|
||||
} // namespace Dynarmic
|
||||
|
|
|
@ -652,6 +652,34 @@ U128 IREmitter::VectorAnd(const U128& a, const U128& b) {
|
|||
return Inst<U128>(Opcode::VectorAnd, a, b);
|
||||
}
|
||||
|
||||
// Builds a VectorLowerBroadcast8 IR instruction that takes the 8-bit value `a`
// and yields a 128-bit vector value.
U128 IREmitter::VectorLowerBroadcast8(const U8& a) {
    constexpr Opcode opcode = Opcode::VectorLowerBroadcast8;
    return Inst<U128>(opcode, a);
}
|
||||
|
||||
// Builds a VectorLowerBroadcast16 IR instruction that takes the 16-bit value
// `a` and yields a 128-bit vector value.
U128 IREmitter::VectorLowerBroadcast16(const U16& a) {
    constexpr Opcode opcode = Opcode::VectorLowerBroadcast16;
    return Inst<U128>(opcode, a);
}
|
||||
|
||||
// Builds a VectorLowerBroadcast32 IR instruction that takes the 32-bit value
// `a` and yields a 128-bit vector value.
U128 IREmitter::VectorLowerBroadcast32(const U32& a) {
    constexpr Opcode opcode = Opcode::VectorLowerBroadcast32;
    return Inst<U128>(opcode, a);
}
|
||||
|
||||
// Builds a VectorBroadcast8 IR instruction that takes the 8-bit value `a` and
// yields a 128-bit vector value.
U128 IREmitter::VectorBroadcast8(const U8& a) {
    constexpr Opcode opcode = Opcode::VectorBroadcast8;
    return Inst<U128>(opcode, a);
}
|
||||
|
||||
// Builds a VectorBroadcast16 IR instruction that takes the 16-bit value `a`
// and yields a 128-bit vector value.
U128 IREmitter::VectorBroadcast16(const U16& a) {
    constexpr Opcode opcode = Opcode::VectorBroadcast16;
    return Inst<U128>(opcode, a);
}
|
||||
|
||||
// Builds a VectorBroadcast32 IR instruction that takes the 32-bit value `a`
// and yields a 128-bit vector value.
U128 IREmitter::VectorBroadcast32(const U32& a) {
    constexpr Opcode opcode = Opcode::VectorBroadcast32;
    return Inst<U128>(opcode, a);
}
|
||||
|
||||
// Builds a VectorBroadcast64 IR instruction that takes the 64-bit value `a`
// and yields a 128-bit vector value.
U128 IREmitter::VectorBroadcast64(const U64& a) {
    constexpr Opcode opcode = Opcode::VectorBroadcast64;
    return Inst<U128>(opcode, a);
}
|
||||
|
||||
// Builds a VectorLowerPairedAdd8 IR instruction from the two 128-bit operands
// `a` and `b`, yielding a 128-bit vector value.
U128 IREmitter::VectorLowerPairedAdd8(const U128& a, const U128& b) {
    constexpr Opcode opcode = Opcode::VectorLowerPairedAdd8;
    return Inst<U128>(opcode, a, b);
}
|
||||
|
|
|
@ -182,6 +182,13 @@ public:
|
|||
U128 VectorAdd32(const U128& a, const U128& b);
|
||||
U128 VectorAdd64(const U128& a, const U128& b);
|
||||
U128 VectorAnd(const U128& a, const U128& b);
|
||||
// Lower broadcasts: build a 128-bit vector by replicating the scalar `a`
// across the lanes of the low 64-bit half only.
// NOTE(review): the x64 backend's SSSE3 path for the 8-bit variant zeroes the
// upper half; presumably that holds for every variant -- confirm.
U128 VectorLowerBroadcast8(const U8& a);
|
||||
U128 VectorLowerBroadcast16(const U16& a);
|
||||
U128 VectorLowerBroadcast32(const U32& a);
|
||||
// Full broadcasts: build a 128-bit vector by replicating the scalar `a`
// across every lane of matching width.
U128 VectorBroadcast8(const U8& a);
|
||||
U128 VectorBroadcast16(const U16& a);
|
||||
U128 VectorBroadcast32(const U32& a);
|
||||
U128 VectorBroadcast64(const U64& a);
|
||||
U128 VectorLowerPairedAdd8(const U128& a, const U128& b);
|
||||
U128 VectorLowerPairedAdd16(const U128& a, const U128& b);
|
||||
U128 VectorLowerPairedAdd32(const U128& a, const U128& b);
|
||||
|
|
|
@ -166,6 +166,13 @@ OPCODE(VectorAdd16, T::U128, T::U128, T::U128
|
|||
OPCODE(VectorAdd32, T::U128, T::U128, T::U128 )
|
||||
OPCODE(VectorAdd64, T::U128, T::U128, T::U128 )
|
||||
OPCODE(VectorAnd, T::U128, T::U128, T::U128 )
|
||||
// Broadcast opcodes. The first type column matches the U128 result of the
// corresponding IREmitter helper; the remaining column matches the width of
// its scalar argument.
// "Lower" variants fill only the low 64-bit half of the vector.
OPCODE(VectorLowerBroadcast8, T::U128, T::U8 )
|
||||
OPCODE(VectorLowerBroadcast16, T::U128, T::U16 )
|
||||
OPCODE(VectorLowerBroadcast32, T::U128, T::U32 )
|
||||
// Full-width variants fill every lane of the 128-bit vector.
OPCODE(VectorBroadcast8, T::U128, T::U8 )
|
||||
OPCODE(VectorBroadcast16, T::U128, T::U16 )
|
||||
OPCODE(VectorBroadcast32, T::U128, T::U32 )
|
||||
OPCODE(VectorBroadcast64, T::U128, T::U64 )
|
||||
OPCODE(VectorLowerPairedAdd8, T::U128, T::U128, T::U128 )
|
||||
OPCODE(VectorLowerPairedAdd16, T::U128, T::U128, T::U128 )
|
||||
OPCODE(VectorLowerPairedAdd32, T::U128, T::U128, T::U128 )
|
||||
|
|
Loading…
Reference in a new issue