emit_x64_vector: AVX512 Implementation of EmitVectorNarrow{32,64}
Includes a new test case that exercises the XTN instruction to verify the implementation.
This commit is contained in:
parent
1643e8f3c6
commit
2c0be5e18c
2 changed files with 44 additions and 4 deletions
|
@ -2084,11 +2084,20 @@ void EmitX64::EmitVectorNarrow16(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
|
||||||
void EmitX64::EmitVectorNarrow32(EmitContext& ctx, IR::Inst* inst) {
|
void EmitX64::EmitVectorNarrow32(EmitContext& ctx, IR::Inst* inst) {
|
||||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
|
||||||
|
if (code.HasHostFeature(HostFeature::AVX512_Ortho)) {
|
||||||
|
const Xbyak::Xmm a = ctx.reg_alloc.UseXmm(args[0]);
|
||||||
|
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
|
||||||
|
|
||||||
|
code.vpmovdw(result, a);
|
||||||
|
|
||||||
|
ctx.reg_alloc.DefineValue(inst, result);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
|
const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||||
const Xbyak::Xmm zeros = ctx.reg_alloc.ScratchXmm();
|
const Xbyak::Xmm zeros = ctx.reg_alloc.ScratchXmm();
|
||||||
|
|
||||||
// TODO: AVX512F implementation
|
|
||||||
|
|
||||||
code.pxor(zeros, zeros);
|
code.pxor(zeros, zeros);
|
||||||
if (code.HasHostFeature(HostFeature::SSE41)) {
|
if (code.HasHostFeature(HostFeature::SSE41)) {
|
||||||
code.pblendw(a, zeros, 0b10101010);
|
code.pblendw(a, zeros, 0b10101010);
|
||||||
|
@ -2104,11 +2113,20 @@ void EmitX64::EmitVectorNarrow32(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
|
||||||
void EmitX64::EmitVectorNarrow64(EmitContext& ctx, IR::Inst* inst) {
|
void EmitX64::EmitVectorNarrow64(EmitContext& ctx, IR::Inst* inst) {
|
||||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
|
||||||
|
if (code.HasHostFeature(HostFeature::AVX512_Ortho)) {
|
||||||
|
const Xbyak::Xmm a = ctx.reg_alloc.UseXmm(args[0]);
|
||||||
|
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
|
||||||
|
|
||||||
|
code.vpmovqd(result, a);
|
||||||
|
|
||||||
|
ctx.reg_alloc.DefineValue(inst, result);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
|
const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||||
const Xbyak::Xmm zeros = ctx.reg_alloc.ScratchXmm();
|
const Xbyak::Xmm zeros = ctx.reg_alloc.ScratchXmm();
|
||||||
|
|
||||||
// TODO: AVX512F implementation
|
|
||||||
|
|
||||||
code.pxor(zeros, zeros);
|
code.pxor(zeros, zeros);
|
||||||
code.shufps(a, zeros, 0b00001000);
|
code.shufps(a, zeros, 0b00001000);
|
||||||
|
|
||||||
|
|
|
@ -89,6 +89,28 @@ TEST_CASE("A64: REV16", "[a64]") {
|
||||||
REQUIRE(jit.GetPC() == 8);
|
REQUIRE(jit.GetPC() == 8);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Exercises the A64 XTN instruction once per element size (8h->8b, 4s->4h,
// 2d->2s). The expected values below are the low halves of each source
// element packed into the low 64 bits, with the upper 64 bits equal to zero.
TEST_CASE("A64: XTN", "[a64]") {
    A64TestEnv test_env;
    A64::Jit cpu{A64::UserConfig{&test_env}};

    // Program under test: three narrowing moves followed by a branch-to-self.
    test_env.code_mem.emplace_back(0x0e212803);  // XTN v3.8b, v0.8h
    test_env.code_mem.emplace_back(0x0e612824);  // XTN v4.4h, v1.4s
    test_env.code_mem.emplace_back(0x0ea12845);  // XTN v5.2s, v2.2d
    test_env.code_mem.emplace_back(0x14000000);  // B .

    cpu.SetPC(0);

    // Source operands; each element carries a recognisable repeated pattern.
    cpu.SetVector(0, {0x3333222211110000, 0x7777666655554444});
    cpu.SetVector(1, {0x1111111100000000, 0x3333333322222222});
    cpu.SetVector(2, {0x0000000000000000, 0x1111111111111111});

    // Tick budget matches the four instructions placed in code memory.
    test_env.ticks_left = 4;
    cpu.Run();

    const Vector narrowed_halfwords{0x7766554433221100, 0x0000000000000000};
    const Vector narrowed_words{0x3333222211110000, 0x0000000000000000};
    const Vector narrowed_doublewords{0x1111111100000000, 0x0000000000000000};

    REQUIRE(cpu.GetVector(3) == narrowed_halfwords);
    REQUIRE(cpu.GetVector(4) == narrowed_words);
    REQUIRE(cpu.GetVector(5) == narrowed_doublewords);
}
|
||||||
|
|
||||||
TEST_CASE("A64: AND", "[a64]") {
|
TEST_CASE("A64: AND", "[a64]") {
|
||||||
A64TestEnv env;
|
A64TestEnv env;
|
||||||
A64::Jit jit{A64::UserConfig{&env}};
|
A64::Jit jit{A64::UserConfig{&env}};
|
||||||
|
|
Loading…
Reference in a new issue