backend_x64: Use a reference to BlockOfCode instead of a pointer

commit 68f46c8334
parent 8931ee346b
Author: MerryMage
Date: 2018-02-03 14:28:57 +00:00
27 changed files with 1331 additions and 1331 deletions
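
The change is mechanical but pervasive: every `BlockOfCode*` parameter and member becomes a `BlockOfCode&`, `code->` becomes `code.`, and call sites pass the object (or `*this`) rather than its address. The reference encodes the invariant that an emitter always has a valid, non-null `BlockOfCode` for its whole lifetime. A minimal sketch of the pattern, using hypothetical stand-in classes rather than the real dynarmic ones:

```cpp
#include <cstdio>

struct BlockOfCode {
    void int3() { std::puts("int3"); }
};

// Before: a nullable pointer that every use has to trust or check.
struct EmitterBefore {
    explicit EmitterBefore(BlockOfCode* code) : code(code) {}
    void EmitBreakpoint() { code->int3(); }
    BlockOfCode* code; // may be null, may be reseated
};

// After: a reference that is bound once and can never be null.
struct EmitterAfter {
    explicit EmitterAfter(BlockOfCode& code) : code(code) {}
    void EmitBreakpoint() { code.int3(); }
    BlockOfCode& code; // non-null by construction, fixed for the emitter's lifetime
};

int main() {
    BlockOfCode block;
    EmitterAfter emitter{block}; // call sites pass the object, not its address
    emitter.EmitBreakpoint();
}
```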

File diff suppressed because it is too large.


@@ -30,7 +30,7 @@ struct A32EmitContext final : public EmitContext {
class A32EmitX64 final : public EmitX64 {
public:
A32EmitX64(BlockOfCode* code, A32::UserConfig config, A32::Jit* jit_interface);
A32EmitX64(BlockOfCode& code, A32::UserConfig config, A32::Jit* jit_interface);
~A32EmitX64() override;
/**


@@ -46,7 +46,7 @@ static RunCodeCallbacks GenRunCodeCallbacks(A32::UserCallbacks* cb, CodePtr (*Lo
struct Jit::Impl {
Impl(Jit* jit, A32::UserConfig config)
: block_of_code(GenRunCodeCallbacks(config.callbacks, &GetCurrentBlock, this), JitStateInfo{jit_state})
, emitter(&block_of_code, config, jit)
, emitter(block_of_code, config, jit)
, config(config)
, jit_interface(jit)
{}
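
Passing `block_of_code` by reference into `emitter` here relies on member initialization order: non-static members initialize in declaration order, so `block_of_code` must be declared before `emitter` for the reference to bind to an already-constructed object. A self-contained sketch of that constraint, with illustrative types rather than the real ones:

```cpp
#include <iostream>

struct Block {
    int value = 42;
};

struct Emitter {
    explicit Emitter(Block& b) : block(b) {}
    Block& block;
};

struct Impl {
    // Members initialize in declaration order, so `block` is fully
    // constructed before `emitter(block)` runs. Declaring `emitter`
    // first would bind the reference to a not-yet-initialized object.
    Block block;
    Emitter emitter;
    Impl() : block(), emitter(block) {}
};

int main() {
    Impl impl;
    std::cout << impl.emitter.block.value << '\n'; // prints 42
}
```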


@@ -52,17 +52,17 @@ bool A64EmitContext::FPSCR_DN() const {
return Location().FPCR().DN();
}
A64EmitX64::A64EmitX64(BlockOfCode* code, A64::UserConfig conf)
A64EmitX64::A64EmitX64(BlockOfCode& code, A64::UserConfig conf)
: EmitX64(code), conf(conf)
{
code->PreludeComplete();
code.PreludeComplete();
}
A64EmitX64::~A64EmitX64() = default;
A64EmitX64::BlockDescriptor A64EmitX64::Emit(IR::Block& block) {
code->align();
const u8* const entrypoint = code->getCurr();
code.align();
const u8* const entrypoint = code.getCurr();
// Start emitting.
EmitCondPrelude(block);
@@ -102,12 +102,12 @@ A64EmitX64::BlockDescriptor A64EmitX64::Emit(IR::Block& block) {
EmitAddCycles(block.CycleCount());
EmitX64::EmitTerminal(block.GetTerminal(), block.Location());
code->int3();
code.int3();
const A64::LocationDescriptor descriptor{block.Location()};
Patch(descriptor, entrypoint);
const size_t size = static_cast<size_t>(code->getCurr() - entrypoint);
const size_t size = static_cast<size_t>(code.getCurr() - entrypoint);
const A64::LocationDescriptor end_location{block.EndLocation()};
const auto range = boost::icl::discrete_interval<u64>::closed(descriptor.PC(), end_location.PC() - 1);
A64EmitX64::BlockDescriptor block_desc{entrypoint, size};
@@ -129,32 +129,32 @@ void A64EmitX64::InvalidateCacheRanges(const boost::icl::interval_set<u64>& rang
void A64EmitX64::EmitA64SetCheckBit(A64EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
Xbyak::Reg8 to_store = ctx.reg_alloc.UseGpr(args[0]).cvt8();
code->mov(code->byte[r15 + offsetof(A64JitState, check_bit)], to_store);
code.mov(code.byte[r15 + offsetof(A64JitState, check_bit)], to_store);
}
void A64EmitX64::EmitA64GetCFlag(A64EmitContext& ctx, IR::Inst* inst) {
Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
code->mov(result, dword[r15 + offsetof(A64JitState, CPSR_nzcv)]);
code->shr(result, 29);
code->and_(result, 1);
code.mov(result, dword[r15 + offsetof(A64JitState, CPSR_nzcv)]);
code.shr(result, 29);
code.and_(result, 1);
ctx.reg_alloc.DefineValue(inst, result);
}
void A64EmitX64::EmitA64SetNZCV(A64EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
Xbyak::Reg32 to_store = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
code->and_(to_store, 0b11000001'00000001);
code->imul(to_store, to_store, 0b00010000'00100001);
code->shl(to_store, 16);
code->and_(to_store, 0xF0000000);
code->mov(dword[r15 + offsetof(A64JitState, CPSR_nzcv)], to_store);
code.and_(to_store, 0b11000001'00000001);
code.imul(to_store, to_store, 0b00010000'00100001);
code.shl(to_store, 16);
code.and_(to_store, 0xF0000000);
code.mov(dword[r15 + offsetof(A64JitState, CPSR_nzcv)], to_store);
}
void A64EmitX64::EmitA64GetW(A64EmitContext& ctx, IR::Inst* inst) {
A64::Reg reg = inst->GetArg(0).GetA64RegRef();
Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
code->mov(result, dword[r15 + offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg)]);
code.mov(result, dword[r15 + offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg)]);
ctx.reg_alloc.DefineValue(inst, result);
}
@@ -162,7 +162,7 @@ void A64EmitX64::EmitA64GetX(A64EmitContext& ctx, IR::Inst* inst) {
A64::Reg reg = inst->GetArg(0).GetA64RegRef();
Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr();
code->mov(result, qword[r15 + offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg)]);
code.mov(result, qword[r15 + offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg)]);
ctx.reg_alloc.DefineValue(inst, result);
}
@@ -171,7 +171,7 @@ void A64EmitX64::EmitA64GetS(A64EmitContext& ctx, IR::Inst* inst) {
auto addr = qword[r15 + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];
Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
code->movd(result, addr);
code.movd(result, addr);
ctx.reg_alloc.DefineValue(inst, result);
}
@@ -180,7 +180,7 @@ void A64EmitX64::EmitA64GetD(A64EmitContext& ctx, IR::Inst* inst) {
auto addr = qword[r15 + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];
Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
code->movq(result, addr);
code.movq(result, addr);
ctx.reg_alloc.DefineValue(inst, result);
}
@@ -189,13 +189,13 @@ void A64EmitX64::EmitA64GetQ(A64EmitContext& ctx, IR::Inst* inst) {
auto addr = xword[r15 + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];
Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
code->movaps(result, addr);
code.movaps(result, addr);
ctx.reg_alloc.DefineValue(inst, result);
}
void A64EmitX64::EmitA64GetSP(A64EmitContext& ctx, IR::Inst* inst) {
Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr();
code->mov(result, qword[r15 + offsetof(A64JitState, sp)]);
code.mov(result, qword[r15 + offsetof(A64JitState, sp)]);
ctx.reg_alloc.DefineValue(inst, result);
}
@@ -204,12 +204,12 @@ void A64EmitX64::EmitA64SetW(A64EmitContext& ctx, IR::Inst* inst) {
A64::Reg reg = inst->GetArg(0).GetA64RegRef();
auto addr = qword[r15 + offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg)];
if (args[1].FitsInImmediateS32()) {
code->mov(addr, args[1].GetImmediateS32());
code.mov(addr, args[1].GetImmediateS32());
} else {
// TODO: zext tracking, xmm variant
Xbyak::Reg64 to_store = ctx.reg_alloc.UseScratchGpr(args[1]);
code->mov(to_store.cvt32(), to_store.cvt32());
code->mov(addr, to_store);
code.mov(to_store.cvt32(), to_store.cvt32());
code.mov(addr, to_store);
}
}
@@ -218,13 +218,13 @@ void A64EmitX64::EmitA64SetX(A64EmitContext& ctx, IR::Inst* inst) {
A64::Reg reg = inst->GetArg(0).GetA64RegRef();
auto addr = qword[r15 + offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg)];
if (args[1].FitsInImmediateS32()) {
code->mov(addr, args[1].GetImmediateS32());
code.mov(addr, args[1].GetImmediateS32());
} else if (args[1].IsInXmm()) {
Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[1]);
code->movq(addr, to_store);
code.movq(addr, to_store);
} else {
Xbyak::Reg64 to_store = ctx.reg_alloc.UseGpr(args[1]);
code->mov(addr, to_store);
code.mov(addr, to_store);
}
}
@@ -236,9 +236,9 @@ void A64EmitX64::EmitA64SetS(A64EmitContext& ctx, IR::Inst* inst) {
Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[1]);
Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
// TODO: Optimize
code->pxor(tmp, tmp);
code->movss(tmp, to_store);
code->movaps(addr, tmp);
code.pxor(tmp, tmp);
code.movss(tmp, to_store);
code.movaps(addr, tmp);
}
void A64EmitX64::EmitA64SetD(A64EmitContext& ctx, IR::Inst* inst) {
@@ -247,8 +247,8 @@ void A64EmitX64::EmitA64SetD(A64EmitContext& ctx, IR::Inst* inst) {
auto addr = xword[r15 + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];
Xbyak::Xmm to_store = ctx.reg_alloc.UseScratchXmm(args[1]);
code->movq(to_store, to_store); // TODO: Remove when able
code->movaps(addr, to_store);
code.movq(to_store, to_store); // TODO: Remove when able
code.movaps(addr, to_store);
}
void A64EmitX64::EmitA64SetQ(A64EmitContext& ctx, IR::Inst* inst) {
@@ -257,20 +257,20 @@ void A64EmitX64::EmitA64SetQ(A64EmitContext& ctx, IR::Inst* inst) {
auto addr = xword[r15 + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];
Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[1]);
code->movaps(addr, to_store);
code.movaps(addr, to_store);
}
void A64EmitX64::EmitA64SetSP(A64EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
auto addr = qword[r15 + offsetof(A64JitState, sp)];
if (args[0].FitsInImmediateS32()) {
code->mov(addr, args[0].GetImmediateS32());
code.mov(addr, args[0].GetImmediateS32());
} else if (args[0].IsInXmm()) {
Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[0]);
code->movq(addr, to_store);
code.movq(addr, to_store);
} else {
Xbyak::Reg64 to_store = ctx.reg_alloc.UseGpr(args[0]);
code->mov(addr, to_store);
code.mov(addr, to_store);
}
}
@@ -278,13 +278,13 @@ void A64EmitX64::EmitA64SetPC(A64EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
auto addr = qword[r15 + offsetof(A64JitState, pc)];
if (args[0].FitsInImmediateS32()) {
code->mov(addr, args[0].GetImmediateS32());
code.mov(addr, args[0].GetImmediateS32());
} else if (args[0].IsInXmm()) {
Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[0]);
code->movq(addr, to_store);
code.movq(addr, to_store);
} else {
Xbyak::Reg64 to_store = ctx.reg_alloc.UseGpr(args[0]);
code->mov(addr, to_store);
code.mov(addr, to_store);
}
}
@@ -294,7 +294,7 @@ void A64EmitX64::EmitA64CallSupervisor(A64EmitContext& ctx, IR::Inst* inst) {
ASSERT(args[0].IsImmediate());
u32 imm = args[0].GetImmediateU32();
DEVIRT(conf.callbacks, &A64::UserCallbacks::CallSVC).EmitCall(code, [&](Xbyak::Reg64 param1) {
code->mov(param1.cvt32(), imm);
code.mov(param1.cvt32(), imm);
});
}
@@ -305,14 +305,14 @@ void A64EmitX64::EmitA64ExceptionRaised(A64EmitContext& ctx, IR::Inst* inst) {
u64 pc = args[0].GetImmediateU64();
u64 exception = args[1].GetImmediateU64();
DEVIRT(conf.callbacks, &A64::UserCallbacks::ExceptionRaised).EmitCall(code, [&](Xbyak::Reg64 param1, Xbyak::Reg64 param2) {
code->mov(param1, pc);
code->mov(param2, exception);
code.mov(param1, pc);
code.mov(param2, exception);
});
}
void A64EmitX64::EmitA64ReadMemory8(A64EmitContext& ctx, IR::Inst* inst) {
DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryRead8).EmitCall(code, [&](Xbyak::Reg64 vaddr) {
ASSERT(vaddr == code->ABI_PARAM2);
ASSERT(vaddr == code.ABI_PARAM2);
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.HostCall(inst, {}, args[0]);
});
@@ -320,7 +320,7 @@ void A64EmitX64::EmitA64ReadMemory8(A64EmitContext& ctx, IR::Inst* inst) {
void A64EmitX64::EmitA64ReadMemory16(A64EmitContext& ctx, IR::Inst* inst) {
DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryRead16).EmitCall(code, [&](Xbyak::Reg64 vaddr) {
ASSERT(vaddr == code->ABI_PARAM2);
ASSERT(vaddr == code.ABI_PARAM2);
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.HostCall(inst, {}, args[0]);
});
@@ -328,7 +328,7 @@ void A64EmitX64::EmitA64ReadMemory16(A64EmitContext& ctx, IR::Inst* inst) {
void A64EmitX64::EmitA64ReadMemory32(A64EmitContext& ctx, IR::Inst* inst) {
DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryRead32).EmitCall(code, [&](Xbyak::Reg64 vaddr) {
ASSERT(vaddr == code->ABI_PARAM2);
ASSERT(vaddr == code.ABI_PARAM2);
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.HostCall(inst, {}, args[0]);
});
@@ -336,7 +336,7 @@ void A64EmitX64::EmitA64ReadMemory32(A64EmitContext& ctx, IR::Inst* inst) {
void A64EmitX64::EmitA64ReadMemory64(A64EmitContext& ctx, IR::Inst* inst) {
DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryRead64).EmitCall(code, [&](Xbyak::Reg64 vaddr) {
ASSERT(vaddr == code->ABI_PARAM2);
ASSERT(vaddr == code.ABI_PARAM2);
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.HostCall(inst, {}, args[0]);
});
@@ -348,33 +348,33 @@ void A64EmitX64::EmitA64ReadMemory128(A64EmitContext& ctx, IR::Inst* inst) {
static_assert(ABI_SHADOW_SPACE >= 16);
ctx.reg_alloc.HostCall(nullptr, {}, {}, args[0]);
code->lea(code->ABI_PARAM2, ptr[rsp]);
code->sub(rsp, ABI_SHADOW_SPACE);
code.lea(code.ABI_PARAM2, ptr[rsp]);
code.sub(rsp, ABI_SHADOW_SPACE);
DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryRead128).EmitCall(code, [&](Xbyak::Reg64 return_value, Xbyak::Reg64 vaddr) {
ASSERT(return_value == code->ABI_PARAM2 && vaddr == code->ABI_PARAM3);
ASSERT(return_value == code.ABI_PARAM2 && vaddr == code.ABI_PARAM3);
});
Xbyak::Xmm result = xmm0;
code->movups(result, xword[code->ABI_RETURN]);
code->add(rsp, ABI_SHADOW_SPACE);
code.movups(result, xword[code.ABI_RETURN]);
code.add(rsp, ABI_SHADOW_SPACE);
ctx.reg_alloc.DefineValue(inst, result);
#else
DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryRead128).EmitCall(code, [&](Xbyak::Reg64 vaddr) {
ASSERT(vaddr == code->ABI_PARAM2);
ASSERT(vaddr == code.ABI_PARAM2);
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.HostCall(nullptr, {}, args[0]);
});
Xbyak::Xmm result = xmm0;
if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
code->movq(result, code->ABI_RETURN);
code->pinsrq(result, code->ABI_RETURN2, 1);
if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
code.movq(result, code.ABI_RETURN);
code.pinsrq(result, code.ABI_RETURN2, 1);
} else {
Xbyak::Xmm tmp = xmm1;
code->movq(result, code->ABI_RETURN);
code->movq(tmp, code->ABI_RETURN2);
code->punpcklqdq(result, tmp);
code.movq(result, code.ABI_RETURN);
code.movq(tmp, code.ABI_RETURN2);
code.punpcklqdq(result, tmp);
}
ctx.reg_alloc.DefineValue(inst, result);
#endif
@@ -382,7 +382,7 @@ void A64EmitX64::EmitA64ReadMemory128(A64EmitContext& ctx, IR::Inst* inst) {
void A64EmitX64::EmitA64WriteMemory8(A64EmitContext& ctx, IR::Inst* inst) {
DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryWrite8).EmitCall(code, [&](Xbyak::Reg64 vaddr, Xbyak::Reg64 value) {
ASSERT(vaddr == code->ABI_PARAM2 && value == code->ABI_PARAM3);
ASSERT(vaddr == code.ABI_PARAM2 && value == code.ABI_PARAM3);
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.HostCall(nullptr, {}, args[0], args[1]);
});
@@ -390,7 +390,7 @@ void A64EmitX64::EmitA64WriteMemory8(A64EmitContext& ctx, IR::Inst* inst) {
void A64EmitX64::EmitA64WriteMemory16(A64EmitContext& ctx, IR::Inst* inst) {
DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryWrite16).EmitCall(code, [&](Xbyak::Reg64 vaddr, Xbyak::Reg64 value) {
ASSERT(vaddr == code->ABI_PARAM2 && value == code->ABI_PARAM3);
ASSERT(vaddr == code.ABI_PARAM2 && value == code.ABI_PARAM3);
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.HostCall(nullptr, {}, args[0], args[1]);
});
@@ -398,7 +398,7 @@ void A64EmitX64::EmitA64WriteMemory16(A64EmitContext& ctx, IR::Inst* inst) {
void A64EmitX64::EmitA64WriteMemory32(A64EmitContext& ctx, IR::Inst* inst) {
DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryWrite32).EmitCall(code, [&](Xbyak::Reg64 vaddr, Xbyak::Reg64 value) {
ASSERT(vaddr == code->ABI_PARAM2 && value == code->ABI_PARAM3);
ASSERT(vaddr == code.ABI_PARAM2 && value == code.ABI_PARAM3);
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.HostCall(nullptr, {}, args[0], args[1]);
});
@@ -406,7 +406,7 @@ void A64EmitX64::EmitA64WriteMemory32(A64EmitContext& ctx, IR::Inst* inst) {
void A64EmitX64::EmitA64WriteMemory64(A64EmitContext& ctx, IR::Inst* inst) {
DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryWrite64).EmitCall(code, [&](Xbyak::Reg64 vaddr, Xbyak::Reg64 value) {
ASSERT(vaddr == code->ABI_PARAM2 && value == code->ABI_PARAM3);
ASSERT(vaddr == code.ABI_PARAM2 && value == code.ABI_PARAM3);
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.HostCall(nullptr, {}, args[0], args[1]);
});
@@ -421,31 +421,31 @@ void A64EmitX64::EmitA64WriteMemory128(A64EmitContext& ctx, IR::Inst* inst) {
Xbyak::Xmm xmm_value = ctx.reg_alloc.UseXmm(args[1]);
ctx.reg_alloc.EndOfAllocScope();
ctx.reg_alloc.HostCall(nullptr);
code->lea(code->ABI_PARAM3, ptr[rsp]);
code->sub(rsp, ABI_SHADOW_SPACE);
code->movaps(xword[code->ABI_PARAM3], xmm_value);
code.lea(code.ABI_PARAM3, ptr[rsp]);
code.sub(rsp, ABI_SHADOW_SPACE);
code.movaps(xword[code.ABI_PARAM3], xmm_value);
DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryWrite128).EmitCall(code, [&](Xbyak::Reg64 vaddr, Xbyak::Reg64 value_ptr) {
ASSERT(vaddr == code->ABI_PARAM2 && value_ptr == code->ABI_PARAM3);
ASSERT(vaddr == code.ABI_PARAM2 && value_ptr == code.ABI_PARAM3);
});
code->add(rsp, ABI_SHADOW_SPACE);
code.add(rsp, ABI_SHADOW_SPACE);
#else
DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryWrite128).EmitCall(code, [&](Xbyak::Reg64 vaddr, Xbyak::Reg64 value0, Xbyak::Reg64 value1) {
ASSERT(vaddr == code->ABI_PARAM2 && value0 == code->ABI_PARAM3 && value1 == code->ABI_PARAM4);
ASSERT(vaddr == code.ABI_PARAM2 && value0 == code.ABI_PARAM3 && value1 == code.ABI_PARAM4);
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.Use(args[0], ABI_PARAM2);
ctx.reg_alloc.ScratchGpr({ABI_PARAM3});
ctx.reg_alloc.ScratchGpr({ABI_PARAM4});
if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
Xbyak::Xmm xmm_value = ctx.reg_alloc.UseXmm(args[1]);
code->movq(code->ABI_PARAM3, xmm_value);
code->pextrq(code->ABI_PARAM4, xmm_value, 1);
code.movq(code.ABI_PARAM3, xmm_value);
code.pextrq(code.ABI_PARAM4, xmm_value, 1);
} else {
Xbyak::Xmm xmm_value = ctx.reg_alloc.UseScratchXmm(args[1]);
code->movq(code->ABI_PARAM3, xmm_value);
code->punpckhqdq(xmm_value, xmm_value);
code->movq(code->ABI_PARAM4, xmm_value);
code.movq(code.ABI_PARAM3, xmm_value);
code.punpckhqdq(xmm_value, xmm_value);
code.movq(code.ABI_PARAM4, xmm_value);
}
ctx.reg_alloc.EndOfAllocScope();
ctx.reg_alloc.HostCall(nullptr);
@@ -454,35 +454,35 @@ void A64EmitX64::EmitA64WriteMemory128(A64EmitContext& ctx, IR::Inst* inst) {
}
void A64EmitX64::EmitTerminalImpl(IR::Term::Interpret terminal, IR::LocationDescriptor) {
code->SwitchMxcsrOnExit();
code.SwitchMxcsrOnExit();
DEVIRT(conf.callbacks, &A64::UserCallbacks::InterpreterFallback).EmitCall(code, [&](Xbyak::Reg64 param1, Xbyak::Reg64 param2) {
code->mov(param1, A64::LocationDescriptor{terminal.next}.PC());
code->mov(qword[r15 + offsetof(A64JitState, pc)], param1);
code->mov(param2.cvt32(), terminal.num_instructions);
code.mov(param1, A64::LocationDescriptor{terminal.next}.PC());
code.mov(qword[r15 + offsetof(A64JitState, pc)], param1);
code.mov(param2.cvt32(), terminal.num_instructions);
});
code->ReturnFromRunCode(true); // TODO: Check cycles
code.ReturnFromRunCode(true); // TODO: Check cycles
}
void A64EmitX64::EmitTerminalImpl(IR::Term::ReturnToDispatch, IR::LocationDescriptor) {
code->ReturnFromRunCode();
code.ReturnFromRunCode();
}
void A64EmitX64::EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDescriptor) {
code->cmp(qword[r15 + offsetof(A64JitState, cycles_remaining)], 0);
code.cmp(qword[r15 + offsetof(A64JitState, cycles_remaining)], 0);
patch_information[terminal.next].jg.emplace_back(code->getCurr());
patch_information[terminal.next].jg.emplace_back(code.getCurr());
if (auto next_bb = GetBasicBlock(terminal.next)) {
EmitPatchJg(terminal.next, next_bb->entrypoint);
} else {
EmitPatchJg(terminal.next);
}
code->mov(rax, A64::LocationDescriptor{terminal.next}.PC());
code->mov(qword[r15 + offsetof(A64JitState, pc)], rax);
code->ForceReturnFromRunCode();
code.mov(rax, A64::LocationDescriptor{terminal.next}.PC());
code.mov(qword[r15 + offsetof(A64JitState, pc)], rax);
code.ForceReturnFromRunCode();
}
void A64EmitX64::EmitTerminalImpl(IR::Term::LinkBlockFast terminal, IR::LocationDescriptor) {
patch_information[terminal.next].jmp.emplace_back(code->getCurr());
patch_information[terminal.next].jmp.emplace_back(code.getCurr());
if (auto next_bb = GetBasicBlock(terminal.next)) {
EmitPatchJmp(terminal.next, next_bb->entrypoint);
} else {
@@ -493,20 +493,20 @@ void A64EmitX64::EmitTerminalImpl(IR::Term::LinkBlockFast terminal, IR::Location
void A64EmitX64::EmitTerminalImpl(IR::Term::PopRSBHint, IR::LocationDescriptor) {
// This calculation has to match up with A64::LocationDescriptor::UniqueHash
// TODO: Optimization is available here based on known state of FPSCR_mode and CPSR_et.
code->mov(rcx, qword[r15 + offsetof(A64JitState, pc)]);
code->mov(ebx, dword[r15 + offsetof(A64JitState, fpcr)]);
code->and_(ebx, A64::LocationDescriptor::FPCR_MASK);
code->shl(ebx, 37);
code->or_(rbx, rcx);
code.mov(rcx, qword[r15 + offsetof(A64JitState, pc)]);
code.mov(ebx, dword[r15 + offsetof(A64JitState, fpcr)]);
code.and_(ebx, A64::LocationDescriptor::FPCR_MASK);
code.shl(ebx, 37);
code.or_(rbx, rcx);
code->mov(eax, dword[r15 + offsetof(A64JitState, rsb_ptr)]);
code->sub(eax, 1);
code->and_(eax, u32(A64JitState::RSBPtrMask));
code->mov(dword[r15 + offsetof(A64JitState, rsb_ptr)], eax);
code->cmp(rbx, qword[r15 + offsetof(A64JitState, rsb_location_descriptors) + rax * sizeof(u64)]);
code->jne(code->GetReturnFromRunCodeAddress());
code->mov(rax, qword[r15 + offsetof(A64JitState, rsb_codeptrs) + rax * sizeof(u64)]);
code->jmp(rax);
code.mov(eax, dword[r15 + offsetof(A64JitState, rsb_ptr)]);
code.sub(eax, 1);
code.and_(eax, u32(A64JitState::RSBPtrMask));
code.mov(dword[r15 + offsetof(A64JitState, rsb_ptr)], eax);
code.cmp(rbx, qword[r15 + offsetof(A64JitState, rsb_location_descriptors) + rax * sizeof(u64)]);
code.jne(code.GetReturnFromRunCodeAddress());
code.mov(rax, qword[r15 + offsetof(A64JitState, rsb_codeptrs) + rax * sizeof(u64)]);
code.jmp(rax);
}
void A64EmitX64::EmitTerminalImpl(IR::Term::If terminal, IR::LocationDescriptor initial_location) {
@@ -518,7 +518,7 @@ void A64EmitX64::EmitTerminalImpl(IR::Term::If terminal, IR::LocationDescriptor
default:
Xbyak::Label pass = EmitCond(terminal.if_);
EmitTerminal(terminal.else_, initial_location);
code->L(pass);
code.L(pass);
EmitTerminal(terminal.then_, initial_location);
break;
}
@@ -526,50 +526,50 @@
void A64EmitX64::EmitTerminalImpl(IR::Term::CheckBit terminal, IR::LocationDescriptor initial_location) {
Xbyak::Label fail;
code->cmp(code->byte[r15 + offsetof(A64JitState, check_bit)], u8(0));
code->jz(fail);
code.cmp(code.byte[r15 + offsetof(A64JitState, check_bit)], u8(0));
code.jz(fail);
EmitTerminal(terminal.then_, initial_location);
code->L(fail);
code.L(fail);
EmitTerminal(terminal.else_, initial_location);
}
void A64EmitX64::EmitTerminalImpl(IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location) {
code->cmp(code->byte[r15 + offsetof(A64JitState, halt_requested)], u8(0));
code->jne(code->GetForceReturnFromRunCodeAddress());
code.cmp(code.byte[r15 + offsetof(A64JitState, halt_requested)], u8(0));
code.jne(code.GetForceReturnFromRunCodeAddress());
EmitTerminal(terminal.else_, initial_location);
}
void A64EmitX64::EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr) {
const CodePtr patch_location = code->getCurr();
const CodePtr patch_location = code.getCurr();
if (target_code_ptr) {
code->jg(target_code_ptr);
code.jg(target_code_ptr);
} else {
code->mov(rax, A64::LocationDescriptor{target_desc}.PC());
code->mov(qword[r15 + offsetof(A64JitState, pc)], rax);
code->jg(code->GetReturnFromRunCodeAddress());
code.mov(rax, A64::LocationDescriptor{target_desc}.PC());
code.mov(qword[r15 + offsetof(A64JitState, pc)], rax);
code.jg(code.GetReturnFromRunCodeAddress());
}
code->EnsurePatchLocationSize(patch_location, 30); // TODO: Reduce size
code.EnsurePatchLocationSize(patch_location, 30); // TODO: Reduce size
}
void A64EmitX64::EmitPatchJmp(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr) {
const CodePtr patch_location = code->getCurr();
const CodePtr patch_location = code.getCurr();
if (target_code_ptr) {
code->jmp(target_code_ptr);
code.jmp(target_code_ptr);
} else {
code->mov(rax, A64::LocationDescriptor{target_desc}.PC());
code->mov(qword[r15 + offsetof(A64JitState, pc)], rax);
code->jmp(code->GetReturnFromRunCodeAddress());
code.mov(rax, A64::LocationDescriptor{target_desc}.PC());
code.mov(qword[r15 + offsetof(A64JitState, pc)], rax);
code.jmp(code.GetReturnFromRunCodeAddress());
}
code->EnsurePatchLocationSize(patch_location, 30); // TODO: Reduce size
code.EnsurePatchLocationSize(patch_location, 30); // TODO: Reduce size
}
void A64EmitX64::EmitPatchMovRcx(CodePtr target_code_ptr) {
if (!target_code_ptr) {
target_code_ptr = code->GetReturnFromRunCodeAddress();
target_code_ptr = code.GetReturnFromRunCodeAddress();
}
const CodePtr patch_location = code->getCurr();
code->mov(code->rcx, reinterpret_cast<u64>(target_code_ptr));
code->EnsurePatchLocationSize(patch_location, 10);
const CodePtr patch_location = code.getCurr();
code.mov(code.rcx, reinterpret_cast<u64>(target_code_ptr));
code.EnsurePatchLocationSize(patch_location, 10);
}
} // namespace Dynarmic::BackendX64


@@ -27,7 +27,7 @@ struct A64EmitContext final : public EmitContext {
class A64EmitX64 final : public EmitX64 {
public:
A64EmitX64(BlockOfCode* code, A64::UserConfig conf);
A64EmitX64(BlockOfCode& code, A64::UserConfig conf);
~A64EmitX64() override;
/**


@@ -39,7 +39,7 @@ public:
explicit Impl(UserConfig conf)
: conf(conf)
, block_of_code(GenRunCodeCallbacks(conf.callbacks, &GetCurrentBlockThunk, this), JitStateInfo{jit_state})
, emitter(&block_of_code, conf)
, emitter(block_of_code, conf)
{}
~Impl() = default;


@@ -55,7 +55,7 @@ static FrameInfo CalculateFrameInfo(size_t num_gprs, size_t num_xmms, size_t fra
}
template<typename RegisterArrayT>
void ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator* code, size_t frame_size, const RegisterArrayT& regs) {
void ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, size_t frame_size, const RegisterArrayT& regs) {
using namespace Xbyak::util;
const size_t num_gprs = std::count_if(regs.begin(), regs.end(), HostLocIsGPR);
@@ -65,25 +65,25 @@ void ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator* code, size_t frame_si
for (HostLoc gpr : regs) {
if (HostLocIsGPR(gpr)) {
code->push(HostLocToReg64(gpr));
code.push(HostLocToReg64(gpr));
}
}
if (frame_info.stack_subtraction != 0) {
code->sub(rsp, u32(frame_info.stack_subtraction));
code.sub(rsp, u32(frame_info.stack_subtraction));
}
size_t xmm_offset = frame_info.xmm_offset;
for (HostLoc xmm : regs) {
if (HostLocIsXMM(xmm)) {
code->movaps(code->xword[rsp + xmm_offset], HostLocToXmm(xmm));
code.movaps(code.xword[rsp + xmm_offset], HostLocToXmm(xmm));
xmm_offset += XMM_SIZE;
}
}
}
template<typename RegisterArrayT>
void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator* code, size_t frame_size, const RegisterArrayT& regs) {
void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, size_t frame_size, const RegisterArrayT& regs) {
using namespace Xbyak::util;
const size_t num_gprs = std::count_if(regs.begin(), regs.end(), HostLocIsGPR);
@@ -94,35 +94,35 @@ void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator* code, size_t frame_siz
size_t xmm_offset = frame_info.xmm_offset;
for (HostLoc xmm : regs) {
if (HostLocIsXMM(xmm)) {
code->movaps(HostLocToXmm(xmm), code->xword[rsp + xmm_offset]);
code.movaps(HostLocToXmm(xmm), code.xword[rsp + xmm_offset]);
xmm_offset += XMM_SIZE;
}
}
if (frame_info.stack_subtraction != 0) {
code->add(rsp, u32(frame_info.stack_subtraction));
code.add(rsp, u32(frame_info.stack_subtraction));
}
for (HostLoc gpr : Common::Reverse(regs)) {
if (HostLocIsGPR(gpr)) {
code->pop(HostLocToReg64(gpr));
code.pop(HostLocToReg64(gpr));
}
}
}
void ABI_PushCalleeSaveRegistersAndAdjustStack(Xbyak::CodeGenerator* code, size_t frame_size) {
void ABI_PushCalleeSaveRegistersAndAdjustStack(Xbyak::CodeGenerator& code, size_t frame_size) {
ABI_PushRegistersAndAdjustStack(code, frame_size, ABI_ALL_CALLEE_SAVE);
}
void ABI_PopCalleeSaveRegistersAndAdjustStack(Xbyak::CodeGenerator* code, size_t frame_size) {
void ABI_PopCalleeSaveRegistersAndAdjustStack(Xbyak::CodeGenerator& code, size_t frame_size) {
ABI_PopRegistersAndAdjustStack(code, frame_size, ABI_ALL_CALLEE_SAVE);
}
void ABI_PushCallerSaveRegistersAndAdjustStack(Xbyak::CodeGenerator* code, size_t frame_size) {
void ABI_PushCallerSaveRegistersAndAdjustStack(Xbyak::CodeGenerator& code, size_t frame_size) {
ABI_PushRegistersAndAdjustStack(code, frame_size, ABI_ALL_CALLER_SAVE);
}
void ABI_PopCallerSaveRegistersAndAdjustStack(Xbyak::CodeGenerator* code, size_t frame_size) {
void ABI_PopCallerSaveRegistersAndAdjustStack(Xbyak::CodeGenerator& code, size_t frame_size) {
ABI_PopRegistersAndAdjustStack(code, frame_size, ABI_ALL_CALLER_SAVE);
}


@@ -109,9 +109,9 @@ constexpr size_t ABI_SHADOW_SPACE = 0; // bytes
static_assert(ABI_ALL_CALLER_SAVE.size() + ABI_ALL_CALLEE_SAVE.size() == 30, "Invalid total number of registers");
void ABI_PushCalleeSaveRegistersAndAdjustStack(Xbyak::CodeGenerator* code, size_t frame_size = 0);
void ABI_PopCalleeSaveRegistersAndAdjustStack(Xbyak::CodeGenerator* code, size_t frame_size = 0);
void ABI_PushCallerSaveRegistersAndAdjustStack(Xbyak::CodeGenerator* code, size_t frame_size = 0);
void ABI_PopCallerSaveRegistersAndAdjustStack(Xbyak::CodeGenerator* code, size_t frame_size = 0);
void ABI_PushCalleeSaveRegistersAndAdjustStack(Xbyak::CodeGenerator& code, size_t frame_size = 0);
void ABI_PopCalleeSaveRegistersAndAdjustStack(Xbyak::CodeGenerator& code, size_t frame_size = 0);
void ABI_PushCallerSaveRegistersAndAdjustStack(Xbyak::CodeGenerator& code, size_t frame_size = 0);
void ABI_PopCallerSaveRegistersAndAdjustStack(Xbyak::CodeGenerator& code, size_t frame_size = 0);
} // namespace Dynarmic::BackendX64


@@ -38,10 +38,10 @@ BlockOfCode::BlockOfCode(RunCodeCallbacks cb, JitStateInfo jsi)
: Xbyak::CodeGenerator(TOTAL_CODE_SIZE)
, cb(std::move(cb))
, jsi(jsi)
, constant_pool(this, 256)
, constant_pool(*this, 256)
{
GenRunCode();
exception_handler.Register(this);
exception_handler.Register(*this);
}
void BlockOfCode::PreludeComplete() {
@@ -107,12 +107,12 @@ void BlockOfCode::GenRunCode() {
align();
run_code_from = getCurr<RunCodeFromFuncType>();
ABI_PushCalleeSaveRegistersAndAdjustStack(this);
ABI_PushCalleeSaveRegistersAndAdjustStack(*this);
mov(r15, ABI_PARAM1);
mov(r14, ABI_PARAM2); // save temporarily in non-volatile register
cb.GetTicksRemaining->EmitCall(this);
cb.GetTicksRemaining->EmitCall(*this);
mov(qword[r15 + jsi.offsetof_cycles_to_run], ABI_RETURN);
mov(qword[r15 + jsi.offsetof_cycles_remaining], ABI_RETURN);
@@ -126,18 +126,18 @@ void BlockOfCode::GenRunCode() {
// 1. It saves all the registers we as a callee need to save.
// 2. It aligns the stack so that the code the JIT emits can assume
// that the stack is appropriately aligned for CALLs.
ABI_PushCalleeSaveRegistersAndAdjustStack(this);
ABI_PushCalleeSaveRegistersAndAdjustStack(*this);
mov(r15, ABI_PARAM1);
cb.GetTicksRemaining->EmitCall(this);
cb.GetTicksRemaining->EmitCall(*this);
mov(qword[r15 + jsi.offsetof_cycles_to_run], ABI_RETURN);
mov(qword[r15 + jsi.offsetof_cycles_remaining], ABI_RETURN);
L(enter_mxcsr_then_loop);
SwitchMxcsrOnEntry();
L(loop);
cb.LookupBlock->EmitCall(this);
cb.LookupBlock->EmitCall(*this);
jmp(ABI_RETURN);
@@ -152,12 +152,12 @@ void BlockOfCode::GenRunCode() {
SwitchMxcsrOnExit();
}
cb.AddTicks->EmitCall(this, [this](Xbyak::Reg64 param1) {
cb.AddTicks->EmitCall(*this, [this](Xbyak::Reg64 param1) {
mov(param1, qword[r15 + jsi.offsetof_cycles_to_run]);
sub(param1, qword[r15 + jsi.offsetof_cycles_remaining]);
});
ABI_PopCalleeSaveRegistersAndAdjustStack(this);
ABI_PopCalleeSaveRegistersAndAdjustStack(*this);
ret();
};
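
Inside `BlockOfCode` itself, places that previously passed `this` now pass `*this`, including from the constructor's initializer list (`constant_pool(*this, 256)`). That is safe here because base subobjects construct before members: by the time `constant_pool` initializes, the `Xbyak::CodeGenerator` base is live, so `ConstantPool`'s constructor can emit through the reference as long as it avoids members of `BlockOfCode` declared after `constant_pool`. A sketch of that ordering, with hypothetical minimal types:

```cpp
#include <iostream>

struct Generator { // stands in for the Xbyak::CodeGenerator base
    void int3() { std::cout << "int3\n"; }
};

struct Pool {
    explicit Pool(Generator& g) : g(g) {
        g.int3(); // OK: bases construct before members, so this is live
    }
    Generator& g;
};

struct Codegen : Generator {
    Codegen() : pool(*this) {} // like constant_pool(*this, 256)
    Pool pool;
};

int main() {
    Codegen cg; // prints "int3", emitted while members initialize
    (void)cg;
}
```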


@@ -138,7 +138,7 @@ private:
ExceptionHandler();
~ExceptionHandler();
void Register(BlockOfCode* code);
void Register(BlockOfCode& code);
private:
struct Impl;
std::unique_ptr<Impl> impl;


@@ -9,48 +9,48 @@
namespace Dynarmic::BackendX64 {
void SimpleCallback::EmitCall(BlockOfCode* code, std::function<void()> l) {
void SimpleCallback::EmitCall(BlockOfCode& code, std::function<void()> l) {
l();
code->CallFunction(fn);
code.CallFunction(fn);
}
void SimpleCallback::EmitCall(BlockOfCode* code, std::function<void(Xbyak::Reg64)> l) {
l(code->ABI_PARAM1);
code->CallFunction(fn);
void SimpleCallback::EmitCall(BlockOfCode& code, std::function<void(Xbyak::Reg64)> l) {
l(code.ABI_PARAM1);
code.CallFunction(fn);
}
void SimpleCallback::EmitCall(BlockOfCode* code, std::function<void(Xbyak::Reg64, Xbyak::Reg64)> l) {
l(code->ABI_PARAM1, code->ABI_PARAM2);
code->CallFunction(fn);
void SimpleCallback::EmitCall(BlockOfCode& code, std::function<void(Xbyak::Reg64, Xbyak::Reg64)> l) {
l(code.ABI_PARAM1, code.ABI_PARAM2);
code.CallFunction(fn);
}
void SimpleCallback::EmitCall(BlockOfCode* code, std::function<void(Xbyak::Reg64, Xbyak::Reg64, Xbyak::Reg64)> l) {
l(code->ABI_PARAM1, code->ABI_PARAM2, code->ABI_PARAM3);
code->CallFunction(fn);
void SimpleCallback::EmitCall(BlockOfCode& code, std::function<void(Xbyak::Reg64, Xbyak::Reg64, Xbyak::Reg64)> l) {
l(code.ABI_PARAM1, code.ABI_PARAM2, code.ABI_PARAM3);
code.CallFunction(fn);
}
void ArgCallback::EmitCall(BlockOfCode* code, std::function<void()> l) {
void ArgCallback::EmitCall(BlockOfCode& code, std::function<void()> l) {
l();
code->mov(code->ABI_PARAM1, arg);
code->CallFunction(fn);
code.mov(code.ABI_PARAM1, arg);
code.CallFunction(fn);
}
void ArgCallback::EmitCall(BlockOfCode* code, std::function<void(Xbyak::Reg64)> l) {
l(code->ABI_PARAM2);
code->mov(code->ABI_PARAM1, arg);
code->CallFunction(fn);
void ArgCallback::EmitCall(BlockOfCode& code, std::function<void(Xbyak::Reg64)> l) {
l(code.ABI_PARAM2);
code.mov(code.ABI_PARAM1, arg);
code.CallFunction(fn);
}
void ArgCallback::EmitCall(BlockOfCode* code, std::function<void(Xbyak::Reg64, Xbyak::Reg64)> l) {
l(code->ABI_PARAM2, code->ABI_PARAM3);
code->mov(code->ABI_PARAM1, arg);
code->CallFunction(fn);
void ArgCallback::EmitCall(BlockOfCode& code, std::function<void(Xbyak::Reg64, Xbyak::Reg64)> l) {
l(code.ABI_PARAM2, code.ABI_PARAM3);
code.mov(code.ABI_PARAM1, arg);
code.CallFunction(fn);
}
void ArgCallback::EmitCall(BlockOfCode* code, std::function<void(Xbyak::Reg64, Xbyak::Reg64, Xbyak::Reg64)> l) {
l(code->ABI_PARAM2, code->ABI_PARAM3, code->ABI_PARAM4);
code->mov(code->ABI_PARAM1, arg);
code->CallFunction(fn);
void ArgCallback::EmitCall(BlockOfCode& code, std::function<void(Xbyak::Reg64, Xbyak::Reg64, Xbyak::Reg64)> l) {
l(code.ABI_PARAM2, code.ABI_PARAM3, code.ABI_PARAM4);
code.mov(code.ABI_PARAM1, arg);
code.CallFunction(fn);
}
} // namespace Dynarmic::BackendX64
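
The callback pattern above lets the caller load the ABI argument registers via a lambda before the actual call is emitted. A simplified, self-contained analogue of `SimpleCallback::EmitCall` (plain function calls standing in for emitted machine code; the types here are hypothetical):

```cpp
#include <functional>
#include <iostream>

struct Reg {
    const char* name;
};

struct Code {
    Reg ABI_PARAM1{"rcx"};
    void mov(Reg r, int imm) { std::cout << "mov " << r.name << ", " << imm << '\n'; }
    void CallFunction(void (*fn)()) { fn(); }
};

// Mirrors the shape of SimpleCallback::EmitCall: run the caller's lambda to
// set up arguments, then emit (here: perform) the actual call.
struct SimpleCallback {
    void (*fn)();
    void EmitCall(Code& code, std::function<void(Reg)> l) {
        l(code.ABI_PARAM1);
        code.CallFunction(fn);
    }
};

void target() { std::cout << "called\n"; }

int main() {
    Code code;
    SimpleCallback cb{target};
    cb.EmitCall(code, [&](Reg param1) { code.mov(param1, 42); });
}
```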


@@ -20,10 +20,10 @@ class Callback {
public:
virtual ~Callback() = default;
virtual void EmitCall(BlockOfCode* code, std::function<void()> fn = []{}) = 0;
virtual void EmitCall(BlockOfCode* code, std::function<void(Xbyak::Reg64)> fn) = 0;
virtual void EmitCall(BlockOfCode* code, std::function<void(Xbyak::Reg64, Xbyak::Reg64)> fn) = 0;
virtual void EmitCall(BlockOfCode* code, std::function<void(Xbyak::Reg64, Xbyak::Reg64, Xbyak::Reg64)> fn) = 0;
virtual void EmitCall(BlockOfCode& code, std::function<void()> fn = []{}) = 0;
virtual void EmitCall(BlockOfCode& code, std::function<void(Xbyak::Reg64)> fn) = 0;
virtual void EmitCall(BlockOfCode& code, std::function<void(Xbyak::Reg64, Xbyak::Reg64)> fn) = 0;
virtual void EmitCall(BlockOfCode& code, std::function<void(Xbyak::Reg64, Xbyak::Reg64, Xbyak::Reg64)> fn) = 0;
};
class SimpleCallback final : public Callback {
@@ -33,10 +33,10 @@ public:
~SimpleCallback() override = default;
void EmitCall(BlockOfCode* code, std::function<void()> l = []{}) override;
void EmitCall(BlockOfCode* code, std::function<void(Xbyak::Reg64)> l) override;
void EmitCall(BlockOfCode* code, std::function<void(Xbyak::Reg64, Xbyak::Reg64)> l) override;
void EmitCall(BlockOfCode* code, std::function<void(Xbyak::Reg64, Xbyak::Reg64, Xbyak::Reg64)> l) override;
void EmitCall(BlockOfCode& code, std::function<void()> l = []{}) override;
void EmitCall(BlockOfCode& code, std::function<void(Xbyak::Reg64)> l) override;
void EmitCall(BlockOfCode& code, std::function<void(Xbyak::Reg64, Xbyak::Reg64)> l) override;
void EmitCall(BlockOfCode& code, std::function<void(Xbyak::Reg64, Xbyak::Reg64, Xbyak::Reg64)> l) override;
private:
void (*fn)();
@@ -49,10 +49,10 @@ public:
~ArgCallback() override = default;
void EmitCall(BlockOfCode* code, std::function<void()> l = []{}) override;
void EmitCall(BlockOfCode* code, std::function<void(Xbyak::Reg64)> l) override;
void EmitCall(BlockOfCode* code, std::function<void(Xbyak::Reg64, Xbyak::Reg64)> l) override;
void EmitCall(BlockOfCode* code, std::function<void(Xbyak::Reg64, Xbyak::Reg64, Xbyak::Reg64)> l) override;
void EmitCall(BlockOfCode& code, std::function<void()> l = []{}) override;
void EmitCall(BlockOfCode& code, std::function<void(Xbyak::Reg64)> l) override;
void EmitCall(BlockOfCode& code, std::function<void(Xbyak::Reg64, Xbyak::Reg64)> l) override;
void EmitCall(BlockOfCode& code, std::function<void(Xbyak::Reg64, Xbyak::Reg64, Xbyak::Reg64)> l) override;
private:
void (*fn)();


@@ -12,10 +12,10 @@
namespace Dynarmic::BackendX64 {
ConstantPool::ConstantPool(BlockOfCode* code, size_t size) : code(code), pool_size(size) {
code->int3();
code->align(align_size);
pool_begin = reinterpret_cast<u8*>(code->AllocateFromCodeSpace(size));
ConstantPool::ConstantPool(BlockOfCode& code, size_t size) : code(code), pool_size(size) {
code.int3();
code.align(align_size);
pool_begin = reinterpret_cast<u8*>(code.AllocateFromCodeSpace(size));
std::memset(pool_begin, 0, size);
current_pool_ptr = pool_begin;
}
@@ -28,7 +28,7 @@ Xbyak::Address ConstantPool::GetConstant(u64 constant) {
iter = constant_info.emplace(constant, current_pool_ptr).first;
current_pool_ptr += align_size;
}
return code->xword[code->rip + iter->second];
return code.xword[code.rip + iter->second];
}
} // namespace Dynarmic::BackendX64


@@ -22,7 +22,7 @@ class BlockOfCode;
/// already exists, its memory location is reused.
class ConstantPool final {
public:
ConstantPool(BlockOfCode* code, size_t size);
ConstantPool(BlockOfCode& code, size_t size);
Xbyak::Address GetConstant(u64 constant);
@@ -31,7 +31,7 @@ private:
std::map<u64, void*> constant_info;
BlockOfCode* code;
BlockOfCode& code;
size_t pool_size;
u8* pool_begin;
u8* current_pool_ptr;
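
Storing `BlockOfCode& code;` as a data member has one side effect worth noting: a class with a reference member loses its implicitly generated copy and move assignment operators, though copy construction still works. A small sketch with illustrative names:

```cpp
struct Block {};

struct Holder {
    explicit Holder(Block& b) : code(b) {}
    Block& code; // reference member: assignment operators are implicitly deleted
};

int main() {
    Block a, b;
    Holder h1{a};
    Holder h2{b};
    // h1 = h2;       // error: copy assignment is deleted
    Holder h3{h1};    // copy construction is fine; h3.code still aliases a
    (void)h2;
    (void)h3;
}
```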


@@ -31,7 +31,7 @@ void EmitContext::EraseInstruction(IR::Inst* inst) {
inst->ClearArgs();
}
EmitX64::EmitX64(BlockOfCode* code)
EmitX64::EmitX64(BlockOfCode& code)
: code(code) {}
EmitX64::~EmitX64() = default;
@@ -47,7 +47,7 @@ void EmitX64::EmitVoid(EmitContext&, IR::Inst*) {
}
void EmitX64::EmitBreakpoint(EmitContext&, IR::Inst*) {
code->int3();
code.int3();
}
void EmitX64::EmitIdentity(EmitContext& ctx, IR::Inst* inst) {
@@ -63,21 +63,21 @@ void EmitX64::PushRSBHelper(Xbyak::Reg64 loc_desc_reg, Xbyak::Reg64 index_reg, I
auto iter = block_descriptors.find(target);
CodePtr target_code_ptr = iter != block_descriptors.end()
? iter->second.entrypoint
: code->GetReturnFromRunCodeAddress();
: code.GetReturnFromRunCodeAddress();
code->mov(index_reg.cvt32(), dword[r15 + code->GetJitStateInfo().offsetof_rsb_ptr]);
code.mov(index_reg.cvt32(), dword[r15 + code.GetJitStateInfo().offsetof_rsb_ptr]);
code->mov(loc_desc_reg, target.Value());
code.mov(loc_desc_reg, target.Value());
patch_information[target].mov_rcx.emplace_back(code->getCurr());
patch_information[target].mov_rcx.emplace_back(code.getCurr());
EmitPatchMovRcx(target_code_ptr);
code->mov(qword[r15 + index_reg * 8 + code->GetJitStateInfo().offsetof_rsb_location_descriptors], loc_desc_reg);
code->mov(qword[r15 + index_reg * 8 + code->GetJitStateInfo().offsetof_rsb_codeptrs], rcx);
code.mov(qword[r15 + index_reg * 8 + code.GetJitStateInfo().offsetof_rsb_location_descriptors], loc_desc_reg);
code.mov(qword[r15 + index_reg * 8 + code.GetJitStateInfo().offsetof_rsb_codeptrs], rcx);
code->add(index_reg.cvt32(), 1);
code->and_(index_reg.cvt32(), u32(code->GetJitStateInfo().rsb_ptr_mask));
code->mov(dword[r15 + code->GetJitStateInfo().offsetof_rsb_ptr], index_reg.cvt32());
code.add(index_reg.cvt32(), 1);
code.and_(index_reg.cvt32(), u32(code.GetJitStateInfo().rsb_ptr_mask));
code.mov(dword[r15 + code.GetJitStateInfo().offsetof_rsb_ptr], index_reg.cvt32());
}
void EmitX64::EmitPushRSB(EmitContext& ctx, IR::Inst* inst) {
@@ -125,9 +125,9 @@ void EmitX64::EmitGetNZCVFromOp(EmitContext& ctx, IR::Inst* inst) {
Xbyak::Reg64 nzcv = ctx.reg_alloc.ScratchGpr({HostLoc::RAX});
Xbyak::Reg value = ctx.reg_alloc.UseGpr(args[0]).changeBit(bitsize);
code->cmp(value, 0);
code->lahf();
code->seto(code->al);
code.cmp(value, 0);
code.lahf();
code.seto(code.al);
ctx.reg_alloc.DefineValue(inst, nzcv);
}
@@ -141,28 +141,28 @@ void EmitX64::EmitNZCVFromPackedFlags(EmitContext& ctx, IR::Inst* inst) {
value |= Common::Bit<30>(args[0].GetImmediateU32()) ? (1 << 14) : 0;
value |= Common::Bit<29>(args[0].GetImmediateU32()) ? (1 << 8) : 0;
value |= Common::Bit<28>(args[0].GetImmediateU32()) ? (1 << 0) : 0;
code->mov(nzcv, value);
code.mov(nzcv, value);
ctx.reg_alloc.DefineValue(inst, nzcv);
} else {
Xbyak::Reg32 nzcv = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
// TODO: Optimize
code->shr(nzcv, 28);
code->imul(nzcv, nzcv, 0b00010000'10000001);
code->and_(nzcv.cvt8(), 1);
code.shr(nzcv, 28);
code.imul(nzcv, nzcv, 0b00010000'10000001);
code.and_(nzcv.cvt8(), 1);
ctx.reg_alloc.DefineValue(inst, nzcv);
}
}
void EmitX64::EmitAddCycles(size_t cycles) {
ASSERT(cycles < std::numeric_limits<u32>::max());
code->sub(qword[r15 + code->GetJitStateInfo().offsetof_cycles_remaining], static_cast<u32>(cycles));
code.sub(qword[r15 + code.GetJitStateInfo().offsetof_cycles_remaining], static_cast<u32>(cycles));
}
Xbyak::Label EmitX64::EmitCond(IR::Cond cond) {
Xbyak::Label label;
const Xbyak::Reg32 cpsr = eax;
code->mov(cpsr, dword[r15 + code->GetJitStateInfo().offsetof_CPSR_nzcv]);
code.mov(cpsr, dword[r15 + code.GetJitStateInfo().offsetof_CPSR_nzcv]);
constexpr size_t n_shift = 31;
constexpr size_t z_shift = 30;
@@ -175,91 +175,91 @@ Xbyak::Label EmitX64::EmitCond(IR::Cond cond) {
switch (cond) {
case IR::Cond::EQ: //z
code->test(cpsr, z_mask);
code->jnz(label);
code.test(cpsr, z_mask);
code.jnz(label);
break;
case IR::Cond::NE: //!z
code->test(cpsr, z_mask);
code->jz(label);
code.test(cpsr, z_mask);
code.jz(label);
break;
case IR::Cond::CS: //c
code->test(cpsr, c_mask);
code->jnz(label);
code.test(cpsr, c_mask);
code.jnz(label);
break;
case IR::Cond::CC: //!c
code->test(cpsr, c_mask);
code->jz(label);
code.test(cpsr, c_mask);
code.jz(label);
break;
case IR::Cond::MI: //n
code->test(cpsr, n_mask);
code->jnz(label);
code.test(cpsr, n_mask);
code.jnz(label);
break;
case IR::Cond::PL: //!n
code->test(cpsr, n_mask);
code->jz(label);
code.test(cpsr, n_mask);
code.jz(label);
break;
case IR::Cond::VS: //v
code->test(cpsr, v_mask);
code->jnz(label);
code.test(cpsr, v_mask);
code.jnz(label);
break;
case IR::Cond::VC: //!v
code->test(cpsr, v_mask);
code->jz(label);
code.test(cpsr, v_mask);
code.jz(label);
break;
case IR::Cond::HI: { //c & !z
code->and_(cpsr, z_mask | c_mask);
code->cmp(cpsr, c_mask);
code->je(label);
code.and_(cpsr, z_mask | c_mask);
code.cmp(cpsr, c_mask);
code.je(label);
break;
}
case IR::Cond::LS: { //!c | z
code->and_(cpsr, z_mask | c_mask);
code->cmp(cpsr, c_mask);
code->jne(label);
code.and_(cpsr, z_mask | c_mask);
code.cmp(cpsr, c_mask);
code.jne(label);
break;
}
case IR::Cond::GE: { // n == v
code->and_(cpsr, n_mask | v_mask);
code->jz(label);
code->cmp(cpsr, n_mask | v_mask);
code->je(label);
code.and_(cpsr, n_mask | v_mask);
code.jz(label);
code.cmp(cpsr, n_mask | v_mask);
code.je(label);
break;
}
case IR::Cond::LT: { // n != v
Xbyak::Label fail;
code->and_(cpsr, n_mask | v_mask);
code->jz(fail);
code->cmp(cpsr, n_mask | v_mask);
code->jne(label);
code->L(fail);
code.and_(cpsr, n_mask | v_mask);
code.jz(fail);
code.cmp(cpsr, n_mask | v_mask);
code.jne(label);
code.L(fail);
break;
}
case IR::Cond::GT: { // !z & (n == v)
const Xbyak::Reg32 tmp1 = ebx;
const Xbyak::Reg32 tmp2 = esi;
code->mov(tmp1, cpsr);
code->mov(tmp2, cpsr);
code->shr(tmp1, n_shift);
code->shr(tmp2, v_shift);
code->shr(cpsr, z_shift);
code->xor_(tmp1, tmp2);
code->or_(tmp1, cpsr);
code->test(tmp1, 1);
code->jz(label);
code.mov(tmp1, cpsr);
code.mov(tmp2, cpsr);
code.shr(tmp1, n_shift);
code.shr(tmp2, v_shift);
code.shr(cpsr, z_shift);
code.xor_(tmp1, tmp2);
code.or_(tmp1, cpsr);
code.test(tmp1, 1);
code.jz(label);
break;
}
case IR::Cond::LE: { // z | (n != v)
const Xbyak::Reg32 tmp1 = ebx;
const Xbyak::Reg32 tmp2 = esi;
code->mov(tmp1, cpsr);
code->mov(tmp2, cpsr);
code->shr(tmp1, n_shift);
code->shr(tmp2, v_shift);
code->shr(cpsr, z_shift);
code->xor_(tmp1, tmp2);
code->or_(tmp1, cpsr);
code->test(tmp1, 1);
code->jnz(label);
code.mov(tmp1, cpsr);
code.mov(tmp2, cpsr);
code.shr(tmp1, n_shift);
code.shr(tmp2, v_shift);
code.shr(cpsr, z_shift);
code.xor_(tmp1, tmp2);
code.or_(tmp1, cpsr);
code.test(tmp1, 1);
code.jnz(label);
break;
}
default:
@@ -281,7 +281,7 @@ void EmitX64::EmitCondPrelude(const IR::Block& block) {
Xbyak::Label pass = EmitCond(block.GetCondition());
EmitAddCycles(block.ConditionFailedCycleCount());
EmitTerminal(IR::Term::LinkBlock{block.ConditionFailedLocation()}, block.Location());
code->L(pass);
code.L(pass);
}
void EmitX64::EmitTerminal(IR::Terminal terminal, IR::LocationDescriptor initial_location) {
@@ -296,25 +296,25 @@ void EmitX64::EmitTerminal(IR::Terminal terminal, IR::LocationDescriptor initial
}
void EmitX64::Patch(const IR::LocationDescriptor& desc, CodePtr bb) {
const CodePtr save_code_ptr = code->getCurr();
const CodePtr save_code_ptr = code.getCurr();
const PatchInformation& patch_info = patch_information[desc];
for (CodePtr location : patch_info.jg) {
code->SetCodePtr(location);
code.SetCodePtr(location);
EmitPatchJg(desc, bb);
}
for (CodePtr location : patch_info.jmp) {
code->SetCodePtr(location);
code.SetCodePtr(location);
EmitPatchJmp(desc, bb);
}
for (CodePtr location : patch_info.mov_rcx) {
code->SetCodePtr(location);
code.SetCodePtr(location);
EmitPatchMovRcx(bb);
}
code->SetCodePtr(save_code_ptr);
code.SetCodePtr(save_code_ptr);
}
void EmitX64::Unpatch(const IR::LocationDescriptor& desc) {


@@ -48,7 +48,7 @@ public:
size_t size; // Length in bytes of emitted code
};
EmitX64(BlockOfCode* code);
EmitX64(BlockOfCode& code);
virtual ~EmitX64();
/// Looks up an emitted host block in the cache.
@@ -100,7 +100,7 @@ protected:
virtual void EmitPatchMovRcx(CodePtr target_code_ptr = nullptr) = 0;
// State
BlockOfCode* code;
BlockOfCode& code;
std::unordered_map<IR::LocationDescriptor, BlockDescriptor> block_descriptors;
std::unordered_map<IR::LocationDescriptor, PatchInformation> patch_information;
};


@@ -44,21 +44,21 @@ static void EmitMixColumns(std::array<Argument, 3> args, EmitContext& ctx, Block
void EmitX64::EmitAESInverseMixColumns(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
if (code->DoesCpuSupport(Xbyak::util::Cpu::tAESNI)) {
if (code.DoesCpuSupport(Xbyak::util::Cpu::tAESNI)) {
const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
code->aesimc(result, operand);
code.aesimc(result, operand);
ctx.reg_alloc.DefineValue(inst, result);
} else {
EmitMixColumns(args, ctx, *code, inst, Common::InverseMixColumns);
EmitMixColumns(args, ctx, code, inst, Common::InverseMixColumns);
}
}
void EmitX64::EmitAESMixColumns(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
EmitMixColumns(args, ctx, *code, inst, Common::MixColumns);
EmitMixColumns(args, ctx, code, inst, Common::MixColumns);
}
} // namespace Dynarmic::BackendX64


@@ -42,35 +42,35 @@ static void EmitCRC32ISO(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, co
}
void EmitX64::EmitCRC32Castagnoli8(EmitContext& ctx, IR::Inst* inst) {
EmitCRC32Castagnoli(*code, ctx, inst, 8);
EmitCRC32Castagnoli(code, ctx, inst, 8);
}
void EmitX64::EmitCRC32Castagnoli16(EmitContext& ctx, IR::Inst* inst) {
EmitCRC32Castagnoli(*code, ctx, inst, 16);
EmitCRC32Castagnoli(code, ctx, inst, 16);
}
void EmitX64::EmitCRC32Castagnoli32(EmitContext& ctx, IR::Inst* inst) {
EmitCRC32Castagnoli(*code, ctx, inst, 32);
EmitCRC32Castagnoli(code, ctx, inst, 32);
}
void EmitX64::EmitCRC32Castagnoli64(EmitContext& ctx, IR::Inst* inst) {
EmitCRC32Castagnoli(*code, ctx, inst, 64);
EmitCRC32Castagnoli(code, ctx, inst, 64);
}
void EmitX64::EmitCRC32ISO8(EmitContext& ctx, IR::Inst* inst) {
EmitCRC32ISO(*code, ctx, inst, 8);
EmitCRC32ISO(code, ctx, inst, 8);
}
void EmitX64::EmitCRC32ISO16(EmitContext& ctx, IR::Inst* inst) {
EmitCRC32ISO(*code, ctx, inst, 16);
EmitCRC32ISO(code, ctx, inst, 16);
}
void EmitX64::EmitCRC32ISO32(EmitContext& ctx, IR::Inst* inst) {
EmitCRC32ISO(*code, ctx, inst, 32);
EmitCRC32ISO(code, ctx, inst, 32);
}
void EmitX64::EmitCRC32ISO64(EmitContext& ctx, IR::Inst* inst) {
EmitCRC32ISO(*code, ctx, inst, 64);
EmitCRC32ISO(code, ctx, inst, 64);
}
} // namespace Dynarmic::BackendX64

File diff suppressed because it is too large.


@@ -29,96 +29,96 @@ constexpr u64 f64_min_s32 = 0xc1e0000000000000u; // -2147483648 as a double
constexpr u64 f64_max_s32 = 0x41dfffffffc00000u; // 2147483647 as a double
constexpr u64 f64_min_u32 = 0x0000000000000000u; // 0 as a double
static void DenormalsAreZero32(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::Reg32 gpr_scratch) {
static void DenormalsAreZero32(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Reg32 gpr_scratch) {
Xbyak::Label end;
// We need to report back whether we've found a denormal on input.
// SSE doesn't do this for us when SSE's DAZ is enabled.
code->movd(gpr_scratch, xmm_value);
code->and_(gpr_scratch, u32(0x7FFFFFFF));
code->sub(gpr_scratch, u32(1));
code->cmp(gpr_scratch, u32(0x007FFFFE));
code->ja(end);
code->pxor(xmm_value, xmm_value);
code->mov(dword[r15 + code->GetJitStateInfo().offsetof_FPSCR_IDC], u32(1 << 7));
code->L(end);
code.movd(gpr_scratch, xmm_value);
code.and_(gpr_scratch, u32(0x7FFFFFFF));
code.sub(gpr_scratch, u32(1));
code.cmp(gpr_scratch, u32(0x007FFFFE));
code.ja(end);
code.pxor(xmm_value, xmm_value);
code.mov(dword[r15 + code.GetJitStateInfo().offsetof_FPSCR_IDC], u32(1 << 7));
code.L(end);
}
static void DenormalsAreZero64(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::Reg64 gpr_scratch) {
static void DenormalsAreZero64(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Reg64 gpr_scratch) {
Xbyak::Label end;
auto mask = code->MConst(f64_non_sign_mask);
auto mask = code.MConst(f64_non_sign_mask);
mask.setBit(64);
auto penult_denormal = code->MConst(f64_penultimate_positive_denormal);
auto penult_denormal = code.MConst(f64_penultimate_positive_denormal);
penult_denormal.setBit(64);
code->movq(gpr_scratch, xmm_value);
code->and_(gpr_scratch, mask);
code->sub(gpr_scratch, u32(1));
code->cmp(gpr_scratch, penult_denormal);
code->ja(end);
code->pxor(xmm_value, xmm_value);
code->mov(dword[r15 + code->GetJitStateInfo().offsetof_FPSCR_IDC], u32(1 << 7));
code->L(end);
code.movq(gpr_scratch, xmm_value);
code.and_(gpr_scratch, mask);
code.sub(gpr_scratch, u32(1));
code.cmp(gpr_scratch, penult_denormal);
code.ja(end);
code.pxor(xmm_value, xmm_value);
code.mov(dword[r15 + code.GetJitStateInfo().offsetof_FPSCR_IDC], u32(1 << 7));
code.L(end);
}
static void FlushToZero32(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::Reg32 gpr_scratch) {
static void FlushToZero32(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Reg32 gpr_scratch) {
Xbyak::Label end;
code->movd(gpr_scratch, xmm_value);
code->and_(gpr_scratch, u32(0x7FFFFFFF));
code->sub(gpr_scratch, u32(1));
code->cmp(gpr_scratch, u32(0x007FFFFE));
code->ja(end);
code->pxor(xmm_value, xmm_value);
code->mov(dword[r15 + code->GetJitStateInfo().offsetof_FPSCR_UFC], u32(1 << 3));
code->L(end);
code.movd(gpr_scratch, xmm_value);
code.and_(gpr_scratch, u32(0x7FFFFFFF));
code.sub(gpr_scratch, u32(1));
code.cmp(gpr_scratch, u32(0x007FFFFE));
code.ja(end);
code.pxor(xmm_value, xmm_value);
code.mov(dword[r15 + code.GetJitStateInfo().offsetof_FPSCR_UFC], u32(1 << 3));
code.L(end);
}
static void FlushToZero64(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::Reg64 gpr_scratch) {
static void FlushToZero64(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Reg64 gpr_scratch) {
Xbyak::Label end;
auto mask = code->MConst(f64_non_sign_mask);
auto mask = code.MConst(f64_non_sign_mask);
mask.setBit(64);
auto penult_denormal = code->MConst(f64_penultimate_positive_denormal);
auto penult_denormal = code.MConst(f64_penultimate_positive_denormal);
penult_denormal.setBit(64);
code->movq(gpr_scratch, xmm_value);
code->and_(gpr_scratch, mask);
code->sub(gpr_scratch, u32(1));
code->cmp(gpr_scratch, penult_denormal);
code->ja(end);
code->pxor(xmm_value, xmm_value);
code->mov(dword[r15 + code->GetJitStateInfo().offsetof_FPSCR_UFC], u32(1 << 3));
code->L(end);
code.movq(gpr_scratch, xmm_value);
code.and_(gpr_scratch, mask);
code.sub(gpr_scratch, u32(1));
code.cmp(gpr_scratch, penult_denormal);
code.ja(end);
code.pxor(xmm_value, xmm_value);
code.mov(dword[r15 + code.GetJitStateInfo().offsetof_FPSCR_UFC], u32(1 << 3));
code.L(end);
}
static void DefaultNaN32(BlockOfCode* code, Xbyak::Xmm xmm_value) {
static void DefaultNaN32(BlockOfCode& code, Xbyak::Xmm xmm_value) {
Xbyak::Label end;
code->ucomiss(xmm_value, xmm_value);
code->jnp(end);
code->movaps(xmm_value, code->MConst(f32_nan));
code->L(end);
code.ucomiss(xmm_value, xmm_value);
code.jnp(end);
code.movaps(xmm_value, code.MConst(f32_nan));
code.L(end);
}
static void DefaultNaN64(BlockOfCode* code, Xbyak::Xmm xmm_value) {
static void DefaultNaN64(BlockOfCode& code, Xbyak::Xmm xmm_value) {
Xbyak::Label end;
code->ucomisd(xmm_value, xmm_value);
code->jnp(end);
code->movaps(xmm_value, code->MConst(f64_nan));
code->L(end);
code.ucomisd(xmm_value, xmm_value);
code.jnp(end);
code.movaps(xmm_value, code.MConst(f64_nan));
code.L(end);
}
static void ZeroIfNaN64(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::Xmm xmm_scratch) {
code->pxor(xmm_scratch, xmm_scratch);
code->cmpordsd(xmm_scratch, xmm_value); // true mask when ordered (i.e.: when not an NaN)
code->pand(xmm_value, xmm_scratch);
static void ZeroIfNaN64(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Xmm xmm_scratch) {
code.pxor(xmm_scratch, xmm_scratch);
code.cmpordsd(xmm_scratch, xmm_value); // true mask when ordered (i.e.: when not an NaN)
code.pand(xmm_value, xmm_scratch);
}
static void FPThreeOp32(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) {
static void FPThreeOp32(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
@@ -129,7 +129,7 @@ static void FPThreeOp32(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, voi
DenormalsAreZero32(code, result, gpr_scratch);
DenormalsAreZero32(code, operand, gpr_scratch);
}
(code->*fn)(result, operand);
(code.*fn)(result, operand);
if (ctx.FPSCR_FTZ()) {
FlushToZero32(code, result, gpr_scratch);
}
@@ -140,7 +140,7 @@ static void FPThreeOp32(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, voi
ctx.reg_alloc.DefineValue(inst, result);
}
static void FPThreeOp64(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) {
static void FPThreeOp64(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
@@ -151,7 +151,7 @@ static void FPThreeOp64(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, voi
DenormalsAreZero64(code, result, gpr_scratch);
DenormalsAreZero64(code, operand, gpr_scratch);
}
(code->*fn)(result, operand);
(code.*fn)(result, operand);
if (ctx.FPSCR_FTZ()) {
FlushToZero64(code, result, gpr_scratch);
}
@@ -162,7 +162,7 @@ static void FPThreeOp64(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, voi
ctx.reg_alloc.DefineValue(inst, result);
}
static void FPTwoOp32(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) {
static void FPTwoOp32(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
@@ -172,7 +172,7 @@ static void FPTwoOp32(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, void
DenormalsAreZero32(code, result, gpr_scratch);
}
(code->*fn)(result, result);
(code.*fn)(result, result);
if (ctx.FPSCR_FTZ()) {
FlushToZero32(code, result, gpr_scratch);
}
@@ -183,7 +183,7 @@ static void FPTwoOp32(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, void
ctx.reg_alloc.DefineValue(inst, result);
}
static void FPTwoOp64(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) {
static void FPTwoOp64(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
@@ -193,7 +193,7 @@ static void FPTwoOp64(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, void
DenormalsAreZero64(code, result, gpr_scratch);
}
(code->*fn)(result, result);
(code.*fn)(result, result);
if (ctx.FPSCR_FTZ()) {
FlushToZero64(code, result, gpr_scratch);
}
@@ -208,7 +208,7 @@ void EmitX64::EmitFPAbs32(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
code->pand(result, code->MConst(f32_non_sign_mask));
code.pand(result, code.MConst(f32_non_sign_mask));
ctx.reg_alloc.DefineValue(inst, result);
}
@@ -217,7 +217,7 @@ void EmitX64::EmitFPAbs64(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
code->pand(result, code->MConst(f64_non_sign_mask));
code.pand(result, code.MConst(f64_non_sign_mask));
ctx.reg_alloc.DefineValue(inst, result);
}
@@ -226,7 +226,7 @@ void EmitX64::EmitFPNeg32(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
code->pxor(result, code->MConst(f32_negative_zero));
code.pxor(result, code.MConst(f32_negative_zero));
ctx.reg_alloc.DefineValue(inst, result);
}
@@ -235,7 +235,7 @@ void EmitX64::EmitFPNeg64(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
code->pxor(result, code->MConst(f64_negative_zero));
code.pxor(result, code.MConst(f64_negative_zero));
ctx.reg_alloc.DefineValue(inst, result);
}
@@ -280,16 +280,16 @@ void EmitX64::EmitFPSub64(EmitContext& ctx, IR::Inst* inst) {
FPThreeOp64(code, ctx, inst, &Xbyak::CodeGenerator::subsd);
}
static void SetFpscrNzcvFromFlags(BlockOfCode* code, EmitContext& ctx) {
static void SetFpscrNzcvFromFlags(BlockOfCode& code, EmitContext& ctx) {
ctx.reg_alloc.ScratchGpr({HostLoc::RCX}); // shifting requires use of cl
Xbyak::Reg32 nzcv = ctx.reg_alloc.ScratchGpr().cvt32();
code->mov(nzcv, 0x28630000);
code->sete(cl);
code->rcl(cl, 3);
code->shl(nzcv, cl);
code->and_(nzcv, 0xF0000000);
code->mov(dword[r15 + code->GetJitStateInfo().offsetof_FPSCR_nzcv], nzcv);
code.mov(nzcv, 0x28630000);
code.sete(cl);
code.rcl(cl, 3);
code.shl(nzcv, cl);
code.and_(nzcv, 0xF0000000);
code.mov(dword[r15 + code.GetJitStateInfo().offsetof_FPSCR_nzcv], nzcv);
}
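// Editor's note: a worked sketch of the 0x28630000 idiom above (editor's
// addition; hypothetical helper name). After comiss/ucomisd, sete + rcl pack
// the host flags into cl = ZF*8 + CF*4, and shifting the constant left by cl
// selects one of four NZCV nibbles:
//   greater:   ZF=0 CF=0 -> shift 0  -> 0x2 (C)
//   less:      ZF=0 CF=1 -> shift 4  -> 0x8 (N)
//   equal:     ZF=1 CF=0 -> shift 8  -> 0x6 (Z,C)
//   unordered: ZF=1 CF=1 -> shift 12 -> 0x3 (C,V)
#include <cstdint>
static std::uint32_t NzcvFromHostFlags(bool zf, bool cf) {
    const unsigned shift = (zf ? 8u : 0u) + (cf ? 4u : 0u);
    return (0x28630000u << shift) & 0xF0000000u;
}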
void EmitX64::EmitFPCompare32(EmitContext& ctx, IR::Inst* inst) {
@@ -299,9 +299,9 @@ void EmitX64::EmitFPCompare32(EmitContext& ctx, IR::Inst* inst) {
bool exc_on_qnan = args[2].GetImmediateU1();
if (exc_on_qnan) {
code->comiss(reg_a, reg_b);
code.comiss(reg_a, reg_b);
} else {
code->ucomiss(reg_a, reg_b);
code.ucomiss(reg_a, reg_b);
}
SetFpscrNzcvFromFlags(code, ctx);
@@ -314,9 +314,9 @@ void EmitX64::EmitFPCompare64(EmitContext& ctx, IR::Inst* inst) {
bool exc_on_qnan = args[2].GetImmediateU1();
if (exc_on_qnan) {
code->comisd(reg_a, reg_b);
code.comisd(reg_a, reg_b);
} else {
code->ucomisd(reg_a, reg_b);
code.ucomisd(reg_a, reg_b);
}
SetFpscrNzcvFromFlags(code, ctx);
@@ -330,7 +330,7 @@ void EmitX64::EmitFPSingleToDouble(EmitContext& ctx, IR::Inst* inst) {
if (ctx.FPSCR_FTZ()) {
DenormalsAreZero32(code, result, gpr_scratch.cvt32());
}
code->cvtss2sd(result, result);
code.cvtss2sd(result, result);
if (ctx.FPSCR_FTZ()) {
FlushToZero64(code, result, gpr_scratch);
}
@@ -349,7 +349,7 @@ void EmitX64::EmitFPDoubleToSingle(EmitContext& ctx, IR::Inst* inst) {
if (ctx.FPSCR_FTZ()) {
DenormalsAreZero64(code, result, gpr_scratch);
}
code->cvtsd2ss(result, result);
code.cvtsd2ss(result, result);
if (ctx.FPSCR_FTZ()) {
FlushToZero32(code, result, gpr_scratch.cvt32());
}
@@ -373,22 +373,22 @@ void EmitX64::EmitFPSingleToS32(EmitContext& ctx, IR::Inst* inst) {
if (ctx.FPSCR_FTZ()) {
DenormalsAreZero32(code, from, to);
}
code->cvtss2sd(from, from);
code.cvtss2sd(from, from);
// First time is to set flags
if (round_towards_zero) {
code->cvttsd2si(to, from); // 32 bit gpr
code.cvttsd2si(to, from); // 32 bit gpr
} else {
code->cvtsd2si(to, from); // 32 bit gpr
code.cvtsd2si(to, from); // 32 bit gpr
}
// Clamp to output range
ZeroIfNaN64(code, from, xmm_scratch);
code->minsd(from, code->MConst(f64_max_s32));
code->maxsd(from, code->MConst(f64_min_s32));
code.minsd(from, code.MConst(f64_max_s32));
code.maxsd(from, code.MConst(f64_min_s32));
// Second time is for real
if (round_towards_zero) {
code->cvttsd2si(to, from); // 32 bit gpr
code.cvttsd2si(to, from); // 32 bit gpr
} else {
code->cvtsd2si(to, from); // 32 bit gpr
code.cvtsd2si(to, from); // 32 bit gpr
}
ctx.reg_alloc.DefineValue(inst, to);
@@ -412,19 +412,19 @@ void EmitX64::EmitFPSingleToU32(EmitContext& ctx, IR::Inst* inst) {
if (ctx.FPSCR_FTZ()) {
DenormalsAreZero32(code, from, to);
}
code->cvtss2sd(from, from);
code.cvtss2sd(from, from);
ZeroIfNaN64(code, from, xmm_scratch);
// Bring into SSE range
code->addsd(from, code->MConst(f64_min_s32));
code.addsd(from, code.MConst(f64_min_s32));
// First time is to set flags
code->cvtsd2si(to, from); // 32 bit gpr
code.cvtsd2si(to, from); // 32 bit gpr
// Clamp to output range
code->minsd(from, code->MConst(f64_max_s32));
code->maxsd(from, code->MConst(f64_min_s32));
code.minsd(from, code.MConst(f64_max_s32));
code.maxsd(from, code.MConst(f64_min_s32));
// Actually convert
code->cvtsd2si(to, from); // 32 bit gpr
code.cvtsd2si(to, from); // 32 bit gpr
// Bring back into original range
code->add(to, u32(2147483648u));
code.add(to, u32(2147483648u));
} else {
Xbyak::Xmm xmm_mask = ctx.reg_alloc.ScratchXmm();
Xbyak::Reg32 gpr_mask = ctx.reg_alloc.ScratchGpr().cvt32();
@@ -432,25 +432,25 @@ void EmitX64::EmitFPSingleToU32(EmitContext& ctx, IR::Inst* inst) {
if (ctx.FPSCR_FTZ()) {
DenormalsAreZero32(code, from, to);
}
code->cvtss2sd(from, from);
code.cvtss2sd(from, from);
ZeroIfNaN64(code, from, xmm_scratch);
// Generate masks if out-of-signed-range
code->movaps(xmm_mask, code->MConst(f64_max_s32));
code->cmpltsd(xmm_mask, from);
code->movd(gpr_mask, xmm_mask);
code->pand(xmm_mask, code->MConst(f64_min_s32));
code->and_(gpr_mask, u32(2147483648u));
code.movaps(xmm_mask, code.MConst(f64_max_s32));
code.cmpltsd(xmm_mask, from);
code.movd(gpr_mask, xmm_mask);
code.pand(xmm_mask, code.MConst(f64_min_s32));
code.and_(gpr_mask, u32(2147483648u));
// Bring into range if necessary
code->addsd(from, xmm_mask);
code.addsd(from, xmm_mask);
// First time is to set flags
code->cvttsd2si(to, from); // 32 bit gpr
code.cvttsd2si(to, from); // 32 bit gpr
// Clamp to output range
code->minsd(from, code->MConst(f64_max_s32));
code->maxsd(from, code->MConst(f64_min_u32));
code.minsd(from, code.MConst(f64_max_s32));
code.maxsd(from, code.MConst(f64_min_u32));
// Actually convert
code->cvttsd2si(to, from); // 32 bit gpr
code.cvttsd2si(to, from); // 32 bit gpr
// Bring back into original range if necessary
code->add(to, gpr_mask);
code.add(to, gpr_mask);
}
ctx.reg_alloc.DefineValue(inst, to);
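// Editor's note: scalar sketch of the biasing trick in the first (rounding)
// branch above (editor's addition; hypothetical helper name). cvtsd2si only
// produces signed results, so the value is shifted down into signed range
// before conversion and the bias is undone with integer arithmetic afterwards:
#include <cmath>
#include <cstdint>
static std::uint32_t ToU32RoundNearest(double x) {
    // Editor's assumption: x has already been NaN-zeroed and clamped as above.
    const std::int32_t biased = static_cast<std::int32_t>(std::lrint(x - 2147483648.0));
    return static_cast<std::uint32_t>(biased) + 2147483648u;
}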
@@ -471,19 +471,19 @@ void EmitX64::EmitFPDoubleToS32(EmitContext& ctx, IR::Inst* inst) {
}
// First time is to set flags
if (round_towards_zero) {
code->cvttsd2si(gpr_scratch, from); // 32 bit gpr
code.cvttsd2si(gpr_scratch, from); // 32 bit gpr
} else {
code->cvtsd2si(gpr_scratch, from); // 32 bit gpr
code.cvtsd2si(gpr_scratch, from); // 32 bit gpr
}
// Clamp to output range
ZeroIfNaN64(code, from, xmm_scratch);
code->minsd(from, code->MConst(f64_max_s32));
code->maxsd(from, code->MConst(f64_min_s32));
code.minsd(from, code.MConst(f64_max_s32));
code.maxsd(from, code.MConst(f64_min_s32));
// Second time is for real
if (round_towards_zero) {
code->cvttsd2si(to, from); // 32 bit gpr
code.cvttsd2si(to, from); // 32 bit gpr
} else {
code->cvtsd2si(to, from); // 32 bit gpr
code.cvtsd2si(to, from); // 32 bit gpr
}
ctx.reg_alloc.DefineValue(inst, to);
@@ -507,16 +507,16 @@ void EmitX64::EmitFPDoubleToU32(EmitContext& ctx, IR::Inst* inst) {
}
ZeroIfNaN64(code, from, xmm_scratch);
// Bring into SSE range
code->addsd(from, code->MConst(f64_min_s32));
code.addsd(from, code.MConst(f64_min_s32));
// First time is to set flags
code->cvtsd2si(gpr_scratch, from); // 32 bit gpr
code.cvtsd2si(gpr_scratch, from); // 32 bit gpr
// Clamp to output range
code->minsd(from, code->MConst(f64_max_s32));
code->maxsd(from, code->MConst(f64_min_s32));
code.minsd(from, code.MConst(f64_max_s32));
code.maxsd(from, code.MConst(f64_min_s32));
// Actually convert
code->cvtsd2si(to, from); // 32 bit gpr
code.cvtsd2si(to, from); // 32 bit gpr
// Bring back into original range
code->add(to, u32(2147483648u));
code.add(to, u32(2147483648u));
} else {
Xbyak::Xmm xmm_mask = ctx.reg_alloc.ScratchXmm();
Xbyak::Reg32 gpr_mask = ctx.reg_alloc.ScratchGpr().cvt32();
@@ -526,22 +526,22 @@ void EmitX64::EmitFPDoubleToU32(EmitContext& ctx, IR::Inst* inst) {
}
ZeroIfNaN64(code, from, xmm_scratch);
// Generate masks if out-of-signed-range
code->movaps(xmm_mask, code->MConst(f64_max_s32));
code->cmpltsd(xmm_mask, from);
code->movd(gpr_mask, xmm_mask);
code->pand(xmm_mask, code->MConst(f64_min_s32));
code->and_(gpr_mask, u32(2147483648u));
code.movaps(xmm_mask, code.MConst(f64_max_s32));
code.cmpltsd(xmm_mask, from);
code.movd(gpr_mask, xmm_mask);
code.pand(xmm_mask, code.MConst(f64_min_s32));
code.and_(gpr_mask, u32(2147483648u));
// Bring into range if necessary
code->addsd(from, xmm_mask);
code.addsd(from, xmm_mask);
// First time is to set flags
code->cvttsd2si(gpr_scratch, from); // 32 bit gpr
code.cvttsd2si(gpr_scratch, from); // 32 bit gpr
// Clamp to output range
code->minsd(from, code->MConst(f64_max_s32));
code->maxsd(from, code->MConst(f64_min_u32));
code.minsd(from, code.MConst(f64_max_s32));
code.maxsd(from, code.MConst(f64_min_u32));
// Actually convert
code->cvttsd2si(to, from); // 32 bit gpr
code.cvttsd2si(to, from); // 32 bit gpr
// Bring back into original range if necessary
code->add(to, gpr_mask);
code.add(to, gpr_mask);
}
ctx.reg_alloc.DefineValue(inst, to);
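// Editor's note: sketch of why the masked branches above bias only
// out-of-signed-range values (editor's addition; hypothetical helper name):
// the cvttsd2si operand then stays nonnegative, so truncation toward zero is
// exactly the unsigned truncation across the whole [0, 2^32) range.
#include <cstdint>
static std::uint32_t ToU32Truncate(double x) {
    // Editor's assumption: x has already been NaN-zeroed and clamped as above.
    const bool above_s32_max = x > 2147483647.0;                       // cmpltsd mask
    const double biased = above_s32_max ? x - 2147483648.0 : x;        // addsd with masked bias
    const std::int32_t truncated = static_cast<std::int32_t>(biased);  // cvttsd2si
    return static_cast<std::uint32_t>(truncated) + (above_s32_max ? 0x80000000u : 0u);
}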
@@ -554,7 +554,7 @@ void EmitX64::EmitFPS32ToSingle(EmitContext& ctx, IR::Inst* inst) {
bool round_to_nearest = args[1].GetImmediateU1();
ASSERT_MSG(!round_to_nearest, "round_to_nearest unimplemented");
code->cvtsi2ss(to, from);
code.cvtsi2ss(to, from);
ctx.reg_alloc.DefineValue(inst, to);
}
@@ -567,8 +567,8 @@ void EmitX64::EmitFPU32ToSingle(EmitContext& ctx, IR::Inst* inst) {
ASSERT_MSG(!round_to_nearest, "round_to_nearest unimplemented");
// We are using a 64-bit GPR register to ensure we don't end up treating the input as signed
code->mov(from.cvt32(), from.cvt32()); // TODO: Verify if this is necessary
code->cvtsi2ss(to, from);
code.mov(from.cvt32(), from.cvt32()); // TODO: Verify if this is necessary
code.cvtsi2ss(to, from);
ctx.reg_alloc.DefineValue(inst, to);
}
@@ -580,7 +580,7 @@ void EmitX64::EmitFPS32ToDouble(EmitContext& ctx, IR::Inst* inst) {
bool round_to_nearest = args[1].GetImmediateU1();
ASSERT_MSG(!round_to_nearest, "round_to_nearest unimplemented");
code->cvtsi2sd(to, from);
code.cvtsi2sd(to, from);
ctx.reg_alloc.DefineValue(inst, to);
}
@@ -593,8 +593,8 @@ void EmitX64::EmitFPU32ToDouble(EmitContext& ctx, IR::Inst* inst) {
ASSERT_MSG(!round_to_nearest, "round_to_nearest unimplemented");
// We are using a 64-bit GPR register to ensure we don't end up treating the input as signed
code->mov(from.cvt32(), from.cvt32()); // TODO: Verify if this is necessary
code->cvtsi2sd(to, from);
code.mov(from.cvt32(), from.cvt32()); // TODO: Verify if this is necessary
code.cvtsi2sd(to, from);
ctx.reg_alloc.DefineValue(inst, to);
}

View file

@@ -23,18 +23,18 @@ void EmitX64::EmitPackedAddU8(EmitContext& ctx, IR::Inst* inst) {
Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
code->paddb(xmm_a, xmm_b);
code.paddb(xmm_a, xmm_b);
if (ge_inst) {
Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
Xbyak::Xmm ones = ctx.reg_alloc.ScratchXmm();
code->pcmpeqb(ones, ones);
code.pcmpeqb(ones, ones);
code->movdqa(xmm_ge, xmm_a);
code->pminub(xmm_ge, xmm_b);
code->pcmpeqb(xmm_ge, xmm_b);
code->pxor(xmm_ge, ones);
code.movdqa(xmm_ge, xmm_a);
code.pminub(xmm_ge, xmm_b);
code.pcmpeqb(xmm_ge, xmm_b);
code.pxor(xmm_ge, ones);
ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
ctx.EraseInstruction(ge_inst);
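// Editor's note: scalar form of the unsigned carry-out detection above
// (editor's addition; hypothetical helper name). With wraparound s = a + b,
// a carry occurred exactly when s < b, so min(s, b) == b (i.e. b <= s) holds
// only when there was no carry; the pxor against all-ones inverts that mask:
#include <algorithm>
#include <cstdint>
static bool PackedAddU8GeBit(std::uint8_t a, std::uint8_t b) {
    const std::uint8_t s = static_cast<std::uint8_t>(a + b);
    return std::min(s, b) != b;
}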
@@ -54,18 +54,18 @@ void EmitX64::EmitPackedAddS8(EmitContext& ctx, IR::Inst* inst) {
Xbyak::Xmm saturated_sum = ctx.reg_alloc.ScratchXmm();
Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
code->pxor(xmm_ge, xmm_ge);
code->movdqa(saturated_sum, xmm_a);
code->paddsb(saturated_sum, xmm_b);
code->pcmpgtb(xmm_ge, saturated_sum);
code->pcmpeqb(saturated_sum, saturated_sum);
code->pxor(xmm_ge, saturated_sum);
code.pxor(xmm_ge, xmm_ge);
code.movdqa(saturated_sum, xmm_a);
code.paddsb(saturated_sum, xmm_b);
code.pcmpgtb(xmm_ge, saturated_sum);
code.pcmpeqb(saturated_sum, saturated_sum);
code.pxor(xmm_ge, saturated_sum);
ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
ctx.EraseInstruction(ge_inst);
}
code->paddb(xmm_a, xmm_b);
code.paddb(xmm_a, xmm_b);
ctx.reg_alloc.DefineValue(inst, xmm_a);
}
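// Editor's note: for the signed variant, GE per lane is "true sum >= 0"
// (editor's addition; hypothetical helper name). The saturated sum computed
// with paddsb has the same sign as the infinitely wide sum even where the
// wrapping paddb result would not, which is why the mask is derived from it:
#include <cstdint>
static bool PackedAddS8GeBit(std::int8_t a, std::int8_t b) {
    return a + b >= 0; // integer promotion: no 8-bit overflow in this test
}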
@@ -77,19 +77,19 @@ void EmitX64::EmitPackedAddU16(EmitContext& ctx, IR::Inst* inst) {
Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
code->paddw(xmm_a, xmm_b);
code.paddw(xmm_a, xmm_b);
if (ge_inst) {
if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
Xbyak::Xmm ones = ctx.reg_alloc.ScratchXmm();
code->pcmpeqb(ones, ones);
code.pcmpeqb(ones, ones);
code->movdqa(xmm_ge, xmm_a);
code->pminuw(xmm_ge, xmm_b);
code->pcmpeqw(xmm_ge, xmm_b);
code->pxor(xmm_ge, ones);
code.movdqa(xmm_ge, xmm_a);
code.pminuw(xmm_ge, xmm_b);
code.pcmpeqw(xmm_ge, xmm_b);
code.pxor(xmm_ge, ones);
ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
ctx.EraseInstruction(ge_inst);
@@ -98,11 +98,11 @@ void EmitX64::EmitPackedAddU16(EmitContext& ctx, IR::Inst* inst) {
Xbyak::Xmm tmp_b = ctx.reg_alloc.ScratchXmm();
// !(b <= a+b) == b > a+b
code->movdqa(tmp_a, xmm_a);
code->movdqa(tmp_b, xmm_b);
code->paddw(tmp_a, code->MConst(0x80008000));
code->paddw(tmp_b, code->MConst(0x80008000));
code->pcmpgtw(tmp_b, tmp_a); // *Signed* comparison!
code.movdqa(tmp_a, xmm_a);
code.movdqa(tmp_b, xmm_b);
code.paddw(tmp_a, code.MConst(0x80008000));
code.paddw(tmp_b, code.MConst(0x80008000));
code.pcmpgtw(tmp_b, tmp_a); // *Signed* comparison!
ctx.reg_alloc.DefineValue(ge_inst, tmp_b);
ctx.EraseInstruction(ge_inst);
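// Editor's note: scalar form of the 0x8000 bias trick above (editor's
// addition; hypothetical helper name). SSE2 only has a signed 16-bit compare,
// but adding (equivalently, XORing) 0x8000 to both operands maps unsigned
// order onto signed order, so pcmpgtw can compute the unsigned "b > a+b":
#include <cstdint>
static bool UnsignedGt16ViaSigned(std::uint16_t x, std::uint16_t y) {
    const std::int16_t xs = static_cast<std::int16_t>(x ^ 0x8000);
    const std::int16_t ys = static_cast<std::int16_t>(y ^ 0x8000);
    return xs > ys; // equal to x > y as unsigned
}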
@@ -123,18 +123,18 @@ void EmitX64::EmitPackedAddS16(EmitContext& ctx, IR::Inst* inst) {
Xbyak::Xmm saturated_sum = ctx.reg_alloc.ScratchXmm();
Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
code->pxor(xmm_ge, xmm_ge);
code->movdqa(saturated_sum, xmm_a);
code->paddsw(saturated_sum, xmm_b);
code->pcmpgtw(xmm_ge, saturated_sum);
code->pcmpeqw(saturated_sum, saturated_sum);
code->pxor(xmm_ge, saturated_sum);
code.pxor(xmm_ge, xmm_ge);
code.movdqa(saturated_sum, xmm_a);
code.paddsw(saturated_sum, xmm_b);
code.pcmpgtw(xmm_ge, saturated_sum);
code.pcmpeqw(saturated_sum, saturated_sum);
code.pxor(xmm_ge, saturated_sum);
ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
ctx.EraseInstruction(ge_inst);
}
code->paddw(xmm_a, xmm_b);
code.paddw(xmm_a, xmm_b);
ctx.reg_alloc.DefineValue(inst, xmm_a);
}
@@ -149,15 +149,15 @@ void EmitX64::EmitPackedSubU8(EmitContext& ctx, IR::Inst* inst) {
if (ge_inst) {
Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
code->movdqa(xmm_ge, xmm_a);
code->pmaxub(xmm_ge, xmm_b);
code->pcmpeqb(xmm_ge, xmm_a);
code.movdqa(xmm_ge, xmm_a);
code.pmaxub(xmm_ge, xmm_b);
code.pcmpeqb(xmm_ge, xmm_a);
ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
ctx.EraseInstruction(ge_inst);
}
code->psubb(xmm_a, xmm_b);
code.psubb(xmm_a, xmm_b);
ctx.reg_alloc.DefineValue(inst, xmm_a);
}
@@ -173,18 +173,18 @@ void EmitX64::EmitPackedSubS8(EmitContext& ctx, IR::Inst* inst) {
Xbyak::Xmm saturated_sum = ctx.reg_alloc.ScratchXmm();
Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
code->pxor(xmm_ge, xmm_ge);
code->movdqa(saturated_sum, xmm_a);
code->psubsb(saturated_sum, xmm_b);
code->pcmpgtb(xmm_ge, saturated_sum);
code->pcmpeqb(saturated_sum, saturated_sum);
code->pxor(xmm_ge, saturated_sum);
code.pxor(xmm_ge, xmm_ge);
code.movdqa(saturated_sum, xmm_a);
code.psubsb(saturated_sum, xmm_b);
code.pcmpgtb(xmm_ge, saturated_sum);
code.pcmpeqb(saturated_sum, saturated_sum);
code.pxor(xmm_ge, saturated_sum);
ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
ctx.EraseInstruction(ge_inst);
}
code->psubb(xmm_a, xmm_b);
code.psubb(xmm_a, xmm_b);
ctx.reg_alloc.DefineValue(inst, xmm_a);
}
@@ -197,22 +197,22 @@ void EmitX64::EmitPackedSubU16(EmitContext& ctx, IR::Inst* inst) {
Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
code->psubw(xmm_a, xmm_b);
code.psubw(xmm_a, xmm_b);
ctx.reg_alloc.DefineValue(inst, xmm_a);
return;
}
if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
code->movdqa(xmm_ge, xmm_a);
code->pmaxuw(xmm_ge, xmm_b); // Requires SSE 4.1
code->pcmpeqw(xmm_ge, xmm_a);
code.movdqa(xmm_ge, xmm_a);
code.pmaxuw(xmm_ge, xmm_b); // Requires SSE 4.1
code.pcmpeqw(xmm_ge, xmm_a);
code->psubw(xmm_a, xmm_b);
code.psubw(xmm_a, xmm_b);
ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
ctx.EraseInstruction(ge_inst);
@@ -226,14 +226,14 @@ void EmitX64::EmitPackedSubU16(EmitContext& ctx, IR::Inst* inst) {
Xbyak::Xmm ones = ctx.reg_alloc.ScratchXmm();
// (a >= b) == !(b > a)
code->pcmpeqb(ones, ones);
code->paddw(xmm_a, code->MConst(0x80008000));
code->paddw(xmm_b, code->MConst(0x80008000));
code->movdqa(xmm_ge, xmm_b);
code->pcmpgtw(xmm_ge, xmm_a); // *Signed* comparison!
code->pxor(xmm_ge, ones);
code.pcmpeqb(ones, ones);
code.paddw(xmm_a, code.MConst(0x80008000));
code.paddw(xmm_b, code.MConst(0x80008000));
code.movdqa(xmm_ge, xmm_b);
code.pcmpgtw(xmm_ge, xmm_a); // *Signed* comparison!
code.pxor(xmm_ge, ones);
code->psubw(xmm_a, xmm_b);
code.psubw(xmm_a, xmm_b);
ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
ctx.EraseInstruction(ge_inst);
@@ -251,18 +251,18 @@ void EmitX64::EmitPackedSubS16(EmitContext& ctx, IR::Inst* inst) {
Xbyak::Xmm saturated_diff = ctx.reg_alloc.ScratchXmm();
Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
code->pxor(xmm_ge, xmm_ge);
code->movdqa(saturated_diff, xmm_a);
code->psubsw(saturated_diff, xmm_b);
code->pcmpgtw(xmm_ge, saturated_diff);
code->pcmpeqw(saturated_diff, saturated_diff);
code->pxor(xmm_ge, saturated_diff);
code.pxor(xmm_ge, xmm_ge);
code.movdqa(saturated_diff, xmm_a);
code.psubsw(saturated_diff, xmm_b);
code.pcmpgtw(xmm_ge, saturated_diff);
code.pcmpeqw(saturated_diff, saturated_diff);
code.pxor(xmm_ge, saturated_diff);
ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
ctx.EraseInstruction(ge_inst);
}
code->psubw(xmm_a, xmm_b);
code.psubw(xmm_a, xmm_b);
ctx.reg_alloc.DefineValue(inst, xmm_a);
}
@@ -280,11 +280,11 @@ void EmitX64::EmitPackedHalvingAddU8(EmitContext& ctx, IR::Inst* inst) {
// Therefore,
// ~pavg(~a, ~b) == (a + b) >> 1
code->pcmpeqb(ones, ones);
code->pxor(xmm_a, ones);
code->pxor(xmm_b, ones);
code->pavgb(xmm_a, xmm_b);
code->pxor(xmm_a, ones);
code.pcmpeqb(ones, ones);
code.pxor(xmm_a, ones);
code.pxor(xmm_b, ones);
code.pavgb(xmm_a, xmm_b);
code.pxor(xmm_a, ones);
ctx.reg_alloc.DefineValue(inst, xmm_a);
} else {
@@ -299,12 +299,12 @@ void EmitX64::EmitPackedHalvingAddU8(EmitContext& ctx, IR::Inst* inst) {
// Since we want to calculate (x+y)/2, we can instead calculate (x&y) + ((x^y)>>1).
// We mask by 0x7F to remove the LSB so that it doesn't leak into the field below.
code->mov(xor_a_b, reg_a);
code->and_(and_a_b, reg_b);
code->xor_(xor_a_b, reg_b);
code->shr(xor_a_b, 1);
code->and_(xor_a_b, 0x7F7F7F7F);
code->add(result, xor_a_b);
code.mov(xor_a_b, reg_a);
code.and_(and_a_b, reg_b);
code.xor_(xor_a_b, reg_b);
code.shr(xor_a_b, 1);
code.and_(xor_a_b, 0x7F7F7F7F);
code.add(result, xor_a_b);
ctx.reg_alloc.DefineValue(inst, result);
}
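// Editor's note: the two averaging identities used above, in scalar form
// (editor's addition; hypothetical helper names). x + y == 2*(x & y) + (x ^ y)
// gives the carry-free average (x & y) + ((x ^ y) >> 1); the 0x7F7F7F7F mask
// in the GPR path only stops each byte's LSB, shifted across the byte
// boundary, from leaking into the byte below. The SSE path instead cancels
// pavgb's upward rounding by averaging complements: ~pavgb(~a, ~b) == (a + b) >> 1.
#include <cstdint>
static std::uint8_t HalvingAddU8(std::uint8_t x, std::uint8_t y) {
    return static_cast<std::uint8_t>((x & y) + ((x ^ y) >> 1));
}
static std::uint8_t HalvingAddU8ViaPavg(std::uint8_t a, std::uint8_t b) {
    const unsigned avg_up = ((a ^ 0xFFu) + (b ^ 0xFFu) + 1) >> 1; // pavgb(~a, ~b)
    return static_cast<std::uint8_t>(avg_up ^ 0xFFu);             // final pxor
}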
@@ -318,11 +318,11 @@ void EmitX64::EmitPackedHalvingAddU16(EmitContext& ctx, IR::Inst* inst) {
Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
code->movdqa(tmp, xmm_a);
code->pand(xmm_a, xmm_b);
code->pxor(tmp, xmm_b);
code->psrlw(tmp, 1);
code->paddw(xmm_a, tmp);
code.movdqa(tmp, xmm_a);
code.pand(xmm_a, xmm_b);
code.pxor(tmp, xmm_b);
code.psrlw(tmp, 1);
code.paddw(xmm_a, tmp);
ctx.reg_alloc.DefineValue(inst, xmm_a);
} else {
@@ -337,12 +337,12 @@ void EmitX64::EmitPackedHalvingAddU16(EmitContext& ctx, IR::Inst* inst) {
// Since we want to calculate (x+y)/2, we can instead calculate (x&y) + ((x^y)>>1).
// We mask by 0x7FFF to remove the LSB so that it doesn't leak into the field below.
code->mov(xor_a_b, reg_a);
code->and_(and_a_b, reg_b);
code->xor_(xor_a_b, reg_b);
code->shr(xor_a_b, 1);
code->and_(xor_a_b, 0x7FFF7FFF);
code->add(result, xor_a_b);
code.mov(xor_a_b, reg_a);
code.and_(and_a_b, reg_b);
code.xor_(xor_a_b, reg_b);
code.shr(xor_a_b, 1);
code.and_(xor_a_b, 0x7FFF7FFF);
code.add(result, xor_a_b);
ctx.reg_alloc.DefineValue(inst, result);
}
@@ -364,15 +364,15 @@ void EmitX64::EmitPackedHalvingAddS8(EmitContext& ctx, IR::Inst* inst) {
// We mask by 0x7F to remove the LSB so that it doesn't leak into the field below.
// carry propagates the sign bit from (x^y)>>1 upwards by one.
code->mov(xor_a_b, reg_a);
code->and_(and_a_b, reg_b);
code->xor_(xor_a_b, reg_b);
code->mov(carry, xor_a_b);
code->and_(carry, 0x80808080);
code->shr(xor_a_b, 1);
code->and_(xor_a_b, 0x7F7F7F7F);
code->add(result, xor_a_b);
code->xor_(result, carry);
code.mov(xor_a_b, reg_a);
code.and_(and_a_b, reg_b);
code.xor_(xor_a_b, reg_b);
code.mov(carry, xor_a_b);
code.and_(carry, 0x80808080);
code.shr(xor_a_b, 1);
code.and_(xor_a_b, 0x7F7F7F7F);
code.add(result, xor_a_b);
code.xor_(result, carry);
ctx.reg_alloc.DefineValue(inst, result);
}
@@ -389,11 +389,11 @@ void EmitX64::EmitPackedHalvingAddS16(EmitContext& ctx, IR::Inst* inst) {
// Since we want to calculate (x+y)/2, we can instead calculate (x&y) + ((x^y)>>>1).
// The arithmetic shift right makes this signed.
code->movdqa(tmp, xmm_a);
code->pand(xmm_a, xmm_b);
code->pxor(tmp, xmm_b);
code->psraw(tmp, 1);
code->paddw(xmm_a, tmp);
code.movdqa(tmp, xmm_a);
code.pand(xmm_a, xmm_b);
code.pxor(tmp, xmm_b);
code.psraw(tmp, 1);
code.paddw(xmm_a, tmp);
ctx.reg_alloc.DefineValue(inst, xmm_a);
}
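// Editor's note: the signed counterpart of the averaging identity above, in
// scalar form (editor's addition; hypothetical helper name). The shift must
// be arithmetic, which psraw provides per lane and signed >> provides here:
#include <cstdint>
static std::int16_t HalvingAddS16(std::int16_t x, std::int16_t y) {
    return static_cast<std::int16_t>((x & y) + ((x ^ y) >> 1));
}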
@@ -408,9 +408,9 @@ void EmitX64::EmitPackedHalvingSubU8(EmitContext& ctx, IR::Inst* inst) {
// Note that x^y always contains the LSB of the result.
// Since we want to calculate (x-y)/2, we can instead calculate ((x^y)>>1) - ((x^y)&y).
code->xor_(minuend, subtrahend);
code->and_(subtrahend, minuend);
code->shr(minuend, 1);
code.xor_(minuend, subtrahend);
code.and_(subtrahend, minuend);
code.shr(minuend, 1);
// At this point,
// minuend := (a^b) >> 1
@@ -420,9 +420,9 @@ void EmitX64::EmitPackedHalvingSubU8(EmitContext& ctx, IR::Inst* inst) {
// We can do this because minuend contains 7 bit fields.
// We use the extra bit in minuend as a bit to borrow from; we set this bit.
// We invert this bit at the end as this tells us if that bit was borrowed from.
code->or_(minuend, 0x80808080);
code->sub(minuend, subtrahend);
code->xor_(minuend, 0x80808080);
code.or_(minuend, 0x80808080);
code.sub(minuend, subtrahend);
code.xor_(minuend, 0x80808080);
// minuend now contains the desired result.
ctx.reg_alloc.DefineValue(inst, minuend);
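// Editor's note: scalar form of the halving-subtract identity above (editor's
// addition; hypothetical helper name). Since x - y == (x ^ y) - 2*((x ^ y) & y),
// halving gives ((x ^ y) >> 1) - ((x ^ y) & y); the or/sub/xor with 0x80808080
// merely lends each 7-bit field a private borrow bit so four byte-wide
// subtractions can share one 32-bit sub:
#include <cstdint>
static std::uint8_t HalvingSubU8(std::uint8_t x, std::uint8_t y) {
    const std::uint8_t lsb = x ^ y; // also carries the LSB of x - y
    return static_cast<std::uint8_t>((lsb >> 1) - (lsb & y));
}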
@@ -440,11 +440,11 @@ void EmitX64::EmitPackedHalvingSubS8(EmitContext& ctx, IR::Inst* inst) {
// Note that x^y always contains the LSB of the result.
// Since we want to calculate (x-y)/2, we can instead calculate ((x^y)>>1) - ((x^y)&y).
code->xor_(minuend, subtrahend);
code->and_(subtrahend, minuend);
code->mov(carry, minuend);
code->and_(carry, 0x80808080);
code->shr(minuend, 1);
code.xor_(minuend, subtrahend);
code.and_(subtrahend, minuend);
code.mov(carry, minuend);
code.and_(carry, 0x80808080);
code.shr(minuend, 1);
// At this point,
// minuend := (a^b) >> 1
@@ -456,10 +456,10 @@ void EmitX64::EmitPackedHalvingSubS8(EmitContext& ctx, IR::Inst* inst) {
// We use the extra bit in minuend as a bit to borrow from; we set this bit.
// We invert this bit at the end as this tells us if that bit was borrowed from.
// We then sign extend the result into this bit.
code->or_(minuend, 0x80808080);
code->sub(minuend, subtrahend);
code->xor_(minuend, 0x80808080);
code->xor_(minuend, carry);
code.or_(minuend, 0x80808080);
code.sub(minuend, subtrahend);
code.xor_(minuend, 0x80808080);
code.xor_(minuend, carry);
ctx.reg_alloc.DefineValue(inst, minuend);
}
@@ -474,15 +474,15 @@ void EmitX64::EmitPackedHalvingSubU16(EmitContext& ctx, IR::Inst* inst) {
// Note that x^y always contains the LSB of the result.
// Since we want to calculate (x-y)/2, we can instead calculate ((x^y)>>1) - ((x^y)&y).
code->pxor(minuend, subtrahend);
code->pand(subtrahend, minuend);
code->psrlw(minuend, 1);
code.pxor(minuend, subtrahend);
code.pand(subtrahend, minuend);
code.psrlw(minuend, 1);
// At this point,
// minuend := (a^b) >> 1
// subtrahend := (a^b) & b
code->psubw(minuend, subtrahend);
code.psubw(minuend, subtrahend);
ctx.reg_alloc.DefineValue(inst, minuend);
}
@@ -497,20 +497,20 @@ void EmitX64::EmitPackedHalvingSubS16(EmitContext& ctx, IR::Inst* inst) {
// Note that x^y always contains the LSB of the result.
// Since we want to calculate (x-y)/2, we can instead calculate ((x^y)>>>1) - ((x^y)&y).
code->pxor(minuend, subtrahend);
code->pand(subtrahend, minuend);
code->psraw(minuend, 1);
code.pxor(minuend, subtrahend);
code.pand(subtrahend, minuend);
code.psraw(minuend, 1);
// At this point,
// minuend := (a^b) >>> 1
// subtrahend := (a^b) & b
code->psubw(minuend, subtrahend);
code.psubw(minuend, subtrahend);
ctx.reg_alloc.DefineValue(inst, minuend);
}
void EmitPackedSubAdd(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, bool hi_is_sum, bool is_signed, bool is_halving) {
void EmitPackedSubAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, bool hi_is_sum, bool is_signed, bool is_halving) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
@@ -521,25 +521,25 @@ void EmitPackedSubAdd(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, bool
Xbyak::Reg32 reg_sum, reg_diff;
if (is_signed) {
code->movsx(reg_a_lo, reg_a_hi.cvt16());
code->movsx(reg_b_lo, reg_b_hi.cvt16());
code->sar(reg_a_hi, 16);
code->sar(reg_b_hi, 16);
code.movsx(reg_a_lo, reg_a_hi.cvt16());
code.movsx(reg_b_lo, reg_b_hi.cvt16());
code.sar(reg_a_hi, 16);
code.sar(reg_b_hi, 16);
} else {
code->movzx(reg_a_lo, reg_a_hi.cvt16());
code->movzx(reg_b_lo, reg_b_hi.cvt16());
code->shr(reg_a_hi, 16);
code->shr(reg_b_hi, 16);
code.movzx(reg_a_lo, reg_a_hi.cvt16());
code.movzx(reg_b_lo, reg_b_hi.cvt16());
code.shr(reg_a_hi, 16);
code.shr(reg_b_hi, 16);
}
if (hi_is_sum) {
code->sub(reg_a_lo, reg_b_hi);
code->add(reg_a_hi, reg_b_lo);
code.sub(reg_a_lo, reg_b_hi);
code.add(reg_a_hi, reg_b_lo);
reg_diff = reg_a_lo;
reg_sum = reg_a_hi;
} else {
code->add(reg_a_lo, reg_b_hi);
code->sub(reg_a_hi, reg_b_lo);
code.add(reg_a_lo, reg_b_hi);
code.sub(reg_a_hi, reg_b_lo);
reg_diff = reg_a_hi;
reg_sum = reg_a_lo;
}
@@ -549,36 +549,36 @@ void EmitPackedSubAdd(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, bool
Xbyak::Reg32 ge_sum = reg_b_hi;
Xbyak::Reg32 ge_diff = reg_b_lo;
code->mov(ge_sum, reg_sum);
code->mov(ge_diff, reg_diff);
code.mov(ge_sum, reg_sum);
code.mov(ge_diff, reg_diff);
if (!is_signed) {
code->shl(ge_sum, 15);
code->sar(ge_sum, 31);
code.shl(ge_sum, 15);
code.sar(ge_sum, 31);
} else {
code->not_(ge_sum);
code->sar(ge_sum, 31);
code.not_(ge_sum);
code.sar(ge_sum, 31);
}
code->not_(ge_diff);
code->sar(ge_diff, 31);
code->and_(ge_sum, hi_is_sum ? 0xFFFF0000 : 0x0000FFFF);
code->and_(ge_diff, hi_is_sum ? 0x0000FFFF : 0xFFFF0000);
code->or_(ge_sum, ge_diff);
code.not_(ge_diff);
code.sar(ge_diff, 31);
code.and_(ge_sum, hi_is_sum ? 0xFFFF0000 : 0x0000FFFF);
code.and_(ge_diff, hi_is_sum ? 0x0000FFFF : 0xFFFF0000);
code.or_(ge_sum, ge_diff);
ctx.reg_alloc.DefineValue(ge_inst, ge_sum);
ctx.EraseInstruction(ge_inst);
}
if (is_halving) {
code->shl(reg_a_lo, 15);
code->shr(reg_a_hi, 1);
code.shl(reg_a_lo, 15);
code.shr(reg_a_hi, 1);
} else {
code->shl(reg_a_lo, 16);
code.shl(reg_a_lo, 16);
}
// reg_a_lo now contains the low word and reg_a_hi now contains the high word.
// Merge them.
code->shld(reg_a_hi, reg_a_lo, 16);
code.shld(reg_a_hi, reg_a_lo, 16);
ctx.reg_alloc.DefineValue(inst, reg_a_hi);
}
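// Editor's note: the lane arithmetic of the exchange-add/sub above, sketched
// for the plain hi_is_sum case without halving or GE flags (editor's addition;
// hypothetical helper name); the shl/shr/shld tail simply re-packs the two
// halfwords into one 32-bit result:
#include <cstdint>
static std::uint32_t AddSubExchange(std::uint32_t a, std::uint32_t b) {
    const std::uint16_t lo = static_cast<std::uint16_t>((a & 0xFFFF) - (b >> 16)); // diff
    const std::uint16_t hi = static_cast<std::uint16_t>((a >> 16) + (b & 0xFFFF)); // sum
    return (static_cast<std::uint32_t>(hi) << 16) | lo;
}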
@@ -615,13 +615,13 @@ void EmitX64::EmitPackedHalvingSubAddS16(EmitContext& ctx, IR::Inst* inst) {
EmitPackedSubAdd(code, ctx, inst, false, true, true);
}
static void EmitPackedOperation(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Mmx& mmx, const Xbyak::Operand&)) {
static void EmitPackedOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Mmx& mmx, const Xbyak::Operand&)) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
(code->*fn)(xmm_a, xmm_b);
(code.*fn)(xmm_a, xmm_b);
ctx.reg_alloc.DefineValue(inst, xmm_a);
}
@@ -672,19 +672,19 @@ void EmitX64::EmitPackedSelect(EmitContext& ctx, IR::Inst* inst) {
Xbyak::Xmm to = ctx.reg_alloc.UseXmm(args[1]);
Xbyak::Xmm from = ctx.reg_alloc.UseScratchXmm(args[2]);
code->pand(from, ge);
code->pandn(ge, to);
code->por(from, ge);
code.pand(from, ge);
code.pandn(ge, to);
code.por(from, ge);
ctx.reg_alloc.DefineValue(inst, from);
} else if (code->DoesCpuSupport(Xbyak::util::Cpu::tBMI1)) {
} else if (code.DoesCpuSupport(Xbyak::util::Cpu::tBMI1)) {
Xbyak::Reg32 ge = ctx.reg_alloc.UseGpr(args[0]).cvt32();
Xbyak::Reg32 to = ctx.reg_alloc.UseScratchGpr(args[1]).cvt32();
Xbyak::Reg32 from = ctx.reg_alloc.UseScratchGpr(args[2]).cvt32();
code->and_(from, ge);
code->andn(to, ge, to);
code->or_(from, to);
code.and_(from, ge);
code.andn(to, ge, to);
code.or_(from, to);
ctx.reg_alloc.DefineValue(inst, from);
} else {
@@ -692,10 +692,10 @@ void EmitX64::EmitPackedSelect(EmitContext& ctx, IR::Inst* inst) {
Xbyak::Reg32 to = ctx.reg_alloc.UseGpr(args[1]).cvt32();
Xbyak::Reg32 from = ctx.reg_alloc.UseScratchGpr(args[2]).cvt32();
code->and_(from, ge);
code->not_(ge);
code->and_(ge, to);
code->or_(from, ge);
code.and_(from, ge);
code.not_(ge);
code.and_(ge, to);
code.or_(from, ge);
ctx.reg_alloc.DefineValue(inst, from);
}
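// Editor's note: all three code paths above compute the same bitwise select
// (editor's addition; hypothetical helper name): each GE bit picks the
// corresponding bit of one operand or the other.
#include <cstdint>
static std::uint32_t PackedSelectScalar(std::uint32_t ge, std::uint32_t to, std::uint32_t from) {
    return (from & ge) | (to & ~ge); // pand/pandn/por, and/andn/or, or and/not/and/or
}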

View file

@@ -26,15 +26,15 @@ void EmitX64::EmitSignedSaturatedAdd(EmitContext& ctx, IR::Inst* inst) {
Xbyak::Reg32 addend = ctx.reg_alloc.UseGpr(args[1]).cvt32();
Xbyak::Reg32 overflow = ctx.reg_alloc.ScratchGpr().cvt32();
code->mov(overflow, result);
code->shr(overflow, 31);
code->add(overflow, 0x7FFFFFFF);
code.mov(overflow, result);
code.shr(overflow, 31);
code.add(overflow, 0x7FFFFFFF);
// overflow now contains 0x7FFFFFFF if a was non-negative, or 0x80000000 if a was negative
code->add(result, addend);
code->cmovo(result, overflow);
code.add(result, addend);
code.cmovo(result, overflow);
if (overflow_inst) {
code->seto(overflow.cvt8());
code.seto(overflow.cvt8());
ctx.reg_alloc.DefineValue(overflow_inst, overflow);
ctx.EraseInstruction(overflow_inst);
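// Editor's note: scalar sketch of the saturation idiom above (editor's
// addition; hypothetical helper name). A signed add can only saturate toward
// the shared sign of its operands, so (a >> 31, logical) + 0x7FFFFFFF
// precomputes the clamp value that cmovo then selects on overflow:
#include <cstdint>
static std::int32_t SignedSaturatedAdd(std::int32_t a, std::int32_t b, bool& overflow) {
    const std::uint32_t ua = static_cast<std::uint32_t>(a);
    const std::uint32_t ub = static_cast<std::uint32_t>(b);
    const std::uint32_t clamp = (ua >> 31) + 0x7FFFFFFFu; // 0x7FFFFFFF or 0x80000000
    const std::uint32_t sum = ua + ub;
    overflow = ((~(ua ^ ub) & (ua ^ sum)) >> 31) != 0;    // same-sign inputs, flipped-sign result
    return static_cast<std::int32_t>(overflow ? clamp : sum);
}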
@@ -52,15 +52,15 @@ void EmitX64::EmitSignedSaturatedSub(EmitContext& ctx, IR::Inst* inst) {
Xbyak::Reg32 subend = ctx.reg_alloc.UseGpr(args[1]).cvt32();
Xbyak::Reg32 overflow = ctx.reg_alloc.ScratchGpr().cvt32();
code->mov(overflow, result);
code->shr(overflow, 31);
code->add(overflow, 0x7FFFFFFF);
code.mov(overflow, result);
code.shr(overflow, 31);
code.add(overflow, 0x7FFFFFFF);
// overflow now contains 0x7FFFFFFF if a was non-negative, or 0x80000000 if a was negative
code->sub(result, subend);
code->cmovo(result, overflow);
code.sub(result, subend);
code.cmovo(result, overflow);
if (overflow_inst) {
code->seto(overflow.cvt8());
code.seto(overflow.cvt8());
ctx.reg_alloc.DefineValue(overflow_inst, overflow);
ctx.EraseInstruction(overflow_inst);
@@ -83,14 +83,14 @@ void EmitX64::EmitUnsignedSaturation(EmitContext& ctx, IR::Inst* inst) {
Xbyak::Reg32 overflow = ctx.reg_alloc.ScratchGpr().cvt32();
// Pseudocode: result = clamp(reg_a, 0, saturated_value);
code->xor_(overflow, overflow);
code->cmp(reg_a, saturated_value);
code->mov(result, saturated_value);
code->cmovle(result, overflow);
code->cmovbe(result, reg_a);
code.xor_(overflow, overflow);
code.cmp(reg_a, saturated_value);
code.mov(result, saturated_value);
code.cmovle(result, overflow);
code.cmovbe(result, reg_a);
if (overflow_inst) {
code->seta(overflow.cvt8());
code.seta(overflow.cvt8());
ctx.reg_alloc.DefineValue(overflow_inst, overflow);
ctx.EraseInstruction(overflow_inst);
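// Editor's note: scalar reading of the cmov chain above (editor's addition;
// hypothetical helper name). One cmp feeds both tests: cmovle writes 0
// whenever reg_a <= ceiling as signed (which covers every negative input),
// then cmovbe overwrites with reg_a when it is <= ceiling as unsigned, leaving
// the ceiling for everything larger:
#include <cstdint>
static std::uint32_t UnsignedSaturate(std::int32_t a, std::uint32_t ceiling) {
    if (a < 0)
        return 0;
    const std::uint32_t ua = static_cast<std::uint32_t>(a);
    return ua <= ceiling ? ua : ceiling;
}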
@@ -126,20 +126,20 @@ void EmitX64::EmitSignedSaturation(EmitContext& ctx, IR::Inst* inst) {
Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32();
// overflow now contains a value between 0 and mask if it was originally between {negative,positive}_saturated_value.
code->lea(overflow, code->ptr[reg_a.cvt64() + negative_saturated_value]);
code.lea(overflow, code.ptr[reg_a.cvt64() + negative_saturated_value]);
// Put the appropriate saturated value in result
code->cmp(reg_a, positive_saturated_value);
code->mov(tmp, positive_saturated_value);
code->mov(result, sext_negative_satured_value);
code->cmovg(result, tmp);
code.cmp(reg_a, positive_saturated_value);
code.mov(tmp, positive_saturated_value);
code.mov(result, sext_negative_satured_value);
code.cmovg(result, tmp);
// Do the saturation
code->cmp(overflow, mask);
code->cmovbe(result, reg_a);
code.cmp(overflow, mask);
code.cmovbe(result, reg_a);
if (overflow_inst) {
code->seta(overflow.cvt8());
code.seta(overflow.cvt8());
ctx.reg_alloc.DefineValue(overflow_inst, overflow);
ctx.EraseInstruction(overflow_inst);
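// Editor's note: the lea/cmp pair above is the classic unsigned range check
// (editor's addition; hypothetical helper name): v lies in [lo, hi] iff
// (u32)(v - lo) <= (u32)(hi - lo), which needs only one unsigned compare:
#include <cstdint>
static bool InRangeSigned(std::int32_t v, std::int32_t lo, std::int32_t hi) {
    return static_cast<std::uint32_t>(v) - static_cast<std::uint32_t>(lo) <=
           static_cast<std::uint32_t>(hi) - static_cast<std::uint32_t>(lo);
}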

View file

@@ -17,13 +17,13 @@ namespace Dynarmic::BackendX64 {
using namespace Xbyak::util;
template <typename Function>
static void EmitVectorOperation(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, Function fn) {
static void EmitVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
(code->*fn)(xmm_a, xmm_b);
(code.*fn)(xmm_a, xmm_b);
ctx.reg_alloc.DefineValue(inst, xmm_a);
}
@@ -35,15 +35,15 @@ void EmitX64::EmitVectorGetElement8(EmitContext& ctx, IR::Inst* inst) {
Xbyak::Xmm source = ctx.reg_alloc.UseXmm(args[0]);
if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
Xbyak::Reg32 dest = ctx.reg_alloc.ScratchGpr().cvt32();
code->pextrb(dest, source, index);
code.pextrb(dest, source, index);
ctx.reg_alloc.DefineValue(inst, dest);
} else {
Xbyak::Reg32 dest = ctx.reg_alloc.ScratchGpr().cvt32();
code->pextrw(dest, source, index / 2);
code.pextrw(dest, source, index / 2);
if (index % 2 == 1) {
code->shr(dest, 8);
code.shr(dest, 8);
}
ctx.reg_alloc.DefineValue(inst, dest);
}
@@ -56,7 +56,7 @@ void EmitX64::EmitVectorGetElement16(EmitContext& ctx, IR::Inst* inst) {
Xbyak::Xmm source = ctx.reg_alloc.UseXmm(args[0]);
Xbyak::Reg32 dest = ctx.reg_alloc.ScratchGpr().cvt32();
code->pextrw(dest, source, index);
code.pextrw(dest, source, index);
ctx.reg_alloc.DefineValue(inst, dest);
}
@@ -69,14 +69,14 @@ void EmitX64::EmitVectorGetElement32(EmitContext& ctx, IR::Inst* inst) {
if (index == 0) {
Xbyak::Xmm source = ctx.reg_alloc.UseXmm(args[0]);
code->movd(dest, source);
} else if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
code.movd(dest, source);
} else if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
Xbyak::Xmm source = ctx.reg_alloc.UseXmm(args[0]);
code->pextrd(dest, source, index);
code.pextrd(dest, source, index);
} else {
Xbyak::Xmm source = ctx.reg_alloc.UseScratchXmm(args[0]);
code->pshufd(source, source, index);
code->movd(dest, source);
code.pshufd(source, source, index);
code.movd(dest, source);
}
ctx.reg_alloc.DefineValue(inst, dest);
@@ -91,14 +91,14 @@ void EmitX64::EmitVectorGetElement64(EmitContext& ctx, IR::Inst* inst) {
if (index == 0) {
Xbyak::Xmm source = ctx.reg_alloc.UseXmm(args[0]);
code->movq(dest, source);
} else if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
code.movq(dest, source);
} else if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
Xbyak::Xmm source = ctx.reg_alloc.UseXmm(args[0]);
code->pextrq(dest, source, 1);
code.pextrq(dest, source, 1);
} else {
Xbyak::Xmm source = ctx.reg_alloc.UseScratchXmm(args[0]);
code->punpckhqdq(source, source);
code->movq(dest, source);
code.punpckhqdq(source, source);
code.movq(dest, source);
}
ctx.reg_alloc.DefineValue(inst, dest);
@@ -109,11 +109,11 @@ void EmitX64::EmitVectorSetElement8(EmitContext& ctx, IR::Inst* inst) {
ASSERT(args[1].IsImmediate());
u8 index = args[1].GetImmediateU8();
if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
Xbyak::Xmm source_vector = ctx.reg_alloc.UseScratchXmm(args[0]);
Xbyak::Reg8 source_elem = ctx.reg_alloc.UseGpr(args[2]).cvt8();
code->pinsrb(source_vector, source_elem.cvt32(), index);
code.pinsrb(source_vector, source_elem.cvt32(), index);
ctx.reg_alloc.DefineValue(inst, source_vector);
} else {
@@ -121,17 +121,17 @@ void EmitX64::EmitVectorSetElement8(EmitContext& ctx, IR::Inst* inst) {
Xbyak::Reg32 source_elem = ctx.reg_alloc.UseScratchGpr(args[2]).cvt32();
Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32();
code->pextrw(tmp, source_vector, index / 2);
code.pextrw(tmp, source_vector, index / 2);
if (index % 2 == 0) {
code->and_(tmp, 0xFF00);
code->and_(source_elem, 0x00FF);
code->or_(tmp, source_elem);
code.and_(tmp, 0xFF00);
code.and_(source_elem, 0x00FF);
code.or_(tmp, source_elem);
} else {
code->and_(tmp, 0x00FF);
code->shl(source_elem, 8);
code->or_(tmp, source_elem);
code.and_(tmp, 0x00FF);
code.shl(source_elem, 8);
code.or_(tmp, source_elem);
}
code->pinsrw(source_vector, tmp, index / 2);
code.pinsrw(source_vector, tmp, index / 2);
ctx.reg_alloc.DefineValue(inst, source_vector);
}
@@ -145,7 +145,7 @@ void EmitX64::EmitVectorSetElement16(EmitContext& ctx, IR::Inst* inst) {
Xbyak::Xmm source_vector = ctx.reg_alloc.UseScratchXmm(args[0]);
Xbyak::Reg16 source_elem = ctx.reg_alloc.UseGpr(args[2]).cvt16();
code->pinsrw(source_vector, source_elem.cvt32(), index);
code.pinsrw(source_vector, source_elem.cvt32(), index);
ctx.reg_alloc.DefineValue(inst, source_vector);
}
@@ -155,20 +155,20 @@ void EmitX64::EmitVectorSetElement32(EmitContext& ctx, IR::Inst* inst) {
ASSERT(args[1].IsImmediate());
u8 index = args[1].GetImmediateU8();
if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
Xbyak::Xmm source_vector = ctx.reg_alloc.UseScratchXmm(args[0]);
Xbyak::Reg32 source_elem = ctx.reg_alloc.UseGpr(args[2]).cvt32();
code->pinsrd(source_vector, source_elem, index);
code.pinsrd(source_vector, source_elem, index);
ctx.reg_alloc.DefineValue(inst, source_vector);
} else {
Xbyak::Xmm source_vector = ctx.reg_alloc.UseScratchXmm(args[0]);
Xbyak::Reg32 source_elem = ctx.reg_alloc.UseScratchGpr(args[2]).cvt32();
code->pinsrw(source_vector, source_elem, index * 2);
code->shr(source_elem, 16);
code->pinsrw(source_vector, source_elem, index * 2 + 1);
code.pinsrw(source_vector, source_elem, index * 2);
code.shr(source_elem, 16);
code.pinsrw(source_vector, source_elem, index * 2 + 1);
ctx.reg_alloc.DefineValue(inst, source_vector);
}
@@ -179,24 +179,24 @@ void EmitX64::EmitVectorSetElement64(EmitContext& ctx, IR::Inst* inst) {
ASSERT(args[1].IsImmediate());
u8 index = args[1].GetImmediateU8();
if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
Xbyak::Xmm source_vector = ctx.reg_alloc.UseScratchXmm(args[0]);
Xbyak::Reg64 source_elem = ctx.reg_alloc.UseGpr(args[2]);
code->pinsrq(source_vector, source_elem, index);
code.pinsrq(source_vector, source_elem, index);
ctx.reg_alloc.DefineValue(inst, source_vector);
} else {
Xbyak::Xmm source_vector = ctx.reg_alloc.UseScratchXmm(args[0]);
Xbyak::Reg64 source_elem = ctx.reg_alloc.UseScratchGpr(args[2]);
code->pinsrw(source_vector, source_elem.cvt32(), index * 4);
code->shr(source_elem, 16);
code->pinsrw(source_vector, source_elem.cvt32(), index * 4 + 1);
code->shr(source_elem, 16);
code->pinsrw(source_vector, source_elem.cvt32(), index * 4 + 2);
code->shr(source_elem, 16);
code->pinsrw(source_vector, source_elem.cvt32(), index * 4 + 3);
code.pinsrw(source_vector, source_elem.cvt32(), index * 4);
code.shr(source_elem, 16);
code.pinsrw(source_vector, source_elem.cvt32(), index * 4 + 1);
code.shr(source_elem, 16);
code.pinsrw(source_vector, source_elem.cvt32(), index * 4 + 2);
code.shr(source_elem, 16);
code.pinsrw(source_vector, source_elem.cvt32(), index * 4 + 3);
ctx.reg_alloc.DefineValue(inst, source_vector);
}
@@ -227,15 +227,15 @@ void EmitX64::EmitVectorLowerBroadcast8(EmitContext& ctx, IR::Inst* inst) {
Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSSE3)) {
if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSSE3)) {
Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
code->pxor(tmp, tmp);
code->pshufb(a, tmp);
code->movq(a, a);
code.pxor(tmp, tmp);
code.pshufb(a, tmp);
code.movq(a, a);
} else {
code->punpcklbw(a, a);
code->pshuflw(a, a, 0);
code.punpcklbw(a, a);
code.pshuflw(a, a, 0);
}
ctx.reg_alloc.DefineValue(inst, a);
@@ -246,7 +246,7 @@ void EmitX64::EmitVectorLowerBroadcast16(EmitContext& ctx, IR::Inst* inst) {
Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
code->pshuflw(a, a, 0);
code.pshuflw(a, a, 0);
ctx.reg_alloc.DefineValue(inst, a);
}
@@ -256,7 +256,7 @@ void EmitX64::EmitVectorLowerBroadcast32(EmitContext& ctx, IR::Inst* inst) {
Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
code->pshuflw(a, a, 0b01000100);
code.pshuflw(a, a, 0b01000100);
ctx.reg_alloc.DefineValue(inst, a);
}
@@ -266,15 +266,15 @@ void EmitX64::EmitVectorBroadcast8(EmitContext& ctx, IR::Inst* inst) {
Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSSE3)) {
if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSSE3)) {
Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
code->pxor(tmp, tmp);
code->pshufb(a, tmp);
code.pxor(tmp, tmp);
code.pshufb(a, tmp);
} else {
code->punpcklbw(a, a);
code->pshuflw(a, a, 0);
code->punpcklqdq(a, a);
code.punpcklbw(a, a);
code.pshuflw(a, a, 0);
code.punpcklqdq(a, a);
}
ctx.reg_alloc.DefineValue(inst, a);
@@ -285,8 +285,8 @@ void EmitX64::EmitVectorBroadcast16(EmitContext& ctx, IR::Inst* inst) {
Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
code->pshuflw(a, a, 0);
code->punpcklqdq(a, a);
code.pshuflw(a, a, 0);
code.punpcklqdq(a, a);
ctx.reg_alloc.DefineValue(inst, a);
}
@@ -296,7 +296,7 @@ void EmitX64::EmitVectorBroadcast32(EmitContext& ctx, IR::Inst* inst) {
Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
code->pshufd(a, a, 0);
code.pshufd(a, a, 0);
ctx.reg_alloc.DefineValue(inst, a);
}
@@ -306,7 +306,7 @@ void EmitX64::EmitVectorBroadcast64(EmitContext& ctx, IR::Inst* inst) {
Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
code->punpcklqdq(a, a);
code.punpcklqdq(a, a);
ctx.reg_alloc.DefineValue(inst, a);
}
@@ -325,8 +325,8 @@ void EmitX64::EmitVectorNot(EmitContext& ctx, IR::Inst* inst) {
Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
Xbyak::Xmm xmm_b = ctx.reg_alloc.ScratchXmm();
code->pcmpeqw(xmm_b, xmm_b);
code->pxor(xmm_a, xmm_b);
code.pcmpeqw(xmm_b, xmm_b);
code.pxor(xmm_a, xmm_b);
ctx.reg_alloc.DefineValue(inst, xmm_a);
}
@@ -344,7 +344,7 @@ void EmitX64::EmitVectorEqual32(EmitContext& ctx, IR::Inst* inst) {
}
void EmitX64::EmitVectorEqual64(EmitContext& ctx, IR::Inst* inst) {
if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pcmpeqq);
return;
}
@@ -355,9 +355,9 @@ void EmitX64::EmitVectorEqual64(EmitContext& ctx, IR::Inst* inst) {
Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
code->pcmpeqd(xmm_a, xmm_b);
code->pshufd(tmp, xmm_a, 0b10110001);
code->pand(xmm_a, tmp);
code.pcmpeqd(xmm_a, xmm_b);
code.pshufd(tmp, xmm_a, 0b10110001);
code.pand(xmm_a, tmp);
ctx.reg_alloc.DefineValue(inst, xmm_a);
}
@@ -365,14 +365,14 @@ void EmitX64::EmitVectorEqual64(EmitContext& ctx, IR::Inst* inst) {
void EmitX64::EmitVectorEqual128(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
code->pcmpeqq(xmm_a, xmm_b);
code->pshufd(tmp, xmm_a, 0b01001110);
code->pand(xmm_a, tmp);
code.pcmpeqq(xmm_a, xmm_b);
code.pshufd(tmp, xmm_a, 0b01001110);
code.pand(xmm_a, tmp);
ctx.reg_alloc.DefineValue(inst, xmm_a);
} else {
@@ -380,11 +380,11 @@ void EmitX64::EmitVectorEqual128(EmitContext& ctx, IR::Inst* inst) {
Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
code->pcmpeqd(xmm_a, xmm_b);
code->pshufd(tmp, xmm_a, 0b10110001);
code->pand(xmm_a, tmp);
code->pshufd(tmp, xmm_a, 0b01001110);
code->pand(xmm_a, tmp);
code.pcmpeqd(xmm_a, xmm_b);
code.pshufd(tmp, xmm_a, 0b10110001);
code.pand(xmm_a, tmp);
code.pshufd(tmp, xmm_a, 0b01001110);
code.pand(xmm_a, tmp);
ctx.reg_alloc.DefineValue(inst, xmm_a);
}
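// Editor's note: scalar view of the SSE2 mask-folding fallback above (editor's
// addition; hypothetical helper name). pcmpeqd leaves each 32-bit lane all-ones
// iff it matched; AND-ing each lane with its neighbour, then with the opposite
// 64-bit half, leaves all-ones only on full 128-bit equality:
#include <cstdint>
static bool Equal128(const std::uint32_t a[4], const std::uint32_t b[4]) {
    std::uint32_t mask = 0xFFFFFFFFu;
    for (int i = 0; i < 4; ++i)
        mask &= (a[i] == b[i]) ? 0xFFFFFFFFu : 0u;
    return mask == 0xFFFFFFFFu;
}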
@@ -397,13 +397,13 @@ void EmitX64::EmitVectorLowerPairedAdd8(EmitContext& ctx, IR::Inst* inst) {
Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
code->punpcklqdq(xmm_a, xmm_b);
code->movdqa(tmp, xmm_a);
code->psllw(xmm_a, 8);
code->paddw(xmm_a, tmp);
code->pxor(tmp, tmp);
code->psrlw(xmm_a, 8);
code->packuswb(xmm_a, tmp);
code.punpcklqdq(xmm_a, xmm_b);
code.movdqa(tmp, xmm_a);
code.psllw(xmm_a, 8);
code.paddw(xmm_a, tmp);
code.pxor(tmp, tmp);
code.psrlw(xmm_a, 8);
code.packuswb(xmm_a, tmp);
ctx.reg_alloc.DefineValue(inst, xmm_a);
}
@@ -415,17 +415,17 @@ void EmitX64::EmitVectorLowerPairedAdd16(EmitContext& ctx, IR::Inst* inst) {
Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
code->punpcklqdq(xmm_a, xmm_b);
if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSSE3)) {
code->pxor(tmp, tmp);
code->phaddw(xmm_a, tmp);
code.punpcklqdq(xmm_a, xmm_b);
if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSSE3)) {
code.pxor(tmp, tmp);
code.phaddw(xmm_a, tmp);
} else {
code->movdqa(tmp, xmm_a);
code->pslld(xmm_a, 16);
code->paddd(xmm_a, tmp);
code->pxor(tmp, tmp);
code->psrad(xmm_a, 16);
code->packssdw(xmm_a, tmp); // Note: packusdw is SSE4.1, hence the arithmetic shift above.
code.movdqa(tmp, xmm_a);
code.pslld(xmm_a, 16);
code.paddd(xmm_a, tmp);
code.pxor(tmp, tmp);
code.psrad(xmm_a, 16);
code.packssdw(xmm_a, tmp); // Note: packusdw is SSE4.1, hence the arithmetic shift above.
}
ctx.reg_alloc.DefineValue(inst, xmm_a);
@@ -438,16 +438,16 @@ void EmitX64::EmitVectorLowerPairedAdd32(EmitContext& ctx, IR::Inst* inst) {
Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
code->punpcklqdq(xmm_a, xmm_b);
if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSSE3)) {
code->pxor(tmp, tmp);
code->phaddd(xmm_a, tmp);
code.punpcklqdq(xmm_a, xmm_b);
if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSSE3)) {
code.pxor(tmp, tmp);
code.phaddd(xmm_a, tmp);
} else {
code->movdqa(tmp, xmm_a);
code->psllq(xmm_a, 32);
code->paddq(xmm_a, tmp);
code->psrlq(xmm_a, 32);
code->pshufd(xmm_a, xmm_a, 0b11011000);
code.movdqa(tmp, xmm_a);
code.psllq(xmm_a, 32);
code.paddq(xmm_a, tmp);
code.psrlq(xmm_a, 32);
code.pshufd(xmm_a, xmm_a, 0b11011000);
}
ctx.reg_alloc.DefineValue(inst, xmm_a);
@@ -461,15 +461,15 @@ void EmitX64::EmitVectorPairedAdd8(EmitContext& ctx, IR::Inst* inst) {
Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm();
Xbyak::Xmm d = ctx.reg_alloc.ScratchXmm();
code->movdqa(c, a);
code->movdqa(d, b);
code->psllw(a, 8);
code->psllw(b, 8);
code->paddw(a, c);
code->paddw(b, d);
code->psrlw(a, 8);
code->psrlw(b, 8);
code->packuswb(a, b);
code.movdqa(c, a);
code.movdqa(d, b);
code.psllw(a, 8);
code.psllw(b, 8);
code.paddw(a, c);
code.paddw(b, d);
code.psrlw(a, 8);
code.psrlw(b, 8);
code.packuswb(a, b);
ctx.reg_alloc.DefineValue(inst, a);
}
@@ -477,11 +477,11 @@ void EmitX64::EmitVectorPairedAdd8(EmitContext& ctx, IR::Inst* inst) {
void EmitX64::EmitVectorPairedAdd16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSSE3)) {
if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSSE3)) {
Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
Xbyak::Xmm b = ctx.reg_alloc.UseXmm(args[1]);
code->phaddw(a, b);
code.phaddw(a, b);
ctx.reg_alloc.DefineValue(inst, a);
} else {
@@ -490,15 +490,15 @@ void EmitX64::EmitVectorPairedAdd16(EmitContext& ctx, IR::Inst* inst) {
Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm();
Xbyak::Xmm d = ctx.reg_alloc.ScratchXmm();
code->movdqa(c, a);
code->movdqa(d, b);
code->pslld(a, 16);
code->pslld(b, 16);
code->paddd(a, c);
code->paddd(b, d);
code->psrad(a, 16);
code->psrad(b, 16);
code->packssdw(a, b);
code.movdqa(c, a);
code.movdqa(d, b);
code.pslld(a, 16);
code.pslld(b, 16);
code.paddd(a, c);
code.paddd(b, d);
code.psrad(a, 16);
code.psrad(b, 16);
code.packssdw(a, b);
ctx.reg_alloc.DefineValue(inst, a);
}
@@ -507,11 +507,11 @@ void EmitX64::EmitVectorPairedAdd16(EmitContext& ctx, IR::Inst* inst) {
void EmitX64::EmitVectorPairedAdd32(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSSE3)) {
if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSSE3)) {
Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
Xbyak::Xmm b = ctx.reg_alloc.UseXmm(args[1]);
code->phaddd(a, b);
code.phaddd(a, b);
ctx.reg_alloc.DefineValue(inst, a);
} else {
@@ -520,13 +520,13 @@ void EmitX64::EmitVectorPairedAdd32(EmitContext& ctx, IR::Inst* inst) {
Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm();
Xbyak::Xmm d = ctx.reg_alloc.ScratchXmm();
code->movdqa(c, a);
code->movdqa(d, b);
code->psllq(a, 32);
code->psllq(b, 32);
code->paddq(a, c);
code->paddq(b, d);
code->shufps(a, b, 0b11011101);
code.movdqa(c, a);
code.movdqa(d, b);
code.psllq(a, 32);
code.psllq(b, 32);
code.paddq(a, c);
code.paddq(b, d);
code.shufps(a, b, 0b11011101);
ctx.reg_alloc.DefineValue(inst, a);
}
@@ -539,10 +539,10 @@ void EmitX64::EmitVectorPairedAdd64(EmitContext& ctx, IR::Inst* inst) {
Xbyak::Xmm b = ctx.reg_alloc.UseXmm(args[1]);
Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm();
code->movdqa(c, a);
code->punpcklqdq(a, b);
code->punpckhqdq(c, b);
code->paddq(a, c);
code.movdqa(c, a);
code.punpcklqdq(a, b);
code.punpckhqdq(c, b);
code.paddq(a, c);
ctx.reg_alloc.DefineValue(inst, a);
}
@@ -552,7 +552,7 @@ void EmitX64::EmitVectorZeroUpper(EmitContext& ctx, IR::Inst* inst) {
Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
code->movq(a, a); // TODO: !IsLastUse
code.movq(a, a); // TODO: !IsLastUse
ctx.reg_alloc.DefineValue(inst, a);
}

View file

@@ -14,7 +14,7 @@ struct BlockOfCode::ExceptionHandler::Impl final {
BlockOfCode::ExceptionHandler::ExceptionHandler() = default;
BlockOfCode::ExceptionHandler::~ExceptionHandler() = default;
void BlockOfCode::ExceptionHandler::Register(BlockOfCode*) {
void BlockOfCode::ExceptionHandler::Register(BlockOfCode&) {
// Do nothing
}

View file

@@ -173,11 +173,11 @@ private:
BlockOfCode::ExceptionHandler::ExceptionHandler() = default;
BlockOfCode::ExceptionHandler::~ExceptionHandler() = default;
void BlockOfCode::ExceptionHandler::Register(BlockOfCode* code) {
void BlockOfCode::ExceptionHandler::Register(BlockOfCode& code) {
const auto prolog_info = GetPrologueInformation();
code->align(16);
UNWIND_INFO* unwind_info = static_cast<UNWIND_INFO*>(code->AllocateFromCodeSpace(sizeof(UNWIND_INFO)));
code.align(16);
UNWIND_INFO* unwind_info = static_cast<UNWIND_INFO*>(code.AllocateFromCodeSpace(sizeof(UNWIND_INFO)));
unwind_info->Version = 1;
unwind_info->Flags = 0; // No special exception handling required.
unwind_info->SizeOfProlog = prolog_info.prolog_size;
@@ -186,16 +186,16 @@ void BlockOfCode::ExceptionHandler::Register(BlockOfCode* code) {
unwind_info->FrameOffset = 0; // Unused because FrameRegister == 0
// UNWIND_INFO::UnwindCode field:
const size_t size_of_unwind_code = sizeof(UNWIND_CODE) * prolog_info.unwind_code.size();
UNWIND_CODE* unwind_code = static_cast<UNWIND_CODE*>(code->AllocateFromCodeSpace(size_of_unwind_code));
UNWIND_CODE* unwind_code = static_cast<UNWIND_CODE*>(code.AllocateFromCodeSpace(size_of_unwind_code));
memcpy(unwind_code, prolog_info.unwind_code.data(), size_of_unwind_code);
code->align(16);
RUNTIME_FUNCTION* rfuncs = static_cast<RUNTIME_FUNCTION*>(code->AllocateFromCodeSpace(sizeof(RUNTIME_FUNCTION)));
rfuncs->BeginAddress = static_cast<DWORD>(reinterpret_cast<u8*>(code->run_code) - code->getCode());
rfuncs->EndAddress = static_cast<DWORD>(code->maxSize_);
rfuncs->UnwindData = static_cast<DWORD>(reinterpret_cast<u8*>(unwind_info) - code->getCode());
code.align(16);
RUNTIME_FUNCTION* rfuncs = static_cast<RUNTIME_FUNCTION*>(code.AllocateFromCodeSpace(sizeof(RUNTIME_FUNCTION)));
rfuncs->BeginAddress = static_cast<DWORD>(reinterpret_cast<u8*>(code.run_code) - code.getCode());
rfuncs->EndAddress = static_cast<DWORD>(code.maxSize_);
rfuncs->UnwindData = static_cast<DWORD>(reinterpret_cast<u8*>(unwind_info) - code.getCode());
impl = std::make_unique<Impl>(rfuncs, code->getCode());
impl = std::make_unique<Impl>(rfuncs, code.getCode());
}
} // namespace BackendX64

View file

@@ -370,10 +370,10 @@ void RegAlloc::HostCall(IR::Inst* result_def, boost::optional<Argument&> arg0, b
Xbyak::Reg64 reg = HostLocToReg64(args_hostloc[i]);
switch (args[i]->GetType()) {
case IR::Type::U8:
code->movzx(reg.cvt32(), reg.cvt8());
code.movzx(reg.cvt32(), reg.cvt8());
break;
case IR::Type::U16:
code->movzx(reg.cvt32(), reg.cvt16());
code.movzx(reg.cvt32(), reg.cvt16());
break;
default:
break; // Nothing needs to be done
@@ -459,9 +459,9 @@ HostLoc RegAlloc::LoadImmediate(IR::Value imm, HostLoc host_loc) {
Xbyak::Reg64 reg = HostLocToReg64(host_loc);
u64 imm_value = ImmediateToU64(imm);
if (imm_value == 0)
code->xor_(reg.cvt32(), reg.cvt32());
code.xor_(reg.cvt32(), reg.cvt32());
else
code->mov(reg, imm_value);
code.mov(reg, imm_value);
return host_loc;
}
@@ -469,9 +469,9 @@ HostLoc RegAlloc::LoadImmediate(IR::Value imm, HostLoc host_loc) {
Xbyak::Xmm reg = HostLocToXmm(host_loc);
u64 imm_value = ImmediateToU64(imm);
if (imm_value == 0)
code->pxor(reg, reg);
code.pxor(reg, reg);
else
code->movdqa(reg, code->MConst(imm_value)); // TODO: movaps/movapd more appropriate sometimes
code.movdqa(reg, code.MConst(imm_value)); // TODO: movaps/movapd more appropriate sometimes
return host_loc;
}
@@ -557,42 +557,42 @@ void RegAlloc::EmitMove(HostLoc to, HostLoc from) {
const size_t bit_width = LocInfo(from).GetMaxBitWidth();
if (HostLocIsXMM(to) && HostLocIsXMM(from)) {
code->movaps(HostLocToXmm(to), HostLocToXmm(from));
code.movaps(HostLocToXmm(to), HostLocToXmm(from));
} else if (HostLocIsGPR(to) && HostLocIsGPR(from)) {
ASSERT(bit_width != 128);
if (bit_width == 64) {
code->mov(HostLocToReg64(to), HostLocToReg64(from));
code.mov(HostLocToReg64(to), HostLocToReg64(from));
} else {
code->mov(HostLocToReg64(to).cvt32(), HostLocToReg64(from).cvt32());
code.mov(HostLocToReg64(to).cvt32(), HostLocToReg64(from).cvt32());
}
} else if (HostLocIsXMM(to) && HostLocIsGPR(from)) {
ASSERT(bit_width != 128);
if (bit_width == 64) {
code->movq(HostLocToXmm(to), HostLocToReg64(from));
code.movq(HostLocToXmm(to), HostLocToReg64(from));
} else {
code->movd(HostLocToXmm(to), HostLocToReg64(from).cvt32());
code.movd(HostLocToXmm(to), HostLocToReg64(from).cvt32());
}
} else if (HostLocIsGPR(to) && HostLocIsXMM(from)) {
ASSERT(bit_width != 128);
if (bit_width == 64) {
code->movq(HostLocToReg64(to), HostLocToXmm(from));
code.movq(HostLocToReg64(to), HostLocToXmm(from));
} else {
code->movd(HostLocToReg64(to).cvt32(), HostLocToXmm(from));
code.movd(HostLocToReg64(to).cvt32(), HostLocToXmm(from));
}
} else if (HostLocIsXMM(to) && HostLocIsSpill(from)) {
Xbyak::Address spill_addr = spill_to_addr(from);
ASSERT(spill_addr.getBit() >= bit_width);
switch (bit_width) {
case 128:
code->movaps(HostLocToXmm(to), spill_addr);
code.movaps(HostLocToXmm(to), spill_addr);
break;
case 64:
code->movsd(HostLocToXmm(to), spill_addr);
code.movsd(HostLocToXmm(to), spill_addr);
break;
case 32:
case 16:
case 8:
code->movss(HostLocToXmm(to), spill_addr);
code.movss(HostLocToXmm(to), spill_addr);
break;
default:
UNREACHABLE();
@@ -602,15 +602,15 @@ void RegAlloc::EmitMove(HostLoc to, HostLoc from) {
ASSERT(spill_addr.getBit() >= bit_width);
switch (bit_width) {
case 128:
code->movaps(spill_addr, HostLocToXmm(from));
code.movaps(spill_addr, HostLocToXmm(from));
break;
case 64:
code->movsd(spill_addr, HostLocToXmm(from));
code.movsd(spill_addr, HostLocToXmm(from));
break;
case 32:
case 16:
case 8:
code->movss(spill_addr, HostLocToXmm(from));
code.movss(spill_addr, HostLocToXmm(from));
break;
default:
UNREACHABLE();
@@ -618,16 +618,16 @@ void RegAlloc::EmitMove(HostLoc to, HostLoc from) {
} else if (HostLocIsGPR(to) && HostLocIsSpill(from)) {
ASSERT(bit_width != 128);
if (bit_width == 64) {
code->mov(HostLocToReg64(to), spill_to_addr(from));
code.mov(HostLocToReg64(to), spill_to_addr(from));
} else {
code->mov(HostLocToReg64(to).cvt32(), spill_to_addr(from));
code.mov(HostLocToReg64(to).cvt32(), spill_to_addr(from));
}
} else if (HostLocIsSpill(to) && HostLocIsGPR(from)) {
ASSERT(bit_width != 128);
if (bit_width == 64) {
code->mov(spill_to_addr(to), HostLocToReg64(from));
code.mov(spill_to_addr(to), HostLocToReg64(from));
} else {
code->mov(spill_to_addr(to), HostLocToReg64(from).cvt32());
code.mov(spill_to_addr(to), HostLocToReg64(from).cvt32());
}
} else {
ASSERT_MSG(false, "Invalid RegAlloc::EmitMove");
@@ -636,7 +636,7 @@ void RegAlloc::EmitMove(HostLoc to, HostLoc from) {
void RegAlloc::EmitExchange(HostLoc a, HostLoc b) {
if (HostLocIsGPR(a) && HostLocIsGPR(b)) {
code->xchg(HostLocToReg64(a), HostLocToReg64(b));
code.xchg(HostLocToReg64(a), HostLocToReg64(b));
} else if (HostLocIsXMM(a) && HostLocIsXMM(b)) {
ASSERT_MSG(false, "Check your code: Exchanging XMM registers is unnecessary");
} else {

View file

@@ -91,7 +91,7 @@ private:
class RegAlloc final {
public:
explicit RegAlloc(BlockOfCode* code, size_t num_spills, std::function<Xbyak::Address(HostLoc)> spill_to_addr)
explicit RegAlloc(BlockOfCode& code, size_t num_spills, std::function<Xbyak::Address(HostLoc)> spill_to_addr)
: hostloc_info(NonSpillHostLocCount + num_spills), code(code), spill_to_addr(std::move(spill_to_addr)) {}
std::array<Argument, 3> GetArgumentInfo(IR::Inst* inst);
@@ -144,7 +144,7 @@ private:
HostLocInfo& LocInfo(HostLoc loc);
const HostLocInfo& LocInfo(HostLoc loc) const;
BlockOfCode* code = nullptr;
BlockOfCode& code;
std::function<Xbyak::Address(HostLoc)> spill_to_addr;
void EmitMove(HostLoc to, HostLoc from);
void EmitExchange(HostLoc a, HostLoc b);