constant_pool: Add frame parameter
This commit is contained in:
parent
bd2b415850
commit
1dfce0894d
9 changed files with 39 additions and 39 deletions
|
@ -189,8 +189,8 @@ void BlockOfCode::SwitchMxcsrOnExit() {
|
|||
ldmxcsr(dword[r15 + jsi.offsetof_save_host_MXCSR]);
|
||||
}
|
||||
|
||||
Xbyak::Address BlockOfCode::MConst(u64 lower, u64 upper) {
|
||||
return constant_pool.GetConstant(lower, upper);
|
||||
Xbyak::Address BlockOfCode::MConst(const Xbyak::AddressFrame& frame, u64 lower, u64 upper) {
|
||||
return constant_pool.GetConstant(frame, lower, upper);
|
||||
}
|
||||
|
||||
void BlockOfCode::SwitchToFarCode() {
|
||||
|
|
|
@ -70,7 +70,7 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
Xbyak::Address MConst(u64 lower, u64 upper = 0);
|
||||
Xbyak::Address MConst(const Xbyak::AddressFrame& frame, u64 lower, u64 upper = 0);
|
||||
|
||||
/// Far code sits far away from the near code. Execution remains primarily in near code.
|
||||
/// "Cold" / Rarely executed instructions sit in far code, so the CPU doesn't fetch them unless necessary.
|
||||
|
|
|
@ -20,7 +20,7 @@ ConstantPool::ConstantPool(BlockOfCode& code, size_t size) : code(code), pool_si
|
|||
current_pool_ptr = pool_begin;
|
||||
}
|
||||
|
||||
Xbyak::Address ConstantPool::GetConstant(u64 lower, u64 upper) {
|
||||
Xbyak::Address ConstantPool::GetConstant(const Xbyak::AddressFrame& frame, u64 lower, u64 upper) {
|
||||
const auto constant = std::make_tuple(lower, upper);
|
||||
auto iter = constant_info.find(constant);
|
||||
if (iter == constant_info.end()) {
|
||||
|
@ -30,7 +30,7 @@ Xbyak::Address ConstantPool::GetConstant(u64 lower, u64 upper) {
|
|||
iter = constant_info.emplace(constant, current_pool_ptr).first;
|
||||
current_pool_ptr += align_size;
|
||||
}
|
||||
return code.xword[code.rip + iter->second];
|
||||
return frame[code.rip + iter->second];
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::BackendX64
|
||||
|
|
|
@ -24,7 +24,7 @@ class ConstantPool final {
|
|||
public:
|
||||
ConstantPool(BlockOfCode& code, size_t size);
|
||||
|
||||
Xbyak::Address GetConstant(u64 lower, u64 upper = 0);
|
||||
Xbyak::Address GetConstant(const Xbyak::AddressFrame& frame, u64 lower, u64 upper = 0);
|
||||
|
||||
private:
|
||||
static constexpr size_t align_size = 16; // bytes
|
||||
|
|
|
@ -53,9 +53,9 @@ static void DenormalsAreZero32(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::R
|
|||
static void DenormalsAreZero64(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Reg64 gpr_scratch) {
|
||||
Xbyak::Label end;
|
||||
|
||||
auto mask = code.MConst(f64_non_sign_mask);
|
||||
auto mask = code.MConst(xword, f64_non_sign_mask);
|
||||
mask.setBit(64);
|
||||
auto penult_denormal = code.MConst(f64_penultimate_positive_denormal);
|
||||
auto penult_denormal = code.MConst(xword, f64_penultimate_positive_denormal);
|
||||
penult_denormal.setBit(64);
|
||||
|
||||
code.movq(gpr_scratch, xmm_value);
|
||||
|
@ -84,9 +84,9 @@ static void FlushToZero32(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Reg32
|
|||
static void FlushToZero64(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Reg64 gpr_scratch) {
|
||||
Xbyak::Label end;
|
||||
|
||||
auto mask = code.MConst(f64_non_sign_mask);
|
||||
auto mask = code.MConst(xword, f64_non_sign_mask);
|
||||
mask.setBit(64);
|
||||
auto penult_denormal = code.MConst(f64_penultimate_positive_denormal);
|
||||
auto penult_denormal = code.MConst(xword, f64_penultimate_positive_denormal);
|
||||
penult_denormal.setBit(64);
|
||||
|
||||
code.movq(gpr_scratch, xmm_value);
|
||||
|
@ -142,7 +142,7 @@ static void DefaultNaN32(BlockOfCode& code, Xbyak::Xmm xmm_value) {
|
|||
Xbyak::Label end;
|
||||
code.ucomiss(xmm_value, xmm_value);
|
||||
code.jnp(end);
|
||||
code.movaps(xmm_value, code.MConst(f32_nan));
|
||||
code.movaps(xmm_value, code.MConst(xword, f32_nan));
|
||||
code.L(end);
|
||||
}
|
||||
|
||||
|
@ -181,7 +181,7 @@ static void DefaultNaN64(BlockOfCode& code, Xbyak::Xmm xmm_value) {
|
|||
Xbyak::Label end;
|
||||
code.ucomisd(xmm_value, xmm_value);
|
||||
code.jnp(end);
|
||||
code.movaps(xmm_value, code.MConst(f64_nan));
|
||||
code.movaps(xmm_value, code.MConst(xword, f64_nan));
|
||||
code.L(end);
|
||||
}
|
||||
|
||||
|
@ -193,7 +193,7 @@ static Xbyak::Label ProcessNaN32(BlockOfCode& code, Xbyak::Xmm a) {
|
|||
code.SwitchToFarCode();
|
||||
code.L(nan);
|
||||
|
||||
code.orps(a, code.MConst(0x00400000));
|
||||
code.orps(a, code.MConst(xword, 0x00400000));
|
||||
|
||||
code.jmp(end, code.T_NEAR);
|
||||
code.SwitchToNearCode();
|
||||
|
@ -208,7 +208,7 @@ static Xbyak::Label ProcessNaN64(BlockOfCode& code, Xbyak::Xmm a) {
|
|||
code.SwitchToFarCode();
|
||||
code.L(nan);
|
||||
|
||||
code.orps(a, code.MConst(0x0008'0000'0000'0000));
|
||||
code.orps(a, code.MConst(xword, 0x0008'0000'0000'0000));
|
||||
|
||||
code.jmp(end, code.T_NEAR);
|
||||
code.SwitchToNearCode();
|
||||
|
@ -355,7 +355,7 @@ void EmitX64::EmitFPAbs32(EmitContext& ctx, IR::Inst* inst) {
|
|||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||
|
||||
code.pand(result, code.MConst(f32_non_sign_mask));
|
||||
code.pand(result, code.MConst(xword, f32_non_sign_mask));
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
}
|
||||
|
@ -364,7 +364,7 @@ void EmitX64::EmitFPAbs64(EmitContext& ctx, IR::Inst* inst) {
|
|||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||
|
||||
code.pand(result, code.MConst(f64_non_sign_mask));
|
||||
code.pand(result, code.MConst(xword, f64_non_sign_mask));
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
}
|
||||
|
@ -373,7 +373,7 @@ void EmitX64::EmitFPNeg32(EmitContext& ctx, IR::Inst* inst) {
|
|||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||
|
||||
code.pxor(result, code.MConst(f32_negative_zero));
|
||||
code.pxor(result, code.MConst(xword, f32_negative_zero));
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
}
|
||||
|
@ -382,7 +382,7 @@ void EmitX64::EmitFPNeg64(EmitContext& ctx, IR::Inst* inst) {
|
|||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||
|
||||
code.pxor(result, code.MConst(f64_negative_zero));
|
||||
code.pxor(result, code.MConst(xword, f64_negative_zero));
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
}
|
||||
|
@ -612,8 +612,8 @@ void EmitX64::EmitFPSingleToS32(EmitContext& ctx, IR::Inst* inst) {
|
|||
}
|
||||
// Clamp to output range
|
||||
ZeroIfNaN64(code, from, xmm_scratch);
|
||||
code.minsd(from, code.MConst(f64_max_s32));
|
||||
code.maxsd(from, code.MConst(f64_min_s32));
|
||||
code.minsd(from, code.MConst(xword, f64_max_s32));
|
||||
code.maxsd(from, code.MConst(xword, f64_min_s32));
|
||||
// Second time is for real
|
||||
if (round_towards_zero) {
|
||||
code.cvttsd2si(to, from); // 32 bit gpr
|
||||
|
@ -644,8 +644,8 @@ void EmitX64::EmitFPSingleToU32(EmitContext& ctx, IR::Inst* inst) {
|
|||
code.cvtss2sd(from, from);
|
||||
// Clamp to output range
|
||||
ZeroIfNaN64(code, from, xmm_scratch);
|
||||
code.minsd(from, code.MConst(f64_max_u32));
|
||||
code.maxsd(from, code.MConst(f64_min_u32));
|
||||
code.minsd(from, code.MConst(xword, f64_max_u32));
|
||||
code.maxsd(from, code.MConst(xword, f64_min_u32));
|
||||
if (round_towards_zero) {
|
||||
code.cvttsd2si(to, from); // 64 bit gpr
|
||||
} else {
|
||||
|
@ -676,8 +676,8 @@ void EmitX64::EmitFPDoubleToS32(EmitContext& ctx, IR::Inst* inst) {
|
|||
}
|
||||
// Clamp to output range
|
||||
ZeroIfNaN64(code, from, xmm_scratch);
|
||||
code.minsd(from, code.MConst(f64_max_s32));
|
||||
code.maxsd(from, code.MConst(f64_min_s32));
|
||||
code.minsd(from, code.MConst(xword, f64_max_s32));
|
||||
code.maxsd(from, code.MConst(xword, f64_min_s32));
|
||||
// Second time is for real
|
||||
if (round_towards_zero) {
|
||||
code.cvttsd2si(to, from); // 32 bit gpr
|
||||
|
@ -704,8 +704,8 @@ void EmitX64::EmitFPDoubleToU32(EmitContext& ctx, IR::Inst* inst) {
|
|||
}
|
||||
// Clamp to output range
|
||||
ZeroIfNaN64(code, from, xmm_scratch);
|
||||
code.minsd(from, code.MConst(f64_max_u32));
|
||||
code.maxsd(from, code.MConst(f64_min_u32));
|
||||
code.minsd(from, code.MConst(xword, f64_max_u32));
|
||||
code.maxsd(from, code.MConst(xword, f64_min_u32));
|
||||
if (round_towards_zero) {
|
||||
code.cvttsd2si(to, from); // 64 bit gpr
|
||||
} else {
|
||||
|
|
|
@ -100,8 +100,8 @@ void EmitX64::EmitPackedAddU16(EmitContext& ctx, IR::Inst* inst) {
|
|||
// !(b <= a+b) == b > a+b
|
||||
code.movdqa(tmp_a, xmm_a);
|
||||
code.movdqa(tmp_b, xmm_b);
|
||||
code.paddw(tmp_a, code.MConst(0x80008000));
|
||||
code.paddw(tmp_b, code.MConst(0x80008000));
|
||||
code.paddw(tmp_a, code.MConst(xword, 0x80008000));
|
||||
code.paddw(tmp_b, code.MConst(xword, 0x80008000));
|
||||
code.pcmpgtw(tmp_b, tmp_a); // *Signed* comparison!
|
||||
|
||||
ctx.reg_alloc.DefineValue(ge_inst, tmp_b);
|
||||
|
@ -227,8 +227,8 @@ void EmitX64::EmitPackedSubU16(EmitContext& ctx, IR::Inst* inst) {
|
|||
|
||||
// (a >= b) == !(b > a)
|
||||
code.pcmpeqb(ones, ones);
|
||||
code.paddw(xmm_a, code.MConst(0x80008000));
|
||||
code.paddw(xmm_b, code.MConst(0x80008000));
|
||||
code.paddw(xmm_a, code.MConst(xword, 0x80008000));
|
||||
code.paddw(xmm_b, code.MConst(xword, 0x80008000));
|
||||
code.movdqa(xmm_ge, xmm_b);
|
||||
code.pcmpgtw(xmm_ge, xmm_a); // *Signed* comparison!
|
||||
code.pxor(xmm_ge, ones);
|
||||
|
|
|
@ -327,7 +327,7 @@ void EmitX64::EmitVectorArithmeticShiftRight64(EmitContext& ctx, IR::Inst* inst)
|
|||
|
||||
code.pxor(tmp2, tmp2);
|
||||
code.psrlq(result, shift_amount);
|
||||
code.movdqa(tmp1, code.MConst(sign_bit, sign_bit));
|
||||
code.movdqa(tmp1, code.MConst(xword, sign_bit, sign_bit));
|
||||
code.pand(tmp1, result);
|
||||
code.psubq(tmp2, tmp1);
|
||||
code.por(result, tmp2);
|
||||
|
@ -779,7 +779,7 @@ void EmitX64::EmitVectorMultiply8(EmitContext& ctx, IR::Inst* inst) {
|
|||
code.psrlw(tmp_a, 8);
|
||||
code.psrlw(tmp_b, 8);
|
||||
code.pmullw(tmp_a, tmp_b);
|
||||
code.pand(a, code.MConst(0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF));
|
||||
code.pand(a, code.MConst(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF));
|
||||
code.psllw(tmp_a, 8);
|
||||
code.por(a, tmp_a);
|
||||
|
||||
|
@ -839,7 +839,7 @@ void EmitX64::EmitVectorNarrow16(EmitContext& ctx, IR::Inst* inst) {
|
|||
// TODO: AVX512F implementation
|
||||
|
||||
code.pxor(zeros, zeros);
|
||||
code.pand(a, code.MConst(0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF));
|
||||
code.pand(a, code.MConst(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF));
|
||||
code.packuswb(a, zeros);
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, a);
|
||||
|
@ -853,7 +853,7 @@ void EmitX64::EmitVectorNarrow32(EmitContext& ctx, IR::Inst* inst) {
|
|||
// TODO: AVX512F implementation
|
||||
|
||||
code.pxor(zeros, zeros);
|
||||
code.pand(a, code.MConst(0x0000FFFF0000FFFF, 0x0000FFFF0000FFFF));
|
||||
code.pand(a, code.MConst(xword, 0x0000FFFF0000FFFF, 0x0000FFFF0000FFFF));
|
||||
code.packusdw(a, zeros);
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, a);
|
||||
|
@ -1056,11 +1056,11 @@ void EmitX64::EmitVectorPopulationCount(EmitContext& ctx, IR::Inst* inst) {
|
|||
|
||||
code.movdqa(high_a, low_a);
|
||||
code.psrlw(high_a, 4);
|
||||
code.movdqa(tmp1, code.MConst(0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F));
|
||||
code.movdqa(tmp1, code.MConst(xword, 0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F));
|
||||
code.pand(high_a, tmp1); // High nibbles
|
||||
code.pand(low_a, tmp1); // Low nibbles
|
||||
|
||||
code.movdqa(tmp1, code.MConst(0x0302020102010100, 0x0403030203020201));
|
||||
code.movdqa(tmp1, code.MConst(xword, 0x0302020102010100, 0x0403030203020201));
|
||||
code.movdqa(tmp2, tmp1);
|
||||
code.pshufb(tmp1, low_a);
|
||||
code.pshufb(tmp2, high_a);
|
||||
|
|
|
@ -32,7 +32,7 @@ static void EmitVectorOperation32(BlockOfCode& code, EmitContext& ctx, IR::Inst*
|
|||
code.cmpordps(nan_mask, nan_mask);
|
||||
code.andps(xmm_a, nan_mask);
|
||||
code.xorps(nan_mask, tmp);
|
||||
code.andps(nan_mask, code.MConst(0x7fc0'0000'7fc0'0000, 0x7fc0'0000'7fc0'0000));
|
||||
code.andps(nan_mask, code.MConst(xword, 0x7fc0'0000'7fc0'0000, 0x7fc0'0000'7fc0'0000));
|
||||
code.orps(xmm_a, nan_mask);
|
||||
}
|
||||
|
||||
|
@ -114,7 +114,7 @@ static void EmitVectorOperation64(BlockOfCode& code, EmitContext& ctx, IR::Inst*
|
|||
code.cmpordpd(nan_mask, nan_mask);
|
||||
code.andps(xmm_a, nan_mask);
|
||||
code.xorps(nan_mask, tmp);
|
||||
code.andps(nan_mask, code.MConst(0x7ff8'0000'0000'0000, 0x7ff8'0000'0000'0000));
|
||||
code.andps(nan_mask, code.MConst(xword, 0x7ff8'0000'0000'0000, 0x7ff8'0000'0000'0000));
|
||||
code.orps(xmm_a, nan_mask);
|
||||
}
|
||||
|
||||
|
|
|
@ -470,7 +470,7 @@ HostLoc RegAlloc::LoadImmediate(IR::Value imm, HostLoc host_loc) {
|
|||
if (imm_value == 0)
|
||||
code.pxor(reg, reg);
|
||||
else
|
||||
code.movdqa(reg, code.MConst(imm_value)); // TODO: movaps/movapd more appropriate sometimes
|
||||
code.movdqa(reg, code.MConst(code.xword, imm_value)); // TODO: movaps/movapd more appropriate sometimes
|
||||
return host_loc;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue