constant_pool: Implement a constant pool
This commit is contained in:
parent
b1b8acf16e
commit
d9c69ad997
6 changed files with 129 additions and 135 deletions
|
@ -75,6 +75,7 @@ if (ARCHITECTURE_x86_64)
|
||||||
list(APPEND SRCS
|
list(APPEND SRCS
|
||||||
backend_x64/abi.cpp
|
backend_x64/abi.cpp
|
||||||
backend_x64/block_of_code.cpp
|
backend_x64/block_of_code.cpp
|
||||||
|
backend_x64/constant_pool.cpp
|
||||||
backend_x64/emit_x64.cpp
|
backend_x64/emit_x64.cpp
|
||||||
backend_x64/hostloc.cpp
|
backend_x64/hostloc.cpp
|
||||||
backend_x64/interface_x64.cpp
|
backend_x64/interface_x64.cpp
|
||||||
|
@ -85,6 +86,7 @@ if (ARCHITECTURE_x86_64)
|
||||||
list(APPEND HEADERS
|
list(APPEND HEADERS
|
||||||
backend_x64/abi.h
|
backend_x64/abi.h
|
||||||
backend_x64/block_of_code.h
|
backend_x64/block_of_code.h
|
||||||
|
backend_x64/constant_pool.h
|
||||||
backend_x64/emit_x64.h
|
backend_x64/emit_x64.h
|
||||||
backend_x64/hostloc.h
|
backend_x64/hostloc.h
|
||||||
backend_x64/jitstate.h
|
backend_x64/jitstate.h
|
||||||
|
|
|
@ -18,8 +18,7 @@
|
||||||
namespace Dynarmic {
|
namespace Dynarmic {
|
||||||
namespace BackendX64 {
|
namespace BackendX64 {
|
||||||
|
|
||||||
BlockOfCode::BlockOfCode(UserCallbacks cb) : Xbyak::CodeGenerator(128 * 1024 * 1024), cb(cb) {
|
BlockOfCode::BlockOfCode(UserCallbacks cb) : Xbyak::CodeGenerator(128 * 1024 * 1024), cb(cb), constant_pool(this, 256) {
|
||||||
GenConstants();
|
|
||||||
GenRunCode();
|
GenRunCode();
|
||||||
GenReturnFromRunCode();
|
GenReturnFromRunCode();
|
||||||
GenMemoryAccessors();
|
GenMemoryAccessors();
|
||||||
|
@ -44,56 +43,6 @@ void BlockOfCode::ReturnFromRunCode(bool MXCSR_switch) {
|
||||||
jmp(MXCSR_switch ? return_from_run_code : return_from_run_code_without_mxcsr_switch);
|
jmp(MXCSR_switch ? return_from_run_code : return_from_run_code_without_mxcsr_switch);
|
||||||
}
|
}
|
||||||
|
|
||||||
void BlockOfCode::GenConstants() {
|
|
||||||
align();
|
|
||||||
L(consts.FloatNegativeZero32);
|
|
||||||
dd(0x80000000u);
|
|
||||||
|
|
||||||
align();
|
|
||||||
L(consts.FloatNaN32);
|
|
||||||
dd(0x7fc00000u);
|
|
||||||
|
|
||||||
align();
|
|
||||||
L(consts.FloatNonSignMask32);
|
|
||||||
dq(0x7fffffffu);
|
|
||||||
|
|
||||||
align();
|
|
||||||
L(consts.FloatNegativeZero64);
|
|
||||||
dq(0x8000000000000000u);
|
|
||||||
|
|
||||||
align();
|
|
||||||
L(consts.FloatNaN64);
|
|
||||||
dq(0x7ff8000000000000u);
|
|
||||||
|
|
||||||
align();
|
|
||||||
L(consts.FloatNonSignMask64);
|
|
||||||
dq(0x7fffffffffffffffu);
|
|
||||||
|
|
||||||
align();
|
|
||||||
L(consts.FloatPenultimatePositiveDenormal64);
|
|
||||||
dq(0x000ffffffffffffeu);
|
|
||||||
|
|
||||||
align();
|
|
||||||
L(consts.FloatMinS32);
|
|
||||||
dq(0xc1e0000000000000u); // -2147483648 as a double
|
|
||||||
|
|
||||||
align();
|
|
||||||
L(consts.FloatMaxS32);
|
|
||||||
dq(0x41dfffffffc00000u); // 2147483647 as a double
|
|
||||||
|
|
||||||
align();
|
|
||||||
L(consts.FloatPositiveZero32);
|
|
||||||
L(consts.FloatPositiveZero64);
|
|
||||||
L(consts.FloatMinU32);
|
|
||||||
dq(0x0000000000000000u); // 0 as a double
|
|
||||||
|
|
||||||
align();
|
|
||||||
L(consts.FloatMaxU32);
|
|
||||||
dq(0x41efffffffe00000u); // 4294967295 as a double
|
|
||||||
|
|
||||||
align();
|
|
||||||
}
|
|
||||||
|
|
||||||
void BlockOfCode::GenRunCode() {
|
void BlockOfCode::GenRunCode() {
|
||||||
align();
|
align();
|
||||||
run_code = getCurr<RunCodeFuncType>();
|
run_code = getCurr<RunCodeFuncType>();
|
||||||
|
@ -188,6 +137,10 @@ void BlockOfCode::SwitchMxcsrOnExit() {
|
||||||
ldmxcsr(dword[r15 + offsetof(JitState, save_host_MXCSR)]);
|
ldmxcsr(dword[r15 + offsetof(JitState, save_host_MXCSR)]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns a memory operand that refers to `constant` inside this block's
/// constant pool. The lookup/insertion itself is performed by
/// ConstantPool::GetConstant; this is only a forwarding convenience so
/// emitters can write `code->MConst(value)`.
Xbyak::Address BlockOfCode::MConst(u64 constant) {
    Xbyak::Address pooled_operand = constant_pool.GetConstant(constant);
    return pooled_operand;
}
|
||||||
|
|
||||||
void BlockOfCode::nop(size_t size) {
|
void BlockOfCode::nop(size_t size) {
|
||||||
switch (size) {
|
switch (size) {
|
||||||
case 0:
|
case 0:
|
||||||
|
|
|
@ -11,6 +11,7 @@
|
||||||
|
|
||||||
#include <xbyak.h>
|
#include <xbyak.h>
|
||||||
|
|
||||||
|
#include "backend_x64/constant_pool.h"
|
||||||
#include "backend_x64/jitstate.h"
|
#include "backend_x64/jitstate.h"
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
#include "dynarmic/callbacks.h"
|
#include "dynarmic/callbacks.h"
|
||||||
|
@ -52,45 +53,7 @@ public:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Xbyak::Address MFloatPositiveZero32() {
|
Xbyak::Address MConst(u64 constant);
|
||||||
return xword[rip + consts.FloatPositiveZero32];
|
|
||||||
}
|
|
||||||
Xbyak::Address MFloatNegativeZero32() {
|
|
||||||
return xword[rip + consts.FloatNegativeZero32];
|
|
||||||
}
|
|
||||||
Xbyak::Address MFloatNaN32() {
|
|
||||||
return xword[rip + consts.FloatNaN32];
|
|
||||||
}
|
|
||||||
Xbyak::Address MFloatNonSignMask32() {
|
|
||||||
return xword[rip + consts.FloatNonSignMask32];
|
|
||||||
}
|
|
||||||
Xbyak::Address MFloatPositiveZero64() {
|
|
||||||
return xword[rip + consts.FloatPositiveZero64];
|
|
||||||
}
|
|
||||||
Xbyak::Address MFloatNegativeZero64() {
|
|
||||||
return xword[rip + consts.FloatNegativeZero64];
|
|
||||||
}
|
|
||||||
Xbyak::Address MFloatNaN64() {
|
|
||||||
return xword[rip + consts.FloatNaN64];
|
|
||||||
}
|
|
||||||
Xbyak::Address MFloatNonSignMask64() {
|
|
||||||
return xword[rip + consts.FloatNonSignMask64];
|
|
||||||
}
|
|
||||||
Xbyak::Address MFloatPenultimatePositiveDenormal64() {
|
|
||||||
return xword[rip + consts.FloatPenultimatePositiveDenormal64];
|
|
||||||
}
|
|
||||||
Xbyak::Address MFloatMinS32() {
|
|
||||||
return xword[rip + consts.FloatMinS32];
|
|
||||||
}
|
|
||||||
Xbyak::Address MFloatMaxS32() {
|
|
||||||
return xword[rip + consts.FloatMaxS32];
|
|
||||||
}
|
|
||||||
Xbyak::Address MFloatMinU32() {
|
|
||||||
return xword[rip + consts.FloatMinU32];
|
|
||||||
}
|
|
||||||
Xbyak::Address MFloatMaxU32() {
|
|
||||||
return xword[rip + consts.FloatMaxU32];
|
|
||||||
}
|
|
||||||
|
|
||||||
const void* GetReturnFromRunCodeAddress() const {
|
const void* GetReturnFromRunCodeAddress() const {
|
||||||
return return_from_run_code;
|
return return_from_run_code;
|
||||||
|
@ -155,22 +118,7 @@ private:
|
||||||
UserCallbacks cb;
|
UserCallbacks cb;
|
||||||
CodePtr user_code_begin;
|
CodePtr user_code_begin;
|
||||||
|
|
||||||
struct Consts {
|
ConstantPool constant_pool;
|
||||||
Xbyak::Label FloatPositiveZero32;
|
|
||||||
Xbyak::Label FloatNegativeZero32;
|
|
||||||
Xbyak::Label FloatNaN32;
|
|
||||||
Xbyak::Label FloatNonSignMask32;
|
|
||||||
Xbyak::Label FloatPositiveZero64;
|
|
||||||
Xbyak::Label FloatNegativeZero64;
|
|
||||||
Xbyak::Label FloatNaN64;
|
|
||||||
Xbyak::Label FloatNonSignMask64;
|
|
||||||
Xbyak::Label FloatPenultimatePositiveDenormal64;
|
|
||||||
Xbyak::Label FloatMinS32;
|
|
||||||
Xbyak::Label FloatMaxS32;
|
|
||||||
Xbyak::Label FloatMinU32;
|
|
||||||
Xbyak::Label FloatMaxU32;
|
|
||||||
} consts;
|
|
||||||
void GenConstants();
|
|
||||||
|
|
||||||
using RunCodeFuncType = void(*)(JitState*, CodePtr);
|
using RunCodeFuncType = void(*)(JitState*, CodePtr);
|
||||||
RunCodeFuncType run_code = nullptr;
|
RunCodeFuncType run_code = nullptr;
|
||||||
|
|
36
src/backend_x64/constant_pool.cpp
Normal file
36
src/backend_x64/constant_pool.cpp
Normal file
|
@ -0,0 +1,36 @@
|
||||||
|
/* This file is part of the dynarmic project.
|
||||||
|
* Copyright (c) 2016 MerryMage
|
||||||
|
* This software may be used and distributed according to the terms of the GNU
|
||||||
|
* General Public License version 2 or any later version.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <cstring>
|
||||||
|
|
||||||
|
#include "backend_x64/block_of_code.h"
|
||||||
|
#include "backend_x64/constant_pool.h"
|
||||||
|
#include "common/assert.h"
|
||||||
|
|
||||||
|
namespace Dynarmic {
|
||||||
|
namespace BackendX64 {
|
||||||
|
|
||||||
|
/// Carves `size` bytes out of `code`'s code space to hold pooled constants.
///
/// @param code  Code block the pool storage is allocated from; also used later
///              to build the operands returned by GetConstant.
/// @param size  Size of the pool in bytes.
ConstantPool::ConstantPool(BlockOfCode* code, size_t size) : code(code), pool_size(size) {
    // Emit an int3 before the pool data.
    // NOTE(review): presumably so that execution running into the pool traps - confirm.
    code->int3();
    // Align the emission point to the slot size before allocating the pool.
    code->align(align_size);

    auto storage = code->AllocateFromCodeSpace(size);
    pool_begin = reinterpret_cast<u8*>(storage);
    current_pool_ptr = pool_begin;

    // Zero the whole pool up front: GetConstant only writes the low 8 bytes of
    // each 16-byte slot, so the remaining bytes keep this zero fill.
    std::memset(pool_begin, 0, pool_size);
}
|
||||||
|
|
||||||
|
/// Returns a RIP-relative, 128-bit (`xword`) memory operand for `constant`.
///
/// The first request for a given value copies it into the next free slot of
/// the pool; later requests for the same value reuse that slot. Each slot is
/// `align_size` bytes wide, of which only the low 8 bytes are written here.
///
/// @param constant  64-bit value to place in (or look up from) the pool.
/// @return Operand of the form `xword[rip + slot_address]`.
///
/// Asserts if the pool has no room left for a complete slot.
Xbyak::Address ConstantPool::GetConstant(u64 constant) {
    auto iter = constant_info.find(constant);
    if (iter == constant_info.end()) {
        // Callers receive an xword operand covering the whole slot, so the
        // entire align_size-byte slot must lie inside the pool - checking only
        // the slot's first byte would let the last slot overrun a pool whose
        // size is not a multiple of align_size.
        ASSERT(static_cast<size_t>(current_pool_ptr - pool_begin) + align_size <= pool_size);
        std::memcpy(current_pool_ptr, &constant, sizeof(u64));
        iter = constant_info.emplace(constant, current_pool_ptr).first;
        current_pool_ptr += align_size;
    }
    return code->xword[code->rip + iter->second];
}
|
||||||
|
|
||||||
|
} // namespace BackendX64
|
||||||
|
} // namespace Dynarmic
|
42
src/backend_x64/constant_pool.h
Normal file
42
src/backend_x64/constant_pool.h
Normal file
|
@ -0,0 +1,42 @@
|
||||||
|
/* This file is part of the dynarmic project.
|
||||||
|
* Copyright (c) 2016 MerryMage
|
||||||
|
* This software may be used and distributed according to the terms of the GNU
|
||||||
|
* General Public License version 2 or any later version.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <map>
|
||||||
|
|
||||||
|
#include <xbyak.h>
|
||||||
|
|
||||||
|
#include "common/common_types.h"
|
||||||
|
|
||||||
|
namespace Dynarmic {
|
||||||
|
namespace BackendX64 {
|
||||||
|
|
||||||
|
class BlockOfCode;
|
||||||
|
|
||||||
|
/// ConstantPool allocates a block of memory from BlockOfCode.
|
||||||
|
/// It places constants into this block of memory, returning the address
|
||||||
|
/// of the memory location where the constant is placed. If the constant
|
||||||
|
/// already exists, its memory location is reused.
|
||||||
|
class ConstantPool final {
|
||||||
|
public:
|
||||||
|
ConstantPool(BlockOfCode* code, size_t size);
|
||||||
|
|
||||||
|
Xbyak::Address GetConstant(u64 constant);
|
||||||
|
|
||||||
|
private:
|
||||||
|
constexpr static size_t align_size = 16; // bytes
|
||||||
|
|
||||||
|
std::map<u64, void*> constant_info;
|
||||||
|
|
||||||
|
BlockOfCode* code;
|
||||||
|
size_t pool_size;
|
||||||
|
u8* pool_begin;
|
||||||
|
u8* current_pool_ptr;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace BackendX64
|
||||||
|
} // namespace Dynarmic
|
|
@ -27,6 +27,19 @@
|
||||||
namespace Dynarmic {
|
namespace Dynarmic {
|
||||||
namespace BackendX64 {
|
namespace BackendX64 {
|
||||||
|
|
||||||
|
// Raw bit patterns of floating-point values and masks. They are passed to
// BlockOfCode::MConst, which takes a u64, so the single-precision patterns
// are also held in a u64 with the upper 32 bits clear.

// Single-precision patterns.
constexpr u64 f32_negative_zero = 0x80000000u; // only the sign bit set
constexpr u64 f32_nan           = 0x7fc00000u;
constexpr u64 f32_non_sign_mask = 0x7fffffffu; // every bit except the sign bit

// Double-precision patterns.
constexpr u64 f64_negative_zero = 0x8000000000000000u; // only the sign bit set
constexpr u64 f64_nan           = 0x7ff8000000000000u;
constexpr u64 f64_non_sign_mask = 0x7fffffffffffffffu; // every bit except the sign bit

// Double-precision comparison and clamping bounds.
constexpr u64 f64_penultimate_positive_denormal = 0x000ffffffffffffeu;
constexpr u64 f64_min_s32 = 0xc1e0000000000000u; // -2147483648 as a double
constexpr u64 f64_max_s32 = 0x41dfffffffc00000u; // 2147483647 as a double
constexpr u64 f64_min_u32 = 0x0000000000000000u; // 0 as a double
|
||||||
|
|
||||||
static Xbyak::Address MJitStateReg(Arm::Reg reg) {
|
static Xbyak::Address MJitStateReg(Arm::Reg reg) {
|
||||||
using namespace Xbyak::util;
|
using namespace Xbyak::util;
|
||||||
return dword[r15 + offsetof(JitState, Reg) + sizeof(u32) * static_cast<size_t>(reg)];
|
return dword[r15 + offsetof(JitState, Reg) + sizeof(u32) * static_cast<size_t>(reg)];
|
||||||
|
@ -2094,9 +2107,9 @@ static void DenormalsAreZero64(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::R
|
||||||
using namespace Xbyak::util;
|
using namespace Xbyak::util;
|
||||||
Xbyak::Label end;
|
Xbyak::Label end;
|
||||||
|
|
||||||
auto mask = code->MFloatNonSignMask64();
|
auto mask = code->MConst(f64_non_sign_mask);
|
||||||
mask.setBit(64);
|
mask.setBit(64);
|
||||||
auto penult_denormal = code->MFloatPenultimatePositiveDenormal64();
|
auto penult_denormal = code->MConst(f64_penultimate_positive_denormal);
|
||||||
penult_denormal.setBit(64);
|
penult_denormal.setBit(64);
|
||||||
|
|
||||||
code->movq(gpr_scratch, xmm_value);
|
code->movq(gpr_scratch, xmm_value);
|
||||||
|
@ -2127,9 +2140,9 @@ static void FlushToZero64(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::Reg64
|
||||||
using namespace Xbyak::util;
|
using namespace Xbyak::util;
|
||||||
Xbyak::Label end;
|
Xbyak::Label end;
|
||||||
|
|
||||||
auto mask = code->MFloatNonSignMask64();
|
auto mask = code->MConst(f64_non_sign_mask);
|
||||||
mask.setBit(64);
|
mask.setBit(64);
|
||||||
auto penult_denormal = code->MFloatPenultimatePositiveDenormal64();
|
auto penult_denormal = code->MConst(f64_penultimate_positive_denormal);
|
||||||
penult_denormal.setBit(64);
|
penult_denormal.setBit(64);
|
||||||
|
|
||||||
code->movq(gpr_scratch, xmm_value);
|
code->movq(gpr_scratch, xmm_value);
|
||||||
|
@ -2147,7 +2160,7 @@ static void DefaultNaN32(BlockOfCode* code, Xbyak::Xmm xmm_value) {
|
||||||
|
|
||||||
code->ucomiss(xmm_value, xmm_value);
|
code->ucomiss(xmm_value, xmm_value);
|
||||||
code->jnp(end);
|
code->jnp(end);
|
||||||
code->movaps(xmm_value, code->MFloatNaN32());
|
code->movaps(xmm_value, code->MConst(f32_nan));
|
||||||
code->L(end);
|
code->L(end);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2156,7 +2169,7 @@ static void DefaultNaN64(BlockOfCode* code, Xbyak::Xmm xmm_value) {
|
||||||
|
|
||||||
code->ucomisd(xmm_value, xmm_value);
|
code->ucomisd(xmm_value, xmm_value);
|
||||||
code->jnp(end);
|
code->jnp(end);
|
||||||
code->movaps(xmm_value, code->MFloatNaN64());
|
code->movaps(xmm_value, code->MConst(f64_nan));
|
||||||
code->L(end);
|
code->L(end);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2288,7 +2301,7 @@ void EmitX64::EmitFPAbs32(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
|
||||||
auto args = reg_alloc.GetArgumentInfo(inst);
|
auto args = reg_alloc.GetArgumentInfo(inst);
|
||||||
Xbyak::Xmm result = reg_alloc.UseScratchXmm(args[0]);
|
Xbyak::Xmm result = reg_alloc.UseScratchXmm(args[0]);
|
||||||
|
|
||||||
code->pand(result, code->MFloatNonSignMask32());
|
code->pand(result, code->MConst(f32_non_sign_mask));
|
||||||
|
|
||||||
reg_alloc.DefineValue(inst, result);
|
reg_alloc.DefineValue(inst, result);
|
||||||
}
|
}
|
||||||
|
@ -2297,7 +2310,7 @@ void EmitX64::EmitFPAbs64(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
|
||||||
auto args = reg_alloc.GetArgumentInfo(inst);
|
auto args = reg_alloc.GetArgumentInfo(inst);
|
||||||
Xbyak::Xmm result = reg_alloc.UseScratchXmm(args[0]);
|
Xbyak::Xmm result = reg_alloc.UseScratchXmm(args[0]);
|
||||||
|
|
||||||
code->pand(result, code->MFloatNonSignMask64());
|
code->pand(result, code->MConst(f64_non_sign_mask));
|
||||||
|
|
||||||
reg_alloc.DefineValue(inst, result);
|
reg_alloc.DefineValue(inst, result);
|
||||||
}
|
}
|
||||||
|
@ -2306,7 +2319,7 @@ void EmitX64::EmitFPNeg32(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
|
||||||
auto args = reg_alloc.GetArgumentInfo(inst);
|
auto args = reg_alloc.GetArgumentInfo(inst);
|
||||||
Xbyak::Xmm result = reg_alloc.UseScratchXmm(args[0]);
|
Xbyak::Xmm result = reg_alloc.UseScratchXmm(args[0]);
|
||||||
|
|
||||||
code->pxor(result, code->MFloatNegativeZero32());
|
code->pxor(result, code->MConst(f32_negative_zero));
|
||||||
|
|
||||||
reg_alloc.DefineValue(inst, result);
|
reg_alloc.DefineValue(inst, result);
|
||||||
}
|
}
|
||||||
|
@ -2315,7 +2328,7 @@ void EmitX64::EmitFPNeg64(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
|
||||||
auto args = reg_alloc.GetArgumentInfo(inst);
|
auto args = reg_alloc.GetArgumentInfo(inst);
|
||||||
Xbyak::Xmm result = reg_alloc.UseScratchXmm(args[0]);
|
Xbyak::Xmm result = reg_alloc.UseScratchXmm(args[0]);
|
||||||
|
|
||||||
code->pxor(result, code->MFloatNegativeZero64());
|
code->pxor(result, code->MConst(f64_negative_zero));
|
||||||
|
|
||||||
reg_alloc.DefineValue(inst, result);
|
reg_alloc.DefineValue(inst, result);
|
||||||
}
|
}
|
||||||
|
@ -2473,8 +2486,8 @@ void EmitX64::EmitFPSingleToS32(RegAlloc& reg_alloc, IR::Block& block, IR::Inst*
|
||||||
}
|
}
|
||||||
// Clamp to output range
|
// Clamp to output range
|
||||||
ZeroIfNaN64(code, from, xmm_scratch);
|
ZeroIfNaN64(code, from, xmm_scratch);
|
||||||
code->minsd(from, code->MFloatMaxS32());
|
code->minsd(from, code->MConst(f64_max_s32));
|
||||||
code->maxsd(from, code->MFloatMinS32());
|
code->maxsd(from, code->MConst(f64_min_s32));
|
||||||
// Second time is for real
|
// Second time is for real
|
||||||
if (round_towards_zero) {
|
if (round_towards_zero) {
|
||||||
code->cvttsd2si(to, from); // 32 bit gpr
|
code->cvttsd2si(to, from); // 32 bit gpr
|
||||||
|
@ -2506,12 +2519,12 @@ void EmitX64::EmitFPSingleToU32(RegAlloc& reg_alloc, IR::Block& block, IR::Inst*
|
||||||
code->cvtss2sd(from, from);
|
code->cvtss2sd(from, from);
|
||||||
ZeroIfNaN64(code, from, xmm_scratch);
|
ZeroIfNaN64(code, from, xmm_scratch);
|
||||||
// Bring into SSE range
|
// Bring into SSE range
|
||||||
code->addsd(from, code->MFloatMinS32());
|
code->addsd(from, code->MConst(f64_min_s32));
|
||||||
// First time is to set flags
|
// First time is to set flags
|
||||||
code->cvtsd2si(to, from); // 32 bit gpr
|
code->cvtsd2si(to, from); // 32 bit gpr
|
||||||
// Clamp to output range
|
// Clamp to output range
|
||||||
code->minsd(from, code->MFloatMaxS32());
|
code->minsd(from, code->MConst(f64_max_s32));
|
||||||
code->maxsd(from, code->MFloatMinS32());
|
code->maxsd(from, code->MConst(f64_min_s32));
|
||||||
// Actually convert
|
// Actually convert
|
||||||
code->cvtsd2si(to, from); // 32 bit gpr
|
code->cvtsd2si(to, from); // 32 bit gpr
|
||||||
// Bring back into original range
|
// Bring back into original range
|
||||||
|
@ -2526,18 +2539,18 @@ void EmitX64::EmitFPSingleToU32(RegAlloc& reg_alloc, IR::Block& block, IR::Inst*
|
||||||
code->cvtss2sd(from, from);
|
code->cvtss2sd(from, from);
|
||||||
ZeroIfNaN64(code, from, xmm_scratch);
|
ZeroIfNaN64(code, from, xmm_scratch);
|
||||||
// Generate masks if out-of-signed-range
|
// Generate masks if out-of-signed-range
|
||||||
code->movaps(xmm_mask, code->MFloatMaxS32());
|
code->movaps(xmm_mask, code->MConst(f64_max_s32));
|
||||||
code->cmpltsd(xmm_mask, from);
|
code->cmpltsd(xmm_mask, from);
|
||||||
code->movd(gpr_mask, xmm_mask);
|
code->movd(gpr_mask, xmm_mask);
|
||||||
code->pand(xmm_mask, code->MFloatMinS32());
|
code->pand(xmm_mask, code->MConst(f64_min_s32));
|
||||||
code->and_(gpr_mask, u32(2147483648u));
|
code->and_(gpr_mask, u32(2147483648u));
|
||||||
// Bring into range if necessary
|
// Bring into range if necessary
|
||||||
code->addsd(from, xmm_mask);
|
code->addsd(from, xmm_mask);
|
||||||
// First time is to set flags
|
// First time is to set flags
|
||||||
code->cvttsd2si(to, from); // 32 bit gpr
|
code->cvttsd2si(to, from); // 32 bit gpr
|
||||||
// Clamp to output range
|
// Clamp to output range
|
||||||
code->minsd(from, code->MFloatMaxS32());
|
code->minsd(from, code->MConst(f64_max_s32));
|
||||||
code->maxsd(from, code->MFloatMinU32());
|
code->maxsd(from, code->MConst(f64_min_u32));
|
||||||
// Actually convert
|
// Actually convert
|
||||||
code->cvttsd2si(to, from); // 32 bit gpr
|
code->cvttsd2si(to, from); // 32 bit gpr
|
||||||
// Bring back into original range if necessary
|
// Bring back into original range if necessary
|
||||||
|
@ -2568,8 +2581,8 @@ void EmitX64::EmitFPDoubleToS32(RegAlloc& reg_alloc, IR::Block& block, IR::Inst*
|
||||||
}
|
}
|
||||||
// Clamp to output range
|
// Clamp to output range
|
||||||
ZeroIfNaN64(code, from, xmm_scratch);
|
ZeroIfNaN64(code, from, xmm_scratch);
|
||||||
code->minsd(from, code->MFloatMaxS32());
|
code->minsd(from, code->MConst(f64_max_s32));
|
||||||
code->maxsd(from, code->MFloatMinS32());
|
code->maxsd(from, code->MConst(f64_min_s32));
|
||||||
// Second time is for real
|
// Second time is for real
|
||||||
if (round_towards_zero) {
|
if (round_towards_zero) {
|
||||||
code->cvttsd2si(to, from); // 32 bit gpr
|
code->cvttsd2si(to, from); // 32 bit gpr
|
||||||
|
@ -2598,12 +2611,12 @@ void EmitX64::EmitFPDoubleToU32(RegAlloc& reg_alloc, IR::Block& block, IR::Inst*
|
||||||
}
|
}
|
||||||
ZeroIfNaN64(code, from, xmm_scratch);
|
ZeroIfNaN64(code, from, xmm_scratch);
|
||||||
// Bring into SSE range
|
// Bring into SSE range
|
||||||
code->addsd(from, code->MFloatMinS32());
|
code->addsd(from, code->MConst(f64_min_s32));
|
||||||
// First time is to set flags
|
// First time is to set flags
|
||||||
code->cvtsd2si(gpr_scratch, from); // 32 bit gpr
|
code->cvtsd2si(gpr_scratch, from); // 32 bit gpr
|
||||||
// Clamp to output range
|
// Clamp to output range
|
||||||
code->minsd(from, code->MFloatMaxS32());
|
code->minsd(from, code->MConst(f64_max_s32));
|
||||||
code->maxsd(from, code->MFloatMinS32());
|
code->maxsd(from, code->MConst(f64_min_s32));
|
||||||
// Actually convert
|
// Actually convert
|
||||||
code->cvtsd2si(to, from); // 32 bit gpr
|
code->cvtsd2si(to, from); // 32 bit gpr
|
||||||
// Bring back into original range
|
// Bring back into original range
|
||||||
|
@ -2617,18 +2630,18 @@ void EmitX64::EmitFPDoubleToU32(RegAlloc& reg_alloc, IR::Block& block, IR::Inst*
|
||||||
}
|
}
|
||||||
ZeroIfNaN64(code, from, xmm_scratch);
|
ZeroIfNaN64(code, from, xmm_scratch);
|
||||||
// Generate masks if out-of-signed-range
|
// Generate masks if out-of-signed-range
|
||||||
code->movaps(xmm_mask, code->MFloatMaxS32());
|
code->movaps(xmm_mask, code->MConst(f64_max_s32));
|
||||||
code->cmpltsd(xmm_mask, from);
|
code->cmpltsd(xmm_mask, from);
|
||||||
code->movd(gpr_mask, xmm_mask);
|
code->movd(gpr_mask, xmm_mask);
|
||||||
code->pand(xmm_mask, code->MFloatMinS32());
|
code->pand(xmm_mask, code->MConst(f64_min_s32));
|
||||||
code->and_(gpr_mask, u32(2147483648u));
|
code->and_(gpr_mask, u32(2147483648u));
|
||||||
// Bring into range if necessary
|
// Bring into range if necessary
|
||||||
code->addsd(from, xmm_mask);
|
code->addsd(from, xmm_mask);
|
||||||
// First time is to set flags
|
// First time is to set flags
|
||||||
code->cvttsd2si(gpr_scratch, from); // 32 bit gpr
|
code->cvttsd2si(gpr_scratch, from); // 32 bit gpr
|
||||||
// Clamp to output range
|
// Clamp to output range
|
||||||
code->minsd(from, code->MFloatMaxS32());
|
code->minsd(from, code->MConst(f64_max_s32));
|
||||||
code->maxsd(from, code->MFloatMinU32());
|
code->maxsd(from, code->MConst(f64_min_u32));
|
||||||
// Actually convert
|
// Actually convert
|
||||||
code->cvttsd2si(to, from); // 32 bit gpr
|
code->cvttsd2si(to, from); // 32 bit gpr
|
||||||
// Bring back into original range if necessary
|
// Bring back into original range if necessary
|
||||||
|
|
Loading…
Reference in a new issue