Merge pull request #3662 from wwylele/shader-hash-cache

shader: avoid recomputing hash for the same program
This commit is contained in:
Weiyi Wang 2018-04-18 12:10:25 +03:00 committed by GitHub
commit 048b0fc0d3
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 62 additions and 24 deletions

View file

@ -451,6 +451,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
LOG_ERROR(HW_GPU, "Invalid GS program offset %u", offset);
} else {
g_state.gs.program_code[offset] = value;
g_state.gs.MarkProgramCodeDirty();
offset++;
}
break;
@ -469,6 +470,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
LOG_ERROR(HW_GPU, "Invalid GS swizzle pattern offset %u", offset);
} else {
g_state.gs.swizzle_data[offset] = value;
g_state.gs.MarkSwizzleDataDirty();
offset++;
}
break;
@ -518,8 +520,10 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
LOG_ERROR(HW_GPU, "Invalid VS program offset %u", offset);
} else {
g_state.vs.program_code[offset] = value;
g_state.vs.MarkProgramCodeDirty();
if (!g_state.regs.pipeline.gs_unit_exclusive_configuration) {
g_state.gs.program_code[offset] = value;
g_state.gs.MarkProgramCodeDirty();
}
offset++;
}
@ -539,8 +543,10 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
LOG_ERROR(HW_GPU, "Invalid VS swizzle pattern offset %u", offset);
} else {
g_state.vs.swizzle_data[offset] = value;
g_state.vs.MarkSwizzleDataDirty();
if (!g_state.regs.pipeline.gs_unit_exclusive_configuration) {
g_state.gs.swizzle_data[offset] = value;
g_state.gs.MarkSwizzleDataDirty();
}
offset++;
}

View file

@ -12,6 +12,7 @@
#include "common/assert.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "common/hash.h"
#include "common/vector_math.h"
#include "video_core/pica_types.h"
#include "video_core/regs_rasterizer.h"
@ -173,27 +174,29 @@ struct GSUnitState : public UnitState {
GSEmitter emitter;
};
struct ShaderSetup {
struct {
struct Uniforms {
// The float uniforms are accessed by the shader JIT using SSE instructions, and are
// therefore required to be 16-byte aligned.
alignas(16) Math::Vec4<float24> f[96];
std::array<bool, 16> b;
std::array<Math::Vec4<u8>, 4> i;
} uniforms;
/// Returns the byte offset of float uniform `index`: the offset of the `f` array plus `index`
/// elements of size sizeof(Math::Vec4<float24>).
// NOTE(review): this listing appears to be a diff view with its +/- markers stripped; the two
// consecutive returns look like the removed line (offset measured from ShaderSetup) followed by
// the added line (offset measured from Uniforms), not two live statements -- confirm against
// the repository.
static size_t GetFloatUniformOffset(unsigned index) {
return offsetof(ShaderSetup, uniforms.f) + index * sizeof(Math::Vec4<float24>);
return offsetof(Uniforms, f) + index * sizeof(Math::Vec4<float24>);
}
/// Returns the byte offset of bool uniform `index`: the offset of the `b` array plus `index`
/// elements of size sizeof(bool).
// NOTE(review): this listing appears to be a diff view with its +/- markers stripped; the two
// consecutive returns look like the removed line (offset measured from ShaderSetup) followed by
// the added line (offset measured from Uniforms), not two live statements -- confirm against
// the repository.
static size_t GetBoolUniformOffset(unsigned index) {
return offsetof(ShaderSetup, uniforms.b) + index * sizeof(bool);
return offsetof(Uniforms, b) + index * sizeof(bool);
}
/// Returns the byte offset of integer uniform `index`: the offset of the `i` array plus `index`
/// elements of size sizeof(Math::Vec4<u8>).
// NOTE(review): this listing appears to be a diff view with its +/- markers stripped; the two
// consecutive returns look like the removed line (offset measured from ShaderSetup) followed by
// the added line (offset measured from Uniforms), not two live statements -- confirm against
// the repository.
static size_t GetIntUniformOffset(unsigned index) {
return offsetof(ShaderSetup, uniforms.i) + index * sizeof(Math::Vec4<u8>);
return offsetof(Uniforms, i) + index * sizeof(Math::Vec4<u8>);
}
};
struct ShaderSetup {
Uniforms uniforms;
std::array<u32, MAX_PROGRAM_CODE_LENGTH> program_code;
std::array<u32, MAX_SWIZZLE_DATA_LENGTH> swizzle_data;
@ -204,6 +207,36 @@ struct ShaderSetup {
/// Used by the JIT, points to a compiled shader object.
const void* cached_shader = nullptr;
} engine_data;
void MarkProgramCodeDirty() {
program_code_hash_dirty = true;
}
void MarkSwizzleDataDirty() {
swizzle_data_hash_dirty = true;
}
/// Returns the 64-bit hash of the whole program_code array.
///
/// The hash is computed lazily: it is only recalculated when the dirty flag is set (initially,
/// and after MarkProgramCodeDirty()); otherwise the previously stored value is returned.
u64 GetProgramCodeHash() {
    // Fast path: the stored hash still matches the current program code.
    if (!program_code_hash_dirty) {
        return program_code_hash;
    }

    // Slow path: rehash the full array and clear the flag so later calls hit the fast path.
    program_code_hash = Common::ComputeHash64(&program_code, sizeof(program_code));
    program_code_hash_dirty = false;
    return program_code_hash;
}
/// Returns the 64-bit hash of the whole swizzle_data array.
///
/// The hash is computed lazily: it is only recalculated when the dirty flag is set (initially,
/// and after MarkSwizzleDataDirty()); otherwise the previously stored value is returned.
u64 GetSwizzleDataHash() {
    // Fast path: the stored hash still matches the current swizzle data.
    if (!swizzle_data_hash_dirty) {
        return swizzle_data_hash;
    }

    // Slow path: rehash the full array and clear the flag so later calls hit the fast path.
    swizzle_data_hash = Common::ComputeHash64(&swizzle_data, sizeof(swizzle_data));
    swizzle_data_hash_dirty = false;
    return swizzle_data_hash;
}
private:
bool program_code_hash_dirty = true;
bool swizzle_data_hash_dirty = true;
u64 program_code_hash = 0xDEADC0DE;
u64 swizzle_data_hash = 0xDEADC0DE;
};
class ShaderEngine {

View file

@ -2,7 +2,6 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/hash.h"
#include "common/microprofile.h"
#include "video_core/shader/shader.h"
#include "video_core/shader/shader_jit_x64.h"
@ -18,8 +17,8 @@ void JitX64Engine::SetupBatch(ShaderSetup& setup, unsigned int entry_point) {
ASSERT(entry_point < MAX_PROGRAM_CODE_LENGTH);
setup.engine_data.entry_point = entry_point;
u64 code_hash = Common::ComputeHash64(&setup.program_code, sizeof(setup.program_code));
u64 swizzle_hash = Common::ComputeHash64(&setup.swizzle_data, sizeof(setup.swizzle_data));
u64 code_hash = setup.GetProgramCodeHash();
u64 swizzle_hash = setup.GetSwizzleDataHash();
u64 cache_key = code_hash ^ swizzle_hash;
auto iter = cache.find(cache_key);

View file

@ -104,7 +104,7 @@ const JitFunction instr_table[64] = {
// purposes, as documented below:
/// Pointer to the uniform memory
static const Reg64 SETUP = r9;
static const Reg64 UNIFORMS = r9;
/// The two 32-bit VS address offset registers set by the MOVA instruction
static const Reg64 ADDROFFS_REG_0 = r10;
static const Reg64 ADDROFFS_REG_1 = r11;
@ -139,7 +139,7 @@ static const Xmm NEGBIT = xmm15;
// Scratch registers, e.g., SRC1 and SCRATCH, have to be saved on the side if needed
static const BitSet32 persistent_regs = BuildRegSet({
// Pointers to register blocks
SETUP,
UNIFORMS,
STATE,
// Cached registers
ADDROFFS_REG_0,
@ -184,8 +184,8 @@ void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRe
size_t src_offset;
if (src_reg.GetRegisterType() == RegisterType::FloatUniform) {
src_ptr = SETUP;
src_offset = ShaderSetup::GetFloatUniformOffset(src_reg.GetIndex());
src_ptr = UNIFORMS;
src_offset = Uniforms::GetFloatUniformOffset(src_reg.GetIndex());
} else {
src_ptr = STATE;
src_offset = UnitState::InputOffset(src_reg);
@ -354,8 +354,8 @@ void JitShader::Compile_EvaluateCondition(Instruction instr) {
}
/// Emits a `cmp` against 0 of the byte located at the bool-uniform offset selected by
/// instr.flow_control.bool_uniform_id, addressed relative to the uniform base register.
// NOTE(review): this listing appears to be a diff view with its +/- markers stripped; the
// statements come in pairs that look like the removed version (ShaderSetup / SETUP) followed by
// the added version (Uniforms / UNIFORMS), not four live statements -- confirm against the
// repository.
void JitShader::Compile_UniformCondition(Instruction instr) {
size_t offset = ShaderSetup::GetBoolUniformOffset(instr.flow_control.bool_uniform_id);
cmp(byte[SETUP + offset], 0);
size_t offset = Uniforms::GetBoolUniformOffset(instr.flow_control.bool_uniform_id);
cmp(byte[UNIFORMS + offset], 0);
}
BitSet32 JitShader::PersistentCallerSavedRegs() {
@ -713,8 +713,8 @@ void JitShader::Compile_LOOP(Instruction instr) {
// This decodes the fields from the integer uniform at index instr.flow_control.int_uniform_id.
// The Y (LOOPCOUNT_REG) and Z (LOOPINC) component are kept multiplied by 16 (Left shifted by
// 4 bits) to be used as an offset into the 16-byte vector registers later
size_t offset = ShaderSetup::GetIntUniformOffset(instr.flow_control.int_uniform_id);
mov(LOOPCOUNT, dword[SETUP + offset]);
size_t offset = Uniforms::GetIntUniformOffset(instr.flow_control.int_uniform_id);
mov(LOOPCOUNT, dword[UNIFORMS + offset]);
mov(LOOPCOUNT_REG, LOOPCOUNT);
shr(LOOPCOUNT_REG, 4);
and_(LOOPCOUNT_REG, 0xFF0); // Y-component is the start
@ -882,7 +882,7 @@ void JitShader::Compile(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>* program_
ABI_PushRegistersAndAdjustStack(*this, ABI_ALL_CALLEE_SAVED, 8, 16);
mov(qword[rsp + 8], 0xFFFFFFFFFFFFFFFFULL);
mov(SETUP, ABI_PARAM1);
mov(UNIFORMS, ABI_PARAM1);
mov(STATE, ABI_PARAM2);
// Zero address/loop registers

View file

@ -34,7 +34,7 @@ public:
JitShader();
/// Invokes `program` with a pointer derived from `setup`, a pointer to `state`, and the address
/// of the instruction label stored at index `offset`.
// NOTE(review): this listing appears to be a diff view with its +/- markers stripped; the two
// consecutive calls look like the removed line (passing `&setup`) followed by the added line
// (passing `&setup.uniforms`), not two live calls -- confirm against the repository.
void Run(const ShaderSetup& setup, UnitState& state, unsigned offset) const {
program(&setup, &state, instruction_labels[offset].getAddress());
program(&setup.uniforms, &state, instruction_labels[offset].getAddress());
}
void Compile(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>* program_code,