Merge pull request #3662 from wwylele/shader-hash-cache

shader: avoid recomputing hash for the same program
This commit is contained in:
Weiyi Wang 2018-04-18 12:10:25 +03:00 committed by GitHub
commit 048b0fc0d3
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 62 additions and 24 deletions

View file

@ -451,6 +451,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
LOG_ERROR(HW_GPU, "Invalid GS program offset %u", offset); LOG_ERROR(HW_GPU, "Invalid GS program offset %u", offset);
} else { } else {
g_state.gs.program_code[offset] = value; g_state.gs.program_code[offset] = value;
g_state.gs.MarkProgramCodeDirty();
offset++; offset++;
} }
break; break;
@ -469,6 +470,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
LOG_ERROR(HW_GPU, "Invalid GS swizzle pattern offset %u", offset); LOG_ERROR(HW_GPU, "Invalid GS swizzle pattern offset %u", offset);
} else { } else {
g_state.gs.swizzle_data[offset] = value; g_state.gs.swizzle_data[offset] = value;
g_state.gs.MarkSwizzleDataDirty();
offset++; offset++;
} }
break; break;
@ -518,8 +520,10 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
LOG_ERROR(HW_GPU, "Invalid VS program offset %u", offset); LOG_ERROR(HW_GPU, "Invalid VS program offset %u", offset);
} else { } else {
g_state.vs.program_code[offset] = value; g_state.vs.program_code[offset] = value;
g_state.vs.MarkProgramCodeDirty();
if (!g_state.regs.pipeline.gs_unit_exclusive_configuration) { if (!g_state.regs.pipeline.gs_unit_exclusive_configuration) {
g_state.gs.program_code[offset] = value; g_state.gs.program_code[offset] = value;
g_state.gs.MarkProgramCodeDirty();
} }
offset++; offset++;
} }
@ -539,8 +543,10 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
LOG_ERROR(HW_GPU, "Invalid VS swizzle pattern offset %u", offset); LOG_ERROR(HW_GPU, "Invalid VS swizzle pattern offset %u", offset);
} else { } else {
g_state.vs.swizzle_data[offset] = value; g_state.vs.swizzle_data[offset] = value;
g_state.vs.MarkSwizzleDataDirty();
if (!g_state.regs.pipeline.gs_unit_exclusive_configuration) { if (!g_state.regs.pipeline.gs_unit_exclusive_configuration) {
g_state.gs.swizzle_data[offset] = value; g_state.gs.swizzle_data[offset] = value;
g_state.gs.MarkSwizzleDataDirty();
} }
offset++; offset++;
} }

View file

@ -12,6 +12,7 @@
#include "common/assert.h" #include "common/assert.h"
#include "common/common_funcs.h" #include "common/common_funcs.h"
#include "common/common_types.h" #include "common/common_types.h"
#include "common/hash.h"
#include "common/vector_math.h" #include "common/vector_math.h"
#include "video_core/pica_types.h" #include "video_core/pica_types.h"
#include "video_core/regs_rasterizer.h" #include "video_core/regs_rasterizer.h"
@ -173,27 +174,29 @@ struct GSUnitState : public UnitState {
GSEmitter emitter; GSEmitter emitter;
}; };
struct ShaderSetup { struct Uniforms {
struct {
// The float uniforms are accessed by the shader JIT using SSE instructions, and are // The float uniforms are accessed by the shader JIT using SSE instructions, and are
// therefore required to be 16-byte aligned. // therefore required to be 16-byte aligned.
alignas(16) Math::Vec4<float24> f[96]; alignas(16) Math::Vec4<float24> f[96];
std::array<bool, 16> b; std::array<bool, 16> b;
std::array<Math::Vec4<u8>, 4> i; std::array<Math::Vec4<u8>, 4> i;
} uniforms;
static size_t GetFloatUniformOffset(unsigned index) { static size_t GetFloatUniformOffset(unsigned index) {
return offsetof(ShaderSetup, uniforms.f) + index * sizeof(Math::Vec4<float24>); return offsetof(Uniforms, f) + index * sizeof(Math::Vec4<float24>);
} }
static size_t GetBoolUniformOffset(unsigned index) { static size_t GetBoolUniformOffset(unsigned index) {
return offsetof(ShaderSetup, uniforms.b) + index * sizeof(bool); return offsetof(Uniforms, b) + index * sizeof(bool);
} }
static size_t GetIntUniformOffset(unsigned index) { static size_t GetIntUniformOffset(unsigned index) {
return offsetof(ShaderSetup, uniforms.i) + index * sizeof(Math::Vec4<u8>); return offsetof(Uniforms, i) + index * sizeof(Math::Vec4<u8>);
} }
};
struct ShaderSetup {
Uniforms uniforms;
std::array<u32, MAX_PROGRAM_CODE_LENGTH> program_code; std::array<u32, MAX_PROGRAM_CODE_LENGTH> program_code;
std::array<u32, MAX_SWIZZLE_DATA_LENGTH> swizzle_data; std::array<u32, MAX_SWIZZLE_DATA_LENGTH> swizzle_data;
@ -204,6 +207,36 @@ struct ShaderSetup {
/// Used by the JIT, points to a compiled shader object. /// Used by the JIT, points to a compiled shader object.
const void* cached_shader = nullptr; const void* cached_shader = nullptr;
} engine_data; } engine_data;
void MarkProgramCodeDirty() {
program_code_hash_dirty = true;
}
void MarkSwizzleDataDirty() {
swizzle_data_hash_dirty = true;
}
/// Returns the 64-bit hash of the whole program_code array.
/// The hash is cached: it is recomputed with Common::ComputeHash64 only while the dirty flag
/// is set, and the flag is cleared afterwards. Not const because it updates that cache.
u64 GetProgramCodeHash() {
    // Fast path: nothing was marked dirty since the last computation.
    if (!program_code_hash_dirty) {
        return program_code_hash;
    }

    program_code_hash = Common::ComputeHash64(&program_code, sizeof(program_code));
    program_code_hash_dirty = false;
    return program_code_hash;
}
/// Returns the 64-bit hash of the whole swizzle_data array.
/// The hash is cached: it is recomputed with Common::ComputeHash64 only while the dirty flag
/// is set, and the flag is cleared afterwards. Not const because it updates that cache.
u64 GetSwizzleDataHash() {
    // Fast path: nothing was marked dirty since the last computation.
    if (!swizzle_data_hash_dirty) {
        return swizzle_data_hash;
    }

    swizzle_data_hash = Common::ComputeHash64(&swizzle_data, sizeof(swizzle_data));
    swizzle_data_hash_dirty = false;
    return swizzle_data_hash;
}
private:
// Dirty flags for the cached hashes. Both start out true, so the first GetProgramCodeHash() /
// GetSwizzleDataHash() call always computes a real hash before returning anything.
bool program_code_hash_dirty = true;
bool swizzle_data_hash_dirty = true;
// Cached hash values. The 0xDEADC0DE initializers are placeholders only: because the dirty
// flags above are initially set, they are overwritten before a getter can return them.
u64 program_code_hash = 0xDEADC0DE;
u64 swizzle_data_hash = 0xDEADC0DE;
}; };
class ShaderEngine { class ShaderEngine {

View file

@ -2,7 +2,6 @@
// Licensed under GPLv2 or any later version // Licensed under GPLv2 or any later version
// Refer to the license.txt file included. // Refer to the license.txt file included.
#include "common/hash.h"
#include "common/microprofile.h" #include "common/microprofile.h"
#include "video_core/shader/shader.h" #include "video_core/shader/shader.h"
#include "video_core/shader/shader_jit_x64.h" #include "video_core/shader/shader_jit_x64.h"
@ -18,8 +17,8 @@ void JitX64Engine::SetupBatch(ShaderSetup& setup, unsigned int entry_point) {
ASSERT(entry_point < MAX_PROGRAM_CODE_LENGTH); ASSERT(entry_point < MAX_PROGRAM_CODE_LENGTH);
setup.engine_data.entry_point = entry_point; setup.engine_data.entry_point = entry_point;
u64 code_hash = Common::ComputeHash64(&setup.program_code, sizeof(setup.program_code)); u64 code_hash = setup.GetProgramCodeHash();
u64 swizzle_hash = Common::ComputeHash64(&setup.swizzle_data, sizeof(setup.swizzle_data)); u64 swizzle_hash = setup.GetSwizzleDataHash();
u64 cache_key = code_hash ^ swizzle_hash; u64 cache_key = code_hash ^ swizzle_hash;
auto iter = cache.find(cache_key); auto iter = cache.find(cache_key);

View file

@ -104,7 +104,7 @@ const JitFunction instr_table[64] = {
// purposes, as documented below: // purposes, as documented below:
/// Pointer to the uniform memory /// Pointer to the uniform memory
static const Reg64 SETUP = r9; static const Reg64 UNIFORMS = r9;
/// The two 32-bit VS address offset registers set by the MOVA instruction /// The two 32-bit VS address offset registers set by the MOVA instruction
static const Reg64 ADDROFFS_REG_0 = r10; static const Reg64 ADDROFFS_REG_0 = r10;
static const Reg64 ADDROFFS_REG_1 = r11; static const Reg64 ADDROFFS_REG_1 = r11;
@ -139,7 +139,7 @@ static const Xmm NEGBIT = xmm15;
// Scratch registers, e.g., SRC1 and SCRATCH, have to be saved on the side if needed // Scratch registers, e.g., SRC1 and SCRATCH, have to be saved on the side if needed
static const BitSet32 persistent_regs = BuildRegSet({ static const BitSet32 persistent_regs = BuildRegSet({
// Pointers to register blocks // Pointers to register blocks
SETUP, UNIFORMS,
STATE, STATE,
// Cached registers // Cached registers
ADDROFFS_REG_0, ADDROFFS_REG_0,
@ -184,8 +184,8 @@ void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRe
size_t src_offset; size_t src_offset;
if (src_reg.GetRegisterType() == RegisterType::FloatUniform) { if (src_reg.GetRegisterType() == RegisterType::FloatUniform) {
src_ptr = SETUP; src_ptr = UNIFORMS;
src_offset = ShaderSetup::GetFloatUniformOffset(src_reg.GetIndex()); src_offset = Uniforms::GetFloatUniformOffset(src_reg.GetIndex());
} else { } else {
src_ptr = STATE; src_ptr = STATE;
src_offset = UnitState::InputOffset(src_reg); src_offset = UnitState::InputOffset(src_reg);
@ -354,8 +354,8 @@ void JitShader::Compile_EvaluateCondition(Instruction instr) {
} }
void JitShader::Compile_UniformCondition(Instruction instr) { void JitShader::Compile_UniformCondition(Instruction instr) {
size_t offset = ShaderSetup::GetBoolUniformOffset(instr.flow_control.bool_uniform_id); size_t offset = Uniforms::GetBoolUniformOffset(instr.flow_control.bool_uniform_id);
cmp(byte[SETUP + offset], 0); cmp(byte[UNIFORMS + offset], 0);
} }
BitSet32 JitShader::PersistentCallerSavedRegs() { BitSet32 JitShader::PersistentCallerSavedRegs() {
@ -713,8 +713,8 @@ void JitShader::Compile_LOOP(Instruction instr) {
// This decodes the fields from the integer uniform at index instr.flow_control.int_uniform_id. // This decodes the fields from the integer uniform at index instr.flow_control.int_uniform_id.
// The Y (LOOPCOUNT_REG) and Z (LOOPINC) component are kept multiplied by 16 (Left shifted by // The Y (LOOPCOUNT_REG) and Z (LOOPINC) component are kept multiplied by 16 (Left shifted by
// 4 bits) to be used as an offset into the 16-byte vector registers later // 4 bits) to be used as an offset into the 16-byte vector registers later
size_t offset = ShaderSetup::GetIntUniformOffset(instr.flow_control.int_uniform_id); size_t offset = Uniforms::GetIntUniformOffset(instr.flow_control.int_uniform_id);
mov(LOOPCOUNT, dword[SETUP + offset]); mov(LOOPCOUNT, dword[UNIFORMS + offset]);
mov(LOOPCOUNT_REG, LOOPCOUNT); mov(LOOPCOUNT_REG, LOOPCOUNT);
shr(LOOPCOUNT_REG, 4); shr(LOOPCOUNT_REG, 4);
and_(LOOPCOUNT_REG, 0xFF0); // Y-component is the start and_(LOOPCOUNT_REG, 0xFF0); // Y-component is the start
@ -882,7 +882,7 @@ void JitShader::Compile(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>* program_
ABI_PushRegistersAndAdjustStack(*this, ABI_ALL_CALLEE_SAVED, 8, 16); ABI_PushRegistersAndAdjustStack(*this, ABI_ALL_CALLEE_SAVED, 8, 16);
mov(qword[rsp + 8], 0xFFFFFFFFFFFFFFFFULL); mov(qword[rsp + 8], 0xFFFFFFFFFFFFFFFFULL);
mov(SETUP, ABI_PARAM1); mov(UNIFORMS, ABI_PARAM1);
mov(STATE, ABI_PARAM2); mov(STATE, ABI_PARAM2);
// Zero address/loop registers // Zero address/loop registers

View file

@ -34,7 +34,7 @@ public:
JitShader(); JitShader();
void Run(const ShaderSetup& setup, UnitState& state, unsigned offset) const { void Run(const ShaderSetup& setup, UnitState& state, unsigned offset) const {
program(&setup, &state, instruction_labels[offset].getAddress()); program(&setup.uniforms, &state, instruction_labels[offset].getAddress());
} }
void Compile(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>* program_code, void Compile(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>* program_code,