video_core/shader: Refactor JIT-Engines into JitEngine type (#7210)

This commit is contained in:
Wunk 2023-11-26 15:15:36 -08:00 committed by GitHub
parent db7b929e47
commit 83b329f6e1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
12 changed files with 80 additions and 165 deletions

View file

@ -37,7 +37,7 @@ static inline u64 ComputeStructHash64(const T& data) noexcept {
* Combines the seed parameter with the provided hash, producing a new combined hash
* Implementation from: http://boost.sourceforge.net/doc/html/boost/hash_combine.html
*/
inline u64 HashCombine(std::size_t seed, const u64 hash) {
inline u64 HashCombine(const u64 seed, const u64 hash) {
return seed ^ (hash + 0x9e3779b9 + (seed << 6) + (seed >> 2));
}

View file

@ -156,13 +156,11 @@ add_library(video_core STATIC
shader/shader.h
shader/shader_interpreter.cpp
shader/shader_interpreter.h
shader/shader_jit_a64.cpp
shader/shader_jit.cpp
shader/shader_jit.h
shader/shader_jit_a64_compiler.cpp
shader/shader_jit_a64.h
shader/shader_jit_a64_compiler.h
shader/shader_jit_x64.cpp
shader/shader_jit_x64_compiler.cpp
shader/shader_jit_x64.h
shader/shader_jit_x64_compiler.h
texture/etc1.cpp
texture/etc1.h

View file

@ -13,17 +13,15 @@
#include "video_core/regs_shader.h"
#include "video_core/shader/shader.h"
#include "video_core/shader/shader_interpreter.h"
#if CITRA_ARCH(x86_64)
#include "video_core/shader/shader_jit_x64.h"
#elif CITRA_ARCH(arm64)
#include "video_core/shader/shader_jit_a64.h"
#endif
#if CITRA_ARCH(x86_64) || CITRA_ARCH(arm64)
#include "video_core/shader/shader_jit.h"
#endif // CITRA_ARCH(x86_64) || CITRA_ARCH(arm64)
#include "video_core/video_core.h"
namespace Pica::Shader {
void OutputVertex::ValidateSemantics(const RasterizerRegs& regs) {
unsigned int num_attributes = regs.vs_output_total;
u32 num_attributes = regs.vs_output_total;
ASSERT(num_attributes <= 7);
for (std::size_t attrib = 0; attrib < num_attributes; ++attrib) {
u32 output_register_map = regs.vs_output_attributes[attrib].raw;
@ -54,7 +52,7 @@ OutputVertex OutputVertex::FromAttributeBuffer(const RasterizerRegs& regs,
static_assert(sizeof(std::array<f24, 24>) == sizeof(ret),
"Struct and array have different sizes.");
unsigned int num_attributes = regs.vs_output_total & 7;
u32 num_attributes = regs.vs_output_total & 7;
for (std::size_t attrib = 0; attrib < num_attributes; ++attrib) {
const auto output_register_map = regs.vs_output_attributes[attrib];
vertex_slots_overflow[output_register_map.map_x] = input.attr[attrib][0];
@ -65,7 +63,7 @@ OutputVertex OutputVertex::FromAttributeBuffer(const RasterizerRegs& regs,
// The hardware takes the absolute value and saturates vertex colors like this, *before*
// doing interpolation
for (unsigned i = 0; i < 4; ++i) {
for (u32 i = 0; i < 4; ++i) {
float c = std::fabs(ret.color[i].ToFloat32());
ret.color[i] = f24::FromFloat32(c < 1.0f ? c : 1.0f);
}
@ -84,10 +82,10 @@ OutputVertex OutputVertex::FromAttributeBuffer(const RasterizerRegs& regs,
}
void UnitState::LoadInput(const ShaderRegs& config, const AttributeBuffer& input) {
const unsigned max_attribute = config.max_input_attribute_index;
const u32 max_attribute = config.max_input_attribute_index;
for (unsigned attr = 0; attr <= max_attribute; ++attr) {
unsigned reg = config.GetRegisterForAttribute(attr);
for (u32 attr = 0; attr <= max_attribute; ++attr) {
u32 reg = config.GetRegisterForAttribute(attr);
registers.input[reg] = input.attr[attr];
}
}
@ -141,11 +139,9 @@ void GSUnitState::ConfigOutput(const ShaderRegs& config) {
MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240));
#if CITRA_ARCH(x86_64)
static std::unique_ptr<JitX64Engine> jit_engine;
#elif CITRA_ARCH(arm64)
static std::unique_ptr<JitA64Engine> jit_engine;
#endif
#if CITRA_ARCH(x86_64) || CITRA_ARCH(arm64)
static std::unique_ptr<JitEngine> jit_engine;
#endif // CITRA_ARCH(x86_64) || CITRA_ARCH(arm64)
static InterpreterEngine interpreter_engine;
ShaderEngine* GetEngine() {
@ -153,7 +149,7 @@ ShaderEngine* GetEngine() {
// TODO(yuriks): Re-initialize on each change rather than being persistent
if (VideoCore::g_shader_jit_enabled) {
if (jit_engine == nullptr) {
jit_engine = std::make_unique<decltype(jit_engine)::element_type>();
jit_engine = std::make_unique<JitEngine>();
}
return jit_engine.get();
}
@ -164,7 +160,7 @@ ShaderEngine* GetEngine() {
void Shutdown() {
#if CITRA_ARCH(x86_64) || CITRA_ARCH(arm64)
jit_engine = nullptr;
jit_engine.reset();
#endif // CITRA_ARCH(x86_64) || CITRA_ARCH(arm64)
}

View file

@ -22,8 +22,8 @@
namespace Pica::Shader {
constexpr unsigned MAX_PROGRAM_CODE_LENGTH = 4096;
constexpr unsigned MAX_SWIZZLE_DATA_LENGTH = 4096;
constexpr u32 MAX_PROGRAM_CODE_LENGTH = 4096;
constexpr u32 MAX_SWIZZLE_DATA_LENGTH = 4096;
using ProgramCode = std::array<u32, MAX_PROGRAM_CODE_LENGTH>;
using SwizzleData = std::array<u32, MAX_SWIZZLE_DATA_LENGTH>;
@ -33,7 +33,7 @@ struct AttributeBuffer {
private:
friend class boost::serialization::access;
template <class Archive>
void serialize(Archive& ar, const unsigned int file_version) {
void serialize(Archive& ar, const u32 file_version) {
ar& attr;
}
};
@ -62,7 +62,7 @@ struct OutputVertex {
private:
template <class Archive>
void serialize(Archive& ar, const unsigned int) {
void serialize(Archive& ar, const u32) {
ar& pos;
ar& quat;
ar& color;
@ -113,7 +113,7 @@ struct GSEmitter {
private:
friend class boost::serialization::access;
template <class Archive>
void serialize(Archive& ar, const unsigned int file_version) {
void serialize(Archive& ar, const u32 file_version) {
ar& buffer;
ar& vertex_id;
ar& prim_emit;
@ -142,7 +142,7 @@ struct UnitState {
private:
friend class boost::serialization::access;
template <class Archive>
void serialize(Archive& ar, const unsigned int file_version) {
void serialize(Archive& ar, const u32 file_version) {
ar& input;
ar& temporary;
ar& output;
@ -158,15 +158,15 @@ struct UnitState {
GSEmitter* emitter_ptr;
static std::size_t InputOffset(int register_index) {
static std::size_t InputOffset(s32 register_index) {
return offsetof(UnitState, registers.input) + register_index * sizeof(Common::Vec4<f24>);
}
static std::size_t OutputOffset(int register_index) {
static std::size_t OutputOffset(s32 register_index) {
return offsetof(UnitState, registers.output) + register_index * sizeof(Common::Vec4<f24>);
}
static std::size_t TemporaryOffset(int register_index) {
static std::size_t TemporaryOffset(s32 register_index) {
return offsetof(UnitState, registers.temporary) +
register_index * sizeof(Common::Vec4<f24>);
}
@ -184,7 +184,7 @@ struct UnitState {
private:
friend class boost::serialization::access;
template <class Archive>
void serialize(Archive& ar, const unsigned int file_version) {
void serialize(Archive& ar, const u32 file_version) {
ar& registers;
ar& conditional_code;
ar& address_registers;
@ -207,7 +207,7 @@ struct GSUnitState : public UnitState {
private:
friend class boost::serialization::access;
template <class Archive>
void serialize(Archive& ar, const unsigned int file_version) {
void serialize(Archive& ar, const u32 file_version) {
ar& boost::serialization::base_object<UnitState>(*this);
ar& emitter;
}
@ -221,22 +221,22 @@ struct Uniforms {
std::array<bool, 16> b;
std::array<Common::Vec4<u8>, 4> i;
static std::size_t GetFloatUniformOffset(unsigned index) {
static std::size_t GetFloatUniformOffset(u32 index) {
return offsetof(Uniforms, f) + index * sizeof(Common::Vec4<f24>);
}
static std::size_t GetBoolUniformOffset(unsigned index) {
static std::size_t GetBoolUniformOffset(u32 index) {
return offsetof(Uniforms, b) + index * sizeof(bool);
}
static std::size_t GetIntUniformOffset(unsigned index) {
static std::size_t GetIntUniformOffset(u32 index) {
return offsetof(Uniforms, i) + index * sizeof(Common::Vec4<u8>);
}
private:
friend class boost::serialization::access;
template <class Archive>
void serialize(Archive& ar, const unsigned int file_version) {
void serialize(Archive& ar, const u32 file_version) {
ar& f;
ar& b;
ar& i;
@ -251,7 +251,7 @@ struct ShaderSetup {
/// Data private to ShaderEngines
struct EngineData {
unsigned int entry_point;
u32 entry_point;
/// Used by the JIT, points to a compiled shader object.
const void* cached_shader = nullptr;
} engine_data;
@ -288,7 +288,7 @@ private:
friend class boost::serialization::access;
template <class Archive>
void serialize(Archive& ar, const unsigned int file_version) {
void serialize(Archive& ar, const u32 file_version) {
ar& uniforms;
ar& program_code;
ar& swizzle_data;
@ -307,7 +307,7 @@ public:
* Performs any shader unit setup that only needs to happen once per shader (as opposed to once
* per vertex, which would happen within the `Run` function).
*/
virtual void SetupBatch(ShaderSetup& setup, unsigned int entry_point) = 0;
virtual void SetupBatch(ShaderSetup& setup, u32 entry_point) = 0;
/**
* Runs the currently setup shader.

View file

@ -3,27 +3,32 @@
// Refer to the license.txt file included.
#include "common/arch.h"
#if CITRA_ARCH(x86_64)
#if CITRA_ARCH(x86_64) || CITRA_ARCH(arm64)
#include "common/assert.h"
#include "common/microprofile.h"
#include "video_core/shader/shader.h"
#include "video_core/shader/shader_jit_x64.h"
#include "video_core/shader/shader_jit.h"
#if CITRA_ARCH(arm64)
#include "video_core/shader/shader_jit_a64_compiler.h"
#endif
#if CITRA_ARCH(x86_64)
#include "video_core/shader/shader_jit_x64_compiler.h"
#endif
namespace Pica::Shader {
JitX64Engine::JitX64Engine() = default;
JitX64Engine::~JitX64Engine() = default;
JitEngine::JitEngine() = default;
JitEngine::~JitEngine() = default;
void JitX64Engine::SetupBatch(ShaderSetup& setup, unsigned int entry_point) {
void JitEngine::SetupBatch(ShaderSetup& setup, u32 entry_point) {
ASSERT(entry_point < MAX_PROGRAM_CODE_LENGTH);
setup.engine_data.entry_point = entry_point;
u64 code_hash = setup.GetProgramCodeHash();
u64 swizzle_hash = setup.GetSwizzleDataHash();
const u64 code_hash = setup.GetProgramCodeHash();
const u64 swizzle_hash = setup.GetSwizzleDataHash();
u64 cache_key = code_hash ^ swizzle_hash;
const u64 cache_key = Common::HashCombine(code_hash, swizzle_hash);
auto iter = cache.find(cache_key);
if (iter != cache.end()) {
setup.engine_data.cached_shader = iter->second.get();
@ -37,7 +42,7 @@ void JitX64Engine::SetupBatch(ShaderSetup& setup, unsigned int entry_point) {
MICROPROFILE_DECLARE(GPU_Shader);
void JitX64Engine::Run(const ShaderSetup& setup, UnitState& state) const {
void JitEngine::Run(const ShaderSetup& setup, UnitState& state) const {
ASSERT(setup.engine_data.cached_shader != nullptr);
MICROPROFILE_SCOPE(GPU_Shader);
@ -48,4 +53,4 @@ void JitX64Engine::Run(const ShaderSetup& setup, UnitState& state) const {
} // namespace Pica::Shader
#endif // CITRA_ARCH(x86_64)
#endif // CITRA_ARCH(x86_64) || CITRA_ARCH(arm64)

View file

@ -5,7 +5,7 @@
#pragma once
#include "common/arch.h"
#if CITRA_ARCH(x86_64)
#if CITRA_ARCH(x86_64) || CITRA_ARCH(arm64)
#include <memory>
#include <unordered_map>
@ -16,12 +16,12 @@ namespace Pica::Shader {
class JitShader;
class JitX64Engine final : public ShaderEngine {
class JitEngine final : public ShaderEngine {
public:
JitX64Engine();
~JitX64Engine() override;
JitEngine();
~JitEngine() override;
void SetupBatch(ShaderSetup& setup, unsigned int entry_point) override;
void SetupBatch(ShaderSetup& setup, u32 entry_point) override;
void Run(const ShaderSetup& setup, UnitState& state) const override;
private:
@ -30,4 +30,4 @@ private:
} // namespace Pica::Shader
#endif // CITRA_ARCH(x86_64)
#endif // CITRA_ARCH(x86_64) || CITRA_ARCH(arm64)

View file

@ -1,51 +0,0 @@
// Copyright 2023 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/arch.h"
#if CITRA_ARCH(arm64)
#include "common/assert.h"
#include "common/microprofile.h"
#include "video_core/shader/shader.h"
#include "video_core/shader/shader_jit_a64.h"
#include "video_core/shader/shader_jit_a64_compiler.h"
namespace Pica::Shader {
JitA64Engine::JitA64Engine() = default;
JitA64Engine::~JitA64Engine() = default;
void JitA64Engine::SetupBatch(ShaderSetup& setup, unsigned int entry_point) {
ASSERT(entry_point < MAX_PROGRAM_CODE_LENGTH);
setup.engine_data.entry_point = entry_point;
u64 code_hash = setup.GetProgramCodeHash();
u64 swizzle_hash = setup.GetSwizzleDataHash();
u64 cache_key = code_hash ^ swizzle_hash;
auto iter = cache.find(cache_key);
if (iter != cache.end()) {
setup.engine_data.cached_shader = iter->second.get();
} else {
auto shader = std::make_unique<JitShader>();
shader->Compile(&setup.program_code, &setup.swizzle_data);
setup.engine_data.cached_shader = shader.get();
cache.emplace_hint(iter, cache_key, std::move(shader));
}
}
MICROPROFILE_DECLARE(GPU_Shader);
void JitA64Engine::Run(const ShaderSetup& setup, UnitState& state) const {
ASSERT(setup.engine_data.cached_shader != nullptr);
MICROPROFILE_SCOPE(GPU_Shader);
const JitShader* shader = static_cast<const JitShader*>(setup.engine_data.cached_shader);
shader->Run(setup, state, setup.engine_data.entry_point);
}
} // namespace Pica::Shader
#endif // CITRA_ARCH(arm64)

View file

@ -1,33 +0,0 @@
// Copyright 2023 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "common/arch.h"
#if CITRA_ARCH(arm64)
#include <memory>
#include <unordered_map>
#include "common/common_types.h"
#include "video_core/shader/shader.h"
namespace Pica::Shader {
class JitShader;
class JitA64Engine final : public ShaderEngine {
public:
JitA64Engine();
~JitA64Engine() override;
void SetupBatch(ShaderSetup& setup, unsigned int entry_point) override;
void Run(const ShaderSetup& setup, UnitState& state) const override;
private:
std::unordered_map<u64, std::unique_ptr<JitShader>> cache;
};
} // namespace Pica::Shader
#endif // CITRA_ARCH(arm64)

View file

@ -163,7 +163,7 @@ void JitShader::Compile_Assert(bool condition, const char* msg) {}
* @param src_reg SourceRegister object corresponding to the source register to load
* @param dest Destination QReg register to store the loaded, swizzled source register
*/
void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg,
void JitShader::Compile_SwizzleSrc(Instruction instr, u32 src_num, SourceRegister src_reg,
QReg dest) {
XReg src_ptr = XZR;
std::size_t src_offset;
@ -855,7 +855,7 @@ void JitShader::Compile_SETE(Instruction instr) {
l(end);
}
void JitShader::Compile_Block(unsigned end) {
void JitShader::Compile_Block(u32 end) {
while (program_counter < end) {
Compile_NextInstr();
}
@ -957,7 +957,7 @@ void JitShader::Compile(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>* program_
BR(ABI_PARAM3);
// Compile entire program
Compile_Block(static_cast<unsigned>(program_code->size()));
Compile_Block(static_cast<u32>(program_code->size()));
// Free memory that's no longer needed
program_code = nullptr;

View file

@ -37,7 +37,7 @@ class JitShader : private oaknut::CodeBlock, public oaknut::CodeGenerator {
public:
JitShader();
void Run(const ShaderSetup& setup, UnitState& state, unsigned offset) const {
void Run(const ShaderSetup& setup, UnitState& state, u32 offset) const {
program(&setup.uniforms, &state, instruction_labels[offset].ptr<const std::byte*>());
}
@ -75,10 +75,10 @@ public:
void Compile_SETE(Instruction instr);
private:
void Compile_Block(unsigned end);
void Compile_Block(u32 end);
void Compile_NextInstr();
void Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg,
void Compile_SwizzleSrc(Instruction instr, u32 src_num, SourceRegister src_reg,
oaknut::QReg dest);
void Compile_DestEnable(Instruction instr, oaknut::QReg dest);
@ -129,9 +129,9 @@ private:
std::vector<oaknut::Label> loop_break_labels;
/// Offsets in code where a return needs to be inserted
std::vector<unsigned> return_offsets;
std::vector<u32> return_offsets;
unsigned program_counter = 0; ///< Offset of the next instruction to decode
u32 program_counter = 0; ///< Offset of the next instruction to decode
u8 loop_depth = 0; ///< Depth of the (nested) loops currently compiled
using CompiledShader = void(const void* setup, void* state, const std::byte* start_addr);

View file

@ -187,7 +187,7 @@ void JitShader::Compile_Assert(bool condition, const char* msg) {
* @param src_reg SourceRegister object corresponding to the source register to load
* @param dest Destination XMM register to store the loaded, swizzled source register
*/
void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg,
void JitShader::Compile_SwizzleSrc(Instruction instr, u32 src_num, SourceRegister src_reg,
Xmm dest) {
Reg64 src_ptr;
std::size_t src_offset;
@ -213,13 +213,13 @@ void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRe
ASSERT_MSG(src_offset == static_cast<std::size_t>(src_offset_disp),
"Source register offset too large for int type");
unsigned operand_desc_id;
u32 operand_desc_id;
const bool is_inverted =
(0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed));
unsigned address_register_index;
unsigned offset_src;
u32 address_register_index;
u32 offset_src;
if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD ||
instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI) {
@ -254,7 +254,7 @@ void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRe
// First we add 128 to address_reg so the first comparison is turned to
// address_reg >= 0 && address_reg < 256 which can be performed with
// a single unsigned comparison (cmovb)
// a single unsigned comparison (cmovb)
lea(eax, ptr[address_reg + 128]);
mov(ebx, src_reg.GetIndex());
mov(ecx, address_reg.cvt32());
@ -297,7 +297,7 @@ void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRe
void JitShader::Compile_DestEnable(Instruction instr, Xmm src) {
DestRegister dest;
unsigned operand_desc_id;
u32 operand_desc_id;
if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD ||
instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI) {
operand_desc_id = instr.mad.operand_desc_id;
@ -915,7 +915,7 @@ void JitShader::Compile_SETE(Instruction instr) {
L(end);
}
void JitShader::Compile_Block(unsigned end) {
void JitShader::Compile_Block(u32 end) {
while (program_counter < end) {
Compile_NextInstr();
}
@ -943,7 +943,7 @@ void JitShader::Compile_NextInstr() {
Instruction instr = {(*program_code)[program_counter++]};
OpCode::Id opcode = instr.opcode.Value();
auto instr_func = instr_table[static_cast<unsigned>(opcode)];
auto instr_func = instr_table[static_cast<u32>(opcode)];
if (instr_func) {
// JIT the instruction!
@ -1023,7 +1023,7 @@ void JitShader::Compile(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>* program_
jmp(ABI_PARAM3);
// Compile entire program
Compile_Block(static_cast<unsigned>(program_code->size()));
Compile_Block(static_cast<u32>(program_code->size()));
// Free memory that's no longer needed
program_code = nullptr;

View file

@ -36,7 +36,7 @@ class JitShader : public Xbyak::CodeGenerator {
public:
JitShader();
void Run(const ShaderSetup& setup, UnitState& state, unsigned offset) const {
void Run(const ShaderSetup& setup, UnitState& state, u32 offset) const {
program(&setup.uniforms, &state, instruction_labels[offset].getAddress());
}
@ -74,10 +74,10 @@ public:
void Compile_SETE(Instruction instr);
private:
void Compile_Block(unsigned end);
void Compile_Block(u32 end);
void Compile_NextInstr();
void Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg,
void Compile_SwizzleSrc(Instruction instr, u32 src_num, SourceRegister src_reg,
Xbyak::Xmm dest);
void Compile_DestEnable(Instruction instr, Xbyak::Xmm dest);
@ -128,9 +128,9 @@ private:
std::vector<Xbyak::Label> loop_break_labels;
/// Offsets in code where a return needs to be inserted
std::vector<unsigned> return_offsets;
std::vector<u32> return_offsets;
unsigned program_counter = 0; ///< Offset of the next instruction to decode
u32 program_counter = 0; ///< Offset of the next instruction to decode
u8 loop_depth = 0; ///< Depth of the (nested) loops currently compiled
using CompiledShader = void(const void* setup, void* state, const u8* start_addr);