Merge pull request #6010 from SachinVin/gunman

shader_jit: Fixes for Gunman Clive
This commit is contained in:
BreadFish64 2022-07-06 23:45:44 -05:00 committed by GitHub
commit 353aaaf665
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 107 additions and 31 deletions

View file

@ -7,28 +7,28 @@
#include <memory> #include <memory>
#include <catch2/catch.hpp> #include <catch2/catch.hpp>
#include <nihstro/inline_assembly.h> #include <nihstro/inline_assembly.h>
#include "video_core/shader/shader_interpreter.h"
#include "video_core/shader/shader_jit_x64_compiler.h" #include "video_core/shader/shader_jit_x64_compiler.h"
using float24 = Pica::float24; using float24 = Pica::float24;
using JitShader = Pica::Shader::JitShader; using JitShader = Pica::Shader::JitShader;
using ShaderInterpreter = Pica::Shader::InterpreterEngine;
using DestRegister = nihstro::DestRegister; using DestRegister = nihstro::DestRegister;
using OpCode = nihstro::OpCode; using OpCode = nihstro::OpCode;
using SourceRegister = nihstro::SourceRegister; using SourceRegister = nihstro::SourceRegister;
using Type = nihstro::InlineAsm::Type;
// Test helper: assembles the given nihstro inline-assembly listing to a raw shader binary and
// copies the `.hex` word of every program instruction and every swizzle-table entry into the
// shader's program/swizzle storage.
// The lines below show two revisions side by side: the older CompileShader filled local arrays
// and returned an already-compiled JitShader, while the newer CompileShaderSetup fills a
// heap-allocated Pica::Shader::ShaderSetup and returns it without compiling.
// NOTE(review): neither std::transform call checks that the assembled binary fits in the
// destination storage — presumably the test listings are always far below the maximum; confirm.
static std::unique_ptr<JitShader> CompileShader(std::initializer_list<nihstro::InlineAsm> code) { static std::unique_ptr<Pica::Shader::ShaderSetup> CompileShaderSetup(
std::initializer_list<nihstro::InlineAsm> code) {
const auto shbin = nihstro::InlineAsm::CompileToRawBinary(code); const auto shbin = nihstro::InlineAsm::CompileToRawBinary(code);
std::array<u32, Pica::Shader::MAX_PROGRAM_CODE_LENGTH> program_code{}; auto shader = std::make_unique<Pica::Shader::ShaderSetup>();
std::array<u32, Pica::Shader::MAX_SWIZZLE_DATA_LENGTH> swizzle_data{};
// Copy the raw instruction words.
std::transform(shbin.program.begin(), shbin.program.end(), program_code.begin(), std::transform(shbin.program.begin(), shbin.program.end(), shader->program_code.begin(),
[](const auto& x) { return x.hex; }); [](const auto& x) { return x.hex; });
// Copy the raw swizzle-pattern words.
std::transform(shbin.swizzle_table.begin(), shbin.swizzle_table.end(), swizzle_data.begin(), std::transform(shbin.swizzle_table.begin(), shbin.swizzle_table.end(),
[](const auto& x) { return x.hex; }); shader->swizzle_data.begin(), [](const auto& x) { return x.hex; });
auto shader = std::make_unique<JitShader>();
shader->Compile(&program_code, &swizzle_data);
return shader; return shader;
} }
@ -36,19 +36,32 @@ static std::unique_ptr<JitShader> CompileShader(std::initializer_list<nihstro::I
class ShaderTest { class ShaderTest {
public: public:
// Builds the test fixture from an inline-assembly listing.
// Older revision (left): stored the JitShader returned by CompileShader.
// Newer revision (right): stores the ShaderSetup returned by CompileShaderSetup and then
// compiles shader_jit from that setup's program_code and swizzle_data.
explicit ShaderTest(std::initializer_list<nihstro::InlineAsm> code) explicit ShaderTest(std::initializer_list<nihstro::InlineAsm> code)
: shader(CompileShader(code)) {} : shader_setup(CompileShaderSetup(code)) {
shader_jit.Compile(&shader_setup->program_code, &shader_setup->swizzle_data);
}
// Runs the JIT-compiled shader once on a fresh UnitState with `input` written to input[0].x
// and returns output[0].x converted back to a 32-bit float.
// Older revision (left): set the input register and called shader->Run inline, using a local
// ShaderSetup. Newer revision (right): delegates register setup and execution to RunJit.
float Run(float input) { float Run(float input) {
Pica::Shader::ShaderSetup shader_setup;
Pica::Shader::UnitState shader_unit; Pica::Shader::UnitState shader_unit;
RunJit(shader_unit, input);
shader_unit.registers.input[0].x = float24::FromFloat32(input);
shader->Run(shader_setup, shader_unit, 0);
return shader_unit.registers.output[0].x.ToFloat32();  return shader_unit.registers.output[0].x.ToFloat32();
} }
/// Executes the JIT-compiled program on `shader_unit`.
/// Before running, temporary[0].x is cleared to zero and input[0].x is loaded with `input`;
/// results are left in `shader_unit` for the caller to inspect.
void RunJit(Pica::Shader::UnitState& shader_unit, float input) {
    auto& regs = shader_unit.registers;
    regs.temporary[0].x = float24::FromFloat32(0);
    regs.input[0].x = float24::FromFloat32(input);

    // The trailing 0 is passed straight through to JitShader::Run (presumably the entry offset).
    shader_jit.Run(*shader_setup, shader_unit, 0);
}
/// Executes the same shader setup through the interpreter engine on `shader_unit`.
/// Before running, temporary[0].x is cleared to zero and input[0].x is loaded with `input`;
/// results are left in `shader_unit` for the caller to inspect.
void RunInterpreter(Pica::Shader::UnitState& shader_unit, float input) {
    auto& regs = shader_unit.registers;
    regs.temporary[0].x = float24::FromFloat32(0);
    regs.input[0].x = float24::FromFloat32(input);

    shader_interpreter.Run(*shader_setup, shader_unit);
}
public: public:
std::unique_ptr<JitShader> shader; JitShader shader_jit;
ShaderInterpreter shader_interpreter;
std::unique_ptr<Pica::Shader::ShaderSetup> shader_setup;
}; };
TEST_CASE("LG2", "[video_core][shader][shader_jit]") { TEST_CASE("LG2", "[video_core][shader][shader_jit]") {
@ -89,3 +102,58 @@ TEST_CASE("EX2", "[video_core][shader][shader_jit]") {
REQUIRE(shader.Run(79.7262742773f) == Approx(1.e24f)); REQUIRE(shader.Run(79.7262742773f) == Approx(1.e24f));
REQUIRE(std::isinf(shader.Run(800.f))); REQUIRE(std::isinf(shader.Run(800.f)));
} }
// Exercises a LOOP nested inside another LOOP on the JIT.
// The shader copies the input into a temporary, adds the input once per inner iteration, and
// writes the temporary to output 0. Each scenario checks the accumulated output and the value
// left in address register 2 against the outer loop's integer uniform.
TEST_CASE("Nested Loop", "[video_core][shader][shader_jit]") {
    const auto in_reg = SourceRegister::MakeInput(0);
    const auto tmp_reg = SourceRegister::MakeTemporary(0);
    const auto out_reg = DestRegister::MakeOutput(0);

    auto shader_test = ShaderTest({
        // clang-format off
        {OpCode::Id::MOV, tmp_reg, in_reg},
        {OpCode::Id::LOOP, 0},
            {OpCode::Id::LOOP, 1},
                {OpCode::Id::ADD, tmp_reg, tmp_reg, in_reg},
            {Type::EndLoop},
        {Type::EndLoop},
        {OpCode::Id::MOV, out_reg, tmp_reg},
        {OpCode::Id::END},
        // clang-format on
    });

    // Programs integer uniform 0 (outer loop) and 1 (inner loop), runs the JIT once on a fresh
    // unit state and verifies the results.
    const auto run_scenario = [&shader_test](const Common::Vec4<u8>& outer_uniform,
                                             const Common::Vec4<u8>& inner_uniform) {
        auto& int_uniforms = shader_test.shader_setup->uniforms.i;
        int_uniforms[0] = outer_uniform;
        int_uniforms[1] = inner_uniform;

        // Expected loop register: component 1 plus (component 0 + 1) steps of component 2,
        // all taken from the outer loop's uniform.
        const Common::Vec4<u8> loop_parms{int_uniforms[0]};
        const int expected_aL = loop_parms[1] + ((loop_parms[0] + 1) * loop_parms[2]);

        // One ADD per inner iteration, on top of the initial MOV of the input.
        const float input = 1.0f;
        const int outer_iterations = int_uniforms[0][0] + 1;
        const int inner_iterations = int_uniforms[1][0] + 1;
        const float expected_out = ((outer_iterations * inner_iterations) * input) + input;

        Pica::Shader::UnitState shader_unit_jit;
        shader_test.RunJit(shader_unit_jit, input);

        REQUIRE(shader_unit_jit.address_registers[2] == expected_aL);
        REQUIRE(shader_unit_jit.registers.output[0].x.ToFloat32() == Approx(expected_out));
    };

    run_scenario(Common::Vec4<u8>{4, 0, 1, 0}, Common::Vec4<u8>{4, 0, 1, 0});
    run_scenario(Common::Vec4<u8>{9, 0, 2, 0}, Common::Vec4<u8>{7, 0, 1, 0});
}

View file

@ -164,8 +164,10 @@ static void LogCritical(const char* msg) {
// Checks `condition` while the shader is being compiled; when it is false, emits code that
// loads `msg` into the first ABI parameter register and calls LogCritical.
// The newer revision (lines present only once) brackets the emitted call with a push/pop of
// PersistentCallerSavedRegs() — presumably so the logging call cannot clobber registers the
// surrounding emitted shader code still relies on; confirm against the ABI helpers.
void JitShader::Compile_Assert(bool condition, const char* msg) { void JitShader::Compile_Assert(bool condition, const char* msg) {
if (!condition) { if (!condition) {
ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
mov(ABI_PARAM1, reinterpret_cast<std::size_t>(msg)); mov(ABI_PARAM1, reinterpret_cast<std::size_t>(msg));
CallFarFunction(*this, LogCritical); CallFarFunction(*this, LogCritical);
ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
} }
} }
@ -595,11 +597,11 @@ void JitShader::Compile_END(Instruction instr) {
} }
// Compiles BREAKC: evaluates the instruction's condition and, if it holds, jumps to the break
// label of the loop being compiled. Outside of any loop only the Compile_Assert diagnostic is
// emitted and no jump is generated.
// Older revision (left): a single optional break label guarded by the `looping` flag.
// Newer revision (right): jumps to the most recently pushed entry of loop_break_labels (the
// innermost loop) and requests T_NEAR — presumably because the label is bound later and may be
// out of short-jump range; confirm against Xbyak's jump encoding rules.
void JitShader::Compile_BREAKC(Instruction instr) { void JitShader::Compile_BREAKC(Instruction instr) {
Compile_Assert(looping, "BREAKC must be inside a LOOP"); Compile_Assert(loop_depth, "BREAKC must be inside a LOOP");
if (looping) { if (loop_depth) {
Compile_EvaluateCondition(instr); Compile_EvaluateCondition(instr);
ASSERT(loop_break_label); ASSERT(!loop_break_labels.empty());
jnz(*loop_break_label); jnz(loop_break_labels.back(), T_NEAR);
} }
} }
@ -725,9 +727,11 @@ void JitShader::Compile_IF(Instruction instr) {
void JitShader::Compile_LOOP(Instruction instr) { void JitShader::Compile_LOOP(Instruction instr) {
Compile_Assert(instr.flow_control.dest_offset >= program_counter, Compile_Assert(instr.flow_control.dest_offset >= program_counter,
"Backwards loops not supported"); "Backwards loops not supported");
Compile_Assert(!looping, "Nested loops not supported"); Compile_Assert(loop_depth < 1, "Nested loops may not be supported");
if (loop_depth++) {
looping = true; const auto loop_save_regs = BuildRegSet({LOOPCOUNT_REG, LOOPINC, LOOPCOUNT});
ABI_PushRegistersAndAdjustStack(*this, loop_save_regs, 0);
}
// This decodes the fields from the integer uniform at index instr.flow_control.int_uniform_id. // This decodes the fields from the integer uniform at index instr.flow_control.int_uniform_id.
// The Y (LOOPCOUNT_REG) and Z (LOOPINC) component are kept multiplied by 16 (Left shifted by // The Y (LOOPCOUNT_REG) and Z (LOOPINC) component are kept multiplied by 16 (Left shifted by
@ -746,16 +750,20 @@ void JitShader::Compile_LOOP(Instruction instr) {
Label l_loop_start; Label l_loop_start;
L(l_loop_start); L(l_loop_start);
loop_break_label = Xbyak::Label(); loop_break_labels.emplace_back(Xbyak::Label());
Compile_Block(instr.flow_control.dest_offset + 1); Compile_Block(instr.flow_control.dest_offset + 1);
add(LOOPCOUNT_REG, LOOPINC); // Increment LOOPCOUNT_REG by Z-component add(LOOPCOUNT_REG, LOOPINC); // Increment LOOPCOUNT_REG by Z-component
sub(LOOPCOUNT, 1); // Decrement loop count by 1 sub(LOOPCOUNT, 1); // Decrement loop count by 1
jnz(l_loop_start); // Loop if not equal jnz(l_loop_start); // Loop if not equal
L(*loop_break_label);
loop_break_label.reset();
looping = false; L(loop_break_labels.back());
loop_break_labels.pop_back();
if (--loop_depth) {
const auto loop_save_regs = BuildRegSet({LOOPCOUNT_REG, LOOPINC, LOOPCOUNT});
ABI_PopRegistersAndAdjustStack(*this, loop_save_regs, 0);
}
} }
void JitShader::Compile_JMP(Instruction instr) { void JitShader::Compile_JMP(Instruction instr) {
@ -892,7 +900,7 @@ void JitShader::Compile(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>* program_
// Reset flow control state // Reset flow control state
program = (CompiledShader*)getCurr(); program = (CompiledShader*)getCurr();
program_counter = 0; program_counter = 0;
looping = false; loop_depth = 0;
instruction_labels.fill(Xbyak::Label()); instruction_labels.fill(Xbyak::Label());
// Find all `CALL` instructions and identify return locations // Find all `CALL` instructions and identify return locations

View file

@ -120,15 +120,15 @@ private:
/// Mapping of Pica VS instructions to pointers in the emitted code /// Mapping of Pica VS instructions to pointers in the emitted code
std::array<Xbyak::Label, MAX_PROGRAM_CODE_LENGTH> instruction_labels; std::array<Xbyak::Label, MAX_PROGRAM_CODE_LENGTH> instruction_labels;
/// Label pointing to the end of the current LOOP block. Used by the BREAKC instruction to break /// Labels pointing to the end of each nested LOOP block. Used by the BREAKC instruction to
/// out of the loop. /// break out of a loop.
std::optional<Xbyak::Label> loop_break_label; std::vector<Xbyak::Label> loop_break_labels;
/// Offsets in code where a return needs to be inserted /// Offsets in code where a return needs to be inserted
std::vector<unsigned> return_offsets; std::vector<unsigned> return_offsets;
unsigned program_counter = 0; ///< Offset of the next instruction to decode unsigned program_counter = 0; ///< Offset of the next instruction to decode
bool looping = false; ///< True if compiling a loop, used to check for nested loops u8 loop_depth = 0; ///< Depth of the (nested) loops currently compiled
using CompiledShader = void(const void* setup, void* state, const u8* start_addr); using CompiledShader = void(const void* setup, void* state, const u8* start_addr);
CompiledShader* program = nullptr; CompiledShader* program = nullptr;