diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp index 06c1fe653..6a27a8015 100644 --- a/src/video_core/shader/shader.cpp +++ b/src/video_core/shader/shader.cpp @@ -67,29 +67,29 @@ OutputVertex Run(UnitState& state, const InputVertex& input, int num_attributes) // Setup input register table const auto& attribute_register_map = config.input_register_map; - if (num_attributes > 0) state.input_registers[attribute_register_map.attribute0_register] = input.attr[0]; - if (num_attributes > 1) state.input_registers[attribute_register_map.attribute1_register] = input.attr[1]; - if (num_attributes > 2) state.input_registers[attribute_register_map.attribute2_register] = input.attr[2]; - if (num_attributes > 3) state.input_registers[attribute_register_map.attribute3_register] = input.attr[3]; - if (num_attributes > 4) state.input_registers[attribute_register_map.attribute4_register] = input.attr[4]; - if (num_attributes > 5) state.input_registers[attribute_register_map.attribute5_register] = input.attr[5]; - if (num_attributes > 6) state.input_registers[attribute_register_map.attribute6_register] = input.attr[6]; - if (num_attributes > 7) state.input_registers[attribute_register_map.attribute7_register] = input.attr[7]; - if (num_attributes > 8) state.input_registers[attribute_register_map.attribute8_register] = input.attr[8]; - if (num_attributes > 9) state.input_registers[attribute_register_map.attribute9_register] = input.attr[9]; - if (num_attributes > 10) state.input_registers[attribute_register_map.attribute10_register] = input.attr[10]; - if (num_attributes > 11) state.input_registers[attribute_register_map.attribute11_register] = input.attr[11]; - if (num_attributes > 12) state.input_registers[attribute_register_map.attribute12_register] = input.attr[12]; - if (num_attributes > 13) state.input_registers[attribute_register_map.attribute13_register] = input.attr[13]; - if (num_attributes > 14) 
state.input_registers[attribute_register_map.attribute14_register] = input.attr[14]; - if (num_attributes > 15) state.input_registers[attribute_register_map.attribute15_register] = input.attr[15]; + if (num_attributes > 0) state.registers.input[attribute_register_map.attribute0_register] = input.attr[0]; + if (num_attributes > 1) state.registers.input[attribute_register_map.attribute1_register] = input.attr[1]; + if (num_attributes > 2) state.registers.input[attribute_register_map.attribute2_register] = input.attr[2]; + if (num_attributes > 3) state.registers.input[attribute_register_map.attribute3_register] = input.attr[3]; + if (num_attributes > 4) state.registers.input[attribute_register_map.attribute4_register] = input.attr[4]; + if (num_attributes > 5) state.registers.input[attribute_register_map.attribute5_register] = input.attr[5]; + if (num_attributes > 6) state.registers.input[attribute_register_map.attribute6_register] = input.attr[6]; + if (num_attributes > 7) state.registers.input[attribute_register_map.attribute7_register] = input.attr[7]; + if (num_attributes > 8) state.registers.input[attribute_register_map.attribute8_register] = input.attr[8]; + if (num_attributes > 9) state.registers.input[attribute_register_map.attribute9_register] = input.attr[9]; + if (num_attributes > 10) state.registers.input[attribute_register_map.attribute10_register] = input.attr[10]; + if (num_attributes > 11) state.registers.input[attribute_register_map.attribute11_register] = input.attr[11]; + if (num_attributes > 12) state.registers.input[attribute_register_map.attribute12_register] = input.attr[12]; + if (num_attributes > 13) state.registers.input[attribute_register_map.attribute13_register] = input.attr[13]; + if (num_attributes > 14) state.registers.input[attribute_register_map.attribute14_register] = input.attr[14]; + if (num_attributes > 15) state.registers.input[attribute_register_map.attribute15_register] = input.attr[15]; state.conditional_code[0] = false; 
state.conditional_code[1] = false; #ifdef ARCHITECTURE_x86_64 if (VideoCore::g_shader_jit_enabled) - jit_shader(&state); + jit_shader(&state.registers); else RunInterpreter(state); #else @@ -117,7 +117,7 @@ OutputVertex Run(UnitState& state, const InputVertex& input, int num_attributes) for (int comp = 0; comp < 4; ++comp) { float24* out = ((float24*)&ret) + semantics[comp]; if (semantics[comp] != Regs::VSOutputAttributes::INVALID) { - *out = state.output_registers[i][comp]; + *out = state.registers.output[i][comp]; } else { // Zero output so that attributes which aren't output won't have denormals in them, // which would slow us down later. diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h index 5825e9983..2007a2844 100644 --- a/src/video_core/shader/shader.h +++ b/src/video_core/shader/shader.h @@ -79,11 +79,14 @@ static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has inva * here will make it easier for us to parallelize the shader processing later. */ struct UnitState { - // The registers are accessed by the shader JIT using SSE instructions, and are therefore - // required to be 16-byte aligned. - Math::Vec4<float24> MEMORY_ALIGNED16(input_registers[16]); - Math::Vec4<float24> MEMORY_ALIGNED16(output_registers[16]); - Math::Vec4<float24> MEMORY_ALIGNED16(temporary_registers[16]); + struct Registers { + // The registers are accessed by the shader JIT using SSE instructions, and are therefore + // required to be 16-byte aligned. 
+ Math::Vec4<float24> MEMORY_ALIGNED16(input[16]); + Math::Vec4<float24> MEMORY_ALIGNED16(output[16]); + Math::Vec4<float24> MEMORY_ALIGNED16(temporary[16]); + } registers; + static_assert(std::is_pod<Registers>::value, "Structure is not POD"); u32 program_counter; bool conditional_code[2]; @@ -116,10 +119,10 @@ struct UnitState { static int InputOffset(const SourceRegister& reg) { switch (reg.GetRegisterType()) { case RegisterType::Input: - return (int)offsetof(UnitState, input_registers) + reg.GetIndex()*sizeof(Math::Vec4<float24>); + return (int)offsetof(UnitState::Registers, input) + reg.GetIndex()*sizeof(Math::Vec4<float24>); case RegisterType::Temporary: - return (int)offsetof(UnitState, temporary_registers) + reg.GetIndex()*sizeof(Math::Vec4<float24>); + return (int)offsetof(UnitState::Registers, temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>); default: UNREACHABLE(); @@ -130,10 +133,10 @@ struct UnitState { static int OutputOffset(const DestRegister& reg) { switch (reg.GetRegisterType()) { case RegisterType::Output: - return (int)offsetof(UnitState, output_registers) + reg.GetIndex()*sizeof(Math::Vec4<float24>); + return (int)offsetof(UnitState::Registers, output) + reg.GetIndex()*sizeof(Math::Vec4<float24>); case RegisterType::Temporary: - return (int)offsetof(UnitState, temporary_registers) + reg.GetIndex()*sizeof(Math::Vec4<float24>); + return (int)offsetof(UnitState::Registers, temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>); default: UNREACHABLE(); diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp index eb48e7053..c8489f920 100644 --- a/src/video_core/shader/shader_interpreter.cpp +++ b/src/video_core/shader/shader_interpreter.cpp @@ -62,10 +62,10 @@ void RunInterpreter(UnitState& state) { auto LookupSourceRegister = [&](const SourceRegister& source_reg) -> const float24* { switch (source_reg.GetRegisterType()) { case RegisterType::Input: - return &state.input_registers[source_reg.GetIndex()].x; + return &state.registers.input[source_reg.GetIndex()].x; case RegisterType::Temporary: - return 
&state.temporary_registers[source_reg.GetIndex()].x; + return &state.registers.temporary[source_reg.GetIndex()].x; case RegisterType::FloatUniform: return &uniforms.f[source_reg.GetIndex()].x; @@ -114,8 +114,8 @@ void RunInterpreter(UnitState& state) { src2[3] = src2[3] * float24::FromFloat32(-1); } - float24* dest = (instr.common.dest.Value() < 0x10) ? &state.output_registers[instr.common.dest.Value().GetIndex()][0] - : (instr.common.dest.Value() < 0x20) ? &state.temporary_registers[instr.common.dest.Value().GetIndex()][0] + float24* dest = (instr.common.dest.Value() < 0x10) ? &state.registers.output[instr.common.dest.Value().GetIndex()][0] + : (instr.common.dest.Value() < 0x20) ? &state.registers.temporary[instr.common.dest.Value().GetIndex()][0] : dummy_vec4_float24; state.debug.max_opdesc_id = std::max(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); @@ -355,8 +355,8 @@ void RunInterpreter(UnitState& state) { src3[3] = src3[3] * float24::FromFloat32(-1); } - float24* dest = (instr.mad.dest.Value() < 0x10) ? &state.output_registers[instr.mad.dest.Value().GetIndex()][0] - : (instr.mad.dest.Value() < 0x20) ? &state.temporary_registers[instr.mad.dest.Value().GetIndex()][0] + float24* dest = (instr.mad.dest.Value() < 0x10) ? &state.registers.output[instr.mad.dest.Value().GetIndex()][0] + : (instr.mad.dest.Value() < 0x20) ? 
&state.registers.temporary[instr.mad.dest.Value().GetIndex()][0] : dummy_vec4_float24; for (int i = 0; i < 4; ++i) { diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp index a7be433df..ce47774d5 100644 --- a/src/video_core/shader/shader_jit_x64.cpp +++ b/src/video_core/shader/shader_jit_x64.cpp @@ -106,7 +106,7 @@ static const X64Reg COND0 = R13; /// Result of the previous CMP instruction for the Y-component comparison static const X64Reg COND1 = R14; -/// Pointer to the UnitState instance for the current VS unit -static const X64Reg STATE = R15; +/// Pointer to the UnitState::Registers instance for the current VS unit +static const X64Reg REGISTERS = R15; /// SIMD scratch register static const X64Reg SCRATCH = XMM0; /// Loaded with the first swizzled source register, otherwise can be used as a scratch register @@ -140,7 +140,7 @@ void JitCompiler::Compile_SwizzleSrc(Instruction instr, unsigned src_num, Source src_ptr = UNIFORMS; src_offset = src_reg.GetIndex() * sizeof(float24) * 4; } else { - src_ptr = STATE; + src_ptr = REGISTERS; src_offset = UnitState::InputOffset(src_reg); } @@ -217,11 +217,11 @@ void JitCompiler::Compile_DestEnable(Instruction instr,X64Reg src) { // If all components are enabled, write the result to the destination register if (swiz.dest_mask == NO_DEST_REG_MASK) { // Store dest back to memory - MOVAPS(MDisp(STATE, UnitState::OutputOffset(dest)), src); + MOVAPS(MDisp(REGISTERS, UnitState::OutputOffset(dest)), src); } else { // Not all components are enabled, so mask the result when storing to the destination register... 
- MOVAPS(SCRATCH, MDisp(STATE, UnitState::OutputOffset(dest))); + MOVAPS(SCRATCH, MDisp(REGISTERS, UnitState::OutputOffset(dest))); if (Common::GetCPUCaps().sse4_1) { u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) | ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1); @@ -240,7 +240,7 @@ void JitCompiler::Compile_DestEnable(Instruction instr,X64Reg src) { } // Store dest back to memory - MOVAPS(MDisp(STATE, UnitState::OutputOffset(dest)), SCRATCH); + MOVAPS(MDisp(REGISTERS, UnitState::OutputOffset(dest)), SCRATCH); } } @@ -635,7 +635,7 @@ CompiledShader* JitCompiler::Compile() { ABI_PushAllCalleeSavedRegsAndAdjustStack(); - MOV(PTRBITS, R(STATE), R(ABI_PARAM1)); + MOV(PTRBITS, R(REGISTERS), R(ABI_PARAM1)); MOV(PTRBITS, R(UNIFORMS), ImmPtr(&g_state.vs.uniforms)); // Zero address/loop registers diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h index 719a24210..b88f2a0d2 100644 --- a/src/video_core/shader/shader_jit_x64.h +++ b/src/video_core/shader/shader_jit_x64.h @@ -20,7 +20,7 @@ namespace Pica { namespace Shader { -using CompiledShader = void(void* state); +using CompiledShader = void(void* registers); /** * This class implements the shader JIT compiler. It recompiles a Pica shader program into x86_64