diff --git a/externals/xbyak b/externals/xbyak index 18c9caaa0..c306b8e57 160000 --- a/externals/xbyak +++ b/externals/xbyak @@ -1 +1 @@ -Subproject commit 18c9caaa0a3ed5706c39f5aa86cce0db6e65b174 +Subproject commit c306b8e5786eeeb87b8925a8af5c3bf057ff5a90 diff --git a/src/common/x64/xbyak_abi.h b/src/common/x64/xbyak_abi.h index 5c0f9d7ff..0139db232 100644 --- a/src/common/x64/xbyak_abi.h +++ b/src/common/x64/xbyak_abi.h @@ -4,14 +4,14 @@ #pragma once +#include #include #include #include "common/assert.h" -#include "common/bit_set.h" namespace Common::X64 { -inline std::size_t RegToIndex(const Xbyak::Reg& reg) { +constexpr std::size_t RegToIndex(const Xbyak::Reg& reg) { using Kind = Xbyak::Reg::Kind; ASSERT_MSG((reg.getKind() & (Kind::REG | Kind::XMM)) != 0, "RegSet only support GPRs and XMM registers."); @@ -19,17 +19,17 @@ inline std::size_t RegToIndex(const Xbyak::Reg& reg) { return reg.getIdx() + (reg.getKind() == Kind::REG ? 0 : 16); } -inline Xbyak::Reg64 IndexToReg64(std::size_t reg_index) { +constexpr Xbyak::Reg64 IndexToReg64(std::size_t reg_index) { ASSERT(reg_index < 16); return Xbyak::Reg64(static_cast(reg_index)); } -inline Xbyak::Xmm IndexToXmm(std::size_t reg_index) { +constexpr Xbyak::Xmm IndexToXmm(std::size_t reg_index) { ASSERT(reg_index >= 16 && reg_index < 32); return Xbyak::Xmm(static_cast(reg_index - 16)); } -inline Xbyak::Reg IndexToReg(std::size_t reg_index) { +constexpr Xbyak::Reg IndexToReg(std::size_t reg_index) { if (reg_index < 16) { return IndexToReg64(reg_index); } else { @@ -37,27 +37,27 @@ inline Xbyak::Reg IndexToReg(std::size_t reg_index) { } } -inline BitSet32 BuildRegSet(std::initializer_list regs) { - BitSet32 bits; +inline std::bitset<32> BuildRegSet(std::initializer_list regs) { + std::bitset<32> bits; for (const Xbyak::Reg& reg : regs) { bits[RegToIndex(reg)] = true; } return bits; } -const BitSet32 ABI_ALL_GPRS(0x0000FFFF); -const BitSet32 ABI_ALL_XMMS(0xFFFF0000); +constexpr inline std::bitset<32> ABI_ALL_GPRS(0x0000FFFF); +constexpr inline std::bitset<32> ABI_ALL_XMMS(0xFFFF0000); #ifdef _WIN32 // Microsoft x64 ABI -const Xbyak::Reg ABI_RETURN = Xbyak::util::rax; -const Xbyak::Reg ABI_PARAM1 = Xbyak::util::rcx; -const Xbyak::Reg ABI_PARAM2 = Xbyak::util::rdx; -const Xbyak::Reg ABI_PARAM3 = Xbyak::util::r8; -const Xbyak::Reg ABI_PARAM4 = Xbyak::util::r9; +constexpr inline Xbyak::Reg ABI_RETURN = Xbyak::util::rax; +constexpr inline Xbyak::Reg ABI_PARAM1 = Xbyak::util::rcx; +constexpr inline Xbyak::Reg ABI_PARAM2 = Xbyak::util::rdx; +constexpr inline Xbyak::Reg ABI_PARAM3 = Xbyak::util::r8; +constexpr inline Xbyak::Reg ABI_PARAM4 = Xbyak::util::r9; -const BitSet32 ABI_ALL_CALLER_SAVED = BuildRegSet({ +const std::bitset<32> ABI_ALL_CALLER_SAVED = BuildRegSet({ // GPRs Xbyak::util::rcx, Xbyak::util::rdx, @@ -74,7 +74,7 @@ const BitSet32 ABI_ALL_CALLER_SAVED = BuildRegSet({ Xbyak::util::xmm5, }); -const BitSet32 ABI_ALL_CALLEE_SAVED = BuildRegSet({ +const std::bitset<32> ABI_ALL_CALLEE_SAVED = BuildRegSet({ // GPRs Xbyak::util::rbx, Xbyak::util::rsi, @@ -102,13 +102,13 @@ constexpr std::size_t ABI_SHADOW_SPACE = 0x20; #else // System V x86-64 ABI -const Xbyak::Reg ABI_RETURN = Xbyak::util::rax; -const Xbyak::Reg ABI_PARAM1 = Xbyak::util::rdi; -const Xbyak::Reg ABI_PARAM2 = Xbyak::util::rsi; -const Xbyak::Reg ABI_PARAM3 = Xbyak::util::rdx; -const Xbyak::Reg ABI_PARAM4 = Xbyak::util::rcx; +constexpr inline Xbyak::Reg ABI_RETURN = Xbyak::util::rax; +constexpr inline Xbyak::Reg ABI_PARAM1 = Xbyak::util::rdi; +constexpr inline Xbyak::Reg ABI_PARAM2 = Xbyak::util::rsi; +constexpr inline Xbyak::Reg ABI_PARAM3 = Xbyak::util::rdx; +constexpr inline Xbyak::Reg ABI_PARAM4 = Xbyak::util::rcx; -const BitSet32 ABI_ALL_CALLER_SAVED = BuildRegSet({ +const std::bitset<32> ABI_ALL_CALLER_SAVED = BuildRegSet({ // GPRs Xbyak::util::rcx, Xbyak::util::rdx, @@ -137,7 +137,7 @@ const BitSet32 ABI_ALL_CALLER_SAVED = BuildRegSet({ Xbyak::util::xmm15, }); -const BitSet32 ABI_ALL_CALLEE_SAVED = BuildRegSet({ +const std::bitset<32> ABI_ALL_CALLEE_SAVED = BuildRegSet({ // GPRs Xbyak::util::rbx, Xbyak::util::rbp, @@ -156,12 +156,12 @@ struct ABIFrameInfo { s32 xmm_offset; }; -inline ABIFrameInfo ABI_CalculateFrameSize(BitSet32 regs, std::size_t rsp_alignment, +inline ABIFrameInfo ABI_CalculateFrameSize(std::bitset<32> regs, std::size_t rsp_alignment, std::size_t needed_frame_size) { - int count = (regs & ABI_ALL_GPRS).Count(); + int count = (regs & ABI_ALL_GPRS).count(); rsp_alignment -= count * 8; std::size_t subtraction = 0; - int xmm_count = (regs & ABI_ALL_XMMS).Count(); + int xmm_count = (regs & ABI_ALL_XMMS).count(); if (xmm_count) { // If we have any XMMs to save, we must align the stack here. subtraction = rsp_alignment & 0xF; @@ -178,35 +178,41 @@ inline ABIFrameInfo ABI_CalculateFrameSize(BitSet32 regs, std::size_t rsp_alignm static_cast(subtraction - xmm_base_subtraction)}; } -inline std::size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, BitSet32 regs, +inline std::size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs, std::size_t rsp_alignment, std::size_t needed_frame_size = 0) { auto frame_info = ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size); - for (int reg_index : (regs & ABI_ALL_GPRS)) { - code.push(IndexToReg64(reg_index)); + for (std::size_t i = 0; i < regs.size(); ++i) { + if (regs[i] && ABI_ALL_GPRS[i]) { + code.push(IndexToReg64(i)); + } } if (frame_info.subtraction != 0) { code.sub(code.rsp, frame_info.subtraction); } - for (int reg_index : (regs & ABI_ALL_XMMS)) { - code.movaps(code.xword[code.rsp + frame_info.xmm_offset], IndexToXmm(reg_index)); - frame_info.xmm_offset += 0x10; + for (std::size_t i = 0; i < regs.size(); ++i) { + if (regs[i] && ABI_ALL_XMMS[i]) { + code.movaps(code.xword[code.rsp + frame_info.xmm_offset], IndexToXmm(i)); + frame_info.xmm_offset += 0x10; + } } return ABI_SHADOW_SPACE; } -inline void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, BitSet32 regs, +inline void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs, std::size_t rsp_alignment, std::size_t needed_frame_size = 0) { auto frame_info = ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size); - for (int reg_index : (regs & ABI_ALL_XMMS)) { - code.movaps(IndexToXmm(reg_index), code.xword[code.rsp + frame_info.xmm_offset]); - frame_info.xmm_offset += 0x10; + for (std::size_t i = 0; i < regs.size(); ++i) { + if (regs[i] && ABI_ALL_XMMS[i]) { + code.movaps(IndexToXmm(i), code.xword[code.rsp + frame_info.xmm_offset]); + frame_info.xmm_offset += 0x10; + } } if (frame_info.subtraction != 0) { diff --git a/src/video_core/shader/shader_jit_x64_compiler.cpp b/src/video_core/shader/shader_jit_x64_compiler.cpp index 5286c5905..606762788 100644 --- a/src/video_core/shader/shader_jit_x64_compiler.cpp +++ b/src/video_core/shader/shader_jit_x64_compiler.cpp @@ -102,40 +102,40 @@ const JitFunction instr_table[64] = { // purposes, as documented below: /// Pointer to the uniform memory -static const Reg64 UNIFORMS = r9; +constexpr Reg64 UNIFORMS = r9; /// The two 32-bit VS address offset registers set by the MOVA instruction -static const Reg64 ADDROFFS_REG_0 = r10; -static const Reg64 ADDROFFS_REG_1 = r11; +constexpr Reg64 ADDROFFS_REG_0 = r10; +constexpr Reg64 ADDROFFS_REG_1 = r11; /// VS loop count register (Multiplied by 16) -static const Reg32 LOOPCOUNT_REG = r12d; +constexpr Reg32 LOOPCOUNT_REG = r12d; /// Current VS loop iteration number (we could probably use LOOPCOUNT_REG, but this quicker) -static const Reg32 LOOPCOUNT = esi; +constexpr Reg32 LOOPCOUNT = esi; /// Number to increment LOOPCOUNT_REG by on each loop iteration (Multiplied by 16) -static const Reg32 LOOPINC = edi; +constexpr Reg32 LOOPINC = edi; /// Result of the previous CMP instruction for the X-component comparison -static const Reg64 COND0 = r13; +constexpr Reg64 COND0 = r13; /// Result of the previous CMP instruction for the Y-component comparison -static const Reg64 COND1 = r14; +constexpr Reg64 COND1 = r14; /// Pointer to the UnitState instance for the current VS unit -static const Reg64 STATE = r15; +constexpr Reg64 STATE = r15; /// SIMD scratch register -static const Xmm SCRATCH = xmm0; +constexpr Xmm SCRATCH = xmm0; /// Loaded with the first swizzled source register, otherwise can be used as a scratch register -static const Xmm SRC1 = xmm1; +constexpr Xmm SRC1 = xmm1; /// Loaded with the second swizzled source register, otherwise can be used as a scratch register -static const Xmm SRC2 = xmm2; +constexpr Xmm SRC2 = xmm2; /// Loaded with the third swizzled source register, otherwise can be used as a scratch register -static const Xmm SRC3 = xmm3; +constexpr Xmm SRC3 = xmm3; /// Additional scratch register -static const Xmm SCRATCH2 = xmm4; +constexpr Xmm SCRATCH2 = xmm4; /// Constant vector of [1.0f, 1.0f, 1.0f, 1.0f], used to efficiently set a vector to one -static const Xmm ONE = xmm14; +constexpr Xmm ONE = xmm14; /// Constant vector of [-0.f, -0.f, -0.f, -0.f], used to efficiently negate a vector with XOR -static const Xmm NEGBIT = xmm15; +constexpr Xmm NEGBIT = xmm15; // State registers that must not be modified by external functions calls // Scratch registers, e.g., SRC1 and SCRATCH, have to be saved on the side if needed -static const BitSet32 persistent_regs = BuildRegSet({ +static const std::bitset<32> persistent_regs = BuildRegSet({ // Pointers to register blocks UNIFORMS, STATE, @@ -356,7 +356,7 @@ void JitShader::Compile_UniformCondition(Instruction instr) { cmp(byte[UNIFORMS + offset], 0); } -BitSet32 JitShader::PersistentCallerSavedRegs() { +std::bitset<32> JitShader::PersistentCallerSavedRegs() { return persistent_regs & ABI_ALL_CALLER_SAVED; } diff --git a/src/video_core/shader/shader_jit_x64_compiler.h b/src/video_core/shader/shader_jit_x64_compiler.h index bdf569632..507cd0ff3 100644 --- a/src/video_core/shader/shader_jit_x64_compiler.h +++ b/src/video_core/shader/shader_jit_x64_compiler.h @@ -5,6 +5,7 @@ #pragma once #include +#include #include #include #include @@ -91,7 +92,7 @@ private: */ void Compile_Return(); - BitSet32 PersistentCallerSavedRegs(); + std::bitset<32> PersistentCallerSavedRegs(); /** * Assertion evaluated at compile-time, but only triggered if executed at runtime.