Merge branch 'master' into quickstart-faq
This commit is contained in:
commit
0f4512291a
143 changed files with 6352 additions and 2236 deletions
3
.gitmodules
vendored
3
.gitmodules
vendored
|
@ -13,6 +13,9 @@
|
||||||
[submodule "soundtouch"]
|
[submodule "soundtouch"]
|
||||||
path = externals/soundtouch
|
path = externals/soundtouch
|
||||||
url = https://github.com/citra-emu/ext-soundtouch.git
|
url = https://github.com/citra-emu/ext-soundtouch.git
|
||||||
|
[submodule "libressl"]
|
||||||
|
path = externals/libressl
|
||||||
|
url = https://github.com/citra-emu/ext-libressl-portable.git
|
||||||
[submodule "discord-rpc"]
|
[submodule "discord-rpc"]
|
||||||
path = externals/discord-rpc
|
path = externals/discord-rpc
|
||||||
url = https://github.com/discordapp/discord-rpc.git
|
url = https://github.com/discordapp/discord-rpc.git
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
cmake_minimum_required(VERSION 3.11)
|
cmake_minimum_required(VERSION 3.15)
|
||||||
|
|
||||||
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/CMakeModules")
|
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/CMakeModules")
|
||||||
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/externals/cmake-modules")
|
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/externals/cmake-modules")
|
||||||
|
@ -13,7 +13,7 @@ project(yuzu)
|
||||||
option(ENABLE_SDL2 "Enable the SDL2 frontend" ON)
|
option(ENABLE_SDL2 "Enable the SDL2 frontend" ON)
|
||||||
|
|
||||||
option(ENABLE_QT "Enable the Qt frontend" ON)
|
option(ENABLE_QT "Enable the Qt frontend" ON)
|
||||||
CMAKE_DEPENDENT_OPTION(YUZU_USE_BUNDLED_QT "Download bundled Qt binaries" OFF "ENABLE_QT;MSVC" OFF)
|
CMAKE_DEPENDENT_OPTION(YUZU_USE_BUNDLED_QT "Download bundled Qt binaries" ON "ENABLE_QT;MSVC" OFF)
|
||||||
|
|
||||||
option(ENABLE_WEB_SERVICE "Enable web services (telemetry, etc.)" ON)
|
option(ENABLE_WEB_SERVICE "Enable web services (telemetry, etc.)" ON)
|
||||||
|
|
||||||
|
@ -152,7 +152,6 @@ macro(yuzu_find_packages)
|
||||||
"Boost 1.71 boost/1.72.0"
|
"Boost 1.71 boost/1.72.0"
|
||||||
"Catch2 2.11 catch2/2.11.0"
|
"Catch2 2.11 catch2/2.11.0"
|
||||||
"fmt 6.2 fmt/6.2.0"
|
"fmt 6.2 fmt/6.2.0"
|
||||||
"OpenSSL 1.1 openssl/1.1.1f"
|
|
||||||
# can't use until https://github.com/bincrafters/community/issues/1173
|
# can't use until https://github.com/bincrafters/community/issues/1173
|
||||||
#"libzip 1.5 libzip/1.5.2@bincrafters/stable"
|
#"libzip 1.5 libzip/1.5.2@bincrafters/stable"
|
||||||
"lz4 1.8 lz4/1.9.2"
|
"lz4 1.8 lz4/1.9.2"
|
||||||
|
@ -312,15 +311,6 @@ elseif (TARGET Boost::boost)
|
||||||
add_library(boost ALIAS Boost::boost)
|
add_library(boost ALIAS Boost::boost)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if (NOT TARGET OpenSSL::SSL)
|
|
||||||
set_target_properties(OpenSSL::OpenSSL PROPERTIES IMPORTED_GLOBAL TRUE)
|
|
||||||
add_library(OpenSSL::SSL ALIAS OpenSSL::OpenSSL)
|
|
||||||
endif()
|
|
||||||
if (NOT TARGET OpenSSL::Crypto)
|
|
||||||
set_target_properties(OpenSSL::OpenSSL PROPERTIES IMPORTED_GLOBAL TRUE)
|
|
||||||
add_library(OpenSSL::Crypto ALIAS OpenSSL::OpenSSL)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if (TARGET sdl2::sdl2)
|
if (TARGET sdl2::sdl2)
|
||||||
# imported from the conan generated sdl2Config.cmake
|
# imported from the conan generated sdl2Config.cmake
|
||||||
set_target_properties(sdl2::sdl2 PROPERTIES IMPORTED_GLOBAL TRUE)
|
set_target_properties(sdl2::sdl2 PROPERTIES IMPORTED_GLOBAL TRUE)
|
||||||
|
|
|
@ -51,6 +51,8 @@ endif()
|
||||||
# The variable SRC_DIR must be passed into the script (since it uses the current build directory for all values of CMAKE_*_DIR)
|
# The variable SRC_DIR must be passed into the script (since it uses the current build directory for all values of CMAKE_*_DIR)
|
||||||
set(VIDEO_CORE "${SRC_DIR}/src/video_core")
|
set(VIDEO_CORE "${SRC_DIR}/src/video_core")
|
||||||
set(HASH_FILES
|
set(HASH_FILES
|
||||||
|
"${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.cpp"
|
||||||
|
"${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.h"
|
||||||
"${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp"
|
"${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp"
|
||||||
"${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h"
|
"${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h"
|
||||||
"${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp"
|
"${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp"
|
||||||
|
|
4
dist/qt_themes/qdarkstyle/style.qss
vendored
4
dist/qt_themes/qdarkstyle/style.qss
vendored
|
@ -673,10 +673,6 @@ QTabWidget::pane {
|
||||||
border-bottom-left-radius: 2px;
|
border-bottom-left-radius: 2px;
|
||||||
}
|
}
|
||||||
|
|
||||||
QTabWidget::tab-bar {
|
|
||||||
overflow: visible;
|
|
||||||
}
|
|
||||||
|
|
||||||
QTabBar {
|
QTabBar {
|
||||||
qproperty-drawBase: 0;
|
qproperty-drawBase: 0;
|
||||||
border-radius: 3px;
|
border-radius: 3px;
|
||||||
|
|
80
dist/yuzu.manifest
vendored
80
dist/yuzu.manifest
vendored
|
@ -1,24 +1,58 @@
|
||||||
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
||||||
<assembly xmlns="urn:schemas-microsoft-com:asm.v1" manifestVersion="1.0">
|
<assembly manifestVersion="1.0"
|
||||||
<trustInfo xmlns="urn:schemas-microsoft-com:asm.v3">
|
xmlns="urn:schemas-microsoft-com:asm.v1"
|
||||||
<security>
|
xmlns:asmv3="urn:schemas-microsoft-com:asm.v3">
|
||||||
<requestedPrivileges>
|
<asmv3:application>
|
||||||
<requestedExecutionLevel level="asInvoker" uiAccess="false"/>
|
<asmv3:windowsSettings>
|
||||||
</requestedPrivileges>
|
<!-- Windows 7/8/8.1/10 -->
|
||||||
</security>
|
<dpiAware
|
||||||
</trustInfo>
|
xmlns="http://schemas.microsoft.com/SMI/2005/WindowsSettings">
|
||||||
<application xmlns="urn:schemas-microsoft-com:asm.v3">
|
true/pm
|
||||||
<windowsSettings>
|
</dpiAware>
|
||||||
<dpiAware xmlns="http://schemas.microsoft.com/SMI/2005/WindowsSettings">True/PM</dpiAware>
|
<!-- Windows 10, version 1607 or later -->
|
||||||
<longPathAware xmlns="http://schemas.microsoft.com/SMI/2016/WindowsSettings">true</longPathAware>
|
<dpiAwareness
|
||||||
</windowsSettings>
|
xmlns="http://schemas.microsoft.com/SMI/2016/WindowsSettings">
|
||||||
</application>
|
PerMonitorV2
|
||||||
<compatibility xmlns="urn:schemas-microsoft-com:compatibility.v1">
|
</dpiAwareness>
|
||||||
<application>
|
<!-- Windows 10, version 1703 or later -->
|
||||||
<supportedOS Id="{35138b9a-5d96-4fbd-8e2d-a2440225f93a}"/>
|
<gdiScaling
|
||||||
<supportedOS Id="{4a2f28e3-53b9-4441-ba9c-d69d4a4a6e38}"/>
|
xmlns="http://schemas.microsoft.com/SMI/2017/WindowsSettings">
|
||||||
<supportedOS Id="{1f676c76-80e1-4239-95bb-83d0f6d0da78}"/>
|
true
|
||||||
<supportedOS Id="{8e0f7a12-bfb3-4fe8-b9a5-48fd50a15a9a}"/>
|
</gdiScaling>
|
||||||
</application>
|
<ws2:longPathAware
|
||||||
</compatibility>
|
xmlns:ws3="http://schemas.microsoft.com/SMI/2016/WindowsSettings">
|
||||||
</assembly>
|
true
|
||||||
|
</ws2:longPathAware>
|
||||||
|
</asmv3:windowsSettings>
|
||||||
|
</asmv3:application>
|
||||||
|
<compatibility
|
||||||
|
xmlns="urn:schemas-microsoft-com:compatibility.v1">
|
||||||
|
<application>
|
||||||
|
<!-- Windows 10 -->
|
||||||
|
<supportedOS Id="{8e0f7a12-bfb3-4fe8-b9a5-48fd50a15a9a}"/>
|
||||||
|
<!-- Windows 8.1 -->
|
||||||
|
<supportedOS Id="{1f676c76-80e1-4239-95bb-83d0f6d0da78}"/>
|
||||||
|
<!-- Windows 8 -->
|
||||||
|
<supportedOS Id="{4a2f28e3-53b9-4441-ba9c-d69d4a4a6e38}"/>
|
||||||
|
<!-- Windows 7 -->
|
||||||
|
<supportedOS Id="{35138b9a-5d96-4fbd-8e2d-a2440225f93a}"/>
|
||||||
|
</application>
|
||||||
|
</compatibility>
|
||||||
|
<trustInfo
|
||||||
|
xmlns="urn:schemas-microsoft-com:asm.v3">
|
||||||
|
<security>
|
||||||
|
<requestedPrivileges>
|
||||||
|
<!--
|
||||||
|
UAC settings:
|
||||||
|
- app should run at same integrity level as calling process
|
||||||
|
- app does not need to manipulate windows belonging to
|
||||||
|
higher-integrity-level processes
|
||||||
|
-->
|
||||||
|
<requestedExecutionLevel
|
||||||
|
level="asInvoker"
|
||||||
|
uiAccess="false"
|
||||||
|
/>
|
||||||
|
</requestedPrivileges>
|
||||||
|
</security>
|
||||||
|
</trustInfo>
|
||||||
|
</assembly>
|
||||||
|
|
26
externals/CMakeLists.txt
vendored
26
externals/CMakeLists.txt
vendored
|
@ -4,6 +4,13 @@ list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/CMakeModules")
|
||||||
list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/externals/find-modules")
|
list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/externals/find-modules")
|
||||||
include(DownloadExternals)
|
include(DownloadExternals)
|
||||||
|
|
||||||
|
# xbyak
|
||||||
|
if (ARCHITECTURE_x86 OR ARCHITECTURE_x86_64)
|
||||||
|
add_library(xbyak INTERFACE)
|
||||||
|
target_include_directories(xbyak SYSTEM INTERFACE ./xbyak/xbyak)
|
||||||
|
target_compile_definitions(xbyak INTERFACE XBYAK_NO_OP_NAMES)
|
||||||
|
endif()
|
||||||
|
|
||||||
# Catch
|
# Catch
|
||||||
add_library(catch-single-include INTERFACE)
|
add_library(catch-single-include INTERFACE)
|
||||||
target_include_directories(catch-single-include INTERFACE catch/single_include)
|
target_include_directories(catch-single-include INTERFACE catch/single_include)
|
||||||
|
@ -66,6 +73,15 @@ if (NOT LIBZIP_FOUND)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if (ENABLE_WEB_SERVICE)
|
if (ENABLE_WEB_SERVICE)
|
||||||
|
# LibreSSL
|
||||||
|
set(LIBRESSL_SKIP_INSTALL ON CACHE BOOL "")
|
||||||
|
add_subdirectory(libressl EXCLUDE_FROM_ALL)
|
||||||
|
target_include_directories(ssl INTERFACE ./libressl/include)
|
||||||
|
target_compile_definitions(ssl PRIVATE -DHAVE_INET_NTOP)
|
||||||
|
get_directory_property(OPENSSL_LIBRARIES
|
||||||
|
DIRECTORY libressl
|
||||||
|
DEFINITION OPENSSL_LIBS)
|
||||||
|
|
||||||
# lurlparser
|
# lurlparser
|
||||||
add_subdirectory(lurlparser EXCLUDE_FROM_ALL)
|
add_subdirectory(lurlparser EXCLUDE_FROM_ALL)
|
||||||
|
|
||||||
|
@ -73,13 +89,5 @@ if (ENABLE_WEB_SERVICE)
|
||||||
add_library(httplib INTERFACE)
|
add_library(httplib INTERFACE)
|
||||||
target_include_directories(httplib INTERFACE ./httplib)
|
target_include_directories(httplib INTERFACE ./httplib)
|
||||||
target_compile_definitions(httplib INTERFACE -DCPPHTTPLIB_OPENSSL_SUPPORT)
|
target_compile_definitions(httplib INTERFACE -DCPPHTTPLIB_OPENSSL_SUPPORT)
|
||||||
target_link_libraries(httplib INTERFACE OpenSSL::SSL OpenSSL::Crypto)
|
target_link_libraries(httplib INTERFACE ${OPENSSL_LIBRARIES})
|
||||||
endif()
|
|
||||||
|
|
||||||
if (NOT TARGET xbyak)
|
|
||||||
if (ARCHITECTURE_x86 OR ARCHITECTURE_x86_64)
|
|
||||||
add_library(xbyak INTERFACE)
|
|
||||||
target_include_directories(xbyak SYSTEM INTERFACE ./xbyak/xbyak)
|
|
||||||
target_compile_definitions(xbyak INTERFACE XBYAK_NO_OP_NAMES)
|
|
||||||
endif()
|
|
||||||
endif()
|
endif()
|
||||||
|
|
1
externals/libressl
vendored
Submodule
1
externals/libressl
vendored
Submodule
|
@ -0,0 +1 @@
|
||||||
|
Subproject commit 7d01cb01cb1a926ecb4c9c98b107ef3c26f59dfb
|
2
externals/sirit
vendored
2
externals/sirit
vendored
|
@ -1 +1 @@
|
||||||
Subproject commit a62c5bbc100a5e5a31ea0ccc4a78d8fa6a4167ce
|
Subproject commit eefca56afd49379bdebc97ded8b480839f930881
|
|
@ -180,11 +180,12 @@ ResultVal<std::vector<u8>> AudioRenderer::UpdateAudioRenderer(const std::vector<
|
||||||
|
|
||||||
// Copy output header
|
// Copy output header
|
||||||
UpdateDataHeader response_data{worker_params};
|
UpdateDataHeader response_data{worker_params};
|
||||||
std::vector<u8> output_params(response_data.total_size);
|
|
||||||
if (behavior_info.IsElapsedFrameCountSupported()) {
|
if (behavior_info.IsElapsedFrameCountSupported()) {
|
||||||
response_data.frame_count = 0x10;
|
response_data.render_info = sizeof(RendererInfo);
|
||||||
response_data.total_size += 0x10;
|
response_data.total_size += sizeof(RendererInfo);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::vector<u8> output_params(response_data.total_size);
|
||||||
std::memcpy(output_params.data(), &response_data, sizeof(UpdateDataHeader));
|
std::memcpy(output_params.data(), &response_data, sizeof(UpdateDataHeader));
|
||||||
|
|
||||||
// Copy output memory pool entries
|
// Copy output memory pool entries
|
||||||
|
@ -219,6 +220,17 @@ ResultVal<std::vector<u8>> AudioRenderer::UpdateAudioRenderer(const std::vector<
|
||||||
return Audren::ERR_INVALID_PARAMETERS;
|
return Audren::ERR_INVALID_PARAMETERS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (behavior_info.IsElapsedFrameCountSupported()) {
|
||||||
|
const std::size_t renderer_info_offset{
|
||||||
|
sizeof(UpdateDataHeader) + response_data.memory_pools_size + response_data.voices_size +
|
||||||
|
response_data.effects_size + response_data.sinks_size +
|
||||||
|
response_data.performance_manager_size + response_data.behavior_size};
|
||||||
|
RendererInfo renderer_info{};
|
||||||
|
renderer_info.elasped_frame_count = elapsed_frame_count;
|
||||||
|
std::memcpy(output_params.data() + renderer_info_offset, &renderer_info,
|
||||||
|
sizeof(RendererInfo));
|
||||||
|
}
|
||||||
|
|
||||||
return MakeResult(output_params);
|
return MakeResult(output_params);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -447,6 +459,7 @@ void AudioRenderer::QueueMixedBuffer(Buffer::Tag tag) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
audio_out->QueueBuffer(stream, tag, std::move(buffer));
|
audio_out->QueueBuffer(stream, tag, std::move(buffer));
|
||||||
|
elapsed_frame_count++;
|
||||||
}
|
}
|
||||||
|
|
||||||
void AudioRenderer::ReleaseAndQueueBuffers() {
|
void AudioRenderer::ReleaseAndQueueBuffers() {
|
||||||
|
|
|
@ -196,6 +196,12 @@ struct EffectOutStatus {
|
||||||
};
|
};
|
||||||
static_assert(sizeof(EffectOutStatus) == 0x10, "EffectOutStatus is an invalid size");
|
static_assert(sizeof(EffectOutStatus) == 0x10, "EffectOutStatus is an invalid size");
|
||||||
|
|
||||||
|
struct RendererInfo {
|
||||||
|
u64_le elasped_frame_count{};
|
||||||
|
INSERT_PADDING_WORDS(2);
|
||||||
|
};
|
||||||
|
static_assert(sizeof(RendererInfo) == 0x10, "RendererInfo is an invalid size");
|
||||||
|
|
||||||
struct UpdateDataHeader {
|
struct UpdateDataHeader {
|
||||||
UpdateDataHeader() {}
|
UpdateDataHeader() {}
|
||||||
|
|
||||||
|
@ -209,7 +215,7 @@ struct UpdateDataHeader {
|
||||||
mixes_size = 0x0;
|
mixes_size = 0x0;
|
||||||
sinks_size = config.sink_count * 0x20;
|
sinks_size = config.sink_count * 0x20;
|
||||||
performance_manager_size = 0x10;
|
performance_manager_size = 0x10;
|
||||||
frame_count = 0;
|
render_info = 0;
|
||||||
total_size = sizeof(UpdateDataHeader) + behavior_size + memory_pools_size + voices_size +
|
total_size = sizeof(UpdateDataHeader) + behavior_size + memory_pools_size + voices_size +
|
||||||
effects_size + sinks_size + performance_manager_size;
|
effects_size + sinks_size + performance_manager_size;
|
||||||
}
|
}
|
||||||
|
@ -223,8 +229,8 @@ struct UpdateDataHeader {
|
||||||
u32_le mixes_size{};
|
u32_le mixes_size{};
|
||||||
u32_le sinks_size{};
|
u32_le sinks_size{};
|
||||||
u32_le performance_manager_size{};
|
u32_le performance_manager_size{};
|
||||||
INSERT_PADDING_WORDS(1);
|
u32_le splitter_size{};
|
||||||
u32_le frame_count{};
|
u32_le render_info{};
|
||||||
INSERT_PADDING_WORDS(4);
|
INSERT_PADDING_WORDS(4);
|
||||||
u32_le total_size{};
|
u32_le total_size{};
|
||||||
};
|
};
|
||||||
|
@ -258,6 +264,7 @@ private:
|
||||||
std::unique_ptr<AudioOut> audio_out;
|
std::unique_ptr<AudioOut> audio_out;
|
||||||
StreamPtr stream;
|
StreamPtr stream;
|
||||||
Core::Memory::Memory& memory;
|
Core::Memory::Memory& memory;
|
||||||
|
std::size_t elapsed_frame_count{};
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace AudioCore
|
} // namespace AudioCore
|
||||||
|
|
|
@ -32,6 +32,8 @@ add_custom_command(OUTPUT scm_rev.cpp
|
||||||
DEPENDS
|
DEPENDS
|
||||||
# WARNING! It was too much work to try and make a common location for this list,
|
# WARNING! It was too much work to try and make a common location for this list,
|
||||||
# so if you need to change it, please update CMakeModules/GenerateSCMRev.cmake as well
|
# so if you need to change it, please update CMakeModules/GenerateSCMRev.cmake as well
|
||||||
|
"${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.cpp"
|
||||||
|
"${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.h"
|
||||||
"${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp"
|
"${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp"
|
||||||
"${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h"
|
"${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h"
|
||||||
"${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp"
|
"${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp"
|
||||||
|
|
|
@ -60,6 +60,7 @@ void AppendCPUInfo(FieldCollection& fc) {
|
||||||
fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AES", Common::GetCPUCaps().aes);
|
fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AES", Common::GetCPUCaps().aes);
|
||||||
fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX", Common::GetCPUCaps().avx);
|
fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX", Common::GetCPUCaps().avx);
|
||||||
fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX2", Common::GetCPUCaps().avx2);
|
fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX2", Common::GetCPUCaps().avx2);
|
||||||
|
fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX512", Common::GetCPUCaps().avx512);
|
||||||
fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_BMI1", Common::GetCPUCaps().bmi1);
|
fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_BMI1", Common::GetCPUCaps().bmi1);
|
||||||
fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_BMI2", Common::GetCPUCaps().bmi2);
|
fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_BMI2", Common::GetCPUCaps().bmi2);
|
||||||
fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_FMA", Common::GetCPUCaps().fma);
|
fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_FMA", Common::GetCPUCaps().fma);
|
||||||
|
|
|
@ -110,6 +110,11 @@ static CPUCaps Detect() {
|
||||||
caps.bmi1 = true;
|
caps.bmi1 = true;
|
||||||
if ((cpu_id[1] >> 8) & 1)
|
if ((cpu_id[1] >> 8) & 1)
|
||||||
caps.bmi2 = true;
|
caps.bmi2 = true;
|
||||||
|
// Checks for AVX512F, AVX512CD, AVX512VL, AVX512DQ, AVX512BW (Intel Skylake-X/SP)
|
||||||
|
if ((cpu_id[1] >> 16) & 1 && (cpu_id[1] >> 28) & 1 && (cpu_id[1] >> 31) & 1 &&
|
||||||
|
(cpu_id[1] >> 17) & 1 && (cpu_id[1] >> 30) & 1) {
|
||||||
|
caps.avx512 = caps.avx2;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -19,6 +19,7 @@ struct CPUCaps {
|
||||||
bool lzcnt;
|
bool lzcnt;
|
||||||
bool avx;
|
bool avx;
|
||||||
bool avx2;
|
bool avx2;
|
||||||
|
bool avx512;
|
||||||
bool bmi1;
|
bool bmi1;
|
||||||
bool bmi2;
|
bool bmi2;
|
||||||
bool fma;
|
bool fma;
|
||||||
|
|
|
@ -11,7 +11,7 @@
|
||||||
|
|
||||||
namespace Common::X64 {
|
namespace Common::X64 {
|
||||||
|
|
||||||
inline int RegToIndex(const Xbyak::Reg& reg) {
|
inline std::size_t RegToIndex(const Xbyak::Reg& reg) {
|
||||||
using Kind = Xbyak::Reg::Kind;
|
using Kind = Xbyak::Reg::Kind;
|
||||||
ASSERT_MSG((reg.getKind() & (Kind::REG | Kind::XMM)) != 0,
|
ASSERT_MSG((reg.getKind() & (Kind::REG | Kind::XMM)) != 0,
|
||||||
"RegSet only support GPRs and XMM registers.");
|
"RegSet only support GPRs and XMM registers.");
|
||||||
|
@ -19,17 +19,17 @@ inline int RegToIndex(const Xbyak::Reg& reg) {
|
||||||
return reg.getIdx() + (reg.getKind() == Kind::REG ? 0 : 16);
|
return reg.getIdx() + (reg.getKind() == Kind::REG ? 0 : 16);
|
||||||
}
|
}
|
||||||
|
|
||||||
inline Xbyak::Reg64 IndexToReg64(int reg_index) {
|
inline Xbyak::Reg64 IndexToReg64(std::size_t reg_index) {
|
||||||
ASSERT(reg_index < 16);
|
ASSERT(reg_index < 16);
|
||||||
return Xbyak::Reg64(reg_index);
|
return Xbyak::Reg64(static_cast<int>(reg_index));
|
||||||
}
|
}
|
||||||
|
|
||||||
inline Xbyak::Xmm IndexToXmm(int reg_index) {
|
inline Xbyak::Xmm IndexToXmm(std::size_t reg_index) {
|
||||||
ASSERT(reg_index >= 16 && reg_index < 32);
|
ASSERT(reg_index >= 16 && reg_index < 32);
|
||||||
return Xbyak::Xmm(reg_index - 16);
|
return Xbyak::Xmm(static_cast<int>(reg_index - 16));
|
||||||
}
|
}
|
||||||
|
|
||||||
inline Xbyak::Reg IndexToReg(int reg_index) {
|
inline Xbyak::Reg IndexToReg(std::size_t reg_index) {
|
||||||
if (reg_index < 16) {
|
if (reg_index < 16) {
|
||||||
return IndexToReg64(reg_index);
|
return IndexToReg64(reg_index);
|
||||||
} else {
|
} else {
|
||||||
|
@ -151,9 +151,13 @@ constexpr size_t ABI_SHADOW_SPACE = 0;
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
inline void ABI_CalculateFrameSize(std::bitset<32> regs, size_t rsp_alignment,
|
struct ABIFrameInfo {
|
||||||
size_t needed_frame_size, s32* out_subtraction,
|
s32 subtraction;
|
||||||
s32* out_xmm_offset) {
|
s32 xmm_offset;
|
||||||
|
};
|
||||||
|
|
||||||
|
inline ABIFrameInfo ABI_CalculateFrameSize(std::bitset<32> regs, size_t rsp_alignment,
|
||||||
|
size_t needed_frame_size) {
|
||||||
const auto count = (regs & ABI_ALL_GPRS).count();
|
const auto count = (regs & ABI_ALL_GPRS).count();
|
||||||
rsp_alignment -= count * 8;
|
rsp_alignment -= count * 8;
|
||||||
size_t subtraction = 0;
|
size_t subtraction = 0;
|
||||||
|
@ -170,33 +174,28 @@ inline void ABI_CalculateFrameSize(std::bitset<32> regs, size_t rsp_alignment,
|
||||||
rsp_alignment -= subtraction;
|
rsp_alignment -= subtraction;
|
||||||
subtraction += rsp_alignment & 0xF;
|
subtraction += rsp_alignment & 0xF;
|
||||||
|
|
||||||
*out_subtraction = (s32)subtraction;
|
return ABIFrameInfo{static_cast<s32>(subtraction),
|
||||||
*out_xmm_offset = (s32)(subtraction - xmm_base_subtraction);
|
static_cast<s32>(subtraction - xmm_base_subtraction)};
|
||||||
}
|
}
|
||||||
|
|
||||||
inline size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs,
|
inline size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs,
|
||||||
size_t rsp_alignment, size_t needed_frame_size = 0) {
|
size_t rsp_alignment, size_t needed_frame_size = 0) {
|
||||||
s32 subtraction, xmm_offset;
|
auto frame_info = ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size);
|
||||||
ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset);
|
|
||||||
for (std::size_t i = 0; i < regs.size(); ++i) {
|
for (std::size_t i = 0; i < regs.size(); ++i) {
|
||||||
if (regs[i] && ABI_ALL_GPRS[i]) {
|
if (regs[i] && ABI_ALL_GPRS[i]) {
|
||||||
code.push(IndexToReg64(static_cast<int>(i)));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (subtraction != 0) {
|
|
||||||
code.sub(code.rsp, subtraction);
|
|
||||||
}
|
|
||||||
|
|
||||||
for (int i = 0; i < regs.count(); i++) {
|
|
||||||
if (regs.test(i) & ABI_ALL_GPRS.test(i)) {
|
|
||||||
code.push(IndexToReg64(i));
|
code.push(IndexToReg64(i));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (frame_info.subtraction != 0) {
|
||||||
|
code.sub(code.rsp, frame_info.subtraction);
|
||||||
|
}
|
||||||
|
|
||||||
for (std::size_t i = 0; i < regs.size(); ++i) {
|
for (std::size_t i = 0; i < regs.size(); ++i) {
|
||||||
if (regs[i] && ABI_ALL_XMMS[i]) {
|
if (regs[i] && ABI_ALL_XMMS[i]) {
|
||||||
code.movaps(code.xword[code.rsp + xmm_offset], IndexToXmm(static_cast<int>(i)));
|
code.movaps(code.xword[code.rsp + frame_info.xmm_offset], IndexToXmm(i));
|
||||||
xmm_offset += 0x10;
|
frame_info.xmm_offset += 0x10;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -205,59 +204,23 @@ inline size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::b
|
||||||
|
|
||||||
inline void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs,
|
inline void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs,
|
||||||
size_t rsp_alignment, size_t needed_frame_size = 0) {
|
size_t rsp_alignment, size_t needed_frame_size = 0) {
|
||||||
s32 subtraction, xmm_offset;
|
auto frame_info = ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size);
|
||||||
ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset);
|
|
||||||
|
|
||||||
for (std::size_t i = 0; i < regs.size(); ++i) {
|
for (std::size_t i = 0; i < regs.size(); ++i) {
|
||||||
if (regs[i] && ABI_ALL_XMMS[i]) {
|
if (regs[i] && ABI_ALL_XMMS[i]) {
|
||||||
code.movaps(IndexToXmm(static_cast<int>(i)), code.xword[code.rsp + xmm_offset]);
|
code.movaps(IndexToXmm(i), code.xword[code.rsp + frame_info.xmm_offset]);
|
||||||
xmm_offset += 0x10;
|
frame_info.xmm_offset += 0x10;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (subtraction != 0) {
|
if (frame_info.subtraction != 0) {
|
||||||
code.add(code.rsp, subtraction);
|
code.add(code.rsp, frame_info.subtraction);
|
||||||
}
|
}
|
||||||
|
|
||||||
// GPRs need to be popped in reverse order
|
// GPRs need to be popped in reverse order
|
||||||
for (int i = 15; i >= 0; i--) {
|
for (std::size_t j = 0; j < regs.size(); ++j) {
|
||||||
if (regs[i]) {
|
const std::size_t i = regs.size() - j - 1;
|
||||||
code.pop(IndexToReg64(i));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
inline size_t ABI_PushRegistersAndAdjustStackGPS(Xbyak::CodeGenerator& code, std::bitset<32> regs,
|
|
||||||
size_t rsp_alignment,
|
|
||||||
size_t needed_frame_size = 0) {
|
|
||||||
s32 subtraction, xmm_offset;
|
|
||||||
ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset);
|
|
||||||
|
|
||||||
for (std::size_t i = 0; i < regs.size(); ++i) {
|
|
||||||
if (regs[i] && ABI_ALL_GPRS[i]) {
|
if (regs[i] && ABI_ALL_GPRS[i]) {
|
||||||
code.push(IndexToReg64(static_cast<int>(i)));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (subtraction != 0) {
|
|
||||||
code.sub(code.rsp, subtraction);
|
|
||||||
}
|
|
||||||
|
|
||||||
return ABI_SHADOW_SPACE;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void ABI_PopRegistersAndAdjustStackGPS(Xbyak::CodeGenerator& code, std::bitset<32> regs,
|
|
||||||
size_t rsp_alignment, size_t needed_frame_size = 0) {
|
|
||||||
s32 subtraction, xmm_offset;
|
|
||||||
ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset);
|
|
||||||
|
|
||||||
if (subtraction != 0) {
|
|
||||||
code.add(code.rsp, subtraction);
|
|
||||||
}
|
|
||||||
|
|
||||||
// GPRs need to be popped in reverse order
|
|
||||||
for (int i = 15; i >= 0; i--) {
|
|
||||||
if (regs[i]) {
|
|
||||||
code.pop(IndexToReg64(i));
|
code.pop(IndexToReg64(i));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -606,11 +606,11 @@ endif()
|
||||||
create_target_directory_groups(core)
|
create_target_directory_groups(core)
|
||||||
|
|
||||||
target_link_libraries(core PUBLIC common PRIVATE audio_core video_core)
|
target_link_libraries(core PUBLIC common PRIVATE audio_core video_core)
|
||||||
target_link_libraries(core PUBLIC Boost::boost PRIVATE fmt::fmt nlohmann_json::nlohmann_json mbedtls Opus::Opus unicorn)
|
target_link_libraries(core PUBLIC Boost::boost PRIVATE fmt::fmt nlohmann_json::nlohmann_json mbedtls Opus::Opus unicorn zip)
|
||||||
|
|
||||||
if (YUZU_ENABLE_BOXCAT)
|
if (YUZU_ENABLE_BOXCAT)
|
||||||
target_compile_definitions(core PRIVATE -DYUZU_ENABLE_BOXCAT)
|
target_compile_definitions(core PRIVATE -DYUZU_ENABLE_BOXCAT)
|
||||||
target_link_libraries(core PRIVATE httplib nlohmann_json::nlohmann_json zip)
|
target_link_libraries(core PRIVATE httplib nlohmann_json::nlohmann_json)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if (ENABLE_WEB_SERVICE)
|
if (ENABLE_WEB_SERVICE)
|
||||||
|
|
|
@ -50,7 +50,8 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
void InterpreterFallback(u32 pc, std::size_t num_instructions) override {
|
void InterpreterFallback(u32 pc, std::size_t num_instructions) override {
|
||||||
UNIMPLEMENTED();
|
UNIMPLEMENTED_MSG("This should never happen, pc = {:08X}, code = {:08X}", pc,
|
||||||
|
MemoryReadCode(pc));
|
||||||
}
|
}
|
||||||
|
|
||||||
void ExceptionRaised(u32 pc, Dynarmic::A32::Exception exception) override {
|
void ExceptionRaised(u32 pc, Dynarmic::A32::Exception exception) override {
|
||||||
|
@ -61,7 +62,7 @@ public:
|
||||||
case Dynarmic::A32::Exception::Breakpoint:
|
case Dynarmic::A32::Exception::Breakpoint:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
LOG_CRITICAL(HW_GPU, "ExceptionRaised(exception = {}, pc = {:08X}, code = {:08X})",
|
LOG_CRITICAL(Core_ARM, "ExceptionRaised(exception = {}, pc = {:08X}, code = {:08X})",
|
||||||
static_cast<std::size_t>(exception), pc, MemoryReadCode(pc));
|
static_cast<std::size_t>(exception), pc, MemoryReadCode(pc));
|
||||||
UNIMPLEMENTED();
|
UNIMPLEMENTED();
|
||||||
}
|
}
|
||||||
|
@ -89,8 +90,6 @@ public:
|
||||||
|
|
||||||
ARM_Dynarmic_32& parent;
|
ARM_Dynarmic_32& parent;
|
||||||
std::size_t num_interpreted_instructions{};
|
std::size_t num_interpreted_instructions{};
|
||||||
u64 tpidrro_el0{};
|
|
||||||
u64 tpidr_el0{};
|
|
||||||
};
|
};
|
||||||
|
|
||||||
std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable& page_table,
|
std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable& page_table,
|
||||||
|
@ -99,7 +98,7 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable&
|
||||||
config.callbacks = cb.get();
|
config.callbacks = cb.get();
|
||||||
// TODO(bunnei): Implement page table for 32-bit
|
// TODO(bunnei): Implement page table for 32-bit
|
||||||
// config.page_table = &page_table.pointers;
|
// config.page_table = &page_table.pointers;
|
||||||
config.coprocessors[15] = std::make_shared<DynarmicCP15>((u32*)&CP15_regs[0]);
|
config.coprocessors[15] = cp15;
|
||||||
config.define_unpredictable_behaviour = true;
|
config.define_unpredictable_behaviour = true;
|
||||||
return std::make_unique<Dynarmic::A32::Jit>(config);
|
return std::make_unique<Dynarmic::A32::Jit>(config);
|
||||||
}
|
}
|
||||||
|
@ -112,13 +111,13 @@ void ARM_Dynarmic_32::Run() {
|
||||||
}
|
}
|
||||||
|
|
||||||
void ARM_Dynarmic_32::Step() {
|
void ARM_Dynarmic_32::Step() {
|
||||||
cb->InterpreterFallback(jit->Regs()[15], 1);
|
jit->Step();
|
||||||
}
|
}
|
||||||
|
|
||||||
ARM_Dynarmic_32::ARM_Dynarmic_32(System& system, ExclusiveMonitor& exclusive_monitor,
|
ARM_Dynarmic_32::ARM_Dynarmic_32(System& system, ExclusiveMonitor& exclusive_monitor,
|
||||||
std::size_t core_index)
|
std::size_t core_index)
|
||||||
: ARM_Interface{system},
|
: ARM_Interface{system}, cb(std::make_unique<DynarmicCallbacks32>(*this)),
|
||||||
cb(std::make_unique<DynarmicCallbacks32>(*this)), core_index{core_index},
|
cp15(std::make_shared<DynarmicCP15>(*this)), core_index{core_index},
|
||||||
exclusive_monitor{dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} {}
|
exclusive_monitor{dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} {}
|
||||||
|
|
||||||
ARM_Dynarmic_32::~ARM_Dynarmic_32() = default;
|
ARM_Dynarmic_32::~ARM_Dynarmic_32() = default;
|
||||||
|
@ -154,19 +153,19 @@ void ARM_Dynarmic_32::SetPSTATE(u32 cpsr) {
|
||||||
}
|
}
|
||||||
|
|
||||||
u64 ARM_Dynarmic_32::GetTlsAddress() const {
|
u64 ARM_Dynarmic_32::GetTlsAddress() const {
|
||||||
return CP15_regs[static_cast<std::size_t>(CP15Register::CP15_THREAD_URO)];
|
return cp15->uro;
|
||||||
}
|
}
|
||||||
|
|
||||||
void ARM_Dynarmic_32::SetTlsAddress(VAddr address) {
|
void ARM_Dynarmic_32::SetTlsAddress(VAddr address) {
|
||||||
CP15_regs[static_cast<std::size_t>(CP15Register::CP15_THREAD_URO)] = static_cast<u32>(address);
|
cp15->uro = static_cast<u32>(address);
|
||||||
}
|
}
|
||||||
|
|
||||||
u64 ARM_Dynarmic_32::GetTPIDR_EL0() const {
|
u64 ARM_Dynarmic_32::GetTPIDR_EL0() const {
|
||||||
return cb->tpidr_el0;
|
return cp15->uprw;
|
||||||
}
|
}
|
||||||
|
|
||||||
void ARM_Dynarmic_32::SetTPIDR_EL0(u64 value) {
|
void ARM_Dynarmic_32::SetTPIDR_EL0(u64 value) {
|
||||||
cb->tpidr_el0 = value;
|
cp15->uprw = static_cast<u32>(value);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ARM_Dynarmic_32::SaveContext(ThreadContext32& ctx) {
|
void ARM_Dynarmic_32::SaveContext(ThreadContext32& ctx) {
|
||||||
|
|
|
@ -22,6 +22,7 @@ class Memory;
|
||||||
namespace Core {
|
namespace Core {
|
||||||
|
|
||||||
class DynarmicCallbacks32;
|
class DynarmicCallbacks32;
|
||||||
|
class DynarmicCP15;
|
||||||
class DynarmicExclusiveMonitor;
|
class DynarmicExclusiveMonitor;
|
||||||
class System;
|
class System;
|
||||||
|
|
||||||
|
@ -66,12 +67,14 @@ private:
|
||||||
std::unordered_map<JitCacheKey, std::shared_ptr<Dynarmic::A32::Jit>, Common::PairHash>;
|
std::unordered_map<JitCacheKey, std::shared_ptr<Dynarmic::A32::Jit>, Common::PairHash>;
|
||||||
|
|
||||||
friend class DynarmicCallbacks32;
|
friend class DynarmicCallbacks32;
|
||||||
|
friend class DynarmicCP15;
|
||||||
|
|
||||||
std::unique_ptr<DynarmicCallbacks32> cb;
|
std::unique_ptr<DynarmicCallbacks32> cb;
|
||||||
JitCacheType jit_cache;
|
JitCacheType jit_cache;
|
||||||
std::shared_ptr<Dynarmic::A32::Jit> jit;
|
std::shared_ptr<Dynarmic::A32::Jit> jit;
|
||||||
|
std::shared_ptr<DynarmicCP15> cp15;
|
||||||
std::size_t core_index;
|
std::size_t core_index;
|
||||||
DynarmicExclusiveMonitor& exclusive_monitor;
|
DynarmicExclusiveMonitor& exclusive_monitor;
|
||||||
std::array<u32, 84> CP15_regs{};
|
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace Core
|
} // namespace Core
|
||||||
|
|
|
@ -98,8 +98,8 @@ public:
|
||||||
}
|
}
|
||||||
[[fallthrough]];
|
[[fallthrough]];
|
||||||
default:
|
default:
|
||||||
ASSERT_MSG(false, "ExceptionRaised(exception = {}, pc = {:X})",
|
ASSERT_MSG(false, "ExceptionRaised(exception = {}, pc = {:08X}, code = {:08X})",
|
||||||
static_cast<std::size_t>(exception), pc);
|
static_cast<std::size_t>(exception), pc, MemoryReadCode(pc));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -2,79 +2,132 @@
|
||||||
// Licensed under GPLv2 or any later version
|
// Licensed under GPLv2 or any later version
|
||||||
// Refer to the license.txt file included.
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#include <fmt/format.h>
|
||||||
|
#include "common/logging/log.h"
|
||||||
|
#include "core/arm/dynarmic/arm_dynarmic_32.h"
|
||||||
#include "core/arm/dynarmic/arm_dynarmic_cp15.h"
|
#include "core/arm/dynarmic/arm_dynarmic_cp15.h"
|
||||||
|
#include "core/core.h"
|
||||||
|
#include "core/core_timing.h"
|
||||||
|
#include "core/core_timing_util.h"
|
||||||
|
|
||||||
using Callback = Dynarmic::A32::Coprocessor::Callback;
|
using Callback = Dynarmic::A32::Coprocessor::Callback;
|
||||||
using CallbackOrAccessOneWord = Dynarmic::A32::Coprocessor::CallbackOrAccessOneWord;
|
using CallbackOrAccessOneWord = Dynarmic::A32::Coprocessor::CallbackOrAccessOneWord;
|
||||||
using CallbackOrAccessTwoWords = Dynarmic::A32::Coprocessor::CallbackOrAccessTwoWords;
|
using CallbackOrAccessTwoWords = Dynarmic::A32::Coprocessor::CallbackOrAccessTwoWords;
|
||||||
|
|
||||||
|
template <>
|
||||||
|
struct fmt::formatter<Dynarmic::A32::CoprocReg> {
|
||||||
|
constexpr auto parse(format_parse_context& ctx) {
|
||||||
|
return ctx.begin();
|
||||||
|
}
|
||||||
|
template <typename FormatContext>
|
||||||
|
auto format(const Dynarmic::A32::CoprocReg& reg, FormatContext& ctx) {
|
||||||
|
return format_to(ctx.out(), "cp{}", static_cast<size_t>(reg));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
namespace Core {
|
||||||
|
|
||||||
|
static u32 dummy_value;
|
||||||
|
|
||||||
std::optional<Callback> DynarmicCP15::CompileInternalOperation(bool two, unsigned opc1,
|
std::optional<Callback> DynarmicCP15::CompileInternalOperation(bool two, unsigned opc1,
|
||||||
CoprocReg CRd, CoprocReg CRn,
|
CoprocReg CRd, CoprocReg CRn,
|
||||||
CoprocReg CRm, unsigned opc2) {
|
CoprocReg CRm, unsigned opc2) {
|
||||||
|
LOG_CRITICAL(Core_ARM, "CP15: cdp{} p15, {}, {}, {}, {}, {}", two ? "2" : "", opc1, CRd, CRn,
|
||||||
|
CRm, opc2);
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
CallbackOrAccessOneWord DynarmicCP15::CompileSendOneWord(bool two, unsigned opc1, CoprocReg CRn,
|
CallbackOrAccessOneWord DynarmicCP15::CompileSendOneWord(bool two, unsigned opc1, CoprocReg CRn,
|
||||||
CoprocReg CRm, unsigned opc2) {
|
CoprocReg CRm, unsigned opc2) {
|
||||||
// TODO(merry): Privileged CP15 registers
|
|
||||||
|
|
||||||
if (!two && CRn == CoprocReg::C7 && opc1 == 0 && CRm == CoprocReg::C5 && opc2 == 4) {
|
if (!two && CRn == CoprocReg::C7 && opc1 == 0 && CRm == CoprocReg::C5 && opc2 == 4) {
|
||||||
|
// CP15_FLUSH_PREFETCH_BUFFER
|
||||||
// This is a dummy write, we ignore the value written here.
|
// This is a dummy write, we ignore the value written here.
|
||||||
return &CP15[static_cast<std::size_t>(CP15Register::CP15_FLUSH_PREFETCH_BUFFER)];
|
return &dummy_value;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!two && CRn == CoprocReg::C7 && opc1 == 0 && CRm == CoprocReg::C10) {
|
if (!two && CRn == CoprocReg::C7 && opc1 == 0 && CRm == CoprocReg::C10) {
|
||||||
switch (opc2) {
|
switch (opc2) {
|
||||||
case 4:
|
case 4:
|
||||||
|
// CP15_DATA_SYNC_BARRIER
|
||||||
// This is a dummy write, we ignore the value written here.
|
// This is a dummy write, we ignore the value written here.
|
||||||
return &CP15[static_cast<std::size_t>(CP15Register::CP15_DATA_SYNC_BARRIER)];
|
return &dummy_value;
|
||||||
case 5:
|
case 5:
|
||||||
|
// CP15_DATA_MEMORY_BARRIER
|
||||||
// This is a dummy write, we ignore the value written here.
|
// This is a dummy write, we ignore the value written here.
|
||||||
return &CP15[static_cast<std::size_t>(CP15Register::CP15_DATA_MEMORY_BARRIER)];
|
return &dummy_value;
|
||||||
default:
|
|
||||||
return {};
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!two && CRn == CoprocReg::C13 && opc1 == 0 && CRm == CoprocReg::C0 && opc2 == 2) {
|
if (!two && CRn == CoprocReg::C13 && opc1 == 0 && CRm == CoprocReg::C0 && opc2 == 2) {
|
||||||
return &CP15[static_cast<std::size_t>(CP15Register::CP15_THREAD_UPRW)];
|
// CP15_THREAD_UPRW
|
||||||
|
return &uprw;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
LOG_CRITICAL(Core_ARM, "CP15: mcr{} p15, {}, <Rt>, {}, {}, {}", two ? "2" : "", opc1, CRn, CRm,
|
||||||
|
opc2);
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
CallbackOrAccessTwoWords DynarmicCP15::CompileSendTwoWords(bool two, unsigned opc, CoprocReg CRm) {
|
CallbackOrAccessTwoWords DynarmicCP15::CompileSendTwoWords(bool two, unsigned opc, CoprocReg CRm) {
|
||||||
|
LOG_CRITICAL(Core_ARM, "CP15: mcrr{} p15, {}, <Rt>, <Rt2>, {}", two ? "2" : "", opc, CRm);
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
CallbackOrAccessOneWord DynarmicCP15::CompileGetOneWord(bool two, unsigned opc1, CoprocReg CRn,
|
CallbackOrAccessOneWord DynarmicCP15::CompileGetOneWord(bool two, unsigned opc1, CoprocReg CRn,
|
||||||
CoprocReg CRm, unsigned opc2) {
|
CoprocReg CRm, unsigned opc2) {
|
||||||
// TODO(merry): Privileged CP15 registers
|
|
||||||
|
|
||||||
if (!two && CRn == CoprocReg::C13 && opc1 == 0 && CRm == CoprocReg::C0) {
|
if (!two && CRn == CoprocReg::C13 && opc1 == 0 && CRm == CoprocReg::C0) {
|
||||||
switch (opc2) {
|
switch (opc2) {
|
||||||
case 2:
|
case 2:
|
||||||
return &CP15[static_cast<std::size_t>(CP15Register::CP15_THREAD_UPRW)];
|
// CP15_THREAD_UPRW
|
||||||
|
return &uprw;
|
||||||
case 3:
|
case 3:
|
||||||
return &CP15[static_cast<std::size_t>(CP15Register::CP15_THREAD_URO)];
|
// CP15_THREAD_URO
|
||||||
default:
|
return &uro;
|
||||||
return {};
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
LOG_CRITICAL(Core_ARM, "CP15: mrc{} p15, {}, <Rt>, {}, {}, {}", two ? "2" : "", opc1, CRn, CRm,
|
||||||
|
opc2);
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
CallbackOrAccessTwoWords DynarmicCP15::CompileGetTwoWords(bool two, unsigned opc, CoprocReg CRm) {
|
CallbackOrAccessTwoWords DynarmicCP15::CompileGetTwoWords(bool two, unsigned opc, CoprocReg CRm) {
|
||||||
|
if (!two && opc == 0 && CRm == CoprocReg::C14) {
|
||||||
|
// CNTPCT
|
||||||
|
const auto callback = static_cast<u64 (*)(Dynarmic::A32::Jit*, void*, u32, u32)>(
|
||||||
|
[](Dynarmic::A32::Jit*, void* arg, u32, u32) -> u64 {
|
||||||
|
ARM_Dynarmic_32& parent = *(ARM_Dynarmic_32*)arg;
|
||||||
|
return Timing::CpuCyclesToClockCycles(parent.system.CoreTiming().GetTicks());
|
||||||
|
});
|
||||||
|
return Dynarmic::A32::Coprocessor::Callback{callback, (void*)&parent};
|
||||||
|
}
|
||||||
|
|
||||||
|
LOG_CRITICAL(Core_ARM, "CP15: mrrc{} p15, {}, <Rt>, <Rt2>, {}", two ? "2" : "", opc, CRm);
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
std::optional<Callback> DynarmicCP15::CompileLoadWords(bool two, bool long_transfer, CoprocReg CRd,
|
std::optional<Callback> DynarmicCP15::CompileLoadWords(bool two, bool long_transfer, CoprocReg CRd,
|
||||||
std::optional<u8> option) {
|
std::optional<u8> option) {
|
||||||
|
if (option) {
|
||||||
|
LOG_CRITICAL(Core_ARM, "CP15: mrrc{}{} p15, {}, [...], {}", two ? "2" : "",
|
||||||
|
long_transfer ? "l" : "", CRd, *option);
|
||||||
|
} else {
|
||||||
|
LOG_CRITICAL(Core_ARM, "CP15: mrrc{}{} p15, {}, [...]", two ? "2" : "",
|
||||||
|
long_transfer ? "l" : "", CRd);
|
||||||
|
}
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
std::optional<Callback> DynarmicCP15::CompileStoreWords(bool two, bool long_transfer, CoprocReg CRd,
|
std::optional<Callback> DynarmicCP15::CompileStoreWords(bool two, bool long_transfer, CoprocReg CRd,
|
||||||
std::optional<u8> option) {
|
std::optional<u8> option) {
|
||||||
|
if (option) {
|
||||||
|
LOG_CRITICAL(Core_ARM, "CP15: mrrc{}{} p15, {}, [...], {}", two ? "2" : "",
|
||||||
|
long_transfer ? "l" : "", CRd, *option);
|
||||||
|
} else {
|
||||||
|
LOG_CRITICAL(Core_ARM, "CP15: mrrc{}{} p15, {}, [...]", two ? "2" : "",
|
||||||
|
long_transfer ? "l" : "", CRd);
|
||||||
|
}
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
} // namespace Core
|
||||||
|
|
|
@ -10,128 +10,15 @@
|
||||||
#include <dynarmic/A32/coprocessor.h>
|
#include <dynarmic/A32/coprocessor.h>
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
|
|
||||||
enum class CP15Register {
|
namespace Core {
|
||||||
// c0 - Information registers
|
|
||||||
CP15_MAIN_ID,
|
|
||||||
CP15_CACHE_TYPE,
|
|
||||||
CP15_TCM_STATUS,
|
|
||||||
CP15_TLB_TYPE,
|
|
||||||
CP15_CPU_ID,
|
|
||||||
CP15_PROCESSOR_FEATURE_0,
|
|
||||||
CP15_PROCESSOR_FEATURE_1,
|
|
||||||
CP15_DEBUG_FEATURE_0,
|
|
||||||
CP15_AUXILIARY_FEATURE_0,
|
|
||||||
CP15_MEMORY_MODEL_FEATURE_0,
|
|
||||||
CP15_MEMORY_MODEL_FEATURE_1,
|
|
||||||
CP15_MEMORY_MODEL_FEATURE_2,
|
|
||||||
CP15_MEMORY_MODEL_FEATURE_3,
|
|
||||||
CP15_ISA_FEATURE_0,
|
|
||||||
CP15_ISA_FEATURE_1,
|
|
||||||
CP15_ISA_FEATURE_2,
|
|
||||||
CP15_ISA_FEATURE_3,
|
|
||||||
CP15_ISA_FEATURE_4,
|
|
||||||
|
|
||||||
// c1 - Control registers
|
class ARM_Dynarmic_32;
|
||||||
CP15_CONTROL,
|
|
||||||
CP15_AUXILIARY_CONTROL,
|
|
||||||
CP15_COPROCESSOR_ACCESS_CONTROL,
|
|
||||||
|
|
||||||
// c2 - Translation table registers
|
|
||||||
CP15_TRANSLATION_BASE_TABLE_0,
|
|
||||||
CP15_TRANSLATION_BASE_TABLE_1,
|
|
||||||
CP15_TRANSLATION_BASE_CONTROL,
|
|
||||||
CP15_DOMAIN_ACCESS_CONTROL,
|
|
||||||
CP15_RESERVED,
|
|
||||||
|
|
||||||
// c5 - Fault status registers
|
|
||||||
CP15_FAULT_STATUS,
|
|
||||||
CP15_INSTR_FAULT_STATUS,
|
|
||||||
CP15_COMBINED_DATA_FSR = CP15_FAULT_STATUS,
|
|
||||||
CP15_INST_FSR,
|
|
||||||
|
|
||||||
// c6 - Fault Address registers
|
|
||||||
CP15_FAULT_ADDRESS,
|
|
||||||
CP15_COMBINED_DATA_FAR = CP15_FAULT_ADDRESS,
|
|
||||||
CP15_WFAR,
|
|
||||||
CP15_IFAR,
|
|
||||||
|
|
||||||
// c7 - Cache operation registers
|
|
||||||
CP15_WAIT_FOR_INTERRUPT,
|
|
||||||
CP15_PHYS_ADDRESS,
|
|
||||||
CP15_INVALIDATE_INSTR_CACHE,
|
|
||||||
CP15_INVALIDATE_INSTR_CACHE_USING_MVA,
|
|
||||||
CP15_INVALIDATE_INSTR_CACHE_USING_INDEX,
|
|
||||||
CP15_FLUSH_PREFETCH_BUFFER,
|
|
||||||
CP15_FLUSH_BRANCH_TARGET_CACHE,
|
|
||||||
CP15_FLUSH_BRANCH_TARGET_CACHE_ENTRY,
|
|
||||||
CP15_INVALIDATE_DATA_CACHE,
|
|
||||||
CP15_INVALIDATE_DATA_CACHE_LINE_USING_MVA,
|
|
||||||
CP15_INVALIDATE_DATA_CACHE_LINE_USING_INDEX,
|
|
||||||
CP15_INVALIDATE_DATA_AND_INSTR_CACHE,
|
|
||||||
CP15_CLEAN_DATA_CACHE,
|
|
||||||
CP15_CLEAN_DATA_CACHE_LINE_USING_MVA,
|
|
||||||
CP15_CLEAN_DATA_CACHE_LINE_USING_INDEX,
|
|
||||||
CP15_DATA_SYNC_BARRIER,
|
|
||||||
CP15_DATA_MEMORY_BARRIER,
|
|
||||||
CP15_CLEAN_AND_INVALIDATE_DATA_CACHE,
|
|
||||||
CP15_CLEAN_AND_INVALIDATE_DATA_CACHE_LINE_USING_MVA,
|
|
||||||
CP15_CLEAN_AND_INVALIDATE_DATA_CACHE_LINE_USING_INDEX,
|
|
||||||
|
|
||||||
// c8 - TLB operations
|
|
||||||
CP15_INVALIDATE_ITLB,
|
|
||||||
CP15_INVALIDATE_ITLB_SINGLE_ENTRY,
|
|
||||||
CP15_INVALIDATE_ITLB_ENTRY_ON_ASID_MATCH,
|
|
||||||
CP15_INVALIDATE_ITLB_ENTRY_ON_MVA,
|
|
||||||
CP15_INVALIDATE_DTLB,
|
|
||||||
CP15_INVALIDATE_DTLB_SINGLE_ENTRY,
|
|
||||||
CP15_INVALIDATE_DTLB_ENTRY_ON_ASID_MATCH,
|
|
||||||
CP15_INVALIDATE_DTLB_ENTRY_ON_MVA,
|
|
||||||
CP15_INVALIDATE_UTLB,
|
|
||||||
CP15_INVALIDATE_UTLB_SINGLE_ENTRY,
|
|
||||||
CP15_INVALIDATE_UTLB_ENTRY_ON_ASID_MATCH,
|
|
||||||
CP15_INVALIDATE_UTLB_ENTRY_ON_MVA,
|
|
||||||
|
|
||||||
// c9 - Data cache lockdown register
|
|
||||||
CP15_DATA_CACHE_LOCKDOWN,
|
|
||||||
|
|
||||||
// c10 - TLB/Memory map registers
|
|
||||||
CP15_TLB_LOCKDOWN,
|
|
||||||
CP15_PRIMARY_REGION_REMAP,
|
|
||||||
CP15_NORMAL_REGION_REMAP,
|
|
||||||
|
|
||||||
// c13 - Thread related registers
|
|
||||||
CP15_PID,
|
|
||||||
CP15_CONTEXT_ID,
|
|
||||||
CP15_THREAD_UPRW, // Thread ID register - User/Privileged Read/Write
|
|
||||||
CP15_THREAD_URO, // Thread ID register - User Read Only (Privileged R/W)
|
|
||||||
CP15_THREAD_PRW, // Thread ID register - Privileged R/W only.
|
|
||||||
|
|
||||||
// c15 - Performance and TLB lockdown registers
|
|
||||||
CP15_PERFORMANCE_MONITOR_CONTROL,
|
|
||||||
CP15_CYCLE_COUNTER,
|
|
||||||
CP15_COUNT_0,
|
|
||||||
CP15_COUNT_1,
|
|
||||||
CP15_READ_MAIN_TLB_LOCKDOWN_ENTRY,
|
|
||||||
CP15_WRITE_MAIN_TLB_LOCKDOWN_ENTRY,
|
|
||||||
CP15_MAIN_TLB_LOCKDOWN_VIRT_ADDRESS,
|
|
||||||
CP15_MAIN_TLB_LOCKDOWN_PHYS_ADDRESS,
|
|
||||||
CP15_MAIN_TLB_LOCKDOWN_ATTRIBUTE,
|
|
||||||
CP15_TLB_DEBUG_CONTROL,
|
|
||||||
|
|
||||||
// Skyeye defined
|
|
||||||
CP15_TLB_FAULT_ADDR,
|
|
||||||
CP15_TLB_FAULT_STATUS,
|
|
||||||
|
|
||||||
// Not an actual register.
|
|
||||||
// All registers should be defined above this.
|
|
||||||
CP15_REGISTER_COUNT,
|
|
||||||
};
|
|
||||||
|
|
||||||
class DynarmicCP15 final : public Dynarmic::A32::Coprocessor {
|
class DynarmicCP15 final : public Dynarmic::A32::Coprocessor {
|
||||||
public:
|
public:
|
||||||
using CoprocReg = Dynarmic::A32::CoprocReg;
|
using CoprocReg = Dynarmic::A32::CoprocReg;
|
||||||
|
|
||||||
explicit DynarmicCP15(u32* cp15) : CP15(cp15){};
|
explicit DynarmicCP15(ARM_Dynarmic_32& parent) : parent(parent) {}
|
||||||
|
|
||||||
std::optional<Callback> CompileInternalOperation(bool two, unsigned opc1, CoprocReg CRd,
|
std::optional<Callback> CompileInternalOperation(bool two, unsigned opc1, CoprocReg CRd,
|
||||||
CoprocReg CRn, CoprocReg CRm,
|
CoprocReg CRn, CoprocReg CRm,
|
||||||
|
@ -147,6 +34,9 @@ public:
|
||||||
std::optional<Callback> CompileStoreWords(bool two, bool long_transfer, CoprocReg CRd,
|
std::optional<Callback> CompileStoreWords(bool two, bool long_transfer, CoprocReg CRd,
|
||||||
std::optional<u8> option) override;
|
std::optional<u8> option) override;
|
||||||
|
|
||||||
private:
|
ARM_Dynarmic_32& parent;
|
||||||
u32* CP15{};
|
u32 uprw;
|
||||||
|
u32 uro;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
} // namespace Core
|
||||||
|
|
|
@ -40,7 +40,7 @@ VirtualDir MiiModel() {
|
||||||
out->AddFile(std::make_shared<ArrayVfsFile<MiiModelData::SHAPE_MID.size()>>(
|
out->AddFile(std::make_shared<ArrayVfsFile<MiiModelData::SHAPE_MID.size()>>(
|
||||||
MiiModelData::SHAPE_MID, "ShapeMid.dat"));
|
MiiModelData::SHAPE_MID, "ShapeMid.dat"));
|
||||||
|
|
||||||
return std::move(out);
|
return out;
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace FileSys::SystemArchive
|
} // namespace FileSys::SystemArchive
|
||||||
|
|
|
@ -23,7 +23,7 @@ VirtualFile PackBFTTF(const std::array<u8, Size>& data, const std::string& name)
|
||||||
|
|
||||||
std::vector<u8> bfttf(Size + sizeof(u64));
|
std::vector<u8> bfttf(Size + sizeof(u64));
|
||||||
|
|
||||||
u64 offset = 0;
|
size_t offset = 0;
|
||||||
Service::NS::EncryptSharedFont(vec, bfttf, offset);
|
Service::NS::EncryptSharedFont(vec, bfttf, offset);
|
||||||
return std::make_shared<VectorVfsFile>(std::move(bfttf), name);
|
return std::make_shared<VectorVfsFile>(std::move(bfttf), name);
|
||||||
}
|
}
|
||||||
|
|
|
@ -104,7 +104,7 @@ ResultCode MemoryManager::Allocate(PageLinkedList& page_list, std::size_t num_pa
|
||||||
// Ensure that we don't leave anything un-freed
|
// Ensure that we don't leave anything un-freed
|
||||||
auto group_guard = detail::ScopeExit([&] {
|
auto group_guard = detail::ScopeExit([&] {
|
||||||
for (const auto& it : page_list.Nodes()) {
|
for (const auto& it : page_list.Nodes()) {
|
||||||
const auto min_num_pages{std::min(
|
const auto min_num_pages{std::min<size_t>(
|
||||||
it.GetNumPages(), (chosen_manager.GetEndAddress() - it.GetAddress()) / PageSize)};
|
it.GetNumPages(), (chosen_manager.GetEndAddress() - it.GetAddress()) / PageSize)};
|
||||||
chosen_manager.Free(it.GetAddress(), min_num_pages);
|
chosen_manager.Free(it.GetAddress(), min_num_pages);
|
||||||
}
|
}
|
||||||
|
@ -165,7 +165,7 @@ ResultCode MemoryManager::Free(PageLinkedList& page_list, std::size_t num_pages,
|
||||||
|
|
||||||
// Free all of the pages
|
// Free all of the pages
|
||||||
for (const auto& it : page_list.Nodes()) {
|
for (const auto& it : page_list.Nodes()) {
|
||||||
const auto min_num_pages{std::min(
|
const auto min_num_pages{std::min<size_t>(
|
||||||
it.GetNumPages(), (chosen_manager.GetEndAddress() - it.GetAddress()) / PageSize)};
|
it.GetNumPages(), (chosen_manager.GetEndAddress() - it.GetAddress()) / PageSize)};
|
||||||
chosen_manager.Free(it.GetAddress(), min_num_pages);
|
chosen_manager.Free(it.GetAddress(), min_num_pages);
|
||||||
}
|
}
|
||||||
|
|
|
@ -132,7 +132,8 @@ std::shared_ptr<ResourceLimit> Process::GetResourceLimit() const {
|
||||||
|
|
||||||
u64 Process::GetTotalPhysicalMemoryAvailable() const {
|
u64 Process::GetTotalPhysicalMemoryAvailable() const {
|
||||||
const u64 capacity{resource_limit->GetCurrentResourceValue(ResourceType::PhysicalMemory) +
|
const u64 capacity{resource_limit->GetCurrentResourceValue(ResourceType::PhysicalMemory) +
|
||||||
page_table->GetTotalHeapSize() + image_size + main_thread_stack_size};
|
page_table->GetTotalHeapSize() + GetSystemResourceSize() + image_size +
|
||||||
|
main_thread_stack_size};
|
||||||
|
|
||||||
if (capacity < memory_usage_capacity) {
|
if (capacity < memory_usage_capacity) {
|
||||||
return capacity;
|
return capacity;
|
||||||
|
@ -146,7 +147,8 @@ u64 Process::GetTotalPhysicalMemoryAvailableWithoutSystemResource() const {
|
||||||
}
|
}
|
||||||
|
|
||||||
u64 Process::GetTotalPhysicalMemoryUsed() const {
|
u64 Process::GetTotalPhysicalMemoryUsed() const {
|
||||||
return image_size + main_thread_stack_size + page_table->GetTotalHeapSize();
|
return image_size + main_thread_stack_size + page_table->GetTotalHeapSize() +
|
||||||
|
GetSystemResourceSize();
|
||||||
}
|
}
|
||||||
|
|
||||||
u64 Process::GetTotalPhysicalMemoryUsedWithoutSystemResource() const {
|
u64 Process::GetTotalPhysicalMemoryUsedWithoutSystemResource() const {
|
||||||
|
|
|
@ -38,7 +38,7 @@ void ReadableEvent::Clear() {
|
||||||
|
|
||||||
ResultCode ReadableEvent::Reset() {
|
ResultCode ReadableEvent::Reset() {
|
||||||
if (!is_signaled) {
|
if (!is_signaled) {
|
||||||
LOG_ERROR(Kernel, "Handle is not signaled! object_id={}, object_type={}, object_name={}",
|
LOG_TRACE(Kernel, "Handle is not signaled! object_id={}, object_type={}, object_name={}",
|
||||||
GetObjectId(), GetTypeName(), GetName());
|
GetObjectId(), GetTypeName(), GetName());
|
||||||
return ERR_INVALID_STATE;
|
return ERR_INVALID_STATE;
|
||||||
}
|
}
|
||||||
|
|
|
@ -24,13 +24,9 @@ bool ResourceLimit::Reserve(ResourceType resource, s64 amount, u64 timeout) {
|
||||||
const std::size_t index{ResourceTypeToIndex(resource)};
|
const std::size_t index{ResourceTypeToIndex(resource)};
|
||||||
|
|
||||||
s64 new_value = current[index] + amount;
|
s64 new_value = current[index] + amount;
|
||||||
while (new_value > limit[index] && available[index] + amount <= limit[index]) {
|
if (new_value > limit[index] && available[index] + amount <= limit[index]) {
|
||||||
// TODO(bunnei): This is wrong for multicore, we should wait the calling thread for timeout
|
// TODO(bunnei): This is wrong for multicore, we should wait the calling thread for timeout
|
||||||
new_value = current[index] + amount;
|
new_value = current[index] + amount;
|
||||||
|
|
||||||
if (timeout >= 0) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (new_value <= limit[index]) {
|
if (new_value <= limit[index]) {
|
||||||
|
|
|
@ -44,6 +44,218 @@ static constexpr u32 SanitizeJPEGSize(std::size_t size) {
|
||||||
return static_cast<u32>(std::min(size, max_jpeg_image_size));
|
return static_cast<u32>(std::min(size, max_jpeg_image_size));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
class IManagerForSystemService final : public ServiceFramework<IManagerForSystemService> {
|
||||||
|
public:
|
||||||
|
explicit IManagerForSystemService(Common::UUID user_id)
|
||||||
|
: ServiceFramework("IManagerForSystemService") {
|
||||||
|
// clang-format off
|
||||||
|
static const FunctionInfo functions[] = {
|
||||||
|
{0, nullptr, "CheckAvailability"},
|
||||||
|
{1, nullptr, "GetAccountId"},
|
||||||
|
{2, nullptr, "EnsureIdTokenCacheAsync"},
|
||||||
|
{3, nullptr, "LoadIdTokenCache"},
|
||||||
|
{100, nullptr, "SetSystemProgramIdentification"},
|
||||||
|
{101, nullptr, "RefreshNotificationTokenAsync"}, // 7.0.0+
|
||||||
|
{110, nullptr, "GetServiceEntryRequirementCache"}, // 4.0.0+
|
||||||
|
{111, nullptr, "InvalidateServiceEntryRequirementCache"}, // 4.0.0+
|
||||||
|
{112, nullptr, "InvalidateTokenCache"}, // 4.0.0 - 6.2.0
|
||||||
|
{113, nullptr, "GetServiceEntryRequirementCacheForOnlinePlay"}, // 6.1.0+
|
||||||
|
{120, nullptr, "GetNintendoAccountId"},
|
||||||
|
{121, nullptr, "CalculateNintendoAccountAuthenticationFingerprint"}, // 9.0.0+
|
||||||
|
{130, nullptr, "GetNintendoAccountUserResourceCache"},
|
||||||
|
{131, nullptr, "RefreshNintendoAccountUserResourceCacheAsync"},
|
||||||
|
{132, nullptr, "RefreshNintendoAccountUserResourceCacheAsyncIfSecondsElapsed"},
|
||||||
|
{133, nullptr, "GetNintendoAccountVerificationUrlCache"}, // 9.0.0+
|
||||||
|
{134, nullptr, "RefreshNintendoAccountVerificationUrlCache"}, // 9.0.0+
|
||||||
|
{135, nullptr, "RefreshNintendoAccountVerificationUrlCacheAsyncIfSecondsElapsed"}, // 9.0.0+
|
||||||
|
{140, nullptr, "GetNetworkServiceLicenseCache"}, // 5.0.0+
|
||||||
|
{141, nullptr, "RefreshNetworkServiceLicenseCacheAsync"}, // 5.0.0+
|
||||||
|
{142, nullptr, "RefreshNetworkServiceLicenseCacheAsyncIfSecondsElapsed"}, // 5.0.0+
|
||||||
|
{150, nullptr, "CreateAuthorizationRequest"},
|
||||||
|
};
|
||||||
|
// clang-format on
|
||||||
|
|
||||||
|
RegisterHandlers(functions);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// 3.0.0+
|
||||||
|
class IFloatingRegistrationRequest final : public ServiceFramework<IFloatingRegistrationRequest> {
|
||||||
|
public:
|
||||||
|
explicit IFloatingRegistrationRequest(Common::UUID user_id)
|
||||||
|
: ServiceFramework("IFloatingRegistrationRequest") {
|
||||||
|
// clang-format off
|
||||||
|
static const FunctionInfo functions[] = {
|
||||||
|
{0, nullptr, "GetSessionId"},
|
||||||
|
{12, nullptr, "GetAccountId"},
|
||||||
|
{13, nullptr, "GetLinkedNintendoAccountId"},
|
||||||
|
{14, nullptr, "GetNickname"},
|
||||||
|
{15, nullptr, "GetProfileImage"},
|
||||||
|
{21, nullptr, "LoadIdTokenCache"},
|
||||||
|
{100, nullptr, "RegisterUser"}, // [1.0.0-3.0.2] RegisterAsync
|
||||||
|
{101, nullptr, "RegisterUserWithUid"}, // [1.0.0-3.0.2] RegisterWithUidAsync
|
||||||
|
{102, nullptr, "RegisterNetworkServiceAccountAsync"}, // 4.0.0+
|
||||||
|
{103, nullptr, "RegisterNetworkServiceAccountWithUidAsync"}, // 4.0.0+
|
||||||
|
{110, nullptr, "SetSystemProgramIdentification"},
|
||||||
|
{111, nullptr, "EnsureIdTokenCacheAsync"},
|
||||||
|
};
|
||||||
|
// clang-format on
|
||||||
|
|
||||||
|
RegisterHandlers(functions);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
class IAdministrator final : public ServiceFramework<IAdministrator> {
|
||||||
|
public:
|
||||||
|
explicit IAdministrator(Common::UUID user_id) : ServiceFramework("IAdministrator") {
|
||||||
|
// clang-format off
|
||||||
|
static const FunctionInfo functions[] = {
|
||||||
|
{0, nullptr, "CheckAvailability"},
|
||||||
|
{1, nullptr, "GetAccountId"},
|
||||||
|
{2, nullptr, "EnsureIdTokenCacheAsync"},
|
||||||
|
{3, nullptr, "LoadIdTokenCache"},
|
||||||
|
{100, nullptr, "SetSystemProgramIdentification"},
|
||||||
|
{101, nullptr, "RefreshNotificationTokenAsync"}, // 7.0.0+
|
||||||
|
{110, nullptr, "GetServiceEntryRequirementCache"}, // 4.0.0+
|
||||||
|
{111, nullptr, "InvalidateServiceEntryRequirementCache"}, // 4.0.0+
|
||||||
|
{112, nullptr, "InvalidateTokenCache"}, // 4.0.0 - 6.2.0
|
||||||
|
{113, nullptr, "GetServiceEntryRequirementCacheForOnlinePlay"}, // 6.1.0+
|
||||||
|
{120, nullptr, "GetNintendoAccountId"},
|
||||||
|
{121, nullptr, "CalculateNintendoAccountAuthenticationFingerprint"}, // 9.0.0+
|
||||||
|
{130, nullptr, "GetNintendoAccountUserResourceCache"},
|
||||||
|
{131, nullptr, "RefreshNintendoAccountUserResourceCacheAsync"},
|
||||||
|
{132, nullptr, "RefreshNintendoAccountUserResourceCacheAsyncIfSecondsElapsed"},
|
||||||
|
{133, nullptr, "GetNintendoAccountVerificationUrlCache"}, // 9.0.0+
|
||||||
|
{134, nullptr, "RefreshNintendoAccountVerificationUrlCacheAsync"}, // 9.0.0+
|
||||||
|
{135, nullptr, "RefreshNintendoAccountVerificationUrlCacheAsyncIfSecondsElapsed"}, // 9.0.0+
|
||||||
|
{140, nullptr, "GetNetworkServiceLicenseCache"}, // 5.0.0+
|
||||||
|
{141, nullptr, "RefreshNetworkServiceLicenseCacheAsync"}, // 5.0.0+
|
||||||
|
{142, nullptr, "RefreshNetworkServiceLicenseCacheAsyncIfSecondsElapsed"}, // 5.0.0+
|
||||||
|
{150, nullptr, "CreateAuthorizationRequest"},
|
||||||
|
{200, nullptr, "IsRegistered"},
|
||||||
|
{201, nullptr, "RegisterAsync"},
|
||||||
|
{202, nullptr, "UnregisterAsync"},
|
||||||
|
{203, nullptr, "DeleteRegistrationInfoLocally"},
|
||||||
|
{220, nullptr, "SynchronizeProfileAsync"},
|
||||||
|
{221, nullptr, "UploadProfileAsync"},
|
||||||
|
{222, nullptr, "SynchronizaProfileAsyncIfSecondsElapsed"},
|
||||||
|
{250, nullptr, "IsLinkedWithNintendoAccount"},
|
||||||
|
{251, nullptr, "CreateProcedureToLinkWithNintendoAccount"},
|
||||||
|
{252, nullptr, "ResumeProcedureToLinkWithNintendoAccount"},
|
||||||
|
{255, nullptr, "CreateProcedureToUpdateLinkageStateOfNintendoAccount"},
|
||||||
|
{256, nullptr, "ResumeProcedureToUpdateLinkageStateOfNintendoAccount"},
|
||||||
|
{260, nullptr, "CreateProcedureToLinkNnidWithNintendoAccount"}, // 3.0.0+
|
||||||
|
{261, nullptr, "ResumeProcedureToLinkNnidWithNintendoAccount"}, // 3.0.0+
|
||||||
|
{280, nullptr, "ProxyProcedureToAcquireApplicationAuthorizationForNintendoAccount"},
|
||||||
|
{290, nullptr, "GetRequestForNintendoAccountUserResourceView"}, // 8.0.0+
|
||||||
|
{300, nullptr, "TryRecoverNintendoAccountUserStateAsync"}, // 6.0.0+
|
||||||
|
{400, nullptr, "IsServiceEntryRequirementCacheRefreshRequiredForOnlinePlay"}, // 6.1.0+
|
||||||
|
{401, nullptr, "RefreshServiceEntryRequirementCacheForOnlinePlayAsync"}, // 6.1.0+
|
||||||
|
{900, nullptr, "GetAuthenticationInfoForWin"}, // 9.0.0+
|
||||||
|
{901, nullptr, "ImportAsyncForWin"}, // 9.0.0+
|
||||||
|
{997, nullptr, "DebugUnlinkNintendoAccountAsync"},
|
||||||
|
{998, nullptr, "DebugSetAvailabilityErrorDetail"},
|
||||||
|
};
|
||||||
|
// clang-format on
|
||||||
|
|
||||||
|
RegisterHandlers(functions);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
class IAuthorizationRequest final : public ServiceFramework<IAuthorizationRequest> {
|
||||||
|
public:
|
||||||
|
explicit IAuthorizationRequest(Common::UUID user_id)
|
||||||
|
: ServiceFramework("IAuthorizationRequest") {
|
||||||
|
// clang-format off
|
||||||
|
static const FunctionInfo functions[] = {
|
||||||
|
{0, nullptr, "GetSessionId"},
|
||||||
|
{10, nullptr, "InvokeWithoutInteractionAsync"},
|
||||||
|
{19, nullptr, "IsAuthorized"},
|
||||||
|
{20, nullptr, "GetAuthorizationCode"},
|
||||||
|
{21, nullptr, "GetIdToken"},
|
||||||
|
{22, nullptr, "GetState"},
|
||||||
|
};
|
||||||
|
// clang-format on
|
||||||
|
|
||||||
|
RegisterHandlers(functions);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
class IOAuthProcedure final : public ServiceFramework<IOAuthProcedure> {
|
||||||
|
public:
|
||||||
|
explicit IOAuthProcedure(Common::UUID user_id) : ServiceFramework("IOAuthProcedure") {
|
||||||
|
// clang-format off
|
||||||
|
static const FunctionInfo functions[] = {
|
||||||
|
{0, nullptr, "PrepareAsync"},
|
||||||
|
{1, nullptr, "GetRequest"},
|
||||||
|
{2, nullptr, "ApplyResponse"},
|
||||||
|
{3, nullptr, "ApplyResponseAsync"},
|
||||||
|
{10, nullptr, "Suspend"},
|
||||||
|
};
|
||||||
|
// clang-format on
|
||||||
|
|
||||||
|
RegisterHandlers(functions);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// 3.0.0+
|
||||||
|
class IOAuthProcedureForExternalNsa final : public ServiceFramework<IOAuthProcedureForExternalNsa> {
|
||||||
|
public:
|
||||||
|
explicit IOAuthProcedureForExternalNsa(Common::UUID user_id)
|
||||||
|
: ServiceFramework("IOAuthProcedureForExternalNsa") {
|
||||||
|
// clang-format off
|
||||||
|
static const FunctionInfo functions[] = {
|
||||||
|
{0, nullptr, "PrepareAsync"},
|
||||||
|
{1, nullptr, "GetRequest"},
|
||||||
|
{2, nullptr, "ApplyResponse"},
|
||||||
|
{3, nullptr, "ApplyResponseAsync"},
|
||||||
|
{10, nullptr, "Suspend"},
|
||||||
|
{100, nullptr, "GetAccountId"},
|
||||||
|
{101, nullptr, "GetLinkedNintendoAccountId"},
|
||||||
|
{102, nullptr, "GetNickname"},
|
||||||
|
{103, nullptr, "GetProfileImage"},
|
||||||
|
};
|
||||||
|
// clang-format on
|
||||||
|
|
||||||
|
RegisterHandlers(functions);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
class IOAuthProcedureForNintendoAccountLinkage final
|
||||||
|
: public ServiceFramework<IOAuthProcedureForNintendoAccountLinkage> {
|
||||||
|
public:
|
||||||
|
explicit IOAuthProcedureForNintendoAccountLinkage(Common::UUID user_id)
|
||||||
|
: ServiceFramework("IOAuthProcedureForNintendoAccountLinkage") {
|
||||||
|
// clang-format off
|
||||||
|
static const FunctionInfo functions[] = {
|
||||||
|
{0, nullptr, "PrepareAsync"},
|
||||||
|
{1, nullptr, "GetRequest"},
|
||||||
|
{2, nullptr, "ApplyResponse"},
|
||||||
|
{3, nullptr, "ApplyResponseAsync"},
|
||||||
|
{10, nullptr, "Suspend"},
|
||||||
|
{100, nullptr, "GetRequestWithTheme"},
|
||||||
|
{101, nullptr, "IsNetworkServiceAccountReplaced"},
|
||||||
|
{199, nullptr, "GetUrlForIntroductionOfExtraMembership"}, // 2.0.0 - 5.1.0
|
||||||
|
};
|
||||||
|
// clang-format on
|
||||||
|
|
||||||
|
RegisterHandlers(functions);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
class INotifier final : public ServiceFramework<INotifier> {
|
||||||
|
public:
|
||||||
|
explicit INotifier(Common::UUID user_id) : ServiceFramework("INotifier") {
|
||||||
|
// clang-format off
|
||||||
|
static const FunctionInfo functions[] = {
|
||||||
|
{0, nullptr, "GetSystemEvent"},
|
||||||
|
};
|
||||||
|
// clang-format on
|
||||||
|
|
||||||
|
RegisterHandlers(functions);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
class IProfileCommon : public ServiceFramework<IProfileCommon> {
|
class IProfileCommon : public ServiceFramework<IProfileCommon> {
|
||||||
public:
|
public:
|
||||||
explicit IProfileCommon(const char* name, bool editor_commands, Common::UUID user_id,
|
explicit IProfileCommon(const char* name, bool editor_commands, Common::UUID user_id,
|
||||||
|
@ -226,6 +438,54 @@ public:
|
||||||
: IProfileCommon("IProfileEditor", true, user_id, profile_manager) {}
|
: IProfileCommon("IProfileEditor", true, user_id, profile_manager) {}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
class IAsyncContext final : public ServiceFramework<IAsyncContext> {
|
||||||
|
public:
|
||||||
|
explicit IAsyncContext(Common::UUID user_id) : ServiceFramework("IAsyncContext") {
|
||||||
|
// clang-format off
|
||||||
|
static const FunctionInfo functions[] = {
|
||||||
|
{0, nullptr, "GetSystemEvent"},
|
||||||
|
{1, nullptr, "Cancel"},
|
||||||
|
{2, nullptr, "HasDone"},
|
||||||
|
{3, nullptr, "GetResult"},
|
||||||
|
};
|
||||||
|
// clang-format on
|
||||||
|
|
||||||
|
RegisterHandlers(functions);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
class ISessionObject final : public ServiceFramework<ISessionObject> {
|
||||||
|
public:
|
||||||
|
explicit ISessionObject(Common::UUID user_id) : ServiceFramework("ISessionObject") {
|
||||||
|
// clang-format off
|
||||||
|
static const FunctionInfo functions[] = {
|
||||||
|
{999, nullptr, "Dummy"},
|
||||||
|
};
|
||||||
|
// clang-format on
|
||||||
|
|
||||||
|
RegisterHandlers(functions);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
class IGuestLoginRequest final : public ServiceFramework<IGuestLoginRequest> {
|
||||||
|
public:
|
||||||
|
explicit IGuestLoginRequest(Common::UUID) : ServiceFramework("IGuestLoginRequest") {
|
||||||
|
// clang-format off
|
||||||
|
static const FunctionInfo functions[] = {
|
||||||
|
{0, nullptr, "GetSessionId"},
|
||||||
|
{11, nullptr, "Unknown"}, // 1.0.0 - 2.3.0 (the name is blank on Switchbrew)
|
||||||
|
{12, nullptr, "GetAccountId"},
|
||||||
|
{13, nullptr, "GetLinkedNintendoAccountId"},
|
||||||
|
{14, nullptr, "GetNickname"},
|
||||||
|
{15, nullptr, "GetProfileImage"},
|
||||||
|
{21, nullptr, "LoadIdTokenCache"}, // 3.0.0+
|
||||||
|
};
|
||||||
|
// clang-format on
|
||||||
|
|
||||||
|
RegisterHandlers(functions);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
class IManagerForApplication final : public ServiceFramework<IManagerForApplication> {
|
class IManagerForApplication final : public ServiceFramework<IManagerForApplication> {
|
||||||
public:
|
public:
|
||||||
explicit IManagerForApplication(Common::UUID user_id)
|
explicit IManagerForApplication(Common::UUID user_id)
|
||||||
|
@ -265,6 +525,87 @@ private:
|
||||||
Common::UUID user_id;
|
Common::UUID user_id;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// 6.0.0+
|
||||||
|
class IAsyncNetworkServiceLicenseKindContext final
|
||||||
|
: public ServiceFramework<IAsyncNetworkServiceLicenseKindContext> {
|
||||||
|
public:
|
||||||
|
explicit IAsyncNetworkServiceLicenseKindContext(Common::UUID user_id)
|
||||||
|
: ServiceFramework("IAsyncNetworkServiceLicenseKindContext") {
|
||||||
|
// clang-format off
|
||||||
|
static const FunctionInfo functions[] = {
|
||||||
|
{0, nullptr, "GetSystemEvent"},
|
||||||
|
{1, nullptr, "Cancel"},
|
||||||
|
{2, nullptr, "HasDone"},
|
||||||
|
{3, nullptr, "GetResult"},
|
||||||
|
{4, nullptr, "GetNetworkServiceLicenseKind"},
|
||||||
|
};
|
||||||
|
// clang-format on
|
||||||
|
|
||||||
|
RegisterHandlers(functions);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// 8.0.0+
|
||||||
|
class IOAuthProcedureForUserRegistration final
|
||||||
|
: public ServiceFramework<IOAuthProcedureForUserRegistration> {
|
||||||
|
public:
|
||||||
|
explicit IOAuthProcedureForUserRegistration(Common::UUID user_id)
|
||||||
|
: ServiceFramework("IOAuthProcedureForUserRegistration") {
|
||||||
|
// clang-format off
|
||||||
|
static const FunctionInfo functions[] = {
|
||||||
|
{0, nullptr, "PrepareAsync"},
|
||||||
|
{1, nullptr, "GetRequest"},
|
||||||
|
{2, nullptr, "ApplyResponse"},
|
||||||
|
{3, nullptr, "ApplyResponseAsync"},
|
||||||
|
{10, nullptr, "Suspend"},
|
||||||
|
{100, nullptr, "GetAccountId"},
|
||||||
|
{101, nullptr, "GetLinkedNintendoAccountId"},
|
||||||
|
{102, nullptr, "GetNickname"},
|
||||||
|
{103, nullptr, "GetProfileImage"},
|
||||||
|
{110, nullptr, "RegisterUserAsync"},
|
||||||
|
{111, nullptr, "GetUid"},
|
||||||
|
};
|
||||||
|
// clang-format on
|
||||||
|
|
||||||
|
RegisterHandlers(functions);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
class DAUTH_O final : public ServiceFramework<DAUTH_O> {
|
||||||
|
public:
|
||||||
|
explicit DAUTH_O(Common::UUID) : ServiceFramework("dauth:o") {
|
||||||
|
// clang-format off
|
||||||
|
static const FunctionInfo functions[] = {
|
||||||
|
{0, nullptr, "EnsureAuthenticationTokenCacheAsync"}, // [5.0.0-5.1.0] GeneratePostData
|
||||||
|
{1, nullptr, "LoadAuthenticationTokenCache"}, // 6.0.0+
|
||||||
|
{2, nullptr, "InvalidateAuthenticationTokenCache"}, // 6.0.0+
|
||||||
|
{10, nullptr, "EnsureEdgeTokenCacheAsync"}, // 6.0.0+
|
||||||
|
{11, nullptr, "LoadEdgeTokenCache"}, // 6.0.0+
|
||||||
|
{12, nullptr, "InvalidateEdgeTokenCache"}, // 6.0.0+
|
||||||
|
};
|
||||||
|
// clang-format on
|
||||||
|
|
||||||
|
RegisterHandlers(functions);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// 6.0.0+
|
||||||
|
class IAsyncResult final : public ServiceFramework<IAsyncResult> {
|
||||||
|
public:
|
||||||
|
explicit IAsyncResult(Common::UUID user_id) : ServiceFramework("IAsyncResult") {
|
||||||
|
// clang-format off
|
||||||
|
static const FunctionInfo functions[] = {
|
||||||
|
{0, nullptr, "GetResult"},
|
||||||
|
{1, nullptr, "Cancel"},
|
||||||
|
{2, nullptr, "IsAvailable"},
|
||||||
|
{3, nullptr, "GetSystemEvent"},
|
||||||
|
};
|
||||||
|
// clang-format on
|
||||||
|
|
||||||
|
RegisterHandlers(functions);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
void Module::Interface::GetUserCount(Kernel::HLERequestContext& ctx) {
|
void Module::Interface::GetUserCount(Kernel::HLERequestContext& ctx) {
|
||||||
LOG_DEBUG(Service_ACC, "called");
|
LOG_DEBUG(Service_ACC, "called");
|
||||||
IPC::ResponseBuilder rb{ctx, 3};
|
IPC::ResponseBuilder rb{ctx, 3};
|
||||||
|
|
|
@ -13,8 +13,8 @@ ACC_AA::ACC_AA(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> p
|
||||||
{0, nullptr, "EnsureCacheAsync"},
|
{0, nullptr, "EnsureCacheAsync"},
|
||||||
{1, nullptr, "LoadCache"},
|
{1, nullptr, "LoadCache"},
|
||||||
{2, nullptr, "GetDeviceAccountId"},
|
{2, nullptr, "GetDeviceAccountId"},
|
||||||
{50, nullptr, "RegisterNotificationTokenAsync"},
|
{50, nullptr, "RegisterNotificationTokenAsync"}, // 1.0.0 - 6.2.0
|
||||||
{51, nullptr, "UnregisterNotificationTokenAsync"},
|
{51, nullptr, "UnregisterNotificationTokenAsync"}, // 1.0.0 - 6.2.0
|
||||||
};
|
};
|
||||||
RegisterHandlers(functions);
|
RegisterHandlers(functions);
|
||||||
}
|
}
|
||||||
|
|
|
@ -17,28 +17,28 @@ ACC_SU::ACC_SU(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> p
|
||||||
{3, &ACC_SU::ListOpenUsers, "ListOpenUsers"},
|
{3, &ACC_SU::ListOpenUsers, "ListOpenUsers"},
|
||||||
{4, &ACC_SU::GetLastOpenedUser, "GetLastOpenedUser"},
|
{4, &ACC_SU::GetLastOpenedUser, "GetLastOpenedUser"},
|
||||||
{5, &ACC_SU::GetProfile, "GetProfile"},
|
{5, &ACC_SU::GetProfile, "GetProfile"},
|
||||||
{6, nullptr, "GetProfileDigest"},
|
{6, nullptr, "GetProfileDigest"}, // 3.0.0+
|
||||||
{50, &ACC_SU::IsUserRegistrationRequestPermitted, "IsUserRegistrationRequestPermitted"},
|
{50, &ACC_SU::IsUserRegistrationRequestPermitted, "IsUserRegistrationRequestPermitted"},
|
||||||
{51, &ACC_SU::TrySelectUserWithoutInteraction, "TrySelectUserWithoutInteraction"},
|
{51, &ACC_SU::TrySelectUserWithoutInteraction, "TrySelectUserWithoutInteraction"},
|
||||||
{60, nullptr, "ListOpenContextStoredUsers"},
|
{60, nullptr, "ListOpenContextStoredUsers"}, // 5.0.0 - 5.1.0
|
||||||
{99, nullptr, "DebugActivateOpenContextRetention"},
|
{99, nullptr, "DebugActivateOpenContextRetention"}, // 6.0.0+
|
||||||
{100, nullptr, "GetUserRegistrationNotifier"},
|
{100, nullptr, "GetUserRegistrationNotifier"},
|
||||||
{101, nullptr, "GetUserStateChangeNotifier"},
|
{101, nullptr, "GetUserStateChangeNotifier"},
|
||||||
{102, nullptr, "GetBaasAccountManagerForSystemService"},
|
{102, nullptr, "GetBaasAccountManagerForSystemService"},
|
||||||
{103, nullptr, "GetBaasUserAvailabilityChangeNotifier"},
|
{103, nullptr, "GetBaasUserAvailabilityChangeNotifier"},
|
||||||
{104, nullptr, "GetProfileUpdateNotifier"},
|
{104, nullptr, "GetProfileUpdateNotifier"},
|
||||||
{105, nullptr, "CheckNetworkServiceAvailabilityAsync"},
|
{105, nullptr, "CheckNetworkServiceAvailabilityAsync"}, // 4.0.0+
|
||||||
{106, nullptr, "GetProfileSyncNotifier"},
|
{106, nullptr, "GetProfileSyncNotifier"}, // 9.0.0+
|
||||||
{110, nullptr, "StoreSaveDataThumbnail"},
|
{110, nullptr, "StoreSaveDataThumbnail"},
|
||||||
{111, nullptr, "ClearSaveDataThumbnail"},
|
{111, nullptr, "ClearSaveDataThumbnail"},
|
||||||
{112, nullptr, "LoadSaveDataThumbnail"},
|
{112, nullptr, "LoadSaveDataThumbnail"},
|
||||||
{113, nullptr, "GetSaveDataThumbnailExistence"},
|
{113, nullptr, "GetSaveDataThumbnailExistence"}, // 5.0.0+
|
||||||
{120, nullptr, "ListOpenUsersInApplication"},
|
{120, nullptr, "ListOpenUsersInApplication"}, // 10.0.0+
|
||||||
{130, nullptr, "ActivateOpenContextRetention"},
|
{130, nullptr, "ActivateOpenContextRetention"}, // 6.0.0+
|
||||||
{140, &ACC_SU::ListQualifiedUsers, "ListQualifiedUsers"},
|
{140, &ACC_SU::ListQualifiedUsers, "ListQualifiedUsers"}, // 6.0.0+
|
||||||
{150, nullptr, "AuthenticateApplicationAsync"},
|
{150, nullptr, "AuthenticateApplicationAsync"}, // 10.0.0+
|
||||||
{190, nullptr, "GetUserLastOpenedApplication"},
|
{190, nullptr, "GetUserLastOpenedApplication"}, // 1.0.0 - 9.2.0
|
||||||
{191, nullptr, "ActivateOpenContextHolder"},
|
{191, nullptr, "ActivateOpenContextHolder"}, // 7.0.0+
|
||||||
{200, nullptr, "BeginUserRegistration"},
|
{200, nullptr, "BeginUserRegistration"},
|
||||||
{201, nullptr, "CompleteUserRegistration"},
|
{201, nullptr, "CompleteUserRegistration"},
|
||||||
{202, nullptr, "CancelUserRegistration"},
|
{202, nullptr, "CancelUserRegistration"},
|
||||||
|
@ -46,15 +46,15 @@ ACC_SU::ACC_SU(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> p
|
||||||
{204, nullptr, "SetUserPosition"},
|
{204, nullptr, "SetUserPosition"},
|
||||||
{205, &ACC_SU::GetProfileEditor, "GetProfileEditor"},
|
{205, &ACC_SU::GetProfileEditor, "GetProfileEditor"},
|
||||||
{206, nullptr, "CompleteUserRegistrationForcibly"},
|
{206, nullptr, "CompleteUserRegistrationForcibly"},
|
||||||
{210, nullptr, "CreateFloatingRegistrationRequest"},
|
{210, nullptr, "CreateFloatingRegistrationRequest"}, // 3.0.0+
|
||||||
{211, nullptr, "CreateProcedureToRegisterUserWithNintendoAccount"},
|
{211, nullptr, "CreateProcedureToRegisterUserWithNintendoAccount"}, // 8.0.0+
|
||||||
{212, nullptr, "ResumeProcedureToRegisterUserWithNintendoAccount"},
|
{212, nullptr, "ResumeProcedureToRegisterUserWithNintendoAccount"}, // 8.0.0+
|
||||||
{230, nullptr, "AuthenticateServiceAsync"},
|
{230, nullptr, "AuthenticateServiceAsync"},
|
||||||
{250, nullptr, "GetBaasAccountAdministrator"},
|
{250, nullptr, "GetBaasAccountAdministrator"},
|
||||||
{290, nullptr, "ProxyProcedureForGuestLoginWithNintendoAccount"},
|
{290, nullptr, "ProxyProcedureForGuestLoginWithNintendoAccount"},
|
||||||
{291, nullptr, "ProxyProcedureForFloatingRegistrationWithNintendoAccount"},
|
{291, nullptr, "ProxyProcedureForFloatingRegistrationWithNintendoAccount"}, // 3.0.0+
|
||||||
{299, nullptr, "SuspendBackgroundDaemon"},
|
{299, nullptr, "SuspendBackgroundDaemon"},
|
||||||
{997, nullptr, "DebugInvalidateTokenCacheForUser"},
|
{997, nullptr, "DebugInvalidateTokenCacheForUser"}, // 3.0.0+
|
||||||
{998, nullptr, "DebugSetUserStateClose"},
|
{998, nullptr, "DebugSetUserStateClose"},
|
||||||
{999, nullptr, "DebugSetUserStateOpen"},
|
{999, nullptr, "DebugSetUserStateOpen"},
|
||||||
};
|
};
|
||||||
|
|
|
@ -17,23 +17,23 @@ ACC_U0::ACC_U0(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> p
|
||||||
{3, &ACC_U0::ListOpenUsers, "ListOpenUsers"},
|
{3, &ACC_U0::ListOpenUsers, "ListOpenUsers"},
|
||||||
{4, &ACC_U0::GetLastOpenedUser, "GetLastOpenedUser"},
|
{4, &ACC_U0::GetLastOpenedUser, "GetLastOpenedUser"},
|
||||||
{5, &ACC_U0::GetProfile, "GetProfile"},
|
{5, &ACC_U0::GetProfile, "GetProfile"},
|
||||||
{6, nullptr, "GetProfileDigest"},
|
{6, nullptr, "GetProfileDigest"}, // 3.0.0+
|
||||||
{50, &ACC_U0::IsUserRegistrationRequestPermitted, "IsUserRegistrationRequestPermitted"},
|
{50, &ACC_U0::IsUserRegistrationRequestPermitted, "IsUserRegistrationRequestPermitted"},
|
||||||
{51, &ACC_U0::TrySelectUserWithoutInteraction, "TrySelectUserWithoutInteraction"},
|
{51, &ACC_U0::TrySelectUserWithoutInteraction, "TrySelectUserWithoutInteraction"},
|
||||||
{60, nullptr, "ListOpenContextStoredUsers"},
|
{60, nullptr, "ListOpenContextStoredUsers"}, // 5.0.0 - 5.1.0
|
||||||
{99, nullptr, "DebugActivateOpenContextRetention"},
|
{99, nullptr, "DebugActivateOpenContextRetention"}, // 6.0.0+
|
||||||
{100, &ACC_U0::InitializeApplicationInfo, "InitializeApplicationInfo"},
|
{100, &ACC_U0::InitializeApplicationInfo, "InitializeApplicationInfo"},
|
||||||
{101, &ACC_U0::GetBaasAccountManagerForApplication, "GetBaasAccountManagerForApplication"},
|
{101, &ACC_U0::GetBaasAccountManagerForApplication, "GetBaasAccountManagerForApplication"},
|
||||||
{102, nullptr, "AuthenticateApplicationAsync"},
|
{102, nullptr, "AuthenticateApplicationAsync"},
|
||||||
{103, nullptr, "CheckNetworkServiceAvailabilityAsync"},
|
{103, nullptr, "CheckNetworkServiceAvailabilityAsync"}, // 4.0.0+
|
||||||
{110, nullptr, "StoreSaveDataThumbnail"},
|
{110, nullptr, "StoreSaveDataThumbnail"},
|
||||||
{111, nullptr, "ClearSaveDataThumbnail"},
|
{111, nullptr, "ClearSaveDataThumbnail"},
|
||||||
{120, nullptr, "CreateGuestLoginRequest"},
|
{120, nullptr, "CreateGuestLoginRequest"},
|
||||||
{130, nullptr, "LoadOpenContext"},
|
{130, nullptr, "LoadOpenContext"}, // 5.0.0+
|
||||||
{131, nullptr, "ListOpenContextStoredUsers"},
|
{131, nullptr, "ListOpenContextStoredUsers"}, // 6.0.0+
|
||||||
{140, &ACC_U0::InitializeApplicationInfoRestricted, "InitializeApplicationInfoRestricted"},
|
{140, &ACC_U0::InitializeApplicationInfoRestricted, "InitializeApplicationInfoRestricted"}, // 6.0.0+
|
||||||
{141, &ACC_U0::ListQualifiedUsers, "ListQualifiedUsers"},
|
{141, &ACC_U0::ListQualifiedUsers, "ListQualifiedUsers"}, // 6.0.0+
|
||||||
{150, &ACC_U0::IsUserAccountSwitchLocked, "IsUserAccountSwitchLocked"},
|
{150, &ACC_U0::IsUserAccountSwitchLocked, "IsUserAccountSwitchLocked"}, // 6.0.0+
|
||||||
};
|
};
|
||||||
// clang-format on
|
// clang-format on
|
||||||
|
|
||||||
|
|
|
@ -17,28 +17,29 @@ ACC_U1::ACC_U1(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> p
|
||||||
{3, &ACC_U1::ListOpenUsers, "ListOpenUsers"},
|
{3, &ACC_U1::ListOpenUsers, "ListOpenUsers"},
|
||||||
{4, &ACC_U1::GetLastOpenedUser, "GetLastOpenedUser"},
|
{4, &ACC_U1::GetLastOpenedUser, "GetLastOpenedUser"},
|
||||||
{5, &ACC_U1::GetProfile, "GetProfile"},
|
{5, &ACC_U1::GetProfile, "GetProfile"},
|
||||||
{6, nullptr, "GetProfileDigest"},
|
{6, nullptr, "GetProfileDigest"}, // 3.0.0+
|
||||||
{50, &ACC_U1::IsUserRegistrationRequestPermitted, "IsUserRegistrationRequestPermitted"},
|
{50, &ACC_U1::IsUserRegistrationRequestPermitted, "IsUserRegistrationRequestPermitted"},
|
||||||
{51, &ACC_U1::TrySelectUserWithoutInteraction, "TrySelectUserWithoutInteraction"},
|
{51, &ACC_U1::TrySelectUserWithoutInteraction, "TrySelectUserWithoutInteraction"},
|
||||||
{60, nullptr, "ListOpenContextStoredUsers"},
|
{60, nullptr, "ListOpenContextStoredUsers"}, // 5.0.0 - 5.1.0
|
||||||
{99, nullptr, "DebugActivateOpenContextRetention"},
|
{99, nullptr, "DebugActivateOpenContextRetention"}, // 6.0.0+
|
||||||
{100, nullptr, "GetUserRegistrationNotifier"},
|
{100, nullptr, "GetUserRegistrationNotifier"},
|
||||||
{101, nullptr, "GetUserStateChangeNotifier"},
|
{101, nullptr, "GetUserStateChangeNotifier"},
|
||||||
{102, nullptr, "GetBaasAccountManagerForSystemService"},
|
{102, nullptr, "GetBaasAccountManagerForSystemService"},
|
||||||
{103, nullptr, "GetProfileUpdateNotifier"},
|
{103, nullptr, "GetBaasUserAvailabilityChangeNotifier"},
|
||||||
{104, nullptr, "CheckNetworkServiceAvailabilityAsync"},
|
{104, nullptr, "GetProfileUpdateNotifier"},
|
||||||
{105, nullptr, "GetBaasUserAvailabilityChangeNotifier"},
|
{105, nullptr, "CheckNetworkServiceAvailabilityAsync"}, // 4.0.0+
|
||||||
{106, nullptr, "GetProfileSyncNotifier"},
|
{106, nullptr, "GetProfileSyncNotifier"}, // 9.0.0+
|
||||||
{110, nullptr, "StoreSaveDataThumbnail"},
|
{110, nullptr, "StoreSaveDataThumbnail"},
|
||||||
{111, nullptr, "ClearSaveDataThumbnail"},
|
{111, nullptr, "ClearSaveDataThumbnail"},
|
||||||
{112, nullptr, "LoadSaveDataThumbnail"},
|
{112, nullptr, "LoadSaveDataThumbnail"},
|
||||||
{113, nullptr, "GetSaveDataThumbnailExistence"},
|
{113, nullptr, "GetSaveDataThumbnailExistence"}, // 5.0.0+
|
||||||
{130, nullptr, "ActivateOpenContextRetention"},
|
{120, nullptr, "ListOpenUsersInApplication"}, // 10.0.0+
|
||||||
{140, &ACC_U1::ListQualifiedUsers, "ListQualifiedUsers"},
|
{130, nullptr, "ActivateOpenContextRetention"}, // 6.0.0+
|
||||||
{150, nullptr, "AuthenticateApplicationAsync"},
|
{140, &ACC_U1::ListQualifiedUsers, "ListQualifiedUsers"}, // 6.0.0+
|
||||||
{190, nullptr, "GetUserLastOpenedApplication"},
|
{150, nullptr, "AuthenticateApplicationAsync"}, // 10.0.0+
|
||||||
{191, nullptr, "ActivateOpenContextHolder"},
|
{190, nullptr, "GetUserLastOpenedApplication"}, // 1.0.0 - 9.2.0
|
||||||
{997, nullptr, "DebugInvalidateTokenCacheForUser"},
|
{191, nullptr, "ActivateOpenContextHolder"}, // 7.0.0+
|
||||||
|
{997, nullptr, "DebugInvalidateTokenCacheForUser"}, // 3.0.0+
|
||||||
{998, nullptr, "DebugSetUserStateClose"},
|
{998, nullptr, "DebugSetUserStateClose"},
|
||||||
{999, nullptr, "DebugSetUserStateOpen"},
|
{999, nullptr, "DebugSetUserStateOpen"},
|
||||||
};
|
};
|
||||||
|
|
|
@ -30,7 +30,7 @@ static Core::Frontend::SoftwareKeyboardParameters ConvertToFrontendParameters(
|
||||||
config.sub_text.size());
|
config.sub_text.size());
|
||||||
params.guide_text = Common::UTF16StringFromFixedZeroTerminatedBuffer(config.guide_text.data(),
|
params.guide_text = Common::UTF16StringFromFixedZeroTerminatedBuffer(config.guide_text.data(),
|
||||||
config.guide_text.size());
|
config.guide_text.size());
|
||||||
params.initial_text = initial_text;
|
params.initial_text = std::move(initial_text);
|
||||||
params.max_length = config.length_limit == 0 ? DEFAULT_MAX_LENGTH : config.length_limit;
|
params.max_length = config.length_limit == 0 ? DEFAULT_MAX_LENGTH : config.length_limit;
|
||||||
params.password = static_cast<bool>(config.is_password);
|
params.password = static_cast<bool>(config.is_password);
|
||||||
params.cursor_at_beginning = static_cast<bool>(config.initial_cursor_position);
|
params.cursor_at_beginning = static_cast<bool>(config.initial_cursor_position);
|
||||||
|
@ -109,7 +109,7 @@ void SoftwareKeyboard::Execute() {
|
||||||
|
|
||||||
const auto parameters = ConvertToFrontendParameters(config, initial_text);
|
const auto parameters = ConvertToFrontendParameters(config, initial_text);
|
||||||
|
|
||||||
frontend.RequestText([this](std::optional<std::u16string> text) { WriteText(text); },
|
frontend.RequestText([this](std::optional<std::u16string> text) { WriteText(std::move(text)); },
|
||||||
parameters);
|
parameters);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -566,6 +566,14 @@ void Controller_NPad::DisconnectNPad(u32 npad_id) {
|
||||||
connected_controllers[NPadIdToIndex(npad_id)].is_connected = false;
|
connected_controllers[NPadIdToIndex(npad_id)].is_connected = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Controller_NPad::SetGyroscopeZeroDriftMode(GyroscopeZeroDriftMode drift_mode) {
|
||||||
|
gyroscope_zero_drift_mode = drift_mode;
|
||||||
|
}
|
||||||
|
|
||||||
|
Controller_NPad::GyroscopeZeroDriftMode Controller_NPad::GetGyroscopeZeroDriftMode() const {
|
||||||
|
return gyroscope_zero_drift_mode;
|
||||||
|
}
|
||||||
|
|
||||||
void Controller_NPad::StartLRAssignmentMode() {
|
void Controller_NPad::StartLRAssignmentMode() {
|
||||||
// Nothing internally is used for lr assignment mode. Since we have the ability to set the
|
// Nothing internally is used for lr assignment mode. Since we have the ability to set the
|
||||||
// controller types from boot, it doesn't really matter about showing a selection screen
|
// controller types from boot, it doesn't really matter about showing a selection screen
|
||||||
|
|
|
@ -58,6 +58,12 @@ public:
|
||||||
};
|
};
|
||||||
static_assert(sizeof(Vibration) == 0x10, "Vibration is an invalid size");
|
static_assert(sizeof(Vibration) == 0x10, "Vibration is an invalid size");
|
||||||
|
|
||||||
|
enum class GyroscopeZeroDriftMode : u32 {
|
||||||
|
Loose = 0,
|
||||||
|
Standard = 1,
|
||||||
|
Tight = 2,
|
||||||
|
};
|
||||||
|
|
||||||
enum class NpadHoldType : u64 {
|
enum class NpadHoldType : u64 {
|
||||||
Vertical = 0,
|
Vertical = 0,
|
||||||
Horizontal = 1,
|
Horizontal = 1,
|
||||||
|
@ -117,6 +123,8 @@ public:
|
||||||
|
|
||||||
void ConnectNPad(u32 npad_id);
|
void ConnectNPad(u32 npad_id);
|
||||||
void DisconnectNPad(u32 npad_id);
|
void DisconnectNPad(u32 npad_id);
|
||||||
|
void SetGyroscopeZeroDriftMode(GyroscopeZeroDriftMode drift_mode);
|
||||||
|
GyroscopeZeroDriftMode GetGyroscopeZeroDriftMode() const;
|
||||||
LedPattern GetLedPattern(u32 npad_id);
|
LedPattern GetLedPattern(u32 npad_id);
|
||||||
void SetVibrationEnabled(bool can_vibrate);
|
void SetVibrationEnabled(bool can_vibrate);
|
||||||
bool IsVibrationEnabled() const;
|
bool IsVibrationEnabled() const;
|
||||||
|
@ -324,8 +332,8 @@ private:
|
||||||
std::array<Kernel::EventPair, 10> styleset_changed_events;
|
std::array<Kernel::EventPair, 10> styleset_changed_events;
|
||||||
Vibration last_processed_vibration{};
|
Vibration last_processed_vibration{};
|
||||||
std::array<ControllerHolder, 10> connected_controllers{};
|
std::array<ControllerHolder, 10> connected_controllers{};
|
||||||
|
GyroscopeZeroDriftMode gyroscope_zero_drift_mode{GyroscopeZeroDriftMode::Standard};
|
||||||
bool can_controllers_vibrate{true};
|
bool can_controllers_vibrate{true};
|
||||||
|
|
||||||
std::array<ControllerPad, 10> npad_pad_states{};
|
std::array<ControllerPad, 10> npad_pad_states{};
|
||||||
bool is_in_lr_assignment_mode{false};
|
bool is_in_lr_assignment_mode{false};
|
||||||
Core::System& system;
|
Core::System& system;
|
||||||
|
|
|
@ -161,7 +161,7 @@ Hid::Hid(Core::System& system) : ServiceFramework("hid"), system(system) {
|
||||||
{40, nullptr, "AcquireXpadIdEventHandle"},
|
{40, nullptr, "AcquireXpadIdEventHandle"},
|
||||||
{41, nullptr, "ReleaseXpadIdEventHandle"},
|
{41, nullptr, "ReleaseXpadIdEventHandle"},
|
||||||
{51, &Hid::ActivateXpad, "ActivateXpad"},
|
{51, &Hid::ActivateXpad, "ActivateXpad"},
|
||||||
{55, nullptr, "GetXpadIds"},
|
{55, &Hid::GetXpadIDs, "GetXpadIds"},
|
||||||
{56, nullptr, "ActivateJoyXpad"},
|
{56, nullptr, "ActivateJoyXpad"},
|
||||||
{58, nullptr, "GetJoyXpadLifoHandle"},
|
{58, nullptr, "GetJoyXpadLifoHandle"},
|
||||||
{59, nullptr, "GetJoyXpadIds"},
|
{59, nullptr, "GetJoyXpadIds"},
|
||||||
|
@ -185,8 +185,8 @@ Hid::Hid(Core::System& system) : ServiceFramework("hid"), system(system) {
|
||||||
{77, nullptr, "GetAccelerometerPlayMode"},
|
{77, nullptr, "GetAccelerometerPlayMode"},
|
||||||
{78, nullptr, "ResetAccelerometerPlayMode"},
|
{78, nullptr, "ResetAccelerometerPlayMode"},
|
||||||
{79, &Hid::SetGyroscopeZeroDriftMode, "SetGyroscopeZeroDriftMode"},
|
{79, &Hid::SetGyroscopeZeroDriftMode, "SetGyroscopeZeroDriftMode"},
|
||||||
{80, nullptr, "GetGyroscopeZeroDriftMode"},
|
{80, &Hid::GetGyroscopeZeroDriftMode, "GetGyroscopeZeroDriftMode"},
|
||||||
{81, nullptr, "ResetGyroscopeZeroDriftMode"},
|
{81, &Hid::ResetGyroscopeZeroDriftMode, "ResetGyroscopeZeroDriftMode"},
|
||||||
{82, &Hid::IsSixAxisSensorAtRest, "IsSixAxisSensorAtRest"},
|
{82, &Hid::IsSixAxisSensorAtRest, "IsSixAxisSensorAtRest"},
|
||||||
{83, nullptr, "IsFirmwareUpdateAvailableForSixAxisSensor"},
|
{83, nullptr, "IsFirmwareUpdateAvailableForSixAxisSensor"},
|
||||||
{91, &Hid::ActivateGesture, "ActivateGesture"},
|
{91, &Hid::ActivateGesture, "ActivateGesture"},
|
||||||
|
@ -230,15 +230,15 @@ Hid::Hid(Core::System& system) : ServiceFramework("hid"), system(system) {
|
||||||
{211, nullptr, "IsVibrationDeviceMounted"},
|
{211, nullptr, "IsVibrationDeviceMounted"},
|
||||||
{300, &Hid::ActivateConsoleSixAxisSensor, "ActivateConsoleSixAxisSensor"},
|
{300, &Hid::ActivateConsoleSixAxisSensor, "ActivateConsoleSixAxisSensor"},
|
||||||
{301, &Hid::StartConsoleSixAxisSensor, "StartConsoleSixAxisSensor"},
|
{301, &Hid::StartConsoleSixAxisSensor, "StartConsoleSixAxisSensor"},
|
||||||
{302, nullptr, "StopConsoleSixAxisSensor"},
|
{302, &Hid::StopConsoleSixAxisSensor, "StopConsoleSixAxisSensor"},
|
||||||
{303, nullptr, "ActivateSevenSixAxisSensor"},
|
{303, &Hid::ActivateSevenSixAxisSensor, "ActivateSevenSixAxisSensor"},
|
||||||
{304, nullptr, "StartSevenSixAxisSensor"},
|
{304, &Hid::StartSevenSixAxisSensor, "StartSevenSixAxisSensor"},
|
||||||
{305, &Hid::StopSevenSixAxisSensor, "StopSevenSixAxisSensor"},
|
{305, &Hid::StopSevenSixAxisSensor, "StopSevenSixAxisSensor"},
|
||||||
{306, &Hid::InitializeSevenSixAxisSensor, "InitializeSevenSixAxisSensor"},
|
{306, &Hid::InitializeSevenSixAxisSensor, "InitializeSevenSixAxisSensor"},
|
||||||
{307, nullptr, "FinalizeSevenSixAxisSensor"},
|
{307, &Hid::FinalizeSevenSixAxisSensor, "FinalizeSevenSixAxisSensor"},
|
||||||
{308, nullptr, "SetSevenSixAxisSensorFusionStrength"},
|
{308, nullptr, "SetSevenSixAxisSensorFusionStrength"},
|
||||||
{309, nullptr, "GetSevenSixAxisSensorFusionStrength"},
|
{309, nullptr, "GetSevenSixAxisSensorFusionStrength"},
|
||||||
{310, nullptr, "ResetSevenSixAxisSensorTimestamp"},
|
{310, &Hid::ResetSevenSixAxisSensorTimestamp, "ResetSevenSixAxisSensorTimestamp"},
|
||||||
{400, nullptr, "IsUsbFullKeyControllerEnabled"},
|
{400, nullptr, "IsUsbFullKeyControllerEnabled"},
|
||||||
{401, nullptr, "EnableUsbFullKeyController"},
|
{401, nullptr, "EnableUsbFullKeyController"},
|
||||||
{402, nullptr, "IsUsbFullKeyControllerConnected"},
|
{402, nullptr, "IsUsbFullKeyControllerConnected"},
|
||||||
|
@ -319,6 +319,17 @@ void Hid::ActivateXpad(Kernel::HLERequestContext& ctx) {
|
||||||
rb.Push(RESULT_SUCCESS);
|
rb.Push(RESULT_SUCCESS);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Hid::GetXpadIDs(Kernel::HLERequestContext& ctx) {
|
||||||
|
IPC::RequestParser rp{ctx};
|
||||||
|
const auto applet_resource_user_id{rp.Pop<u64>()};
|
||||||
|
|
||||||
|
LOG_DEBUG(Service_HID, "(STUBBED) called, applet_resource_user_id={}", applet_resource_user_id);
|
||||||
|
|
||||||
|
IPC::ResponseBuilder rb{ctx, 3};
|
||||||
|
rb.Push(RESULT_SUCCESS);
|
||||||
|
rb.Push(0);
|
||||||
|
}
|
||||||
|
|
||||||
void Hid::ActivateDebugPad(Kernel::HLERequestContext& ctx) {
|
void Hid::ActivateDebugPad(Kernel::HLERequestContext& ctx) {
|
||||||
IPC::RequestParser rp{ctx};
|
IPC::RequestParser rp{ctx};
|
||||||
const auto applet_resource_user_id{rp.Pop<u64>()};
|
const auto applet_resource_user_id{rp.Pop<u64>()};
|
||||||
|
@ -363,6 +374,15 @@ void Hid::ActivateKeyboard(Kernel::HLERequestContext& ctx) {
|
||||||
rb.Push(RESULT_SUCCESS);
|
rb.Push(RESULT_SUCCESS);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Hid::SendKeyboardLockKeyEvent(Kernel::HLERequestContext& ctx) {
|
||||||
|
IPC::RequestParser rp{ctx};
|
||||||
|
const auto flags{rp.Pop<u32>()};
|
||||||
|
LOG_WARNING(Service_HID, "(STUBBED) called. flags={}", flags);
|
||||||
|
|
||||||
|
IPC::ResponseBuilder rb{ctx, 2};
|
||||||
|
rb.Push(RESULT_SUCCESS);
|
||||||
|
}
|
||||||
|
|
||||||
void Hid::ActivateGesture(Kernel::HLERequestContext& ctx) {
|
void Hid::ActivateGesture(Kernel::HLERequestContext& ctx) {
|
||||||
IPC::RequestParser rp{ctx};
|
IPC::RequestParser rp{ctx};
|
||||||
const auto unknown{rp.Pop<u32>()};
|
const auto unknown{rp.Pop<u32>()};
|
||||||
|
@ -402,15 +422,59 @@ void Hid::StartSixAxisSensor(Kernel::HLERequestContext& ctx) {
|
||||||
rb.Push(RESULT_SUCCESS);
|
rb.Push(RESULT_SUCCESS);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Hid::StopSixAxisSensor(Kernel::HLERequestContext& ctx) {
|
||||||
|
IPC::RequestParser rp{ctx};
|
||||||
|
const auto handle{rp.Pop<u32>()};
|
||||||
|
const auto applet_resource_user_id{rp.Pop<u64>()};
|
||||||
|
|
||||||
|
LOG_WARNING(Service_HID, "(STUBBED) called, handle={}, applet_resource_user_id={}", handle,
|
||||||
|
applet_resource_user_id);
|
||||||
|
|
||||||
|
IPC::ResponseBuilder rb{ctx, 2};
|
||||||
|
rb.Push(RESULT_SUCCESS);
|
||||||
|
}
|
||||||
|
|
||||||
void Hid::SetGyroscopeZeroDriftMode(Kernel::HLERequestContext& ctx) {
|
void Hid::SetGyroscopeZeroDriftMode(Kernel::HLERequestContext& ctx) {
|
||||||
IPC::RequestParser rp{ctx};
|
IPC::RequestParser rp{ctx};
|
||||||
const auto handle{rp.Pop<u32>()};
|
const auto handle{rp.Pop<u32>()};
|
||||||
const auto drift_mode{rp.Pop<u32>()};
|
const auto drift_mode{rp.Pop<u32>()};
|
||||||
const auto applet_resource_user_id{rp.Pop<u64>()};
|
const auto applet_resource_user_id{rp.Pop<u64>()};
|
||||||
|
|
||||||
LOG_WARNING(Service_HID,
|
applet_resource->GetController<Controller_NPad>(HidController::NPad)
|
||||||
"(STUBBED) called, handle={}, drift_mode={}, applet_resource_user_id={}", handle,
|
.SetGyroscopeZeroDriftMode(Controller_NPad::GyroscopeZeroDriftMode{drift_mode});
|
||||||
drift_mode, applet_resource_user_id);
|
|
||||||
|
LOG_DEBUG(Service_HID, "called, handle={}, drift_mode={}, applet_resource_user_id={}", handle,
|
||||||
|
drift_mode, applet_resource_user_id);
|
||||||
|
|
||||||
|
IPC::ResponseBuilder rb{ctx, 2};
|
||||||
|
rb.Push(RESULT_SUCCESS);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Hid::GetGyroscopeZeroDriftMode(Kernel::HLERequestContext& ctx) {
|
||||||
|
IPC::RequestParser rp{ctx};
|
||||||
|
const auto handle{rp.Pop<u32>()};
|
||||||
|
const auto applet_resource_user_id{rp.Pop<u64>()};
|
||||||
|
|
||||||
|
LOG_DEBUG(Service_HID, "called, handle={}, applet_resource_user_id={}", handle,
|
||||||
|
applet_resource_user_id);
|
||||||
|
|
||||||
|
IPC::ResponseBuilder rb{ctx, 3};
|
||||||
|
rb.Push(RESULT_SUCCESS);
|
||||||
|
rb.Push<u32>(
|
||||||
|
static_cast<u32>(applet_resource->GetController<Controller_NPad>(HidController::NPad)
|
||||||
|
.GetGyroscopeZeroDriftMode()));
|
||||||
|
}
|
||||||
|
|
||||||
|
void Hid::ResetGyroscopeZeroDriftMode(Kernel::HLERequestContext& ctx) {
|
||||||
|
IPC::RequestParser rp{ctx};
|
||||||
|
const auto handle{rp.Pop<u32>()};
|
||||||
|
const auto applet_resource_user_id{rp.Pop<u64>()};
|
||||||
|
|
||||||
|
applet_resource->GetController<Controller_NPad>(HidController::NPad)
|
||||||
|
.SetGyroscopeZeroDriftMode(Controller_NPad::GyroscopeZeroDriftMode::Standard);
|
||||||
|
|
||||||
|
LOG_DEBUG(Service_HID, "called, handle={}, applet_resource_user_id={}", handle,
|
||||||
|
applet_resource_user_id);
|
||||||
|
|
||||||
IPC::ResponseBuilder rb{ctx, 2};
|
IPC::ResponseBuilder rb{ctx, 2};
|
||||||
rb.Push(RESULT_SUCCESS);
|
rb.Push(RESULT_SUCCESS);
|
||||||
|
@ -821,33 +885,35 @@ void Hid::StartConsoleSixAxisSensor(Kernel::HLERequestContext& ctx) {
|
||||||
rb.Push(RESULT_SUCCESS);
|
rb.Push(RESULT_SUCCESS);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Hid::StopSixAxisSensor(Kernel::HLERequestContext& ctx) {
|
void Hid::StopConsoleSixAxisSensor(Kernel::HLERequestContext& ctx) {
|
||||||
IPC::RequestParser rp{ctx};
|
IPC::RequestParser rp{ctx};
|
||||||
const auto handle{rp.Pop<u32>()};
|
const auto handle{rp.Pop<u32>()};
|
||||||
|
const auto applet_resource_user_id{rp.Pop<u64>()};
|
||||||
|
|
||||||
LOG_WARNING(Service_HID, "(STUBBED) called, handle={}", handle);
|
LOG_WARNING(Service_HID, "(STUBBED) called, handle={}, applet_resource_user_id={}", handle,
|
||||||
|
applet_resource_user_id);
|
||||||
|
|
||||||
IPC::ResponseBuilder rb{ctx, 2};
|
IPC::ResponseBuilder rb{ctx, 2};
|
||||||
rb.Push(RESULT_SUCCESS);
|
rb.Push(RESULT_SUCCESS);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Hid::SetIsPalmaAllConnectable(Kernel::HLERequestContext& ctx) {
|
void Hid::ActivateSevenSixAxisSensor(Kernel::HLERequestContext& ctx) {
|
||||||
IPC::RequestParser rp{ctx};
|
IPC::RequestParser rp{ctx};
|
||||||
const auto applet_resource_user_id{rp.Pop<u64>()};
|
const auto applet_resource_user_id{rp.Pop<u64>()};
|
||||||
const auto unknown{rp.Pop<u32>()};
|
|
||||||
|
|
||||||
LOG_WARNING(Service_HID, "(STUBBED) called, applet_resource_user_id={}, unknown={}",
|
LOG_WARNING(Service_HID, "(STUBBED) called, applet_resource_user_id={}",
|
||||||
applet_resource_user_id, unknown);
|
applet_resource_user_id);
|
||||||
|
|
||||||
IPC::ResponseBuilder rb{ctx, 2};
|
IPC::ResponseBuilder rb{ctx, 2};
|
||||||
rb.Push(RESULT_SUCCESS);
|
rb.Push(RESULT_SUCCESS);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Hid::SetPalmaBoostMode(Kernel::HLERequestContext& ctx) {
|
void Hid::StartSevenSixAxisSensor(Kernel::HLERequestContext& ctx) {
|
||||||
IPC::RequestParser rp{ctx};
|
IPC::RequestParser rp{ctx};
|
||||||
const auto unknown{rp.Pop<u32>()};
|
const auto applet_resource_user_id{rp.Pop<u64>()};
|
||||||
|
|
||||||
LOG_WARNING(Service_HID, "(STUBBED) called, unknown={}", unknown);
|
LOG_WARNING(Service_HID, "(STUBBED) called, applet_resource_user_id={}",
|
||||||
|
applet_resource_user_id);
|
||||||
|
|
||||||
IPC::ResponseBuilder rb{ctx, 2};
|
IPC::ResponseBuilder rb{ctx, 2};
|
||||||
rb.Push(RESULT_SUCCESS);
|
rb.Push(RESULT_SUCCESS);
|
||||||
|
@ -871,10 +937,46 @@ void Hid::InitializeSevenSixAxisSensor(Kernel::HLERequestContext& ctx) {
|
||||||
rb.Push(RESULT_SUCCESS);
|
rb.Push(RESULT_SUCCESS);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Hid::SendKeyboardLockKeyEvent(Kernel::HLERequestContext& ctx) {
|
void Hid::FinalizeSevenSixAxisSensor(Kernel::HLERequestContext& ctx) {
|
||||||
IPC::RequestParser rp{ctx};
|
IPC::RequestParser rp{ctx};
|
||||||
const auto flags{rp.Pop<u32>()};
|
const auto applet_resource_user_id{rp.Pop<u64>()};
|
||||||
LOG_WARNING(Service_HID, "(STUBBED) called. flags={}", flags);
|
|
||||||
|
LOG_WARNING(Service_HID, "(STUBBED) called, applet_resource_user_id={}",
|
||||||
|
applet_resource_user_id);
|
||||||
|
|
||||||
|
IPC::ResponseBuilder rb{ctx, 2};
|
||||||
|
rb.Push(RESULT_SUCCESS);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Hid::ResetSevenSixAxisSensorTimestamp(Kernel::HLERequestContext& ctx) {
|
||||||
|
IPC::RequestParser rp{ctx};
|
||||||
|
const auto applet_resource_user_id{rp.Pop<u64>()};
|
||||||
|
|
||||||
|
LOG_WARNING(Service_HID, "(STUBBED) called, applet_resource_user_id={}",
|
||||||
|
applet_resource_user_id);
|
||||||
|
|
||||||
|
IPC::ResponseBuilder rb{ctx, 2};
|
||||||
|
rb.Push(RESULT_SUCCESS);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Hid::SetIsPalmaAllConnectable(Kernel::HLERequestContext& ctx) {
|
||||||
|
IPC::RequestParser rp{ctx};
|
||||||
|
const auto applet_resource_user_id{rp.Pop<u64>()};
|
||||||
|
const auto is_palma_all_connectable{rp.Pop<bool>()};
|
||||||
|
|
||||||
|
LOG_WARNING(Service_HID,
|
||||||
|
"(STUBBED) called, applet_resource_user_id={}, is_palma_all_connectable={}",
|
||||||
|
applet_resource_user_id, is_palma_all_connectable);
|
||||||
|
|
||||||
|
IPC::ResponseBuilder rb{ctx, 2};
|
||||||
|
rb.Push(RESULT_SUCCESS);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Hid::SetPalmaBoostMode(Kernel::HLERequestContext& ctx) {
|
||||||
|
IPC::RequestParser rp{ctx};
|
||||||
|
const auto palma_boost_mode{rp.Pop<bool>()};
|
||||||
|
|
||||||
|
LOG_WARNING(Service_HID, "(STUBBED) called, palma_boost_mode={}", palma_boost_mode);
|
||||||
|
|
||||||
IPC::ResponseBuilder rb{ctx, 2};
|
IPC::ResponseBuilder rb{ctx, 2};
|
||||||
rb.Push(RESULT_SUCCESS);
|
rb.Push(RESULT_SUCCESS);
|
||||||
|
|
|
@ -86,14 +86,19 @@ public:
|
||||||
private:
|
private:
|
||||||
void CreateAppletResource(Kernel::HLERequestContext& ctx);
|
void CreateAppletResource(Kernel::HLERequestContext& ctx);
|
||||||
void ActivateXpad(Kernel::HLERequestContext& ctx);
|
void ActivateXpad(Kernel::HLERequestContext& ctx);
|
||||||
|
void GetXpadIDs(Kernel::HLERequestContext& ctx);
|
||||||
void ActivateDebugPad(Kernel::HLERequestContext& ctx);
|
void ActivateDebugPad(Kernel::HLERequestContext& ctx);
|
||||||
void ActivateTouchScreen(Kernel::HLERequestContext& ctx);
|
void ActivateTouchScreen(Kernel::HLERequestContext& ctx);
|
||||||
void ActivateMouse(Kernel::HLERequestContext& ctx);
|
void ActivateMouse(Kernel::HLERequestContext& ctx);
|
||||||
void ActivateKeyboard(Kernel::HLERequestContext& ctx);
|
void ActivateKeyboard(Kernel::HLERequestContext& ctx);
|
||||||
|
void SendKeyboardLockKeyEvent(Kernel::HLERequestContext& ctx);
|
||||||
void ActivateGesture(Kernel::HLERequestContext& ctx);
|
void ActivateGesture(Kernel::HLERequestContext& ctx);
|
||||||
void ActivateNpadWithRevision(Kernel::HLERequestContext& ctx);
|
void ActivateNpadWithRevision(Kernel::HLERequestContext& ctx);
|
||||||
void StartSixAxisSensor(Kernel::HLERequestContext& ctx);
|
void StartSixAxisSensor(Kernel::HLERequestContext& ctx);
|
||||||
|
void StopSixAxisSensor(Kernel::HLERequestContext& ctx);
|
||||||
void SetGyroscopeZeroDriftMode(Kernel::HLERequestContext& ctx);
|
void SetGyroscopeZeroDriftMode(Kernel::HLERequestContext& ctx);
|
||||||
|
void GetGyroscopeZeroDriftMode(Kernel::HLERequestContext& ctx);
|
||||||
|
void ResetGyroscopeZeroDriftMode(Kernel::HLERequestContext& ctx);
|
||||||
void IsSixAxisSensorAtRest(Kernel::HLERequestContext& ctx);
|
void IsSixAxisSensorAtRest(Kernel::HLERequestContext& ctx);
|
||||||
void SetSupportedNpadStyleSet(Kernel::HLERequestContext& ctx);
|
void SetSupportedNpadStyleSet(Kernel::HLERequestContext& ctx);
|
||||||
void GetSupportedNpadStyleSet(Kernel::HLERequestContext& ctx);
|
void GetSupportedNpadStyleSet(Kernel::HLERequestContext& ctx);
|
||||||
|
@ -125,12 +130,15 @@ private:
|
||||||
void IsVibrationPermitted(Kernel::HLERequestContext& ctx);
|
void IsVibrationPermitted(Kernel::HLERequestContext& ctx);
|
||||||
void ActivateConsoleSixAxisSensor(Kernel::HLERequestContext& ctx);
|
void ActivateConsoleSixAxisSensor(Kernel::HLERequestContext& ctx);
|
||||||
void StartConsoleSixAxisSensor(Kernel::HLERequestContext& ctx);
|
void StartConsoleSixAxisSensor(Kernel::HLERequestContext& ctx);
|
||||||
void StopSixAxisSensor(Kernel::HLERequestContext& ctx);
|
void StopConsoleSixAxisSensor(Kernel::HLERequestContext& ctx);
|
||||||
void SetIsPalmaAllConnectable(Kernel::HLERequestContext& ctx);
|
void ActivateSevenSixAxisSensor(Kernel::HLERequestContext& ctx);
|
||||||
void SetPalmaBoostMode(Kernel::HLERequestContext& ctx);
|
void StartSevenSixAxisSensor(Kernel::HLERequestContext& ctx);
|
||||||
void StopSevenSixAxisSensor(Kernel::HLERequestContext& ctx);
|
void StopSevenSixAxisSensor(Kernel::HLERequestContext& ctx);
|
||||||
void InitializeSevenSixAxisSensor(Kernel::HLERequestContext& ctx);
|
void InitializeSevenSixAxisSensor(Kernel::HLERequestContext& ctx);
|
||||||
void SendKeyboardLockKeyEvent(Kernel::HLERequestContext& ctx);
|
void FinalizeSevenSixAxisSensor(Kernel::HLERequestContext& ctx);
|
||||||
|
void ResetSevenSixAxisSensorTimestamp(Kernel::HLERequestContext& ctx);
|
||||||
|
void SetIsPalmaAllConnectable(Kernel::HLERequestContext& ctx);
|
||||||
|
void SetPalmaBoostMode(Kernel::HLERequestContext& ctx);
|
||||||
|
|
||||||
std::shared_ptr<IAppletResource> applet_resource;
|
std::shared_ptr<IAppletResource> applet_resource;
|
||||||
Core::System& system;
|
Core::System& system;
|
||||||
|
|
|
@ -86,7 +86,8 @@ std::string FormatField(Field type, const std::vector<u8>& data) {
|
||||||
return Common::StringFromFixedZeroTerminatedBuffer(
|
return Common::StringFromFixedZeroTerminatedBuffer(
|
||||||
reinterpret_cast<const char*>(data.data()), data.size());
|
reinterpret_cast<const char*>(data.data()), data.size());
|
||||||
default:
|
default:
|
||||||
UNIMPLEMENTED();
|
UNIMPLEMENTED_MSG("Unimplemented field type={}", type);
|
||||||
|
return "";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -25,7 +25,7 @@ u32 nvhost_ctrl_gpu::ioctl(Ioctl command, const std::vector<u8>& input,
|
||||||
case IoctlCommand::IocGetCharacteristicsCommand:
|
case IoctlCommand::IocGetCharacteristicsCommand:
|
||||||
return GetCharacteristics(input, output, output2, version);
|
return GetCharacteristics(input, output, output2, version);
|
||||||
case IoctlCommand::IocGetTPCMasksCommand:
|
case IoctlCommand::IocGetTPCMasksCommand:
|
||||||
return GetTPCMasks(input, output);
|
return GetTPCMasks(input, output, output2, version);
|
||||||
case IoctlCommand::IocGetActiveSlotMaskCommand:
|
case IoctlCommand::IocGetActiveSlotMaskCommand:
|
||||||
return GetActiveSlotMask(input, output);
|
return GetActiveSlotMask(input, output);
|
||||||
case IoctlCommand::IocZcullGetCtxSizeCommand:
|
case IoctlCommand::IocZcullGetCtxSizeCommand:
|
||||||
|
@ -98,17 +98,22 @@ u32 nvhost_ctrl_gpu::GetCharacteristics(const std::vector<u8>& input, std::vecto
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 nvhost_ctrl_gpu::GetTPCMasks(const std::vector<u8>& input, std::vector<u8>& output) {
|
u32 nvhost_ctrl_gpu::GetTPCMasks(const std::vector<u8>& input, std::vector<u8>& output,
|
||||||
|
std::vector<u8>& output2, IoctlVersion version) {
|
||||||
IoctlGpuGetTpcMasksArgs params{};
|
IoctlGpuGetTpcMasksArgs params{};
|
||||||
std::memcpy(&params, input.data(), input.size());
|
std::memcpy(&params, input.data(), input.size());
|
||||||
LOG_INFO(Service_NVDRV, "called, mask=0x{:X}, mask_buf_addr=0x{:X}", params.mask_buf_size,
|
LOG_DEBUG(Service_NVDRV, "called, mask_buffer_size=0x{:X}", params.mask_buffer_size);
|
||||||
params.mask_buf_addr);
|
if (params.mask_buffer_size != 0) {
|
||||||
// TODO(ogniK): Confirm value on hardware
|
params.tcp_mask = 3;
|
||||||
if (params.mask_buf_size)
|
}
|
||||||
params.tpc_mask_size = 4 * 1; // 4 * num_gpc
|
|
||||||
else
|
if (version == IoctlVersion::Version3) {
|
||||||
params.tpc_mask_size = 0;
|
std::memcpy(output.data(), input.data(), output.size());
|
||||||
std::memcpy(output.data(), &params, sizeof(params));
|
std::memcpy(output2.data(), &params.tcp_mask, output2.size());
|
||||||
|
} else {
|
||||||
|
std::memcpy(output.data(), &params, output.size());
|
||||||
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -92,16 +92,11 @@ private:
|
||||||
"IoctlCharacteristics is incorrect size");
|
"IoctlCharacteristics is incorrect size");
|
||||||
|
|
||||||
struct IoctlGpuGetTpcMasksArgs {
|
struct IoctlGpuGetTpcMasksArgs {
|
||||||
/// [in] TPC mask buffer size reserved by userspace. Should be at least
|
u32_le mask_buffer_size{};
|
||||||
/// sizeof(__u32) * fls(gpc_mask) to receive TPC mask for each GPC.
|
INSERT_PADDING_WORDS(1);
|
||||||
/// [out] full kernel buffer size
|
u64_le mask_buffer_address{};
|
||||||
u32_le mask_buf_size;
|
u32_le tcp_mask{};
|
||||||
u32_le reserved;
|
INSERT_PADDING_WORDS(1);
|
||||||
|
|
||||||
/// [in] pointer to TPC mask buffer. It will receive one 32-bit TPC mask per GPC or 0 if
|
|
||||||
/// GPC is not enabled or not present. This parameter is ignored if mask_buf_size is 0.
|
|
||||||
u64_le mask_buf_addr;
|
|
||||||
u64_le tpc_mask_size; // Nintendo add this?
|
|
||||||
};
|
};
|
||||||
static_assert(sizeof(IoctlGpuGetTpcMasksArgs) == 24,
|
static_assert(sizeof(IoctlGpuGetTpcMasksArgs) == 24,
|
||||||
"IoctlGpuGetTpcMasksArgs is incorrect size");
|
"IoctlGpuGetTpcMasksArgs is incorrect size");
|
||||||
|
@ -166,7 +161,8 @@ private:
|
||||||
|
|
||||||
u32 GetCharacteristics(const std::vector<u8>& input, std::vector<u8>& output,
|
u32 GetCharacteristics(const std::vector<u8>& input, std::vector<u8>& output,
|
||||||
std::vector<u8>& output2, IoctlVersion version);
|
std::vector<u8>& output2, IoctlVersion version);
|
||||||
u32 GetTPCMasks(const std::vector<u8>& input, std::vector<u8>& output);
|
u32 GetTPCMasks(const std::vector<u8>& input, std::vector<u8>& output, std::vector<u8>& output2,
|
||||||
|
IoctlVersion version);
|
||||||
u32 GetActiveSlotMask(const std::vector<u8>& input, std::vector<u8>& output);
|
u32 GetActiveSlotMask(const std::vector<u8>& input, std::vector<u8>& output);
|
||||||
u32 ZCullGetCtxSize(const std::vector<u8>& input, std::vector<u8>& output);
|
u32 ZCullGetCtxSize(const std::vector<u8>& input, std::vector<u8>& output);
|
||||||
u32 ZCullGetInfo(const std::vector<u8>& input, std::vector<u8>& output);
|
u32 ZCullGetInfo(const std::vector<u8>& input, std::vector<u8>& output);
|
||||||
|
|
|
@ -437,7 +437,7 @@ struct Values {
|
||||||
bool renderer_debug;
|
bool renderer_debug;
|
||||||
int vulkan_device;
|
int vulkan_device;
|
||||||
|
|
||||||
float resolution_factor;
|
u16 resolution_factor{1};
|
||||||
int aspect_ratio;
|
int aspect_ratio;
|
||||||
int max_anisotropy;
|
int max_anisotropy;
|
||||||
bool use_frame_limit;
|
bool use_frame_limit;
|
||||||
|
@ -474,6 +474,7 @@ struct Values {
|
||||||
bool reporting_services;
|
bool reporting_services;
|
||||||
bool quest_flag;
|
bool quest_flag;
|
||||||
bool disable_cpu_opt;
|
bool disable_cpu_opt;
|
||||||
|
bool disable_macro_jit;
|
||||||
|
|
||||||
// BCAT
|
// BCAT
|
||||||
std::string bcat_backend;
|
std::string bcat_backend;
|
||||||
|
|
|
@ -76,7 +76,7 @@ std::unique_ptr<Input::ButtonDevice> Keyboard::Create(const Common::ParamPackage
|
||||||
int key_code = params.Get("code", 0);
|
int key_code = params.Get("code", 0);
|
||||||
std::unique_ptr<KeyButton> button = std::make_unique<KeyButton>(key_button_list);
|
std::unique_ptr<KeyButton> button = std::make_unique<KeyButton>(key_button_list);
|
||||||
key_button_list->AddKeyButton(key_code, button.get());
|
key_button_list->AddKeyButton(key_code, button.get());
|
||||||
return std::move(button);
|
return button;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Keyboard::PressKey(int key_code) {
|
void Keyboard::PressKey(int key_code) {
|
||||||
|
|
|
@ -145,7 +145,7 @@ std::unique_ptr<Input::MotionDevice> MotionEmu::Create(const Common::ParamPackag
|
||||||
// Previously created device is disconnected here. Having two motion devices for 3DS is not
|
// Previously created device is disconnected here. Having two motion devices for 3DS is not
|
||||||
// expected.
|
// expected.
|
||||||
current_device = device_wrapper->device;
|
current_device = device_wrapper->device;
|
||||||
return std::move(device_wrapper);
|
return device_wrapper;
|
||||||
}
|
}
|
||||||
|
|
||||||
void MotionEmu::BeginTilt(int x, int y) {
|
void MotionEmu::BeginTilt(int x, int y) {
|
||||||
|
|
|
@ -25,6 +25,14 @@ add_library(video_core STATIC
|
||||||
engines/shader_bytecode.h
|
engines/shader_bytecode.h
|
||||||
engines/shader_header.h
|
engines/shader_header.h
|
||||||
engines/shader_type.h
|
engines/shader_type.h
|
||||||
|
macro/macro.cpp
|
||||||
|
macro/macro.h
|
||||||
|
macro/macro_hle.cpp
|
||||||
|
macro/macro_hle.h
|
||||||
|
macro/macro_interpreter.cpp
|
||||||
|
macro/macro_interpreter.h
|
||||||
|
macro/macro_jit_x64.cpp
|
||||||
|
macro/macro_jit_x64.h
|
||||||
fence_manager.h
|
fence_manager.h
|
||||||
gpu.cpp
|
gpu.cpp
|
||||||
gpu.h
|
gpu.h
|
||||||
|
@ -36,8 +44,6 @@ add_library(video_core STATIC
|
||||||
gpu_thread.h
|
gpu_thread.h
|
||||||
guest_driver.cpp
|
guest_driver.cpp
|
||||||
guest_driver.h
|
guest_driver.h
|
||||||
macro_interpreter.cpp
|
|
||||||
macro_interpreter.h
|
|
||||||
memory_manager.cpp
|
memory_manager.cpp
|
||||||
memory_manager.h
|
memory_manager.h
|
||||||
morton.cpp
|
morton.cpp
|
||||||
|
@ -45,11 +51,11 @@ add_library(video_core STATIC
|
||||||
query_cache.h
|
query_cache.h
|
||||||
rasterizer_accelerated.cpp
|
rasterizer_accelerated.cpp
|
||||||
rasterizer_accelerated.h
|
rasterizer_accelerated.h
|
||||||
rasterizer_cache.cpp
|
|
||||||
rasterizer_cache.h
|
|
||||||
rasterizer_interface.h
|
rasterizer_interface.h
|
||||||
renderer_base.cpp
|
renderer_base.cpp
|
||||||
renderer_base.h
|
renderer_base.h
|
||||||
|
renderer_opengl/gl_arb_decompiler.cpp
|
||||||
|
renderer_opengl/gl_arb_decompiler.h
|
||||||
renderer_opengl/gl_buffer_cache.cpp
|
renderer_opengl/gl_buffer_cache.cpp
|
||||||
renderer_opengl/gl_buffer_cache.h
|
renderer_opengl/gl_buffer_cache.h
|
||||||
renderer_opengl/gl_device.cpp
|
renderer_opengl/gl_device.cpp
|
||||||
|
@ -89,6 +95,7 @@ add_library(video_core STATIC
|
||||||
renderer_opengl/utils.h
|
renderer_opengl/utils.h
|
||||||
sampler_cache.cpp
|
sampler_cache.cpp
|
||||||
sampler_cache.h
|
sampler_cache.h
|
||||||
|
shader_cache.h
|
||||||
shader/decode/arithmetic.cpp
|
shader/decode/arithmetic.cpp
|
||||||
shader/decode/arithmetic_immediate.cpp
|
shader/decode/arithmetic_immediate.cpp
|
||||||
shader/decode/bfe.cpp
|
shader/decode/bfe.cpp
|
||||||
|
|
|
@ -15,48 +15,47 @@ namespace VideoCommon {
|
||||||
|
|
||||||
class BufferBlock {
|
class BufferBlock {
|
||||||
public:
|
public:
|
||||||
bool Overlaps(const VAddr start, const VAddr end) const {
|
bool Overlaps(VAddr start, VAddr end) const {
|
||||||
return (cpu_addr < end) && (cpu_addr_end > start);
|
return (cpu_addr < end) && (cpu_addr_end > start);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool IsInside(const VAddr other_start, const VAddr other_end) const {
|
bool IsInside(VAddr other_start, VAddr other_end) const {
|
||||||
return cpu_addr <= other_start && other_end <= cpu_addr_end;
|
return cpu_addr <= other_start && other_end <= cpu_addr_end;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::size_t GetOffset(const VAddr in_addr) {
|
std::size_t Offset(VAddr in_addr) const {
|
||||||
return static_cast<std::size_t>(in_addr - cpu_addr);
|
return static_cast<std::size_t>(in_addr - cpu_addr);
|
||||||
}
|
}
|
||||||
|
|
||||||
VAddr GetCpuAddr() const {
|
VAddr CpuAddr() const {
|
||||||
return cpu_addr;
|
return cpu_addr;
|
||||||
}
|
}
|
||||||
|
|
||||||
VAddr GetCpuAddrEnd() const {
|
VAddr CpuAddrEnd() const {
|
||||||
return cpu_addr_end;
|
return cpu_addr_end;
|
||||||
}
|
}
|
||||||
|
|
||||||
void SetCpuAddr(const VAddr new_addr) {
|
void SetCpuAddr(VAddr new_addr) {
|
||||||
cpu_addr = new_addr;
|
cpu_addr = new_addr;
|
||||||
cpu_addr_end = new_addr + size;
|
cpu_addr_end = new_addr + size;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::size_t GetSize() const {
|
std::size_t Size() const {
|
||||||
return size;
|
return size;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
u64 Epoch() const {
|
||||||
|
return epoch;
|
||||||
|
}
|
||||||
|
|
||||||
void SetEpoch(u64 new_epoch) {
|
void SetEpoch(u64 new_epoch) {
|
||||||
epoch = new_epoch;
|
epoch = new_epoch;
|
||||||
}
|
}
|
||||||
|
|
||||||
u64 GetEpoch() {
|
|
||||||
return epoch;
|
|
||||||
}
|
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
explicit BufferBlock(VAddr cpu_addr, const std::size_t size) : size{size} {
|
explicit BufferBlock(VAddr cpu_addr_, std::size_t size_) : size{size_} {
|
||||||
SetCpuAddr(cpu_addr);
|
SetCpuAddr(cpu_addr_);
|
||||||
}
|
}
|
||||||
~BufferBlock() = default;
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
VAddr cpu_addr{};
|
VAddr cpu_addr{};
|
||||||
|
|
|
@ -30,23 +30,31 @@
|
||||||
|
|
||||||
namespace VideoCommon {
|
namespace VideoCommon {
|
||||||
|
|
||||||
template <typename OwnerBuffer, typename BufferType, typename StreamBuffer>
|
template <typename Buffer, typename BufferType, typename StreamBuffer>
|
||||||
class BufferCache {
|
class BufferCache {
|
||||||
using IntervalSet = boost::icl::interval_set<VAddr>;
|
using IntervalSet = boost::icl::interval_set<VAddr>;
|
||||||
using IntervalType = typename IntervalSet::interval_type;
|
using IntervalType = typename IntervalSet::interval_type;
|
||||||
using VectorMapInterval = boost::container::small_vector<MapInterval*, 1>;
|
using VectorMapInterval = boost::container::small_vector<MapInterval*, 1>;
|
||||||
|
|
||||||
|
static constexpr u64 WRITE_PAGE_BIT = 11;
|
||||||
|
static constexpr u64 BLOCK_PAGE_BITS = 21;
|
||||||
|
static constexpr u64 BLOCK_PAGE_SIZE = 1ULL << BLOCK_PAGE_BITS;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
using BufferInfo = std::pair<BufferType, u64>;
|
struct BufferInfo {
|
||||||
|
BufferType handle;
|
||||||
|
u64 offset;
|
||||||
|
u64 address;
|
||||||
|
};
|
||||||
|
|
||||||
BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
|
BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
|
||||||
bool is_written = false, bool use_fast_cbuf = false) {
|
bool is_written = false, bool use_fast_cbuf = false) {
|
||||||
std::lock_guard lock{mutex};
|
std::lock_guard lock{mutex};
|
||||||
|
|
||||||
const auto& memory_manager = system.GPU().MemoryManager();
|
auto& memory_manager = system.GPU().MemoryManager();
|
||||||
const std::optional<VAddr> cpu_addr_opt = memory_manager.GpuToCpuAddress(gpu_addr);
|
const std::optional<VAddr> cpu_addr_opt = memory_manager.GpuToCpuAddress(gpu_addr);
|
||||||
if (!cpu_addr_opt) {
|
if (!cpu_addr_opt) {
|
||||||
return {GetEmptyBuffer(size), 0};
|
return GetEmptyBuffer(size);
|
||||||
}
|
}
|
||||||
const VAddr cpu_addr = *cpu_addr_opt;
|
const VAddr cpu_addr = *cpu_addr_opt;
|
||||||
|
|
||||||
|
@ -55,33 +63,36 @@ public:
|
||||||
constexpr std::size_t max_stream_size = 0x800;
|
constexpr std::size_t max_stream_size = 0x800;
|
||||||
if (use_fast_cbuf || size < max_stream_size) {
|
if (use_fast_cbuf || size < max_stream_size) {
|
||||||
if (!is_written && !IsRegionWritten(cpu_addr, cpu_addr + size - 1)) {
|
if (!is_written && !IsRegionWritten(cpu_addr, cpu_addr + size - 1)) {
|
||||||
auto& memory_manager = system.GPU().MemoryManager();
|
const bool is_granular = memory_manager.IsGranularRange(gpu_addr, size);
|
||||||
if (use_fast_cbuf) {
|
if (use_fast_cbuf) {
|
||||||
if (memory_manager.IsGranularRange(gpu_addr, size)) {
|
u8* dest;
|
||||||
const auto host_ptr = memory_manager.GetPointer(gpu_addr);
|
if (is_granular) {
|
||||||
return ConstBufferUpload(host_ptr, size);
|
dest = memory_manager.GetPointer(gpu_addr);
|
||||||
} else {
|
} else {
|
||||||
staging_buffer.resize(size);
|
staging_buffer.resize(size);
|
||||||
memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
|
dest = staging_buffer.data();
|
||||||
return ConstBufferUpload(staging_buffer.data(), size);
|
memory_manager.ReadBlockUnsafe(gpu_addr, dest, size);
|
||||||
}
|
}
|
||||||
|
return ConstBufferUpload(dest, size);
|
||||||
|
}
|
||||||
|
if (is_granular) {
|
||||||
|
u8* const host_ptr = memory_manager.GetPointer(gpu_addr);
|
||||||
|
return StreamBufferUpload(size, alignment, [host_ptr, size](u8* dest) {
|
||||||
|
std::memcpy(dest, host_ptr, size);
|
||||||
|
});
|
||||||
} else {
|
} else {
|
||||||
if (memory_manager.IsGranularRange(gpu_addr, size)) {
|
return StreamBufferUpload(
|
||||||
const auto host_ptr = memory_manager.GetPointer(gpu_addr);
|
size, alignment, [&memory_manager, gpu_addr, size](u8* dest) {
|
||||||
return StreamBufferUpload(host_ptr, size, alignment);
|
memory_manager.ReadBlockUnsafe(gpu_addr, dest, size);
|
||||||
} else {
|
});
|
||||||
staging_buffer.resize(size);
|
|
||||||
memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
|
|
||||||
return StreamBufferUpload(staging_buffer.data(), size, alignment);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
OwnerBuffer block = GetBlock(cpu_addr, size);
|
Buffer* const block = GetBlock(cpu_addr, size);
|
||||||
MapInterval* const map = MapAddress(block, gpu_addr, cpu_addr, size);
|
MapInterval* const map = MapAddress(block, gpu_addr, cpu_addr, size);
|
||||||
if (!map) {
|
if (!map) {
|
||||||
return {GetEmptyBuffer(size), 0};
|
return GetEmptyBuffer(size);
|
||||||
}
|
}
|
||||||
if (is_written) {
|
if (is_written) {
|
||||||
map->MarkAsModified(true, GetModifiedTicks());
|
map->MarkAsModified(true, GetModifiedTicks());
|
||||||
|
@ -94,41 +105,49 @@ public:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return {ToHandle(block), static_cast<u64>(block->GetOffset(cpu_addr))};
|
return BufferInfo{block->Handle(), block->Offset(cpu_addr), block->Address()};
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset.
|
/// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset.
|
||||||
BufferInfo UploadHostMemory(const void* raw_pointer, std::size_t size,
|
BufferInfo UploadHostMemory(const void* raw_pointer, std::size_t size,
|
||||||
std::size_t alignment = 4) {
|
std::size_t alignment = 4) {
|
||||||
std::lock_guard lock{mutex};
|
std::lock_guard lock{mutex};
|
||||||
return StreamBufferUpload(raw_pointer, size, alignment);
|
return StreamBufferUpload(size, alignment, [raw_pointer, size](u8* dest) {
|
||||||
|
std::memcpy(dest, raw_pointer, size);
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
void Map(std::size_t max_size) {
|
/// Prepares the buffer cache for data uploading
|
||||||
|
/// @param max_size Maximum number of bytes that will be uploaded
|
||||||
|
/// @return True when a stream buffer invalidation was required, false otherwise
|
||||||
|
bool Map(std::size_t max_size) {
|
||||||
std::lock_guard lock{mutex};
|
std::lock_guard lock{mutex};
|
||||||
|
|
||||||
|
bool invalidated;
|
||||||
std::tie(buffer_ptr, buffer_offset_base, invalidated) = stream_buffer->Map(max_size, 4);
|
std::tie(buffer_ptr, buffer_offset_base, invalidated) = stream_buffer->Map(max_size, 4);
|
||||||
buffer_offset = buffer_offset_base;
|
buffer_offset = buffer_offset_base;
|
||||||
|
|
||||||
|
return invalidated;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Finishes the upload stream, returns true on bindings invalidation.
|
/// Finishes the upload stream
|
||||||
bool Unmap() {
|
void Unmap() {
|
||||||
std::lock_guard lock{mutex};
|
std::lock_guard lock{mutex};
|
||||||
|
|
||||||
stream_buffer->Unmap(buffer_offset - buffer_offset_base);
|
stream_buffer->Unmap(buffer_offset - buffer_offset_base);
|
||||||
return std::exchange(invalidated, false);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Function called at the end of each frame, intended for deferred operations
|
||||||
void TickFrame() {
|
void TickFrame() {
|
||||||
++epoch;
|
++epoch;
|
||||||
|
|
||||||
while (!pending_destruction.empty()) {
|
while (!pending_destruction.empty()) {
|
||||||
// Delay at least 4 frames before destruction.
|
// Delay at least 4 frames before destruction.
|
||||||
// This is due to triple buffering happening on some drivers.
|
// This is due to triple buffering happening on some drivers.
|
||||||
static constexpr u64 epochs_to_destroy = 5;
|
static constexpr u64 epochs_to_destroy = 5;
|
||||||
if (pending_destruction.front()->GetEpoch() + epochs_to_destroy > epoch) {
|
if (pending_destruction.front()->Epoch() + epochs_to_destroy > epoch) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
pending_destruction.pop_front();
|
pending_destruction.pop();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -239,28 +258,16 @@ public:
|
||||||
committed_flushes.pop_front();
|
committed_flushes.pop_front();
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual BufferType GetEmptyBuffer(std::size_t size) = 0;
|
virtual BufferInfo GetEmptyBuffer(std::size_t size) = 0;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
|
explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
|
||||||
std::unique_ptr<StreamBuffer> stream_buffer)
|
std::unique_ptr<StreamBuffer> stream_buffer)
|
||||||
: rasterizer{rasterizer}, system{system}, stream_buffer{std::move(stream_buffer)},
|
: rasterizer{rasterizer}, system{system}, stream_buffer{std::move(stream_buffer)} {}
|
||||||
stream_buffer_handle{this->stream_buffer->GetHandle()} {}
|
|
||||||
|
|
||||||
~BufferCache() = default;
|
~BufferCache() = default;
|
||||||
|
|
||||||
virtual BufferType ToHandle(const OwnerBuffer& storage) = 0;
|
virtual std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) = 0;
|
||||||
|
|
||||||
virtual OwnerBuffer CreateBlock(VAddr cpu_addr, std::size_t size) = 0;
|
|
||||||
|
|
||||||
virtual void UploadBlockData(const OwnerBuffer& buffer, std::size_t offset, std::size_t size,
|
|
||||||
const u8* data) = 0;
|
|
||||||
|
|
||||||
virtual void DownloadBlockData(const OwnerBuffer& buffer, std::size_t offset, std::size_t size,
|
|
||||||
u8* data) = 0;
|
|
||||||
|
|
||||||
virtual void CopyBlock(const OwnerBuffer& src, const OwnerBuffer& dst, std::size_t src_offset,
|
|
||||||
std::size_t dst_offset, std::size_t size) = 0;
|
|
||||||
|
|
||||||
virtual BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) {
|
virtual BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) {
|
||||||
return {};
|
return {};
|
||||||
|
@ -315,7 +322,7 @@ protected:
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
MapInterval* MapAddress(const OwnerBuffer& block, GPUVAddr gpu_addr, VAddr cpu_addr,
|
MapInterval* MapAddress(const Buffer* block, GPUVAddr gpu_addr, VAddr cpu_addr,
|
||||||
std::size_t size) {
|
std::size_t size) {
|
||||||
const VectorMapInterval overlaps = GetMapsInRange(cpu_addr, size);
|
const VectorMapInterval overlaps = GetMapsInRange(cpu_addr, size);
|
||||||
if (overlaps.empty()) {
|
if (overlaps.empty()) {
|
||||||
|
@ -323,11 +330,11 @@ private:
|
||||||
const VAddr cpu_addr_end = cpu_addr + size;
|
const VAddr cpu_addr_end = cpu_addr + size;
|
||||||
if (memory_manager.IsGranularRange(gpu_addr, size)) {
|
if (memory_manager.IsGranularRange(gpu_addr, size)) {
|
||||||
u8* host_ptr = memory_manager.GetPointer(gpu_addr);
|
u8* host_ptr = memory_manager.GetPointer(gpu_addr);
|
||||||
UploadBlockData(block, block->GetOffset(cpu_addr), size, host_ptr);
|
block->Upload(block->Offset(cpu_addr), size, host_ptr);
|
||||||
} else {
|
} else {
|
||||||
staging_buffer.resize(size);
|
staging_buffer.resize(size);
|
||||||
memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
|
memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
|
||||||
UploadBlockData(block, block->GetOffset(cpu_addr), size, staging_buffer.data());
|
block->Upload(block->Offset(cpu_addr), size, staging_buffer.data());
|
||||||
}
|
}
|
||||||
return Register(MapInterval(cpu_addr, cpu_addr_end, gpu_addr));
|
return Register(MapInterval(cpu_addr, cpu_addr_end, gpu_addr));
|
||||||
}
|
}
|
||||||
|
@ -370,7 +377,7 @@ private:
|
||||||
return map;
|
return map;
|
||||||
}
|
}
|
||||||
|
|
||||||
void UpdateBlock(const OwnerBuffer& block, VAddr start, VAddr end,
|
void UpdateBlock(const Buffer* block, VAddr start, VAddr end,
|
||||||
const VectorMapInterval& overlaps) {
|
const VectorMapInterval& overlaps) {
|
||||||
const IntervalType base_interval{start, end};
|
const IntervalType base_interval{start, end};
|
||||||
IntervalSet interval_set{};
|
IntervalSet interval_set{};
|
||||||
|
@ -380,13 +387,13 @@ private:
|
||||||
interval_set.subtract(subtract);
|
interval_set.subtract(subtract);
|
||||||
}
|
}
|
||||||
for (auto& interval : interval_set) {
|
for (auto& interval : interval_set) {
|
||||||
std::size_t size = interval.upper() - interval.lower();
|
const std::size_t size = interval.upper() - interval.lower();
|
||||||
if (size > 0) {
|
if (size == 0) {
|
||||||
staging_buffer.resize(size);
|
continue;
|
||||||
system.Memory().ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size);
|
|
||||||
UploadBlockData(block, block->GetOffset(interval.lower()), size,
|
|
||||||
staging_buffer.data());
|
|
||||||
}
|
}
|
||||||
|
staging_buffer.resize(size);
|
||||||
|
system.Memory().ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size);
|
||||||
|
block->Upload(block->Offset(interval.lower()), size, staging_buffer.data());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -416,23 +423,27 @@ private:
|
||||||
}
|
}
|
||||||
|
|
||||||
void FlushMap(MapInterval* map) {
|
void FlushMap(MapInterval* map) {
|
||||||
|
const auto it = blocks.find(map->start >> BLOCK_PAGE_BITS);
|
||||||
|
ASSERT_OR_EXECUTE(it != blocks.end(), return;);
|
||||||
|
|
||||||
|
std::shared_ptr<Buffer> block = it->second;
|
||||||
|
|
||||||
const std::size_t size = map->end - map->start;
|
const std::size_t size = map->end - map->start;
|
||||||
OwnerBuffer block = blocks[map->start >> block_page_bits];
|
|
||||||
staging_buffer.resize(size);
|
staging_buffer.resize(size);
|
||||||
DownloadBlockData(block, block->GetOffset(map->start), size, staging_buffer.data());
|
block->Download(block->Offset(map->start), size, staging_buffer.data());
|
||||||
system.Memory().WriteBlockUnsafe(map->start, staging_buffer.data(), size);
|
system.Memory().WriteBlockUnsafe(map->start, staging_buffer.data(), size);
|
||||||
map->MarkAsModified(false, 0);
|
map->MarkAsModified(false, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
BufferInfo StreamBufferUpload(const void* raw_pointer, std::size_t size,
|
template <typename Callable>
|
||||||
std::size_t alignment) {
|
BufferInfo StreamBufferUpload(std::size_t size, std::size_t alignment, Callable&& callable) {
|
||||||
AlignBuffer(alignment);
|
AlignBuffer(alignment);
|
||||||
const std::size_t uploaded_offset = buffer_offset;
|
const std::size_t uploaded_offset = buffer_offset;
|
||||||
std::memcpy(buffer_ptr, raw_pointer, size);
|
callable(buffer_ptr);
|
||||||
|
|
||||||
buffer_ptr += size;
|
buffer_ptr += size;
|
||||||
buffer_offset += size;
|
buffer_offset += size;
|
||||||
return {stream_buffer_handle, uploaded_offset};
|
return BufferInfo{stream_buffer->Handle(), uploaded_offset, stream_buffer->Address()};
|
||||||
}
|
}
|
||||||
|
|
||||||
void AlignBuffer(std::size_t alignment) {
|
void AlignBuffer(std::size_t alignment) {
|
||||||
|
@ -442,97 +453,89 @@ private:
|
||||||
buffer_offset = offset_aligned;
|
buffer_offset = offset_aligned;
|
||||||
}
|
}
|
||||||
|
|
||||||
OwnerBuffer EnlargeBlock(OwnerBuffer buffer) {
|
std::shared_ptr<Buffer> EnlargeBlock(std::shared_ptr<Buffer> buffer) {
|
||||||
const std::size_t old_size = buffer->GetSize();
|
const std::size_t old_size = buffer->Size();
|
||||||
const std::size_t new_size = old_size + block_page_size;
|
const std::size_t new_size = old_size + BLOCK_PAGE_SIZE;
|
||||||
const VAddr cpu_addr = buffer->GetCpuAddr();
|
const VAddr cpu_addr = buffer->CpuAddr();
|
||||||
OwnerBuffer new_buffer = CreateBlock(cpu_addr, new_size);
|
std::shared_ptr<Buffer> new_buffer = CreateBlock(cpu_addr, new_size);
|
||||||
CopyBlock(buffer, new_buffer, 0, 0, old_size);
|
new_buffer->CopyFrom(*buffer, 0, 0, old_size);
|
||||||
buffer->SetEpoch(epoch);
|
QueueDestruction(std::move(buffer));
|
||||||
pending_destruction.push_back(buffer);
|
|
||||||
const VAddr cpu_addr_end = cpu_addr + new_size - 1;
|
const VAddr cpu_addr_end = cpu_addr + new_size - 1;
|
||||||
u64 page_start = cpu_addr >> block_page_bits;
|
const u64 page_end = cpu_addr_end >> BLOCK_PAGE_BITS;
|
||||||
const u64 page_end = cpu_addr_end >> block_page_bits;
|
for (u64 page_start = cpu_addr >> BLOCK_PAGE_BITS; page_start <= page_end; ++page_start) {
|
||||||
while (page_start <= page_end) {
|
blocks.insert_or_assign(page_start, new_buffer);
|
||||||
blocks[page_start] = new_buffer;
|
|
||||||
++page_start;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return new_buffer;
|
return new_buffer;
|
||||||
}
|
}
|
||||||
|
|
||||||
OwnerBuffer MergeBlocks(OwnerBuffer first, OwnerBuffer second) {
|
std::shared_ptr<Buffer> MergeBlocks(std::shared_ptr<Buffer> first,
|
||||||
const std::size_t size_1 = first->GetSize();
|
std::shared_ptr<Buffer> second) {
|
||||||
const std::size_t size_2 = second->GetSize();
|
const std::size_t size_1 = first->Size();
|
||||||
const VAddr first_addr = first->GetCpuAddr();
|
const std::size_t size_2 = second->Size();
|
||||||
const VAddr second_addr = second->GetCpuAddr();
|
const VAddr first_addr = first->CpuAddr();
|
||||||
|
const VAddr second_addr = second->CpuAddr();
|
||||||
const VAddr new_addr = std::min(first_addr, second_addr);
|
const VAddr new_addr = std::min(first_addr, second_addr);
|
||||||
const std::size_t new_size = size_1 + size_2;
|
const std::size_t new_size = size_1 + size_2;
|
||||||
OwnerBuffer new_buffer = CreateBlock(new_addr, new_size);
|
|
||||||
CopyBlock(first, new_buffer, 0, new_buffer->GetOffset(first_addr), size_1);
|
std::shared_ptr<Buffer> new_buffer = CreateBlock(new_addr, new_size);
|
||||||
CopyBlock(second, new_buffer, 0, new_buffer->GetOffset(second_addr), size_2);
|
new_buffer->CopyFrom(*first, 0, new_buffer->Offset(first_addr), size_1);
|
||||||
first->SetEpoch(epoch);
|
new_buffer->CopyFrom(*second, 0, new_buffer->Offset(second_addr), size_2);
|
||||||
second->SetEpoch(epoch);
|
QueueDestruction(std::move(first));
|
||||||
pending_destruction.push_back(first);
|
QueueDestruction(std::move(second));
|
||||||
pending_destruction.push_back(second);
|
|
||||||
const VAddr cpu_addr_end = new_addr + new_size - 1;
|
const VAddr cpu_addr_end = new_addr + new_size - 1;
|
||||||
u64 page_start = new_addr >> block_page_bits;
|
const u64 page_end = cpu_addr_end >> BLOCK_PAGE_BITS;
|
||||||
const u64 page_end = cpu_addr_end >> block_page_bits;
|
for (u64 page_start = new_addr >> BLOCK_PAGE_BITS; page_start <= page_end; ++page_start) {
|
||||||
while (page_start <= page_end) {
|
blocks.insert_or_assign(page_start, new_buffer);
|
||||||
blocks[page_start] = new_buffer;
|
|
||||||
++page_start;
|
|
||||||
}
|
}
|
||||||
return new_buffer;
|
return new_buffer;
|
||||||
}
|
}
|
||||||
|
|
||||||
OwnerBuffer GetBlock(const VAddr cpu_addr, const std::size_t size) {
|
Buffer* GetBlock(VAddr cpu_addr, std::size_t size) {
|
||||||
OwnerBuffer found;
|
std::shared_ptr<Buffer> found;
|
||||||
|
|
||||||
const VAddr cpu_addr_end = cpu_addr + size - 1;
|
const VAddr cpu_addr_end = cpu_addr + size - 1;
|
||||||
u64 page_start = cpu_addr >> block_page_bits;
|
const u64 page_end = cpu_addr_end >> BLOCK_PAGE_BITS;
|
||||||
const u64 page_end = cpu_addr_end >> block_page_bits;
|
for (u64 page_start = cpu_addr >> BLOCK_PAGE_BITS; page_start <= page_end; ++page_start) {
|
||||||
while (page_start <= page_end) {
|
|
||||||
auto it = blocks.find(page_start);
|
auto it = blocks.find(page_start);
|
||||||
if (it == blocks.end()) {
|
if (it == blocks.end()) {
|
||||||
if (found) {
|
if (found) {
|
||||||
found = EnlargeBlock(found);
|
found = EnlargeBlock(found);
|
||||||
} else {
|
continue;
|
||||||
const VAddr start_addr = (page_start << block_page_bits);
|
|
||||||
found = CreateBlock(start_addr, block_page_size);
|
|
||||||
blocks[page_start] = found;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if (found) {
|
|
||||||
if (found == it->second) {
|
|
||||||
++page_start;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
found = MergeBlocks(found, it->second);
|
|
||||||
} else {
|
|
||||||
found = it->second;
|
|
||||||
}
|
}
|
||||||
|
const VAddr start_addr = page_start << BLOCK_PAGE_BITS;
|
||||||
|
found = CreateBlock(start_addr, BLOCK_PAGE_SIZE);
|
||||||
|
blocks.insert_or_assign(page_start, found);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (!found) {
|
||||||
|
found = it->second;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (found != it->second) {
|
||||||
|
found = MergeBlocks(std::move(found), it->second);
|
||||||
}
|
}
|
||||||
++page_start;
|
|
||||||
}
|
}
|
||||||
return found;
|
return found.get();
|
||||||
}
|
}
|
||||||
|
|
||||||
void MarkRegionAsWritten(const VAddr start, const VAddr end) {
|
void MarkRegionAsWritten(VAddr start, VAddr end) {
|
||||||
u64 page_start = start >> write_page_bit;
|
const u64 page_end = end >> WRITE_PAGE_BIT;
|
||||||
const u64 page_end = end >> write_page_bit;
|
for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) {
|
||||||
while (page_start <= page_end) {
|
|
||||||
auto it = written_pages.find(page_start);
|
auto it = written_pages.find(page_start);
|
||||||
if (it != written_pages.end()) {
|
if (it != written_pages.end()) {
|
||||||
it->second = it->second + 1;
|
it->second = it->second + 1;
|
||||||
} else {
|
} else {
|
||||||
written_pages[page_start] = 1;
|
written_pages.insert_or_assign(page_start, 1);
|
||||||
}
|
}
|
||||||
++page_start;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void UnmarkRegionAsWritten(const VAddr start, const VAddr end) {
|
void UnmarkRegionAsWritten(VAddr start, VAddr end) {
|
||||||
u64 page_start = start >> write_page_bit;
|
const u64 page_end = end >> WRITE_PAGE_BIT;
|
||||||
const u64 page_end = end >> write_page_bit;
|
for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) {
|
||||||
while (page_start <= page_end) {
|
|
||||||
auto it = written_pages.find(page_start);
|
auto it = written_pages.find(page_start);
|
||||||
if (it != written_pages.end()) {
|
if (it != written_pages.end()) {
|
||||||
if (it->second > 1) {
|
if (it->second > 1) {
|
||||||
|
@ -541,22 +544,24 @@ private:
|
||||||
written_pages.erase(it);
|
written_pages.erase(it);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
++page_start;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool IsRegionWritten(const VAddr start, const VAddr end) const {
|
bool IsRegionWritten(VAddr start, VAddr end) const {
|
||||||
u64 page_start = start >> write_page_bit;
|
const u64 page_end = end >> WRITE_PAGE_BIT;
|
||||||
const u64 page_end = end >> write_page_bit;
|
for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) {
|
||||||
while (page_start <= page_end) {
|
|
||||||
if (written_pages.count(page_start) > 0) {
|
if (written_pages.count(page_start) > 0) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
++page_start;
|
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void QueueDestruction(std::shared_ptr<Buffer> buffer) {
|
||||||
|
buffer->SetEpoch(epoch);
|
||||||
|
pending_destruction.push(std::move(buffer));
|
||||||
|
}
|
||||||
|
|
||||||
void MarkForAsyncFlush(MapInterval* map) {
|
void MarkForAsyncFlush(MapInterval* map) {
|
||||||
if (!uncommitted_flushes) {
|
if (!uncommitted_flushes) {
|
||||||
uncommitted_flushes = std::make_shared<std::unordered_set<MapInterval*>>();
|
uncommitted_flushes = std::make_shared<std::unordered_set<MapInterval*>>();
|
||||||
|
@ -568,9 +573,7 @@ private:
|
||||||
Core::System& system;
|
Core::System& system;
|
||||||
|
|
||||||
std::unique_ptr<StreamBuffer> stream_buffer;
|
std::unique_ptr<StreamBuffer> stream_buffer;
|
||||||
BufferType stream_buffer_handle{};
|
BufferType stream_buffer_handle;
|
||||||
|
|
||||||
bool invalidated = false;
|
|
||||||
|
|
||||||
u8* buffer_ptr = nullptr;
|
u8* buffer_ptr = nullptr;
|
||||||
u64 buffer_offset = 0;
|
u64 buffer_offset = 0;
|
||||||
|
@ -580,18 +583,15 @@ private:
|
||||||
boost::intrusive::set<MapInterval, boost::intrusive::compare<MapIntervalCompare>>
|
boost::intrusive::set<MapInterval, boost::intrusive::compare<MapIntervalCompare>>
|
||||||
mapped_addresses;
|
mapped_addresses;
|
||||||
|
|
||||||
static constexpr u64 write_page_bit = 11;
|
|
||||||
std::unordered_map<u64, u32> written_pages;
|
std::unordered_map<u64, u32> written_pages;
|
||||||
|
std::unordered_map<u64, std::shared_ptr<Buffer>> blocks;
|
||||||
|
|
||||||
static constexpr u64 block_page_bits = 21;
|
std::queue<std::shared_ptr<Buffer>> pending_destruction;
|
||||||
static constexpr u64 block_page_size = 1ULL << block_page_bits;
|
|
||||||
std::unordered_map<u64, OwnerBuffer> blocks;
|
|
||||||
|
|
||||||
std::list<OwnerBuffer> pending_destruction;
|
|
||||||
u64 epoch = 0;
|
u64 epoch = 0;
|
||||||
u64 modified_ticks = 0;
|
u64 modified_ticks = 0;
|
||||||
|
|
||||||
std::vector<u8> staging_buffer;
|
std::vector<u8> staging_buffer;
|
||||||
|
|
||||||
std::list<MapInterval*> marked_for_unregister;
|
std::list<MapInterval*> marked_for_unregister;
|
||||||
|
|
||||||
std::shared_ptr<std::unordered_set<MapInterval*>> uncommitted_flushes;
|
std::shared_ptr<std::unordered_set<MapInterval*>> uncommitted_flushes;
|
||||||
|
|
|
@ -93,6 +93,7 @@ public:
|
||||||
virtual SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const = 0;
|
virtual SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const = 0;
|
||||||
virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
|
virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
|
||||||
u64 offset) const = 0;
|
u64 offset) const = 0;
|
||||||
|
virtual SamplerDescriptor AccessSampler(u32 handle) const = 0;
|
||||||
virtual u32 GetBoundBuffer() const = 0;
|
virtual u32 GetBoundBuffer() const = 0;
|
||||||
|
|
||||||
virtual VideoCore::GuestDriverProfile& AccessGuestDriverProfile() = 0;
|
virtual VideoCore::GuestDriverProfile& AccessGuestDriverProfile() = 0;
|
||||||
|
|
|
@ -92,8 +92,11 @@ SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 con
|
||||||
ASSERT(stage == ShaderType::Compute);
|
ASSERT(stage == ShaderType::Compute);
|
||||||
const auto& tex_info_buffer = launch_description.const_buffer_config[const_buffer];
|
const auto& tex_info_buffer = launch_description.const_buffer_config[const_buffer];
|
||||||
const GPUVAddr tex_info_address = tex_info_buffer.Address() + offset;
|
const GPUVAddr tex_info_address = tex_info_buffer.Address() + offset;
|
||||||
|
return AccessSampler(memory_manager.Read<u32>(tex_info_address));
|
||||||
|
}
|
||||||
|
|
||||||
const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)};
|
SamplerDescriptor KeplerCompute::AccessSampler(u32 handle) const {
|
||||||
|
const Texture::TextureHandle tex_handle{handle};
|
||||||
const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle);
|
const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle);
|
||||||
SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic);
|
SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic);
|
||||||
result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value());
|
result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value());
|
||||||
|
|
|
@ -219,6 +219,8 @@ public:
|
||||||
SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
|
SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
|
||||||
u64 offset) const override;
|
u64 offset) const override;
|
||||||
|
|
||||||
|
SamplerDescriptor AccessSampler(u32 handle) const override;
|
||||||
|
|
||||||
u32 GetBoundBuffer() const override {
|
u32 GetBoundBuffer() const override {
|
||||||
return regs.tex_cb_index;
|
return regs.tex_cb_index;
|
||||||
}
|
}
|
||||||
|
|
|
@ -25,9 +25,8 @@ constexpr u32 MacroRegistersStart = 0xE00;
|
||||||
Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
|
Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
|
||||||
MemoryManager& memory_manager)
|
MemoryManager& memory_manager)
|
||||||
: system{system}, rasterizer{rasterizer}, memory_manager{memory_manager},
|
: system{system}, rasterizer{rasterizer}, memory_manager{memory_manager},
|
||||||
macro_interpreter{*this}, upload_state{memory_manager, regs.upload} {
|
macro_engine{GetMacroEngine(*this)}, upload_state{memory_manager, regs.upload} {
|
||||||
dirty.flags.flip();
|
dirty.flags.flip();
|
||||||
|
|
||||||
InitializeRegisterDefaults();
|
InitializeRegisterDefaults();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -106,7 +105,11 @@ void Maxwell3D::InitializeRegisterDefaults() {
|
||||||
regs.rasterize_enable = 1;
|
regs.rasterize_enable = 1;
|
||||||
regs.rt_separate_frag_data = 1;
|
regs.rt_separate_frag_data = 1;
|
||||||
regs.framebuffer_srgb = 1;
|
regs.framebuffer_srgb = 1;
|
||||||
|
regs.line_width_aliased = 1.0f;
|
||||||
|
regs.line_width_smooth = 1.0f;
|
||||||
regs.front_face = Maxwell3D::Regs::FrontFace::ClockWise;
|
regs.front_face = Maxwell3D::Regs::FrontFace::ClockWise;
|
||||||
|
regs.polygon_mode_back = Maxwell3D::Regs::PolygonMode::Fill;
|
||||||
|
regs.polygon_mode_front = Maxwell3D::Regs::PolygonMode::Fill;
|
||||||
|
|
||||||
shadow_state = regs;
|
shadow_state = regs;
|
||||||
|
|
||||||
|
@ -116,7 +119,7 @@ void Maxwell3D::InitializeRegisterDefaults() {
|
||||||
mme_inline[MAXWELL3D_REG_INDEX(index_array.count)] = true;
|
mme_inline[MAXWELL3D_REG_INDEX(index_array.count)] = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Maxwell3D::CallMacroMethod(u32 method, std::size_t num_parameters, const u32* parameters) {
|
void Maxwell3D::CallMacroMethod(u32 method, const std::vector<u32>& parameters) {
|
||||||
// Reset the current macro.
|
// Reset the current macro.
|
||||||
executing_macro = 0;
|
executing_macro = 0;
|
||||||
|
|
||||||
|
@ -125,7 +128,7 @@ void Maxwell3D::CallMacroMethod(u32 method, std::size_t num_parameters, const u3
|
||||||
((method - MacroRegistersStart) >> 1) % static_cast<u32>(macro_positions.size());
|
((method - MacroRegistersStart) >> 1) % static_cast<u32>(macro_positions.size());
|
||||||
|
|
||||||
// Execute the current macro.
|
// Execute the current macro.
|
||||||
macro_interpreter.Execute(macro_positions[entry], num_parameters, parameters);
|
macro_engine->Execute(*this, macro_positions[entry], parameters);
|
||||||
if (mme_draw.current_mode != MMEDrawMode::Undefined) {
|
if (mme_draw.current_mode != MMEDrawMode::Undefined) {
|
||||||
FlushMMEInlineDraw();
|
FlushMMEInlineDraw();
|
||||||
}
|
}
|
||||||
|
@ -161,7 +164,7 @@ void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
|
||||||
|
|
||||||
// Call the macro when there are no more parameters in the command buffer
|
// Call the macro when there are no more parameters in the command buffer
|
||||||
if (is_last_call) {
|
if (is_last_call) {
|
||||||
CallMacroMethod(executing_macro, macro_params.size(), macro_params.data());
|
CallMacroMethod(executing_macro, macro_params);
|
||||||
macro_params.clear();
|
macro_params.clear();
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
|
@ -197,7 +200,7 @@ void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case MAXWELL3D_REG_INDEX(macros.data): {
|
case MAXWELL3D_REG_INDEX(macros.data): {
|
||||||
ProcessMacroUpload(arg);
|
macro_engine->AddCode(regs.macros.upload_address, arg);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case MAXWELL3D_REG_INDEX(macros.bind): {
|
case MAXWELL3D_REG_INDEX(macros.bind): {
|
||||||
|
@ -306,7 +309,7 @@ void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
|
||||||
|
|
||||||
// Call the macro when there are no more parameters in the command buffer
|
// Call the macro when there are no more parameters in the command buffer
|
||||||
if (amount == methods_pending) {
|
if (amount == methods_pending) {
|
||||||
CallMacroMethod(executing_macro, macro_params.size(), macro_params.data());
|
CallMacroMethod(executing_macro, macro_params);
|
||||||
macro_params.clear();
|
macro_params.clear();
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
|
@ -420,9 +423,7 @@ void Maxwell3D::FlushMMEInlineDraw() {
|
||||||
}
|
}
|
||||||
|
|
||||||
void Maxwell3D::ProcessMacroUpload(u32 data) {
|
void Maxwell3D::ProcessMacroUpload(u32 data) {
|
||||||
ASSERT_MSG(regs.macros.upload_address < macro_memory.size(),
|
macro_engine->AddCode(regs.macros.upload_address++, data);
|
||||||
"upload_address exceeded macro_memory size!");
|
|
||||||
macro_memory[regs.macros.upload_address++] = data;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void Maxwell3D::ProcessMacroBind(u32 data) {
|
void Maxwell3D::ProcessMacroBind(u32 data) {
|
||||||
|
@ -739,8 +740,11 @@ SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_b
|
||||||
const auto& shader = state.shader_stages[static_cast<std::size_t>(stage)];
|
const auto& shader = state.shader_stages[static_cast<std::size_t>(stage)];
|
||||||
const auto& tex_info_buffer = shader.const_buffers[const_buffer];
|
const auto& tex_info_buffer = shader.const_buffers[const_buffer];
|
||||||
const GPUVAddr tex_info_address = tex_info_buffer.address + offset;
|
const GPUVAddr tex_info_address = tex_info_buffer.address + offset;
|
||||||
|
return AccessSampler(memory_manager.Read<u32>(tex_info_address));
|
||||||
|
}
|
||||||
|
|
||||||
const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)};
|
SamplerDescriptor Maxwell3D::AccessSampler(u32 handle) const {
|
||||||
|
const Texture::TextureHandle tex_handle{handle};
|
||||||
const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle);
|
const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle);
|
||||||
SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic);
|
SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic);
|
||||||
result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value());
|
result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value());
|
||||||
|
|
|
@ -23,7 +23,7 @@
|
||||||
#include "video_core/engines/engine_upload.h"
|
#include "video_core/engines/engine_upload.h"
|
||||||
#include "video_core/engines/shader_type.h"
|
#include "video_core/engines/shader_type.h"
|
||||||
#include "video_core/gpu.h"
|
#include "video_core/gpu.h"
|
||||||
#include "video_core/macro_interpreter.h"
|
#include "video_core/macro/macro.h"
|
||||||
#include "video_core/textures/texture.h"
|
#include "video_core/textures/texture.h"
|
||||||
|
|
||||||
namespace Core {
|
namespace Core {
|
||||||
|
@ -598,6 +598,7 @@ public:
|
||||||
BitField<4, 3, u32> block_height;
|
BitField<4, 3, u32> block_height;
|
||||||
BitField<8, 3, u32> block_depth;
|
BitField<8, 3, u32> block_depth;
|
||||||
BitField<12, 1, InvMemoryLayout> type;
|
BitField<12, 1, InvMemoryLayout> type;
|
||||||
|
BitField<16, 1, u32> is_3d;
|
||||||
} memory_layout;
|
} memory_layout;
|
||||||
union {
|
union {
|
||||||
BitField<0, 16, u32> layers;
|
BitField<0, 16, u32> layers;
|
||||||
|
@ -1403,6 +1404,8 @@ public:
|
||||||
SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
|
SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
|
||||||
u64 offset) const override;
|
u64 offset) const override;
|
||||||
|
|
||||||
|
SamplerDescriptor AccessSampler(u32 handle) const override;
|
||||||
|
|
||||||
u32 GetBoundBuffer() const override {
|
u32 GetBoundBuffer() const override {
|
||||||
return regs.tex_cb_index;
|
return regs.tex_cb_index;
|
||||||
}
|
}
|
||||||
|
@ -1411,19 +1414,18 @@ public:
|
||||||
|
|
||||||
const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override;
|
const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override;
|
||||||
|
|
||||||
/// Memory for macro code - it's undetermined how big this is, however 1MB is much larger than
|
|
||||||
/// we've seen used.
|
|
||||||
using MacroMemory = std::array<u32, 0x40000>;
|
|
||||||
|
|
||||||
/// Gets a reference to macro memory.
|
|
||||||
const MacroMemory& GetMacroMemory() const {
|
|
||||||
return macro_memory;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool ShouldExecute() const {
|
bool ShouldExecute() const {
|
||||||
return execute_on;
|
return execute_on;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
VideoCore::RasterizerInterface& GetRasterizer() {
|
||||||
|
return rasterizer;
|
||||||
|
}
|
||||||
|
|
||||||
|
const VideoCore::RasterizerInterface& GetRasterizer() const {
|
||||||
|
return rasterizer;
|
||||||
|
}
|
||||||
|
|
||||||
/// Notify a memory write has happened.
|
/// Notify a memory write has happened.
|
||||||
void OnMemoryWrite() {
|
void OnMemoryWrite() {
|
||||||
dirty.flags |= dirty.on_write_stores;
|
dirty.flags |= dirty.on_write_stores;
|
||||||
|
@ -1468,16 +1470,13 @@ private:
|
||||||
|
|
||||||
std::array<bool, Regs::NUM_REGS> mme_inline{};
|
std::array<bool, Regs::NUM_REGS> mme_inline{};
|
||||||
|
|
||||||
/// Memory for macro code
|
|
||||||
MacroMemory macro_memory;
|
|
||||||
|
|
||||||
/// Macro method that is currently being executed / being fed parameters.
|
/// Macro method that is currently being executed / being fed parameters.
|
||||||
u32 executing_macro = 0;
|
u32 executing_macro = 0;
|
||||||
/// Parameters that have been submitted to the macro call so far.
|
/// Parameters that have been submitted to the macro call so far.
|
||||||
std::vector<u32> macro_params;
|
std::vector<u32> macro_params;
|
||||||
|
|
||||||
/// Interpreter for the macro codes uploaded to the GPU.
|
/// Interpreter for the macro codes uploaded to the GPU.
|
||||||
MacroInterpreter macro_interpreter;
|
std::unique_ptr<MacroEngine> macro_engine;
|
||||||
|
|
||||||
static constexpr u32 null_cb_data = 0xFFFFFFFF;
|
static constexpr u32 null_cb_data = 0xFFFFFFFF;
|
||||||
struct {
|
struct {
|
||||||
|
@ -1506,7 +1505,7 @@ private:
|
||||||
* @param num_parameters Number of arguments
|
* @param num_parameters Number of arguments
|
||||||
* @param parameters Arguments to the method call
|
* @param parameters Arguments to the method call
|
||||||
*/
|
*/
|
||||||
void CallMacroMethod(u32 method, std::size_t num_parameters, const u32* parameters);
|
void CallMacroMethod(u32 method, const std::vector<u32>& parameters);
|
||||||
|
|
||||||
/// Handles writes to the macro uploading register.
|
/// Handles writes to the macro uploading register.
|
||||||
void ProcessMacroUpload(u32 data);
|
void ProcessMacroUpload(u32 data);
|
||||||
|
|
72
src/video_core/macro/macro.cpp
Normal file
72
src/video_core/macro/macro.cpp
Normal file
|
@ -0,0 +1,72 @@
|
||||||
|
// Copyright 2020 yuzu Emulator Project
|
||||||
|
// Licensed under GPLv2 or any later version
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#include <boost/container_hash/hash.hpp>
|
||||||
|
#include "common/assert.h"
|
||||||
|
#include "common/logging/log.h"
|
||||||
|
#include "core/settings.h"
|
||||||
|
#include "video_core/engines/maxwell_3d.h"
|
||||||
|
#include "video_core/macro/macro.h"
|
||||||
|
#include "video_core/macro/macro_hle.h"
|
||||||
|
#include "video_core/macro/macro_interpreter.h"
|
||||||
|
#include "video_core/macro/macro_jit_x64.h"
|
||||||
|
|
||||||
|
namespace Tegra {
|
||||||
|
|
||||||
|
MacroEngine::MacroEngine(Engines::Maxwell3D& maxwell3d)
|
||||||
|
: hle_macros{std::make_unique<Tegra::HLEMacro>(maxwell3d)} {}
|
||||||
|
|
||||||
|
MacroEngine::~MacroEngine() = default;
|
||||||
|
|
||||||
|
void MacroEngine::AddCode(u32 method, u32 data) {
|
||||||
|
uploaded_macro_code[method].push_back(data);
|
||||||
|
}
|
||||||
|
|
||||||
|
void MacroEngine::Execute(Engines::Maxwell3D& maxwell3d, u32 method,
|
||||||
|
const std::vector<u32>& parameters) {
|
||||||
|
auto compiled_macro = macro_cache.find(method);
|
||||||
|
if (compiled_macro != macro_cache.end()) {
|
||||||
|
const auto& cache_info = compiled_macro->second;
|
||||||
|
if (cache_info.has_hle_program) {
|
||||||
|
cache_info.hle_program->Execute(parameters, method);
|
||||||
|
} else {
|
||||||
|
cache_info.lle_program->Execute(parameters, method);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Macro not compiled, check if it's uploaded and if so, compile it
|
||||||
|
auto macro_code = uploaded_macro_code.find(method);
|
||||||
|
if (macro_code == uploaded_macro_code.end()) {
|
||||||
|
UNREACHABLE_MSG("Macro 0x{0:x} was not uploaded", method);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
auto& cache_info = macro_cache[method];
|
||||||
|
cache_info.hash = boost::hash_value(macro_code->second);
|
||||||
|
cache_info.lle_program = Compile(macro_code->second);
|
||||||
|
|
||||||
|
auto hle_program = hle_macros->GetHLEProgram(cache_info.hash);
|
||||||
|
if (hle_program.has_value()) {
|
||||||
|
cache_info.has_hle_program = true;
|
||||||
|
cache_info.hle_program = std::move(hle_program.value());
|
||||||
|
}
|
||||||
|
|
||||||
|
if (cache_info.has_hle_program) {
|
||||||
|
cache_info.hle_program->Execute(parameters, method);
|
||||||
|
} else {
|
||||||
|
cache_info.lle_program->Execute(parameters, method);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::unique_ptr<MacroEngine> GetMacroEngine(Engines::Maxwell3D& maxwell3d) {
|
||||||
|
if (Settings::values.disable_macro_jit) {
|
||||||
|
return std::make_unique<MacroInterpreter>(maxwell3d);
|
||||||
|
}
|
||||||
|
#ifdef ARCHITECTURE_x86_64
|
||||||
|
return std::make_unique<MacroJITx64>(maxwell3d);
|
||||||
|
#else
|
||||||
|
return std::make_unique<MacroInterpreter>(maxwell3d);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace Tegra
|
141
src/video_core/macro/macro.h
Normal file
141
src/video_core/macro/macro.h
Normal file
|
@ -0,0 +1,141 @@
|
||||||
|
// Copyright 2020 yuzu Emulator Project
|
||||||
|
// Licensed under GPLv2 or any later version
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <memory>
|
||||||
|
#include <unordered_map>
|
||||||
|
#include <vector>
|
||||||
|
#include "common/bit_field.h"
|
||||||
|
#include "common/common_types.h"
|
||||||
|
|
||||||
|
namespace Tegra {
|
||||||
|
|
||||||
|
namespace Engines {
|
||||||
|
class Maxwell3D;
|
||||||
|
}
|
||||||
|
|
||||||
|
namespace Macro {
|
||||||
|
constexpr std::size_t NUM_MACRO_REGISTERS = 8;
|
||||||
|
enum class Operation : u32 {
|
||||||
|
ALU = 0,
|
||||||
|
AddImmediate = 1,
|
||||||
|
ExtractInsert = 2,
|
||||||
|
ExtractShiftLeftImmediate = 3,
|
||||||
|
ExtractShiftLeftRegister = 4,
|
||||||
|
Read = 5,
|
||||||
|
Unused = 6, // This operation doesn't seem to be a valid encoding.
|
||||||
|
Branch = 7,
|
||||||
|
};
|
||||||
|
|
||||||
|
enum class ALUOperation : u32 {
|
||||||
|
Add = 0,
|
||||||
|
AddWithCarry = 1,
|
||||||
|
Subtract = 2,
|
||||||
|
SubtractWithBorrow = 3,
|
||||||
|
// Operations 4-7 don't seem to be valid encodings.
|
||||||
|
Xor = 8,
|
||||||
|
Or = 9,
|
||||||
|
And = 10,
|
||||||
|
AndNot = 11,
|
||||||
|
Nand = 12
|
||||||
|
};
|
||||||
|
|
||||||
|
enum class ResultOperation : u32 {
|
||||||
|
IgnoreAndFetch = 0,
|
||||||
|
Move = 1,
|
||||||
|
MoveAndSetMethod = 2,
|
||||||
|
FetchAndSend = 3,
|
||||||
|
MoveAndSend = 4,
|
||||||
|
FetchAndSetMethod = 5,
|
||||||
|
MoveAndSetMethodFetchAndSend = 6,
|
||||||
|
MoveAndSetMethodSend = 7
|
||||||
|
};
|
||||||
|
|
||||||
|
enum class BranchCondition : u32 {
|
||||||
|
Zero = 0,
|
||||||
|
NotZero = 1,
|
||||||
|
};
|
||||||
|
|
||||||
|
union Opcode {
|
||||||
|
u32 raw;
|
||||||
|
BitField<0, 3, Operation> operation;
|
||||||
|
BitField<4, 3, ResultOperation> result_operation;
|
||||||
|
BitField<4, 1, BranchCondition> branch_condition;
|
||||||
|
// If set on a branch, then the branch doesn't have a delay slot.
|
||||||
|
BitField<5, 1, u32> branch_annul;
|
||||||
|
BitField<7, 1, u32> is_exit;
|
||||||
|
BitField<8, 3, u32> dst;
|
||||||
|
BitField<11, 3, u32> src_a;
|
||||||
|
BitField<14, 3, u32> src_b;
|
||||||
|
// The signed immediate overlaps the second source operand and the alu operation.
|
||||||
|
BitField<14, 18, s32> immediate;
|
||||||
|
|
||||||
|
BitField<17, 5, ALUOperation> alu_operation;
|
||||||
|
|
||||||
|
// Bitfield instructions data
|
||||||
|
BitField<17, 5, u32> bf_src_bit;
|
||||||
|
BitField<22, 5, u32> bf_size;
|
||||||
|
BitField<27, 5, u32> bf_dst_bit;
|
||||||
|
|
||||||
|
u32 GetBitfieldMask() const {
|
||||||
|
return (1 << bf_size) - 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
s32 GetBranchTarget() const {
|
||||||
|
return static_cast<s32>(immediate * sizeof(u32));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
union MethodAddress {
|
||||||
|
u32 raw;
|
||||||
|
BitField<0, 12, u32> address;
|
||||||
|
BitField<12, 6, u32> increment;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace Macro
|
||||||
|
|
||||||
|
class HLEMacro;
|
||||||
|
|
||||||
|
class CachedMacro {
|
||||||
|
public:
|
||||||
|
virtual ~CachedMacro() = default;
|
||||||
|
/**
|
||||||
|
* Executes the macro code with the specified input parameters.
|
||||||
|
* @param code The macro byte code to execute
|
||||||
|
* @param parameters The parameters of the macro
|
||||||
|
*/
|
||||||
|
virtual void Execute(const std::vector<u32>& parameters, u32 method) = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Base class of the macro execution backends. Derived classes provide Compile(), which turns
// uploaded macro code into a CachedMacro.
class MacroEngine {
|
||||||
|
public:
|
||||||
|
explicit MacroEngine(Engines::Maxwell3D& maxwell3d);
|
||||||
|
virtual ~MacroEngine();
|
||||||
|
|
||||||
|
// Stores the uploaded macro code so that it can be compiled when the macro is called.
|
||||||
|
void AddCode(u32 method, u32 data);
|
||||||
|
|
||||||
|
// Compiles the macro if it's not in the cache, and executes the compiled macro.
|
||||||
|
void Execute(Engines::Maxwell3D& maxwell3d, u32 method, const std::vector<u32>& parameters);
|
||||||
|
|
||||||
|
protected:
|
||||||
|
// Builds an executable program from the given macro code words.
|
||||||
|
virtual std::unique_ptr<CachedMacro> Compile(const std::vector<u32>& code) = 0;
|
||||||
|
|
||||||
|
private:
|
||||||
|
// Per-macro cache entry.
struct CacheInfo {
|
||||||
|
// Program produced by Compile() (presumably; the code that fills this is not visible here).
std::unique_ptr<CachedMacro> lle_program{};
|
||||||
|
// HLE replacement program, if one exists (presumably obtained from hle_macros - TODO confirm).
std::unique_ptr<CachedMacro> hle_program{};
|
||||||
|
// Hash associated with the macro code (how it is computed is not visible here).
u64 hash{};
|
||||||
|
// Presumably true when hle_program should be used - TODO confirm.
bool has_hle_program{};
|
||||||
|
};
|
||||||
|
|
||||||
|
// Cache entries keyed by a u32 (presumably the macro's method - TODO confirm).
std::unordered_map<u32, CacheInfo> macro_cache;
|
||||||
|
// Uploaded macro code words, keyed by a u32 (presumably the macro's method - TODO confirm).
std::unordered_map<u32, std::vector<u32>> uploaded_macro_code;
|
||||||
|
// Provider of HLE replacement programs.
std::unique_ptr<HLEMacro> hle_macros;
|
||||||
|
};
|
||||||
|
|
||||||
|
std::unique_ptr<MacroEngine> GetMacroEngine(Engines::Maxwell3D& maxwell3d);
|
||||||
|
|
||||||
|
} // namespace Tegra
|
113
src/video_core/macro/macro_hle.cpp
Normal file
113
src/video_core/macro/macro_hle.cpp
Normal file
|
@ -0,0 +1,113 @@
|
||||||
|
// Copyright 2020 yuzu Emulator Project
|
||||||
|
// Licensed under GPLv2 or any later version
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#include <array>
|
||||||
|
#include <vector>
|
||||||
|
#include "video_core/engines/maxwell_3d.h"
|
||||||
|
#include "video_core/macro/macro_hle.h"
|
||||||
|
#include "video_core/rasterizer_interface.h"
|
||||||
|
|
||||||
|
namespace Tegra {
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
// HLE'd functions
|
||||||
|
// HLE implementation registered in hle_funcs under the hash 0x771BB18C62444DA0.
// Programs the index-array draw registers from the macro parameters, asks the rasterizer to
// draw when the engine reports that it should execute, and then clears the per-draw state.
//
// parameters[0]..parameters[5] are read, so the caller must supply at least six values:
//   [0] topology (only the low 26 bits are used), [1] index count,
//   [2] instance count (ANDed with register 0xD1B), [3] element base,
//   [4] first index, [5] base instance.
static void HLE_771BB18C62444DA0(Engines::Maxwell3D& maxwell3d,
                                 const std::vector<u32>& parameters) {
    // Keeps the low 26 bits of the topology parameter. This used to be written as
    // `~(0x3ffffff << 26)`, which left-shifts a signed int beyond its range (undefined
    // behaviour); the unsigned constant below is the value that expression was meant to yield.
    constexpr u32 topology_mask = 0x3ffffffU;

    const u32 instance_count = parameters[2] & maxwell3d.GetRegisterValue(0xD1B);

    maxwell3d.regs.draw.topology.Assign(
        static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0] &
                                                                        topology_mask));
    maxwell3d.regs.vb_base_instance = parameters[5];
    maxwell3d.mme_draw.instance_count = instance_count;
    maxwell3d.regs.vb_element_base = parameters[3];
    maxwell3d.regs.index_array.count = parameters[1];
    maxwell3d.regs.index_array.first = parameters[4];

    if (maxwell3d.ShouldExecute()) {
        maxwell3d.GetRasterizer().Draw(true, true);
    }

    // Clear the state that only applied to this draw.
    maxwell3d.regs.index_array.count = 0;
    maxwell3d.mme_draw.instance_count = 0;
    maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined;
}
|
||||||
|
|
||||||
|
// HLE implementation registered in hle_funcs under the hash 0x0D61FC9FAAC9FCAD.
// Fills the vertex-buffer draw registers from the macro parameters, asks the rasterizer to draw
// when the engine reports that it should execute, and then clears the per-draw state.
// parameters[0]..parameters[4] are read, so at least five values must be supplied.
static void HLE_0D61FC9FAAC9FCAD(Engines::Maxwell3D& maxwell3d,
                                 const std::vector<u32>& parameters) {
    using Topology = Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology;

    // The instance count is the third parameter masked by the current value of register 0xD1B.
    const u32 masked_instances = maxwell3d.GetRegisterValue(0xD1B) & parameters[2];

    auto& regs = maxwell3d.regs;
    auto& draw_state = maxwell3d.mme_draw;

    regs.vertex_buffer.first = parameters[3];
    regs.vertex_buffer.count = parameters[1];
    regs.vb_base_instance = parameters[4];
    regs.draw.topology.Assign(static_cast<Topology>(parameters[0]));
    draw_state.instance_count = masked_instances;

    if (maxwell3d.ShouldExecute()) {
        maxwell3d.GetRasterizer().Draw(false, true);
    }

    // Clear the state that only applied to this draw.
    regs.vertex_buffer.count = 0;
    draw_state.instance_count = 0;
    draw_state.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined;
}
|
||||||
|
|
||||||
|
// HLE implementation registered in hle_funcs under the hash 0x0217920100488FF7.
// Sets up the index-array draw registers from the macro parameters, forwards the element base
// and base instance through methods 0x8e3-0x8e5, asks the rasterizer to draw when the engine
// reports that it should execute, and finally resets everything it touched.
// parameters[0]..parameters[5] are read, so at least six values must be supplied.
static void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d,
                                 const std::vector<u32>& parameters) {
    using Topology = Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology;

    // The instance count is the third parameter masked by the current value of register 0xD1B.
    const u32 masked_instances = maxwell3d.GetRegisterValue(0xD1B) & parameters[2];
    const u32 vertex_base = parameters[4];
    const u32 instance_base = parameters[5];

    auto& regs = maxwell3d.regs;
    auto& draw_state = maxwell3d.mme_draw;

    regs.index_array.first = parameters[3];
    regs.reg_array[0x446] = vertex_base; // vertex id base?
    regs.index_array.count = parameters[1];
    regs.vb_element_base = vertex_base;
    regs.vb_base_instance = instance_base;
    draw_state.instance_count = masked_instances;

    maxwell3d.CallMethodFromMME(0x8e3, 0x640);
    maxwell3d.CallMethodFromMME(0x8e4, vertex_base);
    maxwell3d.CallMethodFromMME(0x8e5, instance_base);

    regs.draw.topology.Assign(static_cast<Topology>(parameters[0]));
    if (maxwell3d.ShouldExecute()) {
        maxwell3d.GetRasterizer().Draw(true, true);
    }

    // Undo the per-draw state, including the values sent through methods 0x8e4 and 0x8e5.
    regs.reg_array[0x446] = 0x0; // vertex id base?
    regs.index_array.count = 0;
    regs.vb_element_base = 0x0;
    regs.vb_base_instance = 0x0;
    draw_state.instance_count = 0;

    maxwell3d.CallMethodFromMME(0x8e3, 0x640);
    maxwell3d.CallMethodFromMME(0x8e4, 0x0);
    maxwell3d.CallMethodFromMME(0x8e5, 0x0);

    draw_state.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined;
}
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
// Table of the available HLE functions, each keyed by the 64-bit hash it is looked up with.
constexpr std::array<std::pair<u64, HLEFunction>, 3> hle_funcs{{
    std::pair<u64, HLEFunction>{0x771BB18C62444DA0, &HLE_771BB18C62444DA0},
    std::pair<u64, HLEFunction>{0x0D61FC9FAAC9FCAD, &HLE_0D61FC9FAAC9FCAD},
    std::pair<u64, HLEFunction>{0x0217920100488FF7, &HLE_0217920100488FF7},
}};
|
||||||
|
|
||||||
|
// Keeps a reference to the engine; GetHLEProgram passes it to every HLEMacroImpl it creates.
HLEMacro::HLEMacro(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {}
|
||||||
|
// Nothing to release beyond what the members handle themselves.
HLEMacro::~HLEMacro() = default;
|
||||||
|
|
||||||
|
// Looks up `hash` in hle_funcs. Returns a newly created HLEMacroImpl wrapping the first matching
// function, or std::nullopt when no HLE function is registered for that hash.
std::optional<std::unique_ptr<CachedMacro>> HLEMacro::GetHLEProgram(u64 hash) const {
    for (const auto& [known_hash, handler] : hle_funcs) {
        if (known_hash == hash) {
            return std::make_unique<HLEMacroImpl>(maxwell3d, handler);
        }
    }
    return std::nullopt;
}
|
||||||
|
|
||||||
|
// Nothing to release: the object only holds a reference and a function pointer.
HLEMacroImpl::~HLEMacroImpl() = default;
|
||||||
|
|
||||||
|
// Stores the engine reference and the HLE function that Execute() will call.
HLEMacroImpl::HLEMacroImpl(Engines::Maxwell3D& maxwell3d, HLEFunction func)
|
||||||
|
: maxwell3d(maxwell3d), func(func) {}
|
||||||
|
|
||||||
|
// Runs the wrapped HLE function with the stored engine and the given parameters.
// `method` is not used by this implementation.
void HLEMacroImpl::Execute(const std::vector<u32>& parameters, u32 method) {
|
||||||
|
func(maxwell3d, parameters);
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace Tegra
|
44
src/video_core/macro/macro_hle.h
Normal file
44
src/video_core/macro/macro_hle.h
Normal file
|
@ -0,0 +1,44 @@
|
||||||
|
// Copyright 2020 yuzu Emulator Project
|
||||||
|
// Licensed under GPLv2 or any later version
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <memory>
|
||||||
|
#include <optional>
|
||||||
|
#include <vector>
|
||||||
|
#include "common/common_types.h"
|
||||||
|
#include "video_core/macro/macro.h"
|
||||||
|
|
||||||
|
namespace Tegra {
|
||||||
|
|
||||||
|
namespace Engines {
|
||||||
|
class Maxwell3D;
|
||||||
|
}
|
||||||
|
|
||||||
|
using HLEFunction = void (*)(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters);
|
||||||
|
|
||||||
|
// Factory for HLE macro programs, looked up by hash.
class HLEMacro {
|
||||||
|
public:
|
||||||
|
explicit HLEMacro(Engines::Maxwell3D& maxwell3d);
|
||||||
|
~HLEMacro();
|
||||||
|
|
||||||
|
// Returns an HLE program for `hash`, or std::nullopt when none is registered for it.
std::optional<std::unique_ptr<CachedMacro>> GetHLEProgram(u64 hash) const;
|
||||||
|
|
||||||
|
private:
|
||||||
|
// Engine handed to every program created by GetHLEProgram.
Engines::Maxwell3D& maxwell3d;
|
||||||
|
};
|
||||||
|
|
||||||
|
class HLEMacroImpl : public CachedMacro {
|
||||||
|
public:
|
||||||
|
explicit HLEMacroImpl(Engines::Maxwell3D& maxwell3d, HLEFunction func);
|
||||||
|
~HLEMacroImpl();
|
||||||
|
|
||||||
|
void Execute(const std::vector<u32>& parameters, u32 method) override;
|
||||||
|
|
||||||
|
private:
|
||||||
|
Engines::Maxwell3D& maxwell3d;
|
||||||
|
HLEFunction func;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace Tegra
|
|
@ -1,4 +1,4 @@
|
||||||
// Copyright 2018 yuzu Emulator Project
|
// Copyright 2020 yuzu Emulator Project
|
||||||
// Licensed under GPLv2 or any later version
|
// Licensed under GPLv2 or any later version
|
||||||
// Refer to the license.txt file included.
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
@ -6,109 +6,47 @@
|
||||||
#include "common/logging/log.h"
|
#include "common/logging/log.h"
|
||||||
#include "common/microprofile.h"
|
#include "common/microprofile.h"
|
||||||
#include "video_core/engines/maxwell_3d.h"
|
#include "video_core/engines/maxwell_3d.h"
|
||||||
#include "video_core/macro_interpreter.h"
|
#include "video_core/macro/macro_interpreter.h"
|
||||||
|
|
||||||
MICROPROFILE_DEFINE(MacroInterp, "GPU", "Execute macro interpreter", MP_RGB(128, 128, 192));
|
MICROPROFILE_DEFINE(MacroInterp, "GPU", "Execute macro interpreter", MP_RGB(128, 128, 192));
|
||||||
|
|
||||||
namespace Tegra {
|
namespace Tegra {
|
||||||
namespace {
|
MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d)
|
||||||
enum class Operation : u32 {
|
: MacroEngine::MacroEngine(maxwell3d), maxwell3d(maxwell3d) {}
|
||||||
ALU = 0,
|
|
||||||
AddImmediate = 1,
|
|
||||||
ExtractInsert = 2,
|
|
||||||
ExtractShiftLeftImmediate = 3,
|
|
||||||
ExtractShiftLeftRegister = 4,
|
|
||||||
Read = 5,
|
|
||||||
Unused = 6, // This operation doesn't seem to be a valid encoding.
|
|
||||||
Branch = 7,
|
|
||||||
};
|
|
||||||
} // Anonymous namespace
|
|
||||||
|
|
||||||
enum class MacroInterpreter::ALUOperation : u32 {
|
std::unique_ptr<CachedMacro> MacroInterpreter::Compile(const std::vector<u32>& code) {
|
||||||
Add = 0,
|
return std::make_unique<MacroInterpreterImpl>(maxwell3d, code);
|
||||||
AddWithCarry = 1,
|
}
|
||||||
Subtract = 2,
|
|
||||||
SubtractWithBorrow = 3,
|
|
||||||
// Operations 4-7 don't seem to be valid encodings.
|
|
||||||
Xor = 8,
|
|
||||||
Or = 9,
|
|
||||||
And = 10,
|
|
||||||
AndNot = 11,
|
|
||||||
Nand = 12
|
|
||||||
};
|
|
||||||
|
|
||||||
enum class MacroInterpreter::ResultOperation : u32 {
|
MacroInterpreterImpl::MacroInterpreterImpl(Engines::Maxwell3D& maxwell3d,
|
||||||
IgnoreAndFetch = 0,
|
const std::vector<u32>& code)
|
||||||
Move = 1,
|
: maxwell3d(maxwell3d), code(code) {}
|
||||||
MoveAndSetMethod = 2,
|
|
||||||
FetchAndSend = 3,
|
|
||||||
MoveAndSend = 4,
|
|
||||||
FetchAndSetMethod = 5,
|
|
||||||
MoveAndSetMethodFetchAndSend = 6,
|
|
||||||
MoveAndSetMethodSend = 7
|
|
||||||
};
|
|
||||||
|
|
||||||
enum class MacroInterpreter::BranchCondition : u32 {
|
void MacroInterpreterImpl::Execute(const std::vector<u32>& parameters, u32 method) {
|
||||||
Zero = 0,
|
|
||||||
NotZero = 1,
|
|
||||||
};
|
|
||||||
|
|
||||||
union MacroInterpreter::Opcode {
|
|
||||||
u32 raw;
|
|
||||||
BitField<0, 3, Operation> operation;
|
|
||||||
BitField<4, 3, ResultOperation> result_operation;
|
|
||||||
BitField<4, 1, BranchCondition> branch_condition;
|
|
||||||
// If set on a branch, then the branch doesn't have a delay slot.
|
|
||||||
BitField<5, 1, u32> branch_annul;
|
|
||||||
BitField<7, 1, u32> is_exit;
|
|
||||||
BitField<8, 3, u32> dst;
|
|
||||||
BitField<11, 3, u32> src_a;
|
|
||||||
BitField<14, 3, u32> src_b;
|
|
||||||
// The signed immediate overlaps the second source operand and the alu operation.
|
|
||||||
BitField<14, 18, s32> immediate;
|
|
||||||
|
|
||||||
BitField<17, 5, ALUOperation> alu_operation;
|
|
||||||
|
|
||||||
// Bitfield instructions data
|
|
||||||
BitField<17, 5, u32> bf_src_bit;
|
|
||||||
BitField<22, 5, u32> bf_size;
|
|
||||||
BitField<27, 5, u32> bf_dst_bit;
|
|
||||||
|
|
||||||
u32 GetBitfieldMask() const {
|
|
||||||
return (1 << bf_size) - 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
s32 GetBranchTarget() const {
|
|
||||||
return static_cast<s32>(immediate * sizeof(u32));
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {}
|
|
||||||
|
|
||||||
void MacroInterpreter::Execute(u32 offset, std::size_t num_parameters, const u32* parameters) {
|
|
||||||
MICROPROFILE_SCOPE(MacroInterp);
|
MICROPROFILE_SCOPE(MacroInterp);
|
||||||
Reset();
|
Reset();
|
||||||
|
|
||||||
registers[1] = parameters[0];
|
registers[1] = parameters[0];
|
||||||
|
num_parameters = parameters.size();
|
||||||
|
|
||||||
if (num_parameters > parameters_capacity) {
|
if (num_parameters > parameters_capacity) {
|
||||||
parameters_capacity = num_parameters;
|
parameters_capacity = num_parameters;
|
||||||
this->parameters = std::make_unique<u32[]>(num_parameters);
|
this->parameters = std::make_unique<u32[]>(num_parameters);
|
||||||
}
|
}
|
||||||
std::memcpy(this->parameters.get(), parameters, num_parameters * sizeof(u32));
|
std::memcpy(this->parameters.get(), parameters.data(), num_parameters * sizeof(u32));
|
||||||
this->num_parameters = num_parameters;
|
this->num_parameters = num_parameters;
|
||||||
|
|
||||||
// Execute the code until we hit an exit condition.
|
// Execute the code until we hit an exit condition.
|
||||||
bool keep_executing = true;
|
bool keep_executing = true;
|
||||||
while (keep_executing) {
|
while (keep_executing) {
|
||||||
keep_executing = Step(offset, false);
|
keep_executing = Step(false);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Assert that the macro used all the input parameters
|
// Assert that the macro used all the input parameters
|
||||||
ASSERT(next_parameter_index == num_parameters);
|
ASSERT(next_parameter_index == num_parameters);
|
||||||
}
|
}
|
||||||
|
|
||||||
void MacroInterpreter::Reset() {
|
void MacroInterpreterImpl::Reset() {
|
||||||
registers = {};
|
registers = {};
|
||||||
pc = 0;
|
pc = 0;
|
||||||
delayed_pc = {};
|
delayed_pc = {};
|
||||||
|
@ -120,10 +58,10 @@ void MacroInterpreter::Reset() {
|
||||||
carry_flag = false;
|
carry_flag = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool MacroInterpreter::Step(u32 offset, bool is_delay_slot) {
|
bool MacroInterpreterImpl::Step(bool is_delay_slot) {
|
||||||
u32 base_address = pc;
|
u32 base_address = pc;
|
||||||
|
|
||||||
Opcode opcode = GetOpcode(offset);
|
Macro::Opcode opcode = GetOpcode();
|
||||||
pc += 4;
|
pc += 4;
|
||||||
|
|
||||||
// Update the program counter if we were delayed
|
// Update the program counter if we were delayed
|
||||||
|
@ -134,18 +72,18 @@ bool MacroInterpreter::Step(u32 offset, bool is_delay_slot) {
|
||||||
}
|
}
|
||||||
|
|
||||||
switch (opcode.operation) {
|
switch (opcode.operation) {
|
||||||
case Operation::ALU: {
|
case Macro::Operation::ALU: {
|
||||||
u32 result = GetALUResult(opcode.alu_operation, GetRegister(opcode.src_a),
|
u32 result = GetALUResult(opcode.alu_operation, GetRegister(opcode.src_a),
|
||||||
GetRegister(opcode.src_b));
|
GetRegister(opcode.src_b));
|
||||||
ProcessResult(opcode.result_operation, opcode.dst, result);
|
ProcessResult(opcode.result_operation, opcode.dst, result);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case Operation::AddImmediate: {
|
case Macro::Operation::AddImmediate: {
|
||||||
ProcessResult(opcode.result_operation, opcode.dst,
|
ProcessResult(opcode.result_operation, opcode.dst,
|
||||||
GetRegister(opcode.src_a) + opcode.immediate);
|
GetRegister(opcode.src_a) + opcode.immediate);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case Operation::ExtractInsert: {
|
case Macro::Operation::ExtractInsert: {
|
||||||
u32 dst = GetRegister(opcode.src_a);
|
u32 dst = GetRegister(opcode.src_a);
|
||||||
u32 src = GetRegister(opcode.src_b);
|
u32 src = GetRegister(opcode.src_b);
|
||||||
|
|
||||||
|
@ -155,7 +93,7 @@ bool MacroInterpreter::Step(u32 offset, bool is_delay_slot) {
|
||||||
ProcessResult(opcode.result_operation, opcode.dst, dst);
|
ProcessResult(opcode.result_operation, opcode.dst, dst);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case Operation::ExtractShiftLeftImmediate: {
|
case Macro::Operation::ExtractShiftLeftImmediate: {
|
||||||
u32 dst = GetRegister(opcode.src_a);
|
u32 dst = GetRegister(opcode.src_a);
|
||||||
u32 src = GetRegister(opcode.src_b);
|
u32 src = GetRegister(opcode.src_b);
|
||||||
|
|
||||||
|
@ -164,7 +102,7 @@ bool MacroInterpreter::Step(u32 offset, bool is_delay_slot) {
|
||||||
ProcessResult(opcode.result_operation, opcode.dst, result);
|
ProcessResult(opcode.result_operation, opcode.dst, result);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case Operation::ExtractShiftLeftRegister: {
|
case Macro::Operation::ExtractShiftLeftRegister: {
|
||||||
u32 dst = GetRegister(opcode.src_a);
|
u32 dst = GetRegister(opcode.src_a);
|
||||||
u32 src = GetRegister(opcode.src_b);
|
u32 src = GetRegister(opcode.src_b);
|
||||||
|
|
||||||
|
@ -173,12 +111,12 @@ bool MacroInterpreter::Step(u32 offset, bool is_delay_slot) {
|
||||||
ProcessResult(opcode.result_operation, opcode.dst, result);
|
ProcessResult(opcode.result_operation, opcode.dst, result);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case Operation::Read: {
|
case Macro::Operation::Read: {
|
||||||
u32 result = Read(GetRegister(opcode.src_a) + opcode.immediate);
|
u32 result = Read(GetRegister(opcode.src_a) + opcode.immediate);
|
||||||
ProcessResult(opcode.result_operation, opcode.dst, result);
|
ProcessResult(opcode.result_operation, opcode.dst, result);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case Operation::Branch: {
|
case Macro::Operation::Branch: {
|
||||||
ASSERT_MSG(!is_delay_slot, "Executing a branch in a delay slot is not valid");
|
ASSERT_MSG(!is_delay_slot, "Executing a branch in a delay slot is not valid");
|
||||||
u32 value = GetRegister(opcode.src_a);
|
u32 value = GetRegister(opcode.src_a);
|
||||||
bool taken = EvaluateBranchCondition(opcode.branch_condition, value);
|
bool taken = EvaluateBranchCondition(opcode.branch_condition, value);
|
||||||
|
@ -191,7 +129,7 @@ bool MacroInterpreter::Step(u32 offset, bool is_delay_slot) {
|
||||||
|
|
||||||
delayed_pc = base_address + opcode.GetBranchTarget();
|
delayed_pc = base_address + opcode.GetBranchTarget();
|
||||||
// Execute one more instruction due to the delay slot.
|
// Execute one more instruction due to the delay slot.
|
||||||
return Step(offset, true);
|
return Step(true);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -204,51 +142,44 @@ bool MacroInterpreter::Step(u32 offset, bool is_delay_slot) {
|
||||||
// cause an exit if it's executed inside a delay slot.
|
// cause an exit if it's executed inside a delay slot.
|
||||||
if (opcode.is_exit && !is_delay_slot) {
|
if (opcode.is_exit && !is_delay_slot) {
|
||||||
// Exit has a delay slot, execute the next instruction
|
// Exit has a delay slot, execute the next instruction
|
||||||
Step(offset, true);
|
Step(true);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
MacroInterpreter::Opcode MacroInterpreter::GetOpcode(u32 offset) const {
|
u32 MacroInterpreterImpl::GetALUResult(Macro::ALUOperation operation, u32 src_a, u32 src_b) {
|
||||||
const auto& macro_memory{maxwell3d.GetMacroMemory()};
|
|
||||||
ASSERT((pc % sizeof(u32)) == 0);
|
|
||||||
ASSERT((pc + offset) < macro_memory.size() * sizeof(u32));
|
|
||||||
return {macro_memory[offset + pc / sizeof(u32)]};
|
|
||||||
}
|
|
||||||
|
|
||||||
u32 MacroInterpreter::GetALUResult(ALUOperation operation, u32 src_a, u32 src_b) {
|
|
||||||
switch (operation) {
|
switch (operation) {
|
||||||
case ALUOperation::Add: {
|
case Macro::ALUOperation::Add: {
|
||||||
const u64 result{static_cast<u64>(src_a) + src_b};
|
const u64 result{static_cast<u64>(src_a) + src_b};
|
||||||
carry_flag = result > 0xffffffff;
|
carry_flag = result > 0xffffffff;
|
||||||
return static_cast<u32>(result);
|
return static_cast<u32>(result);
|
||||||
}
|
}
|
||||||
case ALUOperation::AddWithCarry: {
|
case Macro::ALUOperation::AddWithCarry: {
|
||||||
const u64 result{static_cast<u64>(src_a) + src_b + (carry_flag ? 1ULL : 0ULL)};
|
const u64 result{static_cast<u64>(src_a) + src_b + (carry_flag ? 1ULL : 0ULL)};
|
||||||
carry_flag = result > 0xffffffff;
|
carry_flag = result > 0xffffffff;
|
||||||
return static_cast<u32>(result);
|
return static_cast<u32>(result);
|
||||||
}
|
}
|
||||||
case ALUOperation::Subtract: {
|
case Macro::ALUOperation::Subtract: {
|
||||||
const u64 result{static_cast<u64>(src_a) - src_b};
|
const u64 result{static_cast<u64>(src_a) - src_b};
|
||||||
carry_flag = result < 0x100000000;
|
carry_flag = result < 0x100000000;
|
||||||
return static_cast<u32>(result);
|
return static_cast<u32>(result);
|
||||||
}
|
}
|
||||||
case ALUOperation::SubtractWithBorrow: {
|
case Macro::ALUOperation::SubtractWithBorrow: {
|
||||||
const u64 result{static_cast<u64>(src_a) - src_b - (carry_flag ? 0ULL : 1ULL)};
|
const u64 result{static_cast<u64>(src_a) - src_b - (carry_flag ? 0ULL : 1ULL)};
|
||||||
carry_flag = result < 0x100000000;
|
carry_flag = result < 0x100000000;
|
||||||
return static_cast<u32>(result);
|
return static_cast<u32>(result);
|
||||||
}
|
}
|
||||||
case ALUOperation::Xor:
|
case Macro::ALUOperation::Xor:
|
||||||
return src_a ^ src_b;
|
return src_a ^ src_b;
|
||||||
case ALUOperation::Or:
|
case Macro::ALUOperation::Or:
|
||||||
return src_a | src_b;
|
return src_a | src_b;
|
||||||
case ALUOperation::And:
|
case Macro::ALUOperation::And:
|
||||||
return src_a & src_b;
|
return src_a & src_b;
|
||||||
case ALUOperation::AndNot:
|
case Macro::ALUOperation::AndNot:
|
||||||
return src_a & ~src_b;
|
return src_a & ~src_b;
|
||||||
case ALUOperation::Nand:
|
case Macro::ALUOperation::Nand:
|
||||||
return ~(src_a & src_b);
|
return ~(src_a & src_b);
|
||||||
|
|
||||||
default:
|
default:
|
||||||
|
@ -257,43 +188,43 @@ u32 MacroInterpreter::GetALUResult(ALUOperation operation, u32 src_a, u32 src_b)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void MacroInterpreter::ProcessResult(ResultOperation operation, u32 reg, u32 result) {
|
void MacroInterpreterImpl::ProcessResult(Macro::ResultOperation operation, u32 reg, u32 result) {
|
||||||
switch (operation) {
|
switch (operation) {
|
||||||
case ResultOperation::IgnoreAndFetch:
|
case Macro::ResultOperation::IgnoreAndFetch:
|
||||||
// Fetch parameter and ignore result.
|
// Fetch parameter and ignore result.
|
||||||
SetRegister(reg, FetchParameter());
|
SetRegister(reg, FetchParameter());
|
||||||
break;
|
break;
|
||||||
case ResultOperation::Move:
|
case Macro::ResultOperation::Move:
|
||||||
// Move result.
|
// Move result.
|
||||||
SetRegister(reg, result);
|
SetRegister(reg, result);
|
||||||
break;
|
break;
|
||||||
case ResultOperation::MoveAndSetMethod:
|
case Macro::ResultOperation::MoveAndSetMethod:
|
||||||
// Move result and use as Method Address.
|
// Move result and use as Method Address.
|
||||||
SetRegister(reg, result);
|
SetRegister(reg, result);
|
||||||
SetMethodAddress(result);
|
SetMethodAddress(result);
|
||||||
break;
|
break;
|
||||||
case ResultOperation::FetchAndSend:
|
case Macro::ResultOperation::FetchAndSend:
|
||||||
// Fetch parameter and send result.
|
// Fetch parameter and send result.
|
||||||
SetRegister(reg, FetchParameter());
|
SetRegister(reg, FetchParameter());
|
||||||
Send(result);
|
Send(result);
|
||||||
break;
|
break;
|
||||||
case ResultOperation::MoveAndSend:
|
case Macro::ResultOperation::MoveAndSend:
|
||||||
// Move and send result.
|
// Move and send result.
|
||||||
SetRegister(reg, result);
|
SetRegister(reg, result);
|
||||||
Send(result);
|
Send(result);
|
||||||
break;
|
break;
|
||||||
case ResultOperation::FetchAndSetMethod:
|
case Macro::ResultOperation::FetchAndSetMethod:
|
||||||
// Fetch parameter and use result as Method Address.
|
// Fetch parameter and use result as Method Address.
|
||||||
SetRegister(reg, FetchParameter());
|
SetRegister(reg, FetchParameter());
|
||||||
SetMethodAddress(result);
|
SetMethodAddress(result);
|
||||||
break;
|
break;
|
||||||
case ResultOperation::MoveAndSetMethodFetchAndSend:
|
case Macro::ResultOperation::MoveAndSetMethodFetchAndSend:
|
||||||
// Move result and use as Method Address, then fetch and send parameter.
|
// Move result and use as Method Address, then fetch and send parameter.
|
||||||
SetRegister(reg, result);
|
SetRegister(reg, result);
|
||||||
SetMethodAddress(result);
|
SetMethodAddress(result);
|
||||||
Send(FetchParameter());
|
Send(FetchParameter());
|
||||||
break;
|
break;
|
||||||
case ResultOperation::MoveAndSetMethodSend:
|
case Macro::ResultOperation::MoveAndSetMethodSend:
|
||||||
// Move result and use as Method Address, then send bits 12:17 of result.
|
// Move result and use as Method Address, then send bits 12:17 of result.
|
||||||
SetRegister(reg, result);
|
SetRegister(reg, result);
|
||||||
SetMethodAddress(result);
|
SetMethodAddress(result);
|
||||||
|
@ -304,16 +235,28 @@ void MacroInterpreter::ProcessResult(ResultOperation operation, u32 reg, u32 res
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 MacroInterpreter::FetchParameter() {
|
bool MacroInterpreterImpl::EvaluateBranchCondition(Macro::BranchCondition cond, u32 value) const {
|
||||||
ASSERT(next_parameter_index < num_parameters);
|
switch (cond) {
|
||||||
return parameters[next_parameter_index++];
|
case Macro::BranchCondition::Zero:
|
||||||
|
return value == 0;
|
||||||
|
case Macro::BranchCondition::NotZero:
|
||||||
|
return value != 0;
|
||||||
|
}
|
||||||
|
UNREACHABLE();
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 MacroInterpreter::GetRegister(u32 register_id) const {
|
Macro::Opcode MacroInterpreterImpl::GetOpcode() const {
|
||||||
|
ASSERT((pc % sizeof(u32)) == 0);
|
||||||
|
ASSERT(pc < code.size() * sizeof(u32));
|
||||||
|
return {code[pc / sizeof(u32)]};
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 MacroInterpreterImpl::GetRegister(u32 register_id) const {
|
||||||
return registers.at(register_id);
|
return registers.at(register_id);
|
||||||
}
|
}
|
||||||
|
|
||||||
void MacroInterpreter::SetRegister(u32 register_id, u32 value) {
|
void MacroInterpreterImpl::SetRegister(u32 register_id, u32 value) {
|
||||||
// Register 0 is hardwired as the zero register.
|
// Register 0 is hardwired as the zero register.
|
||||||
// Ensure no writes to it actually occur.
|
// Ensure no writes to it actually occur.
|
||||||
if (register_id == 0) {
|
if (register_id == 0) {
|
||||||
|
@ -323,30 +266,24 @@ void MacroInterpreter::SetRegister(u32 register_id, u32 value) {
|
||||||
registers.at(register_id) = value;
|
registers.at(register_id) = value;
|
||||||
}
|
}
|
||||||
|
|
||||||
void MacroInterpreter::SetMethodAddress(u32 address) {
|
void MacroInterpreterImpl::SetMethodAddress(u32 address) {
|
||||||
method_address.raw = address;
|
method_address.raw = address;
|
||||||
}
|
}
|
||||||
|
|
||||||
void MacroInterpreter::Send(u32 value) {
|
void MacroInterpreterImpl::Send(u32 value) {
|
||||||
maxwell3d.CallMethodFromMME(method_address.address, value);
|
maxwell3d.CallMethodFromMME(method_address.address, value);
|
||||||
// Increment the method address by the method increment.
|
// Increment the method address by the method increment.
|
||||||
method_address.address.Assign(method_address.address.Value() +
|
method_address.address.Assign(method_address.address.Value() +
|
||||||
method_address.increment.Value());
|
method_address.increment.Value());
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 MacroInterpreter::Read(u32 method) const {
|
u32 MacroInterpreterImpl::Read(u32 method) const {
|
||||||
return maxwell3d.GetRegisterValue(method);
|
return maxwell3d.GetRegisterValue(method);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool MacroInterpreter::EvaluateBranchCondition(BranchCondition cond, u32 value) const {
|
u32 MacroInterpreterImpl::FetchParameter() {
|
||||||
switch (cond) {
|
ASSERT(next_parameter_index < num_parameters);
|
||||||
case BranchCondition::Zero:
|
return parameters[next_parameter_index++];
|
||||||
return value == 0;
|
|
||||||
case BranchCondition::NotZero:
|
|
||||||
return value != 0;
|
|
||||||
}
|
|
||||||
UNREACHABLE();
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace Tegra
|
} // namespace Tegra
|
|
@ -1,44 +1,37 @@
|
||||||
// Copyright 2018 yuzu Emulator Project
|
// Copyright 2020 yuzu Emulator Project
|
||||||
// Licensed under GPLv2 or any later version
|
// Licensed under GPLv2 or any later version
|
||||||
// Refer to the license.txt file included.
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <array>
|
#include <array>
|
||||||
#include <optional>
|
#include <optional>
|
||||||
|
#include <vector>
|
||||||
#include "common/bit_field.h"
|
#include "common/bit_field.h"
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
|
#include "video_core/macro/macro.h"
|
||||||
|
|
||||||
namespace Tegra {
|
namespace Tegra {
|
||||||
namespace Engines {
|
namespace Engines {
|
||||||
class Maxwell3D;
|
class Maxwell3D;
|
||||||
}
|
}
|
||||||
|
|
||||||
class MacroInterpreter final {
|
class MacroInterpreter final : public MacroEngine {
|
||||||
public:
|
public:
|
||||||
explicit MacroInterpreter(Engines::Maxwell3D& maxwell3d);
|
explicit MacroInterpreter(Engines::Maxwell3D& maxwell3d);
|
||||||
|
|
||||||
/**
|
protected:
|
||||||
* Executes the macro code with the specified input parameters.
|
std::unique_ptr<CachedMacro> Compile(const std::vector<u32>& code) override;
|
||||||
* @param offset Offset to start execution at.
|
|
||||||
* @param parameters The parameters of the macro.
|
|
||||||
*/
|
|
||||||
void Execute(u32 offset, std::size_t num_parameters, const u32* parameters);
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
enum class ALUOperation : u32;
|
Engines::Maxwell3D& maxwell3d;
|
||||||
enum class BranchCondition : u32;
|
};
|
||||||
enum class ResultOperation : u32;
|
|
||||||
|
|
||||||
union Opcode;
|
class MacroInterpreterImpl : public CachedMacro {
|
||||||
|
public:
|
||||||
union MethodAddress {
|
MacroInterpreterImpl(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& code);
|
||||||
u32 raw;
|
void Execute(const std::vector<u32>& parameters, u32 method) override;
|
||||||
BitField<0, 12, u32> address;
|
|
||||||
BitField<12, 6, u32> increment;
|
|
||||||
};
|
|
||||||
|
|
||||||
|
private:
|
||||||
/// Resets the execution engine state, zeroing registers, etc.
|
/// Resets the execution engine state, zeroing registers, etc.
|
||||||
void Reset();
|
void Reset();
|
||||||
|
|
||||||
|
@ -49,20 +42,20 @@ private:
|
||||||
* @param is_delay_slot Whether the current step is being executed due to a delay slot in a
|
* @param is_delay_slot Whether the current step is being executed due to a delay slot in a
|
||||||
* previous instruction.
|
* previous instruction.
|
||||||
*/
|
*/
|
||||||
bool Step(u32 offset, bool is_delay_slot);
|
bool Step(bool is_delay_slot);
|
||||||
|
|
||||||
/// Calculates the result of an ALU operation. src_a OP src_b;
|
/// Calculates the result of an ALU operation. src_a OP src_b;
|
||||||
u32 GetALUResult(ALUOperation operation, u32 src_a, u32 src_b);
|
u32 GetALUResult(Macro::ALUOperation operation, u32 src_a, u32 src_b);
|
||||||
|
|
||||||
/// Performs the result operation on the input result and stores it in the specified register
|
/// Performs the result operation on the input result and stores it in the specified register
|
||||||
/// (if necessary).
|
/// (if necessary).
|
||||||
void ProcessResult(ResultOperation operation, u32 reg, u32 result);
|
void ProcessResult(Macro::ResultOperation operation, u32 reg, u32 result);
|
||||||
|
|
||||||
/// Evaluates the branch condition and returns whether the branch should be taken or not.
|
/// Evaluates the branch condition and returns whether the branch should be taken or not.
|
||||||
bool EvaluateBranchCondition(BranchCondition cond, u32 value) const;
|
bool EvaluateBranchCondition(Macro::BranchCondition cond, u32 value) const;
|
||||||
|
|
||||||
/// Reads an opcode at the current program counter location.
|
/// Reads an opcode at the current program counter location.
|
||||||
Opcode GetOpcode(u32 offset) const;
|
Macro::Opcode GetOpcode() const;
|
||||||
|
|
||||||
/// Returns the specified register's value. Register 0 is hardcoded to always return 0.
|
/// Returns the specified register's value. Register 0 is hardcoded to always return 0.
|
||||||
u32 GetRegister(u32 register_id) const;
|
u32 GetRegister(u32 register_id) const;
|
||||||
|
@ -89,13 +82,11 @@ private:
|
||||||
/// Program counter to execute at after the delay slot is executed.
|
/// Program counter to execute at after the delay slot is executed.
|
||||||
std::optional<u32> delayed_pc;
|
std::optional<u32> delayed_pc;
|
||||||
|
|
||||||
static constexpr std::size_t NumMacroRegisters = 8;
|
|
||||||
|
|
||||||
/// General purpose macro registers.
|
/// General purpose macro registers.
|
||||||
std::array<u32, NumMacroRegisters> registers = {};
|
std::array<u32, Macro::NUM_MACRO_REGISTERS> registers = {};
|
||||||
|
|
||||||
/// Method address to use for the next Send instruction.
|
/// Method address to use for the next Send instruction.
|
||||||
MethodAddress method_address = {};
|
Macro::MethodAddress method_address = {};
|
||||||
|
|
||||||
/// Input parameters of the current macro.
|
/// Input parameters of the current macro.
|
||||||
std::unique_ptr<u32[]> parameters;
|
std::unique_ptr<u32[]> parameters;
|
||||||
|
@ -105,5 +96,7 @@ private:
|
||||||
u32 next_parameter_index = 0;
|
u32 next_parameter_index = 0;
|
||||||
|
|
||||||
bool carry_flag = false;
|
bool carry_flag = false;
|
||||||
|
const std::vector<u32>& code;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace Tegra
|
} // namespace Tegra
|
621
src/video_core/macro/macro_jit_x64.cpp
Normal file
621
src/video_core/macro/macro_jit_x64.cpp
Normal file
|
@ -0,0 +1,621 @@
|
||||||
|
// Copyright 2020 yuzu Emulator Project
|
||||||
|
// Licensed under GPLv2 or any later version
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#include "common/assert.h"
|
||||||
|
#include "common/logging/log.h"
|
||||||
|
#include "common/microprofile.h"
|
||||||
|
#include "common/x64/xbyak_util.h"
|
||||||
|
#include "video_core/engines/maxwell_3d.h"
|
||||||
|
#include "video_core/macro/macro_interpreter.h"
|
||||||
|
#include "video_core/macro/macro_jit_x64.h"
|
||||||
|
|
||||||
|
MICROPROFILE_DEFINE(MacroJitCompile, "GPU", "Compile macro JIT", MP_RGB(173, 255, 47));
|
||||||
|
MICROPROFILE_DEFINE(MacroJitExecute, "GPU", "Execute macro JIT", MP_RGB(255, 255, 0));
|
||||||
|
|
||||||
|
namespace Tegra {
|
||||||
|
static const Xbyak::Reg64 STATE = Xbyak::util::rbx;
|
||||||
|
static const Xbyak::Reg32 RESULT = Xbyak::util::ebp;
|
||||||
|
static const Xbyak::Reg64 PARAMETERS = Xbyak::util::r12;
|
||||||
|
static const Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d;
|
||||||
|
static const Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15;
|
||||||
|
|
||||||
|
static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({
|
||||||
|
STATE,
|
||||||
|
RESULT,
|
||||||
|
PARAMETERS,
|
||||||
|
METHOD_ADDRESS,
|
||||||
|
BRANCH_HOLDER,
|
||||||
|
});
|
||||||
|
|
||||||
|
/// Creates the JIT-backed macro engine operating on the given Maxwell3D engine.
MacroJITx64::MacroJITx64(Engines::Maxwell3D& maxwell3d)
    : MacroEngine(maxwell3d), maxwell3d(maxwell3d) {}
|
||||||
|
|
||||||
|
std::unique_ptr<CachedMacro> MacroJITx64::Compile(const std::vector<u32>& code) {
|
||||||
|
return std::make_unique<MacroJITx64Impl>(maxwell3d, code);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Allocates a MAX_CODE_SIZE code buffer and immediately compiles the macro into it.
/// NOTE(review): `code` is stored by reference, so the vector must outlive this object if it is
/// ever read after construction - confirm against the caller.
MacroJITx64Impl::MacroJITx64Impl(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& code)
    : Xbyak::CodeGenerator(MAX_CODE_SIZE),
      code(code),
      maxwell3d(maxwell3d) {
    Compile();
}
|
||||||
|
|
||||||
|
// Defined out of line in this translation unit; no custom teardown is performed.
MacroJITx64Impl::~MacroJITx64Impl() = default;
|
||||||
|
|
||||||
|
void MacroJITx64Impl::Execute(const std::vector<u32>& parameters, u32 method) {
|
||||||
|
MICROPROFILE_SCOPE(MacroJitExecute);
|
||||||
|
ASSERT_OR_EXECUTE(program != nullptr, { return; });
|
||||||
|
JITState state{};
|
||||||
|
state.maxwell3d = &maxwell3d;
|
||||||
|
state.registers = {};
|
||||||
|
program(&state, parameters.data());
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Emits code for an ALU instruction: RESULT = src_a OP src_b, followed by result processing.
///
/// src_a is always materialised into RESULT (register 0 becomes `xor RESULT, RESULT`), so
/// RESULT never carries a stale value from an earlier instruction. With the zero-register
/// optimisation enabled, the load of src_b and the ALU instruction are skipped only when
/// src_b is register 0 AND `a OP 0 == a` for this operation:
///   - Add / Subtract: only when the carry flag is not tracked (otherwise the instruction is
///     needed so that the following `setc` observes flags produced by it),
///   - Xor / Or / AndNot: always.
/// And / Nand with a zero operand are not the identity and are therefore always emitted.
/// AddWithCarry / SubtractWithBorrow are never skipped.
void MacroJITx64Impl::Compile_ALU(Macro::Opcode opcode) {
    const bool is_b_zero = opcode.src_b == 0;

    // Does `src_a OP 0` leave src_a unchanged, with no flag result that has to be captured?
    bool is_identity_on_zero_b = false;
    switch (opcode.alu_operation) {
    case Macro::ALUOperation::Add:
    case Macro::ALUOperation::Subtract:
        is_identity_on_zero_b = optimizer.can_skip_carry;
        break;
    case Macro::ALUOperation::Xor:
    case Macro::ALUOperation::Or:
    case Macro::ALUOperation::AndNot:
        is_identity_on_zero_b = true;
        break;
    default:
        break;
    }
    const bool skip_operation = optimizer.zero_reg_skip && is_b_zero && is_identity_on_zero_b;

    const Xbyak::Reg32 src_a = Compile_GetRegister(opcode.src_a, RESULT);
    const Xbyak::Reg32 src_b = eax;
    if (!skip_operation) {
        Compile_GetRegister(opcode.src_b, src_b);
    }

    switch (opcode.alu_operation) {
    case Macro::ALUOperation::Add:
        if (!skip_operation) {
            add(src_a, src_b);
        }
        // skip_operation implies can_skip_carry, so the add above was emitted whenever we get here
        if (!optimizer.can_skip_carry) {
            setc(byte[STATE + offsetof(JITState, carry_flag)]);
        }
        break;
    case Macro::ALUOperation::AddWithCarry:
        // Reload the stored carry into CF before the add-with-carry
        bt(dword[STATE + offsetof(JITState, carry_flag)], 0);
        adc(src_a, src_b);
        setc(byte[STATE + offsetof(JITState, carry_flag)]);
        break;
    case Macro::ALUOperation::Subtract:
        if (!skip_operation) {
            sub(src_a, src_b);
        }
        // Same reasoning as for Add: the sub is always present when the carry is tracked
        if (!optimizer.can_skip_carry) {
            setc(byte[STATE + offsetof(JITState, carry_flag)]);
        }
        break;
    case Macro::ALUOperation::SubtractWithBorrow:
        bt(dword[STATE + offsetof(JITState, carry_flag)], 0);
        sbb(src_a, src_b);
        setc(byte[STATE + offsetof(JITState, carry_flag)]);
        break;
    case Macro::ALUOperation::Xor:
        if (!skip_operation) {
            xor_(src_a, src_b);
        }
        break;
    case Macro::ALUOperation::Or:
        if (!skip_operation) {
            or_(src_a, src_b);
        }
        break;
    case Macro::ALUOperation::And:
        and_(src_a, src_b);
        break;
    case Macro::ALUOperation::AndNot:
        if (!skip_operation) {
            not_(src_b);
            and_(src_a, src_b);
        }
        break;
    case Macro::ALUOperation::Nand:
        and_(src_a, src_b);
        not_(src_a);
        break;
    default:
        UNIMPLEMENTED_MSG("Unimplemented ALU operation {}",
                          static_cast<std::size_t>(opcode.alu_operation.Value()));
        break;
    }
    Compile_ProcessResult(opcode.result_operation, opcode.dst);
}
|
||||||
|
|
||||||
|
/// Emits code for AddImmediate: RESULT = src_a + immediate, followed by result processing.
///
/// Every non-zero immediate now emits an instruction; previously an immediate of exactly 2
/// matched none of the `> 2`, `== 1`, `< 0` cases and was silently dropped.
void MacroJITx64Impl::Compile_AddImmediate(Macro::Opcode opcode) {
    if (optimizer.skip_dummy_addimmediate) {
        // Games tend to use this as an exit instruction placeholder. It's to encode an instruction
        // without doing anything. In our case we can just not emit anything.
        if (opcode.result_operation == Macro::ResultOperation::Move && opcode.dst == 0) {
            return;
        }
    }
    // Check for redundant moves: drop this instruction when the next one also performs a
    // MoveAndSetMethod into the same destination register.
    // NOTE(review): this does not check whether the next instruction reads that register as a
    // source - confirm the shortcut is safe for the macros in use.
    if (optimizer.optimize_for_method_move &&
        opcode.result_operation == Macro::ResultOperation::MoveAndSetMethod) {
        if (next_opcode.has_value()) {
            const auto next = *next_opcode;
            if (next.result_operation == Macro::ResultOperation::MoveAndSetMethod &&
                opcode.dst == next.dst) {
                return;
            }
        }
    }

    if (optimizer.zero_reg_skip && opcode.src_a == 0) {
        // 0 + immediate: load the constant directly instead of loading register 0 and adding
        if (opcode.immediate == 0) {
            xor_(RESULT, RESULT);
        } else {
            mov(RESULT, opcode.immediate);
        }
    } else {
        const auto result = Compile_GetRegister(opcode.src_a, RESULT);
        if (opcode.immediate == 1) {
            inc(result);
        } else if (opcode.immediate > 0) {
            add(result, opcode.immediate);
        } else if (opcode.immediate < 0) {
            sub(result, opcode.immediate * -1);
        }
        // immediate == 0: the loaded register value already is the result
    }
    Compile_ProcessResult(opcode.result_operation, opcode.dst);
}
|
||||||
|
|
||||||
|
/// Emits code for ExtractInsert: takes a bitfield out of src_b starting at bf_src_bit, and
/// inserts it into src_a at bf_dst_bit. The combined value ends up in RESULT.
///
/// Shift amounts of 31 are emitted as real shifts and the field mask is applied whenever it is
/// not all-ones. The previous code zeroed the source for a shift of 31 and skipped the mask
/// for a size of 31, which disagreed with the destination-clearing mask computed below (that
/// mask has no such special cases) and with x86 shift behaviour (a 32-bit shift by 31 keeps
/// one bit).
void MacroJITx64Impl::Compile_ExtractInsert(Macro::Opcode opcode) {
    const auto dst = Compile_GetRegister(opcode.src_a, RESULT);
    const auto src = Compile_GetRegister(opcode.src_b, eax);
    const u32 field_mask = opcode.GetBitfieldMask();

    // src = (src >> bf_src_bit) & field_mask
    if (opcode.bf_src_bit != 0) {
        shr(src, opcode.bf_src_bit);
    }
    if (field_mask == 0) {
        xor_(src, src);
    } else if (field_mask != 0xffffffff) {
        and_(src, field_mask);
    }

    // src <<= bf_dst_bit
    if (opcode.bf_dst_bit != 0) {
        shl(src, opcode.bf_dst_bit);
    }

    // Clear the destination field, then merge the extracted bits into it
    const u32 mask = ~(field_mask << opcode.bf_dst_bit);
    if (mask != 0xffffffff) {
        and_(dst, mask);
    }
    or_(dst, src);
    Compile_ProcessResult(opcode.result_operation, opcode.dst);
}
|
||||||
|
|
||||||
|
/// Emits code for ExtractShiftLeftImmediate:
/// RESULT = ((src_b >> src_a) & field_mask) << bf_dst_bit.
///
/// The right-shift count comes from src_a and is passed in cl. A left shift by 31 is emitted
/// as a real shift and the mask is applied whenever it is not all-ones; the previous code
/// zeroed the value for bf_dst_bit == 31 and skipped the mask for bf_size == 31.
void MacroJITx64Impl::Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode) {
    const auto shift_amount = Compile_GetRegister(opcode.src_a, ecx);
    const auto src = Compile_GetRegister(opcode.src_b, RESULT);
    const u32 field_mask = opcode.GetBitfieldMask();

    shr(src, shift_amount.cvt8());
    if (field_mask == 0) {
        xor_(src, src);
    } else if (field_mask != 0xffffffff) {
        and_(src, field_mask);
    }

    if (opcode.bf_dst_bit != 0) {
        shl(src, opcode.bf_dst_bit);
    }
    Compile_ProcessResult(opcode.result_operation, opcode.dst);
}
|
||||||
|
|
||||||
|
/// Emits code for ExtractShiftLeftRegister:
/// RESULT = ((src_b >> bf_src_bit) & field_mask) << src_a.
///
/// The left-shift count comes from src_a and is passed in cl. The mask is skipped only when
/// it is all-ones; the previous code skipped it whenever bf_size == 31.
void MacroJITx64Impl::Compile_ExtractShiftLeftRegister(Macro::Opcode opcode) {
    const auto shift_amount = Compile_GetRegister(opcode.src_a, ecx);
    const auto src = Compile_GetRegister(opcode.src_b, RESULT);
    const u32 field_mask = opcode.GetBitfieldMask();

    if (opcode.bf_src_bit != 0) {
        shr(src, opcode.bf_src_bit);
    }

    if (field_mask != 0xffffffff) {
        and_(src, field_mask);
    }
    shl(src, shift_amount.cvt8());

    Compile_ProcessResult(opcode.result_operation, opcode.dst);
}
|
||||||
|
|
||||||
|
/// Emits code for Read: RESULT = maxwell3d.regs.reg_array[src_a + immediate], followed by
/// result processing.
///
/// Every non-zero immediate now emits an instruction; previously an immediate of exactly 2
/// matched none of the `> 2`, `== 1`, `< 0` cases, so the wrong register index was read.
void MacroJITx64Impl::Compile_Read(Macro::Opcode opcode) {
    // Compute the register index into RESULT
    if (optimizer.zero_reg_skip && opcode.src_a == 0) {
        if (opcode.immediate == 0) {
            xor_(RESULT, RESULT);
        } else {
            mov(RESULT, opcode.immediate);
        }
    } else {
        const auto result = Compile_GetRegister(opcode.src_a, RESULT);
        if (opcode.immediate == 1) {
            inc(result);
        } else if (opcode.immediate > 0) {
            add(result, opcode.immediate);
        } else if (opcode.immediate < 0) {
            sub(result, opcode.immediate * -1);
        }
    }

    // Equivalent to Engines::Maxwell3D::GetRegisterValue:
    if (optimizer.enable_asserts) {
        // Trap with int3 when the index is not below NUM_REGS
        Xbyak::Label pass_range_check;
        cmp(RESULT, static_cast<u32>(Engines::Maxwell3D::Regs::NUM_REGS));
        jb(pass_range_check);
        int3();
        L(pass_range_check);
    }
    // The Maxwell3D pointer is the first field of JITState (offset 0)
    mov(rax, qword[STATE]);
    mov(RESULT,
        dword[rax + offsetof(Engines::Maxwell3D, regs) +
              offsetof(Engines::Maxwell3D::Regs, reg_array) + RESULT.cvt64() * sizeof(u32)]);

    Compile_ProcessResult(opcode.result_operation, opcode.dst);
}
|
||||||
|
|
||||||
|
/// Host-side helper called from generated code: forwards `value` to the Maxwell3D engine
/// using the address part of `method_address`.
static void Send(Engines::Maxwell3D* maxwell3d, Macro::MethodAddress method_address, u32 value) {
    const u32 method = method_address.address;
    maxwell3d->CallMethodFromMME(method, value);
}
|
||||||
|
|
||||||
|
/// Emits a call to Send(maxwell3d, METHOD_ADDRESS, value), then advances the address part of
/// METHOD_ADDRESS by its increment part.
///
/// METHOD_ADDRESS layout as used here: bits 0-11 address, bits 12-17 increment.
/// The advanced address is wrapped to 12 bits before the increment is merged back in;
/// previously a carry out of bit 11 was OR-ed into the increment field.
void Tegra::MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) {
    Common::X64::ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
    // The Maxwell3D pointer is the first field of JITState (offset 0)
    mov(Common::X64::ABI_PARAM1, qword[STATE]);
    mov(Common::X64::ABI_PARAM2, METHOD_ADDRESS);
    mov(Common::X64::ABI_PARAM3, value);
    Common::X64::CallFarFunction(*this, &Send);
    Common::X64::ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);

    Xbyak::Label dont_process{};
    // Get increment
    test(METHOD_ADDRESS, 0x3f000);
    // If zero, method address doesn't update
    je(dont_process);

    mov(ecx, METHOD_ADDRESS);
    and_(METHOD_ADDRESS, 0xfff); // address
    shr(ecx, 12);
    and_(ecx, 0x3f); // increment
    lea(eax, ptr[rcx + METHOD_ADDRESS.cvt64()]);
    and_(eax, 0xfff); // keep the advanced address inside its 12-bit field
    sal(ecx, 12);
    or_(eax, ecx);

    mov(METHOD_ADDRESS, eax);

    L(dont_process);
}
|
||||||
|
|
||||||
|
/// Emits code for a conditional branch on src_a (Zero / NotZero).
///
/// When the macro contains no delay-slot branches, a plain conditional jump to the target
/// label is emitted. Otherwise the taken path either jumps straight to the target (annulled
/// branch) or parks a continuation address in BRANCH_HOLDER and proceeds to the delay-slot
/// instruction via delay_skip[pc]; that continuation later jumps to the target.
void Tegra::MacroJITx64Impl::Compile_Branch(Macro::Opcode opcode) {
    ASSERT_MSG(!is_delay_slot, "Executing a branch in a delay slot is not valid");
    const s32 target_pc =
        static_cast<s32>(pc) + static_cast<s32>(opcode.GetBranchTarget() / sizeof(s32));

    Xbyak::Label branch_not_taken;
    const auto condition_reg = Compile_GetRegister(opcode.src_a, eax);
    test(condition_reg, condition_reg);

    if (!optimizer.has_delayed_pc) {
        // No delay slots anywhere in this macro: branch directly on the condition
        switch (opcode.branch_condition) {
        case Macro::BranchCondition::Zero:
            je(labels[target_pc], T_NEAR);
            break;
        case Macro::BranchCondition::NotZero:
            jne(labels[target_pc], T_NEAR);
            break;
        }
    } else {
        // Jump over the taken-path code when the condition does not hold
        switch (opcode.branch_condition) {
        case Macro::BranchCondition::Zero:
            jne(branch_not_taken, T_NEAR);
            break;
        case Macro::BranchCondition::NotZero:
            je(branch_not_taken, T_NEAR);
            break;
        }

        if (opcode.branch_annul) {
            // Annulled: no delay slot is executed, go to the target immediately
            xor_(BRANCH_HOLDER, BRANCH_HOLDER);
            jmp(labels[target_pc], T_NEAR);
        } else {
            Xbyak::Label after_delay_slot{};
            Xbyak::Label arm_delay_slot{};
            jmp(arm_delay_slot, T_NEAR);

            // Continuation reached through BRANCH_HOLDER
            L(after_delay_slot);
            if (opcode.is_exit) {
                // Execute 1 instruction
                mov(BRANCH_HOLDER, end_of_code);
            } else {
                xor_(BRANCH_HOLDER, BRANCH_HOLDER);
            }
            // Jump to next instruction to skip delay slot check
            jmp(labels[target_pc], T_NEAR);

            L(arm_delay_slot);
            mov(BRANCH_HOLDER, after_delay_slot);
            jmp(delay_skip[pc], T_NEAR);
        }
    }

    L(branch_not_taken);
}
|
||||||
|
|
||||||
|
void Tegra::MacroJITx64Impl::Optimizer_ScanFlags() {
|
||||||
|
optimizer.can_skip_carry = true;
|
||||||
|
optimizer.has_delayed_pc = false;
|
||||||
|
for (auto raw_op : code) {
|
||||||
|
Macro::Opcode op{};
|
||||||
|
op.raw = raw_op;
|
||||||
|
|
||||||
|
if (op.operation == Macro::Operation::ALU) {
|
||||||
|
// Scan for any ALU operations which actually use the carry flag, if they don't exist in
|
||||||
|
// our current code we can skip emitting the carry flag handling operations
|
||||||
|
if (op.alu_operation == Macro::ALUOperation::AddWithCarry ||
|
||||||
|
op.alu_operation == Macro::ALUOperation::SubtractWithBorrow) {
|
||||||
|
optimizer.can_skip_carry = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (op.operation == Macro::Operation::Branch) {
|
||||||
|
if (!op.branch_annul) {
|
||||||
|
optimizer.has_delayed_pc = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void MacroJITx64Impl::Compile() {
|
||||||
|
MICROPROFILE_SCOPE(MacroJitCompile);
|
||||||
|
bool keep_executing = true;
|
||||||
|
labels.fill(Xbyak::Label());
|
||||||
|
|
||||||
|
Common::X64::ABI_PushRegistersAndAdjustStack(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8);
|
||||||
|
// JIT state
|
||||||
|
mov(STATE, Common::X64::ABI_PARAM1);
|
||||||
|
mov(PARAMETERS, Common::X64::ABI_PARAM2);
|
||||||
|
xor_(RESULT, RESULT);
|
||||||
|
xor_(METHOD_ADDRESS, METHOD_ADDRESS);
|
||||||
|
xor_(BRANCH_HOLDER, BRANCH_HOLDER);
|
||||||
|
|
||||||
|
mov(dword[STATE + offsetof(JITState, registers) + 4], Compile_FetchParameter());
|
||||||
|
|
||||||
|
// Track get register for zero registers and mark it as no-op
|
||||||
|
optimizer.zero_reg_skip = true;
|
||||||
|
|
||||||
|
// AddImmediate tends to be used as a NOP instruction, if we detect this we can
|
||||||
|
// completely skip the entire code path and no emit anything
|
||||||
|
optimizer.skip_dummy_addimmediate = true;
|
||||||
|
|
||||||
|
// SMO tends to emit a lot of unnecessary method moves, we can mitigate this by only emitting
|
||||||
|
// one if our register isn't "dirty"
|
||||||
|
optimizer.optimize_for_method_move = true;
|
||||||
|
|
||||||
|
// Enable run-time assertions in JITted code
|
||||||
|
optimizer.enable_asserts = false;
|
||||||
|
|
||||||
|
// Check to see if we can skip emitting certain instructions
|
||||||
|
Optimizer_ScanFlags();
|
||||||
|
|
||||||
|
const u32 op_count = static_cast<u32>(code.size());
|
||||||
|
for (u32 i = 0; i < op_count; i++) {
|
||||||
|
if (i < op_count - 1) {
|
||||||
|
pc = i + 1;
|
||||||
|
next_opcode = GetOpCode();
|
||||||
|
} else {
|
||||||
|
next_opcode = {};
|
||||||
|
}
|
||||||
|
pc = i;
|
||||||
|
Compile_NextInstruction();
|
||||||
|
}
|
||||||
|
|
||||||
|
L(end_of_code);
|
||||||
|
|
||||||
|
Common::X64::ABI_PopRegistersAndAdjustStack(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8);
|
||||||
|
ret();
|
||||||
|
ready();
|
||||||
|
program = getCode<ProgramType>();
|
||||||
|
}
|
||||||
|
|
||||||
|
bool MacroJITx64Impl::Compile_NextInstruction() {
|
||||||
|
const auto opcode = GetOpCode();
|
||||||
|
if (labels[pc].getAddress()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
L(labels[pc]);
|
||||||
|
|
||||||
|
switch (opcode.operation) {
|
||||||
|
case Macro::Operation::ALU:
|
||||||
|
Compile_ALU(opcode);
|
||||||
|
break;
|
||||||
|
case Macro::Operation::AddImmediate:
|
||||||
|
Compile_AddImmediate(opcode);
|
||||||
|
break;
|
||||||
|
case Macro::Operation::ExtractInsert:
|
||||||
|
Compile_ExtractInsert(opcode);
|
||||||
|
break;
|
||||||
|
case Macro::Operation::ExtractShiftLeftImmediate:
|
||||||
|
Compile_ExtractShiftLeftImmediate(opcode);
|
||||||
|
break;
|
||||||
|
case Macro::Operation::ExtractShiftLeftRegister:
|
||||||
|
Compile_ExtractShiftLeftRegister(opcode);
|
||||||
|
break;
|
||||||
|
case Macro::Operation::Read:
|
||||||
|
Compile_Read(opcode);
|
||||||
|
break;
|
||||||
|
case Macro::Operation::Branch:
|
||||||
|
Compile_Branch(opcode);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
UNIMPLEMENTED_MSG("Unimplemented opcode {}", opcode.operation.Value());
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (optimizer.has_delayed_pc) {
|
||||||
|
if (opcode.is_exit) {
|
||||||
|
mov(rax, end_of_code);
|
||||||
|
test(BRANCH_HOLDER, BRANCH_HOLDER);
|
||||||
|
cmove(BRANCH_HOLDER, rax);
|
||||||
|
// Jump to next instruction to skip delay slot check
|
||||||
|
je(labels[pc + 1], T_NEAR);
|
||||||
|
} else {
|
||||||
|
// TODO(ogniK): Optimize delay slot branching
|
||||||
|
Xbyak::Label no_delay_slot{};
|
||||||
|
test(BRANCH_HOLDER, BRANCH_HOLDER);
|
||||||
|
je(no_delay_slot, T_NEAR);
|
||||||
|
mov(rax, BRANCH_HOLDER);
|
||||||
|
xor_(BRANCH_HOLDER, BRANCH_HOLDER);
|
||||||
|
jmp(rax);
|
||||||
|
L(no_delay_slot);
|
||||||
|
}
|
||||||
|
L(delay_skip[pc]);
|
||||||
|
if (opcode.is_exit) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
test(BRANCH_HOLDER, BRANCH_HOLDER);
|
||||||
|
jne(end_of_code, T_NEAR);
|
||||||
|
if (opcode.is_exit) {
|
||||||
|
inc(BRANCH_HOLDER);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Emits code that loads the next macro parameter into eax and advances the parameter cursor
/// by one u32.
/// @return The register holding the fetched parameter (eax).
Xbyak::Reg32 Tegra::MacroJITx64Impl::Compile_FetchParameter() {
    const Xbyak::Reg32 fetched = eax;
    mov(fetched, dword[PARAMETERS]);
    add(PARAMETERS, sizeof(u32));
    return fetched;
}
|
||||||
|
|
||||||
|
/// Emits code that loads macro register `index` into the host register `dst`.
/// @return `dst`, for convenient chaining.
Xbyak::Reg32 MacroJITx64Impl::Compile_GetRegister(u32 index, Xbyak::Reg32 dst) {
    if (index == 0) {
        // Register 0 is always zero
        xor_(dst, dst);
        return dst;
    }

    const auto register_offset = offsetof(JITState, registers) + index * sizeof(u32);
    mov(dst, dword[STATE + register_offset]);
    return dst;
}
|
||||||
|
|
||||||
|
/// Emits the write-back stage of an instruction: depending on `operation`, the value in
/// RESULT and/or a freshly fetched parameter is stored to macro register `reg`, copied to
/// METHOD_ADDRESS, or sent to the engine.
void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u32 reg) {
    // Stores `source` into the macro register `target`.
    const auto store_register = [this](u32 target, const Xbyak::Reg32& source) {
        // Register 0 is supposed to always return 0. NOP is implemented as a store to the zero
        // register.
        if (target != 0) {
            mov(dword[STATE + offsetof(JITState, registers) + target * sizeof(u32)], source);
        }
    };
    const auto set_method_address = [this](const Xbyak::Reg32& source) {
        mov(METHOD_ADDRESS, source);
    };

    switch (operation) {
    case Macro::ResultOperation::IgnoreAndFetch:
        store_register(reg, Compile_FetchParameter());
        break;
    case Macro::ResultOperation::Move:
        store_register(reg, RESULT);
        break;
    case Macro::ResultOperation::MoveAndSetMethod:
        store_register(reg, RESULT);
        set_method_address(RESULT);
        break;
    case Macro::ResultOperation::FetchAndSend:
        // Fetch parameter and send result.
        store_register(reg, Compile_FetchParameter());
        Compile_Send(RESULT);
        break;
    case Macro::ResultOperation::MoveAndSend:
        // Move and send result.
        store_register(reg, RESULT);
        Compile_Send(RESULT);
        break;
    case Macro::ResultOperation::FetchAndSetMethod:
        // Fetch parameter and use result as Method Address.
        store_register(reg, Compile_FetchParameter());
        set_method_address(RESULT);
        break;
    case Macro::ResultOperation::MoveAndSetMethodFetchAndSend:
        // Move result and use as Method Address, then fetch and send parameter.
        store_register(reg, RESULT);
        set_method_address(RESULT);
        Compile_Send(Compile_FetchParameter());
        break;
    case Macro::ResultOperation::MoveAndSetMethodSend:
        // Move result and use as Method Address, then send bits 12:17 of result.
        store_register(reg, RESULT);
        set_method_address(RESULT);
        shr(RESULT, 12);
        and_(RESULT, 0b111111);
        Compile_Send(RESULT);
        break;
    default:
        UNIMPLEMENTED_MSG("Unimplemented macro operation {}", static_cast<std::size_t>(operation));
    }
}
|
||||||
|
|
||||||
|
/// Decodes the macro instruction word at the current `pc`.
Macro::Opcode MacroJITx64Impl::GetOpCode() const {
    ASSERT(pc < code.size());
    const u32 raw_word = code[pc];
    return {raw_word};
}
|
||||||
|
|
||||||
|
std::bitset<32> MacroJITx64Impl::PersistentCallerSavedRegs() const {
|
||||||
|
return PERSISTENT_REGISTERS & Common::X64::ABI_ALL_CALLER_SAVED;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace Tegra
|
98
src/video_core/macro/macro_jit_x64.h
Normal file
98
src/video_core/macro/macro_jit_x64.h
Normal file
|
@ -0,0 +1,98 @@
|
||||||
|
// Copyright 2020 yuzu Emulator Project
|
||||||
|
// Licensed under GPLv2 or any later version
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <array>
#include <bitset>
#include <cstddef>
#include <memory>
#include <optional>
#include <vector>
#include <xbyak.h>
#include "common/bit_field.h"
#include "common/common_types.h"
#include "common/x64/xbyak_abi.h"
#include "video_core/macro/macro.h"
|
||||||
|
|
||||||
|
namespace Tegra {
|
||||||
|
|
||||||
|
namespace Engines {
|
||||||
|
class Maxwell3D;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// MAX_CODE_SIZE is arbitrarily chosen based on current booting games
|
||||||
|
constexpr size_t MAX_CODE_SIZE = 0x10000;
|
||||||
|
|
||||||
|
class MacroJITx64 final : public MacroEngine {
|
||||||
|
public:
|
||||||
|
explicit MacroJITx64(Engines::Maxwell3D& maxwell3d);
|
||||||
|
|
||||||
|
protected:
|
||||||
|
std::unique_ptr<CachedMacro> Compile(const std::vector<u32>& code) override;
|
||||||
|
|
||||||
|
private:
|
||||||
|
Engines::Maxwell3D& maxwell3d;
|
||||||
|
};
|
||||||
|
|
||||||
|
/// A single macro compiled to host code. The constructor performs the compilation;
/// Execute() runs the generated function.
class MacroJITx64Impl : public Xbyak::CodeGenerator, public CachedMacro {
public:
    MacroJITx64Impl(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& code);
    ~MacroJITx64Impl();

    /// Runs the compiled macro with the given parameters.
    void Execute(const std::vector<u32>& parameters, u32 method) override;

    // Per-operation code emitters; each one emits the host code for a single macro instruction.
    void Compile_ALU(Macro::Opcode opcode);
    void Compile_AddImmediate(Macro::Opcode opcode);
    void Compile_ExtractInsert(Macro::Opcode opcode);
    void Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode);
    void Compile_ExtractShiftLeftRegister(Macro::Opcode opcode);
    void Compile_Read(Macro::Opcode opcode);
    void Compile_Branch(Macro::Opcode opcode);

private:
    /// Scans the macro to fill in the code-dependent optimizer flags.
    void Optimizer_ScanFlags();

    /// Compiles the whole macro and sets `program`.
    void Compile();
    /// Compiles the instruction at `pc`.
    bool Compile_NextInstruction();

    /// Emits a load of the next macro parameter; returns the register holding it.
    Xbyak::Reg32 Compile_FetchParameter();
    /// Emits a load of macro register `index` into `dst`; returns `dst`.
    Xbyak::Reg32 Compile_GetRegister(u32 index, Xbyak::Reg32 dst);

    /// Emits the write-back stage for the value currently held in the result register.
    void Compile_ProcessResult(Macro::ResultOperation operation, u32 reg);
    /// Emits a call that sends `value` to the engine and advances the method address.
    void Compile_Send(Xbyak::Reg32 value);

    /// Decodes the instruction word at `pc`.
    Macro::Opcode GetOpCode() const;
    /// Pinned JIT registers that are caller-saved in the host ABI.
    std::bitset<32> PersistentCallerSavedRegs() const;

    /// Run-time state accessed by the generated code through a base pointer.
    struct JITState {
        Engines::Maxwell3D* maxwell3d{};
        std::array<u32, Macro::NUM_MACRO_REGISTERS> registers{};
        u32 carry_flag{};
    };
    // Generated code loads the engine pointer from offset 0 of the state
    static_assert(offsetof(JITState, maxwell3d) == 0, "Maxwell3D is not at 0x0");
    /// Signature of the generated function: (state, parameter list).
    using ProgramType = void (*)(JITState*, const u32*);

    /// Switches controlling which code-generation shortcuts are taken.
    struct OptimizerState {
        bool can_skip_carry{};
        bool has_delayed_pc{};
        bool zero_reg_skip{};
        bool skip_dummy_addimmediate{};
        bool optimize_for_method_move{};
        bool enable_asserts{};
    };
    OptimizerState optimizer{};

    /// Instruction following the one being compiled, if any.
    std::optional<Macro::Opcode> next_opcode{};
    /// Entry point of the generated code; null until compilation has finished.
    ProgramType program{nullptr};

    /// Label bound at the start of each compiled instruction, indexed by pc.
    std::array<Xbyak::Label, MAX_CODE_SIZE> labels;
    /// Label bound after each instruction's delay-slot check, indexed by pc.
    std::array<Xbyak::Label, MAX_CODE_SIZE> delay_skip;
    /// Label bound right before the epilogue.
    Xbyak::Label end_of_code{};

    bool is_delay_slot{};
    /// Index of the instruction currently being compiled.
    u32 pc{};
    std::optional<u32> delayed_pc;

    /// Macro code words being compiled (held by reference).
    const std::vector<u32>& code;
    Engines::Maxwell3D& maxwell3d;
};
|
||||||
|
|
||||||
|
} // namespace Tegra
|
|
@ -210,10 +210,11 @@ bool MemoryManager::IsBlockContinuous(const GPUVAddr start, const std::size_t si
|
||||||
return range == inner_size;
|
return range == inner_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::size_t size) const {
|
void MemoryManager::ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer,
|
||||||
|
const std::size_t size) const {
|
||||||
std::size_t remaining_size{size};
|
std::size_t remaining_size{size};
|
||||||
std::size_t page_index{src_addr >> page_bits};
|
std::size_t page_index{gpu_src_addr >> page_bits};
|
||||||
std::size_t page_offset{src_addr & page_mask};
|
std::size_t page_offset{gpu_src_addr & page_mask};
|
||||||
|
|
||||||
auto& memory = system.Memory();
|
auto& memory = system.Memory();
|
||||||
|
|
||||||
|
@ -234,11 +235,11 @@ void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::s
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void MemoryManager::ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer,
|
void MemoryManager::ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer,
|
||||||
const std::size_t size) const {
|
const std::size_t size) const {
|
||||||
std::size_t remaining_size{size};
|
std::size_t remaining_size{size};
|
||||||
std::size_t page_index{src_addr >> page_bits};
|
std::size_t page_index{gpu_src_addr >> page_bits};
|
||||||
std::size_t page_offset{src_addr & page_mask};
|
std::size_t page_offset{gpu_src_addr & page_mask};
|
||||||
|
|
||||||
auto& memory = system.Memory();
|
auto& memory = system.Memory();
|
||||||
|
|
||||||
|
@ -259,10 +260,11 @@ void MemoryManager::ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, const std::size_t size) {
|
void MemoryManager::WriteBlock(GPUVAddr gpu_dest_addr, const void* src_buffer,
|
||||||
|
const std::size_t size) {
|
||||||
std::size_t remaining_size{size};
|
std::size_t remaining_size{size};
|
||||||
std::size_t page_index{dest_addr >> page_bits};
|
std::size_t page_index{gpu_dest_addr >> page_bits};
|
||||||
std::size_t page_offset{dest_addr & page_mask};
|
std::size_t page_offset{gpu_dest_addr & page_mask};
|
||||||
|
|
||||||
auto& memory = system.Memory();
|
auto& memory = system.Memory();
|
||||||
|
|
||||||
|
@ -283,11 +285,11 @@ void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, const
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void MemoryManager::WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer,
|
void MemoryManager::WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer,
|
||||||
const std::size_t size) {
|
const std::size_t size) {
|
||||||
std::size_t remaining_size{size};
|
std::size_t remaining_size{size};
|
||||||
std::size_t page_index{dest_addr >> page_bits};
|
std::size_t page_index{gpu_dest_addr >> page_bits};
|
||||||
std::size_t page_offset{dest_addr & page_mask};
|
std::size_t page_offset{gpu_dest_addr & page_mask};
|
||||||
|
|
||||||
auto& memory = system.Memory();
|
auto& memory = system.Memory();
|
||||||
|
|
||||||
|
@ -306,16 +308,18 @@ void MemoryManager::WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) {
|
void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr,
|
||||||
|
const std::size_t size) {
|
||||||
std::vector<u8> tmp_buffer(size);
|
std::vector<u8> tmp_buffer(size);
|
||||||
ReadBlock(src_addr, tmp_buffer.data(), size);
|
ReadBlock(gpu_src_addr, tmp_buffer.data(), size);
|
||||||
WriteBlock(dest_addr, tmp_buffer.data(), size);
|
WriteBlock(gpu_dest_addr, tmp_buffer.data(), size);
|
||||||
}
|
}
|
||||||
|
|
||||||
void MemoryManager::CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) {
|
void MemoryManager::CopyBlockUnsafe(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr,
|
||||||
|
const std::size_t size) {
|
||||||
std::vector<u8> tmp_buffer(size);
|
std::vector<u8> tmp_buffer(size);
|
||||||
ReadBlockUnsafe(src_addr, tmp_buffer.data(), size);
|
ReadBlockUnsafe(gpu_src_addr, tmp_buffer.data(), size);
|
||||||
WriteBlockUnsafe(dest_addr, tmp_buffer.data(), size);
|
WriteBlockUnsafe(gpu_dest_addr, tmp_buffer.data(), size);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) {
|
bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) {
|
||||||
|
|
|
@ -79,9 +79,9 @@ public:
|
||||||
* in the Host Memory counterpart. Note: This functions cause Host GPU Memory
|
* in the Host Memory counterpart. Note: This functions cause Host GPU Memory
|
||||||
* Flushes and Invalidations, respectively to each operation.
|
* Flushes and Invalidations, respectively to each operation.
|
||||||
*/
|
*/
|
||||||
void ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const;
|
void ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size) const;
|
||||||
void WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size);
|
void WriteBlock(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size);
|
||||||
void CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size);
|
void CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* ReadBlockUnsafe and WriteBlockUnsafe are special versions of ReadBlock and
|
* ReadBlockUnsafe and WriteBlockUnsafe are special versions of ReadBlock and
|
||||||
|
@ -93,9 +93,9 @@ public:
|
||||||
* WriteBlockUnsafe instead of WriteBlock since it shouldn't invalidate the texture
|
* WriteBlockUnsafe instead of WriteBlock since it shouldn't invalidate the texture
|
||||||
* being flushed.
|
* being flushed.
|
||||||
*/
|
*/
|
||||||
void ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const;
|
void ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size) const;
|
||||||
void WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, std::size_t size);
|
void WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size);
|
||||||
void CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size);
|
void CopyBlockUnsafe(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* IsGranularRange checks if a gpu region can be simply read with a pointer
|
* IsGranularRange checks if a gpu region can be simply read with a pointer
|
||||||
|
|
|
@ -220,8 +220,8 @@ private:
|
||||||
return cache_begin < addr_end && addr_begin < cache_end;
|
return cache_begin < addr_end && addr_begin < cache_end;
|
||||||
};
|
};
|
||||||
|
|
||||||
const u64 page_end = addr_end >> PAGE_SHIFT;
|
const u64 page_end = addr_end >> PAGE_BITS;
|
||||||
for (u64 page = addr_begin >> PAGE_SHIFT; page <= page_end; ++page) {
|
for (u64 page = addr_begin >> PAGE_BITS; page <= page_end; ++page) {
|
||||||
const auto& it = cached_queries.find(page);
|
const auto& it = cached_queries.find(page);
|
||||||
if (it == std::end(cached_queries)) {
|
if (it == std::end(cached_queries)) {
|
||||||
continue;
|
continue;
|
||||||
|
@ -242,14 +242,14 @@ private:
|
||||||
/// Registers the passed parameters as cached and returns a pointer to the stored cached query.
|
/// Registers the passed parameters as cached and returns a pointer to the stored cached query.
|
||||||
CachedQuery* Register(VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr, bool timestamp) {
|
CachedQuery* Register(VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr, bool timestamp) {
|
||||||
rasterizer.UpdatePagesCachedCount(cpu_addr, CachedQuery::SizeInBytes(timestamp), 1);
|
rasterizer.UpdatePagesCachedCount(cpu_addr, CachedQuery::SizeInBytes(timestamp), 1);
|
||||||
const u64 page = static_cast<u64>(cpu_addr) >> PAGE_SHIFT;
|
const u64 page = static_cast<u64>(cpu_addr) >> PAGE_BITS;
|
||||||
return &cached_queries[page].emplace_back(static_cast<QueryCache&>(*this), type, cpu_addr,
|
return &cached_queries[page].emplace_back(static_cast<QueryCache&>(*this), type, cpu_addr,
|
||||||
host_ptr);
|
host_ptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Tries to a get a cached query. Returns nullptr on failure.
|
/// Tries to a get a cached query. Returns nullptr on failure.
|
||||||
CachedQuery* TryGet(VAddr addr) {
|
CachedQuery* TryGet(VAddr addr) {
|
||||||
const u64 page = static_cast<u64>(addr) >> PAGE_SHIFT;
|
const u64 page = static_cast<u64>(addr) >> PAGE_BITS;
|
||||||
const auto it = cached_queries.find(page);
|
const auto it = cached_queries.find(page);
|
||||||
if (it == std::end(cached_queries)) {
|
if (it == std::end(cached_queries)) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
|
@ -268,7 +268,7 @@ private:
|
||||||
}
|
}
|
||||||
|
|
||||||
static constexpr std::uintptr_t PAGE_SIZE = 4096;
|
static constexpr std::uintptr_t PAGE_SIZE = 4096;
|
||||||
static constexpr unsigned PAGE_SHIFT = 12;
|
static constexpr unsigned PAGE_BITS = 12;
|
||||||
|
|
||||||
Core::System& system;
|
Core::System& system;
|
||||||
VideoCore::RasterizerInterface& rasterizer;
|
VideoCore::RasterizerInterface& rasterizer;
|
||||||
|
|
|
@ -1,7 +0,0 @@
|
||||||
// Copyright 2018 yuzu Emulator Project
|
|
||||||
// Licensed under GPLv2 or any later version
|
|
||||||
// Refer to the license.txt file included.
|
|
||||||
|
|
||||||
#include "video_core/rasterizer_cache.h"
|
|
||||||
|
|
||||||
RasterizerCacheObject::~RasterizerCacheObject() = default;
|
|
|
@ -1,253 +0,0 @@
|
||||||
// Copyright 2018 yuzu Emulator Project
|
|
||||||
// Licensed under GPLv2 or any later version
|
|
||||||
// Refer to the license.txt file included.
|
|
||||||
|
|
||||||
#pragma once
|
|
||||||
|
|
||||||
#include <mutex>
|
|
||||||
#include <set>
|
|
||||||
#include <unordered_map>
|
|
||||||
|
|
||||||
#include <boost/icl/interval_map.hpp>
|
|
||||||
#include <boost/range/iterator_range_core.hpp>
|
|
||||||
|
|
||||||
#include "common/common_types.h"
|
|
||||||
#include "core/settings.h"
|
|
||||||
#include "video_core/gpu.h"
|
|
||||||
#include "video_core/rasterizer_interface.h"
|
|
||||||
|
|
||||||
class RasterizerCacheObject {
|
|
||||||
public:
|
|
||||||
explicit RasterizerCacheObject(const VAddr cpu_addr) : cpu_addr{cpu_addr} {}
|
|
||||||
|
|
||||||
virtual ~RasterizerCacheObject();
|
|
||||||
|
|
||||||
VAddr GetCpuAddr() const {
|
|
||||||
return cpu_addr;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Gets the size of the shader in guest memory, required for cache management
|
|
||||||
virtual std::size_t GetSizeInBytes() const = 0;
|
|
||||||
|
|
||||||
/// Sets whether the cached object should be considered registered
|
|
||||||
void SetIsRegistered(bool registered) {
|
|
||||||
is_registered = registered;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Returns true if the cached object is registered
|
|
||||||
bool IsRegistered() const {
|
|
||||||
return is_registered;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Returns true if the cached object is dirty
|
|
||||||
bool IsDirty() const {
|
|
||||||
return is_dirty;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Returns ticks from when this cached object was last modified
|
|
||||||
u64 GetLastModifiedTicks() const {
|
|
||||||
return last_modified_ticks;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Marks an object as recently modified, used to specify whether it is clean or dirty
|
|
||||||
template <class T>
|
|
||||||
void MarkAsModified(bool dirty, T& cache) {
|
|
||||||
is_dirty = dirty;
|
|
||||||
last_modified_ticks = cache.GetModifiedTicks();
|
|
||||||
}
|
|
||||||
|
|
||||||
void SetMemoryMarked(bool is_memory_marked_) {
|
|
||||||
is_memory_marked = is_memory_marked_;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool IsMemoryMarked() const {
|
|
||||||
return is_memory_marked;
|
|
||||||
}
|
|
||||||
|
|
||||||
void SetSyncPending(bool is_sync_pending_) {
|
|
||||||
is_sync_pending = is_sync_pending_;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool IsSyncPending() const {
|
|
||||||
return is_sync_pending;
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
bool is_registered{}; ///< Whether the object is currently registered with the cache
|
|
||||||
bool is_dirty{}; ///< Whether the object is dirty (out of sync with guest memory)
|
|
||||||
bool is_memory_marked{}; ///< Whether the object is marking rasterizer memory.
|
|
||||||
bool is_sync_pending{}; ///< Whether the object is pending deletion.
|
|
||||||
u64 last_modified_ticks{}; ///< When the object was last modified, used for in-order flushing
|
|
||||||
VAddr cpu_addr{}; ///< Cpu address memory, unique from emulated virtual address space
|
|
||||||
};
|
|
||||||
|
|
||||||
template <class T>
|
|
||||||
class RasterizerCache : NonCopyable {
|
|
||||||
friend class RasterizerCacheObject;
|
|
||||||
|
|
||||||
public:
|
|
||||||
explicit RasterizerCache(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {}
|
|
||||||
|
|
||||||
/// Write any cached resources overlapping the specified region back to memory
|
|
||||||
void FlushRegion(VAddr addr, std::size_t size) {
|
|
||||||
std::lock_guard lock{mutex};
|
|
||||||
|
|
||||||
const auto& objects{GetSortedObjectsFromRegion(addr, size)};
|
|
||||||
for (auto& object : objects) {
|
|
||||||
FlushObject(object);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Mark the specified region as being invalidated
|
|
||||||
void InvalidateRegion(VAddr addr, u64 size) {
|
|
||||||
std::lock_guard lock{mutex};
|
|
||||||
|
|
||||||
const auto& objects{GetSortedObjectsFromRegion(addr, size)};
|
|
||||||
for (auto& object : objects) {
|
|
||||||
if (!object->IsRegistered()) {
|
|
||||||
// Skip duplicates
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
Unregister(object);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void OnCPUWrite(VAddr addr, std::size_t size) {
|
|
||||||
std::lock_guard lock{mutex};
|
|
||||||
|
|
||||||
for (const auto& object : GetSortedObjectsFromRegion(addr, size)) {
|
|
||||||
if (object->IsRegistered()) {
|
|
||||||
UnmarkMemory(object);
|
|
||||||
object->SetSyncPending(true);
|
|
||||||
marked_for_unregister.emplace_back(object);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void SyncGuestHost() {
|
|
||||||
std::lock_guard lock{mutex};
|
|
||||||
|
|
||||||
for (const auto& object : marked_for_unregister) {
|
|
||||||
if (object->IsRegistered()) {
|
|
||||||
object->SetSyncPending(false);
|
|
||||||
Unregister(object);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
marked_for_unregister.clear();
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Invalidates everything in the cache
|
|
||||||
void InvalidateAll() {
|
|
||||||
std::lock_guard lock{mutex};
|
|
||||||
|
|
||||||
while (interval_cache.begin() != interval_cache.end()) {
|
|
||||||
Unregister(*interval_cache.begin()->second.begin());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
protected:
|
|
||||||
/// Tries to get an object from the cache with the specified cache address
|
|
||||||
T TryGet(VAddr addr) const {
|
|
||||||
const auto iter = map_cache.find(addr);
|
|
||||||
if (iter != map_cache.end())
|
|
||||||
return iter->second;
|
|
||||||
return nullptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Register an object into the cache
|
|
||||||
virtual void Register(const T& object) {
|
|
||||||
std::lock_guard lock{mutex};
|
|
||||||
|
|
||||||
object->SetIsRegistered(true);
|
|
||||||
interval_cache.add({GetInterval(object), ObjectSet{object}});
|
|
||||||
map_cache.insert({object->GetCpuAddr(), object});
|
|
||||||
rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), 1);
|
|
||||||
object->SetMemoryMarked(true);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Unregisters an object from the cache
|
|
||||||
virtual void Unregister(const T& object) {
|
|
||||||
std::lock_guard lock{mutex};
|
|
||||||
|
|
||||||
UnmarkMemory(object);
|
|
||||||
object->SetIsRegistered(false);
|
|
||||||
if (object->IsSyncPending()) {
|
|
||||||
marked_for_unregister.remove(object);
|
|
||||||
object->SetSyncPending(false);
|
|
||||||
}
|
|
||||||
const VAddr addr = object->GetCpuAddr();
|
|
||||||
interval_cache.subtract({GetInterval(object), ObjectSet{object}});
|
|
||||||
map_cache.erase(addr);
|
|
||||||
}
|
|
||||||
|
|
||||||
void UnmarkMemory(const T& object) {
|
|
||||||
if (!object->IsMemoryMarked()) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1);
|
|
||||||
object->SetMemoryMarked(false);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Returns a ticks counter used for tracking when cached objects were last modified
|
|
||||||
u64 GetModifiedTicks() {
|
|
||||||
std::lock_guard lock{mutex};
|
|
||||||
|
|
||||||
return ++modified_ticks;
|
|
||||||
}
|
|
||||||
|
|
||||||
virtual void FlushObjectInner(const T& object) = 0;
|
|
||||||
|
|
||||||
/// Flushes the specified object, updating appropriate cache state as needed
|
|
||||||
void FlushObject(const T& object) {
|
|
||||||
std::lock_guard lock{mutex};
|
|
||||||
|
|
||||||
if (!object->IsDirty()) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
FlushObjectInner(object);
|
|
||||||
object->MarkAsModified(false, *this);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::recursive_mutex mutex;
|
|
||||||
|
|
||||||
private:
|
|
||||||
/// Returns a list of cached objects from the specified memory region, ordered by access time
|
|
||||||
std::vector<T> GetSortedObjectsFromRegion(VAddr addr, u64 size) {
|
|
||||||
if (size == 0) {
|
|
||||||
return {};
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<T> objects;
|
|
||||||
const ObjectInterval interval{addr, addr + size};
|
|
||||||
for (auto& pair : boost::make_iterator_range(interval_cache.equal_range(interval))) {
|
|
||||||
for (auto& cached_object : pair.second) {
|
|
||||||
if (!cached_object) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
objects.push_back(cached_object);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
std::sort(objects.begin(), objects.end(), [](const T& a, const T& b) -> bool {
|
|
||||||
return a->GetLastModifiedTicks() < b->GetLastModifiedTicks();
|
|
||||||
});
|
|
||||||
|
|
||||||
return objects;
|
|
||||||
}
|
|
||||||
|
|
||||||
using ObjectSet = std::set<T>;
|
|
||||||
using ObjectCache = std::unordered_map<VAddr, T>;
|
|
||||||
using IntervalCache = boost::icl::interval_map<VAddr, ObjectSet>;
|
|
||||||
using ObjectInterval = typename IntervalCache::interval_type;
|
|
||||||
|
|
||||||
static auto GetInterval(const T& object) {
|
|
||||||
return ObjectInterval::right_open(object->GetCpuAddr(),
|
|
||||||
object->GetCpuAddr() + object->GetSizeInBytes());
|
|
||||||
}
|
|
||||||
|
|
||||||
ObjectCache map_cache;
|
|
||||||
IntervalCache interval_cache; ///< Cache of objects
|
|
||||||
u64 modified_ticks{}; ///< Counter of cache state ticks, used for in-order flushing
|
|
||||||
VideoCore::RasterizerInterface& rasterizer;
|
|
||||||
std::list<T> marked_for_unregister;
|
|
||||||
};
|
|
2073
src/video_core/renderer_opengl/gl_arb_decompiler.cpp
Normal file
2073
src/video_core/renderer_opengl/gl_arb_decompiler.cpp
Normal file
File diff suppressed because it is too large
Load diff
29
src/video_core/renderer_opengl/gl_arb_decompiler.h
Normal file
29
src/video_core/renderer_opengl/gl_arb_decompiler.h
Normal file
|
@ -0,0 +1,29 @@
|
||||||
|
// Copyright 2020 yuzu Emulator Project
|
||||||
|
// Licensed under GPLv2 or any later version
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <string_view>
|
||||||
|
|
||||||
|
#include "common/common_types.h"
|
||||||
|
|
||||||
|
namespace Tegra::Engines {
|
||||||
|
enum class ShaderType : u32;
|
||||||
|
}
|
||||||
|
|
||||||
|
namespace VideoCommon::Shader {
|
||||||
|
class ShaderIR;
|
||||||
|
class Registry;
|
||||||
|
} // namespace VideoCommon::Shader
|
||||||
|
|
||||||
|
namespace OpenGL {
|
||||||
|
|
||||||
|
class Device;
|
||||||
|
|
||||||
|
std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
|
||||||
|
const VideoCommon::Shader::Registry& registry,
|
||||||
|
Tegra::Engines::ShaderType stage, std::string_view identifier);
|
||||||
|
|
||||||
|
} // namespace OpenGL
|
|
@ -22,22 +22,46 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||||
|
|
||||||
MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128));
|
MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128));
|
||||||
|
|
||||||
CachedBufferBlock::CachedBufferBlock(VAddr cpu_addr, const std::size_t size)
|
Buffer::Buffer(const Device& device, VAddr cpu_addr, std::size_t size)
|
||||||
: VideoCommon::BufferBlock{cpu_addr, size} {
|
: VideoCommon::BufferBlock{cpu_addr, size} {
|
||||||
gl_buffer.Create();
|
gl_buffer.Create();
|
||||||
glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW);
|
glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW);
|
||||||
|
if (device.HasVertexBufferUnifiedMemory()) {
|
||||||
|
glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_WRITE);
|
||||||
|
glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
CachedBufferBlock::~CachedBufferBlock() = default;
|
Buffer::~Buffer() = default;
|
||||||
|
|
||||||
|
void Buffer::Upload(std::size_t offset, std::size_t size, const u8* data) const {
|
||||||
|
glNamedBufferSubData(Handle(), static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size),
|
||||||
|
data);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Buffer::Download(std::size_t offset, std::size_t size, u8* data) const {
|
||||||
|
MICROPROFILE_SCOPE(OpenGL_Buffer_Download);
|
||||||
|
glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
|
||||||
|
glGetNamedBufferSubData(Handle(), static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size),
|
||||||
|
data);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
|
||||||
|
std::size_t size) const {
|
||||||
|
glCopyNamedBufferSubData(src.Handle(), Handle(), static_cast<GLintptr>(src_offset),
|
||||||
|
static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(size));
|
||||||
|
}
|
||||||
|
|
||||||
OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system,
|
OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system,
|
||||||
const Device& device, std::size_t stream_size)
|
const Device& device_, std::size_t stream_size)
|
||||||
: GenericBufferCache{rasterizer, system, std::make_unique<OGLStreamBuffer>(stream_size, true)} {
|
: GenericBufferCache{rasterizer, system,
|
||||||
|
std::make_unique<OGLStreamBuffer>(device_, stream_size, true)},
|
||||||
|
device{device_} {
|
||||||
if (!device.HasFastBufferSubData()) {
|
if (!device.HasFastBufferSubData()) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
static constexpr auto size = static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize);
|
static constexpr GLsizeiptr size = static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize);
|
||||||
glCreateBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs));
|
glCreateBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs));
|
||||||
for (const GLuint cbuf : cbufs) {
|
for (const GLuint cbuf : cbufs) {
|
||||||
glNamedBufferData(cbuf, size, nullptr, GL_STREAM_DRAW);
|
glNamedBufferData(cbuf, size, nullptr, GL_STREAM_DRAW);
|
||||||
|
@ -48,44 +72,21 @@ OGLBufferCache::~OGLBufferCache() {
|
||||||
glDeleteBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs));
|
glDeleteBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs));
|
||||||
}
|
}
|
||||||
|
|
||||||
Buffer OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
|
std::shared_ptr<Buffer> OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
|
||||||
return std::make_shared<CachedBufferBlock>(cpu_addr, size);
|
return std::make_shared<Buffer>(device, cpu_addr, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
GLuint OGLBufferCache::ToHandle(const Buffer& buffer) {
|
OGLBufferCache::BufferInfo OGLBufferCache::GetEmptyBuffer(std::size_t) {
|
||||||
return buffer->GetHandle();
|
return {0, 0, 0};
|
||||||
}
|
|
||||||
|
|
||||||
GLuint OGLBufferCache::GetEmptyBuffer(std::size_t) {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
void OGLBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
|
|
||||||
const u8* data) {
|
|
||||||
glNamedBufferSubData(buffer->GetHandle(), static_cast<GLintptr>(offset),
|
|
||||||
static_cast<GLsizeiptr>(size), data);
|
|
||||||
}
|
|
||||||
|
|
||||||
void OGLBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
|
|
||||||
u8* data) {
|
|
||||||
MICROPROFILE_SCOPE(OpenGL_Buffer_Download);
|
|
||||||
glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
|
|
||||||
glGetNamedBufferSubData(buffer->GetHandle(), static_cast<GLintptr>(offset),
|
|
||||||
static_cast<GLsizeiptr>(size), data);
|
|
||||||
}
|
|
||||||
|
|
||||||
void OGLBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
|
|
||||||
std::size_t dst_offset, std::size_t size) {
|
|
||||||
glCopyNamedBufferSubData(src->GetHandle(), dst->GetHandle(), static_cast<GLintptr>(src_offset),
|
|
||||||
static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(size));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_pointer,
|
OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_pointer,
|
||||||
std::size_t size) {
|
std::size_t size) {
|
||||||
DEBUG_ASSERT(cbuf_cursor < std::size(cbufs));
|
DEBUG_ASSERT(cbuf_cursor < std::size(cbufs));
|
||||||
const GLuint& cbuf = cbufs[cbuf_cursor++];
|
const GLuint cbuf = cbufs[cbuf_cursor++];
|
||||||
|
|
||||||
glNamedBufferSubData(cbuf, 0, static_cast<GLsizeiptr>(size), raw_pointer);
|
glNamedBufferSubData(cbuf, 0, static_cast<GLsizeiptr>(size), raw_pointer);
|
||||||
return {cbuf, 0};
|
return {cbuf, 0, 0};
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace OpenGL
|
} // namespace OpenGL
|
||||||
|
|
|
@ -10,7 +10,6 @@
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
#include "video_core/buffer_cache/buffer_cache.h"
|
#include "video_core/buffer_cache/buffer_cache.h"
|
||||||
#include "video_core/engines/maxwell_3d.h"
|
#include "video_core/engines/maxwell_3d.h"
|
||||||
#include "video_core/rasterizer_cache.h"
|
|
||||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||||
#include "video_core/renderer_opengl/gl_stream_buffer.h"
|
#include "video_core/renderer_opengl/gl_stream_buffer.h"
|
||||||
|
|
||||||
|
@ -24,57 +23,57 @@ class Device;
|
||||||
class OGLStreamBuffer;
|
class OGLStreamBuffer;
|
||||||
class RasterizerOpenGL;
|
class RasterizerOpenGL;
|
||||||
|
|
||||||
class CachedBufferBlock;
|
class Buffer : public VideoCommon::BufferBlock {
|
||||||
|
|
||||||
using Buffer = std::shared_ptr<CachedBufferBlock>;
|
|
||||||
using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>;
|
|
||||||
|
|
||||||
class CachedBufferBlock : public VideoCommon::BufferBlock {
|
|
||||||
public:
|
public:
|
||||||
explicit CachedBufferBlock(VAddr cpu_addr, const std::size_t size);
|
explicit Buffer(const Device& device, VAddr cpu_addr, std::size_t size);
|
||||||
~CachedBufferBlock();
|
~Buffer();
|
||||||
|
|
||||||
GLuint GetHandle() const {
|
void Upload(std::size_t offset, std::size_t size, const u8* data) const;
|
||||||
|
|
||||||
|
void Download(std::size_t offset, std::size_t size, u8* data) const;
|
||||||
|
|
||||||
|
void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
|
||||||
|
std::size_t size) const;
|
||||||
|
|
||||||
|
GLuint Handle() const noexcept {
|
||||||
return gl_buffer.handle;
|
return gl_buffer.handle;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
u64 Address() const noexcept {
|
||||||
|
return gpu_address;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
OGLBuffer gl_buffer;
|
OGLBuffer gl_buffer;
|
||||||
|
u64 gpu_address = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>;
|
||||||
class OGLBufferCache final : public GenericBufferCache {
|
class OGLBufferCache final : public GenericBufferCache {
|
||||||
public:
|
public:
|
||||||
explicit OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system,
|
explicit OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system,
|
||||||
const Device& device, std::size_t stream_size);
|
const Device& device, std::size_t stream_size);
|
||||||
~OGLBufferCache();
|
~OGLBufferCache();
|
||||||
|
|
||||||
GLuint GetEmptyBuffer(std::size_t) override;
|
BufferInfo GetEmptyBuffer(std::size_t) override;
|
||||||
|
|
||||||
void Acquire() noexcept {
|
void Acquire() noexcept {
|
||||||
cbuf_cursor = 0;
|
cbuf_cursor = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
Buffer CreateBlock(VAddr cpu_addr, std::size_t size) override;
|
std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override;
|
||||||
|
|
||||||
GLuint ToHandle(const Buffer& buffer) override;
|
|
||||||
|
|
||||||
void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
|
|
||||||
const u8* data) override;
|
|
||||||
|
|
||||||
void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
|
|
||||||
u8* data) override;
|
|
||||||
|
|
||||||
void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
|
|
||||||
std::size_t dst_offset, std::size_t size) override;
|
|
||||||
|
|
||||||
BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) override;
|
BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
static constexpr std::size_t NUM_CBUFS = Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers *
|
||||||
|
Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram;
|
||||||
|
|
||||||
|
const Device& device;
|
||||||
|
|
||||||
std::size_t cbuf_cursor = 0;
|
std::size_t cbuf_cursor = 0;
|
||||||
std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers *
|
std::array<GLuint, NUM_CBUFS> cbufs{};
|
||||||
Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram>
|
|
||||||
cbufs;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace OpenGL
|
} // namespace OpenGL
|
||||||
|
|
|
@ -6,6 +6,7 @@
|
||||||
#include <array>
|
#include <array>
|
||||||
#include <cstddef>
|
#include <cstddef>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
|
#include <limits>
|
||||||
#include <optional>
|
#include <optional>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
|
@ -26,24 +27,27 @@ constexpr u32 ReservedUniformBlocks = 1;
|
||||||
|
|
||||||
constexpr u32 NumStages = 5;
|
constexpr u32 NumStages = 5;
|
||||||
|
|
||||||
constexpr std::array LimitUBOs = {GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS,
|
constexpr std::array LimitUBOs = {
|
||||||
GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS,
|
GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS,
|
||||||
GL_MAX_GEOMETRY_UNIFORM_BLOCKS, GL_MAX_FRAGMENT_UNIFORM_BLOCKS};
|
GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, GL_MAX_GEOMETRY_UNIFORM_BLOCKS,
|
||||||
|
GL_MAX_FRAGMENT_UNIFORM_BLOCKS, GL_MAX_COMPUTE_UNIFORM_BLOCKS};
|
||||||
|
|
||||||
constexpr std::array LimitSSBOs = {
|
constexpr std::array LimitSSBOs = {
|
||||||
GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS,
|
GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS,
|
||||||
GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS,
|
GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS,
|
||||||
GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS};
|
GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS, GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS};
|
||||||
|
|
||||||
constexpr std::array LimitSamplers = {
|
constexpr std::array LimitSamplers = {GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS,
|
||||||
GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS, GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS,
|
GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS,
|
||||||
GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS, GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS,
|
GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS,
|
||||||
GL_MAX_TEXTURE_IMAGE_UNITS};
|
GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS,
|
||||||
|
GL_MAX_TEXTURE_IMAGE_UNITS,
|
||||||
|
GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS};
|
||||||
|
|
||||||
constexpr std::array LimitImages = {GL_MAX_VERTEX_IMAGE_UNIFORMS,
|
constexpr std::array LimitImages = {
|
||||||
GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS,
|
GL_MAX_VERTEX_IMAGE_UNIFORMS, GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS,
|
||||||
GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS,
|
GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS, GL_MAX_GEOMETRY_IMAGE_UNIFORMS,
|
||||||
GL_MAX_GEOMETRY_IMAGE_UNIFORMS, GL_MAX_FRAGMENT_IMAGE_UNIFORMS};
|
GL_MAX_FRAGMENT_IMAGE_UNIFORMS, GL_MAX_COMPUTE_IMAGE_UNIFORMS};
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
T GetInteger(GLenum pname) {
|
T GetInteger(GLenum pname) {
|
||||||
|
@ -85,6 +89,13 @@ u32 Extract(u32& base, u32& num, u32 amount, std::optional<GLenum> limit = {}) {
|
||||||
return std::exchange(base, base + amount);
|
return std::exchange(base, base + amount);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::array<u32, Tegra::Engines::MaxShaderTypes> BuildMaxUniformBuffers() noexcept {
|
||||||
|
std::array<u32, Tegra::Engines::MaxShaderTypes> max;
|
||||||
|
std::transform(LimitUBOs.begin(), LimitUBOs.end(), max.begin(),
|
||||||
|
[](GLenum pname) { return GetInteger<u32>(pname); });
|
||||||
|
return max;
|
||||||
|
}
|
||||||
|
|
||||||
std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindings() noexcept {
|
std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindings() noexcept {
|
||||||
std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> bindings;
|
std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> bindings;
|
||||||
|
|
||||||
|
@ -112,16 +123,24 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin
|
||||||
u32 num_images = GetInteger<u32>(GL_MAX_IMAGE_UNITS);
|
u32 num_images = GetInteger<u32>(GL_MAX_IMAGE_UNITS);
|
||||||
u32 base_images = 0;
|
u32 base_images = 0;
|
||||||
|
|
||||||
// Reserve more image bindings on fragment and vertex stages.
|
// GL_MAX_IMAGE_UNITS is guaranteed by the spec to have a minimum value of 8.
|
||||||
|
// Due to the limitation of GL_MAX_IMAGE_UNITS, reserve at least 4 image bindings on the
|
||||||
|
// fragment stage, and at least 1 for the rest of the stages.
|
||||||
|
// So far games are observed to use 1 image binding on vertex and 4 on fragment stages.
|
||||||
|
|
||||||
|
// Reserve at least 4 image bindings on the fragment stage.
|
||||||
bindings[4].image =
|
bindings[4].image =
|
||||||
Extract(base_images, num_images, num_images / NumStages + 2, LimitImages[4]);
|
Extract(base_images, num_images, std::max(4U, num_images / NumStages), LimitImages[4]);
|
||||||
bindings[0].image =
|
|
||||||
Extract(base_images, num_images, num_images / NumStages + 1, LimitImages[0]);
|
// This is guaranteed to be at least 1.
|
||||||
|
const u32 total_extracted_images = num_images / (NumStages - 1);
|
||||||
|
|
||||||
// Reserve the other image bindings.
|
// Reserve the other image bindings.
|
||||||
const u32 total_extracted_images = num_images / (NumStages - 2);
|
for (std::size_t i = 0; i < NumStages; ++i) {
|
||||||
for (std::size_t i = 2; i < NumStages; ++i) {
|
|
||||||
const std::size_t stage = stage_swizzle[i];
|
const std::size_t stage = stage_swizzle[i];
|
||||||
|
if (stage == 4) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
bindings[stage].image =
|
bindings[stage].image =
|
||||||
Extract(base_images, num_images, total_extracted_images, LimitImages[stage]);
|
Extract(base_images, num_images, total_extracted_images, LimitImages[stage]);
|
||||||
}
|
}
|
||||||
|
@ -133,6 +152,7 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin
|
||||||
}
|
}
|
||||||
|
|
||||||
bool IsASTCSupported() {
|
bool IsASTCSupported() {
|
||||||
|
static constexpr std::array targets = {GL_TEXTURE_2D, GL_TEXTURE_2D_ARRAY};
|
||||||
static constexpr std::array formats = {
|
static constexpr std::array formats = {
|
||||||
GL_COMPRESSED_RGBA_ASTC_4x4_KHR, GL_COMPRESSED_RGBA_ASTC_5x4_KHR,
|
GL_COMPRESSED_RGBA_ASTC_4x4_KHR, GL_COMPRESSED_RGBA_ASTC_5x4_KHR,
|
||||||
GL_COMPRESSED_RGBA_ASTC_5x5_KHR, GL_COMPRESSED_RGBA_ASTC_6x5_KHR,
|
GL_COMPRESSED_RGBA_ASTC_5x5_KHR, GL_COMPRESSED_RGBA_ASTC_6x5_KHR,
|
||||||
|
@ -149,25 +169,59 @@ bool IsASTCSupported() {
|
||||||
GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR,
|
GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR,
|
||||||
GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR,
|
GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR,
|
||||||
};
|
};
|
||||||
return std::find_if_not(formats.begin(), formats.end(), [](GLenum format) {
|
static constexpr std::array required_support = {
|
||||||
GLint supported;
|
GL_VERTEX_TEXTURE, GL_TESS_CONTROL_TEXTURE, GL_TESS_EVALUATION_TEXTURE,
|
||||||
glGetInternalformativ(GL_TEXTURE_2D, format, GL_INTERNALFORMAT_SUPPORTED, 1,
|
GL_GEOMETRY_TEXTURE, GL_FRAGMENT_TEXTURE, GL_COMPUTE_TEXTURE,
|
||||||
&supported);
|
};
|
||||||
return supported == GL_TRUE;
|
|
||||||
}) == formats.end();
|
for (const GLenum target : targets) {
|
||||||
|
for (const GLenum format : formats) {
|
||||||
|
for (const GLenum support : required_support) {
|
||||||
|
GLint value;
|
||||||
|
glGetInternalformativ(target, format, support, 1, &value);
|
||||||
|
if (value != GL_FULL_SUPPORT) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// @brief Returns true when a GL_RENDERER is a Turing GPU
|
||||||
|
/// @param renderer GL_RENDERER string
|
||||||
|
bool IsTuring(std::string_view renderer) {
|
||||||
|
static constexpr std::array<std::string_view, 12> TURING_GPUS = {
|
||||||
|
"GTX 1650", "GTX 1660", "RTX 2060", "RTX 2070",
|
||||||
|
"RTX 2080", "TITAN RTX", "Quadro RTX 3000", "Quadro RTX 4000",
|
||||||
|
"Quadro RTX 5000", "Quadro RTX 6000", "Quadro RTX 8000", "Tesla T4",
|
||||||
|
};
|
||||||
|
return std::any_of(TURING_GPUS.begin(), TURING_GPUS.end(),
|
||||||
|
[renderer](std::string_view candidate) {
|
||||||
|
return renderer.find(candidate) != std::string_view::npos;
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
} // Anonymous namespace
|
} // Anonymous namespace
|
||||||
|
|
||||||
Device::Device() : base_bindings{BuildBaseBindings()} {
|
Device::Device()
|
||||||
|
: max_uniform_buffers{BuildMaxUniformBuffers()}, base_bindings{BuildBaseBindings()} {
|
||||||
const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR));
|
const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR));
|
||||||
const auto renderer = reinterpret_cast<const char*>(glGetString(GL_RENDERER));
|
const std::string_view renderer = reinterpret_cast<const char*>(glGetString(GL_RENDERER));
|
||||||
|
const std::string_view version = reinterpret_cast<const char*>(glGetString(GL_VERSION));
|
||||||
const std::vector extensions = GetExtensions();
|
const std::vector extensions = GetExtensions();
|
||||||
|
|
||||||
const bool is_nvidia = vendor == "NVIDIA Corporation";
|
const bool is_nvidia = vendor == "NVIDIA Corporation";
|
||||||
const bool is_amd = vendor == "ATI Technologies Inc.";
|
const bool is_amd = vendor == "ATI Technologies Inc.";
|
||||||
const bool is_intel = vendor == "Intel";
|
const bool is_turing = is_nvidia && IsTuring(renderer);
|
||||||
const bool is_intel_proprietary = is_intel && std::strstr(renderer, "Mesa") == nullptr;
|
|
||||||
|
bool disable_fast_buffer_sub_data = false;
|
||||||
|
if (is_nvidia && version == "4.6.0 NVIDIA 443.24") {
|
||||||
|
LOG_WARNING(
|
||||||
|
Render_OpenGL,
|
||||||
|
"Beta driver 443.24 is known to have issues. There might be performance issues.");
|
||||||
|
disable_fast_buffer_sub_data = true;
|
||||||
|
}
|
||||||
|
|
||||||
uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
|
uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
|
||||||
shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
|
shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
|
||||||
|
@ -178,14 +232,24 @@ Device::Device() : base_bindings{BuildBaseBindings()} {
|
||||||
has_shader_ballot = GLAD_GL_ARB_shader_ballot;
|
has_shader_ballot = GLAD_GL_ARB_shader_ballot;
|
||||||
has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array;
|
has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array;
|
||||||
has_image_load_formatted = HasExtension(extensions, "GL_EXT_shader_image_load_formatted");
|
has_image_load_formatted = HasExtension(extensions, "GL_EXT_shader_image_load_formatted");
|
||||||
|
has_texture_shadow_lod = HasExtension(extensions, "GL_EXT_texture_shadow_lod");
|
||||||
has_astc = IsASTCSupported();
|
has_astc = IsASTCSupported();
|
||||||
has_variable_aoffi = TestVariableAoffi();
|
has_variable_aoffi = TestVariableAoffi();
|
||||||
has_component_indexing_bug = is_amd;
|
has_component_indexing_bug = is_amd;
|
||||||
has_precise_bug = TestPreciseBug();
|
has_precise_bug = TestPreciseBug();
|
||||||
has_broken_compute = is_intel_proprietary;
|
has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2;
|
||||||
has_fast_buffer_sub_data = is_nvidia;
|
|
||||||
|
// At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive
|
||||||
|
// uniform buffers as "push constants"
|
||||||
|
has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data;
|
||||||
|
|
||||||
|
// Nvidia's driver on Turing GPUs randomly crashes when the buffer is made resident, or on
|
||||||
|
// DeleteBuffers. Disable unified memory on these devices.
|
||||||
|
has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory && !is_turing;
|
||||||
|
|
||||||
use_assembly_shaders = Settings::values.use_assembly_shaders && GLAD_GL_NV_gpu_program5 &&
|
use_assembly_shaders = Settings::values.use_assembly_shaders && GLAD_GL_NV_gpu_program5 &&
|
||||||
GLAD_GL_NV_compute_program5;
|
GLAD_GL_NV_compute_program5 && GLAD_GL_NV_transform_feedback &&
|
||||||
|
GLAD_GL_NV_transform_feedback2;
|
||||||
|
|
||||||
LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi);
|
LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi);
|
||||||
LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug);
|
LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug);
|
||||||
|
@ -197,17 +261,17 @@ Device::Device() : base_bindings{BuildBaseBindings()} {
|
||||||
}
|
}
|
||||||
|
|
||||||
Device::Device(std::nullptr_t) {
|
Device::Device(std::nullptr_t) {
|
||||||
uniform_buffer_alignment = 0;
|
max_uniform_buffers.fill(std::numeric_limits<u32>::max());
|
||||||
|
uniform_buffer_alignment = 4;
|
||||||
|
shader_storage_alignment = 4;
|
||||||
max_vertex_attributes = 16;
|
max_vertex_attributes = 16;
|
||||||
max_varyings = 15;
|
max_varyings = 15;
|
||||||
has_warp_intrinsics = true;
|
has_warp_intrinsics = true;
|
||||||
has_shader_ballot = true;
|
has_shader_ballot = true;
|
||||||
has_vertex_viewport_layer = true;
|
has_vertex_viewport_layer = true;
|
||||||
has_image_load_formatted = true;
|
has_image_load_formatted = true;
|
||||||
|
has_texture_shadow_lod = true;
|
||||||
has_variable_aoffi = true;
|
has_variable_aoffi = true;
|
||||||
has_component_indexing_bug = false;
|
|
||||||
has_broken_compute = false;
|
|
||||||
has_precise_bug = false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Device::TestVariableAoffi() {
|
bool Device::TestVariableAoffi() {
|
||||||
|
|
|
@ -24,6 +24,10 @@ public:
|
||||||
explicit Device();
|
explicit Device();
|
||||||
explicit Device(std::nullptr_t);
|
explicit Device(std::nullptr_t);
|
||||||
|
|
||||||
|
u32 GetMaxUniformBuffers(Tegra::Engines::ShaderType shader_type) const noexcept {
|
||||||
|
return max_uniform_buffers[static_cast<std::size_t>(shader_type)];
|
||||||
|
}
|
||||||
|
|
||||||
const BaseBindings& GetBaseBindings(std::size_t stage_index) const noexcept {
|
const BaseBindings& GetBaseBindings(std::size_t stage_index) const noexcept {
|
||||||
return base_bindings[stage_index];
|
return base_bindings[stage_index];
|
||||||
}
|
}
|
||||||
|
@ -64,6 +68,14 @@ public:
|
||||||
return has_image_load_formatted;
|
return has_image_load_formatted;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool HasTextureShadowLod() const {
|
||||||
|
return has_texture_shadow_lod;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool HasVertexBufferUnifiedMemory() const {
|
||||||
|
return has_vertex_buffer_unified_memory;
|
||||||
|
}
|
||||||
|
|
||||||
bool HasASTC() const {
|
bool HasASTC() const {
|
||||||
return has_astc;
|
return has_astc;
|
||||||
}
|
}
|
||||||
|
@ -80,14 +92,14 @@ public:
|
||||||
return has_precise_bug;
|
return has_precise_bug;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool HasBrokenCompute() const {
|
|
||||||
return has_broken_compute;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool HasFastBufferSubData() const {
|
bool HasFastBufferSubData() const {
|
||||||
return has_fast_buffer_sub_data;
|
return has_fast_buffer_sub_data;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool HasNvViewportArray2() const {
|
||||||
|
return has_nv_viewport_array2;
|
||||||
|
}
|
||||||
|
|
||||||
bool UseAssemblyShaders() const {
|
bool UseAssemblyShaders() const {
|
||||||
return use_assembly_shaders;
|
return use_assembly_shaders;
|
||||||
}
|
}
|
||||||
|
@ -96,7 +108,8 @@ private:
|
||||||
static bool TestVariableAoffi();
|
static bool TestVariableAoffi();
|
||||||
static bool TestPreciseBug();
|
static bool TestPreciseBug();
|
||||||
|
|
||||||
std::array<BaseBindings, Tegra::Engines::MaxShaderTypes> base_bindings;
|
std::array<u32, Tegra::Engines::MaxShaderTypes> max_uniform_buffers{};
|
||||||
|
std::array<BaseBindings, Tegra::Engines::MaxShaderTypes> base_bindings{};
|
||||||
std::size_t uniform_buffer_alignment{};
|
std::size_t uniform_buffer_alignment{};
|
||||||
std::size_t shader_storage_alignment{};
|
std::size_t shader_storage_alignment{};
|
||||||
u32 max_vertex_attributes{};
|
u32 max_vertex_attributes{};
|
||||||
|
@ -105,12 +118,14 @@ private:
|
||||||
bool has_shader_ballot{};
|
bool has_shader_ballot{};
|
||||||
bool has_vertex_viewport_layer{};
|
bool has_vertex_viewport_layer{};
|
||||||
bool has_image_load_formatted{};
|
bool has_image_load_formatted{};
|
||||||
|
bool has_texture_shadow_lod{};
|
||||||
|
bool has_vertex_buffer_unified_memory{};
|
||||||
bool has_astc{};
|
bool has_astc{};
|
||||||
bool has_variable_aoffi{};
|
bool has_variable_aoffi{};
|
||||||
bool has_component_indexing_bug{};
|
bool has_component_indexing_bug{};
|
||||||
bool has_precise_bug{};
|
bool has_precise_bug{};
|
||||||
bool has_broken_compute{};
|
|
||||||
bool has_fast_buffer_sub_data{};
|
bool has_fast_buffer_sub_data{};
|
||||||
|
bool has_nv_viewport_array2{};
|
||||||
bool use_assembly_shaders{};
|
bool use_assembly_shaders{};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -30,6 +30,7 @@
|
||||||
#include "video_core/renderer_opengl/gl_shader_cache.h"
|
#include "video_core/renderer_opengl/gl_shader_cache.h"
|
||||||
#include "video_core/renderer_opengl/maxwell_to_gl.h"
|
#include "video_core/renderer_opengl/maxwell_to_gl.h"
|
||||||
#include "video_core/renderer_opengl/renderer_opengl.h"
|
#include "video_core/renderer_opengl/renderer_opengl.h"
|
||||||
|
#include "video_core/shader_cache.h"
|
||||||
|
|
||||||
namespace OpenGL {
|
namespace OpenGL {
|
||||||
|
|
||||||
|
@ -54,15 +55,34 @@ MICROPROFILE_DEFINE(OpenGL_PrimitiveAssembly, "OpenGL", "Prim Asmbl", MP_RGB(255
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
constexpr std::size_t NumSupportedVertexAttributes = 16;
|
constexpr std::size_t NUM_CONST_BUFFERS_PER_STAGE = 18;
|
||||||
|
constexpr std::size_t NUM_CONST_BUFFERS_BYTES_PER_STAGE =
|
||||||
|
NUM_CONST_BUFFERS_PER_STAGE * Maxwell::MaxConstBufferSize;
|
||||||
|
constexpr std::size_t TOTAL_CONST_BUFFER_BYTES =
|
||||||
|
NUM_CONST_BUFFERS_BYTES_PER_STAGE * Maxwell::MaxShaderStage;
|
||||||
|
|
||||||
|
constexpr std::size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16;
|
||||||
|
constexpr std::size_t NUM_SUPPORTED_VERTEX_BINDINGS = 16;
|
||||||
|
|
||||||
template <typename Engine, typename Entry>
|
template <typename Engine, typename Entry>
|
||||||
Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry,
|
Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry,
|
||||||
ShaderType shader_type, std::size_t index = 0) {
|
ShaderType shader_type, std::size_t index = 0) {
|
||||||
if (entry.is_bindless) {
|
if constexpr (std::is_same_v<Entry, SamplerEntry>) {
|
||||||
const auto tex_handle = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset);
|
if (entry.is_separated) {
|
||||||
return engine.GetTextureInfo(tex_handle);
|
const u32 buffer_1 = entry.buffer;
|
||||||
|
const u32 buffer_2 = entry.secondary_buffer;
|
||||||
|
const u32 offset_1 = entry.offset;
|
||||||
|
const u32 offset_2 = entry.secondary_offset;
|
||||||
|
const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1);
|
||||||
|
const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2);
|
||||||
|
return engine.GetTextureInfo(handle_1 | handle_2);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
if (entry.is_bindless) {
|
||||||
|
const u32 handle = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset);
|
||||||
|
return engine.GetTextureInfo(handle);
|
||||||
|
}
|
||||||
|
|
||||||
const auto& gpu_profile = engine.AccessGuestDriverProfile();
|
const auto& gpu_profile = engine.AccessGuestDriverProfile();
|
||||||
const u32 offset = entry.offset + static_cast<u32>(index * gpu_profile.GetTextureHandlerSize());
|
const u32 offset = entry.offset + static_cast<u32>(index * gpu_profile.GetTextureHandlerSize());
|
||||||
if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) {
|
if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) {
|
||||||
|
@ -87,6 +107,34 @@ std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer,
|
||||||
return buffer.size;
|
return buffer.size;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Translates hardware transform feedback indices
|
||||||
|
/// @param location Hardware location
|
||||||
|
/// @return Pair of ARB_transform_feedback3 token stream first and third arguments
|
||||||
|
/// @note Read https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_transform_feedback3.txt
|
||||||
|
std::pair<GLint, GLint> TransformFeedbackEnum(u8 location) {
|
||||||
|
const u8 index = location / 4;
|
||||||
|
if (index >= 8 && index <= 39) {
|
||||||
|
return {GL_GENERIC_ATTRIB_NV, index - 8};
|
||||||
|
}
|
||||||
|
if (index >= 48 && index <= 55) {
|
||||||
|
return {GL_TEXTURE_COORD_NV, index - 48};
|
||||||
|
}
|
||||||
|
switch (index) {
|
||||||
|
case 7:
|
||||||
|
return {GL_POSITION, 0};
|
||||||
|
case 40:
|
||||||
|
return {GL_PRIMARY_COLOR_NV, 0};
|
||||||
|
case 41:
|
||||||
|
return {GL_SECONDARY_COLOR_NV, 0};
|
||||||
|
case 42:
|
||||||
|
return {GL_BACK_PRIMARY_COLOR_NV, 0};
|
||||||
|
case 43:
|
||||||
|
return {GL_BACK_SECONDARY_COLOR_NV, 0};
|
||||||
|
}
|
||||||
|
UNIMPLEMENTED_MSG("index={}", static_cast<int>(index));
|
||||||
|
return {GL_POSITION, 0};
|
||||||
|
}
|
||||||
|
|
||||||
void oglEnable(GLenum cap, bool state) {
|
void oglEnable(GLenum cap, bool state) {
|
||||||
(state ? glEnable : glDisable)(cap);
|
(state ? glEnable : glDisable)(cap);
|
||||||
}
|
}
|
||||||
|
@ -104,6 +152,9 @@ RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWind
|
||||||
screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker} {
|
screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker} {
|
||||||
CheckExtensions();
|
CheckExtensions();
|
||||||
|
|
||||||
|
unified_uniform_buffer.Create();
|
||||||
|
glNamedBufferStorage(unified_uniform_buffer.handle, TOTAL_CONST_BUFFER_BYTES, nullptr, 0);
|
||||||
|
|
||||||
if (device.UseAssemblyShaders()) {
|
if (device.UseAssemblyShaders()) {
|
||||||
glCreateBuffers(static_cast<GLsizei>(staging_cbufs.size()), staging_cbufs.data());
|
glCreateBuffers(static_cast<GLsizei>(staging_cbufs.size()), staging_cbufs.data());
|
||||||
for (const GLuint cbuf : staging_cbufs) {
|
for (const GLuint cbuf : staging_cbufs) {
|
||||||
|
@ -143,7 +194,7 @@ void RasterizerOpenGL::SetupVertexFormat() {
|
||||||
// avoid OpenGL errors.
|
// avoid OpenGL errors.
|
||||||
// TODO(Subv): Analyze the shader to identify which attributes are actually used and don't
|
// TODO(Subv): Analyze the shader to identify which attributes are actually used and don't
|
||||||
// assume every shader uses them all.
|
// assume every shader uses them all.
|
||||||
for (std::size_t index = 0; index < NumSupportedVertexAttributes; ++index) {
|
for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_ATTRIBUTES; ++index) {
|
||||||
if (!flags[Dirty::VertexFormat0 + index]) {
|
if (!flags[Dirty::VertexFormat0 + index]) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
@ -181,9 +232,11 @@ void RasterizerOpenGL::SetupVertexBuffer() {
|
||||||
|
|
||||||
MICROPROFILE_SCOPE(OpenGL_VB);
|
MICROPROFILE_SCOPE(OpenGL_VB);
|
||||||
|
|
||||||
|
const bool use_unified_memory = device.HasVertexBufferUnifiedMemory();
|
||||||
|
|
||||||
// Upload all guest vertex arrays sequentially to our buffer
|
// Upload all guest vertex arrays sequentially to our buffer
|
||||||
const auto& regs = gpu.regs;
|
const auto& regs = gpu.regs;
|
||||||
for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
|
for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_BINDINGS; ++index) {
|
||||||
if (!flags[Dirty::VertexBuffer0 + index]) {
|
if (!flags[Dirty::VertexBuffer0 + index]) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
@ -196,16 +249,25 @@ void RasterizerOpenGL::SetupVertexBuffer() {
|
||||||
|
|
||||||
const GPUVAddr start = vertex_array.StartAddress();
|
const GPUVAddr start = vertex_array.StartAddress();
|
||||||
const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
|
const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
|
||||||
|
|
||||||
ASSERT(end >= start);
|
ASSERT(end >= start);
|
||||||
|
|
||||||
|
const GLuint gl_index = static_cast<GLuint>(index);
|
||||||
const u64 size = end - start;
|
const u64 size = end - start;
|
||||||
if (size == 0) {
|
if (size == 0) {
|
||||||
glBindVertexBuffer(static_cast<GLuint>(index), 0, 0, vertex_array.stride);
|
glBindVertexBuffer(gl_index, 0, 0, vertex_array.stride);
|
||||||
|
if (use_unified_memory) {
|
||||||
|
glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, gl_index, 0, 0);
|
||||||
|
}
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
const auto [vertex_buffer, vertex_buffer_offset] = buffer_cache.UploadMemory(start, size);
|
const auto info = buffer_cache.UploadMemory(start, size);
|
||||||
glBindVertexBuffer(static_cast<GLuint>(index), vertex_buffer, vertex_buffer_offset,
|
if (use_unified_memory) {
|
||||||
vertex_array.stride);
|
glBindVertexBuffer(gl_index, 0, 0, vertex_array.stride);
|
||||||
|
glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, gl_index,
|
||||||
|
info.address + info.offset, size);
|
||||||
|
} else {
|
||||||
|
glBindVertexBuffer(gl_index, info.handle, info.offset, vertex_array.stride);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -218,7 +280,7 @@ void RasterizerOpenGL::SetupVertexInstances() {
|
||||||
flags[Dirty::VertexInstances] = false;
|
flags[Dirty::VertexInstances] = false;
|
||||||
|
|
||||||
const auto& regs = gpu.regs;
|
const auto& regs = gpu.regs;
|
||||||
for (std::size_t index = 0; index < NumSupportedVertexAttributes; ++index) {
|
for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_ATTRIBUTES; ++index) {
|
||||||
if (!flags[Dirty::VertexInstance0 + index]) {
|
if (!flags[Dirty::VertexInstance0 + index]) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
@ -235,9 +297,9 @@ GLintptr RasterizerOpenGL::SetupIndexBuffer() {
|
||||||
MICROPROFILE_SCOPE(OpenGL_Index);
|
MICROPROFILE_SCOPE(OpenGL_Index);
|
||||||
const auto& regs = system.GPU().Maxwell3D().regs;
|
const auto& regs = system.GPU().Maxwell3D().regs;
|
||||||
const std::size_t size = CalculateIndexBufferSize();
|
const std::size_t size = CalculateIndexBufferSize();
|
||||||
const auto [buffer, offset] = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size);
|
const auto info = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size);
|
||||||
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer);
|
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, info.handle);
|
||||||
return offset;
|
return info.offset;
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
|
void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
|
||||||
|
@ -273,7 +335,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
Shader shader{shader_cache.GetStageProgram(program)};
|
Shader* const shader = shader_cache.GetStageProgram(program);
|
||||||
|
|
||||||
if (device.UseAssemblyShaders()) {
|
if (device.UseAssemblyShaders()) {
|
||||||
// Check for ARB limitation. We only have 16 SSBOs per context state. To workaround this
|
// Check for ARB limitation. We only have 16 SSBOs per context state. To workaround this
|
||||||
|
@ -567,7 +629,16 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
|
||||||
(Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
|
(Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
|
||||||
|
|
||||||
// Prepare the vertex array.
|
// Prepare the vertex array.
|
||||||
buffer_cache.Map(buffer_size);
|
const bool invalidated = buffer_cache.Map(buffer_size);
|
||||||
|
|
||||||
|
if (invalidated) {
|
||||||
|
// When the stream buffer has been invalidated, we have to consider vertex buffers as dirty
|
||||||
|
auto& dirty = gpu.dirty.flags;
|
||||||
|
dirty[Dirty::VertexBuffers] = true;
|
||||||
|
for (int index = Dirty::VertexBuffer0; index <= Dirty::VertexBuffer31; ++index) {
|
||||||
|
dirty[index] = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Prepare vertex array format.
|
// Prepare vertex array format.
|
||||||
SetupVertexFormat();
|
SetupVertexFormat();
|
||||||
|
@ -584,9 +655,9 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
|
||||||
if (!device.UseAssemblyShaders()) {
|
if (!device.UseAssemblyShaders()) {
|
||||||
MaxwellUniformData ubo;
|
MaxwellUniformData ubo;
|
||||||
ubo.SetFromRegs(gpu);
|
ubo.SetFromRegs(gpu);
|
||||||
const auto [buffer, offset] =
|
const auto info =
|
||||||
buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment());
|
buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment());
|
||||||
glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, buffer, offset,
|
glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, info.handle, info.offset,
|
||||||
static_cast<GLsizeiptr>(sizeof(ubo)));
|
static_cast<GLsizeiptr>(sizeof(ubo)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -655,10 +726,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
|
void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
|
||||||
if (device.HasBrokenCompute()) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
buffer_cache.Acquire();
|
buffer_cache.Acquire();
|
||||||
current_cbuf = 0;
|
current_cbuf = 0;
|
||||||
|
|
||||||
|
@ -837,7 +904,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader) {
|
void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* shader) {
|
||||||
static constexpr std::array PARAMETER_LUT = {
|
static constexpr std::array PARAMETER_LUT = {
|
||||||
GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
|
GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
|
||||||
GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV,
|
GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV,
|
||||||
|
@ -846,41 +913,62 @@ void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, const Shad
|
||||||
MICROPROFILE_SCOPE(OpenGL_UBO);
|
MICROPROFILE_SCOPE(OpenGL_UBO);
|
||||||
const auto& stages = system.GPU().Maxwell3D().state.shader_stages;
|
const auto& stages = system.GPU().Maxwell3D().state.shader_stages;
|
||||||
const auto& shader_stage = stages[stage_index];
|
const auto& shader_stage = stages[stage_index];
|
||||||
|
const auto& entries = shader->GetEntries();
|
||||||
|
const bool use_unified = entries.use_unified_uniforms;
|
||||||
|
const std::size_t base_unified_offset = stage_index * NUM_CONST_BUFFERS_BYTES_PER_STAGE;
|
||||||
|
|
||||||
u32 binding =
|
const auto base_bindings = device.GetBaseBindings(stage_index);
|
||||||
device.UseAssemblyShaders() ? 0 : device.GetBaseBindings(stage_index).uniform_buffer;
|
u32 binding = device.UseAssemblyShaders() ? 0 : base_bindings.uniform_buffer;
|
||||||
for (const auto& entry : shader->GetEntries().const_buffers) {
|
for (const auto& entry : entries.const_buffers) {
|
||||||
const auto& buffer = shader_stage.const_buffers[entry.GetIndex()];
|
const u32 index = entry.GetIndex();
|
||||||
SetupConstBuffer(PARAMETER_LUT[stage_index], binding++, buffer, entry);
|
const auto& buffer = shader_stage.const_buffers[index];
|
||||||
|
SetupConstBuffer(PARAMETER_LUT[stage_index], binding, buffer, entry, use_unified,
|
||||||
|
base_unified_offset + index * Maxwell::MaxConstBufferSize);
|
||||||
|
++binding;
|
||||||
|
}
|
||||||
|
if (use_unified) {
|
||||||
|
const u32 index = static_cast<u32>(base_bindings.shader_storage_buffer +
|
||||||
|
entries.global_memory_entries.size());
|
||||||
|
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, index, unified_uniform_buffer.handle,
|
||||||
|
base_unified_offset, NUM_CONST_BUFFERS_BYTES_PER_STAGE);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) {
|
void RasterizerOpenGL::SetupComputeConstBuffers(Shader* kernel) {
|
||||||
MICROPROFILE_SCOPE(OpenGL_UBO);
|
MICROPROFILE_SCOPE(OpenGL_UBO);
|
||||||
const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
|
const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
|
||||||
|
const auto& entries = kernel->GetEntries();
|
||||||
|
const bool use_unified = entries.use_unified_uniforms;
|
||||||
|
|
||||||
u32 binding = 0;
|
u32 binding = 0;
|
||||||
for (const auto& entry : kernel->GetEntries().const_buffers) {
|
for (const auto& entry : entries.const_buffers) {
|
||||||
const auto& config = launch_desc.const_buffer_config[entry.GetIndex()];
|
const auto& config = launch_desc.const_buffer_config[entry.GetIndex()];
|
||||||
const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value();
|
const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value();
|
||||||
Tegra::Engines::ConstBufferInfo buffer;
|
Tegra::Engines::ConstBufferInfo buffer;
|
||||||
buffer.address = config.Address();
|
buffer.address = config.Address();
|
||||||
buffer.size = config.size;
|
buffer.size = config.size;
|
||||||
buffer.enabled = mask[entry.GetIndex()];
|
buffer.enabled = mask[entry.GetIndex()];
|
||||||
SetupConstBuffer(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding++, buffer, entry);
|
SetupConstBuffer(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding, buffer, entry,
|
||||||
|
use_unified, entry.GetIndex() * Maxwell::MaxConstBufferSize);
|
||||||
|
++binding;
|
||||||
|
}
|
||||||
|
if (use_unified) {
|
||||||
|
const GLuint index = static_cast<GLuint>(entries.global_memory_entries.size());
|
||||||
|
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, index, unified_uniform_buffer.handle, 0,
|
||||||
|
NUM_CONST_BUFFERS_BYTES_PER_STAGE);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding,
|
void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding,
|
||||||
const Tegra::Engines::ConstBufferInfo& buffer,
|
const Tegra::Engines::ConstBufferInfo& buffer,
|
||||||
const ConstBufferEntry& entry) {
|
const ConstBufferEntry& entry, bool use_unified,
|
||||||
|
std::size_t unified_offset) {
|
||||||
if (!buffer.enabled) {
|
if (!buffer.enabled) {
|
||||||
// Set values to zero to unbind buffers
|
// Set values to zero to unbind buffers
|
||||||
if (device.UseAssemblyShaders()) {
|
if (device.UseAssemblyShaders()) {
|
||||||
glBindBufferRangeNV(stage, entry.GetIndex(), 0, 0, 0);
|
glBindBufferRangeNV(stage, entry.GetIndex(), 0, 0, 0);
|
||||||
} else {
|
} else {
|
||||||
glBindBufferRange(GL_UNIFORM_BUFFER, binding,
|
glBindBufferRange(GL_UNIFORM_BUFFER, binding, 0, 0, sizeof(float));
|
||||||
buffer_cache.GetEmptyBuffer(sizeof(float)), 0, sizeof(float));
|
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -889,23 +977,33 @@ void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding,
|
||||||
// UBO alignment requirements.
|
// UBO alignment requirements.
|
||||||
const std::size_t size = Common::AlignUp(GetConstBufferSize(buffer, entry), sizeof(GLvec4));
|
const std::size_t size = Common::AlignUp(GetConstBufferSize(buffer, entry), sizeof(GLvec4));
|
||||||
|
|
||||||
const auto alignment = device.GetUniformBufferAlignment();
|
const bool fast_upload = !use_unified && device.HasFastBufferSubData();
|
||||||
auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment, false,
|
|
||||||
device.HasFastBufferSubData());
|
const std::size_t alignment = use_unified ? 4 : device.GetUniformBufferAlignment();
|
||||||
if (!device.UseAssemblyShaders()) {
|
const GPUVAddr gpu_addr = buffer.address;
|
||||||
glBindBufferRange(GL_UNIFORM_BUFFER, binding, cbuf, offset, size);
|
auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, false, fast_upload);
|
||||||
|
|
||||||
|
if (device.UseAssemblyShaders()) {
|
||||||
|
UNIMPLEMENTED_IF(use_unified);
|
||||||
|
if (info.offset != 0) {
|
||||||
|
const GLuint staging_cbuf = staging_cbufs[current_cbuf++];
|
||||||
|
glCopyNamedBufferSubData(info.handle, staging_cbuf, info.offset, 0, size);
|
||||||
|
info.handle = staging_cbuf;
|
||||||
|
info.offset = 0;
|
||||||
|
}
|
||||||
|
glBindBufferRangeNV(stage, binding, info.handle, info.offset, size);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (offset != 0) {
|
|
||||||
const GLuint staging_cbuf = staging_cbufs[current_cbuf++];
|
if (use_unified) {
|
||||||
glCopyNamedBufferSubData(cbuf, staging_cbuf, offset, 0, size);
|
glCopyNamedBufferSubData(info.handle, unified_uniform_buffer.handle, info.offset,
|
||||||
cbuf = staging_cbuf;
|
unified_offset, size);
|
||||||
offset = 0;
|
} else {
|
||||||
|
glBindBufferRange(GL_UNIFORM_BUFFER, binding, info.handle, info.offset, size);
|
||||||
}
|
}
|
||||||
glBindBufferRangeNV(stage, binding, cbuf, offset, size);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader) {
|
void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader) {
|
||||||
auto& gpu{system.GPU()};
|
auto& gpu{system.GPU()};
|
||||||
auto& memory_manager{gpu.MemoryManager()};
|
auto& memory_manager{gpu.MemoryManager()};
|
||||||
const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]};
|
const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]};
|
||||||
|
@ -920,7 +1018,7 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shad
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerOpenGL::SetupComputeGlobalMemory(const Shader& kernel) {
|
void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) {
|
||||||
auto& gpu{system.GPU()};
|
auto& gpu{system.GPU()};
|
||||||
auto& memory_manager{gpu.MemoryManager()};
|
auto& memory_manager{gpu.MemoryManager()};
|
||||||
const auto cbufs{gpu.KeplerCompute().launch_description.const_buffer_config};
|
const auto cbufs{gpu.KeplerCompute().launch_description.const_buffer_config};
|
||||||
|
@ -937,13 +1035,12 @@ void RasterizerOpenGL::SetupComputeGlobalMemory(const Shader& kernel) {
|
||||||
void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry,
|
void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry,
|
||||||
GPUVAddr gpu_addr, std::size_t size) {
|
GPUVAddr gpu_addr, std::size_t size) {
|
||||||
const auto alignment{device.GetShaderStorageBufferAlignment()};
|
const auto alignment{device.GetShaderStorageBufferAlignment()};
|
||||||
const auto [ssbo, buffer_offset] =
|
const auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written);
|
||||||
buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written);
|
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, info.handle, info.offset,
|
||||||
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, ssbo, buffer_offset,
|
|
||||||
static_cast<GLsizeiptr>(size));
|
static_cast<GLsizeiptr>(size));
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader& shader) {
|
void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, Shader* shader) {
|
||||||
MICROPROFILE_SCOPE(OpenGL_Texture);
|
MICROPROFILE_SCOPE(OpenGL_Texture);
|
||||||
const auto& maxwell3d = system.GPU().Maxwell3D();
|
const auto& maxwell3d = system.GPU().Maxwell3D();
|
||||||
u32 binding = device.GetBaseBindings(stage_index).sampler;
|
u32 binding = device.GetBaseBindings(stage_index).sampler;
|
||||||
|
@ -956,7 +1053,7 @@ void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader&
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) {
|
void RasterizerOpenGL::SetupComputeTextures(Shader* kernel) {
|
||||||
MICROPROFILE_SCOPE(OpenGL_Texture);
|
MICROPROFILE_SCOPE(OpenGL_Texture);
|
||||||
const auto& compute = system.GPU().KeplerCompute();
|
const auto& compute = system.GPU().KeplerCompute();
|
||||||
u32 binding = 0;
|
u32 binding = 0;
|
||||||
|
@ -985,7 +1082,7 @@ void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextu
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& shader) {
|
void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, Shader* shader) {
|
||||||
const auto& maxwell3d = system.GPU().Maxwell3D();
|
const auto& maxwell3d = system.GPU().Maxwell3D();
|
||||||
u32 binding = device.GetBaseBindings(stage_index).image;
|
u32 binding = device.GetBaseBindings(stage_index).image;
|
||||||
for (const auto& entry : shader->GetEntries().images) {
|
for (const auto& entry : shader->GetEntries().images) {
|
||||||
|
@ -995,7 +1092,7 @@ void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& sh
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerOpenGL::SetupComputeImages(const Shader& shader) {
|
void RasterizerOpenGL::SetupComputeImages(Shader* shader) {
|
||||||
const auto& compute = system.GPU().KeplerCompute();
|
const auto& compute = system.GPU().KeplerCompute();
|
||||||
u32 binding = 0;
|
u32 binding = 0;
|
||||||
for (const auto& entry : shader->GetEntries().images) {
|
for (const auto& entry : shader->GetEntries().images) {
|
||||||
|
@ -1024,6 +1121,26 @@ void RasterizerOpenGL::SyncViewport() {
|
||||||
const auto& regs = gpu.regs;
|
const auto& regs = gpu.regs;
|
||||||
|
|
||||||
const bool dirty_viewport = flags[Dirty::Viewports];
|
const bool dirty_viewport = flags[Dirty::Viewports];
|
||||||
|
const bool dirty_clip_control = flags[Dirty::ClipControl];
|
||||||
|
|
||||||
|
if (dirty_clip_control || flags[Dirty::FrontFace]) {
|
||||||
|
flags[Dirty::FrontFace] = false;
|
||||||
|
|
||||||
|
GLenum mode = MaxwellToGL::FrontFace(regs.front_face);
|
||||||
|
if (regs.screen_y_control.triangle_rast_flip != 0 &&
|
||||||
|
regs.viewport_transform[0].scale_y < 0.0f) {
|
||||||
|
switch (mode) {
|
||||||
|
case GL_CW:
|
||||||
|
mode = GL_CCW;
|
||||||
|
break;
|
||||||
|
case GL_CCW:
|
||||||
|
mode = GL_CW;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
glFrontFace(mode);
|
||||||
|
}
|
||||||
|
|
||||||
if (dirty_viewport || flags[Dirty::ClipControl]) {
|
if (dirty_viewport || flags[Dirty::ClipControl]) {
|
||||||
flags[Dirty::ClipControl] = false;
|
flags[Dirty::ClipControl] = false;
|
||||||
|
|
||||||
|
@ -1121,11 +1238,6 @@ void RasterizerOpenGL::SyncCullMode() {
|
||||||
glDisable(GL_CULL_FACE);
|
glDisable(GL_CULL_FACE);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (flags[Dirty::FrontFace]) {
|
|
||||||
flags[Dirty::FrontFace] = false;
|
|
||||||
glFrontFace(MaxwellToGL::FrontFace(regs.front_face));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerOpenGL::SyncPrimitiveRestart() {
|
void RasterizerOpenGL::SyncPrimitiveRestart() {
|
||||||
|
@ -1496,12 +1608,70 @@ void RasterizerOpenGL::SyncFramebufferSRGB() {
|
||||||
oglEnable(GL_FRAMEBUFFER_SRGB, gpu.regs.framebuffer_srgb);
|
oglEnable(GL_FRAMEBUFFER_SRGB, gpu.regs.framebuffer_srgb);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void RasterizerOpenGL::SyncTransformFeedback() {
|
||||||
|
// TODO(Rodrigo): Inject SKIP_COMPONENTS*_NV when required. An unimplemented message will signal
|
||||||
|
// when this is required.
|
||||||
|
const auto& regs = system.GPU().Maxwell3D().regs;
|
||||||
|
|
||||||
|
static constexpr std::size_t STRIDE = 3;
|
||||||
|
std::array<GLint, 128 * STRIDE * Maxwell::NumTransformFeedbackBuffers> attribs;
|
||||||
|
std::array<GLint, Maxwell::NumTransformFeedbackBuffers> streams;
|
||||||
|
|
||||||
|
GLint* cursor = attribs.data();
|
||||||
|
GLint* current_stream = streams.data();
|
||||||
|
|
||||||
|
for (std::size_t feedback = 0; feedback < Maxwell::NumTransformFeedbackBuffers; ++feedback) {
|
||||||
|
const auto& layout = regs.tfb_layouts[feedback];
|
||||||
|
UNIMPLEMENTED_IF_MSG(layout.stride != layout.varying_count * 4, "Stride padding");
|
||||||
|
if (layout.varying_count == 0) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
*current_stream = static_cast<GLint>(feedback);
|
||||||
|
if (current_stream != streams.data()) {
|
||||||
|
// When stepping one stream, push the expected token
|
||||||
|
cursor[0] = GL_NEXT_BUFFER_NV;
|
||||||
|
cursor[1] = 0;
|
||||||
|
cursor[2] = 0;
|
||||||
|
cursor += STRIDE;
|
||||||
|
}
|
||||||
|
++current_stream;
|
||||||
|
|
||||||
|
const auto& locations = regs.tfb_varying_locs[feedback];
|
||||||
|
std::optional<u8> current_index;
|
||||||
|
for (u32 offset = 0; offset < layout.varying_count; ++offset) {
|
||||||
|
const u8 location = locations[offset];
|
||||||
|
const u8 index = location / 4;
|
||||||
|
|
||||||
|
if (current_index == index) {
|
||||||
|
// Increase number of components of the previous attachment
|
||||||
|
++cursor[-2];
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
current_index = index;
|
||||||
|
|
||||||
|
std::tie(cursor[0], cursor[2]) = TransformFeedbackEnum(location);
|
||||||
|
cursor[1] = 1;
|
||||||
|
cursor += STRIDE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const GLsizei num_attribs = static_cast<GLsizei>((cursor - attribs.data()) / STRIDE);
|
||||||
|
const GLsizei num_strides = static_cast<GLsizei>(current_stream - streams.data());
|
||||||
|
glTransformFeedbackStreamAttribsNV(num_attribs, attribs.data(), num_strides, streams.data(),
|
||||||
|
GL_INTERLEAVED_ATTRIBS);
|
||||||
|
}
|
||||||
|
|
||||||
void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) {
|
void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) {
|
||||||
const auto& regs = system.GPU().Maxwell3D().regs;
|
const auto& regs = system.GPU().Maxwell3D().regs;
|
||||||
if (regs.tfb_enabled == 0) {
|
if (regs.tfb_enabled == 0) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (device.UseAssemblyShaders()) {
|
||||||
|
SyncTransformFeedback();
|
||||||
|
}
|
||||||
|
|
||||||
UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) ||
|
UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) ||
|
||||||
regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) ||
|
regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) ||
|
||||||
regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry));
|
regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry));
|
||||||
|
@ -1528,6 +1698,10 @@ void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) {
|
||||||
static_cast<GLsizeiptr>(size));
|
static_cast<GLsizeiptr>(size));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// We may have to call BeginTransformFeedbackNV here since they seem to call different
|
||||||
|
// implementations on Nvidia's driver (the pointer is different) but we are using
|
||||||
|
// ARB_transform_feedback3 features with NV_transform_feedback interactions and the ARB
|
||||||
|
// extension doesn't define BeginTransformFeedback (without NV) interactions. It just works.
|
||||||
glBeginTransformFeedback(GL_POINTS);
|
glBeginTransformFeedback(GL_POINTS);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1549,8 +1723,9 @@ void RasterizerOpenGL::EndTransformFeedback() {
|
||||||
const GLuint handle = transform_feedback_buffers[index].handle;
|
const GLuint handle = transform_feedback_buffers[index].handle;
|
||||||
const GPUVAddr gpu_addr = binding.Address();
|
const GPUVAddr gpu_addr = binding.Address();
|
||||||
const std::size_t size = binding.buffer_size;
|
const std::size_t size = binding.buffer_size;
|
||||||
const auto [dest_buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
|
const auto info = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
|
||||||
glCopyNamedBufferSubData(handle, dest_buffer, 0, offset, static_cast<GLsizeiptr>(size));
|
glCopyNamedBufferSubData(handle, info.handle, 0, info.offset,
|
||||||
|
static_cast<GLsizeiptr>(size));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -19,7 +19,6 @@
|
||||||
#include "video_core/engines/const_buffer_info.h"
|
#include "video_core/engines/const_buffer_info.h"
|
||||||
#include "video_core/engines/maxwell_3d.h"
|
#include "video_core/engines/maxwell_3d.h"
|
||||||
#include "video_core/rasterizer_accelerated.h"
|
#include "video_core/rasterizer_accelerated.h"
|
||||||
#include "video_core/rasterizer_cache.h"
|
|
||||||
#include "video_core/rasterizer_interface.h"
|
#include "video_core/rasterizer_interface.h"
|
||||||
#include "video_core/renderer_opengl/gl_buffer_cache.h"
|
#include "video_core/renderer_opengl/gl_buffer_cache.h"
|
||||||
#include "video_core/renderer_opengl/gl_device.h"
|
#include "video_core/renderer_opengl/gl_device.h"
|
||||||
|
@ -100,40 +99,41 @@ private:
|
||||||
void ConfigureClearFramebuffer(bool using_color, bool using_depth_stencil);
|
void ConfigureClearFramebuffer(bool using_color, bool using_depth_stencil);
|
||||||
|
|
||||||
/// Configures the current constbuffers to use for the draw command.
|
/// Configures the current constbuffers to use for the draw command.
|
||||||
void SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader);
|
void SetupDrawConstBuffers(std::size_t stage_index, Shader* shader);
|
||||||
|
|
||||||
/// Configures the current constbuffers to use for the kernel invocation.
|
/// Configures the current constbuffers to use for the kernel invocation.
|
||||||
void SetupComputeConstBuffers(const Shader& kernel);
|
void SetupComputeConstBuffers(Shader* kernel);
|
||||||
|
|
||||||
/// Configures a constant buffer.
|
/// Configures a constant buffer.
|
||||||
void SetupConstBuffer(GLenum stage, u32 binding, const Tegra::Engines::ConstBufferInfo& buffer,
|
void SetupConstBuffer(GLenum stage, u32 binding, const Tegra::Engines::ConstBufferInfo& buffer,
|
||||||
const ConstBufferEntry& entry);
|
const ConstBufferEntry& entry, bool use_unified,
|
||||||
|
std::size_t unified_offset);
|
||||||
|
|
||||||
/// Configures the current global memory entries to use for the draw command.
|
/// Configures the current global memory entries to use for the draw command.
|
||||||
void SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader);
|
void SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader);
|
||||||
|
|
||||||
/// Configures the current global memory entries to use for the kernel invocation.
|
/// Configures the current global memory entries to use for the kernel invocation.
|
||||||
void SetupComputeGlobalMemory(const Shader& kernel);
|
void SetupComputeGlobalMemory(Shader* kernel);
|
||||||
|
|
||||||
/// Configures a constant buffer.
|
/// Configures a constant buffer.
|
||||||
void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr,
|
void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr,
|
||||||
std::size_t size);
|
std::size_t size);
|
||||||
|
|
||||||
/// Configures the current textures to use for the draw command.
|
/// Configures the current textures to use for the draw command.
|
||||||
void SetupDrawTextures(std::size_t stage_index, const Shader& shader);
|
void SetupDrawTextures(std::size_t stage_index, Shader* shader);
|
||||||
|
|
||||||
/// Configures the textures used in a compute shader.
|
/// Configures the textures used in a compute shader.
|
||||||
void SetupComputeTextures(const Shader& kernel);
|
void SetupComputeTextures(Shader* kernel);
|
||||||
|
|
||||||
/// Configures a texture.
|
/// Configures a texture.
|
||||||
void SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture,
|
void SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture,
|
||||||
const SamplerEntry& entry);
|
const SamplerEntry& entry);
|
||||||
|
|
||||||
/// Configures images in a graphics shader.
|
/// Configures images in a graphics shader.
|
||||||
void SetupDrawImages(std::size_t stage_index, const Shader& shader);
|
void SetupDrawImages(std::size_t stage_index, Shader* shader);
|
||||||
|
|
||||||
/// Configures images in a compute shader.
|
/// Configures images in a compute shader.
|
||||||
void SetupComputeImages(const Shader& shader);
|
void SetupComputeImages(Shader* shader);
|
||||||
|
|
||||||
/// Configures an image.
|
/// Configures an image.
|
||||||
void SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, const ImageEntry& entry);
|
void SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, const ImageEntry& entry);
|
||||||
|
@ -201,6 +201,10 @@ private:
|
||||||
/// Syncs the framebuffer sRGB state to match the guest state
|
/// Syncs the framebuffer sRGB state to match the guest state
|
||||||
void SyncFramebufferSRGB();
|
void SyncFramebufferSRGB();
|
||||||
|
|
||||||
|
/// Syncs transform feedback state to match guest state
|
||||||
|
/// @note Only valid on assembly shaders
|
||||||
|
void SyncTransformFeedback();
|
||||||
|
|
||||||
/// Begin a transform feedback
|
/// Begin a transform feedback
|
||||||
void BeginTransformFeedback(GLenum primitive_mode);
|
void BeginTransformFeedback(GLenum primitive_mode);
|
||||||
|
|
||||||
|
@ -253,6 +257,7 @@ private:
|
||||||
Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram;
|
Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram;
|
||||||
std::array<GLuint, NUM_CONSTANT_BUFFERS> staging_cbufs{};
|
std::array<GLuint, NUM_CONSTANT_BUFFERS> staging_cbufs{};
|
||||||
std::size_t current_cbuf = 0;
|
std::size_t current_cbuf = 0;
|
||||||
|
OGLBuffer unified_uniform_buffer;
|
||||||
|
|
||||||
/// Number of commands queued to the OpenGL driver. Reseted on flush.
|
/// Number of commands queued to the OpenGL driver. Reseted on flush.
|
||||||
std::size_t num_queued_commands = 0;
|
std::size_t num_queued_commands = 0;
|
||||||
|
|
|
@ -20,6 +20,7 @@
|
||||||
#include "video_core/engines/maxwell_3d.h"
|
#include "video_core/engines/maxwell_3d.h"
|
||||||
#include "video_core/engines/shader_type.h"
|
#include "video_core/engines/shader_type.h"
|
||||||
#include "video_core/memory_manager.h"
|
#include "video_core/memory_manager.h"
|
||||||
|
#include "video_core/renderer_opengl/gl_arb_decompiler.h"
|
||||||
#include "video_core/renderer_opengl/gl_rasterizer.h"
|
#include "video_core/renderer_opengl/gl_rasterizer.h"
|
||||||
#include "video_core/renderer_opengl/gl_shader_cache.h"
|
#include "video_core/renderer_opengl/gl_shader_cache.h"
|
||||||
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
|
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
|
||||||
|
@ -29,6 +30,7 @@
|
||||||
#include "video_core/shader/memory_util.h"
|
#include "video_core/shader/memory_util.h"
|
||||||
#include "video_core/shader/registry.h"
|
#include "video_core/shader/registry.h"
|
||||||
#include "video_core/shader/shader_ir.h"
|
#include "video_core/shader/shader_ir.h"
|
||||||
|
#include "video_core/shader_cache.h"
|
||||||
|
|
||||||
namespace OpenGL {
|
namespace OpenGL {
|
||||||
|
|
||||||
|
@ -147,7 +149,8 @@ ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 u
|
||||||
auto program = std::make_shared<ProgramHandle>();
|
auto program = std::make_shared<ProgramHandle>();
|
||||||
|
|
||||||
if (device.UseAssemblyShaders()) {
|
if (device.UseAssemblyShaders()) {
|
||||||
const std::string arb = "Not implemented";
|
const std::string arb =
|
||||||
|
DecompileAssemblyShader(device, ir, registry, shader_type, shader_id);
|
||||||
|
|
||||||
GLuint& arb_prog = program->assembly_program.handle;
|
GLuint& arb_prog = program->assembly_program.handle;
|
||||||
|
|
||||||
|
@ -194,12 +197,9 @@ std::unordered_set<GLenum> GetSupportedFormats() {
|
||||||
|
|
||||||
} // Anonymous namespace
|
} // Anonymous namespace
|
||||||
|
|
||||||
CachedShader::CachedShader(VAddr cpu_addr, std::size_t size_in_bytes,
|
Shader::Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry_, ShaderEntries entries_,
|
||||||
std::shared_ptr<VideoCommon::Shader::Registry> registry,
|
ProgramSharedPtr program_)
|
||||||
ShaderEntries entries, ProgramSharedPtr program_)
|
: registry{std::move(registry_)}, entries{std::move(entries_)}, program{std::move(program_)} {
|
||||||
: RasterizerCacheObject{cpu_addr}, registry{std::move(registry)}, entries{std::move(entries)},
|
|
||||||
size_in_bytes{size_in_bytes}, program{std::move(program_)} {
|
|
||||||
// Assign either the assembly program or source program. We can't have both.
|
|
||||||
handle = program->assembly_program.handle;
|
handle = program->assembly_program.handle;
|
||||||
if (handle == 0) {
|
if (handle == 0) {
|
||||||
handle = program->source_program.handle;
|
handle = program->source_program.handle;
|
||||||
|
@ -207,16 +207,16 @@ CachedShader::CachedShader(VAddr cpu_addr, std::size_t size_in_bytes,
|
||||||
ASSERT(handle != 0);
|
ASSERT(handle != 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
CachedShader::~CachedShader() = default;
|
Shader::~Shader() = default;
|
||||||
|
|
||||||
GLuint CachedShader::GetHandle() const {
|
GLuint Shader::GetHandle() const {
|
||||||
DEBUG_ASSERT(registry->IsConsistent());
|
DEBUG_ASSERT(registry->IsConsistent());
|
||||||
return handle;
|
return handle;
|
||||||
}
|
}
|
||||||
|
|
||||||
Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
|
std::unique_ptr<Shader> Shader::CreateStageFromMemory(const ShaderParameters& params,
|
||||||
Maxwell::ShaderProgram program_type, ProgramCode code,
|
Maxwell::ShaderProgram program_type,
|
||||||
ProgramCode code_b) {
|
ProgramCode code, ProgramCode code_b) {
|
||||||
const auto shader_type = GetShaderType(program_type);
|
const auto shader_type = GetShaderType(program_type);
|
||||||
const std::size_t size_in_bytes = code.size() * sizeof(u64);
|
const std::size_t size_in_bytes = code.size() * sizeof(u64);
|
||||||
|
|
||||||
|
@ -241,11 +241,12 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
|
||||||
entry.bindless_samplers = registry->GetBindlessSamplers();
|
entry.bindless_samplers = registry->GetBindlessSamplers();
|
||||||
params.disk_cache.SaveEntry(std::move(entry));
|
params.disk_cache.SaveEntry(std::move(entry));
|
||||||
|
|
||||||
return std::shared_ptr<CachedShader>(new CachedShader(
|
return std::unique_ptr<Shader>(new Shader(
|
||||||
params.cpu_addr, size_in_bytes, std::move(registry), MakeEntries(ir), std::move(program)));
|
std::move(registry), MakeEntries(params.device, ir, shader_type), std::move(program)));
|
||||||
}
|
}
|
||||||
|
|
||||||
Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) {
|
std::unique_ptr<Shader> Shader::CreateKernelFromMemory(const ShaderParameters& params,
|
||||||
|
ProgramCode code) {
|
||||||
const std::size_t size_in_bytes = code.size() * sizeof(u64);
|
const std::size_t size_in_bytes = code.size() * sizeof(u64);
|
||||||
|
|
||||||
auto& engine = params.system.GPU().KeplerCompute();
|
auto& engine = params.system.GPU().KeplerCompute();
|
||||||
|
@ -265,22 +266,23 @@ Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, Prog
|
||||||
entry.bindless_samplers = registry->GetBindlessSamplers();
|
entry.bindless_samplers = registry->GetBindlessSamplers();
|
||||||
params.disk_cache.SaveEntry(std::move(entry));
|
params.disk_cache.SaveEntry(std::move(entry));
|
||||||
|
|
||||||
return std::shared_ptr<CachedShader>(new CachedShader(
|
return std::unique_ptr<Shader>(new Shader(std::move(registry),
|
||||||
params.cpu_addr, size_in_bytes, std::move(registry), MakeEntries(ir), std::move(program)));
|
MakeEntries(params.device, ir, ShaderType::Compute),
|
||||||
|
std::move(program)));
|
||||||
}
|
}
|
||||||
|
|
||||||
Shader CachedShader::CreateFromCache(const ShaderParameters& params,
|
std::unique_ptr<Shader> Shader::CreateFromCache(const ShaderParameters& params,
|
||||||
const PrecompiledShader& precompiled_shader,
|
const PrecompiledShader& precompiled_shader) {
|
||||||
std::size_t size_in_bytes) {
|
return std::unique_ptr<Shader>(new Shader(
|
||||||
return std::shared_ptr<CachedShader>(
|
precompiled_shader.registry, precompiled_shader.entries, precompiled_shader.program));
|
||||||
new CachedShader(params.cpu_addr, size_in_bytes, precompiled_shader.registry,
|
|
||||||
precompiled_shader.entries, precompiled_shader.program));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
|
ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
|
||||||
Core::Frontend::EmuWindow& emu_window, const Device& device)
|
Core::Frontend::EmuWindow& emu_window, const Device& device)
|
||||||
: RasterizerCache{rasterizer}, system{system}, emu_window{emu_window}, device{device},
|
: VideoCommon::ShaderCache<Shader>{rasterizer}, system{system},
|
||||||
disk_cache{system} {}
|
emu_window{emu_window}, device{device}, disk_cache{system} {}
|
||||||
|
|
||||||
|
ShaderCacheOpenGL::~ShaderCacheOpenGL() = default;
|
||||||
|
|
||||||
void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
|
void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
|
||||||
const VideoCore::DiskResourceLoadCallback& callback) {
|
const VideoCore::DiskResourceLoadCallback& callback) {
|
||||||
|
@ -348,7 +350,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
|
||||||
PrecompiledShader shader;
|
PrecompiledShader shader;
|
||||||
shader.program = std::move(program);
|
shader.program = std::move(program);
|
||||||
shader.registry = std::move(registry);
|
shader.registry = std::move(registry);
|
||||||
shader.entries = MakeEntries(ir);
|
shader.entries = MakeEntries(device, ir, entry.type);
|
||||||
|
|
||||||
std::scoped_lock lock{mutex};
|
std::scoped_lock lock{mutex};
|
||||||
if (callback) {
|
if (callback) {
|
||||||
|
@ -434,7 +436,7 @@ ProgramSharedPtr ShaderCacheOpenGL::GeneratePrecompiledProgram(
|
||||||
return program;
|
return program;
|
||||||
}
|
}
|
||||||
|
|
||||||
Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
|
Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
|
||||||
if (!system.GPU().Maxwell3D().dirty.flags[Dirty::Shaders]) {
|
if (!system.GPU().Maxwell3D().dirty.flags[Dirty::Shaders]) {
|
||||||
return last_shaders[static_cast<std::size_t>(program)];
|
return last_shaders[static_cast<std::size_t>(program)];
|
||||||
}
|
}
|
||||||
|
@ -444,8 +446,7 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
|
||||||
|
|
||||||
// Look up shader in the cache based on address
|
// Look up shader in the cache based on address
|
||||||
const auto cpu_addr{memory_manager.GpuToCpuAddress(address)};
|
const auto cpu_addr{memory_manager.GpuToCpuAddress(address)};
|
||||||
Shader shader{cpu_addr ? TryGet(*cpu_addr) : null_shader};
|
if (Shader* const shader{cpu_addr ? TryGet(*cpu_addr) : null_shader.get()}) {
|
||||||
if (shader) {
|
|
||||||
return last_shaders[static_cast<std::size_t>(program)] = shader;
|
return last_shaders[static_cast<std::size_t>(program)] = shader;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -466,30 +467,29 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
|
||||||
const ShaderParameters params{system, disk_cache, device,
|
const ShaderParameters params{system, disk_cache, device,
|
||||||
*cpu_addr, host_ptr, unique_identifier};
|
*cpu_addr, host_ptr, unique_identifier};
|
||||||
|
|
||||||
|
std::unique_ptr<Shader> shader;
|
||||||
const auto found = runtime_cache.find(unique_identifier);
|
const auto found = runtime_cache.find(unique_identifier);
|
||||||
if (found == runtime_cache.end()) {
|
if (found == runtime_cache.end()) {
|
||||||
shader = CachedShader::CreateStageFromMemory(params, program, std::move(code),
|
shader = Shader::CreateStageFromMemory(params, program, std::move(code), std::move(code_b));
|
||||||
std::move(code_b));
|
|
||||||
} else {
|
} else {
|
||||||
const std::size_t size_in_bytes = code.size() * sizeof(u64);
|
shader = Shader::CreateFromCache(params, found->second);
|
||||||
shader = CachedShader::CreateFromCache(params, found->second, size_in_bytes);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Shader* const result = shader.get();
|
||||||
if (cpu_addr) {
|
if (cpu_addr) {
|
||||||
Register(shader);
|
Register(std::move(shader), *cpu_addr, code.size() * sizeof(u64));
|
||||||
} else {
|
} else {
|
||||||
null_shader = shader;
|
null_shader = std::move(shader);
|
||||||
}
|
}
|
||||||
|
|
||||||
return last_shaders[static_cast<std::size_t>(program)] = shader;
|
return last_shaders[static_cast<std::size_t>(program)] = result;
|
||||||
}
|
}
|
||||||
|
|
||||||
Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
|
Shader* ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
|
||||||
auto& memory_manager{system.GPU().MemoryManager()};
|
auto& memory_manager{system.GPU().MemoryManager()};
|
||||||
const auto cpu_addr{memory_manager.GpuToCpuAddress(code_addr)};
|
const auto cpu_addr{memory_manager.GpuToCpuAddress(code_addr)};
|
||||||
|
|
||||||
auto kernel = cpu_addr ? TryGet(*cpu_addr) : null_kernel;
|
if (Shader* const kernel = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get()) {
|
||||||
if (kernel) {
|
|
||||||
return kernel;
|
return kernel;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -501,20 +501,21 @@ Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
|
||||||
const ShaderParameters params{system, disk_cache, device,
|
const ShaderParameters params{system, disk_cache, device,
|
||||||
*cpu_addr, host_ptr, unique_identifier};
|
*cpu_addr, host_ptr, unique_identifier};
|
||||||
|
|
||||||
|
std::unique_ptr<Shader> kernel;
|
||||||
const auto found = runtime_cache.find(unique_identifier);
|
const auto found = runtime_cache.find(unique_identifier);
|
||||||
if (found == runtime_cache.end()) {
|
if (found == runtime_cache.end()) {
|
||||||
kernel = CachedShader::CreateKernelFromMemory(params, std::move(code));
|
kernel = Shader::CreateKernelFromMemory(params, std::move(code));
|
||||||
} else {
|
} else {
|
||||||
const std::size_t size_in_bytes = code.size() * sizeof(u64);
|
kernel = Shader::CreateFromCache(params, found->second);
|
||||||
kernel = CachedShader::CreateFromCache(params, found->second, size_in_bytes);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Shader* const result = kernel.get();
|
||||||
if (cpu_addr) {
|
if (cpu_addr) {
|
||||||
Register(kernel);
|
Register(std::move(kernel), *cpu_addr, code.size() * sizeof(u64));
|
||||||
} else {
|
} else {
|
||||||
null_kernel = kernel;
|
null_kernel = std::move(kernel);
|
||||||
}
|
}
|
||||||
return kernel;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace OpenGL
|
} // namespace OpenGL
|
||||||
|
|
|
@ -18,12 +18,12 @@
|
||||||
|
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
#include "video_core/engines/shader_type.h"
|
#include "video_core/engines/shader_type.h"
|
||||||
#include "video_core/rasterizer_cache.h"
|
|
||||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||||
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
|
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
|
||||||
#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
|
#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
|
||||||
#include "video_core/shader/registry.h"
|
#include "video_core/shader/registry.h"
|
||||||
#include "video_core/shader/shader_ir.h"
|
#include "video_core/shader/shader_ir.h"
|
||||||
|
#include "video_core/shader_cache.h"
|
||||||
|
|
||||||
namespace Core {
|
namespace Core {
|
||||||
class System;
|
class System;
|
||||||
|
@ -35,12 +35,10 @@ class EmuWindow;
|
||||||
|
|
||||||
namespace OpenGL {
|
namespace OpenGL {
|
||||||
|
|
||||||
class CachedShader;
|
|
||||||
class Device;
|
class Device;
|
||||||
class RasterizerOpenGL;
|
class RasterizerOpenGL;
|
||||||
struct UnspecializedShader;
|
struct UnspecializedShader;
|
||||||
|
|
||||||
using Shader = std::shared_ptr<CachedShader>;
|
|
||||||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||||
|
|
||||||
struct ProgramHandle {
|
struct ProgramHandle {
|
||||||
|
@ -64,62 +62,53 @@ struct ShaderParameters {
|
||||||
u64 unique_identifier;
|
u64 unique_identifier;
|
||||||
};
|
};
|
||||||
|
|
||||||
class CachedShader final : public RasterizerCacheObject {
|
class Shader final {
|
||||||
public:
|
public:
|
||||||
~CachedShader();
|
~Shader();
|
||||||
|
|
||||||
/// Gets the GL program handle for the shader
|
/// Gets the GL program handle for the shader
|
||||||
GLuint GetHandle() const;
|
GLuint GetHandle() const;
|
||||||
|
|
||||||
/// Returns the size in bytes of the shader
|
|
||||||
std::size_t GetSizeInBytes() const override {
|
|
||||||
return size_in_bytes;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Gets the shader entries for the shader
|
/// Gets the shader entries for the shader
|
||||||
const ShaderEntries& GetEntries() const {
|
const ShaderEntries& GetEntries() const {
|
||||||
return entries;
|
return entries;
|
||||||
}
|
}
|
||||||
|
|
||||||
static Shader CreateStageFromMemory(const ShaderParameters& params,
|
static std::unique_ptr<Shader> CreateStageFromMemory(const ShaderParameters& params,
|
||||||
Maxwell::ShaderProgram program_type,
|
Maxwell::ShaderProgram program_type,
|
||||||
ProgramCode program_code, ProgramCode program_code_b);
|
ProgramCode program_code,
|
||||||
static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code);
|
ProgramCode program_code_b);
|
||||||
|
static std::unique_ptr<Shader> CreateKernelFromMemory(const ShaderParameters& params,
|
||||||
|
ProgramCode code);
|
||||||
|
|
||||||
static Shader CreateFromCache(const ShaderParameters& params,
|
static std::unique_ptr<Shader> CreateFromCache(const ShaderParameters& params,
|
||||||
const PrecompiledShader& precompiled_shader,
|
const PrecompiledShader& precompiled_shader);
|
||||||
std::size_t size_in_bytes);
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
explicit CachedShader(VAddr cpu_addr, std::size_t size_in_bytes,
|
explicit Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry, ShaderEntries entries,
|
||||||
std::shared_ptr<VideoCommon::Shader::Registry> registry,
|
ProgramSharedPtr program);
|
||||||
ShaderEntries entries, ProgramSharedPtr program);
|
|
||||||
|
|
||||||
std::shared_ptr<VideoCommon::Shader::Registry> registry;
|
std::shared_ptr<VideoCommon::Shader::Registry> registry;
|
||||||
ShaderEntries entries;
|
ShaderEntries entries;
|
||||||
std::size_t size_in_bytes = 0;
|
|
||||||
ProgramSharedPtr program;
|
ProgramSharedPtr program;
|
||||||
GLuint handle = 0;
|
GLuint handle = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
class ShaderCacheOpenGL final : public RasterizerCache<Shader> {
|
class ShaderCacheOpenGL final : public VideoCommon::ShaderCache<Shader> {
|
||||||
public:
|
public:
|
||||||
explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
|
explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
|
||||||
Core::Frontend::EmuWindow& emu_window, const Device& device);
|
Core::Frontend::EmuWindow& emu_window, const Device& device);
|
||||||
|
~ShaderCacheOpenGL() override;
|
||||||
|
|
||||||
/// Loads disk cache for the current game
|
/// Loads disk cache for the current game
|
||||||
void LoadDiskCache(const std::atomic_bool& stop_loading,
|
void LoadDiskCache(const std::atomic_bool& stop_loading,
|
||||||
const VideoCore::DiskResourceLoadCallback& callback);
|
const VideoCore::DiskResourceLoadCallback& callback);
|
||||||
|
|
||||||
/// Gets the current specified shader stage program
|
/// Gets the current specified shader stage program
|
||||||
Shader GetStageProgram(Maxwell::ShaderProgram program);
|
Shader* GetStageProgram(Maxwell::ShaderProgram program);
|
||||||
|
|
||||||
/// Gets a compute kernel in the passed address
|
/// Gets a compute kernel in the passed address
|
||||||
Shader GetComputeKernel(GPUVAddr code_addr);
|
Shader* GetComputeKernel(GPUVAddr code_addr);
|
||||||
|
|
||||||
protected:
|
|
||||||
// We do not have to flush this cache as things in it are never modified by us.
|
|
||||||
void FlushObjectInner(const Shader& object) override {}
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
ProgramSharedPtr GeneratePrecompiledProgram(
|
ProgramSharedPtr GeneratePrecompiledProgram(
|
||||||
|
@ -132,10 +121,10 @@ private:
|
||||||
ShaderDiskCacheOpenGL disk_cache;
|
ShaderDiskCacheOpenGL disk_cache;
|
||||||
std::unordered_map<u64, PrecompiledShader> runtime_cache;
|
std::unordered_map<u64, PrecompiledShader> runtime_cache;
|
||||||
|
|
||||||
Shader null_shader{};
|
std::unique_ptr<Shader> null_shader;
|
||||||
Shader null_kernel{};
|
std::unique_ptr<Shader> null_kernel;
|
||||||
|
|
||||||
std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
|
std::array<Shader*, Maxwell::MaxShaderProgram> last_shaders{};
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace OpenGL
|
} // namespace OpenGL
|
||||||
|
|
|
@ -37,6 +37,7 @@ using Tegra::Shader::IpaMode;
|
||||||
using Tegra::Shader::IpaSampleMode;
|
using Tegra::Shader::IpaSampleMode;
|
||||||
using Tegra::Shader::PixelImap;
|
using Tegra::Shader::PixelImap;
|
||||||
using Tegra::Shader::Register;
|
using Tegra::Shader::Register;
|
||||||
|
using Tegra::Shader::TextureType;
|
||||||
using VideoCommon::Shader::BuildTransformFeedback;
|
using VideoCommon::Shader::BuildTransformFeedback;
|
||||||
using VideoCommon::Shader::Registry;
|
using VideoCommon::Shader::Registry;
|
||||||
|
|
||||||
|
@ -61,8 +62,8 @@ struct TextureDerivates {};
|
||||||
using TextureArgument = std::pair<Type, Node>;
|
using TextureArgument = std::pair<Type, Node>;
|
||||||
using TextureIR = std::variant<TextureOffset, TextureDerivates, TextureArgument>;
|
using TextureIR = std::variant<TextureOffset, TextureDerivates, TextureArgument>;
|
||||||
|
|
||||||
constexpr u32 MAX_CONSTBUFFER_ELEMENTS =
|
constexpr u32 MAX_CONSTBUFFER_SCALARS = static_cast<u32>(Maxwell::MaxConstBufferSize) / sizeof(u32);
|
||||||
static_cast<u32>(Maxwell::MaxConstBufferSize) / (4 * sizeof(float));
|
constexpr u32 MAX_CONSTBUFFER_ELEMENTS = MAX_CONSTBUFFER_SCALARS / sizeof(u32);
|
||||||
|
|
||||||
constexpr std::string_view CommonDeclarations = R"(#define ftoi floatBitsToInt
|
constexpr std::string_view CommonDeclarations = R"(#define ftoi floatBitsToInt
|
||||||
#define ftou floatBitsToUint
|
#define ftou floatBitsToUint
|
||||||
|
@ -402,6 +403,13 @@ std::string FlowStackTopName(MetaStackClass stack) {
|
||||||
return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack));
|
return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool UseUnifiedUniforms(const Device& device, const ShaderIR& ir, ShaderType stage) {
|
||||||
|
const u32 num_ubos = static_cast<u32>(ir.GetConstantBuffers().size());
|
||||||
|
// We waste one UBO for emulation
|
||||||
|
const u32 num_available_ubos = device.GetMaxUniformBuffers(stage) - 1;
|
||||||
|
return num_ubos > num_available_ubos;
|
||||||
|
}
|
||||||
|
|
||||||
struct GenericVaryingDescription {
|
struct GenericVaryingDescription {
|
||||||
std::string name;
|
std::string name;
|
||||||
u8 first_element = 0;
|
u8 first_element = 0;
|
||||||
|
@ -412,8 +420,9 @@ class GLSLDecompiler final {
|
||||||
public:
|
public:
|
||||||
explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry,
|
explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry,
|
||||||
ShaderType stage, std::string_view identifier, std::string_view suffix)
|
ShaderType stage, std::string_view identifier, std::string_view suffix)
|
||||||
: device{device}, ir{ir}, registry{registry}, stage{stage},
|
: device{device}, ir{ir}, registry{registry}, stage{stage}, identifier{identifier},
|
||||||
identifier{identifier}, suffix{suffix}, header{ir.GetHeader()} {
|
suffix{suffix}, header{ir.GetHeader()}, use_unified_uniforms{
|
||||||
|
UseUnifiedUniforms(device, ir, stage)} {
|
||||||
if (stage != ShaderType::Compute) {
|
if (stage != ShaderType::Compute) {
|
||||||
transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo());
|
transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo());
|
||||||
}
|
}
|
||||||
|
@ -518,6 +527,9 @@ private:
|
||||||
if (device.HasImageLoadFormatted()) {
|
if (device.HasImageLoadFormatted()) {
|
||||||
code.AddLine("#extension GL_EXT_shader_image_load_formatted : require");
|
code.AddLine("#extension GL_EXT_shader_image_load_formatted : require");
|
||||||
}
|
}
|
||||||
|
if (device.HasTextureShadowLod()) {
|
||||||
|
code.AddLine("#extension GL_EXT_texture_shadow_lod : require");
|
||||||
|
}
|
||||||
if (device.HasWarpIntrinsics()) {
|
if (device.HasWarpIntrinsics()) {
|
||||||
code.AddLine("#extension GL_NV_gpu_shader5 : require");
|
code.AddLine("#extension GL_NV_gpu_shader5 : require");
|
||||||
code.AddLine("#extension GL_NV_shader_thread_group : require");
|
code.AddLine("#extension GL_NV_shader_thread_group : require");
|
||||||
|
@ -618,7 +630,9 @@ private:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (stage != ShaderType::Vertex || device.HasVertexViewportLayer()) {
|
|
||||||
|
if (stage != ShaderType::Geometry &&
|
||||||
|
(stage != ShaderType::Vertex || device.HasVertexViewportLayer())) {
|
||||||
if (ir.UsesLayer()) {
|
if (ir.UsesLayer()) {
|
||||||
code.AddLine("int gl_Layer;");
|
code.AddLine("int gl_Layer;");
|
||||||
}
|
}
|
||||||
|
@ -647,6 +661,16 @@ private:
|
||||||
--code.scope;
|
--code.scope;
|
||||||
code.AddLine("}};");
|
code.AddLine("}};");
|
||||||
code.AddNewLine();
|
code.AddNewLine();
|
||||||
|
|
||||||
|
if (stage == ShaderType::Geometry) {
|
||||||
|
if (ir.UsesLayer()) {
|
||||||
|
code.AddLine("out int gl_Layer;");
|
||||||
|
}
|
||||||
|
if (ir.UsesViewportIndex()) {
|
||||||
|
code.AddLine("out int gl_ViewportIndex;");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
code.AddNewLine();
|
||||||
}
|
}
|
||||||
|
|
||||||
void DeclareRegisters() {
|
void DeclareRegisters() {
|
||||||
|
@ -834,12 +858,24 @@ private:
|
||||||
}
|
}
|
||||||
|
|
||||||
void DeclareConstantBuffers() {
|
void DeclareConstantBuffers() {
|
||||||
|
if (use_unified_uniforms) {
|
||||||
|
const u32 binding = device.GetBaseBindings(stage).shader_storage_buffer +
|
||||||
|
static_cast<u32>(ir.GetGlobalMemory().size());
|
||||||
|
code.AddLine("layout (std430, binding = {}) readonly buffer UnifiedUniforms {{",
|
||||||
|
binding);
|
||||||
|
code.AddLine(" uint cbufs[];");
|
||||||
|
code.AddLine("}};");
|
||||||
|
code.AddNewLine();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
u32 binding = device.GetBaseBindings(stage).uniform_buffer;
|
u32 binding = device.GetBaseBindings(stage).uniform_buffer;
|
||||||
for (const auto& buffers : ir.GetConstantBuffers()) {
|
for (const auto [index, info] : ir.GetConstantBuffers()) {
|
||||||
const auto index = buffers.first;
|
const u32 num_elements = Common::AlignUp(info.GetSize(), 4) / 4;
|
||||||
|
const u32 size = info.IsIndirect() ? MAX_CONSTBUFFER_ELEMENTS : num_elements;
|
||||||
code.AddLine("layout (std140, binding = {}) uniform {} {{", binding++,
|
code.AddLine("layout (std140, binding = {}) uniform {} {{", binding++,
|
||||||
GetConstBufferBlock(index));
|
GetConstBufferBlock(index));
|
||||||
code.AddLine(" uvec4 {}[{}];", GetConstBuffer(index), MAX_CONSTBUFFER_ELEMENTS);
|
code.AddLine(" uvec4 {}[{}];", GetConstBuffer(index), size);
|
||||||
code.AddLine("}};");
|
code.AddLine("}};");
|
||||||
code.AddNewLine();
|
code.AddNewLine();
|
||||||
}
|
}
|
||||||
|
@ -877,13 +913,13 @@ private:
|
||||||
return "samplerBuffer";
|
return "samplerBuffer";
|
||||||
}
|
}
|
||||||
switch (sampler.type) {
|
switch (sampler.type) {
|
||||||
case Tegra::Shader::TextureType::Texture1D:
|
case TextureType::Texture1D:
|
||||||
return "sampler1D";
|
return "sampler1D";
|
||||||
case Tegra::Shader::TextureType::Texture2D:
|
case TextureType::Texture2D:
|
||||||
return "sampler2D";
|
return "sampler2D";
|
||||||
case Tegra::Shader::TextureType::Texture3D:
|
case TextureType::Texture3D:
|
||||||
return "sampler3D";
|
return "sampler3D";
|
||||||
case Tegra::Shader::TextureType::TextureCube:
|
case TextureType::TextureCube:
|
||||||
return "samplerCube";
|
return "samplerCube";
|
||||||
default:
|
default:
|
||||||
UNREACHABLE();
|
UNREACHABLE();
|
||||||
|
@ -1038,42 +1074,51 @@ private:
|
||||||
|
|
||||||
if (const auto cbuf = std::get_if<CbufNode>(&*node)) {
|
if (const auto cbuf = std::get_if<CbufNode>(&*node)) {
|
||||||
const Node offset = cbuf->GetOffset();
|
const Node offset = cbuf->GetOffset();
|
||||||
|
const u32 base_unified_offset = cbuf->GetIndex() * MAX_CONSTBUFFER_SCALARS;
|
||||||
|
|
||||||
if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) {
|
if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) {
|
||||||
// Direct access
|
// Direct access
|
||||||
const u32 offset_imm = immediate->GetValue();
|
const u32 offset_imm = immediate->GetValue();
|
||||||
ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access");
|
ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access");
|
||||||
return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()),
|
if (use_unified_uniforms) {
|
||||||
offset_imm / (4 * 4), (offset_imm / 4) % 4),
|
return {fmt::format("cbufs[{}]", base_unified_offset + offset_imm / 4),
|
||||||
|
Type::Uint};
|
||||||
|
} else {
|
||||||
|
return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()),
|
||||||
|
offset_imm / (4 * 4), (offset_imm / 4) % 4),
|
||||||
|
Type::Uint};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Indirect access
|
||||||
|
if (use_unified_uniforms) {
|
||||||
|
return {fmt::format("cbufs[{} + ({} >> 2)]", base_unified_offset,
|
||||||
|
Visit(offset).AsUint()),
|
||||||
Type::Uint};
|
Type::Uint};
|
||||||
}
|
}
|
||||||
|
|
||||||
if (std::holds_alternative<OperationNode>(*offset)) {
|
const std::string final_offset = code.GenerateTemporary();
|
||||||
// Indirect access
|
code.AddLine("uint {} = {} >> 2;", final_offset, Visit(offset).AsUint());
|
||||||
const std::string final_offset = code.GenerateTemporary();
|
|
||||||
code.AddLine("uint {} = {} >> 2;", final_offset, Visit(offset).AsUint());
|
|
||||||
|
|
||||||
if (!device.HasComponentIndexingBug()) {
|
if (!device.HasComponentIndexingBug()) {
|
||||||
return {fmt::format("{}[{} >> 2][{} & 3]", GetConstBuffer(cbuf->GetIndex()),
|
return {fmt::format("{}[{} >> 2][{} & 3]", GetConstBuffer(cbuf->GetIndex()),
|
||||||
final_offset, final_offset),
|
final_offset, final_offset),
|
||||||
Type::Uint};
|
Type::Uint};
|
||||||
}
|
|
||||||
|
|
||||||
// AMD's proprietary GLSL compiler emits ill code for variable component access.
|
|
||||||
// To bypass this driver bug generate 4 ifs, one per each component.
|
|
||||||
const std::string pack = code.GenerateTemporary();
|
|
||||||
code.AddLine("uvec4 {} = {}[{} >> 2];", pack, GetConstBuffer(cbuf->GetIndex()),
|
|
||||||
final_offset);
|
|
||||||
|
|
||||||
const std::string result = code.GenerateTemporary();
|
|
||||||
code.AddLine("uint {};", result);
|
|
||||||
for (u32 swizzle = 0; swizzle < 4; ++swizzle) {
|
|
||||||
code.AddLine("if (({} & 3) == {}) {} = {}{};", final_offset, swizzle, result,
|
|
||||||
pack, GetSwizzle(swizzle));
|
|
||||||
}
|
|
||||||
return {result, Type::Uint};
|
|
||||||
}
|
}
|
||||||
|
|
||||||
UNREACHABLE_MSG("Unmanaged offset node type");
|
// AMD's proprietary GLSL compiler emits ill code for variable component access.
|
||||||
|
// To bypass this driver bug generate 4 ifs, one per each component.
|
||||||
|
const std::string pack = code.GenerateTemporary();
|
||||||
|
code.AddLine("uvec4 {} = {}[{} >> 2];", pack, GetConstBuffer(cbuf->GetIndex()),
|
||||||
|
final_offset);
|
||||||
|
|
||||||
|
const std::string result = code.GenerateTemporary();
|
||||||
|
code.AddLine("uint {};", result);
|
||||||
|
for (u32 swizzle = 0; swizzle < 4; ++swizzle) {
|
||||||
|
code.AddLine("if (({} & 3) == {}) {} = {}{};", final_offset, swizzle, result, pack,
|
||||||
|
GetSwizzle(swizzle));
|
||||||
|
}
|
||||||
|
return {result, Type::Uint};
|
||||||
}
|
}
|
||||||
|
|
||||||
if (const auto gmem = std::get_if<GmemNode>(&*node)) {
|
if (const auto gmem = std::get_if<GmemNode>(&*node)) {
|
||||||
|
@ -1339,8 +1384,19 @@ private:
|
||||||
const std::size_t count = operation.GetOperandsCount();
|
const std::size_t count = operation.GetOperandsCount();
|
||||||
const bool has_array = meta->sampler.is_array;
|
const bool has_array = meta->sampler.is_array;
|
||||||
const bool has_shadow = meta->sampler.is_shadow;
|
const bool has_shadow = meta->sampler.is_shadow;
|
||||||
|
const bool workaround_lod_array_shadow_as_grad =
|
||||||
|
!device.HasTextureShadowLod() && function_suffix == "Lod" && meta->sampler.is_shadow &&
|
||||||
|
((meta->sampler.type == TextureType::Texture2D && meta->sampler.is_array) ||
|
||||||
|
meta->sampler.type == TextureType::TextureCube);
|
||||||
|
|
||||||
|
std::string expr = "texture";
|
||||||
|
|
||||||
|
if (workaround_lod_array_shadow_as_grad) {
|
||||||
|
expr += "Grad";
|
||||||
|
} else {
|
||||||
|
expr += function_suffix;
|
||||||
|
}
|
||||||
|
|
||||||
std::string expr = "texture" + function_suffix;
|
|
||||||
if (!meta->aoffi.empty()) {
|
if (!meta->aoffi.empty()) {
|
||||||
expr += "Offset";
|
expr += "Offset";
|
||||||
} else if (!meta->ptp.empty()) {
|
} else if (!meta->ptp.empty()) {
|
||||||
|
@ -1374,6 +1430,16 @@ private:
|
||||||
expr += ')';
|
expr += ')';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (workaround_lod_array_shadow_as_grad) {
|
||||||
|
switch (meta->sampler.type) {
|
||||||
|
case TextureType::Texture2D:
|
||||||
|
return expr + ", vec2(0.0), vec2(0.0))";
|
||||||
|
case TextureType::TextureCube:
|
||||||
|
return expr + ", vec3(0.0), vec3(0.0))";
|
||||||
|
}
|
||||||
|
UNREACHABLE();
|
||||||
|
}
|
||||||
|
|
||||||
for (const auto& variant : extras) {
|
for (const auto& variant : extras) {
|
||||||
if (const auto argument = std::get_if<TextureArgument>(&variant)) {
|
if (const auto argument = std::get_if<TextureArgument>(&variant)) {
|
||||||
expr += GenerateTextureArgument(*argument);
|
expr += GenerateTextureArgument(*argument);
|
||||||
|
@ -2000,8 +2066,19 @@ private:
|
||||||
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
|
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
|
||||||
ASSERT(meta);
|
ASSERT(meta);
|
||||||
|
|
||||||
std::string expr = GenerateTexture(
|
std::string expr{};
|
||||||
operation, "Lod", {TextureArgument{Type::Float, meta->lod}, TextureOffset{}});
|
|
||||||
|
if (!device.HasTextureShadowLod() && meta->sampler.is_shadow &&
|
||||||
|
((meta->sampler.type == TextureType::Texture2D && meta->sampler.is_array) ||
|
||||||
|
meta->sampler.type == TextureType::TextureCube)) {
|
||||||
|
LOG_ERROR(Render_OpenGL,
|
||||||
|
"Device lacks GL_EXT_texture_shadow_lod, using textureGrad as a workaround");
|
||||||
|
expr = GenerateTexture(operation, "Lod", {});
|
||||||
|
} else {
|
||||||
|
expr = GenerateTexture(operation, "Lod",
|
||||||
|
{TextureArgument{Type::Float, meta->lod}, TextureOffset{}});
|
||||||
|
}
|
||||||
|
|
||||||
if (meta->sampler.is_shadow) {
|
if (meta->sampler.is_shadow) {
|
||||||
expr = "vec4(" + expr + ')';
|
expr = "vec4(" + expr + ')';
|
||||||
}
|
}
|
||||||
|
@ -2710,6 +2787,7 @@ private:
|
||||||
const std::string_view identifier;
|
const std::string_view identifier;
|
||||||
const std::string_view suffix;
|
const std::string_view suffix;
|
||||||
const Header header;
|
const Header header;
|
||||||
|
const bool use_unified_uniforms;
|
||||||
std::unordered_map<u8, VaryingTFB> transform_feedback;
|
std::unordered_map<u8, VaryingTFB> transform_feedback;
|
||||||
|
|
||||||
ShaderWriter code;
|
ShaderWriter code;
|
||||||
|
@ -2905,7 +2983,7 @@ void GLSLDecompiler::DecompileAST() {
|
||||||
|
|
||||||
} // Anonymous namespace
|
} // Anonymous namespace
|
||||||
|
|
||||||
ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir) {
|
ShaderEntries MakeEntries(const Device& device, const ShaderIR& ir, ShaderType stage) {
|
||||||
ShaderEntries entries;
|
ShaderEntries entries;
|
||||||
for (const auto& cbuf : ir.GetConstantBuffers()) {
|
for (const auto& cbuf : ir.GetConstantBuffers()) {
|
||||||
entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(),
|
entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(),
|
||||||
|
@ -2926,6 +3004,7 @@ ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir) {
|
||||||
entries.clip_distances = (clip_distances[i] ? 1U : 0U) << i;
|
entries.clip_distances = (clip_distances[i] ? 1U : 0U) << i;
|
||||||
}
|
}
|
||||||
entries.shader_length = ir.GetLength();
|
entries.shader_length = ir.GetLength();
|
||||||
|
entries.use_unified_uniforms = UseUnifiedUniforms(device, ir, stage);
|
||||||
return entries;
|
return entries;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -53,11 +53,13 @@ struct ShaderEntries {
|
||||||
std::vector<GlobalMemoryEntry> global_memory_entries;
|
std::vector<GlobalMemoryEntry> global_memory_entries;
|
||||||
std::vector<SamplerEntry> samplers;
|
std::vector<SamplerEntry> samplers;
|
||||||
std::vector<ImageEntry> images;
|
std::vector<ImageEntry> images;
|
||||||
u32 clip_distances{};
|
|
||||||
std::size_t shader_length{};
|
std::size_t shader_length{};
|
||||||
|
u32 clip_distances{};
|
||||||
|
bool use_unified_uniforms{};
|
||||||
};
|
};
|
||||||
|
|
||||||
ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir);
|
ShaderEntries MakeEntries(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
|
||||||
|
Tegra::Engines::ShaderType stage);
|
||||||
|
|
||||||
std::string DecompileShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
|
std::string DecompileShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
|
||||||
const VideoCommon::Shader::Registry& registry,
|
const VideoCommon::Shader::Registry& registry,
|
||||||
|
|
|
@ -29,6 +29,8 @@ using VideoCommon::Shader::KeyMap;
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
|
using VideoCommon::Shader::SeparateSamplerKey;
|
||||||
|
|
||||||
using ShaderCacheVersionHash = std::array<u8, 64>;
|
using ShaderCacheVersionHash = std::array<u8, 64>;
|
||||||
|
|
||||||
struct ConstBufferKey {
|
struct ConstBufferKey {
|
||||||
|
@ -37,18 +39,26 @@ struct ConstBufferKey {
|
||||||
u32 value = 0;
|
u32 value = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct BoundSamplerKey {
|
struct BoundSamplerEntry {
|
||||||
u32 offset = 0;
|
u32 offset = 0;
|
||||||
Tegra::Engines::SamplerDescriptor sampler;
|
Tegra::Engines::SamplerDescriptor sampler;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct BindlessSamplerKey {
|
struct SeparateSamplerEntry {
|
||||||
|
u32 cbuf1 = 0;
|
||||||
|
u32 cbuf2 = 0;
|
||||||
|
u32 offset1 = 0;
|
||||||
|
u32 offset2 = 0;
|
||||||
|
Tegra::Engines::SamplerDescriptor sampler;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct BindlessSamplerEntry {
|
||||||
u32 cbuf = 0;
|
u32 cbuf = 0;
|
||||||
u32 offset = 0;
|
u32 offset = 0;
|
||||||
Tegra::Engines::SamplerDescriptor sampler;
|
Tegra::Engines::SamplerDescriptor sampler;
|
||||||
};
|
};
|
||||||
|
|
||||||
constexpr u32 NativeVersion = 20;
|
constexpr u32 NativeVersion = 21;
|
||||||
|
|
||||||
ShaderCacheVersionHash GetShaderCacheVersionHash() {
|
ShaderCacheVersionHash GetShaderCacheVersionHash() {
|
||||||
ShaderCacheVersionHash hash{};
|
ShaderCacheVersionHash hash{};
|
||||||
|
@ -87,12 +97,14 @@ bool ShaderDiskCacheEntry::Load(FileUtil::IOFile& file) {
|
||||||
u32 texture_handler_size_value;
|
u32 texture_handler_size_value;
|
||||||
u32 num_keys;
|
u32 num_keys;
|
||||||
u32 num_bound_samplers;
|
u32 num_bound_samplers;
|
||||||
|
u32 num_separate_samplers;
|
||||||
u32 num_bindless_samplers;
|
u32 num_bindless_samplers;
|
||||||
if (file.ReadArray(&unique_identifier, 1) != 1 || file.ReadArray(&bound_buffer, 1) != 1 ||
|
if (file.ReadArray(&unique_identifier, 1) != 1 || file.ReadArray(&bound_buffer, 1) != 1 ||
|
||||||
file.ReadArray(&is_texture_handler_size_known, 1) != 1 ||
|
file.ReadArray(&is_texture_handler_size_known, 1) != 1 ||
|
||||||
file.ReadArray(&texture_handler_size_value, 1) != 1 ||
|
file.ReadArray(&texture_handler_size_value, 1) != 1 ||
|
||||||
file.ReadArray(&graphics_info, 1) != 1 || file.ReadArray(&compute_info, 1) != 1 ||
|
file.ReadArray(&graphics_info, 1) != 1 || file.ReadArray(&compute_info, 1) != 1 ||
|
||||||
file.ReadArray(&num_keys, 1) != 1 || file.ReadArray(&num_bound_samplers, 1) != 1 ||
|
file.ReadArray(&num_keys, 1) != 1 || file.ReadArray(&num_bound_samplers, 1) != 1 ||
|
||||||
|
file.ReadArray(&num_separate_samplers, 1) != 1 ||
|
||||||
file.ReadArray(&num_bindless_samplers, 1) != 1) {
|
file.ReadArray(&num_bindless_samplers, 1) != 1) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -101,23 +113,32 @@ bool ShaderDiskCacheEntry::Load(FileUtil::IOFile& file) {
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<ConstBufferKey> flat_keys(num_keys);
|
std::vector<ConstBufferKey> flat_keys(num_keys);
|
||||||
std::vector<BoundSamplerKey> flat_bound_samplers(num_bound_samplers);
|
std::vector<BoundSamplerEntry> flat_bound_samplers(num_bound_samplers);
|
||||||
std::vector<BindlessSamplerKey> flat_bindless_samplers(num_bindless_samplers);
|
std::vector<SeparateSamplerEntry> flat_separate_samplers(num_separate_samplers);
|
||||||
|
std::vector<BindlessSamplerEntry> flat_bindless_samplers(num_bindless_samplers);
|
||||||
if (file.ReadArray(flat_keys.data(), flat_keys.size()) != flat_keys.size() ||
|
if (file.ReadArray(flat_keys.data(), flat_keys.size()) != flat_keys.size() ||
|
||||||
file.ReadArray(flat_bound_samplers.data(), flat_bound_samplers.size()) !=
|
file.ReadArray(flat_bound_samplers.data(), flat_bound_samplers.size()) !=
|
||||||
flat_bound_samplers.size() ||
|
flat_bound_samplers.size() ||
|
||||||
|
file.ReadArray(flat_separate_samplers.data(), flat_separate_samplers.size()) !=
|
||||||
|
flat_separate_samplers.size() ||
|
||||||
file.ReadArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) !=
|
file.ReadArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) !=
|
||||||
flat_bindless_samplers.size()) {
|
flat_bindless_samplers.size()) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
for (const auto& key : flat_keys) {
|
for (const auto& entry : flat_keys) {
|
||||||
keys.insert({{key.cbuf, key.offset}, key.value});
|
keys.insert({{entry.cbuf, entry.offset}, entry.value});
|
||||||
}
|
}
|
||||||
for (const auto& key : flat_bound_samplers) {
|
for (const auto& entry : flat_bound_samplers) {
|
||||||
bound_samplers.emplace(key.offset, key.sampler);
|
bound_samplers.emplace(entry.offset, entry.sampler);
|
||||||
}
|
}
|
||||||
for (const auto& key : flat_bindless_samplers) {
|
for (const auto& entry : flat_separate_samplers) {
|
||||||
bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler});
|
SeparateSamplerKey key;
|
||||||
|
key.buffers = {entry.cbuf1, entry.cbuf2};
|
||||||
|
key.offsets = {entry.offset1, entry.offset2};
|
||||||
|
separate_samplers.emplace(key, entry.sampler);
|
||||||
|
}
|
||||||
|
for (const auto& entry : flat_bindless_samplers) {
|
||||||
|
bindless_samplers.insert({{entry.cbuf, entry.offset}, entry.sampler});
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
@ -142,6 +163,7 @@ bool ShaderDiskCacheEntry::Save(FileUtil::IOFile& file) const {
|
||||||
file.WriteObject(graphics_info) != 1 || file.WriteObject(compute_info) != 1 ||
|
file.WriteObject(graphics_info) != 1 || file.WriteObject(compute_info) != 1 ||
|
||||||
file.WriteObject(static_cast<u32>(keys.size())) != 1 ||
|
file.WriteObject(static_cast<u32>(keys.size())) != 1 ||
|
||||||
file.WriteObject(static_cast<u32>(bound_samplers.size())) != 1 ||
|
file.WriteObject(static_cast<u32>(bound_samplers.size())) != 1 ||
|
||||||
|
file.WriteObject(static_cast<u32>(separate_samplers.size())) != 1 ||
|
||||||
file.WriteObject(static_cast<u32>(bindless_samplers.size())) != 1) {
|
file.WriteObject(static_cast<u32>(bindless_samplers.size())) != 1) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -152,22 +174,34 @@ bool ShaderDiskCacheEntry::Save(FileUtil::IOFile& file) const {
|
||||||
flat_keys.push_back(ConstBufferKey{address.first, address.second, value});
|
flat_keys.push_back(ConstBufferKey{address.first, address.second, value});
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<BoundSamplerKey> flat_bound_samplers;
|
std::vector<BoundSamplerEntry> flat_bound_samplers;
|
||||||
flat_bound_samplers.reserve(bound_samplers.size());
|
flat_bound_samplers.reserve(bound_samplers.size());
|
||||||
for (const auto& [address, sampler] : bound_samplers) {
|
for (const auto& [address, sampler] : bound_samplers) {
|
||||||
flat_bound_samplers.push_back(BoundSamplerKey{address, sampler});
|
flat_bound_samplers.push_back(BoundSamplerEntry{address, sampler});
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<BindlessSamplerKey> flat_bindless_samplers;
|
std::vector<SeparateSamplerEntry> flat_separate_samplers;
|
||||||
|
flat_separate_samplers.reserve(separate_samplers.size());
|
||||||
|
for (const auto& [key, sampler] : separate_samplers) {
|
||||||
|
SeparateSamplerEntry entry;
|
||||||
|
std::tie(entry.cbuf1, entry.cbuf2) = key.buffers;
|
||||||
|
std::tie(entry.offset1, entry.offset2) = key.offsets;
|
||||||
|
entry.sampler = sampler;
|
||||||
|
flat_separate_samplers.push_back(entry);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<BindlessSamplerEntry> flat_bindless_samplers;
|
||||||
flat_bindless_samplers.reserve(bindless_samplers.size());
|
flat_bindless_samplers.reserve(bindless_samplers.size());
|
||||||
for (const auto& [address, sampler] : bindless_samplers) {
|
for (const auto& [address, sampler] : bindless_samplers) {
|
||||||
flat_bindless_samplers.push_back(
|
flat_bindless_samplers.push_back(
|
||||||
BindlessSamplerKey{address.first, address.second, sampler});
|
BindlessSamplerEntry{address.first, address.second, sampler});
|
||||||
}
|
}
|
||||||
|
|
||||||
return file.WriteArray(flat_keys.data(), flat_keys.size()) == flat_keys.size() &&
|
return file.WriteArray(flat_keys.data(), flat_keys.size()) == flat_keys.size() &&
|
||||||
file.WriteArray(flat_bound_samplers.data(), flat_bound_samplers.size()) ==
|
file.WriteArray(flat_bound_samplers.data(), flat_bound_samplers.size()) ==
|
||||||
flat_bound_samplers.size() &&
|
flat_bound_samplers.size() &&
|
||||||
|
file.WriteArray(flat_separate_samplers.data(), flat_separate_samplers.size()) ==
|
||||||
|
flat_separate_samplers.size() &&
|
||||||
file.WriteArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) ==
|
file.WriteArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) ==
|
||||||
flat_bindless_samplers.size();
|
flat_bindless_samplers.size();
|
||||||
}
|
}
|
||||||
|
|
|
@ -57,6 +57,7 @@ struct ShaderDiskCacheEntry {
|
||||||
VideoCommon::Shader::ComputeInfo compute_info;
|
VideoCommon::Shader::ComputeInfo compute_info;
|
||||||
VideoCommon::Shader::KeyMap keys;
|
VideoCommon::Shader::KeyMap keys;
|
||||||
VideoCommon::Shader::BoundSamplerMap bound_samplers;
|
VideoCommon::Shader::BoundSamplerMap bound_samplers;
|
||||||
|
VideoCommon::Shader::SeparateSamplerMap separate_samplers;
|
||||||
VideoCommon::Shader::BindlessSamplerMap bindless_samplers;
|
VideoCommon::Shader::BindlessSamplerMap bindless_samplers;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -2,11 +2,13 @@
|
||||||
// Licensed under GPLv2 or any later version
|
// Licensed under GPLv2 or any later version
|
||||||
// Refer to the license.txt file included.
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
#include <deque>
|
#include <tuple>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "common/alignment.h"
|
#include "common/alignment.h"
|
||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
#include "common/microprofile.h"
|
#include "common/microprofile.h"
|
||||||
|
#include "video_core/renderer_opengl/gl_device.h"
|
||||||
#include "video_core/renderer_opengl/gl_stream_buffer.h"
|
#include "video_core/renderer_opengl/gl_stream_buffer.h"
|
||||||
|
|
||||||
MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",
|
MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",
|
||||||
|
@ -14,8 +16,7 @@ MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",
|
||||||
|
|
||||||
namespace OpenGL {
|
namespace OpenGL {
|
||||||
|
|
||||||
OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent,
|
OGLStreamBuffer::OGLStreamBuffer(const Device& device, GLsizeiptr size, bool vertex_data_usage)
|
||||||
bool use_persistent)
|
|
||||||
: buffer_size(size) {
|
: buffer_size(size) {
|
||||||
gl_buffer.Create();
|
gl_buffer.Create();
|
||||||
|
|
||||||
|
@ -29,34 +30,22 @@ OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool p
|
||||||
allocate_size *= 2;
|
allocate_size *= 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (use_persistent) {
|
static constexpr GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT;
|
||||||
persistent = true;
|
glNamedBufferStorage(gl_buffer.handle, allocate_size, nullptr, flags);
|
||||||
coherent = prefer_coherent;
|
mapped_ptr = static_cast<u8*>(
|
||||||
const GLbitfield flags =
|
glMapNamedBufferRange(gl_buffer.handle, 0, buffer_size, flags | GL_MAP_FLUSH_EXPLICIT_BIT));
|
||||||
GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | (coherent ? GL_MAP_COHERENT_BIT : 0);
|
|
||||||
glNamedBufferStorage(gl_buffer.handle, allocate_size, nullptr, flags);
|
if (device.HasVertexBufferUnifiedMemory()) {
|
||||||
mapped_ptr = static_cast<u8*>(glMapNamedBufferRange(
|
glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_ONLY);
|
||||||
gl_buffer.handle, 0, buffer_size, flags | (coherent ? 0 : GL_MAP_FLUSH_EXPLICIT_BIT)));
|
glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address);
|
||||||
} else {
|
|
||||||
glNamedBufferData(gl_buffer.handle, allocate_size, nullptr, GL_STREAM_DRAW);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
OGLStreamBuffer::~OGLStreamBuffer() {
|
OGLStreamBuffer::~OGLStreamBuffer() {
|
||||||
if (persistent) {
|
glUnmapNamedBuffer(gl_buffer.handle);
|
||||||
glUnmapNamedBuffer(gl_buffer.handle);
|
|
||||||
}
|
|
||||||
gl_buffer.Release();
|
gl_buffer.Release();
|
||||||
}
|
}
|
||||||
|
|
||||||
GLuint OGLStreamBuffer::GetHandle() const {
|
|
||||||
return gl_buffer.handle;
|
|
||||||
}
|
|
||||||
|
|
||||||
GLsizeiptr OGLStreamBuffer::GetSize() const {
|
|
||||||
return buffer_size;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) {
|
std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) {
|
||||||
ASSERT(size <= buffer_size);
|
ASSERT(size <= buffer_size);
|
||||||
ASSERT(alignment <= buffer_size);
|
ASSERT(alignment <= buffer_size);
|
||||||
|
@ -68,36 +57,21 @@ std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr a
|
||||||
|
|
||||||
bool invalidate = false;
|
bool invalidate = false;
|
||||||
if (buffer_pos + size > buffer_size) {
|
if (buffer_pos + size > buffer_size) {
|
||||||
|
MICROPROFILE_SCOPE(OpenGL_StreamBuffer);
|
||||||
|
glInvalidateBufferData(gl_buffer.handle);
|
||||||
|
|
||||||
buffer_pos = 0;
|
buffer_pos = 0;
|
||||||
invalidate = true;
|
invalidate = true;
|
||||||
|
|
||||||
if (persistent) {
|
|
||||||
glUnmapNamedBuffer(gl_buffer.handle);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (invalidate || !persistent) {
|
return std::make_tuple(mapped_ptr + buffer_pos, buffer_pos, invalidate);
|
||||||
MICROPROFILE_SCOPE(OpenGL_StreamBuffer);
|
|
||||||
GLbitfield flags = GL_MAP_WRITE_BIT | (persistent ? GL_MAP_PERSISTENT_BIT : 0) |
|
|
||||||
(coherent ? GL_MAP_COHERENT_BIT : GL_MAP_FLUSH_EXPLICIT_BIT) |
|
|
||||||
(invalidate ? GL_MAP_INVALIDATE_BUFFER_BIT : GL_MAP_UNSYNCHRONIZED_BIT);
|
|
||||||
mapped_ptr = static_cast<u8*>(
|
|
||||||
glMapNamedBufferRange(gl_buffer.handle, buffer_pos, buffer_size - buffer_pos, flags));
|
|
||||||
mapped_offset = buffer_pos;
|
|
||||||
}
|
|
||||||
|
|
||||||
return std::make_tuple(mapped_ptr + buffer_pos - mapped_offset, buffer_pos, invalidate);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void OGLStreamBuffer::Unmap(GLsizeiptr size) {
|
void OGLStreamBuffer::Unmap(GLsizeiptr size) {
|
||||||
ASSERT(size <= mapped_size);
|
ASSERT(size <= mapped_size);
|
||||||
|
|
||||||
if (!coherent && size > 0) {
|
if (size > 0) {
|
||||||
glFlushMappedNamedBufferRange(gl_buffer.handle, buffer_pos - mapped_offset, size);
|
glFlushMappedNamedBufferRange(gl_buffer.handle, buffer_pos, size);
|
||||||
}
|
|
||||||
|
|
||||||
if (!persistent) {
|
|
||||||
glUnmapNamedBuffer(gl_buffer.handle);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
buffer_pos += size;
|
buffer_pos += size;
|
||||||
|
|
|
@ -11,15 +11,13 @@
|
||||||
|
|
||||||
namespace OpenGL {
|
namespace OpenGL {
|
||||||
|
|
||||||
|
class Device;
|
||||||
|
|
||||||
class OGLStreamBuffer : private NonCopyable {
|
class OGLStreamBuffer : private NonCopyable {
|
||||||
public:
|
public:
|
||||||
explicit OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent = false,
|
explicit OGLStreamBuffer(const Device& device, GLsizeiptr size, bool vertex_data_usage);
|
||||||
bool use_persistent = true);
|
|
||||||
~OGLStreamBuffer();
|
~OGLStreamBuffer();
|
||||||
|
|
||||||
GLuint GetHandle() const;
|
|
||||||
GLsizeiptr GetSize() const;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes
|
* Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes
|
||||||
* and the optional alignment requirement.
|
* and the optional alignment requirement.
|
||||||
|
@ -32,15 +30,24 @@ public:
|
||||||
|
|
||||||
void Unmap(GLsizeiptr size);
|
void Unmap(GLsizeiptr size);
|
||||||
|
|
||||||
|
GLuint Handle() const {
|
||||||
|
return gl_buffer.handle;
|
||||||
|
}
|
||||||
|
|
||||||
|
u64 Address() const {
|
||||||
|
return gpu_address;
|
||||||
|
}
|
||||||
|
|
||||||
|
GLsizeiptr Size() const noexcept {
|
||||||
|
return buffer_size;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
OGLBuffer gl_buffer;
|
OGLBuffer gl_buffer;
|
||||||
|
|
||||||
bool coherent = false;
|
GLuint64EXT gpu_address = 0;
|
||||||
bool persistent = false;
|
|
||||||
|
|
||||||
GLintptr buffer_pos = 0;
|
GLintptr buffer_pos = 0;
|
||||||
GLsizeiptr buffer_size = 0;
|
GLsizeiptr buffer_size = 0;
|
||||||
GLintptr mapped_offset = 0;
|
|
||||||
GLsizeiptr mapped_size = 0;
|
GLsizeiptr mapped_size = 0;
|
||||||
u8* mapped_ptr = nullptr;
|
u8* mapped_ptr = nullptr;
|
||||||
};
|
};
|
||||||
|
|
|
@ -263,9 +263,14 @@ CachedSurface::CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& param
|
||||||
target = GetTextureTarget(params.target);
|
target = GetTextureTarget(params.target);
|
||||||
texture = CreateTexture(params, target, internal_format, texture_buffer);
|
texture = CreateTexture(params, target, internal_format, texture_buffer);
|
||||||
DecorateSurfaceName();
|
DecorateSurfaceName();
|
||||||
main_view = CreateViewInner(
|
|
||||||
ViewParams(params.target, 0, params.is_layered ? params.depth : 1, 0, params.num_levels),
|
u32 num_layers = 1;
|
||||||
true);
|
if (params.is_layered || params.target == SurfaceTarget::Texture3D) {
|
||||||
|
num_layers = params.depth;
|
||||||
|
}
|
||||||
|
|
||||||
|
main_view =
|
||||||
|
CreateViewInner(ViewParams(params.target, 0, num_layers, 0, params.num_levels), true);
|
||||||
}
|
}
|
||||||
|
|
||||||
CachedSurface::~CachedSurface() = default;
|
CachedSurface::~CachedSurface() = default;
|
||||||
|
@ -404,8 +409,7 @@ View CachedSurface::CreateViewInner(const ViewParams& view_key, const bool is_pr
|
||||||
|
|
||||||
CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, const ViewParams& params,
|
CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, const ViewParams& params,
|
||||||
bool is_proxy)
|
bool is_proxy)
|
||||||
: VideoCommon::ViewBase(params), surface{surface},
|
: VideoCommon::ViewBase(params), surface{surface}, format{surface.internal_format},
|
||||||
format{GetFormatTuple(surface.GetSurfaceParams().pixel_format).internal_format},
|
|
||||||
target{GetTextureTarget(params.target)}, is_proxy{is_proxy} {
|
target{GetTextureTarget(params.target)}, is_proxy{is_proxy} {
|
||||||
if (!is_proxy) {
|
if (!is_proxy) {
|
||||||
main_view = CreateTextureView();
|
main_view = CreateTextureView();
|
||||||
|
@ -414,37 +418,40 @@ CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, const ViewParams& p
|
||||||
|
|
||||||
CachedSurfaceView::~CachedSurfaceView() = default;
|
CachedSurfaceView::~CachedSurfaceView() = default;
|
||||||
|
|
||||||
void CachedSurfaceView::Attach(GLenum attachment, GLenum target) const {
|
void CachedSurfaceView::Attach(GLenum attachment, GLenum fb_target) const {
|
||||||
ASSERT(params.num_levels == 1);
|
ASSERT(params.num_levels == 1);
|
||||||
|
|
||||||
if (params.num_layers > 1) {
|
if (params.target == SurfaceTarget::Texture3D) {
|
||||||
// Layered framebuffer attachments
|
if (params.num_layers > 1) {
|
||||||
UNIMPLEMENTED_IF(params.base_layer != 0);
|
ASSERT(params.base_layer == 0);
|
||||||
|
glFramebufferTexture(fb_target, attachment, surface.texture.handle, params.base_level);
|
||||||
switch (params.target) {
|
} else {
|
||||||
case SurfaceTarget::Texture2DArray:
|
glFramebufferTexture3D(fb_target, attachment, target, surface.texture.handle,
|
||||||
glFramebufferTexture(target, attachment, GetTexture(), 0);
|
params.base_level, params.base_layer);
|
||||||
break;
|
|
||||||
default:
|
|
||||||
UNIMPLEMENTED();
|
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (params.num_layers > 1) {
|
||||||
|
UNIMPLEMENTED_IF(params.base_layer != 0);
|
||||||
|
glFramebufferTexture(fb_target, attachment, GetTexture(), 0);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
const GLenum view_target = surface.GetTarget();
|
const GLenum view_target = surface.GetTarget();
|
||||||
const GLuint texture = surface.GetTexture();
|
const GLuint texture = surface.GetTexture();
|
||||||
switch (surface.GetSurfaceParams().target) {
|
switch (surface.GetSurfaceParams().target) {
|
||||||
case SurfaceTarget::Texture1D:
|
case SurfaceTarget::Texture1D:
|
||||||
glFramebufferTexture1D(target, attachment, view_target, texture, params.base_level);
|
glFramebufferTexture1D(fb_target, attachment, view_target, texture, params.base_level);
|
||||||
break;
|
break;
|
||||||
case SurfaceTarget::Texture2D:
|
case SurfaceTarget::Texture2D:
|
||||||
glFramebufferTexture2D(target, attachment, view_target, texture, params.base_level);
|
glFramebufferTexture2D(fb_target, attachment, view_target, texture, params.base_level);
|
||||||
break;
|
break;
|
||||||
case SurfaceTarget::Texture1DArray:
|
case SurfaceTarget::Texture1DArray:
|
||||||
case SurfaceTarget::Texture2DArray:
|
case SurfaceTarget::Texture2DArray:
|
||||||
case SurfaceTarget::TextureCubemap:
|
case SurfaceTarget::TextureCubemap:
|
||||||
case SurfaceTarget::TextureCubeArray:
|
case SurfaceTarget::TextureCubeArray:
|
||||||
glFramebufferTextureLayer(target, attachment, texture, params.base_level,
|
glFramebufferTextureLayer(fb_target, attachment, texture, params.base_level,
|
||||||
params.base_layer);
|
params.base_layer);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
|
@ -501,8 +508,13 @@ OGLTextureView CachedSurfaceView::CreateTextureView() const {
|
||||||
OGLTextureView texture_view;
|
OGLTextureView texture_view;
|
||||||
texture_view.Create();
|
texture_view.Create();
|
||||||
|
|
||||||
glTextureView(texture_view.handle, target, surface.texture.handle, format, params.base_level,
|
if (target == GL_TEXTURE_3D) {
|
||||||
params.num_levels, params.base_layer, params.num_layers);
|
glTextureView(texture_view.handle, target, surface.texture.handle, format,
|
||||||
|
params.base_level, params.num_levels, 0, 1);
|
||||||
|
} else {
|
||||||
|
glTextureView(texture_view.handle, target, surface.texture.handle, format,
|
||||||
|
params.base_level, params.num_levels, params.base_layer, params.num_layers);
|
||||||
|
}
|
||||||
ApplyTextureDefaults(surface.GetSurfaceParams(), texture_view.handle);
|
ApplyTextureDefaults(surface.GetSurfaceParams(), texture_view.handle);
|
||||||
|
|
||||||
return texture_view;
|
return texture_view;
|
||||||
|
@ -545,8 +557,8 @@ void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view,
|
||||||
const Tegra::Engines::Fermi2D::Config& copy_config) {
|
const Tegra::Engines::Fermi2D::Config& copy_config) {
|
||||||
const auto& src_params{src_view->GetSurfaceParams()};
|
const auto& src_params{src_view->GetSurfaceParams()};
|
||||||
const auto& dst_params{dst_view->GetSurfaceParams()};
|
const auto& dst_params{dst_view->GetSurfaceParams()};
|
||||||
UNIMPLEMENTED_IF(src_params.target == SurfaceTarget::Texture3D);
|
UNIMPLEMENTED_IF(src_params.depth != 1);
|
||||||
UNIMPLEMENTED_IF(dst_params.target == SurfaceTarget::Texture3D);
|
UNIMPLEMENTED_IF(dst_params.depth != 1);
|
||||||
|
|
||||||
state_tracker.NotifyScissor0();
|
state_tracker.NotifyScissor0();
|
||||||
state_tracker.NotifyFramebuffer();
|
state_tracker.NotifyFramebuffer();
|
||||||
|
|
|
@ -80,8 +80,10 @@ public:
|
||||||
explicit CachedSurfaceView(CachedSurface& surface, const ViewParams& params, bool is_proxy);
|
explicit CachedSurfaceView(CachedSurface& surface, const ViewParams& params, bool is_proxy);
|
||||||
~CachedSurfaceView();
|
~CachedSurfaceView();
|
||||||
|
|
||||||
/// Attaches this texture view to the current bound GL_DRAW_FRAMEBUFFER
|
/// @brief Attaches this texture view to the currently bound fb_target framebuffer
|
||||||
void Attach(GLenum attachment, GLenum target) const;
|
/// @param attachment Attachment to bind textures to
|
||||||
|
/// @param fb_target Framebuffer target to attach to (e.g. DRAW_FRAMEBUFFER)
|
||||||
|
void Attach(GLenum attachment, GLenum fb_target) const;
|
||||||
|
|
||||||
GLuint GetTexture(Tegra::Texture::SwizzleSource x_source,
|
GLuint GetTexture(Tegra::Texture::SwizzleSource x_source,
|
||||||
Tegra::Texture::SwizzleSource y_source,
|
Tegra::Texture::SwizzleSource y_source,
|
||||||
|
|
|
@ -46,10 +46,8 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
|
||||||
return GL_UNSIGNED_INT;
|
return GL_UNSIGNED_INT;
|
||||||
case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
|
case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
|
||||||
return GL_UNSIGNED_INT_2_10_10_10_REV;
|
return GL_UNSIGNED_INT_2_10_10_10_REV;
|
||||||
default:
|
|
||||||
LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
|
|
||||||
return {};
|
|
||||||
}
|
}
|
||||||
|
break;
|
||||||
case Maxwell::VertexAttribute::Type::SignedInt:
|
case Maxwell::VertexAttribute::Type::SignedInt:
|
||||||
case Maxwell::VertexAttribute::Type::SignedNorm:
|
case Maxwell::VertexAttribute::Type::SignedNorm:
|
||||||
switch (attrib.size) {
|
switch (attrib.size) {
|
||||||
|
@ -70,10 +68,8 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
|
||||||
return GL_INT;
|
return GL_INT;
|
||||||
case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
|
case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
|
||||||
return GL_INT_2_10_10_10_REV;
|
return GL_INT_2_10_10_10_REV;
|
||||||
default:
|
|
||||||
LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
|
|
||||||
return {};
|
|
||||||
}
|
}
|
||||||
|
break;
|
||||||
case Maxwell::VertexAttribute::Type::Float:
|
case Maxwell::VertexAttribute::Type::Float:
|
||||||
switch (attrib.size) {
|
switch (attrib.size) {
|
||||||
case Maxwell::VertexAttribute::Size::Size_16:
|
case Maxwell::VertexAttribute::Size::Size_16:
|
||||||
|
@ -86,10 +82,8 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
|
||||||
case Maxwell::VertexAttribute::Size::Size_32_32_32:
|
case Maxwell::VertexAttribute::Size::Size_32_32_32:
|
||||||
case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
|
case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
|
||||||
return GL_FLOAT;
|
return GL_FLOAT;
|
||||||
default:
|
|
||||||
LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
|
|
||||||
return {};
|
|
||||||
}
|
}
|
||||||
|
break;
|
||||||
case Maxwell::VertexAttribute::Type::UnsignedScaled:
|
case Maxwell::VertexAttribute::Type::UnsignedScaled:
|
||||||
switch (attrib.size) {
|
switch (attrib.size) {
|
||||||
case Maxwell::VertexAttribute::Size::Size_8:
|
case Maxwell::VertexAttribute::Size::Size_8:
|
||||||
|
@ -102,10 +96,8 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
|
||||||
case Maxwell::VertexAttribute::Size::Size_16_16_16:
|
case Maxwell::VertexAttribute::Size::Size_16_16_16:
|
||||||
case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
|
case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
|
||||||
return GL_UNSIGNED_SHORT;
|
return GL_UNSIGNED_SHORT;
|
||||||
default:
|
|
||||||
LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
|
|
||||||
return {};
|
|
||||||
}
|
}
|
||||||
|
break;
|
||||||
case Maxwell::VertexAttribute::Type::SignedScaled:
|
case Maxwell::VertexAttribute::Type::SignedScaled:
|
||||||
switch (attrib.size) {
|
switch (attrib.size) {
|
||||||
case Maxwell::VertexAttribute::Size::Size_8:
|
case Maxwell::VertexAttribute::Size::Size_8:
|
||||||
|
@ -118,14 +110,12 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
|
||||||
case Maxwell::VertexAttribute::Size::Size_16_16_16:
|
case Maxwell::VertexAttribute::Size::Size_16_16_16:
|
||||||
case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
|
case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
|
||||||
return GL_SHORT;
|
return GL_SHORT;
|
||||||
default:
|
|
||||||
LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
|
|
||||||
return {};
|
|
||||||
}
|
}
|
||||||
default:
|
break;
|
||||||
LOG_ERROR(Render_OpenGL, "Unimplemented vertex type={}", attrib.TypeString());
|
|
||||||
return {};
|
|
||||||
}
|
}
|
||||||
|
UNIMPLEMENTED_MSG("Unimplemented vertex type={} and size={}", attrib.TypeString(),
|
||||||
|
attrib.SizeString());
|
||||||
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
inline GLenum IndexFormat(Maxwell::IndexFormat index_format) {
|
inline GLenum IndexFormat(Maxwell::IndexFormat index_format) {
|
||||||
|
@ -137,8 +127,7 @@ inline GLenum IndexFormat(Maxwell::IndexFormat index_format) {
|
||||||
case Maxwell::IndexFormat::UnsignedInt:
|
case Maxwell::IndexFormat::UnsignedInt:
|
||||||
return GL_UNSIGNED_INT;
|
return GL_UNSIGNED_INT;
|
||||||
}
|
}
|
||||||
LOG_CRITICAL(Render_OpenGL, "Unimplemented index_format={}", static_cast<u32>(index_format));
|
UNREACHABLE_MSG("Invalid index_format={}", static_cast<u32>(index_format));
|
||||||
UNREACHABLE();
|
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -180,10 +169,20 @@ inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
|
||||||
}
|
}
|
||||||
|
|
||||||
inline GLenum TextureFilterMode(Tegra::Texture::TextureFilter filter_mode,
|
inline GLenum TextureFilterMode(Tegra::Texture::TextureFilter filter_mode,
|
||||||
Tegra::Texture::TextureMipmapFilter mip_filter_mode) {
|
Tegra::Texture::TextureMipmapFilter mipmap_filter_mode) {
|
||||||
switch (filter_mode) {
|
switch (filter_mode) {
|
||||||
case Tegra::Texture::TextureFilter::Linear: {
|
case Tegra::Texture::TextureFilter::Nearest:
|
||||||
switch (mip_filter_mode) {
|
switch (mipmap_filter_mode) {
|
||||||
|
case Tegra::Texture::TextureMipmapFilter::None:
|
||||||
|
return GL_NEAREST;
|
||||||
|
case Tegra::Texture::TextureMipmapFilter::Nearest:
|
||||||
|
return GL_NEAREST_MIPMAP_NEAREST;
|
||||||
|
case Tegra::Texture::TextureMipmapFilter::Linear:
|
||||||
|
return GL_NEAREST_MIPMAP_LINEAR;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case Tegra::Texture::TextureFilter::Linear:
|
||||||
|
switch (mipmap_filter_mode) {
|
||||||
case Tegra::Texture::TextureMipmapFilter::None:
|
case Tegra::Texture::TextureMipmapFilter::None:
|
||||||
return GL_LINEAR;
|
return GL_LINEAR;
|
||||||
case Tegra::Texture::TextureMipmapFilter::Nearest:
|
case Tegra::Texture::TextureMipmapFilter::Nearest:
|
||||||
|
@ -193,20 +192,9 @@ inline GLenum TextureFilterMode(Tegra::Texture::TextureFilter filter_mode,
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case Tegra::Texture::TextureFilter::Nearest: {
|
UNREACHABLE_MSG("Invalid texture filter mode={} and mipmap filter mode={}",
|
||||||
switch (mip_filter_mode) {
|
static_cast<u32>(filter_mode), static_cast<u32>(mipmap_filter_mode));
|
||||||
case Tegra::Texture::TextureMipmapFilter::None:
|
return GL_NEAREST;
|
||||||
return GL_NEAREST;
|
|
||||||
case Tegra::Texture::TextureMipmapFilter::Nearest:
|
|
||||||
return GL_NEAREST_MIPMAP_NEAREST;
|
|
||||||
case Tegra::Texture::TextureMipmapFilter::Linear:
|
|
||||||
return GL_NEAREST_MIPMAP_LINEAR;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
LOG_ERROR(Render_OpenGL, "Unimplemented texture filter mode={}", static_cast<u32>(filter_mode));
|
|
||||||
return GL_LINEAR;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
inline GLenum WrapMode(Tegra::Texture::WrapMode wrap_mode) {
|
inline GLenum WrapMode(Tegra::Texture::WrapMode wrap_mode) {
|
||||||
|
@ -229,10 +217,9 @@ inline GLenum WrapMode(Tegra::Texture::WrapMode wrap_mode) {
|
||||||
} else {
|
} else {
|
||||||
return GL_MIRROR_CLAMP_TO_EDGE;
|
return GL_MIRROR_CLAMP_TO_EDGE;
|
||||||
}
|
}
|
||||||
default:
|
|
||||||
LOG_ERROR(Render_OpenGL, "Unimplemented texture wrap mode={}", static_cast<u32>(wrap_mode));
|
|
||||||
return GL_REPEAT;
|
|
||||||
}
|
}
|
||||||
|
UNIMPLEMENTED_MSG("Unimplemented texture wrap mode={}", static_cast<u32>(wrap_mode));
|
||||||
|
return GL_REPEAT;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline GLenum DepthCompareFunc(Tegra::Texture::DepthCompareFunc func) {
|
inline GLenum DepthCompareFunc(Tegra::Texture::DepthCompareFunc func) {
|
||||||
|
@ -254,8 +241,7 @@ inline GLenum DepthCompareFunc(Tegra::Texture::DepthCompareFunc func) {
|
||||||
case Tegra::Texture::DepthCompareFunc::Always:
|
case Tegra::Texture::DepthCompareFunc::Always:
|
||||||
return GL_ALWAYS;
|
return GL_ALWAYS;
|
||||||
}
|
}
|
||||||
LOG_ERROR(Render_OpenGL, "Unimplemented texture depth compare function ={}",
|
UNIMPLEMENTED_MSG("Unimplemented texture depth compare function={}", static_cast<u32>(func));
|
||||||
static_cast<u32>(func));
|
|
||||||
return GL_GREATER;
|
return GL_GREATER;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -277,7 +263,7 @@ inline GLenum BlendEquation(Maxwell::Blend::Equation equation) {
|
||||||
case Maxwell::Blend::Equation::MaxGL:
|
case Maxwell::Blend::Equation::MaxGL:
|
||||||
return GL_MAX;
|
return GL_MAX;
|
||||||
}
|
}
|
||||||
LOG_ERROR(Render_OpenGL, "Unimplemented blend equation={}", static_cast<u32>(equation));
|
UNIMPLEMENTED_MSG("Unimplemented blend equation={}", static_cast<u32>(equation));
|
||||||
return GL_FUNC_ADD;
|
return GL_FUNC_ADD;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -341,7 +327,7 @@ inline GLenum BlendFunc(Maxwell::Blend::Factor factor) {
|
||||||
case Maxwell::Blend::Factor::OneMinusConstantAlphaGL:
|
case Maxwell::Blend::Factor::OneMinusConstantAlphaGL:
|
||||||
return GL_ONE_MINUS_CONSTANT_ALPHA;
|
return GL_ONE_MINUS_CONSTANT_ALPHA;
|
||||||
}
|
}
|
||||||
LOG_ERROR(Render_OpenGL, "Unimplemented blend factor={}", static_cast<u32>(factor));
|
UNIMPLEMENTED_MSG("Unimplemented blend factor={}", static_cast<u32>(factor));
|
||||||
return GL_ZERO;
|
return GL_ZERO;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -361,7 +347,7 @@ inline GLenum SwizzleSource(Tegra::Texture::SwizzleSource source) {
|
||||||
case Tegra::Texture::SwizzleSource::OneFloat:
|
case Tegra::Texture::SwizzleSource::OneFloat:
|
||||||
return GL_ONE;
|
return GL_ONE;
|
||||||
}
|
}
|
||||||
LOG_ERROR(Render_OpenGL, "Unimplemented swizzle source={}", static_cast<u32>(source));
|
UNIMPLEMENTED_MSG("Unimplemented swizzle source={}", static_cast<u32>(source));
|
||||||
return GL_ZERO;
|
return GL_ZERO;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -392,7 +378,7 @@ inline GLenum ComparisonOp(Maxwell::ComparisonOp comparison) {
|
||||||
case Maxwell::ComparisonOp::AlwaysOld:
|
case Maxwell::ComparisonOp::AlwaysOld:
|
||||||
return GL_ALWAYS;
|
return GL_ALWAYS;
|
||||||
}
|
}
|
||||||
LOG_ERROR(Render_OpenGL, "Unimplemented comparison op={}", static_cast<u32>(comparison));
|
UNIMPLEMENTED_MSG("Unimplemented comparison op={}", static_cast<u32>(comparison));
|
||||||
return GL_ALWAYS;
|
return GL_ALWAYS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -423,7 +409,7 @@ inline GLenum StencilOp(Maxwell::StencilOp stencil) {
|
||||||
case Maxwell::StencilOp::DecrWrapOGL:
|
case Maxwell::StencilOp::DecrWrapOGL:
|
||||||
return GL_DECR_WRAP;
|
return GL_DECR_WRAP;
|
||||||
}
|
}
|
||||||
LOG_ERROR(Render_OpenGL, "Unimplemented stencil op={}", static_cast<u32>(stencil));
|
UNIMPLEMENTED_MSG("Unimplemented stencil op={}", static_cast<u32>(stencil));
|
||||||
return GL_KEEP;
|
return GL_KEEP;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -434,7 +420,7 @@ inline GLenum FrontFace(Maxwell::FrontFace front_face) {
|
||||||
case Maxwell::FrontFace::CounterClockWise:
|
case Maxwell::FrontFace::CounterClockWise:
|
||||||
return GL_CCW;
|
return GL_CCW;
|
||||||
}
|
}
|
||||||
LOG_ERROR(Render_OpenGL, "Unimplemented front face cull={}", static_cast<u32>(front_face));
|
UNIMPLEMENTED_MSG("Unimplemented front face cull={}", static_cast<u32>(front_face));
|
||||||
return GL_CCW;
|
return GL_CCW;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -447,7 +433,7 @@ inline GLenum CullFace(Maxwell::CullFace cull_face) {
|
||||||
case Maxwell::CullFace::FrontAndBack:
|
case Maxwell::CullFace::FrontAndBack:
|
||||||
return GL_FRONT_AND_BACK;
|
return GL_FRONT_AND_BACK;
|
||||||
}
|
}
|
||||||
LOG_ERROR(Render_OpenGL, "Unimplemented cull face={}", static_cast<u32>(cull_face));
|
UNIMPLEMENTED_MSG("Unimplemented cull face={}", static_cast<u32>(cull_face));
|
||||||
return GL_BACK;
|
return GL_BACK;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -486,7 +472,7 @@ inline GLenum LogicOp(Maxwell::LogicOperation operation) {
|
||||||
case Maxwell::LogicOperation::Set:
|
case Maxwell::LogicOperation::Set:
|
||||||
return GL_SET;
|
return GL_SET;
|
||||||
}
|
}
|
||||||
LOG_ERROR(Render_OpenGL, "Unimplemented logic operation={}", static_cast<u32>(operation));
|
UNIMPLEMENTED_MSG("Unimplemented logic operation={}", static_cast<u32>(operation));
|
||||||
return GL_COPY;
|
return GL_COPY;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -488,6 +488,15 @@ void RendererOpenGL::InitOpenGLObjects() {
|
||||||
|
|
||||||
// Clear screen to black
|
// Clear screen to black
|
||||||
LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture);
|
LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture);
|
||||||
|
|
||||||
|
// Enable unified vertex attributes and query vertex buffer address when the driver supports it
|
||||||
|
if (device.HasVertexBufferUnifiedMemory()) {
|
||||||
|
glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);
|
||||||
|
|
||||||
|
glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY);
|
||||||
|
glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV,
|
||||||
|
&vertex_buffer_address);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void RendererOpenGL::AddTelemetryFields() {
|
void RendererOpenGL::AddTelemetryFields() {
|
||||||
|
@ -656,7 +665,13 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
|
||||||
offsetof(ScreenRectVertex, tex_coord));
|
offsetof(ScreenRectVertex, tex_coord));
|
||||||
glVertexAttribBinding(PositionLocation, 0);
|
glVertexAttribBinding(PositionLocation, 0);
|
||||||
glVertexAttribBinding(TexCoordLocation, 0);
|
glVertexAttribBinding(TexCoordLocation, 0);
|
||||||
glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex));
|
if (device.HasVertexBufferUnifiedMemory()) {
|
||||||
|
glBindVertexBuffer(0, 0, 0, sizeof(ScreenRectVertex));
|
||||||
|
glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, 0, vertex_buffer_address,
|
||||||
|
sizeof(vertices));
|
||||||
|
} else {
|
||||||
|
glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex));
|
||||||
|
}
|
||||||
|
|
||||||
glBindTextureUnit(0, screen_info.display_texture);
|
glBindTextureUnit(0, screen_info.display_texture);
|
||||||
glBindSampler(0, 0);
|
glBindSampler(0, 0);
|
||||||
|
@ -751,8 +766,9 @@ void RendererOpenGL::RenderScreenshot() {
|
||||||
}
|
}
|
||||||
|
|
||||||
bool RendererOpenGL::Init() {
|
bool RendererOpenGL::Init() {
|
||||||
if (GLAD_GL_KHR_debug) {
|
if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) {
|
||||||
glEnable(GL_DEBUG_OUTPUT);
|
glEnable(GL_DEBUG_OUTPUT);
|
||||||
|
glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS);
|
||||||
glDebugMessageCallback(DebugHandler, nullptr);
|
glDebugMessageCallback(DebugHandler, nullptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -107,6 +107,9 @@ private:
|
||||||
OGLPipeline pipeline;
|
OGLPipeline pipeline;
|
||||||
OGLFramebuffer screenshot_framebuffer;
|
OGLFramebuffer screenshot_framebuffer;
|
||||||
|
|
||||||
|
// GPU address of the vertex buffer
|
||||||
|
GLuint64EXT vertex_buffer_address = 0;
|
||||||
|
|
||||||
/// Display information for Switch screen
|
/// Display information for Switch screen
|
||||||
ScreenInfo screen_info;
|
ScreenInfo screen_info;
|
||||||
|
|
||||||
|
|
|
@ -71,8 +71,7 @@ void FixedPipelineState::Rasterizer::Fill(const Maxwell& regs) noexcept {
|
||||||
const u32 topology_index = static_cast<u32>(regs.draw.topology.Value());
|
const u32 topology_index = static_cast<u32>(regs.draw.topology.Value());
|
||||||
|
|
||||||
u32 packed_front_face = PackFrontFace(regs.front_face);
|
u32 packed_front_face = PackFrontFace(regs.front_face);
|
||||||
if (regs.screen_y_control.triangle_rast_flip != 0 &&
|
if (regs.screen_y_control.triangle_rast_flip != 0) {
|
||||||
regs.viewport_transform[0].scale_y > 0.0f) {
|
|
||||||
// Flip front face
|
// Flip front face
|
||||||
packed_front_face = 1 - packed_front_face;
|
packed_front_face = 1 - packed_front_face;
|
||||||
}
|
}
|
||||||
|
|
|
@ -21,29 +21,29 @@ namespace Sampler {
|
||||||
|
|
||||||
VkFilter Filter(Tegra::Texture::TextureFilter filter) {
|
VkFilter Filter(Tegra::Texture::TextureFilter filter) {
|
||||||
switch (filter) {
|
switch (filter) {
|
||||||
case Tegra::Texture::TextureFilter::Linear:
|
|
||||||
return VK_FILTER_LINEAR;
|
|
||||||
case Tegra::Texture::TextureFilter::Nearest:
|
case Tegra::Texture::TextureFilter::Nearest:
|
||||||
return VK_FILTER_NEAREST;
|
return VK_FILTER_NEAREST;
|
||||||
|
case Tegra::Texture::TextureFilter::Linear:
|
||||||
|
return VK_FILTER_LINEAR;
|
||||||
}
|
}
|
||||||
UNIMPLEMENTED_MSG("Unimplemented sampler filter={}", static_cast<u32>(filter));
|
UNREACHABLE_MSG("Invalid sampler filter={}", static_cast<u32>(filter));
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
VkSamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter) {
|
VkSamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter) {
|
||||||
switch (mipmap_filter) {
|
switch (mipmap_filter) {
|
||||||
case Tegra::Texture::TextureMipmapFilter::None:
|
case Tegra::Texture::TextureMipmapFilter::None:
|
||||||
// TODO(Rodrigo): None seems to be mapped to OpenGL's mag and min filters without mipmapping
|
// There are no Vulkan filter modes that directly correspond to OpenGL minification filters
|
||||||
// (e.g. GL_NEAREST and GL_LINEAR). Vulkan doesn't have such a thing, find out if we have to
|
// of GL_LINEAR or GL_NEAREST, but they can be emulated using
|
||||||
// use an image view with a single mipmap level to emulate this.
|
// VK_SAMPLER_MIPMAP_MODE_NEAREST, minLod = 0, and maxLod = 0.25, and using minFilter =
|
||||||
return VK_SAMPLER_MIPMAP_MODE_LINEAR;
|
// VK_FILTER_LINEAR or minFilter = VK_FILTER_NEAREST, respectively.
|
||||||
;
|
return VK_SAMPLER_MIPMAP_MODE_NEAREST;
|
||||||
case Tegra::Texture::TextureMipmapFilter::Linear:
|
|
||||||
return VK_SAMPLER_MIPMAP_MODE_LINEAR;
|
|
||||||
case Tegra::Texture::TextureMipmapFilter::Nearest:
|
case Tegra::Texture::TextureMipmapFilter::Nearest:
|
||||||
return VK_SAMPLER_MIPMAP_MODE_NEAREST;
|
return VK_SAMPLER_MIPMAP_MODE_NEAREST;
|
||||||
|
case Tegra::Texture::TextureMipmapFilter::Linear:
|
||||||
|
return VK_SAMPLER_MIPMAP_MODE_LINEAR;
|
||||||
}
|
}
|
||||||
UNIMPLEMENTED_MSG("Unimplemented sampler mipmap mode={}", static_cast<u32>(mipmap_filter));
|
UNREACHABLE_MSG("Invalid sampler mipmap mode={}", static_cast<u32>(mipmap_filter));
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -78,10 +78,9 @@ VkSamplerAddressMode WrapMode(const VKDevice& device, Tegra::Texture::WrapMode w
|
||||||
case Tegra::Texture::WrapMode::MirrorOnceBorder:
|
case Tegra::Texture::WrapMode::MirrorOnceBorder:
|
||||||
UNIMPLEMENTED();
|
UNIMPLEMENTED();
|
||||||
return VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE;
|
return VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE;
|
||||||
default:
|
|
||||||
UNIMPLEMENTED_MSG("Unimplemented wrap mode={}", static_cast<u32>(wrap_mode));
|
|
||||||
return {};
|
|
||||||
}
|
}
|
||||||
|
UNIMPLEMENTED_MSG("Unimplemented wrap mode={}", static_cast<u32>(wrap_mode));
|
||||||
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
VkCompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func) {
|
VkCompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func) {
|
||||||
|
@ -149,7 +148,7 @@ struct FormatTuple {
|
||||||
{VK_FORMAT_R16_SFLOAT, Attachable | Storage}, // R16F
|
{VK_FORMAT_R16_SFLOAT, Attachable | Storage}, // R16F
|
||||||
{VK_FORMAT_R16_UNORM, Attachable | Storage}, // R16U
|
{VK_FORMAT_R16_UNORM, Attachable | Storage}, // R16U
|
||||||
{VK_FORMAT_UNDEFINED}, // R16S
|
{VK_FORMAT_UNDEFINED}, // R16S
|
||||||
{VK_FORMAT_UNDEFINED}, // R16UI
|
{VK_FORMAT_R16_UINT, Attachable | Storage}, // R16UI
|
||||||
{VK_FORMAT_UNDEFINED}, // R16I
|
{VK_FORMAT_UNDEFINED}, // R16I
|
||||||
{VK_FORMAT_R16G16_UNORM, Attachable | Storage}, // RG16
|
{VK_FORMAT_R16G16_UNORM, Attachable | Storage}, // RG16
|
||||||
{VK_FORMAT_R16G16_SFLOAT, Attachable | Storage}, // RG16F
|
{VK_FORMAT_R16G16_SFLOAT, Attachable | Storage}, // RG16F
|
||||||
|
@ -288,10 +287,9 @@ VkPrimitiveTopology PrimitiveTopology([[maybe_unused]] const VKDevice& device,
|
||||||
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
|
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
|
||||||
case Maxwell::PrimitiveTopology::Patches:
|
case Maxwell::PrimitiveTopology::Patches:
|
||||||
return VK_PRIMITIVE_TOPOLOGY_PATCH_LIST;
|
return VK_PRIMITIVE_TOPOLOGY_PATCH_LIST;
|
||||||
default:
|
|
||||||
UNIMPLEMENTED_MSG("Unimplemented topology={}", static_cast<u32>(topology));
|
|
||||||
return {};
|
|
||||||
}
|
}
|
||||||
|
UNIMPLEMENTED_MSG("Unimplemented topology={}", static_cast<u32>(topology));
|
||||||
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size) {
|
VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size) {
|
||||||
|
|
|
@ -13,6 +13,7 @@
|
||||||
#include <fmt/format.h>
|
#include <fmt/format.h>
|
||||||
|
|
||||||
#include "common/dynamic_library.h"
|
#include "common/dynamic_library.h"
|
||||||
|
#include "common/file_util.h"
|
||||||
#include "common/logging/log.h"
|
#include "common/logging/log.h"
|
||||||
#include "common/telemetry.h"
|
#include "common/telemetry.h"
|
||||||
#include "core/core.h"
|
#include "core/core.h"
|
||||||
|
@ -76,7 +77,8 @@ Common::DynamicLibrary OpenVulkanLibrary() {
|
||||||
char* libvulkan_env = getenv("LIBVULKAN_PATH");
|
char* libvulkan_env = getenv("LIBVULKAN_PATH");
|
||||||
if (!libvulkan_env || !library.Open(libvulkan_env)) {
|
if (!libvulkan_env || !library.Open(libvulkan_env)) {
|
||||||
// Use the libvulkan.dylib from the application bundle.
|
// Use the libvulkan.dylib from the application bundle.
|
||||||
std::string filename = File::GetBundleDirectory() + "/Contents/Frameworks/libvulkan.dylib";
|
const std::string filename =
|
||||||
|
FileUtil::GetBundleDirectory() + "/Contents/Frameworks/libvulkan.dylib";
|
||||||
library.Open(filename.c_str());
|
library.Open(filename.c_str());
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
|
|
|
@ -37,9 +37,9 @@ std::unique_ptr<VKStreamBuffer> CreateStreamBuffer(const VKDevice& device, VKSch
|
||||||
|
|
||||||
} // Anonymous namespace
|
} // Anonymous namespace
|
||||||
|
|
||||||
CachedBufferBlock::CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager,
|
Buffer::Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VKScheduler& scheduler_,
|
||||||
VAddr cpu_addr, std::size_t size)
|
VKStagingBufferPool& staging_pool_, VAddr cpu_addr, std::size_t size)
|
||||||
: VideoCommon::BufferBlock{cpu_addr, size} {
|
: VideoCommon::BufferBlock{cpu_addr, size}, scheduler{scheduler_}, staging_pool{staging_pool_} {
|
||||||
VkBufferCreateInfo ci;
|
VkBufferCreateInfo ci;
|
||||||
ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
|
ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
|
||||||
ci.pNext = nullptr;
|
ci.pNext = nullptr;
|
||||||
|
@ -54,46 +54,17 @@ CachedBufferBlock::CachedBufferBlock(const VKDevice& device, VKMemoryManager& me
|
||||||
buffer.commit = memory_manager.Commit(buffer.handle, false);
|
buffer.commit = memory_manager.Commit(buffer.handle, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
CachedBufferBlock::~CachedBufferBlock() = default;
|
Buffer::~Buffer() = default;
|
||||||
|
|
||||||
VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
|
void Buffer::Upload(std::size_t offset, std::size_t size, const u8* data) const {
|
||||||
const VKDevice& device, VKMemoryManager& memory_manager,
|
|
||||||
VKScheduler& scheduler, VKStagingBufferPool& staging_pool)
|
|
||||||
: VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer, system,
|
|
||||||
CreateStreamBuffer(device,
|
|
||||||
scheduler)},
|
|
||||||
device{device}, memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{
|
|
||||||
staging_pool} {}
|
|
||||||
|
|
||||||
VKBufferCache::~VKBufferCache() = default;
|
|
||||||
|
|
||||||
Buffer VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
|
|
||||||
return std::make_shared<CachedBufferBlock>(device, memory_manager, cpu_addr, size);
|
|
||||||
}
|
|
||||||
|
|
||||||
VkBuffer VKBufferCache::ToHandle(const Buffer& buffer) {
|
|
||||||
return buffer->GetHandle();
|
|
||||||
}
|
|
||||||
|
|
||||||
VkBuffer VKBufferCache::GetEmptyBuffer(std::size_t size) {
|
|
||||||
size = std::max(size, std::size_t(4));
|
|
||||||
const auto& empty = staging_pool.GetUnusedBuffer(size, false);
|
|
||||||
scheduler.RequestOutsideRenderPassOperationContext();
|
|
||||||
scheduler.Record([size, buffer = *empty.handle](vk::CommandBuffer cmdbuf) {
|
|
||||||
cmdbuf.FillBuffer(buffer, 0, size, 0);
|
|
||||||
});
|
|
||||||
return *empty.handle;
|
|
||||||
}
|
|
||||||
|
|
||||||
void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
|
|
||||||
const u8* data) {
|
|
||||||
const auto& staging = staging_pool.GetUnusedBuffer(size, true);
|
const auto& staging = staging_pool.GetUnusedBuffer(size, true);
|
||||||
std::memcpy(staging.commit->Map(size), data, size);
|
std::memcpy(staging.commit->Map(size), data, size);
|
||||||
|
|
||||||
scheduler.RequestOutsideRenderPassOperationContext();
|
scheduler.RequestOutsideRenderPassOperationContext();
|
||||||
scheduler.Record([staging = *staging.handle, buffer = buffer->GetHandle(), offset,
|
|
||||||
size](vk::CommandBuffer cmdbuf) {
|
const VkBuffer handle = Handle();
|
||||||
cmdbuf.CopyBuffer(staging, buffer, VkBufferCopy{0, offset, size});
|
scheduler.Record([staging = *staging.handle, handle, offset, size](vk::CommandBuffer cmdbuf) {
|
||||||
|
cmdbuf.CopyBuffer(staging, handle, VkBufferCopy{0, offset, size});
|
||||||
|
|
||||||
VkBufferMemoryBarrier barrier;
|
VkBufferMemoryBarrier barrier;
|
||||||
barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
|
barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
|
||||||
|
@ -102,7 +73,7 @@ void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, st
|
||||||
barrier.dstAccessMask = UPLOAD_ACCESS_BARRIERS;
|
barrier.dstAccessMask = UPLOAD_ACCESS_BARRIERS;
|
||||||
barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
||||||
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
||||||
barrier.buffer = buffer;
|
barrier.buffer = handle;
|
||||||
barrier.offset = offset;
|
barrier.offset = offset;
|
||||||
barrier.size = size;
|
barrier.size = size;
|
||||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, {},
|
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, {},
|
||||||
|
@ -110,12 +81,12 @@ void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, st
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
|
void Buffer::Download(std::size_t offset, std::size_t size, u8* data) const {
|
||||||
u8* data) {
|
|
||||||
const auto& staging = staging_pool.GetUnusedBuffer(size, true);
|
const auto& staging = staging_pool.GetUnusedBuffer(size, true);
|
||||||
scheduler.RequestOutsideRenderPassOperationContext();
|
scheduler.RequestOutsideRenderPassOperationContext();
|
||||||
scheduler.Record([staging = *staging.handle, buffer = buffer->GetHandle(), offset,
|
|
||||||
size](vk::CommandBuffer cmdbuf) {
|
const VkBuffer handle = Handle();
|
||||||
|
scheduler.Record([staging = *staging.handle, handle, offset, size](vk::CommandBuffer cmdbuf) {
|
||||||
VkBufferMemoryBarrier barrier;
|
VkBufferMemoryBarrier barrier;
|
||||||
barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
|
barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
|
||||||
barrier.pNext = nullptr;
|
barrier.pNext = nullptr;
|
||||||
|
@ -123,7 +94,7 @@ void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset,
|
||||||
barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
|
barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
|
||||||
barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
||||||
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
||||||
barrier.buffer = buffer;
|
barrier.buffer = handle;
|
||||||
barrier.offset = offset;
|
barrier.offset = offset;
|
||||||
barrier.size = size;
|
barrier.size = size;
|
||||||
|
|
||||||
|
@ -131,18 +102,20 @@ void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset,
|
||||||
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
|
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
|
||||||
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
|
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
|
||||||
VK_PIPELINE_STAGE_TRANSFER_BIT, 0, {}, barrier, {});
|
VK_PIPELINE_STAGE_TRANSFER_BIT, 0, {}, barrier, {});
|
||||||
cmdbuf.CopyBuffer(buffer, staging, VkBufferCopy{offset, 0, size});
|
cmdbuf.CopyBuffer(handle, staging, VkBufferCopy{offset, 0, size});
|
||||||
});
|
});
|
||||||
scheduler.Finish();
|
scheduler.Finish();
|
||||||
|
|
||||||
std::memcpy(data, staging.commit->Map(size), size);
|
std::memcpy(data, staging.commit->Map(size), size);
|
||||||
}
|
}
|
||||||
|
|
||||||
void VKBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
|
void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
|
||||||
std::size_t dst_offset, std::size_t size) {
|
std::size_t size) const {
|
||||||
scheduler.RequestOutsideRenderPassOperationContext();
|
scheduler.RequestOutsideRenderPassOperationContext();
|
||||||
scheduler.Record([src_buffer = src->GetHandle(), dst_buffer = dst->GetHandle(), src_offset,
|
|
||||||
dst_offset, size](vk::CommandBuffer cmdbuf) {
|
const VkBuffer dst_buffer = Handle();
|
||||||
|
scheduler.Record([src_buffer = src.Handle(), dst_buffer, src_offset, dst_offset,
|
||||||
|
size](vk::CommandBuffer cmdbuf) {
|
||||||
cmdbuf.CopyBuffer(src_buffer, dst_buffer, VkBufferCopy{src_offset, dst_offset, size});
|
cmdbuf.CopyBuffer(src_buffer, dst_buffer, VkBufferCopy{src_offset, dst_offset, size});
|
||||||
|
|
||||||
std::array<VkBufferMemoryBarrier, 2> barriers;
|
std::array<VkBufferMemoryBarrier, 2> barriers;
|
||||||
|
@ -169,4 +142,30 @@ void VKBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
|
||||||
|
const VKDevice& device, VKMemoryManager& memory_manager,
|
||||||
|
VKScheduler& scheduler, VKStagingBufferPool& staging_pool)
|
||||||
|
: VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer, system,
|
||||||
|
CreateStreamBuffer(device,
|
||||||
|
scheduler)},
|
||||||
|
device{device}, memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{
|
||||||
|
staging_pool} {}
|
||||||
|
|
||||||
|
VKBufferCache::~VKBufferCache() = default;
|
||||||
|
|
||||||
|
std::shared_ptr<Buffer> VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
|
||||||
|
return std::make_shared<Buffer>(device, memory_manager, scheduler, staging_pool, cpu_addr,
|
||||||
|
size);
|
||||||
|
}
|
||||||
|
|
||||||
|
VKBufferCache::BufferInfo VKBufferCache::GetEmptyBuffer(std::size_t size) {
|
||||||
|
size = std::max(size, std::size_t(4));
|
||||||
|
const auto& empty = staging_pool.GetUnusedBuffer(size, false);
|
||||||
|
scheduler.RequestOutsideRenderPassOperationContext();
|
||||||
|
scheduler.Record([size, buffer = *empty.handle](vk::CommandBuffer cmdbuf) {
|
||||||
|
cmdbuf.FillBuffer(buffer, 0, size, 0);
|
||||||
|
});
|
||||||
|
return {*empty.handle, 0, 0};
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace Vulkan
|
} // namespace Vulkan
|
||||||
|
|
|
@ -8,7 +8,6 @@
|
||||||
|
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
#include "video_core/buffer_cache/buffer_cache.h"
|
#include "video_core/buffer_cache/buffer_cache.h"
|
||||||
#include "video_core/rasterizer_cache.h"
|
|
||||||
#include "video_core/renderer_vulkan/vk_memory_manager.h"
|
#include "video_core/renderer_vulkan/vk_memory_manager.h"
|
||||||
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
|
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
|
||||||
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
|
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
|
||||||
|
@ -24,22 +23,34 @@ class VKDevice;
|
||||||
class VKMemoryManager;
|
class VKMemoryManager;
|
||||||
class VKScheduler;
|
class VKScheduler;
|
||||||
|
|
||||||
class CachedBufferBlock final : public VideoCommon::BufferBlock {
|
class Buffer final : public VideoCommon::BufferBlock {
|
||||||
public:
|
public:
|
||||||
explicit CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager,
|
explicit Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VKScheduler& scheduler,
|
||||||
VAddr cpu_addr, std::size_t size);
|
VKStagingBufferPool& staging_pool, VAddr cpu_addr, std::size_t size);
|
||||||
~CachedBufferBlock();
|
~Buffer();
|
||||||
|
|
||||||
VkBuffer GetHandle() const {
|
void Upload(std::size_t offset, std::size_t size, const u8* data) const;
|
||||||
|
|
||||||
|
void Download(std::size_t offset, std::size_t size, u8* data) const;
|
||||||
|
|
||||||
|
void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
|
||||||
|
std::size_t size) const;
|
||||||
|
|
||||||
|
VkBuffer Handle() const {
|
||||||
return *buffer.handle;
|
return *buffer.handle;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
u64 Address() const {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
VKScheduler& scheduler;
|
||||||
|
VKStagingBufferPool& staging_pool;
|
||||||
|
|
||||||
VKBuffer buffer;
|
VKBuffer buffer;
|
||||||
};
|
};
|
||||||
|
|
||||||
using Buffer = std::shared_ptr<CachedBufferBlock>;
|
|
||||||
|
|
||||||
class VKBufferCache final : public VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer> {
|
class VKBufferCache final : public VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer> {
|
||||||
public:
|
public:
|
||||||
explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
|
explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
|
||||||
|
@ -47,21 +58,10 @@ public:
|
||||||
VKScheduler& scheduler, VKStagingBufferPool& staging_pool);
|
VKScheduler& scheduler, VKStagingBufferPool& staging_pool);
|
||||||
~VKBufferCache();
|
~VKBufferCache();
|
||||||
|
|
||||||
VkBuffer GetEmptyBuffer(std::size_t size) override;
|
BufferInfo GetEmptyBuffer(std::size_t size) override;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
VkBuffer ToHandle(const Buffer& buffer) override;
|
std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override;
|
||||||
|
|
||||||
Buffer CreateBlock(VAddr cpu_addr, std::size_t size) override;
|
|
||||||
|
|
||||||
void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
|
|
||||||
const u8* data) override;
|
|
||||||
|
|
||||||
void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
|
|
||||||
u8* data) override;
|
|
||||||
|
|
||||||
void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
|
|
||||||
std::size_t dst_offset, std::size_t size) override;
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
const VKDevice& device;
|
const VKDevice& device;
|
||||||
|
|
|
@ -53,8 +53,9 @@ vk::DescriptorSetLayout VKComputePipeline::CreateDescriptorSetLayout() const {
|
||||||
};
|
};
|
||||||
add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, entries.const_buffers.size());
|
add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, entries.const_buffers.size());
|
||||||
add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, entries.global_buffers.size());
|
add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, entries.global_buffers.size());
|
||||||
add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, entries.texel_buffers.size());
|
add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, entries.uniform_texels.size());
|
||||||
add_bindings(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, entries.samplers.size());
|
add_bindings(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, entries.samplers.size());
|
||||||
|
add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, entries.storage_texels.size());
|
||||||
add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, entries.images.size());
|
add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, entries.images.size());
|
||||||
|
|
||||||
VkDescriptorSetLayoutCreateInfo ci;
|
VkDescriptorSetLayoutCreateInfo ci;
|
||||||
|
|
|
@ -42,6 +42,7 @@ vk::DescriptorPool* VKDescriptorPool::AllocateNewPool() {
|
||||||
{VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, num_sets * 60},
|
{VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, num_sets * 60},
|
||||||
{VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, num_sets * 64},
|
{VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, num_sets * 64},
|
||||||
{VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, num_sets * 64},
|
{VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, num_sets * 64},
|
||||||
|
{VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, num_sets * 64},
|
||||||
{VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, num_sets * 40}};
|
{VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, num_sets * 40}};
|
||||||
|
|
||||||
VkDescriptorPoolCreateInfo ci;
|
VkDescriptorPoolCreateInfo ci;
|
||||||
|
|
|
@ -73,76 +73,79 @@ VkFormatFeatureFlags GetFormatFeatures(VkFormatProperties properties, FormatType
|
||||||
|
|
||||||
std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(
|
std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(
|
||||||
vk::PhysicalDevice physical, const vk::InstanceDispatch& dld) {
|
vk::PhysicalDevice physical, const vk::InstanceDispatch& dld) {
|
||||||
static constexpr std::array formats{VK_FORMAT_A8B8G8R8_UNORM_PACK32,
|
static constexpr std::array formats{
|
||||||
VK_FORMAT_A8B8G8R8_UINT_PACK32,
|
VK_FORMAT_A8B8G8R8_UNORM_PACK32,
|
||||||
VK_FORMAT_A8B8G8R8_SNORM_PACK32,
|
VK_FORMAT_A8B8G8R8_UINT_PACK32,
|
||||||
VK_FORMAT_A8B8G8R8_SRGB_PACK32,
|
VK_FORMAT_A8B8G8R8_SNORM_PACK32,
|
||||||
VK_FORMAT_B5G6R5_UNORM_PACK16,
|
VK_FORMAT_A8B8G8R8_SRGB_PACK32,
|
||||||
VK_FORMAT_A2B10G10R10_UNORM_PACK32,
|
VK_FORMAT_B5G6R5_UNORM_PACK16,
|
||||||
VK_FORMAT_A1R5G5B5_UNORM_PACK16,
|
VK_FORMAT_A2B10G10R10_UNORM_PACK32,
|
||||||
VK_FORMAT_R32G32B32A32_SFLOAT,
|
VK_FORMAT_A1R5G5B5_UNORM_PACK16,
|
||||||
VK_FORMAT_R32G32B32A32_UINT,
|
VK_FORMAT_R32G32B32A32_SFLOAT,
|
||||||
VK_FORMAT_R32G32_SFLOAT,
|
VK_FORMAT_R32G32B32A32_UINT,
|
||||||
VK_FORMAT_R32G32_UINT,
|
VK_FORMAT_R32G32_SFLOAT,
|
||||||
VK_FORMAT_R16G16B16A16_UINT,
|
VK_FORMAT_R32G32_UINT,
|
||||||
VK_FORMAT_R16G16B16A16_SNORM,
|
VK_FORMAT_R16G16B16A16_UINT,
|
||||||
VK_FORMAT_R16G16B16A16_UNORM,
|
VK_FORMAT_R16G16B16A16_SNORM,
|
||||||
VK_FORMAT_R16G16_UNORM,
|
VK_FORMAT_R16G16B16A16_UNORM,
|
||||||
VK_FORMAT_R16G16_SNORM,
|
VK_FORMAT_R16G16_UNORM,
|
||||||
VK_FORMAT_R16G16_SFLOAT,
|
VK_FORMAT_R16G16_SNORM,
|
||||||
VK_FORMAT_R16_UNORM,
|
VK_FORMAT_R16G16_SFLOAT,
|
||||||
VK_FORMAT_R8G8B8A8_SRGB,
|
VK_FORMAT_R16_UNORM,
|
||||||
VK_FORMAT_R8G8_UNORM,
|
VK_FORMAT_R16_UINT,
|
||||||
VK_FORMAT_R8G8_SNORM,
|
VK_FORMAT_R8G8B8A8_SRGB,
|
||||||
VK_FORMAT_R8G8_UINT,
|
VK_FORMAT_R8G8_UNORM,
|
||||||
VK_FORMAT_R8_UNORM,
|
VK_FORMAT_R8G8_SNORM,
|
||||||
VK_FORMAT_R8_UINT,
|
VK_FORMAT_R8G8_UINT,
|
||||||
VK_FORMAT_B10G11R11_UFLOAT_PACK32,
|
VK_FORMAT_R8_UNORM,
|
||||||
VK_FORMAT_R32_SFLOAT,
|
VK_FORMAT_R8_UINT,
|
||||||
VK_FORMAT_R32_UINT,
|
VK_FORMAT_B10G11R11_UFLOAT_PACK32,
|
||||||
VK_FORMAT_R32_SINT,
|
VK_FORMAT_R32_SFLOAT,
|
||||||
VK_FORMAT_R16_SFLOAT,
|
VK_FORMAT_R32_UINT,
|
||||||
VK_FORMAT_R16G16B16A16_SFLOAT,
|
VK_FORMAT_R32_SINT,
|
||||||
VK_FORMAT_B8G8R8A8_UNORM,
|
VK_FORMAT_R16_SFLOAT,
|
||||||
VK_FORMAT_B8G8R8A8_SRGB,
|
VK_FORMAT_R16G16B16A16_SFLOAT,
|
||||||
VK_FORMAT_R4G4B4A4_UNORM_PACK16,
|
VK_FORMAT_B8G8R8A8_UNORM,
|
||||||
VK_FORMAT_D32_SFLOAT,
|
VK_FORMAT_B8G8R8A8_SRGB,
|
||||||
VK_FORMAT_D16_UNORM,
|
VK_FORMAT_R4G4B4A4_UNORM_PACK16,
|
||||||
VK_FORMAT_D16_UNORM_S8_UINT,
|
VK_FORMAT_D32_SFLOAT,
|
||||||
VK_FORMAT_D24_UNORM_S8_UINT,
|
VK_FORMAT_D16_UNORM,
|
||||||
VK_FORMAT_D32_SFLOAT_S8_UINT,
|
VK_FORMAT_D16_UNORM_S8_UINT,
|
||||||
VK_FORMAT_BC1_RGBA_UNORM_BLOCK,
|
VK_FORMAT_D24_UNORM_S8_UINT,
|
||||||
VK_FORMAT_BC2_UNORM_BLOCK,
|
VK_FORMAT_D32_SFLOAT_S8_UINT,
|
||||||
VK_FORMAT_BC3_UNORM_BLOCK,
|
VK_FORMAT_BC1_RGBA_UNORM_BLOCK,
|
||||||
VK_FORMAT_BC4_UNORM_BLOCK,
|
VK_FORMAT_BC2_UNORM_BLOCK,
|
||||||
VK_FORMAT_BC5_UNORM_BLOCK,
|
VK_FORMAT_BC3_UNORM_BLOCK,
|
||||||
VK_FORMAT_BC5_SNORM_BLOCK,
|
VK_FORMAT_BC4_UNORM_BLOCK,
|
||||||
VK_FORMAT_BC7_UNORM_BLOCK,
|
VK_FORMAT_BC5_UNORM_BLOCK,
|
||||||
VK_FORMAT_BC6H_UFLOAT_BLOCK,
|
VK_FORMAT_BC5_SNORM_BLOCK,
|
||||||
VK_FORMAT_BC6H_SFLOAT_BLOCK,
|
VK_FORMAT_BC7_UNORM_BLOCK,
|
||||||
VK_FORMAT_BC1_RGBA_SRGB_BLOCK,
|
VK_FORMAT_BC6H_UFLOAT_BLOCK,
|
||||||
VK_FORMAT_BC2_SRGB_BLOCK,
|
VK_FORMAT_BC6H_SFLOAT_BLOCK,
|
||||||
VK_FORMAT_BC3_SRGB_BLOCK,
|
VK_FORMAT_BC1_RGBA_SRGB_BLOCK,
|
||||||
VK_FORMAT_BC7_SRGB_BLOCK,
|
VK_FORMAT_BC2_SRGB_BLOCK,
|
||||||
VK_FORMAT_ASTC_4x4_SRGB_BLOCK,
|
VK_FORMAT_BC3_SRGB_BLOCK,
|
||||||
VK_FORMAT_ASTC_8x8_SRGB_BLOCK,
|
VK_FORMAT_BC7_SRGB_BLOCK,
|
||||||
VK_FORMAT_ASTC_8x5_SRGB_BLOCK,
|
VK_FORMAT_ASTC_4x4_SRGB_BLOCK,
|
||||||
VK_FORMAT_ASTC_5x4_SRGB_BLOCK,
|
VK_FORMAT_ASTC_8x8_SRGB_BLOCK,
|
||||||
VK_FORMAT_ASTC_5x5_UNORM_BLOCK,
|
VK_FORMAT_ASTC_8x5_SRGB_BLOCK,
|
||||||
VK_FORMAT_ASTC_5x5_SRGB_BLOCK,
|
VK_FORMAT_ASTC_5x4_SRGB_BLOCK,
|
||||||
VK_FORMAT_ASTC_10x8_UNORM_BLOCK,
|
VK_FORMAT_ASTC_5x5_UNORM_BLOCK,
|
||||||
VK_FORMAT_ASTC_10x8_SRGB_BLOCK,
|
VK_FORMAT_ASTC_5x5_SRGB_BLOCK,
|
||||||
VK_FORMAT_ASTC_6x6_UNORM_BLOCK,
|
VK_FORMAT_ASTC_10x8_UNORM_BLOCK,
|
||||||
VK_FORMAT_ASTC_6x6_SRGB_BLOCK,
|
VK_FORMAT_ASTC_10x8_SRGB_BLOCK,
|
||||||
VK_FORMAT_ASTC_10x10_UNORM_BLOCK,
|
VK_FORMAT_ASTC_6x6_UNORM_BLOCK,
|
||||||
VK_FORMAT_ASTC_10x10_SRGB_BLOCK,
|
VK_FORMAT_ASTC_6x6_SRGB_BLOCK,
|
||||||
VK_FORMAT_ASTC_12x12_UNORM_BLOCK,
|
VK_FORMAT_ASTC_10x10_UNORM_BLOCK,
|
||||||
VK_FORMAT_ASTC_12x12_SRGB_BLOCK,
|
VK_FORMAT_ASTC_10x10_SRGB_BLOCK,
|
||||||
VK_FORMAT_ASTC_8x6_UNORM_BLOCK,
|
VK_FORMAT_ASTC_12x12_UNORM_BLOCK,
|
||||||
VK_FORMAT_ASTC_8x6_SRGB_BLOCK,
|
VK_FORMAT_ASTC_12x12_SRGB_BLOCK,
|
||||||
VK_FORMAT_ASTC_6x5_UNORM_BLOCK,
|
VK_FORMAT_ASTC_8x6_UNORM_BLOCK,
|
||||||
VK_FORMAT_ASTC_6x5_SRGB_BLOCK,
|
VK_FORMAT_ASTC_8x6_SRGB_BLOCK,
|
||||||
VK_FORMAT_E5B9G9R9_UFLOAT_PACK32};
|
VK_FORMAT_ASTC_6x5_UNORM_BLOCK,
|
||||||
|
VK_FORMAT_ASTC_6x5_SRGB_BLOCK,
|
||||||
|
VK_FORMAT_E5B9G9R9_UFLOAT_PACK32,
|
||||||
|
};
|
||||||
std::unordered_map<VkFormat, VkFormatProperties> format_properties;
|
std::unordered_map<VkFormat, VkFormatProperties> format_properties;
|
||||||
for (const auto format : formats) {
|
for (const auto format : formats) {
|
||||||
format_properties.emplace(format, physical.GetFormatProperties(format));
|
format_properties.emplace(format, physical.GetFormatProperties(format));
|
||||||
|
|
|
@ -27,6 +27,7 @@
|
||||||
#include "video_core/renderer_vulkan/wrapper.h"
|
#include "video_core/renderer_vulkan/wrapper.h"
|
||||||
#include "video_core/shader/compiler_settings.h"
|
#include "video_core/shader/compiler_settings.h"
|
||||||
#include "video_core/shader/memory_util.h"
|
#include "video_core/shader/memory_util.h"
|
||||||
|
#include "video_core/shader_cache.h"
|
||||||
|
|
||||||
namespace Vulkan {
|
namespace Vulkan {
|
||||||
|
|
||||||
|
@ -45,6 +46,7 @@ constexpr VkDescriptorType UNIFORM_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
|
||||||
constexpr VkDescriptorType STORAGE_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
|
constexpr VkDescriptorType STORAGE_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
|
||||||
constexpr VkDescriptorType UNIFORM_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER;
|
constexpr VkDescriptorType UNIFORM_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER;
|
||||||
constexpr VkDescriptorType COMBINED_IMAGE_SAMPLER = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
|
constexpr VkDescriptorType COMBINED_IMAGE_SAMPLER = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
|
||||||
|
constexpr VkDescriptorType STORAGE_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER;
|
||||||
constexpr VkDescriptorType STORAGE_IMAGE = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
|
constexpr VkDescriptorType STORAGE_IMAGE = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
|
||||||
|
|
||||||
constexpr VideoCommon::Shader::CompilerSettings compiler_settings{
|
constexpr VideoCommon::Shader::CompilerSettings compiler_settings{
|
||||||
|
@ -104,8 +106,9 @@ u32 FillDescriptorLayout(const ShaderEntries& entries,
|
||||||
u32 binding = base_binding;
|
u32 binding = base_binding;
|
||||||
AddBindings<UNIFORM_BUFFER>(bindings, binding, flags, entries.const_buffers);
|
AddBindings<UNIFORM_BUFFER>(bindings, binding, flags, entries.const_buffers);
|
||||||
AddBindings<STORAGE_BUFFER>(bindings, binding, flags, entries.global_buffers);
|
AddBindings<STORAGE_BUFFER>(bindings, binding, flags, entries.global_buffers);
|
||||||
AddBindings<UNIFORM_TEXEL_BUFFER>(bindings, binding, flags, entries.texel_buffers);
|
AddBindings<UNIFORM_TEXEL_BUFFER>(bindings, binding, flags, entries.uniform_texels);
|
||||||
AddBindings<COMBINED_IMAGE_SAMPLER>(bindings, binding, flags, entries.samplers);
|
AddBindings<COMBINED_IMAGE_SAMPLER>(bindings, binding, flags, entries.samplers);
|
||||||
|
AddBindings<STORAGE_TEXEL_BUFFER>(bindings, binding, flags, entries.storage_texels);
|
||||||
AddBindings<STORAGE_IMAGE>(bindings, binding, flags, entries.images);
|
AddBindings<STORAGE_IMAGE>(bindings, binding, flags, entries.images);
|
||||||
return binding;
|
return binding;
|
||||||
}
|
}
|
||||||
|
@ -130,19 +133,18 @@ bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) con
|
||||||
return std::memcmp(&rhs, this, sizeof *this) == 0;
|
return std::memcmp(&rhs, this, sizeof *this) == 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stage,
|
Shader::Shader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr,
|
||||||
GPUVAddr gpu_addr, VAddr cpu_addr, ProgramCode program_code,
|
VideoCommon::Shader::ProgramCode program_code, u32 main_offset)
|
||||||
u32 main_offset)
|
: gpu_addr{gpu_addr}, program_code{std::move(program_code)},
|
||||||
: RasterizerCacheObject{cpu_addr}, gpu_addr{gpu_addr}, program_code{std::move(program_code)},
|
|
||||||
registry{stage, GetEngine(system, stage)}, shader_ir{this->program_code, main_offset,
|
registry{stage, GetEngine(system, stage)}, shader_ir{this->program_code, main_offset,
|
||||||
compiler_settings, registry},
|
compiler_settings, registry},
|
||||||
entries{GenerateShaderEntries(shader_ir)} {}
|
entries{GenerateShaderEntries(shader_ir)} {}
|
||||||
|
|
||||||
CachedShader::~CachedShader() = default;
|
Shader::~Shader() = default;
|
||||||
|
|
||||||
Tegra::Engines::ConstBufferEngineInterface& CachedShader::GetEngine(
|
Tegra::Engines::ConstBufferEngineInterface& Shader::GetEngine(Core::System& system,
|
||||||
Core::System& system, Tegra::Engines::ShaderType stage) {
|
Tegra::Engines::ShaderType stage) {
|
||||||
if (stage == Tegra::Engines::ShaderType::Compute) {
|
if (stage == ShaderType::Compute) {
|
||||||
return system.GPU().KeplerCompute();
|
return system.GPU().KeplerCompute();
|
||||||
} else {
|
} else {
|
||||||
return system.GPU().Maxwell3D();
|
return system.GPU().Maxwell3D();
|
||||||
|
@ -154,16 +156,16 @@ VKPipelineCache::VKPipelineCache(Core::System& system, RasterizerVulkan& rasteri
|
||||||
VKDescriptorPool& descriptor_pool,
|
VKDescriptorPool& descriptor_pool,
|
||||||
VKUpdateDescriptorQueue& update_descriptor_queue,
|
VKUpdateDescriptorQueue& update_descriptor_queue,
|
||||||
VKRenderPassCache& renderpass_cache)
|
VKRenderPassCache& renderpass_cache)
|
||||||
: RasterizerCache{rasterizer}, system{system}, device{device}, scheduler{scheduler},
|
: VideoCommon::ShaderCache<Shader>{rasterizer}, system{system}, device{device},
|
||||||
descriptor_pool{descriptor_pool}, update_descriptor_queue{update_descriptor_queue},
|
scheduler{scheduler}, descriptor_pool{descriptor_pool},
|
||||||
renderpass_cache{renderpass_cache} {}
|
update_descriptor_queue{update_descriptor_queue}, renderpass_cache{renderpass_cache} {}
|
||||||
|
|
||||||
VKPipelineCache::~VKPipelineCache() = default;
|
VKPipelineCache::~VKPipelineCache() = default;
|
||||||
|
|
||||||
std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
|
std::array<Shader*, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
|
||||||
const auto& gpu = system.GPU().Maxwell3D();
|
const auto& gpu = system.GPU().Maxwell3D();
|
||||||
|
|
||||||
std::array<Shader, Maxwell::MaxShaderProgram> shaders;
|
std::array<Shader*, Maxwell::MaxShaderProgram> shaders{};
|
||||||
for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
|
for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
|
||||||
const auto program{static_cast<Maxwell::ShaderProgram>(index)};
|
const auto program{static_cast<Maxwell::ShaderProgram>(index)};
|
||||||
|
|
||||||
|
@ -176,24 +178,28 @@ std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
|
||||||
const GPUVAddr program_addr{GetShaderAddress(system, program)};
|
const GPUVAddr program_addr{GetShaderAddress(system, program)};
|
||||||
const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
|
const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
|
||||||
ASSERT(cpu_addr);
|
ASSERT(cpu_addr);
|
||||||
auto shader = cpu_addr ? TryGet(*cpu_addr) : null_shader;
|
|
||||||
if (!shader) {
|
Shader* result = cpu_addr ? TryGet(*cpu_addr) : null_shader.get();
|
||||||
|
if (!result) {
|
||||||
const auto host_ptr{memory_manager.GetPointer(program_addr)};
|
const auto host_ptr{memory_manager.GetPointer(program_addr)};
|
||||||
|
|
||||||
// No shader found - create a new one
|
// No shader found - create a new one
|
||||||
constexpr u32 stage_offset = STAGE_MAIN_OFFSET;
|
constexpr u32 stage_offset = STAGE_MAIN_OFFSET;
|
||||||
const auto stage = static_cast<Tegra::Engines::ShaderType>(index == 0 ? 0 : index - 1);
|
const auto stage = static_cast<ShaderType>(index == 0 ? 0 : index - 1);
|
||||||
ProgramCode code = GetShaderCode(memory_manager, program_addr, host_ptr, false);
|
ProgramCode code = GetShaderCode(memory_manager, program_addr, host_ptr, false);
|
||||||
|
const std::size_t size_in_bytes = code.size() * sizeof(u64);
|
||||||
|
|
||||||
|
auto shader = std::make_unique<Shader>(system, stage, program_addr, std::move(code),
|
||||||
|
stage_offset);
|
||||||
|
result = shader.get();
|
||||||
|
|
||||||
shader = std::make_shared<CachedShader>(system, stage, program_addr, *cpu_addr,
|
|
||||||
std::move(code), stage_offset);
|
|
||||||
if (cpu_addr) {
|
if (cpu_addr) {
|
||||||
Register(shader);
|
Register(std::move(shader), *cpu_addr, size_in_bytes);
|
||||||
} else {
|
} else {
|
||||||
null_shader = shader;
|
null_shader = std::move(shader);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
shaders[index] = std::move(shader);
|
shaders[index] = result;
|
||||||
}
|
}
|
||||||
return last_shaders = shaders;
|
return last_shaders = shaders;
|
||||||
}
|
}
|
||||||
|
@ -234,19 +240,22 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
|
||||||
const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
|
const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
|
||||||
ASSERT(cpu_addr);
|
ASSERT(cpu_addr);
|
||||||
|
|
||||||
auto shader = cpu_addr ? TryGet(*cpu_addr) : null_kernel;
|
Shader* shader = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get();
|
||||||
if (!shader) {
|
if (!shader) {
|
||||||
// No shader found - create a new one
|
// No shader found - create a new one
|
||||||
const auto host_ptr = memory_manager.GetPointer(program_addr);
|
const auto host_ptr = memory_manager.GetPointer(program_addr);
|
||||||
|
|
||||||
ProgramCode code = GetShaderCode(memory_manager, program_addr, host_ptr, true);
|
ProgramCode code = GetShaderCode(memory_manager, program_addr, host_ptr, true);
|
||||||
shader = std::make_shared<CachedShader>(system, Tegra::Engines::ShaderType::Compute,
|
const std::size_t size_in_bytes = code.size() * sizeof(u64);
|
||||||
program_addr, *cpu_addr, std::move(code),
|
|
||||||
KERNEL_MAIN_OFFSET);
|
auto shader_info = std::make_unique<Shader>(system, ShaderType::Compute, program_addr,
|
||||||
|
std::move(code), KERNEL_MAIN_OFFSET);
|
||||||
|
shader = shader_info.get();
|
||||||
|
|
||||||
if (cpu_addr) {
|
if (cpu_addr) {
|
||||||
Register(shader);
|
Register(std::move(shader_info), *cpu_addr, size_in_bytes);
|
||||||
} else {
|
} else {
|
||||||
null_kernel = shader;
|
null_kernel = std::move(shader_info);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -262,7 +271,7 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
|
||||||
return *entry;
|
return *entry;
|
||||||
}
|
}
|
||||||
|
|
||||||
void VKPipelineCache::Unregister(const Shader& shader) {
|
void VKPipelineCache::OnShaderRemoval(Shader* shader) {
|
||||||
bool finished = false;
|
bool finished = false;
|
||||||
const auto Finish = [&] {
|
const auto Finish = [&] {
|
||||||
// TODO(Rodrigo): Instead of finishing here, wait for the fences that use this pipeline and
|
// TODO(Rodrigo): Instead of finishing here, wait for the fences that use this pipeline and
|
||||||
|
@ -294,8 +303,6 @@ void VKPipelineCache::Unregister(const Shader& shader) {
|
||||||
Finish();
|
Finish();
|
||||||
it = compute_cache.erase(it);
|
it = compute_cache.erase(it);
|
||||||
}
|
}
|
||||||
|
|
||||||
RasterizerCache::Unregister(shader);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>>
|
std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>>
|
||||||
|
@ -312,7 +319,9 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
|
||||||
ASSERT(point_size != 0.0f);
|
ASSERT(point_size != 0.0f);
|
||||||
}
|
}
|
||||||
for (std::size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) {
|
for (std::size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) {
|
||||||
specialization.attribute_types[i] = fixed_state.vertex_input.attributes[i].Type();
|
const auto& attribute = fixed_state.vertex_input.attributes[i];
|
||||||
|
specialization.enabled_attributes[i] = attribute.enabled.Value() != 0;
|
||||||
|
specialization.attribute_types[i] = attribute.Type();
|
||||||
}
|
}
|
||||||
specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one;
|
specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one;
|
||||||
|
|
||||||
|
@ -328,12 +337,11 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
|
||||||
}
|
}
|
||||||
|
|
||||||
const GPUVAddr gpu_addr = GetShaderAddress(system, program_enum);
|
const GPUVAddr gpu_addr = GetShaderAddress(system, program_enum);
|
||||||
const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
|
const std::optional<VAddr> cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
|
||||||
const auto shader = cpu_addr ? TryGet(*cpu_addr) : null_shader;
|
Shader* const shader = cpu_addr ? TryGet(*cpu_addr) : null_shader.get();
|
||||||
ASSERT(shader);
|
|
||||||
|
|
||||||
const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5
|
const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5
|
||||||
const auto program_type = GetShaderType(program_enum);
|
const ShaderType program_type = GetShaderType(program_enum);
|
||||||
const auto& entries = shader->GetEntries();
|
const auto& entries = shader->GetEntries();
|
||||||
program[stage] = {
|
program[stage] = {
|
||||||
Decompile(device, shader->GetIR(), program_type, shader->GetRegistry(), specialization),
|
Decompile(device, shader->GetIR(), program_type, shader->GetRegistry(), specialization),
|
||||||
|
@ -375,16 +383,17 @@ void AddEntry(std::vector<VkDescriptorUpdateTemplateEntry>& template_entries, u3
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if constexpr (descriptor_type == UNIFORM_TEXEL_BUFFER) {
|
if constexpr (descriptor_type == UNIFORM_TEXEL_BUFFER ||
|
||||||
// Nvidia has a bug where updating multiple uniform texels at once causes the driver to
|
descriptor_type == STORAGE_TEXEL_BUFFER) {
|
||||||
// crash.
|
// Nvidia has a bug where updating multiple texels at once causes the driver to crash.
|
||||||
|
// Note: Fixed in driver Windows 443.24, Linux 440.66.15
|
||||||
for (u32 i = 0; i < count; ++i) {
|
for (u32 i = 0; i < count; ++i) {
|
||||||
VkDescriptorUpdateTemplateEntry& entry = template_entries.emplace_back();
|
VkDescriptorUpdateTemplateEntry& entry = template_entries.emplace_back();
|
||||||
entry.dstBinding = binding + i;
|
entry.dstBinding = binding + i;
|
||||||
entry.dstArrayElement = 0;
|
entry.dstArrayElement = 0;
|
||||||
entry.descriptorCount = 1;
|
entry.descriptorCount = 1;
|
||||||
entry.descriptorType = descriptor_type;
|
entry.descriptorType = descriptor_type;
|
||||||
entry.offset = offset + i * entry_size;
|
entry.offset = static_cast<std::size_t>(offset + i * entry_size);
|
||||||
entry.stride = entry_size;
|
entry.stride = entry_size;
|
||||||
}
|
}
|
||||||
} else if (count > 0) {
|
} else if (count > 0) {
|
||||||
|
@ -405,8 +414,9 @@ void FillDescriptorUpdateTemplateEntries(
|
||||||
std::vector<VkDescriptorUpdateTemplateEntryKHR>& template_entries) {
|
std::vector<VkDescriptorUpdateTemplateEntryKHR>& template_entries) {
|
||||||
AddEntry<UNIFORM_BUFFER>(template_entries, offset, binding, entries.const_buffers);
|
AddEntry<UNIFORM_BUFFER>(template_entries, offset, binding, entries.const_buffers);
|
||||||
AddEntry<STORAGE_BUFFER>(template_entries, offset, binding, entries.global_buffers);
|
AddEntry<STORAGE_BUFFER>(template_entries, offset, binding, entries.global_buffers);
|
||||||
AddEntry<UNIFORM_TEXEL_BUFFER>(template_entries, offset, binding, entries.texel_buffers);
|
AddEntry<UNIFORM_TEXEL_BUFFER>(template_entries, offset, binding, entries.uniform_texels);
|
||||||
AddEntry<COMBINED_IMAGE_SAMPLER>(template_entries, offset, binding, entries.samplers);
|
AddEntry<COMBINED_IMAGE_SAMPLER>(template_entries, offset, binding, entries.samplers);
|
||||||
|
AddEntry<STORAGE_TEXEL_BUFFER>(template_entries, offset, binding, entries.storage_texels);
|
||||||
AddEntry<STORAGE_IMAGE>(template_entries, offset, binding, entries.images);
|
AddEntry<STORAGE_IMAGE>(template_entries, offset, binding, entries.images);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -17,7 +17,6 @@
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
#include "video_core/engines/const_buffer_engine_interface.h"
|
#include "video_core/engines/const_buffer_engine_interface.h"
|
||||||
#include "video_core/engines/maxwell_3d.h"
|
#include "video_core/engines/maxwell_3d.h"
|
||||||
#include "video_core/rasterizer_cache.h"
|
|
||||||
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
|
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
|
||||||
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
|
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
|
||||||
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
|
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
|
||||||
|
@ -26,6 +25,7 @@
|
||||||
#include "video_core/shader/memory_util.h"
|
#include "video_core/shader/memory_util.h"
|
||||||
#include "video_core/shader/registry.h"
|
#include "video_core/shader/registry.h"
|
||||||
#include "video_core/shader/shader_ir.h"
|
#include "video_core/shader/shader_ir.h"
|
||||||
|
#include "video_core/shader_cache.h"
|
||||||
|
|
||||||
namespace Core {
|
namespace Core {
|
||||||
class System;
|
class System;
|
||||||
|
@ -41,8 +41,6 @@ class VKFence;
|
||||||
class VKScheduler;
|
class VKScheduler;
|
||||||
class VKUpdateDescriptorQueue;
|
class VKUpdateDescriptorQueue;
|
||||||
|
|
||||||
class CachedShader;
|
|
||||||
using Shader = std::shared_ptr<CachedShader>;
|
|
||||||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||||
|
|
||||||
struct GraphicsPipelineCacheKey {
|
struct GraphicsPipelineCacheKey {
|
||||||
|
@ -102,21 +100,16 @@ struct hash<Vulkan::ComputePipelineCacheKey> {
|
||||||
|
|
||||||
namespace Vulkan {
|
namespace Vulkan {
|
||||||
|
|
||||||
class CachedShader final : public RasterizerCacheObject {
|
class Shader {
|
||||||
public:
|
public:
|
||||||
explicit CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr,
|
explicit Shader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr,
|
||||||
VAddr cpu_addr, VideoCommon::Shader::ProgramCode program_code,
|
VideoCommon::Shader::ProgramCode program_code, u32 main_offset);
|
||||||
u32 main_offset);
|
~Shader();
|
||||||
~CachedShader();
|
|
||||||
|
|
||||||
GPUVAddr GetGpuAddr() const {
|
GPUVAddr GetGpuAddr() const {
|
||||||
return gpu_addr;
|
return gpu_addr;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::size_t GetSizeInBytes() const override {
|
|
||||||
return program_code.size() * sizeof(u64);
|
|
||||||
}
|
|
||||||
|
|
||||||
VideoCommon::Shader::ShaderIR& GetIR() {
|
VideoCommon::Shader::ShaderIR& GetIR() {
|
||||||
return shader_ir;
|
return shader_ir;
|
||||||
}
|
}
|
||||||
|
@ -144,25 +137,23 @@ private:
|
||||||
ShaderEntries entries;
|
ShaderEntries entries;
|
||||||
};
|
};
|
||||||
|
|
||||||
class VKPipelineCache final : public RasterizerCache<Shader> {
|
class VKPipelineCache final : public VideoCommon::ShaderCache<Shader> {
|
||||||
public:
|
public:
|
||||||
explicit VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer,
|
explicit VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer,
|
||||||
const VKDevice& device, VKScheduler& scheduler,
|
const VKDevice& device, VKScheduler& scheduler,
|
||||||
VKDescriptorPool& descriptor_pool,
|
VKDescriptorPool& descriptor_pool,
|
||||||
VKUpdateDescriptorQueue& update_descriptor_queue,
|
VKUpdateDescriptorQueue& update_descriptor_queue,
|
||||||
VKRenderPassCache& renderpass_cache);
|
VKRenderPassCache& renderpass_cache);
|
||||||
~VKPipelineCache();
|
~VKPipelineCache() override;
|
||||||
|
|
||||||
std::array<Shader, Maxwell::MaxShaderProgram> GetShaders();
|
std::array<Shader*, Maxwell::MaxShaderProgram> GetShaders();
|
||||||
|
|
||||||
VKGraphicsPipeline& GetGraphicsPipeline(const GraphicsPipelineCacheKey& key);
|
VKGraphicsPipeline& GetGraphicsPipeline(const GraphicsPipelineCacheKey& key);
|
||||||
|
|
||||||
VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key);
|
VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key);
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
void Unregister(const Shader& shader) override;
|
void OnShaderRemoval(Shader* shader) final;
|
||||||
|
|
||||||
void FlushObjectInner(const Shader& object) override {}
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> DecompileShaders(
|
std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> DecompileShaders(
|
||||||
|
@ -175,10 +166,10 @@ private:
|
||||||
VKUpdateDescriptorQueue& update_descriptor_queue;
|
VKUpdateDescriptorQueue& update_descriptor_queue;
|
||||||
VKRenderPassCache& renderpass_cache;
|
VKRenderPassCache& renderpass_cache;
|
||||||
|
|
||||||
Shader null_shader{};
|
std::unique_ptr<Shader> null_shader;
|
||||||
Shader null_kernel{};
|
std::unique_ptr<Shader> null_kernel;
|
||||||
|
|
||||||
std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
|
std::array<Shader*, Maxwell::MaxShaderProgram> last_shaders{};
|
||||||
|
|
||||||
GraphicsPipelineCacheKey last_graphics_key;
|
GraphicsPipelineCacheKey last_graphics_key;
|
||||||
VKGraphicsPipeline* last_graphics_pipeline = nullptr;
|
VKGraphicsPipeline* last_graphics_pipeline = nullptr;
|
||||||
|
|
|
@ -38,6 +38,7 @@
|
||||||
#include "video_core/renderer_vulkan/vk_texture_cache.h"
|
#include "video_core/renderer_vulkan/vk_texture_cache.h"
|
||||||
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
|
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
|
||||||
#include "video_core/renderer_vulkan/wrapper.h"
|
#include "video_core/renderer_vulkan/wrapper.h"
|
||||||
|
#include "video_core/shader_cache.h"
|
||||||
|
|
||||||
namespace Vulkan {
|
namespace Vulkan {
|
||||||
|
|
||||||
|
@ -98,7 +99,7 @@ VkRect2D GetScissorState(const Maxwell& regs, std::size_t index) {
|
||||||
}
|
}
|
||||||
|
|
||||||
std::array<GPUVAddr, Maxwell::MaxShaderProgram> GetShaderAddresses(
|
std::array<GPUVAddr, Maxwell::MaxShaderProgram> GetShaderAddresses(
|
||||||
const std::array<Shader, Maxwell::MaxShaderProgram>& shaders) {
|
const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) {
|
||||||
std::array<GPUVAddr, Maxwell::MaxShaderProgram> addresses;
|
std::array<GPUVAddr, Maxwell::MaxShaderProgram> addresses;
|
||||||
for (std::size_t i = 0; i < std::size(addresses); ++i) {
|
for (std::size_t i = 0; i < std::size(addresses); ++i) {
|
||||||
addresses[i] = shaders[i] ? shaders[i]->GetGpuAddr() : 0;
|
addresses[i] = shaders[i] ? shaders[i]->GetGpuAddr() : 0;
|
||||||
|
@ -117,6 +118,17 @@ template <typename Engine, typename Entry>
|
||||||
Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry,
|
Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry,
|
||||||
std::size_t stage, std::size_t index = 0) {
|
std::size_t stage, std::size_t index = 0) {
|
||||||
const auto stage_type = static_cast<Tegra::Engines::ShaderType>(stage);
|
const auto stage_type = static_cast<Tegra::Engines::ShaderType>(stage);
|
||||||
|
if constexpr (std::is_same_v<Entry, SamplerEntry>) {
|
||||||
|
if (entry.is_separated) {
|
||||||
|
const u32 buffer_1 = entry.buffer;
|
||||||
|
const u32 buffer_2 = entry.secondary_buffer;
|
||||||
|
const u32 offset_1 = entry.offset;
|
||||||
|
const u32 offset_2 = entry.secondary_offset;
|
||||||
|
const u32 handle_1 = engine.AccessConstBuffer32(stage_type, buffer_1, offset_1);
|
||||||
|
const u32 handle_2 = engine.AccessConstBuffer32(stage_type, buffer_2, offset_2);
|
||||||
|
return engine.GetTextureInfo(handle_1 | handle_2);
|
||||||
|
}
|
||||||
|
}
|
||||||
if (entry.is_bindless) {
|
if (entry.is_bindless) {
|
||||||
const auto tex_handle = engine.AccessConstBuffer32(stage_type, entry.buffer, entry.offset);
|
const auto tex_handle = engine.AccessConstBuffer32(stage_type, entry.buffer, entry.offset);
|
||||||
return engine.GetTextureInfo(tex_handle);
|
return engine.GetTextureInfo(tex_handle);
|
||||||
|
@ -468,8 +480,9 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
|
||||||
const auto& entries = pipeline.GetEntries();
|
const auto& entries = pipeline.GetEntries();
|
||||||
SetupComputeConstBuffers(entries);
|
SetupComputeConstBuffers(entries);
|
||||||
SetupComputeGlobalBuffers(entries);
|
SetupComputeGlobalBuffers(entries);
|
||||||
SetupComputeTexelBuffers(entries);
|
SetupComputeUniformTexels(entries);
|
||||||
SetupComputeTextures(entries);
|
SetupComputeTextures(entries);
|
||||||
|
SetupComputeStorageTexels(entries);
|
||||||
SetupComputeImages(entries);
|
SetupComputeImages(entries);
|
||||||
|
|
||||||
buffer_cache.Unmap();
|
buffer_cache.Unmap();
|
||||||
|
@ -715,7 +728,7 @@ std::tuple<VkFramebuffer, VkExtent2D> RasterizerVulkan::ConfigureFramebuffers(
|
||||||
if (!view) {
|
if (!view) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
key.views.push_back(view->GetHandle());
|
key.views.push_back(view->GetAttachment());
|
||||||
key.width = std::min(key.width, view->GetWidth());
|
key.width = std::min(key.width, view->GetWidth());
|
||||||
key.height = std::min(key.height, view->GetHeight());
|
key.height = std::min(key.height, view->GetHeight());
|
||||||
key.layers = std::min(key.layers, view->GetNumLayers());
|
key.layers = std::min(key.layers, view->GetNumLayers());
|
||||||
|
@ -775,20 +788,21 @@ RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineSt
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerVulkan::SetupShaderDescriptors(
|
void RasterizerVulkan::SetupShaderDescriptors(
|
||||||
const std::array<Shader, Maxwell::MaxShaderProgram>& shaders) {
|
const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) {
|
||||||
texture_cache.GuardSamplers(true);
|
texture_cache.GuardSamplers(true);
|
||||||
|
|
||||||
for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
|
for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
|
||||||
// Skip VertexA stage
|
// Skip VertexA stage
|
||||||
const auto& shader = shaders[stage + 1];
|
Shader* const shader = shaders[stage + 1];
|
||||||
if (!shader) {
|
if (!shader) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
const auto& entries = shader->GetEntries();
|
const auto& entries = shader->GetEntries();
|
||||||
SetupGraphicsConstBuffers(entries, stage);
|
SetupGraphicsConstBuffers(entries, stage);
|
||||||
SetupGraphicsGlobalBuffers(entries, stage);
|
SetupGraphicsGlobalBuffers(entries, stage);
|
||||||
SetupGraphicsTexelBuffers(entries, stage);
|
SetupGraphicsUniformTexels(entries, stage);
|
||||||
SetupGraphicsTextures(entries, stage);
|
SetupGraphicsTextures(entries, stage);
|
||||||
|
SetupGraphicsStorageTexels(entries, stage);
|
||||||
SetupGraphicsImages(entries, stage);
|
SetupGraphicsImages(entries, stage);
|
||||||
}
|
}
|
||||||
texture_cache.GuardSamplers(false);
|
texture_cache.GuardSamplers(false);
|
||||||
|
@ -838,6 +852,10 @@ void RasterizerVulkan::BeginTransformFeedback() {
|
||||||
if (regs.tfb_enabled == 0) {
|
if (regs.tfb_enabled == 0) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
if (!device.IsExtTransformFeedbackSupported()) {
|
||||||
|
LOG_ERROR(Render_Vulkan, "Transform feedbacks used but not supported");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) ||
|
UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) ||
|
||||||
regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) ||
|
regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) ||
|
||||||
|
@ -852,10 +870,10 @@ void RasterizerVulkan::BeginTransformFeedback() {
|
||||||
UNIMPLEMENTED_IF(binding.buffer_offset != 0);
|
UNIMPLEMENTED_IF(binding.buffer_offset != 0);
|
||||||
|
|
||||||
const GPUVAddr gpu_addr = binding.Address();
|
const GPUVAddr gpu_addr = binding.Address();
|
||||||
const std::size_t size = binding.buffer_size;
|
const VkDeviceSize size = static_cast<VkDeviceSize>(binding.buffer_size);
|
||||||
const auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
|
const auto info = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
|
||||||
|
|
||||||
scheduler.Record([buffer = buffer, offset = offset, size](vk::CommandBuffer cmdbuf) {
|
scheduler.Record([buffer = info.handle, offset = info.offset, size](vk::CommandBuffer cmdbuf) {
|
||||||
cmdbuf.BindTransformFeedbackBuffersEXT(0, 1, &buffer, &offset, &size);
|
cmdbuf.BindTransformFeedbackBuffersEXT(0, 1, &buffer, &offset, &size);
|
||||||
cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr);
|
cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr);
|
||||||
});
|
});
|
||||||
|
@ -866,6 +884,9 @@ void RasterizerVulkan::EndTransformFeedback() {
|
||||||
if (regs.tfb_enabled == 0) {
|
if (regs.tfb_enabled == 0) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
if (!device.IsExtTransformFeedbackSupported()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
scheduler.Record(
|
scheduler.Record(
|
||||||
[](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); });
|
[](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); });
|
||||||
|
@ -877,14 +898,10 @@ void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex
|
||||||
|
|
||||||
for (std::size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) {
|
for (std::size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) {
|
||||||
const auto& attrib = regs.vertex_attrib_format[index];
|
const auto& attrib = regs.vertex_attrib_format[index];
|
||||||
if (!attrib.IsValid()) {
|
if (attrib.IsConstant()) {
|
||||||
vertex_input.SetAttribute(index, false, 0, 0, {}, {});
|
vertex_input.SetAttribute(index, false, 0, 0, {}, {});
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
[[maybe_unused]] const auto& buffer = regs.vertex_array[attrib.buffer];
|
|
||||||
ASSERT(buffer.IsEnabled());
|
|
||||||
|
|
||||||
vertex_input.SetAttribute(index, true, attrib.buffer, attrib.offset, attrib.type.Value(),
|
vertex_input.SetAttribute(index, true, attrib.buffer, attrib.offset, attrib.type.Value(),
|
||||||
attrib.size.Value());
|
attrib.size.Value());
|
||||||
}
|
}
|
||||||
|
@ -908,8 +925,8 @@ void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex
|
||||||
buffer_bindings.AddVertexBinding(DefaultBuffer(), 0);
|
buffer_bindings.AddVertexBinding(DefaultBuffer(), 0);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
const auto [buffer, offset] = buffer_cache.UploadMemory(start, size);
|
const auto info = buffer_cache.UploadMemory(start, size);
|
||||||
buffer_bindings.AddVertexBinding(buffer, offset);
|
buffer_bindings.AddVertexBinding(info.handle, info.offset);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -931,7 +948,9 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
const GPUVAddr gpu_addr = regs.index_array.IndexStart();
|
const GPUVAddr gpu_addr = regs.index_array.IndexStart();
|
||||||
auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize());
|
const auto info = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize());
|
||||||
|
VkBuffer buffer = info.handle;
|
||||||
|
u64 offset = info.offset;
|
||||||
std::tie(buffer, offset) = quad_indexed_pass.Assemble(
|
std::tie(buffer, offset) = quad_indexed_pass.Assemble(
|
||||||
regs.index_array.format, params.num_vertices, params.base_vertex, buffer, offset);
|
regs.index_array.format, params.num_vertices, params.base_vertex, buffer, offset);
|
||||||
|
|
||||||
|
@ -945,7 +964,9 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
const GPUVAddr gpu_addr = regs.index_array.IndexStart();
|
const GPUVAddr gpu_addr = regs.index_array.IndexStart();
|
||||||
auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize());
|
const auto info = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize());
|
||||||
|
VkBuffer buffer = info.handle;
|
||||||
|
u64 offset = info.offset;
|
||||||
|
|
||||||
auto format = regs.index_array.format;
|
auto format = regs.index_array.format;
|
||||||
const bool is_uint8 = format == Maxwell::IndexFormat::UnsignedByte;
|
const bool is_uint8 = format == Maxwell::IndexFormat::UnsignedByte;
|
||||||
|
@ -980,12 +1001,12 @@ void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerVulkan::SetupGraphicsTexelBuffers(const ShaderEntries& entries, std::size_t stage) {
|
void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage) {
|
||||||
MICROPROFILE_SCOPE(Vulkan_Textures);
|
MICROPROFILE_SCOPE(Vulkan_Textures);
|
||||||
const auto& gpu = system.GPU().Maxwell3D();
|
const auto& gpu = system.GPU().Maxwell3D();
|
||||||
for (const auto& entry : entries.texel_buffers) {
|
for (const auto& entry : entries.uniform_texels) {
|
||||||
const auto image = GetTextureInfo(gpu, entry, stage).tic;
|
const auto image = GetTextureInfo(gpu, entry, stage).tic;
|
||||||
SetupTexelBuffer(image, entry);
|
SetupUniformTexels(image, entry);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1000,6 +1021,15 @@ void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, std::
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, std::size_t stage) {
|
||||||
|
MICROPROFILE_SCOPE(Vulkan_Textures);
|
||||||
|
const auto& gpu = system.GPU().Maxwell3D();
|
||||||
|
for (const auto& entry : entries.storage_texels) {
|
||||||
|
const auto image = GetTextureInfo(gpu, entry, stage).tic;
|
||||||
|
SetupStorageTexel(image, entry);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage) {
|
void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage) {
|
||||||
MICROPROFILE_SCOPE(Vulkan_Images);
|
MICROPROFILE_SCOPE(Vulkan_Images);
|
||||||
const auto& gpu = system.GPU().Maxwell3D();
|
const auto& gpu = system.GPU().Maxwell3D();
|
||||||
|
@ -1032,12 +1062,12 @@ void RasterizerVulkan::SetupComputeGlobalBuffers(const ShaderEntries& entries) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerVulkan::SetupComputeTexelBuffers(const ShaderEntries& entries) {
|
void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) {
|
||||||
MICROPROFILE_SCOPE(Vulkan_Textures);
|
MICROPROFILE_SCOPE(Vulkan_Textures);
|
||||||
const auto& gpu = system.GPU().KeplerCompute();
|
const auto& gpu = system.GPU().KeplerCompute();
|
||||||
for (const auto& entry : entries.texel_buffers) {
|
for (const auto& entry : entries.uniform_texels) {
|
||||||
const auto image = GetTextureInfo(gpu, entry, ComputeShaderIndex).tic;
|
const auto image = GetTextureInfo(gpu, entry, ComputeShaderIndex).tic;
|
||||||
SetupTexelBuffer(image, entry);
|
SetupUniformTexels(image, entry);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1052,6 +1082,15 @@ void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) {
|
||||||
|
MICROPROFILE_SCOPE(Vulkan_Textures);
|
||||||
|
const auto& gpu = system.GPU().KeplerCompute();
|
||||||
|
for (const auto& entry : entries.storage_texels) {
|
||||||
|
const auto image = GetTextureInfo(gpu, entry, ComputeShaderIndex).tic;
|
||||||
|
SetupStorageTexel(image, entry);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) {
|
void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) {
|
||||||
MICROPROFILE_SCOPE(Vulkan_Images);
|
MICROPROFILE_SCOPE(Vulkan_Images);
|
||||||
const auto& gpu = system.GPU().KeplerCompute();
|
const auto& gpu = system.GPU().KeplerCompute();
|
||||||
|
@ -1074,10 +1113,9 @@ void RasterizerVulkan::SetupConstBuffer(const ConstBufferEntry& entry,
|
||||||
Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float));
|
Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float));
|
||||||
ASSERT(size <= MaxConstbufferSize);
|
ASSERT(size <= MaxConstbufferSize);
|
||||||
|
|
||||||
const auto [buffer_handle, offset] =
|
const auto info =
|
||||||
buffer_cache.UploadMemory(buffer.address, size, device.GetUniformBufferAlignment());
|
buffer_cache.UploadMemory(buffer.address, size, device.GetUniformBufferAlignment());
|
||||||
|
update_descriptor_queue.AddBuffer(info.handle, info.offset, size);
|
||||||
update_descriptor_queue.AddBuffer(buffer_handle, offset, size);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address) {
|
void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address) {
|
||||||
|
@ -1091,18 +1129,18 @@ void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAdd
|
||||||
// Note: Do *not* use DefaultBuffer() here, storage buffers can be written breaking the
|
// Note: Do *not* use DefaultBuffer() here, storage buffers can be written breaking the
|
||||||
// default buffer.
|
// default buffer.
|
||||||
static constexpr std::size_t dummy_size = 4;
|
static constexpr std::size_t dummy_size = 4;
|
||||||
const auto buffer = buffer_cache.GetEmptyBuffer(dummy_size);
|
const auto info = buffer_cache.GetEmptyBuffer(dummy_size);
|
||||||
update_descriptor_queue.AddBuffer(buffer, 0, dummy_size);
|
update_descriptor_queue.AddBuffer(info.handle, info.offset, dummy_size);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto [buffer, offset] = buffer_cache.UploadMemory(
|
const auto info = buffer_cache.UploadMemory(
|
||||||
actual_addr, size, device.GetStorageBufferAlignment(), entry.IsWritten());
|
actual_addr, size, device.GetStorageBufferAlignment(), entry.IsWritten());
|
||||||
update_descriptor_queue.AddBuffer(buffer, offset, size);
|
update_descriptor_queue.AddBuffer(info.handle, info.offset, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerVulkan::SetupTexelBuffer(const Tegra::Texture::TICEntry& tic,
|
void RasterizerVulkan::SetupUniformTexels(const Tegra::Texture::TICEntry& tic,
|
||||||
const TexelBufferEntry& entry) {
|
const UniformTexelEntry& entry) {
|
||||||
const auto view = texture_cache.GetTextureSurface(tic, entry);
|
const auto view = texture_cache.GetTextureSurface(tic, entry);
|
||||||
ASSERT(view->IsBufferView());
|
ASSERT(view->IsBufferView());
|
||||||
|
|
||||||
|
@ -1114,16 +1152,24 @@ void RasterizerVulkan::SetupTexture(const Tegra::Texture::FullTextureInfo& textu
|
||||||
auto view = texture_cache.GetTextureSurface(texture.tic, entry);
|
auto view = texture_cache.GetTextureSurface(texture.tic, entry);
|
||||||
ASSERT(!view->IsBufferView());
|
ASSERT(!view->IsBufferView());
|
||||||
|
|
||||||
const auto image_view = view->GetHandle(texture.tic.x_source, texture.tic.y_source,
|
const VkImageView image_view = view->GetImageView(texture.tic.x_source, texture.tic.y_source,
|
||||||
texture.tic.z_source, texture.tic.w_source);
|
texture.tic.z_source, texture.tic.w_source);
|
||||||
const auto sampler = sampler_cache.GetSampler(texture.tsc);
|
const auto sampler = sampler_cache.GetSampler(texture.tsc);
|
||||||
update_descriptor_queue.AddSampledImage(sampler, image_view);
|
update_descriptor_queue.AddSampledImage(sampler, image_view);
|
||||||
|
|
||||||
const auto image_layout = update_descriptor_queue.GetLastImageLayout();
|
VkImageLayout* const image_layout = update_descriptor_queue.LastImageLayout();
|
||||||
*image_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
|
*image_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
|
||||||
sampled_views.push_back(ImageView{std::move(view), image_layout});
|
sampled_views.push_back(ImageView{std::move(view), image_layout});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void RasterizerVulkan::SetupStorageTexel(const Tegra::Texture::TICEntry& tic,
|
||||||
|
const StorageTexelEntry& entry) {
|
||||||
|
const auto view = texture_cache.GetImageSurface(tic, entry);
|
||||||
|
ASSERT(view->IsBufferView());
|
||||||
|
|
||||||
|
update_descriptor_queue.AddTexelBuffer(view->GetBufferView());
|
||||||
|
}
|
||||||
|
|
||||||
void RasterizerVulkan::SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry) {
|
void RasterizerVulkan::SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry) {
|
||||||
auto view = texture_cache.GetImageSurface(tic, entry);
|
auto view = texture_cache.GetImageSurface(tic, entry);
|
||||||
|
|
||||||
|
@ -1133,10 +1179,11 @@ void RasterizerVulkan::SetupImage(const Tegra::Texture::TICEntry& tic, const Ima
|
||||||
|
|
||||||
UNIMPLEMENTED_IF(tic.IsBuffer());
|
UNIMPLEMENTED_IF(tic.IsBuffer());
|
||||||
|
|
||||||
const auto image_view = view->GetHandle(tic.x_source, tic.y_source, tic.z_source, tic.w_source);
|
const VkImageView image_view =
|
||||||
|
view->GetImageView(tic.x_source, tic.y_source, tic.z_source, tic.w_source);
|
||||||
update_descriptor_queue.AddImage(image_view);
|
update_descriptor_queue.AddImage(image_view);
|
||||||
|
|
||||||
const auto image_layout = update_descriptor_queue.GetLastImageLayout();
|
VkImageLayout* const image_layout = update_descriptor_queue.LastImageLayout();
|
||||||
*image_layout = VK_IMAGE_LAYOUT_GENERAL;
|
*image_layout = VK_IMAGE_LAYOUT_GENERAL;
|
||||||
image_views.push_back(ImageView{std::move(view), image_layout});
|
image_views.push_back(ImageView{std::move(view), image_layout});
|
||||||
}
|
}
|
||||||
|
|
|
@ -168,7 +168,7 @@ private:
|
||||||
bool is_indexed, bool is_instanced);
|
bool is_indexed, bool is_instanced);
|
||||||
|
|
||||||
/// Setup descriptors in the graphics pipeline.
|
/// Setup descriptors in the graphics pipeline.
|
||||||
void SetupShaderDescriptors(const std::array<Shader, Maxwell::MaxShaderProgram>& shaders);
|
void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders);
|
||||||
|
|
||||||
void SetupImageTransitions(Texceptions texceptions,
|
void SetupImageTransitions(Texceptions texceptions,
|
||||||
const std::array<View, Maxwell::NumRenderTargets>& color_attachments,
|
const std::array<View, Maxwell::NumRenderTargets>& color_attachments,
|
||||||
|
@ -193,12 +193,15 @@ private:
|
||||||
/// Setup global buffers in the graphics pipeline.
|
/// Setup global buffers in the graphics pipeline.
|
||||||
void SetupGraphicsGlobalBuffers(const ShaderEntries& entries, std::size_t stage);
|
void SetupGraphicsGlobalBuffers(const ShaderEntries& entries, std::size_t stage);
|
||||||
|
|
||||||
/// Setup texel buffers in the graphics pipeline.
|
/// Setup uniform texels in the graphics pipeline.
|
||||||
void SetupGraphicsTexelBuffers(const ShaderEntries& entries, std::size_t stage);
|
void SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage);
|
||||||
|
|
||||||
/// Setup textures in the graphics pipeline.
|
/// Setup textures in the graphics pipeline.
|
||||||
void SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage);
|
void SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage);
|
||||||
|
|
||||||
|
/// Setup storage texels in the graphics pipeline.
|
||||||
|
void SetupGraphicsStorageTexels(const ShaderEntries& entries, std::size_t stage);
|
||||||
|
|
||||||
/// Setup images in the graphics pipeline.
|
/// Setup images in the graphics pipeline.
|
||||||
void SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage);
|
void SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage);
|
||||||
|
|
||||||
|
@ -209,11 +212,14 @@ private:
|
||||||
void SetupComputeGlobalBuffers(const ShaderEntries& entries);
|
void SetupComputeGlobalBuffers(const ShaderEntries& entries);
|
||||||
|
|
||||||
/// Setup texel buffers in the compute pipeline.
|
/// Setup texel buffers in the compute pipeline.
|
||||||
void SetupComputeTexelBuffers(const ShaderEntries& entries);
|
void SetupComputeUniformTexels(const ShaderEntries& entries);
|
||||||
|
|
||||||
/// Setup textures in the compute pipeline.
|
/// Setup textures in the compute pipeline.
|
||||||
void SetupComputeTextures(const ShaderEntries& entries);
|
void SetupComputeTextures(const ShaderEntries& entries);
|
||||||
|
|
||||||
|
/// Setup storage texels in the compute pipeline.
|
||||||
|
void SetupComputeStorageTexels(const ShaderEntries& entries);
|
||||||
|
|
||||||
/// Setup images in the compute pipeline.
|
/// Setup images in the compute pipeline.
|
||||||
void SetupComputeImages(const ShaderEntries& entries);
|
void SetupComputeImages(const ShaderEntries& entries);
|
||||||
|
|
||||||
|
@ -222,10 +228,12 @@ private:
|
||||||
|
|
||||||
void SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address);
|
void SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address);
|
||||||
|
|
||||||
void SetupTexelBuffer(const Tegra::Texture::TICEntry& image, const TexelBufferEntry& entry);
|
void SetupUniformTexels(const Tegra::Texture::TICEntry& image, const UniformTexelEntry& entry);
|
||||||
|
|
||||||
void SetupTexture(const Tegra::Texture::FullTextureInfo& texture, const SamplerEntry& entry);
|
void SetupTexture(const Tegra::Texture::FullTextureInfo& texture, const SamplerEntry& entry);
|
||||||
|
|
||||||
|
void SetupStorageTexel(const Tegra::Texture::TICEntry& tic, const StorageTexelEntry& entry);
|
||||||
|
|
||||||
void SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry);
|
void SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry);
|
||||||
|
|
||||||
void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs);
|
void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs);
|
||||||
|
|
|
@ -9,6 +9,8 @@
|
||||||
#include "video_core/renderer_vulkan/wrapper.h"
|
#include "video_core/renderer_vulkan/wrapper.h"
|
||||||
#include "video_core/textures/texture.h"
|
#include "video_core/textures/texture.h"
|
||||||
|
|
||||||
|
using Tegra::Texture::TextureMipmapFilter;
|
||||||
|
|
||||||
namespace Vulkan {
|
namespace Vulkan {
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
@ -63,8 +65,8 @@ vk::Sampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc) c
|
||||||
ci.maxAnisotropy = tsc.GetMaxAnisotropy();
|
ci.maxAnisotropy = tsc.GetMaxAnisotropy();
|
||||||
ci.compareEnable = tsc.depth_compare_enabled;
|
ci.compareEnable = tsc.depth_compare_enabled;
|
||||||
ci.compareOp = MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func);
|
ci.compareOp = MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func);
|
||||||
ci.minLod = tsc.GetMinLod();
|
ci.minLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.0f : tsc.GetMinLod();
|
||||||
ci.maxLod = tsc.GetMaxLod();
|
ci.maxLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.25f : tsc.GetMaxLod();
|
||||||
ci.borderColor = arbitrary_borders ? VK_BORDER_COLOR_INT_CUSTOM_EXT : ConvertBorderColor(color);
|
ci.borderColor = arbitrary_borders ? VK_BORDER_COLOR_INT_CUSTOM_EXT : ConvertBorderColor(color);
|
||||||
ci.unnormalizedCoordinates = VK_FALSE;
|
ci.unnormalizedCoordinates = VK_FALSE;
|
||||||
return device.GetLogical().CreateSampler(ci);
|
return device.GetLogical().CreateSampler(ci);
|
||||||
|
|
|
@ -400,8 +400,9 @@ private:
|
||||||
u32 binding = specialization.base_binding;
|
u32 binding = specialization.base_binding;
|
||||||
binding = DeclareConstantBuffers(binding);
|
binding = DeclareConstantBuffers(binding);
|
||||||
binding = DeclareGlobalBuffers(binding);
|
binding = DeclareGlobalBuffers(binding);
|
||||||
binding = DeclareTexelBuffers(binding);
|
binding = DeclareUniformTexels(binding);
|
||||||
binding = DeclareSamplers(binding);
|
binding = DeclareSamplers(binding);
|
||||||
|
binding = DeclareStorageTexels(binding);
|
||||||
binding = DeclareImages(binding);
|
binding = DeclareImages(binding);
|
||||||
|
|
||||||
const Id main = OpFunction(t_void, {}, TypeFunction(t_void));
|
const Id main = OpFunction(t_void, {}, TypeFunction(t_void));
|
||||||
|
@ -741,8 +742,10 @@ private:
|
||||||
if (!IsGenericAttribute(index)) {
|
if (!IsGenericAttribute(index)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
const u32 location = GetGenericAttributeLocation(index);
|
const u32 location = GetGenericAttributeLocation(index);
|
||||||
|
if (!IsAttributeEnabled(location)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
const auto type_descriptor = GetAttributeType(location);
|
const auto type_descriptor = GetAttributeType(location);
|
||||||
Id type;
|
Id type;
|
||||||
if (IsInputAttributeArray()) {
|
if (IsInputAttributeArray()) {
|
||||||
|
@ -887,7 +890,7 @@ private:
|
||||||
return binding;
|
return binding;
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 DeclareTexelBuffers(u32 binding) {
|
u32 DeclareUniformTexels(u32 binding) {
|
||||||
for (const auto& sampler : ir.GetSamplers()) {
|
for (const auto& sampler : ir.GetSamplers()) {
|
||||||
if (!sampler.is_buffer) {
|
if (!sampler.is_buffer) {
|
||||||
continue;
|
continue;
|
||||||
|
@ -908,7 +911,7 @@ private:
|
||||||
Decorate(id, spv::Decoration::Binding, binding++);
|
Decorate(id, spv::Decoration::Binding, binding++);
|
||||||
Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET);
|
Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET);
|
||||||
|
|
||||||
texel_buffers.emplace(sampler.index, TexelBuffer{image_type, id});
|
uniform_texels.emplace(sampler.index, TexelBuffer{image_type, id});
|
||||||
}
|
}
|
||||||
return binding;
|
return binding;
|
||||||
}
|
}
|
||||||
|
@ -943,31 +946,48 @@ private:
|
||||||
return binding;
|
return binding;
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 DeclareImages(u32 binding) {
|
u32 DeclareStorageTexels(u32 binding) {
|
||||||
for (const auto& image : ir.GetImages()) {
|
for (const auto& image : ir.GetImages()) {
|
||||||
const auto [dim, arrayed] = GetImageDim(image);
|
if (image.type != Tegra::Shader::ImageType::TextureBuffer) {
|
||||||
constexpr int depth = 0;
|
continue;
|
||||||
constexpr bool ms = false;
|
|
||||||
constexpr int sampled = 2; // This won't be accessed with a sampler
|
|
||||||
constexpr auto format = spv::ImageFormat::Unknown;
|
|
||||||
const Id image_type = TypeImage(t_uint, dim, depth, arrayed, ms, sampled, format, {});
|
|
||||||
const Id pointer_type = TypePointer(spv::StorageClass::UniformConstant, image_type);
|
|
||||||
const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant);
|
|
||||||
AddGlobalVariable(Name(id, fmt::format("image_{}", image.index)));
|
|
||||||
|
|
||||||
Decorate(id, spv::Decoration::Binding, binding++);
|
|
||||||
Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET);
|
|
||||||
if (image.is_read && !image.is_written) {
|
|
||||||
Decorate(id, spv::Decoration::NonWritable);
|
|
||||||
} else if (image.is_written && !image.is_read) {
|
|
||||||
Decorate(id, spv::Decoration::NonReadable);
|
|
||||||
}
|
}
|
||||||
|
DeclareImage(image, binding);
|
||||||
images.emplace(image.index, StorageImage{image_type, id});
|
|
||||||
}
|
}
|
||||||
return binding;
|
return binding;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
u32 DeclareImages(u32 binding) {
|
||||||
|
for (const auto& image : ir.GetImages()) {
|
||||||
|
if (image.type == Tegra::Shader::ImageType::TextureBuffer) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
DeclareImage(image, binding);
|
||||||
|
}
|
||||||
|
return binding;
|
||||||
|
}
|
||||||
|
|
||||||
|
void DeclareImage(const Image& image, u32& binding) {
|
||||||
|
const auto [dim, arrayed] = GetImageDim(image);
|
||||||
|
constexpr int depth = 0;
|
||||||
|
constexpr bool ms = false;
|
||||||
|
constexpr int sampled = 2; // This won't be accessed with a sampler
|
||||||
|
const auto format = image.is_atomic ? spv::ImageFormat::R32ui : spv::ImageFormat::Unknown;
|
||||||
|
const Id image_type = TypeImage(t_uint, dim, depth, arrayed, ms, sampled, format, {});
|
||||||
|
const Id pointer_type = TypePointer(spv::StorageClass::UniformConstant, image_type);
|
||||||
|
const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant);
|
||||||
|
AddGlobalVariable(Name(id, fmt::format("image_{}", image.index)));
|
||||||
|
|
||||||
|
Decorate(id, spv::Decoration::Binding, binding++);
|
||||||
|
Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET);
|
||||||
|
if (image.is_read && !image.is_written) {
|
||||||
|
Decorate(id, spv::Decoration::NonWritable);
|
||||||
|
} else if (image.is_written && !image.is_read) {
|
||||||
|
Decorate(id, spv::Decoration::NonReadable);
|
||||||
|
}
|
||||||
|
|
||||||
|
images.emplace(image.index, StorageImage{image_type, id});
|
||||||
|
}
|
||||||
|
|
||||||
bool IsRenderTargetEnabled(u32 rt) const {
|
bool IsRenderTargetEnabled(u32 rt) const {
|
||||||
for (u32 component = 0; component < 4; ++component) {
|
for (u32 component = 0; component < 4; ++component) {
|
||||||
if (header.ps.IsColorComponentOutputEnabled(rt, component)) {
|
if (header.ps.IsColorComponentOutputEnabled(rt, component)) {
|
||||||
|
@ -986,6 +1006,10 @@ private:
|
||||||
return stage == ShaderType::TesselationControl;
|
return stage == ShaderType::TesselationControl;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool IsAttributeEnabled(u32 location) const {
|
||||||
|
return stage != ShaderType::Vertex || specialization.enabled_attributes[location];
|
||||||
|
}
|
||||||
|
|
||||||
u32 GetNumInputVertices() const {
|
u32 GetNumInputVertices() const {
|
||||||
switch (stage) {
|
switch (stage) {
|
||||||
case ShaderType::Geometry:
|
case ShaderType::Geometry:
|
||||||
|
@ -1201,16 +1225,20 @@ private:
|
||||||
UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element);
|
UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element);
|
||||||
return {v_float_zero, Type::Float};
|
return {v_float_zero, Type::Float};
|
||||||
default:
|
default:
|
||||||
if (IsGenericAttribute(attribute)) {
|
if (!IsGenericAttribute(attribute)) {
|
||||||
const u32 location = GetGenericAttributeLocation(attribute);
|
break;
|
||||||
const auto type_descriptor = GetAttributeType(location);
|
|
||||||
const Type type = type_descriptor.type;
|
|
||||||
const Id attribute_id = input_attributes.at(attribute);
|
|
||||||
const std::vector elements = {element};
|
|
||||||
const Id pointer = ArrayPass(type_descriptor.scalar, attribute_id, elements);
|
|
||||||
return {OpLoad(GetTypeDefinition(type), pointer), type};
|
|
||||||
}
|
}
|
||||||
break;
|
const u32 location = GetGenericAttributeLocation(attribute);
|
||||||
|
if (!IsAttributeEnabled(location)) {
|
||||||
|
// Disabled attributes (also known as constant attributes) always return zero.
|
||||||
|
return {v_float_zero, Type::Float};
|
||||||
|
}
|
||||||
|
const auto type_descriptor = GetAttributeType(location);
|
||||||
|
const Type type = type_descriptor.type;
|
||||||
|
const Id attribute_id = input_attributes.at(attribute);
|
||||||
|
const std::vector elements = {element};
|
||||||
|
const Id pointer = ArrayPass(type_descriptor.scalar, attribute_id, elements);
|
||||||
|
return {OpLoad(GetTypeDefinition(type), pointer), type};
|
||||||
}
|
}
|
||||||
UNIMPLEMENTED_MSG("Unhandled input attribute: {}", static_cast<u32>(attribute));
|
UNIMPLEMENTED_MSG("Unhandled input attribute: {}", static_cast<u32>(attribute));
|
||||||
return {v_float_zero, Type::Float};
|
return {v_float_zero, Type::Float};
|
||||||
|
@ -1246,7 +1274,7 @@ private:
|
||||||
} else {
|
} else {
|
||||||
UNREACHABLE_MSG("Unmanaged offset node type");
|
UNREACHABLE_MSG("Unmanaged offset node type");
|
||||||
}
|
}
|
||||||
pointer = OpAccessChain(t_cbuf_float, buffer_id, Constant(t_uint, 0), buffer_index,
|
pointer = OpAccessChain(t_cbuf_float, buffer_id, v_uint_zero, buffer_index,
|
||||||
buffer_element);
|
buffer_element);
|
||||||
}
|
}
|
||||||
return {OpLoad(t_float, pointer), Type::Float};
|
return {OpLoad(t_float, pointer), Type::Float};
|
||||||
|
@ -1601,7 +1629,7 @@ private:
|
||||||
|
|
||||||
const Id result = OpIAddCarry(TypeStruct({t_uint, t_uint}), op_a, op_b);
|
const Id result = OpIAddCarry(TypeStruct({t_uint, t_uint}), op_a, op_b);
|
||||||
const Id carry = OpCompositeExtract(t_uint, result, 1);
|
const Id carry = OpCompositeExtract(t_uint, result, 1);
|
||||||
return {OpINotEqual(t_bool, carry, Constant(t_uint, 0)), Type::Bool};
|
return {OpINotEqual(t_bool, carry, v_uint_zero), Type::Bool};
|
||||||
}
|
}
|
||||||
|
|
||||||
Expression LogicalAssign(Operation operation) {
|
Expression LogicalAssign(Operation operation) {
|
||||||
|
@ -1664,7 +1692,7 @@ private:
|
||||||
const auto& meta = std::get<MetaTexture>(operation.GetMeta());
|
const auto& meta = std::get<MetaTexture>(operation.GetMeta());
|
||||||
const u32 index = meta.sampler.index;
|
const u32 index = meta.sampler.index;
|
||||||
if (meta.sampler.is_buffer) {
|
if (meta.sampler.is_buffer) {
|
||||||
const auto& entry = texel_buffers.at(index);
|
const auto& entry = uniform_texels.at(index);
|
||||||
return OpLoad(entry.image_type, entry.image);
|
return OpLoad(entry.image_type, entry.image);
|
||||||
} else {
|
} else {
|
||||||
const auto& entry = sampled_images.at(index);
|
const auto& entry = sampled_images.at(index);
|
||||||
|
@ -1941,39 +1969,20 @@ private:
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
Expression AtomicImageAdd(Operation operation) {
|
template <Id (Module::*func)(Id, Id, Id, Id, Id)>
|
||||||
UNIMPLEMENTED();
|
Expression AtomicImage(Operation operation) {
|
||||||
return {};
|
const auto& meta{std::get<MetaImage>(operation.GetMeta())};
|
||||||
}
|
ASSERT(meta.values.size() == 1);
|
||||||
|
|
||||||
Expression AtomicImageMin(Operation operation) {
|
const Id coordinate = GetCoordinates(operation, Type::Int);
|
||||||
UNIMPLEMENTED();
|
const Id image = images.at(meta.image.index).image;
|
||||||
return {};
|
const Id sample = v_uint_zero;
|
||||||
}
|
const Id pointer = OpImageTexelPointer(t_image_uint, image, coordinate, sample);
|
||||||
|
|
||||||
Expression AtomicImageMax(Operation operation) {
|
const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device));
|
||||||
UNIMPLEMENTED();
|
const Id semantics = v_uint_zero;
|
||||||
return {};
|
const Id value = AsUint(Visit(meta.values[0]));
|
||||||
}
|
return {(this->*func)(t_uint, pointer, scope, semantics, value), Type::Uint};
|
||||||
|
|
||||||
Expression AtomicImageAnd(Operation operation) {
|
|
||||||
UNIMPLEMENTED();
|
|
||||||
return {};
|
|
||||||
}
|
|
||||||
|
|
||||||
Expression AtomicImageOr(Operation operation) {
|
|
||||||
UNIMPLEMENTED();
|
|
||||||
return {};
|
|
||||||
}
|
|
||||||
|
|
||||||
Expression AtomicImageXor(Operation operation) {
|
|
||||||
UNIMPLEMENTED();
|
|
||||||
return {};
|
|
||||||
}
|
|
||||||
|
|
||||||
Expression AtomicImageExchange(Operation operation) {
|
|
||||||
UNIMPLEMENTED();
|
|
||||||
return {};
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <Id (Module::*func)(Id, Id, Id, Id, Id)>
|
template <Id (Module::*func)(Id, Id, Id, Id, Id)>
|
||||||
|
@ -1988,7 +1997,7 @@ private:
|
||||||
return {v_float_zero, Type::Float};
|
return {v_float_zero, Type::Float};
|
||||||
}
|
}
|
||||||
const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device));
|
const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device));
|
||||||
const Id semantics = Constant(t_uint, 0);
|
const Id semantics = v_uint_zero;
|
||||||
const Id value = AsUint(Visit(operation[1]));
|
const Id value = AsUint(Visit(operation[1]));
|
||||||
|
|
||||||
return {(this->*func)(t_uint, pointer, scope, semantics, value), Type::Uint};
|
return {(this->*func)(t_uint, pointer, scope, semantics, value), Type::Uint};
|
||||||
|
@ -2612,11 +2621,11 @@ private:
|
||||||
|
|
||||||
&SPIRVDecompiler::ImageLoad,
|
&SPIRVDecompiler::ImageLoad,
|
||||||
&SPIRVDecompiler::ImageStore,
|
&SPIRVDecompiler::ImageStore,
|
||||||
&SPIRVDecompiler::AtomicImageAdd,
|
&SPIRVDecompiler::AtomicImage<&Module::OpAtomicIAdd>,
|
||||||
&SPIRVDecompiler::AtomicImageAnd,
|
&SPIRVDecompiler::AtomicImage<&Module::OpAtomicAnd>,
|
||||||
&SPIRVDecompiler::AtomicImageOr,
|
&SPIRVDecompiler::AtomicImage<&Module::OpAtomicOr>,
|
||||||
&SPIRVDecompiler::AtomicImageXor,
|
&SPIRVDecompiler::AtomicImage<&Module::OpAtomicXor>,
|
||||||
&SPIRVDecompiler::AtomicImageExchange,
|
&SPIRVDecompiler::AtomicImage<&Module::OpAtomicExchange>,
|
||||||
|
|
||||||
&SPIRVDecompiler::Atomic<&Module::OpAtomicExchange>,
|
&SPIRVDecompiler::Atomic<&Module::OpAtomicExchange>,
|
||||||
&SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd>,
|
&SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd>,
|
||||||
|
@ -2758,8 +2767,11 @@ private:
|
||||||
Decorate(TypeStruct(t_gmem_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0);
|
Decorate(TypeStruct(t_gmem_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0);
|
||||||
const Id t_gmem_ssbo = TypePointer(spv::StorageClass::StorageBuffer, t_gmem_struct);
|
const Id t_gmem_ssbo = TypePointer(spv::StorageClass::StorageBuffer, t_gmem_struct);
|
||||||
|
|
||||||
|
const Id t_image_uint = TypePointer(spv::StorageClass::Image, t_uint);
|
||||||
|
|
||||||
const Id v_float_zero = Constant(t_float, 0.0f);
|
const Id v_float_zero = Constant(t_float, 0.0f);
|
||||||
const Id v_float_one = Constant(t_float, 1.0f);
|
const Id v_float_one = Constant(t_float, 1.0f);
|
||||||
|
const Id v_uint_zero = Constant(t_uint, 0);
|
||||||
|
|
||||||
// Nvidia uses these defaults for varyings (e.g. position and generic attributes)
|
// Nvidia uses these defaults for varyings (e.g. position and generic attributes)
|
||||||
const Id v_varying_default =
|
const Id v_varying_default =
|
||||||
|
@ -2784,15 +2796,16 @@ private:
|
||||||
std::unordered_map<u8, GenericVaryingDescription> output_attributes;
|
std::unordered_map<u8, GenericVaryingDescription> output_attributes;
|
||||||
std::map<u32, Id> constant_buffers;
|
std::map<u32, Id> constant_buffers;
|
||||||
std::map<GlobalMemoryBase, Id> global_buffers;
|
std::map<GlobalMemoryBase, Id> global_buffers;
|
||||||
std::map<u32, TexelBuffer> texel_buffers;
|
std::map<u32, TexelBuffer> uniform_texels;
|
||||||
std::map<u32, SampledImage> sampled_images;
|
std::map<u32, SampledImage> sampled_images;
|
||||||
|
std::map<u32, TexelBuffer> storage_texels;
|
||||||
std::map<u32, StorageImage> images;
|
std::map<u32, StorageImage> images;
|
||||||
|
|
||||||
|
std::array<Id, Maxwell::NumRenderTargets> frag_colors{};
|
||||||
Id instance_index{};
|
Id instance_index{};
|
||||||
Id vertex_index{};
|
Id vertex_index{};
|
||||||
Id base_instance{};
|
Id base_instance{};
|
||||||
Id base_vertex{};
|
Id base_vertex{};
|
||||||
std::array<Id, Maxwell::NumRenderTargets> frag_colors{};
|
|
||||||
Id frag_depth{};
|
Id frag_depth{};
|
||||||
Id frag_coord{};
|
Id frag_coord{};
|
||||||
Id front_facing{};
|
Id front_facing{};
|
||||||
|
@ -3048,13 +3061,17 @@ ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir) {
|
||||||
}
|
}
|
||||||
for (const auto& sampler : ir.GetSamplers()) {
|
for (const auto& sampler : ir.GetSamplers()) {
|
||||||
if (sampler.is_buffer) {
|
if (sampler.is_buffer) {
|
||||||
entries.texel_buffers.emplace_back(sampler);
|
entries.uniform_texels.emplace_back(sampler);
|
||||||
} else {
|
} else {
|
||||||
entries.samplers.emplace_back(sampler);
|
entries.samplers.emplace_back(sampler);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for (const auto& image : ir.GetImages()) {
|
for (const auto& image : ir.GetImages()) {
|
||||||
entries.images.emplace_back(image);
|
if (image.type == Tegra::Shader::ImageType::TextureBuffer) {
|
||||||
|
entries.storage_texels.emplace_back(image);
|
||||||
|
} else {
|
||||||
|
entries.images.emplace_back(image);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
for (const auto& attribute : ir.GetInputAttributes()) {
|
for (const auto& attribute : ir.GetInputAttributes()) {
|
||||||
if (IsGenericAttribute(attribute)) {
|
if (IsGenericAttribute(attribute)) {
|
||||||
|
|
|
@ -21,8 +21,9 @@ class VKDevice;
|
||||||
namespace Vulkan {
|
namespace Vulkan {
|
||||||
|
|
||||||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||||
using TexelBufferEntry = VideoCommon::Shader::Sampler;
|
using UniformTexelEntry = VideoCommon::Shader::Sampler;
|
||||||
using SamplerEntry = VideoCommon::Shader::Sampler;
|
using SamplerEntry = VideoCommon::Shader::Sampler;
|
||||||
|
using StorageTexelEntry = VideoCommon::Shader::Image;
|
||||||
using ImageEntry = VideoCommon::Shader::Image;
|
using ImageEntry = VideoCommon::Shader::Image;
|
||||||
|
|
||||||
constexpr u32 DESCRIPTOR_SET = 0;
|
constexpr u32 DESCRIPTOR_SET = 0;
|
||||||
|
@ -66,13 +67,15 @@ private:
|
||||||
struct ShaderEntries {
|
struct ShaderEntries {
|
||||||
u32 NumBindings() const {
|
u32 NumBindings() const {
|
||||||
return static_cast<u32>(const_buffers.size() + global_buffers.size() +
|
return static_cast<u32>(const_buffers.size() + global_buffers.size() +
|
||||||
texel_buffers.size() + samplers.size() + images.size());
|
uniform_texels.size() + samplers.size() + storage_texels.size() +
|
||||||
|
images.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<ConstBufferEntry> const_buffers;
|
std::vector<ConstBufferEntry> const_buffers;
|
||||||
std::vector<GlobalBufferEntry> global_buffers;
|
std::vector<GlobalBufferEntry> global_buffers;
|
||||||
std::vector<TexelBufferEntry> texel_buffers;
|
std::vector<UniformTexelEntry> uniform_texels;
|
||||||
std::vector<SamplerEntry> samplers;
|
std::vector<SamplerEntry> samplers;
|
||||||
|
std::vector<StorageTexelEntry> storage_texels;
|
||||||
std::vector<ImageEntry> images;
|
std::vector<ImageEntry> images;
|
||||||
std::set<u32> attributes;
|
std::set<u32> attributes;
|
||||||
std::array<bool, Maxwell::NumClipDistances> clip_distances{};
|
std::array<bool, Maxwell::NumClipDistances> clip_distances{};
|
||||||
|
@ -88,7 +91,8 @@ struct Specialization final {
|
||||||
u32 shared_memory_size{};
|
u32 shared_memory_size{};
|
||||||
|
|
||||||
// Graphics specific
|
// Graphics specific
|
||||||
std::optional<float> point_size{};
|
std::optional<float> point_size;
|
||||||
|
std::bitset<Maxwell::NumVertexAttributes> enabled_attributes;
|
||||||
std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{};
|
std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{};
|
||||||
bool ndc_minus_one_to_one{};
|
bool ndc_minus_one_to_one{};
|
||||||
};
|
};
|
||||||
|
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Reference in a new issue