mirror of
https://git.suyu.dev/suyu/suyu.git
synced 2025-01-10 17:51:01 +01:00
gl_arb_decompiler: Implement robust buffer operations
This emulates the behavior we get on GLSL with regular SSBOs by passing a pointer + length pair for each buffer and comparing the access offset against the length. It aims to be consistent with the crashes we might get. Out-of-bounds stores are ignored. Out-of-bounds atomics are ignored and return zero. Out-of-bounds reads return zero.
This commit is contained in:
parent
e7a26ecec5
commit
f21a189148
3 changed files with 54 additions and 33 deletions
|
@ -376,9 +376,11 @@ private:
|
||||||
std::string temporary = AllocTemporary();
|
std::string temporary = AllocTemporary();
|
||||||
std::string address;
|
std::string address;
|
||||||
std::string_view opname;
|
std::string_view opname;
|
||||||
|
bool robust = false;
|
||||||
if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) {
|
if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) {
|
||||||
address = GlobalMemoryPointer(*gmem);
|
address = GlobalMemoryPointer(*gmem);
|
||||||
opname = "ATOM";
|
opname = "ATOM";
|
||||||
|
robust = true;
|
||||||
} else if (const auto smem = std::get_if<SmemNode>(&*operation[0])) {
|
} else if (const auto smem = std::get_if<SmemNode>(&*operation[0])) {
|
||||||
address = fmt::format("shared_mem[{}]", Visit(smem->GetAddress()));
|
address = fmt::format("shared_mem[{}]", Visit(smem->GetAddress()));
|
||||||
opname = "ATOMS";
|
opname = "ATOMS";
|
||||||
|
@ -386,7 +388,15 @@ private:
|
||||||
UNREACHABLE();
|
UNREACHABLE();
|
||||||
return "{0, 0, 0, 0}";
|
return "{0, 0, 0, 0}";
|
||||||
}
|
}
|
||||||
|
if (robust) {
|
||||||
|
AddLine("IF NE.x;");
|
||||||
|
}
|
||||||
AddLine("{}.{}.{} {}, {}, {};", opname, op, type, temporary, Visit(operation[1]), address);
|
AddLine("{}.{}.{} {}, {}, {};", opname, op, type, temporary, Visit(operation[1]), address);
|
||||||
|
if (robust) {
|
||||||
|
AddLine("ELSE;");
|
||||||
|
AddLine("MOV.S {}, 0;", temporary);
|
||||||
|
AddLine("ENDIF;");
|
||||||
|
}
|
||||||
return temporary;
|
return temporary;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -980,10 +990,9 @@ void ARBDecompiler::DeclareLocalMemory() {
|
||||||
}
|
}
|
||||||
|
|
||||||
void ARBDecompiler::DeclareGlobalMemory() {
|
void ARBDecompiler::DeclareGlobalMemory() {
|
||||||
const std::size_t num_entries = ir.GetGlobalMemory().size();
|
const size_t num_entries = ir.GetGlobalMemory().size();
|
||||||
if (num_entries > 0) {
|
if (num_entries > 0) {
|
||||||
const std::size_t num_vectors = Common::AlignUp(num_entries, 2) / 2;
|
AddLine("PARAM c[{}] = {{ program.local[0..{}] }};", num_entries, num_entries - 1);
|
||||||
AddLine("PARAM c[{}] = {{ program.local[0..{}] }};", num_vectors, num_vectors - 1);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1363,7 +1372,8 @@ std::string ARBDecompiler::Visit(const Node& node) {
|
||||||
|
|
||||||
if (const auto gmem = std::get_if<GmemNode>(&*node)) {
|
if (const auto gmem = std::get_if<GmemNode>(&*node)) {
|
||||||
std::string temporary = AllocTemporary();
|
std::string temporary = AllocTemporary();
|
||||||
AddLine("LOAD.U32 {}, {};", temporary, GlobalMemoryPointer(*gmem));
|
AddLine("MOV {}, 0;", temporary);
|
||||||
|
AddLine("LOAD.U32 {} (NE.x), {};", temporary, GlobalMemoryPointer(*gmem));
|
||||||
return temporary;
|
return temporary;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1441,18 +1451,21 @@ std::string ARBDecompiler::BuildAoffi(Operation operation) {
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string ARBDecompiler::GlobalMemoryPointer(const GmemNode& gmem) {
|
std::string ARBDecompiler::GlobalMemoryPointer(const GmemNode& gmem) {
|
||||||
|
// Read a bindless SSBO, return its address and set CC accordingly
|
||||||
|
// address = c[binding].xy
|
||||||
|
// length = c[binding].z
|
||||||
const u32 binding = global_memory_names.at(gmem.GetDescriptor());
|
const u32 binding = global_memory_names.at(gmem.GetDescriptor());
|
||||||
const char result_swizzle = binding % 2 == 0 ? 'x' : 'y';
|
|
||||||
|
|
||||||
const std::string pointer = AllocLongVectorTemporary();
|
const std::string pointer = AllocLongVectorTemporary();
|
||||||
std::string temporary = AllocTemporary();
|
std::string temporary = AllocTemporary();
|
||||||
|
|
||||||
const u32 local_index = binding / 2;
|
AddLine("PK64.U {}, c[{}];", pointer, binding);
|
||||||
AddLine("PK64.U {}, c[{}];", pointer, local_index);
|
|
||||||
AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem.GetRealAddress()),
|
AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem.GetRealAddress()),
|
||||||
Visit(gmem.GetBaseAddress()));
|
Visit(gmem.GetBaseAddress()));
|
||||||
AddLine("CVT.U64.U32 {}.z, {};", pointer, temporary);
|
AddLine("CVT.U64.U32 {}.z, {};", pointer, temporary);
|
||||||
AddLine("ADD.U64 {}.x, {}.{}, {}.z;", pointer, pointer, result_swizzle, pointer);
|
AddLine("ADD.U64 {}.x, {}.x, {}.z;", pointer, pointer, pointer);
|
||||||
|
// Compare offset to length and set CC
|
||||||
|
AddLine("SLT.U.CC RC.x, {}, c[{}].z;", temporary, binding);
|
||||||
return fmt::format("{}.x", pointer);
|
return fmt::format("{}.x", pointer);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1552,7 +1565,9 @@ std::string ARBDecompiler::Assign(Operation operation) {
|
||||||
ResetTemporaries();
|
ResetTemporaries();
|
||||||
return {};
|
return {};
|
||||||
} else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
|
} else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
|
||||||
|
AddLine("IF NE.x;");
|
||||||
AddLine("STORE.U32 {}, {};", Visit(src), GlobalMemoryPointer(*gmem));
|
AddLine("STORE.U32 {}, {};", Visit(src), GlobalMemoryPointer(*gmem));
|
||||||
|
AddLine("ENDIF;");
|
||||||
ResetTemporaries();
|
ResetTemporaries();
|
||||||
return {};
|
return {};
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -139,16 +139,12 @@ void oglEnable(GLenum cap, bool state) {
|
||||||
(state ? glEnable : glDisable)(cap);
|
(state ? glEnable : glDisable)(cap);
|
||||||
}
|
}
|
||||||
|
|
||||||
void UpdateBindlessPointers(GLenum target, GLuint64EXT* pointers, std::size_t num_entries) {
|
void UpdateBindlessSSBOs(GLenum target, const BindlessSSBO* ssbos, size_t num_ssbos) {
|
||||||
if (num_entries == 0) {
|
if (num_ssbos == 0) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (num_entries % 2 == 1) {
|
glProgramLocalParametersI4uivNV(target, 0, static_cast<GLsizei>(num_ssbos),
|
||||||
pointers[num_entries] = 0;
|
reinterpret_cast<const GLuint*>(ssbos));
|
||||||
}
|
|
||||||
const GLsizei num_vectors = static_cast<GLsizei>((num_entries + 1) / 2);
|
|
||||||
glProgramLocalParametersI4uivNV(target, 0, num_vectors,
|
|
||||||
reinterpret_cast<const GLuint*>(pointers));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
} // Anonymous namespace
|
} // Anonymous namespace
|
||||||
|
@ -900,11 +896,11 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* shader) {
|
void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* shader) {
|
||||||
static constexpr std::array PARAMETER_LUT = {
|
static constexpr std::array PARAMETER_LUT{
|
||||||
GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
|
GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
|
||||||
GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV,
|
GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV,
|
||||||
GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV};
|
GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV,
|
||||||
|
};
|
||||||
MICROPROFILE_SCOPE(OpenGL_UBO);
|
MICROPROFILE_SCOPE(OpenGL_UBO);
|
||||||
const auto& stages = maxwell3d.state.shader_stages;
|
const auto& stages = maxwell3d.state.shader_stages;
|
||||||
const auto& shader_stage = stages[stage_index];
|
const auto& shader_stage = stages[stage_index];
|
||||||
|
@ -1007,8 +1003,8 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* sh
|
||||||
const auto& cbufs{maxwell3d.state.shader_stages[stage_index]};
|
const auto& cbufs{maxwell3d.state.shader_stages[stage_index]};
|
||||||
const auto& entries{shader->GetEntries().global_memory_entries};
|
const auto& entries{shader->GetEntries().global_memory_entries};
|
||||||
|
|
||||||
std::array<GLuint64EXT, 32> pointers;
|
std::array<BindlessSSBO, 32> ssbos;
|
||||||
ASSERT(entries.size() < pointers.size());
|
ASSERT(entries.size() < ssbos.size());
|
||||||
|
|
||||||
const bool assembly_shaders = device.UseAssemblyShaders();
|
const bool assembly_shaders = device.UseAssemblyShaders();
|
||||||
u32 binding = assembly_shaders ? 0 : device.GetBaseBindings(stage_index).shader_storage_buffer;
|
u32 binding = assembly_shaders ? 0 : device.GetBaseBindings(stage_index).shader_storage_buffer;
|
||||||
|
@ -1016,11 +1012,11 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* sh
|
||||||
const GPUVAddr addr{cbufs.const_buffers[entry.cbuf_index].address + entry.cbuf_offset};
|
const GPUVAddr addr{cbufs.const_buffers[entry.cbuf_index].address + entry.cbuf_offset};
|
||||||
const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)};
|
const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)};
|
||||||
const u32 size{gpu_memory.Read<u32>(addr + 8)};
|
const u32 size{gpu_memory.Read<u32>(addr + 8)};
|
||||||
SetupGlobalMemory(binding, entry, gpu_addr, size, &pointers[binding]);
|
SetupGlobalMemory(binding, entry, gpu_addr, size, &ssbos[binding]);
|
||||||
++binding;
|
++binding;
|
||||||
}
|
}
|
||||||
if (assembly_shaders) {
|
if (assembly_shaders) {
|
||||||
UpdateBindlessPointers(TARGET_LUT[stage_index], pointers.data(), entries.size());
|
UpdateBindlessSSBOs(TARGET_LUT[stage_index], ssbos.data(), entries.size());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1028,29 +1024,32 @@ void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) {
|
||||||
const auto& cbufs{kepler_compute.launch_description.const_buffer_config};
|
const auto& cbufs{kepler_compute.launch_description.const_buffer_config};
|
||||||
const auto& entries{kernel->GetEntries().global_memory_entries};
|
const auto& entries{kernel->GetEntries().global_memory_entries};
|
||||||
|
|
||||||
std::array<GLuint64EXT, 32> pointers;
|
std::array<BindlessSSBO, 32> ssbos;
|
||||||
ASSERT(entries.size() < pointers.size());
|
ASSERT(entries.size() < ssbos.size());
|
||||||
|
|
||||||
u32 binding = 0;
|
u32 binding = 0;
|
||||||
for (const auto& entry : entries) {
|
for (const auto& entry : entries) {
|
||||||
const GPUVAddr addr{cbufs[entry.cbuf_index].Address() + entry.cbuf_offset};
|
const GPUVAddr addr{cbufs[entry.cbuf_index].Address() + entry.cbuf_offset};
|
||||||
const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)};
|
const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)};
|
||||||
const u32 size{gpu_memory.Read<u32>(addr + 8)};
|
const u32 size{gpu_memory.Read<u32>(addr + 8)};
|
||||||
SetupGlobalMemory(binding, entry, gpu_addr, size, &pointers[binding]);
|
SetupGlobalMemory(binding, entry, gpu_addr, size, &ssbos[binding]);
|
||||||
++binding;
|
++binding;
|
||||||
}
|
}
|
||||||
if (device.UseAssemblyShaders()) {
|
if (device.UseAssemblyShaders()) {
|
||||||
UpdateBindlessPointers(GL_COMPUTE_PROGRAM_NV, pointers.data(), entries.size());
|
UpdateBindlessSSBOs(GL_COMPUTE_PROGRAM_NV, ssbos.data(), ssbos.size());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry,
|
void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry,
|
||||||
GPUVAddr gpu_addr, std::size_t size,
|
GPUVAddr gpu_addr, size_t size, BindlessSSBO* ssbo) {
|
||||||
GLuint64EXT* pointer) {
|
const size_t alignment{device.GetShaderStorageBufferAlignment()};
|
||||||
const std::size_t alignment{device.GetShaderStorageBufferAlignment()};
|
|
||||||
const auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written);
|
const auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written);
|
||||||
if (device.UseAssemblyShaders()) {
|
if (device.UseAssemblyShaders()) {
|
||||||
*pointer = info.address + info.offset;
|
*ssbo = BindlessSSBO{
|
||||||
|
.address = static_cast<GLuint64EXT>(info.address + info.offset),
|
||||||
|
.length = static_cast<GLsizei>(size),
|
||||||
|
.padding = 0,
|
||||||
|
};
|
||||||
} else {
|
} else {
|
||||||
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, info.handle, info.offset,
|
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, info.handle, info.offset,
|
||||||
static_cast<GLsizeiptr>(size));
|
static_cast<GLsizeiptr>(size));
|
||||||
|
|
|
@ -53,6 +53,13 @@ namespace OpenGL {
|
||||||
struct ScreenInfo;
|
struct ScreenInfo;
|
||||||
struct DrawParameters;
|
struct DrawParameters;
|
||||||
|
|
||||||
|
struct BindlessSSBO {
|
||||||
|
GLuint64EXT address;
|
||||||
|
GLsizei length;
|
||||||
|
GLsizei padding;
|
||||||
|
};
|
||||||
|
static_assert(sizeof(BindlessSSBO) * CHAR_BIT == 128);
|
||||||
|
|
||||||
class RasterizerOpenGL : public VideoCore::RasterizerAccelerated {
|
class RasterizerOpenGL : public VideoCore::RasterizerAccelerated {
|
||||||
public:
|
public:
|
||||||
explicit RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu,
|
explicit RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu,
|
||||||
|
@ -126,7 +133,7 @@ private:
|
||||||
|
|
||||||
/// Configures a global memory buffer.
|
/// Configures a global memory buffer.
|
||||||
void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr,
|
void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr,
|
||||||
std::size_t size, GLuint64EXT* pointer);
|
size_t size, BindlessSSBO* ssbo);
|
||||||
|
|
||||||
/// Configures the current textures to use for the draw command.
|
/// Configures the current textures to use for the draw command.
|
||||||
void SetupDrawTextures(std::size_t stage_index, Shader* shader);
|
void SetupDrawTextures(std::size_t stage_index, Shader* shader);
|
||||||
|
|
Loading…
Reference in a new issue