mirror of
https://git.suyu.dev/suyu/suyu.git
synced 2024-11-22 23:02:47 +01:00
MacroHLE: Add MultidrawIndirect HLE Macro.
This commit is contained in:
parent
a12a4f2a13
commit
a5a94f52ff
13 changed files with 169 additions and 47 deletions
|
@ -170,6 +170,9 @@ public:
|
|||
void BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, PixelFormat format,
|
||||
bool is_written, bool is_image);
|
||||
|
||||
[[nodiscard]] std::pair<Buffer*, u32> ObtainBuffer(GPUVAddr gpu_addr, u32 size,
|
||||
bool synchronize, bool mark_as_written);
|
||||
|
||||
void FlushCachedWrites();
|
||||
|
||||
/// Return true when there are uncommitted buffers to be downloaded
|
||||
|
@ -790,6 +793,25 @@ void BufferCache<P>::BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_add
|
|||
compute_texture_buffers[tbo_index] = GetTextureBufferBinding(gpu_addr, size, format);
|
||||
}
|
||||
|
||||
template <class P>
|
||||
std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainBuffer(GPUVAddr gpu_addr, u32 size,
|
||||
bool synchronize,
|
||||
bool mark_as_written) {
|
||||
const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
|
||||
if (!cpu_addr) {
|
||||
return {&slot_buffers[NULL_BUFFER_ID], 0};
|
||||
}
|
||||
const BufferId buffer_id = FindBuffer(*cpu_addr, size);
|
||||
Buffer& buffer = slot_buffers[buffer_id];
|
||||
if (synchronize) {
|
||||
SynchronizeBuffer(buffer, *cpu_addr, size);
|
||||
}
|
||||
if (mark_as_written) {
|
||||
MarkWrittenBuffer(buffer_id, *cpu_addr, size);
|
||||
}
|
||||
return {&buffer, buffer.Offset(*cpu_addr)};
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void BufferCache<P>::FlushCachedWrites() {
|
||||
for (const BufferId buffer_id : cached_write_buffer_ids) {
|
||||
|
|
|
@ -61,7 +61,7 @@ bool DmaPusher::Step() {
|
|||
} else {
|
||||
const CommandListHeader command_list_header{
|
||||
command_list.command_lists[dma_pushbuffer_subindex++]};
|
||||
const GPUVAddr dma_get = command_list_header.addr;
|
||||
dma_state.dma_get = command_list_header.addr;
|
||||
|
||||
if (dma_pushbuffer_subindex >= command_list.command_lists.size()) {
|
||||
// We've gone through the current list, remove it from the queue
|
||||
|
@ -75,11 +75,11 @@ bool DmaPusher::Step() {
|
|||
|
||||
// Push buffer non-empty, read a word
|
||||
command_headers.resize_destructive(command_list_header.size);
|
||||
if (Settings::IsGPULevelHigh()) {
|
||||
memory_manager.ReadBlock(dma_get, command_headers.data(),
|
||||
if (Settings::IsGPULevelExtreme()) {
|
||||
memory_manager.ReadBlock(dma_state.dma_get, command_headers.data(),
|
||||
command_list_header.size * sizeof(u32));
|
||||
} else {
|
||||
memory_manager.ReadBlockUnsafe(dma_get, command_headers.data(),
|
||||
memory_manager.ReadBlockUnsafe(dma_state.dma_get, command_headers.data(),
|
||||
command_list_header.size * sizeof(u32));
|
||||
}
|
||||
ProcessCommands(command_headers);
|
||||
|
@ -174,8 +174,10 @@ void DmaPusher::CallMultiMethod(const u32* base_start, u32 num_methods) const {
|
|||
puller.CallMultiMethod(dma_state.method, dma_state.subchannel, base_start, num_methods,
|
||||
dma_state.method_count);
|
||||
} else {
|
||||
subchannels[dma_state.subchannel]->CallMultiMethod(dma_state.method, base_start,
|
||||
num_methods, dma_state.method_count);
|
||||
auto subchannel = subchannels[dma_state.subchannel];
|
||||
subchannel->current_dma_segment = dma_state.dma_get;
|
||||
subchannel->CallMultiMethod(dma_state.method, base_start, num_methods,
|
||||
dma_state.method_count);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -156,6 +156,7 @@ private:
|
|||
u32 subchannel; ///< Current subchannel
|
||||
u32 method_count; ///< Current method count
|
||||
u32 length_pending; ///< Large NI command length pending
|
||||
GPUVAddr dma_get; ///< Currently read segment
|
||||
bool non_incrementing; ///< Current command's NI flag
|
||||
bool is_last_call;
|
||||
};
|
||||
|
|
|
@ -91,6 +91,16 @@ void DrawManager::DrawIndex(PrimitiveTopology topology, u32 index_first, u32 ind
|
|||
ProcessDraw(true, num_instances);
|
||||
}
|
||||
|
||||
/// Sets up the draw state for an indexed indirect draw and dispatches it.
///
/// The index buffer state is copied from the current registers, then its first/count
/// fields are replaced by the values supplied by the caller.
///
/// @param topology    Primitive topology stored into the draw state.
/// @param index_first Value written to the index buffer's `first` field.
/// @param index_count Value written to the index buffer's `count` field.
void DrawManager::DrawIndexedIndirect(PrimitiveTopology topology, u32 index_first, u32 index_count) {
    draw_state.topology = topology;

    auto& index_buffer = draw_state.index_buffer;
    index_buffer = maxwell3d->regs.index_buffer;
    index_buffer.first = index_first;
    index_buffer.count = index_count;

    constexpr bool is_indexed = true;
    ProcessDrawIndirect(is_indexed);
}
|
||||
|
||||
void DrawManager::SetInlineIndexBuffer(u32 index) {
|
||||
draw_state.inline_index_draw_indexes.push_back(static_cast<u8>(index & 0x000000ff));
|
||||
draw_state.inline_index_draw_indexes.push_back(static_cast<u8>((index & 0x0000ff00) >> 8));
|
||||
|
@ -198,4 +208,15 @@ void DrawManager::ProcessDraw(bool draw_indexed, u32 instance_count) {
|
|||
maxwell3d->rasterizer->Draw(draw_indexed, instance_count);
|
||||
}
|
||||
}
|
||||
|
||||
void DrawManager::ProcessDrawIndirect(bool draw_indexed) {
|
||||
LOG_TRACE(HW_GPU, "called, topology={}, count={}", draw_state.topology,
|
||||
draw_indexed ? draw_state.index_buffer.count : draw_state.vertex_buffer.count);
|
||||
|
||||
UpdateTopology();
|
||||
|
||||
if (maxwell3d->ShouldExecute()) {
|
||||
maxwell3d->rasterizer->DrawIndirect(draw_indexed);
|
||||
}
|
||||
}
|
||||
} // namespace Tegra::Engines
|
||||
|
|
|
@ -32,6 +32,13 @@ public:
|
|||
std::vector<u8> inline_index_draw_indexes;
|
||||
};
|
||||
|
||||
/// Parameters of a pending indirect draw, filled in by the HLE macro and read by the rasterizer.
struct IndirectParams {
|
||||
/// GPU virtual address of the indirect data. The Vulkan backend reads the draw count at the
/// resolved offset and the draw commands 4 bytes after it.
GPUVAddr start_address;
|
||||
/// Size of the range handed to the buffer cache (narrowed to u32 by the Vulkan backend).
size_t buffer_size;
|
||||
/// Upper bound on the number of draws, passed as the draw count of the indirect-count call.
size_t max_draw_counts;
|
||||
/// Stride between consecutive draw commands; the HLE macro computes it in bytes.
size_t stride;
|
||||
};
|
||||
|
||||
explicit DrawManager(Maxwell3D* maxwell_3d);
|
||||
|
||||
void ProcessMethodCall(u32 method, u32 argument);
|
||||
|
@ -46,10 +53,20 @@ public:
|
|||
void DrawIndex(PrimitiveTopology topology, u32 index_first, u32 index_count, u32 base_index,
|
||||
u32 base_instance, u32 num_instances);
|
||||
|
||||
void DrawIndexedIndirect(PrimitiveTopology topology, u32 index_first, u32 index_count);
|
||||
|
||||
/// Returns a read-only reference to the current draw state.
const State& GetDrawState() const {
|
||||
return draw_state;
|
||||
}
|
||||
|
||||
/// Returns a mutable reference to the indirect draw parameters so callers can fill them in.
IndirectParams& GetIndirectParams() {
|
||||
return indirect_state;
|
||||
}
|
||||
|
||||
/// Returns a read-only reference to the indirect draw parameters.
const IndirectParams& GetIndirectParams() const {
|
||||
return indirect_state;
|
||||
}
|
||||
|
||||
private:
|
||||
void SetInlineIndexBuffer(u32 index);
|
||||
|
||||
|
@ -63,7 +80,10 @@ private:
|
|||
|
||||
void ProcessDraw(bool draw_indexed, u32 instance_count);
|
||||
|
||||
void ProcessDrawIndirect(bool draw_indexed);
|
||||
|
||||
Maxwell3D* maxwell3d{};
|
||||
State draw_state{};
|
||||
IndirectParams indirect_state{};
|
||||
};
|
||||
} // namespace Tegra::Engines
|
||||
|
|
|
@ -17,6 +17,8 @@ public:
|
|||
/// Write multiple values to the register identified by method.
|
||||
virtual void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
|
||||
u32 methods_pending) = 0;
|
||||
|
||||
GPUVAddr current_dma_segment;
|
||||
};
|
||||
|
||||
} // namespace Tegra::Engines
|
||||
|
|
|
@ -53,42 +53,43 @@ void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d, const std::vector<u32>&
|
|||
|
||||
// Multidraw Indirect
|
||||
void HLE_3F5E74B9C9A50164(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) {
|
||||
SCOPE_EXIT({
|
||||
// Clean everything.
|
||||
maxwell3d.regs.vertex_id_base = 0x0;
|
||||
maxwell3d.CallMethod(0x8e3, 0x640, true);
|
||||
maxwell3d.CallMethod(0x8e4, 0x0, true);
|
||||
maxwell3d.CallMethod(0x8e5, 0x0, true);
|
||||
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
|
||||
});
|
||||
const u32 start_indirect = parameters[0];
|
||||
const u32 end_indirect = parameters[1];
|
||||
if (start_indirect >= end_indirect) {
|
||||
// Nothing to do.
|
||||
return;
|
||||
}
|
||||
const u32 padding = parameters[3];
|
||||
const std::size_t max_draws = parameters[4];
|
||||
const auto topology =
|
||||
static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[2]);
|
||||
const u32 padding = parameters[3]; // padding is in words
|
||||
|
||||
// size of each indirect segment
|
||||
const u32 indirect_words = 5 + padding;
|
||||
const std::size_t first_draw = start_indirect;
|
||||
const std::size_t effective_draws = end_indirect - start_indirect;
|
||||
const std::size_t last_draw = start_indirect + std::min(effective_draws, max_draws);
|
||||
|
||||
for (std::size_t index = first_draw; index < last_draw; index++) {
|
||||
const u32 stride = indirect_words * sizeof(u32);
|
||||
const GPUVAddr start_address = maxwell3d.current_dma_segment + 4 * sizeof(u32);
|
||||
const std::size_t draw_count = end_indirect - start_indirect;
|
||||
u32 lowest_first = std::numeric_limits<u32>::max();
|
||||
u32 highest_limit = std::numeric_limits<u32>::min();
|
||||
for (std::size_t index = 0; index < draw_count; index++) {
|
||||
const std::size_t base = index * indirect_words + 5;
|
||||
const u32 base_vertex = parameters[base + 3];
|
||||
const u32 base_instance = parameters[base + 4];
|
||||
maxwell3d.regs.vertex_id_base = base_vertex;
|
||||
const u32 count = parameters[base];
|
||||
const u32 first_index = parameters[base + 2];
|
||||
lowest_first = std::min(lowest_first, first_index);
|
||||
highest_limit = std::max(highest_limit, first_index + count);
|
||||
}
|
||||
|
||||
const u32 base_vertex = parameters[8];
|
||||
const u32 base_instance = parameters[9];
|
||||
maxwell3d.CallMethod(0x8e3, 0x640, true);
|
||||
maxwell3d.CallMethod(0x8e4, base_vertex, true);
|
||||
maxwell3d.CallMethod(0x8e5, base_instance, true);
|
||||
auto& params = maxwell3d.draw_manager->GetIndirectParams();
|
||||
params.start_address = start_address;
|
||||
params.buffer_size = sizeof(u32) + stride * draw_count;
|
||||
params.max_draw_counts = draw_count;
|
||||
params.stride = stride;
|
||||
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
|
||||
maxwell3d.draw_manager->DrawIndex(
|
||||
static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[2]),
|
||||
parameters[base + 2], parameters[base], base_vertex, base_instance,
|
||||
parameters[base + 1]);
|
||||
}
|
||||
maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, highest_limit);
|
||||
}
|
||||
|
||||
// Multi-layer Clear
|
||||
|
|
|
@ -42,6 +42,9 @@ public:
|
|||
/// Dispatches a draw invocation
|
||||
virtual void Draw(bool is_indexed, u32 instance_count) = 0;
|
||||
|
||||
/// Dispatches an indirect draw invocation.
/// The default implementation has an empty body, so a backend that does not override it
/// performs no work for this call.
|
||||
virtual void DrawIndirect(bool is_indexed) {}
|
||||
|
||||
/// Clear the current framebuffer
|
||||
virtual void Clear(u32 layer_count) = 0;
|
||||
|
||||
|
|
|
@ -180,7 +180,8 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra
|
|||
|
||||
RasterizerVulkan::~RasterizerVulkan() = default;
|
||||
|
||||
void RasterizerVulkan::Draw(bool is_indexed, u32 instance_count) {
|
||||
template <typename Func>
|
||||
void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) {
|
||||
MICROPROFILE_SCOPE(Vulkan_Drawing);
|
||||
|
||||
SCOPE_EXIT({ gpu.TickWork(); });
|
||||
|
@ -201,6 +202,13 @@ void RasterizerVulkan::Draw(bool is_indexed, u32 instance_count) {
|
|||
|
||||
UpdateDynamicStates();
|
||||
|
||||
draw_func();
|
||||
|
||||
EndTransformFeedback();
|
||||
}
|
||||
|
||||
void RasterizerVulkan::Draw(bool is_indexed, u32 instance_count) {
|
||||
PrepareDraw(is_indexed, [this, is_indexed, instance_count] {
|
||||
const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
|
||||
const u32 num_instances{instance_count};
|
||||
const DrawParams draw_params{MakeDrawParams(draw_state, num_instances, is_indexed)};
|
||||
|
@ -214,7 +222,28 @@ void RasterizerVulkan::Draw(bool is_indexed, u32 instance_count) {
|
|||
draw_params.base_vertex, draw_params.base_instance);
|
||||
}
|
||||
});
|
||||
EndTransformFeedback();
|
||||
});
|
||||
}
|
||||
|
||||
void RasterizerVulkan::DrawIndirect(bool is_indexed) {
|
||||
PrepareDraw(is_indexed, [this, is_indexed] {
|
||||
const auto params = maxwell3d->draw_manager->GetIndirectParams();
|
||||
const auto [buffer, offset] = buffer_cache.ObtainBuffer(
|
||||
params.start_address, static_cast<u32>(params.buffer_size), true, false);
|
||||
scheduler.Record([buffer_obj = buffer->Handle(), offset,
|
||||
max_draw_counts = params.max_draw_counts, stride = params.stride,
|
||||
is_indexed](vk::CommandBuffer cmdbuf) {
|
||||
if (is_indexed) {
|
||||
cmdbuf.DrawIndexedIndirectCount(buffer_obj, offset + 4ULL, buffer_obj, offset,
|
||||
static_cast<u32>(max_draw_counts),
|
||||
static_cast<u32>(stride));
|
||||
} else {
|
||||
cmdbuf.DrawIndirectCount(buffer_obj, offset + 4ULL, buffer_obj, offset,
|
||||
static_cast<u32>(max_draw_counts),
|
||||
static_cast<u32>(stride));
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
void RasterizerVulkan::Clear(u32 layer_count) {
|
||||
|
|
|
@ -65,6 +65,7 @@ public:
|
|||
~RasterizerVulkan() override;
|
||||
|
||||
void Draw(bool is_indexed, u32 instance_count) override;
|
||||
void DrawIndirect(bool is_indexed) override;
|
||||
void Clear(u32 layer_count) override;
|
||||
void DispatchCompute() override;
|
||||
void ResetCounter(VideoCore::QueryType type) override;
|
||||
|
@ -114,6 +115,9 @@ private:
|
|||
|
||||
static constexpr VkDeviceSize DEFAULT_BUFFER_SIZE = 4 * sizeof(float);
|
||||
|
||||
template <typename Func>
|
||||
void PrepareDraw(bool is_indexed, Func&&);
|
||||
|
||||
void FlushWork();
|
||||
|
||||
void UpdateDynamicStates();
|
||||
|
|
|
@ -350,7 +350,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
|
|||
.sampleRateShading = true,
|
||||
.dualSrcBlend = true,
|
||||
.logicOp = true,
|
||||
.multiDrawIndirect = false,
|
||||
.multiDrawIndirect = true,
|
||||
.drawIndirectFirstInstance = false,
|
||||
.depthClamp = true,
|
||||
.depthBiasClamp = true,
|
||||
|
|
|
@ -94,6 +94,8 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
|
|||
X(vkCmdDispatch);
|
||||
X(vkCmdDraw);
|
||||
X(vkCmdDrawIndexed);
|
||||
X(vkCmdDrawIndirectCount);
|
||||
X(vkCmdDrawIndexedIndirectCount);
|
||||
X(vkCmdEndQuery);
|
||||
X(vkCmdEndRenderPass);
|
||||
X(vkCmdEndTransformFeedbackEXT);
|
||||
|
|
|
@ -213,6 +213,8 @@ struct DeviceDispatch : InstanceDispatch {
|
|||
PFN_vkCmdDispatch vkCmdDispatch{};
|
||||
PFN_vkCmdDraw vkCmdDraw{};
|
||||
PFN_vkCmdDrawIndexed vkCmdDrawIndexed{};
|
||||
PFN_vkCmdDrawIndirectCount vkCmdDrawIndirectCount{};
|
||||
PFN_vkCmdDrawIndexedIndirectCount vkCmdDrawIndexedIndirectCount{};
|
||||
PFN_vkCmdEndDebugUtilsLabelEXT vkCmdEndDebugUtilsLabelEXT{};
|
||||
PFN_vkCmdEndQuery vkCmdEndQuery{};
|
||||
PFN_vkCmdEndRenderPass vkCmdEndRenderPass{};
|
||||
|
@ -1019,6 +1021,19 @@ public:
|
|||
first_instance);
|
||||
}
|
||||
|
||||
/// Forwards to vkCmdDrawIndirectCount through the device dispatch table for this command buffer.
/// @param src_buffer   Buffer holding the draw commands.
/// @param src_offset   Offset of the first draw command inside src_buffer.
/// @param count_buffer Buffer holding the draw count.
/// @param count_offset Offset of the draw count inside count_buffer.
/// @param draw_count   Forwarded as the draw-count argument of the Vulkan call.
/// @param stride       Forwarded as the stride argument of the Vulkan call.
void DrawIndirectCount(VkBuffer src_buffer, VkDeviceSize src_offset, VkBuffer count_buffer,
|
||||
VkDeviceSize count_offset, u32 draw_count, u32 stride) const noexcept {
|
||||
dld->vkCmdDrawIndirectCount(handle, src_buffer, src_offset, count_buffer, count_offset,
|
||||
draw_count, stride);
|
||||
}
|
||||
|
||||
/// Forwards to vkCmdDrawIndexedIndirectCount through the device dispatch table for this
/// command buffer.
/// @param src_buffer   Buffer holding the indexed draw commands.
/// @param src_offset   Offset of the first draw command inside src_buffer.
/// @param count_buffer Buffer holding the draw count.
/// @param count_offset Offset of the draw count inside count_buffer.
/// @param draw_count   Forwarded as the draw-count argument of the Vulkan call.
/// @param stride       Forwarded as the stride argument of the Vulkan call.
void DrawIndexedIndirectCount(VkBuffer src_buffer, VkDeviceSize src_offset,
|
||||
VkBuffer count_buffer, VkDeviceSize count_offset, u32 draw_count,
|
||||
u32 stride) const noexcept {
|
||||
dld->vkCmdDrawIndexedIndirectCount(handle, src_buffer, src_offset, count_buffer,
|
||||
count_offset, draw_count, stride);
|
||||
}
|
||||
|
||||
void ClearAttachments(Span<VkClearAttachment> attachments,
|
||||
Span<VkClearRect> rects) const noexcept {
|
||||
dld->vkCmdClearAttachments(handle, attachments.size(), attachments.data(), rects.size(),
|
||||
|
|
Loading…
Reference in a new issue