renderer_software: Move memory access out of the raster loop

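* Profiling shows that resolving the color/depth buffer pointers on every pixel access has a significant performance cost, so they are now cached by Framebuffer::Bind(), which is called once per triangle before the raster loop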
This commit is contained in:
GPUCode 2023-07-16 03:36:47 +03:00
parent 531d280461
commit 8012b28b92
3 changed files with 39 additions and 34 deletions

View file

@ -41,10 +41,22 @@ Framebuffer::Framebuffer(Memory::MemorySystem& memory_, const Pica::FramebufferR
Framebuffer::~Framebuffer() = default; Framebuffer::~Framebuffer() = default;
void Framebuffer::DrawPixel(int x, int y, const Common::Vec4<u8>& color) const { void Framebuffer::Bind() {
const auto& framebuffer = regs.framebuffer; PAddr addr = regs.framebuffer.GetColorBufferPhysicalAddress();
const PAddr addr = framebuffer.GetColorBufferPhysicalAddress(); if (color_addr != addr) [[unlikely]] {
color_addr = addr;
color_buffer = memory.GetPhysicalPointer(color_addr);
}
addr = regs.framebuffer.GetDepthBufferPhysicalAddress();
if (depth_addr != addr) [[unlikely]] {
depth_addr = addr;
depth_buffer = memory.GetPhysicalPointer(depth_addr);
}
}
void Framebuffer::DrawPixel(u32 x, u32 y, const Common::Vec4<u8>& color) const {
const auto& framebuffer = regs.framebuffer;
// Similarly to textures, the render framebuffer is laid out from bottom to top, too. // Similarly to textures, the render framebuffer is laid out from bottom to top, too.
// NOTE: The framebuffer height register contains the actual FB height minus one. // NOTE: The framebuffer height register contains the actual FB height minus one.
y = framebuffer.height - y; y = framebuffer.height - y;
@ -54,8 +66,7 @@ void Framebuffer::DrawPixel(int x, int y, const Common::Vec4<u8>& color) const {
GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer.color_format.Value())); GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer.color_format.Value()));
const u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + const u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) +
coarse_y * framebuffer.width * bytes_per_pixel; coarse_y * framebuffer.width * bytes_per_pixel;
u8* depth_buffer = memory.GetPhysicalPointer(addr); u8* dst_pixel = color_buffer + dst_offset;
u8* dst_pixel = depth_buffer + dst_offset;
switch (framebuffer.color_format) { switch (framebuffer.color_format) {
case FramebufferRegs::ColorFormat::RGBA8: case FramebufferRegs::ColorFormat::RGBA8:
@ -80,10 +91,8 @@ void Framebuffer::DrawPixel(int x, int y, const Common::Vec4<u8>& color) const {
} }
} }
const Common::Vec4<u8> Framebuffer::GetPixel(int x, int y) const { const Common::Vec4<u8> Framebuffer::GetPixel(u32 x, u32 y) const {
const auto& framebuffer = regs.framebuffer; const auto& framebuffer = regs.framebuffer;
const PAddr addr = framebuffer.GetColorBufferPhysicalAddress();
y = framebuffer.height - y; y = framebuffer.height - y;
const u32 coarse_y = y & ~7; const u32 coarse_y = y & ~7;
@ -91,7 +100,6 @@ const Common::Vec4<u8> Framebuffer::GetPixel(int x, int y) const {
GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer.color_format.Value())); GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer.color_format.Value()));
const u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + const u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) +
coarse_y * framebuffer.width * bytes_per_pixel; coarse_y * framebuffer.width * bytes_per_pixel;
const u8* color_buffer = memory.GetPhysicalPointer(addr);
const u8* src_pixel = color_buffer + src_offset; const u8* src_pixel = color_buffer + src_offset;
switch (framebuffer.color_format) { switch (framebuffer.color_format) {
@ -114,10 +122,8 @@ const Common::Vec4<u8> Framebuffer::GetPixel(int x, int y) const {
return {0, 0, 0, 0}; return {0, 0, 0, 0};
} }
u32 Framebuffer::GetDepth(int x, int y) const { u32 Framebuffer::GetDepth(u32 x, u32 y) const {
const auto& framebuffer = regs.framebuffer; const auto& framebuffer = regs.framebuffer;
const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress();
y = framebuffer.height - y; y = framebuffer.height - y;
const u32 coarse_y = y & ~7; const u32 coarse_y = y & ~7;
@ -125,7 +131,6 @@ u32 Framebuffer::GetDepth(int x, int y) const {
const u32 stride = framebuffer.width * bytes_per_pixel; const u32 stride = framebuffer.width * bytes_per_pixel;
const u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride; const u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride;
const u8* depth_buffer = memory.GetPhysicalPointer(addr);
const u8* src_pixel = depth_buffer + src_offset; const u8* src_pixel = depth_buffer + src_offset;
switch (framebuffer.depth_format) { switch (framebuffer.depth_format) {
@ -143,10 +148,8 @@ u32 Framebuffer::GetDepth(int x, int y) const {
} }
} }
u8 Framebuffer::GetStencil(int x, int y) const { u8 Framebuffer::GetStencil(u32 x, u32 y) const {
const auto& framebuffer = regs.framebuffer; const auto& framebuffer = regs.framebuffer;
const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress();
y = framebuffer.height - y; y = framebuffer.height - y;
const u32 coarse_y = y & ~7; const u32 coarse_y = y & ~7;
@ -154,7 +157,6 @@ u8 Framebuffer::GetStencil(int x, int y) const {
const u32 stride = framebuffer.width * bytes_per_pixel; const u32 stride = framebuffer.width * bytes_per_pixel;
const u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride; const u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride;
const u8* depth_buffer = memory.GetPhysicalPointer(addr);
const u8* src_pixel = depth_buffer + src_offset; const u8* src_pixel = depth_buffer + src_offset;
switch (framebuffer.depth_format) { switch (framebuffer.depth_format) {
@ -169,10 +171,8 @@ u8 Framebuffer::GetStencil(int x, int y) const {
} }
} }
void Framebuffer::SetDepth(int x, int y, u32 value) const { void Framebuffer::SetDepth(u32 x, u32 y, u32 value) const {
const auto& framebuffer = regs.framebuffer; const auto& framebuffer = regs.framebuffer;
const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress();
y = framebuffer.height - y; y = framebuffer.height - y;
const u32 coarse_y = y & ~7; const u32 coarse_y = y & ~7;
@ -180,7 +180,6 @@ void Framebuffer::SetDepth(int x, int y, u32 value) const {
const u32 stride = framebuffer.width * bytes_per_pixel; const u32 stride = framebuffer.width * bytes_per_pixel;
const u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride; const u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride;
u8* depth_buffer = memory.GetPhysicalPointer(addr);
u8* dst_pixel = depth_buffer + dst_offset; u8* dst_pixel = depth_buffer + dst_offset;
switch (framebuffer.depth_format) { switch (framebuffer.depth_format) {
@ -201,10 +200,8 @@ void Framebuffer::SetDepth(int x, int y, u32 value) const {
} }
} }
void Framebuffer::SetStencil(int x, int y, u8 value) const { void Framebuffer::SetStencil(u32 x, u32 y, u8 value) const {
const auto& framebuffer = regs.framebuffer; const auto& framebuffer = regs.framebuffer;
const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress();
y = framebuffer.height - y; y = framebuffer.height - y;
const u32 coarse_y = y & ~7; const u32 coarse_y = y & ~7;
@ -212,7 +209,6 @@ void Framebuffer::SetStencil(int x, int y, u8 value) const {
const u32 stride = framebuffer.width * bytes_per_pixel; const u32 stride = framebuffer.width * bytes_per_pixel;
const u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride; const u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride;
u8* depth_buffer = memory.GetPhysicalPointer(addr);
u8* dst_pixel = depth_buffer + dst_offset; u8* dst_pixel = depth_buffer + dst_offset;
switch (framebuffer.depth_format) { switch (framebuffer.depth_format) {
@ -231,7 +227,7 @@ void Framebuffer::SetStencil(int x, int y, u8 value) const {
} }
} }
void Framebuffer::DrawShadowMapPixel(int x, int y, u32 depth, u8 stencil) const { void Framebuffer::DrawShadowMapPixel(u32 x, u32 y, u32 depth, u8 stencil) const {
const auto& framebuffer = regs.framebuffer; const auto& framebuffer = regs.framebuffer;
const auto& shadow = regs.shadow; const auto& shadow = regs.shadow;
const PAddr addr = framebuffer.GetColorBufferPhysicalAddress(); const PAddr addr = framebuffer.GetColorBufferPhysicalAddress();

View file

@ -23,30 +23,37 @@ public:
explicit Framebuffer(Memory::MemorySystem& memory, const Pica::FramebufferRegs& framebuffer); explicit Framebuffer(Memory::MemorySystem& memory, const Pica::FramebufferRegs& framebuffer);
~Framebuffer(); ~Framebuffer();
/// Updates the framebuffer addresses from the PICA registers.
void Bind();
/// Draws a pixel at the specified coordinates. /// Draws a pixel at the specified coordinates.
void DrawPixel(int x, int y, const Common::Vec4<u8>& color) const; void DrawPixel(u32 x, u32 y, const Common::Vec4<u8>& color) const;
/// Returns the current color at the specified coordinates. /// Returns the current color at the specified coordinates.
[[nodiscard]] const Common::Vec4<u8> GetPixel(int x, int y) const; [[nodiscard]] const Common::Vec4<u8> GetPixel(u32 x, u32 y) const;
/// Returns the depth value at the specified coordinates. /// Returns the depth value at the specified coordinates.
[[nodiscard]] u32 GetDepth(int x, int y) const; [[nodiscard]] u32 GetDepth(u32 x, u32 y) const;
/// Returns the stencil value at the specified coordinates. /// Returns the stencil value at the specified coordinates.
[[nodiscard]] u8 GetStencil(int x, int y) const; [[nodiscard]] u8 GetStencil(u32 x, u32 y) const;
/// Stores the provided depth value at the specified coordinates. /// Stores the provided depth value at the specified coordinates.
void SetDepth(int x, int y, u32 value) const; void SetDepth(u32 x, u32 y, u32 value) const;
/// Stores the provided stencil value at the specified coordinates. /// Stores the provided stencil value at the specified coordinates.
void SetStencil(int x, int y, u8 value) const; void SetStencil(u32 x, u32 y, u8 value) const;
/// Draws a pixel to the shadow buffer. /// Draws a pixel to the shadow buffer.
void DrawShadowMapPixel(int x, int y, u32 depth, u8 stencil) const; void DrawShadowMapPixel(u32 x, u32 y, u32 depth, u8 stencil) const;
private: private:
Memory::MemorySystem& memory; Memory::MemorySystem& memory;
const Pica::FramebufferRegs& regs; const Pica::FramebufferRegs& regs;
PAddr color_addr;
u8* color_buffer{};
PAddr depth_addr;
u8* depth_buffer{};
}; };
u8 PerformStencilAction(Pica::FramebufferRegs::StencilAction action, u8 old_stencil, u8 ref); u8 PerformStencilAction(Pica::FramebufferRegs::StencilAction action, u8 old_stencil, u8 ref);

View file

@ -295,9 +295,11 @@ void RasterizerSoftware::ProcessTriangle(const Vertex& v0, const Vertex& v1, con
const auto w_inverse = Common::MakeVec(v0.pos.w, v1.pos.w, v2.pos.w); const auto w_inverse = Common::MakeVec(v0.pos.w, v1.pos.w, v2.pos.w);
auto textures = regs.texturing.GetTextures(); const auto textures = regs.texturing.GetTextures();
const auto tev_stages = regs.texturing.GetTevStages(); const auto tev_stages = regs.texturing.GetTevStages();
fb.Bind();
// Enter rasterization loop, starting at the center of the topleft bounding box corner. // Enter rasterization loop, starting at the center of the topleft bounding box corner.
// TODO: Not sure if looping through x first might be faster // TODO: Not sure if looping through x first might be faster
for (u16 y = min_y + 8; y < max_y; y += 0x10) { for (u16 y = min_y + 8; y < max_y; y += 0x10) {