diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 581bbba46..7353f2c10 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -95,6 +95,8 @@ add_library(video_core STATIC renderer_software/sw_proctex.h renderer_software/sw_rasterizer.cpp renderer_software/sw_rasterizer.h + renderer_software/sw_tev_jit.cpp + renderer_software/sw_tev_jit.h renderer_software/sw_texturing.cpp renderer_software/sw_texturing.h renderer_vulkan/pica_to_vk.h diff --git a/src/video_core/renderer_software/renderer_software.cpp b/src/video_core/renderer_software/renderer_software.cpp index 7194be8e1..c1f752a14 100644 --- a/src/video_core/renderer_software/renderer_software.cpp +++ b/src/video_core/renderer_software/renderer_software.cpp @@ -8,6 +8,7 @@ #include "core/hw/hw.h" #include "core/hw/lcd.h" #include "video_core/renderer_software/renderer_software.h" +#include "video_core/renderer_software/sw_rasterizer.h" namespace SwRenderer { @@ -17,6 +18,10 @@ RendererSoftware::RendererSoftware(Core::System& system, Frontend::EmuWindow& wi RendererSoftware::~RendererSoftware() = default; +VideoCore::RasterizerInterface* RendererSoftware::Rasterizer() const { + return rasterizer.get(); +} + void RendererSoftware::SwapBuffers() { PrepareRenderTarget(); EndFrame(); diff --git a/src/video_core/renderer_software/renderer_software.h b/src/video_core/renderer_software/renderer_software.h index 6ed86ffa3..963996cd3 100644 --- a/src/video_core/renderer_software/renderer_software.h +++ b/src/video_core/renderer_software/renderer_software.h @@ -5,7 +5,6 @@ #pragma once #include "video_core/renderer_base.h" -#include "video_core/renderer_software/sw_rasterizer.h" namespace Core { class System; @@ -19,19 +18,18 @@ struct ScreenInfo { std::vector pixels; }; +class RasterizerSoftware; + class RendererSoftware : public VideoCore::RendererBase { public: explicit RendererSoftware(Core::System& system, Frontend::EmuWindow& window); ~RendererSoftware() 
override; - [[nodiscard]] VideoCore::RasterizerInterface* Rasterizer() const override { - return rasterizer.get(); - } - [[nodiscard]] const ScreenInfo& Screen(VideoCore::ScreenId id) const noexcept { return screen_infos[static_cast(id)]; } + VideoCore::RasterizerInterface* Rasterizer() const override; void SwapBuffers() override; void TryPresent(int timeout_ms, bool is_secondary) override {} void Sync() override {} diff --git a/src/video_core/renderer_software/sw_rasterizer.cpp b/src/video_core/renderer_software/sw_rasterizer.cpp index 9c559fc85..b3c6fea3f 100644 --- a/src/video_core/renderer_software/sw_rasterizer.cpp +++ b/src/video_core/renderer_software/sw_rasterizer.cpp @@ -297,9 +297,24 @@ void RasterizerSoftware::ProcessTriangle(const Vertex& v0, const Vertex& v1, con const auto textures = regs.texturing.GetTextures(); const auto tev_stages = regs.texturing.GetTevStages(); + for (u32 i = 0; i < texture_data.size(); i++) { + const PAddr addr = textures[i].config.GetPhysicalAddress(); + if (addr) { + texture_data[i] = memory.GetPhysicalPointer(addr); + } + } fb.Bind(); + if (use_jit) { + const TevConfigKey key{regs.texturing}; + auto [it, new_fun] = tev_cache.try_emplace(key.Hash()); + if (new_fun) { + it->second = std::make_unique(regs, key); + } + tev_config = it->second.get(); + } + // Enter rasterization loop, starting at the center of the topleft bounding box corner. // TODO: Not sure if looping through x first might be faster for (u16 y = min_y + 8; y < max_y; y += 0x10) { @@ -408,7 +423,7 @@ void RasterizerSoftware::ProcessTriangle(const Vertex& v0, const Vertex& v1, con // Sample bound texture units. 
const f24 tc0_w = get_interpolated_attribute(v0.tc0_w, v1.tc0_w, v2.tc0_w); - const auto texture_color = TextureColor(uv, textures, tc0_w); + auto texture_color = TextureColor(uv, textures, tc0_w); Common::Vec4 primary_fragment_color = {0, 0, 0, 0}; Common::Vec4 secondary_fragment_color = {0, 0, 0, 0}; @@ -556,11 +571,10 @@ std::array, 4> RasterizerSoftware::TextureColor( t = texture.config.height - 1 - GetWrappedTexCoord(texture.config.wrap_t, t, texture.config.height); - const u8* texture_data = memory.GetPhysicalPointer(texture_address); const auto info = TextureInfo::FromPicaRegister(texture.config, texture.format); // TODO: Apply the min and mag filters to the texture - texture_color[i] = LookupTexture(texture_data, s, t, info); + texture_color[i] = LookupTexture(texture_data[i], s, t, info); } if (i == 0 && (texture.config.type == TexturingRegs::TextureConfig::Shadow2D || @@ -681,10 +695,20 @@ Common::Vec4 RasterizerSoftware::PixelColor(u16 x, u16 y) const { } void RasterizerSoftware::WriteTevConfig( - std::span, 4> texture_color, + std::span, 4> texture_color, std::span tev_stages, Common::Vec4 primary_color, Common::Vec4 primary_fragment_color, Common::Vec4 secondary_fragment_color) { + +#if CITRA_ARCH(x86_64) + if (use_jit) { + const u32 tev_combiner_buffer_color = regs.texturing.tev_combiner_buffer_color.raw; + combiner_output = tev_config->Run(texture_color, primary_color, primary_fragment_color, + secondary_fragment_color, tev_combiner_buffer_color); + return; + } +#endif + /** * Texture environment - consists of 6 stages of color and alpha combining. * Color combiners take three input color values from some source (e.g. 
interpolated @@ -748,6 +772,7 @@ void RasterizerSoftware::WriteTevConfig( GetColorModifier(tev_stage.color_modifier2, get_source(tev_stage.color_source2)), GetColorModifier(tev_stage.color_modifier3, get_source(tev_stage.color_source3)), }; + const Common::Vec3 color_output = ColorCombine(tev_stage.color_op, color_result); u8 alpha_output; diff --git a/src/video_core/renderer_software/sw_rasterizer.h b/src/video_core/renderer_software/sw_rasterizer.h index 28b68263d..0ab075b75 100644 --- a/src/video_core/renderer_software/sw_rasterizer.h +++ b/src/video_core/renderer_software/sw_rasterizer.h @@ -4,13 +4,20 @@ #pragma once +#include #include +#include + +#include "common/arch.h" #include "common/thread_worker.h" #include "video_core/rasterizer_interface.h" -#include "video_core/regs_texturing.h" #include "video_core/renderer_software/sw_clipper.h" #include "video_core/renderer_software/sw_framebuffer.h" +#if CITRA_ARCH(x86_64) +#include "video_core/renderer_software/sw_tev_jit.h" +#endif + namespace Pica::Shader { struct OutputVertex; } @@ -55,7 +62,7 @@ private: Common::Vec4 PixelColor(u16 x, u16 y) const; /// Emulates the TEV configuration and returns the combiner output. 
-    void WriteTevConfig(std::span, 4> texture_color,
+    void WriteTevConfig(std::span, 4> texture_color,
                         std::span tev_stages, Common::Vec4 primary_color,
                         Common::Vec4 primary_fragment_color,
                         Common::Vec4 secondary_fragment_color);
@@ -73,9 +80,13 @@ private:
     Memory::MemorySystem& memory;
     Pica::State& state;
     const Pica::Regs& regs;
+    bool use_jit{true};
     size_t num_sw_threads;
     Common::ThreadWorker sw_workers;
     Framebuffer fb;
+    TevCache tev_cache;
+    TevConfig* tev_config{};
+    std::array texture_data{};
 };
 
 } // namespace SwRenderer
diff --git a/src/video_core/renderer_software/sw_tev_jit.cpp b/src/video_core/renderer_software/sw_tev_jit.cpp
new file mode 100644
index 000000000..1c8ea6ae9
--- /dev/null
+++ b/src/video_core/renderer_software/sw_tev_jit.cpp
@@ -0,0 +1,496 @@
+// Copyright 2023 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <bit>
+#include <emmintrin.h>
+#include "common/x64/xbyak_abi.h"
+#include "video_core/regs.h"
+#include "video_core/renderer_software/sw_tev_jit.h"
+
+namespace SwRenderer {
+
+namespace {
+
+using namespace Common::X64;
+using namespace Xbyak::util;
+using Pica::TexturingRegs;
+using Xbyak::Reg32;
+using Xbyak::Reg64;
+using Xbyak::Xmm;
+using TevStageConfig = Pica::TexturingRegs::TevStageConfig;
+
+// Scalar registers: the three alpha combiner inputs and the alpha result of a stage.
+constexpr Reg32 A0 = r11d;
+constexpr Reg32 A1 = r12d;
+constexpr Reg32 A2 = r13d;
+constexpr Reg32 ALPHA_OUTPUT = r14d;
+
+// Vector registers: a color occupies four 32-bit lanes (lane 0 = red ... lane 3 = alpha).
+constexpr Xmm COMBINER_OUTPUT = xmm0;
+constexpr Xmm COMBINER_BUFFER = xmm1;
+constexpr Xmm NEXT_COMBINER_BUFFER = xmm2;
+constexpr Xmm VEC0 = xmm3;
+constexpr Xmm VEC1 = xmm4;
+constexpr Xmm VEC2 = xmm5;
+constexpr Xmm COLOR_OUTPUT = xmm6;
+
+// Constants broadcast to every lane by the prologue of the generated function.
+constexpr Xmm ZERO = xmm13;
+constexpr Xmm MID_COLOR = xmm14;
+constexpr Xmm MAX_COLOR = xmm15;
+
+/// Returns true when the stage merely forwards the previous stage's output.
+bool IsPassThroughTevStage(const TevStageConfig& stage) {
+    return (stage.color_op == TevStageConfig::Operation::Replace &&
+            stage.alpha_op == TevStageConfig::Operation::Replace &&
+            stage.color_source1 == TevStageConfig::Source::Previous &&
+            stage.alpha_source1 == TevStageConfig::Source::Previous &&
+            stage.color_modifier1 == TevStageConfig::ColorModifier::SourceColor &&
+            stage.alpha_modifier1 == TevStageConfig::AlphaModifier::SourceAlpha &&
+            stage.GetColorMultiplier() == 1 && stage.GetAlphaMultiplier() == 1);
+}
+
+} // Anonymous namespace
+
+TevConfigKey::TevConfigKey(const Pica::TexturingRegs& regs) {
+    const auto& tev_stages = regs.GetTevStages();
+    const auto& buffer_input = regs.tev_combiner_buffer_input;
+    for (size_t i = 0; i < tev_stages.size(); i++) {
+        const auto& tev_stage = tev_stages[i];
+        stages[i].sources_raw = tev_stage.sources_raw;
+        stages[i].modifiers_raw = tev_stage.modifiers_raw;
+        stages[i].ops_raw = tev_stage.ops_raw;
+        stages[i].const_color = tev_stage.const_color;
+        stages[i].scales_raw = tev_stage.scales_raw;
+
+        // Which stages write the combiner buffer is baked into the generated code, so it
+        // must be part of the key; otherwise a stale program could be reused.
+        const u32 stage_index = static_cast<u32>(i);
+        if (buffer_input.TevStageUpdatesCombinerBufferColor(stage_index)) {
+            combiner_buffer_update_mask |= 1u << stage_index;
+        }
+        if (buffer_input.TevStageUpdatesCombinerBufferAlpha(stage_index)) {
+            combiner_buffer_update_mask |= 1u << (stage_index + ALPHA_UPDATE_SHIFT);
+        }
+    }
+}
+
+TevConfig::TevConfig(const Pica::Regs& regs_, const TevConfigKey& key) : regs{regs_} {
+    WriteTevConfig(key);
+}
+
+TevConfig::~TevConfig() = default;
+
+Common::Vec4<u8> TevConfig::Run(std::span<Common::Vec4<u8>, 4> texture_color_,
+                                Common::Vec4<u8> primary_color_,
+                                Common::Vec4<u8> primary_fragment_color_,
+                                Common::Vec4<u8> secondary_fragment_color_,
+                                u64 tev_combiner_buffer_color) {
+    u32* texture_color = reinterpret_cast<u32*>(texture_color_.data());
+    const u32 primary_color = std::bit_cast<u32>(primary_color_);
+    const u32 primary_fragment_color = std::bit_cast<u32>(primary_fragment_color_);
+    const u32 secondary_fragment_color = std::bit_cast<u32>(secondary_fragment_color_);
+    // Two inputs share the fourth argument: low half fragment color, high half buffer color.
+    const u64 secondary_fragment_color_and_tev_combiner_buffer_color =
+        secondary_fragment_color | (tev_combiner_buffer_color << 32);
+    const u32 result = program(texture_color, primary_color, primary_fragment_color,
+                               secondary_fragment_color_and_tev_combiner_buffer_color);
+    return std::bit_cast<Common::Vec4<u8>>(result);
+}
+
+void TevConfig::WriteTevConfig(const TevConfigKey& key) {
+    program = getCurr<CompiledTevFun*>();
+
+    constexpr Xbyak::Reg TEXTURE_COLOR = ABI_PARAM1;
+    constexpr Xbyak::Reg PRIMARY_COLOR = ABI_PARAM2;
+    constexpr Xbyak::Reg PRIMARY_FRAGMENT_COLOR = ABI_PARAM3;
+    constexpr Xbyak::Reg SECONDARY_FRAGMENT_COLOR = ABI_PARAM4;
+
+    // Save callee state
+    ABI_PushRegistersAndAdjustStack(*this, ABI_ALL_CALLEE_SAVED, 8, 16);
+
+    // Clear the combiner registers and zero constant
+    pxor(COMBINER_OUTPUT, COMBINER_OUTPUT);
+    pxor(COMBINER_BUFFER, COMBINER_BUFFER);
+    pxor(ZERO, ZERO);
+
+    // Used to set an xmm register to the max color
+    static const __m128i max = _mm_set1_epi32(255);
+    mov(rax, reinterpret_cast<u64>(&max));
+    movdqu(MAX_COLOR, xword[rax]);
+
+    // Used to set an xmm register to the mid color
+    static const __m128i mid = _mm_set1_epi32(128);
+    mov(rax, reinterpret_cast<u64>(&mid));
+    movdqu(MID_COLOR, xword[rax]);
+
+    // Load next_combiner_buffer from the upper half of the fourth argument
+    mov(rax, ABI_PARAM4);
+    shr(rax, 32);
+    vmovd(NEXT_COMBINER_BUFFER, eax);
+    pmovzxbd(NEXT_COMBINER_BUFFER, NEXT_COMBINER_BUFFER);
+
+    for (u32 tev_stage_index = 0; tev_stage_index < key.stages.size(); ++tev_stage_index) {
+        const auto& tev_stage = key.stages[tev_stage_index];
+        if (!IsPassThroughTevStage(tev_stage)) {
+            using Source = TexturingRegs::TevStageConfig::Source;
+
+            const auto get_source = [&](const Xbyak::Xmm& dest, Source source) {
+                switch (source) {
+                case Source::PrimaryColor:
+                    vmovd(dest, PRIMARY_COLOR.cvt32());
+                    pmovzxbd(dest, dest);
+                    break;
+                case Source::PrimaryFragmentColor:
+                    vmovd(dest, PRIMARY_FRAGMENT_COLOR.cvt32());
+                    pmovzxbd(dest, dest);
+                    break;
+                case Source::SecondaryFragmentColor:
+                    vmovd(dest, SECONDARY_FRAGMENT_COLOR.cvt32());
+                    pmovzxbd(dest, dest);
+                    break;
+                case Source::Texture0:
+                case Source::Texture1:
+                case Source::Texture2:
+                case Source::Texture3: {
+                    const u32 index = static_cast<u32>(source) - static_cast<u32>(Source::Texture0);
+                    vmovd(dest, dword[TEXTURE_COLOR + index * sizeof(u32)]);
+                    pmovzxbd(dest, dest);
+                    break;
+                }
+                case Source::PreviousBuffer:
+                    vmovdqa(dest, COMBINER_BUFFER);
+                    break;
+                case Source::Constant:
+                    mov(eax, tev_stage.const_color);
+                    vmovd(dest, eax);
+                    pmovzxbd(dest, dest);
+                    break;
+                case Source::Previous:
+                    vmovdqa(dest, COMBINER_OUTPUT);
+                    break;
+                default:
+                    LOG_ERROR(HW_GPU, "Unknown color combiner source {}", source);
+                    UNIMPLEMENTED();
+                    vmovdqa(dest, ZERO);
+                }
+                return dest;
+            };
+
+            // Load the color modifiers to VEC0/1/2.
+            GetColorModifier(get_source(VEC0, tev_stage.color_source1), tev_stage.color_modifier1);
+            GetColorModifier(get_source(VEC1, tev_stage.color_source2), tev_stage.color_modifier2);
+            GetColorModifier(get_source(VEC2, tev_stage.color_source3), tev_stage.color_modifier3);
+
+            // Combine the texture colors to COLOR_OUTPUT.
+            ColorCombine(COLOR_OUTPUT, tev_stage.color_op);
+
+            if (tev_stage.color_op == TexturingRegs::TevStageConfig::Operation::Dot3_RGBA) {
+                // Result of Dot3_RGBA operation is also placed to the alpha component
+                vmovd(ALPHA_OUTPUT.cvt32(), COLOR_OUTPUT);
+            } else {
+                // Load the alpha modifiers to A0/A1/A2 (VEC0/1/2 are reused as scratch).
+                GetAlphaModifier(get_source(VEC0, tev_stage.alpha_source1), A0,
+                                 tev_stage.alpha_modifier1);
+                GetAlphaModifier(get_source(VEC1, tev_stage.alpha_source2), A1,
+                                 tev_stage.alpha_modifier2);
+                GetAlphaModifier(get_source(VEC2, tev_stage.alpha_source3), A2,
+                                 tev_stage.alpha_modifier3);
+
+                // Combine the alpha values to ALPHA_OUTPUT.
+                AlphaCombine(ALPHA_OUTPUT, tev_stage.alpha_op);
+            }
+
+            // Load the color multiplier to an SSE vector.
+            mov(eax, tev_stage.GetColorMultiplier());
+            movd(VEC0, eax);
+            pshufd(VEC0, VEC0, 0);
+
+            // Multiply color output with the multiplier and take the minimum.
+            pmulld(COLOR_OUTPUT, VEC0);
+            pminsd(COLOR_OUTPUT, MAX_COLOR);
+
+            // Load the alpha multiplier, multiply it with the alpha output.
+            mov(eax, tev_stage.GetAlphaMultiplier());
+            imul(ALPHA_OUTPUT, eax);
+
+            // Load result to a vector and take the minimum
+            movd(VEC0, ALPHA_OUTPUT);
+            pshufd(VEC0, VEC0, 0);
+            pminsd(VEC0, MAX_COLOR);
+
+            // Blend vectors to get the combiner output
+            vpblendd(COMBINER_OUTPUT, COLOR_OUTPUT, VEC0, 0b1000);
+        }
+
+        // Set combiner buffer to the next buffer. All four lanes are live, so the whole
+        // register is copied (movq would only carry over the red and green lanes).
+        movdqa(COMBINER_BUFFER, NEXT_COMBINER_BUFFER);
+
+        if (key.UpdatesCombinerBufferColor(tev_stage_index)) {
+            vpblendd(NEXT_COMBINER_BUFFER, COMBINER_OUTPUT, NEXT_COMBINER_BUFFER, 0b1000);
+        }
+
+        if (key.UpdatesCombinerBufferAlpha(tev_stage_index)) {
+            vpblendd(NEXT_COMBINER_BUFFER, COMBINER_OUTPUT, NEXT_COMBINER_BUFFER, 0b0111);
+        }
+    }
+
+    // Pack combiner output to a u32 to be returned.
+    vpextrd(edx, COMBINER_OUTPUT, 3);
+    vpextrd(eax, COMBINER_OUTPUT, 2);
+    sal(edx, 8);
+    or_(eax, edx);
+    vpextrd(edx, COMBINER_OUTPUT, 1);
+    sal(eax, 8);
+    or_(edx, eax);
+    vmovd(eax, COMBINER_OUTPUT);
+    sal(edx, 8);
+    or_(eax, edx);
+
+    ABI_PopRegistersAndAdjustStack(*this, ABI_ALL_CALLEE_SAVED, 8, 16);
+    ret();
+    ready();
+}
+
+void TevConfig::GetColorModifier(const Xbyak::Xmm& dest, TevStageConfig::ColorModifier factor) {
+    using ColorModifier = TevStageConfig::ColorModifier;
+
+    const auto broadcast = [&](u32 comp) {
+        const u8 mask = comp | (comp << 2) | (comp << 4);
+        vpshufd(dest, dest, mask);
+    };
+
+    switch (factor) {
+    case ColorModifier::SourceColor:
+        vpblendd(dest, dest, ZERO, 0b1000);
+        break;
+    case ColorModifier::OneMinusSourceColor:
+        vpsubd(dest, MAX_COLOR, dest);
+        break;
+    case ColorModifier::SourceAlpha:
+        broadcast(3);
+        break;
+    case ColorModifier::OneMinusSourceAlpha:
+        broadcast(3);
+        vpsubd(dest, MAX_COLOR, dest);
+        break;
+    case ColorModifier::SourceRed:
+        broadcast(0);
+        break;
+    case ColorModifier::OneMinusSourceRed:
+        broadcast(0);
+        vpsubd(dest, MAX_COLOR, dest);
+        break;
+    case ColorModifier::SourceGreen:
+        broadcast(1);
+        break;
+    case ColorModifier::OneMinusSourceGreen:
+        broadcast(1);
+        vpsubd(dest, MAX_COLOR, dest);
+        break;
+    case ColorModifier::SourceBlue:
+        broadcast(2);
+        break;
+    case ColorModifier::OneMinusSourceBlue:
+        broadcast(2);
+        vpsubd(dest, MAX_COLOR, dest);
+        break;
+    default:
+        UNREACHABLE();
+    }
+    pand(dest, MAX_COLOR);
+}
+
+void TevConfig::ColorCombine(const Xbyak::Xmm& dest, TevStageConfig::Operation op) {
+    using Operation = TevStageConfig::Operation;
+
+    switch (op) {
+    case Operation::Replace:
+        vmovdqa(dest, VEC0);
+        break;
+    case Operation::Modulate:
+        pmulld(VEC0, VEC1);
+        vpsrld(dest, VEC0, 8); // TODO: This is a very crude approximation of division by 255
+        break;
+    case Operation::Add:
+        vpaddd(VEC0, VEC0, VEC1);
+        vpminsd(dest, MAX_COLOR, VEC0);
+        break;
+    case Operation::AddSigned:
+        vpaddd(VEC0, VEC0, VEC1);
+        vpsubd(VEC0, VEC0, MID_COLOR);
+        vpminsd(VEC0, VEC0, MAX_COLOR);
+        vpmaxsd(dest, VEC0, ZERO);
+        break;
+    case Operation::Lerp:
+        pmulld(VEC0, VEC2);
+        // The second input is weighted by (255 - factor)
+        vpsubd(VEC2, MAX_COLOR, VEC2);
+        pmulld(VEC1, VEC2);
+        vpaddd(VEC0, VEC0, VEC1);
+        vpsrld(dest, VEC0, 8); // TODO: This is a very crude approximation of division by 255
+        break;
+    case Operation::Subtract:
+        psubd(VEC0, VEC1);
+        vpmaxsd(dest, VEC0, ZERO);
+        break;
+    case Operation::MultiplyThenAdd:
+        pmulld(VEC0, VEC1);
+        pmulld(VEC2, MAX_COLOR);
+        paddd(VEC0, VEC2);
+        // Divide first, then saturate; clamping the unscaled sum would always yield 0
+        psrld(VEC0, 8); // TODO: This is a very crude approximation of division by 255
+        vpminsd(dest, VEC0, MAX_COLOR);
+        break;
+    case Operation::AddThenMultiply:
+        paddd(VEC0, VEC1);
+        pminsd(VEC0, MAX_COLOR);
+        pmulld(VEC0, VEC2);
+        vpsrld(dest, VEC0, 8); // TODO: This is a very crude approximation of division by 255
+        break;
+    case Operation::Dot3_RGB:
+    case Operation::Dot3_RGBA:
+        pslld(VEC0, 1);
+        psubd(VEC0, MAX_COLOR);
+        pslld(VEC1, 1);
+        psubd(VEC1, MAX_COLOR);
+        pmulld(VEC0, VEC1);
+        paddd(VEC0, MID_COLOR);
+        // Products may be negative, so the division by 256 needs an arithmetic shift
+        psrad(VEC0, 8);
+        vpblendd(VEC0, VEC0, ZERO, 0b1000);
+        phaddd(VEC0, VEC0);
+        phaddd(VEC0, VEC0);
+        pminsd(VEC0, MAX_COLOR);
+        pmaxsd(VEC0, ZERO);
+        pshufd(dest, VEC0, 0);
+        break;
+    default:
+        LOG_ERROR(HW_GPU, "Unknown color combiner operation {}", (int)op);
+        UNIMPLEMENTED();
+    }
+    pand(dest, MAX_COLOR);
+}
+
+void TevConfig::GetAlphaModifier(const Xbyak::Xmm& src, const Xbyak::Reg32& dest,
+                                 TevStageConfig::AlphaModifier factor) {
+    using AlphaModifier = TevStageConfig::AlphaModifier;
+
+    const auto get_comp = [&](u32 comp, bool minus = false) {
+        const auto& reg = minus ? eax : dest;
+        vpextrd(reg, src, comp);
+        if (minus) {
+            mov(dest, 255);
+            sub(dest, reg);
+        }
+    };
+
+    switch (factor) {
+    case AlphaModifier::SourceAlpha:
+        get_comp(3);
+        break;
+    case AlphaModifier::OneMinusSourceAlpha:
+        get_comp(3, true);
+        break;
+    case AlphaModifier::SourceRed:
+        get_comp(0);
+        break;
+    case AlphaModifier::OneMinusSourceRed:
+        get_comp(0, true);
+        break;
+    case AlphaModifier::SourceGreen:
+        get_comp(1);
+        break;
+    case AlphaModifier::OneMinusSourceGreen:
+        get_comp(1, true);
+        break;
+    case AlphaModifier::SourceBlue:
+        get_comp(2);
+        break;
+    case AlphaModifier::OneMinusSourceBlue:
+        get_comp(2, true);
+        break;
+    default:
+        UNREACHABLE();
+    }
+}
+
+void TevConfig::AlphaCombine(const Xbyak::Reg32& dest, TevStageConfig::Operation op) {
+    using Operation = TevStageConfig::Operation;
+
+    const auto div_255 = [&](const Reg32& dst, const Reg32& src) {
+        mov(dst, 0x80808081);
+        imul(dst.cvt64(), src.cvt64());
+        shr(dst.cvt64(), 39);
+    };
+
+    switch (op) {
+    case Operation::Replace:
+        mov(dest, A0);
+        break;
+    case Operation::Modulate:
+        imul(A0, A1);
+        div_255(dest, A0);
+        break;
+    case Operation::Add:
+        add(A0, A1);
+        cmp(A0, 255);
+        mov(eax, 255);
+        // Saturate: only sums above 255 are replaced by 255
+        cmova(A0, eax);
+        mov(dest, A0);
+        break;
+    case Operation::AddSigned:
+        xor_(eax, eax);
+        add(A0, A1);
+        sub(A0, 128);
+        test(A0, A0);
+        cmovg(eax, A0);
+        cmp(eax, 255);
+        mov(A0, 255);
+        cmovb(A0, eax);
+        mov(dest, A0);
+        break;
+    case Operation::Lerp:
+        imul(A0, A2);
+        mov(eax, 255);
+        sub(eax, A2);
+        imul(A1, eax);
+        add(A0, A1);
+        div_255(dest, A0);
+        break;
+    case Operation::Subtract:
+        sub(A0, A1);
+        xor_(eax, eax);
+        test(A0, A0);
+        cmovl(A0, eax);
+        mov(dest, A0);
+        break;
+    case Operation::MultiplyThenAdd:
+        imul(A0, A1);
+        mov(dest, A2);
+        shl(dest, 8);
+        sub(dest, A2);
+        add(dest, A0);
+        div_255(eax, dest);
+        cmp(eax, 255);
+        mov(dest, 255);
+        cmovb(dest, eax);
+        break;
+    case Operation::AddThenMultiply:
+        add(A0, A1);
+        cmp(A0, 255);
+        mov(eax, 255);
+        cmovg(A0, eax);
+        imul(A0, A2);
+        div_255(dest, A0);
+        break;
+    default:
+        LOG_ERROR(HW_GPU, "Unknown alpha combiner operation {}", (int)op);
+        UNIMPLEMENTED();
+    }
+}
+
+} // namespace SwRenderer
diff --git a/src/video_core/renderer_software/sw_tev_jit.h b/src/video_core/renderer_software/sw_tev_jit.h
new file mode 100644
index 000000000..9d1588ee4
--- /dev/null
+++ b/src/video_core/renderer_software/sw_tev_jit.h
@@ -0,0 +1,92 @@
+// Copyright 2023 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <memory>
+#include <span>
+#include <unordered_map>
+#include <xbyak/xbyak.h>
+
+#include "common/hash.h"
+#include "common/vector_math.h"
+#include "video_core/regs_texturing.h"
+
+namespace Pica {
+struct State;
+struct Regs;
+} // namespace Pica
+
+namespace SwRenderer {
+
+/// Everything the generated TEV code depends on; hashed to look up cached programs.
+struct TevConfigKey {
+    /// Bit offset of the per-stage alpha update flags inside combiner_buffer_update_mask.
+    static constexpr u32 ALPHA_UPDATE_SHIFT = 16;
+
+    explicit TevConfigKey(const Pica::TexturingRegs& regs);
+
+    u64 Hash() const noexcept {
+        return Common::ComputeHash64(this, sizeof(TevConfigKey));
+    }
+
+    /// Whether the given stage writes the RGB channels of the combiner buffer.
+    bool UpdatesCombinerBufferColor(u32 stage_index) const noexcept {
+        return ((combiner_buffer_update_mask >> stage_index) & 1u) != 0;
+    }
+
+    /// Whether the given stage writes the alpha channel of the combiner buffer.
+    bool UpdatesCombinerBufferAlpha(u32 stage_index) const noexcept {
+        return ((combiner_buffer_update_mask >> (stage_index + ALPHA_UPDATE_SHIFT)) & 1u) != 0;
+    }
+
+    std::array<Pica::TexturingRegs::TevStageConfig, 6> stages;
+    /// Bit i: stage i updates the buffer color; bit (i + ALPHA_UPDATE_SHIFT): its alpha.
+    u32 combiner_buffer_update_mask{};
+};
+
+/// A TEV pipeline compiled to native x86-64 code for one TevConfigKey.
+class TevConfig : public Xbyak::CodeGenerator {
+public:
+    explicit TevConfig(const Pica::Regs& regs, const TevConfigKey& key);
+    ~TevConfig();
+
+    /// Runs the compiled program for one fragment and returns the combiner output.
+    Common::Vec4<u8> Run(std::span<Common::Vec4<u8>, 4> texture_color_,
+                         Common::Vec4<u8> primary_color_, Common::Vec4<u8> primary_fragment_color_,
+                         Common::Vec4<u8> secondary_fragment_color_, u64 tev_combiner_buffer_color);
+
+private:
+    /// Generates the native program for the given key and stores its entry point.
+    void WriteTevConfig(const TevConfigKey& key);
+
+    /// Emits code that applies a color modifier to dest in place.
+    void GetColorModifier(const Xbyak::Xmm& dest,
+                          Pica::TexturingRegs::TevStageConfig::ColorModifier factor);
+
+    /// Emits code that extracts the selected component of src (or 255 minus it) into dest.
+    void GetAlphaModifier(const Xbyak::Xmm& src, const Xbyak::Reg32& dest,
+                          Pica::TexturingRegs::TevStageConfig::AlphaModifier factor);
+
+    /// Emits code that combines the three loaded color inputs into dest.
+    void ColorCombine(const Xbyak::Xmm& dest, Pica::TexturingRegs::TevStageConfig::Operation op);
+
+    /// Emits code that combines the three loaded alpha inputs into dest.
+    void AlphaCombine(const Xbyak::Reg32& dest, Pica::TexturingRegs::TevStageConfig::Operation op);
+
+private:
+    // No longer read during code generation (the key carries all compile-time state);
+    // kept so the constructor signature stays unchanged.
+    [[maybe_unused]] const Pica::Regs& regs;
+
+    using CompiledTevFun = u32(u32* texture_color, u32 primary_color, u32 primary_fragment_color,
+                               u64 secondary_fragment_color_and_tev_combiner_buffer_color);
+
+    CompiledTevFun* program = nullptr;
+};
+
+using TevCache = std::unordered_map<u64, std::unique_ptr<TevConfig>, Common::IdentityHash<u64>>;
+
+} // namespace SwRenderer