renderer_software: Add jit compiler for tev stages
This commit is contained in:
parent
8012b28b92
commit
cfdb10a7ba
7 changed files with 589 additions and 11 deletions
|
@ -95,6 +95,8 @@ add_library(video_core STATIC
|
||||||
renderer_software/sw_proctex.h
|
renderer_software/sw_proctex.h
|
||||||
renderer_software/sw_rasterizer.cpp
|
renderer_software/sw_rasterizer.cpp
|
||||||
renderer_software/sw_rasterizer.h
|
renderer_software/sw_rasterizer.h
|
||||||
|
renderer_software/sw_tev_jit.cpp
|
||||||
|
renderer_software/sw_tev_jit.h
|
||||||
renderer_software/sw_texturing.cpp
|
renderer_software/sw_texturing.cpp
|
||||||
renderer_software/sw_texturing.h
|
renderer_software/sw_texturing.h
|
||||||
renderer_vulkan/pica_to_vk.h
|
renderer_vulkan/pica_to_vk.h
|
||||||
|
|
|
@ -8,6 +8,7 @@
|
||||||
#include "core/hw/hw.h"
|
#include "core/hw/hw.h"
|
||||||
#include "core/hw/lcd.h"
|
#include "core/hw/lcd.h"
|
||||||
#include "video_core/renderer_software/renderer_software.h"
|
#include "video_core/renderer_software/renderer_software.h"
|
||||||
|
#include "video_core/renderer_software/sw_rasterizer.h"
|
||||||
|
|
||||||
namespace SwRenderer {
|
namespace SwRenderer {
|
||||||
|
|
||||||
|
@ -17,6 +18,10 @@ RendererSoftware::RendererSoftware(Core::System& system, Frontend::EmuWindow& wi
|
||||||
|
|
||||||
RendererSoftware::~RendererSoftware() = default;
|
RendererSoftware::~RendererSoftware() = default;
|
||||||
|
|
||||||
|
// Returns a non-owning pointer to the rasterizer held in `rasterizer`.
VideoCore::RasterizerInterface* RendererSoftware::Rasterizer() const {
|
||||||
|
return rasterizer.get();
|
||||||
|
}
|
||||||
|
|
||||||
void RendererSoftware::SwapBuffers() {
|
void RendererSoftware::SwapBuffers() {
|
||||||
PrepareRenderTarget();
|
PrepareRenderTarget();
|
||||||
EndFrame();
|
EndFrame();
|
||||||
|
|
|
@ -5,7 +5,6 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include "video_core/renderer_base.h"
|
#include "video_core/renderer_base.h"
|
||||||
#include "video_core/renderer_software/sw_rasterizer.h"
|
|
||||||
|
|
||||||
namespace Core {
|
namespace Core {
|
||||||
class System;
|
class System;
|
||||||
|
@ -19,19 +18,18 @@ struct ScreenInfo {
|
||||||
std::vector<u8> pixels;
|
std::vector<u8> pixels;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
class RasterizerSoftware;
|
||||||
|
|
||||||
class RendererSoftware : public VideoCore::RendererBase {
|
class RendererSoftware : public VideoCore::RendererBase {
|
||||||
public:
|
public:
|
||||||
explicit RendererSoftware(Core::System& system, Frontend::EmuWindow& window);
|
explicit RendererSoftware(Core::System& system, Frontend::EmuWindow& window);
|
||||||
~RendererSoftware() override;
|
~RendererSoftware() override;
|
||||||
|
|
||||||
[[nodiscard]] VideoCore::RasterizerInterface* Rasterizer() const override {
|
|
||||||
return rasterizer.get();
|
|
||||||
}
|
|
||||||
|
|
||||||
[[nodiscard]] const ScreenInfo& Screen(VideoCore::ScreenId id) const noexcept {
|
[[nodiscard]] const ScreenInfo& Screen(VideoCore::ScreenId id) const noexcept {
|
||||||
return screen_infos[static_cast<u32>(id)];
|
return screen_infos[static_cast<u32>(id)];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
VideoCore::RasterizerInterface* Rasterizer() const override;
|
||||||
void SwapBuffers() override;
|
void SwapBuffers() override;
|
||||||
void TryPresent(int timeout_ms, bool is_secondary) override {}
|
void TryPresent(int timeout_ms, bool is_secondary) override {}
|
||||||
void Sync() override {}
|
void Sync() override {}
|
||||||
|
|
|
@ -297,9 +297,24 @@ void RasterizerSoftware::ProcessTriangle(const Vertex& v0, const Vertex& v1, con
|
||||||
|
|
||||||
const auto textures = regs.texturing.GetTextures();
|
const auto textures = regs.texturing.GetTextures();
|
||||||
const auto tev_stages = regs.texturing.GetTevStages();
|
const auto tev_stages = regs.texturing.GetTevStages();
|
||||||
|
for (u32 i = 0; i < texture_data.size(); i++) {
|
||||||
|
const PAddr addr = textures[i].config.GetPhysicalAddress();
|
||||||
|
if (addr) {
|
||||||
|
texture_data[i] = memory.GetPhysicalPointer(addr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fb.Bind();
|
fb.Bind();
|
||||||
|
|
||||||
|
if (use_jit) {
|
||||||
|
const TevConfigKey key{regs.texturing};
|
||||||
|
auto [it, new_fun] = tev_cache.try_emplace(key.Hash());
|
||||||
|
if (new_fun) {
|
||||||
|
it->second = std::make_unique<TevConfig>(regs, key);
|
||||||
|
}
|
||||||
|
tev_config = it->second.get();
|
||||||
|
}
|
||||||
|
|
||||||
// Enter rasterization loop, starting at the center of the topleft bounding box corner.
|
// Enter rasterization loop, starting at the center of the topleft bounding box corner.
|
||||||
// TODO: Not sure if looping through x first might be faster
|
// TODO: Not sure if looping through x first might be faster
|
||||||
for (u16 y = min_y + 8; y < max_y; y += 0x10) {
|
for (u16 y = min_y + 8; y < max_y; y += 0x10) {
|
||||||
|
@ -408,7 +423,7 @@ void RasterizerSoftware::ProcessTriangle(const Vertex& v0, const Vertex& v1, con
|
||||||
|
|
||||||
// Sample bound texture units.
|
// Sample bound texture units.
|
||||||
const f24 tc0_w = get_interpolated_attribute(v0.tc0_w, v1.tc0_w, v2.tc0_w);
|
const f24 tc0_w = get_interpolated_attribute(v0.tc0_w, v1.tc0_w, v2.tc0_w);
|
||||||
const auto texture_color = TextureColor(uv, textures, tc0_w);
|
auto texture_color = TextureColor(uv, textures, tc0_w);
|
||||||
|
|
||||||
Common::Vec4<u8> primary_fragment_color = {0, 0, 0, 0};
|
Common::Vec4<u8> primary_fragment_color = {0, 0, 0, 0};
|
||||||
Common::Vec4<u8> secondary_fragment_color = {0, 0, 0, 0};
|
Common::Vec4<u8> secondary_fragment_color = {0, 0, 0, 0};
|
||||||
|
@ -556,11 +571,10 @@ std::array<Common::Vec4<u8>, 4> RasterizerSoftware::TextureColor(
|
||||||
t = texture.config.height - 1 -
|
t = texture.config.height - 1 -
|
||||||
GetWrappedTexCoord(texture.config.wrap_t, t, texture.config.height);
|
GetWrappedTexCoord(texture.config.wrap_t, t, texture.config.height);
|
||||||
|
|
||||||
const u8* texture_data = memory.GetPhysicalPointer(texture_address);
|
|
||||||
const auto info = TextureInfo::FromPicaRegister(texture.config, texture.format);
|
const auto info = TextureInfo::FromPicaRegister(texture.config, texture.format);
|
||||||
|
|
||||||
// TODO: Apply the min and mag filters to the texture
|
// TODO: Apply the min and mag filters to the texture
|
||||||
texture_color[i] = LookupTexture(texture_data, s, t, info);
|
texture_color[i] = LookupTexture(texture_data[i], s, t, info);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (i == 0 && (texture.config.type == TexturingRegs::TextureConfig::Shadow2D ||
|
if (i == 0 && (texture.config.type == TexturingRegs::TextureConfig::Shadow2D ||
|
||||||
|
@ -681,10 +695,20 @@ Common::Vec4<u8> RasterizerSoftware::PixelColor(u16 x, u16 y) const {
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerSoftware::WriteTevConfig(
|
void RasterizerSoftware::WriteTevConfig(
|
||||||
std::span<const Common::Vec4<u8>, 4> texture_color,
|
std::span<Common::Vec4<u8>, 4> texture_color,
|
||||||
std::span<const Pica::TexturingRegs::TevStageConfig, 6> tev_stages,
|
std::span<const Pica::TexturingRegs::TevStageConfig, 6> tev_stages,
|
||||||
Common::Vec4<u8> primary_color, Common::Vec4<u8> primary_fragment_color,
|
Common::Vec4<u8> primary_color, Common::Vec4<u8> primary_fragment_color,
|
||||||
Common::Vec4<u8> secondary_fragment_color) {
|
Common::Vec4<u8> secondary_fragment_color) {
|
||||||
|
|
||||||
|
#if CITRA_ARCH(x86_64)
|
||||||
|
if (use_jit) {
|
||||||
|
const u32 tev_combiner_buffer_color = regs.texturing.tev_combiner_buffer_color.raw;
|
||||||
|
combiner_output = tev_config->Run(texture_color, primary_color, primary_fragment_color,
|
||||||
|
secondary_fragment_color, tev_combiner_buffer_color);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Texture environment - consists of 6 stages of color and alpha combining.
|
* Texture environment - consists of 6 stages of color and alpha combining.
|
||||||
* Color combiners take three input color values from some source (e.g. interpolated
|
* Color combiners take three input color values from some source (e.g. interpolated
|
||||||
|
@ -748,6 +772,7 @@ void RasterizerSoftware::WriteTevConfig(
|
||||||
GetColorModifier(tev_stage.color_modifier2, get_source(tev_stage.color_source2)),
|
GetColorModifier(tev_stage.color_modifier2, get_source(tev_stage.color_source2)),
|
||||||
GetColorModifier(tev_stage.color_modifier3, get_source(tev_stage.color_source3)),
|
GetColorModifier(tev_stage.color_modifier3, get_source(tev_stage.color_source3)),
|
||||||
};
|
};
|
||||||
|
|
||||||
const Common::Vec3<u8> color_output = ColorCombine(tev_stage.color_op, color_result);
|
const Common::Vec3<u8> color_output = ColorCombine(tev_stage.color_op, color_result);
|
||||||
|
|
||||||
u8 alpha_output;
|
u8 alpha_output;
|
||||||
|
|
|
@ -4,13 +4,20 @@
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include <memory>
|
||||||
#include <span>
|
#include <span>
|
||||||
|
#include <unordered_map>
|
||||||
|
|
||||||
|
#include "common/arch.h"
|
||||||
#include "common/thread_worker.h"
|
#include "common/thread_worker.h"
|
||||||
#include "video_core/rasterizer_interface.h"
|
#include "video_core/rasterizer_interface.h"
|
||||||
#include "video_core/regs_texturing.h"
|
|
||||||
#include "video_core/renderer_software/sw_clipper.h"
|
#include "video_core/renderer_software/sw_clipper.h"
|
||||||
#include "video_core/renderer_software/sw_framebuffer.h"
|
#include "video_core/renderer_software/sw_framebuffer.h"
|
||||||
|
|
||||||
|
#if CITRA_ARCH(x86_64)
|
||||||
|
#include "video_core/renderer_software/sw_tev_jit.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
namespace Pica::Shader {
|
namespace Pica::Shader {
|
||||||
struct OutputVertex;
|
struct OutputVertex;
|
||||||
}
|
}
|
||||||
|
@ -55,7 +62,7 @@ private:
|
||||||
Common::Vec4<u8> PixelColor(u16 x, u16 y) const;
|
Common::Vec4<u8> PixelColor(u16 x, u16 y) const;
|
||||||
|
|
||||||
/// Emulates the TEV configuration and returns the combiner output.
|
/// Emulates the TEV configuration and returns the combiner output.
|
||||||
void WriteTevConfig(std::span<const Common::Vec4<u8>, 4> texture_color,
|
void WriteTevConfig(std::span<Common::Vec4<u8>, 4> texture_color,
|
||||||
std::span<const Pica::TexturingRegs::TevStageConfig, 6> tev_stages,
|
std::span<const Pica::TexturingRegs::TevStageConfig, 6> tev_stages,
|
||||||
Common::Vec4<u8> primary_color, Common::Vec4<u8> primary_fragment_color,
|
Common::Vec4<u8> primary_color, Common::Vec4<u8> primary_fragment_color,
|
||||||
Common::Vec4<u8> secondary_fragment_color);
|
Common::Vec4<u8> secondary_fragment_color);
|
||||||
|
@ -73,9 +80,13 @@ private:
|
||||||
Memory::MemorySystem& memory;
|
Memory::MemorySystem& memory;
|
||||||
Pica::State& state;
|
Pica::State& state;
|
||||||
const Pica::Regs& regs;
|
const Pica::Regs& regs;
|
||||||
|
bool use_jit{true};
|
||||||
size_t num_sw_threads;
|
size_t num_sw_threads;
|
||||||
Common::ThreadWorker sw_workers;
|
Common::ThreadWorker sw_workers;
|
||||||
Framebuffer fb;
|
Framebuffer fb;
|
||||||
|
TevCache tev_cache;
|
||||||
|
TevConfig* tev_config{};
|
||||||
|
std::array<const u8*, 3> texture_data{};
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace SwRenderer
|
} // namespace SwRenderer
|
||||||
|
|
473
src/video_core/renderer_software/sw_tev_jit.cpp
Normal file
473
src/video_core/renderer_software/sw_tev_jit.cpp
Normal file
|
@ -0,0 +1,473 @@
|
||||||
|
// Copyright 2023 Citra Emulator Project
|
||||||
|
// Licensed under GPLv2 or any later version
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#include <bit>
|
||||||
|
#include <emmintrin.h>
|
||||||
|
#include "common/x64/xbyak_abi.h"
|
||||||
|
#include "video_core/regs.h"
|
||||||
|
#include "video_core/renderer_software/sw_tev_jit.h"
|
||||||
|
|
||||||
|
namespace SwRenderer {
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
using namespace Common::X64;
|
||||||
|
using namespace Xbyak::util;
|
||||||
|
using Pica::TexturingRegs;
|
||||||
|
using Xbyak::Reg32;
|
||||||
|
using Xbyak::Reg64;
|
||||||
|
using Xbyak::Xmm;
|
||||||
|
using TevStageConfig = Pica::TexturingRegs::TevStageConfig;
|
||||||
|
|
||||||
|
// Fixed register allocation used by the generated TEV routine.
//
// A0/A1/A2 receive the three modified alpha inputs of a stage (written by GetAlphaModifier,
// consumed by AlphaCombine).
constexpr Reg32 A0 = r11d;
|
||||||
|
constexpr Reg32 A1 = r12d;
|
||||||
|
constexpr Reg32 A2 = r13d;
|
||||||
|
// Scalar alpha result of the current stage.
constexpr Reg32 ALPHA_OUTPUT = r14d;
|
||||||
|
// Output of the previous stage (Source::Previous), one 32-bit lane per channel.
constexpr Xmm COMBINER_OUTPUT = xmm0;
|
||||||
|
// Combiner buffer visible to the current stage (Source::PreviousBuffer).
constexpr Xmm COMBINER_BUFFER = xmm1;
|
||||||
|
// Combiner buffer that becomes visible to the following stage.
constexpr Xmm NEXT_COMBINER_BUFFER = xmm2;
|
||||||
|
// VEC0/VEC1/VEC2 hold the three modified color inputs of a stage and double as scratch.
constexpr Xmm VEC0 = xmm3;
|
||||||
|
constexpr Xmm VEC1 = xmm4;
|
||||||
|
constexpr Xmm VEC2 = xmm5;
|
||||||
|
// Color result of the current stage, written by ColorCombine.
constexpr Xmm COLOR_OUTPUT = xmm6;
|
||||||
|
// Constant vectors set up in the prologue of the generated code: all-zero, 128 per lane and
// 255 per lane respectively.
constexpr Xmm ZERO = xmm13;
|
||||||
|
constexpr Xmm MID_COLOR = xmm14;
|
||||||
|
constexpr Xmm MAX_COLOR = xmm15;
|
||||||
|
|
||||||
|
bool IsPassThroughTevStage(const TevStageConfig& stage) {
|
||||||
|
return (stage.color_op == TevStageConfig::Operation::Replace &&
|
||||||
|
stage.alpha_op == TevStageConfig::Operation::Replace &&
|
||||||
|
stage.color_source1 == TevStageConfig::Source::Previous &&
|
||||||
|
stage.alpha_source1 == TevStageConfig::Source::Previous &&
|
||||||
|
stage.color_modifier1 == TevStageConfig::ColorModifier::SourceColor &&
|
||||||
|
stage.alpha_modifier1 == TevStageConfig::AlphaModifier::SourceAlpha &&
|
||||||
|
stage.GetColorMultiplier() == 1 && stage.GetAlphaMultiplier() == 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
} // Anonymous namespace
|
||||||
|
|
||||||
|
/// Builds the cache key by copying the raw configuration words (sources, modifiers, ops,
/// constant color and scales) of every TEV stage out of the texturing registers.
TevConfigKey::TevConfigKey(const Pica::TexturingRegs& regs) {
    const auto& source_stages = regs.GetTevStages();
    size_t slot = 0;
    for (const auto& source_stage : source_stages) {
        auto& key_stage = stages[slot];
        key_stage.sources_raw = source_stage.sources_raw;
        key_stage.modifiers_raw = source_stage.modifiers_raw;
        key_stage.ops_raw = source_stage.ops_raw;
        key_stage.const_color = source_stage.const_color;
        key_stage.scales_raw = source_stage.scales_raw;
        ++slot;
    }
}
|
||||||
|
|
||||||
|
// Stores a reference to the PICA registers and immediately compiles the TEV program described
// by `key`; after construction `program` points at the generated code.
TevConfig::TevConfig(const Pica::Regs& regs_, const TevConfigKey& key) : regs{regs_} {
|
||||||
|
WriteTevConfig(key);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Defaulted out of line; the class adds no cleanup of its own here.
TevConfig::~TevConfig() = default;
|
||||||
|
|
||||||
|
/**
 * Invokes the compiled TEV program for a single fragment.
 *
 * Each color is handed to the generated code as a packed 32-bit value. The secondary fragment
 * color and the initial combiner buffer color share the fourth argument: the former occupies
 * the low 32 bits, the latter the high 32 bits.
 *
 * @param texture_color_ Colors sampled from the four texture sources.
 * @param primary_color_ Primary color input.
 * @param primary_fragment_color_ Primary fragment color input.
 * @param secondary_fragment_color_ Secondary fragment color input.
 * @param tev_combiner_buffer_color Raw combiner buffer color register value.
 * @return The combiner output of the last TEV stage.
 */
Common::Vec4<u8> TevConfig::Run(std::span<Common::Vec4<u8>, 4> texture_color_,
                                Common::Vec4<u8> primary_color_,
                                Common::Vec4<u8> primary_fragment_color_,
                                Common::Vec4<u8> secondary_fragment_color_,
                                u64 tev_combiner_buffer_color) {
    // Low half: secondary fragment color. High half: combiner buffer color.
    const u32 packed_secondary = std::bit_cast<u32>(secondary_fragment_color_);
    const u64 packed_fourth_arg = packed_secondary | (tev_combiner_buffer_color << 32);

    u32* const packed_textures = reinterpret_cast<u32*>(texture_color_.data());
    const u32 packed_primary = std::bit_cast<u32>(primary_color_);
    const u32 packed_primary_fragment = std::bit_cast<u32>(primary_fragment_color_);

    const u32 packed_output =
        program(packed_textures, packed_primary, packed_primary_fragment, packed_fourth_arg);
    return std::bit_cast<Common::Vec4<u8>>(packed_output);
}
|
||||||
|
|
||||||
|
void TevConfig::WriteTevConfig(const TevConfigKey& key) {
|
||||||
|
program = (CompiledTevFun*)getCurr();
|
||||||
|
|
||||||
|
constexpr Xbyak::Reg TEXTURE_COLOR = ABI_PARAM1;
|
||||||
|
constexpr Xbyak::Reg PRIMARY_COLOR = ABI_PARAM2;
|
||||||
|
constexpr Xbyak::Reg PRIMARY_FRAGMENT_COLOR = ABI_PARAM3;
|
||||||
|
constexpr Xbyak::Reg SECONDARY_FRAGMENT_COLOR = ABI_PARAM4;
|
||||||
|
|
||||||
|
// Save calle state
|
||||||
|
ABI_PushRegistersAndAdjustStack(*this, ABI_ALL_CALLEE_SAVED, 8, 16);
|
||||||
|
|
||||||
|
// Clear the combiner registers and zero constant
|
||||||
|
pxor(COMBINER_OUTPUT, COMBINER_OUTPUT);
|
||||||
|
pxor(COMBINER_BUFFER, COMBINER_BUFFER);
|
||||||
|
pxor(ZERO, ZERO);
|
||||||
|
|
||||||
|
// Used to set an xmm register to the max color
|
||||||
|
static const __m128i max = _mm_set1_epi32(255);
|
||||||
|
mov(rax, reinterpret_cast<size_t>(&max));
|
||||||
|
movdqu(MAX_COLOR, xword[rax]);
|
||||||
|
|
||||||
|
// Used to set an xmm register to the mid color
|
||||||
|
static const __m128i mid = _mm_set1_epi32(128);
|
||||||
|
mov(rax, reinterpret_cast<size_t>(&mid));
|
||||||
|
movdqu(MID_COLOR, xword[rax]);
|
||||||
|
|
||||||
|
// Load next_combiner_buffer
|
||||||
|
mov(rax, ABI_PARAM4);
|
||||||
|
shr(rax, 32);
|
||||||
|
vmovd(NEXT_COMBINER_BUFFER, eax);
|
||||||
|
pmovzxbd(NEXT_COMBINER_BUFFER, NEXT_COMBINER_BUFFER);
|
||||||
|
|
||||||
|
for (u32 tev_stage_index = 0; tev_stage_index < key.stages.size(); ++tev_stage_index) {
|
||||||
|
const auto& tev_stage = key.stages[tev_stage_index];
|
||||||
|
if (!IsPassThroughTevStage(tev_stage)) {
|
||||||
|
using Source = TexturingRegs::TevStageConfig::Source;
|
||||||
|
|
||||||
|
const auto get_source = [&](const Xbyak::Xmm& dest, Source source) {
|
||||||
|
switch (source) {
|
||||||
|
case Source::PrimaryColor:
|
||||||
|
vmovd(dest, PRIMARY_COLOR.cvt32());
|
||||||
|
pmovzxbd(dest, dest);
|
||||||
|
break;
|
||||||
|
case Source::PrimaryFragmentColor:
|
||||||
|
vmovd(dest, PRIMARY_FRAGMENT_COLOR.cvt32());
|
||||||
|
pmovzxbd(dest, dest);
|
||||||
|
break;
|
||||||
|
case Source::SecondaryFragmentColor:
|
||||||
|
vmovd(dest, SECONDARY_FRAGMENT_COLOR.cvt32());
|
||||||
|
pmovzxbd(dest, dest);
|
||||||
|
break;
|
||||||
|
case Source::Texture0:
|
||||||
|
case Source::Texture1:
|
||||||
|
case Source::Texture2:
|
||||||
|
case Source::Texture3: {
|
||||||
|
const u32 index = static_cast<u32>(source) - static_cast<u32>(Source::Texture0);
|
||||||
|
vmovd(dest, dword[TEXTURE_COLOR + index * sizeof(u32)]);
|
||||||
|
pmovzxbd(dest, dest);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case Source::PreviousBuffer:
|
||||||
|
vmovdqa(dest, COMBINER_BUFFER);
|
||||||
|
break;
|
||||||
|
case Source::Constant:
|
||||||
|
mov(eax, tev_stage.const_color);
|
||||||
|
vmovd(dest, eax);
|
||||||
|
pmovzxbd(dest, dest);
|
||||||
|
break;
|
||||||
|
case Source::Previous:
|
||||||
|
vmovdqa(dest, COMBINER_OUTPUT);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
LOG_ERROR(HW_GPU, "Unknown color combiner source {}", source);
|
||||||
|
UNIMPLEMENTED();
|
||||||
|
vmovdqa(dest, ZERO);
|
||||||
|
}
|
||||||
|
return dest;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Load the color modifiers to VEC0/1/2.
|
||||||
|
GetColorModifier(get_source(VEC0, tev_stage.color_source1), tev_stage.color_modifier1);
|
||||||
|
GetColorModifier(get_source(VEC1, tev_stage.color_source2), tev_stage.color_modifier2);
|
||||||
|
GetColorModifier(get_source(VEC2, tev_stage.color_source3), tev_stage.color_modifier3);
|
||||||
|
|
||||||
|
// Combine the texture colors to COLOR_OUTPUT.
|
||||||
|
ColorCombine(COLOR_OUTPUT, tev_stage.color_op);
|
||||||
|
|
||||||
|
if (tev_stage.color_op == TexturingRegs::TevStageConfig::Operation::Dot3_RGBA) {
|
||||||
|
// Result of Dot3_RGBA operation is also placed to the alpha component
|
||||||
|
vmovd(ALPHA_OUTPUT.cvt32(), COLOR_OUTPUT);
|
||||||
|
} else {
|
||||||
|
// Load the alpha modifers to VEC0/1/2.
|
||||||
|
GetAlphaModifier(get_source(VEC0, tev_stage.alpha_source1), A0,
|
||||||
|
tev_stage.alpha_modifier1);
|
||||||
|
GetAlphaModifier(get_source(VEC1, tev_stage.alpha_source2), A1,
|
||||||
|
tev_stage.alpha_modifier2);
|
||||||
|
GetAlphaModifier(get_source(VEC2, tev_stage.alpha_source3), A2,
|
||||||
|
tev_stage.alpha_modifier3);
|
||||||
|
|
||||||
|
// Combine the alpha values to ALPHA_OUTPUT.
|
||||||
|
AlphaCombine(ALPHA_OUTPUT, tev_stage.alpha_op);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Load the color multipler to an SSE vector.
|
||||||
|
mov(eax, tev_stage.GetColorMultiplier());
|
||||||
|
movd(VEC0, eax);
|
||||||
|
pshufd(VEC0, VEC0, 0);
|
||||||
|
|
||||||
|
// Multiply color output with the multiplier and take the minimum.
|
||||||
|
pmulld(COLOR_OUTPUT, VEC0);
|
||||||
|
pminsd(COLOR_OUTPUT, MAX_COLOR);
|
||||||
|
|
||||||
|
// Load the alpha multiplier, multiply it with the alpha output.
|
||||||
|
mov(eax, tev_stage.GetAlphaMultiplier());
|
||||||
|
imul(ALPHA_OUTPUT, eax);
|
||||||
|
|
||||||
|
// Load result to a vector and take the minimum
|
||||||
|
movd(VEC0, ALPHA_OUTPUT);
|
||||||
|
pshufd(VEC0, VEC0, 0);
|
||||||
|
pminsd(VEC0, MAX_COLOR);
|
||||||
|
|
||||||
|
// Blend vectors to get the combiner output
|
||||||
|
vpblendd(COMBINER_OUTPUT, COLOR_OUTPUT, VEC0, 0b1000);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set combiner buffer to the next buffer
|
||||||
|
movq(COMBINER_BUFFER, NEXT_COMBINER_BUFFER);
|
||||||
|
|
||||||
|
if (regs.texturing.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferColor(
|
||||||
|
tev_stage_index)) {
|
||||||
|
vpblendd(NEXT_COMBINER_BUFFER, COMBINER_OUTPUT, NEXT_COMBINER_BUFFER, 0b1000);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (regs.texturing.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferAlpha(
|
||||||
|
tev_stage_index)) {
|
||||||
|
vpblendd(NEXT_COMBINER_BUFFER, COMBINER_OUTPUT, NEXT_COMBINER_BUFFER, 0b0111);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Pack combiner output to a u32 to be returned.
|
||||||
|
vpextrd(edx, COMBINER_OUTPUT, 3);
|
||||||
|
vpextrd(eax, COMBINER_OUTPUT, 2);
|
||||||
|
sal(edx, 8);
|
||||||
|
or_(eax, edx);
|
||||||
|
vpextrd(edx, COMBINER_OUTPUT, 1);
|
||||||
|
sal(eax, 8);
|
||||||
|
or_(edx, eax);
|
||||||
|
vmovd(eax, COMBINER_OUTPUT);
|
||||||
|
sal(edx, 8);
|
||||||
|
or_(eax, edx);
|
||||||
|
|
||||||
|
ABI_PopRegistersAndAdjustStack(*this, ABI_ALL_CALLEE_SAVED, 8, 16);
|
||||||
|
ret();
|
||||||
|
ready();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Emits code that applies a color modifier to `dest` in place.
 *
 * SourceColor clears lane 3, the single-channel modifiers replicate the chosen lane into the
 * color lanes, and the OneMinus variants subtract the value from 255 per lane. The result is
 * finally masked with MAX_COLOR.
 *
 * @param dest Register holding the source color, one 32-bit lane per channel.
 * @param factor Modifier to apply.
 */
void TevConfig::GetColorModifier(const Xbyak::Xmm& dest, TevStageConfig::ColorModifier factor) {
    using ColorModifier = TevStageConfig::ColorModifier;

    // Classify the modifier first: which lane to replicate (if any) and whether to invert.
    int replicated_lane = -1;
    bool invert = false;
    bool recognised = true;

    switch (factor) {
    case ColorModifier::SourceColor:
    case ColorModifier::OneMinusSourceColor:
        invert = factor == ColorModifier::OneMinusSourceColor;
        break;
    case ColorModifier::SourceAlpha:
    case ColorModifier::OneMinusSourceAlpha:
        replicated_lane = 3;
        invert = factor == ColorModifier::OneMinusSourceAlpha;
        break;
    case ColorModifier::SourceRed:
    case ColorModifier::OneMinusSourceRed:
        replicated_lane = 0;
        invert = factor == ColorModifier::OneMinusSourceRed;
        break;
    case ColorModifier::SourceGreen:
    case ColorModifier::OneMinusSourceGreen:
        replicated_lane = 1;
        invert = factor == ColorModifier::OneMinusSourceGreen;
        break;
    case ColorModifier::SourceBlue:
    case ColorModifier::OneMinusSourceBlue:
        replicated_lane = 2;
        invert = factor == ColorModifier::OneMinusSourceBlue;
        break;
    default:
        recognised = false;
        break;
    }

    if (!recognised) {
        UNREACHABLE();
    } else if (factor == ColorModifier::SourceColor) {
        // Plain color: keep lanes 0-2 and take lane 3 from the zero register.
        vpblendd(dest, dest, ZERO, 0b1000);
    } else {
        if (replicated_lane >= 0) {
            const u32 lane = static_cast<u32>(replicated_lane);
            const u8 shuffle = lane | (lane << 2) | (lane << 4);
            vpshufd(dest, dest, shuffle);
        }
        if (invert) {
            vpsubd(dest, MAX_COLOR, dest);
        }
    }

    pand(dest, MAX_COLOR);
}
|
||||||
|
|
||||||
|
/**
 * Emits code that combines the modified color inputs in VEC0/VEC1/VEC2 according to `op` and
 * writes the per-channel result to `dest`, masked with MAX_COLOR.
 *
 * VEC0, VEC1 and VEC2 are clobbered. Divisions by 255 are approximated by a right shift of 8
 * on each 32-bit lane.
 *
 * @param dest Register receiving the combined color.
 * @param op Combiner operation to emit.
 */
void TevConfig::ColorCombine(const Xbyak::Xmm& dest, TevStageConfig::Operation op) {
    using Operation = TevStageConfig::Operation;

    switch (op) {
    case Operation::Replace:
        vmovdqa(dest, VEC0);
        break;
    case Operation::Modulate:
        pmulld(VEC0, VEC1);
        vpsrld(dest, VEC0, 8); // TODO: This is a very crude approximation of division by 255
        break;
    case Operation::Add:
        vpaddd(VEC0, VEC0, VEC1);
        vpminsd(dest, MAX_COLOR, VEC0);
        break;
    case Operation::AddSigned:
        vpaddd(VEC0, VEC0, VEC1);
        vpsubd(VEC0, VEC0, MID_COLOR);
        vpminsd(VEC0, VEC0, MAX_COLOR);
        vpmaxsd(dest, VEC0, ZERO);
        break;
    case Operation::Lerp:
        // (in0 * in2 + in1 * (255 - in2)) / 255
        pmulld(VEC0, VEC2);
        vpsubd(VEC2, MAX_COLOR, VEC2); // VEC2 = 255 - in2
        pmulld(VEC1, VEC2);
        vpaddd(dest, VEC0, VEC1);
        vpsrld(dest, dest, 8); // TODO: This is a very crude approximation of division by 255
        break;
    case Operation::Subtract:
        psubd(VEC0, VEC1);
        vpmaxsd(dest, VEC0, ZERO);
        break;
    case Operation::MultiplyThenAdd:
        // min(255, (in0 * in1 + 255 * in2) / 255); the clamp has to follow the division.
        pmulld(VEC0, VEC1);
        pmulld(VEC2, MAX_COLOR);
        paddd(VEC0, VEC2);
        psrld(VEC0, 8); // TODO: This is a very crude approximation of division by 255
        vpminsd(dest, VEC0, MAX_COLOR);
        break;
    case Operation::AddThenMultiply:
        paddd(VEC0, VEC1);
        pminsd(VEC0, MAX_COLOR);
        pmulld(VEC0, VEC2);
        vpsrld(dest, VEC0, 8); // TODO: This is a very crude approximation of division by 255
        break;
    case Operation::Dot3_RGB:
    case Operation::Dot3_RGBA:
        // Per channel: ((2 * in0 - 255) * (2 * in1 - 255) + 128) >> 8, summed over the three
        // color lanes and clamped to [0, 255].
        pslld(VEC0, 1);
        psubd(VEC0, MAX_COLOR);
        pslld(VEC1, 1);
        psubd(VEC1, MAX_COLOR);
        pmulld(VEC0, VEC1);
        paddd(VEC0, MID_COLOR);
        // The products are signed, so the shift must be arithmetic; a logical shift would turn
        // negative terms into large positive values.
        psrad(VEC0, 8);
        vpblendd(VEC0, VEC0, ZERO, 0b1000);
        phaddd(VEC0, VEC0);
        phaddd(VEC0, VEC0);
        pminsd(VEC0, MAX_COLOR);
        pmaxsd(VEC0, ZERO);
        pshufd(dest, VEC0, 0);
        break;
    default:
        LOG_ERROR(HW_GPU, "Unknown color combiner operation {}", (int)op);
        UNIMPLEMENTED();
    }
    pand(dest, MAX_COLOR);
}
|
||||||
|
|
||||||
|
/**
 * Emits code that extracts one channel of `src` into the general purpose register `dest`,
 * optionally replacing it with 255 minus that channel.
 *
 * eax is used as scratch for the OneMinus variants.
 *
 * @param src Register holding the source color, one 32-bit lane per channel.
 * @param dest Register receiving the modified alpha input.
 * @param factor Modifier selecting the channel and whether it is inverted.
 */
void TevConfig::GetAlphaModifier(const Xbyak::Xmm& src, const Xbyak::Reg32& dest,
                                 TevStageConfig::AlphaModifier factor) {
    using AlphaModifier = TevStageConfig::AlphaModifier;

    u32 lane = 0;
    bool invert = false;

    switch (factor) {
    case AlphaModifier::SourceAlpha:
        lane = 3;
        break;
    case AlphaModifier::OneMinusSourceAlpha:
        lane = 3;
        invert = true;
        break;
    case AlphaModifier::SourceRed:
        lane = 0;
        break;
    case AlphaModifier::OneMinusSourceRed:
        lane = 0;
        invert = true;
        break;
    case AlphaModifier::SourceGreen:
        lane = 1;
        break;
    case AlphaModifier::OneMinusSourceGreen:
        lane = 1;
        invert = true;
        break;
    case AlphaModifier::SourceBlue:
        lane = 2;
        break;
    case AlphaModifier::OneMinusSourceBlue:
        lane = 2;
        invert = true;
        break;
    default:
        UNREACHABLE();
        return;
    }

    if (!invert) {
        vpextrd(dest, src, lane);
        return;
    }

    // dest = 255 - channel
    vpextrd(eax, src, lane);
    mov(dest, 255);
    sub(dest, eax);
}
|
||||||
|
|
||||||
|
/**
 * Emits code that combines the modified alpha inputs in A0/A1/A2 according to `op` and writes
 * the scalar result to `dest`.
 *
 * A0, A1 and eax are clobbered. Every recognised operation stores its result in `dest`.
 *
 * @param dest Register receiving the combined alpha value.
 * @param op Combiner operation to emit.
 */
void TevConfig::AlphaCombine(const Xbyak::Reg32& dest, TevStageConfig::Operation op) {
    using Operation = TevStageConfig::Operation;

    // dst = src / 255 using a multiply by the reciprocal constant followed by a shift.
    // The 32-bit mov clears the upper half of dst before the 64-bit multiply.
    const auto div_255 = [&](const Reg32& dst, const Reg32& src) {
        mov(dst, 0x80808081);
        imul(dst.cvt64(), src.cvt64());
        shr(dst.cvt64(), 39);
    };

    switch (op) {
    case Operation::Replace:
        mov(dest, A0);
        break;
    case Operation::Modulate:
        imul(A0, A1);
        div_255(dest, A0);
        break;
    case Operation::Add:
        // dest = min(255, A0 + A1)
        add(A0, A1);
        mov(dest, 255);
        cmp(A0, 255);
        cmovb(dest, A0);
        break;
    case Operation::AddSigned:
        // dest = clamp(A0 + A1 - 128, 0, 255)
        xor_(eax, eax);
        add(A0, A1);
        sub(A0, 128);
        test(A0, A0);
        cmovg(eax, A0); // eax = max(0, A0)
        mov(dest, 255);
        cmp(eax, 255);
        cmovb(dest, eax);
        break;
    case Operation::Lerp:
        // dest = (A0 * A2 + A1 * (255 - A2)) / 255
        imul(A0, A2);
        mov(eax, 255);
        sub(eax, A2);
        imul(A1, eax);
        add(A0, A1);
        div_255(dest, A0);
        break;
    case Operation::Subtract:
        // dest = max(0, A0 - A1)
        sub(A0, A1);
        xor_(eax, eax);
        test(A0, A0);
        cmovl(A0, eax);
        mov(dest, A0);
        break;
    case Operation::MultiplyThenAdd:
        // dest = min(255, (A0 * A1 + 255 * A2) / 255)
        imul(A0, A1);
        mov(dest, A2);
        shl(dest, 8);
        sub(dest, A2);
        add(dest, A0);
        div_255(eax, dest);
        cmp(eax, 255);
        mov(dest, 255);
        cmovb(dest, eax);
        break;
    case Operation::AddThenMultiply:
        // dest = min(255, A0 + A1) * A2 / 255
        add(A0, A1);
        cmp(A0, 255);
        mov(eax, 255);
        cmovg(A0, eax);
        imul(A0, A2);
        div_255(dest, A0);
        break;
    default:
        LOG_ERROR(HW_GPU, "Unknown alpha combiner operation {}", (int)op);
        UNIMPLEMENTED();
    }
}
|
||||||
|
|
||||||
|
} // namespace SwRenderer
|
64
src/video_core/renderer_software/sw_tev_jit.h
Normal file
64
src/video_core/renderer_software/sw_tev_jit.h
Normal file
|
@ -0,0 +1,64 @@
|
||||||
|
// Copyright 2023 Citra Emulator Project
|
||||||
|
// Licensed under GPLv2 or any later version
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <span>
|
||||||
|
#include <xbyak/xbyak.h>
|
||||||
|
|
||||||
|
#include "common/hash.h"
|
||||||
|
#include "common/vector_math.h"
|
||||||
|
#include "video_core/regs_texturing.h"
|
||||||
|
|
||||||
|
namespace Pica {
|
||||||
|
struct State;
|
||||||
|
struct Regs;
|
||||||
|
} // namespace Pica
|
||||||
|
|
||||||
|
namespace SwRenderer {
|
||||||
|
|
||||||
|
struct TevConfigKey {
|
||||||
|
explicit TevConfigKey(const Pica::TexturingRegs& regs);
|
||||||
|
|
||||||
|
u64 Hash() const noexcept {
|
||||||
|
return Common::ComputeHash64(this, sizeof(TevConfigKey));
|
||||||
|
}
|
||||||
|
|
||||||
|
std::array<Pica::TexturingRegs::TevStageConfig, 6> stages;
|
||||||
|
};
|
||||||
|
|
||||||
|
class TevConfig : public Xbyak::CodeGenerator {
|
||||||
|
public:
|
||||||
|
explicit TevConfig(const Pica::Regs& regs, const TevConfigKey& key);
|
||||||
|
~TevConfig();
|
||||||
|
|
||||||
|
Common::Vec4<u8> Run(std::span<Common::Vec4<u8>, 4> texture_color_,
|
||||||
|
Common::Vec4<u8> primary_color_, Common::Vec4<u8> primary_fragment_color_,
|
||||||
|
Common::Vec4<u8> secondary_fragment_color_, u64 tev_combiner_buffer_color);
|
||||||
|
|
||||||
|
private:
|
||||||
|
void WriteTevConfig(const TevConfigKey& key);
|
||||||
|
|
||||||
|
void GetColorModifier(const Xbyak::Xmm& dest,
|
||||||
|
Pica::TexturingRegs::TevStageConfig::ColorModifier factor);
|
||||||
|
|
||||||
|
void GetAlphaModifier(const Xbyak::Xmm& src, const Xbyak::Reg32& dest,
|
||||||
|
Pica::TexturingRegs::TevStageConfig::AlphaModifier factor);
|
||||||
|
|
||||||
|
void ColorCombine(const Xbyak::Xmm& dest, Pica::TexturingRegs::TevStageConfig::Operation op);
|
||||||
|
|
||||||
|
void AlphaCombine(const Xbyak::Reg32& dest, Pica::TexturingRegs::TevStageConfig::Operation op);
|
||||||
|
|
||||||
|
private:
|
||||||
|
const Pica::Regs& regs;
|
||||||
|
|
||||||
|
using CompiledTevFun = u32(u32* texture_color, u32 primary_color, u32 primary_fragment_color,
|
||||||
|
u64 secondary_fragment_color_and_tev_combiner_buffer_color);
|
||||||
|
|
||||||
|
CompiledTevFun* program = nullptr;
|
||||||
|
};
|
||||||
|
|
||||||
|
using TevCache = std::unordered_map<u64, std::unique_ptr<TevConfig>, Common::IdentityHash<u64>>;
|
||||||
|
|
||||||
|
} // namespace SwRenderer
|
Loading…
Reference in a new issue