From 6b7ebb3f82ae9674f2a4d66e870a53102b412003 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Mon, 21 Mar 2016 02:48:40 -0400 Subject: [PATCH 01/33] hle: Get rid of global access to g_reschedule This shouldn't be directly exposed if there's already a partial API that operates on it. We can just provide the rest of that API. --- src/core/core.cpp | 2 +- src/core/hle/hle.cpp | 20 ++++++++++++++++---- src/core/hle/hle.h | 4 ++-- src/core/hle/kernel/thread.cpp | 3 ++- 4 files changed, 21 insertions(+), 8 deletions(-) diff --git a/src/core/core.cpp b/src/core/core.cpp index 84d6c392e..609ca860d 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -52,7 +52,7 @@ void RunLoop(int tight_loop) { } HW::Update(); - if (HLE::g_reschedule) { + if (HLE::RescheduleIsPending()) { Kernel::Reschedule(); } } diff --git a/src/core/hle/hle.cpp b/src/core/hle/hle.cpp index 331b1b22a..3b1369800 100644 --- a/src/core/hle/hle.cpp +++ b/src/core/hle/hle.cpp @@ -14,9 +14,13 @@ //////////////////////////////////////////////////////////////////////////////////////////////////// -namespace HLE { +namespace { -bool g_reschedule; ///< If true, immediately reschedules the CPU to a new thread +bool reschedule; ///< If true, immediately reschedules the CPU to a new thread + +} + +namespace HLE { void Reschedule(const char *reason) { DEBUG_ASSERT_MSG(reason != nullptr && strlen(reason) < 256, "Reschedule: Invalid or too long reason."); @@ -29,13 +33,21 @@ void Reschedule(const char *reason) { Core::g_app_core->PrepareReschedule(); - g_reschedule = true; + reschedule = true; +} + +bool RescheduleIsPending() { + return reschedule; +} + +void DoneRescheduling() { + reschedule = false; } void Init() { Service::Init(); - g_reschedule = false; + reschedule = false; LOG_DEBUG(Kernel, "initialized OK"); } diff --git a/src/core/hle/hle.h b/src/core/hle/hle.h index e0b97797c..58dffe587 100644 --- a/src/core/hle/hle.h +++ b/src/core/hle/hle.h @@ -13,9 +13,9 @@ const Handle INVALID_HANDLE = 0; namespace HLE { 
-extern bool g_reschedule; ///< If true, immediately reschedules the CPU to a new thread - void Reschedule(const char *reason); +bool RescheduleIsPending(); +void DoneRescheduling(); void Init(); void Shutdown(); diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp index bf32f653d..6dc95d0f1 100644 --- a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp @@ -483,7 +483,8 @@ void Reschedule() { Thread* cur = GetCurrentThread(); Thread* next = PopNextReadyThread(); - HLE::g_reschedule = false; + + HLE::DoneRescheduling(); // Don't bother switching to the same thread if (next == cur) From 5ec1140f8b930c2f3da776e599ac0fc94653a532 Mon Sep 17 00:00:00 2001 From: Jannik Vogel Date: Tue, 3 May 2016 12:34:52 +0200 Subject: [PATCH 02/33] OpenGL: Don't copy const_color (Reverts #1745) --- src/video_core/renderer_opengl/gl_rasterizer.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 63ff7716d..82fa61742 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -49,7 +49,9 @@ struct PicaShaderConfig { res.alpha_test_func = regs.output_merger.alpha_test.enable ? regs.output_merger.alpha_test.func.Value() : Pica::Regs::CompareFunc::Always; - // Copy tev stages + // Copy relevant tev stages fields. + // We don't sync const_color here because of the high variance, it is a + // shader uniform instead. 
const auto& tev_stages = regs.GetTevStages(); DEBUG_ASSERT(res.tev_stages.size() == tev_stages.size()); for (size_t i = 0; i < tev_stages.size(); i++) { @@ -57,7 +59,6 @@ struct PicaShaderConfig { res.tev_stages[i].sources_raw = tev_stage.sources_raw; res.tev_stages[i].modifiers_raw = tev_stage.modifiers_raw; res.tev_stages[i].ops_raw = tev_stage.ops_raw; - res.tev_stages[i].const_color = tev_stage.const_color; res.tev_stages[i].scales_raw = tev_stage.scales_raw; } From f3f7018c9e4c398d50902eb1011ad9a731bdbc3b Mon Sep 17 00:00:00 2001 From: Jannik Vogel Date: Sat, 23 Apr 2016 15:19:41 +0200 Subject: [PATCH 03/33] Pica: Make PicaShaderConfig trivially_copyable and clear it before use --- .../renderer_opengl/gl_rasterizer.h | 49 +++++++++++-------- 1 file changed, 28 insertions(+), 21 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 82fa61742..47fd40f97 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -41,9 +41,12 @@ struct ScreenInfo; * two separate shaders sharing the same key. */ struct PicaShaderConfig { + /// Construct a PicaShaderConfig with the current Pica register configuration. static PicaShaderConfig CurrentConfig() { PicaShaderConfig res; + std::memset(&res, 0, sizeof(PicaShaderConfig)); + const auto& regs = Pica::g_state.regs; res.alpha_test_func = regs.output_merger.alpha_test.enable ? 
@@ -134,38 +137,42 @@ struct PicaShaderConfig { return std::memcmp(this, &o, sizeof(PicaShaderConfig)) == 0; }; - Pica::Regs::CompareFunc alpha_test_func = Pica::Regs::CompareFunc::Never; - std::array tev_stages = {}; - u8 combiner_buffer_input = 0; + Pica::Regs::CompareFunc alpha_test_func; + std::array tev_stages; + u8 combiner_buffer_input; struct { struct { - unsigned num = 0; - bool directional = false; - bool two_sided_diffuse = false; - bool dist_atten_enable = false; - GLfloat dist_atten_scale = 0.0f; - GLfloat dist_atten_bias = 0.0f; + unsigned num; + bool directional; + bool two_sided_diffuse; + bool dist_atten_enable; + GLfloat dist_atten_scale; + GLfloat dist_atten_bias; } light[8]; - bool enable = false; - unsigned src_num = 0; - Pica::Regs::LightingBumpMode bump_mode = Pica::Regs::LightingBumpMode::None; - unsigned bump_selector = 0; - bool bump_renorm = false; - bool clamp_highlights = false; + bool enable; + unsigned src_num; + Pica::Regs::LightingBumpMode bump_mode; + unsigned bump_selector; + bool bump_renorm; + bool clamp_highlights; - Pica::Regs::LightingConfig config = Pica::Regs::LightingConfig::Config0; - Pica::Regs::LightingFresnelSelector fresnel_selector = Pica::Regs::LightingFresnelSelector::None; + Pica::Regs::LightingConfig config; + Pica::Regs::LightingFresnelSelector fresnel_selector; struct { - bool enable = false; - bool abs_input = false; - Pica::Regs::LightingLutInput type = Pica::Regs::LightingLutInput::NH; - float scale = 1.0f; + bool enable; + bool abs_input; + Pica::Regs::LightingLutInput type; + float scale; } lut_d0, lut_d1, lut_fr, lut_rr, lut_rg, lut_rb; } lighting; + }; +#if (__GNUC__ >= 5) || defined(__clang__) || defined(_MSC_VER) +static_assert(std::is_trivially_copyable::value, "PicaShaderConfig must be trivially copyable"); +#endif namespace std { From 5fc8eb227a6a70b0cc9b268fb9d2f693af0d8fec Mon Sep 17 00:00:00 2001 From: Jannik Vogel Date: Sat, 30 Apr 2016 10:45:17 +0200 Subject: [PATCH 04/33] Pica: Add 
TevStageConfigRaw to PicaShaderConfig (MSVC workaround) --- .../renderer_opengl/gl_rasterizer.h | 23 ++++++++++++++++++- .../renderer_opengl/gl_shader_gen.cpp | 2 +- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 47fd40f97..cc12a5f62 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -138,7 +138,28 @@ struct PicaShaderConfig { }; Pica::Regs::CompareFunc alpha_test_func; - std::array tev_stages; + + // NOTE: MSVC15 (Update 2) doesn't think `delete`'d constructors and operators are TC. + // This makes BitField not TC when used in a union or struct so we have to resort + // to this ugly hack. + // Once that bug is fixed we can use Pica::Regs::TevStageConfig here. + // Doesn't include const_color because we don't sync it, see comment in CurrentConfig() + struct TevStageConfigRaw { + u32 sources_raw; + u32 modifiers_raw; + u32 ops_raw; + u32 scales_raw; + explicit operator Pica::Regs::TevStageConfig() const noexcept { + Pica::Regs::TevStageConfig stage; + stage.sources_raw = sources_raw; + stage.modifiers_raw = modifiers_raw; + stage.ops_raw = ops_raw; + stage.const_color = 0; + stage.scales_raw = scales_raw; + return stage; + } + }; + std::array tev_stages; u8 combiner_buffer_input; struct { diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 9011caa39..51984389c 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -287,7 +287,7 @@ static void AppendAlphaTestCondition(std::string& out, Regs::CompareFunc func) { /// Writes the code to emulate the specified TEV stage static void WriteTevStage(std::string& out, const PicaShaderConfig& config, unsigned index) { - auto& stage = config.tev_stages[index]; + const auto stage = static_cast(config.tev_stages[index]); 
if (!IsPassThroughTevStage(stage)) { std::string index_name = std::to_string(index); From f74652d2fe0f1b10ad15066bacd47c9ba00bed09 Mon Sep 17 00:00:00 2001 From: Jannik Vogel Date: Sun, 1 May 2016 23:28:39 +0200 Subject: [PATCH 05/33] Pica: Use a union for PicaShaderConfig --- .../renderer_opengl/gl_rasterizer.h | 168 ++++++++++-------- .../renderer_opengl/gl_shader_gen.cpp | 78 ++++---- .../renderer_opengl/gl_shader_gen.h | 2 +- 3 files changed, 131 insertions(+), 117 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index cc12a5f62..4f9a032fb 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -39,17 +39,24 @@ struct ScreenInfo; * directly accessing Pica registers. This should reduce the risk of bugs in shader generation where * Pica state is not being captured in the shader cache key, thereby resulting in (what should be) * two separate shaders sharing the same key. + * + * We use a union because "implicitly-defined copy/move constructor for a union X copies the object representation of X." + * and "implicitly-defined copy assignment operator for a union X copies the object representation (3.9) of X." + * = Bytewise copy instead of memberwise copy. + * This is important because the padding bytes are included in the hash and comparison between objects. */ -struct PicaShaderConfig { +union PicaShaderConfig { /// Construct a PicaShaderConfig with the current Pica register configuration. static PicaShaderConfig CurrentConfig() { PicaShaderConfig res; - std::memset(&res, 0, sizeof(PicaShaderConfig)); + + auto& state = res.state; + std::memset(&state, 0, sizeof(PicaShaderConfig::State)); const auto& regs = Pica::g_state.regs; - res.alpha_test_func = regs.output_merger.alpha_test.enable ? + state.alpha_test_func = regs.output_merger.alpha_test.enable ? 
regs.output_merger.alpha_test.func.Value() : Pica::Regs::CompareFunc::Always; // Copy relevant tev stages fields. @@ -59,86 +66,84 @@ struct PicaShaderConfig { DEBUG_ASSERT(res.tev_stages.size() == tev_stages.size()); for (size_t i = 0; i < tev_stages.size(); i++) { const auto& tev_stage = tev_stages[i]; - res.tev_stages[i].sources_raw = tev_stage.sources_raw; - res.tev_stages[i].modifiers_raw = tev_stage.modifiers_raw; - res.tev_stages[i].ops_raw = tev_stage.ops_raw; - res.tev_stages[i].scales_raw = tev_stage.scales_raw; + state.tev_stages[i].sources_raw = tev_stage.sources_raw; + state.tev_stages[i].modifiers_raw = tev_stage.modifiers_raw; + state.tev_stages[i].ops_raw = tev_stage.ops_raw; + state.tev_stages[i].scales_raw = tev_stage.scales_raw; } - res.combiner_buffer_input = + state.combiner_buffer_input = regs.tev_combiner_buffer_input.update_mask_rgb.Value() | regs.tev_combiner_buffer_input.update_mask_a.Value() << 4; // Fragment lighting - res.lighting.enable = !regs.lighting.disable; - res.lighting.src_num = regs.lighting.num_lights + 1; + state.lighting.enable = !regs.lighting.disable; + state.lighting.src_num = regs.lighting.num_lights + 1; - for (unsigned light_index = 0; light_index < res.lighting.src_num; ++light_index) { + for (unsigned light_index = 0; light_index < state.lighting.src_num; ++light_index) { unsigned num = regs.lighting.light_enable.GetNum(light_index); const auto& light = regs.lighting.light[num]; - res.lighting.light[light_index].num = num; - res.lighting.light[light_index].directional = light.directional != 0; - res.lighting.light[light_index].two_sided_diffuse = light.two_sided_diffuse != 0; - res.lighting.light[light_index].dist_atten_enable = !regs.lighting.IsDistAttenDisabled(num); - res.lighting.light[light_index].dist_atten_bias = Pica::float20::FromRaw(light.dist_atten_bias).ToFloat32(); - res.lighting.light[light_index].dist_atten_scale = Pica::float20::FromRaw(light.dist_atten_scale).ToFloat32(); + 
state.lighting.light[light_index].num = num; + state.lighting.light[light_index].directional = light.directional != 0; + state.lighting.light[light_index].two_sided_diffuse = light.two_sided_diffuse != 0; + state.lighting.light[light_index].dist_atten_enable = !regs.lighting.IsDistAttenDisabled(num); + state.lighting.light[light_index].dist_atten_bias = Pica::float20::FromRaw(light.dist_atten_bias).ToFloat32(); + state.lighting.light[light_index].dist_atten_scale = Pica::float20::FromRaw(light.dist_atten_scale).ToFloat32(); } - res.lighting.lut_d0.enable = regs.lighting.disable_lut_d0 == 0; - res.lighting.lut_d0.abs_input = regs.lighting.abs_lut_input.disable_d0 == 0; - res.lighting.lut_d0.type = regs.lighting.lut_input.d0.Value(); - res.lighting.lut_d0.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d0); + state.lighting.lut_d0.enable = regs.lighting.disable_lut_d0 == 0; + state.lighting.lut_d0.abs_input = regs.lighting.abs_lut_input.disable_d0 == 0; + state.lighting.lut_d0.type = regs.lighting.lut_input.d0.Value(); + state.lighting.lut_d0.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d0); - res.lighting.lut_d1.enable = regs.lighting.disable_lut_d1 == 0; - res.lighting.lut_d1.abs_input = regs.lighting.abs_lut_input.disable_d1 == 0; - res.lighting.lut_d1.type = regs.lighting.lut_input.d1.Value(); - res.lighting.lut_d1.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d1); + state.lighting.lut_d1.enable = regs.lighting.disable_lut_d1 == 0; + state.lighting.lut_d1.abs_input = regs.lighting.abs_lut_input.disable_d1 == 0; + state.lighting.lut_d1.type = regs.lighting.lut_input.d1.Value(); + state.lighting.lut_d1.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d1); - res.lighting.lut_fr.enable = regs.lighting.disable_lut_fr == 0; - res.lighting.lut_fr.abs_input = regs.lighting.abs_lut_input.disable_fr == 0; - res.lighting.lut_fr.type = regs.lighting.lut_input.fr.Value(); - res.lighting.lut_fr.scale = 
regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.fr); + state.lighting.lut_fr.enable = regs.lighting.disable_lut_fr == 0; + state.lighting.lut_fr.abs_input = regs.lighting.abs_lut_input.disable_fr == 0; + state.lighting.lut_fr.type = regs.lighting.lut_input.fr.Value(); + state.lighting.lut_fr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.fr); - res.lighting.lut_rr.enable = regs.lighting.disable_lut_rr == 0; - res.lighting.lut_rr.abs_input = regs.lighting.abs_lut_input.disable_rr == 0; - res.lighting.lut_rr.type = regs.lighting.lut_input.rr.Value(); - res.lighting.lut_rr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rr); + state.lighting.lut_rr.enable = regs.lighting.disable_lut_rr == 0; + state.lighting.lut_rr.abs_input = regs.lighting.abs_lut_input.disable_rr == 0; + state.lighting.lut_rr.type = regs.lighting.lut_input.rr.Value(); + state.lighting.lut_rr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rr); - res.lighting.lut_rg.enable = regs.lighting.disable_lut_rg == 0; - res.lighting.lut_rg.abs_input = regs.lighting.abs_lut_input.disable_rg == 0; - res.lighting.lut_rg.type = regs.lighting.lut_input.rg.Value(); - res.lighting.lut_rg.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rg); + state.lighting.lut_rg.enable = regs.lighting.disable_lut_rg == 0; + state.lighting.lut_rg.abs_input = regs.lighting.abs_lut_input.disable_rg == 0; + state.lighting.lut_rg.type = regs.lighting.lut_input.rg.Value(); + state.lighting.lut_rg.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rg); - res.lighting.lut_rb.enable = regs.lighting.disable_lut_rb == 0; - res.lighting.lut_rb.abs_input = regs.lighting.abs_lut_input.disable_rb == 0; - res.lighting.lut_rb.type = regs.lighting.lut_input.rb.Value(); - res.lighting.lut_rb.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rb); + state.lighting.lut_rb.enable = regs.lighting.disable_lut_rb == 0; + state.lighting.lut_rb.abs_input 
= regs.lighting.abs_lut_input.disable_rb == 0; + state.lighting.lut_rb.type = regs.lighting.lut_input.rb.Value(); + state.lighting.lut_rb.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rb); - res.lighting.config = regs.lighting.config; - res.lighting.fresnel_selector = regs.lighting.fresnel_selector; - res.lighting.bump_mode = regs.lighting.bump_mode; - res.lighting.bump_selector = regs.lighting.bump_selector; - res.lighting.bump_renorm = regs.lighting.disable_bump_renorm == 0; - res.lighting.clamp_highlights = regs.lighting.clamp_highlights != 0; + state.lighting.config = regs.lighting.config; + state.lighting.fresnel_selector = regs.lighting.fresnel_selector; + state.lighting.bump_mode = regs.lighting.bump_mode; + state.lighting.bump_selector = regs.lighting.bump_selector; + state.lighting.bump_renorm = regs.lighting.disable_bump_renorm == 0; + state.lighting.clamp_highlights = regs.lighting.clamp_highlights != 0; return res; } bool TevStageUpdatesCombinerBufferColor(unsigned stage_index) const { - return (stage_index < 4) && (combiner_buffer_input & (1 << stage_index)); + return (stage_index < 4) && (state.combiner_buffer_input & (1 << stage_index)); } bool TevStageUpdatesCombinerBufferAlpha(unsigned stage_index) const { - return (stage_index < 4) && ((combiner_buffer_input >> 4) & (1 << stage_index)); + return (stage_index < 4) && ((state.combiner_buffer_input >> 4) & (1 << stage_index)); } bool operator ==(const PicaShaderConfig& o) const { - return std::memcmp(this, &o, sizeof(PicaShaderConfig)) == 0; + return std::memcmp(&state, &o.state, sizeof(PicaShaderConfig::State)) == 0; }; - Pica::Regs::CompareFunc alpha_test_func; - // NOTE: MSVC15 (Update 2) doesn't think `delete`'d constructors and operators are TC. // This makes BitField not TC when used in a union or struct so we have to resort // to this ugly hack. 
@@ -159,40 +164,45 @@ struct PicaShaderConfig { return stage; } }; - std::array tev_stages; - u8 combiner_buffer_input; - struct { - struct { - unsigned num; - bool directional; - bool two_sided_diffuse; - bool dist_atten_enable; - GLfloat dist_atten_scale; - GLfloat dist_atten_bias; - } light[8]; + struct State { - bool enable; - unsigned src_num; - Pica::Regs::LightingBumpMode bump_mode; - unsigned bump_selector; - bool bump_renorm; - bool clamp_highlights; - - Pica::Regs::LightingConfig config; - Pica::Regs::LightingFresnelSelector fresnel_selector; + Pica::Regs::CompareFunc alpha_test_func; + std::array tev_stages; + u8 combiner_buffer_input; struct { + struct { + unsigned num; + bool directional; + bool two_sided_diffuse; + bool dist_atten_enable; + GLfloat dist_atten_scale; + GLfloat dist_atten_bias; + } light[8]; + bool enable; - bool abs_input; - Pica::Regs::LightingLutInput type; - float scale; - } lut_d0, lut_d1, lut_fr, lut_rr, lut_rg, lut_rb; - } lighting; + unsigned src_num; + Pica::Regs::LightingBumpMode bump_mode; + unsigned bump_selector; + bool bump_renorm; + bool clamp_highlights; + Pica::Regs::LightingConfig config; + Pica::Regs::LightingFresnelSelector fresnel_selector; + + struct { + bool enable; + bool abs_input; + Pica::Regs::LightingLutInput type; + float scale; + } lut_d0, lut_d1, lut_fr, lut_rr, lut_rg, lut_rb; + } lighting; + + } state; }; #if (__GNUC__ >= 5) || defined(__clang__) || defined(_MSC_VER) -static_assert(std::is_trivially_copyable::value, "PicaShaderConfig must be trivially copyable"); +static_assert(std::is_trivially_copyable::value, "PicaShaderConfig::State must be trivially copyable"); #endif namespace std { @@ -200,7 +210,7 @@ namespace std { template <> struct hash { size_t operator()(const PicaShaderConfig& k) const { - return Common::ComputeHash64(&k, sizeof(PicaShaderConfig)); + return Common::ComputeHash64(&k.state, sizeof(PicaShaderConfig::State)); } }; diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp 
b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 51984389c..0890adb12 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -287,7 +287,7 @@ static void AppendAlphaTestCondition(std::string& out, Regs::CompareFunc func) { /// Writes the code to emulate the specified TEV stage static void WriteTevStage(std::string& out, const PicaShaderConfig& config, unsigned index) { - const auto stage = static_cast(config.tev_stages[index]); + const auto stage = static_cast(config.state.tev_stages[index]); if (!IsPassThroughTevStage(stage)) { std::string index_name = std::to_string(index); @@ -331,6 +331,8 @@ static void WriteTevStage(std::string& out, const PicaShaderConfig& config, unsi /// Writes the code to emulate fragment lighting static void WriteLighting(std::string& out, const PicaShaderConfig& config) { + const auto& lighting = config.state.lighting; + // Define lighting globals out += "vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0);\n" "vec4 specular_sum = vec4(0.0, 0.0, 0.0, 1.0);\n" @@ -338,17 +340,17 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { "vec3 refl_value = vec3(0.0);\n"; // Compute fragment normals - if (config.lighting.bump_mode == Pica::Regs::LightingBumpMode::NormalMap) { + if (lighting.bump_mode == Pica::Regs::LightingBumpMode::NormalMap) { // Bump mapping is enabled using a normal map, read perturbation vector from the selected texture - std::string bump_selector = std::to_string(config.lighting.bump_selector); + std::string bump_selector = std::to_string(lighting.bump_selector); out += "vec3 surface_normal = 2.0 * texture(tex[" + bump_selector + "], texcoord[" + bump_selector + "]).rgb - 1.0;\n"; // Recompute Z-component of perturbation if 'renorm' is enabled, this provides a higher precision result - if (config.lighting.bump_renorm) { + if (lighting.bump_renorm) { std::string val = "(1.0 - (surface_normal.x*surface_normal.x + 
surface_normal.y*surface_normal.y))"; out += "surface_normal.z = sqrt(max(" + val + ", 0.0));\n"; } - } else if (config.lighting.bump_mode == Pica::Regs::LightingBumpMode::TangentMap) { + } else if (lighting.bump_mode == Pica::Regs::LightingBumpMode::TangentMap) { // Bump mapping is enabled using a tangent map LOG_CRITICAL(HW_GPU, "unimplemented bump mapping mode (tangent mapping)"); UNIMPLEMENTED(); @@ -361,7 +363,7 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { out += "vec3 normal = normalize(quaternion_rotate(normquat, surface_normal));\n"; // Gets the index into the specified lookup table for specular lighting - auto GetLutIndex = [config](unsigned light_num, Regs::LightingLutInput input, bool abs) { + auto GetLutIndex = [&lighting](unsigned light_num, Regs::LightingLutInput input, bool abs) { const std::string half_angle = "normalize(normalize(view) + light_vector)"; std::string index; switch (input) { @@ -389,7 +391,7 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { if (abs) { // LUT index is in the range of (0.0, 1.0) - index = config.lighting.light[light_num].two_sided_diffuse ? "abs(" + index + ")" : "max(" + index + ", 0.f)"; + index = lighting.light[light_num].two_sided_diffuse ? 
"abs(" + index + ")" : "max(" + index + ", 0.f)"; return "(FLOAT_255 * clamp(" + index + ", 0.0, 1.0))"; } else { // LUT index is in the range of (-1.0, 1.0) @@ -407,8 +409,8 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { }; // Write the code to emulate each enabled light - for (unsigned light_index = 0; light_index < config.lighting.src_num; ++light_index) { - const auto& light_config = config.lighting.light[light_index]; + for (unsigned light_index = 0; light_index < lighting.src_num; ++light_index) { + const auto& light_config = lighting.light[light_index]; std::string light_src = "light_src[" + std::to_string(light_config.num) + "]"; // Compute light vector (directional or positional) @@ -432,39 +434,39 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { } // If enabled, clamp specular component if lighting result is negative - std::string clamp_highlights = config.lighting.clamp_highlights ? "(dot(light_vector, normal) <= 0.0 ? 0.0 : 1.0)" : "1.0"; + std::string clamp_highlights = lighting.clamp_highlights ? "(dot(light_vector, normal) <= 0.0 ? 
0.0 : 1.0)" : "1.0"; // Specular 0 component std::string d0_lut_value = "1.0"; - if (config.lighting.lut_d0.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::Distribution0)) { + if (lighting.lut_d0.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::Distribution0)) { // Lookup specular "distribution 0" LUT value - std::string index = GetLutIndex(light_config.num, config.lighting.lut_d0.type, config.lighting.lut_d0.abs_input); - d0_lut_value = "(" + std::to_string(config.lighting.lut_d0.scale) + " * " + GetLutValue(Regs::LightingSampler::Distribution0, index) + ")"; + std::string index = GetLutIndex(light_config.num, lighting.lut_d0.type, lighting.lut_d0.abs_input); + d0_lut_value = "(" + std::to_string(lighting.lut_d0.scale) + " * " + GetLutValue(Regs::LightingSampler::Distribution0, index) + ")"; } std::string specular_0 = "(" + d0_lut_value + " * " + light_src + ".specular_0)"; // If enabled, lookup ReflectRed value, otherwise, 1.0 is used - if (config.lighting.lut_rr.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::ReflectRed)) { - std::string index = GetLutIndex(light_config.num, config.lighting.lut_rr.type, config.lighting.lut_rr.abs_input); - std::string value = "(" + std::to_string(config.lighting.lut_rr.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectRed, index) + ")"; + if (lighting.lut_rr.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::ReflectRed)) { + std::string index = GetLutIndex(light_config.num, lighting.lut_rr.type, lighting.lut_rr.abs_input); + std::string value = "(" + std::to_string(lighting.lut_rr.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectRed, index) + ")"; out += "refl_value.r = " + value + ";\n"; } else { out += "refl_value.r = 1.0;\n"; } // If enabled, lookup ReflectGreen value, otherwise, ReflectRed value is used - if 
(config.lighting.lut_rg.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::ReflectGreen)) { - std::string index = GetLutIndex(light_config.num, config.lighting.lut_rg.type, config.lighting.lut_rg.abs_input); - std::string value = "(" + std::to_string(config.lighting.lut_rg.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectGreen, index) + ")"; + if (lighting.lut_rg.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::ReflectGreen)) { + std::string index = GetLutIndex(light_config.num, lighting.lut_rg.type, lighting.lut_rg.abs_input); + std::string value = "(" + std::to_string(lighting.lut_rg.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectGreen, index) + ")"; out += "refl_value.g = " + value + ";\n"; } else { out += "refl_value.g = refl_value.r;\n"; } // If enabled, lookup ReflectBlue value, otherwise, ReflectRed value is used - if (config.lighting.lut_rb.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::ReflectBlue)) { - std::string index = GetLutIndex(light_config.num, config.lighting.lut_rb.type, config.lighting.lut_rb.abs_input); - std::string value = "(" + std::to_string(config.lighting.lut_rb.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectBlue, index) + ")"; + if (lighting.lut_rb.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::ReflectBlue)) { + std::string index = GetLutIndex(light_config.num, lighting.lut_rb.type, lighting.lut_rb.abs_input); + std::string value = "(" + std::to_string(lighting.lut_rb.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectBlue, index) + ")"; out += "refl_value.b = " + value + ";\n"; } else { out += "refl_value.b = refl_value.r;\n"; @@ -472,27 +474,27 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { // Specular 1 component std::string d1_lut_value = "1.0"; - if 
(config.lighting.lut_d1.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::Distribution1)) { + if (lighting.lut_d1.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::Distribution1)) { // Lookup specular "distribution 1" LUT value - std::string index = GetLutIndex(light_config.num, config.lighting.lut_d1.type, config.lighting.lut_d1.abs_input); - d1_lut_value = "(" + std::to_string(config.lighting.lut_d1.scale) + " * " + GetLutValue(Regs::LightingSampler::Distribution1, index) + ")"; + std::string index = GetLutIndex(light_config.num, lighting.lut_d1.type, lighting.lut_d1.abs_input); + d1_lut_value = "(" + std::to_string(lighting.lut_d1.scale) + " * " + GetLutValue(Regs::LightingSampler::Distribution1, index) + ")"; } std::string specular_1 = "(" + d1_lut_value + " * refl_value * " + light_src + ".specular_1)"; // Fresnel - if (config.lighting.lut_fr.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::Fresnel)) { + if (lighting.lut_fr.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::Fresnel)) { // Lookup fresnel LUT value - std::string index = GetLutIndex(light_config.num, config.lighting.lut_fr.type, config.lighting.lut_fr.abs_input); - std::string value = "(" + std::to_string(config.lighting.lut_fr.scale) + " * " + GetLutValue(Regs::LightingSampler::Fresnel, index) + ")"; + std::string index = GetLutIndex(light_config.num, lighting.lut_fr.type, lighting.lut_fr.abs_input); + std::string value = "(" + std::to_string(lighting.lut_fr.scale) + " * " + GetLutValue(Regs::LightingSampler::Fresnel, index) + ")"; // Enabled for difffuse lighting alpha component - if (config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::PrimaryAlpha || - config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::Both) + if (lighting.fresnel_selector == 
Pica::Regs::LightingFresnelSelector::PrimaryAlpha || + lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::Both) out += "diffuse_sum.a *= " + value + ";\n"; // Enabled for the specular lighting alpha component - if (config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::SecondaryAlpha || - config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::Both) + if (lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::SecondaryAlpha || + lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::Both) out += "specular_sum.a *= " + value + ";\n"; } @@ -510,6 +512,8 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { } std::string GenerateFragmentShader(const PicaShaderConfig& config) { + const auto& state = config.state; + std::string out = R"( #version 330 core #define NUM_TEV_STAGES 6 @@ -555,24 +559,24 @@ vec4 secondary_fragment_color = vec4(0.0); )"; // Do not do any sort of processing if it's obvious we're not going to pass the alpha test - if (config.alpha_test_func == Regs::CompareFunc::Never) { + if (state.alpha_test_func == Regs::CompareFunc::Never) { out += "discard; }"; return out; } - if (config.lighting.enable) + if (state.lighting.enable) WriteLighting(out, config); out += "vec4 combiner_buffer = vec4(0.0);\n"; out += "vec4 next_combiner_buffer = tev_combiner_buffer_color;\n"; out += "vec4 last_tex_env_out = vec4(0.0);\n"; - for (size_t index = 0; index < config.tev_stages.size(); ++index) + for (size_t index = 0; index < state.tev_stages.size(); ++index) WriteTevStage(out, config, (unsigned)index); - if (config.alpha_test_func != Regs::CompareFunc::Always) { + if (state.alpha_test_func != Regs::CompareFunc::Always) { out += "if ("; - AppendAlphaTestCondition(out, config.alpha_test_func); + AppendAlphaTestCondition(out, state.alpha_test_func); out += ") discard;\n"; } diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h 
b/src/video_core/renderer_opengl/gl_shader_gen.h index 3eb07d57a..bef3249cf 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.h +++ b/src/video_core/renderer_opengl/gl_shader_gen.h @@ -6,7 +6,7 @@ #include -struct PicaShaderConfig; +union PicaShaderConfig; namespace GLShader { From b242bdf9458642201bab4f1f884556ef73051554 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Mon, 25 Apr 2016 08:54:57 +0100 Subject: [PATCH 06/33] DSP/HLE: Implement Source processing --- src/audio_core/CMakeLists.txt | 2 + src/audio_core/hle/common.h | 2 +- src/audio_core/hle/dsp.cpp | 24 +++ src/audio_core/hle/dsp.h | 8 +- src/audio_core/hle/filter.h | 1 + src/audio_core/hle/source.cpp | 320 ++++++++++++++++++++++++++++++++++ src/audio_core/hle/source.h | 144 +++++++++++++++ 7 files changed, 496 insertions(+), 5 deletions(-) create mode 100644 src/audio_core/hle/source.cpp create mode 100644 src/audio_core/hle/source.h diff --git a/src/audio_core/CMakeLists.txt b/src/audio_core/CMakeLists.txt index 5a2747e78..4cd7aba67 100644 --- a/src/audio_core/CMakeLists.txt +++ b/src/audio_core/CMakeLists.txt @@ -4,6 +4,7 @@ set(SRCS hle/dsp.cpp hle/filter.cpp hle/pipe.cpp + hle/source.cpp interpolate.cpp sink_details.cpp ) @@ -15,6 +16,7 @@ set(HEADERS hle/dsp.h hle/filter.h hle/pipe.h + hle/source.h interpolate.h null_sink.h sink.h diff --git a/src/audio_core/hle/common.h b/src/audio_core/hle/common.h index 7910f42ae..596b67eaf 100644 --- a/src/audio_core/hle/common.h +++ b/src/audio_core/hle/common.h @@ -27,7 +27,7 @@ using QuadFrame32 = std::array, samples_per_frame>; */ template void FilterFrame(FrameT& frame, FilterT& filter) { - std::transform(frame.begin(), frame.end(), frame.begin(), [&filter](const typename FrameT::value_type& sample) { + std::transform(frame.begin(), frame.end(), frame.begin(), [&filter](const auto& sample) { return filter.ProcessSample(sample); }); } diff --git a/src/audio_core/hle/dsp.cpp b/src/audio_core/hle/dsp.cpp index 4d44bd2d9..0cdbdb06a 100644 --- 
a/src/audio_core/hle/dsp.cpp +++ b/src/audio_core/hle/dsp.cpp @@ -2,10 +2,12 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include #include #include "audio_core/hle/dsp.h" #include "audio_core/hle/pipe.h" +#include "audio_core/hle/source.h" #include "audio_core/sink.h" namespace DSP { @@ -38,16 +40,38 @@ static SharedMemory& WriteRegion() { return g_regions[1 - CurrentRegionIndex()]; } +static std::array sources = { + Source(0), Source(1), Source(2), Source(3), Source(4), Source(5), + Source(6), Source(7), Source(8), Source(9), Source(10), Source(11), + Source(12), Source(13), Source(14), Source(15), Source(16), Source(17), + Source(18), Source(19), Source(20), Source(21), Source(22), Source(23) +}; + static std::unique_ptr sink; void Init() { DSP::HLE::ResetPipes(); + for (auto& source : sources) { + source.Reset(); + } } void Shutdown() { } bool Tick() { + SharedMemory& read = ReadRegion(); + SharedMemory& write = WriteRegion(); + + std::array intermediate_mixes = {}; + + for (size_t i = 0; i < num_sources; i++) { + write.source_statuses.status[i] = sources[i].Tick(read.source_configurations.config[i], read.adpcm_coefficients.coeff[i]); + for (size_t mix = 0; mix < 3; mix++) { + sources[i].MixInto(intermediate_mixes[mix], mix); + } + } + return true; } diff --git a/src/audio_core/hle/dsp.h b/src/audio_core/hle/dsp.h index 4f2410c27..4459a5668 100644 --- a/src/audio_core/hle/dsp.h +++ b/src/audio_core/hle/dsp.h @@ -169,9 +169,9 @@ struct SourceConfiguration { float_le rate_multiplier; enum class InterpolationMode : u8 { - None = 0, + Polyphase = 0, Linear = 1, - Polyphase = 2 + None = 2 }; InterpolationMode interpolation_mode; @@ -318,10 +318,10 @@ ASSERT_DSP_STRUCT(SourceConfiguration::Configuration::Buffer, 20); struct SourceStatus { struct Status { u8 is_enabled; ///< Is this channel enabled? (Doesn't have to be playing anything.) 
- u8 previous_buffer_id_dirty; ///< Non-zero when previous_buffer_id changes + u8 current_buffer_id_dirty; ///< Non-zero when current_buffer_id changes u16_le sync; ///< Is set by the DSP to the value of SourceConfiguration::sync u32_dsp buffer_position; ///< Number of samples into the current buffer - u16_le previous_buffer_id; ///< Updated when a buffer finishes playing + u16_le current_buffer_id; ///< Updated when a buffer finishes playing INSERT_PADDING_DSPWORDS(1); }; diff --git a/src/audio_core/hle/filter.h b/src/audio_core/hle/filter.h index 75738f600..43d2035cd 100644 --- a/src/audio_core/hle/filter.h +++ b/src/audio_core/hle/filter.h @@ -16,6 +16,7 @@ namespace HLE { /// Preprocessing filters. There is an independent set of filters for each Source. class SourceFilters final { +public: SourceFilters() { Reset(); } /// Reset internal state. diff --git a/src/audio_core/hle/source.cpp b/src/audio_core/hle/source.cpp new file mode 100644 index 000000000..daaf6e3f3 --- /dev/null +++ b/src/audio_core/hle/source.cpp @@ -0,0 +1,320 @@ +// Copyright 2016 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include +#include + +#include "audio_core/codec.h" +#include "audio_core/hle/common.h" +#include "audio_core/hle/source.h" +#include "audio_core/interpolate.h" + +#include "common/assert.h" +#include "common/logging/log.h" + +#include "core/memory.h" + +namespace DSP { +namespace HLE { + +SourceStatus::Status Source::Tick(SourceConfiguration::Configuration& config, const s16_le (&adpcm_coeffs)[16]) { + ParseConfig(config, adpcm_coeffs); + + if (state.enabled) { + GenerateFrame(); + } + + return GetCurrentStatus(); +} + +void Source::MixInto(QuadFrame32& dest, size_t intermediate_mix_id) const { + if (!state.enabled) + return; + + const std::array& gains = state.gain.at(intermediate_mix_id); + for (size_t samplei = 0; samplei < samples_per_frame; samplei++) { + // Conversion from stereo (current_frame) to quadraphonic (dest) occurs here. + dest[samplei][0] += static_cast(gains[0] * current_frame[samplei][0]); + dest[samplei][1] += static_cast(gains[1] * current_frame[samplei][1]); + dest[samplei][2] += static_cast(gains[2] * current_frame[samplei][0]); + dest[samplei][3] += static_cast(gains[3] * current_frame[samplei][1]); + } +} + +void Source::Reset() { + current_frame.fill({}); + state = {}; +} + +void Source::ParseConfig(SourceConfiguration::Configuration& config, const s16_le (&adpcm_coeffs)[16]) { + if (!config.dirty_raw) { + return; + } + + if (config.reset_flag) { + config.reset_flag.Assign(0); + Reset(); + LOG_TRACE(Audio_DSP, "source_id=%zu reset", source_id); + } + + if (config.partial_reset_flag) { + config.partial_reset_flag.Assign(0); + state.input_queue = std::priority_queue, BufferOrder>{}; + LOG_TRACE(Audio_DSP, "source_id=%zu partial_reset", source_id); + } + + if (config.enable_dirty) { + config.enable_dirty.Assign(0); + state.enabled = config.enable != 0; + LOG_TRACE(Audio_DSP, "source_id=%zu enable=%d", source_id, state.enabled); + } + + if (config.sync_dirty) { + config.sync_dirty.Assign(0); + state.sync = config.sync; + 
LOG_TRACE(Audio_DSP, "source_id=%zu sync=%u", source_id, state.sync); + } + + if (config.rate_multiplier_dirty) { + config.rate_multiplier_dirty.Assign(0); + state.rate_multiplier = config.rate_multiplier; + LOG_TRACE(Audio_DSP, "source_id=%zu rate=%f", source_id, state.rate_multiplier); + + if (state.rate_multiplier <= 0) { + LOG_ERROR(Audio_DSP, "Was given an invalid rate multiplier: source_id=%zu rate=%f", source_id, state.rate_multiplier); + state.rate_multiplier = 1.0f; + // Note: Actual firmware starts producing garbage if this occurs. + } + } + + if (config.adpcm_coefficients_dirty) { + config.adpcm_coefficients_dirty.Assign(0); + std::transform(adpcm_coeffs, adpcm_coeffs + state.adpcm_coeffs.size(), state.adpcm_coeffs.begin(), + [](const auto& coeff) { return static_cast(coeff); }); + LOG_TRACE(Audio_DSP, "source_id=%zu adpcm update", source_id); + } + + if (config.gain_0_dirty) { + config.gain_0_dirty.Assign(0); + std::transform(config.gain[0], config.gain[0] + state.gain[0].size(), state.gain[0].begin(), + [](const auto& coeff) { return static_cast(coeff); }); + LOG_TRACE(Audio_DSP, "source_id=%zu gain 0 update", source_id); + } + + if (config.gain_1_dirty) { + config.gain_1_dirty.Assign(0); + std::transform(config.gain[1], config.gain[1] + state.gain[1].size(), state.gain[1].begin(), + [](const auto& coeff) { return static_cast(coeff); }); + LOG_TRACE(Audio_DSP, "source_id=%zu gain 1 update", source_id); + } + + if (config.gain_2_dirty) { + config.gain_2_dirty.Assign(0); + std::transform(config.gain[2], config.gain[2] + state.gain[2].size(), state.gain[2].begin(), + [](const auto& coeff) { return static_cast(coeff); }); + LOG_TRACE(Audio_DSP, "source_id=%zu gain 2 update", source_id); + } + + if (config.filters_enabled_dirty) { + config.filters_enabled_dirty.Assign(0); + state.filters.Enable(config.simple_filter_enabled.ToBool(), config.biquad_filter_enabled.ToBool()); + LOG_TRACE(Audio_DSP, "source_id=%zu enable_simple=%hu enable_biquad=%hu", + 
source_id, config.simple_filter_enabled.Value(), config.biquad_filter_enabled.Value()); + } + + if (config.simple_filter_dirty) { + config.simple_filter_dirty.Assign(0); + state.filters.Configure(config.simple_filter); + LOG_TRACE(Audio_DSP, "source_id=%zu simple filter update", source_id); + } + + if (config.biquad_filter_dirty) { + config.biquad_filter_dirty.Assign(0); + state.filters.Configure(config.biquad_filter); + LOG_TRACE(Audio_DSP, "source_id=%zu biquad filter update", source_id); + } + + if (config.interpolation_dirty) { + config.interpolation_dirty.Assign(0); + state.interpolation_mode = config.interpolation_mode; + LOG_TRACE(Audio_DSP, "source_id=%zu interpolation_mode=%zu", source_id, static_cast(state.interpolation_mode)); + } + + if (config.format_dirty || config.embedded_buffer_dirty) { + config.format_dirty.Assign(0); + state.format = config.format; + LOG_TRACE(Audio_DSP, "source_id=%zu format=%zu", source_id, static_cast(state.format)); + } + + if (config.mono_or_stereo_dirty || config.embedded_buffer_dirty) { + config.mono_or_stereo_dirty.Assign(0); + state.mono_or_stereo = config.mono_or_stereo; + LOG_TRACE(Audio_DSP, "source_id=%zu mono_or_stereo=%zu", source_id, static_cast(state.mono_or_stereo)); + } + + if (config.embedded_buffer_dirty) { + config.embedded_buffer_dirty.Assign(0); + state.input_queue.emplace(Buffer{ + config.physical_address, + config.length, + static_cast(config.adpcm_ps), + { config.adpcm_yn[0], config.adpcm_yn[1] }, + config.adpcm_dirty.ToBool(), + config.is_looping.ToBool(), + config.buffer_id, + state.mono_or_stereo, + state.format, + false + }); + LOG_TRACE(Audio_DSP, "enqueuing embedded addr=0x%08x len=%u id=%hu", config.physical_address, config.length, config.buffer_id); + } + + if (config.buffer_queue_dirty) { + config.buffer_queue_dirty.Assign(0); + for (size_t i = 0; i < 4; i++) { + if (config.buffers_dirty & (1 << i)) { + const auto& b = config.buffers[i]; + state.input_queue.emplace(Buffer{ + b.physical_address, + b.length, +
static_cast(b.adpcm_ps), + { b.adpcm_yn[0], b.adpcm_yn[1] }, + b.adpcm_dirty != 0, + b.is_looping != 0, + b.buffer_id, + state.mono_or_stereo, + state.format, + true + }); + LOG_TRACE(Audio_DSP, "enqueuing queued %zu addr=0x%08x len=%u id=%hu", i, b.physical_address, b.length, b.buffer_id); + } + } + config.buffers_dirty = 0; + } + + if (config.dirty_raw) { + LOG_DEBUG(Audio_DSP, "source_id=%zu remaining_dirty=%x", source_id, config.dirty_raw); + } + + config.dirty_raw = 0; +} + +void Source::GenerateFrame() { + current_frame.fill({}); + + if (state.current_buffer.empty() && !DequeueBuffer()) { + state.enabled = false; + state.buffer_update = true; + state.current_buffer_id = 0; + return; + } + + size_t frame_position = 0; + + state.current_sample_number = state.next_sample_number; + while (frame_position < current_frame.size()) { + if (state.current_buffer.empty() && !DequeueBuffer()) { + break; + } + + const size_t size_to_copy = std::min(state.current_buffer.size(), current_frame.size() - frame_position); + + std::copy(state.current_buffer.begin(), state.current_buffer.begin() + size_to_copy, current_frame.begin() + frame_position); + state.current_buffer.erase(state.current_buffer.begin(), state.current_buffer.begin() + size_to_copy); + + frame_position += size_to_copy; + state.next_sample_number += static_cast(size_to_copy); + } + + state.filters.ProcessFrame(current_frame); +} + + +bool Source::DequeueBuffer() { + ASSERT_MSG(state.current_buffer.empty(), "Shouldn't dequeue; we still have data in current_buffer"); + + if (state.input_queue.empty()) + return false; + + const Buffer buf = state.input_queue.top(); + state.input_queue.pop(); + + if (buf.adpcm_dirty) { + state.adpcm_state.yn1 = buf.adpcm_yn[0]; + state.adpcm_state.yn2 = buf.adpcm_yn[1]; + } + + if (buf.is_looping) { + LOG_ERROR(Audio_DSP, "Looped buffers are unimplemented at the moment"); + } + + const u8* const memory = Memory::GetPhysicalPointer(buf.physical_address); + if (memory) { + const 
unsigned num_channels = buf.mono_or_stereo == MonoOrStereo::Stereo ? 2 : 1; + switch (buf.format) { + case Format::PCM8: + state.current_buffer = Codec::DecodePCM8(num_channels, memory, buf.length); + break; + case Format::PCM16: + state.current_buffer = Codec::DecodePCM16(num_channels, memory, buf.length); + break; + case Format::ADPCM: + DEBUG_ASSERT(num_channels == 1); + state.current_buffer = Codec::DecodeADPCM(memory, buf.length, state.adpcm_coeffs, state.adpcm_state); + break; + default: + UNIMPLEMENTED(); + break; + } + } else { + LOG_WARNING(Audio_DSP, "source_id=%zu buffer_id=%hu length=%u: Invalid physical address 0x%08X", + source_id, buf.buffer_id, buf.length, buf.physical_address); + state.current_buffer.clear(); + return true; + } + + switch (state.interpolation_mode) { + case InterpolationMode::None: + state.current_buffer = AudioInterp::None(state.interp_state, state.current_buffer, state.rate_multiplier); + break; + case InterpolationMode::Linear: + state.current_buffer = AudioInterp::Linear(state.interp_state, state.current_buffer, state.rate_multiplier); + break; + case InterpolationMode::Polyphase: + // TODO(merry): Implement polyphase interpolation + state.current_buffer = AudioInterp::Linear(state.interp_state, state.current_buffer, state.rate_multiplier); + break; + default: + UNIMPLEMENTED(); + break; + } + + state.current_sample_number = 0; + state.next_sample_number = 0; + state.current_buffer_id = buf.buffer_id; + state.buffer_update = buf.from_queue; + + LOG_TRACE(Audio_DSP, "source_id=%zu buffer_id=%hu from_queue=%s current_buffer.size()=%zu", + source_id, buf.buffer_id, buf.from_queue ? "true" : "false", state.current_buffer.size()); + return true; +} + +SourceStatus::Status Source::GetCurrentStatus() { + SourceStatus::Status ret; + + // Applications depend on the correct emulation of + // current_buffer_id_dirty and current_buffer_id to synchronise + // audio with video. 
+ ret.is_enabled = state.enabled; + ret.current_buffer_id_dirty = state.buffer_update ? 1 : 0; + state.buffer_update = false; + ret.current_buffer_id = state.current_buffer_id; + ret.buffer_position = state.current_sample_number; + ret.sync = state.sync; + + return ret; +} + +} // namespace HLE +} // namespace DSP diff --git a/src/audio_core/hle/source.h b/src/audio_core/hle/source.h new file mode 100644 index 000000000..7ee08d424 --- /dev/null +++ b/src/audio_core/hle/source.h @@ -0,0 +1,144 @@ +// Copyright 2016 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include + +#include "audio_core/codec.h" +#include "audio_core/hle/common.h" +#include "audio_core/hle/dsp.h" +#include "audio_core/hle/filter.h" +#include "audio_core/interpolate.h" + +#include "common/common_types.h" + +namespace DSP { +namespace HLE { + +/** + * This module performs: + * - Buffer management + * - Decoding of buffers + * - Buffer resampling and interpolation + * - Per-source filtering (SimpleFilter, BiquadFilter) + * - Per-source gain + * - Other per-source processing + */ +class Source final { +public: + explicit Source(size_t source_id_) : source_id(source_id_) { + Reset(); + } + + /// Resets internal state. + void Reset(); + + /** + * This is called once every audio frame. This performs per-source processing every frame. + * @param config The new configuration we've got for this Source from the application. + * @param adpcm_coeffs ADPCM coefficients to use if config tells us to use them (may contain invalid values otherwise). + * @return The current status of this Source. This is given back to the emulated application via SharedMemory. + */ + SourceStatus::Status Tick(SourceConfiguration::Configuration& config, const s16_le (&adpcm_coeffs)[16]); + + /** + * Mix this source's output into dest, using the gains for the `intermediate_mix_id`-th intermediate mixer. 
+ * @param dest The QuadFrame32 to mix into. + * @param intermediate_mix_id The id of the intermediate mix whose gains we are using. + */ + void MixInto(QuadFrame32& dest, size_t intermediate_mix_id) const; + +private: + const size_t source_id; + StereoFrame16 current_frame; + + using Format = SourceConfiguration::Configuration::Format; + using InterpolationMode = SourceConfiguration::Configuration::InterpolationMode; + using MonoOrStereo = SourceConfiguration::Configuration::MonoOrStereo; + + /// Internal representation of a buffer for our buffer queue + struct Buffer { + PAddr physical_address; + u32 length; + u8 adpcm_ps; + std::array adpcm_yn; + bool adpcm_dirty; + bool is_looping; + u16 buffer_id; + + MonoOrStereo mono_or_stereo; + Format format; + + bool from_queue; + }; + + struct BufferOrder { + bool operator() (const Buffer& a, const Buffer& b) const { + // Lower buffer_id comes first. + return a.buffer_id > b.buffer_id; + } + }; + + struct { + + // State variables + + bool enabled = false; + u16 sync = 0; + + // Mixing + + std::array, 3> gain = {}; + + // Buffer queue + + std::priority_queue, BufferOrder> input_queue; + MonoOrStereo mono_or_stereo = MonoOrStereo::Mono; + Format format = Format::ADPCM; + + // Current buffer + + u32 current_sample_number = 0; + u32 next_sample_number = 0; + std::vector> current_buffer; + + // buffer_id state + + bool buffer_update = false; + u32 current_buffer_id = 0; + + // Decoding state + + std::array adpcm_coeffs = {}; + Codec::ADPCMState adpcm_state = {}; + + // Resampling state + + float rate_multiplier = 1.0; + InterpolationMode interpolation_mode = InterpolationMode::Polyphase; + AudioInterp::State interp_state = {}; + + // Filter state + + SourceFilters filters; + + } state; + + // Internal functions + + /// INTERNAL: Update our internal state based on the current config. 
+ void ParseConfig(SourceConfiguration::Configuration& config, const s16_le (&adpcm_coeffs)[16]); + /// INTERNAL: Generate the current audio output for this frame based on our internal state. + void GenerateFrame(); + /// INTERNAL: Dequeues a buffer and does preprocessing on it (decoding, resampling). Puts it into current_buffer. + bool DequeueBuffer(); + /// INTERNAL: Generates a SourceStatus::Status based on our internal state. + SourceStatus::Status GetCurrentStatus(); +}; + +} // namespace HLE +} // namespace DSP From 7a77b8356ca0aebbd29b88402c9470dd22fedeab Mon Sep 17 00:00:00 2001 From: Jannik Vogel Date: Wed, 4 May 2016 10:21:51 +0200 Subject: [PATCH 07/33] Pica: Rename VertexLoaded breakpoint to VertexShaderInvocation --- src/citra_qt/debugger/graphics_breakpoints.cpp | 2 +- src/citra_qt/debugger/graphics_vertex_shader.cpp | 4 ++-- src/video_core/command_processor.cpp | 10 ++++------ src/video_core/debug_utils/debug_utils.h | 2 +- 4 files changed, 8 insertions(+), 10 deletions(-) diff --git a/src/citra_qt/debugger/graphics_breakpoints.cpp b/src/citra_qt/debugger/graphics_breakpoints.cpp index c8510128a..fe66918a8 100644 --- a/src/citra_qt/debugger/graphics_breakpoints.cpp +++ b/src/citra_qt/debugger/graphics_breakpoints.cpp @@ -44,7 +44,7 @@ QVariant BreakPointModel::data(const QModelIndex& index, int role) const { Pica::DebugContext::Event::PicaCommandProcessed, tr("Pica command processed") }, { Pica::DebugContext::Event::IncomingPrimitiveBatch, tr("Incoming primitive batch") }, { Pica::DebugContext::Event::FinishedPrimitiveBatch, tr("Finished primitive batch") }, - { Pica::DebugContext::Event::VertexLoaded, tr("Vertex loaded") }, + { Pica::DebugContext::Event::VertexShaderInvocation, tr("Vertex shader invocation") }, { Pica::DebugContext::Event::IncomingDisplayTransfer, tr("Incoming display transfer") }, { Pica::DebugContext::Event::GSPCommandProcessed, tr("GSP command processed") }, { Pica::DebugContext::Event::BufferSwapped, tr("Buffers swapped") } diff 
--git a/src/citra_qt/debugger/graphics_vertex_shader.cpp b/src/citra_qt/debugger/graphics_vertex_shader.cpp index d648d4640..6e8d7ef42 100644 --- a/src/citra_qt/debugger/graphics_vertex_shader.cpp +++ b/src/citra_qt/debugger/graphics_vertex_shader.cpp @@ -365,7 +365,7 @@ GraphicsVertexShaderWidget::GraphicsVertexShaderWidget(std::shared_ptr< Pica::De input_data[i]->setValidator(new QDoubleValidator(input_data[i])); } - breakpoint_warning = new QLabel(tr("(data only available at VertexLoaded breakpoints)")); + breakpoint_warning = new QLabel(tr("(data only available at vertex shader invocation breakpoints)")); // TODO: Add some button for jumping to the shader entry point @@ -454,7 +454,7 @@ GraphicsVertexShaderWidget::GraphicsVertexShaderWidget(std::shared_ptr< Pica::De void GraphicsVertexShaderWidget::OnBreakPointHit(Pica::DebugContext::Event event, void* data) { auto input = static_cast(data); - if (event == Pica::DebugContext::Event::VertexLoaded) { + if (event == Pica::DebugContext::Event::VertexShaderInvocation) { Reload(true, data); } else { // No vertex data is retrievable => invalidate currently stored vertex data diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index be1a936b2..dd1379503 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -146,10 +146,9 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { Shader::UnitState shader_unit; Shader::Setup(); - if (g_debug_context) - g_debug_context->OnEvent(DebugContext::Event::VertexLoaded, static_cast(&immediate_input)); - // Send to vertex shader + if (g_debug_context) + g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, static_cast(&immediate_input)); Shader::OutputVertex output = Shader::Run(shader_unit, immediate_input, regs.vs.num_input_attributes+1); // Send to renderer @@ -272,10 +271,9 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { Shader::InputVertex input; 
loader.LoadVertex(base_address, index, vertex, input, memory_accesses); - if (g_debug_context) - g_debug_context->OnEvent(DebugContext::Event::VertexLoaded, (void*)&input); - // Send to vertex shader + if (g_debug_context) + g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, (void*)&input); output = Shader::Run(shader_unit, input, loader.GetNumTotalAttributes()); if (is_indexed) { diff --git a/src/video_core/debug_utils/debug_utils.h b/src/video_core/debug_utils/debug_utils.h index be2d0301a..f628292a4 100644 --- a/src/video_core/debug_utils/debug_utils.h +++ b/src/video_core/debug_utils/debug_utils.h @@ -40,7 +40,7 @@ public: PicaCommandProcessed, IncomingPrimitiveBatch, FinishedPrimitiveBatch, - VertexLoaded, + VertexShaderInvocation, IncomingDisplayTransfer, GSPCommandProcessed, BufferSwapped, From 5d5dd66d9222ced82dd61747ef4078fc1eae2496 Mon Sep 17 00:00:00 2001 From: wwylele Date: Thu, 14 Apr 2016 00:04:05 +0300 Subject: [PATCH 08/33] add icon & title to game list --- src/citra_qt/game_list.cpp | 10 +++- src/citra_qt/game_list.h | 2 +- src/citra_qt/game_list_p.h | 106 +++++++++++++++++++++++++++++++++---- src/core/loader/3dsx.cpp | 27 ++++++++++ src/core/loader/3dsx.h | 9 +++- src/core/loader/loader.cpp | 50 ++++++++++------- src/core/loader/loader.h | 57 ++++++++++++++++++++ src/core/loader/ncch.cpp | 22 ++++++-- src/core/loader/ncch.h | 7 +++ 9 files changed, 254 insertions(+), 36 deletions(-) diff --git a/src/citra_qt/game_list.cpp b/src/citra_qt/game_list.cpp index d14532102..32339e6a6 100644 --- a/src/citra_qt/game_list.cpp +++ b/src/citra_qt/game_list.cpp @@ -34,8 +34,8 @@ GameList::GameList(QWidget* parent) tree_view->setUniformRowHeights(true); item_model->insertColumns(0, COLUMN_COUNT); - item_model->setHeaderData(COLUMN_FILE_TYPE, Qt::Horizontal, "File type"); item_model->setHeaderData(COLUMN_NAME, Qt::Horizontal, "Name"); + item_model->setHeaderData(COLUMN_FILE_TYPE, Qt::Horizontal, "File type"); 
item_model->setHeaderData(COLUMN_SIZE, Qt::Horizontal, "Size"); connect(tree_view, SIGNAL(activated(const QModelIndex&)), this, SLOT(ValidateEntry(const QModelIndex&))); @@ -143,9 +143,15 @@ void GameListWorker::AddFstEntriesToGameList(const std::string& dir_path, bool d LOG_WARNING(Frontend, "Filetype and extension of file %s do not match.", physical_name.c_str()); } + std::vector smdh; + std::unique_ptr loader = Loader::GetLoader(FileUtil::IOFile(physical_name, "rb"), filetype, filename_filename, physical_name); + + if (loader) + loader->ReadIcon(smdh); + emit EntryReady({ + new GameListItemPath(QString::fromStdString(physical_name), smdh), new GameListItem(QString::fromStdString(Loader::GetFileTypeString(filetype))), - new GameListItemPath(QString::fromStdString(physical_name)), new GameListItemSize(FileUtil::GetSize(physical_name)), }); } diff --git a/src/citra_qt/game_list.h b/src/citra_qt/game_list.h index 48febdc60..198674f04 100644 --- a/src/citra_qt/game_list.h +++ b/src/citra_qt/game_list.h @@ -20,8 +20,8 @@ class GameList : public QWidget { public: enum { - COLUMN_FILE_TYPE, COLUMN_NAME, + COLUMN_FILE_TYPE, COLUMN_SIZE, COLUMN_COUNT, // Number of columns }; diff --git a/src/citra_qt/game_list_p.h b/src/citra_qt/game_list_p.h index 820012bce..284f5da81 100644 --- a/src/citra_qt/game_list_p.h +++ b/src/citra_qt/game_list_p.h @@ -6,13 +6,85 @@ #include +#include #include #include #include #include "citra_qt/util/util.h" #include "common/string_util.h" +#include "common/color.h" +#include "core/loader/loader.h" + +#include "video_core/utils.h" + +/** + * Tests if data is a valid SMDH by its length and magic number. 
+ * @param smdh_data data buffer to test + * @return bool test result + */ +static bool IsValidSMDH(const std::vector& smdh_data) { + if (smdh_data.size() < sizeof(Loader::SMDH)) + return false; + + u32 magic; + memcpy(&magic, smdh_data.data(), 4); + + return Loader::MakeMagic('S', 'M', 'D', 'H') == magic; +} + +/** + * Gets game icon from SMDH + * @param smdh SMDH data + * @param large If true, returns large icon (48x48), otherwise returns small icon (24x24) + * @return QPixmap game icon + */ +static QPixmap GetIconFromSMDH(const Loader::SMDH& smdh, bool large) { + u32 size; + const u8* icon_data; + + if (large) { + size = 48; + icon_data = smdh.large_icon.data(); + } else { + size = 24; + icon_data = smdh.small_icon.data(); + } + + QImage icon(size, size, QImage::Format::Format_RGB888); + for (u32 x = 0; x < size; ++x) { + for (u32 y = 0; y < size; ++y) { + u32 coarse_y = y & ~7; + auto v = Color::DecodeRGB565( + icon_data + VideoCore::GetMortonOffset(x, y, 2) + coarse_y * size * 2); + icon.setPixel(x, y, qRgb(v.r(), v.g(), v.b())); + } + } + return QPixmap::fromImage(icon); +} + +/** + * Gets the default icon (for games without valid SMDH) + * @param large If true, returns large icon (48x48), otherwise returns small icon (24x24) + * @return QPixmap default icon + */ +static QPixmap GetDefaultIcon(bool large) { + int size = large ? 48 : 24; + QPixmap icon(size, size); + icon.fill(Qt::transparent); + return icon; +} + +/** + * Gets the short game title from SMDH + * @param smdh SMDH data + * @param language title language + * @return QString short title + */ +static QString GetShortTitleFromSMDH(const Loader::SMDH& smdh, Loader::SMDH::TitleLanguage language) { + return QString::fromUtf16(smdh.titles[static_cast(language)].short_title.data()); +} class GameListItem : public QStandardItem { @@ -27,29 +99,43 @@ public: * A specialization of GameListItem for path values.
* This class ensures that for every full path value it holds, a correct string representation * of just the filename (with no extension) will be displayed to the user. + * If this class receives valid SMDH data, it will also display game icons and titles. */ class GameListItemPath : public GameListItem { public: static const int FullPathRole = Qt::UserRole + 1; + static const int TitleRole = Qt::UserRole + 2; GameListItemPath(): GameListItem() {} - GameListItemPath(const QString& game_path): GameListItem() + GameListItemPath(const QString& game_path, const std::vector& smdh_data): GameListItem() { setData(game_path, FullPathRole); + + if (!IsValidSMDH(smdh_data)) { + // SMDH is not valid, set a default icon + setData(GetDefaultIcon(true), Qt::DecorationRole); + return; + } + + Loader::SMDH smdh; + memcpy(&smdh, smdh_data.data(), sizeof(Loader::SMDH)); + + // Get icon from SMDH + setData(GetIconFromSMDH(smdh, true), Qt::DecorationRole); + + // Get title from SMDH + setData(GetShortTitleFromSMDH(smdh, Loader::SMDH::TitleLanguage::English), TitleRole); } - void setData(const QVariant& value, int role) override - { - // By specializing setData for FullPathRole, we can ensure that the two string - // representations of the data are always accurate and in the correct format. - if (role == FullPathRole) { + QVariant data(int role) const override { + if (role == Qt::DisplayRole) { std::string filename; - Common::SplitPath(value.toString().toStdString(), nullptr, &filename, nullptr); - GameListItem::setData(QString::fromStdString(filename), Qt::DisplayRole); - GameListItem::setData(value, FullPathRole); + Common::SplitPath(data(FullPathRole).toString().toStdString(), nullptr, &filename, nullptr); + QString title = data(TitleRole).toString(); + return QString::fromStdString(filename) + (title.isEmpty() ?
"" : "\n " + title); } else { - GameListItem::setData(value, role); + return GameListItem::data(role); } } }; diff --git a/src/core/loader/3dsx.cpp b/src/core/loader/3dsx.cpp index 5fb3b9e2b..48a11ef81 100644 --- a/src/core/loader/3dsx.cpp +++ b/src/core/loader/3dsx.cpp @@ -303,4 +303,31 @@ ResultStatus AppLoader_THREEDSX::ReadRomFS(std::shared_ptr& ro return ResultStatus::ErrorNotUsed; } +ResultStatus AppLoader_THREEDSX::ReadIcon(std::vector& buffer) { + if (!file.IsOpen()) + return ResultStatus::Error; + + // Reset read pointer in case this file has been read before. + file.Seek(0, SEEK_SET); + + THREEDSX_Header hdr; + if (file.ReadBytes(&hdr, sizeof(THREEDSX_Header)) != sizeof(THREEDSX_Header)) + return ResultStatus::Error; + + if (hdr.header_size != sizeof(THREEDSX_Header)) + return ResultStatus::Error; + + // Check if the 3DSX has a SMDH... + if (hdr.smdh_offset != 0) { + file.Seek(hdr.smdh_offset, SEEK_SET); + buffer.resize(hdr.smdh_size); + + if (file.ReadBytes(&buffer[0], hdr.smdh_size) != hdr.smdh_size) + return ResultStatus::Error; + + return ResultStatus::Success; + } + return ResultStatus::ErrorNotUsed; +} + } // namespace Loader diff --git a/src/core/loader/3dsx.h b/src/core/loader/3dsx.h index 365ddb7a5..3ee686703 100644 --- a/src/core/loader/3dsx.h +++ b/src/core/loader/3dsx.h @@ -17,7 +17,7 @@ namespace Loader { /// Loads an 3DSX file class AppLoader_THREEDSX final : public AppLoader { public: - AppLoader_THREEDSX(FileUtil::IOFile&& file, std::string filename, const std::string& filepath) + AppLoader_THREEDSX(FileUtil::IOFile&& file, const std::string& filename, const std::string& filepath) : AppLoader(std::move(file)), filename(std::move(filename)), filepath(filepath) {} /** @@ -33,6 +33,13 @@ public: */ ResultStatus Load() override; + /** + * Get the icon (typically icon section) of the application + * @param buffer Reference to buffer to store data + * @return ResultStatus result of function + */ + ResultStatus ReadIcon(std::vector& buffer) 
override; + /** * Get the RomFS of the application * @param romfs_file Reference to buffer to store data diff --git a/src/core/loader/loader.cpp b/src/core/loader/loader.cpp index 886501c41..0d4c1d351 100644 --- a/src/core/loader/loader.cpp +++ b/src/core/loader/loader.cpp @@ -90,6 +90,28 @@ const char* GetFileTypeString(FileType type) { return "unknown"; } +std::unique_ptr GetLoader(FileUtil::IOFile&& file, FileType type, + const std::string& filename, const std::string& filepath) { + switch (type) { + + // 3DSX file format. + case FileType::THREEDSX: + return std::make_unique(std::move(file), filename, filepath); + + // Standard ELF file format. + case FileType::ELF: + return std::make_unique(std::move(file), filename); + + // NCCH/NCSD container formats. + case FileType::CXI: + case FileType::CCI: + return std::make_unique(std::move(file), filepath); + + default: + return std::unique_ptr(); + } +} + ResultStatus LoadFile(const std::string& filename) { FileUtil::IOFile file(filename, "rb"); if (!file.IsOpen()) { @@ -111,15 +133,19 @@ ResultStatus LoadFile(const std::string& filename) { LOG_INFO(Loader, "Loading file %s as %s...", filename.c_str(), GetFileTypeString(type)); + std::unique_ptr app_loader = GetLoader(std::move(file), type, filename_filename, filename); + switch (type) { - //3DSX file format... + // 3DSX file format... + // or NCCH/NCSD container formats... 
case FileType::THREEDSX: + case FileType::CXI: + case FileType::CCI: { - AppLoader_THREEDSX app_loader(std::move(file), filename_filename, filename); // Load application and RomFS - if (ResultStatus::Success == app_loader.Load()) { - Service::FS::RegisterArchiveType(std::make_unique(app_loader), Service::FS::ArchiveIdCode::RomFS); + if (ResultStatus::Success == app_loader->Load()) { + Service::FS::RegisterArchiveType(std::make_unique(*app_loader), Service::FS::ArchiveIdCode::RomFS); return ResultStatus::Success; } break; @@ -127,21 +153,7 @@ ResultStatus LoadFile(const std::string& filename) { // Standard ELF file format... case FileType::ELF: - return AppLoader_ELF(std::move(file), filename_filename).Load(); - - // NCCH/NCSD container formats... - case FileType::CXI: - case FileType::CCI: - { - AppLoader_NCCH app_loader(std::move(file), filename); - - // Load application and RomFS - ResultStatus result = app_loader.Load(); - if (ResultStatus::Success == result) { - Service::FS::RegisterArchiveType(std::make_unique(app_loader), Service::FS::ArchiveIdCode::RomFS); - } - return result; - } + return app_loader->Load(); // CIA file format... case FileType::CIA: diff --git a/src/core/loader/loader.h b/src/core/loader/loader.h index 84a4ce5fc..9d3e9ed3b 100644 --- a/src/core/loader/loader.h +++ b/src/core/loader/loader.h @@ -10,8 +10,10 @@ #include #include +#include "common/common_funcs.h" #include "common/common_types.h" #include "common/file_util.h" +#include "common/swap.h" namespace Kernel { struct AddressMapping; @@ -78,6 +80,51 @@ constexpr u32 MakeMagic(char a, char b, char c, char d) { return a | b << 8 | c << 16 | d << 24; } +/// SMDH data structure that contains titles, icons etc. 
See https://www.3dbrew.org/wiki/SMDH +struct SMDH { + u32_le magic; + u16_le version; + INSERT_PADDING_BYTES(2); + + struct Title { + std::array short_title; + std::array long_title; + std::array publisher; + }; + std::array titles; + + std::array ratings; + u32_le region_lockout; + u32_le match_maker_id; + u64_le match_maker_bit_id; + u32_le flags; + u16_le eula_version; + INSERT_PADDING_BYTES(2); + float_le banner_animation_frame; + u32_le cec_id; + INSERT_PADDING_BYTES(8); + + std::array small_icon; + std::array large_icon; + + /// indicates the language used for each title entry + enum class TitleLanguage { + Japanese = 0, + English = 1, + French = 2, + German = 3, + Italian = 4, + Spanish = 5, + SimplifiedChinese = 6, + Korean= 7, + Dutch = 8, + Portuguese = 9, + Russian = 10, + TraditionalChinese = 11 + }; +}; +static_assert(sizeof(SMDH) == 0x36C0, "SMDH structure size is wrong"); + /// Interface for loading an application class AppLoader : NonCopyable { public: @@ -149,6 +196,16 @@ protected: */ extern const std::initializer_list default_address_mappings; +/** + * Get a loader for a file with a specific type + * @param file The file to load + * @param type The type of the file + * @param filename the file name (without path) + * @param filepath the file full path (with name) + * @return std::unique_ptr a pointer to a loader object; nullptr for unsupported type + */ +std::unique_ptr GetLoader(FileUtil::IOFile&& file, FileType type, const std::string& filename, const std::string& filepath); + /** * Identifies and loads a bootable file * @param filename String filename of bootable file diff --git a/src/core/loader/ncch.cpp b/src/core/loader/ncch.cpp index 066e91a9e..d362a4419 100644 --- a/src/core/loader/ncch.cpp +++ b/src/core/loader/ncch.cpp @@ -173,6 +173,10 @@ ResultStatus AppLoader_NCCH::LoadSectionExeFS(const char* name, std::vector& if (!file.IsOpen()) return ResultStatus::Error; + ResultStatus result = LoadExeFS(); + if (result != ResultStatus::Success) 
+ return result; + LOG_DEBUG(Loader, "%d sections:", kMaxSections); // Iterate through the ExeFs archive until we find a section with the specified name... for (unsigned section_number = 0; section_number < kMaxSections; section_number++) { @@ -215,9 +219,9 @@ ResultStatus AppLoader_NCCH::LoadSectionExeFS(const char* name, std::vector& return ResultStatus::ErrorNotUsed; } -ResultStatus AppLoader_NCCH::Load() { - if (is_loaded) - return ResultStatus::ErrorAlreadyLoaded; +ResultStatus AppLoader_NCCH::LoadExeFS() { + if (is_exefs_loaded) + return ResultStatus::Success; if (!file.IsOpen()) return ResultStatus::Error; @@ -282,6 +286,18 @@ ResultStatus AppLoader_NCCH::Load() { if (file.ReadBytes(&exefs_header, sizeof(ExeFs_Header)) != sizeof(ExeFs_Header)) return ResultStatus::Error; + is_exefs_loaded = true; + return ResultStatus::Success; +} + +ResultStatus AppLoader_NCCH::Load() { + if (is_loaded) + return ResultStatus::ErrorAlreadyLoaded; + + ResultStatus result = LoadExeFS(); + if (result != ResultStatus::Success) + return result; + is_loaded = true; // Set state to loaded return LoadExec(); // Load the executable into memory for booting diff --git a/src/core/loader/ncch.h b/src/core/loader/ncch.h index ca6772a78..fd852c3de 100644 --- a/src/core/loader/ncch.h +++ b/src/core/loader/ncch.h @@ -232,6 +232,13 @@ private: */ ResultStatus LoadExec(); + /** + * Ensure ExeFS is loaded and ready for reading sections + * @return ResultStatus result of function + */ + ResultStatus LoadExeFS(); + + bool is_exefs_loaded = false; bool is_compressed = false; u32 entry_point = 0; From 0176e2786fc7a042e06abb2d6ce8a3eb95e96e28 Mon Sep 17 00:00:00 2001 From: wwylele Date: Sat, 30 Apr 2016 02:40:54 +0300 Subject: [PATCH 09/33] make the name column larger as default --- src/citra_qt/game_list.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/citra_qt/game_list.cpp b/src/citra_qt/game_list.cpp index 32339e6a6..d4ac9c96e 100644 --- a/src/citra_qt/game_list.cpp 
+++ b/src/citra_qt/game_list.cpp @@ -109,7 +109,11 @@ void GameList::SaveInterfaceLayout() void GameList::LoadInterfaceLayout() { auto header = tree_view->header(); - header->restoreState(UISettings::values.gamelist_header_state); + if (!header->restoreState(UISettings::values.gamelist_header_state)) { + // We are using the name column to display icons and titles + // so make it as large as possible as default. + header->resizeSection(COLUMN_NAME, header->width()); + } item_model->sort(header->sortIndicatorSection(), header->sortIndicatorOrder()); } From 9da1534237dfbe72be36200453dc52fce38ae557 Mon Sep 17 00:00:00 2001 From: wwylele Date: Sat, 30 Apr 2016 10:33:11 +0300 Subject: [PATCH 10/33] add missing header --- src/citra_qt/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/src/citra_qt/CMakeLists.txt b/src/citra_qt/CMakeLists.txt index cc9e0c624..3f0099200 100644 --- a/src/citra_qt/CMakeLists.txt +++ b/src/citra_qt/CMakeLists.txt @@ -55,6 +55,7 @@ set(HEADERS configure_dialog.h configure_general.h game_list.h + game_list_p.h hotkeys.h main.h ui_settings.h From e311398a2202a174541144aeaa297752fab4d79b Mon Sep 17 00:00:00 2001 From: mailwl Date: Sun, 24 Apr 2016 12:39:06 +0300 Subject: [PATCH 11/33] Layout Mii parameters input/output, and return success as result of applet work --- src/core/hle/applets/mii_selector.cpp | 5 +++ src/core/hle/applets/mii_selector.h | 44 +++++++++++++++++++++++++++ 2 files changed, 49 insertions(+) diff --git a/src/core/hle/applets/mii_selector.cpp b/src/core/hle/applets/mii_selector.cpp index 708d2f630..5191c821d 100644 --- a/src/core/hle/applets/mii_selector.cpp +++ b/src/core/hle/applets/mii_selector.cpp @@ -55,6 +55,11 @@ ResultCode MiiSelector::StartImpl(const Service::APT::AppletStartupParameter& pa // TODO(Subv): Set the expected fields in the response buffer before resending it to the application. 
// TODO(Subv): Reverse the parameter format for the Mii Selector + if(parameter.buffer_size >= sizeof(u32)) { + // TODO: defaults return no error, but garbage in other unknown fields + memset(parameter.data, 0, sizeof(u32)); + } + // Let the application know that we're closing Service::APT::MessageParameter message; message.buffer_size = parameter.buffer_size; diff --git a/src/core/hle/applets/mii_selector.h b/src/core/hle/applets/mii_selector.h index 6a3e7c8eb..c02dded4a 100644 --- a/src/core/hle/applets/mii_selector.h +++ b/src/core/hle/applets/mii_selector.h @@ -16,6 +16,50 @@ namespace HLE { namespace Applets { +struct MiiConfig { + u8 unk_000; + u8 unk_001; + u8 unk_002; + u8 unk_003; + u8 unk_004; + INSERT_PADDING_BYTES(3); + u16 unk_008; + INSERT_PADDING_BYTES(0x8C - 0xA); + u8 unk_08C; + INSERT_PADDING_BYTES(3); + u16 unk_090; + INSERT_PADDING_BYTES(2); + u32 unk_094; + u16 unk_098; + u8 unk_09A[0x64]; + u8 unk_0FE; + u8 unk_0FF; + u32 unk_100; +}; + +static_assert(sizeof(MiiConfig) == 0x104, "MiiConfig structure has incorrect size"); +#define ASSERT_REG_POSITION(field_name, position) static_assert(offsetof(MiiConfig, field_name) == position, "Field "#field_name" has invalid position") +ASSERT_REG_POSITION(unk_008, 0x08); +ASSERT_REG_POSITION(unk_08C, 0x8C); +ASSERT_REG_POSITION(unk_090, 0x90); +ASSERT_REG_POSITION(unk_094, 0x94); +ASSERT_REG_POSITION(unk_0FE, 0xFE); +#undef ASSERT_REG_POSITION + +struct MiiResult { + u32 result_code; + u8 unk_04; + INSERT_PADDING_BYTES(7); + u8 unk_0C[0x60]; + u8 unk_6C[0x16]; + INSERT_PADDING_BYTES(2); +}; +static_assert(sizeof(MiiResult) == 0x84, "MiiResult structure has incorrect size"); +#define ASSERT_REG_POSITION(field_name, position) static_assert(offsetof(MiiResult, field_name) == position, "Field "#field_name" has invalid position") +ASSERT_REG_POSITION(unk_0C, 0x0C); +ASSERT_REG_POSITION(unk_6C, 0x6C); +#undef ASSERT_REG_POSITION + class MiiSelector final : public Applet { public: 
MiiSelector(Service::APT::AppletId id); From 4cb2995c6159a5c6508ea0571ffa5fc5bcd21a47 Mon Sep 17 00:00:00 2001 From: bunnei Date: Thu, 5 May 2016 21:34:10 -0400 Subject: [PATCH 12/33] HLE: Rename RescheduleIsPending to IsReschedulePending. --- src/core/core.cpp | 2 +- src/core/hle/hle.cpp | 2 +- src/core/hle/hle.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/core/core.cpp b/src/core/core.cpp index 609ca860d..a156682aa 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -52,7 +52,7 @@ void RunLoop(int tight_loop) { } HW::Update(); - if (HLE::RescheduleIsPending()) { + if (HLE::IsReschedulePending()) { Kernel::Reschedule(); } } diff --git a/src/core/hle/hle.cpp b/src/core/hle/hle.cpp index 3b1369800..96d3ec05b 100644 --- a/src/core/hle/hle.cpp +++ b/src/core/hle/hle.cpp @@ -36,7 +36,7 @@ void Reschedule(const char *reason) { reschedule = true; } -bool RescheduleIsPending() { +bool IsReschedulePending() { return reschedule; } diff --git a/src/core/hle/hle.h b/src/core/hle/hle.h index 58dffe587..69ac0ade6 100644 --- a/src/core/hle/hle.h +++ b/src/core/hle/hle.h @@ -14,7 +14,7 @@ const Handle INVALID_HANDLE = 0; namespace HLE { void Reschedule(const char *reason); -bool RescheduleIsPending(); +bool IsReschedulePending(); void DoneRescheduling(); void Init(); From aa4d4ff23c92340baa1771a7d7308d1d91d3b655 Mon Sep 17 00:00:00 2001 From: Emmanuel Gil Peyrot Date: Sat, 19 Mar 2016 01:31:01 +0000 Subject: [PATCH 13/33] Frontends, VideoCore: Move glad initialisation to the frontend MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On SDL2 this allows it to use SDL_GL_GetProcAddress() instead of the default function loader, and fixes a crash when using apitrace with an EGL context. On Qt we will need to migrate from QGLWidget to QOpenGLWidget and QOpenGLContext before we can use gladLoadGLLoader() instead of gladLoadGL(), since the former doesn’t expose a function loader. 
--- src/citra/emu_window/emu_window_sdl2.cpp | 7 +++++++ src/citra_qt/main.cpp | 11 +++++++++++ src/video_core/renderer_opengl/renderer_opengl.cpp | 6 ------ 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/src/citra/emu_window/emu_window_sdl2.cpp b/src/citra/emu_window/emu_window_sdl2.cpp index 924189f4c..12cdd9d95 100644 --- a/src/citra/emu_window/emu_window_sdl2.cpp +++ b/src/citra/emu_window/emu_window_sdl2.cpp @@ -9,6 +9,8 @@ #define SDL_MAIN_HANDLED #include +#include + #include "common/key_map.h" #include "common/logging/log.h" #include "common/scm_rev.h" @@ -98,6 +100,11 @@ EmuWindow_SDL2::EmuWindow_SDL2() { exit(1); } + if (!gladLoadGLLoader(static_cast(SDL_GL_GetProcAddress))) { + LOG_CRITICAL(Frontend, "Failed to initialize GL functions! Exiting..."); + exit(1); + } + OnResize(); OnMinimalClientAreaChangeRequest(GetActiveConfig().min_client_area_size); SDL_PumpEvents(); diff --git a/src/citra_qt/main.cpp b/src/citra_qt/main.cpp index f1ab29755..a85c94a4b 100644 --- a/src/citra_qt/main.cpp +++ b/src/citra_qt/main.cpp @@ -6,6 +6,9 @@ #include #include +#include + +#define QT_NO_OPENGL #include #include #include @@ -240,6 +243,14 @@ bool GMainWindow::InitializeSystem() { if (emu_thread != nullptr) ShutdownGame(); + render_window->MakeCurrent(); + if (!gladLoadGL()) { + QMessageBox::critical(this, tr("Error while starting Citra!"), + tr("Failed to initialize the video core!\n\n" + "Please ensure that your GPU supports OpenGL 3.3 and that you have the latest graphics driver.")); + return false; + } + // Initialize the core emulation System::Result system_result = System::Init(render_window); if (System::Result::Success != system_result) { diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 0e9a0be8b..7fcd36409 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -473,12 +473,6 @@ static void 
DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severity, bool RendererOpenGL::Init() { render_window->MakeCurrent(); - // TODO: Make frontends initialize this, so they can use gladLoadGLLoader with their own loaders - if (!gladLoadGL()) { - LOG_CRITICAL(Render_OpenGL, "Failed to initialize GL functions! Exiting..."); - exit(-1); - } - if (GLAD_GL_KHR_debug) { glEnable(GL_DEBUG_OUTPUT); glDebugMessageCallback(DebugHandler, nullptr); From d2182568ad7541078543c89a7ec4bd852b645228 Mon Sep 17 00:00:00 2001 From: wwylele Date: Fri, 6 May 2016 21:42:13 +0300 Subject: [PATCH 14/33] fix:return proper error --- src/core/loader/loader.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/core/loader/loader.cpp b/src/core/loader/loader.cpp index 0d4c1d351..af3f62248 100644 --- a/src/core/loader/loader.cpp +++ b/src/core/loader/loader.cpp @@ -144,11 +144,12 @@ ResultStatus LoadFile(const std::string& filename) { case FileType::CCI: { // Load application and RomFS - if (ResultStatus::Success == app_loader->Load()) { + ResultStatus result = app_loader->Load(); + if (ResultStatus::Success == result) { Service::FS::RegisterArchiveType(std::make_unique(*app_loader), Service::FS::ArchiveIdCode::RomFS); return ResultStatus::Success; } - break; + return result; } // Standard ELF file format... From 1a6cd7eb4b0b17f51d5cce036aa586f909a068e0 Mon Sep 17 00:00:00 2001 From: bunnei Date: Sat, 7 May 2016 00:54:31 -0400 Subject: [PATCH 15/33] HLE: Fix recent DSP change for Visual Studio. --- src/audio_core/hle/dsp.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/audio_core/hle/dsp.h b/src/audio_core/hle/dsp.h index 4459a5668..f6e53f68f 100644 --- a/src/audio_core/hle/dsp.h +++ b/src/audio_core/hle/dsp.h @@ -33,13 +33,9 @@ namespace HLE { // double-buffer. The frame counter is located as the very last u16 of each region and is incremented // each audio tick. 
-struct SharedMemory; - constexpr VAddr region0_base = 0x1FF50000; constexpr VAddr region1_base = 0x1FF70000; -extern std::array g_regions; - /** * The DSP is native 16-bit. The DSP also appears to be big-endian. When reading 32-bit numbers from * its memory regions, the higher and lower 16-bit halves are swapped compared to the little-endian @@ -507,6 +503,8 @@ struct SharedMemory { }; ASSERT_DSP_STRUCT(SharedMemory, 0x8000); +extern std::array g_regions; + // Structures must have an offset that is a multiple of two. static_assert(offsetof(SharedMemory, frame_counter) % 2 == 0, "Structures in DSP::HLE::SharedMemory must be 2-byte aligned"); static_assert(offsetof(SharedMemory, source_configurations) % 2 == 0, "Structures in DSP::HLE::SharedMemory must be 2-byte aligned"); From 920d2cf41d9366a597bbd30d1dea5ba1884b3800 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Wed, 27 Apr 2016 10:57:29 +0100 Subject: [PATCH 16/33] AudioCore: SDL2 Sink --- CMakeLists.txt | 3 + src/audio_core/CMakeLists.txt | 11 +++ src/audio_core/sdl2_sink.cpp | 126 ++++++++++++++++++++++++++++++++ src/audio_core/sdl2_sink.h | 30 ++++++++ src/audio_core/sink.h | 2 +- src/audio_core/sink_details.cpp | 7 ++ src/citra/default_ini.h | 2 +- src/common/logging/backend.cpp | 1 + src/common/logging/log.h | 3 +- 9 files changed, 182 insertions(+), 3 deletions(-) create mode 100644 src/audio_core/sdl2_sink.cpp create mode 100644 src/audio_core/sdl2_sink.h diff --git a/CMakeLists.txt b/CMakeLists.txt index d628ecc50..8f2898973 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -152,12 +152,15 @@ if (ENABLE_SDL2) download_bundled_external("sdl2/" ${SDL2_VER} SDL2_PREFIX) endif() + set(SDL2_FOUND YES) set(SDL2_INCLUDE_DIR "${SDL2_PREFIX}/include" CACHE PATH "Path to SDL2 headers") set(SDL2_LIBRARY "${SDL2_PREFIX}/lib/x64/SDL2.lib" CACHE PATH "Path to SDL2 library") set(SDL2_DLL_DIR "${SDL2_PREFIX}/lib/x64/" CACHE PATH "Path to SDL2.dll") else() find_package(SDL2 REQUIRED) endif() +else() + set(SDL2_FOUND 
NO) endif() IF (APPLE) diff --git a/src/audio_core/CMakeLists.txt b/src/audio_core/CMakeLists.txt index 5a2747e78..899155a30 100644 --- a/src/audio_core/CMakeLists.txt +++ b/src/audio_core/CMakeLists.txt @@ -23,7 +23,18 @@ set(HEADERS include_directories(../../externals/soundtouch/include) +if(SDL2_FOUND) + set(SRCS ${SRCS} sdl2_sink.cpp) + set(HEADERS ${HEADERS} sdl2_sink.h) + include_directories(${SDL2_INCLUDE_DIR}) +endif() + create_directory_groups(${SRCS} ${HEADERS}) add_library(audio_core STATIC ${SRCS} ${HEADERS}) target_link_libraries(audio_core SoundTouch) + +if(SDL2_FOUND) + target_link_libraries(audio_core ${SDL2_LIBRARY}) + set_property(TARGET audio_core APPEND PROPERTY COMPILE_DEFINITIONS HAVE_SDL2) +endif() diff --git a/src/audio_core/sdl2_sink.cpp b/src/audio_core/sdl2_sink.cpp new file mode 100644 index 000000000..dc75c04ee --- /dev/null +++ b/src/audio_core/sdl2_sink.cpp @@ -0,0 +1,126 @@ +// Copyright 2016 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include +#include + +#include + +#include "audio_core/audio_core.h" +#include "audio_core/sdl2_sink.h" + +#include "common/assert.h" +#include "common/logging/log.h" +#include + +namespace AudioCore { + +struct SDL2Sink::Impl { + unsigned int sample_rate = 0; + + SDL_AudioDeviceID audio_device_id = 0; + + std::list> queue; + + static void Callback(void* impl_, u8* buffer, int buffer_size_in_bytes); +}; + +SDL2Sink::SDL2Sink() : impl(std::make_unique()) { + if (SDL_Init(SDL_INIT_AUDIO) < 0) { + LOG_CRITICAL(Audio_Sink, "SDL_Init(SDL_INIT_AUDIO) failed"); + impl->audio_device_id = 0; + return; + } + + SDL_AudioSpec desired_audiospec; + SDL_zero(desired_audiospec); + desired_audiospec.format = AUDIO_S16; + desired_audiospec.channels = 2; + desired_audiospec.freq = native_sample_rate; + desired_audiospec.samples = 1024; + desired_audiospec.userdata = impl.get(); + desired_audiospec.callback = &Impl::Callback; + + SDL_AudioSpec obtained_audiospec; + SDL_zero(obtained_audiospec); + + impl->audio_device_id = SDL_OpenAudioDevice(nullptr, false, &desired_audiospec, &obtained_audiospec, 0); + if (impl->audio_device_id <= 0) { + LOG_CRITICAL(Audio_Sink, "SDL_OpenAudioDevice failed"); + return; + } + + impl->sample_rate = obtained_audiospec.freq; + + // SDL2 audio devices start out paused, unpause it: + SDL_PauseAudioDevice(impl->audio_device_id, 0); +} + +SDL2Sink::~SDL2Sink() { + if (impl->audio_device_id <= 0) + return; + + SDL_CloseAudioDevice(impl->audio_device_id); +} + +unsigned int SDL2Sink::GetNativeSampleRate() const { + if (impl->audio_device_id <= 0) + return native_sample_rate; + + return impl->sample_rate; +} + +void SDL2Sink::EnqueueSamples(const std::vector& samples) { + if (impl->audio_device_id <= 0) + return; + + ASSERT_MSG(samples.size() % 2 == 0, "Samples must be in interleaved stereo PCM16 format (size must be a multiple of two)"); + + SDL_LockAudioDevice(impl->audio_device_id); + impl->queue.emplace_back(samples); + 
SDL_UnlockAudioDevice(impl->audio_device_id); +} + +size_t SDL2Sink::SamplesInQueue() const { + if (impl->audio_device_id <= 0) + return 0; + + SDL_LockAudioDevice(impl->audio_device_id); + + size_t total_size = std::accumulate(impl->queue.begin(), impl->queue.end(), static_cast(0), + [](size_t sum, const auto& buffer) { + // Division by two because each stereo sample is made of two s16. + return sum + buffer.size() / 2; + }); + + SDL_UnlockAudioDevice(impl->audio_device_id); + + return total_size; +} + +void SDL2Sink::Impl::Callback(void* impl_, u8* buffer, int buffer_size_in_bytes) { + Impl* impl = reinterpret_cast(impl_); + + size_t remaining_size = static_cast(buffer_size_in_bytes) / sizeof(s16); // Keep track of size in 16-bit increments. + + while (remaining_size > 0 && !impl->queue.empty()) { + if (impl->queue.front().size() <= remaining_size) { + memcpy(buffer, impl->queue.front().data(), impl->queue.front().size() * sizeof(s16)); + buffer += impl->queue.front().size() * sizeof(s16); + remaining_size -= impl->queue.front().size(); + impl->queue.pop_front(); + } else { + memcpy(buffer, impl->queue.front().data(), remaining_size * sizeof(s16)); + buffer += remaining_size * sizeof(s16); + impl->queue.front().erase(impl->queue.front().begin(), impl->queue.front().begin() + remaining_size); + remaining_size = 0; + } + } + + if (remaining_size > 0) { + memset(buffer, 0, remaining_size * sizeof(s16)); + } +} + +} // namespace AudioCore diff --git a/src/audio_core/sdl2_sink.h b/src/audio_core/sdl2_sink.h new file mode 100644 index 000000000..0f296b673 --- /dev/null +++ b/src/audio_core/sdl2_sink.h @@ -0,0 +1,30 @@ +// Copyright 2016 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include +#include + +#include "audio_core/sink.h" + +namespace AudioCore { + +class SDL2Sink final : public Sink { +public: + SDL2Sink(); + ~SDL2Sink() override; + + unsigned int GetNativeSampleRate() const override; + + void EnqueueSamples(const std::vector& samples) override; + + size_t SamplesInQueue() const override; + +private: + struct Impl; + std::unique_ptr impl; +}; + +} // namespace AudioCore diff --git a/src/audio_core/sink.h b/src/audio_core/sink.h index cad21a85e..1c881c3d2 100644 --- a/src/audio_core/sink.h +++ b/src/audio_core/sink.h @@ -19,7 +19,7 @@ public: virtual ~Sink() = default; /// The native rate of this sink. The sink expects to be fed samples that respect this. (Units: samples/sec) - virtual unsigned GetNativeSampleRate() const = 0; + virtual unsigned int GetNativeSampleRate() const = 0; /** * Feed stereo samples to sink. diff --git a/src/audio_core/sink_details.cpp b/src/audio_core/sink_details.cpp index d2cc74103..ba5e83d17 100644 --- a/src/audio_core/sink_details.cpp +++ b/src/audio_core/sink_details.cpp @@ -8,10 +8,17 @@ #include "audio_core/null_sink.h" #include "audio_core/sink_details.h" +#ifdef HAVE_SDL2 +#include "audio_core/sdl2_sink.h" +#endif + namespace AudioCore { // g_sink_details is ordered in terms of desirability, with the best choice at the top. const std::vector g_sink_details = { +#ifdef HAVE_SDL2 + { "sdl2", []() { return std::make_unique(); } }, +#endif { "null", []() { return std::make_unique(); } }, }; diff --git a/src/citra/default_ini.h b/src/citra/default_ini.h index 0e6171736..49126356f 100644 --- a/src/citra/default_ini.h +++ b/src/citra/default_ini.h @@ -58,7 +58,7 @@ bg_green = [Audio] # Which audio output engine to use. 
-# auto (default): Auto-select, null: No audio output +# auto (default): Auto-select, null: No audio output, sdl2: SDL2 (if available) output_engine = [Data Storage] diff --git a/src/common/logging/backend.cpp b/src/common/logging/backend.cpp index 3d39f94d5..d7008fc66 100644 --- a/src/common/logging/backend.cpp +++ b/src/common/logging/backend.cpp @@ -65,6 +65,7 @@ namespace Log { SUB(Render, OpenGL) \ CLS(Audio) \ SUB(Audio, DSP) \ + SUB(Audio, Sink) \ CLS(Loader) // GetClassName is a macro defined by Windows.h, grrr... diff --git a/src/common/logging/log.h b/src/common/logging/log.h index 521362317..c6910b1c7 100644 --- a/src/common/logging/log.h +++ b/src/common/logging/log.h @@ -78,8 +78,9 @@ enum class Class : ClassType { Render, ///< Emulator video output and hardware acceleration Render_Software, ///< Software renderer backend Render_OpenGL, ///< OpenGL backend - Audio, ///< Emulator audio output + Audio, ///< Audio emulation Audio_DSP, ///< The HLE implementation of the DSP + Audio_Sink, ///< Emulator audio output backend Loader, ///< ROM loader Count ///< Total number of logging classes From 6970b7d3d511194d3b229358375c528b0136e82c Mon Sep 17 00:00:00 2001 From: Subv Date: Thu, 5 May 2016 12:36:07 -0500 Subject: [PATCH 17/33] HLE/Applets: Use the correct size for the framebuffer SharedMemory in the swkbd and MiiSelector applets. 
--- src/core/hle/applets/mii_selector.cpp | 19 +++++++++++-------- src/core/hle/applets/mii_selector.h | 6 +++--- src/core/hle/applets/swkbd.cpp | 20 ++++++++++++-------- src/core/hle/applets/swkbd.h | 7 +++---- src/core/hle/service/apt/apt.h | 15 +++++++++++++++ 5 files changed, 44 insertions(+), 23 deletions(-) diff --git a/src/core/hle/applets/mii_selector.cpp b/src/core/hle/applets/mii_selector.cpp index 708d2f630..1dd6757f7 100644 --- a/src/core/hle/applets/mii_selector.cpp +++ b/src/core/hle/applets/mii_selector.cpp @@ -21,13 +21,6 @@ namespace HLE { namespace Applets { -MiiSelector::MiiSelector(Service::APT::AppletId id) : Applet(id), started(false) { - // Create the SharedMemory that will hold the framebuffer data - // TODO(Subv): What size should we use here? - using Kernel::MemoryPermission; - framebuffer_memory = Kernel::SharedMemory::Create(0x1000, MemoryPermission::ReadWrite, MemoryPermission::ReadWrite, "MiiSelector Memory"); -} - ResultCode MiiSelector::ReceiveParameter(const Service::APT::MessageParameter& parameter) { if (parameter.signal != static_cast(Service::APT::SignalType::LibAppJustStarted)) { LOG_ERROR(Service_APT, "unsupported signal %u", parameter.signal); @@ -36,8 +29,18 @@ ResultCode MiiSelector::ReceiveParameter(const Service::APT::MessageParameter& p return ResultCode(-1); } + // The LibAppJustStarted message contains a buffer with the size of the framebuffer shared memory. 
+ // Create the SharedMemory that will hold the framebuffer data + Service::APT::CaptureBufferInfo capture_info; + ASSERT(sizeof(capture_info) == parameter.buffer_size); + + memcpy(&capture_info, parameter.data, sizeof(capture_info)); + using Kernel::MemoryPermission; + framebuffer_memory = Kernel::SharedMemory::Create(capture_info.size, MemoryPermission::ReadWrite, + MemoryPermission::ReadWrite, "MiiSelector Memory"); + + // Send the response message with the newly created SharedMemory Service::APT::MessageParameter result; - // The buffer passed in parameter contains the data returned by GSPGPU::ImportDisplayCaptureInfo result.signal = static_cast(Service::APT::SignalType::LibAppFinished); result.data = nullptr; result.buffer_size = 0; diff --git a/src/core/hle/applets/mii_selector.h b/src/core/hle/applets/mii_selector.h index 6a3e7c8eb..5619f8399 100644 --- a/src/core/hle/applets/mii_selector.h +++ b/src/core/hle/applets/mii_selector.h @@ -18,15 +18,15 @@ namespace Applets { class MiiSelector final : public Applet { public: - MiiSelector(Service::APT::AppletId id); + MiiSelector(Service::APT::AppletId id) : Applet(id), started(false) { } ResultCode ReceiveParameter(const Service::APT::MessageParameter& parameter) override; ResultCode StartImpl(const Service::APT::AppletStartupParameter& parameter) override; void Update() override; bool IsRunning() const override { return started; } - /// TODO(Subv): Find out what this is actually used for. - /// It is believed that the application stores the current screen image here. + /// This SharedMemory will be created when we receive the LibAppJustStarted message. + /// It holds the framebuffer info retrieved by the application with GSPGPU::ImportDisplayCaptureInfo Kernel::SharedPtr framebuffer_memory; /// Whether this applet is currently running instead of the host application or not. 
diff --git a/src/core/hle/applets/swkbd.cpp b/src/core/hle/applets/swkbd.cpp index 1db6b5a17..87238aa1c 100644 --- a/src/core/hle/applets/swkbd.cpp +++ b/src/core/hle/applets/swkbd.cpp @@ -24,13 +24,6 @@ namespace HLE { namespace Applets { -SoftwareKeyboard::SoftwareKeyboard(Service::APT::AppletId id) : Applet(id), started(false) { - // Create the SharedMemory that will hold the framebuffer data - // TODO(Subv): What size should we use here? - using Kernel::MemoryPermission; - framebuffer_memory = Kernel::SharedMemory::Create(0x1000, MemoryPermission::ReadWrite, MemoryPermission::ReadWrite, "SoftwareKeyboard Memory"); -} - ResultCode SoftwareKeyboard::ReceiveParameter(Service::APT::MessageParameter const& parameter) { if (parameter.signal != static_cast(Service::APT::SignalType::LibAppJustStarted)) { LOG_ERROR(Service_APT, "unsupported signal %u", parameter.signal); @@ -39,8 +32,19 @@ ResultCode SoftwareKeyboard::ReceiveParameter(Service::APT::MessageParameter con return ResultCode(-1); } + // The LibAppJustStarted message contains a buffer with the size of the framebuffer shared memory. 
+ // Create the SharedMemory that will hold the framebuffer data + Service::APT::CaptureBufferInfo capture_info; + ASSERT(sizeof(capture_info) == parameter.buffer_size); + + memcpy(&capture_info, parameter.data, sizeof(capture_info)); + + using Kernel::MemoryPermission; + framebuffer_memory = Kernel::SharedMemory::Create(capture_info.size, MemoryPermission::ReadWrite, + MemoryPermission::ReadWrite, "SoftwareKeyboard Memory"); + + // Send the response message with the newly created SharedMemory Service::APT::MessageParameter result; - // The buffer passed in parameter contains the data returned by GSPGPU::ImportDisplayCaptureInfo result.signal = static_cast(Service::APT::SignalType::LibAppFinished); result.data = nullptr; result.buffer_size = 0; diff --git a/src/core/hle/applets/swkbd.h b/src/core/hle/applets/swkbd.h index cb95b8d90..cf26a8fb7 100644 --- a/src/core/hle/applets/swkbd.h +++ b/src/core/hle/applets/swkbd.h @@ -53,8 +53,7 @@ static_assert(sizeof(SoftwareKeyboardConfig) == 0x400, "Software Keyboard Config class SoftwareKeyboard final : public Applet { public: - SoftwareKeyboard(Service::APT::AppletId id); - ~SoftwareKeyboard() {} + SoftwareKeyboard(Service::APT::AppletId id) : Applet(id), started(false) { } ResultCode ReceiveParameter(const Service::APT::MessageParameter& parameter) override; ResultCode StartImpl(const Service::APT::AppletStartupParameter& parameter) override; @@ -72,8 +71,8 @@ public: */ void Finalize(); - /// TODO(Subv): Find out what this is actually used for. - /// It is believed that the application stores the current screen image here. + /// This SharedMemory will be created when we receive the LibAppJustStarted message. 
+ /// It holds the framebuffer info retrieved by the application with GSPGPU::ImportDisplayCaptureInfo Kernel::SharedPtr framebuffer_memory; /// SharedMemory where the output text will be stored diff --git a/src/core/hle/service/apt/apt.h b/src/core/hle/service/apt/apt.h index 668b4a66f..1a1034fcc 100644 --- a/src/core/hle/service/apt/apt.h +++ b/src/core/hle/service/apt/apt.h @@ -5,6 +5,7 @@ #pragma once #include "common/common_types.h" +#include "common/swap.h" #include "core/hle/kernel/kernel.h" @@ -31,6 +32,20 @@ struct AppletStartupParameter { u8* data = nullptr; }; +/// Used by the application to pass information about the current framebuffer to applets. +struct CaptureBufferInfo { + u32_le size; + u8 is_3d; + INSERT_PADDING_BYTES(0x3); // Padding for alignment + u32_le top_screen_left_offset; + u32_le top_screen_right_offset; + u32_le top_screen_format; + u32_le bottom_screen_left_offset; + u32_le bottom_screen_right_offset; + u32_le bottom_screen_format; +}; +static_assert(sizeof(CaptureBufferInfo) == 0x20, "CaptureBufferInfo struct has incorrect size"); + /// Signals used by APT functions enum class SignalType : u32 { None = 0x0, From 5b7f86708cd72b5d42b16ba4a0cc348dc83129d1 Mon Sep 17 00:00:00 2001 From: Subv Date: Sun, 17 Apr 2016 14:01:40 -0500 Subject: [PATCH 18/33] Kernel/Threading: Warn when a thread can be scheduled in the Syscore (Core 1). We do not currently implement any cores other than the AppCore (Core 0). --- src/core/hle/kernel/process.h | 2 ++ src/core/hle/svc.cpp | 5 +++++ src/core/loader/ncch.cpp | 3 +++ 3 files changed, 10 insertions(+) diff --git a/src/core/hle/kernel/process.h b/src/core/hle/kernel/process.h index 6d2ca96a2..a06afef2b 100644 --- a/src/core/hle/kernel/process.h +++ b/src/core/hle/kernel/process.h @@ -107,6 +107,8 @@ public: ProcessFlags flags; /// Kernel compatibility version for this process u16 kernel_version = 0; + /// The default CPU for this process, threads are scheduled on this cpu by default. 
+ u8 ideal_processor = 0; /// The id of this process u32 process_id = next_process_id++; diff --git a/src/core/hle/svc.cpp b/src/core/hle/svc.cpp index ae54afb1c..761e1b45b 100644 --- a/src/core/hle/svc.cpp +++ b/src/core/hle/svc.cpp @@ -497,6 +497,11 @@ static ResultCode CreateThread(Handle* out_handle, s32 priority, u32 entry_point break; } + if (processor_id == THREADPROCESSORID_1 || processor_id == THREADPROCESSORID_ALL || + (processor_id == THREADPROCESSORID_DEFAULT && Kernel::g_current_process->ideal_processor == THREADPROCESSORID_1)) { + LOG_WARNING(Kernel_SVC, "Newly created thread is allowed to be run in the SysCore, unimplemented."); + } + CASCADE_RESULT(SharedPtr thread, Kernel::Thread::Create( name, entry_point, priority, arg, processor_id, stack_top)); CASCADE_RESULT(*out_handle, Kernel::g_handle_table.Create(std::move(thread))); diff --git a/src/core/loader/ncch.cpp b/src/core/loader/ncch.cpp index a4b47ef8c..52c5fbaaf 100644 --- a/src/core/loader/ncch.cpp +++ b/src/core/loader/ncch.cpp @@ -156,6 +156,9 @@ ResultStatus AppLoader_NCCH::LoadExec() { Kernel::g_current_process->resource_limit = Kernel::ResourceLimit::GetForCategory( static_cast(exheader_header.arm11_system_local_caps.resource_limit_category)); + // Set the default CPU core for this process + Kernel::g_current_process->ideal_processor = exheader_header.arm11_system_local_caps.ideal_processor; + // Copy data while converting endianess std::array kernel_caps; std::copy_n(exheader_header.arm11_kernel_caps.descriptors, kernel_caps.size(), begin(kernel_caps)); From 0a31e373f1728316b3dfed391ddcb99a474e4102 Mon Sep 17 00:00:00 2001 From: Alexander Laties Date: Mon, 25 Apr 2016 16:10:03 -0400 Subject: [PATCH 19/33] fixup simple type conversions where possible --- src/audio_core/hle/pipe.cpp | 9 +++++-- src/audio_core/hle/pipe.h | 12 ++++++---- src/citra/config.cpp | 2 +- .../debugger/graphics_vertex_shader.cpp | 2 +- src/citra_qt/util/util.cpp | 2 +- src/core/gdbstub/gdbstub.cpp | 20 
++++++++-------- src/core/hle/service/dsp_dsp.cpp | 4 ++-- src/core/hw/gpu.cpp | 4 ++-- src/core/loader/3dsx.cpp | 6 ++--- src/core/tracer/recorder.cpp | 24 +++++++++---------- src/video_core/debug_utils/debug_utils.cpp | 9 +++---- src/video_core/pica_state.h | 2 +- .../renderer_opengl/gl_rasterizer.cpp | 2 +- .../renderer_opengl/renderer_opengl.cpp | 2 +- 14 files changed, 55 insertions(+), 45 deletions(-) diff --git a/src/audio_core/hle/pipe.cpp b/src/audio_core/hle/pipe.cpp index 03280780f..44dff1345 100644 --- a/src/audio_core/hle/pipe.cpp +++ b/src/audio_core/hle/pipe.cpp @@ -36,12 +36,17 @@ std::vector PipeRead(DspPipe pipe_number, u32 length) { return {}; } + if (length > UINT16_MAX) { // Can only read at most UINT16_MAX from the pipe + LOG_ERROR(Audio_DSP, "length of %u greater than max of %u", length, UINT16_MAX); + return {}; + } + std::vector& data = pipe_data[pipe_index]; if (length > data.size()) { LOG_WARNING(Audio_DSP, "pipe_number = %zu is out of data, application requested read of %u but %zu remain", pipe_index, length, data.size()); - length = data.size(); + length = static_cast(data.size()); } if (length == 0) @@ -94,7 +99,7 @@ static void AudioPipeWriteStructAddresses() { }; // Begin with a u16 denoting the number of structs. - WriteU16(DspPipe::Audio, struct_addresses.size()); + WriteU16(DspPipe::Audio, static_cast(struct_addresses.size())); // Then write the struct addresses. for (u16 addr : struct_addresses) { WriteU16(DspPipe::Audio, addr); diff --git a/src/audio_core/hle/pipe.h b/src/audio_core/hle/pipe.h index 64d97f8ba..b714c0496 100644 --- a/src/audio_core/hle/pipe.h +++ b/src/audio_core/hle/pipe.h @@ -24,10 +24,14 @@ enum class DspPipe { constexpr size_t NUM_DSP_PIPE = 8; /** - * Read a DSP pipe. - * @param pipe_number The Pipe ID - * @param length How much data to request. - * @return The data read from the pipe. The size of this vector can be less than the length requested. 
+ * Reads `length` bytes from the DSP pipe identified with `pipe_number`. + * @note Can read up to the maximum value of a u16 in bytes (65,535). + * @note IF an error is encountered with either an invalid `pipe_number` or `length` value, an empty vector will be returned. + * @note IF `length` is set to 0, an empty vector will be returned. + * @note IF `length` is greater than the amount of data available, this function will only read the available amount. + * @param pipe_number a `DspPipe` + * @param length the number of bytes to read. The max is 65,535 (max of u16). + * @returns a vector of bytes from the specified pipe. On error, will be empty. */ std::vector PipeRead(DspPipe pipe_number, u32 length); diff --git a/src/citra/config.cpp b/src/citra/config.cpp index 0d17c80bf..c5cb4fb38 100644 --- a/src/citra/config.cpp +++ b/src/citra/config.cpp @@ -85,7 +85,7 @@ void Config::ReadValues() { // Debugging Settings::values.use_gdbstub = sdl2_config->GetBoolean("Debugging", "use_gdbstub", false); - Settings::values.gdbstub_port = sdl2_config->GetInteger("Debugging", "gdbstub_port", 24689); + Settings::values.gdbstub_port = static_cast(sdl2_config->GetInteger("Debugging", "gdbstub_port", 24689)); } void Config::Reload() { diff --git a/src/citra_qt/debugger/graphics_vertex_shader.cpp b/src/citra_qt/debugger/graphics_vertex_shader.cpp index 6e8d7ef42..854f6ff16 100644 --- a/src/citra_qt/debugger/graphics_vertex_shader.cpp +++ b/src/citra_qt/debugger/graphics_vertex_shader.cpp @@ -515,7 +515,7 @@ void GraphicsVertexShaderWidget::Reload(bool replace_vertex_data, void* vertex_d } // Initialize debug info text for current cycle count - cycle_index->setMaximum(debug_data.records.size() - 1); + cycle_index->setMaximum(static_cast(debug_data.records.size() - 1)); OnCycleIndexChanged(cycle_index->value()); model->endResetModel(); diff --git a/src/citra_qt/util/util.cpp b/src/citra_qt/util/util.cpp index 8734a8efd..2f9beb5cc 100644 --- a/src/citra_qt/util/util.cpp +++ 
b/src/citra_qt/util/util.cpp @@ -19,7 +19,7 @@ QString ReadableByteSize(qulonglong size) { static const std::array units = { "B", "KiB", "MiB", "GiB", "TiB", "PiB" }; if (size == 0) return "0"; - int digit_groups = std::min((int)(std::log10(size) / std::log10(1024)), units.size()); + int digit_groups = std::min(static_cast(std::log10(size) / std::log10(1024)), static_cast(units.size())); return QString("%L1 %2").arg(size / std::pow(1024, digit_groups), 0, 'f', 1) .arg(units[digit_groups]); } diff --git a/src/core/gdbstub/gdbstub.cpp b/src/core/gdbstub/gdbstub.cpp index ae0c116ef..1360ee845 100644 --- a/src/core/gdbstub/gdbstub.cpp +++ b/src/core/gdbstub/gdbstub.cpp @@ -374,7 +374,7 @@ static void SendReply(const char* reply) { memset(command_buffer, 0, sizeof(command_buffer)); - command_length = strlen(reply); + command_length = static_cast(strlen(reply)); if (command_length + 4 > sizeof(command_buffer)) { LOG_ERROR(Debug_GDBStub, "command_buffer overflow in SendReply"); return; @@ -515,7 +515,7 @@ static bool IsDataAvailable() { return false; } - return FD_ISSET(gdbserver_socket, &fd_socket); + return FD_ISSET(gdbserver_socket, &fd_socket) != 0; } /// Send requested register to gdb client. 
@@ -633,10 +633,10 @@ static void ReadMemory() { auto start_offset = command_buffer+1; auto addr_pos = std::find(start_offset, command_buffer+command_length, ','); - PAddr addr = HexToInt(start_offset, addr_pos - start_offset); + PAddr addr = HexToInt(start_offset, static_cast(addr_pos - start_offset)); start_offset = addr_pos+1; - u32 len = HexToInt(start_offset, (command_buffer + command_length) - start_offset); + u32 len = HexToInt(start_offset, static_cast((command_buffer + command_length) - start_offset)); LOG_DEBUG(Debug_GDBStub, "gdb: addr: %08x len: %08x\n", addr, len); @@ -658,11 +658,11 @@ static void ReadMemory() { static void WriteMemory() { auto start_offset = command_buffer+1; auto addr_pos = std::find(start_offset, command_buffer+command_length, ','); - PAddr addr = HexToInt(start_offset, addr_pos - start_offset); + PAddr addr = HexToInt(start_offset, static_cast(addr_pos - start_offset)); start_offset = addr_pos+1; auto len_pos = std::find(start_offset, command_buffer+command_length, ':'); - u32 len = HexToInt(start_offset, len_pos - start_offset); + u32 len = HexToInt(start_offset, static_cast(len_pos - start_offset)); u8* dst = Memory::GetPointer(addr); if (!dst) { @@ -752,10 +752,10 @@ static void AddBreakpoint() { auto start_offset = command_buffer+3; auto addr_pos = std::find(start_offset, command_buffer+command_length, ','); - PAddr addr = HexToInt(start_offset, addr_pos - start_offset); + PAddr addr = HexToInt(start_offset, static_cast(addr_pos - start_offset)); start_offset = addr_pos+1; - u32 len = HexToInt(start_offset, (command_buffer + command_length) - start_offset); + u32 len = HexToInt(start_offset, static_cast((command_buffer + command_length) - start_offset)); if (type == BreakpointType::Access) { // Access is made up of Read and Write types, so add both breakpoints @@ -800,10 +800,10 @@ static void RemoveBreakpoint() { auto start_offset = command_buffer+3; auto addr_pos = std::find(start_offset, command_buffer+command_length, ','); 
- PAddr addr = HexToInt(start_offset, addr_pos - start_offset); + PAddr addr = HexToInt(start_offset, static_cast(addr_pos - start_offset)); start_offset = addr_pos+1; - u32 len = HexToInt(start_offset, (command_buffer + command_length) - start_offset); + u32 len = HexToInt(start_offset, static_cast((command_buffer + command_length) - start_offset)); if (type == BreakpointType::Access) { // Access is made up of Read and Write types, so add both breakpoints diff --git a/src/core/hle/service/dsp_dsp.cpp b/src/core/hle/service/dsp_dsp.cpp index 995bee3f9..274fc751a 100644 --- a/src/core/hle/service/dsp_dsp.cpp +++ b/src/core/hle/service/dsp_dsp.cpp @@ -288,7 +288,7 @@ static void WriteProcessPipe(Service::Interface* self) { ASSERT_MSG(Memory::GetPointer(buffer) != nullptr, "Invalid Buffer: pipe=%u, size=0x%X, buffer=0x%08X", pipe_index, size, buffer); std::vector message(size); - for (size_t i = 0; i < size; i++) { + for (u32 i = 0; i < size; i++) { message[i] = Memory::Read8(buffer + i); } @@ -403,7 +403,7 @@ static void GetPipeReadableSize(Service::Interface* self) { cmd_buff[0] = IPC::MakeHeader(0xF, 2, 0); cmd_buff[1] = RESULT_SUCCESS.raw; // No error - cmd_buff[2] = DSP::HLE::GetPipeReadableSize(pipe); + cmd_buff[2] = static_cast(DSP::HLE::GetPipeReadableSize(pipe)); LOG_DEBUG(Service_DSP, "pipe=%u, unknown=0x%08X, return cmd_buff[2]=0x%08X", pipe_index, unknown, cmd_buff[2]); } diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp index 2fe856293..a4dfb7e43 100644 --- a/src/core/hw/gpu.cpp +++ b/src/core/hw/gpu.cpp @@ -188,10 +188,10 @@ inline void Write(u32 addr, const T data) { u32 output_gap = config.texture_copy.output_gap * 16; size_t contiguous_input_size = config.texture_copy.size / input_width * (input_width + input_gap); - Memory::RasterizerFlushRegion(config.GetPhysicalInputAddress(), contiguous_input_size); + Memory::RasterizerFlushRegion(config.GetPhysicalInputAddress(), static_cast(contiguous_input_size)); size_t contiguous_output_size = 
config.texture_copy.size / output_width * (output_width + output_gap); - Memory::RasterizerFlushAndInvalidateRegion(config.GetPhysicalOutputAddress(), contiguous_output_size); + Memory::RasterizerFlushAndInvalidateRegion(config.GetPhysicalOutputAddress(), static_cast(contiguous_output_size)); u32 remaining_size = config.texture_copy.size; u32 remaining_input = input_width; diff --git a/src/core/loader/3dsx.cpp b/src/core/loader/3dsx.cpp index 48a11ef81..98e7ab48f 100644 --- a/src/core/loader/3dsx.cpp +++ b/src/core/loader/3dsx.cpp @@ -178,11 +178,11 @@ static THREEDSX_Error Load3DSXFile(FileUtil::IOFile& file, u32 base_addr, Shared for (unsigned current_inprogress = 0; current_inprogress < remaining && pos < end_pos; current_inprogress++) { const auto& table = reloc_table[current_inprogress]; LOG_TRACE(Loader, "(t=%d,skip=%u,patch=%u)", current_segment_reloc_table, - (u32)table.skip, (u32)table.patch); + static_cast(table.skip), static_cast(table.patch)); pos += table.skip; s32 num_patches = table.patch; while (0 < num_patches && pos < end_pos) { - u32 in_addr = (u8*)pos - program_image.data(); + u32 in_addr = static_cast(reinterpret_cast(pos) - program_image.data()); u32 addr = TranslateAddr(*pos, &loadinfo, offsets); LOG_TRACE(Loader, "Patching %08X <-- rel(%08X,%d) (%08X)", base_addr + in_addr, addr, current_segment_reloc_table, *pos); @@ -284,7 +284,7 @@ ResultStatus AppLoader_THREEDSX::ReadRomFS(std::shared_ptr& ro // Check if the 3DSX has a RomFS... 
if (hdr.fs_offset != 0) { u32 romfs_offset = hdr.fs_offset; - u32 romfs_size = file.GetSize() - hdr.fs_offset; + u32 romfs_size = static_cast(file.GetSize()) - hdr.fs_offset; LOG_DEBUG(Loader, "RomFS offset: 0x%08X", romfs_offset); LOG_DEBUG(Loader, "RomFS size: 0x%08X", romfs_size); diff --git a/src/core/tracer/recorder.cpp b/src/core/tracer/recorder.cpp index c6dc35c83..7abaacf70 100644 --- a/src/core/tracer/recorder.cpp +++ b/src/core/tracer/recorder.cpp @@ -26,17 +26,17 @@ void Recorder::Finish(const std::string& filename) { // Calculate file offsets auto& initial = header.initial_state_offsets; - initial.gpu_registers_size = initial_state.gpu_registers.size(); - initial.lcd_registers_size = initial_state.lcd_registers.size(); - initial.pica_registers_size = initial_state.pica_registers.size(); - initial.default_attributes_size = initial_state.default_attributes.size(); - initial.vs_program_binary_size = initial_state.vs_program_binary.size(); - initial.vs_swizzle_data_size = initial_state.vs_swizzle_data.size(); - initial.vs_float_uniforms_size = initial_state.vs_float_uniforms.size(); - initial.gs_program_binary_size = initial_state.gs_program_binary.size(); - initial.gs_swizzle_data_size = initial_state.gs_swizzle_data.size(); - initial.gs_float_uniforms_size = initial_state.gs_float_uniforms.size(); - header.stream_size = stream.size(); + initial.gpu_registers_size = static_cast(initial_state.gpu_registers.size()); + initial.lcd_registers_size = static_cast(initial_state.lcd_registers.size()); + initial.pica_registers_size = static_cast(initial_state.pica_registers.size()); + initial.default_attributes_size = static_cast(initial_state.default_attributes.size()); + initial.vs_program_binary_size = static_cast(initial_state.vs_program_binary.size()); + initial.vs_swizzle_data_size = static_cast(initial_state.vs_swizzle_data.size()); + initial.vs_float_uniforms_size = static_cast(initial_state.vs_float_uniforms.size()); + initial.gs_program_binary_size = 
static_cast(initial_state.gs_program_binary.size()); + initial.gs_swizzle_data_size = static_cast(initial_state.gs_swizzle_data.size()); + initial.gs_float_uniforms_size = static_cast(initial_state.gs_float_uniforms.size()); + header.stream_size = static_cast(stream.size()); initial.gpu_registers = sizeof(header); initial.lcd_registers = initial.gpu_registers + initial.gpu_registers_size * sizeof(u32); @@ -68,7 +68,7 @@ void Recorder::Finish(const std::string& filename) { DEBUG_ASSERT(stream_element.extra_data.size() == 0); break; } - header.stream_offset += stream_element.extra_data.size(); + header.stream_offset += static_cast(stream_element.extra_data.size()); } try { diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp index fb20f81dd..2f645b441 100644 --- a/src/video_core/debug_utils/debug_utils.cpp +++ b/src/video_core/debug_utils/debug_utils.cpp @@ -208,11 +208,12 @@ void DumpShader(const std::string& filename, const Regs::ShaderConfig& config, c // TODO: Reduce the amount of binary code written to relevant portions dvlp.binary_offset = write_offset - dvlp_offset; - dvlp.binary_size_words = setup.program_code.size(); - QueueForWriting(reinterpret_cast(setup.program_code.data()), setup.program_code.size() * sizeof(u32)); + dvlp.binary_size_words = static_cast(setup.program_code.size()); + QueueForWriting(reinterpret_cast(setup.program_code.data()), + static_cast(setup.program_code.size()) * sizeof(u32)); dvlp.swizzle_info_offset = write_offset - dvlp_offset; - dvlp.swizzle_info_num_entries = setup.swizzle_data.size(); + dvlp.swizzle_info_num_entries = static_cast(setup.swizzle_data.size()); u32 dummy = 0; for (unsigned int i = 0; i < setup.swizzle_data.size(); ++i) { QueueForWriting(reinterpret_cast(&setup.swizzle_data[i]), sizeof(setup.swizzle_data[i])); @@ -264,7 +265,7 @@ void DumpShader(const std::string& filename, const Regs::ShaderConfig& config, c constant_table.emplace_back(constant); } 
dvle.constant_table_offset = write_offset - dvlb.dvle_offset; - dvle.constant_table_size = constant_table.size(); + dvle.constant_table_size = static_cast(constant_table.size()); for (const auto& constant : constant_table) { QueueForWriting(reinterpret_cast(&constant), sizeof(constant)); } diff --git a/src/video_core/pica_state.h b/src/video_core/pica_state.h index bbecad850..1059c6ae4 100644 --- a/src/video_core/pica_state.h +++ b/src/video_core/pica_state.h @@ -56,7 +56,7 @@ struct State { // Used to buffer partial vertices for immediate-mode rendering. Shader::InputVertex input_vertex; // Index of the next attribute to be loaded into `input_vertex`. - int current_attribute = 0; + u32 current_attribute = 0; } immediate; // This is constructed with a dummy triangle topology diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 519d81aeb..0b471dfd2 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -93,7 +93,7 @@ RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) { state.Apply(); for (size_t i = 0; i < lighting_luts.size(); ++i) { - glActiveTexture(GL_TEXTURE3 + i); + glActiveTexture(static_cast(GL_TEXTURE3 + i)); glTexImage1D(GL_TEXTURE_1D, 0, GL_RGBA32F, 256, 0, GL_RGBA, GL_FLOAT, nullptr); glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 7fcd36409..8f424a435 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -192,7 +192,7 @@ void RendererOpenGL::LoadFBToScreenInfo(const GPU::Regs::FramebufferConfig& fram // only allows rows to have a memory alignement of 4. 
ASSERT(pixel_stride % 4 == 0); - if (!Rasterizer()->AccelerateDisplay(framebuffer, framebuffer_addr, pixel_stride, screen_info)) { + if (!Rasterizer()->AccelerateDisplay(framebuffer, framebuffer_addr, static_cast(pixel_stride), screen_info)) { // Reset the screen info's display texture to its own permanent texture screen_info.display_texture = screen_info.texture.resource.handle; screen_info.display_texcoords = MathUtil::Rectangle(0.f, 0.f, 1.f, 1.f); From aef463010282d2daac4ce4aead1f20a8fa588ae3 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Sun, 8 May 2016 23:11:59 -0400 Subject: [PATCH 20/33] swap: Remove unused methods Also gets rid of pointer data variants as this prevents the use of the regular swapping routines as unary predicates in std lib functions. They also cast to stricter alignment types, which is undefined behavior. --- src/common/swap.h | 28 ---------------------------- 1 file changed, 28 deletions(-) diff --git a/src/common/swap.h b/src/common/swap.h index a7c37bc44..f51751d29 100644 --- a/src/common/swap.h +++ b/src/common/swap.h @@ -58,9 +58,6 @@ namespace Common { -inline u8 swap8(u8 _data) {return _data;} -inline u32 swap24(const u8* _data) {return (_data[0] << 16) | (_data[1] << 8) | _data[2];} - #ifdef _MSC_VER inline u16 swap16(u16 _data) {return _byteswap_ushort(_data);} inline u32 swap32(u32 _data) {return _byteswap_ulong (_data);} @@ -115,31 +112,6 @@ inline double swapd(double f) { return dat2.f; } -inline u16 swap16(const u8* _pData) {return swap16(*(const u16*)_pData);} -inline u32 swap32(const u8* _pData) {return swap32(*(const u32*)_pData);} -inline u64 swap64(const u8* _pData) {return swap64(*(const u64*)_pData);} - -template -void swap(u8*); - -template <> -inline void swap<1>(u8* data) { } - -template <> -inline void swap<2>(u8* data) { - *reinterpret_cast(data) = swap16(data); -} - -template <> -inline void swap<4>(u8* data) { - *reinterpret_cast(data) = swap32(data); -} - -template <> -inline void swap<8>(u8* data) { - 
*reinterpret_cast(data) = swap64(data); -} - } // Namespace Common From 47ca79ba4bc7e5c18cf9427c975a789efd65a414 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Sun, 8 May 2016 23:21:44 -0400 Subject: [PATCH 21/33] swap: Get rid of undefined behavior in swapf and swapd This isn't well-defined in C++. --- src/common/swap.h | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/src/common/swap.h b/src/common/swap.h index f51751d29..beedd6c7e 100644 --- a/src/common/swap.h +++ b/src/common/swap.h @@ -25,6 +25,8 @@ #include #endif +#include <cstring> + #include "common/common_types.h" // GCC 4.6+ @@ -89,27 +91,29 @@ inline u64 swap64(u64 data) {return ((u64)swap32(data) << 32) | swap32(data >> 3 #endif inline float swapf(float f) { - union { - float f; - unsigned int u32; - } dat1, dat2; + static_assert(sizeof(u32) == sizeof(float), + "float must be the same size as uint32_t."); - dat1.f = f; - dat2.u32 = swap32(dat1.u32); + u32 value; + std::memcpy(&value, &f, sizeof(u32)); - return dat2.f; + value = swap32(value); + std::memcpy(&f, &value, sizeof(u32)); + + return f; } inline double swapd(double f) { - union { - double f; - unsigned long long u64; - } dat1, dat2; + static_assert(sizeof(u64) == sizeof(double), + "double must be the same size as uint64_t."); - dat1.f = f; - dat2.u64 = swap64(dat1.u64); + u64 value; + std::memcpy(&value, &f, sizeof(u64)); - return dat2.f; + value = swap64(value); + std::memcpy(&f, &value, sizeof(u64)); + + return f; } } // Namespace Common From d5b983a8c000533ca17b727595e813113cd9d54c Mon Sep 17 00:00:00 2001 From: Lioncash Date: Sun, 8 May 2016 23:33:46 -0400 Subject: [PATCH 22/33] swap: Get rid of pointer casting for swapping structs These shouldn't haphazardly convert types --- src/common/swap.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/common/swap.h b/src/common/swap.h index beedd6c7e..1749bd7a4 100644 --- a/src/common/swap.h +++ b/src/common/swap.h @@ -510,35
+510,35 @@ bool operator==(const S &p, const swap_struct_t v) { template <typename T> struct swap_64_t { static T swap(T x) { - return (T)Common::swap64(*(u64 *)&x); + return static_cast<T>(Common::swap64(x)); } }; template <typename T> struct swap_32_t { static T swap(T x) { - return (T)Common::swap32(*(u32 *)&x); + return static_cast<T>(Common::swap32(x)); } }; template <typename T> struct swap_16_t { static T swap(T x) { - return (T)Common::swap16(*(u16 *)&x); + return static_cast<T>(Common::swap16(x)); } }; template <typename T> struct swap_float_t { static T swap(T x) { - return (T)Common::swapf(*(float *)&x); + return static_cast<T>(Common::swapf(x)); } }; template <typename T> struct swap_double_t { static T swap(T x) { - return (T)Common::swapd(*(double *)&x); + return static_cast<T>(Common::swapd(x)); } }; From da2400a601ff6ee15ed82f0248c6a0a507a8209e Mon Sep 17 00:00:00 2001 From: Lioncash Date: Mon, 9 May 2016 09:41:00 -0400 Subject: [PATCH 23/33] source: Fix missing logging arguments Silences two warnings on OSX. --- src/audio_core/hle/source.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/audio_core/hle/source.cpp b/src/audio_core/hle/source.cpp index daaf6e3f3..30552fe26 100644 --- a/src/audio_core/hle/source.cpp +++ b/src/audio_core/hle/source.cpp @@ -126,13 +126,13 @@ void Source::ParseConfig(SourceConfiguration::Configuration& config, const s16_l if (config.simple_filter_dirty) { config.simple_filter_dirty.Assign(0); state.filters.Configure(config.simple_filter); - LOG_TRACE(Audio_DSP, "source_id=%zu simple filter update"); + LOG_TRACE(Audio_DSP, "source_id=%zu simple filter update", source_id); } if (config.biquad_filter_dirty) { config.biquad_filter_dirty.Assign(0); state.filters.Configure(config.biquad_filter); - LOG_TRACE(Audio_DSP, "source_id=%zu biquad filter update"); + LOG_TRACE(Audio_DSP, "source_id=%zu biquad filter update", source_id); } if (config.interpolation_dirty) { From 0f941d0245a42497e430a7c76f868aa9b120159e Mon Sep 17 00:00:00 2001 From: Lioncash Date: Mon, 9 May 2016 16:28:03
-0400 Subject: [PATCH 24/33] dyncom: Reset the context into user mode correctly The other mode was system mode. --- src/core/arm/dyncom/arm_dyncom.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/arm/dyncom/arm_dyncom.cpp b/src/core/arm/dyncom/arm_dyncom.cpp index a3581132c..13492a08b 100644 --- a/src/core/arm/dyncom/arm_dyncom.cpp +++ b/src/core/arm/dyncom/arm_dyncom.cpp @@ -93,7 +93,7 @@ void ARM_DynCom::ResetContext(Core::ThreadContext& context, u32 stack_top, u32 e context.cpu_registers[0] = arg; context.pc = entry_point; context.sp = stack_top; - context.cpsr = 0x1F | ((entry_point & 1) << 5); // Usermode and THUMB mode + context.cpsr = USER32MODE | ((entry_point & 1) << 5); // Usermode and THUMB mode } void ARM_DynCom::SaveContext(Core::ThreadContext& ctx) { From 4c98113b572551f34a907094ca059de8a724c9b1 Mon Sep 17 00:00:00 2001 From: Jannik Vogel Date: Sun, 20 Mar 2016 01:53:49 +0100 Subject: [PATCH 25/33] Pica: Implement W-Buffer in SW rasterizer --- src/video_core/clipper.cpp | 4 +-- src/video_core/pica.h | 20 +++++++++++--- src/video_core/rasterizer.cpp | 26 ++++++++++++++++--- .../renderer_opengl/gl_rasterizer.cpp | 4 +-- 4 files changed, 43 insertions(+), 11 deletions(-) diff --git a/src/video_core/clipper.cpp b/src/video_core/clipper.cpp index 2bc747102..db99ce666 100644 --- a/src/video_core/clipper.cpp +++ b/src/video_core/clipper.cpp @@ -75,8 +75,6 @@ static void InitScreenCoordinates(OutputVertex& vtx) viewport.halfsize_y = float24::FromRaw(regs.viewport_size_y); viewport.offset_x = float24::FromFloat32(static_cast(regs.viewport_corner.x)); viewport.offset_y = float24::FromFloat32(static_cast(regs.viewport_corner.y)); - viewport.zscale = float24::FromRaw(regs.viewport_depth_range); - viewport.offset_z = float24::FromRaw(regs.viewport_depth_far_plane); float24 inv_w = float24::FromFloat32(1.f) / vtx.pos.w; vtx.color *= inv_w; @@ -89,7 +87,7 @@ static void InitScreenCoordinates(OutputVertex& vtx) vtx.screenpos[0] = 
(vtx.pos.x * inv_w + float24::FromFloat32(1.0)) * viewport.halfsize_x + viewport.offset_x; vtx.screenpos[1] = (vtx.pos.y * inv_w + float24::FromFloat32(1.0)) * viewport.halfsize_y + viewport.offset_y; - vtx.screenpos[2] = viewport.offset_z + vtx.pos.z * inv_w * viewport.zscale; + vtx.screenpos[2] = vtx.pos.z * inv_w; } void ProcessTriangle(const OutputVertex &v0, const OutputVertex &v1, const OutputVertex &v2) { diff --git a/src/video_core/pica.h b/src/video_core/pica.h index 5891fb72a..a81a7b984 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h @@ -70,7 +70,7 @@ struct Regs { INSERT_PADDING_WORDS(0x9); BitField<0, 24, u32> viewport_depth_range; // float24 - BitField<0, 24, u32> viewport_depth_far_plane; // float24 + BitField<0, 24, u32> viewport_depth_near_plane; // float24 BitField<0, 3, u32> vs_output_total; @@ -122,7 +122,20 @@ struct Regs { BitField<16, 10, s32> y; } viewport_corner; - INSERT_PADDING_WORDS(0x17); + INSERT_PADDING_WORDS(0x1); + + //TODO: early depth + INSERT_PADDING_WORDS(0x1); + + INSERT_PADDING_WORDS(0x2); + + enum DepthBuffering : u32 { + WBuffering = 0, + ZBuffering = 1, + }; + BitField< 0, 1, DepthBuffering> depthmap_enable; + + INSERT_PADDING_WORDS(0x12); struct TextureConfig { enum WrapMode : u32 { @@ -1279,10 +1292,11 @@ ASSERT_REG_POSITION(cull_mode, 0x40); ASSERT_REG_POSITION(viewport_size_x, 0x41); ASSERT_REG_POSITION(viewport_size_y, 0x43); ASSERT_REG_POSITION(viewport_depth_range, 0x4d); -ASSERT_REG_POSITION(viewport_depth_far_plane, 0x4e); +ASSERT_REG_POSITION(viewport_depth_near_plane, 0x4e); ASSERT_REG_POSITION(vs_output_attributes[0], 0x50); ASSERT_REG_POSITION(vs_output_attributes[1], 0x51); ASSERT_REG_POSITION(viewport_corner, 0x68); +ASSERT_REG_POSITION(depthmap_enable, 0x6D); ASSERT_REG_POSITION(texture0_enable, 0x80); ASSERT_REG_POSITION(texture0, 0x81); ASSERT_REG_POSITION(texture0_format, 0x8e); diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index df67b9081..80cad9056 100644 --- 
a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp @@ -862,10 +862,30 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, } } + // interpolated_z = z / w + float interpolated_z_over_w = (v0.screenpos[2].ToFloat32() * w0 + + v1.screenpos[2].ToFloat32() * w1 + + v2.screenpos[2].ToFloat32() * w2) / wsum; + + // Not fully accurate. About 3 bits in precision are missing. + // Z-Buffer (z / w * scale + offset) + float depth_scale = float24::FromRaw(regs.viewport_depth_range).ToFloat32(); + float depth_offset = float24::FromRaw(regs.viewport_depth_near_plane).ToFloat32(); + float depth = interpolated_z_over_w * depth_scale + depth_offset; + + // Potentially switch to W-Buffer + if (regs.depthmap_enable == Pica::Regs::DepthBuffering::WBuffering) { + + // W-Buffer (z * scale + w * offset = (z / w * scale + offset) * w) + depth *= interpolated_w_inverse.ToFloat32() * wsum; + } + + // Clamp the result + depth = MathUtil::Clamp(depth, 0.0f, 1.0f); + + // Convert float to integer unsigned num_bits = Regs::DepthBitsPerPixel(regs.framebuffer.depth_format); - u32 z = (u32)((v0.screenpos[2].ToFloat32() * w0 + - v1.screenpos[2].ToFloat32() * w1 + - v2.screenpos[2].ToFloat32() * w2) * ((1 << num_bits) - 1) / wsum); + u32 z = (u32)(depth * ((1 << num_bits) - 1)); if (output_merger.depth_test_enable) { u32 ref_z = GetDepth(x >> 4, y >> 4); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 0b471dfd2..5fc885961 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -256,7 +256,7 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { // Depth modifiers case PICA_REG_INDEX(viewport_depth_range): - case PICA_REG_INDEX(viewport_depth_far_plane): + case PICA_REG_INDEX(viewport_depth_near_plane): SyncDepthModifiers(); break; @@ -911,7 +911,7 @@ void RasterizerOpenGL::SyncCullMode() { void 
RasterizerOpenGL::SyncDepthModifiers() { float depth_scale = -Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_range).ToFloat32(); - float depth_offset = Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_far_plane).ToFloat32() / 2.0f; + float depth_offset = Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_near_plane).ToFloat32() / 2.0f; // TODO: Implement scale modifier uniform_block_data.data.depth_offset = depth_offset; From fc9cc21024bff71d98e0106c9d0fd0476ab3c17e Mon Sep 17 00:00:00 2001 From: Jannik Vogel Date: Wed, 30 Mar 2016 19:27:04 +0200 Subject: [PATCH 26/33] OpenGL: Implement W-Buffers and fix depth-mapping --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 11 ++++++++--- src/video_core/renderer_opengl/gl_rasterizer.h | 5 +++++ src/video_core/renderer_opengl/gl_shader_gen.cpp | 11 ++++++++++- 3 files changed, 23 insertions(+), 4 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 5fc885961..d1d9beccb 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -260,6 +260,11 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { SyncDepthModifiers(); break; + // Depth buffering + case PICA_REG_INDEX(depthmap_enable): + shader_dirty = true; + break; + // Blending case PICA_REG_INDEX(output_merger.alphablend_enable): SyncBlendEnabled(); @@ -910,10 +915,10 @@ void RasterizerOpenGL::SyncCullMode() { } void RasterizerOpenGL::SyncDepthModifiers() { - float depth_scale = -Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_range).ToFloat32(); - float depth_offset = Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_near_plane).ToFloat32() / 2.0f; + float depth_scale = Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_range).ToFloat32(); + float depth_offset = Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_near_plane).ToFloat32(); - // TODO: Implement scale 
modifier + uniform_block_data.data.depth_scale = depth_scale; uniform_block_data.data.depth_offset = depth_offset; uniform_block_data.dirty = true; } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 4f9a032fb..6fdb7f61b 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -56,6 +56,8 @@ union PicaShaderConfig { const auto& regs = Pica::g_state.regs; + state.depthmap_enable = regs.depthmap_enable; + state.alpha_test_func = regs.output_merger.alpha_test.enable ? regs.output_merger.alpha_test.func.Value() : Pica::Regs::CompareFunc::Always; @@ -171,6 +173,8 @@ union PicaShaderConfig { std::array tev_stages; u8 combiner_buffer_input; + Pica::Regs::DepthBuffering depthmap_enable; + struct { struct { unsigned num; @@ -315,6 +319,7 @@ private: GLvec4 const_color[6]; GLvec4 tev_combiner_buffer_color; GLint alphatest_ref; + GLfloat depth_scale; GLfloat depth_offset; alignas(16) GLvec3 lighting_global_ambient; LightSrc light_src[8]; diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 0890adb12..0c3153e8f 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -540,6 +540,7 @@ layout (std140) uniform shader_data { vec4 const_color[NUM_TEV_STAGES]; vec4 tev_combiner_buffer_color; int alphatest_ref; + float depth_scale; float depth_offset; vec3 lighting_global_ambient; LightSrc light_src[NUM_LIGHTS]; @@ -581,7 +582,15 @@ vec4 secondary_fragment_color = vec4(0.0); } out += "color = last_tex_env_out;\n"; - out += "gl_FragDepth = gl_FragCoord.z + depth_offset;\n}"; + + out += "float z_over_w = 1.0 - gl_FragCoord.z * 2.0;\n"; + out += "float depth = z_over_w * depth_scale + depth_offset;\n"; + if (state.depthmap_enable == Pica::Regs::DepthBuffering::WBuffering) { + out += "depth /= gl_FragCoord.w;\n"; + } + out += "gl_FragDepth 
= depth;\n"; + + out += "}"; return out; } From 0a52e1f587b899a759309ecb2f3f3f794a25a752 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Tue, 10 May 2016 09:17:07 -0400 Subject: [PATCH 27/33] gdbstub: Silence missing prototype warnings --- src/core/gdbstub/gdbstub.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/core/gdbstub/gdbstub.cpp b/src/core/gdbstub/gdbstub.cpp index 1360ee845..820b19e1a 100644 --- a/src/core/gdbstub/gdbstub.cpp +++ b/src/core/gdbstub/gdbstub.cpp @@ -437,7 +437,7 @@ static void HandleSetThread() { * * @param signal Signal to be sent to client. */ -void SendSignal(u32 signal) { +static void SendSignal(u32 signal) { if (gdbserver_socket == -1) { return; } @@ -713,7 +713,7 @@ static void Continue() { * @param addr Address of breakpoint. * @param len Length of breakpoint. */ -bool CommitBreakpoint(BreakpointType type, PAddr addr, u32 len) { +static bool CommitBreakpoint(BreakpointType type, PAddr addr, u32 len) { std::map& p = GetBreakpointList(type); Breakpoint breakpoint; @@ -907,7 +907,7 @@ void ToggleServer(bool status) { } } -void Init(u16 port) { +static void Init(u16 port) { if (!g_server_enabled) { // Set the halt loop to false in case the user enabled the gdbstub mid-execution. // This way the CPU can still execute normally. From 75e5d0a6a0bd6851a3986e4fd51adf6d9034707d Mon Sep 17 00:00:00 2001 From: Lioncash Date: Tue, 10 May 2016 09:04:56 -0400 Subject: [PATCH 28/33] gl_rasterizer: Fix compilation for debug builds --- src/video_core/renderer_opengl/gl_rasterizer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 4f9a032fb..5afcf296d 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -63,7 +63,7 @@ union PicaShaderConfig { // We don't sync const_color here because of the high variance, it is a // shader uniform instead. 
const auto& tev_stages = regs.GetTevStages(); - DEBUG_ASSERT(res.tev_stages.size() == tev_stages.size()); + DEBUG_ASSERT(state.tev_stages.size() == tev_stages.size()); for (size_t i = 0; i < tev_stages.size(); i++) { const auto& tev_stage = tev_stages[i]; state.tev_stages[i].sources_raw = tev_stage.sources_raw; From 9cfebb93345bc81347ae0ab22d5bc8e2c994d76a Mon Sep 17 00:00:00 2001 From: Jannik Vogel Date: Mon, 18 Apr 2016 10:44:30 +0200 Subject: [PATCH 29/33] Pica: Add texture type to state --- src/video_core/pica.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/video_core/pica.h b/src/video_core/pica.h index a81a7b984..86c0a0096 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h @@ -138,6 +138,15 @@ struct Regs { INSERT_PADDING_WORDS(0x12); struct TextureConfig { + enum TextureType : u32 { + Texture2D = 0, + TextureCube = 1, + Shadow2D = 2, + Projection2D = 3, + ShadowCube = 4, + Disabled = 5, + }; + enum WrapMode : u32 { ClampToEdge = 0, ClampToBorder = 1, @@ -168,6 +177,7 @@ struct Regs { BitField< 2, 1, TextureFilter> min_filter; BitField< 8, 2, WrapMode> wrap_t; BitField<12, 2, WrapMode> wrap_s; + BitField<28, 2, TextureType> type; ///< @note Only valid for texture 0 according to 3DBrew. 
}; INSERT_PADDING_WORDS(0x1); From 2f8e8e14551040168e60d3553a0847fd5a6d2b7b Mon Sep 17 00:00:00 2001 From: Jannik Vogel Date: Mon, 18 Apr 2016 10:51:13 +0200 Subject: [PATCH 30/33] Pica: Add tc0.w to OutputVertex --- src/video_core/shader/shader.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h index 56b83bfeb..891d2fb19 100644 --- a/src/video_core/shader/shader.h +++ b/src/video_core/shader/shader.h @@ -43,7 +43,8 @@ struct OutputVertex { Math::Vec4 color; Math::Vec2 tc0; Math::Vec2 tc1; - INSERT_PADDING_WORDS(2); + float24 tc0_w; + INSERT_PADDING_WORDS(1); Math::Vec3 view; INSERT_PADDING_WORDS(1); Math::Vec2 tc2; From 4311297eb1c62b5f7b2213bff87ce2fd3630256b Mon Sep 17 00:00:00 2001 From: Jannik Vogel Date: Tue, 19 Apr 2016 00:53:28 +0200 Subject: [PATCH 31/33] Rasterizer: Implement texture type 3 --- src/video_core/rasterizer.cpp | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index 80cad9056..65168f05a 100644 --- a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp @@ -442,8 +442,33 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, DEBUG_ASSERT(0 != texture.config.address); - int s = (int)(uv[i].u() * float24::FromFloat32(static_cast(texture.config.width))).ToFloat32(); - int t = (int)(uv[i].v() * float24::FromFloat32(static_cast(texture.config.height))).ToFloat32(); + float24 u = uv[i].u(); + float24 v = uv[i].v(); + + // Only unit 0 respects the texturing type (according to 3DBrew) + // TODO: Refactor so cubemaps and shadowmaps can be handled + if (i == 0) { + switch(texture.config.type) { + case Regs::TextureConfig::Texture2D: + break; + case Regs::TextureConfig::Projection2D: { + auto tc0_w = GetInterpolatedAttribute(v0.tc0_w, v1.tc0_w, v2.tc0_w); + u /= tc0_w; + v /= tc0_w; + break; + } + default: + // TODO: Change to LOG_ERROR 
when more types are handled. + LOG_DEBUG(HW_GPU, "Unhandled texture type %x", (int)texture.config.type); + UNIMPLEMENTED(); + break; + } + } + + int s = (int)(u * float24::FromFloat32(static_cast(texture.config.width))).ToFloat32(); + int t = (int)(v * float24::FromFloat32(static_cast(texture.config.height))).ToFloat32(); + + static auto GetWrappedTexCoord = [](Regs::TextureConfig::WrapMode mode, int val, unsigned size) { switch (mode) { case Regs::TextureConfig::ClampToEdge: From 5a7306d6dfb04cf21990a58cddb5b03ca2541fa0 Mon Sep 17 00:00:00 2001 From: Jannik Vogel Date: Tue, 19 Apr 2016 00:53:42 +0200 Subject: [PATCH 32/33] OpenGL: Implement texture type 3 --- .../renderer_opengl/gl_rasterizer.cpp | 8 ++ .../renderer_opengl/gl_rasterizer.h | 5 ++ .../renderer_opengl/gl_shader_gen.cpp | 88 +++++++++++-------- .../renderer_opengl/gl_shader_util.h | 1 + 4 files changed, 67 insertions(+), 35 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index d1d9beccb..ed2e2f3ae 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -76,6 +76,9 @@ RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) { glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD1); glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD2); + glVertexAttribPointer(GLShader::ATTRIBUTE_TEXCOORD0_W, 1, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, tex_coord0_w)); + glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD0_W); + glVertexAttribPointer(GLShader::ATTRIBUTE_NORMQUAT, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, normquat)); glEnableVertexAttribArray(GLShader::ATTRIBUTE_NORMQUAT); @@ -319,6 +322,11 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { SyncLogicOp(); break; + // Texture 0 type + case PICA_REG_INDEX(texture0.type): + shader_dirty = true; + break; + // TEV stages case 
PICA_REG_INDEX(tev_stage0.color_source1): case PICA_REG_INDEX(tev_stage0.color_modifier1): diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index b9315ed33..eed00011a 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -61,6 +61,8 @@ union PicaShaderConfig { state.alpha_test_func = regs.output_merger.alpha_test.enable ? regs.output_merger.alpha_test.func.Value() : Pica::Regs::CompareFunc::Always; + state.texture0_type = regs.texture0.type; + // Copy relevant tev stages fields. // We don't sync const_color here because of the high variance, it is a // shader uniform instead. @@ -170,6 +172,7 @@ union PicaShaderConfig { struct State { Pica::Regs::CompareFunc alpha_test_func; + Pica::Regs::TextureConfig::TextureType texture0_type; std::array tev_stages; u8 combiner_buffer_input; @@ -281,6 +284,7 @@ private: tex_coord1[1] = v.tc1.y.ToFloat32(); tex_coord2[0] = v.tc2.x.ToFloat32(); tex_coord2[1] = v.tc2.y.ToFloat32(); + tex_coord0_w = v.tc0_w.ToFloat32(); normquat[0] = v.quat.x.ToFloat32(); normquat[1] = v.quat.y.ToFloat32(); normquat[2] = v.quat.z.ToFloat32(); @@ -301,6 +305,7 @@ private: GLfloat tex_coord0[2]; GLfloat tex_coord1[2]; GLfloat tex_coord2[2]; + GLfloat tex_coord0_w; GLfloat normquat[4]; GLfloat view[3]; }; diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 0c3153e8f..71d60e69c 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -32,8 +32,9 @@ static bool IsPassThroughTevStage(const TevStageConfig& stage) { } /// Writes the specified TEV stage source component(s) -static void AppendSource(std::string& out, TevStageConfig::Source source, +static void AppendSource(std::string& out, const PicaShaderConfig& config, TevStageConfig::Source source, const std::string& index_name) { + const auto& state = 
config.state; using Source = TevStageConfig::Source; switch (source) { case Source::PrimaryColor: @@ -46,7 +47,20 @@ static void AppendSource(std::string& out, TevStageConfig::Source source, out += "secondary_fragment_color"; break; case Source::Texture0: - out += "texture(tex[0], texcoord[0])"; + // Only unit 0 respects the texturing type (according to 3DBrew) + switch(state.texture0_type) { + case Pica::Regs::TextureConfig::Texture2D: + out += "texture(tex[0], texcoord[0])"; + break; + case Pica::Regs::TextureConfig::Projection2D: + out += "textureProj(tex[0], vec3(texcoord[0], texcoord0_w))"; + break; + default: + out += "texture(tex[0], texcoord[0])"; + LOG_CRITICAL(HW_GPU, "Unhandled texture type %x", static_cast(state.texture0_type)); + UNIMPLEMENTED(); + break; + } break; case Source::Texture1: out += "texture(tex[1], texcoord[1])"; @@ -71,53 +85,53 @@ static void AppendSource(std::string& out, TevStageConfig::Source source, } /// Writes the color components to use for the specified TEV stage color modifier -static void AppendColorModifier(std::string& out, TevStageConfig::ColorModifier modifier, +static void AppendColorModifier(std::string& out, const PicaShaderConfig& config, TevStageConfig::ColorModifier modifier, TevStageConfig::Source source, const std::string& index_name) { using ColorModifier = TevStageConfig::ColorModifier; switch (modifier) { case ColorModifier::SourceColor: - AppendSource(out, source, index_name); + AppendSource(out, config, source, index_name); out += ".rgb"; break; case ColorModifier::OneMinusSourceColor: out += "vec3(1.0) - "; - AppendSource(out, source, index_name); + AppendSource(out, config, source, index_name); out += ".rgb"; break; case ColorModifier::SourceAlpha: - AppendSource(out, source, index_name); + AppendSource(out, config, source, index_name); out += ".aaa"; break; case ColorModifier::OneMinusSourceAlpha: out += "vec3(1.0) - "; - AppendSource(out, source, index_name); + AppendSource(out, config, source, 
index_name); out += ".aaa"; break; case ColorModifier::SourceRed: - AppendSource(out, source, index_name); + AppendSource(out, config, source, index_name); out += ".rrr"; break; case ColorModifier::OneMinusSourceRed: out += "vec3(1.0) - "; - AppendSource(out, source, index_name); + AppendSource(out, config, source, index_name); out += ".rrr"; break; case ColorModifier::SourceGreen: - AppendSource(out, source, index_name); + AppendSource(out, config, source, index_name); out += ".ggg"; break; case ColorModifier::OneMinusSourceGreen: out += "vec3(1.0) - "; - AppendSource(out, source, index_name); + AppendSource(out, config, source, index_name); out += ".ggg"; break; case ColorModifier::SourceBlue: - AppendSource(out, source, index_name); + AppendSource(out, config, source, index_name); out += ".bbb"; break; case ColorModifier::OneMinusSourceBlue: out += "vec3(1.0) - "; - AppendSource(out, source, index_name); + AppendSource(out, config, source, index_name); out += ".bbb"; break; default: @@ -128,44 +142,44 @@ static void AppendColorModifier(std::string& out, TevStageConfig::ColorModifier } /// Writes the alpha component to use for the specified TEV stage alpha modifier -static void AppendAlphaModifier(std::string& out, TevStageConfig::AlphaModifier modifier, +static void AppendAlphaModifier(std::string& out, const PicaShaderConfig& config, TevStageConfig::AlphaModifier modifier, TevStageConfig::Source source, const std::string& index_name) { using AlphaModifier = TevStageConfig::AlphaModifier; switch (modifier) { case AlphaModifier::SourceAlpha: - AppendSource(out, source, index_name); + AppendSource(out, config, source, index_name); out += ".a"; break; case AlphaModifier::OneMinusSourceAlpha: out += "1.0 - "; - AppendSource(out, source, index_name); + AppendSource(out, config, source, index_name); out += ".a"; break; case AlphaModifier::SourceRed: - AppendSource(out, source, index_name); + AppendSource(out, config, source, index_name); out += ".r"; break; case 
AlphaModifier::OneMinusSourceRed: out += "1.0 - "; - AppendSource(out, source, index_name); + AppendSource(out, config, source, index_name); out += ".r"; break; case AlphaModifier::SourceGreen: - AppendSource(out, source, index_name); + AppendSource(out, config, source, index_name); out += ".g"; break; case AlphaModifier::OneMinusSourceGreen: out += "1.0 - "; - AppendSource(out, source, index_name); + AppendSource(out, config, source, index_name); out += ".g"; break; case AlphaModifier::SourceBlue: - AppendSource(out, source, index_name); + AppendSource(out, config, source, index_name); out += ".b"; break; case AlphaModifier::OneMinusSourceBlue: out += "1.0 - "; - AppendSource(out, source, index_name); + AppendSource(out, config, source, index_name); out += ".b"; break; default: @@ -292,11 +306,11 @@ static void WriteTevStage(std::string& out, const PicaShaderConfig& config, unsi std::string index_name = std::to_string(index); out += "vec3 color_results_" + index_name + "[3] = vec3[3]("; - AppendColorModifier(out, stage.color_modifier1, stage.color_source1, index_name); + AppendColorModifier(out, config, stage.color_modifier1, stage.color_source1, index_name); out += ", "; - AppendColorModifier(out, stage.color_modifier2, stage.color_source2, index_name); + AppendColorModifier(out, config, stage.color_modifier2, stage.color_source2, index_name); out += ", "; - AppendColorModifier(out, stage.color_modifier3, stage.color_source3, index_name); + AppendColorModifier(out, config, stage.color_modifier3, stage.color_source3, index_name); out += ");\n"; out += "vec3 color_output_" + index_name + " = "; @@ -304,11 +318,11 @@ static void WriteTevStage(std::string& out, const PicaShaderConfig& config, unsi out += ";\n"; out += "float alpha_results_" + index_name + "[3] = float[3]("; - AppendAlphaModifier(out, stage.alpha_modifier1, stage.alpha_source1, index_name); + AppendAlphaModifier(out, config, stage.alpha_modifier1, stage.alpha_source1, index_name); out += ", "; - 
AppendAlphaModifier(out, stage.alpha_modifier2, stage.alpha_source2, index_name); + AppendAlphaModifier(out, config, stage.alpha_modifier2, stage.alpha_source2, index_name); out += ", "; - AppendAlphaModifier(out, stage.alpha_modifier3, stage.alpha_source3, index_name); + AppendAlphaModifier(out, config, stage.alpha_modifier3, stage.alpha_source3, index_name); out += ");\n"; out += "float alpha_output_" + index_name + " = "; @@ -523,6 +537,7 @@ std::string GenerateFragmentShader(const PicaShaderConfig& config) { in vec4 primary_color; in vec2 texcoord[3]; +in float texcoord0_w; in vec4 normquat; in vec3 view; @@ -598,17 +613,19 @@ vec4 secondary_fragment_color = vec4(0.0); std::string GenerateVertexShader() { std::string out = "#version 330 core\n"; - out += "layout(location = " + std::to_string((int)ATTRIBUTE_POSITION) + ") in vec4 vert_position;\n"; - out += "layout(location = " + std::to_string((int)ATTRIBUTE_COLOR) + ") in vec4 vert_color;\n"; - out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD0) + ") in vec2 vert_texcoord0;\n"; - out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD1) + ") in vec2 vert_texcoord1;\n"; - out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD2) + ") in vec2 vert_texcoord2;\n"; - out += "layout(location = " + std::to_string((int)ATTRIBUTE_NORMQUAT) + ") in vec4 vert_normquat;\n"; - out += "layout(location = " + std::to_string((int)ATTRIBUTE_VIEW) + ") in vec3 vert_view;\n"; + out += "layout(location = " + std::to_string((int)ATTRIBUTE_POSITION) + ") in vec4 vert_position;\n"; + out += "layout(location = " + std::to_string((int)ATTRIBUTE_COLOR) + ") in vec4 vert_color;\n"; + out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD0) + ") in vec2 vert_texcoord0;\n"; + out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD1) + ") in vec2 vert_texcoord1;\n"; + out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD2) + ") in vec2 vert_texcoord2;\n"; 
+ out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD0_W) + ") in float vert_texcoord0_w;\n"; + out += "layout(location = " + std::to_string((int)ATTRIBUTE_NORMQUAT) + ") in vec4 vert_normquat;\n"; + out += "layout(location = " + std::to_string((int)ATTRIBUTE_VIEW) + ") in vec3 vert_view;\n"; out += R"( out vec4 primary_color; out vec2 texcoord[3]; +out float texcoord0_w; out vec4 normquat; out vec3 view; @@ -617,6 +634,7 @@ void main() { texcoord[0] = vert_texcoord0; texcoord[1] = vert_texcoord1; texcoord[2] = vert_texcoord2; + texcoord0_w = vert_texcoord0_w; normquat = vert_normquat; view = vert_view; gl_Position = vec4(vert_position.x, vert_position.y, -vert_position.z, vert_position.w); diff --git a/src/video_core/renderer_opengl/gl_shader_util.h b/src/video_core/renderer_opengl/gl_shader_util.h index 097242f6f..f59912f79 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.h +++ b/src/video_core/renderer_opengl/gl_shader_util.h @@ -14,6 +14,7 @@ enum Attributes { ATTRIBUTE_TEXCOORD0, ATTRIBUTE_TEXCOORD1, ATTRIBUTE_TEXCOORD2, + ATTRIBUTE_TEXCOORD0_W, ATTRIBUTE_NORMQUAT, ATTRIBUTE_VIEW, }; From ae7a82fa1c65b4efc9e7bf9863fa229778a72d1c Mon Sep 17 00:00:00 2001 From: Jannik Vogel Date: Wed, 30 Mar 2016 02:45:18 +0200 Subject: [PATCH 33/33] Turn ShaderSetup into struct --- .../debugger/graphics_vertex_shader.cpp | 2 +- src/video_core/command_processor.cpp | 8 +- src/video_core/pica.cpp | 2 +- src/video_core/shader/shader.cpp | 22 ++--- src/video_core/shader/shader.h | 83 ++++++++++--------- 5 files changed, 59 insertions(+), 58 deletions(-) diff --git a/src/citra_qt/debugger/graphics_vertex_shader.cpp b/src/citra_qt/debugger/graphics_vertex_shader.cpp index 854f6ff16..391666d35 100644 --- a/src/citra_qt/debugger/graphics_vertex_shader.cpp +++ b/src/citra_qt/debugger/graphics_vertex_shader.cpp @@ -501,7 +501,7 @@ void GraphicsVertexShaderWidget::Reload(bool replace_vertex_data, void* vertex_d info.labels.insert({ entry_point, "main" }); // 
Generate debug information - debug_data = Pica::Shader::ProduceDebugInfo(input_vertex, num_attributes, shader_config, shader_setup); + debug_data = Pica::g_state.vs.ProduceDebugInfo(input_vertex, num_attributes, shader_config, shader_setup); // Reload widget state for (int attr = 0; attr < num_attributes; ++attr) { diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index dd1379503..e7dc5ddac 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -144,12 +144,12 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { immediate_attribute_id = 0; Shader::UnitState shader_unit; - Shader::Setup(); + g_state.vs.Setup(); // Send to vertex shader if (g_debug_context) g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, static_cast(&immediate_input)); - Shader::OutputVertex output = Shader::Run(shader_unit, immediate_input, regs.vs.num_input_attributes+1); + Shader::OutputVertex output = g_state.vs.Run(shader_unit, immediate_input, regs.vs.num_input_attributes+1); // Send to renderer using Pica::Shader::OutputVertex; @@ -237,7 +237,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { vertex_cache_ids.fill(-1); Shader::UnitState shader_unit; - Shader::Setup(); + g_state.vs.Setup(); for (unsigned int index = 0; index < regs.num_vertices; ++index) { @@ -274,7 +274,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { // Send to vertex shader if (g_debug_context) g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, (void*)&input); - output = Shader::Run(shader_unit, input, loader.GetNumTotalAttributes()); + output = g_state.vs.Run(shader_unit, input, loader.GetNumTotalAttributes()); if (is_indexed) { vertex_cache[vertex_cache_pos] = output; diff --git a/src/video_core/pica.cpp b/src/video_core/pica.cpp index be82cf4b5..ec78f9593 100644 --- a/src/video_core/pica.cpp +++ b/src/video_core/pica.cpp @@ -500,7 +500,7 @@ void Init() { } void Shutdown() { - 
Shader::Shutdown(); + Shader::ClearCache(); } template diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp index 65dcc9156..449fc703f 100644 --- a/src/video_core/shader/shader.cpp +++ b/src/video_core/shader/shader.cpp @@ -35,7 +35,13 @@ static std::unordered_map> shader_map; static const JitShader* jit_shader; #endif // ARCHITECTURE_x86_64 -void Setup() { +void ClearCache() { +#ifdef ARCHITECTURE_x86_64 + shader_map.clear(); +#endif // ARCHITECTURE_x86_64 +} + +void ShaderSetup::Setup() { #ifdef ARCHITECTURE_x86_64 if (VideoCore::g_shader_jit_enabled) { u64 cache_key = (Common::ComputeHash64(&g_state.vs.program_code, sizeof(g_state.vs.program_code)) ^ @@ -54,18 +60,12 @@ void Setup() { #endif // ARCHITECTURE_x86_64 } -void Shutdown() { -#ifdef ARCHITECTURE_x86_64 - shader_map.clear(); -#endif // ARCHITECTURE_x86_64 -} +MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240)); -MICROPROFILE_DEFINE(GPU_VertexShader, "GPU", "Vertex Shader", MP_RGB(50, 50, 240)); - -OutputVertex Run(UnitState& state, const InputVertex& input, int num_attributes) { +OutputVertex ShaderSetup::Run(UnitState& state, const InputVertex& input, int num_attributes) { auto& config = g_state.regs.vs; - MICROPROFILE_SCOPE(GPU_VertexShader); + MICROPROFILE_SCOPE(GPU_Shader); state.program_counter = config.main_offset; state.debug.max_offset = 0; @@ -140,7 +140,7 @@ OutputVertex Run(UnitState& state, const InputVertex& input, int num_attr return ret; } -DebugData ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup) { +DebugData ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup) { UnitState state; state.program_counter = config.main_offset; diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h index 56b83bfeb..cfbb7f2ee 100644 --- a/src/video_core/shader/shader.h +++ 
b/src/video_core/shader/shader.h @@ -83,23 +83,6 @@ struct OutputVertex { static_assert(std::is_pod::value, "Structure is not POD"); static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size"); -/// Vertex shader memory -struct ShaderSetup { - struct { - // The float uniforms are accessed by the shader JIT using SSE instructions, and are - // therefore required to be 16-byte aligned. - alignas(16) Math::Vec4 f[96]; - - std::array b; - std::array, 4> i; - } uniforms; - - Math::Vec4 default_attributes[16]; - - std::array program_code; - std::array swizzle_data; -}; - // Helper structure used to keep track of data useful for inspection of shader emulation template struct DebugData; @@ -342,33 +325,51 @@ struct UnitState { } }; -/** - * Performs any shader unit setup that only needs to happen once per shader (as opposed to once per - * vertex, which would happen within the `Run` function). - */ -void Setup(); +/// Clears the shader cache +void ClearCache(); -/// Performs any cleanup when the emulator is shutdown -void Shutdown(); +struct ShaderSetup { -/** - * Runs the currently setup shader - * @param state Shader unit state, must be setup per shader and per shader unit - * @param input Input vertex into the shader - * @param num_attributes The number of vertex shader attributes - * @return The output vertex, after having been processed by the vertex shader - */ -OutputVertex Run(UnitState& state, const InputVertex& input, int num_attributes); + struct { + // The float uniforms are accessed by the shader JIT using SSE instructions, and are + // therefore required to be 16-byte aligned. 
+ alignas(16) Math::Vec4 f[96]; -/** - * Produce debug information based on the given shader and input vertex - * @param input Input vertex into the shader - * @param num_attributes The number of vertex shader attributes - * @param config Configuration object for the shader pipeline - * @param setup Setup object for the shader pipeline - * @return Debug information for this shader with regards to the given vertex - */ -DebugData ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup); + std::array b; + std::array, 4> i; + } uniforms; + + Math::Vec4 default_attributes[16]; + + std::array program_code; + std::array swizzle_data; + + /** + * Performs any shader unit setup that only needs to happen once per shader (as opposed to once per + * vertex, which would happen within the `Run` function). + */ + void Setup(); + + /** + * Runs the currently setup shader + * @param state Shader unit state, must be setup per shader and per shader unit + * @param input Input vertex into the shader + * @param num_attributes The number of vertex shader attributes + * @return The output vertex, after having been processed by the vertex shader + */ + OutputVertex Run(UnitState& state, const InputVertex& input, int num_attributes); + + /** + * Produce debug information based on the given shader and input vertex + * @param input Input vertex into the shader + * @param num_attributes The number of vertex shader attributes + * @param config Configuration object for the shader pipeline + * @param setup Setup object for the shader pipeline + * @return Debug information for this shader with regards to the given vertex + */ + DebugData ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup); + +}; } // namespace Shader