diff --git a/src/common/logging/filter.cpp b/src/common/logging/filter.cpp index 04a3a5216f..344268b99b 100644 --- a/src/common/logging/filter.cpp +++ b/src/common/logging/filter.cpp @@ -153,6 +153,7 @@ bool ParseFilterRule(Filter& instance, Iterator begin, Iterator end) { SUB(Shader, SPIRV) \ SUB(Shader, GLASM) \ SUB(Shader, GLSL) \ + SUB(Shader, MSL) \ CLS(Audio) \ SUB(Audio, DSP) \ SUB(Audio, Sink) \ diff --git a/src/common/logging/types.h b/src/common/logging/types.h index 197c920a09..a3607b0f90 100644 --- a/src/common/logging/types.h +++ b/src/common/logging/types.h @@ -121,6 +121,7 @@ enum class Class : u8 { Shader_SPIRV, ///< Shader SPIR-V code generation Shader_GLASM, ///< Shader GLASM code generation Shader_GLSL, ///< Shader GLSL code generation + Shader_MSL, ///< Shader MSL code generation Audio, ///< Audio emulation Audio_DSP, ///< The HLE implementation of the DSP Audio_Sink, ///< Emulator audio output backend diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 3252010d9c..81d898a178 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -75,6 +75,31 @@ add_library(shader_recompiler STATIC backend/spirv/emit_spirv_warp.cpp backend/spirv/spirv_emit_context.cpp backend/spirv/spirv_emit_context.h + backend/msl/emit_msl.cpp + backend/msl/emit_msl.h + backend/msl/emit_msl_atomic.cpp + backend/msl/emit_msl_barriers.cpp + backend/msl/emit_msl_bitwise_conversion.cpp + backend/msl/emit_msl_composite.cpp + backend/msl/emit_msl_context_get_set.cpp + backend/msl/emit_msl_control_flow.cpp + backend/msl/emit_msl_convert.cpp + backend/msl/emit_msl_floating_point.cpp + backend/msl/emit_msl_image.cpp + backend/msl/emit_msl_instructions.h + backend/msl/emit_msl_integer.cpp + backend/msl/emit_msl_logical.cpp + backend/msl/emit_msl_memory.cpp + backend/msl/emit_msl_not_implemented.cpp + backend/msl/emit_msl_select.cpp + backend/msl/emit_msl_shared_memory.cpp + backend/msl/emit_msl_special.cpp + backend/msl/emit_msl_undefined.cpp + backend/msl/emit_msl_warp.cpp + backend/msl/msl_emit_context.cpp + backend/msl/msl_emit_context.h + backend/msl/var_alloc.cpp + backend/msl/var_alloc.h environment.h exception.h frontend/ir/abstract_syntax_list.h diff --git a/src/shader_recompiler/backend/msl/emit_msl.cpp b/src/shader_recompiler/backend/msl/emit_msl.cpp new file mode 100644 index 0000000000..dae8c4d1ee --- /dev/null +++ b/src/shader_recompiler/backend/msl/emit_msl.cpp @@ -0,0 +1,252 @@ +// SPDX-FileCopyrightText: Copyright 2024 suyu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include +#include +#include +#include + +#include "common/div_ceil.h" +#include "common/settings.h" +#include "shader_recompiler/backend/msl/emit_msl.h" +#include "shader_recompiler/backend/msl/emit_msl_instructions.h" +#include "shader_recompiler/backend/msl/msl_emit_context.h" +#include "shader_recompiler/frontend/ir/ir_emitter.h" + +namespace Shader::Backend::MSL { +namespace { +template +struct FuncTraits {}; + +template +struct FuncTraits { + using ReturnType = ReturnType_; + + static constexpr size_t NUM_ARGS = sizeof...(Args); + + template + using ArgType = std::tuple_element_t>; +}; + +template +void SetDefinition(EmitContext& ctx, IR::Inst* inst, Args... args) { + inst->SetDefinition(func(ctx, std::forward(args)...)); +} + +template +auto Arg(EmitContext& ctx, const IR::Value& arg) { + if constexpr (std::is_same_v) { + return ctx.var_alloc.Consume(arg); + } else if constexpr (std::is_same_v) { + return arg; + } else if constexpr (std::is_same_v) { + return arg.U32(); + } else if constexpr (std::is_same_v) { + return arg.Attribute(); + } else if constexpr (std::is_same_v) { + return arg.Patch(); + } else if constexpr (std::is_same_v) { + return arg.Reg(); + } +} + +template +void Invoke(EmitContext& ctx, IR::Inst* inst, std::index_sequence) { + using Traits = FuncTraits; + if constexpr (std::is_same_v) { + if constexpr (is_first_arg_inst) { + SetDefinition( + ctx, inst, *inst, + Arg>(ctx, inst->Arg(I))...); + } else { + SetDefinition( + ctx, inst, Arg>(ctx, inst->Arg(I))...); + } + } else { + if constexpr (is_first_arg_inst) { + func(ctx, *inst, Arg>(ctx, inst->Arg(I))...); + } else { + func(ctx, Arg>(ctx, inst->Arg(I))...); + } + } +} + +template +void Invoke(EmitContext& ctx, IR::Inst* inst) { + using Traits = FuncTraits; + static_assert(Traits::NUM_ARGS >= 1, "Insufficient arguments"); + if constexpr (Traits::NUM_ARGS == 1) { + Invoke(ctx, inst, std::make_index_sequence<0>{}); + } else { + using FirstArgType = typename Traits::template ArgType<1>; + static constexpr bool is_first_arg_inst = std::is_same_v; + using Indices = std::make_index_sequence; + Invoke(ctx, inst, Indices{}); + } +} + +void EmitInst(EmitContext& ctx, IR::Inst* inst) { + switch (inst->GetOpcode()) { +#define OPCODE(name, result_type, ...) \ + case IR::Opcode::name: \ + return Invoke<&Emit##name>(ctx, inst); +#include "shader_recompiler/frontend/ir/opcodes.inc" +#undef OPCODE + } + throw LogicError("Invalid opcode {}", inst->GetOpcode()); +} + +bool IsReference(IR::Inst& inst) { + return inst.GetOpcode() == IR::Opcode::Reference; +} + +void PrecolorInst(IR::Inst& phi) { + // Insert phi moves before references to avoid overwriting other phis + const size_t num_args{phi.NumArgs()}; + for (size_t i = 0; i < num_args; ++i) { + IR::Block& phi_block{*phi.PhiBlock(i)}; + auto it{std::find_if_not(phi_block.rbegin(), phi_block.rend(), IsReference).base()}; + IR::IREmitter ir{phi_block, it}; + const IR::Value arg{phi.Arg(i)}; + if (arg.IsImmediate()) { + ir.PhiMove(phi, arg); + } else { + ir.PhiMove(phi, IR::Value{arg.InstRecursive()}); + } + } + for (size_t i = 0; i < num_args; ++i) { + IR::IREmitter{*phi.PhiBlock(i)}.Reference(IR::Value{&phi}); + } +} + +void Precolor(const IR::Program& program) { + for (IR::Block* const block : program.blocks) { + for (IR::Inst& phi : block->Instructions()) { + if (!IR::IsPhi(phi)) { + break; + } + PrecolorInst(phi); + } + } +} + +void EmitCode(EmitContext& ctx, const IR::Program& program) { + for (const IR::AbstractSyntaxNode& node : program.syntax_list) { + switch (node.type) { + case IR::AbstractSyntaxNode::Type::Block: + for (IR::Inst& inst : node.data.block->Instructions()) { + EmitInst(ctx, &inst); + } + break; + case IR::AbstractSyntaxNode::Type::If: + ctx.Add("if({}){{", ctx.var_alloc.Consume(node.data.if_node.cond)); + break; + case IR::AbstractSyntaxNode::Type::EndIf: + ctx.Add("}}"); + break; + case IR::AbstractSyntaxNode::Type::Break: + if (node.data.break_node.cond.IsImmediate()) { + if (node.data.break_node.cond.U1()) { + ctx.Add("break;"); + } + } else { + ctx.Add("if({}){{break;}}", ctx.var_alloc.Consume(node.data.break_node.cond)); + } + break; + case IR::AbstractSyntaxNode::Type::Return: + case IR::AbstractSyntaxNode::Type::Unreachable: + ctx.Add("return;"); + break; + case IR::AbstractSyntaxNode::Type::Loop: + ctx.Add("for(;;){{"); + break; + case IR::AbstractSyntaxNode::Type::Repeat: + if (Settings::values.disable_shader_loop_safety_checks) { + ctx.Add("if(!{}){{break;}}}}", ctx.var_alloc.Consume(node.data.repeat.cond)); + } else { + ctx.Add("if(--loop{}<0 || !{}){{break;}}}}", ctx.num_safety_loop_vars++, + ctx.var_alloc.Consume(node.data.repeat.cond)); + } + break; + default: + throw NotImplementedException("AbstractSyntaxNode Type {}", node.type); + } + } +} + +std::string MslVersionSpecifier(const EmitContext& ctx) { + if (ctx.uses_y_direction) { + return " compatibility"; + } + return ""; +} + +bool IsPreciseType(MslVarType type) { + switch (type) { + case MslVarType::PrecF32: + case MslVarType::PrecF64: + return true; + default: + return false; + } +} + +void DefineVariables(const EmitContext& ctx, std::string& header) { + for (u32 i = 0; i < static_cast(MslVarType::Void); ++i) { + const auto type{static_cast(i)}; + const auto& tracker{ctx.var_alloc.GetUseTracker(type)}; + const auto type_name{ctx.var_alloc.GetMslType(type)}; + const bool has_precise_bug{ctx.stage == Stage::Fragment && ctx.profile.has_gl_precise_bug}; + const auto precise{!has_precise_bug && IsPreciseType(type) ? "precise " : ""}; + // Temps/return types that are never used are stored at index 0 + if (tracker.uses_temp) { + header += fmt::format("{}{} t{}={}(0);", precise, type_name, + ctx.var_alloc.Representation(0, type), type_name); + } + for (u32 index = 0; index < tracker.num_used; ++index) { + header += fmt::format("{}{} {}={}(0);", precise, type_name, + ctx.var_alloc.Representation(index, type), type_name); + } + } + for (u32 i = 0; i < ctx.num_safety_loop_vars; ++i) { + header += fmt::format("int loop{}=0x2000;", i); + } +} +} // Anonymous namespace + +std::string EmitMSL(const Profile& profile, const RuntimeInfo& runtime_info, IR::Program& program, + Bindings& bindings) { + EmitContext ctx{program, bindings, profile, runtime_info}; + Precolor(program); + EmitCode(ctx, program); + const std::string version{fmt::format("#version 460{}\n", MslVersionSpecifier(ctx))}; + ctx.header.insert(0, version); + if (program.shared_memory_size > 0) { + const auto requested_size{program.shared_memory_size}; + const auto max_size{profile.gl_max_compute_smem_size}; + const bool needs_clamp{requested_size > max_size}; + if (needs_clamp) { + LOG_WARNING(Shader_MSL, "Requested shared memory size ({}) exceeds device limit ({})", + requested_size, max_size); + } + const auto smem_size{needs_clamp ? max_size : requested_size}; + ctx.header += fmt::format("shared uint smem[{}];", Common::DivCeil(smem_size, 4U)); + } + ctx.header += "void main(){\n"; + if (program.local_memory_size > 0) { + ctx.header += fmt::format("uint lmem[{}];", Common::DivCeil(program.local_memory_size, 4U)); + } + DefineVariables(ctx, ctx.header); + if (ctx.uses_cc_carry) { + ctx.header += "uint carry;"; + } + if (program.info.uses_subgroup_shuffles) { + ctx.header += "bool shfl_in_bounds;"; + ctx.header += "uint shfl_result;"; + } + ctx.code.insert(0, ctx.header); + ctx.code += '}'; + return ctx.code; +} + +} // namespace Shader::Backend::MSL diff --git a/src/shader_recompiler/backend/msl/emit_msl.h b/src/shader_recompiler/backend/msl/emit_msl.h new file mode 100644 index 0000000000..76ba8a1f5c --- /dev/null +++ b/src/shader_recompiler/backend/msl/emit_msl.h @@ -0,0 +1,23 @@ +// SPDX-FileCopyrightText: Copyright 2024 suyu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include + +#include "shader_recompiler/backend/bindings.h" +#include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/profile.h" +#include "shader_recompiler/runtime_info.h" + +namespace Shader::Backend::MSL { + +[[nodiscard]] std::string EmitMSL(const Profile& profile, const RuntimeInfo& runtime_info, + IR::Program& program, Bindings& bindings); + +[[nodiscard]] inline std::string EmitMSL(const Profile& profile, IR::Program& program) { + Bindings binding; + return EmitMSL(profile, {}, program, binding); +} + +} // namespace Shader::Backend::MSL diff --git a/src/shader_recompiler/backend/msl/emit_msl_atomic.cpp b/src/shader_recompiler/backend/msl/emit_msl_atomic.cpp new file mode 100644 index 0000000000..ffb59b1a34 --- /dev/null +++ b/src/shader_recompiler/backend/msl/emit_msl_atomic.cpp @@ -0,0 +1,559 @@ +// SPDX-FileCopyrightText: Copyright 2024 suyu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include + +#include "shader_recompiler/backend/msl/emit_msl_instructions.h" +#include "shader_recompiler/backend/msl/msl_emit_context.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::MSL { +namespace { +constexpr char cas_loop[]{ + "for (;;){{uint old={};{}=atomicCompSwap({},old,{}({},{}));if({}==old){{break;}}}}"}; + +void SharedCasFunction(EmitContext& ctx, IR::Inst& inst, std::string_view offset, + std::string_view value, std::string_view function) { + const auto ret{ctx.var_alloc.Define(inst, MslVarType::U32)}; + const std::string smem{fmt::format("smem[{}>>2]", offset)}; + ctx.Add(cas_loop, smem, ret, smem, function, smem, value, ret); +} + +void SsboCasFunction(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value, std::string_view function) { + const auto ret{ctx.var_alloc.Define(inst, MslVarType::U32)}; + const std::string ssbo{fmt::format("{}_ssbo{}[{}>>2]", ctx.stage_name, binding.U32(), + ctx.var_alloc.Consume(offset))}; + ctx.Add(cas_loop, ssbo, ret, ssbo, function, ssbo, value, ret); +} + +void SsboCasFunctionF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value, + std::string_view function) { + const std::string ssbo{fmt::format("{}_ssbo{}[{}>>2]", ctx.stage_name, binding.U32(), + ctx.var_alloc.Consume(offset))}; + const auto ret{ctx.var_alloc.Define(inst, MslVarType::U32)}; + ctx.Add(cas_loop, ssbo, ret, ssbo, function, ssbo, value, ret); + ctx.AddF32("{}=utof({});", inst, ret); +} +} // Anonymous namespace + +void EmitSharedAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value) { + ctx.AddU32("{}=atomicAdd(smem[{}>>2],{});", inst, pointer_offset, value); +} + +void EmitSharedAtomicSMin32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value) { + const std::string u32_value{fmt::format("uint({})", value)}; + SharedCasFunction(ctx, inst, pointer_offset, u32_value, "CasMinS32"); +} + +void EmitSharedAtomicUMin32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value) { + ctx.AddU32("{}=atomicMin(smem[{}>>2],{});", inst, pointer_offset, value); +} + +void EmitSharedAtomicSMax32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value) { + const std::string u32_value{fmt::format("uint({})", value)}; + SharedCasFunction(ctx, inst, pointer_offset, u32_value, "CasMaxS32"); +} + +void EmitSharedAtomicUMax32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value) { + ctx.AddU32("{}=atomicMax(smem[{}>>2],{});", inst, pointer_offset, value); +} + +void EmitSharedAtomicInc32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value) { + SharedCasFunction(ctx, inst, pointer_offset, value, "CasIncrement"); +} + +void EmitSharedAtomicDec32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value) { + SharedCasFunction(ctx, inst, pointer_offset, value, "CasDecrement"); +} + +void EmitSharedAtomicAnd32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value) { + ctx.AddU32("{}=atomicAnd(smem[{}>>2],{});", inst, pointer_offset, value); +} + +void EmitSharedAtomicOr32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value) { + ctx.AddU32("{}=atomicOr(smem[{}>>2],{});", inst, pointer_offset, value); +} + +void EmitSharedAtomicXor32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value) { + ctx.AddU32("{}=atomicXor(smem[{}>>2],{});", inst, pointer_offset, value); +} + +void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value) { + ctx.AddU32("{}=atomicExchange(smem[{}>>2],{});", inst, pointer_offset, value); +} + +void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value) { + LOG_WARNING(Shader_MSL, "Int64 atomics not supported, fallback to non-atomic"); + ctx.AddU64("{}=packUint2x32(uvec2(smem[{}>>2],smem[({}+4)>>2]));", inst, pointer_offset, + pointer_offset); + ctx.Add("smem[{}>>2]=unpackUint2x32({}).x;smem[({}+4)>>2]=unpackUint2x32({}).y;", + pointer_offset, value, pointer_offset, value); +} + +void EmitSharedAtomicExchange32x2(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value) { + LOG_WARNING(Shader_MSL, "Int64 atomics not supported, fallback to non-atomic"); + ctx.AddU32x2("{}=uvec2(smem[{}>>2],smem[({}+4)>>2]);", inst, pointer_offset, pointer_offset); + ctx.Add("smem[{}>>2]={}.x;smem[({}+4)>>2]={}.y;", pointer_offset, value, pointer_offset, value); +} + +void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + ctx.AddU32("{}=atomicAdd({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(), + ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + const std::string u32_value{fmt::format("uint({})", value)}; + SsboCasFunction(ctx, inst, binding, offset, u32_value, "CasMinS32"); +} + +void EmitStorageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + ctx.AddU32("{}=atomicMin({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(), + ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + const std::string u32_value{fmt::format("uint({})", value)}; + SsboCasFunction(ctx, inst, binding, offset, u32_value, "CasMaxS32"); +} + +void EmitStorageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + ctx.AddU32("{}=atomicMax({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(), + ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + SsboCasFunction(ctx, inst, binding, offset, value, "CasIncrement"); +} + +void EmitStorageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + SsboCasFunction(ctx, inst, binding, offset, value, "CasDecrement"); +} + +void EmitStorageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + ctx.AddU32("{}=atomicAnd({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(), + ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + ctx.AddU32("{}=atomicOr({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(), + ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + ctx.AddU32("{}=atomicXor({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(), + ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + ctx.AddU32("{}=atomicExchange({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(), + ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + LOG_WARNING(Shader_MSL, "Int64 atomics not supported, fallback to non-atomic"); + ctx.AddU64("{}=packUint2x32(uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst, + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset)); + ctx.Add("{}_ssbo{}[{}>>2]+=unpackUint2x32({}).x;{}_ssbo{}[({}>>2)+1]+=unpackUint2x32({}).y;", + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value, ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + LOG_WARNING(Shader_MSL, "Int64 atomics not supported, fallback to non-atomic"); + ctx.AddU64("{}=packInt2x32(ivec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst, + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset)); + ctx.Add("for(int i=0;i<2;++i){{ " + "{}_ssbo{}[({}>>2)+i]=uint(min(int({}_ssbo{}[({}>>2)+i]),unpackInt2x32(int64_t({}))[i])" + ");}}", + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + LOG_WARNING(Shader_MSL, "Int64 atomics not supported, fallback to non-atomic"); + ctx.AddU64("{}=packUint2x32(uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst, + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset)); + ctx.Add("for(int i=0;i<2;++i){{ " + "{}_ssbo{}[({}>>2)+i]=min({}_ssbo{}[({}>>2)+i],unpackUint2x32(uint64_t({}))[i]);}}", + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + LOG_WARNING(Shader_MSL, "Int64 atomics not supported, fallback to non-atomic"); + ctx.AddU64("{}=packInt2x32(ivec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst, + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset)); + ctx.Add("for(int i=0;i<2;++i){{ " + "{}_ssbo{}[({}>>2)+i]=uint(max(int({}_ssbo{}[({}>>2)+i]),unpackInt2x32(int64_t({}))[i])" + ");}}", + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + LOG_WARNING(Shader_MSL, "Int64 atomics not supported, fallback to non-atomic"); + ctx.AddU64("{}=packUint2x32(uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst, + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset)); + ctx.Add("for(int " + "i=0;i<2;++i){{{}_ssbo{}[({}>>2)+i]=max({}_ssbo{}[({}>>2)+i],unpackUint2x32(uint64_t({}" + "))[i]);}}", + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicAnd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + ctx.AddU64( + "{}=packUint2x32(uvec2(atomicAnd({}_ssbo{}[{}>>2],unpackUint2x32({}).x),atomicAnd({}_" + "ssbo{}[({}>>2)+1],unpackUint2x32({}).y)));", + inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value, ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicOr64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + ctx.AddU64("{}=packUint2x32(uvec2(atomicOr({}_ssbo{}[{}>>2],unpackUint2x32({}).x),atomicOr({}_" + "ssbo{}[({}>>2)+1],unpackUint2x32({}).y)));", + inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value, + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + ctx.AddU64( + "{}=packUint2x32(uvec2(atomicXor({}_ssbo{}[{}>>2],unpackUint2x32({}).x),atomicXor({}_" + "ssbo{}[({}>>2)+1],unpackUint2x32({}).y)));", + inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value, ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + ctx.AddU64("{}=packUint2x32(uvec2(atomicExchange({}_ssbo{}[{}>>2],unpackUint2x32({}).x)," + "atomicExchange({}_ssbo{}[({}>>2)+1],unpackUint2x32({}).y)));", + inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value, + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicIAdd32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + LOG_WARNING(Shader_MSL, "Int64 atomics not supported, fallback to non-atomic"); + ctx.AddU32x2("{}=uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]);", inst, ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, binding.U32(), + ctx.var_alloc.Consume(offset)); + ctx.Add("{}_ssbo{}[{}>>2]+={}.x;{}_ssbo{}[({}>>2)+1]+={}.y;", ctx.stage_name, binding.U32(), + ctx.var_alloc.Consume(offset), value, ctx.stage_name, binding.U32(), + ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicSMin32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + LOG_WARNING(Shader_MSL, "Int64 atomics not supported, fallback to non-atomic"); + ctx.AddU32x2("{}=ivec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]);", inst, ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, binding.U32(), + ctx.var_alloc.Consume(offset)); + ctx.Add("for(int " + "i=0;i<2;++i){{{}_ssbo{}[({}>>2)+i]=uint(min(int({}_ssbo{}[({}>>2)+i]),int({}[i])));}}", + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicUMin32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + LOG_WARNING(Shader_MSL, "Int64 atomics not supported, fallback to non-atomic"); + ctx.AddU32x2("{}=uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]);", inst, ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, binding.U32(), + ctx.var_alloc.Consume(offset)); + ctx.Add("for(int i=0;i<2;++i){{ " + "{}_ssbo{}[({}>>2)+i]=min({}_ssbo{}[({}>>2)+i],{}[i]);}}", + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicSMax32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + LOG_WARNING(Shader_MSL, "Int64 atomics not supported, fallback to non-atomic"); + ctx.AddU32x2("{}=ivec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]);", inst, ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, binding.U32(), + ctx.var_alloc.Consume(offset)); + ctx.Add("for(int " + "i=0;i<2;++i){{{}_ssbo{}[({}>>2)+i]=uint(max(int({}_ssbo{}[({}>>2)+i]),int({}[i])));}}", + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicUMax32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + LOG_WARNING(Shader_MSL, "Int64 atomics not supported, fallback to non-atomic"); + ctx.AddU32x2("{}=uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]);", inst, ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, binding.U32(), + ctx.var_alloc.Consume(offset)); + ctx.Add("for(int i=0;i<2;++i){{{}_ssbo{}[({}>>2)+i]=max({}_ssbo{}[({}>>2)+i],{}[i]);}}", + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicAnd32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + LOG_WARNING(Shader_MSL, "Int64 atomics not supported, fallback to 32x2"); + ctx.AddU32x2("{}=uvec2(atomicAnd({}_ssbo{}[{}>>2],{}.x),atomicAnd({}_ssbo{}[({}>>2)+1],{}.y));", + inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value, + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicOr32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + LOG_WARNING(Shader_MSL, "Int64 atomics not supported, fallback to 32x2"); + ctx.AddU32x2("{}=uvec2(atomicOr({}_ssbo{}[{}>>2],{}.x),atomicOr({}_ssbo{}[({}>>2)+1],{}.y));", + inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value, + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicXor32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + LOG_WARNING(Shader_MSL, "Int64 atomics not supported, fallback to 32x2"); + ctx.AddU32x2("{}=uvec2(atomicXor({}_ssbo{}[{}>>2],{}.x),atomicXor({}_ssbo{}[({}>>2)+1],{}.y));", + inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value, + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicExchange32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + LOG_WARNING(Shader_MSL, "Int64 atomics not supported, fallback to 32x2"); + ctx.AddU32x2("{}=uvec2(atomicExchange({}_ssbo{}[{}>>2],{}.x),atomicExchange({}_ssbo{}[({}>>2)+" + "1],{}.y));", + inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value, + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + SsboCasFunctionF32(ctx, inst, binding, offset, value, "CasFloatAdd"); +} + +void EmitStorageAtomicAddF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + SsboCasFunction(ctx, inst, binding, offset, value, "CasFloatAdd16x2"); +} + +void EmitStorageAtomicAddF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + SsboCasFunction(ctx, inst, binding, offset, value, "CasFloatAdd32x2"); +} + +void EmitStorageAtomicMinF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + SsboCasFunction(ctx, inst, binding, offset, value, "CasFloatMin16x2"); +} + +void EmitStorageAtomicMinF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + SsboCasFunction(ctx, inst, binding, offset, value, "CasFloatMin32x2"); +} + +void EmitStorageAtomicMaxF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + SsboCasFunction(ctx, inst, binding, offset, value, "CasFloatMax16x2"); +} + +void EmitStorageAtomicMaxF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + SsboCasFunction(ctx, inst, binding, offset, value, "CasFloatMax32x2"); +} + +void EmitGlobalAtomicIAdd32(EmitContext&) { + throw NotImplementedException("MSL Instruction"); +} + +void EmitGlobalAtomicSMin32(EmitContext&) { + throw NotImplementedException("MSL Instruction"); +} + +void EmitGlobalAtomicUMin32(EmitContext&) { + throw NotImplementedException("MSL Instruction"); +} + +void EmitGlobalAtomicSMax32(EmitContext&) { + throw NotImplementedException("MSL Instruction"); +} + +void EmitGlobalAtomicUMax32(EmitContext&) { + throw NotImplementedException("MSL Instruction"); +} + +void EmitGlobalAtomicInc32(EmitContext&) { + throw NotImplementedException("MSL Instruction"); +} + +void EmitGlobalAtomicDec32(EmitContext&) { + throw NotImplementedException("MSL Instruction"); +} + +void EmitGlobalAtomicAnd32(EmitContext&) { + throw NotImplementedException("MSL Instruction"); +} + +void EmitGlobalAtomicOr32(EmitContext&) { + throw NotImplementedException("MSL Instruction"); +} + +void EmitGlobalAtomicXor32(EmitContext&) { + throw NotImplementedException("MSL Instruction"); +} + +void EmitGlobalAtomicExchange32(EmitContext&) { + throw NotImplementedException("MSL Instruction"); +} + +void EmitGlobalAtomicIAdd64(EmitContext&) { + throw NotImplementedException("MSL Instruction"); +} + +void EmitGlobalAtomicSMin64(EmitContext&) { + throw NotImplementedException("MSL Instruction"); +} + +void EmitGlobalAtomicUMin64(EmitContext&) { + throw NotImplementedException("MSL Instruction"); +} + +void EmitGlobalAtomicSMax64(EmitContext&) { + throw NotImplementedException("MSL Instruction"); +} + +void EmitGlobalAtomicUMax64(EmitContext&) { + throw NotImplementedException("MSL Instruction"); +} + +void EmitGlobalAtomicInc64(EmitContext&) { + throw NotImplementedException("MSL Instruction"); +} + +void EmitGlobalAtomicDec64(EmitContext&) { + throw NotImplementedException("MSL Instruction"); +} + +void EmitGlobalAtomicAnd64(EmitContext&) { + throw NotImplementedException("MSL Instruction"); +} + +void EmitGlobalAtomicOr64(EmitContext&) { + throw NotImplementedException("MSL Instruction"); +} + +void EmitGlobalAtomicXor64(EmitContext&) { + throw NotImplementedException("MSL Instruction"); +} + +void EmitGlobalAtomicExchange64(EmitContext&) { + throw NotImplementedException("MSL Instruction"); +} + +void EmitGlobalAtomicIAdd32x2(EmitContext&) { + throw NotImplementedException("MSL Instruction"); +} + +void EmitGlobalAtomicSMin32x2(EmitContext&) { + throw NotImplementedException("MSL Instruction"); +} + +void EmitGlobalAtomicUMin32x2(EmitContext&) { + throw NotImplementedException("MSL Instruction"); +} + +void EmitGlobalAtomicSMax32x2(EmitContext&) { + throw NotImplementedException("MSL Instruction"); +} + +void EmitGlobalAtomicUMax32x2(EmitContext&) { + throw NotImplementedException("MSL Instruction"); +} + +void EmitGlobalAtomicInc32x2(EmitContext&) { + throw NotImplementedException("MSL Instruction"); +} + +void EmitGlobalAtomicDec32x2(EmitContext&) { + throw NotImplementedException("MSL Instruction"); +} + +void EmitGlobalAtomicAnd32x2(EmitContext&) { + throw NotImplementedException("MSL Instruction"); +} + +void EmitGlobalAtomicOr32x2(EmitContext&) { + throw NotImplementedException("MSL Instruction"); +} + +void EmitGlobalAtomicXor32x2(EmitContext&) { + throw NotImplementedException("MSL Instruction"); +} + +void EmitGlobalAtomicExchange32x2(EmitContext&) { + throw NotImplementedException("MSL Instruction"); +} + +void EmitGlobalAtomicAddF32(EmitContext&) { + throw NotImplementedException("MSL Instruction"); +} + +void EmitGlobalAtomicAddF16x2(EmitContext&) { + throw NotImplementedException("MSL Instruction"); +} + +void EmitGlobalAtomicAddF32x2(EmitContext&) { + throw NotImplementedException("MSL Instruction"); +} + +void EmitGlobalAtomicMinF16x2(EmitContext&) { + throw NotImplementedException("MSL Instruction"); +} + +void EmitGlobalAtomicMinF32x2(EmitContext&) { + throw NotImplementedException("MSL Instruction"); +} + +void EmitGlobalAtomicMaxF16x2(EmitContext&) { + throw NotImplementedException("MSL Instruction"); +} + +void EmitGlobalAtomicMaxF32x2(EmitContext&) { + throw NotImplementedException("MSL Instruction"); +} +} // namespace Shader::Backend::MSL diff --git a/src/shader_recompiler/backend/msl/emit_msl_barriers.cpp b/src/shader_recompiler/backend/msl/emit_msl_barriers.cpp new file mode 100644 index 0000000000..3d231b26cf --- /dev/null +++ b/src/shader_recompiler/backend/msl/emit_msl_barriers.cpp @@ -0,0 +1,19 @@ +// SPDX-FileCopyrightText: Copyright 2024 suyu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "shader_recompiler/backend/msl/emit_msl_instructions.h" +#include "shader_recompiler/backend/msl/msl_emit_context.h" + +namespace Shader::Backend::MSL { +void EmitBarrier(EmitContext& ctx) { + ctx.Add("barrier();"); +} + +void EmitWorkgroupMemoryBarrier(EmitContext& ctx) { + ctx.Add("groupMemoryBarrier();"); +} + +void EmitDeviceMemoryBarrier(EmitContext& ctx) { + ctx.Add("memoryBarrier();"); +} +} // namespace Shader::Backend::MSL diff --git a/src/shader_recompiler/backend/msl/emit_msl_bitwise_conversion.cpp b/src/shader_recompiler/backend/msl/emit_msl_bitwise_conversion.cpp new file mode 100644 index 0000000000..7dd9bdc6c9 --- /dev/null +++ b/src/shader_recompiler/backend/msl/emit_msl_bitwise_conversion.cpp @@ -0,0 +1,95 @@ +// SPDX-FileCopyrightText: Copyright 2024 suyu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include + +#include "shader_recompiler/backend/msl/emit_msl_instructions.h" +#include "shader_recompiler/backend/msl/msl_emit_context.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/profile.h" + +namespace Shader::Backend::MSL { +namespace { +void Alias(IR::Inst& inst, const IR::Value& value) { + if (value.IsImmediate()) { + return; + } + IR::Inst& value_inst{*value.InstRecursive()}; + value_inst.DestructiveAddUsage(inst.UseCount()); + value_inst.DestructiveRemoveUsage(); + inst.SetDefinition(value_inst.Definition()); +} +} // Anonymous namespace + +void EmitIdentity(EmitContext&, IR::Inst& inst, const IR::Value& value) { + Alias(inst, value); +} + +void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value) { + // Fake one usage to get a real variable out of the condition + inst.DestructiveAddUsage(1); + const auto ret{ctx.var_alloc.Define(inst, MslVarType::U1)}; + const auto input{ctx.var_alloc.Consume(value)}; + const auto suffix{ctx.profile.has_gl_bool_ref_bug ? "?true:false" : ""}; + if (ret != input) { + ctx.Add("{}={}{};", ret, input, suffix); + } +} + +void EmitBitCastU16F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst) { + NotImplemented(); +} + +void EmitBitCastU32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32("{}=ftou({});", inst, value); +} + +void EmitBitCastU64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU64("{}=doubleBitsToUint64({});", inst, value); +} + +void EmitBitCastF16U16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst) { + NotImplemented(); +} + +void EmitBitCastF32U32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=utof({});", inst, value); +} + +void EmitBitCastF64U64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF64("{}=uint64BitsToDouble({});", inst, value); +} + +void EmitPackUint2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU64("{}=packUint2x32({});", inst, value); +} + +void EmitUnpackUint2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32x2("{}=unpackUint2x32({});", inst, value); +} + +void EmitPackFloat2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32("{}=packFloat2x16({});", inst, value); +} + +void EmitUnpackFloat2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF16x2("{}=unpackFloat2x16({});", inst, value); +} + +void EmitPackHalf2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32("{}=packHalf2x16({});", inst, value); +} + +void EmitUnpackHalf2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32x2("{}=unpackHalf2x16({});", inst, value); +} + +void EmitPackDouble2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF64("{}=packDouble2x32({});", inst, value); +} + +void EmitUnpackDouble2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32x2("{}=unpackDouble2x32({});", inst, value); +} + +} // namespace Shader::Backend::MSL diff --git a/src/shader_recompiler/backend/msl/emit_msl_composite.cpp b/src/shader_recompiler/backend/msl/emit_msl_composite.cpp new file mode 100644 index 0000000000..710ed151f9 --- /dev/null +++ b/src/shader_recompiler/backend/msl/emit_msl_composite.cpp @@ -0,0 +1,218 @@ +// SPDX-FileCopyrightText: Copyright 2024 suyu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include + +#include "shader_recompiler/backend/msl/emit_msl_instructions.h" +#include "shader_recompiler/backend/msl/msl_emit_context.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::MSL { +namespace { +constexpr std::string_view SWIZZLE{"xyzw"}; +void CompositeInsert(EmitContext& ctx, std::string_view result, std::string_view composite, + std::string_view object, u32 index) { + if (result == composite) { + // The result is aliased with the composite + ctx.Add("{}.{}={};", composite, SWIZZLE[index], object); + } else { + ctx.Add("{}={};{}.{}={};", result, composite, result, SWIZZLE[index], object); + } +} +} // Anonymous namespace + +void EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view e1, + std::string_view e2) { + ctx.AddU32x2("{}=uvec2({},{});", inst, e1, e2); +} + +void EmitCompositeConstructU32x3(EmitContext& ctx, IR::Inst& inst, std::string_view e1, + std::string_view e2, std::string_view e3) { + ctx.AddU32x3("{}=uvec3({},{},{});", inst, e1, e2, e3); +} + +void EmitCompositeConstructU32x4(EmitContext& ctx, IR::Inst& inst, std::string_view e1, + std::string_view e2, std::string_view e3, std::string_view e4) { + ctx.AddU32x4("{}=uvec4({},{},{},{});", inst, e1, e2, e3, e4); +} + +void EmitCompositeExtractU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + u32 index) { + ctx.AddU32("{}={}.{};", inst, composite, SWIZZLE[index]); +} + +void EmitCompositeExtractU32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + u32 index) { + ctx.AddU32("{}={}.{};", inst, composite, SWIZZLE[index]); +} + +void EmitCompositeExtractU32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + u32 index) { + ctx.AddU32("{}={}.{};", inst, composite, SWIZZLE[index]); +} + +void EmitCompositeInsertU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + std::string_view object, u32 index) { + const auto ret{ctx.var_alloc.Define(inst, MslVarType::U32x2)}; + CompositeInsert(ctx, ret, composite, object, index); +} + +void EmitCompositeInsertU32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + std::string_view object, u32 index) { + const auto ret{ctx.var_alloc.Define(inst, MslVarType::U32x3)}; + CompositeInsert(ctx, ret, composite, object, index); +} + +void EmitCompositeInsertU32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + std::string_view object, u32 index) { + const auto ret{ctx.var_alloc.Define(inst, MslVarType::U32x4)}; + CompositeInsert(ctx, ret, composite, object, index); +} + +void EmitCompositeConstructF16x2([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view e1, + [[maybe_unused]] std::string_view e2) { + NotImplemented(); +} + +void EmitCompositeConstructF16x3([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view e1, + [[maybe_unused]] std::string_view e2, + [[maybe_unused]] std::string_view e3) { + NotImplemented(); +} + +void EmitCompositeConstructF16x4([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view e1, + [[maybe_unused]] std::string_view e2, + [[maybe_unused]] std::string_view e3, + [[maybe_unused]] std::string_view e4) { + NotImplemented(); +} + +void EmitCompositeExtractF16x2([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view composite, + [[maybe_unused]] u32 index) { + NotImplemented(); +} + +void EmitCompositeExtractF16x3([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view composite, + [[maybe_unused]] u32 index) { + NotImplemented(); +} + +void EmitCompositeExtractF16x4([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view composite, + [[maybe_unused]] u32 index) { + NotImplemented(); +} + +void EmitCompositeInsertF16x2([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view composite, + [[maybe_unused]] std::string_view object, + [[maybe_unused]] u32 index) { + NotImplemented(); +} + +void EmitCompositeInsertF16x3([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view composite, + [[maybe_unused]] std::string_view object, + [[maybe_unused]] u32 index) { + NotImplemented(); +} + +void EmitCompositeInsertF16x4([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view composite, + [[maybe_unused]] std::string_view object, + [[maybe_unused]] u32 index) { + NotImplemented(); +} + +void EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view e1, + std::string_view e2) { + ctx.AddF32x2("{}=vec2({},{});", inst, e1, e2); +} + +void EmitCompositeConstructF32x3(EmitContext& ctx, IR::Inst& inst, std::string_view e1, + std::string_view e2, std::string_view e3) { + ctx.AddF32x3("{}=vec3({},{},{});", inst, e1, e2, e3); +} + +void EmitCompositeConstructF32x4(EmitContext& ctx, IR::Inst& inst, std::string_view e1, + std::string_view e2, std::string_view e3, std::string_view e4) { + ctx.AddF32x4("{}=vec4({},{},{},{});", inst, e1, e2, e3, e4); +} + +void EmitCompositeExtractF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + u32 index) { + ctx.AddF32("{}={}.{};", inst, composite, SWIZZLE[index]); +} + +void EmitCompositeExtractF32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + u32 index) { + ctx.AddF32("{}={}.{};", inst, composite, SWIZZLE[index]); +} + +void EmitCompositeExtractF32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + u32 index) { + ctx.AddF32("{}={}.{};", inst, composite, SWIZZLE[index]); +} + +void EmitCompositeInsertF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + std::string_view object, u32 index) { + const auto ret{ctx.var_alloc.Define(inst, MslVarType::F32x2)}; + CompositeInsert(ctx, ret, composite, object, index); +} + +void EmitCompositeInsertF32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + std::string_view object, u32 index) { + const auto ret{ctx.var_alloc.Define(inst, MslVarType::F32x3)}; + CompositeInsert(ctx, ret, composite, object, index); +} + +void EmitCompositeInsertF32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + std::string_view object, u32 index) { + const auto ret{ctx.var_alloc.Define(inst, MslVarType::F32x4)}; + CompositeInsert(ctx, ret, composite, object, index); +} + +void EmitCompositeConstructF64x2([[maybe_unused]] EmitContext& ctx) { + NotImplemented(); +} + +void EmitCompositeConstructF64x3([[maybe_unused]] EmitContext& ctx) { + NotImplemented(); +} + +void EmitCompositeConstructF64x4([[maybe_unused]] EmitContext& ctx) { + NotImplemented(); +} + +void EmitCompositeExtractF64x2([[maybe_unused]] EmitContext& ctx) { + NotImplemented(); +} + +void EmitCompositeExtractF64x3([[maybe_unused]] EmitContext& ctx) { + NotImplemented(); +} + +void EmitCompositeExtractF64x4([[maybe_unused]] EmitContext& ctx) { + NotImplemented(); +} + +void EmitCompositeInsertF64x2(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index) { + ctx.Add("{}.{}={};", composite, SWIZZLE[index], object); +} + +void EmitCompositeInsertF64x3(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index) { + ctx.Add("{}.{}={};", composite, SWIZZLE[index], object); +} + +void EmitCompositeInsertF64x4(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index) { + ctx.Add("{}.{}={};", composite, SWIZZLE[index], object); +} +} // namespace Shader::Backend::MSL diff --git a/src/shader_recompiler/backend/msl/emit_msl_context_get_set.cpp b/src/shader_recompiler/backend/msl/emit_msl_context_get_set.cpp new file mode 100644 index 0000000000..e51d063a32 --- /dev/null +++ b/src/shader_recompiler/backend/msl/emit_msl_context_get_set.cpp @@ -0,0 +1,469 @@ +// SPDX-FileCopyrightText: Copyright 2024 suyu Emulator Project +// SPDX-FileCopyrightText: 2024 sudachi Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include + +#include "shader_recompiler/backend/msl/emit_msl_instructions.h" +#include "shader_recompiler/backend/msl/msl_emit_context.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/profile.h" +#include "shader_recompiler/runtime_info.h" + +namespace Shader::Backend::MSL { +namespace { +constexpr char SWIZZLE[]{"xyzw"}; + +u32 CbufIndex(u32 offset) { + return (offset / 4) % 4; +} + +char OffsetSwizzle(u32 offset) { + return SWIZZLE[CbufIndex(offset)]; +} + +bool IsInputArray(Stage stage) { + return stage == Stage::Geometry || stage == Stage::TessellationControl || + stage == Stage::TessellationEval; +} + +std::string InputVertexIndex(EmitContext& ctx, std::string_view vertex) { + return IsInputArray(ctx.stage) ? fmt::format("[{}]", vertex) : ""; +} + +std::string_view OutputVertexIndex(EmitContext& ctx) { + return ctx.stage == Stage::TessellationControl ? "[gl_InvocationID]" : ""; +} + +std::string ChooseCbuf(EmitContext& ctx, const IR::Value& binding, std::string_view index) { + if (binding.IsImmediate()) { + return fmt::format("{}_cbuf{}[{}]", ctx.stage_name, binding.U32(), index); + } else { + const auto binding_var{ctx.var_alloc.Consume(binding)}; + return fmt::format("GetCbufIndirect({},{})", binding_var, index); + } +} + +void GetCbuf(EmitContext& ctx, std::string_view ret, const IR::Value& binding, + const IR::Value& offset, u32 num_bits, std::string_view cast = {}, + std::string_view bit_offset = {}) { + const bool is_immediate{offset.IsImmediate()}; + const bool component_indexing_bug{!is_immediate && ctx.profile.has_gl_component_indexing_bug}; + if (is_immediate) { + const s32 signed_offset{static_cast(offset.U32())}; + static constexpr u32 cbuf_size{0x10000}; + if (signed_offset < 0 || offset.U32() > cbuf_size) { + LOG_WARNING(Shader_MSL, "Immediate constant buffer offset is out of bounds"); + ctx.Add("{}=0u;", ret); + return; + } + } + const auto offset_var{ctx.var_alloc.Consume(offset)}; + const auto index{is_immediate ? fmt::format("{}", offset.U32() / 16) + : fmt::format("{}>>4", offset_var)}; + const auto swizzle{is_immediate ? fmt::format(".{}", OffsetSwizzle(offset.U32())) + : fmt::format("[({}>>2)%4]", offset_var)}; + + const auto cbuf{ChooseCbuf(ctx, binding, index)}; + const auto cbuf_cast{fmt::format("{}({}{{}})", cast, cbuf)}; + const auto extraction{num_bits == 32 ? cbuf_cast + : fmt::format("bitfieldExtract({},int({}),{})", cbuf_cast, + bit_offset, num_bits)}; + if (!component_indexing_bug) { + const auto result{fmt::format(fmt::runtime(extraction), swizzle)}; + ctx.Add("{}={};", ret, result); + return; + } + const auto cbuf_offset{fmt::format("{}>>2", offset_var)}; + for (u32 i = 0; i < 4; ++i) { + const auto swizzle_string{fmt::format(".{}", "xyzw"[i])}; + const auto result{fmt::format(fmt::runtime(extraction), swizzle_string)}; + ctx.Add("if(({}&3)=={}){}={};", cbuf_offset, i, ret, result); + } +} + +void GetCbuf8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, + std::string_view cast) { + const auto ret{ctx.var_alloc.Define(inst, MslVarType::U32)}; + if (offset.IsImmediate()) { + const auto bit_offset{fmt::format("{}", (offset.U32() % 4) * 8)}; + GetCbuf(ctx, ret, binding, offset, 8, cast, bit_offset); + } else { + const auto offset_var{ctx.var_alloc.Consume(offset)}; + const auto bit_offset{fmt::format("({}%4)*8", offset_var)}; + GetCbuf(ctx, ret, binding, offset, 8, cast, bit_offset); + } +} + +void GetCbuf16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, + std::string_view cast) { + const auto ret{ctx.var_alloc.Define(inst, MslVarType::U32)}; + if (offset.IsImmediate()) { + const auto bit_offset{fmt::format("{}", ((offset.U32() / 2) % 2) * 16)}; + GetCbuf(ctx, ret, binding, offset, 16, cast, bit_offset); + } else { + const auto offset_var{ctx.var_alloc.Consume(offset)}; + const auto bit_offset{fmt::format("(({}>>1)%2)*16", offset_var)}; + GetCbuf(ctx, ret, binding, offset, 16, cast, bit_offset); + } +} +} // Anonymous namespace + +void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset) { + const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "" : "ftou"}; + GetCbuf8(ctx, inst, binding, offset, cast); +} + +void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset) { + const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "int" : "ftoi"}; + GetCbuf8(ctx, inst, binding, offset, cast); +} + +void EmitGetCbufU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset) { + const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "" : "ftou"}; + GetCbuf16(ctx, inst, binding, offset, cast); +} + +void EmitGetCbufS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset) { + const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "int" : "ftoi"}; + GetCbuf16(ctx, inst, binding, offset, cast); +} + +void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset) { + const auto ret{ctx.var_alloc.Define(inst, MslVarType::U32)}; + const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "" : "ftou"}; + GetCbuf(ctx, ret, binding, offset, 32, cast); +} + +void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset) { + const auto ret{ctx.var_alloc.Define(inst, MslVarType::F32)}; + const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "utof" : ""}; + GetCbuf(ctx, ret, binding, offset, 32, cast); +} + +void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset) { + const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "" : "ftou"}; + if (offset.IsImmediate()) { + const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())}; + static constexpr u32 cbuf_size{0x10000}; + const u32 u32_offset{offset.U32()}; + const s32 signed_offset{static_cast(offset.U32())}; + if (signed_offset < 0 || u32_offset > cbuf_size) { + LOG_WARNING(Shader_MSL, "Immediate constant buffer offset is out of bounds"); + ctx.AddU32x2("{}=uvec2(0u);", inst); + return; + } + if (u32_offset % 2 == 0) { + ctx.AddU32x2("{}={}({}[{}].{}{});", inst, cast, cbuf, u32_offset / 16, + OffsetSwizzle(u32_offset), OffsetSwizzle(u32_offset + 4)); + } else { + ctx.AddU32x2("{}=uvec2({}({}[{}].{}),{}({}[{}].{}));", inst, cast, cbuf, + u32_offset / 16, OffsetSwizzle(u32_offset), cast, cbuf, + (u32_offset + 4) / 16, OffsetSwizzle(u32_offset + 4)); + } + return; + } + const auto offset_var{ctx.var_alloc.Consume(offset)}; + const auto cbuf{ChooseCbuf(ctx, binding, fmt::format("{}>>4", offset_var))}; + if (!ctx.profile.has_gl_component_indexing_bug) { + ctx.AddU32x2("{}=uvec2({}({}[({}>>2)%4]),{}({}[(({}+4)>>2)%4]));", inst, cast, cbuf, + offset_var, cast, cbuf, offset_var); + return; + } + const auto ret{ctx.var_alloc.Define(inst, MslVarType::U32x2)}; + const auto cbuf_offset{fmt::format("{}>>2", offset_var)}; + for (u32 swizzle = 0; swizzle < 4; ++swizzle) { + ctx.Add("if(({}&3)=={}){}=uvec2({}({}.{}),{}({}.{}));", cbuf_offset, swizzle, ret, cast, + cbuf, "xyzw"[swizzle], cast, cbuf, "xyzw"[(swizzle + 1) % 4]); + } +} + +void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, + std::string_view vertex) { + const u32 element{static_cast(attr) % 4}; + const char swizzle{"xyzw"[element]}; + if (IR::IsGeneric(attr)) { + const u32 index{IR::GenericAttributeIndex(attr)}; + if (!ctx.runtime_info.previous_stage_stores.Generic(index, element)) { + if (element == 3) { + ctx.AddF32("{}=1.f;", inst, attr); + } else { + ctx.AddF32("{}=0.f;", inst, attr); + } + return; + } + ctx.AddF32("{}=in_attr{}{}.{};", inst, index, InputVertexIndex(ctx, vertex), swizzle); + return; + } + switch (attr) { + case IR::Attribute::PrimitiveId: + ctx.AddF32("{}=itof(gl_PrimitiveID);", inst); + break; + case IR::Attribute::Layer: + ctx.AddF32("{}=itof(gl_Layer);", inst); + break; + case IR::Attribute::PositionX: + case IR::Attribute::PositionY: + case IR::Attribute::PositionZ: + case IR::Attribute::PositionW: { + const bool is_array{IsInputArray(ctx.stage)}; + const auto input_decorator{is_array ? fmt::format("gl_in[{}].", vertex) : ""}; + ctx.AddF32("{}={}{}.{};", inst, input_decorator, ctx.position_name, swizzle); + break; + } + case IR::Attribute::PointSpriteS: + case IR::Attribute::PointSpriteT: + ctx.AddF32("{}=gl_PointCoord.{};", inst, swizzle); + break; + case IR::Attribute::TessellationEvaluationPointU: + case IR::Attribute::TessellationEvaluationPointV: + ctx.AddF32("{}=gl_TessCoord.{};", inst, swizzle); + break; + case IR::Attribute::InstanceId: + ctx.AddF32("{}=itof(gl_InstanceID);", inst); + break; + case IR::Attribute::VertexId: + ctx.AddF32("{}=itof(gl_VertexID);", inst); + break; + case IR::Attribute::FrontFace: + ctx.AddF32("{}=itof(gl_FrontFacing?-1:0);", inst); + break; + case IR::Attribute::BaseInstance: + ctx.AddF32("{}=itof(gl_BaseInstance);", inst); + break; + case IR::Attribute::BaseVertex: + ctx.AddF32("{}=itof(gl_BaseVertex);", inst); + break; + case IR::Attribute::DrawID: + ctx.AddF32("{}=itof(gl_DrawID);", inst); + break; + default: + throw NotImplementedException("Get attribute {}", attr); + } +} + +void EmitGetAttributeU32(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, std::string_view) { + switch (attr) { + case IR::Attribute::PrimitiveId: + ctx.AddU32("{}=uint(gl_PrimitiveID);", inst); + break; + case IR::Attribute::InstanceId: + ctx.AddU32("{}=uint(gl_InstanceID);", inst); + break; + case IR::Attribute::VertexId: + ctx.AddU32("{}=uint(gl_VertexID);", inst); + break; + case IR::Attribute::BaseInstance: + ctx.AddU32("{}=uint(gl_BaseInstance);", inst); + break; + case IR::Attribute::BaseVertex: + ctx.AddU32("{}=uint(gl_BaseVertex);", inst); + break; + case IR::Attribute::DrawID: + ctx.AddU32("{}=uint(gl_DrawID);", inst); + break; + default: + throw NotImplementedException("Get U32 attribute {}", attr); + } +} + +void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view value, + [[maybe_unused]] std::string_view vertex) { + if (IR::IsGeneric(attr)) { + const u32 index{IR::GenericAttributeIndex(attr)}; + const u32 attr_element{IR::GenericAttributeElement(attr)}; + const GenericElementInfo& info{ctx.output_generics.at(index).at(attr_element)}; + const auto output_decorator{OutputVertexIndex(ctx)}; + if (info.num_components == 1) { + ctx.Add("{}{}={};", info.name, output_decorator, value); + } else { + const u32 index_element{attr_element - info.first_element}; + ctx.Add("{}{}.{}={};", info.name, output_decorator, "xyzw"[index_element], value); + } + return; + } + const u32 element{static_cast(attr) % 4}; + const char swizzle{"xyzw"[element]}; + switch (attr) { + case IR::Attribute::Layer: + if (ctx.stage != Stage::Geometry && + !ctx.profile.support_viewport_index_layer_non_geometry) { + LOG_WARNING(Shader_MSL, "Shader stores viewport layer but device does not support " + "viewport layer extension"); + break; + } + ctx.Add("gl_Layer=ftoi({});", value); + break; + case IR::Attribute::ViewportIndex: + if (ctx.stage != Stage::Geometry && + !ctx.profile.support_viewport_index_layer_non_geometry) { + LOG_WARNING(Shader_MSL, "Shader stores viewport index but device does not support " + "viewport layer extension"); + break; + } + ctx.Add("gl_ViewportIndex=ftoi({});", value); + break; + case IR::Attribute::ViewportMask: + if (ctx.stage != Stage::Geometry && !ctx.profile.support_viewport_mask) { + LOG_WARNING( + Shader_MSL, + "Shader stores viewport mask but device does not support viewport mask extension"); + break; + } + ctx.Add("gl_ViewportMask[0]=ftoi({});", value); + break; + case IR::Attribute::PointSize: + ctx.Add("gl_PointSize={};", value); + break; + case IR::Attribute::PositionX: + case IR::Attribute::PositionY: + case IR::Attribute::PositionZ: + case IR::Attribute::PositionW: + ctx.Add("gl_Position.{}={};", swizzle, value); + break; + case IR::Attribute::ClipDistance0: + case IR::Attribute::ClipDistance1: + case IR::Attribute::ClipDistance2: + case IR::Attribute::ClipDistance3: + case IR::Attribute::ClipDistance4: + case IR::Attribute::ClipDistance5: + case IR::Attribute::ClipDistance6: + case IR::Attribute::ClipDistance7: { + const u32 index{static_cast(attr) - static_cast(IR::Attribute::ClipDistance0)}; + ctx.Add("gl_ClipDistance[{}]={};", index, value); + break; + } + default: + throw NotImplementedException("Set attribute {}", attr); + } +} + +void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, std::string_view offset, + std::string_view vertex) { + const bool is_array{ctx.stage == Stage::Geometry}; + const auto vertex_arg{is_array ? fmt::format(",{}", vertex) : ""}; + ctx.AddF32("{}=IndexedAttrLoad(int({}){});", inst, offset, vertex_arg); +} + +void EmitSetAttributeIndexed([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view offset, + [[maybe_unused]] std::string_view value, + [[maybe_unused]] std::string_view vertex) { + NotImplemented(); +} + +void EmitGetPatch(EmitContext& ctx, IR::Inst& inst, IR::Patch patch) { + if (!IR::IsGeneric(patch)) { + throw NotImplementedException("Non-generic patch load"); + } + const u32 index{IR::GenericPatchIndex(patch)}; + const u32 element{IR::GenericPatchElement(patch)}; + const char swizzle{"xyzw"[element]}; + ctx.AddF32("{}=patch{}.{};", inst, index, swizzle); +} + +void EmitSetPatch(EmitContext& ctx, IR::Patch patch, std::string_view value) { + if (IR::IsGeneric(patch)) { + const u32 index{IR::GenericPatchIndex(patch)}; + const u32 element{IR::GenericPatchElement(patch)}; + ctx.Add("patch{}.{}={};", index, "xyzw"[element], value); + return; + } + switch (patch) { + case IR::Patch::TessellationLodLeft: + case IR::Patch::TessellationLodRight: + case IR::Patch::TessellationLodTop: + case IR::Patch::TessellationLodBottom: { + const u32 index{static_cast(patch) - u32(IR::Patch::TessellationLodLeft)}; + ctx.Add("gl_TessLevelOuter[{}]={};", index, value); + break; + } + case IR::Patch::TessellationLodInteriorU: + ctx.Add("gl_TessLevelInner[0]={};", value); + break; + case IR::Patch::TessellationLodInteriorV: + ctx.Add("gl_TessLevelInner[1]={};", value); + break; + default: + throw NotImplementedException("Patch {}", patch); + } +} + +void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, std::string_view value) { + const char swizzle{"xyzw"[component]}; + ctx.Add("frag_color{}.{}={};", index, swizzle, value); +} + +void EmitSetSampleMask(EmitContext& ctx, std::string_view value) { + ctx.Add("gl_SampleMask[0]=int({});", value); +} + +void EmitSetFragDepth(EmitContext& ctx, std::string_view value) { + ctx.Add("gl_FragDepth={};", value); +} + +void EmitLocalInvocationId(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU32x3("{}=gl_LocalInvocationID;", inst); +} + +void EmitWorkgroupId(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU32x3("{}=gl_WorkGroupID;", inst); +} + +void EmitInvocationId(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU32("{}=uint(gl_InvocationID);", inst); +} + +void EmitInvocationInfo(EmitContext& ctx, IR::Inst& inst) { + switch (ctx.stage) { + case Stage::TessellationControl: + case Stage::TessellationEval: + ctx.AddU32("{}=uint(gl_PatchVerticesIn)<<16;", inst); + break; + case Stage::Geometry: + ctx.AddU32("{}=uint({}<<16);", inst, + InputTopologyVertices::vertices(ctx.runtime_info.input_topology)); + break; + default: + LOG_WARNING(Shader, "(STUBBED) called"); + ctx.AddU32("{}=uint(0x00ff0000);", inst); + } +} + +void EmitSampleId(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU32("{}=uint(gl_SampleID);", inst); +} + +void EmitIsHelperInvocation(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU1("{}=gl_HelperInvocation;", inst); +} + +void EmitYDirection(EmitContext& ctx, IR::Inst& inst) { + ctx.uses_y_direction = true; + ctx.AddF32("{}=gl_FrontMaterial.ambient.a;", inst); +} + +void EmitResolutionDownFactor(EmitContext& ctx, IR::Inst& inst) { + ctx.AddF32("{}=scaling.z;", inst); +} + +void EmitRenderArea(EmitContext& ctx, IR::Inst& inst) { + ctx.AddF32x4("{}=render_area;", inst); +} + +void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, std::string_view word_offset) { + ctx.AddU32("{}=lmem[{}];", inst, word_offset); +} + +void EmitWriteLocal(EmitContext& ctx, std::string_view word_offset, std::string_view value) { + ctx.Add("lmem[{}]={};", word_offset, value); +} + +} // namespace Shader::Backend::MSL diff --git a/src/shader_recompiler/backend/msl/emit_msl_control_flow.cpp b/src/shader_recompiler/backend/msl/emit_msl_control_flow.cpp new file mode 100644 index 0000000000..e5d1e11a7f --- /dev/null +++ b/src/shader_recompiler/backend/msl/emit_msl_control_flow.cpp @@ -0,0 +1,18 @@ +// SPDX-FileCopyrightText: Copyright 2024 suyu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "shader_recompiler/backend/msl/emit_msl_instructions.h" +#include "shader_recompiler/backend/msl/msl_emit_context.h" +#include "shader_recompiler/exception.h" + +namespace Shader::Backend::MSL { + +void EmitJoin(EmitContext&) { + throw NotImplementedException("Join shouldn't be emitted"); +} + +void EmitDemoteToHelperInvocation(EmitContext& ctx) { + ctx.Add("discard;"); +} + +} // namespace Shader::Backend::MSL diff --git a/src/shader_recompiler/backend/msl/emit_msl_convert.cpp b/src/shader_recompiler/backend/msl/emit_msl_convert.cpp new file mode 100644 index 0000000000..01938e55bc --- /dev/null +++ b/src/shader_recompiler/backend/msl/emit_msl_convert.cpp @@ -0,0 +1,229 @@ +// SPDX-FileCopyrightText: Copyright 2024 suyu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include + +#include "shader_recompiler/backend/msl/emit_msl_instructions.h" +#include "shader_recompiler/backend/msl/msl_emit_context.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::MSL { +void EmitConvertS16F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertS16F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32("{}=(int({})&0xffff)|(bitfieldExtract(int({}),31,1)<<15);", inst, value, value); +} + +void EmitConvertS16F64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertS32F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertS32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32("{}=int({});", inst, value); +} + +void EmitConvertS32F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32("{}=int({});", inst, value); +} + +void EmitConvertS64F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertS64F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU64("{}=int64_t({});", inst, value); +} + +void EmitConvertS64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU64("{}=int64_t({});", inst, value); +} + +void EmitConvertU16F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertU16F32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertU16F64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertU32F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertU32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32("{}=uint({});", inst, value); +} + +void EmitConvertU32F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32("{}=uint({});", inst, value); +} + +void EmitConvertU64F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertU64F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU64("{}=uint64_t({});", inst, value); +} + +void EmitConvertU64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU64("{}=uint64_t({});", inst, value); +} + +void EmitConvertU64U32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU64("{}=uint64_t({});", inst, value); +} + +void EmitConvertU32U64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32("{}=uint({});", inst, value); +} + +void EmitConvertF16F32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertF32F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertF32F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=float({});", inst, value); +} + +void EmitConvertF64F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF64("{}=double({});", inst, value); +} + +void EmitConvertF16S8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertF16S16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertF16S32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertF16S64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertF16U8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertF16U16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertF16U32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertF16U64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertF32S8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertF32S16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertF32S32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=float(int({}));", inst, value); +} + +void EmitConvertF32S64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=float(int64_t({}));", inst, value); +} + +void EmitConvertF32U8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertF32U16(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=float({}&0xffff);", inst, value); +} + +void EmitConvertF32U32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=float({});", inst, value); +} + +void EmitConvertF32U64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=float({});", inst, value); +} + +void EmitConvertF64S8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertF64S16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertF64S32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF64("{}=double(int({}));", inst, value); +} + +void EmitConvertF64S64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF64("{}=double(int64_t({}));", inst, value); +} + +void EmitConvertF64U8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertF64U16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertF64U32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF64("{}=double({});", inst, value); +} + +void EmitConvertF64U64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF64("{}=double({});", inst, value); +} + +} // namespace Shader::Backend::MSL diff --git a/src/shader_recompiler/backend/msl/emit_msl_floating_point.cpp b/src/shader_recompiler/backend/msl/emit_msl_floating_point.cpp new file mode 100644 index 0000000000..114e84a70e --- /dev/null +++ b/src/shader_recompiler/backend/msl/emit_msl_floating_point.cpp @@ -0,0 +1,455 @@ +// SPDX-FileCopyrightText: Copyright 2024 suyu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include + +#include "shader_recompiler/backend/msl/emit_msl_instructions.h" +#include "shader_recompiler/backend/msl/msl_emit_context.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::MSL { +namespace { +void Compare(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs, + std::string_view op, bool ordered) { + const auto nan_op{ordered ? "&&!" : "||"}; + ctx.AddU1("{}={}{}{}" + "{}isnan({}){}isnan({});", + inst, lhs, op, rhs, nan_op, lhs, nan_op, rhs); +} + +bool IsPrecise(const IR::Inst& inst) { + return inst.Flags().no_contraction; +} +} // Anonymous namespace + +void EmitFPAbs16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitFPAbs32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=abs({});", inst, value); +} + +void EmitFPAbs64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF64("{}=abs({});", inst, value); +} + +void EmitFPAdd16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { + NotImplemented(); +} + +void EmitFPAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + if (IsPrecise(inst)) { + ctx.AddPrecF32("{}={}+{};", inst, a, b); + } else { + ctx.AddF32("{}={}+{};", inst, a, b); + } +} + +void EmitFPAdd64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + if (IsPrecise(inst)) { + ctx.AddPrecF64("{}={}+{};", inst, a, b); + } else { + ctx.AddF64("{}={}+{};", inst, a, b); + } +} + +void EmitFPFma16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b, + [[maybe_unused]] std::string_view c) { + NotImplemented(); +} + +void EmitFPFma32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b, + std::string_view c) { + if (IsPrecise(inst)) { + ctx.AddPrecF32("{}=fma({},{},{});", inst, a, b, c); + } else { + ctx.AddF32("{}=fma({},{},{});", inst, a, b, c); + } +} + +void EmitFPFma64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b, + std::string_view c) { + if (IsPrecise(inst)) { + ctx.AddPrecF64("{}=fma({},{},{});", inst, a, b, c); + } else { + ctx.AddF64("{}=fma({},{},{});", inst, a, b, c); + } +} + +void EmitFPMax32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + ctx.AddF32("{}=max({},{});", inst, a, b); +} + +void EmitFPMax64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + ctx.AddF64("{}=max({},{});", inst, a, b); +} + +void EmitFPMin32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + ctx.AddF32("{}=min({},{});", inst, a, b); +} + +void EmitFPMin64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + ctx.AddF64("{}=min({},{});", inst, a, b); +} + +void EmitFPMul16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { + NotImplemented(); +} + +void EmitFPMul32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + if (IsPrecise(inst)) { + ctx.AddPrecF32("{}={}*{};", inst, a, b); + } else { + ctx.AddF32("{}={}*{};", inst, a, b); + } +} + +void EmitFPMul64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + if (IsPrecise(inst)) { + ctx.AddPrecF64("{}={}*{};", inst, a, b); + } else { + ctx.AddF64("{}={}*{};", inst, a, b); + } +} + +void EmitFPNeg16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitFPNeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=0.f-({});", inst, value); +} + +void EmitFPNeg64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF64("{}=double(0.)-({});", inst, value); +} + +void EmitFPSin(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=sin({});", inst, value); +} + +void EmitFPCos(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=cos({});", inst, value); +} + +void EmitFPExp2(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=exp2({});", inst, value); +} + +void EmitFPLog2(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=log2({});", inst, value); +} + +void EmitFPRecip32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=(1.0f)/{};", inst, value); +} + +void EmitFPRecip64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF64("{}=1.0/{};", inst, value); +} + +void EmitFPRecipSqrt32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + ctx.AddF32("{}=inversesqrt({});", inst, value); +} + +void EmitFPRecipSqrt64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitFPSqrt(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=sqrt({});", inst, value); +} + +void EmitFPSaturate16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitFPSaturate32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=min(max({},0.0),1.0);", inst, value); +} + +void EmitFPSaturate64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF64("{}=min(max({},0.0),1.0);", inst, value); +} + +void EmitFPClamp16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value, + [[maybe_unused]] std::string_view min_value, + [[maybe_unused]] std::string_view max_value) { + NotImplemented(); +} + +void EmitFPClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value, + std::string_view min_value, std::string_view max_value) { + // MSL's clamp does not produce desirable results + ctx.AddF32("{}=min(max({},float({})),float({}));", inst, value, min_value, max_value); +} + +void EmitFPClamp64(EmitContext& ctx, IR::Inst& inst, std::string_view value, + std::string_view min_value, std::string_view max_value) { + // MSL's clamp does not produce desirable results + ctx.AddF64("{}=min(max({},double({})),double({}));", inst, value, min_value, max_value); +} + +void EmitFPRoundEven16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitFPRoundEven32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=roundEven({});", inst, value); +} + +void EmitFPRoundEven64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF64("{}=roundEven({});", inst, value); +} + +void EmitFPFloor16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitFPFloor32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=floor({});", inst, value); +} + +void EmitFPFloor64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF64("{}=floor({});", inst, value); +} + +void EmitFPCeil16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitFPCeil32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=ceil({});", inst, value); +} + +void EmitFPCeil64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF64("{}=ceil({});", inst, value); +} + +void EmitFPTrunc16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitFPTrunc32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=trunc({});", inst, value); +} + +void EmitFPTrunc64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF64("{}=trunc({});", inst, value); +} + +void EmitFPOrdEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "==", true); +} + +void EmitFPOrdEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "==", true); +} + +void EmitFPUnordEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "==", false); +} + +void EmitFPUnordEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "==", false); +} + +void EmitFPOrdNotEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdNotEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "!=", true); +} + +void EmitFPOrdNotEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "!=", true); +} + +void EmitFPUnordNotEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordNotEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "!=", false); +} + +void EmitFPUnordNotEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "!=", false); +} + +void EmitFPOrdLessThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdLessThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "<", true); +} + +void EmitFPOrdLessThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "<", true); +} + +void EmitFPUnordLessThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordLessThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "<", false); +} + +void EmitFPUnordLessThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "<", false); +} + +void EmitFPOrdGreaterThan16([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdGreaterThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, ">", true); +} + +void EmitFPOrdGreaterThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, ">", true); +} + +void EmitFPUnordGreaterThan16([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordGreaterThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, ">", false); +} + +void EmitFPUnordGreaterThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, ">", false); +} + +void EmitFPOrdLessThanEqual16([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdLessThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "<=", true); +} + +void EmitFPOrdLessThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "<=", true); +} + +void EmitFPUnordLessThanEqual16([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordLessThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "<=", false); +} + +void EmitFPUnordLessThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "<=", false); +} + +void EmitFPOrdGreaterThanEqual16([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, ">=", true); +} + +void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, ">=", true); +} + +void EmitFPUnordGreaterThanEqual16([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, ">=", false); +} + +void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, ">=", false); +} + +void EmitFPIsNan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitFPIsNan32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU1("{}=isnan({});", inst, value); +} + +void EmitFPIsNan64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU1("{}=isnan({});", inst, value); +} + +} // namespace Shader::Backend::MSL diff --git a/src/shader_recompiler/backend/msl/emit_msl_image.cpp b/src/shader_recompiler/backend/msl/emit_msl_image.cpp new file mode 100644 index 0000000000..17104b7294 --- /dev/null +++ b/src/shader_recompiler/backend/msl/emit_msl_image.cpp @@ -0,0 +1,869 @@ +// SPDX-FileCopyrightText: Copyright 2024 suyu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include + +#include "shader_recompiler/backend/msl/emit_msl_instructions.h" +#include "shader_recompiler/backend/msl/msl_emit_context.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/profile.h" + +namespace Shader::Backend::MSL { +namespace { +std::string Texture(EmitContext& ctx, const IR::TextureInstInfo& info, const IR::Value& index) { + const auto def{info.type == TextureType::Buffer ? ctx.texture_buffers.at(info.descriptor_index) + : ctx.textures.at(info.descriptor_index)}; + const auto index_offset{def.count > 1 ? fmt::format("[{}]", ctx.var_alloc.Consume(index)) : ""}; + return fmt::format("tex{}{}", def.binding, index_offset); +} + +std::string Image(EmitContext& ctx, const IR::TextureInstInfo& info, const IR::Value& index) { + const auto def{info.type == TextureType::Buffer ? ctx.image_buffers.at(info.descriptor_index) + : ctx.images.at(info.descriptor_index)}; + const auto index_offset{def.count > 1 ? fmt::format("[{}]", ctx.var_alloc.Consume(index)) : ""}; + return fmt::format("img{}{}", def.binding, index_offset); +} + +bool IsTextureMsaa(EmitContext& ctx, const IR::TextureInstInfo& info) { + if (info.type == TextureType::Buffer) { + return false; + } + return ctx.info.texture_descriptors.at(info.descriptor_index).is_multisample; +} + +std::string CastToIntVec(std::string_view value, const IR::TextureInstInfo& info) { + switch (info.type) { + case TextureType::Color1D: + case TextureType::Buffer: + return fmt::format("int({})", value); + case TextureType::ColorArray1D: + case TextureType::Color2D: + case TextureType::ColorArray2D: + return fmt::format("ivec2({})", value); + case TextureType::Color3D: + case TextureType::ColorCube: + return fmt::format("ivec3({})", value); + case TextureType::ColorArrayCube: + return fmt::format("ivec4({})", value); + default: + throw NotImplementedException("Integer cast for TextureType {}", info.type.Value()); + } +} + +std::string CoordsCastToInt(std::string_view value, const IR::TextureInstInfo& info) { + switch (info.type) { + case TextureType::Color1D: + case TextureType::Buffer: + return fmt::format("int({})", value); + case TextureType::ColorArray1D: + case TextureType::Color2D: + return fmt::format("ivec2({})", value); + case TextureType::ColorArray2D: + case TextureType::Color3D: + case TextureType::ColorCube: + return fmt::format("ivec3({})", value); + case TextureType::ColorArrayCube: + return fmt::format("ivec4({})", value); + default: + throw NotImplementedException("TexelFetchCast type {}", info.type.Value()); + } +} + +bool NeedsShadowLodExt(TextureType type) { + switch (type) { + case TextureType::ColorArray2D: + case TextureType::ColorCube: + case TextureType::ColorArrayCube: + return true; + default: + return false; + } +} + +std::string GetOffsetVec(EmitContext& ctx, const IR::Value& offset) { + if (offset.IsImmediate()) { + return fmt::format("int({})", offset.U32()); + } + IR::Inst* const inst{offset.InstRecursive()}; + if (inst->AreAllArgsImmediates()) { + switch (inst->GetOpcode()) { + case IR::Opcode::CompositeConstructU32x2: + return fmt::format("ivec2({},{})", inst->Arg(0).U32(), inst->Arg(1).U32()); + case IR::Opcode::CompositeConstructU32x3: + return fmt::format("ivec3({},{},{})", inst->Arg(0).U32(), inst->Arg(1).U32(), + inst->Arg(2).U32()); + case IR::Opcode::CompositeConstructU32x4: + return fmt::format("ivec4({},{},{},{})", inst->Arg(0).U32(), inst->Arg(1).U32(), + inst->Arg(2).U32(), inst->Arg(3).U32()); + default: + break; + } + } + const bool has_var_aoffi{ctx.profile.support_gl_variable_aoffi}; + if (!has_var_aoffi) { + LOG_WARNING(Shader_MSL, "Device does not support variable texture offsets, STUBBING"); + } + const auto offset_str{has_var_aoffi ? ctx.var_alloc.Consume(offset) : "0"}; + switch (offset.Type()) { + case IR::Type::U32: + return fmt::format("int({})", offset_str); + case IR::Type::U32x2: + return fmt::format("ivec2({})", offset_str); + case IR::Type::U32x3: + return fmt::format("ivec3({})", offset_str); + case IR::Type::U32x4: + return fmt::format("ivec4({})", offset_str); + default: + throw NotImplementedException("Offset type {}", offset.Type()); + } +} + +std::string PtpOffsets(const IR::Value& offset, const IR::Value& offset2) { + const std::array values{offset.InstRecursive(), offset2.InstRecursive()}; + if (!values[0]->AreAllArgsImmediates() || !values[1]->AreAllArgsImmediates()) { + LOG_WARNING(Shader_MSL, "Not all arguments in PTP are immediate, STUBBING"); + return "ivec2[](ivec2(0), ivec2(1), ivec2(2), ivec2(3))"; + } + const IR::Opcode opcode{values[0]->GetOpcode()}; + if (opcode != values[1]->GetOpcode() || opcode != IR::Opcode::CompositeConstructU32x4) { + throw LogicError("Invalid PTP arguments"); + } + auto read{[&](unsigned int a, unsigned int b) { return values[a]->Arg(b).U32(); }}; + + return fmt::format("ivec2[](ivec2({},{}),ivec2({},{}),ivec2({},{}),ivec2({},{}))", read(0, 0), + read(0, 1), read(0, 2), read(0, 3), read(1, 0), read(1, 1), read(1, 2), + read(1, 3)); +} + +IR::Inst* PrepareSparse(IR::Inst& inst) { + const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)}; + if (sparse_inst) { + sparse_inst->Invalidate(); + } + return sparse_inst; +} + +std::string ImageGatherSubpixelOffset(const IR::TextureInstInfo& info, std::string_view texture, + std::string_view coords) { + switch (info.type) { + case TextureType::Color2D: + case TextureType::Color2DRect: + return fmt::format("{}+vec2(0.001953125)/vec2(textureSize({}, 0))", coords, texture); + case TextureType::ColorArray2D: + case TextureType::ColorCube: + return fmt::format("vec3({0}.xy+vec2(0.001953125)/vec2(textureSize({1}, 0)),{0}.z)", coords, + texture); + default: + return std::string{coords}; + } +} +} // Anonymous namespace + +void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view bias_lc, + const IR::Value& offset) { + const auto info{inst.Flags()}; + if (info.has_lod_clamp) { + throw NotImplementedException("EmitImageSampleImplicitLod Lod clamp samples"); + } + const auto texture{Texture(ctx, info, index)}; + const auto bias{info.has_bias ? fmt::format(",{}", bias_lc) : ""}; + const auto texel{ctx.var_alloc.Define(inst, MslVarType::F32x4)}; + const auto sparse_inst{PrepareSparse(inst)}; + const bool supports_sparse{ctx.profile.support_gl_sparse_textures}; + if (sparse_inst && !supports_sparse) { + LOG_WARNING(Shader_MSL, "Device does not support sparse texture queries. STUBBING"); + ctx.AddU1("{}=true;", *sparse_inst); + } + if (!sparse_inst || !supports_sparse) { + if (!offset.IsEmpty()) { + const auto offset_str{GetOffsetVec(ctx, offset)}; + if (ctx.stage == Stage::Fragment) { + ctx.Add("{}=textureOffset({},{},{}{});", texel, texture, coords, offset_str, bias); + } else { + ctx.Add("{}=textureLodOffset({},{},0.0,{});", texel, texture, coords, offset_str); + } + } else { + if (ctx.stage == Stage::Fragment) { + ctx.Add("{}=texture({},{}{});", texel, texture, coords, bias); + } else { + ctx.Add("{}=textureLod({},{},0.0);", texel, texture, coords); + } + } + return; + } + if (!offset.IsEmpty()) { + ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureOffsetARB({},{},{},{}{}));", + *sparse_inst, texture, coords, GetOffsetVec(ctx, offset), texel, bias); + } else { + ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureARB({},{},{}{}));", *sparse_inst, + texture, coords, texel, bias); + } +} + +void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view lod_lc, + const IR::Value& offset) { + const auto info{inst.Flags()}; + if (info.has_bias) { + throw NotImplementedException("EmitImageSampleExplicitLod Bias texture samples"); + } + if (info.has_lod_clamp) { + throw NotImplementedException("EmitImageSampleExplicitLod Lod clamp samples"); + } + const auto texture{Texture(ctx, info, index)}; + const auto texel{ctx.var_alloc.Define(inst, MslVarType::F32x4)}; + const auto sparse_inst{PrepareSparse(inst)}; + const bool supports_sparse{ctx.profile.support_gl_sparse_textures}; + if (sparse_inst && !supports_sparse) { + LOG_WARNING(Shader_MSL, "Device does not support sparse texture queries. STUBBING"); + ctx.AddU1("{}=true;", *sparse_inst); + } + if (!sparse_inst || !supports_sparse) { + if (!offset.IsEmpty()) { + ctx.Add("{}=textureLodOffset({},{},{},{});", texel, texture, coords, lod_lc, + GetOffsetVec(ctx, offset)); + } else { + ctx.Add("{}=textureLod({},{},{});", texel, texture, coords, lod_lc); + } + return; + } + if (!offset.IsEmpty()) { + ctx.AddU1("{}=sparseTexelsResidentARB(sparseTexelFetchOffsetARB({},{},int({}),{},{}));", + *sparse_inst, texture, CastToIntVec(coords, info), lod_lc, + GetOffsetVec(ctx, offset), texel); + } else { + ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureLodARB({},{},{},{}));", *sparse_inst, + texture, coords, lod_lc, texel); + } +} + +void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view dref, + std::string_view bias_lc, const IR::Value& offset) { + const auto info{inst.Flags()}; + const auto sparse_inst{PrepareSparse(inst)}; + if (sparse_inst) { + throw NotImplementedException("EmitImageSampleDrefImplicitLod Sparse texture samples"); + } + if (info.has_bias) { + throw NotImplementedException("EmitImageSampleDrefImplicitLod Bias texture samples"); + } + if (info.has_lod_clamp) { + throw NotImplementedException("EmitImageSampleDrefImplicitLod Lod clamp samples"); + } + const auto texture{Texture(ctx, info, index)}; + const auto bias{info.has_bias ? fmt::format(",{}", bias_lc) : ""}; + const bool needs_shadow_ext{NeedsShadowLodExt(info.type)}; + const auto cast{needs_shadow_ext ? "vec4" : "vec3"}; + const bool use_grad{!ctx.profile.support_gl_texture_shadow_lod && + ctx.stage != Stage::Fragment && needs_shadow_ext}; + if (use_grad) { + LOG_WARNING(Shader_MSL, + "Device lacks GL_EXT_texture_shadow_lod. Using textureGrad fallback"); + if (info.type == TextureType::ColorArrayCube) { + LOG_WARNING(Shader_MSL, "textureGrad does not support ColorArrayCube. Stubbing"); + ctx.AddF32("{}=0.0f;", inst); + return; + } + const auto d_cast{info.type == TextureType::ColorArray2D ? "vec2" : "vec3"}; + ctx.AddF32("{}=textureGrad({},{}({},{}),{}(0),{}(0));", inst, texture, cast, coords, dref, + d_cast, d_cast); + return; + } + if (!offset.IsEmpty()) { + const auto offset_str{GetOffsetVec(ctx, offset)}; + if (ctx.stage == Stage::Fragment) { + ctx.AddF32("{}=textureOffset({},{}({},{}),{}{});", inst, texture, cast, coords, dref, + offset_str, bias); + } else { + ctx.AddF32("{}=textureLodOffset({},{}({},{}),0.0,{});", inst, texture, cast, coords, + dref, offset_str); + } + } else { + if (ctx.stage == Stage::Fragment) { + if (info.type == TextureType::ColorArrayCube) { + ctx.AddF32("{}=texture({},vec4({}),{});", inst, texture, coords, dref); + } else { + ctx.AddF32("{}=texture({},{}({},{}){});", inst, texture, cast, coords, dref, bias); + } + } else { + ctx.AddF32("{}=textureLod({},{}({},{}),0.0);", inst, texture, cast, coords, dref); + } + } +} + +void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view dref, + std::string_view lod_lc, const IR::Value& offset) { + const auto info{inst.Flags()}; + const auto sparse_inst{PrepareSparse(inst)}; + if (sparse_inst) { + throw NotImplementedException("EmitImageSampleDrefExplicitLod Sparse texture samples"); + } + if (info.has_bias) { + throw NotImplementedException("EmitImageSampleDrefExplicitLod Bias texture samples"); + } + if (info.has_lod_clamp) { + throw NotImplementedException("EmitImageSampleDrefExplicitLod Lod clamp samples"); + } + const auto texture{Texture(ctx, info, index)}; + const bool needs_shadow_ext{NeedsShadowLodExt(info.type)}; + const bool use_grad{!ctx.profile.support_gl_texture_shadow_lod && needs_shadow_ext}; + const auto cast{needs_shadow_ext ? "vec4" : "vec3"}; + if (use_grad) { + LOG_WARNING(Shader_MSL, + "Device lacks GL_EXT_texture_shadow_lod. Using textureGrad fallback"); + if (info.type == TextureType::ColorArrayCube) { + LOG_WARNING(Shader_MSL, "textureGrad does not support ColorArrayCube. Stubbing"); + ctx.AddF32("{}=0.0f;", inst); + return; + } + const auto d_cast{info.type == TextureType::ColorArray2D ? "vec2" : "vec3"}; + ctx.AddF32("{}=textureGrad({},{}({},{}),{}(0),{}(0));", inst, texture, cast, coords, dref, + d_cast, d_cast); + return; + } + if (!offset.IsEmpty()) { + const auto offset_str{GetOffsetVec(ctx, offset)}; + if (info.type == TextureType::ColorArrayCube) { + ctx.AddF32("{}=textureLodOffset({},{},{},{},{});", inst, texture, coords, dref, lod_lc, + offset_str); + } else { + ctx.AddF32("{}=textureLodOffset({},{}({},{}),{},{});", inst, texture, cast, coords, + dref, lod_lc, offset_str); + } + } else { + if (info.type == TextureType::ColorArrayCube) { + ctx.AddF32("{}=textureLod({},{},{},{});", inst, texture, coords, dref, lod_lc); + } else { + ctx.AddF32("{}=textureLod({},{}({},{}),{});", inst, texture, cast, coords, dref, + lod_lc); + } + } +} + +void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, const IR::Value& offset, const IR::Value& offset2) { + const auto info{inst.Flags()}; + const auto texture{Texture(ctx, info, index)}; + const auto texel{ctx.var_alloc.Define(inst, MslVarType::F32x4)}; + const auto sparse_inst{PrepareSparse(inst)}; + const bool supports_sparse{ctx.profile.support_gl_sparse_textures}; + if (sparse_inst && !supports_sparse) { + LOG_WARNING(Shader_MSL, "Device does not support sparse texture queries. STUBBING"); + ctx.AddU1("{}=true;", *sparse_inst); + } + std::string coords_with_subpixel_offset; + if (ctx.profile.need_gather_subpixel_offset) { + // Apply a subpixel offset of 1/512 the texel size of the texture to ensure same rounding on + // AMD hardware as on Maxwell or other Nvidia architectures. + coords_with_subpixel_offset = ImageGatherSubpixelOffset(info, texture, coords); + coords = coords_with_subpixel_offset; + } + if (!sparse_inst || !supports_sparse) { + if (offset.IsEmpty()) { + ctx.Add("{}=textureGather({},{},int({}));", texel, texture, coords, + info.gather_component); + return; + } + if (offset2.IsEmpty()) { + ctx.Add("{}=textureGatherOffset({},{},{},int({}));", texel, texture, coords, + GetOffsetVec(ctx, offset), info.gather_component); + return; + } + // PTP + const auto offsets{PtpOffsets(offset, offset2)}; + ctx.Add("{}=textureGatherOffsets({},{},{},int({}));", texel, texture, coords, offsets, + info.gather_component); + return; + } + if (offset.IsEmpty()) { + ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherARB({},{},{},int({})));", + *sparse_inst, texture, coords, texel, info.gather_component); + return; + } + if (offset2.IsEmpty()) { + ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherOffsetARB({},{},{},{},int({})));", + *sparse_inst, texture, CastToIntVec(coords, info), GetOffsetVec(ctx, offset), + texel, info.gather_component); + return; + } + // PTP + const auto offsets{PtpOffsets(offset, offset2)}; + ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherOffsetARB({},{},{},{},int({})));", + *sparse_inst, texture, CastToIntVec(coords, info), offsets, texel, + info.gather_component); +} + +void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, const IR::Value& offset, const IR::Value& offset2, + std::string_view dref) { + const auto info{inst.Flags()}; + const auto texture{Texture(ctx, info, index)}; + const auto texel{ctx.var_alloc.Define(inst, MslVarType::F32x4)}; + const auto sparse_inst{PrepareSparse(inst)}; + const bool supports_sparse{ctx.profile.support_gl_sparse_textures}; + if (sparse_inst && !supports_sparse) { + LOG_WARNING(Shader_MSL, "Device does not support sparse texture queries. STUBBING"); + ctx.AddU1("{}=true;", *sparse_inst); + } + std::string coords_with_subpixel_offset; + if (ctx.profile.need_gather_subpixel_offset) { + // Apply a subpixel offset of 1/512 the texel size of the texture to ensure same rounding on + // AMD hardware as on Maxwell or other Nvidia architectures. + coords_with_subpixel_offset = ImageGatherSubpixelOffset(info, texture, coords); + coords = coords_with_subpixel_offset; + } + if (!sparse_inst || !supports_sparse) { + if (offset.IsEmpty()) { + ctx.Add("{}=textureGather({},{},{});", texel, texture, coords, dref); + return; + } + if (offset2.IsEmpty()) { + ctx.Add("{}=textureGatherOffset({},{},{},{});", texel, texture, coords, dref, + GetOffsetVec(ctx, offset)); + return; + } + // PTP + const auto offsets{PtpOffsets(offset, offset2)}; + ctx.Add("{}=textureGatherOffsets({},{},{},{});", texel, texture, coords, dref, offsets); + return; + } + if (offset.IsEmpty()) { + ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherARB({},{},{},{}));", *sparse_inst, + texture, coords, dref, texel); + return; + } + if (offset2.IsEmpty()) { + ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherOffsetARB({},{},{},,{},{}));", + *sparse_inst, texture, CastToIntVec(coords, info), dref, + GetOffsetVec(ctx, offset), texel); + return; + } + // PTP + const auto offsets{PtpOffsets(offset, offset2)}; + ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherOffsetARB({},{},{},,{},{}));", + *sparse_inst, texture, CastToIntVec(coords, info), dref, offsets, texel); +} + +void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, const IR::Value& offset, std::string_view lod, + std::string_view ms) { + const auto info{inst.Flags()}; + if (info.has_bias) { + throw NotImplementedException("EmitImageFetch Bias texture samples"); + } + if (info.has_lod_clamp) { + throw NotImplementedException("EmitImageFetch Lod clamp samples"); + } + const auto texture{Texture(ctx, info, index)}; + const auto sparse_inst{PrepareSparse(inst)}; + const auto texel{ctx.var_alloc.Define(inst, MslVarType::F32x4)}; + const bool supports_sparse{ctx.profile.support_gl_sparse_textures}; + if (sparse_inst && !supports_sparse) { + LOG_WARNING(Shader_MSL, "Device does not support sparse texture queries. STUBBING"); + ctx.AddU1("{}=true;", *sparse_inst); + } + if (!sparse_inst || !supports_sparse) { + const auto int_coords{CoordsCastToInt(coords, info)}; + if (!ms.empty()) { + ctx.Add("{}=texelFetch({},{},int({}));", texel, texture, int_coords, ms); + } else if (!offset.IsEmpty()) { + ctx.Add("{}=texelFetchOffset({},{},int({}),{});", texel, texture, int_coords, lod, + GetOffsetVec(ctx, offset)); + } else { + if (info.type == TextureType::Buffer) { + ctx.Add("{}=texelFetch({},int({}));", texel, texture, coords); + } else { + ctx.Add("{}=texelFetch({},{},int({}));", texel, texture, int_coords, lod); + } + } + return; + } + if (!ms.empty()) { + throw NotImplementedException("EmitImageFetch Sparse MSAA samples"); + } + if (!offset.IsEmpty()) { + ctx.AddU1("{}=sparseTexelsResidentARB(sparseTexelFetchOffsetARB({},{},int({}),{},{}));", + *sparse_inst, texture, CastToIntVec(coords, info), lod, GetOffsetVec(ctx, offset), + texel); + } else { + ctx.AddU1("{}=sparseTexelsResidentARB(sparseTexelFetchARB({},{},int({}),{}));", + *sparse_inst, texture, CastToIntVec(coords, info), lod, texel); + } +} + +void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view lod, const IR::Value& skip_mips_val) { + const auto info{inst.Flags()}; + const auto texture{Texture(ctx, info, index)}; + const bool is_msaa{IsTextureMsaa(ctx, info)}; + const bool skip_mips{skip_mips_val.U1()}; + const auto mips{skip_mips ? "0u" : fmt::format("uint(textureQueryLevels({}))", texture)}; + if (is_msaa && !skip_mips) { + throw NotImplementedException("EmitImageQueryDimensions MSAA QueryLevels"); + } + if (info.type == TextureType::Buffer && !skip_mips) { + throw NotImplementedException("EmitImageQueryDimensions TextureType::Buffer QueryLevels"); + } + const bool uses_lod{!is_msaa && info.type != TextureType::Buffer}; + const auto lod_str{uses_lod ? fmt::format(",int({})", lod) : ""}; + switch (info.type) { + case TextureType::Color1D: + return ctx.AddU32x4("{}=uvec4(uint(textureSize({}{})),0u,0u,{});", inst, texture, lod_str, + mips); + case TextureType::ColorArray1D: + case TextureType::Color2D: + case TextureType::ColorCube: + case TextureType::Color2DRect: + return ctx.AddU32x4("{}=uvec4(uvec2(textureSize({}{})),0u,{});", inst, texture, lod_str, + mips); + case TextureType::ColorArray2D: + case TextureType::Color3D: + case TextureType::ColorArrayCube: + return ctx.AddU32x4("{}=uvec4(uvec3(textureSize({}{})),{});", inst, texture, lod_str, mips); + case TextureType::Buffer: + return ctx.AddU32x4("{}=uvec4(uint(textureSize({})),0u,0u,{});", inst, texture, mips); + } + throw LogicError("Unspecified image type {}", info.type.Value()); +} + +void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords) { + const auto info{inst.Flags()}; + const auto texture{Texture(ctx, info, index)}; + return ctx.AddF32x4("{}=vec4(textureQueryLod({},{}),0.0,0.0);", inst, texture, coords); +} + +void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, const IR::Value& derivatives, + const IR::Value& offset, [[maybe_unused]] const IR::Value& lod_clamp) { + const auto info{inst.Flags()}; + if (info.has_lod_clamp) { + throw NotImplementedException("EmitImageGradient Lod clamp samples"); + } + const auto sparse_inst{PrepareSparse(inst)}; + if (sparse_inst) { + throw NotImplementedException("EmitImageGradient Sparse"); + } + if (!offset.IsEmpty() && info.num_derivatives <= 2) { + throw NotImplementedException("EmitImageGradient offset"); + } + const auto texture{Texture(ctx, info, index)}; + const auto texel{ctx.var_alloc.Define(inst, MslVarType::F32x4)}; + const bool multi_component{info.num_derivatives > 1 || info.has_lod_clamp}; + const auto derivatives_vec{ctx.var_alloc.Consume(derivatives)}; + if (multi_component) { + if (info.num_derivatives >= 3) { + const auto offset_vec{ctx.var_alloc.Consume(offset)}; + ctx.Add("{}=textureGrad({},{},vec3({}.xz, {}.x),vec3({}.yw, {}.y));", texel, texture, + coords, derivatives_vec, offset_vec, derivatives_vec, offset_vec); + return; + } + ctx.Add("{}=textureGrad({},{},vec2({}.xz),vec2({}.yz));", texel, texture, coords, + derivatives_vec, derivatives_vec); + } else { + ctx.Add("{}=textureGrad({},{},float({}.x),float({}.y));", texel, texture, coords, + derivatives_vec, derivatives_vec); + } +} + +void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords) { + const auto info{inst.Flags()}; + const auto sparse_inst{PrepareSparse(inst)}; + if (sparse_inst) { + throw NotImplementedException("EmitImageRead Sparse"); + } + const auto image{Image(ctx, info, index)}; + ctx.AddU32x4("{}=uvec4(imageLoad({},{}));", inst, image, CoordsCastToInt(coords, info)); +} + +void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view color) { + const auto info{inst.Flags()}; + const auto image{Image(ctx, info, index)}; + ctx.Add("imageStore({},{},{});", image, CoordsCastToInt(coords, info), color); +} + +void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + const auto info{inst.Flags()}; + const auto image{Image(ctx, info, index)}; + ctx.AddU32("{}=imageAtomicAdd({},{},{});", inst, image, CoordsCastToInt(coords, info), value); +} + +void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + const auto info{inst.Flags()}; + const auto image{Image(ctx, info, index)}; + ctx.AddU32("{}=imageAtomicMin({},{},int({}));", inst, image, CoordsCastToInt(coords, info), + value); +} + +void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + const auto info{inst.Flags()}; + const auto image{Image(ctx, info, index)}; + ctx.AddU32("{}=imageAtomicMin({},{},uint({}));", inst, image, CoordsCastToInt(coords, info), + value); +} + +void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + const auto info{inst.Flags()}; + const auto image{Image(ctx, info, index)}; + ctx.AddU32("{}=imageAtomicMax({},{},int({}));", inst, image, CoordsCastToInt(coords, info), + value); +} + +void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + const auto info{inst.Flags()}; + const auto image{Image(ctx, info, index)}; + ctx.AddU32("{}=imageAtomicMax({},{},uint({}));", inst, image, CoordsCastToInt(coords, info), + value); +} + +void EmitImageAtomicInc32(EmitContext&, IR::Inst&, const IR::Value&, std::string_view, + std::string_view) { + NotImplemented(); +} + +void EmitImageAtomicDec32(EmitContext&, IR::Inst&, const IR::Value&, std::string_view, + std::string_view) { + NotImplemented(); +} + +void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + const auto info{inst.Flags()}; + const auto image{Image(ctx, info, index)}; + ctx.AddU32("{}=imageAtomicAnd({},{},{});", inst, image, CoordsCastToInt(coords, info), value); +} + +void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + const auto info{inst.Flags()}; + const auto image{Image(ctx, info, index)}; + ctx.AddU32("{}=imageAtomicOr({},{},{});", inst, image, CoordsCastToInt(coords, info), value); +} + +void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + const auto info{inst.Flags()}; + const auto image{Image(ctx, info, index)}; + ctx.AddU32("{}=imageAtomicXor({},{},{});", inst, image, CoordsCastToInt(coords, info), value); +} + +void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + const auto info{inst.Flags()}; + const auto image{Image(ctx, info, index)}; + ctx.AddU32("{}=imageAtomicExchange({},{},{});", inst, image, CoordsCastToInt(coords, info), + value); +} + +void EmitIsTextureScaled(EmitContext& ctx, IR::Inst& inst, const IR::Value& index) { + if (!index.IsImmediate()) { + throw NotImplementedException("Non-constant texture rescaling"); + } + const u32 image_index{index.U32()}; + ctx.AddU1("{}=(ftou(scaling.x)&{})!=0;", inst, 1u << image_index); +} + +void EmitIsImageScaled(EmitContext& ctx, IR::Inst& inst, const IR::Value& index) { + if (!index.IsImmediate()) { + throw NotImplementedException("Non-constant texture rescaling"); + } + const u32 image_index{index.U32()}; + ctx.AddU1("{}=(ftou(scaling.y)&{})!=0;", inst, 1u << image_index); +} + +void EmitBindlessImageSampleImplicitLod(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageSampleExplicitLod(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageSampleDrefImplicitLod(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageSampleDrefExplicitLod(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageGather(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageGatherDref(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageFetch(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageQueryDimensions(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageQueryLod(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageGradient(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageRead(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageWrite(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageSampleImplicitLod(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageSampleExplicitLod(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageSampleDrefImplicitLod(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageSampleDrefExplicitLod(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageGather(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageGatherDref(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageFetch(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageQueryDimensions(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageQueryLod(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageGradient(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageRead(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageWrite(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicIAdd32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicSMin32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicUMin32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicSMax32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicUMax32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicInc32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicDec32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicAnd32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicOr32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicXor32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicExchange32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicIAdd32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicSMin32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicUMin32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicSMax32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicUMax32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicInc32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicDec32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicAnd32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicOr32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicXor32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicExchange32(EmitContext&) { + NotImplemented(); +} + +} // namespace Shader::Backend::MSL diff --git a/src/shader_recompiler/backend/msl/emit_msl_instructions.h b/src/shader_recompiler/backend/msl/emit_msl_instructions.h new file mode 100644 index 0000000000..c0a29432cb --- /dev/null +++ b/src/shader_recompiler/backend/msl/emit_msl_instructions.h @@ -0,0 +1,741 @@ +// SPDX-FileCopyrightText: Copyright 2024 suyu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include + +#include "common/common_types.h" + +namespace Shader::IR { +enum class Attribute : u64; +enum class Patch : u64; +class Inst; +class Value; +} // namespace Shader::IR + +namespace Shader::Backend::MSL { +class EmitContext; + +#define NotImplemented() throw NotImplementedException("MSL instruction {}", __func__) + +// Microinstruction emitters +void EmitPhi(EmitContext& ctx, IR::Inst& inst); +void EmitVoid(EmitContext& ctx); +void EmitIdentity(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); +void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); +void EmitReference(EmitContext& ctx, const IR::Value& value); +void EmitPhiMove(EmitContext& ctx, const IR::Value& phi, const IR::Value& value); +void EmitJoin(EmitContext& ctx); +void EmitDemoteToHelperInvocation(EmitContext& ctx); +void EmitBarrier(EmitContext& ctx); +void EmitWorkgroupMemoryBarrier(EmitContext& ctx); +void EmitDeviceMemoryBarrier(EmitContext& ctx); +void EmitPrologue(EmitContext& ctx); +void EmitEpilogue(EmitContext& ctx); +void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream); +void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream); +void EmitGetRegister(EmitContext& ctx); +void EmitSetRegister(EmitContext& ctx); +void EmitGetPred(EmitContext& ctx); +void EmitSetPred(EmitContext& ctx); +void EmitSetGotoVariable(EmitContext& ctx); +void EmitGetGotoVariable(EmitContext& ctx); +void EmitSetIndirectBranchVariable(EmitContext& ctx); +void EmitGetIndirectBranchVariable(EmitContext& ctx); +void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); +void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); +void EmitGetCbufU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); +void EmitGetCbufS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); +void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); +void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); +void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); +void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, + std::string_view vertex); +void EmitGetAttributeU32(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, + std::string_view vertex); +void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view value, + std::string_view vertex); +void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, std::string_view offset, + std::string_view vertex); +void EmitSetAttributeIndexed(EmitContext& ctx, std::string_view offset, std::string_view value, + std::string_view vertex); +void EmitGetPatch(EmitContext& ctx, IR::Inst& inst, IR::Patch patch); +void EmitSetPatch(EmitContext& ctx, IR::Patch patch, std::string_view value); +void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, std::string_view value); +void EmitSetSampleMask(EmitContext& ctx, std::string_view value); +void EmitSetFragDepth(EmitContext& ctx, std::string_view value); +void EmitGetZFlag(EmitContext& ctx); +void EmitGetSFlag(EmitContext& ctx); +void EmitGetCFlag(EmitContext& ctx); +void EmitGetOFlag(EmitContext& ctx); +void EmitSetZFlag(EmitContext& ctx); +void EmitSetSFlag(EmitContext& ctx); +void EmitSetCFlag(EmitContext& ctx); +void EmitSetOFlag(EmitContext& ctx); +void EmitWorkgroupId(EmitContext& ctx, IR::Inst& inst); +void EmitLocalInvocationId(EmitContext& ctx, IR::Inst& inst); +void EmitInvocationId(EmitContext& ctx, IR::Inst& inst); +void EmitInvocationInfo(EmitContext& ctx, IR::Inst& inst); +void EmitSampleId(EmitContext& ctx, IR::Inst& inst); +void EmitIsHelperInvocation(EmitContext& ctx, IR::Inst& inst); +void EmitYDirection(EmitContext& ctx, IR::Inst& inst); +void EmitResolutionDownFactor(EmitContext& ctx, IR::Inst& inst); +void EmitRenderArea(EmitContext& ctx, IR::Inst& inst); +void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, std::string_view word_offset); +void EmitWriteLocal(EmitContext& ctx, std::string_view word_offset, std::string_view value); +void EmitUndefU1(EmitContext& ctx, IR::Inst& inst); +void EmitUndefU8(EmitContext& ctx, IR::Inst& inst); +void EmitUndefU16(EmitContext& ctx, IR::Inst& inst); +void EmitUndefU32(EmitContext& ctx, IR::Inst& inst); +void EmitUndefU64(EmitContext& ctx, IR::Inst& inst); +void EmitLoadGlobalU8(EmitContext& ctx); +void EmitLoadGlobalS8(EmitContext& ctx); +void EmitLoadGlobalU16(EmitContext& ctx); +void EmitLoadGlobalS16(EmitContext& ctx); +void EmitLoadGlobal32(EmitContext& ctx, IR::Inst& inst, std::string_view address); +void EmitLoadGlobal64(EmitContext& ctx, IR::Inst& inst, std::string_view address); +void EmitLoadGlobal128(EmitContext& ctx, IR::Inst& inst, std::string_view address); +void EmitWriteGlobalU8(EmitContext& ctx); +void EmitWriteGlobalS8(EmitContext& ctx); +void EmitWriteGlobalU16(EmitContext& ctx); +void EmitWriteGlobalS16(EmitContext& ctx); +void EmitWriteGlobal32(EmitContext& ctx, std::string_view address, std::string_view value); +void EmitWriteGlobal64(EmitContext& ctx, std::string_view address, std::string_view value); +void EmitWriteGlobal128(EmitContext& ctx, std::string_view address, std::string_view value); +void EmitLoadStorageU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); +void EmitLoadStorageS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); +void EmitLoadStorageU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); +void EmitLoadStorageS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); +void EmitLoadStorage32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); +void EmitLoadStorage64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); +void EmitLoadStorage128(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); +void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitLoadSharedU8(EmitContext& ctx, IR::Inst& inst, std::string_view offset); +void EmitLoadSharedS8(EmitContext& ctx, IR::Inst& inst, std::string_view offset); +void EmitLoadSharedU16(EmitContext& ctx, IR::Inst& inst, std::string_view offset); +void EmitLoadSharedS16(EmitContext& ctx, IR::Inst& inst, std::string_view offset); +void EmitLoadSharedU32(EmitContext& ctx, IR::Inst& inst, std::string_view offset); +void EmitLoadSharedU64(EmitContext& ctx, IR::Inst& inst, std::string_view offset); +void EmitLoadSharedU128(EmitContext& ctx, IR::Inst& inst, std::string_view offset); +void EmitWriteSharedU8(EmitContext& ctx, std::string_view offset, std::string_view value); +void EmitWriteSharedU16(EmitContext& ctx, std::string_view offset, std::string_view value); +void EmitWriteSharedU32(EmitContext& ctx, std::string_view offset, std::string_view value); +void EmitWriteSharedU64(EmitContext& ctx, std::string_view offset, std::string_view value); +void EmitWriteSharedU128(EmitContext& ctx, std::string_view offset, std::string_view value); +void EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view e1, + std::string_view e2); +void EmitCompositeConstructU32x3(EmitContext& ctx, IR::Inst& inst, std::string_view e1, + std::string_view e2, std::string_view e3); +void EmitCompositeConstructU32x4(EmitContext& ctx, IR::Inst& inst, std::string_view e1, + std::string_view e2, std::string_view e3, std::string_view e4); +void EmitCompositeExtractU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + u32 index); +void EmitCompositeExtractU32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + u32 index); +void EmitCompositeExtractU32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + u32 index); +void EmitCompositeInsertU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + std::string_view object, u32 index); +void EmitCompositeInsertU32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + std::string_view object, u32 index); +void EmitCompositeInsertU32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + std::string_view object, u32 index); +void EmitCompositeConstructF16x2(EmitContext& ctx, std::string_view e1, std::string_view e2); +void EmitCompositeConstructF16x3(EmitContext& ctx, std::string_view e1, std::string_view e2, + std::string_view e3); +void EmitCompositeConstructF16x4(EmitContext& ctx, std::string_view e1, std::string_view e2, + std::string_view e3, std::string_view e4); +void EmitCompositeExtractF16x2(EmitContext& ctx, std::string_view composite, u32 index); +void EmitCompositeExtractF16x3(EmitContext& ctx, std::string_view composite, u32 index); +void EmitCompositeExtractF16x4(EmitContext& ctx, std::string_view composite, u32 index); +void EmitCompositeInsertF16x2(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index); +void EmitCompositeInsertF16x3(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index); +void EmitCompositeInsertF16x4(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index); +void EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view e1, + std::string_view e2); +void EmitCompositeConstructF32x3(EmitContext& ctx, IR::Inst& inst, std::string_view e1, + std::string_view e2, std::string_view e3); +void EmitCompositeConstructF32x4(EmitContext& ctx, IR::Inst& inst, std::string_view e1, + std::string_view e2, std::string_view e3, std::string_view e4); +void EmitCompositeExtractF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + u32 index); +void EmitCompositeExtractF32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + u32 index); +void EmitCompositeExtractF32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + u32 index); +void EmitCompositeInsertF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + std::string_view object, u32 index); +void EmitCompositeInsertF32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + std::string_view object, u32 index); +void EmitCompositeInsertF32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + std::string_view object, u32 index); +void EmitCompositeConstructF64x2(EmitContext& ctx); +void EmitCompositeConstructF64x3(EmitContext& ctx); +void EmitCompositeConstructF64x4(EmitContext& ctx); +void EmitCompositeExtractF64x2(EmitContext& ctx); +void EmitCompositeExtractF64x3(EmitContext& ctx); +void EmitCompositeExtractF64x4(EmitContext& ctx); +void EmitCompositeInsertF64x2(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index); +void EmitCompositeInsertF64x3(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index); +void EmitCompositeInsertF64x4(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index); +void EmitSelectU1(EmitContext& ctx, IR::Inst& inst, std::string_view cond, + std::string_view true_value, std::string_view false_value); +void EmitSelectU8(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value); +void EmitSelectU16(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value); +void EmitSelectU32(EmitContext& ctx, IR::Inst& inst, std::string_view cond, + std::string_view true_value, std::string_view false_value); +void EmitSelectU64(EmitContext& ctx, IR::Inst& inst, std::string_view cond, + std::string_view true_value, std::string_view false_value); +void EmitSelectF16(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value); +void EmitSelectF32(EmitContext& ctx, IR::Inst& inst, std::string_view cond, + std::string_view true_value, std::string_view false_value); +void EmitSelectF64(EmitContext& ctx, IR::Inst& inst, std::string_view cond, + std::string_view true_value, std::string_view false_value); +void EmitBitCastU16F16(EmitContext& ctx, IR::Inst& inst); +void EmitBitCastU32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitBitCastU64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitBitCastF16U16(EmitContext& ctx, IR::Inst& inst); +void EmitBitCastF32U32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitBitCastF64U64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitPackUint2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitUnpackUint2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitPackFloat2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitUnpackFloat2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitPackHalf2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitUnpackHalf2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitPackDouble2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitUnpackDouble2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitGetZeroFromOp(EmitContext& ctx); +void EmitGetSignFromOp(EmitContext& ctx); +void EmitGetCarryFromOp(EmitContext& ctx); +void EmitGetOverflowFromOp(EmitContext& ctx); +void EmitGetSparseFromOp(EmitContext& ctx); +void EmitGetInBoundsFromOp(EmitContext& ctx); +void EmitFPAbs16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPAbs32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPAbs64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPAdd16(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitFPAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitFPAdd64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitFPFma16(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b, + std::string_view c); +void EmitFPFma32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b, + std::string_view c); +void EmitFPFma64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b, + std::string_view c); +void EmitFPMax32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitFPMax64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitFPMin32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitFPMin64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitFPMul16(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitFPMul32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitFPMul64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitFPNeg16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPNeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPNeg64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPSin(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPCos(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPExp2(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPLog2(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPRecip32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPRecip64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPRecipSqrt32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPRecipSqrt64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPSqrt(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPSaturate16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPSaturate32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPSaturate64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPClamp16(EmitContext& ctx, IR::Inst& inst, std::string_view value, + std::string_view min_value, std::string_view max_value); +void EmitFPClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value, + std::string_view min_value, std::string_view max_value); +void EmitFPClamp64(EmitContext& ctx, IR::Inst& inst, std::string_view value, + std::string_view min_value, std::string_view max_value); +void EmitFPRoundEven16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPRoundEven32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPRoundEven64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPFloor16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPFloor32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPFloor64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPCeil16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPCeil32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPCeil64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPTrunc16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPTrunc32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPTrunc64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPOrdEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); +void EmitFPOrdEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); +void EmitFPUnordEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPUnordEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPOrdNotEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdNotEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPOrdNotEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPUnordNotEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordNotEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPUnordNotEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPOrdLessThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdLessThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPOrdLessThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPUnordLessThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordLessThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPUnordLessThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPOrdGreaterThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdGreaterThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPOrdGreaterThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPUnordGreaterThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordGreaterThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPUnordGreaterThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPOrdLessThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdLessThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPOrdLessThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPUnordLessThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordLessThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPUnordLessThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPOrdGreaterThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPUnordGreaterThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPIsNan16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPIsNan32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPIsNan64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitIAdd64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitISub32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitISub64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitIMul32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitSDiv32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitUDiv32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitINeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitINeg64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitShiftLeftLogical32(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view shift); +void EmitShiftLeftLogical64(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view shift); +void EmitShiftRightLogical32(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view shift); +void EmitShiftRightLogical64(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view shift); +void EmitShiftRightArithmetic32(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view shift); +void EmitShiftRightArithmetic64(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view shift); +void EmitBitwiseAnd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitBitwiseOr32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitBitwiseXor32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitBitFieldInsert(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view insert, std::string_view offset, std::string_view count); +void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view offset, std::string_view count); +void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view offset, std::string_view count); +void EmitBitReverse32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitBitCount32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitBitwiseNot32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFindSMsb32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFindUMsb32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitSMin32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitUMin32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitSMax32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitUMax32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitSClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view min, + std::string_view max); +void EmitUClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view min, + std::string_view max); +void EmitSLessThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); +void EmitULessThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); +void EmitIEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); +void EmitSLessThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitULessThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitSGreaterThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); +void EmitUGreaterThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); +void EmitINotEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); +void EmitSGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitUGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitSharedAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicSMin32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicUMin32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicSMax32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicUMax32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicInc32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicDec32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicAnd32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicOr32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicXor32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicExchange32x2(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value); +void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicAnd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicOr64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicIAdd32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicSMin32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicUMin32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicSMax32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicUMax32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicAnd32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicOr32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicXor32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicExchange32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicAddF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicAddF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicMinF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicMinF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicMaxF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicMaxF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitGlobalAtomicIAdd32(EmitContext& ctx); +void EmitGlobalAtomicSMin32(EmitContext& ctx); +void EmitGlobalAtomicUMin32(EmitContext& ctx); +void EmitGlobalAtomicSMax32(EmitContext& ctx); +void EmitGlobalAtomicUMax32(EmitContext& ctx); +void EmitGlobalAtomicInc32(EmitContext& ctx); +void EmitGlobalAtomicDec32(EmitContext& ctx); +void EmitGlobalAtomicAnd32(EmitContext& ctx); +void EmitGlobalAtomicOr32(EmitContext& ctx); +void EmitGlobalAtomicXor32(EmitContext& ctx); +void EmitGlobalAtomicExchange32(EmitContext& ctx); +void EmitGlobalAtomicIAdd64(EmitContext& ctx); +void EmitGlobalAtomicSMin64(EmitContext& ctx); +void EmitGlobalAtomicUMin64(EmitContext& ctx); +void EmitGlobalAtomicSMax64(EmitContext& ctx); +void EmitGlobalAtomicUMax64(EmitContext& ctx); +void EmitGlobalAtomicInc64(EmitContext& ctx); +void EmitGlobalAtomicDec64(EmitContext& ctx); +void EmitGlobalAtomicAnd64(EmitContext& ctx); +void EmitGlobalAtomicOr64(EmitContext& ctx); +void EmitGlobalAtomicXor64(EmitContext& ctx); +void EmitGlobalAtomicExchange64(EmitContext& ctx); +void EmitGlobalAtomicIAdd32x2(EmitContext& ctx); +void EmitGlobalAtomicSMin32x2(EmitContext& ctx); +void EmitGlobalAtomicUMin32x2(EmitContext& ctx); +void EmitGlobalAtomicSMax32x2(EmitContext& ctx); +void EmitGlobalAtomicUMax32x2(EmitContext& ctx); +void EmitGlobalAtomicInc32x2(EmitContext& ctx); +void EmitGlobalAtomicDec32x2(EmitContext& ctx); +void EmitGlobalAtomicAnd32x2(EmitContext& ctx); +void EmitGlobalAtomicOr32x2(EmitContext& ctx); +void EmitGlobalAtomicXor32x2(EmitContext& ctx); +void EmitGlobalAtomicExchange32x2(EmitContext& ctx); +void EmitGlobalAtomicAddF32(EmitContext& ctx); +void EmitGlobalAtomicAddF16x2(EmitContext& ctx); +void EmitGlobalAtomicAddF32x2(EmitContext& ctx); +void EmitGlobalAtomicMinF16x2(EmitContext& ctx); +void EmitGlobalAtomicMinF32x2(EmitContext& ctx); +void EmitGlobalAtomicMaxF16x2(EmitContext& ctx); +void EmitGlobalAtomicMaxF32x2(EmitContext& ctx); +void EmitLogicalOr(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitLogicalAnd(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitLogicalXor(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitLogicalNot(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertS16F16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertS16F32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertS16F64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertS32F16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertS32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertS32F64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertS64F16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertS64F32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertS64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertU16F16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertU16F32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertU16F64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertU32F16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertU32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertU32F64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertU64F16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertU64F32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertU64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertU64U32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertU32U64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF16F32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF32F16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF32F64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF64F32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF16S8(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF16S16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF16S32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF16S64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF16U8(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF16U16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF16U32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF16U64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF32S8(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF32S16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF32S32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF32S64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF32U8(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF32U16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF32U32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF32U64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF64S8(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF64S16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF64S32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF64S64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF64U8(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF64U16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF64U32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF64U64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitBindlessImageSampleImplicitLod(EmitContext&); +void EmitBindlessImageSampleExplicitLod(EmitContext&); +void EmitBindlessImageSampleDrefImplicitLod(EmitContext&); +void EmitBindlessImageSampleDrefExplicitLod(EmitContext&); +void EmitBindlessImageGather(EmitContext&); +void EmitBindlessImageGatherDref(EmitContext&); +void EmitBindlessImageFetch(EmitContext&); +void EmitBindlessImageQueryDimensions(EmitContext&); +void EmitBindlessImageQueryLod(EmitContext&); +void EmitBindlessImageGradient(EmitContext&); +void EmitBindlessImageRead(EmitContext&); +void EmitBindlessImageWrite(EmitContext&); +void EmitBoundImageSampleImplicitLod(EmitContext&); +void EmitBoundImageSampleExplicitLod(EmitContext&); +void EmitBoundImageSampleDrefImplicitLod(EmitContext&); +void EmitBoundImageSampleDrefExplicitLod(EmitContext&); +void EmitBoundImageGather(EmitContext&); +void EmitBoundImageGatherDref(EmitContext&); +void EmitBoundImageFetch(EmitContext&); +void EmitBoundImageQueryDimensions(EmitContext&); +void EmitBoundImageQueryLod(EmitContext&); +void EmitBoundImageGradient(EmitContext&); +void EmitBoundImageRead(EmitContext&); +void EmitBoundImageWrite(EmitContext&); +void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view bias_lc, + const IR::Value& offset); +void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view lod_lc, + const IR::Value& offset); +void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view dref, + std::string_view bias_lc, const IR::Value& offset); +void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view dref, + std::string_view lod_lc, const IR::Value& offset); +void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, const IR::Value& offset, const IR::Value& offset2); +void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, const IR::Value& offset, const IR::Value& offset2, + std::string_view dref); +void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, const IR::Value& offset, std::string_view lod, + std::string_view ms); +void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view lod, const IR::Value& skip_mips); +void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords); +void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, const IR::Value& derivatives, + const IR::Value& offset, const IR::Value& lod_clamp); +void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords); +void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view color); +void EmitIsTextureScaled(EmitContext& ctx, IR::Inst& inst, const IR::Value& index); +void EmitIsImageScaled(EmitContext& ctx, IR::Inst& inst, const IR::Value& index); +void EmitBindlessImageAtomicIAdd32(EmitContext&); +void EmitBindlessImageAtomicSMin32(EmitContext&); +void EmitBindlessImageAtomicUMin32(EmitContext&); +void EmitBindlessImageAtomicSMax32(EmitContext&); +void EmitBindlessImageAtomicUMax32(EmitContext&); +void EmitBindlessImageAtomicInc32(EmitContext&); +void EmitBindlessImageAtomicDec32(EmitContext&); +void EmitBindlessImageAtomicAnd32(EmitContext&); +void EmitBindlessImageAtomicOr32(EmitContext&); +void EmitBindlessImageAtomicXor32(EmitContext&); +void EmitBindlessImageAtomicExchange32(EmitContext&); +void EmitBoundImageAtomicIAdd32(EmitContext&); +void EmitBoundImageAtomicSMin32(EmitContext&); +void EmitBoundImageAtomicUMin32(EmitContext&); +void EmitBoundImageAtomicSMax32(EmitContext&); +void EmitBoundImageAtomicUMax32(EmitContext&); +void EmitBoundImageAtomicInc32(EmitContext&); +void EmitBoundImageAtomicDec32(EmitContext&); +void EmitBoundImageAtomicAnd32(EmitContext&); +void EmitBoundImageAtomicOr32(EmitContext&); +void EmitBoundImageAtomicXor32(EmitContext&); +void EmitBoundImageAtomicExchange32(EmitContext&); +void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitLaneId(EmitContext& ctx, IR::Inst& inst); +void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, std::string_view pred); +void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, std::string_view pred); +void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, std::string_view pred); +void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, std::string_view pred); +void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst); +void EmitSubgroupLtMask(EmitContext& ctx, IR::Inst& inst); +void EmitSubgroupLeMask(EmitContext& ctx, IR::Inst& inst); +void EmitSubgroupGtMask(EmitContext& ctx, IR::Inst& inst); +void EmitSubgroupGeMask(EmitContext& ctx, IR::Inst& inst); +void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string_view value, + std::string_view index, std::string_view clamp, + std::string_view segmentation_mask); +void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index, + std::string_view clamp, std::string_view segmentation_mask); +void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value, + std::string_view index, std::string_view clamp, + std::string_view segmentation_mask); +void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string_view value, + std::string_view index, std::string_view clamp, + std::string_view segmentation_mask); +void EmitFSwizzleAdd(EmitContext& ctx, IR::Inst& inst, std::string_view op_a, std::string_view op_b, + std::string_view swizzle); +void EmitDPdxFine(EmitContext& ctx, IR::Inst& inst, std::string_view op_a); +void EmitDPdyFine(EmitContext& ctx, IR::Inst& inst, std::string_view op_a); +void EmitDPdxCoarse(EmitContext& ctx, IR::Inst& inst, std::string_view op_a); +void EmitDPdyCoarse(EmitContext& ctx, IR::Inst& inst, std::string_view op_a); + +} // namespace Shader::Backend::MSL diff --git a/src/shader_recompiler/backend/msl/emit_msl_integer.cpp b/src/shader_recompiler/backend/msl/emit_msl_integer.cpp new file mode 100644 index 0000000000..e208bdeaed --- /dev/null +++ b/src/shader_recompiler/backend/msl/emit_msl_integer.cpp @@ -0,0 +1,258 @@ +// SPDX-FileCopyrightText: Copyright 2024 suyu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "shader_recompiler/backend/msl/emit_msl_instructions.h" +#include "shader_recompiler/backend/msl/msl_emit_context.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::MSL { +namespace { +void SetZeroFlag(EmitContext& ctx, IR::Inst& inst, std::string_view result) { + IR::Inst* const zero{inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp)}; + if (!zero) { + return; + } + ctx.AddU1("{}={}==0;", *zero, result); + zero->Invalidate(); +} + +void SetSignFlag(EmitContext& ctx, IR::Inst& inst, std::string_view result) { + IR::Inst* const sign{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp)}; + if (!sign) { + return; + } + ctx.AddU1("{}=int({})<0;", *sign, result); + sign->Invalidate(); +} + +void BitwiseLogicalOp(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b, + char lop) { + const auto result{ctx.var_alloc.Define(inst, MslVarType::U32)}; + ctx.Add("{}={}{}{};", result, a, lop, b); + SetZeroFlag(ctx, inst, result); + SetSignFlag(ctx, inst, result); +} +} // Anonymous namespace + +void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + // Compute the overflow CC first as it requires the original operand values, + // which may be overwritten by the result of the addition + if (IR::Inst * overflow{inst.GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp)}) { + // https://stackoverflow.com/questions/55468823/how-to-detect-integer-overflow-in-c + constexpr u32 s32_max{static_cast(std::numeric_limits::max())}; + const auto sub_a{fmt::format("{}u-{}", s32_max, a)}; + const auto positive_result{fmt::format("int({})>int({})", b, sub_a)}; + const auto negative_result{fmt::format("int({})=0?{}:{};", *overflow, a, positive_result, negative_result); + overflow->Invalidate(); + } + const auto result{ctx.var_alloc.Define(inst, MslVarType::U32)}; + if (IR::Inst* const carry{inst.GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp)}) { + ctx.uses_cc_carry = true; + ctx.Add("{}=uaddCarry({},{},carry);", result, a, b); + ctx.AddU1("{}=carry!=0;", *carry); + carry->Invalidate(); + } else { + ctx.Add("{}={}+{};", result, a, b); + } + SetZeroFlag(ctx, inst, result); + SetSignFlag(ctx, inst, result); +} + +void EmitIAdd64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + ctx.AddU64("{}={}+{};", inst, a, b); +} + +void EmitISub32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + ctx.AddU32("{}={}-{};", inst, a, b); +} + +void EmitISub64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + ctx.AddU64("{}={}-{};", inst, a, b); +} + +void EmitIMul32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + ctx.AddU32("{}=uint({}*{});", inst, a, b); +} + +void EmitSDiv32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + ctx.AddU32("{}=uint(int({})/int({}));", inst, a, b); +} + +void EmitUDiv32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + ctx.AddU32("{}={}/{};", inst, a, b); +} + +void EmitINeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32("{}=uint(int(0)-int({}));", inst, value); +} + +void EmitINeg64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU64("{}=uint64_t(int64_t(0)-int64_t({}));", inst, value); +} + +void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32("{}=abs(int({}));", inst, value); +} + +void EmitShiftLeftLogical32(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view shift) { + ctx.AddU32("{}={}<<{};", inst, base, shift); +} + +void EmitShiftLeftLogical64(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view shift) { + ctx.AddU64("{}={}<<{};", inst, base, shift); +} + +void EmitShiftRightLogical32(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view shift) { + ctx.AddU32("{}={}>>{};", inst, base, shift); +} + +void EmitShiftRightLogical64(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view shift) { + ctx.AddU64("{}={}>>{};", inst, base, shift); +} + +void EmitShiftRightArithmetic32(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view shift) { + ctx.AddU32("{}=int({})>>{};", inst, base, shift); +} + +void EmitShiftRightArithmetic64(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view shift) { + ctx.AddU64("{}=int64_t({})>>{};", inst, base, shift); +} + +void EmitBitwiseAnd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + BitwiseLogicalOp(ctx, inst, a, b, '&'); +} + +void EmitBitwiseOr32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + BitwiseLogicalOp(ctx, inst, a, b, '|'); +} + +void EmitBitwiseXor32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + BitwiseLogicalOp(ctx, inst, a, b, '^'); +} + +void EmitBitFieldInsert(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view insert, std::string_view offset, std::string_view count) { + ctx.AddU32("{}=bitfieldInsert({},{},int({}),int({}));", inst, base, insert, offset, count); +} + +void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view offset, std::string_view count) { + const auto result{ctx.var_alloc.Define(inst, MslVarType::U32)}; + ctx.Add("{}=uint(bitfieldExtract(int({}),int({}),int({})));", result, base, offset, count); + SetZeroFlag(ctx, inst, result); + SetSignFlag(ctx, inst, result); +} + +void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view offset, std::string_view count) { + const auto result{ctx.var_alloc.Define(inst, MslVarType::U32)}; + ctx.Add("{}=uint(bitfieldExtract(uint({}),int({}),int({})));", result, base, offset, count); + SetZeroFlag(ctx, inst, result); + SetSignFlag(ctx, inst, result); +} + +void EmitBitReverse32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32("{}=bitfieldReverse({});", inst, value); +} + +void EmitBitCount32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32("{}=bitCount({});", inst, value); +} + +void EmitBitwiseNot32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32("{}=~{};", inst, value); +} + +void EmitFindSMsb32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32("{}=findMSB(int({}));", inst, value); +} + +void EmitFindUMsb32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32("{}=findMSB(uint({}));", inst, value); +} + +void EmitSMin32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + ctx.AddU32("{}=min(int({}),int({}));", inst, a, b); +} + +void EmitUMin32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + ctx.AddU32("{}=min(uint({}),uint({}));", inst, a, b); +} + +void EmitSMax32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + ctx.AddU32("{}=max(int({}),int({}));", inst, a, b); +} + +void EmitUMax32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + ctx.AddU32("{}=max(uint({}),uint({}));", inst, a, b); +} + +void EmitSClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view min, + std::string_view max) { + const auto result{ctx.var_alloc.Define(inst, MslVarType::U32)}; + ctx.Add("{}=clamp(int({}),int({}),int({}));", result, value, min, max); + SetZeroFlag(ctx, inst, result); + SetSignFlag(ctx, inst, result); +} + +void EmitUClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view min, + std::string_view max) { + const auto result{ctx.var_alloc.Define(inst, MslVarType::U32)}; + ctx.Add("{}=clamp(uint({}),uint({}),uint({}));", result, value, min, max); + SetZeroFlag(ctx, inst, result); + SetSignFlag(ctx, inst, result); +} + +void EmitSLessThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs) { + ctx.AddU1("{}=int({})int({});", inst, lhs, rhs); +} + +void EmitUGreaterThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + ctx.AddU1("{}=uint({})>uint({});", inst, lhs, rhs); +} + +void EmitINotEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs) { + ctx.AddU1("{}={}!={};", inst, lhs, rhs); +} + +void EmitSGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + ctx.AddU1("{}=int({})>=int({});", inst, lhs, rhs); +} + +void EmitUGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + ctx.AddU1("{}=uint({})>=uint({});", inst, lhs, rhs); +} +} // namespace Shader::Backend::MSL diff --git a/src/shader_recompiler/backend/msl/emit_msl_logical.cpp b/src/shader_recompiler/backend/msl/emit_msl_logical.cpp new file mode 100644 index 0000000000..3207e54212 --- /dev/null +++ b/src/shader_recompiler/backend/msl/emit_msl_logical.cpp @@ -0,0 +1,25 @@ +// SPDX-FileCopyrightText: Copyright 2024 suyu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "shader_recompiler/backend/msl/emit_msl_instructions.h" +#include "shader_recompiler/backend/msl/msl_emit_context.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::MSL { + +void EmitLogicalOr(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + ctx.AddU1("{}={}||{};", inst, a, b); +} + +void EmitLogicalAnd(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + ctx.AddU1("{}={}&&{};", inst, a, b); +} + +void EmitLogicalXor(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + ctx.AddU1("{}={}^^{};", inst, a, b); +} + +void EmitLogicalNot(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU1("{}=!{};", inst, value); +} +} // namespace Shader::Backend::MSL diff --git a/src/shader_recompiler/backend/msl/emit_msl_memory.cpp b/src/shader_recompiler/backend/msl/emit_msl_memory.cpp new file mode 100644 index 0000000000..f70ca69331 --- /dev/null +++ b/src/shader_recompiler/backend/msl/emit_msl_memory.cpp @@ -0,0 +1,201 @@ +// SPDX-FileCopyrightText: Copyright 2024 suyu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include + +#include "shader_recompiler/backend/msl/emit_msl_instructions.h" +#include "shader_recompiler/backend/msl/msl_emit_context.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/profile.h" + +namespace Shader::Backend::MSL { +namespace { +constexpr char cas_loop[]{"for(;;){{uint old_value={};uint " + "cas_result=atomicCompSwap({},old_value,bitfieldInsert({},{},{},{}));" + "if(cas_result==old_value){{break;}}}}"}; + +void SsboWriteCas(EmitContext& ctx, const IR::Value& binding, std::string_view offset_var, + std::string_view value, std::string_view bit_offset, u32 num_bits) { + const auto ssbo{fmt::format("{}_ssbo{}[{}>>2]", ctx.stage_name, binding.U32(), offset_var)}; + ctx.Add(cas_loop, ssbo, ssbo, ssbo, value, bit_offset, num_bits); +} +} // Anonymous namespace + +void EmitLoadGlobalU8(EmitContext&) { + NotImplemented(); +} + +void EmitLoadGlobalS8(EmitContext&) { + NotImplemented(); +} + +void EmitLoadGlobalU16(EmitContext&) { + NotImplemented(); +} + +void EmitLoadGlobalS16(EmitContext&) { + NotImplemented(); +} + +void EmitLoadGlobal32(EmitContext& ctx, IR::Inst& inst, std::string_view address) { + if (ctx.profile.support_int64) { + return ctx.AddU32("{}=LoadGlobal32({});", inst, address); + } + LOG_WARNING(Shader_MSL, "Int64 not supported, ignoring memory operation"); + ctx.AddU32("{}=0u;", inst); +} + +void EmitLoadGlobal64(EmitContext& ctx, IR::Inst& inst, std::string_view address) { + if (ctx.profile.support_int64) { + return ctx.AddU32x2("{}=LoadGlobal64({});", inst, address); + } + LOG_WARNING(Shader_MSL, "Int64 not supported, ignoring memory operation"); + ctx.AddU32x2("{}=uvec2(0);", inst); +} + +void EmitLoadGlobal128(EmitContext& ctx, IR::Inst& inst, std::string_view address) { + if (ctx.profile.support_int64) { + return ctx.AddU32x4("{}=LoadGlobal128({});", inst, address); + } + LOG_WARNING(Shader_MSL, "Int64 not supported, ignoring memory operation"); + ctx.AddU32x4("{}=uvec4(0);", inst); +} + +void EmitWriteGlobalU8(EmitContext&) { + NotImplemented(); +} + +void EmitWriteGlobalS8(EmitContext&) { + NotImplemented(); +} + +void EmitWriteGlobalU16(EmitContext&) { + NotImplemented(); +} + +void EmitWriteGlobalS16(EmitContext&) { + NotImplemented(); +} + +void EmitWriteGlobal32(EmitContext& ctx, std::string_view address, std::string_view value) { + if (ctx.profile.support_int64) { + return ctx.Add("WriteGlobal32({},{});", address, value); + } + LOG_WARNING(Shader_MSL, "Int64 not supported, ignoring memory operation"); +} + +void EmitWriteGlobal64(EmitContext& ctx, std::string_view address, std::string_view value) { + if (ctx.profile.support_int64) { + return ctx.Add("WriteGlobal64({},{});", address, value); + } + LOG_WARNING(Shader_MSL, "Int64 not supported, ignoring memory operation"); +} + +void EmitWriteGlobal128(EmitContext& ctx, std::string_view address, std::string_view value) { + if (ctx.profile.support_int64) { + return ctx.Add("WriteGlobal128({},{});", address, value); + } + LOG_WARNING(Shader_MSL, "Int64 not supported, ignoring memory operation"); +} + +void EmitLoadStorageU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset) { + const auto offset_var{ctx.var_alloc.Consume(offset)}; + ctx.AddU32("{}=bitfieldExtract({}_ssbo{}[{}>>2],int({}%4)*8,8);", inst, ctx.stage_name, + binding.U32(), offset_var, offset_var); +} + +void EmitLoadStorageS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset) { + const auto offset_var{ctx.var_alloc.Consume(offset)}; + ctx.AddU32("{}=bitfieldExtract(int({}_ssbo{}[{}>>2]),int({}%4)*8,8);", inst, ctx.stage_name, + binding.U32(), offset_var, offset_var); +} + +void EmitLoadStorageU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset) { + const auto offset_var{ctx.var_alloc.Consume(offset)}; + ctx.AddU32("{}=bitfieldExtract({}_ssbo{}[{}>>2],int(({}>>1)%2)*16,16);", inst, ctx.stage_name, + binding.U32(), offset_var, offset_var); +} + +void EmitLoadStorageS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset) { + const auto offset_var{ctx.var_alloc.Consume(offset)}; + ctx.AddU32("{}=bitfieldExtract(int({}_ssbo{}[{}>>2]),int(({}>>1)%2)*16,16);", inst, + ctx.stage_name, binding.U32(), offset_var, offset_var); +} + +void EmitLoadStorage32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset) { + const auto offset_var{ctx.var_alloc.Consume(offset)}; + ctx.AddU32("{}={}_ssbo{}[{}>>2];", inst, ctx.stage_name, binding.U32(), offset_var); +} + +void EmitLoadStorage64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset) { + const auto offset_var{ctx.var_alloc.Consume(offset)}; + ctx.AddU32x2("{}=uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}+4)>>2]);", inst, ctx.stage_name, + binding.U32(), offset_var, ctx.stage_name, binding.U32(), offset_var); +} + +void EmitLoadStorage128(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset) { + const auto offset_var{ctx.var_alloc.Consume(offset)}; + ctx.AddU32x4("{}=uvec4({}_ssbo{}[{}>>2],{}_ssbo{}[({}+4)>>2],{}_ssbo{}[({}+8)>>2],{}_ssbo{}[({}" + "+12)>>2]);", + inst, ctx.stage_name, binding.U32(), offset_var, ctx.stage_name, binding.U32(), + offset_var, ctx.stage_name, binding.U32(), offset_var, ctx.stage_name, + binding.U32(), offset_var); +} + +void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + const auto offset_var{ctx.var_alloc.Consume(offset)}; + const auto bit_offset{fmt::format("int({}%4)*8", offset_var)}; + SsboWriteCas(ctx, binding, offset_var, value, bit_offset, 8); +} + +void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + const auto offset_var{ctx.var_alloc.Consume(offset)}; + const auto bit_offset{fmt::format("int({}%4)*8", offset_var)}; + SsboWriteCas(ctx, binding, offset_var, value, bit_offset, 8); +} + +void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + const auto offset_var{ctx.var_alloc.Consume(offset)}; + const auto bit_offset{fmt::format("int(({}>>1)%2)*16", offset_var)}; + SsboWriteCas(ctx, binding, offset_var, value, bit_offset, 16); +} + +void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + const auto offset_var{ctx.var_alloc.Consume(offset)}; + const auto bit_offset{fmt::format("int(({}>>1)%2)*16", offset_var)}; + SsboWriteCas(ctx, binding, offset_var, value, bit_offset, 16); +} + +void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + const auto offset_var{ctx.var_alloc.Consume(offset)}; + ctx.Add("{}_ssbo{}[{}>>2]={};", ctx.stage_name, binding.U32(), offset_var, value); +} + +void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + const auto offset_var{ctx.var_alloc.Consume(offset)}; + ctx.Add("{}_ssbo{}[{}>>2]={}.x;", ctx.stage_name, binding.U32(), offset_var, value); + ctx.Add("{}_ssbo{}[({}+4)>>2]={}.y;", ctx.stage_name, binding.U32(), offset_var, value); +} + +void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + const auto offset_var{ctx.var_alloc.Consume(offset)}; + ctx.Add("{}_ssbo{}[{}>>2]={}.x;", ctx.stage_name, binding.U32(), offset_var, value); + ctx.Add("{}_ssbo{}[({}+4)>>2]={}.y;", ctx.stage_name, binding.U32(), offset_var, value); + ctx.Add("{}_ssbo{}[({}+8)>>2]={}.z;", ctx.stage_name, binding.U32(), offset_var, value); + ctx.Add("{}_ssbo{}[({}+12)>>2]={}.w;", ctx.stage_name, binding.U32(), offset_var, value); +} +} // namespace Shader::Backend::MSL diff --git a/src/shader_recompiler/backend/msl/emit_msl_not_implemented.cpp b/src/shader_recompiler/backend/msl/emit_msl_not_implemented.cpp new file mode 100644 index 0000000000..9e16b3ebb6 --- /dev/null +++ b/src/shader_recompiler/backend/msl/emit_msl_not_implemented.cpp @@ -0,0 +1,100 @@ +// SPDX-FileCopyrightText: Copyright 2024 suyu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include + +#include "shader_recompiler/backend/msl/emit_msl_instructions.h" +#include "shader_recompiler/backend/msl/msl_emit_context.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::MSL { + +void EmitGetRegister(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetRegister(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetPred(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetPred(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetGotoVariable(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetGotoVariable(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetIndirectBranchVariable(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetIndirectBranchVariable(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetZFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetSFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetCFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetOFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetZFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetSFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetCFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetOFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetZeroFromOp(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetSignFromOp(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetCarryFromOp(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetOverflowFromOp(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetSparseFromOp(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetInBoundsFromOp(EmitContext& ctx) { + NotImplemented(); +} + +} // namespace Shader::Backend::MSL diff --git a/src/shader_recompiler/backend/msl/emit_msl_select.cpp b/src/shader_recompiler/backend/msl/emit_msl_select.cpp new file mode 100644 index 0000000000..772efe0f07 --- /dev/null +++ b/src/shader_recompiler/backend/msl/emit_msl_select.cpp @@ -0,0 +1,54 @@ +// SPDX-FileCopyrightText: Copyright 2024 suyu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include + +#include "shader_recompiler/backend/msl/emit_msl_instructions.h" +#include "shader_recompiler/backend/msl/msl_emit_context.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::MSL { +void EmitSelectU1(EmitContext& ctx, IR::Inst& inst, std::string_view cond, + std::string_view true_value, std::string_view false_value) { + ctx.AddU1("{}={}?{}:{};", inst, cond, true_value, false_value); +} + +void EmitSelectU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view cond, + [[maybe_unused]] std::string_view true_value, + [[maybe_unused]] std::string_view false_value) { + NotImplemented(); +} + +void EmitSelectU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view cond, + [[maybe_unused]] std::string_view true_value, + [[maybe_unused]] std::string_view false_value) { + NotImplemented(); +} + +void EmitSelectU32(EmitContext& ctx, IR::Inst& inst, std::string_view cond, + std::string_view true_value, std::string_view false_value) { + ctx.AddU32("{}={}?{}:{};", inst, cond, true_value, false_value); +} + +void EmitSelectU64(EmitContext& ctx, IR::Inst& inst, std::string_view cond, + std::string_view true_value, std::string_view false_value) { + ctx.AddU64("{}={}?{}:{};", inst, cond, true_value, false_value); +} + +void EmitSelectF16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view cond, + [[maybe_unused]] std::string_view true_value, + [[maybe_unused]] std::string_view false_value) { + NotImplemented(); +} + +void EmitSelectF32(EmitContext& ctx, IR::Inst& inst, std::string_view cond, + std::string_view true_value, std::string_view false_value) { + ctx.AddF32("{}={}?{}:{};", inst, cond, true_value, false_value); +} + +void EmitSelectF64(EmitContext& ctx, IR::Inst& inst, std::string_view cond, + std::string_view true_value, std::string_view false_value) { + ctx.AddF64("{}={}?{}:{};", inst, cond, true_value, false_value); +} + +} // namespace Shader::Backend::MSL diff --git a/src/shader_recompiler/backend/msl/emit_msl_shared_memory.cpp b/src/shader_recompiler/backend/msl/emit_msl_shared_memory.cpp new file mode 100644 index 0000000000..50983add37 --- /dev/null +++ b/src/shader_recompiler/backend/msl/emit_msl_shared_memory.cpp @@ -0,0 +1,76 @@ +// SPDX-FileCopyrightText: Copyright 2024 suyu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "shader_recompiler/backend/msl/emit_msl_instructions.h" +#include "shader_recompiler/backend/msl/msl_emit_context.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::MSL { +namespace { +constexpr char cas_loop[]{"for(;;){{uint old_value={};uint " + "cas_result=atomicCompSwap({},old_value,bitfieldInsert({},{},{},{}));" + "if(cas_result==old_value){{break;}}}}"}; + +void SharedWriteCas(EmitContext& ctx, std::string_view offset, std::string_view value, + std::string_view bit_offset, u32 num_bits) { + const auto smem{fmt::format("smem[{}>>2]", offset)}; + ctx.Add(cas_loop, smem, smem, smem, value, bit_offset, num_bits); +} +} // Anonymous namespace + +void EmitLoadSharedU8(EmitContext& ctx, IR::Inst& inst, std::string_view offset) { + ctx.AddU32("{}=bitfieldExtract(smem[{}>>2],int({}%4)*8,8);", inst, offset, offset); +} + +void EmitLoadSharedS8(EmitContext& ctx, IR::Inst& inst, std::string_view offset) { + ctx.AddU32("{}=bitfieldExtract(int(smem[{}>>2]),int({}%4)*8,8);", inst, offset, offset); +} + +void EmitLoadSharedU16(EmitContext& ctx, IR::Inst& inst, std::string_view offset) { + ctx.AddU32("{}=bitfieldExtract(smem[{}>>2],int(({}>>1)%2)*16,16);", inst, offset, offset); +} + +void EmitLoadSharedS16(EmitContext& ctx, IR::Inst& inst, std::string_view offset) { + ctx.AddU32("{}=bitfieldExtract(int(smem[{}>>2]),int(({}>>1)%2)*16,16);", inst, offset, offset); +} + +void EmitLoadSharedU32(EmitContext& ctx, IR::Inst& inst, std::string_view offset) { + ctx.AddU32("{}=smem[{}>>2];", inst, offset); +} + +void EmitLoadSharedU64(EmitContext& ctx, IR::Inst& inst, std::string_view offset) { + ctx.AddU32x2("{}=uvec2(smem[{}>>2],smem[({}+4)>>2]);", inst, offset, offset); +} + +void EmitLoadSharedU128(EmitContext& ctx, IR::Inst& inst, std::string_view offset) { + ctx.AddU32x4("{}=uvec4(smem[{}>>2],smem[({}+4)>>2],smem[({}+8)>>2],smem[({}+12)>>2]);", inst, + offset, offset, offset, offset); +} + +void EmitWriteSharedU8(EmitContext& ctx, std::string_view offset, std::string_view value) { + const auto bit_offset{fmt::format("int({}%4)*8", offset)}; + SharedWriteCas(ctx, offset, value, bit_offset, 8); +} + +void EmitWriteSharedU16(EmitContext& ctx, std::string_view offset, std::string_view value) { + const auto bit_offset{fmt::format("int(({}>>1)%2)*16", offset)}; + SharedWriteCas(ctx, offset, value, bit_offset, 16); +} + +void EmitWriteSharedU32(EmitContext& ctx, std::string_view offset, std::string_view value) { + ctx.Add("smem[{}>>2]={};", offset, value); +} + +void EmitWriteSharedU64(EmitContext& ctx, std::string_view offset, std::string_view value) { + ctx.Add("smem[{}>>2]={}.x;", offset, value); + ctx.Add("smem[({}+4)>>2]={}.y;", offset, value); +} + +void EmitWriteSharedU128(EmitContext& ctx, std::string_view offset, std::string_view value) { + ctx.Add("smem[{}>>2]={}.x;", offset, value); + ctx.Add("smem[({}+4)>>2]={}.y;", offset, value); + ctx.Add("smem[({}+8)>>2]={}.z;", offset, value); + ctx.Add("smem[({}+12)>>2]={}.w;", offset, value); +} + +} // namespace Shader::Backend::MSL diff --git a/src/shader_recompiler/backend/msl/emit_msl_special.cpp b/src/shader_recompiler/backend/msl/emit_msl_special.cpp new file mode 100644 index 0000000000..5b9f3af771 --- /dev/null +++ b/src/shader_recompiler/backend/msl/emit_msl_special.cpp @@ -0,0 +1,110 @@ +// SPDX-FileCopyrightText: Copyright 2024 suyu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "shader_recompiler/backend/msl/emit_msl_instructions.h" +#include "shader_recompiler/backend/msl/msl_emit_context.h" +#include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/profile.h" + +namespace Shader::Backend::MSL { +namespace { +std::string_view OutputVertexIndex(EmitContext& ctx) { + return ctx.stage == Stage::TessellationControl ? "[gl_InvocationID]" : ""; +} + +void InitializeOutputVaryings(EmitContext& ctx) { + if (ctx.uses_geometry_passthrough) { + return; + } + if (ctx.stage == Stage::VertexB || ctx.stage == Stage::Geometry) { + ctx.Add("gl_Position=vec4(0,0,0,1);"); + } + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (!ctx.info.stores.Generic(index)) { + continue; + } + const auto& info_array{ctx.output_generics.at(index)}; + const auto output_decorator{OutputVertexIndex(ctx)}; + size_t element{}; + while (element < info_array.size()) { + const auto& info{info_array.at(element)}; + const auto varying_name{fmt::format("{}{}", info.name, output_decorator)}; + switch (info.num_components) { + case 1: { + const char value{element == 3 ? '1' : '0'}; + ctx.Add("{}={}.f;", varying_name, value); + break; + } + case 2: + case 3: + if (element + info.num_components < 4) { + ctx.Add("{}=vec{}(0);", varying_name, info.num_components); + } else { + // last element is the w component, must be initialized to 1 + const auto zeros{info.num_components == 3 ? "0,0," : "0,"}; + ctx.Add("{}=vec{}({}1);", varying_name, info.num_components, zeros); + } + break; + case 4: + ctx.Add("{}=vec4(0,0,0,1);", varying_name); + break; + default: + break; + } + element += info.num_components; + } + } +} +} // Anonymous namespace + +void EmitPhi(EmitContext& ctx, IR::Inst& phi) { + const size_t num_args{phi.NumArgs()}; + for (size_t i = 0; i < num_args; ++i) { + ctx.var_alloc.Consume(phi.Arg(i)); + } + if (!phi.Definition().is_valid) { + // The phi node wasn't forward defined + ctx.var_alloc.PhiDefine(phi, phi.Type()); + } +} + +void EmitVoid(EmitContext&) {} + +void EmitReference(EmitContext& ctx, const IR::Value& value) { + ctx.var_alloc.Consume(value); +} + +void EmitPhiMove(EmitContext& ctx, const IR::Value& phi_value, const IR::Value& value) { + IR::Inst& phi{*phi_value.InstRecursive()}; + const auto phi_type{phi.Type()}; + if (!phi.Definition().is_valid) { + // The phi node wasn't forward defined + ctx.var_alloc.PhiDefine(phi, phi_type); + } + const auto phi_reg{ctx.var_alloc.Consume(IR::Value{&phi})}; + const auto val_reg{ctx.var_alloc.Consume(value)}; + if (phi_reg == val_reg) { + return; + } + const bool needs_workaround{ctx.profile.has_gl_bool_ref_bug && phi_type == IR::Type::U1}; + const auto suffix{needs_workaround ? "?true:false" : ""}; + ctx.Add("{}={}{};", phi_reg, val_reg, suffix); +} + +void EmitPrologue(EmitContext& ctx) { + InitializeOutputVaryings(ctx); +} + +void EmitEpilogue(EmitContext&) {} + +void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream) { + ctx.Add("EmitStreamVertex(int({}));", ctx.var_alloc.Consume(stream)); + InitializeOutputVaryings(ctx); +} + +void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) { + ctx.Add("EndStreamPrimitive(int({}));", ctx.var_alloc.Consume(stream)); +} + +} // namespace Shader::Backend::MSL diff --git a/src/shader_recompiler/backend/msl/emit_msl_undefined.cpp b/src/shader_recompiler/backend/msl/emit_msl_undefined.cpp new file mode 100644 index 0000000000..5e15463f96 --- /dev/null +++ b/src/shader_recompiler/backend/msl/emit_msl_undefined.cpp @@ -0,0 +1,29 @@ +// SPDX-FileCopyrightText: Copyright 2024 suyu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "shader_recompiler/backend/msl/emit_msl_instructions.h" +#include "shader_recompiler/backend/msl/msl_emit_context.h" + +namespace Shader::Backend::MSL { + +void EmitUndefU1(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU1("{}=false;", inst); +} + +void EmitUndefU8(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU32("{}=0u;", inst); +} + +void EmitUndefU16(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU32("{}=0u;", inst); +} + +void EmitUndefU32(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU32("{}=0u;", inst); +} + +void EmitUndefU64(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU64("{}=0u;", inst); +} + +} // namespace Shader::Backend::MSL diff --git a/src/shader_recompiler/backend/msl/emit_msl_warp.cpp b/src/shader_recompiler/backend/msl/emit_msl_warp.cpp new file mode 100644 index 0000000000..f0857bea6c --- /dev/null +++ b/src/shader_recompiler/backend/msl/emit_msl_warp.cpp @@ -0,0 +1,246 @@ +// SPDX-FileCopyrightText: Copyright 2024 suyu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include + +#include "shader_recompiler/backend/msl/emit_msl_instructions.h" +#include "shader_recompiler/backend/msl/msl_emit_context.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/profile.h" + +namespace Shader::Backend::MSL { +namespace { +constexpr char THREAD_ID[]{"gl_SubGroupInvocationARB"}; + +void SetInBoundsFlag(EmitContext& ctx, IR::Inst& inst) { + IR::Inst* const in_bounds{inst.GetAssociatedPseudoOperation(IR::Opcode::GetInBoundsFromOp)}; + if (!in_bounds) { + return; + } + ctx.AddU1("{}=shfl_in_bounds;", *in_bounds); + in_bounds->Invalidate(); +} + +std::string ComputeMinThreadId(std::string_view thread_id, std::string_view segmentation_mask) { + return fmt::format("({}&{})", thread_id, segmentation_mask); +} + +std::string ComputeMaxThreadId(std::string_view min_thread_id, std::string_view clamp, + std::string_view not_seg_mask) { + return fmt::format("({})|({}&{})", min_thread_id, clamp, not_seg_mask); +} + +std::string GetMaxThreadId(std::string_view thread_id, std::string_view clamp, + std::string_view segmentation_mask) { + const auto not_seg_mask{fmt::format("(~{})", segmentation_mask)}; + const auto min_thread_id{ComputeMinThreadId(thread_id, segmentation_mask)}; + return ComputeMaxThreadId(min_thread_id, clamp, not_seg_mask); +} + +void UseShuffleNv(EmitContext& ctx, IR::Inst& inst, std::string_view shfl_op, + std::string_view value, std::string_view index, + [[maybe_unused]] std::string_view clamp, std::string_view segmentation_mask) { + const auto width{fmt::format("32u>>(bitCount({}&31u))", segmentation_mask)}; + ctx.AddU32("{}={}({},{},{},shfl_in_bounds);", inst, shfl_op, value, index, width); + SetInBoundsFlag(ctx, inst); +} + +std::string_view BallotIndex(EmitContext& ctx) { + if (!ctx.profile.warp_size_potentially_larger_than_guest) { + return ".x"; + } + return "[gl_SubGroupInvocationARB>>5]"; +} + +std::string GetMask(EmitContext& ctx, std::string_view mask) { + const auto ballot_index{BallotIndex(ctx)}; + return fmt::format("uint(uvec2({}){})", mask, ballot_index); +} +} // Anonymous namespace + +void EmitLaneId(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU32("{}={}&31u;", inst, THREAD_ID); +} + +void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { + if (!ctx.profile.warp_size_potentially_larger_than_guest) { + ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred); + return; + } + const auto ballot_index{BallotIndex(ctx)}; + const auto active_mask{fmt::format("uvec2(ballotARB(true)){}", ballot_index)}; + const auto ballot{fmt::format("uvec2(ballotARB({})){}", pred, ballot_index)}; + ctx.AddU1("{}=({}&{})=={};", inst, ballot, active_mask, active_mask); +} + +void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { + if (!ctx.profile.warp_size_potentially_larger_than_guest) { + ctx.AddU1("{}=anyInvocationARB({});", inst, pred); + return; + } + const auto ballot_index{BallotIndex(ctx)}; + const auto active_mask{fmt::format("uvec2(ballotARB(true)){}", ballot_index)}; + const auto ballot{fmt::format("uvec2(ballotARB({})){}", pred, ballot_index)}; + ctx.AddU1("{}=({}&{})!=0u;", inst, ballot, active_mask, active_mask); +} + +void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { + if (!ctx.profile.warp_size_potentially_larger_than_guest) { + ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred); + return; + } + const auto ballot_index{BallotIndex(ctx)}; + const auto active_mask{fmt::format("uvec2(ballotARB(true)){}", ballot_index)}; + const auto ballot{fmt::format("uvec2(ballotARB({})){}", pred, ballot_index)}; + const auto value{fmt::format("({}^{})", ballot, active_mask)}; + ctx.AddU1("{}=({}==0)||({}=={});", inst, value, value, active_mask); +} + +void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { + const auto ballot_index{BallotIndex(ctx)}; + ctx.AddU32("{}=uvec2(ballotARB({})){};", inst, pred, ballot_index); +} + +void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU32("{}={};", inst, GetMask(ctx, "gl_SubGroupEqMaskARB")); +} + +void EmitSubgroupLtMask(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU32("{}={};", inst, GetMask(ctx, "gl_SubGroupLtMaskARB")); +} + +void EmitSubgroupLeMask(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU32("{}={};", inst, GetMask(ctx, "gl_SubGroupLeMaskARB")); +} + +void EmitSubgroupGtMask(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU32("{}={};", inst, GetMask(ctx, "gl_SubGroupGtMaskARB")); +} + +void EmitSubgroupGeMask(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU32("{}={};", inst, GetMask(ctx, "gl_SubGroupGeMaskARB")); +} + +void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string_view value, + std::string_view index, std::string_view clamp, std::string_view seg_mask) { + if (ctx.profile.support_gl_warp_intrinsics) { + UseShuffleNv(ctx, inst, "shuffleNV", value, index, clamp, seg_mask); + return; + } + const bool big_warp{ctx.profile.warp_size_potentially_larger_than_guest}; + const auto is_upper_partition{"int(gl_SubGroupInvocationARB)>=32"}; + const auto upper_index{fmt::format("{}?{}+32:{}", is_upper_partition, index, index)}; + const auto upper_clamp{fmt::format("{}?{}+32:{}", is_upper_partition, clamp, clamp)}; + + const auto not_seg_mask{fmt::format("(~{})", seg_mask)}; + const auto min_thread_id{ComputeMinThreadId(THREAD_ID, seg_mask)}; + const auto max_thread_id{ + ComputeMaxThreadId(min_thread_id, big_warp ? upper_clamp : clamp, not_seg_mask)}; + + const auto lhs{fmt::format("({}&{})", big_warp ? upper_index : index, not_seg_mask)}; + const auto src_thread_id{fmt::format("({})|({})", lhs, min_thread_id)}; + ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id); + SetInBoundsFlag(ctx, inst); + ctx.Add("shfl_result=readInvocationARB({},{});", value, src_thread_id); + ctx.AddU32("{}=shfl_in_bounds?shfl_result:{};", inst, value); +} + +void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index, + std::string_view clamp, std::string_view seg_mask) { + if (ctx.profile.support_gl_warp_intrinsics) { + UseShuffleNv(ctx, inst, "shuffleUpNV", value, index, clamp, seg_mask); + return; + } + const bool big_warp{ctx.profile.warp_size_potentially_larger_than_guest}; + const auto is_upper_partition{"int(gl_SubGroupInvocationARB)>=32"}; + const auto upper_clamp{fmt::format("{}?{}+32:{}", is_upper_partition, clamp, clamp)}; + + const auto max_thread_id{GetMaxThreadId(THREAD_ID, big_warp ? upper_clamp : clamp, seg_mask)}; + const auto src_thread_id{fmt::format("({}-{})", THREAD_ID, index)}; + ctx.Add("shfl_in_bounds=int({})>=int({});", src_thread_id, max_thread_id); + SetInBoundsFlag(ctx, inst); + ctx.Add("shfl_result=readInvocationARB({},{});", value, src_thread_id); + ctx.AddU32("{}=shfl_in_bounds?shfl_result:{};", inst, value); +} + +void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value, + std::string_view index, std::string_view clamp, std::string_view seg_mask) { + if (ctx.profile.support_gl_warp_intrinsics) { + UseShuffleNv(ctx, inst, "shuffleDownNV", value, index, clamp, seg_mask); + return; + } + const bool big_warp{ctx.profile.warp_size_potentially_larger_than_guest}; + const auto is_upper_partition{"int(gl_SubGroupInvocationARB)>=32"}; + const auto upper_clamp{fmt::format("{}?{}+32:{}", is_upper_partition, clamp, clamp)}; + + const auto max_thread_id{GetMaxThreadId(THREAD_ID, big_warp ? upper_clamp : clamp, seg_mask)}; + const auto src_thread_id{fmt::format("({}+{})", THREAD_ID, index)}; + ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id); + SetInBoundsFlag(ctx, inst); + ctx.Add("shfl_result=readInvocationARB({},{});", value, src_thread_id); + ctx.AddU32("{}=shfl_in_bounds?shfl_result:{};", inst, value); +} + +void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string_view value, + std::string_view index, std::string_view clamp, + std::string_view seg_mask) { + if (ctx.profile.support_gl_warp_intrinsics) { + UseShuffleNv(ctx, inst, "shuffleXorNV", value, index, clamp, seg_mask); + return; + } + const bool big_warp{ctx.profile.warp_size_potentially_larger_than_guest}; + const auto is_upper_partition{"int(gl_SubGroupInvocationARB)>=32"}; + const auto upper_clamp{fmt::format("{}?{}+32:{}", is_upper_partition, clamp, clamp)}; + + const auto max_thread_id{GetMaxThreadId(THREAD_ID, big_warp ? upper_clamp : clamp, seg_mask)}; + const auto src_thread_id{fmt::format("({}^{})", THREAD_ID, index)}; + ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id); + SetInBoundsFlag(ctx, inst); + ctx.Add("shfl_result=readInvocationARB({},{});", value, src_thread_id); + ctx.AddU32("{}=shfl_in_bounds?shfl_result:{};", inst, value); +} + +void EmitFSwizzleAdd(EmitContext& ctx, IR::Inst& inst, std::string_view op_a, std::string_view op_b, + std::string_view swizzle) { + const auto mask{fmt::format("({}>>((gl_SubGroupInvocationARB&3)<<1))&3", swizzle)}; + const std::string modifier_a = fmt::format("FSWZ_A[{}]", mask); + const std::string modifier_b = fmt::format("FSWZ_B[{}]", mask); + ctx.AddF32("{}=({}*{})+({}*{});", inst, op_a, modifier_a, op_b, modifier_b); +} + +void EmitDPdxFine(EmitContext& ctx, IR::Inst& inst, std::string_view op_a) { + if (ctx.profile.support_gl_derivative_control) { + ctx.AddF32("{}=dFdxFine({});", inst, op_a); + } else { + LOG_WARNING(Shader_MSL, "Device does not support dFdxFine, fallback to dFdx"); + ctx.AddF32("{}=dFdx({});", inst, op_a); + } +} + +void EmitDPdyFine(EmitContext& ctx, IR::Inst& inst, std::string_view op_a) { + if (ctx.profile.support_gl_derivative_control) { + ctx.AddF32("{}=dFdyFine({});", inst, op_a); + } else { + LOG_WARNING(Shader_MSL, "Device does not support dFdyFine, fallback to dFdy"); + ctx.AddF32("{}=dFdy({});", inst, op_a); + } +} + +void EmitDPdxCoarse(EmitContext& ctx, IR::Inst& inst, std::string_view op_a) { + if (ctx.profile.support_gl_derivative_control) { + ctx.AddF32("{}=dFdxCoarse({});", inst, op_a); + } else { + LOG_WARNING(Shader_MSL, "Device does not support dFdxCoarse, fallback to dFdx"); + ctx.AddF32("{}=dFdx({});", inst, op_a); + } +} + +void EmitDPdyCoarse(EmitContext& ctx, IR::Inst& inst, std::string_view op_a) { + if (ctx.profile.support_gl_derivative_control) { + ctx.AddF32("{}=dFdyCoarse({});", inst, op_a); + } else { + LOG_WARNING(Shader_MSL, "Device does not support dFdyCoarse, fallback to dFdy"); + ctx.AddF32("{}=dFdy({});", inst, op_a); + } +} +} // namespace Shader::Backend::MSL diff --git a/src/shader_recompiler/backend/msl/msl_emit_context.cpp b/src/shader_recompiler/backend/msl/msl_emit_context.cpp new file mode 100644 index 0000000000..ee99db3f73 --- /dev/null +++ b/src/shader_recompiler/backend/msl/msl_emit_context.cpp @@ -0,0 +1,709 @@ +// SPDX-FileCopyrightText: Copyright 2024 suyu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "common/div_ceil.h" +#include "shader_recompiler/backend/bindings.h" +#include "shader_recompiler/backend/msl/msl_emit_context.h" +#include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/profile.h" +#include "shader_recompiler/runtime_info.h" + +namespace Shader::Backend::MSL { +namespace { +u32 CbufIndex(size_t offset) { + return (offset / 4) % 4; +} + +char Swizzle(size_t offset) { + return "xyzw"[CbufIndex(offset)]; +} + +std::string_view InterpDecorator(Interpolation interp) { + switch (interp) { + case Interpolation::Smooth: + return ""; + case Interpolation::Flat: + return "flat "; + case Interpolation::NoPerspective: + return "noperspective "; + } + throw InvalidArgument("Invalid interpolation {}", interp); +} + +std::string_view InputArrayDecorator(Stage stage) { + switch (stage) { + case Stage::Geometry: + case Stage::TessellationControl: + case Stage::TessellationEval: + return "[]"; + default: + return ""; + } +} + +bool StoresPerVertexAttributes(Stage stage) { + switch (stage) { + case Stage::VertexA: + case Stage::VertexB: + case Stage::Geometry: + case Stage::TessellationEval: + return true; + default: + return false; + } +} + +std::string OutputDecorator(Stage stage, u32 size) { + switch (stage) { + case Stage::TessellationControl: + return fmt::format("[{}]", size); + default: + return ""; + } +} + +std::string_view DepthSamplerType(TextureType type) { + switch (type) { + case TextureType::Color1D: + return "sampler1DShadow"; + case TextureType::ColorArray1D: + return "sampler1DArrayShadow"; + case TextureType::Color2D: + return "sampler2DShadow"; + case TextureType::ColorArray2D: + return "sampler2DArrayShadow"; + case TextureType::ColorCube: + return "samplerCubeShadow"; + case TextureType::ColorArrayCube: + return "samplerCubeArrayShadow"; + default: + throw NotImplementedException("Texture type: {}", type); + } +} + +std::string_view ColorSamplerType(TextureType type, bool is_multisample = false) { + if (is_multisample) { + ASSERT(type == TextureType::Color2D || type == TextureType::ColorArray2D); + } + switch (type) { + case TextureType::Color1D: + return "sampler1D"; + case TextureType::ColorArray1D: + return "sampler1DArray"; + case TextureType::Color2D: + case TextureType::Color2DRect: + return is_multisample ? "sampler2DMS" : "sampler2D"; + case TextureType::ColorArray2D: + return is_multisample ? "sampler2DMSArray" : "sampler2DArray"; + case TextureType::Color3D: + return "sampler3D"; + case TextureType::ColorCube: + return "samplerCube"; + case TextureType::ColorArrayCube: + return "samplerCubeArray"; + case TextureType::Buffer: + return "samplerBuffer"; + default: + throw NotImplementedException("Texture type: {}", type); + } +} + +std::string_view ImageType(TextureType type) { + switch (type) { + case TextureType::Color1D: + return "uimage1D"; + case TextureType::ColorArray1D: + return "uimage1DArray"; + case TextureType::Color2D: + return "uimage2D"; + case TextureType::ColorArray2D: + return "uimage2DArray"; + case TextureType::Color3D: + return "uimage3D"; + case TextureType::ColorCube: + return "uimageCube"; + case TextureType::ColorArrayCube: + return "uimageCubeArray"; + case TextureType::Buffer: + return "uimageBuffer"; + default: + throw NotImplementedException("Image type: {}", type); + } +} + +std::string_view ImageFormatString(ImageFormat format) { + switch (format) { + case ImageFormat::Typeless: + return ""; + case ImageFormat::R8_UINT: + return ",r8ui"; + case ImageFormat::R8_SINT: + return ",r8i"; + case ImageFormat::R16_UINT: + return ",r16ui"; + case ImageFormat::R16_SINT: + return ",r16i"; + case ImageFormat::R32_UINT: + return ",r32ui"; + case ImageFormat::R32G32_UINT: + return ",rg32ui"; + case ImageFormat::R32G32B32A32_UINT: + return ",rgba32ui"; + default: + throw NotImplementedException("Image format: {}", format); + } +} + +std::string_view ImageAccessQualifier(bool is_written, bool is_read) { + if (is_written && !is_read) { + return "writeonly "; + } + if (is_read && !is_written) { + return "readonly "; + } + return ""; +} + +std::string_view GetTessMode(TessPrimitive primitive) { + switch (primitive) { + case TessPrimitive::Triangles: + return "triangles"; + case TessPrimitive::Quads: + return "quads"; + case TessPrimitive::Isolines: + return "isolines"; + } + throw InvalidArgument("Invalid tessellation primitive {}", primitive); +} + +std::string_view GetTessSpacing(TessSpacing spacing) { + switch (spacing) { + case TessSpacing::Equal: + return "equal_spacing"; + case TessSpacing::FractionalOdd: + return "fractional_odd_spacing"; + case TessSpacing::FractionalEven: + return "fractional_even_spacing"; + } + throw InvalidArgument("Invalid tessellation spacing {}", spacing); +} + +std::string_view InputPrimitive(InputTopology topology) { + switch (topology) { + case InputTopology::Points: + return "points"; + case InputTopology::Lines: + return "lines"; + case InputTopology::LinesAdjacency: + return "lines_adjacency"; + case InputTopology::Triangles: + return "triangles"; + case InputTopology::TrianglesAdjacency: + return "triangles_adjacency"; + } + throw InvalidArgument("Invalid input topology {}", topology); +} + +std::string_view OutputPrimitive(OutputTopology topology) { + switch (topology) { + case OutputTopology::PointList: + return "points"; + case OutputTopology::LineStrip: + return "line_strip"; + case OutputTopology::TriangleStrip: + return "triangle_strip"; + } + throw InvalidArgument("Invalid output topology {}", topology); +} + +void SetupOutPerVertex(EmitContext& ctx, std::string& header) { + if (!StoresPerVertexAttributes(ctx.stage)) { + return; + } + if (ctx.uses_geometry_passthrough) { + return; + } + header += "out gl_PerVertex{vec4 gl_Position;"; + if (ctx.info.stores[IR::Attribute::PointSize]) { + header += "float gl_PointSize;"; + } + if (ctx.info.stores.ClipDistances()) { + header += "float gl_ClipDistance[];"; + } + if (ctx.info.stores[IR::Attribute::ViewportIndex] && + ctx.profile.support_viewport_index_layer_non_geometry && ctx.stage != Stage::Geometry) { + header += "int gl_ViewportIndex;"; + } + header += "};"; + if (ctx.info.stores[IR::Attribute::ViewportIndex] && ctx.stage == Stage::Geometry) { + header += "out int gl_ViewportIndex;"; + } +} + +void SetupInPerVertex(EmitContext& ctx, std::string& header) { + // Currently only required for TessellationControl to adhere to + // ARB_separate_shader_objects requirements + if (ctx.stage != Stage::TessellationControl) { + return; + } + const bool loads_position{ctx.info.loads.AnyComponent(IR::Attribute::PositionX)}; + const bool loads_point_size{ctx.info.loads[IR::Attribute::PointSize]}; + const bool loads_clip_distance{ctx.info.loads.ClipDistances()}; + const bool loads_per_vertex{loads_position || loads_point_size || loads_clip_distance}; + if (!loads_per_vertex) { + return; + } + header += "in gl_PerVertex{"; + if (loads_position) { + header += "vec4 gl_Position;"; + } + if (loads_point_size) { + header += "float gl_PointSize;"; + } + if (loads_clip_distance) { + header += "float gl_ClipDistance[];"; + } + header += "}gl_in[gl_MaxPatchVertices];"; +} +} // Anonymous namespace + +EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_, + const RuntimeInfo& runtime_info_) + : info{program.info}, profile{profile_}, runtime_info{runtime_info_}, stage{program.stage}, + uses_geometry_passthrough{program.is_geometry_passthrough && + profile.support_geometry_shader_passthrough} { + if (profile.need_fastmath_off) { + header += "#pragma optionNV(fastmath off)\n"; + } + SetupExtensions(); + switch (program.stage) { + case Stage::VertexA: + case Stage::VertexB: + stage_name = "vs"; + break; + case Stage::TessellationControl: + stage_name = "tcs"; + header += fmt::format("layout(vertices={})out;", program.invocations); + break; + case Stage::TessellationEval: + stage_name = "tes"; + header += fmt::format("layout({},{},{})in;", GetTessMode(runtime_info.tess_primitive), + GetTessSpacing(runtime_info.tess_spacing), + runtime_info.tess_clockwise ? "cw" : "ccw"); + break; + case Stage::Geometry: + stage_name = "gs"; + header += fmt::format("layout({})in;", InputPrimitive(runtime_info.input_topology)); + if (uses_geometry_passthrough) { + header += "layout(passthrough)in gl_PerVertex{vec4 gl_Position;};"; + break; + } else if (program.is_geometry_passthrough && + !profile.support_geometry_shader_passthrough) { + LOG_WARNING(Shader_MSL, "Passthrough geometry program used but not supported"); + } + header += fmt::format( + "layout({},max_vertices={})out;in gl_PerVertex{{vec4 gl_Position;}}gl_in[];", + OutputPrimitive(program.output_topology), program.output_vertices); + break; + case Stage::Fragment: + stage_name = "fs"; + position_name = "gl_FragCoord"; + if (runtime_info.force_early_z) { + header += "layout(early_fragment_tests)in;"; + } + break; + case Stage::Compute: + stage_name = "cs"; + const u32 local_x{std::max(program.workgroup_size[0], 1u)}; + const u32 local_y{std::max(program.workgroup_size[1], 1u)}; + const u32 local_z{std::max(program.workgroup_size[2], 1u)}; + header += fmt::format("layout(local_size_x={},local_size_y={},local_size_z={}) in;", + local_x, local_y, local_z); + break; + } + SetupOutPerVertex(*this, header); + SetupInPerVertex(*this, header); + + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (!info.loads.Generic(index) || !runtime_info.previous_stage_stores.Generic(index)) { + continue; + } + const auto qualifier{uses_geometry_passthrough ? "passthrough" + : fmt::format("location={}", index)}; + header += fmt::format("layout({}){}in vec4 in_attr{}{};", qualifier, + InterpDecorator(info.interpolation[index]), index, + InputArrayDecorator(stage)); + } + for (size_t index = 0; index < info.uses_patches.size(); ++index) { + if (!info.uses_patches[index]) { + continue; + } + const auto qualifier{stage == Stage::TessellationControl ? "out" : "in"}; + header += fmt::format("layout(location={})patch {} vec4 patch{};", index, qualifier, index); + } + if (stage == Stage::Fragment) { + for (size_t index = 0; index < info.stores_frag_color.size(); ++index) { + if (!info.stores_frag_color[index] && !profile.need_declared_frag_colors) { + continue; + } + header += fmt::format("layout(location={})out vec4 frag_color{};", index, index); + } + } + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (info.stores.Generic(index)) { + DefineGenericOutput(index, program.invocations); + } + } + if (info.uses_rescaling_uniform) { + header += "layout(location=0) uniform vec4 scaling;"; + } + if (info.uses_render_area) { + header += "layout(location=1) uniform vec4 render_area;"; + } + DefineConstantBuffers(bindings); + DefineConstantBufferIndirect(); + DefineStorageBuffers(bindings); + SetupImages(bindings); + SetupTextures(bindings); + DefineHelperFunctions(); + DefineConstants(); +} + +void EmitContext::SetupExtensions() { + header += "#extension GL_ARB_separate_shader_objects : enable\n"; + if (info.uses_shadow_lod && profile.support_gl_texture_shadow_lod) { + header += "#extension GL_EXT_texture_shadow_lod : enable\n"; + } + if (info.uses_int64 && profile.support_int64) { + header += "#extension GL_ARB_gpu_shader_int64 : enable\n"; + } + if (info.uses_int64_bit_atomics) { + header += "#extension GL_NV_shader_atomic_int64 : enable\n"; + } + if (info.uses_atomic_f32_add) { + header += "#extension GL_NV_shader_atomic_float : enable\n"; + } + if (info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max) { + header += "#extension GL_NV_shader_atomic_fp16_vector : enable\n"; + } + if (info.uses_fp16) { + if (profile.support_gl_nv_gpu_shader_5) { + header += "#extension GL_NV_gpu_shader5 : enable\n"; + } + if (profile.support_gl_amd_gpu_shader_half_float) { + header += "#extension GL_AMD_gpu_shader_half_float : enable\n"; + } + } + if (info.uses_subgroup_invocation_id || info.uses_subgroup_mask || info.uses_subgroup_vote || + info.uses_subgroup_shuffles || info.uses_fswzadd) { + header += "#extension GL_ARB_shader_ballot : enable\n" + "#extension GL_ARB_shader_group_vote : enable\n"; + if (!info.uses_int64 && profile.support_int64) { + header += "#extension GL_ARB_gpu_shader_int64 : enable\n"; + } + if (profile.support_gl_warp_intrinsics) { + header += "#extension GL_NV_shader_thread_shuffle : enable\n"; + } + } + if ((info.stores[IR::Attribute::ViewportIndex] || info.stores[IR::Attribute::Layer]) && + profile.support_viewport_index_layer_non_geometry && stage != Stage::Geometry) { + header += "#extension GL_ARB_shader_viewport_layer_array : enable\n"; + } + if (info.uses_sparse_residency && profile.support_gl_sparse_textures) { + header += "#extension GL_ARB_sparse_texture2 : enable\n"; + } + if (info.stores[IR::Attribute::ViewportMask] && profile.support_viewport_mask) { + header += "#extension GL_NV_viewport_array2 : enable\n"; + } + if (info.uses_typeless_image_reads) { + header += "#extension GL_EXT_shader_image_load_formatted : enable\n"; + } + if (info.uses_derivatives && profile.support_gl_derivative_control) { + header += "#extension GL_ARB_derivative_control : enable\n"; + } + if (uses_geometry_passthrough) { + header += "#extension GL_NV_geometry_shader_passthrough : enable\n"; + } +} + +void EmitContext::DefineConstantBuffers(Bindings& bindings) { + if (info.constant_buffer_descriptors.empty()) { + return; + } + for (const auto& desc : info.constant_buffer_descriptors) { + const auto cbuf_type{profile.has_gl_cbuf_ftou_bug ? "uvec4" : "vec4"}; + const u32 cbuf_used_size{Common::DivCeil(info.constant_buffer_used_sizes[desc.index], 16U)}; + const u32 cbuf_binding_size{info.uses_global_memory ? 0x1000U : cbuf_used_size}; + header += fmt::format("layout(std140,binding={}) uniform {}_cbuf_{}{{{} {}_cbuf{}[{}];}};", + bindings.uniform_buffer, stage_name, desc.index, cbuf_type, + stage_name, desc.index, cbuf_binding_size); + bindings.uniform_buffer += desc.count; + } +} + +void EmitContext::DefineConstantBufferIndirect() { + if (!info.uses_cbuf_indirect) { + return; + } + + header += profile.has_gl_cbuf_ftou_bug ? "uvec4 " : "vec4 "; + header += "GetCbufIndirect(uint binding, uint offset){" + "switch(binding){" + "default:"; + + for (const auto& desc : info.constant_buffer_descriptors) { + header += + fmt::format("case {}:return {}_cbuf{}[offset];", desc.index, stage_name, desc.index); + } + + header += "}}"; +} + +void EmitContext::DefineStorageBuffers(Bindings& bindings) { + if (info.storage_buffers_descriptors.empty()) { + return; + } + u32 index{}; + for (const auto& desc : info.storage_buffers_descriptors) { + header += fmt::format("layout(std430,binding={}) buffer {}_ssbo_{}{{uint {}_ssbo{}[];}};", + bindings.storage_buffer, stage_name, bindings.storage_buffer, + stage_name, index); + bindings.storage_buffer += desc.count; + index += desc.count; + } +} + +void EmitContext::DefineGenericOutput(size_t index, u32 invocations) { + static constexpr std::string_view swizzle{"xyzw"}; + const size_t base_index{static_cast(IR::Attribute::Generic0X) + index * 4}; + u32 element{0}; + while (element < 4) { + std::string definition{fmt::format("layout(location={}", index)}; + const u32 remainder{4 - element}; + const TransformFeedbackVarying* xfb_varying{}; + const size_t xfb_varying_index{base_index + element}; + if (xfb_varying_index < runtime_info.xfb_count) { + xfb_varying = &runtime_info.xfb_varyings[xfb_varying_index]; + xfb_varying = xfb_varying->components > 0 ? xfb_varying : nullptr; + } + const u32 num_components{xfb_varying ? xfb_varying->components : remainder}; + if (element > 0) { + definition += fmt::format(",component={}", element); + } + if (xfb_varying) { + definition += + fmt::format(",xfb_buffer={},xfb_stride={},xfb_offset={}", xfb_varying->buffer, + xfb_varying->stride, xfb_varying->offset); + } + std::string name{fmt::format("out_attr{}", index)}; + if (num_components < 4 || element > 0) { + name += fmt::format("_{}", swizzle.substr(element, num_components)); + } + const auto type{num_components == 1 ? "float" : fmt::format("vec{}", num_components)}; + definition += fmt::format(")out {} {}{};", type, name, OutputDecorator(stage, invocations)); + header += definition; + + const GenericElementInfo element_info{ + .name = name, + .first_element = element, + .num_components = num_components, + }; + std::fill_n(output_generics[index].begin() + element, num_components, element_info); + element += num_components; + } +} + +void EmitContext::DefineHelperFunctions() { + header += "\n#define ftoi floatBitsToInt\n#define ftou floatBitsToUint\n" + "#define itof intBitsToFloat\n#define utof uintBitsToFloat\n"; + if (info.uses_global_increment || info.uses_shared_increment) { + header += "uint CasIncrement(uint op_a,uint op_b){return op_a>=op_b?0u:(op_a+1u);}"; + } + if (info.uses_global_decrement || info.uses_shared_decrement) { + header += "uint CasDecrement(uint op_a,uint op_b){" + "return op_a==0||op_a>op_b?op_b:(op_a-1u);}"; + } + if (info.uses_atomic_f32_add) { + header += "uint CasFloatAdd(uint op_a,float op_b){" + "return ftou(utof(op_a)+op_b);}"; + } + if (info.uses_atomic_f32x2_add) { + header += "uint CasFloatAdd32x2(uint op_a,vec2 op_b){" + "return packHalf2x16(unpackHalf2x16(op_a)+op_b);}"; + } + if (info.uses_atomic_f32x2_min) { + header += "uint CasFloatMin32x2(uint op_a,vec2 op_b){return " + "packHalf2x16(min(unpackHalf2x16(op_a),op_b));}"; + } + if (info.uses_atomic_f32x2_max) { + header += "uint CasFloatMax32x2(uint op_a,vec2 op_b){return " + "packHalf2x16(max(unpackHalf2x16(op_a),op_b));}"; + } + if (info.uses_atomic_f16x2_add) { + header += "uint CasFloatAdd16x2(uint op_a,f16vec2 op_b){return " + "packFloat2x16(unpackFloat2x16(op_a)+op_b);}"; + } + if (info.uses_atomic_f16x2_min) { + header += "uint CasFloatMin16x2(uint op_a,f16vec2 op_b){return " + "packFloat2x16(min(unpackFloat2x16(op_a),op_b));}"; + } + if (info.uses_atomic_f16x2_max) { + header += "uint CasFloatMax16x2(uint op_a,f16vec2 op_b){return " + "packFloat2x16(max(unpackFloat2x16(op_a),op_b));}"; + } + if (info.uses_atomic_s32_min) { + header += "uint CasMinS32(uint op_a,uint op_b){return uint(min(int(op_a),int(op_b)));}"; + } + if (info.uses_atomic_s32_max) { + header += "uint CasMaxS32(uint op_a,uint op_b){return uint(max(int(op_a),int(op_b)));}"; + } + if (info.uses_global_memory && profile.support_int64) { + header += DefineGlobalMemoryFunctions(); + } + if (info.loads_indexed_attributes) { + const bool is_array{stage == Stage::Geometry}; + const auto vertex_arg{is_array ? ",uint vertex" : ""}; + std::string func{ + fmt::format("float IndexedAttrLoad(int offset{}){{int base_index=offset>>2;uint " + "masked_index=uint(base_index)&3u;switch(base_index>>2){{", + vertex_arg)}; + if (info.loads.AnyComponent(IR::Attribute::PositionX)) { + const auto position_idx{is_array ? "gl_in[vertex]." : ""}; + func += fmt::format("case {}:return {}{}[masked_index];", + static_cast(IR::Attribute::PositionX) >> 2, position_idx, + position_name); + } + const u32 base_attribute_value = static_cast(IR::Attribute::Generic0X) >> 2; + for (u32 index = 0; index < IR::NUM_GENERICS; ++index) { + if (!info.loads.Generic(index)) { + continue; + } + const auto vertex_idx{is_array ? "[vertex]" : ""}; + func += fmt::format("case {}:return in_attr{}{}[masked_index];", + base_attribute_value + index, index, vertex_idx); + } + func += "default: return 0.0;}}"; + header += func; + } + if (info.stores_indexed_attributes) { + // TODO + } +} + +std::string EmitContext::DefineGlobalMemoryFunctions() { + const auto define_body{[&](std::string& func, size_t index, std::string_view return_statement) { + const auto& ssbo{info.storage_buffers_descriptors[index]}; + const u32 size_cbuf_offset{ssbo.cbuf_offset + 8}; + const auto ssbo_addr{fmt::format("ssbo_addr{}", index)}; + const auto cbuf{fmt::format("{}_cbuf{}", stage_name, ssbo.cbuf_index)}; + std::array addr_xy; + std::array size_xy; + for (size_t i = 0; i < addr_xy.size(); ++i) { + const auto addr_loc{ssbo.cbuf_offset + 4 * i}; + const auto size_loc{size_cbuf_offset + 4 * i}; + addr_xy[i] = fmt::format("ftou({}[{}].{})", cbuf, addr_loc / 16, Swizzle(addr_loc)); + size_xy[i] = fmt::format("ftou({}[{}].{})", cbuf, size_loc / 16, Swizzle(size_loc)); + } + const u32 ssbo_align_mask{~(static_cast(profile.min_ssbo_alignment) - 1U)}; + const auto aligned_low_addr{fmt::format("{}&{}", addr_xy[0], ssbo_align_mask)}; + const auto aligned_addr{fmt::format("uvec2({},{})", aligned_low_addr, addr_xy[1])}; + const auto addr_pack{fmt::format("packUint2x32({})", aligned_addr)}; + const auto addr_statement{fmt::format("uint64_t {}={};", ssbo_addr, addr_pack)}; + func += addr_statement; + + const auto size_vec{fmt::format("uvec2({},{})", size_xy[0], size_xy[1])}; + const auto comp_lhs{fmt::format("(addr>={})", ssbo_addr)}; + const auto comp_rhs{fmt::format("(addr<({}+uint64_t({})))", ssbo_addr, size_vec)}; + const auto comparison{fmt::format("if({}&&{}){{", comp_lhs, comp_rhs)}; + func += comparison; + + const auto ssbo_name{fmt::format("{}_ssbo{}", stage_name, index)}; + func += fmt::format(fmt::runtime(return_statement), ssbo_name, ssbo_addr); + }}; + std::string write_func{"void WriteGlobal32(uint64_t addr,uint data){"}; + std::string write_func_64{"void WriteGlobal64(uint64_t addr,uvec2 data){"}; + std::string write_func_128{"void WriteGlobal128(uint64_t addr,uvec4 data){"}; + std::string load_func{"uint LoadGlobal32(uint64_t addr){"}; + std::string load_func_64{"uvec2 LoadGlobal64(uint64_t addr){"}; + std::string load_func_128{"uvec4 LoadGlobal128(uint64_t addr){"}; + const size_t num_buffers{info.storage_buffers_descriptors.size()}; + for (size_t index = 0; index < num_buffers; ++index) { + if (!info.nvn_buffer_used[index]) { + continue; + } + define_body(write_func, index, "{0}[uint(addr-{1})>>2]=data;return;}}"); + define_body(write_func_64, index, + "{0}[uint(addr-{1})>>2]=data.x;{0}[uint(addr-{1}+4)>>2]=data.y;return;}}"); + define_body(write_func_128, index, + "{0}[uint(addr-{1})>>2]=data.x;{0}[uint(addr-{1}+4)>>2]=data.y;{0}[uint(" + "addr-{1}+8)>>2]=data.z;{0}[uint(addr-{1}+12)>>2]=data.w;return;}}"); + define_body(load_func, index, "return {0}[uint(addr-{1})>>2];}}"); + define_body(load_func_64, index, + "return uvec2({0}[uint(addr-{1})>>2],{0}[uint(addr-{1}+4)>>2]);}}"); + define_body(load_func_128, index, + "return uvec4({0}[uint(addr-{1})>>2],{0}[uint(addr-{1}+4)>>2],{0}[" + "uint(addr-{1}+8)>>2],{0}[uint(addr-{1}+12)>>2]);}}"); + } + write_func += '}'; + write_func_64 += '}'; + write_func_128 += '}'; + load_func += "return 0u;}"; + load_func_64 += "return uvec2(0);}"; + load_func_128 += "return uvec4(0);}"; + return write_func + write_func_64 + write_func_128 + load_func + load_func_64 + load_func_128; +} + +void EmitContext::SetupImages(Bindings& bindings) { + image_buffers.reserve(info.image_buffer_descriptors.size()); + for (const auto& desc : info.image_buffer_descriptors) { + image_buffers.push_back({bindings.image, desc.count}); + const auto format{ImageFormatString(desc.format)}; + const auto qualifier{ImageAccessQualifier(desc.is_written, desc.is_read)}; + const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""}; + header += fmt::format("layout(binding={}{}) uniform {}uimageBuffer img{}{};", + bindings.image, format, qualifier, bindings.image, array_decorator); + bindings.image += desc.count; + } + images.reserve(info.image_descriptors.size()); + for (const auto& desc : info.image_descriptors) { + images.push_back({bindings.image, desc.count}); + const auto format{ImageFormatString(desc.format)}; + const auto image_type{ImageType(desc.type)}; + const auto qualifier{ImageAccessQualifier(desc.is_written, desc.is_read)}; + const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""}; + header += fmt::format("layout(binding={}{})uniform {}{} img{}{};", bindings.image, format, + qualifier, image_type, bindings.image, array_decorator); + bindings.image += desc.count; + } +} + +void EmitContext::SetupTextures(Bindings& bindings) { + texture_buffers.reserve(info.texture_buffer_descriptors.size()); + for (const auto& desc : info.texture_buffer_descriptors) { + texture_buffers.push_back({bindings.texture, desc.count}); + const auto sampler_type{ColorSamplerType(TextureType::Buffer)}; + const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""}; + header += fmt::format("layout(binding={}) uniform {} tex{}{};", bindings.texture, + sampler_type, bindings.texture, array_decorator); + bindings.texture += desc.count; + } + textures.reserve(info.texture_descriptors.size()); + for (const auto& desc : info.texture_descriptors) { + textures.push_back({bindings.texture, desc.count}); + const auto sampler_type{desc.is_depth ? DepthSamplerType(desc.type) + : ColorSamplerType(desc.type, desc.is_multisample)}; + const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""}; + header += fmt::format("layout(binding={}) uniform {} tex{}{};", bindings.texture, + sampler_type, bindings.texture, array_decorator); + bindings.texture += desc.count; + } +} + +void EmitContext::DefineConstants() { + if (info.uses_fswzadd) { + header += "const float FSWZ_A[]=float[4](-1.f,1.f,-1.f,0.f);" + "const float FSWZ_B[]=float[4](-1.f,-1.f,1.f,-1.f);"; + } +} + +} // namespace Shader::Backend::MSL diff --git a/src/shader_recompiler/backend/msl/msl_emit_context.h b/src/shader_recompiler/backend/msl/msl_emit_context.h new file mode 100644 index 0000000000..1a6b5d9769 --- /dev/null +++ b/src/shader_recompiler/backend/msl/msl_emit_context.h @@ -0,0 +1,174 @@ +// SPDX-FileCopyrightText: Copyright 2024 suyu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include +#include + +#include + +#include "shader_recompiler/backend/msl/var_alloc.h" +#include "shader_recompiler/stage.h" + +namespace Shader { +struct Info; +struct Profile; +struct RuntimeInfo; +} // namespace Shader + +namespace Shader::Backend { +struct Bindings; +} + +namespace Shader::IR { +class Inst; +struct Program; +} // namespace Shader::IR + +namespace Shader::Backend::MSL { + +struct GenericElementInfo { + std::string name; + u32 first_element{}; + u32 num_components{}; +}; + +struct TextureImageDefinition { + u32 binding; + u32 count; +}; + +class EmitContext { +public: + explicit EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_, + const RuntimeInfo& runtime_info_); + + template + void Add(const char* format_str, IR::Inst& inst, Args&&... args) { + const auto var_def{var_alloc.AddDefine(inst, type)}; + if (var_def.empty()) { + // skip assignment. + code += fmt::format(fmt::runtime(format_str + 3), std::forward(args)...); + } else { + code += fmt::format(fmt::runtime(format_str), var_def, std::forward(args)...); + } + // TODO: Remove this + code += '\n'; + } + + template + void AddU1(const char* format_str, IR::Inst& inst, Args&&... args) { + Add(format_str, inst, args...); + } + + template + void AddF16x2(const char* format_str, IR::Inst& inst, Args&&... args) { + Add(format_str, inst, args...); + } + + template + void AddU32(const char* format_str, IR::Inst& inst, Args&&... args) { + Add(format_str, inst, args...); + } + + template + void AddF32(const char* format_str, IR::Inst& inst, Args&&... args) { + Add(format_str, inst, args...); + } + + template + void AddU64(const char* format_str, IR::Inst& inst, Args&&... args) { + Add(format_str, inst, args...); + } + + template + void AddF64(const char* format_str, IR::Inst& inst, Args&&... args) { + Add(format_str, inst, args...); + } + + template + void AddU32x2(const char* format_str, IR::Inst& inst, Args&&... args) { + Add(format_str, inst, args...); + } + + template + void AddF32x2(const char* format_str, IR::Inst& inst, Args&&... args) { + Add(format_str, inst, args...); + } + + template + void AddU32x3(const char* format_str, IR::Inst& inst, Args&&... args) { + Add(format_str, inst, args...); + } + + template + void AddF32x3(const char* format_str, IR::Inst& inst, Args&&... args) { + Add(format_str, inst, args...); + } + + template + void AddU32x4(const char* format_str, IR::Inst& inst, Args&&... args) { + Add(format_str, inst, args...); + } + + template + void AddF32x4(const char* format_str, IR::Inst& inst, Args&&... args) { + Add(format_str, inst, args...); + } + + template + void AddPrecF32(const char* format_str, IR::Inst& inst, Args&&... args) { + Add(format_str, inst, args...); + } + + template + void AddPrecF64(const char* format_str, IR::Inst& inst, Args&&... args) { + Add(format_str, inst, args...); + } + + template + void Add(const char* format_str, Args&&... args) { + code += fmt::format(fmt::runtime(format_str), std::forward(args)...); + // TODO: Remove this + code += '\n'; + } + + std::string header; + std::string code; + VarAlloc var_alloc; + const Info& info; + const Profile& profile; + const RuntimeInfo& runtime_info; + + Stage stage{}; + std::string_view stage_name = "invalid"; + std::string_view position_name = "gl_Position"; + + std::vector texture_buffers; + std::vector image_buffers; + std::vector textures; + std::vector images; + std::array, 32> output_generics{}; + + u32 num_safety_loop_vars{}; + + bool uses_y_direction{}; + bool uses_cc_carry{}; + bool uses_geometry_passthrough{}; + +private: + void SetupExtensions(); + void DefineConstantBuffers(Bindings& bindings); + void DefineConstantBufferIndirect(); + void DefineStorageBuffers(Bindings& bindings); + void DefineGenericOutput(size_t index, u32 invocations); + void DefineHelperFunctions(); + void DefineConstants(); + std::string DefineGlobalMemoryFunctions(); + void SetupImages(Bindings& bindings); + void SetupTextures(Bindings& bindings); +}; + +} // namespace Shader::Backend::MSL diff --git a/src/shader_recompiler/backend/msl/var_alloc.cpp b/src/shader_recompiler/backend/msl/var_alloc.cpp new file mode 100644 index 0000000000..2a5826598f --- /dev/null +++ b/src/shader_recompiler/backend/msl/var_alloc.cpp @@ -0,0 +1,306 @@ +// SPDX-FileCopyrightText: Copyright 2024 suyu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include +#include + +#include + +#include "shader_recompiler/backend/msl/var_alloc.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::MSL { +namespace { +std::string TypePrefix(MslVarType type) { + switch (type) { + case MslVarType::U1: + return "b_"; + case MslVarType::F16x2: + return "f16x2_"; + case MslVarType::U32: + return "u_"; + case MslVarType::F32: + return "f_"; + case MslVarType::U64: + return "u64_"; + case MslVarType::F64: + return "d_"; + case MslVarType::U32x2: + return "u2_"; + case MslVarType::F32x2: + return "f2_"; + case MslVarType::U32x3: + return "u3_"; + case MslVarType::F32x3: + return "f3_"; + case MslVarType::U32x4: + return "u4_"; + case MslVarType::F32x4: + return "f4_"; + case MslVarType::PrecF32: + return "pf_"; + case MslVarType::PrecF64: + return "pd_"; + case MslVarType::Void: + return ""; + default: + throw NotImplementedException("Type {}", type); + } +} + +std::string FormatFloat(std::string_view value, IR::Type type) { + // TODO: Confirm FP64 nan/inf + if (type == IR::Type::F32) { + if (value == "nan") { + return "utof(0x7fc00000)"; + } + if (value == "inf") { + return "utof(0x7f800000)"; + } + if (value == "-inf") { + return "utof(0xff800000)"; + } + } + if (value.find_first_of('e') != std::string_view::npos) { + // scientific notation + const auto cast{type == IR::Type::F32 ? "float" : "double"}; + return fmt::format("{}({})", cast, value); + } + const bool needs_dot{value.find_first_of('.') == std::string_view::npos}; + const bool needs_suffix{!value.ends_with('f')}; + const auto suffix{type == IR::Type::F32 ? "f" : "lf"}; + return fmt::format("{}{}{}", value, needs_dot ? "." : "", needs_suffix ? suffix : ""); +} + +std::string MakeImm(const IR::Value& value) { + switch (value.Type()) { + case IR::Type::U1: + return fmt::format("{}", value.U1() ? "true" : "false"); + case IR::Type::U32: + return fmt::format("{}u", value.U32()); + case IR::Type::F32: + return FormatFloat(fmt::format("{}", value.F32()), IR::Type::F32); + case IR::Type::U64: + return fmt::format("{}ul", value.U64()); + case IR::Type::F64: + return FormatFloat(fmt::format("{}", value.F64()), IR::Type::F64); + case IR::Type::Void: + return ""; + default: + throw NotImplementedException("Immediate type {}", value.Type()); + } +} +} // Anonymous namespace + +std::string VarAlloc::Representation(u32 index, MslVarType type) const { + const auto prefix{TypePrefix(type)}; + return fmt::format("{}{}", prefix, index); +} + +std::string VarAlloc::Representation(Id id) const { + return Representation(id.index, id.type); +} + +std::string VarAlloc::Define(IR::Inst& inst, MslVarType type) { + if (inst.HasUses()) { + inst.SetDefinition(Alloc(type)); + return Representation(inst.Definition()); + } else { + Id id{}; + id.type.Assign(type); + GetUseTracker(type).uses_temp = true; + inst.SetDefinition(id); + return 't' + Representation(inst.Definition()); + } +} + +std::string VarAlloc::Define(IR::Inst& inst, IR::Type type) { + return Define(inst, RegType(type)); +} + +std::string VarAlloc::PhiDefine(IR::Inst& inst, IR::Type type) { + return AddDefine(inst, RegType(type)); +} + +std::string VarAlloc::AddDefine(IR::Inst& inst, MslVarType type) { + if (inst.HasUses()) { + inst.SetDefinition(Alloc(type)); + return Representation(inst.Definition()); + } else { + return ""; + } +} + +std::string VarAlloc::Consume(const IR::Value& value) { + return value.IsImmediate() ? MakeImm(value) : ConsumeInst(*value.InstRecursive()); +} + +std::string VarAlloc::ConsumeInst(IR::Inst& inst) { + inst.DestructiveRemoveUsage(); + if (!inst.HasUses()) { + Free(inst.Definition()); + } + return Representation(inst.Definition()); +} + +std::string VarAlloc::GetMslType(IR::Type type) const { + return GetMslType(RegType(type)); +} + +Id VarAlloc::Alloc(MslVarType type) { + auto& use_tracker{GetUseTracker(type)}; + const auto num_vars{use_tracker.var_use.size()}; + for (size_t var = 0; var < num_vars; ++var) { + if (use_tracker.var_use[var]) { + continue; + } + use_tracker.num_used = std::max(use_tracker.num_used, var + 1); + use_tracker.var_use[var] = true; + Id ret{}; + ret.is_valid.Assign(1); + ret.type.Assign(type); + ret.index.Assign(static_cast(var)); + return ret; + } + // Allocate a new variable + use_tracker.var_use.push_back(true); + Id ret{}; + ret.is_valid.Assign(1); + ret.type.Assign(type); + ret.index.Assign(static_cast(use_tracker.num_used)); + ++use_tracker.num_used; + return ret; +} + +void VarAlloc::Free(Id id) { + if (id.is_valid == 0) { + throw LogicError("Freeing invalid variable"); + } + auto& use_tracker{GetUseTracker(id.type)}; + use_tracker.var_use[id.index] = false; +} + +MslVarType VarAlloc::RegType(IR::Type type) const { + switch (type) { + case IR::Type::U1: + return MslVarType::U1; + case IR::Type::U32: + return MslVarType::U32; + case IR::Type::F32: + return MslVarType::F32; + case IR::Type::U64: + return MslVarType::U64; + case IR::Type::F64: + return MslVarType::F64; + default: + throw NotImplementedException("IR type {}", type); + } +} + +std::string VarAlloc::GetMslType(MslVarType type) const { + switch (type) { + case MslVarType::U1: + return "bool"; + case MslVarType::F16x2: + return "f16vec2"; + case MslVarType::U32: + return "uint"; + case MslVarType::F32: + case MslVarType::PrecF32: + return "float"; + case MslVarType::U64: + return "uint64_t"; + case MslVarType::F64: + case MslVarType::PrecF64: + return "double"; + case MslVarType::U32x2: + return "uvec2"; + case MslVarType::F32x2: + return "vec2"; + case MslVarType::U32x3: + return "uvec3"; + case MslVarType::F32x3: + return "vec3"; + case MslVarType::U32x4: + return "uvec4"; + case MslVarType::F32x4: + return "vec4"; + case MslVarType::Void: + return ""; + default: + throw NotImplementedException("Type {}", type); + } +} + +VarAlloc::UseTracker& VarAlloc::GetUseTracker(MslVarType type) { + switch (type) { + case MslVarType::U1: + return var_bool; + case MslVarType::F16x2: + return var_f16x2; + case MslVarType::U32: + return var_u32; + case MslVarType::F32: + return var_f32; + case MslVarType::U64: + return var_u64; + case MslVarType::F64: + return var_f64; + case MslVarType::U32x2: + return var_u32x2; + case MslVarType::F32x2: + return var_f32x2; + case MslVarType::U32x3: + return var_u32x3; + case MslVarType::F32x3: + return var_f32x3; + case MslVarType::U32x4: + return var_u32x4; + case MslVarType::F32x4: + return var_f32x4; + case MslVarType::PrecF32: + return var_precf32; + case MslVarType::PrecF64: + return var_precf64; + default: + throw NotImplementedException("Type {}", type); + } +} + +const VarAlloc::UseTracker& VarAlloc::GetUseTracker(MslVarType type) const { + switch (type) { + case MslVarType::U1: + return var_bool; + case MslVarType::F16x2: + return var_f16x2; + case MslVarType::U32: + return var_u32; + case MslVarType::F32: + return var_f32; + case MslVarType::U64: + return var_u64; + case MslVarType::F64: + return var_f64; + case MslVarType::U32x2: + return var_u32x2; + case MslVarType::F32x2: + return var_f32x2; + case MslVarType::U32x3: + return var_u32x3; + case MslVarType::F32x3: + return var_f32x3; + case MslVarType::U32x4: + return var_u32x4; + case MslVarType::F32x4: + return var_f32x4; + case MslVarType::PrecF32: + return var_precf32; + case MslVarType::PrecF64: + return var_precf64; + default: + throw NotImplementedException("Type {}", type); + } +} + +} // namespace Shader::Backend::MSL diff --git a/src/shader_recompiler/backend/msl/var_alloc.h b/src/shader_recompiler/backend/msl/var_alloc.h new file mode 100644 index 0000000000..8d7afcc0fb --- /dev/null +++ b/src/shader_recompiler/backend/msl/var_alloc.h @@ -0,0 +1,104 @@ +// SPDX-FileCopyrightText: Copyright 2024 suyu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include +#include + +#include "common/bit_field.h" +#include "common/common_types.h" + +namespace Shader::IR { +class Inst; +class Value; +enum class Type; +} // namespace Shader::IR + +namespace Shader::Backend::MSL { +enum class MslVarType : u32 { + U1, + F16x2, + U32, + F32, + U64, + F64, + U32x2, + F32x2, + U32x3, + F32x3, + U32x4, + F32x4, + PrecF32, + PrecF64, + Void, +}; + +struct Id { + union { + u32 raw; + BitField<0, 1, u32> is_valid; + BitField<1, 4, MslVarType> type; + BitField<6, 26, u32> index; + }; + + bool operator==(Id rhs) const noexcept { + return raw == rhs.raw; + } + bool operator!=(Id rhs) const noexcept { + return !operator==(rhs); + } +}; +static_assert(sizeof(Id) == sizeof(u32)); + +class VarAlloc { +public: + struct UseTracker { + bool uses_temp{}; + size_t num_used{}; + std::vector var_use; + }; + + /// Used for explicit usages of variables, may revert to temporaries + std::string Define(IR::Inst& inst, MslVarType type); + std::string Define(IR::Inst& inst, IR::Type type); + + /// Used to assign variables used by the IR. May return a blank string if + /// the instruction's result is unused in the IR. + std::string AddDefine(IR::Inst& inst, MslVarType type); + std::string PhiDefine(IR::Inst& inst, IR::Type type); + + std::string Consume(const IR::Value& value); + std::string ConsumeInst(IR::Inst& inst); + + std::string GetMslType(MslVarType type) const; + std::string GetMslType(IR::Type type) const; + + const UseTracker& GetUseTracker(MslVarType type) const; + std::string Representation(u32 index, MslVarType type) const; + +private: + MslVarType RegType(IR::Type type) const; + Id Alloc(MslVarType type); + void Free(Id id); + UseTracker& GetUseTracker(MslVarType type); + std::string Representation(Id id) const; + + UseTracker var_bool{}; + UseTracker var_f16x2{}; + UseTracker var_u32{}; + UseTracker var_u32x2{}; + UseTracker var_u32x3{}; + UseTracker var_u32x4{}; + UseTracker var_f32{}; + UseTracker var_f32x2{}; + UseTracker var_f32x3{}; + UseTracker var_f32x4{}; + UseTracker var_u64{}; + UseTracker var_f64{}; + UseTracker var_precf32{}; + UseTracker var_precf64{}; +}; + +} // namespace Shader::Backend::MSL