attempt to compile msl shaders

This commit is contained in:
Samuliak 2024-05-04 12:00:51 +02:00
parent 5a2c073ebe
commit 16986bf42f
6 changed files with 80 additions and 189 deletions

View file

@ -174,13 +174,6 @@ void EmitCode(EmitContext& ctx, const IR::Program& program) {
}
}
// Returns " compatibility" when the context reports uses_y_direction, and an empty
// string otherwise.
std::string MslVersionSpecifier(const EmitContext& ctx) {
    return ctx.uses_y_direction ? " compatibility" : "";
}
bool IsPreciseType(MslVarType type) {
switch (type) {
case MslVarType::PrecF32:
@ -219,8 +212,7 @@ std::string EmitMSL(const Profile& profile, const RuntimeInfo& runtime_info, IR:
EmitContext ctx{program, bindings, profile, runtime_info};
Precolor(program);
EmitCode(ctx, program);
const std::string version{fmt::format("#version 460{}\n", MslVersionSpecifier(ctx))};
ctx.header.insert(0, version);
ctx.header.insert(0, "#include <metal_stdlib>\nusing namespace metal;\n");
if (program.shared_memory_size > 0) {
const auto requested_size{program.shared_memory_size};
const auto max_size{profile.gl_max_compute_smem_size};
@ -232,7 +224,7 @@ std::string EmitMSL(const Profile& profile, const RuntimeInfo& runtime_info, IR:
const auto smem_size{needs_clamp ? max_size : requested_size};
ctx.header += fmt::format("shared uint smem[{}];", Common::DivCeil(smem_size, 4U));
}
ctx.header += "void main(){\n";
ctx.header += "void main_(){\n";
if (program.local_memory_size > 0) {
ctx.header += fmt::format("uint lmem[{}];", Common::DivCeil(program.local_memory_size, 4U));
}

View file

@ -40,12 +40,12 @@ std::string CastToIntVec(std::string_view value, const IR::TextureInstInfo& info
case TextureType::ColorArray1D:
case TextureType::Color2D:
case TextureType::ColorArray2D:
return fmt::format("ivec2({})", value);
return fmt::format("int2({})", value);
case TextureType::Color3D:
case TextureType::ColorCube:
return fmt::format("ivec3({})", value);
return fmt::format("int3({})", value);
case TextureType::ColorArrayCube:
return fmt::format("ivec4({})", value);
return fmt::format("int4({})", value);
default:
throw NotImplementedException("Integer cast for TextureType {}", info.type.Value());
}
@ -58,13 +58,13 @@ std::string CoordsCastToInt(std::string_view value, const IR::TextureInstInfo& i
return fmt::format("int({})", value);
case TextureType::ColorArray1D:
case TextureType::Color2D:
return fmt::format("ivec2({})", value);
return fmt::format("int2({})", value);
case TextureType::ColorArray2D:
case TextureType::Color3D:
case TextureType::ColorCube:
return fmt::format("ivec3({})", value);
return fmt::format("int3({})", value);
case TextureType::ColorArrayCube:
return fmt::format("ivec4({})", value);
return fmt::format("int4({})", value);
default:
throw NotImplementedException("TexelFetchCast type {}", info.type.Value());
}
@ -89,12 +89,12 @@ std::string GetOffsetVec(EmitContext& ctx, const IR::Value& offset) {
if (inst->AreAllArgsImmediates()) {
switch (inst->GetOpcode()) {
case IR::Opcode::CompositeConstructU32x2:
return fmt::format("ivec2({},{})", inst->Arg(0).U32(), inst->Arg(1).U32());
return fmt::format("int2({},{})", inst->Arg(0).U32(), inst->Arg(1).U32());
case IR::Opcode::CompositeConstructU32x3:
return fmt::format("ivec3({},{},{})", inst->Arg(0).U32(), inst->Arg(1).U32(),
return fmt::format("int3({},{},{})", inst->Arg(0).U32(), inst->Arg(1).U32(),
inst->Arg(2).U32());
case IR::Opcode::CompositeConstructU32x4:
return fmt::format("ivec4({},{},{},{})", inst->Arg(0).U32(), inst->Arg(1).U32(),
return fmt::format("int4({},{},{},{})", inst->Arg(0).U32(), inst->Arg(1).U32(),
inst->Arg(2).U32(), inst->Arg(3).U32());
default:
break;
@ -109,11 +109,11 @@ std::string GetOffsetVec(EmitContext& ctx, const IR::Value& offset) {
case IR::Type::U32:
return fmt::format("int({})", offset_str);
case IR::Type::U32x2:
return fmt::format("ivec2({})", offset_str);
return fmt::format("int2({})", offset_str);
case IR::Type::U32x3:
return fmt::format("ivec3({})", offset_str);
return fmt::format("int3({})", offset_str);
case IR::Type::U32x4:
return fmt::format("ivec4({})", offset_str);
return fmt::format("int4({})", offset_str);
default:
throw NotImplementedException("Offset type {}", offset.Type());
}
@ -123,7 +123,7 @@ std::string PtpOffsets(const IR::Value& offset, const IR::Value& offset2) {
const std::array values{offset.InstRecursive(), offset2.InstRecursive()};
if (!values[0]->AreAllArgsImmediates() || !values[1]->AreAllArgsImmediates()) {
LOG_WARNING(Shader_MSL, "Not all arguments in PTP are immediate, STUBBING");
return "ivec2[](ivec2(0), ivec2(1), ivec2(2), ivec2(3))";
return "int2[](int2(0), int2(1), int2(2), int2(3))";
}
const IR::Opcode opcode{values[0]->GetOpcode()};
if (opcode != values[1]->GetOpcode() || opcode != IR::Opcode::CompositeConstructU32x4) {
@ -131,7 +131,7 @@ std::string PtpOffsets(const IR::Value& offset, const IR::Value& offset2) {
}
auto read{[&](unsigned int a, unsigned int b) { return values[a]->Arg(b).U32(); }};
return fmt::format("ivec2[](ivec2({},{}),ivec2({},{}),ivec2({},{}),ivec2({},{}))", read(0, 0),
return fmt::format("int2[](int2({},{}),int2({},{}),int2({},{}),int2({},{}))", read(0, 0),
read(0, 1), read(0, 2), read(0, 3), read(1, 0), read(1, 1), read(1, 2),
read(1, 3));
}
@ -149,11 +149,11 @@ std::string ImageGatherSubpixelOffset(const IR::TextureInstInfo& info, std::stri
switch (info.type) {
case TextureType::Color2D:
case TextureType::Color2DRect:
return fmt::format("{}+vec2(0.001953125)/vec2(textureSize({}, 0))", coords, texture);
return fmt::format("{}+float2(0.001953125)/float2(textureSize({}, 0))", coords, texture);
case TextureType::ColorArray2D:
case TextureType::ColorCube:
return fmt::format("vec3({0}.xy+vec2(0.001953125)/vec2(textureSize({1}, 0)),{0}.z)", coords,
texture);
return fmt::format("float3({0}.xy+float2(0.001953125)/float2(textureSize({1}, 0)),{0}.z)",
coords, texture);
default:
return std::string{coords};
}
@ -512,20 +512,20 @@ void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value&
const auto lod_str{uses_lod ? fmt::format(",int({})", lod) : ""};
switch (info.type) {
case TextureType::Color1D:
return ctx.AddU32x4("{}=uvec4(uint(textureSize({}{})),0u,0u,{});", inst, texture, lod_str,
return ctx.AddU32x4("{}=uint4(uint(textureSize({}{})),0u,0u,{});", inst, texture, lod_str,
mips);
case TextureType::ColorArray1D:
case TextureType::Color2D:
case TextureType::ColorCube:
case TextureType::Color2DRect:
return ctx.AddU32x4("{}=uvec4(uvec2(textureSize({}{})),0u,{});", inst, texture, lod_str,
return ctx.AddU32x4("{}=uint4(uint2(textureSize({}{})),0u,{});", inst, texture, lod_str,
mips);
case TextureType::ColorArray2D:
case TextureType::Color3D:
case TextureType::ColorArrayCube:
return ctx.AddU32x4("{}=uvec4(uvec3(textureSize({}{})),{});", inst, texture, lod_str, mips);
return ctx.AddU32x4("{}=uint4(uint3(textureSize({}{})),{});", inst, texture, lod_str, mips);
case TextureType::Buffer:
return ctx.AddU32x4("{}=uvec4(uint(textureSize({})),0u,0u,{});", inst, texture, mips);
return ctx.AddU32x4("{}=uint4(uint(textureSize({})),0u,0u,{});", inst, texture, mips);
}
throw LogicError("Unspecified image type {}", info.type.Value());
}
@ -534,7 +534,7 @@ void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
std::string_view coords) {
const auto info{inst.Flags<IR::TextureInstInfo>()};
const auto texture{Texture(ctx, info, index)};
return ctx.AddF32x4("{}=vec4(textureQueryLod({},{}),0.0,0.0);", inst, texture, coords);
return ctx.AddF32x4("{}=float4(textureQueryLod({},{}),0.0,0.0);", inst, texture, coords);
}
void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
@ -558,11 +558,11 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
if (multi_component) {
if (info.num_derivatives >= 3) {
const auto offset_vec{ctx.var_alloc.Consume(offset)};
ctx.Add("{}=textureGrad({},{},vec3({}.xz, {}.x),vec3({}.yw, {}.y));", texel, texture,
coords, derivatives_vec, offset_vec, derivatives_vec, offset_vec);
ctx.Add("{}=textureGrad({},{},float3({}.xz, {}.x),float3({}.yw, {}.y));", texel,
texture, coords, derivatives_vec, offset_vec, derivatives_vec, offset_vec);
return;
}
ctx.Add("{}=textureGrad({},{},vec2({}.xz),vec2({}.yz));", texel, texture, coords,
ctx.Add("{}=textureGrad({},{},float2({}.xz),float2({}.yz));", texel, texture, coords,
derivatives_vec, derivatives_vec);
} else {
ctx.Add("{}=textureGrad({},{},float({}.x),float({}.y));", texel, texture, coords,
@ -578,7 +578,7 @@ void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
throw NotImplementedException("EmitImageRead Sparse");
}
const auto image{Image(ctx, info, index)};
ctx.AddU32x4("{}=uvec4(imageLoad({},{}));", inst, image, CoordsCastToInt(coords, info));
ctx.AddU32x4("{}=uint4(imageLoad({},{}));", inst, image, CoordsCastToInt(coords, info));
}
void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,

View file

@ -50,7 +50,7 @@ void EmitLoadGlobal64(EmitContext& ctx, IR::Inst& inst, std::string_view address
return ctx.AddU32x2("{}=LoadGlobal64({});", inst, address);
}
LOG_WARNING(Shader_MSL, "Int64 not supported, ignoring memory operation");
ctx.AddU32x2("{}=uvec2(0);", inst);
ctx.AddU32x2("{}=uint2(0);", inst);
}
void EmitLoadGlobal128(EmitContext& ctx, IR::Inst& inst, std::string_view address) {
@ -58,7 +58,7 @@ void EmitLoadGlobal128(EmitContext& ctx, IR::Inst& inst, std::string_view addres
return ctx.AddU32x4("{}=LoadGlobal128({});", inst, address);
}
LOG_WARNING(Shader_MSL, "Int64 not supported, ignoring memory operation");
ctx.AddU32x4("{}=uvec4(0);", inst);
ctx.AddU32x4("{}=uint4(0);", inst);
}
void EmitWriteGlobalU8(EmitContext&) {

View file

@ -87,9 +87,7 @@ void EmitPhiMove(EmitContext& ctx, const IR::Value& phi_value, const IR::Value&
if (phi_reg == val_reg) {
return;
}
const bool needs_workaround{ctx.profile.has_gl_bool_ref_bug && phi_type == IR::Type::U1};
const auto suffix{needs_workaround ? "?true:false" : ""};
ctx.Add("{}={}{};", phi_reg, val_reg, suffix);
ctx.Add("{}={};", phi_reg, val_reg);
}
void EmitPrologue(EmitContext& ctx) {

View file

@ -41,18 +41,6 @@ std::string_view InputArrayDecorator(Stage stage) {
}
}
// Returns true for the VertexA, VertexB, Geometry and TessellationEval stages, and false
// for every other stage.
bool StoresPerVertexAttributes(Stage stage) {
    return stage == Stage::VertexA || stage == Stage::VertexB || stage == Stage::Geometry ||
           stage == Stage::TessellationEval;
}
std::string OutputDecorator(Stage stage, u32 size) {
switch (stage) {
case Stage::TessellationControl:
@ -62,6 +50,7 @@ std::string OutputDecorator(Stage stage, u32 size) {
}
}
// TODO
std::string_view DepthSamplerType(TextureType type) {
switch (type) {
case TextureType::Color1D:
@ -81,56 +70,57 @@ std::string_view DepthSamplerType(TextureType type) {
}
}
// TODO: emit sampler as well
// TODO: handle multisample
// TODO: handle texture buffer
std::string_view ColorSamplerType(TextureType type, bool is_multisample = false) {
if (is_multisample) {
ASSERT(type == TextureType::Color2D || type == TextureType::ColorArray2D);
}
switch (type) {
case TextureType::Color1D:
return "sampler1D";
return "texture1d";
case TextureType::ColorArray1D:
return "sampler1DArray";
return "texture1d_array";
case TextureType::Color2D:
case TextureType::Color2DRect:
return is_multisample ? "sampler2DMS" : "sampler2D";
return "texture2d";
case TextureType::ColorArray2D:
return is_multisample ? "sampler2DMSArray" : "sampler2DArray";
return "texture2d_array";
case TextureType::Color3D:
return "sampler3D";
return "texture3d";
case TextureType::ColorCube:
return "samplerCube";
return "texturecube";
case TextureType::ColorArrayCube:
return "samplerCubeArray";
case TextureType::Buffer:
return "samplerBuffer";
return "texturecube_array";
default:
throw NotImplementedException("Texture type: {}", type);
}
}
// TODO: handle texture buffer
std::string_view ImageType(TextureType type) {
switch (type) {
case TextureType::Color1D:
return "uimage1D";
return "texture1d";
case TextureType::ColorArray1D:
return "uimage1DArray";
return "texture1d_array";
case TextureType::Color2D:
return "uimage2D";
return "texture2d";
case TextureType::ColorArray2D:
return "uimage2DArray";
return "texture2d_array";
case TextureType::Color3D:
return "uimage3D";
return "texture3d";
case TextureType::ColorCube:
return "uimageCube";
return "texturecube";
case TextureType::ColorArrayCube:
return "uimageCubeArray";
case TextureType::Buffer:
return "uimageBuffer";
return "texturecube_array";
default:
throw NotImplementedException("Image type: {}", type);
}
}
// TODO: is this needed?
std::string_view ImageFormatString(ImageFormat format) {
switch (format) {
case ImageFormat::Typeless:
@ -155,15 +145,19 @@ std::string_view ImageFormatString(ImageFormat format) {
}
std::string_view ImageAccessQualifier(bool is_written, bool is_read) {
if (is_written && !is_read) {
return "writeonly ";
if (is_written && is_read) {
return "access::read, access::write";
}
if (is_read && !is_written) {
return "readonly ";
if (is_written) {
return "access::write";
}
if (is_read) {
return "access::read";
}
return "";
}
// TODO
std::string_view GetTessMode(TessPrimitive primitive) {
switch (primitive) {
case TessPrimitive::Triangles:
@ -176,6 +170,7 @@ std::string_view GetTessMode(TessPrimitive primitive) {
throw InvalidArgument("Invalid tessellation primitive {}", primitive);
}
// TODO
std::string_view GetTessSpacing(TessSpacing spacing) {
switch (spacing) {
case TessSpacing::Equal:
@ -188,6 +183,7 @@ std::string_view GetTessSpacing(TessSpacing spacing) {
throw InvalidArgument("Invalid tessellation spacing {}", spacing);
}
// TODO
std::string_view InputPrimitive(InputTopology topology) {
switch (topology) {
case InputTopology::Points:
@ -204,6 +200,7 @@ std::string_view InputPrimitive(InputTopology topology) {
throw InvalidArgument("Invalid input topology {}", topology);
}
// TODO
std::string_view OutputPrimitive(OutputTopology topology) {
switch (topology) {
case OutputTopology::PointList:
@ -215,56 +212,6 @@ std::string_view OutputPrimitive(OutputTopology topology) {
}
throw InvalidArgument("Invalid output topology {}", topology);
}
// Appends an "out gl_PerVertex{...};" block to header for stages accepted by
// StoresPerVertexAttributes, unless geometry passthrough is in use. gl_Position is always
// declared; the remaining members are declared only when ctx.info.stores reports them.
void SetupOutPerVertex(EmitContext& ctx, std::string& header) {
    if (!StoresPerVertexAttributes(ctx.stage) || ctx.uses_geometry_passthrough) {
        return;
    }
    const bool stores_viewport_index{ctx.info.stores[IR::Attribute::ViewportIndex]};
    const bool is_geometry_stage{ctx.stage == Stage::Geometry};

    header += "out gl_PerVertex{vec4 gl_Position;";
    if (ctx.info.stores[IR::Attribute::PointSize]) {
        header += "float gl_PointSize;";
    }
    if (ctx.info.stores.ClipDistances()) {
        header += "float gl_ClipDistance[];";
    }
    // Outside the geometry stage the viewport index is a member of the block, and only when
    // the profile supports it there.
    if (stores_viewport_index && ctx.profile.support_viewport_index_layer_non_geometry &&
        !is_geometry_stage) {
        header += "int gl_ViewportIndex;";
    }
    header += "};";
    // In the geometry stage it is declared as a standalone output after the block instead.
    if (stores_viewport_index && is_geometry_stage) {
        header += "out int gl_ViewportIndex;";
    }
}
// Appends an "in gl_PerVertex{...}gl_in[gl_MaxPatchVertices];" block to header, declaring
// only the members (position, point size, clip distances) that ctx.info.loads reports as
// read. Nothing is appended outside the TessellationControl stage or when none are read.
void SetupInPerVertex(EmitContext& ctx, std::string& header) {
    // Currently only required for TessellationControl to adhere to
    // ARB_separate_shader_objects requirements
    if (ctx.stage != Stage::TessellationControl) {
        return;
    }
    const bool reads_position{ctx.info.loads.AnyComponent(IR::Attribute::PositionX)};
    const bool reads_point_size{ctx.info.loads[IR::Attribute::PointSize]};
    const bool reads_clip_distance{ctx.info.loads.ClipDistances()};
    if (!reads_position && !reads_point_size && !reads_clip_distance) {
        return;
    }

    // Assemble the declaration locally, then append it to the header in one step.
    std::string block{"in gl_PerVertex{"};
    if (reads_position) {
        block += "vec4 gl_Position;";
    }
    if (reads_point_size) {
        block += "float gl_PointSize;";
    }
    if (reads_clip_distance) {
        block += "float gl_ClipDistance[];";
    }
    block += "}gl_in[gl_MaxPatchVertices];";
    header += block;
}
} // Anonymous namespace
EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_,
@ -273,9 +220,8 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile
uses_geometry_passthrough{program.is_geometry_passthrough &&
profile.support_geometry_shader_passthrough} {
if (profile.need_fastmath_off) {
header += "#pragma optionNV(fastmath off)\n";
// TODO
}
SetupExtensions();
switch (program.stage) {
case Stage::VertexA:
case Stage::VertexB:
@ -321,8 +267,9 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile
local_x, local_y, local_z);
break;
}
SetupOutPerVertex(*this, header);
SetupInPerVertex(*this, header);
// TODO
// SetupOutPerVertex(*this, header);
// SetupInPerVertex(*this, header);
for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
if (!info.loads.Generic(index) || !runtime_info.previous_stage_stores.Generic(index)) {
@ -369,63 +316,6 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile
DefineConstants();
}
void EmitContext::SetupExtensions() {
header += "#extension GL_ARB_separate_shader_objects : enable\n";
if (info.uses_shadow_lod && profile.support_gl_texture_shadow_lod) {
header += "#extension GL_EXT_texture_shadow_lod : enable\n";
}
if (info.uses_int64 && profile.support_int64) {
header += "#extension GL_ARB_gpu_shader_int64 : enable\n";
}
if (info.uses_int64_bit_atomics) {
header += "#extension GL_NV_shader_atomic_int64 : enable\n";
}
if (info.uses_atomic_f32_add) {
header += "#extension GL_NV_shader_atomic_float : enable\n";
}
if (info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max) {
header += "#extension GL_NV_shader_atomic_fp16_vector : enable\n";
}
if (info.uses_fp16) {
if (profile.support_gl_nv_gpu_shader_5) {
header += "#extension GL_NV_gpu_shader5 : enable\n";
}
if (profile.support_gl_amd_gpu_shader_half_float) {
header += "#extension GL_AMD_gpu_shader_half_float : enable\n";
}
}
if (info.uses_subgroup_invocation_id || info.uses_subgroup_mask || info.uses_subgroup_vote ||
info.uses_subgroup_shuffles || info.uses_fswzadd) {
header += "#extension GL_ARB_shader_ballot : enable\n"
"#extension GL_ARB_shader_group_vote : enable\n";
if (!info.uses_int64 && profile.support_int64) {
header += "#extension GL_ARB_gpu_shader_int64 : enable\n";
}
if (profile.support_gl_warp_intrinsics) {
header += "#extension GL_NV_shader_thread_shuffle : enable\n";
}
}
if ((info.stores[IR::Attribute::ViewportIndex] || info.stores[IR::Attribute::Layer]) &&
profile.support_viewport_index_layer_non_geometry && stage != Stage::Geometry) {
header += "#extension GL_ARB_shader_viewport_layer_array : enable\n";
}
if (info.uses_sparse_residency && profile.support_gl_sparse_textures) {
header += "#extension GL_ARB_sparse_texture2 : enable\n";
}
if (info.stores[IR::Attribute::ViewportMask] && profile.support_viewport_mask) {
header += "#extension GL_NV_viewport_array2 : enable\n";
}
if (info.uses_typeless_image_reads) {
header += "#extension GL_EXT_shader_image_load_formatted : enable\n";
}
if (info.uses_derivatives && profile.support_gl_derivative_control) {
header += "#extension GL_ARB_derivative_control : enable\n";
}
if (uses_geometry_passthrough) {
header += "#extension GL_NV_geometry_shader_passthrough : enable\n";
}
}
void EmitContext::DefineConstantBuffers(Bindings& bindings) {
if (info.constant_buffer_descriptors.empty()) {
return;

View file

@ -269,8 +269,19 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
const std::string code{EmitMSL(profile, runtime_info, program, binding)};
// HACK
std::cout << code << std::endl;
// TODO: create MTL::Function
// functions[stage_index] = ;
MTL::CompileOptions* compile_options = MTL::CompileOptions::alloc()->init();
NS::Error* error = nullptr;
MTL::Library* library = device.GetDevice()->newLibrary(
NS::String::string(code.c_str(), NS::ASCIIStringEncoding), compile_options, &error);
if (error) {
LOG_ERROR(Render_Metal, "failed to create library: {}",
error->description()->cString(NS::ASCIIStringEncoding));
// HACK
throw;
}
functions[index] =
library->newFunction(NS::String::string("main_", NS::ASCIIStringEncoding));
previous_stage = &program;
}
@ -309,7 +320,7 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
NS::ASCIIStringEncoding),
compile_options, &error);
if (error) {
LOG_ERROR(Render_Metal, "failed to create blit library: {}",
LOG_ERROR(Render_Metal, "failed to create library: {}",
error->description()->cString(NS::ASCIIStringEncoding));
}