fix: some errors in msl backend

This commit is contained in:
Samuliak 2024-05-04 15:31:54 +02:00
parent 7920249ed1
commit 055112b739
13 changed files with 122 additions and 176 deletions

View file

@ -174,30 +174,18 @@ void EmitCode(EmitContext& ctx, const IR::Program& program) {
} }
} }
bool IsPreciseType(MslVarType type) {
switch (type) {
case MslVarType::PrecF32:
case MslVarType::PrecF64:
return true;
default:
return false;
}
}
void DefineVariables(const EmitContext& ctx, std::string& header) { void DefineVariables(const EmitContext& ctx, std::string& header) {
for (u32 i = 0; i < static_cast<u32>(MslVarType::Void); ++i) { for (u32 i = 0; i < static_cast<u32>(MslVarType::Void); ++i) {
const auto type{static_cast<MslVarType>(i)}; const auto type{static_cast<MslVarType>(i)};
const auto& tracker{ctx.var_alloc.GetUseTracker(type)}; const auto& tracker{ctx.var_alloc.GetUseTracker(type)};
const auto type_name{ctx.var_alloc.GetMslType(type)}; const auto type_name{ctx.var_alloc.GetMslType(type)};
const bool has_precise_bug{ctx.stage == Stage::Fragment && ctx.profile.has_gl_precise_bug};
const auto precise{!has_precise_bug && IsPreciseType(type) ? "precise " : ""};
// Temps/return types that are never used are stored at index 0 // Temps/return types that are never used are stored at index 0
if (tracker.uses_temp) { if (tracker.uses_temp) {
header += fmt::format("{}{} t{}={}(0);", precise, type_name, header += fmt::format("{} t{}={}(0);", type_name, ctx.var_alloc.Representation(0, type),
ctx.var_alloc.Representation(0, type), type_name); type_name);
} }
for (u32 index = 0; index < tracker.num_used; ++index) { for (u32 index = 0; index < tracker.num_used; ++index) {
header += fmt::format("{}{} {}={}(0);", precise, type_name, header += fmt::format("{} {}={}(0);", type_name,
ctx.var_alloc.Representation(index, type), type_name); ctx.var_alloc.Representation(index, type), type_name);
} }
} }

View file

@ -34,7 +34,7 @@ void SsboCasFunctionF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& bindi
ctx.var_alloc.Consume(offset))}; ctx.var_alloc.Consume(offset))};
const auto ret{ctx.var_alloc.Define(inst, MslVarType::U32)}; const auto ret{ctx.var_alloc.Define(inst, MslVarType::U32)};
ctx.Add(cas_loop, ssbo, ret, ssbo, function, ssbo, value, ret); ctx.Add(cas_loop, ssbo, ret, ssbo, function, ssbo, value, ret);
ctx.AddF32("{}=utof({});", inst, ret); ctx.AddF32("{}=as_type<float>({});", inst, ret);
} }
} // Anonymous namespace } // Anonymous namespace
@ -98,7 +98,7 @@ void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, std::string_vi
void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
std::string_view value) { std::string_view value) {
LOG_WARNING(Shader_MSL, "Int64 atomics not supported, fallback to non-atomic"); LOG_WARNING(Shader_MSL, "Int64 atomics not supported, fallback to non-atomic");
ctx.AddU64("{}=packUint2x32(uvec2(smem[{}>>2],smem[({}+4)>>2]));", inst, pointer_offset, ctx.AddU64("{}=packUint2x32(uint2(smem[{}>>2],smem[({}+4)>>2]));", inst, pointer_offset,
pointer_offset); pointer_offset);
ctx.Add("smem[{}>>2]=unpackUint2x32({}).x;smem[({}+4)>>2]=unpackUint2x32({}).y;", ctx.Add("smem[{}>>2]=unpackUint2x32({}).x;smem[({}+4)>>2]=unpackUint2x32({}).y;",
pointer_offset, value, pointer_offset, value); pointer_offset, value, pointer_offset, value);
@ -107,7 +107,7 @@ void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, std::string_vi
void EmitSharedAtomicExchange32x2(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, void EmitSharedAtomicExchange32x2(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
std::string_view value) { std::string_view value) {
LOG_WARNING(Shader_MSL, "Int64 atomics not supported, fallback to non-atomic"); LOG_WARNING(Shader_MSL, "Int64 atomics not supported, fallback to non-atomic");
ctx.AddU32x2("{}=uvec2(smem[{}>>2],smem[({}+4)>>2]);", inst, pointer_offset, pointer_offset); ctx.AddU32x2("{}=uint2(smem[{}>>2],smem[({}+4)>>2]);", inst, pointer_offset, pointer_offset);
ctx.Add("smem[{}>>2]={}.x;smem[({}+4)>>2]={}.y;", pointer_offset, value, pointer_offset, value); ctx.Add("smem[{}>>2]={}.x;smem[({}+4)>>2]={}.y;", pointer_offset, value, pointer_offset, value);
} }
@ -178,7 +178,7 @@ void EmitStorageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Val
void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
const IR::Value& offset, std::string_view value) { const IR::Value& offset, std::string_view value) {
LOG_WARNING(Shader_MSL, "Int64 atomics not supported, fallback to non-atomic"); LOG_WARNING(Shader_MSL, "Int64 atomics not supported, fallback to non-atomic");
ctx.AddU64("{}=packUint2x32(uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst, ctx.AddU64("{}=packUint2x32(uint2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst,
ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name,
binding.U32(), ctx.var_alloc.Consume(offset)); binding.U32(), ctx.var_alloc.Consume(offset));
ctx.Add("{}_ssbo{}[{}>>2]+=unpackUint2x32({}).x;{}_ssbo{}[({}>>2)+1]+=unpackUint2x32({}).y;", ctx.Add("{}_ssbo{}[{}>>2]+=unpackUint2x32({}).x;{}_ssbo{}[({}>>2)+1]+=unpackUint2x32({}).y;",
@ -189,9 +189,9 @@ void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value&
void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
const IR::Value& offset, std::string_view value) { const IR::Value& offset, std::string_view value) {
LOG_WARNING(Shader_MSL, "Int64 atomics not supported, fallback to non-atomic"); LOG_WARNING(Shader_MSL, "Int64 atomics not supported, fallback to non-atomic");
ctx.AddU64("{}=packInt2x32(ivec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst, ctx.AddU64("{}=packInt2x32(int2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst, ctx.stage_name,
ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, binding.U32(),
binding.U32(), ctx.var_alloc.Consume(offset)); ctx.var_alloc.Consume(offset));
ctx.Add("for(int i=0;i<2;++i){{ " ctx.Add("for(int i=0;i<2;++i){{ "
"{}_ssbo{}[({}>>2)+i]=uint(min(int({}_ssbo{}[({}>>2)+i]),unpackInt2x32(int64_t({}))[i])" "{}_ssbo{}[({}>>2)+i]=uint(min(int({}_ssbo{}[({}>>2)+i]),unpackInt2x32(int64_t({}))[i])"
");}}", ");}}",
@ -202,7 +202,7 @@ void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value&
void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
const IR::Value& offset, std::string_view value) { const IR::Value& offset, std::string_view value) {
LOG_WARNING(Shader_MSL, "Int64 atomics not supported, fallback to non-atomic"); LOG_WARNING(Shader_MSL, "Int64 atomics not supported, fallback to non-atomic");
ctx.AddU64("{}=packUint2x32(uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst, ctx.AddU64("{}=packUint2x32(uint2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst,
ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name,
binding.U32(), ctx.var_alloc.Consume(offset)); binding.U32(), ctx.var_alloc.Consume(offset));
ctx.Add("for(int i=0;i<2;++i){{ " ctx.Add("for(int i=0;i<2;++i){{ "
@ -214,9 +214,9 @@ void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value&
void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
const IR::Value& offset, std::string_view value) { const IR::Value& offset, std::string_view value) {
LOG_WARNING(Shader_MSL, "Int64 atomics not supported, fallback to non-atomic"); LOG_WARNING(Shader_MSL, "Int64 atomics not supported, fallback to non-atomic");
ctx.AddU64("{}=packInt2x32(ivec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst, ctx.AddU64("{}=packInt2x32(int2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst, ctx.stage_name,
ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, binding.U32(),
binding.U32(), ctx.var_alloc.Consume(offset)); ctx.var_alloc.Consume(offset));
ctx.Add("for(int i=0;i<2;++i){{ " ctx.Add("for(int i=0;i<2;++i){{ "
"{}_ssbo{}[({}>>2)+i]=uint(max(int({}_ssbo{}[({}>>2)+i]),unpackInt2x32(int64_t({}))[i])" "{}_ssbo{}[({}>>2)+i]=uint(max(int({}_ssbo{}[({}>>2)+i]),unpackInt2x32(int64_t({}))[i])"
");}}", ");}}",
@ -227,7 +227,7 @@ void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value&
void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
const IR::Value& offset, std::string_view value) { const IR::Value& offset, std::string_view value) {
LOG_WARNING(Shader_MSL, "Int64 atomics not supported, fallback to non-atomic"); LOG_WARNING(Shader_MSL, "Int64 atomics not supported, fallback to non-atomic");
ctx.AddU64("{}=packUint2x32(uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst, ctx.AddU64("{}=packUint2x32(uint2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst,
ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name,
binding.U32(), ctx.var_alloc.Consume(offset)); binding.U32(), ctx.var_alloc.Consume(offset));
ctx.Add("for(int " ctx.Add("for(int "
@ -240,7 +240,7 @@ void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value&
void EmitStorageAtomicAnd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, void EmitStorageAtomicAnd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
const IR::Value& offset, std::string_view value) { const IR::Value& offset, std::string_view value) {
ctx.AddU64( ctx.AddU64(
"{}=packUint2x32(uvec2(atomicAnd({}_ssbo{}[{}>>2],unpackUint2x32({}).x),atomicAnd({}_" "{}=packUint2x32(uint2(atomicAnd({}_ssbo{}[{}>>2],unpackUint2x32({}).x),atomicAnd({}_"
"ssbo{}[({}>>2)+1],unpackUint2x32({}).y)));", "ssbo{}[({}>>2)+1],unpackUint2x32({}).y)));",
inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value, ctx.stage_name, inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value, ctx.stage_name,
binding.U32(), ctx.var_alloc.Consume(offset), value); binding.U32(), ctx.var_alloc.Consume(offset), value);
@ -248,7 +248,7 @@ void EmitStorageAtomicAnd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& b
void EmitStorageAtomicOr64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, void EmitStorageAtomicOr64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
const IR::Value& offset, std::string_view value) { const IR::Value& offset, std::string_view value) {
ctx.AddU64("{}=packUint2x32(uvec2(atomicOr({}_ssbo{}[{}>>2],unpackUint2x32({}).x),atomicOr({}_" ctx.AddU64("{}=packUint2x32(uint2(atomicOr({}_ssbo{}[{}>>2],unpackUint2x32({}).x),atomicOr({}_"
"ssbo{}[({}>>2)+1],unpackUint2x32({}).y)));", "ssbo{}[({}>>2)+1],unpackUint2x32({}).y)));",
inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value, inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value,
ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value); ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value);
@ -257,7 +257,7 @@ void EmitStorageAtomicOr64(EmitContext& ctx, IR::Inst& inst, const IR::Value& bi
void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
const IR::Value& offset, std::string_view value) { const IR::Value& offset, std::string_view value) {
ctx.AddU64( ctx.AddU64(
"{}=packUint2x32(uvec2(atomicXor({}_ssbo{}[{}>>2],unpackUint2x32({}).x),atomicXor({}_" "{}=packUint2x32(uint2(atomicXor({}_ssbo{}[{}>>2],unpackUint2x32({}).x),atomicXor({}_"
"ssbo{}[({}>>2)+1],unpackUint2x32({}).y)));", "ssbo{}[({}>>2)+1],unpackUint2x32({}).y)));",
inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value, ctx.stage_name, inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value, ctx.stage_name,
binding.U32(), ctx.var_alloc.Consume(offset), value); binding.U32(), ctx.var_alloc.Consume(offset), value);
@ -265,7 +265,7 @@ void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& b
void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
const IR::Value& offset, std::string_view value) { const IR::Value& offset, std::string_view value) {
ctx.AddU64("{}=packUint2x32(uvec2(atomicExchange({}_ssbo{}[{}>>2],unpackUint2x32({}).x)," ctx.AddU64("{}=packUint2x32(uint2(atomicExchange({}_ssbo{}[{}>>2],unpackUint2x32({}).x),"
"atomicExchange({}_ssbo{}[({}>>2)+1],unpackUint2x32({}).y)));", "atomicExchange({}_ssbo{}[({}>>2)+1],unpackUint2x32({}).y)));",
inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value, inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value,
ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value); ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value);
@ -274,7 +274,7 @@ void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Val
void EmitStorageAtomicIAdd32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, void EmitStorageAtomicIAdd32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
const IR::Value& offset, std::string_view value) { const IR::Value& offset, std::string_view value) {
LOG_WARNING(Shader_MSL, "Int64 atomics not supported, fallback to non-atomic"); LOG_WARNING(Shader_MSL, "Int64 atomics not supported, fallback to non-atomic");
ctx.AddU32x2("{}=uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]);", inst, ctx.stage_name, ctx.AddU32x2("{}=uint2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]);", inst, ctx.stage_name,
binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, binding.U32(), binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, binding.U32(),
ctx.var_alloc.Consume(offset)); ctx.var_alloc.Consume(offset));
ctx.Add("{}_ssbo{}[{}>>2]+={}.x;{}_ssbo{}[({}>>2)+1]+={}.y;", ctx.stage_name, binding.U32(), ctx.Add("{}_ssbo{}[{}>>2]+={}.x;{}_ssbo{}[({}>>2)+1]+={}.y;", ctx.stage_name, binding.U32(),
@ -285,7 +285,7 @@ void EmitStorageAtomicIAdd32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value
void EmitStorageAtomicSMin32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, void EmitStorageAtomicSMin32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
const IR::Value& offset, std::string_view value) { const IR::Value& offset, std::string_view value) {
LOG_WARNING(Shader_MSL, "Int64 atomics not supported, fallback to non-atomic"); LOG_WARNING(Shader_MSL, "Int64 atomics not supported, fallback to non-atomic");
ctx.AddU32x2("{}=ivec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]);", inst, ctx.stage_name, ctx.AddU32x2("{}=int2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]);", inst, ctx.stage_name,
binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, binding.U32(), binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, binding.U32(),
ctx.var_alloc.Consume(offset)); ctx.var_alloc.Consume(offset));
ctx.Add("for(int " ctx.Add("for(int "
@ -297,7 +297,7 @@ void EmitStorageAtomicSMin32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value
void EmitStorageAtomicUMin32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, void EmitStorageAtomicUMin32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
const IR::Value& offset, std::string_view value) { const IR::Value& offset, std::string_view value) {
LOG_WARNING(Shader_MSL, "Int64 atomics not supported, fallback to non-atomic"); LOG_WARNING(Shader_MSL, "Int64 atomics not supported, fallback to non-atomic");
ctx.AddU32x2("{}=uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]);", inst, ctx.stage_name, ctx.AddU32x2("{}=uint2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]);", inst, ctx.stage_name,
binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, binding.U32(), binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, binding.U32(),
ctx.var_alloc.Consume(offset)); ctx.var_alloc.Consume(offset));
ctx.Add("for(int i=0;i<2;++i){{ " ctx.Add("for(int i=0;i<2;++i){{ "
@ -309,7 +309,7 @@ void EmitStorageAtomicUMin32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value
void EmitStorageAtomicSMax32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, void EmitStorageAtomicSMax32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
const IR::Value& offset, std::string_view value) { const IR::Value& offset, std::string_view value) {
LOG_WARNING(Shader_MSL, "Int64 atomics not supported, fallback to non-atomic"); LOG_WARNING(Shader_MSL, "Int64 atomics not supported, fallback to non-atomic");
ctx.AddU32x2("{}=ivec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]);", inst, ctx.stage_name, ctx.AddU32x2("{}=int2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]);", inst, ctx.stage_name,
binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, binding.U32(), binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, binding.U32(),
ctx.var_alloc.Consume(offset)); ctx.var_alloc.Consume(offset));
ctx.Add("for(int " ctx.Add("for(int "
@ -321,7 +321,7 @@ void EmitStorageAtomicSMax32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value
void EmitStorageAtomicUMax32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, void EmitStorageAtomicUMax32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
const IR::Value& offset, std::string_view value) { const IR::Value& offset, std::string_view value) {
LOG_WARNING(Shader_MSL, "Int64 atomics not supported, fallback to non-atomic"); LOG_WARNING(Shader_MSL, "Int64 atomics not supported, fallback to non-atomic");
ctx.AddU32x2("{}=uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]);", inst, ctx.stage_name, ctx.AddU32x2("{}=uint2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]);", inst, ctx.stage_name,
binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, binding.U32(), binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, binding.U32(),
ctx.var_alloc.Consume(offset)); ctx.var_alloc.Consume(offset));
ctx.Add("for(int i=0;i<2;++i){{{}_ssbo{}[({}>>2)+i]=max({}_ssbo{}[({}>>2)+i],{}[i]);}}", ctx.Add("for(int i=0;i<2;++i){{{}_ssbo{}[({}>>2)+i]=max({}_ssbo{}[({}>>2)+i],{}[i]);}}",
@ -332,7 +332,7 @@ void EmitStorageAtomicUMax32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value
void EmitStorageAtomicAnd32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, void EmitStorageAtomicAnd32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
const IR::Value& offset, std::string_view value) { const IR::Value& offset, std::string_view value) {
LOG_WARNING(Shader_MSL, "Int64 atomics not supported, fallback to 32x2"); LOG_WARNING(Shader_MSL, "Int64 atomics not supported, fallback to 32x2");
ctx.AddU32x2("{}=uvec2(atomicAnd({}_ssbo{}[{}>>2],{}.x),atomicAnd({}_ssbo{}[({}>>2)+1],{}.y));", ctx.AddU32x2("{}=uint2(atomicAnd({}_ssbo{}[{}>>2],{}.x),atomicAnd({}_ssbo{}[({}>>2)+1],{}.y));",
inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value, inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value,
ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value); ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value);
} }
@ -340,7 +340,7 @@ void EmitStorageAtomicAnd32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value&
void EmitStorageAtomicOr32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, void EmitStorageAtomicOr32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
const IR::Value& offset, std::string_view value) { const IR::Value& offset, std::string_view value) {
LOG_WARNING(Shader_MSL, "Int64 atomics not supported, fallback to 32x2"); LOG_WARNING(Shader_MSL, "Int64 atomics not supported, fallback to 32x2");
ctx.AddU32x2("{}=uvec2(atomicOr({}_ssbo{}[{}>>2],{}.x),atomicOr({}_ssbo{}[({}>>2)+1],{}.y));", ctx.AddU32x2("{}=uint2(atomicOr({}_ssbo{}[{}>>2],{}.x),atomicOr({}_ssbo{}[({}>>2)+1],{}.y));",
inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value, inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value,
ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value); ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value);
} }
@ -348,7 +348,7 @@ void EmitStorageAtomicOr32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value&
void EmitStorageAtomicXor32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, void EmitStorageAtomicXor32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
const IR::Value& offset, std::string_view value) { const IR::Value& offset, std::string_view value) {
LOG_WARNING(Shader_MSL, "Int64 atomics not supported, fallback to 32x2"); LOG_WARNING(Shader_MSL, "Int64 atomics not supported, fallback to 32x2");
ctx.AddU32x2("{}=uvec2(atomicXor({}_ssbo{}[{}>>2],{}.x),atomicXor({}_ssbo{}[({}>>2)+1],{}.y));", ctx.AddU32x2("{}=uint2(atomicXor({}_ssbo{}[{}>>2],{}.x),atomicXor({}_ssbo{}[({}>>2)+1],{}.y));",
inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value, inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value,
ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value); ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value);
} }
@ -356,7 +356,7 @@ void EmitStorageAtomicXor32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value&
void EmitStorageAtomicExchange32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, void EmitStorageAtomicExchange32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
const IR::Value& offset, std::string_view value) { const IR::Value& offset, std::string_view value) {
LOG_WARNING(Shader_MSL, "Int64 atomics not supported, fallback to 32x2"); LOG_WARNING(Shader_MSL, "Int64 atomics not supported, fallback to 32x2");
ctx.AddU32x2("{}=uvec2(atomicExchange({}_ssbo{}[{}>>2],{}.x),atomicExchange({}_ssbo{}[({}>>2)+" ctx.AddU32x2("{}=uint2(atomicExchange({}_ssbo{}[{}>>2],{}.x),atomicExchange({}_ssbo{}[({}>>2)+"
"1],{}.y));", "1],{}.y));",
inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value, inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value,
ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value); ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value);

View file

@ -41,7 +41,7 @@ void EmitBitCastU16F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::I
} }
void EmitBitCastU32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { void EmitBitCastU32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
ctx.AddU32("{}=ftou({});", inst, value); ctx.AddU32("{}=as_type<uint>({});", inst, value);
} }
void EmitBitCastU64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { void EmitBitCastU64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
@ -53,7 +53,7 @@ void EmitBitCastF16U16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::I
} }
void EmitBitCastF32U32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { void EmitBitCastF32U32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
ctx.AddF32("{}=utof({});", inst, value); ctx.AddF32("{}=as_type<float>({});", inst, value);
} }
void EmitBitCastF64U64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { void EmitBitCastF64U64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {

View file

@ -23,17 +23,17 @@ void CompositeInsert(EmitContext& ctx, std::string_view result, std::string_view
void EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view e1, void EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view e1,
std::string_view e2) { std::string_view e2) {
ctx.AddU32x2("{}=uvec2({},{});", inst, e1, e2); ctx.AddU32x2("{}=uint2({},{});", inst, e1, e2);
} }
void EmitCompositeConstructU32x3(EmitContext& ctx, IR::Inst& inst, std::string_view e1, void EmitCompositeConstructU32x3(EmitContext& ctx, IR::Inst& inst, std::string_view e1,
std::string_view e2, std::string_view e3) { std::string_view e2, std::string_view e3) {
ctx.AddU32x3("{}=uvec3({},{},{});", inst, e1, e2, e3); ctx.AddU32x3("{}=uint3({},{},{});", inst, e1, e2, e3);
} }
void EmitCompositeConstructU32x4(EmitContext& ctx, IR::Inst& inst, std::string_view e1, void EmitCompositeConstructU32x4(EmitContext& ctx, IR::Inst& inst, std::string_view e1,
std::string_view e2, std::string_view e3, std::string_view e4) { std::string_view e2, std::string_view e3, std::string_view e4) {
ctx.AddU32x4("{}=uvec4({},{},{},{});", inst, e1, e2, e3, e4); ctx.AddU32x4("{}=uint4({},{},{},{});", inst, e1, e2, e3, e4);
} }
void EmitCompositeExtractU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite, void EmitCompositeExtractU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
@ -131,17 +131,17 @@ void EmitCompositeInsertF16x4([[maybe_unused]] EmitContext& ctx,
void EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view e1, void EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view e1,
std::string_view e2) { std::string_view e2) {
ctx.AddF32x2("{}=vec2({},{});", inst, e1, e2); ctx.AddF32x2("{}=float2({},{});", inst, e1, e2);
} }
void EmitCompositeConstructF32x3(EmitContext& ctx, IR::Inst& inst, std::string_view e1, void EmitCompositeConstructF32x3(EmitContext& ctx, IR::Inst& inst, std::string_view e1,
std::string_view e2, std::string_view e3) { std::string_view e2, std::string_view e3) {
ctx.AddF32x3("{}=vec3({},{},{});", inst, e1, e2, e3); ctx.AddF32x3("{}=float3({},{},{});", inst, e1, e2, e3);
} }
void EmitCompositeConstructF32x4(EmitContext& ctx, IR::Inst& inst, std::string_view e1, void EmitCompositeConstructF32x4(EmitContext& ctx, IR::Inst& inst, std::string_view e1,
std::string_view e2, std::string_view e3, std::string_view e4) { std::string_view e2, std::string_view e3, std::string_view e4) {
ctx.AddF32x4("{}=vec4({},{},{},{});", inst, e1, e2, e3, e4); ctx.AddF32x4("{}=float4({},{},{},{});", inst, e1, e2, e3, e4);
} }
void EmitCompositeExtractF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite, void EmitCompositeExtractF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite,

View file

@ -111,45 +111,45 @@ void GetCbuf16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const
void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
const IR::Value& offset) { const IR::Value& offset) {
const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "" : "ftou"}; const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "" : "as_type<uint>"};
GetCbuf8(ctx, inst, binding, offset, cast); GetCbuf8(ctx, inst, binding, offset, cast);
} }
void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
const IR::Value& offset) { const IR::Value& offset) {
const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "int" : "ftoi"}; const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "int" : "as_type<int>"};
GetCbuf8(ctx, inst, binding, offset, cast); GetCbuf8(ctx, inst, binding, offset, cast);
} }
void EmitGetCbufU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, void EmitGetCbufU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
const IR::Value& offset) { const IR::Value& offset) {
const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "" : "ftou"}; const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "" : "as_type<uint>"};
GetCbuf16(ctx, inst, binding, offset, cast); GetCbuf16(ctx, inst, binding, offset, cast);
} }
void EmitGetCbufS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, void EmitGetCbufS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
const IR::Value& offset) { const IR::Value& offset) {
const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "int" : "ftoi"}; const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "int" : "as_type<int>"};
GetCbuf16(ctx, inst, binding, offset, cast); GetCbuf16(ctx, inst, binding, offset, cast);
} }
void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
const IR::Value& offset) { const IR::Value& offset) {
const auto ret{ctx.var_alloc.Define(inst, MslVarType::U32)}; const auto ret{ctx.var_alloc.Define(inst, MslVarType::U32)};
const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "" : "ftou"}; const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "" : "as_type<uint>"};
GetCbuf(ctx, ret, binding, offset, 32, cast); GetCbuf(ctx, ret, binding, offset, 32, cast);
} }
void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
const IR::Value& offset) { const IR::Value& offset) {
const auto ret{ctx.var_alloc.Define(inst, MslVarType::F32)}; const auto ret{ctx.var_alloc.Define(inst, MslVarType::F32)};
const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "utof" : ""}; const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "as_type<float>" : ""};
GetCbuf(ctx, ret, binding, offset, 32, cast); GetCbuf(ctx, ret, binding, offset, 32, cast);
} }
void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
const IR::Value& offset) { const IR::Value& offset) {
const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "" : "ftou"}; const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "" : "as_type<uint>"};
if (offset.IsImmediate()) { if (offset.IsImmediate()) {
const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())}; const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())};
static constexpr u32 cbuf_size{0x10000}; static constexpr u32 cbuf_size{0x10000};
@ -157,14 +157,14 @@ void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding
const s32 signed_offset{static_cast<s32>(offset.U32())}; const s32 signed_offset{static_cast<s32>(offset.U32())};
if (signed_offset < 0 || u32_offset > cbuf_size) { if (signed_offset < 0 || u32_offset > cbuf_size) {
LOG_WARNING(Shader_MSL, "Immediate constant buffer offset is out of bounds"); LOG_WARNING(Shader_MSL, "Immediate constant buffer offset is out of bounds");
ctx.AddU32x2("{}=uvec2(0u);", inst); ctx.AddU32x2("{}=uint2(0u);", inst);
return; return;
} }
if (u32_offset % 2 == 0) { if (u32_offset % 2 == 0) {
ctx.AddU32x2("{}={}({}[{}].{}{});", inst, cast, cbuf, u32_offset / 16, ctx.AddU32x2("{}={}({}[{}].{}{});", inst, cast, cbuf, u32_offset / 16,
OffsetSwizzle(u32_offset), OffsetSwizzle(u32_offset + 4)); OffsetSwizzle(u32_offset), OffsetSwizzle(u32_offset + 4));
} else { } else {
ctx.AddU32x2("{}=uvec2({}({}[{}].{}),{}({}[{}].{}));", inst, cast, cbuf, ctx.AddU32x2("{}=uint2({}({}[{}].{}),{}({}[{}].{}));", inst, cast, cbuf,
u32_offset / 16, OffsetSwizzle(u32_offset), cast, cbuf, u32_offset / 16, OffsetSwizzle(u32_offset), cast, cbuf,
(u32_offset + 4) / 16, OffsetSwizzle(u32_offset + 4)); (u32_offset + 4) / 16, OffsetSwizzle(u32_offset + 4));
} }
@ -173,14 +173,14 @@ void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding
const auto offset_var{ctx.var_alloc.Consume(offset)}; const auto offset_var{ctx.var_alloc.Consume(offset)};
const auto cbuf{ChooseCbuf(ctx, binding, fmt::format("{}>>4", offset_var))}; const auto cbuf{ChooseCbuf(ctx, binding, fmt::format("{}>>4", offset_var))};
if (!ctx.profile.has_gl_component_indexing_bug) { if (!ctx.profile.has_gl_component_indexing_bug) {
ctx.AddU32x2("{}=uvec2({}({}[({}>>2)%4]),{}({}[(({}+4)>>2)%4]));", inst, cast, cbuf, ctx.AddU32x2("{}=uint2({}({}[({}>>2)%4]),{}({}[(({}+4)>>2)%4]));", inst, cast, cbuf,
offset_var, cast, cbuf, offset_var); offset_var, cast, cbuf, offset_var);
return; return;
} }
const auto ret{ctx.var_alloc.Define(inst, MslVarType::U32x2)}; const auto ret{ctx.var_alloc.Define(inst, MslVarType::U32x2)};
const auto cbuf_offset{fmt::format("{}>>2", offset_var)}; const auto cbuf_offset{fmt::format("{}>>2", offset_var)};
for (u32 swizzle = 0; swizzle < 4; ++swizzle) { for (u32 swizzle = 0; swizzle < 4; ++swizzle) {
ctx.Add("if(({}&3)=={}){}=uvec2({}({}.{}),{}({}.{}));", cbuf_offset, swizzle, ret, cast, ctx.Add("if(({}&3)=={}){}=uint2({}({}.{}),{}({}.{}));", cbuf_offset, swizzle, ret, cast,
cbuf, "xyzw"[swizzle], cast, cbuf, "xyzw"[(swizzle + 1) % 4]); cbuf, "xyzw"[swizzle], cast, cbuf, "xyzw"[(swizzle + 1) % 4]);
} }
} }
@ -199,23 +199,21 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr,
} }
return; return;
} }
ctx.AddF32("{}=in_attr{}{}.{};", inst, index, InputVertexIndex(ctx, vertex), swizzle); ctx.AddF32("{}=__in.attr{}{}.{};", inst, index, InputVertexIndex(ctx, vertex), swizzle);
return; return;
} }
switch (attr) { switch (attr) {
case IR::Attribute::PrimitiveId: case IR::Attribute::PrimitiveId:
ctx.AddF32("{}=itof(gl_PrimitiveID);", inst); ctx.AddF32("{}=as_type<float>(gl_PrimitiveID);", inst);
break; break;
case IR::Attribute::Layer: case IR::Attribute::Layer:
ctx.AddF32("{}=itof(gl_Layer);", inst); ctx.AddF32("{}=as_type<float>(gl_Layer);", inst);
break; break;
case IR::Attribute::PositionX: case IR::Attribute::PositionX:
case IR::Attribute::PositionY: case IR::Attribute::PositionY:
case IR::Attribute::PositionZ: case IR::Attribute::PositionZ:
case IR::Attribute::PositionW: { case IR::Attribute::PositionW: {
const bool is_array{IsInputArray(ctx.stage)}; ctx.AddF32("{}={}.{};", inst, "__out.position", swizzle);
const auto input_decorator{is_array ? fmt::format("gl_in[{}].", vertex) : ""};
ctx.AddF32("{}={}{}.{};", inst, input_decorator, "__out.position", swizzle);
break; break;
} }
case IR::Attribute::PointSpriteS: case IR::Attribute::PointSpriteS:
@ -227,22 +225,22 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr,
ctx.AddF32("{}=gl_TessCoord.{};", inst, swizzle); ctx.AddF32("{}=gl_TessCoord.{};", inst, swizzle);
break; break;
case IR::Attribute::InstanceId: case IR::Attribute::InstanceId:
ctx.AddF32("{}=itof(gl_InstanceID);", inst); ctx.AddF32("{}=as_type<float>(gl_InstanceID);", inst);
break; break;
case IR::Attribute::VertexId: case IR::Attribute::VertexId:
ctx.AddF32("{}=itof(gl_VertexID);", inst); ctx.AddF32("{}=as_type<float>(gl_VertexID);", inst);
break; break;
case IR::Attribute::FrontFace: case IR::Attribute::FrontFace:
ctx.AddF32("{}=itof(gl_FrontFacing?-1:0);", inst); ctx.AddF32("{}=as_type<float>(gl_FrontFacing?-1:0);", inst);
break; break;
case IR::Attribute::BaseInstance: case IR::Attribute::BaseInstance:
ctx.AddF32("{}=itof(gl_BaseInstance);", inst); ctx.AddF32("{}=as_type<float>(gl_BaseInstance);", inst);
break; break;
case IR::Attribute::BaseVertex: case IR::Attribute::BaseVertex:
ctx.AddF32("{}=itof(gl_BaseVertex);", inst); ctx.AddF32("{}=as_type<float>(gl_BaseVertex);", inst);
break; break;
case IR::Attribute::DrawID: case IR::Attribute::DrawID:
ctx.AddF32("{}=itof(gl_DrawID);", inst); ctx.AddF32("{}=as_type<float>(gl_DrawID);", inst);
break; break;
default: default:
throw NotImplementedException("Get attribute {}", attr); throw NotImplementedException("Get attribute {}", attr);
@ -299,7 +297,7 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view val
"viewport layer extension"); "viewport layer extension");
break; break;
} }
ctx.Add("gl_Layer=ftoi({});", value); ctx.Add("gl_Layer=as_type<int>({});", value);
break; break;
case IR::Attribute::ViewportIndex: case IR::Attribute::ViewportIndex:
if (ctx.stage != Stage::Geometry && if (ctx.stage != Stage::Geometry &&
@ -308,7 +306,7 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view val
"viewport layer extension"); "viewport layer extension");
break; break;
} }
ctx.Add("gl_ViewportIndex=ftoi({});", value); ctx.Add("gl_ViewportIndex=as_type<int>({});", value);
break; break;
case IR::Attribute::ViewportMask: case IR::Attribute::ViewportMask:
if (ctx.stage != Stage::Geometry && !ctx.profile.support_viewport_mask) { if (ctx.stage != Stage::Geometry && !ctx.profile.support_viewport_mask) {
@ -317,7 +315,7 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view val
"Shader stores viewport mask but device does not support viewport mask extension"); "Shader stores viewport mask but device does not support viewport mask extension");
break; break;
} }
ctx.Add("gl_ViewportMask[0]=ftoi({});", value); ctx.Add("gl_ViewportMask[0]=as_type<int>({});", value);
break; break;
case IR::Attribute::PointSize: case IR::Attribute::PointSize:
ctx.Add("gl_PointSize={};", value); ctx.Add("gl_PointSize={};", value);

View file

@ -256,7 +256,7 @@ void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::
const auto texture{Texture(ctx, info, index)}; const auto texture{Texture(ctx, info, index)};
const auto bias{info.has_bias ? fmt::format(",{}", bias_lc) : ""}; const auto bias{info.has_bias ? fmt::format(",{}", bias_lc) : ""};
const bool needs_shadow_ext{NeedsShadowLodExt(info.type)}; const bool needs_shadow_ext{NeedsShadowLodExt(info.type)};
const auto cast{needs_shadow_ext ? "vec4" : "vec3"}; const auto cast{needs_shadow_ext ? "float4" : "float3"};
const bool use_grad{!ctx.profile.support_gl_texture_shadow_lod && const bool use_grad{!ctx.profile.support_gl_texture_shadow_lod &&
ctx.stage != Stage::Fragment && needs_shadow_ext}; ctx.stage != Stage::Fragment && needs_shadow_ext};
if (use_grad) { if (use_grad) {
@ -267,7 +267,7 @@ void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::
ctx.AddF32("{}=0.0f;", inst); ctx.AddF32("{}=0.0f;", inst);
return; return;
} }
const auto d_cast{info.type == TextureType::ColorArray2D ? "vec2" : "vec3"}; const auto d_cast{info.type == TextureType::ColorArray2D ? "float2" : "float3"};
ctx.AddF32("{}=textureGrad({},{}({},{}),{}(0),{}(0));", inst, texture, cast, coords, dref, ctx.AddF32("{}=textureGrad({},{}({},{}),{}(0),{}(0));", inst, texture, cast, coords, dref,
d_cast, d_cast); d_cast, d_cast);
return; return;
@ -284,7 +284,7 @@ void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::
} else { } else {
if (ctx.stage == Stage::Fragment) { if (ctx.stage == Stage::Fragment) {
if (info.type == TextureType::ColorArrayCube) { if (info.type == TextureType::ColorArrayCube) {
ctx.AddF32("{}=texture({},vec4({}),{});", inst, texture, coords, dref); ctx.AddF32("{}=texture({},float4({}),{});", inst, texture, coords, dref);
} else { } else {
ctx.AddF32("{}=texture({},{}({},{}){});", inst, texture, cast, coords, dref, bias); ctx.AddF32("{}=texture({},{}({},{}){});", inst, texture, cast, coords, dref, bias);
} }
@ -311,7 +311,7 @@ void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::
const auto texture{Texture(ctx, info, index)}; const auto texture{Texture(ctx, info, index)};
const bool needs_shadow_ext{NeedsShadowLodExt(info.type)}; const bool needs_shadow_ext{NeedsShadowLodExt(info.type)};
const bool use_grad{!ctx.profile.support_gl_texture_shadow_lod && needs_shadow_ext}; const bool use_grad{!ctx.profile.support_gl_texture_shadow_lod && needs_shadow_ext};
const auto cast{needs_shadow_ext ? "vec4" : "vec3"}; const auto cast{needs_shadow_ext ? "float3" : "float3"};
if (use_grad) { if (use_grad) {
LOG_WARNING(Shader_MSL, LOG_WARNING(Shader_MSL,
"Device lacks GL_EXT_texture_shadow_lod. Using textureGrad fallback"); "Device lacks GL_EXT_texture_shadow_lod. Using textureGrad fallback");
@ -320,7 +320,7 @@ void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::
ctx.AddF32("{}=0.0f;", inst); ctx.AddF32("{}=0.0f;", inst);
return; return;
} }
const auto d_cast{info.type == TextureType::ColorArray2D ? "vec2" : "vec3"}; const auto d_cast{info.type == TextureType::ColorArray2D ? "float2" : "float3"};
ctx.AddF32("{}=textureGrad({},{}({},{}),{}(0),{}(0));", inst, texture, cast, coords, dref, ctx.AddF32("{}=textureGrad({},{}({},{}),{}(0),{}(0));", inst, texture, cast, coords, dref,
d_cast, d_cast); d_cast, d_cast);
return; return;
@ -671,7 +671,7 @@ void EmitIsTextureScaled(EmitContext& ctx, IR::Inst& inst, const IR::Value& inde
throw NotImplementedException("Non-constant texture rescaling"); throw NotImplementedException("Non-constant texture rescaling");
} }
const u32 image_index{index.U32()}; const u32 image_index{index.U32()};
ctx.AddU1("{}=(ftou(scaling.x)&{})!=0;", inst, 1u << image_index); ctx.AddU1("{}=(as_type<uint>(scaling.x)&{})!=0;", inst, 1u << image_index);
} }
void EmitIsImageScaled(EmitContext& ctx, IR::Inst& inst, const IR::Value& index) { void EmitIsImageScaled(EmitContext& ctx, IR::Inst& inst, const IR::Value& index) {
@ -679,7 +679,7 @@ void EmitIsImageScaled(EmitContext& ctx, IR::Inst& inst, const IR::Value& index)
throw NotImplementedException("Non-constant texture rescaling"); throw NotImplementedException("Non-constant texture rescaling");
} }
const u32 image_index{index.U32()}; const u32 image_index{index.U32()};
ctx.AddU1("{}=(ftou(scaling.y)&{})!=0;", inst, 1u << image_index); ctx.AddU1("{}=(as_type<uint>(scaling.y)&{})!=0;", inst, 1u << image_index);
} }
void EmitBindlessImageSampleImplicitLod(EmitContext&) { void EmitBindlessImageSampleImplicitLod(EmitContext&) {

View file

@ -135,14 +135,14 @@ void EmitLoadStorage32(EmitContext& ctx, IR::Inst& inst, const IR::Value& bindin
void EmitLoadStorage64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                       const IR::Value& offset) {
    // Load two adjacent 32-bit words from the bound SSBO and pack them into a uint2.
    const auto offset_var{ctx.var_alloc.Consume(offset)};
    const auto ssbo{fmt::format("{}_ssbo{}", ctx.stage_name, binding.U32())};
    ctx.AddU32x2("{}=uint2({}[{}>>2],{}[({}+4)>>2]);", inst, ssbo, offset_var, ssbo, offset_var);
}
void EmitLoadStorage128(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, void EmitLoadStorage128(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
const IR::Value& offset) { const IR::Value& offset) {
const auto offset_var{ctx.var_alloc.Consume(offset)}; const auto offset_var{ctx.var_alloc.Consume(offset)};
ctx.AddU32x4("{}=uvec4({}_ssbo{}[{}>>2],{}_ssbo{}[({}+4)>>2],{}_ssbo{}[({}+8)>>2],{}_ssbo{}[({}" ctx.AddU32x4("{}=uint4({}_ssbo{}[{}>>2],{}_ssbo{}[({}+4)>>2],{}_ssbo{}[({}+8)>>2],{}_ssbo{}[({}"
"+12)>>2]);", "+12)>>2]);",
inst, ctx.stage_name, binding.U32(), offset_var, ctx.stage_name, binding.U32(), inst, ctx.stage_name, binding.U32(), offset_var, ctx.stage_name, binding.U32(),
offset_var, ctx.stage_name, binding.U32(), offset_var, ctx.stage_name, offset_var, ctx.stage_name, binding.U32(), offset_var, ctx.stage_name,

View file

@ -39,11 +39,11 @@ void EmitLoadSharedU32(EmitContext& ctx, IR::Inst& inst, std::string_view offset
} }
void EmitLoadSharedU64(EmitContext& ctx, IR::Inst& inst, std::string_view offset) { void EmitLoadSharedU64(EmitContext& ctx, IR::Inst& inst, std::string_view offset) {
ctx.AddU32x2("{}=uvec2(smem[{}>>2],smem[({}+4)>>2]);", inst, offset, offset); ctx.AddU32x2("{}=uint2(smem[{}>>2],smem[({}+4)>>2]);", inst, offset, offset);
} }
void EmitLoadSharedU128(EmitContext& ctx, IR::Inst& inst, std::string_view offset) { void EmitLoadSharedU128(EmitContext& ctx, IR::Inst& inst, std::string_view offset) {
ctx.AddU32x4("{}=uvec4(smem[{}>>2],smem[({}+4)>>2],smem[({}+8)>>2],smem[({}+12)>>2]);", inst, ctx.AddU32x4("{}=uint4(smem[{}>>2],smem[({}+4)>>2],smem[({}+8)>>2],smem[({}+12)>>2]);", inst,
offset, offset, offset, offset); offset, offset, offset, offset);
} }

View file

@ -20,7 +20,7 @@ void InitializeOutputVaryings(EmitContext& ctx) {
} }
ctx.Add("__Output __out;"); ctx.Add("__Output __out;");
if (ctx.stage == Stage::VertexB || ctx.stage == Stage::Geometry) { if (ctx.stage == Stage::VertexB || ctx.stage == Stage::Geometry) {
ctx.Add("__out.position=vec4(0,0,0,1);"); ctx.Add("__out.position=float4(0,0,0,1);");
} }
for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
if (!ctx.info.stores.Generic(index)) { if (!ctx.info.stores.Generic(index)) {
@ -31,7 +31,7 @@ void InitializeOutputVaryings(EmitContext& ctx) {
size_t element{}; size_t element{};
while (element < info_array.size()) { while (element < info_array.size()) {
const auto& info{info_array.at(element)}; const auto& info{info_array.at(element)};
const auto varying_name{fmt::format("__out.{}{}", info.name, output_decorator)}; const auto varying_name{fmt::format("{}{}", info.name, output_decorator)};
switch (info.num_components) { switch (info.num_components) {
case 1: { case 1: {
const char value{element == 3 ? '1' : '0'}; const char value{element == 3 ? '1' : '0'};

View file

@ -54,7 +54,7 @@ std::string_view BallotIndex(EmitContext& ctx) {
std::string GetMask(EmitContext& ctx, std::string_view mask) {
    // Cast the 64-bit ballot mask to uint2 and select the 32-bit half for this lane.
    return fmt::format("uint(uint2({}){})", mask, BallotIndex(ctx));
}
} // Anonymous namespace } // Anonymous namespace
@ -68,8 +68,8 @@ void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {
return; return;
} }
const auto ballot_index{BallotIndex(ctx)}; const auto ballot_index{BallotIndex(ctx)};
const auto active_mask{fmt::format("uvec2(ballotARB(true)){}", ballot_index)}; const auto active_mask{fmt::format("uint2(ballotARB(true)){}", ballot_index)};
const auto ballot{fmt::format("uvec2(ballotARB({})){}", pred, ballot_index)}; const auto ballot{fmt::format("uint2(ballotARB({})){}", pred, ballot_index)};
ctx.AddU1("{}=({}&{})=={};", inst, ballot, active_mask, active_mask); ctx.AddU1("{}=({}&{})=={};", inst, ballot, active_mask, active_mask);
} }
@ -79,8 +79,8 @@ void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {
return; return;
} }
const auto ballot_index{BallotIndex(ctx)}; const auto ballot_index{BallotIndex(ctx)};
const auto active_mask{fmt::format("uvec2(ballotARB(true)){}", ballot_index)}; const auto active_mask{fmt::format("uint2(ballotARB(true)){}", ballot_index)};
const auto ballot{fmt::format("uvec2(ballotARB({})){}", pred, ballot_index)}; const auto ballot{fmt::format("uint2(ballotARB({})){}", pred, ballot_index)};
ctx.AddU1("{}=({}&{})!=0u;", inst, ballot, active_mask, active_mask); ctx.AddU1("{}=({}&{})!=0u;", inst, ballot, active_mask, active_mask);
} }
@ -90,15 +90,15 @@ void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {
return; return;
} }
const auto ballot_index{BallotIndex(ctx)}; const auto ballot_index{BallotIndex(ctx)};
const auto active_mask{fmt::format("uvec2(ballotARB(true)){}", ballot_index)}; const auto active_mask{fmt::format("uint2(ballotARB(true)){}", ballot_index)};
const auto ballot{fmt::format("uvec2(ballotARB({})){}", pred, ballot_index)}; const auto ballot{fmt::format("uint2(ballotARB({})){}", pred, ballot_index)};
const auto value{fmt::format("({}^{})", ballot, active_mask)}; const auto value{fmt::format("({}^{})", ballot, active_mask)};
ctx.AddU1("{}=({}==0)||({}=={});", inst, value, value, active_mask); ctx.AddU1("{}=({}==0)||({}=={});", inst, value, value, active_mask);
} }
void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {
    // Emit a subgroup ballot of `pred`, keeping only this lane's 32-bit half of the mask.
    const auto index{BallotIndex(ctx)};
    ctx.AddU32("{}=uint2(ballotARB({})){};", inst, pred, index);
}
void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst) { void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst) {

View file

@ -52,6 +52,7 @@ std::string OutputDecorator(Stage stage, u32 size) {
} }
} }
/*
// TODO // TODO
std::string_view GetTessMode(TessPrimitive primitive) { std::string_view GetTessMode(TessPrimitive primitive) {
switch (primitive) { switch (primitive) {
@ -107,6 +108,7 @@ std::string_view OutputPrimitive(OutputTopology topology) {
} }
throw InvalidArgument("Invalid output topology {}", topology); throw InvalidArgument("Invalid output topology {}", topology);
} }
*/
// TODO // TODO
std::string_view DepthSamplerType(TextureType type) { std::string_view DepthSamplerType(TextureType type) {
@ -233,33 +235,15 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile
break; break;
case Stage::TessellationControl: case Stage::TessellationControl:
stage_name = "kernel"; stage_name = "kernel";
header += fmt::format("layout(vertices={})out;", program.invocations);
break; break;
case Stage::TessellationEval: case Stage::TessellationEval:
stage_name = "vertex"; stage_name = "vertex";
header += fmt::format("layout({},{},{})in;", GetTessMode(runtime_info.tess_primitive),
GetTessSpacing(runtime_info.tess_spacing),
runtime_info.tess_clockwise ? "cw" : "ccw");
break; break;
case Stage::Geometry: case Stage::Geometry:
stage_name = "vertex"; stage_name = "vertex";
header += fmt::format("layout({})in;", InputPrimitive(runtime_info.input_topology));
if (uses_geometry_passthrough) {
header += "layout(passthrough)in gl_PerVertex{vec4 gl_Position;};";
break;
} else if (program.is_geometry_passthrough &&
!profile.support_geometry_shader_passthrough) {
LOG_WARNING(Shader_MSL, "Passthrough geometry program used but not supported");
}
header += fmt::format(
"layout({},max_vertices={})out;in gl_PerVertex{{vec4 gl_Position;}}gl_in[];",
OutputPrimitive(program.output_topology), program.output_vertices);
break; break;
case Stage::Fragment: case Stage::Fragment:
stage_name = "fragment"; stage_name = "fragment";
if (runtime_info.force_early_z) {
header += "layout(early_fragment_tests)in;";
}
break; break;
case Stage::Compute: case Stage::Compute:
stage_name = "kernel"; stage_name = "kernel";
@ -300,6 +284,9 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile
} }
} }
header += "struct __Output {\n"; header += "struct __Output {\n";
if (stage == Stage::VertexB || stage == Stage::Geometry) {
header += "float4 position [[position]];\n";
}
for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
if (info.stores.Generic(index)) { if (info.stores.Generic(index)) {
DefineGenericOutput(index, program.invocations); DefineGenericOutput(index, program.invocations);
@ -332,8 +319,8 @@ bool EmitContext::DefineInputs(Bindings& bindings) {
const u32 cbuf_binding_size{info.uses_global_memory ? 0x1000U : cbuf_used_size}; const u32 cbuf_binding_size{info.uses_global_memory ? 0x1000U : cbuf_used_size};
if (added) if (added)
input_str += ","; input_str += ",";
input_str += fmt::format("constant float4& cbuf{}[{}] [[buffer({})]]", desc.index, input_str += fmt::format("constant float4& {}_cbuf{}[{}] [[buffer({})]]", stage_name,
cbuf_binding_size, bindings.uniform_buffer); desc.index, cbuf_binding_size, bindings.uniform_buffer);
bindings.uniform_buffer += desc.count; bindings.uniform_buffer += desc.count;
added = true; added = true;
} }
@ -346,8 +333,8 @@ bool EmitContext::DefineInputs(Bindings& bindings) {
for (const auto& desc : info.storage_buffers_descriptors) { for (const auto& desc : info.storage_buffers_descriptors) {
if (added) if (added)
input_str += ","; input_str += ",";
input_str += input_str += fmt::format("device uint& {}_ssbo{}[] [[buffer({})]]", stage_name, index,
fmt::format("device uint& ssbo{}[] [[buffer({})]]", index, bindings.storage_buffer); bindings.storage_buffer);
bindings.storage_buffer += desc.count; bindings.storage_buffer += desc.count;
index += desc.count; index += desc.count;
added = true; added = true;
@ -377,8 +364,8 @@ bool EmitContext::DefineInputs(Bindings& bindings) {
const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""}; const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""};
if (added) if (added)
input_str += ","; input_str += ",";
input_str += fmt::format("{}<{}> img{}{} [[texture({})]]", qualifier, image_type, input_str += fmt::format("{}<{}> {}_img{}{} [[texture({})]]", qualifier, image_type,
bindings.image, array_decorator, bindings.image); stage_name, bindings.image, array_decorator, bindings.image);
bindings.image += desc.count; bindings.image += desc.count;
added = true; added = true;
} }
@ -404,10 +391,10 @@ bool EmitContext::DefineInputs(Bindings& bindings) {
const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""}; const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""};
if (added) if (added)
input_str += ","; input_str += ",";
input_str += fmt::format("{} tex{}{} [[texture({})]]", texture_type, bindings.texture, input_str += fmt::format("{} {}_tex{}{} [[texture({})]]", texture_type, stage_name,
array_decorator, bindings.texture); bindings.texture, array_decorator, bindings.texture);
input_str += fmt::format(",sampler samp{}{} [[sampler({})]]", bindings.texture, input_str += fmt::format(",sampler {}_samp{}{} [[sampler({})]]", stage_name,
array_decorator, bindings.texture); bindings.texture, array_decorator, bindings.texture);
bindings.texture += desc.count; bindings.texture += desc.count;
added = true; added = true;
} }
@ -417,48 +404,20 @@ bool EmitContext::DefineInputs(Bindings& bindings) {
// TODO // TODO
void EmitContext::DefineGenericOutput(size_t index, u32 invocations) { void EmitContext::DefineGenericOutput(size_t index, u32 invocations) {
static constexpr std::string_view swizzle{"xyzw"}; const auto type{fmt::format("float{}", 4)};
const size_t base_index{static_cast<size_t>(IR::Attribute::Generic0X) + index * 4}; std::string name{fmt::format("attr{}", index)};
u32 element{0}; header += fmt::format("{} {}{} [[user(locn{})]];\n", type, name,
while (element < 4) { OutputDecorator(stage, invocations), index);
std::string definition{fmt::format("layout(location={}", index)};
const u32 remainder{4 - element};
const TransformFeedbackVarying* xfb_varying{};
const size_t xfb_varying_index{base_index + element};
if (xfb_varying_index < runtime_info.xfb_count) {
xfb_varying = &runtime_info.xfb_varyings[xfb_varying_index];
xfb_varying = xfb_varying->components > 0 ? xfb_varying : nullptr;
}
const u32 num_components{xfb_varying ? xfb_varying->components : remainder};
if (element > 0) {
definition += fmt::format(",component={}", element);
}
if (xfb_varying) {
definition +=
fmt::format(",xfb_buffer={},xfb_stride={},xfb_offset={}", xfb_varying->buffer,
xfb_varying->stride, xfb_varying->offset);
}
std::string name{fmt::format("out_attr{}", index)};
if (num_components < 4 || element > 0) {
name += fmt::format("_{}", swizzle.substr(element, num_components));
}
const auto type{num_components == 1 ? "float" : fmt::format("vec{}", num_components)};
definition += fmt::format(")out {} {}{};", type, name, OutputDecorator(stage, invocations));
header += definition;
const GenericElementInfo element_info{ const GenericElementInfo element_info{
.name = name, .name = "__out." + name,
.first_element = element, .first_element = 0,
.num_components = num_components, .num_components = 4,
}; };
std::fill_n(output_generics[index].begin() + element, num_components, element_info); std::fill_n(output_generics[index].begin(), 4, element_info);
element += num_components;
}
} }
void EmitContext::DefineHelperFunctions() { void EmitContext::DefineHelperFunctions() {
header += "\n#define ftoi floatBitsToInt\n#define ftou floatBitsToUint\n"
"#define itof intBitsToFloat\n#define utof uintBitsToFloat\n";
if (info.uses_global_increment || info.uses_shared_increment) { if (info.uses_global_increment || info.uses_shared_increment) {
header += "uint CasIncrement(uint op_a,uint op_b){return op_a>=op_b?0u:(op_a+1u);}"; header += "uint CasIncrement(uint op_a,uint op_b){return op_a>=op_b?0u:(op_a+1u);}";
} }
@ -468,7 +427,7 @@ void EmitContext::DefineHelperFunctions() {
} }
if (info.uses_atomic_f32_add) { if (info.uses_atomic_f32_add) {
header += "uint CasFloatAdd(uint op_a,float op_b){" header += "uint CasFloatAdd(uint op_a,float op_b){"
"return ftou(utof(op_a)+op_b);}"; "return as_type<uint>(as_type<float>(op_a)+op_b);}";
} }
if (info.uses_atomic_f32x2_add) { if (info.uses_atomic_f32x2_add) {
header += "uint CasFloatAdd32x2(uint op_a,vec2 op_b){" header += "uint CasFloatAdd32x2(uint op_a,vec2 op_b){"
@ -544,8 +503,10 @@ std::string EmitContext::DefineGlobalMemoryFunctions() {
for (size_t i = 0; i < addr_xy.size(); ++i) { for (size_t i = 0; i < addr_xy.size(); ++i) {
const auto addr_loc{ssbo.cbuf_offset + 4 * i}; const auto addr_loc{ssbo.cbuf_offset + 4 * i};
const auto size_loc{size_cbuf_offset + 4 * i}; const auto size_loc{size_cbuf_offset + 4 * i};
addr_xy[i] = fmt::format("ftou({}[{}].{})", cbuf, addr_loc / 16, Swizzle(addr_loc)); addr_xy[i] =
size_xy[i] = fmt::format("ftou({}[{}].{})", cbuf, size_loc / 16, Swizzle(size_loc)); fmt::format("as_type<uint>({}[{}].{})", cbuf, addr_loc / 16, Swizzle(addr_loc));
size_xy[i] =
fmt::format("as_type<uint>({}[{}].{})", cbuf, size_loc / 16, Swizzle(size_loc));
} }
const u32 ssbo_align_mask{~(static_cast<u32>(profile.min_ssbo_alignment) - 1U)}; const u32 ssbo_align_mask{~(static_cast<u32>(profile.min_ssbo_alignment) - 1U)};
const auto aligned_low_addr{fmt::format("{}&{}", addr_xy[0], ssbo_align_mask)}; const auto aligned_low_addr{fmt::format("{}&{}", addr_xy[0], ssbo_align_mask)};

View file

@ -53,13 +53,13 @@ std::string FormatFloat(std::string_view value, IR::Type type) {
// TODO: Confirm FP64 nan/inf // TODO: Confirm FP64 nan/inf
if (type == IR::Type::F32) { if (type == IR::Type::F32) {
if (value == "nan") { if (value == "nan") {
return "utof(0x7fc00000)"; return "as_type<float>(0x7fc00000)";
} }
if (value == "inf") { if (value == "inf") {
return "utof(0x7f800000)"; return "as_type<float>(0x7f800000)";
} }
if (value == "-inf") { if (value == "-inf") {
return "utof(0xff800000)"; return "as_type<float>(0xff800000)";
} }
} }
if (value.find_first_of('e') != std::string_view::npos) { if (value.find_first_of('e') != std::string_view::npos) {
@ -203,7 +203,7 @@ std::string VarAlloc::GetMslType(MslVarType type) const {
case MslVarType::U1: case MslVarType::U1:
return "bool"; return "bool";
case MslVarType::F16x2: case MslVarType::F16x2:
return "f16vec2"; return "half2";
case MslVarType::U32: case MslVarType::U32:
return "uint"; return "uint";
case MslVarType::F32: case MslVarType::F32:
@ -215,17 +215,17 @@ std::string VarAlloc::GetMslType(MslVarType type) const {
case MslVarType::PrecF64: case MslVarType::PrecF64:
return "double"; return "double";
case MslVarType::U32x2: case MslVarType::U32x2:
return "uvec2"; return "uint2";
case MslVarType::F32x2: case MslVarType::F32x2:
return "vec2"; return "float2";
case MslVarType::U32x3: case MslVarType::U32x3:
return "uvec3"; return "float3";
case MslVarType::F32x3: case MslVarType::F32x3:
return "vec3"; return "float3";
case MslVarType::U32x4: case MslVarType::U32x4:
return "uvec4"; return "uint4";
case MslVarType::F32x4: case MslVarType::F32x4:
return "vec4"; return "float4";
case MslVarType::Void: case MslVarType::Void:
return ""; return "";
default: default:

View file

@ -276,8 +276,7 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
if (error) { if (error) {
LOG_ERROR(Render_Metal, "failed to create library: {}", LOG_ERROR(Render_Metal, "failed to create library: {}",
error->description()->cString(NS::ASCIIStringEncoding)); error->description()->cString(NS::ASCIIStringEncoding));
// HACK // std::cout << error->description()->cString(NS::ASCIIStringEncoding) << std::endl;
std::cout << error->description()->cString(NS::ASCIIStringEncoding) << std::endl;
// HACK // HACK
throw; throw;
} }