shader_recompiler: emulate 8-bit and 16-bit storage writes with cas loop

This commit is contained in:
Liam 2024-01-11 16:50:59 -05:00
parent 6533dfd7ce
commit 2a0d707ce1
3 changed files with 86 additions and 8 deletions

View file

@ -65,6 +65,14 @@ void WriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value&
WriteStorage(ctx, binding, offset, value, ctx.storage_types.U32, sizeof(u32), WriteStorage(ctx, binding, offset, value, ctx.storage_types.U32, sizeof(u32),
&StorageDefinitions::U32, index_offset); &StorageDefinitions::U32, index_offset);
} }
void WriteStorageByCasLoop(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
Id value, Id bit_offset, Id bit_count) {
const Id pointer{StoragePointer(ctx, binding, offset, ctx.storage_types.U32, sizeof(u32),
&StorageDefinitions::U32)};
ctx.OpFunctionCall(ctx.TypeVoid(), ctx.write_storage_cas_loop_func, pointer, value, bit_offset,
bit_count);
}
} // Anonymous namespace } // Anonymous namespace
void EmitLoadGlobalU8(EmitContext&) { void EmitLoadGlobalU8(EmitContext&) {
@ -219,26 +227,42 @@ Id EmitLoadStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Valu
void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
Id value) { Id value) {
if (ctx.profile.support_int8) {
WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.U8, value), ctx.storage_types.U8, WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.U8, value), ctx.storage_types.U8,
sizeof(u8), &StorageDefinitions::U8); sizeof(u8), &StorageDefinitions::U8);
} else {
WriteStorageByCasLoop(ctx, binding, offset, value, ctx.BitOffset8(offset), ctx.Const(8u));
}
} }
void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
Id value) { Id value) {
if (ctx.profile.support_int8) {
WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.S8, value), ctx.storage_types.S8, WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.S8, value), ctx.storage_types.S8,
sizeof(s8), &StorageDefinitions::S8); sizeof(s8), &StorageDefinitions::S8);
} else {
WriteStorageByCasLoop(ctx, binding, offset, value, ctx.BitOffset8(offset), ctx.Const(8u));
}
} }
void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
Id value) { Id value) {
if (ctx.profile.support_int16) {
WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.U16, value), ctx.storage_types.U16, WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.U16, value), ctx.storage_types.U16,
sizeof(u16), &StorageDefinitions::U16); sizeof(u16), &StorageDefinitions::U16);
} else {
WriteStorageByCasLoop(ctx, binding, offset, value, ctx.BitOffset16(offset), ctx.Const(16u));
}
} }
void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
Id value) { Id value) {
if (ctx.profile.support_int16) {
WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.S16, value), ctx.storage_types.S16, WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.S16, value), ctx.storage_types.S16,
sizeof(s16), &StorageDefinitions::S16); sizeof(s16), &StorageDefinitions::S16);
} else {
WriteStorageByCasLoop(ctx, binding, offset, value, ctx.BitOffset16(offset), ctx.Const(16u));
}
} }
void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,

View file

@ -480,6 +480,7 @@ EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_inf
DefineTextures(program.info, texture_binding, bindings.texture_scaling_index); DefineTextures(program.info, texture_binding, bindings.texture_scaling_index);
DefineImages(program.info, image_binding, bindings.image_scaling_index); DefineImages(program.info, image_binding, bindings.image_scaling_index);
DefineAttributeMemAccess(program.info); DefineAttributeMemAccess(program.info);
DefineWriteStorageCasLoopFunction(program.info);
DefineGlobalMemoryFunctions(program.info); DefineGlobalMemoryFunctions(program.info);
DefineRescalingInput(program.info); DefineRescalingInput(program.info);
DefineRenderArea(program.info); DefineRenderArea(program.info);
@ -877,6 +878,56 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) {
} }
} }
void EmitContext::DefineWriteStorageCasLoopFunction(const Info& info) {
if (profile.support_int8 && profile.support_int16) {
return;
}
if (!info.uses_int8 && !info.uses_int16) {
return;
}
AddCapability(spv::Capability::VariablePointersStorageBuffer);
const Id ptr_type{TypePointer(spv::StorageClass::StorageBuffer, U32[1])};
const Id func_type{TypeFunction(void_id, ptr_type, U32[1], U32[1], U32[1])};
const Id func{OpFunction(void_id, spv::FunctionControlMask::MaskNone, func_type)};
const Id pointer{OpFunctionParameter(ptr_type)};
const Id value{OpFunctionParameter(U32[1])};
const Id bit_offset{OpFunctionParameter(U32[1])};
const Id bit_count{OpFunctionParameter(U32[1])};
AddLabel();
const Id scope_device{Const(1u)};
const Id ordering_relaxed{u32_zero_value};
const Id body_label{OpLabel()};
const Id continue_label{OpLabel()};
const Id endloop_label{OpLabel()};
const Id beginloop_label{OpLabel()};
OpBranch(beginloop_label);
AddLabel(beginloop_label);
OpLoopMerge(endloop_label, continue_label, spv::LoopControlMask::MaskNone);
OpBranch(body_label);
AddLabel(body_label);
const Id expected_value{OpLoad(U32[1], pointer)};
const Id desired_value{OpBitFieldInsert(U32[1], expected_value, value, bit_offset, bit_count)};
const Id actual_value{OpAtomicCompareExchange(U32[1], pointer, scope_device, ordering_relaxed,
ordering_relaxed, desired_value, expected_value)};
const Id store_successful{OpIEqual(U1, expected_value, actual_value)};
OpBranchConditional(store_successful, endloop_label, continue_label);
AddLabel(endloop_label);
OpReturn();
AddLabel(continue_label);
OpBranch(beginloop_label);
OpFunctionEnd();
write_storage_cas_loop_func = func;
}
void EmitContext::DefineGlobalMemoryFunctions(const Info& info) { void EmitContext::DefineGlobalMemoryFunctions(const Info& info) {
if (!info.uses_global_memory || !profile.support_int64) { if (!info.uses_global_memory || !profile.support_int64) {
return; return;

View file

@ -325,6 +325,8 @@ public:
Id f32x2_min_cas{}; Id f32x2_min_cas{};
Id f32x2_max_cas{}; Id f32x2_max_cas{};
Id write_storage_cas_loop_func{};
Id load_global_func_u32{}; Id load_global_func_u32{};
Id load_global_func_u32x2{}; Id load_global_func_u32x2{};
Id load_global_func_u32x4{}; Id load_global_func_u32x4{};
@ -372,6 +374,7 @@ private:
void DefineTextures(const Info& info, u32& binding, u32& scaling_index); void DefineTextures(const Info& info, u32& binding, u32& scaling_index);
void DefineImages(const Info& info, u32& binding, u32& scaling_index); void DefineImages(const Info& info, u32& binding, u32& scaling_index);
void DefineAttributeMemAccess(const Info& info); void DefineAttributeMemAccess(const Info& info);
void DefineWriteStorageCasLoopFunction(const Info& info);
void DefineGlobalMemoryFunctions(const Info& info); void DefineGlobalMemoryFunctions(const Info& info);
void DefineRescalingInput(const Info& info); void DefineRescalingInput(const Info& info);
void DefineRescalingInputPushConstant(); void DefineRescalingInputPushConstant();