From e315f647d69cbaf35cede70074f52525a7a2a4b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=91=D1=80=D0=B0=D0=BD=D0=B8=D0=BC=D0=B8=D1=80=20=D0=9A?= =?UTF-8?q?=D0=B0=D1=80=D0=B0=D1=9F=D0=B8=D1=9B?= Date: Fri, 12 Apr 2024 21:21:14 -0700 Subject: [PATCH] Updated spirv-cross. --- 3rdparty/spirv-cross/spirv_common.hpp | 5 + 3rdparty/spirv-cross/spirv_cross.cpp | 51 +++- 3rdparty/spirv-cross/spirv_glsl.cpp | 12 +- 3rdparty/spirv-cross/spirv_msl.cpp | 407 +++++++++++++++++--------- 3rdparty/spirv-cross/spirv_msl.hpp | 9 +- 5 files changed, 333 insertions(+), 151 deletions(-) diff --git a/3rdparty/spirv-cross/spirv_common.hpp b/3rdparty/spirv-cross/spirv_common.hpp index 51fffe7a1..93b266977 100644 --- a/3rdparty/spirv-cross/spirv_common.hpp +++ b/3rdparty/spirv-cross/spirv_common.hpp @@ -1121,6 +1121,9 @@ struct SPIRVariable : IVariant // Set to true while we're inside the for loop. bool loop_variable_enable = false; + // Used to find global LUTs + bool is_written_to = false; + SPIRFunction::Parameter *parameter = nullptr; SPIRV_CROSS_DECLARE_CLONE(SPIRVariable) @@ -1668,6 +1671,8 @@ enum ExtendedDecorations // lack of constructors in the 'threadgroup' address space. SPIRVCrossDecorationWorkgroupStruct, + SPIRVCrossDecorationOverlappingBinding, + SPIRVCrossDecorationCount }; diff --git a/3rdparty/spirv-cross/spirv_cross.cpp b/3rdparty/spirv-cross/spirv_cross.cpp index 3f0fec334..244c40a7a 100644 --- a/3rdparty/spirv-cross/spirv_cross.cpp +++ b/3rdparty/spirv-cross/spirv_cross.cpp @@ -1227,7 +1227,7 @@ const SPIRType &Compiler::get_pointee_type(uint32_t type_id) const uint32_t Compiler::get_variable_data_type_id(const SPIRVariable &var) const { - if (var.phi_variable) + if (var.phi_variable || var.storage == spv::StorageClass::StorageClassAtomicCounter) return var.basetype; return get_pointee_type_id(var.basetype); } @@ -3335,13 +3335,11 @@ bool Compiler::AnalyzeVariableScopeAccessHandler::handle_terminator(const SPIRBl bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint32_t *args, uint32_t length) { // Keep track of the types of temporaries, so we can hoist them out as necessary. - uint32_t result_type, result_id; + uint32_t result_type = 0, result_id = 0; if (compiler.instruction_to_result_type(result_type, result_id, op, args, length)) { // For some opcodes, we will need to override the result id. // If we need to hoist the temporary, the temporary type is the input, not the result. - // FIXME: This will likely break with OpCopyObject + hoisting, but we'll have to - // solve it if we ever get there ... if (op == OpConvertUToAccelerationStructureKHR) { auto itr = result_id_to_type.find(args[2]); @@ -3450,6 +3448,13 @@ bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint3 case OpCopyObject: { + // OpCopyObject copies the underlying non-pointer type, + // so any temp variable should be declared using the underlying type. + // If the type is a pointer, get its base type and overwrite the result type mapping. + auto &type = compiler.get(result_type); + if (type.pointer) + result_id_to_type[result_id] = type.parent_type; + if (length < 3) return false; @@ -3731,6 +3736,14 @@ void Compiler::find_function_local_luts(SPIRFunction &entry, const AnalyzeVariab auto &var = get(accessed_var.first); auto &type = expression_type(accessed_var.first); + // First check if there are writes to the variable. Later, if there are none, we'll + // reconsider it as globally accessed LUT. + if (!var.is_written_to) + { + var.is_written_to = handler.complete_write_variables_to_block.count(var.self) != 0 || + handler.partial_write_variables_to_block.count(var.self) != 0; + } + // Only consider function local variables here. // If we only have a single function in our CFG, private storage is also fine, // since it behaves like a function local variable. @@ -3755,8 +3768,7 @@ void Compiler::find_function_local_luts(SPIRFunction &entry, const AnalyzeVariab static_constant_expression = var.initializer; // There can be no stores to this variable, we have now proved we have a LUT. - if (handler.complete_write_variables_to_block.count(var.self) != 0 || - handler.partial_write_variables_to_block.count(var.self) != 0) + if (var.is_written_to) continue; } else @@ -4423,11 +4435,9 @@ bool Compiler::ActiveBuiltinHandler::handle(spv::Op opcode, const uint32_t *args for (uint32_t i = 0; i < count; i++) { // Pointers + // PtrAccessChain functions more like a pointer offset. Type remains the same. if (opcode == OpPtrAccessChain && i == 0) - { - type = &compiler.get(type->parent_type); continue; - } // Arrays if (!type->array.empty()) @@ -4615,6 +4625,29 @@ void Compiler::build_function_control_flow_graphs_and_analyze() } } } + + // Find LUTs which are not function local. Only consider this case if the CFG is multi-function, + // otherwise we treat Private as Function trivially. + // Needs to be analyzed from the outside since we have to block the LUT optimization if at least + // one function writes to it. + if (!single_function) + { + for (auto &id : global_variables) + { + auto &var = get(id); + auto &type = get_variable_data_type(var); + + if (is_array(type) && var.storage == StorageClassPrivate && + var.initializer && !var.is_written_to && + ir.ids[var.initializer].get_type() == TypeConstant) + { + get(var.initializer).is_used_as_lut = true; + var.static_expression = var.initializer; + var.statically_assigned = true; + var.remapped_variable = true; + } + } + } } Compiler::CFGBuilder::CFGBuilder(Compiler &compiler_) diff --git a/3rdparty/spirv-cross/spirv_glsl.cpp b/3rdparty/spirv-cross/spirv_glsl.cpp index 0d37ba2e8..f371bca6f 100644 --- a/3rdparty/spirv-cross/spirv_glsl.cpp +++ b/3rdparty/spirv-cross/spirv_glsl.cpp @@ -2568,7 +2568,7 @@ const char *CompilerGLSL::to_storage_qualifiers_glsl(const SPIRVariable &var) return var.storage == StorageClassInput ? "in " : "out "; } else if (var.storage == StorageClassUniformConstant || var.storage == StorageClassUniform || - var.storage == StorageClassPushConstant) + var.storage == StorageClassPushConstant || var.storage == StorageClassAtomicCounter) { return "uniform "; } @@ -16747,8 +16747,11 @@ bool CompilerGLSL::attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method bool condition_is_temporary = forced_temporaries.find(block.condition) == end(forced_temporaries); + bool flushes_phi = flush_phi_required(block.self, block.true_block) || + flush_phi_required(block.self, block.false_block); + // This can work! We only did trivial things which could be forwarded in block body! - if (current_count == statement_count && condition_is_temporary) + if (!flushes_phi && current_count == statement_count && condition_is_temporary) { switch (continue_type) { @@ -16827,7 +16830,10 @@ bool CompilerGLSL::attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method bool condition_is_temporary = forced_temporaries.find(child.condition) == end(forced_temporaries); - if (current_count == statement_count && condition_is_temporary) + bool flushes_phi = flush_phi_required(child.self, child.true_block) || + flush_phi_required(child.self, child.false_block); + + if (!flushes_phi && current_count == statement_count && condition_is_temporary) { uint32_t target_block = child.true_block; diff --git a/3rdparty/spirv-cross/spirv_msl.cpp b/3rdparty/spirv-cross/spirv_msl.cpp index e73751d7f..013ef2301 100644 --- a/3rdparty/spirv-cross/spirv_msl.cpp +++ b/3rdparty/spirv-cross/spirv_msl.cpp @@ -1373,6 +1373,7 @@ void CompilerMSL::emit_entry_point_declarations() const auto &type = get_variable_data_type(var); const auto &buffer_type = get_variable_element_type(var); const string name = to_name(var.self); + if (is_var_runtime_size_array(var)) { if (msl_options.argument_buffers_tier < Options::ArgumentBuffersTier::Tier2) @@ -1391,10 +1392,10 @@ void CompilerMSL::emit_entry_point_declarations() case SPIRType::Image: case SPIRType::Sampler: case SPIRType::AccelerationStructure: - statement("spvDescriptorArray<", type_to_glsl(buffer_type), "> ", name, " {", resource_name, "};"); + statement("spvDescriptorArray<", type_to_glsl(buffer_type, var.self), "> ", name, " {", resource_name, "};"); break; case SPIRType::SampledImage: - statement("spvDescriptorArray<", type_to_glsl(buffer_type), "> ", name, " {", resource_name, "};"); + statement("spvDescriptorArray<", type_to_glsl(buffer_type, var.self), "> ", name, " {", resource_name, "};"); // Unsupported with argument buffer for now. statement("spvDescriptorArray ", name, "Smplr {", name, "Smplr_};"); break; @@ -2377,7 +2378,9 @@ uint32_t CompilerMSL::build_extended_vector_type(uint32_t type_id, uint32_t comp if (basetype != SPIRType::Unknown) type->basetype = basetype; type->self = new_type_id; - type->parent_type = type_id; + // We want parent type to point to the scalar type. + type->parent_type = is_scalar(*p_old_type) ? TypeID(p_old_type->self) : p_old_type->parent_type; + assert(is_scalar(get(type->parent_type))); type->array.clear(); type->array_size_literal.clear(); type->pointer = false; @@ -4450,13 +4453,13 @@ uint32_t CompilerMSL::ensure_correct_builtin_type(uint32_t type_id, BuiltIn buil ((builtin == BuiltInLayer || builtin == BuiltInViewportIndex || builtin == BuiltInFragStencilRefEXT) && pointee_type.basetype != SPIRType::UInt)) { - uint32_t next_id = ir.increase_bound_by(type_is_pointer(type) ? 2 : 1); + uint32_t next_id = ir.increase_bound_by(is_pointer(type) ? 2 : 1); uint32_t base_type_id = next_id++; auto &base_type = set(base_type_id, OpTypeInt); base_type.basetype = SPIRType::UInt; base_type.width = 32; - if (!type_is_pointer(type)) + if (!is_pointer(type)) return base_type_id; uint32_t ptr_type_id = next_id++; @@ -5338,6 +5341,8 @@ void CompilerMSL::emit_header() // This particular line can be overridden during compilation, so make it a flag and not a pragma line. if (suppress_missing_prototypes) statement("#pragma clang diagnostic ignored \"-Wmissing-prototypes\""); + if (suppress_incompatible_pointer_types_discard_qualifiers) + statement("#pragma clang diagnostic ignored \"-Wincompatible-pointer-types-discards-qualifiers\""); // Disable warning about missing braces for array template to make arrays a value type if (spv_function_implementations.count(SPVFuncImplUnsafeArray) != 0) @@ -7487,6 +7492,12 @@ void CompilerMSL::emit_custom_functions() statement(""); break; + case SPVFuncImplImageFence: + statement("template "); + statement("void spvImageFence(ImageT img) { img.fence(); }"); + statement(""); + break; + default: break; } @@ -8963,7 +8974,12 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) // Metal requires explicit fences to break up RAW hazards, even within the same shader invocation if (msl_options.readwrite_texture_fences && p_var && !has_decoration(p_var->self, DecorationNonWritable)) - statement(to_expression(img_id), ".fence();"); + { + add_spv_func_and_recompile(SPVFuncImplImageFence); + // Need to wrap this with a value type, + // since the Metal headers are broken and do not consider case when the image is a reference. + statement("spvImageFence(", to_expression(img_id), ");"); + } emit_texture_op(instruction, false); break; @@ -10127,7 +10143,8 @@ void CompilerMSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id, { string exp; - auto &type = get_pointee_type(expression_type(obj)); + auto &ptr_type = expression_type(obj); + auto &type = get_pointee_type(ptr_type); auto expected_type = type.basetype; if (opcode == OpAtomicUMax || opcode == OpAtomicUMin) expected_type = to_unsigned_basetype(type.width); @@ -10147,15 +10164,13 @@ void CompilerMSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id, remapped_type.basetype = expected_type; auto *var = maybe_get_backing_variable(obj); - if (!var) - SPIRV_CROSS_THROW("No backing variable for atomic operation."); - const auto &res_type = get(var->basetype); + const auto *res_type = var ? &get(var->basetype) : nullptr; + assert(type.storage != StorageClassImage || res_type); bool is_atomic_compare_exchange_strong = op1_is_pointer && op1; bool check_discard = opcode != OpAtomicLoad && needs_frag_discard_checks() && - ((res_type.storage == StorageClassUniformConstant && res_type.basetype == SPIRType::Image) || - var->storage == StorageClassStorageBuffer || var->storage == StorageClassUniform); + ptr_type.storage != StorageClassWorkgroup; // Even compare exchange atomics are vec4 on metal for ... reasons :v uint32_t vec4_temporary_id = 0; @@ -10196,26 +10211,58 @@ void CompilerMSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id, // Will only be false if we're in "force recompile later" mode. if (split_index != string::npos) - exp += join(obj_expression.substr(0, split_index), ".", op, "(", obj_expression.substr(split_index + 1)); + { + auto coord = obj_expression.substr(split_index + 1); + exp += join(obj_expression.substr(0, split_index), ".", op, "("); + if (ptr_type.storage == StorageClassImage && res_type->image.arrayed) + { + switch (res_type->image.dim) + { + case Dim1D: + if (msl_options.texture_1D_as_2D) + exp += join("uint2(", coord, ".x, 0), ", coord, ".y"); + else + exp += join(coord, ".x, ", coord, ".y"); + + break; + case Dim2D: + exp += join(coord, ".xy, ", coord, ".z"); + break; + default: + SPIRV_CROSS_THROW("Cannot do atomics on Cube textures."); + } + } + else if (ptr_type.storage == StorageClassImage && res_type->image.dim == Dim1D && msl_options.texture_1D_as_2D) + exp += join("uint2(", coord, ", 0)"); + else + exp += coord; + } else + { exp += obj_expression; + } } else { exp += string(op) + "_explicit("; exp += "("; // Emulate texture2D atomic operations - if (res_type.storage == StorageClassUniformConstant && res_type.basetype == SPIRType::Image) + if (ptr_type.storage == StorageClassImage) { auto &flags = ir.get_decoration_bitset(var->self); if (decoration_flags_signal_volatile(flags)) exp += "volatile "; exp += "device"; } - else + else if (var && ptr_type.storage != StorageClassPhysicalStorageBuffer) { exp += get_argument_address_space(*var); } + else + { + // Fallback scenario, could happen for raw pointers. + exp += ptr_type.storage == StorageClassWorkgroup ? "threadgroup" : "device"; + } exp += " atomic_"; // For signed and unsigned min/max, we can signal this through the pointer type. @@ -12260,7 +12307,11 @@ string CompilerMSL::to_struct_member(const SPIRType &type, uint32_t member_type_ else decl_type = type_to_glsl(*declared_type, orig_id, true); - auto result = join(pack_pfx, decl_type, " ", qualifier, + const char *overlapping_binding_tag = + has_extended_member_decoration(type.self, index, SPIRVCrossDecorationOverlappingBinding) ? + "// Overlapping binding: " : ""; + + auto result = join(overlapping_binding_tag, pack_pfx, decl_type, " ", qualifier, to_member_name(type, index), member_attribute_qualifier(type, index), array_type, ";"); is_using_builtin_array = false; @@ -13451,7 +13502,7 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args) struct Resource { SPIRVariable *var; - SPIRVariable *descriptor_alias; + SPIRVariable *discrete_descriptor_alias; string name; SPIRType::BaseType basetype; uint32_t index; @@ -13486,9 +13537,12 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args) } } - // Handle descriptor aliasing. We can handle aliasing of buffers by casting pointers, - // but not for typed resources. - SPIRVariable *descriptor_alias = nullptr; + // Handle descriptor aliasing of simple discrete cases. + // We can handle aliasing of buffers by casting pointers. + // The amount of aliasing we can perform for discrete descriptors is very limited. + // For fully mutable-style aliasing, we need argument buffers where we can exploit the fact + // that descriptors are all 8 bytes. + SPIRVariable *discrete_descriptor_alias = nullptr; if (var.storage == StorageClassUniform || var.storage == StorageClassStorageBuffer) { for (auto &resource : resources) @@ -13501,10 +13555,10 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args) (resource.var->storage == StorageClassUniform || resource.var->storage == StorageClassStorageBuffer)) { - descriptor_alias = resource.var; + discrete_descriptor_alias = resource.var; // Self-reference marks that we should declare the resource, // and it's being used as an alias (so we can emit void* instead). - resource.descriptor_alias = resource.var; + resource.discrete_descriptor_alias = resource.var; // Need to promote interlocked usage so that the primary declaration is correct. if (interlocked_resources.count(var_id)) interlocked_resources.insert(resource.var->self); @@ -13541,13 +13595,13 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args) entry_point_bindings.push_back(&var); for (uint32_t i = 0; i < plane_count; i++) - resources.push_back({ &var, descriptor_alias, to_name(var_id), SPIRType::Image, - get_metal_resource_index(var, SPIRType::Image, i), i, secondary_index }); + resources.push_back({&var, discrete_descriptor_alias, to_name(var_id), SPIRType::Image, + get_metal_resource_index(var, SPIRType::Image, i), i, secondary_index }); if (type.image.dim != DimBuffer && !constexpr_sampler) { - resources.push_back({ &var, descriptor_alias, to_sampler_expression(var_id), SPIRType::Sampler, - get_metal_resource_index(var, SPIRType::Sampler), 0, 0 }); + resources.push_back({&var, discrete_descriptor_alias, to_sampler_expression(var_id), SPIRType::Sampler, + get_metal_resource_index(var, SPIRType::Sampler), 0, 0 }); } } else if (!constexpr_sampler) @@ -13557,12 +13611,12 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args) // Don't allocate resource indices for aliases. uint32_t resource_index = ~0u; - if (!descriptor_alias) + if (!discrete_descriptor_alias) resource_index = get_metal_resource_index(var, type.basetype); entry_point_bindings.push_back(&var); - resources.push_back({ &var, descriptor_alias, to_name(var_id), type.basetype, - resource_index, 0, secondary_index }); + resources.push_back({&var, discrete_descriptor_alias, to_name(var_id), type.basetype, + resource_index, 0, secondary_index }); } } }); @@ -13586,9 +13640,9 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args) if (m.members.size() == 0) break; - if (r.descriptor_alias) + if (r.discrete_descriptor_alias) { - if (r.var == r.descriptor_alias) + if (r.var == r.discrete_descriptor_alias) { auto primary_name = join("spvBufferAliasSet", get_decoration(var_id, DecorationDescriptorSet), @@ -14499,24 +14553,6 @@ bool CompilerMSL::type_is_msl_framebuffer_fetch(const SPIRType &type) const msl_options.use_framebuffer_fetch_subpasses; } -bool CompilerMSL::type_is_pointer(const SPIRType &type) const -{ - if (!type.pointer) - return false; - auto &parent_type = get(type.parent_type); - // Safeguards when we forget to set pointer_depth (there is an assert for it in type_to_glsl), - // but the extra check shouldn't hurt. - return (type.pointer_depth > parent_type.pointer_depth) || !parent_type.pointer; -} - -bool CompilerMSL::type_is_pointer_to_pointer(const SPIRType &type) const -{ - if (!type.pointer) - return false; - auto &parent_type = get(type.parent_type); - return type.pointer_depth > parent_type.pointer_depth && type_is_pointer(parent_type); -} - const char *CompilerMSL::descriptor_address_space(uint32_t id, StorageClass storage, const char *plain_address_space) const { if (msl_options.argument_buffers) @@ -14646,7 +14682,7 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg) else { // The type is a pointer type we need to emit cv_qualifier late. - if (type_is_pointer(type)) + if (is_pointer(type)) { decl = type_to_glsl(type, arg.id); if (*cv_qualifier != '\0') @@ -14760,7 +14796,7 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg) // for the reference has to go before the '&', but after the '*'. if (!address_space.empty()) { - if (type_is_pointer(type)) + if (is_pointer(type)) { if (*cv_qualifier == '\0') decl += ' '; @@ -15277,13 +15313,13 @@ string CompilerMSL::type_to_glsl(const SPIRType &type, uint32_t id, bool member) // We could always go this route, but it makes the code unnatural. // Prefer emitting thread T *foo over T thread* foo since it's more readable, // but we'll have to emit thread T * thread * T constant bar; for example. - if (type_is_pointer_to_pointer(type)) + if (is_pointer(type) && is_pointer(*p_parent_type)) type_name = join(type_to_glsl(*p_parent_type, id), " ", type_address_space, " "); else { // Since this is not a pointer-to-pointer, ensure we've dug down to the base type. // Some situations chain pointers even though they are not formally pointers-of-pointers. - while (type_is_pointer(*p_parent_type)) + while (is_pointer(*p_parent_type)) p_parent_type = &get(p_parent_type->parent_type); // If we're emitting BDA, just use the templated type. @@ -16830,7 +16866,7 @@ uint32_t CompilerMSL::get_declared_type_size_msl(const SPIRType &type, bool is_p // stopping when we hit a pointer that is not also an array. int32_t dim_idx = (int32_t)type.array.size() - 1; auto *p_type = &type; - while (!type_is_pointer(*p_type) && dim_idx >= 0) + while (!is_pointer(*p_type) && dim_idx >= 0) { type_size *= to_array_size_literal(*p_type, dim_idx); p_type = &get(p_type->parent_type); @@ -17836,6 +17872,101 @@ bool CompilerMSL::is_supported_argument_buffer_type(const SPIRType &type) const return is_supported_type && !type_is_msl_framebuffer_fetch(type); } +void CompilerMSL::emit_argument_buffer_aliased_descriptor(const SPIRVariable &aliased_var, + const SPIRVariable &base_var) +{ + // To deal with buffer <-> image aliasing, we need to perform an unholy UB ritual. + // A texture type in Metal 3.0 is a pointer. However, we cannot simply cast a pointer to texture. + // What we *can* do is to cast pointer-to-pointer to pointer-to-texture. + + // We need to explicitly reach into the descriptor buffer lvalue, not any spvDescriptorArray wrapper. + auto *var_meta = ir.find_meta(base_var.self); + bool old_explicit_qualifier = var_meta && var_meta->decoration.qualified_alias_explicit_override; + if (var_meta) + var_meta->decoration.qualified_alias_explicit_override = false; + auto unqualified_name = to_name(base_var.self, false); + if (var_meta) + var_meta->decoration.qualified_alias_explicit_override = old_explicit_qualifier; + + // For non-arrayed buffers, we have already performed a de-reference. + // We need a proper lvalue to cast, so strip away the de-reference. + if (unqualified_name.size() > 2 && unqualified_name[0] == '(' && unqualified_name[1] == '*') + { + unqualified_name.erase(unqualified_name.begin(), unqualified_name.begin() + 2); + unqualified_name.pop_back(); + } + + string name; + + auto &var_type = get(aliased_var.basetype); + auto &data_type = get_variable_data_type(aliased_var); + string descriptor_storage = descriptor_address_space(aliased_var.self, aliased_var.storage, ""); + + if (aliased_var.storage == StorageClassUniformConstant) + { + if (is_var_runtime_size_array(aliased_var)) + { + // This becomes a plain pointer to spvDescriptor. + name = join("reinterpret_cast<", descriptor_storage, " ", + type_to_glsl(get_variable_data_type(aliased_var), aliased_var.self, true), ">(&", + unqualified_name, ")"); + } + else + { + name = join("reinterpret_cast<", descriptor_storage, " ", + type_to_glsl(get_variable_data_type(aliased_var), aliased_var.self, true), " &>(", + unqualified_name, ");"); + } + } + else + { + // Buffer types. + bool old_is_using_builtin_array = is_using_builtin_array; + is_using_builtin_array = true; + + bool needs_post_cast_deref = !is_array(data_type); + string ref_type = needs_post_cast_deref ? "&" : join("(&)", type_to_array_glsl(var_type)); + + if (is_var_runtime_size_array(aliased_var)) + { + name = join("reinterpret_cast<", + type_to_glsl(var_type, aliased_var.self, true), " ", descriptor_storage, " *>(&", + unqualified_name, ")"); + } + else + { + name = join(needs_post_cast_deref ? "*" : "", "reinterpret_cast<", + type_to_glsl(var_type, aliased_var.self, true), " ", descriptor_storage, " ", + ref_type, + ">(", unqualified_name, ");"); + } + + if (needs_post_cast_deref) + descriptor_storage = get_type_address_space(var_type, aliased_var.self, false); + + // These kinds of ridiculous casts trigger warnings in compiler. Just ignore them. + if (!suppress_incompatible_pointer_types_discard_qualifiers) + { + suppress_incompatible_pointer_types_discard_qualifiers = true; + force_recompile_guarantee_forward_progress(); + } + + is_using_builtin_array = old_is_using_builtin_array; + } + + if (!is_var_runtime_size_array(aliased_var)) + { + // Lower to temporary, so drop the qualification. + set_qualified_name(aliased_var.self, ""); + statement(descriptor_storage, " auto &", to_name(aliased_var.self), " = ", name); + } + else + { + // This will get wrapped in a separate temporary when a spvDescriptorArray wrapper is emitted. + set_qualified_name(aliased_var.self, name); + } +} + void CompilerMSL::analyze_argument_buffers() { // Gather all used resources and sort them out into argument buffers. @@ -17852,11 +17983,11 @@ void CompilerMSL::analyze_argument_buffers() struct Resource { SPIRVariable *var; - SPIRVariable *descriptor_alias; string name; SPIRType::BaseType basetype; uint32_t index; uint32_t plane; + uint32_t overlapping_var_id; }; SmallVector resources_in_set[kMaxArgumentBuffers]; SmallVector inline_block_vars; @@ -17892,32 +18023,6 @@ void CompilerMSL::analyze_argument_buffers() } } - // Handle descriptor aliasing as well as we can. - // We can handle aliasing of buffers by casting pointers, but not for typed resources. - // Inline UBOs cannot be handled since it's not a pointer, but inline data. - SPIRVariable *descriptor_alias = nullptr; - if (var.storage == StorageClassUniform || var.storage == StorageClassStorageBuffer) - { - for (auto &resource : resources_in_set[desc_set]) - { - if (get_decoration(resource.var->self, DecorationBinding) == - get_decoration(var_id, DecorationBinding) && - resource.basetype == SPIRType::Struct && type.basetype == SPIRType::Struct && - (resource.var->storage == StorageClassUniform || - resource.var->storage == StorageClassStorageBuffer)) - { - descriptor_alias = resource.var; - // Self-reference marks that we should declare the resource, - // and it's being used as an alias (so we can emit void* instead). - resource.descriptor_alias = resource.var; - // Need to promote interlocked usage so that the primary declaration is correct. - if (interlocked_resources.count(var_id)) - interlocked_resources.insert(resource.var->self); - break; - } - } - } - uint32_t binding = get_decoration(var_id, DecorationBinding); if (type.basetype == SPIRType::SampledImage) { @@ -17931,14 +18036,14 @@ void CompilerMSL::analyze_argument_buffers() { uint32_t image_resource_index = get_metal_resource_index(var, SPIRType::Image, i); resources_in_set[desc_set].push_back( - { &var, descriptor_alias, to_name(var_id), SPIRType::Image, image_resource_index, i }); + { &var, to_name(var_id), SPIRType::Image, image_resource_index, i, 0 }); } if (type.image.dim != DimBuffer && !constexpr_sampler) { uint32_t sampler_resource_index = get_metal_resource_index(var, SPIRType::Sampler); resources_in_set[desc_set].push_back( - { &var, descriptor_alias, to_sampler_expression(var_id), SPIRType::Sampler, sampler_resource_index, 0 }); + { &var, to_sampler_expression(var_id), SPIRType::Sampler, sampler_resource_index, 0, 0 }); } } else if (inline_uniform_blocks.count(SetBindingPair{ desc_set, binding })) @@ -17951,19 +18056,17 @@ void CompilerMSL::analyze_argument_buffers() // Inline uniform blocks are always emitted at the end. add_resource_name(var_id); - uint32_t resource_index = ~0u; - if (!descriptor_alias) - resource_index = get_metal_resource_index(var, type.basetype); + uint32_t resource_index = get_metal_resource_index(var, type.basetype); resources_in_set[desc_set].push_back( - { &var, descriptor_alias, to_name(var_id), type.basetype, resource_index, 0 }); + { &var, to_name(var_id), type.basetype, resource_index, 0, 0 }); // Emulate texture2D atomic operations if (atomic_image_vars_emulated.count(var.self)) { uint32_t buffer_resource_index = get_metal_resource_index(var, SPIRType::AtomicCounter, 0); resources_in_set[desc_set].push_back( - { &var, descriptor_alias, to_name(var_id) + "_atomic", SPIRType::Struct, buffer_resource_index, 0 }); + { &var, to_name(var_id) + "_atomic", SPIRType::Struct, buffer_resource_index, 0, 0 }); } } @@ -18011,7 +18114,7 @@ void CompilerMSL::analyze_argument_buffers() set_decoration(var_id, DecorationDescriptorSet, desc_set); set_decoration(var_id, DecorationBinding, kSwizzleBufferBinding); resources_in_set[desc_set].push_back( - { &var, nullptr, to_name(var_id), SPIRType::UInt, get_metal_resource_index(var, SPIRType::UInt), 0 }); + { &var, to_name(var_id), SPIRType::UInt, get_metal_resource_index(var, SPIRType::UInt), 0, 0 }); } if (set_needs_buffer_sizes[desc_set]) @@ -18022,7 +18125,7 @@ void CompilerMSL::analyze_argument_buffers() set_decoration(var_id, DecorationDescriptorSet, desc_set); set_decoration(var_id, DecorationBinding, kBufferSizeBufferBinding); resources_in_set[desc_set].push_back( - { &var, nullptr, to_name(var_id), SPIRType::UInt, get_metal_resource_index(var, SPIRType::UInt), 0 }); + { &var, to_name(var_id), SPIRType::UInt, get_metal_resource_index(var, SPIRType::UInt), 0, 0 }); } } } @@ -18034,7 +18137,7 @@ void CompilerMSL::analyze_argument_buffers() uint32_t desc_set = get_decoration(var_id, DecorationDescriptorSet); add_resource_name(var_id); resources_in_set[desc_set].push_back( - { &var, nullptr, to_name(var_id), SPIRType::Struct, get_metal_resource_index(var, SPIRType::Struct), 0 }); + { &var, to_name(var_id), SPIRType::Struct, get_metal_resource_index(var, SPIRType::Struct), 0, 0 }); } for (uint32_t desc_set = 0; desc_set < kMaxArgumentBuffers; desc_set++) @@ -18083,6 +18186,22 @@ void CompilerMSL::analyze_argument_buffers() return tie(lhs.index, lhs.basetype) < tie(rhs.index, rhs.basetype); }); + for (size_t i = 0; i < resources.size() - 1; i++) + { + auto &r1 = resources[i]; + auto &r2 = resources[i + 1]; + + if (r1.index == r2.index) + { + if (r1.overlapping_var_id) + r2.overlapping_var_id = r1.overlapping_var_id; + else + r2.overlapping_var_id = r1.var->self; + + set_extended_decoration(r2.var->self, SPIRVCrossDecorationOverlappingBinding, r2.overlapping_var_id); + } + } + uint32_t member_index = 0; uint32_t next_arg_buff_index = 0; for (auto &resource : resources) @@ -18098,43 +18217,40 @@ void CompilerMSL::analyze_argument_buffers() if (msl_options.pad_argument_buffer_resources) { auto &rez_bind = get_argument_buffer_resource(desc_set, next_arg_buff_index); - if (!resource.descriptor_alias) + while (resource.index > next_arg_buff_index) { - while (resource.index > next_arg_buff_index) + switch (rez_bind.basetype) { - switch (rez_bind.basetype) - { - case SPIRType::Void: - case SPIRType::Boolean: - case SPIRType::SByte: - case SPIRType::UByte: - case SPIRType::Short: - case SPIRType::UShort: - case SPIRType::Int: - case SPIRType::UInt: - case SPIRType::Int64: - case SPIRType::UInt64: - case SPIRType::AtomicCounter: - case SPIRType::Half: - case SPIRType::Float: - case SPIRType::Double: - add_argument_buffer_padding_buffer_type(buffer_type, member_index, next_arg_buff_index, rez_bind); - break; - case SPIRType::Image: - add_argument_buffer_padding_image_type(buffer_type, member_index, next_arg_buff_index, rez_bind); - break; - case SPIRType::Sampler: + case SPIRType::Void: + case SPIRType::Boolean: + case SPIRType::SByte: + case SPIRType::UByte: + case SPIRType::Short: + case SPIRType::UShort: + case SPIRType::Int: + case SPIRType::UInt: + case SPIRType::Int64: + case SPIRType::UInt64: + case SPIRType::AtomicCounter: + case SPIRType::Half: + case SPIRType::Float: + case SPIRType::Double: + add_argument_buffer_padding_buffer_type(buffer_type, member_index, next_arg_buff_index, rez_bind); + break; + case SPIRType::Image: + add_argument_buffer_padding_image_type(buffer_type, member_index, next_arg_buff_index, rez_bind); + break; + case SPIRType::Sampler: + add_argument_buffer_padding_sampler_type(buffer_type, member_index, next_arg_buff_index, rez_bind); + break; + case SPIRType::SampledImage: + if (next_arg_buff_index == rez_bind.msl_sampler) add_argument_buffer_padding_sampler_type(buffer_type, member_index, next_arg_buff_index, rez_bind); - break; - case SPIRType::SampledImage: - if (next_arg_buff_index == rez_bind.msl_sampler) - add_argument_buffer_padding_sampler_type(buffer_type, member_index, next_arg_buff_index, rez_bind); - else - add_argument_buffer_padding_image_type(buffer_type, member_index, next_arg_buff_index, rez_bind); - break; - default: - break; - } + else + add_argument_buffer_padding_image_type(buffer_type, member_index, next_arg_buff_index, rez_bind); + break; + default: + break; } } @@ -18182,23 +18298,29 @@ void CompilerMSL::analyze_argument_buffers() { // Drop pointer information when we emit the resources into a struct. buffer_type.member_types.push_back(get_variable_data_type_id(var)); - if (resource.plane == 0) + if (has_extended_decoration(var.self, SPIRVCrossDecorationOverlappingBinding)) + { + if (!msl_options.supports_msl_version(3, 0)) + SPIRV_CROSS_THROW("Full mutable aliasing of argument buffer descriptors only works on Metal 3+."); + + auto &entry_func = get(ir.default_entry_point); + entry_func.fixup_hooks_in.push_back([this, resource]() { + emit_argument_buffer_aliased_descriptor(*resource.var, this->get(resource.overlapping_var_id)); + }); + } + else if (resource.plane == 0) + { set_qualified_name(var.self, join(to_name(buffer_variable_id), ".", mbr_name)); + } } else if (buffers_requiring_dynamic_offset.count(pair)) { - if (resource.descriptor_alias) - SPIRV_CROSS_THROW("Descriptor aliasing is currently not supported with dynamic offsets."); - // Don't set the qualified name here; we'll define a variable holding the corrected buffer address later. buffer_type.member_types.push_back(var.basetype); buffers_requiring_dynamic_offset[pair].second = var.self; } else if (inline_uniform_blocks.count(pair)) { - if (resource.descriptor_alias) - SPIRV_CROSS_THROW("Descriptor aliasing is currently not supported with inline UBOs."); - // Put the buffer block itself into the argument buffer. buffer_type.member_types.push_back(get_variable_data_type_id(var)); set_qualified_name(var.self, join(to_name(buffer_variable_id), ".", mbr_name)); @@ -18231,11 +18353,22 @@ void CompilerMSL::analyze_argument_buffers() } else { - if (!resource.descriptor_alias || resource.descriptor_alias == resource.var) - buffer_type.member_types.push_back(var.basetype); + buffer_type.member_types.push_back(var.basetype); + if (has_extended_decoration(var.self, SPIRVCrossDecorationOverlappingBinding)) + { + // Casting raw pointers is fine since their ABI is fixed, but anything opaque is deeply questionable on Metal 2. + if (get(resource.overlapping_var_id).storage == StorageClassUniformConstant && + !msl_options.supports_msl_version(3, 0)) + { + SPIRV_CROSS_THROW("Full mutable aliasing of argument buffer descriptors only works on Metal 3+."); + } - if (resource.descriptor_alias && resource.descriptor_alias != resource.var) - buffer_aliases_argument.push_back({ var.self, resource.descriptor_alias->self }); + auto &entry_func = get(ir.default_entry_point); + + entry_func.fixup_hooks_in.push_back([this, resource]() { + emit_argument_buffer_aliased_descriptor(*resource.var, this->get(resource.overlapping_var_id)); + }); + } else if (type.array.empty()) set_qualified_name(var.self, join("(*", to_name(buffer_variable_id), ".", mbr_name, ")")); else @@ -18247,6 +18380,8 @@ void CompilerMSL::analyze_argument_buffers() resource.index); set_extended_member_decoration(buffer_type.self, member_index, SPIRVCrossDecorationInterfaceOrigID, var.self); + if (has_extended_decoration(var.self, SPIRVCrossDecorationOverlappingBinding)) + set_extended_member_decoration(buffer_type.self, member_index, SPIRVCrossDecorationOverlappingBinding); member_index++; } } diff --git a/3rdparty/spirv-cross/spirv_msl.hpp b/3rdparty/spirv-cross/spirv_msl.hpp index ed9180972..41c14e28c 100644 --- a/3rdparty/spirv-cross/spirv_msl.hpp +++ b/3rdparty/spirv-cross/spirv_msl.hpp @@ -824,7 +824,8 @@ protected: SPVFuncImplVariableSizedDescriptor, SPVFuncImplVariableDescriptorArray, SPVFuncImplPaddedStd140, - SPVFuncImplReduceAdd + SPVFuncImplReduceAdd, + SPVFuncImplImageFence }; // If the underlying resource has been used for comparison then duplicate loads of that resource must be too @@ -1225,6 +1226,9 @@ protected: uint32_t argument_buffer_discrete_mask = 0; uint32_t argument_buffer_device_storage_mask = 0; + void emit_argument_buffer_aliased_descriptor(const SPIRVariable &aliased_var, + const SPIRVariable &base_var); + void analyze_argument_buffers(); bool descriptor_set_is_argument_buffer(uint32_t desc_set) const; MSLResourceBinding &get_argument_buffer_resource(uint32_t desc_set, uint32_t arg_idx); @@ -1239,14 +1243,13 @@ protected: uint32_t build_msl_interpolant_type(uint32_t type_id, bool is_noperspective); bool suppress_missing_prototypes = false; + bool suppress_incompatible_pointer_types_discard_qualifiers = false; void add_spv_func_and_recompile(SPVFuncImpl spv_func); void activate_argument_buffer_resources(); bool type_is_msl_framebuffer_fetch(const SPIRType &type) const; - bool type_is_pointer(const SPIRType &type) const; - bool type_is_pointer_to_pointer(const SPIRType &type) const; bool is_supported_argument_buffer_type(const SPIRType &type) const; bool variable_storage_requires_stage_io(spv::StorageClass storage) const;