From ad4a3e095bfc1e95184e8399e8ee6faf6c035849 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=91=D1=80=D0=B0=D0=BD=D0=B8=D0=BC=D0=B8=D1=80=20=D0=9A?= =?UTF-8?q?=D0=B0=D1=80=D0=B0=D1=9F=D0=B8=D1=9B?= Date: Mon, 27 May 2024 21:10:01 -0700 Subject: [PATCH] Updated spirv-cross. --- 3rdparty/spirv-cross/main.cpp | 19 + 3rdparty/spirv-cross/spirv_cpp.cpp | 10 +- 3rdparty/spirv-cross/spirv_cross.cpp | 106 +++- 3rdparty/spirv-cross/spirv_cross.hpp | 2 + 3rdparty/spirv-cross/spirv_cross_c.cpp | 4 + 3rdparty/spirv-cross/spirv_cross_c.h | 3 +- .../spirv-cross/spirv_cross_parsed_ir.hpp | 2 +- 3rdparty/spirv-cross/spirv_glsl.cpp | 231 +++++++- 3rdparty/spirv-cross/spirv_glsl.hpp | 15 +- 3rdparty/spirv-cross/spirv_hlsl.cpp | 34 +- 3rdparty/spirv-cross/spirv_msl.cpp | 518 ++++++++++++------ 3rdparty/spirv-cross/spirv_msl.hpp | 48 +- 12 files changed, 755 insertions(+), 237 deletions(-) diff --git a/3rdparty/spirv-cross/main.cpp b/3rdparty/spirv-cross/main.cpp index 7e14f1102..6361fc8ab 100644 --- a/3rdparty/spirv-cross/main.cpp +++ b/3rdparty/spirv-cross/main.cpp @@ -675,11 +675,13 @@ struct CLIArguments bool msl_force_sample_rate_shading = false; bool msl_manual_helper_invocation_updates = true; bool msl_check_discarded_frag_stores = false; + bool msl_force_fragment_with_side_effects_execution = false; bool msl_sample_dref_lod_array_as_grad = false; bool msl_runtime_array_rich_descriptor = false; bool msl_replace_recursive_inputs = false; bool msl_readwrite_texture_fences = true; bool msl_agx_manual_cube_grad_fixup = false; + bool msl_input_attachment_is_ds_attachment = false; const char *msl_combined_sampler_suffix = nullptr; bool glsl_emit_push_constant_as_ubo = false; bool glsl_emit_ubo_as_plain_uniforms = false; @@ -872,6 +874,10 @@ static void print_help_msl() "\t[--msl-runtime-array-rich-descriptor]:\n\t\tWhen declaring a runtime array of SSBOs, declare an array of {ptr, len} pairs to support OpArrayLength.\n" "\t[--msl-replace-recursive-inputs]:\n\t\tWorks around a Metal 3.1 regression bug, which causes an infinite recursion crash during Metal's analysis of an entry point input structure that itself contains internal recursion.\n" "\t[--msl-texture-buffer-native]:\n\t\tEnable native support for texel buffers. Otherwise, it is emulated as a normal texture.\n" + "\t[--msl-input-attachment-is-ds-attachment]:\n\t\tAdds a simple depth passthrough in fragment shaders when they do not modify the depth value.\n" + "\t\tRequired to force Metal to write to the depth/stencil attachment post fragment execution.\n" + "\t\tOtherwise, Metal may optimize the write to pre fragment execution which goes against the Vulkan spec.\n" + "\t\tOnly required if an input attachment and depth/stencil attachment reference the same resource.\n" "\t[--msl-framebuffer-fetch]:\n\t\tImplement subpass inputs with frame buffer fetch.\n" "\t\tEmits [[color(N)]] inputs in fragment stage.\n" "\t\tRequires an Apple GPU.\n" @@ -956,6 +962,15 @@ static void print_help_msl() "\t\tSome Metal devices have a bug where stores to resources from a fragment shader\n" "\t\tcontinue to execute, even when the fragment is discarded. These checks\n" "\t\tprevent these stores from executing.\n" + "\t[--msl-force-frag-execution]:\n\t\tEnforces fragment execution to avoid early discard by Metal\n" + "\t\tMetal will prematurely discard fragments before execution when side effects are present.\n" + "\t\tThis condition is triggered under the following conditions (side effect operations happen before discard):\n" + "\t\t\t1. Pre fragment depth test fails.\n" + "\t\t\t2. Modify depth value in fragment shader to constant value known at compile time.\n" + "\t\t\t3. Constant value will not pass post fragment depth test.\n" + "\t\t\t4. Fragment is always discarded in fragment execution.\n" + "\t\tHowever, Vulkan expects fragment shader to be executed since it cannot be discarded until the discard\n" + "\t\tpresent in the fragment execution, which would also execute the operations with side effects.\n" "\t[--msl-sample-dref-lod-array-as-grad]:\n\t\tUse a gradient instead of a level argument.\n" "\t\tSome Metal devices have a bug where the level() argument to\n" "\t\tdepth2d_array::sample_compare() in a fragment shader is biased by some\n" @@ -1242,10 +1257,12 @@ static string compile_iteration(const CLIArguments &args, std::vector msl_opts.force_sample_rate_shading = args.msl_force_sample_rate_shading; msl_opts.manual_helper_invocation_updates = args.msl_manual_helper_invocation_updates; msl_opts.check_discarded_frag_stores = args.msl_check_discarded_frag_stores; + msl_opts.force_fragment_with_side_effects_execution = args.msl_force_fragment_with_side_effects_execution; msl_opts.sample_dref_lod_array_as_grad = args.msl_sample_dref_lod_array_as_grad; msl_opts.ios_support_base_vertex_instance = true; msl_opts.runtime_array_rich_descriptor = args.msl_runtime_array_rich_descriptor; msl_opts.replace_recursive_inputs = args.msl_replace_recursive_inputs; + msl_opts.input_attachment_is_ds_attachment = args.msl_input_attachment_is_ds_attachment; msl_opts.readwrite_texture_fences = args.msl_readwrite_texture_fences; msl_opts.agx_manual_cube_grad_fixup = args.msl_agx_manual_cube_grad_fixup; msl_comp->set_msl_options(msl_opts); @@ -1800,6 +1817,7 @@ static int main_inner(int argc, char *argv[]) cbs.add("--msl-no-manual-helper-invocation-updates", [&args](CLIParser &) { args.msl_manual_helper_invocation_updates = false; }); cbs.add("--msl-check-discarded-frag-stores", [&args](CLIParser &) { args.msl_check_discarded_frag_stores = true; }); + cbs.add("--msl-force-frag-with-side-effects-execution", [&args](CLIParser &) { args.msl_force_fragment_with_side_effects_execution = true; }); cbs.add("--msl-sample-dref-lod-array-as-grad", [&args](CLIParser &) { args.msl_sample_dref_lod_array_as_grad = true; }); cbs.add("--msl-no-readwrite-texture-fences", [&args](CLIParser &) { args.msl_readwrite_texture_fences = false; }); @@ -1811,6 +1829,7 @@ static int main_inner(int argc, char *argv[]) [&args](CLIParser &) { args.msl_runtime_array_rich_descriptor = true; }); cbs.add("--msl-replace-recursive-inputs", [&args](CLIParser &) { args.msl_replace_recursive_inputs = true; }); + cbs.add("--msl-input-attachment-is-ds-attachment", [&args](CLIParser &) { args.msl_input_attachment_is_ds_attachment = true; }); cbs.add("--extension", [&args](CLIParser &parser) { args.extensions.push_back(parser.next_string()); }); cbs.add("--rename-entry-point", [&args](CLIParser &parser) { auto old_name = parser.next_string(); diff --git a/3rdparty/spirv-cross/spirv_cpp.cpp b/3rdparty/spirv-cross/spirv_cpp.cpp index dd0a84c83..61c30e9e5 100644 --- a/3rdparty/spirv-cross/spirv_cpp.cpp +++ b/3rdparty/spirv-cross/spirv_cpp.cpp @@ -40,7 +40,7 @@ void CompilerCPP::emit_buffer_block(const SPIRVariable &var) emit_block_struct(type); auto buffer_name = to_name(type.self); - statement("internal::Resource<", buffer_name, type_to_array_glsl(type), "> ", instance_name, "__;"); + statement("internal::Resource<", buffer_name, type_to_array_glsl(type, var.self), "> ", instance_name, "__;"); statement_no_indent("#define ", instance_name, " __res->", instance_name, "__.get()"); resource_registrations.push_back( join("s.register_resource(", instance_name, "__", ", ", descriptor_set, ", ", binding, ");")); @@ -68,7 +68,7 @@ void CompilerCPP::emit_interface_block(const SPIRVariable &var) else buffer_name = type_to_glsl(type); - statement("internal::", qual, "<", buffer_name, type_to_array_glsl(type), "> ", instance_name, "__;"); + statement("internal::", qual, "<", buffer_name, type_to_array_glsl(type, var.self), "> ", instance_name, "__;"); statement_no_indent("#define ", instance_name, " __res->", instance_name, "__.get()"); resource_registrations.push_back(join("s.register_", lowerqual, "(", instance_name, "__", ", ", location, ");")); statement(""); @@ -100,14 +100,14 @@ void CompilerCPP::emit_uniform(const SPIRVariable &var) if (type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage || type.basetype == SPIRType::AtomicCounter) { - statement("internal::Resource<", type_name, type_to_array_glsl(type), "> ", instance_name, "__;"); + statement("internal::Resource<", type_name, type_to_array_glsl(type, var.self), "> ", instance_name, "__;"); statement_no_indent("#define ", instance_name, " __res->", instance_name, "__.get()"); resource_registrations.push_back( join("s.register_resource(", instance_name, "__", ", ", descriptor_set, ", ", binding, ");")); } else { - statement("internal::UniformConstant<", type_name, type_to_array_glsl(type), "> ", instance_name, "__;"); + statement("internal::UniformConstant<", type_name, type_to_array_glsl(type, var.self), "> ", instance_name, "__;"); statement_no_indent("#define ", instance_name, " __res->", instance_name, "__.get()"); resource_registrations.push_back( join("s.register_uniform_constant(", instance_name, "__", ", ", location, ");")); @@ -130,7 +130,7 @@ void CompilerCPP::emit_push_constant_block(const SPIRVariable &var) auto buffer_name = to_name(type.self); auto instance_name = to_name(var.self); - statement("internal::PushConstant<", buffer_name, type_to_array_glsl(type), "> ", instance_name, ";"); + statement("internal::PushConstant<", buffer_name, type_to_array_glsl(type, var.self), "> ", instance_name, ";"); statement_no_indent("#define ", instance_name, " __res->", instance_name, ".get()"); resource_registrations.push_back(join("s.register_push_constant(", instance_name, "__", ");")); statement(""); diff --git a/3rdparty/spirv-cross/spirv_cross.cpp b/3rdparty/spirv-cross/spirv_cross.cpp index 244c40a7a..8c3e7d381 100644 --- a/3rdparty/spirv-cross/spirv_cross.cpp +++ b/3rdparty/spirv-cross/spirv_cross.cpp @@ -93,6 +93,97 @@ bool Compiler::variable_storage_is_aliased(const SPIRVariable &v) return !is_restrict && (ssbo || image || counter || buffer_reference); } +bool Compiler::block_is_control_dependent(const SPIRBlock &block) +{ + for (auto &i : block.ops) + { + auto ops = stream(i); + auto op = static_cast(i.op); + + switch (op) + { + case OpFunctionCall: + { + uint32_t func = ops[2]; + if (function_is_control_dependent(get(func))) + return true; + break; + } + + // Derivatives + case OpDPdx: + case OpDPdxCoarse: + case OpDPdxFine: + case OpDPdy: + case OpDPdyCoarse: + case OpDPdyFine: + case OpFwidth: + case OpFwidthCoarse: + case OpFwidthFine: + + // Anything implicit LOD + case OpImageSampleImplicitLod: + case OpImageSampleDrefImplicitLod: + case OpImageSampleProjImplicitLod: + case OpImageSampleProjDrefImplicitLod: + case OpImageSparseSampleImplicitLod: + case OpImageSparseSampleDrefImplicitLod: + case OpImageSparseSampleProjImplicitLod: + case OpImageSparseSampleProjDrefImplicitLod: + case OpImageQueryLod: + case OpImageDrefGather: + case OpImageGather: + case OpImageSparseDrefGather: + case OpImageSparseGather: + + // Anything subgroups + case OpGroupNonUniformElect: + case OpGroupNonUniformAll: + case OpGroupNonUniformAny: + case OpGroupNonUniformAllEqual: + case OpGroupNonUniformBroadcast: + case OpGroupNonUniformBroadcastFirst: + case OpGroupNonUniformBallot: + case OpGroupNonUniformInverseBallot: + case OpGroupNonUniformBallotBitExtract: + case OpGroupNonUniformBallotBitCount: + case OpGroupNonUniformBallotFindLSB: + case OpGroupNonUniformBallotFindMSB: + case OpGroupNonUniformShuffle: + case OpGroupNonUniformShuffleXor: + case OpGroupNonUniformShuffleUp: + case OpGroupNonUniformShuffleDown: + case OpGroupNonUniformIAdd: + case OpGroupNonUniformFAdd: + case OpGroupNonUniformIMul: + case OpGroupNonUniformFMul: + case OpGroupNonUniformSMin: + case OpGroupNonUniformUMin: + case OpGroupNonUniformFMin: + case OpGroupNonUniformSMax: + case OpGroupNonUniformUMax: + case OpGroupNonUniformFMax: + case OpGroupNonUniformBitwiseAnd: + case OpGroupNonUniformBitwiseOr: + case OpGroupNonUniformBitwiseXor: + case OpGroupNonUniformLogicalAnd: + case OpGroupNonUniformLogicalOr: + case OpGroupNonUniformLogicalXor: + case OpGroupNonUniformQuadBroadcast: + case OpGroupNonUniformQuadSwap: + + // Control barriers + case OpControlBarrier: + return true; + + default: + break; + } + } + + return false; +} + bool Compiler::block_is_pure(const SPIRBlock &block) { // This is a global side effect of the function. @@ -247,18 +338,21 @@ string Compiler::to_name(uint32_t id, bool allow_alias) const bool Compiler::function_is_pure(const SPIRFunction &func) { for (auto block : func.blocks) - { if (!block_is_pure(get(block))) - { - //fprintf(stderr, "Function %s is impure!\n", to_name(func.self).c_str()); return false; - } - } - //fprintf(stderr, "Function %s is pure!\n", to_name(func.self).c_str()); return true; } +bool Compiler::function_is_control_dependent(const SPIRFunction &func) +{ + for (auto block : func.blocks) + if (block_is_control_dependent(get(block))) + return true; + + return false; +} + void Compiler::register_global_read_dependencies(const SPIRBlock &block, uint32_t id) { for (auto &i : block.ops) diff --git a/3rdparty/spirv-cross/spirv_cross.hpp b/3rdparty/spirv-cross/spirv_cross.hpp index 2ba602502..e9062b485 100644 --- a/3rdparty/spirv-cross/spirv_cross.hpp +++ b/3rdparty/spirv-cross/spirv_cross.hpp @@ -744,6 +744,8 @@ protected: bool function_is_pure(const SPIRFunction &func); bool block_is_pure(const SPIRBlock &block); + bool function_is_control_dependent(const SPIRFunction &func); + bool block_is_control_dependent(const SPIRBlock &block); bool execution_is_branchless(const SPIRBlock &from, const SPIRBlock &to) const; bool execution_is_direct_branch(const SPIRBlock &from, const SPIRBlock &to) const; diff --git a/3rdparty/spirv-cross/spirv_cross_c.cpp b/3rdparty/spirv-cross/spirv_cross_c.cpp index 679333dac..e0cc68ca9 100644 --- a/3rdparty/spirv-cross/spirv_cross_c.cpp +++ b/3rdparty/spirv-cross/spirv_cross_c.cpp @@ -754,6 +754,10 @@ spvc_result spvc_compiler_options_set_uint(spvc_compiler_options options, spvc_c case SPVC_COMPILER_OPTION_MSL_AGX_MANUAL_CUBE_GRAD_FIXUP: options->msl.agx_manual_cube_grad_fixup = value != 0; break; + + case SPVC_COMPILER_OPTION_MSL_FORCE_FRAGMENT_WITH_SIDE_EFFECTS_EXECUTION: + options->msl.force_fragment_with_side_effects_execution = value != 0; + break; #endif default: diff --git a/3rdparty/spirv-cross/spirv_cross_c.h b/3rdparty/spirv-cross/spirv_cross_c.h index 47369964a..acae93558 100644 --- a/3rdparty/spirv-cross/spirv_cross_c.h +++ b/3rdparty/spirv-cross/spirv_cross_c.h @@ -40,7 +40,7 @@ extern "C" { /* Bumped if ABI or API breaks backwards compatibility. */ #define SPVC_C_API_VERSION_MAJOR 0 /* Bumped if APIs or enumerations are added in a backwards compatible way. */ -#define SPVC_C_API_VERSION_MINOR 59 +#define SPVC_C_API_VERSION_MINOR 60 /* Bumped if internal implementation details change. */ #define SPVC_C_API_VERSION_PATCH 0 @@ -728,6 +728,7 @@ typedef enum spvc_compiler_option SPVC_COMPILER_OPTION_MSL_READWRITE_TEXTURE_FENCES = 86 | SPVC_COMPILER_OPTION_MSL_BIT, SPVC_COMPILER_OPTION_MSL_REPLACE_RECURSIVE_INPUTS = 87 | SPVC_COMPILER_OPTION_MSL_BIT, SPVC_COMPILER_OPTION_MSL_AGX_MANUAL_CUBE_GRAD_FIXUP = 88 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_FORCE_FRAGMENT_WITH_SIDE_EFFECTS_EXECUTION = 89 | SPVC_COMPILER_OPTION_MSL_BIT, SPVC_COMPILER_OPTION_INT_MAX = 0x7fffffff } spvc_compiler_option; diff --git a/3rdparty/spirv-cross/spirv_cross_parsed_ir.hpp b/3rdparty/spirv-cross/spirv_cross_parsed_ir.hpp index 7f35c3815..3892248aa 100644 --- a/3rdparty/spirv-cross/spirv_cross_parsed_ir.hpp +++ b/3rdparty/spirv-cross/spirv_cross_parsed_ir.hpp @@ -169,7 +169,7 @@ public: ~LoopLock(); private: - uint32_t *lock; + uint32_t *lock = nullptr; }; // This must be held while iterating over a type ID array. diff --git a/3rdparty/spirv-cross/spirv_glsl.cpp b/3rdparty/spirv-cross/spirv_glsl.cpp index f371bca6f..3f13febcc 100644 --- a/3rdparty/spirv-cross/spirv_glsl.cpp +++ b/3rdparty/spirv-cross/spirv_glsl.cpp @@ -639,7 +639,8 @@ void CompilerGLSL::find_static_extensions() void CompilerGLSL::require_polyfill(Polyfill polyfill, bool relaxed) { - uint32_t &polyfills = (relaxed && options.es) ? required_polyfills_relaxed : required_polyfills; + uint32_t &polyfills = (relaxed && (options.es || options.vulkan_semantics)) ? + required_polyfills_relaxed : required_polyfills; if ((polyfills & polyfill) == 0) { @@ -715,7 +716,7 @@ string CompilerGLSL::compile() if (required_polyfills != 0) emit_polyfills(required_polyfills, false); - if (options.es && required_polyfills_relaxed != 0) + if ((options.es || options.vulkan_semantics) && required_polyfills_relaxed != 0) emit_polyfills(required_polyfills_relaxed, true); emit_function(get(ir.default_entry_point), Bitset()); @@ -2434,7 +2435,7 @@ void CompilerGLSL::emit_buffer_reference_block(uint32_t type_id, bool forward_de else { auto &pointee_type = get_pointee_type(type); - statement(type_to_glsl(pointee_type), " value", type_to_array_glsl(pointee_type), ";"); + statement(type_to_glsl(pointee_type), " value", type_to_array_glsl(pointee_type, 0), ";"); } end_scope_decl(); @@ -2513,7 +2514,7 @@ void CompilerGLSL::emit_buffer_block_native(const SPIRVariable &var) // It will need to be reset if we have to recompile. preserve_alias_on_reset(var.self); add_resource_name(var.self); - end_scope_decl(to_name(var.self) + type_to_array_glsl(type)); + end_scope_decl(to_name(var.self) + type_to_array_glsl(type, var.self)); statement(""); } @@ -2780,7 +2781,7 @@ void CompilerGLSL::emit_interface_block(const SPIRVariable &var) } add_resource_name(var.self); - end_scope_decl(join(to_name(var.self), type_to_array_glsl(type))); + end_scope_decl(join(to_name(var.self), type_to_array_glsl(type, var.self))); statement(""); } } @@ -3977,7 +3978,7 @@ void CompilerGLSL::emit_output_variable_initializer(const SPIRVariable &var) auto &c = get(var.initializer); for (uint32_t j = 0; j < array_size; j++) exprs.push_back(to_expression(get(c.subconstants[j]).subconstants[i])); - statement("const ", type_to_glsl(array_type), " ", lut_name, type_to_array_glsl(array_type), " = ", + statement("const ", type_to_glsl(array_type), " ", lut_name, type_to_array_glsl(array_type, 0), " = ", type_to_glsl_constructor(array_type), "(", merge(exprs, ", "), ");"); } @@ -4035,7 +4036,7 @@ void CompilerGLSL::emit_output_variable_initializer(const SPIRVariable &var) else if (is_control_point) { auto lut_name = join("_", var.self, "_init"); - statement("const ", type_to_glsl(type), " ", lut_name, type_to_array_glsl(type), + statement("const ", type_to_glsl(type), " ", lut_name, type_to_array_glsl(type, 0), " = ", to_expression(var.initializer), ";"); entry_func.fixup_hooks_in.push_back([&, lut_name]() { statement(to_expression(var.self), "[gl_InvocationID] = ", lut_name, "[gl_InvocationID];"); @@ -4060,7 +4061,7 @@ void CompilerGLSL::emit_output_variable_initializer(const SPIRVariable &var) { auto lut_name = join("_", var.self, "_init"); statement("const ", type_to_glsl(type), " ", lut_name, - type_to_array_glsl(type), " = ", to_expression(var.initializer), ";"); + type_to_array_glsl(type, var.self), " = ", to_expression(var.initializer), ";"); entry_func.fixup_hooks_in.push_back([&, lut_name, is_patch]() { if (is_patch) { @@ -4842,6 +4843,109 @@ void CompilerGLSL::emit_polyfills(uint32_t polyfills, bool relaxed) end_scope(); statement(""); } + + if (!relaxed) + { + static const Polyfill polys[3][3] = { + { PolyfillNMin16, PolyfillNMin32, PolyfillNMin64 }, + { PolyfillNMax16, PolyfillNMax32, PolyfillNMax64 }, + { PolyfillNClamp16, PolyfillNClamp32, PolyfillNClamp64 }, + }; + + static const GLSLstd450 glsl_ops[] = { GLSLstd450NMin, GLSLstd450NMax, GLSLstd450NClamp }; + static const char *spv_ops[] = { "spvNMin", "spvNMax", "spvNClamp" }; + bool has_poly = false; + + for (uint32_t i = 0; i < 3; i++) + { + for (uint32_t j = 0; j < 3; j++) + { + if ((polyfills & polys[i][j]) == 0) + continue; + + const char *types[3][4] = { + { "float16_t", "f16vec2", "f16vec3", "f16vec4" }, + { "float", "vec2", "vec3", "vec4" }, + { "double", "dvec2", "dvec3", "dvec4" }, + }; + + for (uint32_t k = 0; k < 4; k++) + { + auto *type = types[j][k]; + + if (i < 2) + { + statement("spirv_instruction(set = \"GLSL.std.450\", id = ", glsl_ops[i], ") ", + type, " ", spv_ops[i], "(", type, ", ", type, ");"); + } + else + { + statement("spirv_instruction(set = \"GLSL.std.450\", id = ", glsl_ops[i], ") ", + type, " ", spv_ops[i], "(", type, ", ", type, ", ", type, ");"); + } + + has_poly = true; + } + } + } + + if (has_poly) + statement(""); + } + else + { + // Mediump intrinsics don't work correctly, so wrap the intrinsic in an outer shell that ensures mediump + // propagation. + + static const Polyfill polys[3][3] = { + { PolyfillNMin16, PolyfillNMin32, PolyfillNMin64 }, + { PolyfillNMax16, PolyfillNMax32, PolyfillNMax64 }, + { PolyfillNClamp16, PolyfillNClamp32, PolyfillNClamp64 }, + }; + + static const char *spv_ops[] = { "spvNMin", "spvNMax", "spvNClamp" }; + + for (uint32_t i = 0; i < 3; i++) + { + for (uint32_t j = 0; j < 3; j++) + { + if ((polyfills & polys[i][j]) == 0) + continue; + + const char *types[3][4] = { + { "float16_t", "f16vec2", "f16vec3", "f16vec4" }, + { "float", "vec2", "vec3", "vec4" }, + { "double", "dvec2", "dvec3", "dvec4" }, + }; + + for (uint32_t k = 0; k < 4; k++) + { + auto *type = types[j][k]; + + if (i < 2) + { + statement("mediump ", type, " ", spv_ops[i], "Relaxed(", + "mediump ", type, " a, mediump ", type, " b)"); + begin_scope(); + statement("mediump ", type, " res = ", spv_ops[i], "(a, b);"); + statement("return res;"); + end_scope(); + statement(""); + } + else + { + statement("mediump ", type, " ", spv_ops[i], "Relaxed(", + "mediump ", type, " a, mediump ", type, " b, mediump ", type, " c)"); + begin_scope(); + statement("mediump ", type, " res = ", spv_ops[i], "(a, b, c);"); + statement("return res;"); + end_scope(); + statement(""); + } + } + } + } + } } // Returns a string representation of the ID, usable as a function arg. @@ -7847,6 +7951,7 @@ std::string CompilerGLSL::to_texture_op(const Instruction &i, bool sparse, bool args.grad_x = grad_x; args.grad_y = grad_y; args.lod = lod; + args.has_array_offsets = coffsets != 0; if (coffsets) args.offset = coffsets; @@ -8864,23 +8969,97 @@ void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, case GLSLstd450NMin: case GLSLstd450NMax: { - emit_nminmax_op(result_type, id, args[0], args[1], op); + if (options.vulkan_semantics) + { + require_extension_internal("GL_EXT_spirv_intrinsics"); + bool relaxed = has_decoration(id, DecorationRelaxedPrecision); + Polyfill poly = {}; + switch (get(result_type).width) + { + case 16: + poly = op == GLSLstd450NMin ? PolyfillNMin16 : PolyfillNMax16; + break; + + case 32: + poly = op == GLSLstd450NMin ? PolyfillNMin32 : PolyfillNMax32; + break; + + case 64: + poly = op == GLSLstd450NMin ? PolyfillNMin64 : PolyfillNMax64; + break; + + default: + SPIRV_CROSS_THROW("Invalid bit width for NMin/NMax."); + } + + require_polyfill(poly, relaxed); + + // Function return decorations are broken, so need to do double polyfill. + if (relaxed) + require_polyfill(poly, false); + + const char *op_str; + if (relaxed) + op_str = op == GLSLstd450NMin ? "spvNMinRelaxed" : "spvNMaxRelaxed"; + else + op_str = op == GLSLstd450NMin ? "spvNMin" : "spvNMax"; + + emit_binary_func_op(result_type, id, args[0], args[1], op_str); + } + else + { + emit_nminmax_op(result_type, id, args[0], args[1], op); + } break; } case GLSLstd450NClamp: { - // Make sure we have a unique ID here to avoid aliasing the extra sub-expressions between clamp and NMin sub-op. - // IDs cannot exceed 24 bits, so we can make use of the higher bits for some unique flags. - uint32_t &max_id = extra_sub_expressions[id | EXTRA_SUB_EXPRESSION_TYPE_AUX]; - if (!max_id) - max_id = ir.increase_bound_by(1); + if (options.vulkan_semantics) + { + require_extension_internal("GL_EXT_spirv_intrinsics"); + bool relaxed = has_decoration(id, DecorationRelaxedPrecision); + Polyfill poly = {}; + switch (get(result_type).width) + { + case 16: + poly = PolyfillNClamp16; + break; - // Inherit precision qualifiers. - ir.meta[max_id] = ir.meta[id]; + case 32: + poly = PolyfillNClamp32; + break; - emit_nminmax_op(result_type, max_id, args[0], args[1], GLSLstd450NMax); - emit_nminmax_op(result_type, id, max_id, args[2], GLSLstd450NMin); + case 64: + poly = PolyfillNClamp64; + break; + + default: + SPIRV_CROSS_THROW("Invalid bit width for NMin/NMax."); + } + + require_polyfill(poly, relaxed); + + // Function return decorations are broken, so need to do double polyfill. + if (relaxed) + require_polyfill(poly, false); + + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], relaxed ? "spvNClampRelaxed" : "spvNClamp"); + } + else + { + // Make sure we have a unique ID here to avoid aliasing the extra sub-expressions between clamp and NMin sub-op. + // IDs cannot exceed 24 bits, so we can make use of the higher bits for some unique flags. + uint32_t &max_id = extra_sub_expressions[id | EXTRA_SUB_EXPRESSION_TYPE_AUX]; + if (!max_id) + max_id = ir.increase_bound_by(1); + + // Inherit precision qualifiers. + ir.meta[max_id] = ir.meta[id]; + + emit_nminmax_op(result_type, max_id, args[0], args[1], GLSLstd450NMax); + emit_nminmax_op(result_type, id, max_id, args[2], GLSLstd450NMin); + } break; } @@ -11610,6 +11789,10 @@ uint32_t CompilerGLSL::get_integer_width_for_instruction(const Instruction &inst case OpUGreaterThanEqual: return expression_type(ops[2]).width; + case OpSMulExtended: + case OpUMulExtended: + return get(get(ops[0]).member_types[0]).width; + default: { // We can look at result type which is more robust. @@ -12167,6 +12350,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) auto &callee = get(func); auto &return_type = get(callee.return_type); bool pure = function_is_pure(callee); + bool control_dependent = function_is_control_dependent(callee); bool callee_has_out_variables = false; bool emit_return_value_as_argument = false; @@ -12198,7 +12382,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) if (emit_return_value_as_argument) { - statement(type_to_glsl(return_type), " ", to_name(id), type_to_array_glsl(return_type), ";"); + statement(type_to_glsl(return_type), " ", to_name(id), type_to_array_glsl(return_type, 0), ";"); arglist.push_back(to_name(id)); } @@ -12260,6 +12444,9 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) else statement(funexpr, ";"); + if (control_dependent) + register_control_dependent_expression(id); + break; } @@ -15166,7 +15353,7 @@ string CompilerGLSL::variable_decl(const SPIRType &type, const string &name, uin { string type_name = type_to_glsl(type, id); remap_variable_type_name(type, name, type_name); - return join(type_name, " ", name, type_to_array_glsl(type)); + return join(type_name, " ", name, type_to_array_glsl(type, id)); } bool CompilerGLSL::variable_decl_is_remapped_storage(const SPIRVariable &var, StorageClass storage) const @@ -15523,7 +15710,7 @@ string CompilerGLSL::to_array_size(const SPIRType &type, uint32_t index) return ""; } -string CompilerGLSL::type_to_array_glsl(const SPIRType &type) +string CompilerGLSL::type_to_array_glsl(const SPIRType &type, uint32_t) { if (type.pointer && type.storage == StorageClassPhysicalStorageBufferEXT && type.basetype != SPIRType::Struct) { @@ -16087,7 +16274,7 @@ void CompilerGLSL::emit_function_prototype(SPIRFunction &func, const Bitset &ret auto &type = get(func.return_type); decl += flags_to_qualifiers_glsl(type, return_flags); decl += type_to_glsl(type); - decl += type_to_array_glsl(type); + decl += type_to_array_glsl(type, 0); decl += " "; if (func.self == ir.default_entry_point) diff --git a/3rdparty/spirv-cross/spirv_glsl.hpp b/3rdparty/spirv-cross/spirv_glsl.hpp index 3ce044e39..f3e545e9f 100644 --- a/3rdparty/spirv-cross/spirv_glsl.hpp +++ b/3rdparty/spirv-cross/spirv_glsl.hpp @@ -477,7 +477,7 @@ protected: uint32_t coord = 0, coord_components = 0, dref = 0; uint32_t grad_x = 0, grad_y = 0, lod = 0, offset = 0; uint32_t bias = 0, component = 0, sample = 0, sparse_texel = 0, min_lod = 0; - bool nonuniform_expression = false; + bool nonuniform_expression = false, has_array_offsets = false; }; virtual std::string to_function_args(const TextureFunctionArguments &args, bool *p_forward); @@ -564,8 +564,8 @@ protected: Options options; - virtual std::string type_to_array_glsl( - const SPIRType &type); // Allow Metal to use the array template to make arrays a value type + // Allow Metal to use the array template to make arrays a value type + virtual std::string type_to_array_glsl(const SPIRType &type, uint32_t variable_id); std::string to_array_size(const SPIRType &type, uint32_t index); uint32_t to_array_size_literal(const SPIRType &type, uint32_t index) const; uint32_t to_array_size_literal(const SPIRType &type) const; @@ -933,6 +933,15 @@ protected: PolyfillMatrixInverse2x2 = 1 << 6, PolyfillMatrixInverse3x3 = 1 << 7, PolyfillMatrixInverse4x4 = 1 << 8, + PolyfillNMin16 = 1 << 9, + PolyfillNMin32 = 1 << 10, + PolyfillNMin64 = 1 << 11, + PolyfillNMax16 = 1 << 12, + PolyfillNMax32 = 1 << 13, + PolyfillNMax64 = 1 << 14, + PolyfillNClamp16 = 1 << 15, + PolyfillNClamp32 = 1 << 16, + PolyfillNClamp64 = 1 << 17, }; uint32_t required_polyfills = 0; diff --git a/3rdparty/spirv-cross/spirv_hlsl.cpp b/3rdparty/spirv-cross/spirv_hlsl.cpp index 9b8349365..ac1d262af 100644 --- a/3rdparty/spirv-cross/spirv_hlsl.cpp +++ b/3rdparty/spirv-cross/spirv_hlsl.cpp @@ -1002,7 +1002,7 @@ void CompilerHLSL::emit_interface_block_member_in_struct(const SPIRVariable &var statement(to_interpolation_qualifiers(get_member_decoration_bitset(type.self, member_index)), type_to_glsl(mbr_type), - " ", mbr_name, type_to_array_glsl(mbr_type), + " ", mbr_name, type_to_array_glsl(mbr_type, var.self), " : ", semantic, ";"); // Structs and arrays should consume more locations. @@ -2277,7 +2277,7 @@ void CompilerHLSL::emit_resources() // Need out variable since HLSL does not support returning arrays. auto &type = get(type_id); auto type_str = type_to_glsl(type); - auto type_arr_str = type_to_array_glsl(type); + auto type_arr_str = type_to_array_glsl(type, 0); statement("void spvSelectComposite(out ", type_str, " out_value", type_arr_str, ", bool cond, ", type_str, " true_val", type_arr_str, ", ", type_str, " false_val", type_arr_str, ")"); @@ -2679,7 +2679,7 @@ void CompilerHLSL::emit_buffer_block(const SPIRVariable &var) type_name = is_readonly ? "ByteAddressBuffer" : is_interlocked ? "RasterizerOrderedByteAddressBuffer" : "RWByteAddressBuffer"; add_resource_name(var.self); - statement(is_coherent ? "globallycoherent " : "", type_name, " ", to_name(var.self), type_to_array_glsl(type), + statement(is_coherent ? "globallycoherent " : "", type_name, " ", to_name(var.self), type_to_array_glsl(type, var.self), to_resource_binding(var), ";"); } else @@ -2766,7 +2766,7 @@ void CompilerHLSL::emit_buffer_block(const SPIRVariable &var) } emit_struct(get(type.self)); - statement("ConstantBuffer<", to_name(type.self), "> ", to_name(var.self), type_to_array_glsl(type), + statement("ConstantBuffer<", to_name(type.self), "> ", to_name(var.self), type_to_array_glsl(type, var.self), to_resource_binding(var), ";"); } } @@ -2952,7 +2952,7 @@ void CompilerHLSL::emit_function_prototype(SPIRFunction &func, const Bitset &ret out_argument += type_to_glsl(type); out_argument += " "; out_argument += "spvReturnValue"; - out_argument += type_to_array_glsl(type); + out_argument += type_to_array_glsl(type, 0); arglist.push_back(std::move(out_argument)); } @@ -2978,7 +2978,7 @@ void CompilerHLSL::emit_function_prototype(SPIRFunction &func, const Bitset &ret { // Manufacture automatic sampler arg for SampledImage texture arglist.push_back(join(is_depth_image(arg_type, arg.id) ? "SamplerComparisonState " : "SamplerState ", - to_sampler_expression(arg.id), type_to_array_glsl(arg_type))); + to_sampler_expression(arg.id), type_to_array_glsl(arg_type, arg.id))); } // Hold a pointer to the parameter so we can invalidate the readonly field if needed. @@ -4076,16 +4076,16 @@ void CompilerHLSL::emit_modern_uniform(const SPIRVariable &var) is_coherent = has_decoration(var.self, DecorationCoherent); statement(is_coherent ? "globallycoherent " : "", image_type_hlsl_modern(type, var.self), " ", - to_name(var.self), type_to_array_glsl(type), to_resource_binding(var), ";"); + to_name(var.self), type_to_array_glsl(type, var.self), to_resource_binding(var), ";"); if (type.basetype == SPIRType::SampledImage && type.image.dim != DimBuffer) { // For combined image samplers, also emit a combined image sampler. if (is_depth_image(type, var.self)) - statement("SamplerComparisonState ", to_sampler_expression(var.self), type_to_array_glsl(type), + statement("SamplerComparisonState ", to_sampler_expression(var.self), type_to_array_glsl(type, var.self), to_resource_binding_sampler(var), ";"); else - statement("SamplerState ", to_sampler_expression(var.self), type_to_array_glsl(type), + statement("SamplerState ", to_sampler_expression(var.self), type_to_array_glsl(type, var.self), to_resource_binding_sampler(var), ";"); } break; @@ -4093,10 +4093,10 @@ void CompilerHLSL::emit_modern_uniform(const SPIRVariable &var) case SPIRType::Sampler: if (comparison_ids.count(var.self)) - statement("SamplerComparisonState ", to_name(var.self), type_to_array_glsl(type), to_resource_binding(var), + statement("SamplerComparisonState ", to_name(var.self), type_to_array_glsl(type, var.self), to_resource_binding(var), ";"); else - statement("SamplerState ", to_name(var.self), type_to_array_glsl(type), to_resource_binding(var), ";"); + statement("SamplerState ", to_name(var.self), type_to_array_glsl(type, var.self), to_resource_binding(var), ";"); break; default: @@ -4448,6 +4448,18 @@ void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); break; + case GLSLstd450NMin: + CompilerGLSL::emit_glsl_op(result_type, id, GLSLstd450FMin, args, count); + break; + + case GLSLstd450NMax: + CompilerGLSL::emit_glsl_op(result_type, id, GLSLstd450FMax, args, count); + break; + + case GLSLstd450NClamp: + CompilerGLSL::emit_glsl_op(result_type, id, GLSLstd450FClamp, args, count); + break; + default: CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); break; diff --git a/3rdparty/spirv-cross/spirv_msl.cpp b/3rdparty/spirv-cross/spirv_msl.cpp index 013ef2301..1e24af40c 100644 --- a/3rdparty/spirv-cross/spirv_msl.cpp +++ b/3rdparty/spirv-cross/spirv_msl.cpp @@ -201,13 +201,17 @@ bool CompilerMSL::is_var_runtime_size_array(const SPIRVariable &var) const uint32_t CompilerMSL::get_resource_array_size(const SPIRType &type, uint32_t id) const { uint32_t array_size = to_array_size_literal(type); - if (array_size) + + // If we have argument buffers, we need to honor the ABI by using the correct array size + // from the layout. Only use shader declared size if we're not using argument buffers. + uint32_t desc_set = get_decoration(id, DecorationDescriptorSet); + if (!descriptor_set_is_argument_buffer(desc_set) && array_size) return array_size; - StageSetBinding tuple = { get_entry_point().model, get_decoration(id, DecorationDescriptorSet), + StageSetBinding tuple = { get_entry_point().model, desc_set, get_decoration(id, DecorationBinding) }; auto itr = resource_bindings.find(tuple); - return itr != end(resource_bindings) ? itr->second.first.count : 0; + return itr != end(resource_bindings) ? itr->second.first.count : array_size; } uint32_t CompilerMSL::get_automatic_msl_resource_binding(uint32_t id) const @@ -267,11 +271,14 @@ void CompilerMSL::build_implicit_builtins() active_input_builtins.get(BuiltInInstanceIndex) || active_input_builtins.get(BuiltInBaseInstance)); bool need_local_invocation_index = msl_options.emulate_subgroups && active_input_builtins.get(BuiltInSubgroupId); bool need_workgroup_size = msl_options.emulate_subgroups && active_input_builtins.get(BuiltInNumSubgroups); + bool force_frag_depth_passthrough = + get_execution_model() == ExecutionModelFragment && !uses_explicit_early_fragment_test() && need_subpass_input && + msl_options.enable_frag_depth_builtin && msl_options.input_attachment_is_ds_attachment; if (need_subpass_input || need_sample_pos || need_subgroup_mask || need_vertex_params || need_tesc_params || need_tese_params || need_multiview || need_dispatch_base || need_vertex_base_params || need_grid_params || needs_sample_id || needs_subgroup_invocation_id || needs_subgroup_size || needs_helper_invocation || - has_additional_fixed_sample_mask() || need_local_invocation_index || need_workgroup_size) + has_additional_fixed_sample_mask() || need_local_invocation_index || need_workgroup_size || force_frag_depth_passthrough) { bool has_frag_coord = false; bool has_sample_id = false; @@ -288,6 +295,7 @@ void CompilerMSL::build_implicit_builtins() bool has_helper_invocation = false; bool has_local_invocation_index = false; bool has_workgroup_size = false; + bool has_frag_depth = false; uint32_t workgroup_id_type = 0; ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { @@ -308,6 +316,13 @@ void CompilerMSL::build_implicit_builtins() mark_implicit_builtin(StorageClassOutput, BuiltInSampleMask, var.self); does_shader_write_sample_mask = true; } + + if (force_frag_depth_passthrough && builtin == BuiltInFragDepth) + { + builtin_frag_depth_id = var.self; + mark_implicit_builtin(StorageClassOutput, BuiltInFragDepth, var.self); + has_frag_depth = true; + } } if (var.storage != StorageClassInput) @@ -898,6 +913,36 @@ void CompilerMSL::build_implicit_builtins() builtin_workgroup_size_id = var_id; mark_implicit_builtin(StorageClassInput, BuiltInWorkgroupSize, var_id); } + + if (!has_frag_depth && force_frag_depth_passthrough) + { + uint32_t offset = ir.increase_bound_by(3); + uint32_t type_id = offset; + uint32_t type_ptr_id = offset + 1; + uint32_t var_id = offset + 2; + + // Create gl_FragDepth + SPIRType float_type { OpTypeFloat }; + float_type.basetype = SPIRType::Float; + float_type.width = 32; + float_type.vecsize = 1; + set(type_id, float_type); + + SPIRType float_type_ptr_in = float_type; + float_type_ptr_in.op = spv::OpTypePointer; + float_type_ptr_in.pointer = true; + float_type_ptr_in.pointer_depth++; + float_type_ptr_in.parent_type = type_id; + float_type_ptr_in.storage = StorageClassOutput; + + auto &ptr_in_type = set(type_ptr_id, float_type_ptr_in); + ptr_in_type.self = type_id; + set(var_id, type_ptr_id, StorageClassOutput); + set_decoration(var_id, DecorationBuiltIn, BuiltInFragDepth); + builtin_frag_depth_id = var_id; + mark_implicit_builtin(StorageClassOutput, BuiltInFragDepth, var_id); + active_output_builtins.set(BuiltInFragDepth); + } } if (needs_swizzle_buffer_def) @@ -1314,48 +1359,29 @@ void CompilerMSL::emit_entry_point_declarations() uint32_t arg_id = argument_buffer_ids[desc_set]; uint32_t base_index = dynamic_buffer.second.first; - if (!type.array.empty()) + if (is_array(type)) { - // This is complicated, because we need to support arrays of arrays. - // And it's even worse if the outermost dimension is a runtime array, because now - // all this complicated goop has to go into the shader itself. (FIXME) if (!type.array[type.array.size() - 1]) SPIRV_CROSS_THROW("Runtime arrays with dynamic offsets are not supported yet."); - else + + is_using_builtin_array = true; + statement(get_argument_address_space(var), " ", type_to_glsl(type), "* ", to_restrict(var_id, true), name, + type_to_array_glsl(type, var_id), " ="); + + uint32_t array_size = to_array_size_literal(type); + begin_scope(); + + for (uint32_t i = 0; i < array_size; i++) { - is_using_builtin_array = true; - statement(get_argument_address_space(var), " ", type_to_glsl(type), "* ", to_restrict(var_id, true), name, - type_to_array_glsl(type), " ="); - - uint32_t dim = uint32_t(type.array.size()); - uint32_t j = 0; - for (SmallVector indices(type.array.size()); - indices[type.array.size() - 1] < to_array_size_literal(type); j++) - { - while (dim > 0) - { - begin_scope(); - --dim; - } - - string arrays; - for (uint32_t i = uint32_t(type.array.size()); i; --i) - arrays += join("[", indices[i - 1], "]"); - statement("(", get_argument_address_space(var), " ", type_to_glsl(type), "* ", - to_restrict(var_id, false), ")((", get_argument_address_space(var), " char* ", - to_restrict(var_id, false), ")", to_name(arg_id), ".", ensure_valid_name(name, "m"), - arrays, " + ", to_name(dynamic_offsets_buffer_id), "[", base_index + j, "]),"); - - while (++indices[dim] >= to_array_size_literal(type, dim) && dim < type.array.size() - 1) - { - end_scope(","); - indices[dim++] = 0; - } - } - end_scope_decl(); - statement_no_indent(""); - is_using_builtin_array = false; + statement("(", get_argument_address_space(var), " ", type_to_glsl(type), "* ", + to_restrict(var_id, false), ")((", get_argument_address_space(var), " char* ", + to_restrict(var_id, false), ")", to_name(arg_id), ".", ensure_valid_name(name, "m"), + "[", i, "]", " + ", to_name(dynamic_offsets_buffer_id), "[", base_index + i, "]),"); } + + end_scope_decl(); + statement_no_indent(""); + is_using_builtin_array = false; } else { @@ -1469,7 +1495,7 @@ void CompilerMSL::emit_entry_point_declarations() is_using_builtin_array = true; statement(desc_addr_space, " auto& ", to_restrict(var_id, true), to_name(var_id), " = (", addr_space, " ", type_to_glsl(type), "* ", desc_addr_space, " (&)", - type_to_array_glsl(type), ")", ir.meta[alias_id].decoration.qualified_alias, ";"); + type_to_array_glsl(type, var_id), ")", ir.meta[alias_id].decoration.qualified_alias, ";"); is_using_builtin_array = false; } } @@ -1553,8 +1579,10 @@ string CompilerMSL::compile() if (needs_manual_helper_invocation_updates() && (active_input_builtins.get(BuiltInHelperInvocation) || needs_helper_invocation)) { - string discard_expr = - join(builtin_to_glsl(BuiltInHelperInvocation, StorageClassInput), " = true, discard_fragment()"); + string builtin_helper_invocation = builtin_to_glsl(BuiltInHelperInvocation, StorageClassInput); + string discard_expr = join(builtin_helper_invocation, " = true, discard_fragment()"); + if (msl_options.force_fragment_with_side_effects_execution) + discard_expr = join("!", builtin_helper_invocation, " ? (", discard_expr, ") : (void)0"); backend.discard_literal = discard_expr; backend.demote_literal = discard_expr; } @@ -1584,6 +1612,8 @@ string CompilerMSL::compile() add_active_interface_variable(builtin_dispatch_base_id); if (builtin_sample_mask_id) add_active_interface_variable(builtin_sample_mask_id); + if (builtin_frag_depth_id) + add_active_interface_variable(builtin_frag_depth_id); // Create structs to hold input, output and uniform variables. // Do output first to ensure out. is declared at top of entry function. @@ -1703,14 +1733,15 @@ void CompilerMSL::preprocess_op_codes() // Fragment shaders that both write to storage resources and discard fragments // need checks on the writes, to work around Metal allowing these writes despite - // the fragment being dead. - if (msl_options.check_discarded_frag_stores && preproc.uses_discard && - (preproc.uses_buffer_write || preproc.uses_image_write)) + // the fragment being dead. We also require to force Metal to execute fragment + // shaders instead of being prematurely discarded. + if (preproc.uses_discard && (preproc.uses_buffer_write || preproc.uses_image_write)) { - frag_shader_needs_discard_checks = true; - needs_helper_invocation = true; + bool should_enable = (msl_options.check_discarded_frag_stores || msl_options.force_fragment_with_side_effects_execution); + frag_shader_needs_discard_checks |= msl_options.check_discarded_frag_stores; + needs_helper_invocation |= should_enable; // Fragment discard store checks imply manual HelperInvocation updates. - msl_options.manual_helper_invocation_updates = true; + msl_options.manual_helper_invocation_updates |= should_enable; } if (is_intersection_query()) @@ -1881,8 +1912,13 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std:: { uint32_t base_id = ops[0]; if (global_var_ids.find(base_id) != global_var_ids.end()) + { added_arg_ids.insert(base_id); + if (msl_options.input_attachment_is_ds_attachment && base_id == builtin_frag_depth_id) + writes_to_depth = true; + } + uint32_t rvalue_id = ops[1]; if (global_var_ids.find(rvalue_id) != global_var_ids.end()) added_arg_ids.insert(rvalue_id); @@ -2921,20 +2957,35 @@ void CompilerMSL::add_composite_member_variable_to_interface_block(StorageClass uint32_t mbr_idx, InterfaceBlockMeta &meta, const string &mbr_name_qual, const string &var_chain_qual, - uint32_t &location, uint32_t &var_mbr_idx) + uint32_t &location, uint32_t &var_mbr_idx, + const Bitset &interpolation_qual) { auto &entry_func = get(ir.default_entry_point); BuiltIn builtin = BuiltInMax; bool is_builtin = is_member_builtin(var_type, mbr_idx, &builtin); - bool is_flat = - has_member_decoration(var_type.self, mbr_idx, DecorationFlat) || has_decoration(var.self, DecorationFlat); - bool is_noperspective = has_member_decoration(var_type.self, mbr_idx, DecorationNoPerspective) || + bool is_flat = interpolation_qual.get(DecorationFlat) || + has_member_decoration(var_type.self, mbr_idx, DecorationFlat) || + has_decoration(var.self, DecorationFlat); + bool is_noperspective = interpolation_qual.get(DecorationNoPerspective) || + has_member_decoration(var_type.self, mbr_idx, DecorationNoPerspective) || has_decoration(var.self, DecorationNoPerspective); - bool is_centroid = has_member_decoration(var_type.self, mbr_idx, DecorationCentroid) || + bool is_centroid = interpolation_qual.get(DecorationCentroid) || + has_member_decoration(var_type.self, mbr_idx, DecorationCentroid) || has_decoration(var.self, DecorationCentroid); - bool is_sample = - has_member_decoration(var_type.self, mbr_idx, DecorationSample) || has_decoration(var.self, DecorationSample); + bool is_sample = interpolation_qual.get(DecorationSample) || + has_member_decoration(var_type.self, mbr_idx, DecorationSample) || + has_decoration(var.self, DecorationSample); + + Bitset inherited_qual; + if (is_flat) + inherited_qual.set(DecorationFlat); + if (is_noperspective) + inherited_qual.set(DecorationNoPerspective); + if (is_centroid) + inherited_qual.set(DecorationCentroid); + if (is_sample) + inherited_qual.set(DecorationSample); uint32_t mbr_type_id = var_type.member_types[mbr_idx]; auto &mbr_type = get(mbr_type_id); @@ -2998,7 +3049,7 @@ void CompilerMSL::add_composite_member_variable_to_interface_block(StorageClass add_composite_member_variable_to_interface_block(storage, ib_var_ref, ib_type, var, mbr_type, sub_mbr_idx, meta, mbr_name, var_chain, - location, var_mbr_idx); + location, var_mbr_idx, inherited_qual); // FIXME: Recursive structs and tessellation breaks here. var_mbr_idx++; } @@ -3485,7 +3536,7 @@ void CompilerMSL::emit_local_masked_variable(const SPIRVariable &masked_var, boo get_entry_point().output_vertices; statement("threadgroup ", type_to_glsl(type), " ", "spvStorage", to_name(masked_var.self), "[", max_num_instances, "]", - type_to_array_glsl(type), ";"); + type_to_array_glsl(type, 0), ";"); // Assign a threadgroup slice to each PrimitiveID. // We assume here that workgroup size is rounded to 32, @@ -3684,7 +3735,7 @@ void CompilerMSL::add_variable_to_interface_block(StorageClass storage, const st add_composite_member_variable_to_interface_block(storage, ib_var_ref, ib_type, var, var_type, mbr_idx, meta, mbr_name_qual, var_chain_qual, - location, var_mbr_idx); + location, var_mbr_idx, {}); } else { @@ -5564,9 +5615,8 @@ void CompilerMSL::emit_custom_templates() // otherwise they will cause problems when linked together in a single Metallib. void CompilerMSL::emit_custom_functions() { - for (uint32_t i = kArrayCopyMultidimMax; i >= 2; i--) - if (spv_function_implementations.count(static_cast(SPVFuncImplArrayCopyMultidimBase + i))) - spv_function_implementations.insert(static_cast(SPVFuncImplArrayCopyMultidimBase + i - 1)); + if (spv_function_implementations.count(SPVFuncImplArrayCopyMultidim)) + spv_function_implementations.insert(SPVFuncImplArrayCopy); if (spv_function_implementations.count(SPVFuncImplDynamicImageSampler)) { @@ -5677,11 +5727,7 @@ void CompilerMSL::emit_custom_functions() break; case SPVFuncImplArrayCopy: - case SPVFuncImplArrayOfArrayCopy2Dim: - case SPVFuncImplArrayOfArrayCopy3Dim: - case SPVFuncImplArrayOfArrayCopy4Dim: - case SPVFuncImplArrayOfArrayCopy5Dim: - case SPVFuncImplArrayOfArrayCopy6Dim: + case SPVFuncImplArrayCopyMultidim: { // Unfortunately we cannot template on the address space, so combinatorial explosion it is. static const char *function_name_tags[] = { @@ -5704,36 +5750,19 @@ void CompilerMSL::emit_custom_functions() for (uint32_t variant = 0; variant < 12; variant++) { - uint8_t dimensions = spv_func - SPVFuncImplArrayCopyMultidimBase; - string tmp = "template" : ">"); + statement("inline void spvArrayCopy", function_name_tags[variant], "(", + dst_address_space[variant], " T (&dst)", dim, ", ", + src_address_space[variant], " T (&src)", dim, ")"); begin_scope(); - statement("for (uint i = 0; i < A; i++)"); + statement("for (uint i = 0; i < N; i++)"); begin_scope(); - - if (dimensions == 1) - statement("dst[i] = src[i];"); + if (is_multidim) + statement("spvArrayCopy", function_name_tags[variant], "(dst[i], src[i]);"); else - statement("spvArrayCopy", function_name_tags[variant], dimensions - 1, "(dst[i], src[i]);"); + statement("dst[i] = src[i];"); end_scope(); end_scope(); statement(""); @@ -6234,6 +6263,57 @@ void CompilerMSL::emit_custom_functions() statement(""); break; + case SPVFuncImplGatherConstOffsets: + statement("// Wrapper function that processes a texture gather with a constant offset array."); + statement("template class Tex, " + "typename Toff, typename... Tp>"); + statement("inline vec spvGatherConstOffsets(const thread Tex& t, sampler s, " + "Toff coffsets, component c, Tp... params) METAL_CONST_ARG(c)"); + begin_scope(); + statement("vec rslts[4];"); + statement("for (uint i = 0; i < 4; i++)"); + begin_scope(); + statement("switch (c)"); + begin_scope(); + // Work around texture::gather() requiring its component parameter to be a constant expression + statement("case component::x:"); + statement(" rslts[i] = t.gather(s, spvForward(params)..., coffsets[i], component::x);"); + statement(" break;"); + statement("case component::y:"); + statement(" rslts[i] = t.gather(s, spvForward(params)..., coffsets[i], component::y);"); + statement(" break;"); + statement("case component::z:"); + statement(" rslts[i] = t.gather(s, spvForward(params)..., coffsets[i], component::z);"); + statement(" break;"); + statement("case component::w:"); + statement(" rslts[i] = t.gather(s, spvForward(params)..., coffsets[i], component::w);"); + statement(" break;"); + end_scope(); + end_scope(); + // Pull all values from the i0j0 component of each gather footprint + statement("return vec(rslts[0].w, rslts[1].w, rslts[2].w, rslts[3].w);"); + end_scope(); + statement(""); + break; + + case SPVFuncImplGatherCompareConstOffsets: + statement("// Wrapper function that processes a texture gather with a constant offset array."); + statement("template class Tex, " + "typename Toff, typename... Tp>"); + statement("inline vec spvGatherCompareConstOffsets(const thread Tex& t, sampler s, " + "Toff coffsets, Tp... params)"); + begin_scope(); + statement("vec rslts[4];"); + statement("for (uint i = 0; i < 4; i++)"); + begin_scope(); + statement(" rslts[i] = t.gather_compare(s, spvForward(params)..., coffsets[i]);"); + end_scope(); + // Pull all values from the i0j0 component of each gather footprint + statement("return vec(rslts[0].w, rslts[1].w, rslts[2].w, rslts[3].w);"); + end_scope(); + statement(""); + break; + case SPVFuncImplSubgroupBroadcast: // Metal doesn't allow broadcasting boolean values directly, but we can work around that by broadcasting // them as integers. @@ -7426,14 +7506,14 @@ void CompilerMSL::emit_custom_functions() statement("template"); statement("struct spvDescriptorArray"); begin_scope(); - statement("spvDescriptorArray(const device spvDescriptor* ptr) : ptr(ptr)"); + statement("spvDescriptorArray(const device spvDescriptor* ptr) : ptr(&ptr->value)"); begin_scope(); end_scope(); statement("const device T& operator [] (size_t i) const"); begin_scope(); - statement("return ptr[i].value;"); + statement("return ptr[i];"); end_scope(); - statement("const device spvDescriptor* ptr;"); + statement("const device T* ptr;"); end_scope_decl(); statement(""); } @@ -7498,6 +7578,17 @@ void CompilerMSL::emit_custom_functions() statement(""); break; + case SPVFuncImplTextureCast: + statement("template "); + statement("T spvTextureCast(U img)"); + begin_scope(); + // MSL complains if you try to cast the texture itself, but casting the reference type is ... ok? *shrug* + // Gotta go what you gotta do I suppose. + statement("return reinterpret_cast(img);"); + end_scope(); + statement(""); + break; + default: break; } @@ -9428,32 +9519,12 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) uint32_t op1 = ops[3]; auto &type = get(result_type); auto input_type = opcode == OpSMulExtended ? int_type : uint_type; - auto &output_type = get_type(result_type); string cast_op0, cast_op1; - auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, false); - + binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, false); emit_uninitialized_temporary_expression(result_type, result_id); - - string mullo_expr, mulhi_expr; - mullo_expr = join(cast_op0, " * ", cast_op1); - mulhi_expr = join("mulhi(", cast_op0, ", ", cast_op1, ")"); - - auto &low_type = get_type(output_type.member_types[0]); - auto &high_type = get_type(output_type.member_types[1]); - if (low_type.basetype != input_type) - { - expected_type.basetype = input_type; - mullo_expr = join(bitcast_glsl_op(low_type, expected_type), "(", mullo_expr, ")"); - } - if (high_type.basetype != input_type) - { - expected_type.basetype = input_type; - mulhi_expr = join(bitcast_glsl_op(high_type, expected_type), "(", mulhi_expr, ")"); - } - - statement(to_expression(result_id), ".", to_member_name(type, 0), " = ", mullo_expr, ";"); - statement(to_expression(result_id), ".", to_member_name(type, 1), " = ", mulhi_expr, ";"); + statement(to_expression(result_id), ".", to_member_name(type, 0), " = ", cast_op0, " * ", cast_op1, ";"); + statement(to_expression(result_id), ".", to_member_name(type, 1), " = mulhi(", cast_op0, ", ", cast_op1, ");"); break; } @@ -10020,15 +10091,7 @@ bool CompilerMSL::emit_array_copy(const char *expr, uint32_t lhs_id, uint32_t rh // we cannot easily detect this case ahead of time since it's // context dependent. We might have to force a recompile here // if this is the only use of array copies in our shader. - if (type.array.size() > 1) - { - if (type.array.size() > kArrayCopyMultidimMax) - SPIRV_CROSS_THROW("Cannot support this many dimensions for arrays of arrays."); - auto func = static_cast(SPVFuncImplArrayCopyMultidimBase + type.array.size()); - add_spv_func_and_recompile(func); - } - else - add_spv_func_and_recompile(SPVFuncImplArrayCopy); + add_spv_func_and_recompile(type.array.size() > 1 ? SPVFuncImplArrayCopyMultidim : SPVFuncImplArrayCopy); const char *tag = nullptr; if (lhs_is_thread_storage && is_constant) @@ -10060,13 +10123,13 @@ bool CompilerMSL::emit_array_copy(const char *expr, uint32_t lhs_id, uint32_t rh // Pass internal array of spvUnsafeArray<> into wrapper functions if (lhs_is_array_template && rhs_is_array_template && !msl_options.force_native_arrays) - statement("spvArrayCopy", tag, type.array.size(), "(", lhs, ".elements, ", to_expression(rhs_id), ".elements);"); + statement("spvArrayCopy", tag, "(", lhs, ".elements, ", to_expression(rhs_id), ".elements);"); if (lhs_is_array_template && !msl_options.force_native_arrays) - statement("spvArrayCopy", tag, type.array.size(), "(", lhs, ".elements, ", to_expression(rhs_id), ");"); + statement("spvArrayCopy", tag, "(", lhs, ".elements, ", to_expression(rhs_id), ");"); else if (rhs_is_array_template && !msl_options.force_native_arrays) - statement("spvArrayCopy", tag, type.array.size(), "(", lhs, ", ", to_expression(rhs_id), ".elements);"); + statement("spvArrayCopy", tag, "(", lhs, ", ", to_expression(rhs_id), ".elements);"); else - statement("spvArrayCopy", tag, type.array.size(), "(", lhs, ", ", to_expression(rhs_id), ");"); + statement("spvArrayCopy", tag, "(", lhs, ", ", to_expression(rhs_id), ");"); } return true; @@ -10213,7 +10276,35 @@ void CompilerMSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id, if (split_index != string::npos) { auto coord = obj_expression.substr(split_index + 1); - exp += join(obj_expression.substr(0, split_index), ".", op, "("); + auto image_expr = obj_expression.substr(0, split_index); + + // Handle problem cases with sign where we need signed min/max on a uint image for example. + // It seems to work to cast the texture type itself, even if it is probably wildly outside of spec, + // but SPIR-V requires this to work. + if ((opcode == OpAtomicUMax || opcode == OpAtomicUMin || + opcode == OpAtomicSMax || opcode == OpAtomicSMin) && + type.basetype != expected_type) + { + auto *backing_var = maybe_get_backing_variable(obj); + if (backing_var) + { + add_spv_func_and_recompile(SPVFuncImplTextureCast); + + const auto *backing_type = &get(backing_var->basetype); + while (backing_type->op != OpTypeImage) + backing_type = &get(backing_type->parent_type); + + auto img_type = *backing_type; + auto tmp_type = type; + tmp_type.basetype = expected_type; + img_type.image.type = ir.increase_bound_by(1); + set(img_type.image.type, tmp_type); + + image_expr = join("spvTextureCast<", type_to_glsl(img_type, obj), ">(", image_expr, ")"); + } + } + + exp += join(image_expr, ".", op, "("); if (ptr_type.storage == StorageClassImage && res_type->image.arrayed) { switch (res_type->image.dim) @@ -10401,19 +10492,54 @@ void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, op = get_remapped_glsl_op(op); + auto &restype = get(result_type); + switch (op) { case GLSLstd450Sinh: - emit_unary_func_op(result_type, id, args[0], "fast::sinh"); + if (restype.basetype == SPIRType::Half) + { + // MSL does not have overload for half. Force-cast back to half. + auto expr = join("half(fast::sinh(", to_unpacked_expression(args[0]), "))"); + emit_op(result_type, id, expr, should_forward(args[0])); + inherit_expression_dependencies(id, args[0]); + } + else + emit_unary_func_op(result_type, id, args[0], "fast::sinh"); break; case GLSLstd450Cosh: - emit_unary_func_op(result_type, id, args[0], "fast::cosh"); + if (restype.basetype == SPIRType::Half) + { + // MSL does not have overload for half. Force-cast back to half. + auto expr = join("half(fast::cosh(", to_unpacked_expression(args[0]), "))"); + emit_op(result_type, id, expr, should_forward(args[0])); + inherit_expression_dependencies(id, args[0]); + } + else + emit_unary_func_op(result_type, id, args[0], "fast::cosh"); break; case GLSLstd450Tanh: - emit_unary_func_op(result_type, id, args[0], "precise::tanh"); + if (restype.basetype == SPIRType::Half) + { + // MSL does not have overload for half. Force-cast back to half. + auto expr = join("half(fast::tanh(", to_unpacked_expression(args[0]), "))"); + emit_op(result_type, id, expr, should_forward(args[0])); + inherit_expression_dependencies(id, args[0]); + } + else + emit_unary_func_op(result_type, id, args[0], "precise::tanh"); break; case GLSLstd450Atan2: - emit_binary_func_op(result_type, id, args[0], args[1], "precise::atan2"); + if (restype.basetype == SPIRType::Half) + { + // MSL does not have overload for half. Force-cast back to half. + auto expr = join("half(fast::atan2(", to_unpacked_expression(args[0]), ", ", to_unpacked_expression(args[1]), "))"); + emit_op(result_type, id, expr, should_forward(args[0]) && should_forward(args[1])); + inherit_expression_dependencies(id, args[0]); + inherit_expression_dependencies(id, args[1]); + } + else + emit_binary_func_op(result_type, id, args[0], args[1], "precise::atan2"); break; case GLSLstd450InverseSqrt: emit_unary_func_op(result_type, id, args[0], "rsqrt"); @@ -10809,7 +10935,7 @@ void CompilerMSL::emit_function_prototype(SPIRFunction &func, const Bitset &) decl += "thread "; decl += type_to_glsl(type); decl += " (&spvReturnValue)"; - decl += type_to_array_glsl(type); + decl += type_to_array_glsl(type, 0); if (!func.arguments.empty()) decl += ", "; } @@ -10933,8 +11059,7 @@ string CompilerMSL::to_function_name(const TextureFunctionNameArguments &args) is_dynamic_img_sampler = has_extended_decoration(var->self, SPIRVCrossDecorationDynamicImageSampler); } - // Special-case gather. We have to alter the component being looked up - // in the swizzle case. + // Special-case gather. We have to alter the component being looked up in the swizzle case. if (msl_options.swizzle_texture_samples && args.base.is_gather && !is_dynamic_img_sampler && (!constexpr_sampler || !constexpr_sampler->ycbcr_conversion_enable)) { @@ -10943,6 +11068,16 @@ string CompilerMSL::to_function_name(const TextureFunctionNameArguments &args) return is_compare ? "spvGatherCompareSwizzle" : "spvGatherSwizzle"; } + // Special-case gather with an array of offsets. We have to lower into 4 separate gathers. + if (args.has_array_offsets && !is_dynamic_img_sampler && + (!constexpr_sampler || !constexpr_sampler->ycbcr_conversion_enable)) + { + bool is_compare = comparison_ids.count(img); + add_spv_func_and_recompile(is_compare ? SPVFuncImplGatherCompareConstOffsets : SPVFuncImplGatherConstOffsets); + add_spv_func_and_recompile(SPVFuncImplForwardArgs); + return is_compare ? "spvGatherCompareConstOffsets" : "spvGatherConstOffsets"; + } + auto *combined = maybe_get(img); // Texture reference @@ -11123,6 +11258,10 @@ string CompilerMSL::to_function_args(const TextureFunctionArguments &args, bool farg_str += to_expression(combined ? combined->image : img); } + // Gathers with constant offsets call a special function, so include the texture. + if (args.has_array_offsets) + farg_str += to_expression(img); + // Sampler reference if (!args.base.is_fetch) { @@ -11139,11 +11278,17 @@ string CompilerMSL::to_function_args(const TextureFunctionArguments &args, bool used_swizzle_buffer = true; } - // Swizzled gather puts the component before the other args, to allow template - // deduction to work. - if (args.component && msl_options.swizzle_texture_samples) + // Const offsets gather puts the const offsets before the other args. + if (args.has_array_offsets) { - forward = should_forward(args.component); + forward = forward && should_forward(args.offset); + farg_str += ", " + to_expression(args.offset); + } + + // Const offsets gather or swizzled gather puts the component before the other args. + if (args.component && (args.has_array_offsets || msl_options.swizzle_texture_samples)) + { + forward = forward && should_forward(args.component); farg_str += ", " + to_component_argument(args.component); } } @@ -11554,7 +11699,7 @@ string CompilerMSL::to_function_args(const TextureFunctionArguments &args, bool // Add offsets string offset_expr; const SPIRType *offset_type = nullptr; - if (args.offset && !args.base.is_fetch) + if (args.offset && !args.base.is_fetch && !args.has_array_offsets) { forward = forward && should_forward(args.offset); offset_expr = to_expression(args.offset); @@ -11593,7 +11738,7 @@ string CompilerMSL::to_function_args(const TextureFunctionArguments &args, bool } } - if (args.component) + if (args.component && !args.has_array_offsets) { // If 2D has gather component, ensure it also has an offset arg if (imgtype.image.dim == Dim2D && offset_expr.empty()) @@ -12273,7 +12418,7 @@ string CompilerMSL::to_struct_member(const SPIRType &type, uint32_t member_type_ variable_storage_requires_stage_io(StorageClassInput))); if (is_ib_in_out && is_member_builtin(type, index, &builtin)) is_using_builtin_array = true; - array_type = type_to_array_glsl(physical_type); + array_type = type_to_array_glsl(physical_type, orig_id); } if (orig_id) @@ -12843,14 +12988,14 @@ uint32_t CompilerMSL::get_or_allocate_builtin_output_member_location(spv::BuiltI string CompilerMSL::func_type_decl(SPIRType &type) { // The regular function return type. If not processing the entry point function, that's all we need - string return_type = type_to_glsl(type) + type_to_array_glsl(type); + string return_type = type_to_glsl(type) + type_to_array_glsl(type, 0); if (!processing_entry_point) return return_type; // If an outgoing interface block has been defined, and it should be returned, override the entry point return type bool ep_should_return_output = !get_is_rasterization_disabled(); if (stage_out_var_id && ep_should_return_output) - return_type = type_to_glsl(get_stage_out_struct_type()) + type_to_array_glsl(type); + return_type = type_to_glsl(get_stage_out_struct_type()) + type_to_array_glsl(type, 0); // Prepend a entry type, based on the execution model string entry_type; @@ -14416,16 +14561,33 @@ void CompilerMSL::fix_up_shader_inputs_outputs() } } else if (var.storage == StorageClassOutput && get_execution_model() == ExecutionModelFragment && - is_builtin_variable(var) && active_output_builtins.get(bi_type) && - bi_type == BuiltInSampleMask && has_additional_fixed_sample_mask()) + is_builtin_variable(var) && active_output_builtins.get(bi_type)) { - // If the additional fixed sample mask was set, we need to adjust the sample_mask - // output to reflect that. If the shader outputs the sample_mask itself too, we need - // to AND the two masks to get the final one. - string op_str = does_shader_write_sample_mask ? " &= " : " = "; - entry_func.fixup_hooks_out.push_back([=]() { - statement(to_expression(builtin_sample_mask_id), op_str, additional_fixed_sample_mask_str(), ";"); - }); + switch (bi_type) + { + case BuiltInSampleMask: + if (has_additional_fixed_sample_mask()) + { + // If the additional fixed sample mask was set, we need to adjust the sample_mask + // output to reflect that. If the shader outputs the sample_mask itself too, we need + // to AND the two masks to get the final one. + string op_str = does_shader_write_sample_mask ? " &= " : " = "; + entry_func.fixup_hooks_out.push_back([=]() { + statement(to_expression(builtin_sample_mask_id), op_str, additional_fixed_sample_mask_str(), ";"); + }); + } + break; + case BuiltInFragDepth: + if (msl_options.input_attachment_is_ds_attachment && !writes_to_depth) + { + entry_func.fixup_hooks_out.push_back([=]() { + statement(to_expression(builtin_frag_depth_id), " = ", to_expression(builtin_frag_coord_id), ".z;"); + }); + } + break; + default: + break; + } } }); } @@ -14717,7 +14879,7 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg) } decl += to_expression(name_id); decl += ")"; - decl += type_to_array_glsl(type); + decl += type_to_array_glsl(type, name_id); } else { @@ -14768,7 +14930,7 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg) } else { - auto array_size_decl = type_to_array_glsl(type); + auto array_size_decl = type_to_array_glsl(type, name_id); if (array_size_decl.empty()) decl += "& "; else @@ -15508,7 +15670,7 @@ string CompilerMSL::type_to_glsl(const SPIRType &type, uint32_t id) return type_to_glsl(type, id, false); } -string CompilerMSL::type_to_array_glsl(const SPIRType &type) +string CompilerMSL::type_to_array_glsl(const SPIRType &type, uint32_t variable_id) { // Allow Metal to use the array template to make arrays a value type switch (type.basetype) @@ -15516,11 +15678,20 @@ string CompilerMSL::type_to_array_glsl(const SPIRType &type) case SPIRType::AtomicCounter: case SPIRType::ControlPointArray: case SPIRType::RayQuery: - return CompilerGLSL::type_to_array_glsl(type); + return CompilerGLSL::type_to_array_glsl(type, variable_id); default: if (type_is_array_of_pointers(type) || using_builtin_array()) - return CompilerGLSL::type_to_array_glsl(type); + { + const SPIRVariable *var = variable_id ? &get(variable_id) : nullptr; + if (var && (var->storage == StorageClassUniform || var->storage == StorageClassStorageBuffer) && + is_array(get_variable_data_type(*var))) + { + return join("[", get_resource_array_size(type, variable_id), "]"); + } + else + return CompilerGLSL::type_to_array_glsl(type, variable_id); + } else return ""; } @@ -15686,8 +15857,8 @@ string CompilerMSL::image_type_glsl(const SPIRType &type, uint32_t id, bool memb string img_type_name; - // Bypass pointers because we need the real image struct - auto &img_type = get(type.self).image; + auto &img_type = type.image; + if (is_depth_image(type, id)) { switch (img_type.dim) @@ -17925,7 +18096,7 @@ void CompilerMSL::emit_argument_buffer_aliased_descriptor(const SPIRVariable &al is_using_builtin_array = true; bool needs_post_cast_deref = !is_array(data_type); - string ref_type = needs_post_cast_deref ? "&" : join("(&)", type_to_array_glsl(var_type)); + string ref_type = needs_post_cast_deref ? "&" : join("(&)", type_to_array_glsl(var_type, aliased_var.self)); if (is_var_runtime_size_array(aliased_var)) { @@ -18214,9 +18385,9 @@ void CompilerMSL::analyze_argument_buffers() // If needed, synthesize and add padding members. // member_index and next_arg_buff_index are incremented when padding members are added. - if (msl_options.pad_argument_buffer_resources) + if (msl_options.pad_argument_buffer_resources && resource.overlapping_var_id == 0) { - auto &rez_bind = get_argument_buffer_resource(desc_set, next_arg_buff_index); + auto rez_bind = get_argument_buffer_resource(desc_set, next_arg_buff_index); while (resource.index > next_arg_buff_index) { switch (rez_bind.basetype) @@ -18252,11 +18423,14 @@ void CompilerMSL::analyze_argument_buffers() default: break; } + + // After padding, retrieve the resource again. It will either be more padding, or the actual resource. + rez_bind = get_argument_buffer_resource(desc_set, next_arg_buff_index); } // Adjust the number of slots consumed by current member itself. // Use the count value from the app, instead of the shader, in case the - // shader is only accesing part, or even one element, of the array. + // shader is only accessing part, or even one element, of the array. next_arg_buff_index += rez_bind.count; } @@ -18391,7 +18565,7 @@ void CompilerMSL::analyze_argument_buffers() // that matches the resource index of the argument buffer index. // This is a two-step lookup, first lookup the resource binding number from the argument buffer index, // then lookup the resource binding using the binding number. -MSLResourceBinding &CompilerMSL::get_argument_buffer_resource(uint32_t desc_set, uint32_t arg_idx) +const MSLResourceBinding &CompilerMSL::get_argument_buffer_resource(uint32_t desc_set, uint32_t arg_idx) const { auto stage = get_entry_point().model; StageSetBinding arg_idx_tuple = { stage, desc_set, arg_idx }; diff --git a/3rdparty/spirv-cross/spirv_msl.hpp b/3rdparty/spirv-cross/spirv_msl.hpp index 41c14e28c..9a1715808 100644 --- a/3rdparty/spirv-cross/spirv_msl.hpp +++ b/3rdparty/spirv-cross/spirv_msl.hpp @@ -287,9 +287,6 @@ static const uint32_t kArgumentBufferBinding = ~(3u); static const uint32_t kMaxArgumentBuffers = 8; -// The arbitrary maximum for the nesting of array of array copies. -static const uint32_t kArrayCopyMultidimMax = 6; - // Decompiles SPIR-V to Metal Shading Language class CompilerMSL : public CompilerGLSL { @@ -519,6 +516,26 @@ public: // transformed. bool agx_manual_cube_grad_fixup = false; + // Metal will discard fragments with side effects under certain circumstances prematurely. + // Example: CTS test dEQP-VK.fragment_operations.early_fragment.discard_no_early_fragment_tests_depth + // Test will render a full screen quad with varying depth [0,1] for each fragment. + // Each fragment will do an operation with side effects, modify the depth value and + // discard the fragment. The test expects the fragment to be run due to: + // https://registry.khronos.org/vulkan/specs/1.0-extensions/html/vkspec.html#fragops-shader-depthreplacement + // which states that the fragment shader must be run due to replacing the depth in shader. + // However, Metal may prematurely discards fragments without executing them + // (I believe this to be due to a greedy optimization on their end) making the test fail. + // This option enforces fragment execution for such cases where the fragment has operations + // with side effects. Provided as an option hoping Metal will fix this issue in the future. + bool force_fragment_with_side_effects_execution = false; + + // If set, adds a depth pass through statement to circumvent the following issue: + // When the same depth/stencil is used as input and depth/stencil attachment, we need to + // force Metal to perform the depth/stencil write after fragment execution. Otherwise, + // Metal will write to the depth attachment before fragment execution. This happens + // if the fragment does not modify the depth value. + bool input_attachment_is_ds_attachment = false; + bool is_ios() const { return platform == iOS; @@ -752,15 +769,8 @@ protected: SPVFuncImplFindSMsb, SPVFuncImplFindUMsb, SPVFuncImplSSign, - SPVFuncImplArrayCopyMultidimBase, - // Unfortunately, we cannot use recursive templates in the MSL compiler properly, - // so stamp out variants up to some arbitrary maximum. - SPVFuncImplArrayCopy = SPVFuncImplArrayCopyMultidimBase + 1, - SPVFuncImplArrayOfArrayCopy2Dim = SPVFuncImplArrayCopyMultidimBase + 2, - SPVFuncImplArrayOfArrayCopy3Dim = SPVFuncImplArrayCopyMultidimBase + 3, - SPVFuncImplArrayOfArrayCopy4Dim = SPVFuncImplArrayCopyMultidimBase + 4, - SPVFuncImplArrayOfArrayCopy5Dim = SPVFuncImplArrayCopyMultidimBase + 5, - SPVFuncImplArrayOfArrayCopy6Dim = SPVFuncImplArrayCopyMultidimBase + 6, + SPVFuncImplArrayCopy, + SPVFuncImplArrayCopyMultidim, SPVFuncImplTexelBufferCoords, SPVFuncImplImage2DAtomicCoords, // Emulate texture2D atomic operations SPVFuncImplGradientCube, @@ -782,6 +792,8 @@ protected: SPVFuncImplTextureSwizzle, SPVFuncImplGatherSwizzle, SPVFuncImplGatherCompareSwizzle, + SPVFuncImplGatherConstOffsets, + SPVFuncImplGatherCompareConstOffsets, SPVFuncImplSubgroupBroadcast, SPVFuncImplSubgroupBroadcastFirst, SPVFuncImplSubgroupBallot, @@ -825,7 +837,8 @@ protected: SPVFuncImplVariableDescriptorArray, SPVFuncImplPaddedStd140, SPVFuncImplReduceAdd, - SPVFuncImplImageFence + SPVFuncImplImageFence, + SPVFuncImplTextureCast }; // If the underlying resource has been used for comparison then duplicate loads of that resource must be too @@ -856,7 +869,7 @@ protected: void emit_block_hints(const SPIRBlock &block) override; // Allow Metal to use the array template to make arrays a value type - std::string type_to_array_glsl(const SPIRType &type) override; + std::string type_to_array_glsl(const SPIRType &type, uint32_t variable_id) override; std::string constant_op_expression(const SPIRConstantOp &cop) override; bool variable_decl_is_remapped_storage(const SPIRVariable &variable, spv::StorageClass storage) const override; @@ -960,7 +973,8 @@ protected: uint32_t mbr_idx, InterfaceBlockMeta &meta, const std::string &mbr_name_qual, const std::string &var_chain_qual, - uint32_t &location, uint32_t &var_mbr_idx); + uint32_t &location, uint32_t &var_mbr_idx, + const Bitset &interpolation_qual); void add_tess_level_input_to_interface_block(const std::string &ib_var_ref, SPIRType &ib_type, SPIRVariable &var); void add_tess_level_input(const std::string &base_ref, const std::string &mbr_name, SPIRVariable &var); @@ -1087,6 +1101,7 @@ protected: uint32_t builtin_stage_input_size_id = 0; uint32_t builtin_local_invocation_index_id = 0; uint32_t builtin_workgroup_size_id = 0; + uint32_t builtin_frag_depth_id = 0; uint32_t swizzle_buffer_id = 0; uint32_t buffer_size_buffer_id = 0; uint32_t view_mask_buffer_id = 0; @@ -1183,6 +1198,7 @@ protected: bool needs_subgroup_size = false; bool needs_sample_id = false; bool needs_helper_invocation = false; + bool writes_to_depth = false; std::string qual_pos_var_name; std::string stage_in_var_name = "in"; std::string stage_out_var_name = "out"; @@ -1231,7 +1247,7 @@ protected: void analyze_argument_buffers(); bool descriptor_set_is_argument_buffer(uint32_t desc_set) const; - MSLResourceBinding &get_argument_buffer_resource(uint32_t desc_set, uint32_t arg_idx); + const MSLResourceBinding &get_argument_buffer_resource(uint32_t desc_set, uint32_t arg_idx) const; void add_argument_buffer_padding_buffer_type(SPIRType &struct_type, uint32_t &mbr_idx, uint32_t &arg_buff_index, MSLResourceBinding &rez_bind); void add_argument_buffer_padding_image_type(SPIRType &struct_type, uint32_t &mbr_idx, uint32_t &arg_buff_index, MSLResourceBinding &rez_bind); void add_argument_buffer_padding_sampler_type(SPIRType &struct_type, uint32_t &mbr_idx, uint32_t &arg_buff_index, MSLResourceBinding &rez_bind);