From 84557d87eb443be18702c5b1598963fa56b163c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=91=D1=80=D0=B0=D0=BD=D0=B8=D0=BC=D0=B8=D1=80=20=D0=9A?= =?UTF-8?q?=D0=B0=D1=80=D0=B0=D1=9F=D0=B8=D1=9B?= Date: Sun, 11 Oct 2020 20:49:02 -0700 Subject: [PATCH] Updated spirv-cross. --- 3rdparty/spirv-cross/spirv_common.hpp | 1 + 3rdparty/spirv-cross/spirv_cross.cpp | 9 +- 3rdparty/spirv-cross/spirv_cross.hpp | 2 + .../spirv-cross/spirv_cross_containers.hpp | 7 +- .../spirv-cross/spirv_cross_parsed_ir.cpp | 20 + 3rdparty/spirv-cross/spirv_glsl.cpp | 991 ++++++++++++++++-- 3rdparty/spirv-cross/spirv_glsl.hpp | 92 +- 3rdparty/spirv-cross/spirv_hlsl.cpp | 2 +- 3rdparty/spirv-cross/spirv_msl.cpp | 4 +- 9 files changed, 1018 insertions(+), 110 deletions(-) diff --git a/3rdparty/spirv-cross/spirv_common.hpp b/3rdparty/spirv-cross/spirv_common.hpp index 6ff6fa9ac..53df37616 100644 --- a/3rdparty/spirv-cross/spirv_common.hpp +++ b/3rdparty/spirv-cross/spirv_common.hpp @@ -1634,6 +1634,7 @@ struct Meta uint32_t offset = 0; uint32_t xfb_buffer = 0; uint32_t xfb_stride = 0; + uint32_t stream = 0; uint32_t array_stride = 0; uint32_t matrix_stride = 0; uint32_t input_attachment = 0; diff --git a/3rdparty/spirv-cross/spirv_cross.cpp b/3rdparty/spirv-cross/spirv_cross.cpp index aa276572b..77e7619fc 100644 --- a/3rdparty/spirv-cross/spirv_cross.cpp +++ b/3rdparty/spirv-cross/spirv_cross.cpp @@ -1717,7 +1717,7 @@ uint32_t Compiler::evaluate_spec_constant_u32(const SPIRConstantOp &spec) const break; case OpSNegate: - value = -eval_u32(spec.arguments[0]); + value = uint32_t(-int32_t(eval_u32(spec.arguments[0]))); break; case OpSelect: @@ -2062,6 +2062,13 @@ bool Compiler::is_tessellation_shader(ExecutionModel model) return model == ExecutionModelTessellationControl || model == ExecutionModelTessellationEvaluation; } +bool Compiler::is_vertex_like_shader() const +{ + auto model = get_execution_model(); + return model == ExecutionModelVertex || model == ExecutionModelGeometry || + model == ExecutionModelTessellationControl || model == ExecutionModelTessellationEvaluation; +} + bool Compiler::is_tessellation_shader() const { return is_tessellation_shader(get_execution_model()); diff --git a/3rdparty/spirv-cross/spirv_cross.hpp b/3rdparty/spirv-cross/spirv_cross.hpp index a502da118..f20ed4269 100644 --- a/3rdparty/spirv-cross/spirv_cross.hpp +++ b/3rdparty/spirv-cross/spirv_cross.hpp @@ -1063,6 +1063,8 @@ protected: uint32_t evaluate_spec_constant_u32(const SPIRConstantOp &spec) const; uint32_t evaluate_constant_u32(uint32_t id) const; + bool is_vertex_like_shader() const; + private: // Used only to implement the old deprecated get_entry_point() interface. const SPIREntryPoint &get_first_entry_point(const std::string &name) const; diff --git a/3rdparty/spirv-cross/spirv_cross_containers.hpp b/3rdparty/spirv-cross/spirv_cross_containers.hpp index 05498f672..892a489b0 100644 --- a/3rdparty/spirv-cross/spirv_cross_containers.hpp +++ b/3rdparty/spirv-cross/spirv_cross_containers.hpp @@ -63,8 +63,7 @@ public: private: #if defined(_MSC_VER) && _MSC_VER < 1900 // MSVC 2013 workarounds, sigh ... - union - { + union { char aligned_char[sizeof(T) * N]; double dummy_aligner; } u; @@ -212,6 +211,10 @@ public: this->buffer_size = count; } + SmallVector(std::initializer_list init) SPIRV_CROSS_NOEXCEPT : SmallVector(init.begin(), init.end()) + { + } + SmallVector(SmallVector &&other) SPIRV_CROSS_NOEXCEPT : SmallVector() { *this = std::move(other); diff --git a/3rdparty/spirv-cross/spirv_cross_parsed_ir.cpp b/3rdparty/spirv-cross/spirv_cross_parsed_ir.cpp index d7b82fbf6..f409d65fb 100644 --- a/3rdparty/spirv-cross/spirv_cross_parsed_ir.cpp +++ b/3rdparty/spirv-cross/spirv_cross_parsed_ir.cpp @@ -388,6 +388,10 @@ void ParsedIR::set_decoration(ID id, Decoration decoration, uint32_t argument) dec.xfb_stride = argument; break; + case DecorationStream: + dec.stream = argument; + break; + case DecorationArrayStride: dec.array_stride = argument; break; @@ -467,6 +471,10 @@ void ParsedIR::set_member_decoration(TypeID id, uint32_t index, Decoration decor dec.xfb_stride = argument; break; + case DecorationStream: + dec.stream = argument; + break; + case DecorationSpecId: dec.spec_id = argument; break; @@ -584,6 +592,8 @@ uint32_t ParsedIR::get_decoration(ID id, Decoration decoration) const return dec.xfb_buffer; case DecorationXfbStride: return dec.xfb_stride; + case DecorationStream: + return dec.stream; case DecorationBinding: return dec.binding; case DecorationDescriptorSet: @@ -656,6 +666,10 @@ void ParsedIR::unset_decoration(ID id, Decoration decoration) dec.xfb_stride = 0; break; + case DecorationStream: + dec.stream = 0; + break; + case DecorationBinding: dec.binding = 0; break; @@ -730,6 +744,8 @@ uint32_t ParsedIR::get_member_decoration(TypeID id, uint32_t index, Decoration d return dec.xfb_buffer; case DecorationXfbStride: return dec.xfb_stride; + case DecorationStream: + return dec.stream; case DecorationSpecId: return dec.spec_id; case DecorationIndex: @@ -826,6 +842,10 @@ void ParsedIR::unset_member_decoration(TypeID id, uint32_t index, Decoration dec dec.xfb_stride = 0; break; + case DecorationStream: + dec.stream = 0; + break; + case DecorationSpecId: dec.spec_id = 0; break; diff --git a/3rdparty/spirv-cross/spirv_glsl.cpp b/3rdparty/spirv-cross/spirv_glsl.cpp index b17c0e9f6..130c28848 100644 --- a/3rdparty/spirv-cross/spirv_glsl.cpp +++ b/3rdparty/spirv-cross/spirv_glsl.cpp @@ -548,6 +548,7 @@ string CompilerGLSL::compile() emit_header(); emit_resources(); + emit_extension_workarounds(get_execution_model()); emit_function(get(ir.default_entry_point), Bitset()); @@ -620,6 +621,21 @@ void CompilerGLSL::build_workgroup_size(SmallVector &arguments, const Sp arguments.push_back(join("local_size_z = ", execution.workgroup_size.z)); } +void CompilerGLSL::request_subgroup_feature(ShaderSubgroupSupportHelper::Feature feature) +{ + if (options.vulkan_semantics) + { + auto khr_extension = ShaderSubgroupSupportHelper::get_KHR_extension_for_feature(feature); + require_extension_internal(ShaderSubgroupSupportHelper::get_extension_name(khr_extension)); + } + else + { + if (!shader_subgroup_supporter.is_feature_requested(feature)) + force_recompile(); + shader_subgroup_supporter.request_feature(feature); + } +} + void CompilerGLSL::emit_header() { auto &execution = get_entry_point(); @@ -722,6 +738,45 @@ void CompilerGLSL::emit_header() statement("#extension ", ext, " : require"); } + if (!options.vulkan_semantics) + { + using Supp = ShaderSubgroupSupportHelper; + auto result = shader_subgroup_supporter.resolve(); + + for (uint32_t feature_index = 0; feature_index < Supp::FeatureCount; feature_index++) + { + auto feature = static_cast(feature_index); + if (!shader_subgroup_supporter.is_feature_requested(feature)) + continue; + + auto exts = Supp::get_candidates_for_feature(feature, result); + if (exts.empty()) + continue; + + statement(""); + + for (auto &ext : exts) + { + const char *name = Supp::get_extension_name(ext); + const char *extra_predicate = Supp::get_extra_required_extension_predicate(ext); + auto extra_names = Supp::get_extra_required_extension_names(ext); + statement(&ext != &exts.front() ? "#elif" : "#if", " defined(", name, ")", + (*extra_predicate != '\0' ? " && " : ""), extra_predicate); + for (const auto &e : extra_names) + statement("#extension ", e, " : enable"); + statement("#extension ", name, " : require"); + } + + if (!Supp::can_feature_be_implemented_without_extensions(feature)) + { + statement("#else"); + statement("#error No extensions available to emulate requested subgroup feature."); + } + + statement("#endif"); + } + } + for (auto &header : header_lines) statement(header); @@ -1590,7 +1645,8 @@ string CompilerGLSL::layout_for_variable(const SPIRVariable &var) uint32_t member_count = uint32_t(type.member_types.size()); bool have_xfb_buffer_stride = false; bool have_any_xfb_offset = false; - uint32_t xfb_stride = 0, xfb_buffer = 0; + bool have_geom_stream = false; + uint32_t xfb_stride = 0, xfb_buffer = 0, geom_stream = 0; if (flags.get(DecorationXfbBuffer) && flags.get(DecorationXfbStride)) { @@ -1599,9 +1655,24 @@ string CompilerGLSL::layout_for_variable(const SPIRVariable &var) xfb_stride = get_decoration(var.self, DecorationXfbStride); } + if (flags.get(DecorationStream)) + { + have_geom_stream = true; + geom_stream = get_decoration(var.self, DecorationStream); + } + // Verify that none of the members violate our assumption. for (uint32_t i = 0; i < member_count; i++) { + if (has_member_decoration(type.self, i, DecorationStream)) + { + uint32_t member_geom_stream = get_member_decoration(type.self, i, DecorationStream); + if (have_geom_stream && member_geom_stream != geom_stream) + SPIRV_CROSS_THROW("IO block member Stream mismatch."); + have_geom_stream = true; + geom_stream = member_geom_stream; + } + // Only members with an Offset decoration participate in XFB. if (!has_member_decoration(type.self, i, DecorationOffset)) continue; @@ -1632,15 +1703,40 @@ string CompilerGLSL::layout_for_variable(const SPIRVariable &var) attr.push_back(join("xfb_stride = ", xfb_stride)); uses_enhanced_layouts = true; } + + if (have_geom_stream) + { + if (get_execution_model() != ExecutionModelGeometry) + SPIRV_CROSS_THROW("Geometry streams can only be used in geometry shaders."); + if (options.es) + SPIRV_CROSS_THROW("Multiple geometry streams not supported in ESSL."); + if (options.version < 400) + require_extension_internal("GL_ARB_transform_feedback3"); + attr.push_back(join("stream = ", get_decoration(var.self, DecorationStream))); + } } - else if (var.storage == StorageClassOutput && flags.get(DecorationXfbBuffer) && flags.get(DecorationXfbStride) && - flags.get(DecorationOffset)) + else if (var.storage == StorageClassOutput) { - // XFB for standalone variables, we can emit all decorations. - attr.push_back(join("xfb_buffer = ", get_decoration(var.self, DecorationXfbBuffer))); - attr.push_back(join("xfb_stride = ", get_decoration(var.self, DecorationXfbStride))); - attr.push_back(join("xfb_offset = ", get_decoration(var.self, DecorationOffset))); - uses_enhanced_layouts = true; + if (flags.get(DecorationXfbBuffer) && flags.get(DecorationXfbStride) && + flags.get(DecorationOffset)) + { + // XFB for standalone variables, we can emit all decorations. + attr.push_back(join("xfb_buffer = ", get_decoration(var.self, DecorationXfbBuffer))); + attr.push_back(join("xfb_stride = ", get_decoration(var.self, DecorationXfbStride))); + attr.push_back(join("xfb_offset = ", get_decoration(var.self, DecorationOffset))); + uses_enhanced_layouts = true; + } + + if (flags.get(DecorationStream)) + { + if (get_execution_model() != ExecutionModelGeometry) + SPIRV_CROSS_THROW("Geometry streams can only be used in geometry shaders."); + if (options.es) + SPIRV_CROSS_THROW("Multiple geometry streams not supported in ESSL."); + if (options.version < 400) + require_extension_internal("GL_ARB_transform_feedback3"); + attr.push_back(join("stream = ", get_decoration(var.self, DecorationStream))); + } } // Can only declare Component if we can declare location. @@ -2700,8 +2796,9 @@ void CompilerGLSL::emit_declared_builtin_block(StorageClass storage, ExecutionMo uint32_t clip_distance_size = 0; bool have_xfb_buffer_stride = false; + bool have_geom_stream = false; bool have_any_xfb_offset = false; - uint32_t xfb_stride = 0, xfb_buffer = 0; + uint32_t xfb_stride = 0, xfb_buffer = 0, geom_stream = 0; std::unordered_map builtin_xfb_offsets; ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { @@ -2718,15 +2815,24 @@ void CompilerGLSL::emit_declared_builtin_block(StorageClass storage, ExecutionMo { builtins.set(m.builtin_type); if (m.builtin_type == BuiltInCullDistance) - cull_distance_size = this->get(type.member_types[index]).array.front(); + cull_distance_size = to_array_size_literal(this->get(type.member_types[index])); else if (m.builtin_type == BuiltInClipDistance) - clip_distance_size = this->get(type.member_types[index]).array.front(); + clip_distance_size = to_array_size_literal(this->get(type.member_types[index])); if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationOffset)) { have_any_xfb_offset = true; builtin_xfb_offsets[m.builtin_type] = m.offset; } + + if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationStream)) + { + uint32_t stream = m.stream; + if (have_geom_stream && geom_stream != stream) + SPIRV_CROSS_THROW("IO block member Stream mismatch."); + have_geom_stream = true; + geom_stream = stream; + } } index++; } @@ -2744,6 +2850,15 @@ void CompilerGLSL::emit_declared_builtin_block(StorageClass storage, ExecutionMo xfb_buffer = buffer_index; xfb_stride = stride; } + + if (storage == StorageClassOutput && has_decoration(var.self, DecorationStream)) + { + uint32_t stream = get_decoration(var.self, DecorationStream); + if (have_geom_stream && geom_stream != stream) + SPIRV_CROSS_THROW("IO block member Stream mismatch."); + have_geom_stream = true; + geom_stream = stream; + } } else if (var.storage == storage && !block && is_builtin_variable(var)) { @@ -2753,9 +2868,9 @@ void CompilerGLSL::emit_declared_builtin_block(StorageClass storage, ExecutionMo { global_builtins.set(m.builtin_type); if (m.builtin_type == BuiltInCullDistance) - cull_distance_size = type.array.front(); + cull_distance_size = to_array_size_literal(type); else if (m.builtin_type == BuiltInClipDistance) - clip_distance_size = type.array.front(); + clip_distance_size = to_array_size_literal(type); if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationXfbStride) && m.decoration_flags.get(DecorationXfbBuffer) && m.decoration_flags.get(DecorationOffset)) @@ -2772,6 +2887,15 @@ void CompilerGLSL::emit_declared_builtin_block(StorageClass storage, ExecutionMo xfb_buffer = buffer_index; xfb_stride = stride; } + + if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationStream)) + { + uint32_t stream = get_decoration(var.self, DecorationStream); + if (have_geom_stream && geom_stream != stream) + SPIRV_CROSS_THROW("IO block member Stream mismatch."); + have_geom_stream = true; + geom_stream = stream; + } } } @@ -2801,9 +2925,9 @@ void CompilerGLSL::emit_declared_builtin_block(StorageClass storage, ExecutionMo if (storage == StorageClassOutput) { + SmallVector attr; if (have_xfb_buffer_stride && have_any_xfb_offset) { - statement("layout(xfb_buffer = ", xfb_buffer, ", xfb_stride = ", xfb_stride, ") out gl_PerVertex"); if (!options.es) { if (options.version < 440 && options.version >= 140) @@ -2815,7 +2939,22 @@ void CompilerGLSL::emit_declared_builtin_block(StorageClass storage, ExecutionMo } else if (options.es) SPIRV_CROSS_THROW("Need GL_ARB_enhanced_layouts for xfb_stride or xfb_buffer."); + attr.push_back(join("xfb_buffer = ", xfb_buffer, ", xfb_stride = ", xfb_stride)); } + + if (have_geom_stream) + { + if (get_execution_model() != ExecutionModelGeometry) + SPIRV_CROSS_THROW("Geometry streams can only be used in geometry shaders."); + if (options.es) + SPIRV_CROSS_THROW("Multiple geometry streams not supported in ESSL."); + if (options.version < 400) + require_extension_internal("GL_ARB_transform_feedback3"); + attr.push_back(join("stream = ", geom_stream)); + } + + if (!attr.empty()) + statement("layout(", merge(attr), ") out gl_PerVertex"); else statement("out gl_PerVertex"); } @@ -2890,8 +3029,7 @@ void CompilerGLSL::declare_undefined_values() if (options.force_zero_initialized_variables && type_can_zero_initialize(type)) initializer = join(" = ", to_zero_initialized_expression(undef.basetype)); - statement(variable_decl(type, to_name(undef.self), undef.self), initializer, - ";"); + statement(variable_decl(type, to_name(undef.self), undef.self), initializer, ";"); emitted = true; }); @@ -3258,6 +3396,410 @@ void CompilerGLSL::emit_resources() declare_undefined_values(); } +void CompilerGLSL::emit_extension_workarounds(spv::ExecutionModel model) +{ + static const char *workaround_types[] = { + "int", "ivec2", "ivec3", "ivec4", "uint", "uvec2", "uvec3", "uvec4", + "float", "vec2", "vec3", "vec4", "double", "dvec2", "dvec3", "dvec4" + }; + + if (!options.vulkan_semantics) + { + using Supp = ShaderSubgroupSupportHelper; + auto result = shader_subgroup_supporter.resolve(); + + if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupMask)) + { + auto exts = Supp::get_candidates_for_feature(Supp::SubgroupMask, result); + + for (auto &e : exts) + { + const char *name = Supp::get_extension_name(e); + statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")"); + + switch (e) + { + case Supp::NV_shader_thread_group: + statement("#define gl_SubgroupEqMask uvec4(gl_ThreadEqMaskNV, 0u, 0u, 0u)"); + statement("#define gl_SubgroupGeMask uvec4(gl_ThreadGeMaskNV, 0u, 0u, 0u)"); + statement("#define gl_SubgroupGtMask uvec4(gl_ThreadGtMaskNV, 0u, 0u, 0u)"); + statement("#define gl_SubgroupLeMask uvec4(gl_ThreadLeMaskNV, 0u, 0u, 0u)"); + statement("#define gl_SubgroupLtMask uvec4(gl_ThreadLtMaskNV, 0u, 0u, 0u)"); + break; + case Supp::ARB_shader_ballot: + statement("#define gl_SubgroupEqMask uvec4(unpackUint2x32(gl_SubGroupEqMaskARB), 0u, 0u)"); + statement("#define gl_SubgroupGeMask uvec4(unpackUint2x32(gl_SubGroupGeMaskARB), 0u, 0u)"); + statement("#define gl_SubgroupGtMask uvec4(unpackUint2x32(gl_SubGroupGtMaskARB), 0u, 0u)"); + statement("#define gl_SubgroupLeMask uvec4(unpackUint2x32(gl_SubGroupLeMaskARB), 0u, 0u)"); + statement("#define gl_SubgroupLtMask uvec4(unpackUint2x32(gl_SubGroupLtMaskARB), 0u, 0u)"); + break; + default: + break; + } + } + statement("#endif"); + statement(""); + } + + if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupSize)) + { + auto exts = Supp::get_candidates_for_feature(Supp::SubgroupSize, result); + + for (auto &e : exts) + { + const char *name = Supp::get_extension_name(e); + statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")"); + + switch (e) + { + case Supp::NV_shader_thread_group: + statement("#define gl_SubgroupSize gl_WarpSizeNV"); + break; + case Supp::ARB_shader_ballot: + statement("#define gl_SubgroupSize gl_SubGroupSizeARB"); + break; + case Supp::AMD_gcn_shader: + statement("#define gl_SubgroupSize uint(gl_SIMDGroupSizeAMD)"); + break; + default: + break; + } + } + statement("#endif"); + statement(""); + } + + if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupInvocationID)) + { + auto exts = Supp::get_candidates_for_feature(Supp::SubgroupInvocationID, result); + + for (auto &e : exts) + { + const char *name = Supp::get_extension_name(e); + statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")"); + + switch (e) + { + case Supp::NV_shader_thread_group: + statement("#define gl_SubgroupInvocationID gl_ThreadInWarpNV"); + break; + case Supp::ARB_shader_ballot: + statement("#define gl_SubgroupInvocationID gl_SubGroupInvocationARB"); + break; + default: + break; + } + } + statement("#endif"); + statement(""); + } + + if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupID)) + { + auto exts = Supp::get_candidates_for_feature(Supp::SubgroupID, result); + + for (auto &e : exts) + { + const char *name = Supp::get_extension_name(e); + statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")"); + + switch (e) + { + case Supp::NV_shader_thread_group: + statement("#define gl_SubgroupID gl_WarpIDNV"); + break; + default: + break; + } + } + statement("#endif"); + statement(""); + } + + if (shader_subgroup_supporter.is_feature_requested(Supp::NumSubgroups)) + { + auto exts = Supp::get_candidates_for_feature(Supp::NumSubgroups, result); + + for (auto &e : exts) + { + const char *name = Supp::get_extension_name(e); + statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")"); + + switch (e) + { + case Supp::NV_shader_thread_group: + statement("#define gl_NumSubgroups gl_WarpsPerSMNV"); + break; + default: + break; + } + } + statement("#endif"); + statement(""); + } + + if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBrodcast_First)) + { + auto exts = Supp::get_candidates_for_feature(Supp::SubgroupBrodcast_First, result); + + for (auto &e : exts) + { + const char *name = Supp::get_extension_name(e); + statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")"); + + switch (e) + { + case Supp::NV_shader_thread_shuffle: + for (const char *t : workaround_types) + { + statement(t, " subgroupBroadcastFirst(", t, + " value) { return shuffleNV(value, findLSB(ballotThreadNV(true)), gl_WarpSizeNV); }"); + } + for (const char *t : workaround_types) + { + statement(t, " subgroupBroadcast(", t, + " value, uint id) { return shuffleNV(value, id, gl_WarpSizeNV); }"); + } + break; + case Supp::ARB_shader_ballot: + for (const char *t : workaround_types) + { + statement(t, " subgroupBroadcastFirst(", t, + " value) { return readFirstInvocationARB(value); }"); + } + for (const char *t : workaround_types) + { + statement(t, " subgroupBroadcast(", t, + " value, uint id) { return readInvocationARB(value, id); }"); + } + break; + default: + break; + } + } + statement("#endif"); + statement(""); + } + + if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBallotFindLSB_MSB)) + { + auto exts = Supp::get_candidates_for_feature(Supp::SubgroupBallotFindLSB_MSB, result); + + for (auto &e : exts) + { + const char *name = Supp::get_extension_name(e); + statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")"); + + switch (e) + { + case Supp::NV_shader_thread_group: + statement("uint subgroupBallotFindLSB(uvec4 value) { return findLSB(value.x); }"); + statement("uint subgroupBallotFindMSB(uvec4 value) { return findMSB(value.x); }"); + break; + default: + break; + } + } + statement("#else"); + statement("uint subgroupBallotFindLSB(uvec4 value)"); + begin_scope(); + statement("int firstLive = findLSB(value.x);"); + statement("return uint(firstLive != -1 ? firstLive : (findLSB(value.y) + 32));"); + end_scope(); + statement("uint subgroupBallotFindMSB(uvec4 value)"); + begin_scope(); + statement("int firstLive = findMSB(value.y);"); + statement("return uint(firstLive != -1 ? (firstLive + 32) : findMSB(value.x));"); + end_scope(); + statement("#endif"); + statement(""); + } + + if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupAll_Any_AllEqualBool)) + { + auto exts = Supp::get_candidates_for_feature(Supp::SubgroupAll_Any_AllEqualBool, result); + + for (auto &e : exts) + { + const char *name = Supp::get_extension_name(e); + statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")"); + + switch (e) + { + case Supp::NV_gpu_shader_5: + statement("bool subgroupAll(bool value) { return allThreadsNV(value); }"); + statement("bool subgroupAny(bool value) { return anyThreadNV(value); }"); + statement("bool subgroupAllEqual(bool value) { return allThreadsEqualNV(value); }"); + break; + case Supp::ARB_shader_group_vote: + statement("bool subgroupAll(bool v) { return allInvocationsARB(v); }"); + statement("bool subgroupAny(bool v) { return anyInvocationARB(v); }"); + statement("bool subgroupAllEqual(bool v) { return allInvocationsEqualARB(v); }"); + break; + case Supp::AMD_gcn_shader: + statement("bool subgroupAll(bool value) { return ballotAMD(value) == ballotAMD(true); }"); + statement("bool subgroupAny(bool value) { return ballotAMD(value) != 0ull; }"); + statement("bool subgroupAllEqual(bool value) { uint64_t b = ballotAMD(value); return b == 0ull || " + "b == ballotAMD(true); }"); + break; + default: + break; + } + } + statement("#endif"); + statement(""); + } + + if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupAllEqualT)) + { + statement("#ifndef GL_KHR_shader_subgroup_vote"); + statement( + "#define _SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(type) bool subgroupAllEqual(type value) { return " + "subgroupAllEqual(subgroupBroadcastFirst(value) == value); }"); + for (const char *t : workaround_types) + statement("_SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(", t, ")"); + statement("#undef _SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND"); + statement("#endif"); + statement(""); + } + + if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBallot)) + { + auto exts = Supp::get_candidates_for_feature(Supp::SubgroupBallot, result); + + for (auto &e : exts) + { + const char *name = Supp::get_extension_name(e); + statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")"); + + switch (e) + { + case Supp::NV_shader_thread_group: + statement("uvec4 subgroupBallot(bool v) { return uvec4(ballotThreadNV(v), 0u, 0u, 0u); }"); + break; + case Supp::ARB_shader_ballot: + statement("uvec4 subgroupBallot(bool v) { return uvec4(unpackUint2x32(ballotARB(v)), 0u, 0u); }"); + break; + default: + break; + } + } + statement("#endif"); + statement(""); + } + + if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupElect)) + { + statement("#ifndef GL_KHR_shader_subgroup_basic"); + statement("bool subgroupElect()"); + begin_scope(); + statement("uvec4 activeMask = subgroupBallot(true);"); + statement("uint firstLive = subgroupBallotFindLSB(activeMask);"); + statement("return gl_SubgroupInvocationID == firstLive;"); + end_scope(); + statement("#endif"); + statement(""); + } + + if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBarrier)) + { + // Extensions we're using in place of GL_KHR_shader_subgroup_basic state + // that subgroup execute in lockstep so this barrier is implicit. + statement("#ifndef GL_KHR_shader_subgroup_basic"); + statement("void subgroupBarrier() { /*NOOP*/ }"); + statement("#endif"); + statement(""); + } + + if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupMemBarrier)) + { + if (model == spv::ExecutionModelGLCompute) + { + statement("#ifndef GL_KHR_shader_subgroup_basic"); + statement("void subgroupMemoryBarrier() { groupMemoryBarrier(); }"); + statement("void subgroupMemoryBarrierBuffer() { groupMemoryBarrier(); }"); + statement("void subgroupMemoryBarrierShared() { groupMemoryBarrier(); }"); + statement("void subgroupMemoryBarrierImage() { groupMemoryBarrier(); }"); + statement("#endif"); + } + else + { + statement("#ifndef GL_KHR_shader_subgroup_basic"); + statement("void subgroupMemoryBarrier() { memoryBarrier(); }"); + statement("void subgroupMemoryBarrierBuffer() { memoryBarrierBuffer(); }"); + statement("void subgroupMemoryBarrierImage() { memoryBarrierImage(); }"); + statement("#endif"); + } + statement(""); + } + + if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupInverseBallot_InclBitCount_ExclBitCout)) + { + statement("#ifndef GL_KHR_shader_subgroup_ballot"); + statement("bool subgroupInverseBallot(uvec4 value)"); + begin_scope(); + statement("return any(notEqual(value.xy & gl_SubgroupEqMask.xy, uvec2(0u)));"); + end_scope(); + + statement("uint subgroupBallotInclusiveBitCount(uvec4 value)"); + begin_scope(); + statement("uvec2 v = value.xy & gl_SubgroupLeMask.xy;"); + statement("ivec2 c = bitCount(v);"); + statement_no_indent("#ifdef GL_NV_shader_thread_group"); + statement("return uint(c.x);"); + statement_no_indent("#else"); + statement("return uint(c.x + c.y);"); + statement_no_indent("#endif"); + end_scope(); + + statement("uint subgroupBallotExclusiveBitCount(uvec4 value)"); + begin_scope(); + statement("uvec2 v = value.xy & gl_SubgroupLtMask.xy;"); + statement("ivec2 c = bitCount(v);"); + statement_no_indent("#ifdef GL_NV_shader_thread_group"); + statement("return uint(c.x);"); + statement_no_indent("#else"); + statement("return uint(c.x + c.y);"); + statement_no_indent("#endif"); + end_scope(); + statement("#endif"); + statement(""); + } + + if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBallotBitCount)) + { + statement("#ifndef GL_KHR_shader_subgroup_ballot"); + statement("uint subgroupBallotBitCount(uvec4 value)"); + begin_scope(); + statement("ivec2 c = bitCount(value.xy);"); + statement_no_indent("#ifdef GL_NV_shader_thread_group"); + statement("return uint(c.x);"); + statement_no_indent("#else"); + statement("return uint(c.x + c.y);"); + statement_no_indent("#endif"); + end_scope(); + statement("#endif"); + statement(""); + } + + if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBallotBitExtract)) + { + statement("#ifndef GL_KHR_shader_subgroup_ballot"); + statement("bool subgroupBallotBitExtract(uvec4 value, uint index)"); + begin_scope(); + statement_no_indent("#ifdef GL_NV_shader_thread_group"); + statement("uint shifted = value.x >> index;"); + statement_no_indent("#else"); + statement("uint shifted = value[index >> 5u] >> (index & 0x1fu);"); + statement_no_indent("#endif"); + statement("return (shifted & 1u) != 0u;"); + end_scope(); + statement("#endif"); + statement(""); + } + } +} + // Returns a string representation of the ID, usable as a function arg. // Default is to simply return the expression representation fo the arg ID. // Subclasses may override to modify the return value. @@ -4365,8 +4907,8 @@ string CompilerGLSL::constant_expression_vector(const SPIRConstant &c, uint32_t // Fake unsigned constant literals with signed ones if possible. // Things like array sizes, etc, tend to be unsigned even though they could just as easily be signed. if (c.scalar_i32(vector, i) < 0) - SPIRV_CROSS_THROW( - "Tried to convert uint literal into int, but this made the literal negative."); + SPIRV_CROSS_THROW("Tried to convert uint literal into int, but this made " + "the literal negative."); } else if (backend.uint32_t_literal_suffix) res += "u"; @@ -5395,9 +5937,9 @@ string CompilerGLSL::to_combined_image_sampler(VariableID image_id, VariableID s return to_expression(itr->id) + array_expr; else { - SPIRV_CROSS_THROW( - "Cannot find mapping for combined sampler parameter, was build_combined_image_samplers() used " - "before compile() was called?"); + SPIRV_CROSS_THROW("Cannot find mapping for combined sampler parameter, was " + "build_combined_image_samplers() used " + "before compile() was called?"); } } else @@ -5418,6 +5960,30 @@ string CompilerGLSL::to_combined_image_sampler(VariableID image_id, VariableID s } } +bool CompilerGLSL::is_supported_subgroup_op_in_opengl(spv::Op op) +{ + switch (op) + { + case OpGroupNonUniformElect: + case OpGroupNonUniformBallot: + case OpGroupNonUniformBallotFindLSB: + case OpGroupNonUniformBallotFindMSB: + case OpGroupNonUniformBroadcast: + case OpGroupNonUniformBroadcastFirst: + case OpGroupNonUniformAll: + case OpGroupNonUniformAny: + case OpGroupNonUniformAllEqual: + case OpControlBarrier: + case OpMemoryBarrier: + case OpGroupNonUniformBallotBitCount: + case OpGroupNonUniformBallotBitExtract: + case OpGroupNonUniformInverseBallot: + return true; + default: + return false; + } +} + void CompilerGLSL::emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id) { if (options.vulkan_semantics && combined_image_samplers.empty()) @@ -5829,8 +6395,8 @@ string CompilerGLSL::to_function_name(const TextureFunctionNameArguments &args) { if (!expression_is_constant_null(args.lod)) { - SPIRV_CROSS_THROW( - "textureLod on sampler2DArrayShadow is not constant 0.0. This cannot be expressed in GLSL."); + SPIRV_CROSS_THROW("textureLod on sampler2DArrayShadow is not constant 0.0. This cannot be " + "expressed in GLSL."); } workaround_lod_array_shadow_as_grad = true; } @@ -5898,8 +6464,8 @@ std::string CompilerGLSL::convert_separate_image_to_expression(uint32_t id) else { if (!dummy_sampler_id) - SPIRV_CROSS_THROW( - "Cannot find dummy sampler ID. Was build_dummy_sampler_for_combined_images() called?"); + SPIRV_CROSS_THROW("Cannot find dummy sampler ID. Was " + "build_dummy_sampler_for_combined_images() called?"); return to_combined_image_sampler(id, dummy_sampler_id); } @@ -6673,8 +7239,8 @@ void CompilerGLSL::emit_subgroup_op(const Instruction &i) const uint32_t *ops = stream(i); auto op = static_cast(i.op); - if (!options.vulkan_semantics) - SPIRV_CROSS_THROW("Can only use subgroup operations in Vulkan semantics."); + if (!options.vulkan_semantics && !is_supported_subgroup_op_in_opengl(op)) + SPIRV_CROSS_THROW("This subgroup operation is only supported in Vulkan semantics."); // If we need to do implicit bitcasts, make sure we do it with the correct type. uint32_t integer_width = get_integer_width_for_instruction(i); @@ -6684,18 +7250,39 @@ void CompilerGLSL::emit_subgroup_op(const Instruction &i) switch (op) { case OpGroupNonUniformElect: - require_extension_internal("GL_KHR_shader_subgroup_basic"); + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupElect); + break; + + case OpGroupNonUniformBallotBitCount: + { + const GroupOperation operation = static_cast(ops[3]); + if (operation == GroupOperationReduce) + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBallotBitCount); + else if (operation == GroupOperationInclusiveScan || operation == GroupOperationExclusiveScan) + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupInverseBallot_InclBitCount_ExclBitCout); + } + break; + + case OpGroupNonUniformBallotBitExtract: + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBallotBitExtract); + break; + + case OpGroupNonUniformInverseBallot: + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupInverseBallot_InclBitCount_ExclBitCout); + break; + + case OpGroupNonUniformBallot: + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBallot); + break; + + case OpGroupNonUniformBallotFindLSB: + case OpGroupNonUniformBallotFindMSB: + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBallotFindLSB_MSB); break; case OpGroupNonUniformBroadcast: case OpGroupNonUniformBroadcastFirst: - case OpGroupNonUniformBallot: - case OpGroupNonUniformInverseBallot: - case OpGroupNonUniformBallotBitExtract: - case OpGroupNonUniformBallotBitCount: - case OpGroupNonUniformBallotFindLSB: - case OpGroupNonUniformBallotFindMSB: - require_extension_internal("GL_KHR_shader_subgroup_ballot"); + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBrodcast_First); break; case OpGroupNonUniformShuffle: @@ -6711,8 +7298,14 @@ void CompilerGLSL::emit_subgroup_op(const Instruction &i) case OpGroupNonUniformAll: case OpGroupNonUniformAny: case OpGroupNonUniformAllEqual: - require_extension_internal("GL_KHR_shader_subgroup_vote"); - break; + { + const SPIRType &type = expression_type(ops[3]); + if (type.basetype == SPIRType::BaseType::Boolean && type.vecsize == 1u) + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupAll_Any_AllEqualBool); + else + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupAllEqualT); + } + break; case OpGroupNonUniformFAdd: case OpGroupNonUniformFMul: @@ -7070,8 +7663,8 @@ string CompilerGLSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage) return "gl_CullDistance"; case BuiltInVertexId: if (options.vulkan_semantics) - SPIRV_CROSS_THROW( - "Cannot implement gl_VertexID in Vulkan GLSL. This shader was created with GL semantics."); + SPIRV_CROSS_THROW("Cannot implement gl_VertexID in Vulkan GLSL. This shader was created " + "with GL semantics."); return "gl_VertexID"; case BuiltInInstanceId: if (options.vulkan_semantics) @@ -7086,8 +7679,8 @@ string CompilerGLSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage) break; default: - SPIRV_CROSS_THROW( - "Cannot implement gl_InstanceID in Vulkan GLSL. This shader was created with GL semantics."); + SPIRV_CROSS_THROW("Cannot implement gl_InstanceID in Vulkan GLSL. This shader was " + "created with GL semantics."); } } if (!options.es && options.version < 140) @@ -7261,57 +7854,39 @@ string CompilerGLSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage) } case BuiltInNumSubgroups: - if (!options.vulkan_semantics) - SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup."); - require_extension_internal("GL_KHR_shader_subgroup_basic"); + request_subgroup_feature(ShaderSubgroupSupportHelper::NumSubgroups); return "gl_NumSubgroups"; case BuiltInSubgroupId: - if (!options.vulkan_semantics) - SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup."); - require_extension_internal("GL_KHR_shader_subgroup_basic"); + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupID); return "gl_SubgroupID"; case BuiltInSubgroupSize: - if (!options.vulkan_semantics) - SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup."); - require_extension_internal("GL_KHR_shader_subgroup_basic"); + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupSize); return "gl_SubgroupSize"; case BuiltInSubgroupLocalInvocationId: - if (!options.vulkan_semantics) - SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup."); - require_extension_internal("GL_KHR_shader_subgroup_basic"); + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupInvocationID); return "gl_SubgroupInvocationID"; case BuiltInSubgroupEqMask: - if (!options.vulkan_semantics) - SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup."); - require_extension_internal("GL_KHR_shader_subgroup_ballot"); + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask); return "gl_SubgroupEqMask"; case BuiltInSubgroupGeMask: - if (!options.vulkan_semantics) - SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup."); - require_extension_internal("GL_KHR_shader_subgroup_ballot"); + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask); return "gl_SubgroupGeMask"; case BuiltInSubgroupGtMask: - if (!options.vulkan_semantics) - SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup."); - require_extension_internal("GL_KHR_shader_subgroup_ballot"); + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask); return "gl_SubgroupGtMask"; case BuiltInSubgroupLeMask: - if (!options.vulkan_semantics) - SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup."); - require_extension_internal("GL_KHR_shader_subgroup_ballot"); + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask); return "gl_SubgroupLeMask"; case BuiltInSubgroupLtMask: - if (!options.vulkan_semantics) - SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup."); - require_extension_internal("GL_KHR_shader_subgroup_ballot"); + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask); return "gl_SubgroupLtMask"; case BuiltInLaunchIdNV: @@ -8108,10 +8683,11 @@ std::pair CompilerGLSL::flattened_access_chain_offset( // Dynamic array access. if (array_stride % word_stride) { - SPIRV_CROSS_THROW( - "Array stride for dynamic indexing must be divisible by the size of a 4-component vector. " - "Likely culprit here is a float or vec2 array inside a push constant block which is std430. " - "This cannot be flattened. Try using std140 layout instead."); + SPIRV_CROSS_THROW("Array stride for dynamic indexing must be divisible by the size " + "of a 4-component vector. " + "Likely culprit here is a float or vec2 array inside a push " + "constant block which is std430. " + "This cannot be flattened. Try using std140 layout instead."); } expr += to_enclosed_expression(index); @@ -8134,10 +8710,11 @@ std::pair CompilerGLSL::flattened_access_chain_offset( // Dynamic array access. if (array_stride % word_stride) { - SPIRV_CROSS_THROW( - "Array stride for dynamic indexing must be divisible by the size of a 4-component vector. " - "Likely culprit here is a float or vec2 array inside a push constant block which is std430. " - "This cannot be flattened. Try using std140 layout instead."); + SPIRV_CROSS_THROW("Array stride for dynamic indexing must be divisible by the size " + "of a 4-component vector. " + "Likely culprit here is a float or vec2 array inside a push " + "constant block which is std430. " + "This cannot be flattened. Try using std140 layout instead."); } expr += to_enclosed_expression(index, false); @@ -8193,10 +8770,10 @@ std::pair CompilerGLSL::flattened_access_chain_offset( // Dynamic array access. if (indexing_stride % word_stride) { - SPIRV_CROSS_THROW( - "Matrix stride for dynamic indexing must be divisible by the size of a 4-component vector. " - "Likely culprit here is a row-major matrix being accessed dynamically. " - "This cannot be flattened. Try using std140 layout instead."); + SPIRV_CROSS_THROW("Matrix stride for dynamic indexing must be divisible by the size of a " + "4-component vector. " + "Likely culprit here is a row-major matrix being accessed dynamically. " + "This cannot be flattened. Try using std140 layout instead."); } expr += to_enclosed_expression(index, false); @@ -8223,9 +8800,9 @@ std::pair CompilerGLSL::flattened_access_chain_offset( // Dynamic array access. if (indexing_stride % word_stride) { - SPIRV_CROSS_THROW( - "Stride for dynamic vector indexing must be divisible by the size of a 4-component vector. " - "This cannot be flattened in legacy targets."); + SPIRV_CROSS_THROW("Stride for dynamic vector indexing must be divisible by the " + "size of a 4-component vector. " + "This cannot be flattened in legacy targets."); } expr += to_enclosed_expression(index, false); @@ -9852,7 +10429,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) { auto &type = get(ops[0]); if (type.vecsize > 1) - GLSL_UFOP(not ); + GLSL_UFOP(not); else GLSL_UOP(!); break; @@ -10587,8 +11164,8 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) { uint32_t operands = ops[4]; if (operands != ImageOperandsSampleMask || length != 6) - SPIRV_CROSS_THROW( - "Multisampled image used in OpImageRead, but unexpected operand mask was used."); + SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected " + "operand mask was used."); uint32_t samples = ops[5]; imgexpr = join("subpassLoad(", to_expression(ops[2]), ", ", to_expression(samples), ")"); @@ -10602,8 +11179,8 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) { uint32_t operands = ops[4]; if (operands != ImageOperandsSampleMask || length != 6) - SPIRV_CROSS_THROW( - "Multisampled image used in OpImageRead, but unexpected operand mask was used."); + SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected " + "operand mask was used."); uint32_t samples = ops[5]; imgexpr = join("texelFetch(", to_expression(ops[2]), ", ivec2(gl_FragCoord.xy), ", @@ -10639,8 +11216,8 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) { uint32_t operands = ops[4]; if (operands != ImageOperandsSampleMask || length != 6) - SPIRV_CROSS_THROW( - "Multisampled image used in OpImageRead, but unexpected operand mask was used."); + SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected " + "operand mask was used."); uint32_t samples = ops[5]; statement(to_expression(sparse_code_id), " = sparseImageLoadARB(", to_expression(ops[2]), ", ", @@ -10660,8 +11237,8 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) { uint32_t operands = ops[4]; if (operands != ImageOperandsSampleMask || length != 6) - SPIRV_CROSS_THROW( - "Multisampled image used in OpImageRead, but unexpected operand mask was used."); + SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected " + "operand mask was used."); uint32_t samples = ops[5]; imgexpr = @@ -10817,9 +11394,15 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) if (execution_scope == ScopeSubgroup || memory == ScopeSubgroup) { - if (!options.vulkan_semantics) - SPIRV_CROSS_THROW("Can only use subgroup operations in Vulkan semantics."); - require_extension_internal("GL_KHR_shader_subgroup_basic"); + // OpControlBarrier with ScopeSubgroup is subgroupBarrier() + if (opcode != OpControlBarrier) + { + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMemBarrier); + } + else + { + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBarrier); + } } if (execution_scope != ScopeSubgroup && get_entry_point().model == ExecutionModelTessellationControl) @@ -11979,7 +12562,8 @@ string CompilerGLSL::type_to_glsl_constructor(const SPIRType &type) if (backend.use_array_constructor && type.array.size() > 1) { if (options.flatten_multidimensional_arrays) - SPIRV_CROSS_THROW("Cannot flatten constructors of multidimensional array constructors, e.g. float[][]()."); + SPIRV_CROSS_THROW("Cannot flatten constructors of multidimensional array constructors, " + "e.g. float[][]()."); else if (!options.es && options.version < 430) require_extension_internal("GL_ARB_arrays_of_arrays"); else if (options.es && options.version < 310) @@ -12530,8 +13114,7 @@ void CompilerGLSL::emit_function(SPIRFunction &func, const Bitset &return_flags) void CompilerGLSL::emit_fixup() { - auto &execution = get_entry_point(); - if (execution.model == ExecutionModelVertex) + if (is_vertex_like_shader()) { if (options.vertex.fixup_clipspace) { @@ -14276,7 +14859,8 @@ void CompilerGLSL::emit_inout_fragment_outputs_copy_to_subpass_inputs() if (!subpass_var) continue; if (!output_var) - SPIRV_CROSS_THROW("Need to declare the corresponding fragment output variable to be able to read from it."); + SPIRV_CROSS_THROW("Need to declare the corresponding fragment output variable to be able " + "to read from it."); if (is_array(get(output_var->basetype))) SPIRV_CROSS_THROW("Cannot use GL_EXT_shader_framebuffer_fetch with arrays of color outputs."); @@ -14301,3 +14885,212 @@ bool CompilerGLSL::variable_is_depth_or_compare(VariableID id) const { return image_is_comparison(get(get(id).basetype), id); } + +const char *CompilerGLSL::ShaderSubgroupSupportHelper::get_extension_name(Candidate c) +{ + static const char * const retval[CandidateCount] = { + "GL_KHR_shader_subgroup_ballot", + "GL_KHR_shader_subgroup_basic", + "GL_KHR_shader_subgroup_vote", + "GL_NV_gpu_shader_5", + "GL_NV_shader_thread_group", + "GL_NV_shader_thread_shuffle", + "GL_ARB_shader_ballot", + "GL_ARB_shader_group_vote", + "GL_AMD_gcn_shader" + }; + return retval[c]; +} + +SmallVector CompilerGLSL::ShaderSubgroupSupportHelper::get_extra_required_extension_names(Candidate c) +{ + switch (c) + { + case ARB_shader_ballot: + return { "GL_ARB_shader_int64" }; + case AMD_gcn_shader: + return { "GL_AMD_gpu_shader_int64", "GL_NV_gpu_shader5" }; + default: + return {}; + } +} + +const char *CompilerGLSL::ShaderSubgroupSupportHelper::get_extra_required_extension_predicate(Candidate c) +{ + switch (c) + { + case ARB_shader_ballot: + return "defined(GL_ARB_shader_int64)"; + case AMD_gcn_shader: + return "(defined(GL_AMD_gpu_shader_int64) || defined(GL_NV_gpu_shader5))"; + default: + return ""; + } +} + +CompilerGLSL::ShaderSubgroupSupportHelper::FeatureVector +CompilerGLSL::ShaderSubgroupSupportHelper::get_feature_dependencies(Feature feature) +{ + switch (feature) + { + case SubgroupAllEqualT: + return { SubgroupBrodcast_First, SubgroupAll_Any_AllEqualBool }; + case SubgroupElect: + return { SubgroupBallotFindLSB_MSB, SubgroupBallot, SubgroupInvocationID }; + case SubgroupInverseBallot_InclBitCount_ExclBitCout: + return { SubgroupMask }; + case SubgroupBallotBitCount: + return { SubgroupBallot }; + default: + return {}; + } +} + +CompilerGLSL::ShaderSubgroupSupportHelper::FeatureMask +CompilerGLSL::ShaderSubgroupSupportHelper::get_feature_dependency_mask(Feature feature) +{ + return build_mask(get_feature_dependencies(feature)); +} + +bool CompilerGLSL::ShaderSubgroupSupportHelper::can_feature_be_implemented_without_extensions(Feature feature) +{ + static const bool retval[FeatureCount] = { + false, false, false, false, false, false, + true, // SubgroupBalloFindLSB_MSB + false, false, false, false, + true, // SubgroupMemBarrier - replaced with workgroup memory barriers + false, false, true, false + }; + + return retval[feature]; +} + +CompilerGLSL::ShaderSubgroupSupportHelper::Candidate +CompilerGLSL::ShaderSubgroupSupportHelper::get_KHR_extension_for_feature(Feature feature) +{ + static const Candidate extensions[FeatureCount] = { + KHR_shader_subgroup_ballot, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic, + KHR_shader_subgroup_basic, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_vote, + KHR_shader_subgroup_vote, KHR_shader_subgroup_basic, KHR_shader_subgroup_ballot, KHR_shader_subgroup_basic, + KHR_shader_subgroup_basic, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot + }; + + return extensions[feature]; +} + +void CompilerGLSL::ShaderSubgroupSupportHelper::request_feature(Feature feature) +{ + feature_mask |= (FeatureMask(1) << feature) | get_feature_dependency_mask(feature); +} + +bool CompilerGLSL::ShaderSubgroupSupportHelper::is_feature_requested(Feature feature) const +{ + return (feature_mask & (1u << feature)) != 0; +} + +CompilerGLSL::ShaderSubgroupSupportHelper::Result +CompilerGLSL::ShaderSubgroupSupportHelper::resolve() const +{ + Result res; + + for (uint32_t i = 0u; i < FeatureCount; ++i) + { + if (feature_mask & (1u << i)) + { + auto feature = static_cast(i); + std::unordered_set unique_candidates; + + auto candidates = get_candidates_for_feature(feature); + unique_candidates.insert(candidates.begin(), candidates.end()); + + auto deps = get_feature_dependencies(feature); + for (Feature d : deps) + { + candidates = get_candidates_for_feature(d); + if (!candidates.empty()) + unique_candidates.insert(candidates.begin(), candidates.end()); + } + + for (uint32_t c : unique_candidates) + ++res.weights[static_cast(c)]; + } + } + + return res; +} + +CompilerGLSL::ShaderSubgroupSupportHelper::CandidateVector +CompilerGLSL::ShaderSubgroupSupportHelper::get_candidates_for_feature(Feature ft, const Result &r) +{ + auto c = get_candidates_for_feature(ft); + auto cmp = [&r](Candidate a, Candidate b) { + if (r.weights[a] == r.weights[b]) + return a < b; // Prefer candidates with lower enum value + return r.weights[a] > r.weights[b]; + }; + std::sort(c.begin(), c.end(), cmp); + return c; +} + +CompilerGLSL::ShaderSubgroupSupportHelper::CandidateVector +CompilerGLSL::ShaderSubgroupSupportHelper::get_candidates_for_feature(Feature feature) +{ + switch (feature) + { + case SubgroupMask: + return { KHR_shader_subgroup_ballot, NV_shader_thread_group, ARB_shader_ballot }; + case SubgroupSize: + return { KHR_shader_subgroup_basic, NV_shader_thread_group, AMD_gcn_shader, ARB_shader_ballot }; + case SubgroupInvocationID: + return { KHR_shader_subgroup_basic, NV_shader_thread_group, ARB_shader_ballot }; + case SubgroupID: + return { KHR_shader_subgroup_basic, NV_shader_thread_group }; + case NumSubgroups: + return { KHR_shader_subgroup_basic, NV_shader_thread_group }; + case SubgroupBrodcast_First: + return { KHR_shader_subgroup_ballot, NV_shader_thread_shuffle, ARB_shader_ballot }; + case SubgroupBallotFindLSB_MSB: + return { KHR_shader_subgroup_ballot, NV_shader_thread_group }; + case SubgroupAll_Any_AllEqualBool: + return { KHR_shader_subgroup_vote, NV_gpu_shader_5, ARB_shader_group_vote, AMD_gcn_shader }; + case SubgroupAllEqualT: + return {}; // depends on other features only + case SubgroupElect: + return {}; // depends on other features only + case SubgroupBallot: + return { KHR_shader_subgroup_ballot, NV_shader_thread_group, ARB_shader_ballot }; + case SubgroupBarrier: + return { KHR_shader_subgroup_basic, NV_shader_thread_group, ARB_shader_ballot, AMD_gcn_shader }; + case SubgroupMemBarrier: + return { KHR_shader_subgroup_basic }; + case SubgroupInverseBallot_InclBitCount_ExclBitCout: + return {}; + case SubgroupBallotBitExtract: + return { NV_shader_thread_group }; + case SubgroupBallotBitCount: + return {}; + default: + return {}; + } +} + +CompilerGLSL::ShaderSubgroupSupportHelper::FeatureMask +CompilerGLSL::ShaderSubgroupSupportHelper::build_mask(const SmallVector &features) +{ + FeatureMask mask = 0; + for (Feature f : features) + mask |= FeatureMask(1) << f; + return mask; +} + +CompilerGLSL::ShaderSubgroupSupportHelper::Result::Result() +{ + for (auto &weight : weights) + weight = 0; + + // Make sure KHR_shader_subgroup extensions are always prefered. + const uint32_t big_num = FeatureCount; + weights[KHR_shader_subgroup_ballot] = big_num; + weights[KHR_shader_subgroup_basic] = big_num; + weights[KHR_shader_subgroup_vote] = big_num; +} diff --git a/3rdparty/spirv-cross/spirv_glsl.hpp b/3rdparty/spirv-cross/spirv_glsl.hpp index 79f6582c3..33625f381 100644 --- a/3rdparty/spirv-cross/spirv_glsl.hpp +++ b/3rdparty/spirv-cross/spirv_glsl.hpp @@ -135,12 +135,14 @@ public: struct VertexOptions { - // GLSL: In vertex shaders, rewrite [0, w] depth (Vulkan/D3D style) to [-w, w] depth (GL style). - // MSL: In vertex shaders, rewrite [-w, w] depth (GL style) to [0, w] depth. - // HLSL: In vertex shaders, rewrite [-w, w] depth (GL style) to [0, w] depth. + // "Vertex-like shader" here is any shader stage that can write BuiltInPosition. + + // GLSL: In vertex-like shaders, rewrite [0, w] depth (Vulkan/D3D style) to [-w, w] depth (GL style). + // MSL: In vertex-like shaders, rewrite [-w, w] depth (GL style) to [0, w] depth. + // HLSL: In vertex-like shaders, rewrite [-w, w] depth (GL style) to [0, w] depth. bool fixup_clipspace = false; - // Inverts gl_Position.y or equivalent. + // In vertex-like shaders, inverts gl_Position.y or equivalent. bool flip_vert_y = false; // GLSL only, for HLSL version of this option, see CompilerHLSL. @@ -241,7 +243,84 @@ public: // - Images which are statically used at least once with Dref opcodes. bool variable_is_depth_or_compare(VariableID id) const; + protected: + struct ShaderSubgroupSupportHelper + { + // lower enum value = greater priority + enum Candidate + { + KHR_shader_subgroup_ballot, + KHR_shader_subgroup_basic, + KHR_shader_subgroup_vote, + NV_gpu_shader_5, + NV_shader_thread_group, + NV_shader_thread_shuffle, + ARB_shader_ballot, + ARB_shader_group_vote, + AMD_gcn_shader, + + CandidateCount + }; + + static const char *get_extension_name(Candidate c); + static SmallVector get_extra_required_extension_names(Candidate c); + static const char *get_extra_required_extension_predicate(Candidate c); + + enum Feature + { + SubgroupMask, + SubgroupSize, + SubgroupInvocationID, + SubgroupID, + NumSubgroups, + SubgroupBrodcast_First, + SubgroupBallotFindLSB_MSB, + SubgroupAll_Any_AllEqualBool, + SubgroupAllEqualT, + SubgroupElect, + SubgroupBarrier, + SubgroupMemBarrier, + SubgroupBallot, + SubgroupInverseBallot_InclBitCount_ExclBitCout, + SubgroupBallotBitExtract, + SubgroupBallotBitCount, + + FeatureCount + }; + + using FeatureMask = uint32_t; + static_assert(sizeof(FeatureMask) * 8u >= FeatureCount, "Mask type needs more bits."); + + using CandidateVector = SmallVector; + using FeatureVector = SmallVector; + + static FeatureVector get_feature_dependencies(Feature feature); + static FeatureMask get_feature_dependency_mask(Feature feature); + static bool can_feature_be_implemented_without_extensions(Feature feature); + static Candidate get_KHR_extension_for_feature(Feature feature); + + struct Result + { + Result(); + uint32_t weights[CandidateCount]; + }; + + void request_feature(Feature feature); + bool is_feature_requested(Feature feature) const; + Result resolve() const; + + static CandidateVector get_candidates_for_feature(Feature ft, const Result &r); + + private: + static CandidateVector get_candidates_for_feature(Feature ft); + static FeatureMask build_mask(const SmallVector &features); + FeatureMask feature_mask = 0; + }; + + // TODO remove this function when all subgroup ops are supported (or make it always return true) + static bool is_supported_subgroup_op_in_opengl(spv::Op op); + void reset(); void emit_function(SPIRFunction &func, const Bitset &return_flags); @@ -272,6 +351,8 @@ protected: void build_workgroup_size(SmallVector &arguments, const SpecializationConstant &x, const SpecializationConstant &y, const SpecializationConstant &z); + void request_subgroup_feature(ShaderSubgroupSupportHelper::Feature feature); + virtual void emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id); virtual void emit_texture_op(const Instruction &i, bool sparse); virtual std::string to_texture_op(const Instruction &i, bool sparse, bool *forward, @@ -483,6 +564,7 @@ protected: void emit_struct(SPIRType &type); void emit_resources(); + void emit_extension_workarounds(spv::ExecutionModel model); void emit_buffer_block_native(const SPIRVariable &var); void emit_buffer_reference_block(SPIRType &type, bool forward_declaration); void emit_buffer_block_legacy(const SPIRVariable &var); @@ -680,6 +762,8 @@ protected: std::unordered_set flattened_buffer_blocks; std::unordered_map flattened_structs; + ShaderSubgroupSupportHelper shader_subgroup_supporter; + std::string load_flattened_struct(const std::string &basename, const SPIRType &type); std::string to_flattened_struct_member(const std::string &basename, const SPIRType &type, uint32_t index); void store_flattened_struct(uint32_t lhs_id, uint32_t value); diff --git a/3rdparty/spirv-cross/spirv_hlsl.cpp b/3rdparty/spirv-cross/spirv_hlsl.cpp index 070a83e56..b7cdef739 100644 --- a/3rdparty/spirv-cross/spirv_hlsl.cpp +++ b/3rdparty/spirv-cross/spirv_hlsl.cpp @@ -2649,7 +2649,7 @@ void CompilerHLSL::emit_hlsl_entry_point() void CompilerHLSL::emit_fixup() { - if (get_entry_point().model == ExecutionModelVertex) + if (is_vertex_like_shader()) { // Do various mangling on the gl_Position. if (hlsl_options.shader_model <= 30) diff --git a/3rdparty/spirv-cross/spirv_msl.cpp b/3rdparty/spirv-cross/spirv_msl.cpp index 0aff6e7f7..cb6050b52 100644 --- a/3rdparty/spirv-cross/spirv_msl.cpp +++ b/3rdparty/spirv-cross/spirv_msl.cpp @@ -9028,9 +9028,7 @@ string CompilerMSL::convert_row_major_matrix(string exp_str, const SPIRType &exp // Called automatically at the end of the entry point function void CompilerMSL::emit_fixup() { - if ((get_execution_model() == ExecutionModelVertex || - get_execution_model() == ExecutionModelTessellationEvaluation) && - stage_out_var_id && !qual_pos_var_name.empty() && !capture_output_to_buffer) + if (is_vertex_like_shader() && stage_out_var_id && !qual_pos_var_name.empty() && !capture_output_to_buffer) { if (options.vertex.fixup_clipspace) statement(qual_pos_var_name, ".z = (", qual_pos_var_name, ".z + ", qual_pos_var_name,