mirror of https://github.com/bkaradzic/bgfx
Updated spirv-cross.
This commit is contained in:
parent
39d1f8c32c
commit
b7fb619125
|
@ -565,6 +565,7 @@ struct CLIArguments
|
|||
bool msl_arrayed_subpass_input = false;
|
||||
uint32_t msl_r32ui_linear_texture_alignment = 4;
|
||||
uint32_t msl_r32ui_alignment_constant_id = 65535;
|
||||
bool msl_texture_1d_as_2d = false;
|
||||
bool glsl_emit_push_constant_as_ubo = false;
|
||||
bool glsl_emit_ubo_as_plain_uniforms = false;
|
||||
bool glsl_force_flattened_io_blocks = false;
|
||||
|
@ -728,7 +729,7 @@ static void print_help_msl()
|
|||
"\t[--msl-texture-buffer-native]:\n\t\tEnable native support for texel buffers. Otherwise, it is emulated as a normal texture.\n"
|
||||
"\t[--msl-framebuffer-fetch]:\n\t\tImplement subpass inputs with frame buffer fetch.\n"
|
||||
"\t\tEmits [[color(N)]] inputs in fragment stage.\n"
|
||||
"\t\tRequires iOS Metal.\n"
|
||||
"\t\tRequires an Apple GPU.\n"
|
||||
"\t[--msl-emulate-cube-array]:\n\t\tEmulate cube arrays with 2D array and manual math.\n"
|
||||
"\t[--msl-discrete-descriptor-set <index>]:\n\t\tWhen using argument buffers, forces a specific descriptor set to be implemented without argument buffers.\n"
|
||||
"\t\tUseful for implementing push descriptors in emulation layers.\n"
|
||||
|
@ -774,7 +775,9 @@ static void print_help_msl()
|
|||
"\t[--msl-r32ui-linear-texture-align <alignment>]:\n\t\tThe required alignment of linear textures of format MTLPixelFormatR32Uint.\n"
|
||||
"\t\tThis is used to align the row stride for atomic accesses to such images.\n"
|
||||
"\t[--msl-r32ui-linear-texture-align-constant-id <id>]:\n\t\tThe function constant ID to use for the linear texture alignment.\n"
|
||||
"\t\tOn MSL 1.2 or later, you can override the alignment by setting this function constant.\n");
|
||||
"\t\tOn MSL 1.2 or later, you can override the alignment by setting this function constant.\n"
|
||||
"\t[--msl-texture-1d-as-2d]:\n\t\tEmit Image variables of dimension Dim1D as texture2d.\n"
|
||||
"\t\tIn Metal, 1D textures do not support all features that 2D textures do. Use this option if your code relies on these features.\n");
|
||||
// clang-format on
|
||||
}
|
||||
|
||||
|
@ -991,9 +994,9 @@ static string compile_iteration(const CLIArguments &args, std::vector<uint32_t>
|
|||
if (args.msl_ios)
|
||||
{
|
||||
msl_opts.platform = CompilerMSL::Options::iOS;
|
||||
msl_opts.ios_use_framebuffer_fetch_subpasses = args.msl_framebuffer_fetch;
|
||||
msl_opts.emulate_cube_array = args.msl_emulate_cube_array;
|
||||
}
|
||||
msl_opts.use_framebuffer_fetch_subpasses = args.msl_framebuffer_fetch;
|
||||
msl_opts.pad_fragment_output_components = args.msl_pad_fragment_output;
|
||||
msl_opts.tess_domain_origin_lower_left = args.msl_domain_lower_left;
|
||||
msl_opts.argument_buffers = args.msl_argument_buffers;
|
||||
|
@ -1015,6 +1018,7 @@ static string compile_iteration(const CLIArguments &args, std::vector<uint32_t>
|
|||
msl_opts.arrayed_subpass_input = args.msl_arrayed_subpass_input;
|
||||
msl_opts.r32ui_linear_texture_alignment = args.msl_r32ui_linear_texture_alignment;
|
||||
msl_opts.r32ui_alignment_constant_id = args.msl_r32ui_alignment_constant_id;
|
||||
msl_opts.texture_1D_as_2D = args.msl_texture_1d_as_2d;
|
||||
msl_comp->set_msl_options(msl_opts);
|
||||
for (auto &v : args.msl_discrete_descriptor_sets)
|
||||
msl_comp->add_discrete_descriptor_set(v);
|
||||
|
@ -1439,6 +1443,7 @@ static int main_inner(int argc, char *argv[])
|
|||
[&args](CLIParser &parser) { args.msl_r32ui_linear_texture_alignment = parser.next_uint(); });
|
||||
cbs.add("--msl-r32ui-linear-texture-align-constant-id",
|
||||
[&args](CLIParser &parser) { args.msl_r32ui_alignment_constant_id = parser.next_uint(); });
|
||||
cbs.add("--msl-texture-1d-as-2d", [&args](CLIParser &) { args.msl_texture_1d_as_2d = true; });
|
||||
cbs.add("--extension", [&args](CLIParser &parser) { args.extensions.push_back(parser.next_string()); });
|
||||
cbs.add("--rename-entry-point", [&args](CLIParser &parser) {
|
||||
auto old_name = parser.next_string();
|
||||
|
|
|
@ -357,28 +357,6 @@ public:
|
|||
return TypedID<U>(*this);
|
||||
}
|
||||
|
||||
bool operator==(const TypedID &other) const
|
||||
{
|
||||
return id == other.id;
|
||||
}
|
||||
|
||||
bool operator!=(const TypedID &other) const
|
||||
{
|
||||
return id != other.id;
|
||||
}
|
||||
|
||||
template <Types type>
|
||||
bool operator==(const TypedID<type> &other) const
|
||||
{
|
||||
return id == uint32_t(other);
|
||||
}
|
||||
|
||||
template <Types type>
|
||||
bool operator!=(const TypedID<type> &other) const
|
||||
{
|
||||
return id != uint32_t(other);
|
||||
}
|
||||
|
||||
private:
|
||||
uint32_t id = 0;
|
||||
};
|
||||
|
@ -403,26 +381,6 @@ public:
|
|||
return id;
|
||||
}
|
||||
|
||||
bool operator==(const TypedID &other) const
|
||||
{
|
||||
return id == other.id;
|
||||
}
|
||||
|
||||
bool operator!=(const TypedID &other) const
|
||||
{
|
||||
return id != other.id;
|
||||
}
|
||||
|
||||
bool operator==(const TypedID<TypeNone> &other) const
|
||||
{
|
||||
return id == uint32_t(other);
|
||||
}
|
||||
|
||||
bool operator!=(const TypedID<TypeNone> &other) const
|
||||
{
|
||||
return id != uint32_t(other);
|
||||
}
|
||||
|
||||
private:
|
||||
uint32_t id = 0;
|
||||
};
|
||||
|
|
|
@ -599,8 +599,8 @@ spvc_result spvc_compiler_options_set_uint(spvc_compiler_options options, spvc_c
|
|||
options->msl.enable_base_index_zero = value != 0;
|
||||
break;
|
||||
|
||||
case SPVC_COMPILER_OPTION_MSL_IOS_FRAMEBUFFER_FETCH_SUBPASS:
|
||||
options->msl.ios_use_framebuffer_fetch_subpasses = value != 0;
|
||||
case SPVC_COMPILER_OPTION_MSL_FRAMEBUFFER_FETCH_SUBPASS:
|
||||
options->msl.use_framebuffer_fetch_subpasses = value != 0;
|
||||
break;
|
||||
|
||||
case SPVC_COMPILER_OPTION_MSL_INVARIANT_FP_MATH:
|
||||
|
|
|
@ -606,7 +606,11 @@ typedef enum spvc_compiler_option
|
|||
SPVC_COMPILER_OPTION_MSL_DYNAMIC_OFFSETS_BUFFER_INDEX = 43 | SPVC_COMPILER_OPTION_MSL_BIT,
|
||||
SPVC_COMPILER_OPTION_MSL_TEXTURE_1D_AS_2D = 44 | SPVC_COMPILER_OPTION_MSL_BIT,
|
||||
SPVC_COMPILER_OPTION_MSL_ENABLE_BASE_INDEX_ZERO = 45 | SPVC_COMPILER_OPTION_MSL_BIT,
|
||||
|
||||
/* Obsolete. Use MSL_FRAMEBUFFER_FETCH_SUBPASS instead. */
|
||||
SPVC_COMPILER_OPTION_MSL_IOS_FRAMEBUFFER_FETCH_SUBPASS = 46 | SPVC_COMPILER_OPTION_MSL_BIT,
|
||||
SPVC_COMPILER_OPTION_MSL_FRAMEBUFFER_FETCH_SUBPASS = 46 | SPVC_COMPILER_OPTION_MSL_BIT,
|
||||
|
||||
SPVC_COMPILER_OPTION_MSL_INVARIANT_FP_MATH = 47 | SPVC_COMPILER_OPTION_MSL_BIT,
|
||||
SPVC_COMPILER_OPTION_MSL_EMULATE_CUBEMAP_ARRAY = 48 | SPVC_COMPILER_OPTION_MSL_BIT,
|
||||
SPVC_COMPILER_OPTION_MSL_ENABLE_DECORATION_BINDING = 49 | SPVC_COMPILER_OPTION_MSL_BIT,
|
||||
|
|
|
@ -511,6 +511,7 @@ string CompilerGLSL::compile()
|
|||
{
|
||||
// only NV_gpu_shader5 supports divergent indexing on OpenGL, and it does so without extra qualifiers
|
||||
backend.nonuniform_qualifier = "";
|
||||
backend.needs_row_major_load_workaround = true;
|
||||
}
|
||||
backend.force_gl_in_out_block = true;
|
||||
backend.supports_extensions = true;
|
||||
|
@ -3798,6 +3799,17 @@ void CompilerGLSL::emit_extension_workarounds(spv::ExecutionModel model)
|
|||
statement("");
|
||||
}
|
||||
}
|
||||
|
||||
if (!workaround_ubo_load_overload_types.empty())
|
||||
{
|
||||
for (auto &type_id : workaround_ubo_load_overload_types)
|
||||
{
|
||||
auto &type = get<SPIRType>(type_id);
|
||||
statement(type_to_glsl(type), " SPIRV_Cross_workaround_load_row_major(", type_to_glsl(type),
|
||||
" wrap) { return wrap; }");
|
||||
}
|
||||
statement("");
|
||||
}
|
||||
}
|
||||
|
||||
// Returns a string representation of the ID, usable as a function arg.
|
||||
|
@ -9496,11 +9508,15 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
|
|||
if (forward && ptr_expression)
|
||||
ptr_expression->need_transpose = old_need_transpose;
|
||||
|
||||
bool flattened = ptr_expression && flattened_buffer_blocks.count(ptr_expression->loaded_from) != 0;
|
||||
|
||||
if (backend.needs_row_major_load_workaround && !is_non_native_row_major_matrix(ptr) && !flattened)
|
||||
rewrite_load_for_wrapped_row_major(expr, result_type, ptr);
|
||||
|
||||
// By default, suppress usage tracking since using same expression multiple times does not imply any extra work.
|
||||
// However, if we try to load a complex, composite object from a flattened buffer,
|
||||
// we should avoid emitting the same code over and over and lower the result to a temporary.
|
||||
bool usage_tracking = ptr_expression && flattened_buffer_blocks.count(ptr_expression->loaded_from) != 0 &&
|
||||
(type.basetype == SPIRType::Struct || (type.columns > 1));
|
||||
bool usage_tracking = flattened && (type.basetype == SPIRType::Struct || (type.columns > 1));
|
||||
|
||||
SPIRExpression *e = nullptr;
|
||||
if (!forward && expression_is_non_value_type_array(ptr))
|
||||
|
@ -13253,8 +13269,14 @@ void CompilerGLSL::branch(BlockID from, BlockID to)
|
|||
// and end the chain here.
|
||||
statement("continue;");
|
||||
}
|
||||
else if (is_break(to))
|
||||
else if (from != to && is_break(to))
|
||||
{
|
||||
// We cannot break to ourselves, so check explicitly for from != to.
|
||||
// This case can trigger if a loop header is all three of these things:
|
||||
// - Continue block
|
||||
// - Loop header
|
||||
// - Break merge target all at once ...
|
||||
|
||||
// Very dirty workaround.
|
||||
// Switch constructs are able to break, but they cannot break out of a loop at the same time.
|
||||
// Only sensible solution is to make a ladder variable, which we declare at the top of the switch block,
|
||||
|
@ -14578,7 +14600,35 @@ void CompilerGLSL::convert_non_uniform_expression(const SPIRType &type, std::str
|
|||
// so we might have to fixup the OpLoad-ed expression late.
|
||||
|
||||
auto start_array_index = expr.find_first_of('[');
|
||||
auto end_array_index = expr.find_last_of(']');
|
||||
|
||||
if (start_array_index == string::npos)
|
||||
return;
|
||||
|
||||
// Check for the edge case that a non-arrayed resource was marked to be nonuniform,
|
||||
// and the bracket we found is actually part of non-resource related data.
|
||||
if (expr.find_first_of(',') < start_array_index)
|
||||
return;
|
||||
|
||||
// We've opened a bracket, track expressions until we can close the bracket.
|
||||
// This must be our image index.
|
||||
size_t end_array_index = string::npos;
|
||||
unsigned bracket_count = 1;
|
||||
for (size_t index = start_array_index + 1; index < expr.size(); index++)
|
||||
{
|
||||
if (expr[index] == ']')
|
||||
{
|
||||
if (--bracket_count == 0)
|
||||
{
|
||||
end_array_index = index;
|
||||
break;
|
||||
}
|
||||
}
|
||||
else if (expr[index] == '[')
|
||||
bracket_count++;
|
||||
}
|
||||
|
||||
assert(bracket_count == 0);
|
||||
|
||||
// Doesn't really make sense to declare a non-arrayed image with nonuniformEXT, but there's
|
||||
// nothing we can do here to express that.
|
||||
if (start_array_index == string::npos || end_array_index == string::npos || end_array_index < start_array_index)
|
||||
|
@ -15087,3 +15137,63 @@ CompilerGLSL::ShaderSubgroupSupportHelper::Result::Result()
|
|||
weights[KHR_shader_subgroup_basic] = big_num;
|
||||
weights[KHR_shader_subgroup_vote] = big_num;
|
||||
}
|
||||
|
||||
// Registers a type ID in workaround_ubo_load_overload_types unless it is already present.
// Each ID is stored at most once, in first-request order. When a new ID is added,
// force_recompile() is called before the ID is appended.
// NOTE(review): presumably the recompile is needed so the wrapper overload for the new type
// is declared before its first use in the generated source - confirm against force_recompile().
void CompilerGLSL::request_workaround_wrapper_overload(TypeID id)
|
||||
{
|
||||
// Must be ordered to maintain deterministic output, so vector is appropriate.
|
||||
// Linear search for an existing entry; only a miss mutates state.
if (find(begin(workaround_ubo_load_overload_types), end(workaround_ubo_load_overload_types), id) ==
|
||||
end(workaround_ubo_load_overload_types))
|
||||
{
|
||||
force_recompile();
|
||||
workaround_ubo_load_overload_types.push_back(id);
|
||||
}
|
||||
}
|
||||
|
||||
// Conditionally wraps a load expression in a call to SPIRV_Cross_workaround_load_row_major().
//
// expr        - the load expression; replaced in place with the wrapped form when the workaround applies.
// loaded_type - type ID of the loaded value; also the ID registered for a wrapper overload.
// ptr         - ID used to look up the backing variable of the load.
//
// expr is left untouched when there is no backing variable, when the backing type is not a
// Block-decorated struct in StorageClassUniform, or when no inspected struct member carries
// DecorationRowMajor.
void CompilerGLSL::rewrite_load_for_wrapped_row_major(std::string &expr, TypeID loaded_type, ID ptr)
|
||||
{
|
||||
// Loading row-major matrices from UBOs on older AMD Windows OpenGL drivers is problematic.
|
||||
// To load these types correctly, we must first wrap them in a dummy function whose only purpose is to
|
||||
// ensure row_major decoration is actually respected.
|
||||
auto *var = maybe_get_backing_variable(ptr);
|
||||
if (!var)
|
||||
return;
|
||||
|
||||
// Only loads backed by a uniform block (struct + StorageClassUniform + Block decoration) qualify.
auto &backing_type = get<SPIRType>(var->basetype);
|
||||
bool is_ubo = backing_type.basetype == SPIRType::Struct &&
|
||||
backing_type.storage == StorageClassUniform &&
|
||||
has_decoration(backing_type.self, DecorationBlock);
|
||||
if (!is_ubo)
|
||||
return;
|
||||
|
||||
// 'type' is the type whose members get inspected below; it starts out as the loaded type.
auto *type = &get<SPIRType>(loaded_type);
|
||||
bool rewrite = false;
|
||||
|
||||
if (is_matrix(*type))
|
||||
{
|
||||
// To avoid adding a lot of unnecessary meta tracking to forward the row_major state,
|
||||
// we will simply look at the base struct itself. It is exceptionally rare to mix and match row-major/col-major state.
|
||||
// If there is any row-major action going on, we apply the workaround.
|
||||
// It is harmless to apply the workaround to column-major matrices, so this is still a valid solution.
|
||||
// If an access chain occurred, the workaround is not required, so loading vectors or scalars don't need workaround.
|
||||
// Redirect the member scan to the backing block; is_ubo above guarantees it is a struct.
type = &backing_type;
|
||||
}
|
||||
|
||||
// Loaded types that are neither matrices nor structs skip this scan, so 'rewrite' stays false for them.
if (type->basetype == SPIRType::Struct)
|
||||
{
|
||||
// If we're loading a struct where any member is a row-major matrix, apply the workaround.
|
||||
for (uint32_t i = 0; i < uint32_t(type->member_types.size()); i++)
|
||||
{
|
||||
if (combined_decoration_for_member(*type, i).get(DecorationRowMajor))
|
||||
{
|
||||
rewrite = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (rewrite)
|
||||
{
|
||||
// Register the overload for the loaded type (not the inspected struct), then wrap the expression.
request_workaround_wrapper_overload(loaded_type);
|
||||
expr = join("SPIRV_Cross_workaround_load_row_major(", expr, ")");
|
||||
}
|
||||
}
|
||||
|
|
|
@ -560,6 +560,7 @@ protected:
|
|||
bool support_small_type_sampling_result = false;
|
||||
bool support_case_fallthrough = true;
|
||||
bool use_array_constructor = false;
|
||||
bool needs_row_major_load_workaround = false;
|
||||
} backend;
|
||||
|
||||
void emit_struct(SPIRType &type);
|
||||
|
@ -784,6 +785,10 @@ protected:
|
|||
// Currently used by NMin/Max/Clamp implementations.
|
||||
std::unordered_map<uint32_t, uint32_t> extra_sub_expressions;
|
||||
|
||||
SmallVector<TypeID> workaround_ubo_load_overload_types;
|
||||
void request_workaround_wrapper_overload(TypeID id);
|
||||
void rewrite_load_for_wrapped_row_major(std::string &expr, TypeID loaded_type, ID ptr);
|
||||
|
||||
uint32_t statement_count = 0;
|
||||
|
||||
inline bool is_legacy() const
|
||||
|
|
|
@ -160,7 +160,7 @@ void CompilerMSL::build_implicit_builtins()
|
|||
bool need_sample_mask = msl_options.additional_fixed_sample_mask != 0xffffffff;
|
||||
if (need_subpass_input || need_sample_pos || need_subgroup_mask || need_vertex_params || need_tesc_params ||
|
||||
need_multiview || need_dispatch_base || need_vertex_base_params || need_grid_params ||
|
||||
needs_subgroup_invocation_id || need_sample_mask)
|
||||
needs_subgroup_invocation_id || needs_subgroup_size || need_sample_mask)
|
||||
{
|
||||
bool has_frag_coord = false;
|
||||
bool has_sample_id = false;
|
||||
|
@ -197,7 +197,7 @@ void CompilerMSL::build_implicit_builtins()
|
|||
if (var.storage != StorageClassInput)
|
||||
return;
|
||||
|
||||
if (need_subpass_input && (!msl_options.is_ios() || !msl_options.ios_use_framebuffer_fetch_subpasses))
|
||||
if (need_subpass_input && (!msl_options.use_framebuffer_fetch_subpasses))
|
||||
{
|
||||
switch (builtin)
|
||||
{
|
||||
|
@ -287,7 +287,7 @@ void CompilerMSL::build_implicit_builtins()
|
|||
has_subgroup_invocation_id = true;
|
||||
}
|
||||
|
||||
if (need_subgroup_ge_mask && builtin == BuiltInSubgroupSize)
|
||||
if ((need_subgroup_ge_mask || needs_subgroup_size) && builtin == BuiltInSubgroupSize)
|
||||
{
|
||||
builtin_subgroup_size_id = var.self;
|
||||
mark_implicit_builtin(StorageClassInput, BuiltInSubgroupSize, var.self);
|
||||
|
@ -331,7 +331,7 @@ void CompilerMSL::build_implicit_builtins()
|
|||
// Use Metal's native frame-buffer fetch API for subpass inputs.
|
||||
if ((!has_frag_coord || (msl_options.multiview && !has_view_idx) ||
|
||||
(msl_options.arrayed_subpass_input && !msl_options.multiview && !has_layer)) &&
|
||||
(!msl_options.is_ios() || !msl_options.ios_use_framebuffer_fetch_subpasses) && need_subpass_input)
|
||||
(!msl_options.use_framebuffer_fetch_subpasses) && need_subpass_input)
|
||||
{
|
||||
if (!has_frag_coord)
|
||||
{
|
||||
|
@ -593,7 +593,7 @@ void CompilerMSL::build_implicit_builtins()
|
|||
mark_implicit_builtin(StorageClassInput, BuiltInSubgroupLocalInvocationId, var_id);
|
||||
}
|
||||
|
||||
if (!has_subgroup_size && need_subgroup_ge_mask)
|
||||
if (!has_subgroup_size && (need_subgroup_ge_mask || needs_subgroup_size))
|
||||
{
|
||||
uint32_t offset = ir.increase_bound_by(2);
|
||||
uint32_t type_ptr_id = offset;
|
||||
|
@ -1265,7 +1265,8 @@ void CompilerMSL::preprocess_op_codes()
|
|||
add_pragma_line("#pragma clang diagnostic ignored \"-Wunused-variable\"");
|
||||
}
|
||||
|
||||
// Metal vertex functions that write to resources must disable rasterization and return void.
|
||||
// Before MSL 2.1 (2.2 for textures), Metal vertex functions that write to
|
||||
// resources must disable rasterization and return void.
|
||||
if (preproc.uses_resource_write)
|
||||
is_rasterization_disabled = true;
|
||||
|
||||
|
@ -1280,6 +1281,8 @@ void CompilerMSL::preprocess_op_codes()
|
|||
|
||||
if (preproc.needs_subgroup_invocation_id)
|
||||
needs_subgroup_invocation_id = true;
|
||||
if (preproc.needs_subgroup_size)
|
||||
needs_subgroup_size = true;
|
||||
}
|
||||
|
||||
// Move the Private and Workgroup global variables to the entry function.
|
||||
|
@ -1372,7 +1375,7 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std::
|
|||
// Use Metal's native frame-buffer fetch API for subpass inputs.
|
||||
auto &type = get<SPIRType>(ops[0]);
|
||||
if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData &&
|
||||
(!msl_options.is_ios() || !msl_options.ios_use_framebuffer_fetch_subpasses))
|
||||
(!msl_options.use_framebuffer_fetch_subpasses))
|
||||
{
|
||||
// Implicitly reads gl_FragCoord.
|
||||
assert(builtin_frag_coord_id != 0);
|
||||
|
@ -4608,6 +4611,59 @@ void CompilerMSL::emit_custom_functions()
|
|||
statement("");
|
||||
break;
|
||||
|
||||
case SPVFuncImplSubgroupBroadcast:
|
||||
// Metal doesn't allow broadcasting boolean values directly, but we can work around that by broadcasting
|
||||
// them as integers.
|
||||
statement("template<typename T>");
|
||||
statement("inline T spvSubgroupBroadcast(T value, ushort lane)");
|
||||
begin_scope();
|
||||
if (msl_options.is_ios())
|
||||
statement("return quad_broadcast(value, lane);");
|
||||
else
|
||||
statement("return simd_broadcast(value, lane);");
|
||||
end_scope();
|
||||
statement("");
|
||||
statement("template<>");
|
||||
statement("inline bool spvSubgroupBroadcast(bool value, ushort lane)");
|
||||
begin_scope();
|
||||
if (msl_options.is_ios())
|
||||
statement("return !!quad_broadcast((ushort)value, lane);");
|
||||
else
|
||||
statement("return !!simd_broadcast((ushort)value, lane);");
|
||||
end_scope();
|
||||
statement("");
|
||||
statement("template<uint N>");
|
||||
statement("inline vec<bool, N> spvSubgroupBroadcast(vec<bool, N> value, ushort lane)");
|
||||
begin_scope();
|
||||
if (msl_options.is_ios())
|
||||
statement("return (vec<bool, N>)quad_broadcast((vec<ushort, N>)value, lane);");
|
||||
else
|
||||
statement("return (vec<bool, N>)simd_broadcast((vec<ushort, N>)value, lane);");
|
||||
end_scope();
|
||||
statement("");
|
||||
break;
|
||||
|
||||
case SPVFuncImplSubgroupBroadcastFirst:
|
||||
statement("template<typename T>");
|
||||
statement("inline T spvSubgroupBroadcastFirst(T value)");
|
||||
begin_scope();
|
||||
statement("return simd_broadcast_first(value);");
|
||||
end_scope();
|
||||
statement("");
|
||||
statement("template<>");
|
||||
statement("inline bool spvSubgroupBroadcastFirst(bool value)");
|
||||
begin_scope();
|
||||
statement("return !!simd_broadcast_first((ushort)value);");
|
||||
end_scope();
|
||||
statement("");
|
||||
statement("template<uint N>");
|
||||
statement("inline vec<bool, N> spvSubgroupBroadcastFirst(vec<bool, N> value)");
|
||||
begin_scope();
|
||||
statement("return (vec<bool, N>)simd_broadcast_first((vec<ushort, N>)value);");
|
||||
end_scope();
|
||||
statement("");
|
||||
break;
|
||||
|
||||
case SPVFuncImplSubgroupBallot:
|
||||
statement("inline uint4 spvSubgroupBallot(bool value)");
|
||||
begin_scope();
|
||||
|
@ -4631,8 +4687,11 @@ void CompilerMSL::emit_custom_functions()
|
|||
break;
|
||||
|
||||
case SPVFuncImplSubgroupBallotFindLSB:
|
||||
statement("inline uint spvSubgroupBallotFindLSB(uint4 ballot)");
|
||||
statement("inline uint spvSubgroupBallotFindLSB(uint4 ballot, uint gl_SubgroupSize)");
|
||||
begin_scope();
|
||||
statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 32u)), "
|
||||
"extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0));");
|
||||
statement("ballot &= mask;");
|
||||
statement("return select(ctz(ballot.x), select(32 + ctz(ballot.y), select(64 + ctz(ballot.z), select(96 + "
|
||||
"ctz(ballot.w), uint(-1), ballot.w == 0), ballot.z == 0), ballot.y == 0), ballot.x == 0);");
|
||||
end_scope();
|
||||
|
@ -4640,8 +4699,11 @@ void CompilerMSL::emit_custom_functions()
|
|||
break;
|
||||
|
||||
case SPVFuncImplSubgroupBallotFindMSB:
|
||||
statement("inline uint spvSubgroupBallotFindMSB(uint4 ballot)");
|
||||
statement("inline uint spvSubgroupBallotFindMSB(uint4 ballot, uint gl_SubgroupSize)");
|
||||
begin_scope();
|
||||
statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 32u)), "
|
||||
"extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0));");
|
||||
statement("ballot &= mask;");
|
||||
statement("return select(128 - (clz(ballot.w) + 1), select(96 - (clz(ballot.z) + 1), select(64 - "
|
||||
"(clz(ballot.y) + 1), select(32 - (clz(ballot.x) + 1), uint(-1), ballot.x == 0), ballot.y == 0), "
|
||||
"ballot.z == 0), ballot.w == 0);");
|
||||
|
@ -4650,24 +4712,31 @@ void CompilerMSL::emit_custom_functions()
|
|||
break;
|
||||
|
||||
case SPVFuncImplSubgroupBallotBitCount:
|
||||
statement("inline uint spvSubgroupBallotBitCount(uint4 ballot)");
|
||||
statement("inline uint spvPopCount4(uint4 ballot)");
|
||||
begin_scope();
|
||||
statement("return popcount(ballot.x) + popcount(ballot.y) + popcount(ballot.z) + popcount(ballot.w);");
|
||||
end_scope();
|
||||
statement("");
|
||||
statement("inline uint spvSubgroupBallotBitCount(uint4 ballot, uint gl_SubgroupSize)");
|
||||
begin_scope();
|
||||
statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 32u)), "
|
||||
"extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0));");
|
||||
statement("return spvPopCount4(ballot & mask);");
|
||||
end_scope();
|
||||
statement("");
|
||||
statement("inline uint spvSubgroupBallotInclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID)");
|
||||
begin_scope();
|
||||
statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), "
|
||||
"extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), "
|
||||
"uint2(0));");
|
||||
statement("return spvSubgroupBallotBitCount(ballot & mask);");
|
||||
statement("return spvPopCount4(ballot & mask);");
|
||||
end_scope();
|
||||
statement("");
|
||||
statement("inline uint spvSubgroupBallotExclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID)");
|
||||
begin_scope();
|
||||
statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID, 32u)), "
|
||||
"extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0));");
|
||||
statement("return spvSubgroupBallotBitCount(ballot & mask);");
|
||||
statement("return spvPopCount4(ballot & mask);");
|
||||
end_scope();
|
||||
statement("");
|
||||
break;
|
||||
|
@ -4680,7 +4749,7 @@ void CompilerMSL::emit_custom_functions()
|
|||
statement("template<typename T>");
|
||||
statement("inline bool spvSubgroupAllEqual(T value)");
|
||||
begin_scope();
|
||||
statement("return simd_all(value == simd_broadcast_first(value));");
|
||||
statement("return simd_all(all(value == simd_broadcast_first(value)));");
|
||||
end_scope();
|
||||
statement("");
|
||||
statement("template<>");
|
||||
|
@ -4689,6 +4758,184 @@ void CompilerMSL::emit_custom_functions()
|
|||
statement("return simd_all(value) || !simd_any(value);");
|
||||
end_scope();
|
||||
statement("");
|
||||
statement("template<uint N>");
|
||||
statement("inline bool spvSubgroupAllEqual(vec<bool, N> value)");
|
||||
begin_scope();
|
||||
statement("return simd_all(all(value == (vec<bool, N>)simd_broadcast_first((vec<ushort, N>)value)));");
|
||||
end_scope();
|
||||
statement("");
|
||||
break;
|
||||
|
||||
case SPVFuncImplSubgroupShuffle:
|
||||
statement("template<typename T>");
|
||||
statement("inline T spvSubgroupShuffle(T value, ushort lane)");
|
||||
begin_scope();
|
||||
if (msl_options.is_ios())
|
||||
statement("return quad_shuffle(value, lane);");
|
||||
else
|
||||
statement("return simd_shuffle(value, lane);");
|
||||
end_scope();
|
||||
statement("");
|
||||
statement("template<>");
|
||||
statement("inline bool spvSubgroupShuffle(bool value, ushort lane)");
|
||||
begin_scope();
|
||||
if (msl_options.is_ios())
|
||||
statement("return !!quad_shuffle((ushort)value, lane);");
|
||||
else
|
||||
statement("return !!simd_shuffle((ushort)value, lane);");
|
||||
end_scope();
|
||||
statement("");
|
||||
statement("template<uint N>");
|
||||
statement("inline vec<bool, N> spvSubgroupShuffle(vec<bool, N> value, ushort lane)");
|
||||
begin_scope();
|
||||
if (msl_options.is_ios())
|
||||
statement("return (vec<bool, N>)quad_shuffle((vec<ushort, N>)value, lane);");
|
||||
else
|
||||
statement("return (vec<bool, N>)simd_shuffle((vec<ushort, N>)value, lane);");
|
||||
end_scope();
|
||||
statement("");
|
||||
break;
|
||||
|
||||
case SPVFuncImplSubgroupShuffleXor:
|
||||
statement("template<typename T>");
|
||||
statement("inline T spvSubgroupShuffleXor(T value, ushort mask)");
|
||||
begin_scope();
|
||||
if (msl_options.is_ios())
|
||||
statement("return quad_shuffle_xor(value, mask);");
|
||||
else
|
||||
statement("return simd_shuffle_xor(value, mask);");
|
||||
end_scope();
|
||||
statement("");
|
||||
statement("template<>");
|
||||
statement("inline bool spvSubgroupShuffleXor(bool value, ushort mask)");
|
||||
begin_scope();
|
||||
if (msl_options.is_ios())
|
||||
statement("return !!quad_shuffle_xor((ushort)value, mask);");
|
||||
else
|
||||
statement("return !!simd_shuffle_xor((ushort)value, mask);");
|
||||
end_scope();
|
||||
statement("");
|
||||
statement("template<uint N>");
|
||||
statement("inline vec<bool, N> spvSubgroupShuffleXor(vec<bool, N> value, ushort mask)");
|
||||
begin_scope();
|
||||
if (msl_options.is_ios())
|
||||
statement("return (vec<bool, N>)quad_shuffle_xor((vec<ushort, N>)value, mask);");
|
||||
else
|
||||
statement("return (vec<bool, N>)simd_shuffle_xor((vec<ushort, N>)value, mask);");
|
||||
end_scope();
|
||||
statement("");
|
||||
break;
|
||||
|
||||
case SPVFuncImplSubgroupShuffleUp:
|
||||
statement("template<typename T>");
|
||||
statement("inline T spvSubgroupShuffleUp(T value, ushort delta)");
|
||||
begin_scope();
|
||||
if (msl_options.is_ios())
|
||||
statement("return quad_shuffle_up(value, delta);");
|
||||
else
|
||||
statement("return simd_shuffle_up(value, delta);");
|
||||
end_scope();
|
||||
statement("");
|
||||
statement("template<>");
|
||||
statement("inline bool spvSubgroupShuffleUp(bool value, ushort delta)");
|
||||
begin_scope();
|
||||
if (msl_options.is_ios())
|
||||
statement("return !!quad_shuffle_up((ushort)value, delta);");
|
||||
else
|
||||
statement("return !!simd_shuffle_up((ushort)value, delta);");
|
||||
end_scope();
|
||||
statement("");
|
||||
statement("template<uint N>");
|
||||
statement("inline vec<bool, N> spvSubgroupShuffleUp(vec<bool, N> value, ushort delta)");
|
||||
begin_scope();
|
||||
if (msl_options.is_ios())
|
||||
statement("return (vec<bool, N>)quad_shuffle_up((vec<ushort, N>)value, delta);");
|
||||
else
|
||||
statement("return (vec<bool, N>)simd_shuffle_up((vec<ushort, N>)value, delta);");
|
||||
end_scope();
|
||||
statement("");
|
||||
break;
|
||||
|
||||
case SPVFuncImplSubgroupShuffleDown:
|
||||
statement("template<typename T>");
|
||||
statement("inline T spvSubgroupShuffleDown(T value, ushort delta)");
|
||||
begin_scope();
|
||||
if (msl_options.is_ios())
|
||||
statement("return quad_shuffle_down(value, delta);");
|
||||
else
|
||||
statement("return simd_shuffle_down(value, delta);");
|
||||
end_scope();
|
||||
statement("");
|
||||
statement("template<>");
|
||||
statement("inline bool spvSubgroupShuffleDown(bool value, ushort delta)");
|
||||
begin_scope();
|
||||
if (msl_options.is_ios())
|
||||
statement("return !!quad_shuffle_down((ushort)value, delta);");
|
||||
else
|
||||
statement("return !!simd_shuffle_down((ushort)value, delta);");
|
||||
end_scope();
|
||||
statement("");
|
||||
statement("template<uint N>");
|
||||
statement("inline vec<bool, N> spvSubgroupShuffleDown(vec<bool, N> value, ushort delta)");
|
||||
begin_scope();
|
||||
if (msl_options.is_ios())
|
||||
statement("return (vec<bool, N>)quad_shuffle_down((vec<ushort, N>)value, delta);");
|
||||
else
|
||||
statement("return (vec<bool, N>)simd_shuffle_down((vec<ushort, N>)value, delta);");
|
||||
end_scope();
|
||||
statement("");
|
||||
break;
|
||||
|
||||
case SPVFuncImplQuadBroadcast:
|
||||
statement("template<typename T>");
|
||||
statement("inline T spvQuadBroadcast(T value, uint lane)");
|
||||
begin_scope();
|
||||
statement("return quad_broadcast(value, lane);");
|
||||
end_scope();
|
||||
statement("");
|
||||
statement("template<>");
|
||||
statement("inline bool spvQuadBroadcast(bool value, uint lane)");
|
||||
begin_scope();
|
||||
statement("return !!quad_broadcast((ushort)value, lane);");
|
||||
end_scope();
|
||||
statement("");
|
||||
statement("template<uint N>");
|
||||
statement("inline vec<bool, N> spvQuadBroadcast(vec<bool, N> value, uint lane)");
|
||||
begin_scope();
|
||||
statement("return (vec<bool, N>)quad_broadcast((vec<ushort, N>)value, lane);");
|
||||
end_scope();
|
||||
statement("");
|
||||
break;
|
||||
|
||||
case SPVFuncImplQuadSwap:
|
||||
// We can implement this easily based on the following table giving
|
||||
// the target lane ID from the direction and current lane ID:
|
||||
// Direction
|
||||
// | 0 | 1 | 2 |
|
||||
// ---+---+---+---+
|
||||
// L 0 | 1 2 3
|
||||
// a 1 | 0 3 2
|
||||
// n 2 | 3 0 1
|
||||
// e 3 | 2 1 0
|
||||
// Notice that target = source ^ (direction + 1).
|
||||
statement("template<typename T>");
|
||||
statement("inline T spvQuadSwap(T value, uint dir)");
|
||||
begin_scope();
|
||||
statement("return quad_shuffle_xor(value, dir + 1);");
|
||||
end_scope();
|
||||
statement("");
|
||||
statement("template<>");
|
||||
statement("inline bool spvQuadSwap(bool value, uint dir)");
|
||||
begin_scope();
|
||||
statement("return !!quad_shuffle_xor((ushort)value, dir + 1);");
|
||||
end_scope();
|
||||
statement("");
|
||||
statement("template<uint N>");
|
||||
statement("inline vec<bool, N> spvQuadSwap(vec<bool, N> value, uint dir)");
|
||||
begin_scope();
|
||||
statement("return (vec<bool, N>)quad_shuffle_xor((vec<ushort, N>)value, dir + 1);");
|
||||
end_scope();
|
||||
statement("");
|
||||
break;
|
||||
|
||||
case SPVFuncImplReflectScalar:
|
||||
|
@ -7168,7 +7415,7 @@ void CompilerMSL::emit_texture_op(const Instruction &i, bool sparse)
|
|||
if (sparse)
|
||||
SPIRV_CROSS_THROW("Sparse feedback not yet supported in MSL.");
|
||||
|
||||
if (msl_options.is_ios() && msl_options.ios_use_framebuffer_fetch_subpasses)
|
||||
if (msl_options.use_framebuffer_fetch_subpasses)
|
||||
{
|
||||
auto *ops = stream(i);
|
||||
|
||||
|
@ -8265,25 +8512,26 @@ string CompilerMSL::to_function_args(const TextureFunctionArguments &args, bool
|
|||
break;
|
||||
}
|
||||
|
||||
if (args.base.is_fetch && args.offset)
|
||||
if (args.base.is_fetch && (args.offset || args.coffset))
|
||||
{
|
||||
uint32_t offset_expr = args.offset ? args.offset : args.coffset;
|
||||
// Fetch offsets must be applied directly to the coordinate.
|
||||
forward = forward && should_forward(args.offset);
|
||||
auto &type = expression_type(args.offset);
|
||||
if (type.basetype != SPIRType::UInt)
|
||||
tex_coords += " + " + bitcast_expression(SPIRType::UInt, args.offset);
|
||||
forward = forward && should_forward(offset_expr);
|
||||
auto &type = expression_type(offset_expr);
|
||||
if (imgtype.image.dim == Dim1D && msl_options.texture_1D_as_2D)
|
||||
{
|
||||
if (type.basetype != SPIRType::UInt)
|
||||
tex_coords += join(" + uint2(", bitcast_expression(SPIRType::UInt, offset_expr), ", 0)");
|
||||
else
|
||||
tex_coords += join(" + uint2(", to_enclosed_expression(offset_expr), ", 0)");
|
||||
}
|
||||
else
|
||||
tex_coords += " + " + to_enclosed_expression(args.offset);
|
||||
}
|
||||
else if (args.base.is_fetch && args.coffset)
|
||||
{
|
||||
// Fetch offsets must be applied directly to the coordinate.
|
||||
forward = forward && should_forward(args.coffset);
|
||||
auto &type = expression_type(args.coffset);
|
||||
if (type.basetype != SPIRType::UInt)
|
||||
tex_coords += " + " + bitcast_expression(SPIRType::UInt, args.coffset);
|
||||
else
|
||||
tex_coords += " + " + to_enclosed_expression(args.coffset);
|
||||
{
|
||||
if (type.basetype != SPIRType::UInt)
|
||||
tex_coords += " + " + bitcast_expression(SPIRType::UInt, offset_expr);
|
||||
else
|
||||
tex_coords += " + " + to_enclosed_expression(offset_expr);
|
||||
}
|
||||
}
|
||||
|
||||
// If projection, use alt coord as divisor
|
||||
|
@ -8454,6 +8702,7 @@ string CompilerMSL::to_function_args(const TextureFunctionArguments &args, bool
|
|||
string grad_opt;
|
||||
switch (imgtype.image.dim)
|
||||
{
|
||||
case Dim1D:
|
||||
case Dim2D:
|
||||
grad_opt = "2d";
|
||||
break;
|
||||
|
@ -8489,30 +8738,42 @@ string CompilerMSL::to_function_args(const TextureFunctionArguments &args, bool
|
|||
|
||||
// Add offsets
|
||||
string offset_expr;
|
||||
const SPIRType *offset_type = nullptr;
|
||||
if (args.coffset && !args.base.is_fetch)
|
||||
{
|
||||
forward = forward && should_forward(args.coffset);
|
||||
offset_expr = to_expression(args.coffset);
|
||||
offset_type = &expression_type(args.coffset);
|
||||
}
|
||||
else if (args.offset && !args.base.is_fetch)
|
||||
{
|
||||
forward = forward && should_forward(args.offset);
|
||||
offset_expr = to_expression(args.offset);
|
||||
offset_type = &expression_type(args.offset);
|
||||
}
|
||||
|
||||
if (!offset_expr.empty())
|
||||
{
|
||||
switch (imgtype.image.dim)
|
||||
{
|
||||
case Dim1D:
|
||||
if (!msl_options.texture_1D_as_2D)
|
||||
break;
|
||||
if (offset_type->vecsize > 1)
|
||||
offset_expr = enclose_expression(offset_expr) + ".x";
|
||||
|
||||
farg_str += join(", int2(", offset_expr, ", 0)");
|
||||
break;
|
||||
|
||||
case Dim2D:
|
||||
if (coord_type.vecsize > 2)
|
||||
if (offset_type->vecsize > 2)
|
||||
offset_expr = enclose_expression(offset_expr) + ".xy";
|
||||
|
||||
farg_str += ", " + offset_expr;
|
||||
break;
|
||||
|
||||
case Dim3D:
|
||||
if (coord_type.vecsize > 3)
|
||||
if (offset_type->vecsize > 3)
|
||||
offset_expr = enclose_expression(offset_expr) + ".xyz";
|
||||
|
||||
farg_str += ", " + offset_expr;
|
||||
|
@ -8532,7 +8793,10 @@ string CompilerMSL::to_function_args(const TextureFunctionArguments &args, bool
|
|||
if (!msl_options.swizzle_texture_samples || is_dynamic_img_sampler)
|
||||
{
|
||||
forward = forward && should_forward(args.component);
|
||||
farg_str += ", " + to_component_argument(args.component);
|
||||
|
||||
if (const auto *var = maybe_get_backing_variable(img))
|
||||
if (!image_is_comparison(get<SPIRType>(var->basetype), var->self))
|
||||
farg_str += ", " + to_component_argument(args.component);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -8962,9 +9226,9 @@ string CompilerMSL::to_swizzle_expression(uint32_t id)
|
|||
auto index = expr.find_first_of('[');
|
||||
|
||||
// If an image is part of an argument buffer translate this to a legal identifier.
|
||||
for (auto &c : expr)
|
||||
if (c == '.')
|
||||
c = '_';
|
||||
string::size_type period = 0;
|
||||
while ((period = expr.find_first_of('.', period)) != string::npos && period < index)
|
||||
expr[period] = '_';
|
||||
|
||||
if (index == string::npos)
|
||||
return expr + swizzle_name_suffix;
|
||||
|
@ -9828,9 +10092,9 @@ void CompilerMSL::entry_point_args_builtin(string &ep_args)
|
|||
if (bi_type == BuiltInSampleMask && get_entry_point().flags.get(ExecutionModePostDepthCoverage))
|
||||
{
|
||||
if (!msl_options.supports_msl_version(2))
|
||||
SPIRV_CROSS_THROW("Post-depth coverage requires Metal 2.0.");
|
||||
if (!msl_options.is_ios())
|
||||
SPIRV_CROSS_THROW("Post-depth coverage is only supported on iOS.");
|
||||
SPIRV_CROSS_THROW("Post-depth coverage requires MSL 2.0.");
|
||||
if (msl_options.is_macos() && !msl_options.supports_msl_version(2, 3))
|
||||
SPIRV_CROSS_THROW("Post-depth coverage on Mac requires MSL 2.3.");
|
||||
ep_args += ", post_depth_coverage";
|
||||
}
|
||||
ep_args += "]]";
|
||||
|
@ -10207,6 +10471,8 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args)
|
|||
}
|
||||
else
|
||||
{
|
||||
if (msl_options.is_macos() && !msl_options.supports_msl_version(2, 3))
|
||||
SPIRV_CROSS_THROW("Framebuffer fetch on Mac is not supported before MSL 2.3.");
|
||||
ep_args += image_type_glsl(type, var_id) + " " + r.name;
|
||||
ep_args += " [[color(" + convert_to_string(r.index) + ")]]";
|
||||
}
|
||||
|
@ -10449,7 +10715,7 @@ void CompilerMSL::fix_up_shader_inputs_outputs()
|
|||
SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.1.");
|
||||
entry_func.fixup_hooks_in.push_back([=]() {
|
||||
statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ",
|
||||
to_expression(builtin_subgroup_invocation_id_id), " > 32 ? uint4(0, (1 << (",
|
||||
to_expression(builtin_subgroup_invocation_id_id), " >= 32 ? uint4(0, (1 << (",
|
||||
to_expression(builtin_subgroup_invocation_id_id), " - 32)), uint2(0)) : uint4(1 << ",
|
||||
to_expression(builtin_subgroup_invocation_id_id), ", uint3(0));");
|
||||
});
|
||||
|
@ -10461,25 +10727,25 @@ void CompilerMSL::fix_up_shader_inputs_outputs()
|
|||
SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.1.");
|
||||
entry_func.fixup_hooks_in.push_back([=]() {
|
||||
// Case where index < 32, size < 32:
|
||||
// mask0 = bfe(0xFFFFFFFF, index, size - index);
|
||||
// mask1 = bfe(0xFFFFFFFF, 0, 0); // Gives 0
|
||||
// mask0 = bfi(0, 0xFFFFFFFF, index, size - index);
|
||||
// mask1 = bfi(0, 0xFFFFFFFF, 0, 0); // Gives 0
|
||||
// Case where index < 32 but size >= 32:
|
||||
// mask0 = bfe(0xFFFFFFFF, index, 32 - index);
|
||||
// mask1 = bfe(0xFFFFFFFF, 0, size - 32);
|
||||
// mask0 = bfi(0, 0xFFFFFFFF, index, 32 - index);
|
||||
// mask1 = bfi(0, 0xFFFFFFFF, 0, size - 32);
|
||||
// Case where index >= 32:
|
||||
// mask0 = bfe(0xFFFFFFFF, 32, 0); // Gives 0
|
||||
// mask1 = bfe(0xFFFFFFFF, index - 32, size - index);
|
||||
// mask0 = bfi(0, 0xFFFFFFFF, 32, 0); // Gives 0
|
||||
// mask1 = bfi(0, 0xFFFFFFFF, index - 32, size - index);
|
||||
// This is expressed without branches to avoid divergent
|
||||
// control flow--hence the complicated min/max expressions.
|
||||
// This is further complicated by the fact that if you attempt
|
||||
// to bfe out-of-bounds on Metal, undefined behavior is the
|
||||
// to bfi/bfe out-of-bounds on Metal, undefined behavior is the
|
||||
// result.
|
||||
statement(builtin_type_decl(bi_type), " ", to_expression(var_id),
|
||||
" = uint4(extract_bits(0xFFFFFFFF, min(",
|
||||
" = uint4(insert_bits(0u, 0xFFFFFFFF, min(",
|
||||
to_expression(builtin_subgroup_invocation_id_id), ", 32u), (uint)max(min((int)",
|
||||
to_expression(builtin_subgroup_size_id), ", 32) - (int)",
|
||||
to_expression(builtin_subgroup_invocation_id_id),
|
||||
", 0)), extract_bits(0xFFFFFFFF, (uint)max((int)",
|
||||
", 0)), insert_bits(0u, 0xFFFFFFFF, (uint)max((int)",
|
||||
to_expression(builtin_subgroup_invocation_id_id), " - 32, 0), (uint)max((int)",
|
||||
to_expression(builtin_subgroup_size_id), " - (int)max(",
|
||||
to_expression(builtin_subgroup_invocation_id_id), ", 32u), 0)), uint2(0));");
|
||||
|
@ -10494,11 +10760,11 @@ void CompilerMSL::fix_up_shader_inputs_outputs()
|
|||
// The same logic applies here, except now the index is one
|
||||
// more than the subgroup invocation ID.
|
||||
statement(builtin_type_decl(bi_type), " ", to_expression(var_id),
|
||||
" = uint4(extract_bits(0xFFFFFFFF, min(",
|
||||
" = uint4(insert_bits(0u, 0xFFFFFFFF, min(",
|
||||
to_expression(builtin_subgroup_invocation_id_id), " + 1, 32u), (uint)max(min((int)",
|
||||
to_expression(builtin_subgroup_size_id), ", 32) - (int)",
|
||||
to_expression(builtin_subgroup_invocation_id_id),
|
||||
" - 1, 0)), extract_bits(0xFFFFFFFF, (uint)max((int)",
|
||||
" - 1, 0)), insert_bits(0u, 0xFFFFFFFF, (uint)max((int)",
|
||||
to_expression(builtin_subgroup_invocation_id_id), " + 1 - 32, 0), (uint)max((int)",
|
||||
to_expression(builtin_subgroup_size_id), " - (int)max(",
|
||||
to_expression(builtin_subgroup_invocation_id_id), " + 1, 32u), 0)), uint2(0));");
|
||||
|
@ -10834,8 +11100,8 @@ uint32_t CompilerMSL::get_metal_resource_index(SPIRVariable &var, SPIRType::Base
|
|||
|
||||
bool CompilerMSL::type_is_msl_framebuffer_fetch(const SPIRType &type) const
|
||||
{
|
||||
return type.basetype == SPIRType::Image && type.image.dim == DimSubpassData && msl_options.is_ios() &&
|
||||
msl_options.ios_use_framebuffer_fetch_subpasses;
|
||||
return type.basetype == SPIRType::Image && type.image.dim == DimSubpassData &&
|
||||
msl_options.use_framebuffer_fetch_subpasses;
|
||||
}
|
||||
|
||||
string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg)
|
||||
|
@ -11062,6 +11328,11 @@ void CompilerMSL::replace_illegal_names()
|
|||
"fragment",
|
||||
"compute",
|
||||
"bias",
|
||||
"level",
|
||||
"gradient2d",
|
||||
"gradientcube",
|
||||
"gradient3d",
|
||||
"min_lod_clamp",
|
||||
"assert",
|
||||
"VARIABLE_TRACEPOINT",
|
||||
"STATIC_DATA_TRACEPOINT",
|
||||
|
@ -11850,12 +12121,11 @@ void CompilerMSL::emit_subgroup_op(const Instruction &i)
|
|||
break;
|
||||
|
||||
case OpGroupNonUniformBroadcast:
|
||||
emit_binary_func_op(result_type, id, ops[3], ops[4],
|
||||
msl_options.is_ios() ? "quad_broadcast" : "simd_broadcast");
|
||||
emit_binary_func_op(result_type, id, ops[3], ops[4], "spvSubgroupBroadcast");
|
||||
break;
|
||||
|
||||
case OpGroupNonUniformBroadcastFirst:
|
||||
emit_unary_func_op(result_type, id, ops[3], "simd_broadcast_first");
|
||||
emit_unary_func_op(result_type, id, ops[3], "spvSubgroupBroadcastFirst");
|
||||
break;
|
||||
|
||||
case OpGroupNonUniformBallot:
|
||||
|
@ -11871,46 +12141,50 @@ void CompilerMSL::emit_subgroup_op(const Instruction &i)
|
|||
break;
|
||||
|
||||
case OpGroupNonUniformBallotFindLSB:
|
||||
emit_unary_func_op(result_type, id, ops[3], "spvSubgroupBallotFindLSB");
|
||||
emit_binary_func_op(result_type, id, ops[3], builtin_subgroup_size_id, "spvSubgroupBallotFindLSB");
|
||||
break;
|
||||
|
||||
case OpGroupNonUniformBallotFindMSB:
|
||||
emit_unary_func_op(result_type, id, ops[3], "spvSubgroupBallotFindMSB");
|
||||
emit_binary_func_op(result_type, id, ops[3], builtin_subgroup_size_id, "spvSubgroupBallotFindMSB");
|
||||
break;
|
||||
|
||||
case OpGroupNonUniformBallotBitCount:
|
||||
{
|
||||
auto operation = static_cast<GroupOperation>(ops[3]);
|
||||
if (operation == GroupOperationReduce)
|
||||
emit_unary_func_op(result_type, id, ops[4], "spvSubgroupBallotBitCount");
|
||||
else if (operation == GroupOperationInclusiveScan)
|
||||
switch (operation)
|
||||
{
|
||||
case GroupOperationReduce:
|
||||
emit_binary_func_op(result_type, id, ops[4], builtin_subgroup_size_id, "spvSubgroupBallotBitCount");
|
||||
break;
|
||||
case GroupOperationInclusiveScan:
|
||||
emit_binary_func_op(result_type, id, ops[4], builtin_subgroup_invocation_id_id,
|
||||
"spvSubgroupBallotInclusiveBitCount");
|
||||
else if (operation == GroupOperationExclusiveScan)
|
||||
break;
|
||||
case GroupOperationExclusiveScan:
|
||||
emit_binary_func_op(result_type, id, ops[4], builtin_subgroup_invocation_id_id,
|
||||
"spvSubgroupBallotExclusiveBitCount");
|
||||
else
|
||||
break;
|
||||
default:
|
||||
SPIRV_CROSS_THROW("Invalid BitCount operation.");
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case OpGroupNonUniformShuffle:
|
||||
emit_binary_func_op(result_type, id, ops[3], ops[4], msl_options.is_ios() ? "quad_shuffle" : "simd_shuffle");
|
||||
emit_binary_func_op(result_type, id, ops[3], ops[4], "spvSubgroupShuffle");
|
||||
break;
|
||||
|
||||
case OpGroupNonUniformShuffleXor:
|
||||
emit_binary_func_op(result_type, id, ops[3], ops[4],
|
||||
msl_options.is_ios() ? "quad_shuffle_xor" : "simd_shuffle_xor");
|
||||
emit_binary_func_op(result_type, id, ops[3], ops[4], "spvSubgroupShuffleXor");
|
||||
break;
|
||||
|
||||
case OpGroupNonUniformShuffleUp:
|
||||
emit_binary_func_op(result_type, id, ops[3], ops[4],
|
||||
msl_options.is_ios() ? "quad_shuffle_up" : "simd_shuffle_up");
|
||||
emit_binary_func_op(result_type, id, ops[3], ops[4], "spvSubgroupShuffleUp");
|
||||
break;
|
||||
|
||||
case OpGroupNonUniformShuffleDown:
|
||||
emit_binary_func_op(result_type, id, ops[3], ops[4],
|
||||
msl_options.is_ios() ? "quad_shuffle_down" : "simd_shuffle_down");
|
||||
emit_binary_func_op(result_type, id, ops[3], ops[4], "spvSubgroupShuffleDown");
|
||||
break;
|
||||
|
||||
case OpGroupNonUniformAll:
|
||||
|
@ -12018,26 +12292,11 @@ case OpGroupNonUniform##op: \
|
|||
#undef MSL_GROUP_OP_CAST
|
||||
|
||||
case OpGroupNonUniformQuadSwap:
|
||||
{
|
||||
// We can implement this easily based on the following table giving
|
||||
// the target lane ID from the direction and current lane ID:
|
||||
// Direction
|
||||
// | 0 | 1 | 2 |
|
||||
// ---+---+---+---+
|
||||
// L 0 | 1 2 3
|
||||
// a 1 | 0 3 2
|
||||
// n 2 | 3 0 1
|
||||
// e 3 | 2 1 0
|
||||
// Notice that target = source ^ (direction + 1).
|
||||
uint32_t mask = evaluate_constant_u32(ops[4]) + 1;
|
||||
uint32_t mask_id = ir.increase_bound_by(1);
|
||||
set<SPIRConstant>(mask_id, expression_type_id(ops[4]), mask, false);
|
||||
emit_binary_func_op(result_type, id, ops[3], mask_id, "quad_shuffle_xor");
|
||||
emit_binary_func_op(result_type, id, ops[3], ops[4], "spvQuadSwap");
|
||||
break;
|
||||
}
|
||||
|
||||
case OpGroupNonUniformQuadBroadcast:
|
||||
emit_binary_func_op(result_type, id, ops[3], ops[4], "quad_broadcast");
|
||||
emit_binary_func_op(result_type, id, ops[3], ops[4], "spvQuadBroadcast");
|
||||
break;
|
||||
|
||||
default:
|
||||
|
@ -12930,7 +13189,8 @@ bool CompilerMSL::OpCodePreprocessor::handle(Op opcode, const uint32_t *args, ui
|
|||
}
|
||||
|
||||
case OpImageWrite:
|
||||
uses_resource_write = true;
|
||||
if (!compiler.msl_options.supports_msl_version(2, 2))
|
||||
uses_resource_write = true;
|
||||
break;
|
||||
|
||||
case OpStore:
|
||||
|
@ -12990,8 +13250,15 @@ bool CompilerMSL::OpCodePreprocessor::handle(Op opcode, const uint32_t *args, ui
|
|||
needs_subgroup_invocation_id = true;
|
||||
break;
|
||||
|
||||
case OpGroupNonUniformBallotFindLSB:
|
||||
case OpGroupNonUniformBallotFindMSB:
|
||||
needs_subgroup_size = true;
|
||||
break;
|
||||
|
||||
case OpGroupNonUniformBallotBitCount:
|
||||
if (args[3] != GroupOperationReduce)
|
||||
if (args[3] == GroupOperationReduce)
|
||||
needs_subgroup_size = true;
|
||||
else
|
||||
needs_subgroup_invocation_id = true;
|
||||
break;
|
||||
|
||||
|
@ -13035,7 +13302,8 @@ void CompilerMSL::OpCodePreprocessor::check_resource_write(uint32_t var_id)
|
|||
{
|
||||
auto *p_var = compiler.maybe_get_backing_variable(var_id);
|
||||
StorageClass sc = p_var ? p_var->storage : StorageClassMax;
|
||||
if (sc == StorageClassUniform || sc == StorageClassStorageBuffer)
|
||||
if (!compiler.msl_options.supports_msl_version(2, 1) &&
|
||||
(sc == StorageClassUniform || sc == StorageClassStorageBuffer))
|
||||
uses_resource_write = true;
|
||||
}
|
||||
|
||||
|
@ -13174,6 +13442,12 @@ CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op o
|
|||
break;
|
||||
}
|
||||
|
||||
case OpGroupNonUniformBroadcast:
|
||||
return SPVFuncImplSubgroupBroadcast;
|
||||
|
||||
case OpGroupNonUniformBroadcastFirst:
|
||||
return SPVFuncImplSubgroupBroadcastFirst;
|
||||
|
||||
case OpGroupNonUniformBallot:
|
||||
return SPVFuncImplSubgroupBallot;
|
||||
|
||||
|
@ -13193,6 +13467,24 @@ CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op o
|
|||
case OpGroupNonUniformAllEqual:
|
||||
return SPVFuncImplSubgroupAllEqual;
|
||||
|
||||
case OpGroupNonUniformShuffle:
|
||||
return SPVFuncImplSubgroupShuffle;
|
||||
|
||||
case OpGroupNonUniformShuffleXor:
|
||||
return SPVFuncImplSubgroupShuffleXor;
|
||||
|
||||
case OpGroupNonUniformShuffleUp:
|
||||
return SPVFuncImplSubgroupShuffleUp;
|
||||
|
||||
case OpGroupNonUniformShuffleDown:
|
||||
return SPVFuncImplSubgroupShuffleDown;
|
||||
|
||||
case OpGroupNonUniformQuadBroadcast:
|
||||
return SPVFuncImplQuadBroadcast;
|
||||
|
||||
case OpGroupNonUniformQuadSwap:
|
||||
return SPVFuncImplQuadSwap;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -315,7 +315,7 @@ public:
|
|||
bool ios_support_base_vertex_instance = false;
|
||||
|
||||
// Use Metal's native frame-buffer fetch API for subpass inputs.
|
||||
bool ios_use_framebuffer_fetch_subpasses = false;
|
||||
bool use_framebuffer_fetch_subpasses = false;
|
||||
|
||||
// Enables use of "fma" intrinsic for invariant float math
|
||||
bool invariant_float_math = false;
|
||||
|
@ -600,12 +600,20 @@ protected:
|
|||
SPVFuncImplTextureSwizzle,
|
||||
SPVFuncImplGatherSwizzle,
|
||||
SPVFuncImplGatherCompareSwizzle,
|
||||
SPVFuncImplSubgroupBroadcast,
|
||||
SPVFuncImplSubgroupBroadcastFirst,
|
||||
SPVFuncImplSubgroupBallot,
|
||||
SPVFuncImplSubgroupBallotBitExtract,
|
||||
SPVFuncImplSubgroupBallotFindLSB,
|
||||
SPVFuncImplSubgroupBallotFindMSB,
|
||||
SPVFuncImplSubgroupBallotBitCount,
|
||||
SPVFuncImplSubgroupAllEqual,
|
||||
SPVFuncImplSubgroupShuffle,
|
||||
SPVFuncImplSubgroupShuffleXor,
|
||||
SPVFuncImplSubgroupShuffleUp,
|
||||
SPVFuncImplSubgroupShuffleDown,
|
||||
SPVFuncImplQuadBroadcast,
|
||||
SPVFuncImplQuadSwap,
|
||||
SPVFuncImplReflectScalar,
|
||||
SPVFuncImplRefractScalar,
|
||||
SPVFuncImplFaceForwardScalar,
|
||||
|
@ -913,6 +921,7 @@ protected:
|
|||
bool used_swizzle_buffer = false;
|
||||
bool added_builtin_tess_level = false;
|
||||
bool needs_subgroup_invocation_id = false;
|
||||
bool needs_subgroup_size = false;
|
||||
std::string qual_pos_var_name;
|
||||
std::string stage_in_var_name = "in";
|
||||
std::string stage_out_var_name = "out";
|
||||
|
@ -984,6 +993,7 @@ protected:
|
|||
bool uses_atomics = false;
|
||||
bool uses_resource_write = false;
|
||||
bool needs_subgroup_invocation_id = false;
|
||||
bool needs_subgroup_size = false;
|
||||
};
|
||||
|
||||
// OpcodeHandler that scans for uses of sampled images
|
||||
|
|
Loading…
Reference in New Issue