Updated spirv-cross.

Бранимир Караџић 2020-11-01 21:41:59 -08:00
parent 39d1f8c32c
commit b7fb619125
8 changed files with 526 additions and 142 deletions

View File

@@ -565,6 +565,7 @@ struct CLIArguments
bool msl_arrayed_subpass_input = false;
uint32_t msl_r32ui_linear_texture_alignment = 4;
uint32_t msl_r32ui_alignment_constant_id = 65535;
bool msl_texture_1d_as_2d = false;
bool glsl_emit_push_constant_as_ubo = false;
bool glsl_emit_ubo_as_plain_uniforms = false;
bool glsl_force_flattened_io_blocks = false;
@@ -728,7 +729,7 @@ static void print_help_msl()
"\t[--msl-texture-buffer-native]:\n\t\tEnable native support for texel buffers. Otherwise, it is emulated as a normal texture.\n"
"\t[--msl-framebuffer-fetch]:\n\t\tImplement subpass inputs with frame buffer fetch.\n"
"\t\tEmits [[color(N)]] inputs in fragment stage.\n"
"\t\tRequires iOS Metal.\n"
"\t\tRequires an Apple GPU.\n"
"\t[--msl-emulate-cube-array]:\n\t\tEmulate cube arrays with 2D array and manual math.\n"
"\t[--msl-discrete-descriptor-set <index>]:\n\t\tWhen using argument buffers, forces a specific descriptor set to be implemented without argument buffers.\n"
"\t\tUseful for implementing push descriptors in emulation layers.\n"
@@ -774,7 +775,9 @@ static void print_help_msl()
"\t[--msl-r32ui-linear-texture-align <alignment>]:\n\t\tThe required alignment of linear textures of format MTLPixelFormatR32Uint.\n"
"\t\tThis is used to align the row stride for atomic accesses to such images.\n"
"\t[--msl-r32ui-linear-texture-align-constant-id <id>]:\n\t\tThe function constant ID to use for the linear texture alignment.\n"
"\t\tOn MSL 1.2 or later, you can override the alignment by setting this function constant.\n");
"\t\tOn MSL 1.2 or later, you can override the alignment by setting this function constant.\n"
"\t[--msl-texture-1d-as-2d]:\n\t\tEmit Image variables of dimension Dim1D as texture2d.\n"
"\t\tIn Metal, 1D textures do not support all features that 2D textures do. Use this option if your code relies on these features.\n");
// clang-format on
}
@@ -991,9 +994,9 @@ static string compile_iteration(const CLIArguments &args, std::vector<uint32_t>
if (args.msl_ios)
{
msl_opts.platform = CompilerMSL::Options::iOS;
msl_opts.ios_use_framebuffer_fetch_subpasses = args.msl_framebuffer_fetch;
msl_opts.emulate_cube_array = args.msl_emulate_cube_array;
}
msl_opts.use_framebuffer_fetch_subpasses = args.msl_framebuffer_fetch;
msl_opts.pad_fragment_output_components = args.msl_pad_fragment_output;
msl_opts.tess_domain_origin_lower_left = args.msl_domain_lower_left;
msl_opts.argument_buffers = args.msl_argument_buffers;
@@ -1015,6 +1018,7 @@ static string compile_iteration(const CLIArguments &args, std::vector<uint32_t>
msl_opts.arrayed_subpass_input = args.msl_arrayed_subpass_input;
msl_opts.r32ui_linear_texture_alignment = args.msl_r32ui_linear_texture_alignment;
msl_opts.r32ui_alignment_constant_id = args.msl_r32ui_alignment_constant_id;
msl_opts.texture_1D_as_2D = args.msl_texture_1d_as_2d;
msl_comp->set_msl_options(msl_opts);
for (auto &v : args.msl_discrete_descriptor_sets)
msl_comp->add_discrete_descriptor_set(v);
@@ -1439,6 +1443,7 @@ static int main_inner(int argc, char *argv[])
[&args](CLIParser &parser) { args.msl_r32ui_linear_texture_alignment = parser.next_uint(); });
cbs.add("--msl-r32ui-linear-texture-align-constant-id",
[&args](CLIParser &parser) { args.msl_r32ui_alignment_constant_id = parser.next_uint(); });
cbs.add("--msl-texture-1d-as-2d", [&args](CLIParser &) { args.msl_texture_1d_as_2d = true; });
cbs.add("--extension", [&args](CLIParser &parser) { args.extensions.push_back(parser.next_string()); });
cbs.add("--rename-entry-point", [&args](CLIParser &parser) {
auto old_name = parser.next_string();

View File

@@ -357,28 +357,6 @@ public:
return TypedID<U>(*this);
}
bool operator==(const TypedID &other) const
{
return id == other.id;
}
bool operator!=(const TypedID &other) const
{
return id != other.id;
}
template <Types type>
bool operator==(const TypedID<type> &other) const
{
return id == uint32_t(other);
}
template <Types type>
bool operator!=(const TypedID<type> &other) const
{
return id != uint32_t(other);
}
private:
uint32_t id = 0;
};
@@ -403,26 +381,6 @@ public:
return id;
}
bool operator==(const TypedID &other) const
{
return id == other.id;
}
bool operator!=(const TypedID &other) const
{
return id != other.id;
}
bool operator==(const TypedID<TypeNone> &other) const
{
return id == uint32_t(other);
}
bool operator!=(const TypedID<TypeNone> &other) const
{
return id != uint32_t(other);
}
private:
uint32_t id = 0;
};

View File

@@ -599,8 +599,8 @@ spvc_result spvc_compiler_options_set_uint(spvc_compiler_options options, spvc_c
options->msl.enable_base_index_zero = value != 0;
break;
case SPVC_COMPILER_OPTION_MSL_IOS_FRAMEBUFFER_FETCH_SUBPASS:
options->msl.ios_use_framebuffer_fetch_subpasses = value != 0;
case SPVC_COMPILER_OPTION_MSL_FRAMEBUFFER_FETCH_SUBPASS:
options->msl.use_framebuffer_fetch_subpasses = value != 0;
break;
case SPVC_COMPILER_OPTION_MSL_INVARIANT_FP_MATH:

View File

@@ -606,7 +606,11 @@ typedef enum spvc_compiler_option
SPVC_COMPILER_OPTION_MSL_DYNAMIC_OFFSETS_BUFFER_INDEX = 43 | SPVC_COMPILER_OPTION_MSL_BIT,
SPVC_COMPILER_OPTION_MSL_TEXTURE_1D_AS_2D = 44 | SPVC_COMPILER_OPTION_MSL_BIT,
SPVC_COMPILER_OPTION_MSL_ENABLE_BASE_INDEX_ZERO = 45 | SPVC_COMPILER_OPTION_MSL_BIT,
/* Obsolete. Use MSL_FRAMEBUFFER_FETCH_SUBPASS instead. */
SPVC_COMPILER_OPTION_MSL_IOS_FRAMEBUFFER_FETCH_SUBPASS = 46 | SPVC_COMPILER_OPTION_MSL_BIT,
SPVC_COMPILER_OPTION_MSL_FRAMEBUFFER_FETCH_SUBPASS = 46 | SPVC_COMPILER_OPTION_MSL_BIT,
SPVC_COMPILER_OPTION_MSL_INVARIANT_FP_MATH = 47 | SPVC_COMPILER_OPTION_MSL_BIT,
SPVC_COMPILER_OPTION_MSL_EMULATE_CUBEMAP_ARRAY = 48 | SPVC_COMPILER_OPTION_MSL_BIT,
SPVC_COMPILER_OPTION_MSL_ENABLE_DECORATION_BINDING = 49 | SPVC_COMPILER_OPTION_MSL_BIT,

View File

@@ -511,6 +511,7 @@ string CompilerGLSL::compile()
{
// only NV_gpu_shader5 supports divergent indexing on OpenGL, and it does so without extra qualifiers
backend.nonuniform_qualifier = "";
backend.needs_row_major_load_workaround = true;
}
backend.force_gl_in_out_block = true;
backend.supports_extensions = true;
@@ -3798,6 +3799,17 @@ void CompilerGLSL::emit_extension_workarounds(spv::ExecutionModel model)
statement("");
}
}
if (!workaround_ubo_load_overload_types.empty())
{
for (auto &type_id : workaround_ubo_load_overload_types)
{
auto &type = get<SPIRType>(type_id);
statement(type_to_glsl(type), " SPIRV_Cross_workaround_load_row_major(", type_to_glsl(type),
" wrap) { return wrap; }");
}
statement("");
}
}
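For context, each overload requested via request_workaround_wrapper_overload() expands to a trivial identity function in the emitted GLSL; for a mat4, the loop above would emit `mat4 SPIRV_Cross_workaround_load_row_major(mat4 wrap) { return wrap; }`. Routing a UBO load through this no-op call is enough to make affected drivers honor the row_major layout.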
// Returns a string representation of the ID, usable as a function arg.
@@ -9496,11 +9508,15 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
if (forward && ptr_expression)
ptr_expression->need_transpose = old_need_transpose;
bool flattened = ptr_expression && flattened_buffer_blocks.count(ptr_expression->loaded_from) != 0;
if (backend.needs_row_major_load_workaround && !is_non_native_row_major_matrix(ptr) && !flattened)
rewrite_load_for_wrapped_row_major(expr, result_type, ptr);
// By default, suppress usage tracking since using same expression multiple times does not imply any extra work.
// However, if we try to load a complex, composite object from a flattened buffer,
// we should avoid emitting the same code over and over and lower the result to a temporary.
bool usage_tracking = ptr_expression && flattened_buffer_blocks.count(ptr_expression->loaded_from) != 0 &&
(type.basetype == SPIRType::Struct || (type.columns > 1));
bool usage_tracking = flattened && (type.basetype == SPIRType::Struct || (type.columns > 1));
SPIRExpression *e = nullptr;
if (!forward && expression_is_non_value_type_array(ptr))
@@ -13253,8 +13269,14 @@ void CompilerGLSL::branch(BlockID from, BlockID to)
// and end the chain here.
statement("continue;");
}
else if (is_break(to))
else if (from != to && is_break(to))
{
// We cannot break to ourselves, so check explicitly for from != to.
// This case can trigger if a loop header is all three of these things:
// - Continue block
// - Loop header
// - Break merge target all at once ...
// Very dirty workaround.
// Switch constructs are able to break, but they cannot break out of a loop at the same time.
// Only sensible solution is to make a ladder variable, which we declare at the top of the switch block,
@@ -14578,7 +14600,35 @@ void CompilerGLSL::convert_non_uniform_expression(const SPIRType &type, std::str
// so we might have to fixup the OpLoad-ed expression late.
auto start_array_index = expr.find_first_of('[');
auto end_array_index = expr.find_last_of(']');
if (start_array_index == string::npos)
return;
// Check for the edge case that a non-arrayed resource was marked to be nonuniform,
// and the bracket we found is actually part of non-resource related data.
if (expr.find_first_of(',') < start_array_index)
return;
// We've opened a bracket; track nesting until we find the matching close bracket.
// This must be our image index.
size_t end_array_index = string::npos;
unsigned bracket_count = 1;
for (size_t index = start_array_index + 1; index < expr.size(); index++)
{
if (expr[index] == ']')
{
if (--bracket_count == 0)
{
end_array_index = index;
break;
}
}
else if (expr[index] == '[')
bracket_count++;
}
assert(bracket_count == 0);
// Doesn't really make sense to declare a non-arrayed image with nonuniformEXT, but there's
// nothing we can do here to express that.
if (start_array_index == string::npos || end_array_index == string::npos || end_array_index < start_array_index)
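As a standalone illustration of the bracket-matching scan above, here is a minimal sketch (hypothetical helper, not SPIRV-Cross API), assuming the expression uses balanced square brackets:

    #include <string>

    // Return the index of the ']' matching the '[' at `open`, or npos if unbalanced.
    static size_t find_matching_bracket(const std::string &expr, size_t open)
    {
        unsigned depth = 1;
        for (size_t i = open + 1; i < expr.size(); i++)
        {
            if (expr[i] == '[')
                depth++;
            else if (expr[i] == ']' && --depth == 0)
                return i;
        }
        return std::string::npos;
    }

For `textures[indices[i]].sample(s, uv)` with the first '[' at index 8, this returns 19, the bracket closing the outer subscript rather than the inner one.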
@@ -15087,3 +15137,63 @@ CompilerGLSL::ShaderSubgroupSupportHelper::Result::Result()
weights[KHR_shader_subgroup_basic] = big_num;
weights[KHR_shader_subgroup_vote] = big_num;
}
void CompilerGLSL::request_workaround_wrapper_overload(TypeID id)
{
// Must be ordered to maintain deterministic output, so vector is appropriate.
if (find(begin(workaround_ubo_load_overload_types), end(workaround_ubo_load_overload_types), id) ==
end(workaround_ubo_load_overload_types))
{
force_recompile();
workaround_ubo_load_overload_types.push_back(id);
}
}
void CompilerGLSL::rewrite_load_for_wrapped_row_major(std::string &expr, TypeID loaded_type, ID ptr)
{
// Loading row-major matrices from UBOs on older AMD Windows OpenGL drivers is problematic.
// To load these types correctly, we must first wrap them in a dummy function whose only purpose is to
// ensure that the row_major decoration is actually respected.
auto *var = maybe_get_backing_variable(ptr);
if (!var)
return;
auto &backing_type = get<SPIRType>(var->basetype);
bool is_ubo = backing_type.basetype == SPIRType::Struct &&
backing_type.storage == StorageClassUniform &&
has_decoration(backing_type.self, DecorationBlock);
if (!is_ubo)
return;
auto *type = &get<SPIRType>(loaded_type);
bool rewrite = false;
if (is_matrix(*type))
{
// To avoid adding a lot of unnecessary meta tracking to forward the row_major state,
// we will simply look at the base struct itself. It is exceptionally rare to mix and match row-major/col-major state.
// If there is any row-major action going on, we apply the workaround.
// It is harmless to apply the workaround to column-major matrices, so this is still a valid solution.
// If an access chain occurred, the workaround is not required; loads of vectors or scalars never need it.
type = &backing_type;
}
if (type->basetype == SPIRType::Struct)
{
// If we're loading a struct where any member is a row-major matrix, apply the workaround.
for (uint32_t i = 0; i < uint32_t(type->member_types.size()); i++)
{
if (combined_decoration_for_member(*type, i).get(DecorationRowMajor))
{
rewrite = true;
break;
}
}
}
if (rewrite)
{
request_workaround_wrapper_overload(loaded_type);
expr = join("SPIRV_Cross_workaround_load_row_major(", expr, ")");
}
}
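Putting it together: when the workaround fires, a UBO load such as `ubo.rowMajorMVP` (hypothetical member) is rewritten to `SPIRV_Cross_workaround_load_row_major(ubo.rowMajorMVP)`, and force_recompile() ensures the matching wrapper overload is declared by emit_extension_workarounds() on the next compile iteration.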

View File

@@ -560,6 +560,7 @@ protected:
bool support_small_type_sampling_result = false;
bool support_case_fallthrough = true;
bool use_array_constructor = false;
bool needs_row_major_load_workaround = false;
} backend;
void emit_struct(SPIRType &type);
@@ -784,6 +785,10 @@ protected:
// Currently used by NMin/Max/Clamp implementations.
std::unordered_map<uint32_t, uint32_t> extra_sub_expressions;
SmallVector<TypeID> workaround_ubo_load_overload_types;
void request_workaround_wrapper_overload(TypeID id);
void rewrite_load_for_wrapped_row_major(std::string &expr, TypeID loaded_type, ID ptr);
uint32_t statement_count = 0;
inline bool is_legacy() const

View File

@@ -160,7 +160,7 @@ void CompilerMSL::build_implicit_builtins()
bool need_sample_mask = msl_options.additional_fixed_sample_mask != 0xffffffff;
if (need_subpass_input || need_sample_pos || need_subgroup_mask || need_vertex_params || need_tesc_params ||
need_multiview || need_dispatch_base || need_vertex_base_params || need_grid_params ||
needs_subgroup_invocation_id || need_sample_mask)
needs_subgroup_invocation_id || needs_subgroup_size || need_sample_mask)
{
bool has_frag_coord = false;
bool has_sample_id = false;
@@ -197,7 +197,7 @@ void CompilerMSL::build_implicit_builtins()
if (var.storage != StorageClassInput)
return;
if (need_subpass_input && (!msl_options.is_ios() || !msl_options.ios_use_framebuffer_fetch_subpasses))
if (need_subpass_input && (!msl_options.use_framebuffer_fetch_subpasses))
{
switch (builtin)
{
@@ -287,7 +287,7 @@ void CompilerMSL::build_implicit_builtins()
has_subgroup_invocation_id = true;
}
if (need_subgroup_ge_mask && builtin == BuiltInSubgroupSize)
if ((need_subgroup_ge_mask || needs_subgroup_size) && builtin == BuiltInSubgroupSize)
{
builtin_subgroup_size_id = var.self;
mark_implicit_builtin(StorageClassInput, BuiltInSubgroupSize, var.self);
@@ -331,7 +331,7 @@ void CompilerMSL::build_implicit_builtins()
// Use Metal's native frame-buffer fetch API for subpass inputs.
if ((!has_frag_coord || (msl_options.multiview && !has_view_idx) ||
(msl_options.arrayed_subpass_input && !msl_options.multiview && !has_layer)) &&
(!msl_options.is_ios() || !msl_options.ios_use_framebuffer_fetch_subpasses) && need_subpass_input)
(!msl_options.use_framebuffer_fetch_subpasses) && need_subpass_input)
{
if (!has_frag_coord)
{
@@ -593,7 +593,7 @@ void CompilerMSL::build_implicit_builtins()
mark_implicit_builtin(StorageClassInput, BuiltInSubgroupLocalInvocationId, var_id);
}
if (!has_subgroup_size && need_subgroup_ge_mask)
if (!has_subgroup_size && (need_subgroup_ge_mask || needs_subgroup_size))
{
uint32_t offset = ir.increase_bound_by(2);
uint32_t type_ptr_id = offset;
@@ -1265,7 +1265,8 @@ void CompilerMSL::preprocess_op_codes()
add_pragma_line("#pragma clang diagnostic ignored \"-Wunused-variable\"");
}
// Metal vertex functions that write to resources must disable rasterization and return void.
// Before MSL 2.1 (2.2 for textures), Metal vertex functions that write to
// resources must disable rasterization and return void.
if (preproc.uses_resource_write)
is_rasterization_disabled = true;
@@ -1280,6 +1281,8 @@ void CompilerMSL::preprocess_op_codes()
if (preproc.needs_subgroup_invocation_id)
needs_subgroup_invocation_id = true;
if (preproc.needs_subgroup_size)
needs_subgroup_size = true;
}
// Move the Private and Workgroup global variables to the entry function.
@@ -1372,7 +1375,7 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std::
// Use Metal's native frame-buffer fetch API for subpass inputs.
auto &type = get<SPIRType>(ops[0]);
if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData &&
(!msl_options.is_ios() || !msl_options.ios_use_framebuffer_fetch_subpasses))
(!msl_options.use_framebuffer_fetch_subpasses))
{
// Implicitly reads gl_FragCoord.
assert(builtin_frag_coord_id != 0);
@@ -4608,6 +4611,59 @@ void CompilerMSL::emit_custom_functions()
statement("");
break;
case SPVFuncImplSubgroupBroadcast:
// Metal doesn't allow broadcasting boolean values directly, but we can work around that by broadcasting
// them as integers.
statement("template<typename T>");
statement("inline T spvSubgroupBroadcast(T value, ushort lane)");
begin_scope();
if (msl_options.is_ios())
statement("return quad_broadcast(value, lane);");
else
statement("return simd_broadcast(value, lane);");
end_scope();
statement("");
statement("template<>");
statement("inline bool spvSubgroupBroadcast(bool value, ushort lane)");
begin_scope();
if (msl_options.is_ios())
statement("return !!quad_broadcast((ushort)value, lane);");
else
statement("return !!simd_broadcast((ushort)value, lane);");
end_scope();
statement("");
statement("template<uint N>");
statement("inline vec<bool, N> spvSubgroupBroadcast(vec<bool, N> value, ushort lane)");
begin_scope();
if (msl_options.is_ios())
statement("return (vec<bool, N>)quad_broadcast((vec<ushort, N>)value, lane);");
else
statement("return (vec<bool, N>)simd_broadcast((vec<ushort, N>)value, lane);");
end_scope();
statement("");
break;
case SPVFuncImplSubgroupBroadcastFirst:
statement("template<typename T>");
statement("inline T spvSubgroupBroadcastFirst(T value)");
begin_scope();
statement("return simd_broadcast_first(value);");
end_scope();
statement("");
statement("template<>");
statement("inline bool spvSubgroupBroadcastFirst(bool value)");
begin_scope();
statement("return !!simd_broadcast_first((ushort)value);");
end_scope();
statement("");
statement("template<uint N>");
statement("inline vec<bool, N> spvSubgroupBroadcastFirst(vec<bool, N> value)");
begin_scope();
statement("return (vec<bool, N>)simd_broadcast_first((vec<ushort, N>)value);");
end_scope();
statement("");
break;
case SPVFuncImplSubgroupBallot:
statement("inline uint4 spvSubgroupBallot(bool value)");
begin_scope();
@@ -4631,8 +4687,11 @@ void CompilerMSL::emit_custom_functions()
break;
case SPVFuncImplSubgroupBallotFindLSB:
statement("inline uint spvSubgroupBallotFindLSB(uint4 ballot)");
statement("inline uint spvSubgroupBallotFindLSB(uint4 ballot, uint gl_SubgroupSize)");
begin_scope();
statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 32u)), "
"extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0));");
statement("ballot &= mask;");
statement("return select(ctz(ballot.x), select(32 + ctz(ballot.y), select(64 + ctz(ballot.z), select(96 + "
"ctz(ballot.w), uint(-1), ballot.w == 0), ballot.z == 0), ballot.y == 0), ballot.x == 0);");
end_scope();
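The new gl_SubgroupSize parameter matters because ballot bits at or above the subgroup size do not correspond to real lanes, so they must be cleared before the ctz scan. A host-side C++ sketch of the same masked find-LSB (illustrative only; assumes the ballot is four 32-bit words, low word first):

    #include <array>
    #include <bit>
    #include <cstdint>

    static uint32_t ballot_find_lsb(std::array<uint32_t, 4> ballot, uint32_t subgroup_size)
    {
        for (uint32_t word = 0; word < 4; word++)
        {
            uint32_t base = word * 32;
            // Per-word validity mask, mirroring the extract_bits() pair above.
            uint32_t valid = subgroup_size <= base ? 0u :
                             subgroup_size >= base + 32 ? 0xFFFFFFFFu :
                             (1u << (subgroup_size - base)) - 1u;
            uint32_t bits = ballot[word] & valid;
            if (bits != 0)
                return base + uint32_t(std::countr_zero(bits));
        }
        return uint32_t(-1); // no live bit set
    }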
@@ -4640,8 +4699,11 @@ void CompilerMSL::emit_custom_functions()
break;
case SPVFuncImplSubgroupBallotFindMSB:
statement("inline uint spvSubgroupBallotFindMSB(uint4 ballot)");
statement("inline uint spvSubgroupBallotFindMSB(uint4 ballot, uint gl_SubgroupSize)");
begin_scope();
statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 32u)), "
"extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0));");
statement("ballot &= mask;");
statement("return select(128 - (clz(ballot.w) + 1), select(96 - (clz(ballot.z) + 1), select(64 - "
"(clz(ballot.y) + 1), select(32 - (clz(ballot.x) + 1), uint(-1), ballot.x == 0), ballot.y == 0), "
"ballot.z == 0), ballot.w == 0);");
@@ -4650,24 +4712,31 @@ void CompilerMSL::emit_custom_functions()
break;
case SPVFuncImplSubgroupBallotBitCount:
statement("inline uint spvSubgroupBallotBitCount(uint4 ballot)");
statement("inline uint spvPopCount4(uint4 ballot)");
begin_scope();
statement("return popcount(ballot.x) + popcount(ballot.y) + popcount(ballot.z) + popcount(ballot.w);");
end_scope();
statement("");
statement("inline uint spvSubgroupBallotBitCount(uint4 ballot, uint gl_SubgroupSize)");
begin_scope();
statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 32u)), "
"extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0));");
statement("return spvPopCount4(ballot & mask);");
end_scope();
statement("");
statement("inline uint spvSubgroupBallotInclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID)");
begin_scope();
statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), "
"extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), "
"uint2(0));");
statement("return spvSubgroupBallotBitCount(ballot & mask);");
statement("return spvPopCount4(ballot & mask);");
end_scope();
statement("");
statement("inline uint spvSubgroupBallotExclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID)");
begin_scope();
statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID, 32u)), "
"extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0));");
statement("return spvSubgroupBallotBitCount(ballot & mask);");
statement("return spvPopCount4(ballot & mask);");
end_scope();
statement("");
break;
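The same masking idea drives all three bit-count variants: Reduce counts bits for lanes below gl_SubgroupSize, while the scans count bits below (exclusive) or through (inclusive) gl_SubgroupInvocationID. A C++ sketch of the exclusive case, under the same four-word layout assumption as before:

    #include <array>
    #include <bit>
    #include <cstdint>

    // Count ballot bits for lanes strictly below `id`, mirroring
    // spvSubgroupBallotExclusiveBitCount. Words 2 and 3 always mask to zero.
    static uint32_t ballot_exclusive_bit_count(std::array<uint32_t, 4> ballot, uint32_t id)
    {
        uint32_t lo = id >= 32 ? 0xFFFFFFFFu : (id ? (1u << id) - 1u : 0u);
        uint32_t hi = id > 32 ? (id >= 64 ? 0xFFFFFFFFu : (1u << (id - 32)) - 1u) : 0u;
        return uint32_t(std::popcount(ballot[0] & lo) + std::popcount(ballot[1] & hi));
    }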
@@ -4680,7 +4749,7 @@ void CompilerMSL::emit_custom_functions()
statement("template<typename T>");
statement("inline bool spvSubgroupAllEqual(T value)");
begin_scope();
statement("return simd_all(value == simd_broadcast_first(value));");
statement("return simd_all(all(value == simd_broadcast_first(value)));");
end_scope();
statement("");
statement("template<>");
@@ -4689,6 +4758,184 @@ void CompilerMSL::emit_custom_functions()
statement("return simd_all(value) || !simd_any(value);");
end_scope();
statement("");
statement("template<uint N>");
statement("inline bool spvSubgroupAllEqual(vec<bool, N> value)");
begin_scope();
statement("return simd_all(all(value == (vec<bool, N>)simd_broadcast_first((vec<ushort, N>)value)));");
end_scope();
statement("");
break;
case SPVFuncImplSubgroupShuffle:
statement("template<typename T>");
statement("inline T spvSubgroupShuffle(T value, ushort lane)");
begin_scope();
if (msl_options.is_ios())
statement("return quad_shuffle(value, lane);");
else
statement("return simd_shuffle(value, lane);");
end_scope();
statement("");
statement("template<>");
statement("inline bool spvSubgroupShuffle(bool value, ushort lane)");
begin_scope();
if (msl_options.is_ios())
statement("return !!quad_shuffle((ushort)value, lane);");
else
statement("return !!simd_shuffle((ushort)value, lane);");
end_scope();
statement("");
statement("template<uint N>");
statement("inline vec<bool, N> spvSubgroupShuffle(vec<bool, N> value, ushort lane)");
begin_scope();
if (msl_options.is_ios())
statement("return (vec<bool, N>)quad_shuffle((vec<ushort, N>)value, lane);");
else
statement("return (vec<bool, N>)simd_shuffle((vec<ushort, N>)value, lane);");
end_scope();
statement("");
break;
case SPVFuncImplSubgroupShuffleXor:
statement("template<typename T>");
statement("inline T spvSubgroupShuffleXor(T value, ushort mask)");
begin_scope();
if (msl_options.is_ios())
statement("return quad_shuffle_xor(value, mask);");
else
statement("return simd_shuffle_xor(value, mask);");
end_scope();
statement("");
statement("template<>");
statement("inline bool spvSubgroupShuffleXor(bool value, ushort mask)");
begin_scope();
if (msl_options.is_ios())
statement("return !!quad_shuffle_xor((ushort)value, mask);");
else
statement("return !!simd_shuffle_xor((ushort)value, mask);");
end_scope();
statement("");
statement("template<uint N>");
statement("inline vec<bool, N> spvSubgroupShuffleXor(vec<bool, N> value, ushort mask)");
begin_scope();
if (msl_options.is_ios())
statement("return (vec<bool, N>)quad_shuffle_xor((vec<ushort, N>)value, mask);");
else
statement("return (vec<bool, N>)simd_shuffle_xor((vec<ushort, N>)value, mask);");
end_scope();
statement("");
break;
case SPVFuncImplSubgroupShuffleUp:
statement("template<typename T>");
statement("inline T spvSubgroupShuffleUp(T value, ushort delta)");
begin_scope();
if (msl_options.is_ios())
statement("return quad_shuffle_up(value, delta);");
else
statement("return simd_shuffle_up(value, delta);");
end_scope();
statement("");
statement("template<>");
statement("inline bool spvSubgroupShuffleUp(bool value, ushort delta)");
begin_scope();
if (msl_options.is_ios())
statement("return !!quad_shuffle_up((ushort)value, delta);");
else
statement("return !!simd_shuffle_up((ushort)value, delta);");
end_scope();
statement("");
statement("template<uint N>");
statement("inline vec<bool, N> spvSubgroupShuffleUp(vec<bool, N> value, ushort delta)");
begin_scope();
if (msl_options.is_ios())
statement("return (vec<bool, N>)quad_shuffle_up((vec<ushort, N>)value, delta);");
else
statement("return (vec<bool, N>)simd_shuffle_up((vec<ushort, N>)value, delta);");
end_scope();
statement("");
break;
case SPVFuncImplSubgroupShuffleDown:
statement("template<typename T>");
statement("inline T spvSubgroupShuffleDown(T value, ushort delta)");
begin_scope();
if (msl_options.is_ios())
statement("return quad_shuffle_down(value, delta);");
else
statement("return simd_shuffle_down(value, delta);");
end_scope();
statement("");
statement("template<>");
statement("inline bool spvSubgroupShuffleDown(bool value, ushort delta)");
begin_scope();
if (msl_options.is_ios())
statement("return !!quad_shuffle_down((ushort)value, delta);");
else
statement("return !!simd_shuffle_down((ushort)value, delta);");
end_scope();
statement("");
statement("template<uint N>");
statement("inline vec<bool, N> spvSubgroupShuffleDown(vec<bool, N> value, ushort delta)");
begin_scope();
if (msl_options.is_ios())
statement("return (vec<bool, N>)quad_shuffle_down((vec<ushort, N>)value, delta);");
else
statement("return (vec<bool, N>)simd_shuffle_down((vec<ushort, N>)value, delta);");
end_scope();
statement("");
break;
case SPVFuncImplQuadBroadcast:
statement("template<typename T>");
statement("inline T spvQuadBroadcast(T value, uint lane)");
begin_scope();
statement("return quad_broadcast(value, lane);");
end_scope();
statement("");
statement("template<>");
statement("inline bool spvQuadBroadcast(bool value, uint lane)");
begin_scope();
statement("return !!quad_broadcast((ushort)value, lane);");
end_scope();
statement("");
statement("template<uint N>");
statement("inline vec<bool, N> spvQuadBroadcast(vec<bool, N> value, uint lane)");
begin_scope();
statement("return (vec<bool, N>)quad_broadcast((vec<ushort, N>)value, lane);");
end_scope();
statement("");
break;
case SPVFuncImplQuadSwap:
// We can implement this easily based on the following table giving
// the target lane ID from the direction and current lane ID:
// Direction
// | 0 | 1 | 2 |
// ---+---+---+---+
// L 0 | 1 2 3
// a 1 | 0 3 2
// n 2 | 3 0 1
// e 3 | 2 1 0
// Notice that target = source ^ (direction + 1).
statement("template<typename T>");
statement("inline T spvQuadSwap(T value, uint dir)");
begin_scope();
statement("return quad_shuffle_xor(value, dir + 1);");
end_scope();
statement("");
statement("template<>");
statement("inline bool spvQuadSwap(bool value, uint dir)");
begin_scope();
statement("return !!quad_shuffle_xor((ushort)value, dir + 1);");
end_scope();
statement("");
statement("template<uint N>");
statement("inline vec<bool, N> spvQuadSwap(vec<bool, N> value, uint dir)");
begin_scope();
statement("return (vec<bool, N>)quad_shuffle_xor((vec<ushort, N>)value, dir + 1);");
end_scope();
statement("");
break;
case SPVFuncImplReflectScalar:
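The XOR identity behind spvQuadSwap (target = source ^ (direction + 1)) is easy to sanity-check on the host:

    #include <cstdio>

    // Prints the quad-swap lane table: direction 0 swaps horizontally,
    // 1 swaps vertically, 2 swaps diagonally.
    int main()
    {
        for (unsigned dir = 0; dir < 3; dir++)
            for (unsigned lane = 0; lane < 4; lane++)
                std::printf("dir %u: lane %u -> %u\n", dir, lane, lane ^ (dir + 1));
        return 0;
    }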
@@ -7168,7 +7415,7 @@ void CompilerMSL::emit_texture_op(const Instruction &i, bool sparse)
if (sparse)
SPIRV_CROSS_THROW("Sparse feedback not yet supported in MSL.");
if (msl_options.is_ios() && msl_options.ios_use_framebuffer_fetch_subpasses)
if (msl_options.use_framebuffer_fetch_subpasses)
{
auto *ops = stream(i);
@@ -8265,25 +8512,26 @@ string CompilerMSL::to_function_args(const TextureFunctionArguments &args, bool
break;
}
if (args.base.is_fetch && args.offset)
if (args.base.is_fetch && (args.offset || args.coffset))
{
uint32_t offset_expr = args.offset ? args.offset : args.coffset;
// Fetch offsets must be applied directly to the coordinate.
forward = forward && should_forward(args.offset);
auto &type = expression_type(args.offset);
if (type.basetype != SPIRType::UInt)
tex_coords += " + " + bitcast_expression(SPIRType::UInt, args.offset);
forward = forward && should_forward(offset_expr);
auto &type = expression_type(offset_expr);
if (imgtype.image.dim == Dim1D && msl_options.texture_1D_as_2D)
{
if (type.basetype != SPIRType::UInt)
tex_coords += join(" + uint2(", bitcast_expression(SPIRType::UInt, offset_expr), ", 0)");
else
tex_coords += join(" + uint2(", to_enclosed_expression(offset_expr), ", 0)");
}
else
tex_coords += " + " + to_enclosed_expression(args.offset);
}
else if (args.base.is_fetch && args.coffset)
{
// Fetch offsets must be applied directly to the coordinate.
forward = forward && should_forward(args.coffset);
auto &type = expression_type(args.coffset);
if (type.basetype != SPIRType::UInt)
tex_coords += " + " + bitcast_expression(SPIRType::UInt, args.coffset);
else
tex_coords += " + " + to_enclosed_expression(args.coffset);
{
if (type.basetype != SPIRType::UInt)
tex_coords += " + " + bitcast_expression(SPIRType::UInt, offset_expr);
else
tex_coords += " + " + to_enclosed_expression(offset_expr);
}
}
// If projection, use alt coord as divisor
@@ -8454,6 +8702,7 @@ string CompilerMSL::to_function_args(const TextureFunctionArguments &args, bool
string grad_opt;
switch (imgtype.image.dim)
{
case Dim1D:
case Dim2D:
grad_opt = "2d";
break;
@@ -8489,30 +8738,42 @@ string CompilerMSL::to_function_args(const TextureFunctionArguments &args, bool
// Add offsets
string offset_expr;
const SPIRType *offset_type = nullptr;
if (args.coffset && !args.base.is_fetch)
{
forward = forward && should_forward(args.coffset);
offset_expr = to_expression(args.coffset);
offset_type = &expression_type(args.coffset);
}
else if (args.offset && !args.base.is_fetch)
{
forward = forward && should_forward(args.offset);
offset_expr = to_expression(args.offset);
offset_type = &expression_type(args.offset);
}
if (!offset_expr.empty())
{
switch (imgtype.image.dim)
{
case Dim1D:
if (!msl_options.texture_1D_as_2D)
break;
if (offset_type->vecsize > 1)
offset_expr = enclose_expression(offset_expr) + ".x";
farg_str += join(", int2(", offset_expr, ", 0)");
break;
case Dim2D:
if (coord_type.vecsize > 2)
if (offset_type->vecsize > 2)
offset_expr = enclose_expression(offset_expr) + ".xy";
farg_str += ", " + offset_expr;
break;
case Dim3D:
if (coord_type.vecsize > 3)
if (offset_type->vecsize > 3)
offset_expr = enclose_expression(offset_expr) + ".xyz";
farg_str += ", " + offset_expr;
@@ -8532,7 +8793,10 @@ string CompilerMSL::to_function_args(const TextureFunctionArguments &args, bool
if (!msl_options.swizzle_texture_samples || is_dynamic_img_sampler)
{
forward = forward && should_forward(args.component);
farg_str += ", " + to_component_argument(args.component);
if (const auto *var = maybe_get_backing_variable(img))
if (!image_is_comparison(get<SPIRType>(var->basetype), var->self))
farg_str += ", " + to_component_argument(args.component);
}
}
@@ -8962,9 +9226,9 @@ string CompilerMSL::to_swizzle_expression(uint32_t id)
auto index = expr.find_first_of('[');
// If an image is part of an argument buffer, translate this to a legal identifier.
for (auto &c : expr)
if (c == '.')
c = '_';
string::size_type period = 0;
while ((period = expr.find_first_of('.', period)) != string::npos && period < index)
expr[period] = '_';
if (index == string::npos)
return expr + swizzle_name_suffix;
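For example, a hypothetical argument-buffer expression such as `spvDescriptorSet0.tex[obj.idx]` now becomes `spvDescriptorSet0_tex[obj.idx]`: only periods before the first '[' are rewritten, so member accesses inside the subscript survive intact.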
@@ -9828,9 +10092,9 @@ void CompilerMSL::entry_point_args_builtin(string &ep_args)
if (bi_type == BuiltInSampleMask && get_entry_point().flags.get(ExecutionModePostDepthCoverage))
{
if (!msl_options.supports_msl_version(2))
SPIRV_CROSS_THROW("Post-depth coverage requires Metal 2.0.");
if (!msl_options.is_ios())
SPIRV_CROSS_THROW("Post-depth coverage is only supported on iOS.");
SPIRV_CROSS_THROW("Post-depth coverage requires MSL 2.0.");
if (msl_options.is_macos() && !msl_options.supports_msl_version(2, 3))
SPIRV_CROSS_THROW("Post-depth coverage on Mac requires MSL 2.3.");
ep_args += ", post_depth_coverage";
}
ep_args += "]]";
@@ -10207,6 +10471,8 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args)
}
else
{
if (msl_options.is_macos() && !msl_options.supports_msl_version(2, 3))
SPIRV_CROSS_THROW("Framebuffer fetch on Mac is not supported before MSL 2.3.");
ep_args += image_type_glsl(type, var_id) + " " + r.name;
ep_args += " [[color(" + convert_to_string(r.index) + ")]]";
}
@@ -10449,7 +10715,7 @@ void CompilerMSL::fix_up_shader_inputs_outputs()
SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.1.");
entry_func.fixup_hooks_in.push_back([=]() {
statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ",
to_expression(builtin_subgroup_invocation_id_id), " > 32 ? uint4(0, (1 << (",
to_expression(builtin_subgroup_invocation_id_id), " >= 32 ? uint4(0, (1 << (",
to_expression(builtin_subgroup_invocation_id_id), " - 32)), uint2(0)) : uint4(1 << ",
to_expression(builtin_subgroup_invocation_id_id), ", uint3(0));");
});
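The comparison fix above ('> 32' becomes '>= 32') matters for lane 32 exactly: under the old test it took the low-word path and computed 1 << 32, which is undefined. A host-side sketch of the corrected gl_SubgroupEqMask construction (low word first):

    #include <array>
    #include <cstdint>

    // One bit set at the invocation's own lane, spread across a uint4-style mask.
    static std::array<uint32_t, 4> subgroup_eq_mask(uint32_t id)
    {
        if (id >= 32) // lanes 32..63 must select the second word
            return { 0u, 1u << (id - 32), 0u, 0u };
        return { 1u << id, 0u, 0u, 0u };
    }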
@@ -10461,25 +10727,25 @@ void CompilerMSL::fix_up_shader_inputs_outputs()
SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.1.");
entry_func.fixup_hooks_in.push_back([=]() {
// Case where index < 32, size < 32:
// mask0 = bfe(0xFFFFFFFF, index, size - index);
// mask1 = bfe(0xFFFFFFFF, 0, 0); // Gives 0
// mask0 = bfi(0, 0xFFFFFFFF, index, size - index);
// mask1 = bfi(0, 0xFFFFFFFF, 0, 0); // Gives 0
// Case where index < 32 but size >= 32:
// mask0 = bfe(0xFFFFFFFF, index, 32 - index);
// mask1 = bfe(0xFFFFFFFF, 0, size - 32);
// mask0 = bfi(0, 0xFFFFFFFF, index, 32 - index);
// mask1 = bfi(0, 0xFFFFFFFF, 0, size - 32);
// Case where index >= 32:
// mask0 = bfe(0xFFFFFFFF, 32, 0); // Gives 0
// mask1 = bfe(0xFFFFFFFF, index - 32, size - index);
// mask0 = bfi(0, 0xFFFFFFFF, 32, 0); // Gives 0
// mask1 = bfi(0, 0xFFFFFFFF, index - 32, size - index);
// This is expressed without branches to avoid divergent
// control flow--hence the complicated min/max expressions.
// This is further complicated by the fact that if you attempt
// to bfe out-of-bounds on Metal, undefined behavior is the
// to bfi/bfe out-of-bounds on Metal, undefined behavior is the
// result.
statement(builtin_type_decl(bi_type), " ", to_expression(var_id),
" = uint4(extract_bits(0xFFFFFFFF, min(",
" = uint4(insert_bits(0u, 0xFFFFFFFF, min(",
to_expression(builtin_subgroup_invocation_id_id), ", 32u), (uint)max(min((int)",
to_expression(builtin_subgroup_size_id), ", 32) - (int)",
to_expression(builtin_subgroup_invocation_id_id),
", 0)), extract_bits(0xFFFFFFFF, (uint)max((int)",
", 0)), insert_bits(0u, 0xFFFFFFFF, (uint)max((int)",
to_expression(builtin_subgroup_invocation_id_id), " - 32, 0), (uint)max((int)",
to_expression(builtin_subgroup_size_id), " - (int)max(",
to_expression(builtin_subgroup_invocation_id_id), ", 32u), 0)), uint2(0));");
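extract_bits was the wrong primitive here: extract_bits(0xFFFFFFFF, offset, count) always returns `count` one-bits in the low bits of the result, whereas the mask needs its run of ones to start at bit `offset`, which is exactly what insert_bits(0u, 0xFFFFFFFF, offset, count) produces. A host-side sketch of the clamped, branchless gl_SubgroupGeMask construction (illustrative; the min()/max() clamps guard against the out-of-range undefined behavior mentioned above):

    #include <algorithm>
    #include <array>
    #include <cstdint>

    // Stand-in for Metal's insert_bits(0u, 0xFFFFFFFF, offset, count):
    // `count` one-bits starting at bit `offset`; out-of-range inputs yield 0.
    static uint32_t ones_at(uint32_t offset, uint32_t count)
    {
        if (count == 0 || offset >= 32)
            return 0u;
        uint64_t ones = count >= 32 ? 0xFFFFFFFFull : ((1ull << count) - 1);
        return uint32_t((ones << offset) & 0xFFFFFFFFull);
    }

    // gl_SubgroupGeMask covers lanes id..size-1, mirroring the emitted expression.
    static std::array<uint32_t, 4> subgroup_ge_mask(uint32_t id, uint32_t size)
    {
        uint32_t m0 = ones_at(std::min(id, 32u),
                              uint32_t(std::max(int(std::min(size, 32u)) - int(id), 0)));
        uint32_t m1 = ones_at(uint32_t(std::max(int(id) - 32, 0)),
                              uint32_t(std::max(int(size) - int(std::max(id, 32u)), 0)));
        return { m0, m1, 0u, 0u };
    }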
@@ -10494,11 +10760,11 @@ void CompilerMSL::fix_up_shader_inputs_outputs()
// The same logic applies here, except now the index is one
// more than the subgroup invocation ID.
statement(builtin_type_decl(bi_type), " ", to_expression(var_id),
" = uint4(extract_bits(0xFFFFFFFF, min(",
" = uint4(insert_bits(0u, 0xFFFFFFFF, min(",
to_expression(builtin_subgroup_invocation_id_id), " + 1, 32u), (uint)max(min((int)",
to_expression(builtin_subgroup_size_id), ", 32) - (int)",
to_expression(builtin_subgroup_invocation_id_id),
" - 1, 0)), extract_bits(0xFFFFFFFF, (uint)max((int)",
" - 1, 0)), insert_bits(0u, 0xFFFFFFFF, (uint)max((int)",
to_expression(builtin_subgroup_invocation_id_id), " + 1 - 32, 0), (uint)max((int)",
to_expression(builtin_subgroup_size_id), " - (int)max(",
to_expression(builtin_subgroup_invocation_id_id), " + 1, 32u), 0)), uint2(0));");
@@ -10834,8 +11100,8 @@ uint32_t CompilerMSL::get_metal_resource_index(SPIRVariable &var, SPIRType::Base
bool CompilerMSL::type_is_msl_framebuffer_fetch(const SPIRType &type) const
{
return type.basetype == SPIRType::Image && type.image.dim == DimSubpassData && msl_options.is_ios() &&
msl_options.ios_use_framebuffer_fetch_subpasses;
return type.basetype == SPIRType::Image && type.image.dim == DimSubpassData &&
msl_options.use_framebuffer_fetch_subpasses;
}
string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg)
@@ -11062,6 +11328,11 @@ void CompilerMSL::replace_illegal_names()
"fragment",
"compute",
"bias",
"level",
"gradient2d",
"gradientcube",
"gradient3d",
"min_lod_clamp",
"assert",
"VARIABLE_TRACEPOINT",
"STATIC_DATA_TRACEPOINT",
@@ -11850,12 +12121,11 @@ void CompilerMSL::emit_subgroup_op(const Instruction &i)
break;
case OpGroupNonUniformBroadcast:
emit_binary_func_op(result_type, id, ops[3], ops[4],
msl_options.is_ios() ? "quad_broadcast" : "simd_broadcast");
emit_binary_func_op(result_type, id, ops[3], ops[4], "spvSubgroupBroadcast");
break;
case OpGroupNonUniformBroadcastFirst:
emit_unary_func_op(result_type, id, ops[3], "simd_broadcast_first");
emit_unary_func_op(result_type, id, ops[3], "spvSubgroupBroadcastFirst");
break;
case OpGroupNonUniformBallot:
@@ -11871,46 +12141,50 @@ void CompilerMSL::emit_subgroup_op(const Instruction &i)
break;
case OpGroupNonUniformBallotFindLSB:
emit_unary_func_op(result_type, id, ops[3], "spvSubgroupBallotFindLSB");
emit_binary_func_op(result_type, id, ops[3], builtin_subgroup_size_id, "spvSubgroupBallotFindLSB");
break;
case OpGroupNonUniformBallotFindMSB:
emit_unary_func_op(result_type, id, ops[3], "spvSubgroupBallotFindMSB");
emit_binary_func_op(result_type, id, ops[3], builtin_subgroup_size_id, "spvSubgroupBallotFindMSB");
break;
case OpGroupNonUniformBallotBitCount:
{
auto operation = static_cast<GroupOperation>(ops[3]);
if (operation == GroupOperationReduce)
emit_unary_func_op(result_type, id, ops[4], "spvSubgroupBallotBitCount");
else if (operation == GroupOperationInclusiveScan)
switch (operation)
{
case GroupOperationReduce:
emit_binary_func_op(result_type, id, ops[4], builtin_subgroup_size_id, "spvSubgroupBallotBitCount");
break;
case GroupOperationInclusiveScan:
emit_binary_func_op(result_type, id, ops[4], builtin_subgroup_invocation_id_id,
"spvSubgroupBallotInclusiveBitCount");
else if (operation == GroupOperationExclusiveScan)
break;
case GroupOperationExclusiveScan:
emit_binary_func_op(result_type, id, ops[4], builtin_subgroup_invocation_id_id,
"spvSubgroupBallotExclusiveBitCount");
else
break;
default:
SPIRV_CROSS_THROW("Invalid BitCount operation.");
break;
}
break;
}
case OpGroupNonUniformShuffle:
emit_binary_func_op(result_type, id, ops[3], ops[4], msl_options.is_ios() ? "quad_shuffle" : "simd_shuffle");
emit_binary_func_op(result_type, id, ops[3], ops[4], "spvSubgroupShuffle");
break;
case OpGroupNonUniformShuffleXor:
emit_binary_func_op(result_type, id, ops[3], ops[4],
msl_options.is_ios() ? "quad_shuffle_xor" : "simd_shuffle_xor");
emit_binary_func_op(result_type, id, ops[3], ops[4], "spvSubgroupShuffleXor");
break;
case OpGroupNonUniformShuffleUp:
emit_binary_func_op(result_type, id, ops[3], ops[4],
msl_options.is_ios() ? "quad_shuffle_up" : "simd_shuffle_up");
emit_binary_func_op(result_type, id, ops[3], ops[4], "spvSubgroupShuffleUp");
break;
case OpGroupNonUniformShuffleDown:
emit_binary_func_op(result_type, id, ops[3], ops[4],
msl_options.is_ios() ? "quad_shuffle_down" : "simd_shuffle_down");
emit_binary_func_op(result_type, id, ops[3], ops[4], "spvSubgroupShuffleDown");
break;
case OpGroupNonUniformAll:
@@ -12018,26 +12292,11 @@ case OpGroupNonUniform##op: \
#undef MSL_GROUP_OP_CAST
case OpGroupNonUniformQuadSwap:
{
// We can implement this easily based on the following table giving
// the target lane ID from the direction and current lane ID:
// Direction
// | 0 | 1 | 2 |
// ---+---+---+---+
// L 0 | 1 2 3
// a 1 | 0 3 2
// n 2 | 3 0 1
// e 3 | 2 1 0
// Notice that target = source ^ (direction + 1).
uint32_t mask = evaluate_constant_u32(ops[4]) + 1;
uint32_t mask_id = ir.increase_bound_by(1);
set<SPIRConstant>(mask_id, expression_type_id(ops[4]), mask, false);
emit_binary_func_op(result_type, id, ops[3], mask_id, "quad_shuffle_xor");
emit_binary_func_op(result_type, id, ops[3], ops[4], "spvQuadSwap");
break;
}
case OpGroupNonUniformQuadBroadcast:
emit_binary_func_op(result_type, id, ops[3], ops[4], "quad_broadcast");
emit_binary_func_op(result_type, id, ops[3], ops[4], "spvQuadBroadcast");
break;
default:
@@ -12930,7 +13189,8 @@ bool CompilerMSL::OpCodePreprocessor::handle(Op opcode, const uint32_t *args, ui
}
case OpImageWrite:
uses_resource_write = true;
if (!compiler.msl_options.supports_msl_version(2, 2))
uses_resource_write = true;
break;
case OpStore:
@@ -12990,8 +13250,15 @@ bool CompilerMSL::OpCodePreprocessor::handle(Op opcode, const uint32_t *args, ui
needs_subgroup_invocation_id = true;
break;
case OpGroupNonUniformBallotFindLSB:
case OpGroupNonUniformBallotFindMSB:
needs_subgroup_size = true;
break;
case OpGroupNonUniformBallotBitCount:
if (args[3] != GroupOperationReduce)
if (args[3] == GroupOperationReduce)
needs_subgroup_size = true;
else
needs_subgroup_invocation_id = true;
break;
@@ -13035,7 +13302,8 @@ void CompilerMSL::OpCodePreprocessor::check_resource_write(uint32_t var_id)
{
auto *p_var = compiler.maybe_get_backing_variable(var_id);
StorageClass sc = p_var ? p_var->storage : StorageClassMax;
if (sc == StorageClassUniform || sc == StorageClassStorageBuffer)
if (!compiler.msl_options.supports_msl_version(2, 1) &&
(sc == StorageClassUniform || sc == StorageClassStorageBuffer))
uses_resource_write = true;
}
@@ -13174,6 +13442,12 @@ CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op o
break;
}
case OpGroupNonUniformBroadcast:
return SPVFuncImplSubgroupBroadcast;
case OpGroupNonUniformBroadcastFirst:
return SPVFuncImplSubgroupBroadcastFirst;
case OpGroupNonUniformBallot:
return SPVFuncImplSubgroupBallot;
@@ -13193,6 +13467,24 @@ CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op o
case OpGroupNonUniformAllEqual:
return SPVFuncImplSubgroupAllEqual;
case OpGroupNonUniformShuffle:
return SPVFuncImplSubgroupShuffle;
case OpGroupNonUniformShuffleXor:
return SPVFuncImplSubgroupShuffleXor;
case OpGroupNonUniformShuffleUp:
return SPVFuncImplSubgroupShuffleUp;
case OpGroupNonUniformShuffleDown:
return SPVFuncImplSubgroupShuffleDown;
case OpGroupNonUniformQuadBroadcast:
return SPVFuncImplQuadBroadcast;
case OpGroupNonUniformQuadSwap:
return SPVFuncImplQuadSwap;
default:
break;
}

View File

@@ -315,7 +315,7 @@ public:
bool ios_support_base_vertex_instance = false;
// Use Metal's native frame-buffer fetch API for subpass inputs.
bool ios_use_framebuffer_fetch_subpasses = false;
bool use_framebuffer_fetch_subpasses = false;
// Enables use of "fma" intrinsic for invariant float math
bool invariant_float_math = false;
@@ -600,12 +600,20 @@ protected:
SPVFuncImplTextureSwizzle,
SPVFuncImplGatherSwizzle,
SPVFuncImplGatherCompareSwizzle,
SPVFuncImplSubgroupBroadcast,
SPVFuncImplSubgroupBroadcastFirst,
SPVFuncImplSubgroupBallot,
SPVFuncImplSubgroupBallotBitExtract,
SPVFuncImplSubgroupBallotFindLSB,
SPVFuncImplSubgroupBallotFindMSB,
SPVFuncImplSubgroupBallotBitCount,
SPVFuncImplSubgroupAllEqual,
SPVFuncImplSubgroupShuffle,
SPVFuncImplSubgroupShuffleXor,
SPVFuncImplSubgroupShuffleUp,
SPVFuncImplSubgroupShuffleDown,
SPVFuncImplQuadBroadcast,
SPVFuncImplQuadSwap,
SPVFuncImplReflectScalar,
SPVFuncImplRefractScalar,
SPVFuncImplFaceForwardScalar,
@@ -913,6 +921,7 @@ protected:
bool used_swizzle_buffer = false;
bool added_builtin_tess_level = false;
bool needs_subgroup_invocation_id = false;
bool needs_subgroup_size = false;
std::string qual_pos_var_name;
std::string stage_in_var_name = "in";
std::string stage_out_var_name = "out";
@@ -984,6 +993,7 @@ protected:
bool uses_atomics = false;
bool uses_resource_write = false;
bool needs_subgroup_invocation_id = false;
bool needs_subgroup_size = false;
};
// OpcodeHandler that scans for uses of sampled images