Updated spirv-cross.

Бранимир Караџић 2024-05-27 21:10:01 -07:00
parent aace41236f
commit ad4a3e095b
12 changed files with 755 additions and 237 deletions

View File

@ -675,11 +675,13 @@ struct CLIArguments
bool msl_force_sample_rate_shading = false;
bool msl_manual_helper_invocation_updates = true;
bool msl_check_discarded_frag_stores = false;
bool msl_force_fragment_with_side_effects_execution = false;
bool msl_sample_dref_lod_array_as_grad = false;
bool msl_runtime_array_rich_descriptor = false;
bool msl_replace_recursive_inputs = false;
bool msl_readwrite_texture_fences = true;
bool msl_agx_manual_cube_grad_fixup = false;
bool msl_input_attachment_is_ds_attachment = false;
const char *msl_combined_sampler_suffix = nullptr;
bool glsl_emit_push_constant_as_ubo = false;
bool glsl_emit_ubo_as_plain_uniforms = false;
@ -872,6 +874,10 @@ static void print_help_msl()
"\t[--msl-runtime-array-rich-descriptor]:\n\t\tWhen declaring a runtime array of SSBOs, declare an array of {ptr, len} pairs to support OpArrayLength.\n"
"\t[--msl-replace-recursive-inputs]:\n\t\tWorks around a Metal 3.1 regression bug, which causes an infinite recursion crash during Metal's analysis of an entry point input structure that itself contains internal recursion.\n"
"\t[--msl-texture-buffer-native]:\n\t\tEnable native support for texel buffers. Otherwise, it is emulated as a normal texture.\n"
"\t[--msl-input-attachment-is-ds-attachment]:\n\t\tAdds a simple depth passthrough in fragment shaders when they do not modify the depth value.\n"
"\t\tRequired to force Metal to write to the depth/stencil attachment post fragment execution.\n"
"\t\tOtherwise, Metal may optimize the write to pre fragment execution which goes against the Vulkan spec.\n"
"\t\tOnly required if an input attachment and depth/stencil attachment reference the same resource.\n"
"\t[--msl-framebuffer-fetch]:\n\t\tImplement subpass inputs with frame buffer fetch.\n"
"\t\tEmits [[color(N)]] inputs in fragment stage.\n"
"\t\tRequires an Apple GPU.\n"
@ -956,6 +962,15 @@ static void print_help_msl()
"\t\tSome Metal devices have a bug where stores to resources from a fragment shader\n"
"\t\tcontinue to execute, even when the fragment is discarded. These checks\n"
"\t\tprevent these stores from executing.\n"
"\t[--msl-force-frag-execution]:\n\t\tEnforces fragment execution to avoid early discard by Metal\n"
"\t\tMetal will prematurely discard fragments before execution when side effects are present.\n"
"\t\tThis condition is triggered under the following conditions (side effect operations happen before discard):\n"
"\t\t\t1. Pre fragment depth test fails.\n"
"\t\t\t2. Modify depth value in fragment shader to constant value known at compile time.\n"
"\t\t\t3. Constant value will not pass post fragment depth test.\n"
"\t\t\t4. Fragment is always discarded in fragment execution.\n"
"\t\tHowever, Vulkan expects fragment shader to be executed since it cannot be discarded until the discard\n"
"\t\tpresent in the fragment execution, which would also execute the operations with side effects.\n"
"\t[--msl-sample-dref-lod-array-as-grad]:\n\t\tUse a gradient instead of a level argument.\n"
"\t\tSome Metal devices have a bug where the level() argument to\n"
"\t\tdepth2d_array<T>::sample_compare() in a fragment shader is biased by some\n"
@ -1242,10 +1257,12 @@ static string compile_iteration(const CLIArguments &args, std::vector<uint32_t>
msl_opts.force_sample_rate_shading = args.msl_force_sample_rate_shading;
msl_opts.manual_helper_invocation_updates = args.msl_manual_helper_invocation_updates;
msl_opts.check_discarded_frag_stores = args.msl_check_discarded_frag_stores;
msl_opts.force_fragment_with_side_effects_execution = args.msl_force_fragment_with_side_effects_execution;
msl_opts.sample_dref_lod_array_as_grad = args.msl_sample_dref_lod_array_as_grad;
msl_opts.ios_support_base_vertex_instance = true;
msl_opts.runtime_array_rich_descriptor = args.msl_runtime_array_rich_descriptor;
msl_opts.replace_recursive_inputs = args.msl_replace_recursive_inputs;
msl_opts.input_attachment_is_ds_attachment = args.msl_input_attachment_is_ds_attachment;
msl_opts.readwrite_texture_fences = args.msl_readwrite_texture_fences;
msl_opts.agx_manual_cube_grad_fixup = args.msl_agx_manual_cube_grad_fixup;
msl_comp->set_msl_options(msl_opts);
@ -1800,6 +1817,7 @@ static int main_inner(int argc, char *argv[])
cbs.add("--msl-no-manual-helper-invocation-updates",
[&args](CLIParser &) { args.msl_manual_helper_invocation_updates = false; });
cbs.add("--msl-check-discarded-frag-stores", [&args](CLIParser &) { args.msl_check_discarded_frag_stores = true; });
cbs.add("--msl-force-frag-with-side-effects-execution", [&args](CLIParser &) { args.msl_force_fragment_with_side_effects_execution = true; });
cbs.add("--msl-sample-dref-lod-array-as-grad",
[&args](CLIParser &) { args.msl_sample_dref_lod_array_as_grad = true; });
cbs.add("--msl-no-readwrite-texture-fences", [&args](CLIParser &) { args.msl_readwrite_texture_fences = false; });
@ -1811,6 +1829,7 @@ static int main_inner(int argc, char *argv[])
[&args](CLIParser &) { args.msl_runtime_array_rich_descriptor = true; });
cbs.add("--msl-replace-recursive-inputs",
[&args](CLIParser &) { args.msl_replace_recursive_inputs = true; });
cbs.add("--msl-input-attachment-is-ds-attachment", [&args](CLIParser &) { args.msl_input_attachment_is_ds_attachment = true; });
cbs.add("--extension", [&args](CLIParser &parser) { args.extensions.push_back(parser.next_string()); });
cbs.add("--rename-entry-point", [&args](CLIParser &parser) {
auto old_name = parser.next_string();

View File

@ -40,7 +40,7 @@ void CompilerCPP::emit_buffer_block(const SPIRVariable &var)
emit_block_struct(type);
auto buffer_name = to_name(type.self);
statement("internal::Resource<", buffer_name, type_to_array_glsl(type), "> ", instance_name, "__;");
statement("internal::Resource<", buffer_name, type_to_array_glsl(type, var.self), "> ", instance_name, "__;");
statement_no_indent("#define ", instance_name, " __res->", instance_name, "__.get()");
resource_registrations.push_back(
join("s.register_resource(", instance_name, "__", ", ", descriptor_set, ", ", binding, ");"));
@ -68,7 +68,7 @@ void CompilerCPP::emit_interface_block(const SPIRVariable &var)
else
buffer_name = type_to_glsl(type);
statement("internal::", qual, "<", buffer_name, type_to_array_glsl(type), "> ", instance_name, "__;");
statement("internal::", qual, "<", buffer_name, type_to_array_glsl(type, var.self), "> ", instance_name, "__;");
statement_no_indent("#define ", instance_name, " __res->", instance_name, "__.get()");
resource_registrations.push_back(join("s.register_", lowerqual, "(", instance_name, "__", ", ", location, ");"));
statement("");
@ -100,14 +100,14 @@ void CompilerCPP::emit_uniform(const SPIRVariable &var)
if (type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage ||
type.basetype == SPIRType::AtomicCounter)
{
statement("internal::Resource<", type_name, type_to_array_glsl(type), "> ", instance_name, "__;");
statement("internal::Resource<", type_name, type_to_array_glsl(type, var.self), "> ", instance_name, "__;");
statement_no_indent("#define ", instance_name, " __res->", instance_name, "__.get()");
resource_registrations.push_back(
join("s.register_resource(", instance_name, "__", ", ", descriptor_set, ", ", binding, ");"));
}
else
{
statement("internal::UniformConstant<", type_name, type_to_array_glsl(type), "> ", instance_name, "__;");
statement("internal::UniformConstant<", type_name, type_to_array_glsl(type, var.self), "> ", instance_name, "__;");
statement_no_indent("#define ", instance_name, " __res->", instance_name, "__.get()");
resource_registrations.push_back(
join("s.register_uniform_constant(", instance_name, "__", ", ", location, ");"));
@ -130,7 +130,7 @@ void CompilerCPP::emit_push_constant_block(const SPIRVariable &var)
auto buffer_name = to_name(type.self);
auto instance_name = to_name(var.self);
statement("internal::PushConstant<", buffer_name, type_to_array_glsl(type), "> ", instance_name, ";");
statement("internal::PushConstant<", buffer_name, type_to_array_glsl(type, var.self), "> ", instance_name, ";");
statement_no_indent("#define ", instance_name, " __res->", instance_name, ".get()");
resource_registrations.push_back(join("s.register_push_constant(", instance_name, "__", ");"));
statement("");

View File

@ -93,6 +93,97 @@ bool Compiler::variable_storage_is_aliased(const SPIRVariable &v)
return !is_restrict && (ssbo || image || counter || buffer_reference);
}
bool Compiler::block_is_control_dependent(const SPIRBlock &block)
{
for (auto &i : block.ops)
{
auto ops = stream(i);
auto op = static_cast<Op>(i.op);
switch (op)
{
case OpFunctionCall:
{
uint32_t func = ops[2];
if (function_is_control_dependent(get<SPIRFunction>(func)))
return true;
break;
}
// Derivatives
case OpDPdx:
case OpDPdxCoarse:
case OpDPdxFine:
case OpDPdy:
case OpDPdyCoarse:
case OpDPdyFine:
case OpFwidth:
case OpFwidthCoarse:
case OpFwidthFine:
// Anything implicit LOD
case OpImageSampleImplicitLod:
case OpImageSampleDrefImplicitLod:
case OpImageSampleProjImplicitLod:
case OpImageSampleProjDrefImplicitLod:
case OpImageSparseSampleImplicitLod:
case OpImageSparseSampleDrefImplicitLod:
case OpImageSparseSampleProjImplicitLod:
case OpImageSparseSampleProjDrefImplicitLod:
case OpImageQueryLod:
case OpImageDrefGather:
case OpImageGather:
case OpImageSparseDrefGather:
case OpImageSparseGather:
// Anything subgroups
case OpGroupNonUniformElect:
case OpGroupNonUniformAll:
case OpGroupNonUniformAny:
case OpGroupNonUniformAllEqual:
case OpGroupNonUniformBroadcast:
case OpGroupNonUniformBroadcastFirst:
case OpGroupNonUniformBallot:
case OpGroupNonUniformInverseBallot:
case OpGroupNonUniformBallotBitExtract:
case OpGroupNonUniformBallotBitCount:
case OpGroupNonUniformBallotFindLSB:
case OpGroupNonUniformBallotFindMSB:
case OpGroupNonUniformShuffle:
case OpGroupNonUniformShuffleXor:
case OpGroupNonUniformShuffleUp:
case OpGroupNonUniformShuffleDown:
case OpGroupNonUniformIAdd:
case OpGroupNonUniformFAdd:
case OpGroupNonUniformIMul:
case OpGroupNonUniformFMul:
case OpGroupNonUniformSMin:
case OpGroupNonUniformUMin:
case OpGroupNonUniformFMin:
case OpGroupNonUniformSMax:
case OpGroupNonUniformUMax:
case OpGroupNonUniformFMax:
case OpGroupNonUniformBitwiseAnd:
case OpGroupNonUniformBitwiseOr:
case OpGroupNonUniformBitwiseXor:
case OpGroupNonUniformLogicalAnd:
case OpGroupNonUniformLogicalOr:
case OpGroupNonUniformLogicalXor:
case OpGroupNonUniformQuadBroadcast:
case OpGroupNonUniformQuadSwap:
// Control barriers
case OpControlBarrier:
return true;
default:
break;
}
}
return false;
}
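For intuition, a hypothetical MSL-flavored fragment (not from this commit): derivatives and subgroup ops are control dependent because which invocations participate depends on where the call sits in control flow, so calls into functions containing them must not be moved across branches — this is what register_control_dependent_expression guards in the GLSL backend change further down.

#include <metal_stdlib>
using namespace metal;

// Control dependent: contains an implicit derivative.
float2 grad(float2 uv)
{
	return dfdx(uv);
}

fragment float4 main0(float4 gl_FragCoord [[position]])
{
	float4 color = float4(0.0);
	if (gl_FragCoord.x > 128.0) // potentially non-uniform branch
		color.xy += grad(gl_FragCoord.xy); // the call must stay inside the branch
	return color;
}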
bool Compiler::block_is_pure(const SPIRBlock &block)
{
// This is a global side effect of the function.
@ -247,18 +338,21 @@ string Compiler::to_name(uint32_t id, bool allow_alias) const
bool Compiler::function_is_pure(const SPIRFunction &func)
{
for (auto block : func.blocks)
{
if (!block_is_pure(get<SPIRBlock>(block)))
{
//fprintf(stderr, "Function %s is impure!\n", to_name(func.self).c_str());
return false;
}
}
//fprintf(stderr, "Function %s is pure!\n", to_name(func.self).c_str());
return true;
}
bool Compiler::function_is_control_dependent(const SPIRFunction &func)
{
for (auto block : func.blocks)
if (block_is_control_dependent(get<SPIRBlock>(block)))
return true;
return false;
}
void Compiler::register_global_read_dependencies(const SPIRBlock &block, uint32_t id)
{
for (auto &i : block.ops)

View File

@ -744,6 +744,8 @@ protected:
bool function_is_pure(const SPIRFunction &func);
bool block_is_pure(const SPIRBlock &block);
bool function_is_control_dependent(const SPIRFunction &func);
bool block_is_control_dependent(const SPIRBlock &block);
bool execution_is_branchless(const SPIRBlock &from, const SPIRBlock &to) const;
bool execution_is_direct_branch(const SPIRBlock &from, const SPIRBlock &to) const;

View File

@ -754,6 +754,10 @@ spvc_result spvc_compiler_options_set_uint(spvc_compiler_options options, spvc_c
case SPVC_COMPILER_OPTION_MSL_AGX_MANUAL_CUBE_GRAD_FIXUP:
options->msl.agx_manual_cube_grad_fixup = value != 0;
break;
case SPVC_COMPILER_OPTION_MSL_FORCE_FRAGMENT_WITH_SIDE_EFFECTS_EXECUTION:
options->msl.force_fragment_with_side_effects_execution = value != 0;
break;
#endif
default:

View File

@ -40,7 +40,7 @@ extern "C" {
/* Bumped if ABI or API breaks backwards compatibility. */
#define SPVC_C_API_VERSION_MAJOR 0
/* Bumped if APIs or enumerations are added in a backwards compatible way. */
#define SPVC_C_API_VERSION_MINOR 59
#define SPVC_C_API_VERSION_MINOR 60
/* Bumped if internal implementation details change. */
#define SPVC_C_API_VERSION_PATCH 0
@ -728,6 +728,7 @@ typedef enum spvc_compiler_option
SPVC_COMPILER_OPTION_MSL_READWRITE_TEXTURE_FENCES = 86 | SPVC_COMPILER_OPTION_MSL_BIT,
SPVC_COMPILER_OPTION_MSL_REPLACE_RECURSIVE_INPUTS = 87 | SPVC_COMPILER_OPTION_MSL_BIT,
SPVC_COMPILER_OPTION_MSL_AGX_MANUAL_CUBE_GRAD_FIXUP = 88 | SPVC_COMPILER_OPTION_MSL_BIT,
SPVC_COMPILER_OPTION_MSL_FORCE_FRAGMENT_WITH_SIDE_EFFECTS_EXECUTION = 89 | SPVC_COMPILER_OPTION_MSL_BIT,
SPVC_COMPILER_OPTION_INT_MAX = 0x7fffffff
} spvc_compiler_option;

View File

@ -169,7 +169,7 @@ public:
~LoopLock();
private:
uint32_t *lock;
uint32_t *lock = nullptr;
};
// This must be held while iterating over a type ID array.

View File

@ -639,7 +639,8 @@ void CompilerGLSL::find_static_extensions()
void CompilerGLSL::require_polyfill(Polyfill polyfill, bool relaxed)
{
uint32_t &polyfills = (relaxed && options.es) ? required_polyfills_relaxed : required_polyfills;
uint32_t &polyfills = (relaxed && (options.es || options.vulkan_semantics)) ?
required_polyfills_relaxed : required_polyfills;
if ((polyfills & polyfill) == 0)
{
@ -715,7 +716,7 @@ string CompilerGLSL::compile()
if (required_polyfills != 0)
emit_polyfills(required_polyfills, false);
if (options.es && required_polyfills_relaxed != 0)
if ((options.es || options.vulkan_semantics) && required_polyfills_relaxed != 0)
emit_polyfills(required_polyfills_relaxed, true);
emit_function(get<SPIRFunction>(ir.default_entry_point), Bitset());
@ -2434,7 +2435,7 @@ void CompilerGLSL::emit_buffer_reference_block(uint32_t type_id, bool forward_de
else
{
auto &pointee_type = get_pointee_type(type);
statement(type_to_glsl(pointee_type), " value", type_to_array_glsl(pointee_type), ";");
statement(type_to_glsl(pointee_type), " value", type_to_array_glsl(pointee_type, 0), ";");
}
end_scope_decl();
@ -2513,7 +2514,7 @@ void CompilerGLSL::emit_buffer_block_native(const SPIRVariable &var)
// It will need to be reset if we have to recompile.
preserve_alias_on_reset(var.self);
add_resource_name(var.self);
end_scope_decl(to_name(var.self) + type_to_array_glsl(type));
end_scope_decl(to_name(var.self) + type_to_array_glsl(type, var.self));
statement("");
}
@ -2780,7 +2781,7 @@ void CompilerGLSL::emit_interface_block(const SPIRVariable &var)
}
add_resource_name(var.self);
end_scope_decl(join(to_name(var.self), type_to_array_glsl(type)));
end_scope_decl(join(to_name(var.self), type_to_array_glsl(type, var.self)));
statement("");
}
}
@ -3977,7 +3978,7 @@ void CompilerGLSL::emit_output_variable_initializer(const SPIRVariable &var)
auto &c = get<SPIRConstant>(var.initializer);
for (uint32_t j = 0; j < array_size; j++)
exprs.push_back(to_expression(get<SPIRConstant>(c.subconstants[j]).subconstants[i]));
statement("const ", type_to_glsl(array_type), " ", lut_name, type_to_array_glsl(array_type), " = ",
statement("const ", type_to_glsl(array_type), " ", lut_name, type_to_array_glsl(array_type, 0), " = ",
type_to_glsl_constructor(array_type), "(", merge(exprs, ", "), ");");
}
@ -4035,7 +4036,7 @@ void CompilerGLSL::emit_output_variable_initializer(const SPIRVariable &var)
else if (is_control_point)
{
auto lut_name = join("_", var.self, "_init");
statement("const ", type_to_glsl(type), " ", lut_name, type_to_array_glsl(type),
statement("const ", type_to_glsl(type), " ", lut_name, type_to_array_glsl(type, 0),
" = ", to_expression(var.initializer), ";");
entry_func.fixup_hooks_in.push_back([&, lut_name]() {
statement(to_expression(var.self), "[gl_InvocationID] = ", lut_name, "[gl_InvocationID];");
@ -4060,7 +4061,7 @@ void CompilerGLSL::emit_output_variable_initializer(const SPIRVariable &var)
{
auto lut_name = join("_", var.self, "_init");
statement("const ", type_to_glsl(type), " ", lut_name,
type_to_array_glsl(type), " = ", to_expression(var.initializer), ";");
type_to_array_glsl(type, var.self), " = ", to_expression(var.initializer), ";");
entry_func.fixup_hooks_in.push_back([&, lut_name, is_patch]() {
if (is_patch)
{
@ -4842,6 +4843,109 @@ void CompilerGLSL::emit_polyfills(uint32_t polyfills, bool relaxed)
end_scope();
statement("");
}
if (!relaxed)
{
static const Polyfill polys[3][3] = {
{ PolyfillNMin16, PolyfillNMin32, PolyfillNMin64 },
{ PolyfillNMax16, PolyfillNMax32, PolyfillNMax64 },
{ PolyfillNClamp16, PolyfillNClamp32, PolyfillNClamp64 },
};
static const GLSLstd450 glsl_ops[] = { GLSLstd450NMin, GLSLstd450NMax, GLSLstd450NClamp };
static const char *spv_ops[] = { "spvNMin", "spvNMax", "spvNClamp" };
bool has_poly = false;
for (uint32_t i = 0; i < 3; i++)
{
for (uint32_t j = 0; j < 3; j++)
{
if ((polyfills & polys[i][j]) == 0)
continue;
const char *types[3][4] = {
{ "float16_t", "f16vec2", "f16vec3", "f16vec4" },
{ "float", "vec2", "vec3", "vec4" },
{ "double", "dvec2", "dvec3", "dvec4" },
};
for (uint32_t k = 0; k < 4; k++)
{
auto *type = types[j][k];
if (i < 2)
{
statement("spirv_instruction(set = \"GLSL.std.450\", id = ", glsl_ops[i], ") ",
type, " ", spv_ops[i], "(", type, ", ", type, ");");
}
else
{
statement("spirv_instruction(set = \"GLSL.std.450\", id = ", glsl_ops[i], ") ",
type, " ", spv_ops[i], "(", type, ", ", type, ", ", type, ");");
}
has_poly = true;
}
}
}
if (has_poly)
statement("");
}
else
{
// Mediump intrinsics don't work correctly, so wrap the intrinsic in an outer shell that ensures mediump
// propagation.
static const Polyfill polys[3][3] = {
{ PolyfillNMin16, PolyfillNMin32, PolyfillNMin64 },
{ PolyfillNMax16, PolyfillNMax32, PolyfillNMax64 },
{ PolyfillNClamp16, PolyfillNClamp32, PolyfillNClamp64 },
};
static const char *spv_ops[] = { "spvNMin", "spvNMax", "spvNClamp" };
for (uint32_t i = 0; i < 3; i++)
{
for (uint32_t j = 0; j < 3; j++)
{
if ((polyfills & polys[i][j]) == 0)
continue;
const char *types[3][4] = {
{ "float16_t", "f16vec2", "f16vec3", "f16vec4" },
{ "float", "vec2", "vec3", "vec4" },
{ "double", "dvec2", "dvec3", "dvec4" },
};
for (uint32_t k = 0; k < 4; k++)
{
auto *type = types[j][k];
if (i < 2)
{
statement("mediump ", type, " ", spv_ops[i], "Relaxed(",
"mediump ", type, " a, mediump ", type, " b)");
begin_scope();
statement("mediump ", type, " res = ", spv_ops[i], "(a, b);");
statement("return res;");
end_scope();
statement("");
}
else
{
statement("mediump ", type, " ", spv_ops[i], "Relaxed(",
"mediump ", type, " a, mediump ", type, " b, mediump ", type, " c)");
begin_scope();
statement("mediump ", type, " res = ", spv_ops[i], "(a, b, c);");
statement("return res;");
end_scope();
statement("");
}
}
}
}
}
}
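Concatenated, the two loops above emit declarations and wrappers along these lines for the 32-bit scalar case (a sketch assembled from the statement() strings in this hunk; 79 is the GLSL.std.450 opcode for NMin, and GL_EXT_spirv_intrinsics must be enabled, as done in emit_glsl_op below):

spirv_instruction(set = "GLSL.std.450", id = 79) float spvNMin(float, float);

mediump float spvNMinRelaxed(mediump float a, mediump float b)
{
	mediump float res = spvNMin(a, b);
	return res;
}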
// Returns a string representation of the ID, usable as a function arg.
@ -7847,6 +7951,7 @@ std::string CompilerGLSL::to_texture_op(const Instruction &i, bool sparse, bool
args.grad_x = grad_x;
args.grad_y = grad_y;
args.lod = lod;
args.has_array_offsets = coffsets != 0;
if (coffsets)
args.offset = coffsets;
@ -8864,23 +8969,97 @@ void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
case GLSLstd450NMin:
case GLSLstd450NMax:
{
emit_nminmax_op(result_type, id, args[0], args[1], op);
if (options.vulkan_semantics)
{
require_extension_internal("GL_EXT_spirv_intrinsics");
bool relaxed = has_decoration(id, DecorationRelaxedPrecision);
Polyfill poly = {};
switch (get<SPIRType>(result_type).width)
{
case 16:
poly = op == GLSLstd450NMin ? PolyfillNMin16 : PolyfillNMax16;
break;
case 32:
poly = op == GLSLstd450NMin ? PolyfillNMin32 : PolyfillNMax32;
break;
case 64:
poly = op == GLSLstd450NMin ? PolyfillNMin64 : PolyfillNMax64;
break;
default:
SPIRV_CROSS_THROW("Invalid bit width for NMin/NMax.");
}
require_polyfill(poly, relaxed);
// Function return decorations are broken, so need to do double polyfill.
if (relaxed)
require_polyfill(poly, false);
const char *op_str;
if (relaxed)
op_str = op == GLSLstd450NMin ? "spvNMinRelaxed" : "spvNMaxRelaxed";
else
op_str = op == GLSLstd450NMin ? "spvNMin" : "spvNMax";
emit_binary_func_op(result_type, id, args[0], args[1], op_str);
}
else
{
emit_nminmax_op(result_type, id, args[0], args[1], op);
}
break;
}
case GLSLstd450NClamp:
{
// Make sure we have a unique ID here to avoid aliasing the extra sub-expressions between clamp and NMin sub-op.
// IDs cannot exceed 24 bits, so we can make use of the higher bits for some unique flags.
uint32_t &max_id = extra_sub_expressions[id | EXTRA_SUB_EXPRESSION_TYPE_AUX];
if (!max_id)
max_id = ir.increase_bound_by(1);
// Inherit precision qualifiers.
ir.meta[max_id] = ir.meta[id];
emit_nminmax_op(result_type, max_id, args[0], args[1], GLSLstd450NMax);
emit_nminmax_op(result_type, id, max_id, args[2], GLSLstd450NMin);
if (options.vulkan_semantics)
{
require_extension_internal("GL_EXT_spirv_intrinsics");
bool relaxed = has_decoration(id, DecorationRelaxedPrecision);
Polyfill poly = {};
switch (get<SPIRType>(result_type).width)
{
case 16:
poly = PolyfillNClamp16;
break;
case 32:
poly = PolyfillNClamp32;
break;
case 64:
poly = PolyfillNClamp64;
break;
default:
SPIRV_CROSS_THROW("Invalid bit width for NMin/NMax.");
}
require_polyfill(poly, relaxed);
// Function return decorations are broken, so need to do double polyfill.
if (relaxed)
require_polyfill(poly, false);
emit_trinary_func_op(result_type, id, args[0], args[1], args[2], relaxed ? "spvNClampRelaxed" : "spvNClamp");
}
else
{
// Make sure we have a unique ID here to avoid aliasing the extra sub-expressions between clamp and NMin sub-op.
// IDs cannot exceed 24 bits, so we can make use of the higher bits for some unique flags.
uint32_t &max_id = extra_sub_expressions[id | EXTRA_SUB_EXPRESSION_TYPE_AUX];
if (!max_id)
max_id = ir.increase_bound_by(1);
// Inherit precision qualifiers.
ir.meta[max_id] = ir.meta[id];
emit_nminmax_op(result_type, max_id, args[0], args[1], GLSLstd450NMax);
emit_nminmax_op(result_type, id, max_id, args[2], GLSLstd450NMin);
}
break;
}
@ -11610,6 +11789,10 @@ uint32_t CompilerGLSL::get_integer_width_for_instruction(const Instruction &inst
case OpUGreaterThanEqual:
return expression_type(ops[2]).width;
case OpSMulExtended:
case OpUMulExtended:
return get<SPIRType>(get<SPIRType>(ops[0]).member_types[0]).width;
default:
{
// We can look at result type which is more robust.
@ -12167,6 +12350,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
auto &callee = get<SPIRFunction>(func);
auto &return_type = get<SPIRType>(callee.return_type);
bool pure = function_is_pure(callee);
bool control_dependent = function_is_control_dependent(callee);
bool callee_has_out_variables = false;
bool emit_return_value_as_argument = false;
@ -12198,7 +12382,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
if (emit_return_value_as_argument)
{
statement(type_to_glsl(return_type), " ", to_name(id), type_to_array_glsl(return_type), ";");
statement(type_to_glsl(return_type), " ", to_name(id), type_to_array_glsl(return_type, 0), ";");
arglist.push_back(to_name(id));
}
@ -12260,6 +12444,9 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
else
statement(funexpr, ";");
if (control_dependent)
register_control_dependent_expression(id);
break;
}
@ -15166,7 +15353,7 @@ string CompilerGLSL::variable_decl(const SPIRType &type, const string &name, uin
{
string type_name = type_to_glsl(type, id);
remap_variable_type_name(type, name, type_name);
return join(type_name, " ", name, type_to_array_glsl(type));
return join(type_name, " ", name, type_to_array_glsl(type, id));
}
bool CompilerGLSL::variable_decl_is_remapped_storage(const SPIRVariable &var, StorageClass storage) const
@ -15523,7 +15710,7 @@ string CompilerGLSL::to_array_size(const SPIRType &type, uint32_t index)
return "";
}
string CompilerGLSL::type_to_array_glsl(const SPIRType &type)
string CompilerGLSL::type_to_array_glsl(const SPIRType &type, uint32_t)
{
if (type.pointer && type.storage == StorageClassPhysicalStorageBufferEXT && type.basetype != SPIRType::Struct)
{
@ -16087,7 +16274,7 @@ void CompilerGLSL::emit_function_prototype(SPIRFunction &func, const Bitset &ret
auto &type = get<SPIRType>(func.return_type);
decl += flags_to_qualifiers_glsl(type, return_flags);
decl += type_to_glsl(type);
decl += type_to_array_glsl(type);
decl += type_to_array_glsl(type, 0);
decl += " ";
if (func.self == ir.default_entry_point)

View File

@ -477,7 +477,7 @@ protected:
uint32_t coord = 0, coord_components = 0, dref = 0;
uint32_t grad_x = 0, grad_y = 0, lod = 0, offset = 0;
uint32_t bias = 0, component = 0, sample = 0, sparse_texel = 0, min_lod = 0;
bool nonuniform_expression = false;
bool nonuniform_expression = false, has_array_offsets = false;
};
virtual std::string to_function_args(const TextureFunctionArguments &args, bool *p_forward);
@ -564,8 +564,8 @@ protected:
Options options;
virtual std::string type_to_array_glsl(
const SPIRType &type); // Allow Metal to use the array<T> template to make arrays a value type
// Allow Metal to use the array<T> template to make arrays a value type
virtual std::string type_to_array_glsl(const SPIRType &type, uint32_t variable_id);
std::string to_array_size(const SPIRType &type, uint32_t index);
uint32_t to_array_size_literal(const SPIRType &type, uint32_t index) const;
uint32_t to_array_size_literal(const SPIRType &type) const;
@ -933,6 +933,15 @@ protected:
PolyfillMatrixInverse2x2 = 1 << 6,
PolyfillMatrixInverse3x3 = 1 << 7,
PolyfillMatrixInverse4x4 = 1 << 8,
PolyfillNMin16 = 1 << 9,
PolyfillNMin32 = 1 << 10,
PolyfillNMin64 = 1 << 11,
PolyfillNMax16 = 1 << 12,
PolyfillNMax32 = 1 << 13,
PolyfillNMax64 = 1 << 14,
PolyfillNClamp16 = 1 << 15,
PolyfillNClamp32 = 1 << 16,
PolyfillNClamp64 = 1 << 17,
};
uint32_t required_polyfills = 0;

View File

@ -1002,7 +1002,7 @@ void CompilerHLSL::emit_interface_block_member_in_struct(const SPIRVariable &var
statement(to_interpolation_qualifiers(get_member_decoration_bitset(type.self, member_index)),
type_to_glsl(mbr_type),
" ", mbr_name, type_to_array_glsl(mbr_type),
" ", mbr_name, type_to_array_glsl(mbr_type, var.self),
" : ", semantic, ";");
// Structs and arrays should consume more locations.
@ -2277,7 +2277,7 @@ void CompilerHLSL::emit_resources()
// Need out variable since HLSL does not support returning arrays.
auto &type = get<SPIRType>(type_id);
auto type_str = type_to_glsl(type);
auto type_arr_str = type_to_array_glsl(type);
auto type_arr_str = type_to_array_glsl(type, 0);
statement("void spvSelectComposite(out ", type_str, " out_value", type_arr_str, ", bool cond, ",
type_str, " true_val", type_arr_str, ", ",
type_str, " false_val", type_arr_str, ")");
@ -2679,7 +2679,7 @@ void CompilerHLSL::emit_buffer_block(const SPIRVariable &var)
type_name = is_readonly ? "ByteAddressBuffer" : is_interlocked ? "RasterizerOrderedByteAddressBuffer" : "RWByteAddressBuffer";
add_resource_name(var.self);
statement(is_coherent ? "globallycoherent " : "", type_name, " ", to_name(var.self), type_to_array_glsl(type),
statement(is_coherent ? "globallycoherent " : "", type_name, " ", to_name(var.self), type_to_array_glsl(type, var.self),
to_resource_binding(var), ";");
}
else
@ -2766,7 +2766,7 @@ void CompilerHLSL::emit_buffer_block(const SPIRVariable &var)
}
emit_struct(get<SPIRType>(type.self));
statement("ConstantBuffer<", to_name(type.self), "> ", to_name(var.self), type_to_array_glsl(type),
statement("ConstantBuffer<", to_name(type.self), "> ", to_name(var.self), type_to_array_glsl(type, var.self),
to_resource_binding(var), ";");
}
}
@ -2952,7 +2952,7 @@ void CompilerHLSL::emit_function_prototype(SPIRFunction &func, const Bitset &ret
out_argument += type_to_glsl(type);
out_argument += " ";
out_argument += "spvReturnValue";
out_argument += type_to_array_glsl(type);
out_argument += type_to_array_glsl(type, 0);
arglist.push_back(std::move(out_argument));
}
@ -2978,7 +2978,7 @@ void CompilerHLSL::emit_function_prototype(SPIRFunction &func, const Bitset &ret
{
// Manufacture automatic sampler arg for SampledImage texture
arglist.push_back(join(is_depth_image(arg_type, arg.id) ? "SamplerComparisonState " : "SamplerState ",
to_sampler_expression(arg.id), type_to_array_glsl(arg_type)));
to_sampler_expression(arg.id), type_to_array_glsl(arg_type, arg.id)));
}
// Hold a pointer to the parameter so we can invalidate the readonly field if needed.
@ -4076,16 +4076,16 @@ void CompilerHLSL::emit_modern_uniform(const SPIRVariable &var)
is_coherent = has_decoration(var.self, DecorationCoherent);
statement(is_coherent ? "globallycoherent " : "", image_type_hlsl_modern(type, var.self), " ",
to_name(var.self), type_to_array_glsl(type), to_resource_binding(var), ";");
to_name(var.self), type_to_array_glsl(type, var.self), to_resource_binding(var), ";");
if (type.basetype == SPIRType::SampledImage && type.image.dim != DimBuffer)
{
// For combined image samplers, also emit a combined image sampler.
if (is_depth_image(type, var.self))
statement("SamplerComparisonState ", to_sampler_expression(var.self), type_to_array_glsl(type),
statement("SamplerComparisonState ", to_sampler_expression(var.self), type_to_array_glsl(type, var.self),
to_resource_binding_sampler(var), ";");
else
statement("SamplerState ", to_sampler_expression(var.self), type_to_array_glsl(type),
statement("SamplerState ", to_sampler_expression(var.self), type_to_array_glsl(type, var.self),
to_resource_binding_sampler(var), ";");
}
break;
@ -4093,10 +4093,10 @@ void CompilerHLSL::emit_modern_uniform(const SPIRVariable &var)
case SPIRType::Sampler:
if (comparison_ids.count(var.self))
statement("SamplerComparisonState ", to_name(var.self), type_to_array_glsl(type), to_resource_binding(var),
statement("SamplerComparisonState ", to_name(var.self), type_to_array_glsl(type, var.self), to_resource_binding(var),
";");
else
statement("SamplerState ", to_name(var.self), type_to_array_glsl(type), to_resource_binding(var), ";");
statement("SamplerState ", to_name(var.self), type_to_array_glsl(type, var.self), to_resource_binding(var), ";");
break;
default:
@ -4448,6 +4448,18 @@ void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
break;
case GLSLstd450NMin:
CompilerGLSL::emit_glsl_op(result_type, id, GLSLstd450FMin, args, count);
break;
case GLSLstd450NMax:
CompilerGLSL::emit_glsl_op(result_type, id, GLSLstd450FMax, args, count);
break;
case GLSLstd450NClamp:
CompilerGLSL::emit_glsl_op(result_type, id, GLSLstd450FClamp, args, count);
break;
default:
CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
break;

View File

@ -201,13 +201,17 @@ bool CompilerMSL::is_var_runtime_size_array(const SPIRVariable &var) const
uint32_t CompilerMSL::get_resource_array_size(const SPIRType &type, uint32_t id) const
{
uint32_t array_size = to_array_size_literal(type);
if (array_size)
// If we have argument buffers, we need to honor the ABI by using the correct array size
// from the layout. Only use shader declared size if we're not using argument buffers.
uint32_t desc_set = get_decoration(id, DecorationDescriptorSet);
if (!descriptor_set_is_argument_buffer(desc_set) && array_size)
return array_size;
StageSetBinding tuple = { get_entry_point().model, get_decoration(id, DecorationDescriptorSet),
StageSetBinding tuple = { get_entry_point().model, desc_set,
get_decoration(id, DecorationBinding) };
auto itr = resource_bindings.find(tuple);
return itr != end(resource_bindings) ? itr->second.first.count : 0;
return itr != end(resource_bindings) ? itr->second.first.count : array_size;
}
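A minimal host-side sketch of where that layout size comes from (assumes SPIRV-Cross's C++ API; binding numbers are made up): the count registered here is what the lookup above now returns when the set is an argument buffer, even if the shader declared a sized array.

#include "spirv_msl.hpp"

// Hypothetical helper: register the descriptor-set layout's array size for (set 0, binding 3).
void register_texture_array_size(spirv_cross::CompilerMSL &compiler)
{
	spirv_cross::MSLResourceBinding b = {};
	b.stage = spv::ExecutionModelFragment;
	b.desc_set = 0;
	b.binding = 3;
	b.count = 8; // size from the VkDescriptorSetLayout; honored over the shader-declared size
	b.msl_texture = 3;
	compiler.add_msl_resource_binding(b);
}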
uint32_t CompilerMSL::get_automatic_msl_resource_binding(uint32_t id) const
@ -267,11 +271,14 @@ void CompilerMSL::build_implicit_builtins()
active_input_builtins.get(BuiltInInstanceIndex) || active_input_builtins.get(BuiltInBaseInstance));
bool need_local_invocation_index = msl_options.emulate_subgroups && active_input_builtins.get(BuiltInSubgroupId);
bool need_workgroup_size = msl_options.emulate_subgroups && active_input_builtins.get(BuiltInNumSubgroups);
bool force_frag_depth_passthrough =
get_execution_model() == ExecutionModelFragment && !uses_explicit_early_fragment_test() && need_subpass_input &&
msl_options.enable_frag_depth_builtin && msl_options.input_attachment_is_ds_attachment;
if (need_subpass_input || need_sample_pos || need_subgroup_mask || need_vertex_params || need_tesc_params ||
need_tese_params || need_multiview || need_dispatch_base || need_vertex_base_params || need_grid_params ||
needs_sample_id || needs_subgroup_invocation_id || needs_subgroup_size || needs_helper_invocation ||
has_additional_fixed_sample_mask() || need_local_invocation_index || need_workgroup_size)
has_additional_fixed_sample_mask() || need_local_invocation_index || need_workgroup_size || force_frag_depth_passthrough)
{
bool has_frag_coord = false;
bool has_sample_id = false;
@ -288,6 +295,7 @@ void CompilerMSL::build_implicit_builtins()
bool has_helper_invocation = false;
bool has_local_invocation_index = false;
bool has_workgroup_size = false;
bool has_frag_depth = false;
uint32_t workgroup_id_type = 0;
ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
@ -308,6 +316,13 @@ void CompilerMSL::build_implicit_builtins()
mark_implicit_builtin(StorageClassOutput, BuiltInSampleMask, var.self);
does_shader_write_sample_mask = true;
}
if (force_frag_depth_passthrough && builtin == BuiltInFragDepth)
{
builtin_frag_depth_id = var.self;
mark_implicit_builtin(StorageClassOutput, BuiltInFragDepth, var.self);
has_frag_depth = true;
}
}
if (var.storage != StorageClassInput)
@ -898,6 +913,36 @@ void CompilerMSL::build_implicit_builtins()
builtin_workgroup_size_id = var_id;
mark_implicit_builtin(StorageClassInput, BuiltInWorkgroupSize, var_id);
}
if (!has_frag_depth && force_frag_depth_passthrough)
{
uint32_t offset = ir.increase_bound_by(3);
uint32_t type_id = offset;
uint32_t type_ptr_id = offset + 1;
uint32_t var_id = offset + 2;
// Create gl_FragDepth
SPIRType float_type { OpTypeFloat };
float_type.basetype = SPIRType::Float;
float_type.width = 32;
float_type.vecsize = 1;
set<SPIRType>(type_id, float_type);
SPIRType float_type_ptr_in = float_type;
float_type_ptr_in.op = spv::OpTypePointer;
float_type_ptr_in.pointer = true;
float_type_ptr_in.pointer_depth++;
float_type_ptr_in.parent_type = type_id;
float_type_ptr_in.storage = StorageClassOutput;
auto &ptr_in_type = set<SPIRType>(type_ptr_id, float_type_ptr_in);
ptr_in_type.self = type_id;
set<SPIRVariable>(var_id, type_ptr_id, StorageClassOutput);
set_decoration(var_id, DecorationBuiltIn, BuiltInFragDepth);
builtin_frag_depth_id = var_id;
mark_implicit_builtin(StorageClassOutput, BuiltInFragDepth, var_id);
active_output_builtins.set(BuiltInFragDepth);
}
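The net effect, sketched as hypothetical emitted MSL (not taken from this diff): when the shader never writes depth itself, the entry point gains a depth output that forwards the incoming fragment depth, forcing the depth/stencil write to happen after fragment execution.

struct main0_out
{
	float4 color [[color(0)]];
	float gl_FragDepth [[depth(any)]]; // synthesized by the passthrough
};

fragment main0_out main0(float4 gl_FragCoord [[position]])
{
	main0_out out = {};
	out.color = float4(1.0);
	out.gl_FragDepth = gl_FragCoord.z; // simple depth passthrough
	return out;
}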
}
if (needs_swizzle_buffer_def)
@ -1314,48 +1359,29 @@ void CompilerMSL::emit_entry_point_declarations()
uint32_t arg_id = argument_buffer_ids[desc_set];
uint32_t base_index = dynamic_buffer.second.first;
if (!type.array.empty())
if (is_array(type))
{
// This is complicated, because we need to support arrays of arrays.
// And it's even worse if the outermost dimension is a runtime array, because now
// all this complicated goop has to go into the shader itself. (FIXME)
if (!type.array[type.array.size() - 1])
SPIRV_CROSS_THROW("Runtime arrays with dynamic offsets are not supported yet.");
else
{
is_using_builtin_array = true;
statement(get_argument_address_space(var), " ", type_to_glsl(type), "* ", to_restrict(var_id, true), name,
type_to_array_glsl(type), " =");
uint32_t dim = uint32_t(type.array.size());
uint32_t j = 0;
for (SmallVector<uint32_t> indices(type.array.size());
indices[type.array.size() - 1] < to_array_size_literal(type); j++)
{
while (dim > 0)
{
begin_scope();
--dim;
}
string arrays;
for (uint32_t i = uint32_t(type.array.size()); i; --i)
arrays += join("[", indices[i - 1], "]");
statement("(", get_argument_address_space(var), " ", type_to_glsl(type), "* ",
to_restrict(var_id, false), ")((", get_argument_address_space(var), " char* ",
to_restrict(var_id, false), ")", to_name(arg_id), ".", ensure_valid_name(name, "m"),
arrays, " + ", to_name(dynamic_offsets_buffer_id), "[", base_index + j, "]),");
while (++indices[dim] >= to_array_size_literal(type, dim) && dim < type.array.size() - 1)
{
end_scope(",");
indices[dim++] = 0;
}
}
end_scope_decl();
statement_no_indent("");
is_using_builtin_array = false;
}
is_using_builtin_array = true;
statement(get_argument_address_space(var), " ", type_to_glsl(type), "* ", to_restrict(var_id, true), name,
type_to_array_glsl(type, var_id), " =");
uint32_t array_size = to_array_size_literal(type);
begin_scope();
for (uint32_t i = 0; i < array_size; i++)
{
statement("(", get_argument_address_space(var), " ", type_to_glsl(type), "* ",
to_restrict(var_id, false), ")((", get_argument_address_space(var), " char* ",
to_restrict(var_id, false), ")", to_name(arg_id), ".", ensure_valid_name(name, "m"),
"[", i, "]", " + ", to_name(dynamic_offsets_buffer_id), "[", base_index + i, "]),");
}
end_scope_decl();
statement_no_indent("");
is_using_builtin_array = false;
}
else
{
@ -1469,7 +1495,7 @@ void CompilerMSL::emit_entry_point_declarations()
is_using_builtin_array = true;
statement(desc_addr_space, " auto& ", to_restrict(var_id, true), to_name(var_id), " = (", addr_space, " ",
type_to_glsl(type), "* ", desc_addr_space, " (&)",
type_to_array_glsl(type), ")", ir.meta[alias_id].decoration.qualified_alias, ";");
type_to_array_glsl(type, var_id), ")", ir.meta[alias_id].decoration.qualified_alias, ";");
is_using_builtin_array = false;
}
}
@ -1553,8 +1579,10 @@ string CompilerMSL::compile()
if (needs_manual_helper_invocation_updates() &&
(active_input_builtins.get(BuiltInHelperInvocation) || needs_helper_invocation))
{
string discard_expr =
join(builtin_to_glsl(BuiltInHelperInvocation, StorageClassInput), " = true, discard_fragment()");
string builtin_helper_invocation = builtin_to_glsl(BuiltInHelperInvocation, StorageClassInput);
string discard_expr = join(builtin_helper_invocation, " = true, discard_fragment()");
if (msl_options.force_fragment_with_side_effects_execution)
discard_expr = join("!", builtin_helper_invocation, " ? (", discard_expr, ") : (void)0");
backend.discard_literal = discard_expr;
backend.demote_literal = discard_expr;
}
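Spelled out, the join() above turns every discard/demote into the following (gl_HelperInvocation being the manually updated helper-invocation flag):

// Without the option:
//   gl_HelperInvocation = true, discard_fragment()
// With force_fragment_with_side_effects_execution:
//   !gl_HelperInvocation ? (gl_HelperInvocation = true, discard_fragment()) : (void)0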
@ -1584,6 +1612,8 @@ string CompilerMSL::compile()
add_active_interface_variable(builtin_dispatch_base_id);
if (builtin_sample_mask_id)
add_active_interface_variable(builtin_sample_mask_id);
if (builtin_frag_depth_id)
add_active_interface_variable(builtin_frag_depth_id);
// Create structs to hold input, output and uniform variables.
// Do output first to ensure out. is declared at top of entry function.
@ -1703,14 +1733,15 @@ void CompilerMSL::preprocess_op_codes()
// Fragment shaders that both write to storage resources and discard fragments
// need checks on the writes, to work around Metal allowing these writes despite
// the fragment being dead.
if (msl_options.check_discarded_frag_stores && preproc.uses_discard &&
(preproc.uses_buffer_write || preproc.uses_image_write))
// the fragment being dead. We may also need to force Metal to execute the
// fragment shader instead of discarding it prematurely.
if (preproc.uses_discard && (preproc.uses_buffer_write || preproc.uses_image_write))
{
frag_shader_needs_discard_checks = true;
needs_helper_invocation = true;
bool should_enable = (msl_options.check_discarded_frag_stores || msl_options.force_fragment_with_side_effects_execution);
frag_shader_needs_discard_checks |= msl_options.check_discarded_frag_stores;
needs_helper_invocation |= should_enable;
// Fragment discard store checks imply manual HelperInvocation updates.
msl_options.manual_helper_invocation_updates = true;
msl_options.manual_helper_invocation_updates |= should_enable;
}
if (is_intersection_query())
@ -1881,8 +1912,13 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std::
{
uint32_t base_id = ops[0];
if (global_var_ids.find(base_id) != global_var_ids.end())
{
added_arg_ids.insert(base_id);
if (msl_options.input_attachment_is_ds_attachment && base_id == builtin_frag_depth_id)
writes_to_depth = true;
}
uint32_t rvalue_id = ops[1];
if (global_var_ids.find(rvalue_id) != global_var_ids.end())
added_arg_ids.insert(rvalue_id);
@ -2921,20 +2957,35 @@ void CompilerMSL::add_composite_member_variable_to_interface_block(StorageClass
uint32_t mbr_idx, InterfaceBlockMeta &meta,
const string &mbr_name_qual,
const string &var_chain_qual,
uint32_t &location, uint32_t &var_mbr_idx)
uint32_t &location, uint32_t &var_mbr_idx,
const Bitset &interpolation_qual)
{
auto &entry_func = get<SPIRFunction>(ir.default_entry_point);
BuiltIn builtin = BuiltInMax;
bool is_builtin = is_member_builtin(var_type, mbr_idx, &builtin);
bool is_flat =
has_member_decoration(var_type.self, mbr_idx, DecorationFlat) || has_decoration(var.self, DecorationFlat);
bool is_noperspective = has_member_decoration(var_type.self, mbr_idx, DecorationNoPerspective) ||
bool is_flat = interpolation_qual.get(DecorationFlat) ||
has_member_decoration(var_type.self, mbr_idx, DecorationFlat) ||
has_decoration(var.self, DecorationFlat);
bool is_noperspective = interpolation_qual.get(DecorationNoPerspective) ||
has_member_decoration(var_type.self, mbr_idx, DecorationNoPerspective) ||
has_decoration(var.self, DecorationNoPerspective);
bool is_centroid = has_member_decoration(var_type.self, mbr_idx, DecorationCentroid) ||
bool is_centroid = interpolation_qual.get(DecorationCentroid) ||
has_member_decoration(var_type.self, mbr_idx, DecorationCentroid) ||
has_decoration(var.self, DecorationCentroid);
bool is_sample =
has_member_decoration(var_type.self, mbr_idx, DecorationSample) || has_decoration(var.self, DecorationSample);
bool is_sample = interpolation_qual.get(DecorationSample) ||
has_member_decoration(var_type.self, mbr_idx, DecorationSample) ||
has_decoration(var.self, DecorationSample);
Bitset inherited_qual;
if (is_flat)
inherited_qual.set(DecorationFlat);
if (is_noperspective)
inherited_qual.set(DecorationNoPerspective);
if (is_centroid)
inherited_qual.set(DecorationCentroid);
if (is_sample)
inherited_qual.set(DecorationSample);
uint32_t mbr_type_id = var_type.member_types[mbr_idx];
auto &mbr_type = get<SPIRType>(mbr_type_id);
@ -2998,7 +3049,7 @@ void CompilerMSL::add_composite_member_variable_to_interface_block(StorageClass
add_composite_member_variable_to_interface_block(storage, ib_var_ref, ib_type,
var, mbr_type, sub_mbr_idx,
meta, mbr_name, var_chain,
location, var_mbr_idx);
location, var_mbr_idx, inherited_qual);
// FIXME: Recursive structs and tessellation breaks here.
var_mbr_idx++;
}
@ -3485,7 +3536,7 @@ void CompilerMSL::emit_local_masked_variable(const SPIRVariable &masked_var, boo
get_entry_point().output_vertices;
statement("threadgroup ", type_to_glsl(type), " ",
"spvStorage", to_name(masked_var.self), "[", max_num_instances, "]",
type_to_array_glsl(type), ";");
type_to_array_glsl(type, 0), ";");
// Assign a threadgroup slice to each PrimitiveID.
// We assume here that workgroup size is rounded to 32,
@ -3684,7 +3735,7 @@ void CompilerMSL::add_variable_to_interface_block(StorageClass storage, const st
add_composite_member_variable_to_interface_block(storage, ib_var_ref, ib_type,
var, var_type, mbr_idx, meta,
mbr_name_qual, var_chain_qual,
location, var_mbr_idx);
location, var_mbr_idx, {});
}
else
{
@ -5564,9 +5615,8 @@ void CompilerMSL::emit_custom_templates()
// otherwise they will cause problems when linked together in a single Metallib.
void CompilerMSL::emit_custom_functions()
{
for (uint32_t i = kArrayCopyMultidimMax; i >= 2; i--)
if (spv_function_implementations.count(static_cast<SPVFuncImpl>(SPVFuncImplArrayCopyMultidimBase + i)))
spv_function_implementations.insert(static_cast<SPVFuncImpl>(SPVFuncImplArrayCopyMultidimBase + i - 1));
if (spv_function_implementations.count(SPVFuncImplArrayCopyMultidim))
spv_function_implementations.insert(SPVFuncImplArrayCopy);
if (spv_function_implementations.count(SPVFuncImplDynamicImageSampler))
{
@ -5677,11 +5727,7 @@ void CompilerMSL::emit_custom_functions()
break;
case SPVFuncImplArrayCopy:
case SPVFuncImplArrayOfArrayCopy2Dim:
case SPVFuncImplArrayOfArrayCopy3Dim:
case SPVFuncImplArrayOfArrayCopy4Dim:
case SPVFuncImplArrayOfArrayCopy5Dim:
case SPVFuncImplArrayOfArrayCopy6Dim:
case SPVFuncImplArrayCopyMultidim:
{
// Unfortunately we cannot template on the address space, so combinatorial explosion it is.
static const char *function_name_tags[] = {
@ -5704,36 +5750,19 @@ void CompilerMSL::emit_custom_functions()
for (uint32_t variant = 0; variant < 12; variant++)
{
uint8_t dimensions = spv_func - SPVFuncImplArrayCopyMultidimBase;
string tmp = "template<typename T";
for (uint8_t i = 0; i < dimensions; i++)
{
tmp += ", uint ";
tmp += 'A' + i;
}
tmp += ">";
statement(tmp);
string array_arg;
for (uint8_t i = 0; i < dimensions; i++)
{
array_arg += "[";
array_arg += 'A' + i;
array_arg += "]";
}
statement("inline void spvArrayCopy", function_name_tags[variant], dimensions, "(",
dst_address_space[variant], " T (&dst)", array_arg, ", ", src_address_space[variant],
" T (&src)", array_arg, ")");
bool is_multidim = spv_func == SPVFuncImplArrayCopyMultidim;
const char* dim = is_multidim ? "[N][M]" : "[N]";
statement("template<typename T, uint N", is_multidim ? ", uint M>" : ">");
statement("inline void spvArrayCopy", function_name_tags[variant], "(",
dst_address_space[variant], " T (&dst)", dim, ", ",
src_address_space[variant], " T (&src)", dim, ")");
begin_scope();
statement("for (uint i = 0; i < A; i++)");
statement("for (uint i = 0; i < N; i++)");
begin_scope();
if (dimensions == 1)
statement("dst[i] = src[i];");
if (is_multidim)
statement("spvArrayCopy", function_name_tags[variant], "(dst[i], src[i]);");
else
statement("spvArrayCopy", function_name_tags[variant], dimensions - 1, "(dst[i], src[i]);");
statement("dst[i] = src[i];");
end_scope();
end_scope();
statement("");
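For one of the twelve address-space variants, the loop above now emits a single 1-D template plus one recursive multi-dimensional template instead of one function per dimension; a sketch of the generated MSL (the FromStackToStack tag and thread address space are one example pairing):

template<typename T, uint N>
inline void spvArrayCopyFromStackToStack(thread T (&dst)[N], thread T (&src)[N])
{
	for (uint i = 0; i < N; i++)
	{
		dst[i] = src[i];
	}
}

template<typename T, uint N, uint M>
inline void spvArrayCopyFromStackToStack(thread T (&dst)[N][M], thread T (&src)[N][M])
{
	for (uint i = 0; i < N; i++)
	{
		spvArrayCopyFromStackToStack(dst[i], src[i]); // recurses until the 1-D overload applies
	}
}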
@ -6234,6 +6263,57 @@ void CompilerMSL::emit_custom_functions()
statement("");
break;
case SPVFuncImplGatherConstOffsets:
statement("// Wrapper function that processes a texture gather with a constant offset array.");
statement("template<typename T, template<typename, access = access::sample, typename = void> class Tex, "
"typename Toff, typename... Tp>");
statement("inline vec<T, 4> spvGatherConstOffsets(const thread Tex<T>& t, sampler s, "
"Toff coffsets, component c, Tp... params) METAL_CONST_ARG(c)");
begin_scope();
statement("vec<T, 4> rslts[4];");
statement("for (uint i = 0; i < 4; i++)");
begin_scope();
statement("switch (c)");
begin_scope();
// Work around texture::gather() requiring its component parameter to be a constant expression
statement("case component::x:");
statement(" rslts[i] = t.gather(s, spvForward<Tp>(params)..., coffsets[i], component::x);");
statement(" break;");
statement("case component::y:");
statement(" rslts[i] = t.gather(s, spvForward<Tp>(params)..., coffsets[i], component::y);");
statement(" break;");
statement("case component::z:");
statement(" rslts[i] = t.gather(s, spvForward<Tp>(params)..., coffsets[i], component::z);");
statement(" break;");
statement("case component::w:");
statement(" rslts[i] = t.gather(s, spvForward<Tp>(params)..., coffsets[i], component::w);");
statement(" break;");
end_scope();
end_scope();
// Pull all values from the i0j0 component of each gather footprint
statement("return vec<T, 4>(rslts[0].w, rslts[1].w, rslts[2].w, rslts[3].w);");
end_scope();
statement("");
break;
case SPVFuncImplGatherCompareConstOffsets:
statement("// Wrapper function that processes a texture gather with a constant offset array.");
statement("template<typename T, template<typename, access = access::sample, typename = void> class Tex, "
"typename Toff, typename... Tp>");
statement("inline vec<T, 4> spvGatherCompareConstOffsets(const thread Tex<T>& t, sampler s, "
"Toff coffsets, Tp... params)");
begin_scope();
statement("vec<T, 4> rslts[4];");
statement("for (uint i = 0; i < 4; i++)");
begin_scope();
statement(" rslts[i] = t.gather_compare(s, spvForward<Tp>(params)..., coffsets[i]);");
end_scope();
// Pull all values from the i0j0 component of each gather footprint
statement("return vec<T, 4>(rslts[0].w, rslts[1].w, rslts[2].w, rslts[3].w);");
end_scope();
statement("");
break;
case SPVFuncImplSubgroupBroadcast:
// Metal doesn't allow broadcasting boolean values directly, but we can work around that by broadcasting
// them as integers.
@ -7426,14 +7506,14 @@ void CompilerMSL::emit_custom_functions()
statement("template<typename T>");
statement("struct spvDescriptorArray");
begin_scope();
statement("spvDescriptorArray(const device spvDescriptor<T>* ptr) : ptr(ptr)");
statement("spvDescriptorArray(const device spvDescriptor<T>* ptr) : ptr(&ptr->value)");
begin_scope();
end_scope();
statement("const device T& operator [] (size_t i) const");
begin_scope();
statement("return ptr[i].value;");
statement("return ptr[i];");
end_scope();
statement("const device spvDescriptor<T>* ptr;");
statement("const device T* ptr;");
end_scope_decl();
statement("");
}
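This works because spvDescriptor<T> is assumed to wrap exactly one T (a sketch of the wrapper; if so, the descriptor stride equals sizeof(T), so caching &ptr->value lets operator[] index plain Ts directly):

template<typename T>
struct spvDescriptor
{
	T value; // sole member, so an array of spvDescriptor<T> and an array of T share a stride
};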
@ -7498,6 +7578,17 @@ void CompilerMSL::emit_custom_functions()
statement("");
break;
case SPVFuncImplTextureCast:
statement("template <typename T, typename U>");
statement("T spvTextureCast(U img)");
begin_scope();
// MSL complains if you try to cast the texture itself, but casting the reference type is ... ok? *shrug*
// Gotta do what you gotta do, I suppose.
statement("return reinterpret_cast<thread const T &>(img);");
end_scope();
statement("");
break;
default:
break;
}
@ -9428,32 +9519,12 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
uint32_t op1 = ops[3];
auto &type = get<SPIRType>(result_type);
auto input_type = opcode == OpSMulExtended ? int_type : uint_type;
auto &output_type = get_type(result_type);
string cast_op0, cast_op1;
auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, false);
binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, false);
emit_uninitialized_temporary_expression(result_type, result_id);
string mullo_expr, mulhi_expr;
mullo_expr = join(cast_op0, " * ", cast_op1);
mulhi_expr = join("mulhi(", cast_op0, ", ", cast_op1, ")");
auto &low_type = get_type(output_type.member_types[0]);
auto &high_type = get_type(output_type.member_types[1]);
if (low_type.basetype != input_type)
{
expected_type.basetype = input_type;
mullo_expr = join(bitcast_glsl_op(low_type, expected_type), "(", mullo_expr, ")");
}
if (high_type.basetype != input_type)
{
expected_type.basetype = input_type;
mulhi_expr = join(bitcast_glsl_op(high_type, expected_type), "(", mulhi_expr, ")");
}
statement(to_expression(result_id), ".", to_member_name(type, 0), " = ", mullo_expr, ";");
statement(to_expression(result_id), ".", to_member_name(type, 1), " = ", mulhi_expr, ";");
statement(to_expression(result_id), ".", to_member_name(type, 0), " = ", cast_op0, " * ", cast_op1, ";");
statement(to_expression(result_id), ".", to_member_name(type, 1), " = mulhi(", cast_op0, ", ", cast_op1, ");");
break;
}
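The emitted MSL then leans on Metal's built-in mulhi() from <metal_integer> (illustrative; _m0/_m1 are SPIRV-Cross's default member names for the two-member result struct):

// _42._m0 = a * b;        // low half of the extended multiply
// _42._m1 = mulhi(a, b);  // high half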
@ -10020,15 +10091,7 @@ bool CompilerMSL::emit_array_copy(const char *expr, uint32_t lhs_id, uint32_t rh
// we cannot easily detect this case ahead of time since it's
// context dependent. We might have to force a recompile here
// if this is the only use of array copies in our shader.
if (type.array.size() > 1)
{
if (type.array.size() > kArrayCopyMultidimMax)
SPIRV_CROSS_THROW("Cannot support this many dimensions for arrays of arrays.");
auto func = static_cast<SPVFuncImpl>(SPVFuncImplArrayCopyMultidimBase + type.array.size());
add_spv_func_and_recompile(func);
}
else
add_spv_func_and_recompile(SPVFuncImplArrayCopy);
add_spv_func_and_recompile(type.array.size() > 1 ? SPVFuncImplArrayCopyMultidim : SPVFuncImplArrayCopy);
const char *tag = nullptr;
if (lhs_is_thread_storage && is_constant)
@ -10060,13 +10123,13 @@ bool CompilerMSL::emit_array_copy(const char *expr, uint32_t lhs_id, uint32_t rh
// Pass internal array of spvUnsafeArray<> into wrapper functions
if (lhs_is_array_template && rhs_is_array_template && !msl_options.force_native_arrays)
statement("spvArrayCopy", tag, type.array.size(), "(", lhs, ".elements, ", to_expression(rhs_id), ".elements);");
statement("spvArrayCopy", tag, "(", lhs, ".elements, ", to_expression(rhs_id), ".elements);");
else if (lhs_is_array_template && !msl_options.force_native_arrays)
statement("spvArrayCopy", tag, type.array.size(), "(", lhs, ".elements, ", to_expression(rhs_id), ");");
statement("spvArrayCopy", tag, "(", lhs, ".elements, ", to_expression(rhs_id), ");");
else if (rhs_is_array_template && !msl_options.force_native_arrays)
statement("spvArrayCopy", tag, type.array.size(), "(", lhs, ", ", to_expression(rhs_id), ".elements);");
statement("spvArrayCopy", tag, "(", lhs, ", ", to_expression(rhs_id), ".elements);");
else
statement("spvArrayCopy", tag, type.array.size(), "(", lhs, ", ", to_expression(rhs_id), ");");
statement("spvArrayCopy", tag, "(", lhs, ", ", to_expression(rhs_id), ");");
}
return true;
@ -10213,7 +10276,35 @@ void CompilerMSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id,
if (split_index != string::npos)
{
auto coord = obj_expression.substr(split_index + 1);
exp += join(obj_expression.substr(0, split_index), ".", op, "(");
auto image_expr = obj_expression.substr(0, split_index);
// Handle problem cases with sign where, for example, we need signed min/max on a uint image.
// It seems to work to cast the texture type itself, even if it is probably wildly outside of spec,
// but SPIR-V requires this to work.
if ((opcode == OpAtomicUMax || opcode == OpAtomicUMin ||
opcode == OpAtomicSMax || opcode == OpAtomicSMin) &&
type.basetype != expected_type)
{
auto *backing_var = maybe_get_backing_variable(obj);
if (backing_var)
{
add_spv_func_and_recompile(SPVFuncImplTextureCast);
const auto *backing_type = &get<SPIRType>(backing_var->basetype);
while (backing_type->op != OpTypeImage)
backing_type = &get<SPIRType>(backing_type->parent_type);
auto img_type = *backing_type;
auto tmp_type = type;
tmp_type.basetype = expected_type;
img_type.image.type = ir.increase_bound_by(1);
set<SPIRType>(img_type.image.type, tmp_type);
image_expr = join("spvTextureCast<", type_to_glsl(img_type, obj), ">(", image_expr, ")");
}
}
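Illustratively (hypothetical identifiers; the atomic call itself is appended right after this), a signed atomic max on an image declared with a uint texel type now reads:

// before: _img.<atomic-op>(coord, ...)
// after:  spvTextureCast<texture2d<int, access::read_write>>(_img).<atomic-op>(coord, ...)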
exp += join(image_expr, ".", op, "(");
if (ptr_type.storage == StorageClassImage && res_type->image.arrayed)
{
switch (res_type->image.dim)
@ -10401,19 +10492,54 @@ void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
op = get_remapped_glsl_op(op);
auto &restype = get<SPIRType>(result_type);
switch (op)
{
case GLSLstd450Sinh:
emit_unary_func_op(result_type, id, args[0], "fast::sinh");
if (restype.basetype == SPIRType::Half)
{
// MSL does not have an overload for half. Force-cast back to half.
auto expr = join("half(fast::sinh(", to_unpacked_expression(args[0]), "))");
emit_op(result_type, id, expr, should_forward(args[0]));
inherit_expression_dependencies(id, args[0]);
}
else
emit_unary_func_op(result_type, id, args[0], "fast::sinh");
break;
case GLSLstd450Cosh:
emit_unary_func_op(result_type, id, args[0], "fast::cosh");
if (restype.basetype == SPIRType::Half)
{
// MSL does not have an overload for half. Force-cast back to half.
auto expr = join("half(fast::cosh(", to_unpacked_expression(args[0]), "))");
emit_op(result_type, id, expr, should_forward(args[0]));
inherit_expression_dependencies(id, args[0]);
}
else
emit_unary_func_op(result_type, id, args[0], "fast::cosh");
break;
case GLSLstd450Tanh:
emit_unary_func_op(result_type, id, args[0], "precise::tanh");
if (restype.basetype == SPIRType::Half)
{
// MSL does not have an overload for half. Force-cast back to half.
auto expr = join("half(fast::tanh(", to_unpacked_expression(args[0]), "))");
emit_op(result_type, id, expr, should_forward(args[0]));
inherit_expression_dependencies(id, args[0]);
}
else
emit_unary_func_op(result_type, id, args[0], "precise::tanh");
break;
case GLSLstd450Atan2:
emit_binary_func_op(result_type, id, args[0], args[1], "precise::atan2");
if (restype.basetype == SPIRType::Half)
{
// MSL does not have an overload for half. Force-cast back to half.
auto expr = join("half(fast::atan2(", to_unpacked_expression(args[0]), ", ", to_unpacked_expression(args[1]), "))");
emit_op(result_type, id, expr, should_forward(args[0]) && should_forward(args[1]));
inherit_expression_dependencies(id, args[0]);
inherit_expression_dependencies(id, args[1]);
}
else
emit_binary_func_op(result_type, id, args[0], args[1], "precise::atan2");
break;
case GLSLstd450InverseSqrt:
emit_unary_func_op(result_type, id, args[0], "rsqrt");
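The force-cast exists because the fast:: and precise:: math functions provide only float overloads; a half argument is promoted to float, and the float result must be narrowed back to match the SPIR-V result type. A sketch of what the emitted expression amounts to:

    // Equivalent of the emitted expression for GLSLstd450Sinh on a half operand:
    inline half sinh_half(half x)
    {
        return half(fast::sinh(x)); // computed in float, force-cast back to half
    }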
@ -10809,7 +10935,7 @@ void CompilerMSL::emit_function_prototype(SPIRFunction &func, const Bitset &)
decl += "thread ";
decl += type_to_glsl(type);
decl += " (&spvReturnValue)";
decl += type_to_array_glsl(type);
decl += type_to_array_glsl(type, 0);
if (!func.arguments.empty())
decl += ", ";
}
@ -10933,8 +11059,7 @@ string CompilerMSL::to_function_name(const TextureFunctionNameArguments &args)
is_dynamic_img_sampler = has_extended_decoration(var->self, SPIRVCrossDecorationDynamicImageSampler);
}
// Special-case gather. We have to alter the component being looked up
// in the swizzle case.
// Special-case gather. We have to alter the component being looked up in the swizzle case.
if (msl_options.swizzle_texture_samples && args.base.is_gather && !is_dynamic_img_sampler &&
(!constexpr_sampler || !constexpr_sampler->ycbcr_conversion_enable))
{
@ -10943,6 +11068,16 @@ string CompilerMSL::to_function_name(const TextureFunctionNameArguments &args)
return is_compare ? "spvGatherCompareSwizzle" : "spvGatherSwizzle";
}
// Special-case gather with an array of offsets. We have to lower into 4 separate gathers.
if (args.has_array_offsets && !is_dynamic_img_sampler &&
(!constexpr_sampler || !constexpr_sampler->ycbcr_conversion_enable))
{
bool is_compare = comparison_ids.count(img);
add_spv_func_and_recompile(is_compare ? SPVFuncImplGatherCompareConstOffsets : SPVFuncImplGatherConstOffsets);
add_spv_func_and_recompile(SPVFuncImplForwardArgs);
return is_compare ? "spvGatherCompareConstOffsets" : "spvGatherConstOffsets";
}
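Per SPIR-V's ConstOffsets semantics, component i of the result comes from a gather performed with offsets[i], so the lowering combines four single-offset gathers. A minimal sketch of such a helper; the signature and parameter order are assumed, and the actual spvGatherConstOffsets emitted by SPIRV-Cross may differ:

    // Minimal sketch for the 2D, non-compare case; parameter order is illustrative only.
    template <typename T>
    inline vec<T, 4> spvGatherConstOffsets(texture2d<T> t, sampler s, float2 coord,
                                           thread const int2 (&offsets)[4], component c)
    {
        vec<T, 4> result;
        for (int i = 0; i < 4; ++i)
            result[i] = t.gather(s, coord, offsets[i], c)[i]; // component i of the gather with offsets[i]
        return result;
    }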
auto *combined = maybe_get<SPIRCombinedImageSampler>(img);
// Texture reference
@ -11123,6 +11258,10 @@ string CompilerMSL::to_function_args(const TextureFunctionArguments &args, bool
farg_str += to_expression(combined ? combined->image : img);
}
// Gathers with constant offsets call a special function, so include the texture.
if (args.has_array_offsets)
farg_str += to_expression(img);
// Sampler reference
if (!args.base.is_fetch)
{
@ -11139,11 +11278,17 @@ string CompilerMSL::to_function_args(const TextureFunctionArguments &args, bool
used_swizzle_buffer = true;
}
// Swizzled gather puts the component before the other args, to allow template
// deduction to work.
if (args.component && msl_options.swizzle_texture_samples)
// Const offsets gather puts the const offsets before the other args.
if (args.has_array_offsets)
{
forward = should_forward(args.component);
forward = forward && should_forward(args.offset);
farg_str += ", " + to_expression(args.offset);
}
// Const offsets gather or swizzled gather puts the component before the other args.
if (args.component && (args.has_array_offsets || msl_options.swizzle_texture_samples))
{
forward = forward && should_forward(args.component);
farg_str += ", " + to_component_argument(args.component);
}
}
@ -11554,7 +11699,7 @@ string CompilerMSL::to_function_args(const TextureFunctionArguments &args, bool
// Add offsets
string offset_expr;
const SPIRType *offset_type = nullptr;
if (args.offset && !args.base.is_fetch)
if (args.offset && !args.base.is_fetch && !args.has_array_offsets)
{
forward = forward && should_forward(args.offset);
offset_expr = to_expression(args.offset);
@ -11593,7 +11738,7 @@ string CompilerMSL::to_function_args(const TextureFunctionArguments &args, bool
}
}
if (args.component)
if (args.component && !args.has_array_offsets)
{
// If 2D has gather component, ensure it also has an offset arg
if (imgtype.image.dim == Dim2D && offset_expr.empty())
@ -12273,7 +12418,7 @@ string CompilerMSL::to_struct_member(const SPIRType &type, uint32_t member_type_
variable_storage_requires_stage_io(StorageClassInput)));
if (is_ib_in_out && is_member_builtin(type, index, &builtin))
is_using_builtin_array = true;
array_type = type_to_array_glsl(physical_type);
array_type = type_to_array_glsl(physical_type, orig_id);
}
if (orig_id)
@ -12843,14 +12988,14 @@ uint32_t CompilerMSL::get_or_allocate_builtin_output_member_location(spv::BuiltI
string CompilerMSL::func_type_decl(SPIRType &type)
{
// The regular function return type. If not processing the entry point function, that's all we need
string return_type = type_to_glsl(type) + type_to_array_glsl(type);
string return_type = type_to_glsl(type) + type_to_array_glsl(type, 0);
if (!processing_entry_point)
return return_type;
// If an outgoing interface block has been defined, and it should be returned, override the entry point return type
bool ep_should_return_output = !get_is_rasterization_disabled();
if (stage_out_var_id && ep_should_return_output)
return_type = type_to_glsl(get_stage_out_struct_type()) + type_to_array_glsl(type);
return_type = type_to_glsl(get_stage_out_struct_type()) + type_to_array_glsl(type, 0);
// Prepend an entry type, based on the execution model
string entry_type;
@ -14416,16 +14561,33 @@ void CompilerMSL::fix_up_shader_inputs_outputs()
}
}
else if (var.storage == StorageClassOutput && get_execution_model() == ExecutionModelFragment &&
is_builtin_variable(var) && active_output_builtins.get(bi_type) &&
bi_type == BuiltInSampleMask && has_additional_fixed_sample_mask())
is_builtin_variable(var) && active_output_builtins.get(bi_type))
{
// If the additional fixed sample mask was set, we need to adjust the sample_mask
// output to reflect that. If the shader outputs the sample_mask itself too, we need
// to AND the two masks to get the final one.
string op_str = does_shader_write_sample_mask ? " &= " : " = ";
entry_func.fixup_hooks_out.push_back([=]() {
statement(to_expression(builtin_sample_mask_id), op_str, additional_fixed_sample_mask_str(), ";");
});
switch (bi_type)
{
case BuiltInSampleMask:
if (has_additional_fixed_sample_mask())
{
// If the additional fixed sample mask was set, we need to adjust the sample_mask
// output to reflect that. If the shader outputs the sample_mask itself too, we need
// to AND the two masks to get the final one.
string op_str = does_shader_write_sample_mask ? " &= " : " = ";
entry_func.fixup_hooks_out.push_back([=]() {
statement(to_expression(builtin_sample_mask_id), op_str, additional_fixed_sample_mask_str(), ";");
});
}
break;
case BuiltInFragDepth:
if (msl_options.input_attachment_is_ds_attachment && !writes_to_depth)
{
entry_func.fixup_hooks_out.push_back([=]() {
statement(to_expression(builtin_frag_depth_id), " = ", to_expression(builtin_frag_coord_id), ".z;");
});
}
break;
default:
break;
}
}
});
}
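Concretely, the BuiltInFragDepth hook above appends a single statement at the end of the fragment entry point; with typical SPIRV-Cross builtin naming (assumed here), the emitted MSL is roughly:

    // Added only when input_attachment_is_ds_attachment is set and the shader never
    // writes depth itself (writes_to_depth == false):
    out.gl_FragDepth = gl_FragCoord.z; // passthrough keeps the depth write post-fragment-execution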
@ -14717,7 +14879,7 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg)
}
decl += to_expression(name_id);
decl += ")";
decl += type_to_array_glsl(type);
decl += type_to_array_glsl(type, name_id);
}
else
{
@ -14768,7 +14930,7 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg)
}
else
{
auto array_size_decl = type_to_array_glsl(type);
auto array_size_decl = type_to_array_glsl(type, name_id);
if (array_size_decl.empty())
decl += "& ";
else
@ -15508,7 +15670,7 @@ string CompilerMSL::type_to_glsl(const SPIRType &type, uint32_t id)
return type_to_glsl(type, id, false);
}
string CompilerMSL::type_to_array_glsl(const SPIRType &type)
string CompilerMSL::type_to_array_glsl(const SPIRType &type, uint32_t variable_id)
{
// Allow Metal to use the array<T> template to make arrays a value type
switch (type.basetype)
@ -15516,11 +15678,20 @@ string CompilerMSL::type_to_array_glsl(const SPIRType &type)
case SPIRType::AtomicCounter:
case SPIRType::ControlPointArray:
case SPIRType::RayQuery:
return CompilerGLSL::type_to_array_glsl(type);
return CompilerGLSL::type_to_array_glsl(type, variable_id);
default:
if (type_is_array_of_pointers(type) || using_builtin_array())
return CompilerGLSL::type_to_array_glsl(type);
{
const SPIRVariable *var = variable_id ? &get<SPIRVariable>(variable_id) : nullptr;
if (var && (var->storage == StorageClassUniform || var->storage == StorageClassStorageBuffer) &&
is_array(get_variable_data_type(*var)))
{
return join("[", get_resource_array_size(type, variable_id), "]");
}
else
return CompilerGLSL::type_to_array_glsl(type, variable_id);
}
else
return "";
}
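The practical effect of the new branch, sketched with assumed syntax: for a Uniform or StorageBuffer variable whose data type is an array, the element count is now taken from the app-provided resource binding via get_resource_array_size() rather than derived from the SPIR-V type, which matters when the SPIR-V array is runtime-sized.

    // SPIR-V side (GLSL-style), a runtime-sized array of SSBOs:
    //   layout(set = 0, binding = 0) buffer SSBO { ... } ssbos[];
    //
    // MSL argument declaration, assuming the MSLResourceBinding declares count = 4
    // (exact emitted syntax may differ):
    //   device SSBO* ssbos[4] [[buffer(0)]]
    // The "[4]" suffix is what this branch returns instead of CompilerGLSL's array syntax.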
@ -15686,8 +15857,8 @@ string CompilerMSL::image_type_glsl(const SPIRType &type, uint32_t id, bool memb
string img_type_name;
// Bypass pointers because we need the real image struct
auto &img_type = get<SPIRType>(type.self).image;
auto &img_type = type.image;
if (is_depth_image(type, id))
{
switch (img_type.dim)
@ -17925,7 +18096,7 @@ void CompilerMSL::emit_argument_buffer_aliased_descriptor(const SPIRVariable &al
is_using_builtin_array = true;
bool needs_post_cast_deref = !is_array(data_type);
string ref_type = needs_post_cast_deref ? "&" : join("(&)", type_to_array_glsl(var_type));
string ref_type = needs_post_cast_deref ? "&" : join("(&)", type_to_array_glsl(var_type, aliased_var.self));
if (is_var_runtime_size_array(aliased_var))
{
@ -18214,9 +18385,9 @@ void CompilerMSL::analyze_argument_buffers()
// If needed, synthesize and add padding members.
// member_index and next_arg_buff_index are incremented when padding members are added.
if (msl_options.pad_argument_buffer_resources)
if (msl_options.pad_argument_buffer_resources && resource.overlapping_var_id == 0)
{
auto &rez_bind = get_argument_buffer_resource(desc_set, next_arg_buff_index);
auto rez_bind = get_argument_buffer_resource(desc_set, next_arg_buff_index);
while (resource.index > next_arg_buff_index)
{
switch (rez_bind.basetype)
@ -18252,11 +18423,14 @@ void CompilerMSL::analyze_argument_buffers()
default:
break;
}
// After padding, retrieve the resource again. It will either be more padding, or the actual resource.
rez_bind = get_argument_buffer_resource(desc_set, next_arg_buff_index);
}
// Adjust the number of slots consumed by the current member itself.
// Use the count value from the app, instead of the shader, in case the
// shader is only accesing part, or even one element, of the array.
// shader is only accessing part, or even one element, of the array.
next_arg_buff_index += rez_bind.count;
}
@ -18391,7 +18565,7 @@ void CompilerMSL::analyze_argument_buffers()
// that matches the resource index of the argument buffer index.
// This is a two-step lookup: first look up the resource binding number from the argument buffer index,
// then look up the resource binding using that binding number.
MSLResourceBinding &CompilerMSL::get_argument_buffer_resource(uint32_t desc_set, uint32_t arg_idx)
const MSLResourceBinding &CompilerMSL::get_argument_buffer_resource(uint32_t desc_set, uint32_t arg_idx) const
{
auto stage = get_entry_point().model;
StageSetBinding arg_idx_tuple = { stage, desc_set, arg_idx };

View File

@ -287,9 +287,6 @@ static const uint32_t kArgumentBufferBinding = ~(3u);
static const uint32_t kMaxArgumentBuffers = 8;
// The arbitrary maximum for the nesting of array of array copies.
static const uint32_t kArrayCopyMultidimMax = 6;
// Decompiles SPIR-V to Metal Shading Language
class CompilerMSL : public CompilerGLSL
{
@ -519,6 +516,26 @@ public:
// transformed.
bool agx_manual_cube_grad_fixup = false;
// Metal will prematurely discard fragments with side effects under certain circumstances.
// Example: CTS test dEQP-VK.fragment_operations.early_fragment.discard_no_early_fragment_tests_depth
// The test renders a full-screen quad with varying depth [0,1] across fragments.
// Each fragment performs an operation with side effects, modifies the depth value and
// discards the fragment. The test expects the fragment shader to be run due to:
// https://registry.khronos.org/vulkan/specs/1.0-extensions/html/vkspec.html#fragops-shader-depthreplacement
// which states that the fragment shader must be run when it replaces the depth value in the shader.
// However, Metal may prematurely discard fragments without executing them
// (I believe this to be due to a greedy optimization on their end), making the test fail.
// This option enforces fragment execution in such cases where the fragment has operations
// with side effects. It is provided as an option in the hope that Metal will fix this issue in the future.
bool force_fragment_with_side_effects_execution = false;
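A hedged illustration of the pattern the comment above describes, written directly in MSL; the names are hypothetical, not SPIRV-Cross output:

    #include <metal_stdlib>
    using namespace metal;

    struct FragOut
    {
        float depth [[depth(any)]];
    };

    // Side effect + constant depth + unconditional discard: the combination Metal
    // may optimize into a pre-execution discard, skipping the atomic below.
    fragment FragOut frag_main(device atomic_uint &counter [[buffer(0)]])
    {
        atomic_fetch_add_explicit(&counter, 1u, memory_order_relaxed); // side effect Vulkan requires to happen
        FragOut out;
        out.depth = 0.75f;   // constant depth known at compile time; may fail the late depth test
        discard_fragment();  // fragment is always discarded, but only after the side effect
        return out;
    }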
// If set, adds a depth passthrough statement to circumvent the following issue:
// when the same image is used as both an input attachment and the depth/stencil attachment, we need to
// force Metal to perform the depth/stencil write after fragment execution. Otherwise,
// Metal will write to the depth attachment before fragment execution. This happens
// if the fragment does not modify the depth value.
bool input_attachment_is_ds_attachment = false;
bool is_ios() const
{
return platform == iOS;
@ -752,15 +769,8 @@ protected:
SPVFuncImplFindSMsb,
SPVFuncImplFindUMsb,
SPVFuncImplSSign,
SPVFuncImplArrayCopyMultidimBase,
// Unfortunately, we cannot use recursive templates in the MSL compiler properly,
// so stamp out variants up to some arbitrary maximum.
SPVFuncImplArrayCopy = SPVFuncImplArrayCopyMultidimBase + 1,
SPVFuncImplArrayOfArrayCopy2Dim = SPVFuncImplArrayCopyMultidimBase + 2,
SPVFuncImplArrayOfArrayCopy3Dim = SPVFuncImplArrayCopyMultidimBase + 3,
SPVFuncImplArrayOfArrayCopy4Dim = SPVFuncImplArrayCopyMultidimBase + 4,
SPVFuncImplArrayOfArrayCopy5Dim = SPVFuncImplArrayCopyMultidimBase + 5,
SPVFuncImplArrayOfArrayCopy6Dim = SPVFuncImplArrayCopyMultidimBase + 6,
SPVFuncImplArrayCopy,
SPVFuncImplArrayCopyMultidim,
SPVFuncImplTexelBufferCoords,
SPVFuncImplImage2DAtomicCoords, // Emulate texture2D atomic operations
SPVFuncImplGradientCube,
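A sketch of the shape a consolidated array-copy helper can take, shown for the thread-to-thread case only; the helpers SPIRV-Cross emits also cover other address-space combinations, and their exact form may differ. Overload resolution recurses through nested array ranks, replacing the per-dimension stamped-out variants:

    template <typename T>
    inline void spvArrayCopy(thread T &dst, thread const T &src)
    {
        // Non-array base case: plain assignment.
        dst = src;
    }

    template <typename T, uint N>
    inline void spvArrayCopy(thread T (&dst)[N], thread const T (&src)[N])
    {
        // Array case: copy element-wise. For nested arrays, overload resolution
        // picks this overload again, recursing one rank at a time.
        for (uint i = 0; i < N; i++)
            spvArrayCopy(dst[i], src[i]);
    }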
@ -782,6 +792,8 @@ protected:
SPVFuncImplTextureSwizzle,
SPVFuncImplGatherSwizzle,
SPVFuncImplGatherCompareSwizzle,
SPVFuncImplGatherConstOffsets,
SPVFuncImplGatherCompareConstOffsets,
SPVFuncImplSubgroupBroadcast,
SPVFuncImplSubgroupBroadcastFirst,
SPVFuncImplSubgroupBallot,
@ -825,7 +837,8 @@ protected:
SPVFuncImplVariableDescriptorArray,
SPVFuncImplPaddedStd140,
SPVFuncImplReduceAdd,
SPVFuncImplImageFence
SPVFuncImplImageFence,
SPVFuncImplTextureCast
};
// If the underlying resource has been used for comparison, then duplicate loads of that resource must be too
@ -856,7 +869,7 @@ protected:
void emit_block_hints(const SPIRBlock &block) override;
// Allow Metal to use the array<T> template to make arrays a value type
std::string type_to_array_glsl(const SPIRType &type) override;
std::string type_to_array_glsl(const SPIRType &type, uint32_t variable_id) override;
std::string constant_op_expression(const SPIRConstantOp &cop) override;
bool variable_decl_is_remapped_storage(const SPIRVariable &variable, spv::StorageClass storage) const override;
@ -960,7 +973,8 @@ protected:
uint32_t mbr_idx, InterfaceBlockMeta &meta,
const std::string &mbr_name_qual,
const std::string &var_chain_qual,
uint32_t &location, uint32_t &var_mbr_idx);
uint32_t &location, uint32_t &var_mbr_idx,
const Bitset &interpolation_qual);
void add_tess_level_input_to_interface_block(const std::string &ib_var_ref, SPIRType &ib_type, SPIRVariable &var);
void add_tess_level_input(const std::string &base_ref, const std::string &mbr_name, SPIRVariable &var);
@ -1087,6 +1101,7 @@ protected:
uint32_t builtin_stage_input_size_id = 0;
uint32_t builtin_local_invocation_index_id = 0;
uint32_t builtin_workgroup_size_id = 0;
uint32_t builtin_frag_depth_id = 0;
uint32_t swizzle_buffer_id = 0;
uint32_t buffer_size_buffer_id = 0;
uint32_t view_mask_buffer_id = 0;
@ -1183,6 +1198,7 @@ protected:
bool needs_subgroup_size = false;
bool needs_sample_id = false;
bool needs_helper_invocation = false;
bool writes_to_depth = false;
std::string qual_pos_var_name;
std::string stage_in_var_name = "in";
std::string stage_out_var_name = "out";
@ -1231,7 +1247,7 @@ protected:
void analyze_argument_buffers();
bool descriptor_set_is_argument_buffer(uint32_t desc_set) const;
MSLResourceBinding &get_argument_buffer_resource(uint32_t desc_set, uint32_t arg_idx);
const MSLResourceBinding &get_argument_buffer_resource(uint32_t desc_set, uint32_t arg_idx) const;
void add_argument_buffer_padding_buffer_type(SPIRType &struct_type, uint32_t &mbr_idx, uint32_t &arg_buff_index, MSLResourceBinding &rez_bind);
void add_argument_buffer_padding_image_type(SPIRType &struct_type, uint32_t &mbr_idx, uint32_t &arg_buff_index, MSLResourceBinding &rez_bind);
void add_argument_buffer_padding_sampler_type(SPIRType &struct_type, uint32_t &mbr_idx, uint32_t &arg_buff_index, MSLResourceBinding &rez_bind);