Updated spirv-cross.

This commit is contained in:
Бранимир Караџић 2019-05-18 12:28:43 -07:00
parent abab48ebce
commit d1a6deb8a9
57 changed files with 2625 additions and 255 deletions

View File

@ -11,43 +11,62 @@ matrix:
compiler: gcc
env:
- GENERATOR="Unix Makefiles"
- ARTIFACT=gcc-trusty-64bit
- os: linux
dist: trusty
compiler: clang
env:
- GENERATOR="Unix Makefiles"
- ARTIFACT=clang-trusty-64bit
- os: osx
compiler: clang
osx_image: xcode10
env:
- GENERATOR="Unix Makefiles"
- ARTIFACT=clang-macos-64bit
- os: windows
before_install:
- choco install python3
- choco install python2
- export PATH="/c/Python27:/c/Python27/Scripts:$PATH"
- export PATH="/c/Python37:/c/Python37/Scripts:$PATH"
env:
- GENERATOR="Visual Studio 15 2017"
- ARTIFACT=vs2017-32bit
- os: windows
before_install:
- choco install python3
- choco install python2
- export PATH="/c/Python27:/c/Python27/Scripts:$PATH"
- export PATH="/c/Python37:/c/Python37/Scripts:$PATH"
env:
- GENERATOR="Visual Studio 15 2017 Win64"
- ARTIFACT=vs2017-64bit
before_script:
- ./checkout_glslang_spirv_tools.sh
- "./checkout_glslang_spirv_tools.sh"
script:
- if [[ "$TRAVIS_OS_NAME" == "windows" ]]; then PYTHON3=$(which python); fi
- if [[ "$TRAVIS_OS_NAME" != "windows" ]]; then PYTHON3=$(which python3); fi
- ./build_glslang_spirv_tools.sh Release
- "./build_glslang_spirv_tools.sh Release"
- mkdir build
- cd build
- cmake .. -DSPIRV_CROSS_SHARED=ON -DCMAKE_INSTALL_PREFIX=output -DCMAKE_BUILD_TYPE=Release -G "${GENERATOR}" -DPYTHON_EXECUTABLE:FILEPATH="${PYTHON3}" -DSPIRV_CROSS_ENABLE_TESTS=ON
- cmake --build . --config Release
- cmake --build . --config Release --target install
- ctest --verbose -C Release
- cd ..
before_deploy:
- REV=${ARTIFACT}-$(git rev-parse --short=10 HEAD)
- cd build/output
- tar cf spirv-cross-${REV}.tar *
- gzip spirv-cross-${REV}.tar
- cd ../..
- export FILE_TO_UPLOAD=build/output/spirv-cross-${REV}.tar.gz
deploy:
provider: releases
api_key:
secure: c7YEOyzhE19TFo76UnbLWk/kikRQxsHsOxzkOqN6Q2aL8joNRw5kmcG84rGd+Rf6isX62cykCzA6qHkyJCv9QTIzcyXnLju17rLvgib7cXDcseaq8x4mFvet2yUxCglthDpFY2M2LB0Aqws71lPeYIrKXa6hCFEh8jO3AWxnaor7O3RYfNZylM9d33HgH6KLT3sDx/cukwBstmKeg7EG9OUnrSvairkPW0W2+jlq3SXPlq/WeVhf8hQs3Yg0BluExGbmLOwe9EaeUpeGuJMyHRxXypnToQv1/KwoScKpap5tYxdNWiwRGZ4lYcmKrjAYVvilTioh654oX5LQpn34mE/oe8Ko9AaATkSaoiisRFp6meWtnB39oFBoL5Yn15DqLQpRXPr1AJsnBXSGAac3aDBO1j4MIqTHmYlYlfRw3n2ZsBaFaTZnv++438SNQ54nkivyoDTIWjoOmYa9+K4mQc3415RDdQmjZTJM+lu+GAlMmNBTVbfNvrbU55Usu9Lo6BZJKKdUMvdBB78kJ5FHvcBlL+eMgmk1pABQY0IZROCt7NztHcv1UmAxoWNxveSFs5glydPNNjNS8bogc4dzBGYG0KMmILbBHihVbY2toA1M9CMdDHdp+LucfDMmzECmYSEmlx0h8win+Jjb74/qpOhaXuUZ0NnzVgCOyeUYuMQ=
file: "${FILE_TO_UPLOAD}"
skip_cleanup: true
on:
tags: true

View File

@ -265,10 +265,11 @@ if (SPIRV_CROSS_STATIC)
endif()
endif()
set(spirv-cross-abi-major 0)
set(spirv-cross-abi-minor 9)
set(spirv-cross-abi-patch 0)
if (SPIRV_CROSS_SHARED)
set(spirv-cross-abi-major 0)
set(spirv-cross-abi-minor 6)
set(spirv-cross-abi-patch 0)
set(SPIRV_CROSS_VERSION ${spirv-cross-abi-major}.${spirv-cross-abi-minor}.${spirv-cross-abi-patch})
set(SPIRV_CROSS_INSTALL_LIB_DIR ${CMAKE_INSTALL_PREFIX}/lib)
set(SPIRV_CROSS_INSTALL_INC_DIR ${CMAKE_INSTALL_PREFIX}/include/spirv_cross)
@ -431,7 +432,10 @@ if (SPIRV_CROSS_CLI)
target_compile_options(spirv-cross-c-api-test PRIVATE -std=c89 -Wall -Wextra)
endif()
add_test(NAME spirv-cross-c-api-test
COMMAND $<TARGET_FILE:spirv-cross-c-api-test> ${CMAKE_CURRENT_SOURCE_DIR}/tests-other/c_api_test.spv)
COMMAND $<TARGET_FILE:spirv-cross-c-api-test> ${CMAKE_CURRENT_SOURCE_DIR}/tests-other/c_api_test.spv
${spirv-cross-abi-major}
${spirv-cross-abi-minor}
${spirv-cross-abi-patch})
add_test(NAME spirv-cross-small-vector-test
COMMAND $<TARGET_FILE:spirv-cross-small-vector-test>)
add_test(NAME spirv-cross-test

View File

@ -1,8 +1,8 @@
#!/bin/bash
GLSLANG_REV=ef807f4bc543e061f25dbbee6cb64dd5053b2adc
SPIRV_TOOLS_REV=12e4a7b649e6fe28683de9fc352200c82948a1f0
SPIRV_HEADERS_REV=111a25e4ae45e2b4d7c18415e1d6884712b958c4
GLSLANG_REV=e291f7a09f6733f6634fe077a228056fabee881e
SPIRV_TOOLS_REV=89fe836fe22c3e5c2a062ebeade012e2c2f0839b
SPIRV_HEADERS_REV=c4f8f65792d4bf2657ca751904c511bbcf2ac77b
if [ -z $PROTOCOL ]; then
PROTOCOL=git

View File

@ -511,6 +511,7 @@ struct CLIArguments
bool msl_argument_buffers = false;
bool msl_texture_buffer_native = false;
bool glsl_emit_push_constant_as_ubo = false;
bool glsl_emit_ubo_as_plain_uniforms = false;
SmallVector<uint32_t> msl_discrete_descriptor_sets;
SmallVector<PLSArg> pls_in;
SmallVector<PLSArg> pls_out;
@ -563,6 +564,7 @@ static void print_help()
"\t[--cpp]\n"
"\t[--cpp-interface-name <name>]\n"
"\t[--glsl-emit-push-constant-as-ubo]\n"
"\t[--glsl-emit-ubo-as-plain-uniforms]\n"
"\t[--msl]\n"
"\t[--msl-version <MMmmpp>]\n"
"\t[--msl-capture-output]\n"
@ -854,6 +856,7 @@ static string compile_iteration(const CLIArguments &args, std::vector<uint32_t>
opts.vertex.flip_vert_y = args.yflip;
opts.vertex.support_nonzero_base_instance = args.support_nonzero_baseinstance;
opts.emit_push_constant_as_uniform_buffer = args.glsl_emit_push_constant_as_ubo;
opts.emit_uniform_buffer_as_plain_uniforms = args.glsl_emit_ubo_as_plain_uniforms;
compiler->set_common_options(opts);
// Set HLSL specific options.
@ -1025,6 +1028,7 @@ static int main_inner(int argc, char *argv[])
cbs.add("--cpp-interface-name", [&args](CLIParser &parser) { args.cpp_interface_name = parser.next_string(); });
cbs.add("--metal", [&args](CLIParser &) { args.msl = true; }); // Legacy compatibility
cbs.add("--glsl-emit-push-constant-as-ubo", [&args](CLIParser &) { args.glsl_emit_push_constant_as_ubo = true; });
cbs.add("--glsl-emit-ubo-as-plain-uniforms", [&args](CLIParser &) { args.glsl_emit_ubo_as_plain_uniforms = true; });
cbs.add("--msl", [&args](CLIParser &) { args.msl = true; });
cbs.add("--hlsl", [&args](CLIParser &) { args.hlsl = true; });
cbs.add("--hlsl-enable-compat", [&args](CLIParser &) { args.hlsl_compat = true; });

View File

@ -0,0 +1,15 @@
RWByteAddressBuffer _11 : register(u1);
void comp_main()
{
uint _14;
_11.GetDimensions(_14);
_14 = (_14 - 16) / 16;
_11.Store(0, uint(int(_14)));
}
[numthreads(1, 1, 1)]
void main()
{
comp_main();
}

View File

@ -0,0 +1,46 @@
struct UBO_1_1
{
float4 v[64];
};
ConstantBuffer<UBO_1_1> ubos[] : register(b0, space3);
ByteAddressBuffer ssbos[] : register(t0, space4);
Texture2D<float4> uSamplers[] : register(t0, space0);
SamplerState uSamps[] : register(s0, space2);
Texture2D<float4> uCombinedSamplers[] : register(t0, space1);
SamplerState _uCombinedSamplers_sampler[] : register(s0, space1);
static int vIndex;
static float4 FragColor;
static float2 vUV;
struct SPIRV_Cross_Input
{
nointerpolation int vIndex : TEXCOORD0;
float2 vUV : TEXCOORD1;
};
struct SPIRV_Cross_Output
{
float4 FragColor : SV_Target0;
};
void frag_main()
{
int _22 = vIndex + 10;
int _32 = vIndex + 40;
FragColor = uSamplers[NonUniformResourceIndex(_22)].Sample(uSamps[NonUniformResourceIndex(_32)], vUV);
FragColor = uCombinedSamplers[NonUniformResourceIndex(_22)].Sample(_uCombinedSamplers_sampler[NonUniformResourceIndex(_22)], vUV);
FragColor += ubos[NonUniformResourceIndex(vIndex + 20)].v[_32];
FragColor += asfloat(ssbos[NonUniformResourceIndex(vIndex + 50)].Load4((vIndex + 60) * 16 + 0));
}
SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
{
vIndex = stage_input.vIndex;
vUV = stage_input.vUV;
frag_main();
SPIRV_Cross_Output stage_output;
stage_output.FragColor = FragColor;
return stage_output;
}

View File

@ -8,13 +8,15 @@ constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(4u, 1u, 1u);
kernel void main0()
{
threadgroup_barrier(mem_flags::mem_threadgroup);
threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture);
threadgroup_barrier(mem_flags::mem_texture);
threadgroup_barrier(mem_flags::mem_device);
threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture);
threadgroup_barrier(mem_flags::mem_threadgroup);
threadgroup_barrier(mem_flags::mem_none);
threadgroup_barrier(mem_flags::mem_threadgroup);
threadgroup_barrier(mem_flags::mem_threadgroup);
threadgroup_barrier(mem_flags::mem_threadgroup);
threadgroup_barrier(mem_flags::mem_none);
threadgroup_barrier(mem_flags::mem_threadgroup);
threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture);
threadgroup_barrier(mem_flags::mem_texture);
threadgroup_barrier(mem_flags::mem_device);
threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture);
threadgroup_barrier(mem_flags::mem_threadgroup);
}

View File

@ -5,11 +5,6 @@
using namespace metal;
struct spvAux
{
uint swizzleConst[1];
};
enum class spvSwizzle : uint
{
none = 0,
@ -130,9 +125,9 @@ inline vec<T, 4> spvGatherCompareSwizzle(sampler s, const thread Tex& t, Ts... p
return t.gather_compare(s, spvForward<Ts>(params)...);
}
kernel void main0(constant spvAux& spvAuxBuffer [[buffer(30)]], texture2d<float> foo [[texture(0)]], texture2d<float, access::write> bar [[texture(1)]], sampler fooSmplr [[sampler(0)]])
kernel void main0(constant uint* spvSwizzleConstants [[buffer(30)]], texture2d<float> foo [[texture(0)]], texture2d<float, access::write> bar [[texture(1)]], sampler fooSmplr [[sampler(0)]])
{
constant uint32_t& fooSwzl = spvAuxBuffer.swizzleConst[0];
constant uint32_t& fooSwzl = spvSwizzleConstants[0];
bar.write(spvTextureSwizzle(foo.sample(fooSmplr, float2(1.0), level(0.0)), fooSwzl), uint2(int2(0)));
}

View File

@ -0,0 +1,156 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct spvDescriptorSetBuffer0
{
constant uint* spvSwizzleConstants [[id(0)]];
array<texture2d<float>, 4> uSampler0 [[id(1)]];
array<sampler, 4> uSampler0Smplr [[id(5)]];
};
struct main0_out
{
float4 FragColor [[color(0)]];
};
struct main0_in
{
float2 vUV [[user(locn0)]];
};
enum class spvSwizzle : uint
{
none = 0,
zero,
one,
red,
green,
blue,
alpha
};
template<typename T> struct spvRemoveReference { typedef T type; };
template<typename T> struct spvRemoveReference<thread T&> { typedef T type; };
template<typename T> struct spvRemoveReference<thread T&&> { typedef T type; };
template<typename T> inline constexpr thread T&& spvForward(thread typename spvRemoveReference<T>::type& x)
{
return static_cast<thread T&&>(x);
}
template<typename T> inline constexpr thread T&& spvForward(thread typename spvRemoveReference<T>::type&& x)
{
return static_cast<thread T&&>(x);
}
template<typename T>
inline T spvGetSwizzle(vec<T, 4> x, T c, spvSwizzle s)
{
switch (s)
{
case spvSwizzle::none:
return c;
case spvSwizzle::zero:
return 0;
case spvSwizzle::one:
return 1;
case spvSwizzle::red:
return x.r;
case spvSwizzle::green:
return x.g;
case spvSwizzle::blue:
return x.b;
case spvSwizzle::alpha:
return x.a;
}
}
// Wrapper function that swizzles texture samples and fetches.
template<typename T>
inline vec<T, 4> spvTextureSwizzle(vec<T, 4> x, uint s)
{
if (!s)
return x;
return vec<T, 4>(spvGetSwizzle(x, x.r, spvSwizzle((s >> 0) & 0xFF)), spvGetSwizzle(x, x.g, spvSwizzle((s >> 8) & 0xFF)), spvGetSwizzle(x, x.b, spvSwizzle((s >> 16) & 0xFF)), spvGetSwizzle(x, x.a, spvSwizzle((s >> 24) & 0xFF)));
}
template<typename T>
inline T spvTextureSwizzle(T x, uint s)
{
return spvTextureSwizzle(vec<T, 4>(x, 0, 0, 1), s).x;
}
// Wrapper function that swizzles texture gathers.
template<typename T, typename Tex, typename... Ts>
inline vec<T, 4> spvGatherSwizzle(sampler s, const thread Tex& t, Ts... params, component c, uint sw) METAL_CONST_ARG(c)
{
if (sw)
{
switch (spvSwizzle((sw >> (uint(c) * 8)) & 0xFF))
{
case spvSwizzle::none:
break;
case spvSwizzle::zero:
return vec<T, 4>(0, 0, 0, 0);
case spvSwizzle::one:
return vec<T, 4>(1, 1, 1, 1);
case spvSwizzle::red:
return t.gather(s, spvForward<Ts>(params)..., component::x);
case spvSwizzle::green:
return t.gather(s, spvForward<Ts>(params)..., component::y);
case spvSwizzle::blue:
return t.gather(s, spvForward<Ts>(params)..., component::z);
case spvSwizzle::alpha:
return t.gather(s, spvForward<Ts>(params)..., component::w);
}
}
switch (c)
{
case component::x:
return t.gather(s, spvForward<Ts>(params)..., component::x);
case component::y:
return t.gather(s, spvForward<Ts>(params)..., component::y);
case component::z:
return t.gather(s, spvForward<Ts>(params)..., component::z);
case component::w:
return t.gather(s, spvForward<Ts>(params)..., component::w);
}
}
// Wrapper function that swizzles depth texture gathers.
template<typename T, typename Tex, typename... Ts>
inline vec<T, 4> spvGatherCompareSwizzle(sampler s, const thread Tex& t, Ts... params, uint sw)
{
if (sw)
{
switch (spvSwizzle(sw & 0xFF))
{
case spvSwizzle::none:
case spvSwizzle::red:
break;
case spvSwizzle::zero:
case spvSwizzle::green:
case spvSwizzle::blue:
case spvSwizzle::alpha:
return vec<T, 4>(0, 0, 0, 0);
case spvSwizzle::one:
return vec<T, 4>(1, 1, 1, 1);
}
}
return t.gather_compare(s, spvForward<Ts>(params)...);
}
fragment main0_out main0(main0_in in [[stage_in]], constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], constant uint* spvSwizzleConstants [[buffer(30)]], texture2d<float> uSampler1 [[texture(0)]], sampler uSampler1Smplr [[sampler(0)]])
{
main0_out out = {};
constant uint32_t* spvDescriptorSet0_uSampler0Swzl = &spvDescriptorSet0.spvSwizzleConstants[1];
constant uint32_t& uSampler1Swzl = spvSwizzleConstants[0];
out.FragColor = spvTextureSwizzle(spvDescriptorSet0.uSampler0[2].sample(spvDescriptorSet0.uSampler0Smplr[2], in.vUV), spvDescriptorSet0_uSampler0Swzl[2]);
out.FragColor += spvTextureSwizzle(uSampler1.sample(uSampler1Smplr, in.vUV), uSampler1Swzl);
out.FragColor += spvTextureSwizzle(spvDescriptorSet0.uSampler0[1].sample(spvDescriptorSet0.uSampler0Smplr[1], in.vUV), spvDescriptorSet0_uSampler0Swzl[1]);
out.FragColor += spvTextureSwizzle(uSampler1.sample(uSampler1Smplr, in.vUV), uSampler1Swzl);
return out;
}

View File

@ -0,0 +1,146 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct main0_out
{
float4 FragColor [[color(0)]];
};
struct main0_in
{
float2 vUV [[user(locn0)]];
};
enum class spvSwizzle : uint
{
none = 0,
zero,
one,
red,
green,
blue,
alpha
};
template<typename T> struct spvRemoveReference { typedef T type; };
template<typename T> struct spvRemoveReference<thread T&> { typedef T type; };
template<typename T> struct spvRemoveReference<thread T&&> { typedef T type; };
template<typename T> inline constexpr thread T&& spvForward(thread typename spvRemoveReference<T>::type& x)
{
return static_cast<thread T&&>(x);
}
template<typename T> inline constexpr thread T&& spvForward(thread typename spvRemoveReference<T>::type&& x)
{
return static_cast<thread T&&>(x);
}
template<typename T>
inline T spvGetSwizzle(vec<T, 4> x, T c, spvSwizzle s)
{
switch (s)
{
case spvSwizzle::none:
return c;
case spvSwizzle::zero:
return 0;
case spvSwizzle::one:
return 1;
case spvSwizzle::red:
return x.r;
case spvSwizzle::green:
return x.g;
case spvSwizzle::blue:
return x.b;
case spvSwizzle::alpha:
return x.a;
}
}
// Wrapper function that swizzles texture samples and fetches.
template<typename T>
inline vec<T, 4> spvTextureSwizzle(vec<T, 4> x, uint s)
{
if (!s)
return x;
return vec<T, 4>(spvGetSwizzle(x, x.r, spvSwizzle((s >> 0) & 0xFF)), spvGetSwizzle(x, x.g, spvSwizzle((s >> 8) & 0xFF)), spvGetSwizzle(x, x.b, spvSwizzle((s >> 16) & 0xFF)), spvGetSwizzle(x, x.a, spvSwizzle((s >> 24) & 0xFF)));
}
template<typename T>
inline T spvTextureSwizzle(T x, uint s)
{
return spvTextureSwizzle(vec<T, 4>(x, 0, 0, 1), s).x;
}
// Wrapper function that swizzles texture gathers.
template<typename T, typename Tex, typename... Ts>
inline vec<T, 4> spvGatherSwizzle(sampler s, const thread Tex& t, Ts... params, component c, uint sw) METAL_CONST_ARG(c)
{
if (sw)
{
switch (spvSwizzle((sw >> (uint(c) * 8)) & 0xFF))
{
case spvSwizzle::none:
break;
case spvSwizzle::zero:
return vec<T, 4>(0, 0, 0, 0);
case spvSwizzle::one:
return vec<T, 4>(1, 1, 1, 1);
case spvSwizzle::red:
return t.gather(s, spvForward<Ts>(params)..., component::x);
case spvSwizzle::green:
return t.gather(s, spvForward<Ts>(params)..., component::y);
case spvSwizzle::blue:
return t.gather(s, spvForward<Ts>(params)..., component::z);
case spvSwizzle::alpha:
return t.gather(s, spvForward<Ts>(params)..., component::w);
}
}
switch (c)
{
case component::x:
return t.gather(s, spvForward<Ts>(params)..., component::x);
case component::y:
return t.gather(s, spvForward<Ts>(params)..., component::y);
case component::z:
return t.gather(s, spvForward<Ts>(params)..., component::z);
case component::w:
return t.gather(s, spvForward<Ts>(params)..., component::w);
}
}
// Wrapper function that swizzles depth texture gathers.
template<typename T, typename Tex, typename... Ts>
inline vec<T, 4> spvGatherCompareSwizzle(sampler s, const thread Tex& t, Ts... params, uint sw)
{
if (sw)
{
switch (spvSwizzle(sw & 0xFF))
{
case spvSwizzle::none:
case spvSwizzle::red:
break;
case spvSwizzle::zero:
case spvSwizzle::green:
case spvSwizzle::blue:
case spvSwizzle::alpha:
return vec<T, 4>(0, 0, 0, 0);
case spvSwizzle::one:
return vec<T, 4>(1, 1, 1, 1);
}
}
return t.gather_compare(s, spvForward<Ts>(params)...);
}
fragment main0_out main0(main0_in in [[stage_in]], constant uint* spvSwizzleConstants [[buffer(30)]], array<texture2d<float>, 4> uSampler [[texture(0)]], array<sampler, 4> uSamplerSmplr [[sampler(0)]])
{
main0_out out = {};
constant uint32_t* uSamplerSwzl = &spvSwizzleConstants[0];
out.FragColor = spvTextureSwizzle(uSampler[2].sample(uSamplerSmplr[2], in.vUV), uSamplerSwzl[2]);
out.FragColor += spvTextureSwizzle(uSampler[1].sample(uSamplerSmplr[1], in.vUV), uSamplerSwzl[1]);
return out;
}

View File

@ -0,0 +1,50 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct UBO
{
float4 v[64];
};
struct SSBO
{
float4 v[1];
};
struct main0_out
{
float4 FragColor [[color(0)]];
};
struct main0_in
{
int vIndex [[user(locn0)]];
float2 vUV [[user(locn1)]];
};
fragment main0_out main0(main0_in in [[stage_in]], constant UBO* ubos_0 [[buffer(0)]], constant UBO* ubos_1 [[buffer(1)]], const device SSBO* ssbos_0 [[buffer(2)]], const device SSBO* ssbos_1 [[buffer(3)]], array<texture2d<float>, 8> uSamplers [[texture(0)]], array<texture2d<float>, 8> uCombinedSamplers [[texture(8)]], array<sampler, 7> uSamps [[sampler(1)]], array<sampler, 8> uCombinedSamplersSmplr [[sampler(8)]])
{
constant UBO* ubos[] =
{
ubos_0,
ubos_1,
};
const device SSBO* ssbos[] =
{
ssbos_0,
ssbos_1,
};
main0_out out = {};
int _24 = in.vIndex + 10;
int _35 = in.vIndex + 40;
out.FragColor = uSamplers[_24].sample(uSamps[_35], in.vUV);
out.FragColor = uCombinedSamplers[_24].sample(uCombinedSamplersSmplr[_24], in.vUV);
out.FragColor += ubos[(in.vIndex + 20)]->v[_35];
out.FragColor += ssbos[(in.vIndex + 50)]->v[in.vIndex + 60];
return out;
}

View File

@ -0,0 +1,92 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct SSBO
{
float FragColor;
};
inline uint4 spvSubgroupBallot(bool value)
{
simd_vote vote = simd_ballot(value);
// simd_ballot() returns a 64-bit integer-like object, but
// SPIR-V callers expect a uint4. We must convert.
// FIXME: This won't include higher bits if Apple ever supports
// 128 lanes in an SIMD-group.
return uint4((uint)((simd_vote::vote_t)vote & 0xFFFFFFFF), (uint)(((simd_vote::vote_t)vote >> 32) & 0xFFFFFFFF), 0, 0);
}
inline bool spvSubgroupBallotBitExtract(uint4 ballot, uint bit)
{
return !!extract_bits(ballot[bit / 32], bit % 32, 1);
}
inline uint spvSubgroupBallotFindLSB(uint4 ballot)
{
return select(ctz(ballot.x), select(32 + ctz(ballot.y), select(64 + ctz(ballot.z), select(96 + ctz(ballot.w), uint(-1), ballot.w == 0), ballot.z == 0), ballot.y == 0), ballot.x == 0);
}
inline uint spvSubgroupBallotFindMSB(uint4 ballot)
{
return select(128 - (clz(ballot.w) + 1), select(96 - (clz(ballot.z) + 1), select(64 - (clz(ballot.y) + 1), select(32 - (clz(ballot.x) + 1), uint(-1), ballot.x == 0), ballot.y == 0), ballot.z == 0), ballot.w == 0);
}
inline uint spvSubgroupBallotBitCount(uint4 ballot)
{
return popcount(ballot.x) + popcount(ballot.y) + popcount(ballot.z) + popcount(ballot.w);
}
inline uint spvSubgroupBallotInclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID)
{
uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), uint2(0));
return spvSubgroupBallotBitCount(ballot & mask);
}
inline uint spvSubgroupBallotExclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID)
{
uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0));
return spvSubgroupBallotBitCount(ballot & mask);
}
template<typename T>
inline bool spvSubgroupAllEqual(T value)
{
return simd_all(value == simd_broadcast_first(value));
}
template<>
inline bool spvSubgroupAllEqual(bool value)
{
return simd_all(value) || !simd_any(value);
}
kernel void main0(device SSBO& _9 [[buffer(0)]], uint gl_NumSubgroups [[simdgroups_per_threadgroup]], uint gl_SubgroupID [[simdgroup_index_in_threadgroup]], uint gl_SubgroupSize [[thread_execution_width]], uint gl_SubgroupInvocationID [[thread_index_in_simdgroup]])
{
uint4 gl_SubgroupEqMask = 27 > 32 ? uint4(0, (1 << (gl_SubgroupInvocationID - 32)), uint2(0)) : uint4(1 << gl_SubgroupInvocationID, uint3(0));
uint4 gl_SubgroupGeMask = uint4(extract_bits(0xFFFFFFFF, min(gl_SubgroupInvocationID, 32u), (uint)max(min((int)gl_SubgroupSize, 32) - (int)gl_SubgroupInvocationID, 0)), extract_bits(0xFFFFFFFF, (uint)max((int)gl_SubgroupInvocationID - 32, 0), (uint)max((int)gl_SubgroupSize - (int)max(gl_SubgroupInvocationID, 32u), 0)), uint2(0));
uint4 gl_SubgroupGtMask = uint4(extract_bits(0xFFFFFFFF, min(gl_SubgroupInvocationID + 1, 32u), (uint)max(min((int)gl_SubgroupSize, 32) - (int)gl_SubgroupInvocationID - 1, 0)), extract_bits(0xFFFFFFFF, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0), (uint)max((int)gl_SubgroupSize - (int)max(gl_SubgroupInvocationID + 1, 32u), 0)), uint2(0));
uint4 gl_SubgroupLeMask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), uint2(0));
uint4 gl_SubgroupLtMask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0));
_9.FragColor = float(gl_NumSubgroups);
_9.FragColor = float(gl_SubgroupID);
_9.FragColor = float(gl_SubgroupSize);
_9.FragColor = float(gl_SubgroupInvocationID);
simdgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture);
simdgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture);
simdgroup_barrier(mem_flags::mem_device);
simdgroup_barrier(mem_flags::mem_threadgroup);
simdgroup_barrier(mem_flags::mem_texture);
_9.FragColor = float4(gl_SubgroupEqMask).x;
_9.FragColor = float4(gl_SubgroupGeMask).x;
_9.FragColor = float4(gl_SubgroupGtMask).x;
_9.FragColor = float4(gl_SubgroupLeMask).x;
_9.FragColor = float4(gl_SubgroupLtMask).x;
uint4 _83 = spvSubgroupBallot(true);
float4 _165 = simd_prefix_inclusive_product(simd_product(float4(20.0)));
int4 _167 = simd_prefix_inclusive_product(simd_product(int4(20)));
}

View File

@ -0,0 +1,23 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct SSBO
{
float FragColor;
};
kernel void main0(device SSBO& _9 [[buffer(0)]], uint gl_NumSubgroups [[quadgroups_per_threadgroup]], uint gl_SubgroupID [[quadgroup_index_in_threadgroup]], uint gl_SubgroupSize [[thread_execution_width]], uint gl_SubgroupInvocationID [[thread_index_in_quadgroup]])
{
_9.FragColor = float(gl_NumSubgroups);
_9.FragColor = float(gl_SubgroupID);
_9.FragColor = float(gl_SubgroupSize);
_9.FragColor = float(gl_SubgroupInvocationID);
simdgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture);
simdgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture);
simdgroup_barrier(mem_flags::mem_device);
simdgroup_barrier(mem_flags::mem_threadgroup);
simdgroup_barrier(mem_flags::mem_texture);
}

View File

@ -0,0 +1,14 @@
#version 450
layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
layout(binding = 1, std140) buffer SSBO
{
uint size;
float v[];
} _11;
void main()
{
_11.size = uint(int(uint(_11.v.length())));
}

View File

@ -0,0 +1,26 @@
#version 450
#extension GL_EXT_buffer_reference : require
layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
layout(buffer_reference) buffer PtrUint;
layout(buffer_reference) buffer PtrInt;
layout(buffer_reference, std430) buffer PtrUint
{
uint value;
};
layout(buffer_reference, std430) buffer PtrInt
{
int value;
};
layout(set = 0, binding = 0, std430) buffer Buf
{
PtrUint ptr;
} _11;
void main()
{
PtrInt(_11.ptr).value = 10;
}

View File

@ -0,0 +1,15 @@
RWByteAddressBuffer _11 : register(u1);
void comp_main()
{
uint _14;
_11.GetDimensions(_14);
_14 = (_14 - 16) / 16;
_11.Store(0, uint(int(_14)));
}
[numthreads(1, 1, 1)]
void main()
{
comp_main();
}

View File

@ -0,0 +1,46 @@
struct UBO_1_1
{
float4 v[64];
};
ConstantBuffer<UBO_1_1> ubos[] : register(b0, space3);
ByteAddressBuffer ssbos[] : register(t0, space4);
Texture2D<float4> uSamplers[] : register(t0, space0);
SamplerState uSamps[] : register(s0, space2);
Texture2D<float4> uCombinedSamplers[] : register(t0, space1);
SamplerState _uCombinedSamplers_sampler[] : register(s0, space1);
static int vIndex;
static float4 FragColor;
static float2 vUV;
struct SPIRV_Cross_Input
{
nointerpolation int vIndex : TEXCOORD0;
float2 vUV : TEXCOORD1;
};
struct SPIRV_Cross_Output
{
float4 FragColor : SV_Target0;
};
void frag_main()
{
int i = vIndex;
FragColor = uSamplers[NonUniformResourceIndex(i + 10)].Sample(uSamps[NonUniformResourceIndex(i + 40)], vUV);
int _47 = i + 10;
FragColor = uCombinedSamplers[NonUniformResourceIndex(_47)].Sample(_uCombinedSamplers_sampler[NonUniformResourceIndex(_47)], vUV);
FragColor += ubos[NonUniformResourceIndex(i + 20)].v[i + 40];
FragColor += asfloat(ssbos[NonUniformResourceIndex(i + 50)].Load4((i + 60) * 16 + 0));
}
SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
{
vIndex = stage_input.vIndex;
vUV = stage_input.vUV;
frag_main();
SPIRV_Cross_Output stage_output;
stage_output.FragColor = FragColor;
return stage_output;
}

View File

@ -5,11 +5,6 @@
using namespace metal;
struct spvAux
{
uint swizzleConst[1];
};
// Returns 2D texture coords corresponding to 1D texel buffer coords
uint2 spvTexelBufferCoord(uint tc)
{
@ -136,18 +131,18 @@ inline vec<T, 4> spvGatherCompareSwizzle(sampler s, const thread Tex& t, Ts... p
return t.gather_compare(s, spvForward<Ts>(params)...);
}
fragment void main0(constant spvAux& spvAuxBuffer [[buffer(30)]], texture1d<float> tex1d [[texture(0)]], texture2d<float> tex2d [[texture(1)]], texture3d<float> tex3d [[texture(2)]], texturecube<float> texCube [[texture(3)]], texture2d_array<float> tex2dArray [[texture(4)]], texturecube_array<float> texCubeArray [[texture(5)]], texture2d<float> texBuffer [[texture(6)]], depth2d<float> depth2d [[texture(7)]], depthcube<float> depthCube [[texture(8)]], depth2d_array<float> depth2dArray [[texture(9)]], depthcube_array<float> depthCubeArray [[texture(10)]], sampler tex1dSamp [[sampler(0)]], sampler tex2dSamp [[sampler(1)]], sampler tex3dSamp [[sampler(2)]], sampler texCubeSamp [[sampler(3)]], sampler tex2dArraySamp [[sampler(4)]], sampler texCubeArraySamp [[sampler(5)]], sampler depth2dSamp [[sampler(7)]], sampler depthCubeSamp [[sampler(8)]], sampler depth2dArraySamp [[sampler(9)]], sampler depthCubeArraySamp [[sampler(10)]])
fragment void main0(constant uint* spvSwizzleConstants [[buffer(30)]], texture1d<float> tex1d [[texture(0)]], texture2d<float> tex2d [[texture(1)]], texture3d<float> tex3d [[texture(2)]], texturecube<float> texCube [[texture(3)]], texture2d_array<float> tex2dArray [[texture(4)]], texturecube_array<float> texCubeArray [[texture(5)]], texture2d<float> texBuffer [[texture(6)]], depth2d<float> depth2d [[texture(7)]], depthcube<float> depthCube [[texture(8)]], depth2d_array<float> depth2dArray [[texture(9)]], depthcube_array<float> depthCubeArray [[texture(10)]], sampler tex1dSamp [[sampler(0)]], sampler tex2dSamp [[sampler(1)]], sampler tex3dSamp [[sampler(2)]], sampler texCubeSamp [[sampler(3)]], sampler tex2dArraySamp [[sampler(4)]], sampler texCubeArraySamp [[sampler(5)]], sampler depth2dSamp [[sampler(7)]], sampler depthCubeSamp [[sampler(8)]], sampler depth2dArraySamp [[sampler(9)]], sampler depthCubeArraySamp [[sampler(10)]])
{
constant uint32_t& tex1dSwzl = spvAuxBuffer.swizzleConst[0];
constant uint32_t& tex2dSwzl = spvAuxBuffer.swizzleConst[1];
constant uint32_t& tex3dSwzl = spvAuxBuffer.swizzleConst[2];
constant uint32_t& texCubeSwzl = spvAuxBuffer.swizzleConst[3];
constant uint32_t& tex2dArraySwzl = spvAuxBuffer.swizzleConst[4];
constant uint32_t& texCubeArraySwzl = spvAuxBuffer.swizzleConst[5];
constant uint32_t& depth2dSwzl = spvAuxBuffer.swizzleConst[7];
constant uint32_t& depthCubeSwzl = spvAuxBuffer.swizzleConst[8];
constant uint32_t& depth2dArraySwzl = spvAuxBuffer.swizzleConst[9];
constant uint32_t& depthCubeArraySwzl = spvAuxBuffer.swizzleConst[10];
constant uint32_t& tex1dSwzl = spvSwizzleConstants[0];
constant uint32_t& tex2dSwzl = spvSwizzleConstants[1];
constant uint32_t& tex3dSwzl = spvSwizzleConstants[2];
constant uint32_t& texCubeSwzl = spvSwizzleConstants[3];
constant uint32_t& tex2dArraySwzl = spvSwizzleConstants[4];
constant uint32_t& texCubeArraySwzl = spvSwizzleConstants[5];
constant uint32_t& depth2dSwzl = spvSwizzleConstants[7];
constant uint32_t& depthCubeSwzl = spvSwizzleConstants[8];
constant uint32_t& depth2dArraySwzl = spvSwizzleConstants[9];
constant uint32_t& depthCubeArraySwzl = spvSwizzleConstants[10];
float4 c = spvTextureSwizzle(tex1d.sample(tex1dSamp, 0.0), tex1dSwzl);
c = spvTextureSwizzle(tex2d.sample(tex2dSamp, float2(0.0)), tex2dSwzl);
c = spvTextureSwizzle(tex3d.sample(tex3dSamp, float3(0.0)), tex3dSwzl);

View File

@ -5,11 +5,6 @@
using namespace metal;
struct spvAux
{
uint swizzleConst[1];
};
// Returns 2D texture coords corresponding to 1D texel buffer coords
uint2 spvTexelBufferCoord(uint tc)
{
@ -136,14 +131,14 @@ inline vec<T, 4> spvGatherCompareSwizzle(sampler s, const thread Tex& t, Ts... p
return t.gather_compare(s, spvForward<Ts>(params)...);
}
fragment void main0(constant spvAux& spvAuxBuffer [[buffer(30)]], texture1d<int> tex1d [[texture(0)]], texture2d<int> tex2d [[texture(1)]], texture3d<int> tex3d [[texture(2)]], texturecube<int> texCube [[texture(3)]], texture2d_array<int> tex2dArray [[texture(4)]], texturecube_array<int> texCubeArray [[texture(5)]], texture2d<int> texBuffer [[texture(6)]], sampler tex1dSmplr [[sampler(0)]], sampler tex2dSmplr [[sampler(1)]], sampler tex3dSmplr [[sampler(2)]], sampler texCubeSmplr [[sampler(3)]], sampler tex2dArraySmplr [[sampler(4)]], sampler texCubeArraySmplr [[sampler(5)]])
fragment void main0(constant uint* spvSwizzleConstants [[buffer(30)]], texture1d<int> tex1d [[texture(0)]], texture2d<int> tex2d [[texture(1)]], texture3d<int> tex3d [[texture(2)]], texturecube<int> texCube [[texture(3)]], texture2d_array<int> tex2dArray [[texture(4)]], texturecube_array<int> texCubeArray [[texture(5)]], texture2d<int> texBuffer [[texture(6)]], sampler tex1dSmplr [[sampler(0)]], sampler tex2dSmplr [[sampler(1)]], sampler tex3dSmplr [[sampler(2)]], sampler texCubeSmplr [[sampler(3)]], sampler tex2dArraySmplr [[sampler(4)]], sampler texCubeArraySmplr [[sampler(5)]])
{
constant uint32_t& tex1dSwzl = spvAuxBuffer.swizzleConst[0];
constant uint32_t& tex2dSwzl = spvAuxBuffer.swizzleConst[1];
constant uint32_t& tex3dSwzl = spvAuxBuffer.swizzleConst[2];
constant uint32_t& texCubeSwzl = spvAuxBuffer.swizzleConst[3];
constant uint32_t& tex2dArraySwzl = spvAuxBuffer.swizzleConst[4];
constant uint32_t& texCubeArraySwzl = spvAuxBuffer.swizzleConst[5];
constant uint32_t& tex1dSwzl = spvSwizzleConstants[0];
constant uint32_t& tex2dSwzl = spvSwizzleConstants[1];
constant uint32_t& tex3dSwzl = spvSwizzleConstants[2];
constant uint32_t& texCubeSwzl = spvSwizzleConstants[3];
constant uint32_t& tex2dArraySwzl = spvSwizzleConstants[4];
constant uint32_t& texCubeArraySwzl = spvSwizzleConstants[5];
float4 c = float4(spvTextureSwizzle(tex1d.sample(tex1dSmplr, 0.0), tex1dSwzl));
c = float4(spvTextureSwizzle(tex2d.sample(tex2dSmplr, float2(0.0)), tex2dSwzl));
c = float4(spvTextureSwizzle(tex3d.sample(tex3dSmplr, float3(0.0)), tex3dSwzl));

View File

@ -5,11 +5,6 @@
using namespace metal;
struct spvAux
{
uint swizzleConst[1];
};
// Returns 2D texture coords corresponding to 1D texel buffer coords
uint2 spvTexelBufferCoord(uint tc)
{
@ -183,18 +178,18 @@ float4 doSwizzle(thread texture1d<float> tex1d, thread const sampler tex1dSmplr,
return c;
}
fragment void main0(constant spvAux& spvAuxBuffer [[buffer(30)]], texture1d<float> tex1d [[texture(0)]], texture2d<float> tex2d [[texture(1)]], texture3d<float> tex3d [[texture(2)]], texturecube<float> texCube [[texture(3)]], texture2d_array<float> tex2dArray [[texture(4)]], texturecube_array<float> texCubeArray [[texture(5)]], texture2d<float> texBuffer [[texture(6)]], depth2d<float> depth2d [[texture(7)]], depthcube<float> depthCube [[texture(8)]], depth2d_array<float> depth2dArray [[texture(9)]], depthcube_array<float> depthCubeArray [[texture(10)]], sampler tex1dSmplr [[sampler(0)]], sampler tex2dSmplr [[sampler(1)]], sampler tex3dSmplr [[sampler(2)]], sampler texCubeSmplr [[sampler(3)]], sampler tex2dArraySmplr [[sampler(4)]], sampler texCubeArraySmplr [[sampler(5)]], sampler depth2dSmplr [[sampler(7)]], sampler depthCubeSmplr [[sampler(8)]], sampler depth2dArraySmplr [[sampler(9)]], sampler depthCubeArraySmplr [[sampler(10)]])
fragment void main0(constant uint* spvSwizzleConstants [[buffer(30)]], texture1d<float> tex1d [[texture(0)]], texture2d<float> tex2d [[texture(1)]], texture3d<float> tex3d [[texture(2)]], texturecube<float> texCube [[texture(3)]], texture2d_array<float> tex2dArray [[texture(4)]], texturecube_array<float> texCubeArray [[texture(5)]], texture2d<float> texBuffer [[texture(6)]], depth2d<float> depth2d [[texture(7)]], depthcube<float> depthCube [[texture(8)]], depth2d_array<float> depth2dArray [[texture(9)]], depthcube_array<float> depthCubeArray [[texture(10)]], sampler tex1dSmplr [[sampler(0)]], sampler tex2dSmplr [[sampler(1)]], sampler tex3dSmplr [[sampler(2)]], sampler texCubeSmplr [[sampler(3)]], sampler tex2dArraySmplr [[sampler(4)]], sampler texCubeArraySmplr [[sampler(5)]], sampler depth2dSmplr [[sampler(7)]], sampler depthCubeSmplr [[sampler(8)]], sampler depth2dArraySmplr [[sampler(9)]], sampler depthCubeArraySmplr [[sampler(10)]])
{
constant uint32_t& tex1dSwzl = spvAuxBuffer.swizzleConst[0];
constant uint32_t& tex2dSwzl = spvAuxBuffer.swizzleConst[1];
constant uint32_t& tex3dSwzl = spvAuxBuffer.swizzleConst[2];
constant uint32_t& texCubeSwzl = spvAuxBuffer.swizzleConst[3];
constant uint32_t& tex2dArraySwzl = spvAuxBuffer.swizzleConst[4];
constant uint32_t& texCubeArraySwzl = spvAuxBuffer.swizzleConst[5];
constant uint32_t& depth2dSwzl = spvAuxBuffer.swizzleConst[7];
constant uint32_t& depthCubeSwzl = spvAuxBuffer.swizzleConst[8];
constant uint32_t& depth2dArraySwzl = spvAuxBuffer.swizzleConst[9];
constant uint32_t& depthCubeArraySwzl = spvAuxBuffer.swizzleConst[10];
constant uint32_t& tex1dSwzl = spvSwizzleConstants[0];
constant uint32_t& tex2dSwzl = spvSwizzleConstants[1];
constant uint32_t& tex3dSwzl = spvSwizzleConstants[2];
constant uint32_t& texCubeSwzl = spvSwizzleConstants[3];
constant uint32_t& tex2dArraySwzl = spvSwizzleConstants[4];
constant uint32_t& texCubeArraySwzl = spvSwizzleConstants[5];
constant uint32_t& depth2dSwzl = spvSwizzleConstants[7];
constant uint32_t& depthCubeSwzl = spvSwizzleConstants[8];
constant uint32_t& depth2dArraySwzl = spvSwizzleConstants[9];
constant uint32_t& depthCubeArraySwzl = spvSwizzleConstants[10];
float4 c = doSwizzle(tex1d, tex1dSmplr, tex1dSwzl, tex2d, tex2dSmplr, tex2dSwzl, tex3d, tex3dSmplr, tex3dSwzl, texCube, texCubeSmplr, texCubeSwzl, tex2dArray, tex2dArraySmplr, tex2dArraySwzl, texCubeArray, texCubeArraySmplr, texCubeArraySwzl, depth2d, depth2dSmplr, depth2dSwzl, depthCube, depthCubeSmplr, depthCubeSwzl, depth2dArray, depth2dArraySmplr, depth2dArraySwzl, depthCubeArray, depthCubeArraySmplr, depthCubeArraySwzl, texBuffer);
}

View File

@ -5,11 +5,6 @@
using namespace metal;
struct spvAux
{
uint swizzleConst[1];
};
// Returns 2D texture coords corresponding to 1D texel buffer coords
uint2 spvTexelBufferCoord(uint tc)
{
@ -136,14 +131,14 @@ inline vec<T, 4> spvGatherCompareSwizzle(sampler s, const thread Tex& t, Ts... p
return t.gather_compare(s, spvForward<Ts>(params)...);
}
fragment void main0(constant spvAux& spvAuxBuffer [[buffer(30)]], texture1d<uint> tex1d [[texture(0)]], texture2d<uint> tex2d [[texture(1)]], texture3d<uint> tex3d [[texture(2)]], texturecube<uint> texCube [[texture(3)]], texture2d_array<uint> tex2dArray [[texture(4)]], texturecube_array<uint> texCubeArray [[texture(5)]], texture2d<uint> texBuffer [[texture(6)]], sampler tex1dSmplr [[sampler(0)]], sampler tex2dSmplr [[sampler(1)]], sampler tex3dSmplr [[sampler(2)]], sampler texCubeSmplr [[sampler(3)]], sampler tex2dArraySmplr [[sampler(4)]], sampler texCubeArraySmplr [[sampler(5)]])
fragment void main0(constant uint* spvSwizzleConstants [[buffer(30)]], texture1d<uint> tex1d [[texture(0)]], texture2d<uint> tex2d [[texture(1)]], texture3d<uint> tex3d [[texture(2)]], texturecube<uint> texCube [[texture(3)]], texture2d_array<uint> tex2dArray [[texture(4)]], texturecube_array<uint> texCubeArray [[texture(5)]], texture2d<uint> texBuffer [[texture(6)]], sampler tex1dSmplr [[sampler(0)]], sampler tex2dSmplr [[sampler(1)]], sampler tex3dSmplr [[sampler(2)]], sampler texCubeSmplr [[sampler(3)]], sampler tex2dArraySmplr [[sampler(4)]], sampler texCubeArraySmplr [[sampler(5)]])
{
constant uint32_t& tex1dSwzl = spvAuxBuffer.swizzleConst[0];
constant uint32_t& tex2dSwzl = spvAuxBuffer.swizzleConst[1];
constant uint32_t& tex3dSwzl = spvAuxBuffer.swizzleConst[2];
constant uint32_t& texCubeSwzl = spvAuxBuffer.swizzleConst[3];
constant uint32_t& tex2dArraySwzl = spvAuxBuffer.swizzleConst[4];
constant uint32_t& texCubeArraySwzl = spvAuxBuffer.swizzleConst[5];
constant uint32_t& tex1dSwzl = spvSwizzleConstants[0];
constant uint32_t& tex2dSwzl = spvSwizzleConstants[1];
constant uint32_t& tex3dSwzl = spvSwizzleConstants[2];
constant uint32_t& texCubeSwzl = spvSwizzleConstants[3];
constant uint32_t& tex2dArraySwzl = spvSwizzleConstants[4];
constant uint32_t& texCubeArraySwzl = spvSwizzleConstants[5];
float4 c = float4(spvTextureSwizzle(tex1d.sample(tex1dSmplr, 0.0), tex1dSwzl));
c = float4(spvTextureSwizzle(tex2d.sample(tex2dSmplr, float2(0.0)), tex2dSwzl));
c = float4(spvTextureSwizzle(tex3d.sample(tex3dSmplr, float3(0.0)), tex3dSwzl));

View File

@ -5,11 +5,6 @@
using namespace metal;
struct spvAux
{
uint swizzleConst[1];
};
// Returns 2D texture coords corresponding to 1D texel buffer coords
uint2 spvTexelBufferCoord(uint tc)
{
@ -136,18 +131,18 @@ inline vec<T, 4> spvGatherCompareSwizzle(sampler s, const thread Tex& t, Ts... p
return t.gather_compare(s, spvForward<Ts>(params)...);
}
fragment void main0(constant spvAux& spvAuxBuffer [[buffer(30)]], texture1d<float> tex1d [[texture(0)]], texture2d<float> tex2d [[texture(1)]], texture3d<float> tex3d [[texture(2)]], texturecube<float> texCube [[texture(3)]], texture2d_array<float> tex2dArray [[texture(4)]], texturecube_array<float> texCubeArray [[texture(5)]], texture2d<float> texBuffer [[texture(6)]], depth2d<float> depth2d [[texture(7)]], depthcube<float> depthCube [[texture(8)]], depth2d_array<float> depth2dArray [[texture(9)]], depthcube_array<float> depthCubeArray [[texture(10)]], sampler tex1dSmplr [[sampler(0)]], sampler tex2dSmplr [[sampler(1)]], sampler tex3dSmplr [[sampler(2)]], sampler texCubeSmplr [[sampler(3)]], sampler tex2dArraySmplr [[sampler(4)]], sampler texCubeArraySmplr [[sampler(5)]], sampler depth2dSmplr [[sampler(7)]], sampler depthCubeSmplr [[sampler(8)]], sampler depth2dArraySmplr [[sampler(9)]], sampler depthCubeArraySmplr [[sampler(10)]])
fragment void main0(constant uint* spvSwizzleConstants [[buffer(30)]], texture1d<float> tex1d [[texture(0)]], texture2d<float> tex2d [[texture(1)]], texture3d<float> tex3d [[texture(2)]], texturecube<float> texCube [[texture(3)]], texture2d_array<float> tex2dArray [[texture(4)]], texturecube_array<float> texCubeArray [[texture(5)]], texture2d<float> texBuffer [[texture(6)]], depth2d<float> depth2d [[texture(7)]], depthcube<float> depthCube [[texture(8)]], depth2d_array<float> depth2dArray [[texture(9)]], depthcube_array<float> depthCubeArray [[texture(10)]], sampler tex1dSmplr [[sampler(0)]], sampler tex2dSmplr [[sampler(1)]], sampler tex3dSmplr [[sampler(2)]], sampler texCubeSmplr [[sampler(3)]], sampler tex2dArraySmplr [[sampler(4)]], sampler texCubeArraySmplr [[sampler(5)]], sampler depth2dSmplr [[sampler(7)]], sampler depthCubeSmplr [[sampler(8)]], sampler depth2dArraySmplr [[sampler(9)]], sampler depthCubeArraySmplr [[sampler(10)]])
{
constant uint32_t& tex1dSwzl = spvAuxBuffer.swizzleConst[0];
constant uint32_t& tex2dSwzl = spvAuxBuffer.swizzleConst[1];
constant uint32_t& tex3dSwzl = spvAuxBuffer.swizzleConst[2];
constant uint32_t& texCubeSwzl = spvAuxBuffer.swizzleConst[3];
constant uint32_t& tex2dArraySwzl = spvAuxBuffer.swizzleConst[4];
constant uint32_t& texCubeArraySwzl = spvAuxBuffer.swizzleConst[5];
constant uint32_t& depth2dSwzl = spvAuxBuffer.swizzleConst[7];
constant uint32_t& depthCubeSwzl = spvAuxBuffer.swizzleConst[8];
constant uint32_t& depth2dArraySwzl = spvAuxBuffer.swizzleConst[9];
constant uint32_t& depthCubeArraySwzl = spvAuxBuffer.swizzleConst[10];
constant uint32_t& tex1dSwzl = spvSwizzleConstants[0];
constant uint32_t& tex2dSwzl = spvSwizzleConstants[1];
constant uint32_t& tex3dSwzl = spvSwizzleConstants[2];
constant uint32_t& texCubeSwzl = spvSwizzleConstants[3];
constant uint32_t& tex2dArraySwzl = spvSwizzleConstants[4];
constant uint32_t& texCubeArraySwzl = spvSwizzleConstants[5];
constant uint32_t& depth2dSwzl = spvSwizzleConstants[7];
constant uint32_t& depthCubeSwzl = spvSwizzleConstants[8];
constant uint32_t& depth2dArraySwzl = spvSwizzleConstants[9];
constant uint32_t& depthCubeArraySwzl = spvSwizzleConstants[10];
float4 c = spvTextureSwizzle(tex1d.sample(tex1dSmplr, 0.0), tex1dSwzl);
c = spvTextureSwizzle(tex2d.sample(tex2dSmplr, float2(0.0)), tex2dSwzl);
c = spvTextureSwizzle(tex3d.sample(tex3dSmplr, float3(0.0)), tex3dSwzl);

View File

@ -5,11 +5,6 @@
using namespace metal;
struct spvAux
{
uint swizzleConst[1];
};
struct main0_out
{
float4 fragColor [[color(0)]];
@ -188,19 +183,19 @@ float4 do_samples(thread const texture1d<float> t1, thread const sampler t1Smplr
return c;
}
fragment main0_out main0(constant spvAux& spvAuxBuffer [[buffer(30)]], texture1d<float> tex1d [[texture(0)]], texture2d<float> tex2d [[texture(1)]], texture3d<float> tex3d [[texture(2)]], texturecube<float> texCube [[texture(3)]], texture2d_array<float> tex2dArray [[texture(4)]], texturecube_array<float> texCubeArray [[texture(5)]], texture2d<float> texBuffer [[texture(6)]], depth2d<float> depth2d [[texture(7)]], depthcube<float> depthCube [[texture(8)]], depth2d_array<float> depth2dArray [[texture(9)]], depthcube_array<float> depthCubeArray [[texture(10)]], sampler tex1dSmplr [[sampler(0)]], sampler tex3dSmplr [[sampler(2)]], sampler tex2dArraySmplr [[sampler(4)]], sampler texCubeArraySmplr [[sampler(5)]], sampler depth2dSmplr [[sampler(7)]], sampler depthCubeSmplr [[sampler(8)]], sampler depthCubeArraySmplr [[sampler(10)]], sampler defaultSampler [[sampler(11)]], sampler shadowSampler [[sampler(12)]])
fragment main0_out main0(constant uint* spvSwizzleConstants [[buffer(30)]], texture1d<float> tex1d [[texture(0)]], texture2d<float> tex2d [[texture(1)]], texture3d<float> tex3d [[texture(2)]], texturecube<float> texCube [[texture(3)]], texture2d_array<float> tex2dArray [[texture(4)]], texturecube_array<float> texCubeArray [[texture(5)]], texture2d<float> texBuffer [[texture(6)]], depth2d<float> depth2d [[texture(7)]], depthcube<float> depthCube [[texture(8)]], depth2d_array<float> depth2dArray [[texture(9)]], depthcube_array<float> depthCubeArray [[texture(10)]], sampler tex1dSmplr [[sampler(0)]], sampler tex3dSmplr [[sampler(2)]], sampler tex2dArraySmplr [[sampler(4)]], sampler texCubeArraySmplr [[sampler(5)]], sampler depth2dSmplr [[sampler(7)]], sampler depthCubeSmplr [[sampler(8)]], sampler depthCubeArraySmplr [[sampler(10)]], sampler defaultSampler [[sampler(11)]], sampler shadowSampler [[sampler(12)]])
{
main0_out out = {};
constant uint32_t& tex1dSwzl = spvAuxBuffer.swizzleConst[0];
constant uint32_t& tex2dSwzl = spvAuxBuffer.swizzleConst[1];
constant uint32_t& tex3dSwzl = spvAuxBuffer.swizzleConst[2];
constant uint32_t& texCubeSwzl = spvAuxBuffer.swizzleConst[3];
constant uint32_t& tex2dArraySwzl = spvAuxBuffer.swizzleConst[4];
constant uint32_t& texCubeArraySwzl = spvAuxBuffer.swizzleConst[5];
constant uint32_t& depth2dSwzl = spvAuxBuffer.swizzleConst[7];
constant uint32_t& depthCubeSwzl = spvAuxBuffer.swizzleConst[8];
constant uint32_t& depth2dArraySwzl = spvAuxBuffer.swizzleConst[9];
constant uint32_t& depthCubeArraySwzl = spvAuxBuffer.swizzleConst[10];
constant uint32_t& tex1dSwzl = spvSwizzleConstants[0];
constant uint32_t& tex2dSwzl = spvSwizzleConstants[1];
constant uint32_t& tex3dSwzl = spvSwizzleConstants[2];
constant uint32_t& texCubeSwzl = spvSwizzleConstants[3];
constant uint32_t& tex2dArraySwzl = spvSwizzleConstants[4];
constant uint32_t& texCubeArraySwzl = spvSwizzleConstants[5];
constant uint32_t& depth2dSwzl = spvSwizzleConstants[7];
constant uint32_t& depthCubeSwzl = spvSwizzleConstants[8];
constant uint32_t& depth2dArraySwzl = spvSwizzleConstants[9];
constant uint32_t& depthCubeArraySwzl = spvSwizzleConstants[10];
out.fragColor = do_samples(tex1d, tex1dSmplr, tex1dSwzl, tex2d, tex2dSwzl, tex3d, tex3dSmplr, tex3dSwzl, texCube, texCubeSwzl, tex2dArray, tex2dArraySmplr, tex2dArraySwzl, texCubeArray, texCubeArraySmplr, texCubeArraySwzl, texBuffer, depth2d, depth2dSmplr, depth2dSwzl, depthCube, depthCubeSmplr, depthCubeSwzl, depth2dArray, depth2dArraySwzl, depthCubeArray, depthCubeArraySmplr, depthCubeArraySwzl, defaultSampler, shadowSampler);
return out;
}

View File

@ -14,17 +14,22 @@ void barrier_shared()
void full_barrier()
{
threadgroup_barrier(mem_flags::mem_threadgroup);
threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture);
}
void image_barrier()
{
threadgroup_barrier(mem_flags::mem_texture);
}
void buffer_barrier()
{
threadgroup_barrier(mem_flags::mem_none);
threadgroup_barrier(mem_flags::mem_device);
}
void group_barrier()
{
threadgroup_barrier(mem_flags::mem_threadgroup);
threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture);
}
void barrier_shared_exec()
@ -34,17 +39,22 @@ void barrier_shared_exec()
void full_barrier_exec()
{
threadgroup_barrier(mem_flags::mem_threadgroup);
threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture);
}
void image_barrier_exec()
{
threadgroup_barrier(mem_flags::mem_texture);
}
void buffer_barrier_exec()
{
threadgroup_barrier(mem_flags::mem_none);
threadgroup_barrier(mem_flags::mem_device);
}
void group_barrier_exec()
{
threadgroup_barrier(mem_flags::mem_threadgroup);
threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture);
}
void exec_barrier()
@ -56,10 +66,12 @@ kernel void main0()
{
barrier_shared();
full_barrier();
image_barrier();
buffer_barrier();
group_barrier();
barrier_shared_exec();
full_barrier_exec();
image_barrier_exec();
buffer_barrier_exec();
group_barrier_exec();
exec_barrier();

View File

@ -5,11 +5,6 @@
using namespace metal;
struct spvAux
{
uint swizzleConst[1];
};
enum class spvSwizzle : uint
{
none = 0,
@ -130,9 +125,9 @@ inline vec<T, 4> spvGatherCompareSwizzle(sampler s, const thread Tex& t, Ts... p
return t.gather_compare(s, spvForward<Ts>(params)...);
}
kernel void main0(constant spvAux& spvAuxBuffer [[buffer(30)]], texture2d<float> foo [[texture(0)]], texture2d<float, access::write> bar [[texture(1)]], sampler fooSmplr [[sampler(0)]])
kernel void main0(constant uint* spvSwizzleConstants [[buffer(30)]], texture2d<float> foo [[texture(0)]], texture2d<float, access::write> bar [[texture(1)]], sampler fooSmplr [[sampler(0)]])
{
constant uint32_t& fooSwzl = spvAuxBuffer.swizzleConst[0];
constant uint32_t& fooSwzl = spvSwizzleConstants[0];
float4 a = spvTextureSwizzle(foo.sample(fooSmplr, float2(1.0), level(0.0)), fooSwzl);
bar.write(a, uint2(int2(0)));
}

View File

@ -0,0 +1,171 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct spvDescriptorSetBuffer0
{
constant uint* spvSwizzleConstants [[id(0)]];
array<texture2d<float>, 4> uSampler0 [[id(1)]];
array<sampler, 4> uSampler0Smplr [[id(5)]];
};
struct main0_out
{
float4 FragColor [[color(0)]];
};
struct main0_in
{
float2 vUV [[user(locn0)]];
};
enum class spvSwizzle : uint
{
none = 0,
zero,
one,
red,
green,
blue,
alpha
};
template<typename T> struct spvRemoveReference { typedef T type; };
template<typename T> struct spvRemoveReference<thread T&> { typedef T type; };
template<typename T> struct spvRemoveReference<thread T&&> { typedef T type; };
template<typename T> inline constexpr thread T&& spvForward(thread typename spvRemoveReference<T>::type& x)
{
return static_cast<thread T&&>(x);
}
template<typename T> inline constexpr thread T&& spvForward(thread typename spvRemoveReference<T>::type&& x)
{
return static_cast<thread T&&>(x);
}
template<typename T>
inline T spvGetSwizzle(vec<T, 4> x, T c, spvSwizzle s)
{
switch (s)
{
case spvSwizzle::none:
return c;
case spvSwizzle::zero:
return 0;
case spvSwizzle::one:
return 1;
case spvSwizzle::red:
return x.r;
case spvSwizzle::green:
return x.g;
case spvSwizzle::blue:
return x.b;
case spvSwizzle::alpha:
return x.a;
}
}
// Wrapper function that swizzles texture samples and fetches.
template<typename T>
inline vec<T, 4> spvTextureSwizzle(vec<T, 4> x, uint s)
{
if (!s)
return x;
return vec<T, 4>(spvGetSwizzle(x, x.r, spvSwizzle((s >> 0) & 0xFF)), spvGetSwizzle(x, x.g, spvSwizzle((s >> 8) & 0xFF)), spvGetSwizzle(x, x.b, spvSwizzle((s >> 16) & 0xFF)), spvGetSwizzle(x, x.a, spvSwizzle((s >> 24) & 0xFF)));
}
template<typename T>
inline T spvTextureSwizzle(T x, uint s)
{
return spvTextureSwizzle(vec<T, 4>(x, 0, 0, 1), s).x;
}
// Wrapper function that swizzles texture gathers.
template<typename T, typename Tex, typename... Ts>
inline vec<T, 4> spvGatherSwizzle(sampler s, const thread Tex& t, Ts... params, component c, uint sw) METAL_CONST_ARG(c)
{
if (sw)
{
switch (spvSwizzle((sw >> (uint(c) * 8)) & 0xFF))
{
case spvSwizzle::none:
break;
case spvSwizzle::zero:
return vec<T, 4>(0, 0, 0, 0);
case spvSwizzle::one:
return vec<T, 4>(1, 1, 1, 1);
case spvSwizzle::red:
return t.gather(s, spvForward<Ts>(params)..., component::x);
case spvSwizzle::green:
return t.gather(s, spvForward<Ts>(params)..., component::y);
case spvSwizzle::blue:
return t.gather(s, spvForward<Ts>(params)..., component::z);
case spvSwizzle::alpha:
return t.gather(s, spvForward<Ts>(params)..., component::w);
}
}
switch (c)
{
case component::x:
return t.gather(s, spvForward<Ts>(params)..., component::x);
case component::y:
return t.gather(s, spvForward<Ts>(params)..., component::y);
case component::z:
return t.gather(s, spvForward<Ts>(params)..., component::z);
case component::w:
return t.gather(s, spvForward<Ts>(params)..., component::w);
}
}
// Wrapper function that swizzles depth texture gathers.
template<typename T, typename Tex, typename... Ts>
inline vec<T, 4> spvGatherCompareSwizzle(sampler s, const thread Tex& t, Ts... params, uint sw)
{
if (sw)
{
switch (spvSwizzle(sw & 0xFF))
{
case spvSwizzle::none:
case spvSwizzle::red:
break;
case spvSwizzle::zero:
case spvSwizzle::green:
case spvSwizzle::blue:
case spvSwizzle::alpha:
return vec<T, 4>(0, 0, 0, 0);
case spvSwizzle::one:
return vec<T, 4>(1, 1, 1, 1);
}
}
return t.gather_compare(s, spvForward<Ts>(params)...);
}
float4 sample_in_func_1(thread const array<texture2d<float>, 4> uSampler0, thread const array<sampler, 4> uSampler0Smplr, constant uint32_t* uSampler0Swzl, thread float2& vUV)
{
return spvTextureSwizzle(uSampler0[2].sample(uSampler0Smplr[2], vUV), uSampler0Swzl[2]);
}
float4 sample_in_func_2(thread float2& vUV, thread texture2d<float> uSampler1, thread const sampler uSampler1Smplr, constant uint32_t& uSampler1Swzl)
{
return spvTextureSwizzle(uSampler1.sample(uSampler1Smplr, vUV), uSampler1Swzl);
}
float4 sample_single_in_func(thread const texture2d<float> s, thread const sampler sSmplr, constant uint32_t& sSwzl, thread float2& vUV)
{
return spvTextureSwizzle(s.sample(sSmplr, vUV), sSwzl);
}
fragment main0_out main0(main0_in in [[stage_in]], constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], constant uint* spvSwizzleConstants [[buffer(30)]], texture2d<float> uSampler1 [[texture(0)]], sampler uSampler1Smplr [[sampler(0)]])
{
main0_out out = {};
constant uint32_t* spvDescriptorSet0_uSampler0Swzl = &spvDescriptorSet0.spvSwizzleConstants[1];
constant uint32_t& uSampler1Swzl = spvSwizzleConstants[0];
out.FragColor = sample_in_func_1(spvDescriptorSet0.uSampler0, spvDescriptorSet0.uSampler0Smplr, spvDescriptorSet0_uSampler0Swzl, in.vUV);
out.FragColor += sample_in_func_2(in.vUV, uSampler1, uSampler1Smplr, uSampler1Swzl);
out.FragColor += sample_single_in_func(spvDescriptorSet0.uSampler0[1], spvDescriptorSet0.uSampler0Smplr[1], spvDescriptorSet0_uSampler0Swzl[1], in.vUV);
out.FragColor += sample_single_in_func(uSampler1, uSampler1Smplr, uSampler1Swzl, in.vUV);
return out;
}

View File

@ -0,0 +1,156 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct main0_out
{
float4 FragColor [[color(0)]];
};
struct main0_in
{
float2 vUV [[user(locn0)]];
};
enum class spvSwizzle : uint
{
none = 0,
zero,
one,
red,
green,
blue,
alpha
};
template<typename T> struct spvRemoveReference { typedef T type; };
template<typename T> struct spvRemoveReference<thread T&> { typedef T type; };
template<typename T> struct spvRemoveReference<thread T&&> { typedef T type; };
template<typename T> inline constexpr thread T&& spvForward(thread typename spvRemoveReference<T>::type& x)
{
return static_cast<thread T&&>(x);
}
template<typename T> inline constexpr thread T&& spvForward(thread typename spvRemoveReference<T>::type&& x)
{
return static_cast<thread T&&>(x);
}
template<typename T>
inline T spvGetSwizzle(vec<T, 4> x, T c, spvSwizzle s)
{
switch (s)
{
case spvSwizzle::none:
return c;
case spvSwizzle::zero:
return 0;
case spvSwizzle::one:
return 1;
case spvSwizzle::red:
return x.r;
case spvSwizzle::green:
return x.g;
case spvSwizzle::blue:
return x.b;
case spvSwizzle::alpha:
return x.a;
}
}
// Wrapper function that swizzles texture samples and fetches.
template<typename T>
inline vec<T, 4> spvTextureSwizzle(vec<T, 4> x, uint s)
{
if (!s)
return x;
return vec<T, 4>(spvGetSwizzle(x, x.r, spvSwizzle((s >> 0) & 0xFF)), spvGetSwizzle(x, x.g, spvSwizzle((s >> 8) & 0xFF)), spvGetSwizzle(x, x.b, spvSwizzle((s >> 16) & 0xFF)), spvGetSwizzle(x, x.a, spvSwizzle((s >> 24) & 0xFF)));
}
template<typename T>
inline T spvTextureSwizzle(T x, uint s)
{
return spvTextureSwizzle(vec<T, 4>(x, 0, 0, 1), s).x;
}
// Wrapper function that swizzles texture gathers.
template<typename T, typename Tex, typename... Ts>
inline vec<T, 4> spvGatherSwizzle(sampler s, const thread Tex& t, Ts... params, component c, uint sw) METAL_CONST_ARG(c)
{
if (sw)
{
switch (spvSwizzle((sw >> (uint(c) * 8)) & 0xFF))
{
case spvSwizzle::none:
break;
case spvSwizzle::zero:
return vec<T, 4>(0, 0, 0, 0);
case spvSwizzle::one:
return vec<T, 4>(1, 1, 1, 1);
case spvSwizzle::red:
return t.gather(s, spvForward<Ts>(params)..., component::x);
case spvSwizzle::green:
return t.gather(s, spvForward<Ts>(params)..., component::y);
case spvSwizzle::blue:
return t.gather(s, spvForward<Ts>(params)..., component::z);
case spvSwizzle::alpha:
return t.gather(s, spvForward<Ts>(params)..., component::w);
}
}
switch (c)
{
case component::x:
return t.gather(s, spvForward<Ts>(params)..., component::x);
case component::y:
return t.gather(s, spvForward<Ts>(params)..., component::y);
case component::z:
return t.gather(s, spvForward<Ts>(params)..., component::z);
case component::w:
return t.gather(s, spvForward<Ts>(params)..., component::w);
}
}
// Wrapper function that swizzles depth texture gathers.
template<typename T, typename Tex, typename... Ts>
inline vec<T, 4> spvGatherCompareSwizzle(sampler s, const thread Tex& t, Ts... params, uint sw)
{
if (sw)
{
switch (spvSwizzle(sw & 0xFF))
{
case spvSwizzle::none:
case spvSwizzle::red:
break;
case spvSwizzle::zero:
case spvSwizzle::green:
case spvSwizzle::blue:
case spvSwizzle::alpha:
return vec<T, 4>(0, 0, 0, 0);
case spvSwizzle::one:
return vec<T, 4>(1, 1, 1, 1);
}
}
return t.gather_compare(s, spvForward<Ts>(params)...);
}
float4 sample_in_func(thread const array<texture2d<float>, 4> uSampler, thread const array<sampler, 4> uSamplerSmplr, constant uint32_t* uSamplerSwzl, thread float2& vUV)
{
return spvTextureSwizzle(uSampler[2].sample(uSamplerSmplr[2], vUV), uSamplerSwzl[2]);
}
float4 sample_single_in_func(thread const texture2d<float> s, thread const sampler sSmplr, constant uint32_t& sSwzl, thread float2& vUV)
{
return spvTextureSwizzle(s.sample(sSmplr, vUV), sSwzl);
}
fragment main0_out main0(main0_in in [[stage_in]], constant uint* spvSwizzleConstants [[buffer(30)]], array<texture2d<float>, 4> uSampler [[texture(0)]], array<sampler, 4> uSamplerSmplr [[sampler(0)]])
{
main0_out out = {};
constant uint32_t* uSamplerSwzl = &spvSwizzleConstants[0];
out.FragColor = sample_in_func(uSampler, uSamplerSmplr, uSamplerSwzl, in.vUV);
out.FragColor += sample_single_in_func(uSampler[1], uSamplerSmplr[1], uSamplerSwzl[1], in.vUV);
return out;
}

View File

@ -0,0 +1,51 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct UBO
{
float4 v[64];
};
struct SSBO
{
float4 v[1];
};
struct main0_out
{
float4 FragColor [[color(0)]];
};
struct main0_in
{
int vIndex [[user(locn0)]];
float2 vUV [[user(locn1)]];
};
fragment main0_out main0(main0_in in [[stage_in]], constant UBO* ubos_0 [[buffer(0)]], constant UBO* ubos_1 [[buffer(1)]], const device SSBO* ssbos_0 [[buffer(2)]], const device SSBO* ssbos_1 [[buffer(3)]], array<texture2d<float>, 8> uSamplers [[texture(0)]], array<texture2d<float>, 8> uCombinedSamplers [[texture(8)]], array<sampler, 7> uSamps [[sampler(1)]], array<sampler, 8> uCombinedSamplersSmplr [[sampler(8)]])
{
constant UBO* ubos[] =
{
ubos_0,
ubos_1,
};
const device SSBO* ssbos[] =
{
ssbos_0,
ssbos_1,
};
main0_out out = {};
int i = in.vIndex;
int _24 = i + 10;
out.FragColor = uSamplers[_24].sample(uSamps[i + 40], in.vUV);
int _50 = i + 10;
out.FragColor = uCombinedSamplers[_50].sample(uCombinedSamplersSmplr[_50], in.vUV);
out.FragColor += ubos[(i + 20)]->v[i + 40];
out.FragColor += ssbos[(i + 50)]->v[i + 60];
return out;
}

View File

@ -0,0 +1,146 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct SSBO
{
float FragColor;
};
inline uint4 spvSubgroupBallot(bool value)
{
simd_vote vote = simd_ballot(value);
// simd_ballot() returns a 64-bit integer-like object, but
// SPIR-V callers expect a uint4. We must convert.
// FIXME: This won't include higher bits if Apple ever supports
// 128 lanes in an SIMD-group.
return uint4((uint)((simd_vote::vote_t)vote & 0xFFFFFFFF), (uint)(((simd_vote::vote_t)vote >> 32) & 0xFFFFFFFF), 0, 0);
}
inline bool spvSubgroupBallotBitExtract(uint4 ballot, uint bit)
{
return !!extract_bits(ballot[bit / 32], bit % 32, 1);
}
inline uint spvSubgroupBallotFindLSB(uint4 ballot)
{
return select(ctz(ballot.x), select(32 + ctz(ballot.y), select(64 + ctz(ballot.z), select(96 + ctz(ballot.w), uint(-1), ballot.w == 0), ballot.z == 0), ballot.y == 0), ballot.x == 0);
}
inline uint spvSubgroupBallotFindMSB(uint4 ballot)
{
return select(128 - (clz(ballot.w) + 1), select(96 - (clz(ballot.z) + 1), select(64 - (clz(ballot.y) + 1), select(32 - (clz(ballot.x) + 1), uint(-1), ballot.x == 0), ballot.y == 0), ballot.z == 0), ballot.w == 0);
}
inline uint spvSubgroupBallotBitCount(uint4 ballot)
{
return popcount(ballot.x) + popcount(ballot.y) + popcount(ballot.z) + popcount(ballot.w);
}
inline uint spvSubgroupBallotInclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID)
{
uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), uint2(0));
return spvSubgroupBallotBitCount(ballot & mask);
}
inline uint spvSubgroupBallotExclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID)
{
uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0));
return spvSubgroupBallotBitCount(ballot & mask);
}
template<typename T>
inline bool spvSubgroupAllEqual(T value)
{
return simd_all(value == simd_broadcast_first(value));
}
template<>
inline bool spvSubgroupAllEqual(bool value)
{
return simd_all(value) || !simd_any(value);
}
kernel void main0(device SSBO& _9 [[buffer(0)]], uint gl_NumSubgroups [[simdgroups_per_threadgroup]], uint gl_SubgroupID [[simdgroup_index_in_threadgroup]], uint gl_SubgroupSize [[thread_execution_width]], uint gl_SubgroupInvocationID [[thread_index_in_simdgroup]])
{
uint4 gl_SubgroupEqMask = 27 > 32 ? uint4(0, (1 << (gl_SubgroupInvocationID - 32)), uint2(0)) : uint4(1 << gl_SubgroupInvocationID, uint3(0));
uint4 gl_SubgroupGeMask = uint4(extract_bits(0xFFFFFFFF, min(gl_SubgroupInvocationID, 32u), (uint)max(min((int)gl_SubgroupSize, 32) - (int)gl_SubgroupInvocationID, 0)), extract_bits(0xFFFFFFFF, (uint)max((int)gl_SubgroupInvocationID - 32, 0), (uint)max((int)gl_SubgroupSize - (int)max(gl_SubgroupInvocationID, 32u), 0)), uint2(0));
uint4 gl_SubgroupGtMask = uint4(extract_bits(0xFFFFFFFF, min(gl_SubgroupInvocationID + 1, 32u), (uint)max(min((int)gl_SubgroupSize, 32) - (int)gl_SubgroupInvocationID - 1, 0)), extract_bits(0xFFFFFFFF, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0), (uint)max((int)gl_SubgroupSize - (int)max(gl_SubgroupInvocationID + 1, 32u), 0)), uint2(0));
uint4 gl_SubgroupLeMask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), uint2(0));
uint4 gl_SubgroupLtMask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0));
_9.FragColor = float(gl_NumSubgroups);
_9.FragColor = float(gl_SubgroupID);
_9.FragColor = float(gl_SubgroupSize);
_9.FragColor = float(gl_SubgroupInvocationID);
simdgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture);
simdgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture);
simdgroup_barrier(mem_flags::mem_device);
simdgroup_barrier(mem_flags::mem_threadgroup);
simdgroup_barrier(mem_flags::mem_texture);
bool elected = simd_is_first();
_9.FragColor = float4(gl_SubgroupEqMask).x;
_9.FragColor = float4(gl_SubgroupGeMask).x;
_9.FragColor = float4(gl_SubgroupGtMask).x;
_9.FragColor = float4(gl_SubgroupLeMask).x;
_9.FragColor = float4(gl_SubgroupLtMask).x;
float4 broadcasted = simd_broadcast(float4(10.0), 8u);
float3 first = simd_broadcast_first(float3(20.0));
uint4 ballot_value = spvSubgroupBallot(true);
bool inverse_ballot_value = spvSubgroupBallotBitExtract(ballot_value, gl_SubgroupInvocationID);
bool bit_extracted = spvSubgroupBallotBitExtract(uint4(10u), 8u);
uint bit_count = spvSubgroupBallotBitCount(ballot_value);
uint inclusive_bit_count = spvSubgroupBallotInclusiveBitCount(ballot_value, gl_SubgroupInvocationID);
uint exclusive_bit_count = spvSubgroupBallotExclusiveBitCount(ballot_value, gl_SubgroupInvocationID);
uint lsb = spvSubgroupBallotFindLSB(ballot_value);
uint msb = spvSubgroupBallotFindMSB(ballot_value);
uint shuffled = simd_shuffle(10u, 8u);
uint shuffled_xor = simd_shuffle_xor(30u, 8u);
uint shuffled_up = simd_shuffle_up(20u, 4u);
uint shuffled_down = simd_shuffle_down(20u, 4u);
bool has_all = simd_all(true);
bool has_any = simd_any(true);
bool has_equal = spvSubgroupAllEqual(0);
has_equal = spvSubgroupAllEqual(true);
float4 added = simd_sum(float4(20.0));
int4 iadded = simd_sum(int4(20));
float4 multiplied = simd_product(float4(20.0));
int4 imultiplied = simd_product(int4(20));
float4 lo = simd_min(float4(20.0));
float4 hi = simd_max(float4(20.0));
int4 slo = simd_min(int4(20));
int4 shi = simd_max(int4(20));
uint4 ulo = simd_min(uint4(20u));
uint4 uhi = simd_max(uint4(20u));
uint4 anded = simd_and(ballot_value);
uint4 ored = simd_or(ballot_value);
uint4 xored = simd_xor(ballot_value);
added = simd_prefix_inclusive_sum(added);
iadded = simd_prefix_inclusive_sum(iadded);
multiplied = simd_prefix_inclusive_product(multiplied);
imultiplied = simd_prefix_inclusive_product(imultiplied);
added = simd_prefix_exclusive_sum(multiplied);
multiplied = simd_prefix_exclusive_product(multiplied);
iadded = simd_prefix_exclusive_sum(imultiplied);
imultiplied = simd_prefix_exclusive_product(imultiplied);
added = quad_sum(added);
multiplied = quad_product(multiplied);
iadded = quad_sum(iadded);
imultiplied = quad_product(imultiplied);
lo = quad_min(lo);
hi = quad_max(hi);
ulo = quad_min(ulo);
uhi = quad_max(uhi);
slo = quad_min(slo);
shi = quad_max(shi);
anded = quad_and(anded);
ored = quad_or(ored);
xored = quad_xor(xored);
float4 swap_horiz = quad_shuffle_xor(float4(20.0), 1u);
float4 swap_vertical = quad_shuffle_xor(float4(20.0), 2u);
float4 swap_diagonal = quad_shuffle_xor(float4(20.0), 3u);
float4 quad_broadcast0 = quad_broadcast(float4(20.0), 3u);
}

View File

@ -0,0 +1,31 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct SSBO
{
float FragColor;
};
kernel void main0(device SSBO& _9 [[buffer(0)]], uint gl_NumSubgroups [[quadgroups_per_threadgroup]], uint gl_SubgroupID [[quadgroup_index_in_threadgroup]], uint gl_SubgroupSize [[thread_execution_width]], uint gl_SubgroupInvocationID [[thread_index_in_quadgroup]])
{
_9.FragColor = float(gl_NumSubgroups);
_9.FragColor = float(gl_SubgroupID);
_9.FragColor = float(gl_SubgroupSize);
_9.FragColor = float(gl_SubgroupInvocationID);
simdgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture);
simdgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture);
simdgroup_barrier(mem_flags::mem_device);
simdgroup_barrier(mem_flags::mem_threadgroup);
simdgroup_barrier(mem_flags::mem_texture);
uint shuffled = quad_shuffle(10u, 8u);
uint shuffled_xor = quad_shuffle_xor(30u, 8u);
uint shuffled_up = quad_shuffle_up(20u, 4u);
uint shuffled_down = quad_shuffle_down(20u, 4u);
float4 swap_horiz = quad_shuffle_xor(float4(20.0), 1u);
float4 swap_vertical = quad_shuffle_xor(float4(20.0), 2u);
float4 swap_diagonal = quad_shuffle_xor(float4(20.0), 3u);
float4 quad_broadcast0 = quad_broadcast(float4(20.0), 3u);
}

View File

@ -0,0 +1,14 @@
#version 450
layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
layout(binding = 1, std140) buffer SSBO
{
uint size;
float v[];
} _11;
void main()
{
_11.size = uint(int(uint(_11.v.length())));
}

View File

@ -0,0 +1,26 @@
#version 450
#extension GL_EXT_buffer_reference : require
layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
layout(buffer_reference) buffer PtrUint;
layout(buffer_reference) buffer PtrInt;
layout(buffer_reference, std430) buffer PtrUint
{
uint value;
};
layout(buffer_reference, std430) buffer PtrInt
{
int value;
};
layout(set = 0, binding = 0, std430) buffer Buf
{
PtrUint ptr;
} _11;
void main()
{
PtrInt(_11.ptr).value = 10;
}

View File

@ -22,7 +22,7 @@ void copy_node(restrict Node dst, restrict Node a, restrict Node b)
dst.value = a.value + b.value;
}
void overwrite_node(out Node dst, Node src)
void overwrite_node(out restrict Node dst, restrict Node src)
{
dst = src;
}

View File

@ -0,0 +1,12 @@
#version 450
layout(local_size_x = 1) in;
layout(set = 0, binding = 1, std140) buffer SSBO
{
uint size;
float v[];
};
void main()
{
size = v.length();
}

View File

@ -0,0 +1,28 @@
#version 450
#extension GL_EXT_nonuniform_qualifier : require
layout(set = 0, binding = 0) uniform texture2D uSamplers[];
layout(set = 1, binding = 0) uniform sampler2D uCombinedSamplers[];
layout(set = 2, binding = 0) uniform sampler uSamps[];
layout(location = 0) flat in int vIndex;
layout(location = 1) in vec2 vUV;
layout(location = 0) out vec4 FragColor;
layout(set = 3, binding = 0) uniform UBO
{
vec4 v[64];
} ubos[];
layout(set = 4, binding = 0) readonly buffer SSBO
{
vec4 v[];
} ssbos[];
void main()
{
int i = vIndex;
FragColor = texture(sampler2D(uSamplers[nonuniformEXT(i + 10)], uSamps[nonuniformEXT(i + 40)]), vUV);
FragColor = texture(uCombinedSamplers[nonuniformEXT(i + 10)], vUV);
FragColor += ubos[nonuniformEXT(i + 20)].v[nonuniformEXT(i + 40)];
FragColor += ssbos[nonuniformEXT(i + 50)].v[nonuniformEXT(i + 60)];
}

View File

@ -11,12 +11,10 @@ void full_barrier()
memoryBarrier();
}
#if 0
void image_barrier()
{
memoryBarrierImage();
}
#endif
void buffer_barrier()
{
@ -40,13 +38,11 @@ void full_barrier_exec()
barrier();
}
#if 0
void image_barrier_exec()
{
memoryBarrierImage();
barrier();
}
#endif
void buffer_barrier_exec()
{
@ -69,13 +65,13 @@ void main()
{
barrier_shared();
full_barrier();
//image_barrier();
image_barrier();
buffer_barrier();
group_barrier();
barrier_shared_exec();
full_barrier_exec();
//image_barrier_exec();
image_barrier_exec();
buffer_barrier_exec();
group_barrier_exec();

View File

@ -0,0 +1,31 @@
#version 450
layout(set = 0, binding = 1) uniform sampler2D uSampler0[4];
layout(set = 2, binding = 0) uniform sampler2D uSampler1;
layout(set = 1, binding = 4) uniform sampler2D uSamp;
layout(location = 0) in vec2 vUV;
layout(location = 0) out vec4 FragColor;
vec4 sample_in_func_1()
{
return texture(uSampler0[2], vUV);
}
vec4 sample_in_func_2()
{
return texture(uSampler1, vUV);
}
vec4 sample_single_in_func(sampler2D s)
{
return texture(s, vUV);
}
void main()
{
FragColor = sample_in_func_1();
FragColor += sample_in_func_2();
FragColor += sample_single_in_func(uSampler0[1]);
FragColor += sample_single_in_func(uSampler1);
}

View File

@ -0,0 +1,23 @@
#version 450
layout(set = 0, binding = 0) uniform sampler2D uSampler[4];
layout(set = 0, binding = 1) uniform sampler2D uSamp;
layout(location = 0) in vec2 vUV;
layout(location = 0) out vec4 FragColor;
vec4 sample_in_func()
{
return texture(uSampler[2], vUV);
}
vec4 sample_single_in_func(sampler2D s)
{
return texture(s, vUV);
}
void main()
{
FragColor = sample_in_func();
FragColor += sample_single_in_func(uSampler[1]);
}

View File

@ -0,0 +1,28 @@
#version 450
#extension GL_EXT_nonuniform_qualifier : require
layout(binding = 0) uniform texture2D uSamplers[8];
layout(binding = 8) uniform sampler2D uCombinedSamplers[8];
layout(binding = 1) uniform sampler uSamps[7];
layout(location = 0) flat in int vIndex;
layout(location = 1) in vec2 vUV;
layout(location = 0) out vec4 FragColor;
layout(set = 0, binding = 0) uniform UBO
{
vec4 v[64];
} ubos[2];
layout(set = 0, binding = 2) readonly buffer SSBO
{
vec4 v[];
} ssbos[2];
void main()
{
int i = vIndex;
FragColor = texture(sampler2D(uSamplers[nonuniformEXT(i + 10)], uSamps[nonuniformEXT(i + 40)]), vUV);
FragColor = texture(uCombinedSamplers[nonuniformEXT(i + 10)], vUV);
FragColor += ubos[nonuniformEXT(i + 20)].v[nonuniformEXT(i + 40)];
FragColor += ssbos[nonuniformEXT(i + 50)].v[nonuniformEXT(i + 60)];
}

View File

@ -0,0 +1,126 @@
#version 450
#extension GL_KHR_shader_subgroup_basic : require
#extension GL_KHR_shader_subgroup_ballot : require
#extension GL_KHR_shader_subgroup_vote : require
#extension GL_KHR_shader_subgroup_shuffle : require
#extension GL_KHR_shader_subgroup_shuffle_relative : require
#extension GL_KHR_shader_subgroup_arithmetic : require
#extension GL_KHR_shader_subgroup_clustered : require
#extension GL_KHR_shader_subgroup_quad : require
layout(local_size_x = 1) in;
layout(std430, binding = 0) buffer SSBO
{
float FragColor;
};
void main()
{
// basic
FragColor = float(gl_NumSubgroups);
FragColor = float(gl_SubgroupID);
FragColor = float(gl_SubgroupSize);
FragColor = float(gl_SubgroupInvocationID);
subgroupBarrier();
subgroupMemoryBarrier();
subgroupMemoryBarrierBuffer();
subgroupMemoryBarrierShared();
subgroupMemoryBarrierImage();
bool elected = subgroupElect();
// ballot
FragColor = float(gl_SubgroupEqMask);
FragColor = float(gl_SubgroupGeMask);
FragColor = float(gl_SubgroupGtMask);
FragColor = float(gl_SubgroupLeMask);
FragColor = float(gl_SubgroupLtMask);
vec4 broadcasted = subgroupBroadcast(vec4(10.0), 8u);
vec3 first = subgroupBroadcastFirst(vec3(20.0));
uvec4 ballot_value = subgroupBallot(true);
bool inverse_ballot_value = subgroupInverseBallot(ballot_value);
bool bit_extracted = subgroupBallotBitExtract(uvec4(10u), 8u);
uint bit_count = subgroupBallotBitCount(ballot_value);
uint inclusive_bit_count = subgroupBallotInclusiveBitCount(ballot_value);
uint exclusive_bit_count = subgroupBallotExclusiveBitCount(ballot_value);
uint lsb = subgroupBallotFindLSB(ballot_value);
uint msb = subgroupBallotFindMSB(ballot_value);
// shuffle
uint shuffled = subgroupShuffle(10u, 8u);
uint shuffled_xor = subgroupShuffleXor(30u, 8u);
// shuffle relative
uint shuffled_up = subgroupShuffleUp(20u, 4u);
uint shuffled_down = subgroupShuffleDown(20u, 4u);
// vote
bool has_all = subgroupAll(true);
bool has_any = subgroupAny(true);
bool has_equal = subgroupAllEqual(0);
has_equal = subgroupAllEqual(true);
// arithmetic
vec4 added = subgroupAdd(vec4(20.0));
ivec4 iadded = subgroupAdd(ivec4(20));
vec4 multiplied = subgroupMul(vec4(20.0));
ivec4 imultiplied = subgroupMul(ivec4(20));
vec4 lo = subgroupMin(vec4(20.0));
vec4 hi = subgroupMax(vec4(20.0));
ivec4 slo = subgroupMin(ivec4(20));
ivec4 shi = subgroupMax(ivec4(20));
uvec4 ulo = subgroupMin(uvec4(20));
uvec4 uhi = subgroupMax(uvec4(20));
uvec4 anded = subgroupAnd(ballot_value);
uvec4 ored = subgroupOr(ballot_value);
uvec4 xored = subgroupXor(ballot_value);
added = subgroupInclusiveAdd(added);
iadded = subgroupInclusiveAdd(iadded);
multiplied = subgroupInclusiveMul(multiplied);
imultiplied = subgroupInclusiveMul(imultiplied);
//lo = subgroupInclusiveMin(lo); // FIXME: Unsupported by Metal
//hi = subgroupInclusiveMax(hi);
//slo = subgroupInclusiveMin(slo);
//shi = subgroupInclusiveMax(shi);
//ulo = subgroupInclusiveMin(ulo);
//uhi = subgroupInclusiveMax(uhi);
//anded = subgroupInclusiveAnd(anded);
//ored = subgroupInclusiveOr(ored);
//xored = subgroupInclusiveXor(ored);
//added = subgroupExclusiveAdd(lo);
added = subgroupExclusiveAdd(multiplied);
multiplied = subgroupExclusiveMul(multiplied);
iadded = subgroupExclusiveAdd(imultiplied);
imultiplied = subgroupExclusiveMul(imultiplied);
//lo = subgroupExclusiveMin(lo); // FIXME: Unsupported by Metal
//hi = subgroupExclusiveMax(hi);
//ulo = subgroupExclusiveMin(ulo);
//uhi = subgroupExclusiveMax(uhi);
//slo = subgroupExclusiveMin(slo);
//shi = subgroupExclusiveMax(shi);
//anded = subgroupExclusiveAnd(anded);
//ored = subgroupExclusiveOr(ored);
//xored = subgroupExclusiveXor(ored);
// clustered
added = subgroupClusteredAdd(added, 4u);
multiplied = subgroupClusteredMul(multiplied, 4u);
iadded = subgroupClusteredAdd(iadded, 4u);
imultiplied = subgroupClusteredMul(imultiplied, 4u);
lo = subgroupClusteredMin(lo, 4u);
hi = subgroupClusteredMax(hi, 4u);
ulo = subgroupClusteredMin(ulo, 4u);
uhi = subgroupClusteredMax(uhi, 4u);
slo = subgroupClusteredMin(slo, 4u);
shi = subgroupClusteredMax(shi, 4u);
anded = subgroupClusteredAnd(anded, 4u);
ored = subgroupClusteredOr(ored, 4u);
xored = subgroupClusteredXor(xored, 4u);
// quad
vec4 swap_horiz = subgroupQuadSwapHorizontal(vec4(20.0));
vec4 swap_vertical = subgroupQuadSwapVertical(vec4(20.0));
vec4 swap_diagonal = subgroupQuadSwapDiagonal(vec4(20.0));
vec4 quad_broadcast = subgroupQuadBroadcast(vec4(20.0), 3u);
}

View File

@ -0,0 +1,41 @@
#version 450
#extension GL_KHR_shader_subgroup_basic : require
#extension GL_KHR_shader_subgroup_shuffle : require
#extension GL_KHR_shader_subgroup_shuffle_relative : require
#extension GL_KHR_shader_subgroup_quad : require
layout(local_size_x = 1) in;
layout(std430, binding = 0) buffer SSBO
{
float FragColor;
};
// Reduced test for functionality exposed on iOS.
void main()
{
// basic
FragColor = float(gl_NumSubgroups);
FragColor = float(gl_SubgroupID);
FragColor = float(gl_SubgroupSize);
FragColor = float(gl_SubgroupInvocationID);
subgroupBarrier();
subgroupMemoryBarrier();
subgroupMemoryBarrierBuffer();
subgroupMemoryBarrierShared();
subgroupMemoryBarrierImage();
// shuffle
uint shuffled = subgroupShuffle(10u, 8u);
uint shuffled_xor = subgroupShuffleXor(30u, 8u);
// shuffle relative
uint shuffled_up = subgroupShuffleUp(20u, 4u);
uint shuffled_down = subgroupShuffleDown(20u, 4u);
// quad
vec4 swap_horiz = subgroupQuadSwapHorizontal(vec4(20.0));
vec4 swap_vertical = subgroupQuadSwapVertical(vec4(20.0));
vec4 swap_diagonal = subgroupQuadSwapDiagonal(vec4(20.0));
vec4 quad_broadcast = subgroupQuadBroadcast(vec4(20.0), 3u);
}

View File

@ -0,0 +1,12 @@
#version 450
layout(local_size_x = 1) in;
layout(set = 0, binding = 1, std140) buffer SSBO
{
uint size;
float v[];
};
void main()
{
size = v.length();
}

View File

@ -0,0 +1,22 @@
#version 450
#extension GL_EXT_buffer_reference: require
layout(buffer_reference) buffer PtrUint
{
uint value;
};
layout(buffer_reference) buffer PtrInt
{
int value;
};
layout(set = 0, binding = 0) buffer Buf
{
PtrUint ptr;
};
void main()
{
PtrInt(ptr).value = 10;
}

View File

@ -21,7 +21,7 @@ void copy_node(restrict Node dst, restrict Node a, restrict Node b)
dst.value = a.value + b.value;
}
void overwrite_node(out Node dst, Node src)
void overwrite_node(out restrict Node dst, restrict Node src)
{
dst = src;
}

View File

@ -317,7 +317,7 @@ string CompilerCPP::compile()
backend.basic_uint_type = "uint32_t";
backend.swizzle_is_function = true;
backend.shared_is_implied = true;
backend.flexible_member_array_supported = false;
backend.unsized_array_supported = false;
backend.explicit_struct_type = true;
backend.use_initializer_list = true;

View File

@ -4217,7 +4217,7 @@ Compiler::PhysicalStorageBufferPointerHandler::PhysicalStorageBufferPointerHandl
bool Compiler::PhysicalStorageBufferPointerHandler::handle(Op op, const uint32_t *args, uint32_t)
{
if (op == OpConvertUToPtr)
if (op == OpConvertUToPtr || op == OpBitcast)
{
auto &type = compiler.get<SPIRType>(args[0]);
if (type.storage == StorageClassPhysicalStorageBufferEXT && type.pointer && type.pointer_depth == 1)

View File

@ -442,6 +442,9 @@ spvc_result spvc_compiler_options_set_uint(spvc_compiler_options options, spvc_c
case SPVC_COMPILER_OPTION_GLSL_EMIT_PUSH_CONSTANT_AS_UNIFORM_BUFFER:
options->glsl.emit_push_constant_as_uniform_buffer = value != 0;
break;
case SPVC_COMPILER_OPTION_GLSL_EMIT_UNIFORM_BUFFER_AS_PLAIN_UNIFORMS:
options->glsl.emit_uniform_buffer_as_plain_uniforms = value != 0;
break;
#endif
#if SPIRV_CROSS_C_API_HLSL
@ -471,8 +474,8 @@ spvc_result spvc_compiler_options_set_uint(spvc_compiler_options options, spvc_c
options->msl.texel_buffer_texture_width = value;
break;
case SPVC_COMPILER_OPTION_MSL_AUX_BUFFER_INDEX:
options->msl.aux_buffer_index = value;
case SPVC_COMPILER_OPTION_MSL_SWIZZLE_BUFFER_INDEX:
options->msl.swizzle_buffer_index = value;
break;
case SPVC_COMPILER_OPTION_MSL_INDIRECT_PARAMS_BUFFER_INDEX:
@ -723,7 +726,7 @@ spvc_bool spvc_compiler_msl_is_rasterization_disabled(spvc_compiler compiler)
#endif
}
spvc_bool spvc_compiler_msl_needs_aux_buffer(spvc_compiler compiler)
spvc_bool spvc_compiler_msl_needs_swizzle_buffer(spvc_compiler compiler)
{
#if SPIRV_CROSS_C_API_MSL
if (compiler->backend != SPVC_BACKEND_MSL)
@ -733,13 +736,18 @@ spvc_bool spvc_compiler_msl_needs_aux_buffer(spvc_compiler compiler)
}
auto &msl = *static_cast<CompilerMSL *>(compiler->compiler.get());
return msl.needs_aux_buffer() ? SPVC_TRUE : SPVC_FALSE;
return msl.needs_swizzle_buffer() ? SPVC_TRUE : SPVC_FALSE;
#else
compiler->context->report_error("MSL function used on a non-MSL backend.");
return SPVC_FALSE;
#endif
}
spvc_bool spvc_compiler_msl_needs_aux_buffer(spvc_compiler compiler)
{
return spvc_compiler_msl_needs_swizzle_buffer(compiler);
}
spvc_bool spvc_compiler_msl_needs_output_buffer(spvc_compiler compiler)
{
#if SPIRV_CROSS_C_API_MSL
@ -1273,6 +1281,11 @@ const char *spvc_compiler_get_member_decoration_string(spvc_compiler compiler, s
.c_str();
}
const char *spvc_compiler_get_member_name(spvc_compiler compiler, spvc_type_id id, unsigned member_index)
{
return compiler->compiler->get_member_name(id, member_index).c_str();
}
spvc_result spvc_compiler_get_entry_points(spvc_compiler compiler, const spvc_entry_point **entry_points,
size_t *num_entry_points)
{
@ -1410,7 +1423,7 @@ unsigned spvc_type_get_bit_width(spvc_type type)
return type->width;
}
unsigned spvc_type_get_SmallVector_size(spvc_type type)
unsigned spvc_type_get_vector_size(spvc_type type)
{
return type->vecsize;
}
@ -1640,6 +1653,32 @@ spvc_constant_id spvc_compiler_get_work_group_size_specialization_constants(spvc
return ret;
}
spvc_result spvc_compiler_get_active_buffer_ranges(spvc_compiler compiler,
spvc_variable_id id,
const spvc_buffer_range **ranges,
size_t *num_ranges)
{
SPVC_BEGIN_SAFE_SCOPE
{
auto active_ranges = compiler->compiler->get_active_buffer_ranges(id);
SmallVector<spvc_buffer_range> translated;
translated.reserve(active_ranges.size());
for (auto &r : active_ranges)
{
spvc_buffer_range trans = { r.index, r.offset, r.range };
translated.push_back(trans);
}
auto ptr = spvc_allocate<TemporaryBuffer<spvc_buffer_range>>();
ptr->buffer = std::move(translated);
*ranges = ptr->buffer.data();
*num_ranges = ptr->buffer.size();
compiler->context->allocations.push_back(std::move(ptr));
}
SPVC_END_SAFE_SCOPE(compiler->context, SPVC_ERROR_OUT_OF_MEMORY)
return SPVC_SUCCESS;
}
float spvc_constant_get_scalar_fp16(spvc_constant constant, unsigned column, unsigned row)
{
return constant->scalar_f16(column, row);

View File

@ -33,7 +33,7 @@ extern "C" {
/* Bumped if ABI or API breaks backwards compatibility. */
#define SPVC_C_API_VERSION_MAJOR 0
/* Bumped if APIs or enumerations are added in a backwards compatible way. */
#define SPVC_C_API_VERSION_MINOR 6
#define SPVC_C_API_VERSION_MINOR 9
/* Bumped if internal implementation details change. */
#define SPVC_C_API_VERSION_PATCH 0
@ -111,6 +111,14 @@ typedef struct spvc_specialization_constant
unsigned constant_id;
} spvc_specialization_constant;
/* See C++ API. */
typedef struct spvc_buffer_range
{
unsigned index;
size_t offset;
size_t range;
} spvc_buffer_range;
/* See C++ API. */
typedef struct spvc_hlsl_root_constants
{
@ -290,9 +298,12 @@ SPVC_PUBLIC_API void spvc_msl_resource_binding_init(spvc_msl_resource_binding *b
#define SPVC_MSL_PUSH_CONSTANT_DESC_SET (~(0u))
#define SPVC_MSL_PUSH_CONSTANT_BINDING (0)
#define SPVC_MSL_SWIZZLE_BUFFER_BINDING (~(1u))
/* Obsolete. Sticks around for backwards compatibility. */
#define SPVC_MSL_AUX_BUFFER_STRUCT_VERSION 1
/* Runtime check for incompatibility. */
/* Runtime check for incompatibility. Obsolete. */
SPVC_PUBLIC_API unsigned spvc_msl_get_aux_buffer_struct_version(void);
/* Maps to C++ API. */
@ -407,7 +418,11 @@ typedef enum spvc_compiler_option
SPVC_COMPILER_OPTION_MSL_VERSION = 17 | SPVC_COMPILER_OPTION_MSL_BIT,
SPVC_COMPILER_OPTION_MSL_TEXEL_BUFFER_TEXTURE_WIDTH = 18 | SPVC_COMPILER_OPTION_MSL_BIT,
/* Obsolete, use SWIZZLE_BUFFER_INDEX instead. */
SPVC_COMPILER_OPTION_MSL_AUX_BUFFER_INDEX = 19 | SPVC_COMPILER_OPTION_MSL_BIT,
SPVC_COMPILER_OPTION_MSL_SWIZZLE_BUFFER_INDEX = 19 | SPVC_COMPILER_OPTION_MSL_BIT,
SPVC_COMPILER_OPTION_MSL_INDIRECT_PARAMS_BUFFER_INDEX = 20 | SPVC_COMPILER_OPTION_MSL_BIT,
SPVC_COMPILER_OPTION_MSL_SHADER_OUTPUT_BUFFER_INDEX = 21 | SPVC_COMPILER_OPTION_MSL_BIT,
SPVC_COMPILER_OPTION_MSL_SHADER_PATCH_OUTPUT_BUFFER_INDEX = 22 | SPVC_COMPILER_OPTION_MSL_BIT,
@ -426,6 +441,8 @@ typedef enum spvc_compiler_option
SPVC_COMPILER_OPTION_MSL_TEXTURE_BUFFER_NATIVE = 34 | SPVC_COMPILER_OPTION_MSL_BIT,
SPVC_COMPILER_OPTION_GLSL_EMIT_UNIFORM_BUFFER_AS_PLAIN_UNIFORMS = 35 | SPVC_COMPILER_OPTION_GLSL_BIT,
SPVC_COMPILER_OPTION_INT_MAX = 0x7fffffff
} spvc_compiler_option;
@ -503,7 +520,11 @@ SPVC_PUBLIC_API spvc_variable_id spvc_compiler_hlsl_remap_num_workgroups_builtin
* Maps to C++ API.
*/
SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_is_rasterization_disabled(spvc_compiler compiler);
/* Obsolete. Renamed to needs_swizzle_buffer. */
SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_needs_aux_buffer(spvc_compiler compiler);
SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_needs_swizzle_buffer(spvc_compiler compiler);
SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_needs_output_buffer(spvc_compiler compiler);
SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_needs_patch_output_buffer(spvc_compiler compiler);
SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_needs_input_threadgroup_mem(spvc_compiler compiler);
@ -565,6 +586,7 @@ SPVC_PUBLIC_API unsigned spvc_compiler_get_member_decoration(spvc_compiler compi
unsigned member_index, SpvDecoration decoration);
SPVC_PUBLIC_API const char *spvc_compiler_get_member_decoration_string(spvc_compiler compiler, spvc_type_id id,
unsigned member_index, SpvDecoration decoration);
SPVC_PUBLIC_API const char *spvc_compiler_get_member_name(spvc_compiler compiler, spvc_type_id id, unsigned member_index);
/*
* Entry points.
@ -657,6 +679,15 @@ SPVC_PUBLIC_API spvc_constant_id spvc_compiler_get_work_group_size_specializatio
spvc_specialization_constant *y,
spvc_specialization_constant *z);
/*
* Buffer ranges
* Maps to C++ API.
*/
SPVC_PUBLIC_API spvc_result spvc_compiler_get_active_buffer_ranges(spvc_compiler compiler,
spvc_variable_id id,
const spvc_buffer_range **ranges,
size_t *num_ranges);
/*
* No stdint.h until C99, sigh :(
* For smaller types, the result is sign or zero-extended as appropriate.

View File

@ -1459,9 +1459,19 @@ string CompilerGLSL::layout_for_variable(const SPIRVariable &var)
attr.push_back(join("set = ", dec.set));
}
bool push_constant_block = options.vulkan_semantics && var.storage == StorageClassPushConstant;
bool ssbo_block = var.storage == StorageClassStorageBuffer ||
(var.storage == StorageClassUniform && typeflags.get(DecorationBufferBlock));
bool emulated_ubo = var.storage == StorageClassPushConstant && options.emit_push_constant_as_uniform_buffer;
bool ubo_block = var.storage == StorageClassUniform && typeflags.get(DecorationBlock);
// GL 3.0/GLSL 1.30 is not considered legacy, but it doesn't have UBOs ...
bool can_use_buffer_blocks = (options.es && options.version >= 300) || (!options.es && options.version >= 140);
// pretend no UBOs when options say so
if (ubo_block && options.emit_uniform_buffer_as_plain_uniforms)
can_use_buffer_blocks = false;
bool can_use_binding;
if (options.es)
can_use_binding = options.version >= 310;
@ -1478,12 +1488,6 @@ string CompilerGLSL::layout_for_variable(const SPIRVariable &var)
if (flags.get(DecorationOffset))
attr.push_back(join("offset = ", dec.offset));
bool push_constant_block = options.vulkan_semantics && var.storage == StorageClassPushConstant;
bool ssbo_block = var.storage == StorageClassStorageBuffer ||
(var.storage == StorageClassUniform && typeflags.get(DecorationBufferBlock));
bool emulated_ubo = var.storage == StorageClassPushConstant && options.emit_push_constant_as_uniform_buffer;
bool ubo_block = var.storage == StorageClassUniform && typeflags.get(DecorationBlock);
// Instead of adding explicit offsets for every element here, just assume we're using std140 or std430.
// If SPIR-V does not comply with either layout, we cannot really work around it.
if (can_use_buffer_blocks && (ubo_block || emulated_ubo))
@ -1611,9 +1615,13 @@ void CompilerGLSL::emit_push_constant_block_glsl(const SPIRVariable &var)
void CompilerGLSL::emit_buffer_block(const SPIRVariable &var)
{
auto &type = get<SPIRType>(var.basetype);
bool ubo_block = var.storage == StorageClassUniform && has_decoration(type.self, DecorationBlock);
if (flattened_buffer_blocks.count(var.self))
emit_buffer_block_flattened(var);
else if (is_legacy() || (!options.es && options.version == 130))
else if (is_legacy() || (!options.es && options.version == 130) ||
(ubo_block && options.emit_uniform_buffer_as_plain_uniforms))
emit_buffer_block_legacy(var);
else
emit_buffer_block_native(var);
@ -5772,6 +5780,10 @@ case OpGroupNonUniform##op: \
string CompilerGLSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &in_type)
{
// OpBitcast can deal with pointers.
if (out_type.pointer || in_type.pointer)
return type_to_glsl(out_type);
if (out_type.basetype == in_type.basetype)
return "";
@ -7566,7 +7578,8 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
uint32_t result_type = ops[0];
uint32_t id = ops[1];
auto e = access_chain_internal(ops[2], &ops[3], length - 3, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr);
set<SPIRExpression>(id, e + ".length()", result_type, true);
set<SPIRExpression>(id, join(type_to_glsl(get<SPIRType>(result_type)), "(", e, ".length())"), result_type,
true);
break;
}
@ -9913,7 +9926,7 @@ string CompilerGLSL::to_array_size(const SPIRType &type, uint32_t index)
return to_expression(size);
else if (size)
return convert_to_string(size);
else if (!backend.flexible_member_array_supported)
else if (!backend.unsized_array_supported)
{
// For runtime-sized arrays, we can work around
// lack of standard support for this by simply having
@ -11832,6 +11845,9 @@ void CompilerGLSL::bitcast_to_builtin_store(uint32_t target_id, std::string &exp
void CompilerGLSL::convert_non_uniform_expression(const SPIRType &type, std::string &expr)
{
if (*backend.nonuniform_qualifier == '\0')
return;
// Handle SPV_EXT_descriptor_indexing.
if (type.basetype == SPIRType::Sampler || type.basetype == SPIRType::SampledImage ||
type.basetype == SPIRType::Image)

View File

@ -99,6 +99,10 @@ public:
// In non-Vulkan GLSL, emit push constant blocks as UBOs rather than plain uniforms.
bool emit_push_constant_as_uniform_buffer = false;
// Always emit uniform blocks as plain uniforms, regardless of the GLSL version, even when UBOs are supported.
// Does not apply to shader storage or push constant blocks.
bool emit_uniform_buffer_as_plain_uniforms = false;
enum Precision
{
DontCare,
@ -375,7 +379,7 @@ protected:
const char *nonuniform_qualifier = "nonuniformEXT";
bool swizzle_is_function = false;
bool shared_is_implied = false;
bool flexible_member_array_supported = true;
bool unsized_array_supported = true;
bool explicit_struct_type = false;
bool use_initializer_list = false;
bool use_typed_initializer_list = false;

View File

@ -4508,6 +4508,25 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
HLSL_UFOP(reversebits);
break;
case OpArrayLength:
{
auto *var = maybe_get<SPIRVariable>(ops[2]);
if (!var)
SPIRV_CROSS_THROW("Array length must point directly to an SSBO block.");
auto &type = get<SPIRType>(var->basetype);
if (!has_decoration(type.self, DecorationBlock) && !has_decoration(type.self, DecorationBufferBlock))
SPIRV_CROSS_THROW("Array length expression must point to a block type.");
// This must be 32-bit uint, so we're good to go.
emit_uninitialized_temporary_expression(ops[0], ops[1]);
statement(to_expression(ops[2]), ".GetDimensions(", to_expression(ops[1]), ");");
uint32_t offset = type_struct_member_offset(type, ops[3]);
uint32_t stride = type_struct_member_array_stride(type, ops[3]);
statement(to_expression(ops[1]), " = (", to_expression(ops[1]), " - ", offset, ") / ", stride, ";");
break;
}
default:
CompilerGLSL::emit_instruction(instruction);
break;
@ -4628,6 +4647,28 @@ uint32_t CompilerHLSL::remap_num_workgroups_builtin()
return variable_id;
}
void CompilerHLSL::validate_shader_model()
{
// Check for nonuniform qualifier.
// Instead of looping over all decorations to find this, just look at capabilities.
for (auto &cap : ir.declared_capabilities)
{
switch (cap)
{
case CapabilityShaderNonUniformEXT:
case CapabilityRuntimeDescriptorArrayEXT:
if (hlsl_options.shader_model < 51)
SPIRV_CROSS_THROW(
"Shader model 5.1 or higher is required to use bindless resources or NonUniformResourceIndex.");
default:
break;
}
}
if (ir.addressing_model != AddressingModelLogical)
SPIRV_CROSS_THROW("Only Logical addressing model can be used with HLSL.");
}
string CompilerHLSL::compile()
{
// Do not deal with ES-isms like precision, older extensions and such.
@ -4644,7 +4685,7 @@ string CompilerHLSL::compile()
backend.basic_uint_type = "uint";
backend.swizzle_is_function = false;
backend.shared_is_implied = true;
backend.flexible_member_array_supported = false;
backend.unsized_array_supported = true;
backend.explicit_struct_type = false;
backend.use_initializer_list = true;
backend.use_constructor_splatting = false;
@ -4653,8 +4694,10 @@ string CompilerHLSL::compile()
backend.can_declare_struct_inline = false;
backend.can_declare_arrays_inline = false;
backend.can_return_array = false;
backend.nonuniform_qualifier = "NonUniformResourceIndex";
build_function_control_flow_graphs_and_analyze();
validate_shader_model();
update_active_builtins();
analyze_image_and_sampler_usage();

View File

@ -220,6 +220,8 @@ private:
// Custom root constant layout, which should be emitted
// when translating push constant ranges.
std::vector<RootConstants> root_constants_layout;
void validate_shader_model();
};
} // namespace SPIRV_CROSS_NAMESPACE

File diff suppressed because it is too large Load Diff

View File

@ -152,11 +152,11 @@ static const uint32_t kPushConstDescSet = ~(0u);
// element to indicate the bindings for the push constants.
static const uint32_t kPushConstBinding = 0;
static const uint32_t kMaxArgumentBuffers = 8;
// Special constant used in a MSLResourceBinding binding
// element to indicate the buffer binding for swizzle buffers.
static const uint32_t kSwizzleBufferBinding = ~(1u);
// The current version of the aux buffer structure. It must be incremented any time a
// new field is added to the aux buffer.
#define SPIRV_CROSS_MSL_AUX_BUFFER_STRUCT_VERSION 1
static const uint32_t kMaxArgumentBuffers = 8;
// Decompiles SPIR-V to Metal Shading Language
class CompilerMSL : public CompilerGLSL
@ -174,7 +174,7 @@ public:
Platform platform = macOS;
uint32_t msl_version = make_msl_version(1, 2);
uint32_t texel_buffer_texture_width = 4096; // Width of 2D Metal textures used as 1D texel buffers
uint32_t aux_buffer_index = 30;
uint32_t swizzle_buffer_index = 30;
uint32_t indirect_params_buffer_index = 29;
uint32_t shader_output_buffer_index = 28;
uint32_t shader_patch_output_buffer_index = 27;
@ -243,10 +243,10 @@ public:
}
// Provide feedback to calling API to allow it to pass an auxiliary
// buffer if the shader needs it.
bool needs_aux_buffer() const
// swizzle buffer if the shader needs it.
bool needs_swizzle_buffer() const
{
return used_aux_buffer;
return used_swizzle_buffer;
}
// Provide feedback to calling API to allow it to pass an output
@ -344,6 +344,12 @@ protected:
SPVFuncImplRowMajor4x2,
SPVFuncImplRowMajor4x3,
SPVFuncImplTextureSwizzle,
SPVFuncImplSubgroupBallot,
SPVFuncImplSubgroupBallotBitExtract,
SPVFuncImplSubgroupBallotFindLSB,
SPVFuncImplSubgroupBallotFindMSB,
SPVFuncImplSubgroupBallotBitCount,
SPVFuncImplSubgroupAllEqual,
SPVFuncImplArrayCopyMultidimMax = 6
};
@ -354,6 +360,7 @@ protected:
void emit_header() override;
void emit_function_prototype(SPIRFunction &func, const Bitset &return_flags) override;
void emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id) override;
void emit_subgroup_op(const Instruction &i) override;
void emit_fixup() override;
std::string to_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index,
const std::string &qualifier = "");
@ -477,7 +484,9 @@ protected:
uint32_t builtin_base_instance_id = 0;
uint32_t builtin_invocation_id_id = 0;
uint32_t builtin_primitive_id_id = 0;
uint32_t aux_buffer_id = 0;
uint32_t builtin_subgroup_invocation_id_id = 0;
uint32_t builtin_subgroup_size_id = 0;
uint32_t swizzle_buffer_id = 0;
void bitcast_to_builtin_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type) override;
void bitcast_from_builtin_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type) override;
@ -515,9 +524,10 @@ protected:
bool needs_instance_idx_arg = false;
bool is_rasterization_disabled = false;
bool capture_output_to_buffer = false;
bool needs_aux_buffer_def = false;
bool used_aux_buffer = false;
bool needs_swizzle_buffer_def = false;
bool used_swizzle_buffer = false;
bool added_builtin_tess_level = false;
bool needs_subgroup_invocation_id = false;
std::string qual_pos_var_name;
std::string stage_in_var_name = "in";
std::string stage_out_var_name = "out";
@ -561,6 +571,7 @@ protected:
bool suppress_missing_prototypes = false;
bool uses_atomics = false;
bool uses_resource_write = false;
bool needs_subgroup_invocation_id = false;
};
// OpcodeHandler that scans for uses of sampled images

View File

@ -59,6 +59,7 @@ static bool is_valid_spirv_version(uint32_t version)
case 0x10100: // SPIR-V 1.1
case 0x10200: // SPIR-V 1.2
case 0x10300: // SPIR-V 1.3
case 0x10400: // SPIR-V 1.4
return true;
default:

View File

@ -143,7 +143,7 @@ def cross_compile_msl(shader, spirv, opt, iterations, paths):
spirv_path = create_temporary()
msl_path = create_temporary(os.path.basename(shader))
spirv_cmd = [paths.spirv_as, '-o', spirv_path, shader]
spirv_cmd = [paths.spirv_as, '--target-env', 'vulkan1.1', '-o', spirv_path, shader]
if '.preserve.' in shader:
spirv_cmd.append('--preserve-numeric-ids')
@ -220,13 +220,18 @@ def shader_to_win_path(shader):
ignore_fxc = False
def validate_shader_hlsl(shader, force_no_external_validation, paths):
subprocess.check_call([paths.glslang, '-e', 'main', '-D', '--target-env', 'vulkan1.1', '-V', shader])
if not '.nonuniformresource' in shader:
# glslang HLSL does not support this, so rely on fxc to test it.
subprocess.check_call([paths.glslang, '-e', 'main', '-D', '--target-env', 'vulkan1.1', '-V', shader])
is_no_fxc = '.nofxc.' in shader
global ignore_fxc
if (not ignore_fxc) and (not force_no_external_validation) and (not is_no_fxc):
try:
win_path = shader_to_win_path(shader)
subprocess.check_call(['fxc', '-nologo', shader_model_hlsl(shader), win_path])
args = ['fxc', '-nologo', shader_model_hlsl(shader), win_path]
if '.nonuniformresource.' in shader:
args.append('/enable_unbounded_descriptor_tables')
subprocess.check_call(args)
except OSError as oe:
if (oe.errno != errno.ENOENT): # Ignore not found errors
print('Failed to run FXC.')
@ -253,7 +258,7 @@ def cross_compile_hlsl(shader, spirv, opt, force_no_external_validation, iterati
spirv_path = create_temporary()
hlsl_path = create_temporary(os.path.basename(shader))
spirv_cmd = [paths.spirv_as, '-o', spirv_path, shader]
spirv_cmd = [paths.spirv_as, '--target-env', 'vulkan1.1', '-o', spirv_path, shader]
if '.preserve.' in shader:
spirv_cmd.append('--preserve-numeric-ids')
@ -281,7 +286,7 @@ def cross_compile_reflect(shader, spirv, opt, iterations, paths):
spirv_path = create_temporary()
reflect_path = create_temporary(os.path.basename(shader))
spirv_cmd = [paths.spirv_as, '-o', spirv_path, shader]
spirv_cmd = [paths.spirv_as, '--target-env', 'vulkan1.1', '-o', spirv_path, shader]
if '.preserve.' in shader:
spirv_cmd.append('--preserve-numeric-ids')
@ -312,7 +317,7 @@ def cross_compile(shader, vulkan, spirv, invalid_spirv, eliminate, is_legacy, fl
if vulkan or spirv:
vulkan_glsl_path = create_temporary('vk' + os.path.basename(shader))
spirv_cmd = [paths.spirv_as, '-o', spirv_path, shader]
spirv_cmd = [paths.spirv_as, '--target-env', 'vulkan1.1', '-o', spirv_path, shader]
if '.preserve.' in shader:
spirv_cmd.append('--preserve-numeric-ids')

View File

@ -116,12 +116,32 @@ int main(int argc, char **argv)
SpvId *buffer = NULL;
size_t word_count = 0;
if (argc != 2)
if (argc != 5)
return 1;
if (read_file(argv[1], &buffer, &word_count) < 0)
return 1;
unsigned abi_major, abi_minor, abi_patch;
spvc_get_version(&abi_major, &abi_minor, &abi_patch);
if (abi_major != strtoul(argv[2], NULL, 0))
{
fprintf(stderr, "VERSION_MAJOR mismatch!\n");
return 1;
}
if (abi_minor != strtoul(argv[3], NULL, 0))
{
fprintf(stderr, "VERSION_MINOR mismatch!\n");
return 1;
}
if (abi_patch != strtoul(argv[4], NULL, 0))
{
fprintf(stderr, "VERSION_PATCH mismatch!\n");
return 1;
}
SPVC_CHECKED_CALL(spvc_context_create(&context));
spvc_context_set_error_callback(context, error_callback, NULL);
SPVC_CHECKED_CALL(spvc_context_parse_spirv(context, buffer, word_count, &ir));