From 932302d8f460e514b933deba8c0e575a00f0bcd6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=91=D1=80=D0=B0=D0=BD=D0=B8=D0=BC=D0=B8=D1=80=20=D0=9A?= =?UTF-8?q?=D0=B0=D1=80=D0=B0=D1=9F=D0=B8=D1=9B?= Date: Thu, 18 Apr 2024 10:07:40 -0700 Subject: [PATCH] Compute: Rename Write Only suffix _WR to _WO. --- examples/08-update/cs_update.sc | 2 +- examples/24-nbody/cs_indirect.sc | 2 +- examples/24-nbody/cs_init_instances.sc | 4 +- examples/24-nbody/cs_update_instances.sc | 4 +- .../37-gpudrivenrendering/cs_gdr_copy_z.sc | 2 +- .../cs_gdr_downscale_hi_z.sc | 2 +- .../cs_gdr_occlude_props.sc | 2 +- .../cs_gdr_stream_compaction.sc | 2 +- examples/39-assao/cs_assao_apply.sc | 2 +- .../cs_assao_generate_importance_map.sc | 2 +- examples/39-assao/cs_assao_generate_q.sh | 2 +- .../39-assao/cs_assao_load_counter_clear.sc | 2 +- examples/39-assao/cs_assao_non_smart_apply.sc | 2 +- examples/39-assao/cs_assao_non_smart_blur.sc | 2 +- .../39-assao/cs_assao_non_smart_half_apply.sc | 2 +- .../cs_assao_postprocess_importance_map_a.sc | 2 +- .../cs_assao_postprocess_importance_map_b.sc | 2 +- .../39-assao/cs_assao_prepare_depth_mip.sc | 8 +- examples/39-assao/cs_assao_prepare_depths.sc | 8 +- .../cs_assao_prepare_depths_and_normals.sc | 10 +- ...s_assao_prepare_depths_and_normals_half.sc | 6 +- .../39-assao/cs_assao_prepare_depths_half.sc | 4 +- examples/39-assao/cs_assao_smart_blur.sc | 2 +- examples/39-assao/cs_assao_smart_blur_wide.sc | 2 +- examples/41-tess/cs_terrain_init.sc | 4 +- examples/46-fsr/cs_fsr.h | 140 +++++++++--------- examples/48-drawindirect/cs_drawindirect.sc | 6 +- src/bgfx_compute.sh | 30 ++-- 28 files changed, 130 insertions(+), 128 deletions(-) diff --git a/examples/08-update/cs_update.sc b/examples/08-update/cs_update.sc index 54566d834..aa065c964 100644 --- a/examples/08-update/cs_update.sc +++ b/examples/08-update/cs_update.sc @@ -5,7 +5,7 @@ #include "bgfx_compute.sh" -IMAGE2D_ARRAY_WR(s_texColor, rgba8, 0); +IMAGE2D_ARRAY_WO(s_texColor, rgba8, 0); uniform vec4 
u_time; NUM_THREADS(16, 16, 1) diff --git a/examples/24-nbody/cs_indirect.sc b/examples/24-nbody/cs_indirect.sc index e06802f4f..558fe0dc4 100644 --- a/examples/24-nbody/cs_indirect.sc +++ b/examples/24-nbody/cs_indirect.sc @@ -6,7 +6,7 @@ #include "bgfx_compute.sh" #include "uniforms.sh" -BUFFER_WR(indirectBuffer, uvec4, 0); +BUFFER_WO(indirectBuffer, uvec4, 0); NUM_THREADS(1, 1, 1) void main() diff --git a/examples/24-nbody/cs_init_instances.sc b/examples/24-nbody/cs_init_instances.sc index 0844979d1..8c7b1a916 100644 --- a/examples/24-nbody/cs_init_instances.sc +++ b/examples/24-nbody/cs_init_instances.sc @@ -6,8 +6,8 @@ #include "bgfx_compute.sh" #include "uniforms.sh" -BUFFER_WR(prevPositionBuffer, vec4, 0); -BUFFER_WR(currPositionBuffer, vec4, 1); +BUFFER_WO(prevPositionBuffer, vec4, 0); +BUFFER_WO(currPositionBuffer, vec4, 1); uint rotl(uint _x, uint _r) { diff --git a/examples/24-nbody/cs_update_instances.sc b/examples/24-nbody/cs_update_instances.sc index 7031cb572..159a62fb1 100644 --- a/examples/24-nbody/cs_update_instances.sc +++ b/examples/24-nbody/cs_update_instances.sc @@ -8,8 +8,8 @@ BUFFER_RO(prevPositionBuffer, vec4, 0); BUFFER_RO(currPositionBuffer, vec4, 1); -BUFFER_WR(outPrevPositionBuffer, vec4, 2); -BUFFER_WR(outCurrPositionBuffer, vec4, 3); +BUFFER_WO(outPrevPositionBuffer, vec4, 2); +BUFFER_WO(outCurrPositionBuffer, vec4, 3); #define GROUP_SIZE 512 SHARED vec3 otherEntries[GROUP_SIZE]; diff --git a/examples/37-gpudrivenrendering/cs_gdr_copy_z.sc b/examples/37-gpudrivenrendering/cs_gdr_copy_z.sc index c5732156b..71656f366 100644 --- a/examples/37-gpudrivenrendering/cs_gdr_copy_z.sc +++ b/examples/37-gpudrivenrendering/cs_gdr_copy_z.sc @@ -6,7 +6,7 @@ #include "bgfx_compute.sh" SAMPLER2D(s_texOcclusionDepth, 0); -IMAGE2D_WR(s_texOcclusionDepthOut, r32f, 1); +IMAGE2D_WO(s_texOcclusionDepthOut, r32f, 1); uniform vec4 u_inputRTSize; diff --git a/examples/37-gpudrivenrendering/cs_gdr_downscale_hi_z.sc 
b/examples/37-gpudrivenrendering/cs_gdr_downscale_hi_z.sc index cb3941638..f8a2803e0 100644 --- a/examples/37-gpudrivenrendering/cs_gdr_downscale_hi_z.sc +++ b/examples/37-gpudrivenrendering/cs_gdr_downscale_hi_z.sc @@ -6,7 +6,7 @@ #include "bgfx_compute.sh" IMAGE2D_RO(s_texOcclusionDepthIn, r32f, 0); -IMAGE2D_WR(s_texOcclusionDepthOut, r32f, 1); +IMAGE2D_WO(s_texOcclusionDepthOut, r32f, 1); uniform vec4 u_inputRTSize; diff --git a/examples/37-gpudrivenrendering/cs_gdr_occlude_props.sc b/examples/37-gpudrivenrendering/cs_gdr_occlude_props.sc index fd4ae946f..2cc260d4a 100644 --- a/examples/37-gpudrivenrendering/cs_gdr_occlude_props.sc +++ b/examples/37-gpudrivenrendering/cs_gdr_occlude_props.sc @@ -9,7 +9,7 @@ SAMPLER2D(s_texOcclusionDepth, 0); BUFFER_RO(instanceDataIn, vec4, 1); BUFFER_RW(drawcallInstanceCount, uint, 2); -BUFFER_WR(instancePredicates, bool, 3); +BUFFER_WO(instancePredicates, bool, 3); uniform vec4 u_inputRTSize; uniform vec4 u_cullingConfig; diff --git a/examples/37-gpudrivenrendering/cs_gdr_stream_compaction.sc b/examples/37-gpudrivenrendering/cs_gdr_stream_compaction.sc index 058d1eafb..24126e9da 100644 --- a/examples/37-gpudrivenrendering/cs_gdr_stream_compaction.sc +++ b/examples/37-gpudrivenrendering/cs_gdr_stream_compaction.sc @@ -17,7 +17,7 @@ BUFFER_RW(drawcallInstanceCount, uint, 3); //drawcall data that will drive drawIndirect BUFFER_RW(drawcallData, uvec4, 4); //culled instance data -BUFFER_WR(instanceDataOut, vec4, 5); +BUFFER_WO(instanceDataOut, vec4, 5); uniform vec4 u_cullingConfig; diff --git a/examples/39-assao/cs_assao_apply.sc b/examples/39-assao/cs_assao_apply.sc index d6148fb3b..e8687d2be 100644 --- a/examples/39-assao/cs_assao_apply.sc +++ b/examples/39-assao/cs_assao_apply.sc @@ -6,7 +6,7 @@ #include "bgfx_compute.sh" #include "uniforms.sh" -IMAGE2D_WR(s_target, r8, 0); +IMAGE2D_WO(s_target, r8, 0); SAMPLER2DARRAY(s_finalSSAO, 1); // unpacking for edges; 2 bits per edge mean 4 gradient values (0, 0.33, 0.66, 1) for smoother 
transitions! diff --git a/examples/39-assao/cs_assao_generate_importance_map.sc b/examples/39-assao/cs_assao_generate_importance_map.sc index 7905c8196..3aea2b665 100644 --- a/examples/39-assao/cs_assao_generate_importance_map.sc +++ b/examples/39-assao/cs_assao_generate_importance_map.sc @@ -6,7 +6,7 @@ #include "bgfx_compute.sh" #include "uniforms.sh" -IMAGE2D_WR(s_target, r8, 0); +IMAGE2D_WO(s_target, r8, 0); SAMPLER2DARRAY(s_finalSSAO, 1); NUM_THREADS(8, 8, 1) diff --git a/examples/39-assao/cs_assao_generate_q.sh b/examples/39-assao/cs_assao_generate_q.sh index 5b4726e69..f39a70cfc 100644 --- a/examples/39-assao/cs_assao_generate_q.sh +++ b/examples/39-assao/cs_assao_generate_q.sh @@ -62,7 +62,7 @@ IMAGE2D_RO(s_normalmapSource, rgba8, 2); BUFFER_RO(s_loadCounter, uint, 3); SAMPLER2D(s_importanceMap, 4); IMAGE2D_ARRAY_RO(s_baseSSAO, rg8, 5); -IMAGE2D_ARRAY_WR(s_target, rg8, 6); +IMAGE2D_ARRAY_WO(s_target, rg8, 6); // packing/unpacking for edges; 2 bits per edge mean 4 gradient values (0, 0.33, 0.66, 1) for smoother transitions! 
float PackEdges( vec4 edgesLRTB ) diff --git a/examples/39-assao/cs_assao_load_counter_clear.sc b/examples/39-assao/cs_assao_load_counter_clear.sc index ef7c1bfac..151c9ce46 100644 --- a/examples/39-assao/cs_assao_load_counter_clear.sc +++ b/examples/39-assao/cs_assao_load_counter_clear.sc @@ -6,7 +6,7 @@ #include "bgfx_compute.sh" #include "uniforms.sh" -BUFFER_WR(s_loadCounter, uint, 0); +BUFFER_WO(s_loadCounter, uint, 0); NUM_THREADS(1, 1, 1) void main() diff --git a/examples/39-assao/cs_assao_non_smart_apply.sc b/examples/39-assao/cs_assao_non_smart_apply.sc index af55615dc..41c5ce7ad 100644 --- a/examples/39-assao/cs_assao_non_smart_apply.sc +++ b/examples/39-assao/cs_assao_non_smart_apply.sc @@ -6,7 +6,7 @@ #include "bgfx_compute.sh" #include "uniforms.sh" -IMAGE2D_WR(s_target, r8, 0); +IMAGE2D_WO(s_target, r8, 0); SAMPLER2DARRAY(s_finalSSAO, 1); // edge-ignorant blur & apply (for the lowest quality level 0) diff --git a/examples/39-assao/cs_assao_non_smart_blur.sc b/examples/39-assao/cs_assao_non_smart_blur.sc index 6c41d5d70..d319dba89 100644 --- a/examples/39-assao/cs_assao_non_smart_blur.sc +++ b/examples/39-assao/cs_assao_non_smart_blur.sc @@ -6,7 +6,7 @@ #include "bgfx_compute.sh" #include "uniforms.sh" -IMAGE2D_ARRAY_WR(s_target, rg8, 0); +IMAGE2D_ARRAY_WO(s_target, rg8, 0); SAMPLER2DARRAY(s_blurInput, 1); // edge-ignorant blur in x and y directions, 9 pixels touched (for the lowest quality level 0) diff --git a/examples/39-assao/cs_assao_non_smart_half_apply.sc b/examples/39-assao/cs_assao_non_smart_half_apply.sc index 5026b4896..4af5dfa59 100644 --- a/examples/39-assao/cs_assao_non_smart_half_apply.sc +++ b/examples/39-assao/cs_assao_non_smart_half_apply.sc @@ -6,7 +6,7 @@ #include "bgfx_compute.sh" #include "uniforms.sh" -IMAGE2D_WR(s_target, r8, 0); +IMAGE2D_WO(s_target, r8, 0); SAMPLER2DARRAY(s_finalSSAO, 1); // edge-ignorant blur & apply, skipping half pixels in checkerboard pattern (for the Lowest quality level 0 and 
Settings::SkipHalfPixelsOnLowQualityLevel == true ) diff --git a/examples/39-assao/cs_assao_postprocess_importance_map_a.sc b/examples/39-assao/cs_assao_postprocess_importance_map_a.sc index c11eb99ca..339b68c93 100644 --- a/examples/39-assao/cs_assao_postprocess_importance_map_a.sc +++ b/examples/39-assao/cs_assao_postprocess_importance_map_a.sc @@ -6,7 +6,7 @@ #include "bgfx_compute.sh" #include "uniforms.sh" -IMAGE2D_WR(s_target, r8, 0); +IMAGE2D_WO(s_target, r8, 0); SAMPLER2D(s_importanceMap, 1); // Shaders below only needed for adaptive quality level diff --git a/examples/39-assao/cs_assao_postprocess_importance_map_b.sc b/examples/39-assao/cs_assao_postprocess_importance_map_b.sc index 8696292d1..a0707fdbb 100644 --- a/examples/39-assao/cs_assao_postprocess_importance_map_b.sc +++ b/examples/39-assao/cs_assao_postprocess_importance_map_b.sc @@ -6,7 +6,7 @@ #include "bgfx_compute.sh" #include "uniforms.sh" -IMAGE2D_WR(s_target, r8, 0); +IMAGE2D_WO(s_target, r8, 0); SAMPLER2D(s_importanceMap, 1); BUFFER_RW(s_loadCounter, uint, 2); diff --git a/examples/39-assao/cs_assao_prepare_depth_mip.sc b/examples/39-assao/cs_assao_prepare_depth_mip.sc index c033bc799..a9e18b7f7 100644 --- a/examples/39-assao/cs_assao_prepare_depth_mip.sc +++ b/examples/39-assao/cs_assao_prepare_depth_mip.sc @@ -11,10 +11,10 @@ IMAGE2D_RO(s_viewspaceDepthSource1, r16f, 1); IMAGE2D_RO(s_viewspaceDepthSource2, r16f, 2); IMAGE2D_RO(s_viewspaceDepthSource3, r16f, 3); -IMAGE2D_WR(s_target0, r16f, 4); -IMAGE2D_WR(s_target1, r16f, 5); -IMAGE2D_WR(s_target2, r16f, 6); -IMAGE2D_WR(s_target3, r16f, 7); +IMAGE2D_WO(s_target0, r16f, 4); +IMAGE2D_WO(s_target1, r16f, 5); +IMAGE2D_WO(s_target2, r16f, 6); +IMAGE2D_WO(s_target3, r16f, 7); // calculate effect radius and fit our screen sampling pattern inside it void CalculateRadiusParameters( const float pixCenterLength, const vec2 pixelDirRBViewspaceSizeAtCenterZ, out float pixLookupRadiusMod, out float effectRadius, out float falloffCalcMulSq ) diff --git 
a/examples/39-assao/cs_assao_prepare_depths.sc b/examples/39-assao/cs_assao_prepare_depths.sc index 0b43c0e11..c19b93185 100644 --- a/examples/39-assao/cs_assao_prepare_depths.sc +++ b/examples/39-assao/cs_assao_prepare_depths.sc @@ -8,10 +8,10 @@ SAMPLER2D(s_depthSource, 0); -IMAGE2D_WR(s_target0, r16f, 1); -IMAGE2D_WR(s_target1, r16f, 2); -IMAGE2D_WR(s_target2, r16f, 3); -IMAGE2D_WR(s_target3, r16f, 4); +IMAGE2D_WO(s_target0, r16f, 1); +IMAGE2D_WO(s_target1, r16f, 2); +IMAGE2D_WO(s_target2, r16f, 3); +IMAGE2D_WO(s_target3, r16f, 4); float ScreenSpaceToViewSpaceDepth( float screenDepth ) { diff --git a/examples/39-assao/cs_assao_prepare_depths_and_normals.sc b/examples/39-assao/cs_assao_prepare_depths_and_normals.sc index 836a05439..f7883cfe4 100644 --- a/examples/39-assao/cs_assao_prepare_depths_and_normals.sc +++ b/examples/39-assao/cs_assao_prepare_depths_and_normals.sc @@ -8,11 +8,11 @@ SAMPLER2D(s_depthSource, 0); -IMAGE2D_WR(s_target0, r16f, 1); -IMAGE2D_WR(s_target1, r16f, 2); -IMAGE2D_WR(s_target2, r16f, 3); -IMAGE2D_WR(s_target3, r16f, 4); -IMAGE2D_WR(s_normalsOutputUAV, rgba8, 5); +IMAGE2D_WO(s_target0, r16f, 1); +IMAGE2D_WO(s_target1, r16f, 2); +IMAGE2D_WO(s_target2, r16f, 3); +IMAGE2D_WO(s_target3, r16f, 4); +IMAGE2D_WO(s_normalsOutputUAV, rgba8, 5); float ScreenSpaceToViewSpaceDepth( float screenDepth ) { diff --git a/examples/39-assao/cs_assao_prepare_depths_and_normals_half.sc b/examples/39-assao/cs_assao_prepare_depths_and_normals_half.sc index 279de7191..8980f85d8 100644 --- a/examples/39-assao/cs_assao_prepare_depths_and_normals_half.sc +++ b/examples/39-assao/cs_assao_prepare_depths_and_normals_half.sc @@ -8,9 +8,9 @@ SAMPLER2D(s_depthSource, 0); -IMAGE2D_WR(s_target0, r16f, 1); -IMAGE2D_WR(s_target1, r16f, 2); -IMAGE2D_WR(s_normalsOutputUAV, rgba8, 5); +IMAGE2D_WO(s_target0, r16f, 1); +IMAGE2D_WO(s_target1, r16f, 2); +IMAGE2D_WO(s_normalsOutputUAV, rgba8, 5); float ScreenSpaceToViewSpaceDepth( float screenDepth ) { diff --git 
a/examples/39-assao/cs_assao_prepare_depths_half.sc b/examples/39-assao/cs_assao_prepare_depths_half.sc index 83b913afc..e25e22994 100644 --- a/examples/39-assao/cs_assao_prepare_depths_half.sc +++ b/examples/39-assao/cs_assao_prepare_depths_half.sc @@ -7,8 +7,8 @@ #include "uniforms.sh" SAMPLER2D(s_depthSource, 0); -IMAGE2D_WR(s_target0, r16f, 1); -IMAGE2D_WR(s_target1, r16f, 2); +IMAGE2D_WO(s_target0, r16f, 1); +IMAGE2D_WO(s_target1, r16f, 2); float ScreenSpaceToViewSpaceDepth( float screenDepth ) { diff --git a/examples/39-assao/cs_assao_smart_blur.sc b/examples/39-assao/cs_assao_smart_blur.sc index 5ddf435fb..b50ac7849 100644 --- a/examples/39-assao/cs_assao_smart_blur.sc +++ b/examples/39-assao/cs_assao_smart_blur.sc @@ -6,7 +6,7 @@ #include "bgfx_compute.sh" #include "uniforms.sh" -IMAGE2D_ARRAY_WR(s_target, rg8, 0); +IMAGE2D_ARRAY_WO(s_target, rg8, 0); SAMPLER2DARRAY(s_blurInput, 1); // unpacking for edges; 2 bits per edge mean 4 gradient values (0, 0.33, 0.66, 1) for smoother transitions! diff --git a/examples/39-assao/cs_assao_smart_blur_wide.sc b/examples/39-assao/cs_assao_smart_blur_wide.sc index e6752700b..89e0d47b7 100644 --- a/examples/39-assao/cs_assao_smart_blur_wide.sc +++ b/examples/39-assao/cs_assao_smart_blur_wide.sc @@ -6,7 +6,7 @@ #include "bgfx_compute.sh" #include "uniforms.sh" -IMAGE2D_ARRAY_WR(s_target, rg8, 0); +IMAGE2D_ARRAY_WO(s_target, rg8, 0); SAMPLER2DARRAY(s_blurInput, 1); // unpacking for edges; 2 bits per edge mean 4 gradient values (0, 0.33, 0.66, 1) for smoother transitions! 
diff --git a/examples/41-tess/cs_terrain_init.sc b/examples/41-tess/cs_terrain_init.sc index b15f23970..1652b2ad8 100644 --- a/examples/41-tess/cs_terrain_init.sc +++ b/examples/41-tess/cs_terrain_init.sc @@ -2,11 +2,11 @@ #include "uniforms.sh" -BUFFER_WR(u_SubdBufferOut, uint, 1); +BUFFER_WO(u_SubdBufferOut, uint, 1); BUFFER_RW(u_CulledSubdBuffer, uint, 2); BUFFER_RW(indirectBuffer, uvec4, 3); BUFFER_RW(atomicCounterBuffer, uint, 4); -BUFFER_WR(u_SubdBufferIn, uint, 8); +BUFFER_WO(u_SubdBufferIn, uint, 8); NUM_THREADS(1u, 1u, 1u) void main() diff --git a/examples/46-fsr/cs_fsr.h b/examples/46-fsr/cs_fsr.h index 928ec47ed..1d17c705c 100644 --- a/examples/46-fsr/cs_fsr.h +++ b/examples/46-fsr/cs_fsr.h @@ -24,36 +24,36 @@ uniform vec4 u_params[3]; #endif // BGFX_SHADER_LANGUAGE_GLSL #if SAMPLE_SLOW_FALLBACK - #include "ffx_a.h" - SAMPLER2D(InputTexture, 0); - IMAGE2D_WR(OutputTexture, rgba32f, 1); - #if SAMPLE_EASU - #define FSR_EASU_F 1 - AF4 FsrEasuRF(AF2 p) { AF4 res = textureGather(InputTexture, p, 0); return res; } - AF4 FsrEasuGF(AF2 p) { AF4 res = textureGather(InputTexture, p, 1); return res; } - AF4 FsrEasuBF(AF2 p) { AF4 res = textureGather(InputTexture, p, 2); return res; } - #endif - #if SAMPLE_RCAS - #define FSR_RCAS_F - AF4 FsrRcasLoadF(ASU2 p) { return texelFetch(InputTexture, ASU2(p), 0); } - void FsrRcasInputF(inout AF1 r, inout AF1 g, inout AF1 b) {} - #endif +# include "ffx_a.h" + SAMPLER2D(InputTexture, 0); + IMAGE2D_WO(OutputTexture, rgba32f, 1); +# if SAMPLE_EASU + #define FSR_EASU_F 1 + AF4 FsrEasuRF(AF2 p) { AF4 res = textureGather(InputTexture, p, 0); return res; } + AF4 FsrEasuGF(AF2 p) { AF4 res = textureGather(InputTexture, p, 1); return res; } + AF4 FsrEasuBF(AF2 p) { AF4 res = textureGather(InputTexture, p, 2); return res; } +# endif +# if SAMPLE_RCAS + #define FSR_RCAS_F + AF4 FsrRcasLoadF(ASU2 p) { return texelFetch(InputTexture, ASU2(p), 0); } + void FsrRcasInputF(inout AF1 r, inout AF1 g, inout AF1 b) {} +# endif #else - #define 
A_HALF - #include "ffx_a.h" - SAMPLER2D(InputTexture, 0); - IMAGE2D_WR(OutputTexture, rgba16f, 1); - #if SAMPLE_EASU - #define FSR_EASU_H 1 - AH4 FsrEasuRH(AF2 p) { AH4 res = AH4(textureGather(InputTexture, p, 0)); return res; } - AH4 FsrEasuGH(AF2 p) { AH4 res = AH4(textureGather(InputTexture, p, 1)); return res; } - AH4 FsrEasuBH(AF2 p) { AH4 res = AH4(textureGather(InputTexture, p, 2)); return res; } - #endif - #if SAMPLE_RCAS - #define FSR_RCAS_H - AH4 FsrRcasLoadH(ASW2 p) { return AH4(texelFetch(InputTexture, ASU2(p), 0)); } - void FsrRcasInputH(inout AH1 r,inout AH1 g,inout AH1 b){} - #endif +# define A_HALF +# include "ffx_a.h" + SAMPLER2D(InputTexture, 0); + IMAGE2D_WO(OutputTexture, rgba16f, 1); +# if SAMPLE_EASU + #define FSR_EASU_H 1 + AH4 FsrEasuRH(AF2 p) { AH4 res = AH4(textureGather(InputTexture, p, 0)); return res; } + AH4 FsrEasuGH(AF2 p) { AH4 res = AH4(textureGather(InputTexture, p, 1)); return res; } + AH4 FsrEasuBH(AF2 p) { AH4 res = AH4(textureGather(InputTexture, p, 2)); return res; } +# endif +# if SAMPLE_RCAS + #define FSR_RCAS_H + AH4 FsrRcasLoadH(ASW2 p) { return AH4(texelFetch(InputTexture, ASU2(p), 0)); } + void FsrRcasInputH(inout AH1 r,inout AH1 g,inout AH1 b){} +# endif #endif #include "ffx_fsr1.h" @@ -61,38 +61,40 @@ uniform vec4 u_params[3]; void CurrFilter(AU2 pos, AU4 Const0, AU4 Const1, AU4 Const2, AU4 Const3, AU4 Sample) { #if SAMPLE_BILINEAR - AF2 pp = (AF2(pos) * AF2_AU2(Const0.xy) + AF2_AU2(Const0.zw)) * AF2_AU2(Const1.xy) + AF2(0.5, -0.5) * AF2_AU2(Const1.zw); - imageStore(OutputTexture, ASU2(pos), texture2DLod(InputTexture, pp, 0.0)); + AF2 pp = (AF2(pos) * AF2_AU2(Const0.xy) + AF2_AU2(Const0.zw)) * AF2_AU2(Const1.xy) + AF2(0.5, -0.5) * AF2_AU2(Const1.zw); + imageStore(OutputTexture, ASU2(pos), texture2DLod(InputTexture, pp, 0.0)); #endif + #if SAMPLE_EASU - #if SAMPLE_SLOW_FALLBACK - AF3 c; - FsrEasuF(c, pos, Const0, Const1, Const2, Const3); - if( Sample.x == 1 ) - c *= c; - imageStore(OutputTexture, ASU2(pos), AF4(c, 1)); 
- #else - AH3 c; - FsrEasuH(c, pos, Const0, Const1, Const2, Const3); - if( Sample.x == 1 ) - c *= c; - imageStore(OutputTexture, ASU2(pos), AH4(c, 1)); - #endif +# if SAMPLE_SLOW_FALLBACK + AF3 c; + FsrEasuF(c, pos, Const0, Const1, Const2, Const3); + if( Sample.x == 1 ) + c *= c; + imageStore(OutputTexture, ASU2(pos), AF4(c, 1)); +# else + AH3 c; + FsrEasuH(c, pos, Const0, Const1, Const2, Const3); + if( Sample.x == 1 ) + c *= c; + imageStore(OutputTexture, ASU2(pos), AH4(c, 1)); +# endif #endif + #if SAMPLE_RCAS - #if SAMPLE_SLOW_FALLBACK - AF3 c; - FsrRcasF(c.r, c.g, c.b, pos, Const0); - if( Sample.x == 1 ) - c *= c; - imageStore(OutputTexture, ASU2(pos), AF4(c, 1)); - #else - AH3 c; - FsrRcasH(c.r, c.g, c.b, pos, Const0); - if( Sample.x == 1 ) - c *= c; - imageStore(OutputTexture, ASU2(pos), AH4(c, 1)); - #endif +# if SAMPLE_SLOW_FALLBACK + AF3 c; + FsrRcasF(c.r, c.g, c.b, pos, Const0); + if( Sample.x == 1 ) + c *= c; + imageStore(OutputTexture, ASU2(pos), AF4(c, 1)); +# else + AH3 c; + FsrRcasH(c.r, c.g, c.b, pos, Const0); + if( Sample.x == 1 ) + c *= c; + imageStore(OutputTexture, ASU2(pos), AH4(c, 1)); +# endif #endif } @@ -108,20 +110,20 @@ void main() SrcSize.x, SrcSize.y, // The size of the input image. DstSize.x, DstSize.y); // The output resolution. Sample.x = 0; // no HDR output -#endif +#endif // SAMPLE_EASU || SAMPLE_BILINEAR + #if SAMPLE_RCAS FsrRcasCon(Const0, ViewportSizeRcasAttenuation.z); Sample.x = 0; // no HDR output -#endif +#endif // SAMPLE_RCAS - // Do remapping of local xy in workgroup for a more PS-like swizzle pattern. 
- AU2 gxy = ARmp8x8(gl_LocalInvocationID.x) + AU2(gl_WorkGroupID.x << 4u, gl_WorkGroupID.y << 4u); - CurrFilter(gxy, Const0, Const1, Const2, Const3, Sample); - gxy.x += 8u; - CurrFilter(gxy, Const0, Const1, Const2, Const3, Sample); - gxy.y += 8u; - CurrFilter(gxy, Const0, Const1, Const2, Const3, Sample); - gxy.x -= 8u; - CurrFilter(gxy, Const0, Const1, Const2, Const3, Sample); + // Do remapping of local xy in workgroup for a more PS-like swizzle pattern. + AU2 gxy = ARmp8x8(gl_LocalInvocationID.x) + AU2(gl_WorkGroupID.x << 4u, gl_WorkGroupID.y << 4u); + CurrFilter(gxy, Const0, Const1, Const2, Const3, Sample); + gxy.x += 8u; + CurrFilter(gxy, Const0, Const1, Const2, Const3, Sample); + gxy.y += 8u; + CurrFilter(gxy, Const0, Const1, Const2, Const3, Sample); + gxy.x -= 8u; + CurrFilter(gxy, Const0, Const1, Const2, Const3, Sample); } - diff --git a/examples/48-drawindirect/cs_drawindirect.sc b/examples/48-drawindirect/cs_drawindirect.sc index 0e37a38ec..462361807 100644 --- a/examples/48-drawindirect/cs_drawindirect.sc +++ b/examples/48-drawindirect/cs_drawindirect.sc @@ -9,10 +9,10 @@ BUFFER_RO(instanceDataIn, vec4, 0); // Output -BUFFER_WR(indirectBuffer, uvec4, 1); -BUFFER_WR(instanceBufferOut, vec4, 2); +BUFFER_WO(indirectBuffer, uvec4, 1); +BUFFER_WO(instanceBufferOut, vec4, 2); #ifdef INDIRECT_COUNT -BUFFER_WR(indirectCountBuffer, int, 3); +BUFFER_WO(indirectCountBuffer, int, 3); #endif uniform vec4 u_drawParams; diff --git a/src/bgfx_compute.sh b/src/bgfx_compute.sh index 9e8137fea..d136004a8 100644 --- a/src/bgfx_compute.sh +++ b/src/bgfx_compute.sh @@ -32,22 +32,22 @@ #define readwrite #define IMAGE2D_RO( _name, _format, _reg) __IMAGE_XX(_name, _format, _reg, image2D, readonly) #define UIMAGE2D_RO(_name, _format, _reg) __IMAGE_XX(_name, _format, _reg, uimage2D, readonly) -#define IMAGE2D_WR( _name, _format, _reg) __IMAGE_XX(_name, _format, _reg, image2D, writeonly) -#define UIMAGE2D_WR(_name, _format, _reg) __IMAGE_XX(_name, _format, _reg, uimage2D, writeonly) 
+#define IMAGE2D_WO( _name, _format, _reg) __IMAGE_XX(_name, _format, _reg, image2D, writeonly) +#define UIMAGE2D_WO(_name, _format, _reg) __IMAGE_XX(_name, _format, _reg, uimage2D, writeonly) #define IMAGE2D_RW( _name, _format, _reg) __IMAGE_XX(_name, _format, _reg, image2D, readwrite) #define UIMAGE2D_RW(_name, _format, _reg) __IMAGE_XX(_name, _format, _reg, uimage2D, readwrite) #define IMAGE2D_ARRAY_RO( _name, _format, _reg) __IMAGE_XX(_name, _format, _reg, image2DArray, readonly) #define UIMAGE2D_ARRAY_RO(_name, _format, _reg) __IMAGE_XX(_name, _format, _reg, uimage2DArray, readonly) -#define IMAGE2D_ARRAY_WR( _name, _format, _reg) __IMAGE_XX(_name, _format, _reg, image2DArray, writeonly) -#define UIMAGE2D_ARRAY_WR(_name, _format, _reg) __IMAGE_XX(_name, _format, _reg, uimage2DArray, writeonly) +#define IMAGE2D_ARRAY_WO( _name, _format, _reg) __IMAGE_XX(_name, _format, _reg, image2DArray, writeonly) +#define UIMAGE2D_ARRAY_WO(_name, _format, _reg) __IMAGE_XX(_name, _format, _reg, uimage2DArray, writeonly) #define IMAGE2D_ARRAY_RW( _name, _format, _reg) __IMAGE_XX(_name, _format, _reg, image2DArray, readwrite) #define UIMAGE2D_ARRAY_RW(_name, _format, _reg) __IMAGE_XX(_name, _format, _reg, uimage2DArray, readwrite) #define IMAGE3D_RO( _name, _format, _reg) __IMAGE_XX(_name, _format, _reg, image3D, readonly) #define UIMAGE3D_RO(_name, _format, _reg) __IMAGE_XX(_name, _format, _reg, uimage3D, readonly) -#define IMAGE3D_WR( _name, _format, _reg) __IMAGE_XX(_name, _format, _reg, image3D, writeonly) -#define UIMAGE3D_WR(_name, _format, _reg) __IMAGE_XX(_name, _format, _reg, uimage3D, writeonly) +#define IMAGE3D_WO( _name, _format, _reg) __IMAGE_XX(_name, _format, _reg, image3D, writeonly) +#define UIMAGE3D_WO(_name, _format, _reg) __IMAGE_XX(_name, _format, _reg, uimage3D, writeonly) #define IMAGE3D_RW( _name, _format, _reg) __IMAGE_XX(_name, _format, _reg, image3D, readwrite) #define UIMAGE3D_RW(_name, _format, _reg) __IMAGE_XX(_name, _format, _reg, uimage3D, 
readwrite) @@ -59,7 +59,7 @@ #define BUFFER_RO(_name, _type, _reg) __BUFFER_XX(_name, _type, _reg, readonly) #define BUFFER_RW(_name, _type, _reg) __BUFFER_XX(_name, _type, _reg, readwrite) -#define BUFFER_WR(_name, _type, _reg) __BUFFER_XX(_name, _type, _reg, writeonly) +#define BUFFER_WO(_name, _type, _reg) __BUFFER_XX(_name, _type, _reg, writeonly) #define NUM_THREADS(_x, _y, _z) layout (local_size_x = _x, local_size_y = _y, local_size_z = _z) in; @@ -99,10 +99,10 @@ #define UIMAGE2D_RO(_name, _format, _reg) IMAGE2D_RO(_name, _format, _reg) -#define IMAGE2D_WR( _name, _format, _reg) \ +#define IMAGE2D_WO( _name, _format, _reg) \ WRITEONLY FORMAT(_format) RWTexture2D _name : REGISTER(u, _reg); \ -#define UIMAGE2D_WR(_name, _format, _reg) IMAGE2D_WR(_name, _format, _reg) +#define UIMAGE2D_WO(_name, _format, _reg) IMAGE2D_WO(_name, _format, _reg) #define IMAGE2D_RW( _name, _format, _reg) \ FORMAT(_format) RWTexture2D _name : REGISTER(u, _reg); \ @@ -114,10 +114,10 @@ #define UIMAGE2D_ARRAY_RO(_name, _format, _reg) IMAGE2D_ARRAY_RO(_name, _format, _reg) -#define IMAGE2D_ARRAY_WR( _name, _format, _reg) \ +#define IMAGE2D_ARRAY_WO( _name, _format, _reg) \ WRITEONLY FORMAT(_format) RWTexture2DArray _name : REGISTER(u, _reg); \ -#define UIMAGE2D_ARRAY_WR(_name, _format, _reg) IMAGE2D_ARRAY_WR(_name, _format, _reg) +#define UIMAGE2D_ARRAY_WO(_name, _format, _reg) IMAGE2D_ARRAY_WO(_name, _format, _reg) #define IMAGE2D_ARRAY_RW(_name, _format, _reg) \ FORMAT(_format) RWTexture2DArray _name : REGISTER(u, _reg); \ @@ -129,10 +129,10 @@ #define UIMAGE3D_RO(_name, _format, _reg) IMAGE3D_RO(_name, _format, _reg) -#define IMAGE3D_WR( _name, _format, _reg) \ +#define IMAGE3D_WO( _name, _format, _reg) \ WRITEONLY FORMAT(_format) RWTexture3D _name : REGISTER(u, _reg); -#define UIMAGE3D_WR(_name, _format, _reg) IMAGE3D_RW(_name, _format, _reg) +#define UIMAGE3D_WO(_name, _format, _reg) IMAGE3D_WO(_name, _format, _reg) #define IMAGE3D_RW( _name, _format, _reg) \ FORMAT(_format) 
RWTexture3D _name : REGISTER(u, _reg); \ @@ -142,11 +142,11 @@ #if BGFX_SHADER_LANGUAGE_METAL || BGFX_SHADER_LANGUAGE_SPIRV #define BUFFER_RO(_name, _struct, _reg) StructuredBuffer<_struct> _name : REGISTER(t, _reg) #define BUFFER_RW(_name, _struct, _reg) RWStructuredBuffer <_struct> _name : REGISTER(u, _reg) -#define BUFFER_WR(_name, _struct, _reg) BUFFER_RW(_name, _struct, _reg) +#define BUFFER_WO(_name, _struct, _reg) BUFFER_RW(_name, _struct, _reg) #else #define BUFFER_RO(_name, _struct, _reg) Buffer<_struct> _name : REGISTER(t, _reg) #define BUFFER_RW(_name, _struct, _reg) RWBuffer<_struct> _name : REGISTER(u, _reg) -#define BUFFER_WR(_name, _struct, _reg) BUFFER_RW(_name, _struct, _reg) +#define BUFFER_WO(_name, _struct, _reg) BUFFER_RW(_name, _struct, _reg) #endif #define NUM_THREADS(_x, _y, _z) [numthreads(_x, _y, _z)]